#!/bin/bash
# 2009-2010, 2012, 2017, 2020 Etersoft www.etersoft.ru
# Author: Vitaly Lipatov <lav@etersoft.ru>
# Public domain

# TODO: rewrite with shell commands, perl or C
# Python - http://www.linuxtopia.org/online_books/programming_books/python_programming/python_ch16s03.html
# Shell  - http://linux.byexamples.com/archives/127/uniq-and-basic-set-theory/
#        - http://maiaco.com/articles/shellSetOperations.php
# Perl   - http://docstore.mik.ua/orelly/perl/cookbook/ch04_09.htm
#        - http://blogs.perl.org/users/polettix/2012/03/sets-operations.html
# http://rosettacode.org/wiki/Symmetric_difference
# TODO: add unit tests
# http://ru.wikipedia.org/wiki/Операции_над_множествами

# Base set operations:
# * union
#   "1 2 3" "3 4 5" -> "1 2 3 4 5"
# * intersection
#   "1 2 3" "3 4 5" -> "3"
# * relative complement (subtraction, set difference) ( B \ A – members in B but not in A )
# http://en.wikipedia.org/wiki/Complement_%28set_theory%29
#   "1 3" "1 2 3 4" -> "2 4"
# * symmetric difference (симметричная разность) ( A ^ B – members in A or B but not both )
# http://en.wikipedia.org/wiki/Symmetric_difference
#   "1 2 3" "3 4 5" -> "1 2 4 5"

# Report an unrecoverable error: write "FATAL: <arguments>" to stderr and
# terminate the script with exit status 1.
fatal()
{
        printf 'FATAL: %s\n' "$*" >&2
        exit 1
}

# Normalize whitespace: split all arguments into words (on the default IFS:
# space, tab, newline) and print them on one line separated by single spaces.
# An empty input prints an empty line.
# Pathname expansion is disabled while splitting, so words such as '*' are
# kept literally. Note: noglob is switched off again on return.
strip()
{
        set -f
        set -- $@
        set +f
        # printf, not echo: echo would treat a result that is exactly "-n",
        # "-e" or "-E" as an option and print nothing.
        printf '%s\n' "$*"
}

# Legacy name: forwards all arguments unchanged to strip.
strip_spaces() { strip "$@" ; }

# deprecated: use 'strip' instead
# Legacy line-by-line filter: reads stdin and prints every input line
# passed through strip (one output line per input line).
filter_strip_spaces()
{
        local line
        # read returns non-zero on a final line that lacks a trailing newline
        # but still fills $line; the extra test keeps that line from being
        # dropped. IFS= leaves all whitespace handling to strip.
        while IFS= read -r line || [ -n "$line" ] ; do
                strip "$line"
        done
}

# Succeed when the arguments contain no words at all, i.e. nothing but
# spaces, tabs and newlines (the default IFS characters), or no arguments.
# Implemented with parameter expansion only: no subshell, and strings such
# as "-n" are correctly reported as non-empty.
is_empty()
{
        local rest="$*"
        # Drop every space, tab and newline; anything left is a real character.
        rest="${rest//[$' \t\n']/}"
        [ -z "$rest" ]
}

# Alternative spelling of is_empty: passes all arguments through and returns
# its status.
isempty() { is_empty "$@" ; }

# Print the first word of the arguments (after splitting on whitespace with
# pathname expansion disabled). Prints an empty line when there are no words.
# Note: noglob is switched off again on return.
first()
{
        set -f
        set -- $@
        set +f
        # printf, not echo: a first word like "-n" must be printed, not
        # interpreted as an echo option.
        printf '%s\n' "$1"
}

# Print the last word of the arguments (after splitting on whitespace with
# pathname expansion disabled). Prints an empty line when there are no words.
# Note: noglob is switched off again on return.
last()
{
        local i
        local last=""
        set -f
        for i in $@ ; do
                last="$i"
        done
        set +f
        # printf, not echo: a last word like "-n" must be printed, not
        # interpreted as an echo option.
        printf '%s\n' "$last"
}

# Print every word of the arguments on its own line with the first letter
# converted to upper case.
firstupper()
{
    # FIXME: \u in the replacement is a GNU sed extension
    sed 's/.*/\u&/' < <(list "$*")
}

# Print every word of the arguments on its own line, converted to lower case.
tolower()
{
    # awk is used on purpose: tr is broken in busybox (checked with OpenWrt)
    local prog='{print tolower($0)}'
    list "$*" | awk "$prog"
}


# Succeed when the arguments split into more than one word, i.e. there is
# whitespace between at least two words. Leading or trailing whitespace
# around a single word does not count.
# Note: noglob is switched off again on return.
has_space()
{
        set -f
        set -- $@
        set +f
        [ $# -gt 1 ]
}

# Print every word of the arguments on its own line (splitting on whitespace
# with pathname expansion disabled). Prints nothing when there are no words.
# Note: noglob is switched off again on return.
list()
{
        set -f
        set -- $@
        set +f
        # printf, not echo: words such as "-n" or "-e" must be listed too
        # instead of being swallowed as echo options. The guard avoids
        # printing a single empty line for an empty list.
        [ $# -eq 0 ] || printf '%s\n' "$@"
}

# Print the number of words in the arguments (splitting on whitespace with
# pathname expansion disabled).
# Note: noglob is switched off again on return.
count()
{
        set -f
        set -- $@
        set +f
        printf '%s\n' "$#"
}

# Print all words of the arguments sorted (sort -u) with duplicates removed,
# joined by single spaces on one line.
union()
{
        local sorted
        set -f
        sorted="$(list $@ | sort -u)"
        # unquoted on purpose: one word per line becomes one argument per word
        strip $sorted
        set +f
}

# Args: LIST1 LIST2
# Print the words that occur in both lists, in the order of LIST2.
# Every matching (LIST2 word, LIST1 word) pair yields one output word, so
# duplicates in either list are repeated in the result.
intersection()
{
        local common=""
        local left right
        set -f
        for right in $2 ; do
            for left in $1 ; do
                if [ "$right" = "$left" ] ; then
                    common="$common $right"
                fi
            done
        done
        set +f
        strip "$common"
}

# Alias for union: sort the words and remove duplicates.
uniq()
{
        # Quoted so that no pathname expansion happens here; union does the
        # word splitting itself with globbing disabled.
        union "$@"
}

# Args: PATTERN STRING...
# Succeed when the string (all remaining arguments joined by spaces)
# contains a match for PATTERN, a basic regular expression (grep notation).
has()
{
	local wd="$1"
	shift
	# printf, not echo: a string that is exactly "-n"/"-e"/"-E" would be
	# consumed by echo as an option and never reach grep.
	printf '%s\n' "$*" | grep -q -- "$wd"
}

# Args: PATTERN STRING...
# Succeed when the string (all remaining arguments joined by spaces)
# contains a match for PATTERN.
# Note: extended regular expressions (grep -E) are used!
# Write '[0-9]+(first|two)', not '[0-9]\+...'
match()
{
	local wd="$1"
	shift
	# printf, not echo: a string that is exactly "-n"/"-e"/"-E" would be
	# consumed by echo as an option and never reach grep.
	printf '%s\n' "$*" | grep -E -q -- "$wd"
}


# Args: PATTERN LIST
# Print the words of LIST that do not match PATTERN as a whole word
# (basic regular expression, anchored with ^...$).
# reg_remove "1." "11 12 21 22" -> "21 22"
reg_remove()
{
        local i
        local RES=
        set -f
        for i in $2 ; do
                # printf, not echo: words like "-n" must reach grep as data
                printf '%s\n' "$i" | grep -q -- "^$1$" || RES="$RES $i"
        done
        set +f
        strip "$RES"
}

# Args: PATTERN LIST
# Print the words of LIST that contain no word-bounded match (grep -w) for
# PATTERN, a basic regular expression.
# reg_wordremove "1." "11 12 21 22" -> "21 22"
reg_wordremove()
{
        local i
        local RES=""
        set -f
        for i in $2 ; do
                # printf, not echo: words like "-n" must reach grep as data.
                # "--" keeps a pattern starting with "-" from being parsed as
                # grep options.
                printf '%s\n' "$i" | grep -q -w -- "$1" || RES="$RES $i"
        done
        set +f
        strip "$RES"
}

# Args: WORD LIST
# Print the words of LIST that are not exactly equal to WORD (plain string
# comparison, no patterns), joined by single spaces.
reg_rqremove()
{
        local word
        local kept=""
        set -f
        for word in $2 ; do
                if [ "$word" != "$1" ] ; then
                        kept="$kept $word"
                fi
        done
        set +f
        strip "$kept"
}

# Args: LIST1 LIST2
# Print the words of LIST2 that do not occur in LIST1 (exact string match),
# keeping the order of LIST2.
# Example: exclude "1 3" "1 2 3 4" -> "2 4"
exclude()
{
        local rest="$2"
        local drop
        # Split LIST1 into the positional parameters with globbing disabled.
        set -f
        set -- $1
        set +f
        for drop in "$@" ; do
                rest="$(reg_rqremove "$drop" "$rest")"
        done
        strip "$rest"
}

# Args: PATTERNS LIST
# Print the words of LIST that match none of the PATTERNS as a whole word.
# reg_exclude "22 1." "11 12 21 22" -> "21"
reg_exclude()
{
        local kept
        set -f
        # Each pattern goes on its own line wrapped in ^...$ anchors and is
        # fed to grep -f (BRE, not ERE)
        kept="$(list $2 | grep -vf <(list $1 | sed 's/.*/^&$/') | tr '\n' ' ')"
        strip "$kept"
        set +f
}

# Args: PATTERNS LIST
# Print the words of LIST that contain no word-bounded match (grep -w) for
# any of the PATTERNS.
# reg_wordexclude "22 1." "11 12 21 22" -> "21"
reg_wordexclude()
{
        local kept
        set -f
        # One pattern per line for grep -f (BRE, not ERE)
        kept="$(list $2 | grep -vwf <(list $1) | tr '\n' ' ')"
        strip "$kept"
        set +f
}

# internal function
# Args: WORD LIST
# Succeed when WORD is exactly equal to one of the words of LIST.
# LIST is split without disabling pathname expansion here.
if_contain()
{
        local candidate
        for candidate in $2 ; do
            if [ "$candidate" = "$1" ] ; then
                return 0
            fi
        done
        return 1
}

# Args: LIST1 LIST2
# Symmetric difference: print the words of LIST1 that are not in LIST2,
# followed by the words of LIST2 that are not in LIST1.
difference()
{
        local only=""
        local item
        set -f
        for item in $1 ; do
            if ! if_contain "$item" "$2" ; then
                only="$only $item"
            fi
        done
        for item in $2 ; do
            if ! if_contain "$item" "$1" ; then
                only="$only $item"
            fi
        done
        set +f
        strip "$only"
}


# Args: PATTERNS LIST
# Print the words of LIST that match at least one of the PATTERNS as a
# whole word.
# reg_include "1." "11 12 21 22" -> "11 12"
reg_include()
{
        local picked
        set -f
        # Each pattern goes on its own line wrapped in ^...$ anchors and is
        # fed to grep -f (BRE, not ERE)
        picked="$(list $2 | grep -f <(list $1 | sed 's/.*/^&$/') | tr '\n' ' ')"
        strip "$picked"
        set +f
}

# Args: PATTERNS LIST
# Print the words of LIST that contain a word-bounded match (grep -w) for
# at least one of the PATTERNS.
# reg_wordinclude "1." "11 12 21 22" -> "11 12"
reg_wordinclude()
{
        local picked
        set -f
        # One pattern per line for grep -f (BRE, not ERE)
        picked="$(list $2 | grep -wf <(list $1) | tr '\n' ' ')"
        strip "$picked"
        set +f
}

# Args: WORDS LIST
# Succeed when every word of WORDS (first argument) is exactly equal to
# some word of LIST (second argument). An empty WORDS always succeeds.
# Note: noglob is switched off again on return.
contains()
{
    local needle candidate missing
    set -f
    for needle in $1 ; do
        missing=1
        for candidate in $2 ; do
            if [ "$candidate" = "$needle" ] ; then
                missing=0
                break
            fi
        done
        if [ "$missing" = 1 ] ; then
            set +f
            return 1
        fi
    done
    set +f
    return 0
}

# Args: COMMAND ARG1 [ARG...]
# Used by help: print the command line "$ <script> COMMAND "ARG1" ..." and
# then run this script ($0) with exactly those arguments, so the real output
# follows the shown command. Returns the exit status of that run.
example()
{
        local CMD="$1"
        local ARG1="$2"
        local arg shown
        shift 2
        # Show only the arguments that are really passed (no spurious ""
        # when there is no second argument).
        shown="\$ $0 $CMD \"$ARG1\""
        for arg in "$@" ; do
                shown="$shown \"$arg\""
        done
        printf '%s\n' "$shown"
        # Quoted so a script path containing spaces still works.
        "$0" "$CMD" "$ARG1" "$@"
}

# Same as example, but additionally prints TRUE when the command succeeds
# and FALSE when it fails.
example_res()
{
	# A real if/else: with 'A && B || C' a failing 'echo TRUE' would also
	# print FALSE.
	if example "$@" ; then
		echo TRUE
	else
		echo FALSE
	fi
}

# Print the usage text: the command reference followed by live examples
# (each example shows a command line and then runs it through this script).
help()
{
        # The obsoleted reg_remove / reg_wordremove commands are left out of
        # the reference on purpose.
        printf '%s\n' \
            "estrlist developed for string list operations. See also cut, join, paste..." \
            "Usage: $0 <command> [args]" \
            "Commands:" \
            "  strip [args]                      - remove extra spaces" \
            "  exclude <list1> <list2>           - print list2 items exclude list1 items" \
            "  reg_exclude <list PATTERN> [word list] - print only words that do not match PATTERN" \
            "  reg_wordexclude <list PATTERN> [word list] - print only words that do not match PATTERN (word boundary)" \
            "  reg_include <list PATTERN> [word list] - print only words that match PATTERN" \
            "  reg_wordinclude <list PATTERN> [word list] - print only words that match PATTERN (word boundary)" \
            "  has <PATTERN> string              - check the string for a match to the regular expression given in PATTERN (grep notation)" \
            "  match <PATTERN> string            - check the string for a match to the regular expression given in PATTERN (egrep notation)" \
            "  isempty [string] (is_empty)       - true if string has no any symbols (only zero or more spaces)" \
            "  has_space [string]                - true if string has whitespace (space, tab, newline)" \
            "  union [word list]                 - sort and remove duplicates" \
            "  intersection <list1> <list2>      - print only intersected items (the same in both lists)" \
            "  difference <list1> <list2>        - symmetric difference between lists items (not in both lists)" \
            "  uniq [word list]                  - alias for union" \
            "  list [word list]                  - just list words line by line" \
            "  count [word list]                 - print word count" \
            "  contains <word> [word list]       - check if word list contains the word" \
            "  first <word list>                 - print first word" \
            "  last <word list>                  - print last word" \
            "  firstupper <word list>            - print the words with first letter of each in upper case" \
            "  tolower <word list>               - print the words in lower case" \
            "" \
            "Legacy:" \
            "  strip_spaces                      - alias for strip" \
            "" \
            "Examples:"

        example strip "  hello   world  "
        printf '%s\n' "echo '  hello   world  ' | \$0 strip -"
        # No examples for the obsoleted reg_remove / reg_wordremove commands.
        example exclude "1 3" "1 2 3 4"
        example reg_exclude "22 1." "11 12 21 22"
        example reg_wordexclude "wo.* er" "work were more else"
        example reg_include "1." "11 12 21 22"
        example reg_wordinclude "lib" "lib libfoo foolib"
        example reg_wordinclude "lib.*" "lib-foo libbar other"
        example union "1 2 2 3 3"

        # Examples that also print the TRUE/FALSE result of the command.
        example_res contains "wo" "wo wor"
        example_res contains "word" "wo wor"
        example count "1 2 3 4 10"
        example_res isempty "  "
        #example_res isempty " 1 "
        example_res has ex "exactly"
        example_res has exo "exactly"
        example_res match "M[0-9]+" "M250"
        example_res match "M[0-9]+" "MI"
        example_res first "1 2 3"
        example_res last "1 2 3"
        example_res firstupper "world camp"
        example_res tolower "World Camp"
}

# Command-line entry point.
# $1 selects the function to run; the remaining arguments are passed to it.
#   estrlist <command> [args]     - run <command> with the given arguments
#   estrlist <command> - [args]   - stdin text is used as the first argument
#   estrlist <command> <arg1> -   - stdin text is used as the second argument
#   estrlist -- <command> [args]  - pass all arguments literally ("-" too)
COMMAND="$1"
if [ -z "$COMMAND" ] ; then
        echo "Run with --help for get command description." >&2
        exit 1
fi

if [ "$COMMAND" = "-h" ] || [ "$COMMAND" = "--help" ] ; then
        COMMAND="help"
fi

# obsoleted commands
case "$COMMAND" in
    reg_remove|reg_wordremove)
        fatal "obsoleted command $COMMAND"
        ;;
esac

shift

# "--": the real command follows and "-" gets no stdin handling
LITERAL_ARGS=""
if [ "$COMMAND" = "--" ] ; then
    COMMAND="$1"
    shift
    LITERAL_ARGS=1
fi

# Dispatch only to functions defined in this script: without this check a
# mistyped or hostile command name would run an arbitrary external program
# or shell builtin with the given arguments.
declare -F -- "$COMMAND" >/dev/null 2>&1 || fatal "unknown command '$COMMAND'"

if [ -n "$LITERAL_ARGS" ] ; then
    "$COMMAND" "$@"
elif [ "$1" = "-" ] ; then
    shift
    # stdin text is glued (with a space) in front of the first remaining
    # argument; further arguments stay separate
    "$COMMAND" "$(cat) $@"
elif [ "$2" = "-" ] ; then
    "$COMMAND" "$1" "$(cat)"
else
    "$COMMAND" "$@"
fi
