diff options
author | P. J. McDermott <pj@pehjota.net> | 2016-02-20 18:04:48 (EST) |
---|---|---|
committer | P. J. McDermott <pj@pehjota.net> | 2016-02-20 18:04:48 (EST) |
commit | 0a72d231a7d42396e3551f38eb63d55fea100669 (patch) | |
tree | 2e5068a74632573d2c525b91e1924cad37629464 | |
parent | 897bd7b6ae7514896dae3253e854db5edc0b8009 (diff) | |
download | eggshell-0a72d231a7d42396e3551f38eb63d55fea100669.zip eggshell-0a72d231a7d42396e3551f38eb63d55fea100669.tar.gz eggshell-0a72d231a7d42396e3551f38eb63d55fea100669.tar.bz2 |
Rewrite codegen

The previous version didn't handle nested or multiple command substitutions.

The stack format has also been extended to support arithmetic expansions.
-rw-r--r-- | parsing/codegen.sh | 91 |
-rw-r--r-- | parsing/lexer.sh | 3 |
-rw-r--r-- | parsing/parse.sh | 2 |
3 files changed, 78 insertions, 18 deletions
diff --git a/parsing/codegen.sh b/parsing/codegen.sh index c28ffca..0e32987 100644 --- a/parsing/codegen.sh +++ b/parsing/codegen.sh @@ -1,4 +1,10 @@ -toktext='' +sc= + +sgetc() +{ + sc="$(dd bs=1 count=1 2>/dev/null; printf '.')" + sc="${sc%.}" +} toktext() { @@ -57,29 +63,80 @@ toktext() *) n='';; esac - toktext="${n}" + printf '%s' "${n}" } -codegen() +codegen_sub() { - local toks="${1}" + local array="${1}" shift 1 - local subtoks= - local t= - - case "${toks}" in - *"${STX}"*"${ETX}"*) - subtoks="${toks#*${STX}}" - subtoks="${subtoks%${ETX}*}" - toks="${toks%%${STX}*}$(codegen \ - "${subtoks}")${toks##*${ETX}}" - ;; - esac IFS="${RS}" - for t in ${toks}; do + for t in ${array}; do toktext "${t}" - printf '%s ' "${toktext}" + printf ' ' done unset IFS } + +# The token stack is encoded in a string in the following grammar: +# Terminal symbols: +# TOKEN +# Production rules: +# stack = tokens [ '<SOH>' type '<STX>' stack '<ETX>' [ tokens ] ] ; +# tokens = TOKEN { '<RS>' TOKEN } ; +# type = 'C' | 'A' ; +# We need to recurse through this stack to get to all the tokens. +# Each element in the stack (an array of tokens) gets run through the codegen to +# become text that is inserted into the array below. 
+parse_stack() +{ + local array= + + array='' + while :; do + sgetc + case "${sc}" in + '') + # EOF + break + ;; + "${SOH}") + # New stack element + sgetc + case "${sc}" in + 'C') + # Command substitution + sgetc # STX + array="${array}$(parse_stack)" + ;; + 'A') + # Arithmetic expansion + sgetc # STX + ;; + esac + ;; + "${ETX}") + # End of stack element + break + ;; + *) + # Token character + array="${array}${sc}" + ;; + esac + done + codegen_sub "${array}" +} + +codegen() +{ + local toks="${1}" + shift 1 + + if printf '%s' "${toks}" | parse_stack; then + return 0 + else + return 1 + fi +} diff --git a/parsing/lexer.sh b/parsing/lexer.sh index 3f6f52f..a7fbb80 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -467,7 +467,8 @@ scan_wordexp() res="${res#*${RS}}" toks="${res%%${RS}*}" lineno=${ln_off} - wordexp="\$(${STX}${toks}${ETX})" + wordexp="\$(${SOH}C${STX}${toks}" + wordexp="${wordexp}${ETX})" # ")" is recognized in run_sublexer(). ;; esac diff --git a/parsing/parse.sh b/parsing/parse.sh index 196ad84..a71f90b 100644 --- a/parsing/parse.sh +++ b/parsing/parse.sh @@ -1,3 +1,4 @@ +SOH="$(printf '\001.')"; SOH="${SOH%.}" STX="$(printf '\002.')"; STX="${STX%.}" ETX="$(printf '\003.')"; ETX="${ETX%.}" HT="$(printf '\t.')"; HT="${HT%.}" @@ -646,4 +647,5 @@ try 'foo $(bar)' try 'foo $(bar); baz' try 'foo $(bar)' 'baz' try 'foo $(bar) baz' +try 'foo$(bar$(baz))qux' #try 'foo $((1 + 1))' |