From 0a72d231a7d42396e3551f38eb63d55fea100669 Mon Sep 17 00:00:00 2001 From: P. J. McDermott Date: Sat, 20 Feb 2016 18:04:48 -0500 Subject: Rewrite codegen The previous version didn't handle nested or multiple command substitutions. The stack format has also been extended to support arithmetic expansions. --- diff --git a/parsing/codegen.sh b/parsing/codegen.sh index c28ffca..0e32987 100644 --- a/parsing/codegen.sh +++ b/parsing/codegen.sh @@ -1,4 +1,10 @@ -toktext='' +sc= + +sgetc() +{ + sc="$(dd bs=1 count=1 2>/dev/null; printf '.')" + sc="${sc%.}" +} toktext() { @@ -57,29 +63,80 @@ toktext() *) n='';; esac - toktext="${n}" + printf '%s' "${n}" } -codegen() +codegen_sub() { - local toks="${1}" + local array="${1}" shift 1 - local subtoks= - local t= - - case "${toks}" in - *"${STX}"*"${ETX}"*) - subtoks="${toks#*${STX}}" - subtoks="${subtoks%${ETX}*}" - toks="${toks%%${STX}*}$(codegen \ - "${subtoks}")${toks##*${ETX}}" - ;; - esac IFS="${RS}" - for t in ${toks}; do + for t in ${array}; do toktext "${t}" - printf '%s ' "${toktext}" + printf ' ' done unset IFS } + +# The token stack is encoded in a string in the following grammar: +# Terminal symbols: +# TOKEN +# Production rules: +# stack = tokens [ SOH type STX stack ETX [ tokens ] ] ; +# tokens = TOKEN { RS TOKEN } ; +# type = 'C' | 'A' ; +# We need to recurse through this stack to get to all the tokens. +# Each element in the stack (an array of tokens) gets run through the codegen to +# become text that is inserted into the array below. 
+parse_stack() +{ + local array= + + array='' + while :; do + sgetc + case "${sc}" in + '') + # EOF + break + ;; + "${SOH}") + # New stack element + sgetc + case "${sc}" in + 'C') + # Command substitution + sgetc # STX + array="${array}$(parse_stack)" + ;; + 'A') + # Arithmetic expansion + sgetc # STX + ;; + esac + ;; + "${ETX}") + # End of stack element + break + ;; + *) + # Token character + array="${array}${sc}" + ;; + esac + done + codegen_sub "${array}" +} + +codegen() +{ + local toks="${1}" + shift 1 + + if printf '%s' "${toks}" | parse_stack; then + return 0 + else + return 1 + fi +} diff --git a/parsing/lexer.sh b/parsing/lexer.sh index 3f6f52f..a7fbb80 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -467,7 +467,8 @@ scan_wordexp() res="${res#*${RS}}" toks="${res%%${RS}*}" lineno=${ln_off} - wordexp="\$(${STX}${toks}${ETX})" + wordexp="\$(${SOH}C${STX}${toks}" + wordexp="${wordexp}${ETX})" # ")" is recognized in run_sublexer(). ;; esac diff --git a/parsing/parse.sh b/parsing/parse.sh index 196ad84..a71f90b 100644 --- a/parsing/parse.sh +++ b/parsing/parse.sh @@ -1,3 +1,4 @@ +SOH="$(printf '\001.')"; SOH="${SOH%.}" STX="$(printf '\002.')"; STX="${STX%.}" ETX="$(printf '\003.')"; ETX="${ETX%.}" HT="$(printf '\t.')"; HT="${HT%.}" @@ -646,4 +647,5 @@ try 'foo $(bar)' try 'foo $(bar); baz' try 'foo $(bar)' 'baz' try 'foo $(bar) baz' +try 'foo$(bar$(baz))qux' #try 'foo $((1 + 1))' -- cgit v0.9.1