diff options
Diffstat (limited to 'parsing')
-rw-r--r-- | parsing/lexer.sh | 148 | ||||
-rw-r--r-- | parsing/parse.sh | 12 |
2 files changed, 156 insertions, 4 deletions
diff --git a/parsing/lexer.sh b/parsing/lexer.sh index ce39c23..c88c8b6 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -4,6 +4,9 @@ ln_off= start= c= wordexp= +here_stack= +here_awaiting_end= +here_awaiting_word= tok= tokens= @@ -67,6 +70,12 @@ lgetc() next() { + case "${here_stack}" in *"${RS}"*) + if ${here_awaiting_word}; then + next_here + return + fi + esac while :; do dbg "parsing char '$c' at lineno $lineno" case "${c}" in @@ -76,6 +85,15 @@ next() return ;; "${LF}") + case "${here_stack}" in *"${RS}"*) + if ${here_awaiting_end}; then + synexp '' + else + here_awaiting_end=false + here_awaiting_word=true + fi + ;; + esac lgetc lineno=$((${lineno} + 1)) tok=T_NEWLINE @@ -164,6 +182,89 @@ next() done } +next_here() +{ + local here= + local here_strip_tabs= + local here_end= + local here_escaped= + local line= + local word= + local res= + local wordexp= + + # Pop the here-document off of the stack. + here="${here_stack##*${RS}}" + here_strip_tabs="${here%%${US}*}" + here_end="${here%${US}*}" + here_end="${here_end#*${US}}" + here_escaped="${here##*${US}}" + here_stack="${here_stack%${RS}*}" + here_awaiting_word=false + + line='' + word='' + while :; do + case "${c}" in + '') + # Bash throws a warning when EOF occurs in a + # here document. mksh throws an error. dash, + # BusyBox ash, ksh93, and zsh accept EOF as a + # delimiter. We aim for the lowest common + # denominator, so throw an error like mksh does. + synerr 'Here-document "%s" unclosed' \ + "${here_end}" + ;; + "${LF}") + line="${line}${c}" + word="${word}${line}" + case "${line}" in "${here_end}${LF}") + lgetc + tok="T_WORD${US}${word}" + return + ;; + esac + line='' + ;; + "${HT}") + if ${here_strip_tabs}; then + case "${line}" in + '') + ;; + *) + line="${line}${c}" + ;; + esac + else + line="${line}${c}" + fi + ;; + '$') + if ! ${here_escaped}; then + lgetc + if ! res="$(scan_wordexp)"; then + exit 1 + fi + ln_off=${res%%${RS}*} + res="${res#*${RS}}" + c="${res%%${RS}*}" + res="${res#*${RS}}" + wordexp="${res%%${RS}*}" + lineno=$((${lineno} + ${ln_off})) + line="${line}${wordexp}" + continue + else + line="${line}${c}" + fi + ;; + *) + line="${line}${c}" + ;; + esac + lgetc + done +} + next_io() { case "${c}" in @@ -175,20 +276,32 @@ next_io() case "${c}" in '-') lgetc tok=T_DLESSDASH + here_stack="${here_stack}${RS}" + here_stack="${here_stack}true" + here_awaiting_end=true + here_awaiting_word=false + break ;; esac tok=T_DLESS + here_stack="${here_stack}${RS}false" + here_awaiting_end=true + here_awaiting_word=false + break ;; '&') lgetc tok=T_LESSAND + break ;; '>') lgetc tok=T_LESSGREAT + break ;; esac tok=T_LESS + break ;; '>') lgetc @@ -196,17 +309,21 @@ next_io() '>') lgetc tok=T_DGREAT + break ;; '&') lgetc tok=T_GREATAND + break ;; '|') lgetc tok=T_CLOBBER + break ;; esac tok=T_GREAT + break ;; esac } @@ -228,6 +345,22 @@ next_word() # We must advance lineno because scan_word() was run in a subshell. lineno=$((${lineno} + ${ln_off})) tok="T_WORD${US}${word}" + + case "${here_stack}" in *"${RS}"*) + if ${here_awaiting_end}; then + here_stack="${here_stack}${US}${word}" + case "${word}" in + *\\*|*'"'*|*"'"*) + here_stack="${here_stack}${US}true" + ;; + *) + here_stack="${here_stack}${US}false" + ;; + esac + here_awaiting_end=false + fi + ;; + esac } # @@ -267,6 +400,15 @@ scan_word() word="${word}${c}" ;; '$') + case "${here_stack}" in *"${RS}"*) + if ${here_awaiting_end}; then + synerr '%s %s %s %s' \ + 'Word expansions' \ + 'not supported in' \ + 'here-document' \ + 'delimiters' + fi + esac lgetc if ! res=$(scan_wordexp); then exit 1 @@ -737,6 +879,9 @@ run_sublexer() fname="${fn}" lineno=${ln} start="${st}" + here_stack="${US}" + here_awaiting_end=false + here_awaiting_word=false tokens='' c="${ch}" @@ -767,6 +912,9 @@ run_lexer() fname="${fn}" lineno=1 start="${st}" + here_stack="${US}" + here_awaiting_end=false + here_awaiting_word=false tokens='' # Read the first character and recognize the first token. diff --git a/parsing/parse.sh b/parsing/parse.sh index 6508142..81b8a54 100644 --- a/parsing/parse.sh +++ b/parsing/parse.sh @@ -649,7 +649,11 @@ try() #try 'foo $(bar) baz' #try 'foo$(bar$(baz))qux' #try 'foo $((1 + 1))' -try '$((1 + 1))' -try '$((1 + (1 + 1)))' -try '$((1 + $(foo) + 1))' -try '$((1' +#try '$((1 + 1))' +#try '$((1 + (1 + 1)))' +#try '$((1 + $(foo) + 1))' +#try '$((1' +try 'foo <<EOF' 'bar' 'EOF' +try 'foo <<-EOF' "${HT}bar" "${HT}EOF" +try 'foo <<EOF' '$(bar)' 'EOF' +try 'foo <<E"O"F' '$(bar)' 'E"O"F' # BUG |