From ff264a09cdf373ed2f9e14757ebd03317bb1955c Mon Sep 17 00:00:00 2001 From: P. J. McDermott Date: Sat, 20 Feb 2016 14:54:49 -0500 Subject: Move parse_sub() logic into lexer Now the parser starts the lexer, which in turn starts the parser. Previously, the parser started both the lexer and itself and then asked the lexer all kinds of personal questions, like "Where are you?", "Whatcha doin'?", and "What are you wearing?". No one wants to know what their lexer is wearing. Seriously, don't ask. Parsers should mind their own business. Social interactions and personal questions between code modules should be kept to a minimum. --- (limited to 'parsing') diff --git a/parsing/lexer.sh b/parsing/lexer.sh index 928021e..a3ab762 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -1,5 +1,6 @@ fname= lineno= +start= c= tok= tokens= @@ -399,9 +400,9 @@ scan_wordexp() ;; *) # Command substitution - if ! res="$(parse_sub "${fname}" \ - ${lineno} "${c}" false)" - then + if ! res="$(run_sublexer "sub${fname}" \ + ${lineno} "${start}" \ + "${c}")"; then exit 1 fi ln_off=${res%%${RS}*} @@ -411,7 +412,7 @@ scan_wordexp() toks="${res%%${RS}*}" lineno=${ln_off} wordexp="\$(${STX}${toks}${ETX})" - # ")" is recognized in parse_sub(). + # ")" is recognized in run_sublexer(). ;; esac ;; @@ -608,42 +609,63 @@ error() exit 1 } -init_lexer() +run_sublexer() { local fn="${1}" local ln="${2}" - local char="${3}" - shift 3 + local st="${3}" + local ch="${4}" + shift 4 + # Initialize global variables. fname="${fn}" lineno=${ln} + start="${st}" tokens='' - case "${char}" in - '') - pgetc + + c="${ch}" + next + + #dbg=true + # If this returns (does not exit), there are no errors. + ${start} + case "${tok%${US}*}" in + T_RPAREN) ;; *) - c="${char}" + synerr 'Missing ")"' ;; esac - next -} -get_lineno() -{ - printf '%d' ${lineno} + printf "%d${RS}%c${RS}%s" ${lineno} "${c}" "${tokens}" return 0 } -get_lexer_char() +run_lexer() { - printf '%c' "${c}" - return 0 -} + local fn="${1}" + local st="${2}" + shift 2 -get_tokens() -{ + # Initialize global variables. + fname="${fn}" + lineno=1 + start="${st}" + tokens='' + + # Read the first character and recognize the first token. + pgetc + next + + # If this returns (does not exit), there are no errors. + ${start} + if ! accept T_EOF; then + synexp '' + fi + + # Return the tokens. printf '%s' "${tokens}" + return 0 } diff --git a/parsing/parse.sh b/parsing/parse.sh index d9fb6a1..196ad84 100644 --- a/parsing/parse.sh +++ b/parsing/parse.sh @@ -5,10 +5,11 @@ ETX="$(printf '\003.')"; ETX="${ETX%.}" RS="$(printf '\036.')"; RS="${RS%.}" US="$(printf '\037.')"; US="${US%.}" +dbg=true +dbg=false + dbg() { - dbg=true - dbg=false if ${dbg}; then printf 'DEBUG: %s\n' "${@}" >&2 fi @@ -583,50 +584,12 @@ sequential_sep() return 1 } -# Maybe parse() should just tell the lexer what the starting production (parser -# entry point) is. -parse_sub() -{ - local fn="${1}" - local ln="${2}" - local lexer_char="${3}" - local complete="${4}" - shift 3 - - init_lexer "${fn}" ${ln} "${lexer_char}" - - # If this returns (does not exit), there are no errors. - complete_command - if ${complete} && ! accept T_EOF; then - synexp '' - fi - if ! ${complete}; then - case "${tok%${US}*}" in - T_RPAREN) - ;; - *) - synerr 'Missing ")"' - ;; - esac - fi - - get_lineno - printf '%c' "${RS}" - get_lexer_char - printf '%c' "${RS}" - get_tokens - - return 0 -} - parse() { local fn="${1}" shift 1 - local toks= - if toks="$(parse_sub "${fn}" 1 '' true)"; then - printf '%s\n' "${toks#*${RS}*${RS}}" + if run_lexer "${fn}" complete_command; then return 0 fi return 1 -- cgit v0.9.1