diff options
author | P. J. McDermott <pj@pehjota.net> | 2016-02-20 14:54:49 (EST) |
---|---|---|
committer | P. J. McDermott <pj@pehjota.net> | 2016-02-20 15:10:18 (EST) |
commit | ff264a09cdf373ed2f9e14757ebd03317bb1955c (patch) | |
tree | 22a9a87f32a6e7a1f7997481a1efd46541a640b6 | |
parent | e978b31de0d40fe2054327f9adfc78b9d3fc0e1d (diff) | |
download | eggshell-ff264a09cdf373ed2f9e14757ebd03317bb1955c.zip eggshell-ff264a09cdf373ed2f9e14757ebd03317bb1955c.tar.gz eggshell-ff264a09cdf373ed2f9e14757ebd03317bb1955c.tar.bz2 |
Move parse_sub() logic into lexer
Now the parser starts the lexer, which in turn starts the parser.
Previously, the parser started both the lexer and itself and then asked
the lexer all kinds of personal questions, like "Where are you?",
"Whatcha doin'?", and "What are you wearing?". No one wants to know
what their lexer is wearing. Seriously, don't ask. Parsers should mind
their own business. Social interactions and personal questions between
code modules should be kept to a minimum.
-rw-r--r-- | parsing/lexer.sh | 66 |
-rw-r--r-- | parsing/parse.sh | 45 |
2 files changed, 48 insertions, 63 deletions
diff --git a/parsing/lexer.sh b/parsing/lexer.sh index 928021e..a3ab762 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -1,5 +1,6 @@ fname= lineno= +start= c= tok= tokens= @@ -399,9 +400,9 @@ scan_wordexp() ;; *) # Command substitution - if ! res="$(parse_sub "${fname}" \ - ${lineno} "${c}" false)" - then + if ! res="$(run_sublexer "sub${fname}" \ + ${lineno} "${start}" \ + "${c}")"; then exit 1 fi ln_off=${res%%${RS}*} @@ -411,7 +412,7 @@ scan_wordexp() toks="${res%%${RS}*}" lineno=${ln_off} wordexp="\$(${STX}${toks}${ETX})" - # ")" is recognized in parse_sub(). + # ")" is recognized in run_sublexer(). ;; esac ;; @@ -608,42 +609,63 @@ error() exit 1 } -init_lexer() +run_sublexer() { local fn="${1}" local ln="${2}" - local char="${3}" - shift 3 + local st="${3}" + local ch="${4}" + shift 4 + # Initialize global variables. fname="${fn}" lineno=${ln} + start="${st}" tokens='' - case "${char}" in - '') - pgetc + + c="${ch}" + next + + #dbg=true + # If this returns (does not exit), there are no errors. + ${start} + case "${tok%${US}*}" in + T_RPAREN) ;; *) - c="${char}" + synerr 'Missing ")"' ;; esac - next -} -get_lineno() -{ - printf '%d' ${lineno} + printf "%d${RS}%c${RS}%s" ${lineno} "${c}" "${tokens}" return 0 } -get_lexer_char() +run_lexer() { - printf '%c' "${c}" - return 0 -} + local fn="${1}" + local st="${2}" + shift 2 -get_tokens() -{ + # Initialize global variables. + fname="${fn}" + lineno=1 + start="${st}" + tokens='' + + # Read the first character and recognize the first token. + pgetc + next + + # If this returns (does not exit), there are no errors. + ${start} + if ! accept T_EOF; then + synexp '' + fi + + # Return the tokens. 
printf '%s' "${tokens}" + return 0 } diff --git a/parsing/parse.sh b/parsing/parse.sh index d9fb6a1..196ad84 100644 --- a/parsing/parse.sh +++ b/parsing/parse.sh @@ -5,10 +5,11 @@ ETX="$(printf '\003.')"; ETX="${ETX%.}" RS="$(printf '\036.')"; RS="${RS%.}" US="$(printf '\037.')"; US="${US%.}" +dbg=true +dbg=false + dbg() { - dbg=true - dbg=false if ${dbg}; then printf 'DEBUG: %s\n' "${@}" >&2 fi @@ -583,50 +584,12 @@ sequential_sep() return 1 } -# Maybe parse() should just tell the lexer what the starting production (parser -# entry point) is. -parse_sub() -{ - local fn="${1}" - local ln="${2}" - local lexer_char="${3}" - local complete="${4}" - shift 3 - - init_lexer "${fn}" ${ln} "${lexer_char}" - - # If this returns (does not exit), there are no errors. - complete_command - if ${complete} && ! accept T_EOF; then - synexp '' - fi - if ! ${complete}; then - case "${tok%${US}*}" in - T_RPAREN) - ;; - *) - synerr 'Missing ")"' - ;; - esac - fi - - get_lineno - printf '%c' "${RS}" - get_lexer_char - printf '%c' "${RS}" - get_tokens - - return 0 -} - parse() { local fn="${1}" shift 1 - local toks= - if toks="$(parse_sub "${fn}" 1 '' true)"; then - printf '%s\n' "${toks#*${RS}*${RS}}" + if run_lexer "${fn}" complete_command; then return 0 fi return 1 |