summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author P. J. McDermott <pj@pehjota.net> 2016-02-20 14:54:49 (EST)
committer P. J. McDermott <pj@pehjota.net> 2016-02-20 15:10:18 (EST)
commit ff264a09cdf373ed2f9e14757ebd03317bb1955c (patch)
tree 22a9a87f32a6e7a1f7997481a1efd46541a640b6
parent e978b31de0d40fe2054327f9adfc78b9d3fc0e1d (diff)
download eggshell-ff264a09cdf373ed2f9e14757ebd03317bb1955c.zip
eggshell-ff264a09cdf373ed2f9e14757ebd03317bb1955c.tar.gz
eggshell-ff264a09cdf373ed2f9e14757ebd03317bb1955c.tar.bz2
Move parse_sub() logic into lexer
Now the parser starts the lexer, which in turn starts the parser. Previously, the parser started both the lexer and itself and then asked the lexer all kinds of personal questions, like "Where are you?", "Whatcha doin'?", and "What are you wearing?". No one wants to know what their lexer is wearing. Seriously, don't ask. Parsers should mind their own business. Social interactions and personal questions between code modules should be kept to a minimum.
-rw-r--r-- parsing/lexer.sh 66
-rw-r--r-- parsing/parse.sh 45
2 files changed, 48 insertions, 63 deletions
diff --git a/parsing/lexer.sh b/parsing/lexer.sh
index 928021e..a3ab762 100644
--- a/parsing/lexer.sh
+++ b/parsing/lexer.sh
@@ -1,5 +1,6 @@
fname=
lineno=
+start=
c=
tok=
tokens=
@@ -399,9 +400,9 @@ scan_wordexp()
;;
*)
# Command substitution
- if ! res="$(parse_sub "${fname}" \
- ${lineno} "${c}" false)"
- then
+ if ! res="$(run_sublexer "sub${fname}" \
+ ${lineno} "${start}" \
+ "${c}")"; then
exit 1
fi
ln_off=${res%%${RS}*}
@@ -411,7 +412,7 @@ scan_wordexp()
toks="${res%%${RS}*}"
lineno=${ln_off}
wordexp="\$(${STX}${toks}${ETX})"
- # ")" is recognized in parse_sub().
+ # ")" is recognized in run_sublexer().
;;
esac
;;
@@ -608,42 +609,63 @@ error()
exit 1
}
-init_lexer()
+run_sublexer()
{
local fn="${1}"
local ln="${2}"
- local char="${3}"
- shift 3
+ local st="${3}"
+ local ch="${4}"
+ shift 4
+ # Initialize global variables.
fname="${fn}"
lineno=${ln}
+ start="${st}"
tokens=''
- case "${char}" in
- '')
- pgetc
+
+ c="${ch}"
+ next
+
+ #dbg=true
+ # If this returns (does not exit), there are no errors.
+ ${start}
+ case "${tok%${US}*}" in
+ T_RPAREN)
;;
*)
- c="${char}"
+ synerr 'Missing ")"'
;;
esac
- next
-}
-get_lineno()
-{
- printf '%d' ${lineno}
+ printf "%d${RS}%c${RS}%s" ${lineno} "${c}" "${tokens}"
return 0
}
-get_lexer_char()
+run_lexer()
{
- printf '%c' "${c}"
- return 0
-}
+ local fn="${1}"
+ local st="${2}"
+ shift 2
-get_tokens()
-{
+ # Initialize global variables.
+ fname="${fn}"
+ lineno=1
+ start="${st}"
+ tokens=''
+
+ # Read the first character and recognize the first token.
+ pgetc
+ next
+
+ # If this returns (does not exit), there are no errors.
+ ${start}
+ if ! accept T_EOF; then
+ synexp ''
+ fi
+
+ # Return the tokens.
printf '%s' "${tokens}"
+
return 0
}
diff --git a/parsing/parse.sh b/parsing/parse.sh
index d9fb6a1..196ad84 100644
--- a/parsing/parse.sh
+++ b/parsing/parse.sh
@@ -5,10 +5,11 @@ ETX="$(printf '\003.')"; ETX="${ETX%.}"
RS="$(printf '\036.')"; RS="${RS%.}"
US="$(printf '\037.')"; US="${US%.}"
+dbg=true
+dbg=false
+
dbg()
{
- dbg=true
- dbg=false
if ${dbg}; then
printf 'DEBUG: %s\n' "${@}" >&2
fi
@@ -583,50 +584,12 @@ sequential_sep()
return 1
}
-# Maybe parse() should just tell the lexer what the starting production (parser
-# entry point) is.
-parse_sub()
-{
- local fn="${1}"
- local ln="${2}"
- local lexer_char="${3}"
- local complete="${4}"
- shift 3
-
- init_lexer "${fn}" ${ln} "${lexer_char}"
-
- # If this returns (does not exit), there are no errors.
- complete_command
- if ${complete} && ! accept T_EOF; then
- synexp ''
- fi
- if ! ${complete}; then
- case "${tok%${US}*}" in
- T_RPAREN)
- ;;
- *)
- synerr 'Missing ")"'
- ;;
- esac
- fi
-
- get_lineno
- printf '%c' "${RS}"
- get_lexer_char
- printf '%c' "${RS}"
- get_tokens
-
- return 0
-}
-
parse()
{
local fn="${1}"
shift 1
- local toks=
- if toks="$(parse_sub "${fn}" 1 '' true)"; then
- printf '%s\n' "${toks#*${RS}*${RS}}"
+ if run_lexer "${fn}" complete_command; then
return 0
fi
return 1