summaryrefslogtreecommitdiffstats
path: root/eshtrans/frontend
diff options
context:
space:
mode:
authorP. J. McDermott <pj@pehjota.net>2016-02-21 04:39:39 (EST)
committer P. J. McDermott <pj@pehjota.net>2016-02-21 04:39:39 (EST)
commitc9f95bf852092d8b1640b92f1c31e84420bb51dd (patch)
treeb47ff3e2a91fa39f234df6ddfd9559b6d4714de2 /eshtrans/frontend
parent4e6bfd6fe0d48ddf49cd61bb8cb31881a1e5e369 (diff)
downloadeggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.zip
eggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.tar.gz
eggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.tar.bz2
Copy everything into a new eshtrans/ directory
Split out and rename functions and variables where appropriate. Also add license headers. (The old scripts under parsing/ can be used under the same license.)
Diffstat (limited to 'eshtrans/frontend')
-rw-r--r--eshtrans/frontend/lexer.esh990
-rw-r--r--eshtrans/frontend/main.esh30
-rw-r--r--eshtrans/frontend/parser.esh591
3 files changed, 1611 insertions, 0 deletions
diff --git a/eshtrans/frontend/lexer.esh b/eshtrans/frontend/lexer.esh
new file mode 100644
index 0000000..0991239
--- /dev/null
+++ b/eshtrans/frontend/lexer.esh
@@ -0,0 +1,990 @@
+# Eggshell lexer
+#
+# Copyright (C) 2016 Patrick "P. J." McDermott
+#
+# This file is part of the Eggshell Compiler.
+#
+# The Eggshell Compiler is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# The Eggshell Compiler is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the Eggshell Compiler. If not, see
+# <http://www.gnu.org/licenses/>.
+
+dbg=false
+
+fname=
+lineno=
+ln_off=
+start=
+c=
+wordexp=
+here_queue=
+here_awaiting_end=
+here_awaiting_word=
+tok=
+tokens=
+
+# Write every argument as its own "DEBUG: ..." line on stderr, but only
+# while the global ${dbg} switch is enabled.
+dbg()
+{
+    ${dbg} || return 0
+    printf 'DEBUG: %s\n' "${@}" >&2
+}
+
+#
+# Error handling (used by scanning and interface functions)
+#
+
+error()
+{
+ local fmt="${1}"
+ shift 1
+
+ case "${fname}" in
+ '-')
+ printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
+ ;;
+ *)
+ printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
+ ;;
+ esac
+
+ # The parser and lexer run in a subshell, so this just returns up to the
+ # caller like an exception.
+ exit 1
+}
+
+# Raise a syntax error for the current token.
+#   $1  token that was expected; empty when there is no specific
+#       expectation to report
+synexp()
+{
+    local t="${1}"
+    shift 1
+
+    case "${t}" in
+        '')
+            synerr '%s unexpected' "$(tokname "${tok}")"
+            ;;
+        *)
+            synerr '%s unexpected (expecting %s)' \
+                "$(tokname "${tok}")" "$(tokname "${t}")"
+            ;;
+    esac
+}
+
+# Raise an error whose message is prefixed with "Syntax error: ".
+#   $1    printf format for the message
+#   $2..  arguments consumed by that format
+synerr()
+{
+    local msg="Syntax error: ${1}"
+    shift 1
+
+    error "${msg}" "${@}"
+}
+
+#
+# Input reading
+#
+
+# Read one byte from standard input into the global ${c}; ${c} is left
+# empty when nothing could be read.
+lgetc()
+{
+    local raw=
+
+    # The trailing "." protects a newline byte from being stripped by the
+    # command substitution; it is removed again below.
+    raw="$(dd bs=1 count=1 2>/dev/null; printf '.')"
+    c="${raw%.}"
+}
+
+#
+# Token recognition
+#
+
+# Recognize the next token starting at the look-ahead character ${c} and
+# store it in ${tok}.  Blanks, comments and backslash-newline pairs are
+# skipped; operators are recognized here, everything else is handed to
+# next_here(), next_io() or next_word().
+next()
+{
+ # A here-document body is due: read it instead of a regular token.
+ if ${here_awaiting_word}; then
+ next_here
+ return
+ fi
+ while :; do
+ dbg "parsing char '$c' at lineno $lineno"
+ case "${c}" in
+ '')
+ # Empty look-ahead means end of input.
+ lgetc
+ tok=T_EOF
+ return
+ ;;
+ "${LF}")
+ if ${here_awaiting_end}; then
+ # A here-document operator was not followed by its
+ # delimiter word.
+ synexp ''
+ else
+ # A complete here-document record is queued, so its body
+ # starts right after this newline.
+ case "${here_queue}" in *"${RS}"*)
+ here_awaiting_end=false
+ here_awaiting_word=true
+ ;;
+ esac
+ fi
+ lgetc
+ lineno=$((${lineno} + 1))
+ tok=T_NEWLINE
+ return
+ ;;
+ ' '|"${HT}")
+ # Blanks separate tokens and are otherwise ignored.
+ lgetc
+ continue
+ ;;
+ \\)
+ lgetc
+ # Backslash-newline is a line continuation.
+ case "${c}" in "${LF}")
+ lineno=$((${lineno} + 1))
+ lgetc
+ continue
+ ;;
+ esac
+ # Otherwise the backslash starts a word.
+ next_word \\
+ return
+ ;;
+ '#')
+ # Comment: discard up to (not including) the newline or EOF.
+ lgetc
+ while :; do
+ case "${c}" in "${LF}"|'')
+ break
+ ;;
+ esac
+ lgetc
+ done
+ continue
+ ;;
+ '&')
+ lgetc
+ case "${c}" in '&')
+ lgetc
+ tok=T_AND_IF
+ return
+ ;;
+ esac
+ tok=T_AND
+ return
+ ;;
+ '|')
+ lgetc
+ case "${c}" in '|')
+ lgetc
+ tok=T_OR_IF
+ return
+ ;;
+ esac
+ tok=T_PIPE
+ return
+ ;;
+ ';')
+ lgetc
+ case "${c}" in ';')
+ lgetc
+ tok=T_DSEMI
+ return
+ ;;
+ esac
+ dbg T_SEMI
+ tok=T_SEMI
+ return
+ ;;
+ '(')
+ lgetc
+ tok=T_LPAREN
+ return
+ ;;
+ ')')
+ lgetc
+ tok=T_RPAREN
+ return
+ ;;
+ '<'|'>')
+ # Redirection operators.
+ next_io
+ return
+ ;;
+ *)
+ next_word ''
+ return
+ ;;
+ esac
+ # Every branch above returns or continues, so this is not reached.
+ lgetc
+ done
+}
+
+next_here()
+{
+ local here=
+ local here_strip_tabs=
+ local here_end=
+ local here_escaped=
+ local line=
+ local word=
+ local res=
+ local wordexp=
+
+ # Dequeue the here-document.
+ here="${here_queue%%${RS}*}"
+ here_strip_tabs="${here%%${US}*}"
+ here_end="${here%${US}*}"
+ here_end="$(printf '%s' "${here_end#*${US}}" | \
+ sed 's/\\//g; s/"//g; s/'\''//g;')" # Stupid Vim: ')"
+ here_escaped="${here##*${US}}"
+ here_queue="${here_queue#*${RS}}"
+ here_awaiting_word=false
+
+ line=''
+ word=''
+ while :; do
+ case "${c}" in
+ '')
+ # Bash throws a warning when EOF occurs in a
+ # here document. mksh throws an error. dash,
+ # BusyBox ash, ksh93, and zsh accept EOF as a
+ # delimiter. We aim for the lowest common
+ # denominator, so throw an error like mksh does.
+ synerr 'Here-document "%s" unclosed' \
+ "${here_end}"
+ ;;
+ "${LF}")
+ word="${word}${line}"
+ case "${line}" in "${here_end}")
+ tok="T_WORD${US}${word}"
+ return
+ ;;
+ esac
+ word="${word}${c}"
+ line=''
+ ;;
+ "${HT}")
+ if ${here_strip_tabs}; then
+ case "${line}" in
+ '')
+ ;;
+ *)
+ line="${line}${c}"
+ ;;
+ esac
+ else
+ line="${line}${c}"
+ fi
+ ;;
+ '$')
+ if ! ${here_escaped}; then
+ lgetc
+ if ! res="$(scan_wordexp)"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ wordexp="${res%%${RS}*}"
+ lineno=$((${lineno} + ${ln_off}))
+ line="${line}${wordexp}"
+ continue
+ else
+ line="${line}${c}"
+ fi
+ ;;
+ *)
+ line="${line}${c}"
+ ;;
+ esac
+ lgetc
+ done
+}
+
+next_io()
+{
+ case "${c}" in
+ '<')
+ lgetc
+ case "${c}" in
+ '<')
+ lgetc
+ case "${c}" in '-')
+ lgetc
+ tok=T_DLESSDASH
+ here_queue="${here_queue}true"
+ here_awaiting_end=true
+ here_awaiting_word=false
+ break
+ ;;
+ esac
+ tok=T_DLESS
+ here_queue="${here_queue}false"
+ here_awaiting_end=true
+ here_awaiting_word=false
+ break
+ ;;
+ '&')
+ lgetc
+ tok=T_LESSAND
+ break
+ ;;
+ '>')
+ lgetc
+ tok=T_LESSGREAT
+ break
+ ;;
+ esac
+ tok=T_LESS
+ break
+ ;;
+ '>')
+ lgetc
+ case "${c}" in
+ '>')
+ lgetc
+ tok=T_DGREAT
+ break
+ ;;
+ '&')
+ lgetc
+ tok=T_GREATAND
+ break
+ ;;
+ '|')
+ lgetc
+ tok=T_CLOBBER
+ break
+ ;;
+ esac
+ tok=T_GREAT
+ break
+ ;;
+ esac
+}
+
+next_word()
+{
+ local prev_c="${1}"
+ shift 1
+ local res=
+ local word=
+
+ if ! res="$(scan_word false)"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ word="${prev_c}${res%%${RS}*}"
+
+ # We must advance lineno because scan_word() was run in a subshell.
+ lineno=$((${lineno} + ${ln_off}))
+ tok="T_WORD${US}${word}"
+
+ if ${here_awaiting_end}; then
+ here_queue="${here_queue}${US}${word}"
+ case "${word}" in
+ *\\*|*'"'*|*"'"*)
+ here_queue="${here_queue}${US}true"
+ ;;
+ *)
+ here_queue="${here_queue}${US}false"
+ ;;
+ esac
+ here_queue="${here_queue}${RS}"
+ here_awaiting_end=false
+ fi
+}
+
+#
+# Token scanning
+#
+
+scan_word()
+{
+ local in_param="${1}"
+ shift 1
+ local res=
+ local word=
+ local quoted=
+ local lines=
+ local wordexp=
+
+ word=''
+ quoted=false
+ lines=0
+ while :; do
+ dbg "parsing word char '$c' at lineno $lineno"
+ case "${c}" in
+ '')
+ break
+ ;;
+ "${LF}")
+ if ! ${in_param} && ! ${quoted}; then
+ break
+ fi
+ lineno=$((${lineno} + 1))
+ lines=$((${lines} + 1))
+ word="${word}${c}"
+ ;;
+ ' '|"${HT}"|'&'|'|'|';'|'('|')'|'<'|'>')
+ if ! ${in_param} && ! ${quoted}; then
+ break
+ fi
+ word="${word}${c}"
+ ;;
+ '$')
+ case "${here_queue}" in *"${RS}"*)
+ if ${here_awaiting_end}; then
+ synerr '%s %s %s %s' \
+ 'Word expansions' \
+ 'not supported in' \
+ 'here-document' \
+ 'delimiters'
+ fi
+ esac
+ lgetc
+ if ! res=$(scan_wordexp); then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ wordexp="${res%%${RS}*}"
+ # We must advance lineno because scan_wordexp()
+ # was run in a subshell.
+ lineno=$((${lineno} + ${ln_off}))
+ word="${word}${wordexp}"
+ # scan_wordexp() leaves behind an unused
+ # character, so we should skip the lgetc() call
+ # below.
+ continue
+ ;;
+ '`')
+ synerr 'Backquoted (old-style) %s' \
+ 'command substitution not supported'
+ break
+ ;;
+ \\)
+ word="${word}${c}"
+ lgetc
+ case "${c}" in '')
+ # Bash, ksh93, mksh, and zsh ignore a
+ # backslash at the end of a file, but
+ # dash and BusyBox ash include it in the
+ # word. To help with script
+ # portability, we'll throw an error
+ # (which is a reasonable thing to do
+ # anyway).
+ synerr 'Unexpected end of file %s' \
+ 'after "\"'
+ ;;
+ esac
+ word="${word}${c}"
+ ;;
+ \')
+ word="${word}${c}"
+ while :; do
+ lgetc
+ word="${word}${c}"
+ case "${c}" in
+ '')
+ synerr '%s %s' \
+ 'Unterminated' \
+ 'quoted string'
+ ;;
+ \')
+ break
+ ;;
+ esac
+ done
+ ;;
+ '"')
+ word="${word}${c}"
+ if ${quoted}; then
+ quoted=false
+ else
+ quoted=true
+ fi
+ ;;
+ '}')
+ if ${in_param} && ! ${quoted}; then
+ break
+ fi
+ word="${word}${c}"
+ ;;
+ *)
+ word="${word}${c}"
+ ;;
+ esac
+ lgetc
+ done
+
+ if ${quoted}; then
+ synerr 'Unterminated quoted string'
+ fi
+
+ printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}"
+}
+
+scan_wordexp()
+{
+ local res=
+ local toks=
+ local param=
+
+ wordexp=''
+ ln_off=0
+ case "${c}" in
+ '{')
+ # Parameter expansion brace
+ scan_wordexp_param_brace
+ ;;
+ '(')
+ # Arithmetic expansion or command substitution
+ lgetc
+ case "${c}" in
+ '(')
+ # Arithmetic expansion
+ scan_wordexp_arith
+ ;;
+ *)
+ # Command substitution
+ if ! res="$(run_sublexer "sub${fname}" \
+ ${lineno} "${start}" \
+ "${c}")"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ toks="${res%%${RS}*}"
+ lineno=${ln_off}
+ wordexp="\$(${SOH}C${STX}${toks}"
+ wordexp="${wordexp}${ETX})"
+ # ")" is recognized in run_sublexer().
+ ;;
+ esac
+ ;;
+ [@*#?$!A-Za-z0-9_-])
+ if ! res="$(scan_param)"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ param="${res%%${RS}*}"
+ lineno=$((${lineno} + ${ln_off}))
+ wordexp="\$${param}"
+ ;;
+ esac
+
+ printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}"
+ return 0
+}
+
+scan_wordexp_param_brace()
+{
+ local mod=
+ local res=
+ local param=
+ local word=
+
+ mod=true
+
+ lgetc
+ case "${c}" in
+ '#')
+ lgetc
+ case "${c}" in
+ [@*#?$!A-Za-z0-9_-])
+ # String length expansion
+ if ! res="$(scan_param)"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ param="${res%%${RS}*}"
+ lineno=$((${lineno} + ${ln_off}))
+ # Disable modifications.
+ mod=false
+ ;;
+ *)
+ # Special parameter "#"
+ param='#'
+ ;;
+ esac
+ ;;
+ *)
+ if ! res="$(scan_param)"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ param="${res%%${RS}*}"
+ lineno=$((${lineno} + ${ln_off}))
+ ;;
+ esac
+ wordexp="\${${param}"
+
+ # If modifications are allowed
+ if ${mod}; then
+ # Check for modifications.
+ mod=false
+ case "${c}" in
+ ':')
+ mod=true
+ wordexp="${wordexp}${c}"
+ lgetc
+ case "${c}" in '-'|'='|'?'|'+')
+ wordexp="${wordexp}${c}"
+ lgetc
+ ;;
+ esac
+ ;;
+ '-'|'='|'?'|'+')
+ mod=true
+ wordexp="${wordexp}${c}"
+ lgetc
+ ;;
+ '%')
+ mod=true
+ wordexp="${wordexp}${c}"
+ lgetc
+ case "${c}" in '%')
+ wordexp="${wordexp}${c}"
+ lgetc
+ ;;
+ esac
+ ;;
+ '#')
+ mod=true
+ wordexp="${wordexp}${c}"
+ lgetc
+ case "${c}" in '#')
+ wordexp="${wordexp}${c}"
+ lgetc
+ ;;
+ esac
+ ;;
+ esac
+ fi
+
+ # If a modification was found
+ if ${mod}; then
+ # Get word.
+ if ! res="$(scan_word true)"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ word="${res%%${RS}*}"
+ # We must advance lineno because scan_word() was run in a
+ # subshell.
+ lineno=$((${lineno} + ${ln_off}))
+ wordexp="${wordexp}${word}"
+ dbg "param mod word: '$word'"
+ fi
+
+ # Check for right brace.
+ case "${c}" in
+ '}')
+ wordexp="${wordexp}${c}"
+ lgetc
+ ;;
+ *)
+ synerr 'Missing "}"'
+ ;;
+ esac
+
+ return 0
+}
+
+scan_param()
+{
+ local param=
+
+ param=''
+ case "${c}" in
+ [@*#?$!0-])
+ # Special parameter
+ param="${c}"
+ lgetc
+ ;;
+ [1-9])
+ # Positional parameter
+ param="${param}${c}"
+ lgetc
+ while :; do
+ case "${c}" in [!0-9])
+ break
+ ;;
+ esac
+ param="${param}${c}"
+ lgetc
+ done
+ ;;
+ [A-Za-z_])
+ # Parameter name
+ param="${param}${c}"
+ lgetc
+ while :; do
+ case "${c}" in [!A-Za-z0-9_])
+ break
+ ;;
+ esac
+ param="${param}${c}"
+ lgetc
+ done
+ ;;
+ *)
+ synerr 'Bad parameter name'
+ ;;
+ esac
+
+ printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}"
+ return 0
+}
+
+scan_wordexp_arith()
+{
+ local arith=
+ local paren_lvl=
+ local res=
+ local sub_wordexp=
+
+ arith=''
+ paren_lvl=0
+ while :; do
+ lgetc
+ case "${c}" in
+ '')
+ synerr 'end of file unexpected (%s)' \
+ 'expecting "))"'
+ ;;
+ '(')
+ arith="${arith}${c}"
+ paren_lvl=$((${paren_lvl} + 1))
+ ;;
+ ')')
+ if [ ${paren_lvl} -eq 0 ]; then
+ lgetc
+ case "${c}" in ')')
+ wordexp="\$((${arith}))"
+ lgetc
+ return 0
+ ;;
+ esac
+ synerr 'Arithmetic expansion: ")" %s' \
+ 'unexpected'
+ fi
+ arith="${arith}${c}"
+ paren_lvl=$((${paren_lvl} - 1))
+ ;;
+ '$')
+ lgetc
+ if ! res=$(scan_wordexp); then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ sub_wordexp="${res%%${RS}*}"
+ # We must advance lineno because scan_wordexp()
+ # was run in a subshell.
+ lineno=$((${lineno} + ${ln_off}))
+ arith="${arith}${sub_wordexp}"
+ ;;
+ *)
+ arith="${arith}${c}"
+ ;;
+ esac
+ done
+}
+
+run_sublexer()
+{
+ local fn="${1}"
+ local ln="${2}"
+ local st="${3}"
+ local ch="${4}"
+ shift 4
+
+ # Initialize global variables.
+ fname="${fn}"
+ lineno=${ln}
+ start="${st}"
+ here_queue=''
+ here_awaiting_end=false
+ here_awaiting_word=false
+ tokens=''
+
+ c="${ch}"
+ next
+
+ #dbg=true
+ # If this returns (does not exit), there are no errors.
+ ${start}
+ case "${tok%${US}*}" in
+ T_RPAREN)
+ ;;
+ *)
+ synerr 'Missing ")"'
+ ;;
+ esac
+
+ printf "%d${RS}%c${RS}%s" ${lineno} "${c}" "${tokens}"
+ return 0
+}
+
+#
+# Interface
+#
+
+# Lex and parse standard input from the beginning and print the accepted
+# tokens.
+#   $1  name of the input, used in error messages
+#   $2  name of the start-symbol function to run
+# Raises a syntax error when the start symbol fails or when input
+# remains after it.
+run_lexer()
+{
+    # Initialize global variables.
+    fname="${1}"
+    start="${2}"
+    shift 2
+    lineno=1
+    here_queue=''
+    here_awaiting_end=false
+    here_awaiting_word=false
+    tokens=''
+
+    # Read the first character and recognize the first token.
+    lgetc
+    next
+
+    # Unexpected EOF
+    ${start} || synexp ''
+    # Anything left over after the start symbol is an error.
+    accept T_EOF || synexp ''
+
+    # Return the tokens.
+    printf '%s' "${tokens}"
+
+    return 0
+}
+
+accept()
+{
+ local t="${1}"
+ shift 1
+ local rw=
+
+ dbg "looking for $t, current tok ${tok%%${US}*}"
+ case "${t}" in
+ T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\
+ T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\
+ T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN)
+ dbg "looking for reserved word $t, have '$tok'"
+ if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
+ # Reserved words are recognized as literal
+ # T_WORDs.
+ if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+ return 1
+ fi
+ # T_WORD data unit must match reserved word
+ # exactly.
+ if ! [ "x${tok#T_WORD${US}}" = \
+ "x$(toktext "${t}")" ]; then
+ return 1
+ fi
+ # If the token matches the reserved word,
+ # replace it with the reserved word token.
+ tok="${t}"
+ fi
+ ;;
+ T_NAME)
+ # Names are recognized as literal T_WORDs.
+ if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+ return 1
+ fi
+ # Validate name.
+ case "${tok%%${US}*}" in
+ [A-Za-z_][0-9A-Za-z_]*)
+ ;;
+ *)
+ return 1
+ ;;
+ esac
+ tok="T_NAME${US}${tok#T_WORD${US}}"
+ ;;
+ T_FNAME)
+ # Function names are recognized as literal T_WORDs.
+ if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+ return 1
+ fi
+ # Validate name.
+ case "${tok%%${US}*}" in
+ [A-Za-z_][0-9A-Za-z_]*)
+ ;;
+ *)
+ return 1
+ ;;
+ esac
+ # Verify that the function name doesn't match any
+ # reserved words.
+ for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
+ T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
+ T_LBRACE T_RBRACE T_BANG T_IN; do
+ if [ "x${tok#T_WORD${US}}" = \
+ "x$(toktext "${rw}")" ]; then
+ tok="${rw}"
+ return 1
+ fi
+ done
+ tok="T_FNAME${US}${tok#T_WORD${US}}"
+ ;;
+ T_CMDNAME)
+ # The first word of a simple command is to be checked
+ # for reserved words.
+ if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+ return 1
+ fi
+ # Verify that the word doesn't match any reserved words.
+ for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
+ T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
+ T_LBRACE T_RBRACE T_BANG T_IN; do
+ if [ "x${tok#T_WORD${US}}" = \
+ "x$(toktext "${rw}")" ]; then
+ tok="${rw}"
+ return 1
+ fi
+ done
+ ;;
+ *)
+ if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
+ return 1
+ fi
+ ;;
+ esac
+
+ dbg "accept $t"
+ tokens="${tokens}${tok}${RS}"
+ next
+ return 0
+}
+
+# Accept a token of type $1 or raise a syntax error naming it.
+expect()
+{
+    local t="${1}"
+    shift 1
+
+    if ! accept "${t}"; then
+        synexp "${t}"
+    else
+        return 0
+    fi
+}
diff --git a/eshtrans/frontend/main.esh b/eshtrans/frontend/main.esh
new file mode 100644
index 0000000..b9f93a6
--- /dev/null
+++ b/eshtrans/frontend/main.esh
@@ -0,0 +1,30 @@
+# Eggshell frontend interface
+#
+# Copyright (C) 2016 Patrick "P. J." McDermott
+#
+# This file is part of the Eggshell Compiler.
+#
+# The Eggshell Compiler is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# The Eggshell Compiler is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the Eggshell Compiler. If not, see
+# <http://www.gnu.org/licenses/>.
+
+esh_parse()
+{
+ local fn="${1}"
+ shift 1
+
+ if run_lexer "${fn}" complete_command; then
+ return 0
+ fi
+ return 1
+}
diff --git a/eshtrans/frontend/parser.esh b/eshtrans/frontend/parser.esh
new file mode 100644
index 0000000..d49fa77
--- /dev/null
+++ b/eshtrans/frontend/parser.esh
@@ -0,0 +1,591 @@
+# Eggshell parser
+#
+# Copyright (C) 2016 Patrick "P. J." McDermott
+#
+# This file is part of the Eggshell Compiler.
+#
+# The Eggshell Compiler is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# The Eggshell Compiler is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the Eggshell Compiler. If not, see
+# <http://www.gnu.org/licenses/>.
+
+ptrace=false
+
+#
+# Function tracing
+#
+
+ptrace_begn()
+{
+ local fn="${1}"
+ shift 1
+
+ if ${ptrace}; then
+ printf 'TRACE: BEGN %s()\n' "${fn}" >&2
+ fi
+}
+
+ptrace_pass()
+{
+ local fn="${1}"
+ shift 1
+
+ if ${ptrace}; then
+ printf 'TRACE: PASS %s()\n' "${fn}" >&2
+ fi
+}
+
+ptrace_fail()
+{
+ local fn="${1}"
+ shift 1
+
+ if ${ptrace}; then
+ printf 'TRACE: FAIL %s()\n' "${fn}" >&2
+ fi
+}
+
+#
+# Parser
+#
+
+# complete_command : list [separator]
+complete_command()
+{
+    list || return 1
+    # The trailing separator is optional.
+    separator
+    return 0
+}
+
+list()
+{
+ ptrace_begn list
+ if and_or; then
+ while separator && and_or; do
+ :
+ done
+ ptrace_pass list
+ return 0
+ fi
+ ptrace_fail list
+ return 1
+}
+
+and_or()
+{
+ ptrace_begn and_or
+ if pipeline; then
+ while accept T_AND_IF || accept T_OR_IF; do
+ if ! linebreak || ! pipeline; then
+ ptrace_fail and_or
+ return 1
+ fi
+ done
+ ptrace_pass and_or
+ return 0
+ fi
+ ptrace_fail and_or
+ return 1
+}
+
+# pipeline : ["!"] pipe_sequence
+pipeline()
+{
+    ptrace_begn pipeline
+    # The negation is optional, so its result is ignored.
+    accept T_BANG
+    if ! pipe_sequence; then
+        ptrace_fail pipeline
+        return 1
+    fi
+    ptrace_pass pipeline
+    return 0
+}
+
+pipe_sequence()
+{
+ ptrace_begn pipe_sequence
+ if command; then
+ while accept T_PIPE; do
+ if ! linebreak || ! command; then
+ ptrace_fail pipe_sequence
+ return 1
+ fi
+ done
+ ptrace_pass pipe_sequence
+ return 0
+ fi
+ ptrace_fail pipe_sequence
+ return 1
+}
+
+# command : simple_command | compound_command [redirect_list]
+command()
+{
+    ptrace_begn command
+    if simple_command; then
+        :
+    elif compound_command; then
+        # Redirections after a compound command are optional.
+        redirect_list
+    else
+        ptrace_fail command
+        return 1
+    fi
+    ptrace_pass command
+    return 0
+}
+
+# compound_command : brace_group | subshell | for_clause | case_clause
+#                  | if_clause | while_clause | until_clause
+# The alternatives are tried in that order; the first match wins.
+compound_command()
+{
+    ptrace_begn compound_command
+    if brace_group || subshell || for_clause || case_clause || \
+            if_clause || while_clause || until_clause; then
+        ptrace_pass compound_command
+        return 0
+    fi
+    ptrace_fail compound_command
+    return 1
+}
+
+# subshell : "(" compound_list ")"
+subshell()
+{
+    ptrace_begn subshell
+    if ! accept T_LPAREN || ! compound_list || ! expect T_RPAREN; then
+        ptrace_fail subshell
+        return 1
+    fi
+    ptrace_pass subshell
+    return 0
+}
+
+# compound_list : [newline_list] term [separator]
+compound_list()
+{
+    ptrace_begn compound_list
+    # Leading newlines are optional.
+    newline_list
+    if ! term; then
+        ptrace_fail compound_list
+        return 1
+    fi
+    # So is the trailing separator.
+    separator
+    ptrace_pass compound_list
+    return 0
+}
+
+term()
+{
+ ptrace_begn term
+ if and_or; then
+ while separator; do
+ and_or
+ done
+ ptrace_pass term
+ return 0
+ fi
+ ptrace_fail term
+ return 1
+}
+
+# for_clause : "for" NAME linebreak ["in" [wordlist] sequential_sep]
+#              do_group
+for_clause()
+{
+    ptrace_begn for_clause
+    if ! accept T_FOR || ! expect T_NAME || ! linebreak; then
+        ptrace_fail for_clause
+        return 1
+    fi
+    if accept T_IN; then
+        # The word list itself may be empty.
+        wordlist
+        if ! sequential_sep; then
+            ptrace_fail for_clause
+            return 1
+        fi
+    fi
+    if ! do_group; then
+        ptrace_fail for_clause
+        return 1
+    fi
+    ptrace_pass for_clause
+    return 0
+}
+
+# wordlist : WORD+
+wordlist()
+{
+    local seen=false
+
+    ptrace_begn wordlist
+    while accept T_WORD; do
+        seen=true
+    done
+    if ${seen}; then
+        ptrace_pass wordlist
+        return 0
+    fi
+    ptrace_fail wordlist
+    return 1
+}
+
+# case_clause : "case" WORD linebreak "in" linebreak
+#               [case_list | case_list_ns] "esac"
+case_clause()
+{
+    if ! accept T_CASE || ! expect T_WORD || ! linebreak || \
+            ! expect T_IN || ! linebreak; then
+        return 1
+    fi
+    # The item list is optional, so its result is ignored.
+    case_list || case_list_ns
+    expect T_ESAC
+    return 0
+}
+
+# case_list_ns : [case_list] case_item_ns
+case_list_ns()
+{
+    if case_list && case_item_ns; then
+        return 0
+    fi
+    case_item_ns || return 1
+    return 0
+}
+
+# case_list : case_item+
+case_list()
+{
+    local seen=false
+
+    while case_item; do
+        seen=true
+    done
+    if ${seen}; then
+        return 0
+    fi
+    return 1
+}
+
+# case_item_ns : ["("] pattern ")" [compound_list] linebreak
+# A case item that is not terminated by ";;".
+case_item_ns()
+{
+    # The opening parenthesis is optional.
+    accept T_LPAREN
+    # The token type is T_RPAREN (as produced by next()); the former
+    # "RPAREN" matched no token, so expect() always raised an error here.
+    if pattern && expect T_RPAREN; then
+        # The command list is optional.
+        compound_list
+        if linebreak; then
+            return 0
+        fi
+    fi
+    return 1
+}
+
+case_item()
+{
+ accept T_LPAREN
+ if pattern && expect T_RPAREN; then
+ if compound_list || linebreak; then
+ if expect T_DSEMI && linebreak; then
+ return 0
+ fi
+ fi
+ fi
+ return 1
+}
+
+pattern()
+{
+ if accept T_CMDNAME; then
+ while accept T_PIPE; do
+ expect T_WORD
+ done
+ return 0
+ fi
+ return 1
+}
+
+# if_clause : "if" compound_list "then" compound_list [else_part] "fi"
+if_clause()
+{
+    if ! accept T_IF || ! compound_list || ! expect T_THEN || \
+            ! compound_list; then
+        return 1
+    fi
+    # The else part is optional, so its result is ignored.
+    else_part
+    expect T_FI
+    return 0
+}
+
+else_part()
+{
+ while accept T_ELIF; do
+ if compound_list && expect T_THEN && compound_list; then
+ continue
+ fi
+ return 1
+ done
+ if accept T_ELSE; then
+ if compound_list; then
+ return 0
+ fi
+ fi
+ return 1
+}
+
+# while_clause : "while" compound_list do_group
+while_clause()
+{
+    if accept T_WHILE && compound_list && do_group; then
+        return 0
+    fi
+    return 1
+}
+
+# until_clause : "until" compound_list do_group
+until_clause()
+{
+    if accept T_UNTIL && compound_list && do_group; then
+        return 0
+    fi
+    return 1
+}
+
+# function_body : compound_command [redirect_list]
+function_body()
+{
+    ptrace_begn function_body
+    if ! compound_command; then
+        ptrace_fail function_body
+        return 1
+    fi
+    # Redirections are optional.
+    redirect_list
+    ptrace_pass function_body
+    return 0
+}
+
+# brace_group : "{" compound_list "}"
+brace_group()
+{
+    ptrace_begn brace_group
+    if ! accept T_LBRACE || ! compound_list || ! expect T_RBRACE; then
+        ptrace_fail brace_group
+        return 1
+    fi
+    ptrace_pass brace_group
+    return 0
+}
+
+# do_group : "do" compound_list "done"
+do_group()
+{
+    ptrace_begn do_group
+    if ! accept T_DO || ! compound_list || ! expect T_DONE; then
+        ptrace_fail do_group
+        return 1
+    fi
+    ptrace_pass do_group
+    return 0
+}
+
+# simple_command : cmd_prefix [cmd_word [cmd_suffix]]
+#                | FNAME "(" ")" linebreak function_body
+#                | FNAME [cmd_suffix]
+#                | cmd_name [cmd_suffix]
+# A function definition is recognized here as well: a T_FNAME followed
+# by "(" starts one, otherwise the T_FNAME is an ordinary command word.
+simple_command()
+{
+    ptrace_begn simple_command
+    if cmd_prefix; then
+        if cmd_word; then
+            cmd_suffix
+        fi
+    elif accept T_FNAME; then
+        if accept T_LPAREN; then
+            expect T_RPAREN
+            if ! linebreak || ! function_body; then
+                ptrace_fail simple_command
+                return 1
+            fi
+        else
+            cmd_suffix
+        fi
+    elif cmd_name; then
+        cmd_suffix
+    else
+        ptrace_fail simple_command
+        return 1
+    fi
+    ptrace_pass simple_command
+    return 0
+}
+
+cmd_name()
+{
+ ptrace_begn cmd_name
+ # TODO: Assignment
+ if accept T_CMDNAME; then
+ ptrace_pass cmd_name
+ return 0
+ fi
+ ptrace_fail cmd_name
+ return 1
+}
+
+# cmd_word : WORD
+cmd_word()
+{
+    ptrace_begn cmd_word
+    # TODO: Assignment
+    if ! accept T_WORD; then
+        ptrace_fail cmd_word
+        return 1
+    fi
+    ptrace_pass cmd_word
+    return 0
+}
+
+cmd_prefix()
+{
+ ptrace_begn cmd_prefix
+ if io_redirect || accept T_ASSIGNMENT_WORD; then
+ while io_redirect || accept T_ASSIGNMENT_WORD; do
+ :
+ done
+ ptrace_pass cmd_prefix
+ return 0
+ fi
+ ptrace_fail cmd_prefix
+ return 1
+}
+
+cmd_suffix()
+{
+ ptrace_begn cmd_suffix
+ if io_redirect || accept T_WORD; then
+ while io_redirect || accept T_WORD; do
+ :
+ done
+ ptrace_pass cmd_suffix
+ return 0
+ fi
+ ptrace_fail cmd_suffix
+ return 1
+}
+
+# redirect_list : io_redirect+
+redirect_list()
+{
+    local seen=false
+
+    ptrace_begn redirect_list
+    while io_redirect; do
+        seen=true
+    done
+    if ${seen}; then
+        ptrace_pass redirect_list
+        return 0
+    fi
+    ptrace_fail redirect_list
+    return 1
+}
+
+# io_redirect : io_file | io_here
+io_redirect()
+{
+    ptrace_begn io_redirect
+    if ! io_file && ! io_here; then
+        ptrace_fail io_redirect
+        return 1
+    fi
+    ptrace_pass io_redirect
+    return 0
+}
+
+io_file()
+{
+ if accept T_LESS || accept T_LESSAND || accept T_GREAT || \
+ accept T_GREATAND || accept T_DGREAT || \
+ accept T_LESSGREAT || accept T_CLOBBER; then
+ if filename; then
+ return 0
+ fi
+ fi
+ return 1
+}
+
+# filename : WORD
+filename()
+{
+    accept T_WORD || return 1
+    return 0
+}
+
+io_here()
+{
+ if accept T_DLESS || accept T_DLESSDASH; then
+ if here_end; then
+ return 0
+ fi
+ fi
+ return 1
+}
+
+# here_end : WORD
+here_end()
+{
+    accept T_WORD || return 1
+    return 0
+}
+
+# newline_list : NEWLINE+
+newline_list()
+{
+    accept T_NEWLINE || return 1
+    while accept T_NEWLINE; do
+        :
+    done
+    return 0
+}
+
+# linebreak : [newline_list]
+# Always succeeds, whether or not any newline was consumed.
+linebreak()
+{
+    if newline_list; then
+        :
+    fi
+    return 0
+}
+
+# separator_op : "&" | ";"
+separator_op()
+{
+    accept T_AND && return 0
+    accept T_SEMI && return 0
+    return 1
+}
+
+# separator : separator_op linebreak | newline_list
+separator()
+{
+    if separator_op && linebreak; then
+        return 0
+    fi
+    newline_list || return 1
+    return 0
+}
+
+# sequential_sep : ";" linebreak | newline_list
+sequential_sep()
+{
+    ptrace_begn sequential_sep
+    if accept T_SEMI; then
+        if ! linebreak; then
+            ptrace_fail sequential_sep
+            return 1
+        fi
+    elif ! newline_list; then
+        ptrace_fail sequential_sep
+        return 1
+    fi
+    ptrace_pass sequential_sep
+    return 0
+}