summary | refs | log | tree | commit | diff | stats
path: root/parsing
diff options
context:
space:
mode:
author P. J. McDermott <pj@pehjota.net> 2016-02-21 21:13:33 (EST)
committer P. J. McDermott <pj@pehjota.net> 2016-02-21 21:13:33 (EST)
commit 8a9a6865954ade85d4a55f955829ae08941c31b8 (patch)
tree 966d1c1a8bc4c6f7ed0671b8c53f1272be77dc4f /parsing
parent 5649a9aa1ce56c0cfdcab088983f2d3a4cb32a4c (diff)
download eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.zip
eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.tar.gz
eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.tar.bz2
Remove old demo parsing code
Diffstat (limited to 'parsing')
-rw-r--r-- parsing/codegen.sh 85
-rw-r--r-- parsing/lexer.sh 958
-rw-r--r-- parsing/parse.sh 676
-rw-r--r-- parsing/tokens.sh 119
4 files changed, 0 insertions, 1838 deletions
diff --git a/parsing/codegen.sh b/parsing/codegen.sh
deleted file mode 100644
index 8d9d2fd..0000000
--- a/parsing/codegen.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-sc=
-
-sgetc()
-{
- sc="$(dd bs=1 count=1 2>/dev/null; printf '.')"
- sc="${sc%.}"
-}
-
-codegen_sub()
-{
- local array="${1}"
- shift 1
-
- IFS="${RS}"
- for t in ${array}; do
- toktext "${t}"
- case "${t%${US}*}" in
- T_NEWLINE)
- ;;
- *)
- printf ' '
- ;;
- esac
- done
- unset IFS
-}
-
-# The token stack is encoded in a string in the following grammar:
-# Terminal symbols:
-# TOKEN
-# Production rules:
-# stack = tokens [ '<SOH>' type '<STX>' stack '<ETX>' [ tokens ] ] ;
-# tokens = TOKEN { '<RS>' TOKEN } ;
-# type = 'C' ;
-# We need to recurse through this stack to get to all the tokens.
-# Each element in the stack (an array of tokens) gets run through the codegen to
-# become text that is inserted into the array below.
-parse_stack()
-{
- local array=
-
- array=''
- while :; do
- sgetc
- case "${sc}" in
- '')
- # EOF
- break
- ;;
- "${SOH}")
- # New stack element
- sgetc
- case "${sc}" in
- 'C')
- # Command substitution
- sgetc # STX
- array="${array}$(parse_stack)."
- array="${array%.}"
- ;;
- esac
- ;;
- "${ETX}")
- # End of stack element
- break
- ;;
- *)
- # Token character
- array="${array}${sc}"
- ;;
- esac
- done
- codegen_sub "${array}"
-}
-
-codegen()
-{
- local toks="${1}"
- shift 1
-
- if printf '%s' "${toks}" | parse_stack; then
- return 0
- else
- return 1
- fi
-}
diff --git a/parsing/lexer.sh b/parsing/lexer.sh
deleted file mode 100644
index 886e7f8..0000000
--- a/parsing/lexer.sh
+++ /dev/null
@@ -1,958 +0,0 @@
-fname=
-lineno=
-ln_off=
-start=
-c=
-wordexp=
-here_queue=
-here_awaiting_end=
-here_awaiting_word=
-tok=
-tokens=
-
-#
-# Error handling (used by scanning and interface functions)
-#
-
-error()
-{
- local fmt="${1}"
- shift 1
-
- case "${fname}" in
- '-')
- printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
- ;;
- *)
- printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
- ;;
- esac
-
- # The parser and lexer run in a subshell, so this just returns up to the
- # caller like an exception.
- exit 1
-}
-
-synexp()
-{
- local t="${1}"
- shift 1
-
- if [ "x${t}" = 'x' ]; then
- synerr '%s unexpected' "$(tokname "${tok}")"
- else
- synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
- "$(tokname "${t}")"
- fi
-}
-
-synerr()
-{
- local fmt="${1}"
- shift 1
-
- error "Syntax error: ${fmt}" "${@}"
-}
-
-#
-# Input reading
-#
-
-lgetc()
-{
- c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
- c="${c%.}"
-}
-
-#
-# Token recognition
-#
-
-next()
-{
- if ${here_awaiting_word}; then
- next_here
- return
- fi
- while :; do
- dbg "parsing char '$c' at lineno $lineno"
- case "${c}" in
- '')
- lgetc
- tok=T_EOF
- return
- ;;
- "${LF}")
- if ${here_awaiting_end}; then
- synexp ''
- else
- case "${here_queue}" in *"${RS}"*)
- here_awaiting_end=false
- here_awaiting_word=true
- ;;
- esac
- fi
- lgetc
- lineno=$((${lineno} + 1))
- tok=T_NEWLINE
- return
- ;;
- ' '|"${HT}")
- lgetc
- continue
- ;;
- \\)
- lgetc
- case "${c}" in "${LF}")
- lineno=$((${lineno} + 1))
- lgetc
- continue
- ;;
- esac
- next_word \\
- return
- ;;
- '#')
- lgetc
- while :; do
- case "${c}" in "${LF}"|'')
- break
- ;;
- esac
- lgetc
- done
- continue
- ;;
- '&')
- lgetc
- case "${c}" in '&')
- lgetc
- tok=T_AND_IF
- return
- ;;
- esac
- tok=T_AND
- return
- ;;
- '|')
- lgetc
- case "${c}" in '|')
- lgetc
- tok=T_OR_IF
- return
- ;;
- esac
- tok=T_PIPE
- return
- ;;
- ';')
- lgetc
- case "${c}" in ';')
- lgetc
- tok=T_DSEMI
- return
- ;;
- esac
- dbg T_SEMI
- tok=T_SEMI
- return
- ;;
- '(')
- lgetc
- tok=T_LPAREN
- return
- ;;
- ')')
- lgetc
- tok=T_RPAREN
- return
- ;;
- '<'|'>')
- next_io
- return
- ;;
- *)
- next_word ''
- return
- ;;
- esac
- lgetc
- done
-}
-
-next_here()
-{
- local here=
- local here_strip_tabs=
- local here_end=
- local here_escaped=
- local line=
- local word=
- local res=
- local wordexp=
-
- # Dequeue the here-document.
- here="${here_queue%%${RS}*}"
- here_strip_tabs="${here%%${US}*}"
- here_end="${here%${US}*}"
- here_end="$(printf '%s' "${here_end#*${US}}" | \
- sed 's/\\//g; s/"//g; s/'\''//g;')" # Stupid Vim: ')"
- here_escaped="${here##*${US}}"
- here_queue="${here_queue#*${RS}}"
- here_awaiting_word=false
-
- line=''
- word=''
- while :; do
- case "${c}" in
- '')
- # Bash throws a warning when EOF occurs in a
- # here document. mksh throws an error. dash,
- # BusyBox ash, ksh93, and zsh accept EOF as a
- # delimiter. We aim for the lowest common
- # denominator, so throw an error like mksh does.
- synerr 'Here-document "%s" unclosed' \
- "${here_end}"
- ;;
- "${LF}")
- word="${word}${line}"
- case "${line}" in "${here_end}")
- tok="T_WORD${US}${word}"
- return
- ;;
- esac
- word="${word}${c}"
- line=''
- ;;
- "${HT}")
- if ${here_strip_tabs}; then
- case "${line}" in
- '')
- ;;
- *)
- line="${line}${c}"
- ;;
- esac
- else
- line="${line}${c}"
- fi
- ;;
- '$')
- if ! ${here_escaped}; then
- lgetc
- if ! res="$(scan_wordexp)"; then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- wordexp="${res%%${RS}*}"
- lineno=$((${lineno} + ${ln_off}))
- line="${line}${wordexp}"
- continue
- else
- line="${line}${c}"
- fi
- ;;
- *)
- line="${line}${c}"
- ;;
- esac
- lgetc
- done
-}
-
-next_io()
-{
- case "${c}" in
- '<')
- lgetc
- case "${c}" in
- '<')
- lgetc
- case "${c}" in '-')
- lgetc
- tok=T_DLESSDASH
- here_queue="${here_queue}true"
- here_awaiting_end=true
- here_awaiting_word=false
- break
- ;;
- esac
- tok=T_DLESS
- here_queue="${here_queue}false"
- here_awaiting_end=true
- here_awaiting_word=false
- break
- ;;
- '&')
- lgetc
- tok=T_LESSAND
- break
- ;;
- '>')
- lgetc
- tok=T_LESSGREAT
- break
- ;;
- esac
- tok=T_LESS
- break
- ;;
- '>')
- lgetc
- case "${c}" in
- '>')
- lgetc
- tok=T_DGREAT
- break
- ;;
- '&')
- lgetc
- tok=T_GREATAND
- break
- ;;
- '|')
- lgetc
- tok=T_CLOBBER
- break
- ;;
- esac
- tok=T_GREAT
- break
- ;;
- esac
-}
-
-next_word()
-{
- local prev_c="${1}"
- shift 1
- local res=
- local word=
-
- if ! res="$(scan_word false)"; then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- word="${prev_c}${res%%${RS}*}"
-
- # We must advance lineno because scan_word() was run in a subshell.
- lineno=$((${lineno} + ${ln_off}))
- tok="T_WORD${US}${word}"
-
- if ${here_awaiting_end}; then
- here_queue="${here_queue}${US}${word}"
- case "${word}" in
- *\\*|*'"'*|*"'"*)
- here_queue="${here_queue}${US}true"
- ;;
- *)
- here_queue="${here_queue}${US}false"
- ;;
- esac
- here_queue="${here_queue}${RS}"
- here_awaiting_end=false
- fi
-}
-
-#
-# Token scanning
-#
-
-scan_word()
-{
- local in_param="${1}"
- local res=
- local word=
- local quoted=
- local lines=
- local wordexp=
-
- word=''
- quoted=false
- lines=0
- while :; do
- dbg "parsing word char '$c' at lineno $lineno"
- case "${c}" in
- '')
- break
- ;;
- "${LF}")
- if ! ${in_param} && ! ${quoted}; then
- break
- fi
- lineno=$((${lineno} + 1))
- lines=$((${lines} + 1))
- word="${word}${c}"
- ;;
- ' '|"${HT}"|'&'|'|'|';'|'('|')'|'<'|'>')
- if ! ${in_param} && ! ${quoted}; then
- break
- fi
- word="${word}${c}"
- ;;
- '$')
- case "${here_queue}" in *"${RS}"*)
- if ${here_awaiting_end}; then
- synerr '%s %s %s %s' \
- 'Word expansions' \
- 'not supported in' \
- 'here-document' \
- 'delimiters'
- fi
- esac
- lgetc
- if ! res=$(scan_wordexp); then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- wordexp="${res%%${RS}*}"
- # We must advance lineno because scan_wordexp()
- # was run in a subshell.
- lineno=$((${lineno} + ${ln_off}))
- word="${word}${wordexp}"
- # scan_wordexp() leaves behind an unused
- # character, so we should skip the lgetc() call
- # below.
- continue
- ;;
- '`')
- synerr 'Backquoted (old-style) %s' \
- 'command substitution not supported'
- break
- ;;
- \\)
- word="${word}${c}"
- lgetc
- case "${c}" in '')
- # Bash, ksh93, mksh, and zsh ignore a
- # backslash at the end of a file, but
- # dash and BusyBox ash include it in the
- # word. To help with script
- # portability, we'll throw an error
- # (which is a reasonable thing to do
- # anyway).
- synerr 'Unexpected end of file %s' \
- 'after "\"'
- ;;
- esac
- word="${word}${c}"
- ;;
- \')
- word="${word}${c}"
- while :; do
- lgetc
- word="${word}${c}"
- case "${c}" in
- '')
- synerr '%s %s' \
- 'Unterminated' \
- 'quoted string'
- ;;
- \')
- break
- ;;
- esac
- done
- ;;
- '"')
- word="${word}${c}"
- if ${quoted}; then
- quoted=false
- else
- quoted=true
- fi
- ;;
- '}')
- if ${in_param} && ! ${quoted}; then
- break
- fi
- word="${word}${c}"
- ;;
- *)
- word="${word}${c}"
- ;;
- esac
- lgetc
- done
-
- if ${quoted}; then
- synerr 'Unterminated quoted string'
- fi
-
- printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}"
-}
-
-scan_wordexp()
-{
- local res=
- local toks=
- local param=
-
- wordexp=''
- ln_off=0
- case "${c}" in
- '{')
- # Parameter expansion brace
- scan_wordexp_param_brace
- ;;
- '(')
- # Arithmetic expansion or command substitution
- lgetc
- case "${c}" in
- '(')
- # Arithmetic expansion
- scan_wordexp_arith
- ;;
- *)
- # Command substitution
- if ! res="$(run_sublexer "sub${fname}" \
- ${lineno} "${start}" \
- "${c}")"; then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- toks="${res%%${RS}*}"
- lineno=${ln_off}
- wordexp="\$(${SOH}C${STX}${toks}"
- wordexp="${wordexp}${ETX})"
- # ")" is recognized in run_sublexer().
- ;;
- esac
- ;;
- [@*#?$!A-Za-z0-9_-])
- if ! res="$(scan_param)"; then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- param="${res%%${RS}*}"
- lineno=$((${lineno} + ${ln_off}))
- wordexp="\$${param}"
- ;;
- esac
-
- printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}"
- return 0
-}
-
-scan_wordexp_param_brace()
-{
- local mod=
- local res=
- local param=
- local word=
-
- mod=true
-
- lgetc
- case "${c}" in
- '#')
- lgetc
- case "${c}" in
- [@*#?$!A-Za-z0-9_-])
- # String length expansion
- if ! res="$(scan_param)"; then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- param="${res%%${RS}*}"
- lineno=$((${lineno} + ${ln_off}))
- # Disable modifications.
- mod=false
- ;;
- *)
- # Special parameter "#"
- param='#'
- ;;
- esac
- ;;
- *)
- if ! res="$(scan_param)"; then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- param="${res%%${RS}*}"
- lineno=$((${lineno} + ${ln_off}))
- ;;
- esac
- wordexp="\${${param}"
-
- # If modifications are allowed
- if ${mod}; then
- # Check for modifications.
- mod=false
- case "${c}" in
- ':')
- mod=true
- wordexp="${wordexp}${c}"
- lgetc
- case "${c}" in '-'|'='|'?'|'+')
- wordexp="${wordexp}${c}"
- lgetc
- ;;
- esac
- ;;
- '-'|'='|'?'|'+')
- mod=true
- wordexp="${wordexp}${c}"
- lgetc
- ;;
- '%')
- mod=true
- wordexp="${wordexp}${c}"
- lgetc
- case "${c}" in '%')
- wordexp="${wordexp}${c}"
- lgetc
- ;;
- esac
- ;;
- '#')
- mod=true
- wordexp="${wordexp}${c}"
- lgetc
- case "${c}" in '#')
- wordexp="${wordexp}${c}"
- lgetc
- ;;
- esac
- ;;
- esac
- fi
-
- # If a modification was found
- if ${mod}; then
- # Get word.
- if ! res="$(scan_word true)"; then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- word="${res%%${RS}*}"
- # We must advance lineno because scan_word() was run in a
- # subshell.
- lineno=$((${lineno} + ${ln_off}))
- wordexp="${wordexp}${word}"
- dbg "param mod word: '$word'"
- fi
-
- # Check for right brace.
- case "${c}" in
- '}')
- wordexp="${wordexp}${c}"
- lgetc
- ;;
- *)
- synerr 'Missing "}"'
- ;;
- esac
-
- return 0
-}
-
-scan_param()
-{
- local param=
-
- param=''
- case "${c}" in
- [@*#?$!0-])
- # Special parameter
- param="${c}"
- lgetc
- ;;
- [1-9])
- # Positional parameter
- param="${param}${c}"
- lgetc
- while :; do
- case "${c}" in [!0-9])
- break
- ;;
- esac
- param="${param}${c}"
- lgetc
- done
- ;;
- [A-Za-z_])
- # Parameter name
- param="${param}${c}"
- lgetc
- while :; do
- case "${c}" in [!A-Za-z0-9_])
- break
- ;;
- esac
- param="${param}${c}"
- lgetc
- done
- ;;
- *)
- synerr 'Bad parameter name'
- ;;
- esac
-
- printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}"
- return 0
-}
-
-scan_wordexp_arith()
-{
- local arith=
- local paren_lvl=
- local res=
- local sub_wordexp=
-
- arith=''
- paren_lvl=0
- while :; do
- lgetc
- case "${c}" in
- '')
- synerr 'end of file unexpected (%s)' \
- 'expecting "))"'
- ;;
- '(')
- arith="${arith}${c}"
- paren_lvl=$((${paren_lvl} + 1))
- ;;
- ')')
- if [ ${paren_lvl} -eq 0 ]; then
- lgetc
- case "${c}" in ')')
- wordexp="\$((${arith}))"
- lgetc
- return 0
- ;;
- esac
- synerr 'Arithmetic expansion: ")" %s' \
- 'unexpected'
- fi
- arith="${arith}${c}"
- paren_lvl=$((${paren_lvl} - 1))
- ;;
- '$')
- lgetc
- if ! res=$(scan_wordexp); then
- exit 1
- fi
- ln_off=${res%%${RS}*}
- res="${res#*${RS}}"
- c="${res%%${RS}*}"
- res="${res#*${RS}}"
- sub_wordexp="${res%%${RS}*}"
- # We must advance lineno because scan_wordexp()
- # was run in a subshell.
- lineno=$((${lineno} + ${ln_off}))
- arith="${arith}${sub_wordexp}"
- ;;
- *)
- arith="${arith}${c}"
- ;;
- esac
- done
-}
-
-#
-# Interface
-#
-
-# Check the current token. If it matches, add it to the syntax array.
-accept()
-{
- local t="${1}"
- local rw=
-
- dbg "looking for $t, current tok ${tok%%${US}*}"
- case "${t}" in
- T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\
- T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\
- T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN)
- dbg "looking for reserved word $t, have '$tok'"
- if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
- # Reserved words are recognized as literal
- # T_WORDs.
- if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
- return 1
- fi
- # T_WORD data unit must match reserved word
- # exactly.
- if ! [ "x${tok#T_WORD${US}}" = \
- "x$(toktext "${t}")" ]; then
- return 1
- fi
- # If the token matches the reserved word,
- # replace it with the reserved word token.
- tok="${t}"
- fi
- ;;
- T_NAME)
- # Names are recognized as literal T_WORDs.
- if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
- return 1
- fi
- # Validate name.
- case "${tok%%${US}*}" in
- [A-Za-z_][0-9A-Za-z_]*)
- ;;
- *)
- return 1
- ;;
- esac
- tok="T_NAME${US}${tok#T_WORD${US}}"
- ;;
- T_FNAME)
- # Function names are recognized as literal T_WORDs.
- if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
- return 1
- fi
- # Validate name.
- case "${tok%%${US}*}" in
- [A-Za-z_][0-9A-Za-z_]*)
- ;;
- *)
- return 1
- ;;
- esac
- # Verify that the function name doesn't match any
- # reserved words.
- for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
- T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
- T_LBRACE T_RBRACE T_BANG T_IN; do
- if [ "x${tok#T_WORD${US}}" = \
- "x$(toktext "${rw}")" ]; then
- tok="${rw}"
- return 1
- fi
- done
- tok="T_FNAME${US}${tok#T_WORD${US}}"
- ;;
- T_CMDNAME)
- # The first word of a simple command is to be checked
- # for reserved words.
- if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
- return 1
- fi
- # Verify that the word doesn't match any reserved words.
- for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
- T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
- T_LBRACE T_RBRACE T_BANG T_IN; do
- if [ "x${tok#T_WORD${US}}" = \
- "x$(toktext "${rw}")" ]; then
- tok="${rw}"
- return 1
- fi
- done
- ;;
- *)
- if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
- return 1
- fi
- ;;
- esac
-
- dbg "accept $t"
- tokens="${tokens}${tok}${RS}"
- next
- return 0
-}
-
-expect()
-{
- local t="${1}"
-
- if accept "${t}"; then
- return 0
- else
- synexp "${t}"
- fi
-}
-
-# Called by the lexer, not the parser
-run_sublexer()
-{
- local fn="${1}"
- local ln="${2}"
- local st="${3}"
- local ch="${4}"
- shift 4
-
- # Initialize global variables.
- fname="${fn}"
- lineno=${ln}
- start="${st}"
- here_queue=''
- here_awaiting_end=false
- here_awaiting_word=false
- tokens=''
-
- c="${ch}"
- next
-
- #dbg=true
- # If this returns (does not exit), there are no errors.
- ${start}
- case "${tok%${US}*}" in
- T_RPAREN)
- ;;
- *)
- synerr 'Missing ")"'
- ;;
- esac
-
- printf "%d${RS}%c${RS}%s" ${lineno} "${c}" "${tokens}"
- return 0
-}
-
-run_lexer()
-{
- local fn="${1}"
- local st="${2}"
- shift 2
-
- # Initialize global variables.
- fname="${fn}"
- lineno=1
- start="${st}"
- here_queue=''
- here_awaiting_end=false
- here_awaiting_word=false
- tokens=''
-
- # Read the first character and recognize the first token.
- lgetc
- next
-
- # If this returns (does not exit), there are no errors.
- ${start}
- if ! accept T_EOF; then
- synexp ''
- fi
-
- # Return the tokens.
- printf '%s' "${tokens}"
-
- return 0
-}
diff --git a/parsing/parse.sh b/parsing/parse.sh
deleted file mode 100644
index 199005a..0000000
--- a/parsing/parse.sh
+++ /dev/null
@@ -1,676 +0,0 @@
-SOH="$(printf '\001.')"; SOH="${SOH%.}"
-STX="$(printf '\002.')"; STX="${STX%.}"
-ETX="$(printf '\003.')"; ETX="${ETX%.}"
- HT="$(printf '\t.')"; HT="${HT%.}"
- LF="$(printf '\n.')"; LF="${LF%.}"
- RS="$(printf '\036.')"; RS="${RS%.}"
- US="$(printf '\037.')"; US="${US%.}"
-
-dbg=true
-dbg=false
-
-dbg()
-{
- if ${dbg}; then
- printf 'DEBUG: %s\n' "${@}" >&2
- fi
-}
-
-ptrace=false
-
-ptrace_begn()
-{
- local fn="${1}"
- shift 1
-
- if ${ptrace}; then
- printf 'TRACE: BEGN %s()\n' "${fn}" >&2
- fi
-}
-
-ptrace_pass()
-{
- local fn="${1}"
- shift 1
-
- if ${ptrace}; then
- printf 'TRACE: PASS %s()\n' "${fn}" >&2
- fi
-}
-
-ptrace_fail()
-{
- local fn="${1}"
- shift 1
-
- if ${ptrace}; then
- printf 'TRACE: FAIL %s()\n' "${fn}" >&2
- fi
-}
-
-. ./tokens.sh
-. ./lexer.sh
-. ./codegen.sh
-
-complete_command()
-{
- if list; then
- separator
- return 0
- fi
- # Unexpected EOF
- synexp ''
-}
-
-list()
-{
- ptrace_begn list
- if and_or; then
- while separator && and_or; do
- :
- done
- ptrace_pass list
- return 0
- fi
- ptrace_fail list
- return 1
-}
-
-and_or()
-{
- ptrace_begn and_or
- if pipeline; then
- while accept T_AND_IF || accept T_OR_IF; do
- if ! linebreak || ! pipeline; then
- ptrace_fail and_or
- return 1
- fi
- done
- ptrace_pass and_or
- return 0
- fi
- ptrace_fail and_or
- return 1
-}
-
-pipeline()
-{
- ptrace_begn pipeline
- accept T_BANG
- if pipe_sequence; then
- ptrace_pass pipeline
- return 0
- fi
- ptrace_fail pipeline
- return 1
-}
-
-pipe_sequence()
-{
- ptrace_begn pipe_sequence
- if command; then
- while accept T_PIPE; do
- if ! linebreak || ! command; then
- ptrace_fail pipe_sequence
- return 1
- fi
- done
- ptrace_pass pipe_sequence
- return 0
- fi
- ptrace_fail pipe_sequence
- return 1
-}
-
-command()
-{
- ptrace_begn command
- if simple_command; then
- ptrace_pass command
- return 0
- elif compound_command; then
- redirect_list
- ptrace_pass command
- return 0
- fi
- ptrace_fail command
- return 1
-}
-
-compound_command()
-{
- ptrace_begn compound_command
- if brace_group; then
- ptrace_pass compound_command
- return 0
- elif subshell; then
- ptrace_pass compound_command
- return 0
- elif for_clause; then
- ptrace_pass compound_command
- return 0
- elif case_clause; then
- ptrace_pass compound_command
- return 0
- elif if_clause; then
- ptrace_pass compound_command
- return 0
- elif while_clause; then
- ptrace_pass compound_command
- return 0
- elif until_clause; then
- ptrace_pass compound_command
- return 0
- fi
- ptrace_fail compound_command
- return 1
-}
-
-subshell()
-{
- ptrace_begn subshell
- if accept T_LPAREN && compound_list && expect T_RPAREN; then
- ptrace_pass subshell
- return 0
- fi
- ptrace_fail subshell
- return 1
-}
-
-compound_list()
-{
- ptrace_begn compound_list
- newline_list
- if term; then
- separator
- ptrace_pass compound_list
- return 0
- fi
- ptrace_fail compound_list
- return 1
-}
-
-term()
-{
- ptrace_begn term
- if and_or; then
- while separator; do
- and_or
- done
- ptrace_pass term
- return 0
- fi
- ptrace_fail term
- return 1
-}
-
-for_clause()
-{
- ptrace_begn for_clause
- if accept T_FOR; then
- if expect T_NAME && linebreak; then
- if accept T_IN; then
- wordlist
- if ! sequential_sep; then
- ptrace_fail for_clause
- return 1
- fi
- fi
- if do_group; then
- ptrace_pass for_clause
- return 0
- fi
- fi
- fi
- ptrace_fail for_clause
- return 1
-}
-
-wordlist()
-{
- ptrace_begn wordlist
- if accept T_WORD; then
- while accept T_WORD; do :; done
- ptrace_pass wordlist
- return 0
- fi
- ptrace_fail wordlist
- return 1
-}
-
-case_clause()
-{
- if accept T_CASE; then
- if expect T_WORD && linebreak && expect T_IN && linebreak; then
- case_list || case_list_ns
- expect T_ESAC
- return 0
- fi
- fi
- return 1
-}
-
-case_list_ns()
-{
- if case_list && case_item_ns; then
- return 0
- elif case_item_ns; then
- return 0
- fi
- return 1
-}
-
-case_list()
-{
- if case_item; then
- while case_item; do
- :
- done
- return 0
- fi
- return 1
-}
-
-case_item_ns()
-{
- accept T_LPAREN
- if pattern && expect RPAREN; then
- compound_list
- if linebreak; then
- return 0
- fi
- fi
- return 1
-}
-
-case_item()
-{
- accept T_LPAREN
- if pattern && expect T_RPAREN; then
- if compound_list || linebreak; then
- if expect T_DSEMI && linebreak; then
- return 0
- fi
- fi
- fi
- return 1
-}
-
-pattern()
-{
- if accept T_CMDNAME; then
- while accept T_PIPE; do
- expect T_WORD
- done
- return 0
- fi
- return 1
-}
-
-if_clause()
-{
- if accept T_IF; then
- if compound_list && expect T_THEN && compound_list; then
- else_part
- expect T_FI
- return 0
- fi
- fi
- return 1
-}
-
-else_part()
-{
- while accept T_ELIF; do
- if compound_list && expect T_THEN && compound_list; then
- continue
- fi
- return 1
- done
- if accept T_ELSE; then
- if compound_list; then
- return 0
- fi
- fi
- return 1
-}
-
-while_clause()
-{
- if accept T_WHILE; then
- if compound_list && do_group; then
- return 0
- fi
- fi
- return 1
-}
-
-until_clause()
-{
- if accept T_UNTIL; then
- if compound_list && do_group; then
- return 0
- fi
- fi
- return 1
-}
-
-function_body()
-{
- ptrace_begn function_body
- if compound_command; then
- redirect_list
- ptrace_pass function_body
- return 0
- fi
- ptrace_fail function_body
- return 1
-}
-
-brace_group()
-{
- ptrace_begn brace_group
- if accept T_LBRACE && compound_list && expect T_RBRACE; then
- ptrace_pass brace_group
- return 0
- fi
- ptrace_fail brace_group
- return 1
-}
-
-do_group()
-{
- ptrace_begn do_group
- if accept T_DO && compound_list && expect T_DONE; then
- ptrace_pass do_group
- return 0
- fi
- ptrace_fail do_group
- return 1
-}
-
-simple_command()
-{
- ptrace_begn simple_command
- if cmd_prefix; then
- if cmd_word; then
- cmd_suffix
- fi
- ptrace_pass simple_command
- return 0
- elif accept T_FNAME; then
- if accept T_LPAREN; then
- expect T_RPAREN
- if linebreak && function_body; then
- ptrace_pass simple_command
- return 0
- fi
- else
- cmd_suffix
- ptrace_pass simple_command
- return 0
- fi
- elif cmd_name; then
- cmd_suffix
- ptrace_pass simple_command
- return 0
- fi
- ptrace_fail simple_command
- return 1
-}
-
-cmd_name()
-{
- ptrace_begn cmd_name
- # TODO: Assignment
- if accept T_CMDNAME; then
- ptrace_pass cmd_name
- return 0
- fi
- ptrace_fail cmd_name
- return 1
-}
-
-cmd_word()
-{
- ptrace_begn cmd_word
- # TODO: Assignment
- if accept T_WORD; then
- ptrace_pass cmd_word
- return 0
- fi
- ptrace_fail cmd_word
- return 1
-}
-
-cmd_prefix()
-{
- ptrace_begn cmd_prefix
- if io_redirect || accept T_ASSIGNMENT_WORD; then
- while io_redirect || accept T_ASSIGNMENT_WORD; do
- :
- done
- ptrace_pass cmd_prefix
- return 0
- fi
- ptrace_fail cmd_prefix
- return 1
-}
-
-cmd_suffix()
-{
- ptrace_begn cmd_suffix
- if io_redirect || accept T_WORD; then
- while io_redirect || accept T_WORD; do
- :
- done
- ptrace_pass cmd_suffix
- return 0
- fi
- ptrace_fail cmd_suffix
- return 1
-}
-
-redirect_list()
-{
- ptrace_begn redirect_list
- if io_redirect; then
- while io_redirect; do
- :
- done
- ptrace_pass redirect_list
- return 0
- fi
- ptrace_fail redirect_list
- return 1
-}
-
-io_redirect()
-{
- ptrace_begn io_redirect
- if io_file || io_here; then
- ptrace_pass io_redirect
- return 0
- fi
- ptrace_fail io_redirect
- return 1
-}
-
-io_file()
-{
- if accept T_LESS || accept T_LESSAND || accept T_GREAT || \
- accept T_GREATAND || accept T_DGREAT || \
- accept T_LESSGREAT || accept T_CLOBBER; then
- if filename; then
- return 0
- fi
- fi
- return 1
-}
-
-filename()
-{
- if accept T_WORD; then
- return 0
- fi
- return 1
-}
-
-io_here()
-{
- if accept T_DLESS || accept T_DLESSDASH; then
- if here_end; then
- return 0
- fi
- fi
- return 1
-}
-
-here_end()
-{
- if accept T_WORD; then
- return 0
- fi
- return 1
-}
-
-newline_list()
-{
- if accept T_NEWLINE; then
- while accept T_NEWLINE; do
- :
- done
- return 0
- fi
- return 1
-}
-
-linebreak()
-{
- newline_list
- return 0
-}
-
-separator_op()
-{
- if accept T_AND || accept T_SEMI; then
- return 0
- fi
- return 1
-}
-
-separator()
-{
- if separator_op && linebreak; then
- return 0
- elif newline_list; then
- return 0
- fi
- return 1
-}
-
-sequential_sep()
-{
- ptrace_begn sequential_sep
- if accept T_SEMI; then
- if linebreak; then
- ptrace_pass sequential_sep
- return 0
- fi
- elif newline_list; then
- ptrace_pass sequential_sep
- return 0
- fi
- ptrace_fail sequential_sep
- return 1
-}
-
-parse()
-{
- local fn="${1}"
- shift 1
-
- if run_lexer "${fn}" complete_command; then
- return 0
- fi
- return 1
-}
-
-try()
-{
- local tokens=
- local t=
-
- printf 'Trying script:\n'
- printf '\t%s\n' "${@}"
- if tokens="$(printf '%s\n' "${@}" | parse -)"; then
- printf 'Tokens: %s\n' "${tokens}" | sed "
- s/${SOH}/<SOH>/g; s/${STX}/<STX>/g; s/${ETX}/<ETX>/g;
- s/${RS}/<RS>/g; s/${US}/<US>/g;
- "
- IFS="${RS}"
- for t in ${tokens}; do
- printf 'Token: %s\n' "$(tokname "${t}")"
- case "${t%${US}*}" in T_NAME|T_FNAME|T_CMDNAME|T_WORD)
- printf ' "%s"\n' "${t#*${US}}"
- ;;
- esac
- done
- printf 'Generated code:\n'
- IFS="${LF}"
- printf '\t%s\n' $(codegen "${tokens}")
- unset IFS
- else
- printf 'FAIL\n'
- fi
- printf '\n\n'
-}
-
-#try '"foo bar" && $baz || qux' '${quux%uux quuux'
-#try '"foo bar" && $baz || qux' '${quux%uux } quuux'
-#try 'foo ${bar}'
-#try 'foo ${#bar}'
-#try 'foo ${bar#baz}'
-#try 'foo ${#bar#}'
-#try 'foo ${^}'
-#try 'foo `bar`'
-#try 'foo &&'
-#try '{ foo; }'
-#try '( foo )'
-#try 'for i in 1 2 3; do stuff; done'
-#try 'if foo; then bar; fi'
-#try 'if foo; then bar; elif baz; then qux; else quux; fi'
-#try 'if ; then ; fi'
-#try 'while foo; do bar; done'
-#try 'while ; do ; done'
-#try 'foo(){ bar; }'
-#try 'case foo in bar) baz;; (qux) quux;; quux);; esac'
-#try 'foo bar ( baz )'
-#try 'foo $(bar)'
-#try 'foo $(bar); baz'
-#try 'foo $(bar)' 'baz'
-#try 'foo $(bar) baz'
-#try 'foo$(bar$(baz))qux'
-#try 'foo $((1 + 1))'
-#try '$((1 + 1))'
-#try '$((1 + (1 + 1)))'
-#try '$((1 + $(foo) + 1))'
-#try '$((1'
-#try 'foo <<EOF' 'bar' 'EOF'
-#try 'foo <<-EOF' "${HT}bar" "${HT}EOF"
-#try 'foo <<EOF' '$(bar)' 'EOF'
-#try 'foo <<E"O"F' '$(bar)' 'EOF'
-#try 'foo <<"EOF"' '$(bar)' 'EOF'
-#try 'foo <<E\OF' '$(bar)' 'EOF'
-#try 'foo <<\EOF' '$(bar)' 'EOF'
-#try 'foo <<EOF1; bar <<EOF2' 'baz' 'EOF1' 'qux' 'EOF2'
-#try '\foo'
-#try '"foo bar" baz'
-#try '"foo'
-#try 'foo\" bar'
-#try 'foo\'
-#try "foo'"
-#try 'foo\' 'bar'
-#try 'v=foo'
-try 'if &&'
-try 'if true; do'
diff --git a/parsing/tokens.sh b/parsing/tokens.sh
deleted file mode 100644
index 68db85d..0000000
--- a/parsing/tokens.sh
+++ /dev/null
@@ -1,119 +0,0 @@
-tokname()
-{
- local t="${1}"
- shift 1
- local n=
-
- case "${t%${US}*}" in
- # Operators
- T_EOF) n='end of file';;
- T_NEWLINE) n='newline';;
- T_AND) n='"&"';;
- T_SEMI) n='";"';;
- T_AND_IF) n='"&&"';;
- T_OR_IF) n='"||"';;
- T_DSEMI) n='";;"';;
- T_LESS) n='"<"';;
- T_GREAT) n='">"';;
- T_DLESS) n='"<<"';;
- T_DGREAT) n='">>"';;
- T_LESS) n='"<"';;
- T_LESSAND) n='"<&"';;
- T_GREAT) n='">"';;
- T_GREATAND) n='">&"';;
- T_LESSGREAT) n='"<>"';;
- T_DLESSDASH) n='"<<-"';;
- T_CLOBBER) n='">|"';;
- T_PIPE) n='"|"';;
- T_LPAREN) n='"("';;
- T_RPAREN) n='")"';;
- # Reserved words
- T_IF) n='"if"';;
- T_THEN) n='"then"';;
- T_ELSE) n='"else"';;
- T_ELIF) n='"elif"';;
- T_FI) n='"fi"';;
- T_DO) n='"do"';;
- T_DONE) n='"done"';;
- T_CASE) n='"case"';;
- T_ESAC) n='"esac"';;
- T_WHILE) n='"while"';;
- T_UNTIL) n='"until"';;
- T_FOR) n='"for"';;
- T_LBRACE) n='"{"';;
- T_RBRACE) n='"}"';;
- T_BANG) n='"!"';;
- T_IN) n='"in"';;
- # Special symbols
- T_NAME) n='parameter name';;
- T_FNAME) n='function name';;
- T_CMDNAME) n='command name';;
- T_IO_NUMBER) n='I/O number';;
- T_WORD) n='word';;
- T_ASSIGNMENT_WORD) n='assignment word';;
- # Unknown
- *) n='unknown token';;
- esac
-
- printf '%s' "${n}"
-}
-
-toktext()
-{
- local t="${1}"
- shift 1
- local n=
-
- case "${t%${US}*}" in
- # Operators
- T_EOF) n='';;
- T_NEWLINE) n="${LF}";;
- T_AND) n='&';;
- T_SEMI) n=';';;
- T_AND_IF) n='&&';;
- T_OR_IF) n='||';;
- T_DSEMI) n=';;';;
- T_LESS) n='<';;
- T_GREAT) n='>';;
- T_DLESS) n='<<';;
- T_DGREAT) n='>>';;
- T_LESS) n='<';;
- T_LESSAND) n='<&';;
- T_GREAT) n='>';;
- T_GREATAND) n='>&';;
- T_LESSGREAT) n='<>';;
- T_DLESSDASH) n='<<-';;
- T_CLOBBER) n='>|';;
- T_PIPE) n='|';;
- T_LPAREN) n='(';;
- T_RPAREN) n=')';;
- # Reserved words
- T_IF) n='if';;
- T_THEN) n='then';;
- T_ELSE) n='else';;
- T_ELIF) n='elif';;
- T_FI) n='fi';;
- T_DO) n='do';;
- T_DONE) n='done';;
- T_CASE) n='case';;
- T_ESAC) n='esac';;
- T_WHILE) n='while';;
- T_UNTIL) n='until';;
- T_FOR) n='for';;
- T_LBRACE) n='{';;
- T_RBRACE) n='}';;
- T_BANG) n='!';;
- T_IN) n='in';;
- # Special symbols
- T_NAME) n="${t#*${US}}";;
- T_FNAME) n="${t#*${US}}";;
- T_CMDNAME) n="${t#*${US}}";;
- T_IO_NUMBER) n="${t#*${US}}";;
- T_WORD) n="${t#*${US}}";;
- T_ASSIGNMENT_WORD) n="${t#*${US}}";;
- # Unknown
- *) n='';;
- esac
-
- printf '%s' "${n}"
-}