diff options
Diffstat (limited to 'parsing')
-rw-r--r-- | parsing/codegen.sh | 85 | ||||
-rw-r--r-- | parsing/lexer.sh | 958 | ||||
-rw-r--r-- | parsing/parse.sh | 676 | ||||
-rw-r--r-- | parsing/tokens.sh | 119 |
4 files changed, 0 insertions, 1838 deletions
# ---------------------------------------------------------------------------
# parsing/codegen.sh
# (diff --git a/parsing/codegen.sh b/parsing/codegen.sh; deleted file mode
# 100644; index 8d9d2fd..0000000)
# ---------------------------------------------------------------------------

# Last byte read by sgetc(); empty at end of input.
sc=

# Read one byte from stdin into ${sc}.  The '.' appended inside the command
# substitution keeps a newline from being stripped; it is removed afterwards.
sgetc()
{
	sc="$(dd bs=1 count=1 2>/dev/null; printf '.')"
	sc="${sc%.}"
}

# Print the text of every token in an RS-separated token list.
#   $1 - token list (TOKEN { RS TOKEN })
# Each token's text is followed by a space, except T_NEWLINE.
# IFS is set to RS for the split and left unset on return.
codegen_sub()
{
	local array="${1}"
	shift 1
	local t=

	IFS="${RS}"
	for t in ${array}; do
		toktext "${t}"
		case "${t%${US}*}" in
		T_NEWLINE)
			;;
		*)
			printf ' '
			;;
		esac
	done
	unset IFS
}

# The token stack is encoded in a string in the following grammar:
#	Terminal symbols:
#		TOKEN
#	Production rules:
#		stack  = tokens [ '<SOH>' type '<STX>' stack '<ETX>' [ tokens ] ] ;
#		tokens = TOKEN { '<RS>' TOKEN } ;
#		type   = 'C' ;
# We need to recurse through this stack to get to all the tokens.
# Each element in the stack (an array of tokens) gets run through the codegen to
# become text that is inserted into the array below.
#
# Reads the encoded stack from stdin one byte at a time and writes the
# generated text to stdout.
parse_stack()
{
	local array=

	array=''
	while :; do
		sgetc
		case "${sc}" in
		'')
			# EOF
			break
			;;
		"${SOH}")
			# New stack element
			sgetc
			case "${sc}" in
			'C')
				# Command substitution
				sgetc	# STX
				# The '.' protects trailing newlines in the
				# generated text.
				array="${array}$(parse_stack)."
				array="${array%.}"
				;;
			esac
			;;
		"${ETX}")
			# End of stack element
			break
			;;
		*)
			# Token character
			array="${array}${sc}"
			;;
		esac
	done
	codegen_sub "${array}"
}

# Generate shell text from an encoded token stack.
#   $1 - encoded token stack
# Returns the status of parse_stack (0 on success, 1 otherwise).
codegen()
{
	local toks="${1}"
	shift 1

	if printf '%s' "${toks}" | parse_stack; then
		return 0
	else
		return 1
	fi
}

# ---------------------------------------------------------------------------
# parsing/lexer.sh
# (diff --git a/parsing/lexer.sh b/parsing/lexer.sh; deleted file mode
# 100644; index 886e7f8..0000000)
# ---------------------------------------------------------------------------

# Lexer state (globals).
fname=			# input name used in diagnostics ('-' means stdin)
lineno=			# current line number
ln_off=			# line offset reported by the last scan_*() call
start=			# name of the parser start function
c=			# current lookahead character (empty at EOF)
wordexp=		# text of the last scanned word expansion
here_queue=		# pending here-documents: strip US word US escaped RS
here_awaiting_end=	# true while a here-document delimiter is expected
here_awaiting_word=	# true when the next token is a here-document body
tok=			# current token
tokens=			# RS-terminated list of accepted tokens

#
# Error handling (used by scanning and interface functions)
#

# Print a diagnostic prefixed with the input name and line, then exit 1.
#   $1   - printf format (appended to the prefix format)
#   $2.. - format arguments
error()
{
	local fmt="${1}"
	shift 1

	case "${fname}" in
	'-')
		printf "stdin:%d: ${fmt}\n" "${lineno}" "${@}" >&2
		;;
	*)
		printf "%s:%d: ${fmt}\n" "${fname}" "${lineno}" "${@}" >&2
		;;
	esac

	# The parser and lexer run in a subshell, so this just returns up to the
	# caller like an exception.
	exit 1
}

# Report the current token as unexpected.
#   $1 - expected token, or '' when nothing specific was expected
synexp()
{
	local t="${1}"
	shift 1

	if [ "x${t}" = 'x' ]; then
		synerr '%s unexpected' "$(tokname "${tok}")"
	else
		synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
		    "$(tokname "${t}")"
	fi
}

# Report a syntax error (see error() for the arguments).
synerr()
{
	local fmt="${1}"
	shift 1

	error "Syntax error: ${fmt}" "${@}"
}

#
# Input reading
#

# Read one byte from stdin into ${c}; ${c} is empty at end of input.
lgetc()
{
	c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
	c="${c%.}"
}

#
# Token recognition
#

# Recognize the next token, starting at the lookahead ${c}, and store it in
# ${tok}.  On return ${c} holds the first character after the token.
next()
{
	local esc=

	if ${here_awaiting_word}; then
		next_here
		return
	fi
	while :; do
		dbg "parsing char '$c' at lineno $lineno"
		case "${c}" in
		'')
			lgetc
			tok=T_EOF
			return
			;;
		"${LF}")
			if ${here_awaiting_end}; then
				synexp ''
			else
				# A queued here-document starts on the next
				# line.
				case "${here_queue}" in *"${RS}"*)
					here_awaiting_end=false
					here_awaiting_word=true
					;;
				esac
			fi
			lgetc
			lineno=$((${lineno} + 1))
			tok=T_NEWLINE
			return
			;;
		' '|"${HT}")
			lgetc
			continue
			;;
		\\)
			lgetc
			case "${c}" in "${LF}")
				# Line continuation
				lineno=$((${lineno} + 1))
				lgetc
				continue
				;;
			esac
			# The character after the backslash is escaped, so it
			# must not be interpreted by scan_word() (otherwise
			# "\;" or "\ " would end the word right away).
			esc="\\${c}"
			case "${c}" in
			'')
				;;
			*)
				lgetc
				;;
			esac
			next_word "${esc}"
			return
			;;
		'#')
			# Comment: skip to end of line.
			lgetc
			while :; do
				case "${c}" in "${LF}"|'')
					break
					;;
				esac
				lgetc
			done
			continue
			;;
		'&')
			lgetc
			case "${c}" in '&')
				lgetc
				tok=T_AND_IF
				return
				;;
			esac
			tok=T_AND
			return
			;;
		'|')
			lgetc
			case "${c}" in '|')
				lgetc
				tok=T_OR_IF
				return
				;;
			esac
			tok=T_PIPE
			return
			;;
		';')
			lgetc
			case "${c}" in ';')
				lgetc
				tok=T_DSEMI
				return
				;;
			esac
			dbg T_SEMI
			tok=T_SEMI
			return
			;;
		'(')
			lgetc
			tok=T_LPAREN
			return
			;;
		')')
			lgetc
			tok=T_RPAREN
			return
			;;
		'<'|'>')
			next_io
			return
			;;
		*)
			next_word ''
			return
			;;
		esac
	done
}

# Read the body of the first queued here-document and return it as a T_WORD
# token in ${tok}.  On return ${c} is the newline ending the delimiter line.
next_here()
{
	local here=
	local here_strip_tabs=
	local here_end=
	local here_escaped=
	local line=
	local word=
	local res=
	local wordexp=

	# Dequeue the here-document.
	here="${here_queue%%${RS}*}"
	here_strip_tabs="${here%%${US}*}"
	here_end="${here%${US}*}"
	# The delimiter is compared with quoting characters removed.
	here_end="$(printf '%s' "${here_end#*${US}}" | \
	    sed 's/\\//g; s/"//g; s/'\''//g;')"
	here_escaped="${here##*${US}}"
	here_queue="${here_queue#*${RS}}"
	here_awaiting_word=false

	line=''
	word=''
	while :; do
		case "${c}" in
		'')
			# Bash throws a warning when EOF occurs in a
			# here document.  mksh throws an error.  dash,
			# BusyBox ash, ksh93, and zsh accept EOF as a
			# delimiter.  We aim for the lowest common
			# denominator, so throw an error like mksh does.
			synerr 'Here-document "%s" unclosed' \
			    "${here_end}"
			;;
		"${LF}")
			word="${word}${line}"
			case "${line}" in "${here_end}")
				# The newline is left in ${c}; next() counts
				# it.
				tok="T_WORD${US}${word}"
				return
				;;
			esac
			word="${word}${c}"
			line=''
			# Body newlines are consumed here, so count them here.
			lineno=$((${lineno} + 1))
			;;
		"${HT}")
			if ${here_strip_tabs}; then
				# "<<-": drop tabs at the start of a line.
				case "${line}" in
				'')
					;;
				*)
					line="${line}${c}"
					;;
				esac
			else
				line="${line}${c}"
			fi
			;;
		'$')
			if ! ${here_escaped}; then
				lgetc
				if ! res="$(scan_wordexp)"; then
					exit 1
				fi
				ln_off=${res%%${RS}*}
				res="${res#*${RS}}"
				c="${res%%${RS}*}"
				# The rest is the payload; it may contain RS.
				wordexp="${res#*${RS}}"
				lineno=$((${lineno} + ${ln_off}))
				line="${line}${wordexp}"
				# ${c} is already the next character.
				continue
			else
				line="${line}${c}"
			fi
			;;
		*)
			line="${line}${c}"
			;;
		esac
		lgetc
	done
}

# Recognize a redirection operator starting at '<' or '>' and store it in
# ${tok}.  "<<" and "<<-" also start a here_queue entry with the strip-tabs
# flag and set here_awaiting_end.
next_io()
{
	# Note: every branch leaves with "return".  "break" is not usable
	# here because there is no enclosing loop in this function.
	case "${c}" in
	'<')
		lgetc
		case "${c}" in
		'<')
			lgetc
			case "${c}" in '-')
				lgetc
				tok=T_DLESSDASH
				here_queue="${here_queue}true"
				here_awaiting_end=true
				here_awaiting_word=false
				return
				;;
			esac
			tok=T_DLESS
			here_queue="${here_queue}false"
			here_awaiting_end=true
			here_awaiting_word=false
			return
			;;
		'&')
			lgetc
			tok=T_LESSAND
			return
			;;
		'>')
			lgetc
			tok=T_LESSGREAT
			return
			;;
		esac
		tok=T_LESS
		return
		;;
	'>')
		lgetc
		case "${c}" in
		'>')
			lgetc
			tok=T_DGREAT
			return
			;;
		'&')
			lgetc
			tok=T_GREATAND
			return
			;;
		'|')
			lgetc
			tok=T_CLOBBER
			return
			;;
		esac
		tok=T_GREAT
		return
		;;
	esac
}

# Scan a word and store it in ${tok} as a T_WORD token.
#   $1 - text already consumed by the caller that belongs at the start of
#        the word (may be empty)
# When a here-document delimiter is awaited, the word completes the pending
# here_queue entry.
next_word()
{
	local prev_c="${1}"
	shift 1
	local res=
	local word=

	if ! res="$(scan_word false)"; then
		exit 1
	fi
	ln_off=${res%%${RS}*}
	res="${res#*${RS}}"
	c="${res%%${RS}*}"
	# The rest is the payload; it may contain RS.
	res="${res#*${RS}}"
	word="${prev_c}${res}"

	# We must advance lineno because scan_word() was run in a subshell.
	lineno=$((${lineno} + ${ln_off}))
	tok="T_WORD${US}${word}"

	if ${here_awaiting_end}; then
		here_queue="${here_queue}${US}${word}"
		# A delimiter with any quoting disables expansion in the body.
		case "${word}" in
		*\\*|*'"'*|*"'"*)
			here_queue="${here_queue}${US}true"
			;;
		*)
			here_queue="${here_queue}${US}false"
			;;
		esac
		here_queue="${here_queue}${RS}"
		here_awaiting_end=false
	fi
}

#
# Token scanning
#
# The scan_*() functions below that print a result write
#	<line offset> RS <lookahead character> RS <payload>
# to stdout and are meant to be run in a command substitution.  The payload
# is everything after the second RS and may itself contain RS.
#

# Scan a word starting at ${c}.
#   $1 - "true" when scanning the word of a ${parameter<mod>word} expansion
#        (an unquoted "}" ends the word and blanks/operators do not)
scan_word()
{
	local in_param="${1}"
	local res=
	local word=
	local quoted=
	local lines=
	local wordexp=

	word=''
	quoted=false
	lines=0
	while :; do
		dbg "parsing word char '$c' at lineno $lineno"
		case "${c}" in
		'')
			break
			;;
		"${LF}")
			if ! ${in_param} && ! ${quoted}; then
				break
			fi
			lineno=$((${lineno} + 1))
			lines=$((${lines} + 1))
			word="${word}${c}"
			;;
		' '|"${HT}"|'&'|'|'|';'|'('|')'|'<'|'>')
			if ! ${in_param} && ! ${quoted}; then
				break
			fi
			word="${word}${c}"
			;;
		'$')
			if ${here_awaiting_end}; then
				synerr '%s %s %s %s' \
				    'Word expansions' \
				    'not supported in' \
				    'here-document' \
				    'delimiters'
			fi
			lgetc
			if ! res="$(scan_wordexp)"; then
				exit 1
			fi
			ln_off=${res%%${RS}*}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			wordexp="${res#*${RS}}"
			# We must advance lineno because scan_wordexp()
			# was run in a subshell.
			lineno=$((${lineno} + ${ln_off}))
			lines=$((${lines} + ${ln_off}))
			word="${word}${wordexp}"
			# scan_wordexp() leaves behind an unused
			# character, so we should skip the lgetc() call
			# below.
			continue
			;;
		'`')
			synerr 'Backquoted (old-style) %s' \
			    'command substitution not supported'
			break
			;;
		\\)
			word="${word}${c}"
			lgetc
			case "${c}" in '')
				# Bash, ksh93, mksh, and zsh ignore a
				# backslash at the end of a file, but
				# dash and BusyBox ash include it in the
				# word.  To help with script
				# portability, we'll throw an error
				# (which is a reasonable thing to do
				# anyway).
				synerr 'Unexpected end of file %s' \
				    'after "\"'
				;;
			esac
			word="${word}${c}"
			;;
		\')
			word="${word}${c}"
			# Inside double quotes an apostrophe is an ordinary
			# character.
			if ! ${quoted}; then
				while :; do
					lgetc
					word="${word}${c}"
					case "${c}" in
					'')
						synerr '%s %s' \
						    'Unterminated' \
						    'quoted string'
						;;
					"${LF}")
						lineno=$((${lineno} + 1))
						lines=$((${lines} + 1))
						;;
					\')
						break
						;;
					esac
				done
			fi
			;;
		'"')
			word="${word}${c}"
			if ${quoted}; then
				quoted=false
			else
				quoted=true
			fi
			;;
		'}')
			if ${in_param} && ! ${quoted}; then
				break
			fi
			word="${word}${c}"
			;;
		*)
			word="${word}${c}"
			;;
		esac
		lgetc
	done

	if ${quoted}; then
		synerr 'Unterminated quoted string'
	fi

	# %s rather than %c: with an empty ${c} (EOF) %c would emit a NUL.
	printf "%d${RS}%s${RS}%s" "${lines}" "${c}" "${word}"
}

# Scan a word expansion; ${c} is the character following the '$'.
# The payload is the expansion text including the leading '$'.  A command
# substitution is encoded as "$(" SOH "C" STX <tokens> ETX ")".
scan_wordexp()
{
	local res=
	local toks=
	local param=
	local sub_ln=

	wordexp=''
	ln_off=0
	case "${c}" in
	'{')
		# Parameter expansion brace
		scan_wordexp_param_brace
		;;
	'(')
		# Arithmetic expansion or command substitution
		lgetc
		case "${c}" in
		'(')
			# Arithmetic expansion
			scan_wordexp_arith
			;;
		*)
			# Command substitution
			if ! res="$(run_sublexer "sub${fname}" \
			    "${lineno}" "${start}" \
			    "${c}")"; then
				exit 1
			fi
			# run_sublexer() reports the absolute line number it
			# reached; callers of this function expect an offset.
			sub_ln=${res%%${RS}*}
			ln_off=$((${sub_ln} - ${lineno}))
			lineno=${sub_ln}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			# The whole remainder is the token list.
			toks="${res#*${RS}}"
			wordexp="\$(${SOH}C${STX}${toks}"
			wordexp="${wordexp}${ETX})"
			# ")" is recognized in run_sublexer().
			;;
		esac
		;;
	# '$' and '!' are listed outside the bracket expression because an
	# unquoted "$!" inside it would be expanded as a parameter.
	'$'|'!'|[@*#?A-Za-z0-9_-])
		if ! res="$(scan_param)"; then
			exit 1
		fi
		ln_off=${res%%${RS}*}
		res="${res#*${RS}}"
		c="${res%%${RS}*}"
		param="${res#*${RS}}"
		lineno=$((${lineno} + ${ln_off}))
		wordexp="\$${param}"
		;;
	*)
		# Not an expansion: keep the "$" as a literal character and
		# leave ${c} for the caller.
		wordexp='$'
		;;
	esac

	printf "%d${RS}%s${RS}%s" "${ln_off}" "${c}" "${wordexp}"
	return 0
}

# Scan a "${...}" expansion; ${c} is the "{".  Sets ${wordexp}, ${ln_off} and
# ${c} (the character after the closing brace).
scan_wordexp_param_brace()
{
	local mod=
	local res=
	local param=
	local word=

	mod=true

	lgetc
	case "${c}" in
	'#')
		lgetc
		case "${c}" in
		'$'|'!'|[@*#?A-Za-z0-9_-])
			# String length expansion
			if ! res="$(scan_param)"; then
				exit 1
			fi
			ln_off=${res%%${RS}*}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			param="${res#*${RS}}"
			lineno=$((${lineno} + ${ln_off}))
			# Disable modifications.
			mod=false
			;;
		*)
			# Special parameter "#"
			param='#'
			;;
		esac
		;;
	*)
		if ! res="$(scan_param)"; then
			exit 1
		fi
		ln_off=${res%%${RS}*}
		res="${res#*${RS}}"
		c="${res%%${RS}*}"
		param="${res#*${RS}}"
		lineno=$((${lineno} + ${ln_off}))
		;;
	esac
	wordexp="\${${param}"

	# If modifications are allowed
	if ${mod}; then
		# Check for modifications.
		mod=false
		case "${c}" in
		':')
			mod=true
			wordexp="${wordexp}${c}"
			lgetc
			case "${c}" in '-'|'='|'?'|'+')
				wordexp="${wordexp}${c}"
				lgetc
				;;
			esac
			;;
		'-'|'='|'?'|'+')
			mod=true
			wordexp="${wordexp}${c}"
			lgetc
			;;
		'%')
			mod=true
			wordexp="${wordexp}${c}"
			lgetc
			case "${c}" in '%')
				wordexp="${wordexp}${c}"
				lgetc
				;;
			esac
			;;
		'#')
			mod=true
			wordexp="${wordexp}${c}"
			lgetc
			case "${c}" in '#')
				wordexp="${wordexp}${c}"
				lgetc
				;;
			esac
			;;
		esac
	fi

	# If a modification was found
	if ${mod}; then
		# Get word.
		if ! res="$(scan_word true)"; then
			exit 1
		fi
		ln_off=${res%%${RS}*}
		res="${res#*${RS}}"
		c="${res%%${RS}*}"
		word="${res#*${RS}}"
		# We must advance lineno because scan_word() was run in a
		# subshell.
		lineno=$((${lineno} + ${ln_off}))
		wordexp="${wordexp}${word}"
		dbg "param mod word: '$word'"
	fi

	# Check for right brace.
	case "${c}" in
	'}')
		wordexp="${wordexp}${c}"
		lgetc
		;;
	*)
		synerr 'Missing "}"'
		;;
	esac

	return 0
}

# Scan a parameter name starting at ${c}: a special parameter, a positional
# parameter or a name.  The payload is the parameter without the "$".
scan_param()
{
	local param=

	param=''
	case "${c}" in
	'$'|'!'|[@*#?0-])
		# Special parameter
		param="${c}"
		lgetc
		;;
	[1-9])
		# Positional parameter
		param="${param}${c}"
		lgetc
		while :; do
			case "${c}" in ''|[!0-9])
				break
				;;
			esac
			param="${param}${c}"
			lgetc
		done
		;;
	[A-Za-z_])
		# Parameter name
		param="${param}${c}"
		lgetc
		while :; do
			case "${c}" in ''|[!A-Za-z0-9_])
				break
				;;
			esac
			param="${param}${c}"
			lgetc
		done
		;;
	*)
		synerr 'Bad parameter name'
		;;
	esac

	printf "%d${RS}%s${RS}%s" 0 "${c}" "${param}"
	return 0
}

# Scan a "$((...))" expansion; ${c} is the second "(".  Sets ${wordexp},
# ${ln_off} and ${c} (the character after the closing "))").
scan_wordexp_arith()
{
	local arith=
	local paren_lvl=
	local res=
	local sub_wordexp=
	local lines=

	arith=''
	paren_lvl=0
	lines=0
	lgetc
	while :; do
		case "${c}" in
		'')
			synerr 'end of file unexpected (%s)' \
			    'expecting "))"'
			;;
		"${LF}")
			lineno=$((${lineno} + 1))
			lines=$((${lines} + 1))
			arith="${arith}${c}"
			;;
		'(')
			arith="${arith}${c}"
			paren_lvl=$((${paren_lvl} + 1))
			;;
		')')
			if [ "${paren_lvl}" -eq 0 ]; then
				lgetc
				case "${c}" in ')')
					wordexp="\$((${arith}))"
					ln_off=${lines}
					lgetc
					return 0
					;;
				esac
				synerr 'Arithmetic expansion: ")" %s' \
				    'unexpected'
			fi
			arith="${arith}${c}"
			paren_lvl=$((${paren_lvl} - 1))
			;;
		'$')
			lgetc
			if ! res="$(scan_wordexp)"; then
				exit 1
			fi
			ln_off=${res%%${RS}*}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			sub_wordexp="${res#*${RS}}"
			# We must advance lineno because scan_wordexp()
			# was run in a subshell.
			lineno=$((${lineno} + ${ln_off}))
			lines=$((${lines} + ${ln_off}))
			arith="${arith}${sub_wordexp}"
			# scan_wordexp() already left the next character in
			# ${c}; reading another one here would drop it.
			continue
			;;
		*)
			arith="${arith}${c}"
			;;
		esac
		lgetc
	done
}

#
# Interface
#

# Check the current token.  If it matches, add it to the syntax array.
# Succeed when $1 is a valid name: a letter or underscore followed by
# letters, digits or underscores.
_tok_word_is_name()
{
	case "${1}" in
	''|[!A-Za-z_]*|*[!0-9A-Za-z_]*)
		return 1
		;;
	esac
	return 0
}

# If the text of the current T_WORD token is a reserved word, replace ${tok}
# with the reserved word token and succeed; otherwise fail.
_tok_word_to_reserved()
{
	local rw=

	for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
	    T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
	    T_LBRACE T_RBRACE T_BANG T_IN; do
		if [ "x${tok#T_WORD${US}}" = \
		    "x$(toktext "${rw}")" ]; then
			tok="${rw}"
			return 0
		fi
	done
	return 1
}

# Check the current token against the wanted token type.
#   $1 - wanted token type
# On a match the token is appended to ${tokens}, the next token is read and
# 0 is returned; otherwise 1 is returned.  Reserved words, names, function
# names and command names are all lexed as T_WORD and are converted here.
accept()
{
	local t="${1}"

	dbg "looking for $t, current tok ${tok%%${US}*}"
	case "${t}" in
	T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\
	T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\
	T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN)
		dbg "looking for reserved word $t, have '$tok'"
		if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
			# Reserved words are recognized as literal
			# T_WORDs.
			if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
				return 1
			fi
			# T_WORD data unit must match reserved word
			# exactly.
			if ! [ "x${tok#T_WORD${US}}" = \
			    "x$(toktext "${t}")" ]; then
				return 1
			fi
			# If the token matches the reserved word,
			# replace it with the reserved word token.
			tok="${t}"
		fi
		;;
	T_NAME)
		# Names are recognized as literal T_WORDs.
		if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
			return 1
		fi
		# Validate name (the word text, not the token type).
		if ! _tok_word_is_name "${tok#T_WORD${US}}"; then
			return 1
		fi
		tok="T_NAME${US}${tok#T_WORD${US}}"
		;;
	T_FNAME)
		# Function names are recognized as literal T_WORDs.
		if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
			return 1
		fi
		# Verify that the function name doesn't match any
		# reserved words.
		if _tok_word_to_reserved; then
			return 1
		fi
		# Validate name (the word text, not the token type).
		if ! _tok_word_is_name "${tok#T_WORD${US}}"; then
			return 1
		fi
		tok="T_FNAME${US}${tok#T_WORD${US}}"
		;;
	T_CMDNAME)
		# The first word of a simple command is to be checked
		# for reserved words.
		if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
			return 1
		fi
		# Verify that the word doesn't match any reserved words.
		if _tok_word_to_reserved; then
			return 1
		fi
		;;
	*)
		if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
			return 1
		fi
		;;
	esac

	dbg "accept $t"
	tokens="${tokens}${tok}${RS}"
	next
	return 0
}

# Like accept(), but a mismatch is a syntax error.
expect()
{
	local t="${1}"

	if accept "${t}"; then
		return 0
	else
		synexp "${t}"
	fi
}

# Lex and parse a command substitution.  Called by the lexer, not the parser.
#   $1 - input name for diagnostics
#   $2 - starting line number
#   $3 - parser start function
#   $4 - first character of the substitution
# Prints "<line number> RS <lookahead character> RS <tokens>".
run_sublexer()
{
	local fn="${1}"
	local ln="${2}"
	local st="${3}"
	local ch="${4}"
	shift 4

	# Initialize global variables.
	fname="${fn}"
	lineno=${ln}
	start="${st}"
	here_queue=''
	here_awaiting_end=false
	here_awaiting_word=false
	tokens=''

	c="${ch}"
	next

	#dbg=true
	# If this returns (does not exit), there are no errors.
	${start}
	case "${tok%${US}*}" in
	T_RPAREN)
		;;
	*)
		synerr 'Missing ")"'
		;;
	esac

	# %s rather than %c: with an empty ${c} (EOF) %c would emit a NUL.
	printf "%d${RS}%s${RS}%s" "${lineno}" "${c}" "${tokens}"
	return 0
}

# Lex and parse stdin.
#   $1 - input name for diagnostics ('-' for stdin)
#   $2 - parser start function
# Prints the accepted tokens.
run_lexer()
{
	local fn="${1}"
	local st="${2}"
	shift 2

	# Initialize global variables.
	fname="${fn}"
	lineno=1
	start="${st}"
	here_queue=''
	here_awaiting_end=false
	here_awaiting_word=false
	tokens=''

	# Read the first character and recognize the first token.
	lgetc
	next

	# If this returns (does not exit), there are no errors.
	${start}
	if ! accept T_EOF; then
		synexp ''
	fi

	# Return the tokens.
	printf '%s' "${tokens}"

	return 0
}

# ---------------------------------------------------------------------------
# parsing/parse.sh
# (diff --git a/parsing/parse.sh b/parsing/parse.sh; deleted file mode
# 100644; index 199005a..0000000)
# ---------------------------------------------------------------------------

# Control characters used as delimiters.  The '.' keeps command substitution
# from stripping a trailing newline.
SOH="$(printf '\001.')"; SOH="${SOH%.}"
STX="$(printf '\002.')"; STX="${STX%.}"
ETX="$(printf '\003.')"; ETX="${ETX%.}"
 HT="$(printf '\t.')";    HT="${HT%.}"
 LF="$(printf '\n.')";    LF="${LF%.}"
 RS="$(printf '\036.')";  RS="${RS%.}"
 US="$(printf '\037.')";  US="${US%.}"

# Debug output switch; the last assignment wins.
dbg=true
dbg=false

# Print each argument as a DEBUG line on stderr when ${dbg} is true.
dbg()
{
	if ${dbg}; then
		printf 'DEBUG: %s\n' "${@}" >&2
	fi
}

# Parser trace switch used by the ptrace_*() functions.
ptrace=false

# Trace entry into the parser function named by $1.
ptrace_begn()
{
	local fn="${1}"
	shift 1

	if ${ptrace}; then
		printf 'TRACE: BEGN %s()\n' "${fn}" >&2
	fi
}

# Trace a successful return from the parser function named by $1.
ptrace_pass()
{
	local fn="${1}"
	shift 1

	if ${ptrace}; then
		printf 'TRACE: PASS %s()\n' "${fn}" >&2
	fi
}

# Trace a failed return from the parser function named by $1.
ptrace_fail()
{
	local fn="${1}"
	shift 1

	if ${ptrace}; then
		printf 'TRACE: FAIL %s()\n' "${fn}" >&2
	fi
}

. ./tokens.sh
. ./lexer.sh
. ./codegen.sh

#
# Recursive-descent parser.  Each function below recognizes the grammar rule
# it is named after, consuming tokens through accept()/expect(), and returns
# 0 when the rule matched and 1 otherwise.
#

complete_command()
{
	if list; then
		separator
		return 0
	fi
	# Unexpected EOF
	synexp ''
}

list()
{
	ptrace_begn list
	if and_or; then
		while separator && and_or; do
			:
		done
		ptrace_pass list
		return 0
	fi
	ptrace_fail list
	return 1
}

and_or()
{
	ptrace_begn and_or
	if pipeline; then
		while accept T_AND_IF || accept T_OR_IF; do
			if ! linebreak || ! pipeline; then
				ptrace_fail and_or
				return 1
			fi
		done
		ptrace_pass and_or
		return 0
	fi
	ptrace_fail and_or
	return 1
}

pipeline()
{
	ptrace_begn pipeline
	# The leading "!" is optional.
	accept T_BANG
	if pipe_sequence; then
		ptrace_pass pipeline
		return 0
	fi
	ptrace_fail pipeline
	return 1
}

pipe_sequence()
{
	ptrace_begn pipe_sequence
	if command; then
		while accept T_PIPE; do
			if ! linebreak || ! command; then
				ptrace_fail pipe_sequence
				return 1
			fi
		done
		ptrace_pass pipe_sequence
		return 0
	fi
	ptrace_fail pipe_sequence
	return 1
}

# Note: this definition shadows the "command" builtin.
command()
{
	ptrace_begn command
	if simple_command; then
		ptrace_pass command
		return 0
	elif compound_command; then
		redirect_list
		ptrace_pass command
		return 0
	fi
	ptrace_fail command
	return 1
}

compound_command()
{
	ptrace_begn compound_command
	if brace_group; then
		ptrace_pass compound_command
		return 0
	elif subshell; then
		ptrace_pass compound_command
		return 0
	elif for_clause; then
		ptrace_pass compound_command
		return 0
	elif case_clause; then
		ptrace_pass compound_command
		return 0
	elif if_clause; then
		ptrace_pass compound_command
		return 0
	elif while_clause; then
		ptrace_pass compound_command
		return 0
	elif until_clause; then
		ptrace_pass compound_command
		return 0
	fi
	ptrace_fail compound_command
	return 1
}

subshell()
{
	ptrace_begn subshell
	if accept T_LPAREN && compound_list && expect T_RPAREN; then
		ptrace_pass subshell
		return 0
	fi
	ptrace_fail subshell
	return 1
}

compound_list()
{
	ptrace_begn compound_list
	newline_list
	if term; then
		separator
		ptrace_pass compound_list
		return 0
	fi
	ptrace_fail compound_list
	return 1
}

term()
{
	ptrace_begn term
	if and_or; then
		while separator; do
			and_or
		done
		ptrace_pass term
		return 0
	fi
	ptrace_fail term
	return 1
}

for_clause()
{
	ptrace_begn for_clause
	if accept T_FOR; then
		if expect T_NAME && linebreak; then
			if accept T_IN; then
				wordlist
				if ! sequential_sep; then
					ptrace_fail for_clause
					return 1
				fi
			fi
			if do_group; then
				ptrace_pass for_clause
				return 0
			fi
		fi
	fi
	ptrace_fail for_clause
	return 1
}

wordlist()
{
	ptrace_begn wordlist
	if accept T_WORD; then
		while accept T_WORD; do :; done
		ptrace_pass wordlist
		return 0
	fi
	ptrace_fail wordlist
	return 1
}

case_clause()
{
	if accept T_CASE; then
		if expect T_WORD && linebreak && expect T_IN && linebreak; then
			case_list || case_list_ns
			expect T_ESAC
			return 0
		fi
	fi
	return 1
}

case_list_ns()
{
	if case_list && case_item_ns; then
		return 0
	elif case_item_ns; then
		return 0
	fi
	return 1
}

case_list()
{
	if case_item; then
		while case_item; do
			:
		done
		return 0
	fi
	return 1
}

# A case item without the terminating ";;".
case_item_ns()
{
	accept T_LPAREN
	if pattern && expect T_RPAREN; then
		compound_list
		if linebreak; then
			return 0
		fi
	fi
	return 1
}

case_item()
{
	accept T_LPAREN
	if pattern && expect T_RPAREN; then
		if compound_list || linebreak; then
			if expect T_DSEMI && linebreak; then
				return 0
			fi
		fi
	fi
	return 1
}

pattern()
{
	if accept T_CMDNAME; then
		while accept T_PIPE; do
			expect T_WORD
		done
		return 0
	fi
	return 1
}

if_clause()
{
	if accept T_IF; then
		if compound_list && expect T_THEN && compound_list; then
			else_part
			expect T_FI
			return 0
		fi
	fi
	return 1
}

else_part()
{
	while accept T_ELIF; do
		if compound_list && expect T_THEN && compound_list; then
			continue
		fi
		return 1
	done
	if accept T_ELSE; then
		if compound_list; then
			return 0
		fi
	fi
	return 1
}

while_clause()
{
	if accept T_WHILE; then
		if compound_list && do_group; then
			return 0
		fi
	fi
	return 1
}

until_clause()
{
	if accept T_UNTIL; then
		if compound_list && do_group; then
			return 0
		fi
	fi
	return 1
}

function_body()
{
	ptrace_begn function_body
	if compound_command; then
		redirect_list
		ptrace_pass function_body
		return 0
	fi
	ptrace_fail function_body
	return 1
}

brace_group()
{
	ptrace_begn brace_group
	if accept T_LBRACE && compound_list && expect T_RBRACE; then
		ptrace_pass brace_group
		return 0
	fi
	ptrace_fail brace_group
	return 1
}

do_group()
{
	ptrace_begn do_group
	if accept T_DO && compound_list && expect T_DONE; then
		ptrace_pass do_group
		return 0
	fi
	ptrace_fail do_group
	return 1
}

# Also recognizes function definitions: "fname ( ) linebreak function_body".
simple_command()
{
	ptrace_begn simple_command
	if cmd_prefix; then
		if cmd_word; then
			cmd_suffix
		fi
		ptrace_pass simple_command
		return 0
	elif accept T_FNAME; then
		if accept T_LPAREN; then
			expect T_RPAREN
			if linebreak && function_body; then
				ptrace_pass simple_command
				return 0
			fi
		else
			cmd_suffix
			ptrace_pass simple_command
			return 0
		fi
	elif cmd_name; then
		cmd_suffix
		ptrace_pass simple_command
		return 0
	fi
	ptrace_fail simple_command
	return 1
}

cmd_name()
{
	ptrace_begn cmd_name
	# TODO: Assignment
	if accept T_CMDNAME; then
		ptrace_pass cmd_name
		return 0
	fi
	ptrace_fail cmd_name
	return 1
}

cmd_word()
{
	ptrace_begn cmd_word
	# TODO: Assignment
	if accept T_WORD; then
		ptrace_pass cmd_word
		return 0
	fi
	ptrace_fail cmd_word
	return 1
}

cmd_prefix()
{
	ptrace_begn cmd_prefix
	if io_redirect || accept T_ASSIGNMENT_WORD; then
		while io_redirect || accept T_ASSIGNMENT_WORD; do
			:
		done
		ptrace_pass cmd_prefix
		return 0
	fi
	ptrace_fail cmd_prefix
	return 1
}

cmd_suffix()
{
	ptrace_begn cmd_suffix
	if io_redirect || accept T_WORD; then
		while io_redirect || accept T_WORD; do
			:
		done
		ptrace_pass cmd_suffix
		return 0
	fi
	ptrace_fail cmd_suffix
	return 1
}

redirect_list()
{
	ptrace_begn redirect_list
	if io_redirect; then
		while io_redirect; do
			:
		done
		ptrace_pass redirect_list
		return 0
	fi
	ptrace_fail redirect_list
	return 1
}

io_redirect()
{
	ptrace_begn io_redirect
	if io_file || io_here; then
		ptrace_pass io_redirect
		return 0
	fi
	ptrace_fail io_redirect
	return 1
}

io_file()
{
	if accept T_LESS || accept T_LESSAND || accept T_GREAT || \
	    accept T_GREATAND || accept T_DGREAT || \
	    accept T_LESSGREAT || accept T_CLOBBER; then
		if filename; then
			return 0
		fi
	fi
	return 1
}

filename()
{
	if accept T_WORD; then
		return 0
	fi
	return 1
}

io_here()
{
	if accept T_DLESS || accept T_DLESSDASH; then
		if here_end; then
			return 0
		fi
	fi
	return 1
}

here_end()
{
	if accept T_WORD; then
		return 0
	fi
	return 1
}

newline_list()
{
	if accept T_NEWLINE; then
		while accept T_NEWLINE; do
			:
		done
		return 0
	fi
	return 1
}

# Optional newline_list; always succeeds.
linebreak()
{
	newline_list
	return 0
}

separator_op()
{
	if accept T_AND || accept T_SEMI; then
		return 0
	fi
	return 1
}

separator()
{
	if separator_op && linebreak; then
		return 0
	elif newline_list; then
		return 0
	fi
	return 1
}

sequential_sep()
{
	ptrace_begn sequential_sep
	if accept T_SEMI; then
		if linebreak; then
			ptrace_pass sequential_sep
			return 0
		fi
	elif newline_list; then
		ptrace_pass sequential_sep
		return 0
	fi
	ptrace_fail sequential_sep
	return 1
}

# Parse stdin as a complete command and print the token list.
#   $1 - input name for diagnostics ('-' for stdin)
parse()
{
	local fn="${1}"
	shift 1

	if run_lexer "${fn}" complete_command; then
		return 0
	fi
	return 1
}

# Test driver: parse the script made of the given lines (one argument per
# line), then print the raw tokens, the token names and the generated code,
# or "FAIL" when parsing fails.
try()
{
	local tokens=
	local t=

	printf 'Trying script:\n'
	printf '\t%s\n' "${@}"
	if tokens="$(printf '%s\n' "${@}" | parse -)"; then
		printf 'Tokens: %s\n' "${tokens}" | sed "
			s/${SOH}/<SOH>/g; s/${STX}/<STX>/g; s/${ETX}/<ETX>/g;
			s/${RS}/<RS>/g; s/${US}/<US>/g;
		"
		IFS="${RS}"
		for t in ${tokens}; do
			printf 'Token: %s\n' "$(tokname "${t}")"
			case "${t%${US}*}" in T_NAME|T_FNAME|T_CMDNAME|T_WORD)
				printf '       "%s"\n' "${t#*${US}}"
				;;
			esac
		done
		printf 'Generated code:\n'
		# Deliberately unquoted: split the generated code into lines.
		IFS="${LF}"
		printf '\t%s\n' $(codegen "${tokens}")
		unset IFS
	else
		printf 'FAIL\n'
	fi
	printf '\n\n'
}

#try '"foo bar" && $baz || qux' '${quux%uux quuux'
#try '"foo bar" && $baz || qux' '${quux%uux } quuux'
#try 'foo ${bar}'
#try 'foo ${#bar}'
#try 'foo ${bar#baz}'
#try 'foo ${#bar#}'
#try 'foo ${^}'
#try 'foo `bar`'
#try 'foo &&'
#try '{ foo; }'
#try '( foo )'
#try 'for i in 1 2 3; do stuff; done'
#try 'if foo; then bar; fi'
#try 'if foo; then bar; elif baz; then qux; else quux; fi'
#try 'if ; then ; fi'
#try 'while foo; do bar; done'
#try 'while ; do ; done'
#try 'foo(){ bar; }'
#try 'case foo in bar) baz;; (qux) quux;; quux);; esac'
#try 'foo bar ( baz )'
#try 'foo $(bar)'
#try 'foo $(bar); baz'
#try 'foo $(bar)' 'baz'
#try 'foo $(bar) baz'
#try 'foo$(bar$(baz))qux'
#try 'foo $((1 + 1))'
#try '$((1 + 1))'
#try '$((1 + (1 + 1)))'
#try '$((1 + $(foo) + 1))'
#try '$((1'
#try 'foo <<EOF' 'bar' 'EOF'
#try 'foo <<-EOF' "${HT}bar" "${HT}EOF"
#try 'foo <<EOF' '$(bar)' 'EOF'
#try 'foo <<E"O"F' '$(bar)' 'EOF'
#try 'foo <<"EOF"' '$(bar)' 'EOF'
#try 'foo <<E\OF' '$(bar)' 'EOF'
#try 'foo <<\EOF' '$(bar)' 'EOF'
#try 'foo <<EOF1; bar <<EOF2' 'baz' 'EOF1' 'qux' 'EOF2'
#try '\foo'
#try '"foo bar" baz'
#try '"foo'
#try 'foo\" bar'
#try 'foo\'
#try "foo'"
#try 'foo\' 'bar'
#try 'v=foo'
try 'if &&'
try 'if true; do'

# ---------------------------------------------------------------------------
# parsing/tokens.sh
# (diff --git a/parsing/tokens.sh b/parsing/tokens.sh; deleted file mode
# 100644; index 68db85d..0000000)
# ---------------------------------------------------------------------------

# Print a human-readable name for a token, for use in diagnostics.
#   $1 - token (type, optionally followed by US and the token text)
tokname()
{
	local t="${1}"
	shift 1
	local n=

	case "${t%${US}*}" in
	# Operators
	T_EOF)			n='end of file';;
	T_NEWLINE)		n='newline';;
	T_AND)			n='"&"';;
	T_SEMI)			n='";"';;
	T_AND_IF)		n='"&&"';;
	T_OR_IF)		n='"||"';;
	T_DSEMI)		n='";;"';;
	T_LESS)			n='"<"';;
	T_GREAT)		n='">"';;
	T_DLESS)		n='"<<"';;
	T_DGREAT)		n='">>"';;
	T_LESSAND)		n='"<&"';;
	T_GREATAND)		n='">&"';;
	T_LESSGREAT)		n='"<>"';;
	T_DLESSDASH)		n='"<<-"';;
	T_CLOBBER)		n='">|"';;
	T_PIPE)			n='"|"';;
	T_LPAREN)		n='"("';;
	T_RPAREN)		n='")"';;
	# Reserved words
	T_IF)			n='"if"';;
	T_THEN)			n='"then"';;
	T_ELSE)			n='"else"';;
	T_ELIF)			n='"elif"';;
	T_FI)			n='"fi"';;
	T_DO)			n='"do"';;
	T_DONE)			n='"done"';;
	T_CASE)			n='"case"';;
	T_ESAC)			n='"esac"';;
	T_WHILE)		n='"while"';;
	T_UNTIL)		n='"until"';;
	T_FOR)			n='"for"';;
	T_LBRACE)		n='"{"';;
	T_RBRACE)		n='"}"';;
	T_BANG)			n='"!"';;
	T_IN)			n='"in"';;
	# Special symbols
	T_NAME)			n='parameter name';;
	T_FNAME)		n='function name';;
	T_CMDNAME)		n='command name';;
	T_IO_NUMBER)		n='I/O number';;
	T_WORD)			n='word';;
	T_ASSIGNMENT_WORD)	n='assignment word';;
	# Unknown
	*)			n='unknown token';;
	esac

	printf '%s' "${n}"
}

# Print the source text of a token: the literal spelling of an operator or
# reserved word, or the text carried after US for the other token types.
#   $1 - token (type, optionally followed by US and the token text)
toktext()
{
	local t="${1}"
	shift 1
	local n=

	case "${t%${US}*}" in
	# Operators
	T_EOF)			n='';;
	T_NEWLINE)		n="${LF}";;
	T_AND)			n='&';;
	T_SEMI)			n=';';;
	T_AND_IF)		n='&&';;
	T_OR_IF)		n='||';;
	T_DSEMI)		n=';;';;
	T_LESS)			n='<';;
	T_GREAT)		n='>';;
	T_DLESS)		n='<<';;
	T_DGREAT)		n='>>';;
	T_LESSAND)		n='<&';;
	T_GREATAND)		n='>&';;
	T_LESSGREAT)		n='<>';;
	T_DLESSDASH)		n='<<-';;
	T_CLOBBER)		n='>|';;
	T_PIPE)			n='|';;
	T_LPAREN)		n='(';;
	T_RPAREN)		n=')';;
	# Reserved words
	T_IF)			n='if';;
	T_THEN)			n='then';;
	T_ELSE)			n='else';;
	T_ELIF)			n='elif';;
	T_FI)			n='fi';;
	T_DO)			n='do';;
	T_DONE)			n='done';;
	T_CASE)			n='case';;
	T_ESAC)			n='esac';;
	T_WHILE)		n='while';;
	T_UNTIL)		n='until';;
	T_FOR)			n='for';;
	T_LBRACE)		n='{';;
	T_RBRACE)		n='}';;
	T_BANG)			n='!';;
	T_IN)			n='in';;
	# Special symbols
	T_NAME)			n="${t#*${US}}";;
	T_FNAME)		n="${t#*${US}}";;
	T_CMDNAME)		n="${t#*${US}}";;
	T_IO_NUMBER)		n="${t#*${US}}";;
	T_WORD)			n="${t#*${US}}";;
	T_ASSIGNMENT_WORD)	n="${t#*${US}}";;
	# Unknown
	*)			n='';;
	esac

	printf '%s' "${n}"
}