From b77d7967e9cf00002c6bc9fd1bbcf891a197115e Mon Sep 17 00:00:00 2001 From: P. J. McDermott Date: Fri, 19 Feb 2016 17:36:43 -0500 Subject: Add error handling to lexer --- (limited to 'parsing') diff --git a/parsing/lexer.sh b/parsing/lexer.sh index 3081a79..1f40138 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -13,7 +13,7 @@ pgetc() next() { while :; do - echo "parsing char '$c'" >&2 + dbg "parsing char '$c' at lineno $lineno" case "${c}" in '') pgetc @@ -143,7 +143,9 @@ next_word() local lineno_offset= local word= - res="$(scan_word false)" + if ! res="$(scan_word false)"; then + exit 1 + fi lineno_offset=${res%%${RS}*} res="${res#*${RS}}" c="${res%%${RS}*}" @@ -169,7 +171,7 @@ scan_word() quoted=false lines=0 while :; do - echo "parsing word char '$c'" >&2 + dbg "parsing word char '$c' at lineno $lineno" case "${c}" in '') break @@ -190,7 +192,9 @@ scan_word() ;; '$') pgetc - res=$(scan_wordexp) + if ! res=$(scan_wordexp); then + exit 1 + fi lineno_offset=${res%%${RS}*} res="${res#*${RS}}" c="${res%%${RS}*}" @@ -205,6 +209,11 @@ scan_word() # below. continue ;; + '`') + synerr 'Backquoted (old-style) %s' \ + 'command substitution not supported' + break + ;; \') word="${word}${c}" while :; do @@ -243,16 +252,18 @@ scan_word() scan_wordexp() { local wordexp= - local lineno_offset= + local ln_off= local mod= local res= + local param= local word= wordexp='' - lineno_offset=0 + ln_off=0 case "${c}" in '{') # Parameter expansion brace + mod=true pgetc case "${c}" in '#') @@ -261,7 +272,16 @@ scan_wordexp() [@*#?$!A-Za-z0-9_-]) # String length # expansion - next_param + res="$(scan_param)" + ln_off=${res%%${RS}*} + res="${res#*${RS}}" + c="${res%%${RS}*}" + res="${res#*${RS}}" + param="${res%%${RS}*}" + lineno=$((${lineno} + \ + ${ln_off})) + # Disable modifications. + mod=false ;; *) # Special parameter "#" @@ -270,62 +290,72 @@ scan_wordexp() esac ;; *) - next_param + if ! res="$(scan_param)"; then + exit 1 + fi + ln_off=${res%%${RS}*} + res="${res#*${RS}}" + c="${res%%${RS}*}" + res="${res#*${RS}}" + param="${res%%${RS}*}" + lineno=$((${lineno} + ${ln_off})) ;; esac wordexp="\${${param}" - # Check for modifications - mod=false - case "${c}" in - ':') - mod=true - wordexp="${wordexp}${c}" - pgetc - case "${c}" in '-'|'='|'?'|'+') + if ${mod}; then + # Check for modifications + mod=false + case "${c}" in + ':') + mod=true wordexp="${wordexp}${c}" pgetc + case "${c}" in '-'|'='|'?'|'+') + wordexp="${wordexp}${c}" + pgetc ;; - esac - ;; - '-'|'='|'?'|'+') - mod=true - wordexp="${wordexp}${c}" - pgetc - ;; - '%') - mod=true - wordexp="${wordexp}${c}" - pgetc - case "${c}" in '%') + esac + ;; + '-'|'='|'?'|'+') + mod=true wordexp="${wordexp}${c}" pgetc ;; - esac - ;; - '#') - mod=true - wordexp="${wordexp}${c}" - pgetc - case "${c}" in '#') + '%') + mod=true wordexp="${wordexp}${c}" pgetc + case "${c}" in '%') + wordexp="${wordexp}${c}" + pgetc + ;; + esac ;; - esac - ;; - esac + '#') + mod=true + wordexp="${wordexp}${c}" + pgetc + case "${c}" in '#') + wordexp="${wordexp}${c}" + pgetc + ;; + esac + ;; + esac + fi if ${mod}; then # Get word. res="$(scan_word true)" - lineno_offset=${res%%${RS}*} + ln_off=${res%%${RS}*} res="${res#*${RS}}" c="${res%%${RS}*}" res="${res#*${RS}}" word="${res%%${RS}*}" # We must advance lineno because scan_word() was # run in a subshell. - lineno=$((${lineno} + ${lineno_offset})) + lineno=$((${lineno} + ${ln_off})) wordexp="${wordexp}${word}" - echo "param mod word: '$word'" >&2 + dbg "param mod word: '$word'" fi # Check for right brace. case "${c}" in @@ -339,19 +369,28 @@ scan_wordexp() esac ;; '(') + # Arithmetic expansion or command substitution ;; [@*#?$!A-Za-z0-9_-]) - next_param + res="$(scan_param)" + ln_off=${res%%${RS}*} + res="${res#*${RS}}" + c="${res%%${RS}*}" + res="${res#*${RS}}" + param="${res%%${RS}*}" + lineno=$((${lineno} + ${ln_off})) wordexp="\$${param}" ;; esac - printf "%d${RS}%s${RS}%s" ${lineno_offset} "${c}" "${wordexp}" - + printf "%d${RS}%s${RS}%s" ${ln_off} "${c}" "${wordexp}" + return 0 } -next_param() +scan_param() { + local param= + param='' case "${c}" in [@*#?$!0-]) @@ -385,7 +424,13 @@ next_param() pgetc done ;; + *) + synerr 'Bad parameter name' + ;; esac + + printf "%d${RS}%s${RS}%s" 0 "${c}" "${param}" + return 0 } # Check the current token. If it matches, add it to the syntax array. @@ -394,7 +439,7 @@ accept() local t="${1}" if [ "x${tok%%${US}*}" = "x${t}" ]; then - echo "accept $t" >&2 + dbg "accept $t" tokens="${tokens}${tok}${RS}" next return 0 @@ -402,6 +447,18 @@ accept() return 1 } +expect() +{ + local t="${1}" + + if accept "${t}"; then + return 0 + else + synexp "${t}" + return 1 + fi +} + error() { local fmt="${1}" @@ -415,6 +472,10 @@ error() printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2 ;; esac + + # The parser runs in a subshell, so this just returns up to the caller + # like an exception. + exit 1 } init_lexer() diff --git a/parsing/parse.sh b/parsing/parse.sh index 2ac4215..d85a42e 100644 --- a/parsing/parse.sh +++ b/parsing/parse.sh @@ -3,6 +3,15 @@ LF="$(printf '\n.')"; LF="${LF%.}" RS="$(printf '\036.')"; RS="${RS%.}" US="$(printf '\037.')"; US="${US%.}" +dbg() +{ + dbg=true + dbg=false + if ${dbg}; then + printf 'DEBUG: %s\n' "${@}" >&2 + fi +} + . ./tokens.sh . ./lexer.sh @@ -109,25 +118,43 @@ parse() shift 1 init_lexer "${fn}" + + # If this returns (does not exit), there are no errors. while complete_command; do :; done - if :; then # TODO: Test for EOF or errors - get_tokens - return 0 + + get_tokens + + return 0 +} + +try() +{ + local tokens= + local t= + + printf 'Trying script:\n' + printf '\t%s\n' "${@}" + if tokens="$(printf '%s\n' "${@}" | parse -)"; then + IFS="${RS}" + for t in ${tokens}; do + printf 'Token: %s\n' "$(tokname "${t}")" + case "${t%${US}*}" in T_WORD) + printf ' "%s"\n' "${t#T_WORD${US}}" + ;; + esac + done + unset IFS + else + printf 'FAIL\n' fi - return 1 + printf '\n\n' } -if tokens="$(printf '%s\n' '"foo bar" && $baz || qux' '${quux%uux } quuux' | \ - parse -)"; then - IFS="${RS}" - for t in ${tokens}; do - printf 'Token: %s\n' "$(tokname "${t}")" - case "${t%${US}*}" in T_WORD) - printf ' "%s"\n' "${t#T_WORD${US}}" - ;; - esac - done - unset IFS -else - echo FAIL -fi +try '"foo bar" && $baz || qux' '${quux%uux quuux' +try '"foo bar" && $baz || qux' '${quux%uux } quuux' +try 'foo ${bar}' +try 'foo ${#bar}' +try 'foo ${bar#baz}' +try 'foo ${#bar#}' +try 'foo ${^}' +try 'foo `bar`' -- cgit v0.9.1