From b77d7967e9cf00002c6bc9fd1bbcf891a197115e Mon Sep 17 00:00:00 2001 From: P. J. McDermott Date: Fri, 19 Feb 2016 17:36:43 -0500 Subject: Add error handling to lexer --- (limited to 'parsing/lexer.sh') diff --git a/parsing/lexer.sh b/parsing/lexer.sh index 3081a79..1f40138 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -13,7 +13,7 @@ pgetc() next() { while :; do - echo "parsing char '$c'" >&2 + dbg "parsing char '$c' at lineno $lineno" case "${c}" in '') pgetc @@ -143,7 +143,9 @@ next_word() local lineno_offset= local word= - res="$(scan_word false)" + if ! res="$(scan_word false)"; then + exit 1 + fi lineno_offset=${res%%${RS}*} res="${res#*${RS}}" c="${res%%${RS}*}" @@ -169,7 +171,7 @@ scan_word() quoted=false lines=0 while :; do - echo "parsing word char '$c'" >&2 + dbg "parsing word char '$c' at lineno $lineno" case "${c}" in '') break @@ -190,7 +192,9 @@ scan_word() ;; '$') pgetc - res=$(scan_wordexp) + if ! res=$(scan_wordexp); then + exit 1 + fi lineno_offset=${res%%${RS}*} res="${res#*${RS}}" c="${res%%${RS}*}" @@ -205,6 +209,11 @@ scan_word() # below. continue ;; + '`') + synerr 'Backquoted (old-style) %s' \ + 'command substitution not supported' + break + ;; \') word="${word}${c}" while :; do @@ -243,16 +252,18 @@ scan_word() scan_wordexp() { local wordexp= - local lineno_offset= + local ln_off= local mod= local res= + local param= local word= wordexp='' - lineno_offset=0 + ln_off=0 case "${c}" in '{') # Parameter expansion brace + mod=true pgetc case "${c}" in '#') @@ -261,7 +272,16 @@ scan_wordexp() [@*#?$!A-Za-z0-9_-]) # String length # expansion - next_param + res="$(scan_param)" + ln_off=${res%%${RS}*} + res="${res#*${RS}}" + c="${res%%${RS}*}" + res="${res#*${RS}}" + param="${res%%${RS}*}" + lineno=$((${lineno} + \ + ${ln_off})) + # Disable modifications. + mod=false ;; *) # Special parameter "#" @@ -270,62 +290,72 @@ scan_wordexp() esac ;; *) - next_param + if ! res="$(scan_param)"; then + exit 1 + fi + ln_off=${res%%${RS}*} + res="${res#*${RS}}" + c="${res%%${RS}*}" + res="${res#*${RS}}" + param="${res%%${RS}*}" + lineno=$((${lineno} + ${ln_off})) ;; esac wordexp="\${${param}" - # Check for modifications - mod=false - case "${c}" in - ':') - mod=true - wordexp="${wordexp}${c}" - pgetc - case "${c}" in '-'|'='|'?'|'+') + if ${mod}; then + # Check for modifications + mod=false + case "${c}" in + ':') + mod=true wordexp="${wordexp}${c}" pgetc + case "${c}" in '-'|'='|'?'|'+') + wordexp="${wordexp}${c}" + pgetc ;; - esac - ;; - '-'|'='|'?'|'+') - mod=true - wordexp="${wordexp}${c}" - pgetc - ;; - '%') - mod=true - wordexp="${wordexp}${c}" - pgetc - case "${c}" in '%') + esac + ;; + '-'|'='|'?'|'+') + mod=true wordexp="${wordexp}${c}" pgetc ;; - esac - ;; - '#') - mod=true - wordexp="${wordexp}${c}" - pgetc - case "${c}" in '#') + '%') + mod=true wordexp="${wordexp}${c}" pgetc + case "${c}" in '%') + wordexp="${wordexp}${c}" + pgetc + ;; + esac ;; - esac - ;; - esac + '#') + mod=true + wordexp="${wordexp}${c}" + pgetc + case "${c}" in '#') + wordexp="${wordexp}${c}" + pgetc + ;; + esac + ;; + esac + fi if ${mod}; then # Get word. res="$(scan_word true)" - lineno_offset=${res%%${RS}*} + ln_off=${res%%${RS}*} res="${res#*${RS}}" c="${res%%${RS}*}" res="${res#*${RS}}" word="${res%%${RS}*}" # We must advance lineno because scan_word() was # run in a subshell. - lineno=$((${lineno} + ${lineno_offset})) + lineno=$((${lineno} + ${ln_off})) wordexp="${wordexp}${word}" - echo "param mod word: '$word'" >&2 + dbg "param mod word: '$word'" fi # Check for right brace. case "${c}" in @@ -339,19 +369,28 @@ scan_wordexp() esac ;; '(') + # Arithmetic expansion or command substitution ;; [@*#?$!A-Za-z0-9_-]) - next_param + res="$(scan_param)" + ln_off=${res%%${RS}*} + res="${res#*${RS}}" + c="${res%%${RS}*}" + res="${res#*${RS}}" + param="${res%%${RS}*}" + lineno=$((${lineno} + ${ln_off})) wordexp="\$${param}" ;; esac - printf "%d${RS}%s${RS}%s" ${lineno_offset} "${c}" "${wordexp}" - + printf "%d${RS}%s${RS}%s" ${ln_off} "${c}" "${wordexp}" + return 0 } -next_param() +scan_param() { + local param= + param='' case "${c}" in [@*#?$!0-]) @@ -385,7 +424,13 @@ next_param() pgetc done ;; + *) + synerr 'Bad parameter name' + ;; esac + + printf "%d${RS}%s${RS}%s" 0 "${c}" "${param}" + return 0 } # Check the current token. If it matches, add it to the syntax array. @@ -394,7 +439,7 @@ accept() local t="${1}" if [ "x${tok%%${US}*}" = "x${t}" ]; then - echo "accept $t" >&2 + dbg "accept $t" tokens="${tokens}${tok}${RS}" next return 0 @@ -402,6 +447,18 @@ accept() return 1 } +expect() +{ + local t="${1}" + + if accept "${t}"; then + return 0 + else + synexp "${t}" + return 1 + fi +} + error() { local fmt="${1}" @@ -415,6 +472,10 @@ error() printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2 ;; esac + + # The parser runs in a subshell, so this just returns up to the caller + # like an exception. + exit 1 } init_lexer() -- cgit v0.9.1