From c55bf702cb737865884a10993896acfb0c69f09d Mon Sep 17 00:00:00 2001
From: P. J. McDermott
Date: Sat, 20 Feb 2016 15:33:17 -0500
Subject: Organize lexer functions

---
(limited to 'parsing')

diff --git a/parsing/lexer.sh b/parsing/lexer.sh
index a3ab762..292da43 100644
--- a/parsing/lexer.sh
+++ b/parsing/lexer.sh
@@ -5,12 +5,64 @@ c=
 tok=
 tokens=
 
+#
+# Error handling (used by scanning and interface functions)
+#
+
+error()
+{
+	local fmt="${1}"
+	shift 1
+
+	case "${fname}" in
+		'-')
+			printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
+			;;
+		*)
+			printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
+			;;
+	esac
+
+	# The parser and lexer run in a subshell, so this just returns up to the
+	# caller like an exception.
+	exit 1
+}
+
+synexp()
+{
+	local t="${1}"
+	shift 1
+
+	if [ "x${t}" = 'x' ]; then
+		synerr '%s unexpected' "$(tokname "${tok}")"
+	else
+		synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
+			"$(tokname "${t}")"
+	fi
+}
+
+synerr()
+{
+	local fmt="${1}"
+	shift 1
+
+	error "Syntax error: ${fmt}" "${@}"
+}
+
+#
+# Input reading
+#
+
 pgetc()
 {
 	c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
 	c="${c%.}"
 }
 
+#
+# Token recognition
+#
+
 next()
 {
 	while :; do
@@ -177,6 +229,10 @@ next_word()
 	tok="T_WORD${US}${word}"
 }
 
+#
+# Token scanning
+#
+
 scan_word()
 {
 	local in_param="${1}"
@@ -266,7 +322,7 @@ scan_word()
 		pgetc
 	done
 
-	printf "%d${RS}%s${RS}%s" ${lines} "${c}" "${word}"
+	printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}"
 }
 
 scan_wordexp()
@@ -428,7 +484,7 @@ scan_wordexp()
 			;;
 	esac
 
-	printf "%d${RS}%s${RS}%s" ${ln_off} "${c}" "${wordexp}"
+	printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}"
 	return 0
 }
 
@@ -474,10 +530,14 @@ scan_param()
 			;;
 	esac
 
-	printf "%d${RS}%s${RS}%s" 0 "${c}" "${param}"
+	printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}"
 	return 0
 }
 
+#
+# Interface
+#
+
 # Check the current token. If it matches, add it to the syntax array.
 accept()
 {
@@ -586,29 +646,10 @@ expect()
 		return 0
 	else
 		synexp "${t}"
-		return 1
 	fi
 }
 
-error()
-{
-	local fmt="${1}"
-	shift 1
-
-	case "${fname}" in
-		'-')
-			printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
-			;;
-		*)
-			printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
-			;;
-	esac
-
-	# The parser runs in a subshell, so this just returns up to the caller
-	# like an exception.
-	exit 1
-}
-
+# Called by the lexer, not the parser
 run_sublexer()
 {
 	local fn="${1}"
@@ -668,24 +709,3 @@ run_lexer()
 
 	return 0
 }
-
-synexp()
-{
-	local t="${1}"
-	shift 1
-
-	if [ "x${t}" = 'x' ]; then
-		synerr '%s unexpected' "$(tokname "${tok}")"
-	else
-		synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
-			"$(tokname "${t}")"
-	fi
-}
-
-synerr()
-{
-	local fmt="${1}"
-	shift 1
-
-	error "Syntax error: ${fmt}" "${@}"
-}
-- 
cgit v0.9.1