From 58fded07a19e6100e307bba9de8e72854b57f1c4 Mon Sep 17 00:00:00 2001 From: P. J. McDermott Date: Sun, 28 Feb 2016 01:47:19 -0500 Subject: Merge branch 'feature/no-dd-lexer' --- (limited to 'eshtrans/frontend/lexer.esh') diff --git a/eshtrans/frontend/lexer.esh b/eshtrans/frontend/lexer.esh index a9aaa6d..8bba0e0 100644 --- a/eshtrans/frontend/lexer.esh +++ b/eshtrans/frontend/lexer.esh @@ -18,7 +18,7 @@ # along with the Eggshell Compiler. If not, see # . -#dbg=false +dbg=false fname= lineno= @@ -31,12 +31,12 @@ here_awaiting_end= here_awaiting_word= tok= -#dbg() -#{ -# if ${dbg}; then -# printf 'DEBUG: %s\n' "${@}" >&2 -# fi -#} +dbg() +{ + if ${dbg}; then + printf 'DEBUG: %s\n' "${@}" >&2 + fi +} # # Error handling (used by scanning and interface functions) @@ -88,8 +88,29 @@ synerr() lgetc() { - c="$(dd bs=1 count=1 2>/dev/null; printf '.')" - c="${c%.}" + if [ ${lbufi} -ge ${lbufc} ]; then + c='' + else + eval "c=\${lbufv_${lbufi}}" + #echo "LGETC:$lineno: $lbufi '$c'" >&2 + lbufi=$((${lbufi} + 1)) + fi +} + +lungetc() +{ + lbufi=$((${lbufi} - 2)) + eval "c=\${lbufv_${lbufi}}" + #echo "LUNGETC:$lineno: $lbufi '$c'" >&2 + lbufi=$((${lbufi} + 1)) +} + +lsetc() +{ + lbufi=$((${lbufi} - 1)) + eval "c=\${lbufv_${lbufi}}" + #echo "LSETC:$lineno: $lbufi '$c'" >&2 + lbufi=$((${lbufi} + 1)) } # @@ -103,7 +124,7 @@ next() return fi while :; do - #dbg "parsing char '$c' at lineno $lineno" + dbg "parsing char '$c' at lineno $lineno" case "${c}" in '') lgetc @@ -137,7 +158,8 @@ next() continue ;; esac - next_word \\ + lungetc + next_word return ;; '#') @@ -181,7 +203,7 @@ next() return ;; esac - #dbg T_SEMI + dbg T_SEMI tok=T_SEMI return ;; @@ -200,7 +222,7 @@ next() return ;; *) - next_word '' + next_word return ;; esac @@ -272,7 +294,8 @@ next_here() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" lineno=$((${lineno} + ${ln_off})) line="${line}${res}" @@ -353,16 +376,15 @@ next_io() next_word() { - local prev_c="${1}" - shift 1 local res= - if ! res="$(scan_word false "${prev_c}")"; then + if ! res="$(scan_word false)"; then exit 1 fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" # We must advance lineno because scan_word() was run in a subshell. @@ -391,8 +413,7 @@ next_word() scan_word() { local in_param="${1}" - local prev_c="${2}" - shift 2 + shift 1 local lines= local word= local quoted= @@ -403,17 +424,8 @@ scan_word() word='' quoted=false - # Sort of a localized ungetc(). - case "${prev_c}" in - '') ;; - *) - tmp_c="${c}" - c="${prev_c}" - ;; - esac - while :; do - #dbg "parsing word char '$c' at lineno $lineno" + dbg "parsing word char '$c' at lineno $lineno" case "${c}" in '') break @@ -443,16 +455,14 @@ scan_word() fi ;; esac - case "${prev_c}" in - '') lgetc;; - *) c="${tmp_c}"; prev_c='';; - esac - if ! res=$(scan_wordexp); then + lgetc + if ! res="$(scan_wordexp)"; then exit 1 fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" # We must advance lineno because scan_wordexp() # was run in a subshell. @@ -470,13 +480,10 @@ scan_word() break ;; \\) - #dbg 'first backslash in word' + dbg 'first backslash in word' word="${word}${c}" - case "${prev_c}" in - '') lgetc;; - *) c="${tmp_c}"; prev_c='';; - esac - #dbg "next char: '$c'" + lgetc + dbg "next char: '$c'" case "${c}" in '') # Bash, ksh93, mksh, and zsh ignore a # backslash at the end of a file, but @@ -494,17 +501,11 @@ scan_word() \') word="${word}${c}" if ${quoted}; then - case "${prev_c}" in - '') lgetc;; - *) c="${tmp_c}"; prev_c='';; - esac + lgetc continue fi while :; do - case "${prev_c}" in - '') lgetc;; - *) c="${tmp_c}"; prev_c='';; - esac + lgetc word="${word}${c}" case "${c}" in '') @@ -540,17 +541,14 @@ scan_word() word="${word}${c}" ;; esac - case "${prev_c}" in - '') lgetc;; - *) c="${tmp_c}"; prev_c='';; - esac + lgetc done if ${quoted}; then synerr 'Unterminated quoted string' fi - printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}" + printf "%d${RS}%d${RS}%s" ${lines} ${lbufi} "${word}" } scan_wordexp() @@ -576,10 +574,11 @@ scan_wordexp() # Command substitution if ! res="$(run_sublexer "sub${fname}" \ ${lineno} "${start}" \ - "${c}")"; then + ${lbufi})"; then exit 1 fi - c="${res##*${RS}}" + lbufi="${res##*${RS}}" + lsetc res="${res%${RS}*}" ln_off=${res##*${RS}} res="${res%${RS}*}" @@ -597,14 +596,15 @@ scan_wordexp() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" lineno=$((${lineno} + ${ln_off})) wordexp="\$${res}" ;; esac - printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}" + printf "%d${RS}%d${RS}%s" ${ln_off} ${lbufi} "${wordexp}" return 0 } @@ -628,7 +628,8 @@ scan_wordexp_param_brace() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" param="#${res}" lineno=$((${lineno} + ${ln_off})) @@ -647,7 +648,8 @@ scan_wordexp_param_brace() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" param="${res}" lineno=$((${lineno} + ${ln_off})) @@ -701,18 +703,19 @@ scan_wordexp_param_brace() # If a modification was found if ${mod}; then # Get word. - if ! res="$(scan_word true '')"; then + if ! res="$(scan_word true)"; then exit 1 fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" # We must advance lineno because scan_word() was run in a # subshell. lineno=$((${lineno} + ${ln_off})) wordexp="${wordexp}${res}" - #dbg "param mod word: '$res'" + dbg "param mod word: '$res'" fi # Check for right brace. @@ -771,7 +774,7 @@ scan_param() ;; esac - printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}" + printf "%d${RS}%d${RS}%s" 0 ${lbufi} "${param}" return 0 } @@ -811,12 +814,13 @@ scan_wordexp_arith() ;; '$') lgetc - if ! res=$(scan_wordexp); then + if ! res="$(scan_wordexp)"; then exit 1 fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" # We must advance lineno because scan_wordexp() # was run in a subshell. @@ -837,7 +841,7 @@ run_sublexer() local fn="${1}" local ln="${2}" local st="${3}" - local ch="${4}" + local i="${4}" shift 4 # Initialize global variables. @@ -848,7 +852,8 @@ run_sublexer() here_awaiting_end=false here_awaiting_word=false - c="${ch}" + lbufi="${i}" + lsetc next #dbg=true @@ -862,7 +867,7 @@ run_sublexer() ;; esac - printf "${RS}%d${RS}%c" ${lineno} "${c}" + printf "${RS}%d${RS}%d" ${lineno} ${lbufi} return 0 } @@ -873,8 +878,9 @@ run_sublexer() run_lexer() { local fn="${1}" - local st="${2}" - shift 2 + local buf="${2}" + local st="${3}" + shift 3 # Initialize global variables. fname="${fn}" @@ -884,6 +890,20 @@ run_lexer() here_awaiting_end=false here_awaiting_word=false + # Read file into array + eval "$(printf '%s' "${buf}" | awk -v FS='' -v j=0 \ + -v squote="'" -v esc_squote="'\\\\''" ' + { + for (i = 1; i <= NF; ++i) { + sub(squote, esc_squote, $i); + printf("lbufv_%d='\''%s'\''\n", j++, $i); + }; + printf("lbufv_%d='\''\n'\''\n", j++); + } + ')" + lbufi=0 + lbufc=${#buf} + # Read the first character and recognize the first token. lgetc next @@ -905,13 +925,13 @@ accept() shift 1 local rw= - #dbg "looking for $t, current tok ${tok%%${US}*}" + dbg "looking for $t, current tok ${tok%%${US}*}" case "${t}" in T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|T_DO|T_DONE|\ T_CASE|T_ESAC|T_WHILE|T_UNTIL|T_FOR|\ T_LBRACE|T_RBRACE|T_BANG|T_IN|\ T_STATIC|T_LOCAL|T_RETURN) - #dbg "looking for reserved word $t, have '$tok'" + dbg "looking for reserved word $t, have '$tok'" if ! [ "x${tok%%${US}*}" = "x${t}" ]; then # Reserved words are recognized as literal # T_WORDs. @@ -1012,7 +1032,7 @@ accept() ;; esac - #dbg "accept $t" + dbg "accept $t" printf '%s' "${tok}${RS}" next return 0 -- cgit v0.9.1