From 3fabff2df0215812a042ee70b4708b63be8a4fd0 Mon Sep 17 00:00:00 2001 From: P. J. McDermott Date: Sat, 27 Feb 2016 15:40:48 -0500 Subject: [WIP] eshtrans/frontend: Split input into internal char array --- (limited to 'eshtrans') diff --git a/eshtrans/frontend/lexer.esh b/eshtrans/frontend/lexer.esh index a9aaa6d..0b628a6 100644 --- a/eshtrans/frontend/lexer.esh +++ b/eshtrans/frontend/lexer.esh @@ -88,8 +88,21 @@ synerr() lgetc() { - c="$(dd bs=1 count=1 2>/dev/null; printf '.')" - c="${c%.}" + if [ ${lbufi} -ge ${lbufc} ]; then + c='' + else + eval "c=\${lbufv_${lbufi}}" + lbufi=$((${lbufi} + 1)) + fi +} + +lsetc() +{ + if [ ${lbufi} -ge ${lbufc} ]; then + c='' + else + eval "c=\${lbufv_${lbufi}}" + fi } # @@ -272,7 +285,8 @@ next_here() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" lineno=$((${lineno} + ${ln_off})) line="${line}${res}" @@ -362,7 +376,8 @@ next_word() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" # We must advance lineno because scan_word() was run in a subshell. @@ -447,12 +462,13 @@ scan_word() '') lgetc;; *) c="${tmp_c}"; prev_c='';; esac - if ! res=$(scan_wordexp); then + if ! res="$(scan_wordexp)"; then exit 1 fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" # We must advance lineno because scan_wordexp() # was run in a subshell. @@ -550,7 +566,7 @@ scan_word() synerr 'Unterminated quoted string' fi - printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}" + printf "%d${RS}%d${RS}%s" ${lines} ${lbufi} "${word}" } scan_wordexp() @@ -579,7 +595,8 @@ scan_wordexp() "${c}")"; then exit 1 fi - c="${res##*${RS}}" + lbufi="${res##*${RS}}" + lsetc res="${res%${RS}*}" ln_off=${res##*${RS}} res="${res%${RS}*}" @@ -597,14 +614,15 @@ scan_wordexp() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" lineno=$((${lineno} + ${ln_off})) wordexp="\$${res}" ;; esac - printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}" + printf "%d${RS}%d${RS}%s" ${ln_off} ${lbufi} "${wordexp}" return 0 } @@ -628,7 +646,8 @@ scan_wordexp_param_brace() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" param="#${res}" lineno=$((${lineno} + ${ln_off})) @@ -647,7 +666,8 @@ scan_wordexp_param_brace() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" param="${res}" lineno=$((${lineno} + ${ln_off})) @@ -706,7 +726,8 @@ scan_wordexp_param_brace() fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" # We must advance lineno because scan_word() was run in a # subshell. @@ -771,7 +792,7 @@ scan_param() ;; esac - printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}" + printf "%d${RS}%d${RS}%s" 0 ${lbufi} "${param}" return 0 } @@ -811,12 +832,13 @@ scan_wordexp_arith() ;; '$') lgetc - if ! res=$(scan_wordexp); then + if ! res="$(scan_wordexp)"; then exit 1 fi ln_off=${res%%${RS}*} res="${res#*${RS}}" - c="${res%%${RS}*}" + lbufi="${res%%${RS}*}" + lsetc res="${res#*${RS}}" # We must advance lineno because scan_wordexp() # was run in a subshell. @@ -862,7 +884,7 @@ run_sublexer() ;; esac - printf "${RS}%d${RS}%c" ${lineno} "${c}" + printf "${RS}%d${RS}%d" ${lineno} ${lbufi} return 0 } @@ -873,8 +895,9 @@ run_sublexer() run_lexer() { local fn="${1}" - local st="${2}" - shift 2 + local buf="${2}" + local st="${3}" + shift 3 # Initialize global variables. fname="${fn}" @@ -884,6 +907,20 @@ run_lexer() here_awaiting_end=false here_awaiting_word=false + # Read file into array + eval "$(printf '%s' "${buf}" | awk -v FS='' -v j=0 \ + -v squote="'" -v esc_squote="'\\\\''" ' + { + for (i = 1; i <= NF; ++i) { + sub(squote, esc_squote, $i); + printf("lbufv_%d='\''%s'\''\n", j++, $i); + }; + printf("lbufv_%d='\''\n'\''\n", j++); + } + ')" + lbufi=0 + lbufc=${#buf} + # Read the first character and recognize the first token. lgetc next diff --git a/eshtrans/frontend/main.esh b/eshtrans/frontend/main.esh index b9f93a6..fc25b5f 100644 --- a/eshtrans/frontend/main.esh +++ b/eshtrans/frontend/main.esh @@ -21,9 +21,10 @@ esh_parse() { local fn="${1}" - shift 1 + local buf="${2}" + shift 2 - if run_lexer "${fn}" complete_command; then + if run_lexer "${fn}" "${buf}" complete_command; then return 0 fi return 1 diff --git a/eshtrans/main.esh b/eshtrans/main.esh index 7e0cb8c..588ca1d 100644 --- a/eshtrans/main.esh +++ b/eshtrans/main.esh @@ -88,10 +88,10 @@ main() fi input="${1}" if [ "x${output}" = 'x-' ]; then - sh_codegen "$(esh_parse "${input}" <"${input}")" + sh_codegen "$(esh_parse "${input}" "$(cat "${input}")")" else - sh_codegen "$(esh_parse "${input}" <"${input}")" \ - >"${output}" + sh_codegen "$(esh_parse "${input}" \ + "$(cat "${input}")")" >"${output}" fi fi } -- cgit v0.9.1