summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author P. J. McDermott <pj@pehjota.net> 2016-02-28 01:47:19 (EST)
committer P. J. McDermott <pj@pehjota.net> 2016-02-28 01:47:19 (EST)
commit 58fded07a19e6100e307bba9de8e72854b57f1c4 (patch)
tree 5418804d8f548cf59da65900198633fbfc16518b
parent 9868dcf4746ff021b7de95c7eab7cc8baa3cd616 (diff)
parent 5ee168e734cc0d91e171f22e1269825df7d3cdfa (diff)
download eggshell-58fded07a19e6100e307bba9de8e72854b57f1c4.zip
eggshell-58fded07a19e6100e307bba9de8e72854b57f1c4.tar.gz
eggshell-58fded07a19e6100e307bba9de8e72854b57f1c4.tar.bz2
Merge branch 'feature/no-dd-lexer'
-rw-r--r-- eshtrans/frontend/lexer.esh 170
-rw-r--r-- eshtrans/frontend/main.esh 5
-rw-r--r-- eshtrans/main.esh 8
3 files changed, 104 insertions, 79 deletions
diff --git a/eshtrans/frontend/lexer.esh b/eshtrans/frontend/lexer.esh
index a9aaa6d..8bba0e0 100644
--- a/eshtrans/frontend/lexer.esh
+++ b/eshtrans/frontend/lexer.esh
@@ -18,7 +18,7 @@
# along with the Eggshell Compiler. If not, see
# <http://www.gnu.org/licenses/>.
-#dbg=false
+dbg=false
fname=
lineno=
@@ -31,12 +31,12 @@ here_awaiting_end=
here_awaiting_word=
tok=
-#dbg()
-#{
-# if ${dbg}; then
-# printf 'DEBUG: %s\n' "${@}" >&2
-# fi
-#}
+dbg()
+{
+ if ${dbg}; then
+ printf 'DEBUG: %s\n' "${@}" >&2
+ fi
+}
#
# Error handling (used by scanning and interface functions)
@@ -88,8 +88,29 @@ synerr()
lgetc()
{
- c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
- c="${c%.}"
+ if [ ${lbufi} -ge ${lbufc} ]; then
+ c=''
+ else
+ eval "c=\${lbufv_${lbufi}}"
+ #echo "LGETC:$lineno: $lbufi '$c'" >&2
+ lbufi=$((${lbufi} + 1))
+ fi
+}
+
+lungetc()
+{
+ lbufi=$((${lbufi} - 2))
+ eval "c=\${lbufv_${lbufi}}"
+ #echo "LUNGETC:$lineno: $lbufi '$c'" >&2
+ lbufi=$((${lbufi} + 1))
+}
+
+lsetc()
+{
+ lbufi=$((${lbufi} - 1))
+ eval "c=\${lbufv_${lbufi}}"
+ #echo "LSETC:$lineno: $lbufi '$c'" >&2
+ lbufi=$((${lbufi} + 1))
}
#
@@ -103,7 +124,7 @@ next()
return
fi
while :; do
- #dbg "parsing char '$c' at lineno $lineno"
+ dbg "parsing char '$c' at lineno $lineno"
case "${c}" in
'')
lgetc
@@ -137,7 +158,8 @@ next()
continue
;;
esac
- next_word \\
+ lungetc
+ next_word
return
;;
'#')
@@ -181,7 +203,7 @@ next()
return
;;
esac
- #dbg T_SEMI
+ dbg T_SEMI
tok=T_SEMI
return
;;
@@ -200,7 +222,7 @@ next()
return
;;
*)
- next_word ''
+ next_word
return
;;
esac
@@ -272,7 +294,8 @@ next_here()
fi
ln_off=${res%%${RS}*}
res="${res#*${RS}}"
- c="${res%%${RS}*}"
+ lbufi="${res%%${RS}*}"
+ lsetc
res="${res#*${RS}}"
lineno=$((${lineno} + ${ln_off}))
line="${line}${res}"
@@ -353,16 +376,15 @@ next_io()
next_word()
{
- local prev_c="${1}"
- shift 1
local res=
- if ! res="$(scan_word false "${prev_c}")"; then
+ if ! res="$(scan_word false)"; then
exit 1
fi
ln_off=${res%%${RS}*}
res="${res#*${RS}}"
- c="${res%%${RS}*}"
+ lbufi="${res%%${RS}*}"
+ lsetc
res="${res#*${RS}}"
# We must advance lineno because scan_word() was run in a subshell.
@@ -391,8 +413,7 @@ next_word()
scan_word()
{
local in_param="${1}"
- local prev_c="${2}"
- shift 2
+ shift 1
local lines=
local word=
local quoted=
@@ -403,17 +424,8 @@ scan_word()
word=''
quoted=false
- # Sort of a localized ungetc().
- case "${prev_c}" in
- '') ;;
- *)
- tmp_c="${c}"
- c="${prev_c}"
- ;;
- esac
-
while :; do
- #dbg "parsing word char '$c' at lineno $lineno"
+ dbg "parsing word char '$c' at lineno $lineno"
case "${c}" in
'')
break
@@ -443,16 +455,14 @@ scan_word()
fi
;;
esac
- case "${prev_c}" in
- '') lgetc;;
- *) c="${tmp_c}"; prev_c='';;
- esac
- if ! res=$(scan_wordexp); then
+ lgetc
+ if ! res="$(scan_wordexp)"; then
exit 1
fi
ln_off=${res%%${RS}*}
res="${res#*${RS}}"
- c="${res%%${RS}*}"
+ lbufi="${res%%${RS}*}"
+ lsetc
res="${res#*${RS}}"
# We must advance lineno because scan_wordexp()
# was run in a subshell.
@@ -470,13 +480,10 @@ scan_word()
break
;;
\\)
- #dbg 'first backslash in word'
+ dbg 'first backslash in word'
word="${word}${c}"
- case "${prev_c}" in
- '') lgetc;;
- *) c="${tmp_c}"; prev_c='';;
- esac
- #dbg "next char: '$c'"
+ lgetc
+ dbg "next char: '$c'"
case "${c}" in '')
# Bash, ksh93, mksh, and zsh ignore a
# backslash at the end of a file, but
@@ -494,17 +501,11 @@ scan_word()
\')
word="${word}${c}"
if ${quoted}; then
- case "${prev_c}" in
- '') lgetc;;
- *) c="${tmp_c}"; prev_c='';;
- esac
+ lgetc
continue
fi
while :; do
- case "${prev_c}" in
- '') lgetc;;
- *) c="${tmp_c}"; prev_c='';;
- esac
+ lgetc
word="${word}${c}"
case "${c}" in
'')
@@ -540,17 +541,14 @@ scan_word()
word="${word}${c}"
;;
esac
- case "${prev_c}" in
- '') lgetc;;
- *) c="${tmp_c}"; prev_c='';;
- esac
+ lgetc
done
if ${quoted}; then
synerr 'Unterminated quoted string'
fi
- printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}"
+ printf "%d${RS}%d${RS}%s" ${lines} ${lbufi} "${word}"
}
scan_wordexp()
@@ -576,10 +574,11 @@ scan_wordexp()
# Command substitution
if ! res="$(run_sublexer "sub${fname}" \
${lineno} "${start}" \
- "${c}")"; then
+ ${lbufi})"; then
exit 1
fi
- c="${res##*${RS}}"
+ lbufi="${res##*${RS}}"
+ lsetc
res="${res%${RS}*}"
ln_off=${res##*${RS}}
res="${res%${RS}*}"
@@ -597,14 +596,15 @@ scan_wordexp()
fi
ln_off=${res%%${RS}*}
res="${res#*${RS}}"
- c="${res%%${RS}*}"
+ lbufi="${res%%${RS}*}"
+ lsetc
res="${res#*${RS}}"
lineno=$((${lineno} + ${ln_off}))
wordexp="\$${res}"
;;
esac
- printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}"
+ printf "%d${RS}%d${RS}%s" ${ln_off} ${lbufi} "${wordexp}"
return 0
}
@@ -628,7 +628,8 @@ scan_wordexp_param_brace()
fi
ln_off=${res%%${RS}*}
res="${res#*${RS}}"
- c="${res%%${RS}*}"
+ lbufi="${res%%${RS}*}"
+ lsetc
res="${res#*${RS}}"
param="#${res}"
lineno=$((${lineno} + ${ln_off}))
@@ -647,7 +648,8 @@ scan_wordexp_param_brace()
fi
ln_off=${res%%${RS}*}
res="${res#*${RS}}"
- c="${res%%${RS}*}"
+ lbufi="${res%%${RS}*}"
+ lsetc
res="${res#*${RS}}"
param="${res}"
lineno=$((${lineno} + ${ln_off}))
@@ -701,18 +703,19 @@ scan_wordexp_param_brace()
# If a modification was found
if ${mod}; then
# Get word.
- if ! res="$(scan_word true '')"; then
+ if ! res="$(scan_word true)"; then
exit 1
fi
ln_off=${res%%${RS}*}
res="${res#*${RS}}"
- c="${res%%${RS}*}"
+ lbufi="${res%%${RS}*}"
+ lsetc
res="${res#*${RS}}"
# We must advance lineno because scan_word() was run in a
# subshell.
lineno=$((${lineno} + ${ln_off}))
wordexp="${wordexp}${res}"
- #dbg "param mod word: '$res'"
+ dbg "param mod word: '$res'"
fi
# Check for right brace.
@@ -771,7 +774,7 @@ scan_param()
;;
esac
- printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}"
+ printf "%d${RS}%d${RS}%s" 0 ${lbufi} "${param}"
return 0
}
@@ -811,12 +814,13 @@ scan_wordexp_arith()
;;
'$')
lgetc
- if ! res=$(scan_wordexp); then
+ if ! res="$(scan_wordexp)"; then
exit 1
fi
ln_off=${res%%${RS}*}
res="${res#*${RS}}"
- c="${res%%${RS}*}"
+ lbufi="${res%%${RS}*}"
+ lsetc
res="${res#*${RS}}"
# We must advance lineno because scan_wordexp()
# was run in a subshell.
@@ -837,7 +841,7 @@ run_sublexer()
local fn="${1}"
local ln="${2}"
local st="${3}"
- local ch="${4}"
+ local i="${4}"
shift 4
# Initialize global variables.
@@ -848,7 +852,8 @@ run_sublexer()
here_awaiting_end=false
here_awaiting_word=false
- c="${ch}"
+ lbufi="${i}"
+ lsetc
next
#dbg=true
@@ -862,7 +867,7 @@ run_sublexer()
;;
esac
- printf "${RS}%d${RS}%c" ${lineno} "${c}"
+ printf "${RS}%d${RS}%d" ${lineno} ${lbufi}
return 0
}
@@ -873,8 +878,9 @@ run_sublexer()
run_lexer()
{
local fn="${1}"
- local st="${2}"
- shift 2
+ local buf="${2}"
+ local st="${3}"
+ shift 3
# Initialize global variables.
fname="${fn}"
@@ -884,6 +890,20 @@ run_lexer()
here_awaiting_end=false
here_awaiting_word=false
+ # Read file into array
+ eval "$(printf '%s' "${buf}" | awk -v FS='' -v j=0 \
+ -v squote="'" -v esc_squote="'\\\\''" '
+ {
+ for (i = 1; i <= NF; ++i) {
+ sub(squote, esc_squote, $i);
+ printf("lbufv_%d='\''%s'\''\n", j++, $i);
+ };
+ printf("lbufv_%d='\''\n'\''\n", j++);
+ }
+ ')"
+ lbufi=0
+ lbufc=${#buf}
+
# Read the first character and recognize the first token.
lgetc
next
@@ -905,13 +925,13 @@ accept()
shift 1
local rw=
- #dbg "looking for $t, current tok ${tok%%${US}*}"
+ dbg "looking for $t, current tok ${tok%%${US}*}"
case "${t}" in
T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|T_DO|T_DONE|\
T_CASE|T_ESAC|T_WHILE|T_UNTIL|T_FOR|\
T_LBRACE|T_RBRACE|T_BANG|T_IN|\
T_STATIC|T_LOCAL|T_RETURN)
- #dbg "looking for reserved word $t, have '$tok'"
+ dbg "looking for reserved word $t, have '$tok'"
if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
# Reserved words are recognized as literal
# T_WORDs.
@@ -1012,7 +1032,7 @@ accept()
;;
esac
- #dbg "accept $t"
+ dbg "accept $t"
printf '%s' "${tok}${RS}"
next
return 0
diff --git a/eshtrans/frontend/main.esh b/eshtrans/frontend/main.esh
index b9f93a6..fc25b5f 100644
--- a/eshtrans/frontend/main.esh
+++ b/eshtrans/frontend/main.esh
@@ -21,9 +21,10 @@
esh_parse()
{
local fn="${1}"
- shift 1
+ local buf="${2}"
+ shift 2
- if run_lexer "${fn}" complete_command; then
+ if run_lexer "${fn}" "${buf}" complete_command; then
return 0
fi
return 1
diff --git a/eshtrans/main.esh b/eshtrans/main.esh
index 7e0cb8c..ac3895e 100644
--- a/eshtrans/main.esh
+++ b/eshtrans/main.esh
@@ -88,9 +88,13 @@ main()
fi
input="${1}"
if [ "x${output}" = 'x-' ]; then
- sh_codegen "$(esh_parse "${input}" <"${input}")"
+ contents="$(cat "${input}"; printf '.')"
+ contents="${contents%.}"
+ sh_codegen "$(esh_parse "${input}" "${contents}")"
else
- sh_codegen "$(esh_parse "${input}" <"${input}")" \
+ contents="$(cat "${input}"; printf '.')"
+ contents="${contents%.}"
+ sh_codegen "$(esh_parse "${input}" "${contents}")" \
>"${output}"
fi
fi