summary refs log tree commit diff stats
path: root/parsing
diff options
context:
space:
mode:
author P. J. McDermott <pj@pehjota.net> 2016-02-19 17:36:43 (EST)
committer P. J. McDermott <pj@pehjota.net> 2016-02-19 17:41:15 (EST)
commit b77d7967e9cf00002c6bc9fd1bbcf891a197115e (patch)
tree 996ebdb5f0cd2a1e30b25fd9bf0dad6a9a28acd1 /parsing
parent f6e55a026abf33867141896b1e227e791942c2a3 (diff)
download eggshell-b77d7967e9cf00002c6bc9fd1bbcf891a197115e.zip
eggshell-b77d7967e9cf00002c6bc9fd1bbcf891a197115e.tar.gz
eggshell-b77d7967e9cf00002c6bc9fd1bbcf891a197115e.tar.bz2
Add error handling to lexer
Diffstat (limited to 'parsing')
-rw-r--r-- parsing/lexer.sh 153
-rw-r--r-- parsing/parse.sh 63
2 files changed, 152 insertions, 64 deletions
diff --git a/parsing/lexer.sh b/parsing/lexer.sh
index 3081a79..1f40138 100644
--- a/parsing/lexer.sh
+++ b/parsing/lexer.sh
@@ -13,7 +13,7 @@ pgetc()
next()
{
while :; do
- echo "parsing char '$c'" >&2
+ dbg "parsing char '$c' at lineno $lineno"
case "${c}" in
'')
pgetc
@@ -143,7 +143,9 @@ next_word()
local lineno_offset=
local word=
- res="$(scan_word false)"
+ if ! res="$(scan_word false)"; then
+ exit 1
+ fi
lineno_offset=${res%%${RS}*}
res="${res#*${RS}}"
c="${res%%${RS}*}"
@@ -169,7 +171,7 @@ scan_word()
quoted=false
lines=0
while :; do
- echo "parsing word char '$c'" >&2
+ dbg "parsing word char '$c' at lineno $lineno"
case "${c}" in
'')
break
@@ -190,7 +192,9 @@ scan_word()
;;
'$')
pgetc
- res=$(scan_wordexp)
+ if ! res=$(scan_wordexp); then
+ exit 1
+ fi
lineno_offset=${res%%${RS}*}
res="${res#*${RS}}"
c="${res%%${RS}*}"
@@ -205,6 +209,11 @@ scan_word()
# below.
continue
;;
+ '`')
+ synerr 'Backquoted (old-style) %s' \
+ 'command substitution not supported'
+ break
+ ;;
\')
word="${word}${c}"
while :; do
@@ -243,16 +252,18 @@ scan_word()
scan_wordexp()
{
local wordexp=
- local lineno_offset=
+ local ln_off=
local mod=
local res=
+ local param=
local word=
wordexp=''
- lineno_offset=0
+ ln_off=0
case "${c}" in
'{')
# Parameter expansion brace
+ mod=true
pgetc
case "${c}" in
'#')
@@ -261,7 +272,16 @@ scan_wordexp()
[@*#?$!A-Za-z0-9_-])
# String length
# expansion
- next_param
+ res="$(scan_param)"
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ param="${res%%${RS}*}"
+ lineno=$((${lineno} + \
+ ${ln_off}))
+ # Disable modifications.
+ mod=false
;;
*)
# Special parameter "#"
@@ -270,62 +290,72 @@ scan_wordexp()
esac
;;
*)
- next_param
+ if ! res="$(scan_param)"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ param="${res%%${RS}*}"
+ lineno=$((${lineno} + ${ln_off}))
;;
esac
wordexp="\${${param}"
- # Check for modifications
- mod=false
- case "${c}" in
- ':')
- mod=true
- wordexp="${wordexp}${c}"
- pgetc
- case "${c}" in '-'|'='|'?'|'+')
+ if ${mod}; then
+ # Check for modifications
+ mod=false
+ case "${c}" in
+ ':')
+ mod=true
wordexp="${wordexp}${c}"
pgetc
+ case "${c}" in '-'|'='|'?'|'+')
+ wordexp="${wordexp}${c}"
+ pgetc
;;
- esac
- ;;
- '-'|'='|'?'|'+')
- mod=true
- wordexp="${wordexp}${c}"
- pgetc
- ;;
- '%')
- mod=true
- wordexp="${wordexp}${c}"
- pgetc
- case "${c}" in '%')
+ esac
+ ;;
+ '-'|'='|'?'|'+')
+ mod=true
wordexp="${wordexp}${c}"
pgetc
;;
- esac
- ;;
- '#')
- mod=true
- wordexp="${wordexp}${c}"
- pgetc
- case "${c}" in '#')
+ '%')
+ mod=true
wordexp="${wordexp}${c}"
pgetc
+ case "${c}" in '%')
+ wordexp="${wordexp}${c}"
+ pgetc
+ ;;
+ esac
;;
- esac
- ;;
- esac
+ '#')
+ mod=true
+ wordexp="${wordexp}${c}"
+ pgetc
+ case "${c}" in '#')
+ wordexp="${wordexp}${c}"
+ pgetc
+ ;;
+ esac
+ ;;
+ esac
+ fi
if ${mod}; then
# Get word.
res="$(scan_word true)"
- lineno_offset=${res%%${RS}*}
+ ln_off=${res%%${RS}*}
res="${res#*${RS}}"
c="${res%%${RS}*}"
res="${res#*${RS}}"
word="${res%%${RS}*}"
# We must advance lineno because scan_word() was
# run in a subshell.
- lineno=$((${lineno} + ${lineno_offset}))
+ lineno=$((${lineno} + ${ln_off}))
wordexp="${wordexp}${word}"
- echo "param mod word: '$word'" >&2
+ dbg "param mod word: '$word'"
fi
# Check for right brace.
case "${c}" in
@@ -339,19 +369,28 @@ scan_wordexp()
esac
;;
'(')
+ # Arithmetic expansion or command substitution
;;
[@*#?$!A-Za-z0-9_-])
- next_param
+ res="$(scan_param)"
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ param="${res%%${RS}*}"
+ lineno=$((${lineno} + ${ln_off}))
wordexp="\$${param}"
;;
esac
- printf "%d${RS}%s${RS}%s" ${lineno_offset} "${c}" "${wordexp}"
-
+ printf "%d${RS}%s${RS}%s" ${ln_off} "${c}" "${wordexp}"
+ return 0
}
-next_param()
+scan_param()
{
+ local param=
+
param=''
case "${c}" in
[@*#?$!0-])
@@ -385,7 +424,13 @@ next_param()
pgetc
done
;;
+ *)
+ synerr 'Bad parameter name'
+ ;;
esac
+
+ printf "%d${RS}%s${RS}%s" 0 "${c}" "${param}"
+ return 0
}
# Check the current token. If it matches, add it to the syntax array.
@@ -394,7 +439,7 @@ accept()
local t="${1}"
if [ "x${tok%%${US}*}" = "x${t}" ]; then
- echo "accept $t" >&2
+ dbg "accept $t"
tokens="${tokens}${tok}${RS}"
next
return 0
@@ -402,6 +447,18 @@ accept()
return 1
}
+expect()
+{
+ local t="${1}"
+
+ if accept "${t}"; then
+ return 0
+ else
+ synexp "${t}"
+ return 1
+ fi
+}
+
error()
{
local fmt="${1}"
@@ -415,6 +472,10 @@ error()
printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
;;
esac
+
+ # The parser runs in a subshell, so this just returns up to the caller
+ # like an exception.
+ exit 1
}
init_lexer()
diff --git a/parsing/parse.sh b/parsing/parse.sh
index 2ac4215..d85a42e 100644
--- a/parsing/parse.sh
+++ b/parsing/parse.sh
@@ -3,6 +3,15 @@ LF="$(printf '\n.')"; LF="${LF%.}"
RS="$(printf '\036.')"; RS="${RS%.}"
US="$(printf '\037.')"; US="${US%.}"
+dbg()
+{
+ dbg=true
+ dbg=false
+ if ${dbg}; then
+ printf 'DEBUG: %s\n' "${@}" >&2
+ fi
+}
+
. ./tokens.sh
. ./lexer.sh
@@ -109,25 +118,43 @@ parse()
shift 1
init_lexer "${fn}"
+
+ # If this returns (does not exit), there are no errors.
while complete_command; do :; done
- if :; then # TODO: Test for EOF or errors
- get_tokens
- return 0
+
+ get_tokens
+
+ return 0
+}
+
+try()
+{
+ local tokens=
+ local t=
+
+ printf 'Trying script:\n'
+ printf '\t%s\n' "${@}"
+ if tokens="$(printf '%s\n' "${@}" | parse -)"; then
+ IFS="${RS}"
+ for t in ${tokens}; do
+ printf 'Token: %s\n' "$(tokname "${t}")"
+ case "${t%${US}*}" in T_WORD)
+ printf ' "%s"\n' "${t#T_WORD${US}}"
+ ;;
+ esac
+ done
+ unset IFS
+ else
+ printf 'FAIL\n'
fi
- return 1
+ printf '\n\n'
}
-if tokens="$(printf '%s\n' '"foo bar" && $baz || qux' '${quux%uux } quuux' | \
- parse -)"; then
- IFS="${RS}"
- for t in ${tokens}; do
- printf 'Token: %s\n' "$(tokname "${t}")"
- case "${t%${US}*}" in T_WORD)
- printf ' "%s"\n' "${t#T_WORD${US}}"
- ;;
- esac
- done
- unset IFS
-else
- echo FAIL
-fi
+try '"foo bar" && $baz || qux' '${quux%uux quuux'
+try '"foo bar" && $baz || qux' '${quux%uux } quuux'
+try 'foo ${bar}'
+try 'foo ${#bar}'
+try 'foo ${bar#baz}'
+try 'foo ${#bar#}'
+try 'foo ${^}'
+try 'foo `bar`'