summary refs log tree commit diff stats
path: root/parsing
diff options
context:
space:
mode:
Diffstat (limited to 'parsing')
-rw-r--r-- parsing/lexer.sh 108
1 files changed, 64 insertions, 44 deletions
diff --git a/parsing/lexer.sh b/parsing/lexer.sh
index a3ab762..292da43 100644
--- a/parsing/lexer.sh
+++ b/parsing/lexer.sh
@@ -5,12 +5,64 @@ c=
tok=
tokens=
+#
+# Error handling (used by scanning and interface functions)
+#
+
+error()
+{
+ local fmt="${1}"
+ shift 1
+
+ case "${fname}" in
+ '-')
+ printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
+ ;;
+ *)
+ printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
+ ;;
+ esac
+
+ # The parser and lexer run in a subshell, so this just returns up to the
+ # caller like an exception.
+ exit 1
+}
+
+synexp()
+{
+ local t="${1}"
+ shift 1
+
+ if [ "x${t}" = 'x' ]; then
+ synerr '%s unexpected' "$(tokname "${tok}")"
+ else
+ synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
+ "$(tokname "${t}")"
+ fi
+}
+
+synerr()
+{
+ local fmt="${1}"
+ shift 1
+
+ error "Syntax error: ${fmt}" "${@}"
+}
+
+#
+# Input reading
+#
+
pgetc()
{
c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
c="${c%.}"
}
+#
+# Token recognition
+#
+
next()
{
while :; do
@@ -177,6 +229,10 @@ next_word()
tok="T_WORD${US}${word}"
}
+#
+# Token scanning
+#
+
scan_word()
{
local in_param="${1}"
@@ -266,7 +322,7 @@ scan_word()
pgetc
done
- printf "%d${RS}%s${RS}%s" ${lines} "${c}" "${word}"
+ printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}"
}
scan_wordexp()
@@ -428,7 +484,7 @@ scan_wordexp()
;;
esac
- printf "%d${RS}%s${RS}%s" ${ln_off} "${c}" "${wordexp}"
+ printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}"
return 0
}
@@ -474,10 +530,14 @@ scan_param()
;;
esac
- printf "%d${RS}%s${RS}%s" 0 "${c}" "${param}"
+ printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}"
return 0
}
+#
+# Interface
+#
+
# Check the current token. If it matches, add it to the syntax array.
accept()
{
@@ -586,29 +646,10 @@ expect()
return 0
else
synexp "${t}"
- return 1
fi
}
-error()
-{
- local fmt="${1}"
- shift 1
-
- case "${fname}" in
- '-')
- printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
- ;;
- *)
- printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
- ;;
- esac
-
- # The parser runs in a subshell, so this just returns up to the caller
- # like an exception.
- exit 1
-}
-
+# Called by the lexer, not the parser
run_sublexer()
{
local fn="${1}"
@@ -668,24 +709,3 @@ run_lexer()
return 0
}
-
-synexp()
-{
- local t="${1}"
- shift 1
-
- if [ "x${t}" = 'x' ]; then
- synerr '%s unexpected' "$(tokname "${tok}")"
- else
- synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
- "$(tokname "${t}")"
- fi
-}
-
-synerr()
-{
- local fmt="${1}"
- shift 1
-
- error "Syntax error: ${fmt}" "${@}"
-}