summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author P. J. McDermott <pj@pehjota.net> 2016-02-20 18:04:48 (EST)
committer P. J. McDermott <pj@pehjota.net> 2016-02-20 18:04:48 (EST)
commit 0a72d231a7d42396e3551f38eb63d55fea100669 (patch)
tree 2e5068a74632573d2c525b91e1924cad37629464
parent 897bd7b6ae7514896dae3253e854db5edc0b8009 (diff)
download eggshell-0a72d231a7d42396e3551f38eb63d55fea100669.zip
eggshell-0a72d231a7d42396e3551f38eb63d55fea100669.tar.gz
eggshell-0a72d231a7d42396e3551f38eb63d55fea100669.tar.bz2
Rewrite codegen
The previous version didn't handle nested or multiple command substitutions. The stack format has also been extended to support arithmetic expansions.
-rw-r--r-- parsing/codegen.sh 91
-rw-r--r-- parsing/lexer.sh 3
-rw-r--r-- parsing/parse.sh 2
3 files changed, 78 insertions, 18 deletions
diff --git a/parsing/codegen.sh b/parsing/codegen.sh
index c28ffca..0e32987 100644
--- a/parsing/codegen.sh
+++ b/parsing/codegen.sh
@@ -1,4 +1,10 @@
-toktext=''
+sc=
+
+sgetc()
+{
+ sc="$(dd bs=1 count=1 2>/dev/null; printf '.')"
+ sc="${sc%.}"
+}
toktext()
{
@@ -57,29 +63,80 @@ toktext()
*) n='';;
esac
- toktext="${n}"
+ printf '%s' "${n}"
}
-codegen()
+codegen_sub()
{
- local toks="${1}"
+ local array="${1}"
shift 1
- local subtoks=
- local t=
-
- case "${toks}" in
- *"${STX}"*"${ETX}"*)
- subtoks="${toks#*${STX}}"
- subtoks="${subtoks%${ETX}*}"
- toks="${toks%%${STX}*}$(codegen \
- "${subtoks}")${toks##*${ETX}}"
- ;;
- esac
IFS="${RS}"
- for t in ${toks}; do
+ for t in ${array}; do
toktext "${t}"
- printf '%s ' "${toktext}"
+ printf ' '
done
unset IFS
}
+
+# The token stack is encoded in a string in the following grammar:
+# Terminal symbols:
+# TOKEN
+# Production rules:
+# stack = tokens [ '<SOH>' type '<STX>' stack '<ETX>' [ tokens ] ] ;
+# tokens = TOKEN { '<RS>' TOKEN } ;
+# type = 'C' | 'A' ;
+# We need to recurse through this stack to get to all the tokens.
+# Each element in the stack (an array of tokens) gets run through the codegen to
+# become text that is inserted into the array below.
+parse_stack()
+{
+ local array=
+
+ array=''
+ while :; do
+ sgetc
+ case "${sc}" in
+ '')
+ # EOF
+ break
+ ;;
+ "${SOH}")
+ # New stack element
+ sgetc
+ case "${sc}" in
+ 'C')
+ # Command substitution
+ sgetc # STX
+ array="${array}$(parse_stack)"
+ ;;
+ 'A')
+ # Arithmetic expansion
+ sgetc # STX
+ ;;
+ esac
+ ;;
+ "${ETX}")
+ # End of stack element
+ break
+ ;;
+ *)
+ # Token character
+ array="${array}${sc}"
+ ;;
+ esac
+ done
+ codegen_sub "${array}"
+}
+
+codegen()
+{
+ local toks="${1}"
+ shift 1
+
+ if printf '%s' "${toks}" | parse_stack; then
+ return 0
+ else
+ return 1
+ fi
+}
diff --git a/parsing/lexer.sh b/parsing/lexer.sh
index 3f6f52f..a7fbb80 100644
--- a/parsing/lexer.sh
+++ b/parsing/lexer.sh
@@ -467,7 +467,8 @@ scan_wordexp()
res="${res#*${RS}}"
toks="${res%%${RS}*}"
lineno=${ln_off}
- wordexp="\$(${STX}${toks}${ETX})"
+ wordexp="\$(${SOH}C${STX}${toks}"
+ wordexp="${wordexp}${ETX})"
# ")" is recognized in run_sublexer().
;;
esac
diff --git a/parsing/parse.sh b/parsing/parse.sh
index 196ad84..a71f90b 100644
--- a/parsing/parse.sh
+++ b/parsing/parse.sh
@@ -1,3 +1,4 @@
+SOH="$(printf '\001.')"; SOH="${SOH%.}"
STX="$(printf '\002.')"; STX="${STX%.}"
ETX="$(printf '\003.')"; ETX="${ETX%.}"
HT="$(printf '\t.')"; HT="${HT%.}"
@@ -646,4 +647,5 @@ try 'foo $(bar)'
try 'foo $(bar); baz'
try 'foo $(bar)' 'baz'
try 'foo $(bar) baz'
+try 'foo$(bar$(baz))qux'
#try 'foo $((1 + 1))'