summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- parsing/lexer.sh 148
-rw-r--r-- parsing/parse.sh 12
2 files changed, 156 insertions, 4 deletions
diff --git a/parsing/lexer.sh b/parsing/lexer.sh
index ce39c23..c88c8b6 100644
--- a/parsing/lexer.sh
+++ b/parsing/lexer.sh
@@ -4,6 +4,9 @@ ln_off=
start=
c=
wordexp=
+here_stack=
+here_awaiting_end=
+here_awaiting_word=
tok=
tokens=
@@ -67,6 +70,12 @@ lgetc()
next()
{
+ case "${here_stack}" in *"${RS}"*)
+ if ${here_awaiting_word}; then
+ next_here
+ return
+ fi
+ esac
while :; do
dbg "parsing char '$c' at lineno $lineno"
case "${c}" in
@@ -76,6 +85,15 @@ next()
return
;;
"${LF}")
+ case "${here_stack}" in *"${RS}"*)
+ if ${here_awaiting_end}; then
+ synexp ''
+ else
+ here_awaiting_end=false
+ here_awaiting_word=true
+ fi
+ ;;
+ esac
lgetc
lineno=$((${lineno} + 1))
tok=T_NEWLINE
@@ -164,6 +182,89 @@ next()
done
}
+next_here()
+{
+ local here=
+ local here_strip_tabs=
+ local here_end=
+ local here_escaped=
+ local line=
+ local word=
+ local res=
+ local wordexp=
+
+ # Pop the here-document off of the stack.
+ here="${here_stack##*${RS}}"
+ here_strip_tabs="${here%%${US}*}"
+ here_end="${here%${US}*}"
+ here_end="${here_end#*${US}}"
+ here_escaped="${here##*${US}}"
+ here_stack="${here_stack%${RS}*}"
+ here_awaiting_word=false
+
+ line=''
+ word=''
+ while :; do
+ case "${c}" in
+ '')
+ # Bash throws a warning when EOF occurs in a
+ # here document. mksh throws an error. dash,
+ # BusyBox ash, ksh93, and zsh accept EOF as a
+ # delimiter. We aim for the lowest common
+ # denominator, so throw an error like mksh does.
+ synerr 'Here-document "%s" unclosed' \
+ "${here_end}"
+ ;;
+ "${LF}")
+ line="${line}${c}"
+ word="${word}${line}"
+ case "${line}" in "${here_end}${LF}")
+ lgetc
+ tok="T_WORD${US}${word}"
+ return
+ ;;
+ esac
+ line=''
+ ;;
+ "${HT}")
+ if ${here_strip_tabs}; then
+ case "${line}" in
+ '')
+ ;;
+ *)
+ line="${line}${c}"
+ ;;
+ esac
+ else
+ line="${line}${c}"
+ fi
+ ;;
+ '$')
+ if ! ${here_escaped}; then
+ lgetc
+ if ! res="$(scan_wordexp)"; then
+ exit 1
+ fi
+ ln_off=${res%%${RS}*}
+ res="${res#*${RS}}"
+ c="${res%%${RS}*}"
+ res="${res#*${RS}}"
+ wordexp="${res%%${RS}*}"
+ lineno=$((${lineno} + ${ln_off}))
+ line="${line}${wordexp}"
+ continue
+ else
+ line="${line}${c}"
+ fi
+ ;;
+ *)
+ line="${line}${c}"
+ ;;
+ esac
+ lgetc
+ done
+}
+
next_io()
{
case "${c}" in
@@ -175,20 +276,32 @@ next_io()
case "${c}" in '-')
lgetc
tok=T_DLESSDASH
+ here_stack="${here_stack}${RS}"
+ here_stack="${here_stack}true"
+ here_awaiting_end=true
+ here_awaiting_word=false
+ break
;;
esac
tok=T_DLESS
+ here_stack="${here_stack}${RS}false"
+ here_awaiting_end=true
+ here_awaiting_word=false
+ break
;;
'&')
lgetc
tok=T_LESSAND
+ break
;;
'>')
lgetc
tok=T_LESSGREAT
+ break
;;
esac
tok=T_LESS
+ break
;;
'>')
lgetc
@@ -196,17 +309,21 @@ next_io()
'>')
lgetc
tok=T_DGREAT
+ break
;;
'&')
lgetc
tok=T_GREATAND
+ break
;;
'|')
lgetc
tok=T_CLOBBER
+ break
;;
esac
tok=T_GREAT
+ break
;;
esac
}
@@ -228,6 +345,22 @@ next_word()
# We must advance lineno because scan_word() was run in a subshell.
lineno=$((${lineno} + ${ln_off}))
tok="T_WORD${US}${word}"
+
+ case "${here_stack}" in *"${RS}"*)
+ if ${here_awaiting_end}; then
+ here_stack="${here_stack}${US}${word}"
+ case "${word}" in
+ *\\*|*'"'*|*"'"*)
+ here_stack="${here_stack}${US}true"
+ ;;
+ *)
+ here_stack="${here_stack}${US}false"
+ ;;
+ esac
+ here_awaiting_end=false
+ fi
+ ;;
+ esac
}
#
@@ -267,6 +400,15 @@ scan_word()
word="${word}${c}"
;;
'$')
+ case "${here_stack}" in *"${RS}"*)
+ if ${here_awaiting_end}; then
+ synerr '%s %s %s %s' \
+ 'Word expansions' \
+ 'not supported in' \
+ 'here-document' \
+ 'delimiters'
+ fi
+ esac
lgetc
if ! res=$(scan_wordexp); then
exit 1
@@ -737,6 +879,9 @@ run_sublexer()
fname="${fn}"
lineno=${ln}
start="${st}"
+ here_stack="${US}"
+ here_awaiting_end=false
+ here_awaiting_word=false
tokens=''
c="${ch}"
@@ -767,6 +912,9 @@ run_lexer()
fname="${fn}"
lineno=1
start="${st}"
+ here_stack="${US}"
+ here_awaiting_end=false
+ here_awaiting_word=false
tokens=''
# Read the first character and recognize the first token.
diff --git a/parsing/parse.sh b/parsing/parse.sh
index 6508142..81b8a54 100644
--- a/parsing/parse.sh
+++ b/parsing/parse.sh
@@ -649,7 +649,11 @@ try()
#try 'foo $(bar) baz'
#try 'foo$(bar$(baz))qux'
#try 'foo $((1 + 1))'
-try '$((1 + 1))'
-try '$((1 + (1 + 1)))'
-try '$((1 + $(foo) + 1))'
-try '$((1'
+#try '$((1 + 1))'
+#try '$((1 + (1 + 1)))'
+#try '$((1 + $(foo) + 1))'
+#try '$((1'
+try 'foo <<EOF' 'bar' 'EOF'
+try 'foo <<-EOF' "${HT}bar" "${HT}EOF"
+try 'foo <<EOF' '$(bar)' 'EOF'
+try 'foo <<E"O"F' '$(bar)' 'E"O"F' # BUG