From 48996f2bfc32321dbd6c22d8fcdd0bb385f611cc Mon Sep 17 00:00:00 2001
From: P. J. McDermott <pj@pehjota.net>
Date: Fri, 19 Feb 2016 22:56:37 -0500
Subject: Improve reserved word recognition

Also fix term() so that a separator which is not followed by another
and_or no longer makes term() fail; this allows do_group() to find
T_DONE.

Also, add dbg trace calls throughout the lexer and parser.
---
diff --git a/parsing/lexer.sh b/parsing/lexer.sh
index c939d1c..01bc721 100644
--- a/parsing/lexer.sh
+++ b/parsing/lexer.sh
@@ -82,6 +82,7 @@ next()
 					return
 					;;
 				esac
+				dbg T_SEMI
 				tok=T_SEMI
 				return
 				;;
@@ -457,23 +458,28 @@ accept()
 	local t="${1}"
 	local rw=
 
+	dbg "looking for $t, current tok ${tok%%${US}*}"
 	case "${t}" in
 		T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\
 		T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\
 		T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN)
 			dbg "looking for reserved word $t, have '$tok'"
-			# Reserved words are recognized as literal T_WORDs.
-			if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
-				return 1
-			fi
-			# T_WORD data unit must match reserved word exactly.
-			if ! [ "x${tok#T_WORD${US}}" = "x$(tokname "${t}")" ]
-			then
-				return 1
+			if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
+				# Reserved words are recognized as literal
+				# T_WORDs.
+				if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+					return 1
+				fi
+				# T_WORD data unit must match reserved word
+				# exactly.
+				if ! [ "x${tok#T_WORD${US}}" = \
+						"x$(tokname "${t}")" ]; then
+					return 1
+				fi
+				# If the token matches the reserved word,
+				# replace it with the reserved word token.
+				tok="${t}"
 			fi
-			# If the token matches the reserved word, replace it
-			# with the reserved word token.
-			tok="${t}"
 			;;
 		T_NAME)
 			# Names are recognized as literal T_WORDs.
@@ -516,6 +522,21 @@ accept()
 			done
 			tok="T_FNAME${US}${tok#T_WORD${US}}"
 			;;
+		T_WORD)
+			if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
+				return 1
+			fi
+			# Verify that the word doesn't match any reserved words.
+			for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
+					T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
+					T_IN; do
+				if [ "x${tok#T_WORD${US}}" = \
+						"x$(tokname "${rw}")" ]; then
+					tok="${rw}"
+					return 1
+				fi
+			done
+			;;
 		*)
 			if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
 				return 1
diff --git a/parsing/parse.sh b/parsing/parse.sh
index b39d84e..ff6b5ad 100644
--- a/parsing/parse.sh
+++ b/parsing/parse.sh
@@ -26,6 +26,7 @@ complete_command()
 
 list()
 {
+	dbg 'list()'
 	if and_or; then
 		while separator_op; do
 			if ! and_or; then
@@ -38,6 +39,7 @@ list()
 }
 and_or()
 {
+	dbg 'and_or()'
 	if pipeline; then
 		while accept T_AND_IF || accept T_OR_IF; do
 			if ! linebreak || ! pipeline; then
@@ -50,6 +52,7 @@ and_or()
 }
 pipeline()
 {
+	dbg 'pipeline()'
 	accept T_BANG
 	if pipe_sequence; then
 		return 0
@@ -59,6 +62,7 @@ pipeline()
 
 pipe_sequence()
 {
+	dbg 'pipe_sequence()'
 	if command; then
 		while accept T_PIPE; do
 			if ! linebreak || ! command; then
@@ -72,6 +76,7 @@ pipe_sequence()
 
 command()
 {
+	dbg 'command()'
 	if compound_command; then
 		redirect_list
 		return 0
@@ -85,6 +90,7 @@ command()
 
 compound_command()
 {
+	dbg 'compound_command()'
 	if brace_group; then
 		return 0
 	elif subshell; then
@@ -105,6 +111,7 @@ compound_command()
 
 subshell()
 {
+	dbg 'subshell()'
 	if accept T_LPAREN && compound_list && expect T_RPAREN; then
 		return 0
 	fi
@@ -113,8 +120,10 @@ subshell()
 
 compound_list()
 {
+	dbg 'compound_list()'
 	newline_list
 	if term; then
+		dbg FOUND TERM
 		separator
 		return 0
 	fi
@@ -123,11 +132,10 @@ compound_list()
 
 term()
 {
+	dbg 'term()'
 	if and_or; then
 		while separator; do
-			if ! and_or; then
-				return 1
-			fi
+			and_or
 		done
 		return 0
 	fi
@@ -155,6 +163,7 @@ for_clause()
 
 wordlist()
 {
+	dbg 'wordlist()'
 	if accept T_WORD; then
 		while accept T_WORD; do :; done
 		return 0
@@ -236,6 +245,7 @@ function_body()
 
 brace_group()
 {
+	dbg 'brace_group()'
 	if accept T_LBRACE && compound_list && expect T_RBRACE; then
 		return 0
 	fi
@@ -244,6 +254,7 @@ brace_group()
 
 do_group()
 {
+	dbg 'do_group()'
 	if accept T_DO && compound_list && expect T_DONE; then
 		return 0
 	fi
@@ -252,6 +263,7 @@ do_group()
 
 simple_command()
 {
+	dbg 'simple_command()'
 	if cmd_prefix; then
 		if cmd_word; then
 			cmd_suffix
@@ -266,6 +278,7 @@ simple_command()
 
 cmd_name()
 {
+	dbg 'cmd_name()'
 	# TODO: Assignment
 	if accept T_WORD; then
 		return 0
@@ -275,6 +288,7 @@ cmd_name()
 
 cmd_word()
 {
+	dbg 'cmd_word()'
 	# TODO: Assignment
 	if accept T_WORD; then
 		return 0
@@ -367,6 +381,7 @@ separator()
 
 sequential_sep()
 {
+	dbg 'sequential_sep()'
 	if accept T_SEMI; then
 		if linebreak; then
 			return 0
--
cgit v0.9.1