From 3fabff2df0215812a042ee70b4708b63be8a4fd0 Mon Sep 17 00:00:00 2001
From: P. J. McDermott <pj@pehjota.net>
Date: Sat, 27 Feb 2016 15:40:48 -0500
Subject: [WIP] eshtrans/frontend: Split input into internal char array

---
(limited to 'eshtrans')

diff --git a/eshtrans/frontend/lexer.esh b/eshtrans/frontend/lexer.esh
index a9aaa6d..0b628a6 100644
--- a/eshtrans/frontend/lexer.esh
+++ b/eshtrans/frontend/lexer.esh
@@ -88,8 +88,21 @@ synerr()
 
 lgetc()
 {
-	c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
-	c="${c%.}"
+	# Load char at lbufi into c and advance; c='' at EOF or bad index.
+	c=''
+	if [ "${lbufi}" -lt "${lbufc}" ]; then
+		eval "c=\${lbufv_${lbufi}}"
+		lbufi=$((${lbufi} + 1))
+	fi
+}
+
+lsetc()
+{
+	# Load char at lbufi into c, no advance; c='' at EOF or bad index.
+	c=''
+	if [ "${lbufi}" -lt "${lbufc}" ]; then
+		eval "c=\${lbufv_${lbufi}}"
+	fi
 }
 
 #
@@ -272,7 +285,8 @@ next_here()
 					fi
 					ln_off=${res%%${RS}*}
 					res="${res#*${RS}}"
-					c="${res%%${RS}*}"
+					lbufi="${res%%${RS}*}"
+					lsetc
 					res="${res#*${RS}}"
 					lineno=$((${lineno} + ${ln_off}))
 					line="${line}${res}"
@@ -362,7 +376,8 @@ next_word()
 	fi
 	ln_off=${res%%${RS}*}
 	res="${res#*${RS}}"
-	c="${res%%${RS}*}"
+	lbufi="${res%%${RS}*}"
+	lsetc
 	res="${res#*${RS}}"
 
 	# We must advance lineno because scan_word() was run in a subshell.
@@ -447,12 +462,13 @@ scan_word()
 					'') lgetc;;
 					*) c="${tmp_c}"; prev_c='';;
 				esac
-				if ! res=$(scan_wordexp); then
+				if ! res="$(scan_wordexp)"; then
 					exit 1
 				fi
 				ln_off=${res%%${RS}*}
 				res="${res#*${RS}}"
-				c="${res%%${RS}*}"
+				lbufi="${res%%${RS}*}"
+				lsetc
 				res="${res#*${RS}}"
 				# We must advance lineno because scan_wordexp()
 				# was run in a subshell.
@@ -550,7 +566,7 @@ scan_word()
 		synerr 'Unterminated quoted string'
 	fi
 
-	printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}"
+	printf "%d${RS}%d${RS}%s" ${lines} ${lbufi} "${word}"
 }
 
 scan_wordexp()
@@ -579,7 +595,8 @@ scan_wordexp()
 							"${c}")"; then
 						exit 1
 					fi
-					c="${res##*${RS}}"
+					lbufi="${res##*${RS}}"
+					lsetc
 					res="${res%${RS}*}"
 					ln_off=${res##*${RS}}
 					res="${res%${RS}*}"
@@ -597,14 +614,15 @@ scan_wordexp()
 			fi
 			ln_off=${res%%${RS}*}
 			res="${res#*${RS}}"
-			c="${res%%${RS}*}"
+			lbufi="${res%%${RS}*}"
+			lsetc
 			res="${res#*${RS}}"
 			lineno=$((${lineno} + ${ln_off}))
 			wordexp="\$${res}"
 			;;
 	esac
 
-	printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}"
+	printf "%d${RS}%d${RS}%s" ${ln_off} ${lbufi} "${wordexp}"
 	return 0
 }
 
@@ -628,7 +646,8 @@ scan_wordexp_param_brace()
 					fi
 					ln_off=${res%%${RS}*}
 					res="${res#*${RS}}"
-					c="${res%%${RS}*}"
+					lbufi="${res%%${RS}*}"
+					lsetc
 					res="${res#*${RS}}"
 					param="#${res}"
 					lineno=$((${lineno} + ${ln_off}))
@@ -647,7 +666,8 @@ scan_wordexp_param_brace()
 			fi
 			ln_off=${res%%${RS}*}
 			res="${res#*${RS}}"
-			c="${res%%${RS}*}"
+			lbufi="${res%%${RS}*}"
+			lsetc
 			res="${res#*${RS}}"
 			param="${res}"
 			lineno=$((${lineno} + ${ln_off}))
@@ -706,7 +726,8 @@ scan_wordexp_param_brace()
 		fi
 		ln_off=${res%%${RS}*}
 		res="${res#*${RS}}"
-		c="${res%%${RS}*}"
+		lbufi="${res%%${RS}*}"
+		lsetc
 		res="${res#*${RS}}"
 		# We must advance lineno because scan_word() was run in a
 		# subshell.
@@ -771,7 +792,7 @@ scan_param()
 			;;
 	esac
 
-	printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}"
+	printf "%d${RS}%d${RS}%s" 0 ${lbufi} "${param}"
 	return 0
 }
 
@@ -811,12 +832,13 @@ scan_wordexp_arith()
 				;;
 			'$')
 				lgetc
-				if ! res=$(scan_wordexp); then
+				if ! res="$(scan_wordexp)"; then
 					exit 1
 				fi
 				ln_off=${res%%${RS}*}
 				res="${res#*${RS}}"
-				c="${res%%${RS}*}"
+				lbufi="${res%%${RS}*}"
+				lsetc
 				res="${res#*${RS}}"
 				# We must advance lineno because scan_wordexp()
 				# was run in a subshell.
@@ -862,7 +884,7 @@ run_sublexer()
 			;;
 	esac
 
-	printf "${RS}%d${RS}%c" ${lineno} "${c}"
+	printf "${RS}%d${RS}%d" ${lineno} ${lbufi}
 	return 0
 }
 
@@ -873,8 +895,9 @@ run_sublexer()
 run_lexer()
 {
 	local fn="${1}"
-	local st="${2}"
-	shift 2
+	local buf="${2}"
+	local st="${3}"
+	shift 3
 
 	# Initialize global variables.
 	fname="${fn}"
@@ -884,6 +907,20 @@ run_lexer()
 	here_awaiting_end=false
 	here_awaiting_word=false
 
+	# Read file into array
+	eval "$(printf '%s' "${buf}" | awk -v FS='' -v j=0 \
+		-v squote="'" -v esc_squote="'\\\\''" '
+		{
+			for (i = 1; i <= NF; ++i) {
+				sub(squote, esc_squote, $i);
+				printf("lbufv_%d='\''%s'\''\n", j++, $i);
+			};
+			printf("lbufv_%d='\''\n'\''\n", j++);
+		}
+		')"
+	lbufi=0
+	lbufc=${#buf}
+
 	# Read the first character and recognize the first token.
 	lgetc
 	next
diff --git a/eshtrans/frontend/main.esh b/eshtrans/frontend/main.esh
index b9f93a6..fc25b5f 100644
--- a/eshtrans/frontend/main.esh
+++ b/eshtrans/frontend/main.esh
@@ -21,9 +21,10 @@
 esh_parse()
 {
 	local fn="${1}"
-	shift 1
+	local buf="${2}"
+	shift 2
 
-	if run_lexer "${fn}" complete_command; then
+	if run_lexer "${fn}" "${buf}" complete_command; then
 		return 0
 	fi
 	return 1
diff --git a/eshtrans/main.esh b/eshtrans/main.esh
index 7e0cb8c..588ca1d 100644
--- a/eshtrans/main.esh
+++ b/eshtrans/main.esh
@@ -88,10 +88,10 @@ main()
 		fi
 		input="${1}"
 		if [ "x${output}" = 'x-' ]; then
-			sh_codegen "$(esh_parse "${input}" <"${input}")"
+			sh_codegen "$(esh_parse "${input}" "$(cat "${input}")")"
 		else
-			sh_codegen "$(esh_parse "${input}" <"${input}")" \
-				>"${output}"
+			sh_codegen "$(esh_parse "${input}" \
+				"$(cat "${input}")")" >"${output}"
 		fi
 	fi
 }
--
cgit v0.9.1