From 2559f6bf37044669ec36ee72fa2f09ec15fa9e72 Mon Sep 17 00:00:00 2001
From: P. J. McDermott <pj@pehjota.net>
Date: Sat, 11 Feb 2017 02:35:45 -0500
Subject: research/split.sh: Add a sed+SCL version

Also comments.  Comments are good.
---
diff --git a/research/split.sh b/research/split.sh
index 9a3aed3..d003fa4 100644
--- a/research/split.sh
+++ b/research/split.sh
@@ -1,7 +1,27 @@
+# This is the lexer's old file-splitting code:
+#	eval "$(printf '%s' "${buf}" | awk -v FS='' -v j=0 \
+#		-v squote="'" -v esc_squote="'\\\\''" '
+#		{
+#			for (i = 1; i <= NF; ++i) {
+#				sub(squote, esc_squote, $i);
+#				printf("lbufv_%d=" squote "%s" squote "\n",
+#					j++, $i);
+#			};
+#			printf("lbufv_%d=" squote "\n" squote "\n", j++);
+#		}
+#		')"
+# It relies on behavior specific to GNU awk.
+# POSIX on awk's FS: "If FS is a null string, the behavior is unspecified."
+# Cf. <http://stackoverflow.com/a/31135987> regarding other implementations.
+# So we need to get even more creative.
+
 US="$(printf '\037.')"; US="${US%.}"
 
-buf='Hello, world!'
+buf='Hello, world!
+Hi!
+'
 
+# Use sed to preprocess the buffer and give awk a proper field separator.
 printf '%s' "${buf}" | sed "s/\\(.\\)/\\1${US}/g" | \
 	awk -v FS="${US}" -v j=0 -v squote="'" -v esc_squote="'\\\\''" '
 	{
@@ -16,3 +36,20 @@ printf '%s' "${buf}" | sed "s/\\(.\\)/\\1${US}/g" | \
 		printf("lbufv_%d=" squote "\n" squote "\n", j++);
 	}
 	'
+
+# And we might as well get rid of awk and use sed and the shell language (SCL).
+i=0  # Index N of the next lbufv_N assignment to print.
+IFS="${US}"  # Make the unquoted substitution below split on US only.
+for c in $(printf '%s' "${buf}" | sed -n "
+		1h;   # Put the first line in the hold space.
+		1!H;  # Append to the hold space each subsequent line.
+		\${   # Once the hold space contains the whole buffer...
+			g;  # Prepare to edit the buffer.
+			s/\\(.\\)/\\1${US}/g;  # Put US after each char.
+			p;  # Print the result.
+		};
+		"); do  # NOTE(review): unquoted, so fields also glob unless set -f.
+	printf "lbufv_%d='%s'\n" ${i} "${c}"  # NOTE(review): c is not escaped.
+	i=$((${i} + 1))
+done
+unset IFS  # Unset IFS; the shell then splits as if IFS were the default.
--
cgit v0.9.1