research/split.sh: Add a sed+SCL version

Also comments. Comments are good.
author: P. J. McDermott <pj@pehjota.net> 2017-02-11 02:35:45 (EST)
committer: P. J. McDermott <pj@pehjota.net> 2017-02-11 02:35:45 (EST)
commit: 2559f6bf37044669ec36ee72fa2f09ec15fa9e72 (patch)
tree: 2aa2a63c48d4d914850c7fcd91dcde8dab19c151
parent: 215d095f88c0cdc3fd65ae7e3f5c8d87a535481c (diff)
download: eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.zip eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.tar.gz eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.tar.bz2
1 file changed, 38 insertions, 1 deletion
diff --git a/research/split.sh b/research/split.sh
index 9a3aed3..d003fa4 100644
--- a/research/split.sh
+++ b/research/split.sh
@@ -1,7 +1,27 @@
+# This is the lexer's old file-splitting code:
+#	eval "$(printf '%s' "${buf}" | awk -v FS='' -v j=0 \
+#		-v squote="'" -v esc_squote="'\\\\''" '
+#		{
+#			for (i = 1; i <= NF; ++i) {
+#				sub(squote, esc_squote, $i);
+#				printf("lbufv_%d=" squote "%s" squote "\n",
+#					j++, $i);
+#			};
+#			printf("lbufv_%d=" squote "\n" squote "\n", j++);
+#		}
+#		')"
+# It relies on behavior specific to GNU awk.
+# POSIX on awk's FS: "If FS is a null string, the behavior is unspecified."
+# Cf. <http://stackoverflow.com/a/31135987> regarding other implementations.
+# So we need to get even more creative.
+
 US="$(printf '\037.')"; US="${US%.}"
 
-buf='Hello, world!'
+buf='Hello, world!
+Hi!
+'
 
+# Use sed to preprocess the buffer and give awk a proper field separator.
 printf '%s' "${buf}" | sed "s/\\(.\\)/\\1${US}/g" | \
 	awk -v FS="${US}" -v j=0 -v squote="'" -v esc_squote="'\\\\''" '
 	{
@@ -16,3 +36,20 @@ printf '%s' "${buf}" | sed "s/\\(.\\)/\\1${US}/g" | \
 		printf("lbufv_%d=" squote "\n" squote "\n", j++);
 	}
 	'
+
+# And we might as well get rid of awk and use only sed and the Shell Command Language (SCL), splitting fields on IFS.
+i=0
+IFS="${US}"
+for c in $(printf '%s' "${buf}" | sed -n "
+		1h;   # Put the first line in the hold space.
+		1!H;  # Append to the hold space each subsequent line.
+		\${   # Once the hold space contains the whole buffer...
+			g;  # Prepare to edit the buffer.
+			s/\\(.\\)/\\1${US}/g;  # Put US after each char.
+			p;  # Print the result.
+		};
+		"); do
+	printf "lbufv_%d='%s'\n" ${i} "${c}"
+	i=$((${i} + 1))
+done
+unset IFS
author	P. J. McDermott <pj@pehjota.net>	2017-02-11 02:35:45 (EST)
committer	P. J. McDermott <pj@pehjota.net>	2017-02-11 02:35:45 (EST)
commit	2559f6bf37044669ec36ee72fa2f09ec15fa9e72 (patch)
tree	2aa2a63c48d4d914850c7fcd91dcde8dab19c151
parent	215d095f88c0cdc3fd65ae7e3f5c8d87a535481c (diff)
download	eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.zip eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.tar.gz eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.tar.bz2