From 2559f6bf37044669ec36ee72fa2f09ec15fa9e72 Mon Sep 17 00:00:00 2001
From: P. J. McDermott
Date: Sat, 11 Feb 2017 02:35:45 -0500
Subject: research/split.sh: Add a sed+SCL version

Also comments. Comments are good.
---
diff --git a/research/split.sh b/research/split.sh
index 9a3aed3..d003fa4 100644
--- a/research/split.sh
+++ b/research/split.sh
@@ -1,7 +1,27 @@
+# This is the lexer's old file-splitting code:
+#     eval "$(printf '%s' "${buf}" | awk -v FS='' -v j=0 \
+#         -v squote="'" -v esc_squote="'\\\\''" '
+#         {
+#             for (i = 1; i <= NF; ++i) {
+#                 sub(squote, esc_squote, $i);
+#                 printf("lbufv_%d=" squote "%s" squote "\n",
+#                     j++, $i);
+#             };
+#             printf("lbufv_%d=" squote "\n" squote "\n", j++);
+#         }
+#         ')"
+# It relies on behavior specific to GNU awk.
+# POSIX on awk's FS: "If FS is a null string, the behavior is unspecified."
+# Cf. regarding other implementations.
+# So we need to get even more creative.
+
 US="$(printf '\037.')"; US="${US%.}"
 
-buf='Hello, world!'
+buf='Hello, world!
+Hi!
+'
 
+# Use sed to preprocess the buffer and give awk a proper field separator.
 printf '%s' "${buf}" | sed "s/\\(.\\)/\\1${US}/g" | \
 	awk -v FS="${US}" -v j=0 -v squote="'" -v esc_squote="'\\\\''" '
 	{
@@ -16,3 +36,20 @@ printf '%s' "${buf}" | sed "s/\\(.\\)/\\1${US}/g" | \
 		printf("lbufv_%d=" squote "\n" squote "\n", j++);
 	}
 	'
+
+# And we might as well get rid of awk and use sed and SCL.
+i=0
+IFS="${US}"
+for c in $(printf '%s' "${buf}" | sed -n "
+	1h; # Put the first line in the hold space.
+	1!H; # Append to the hold space each subsequent line.
+	\${ # Once the hold space contains the whole buffer...
+		g; # Prepare to edit the buffer.
+		s/\\(.\\)/\\1${US}/g; # Put US after each char.
+		p; # Print the result.
+	};
+	"); do
+	printf "lbufv_%d='%s'\n" ${i} "${c}"
+	i=$((${i} + 1))
+done
+unset IFS
-- 
cgit v0.9.1