diff options
author | P. J. McDermott <pj@pehjota.net> | 2017-02-11 02:35:45 (EST) |
---|---|---|
committer | P. J. McDermott <pj@pehjota.net> | 2017-02-11 02:35:45 (EST) |
commit | 2559f6bf37044669ec36ee72fa2f09ec15fa9e72 (patch) | |
tree | 2aa2a63c48d4d914850c7fcd91dcde8dab19c151 | |
parent | 215d095f88c0cdc3fd65ae7e3f5c8d87a535481c (diff) | |
download | eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.zip eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.tar.gz eggshell-2559f6bf37044669ec36ee72fa2f09ec15fa9e72.tar.bz2 |
research/split.sh: Add a sed+SCL version
Also comments. Comments are good.
-rw-r--r-- | research/split.sh | 39 |
1 files changed, 38 insertions, 1 deletions
```diff
diff --git a/research/split.sh b/research/split.sh
index 9a3aed3..d003fa4 100644
--- a/research/split.sh
+++ b/research/split.sh
@@ -1,7 +1,27 @@
+# This is the lexer's old file-splitting code:
+#     eval "$(printf '%s' "${buf}" | awk -v FS='' -v j=0 \
+#             -v squote="'" -v esc_squote="'\\\\''" '
+#             {
+#                     for (i = 1; i <= NF; ++i) {
+#                             sub(squote, esc_squote, $i);
+#                             printf("lbufv_%d=" squote "%s" squote "\n",
+#                                     j++, $i);
+#                     };
+#                     printf("lbufv_%d=" squote "\n" squote "\n", j++);
+#             }
+#             ')"
+# It relies on behavior specific to GNU awk.
+# POSIX on awk's FS: "If FS is a null string, the behavior is unspecified."
+# Cf. <http://stackoverflow.com/a/31135987> regarding other implementations.
+# So we need to get even more creative.
+
 US="$(printf '\037.')"; US="${US%.}"
 
-buf='Hello, world!'
+buf='Hello, world!
+Hi!
+'
 
+# Use sed to preprocess the buffer and give awk a proper field separator.
 printf '%s' "${buf}" | sed "s/\\(.\\)/\\1${US}/g" | \
 	awk -v FS="${US}" -v j=0 -v squote="'" -v esc_squote="'\\\\''" '
 	{
@@ -16,3 +36,20 @@ printf '%s' "${buf}" | sed "s/\\(.\\)/\\1${US}/g" | \
 		printf("lbufv_%d=" squote "\n" squote "\n", j++);
 	}
 	'
+
+# And we might as well get rid of awk and use sed and SCL.
+i=0
+IFS="${US}"
+for c in $(printf '%s' "${buf}" | sed -n "
+	1h;                    # Put the first line in the hold space.
+	1!H;                   # Append to the hold space each subsequent line.
+	\${                    # Once the hold space contains the whole buffer...
+		g;                     # Prepare to edit the buffer.
+		s/\\(.\\)/\\1${US}/g;  # Put US after each char.
+		p;                     # Print the result.
+	};
+	"); do
+	printf "lbufv_%d='%s'\n" ${i} "${c}"
+	i=$((${i} + 1))
+done
+unset IFS
```