summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorP. J. McDermott <pj@pehjota.net>2017-02-11 03:19:31 (EST)
committer P. J. McDermott <pj@pehjota.net>2017-02-11 03:19:31 (EST)
commit53c48293168024ce8d62158abbd5684788a1b08f (patch)
tree8117220e69bc87cff87bd79cadbbd217ad8f637c
parent2559f6bf37044669ec36ee72fa2f09ec15fa9e72 (diff)
downloadeggshell-53c48293168024ce8d62158abbd5684788a1b08f.zip
eggshell-53c48293168024ce8d62158abbd5684788a1b08f.tar.gz
eggshell-53c48293168024ce8d62158abbd5684788a1b08f.tar.bz2
research/arrays-and-hashes.txt: New fileHEADmaster
-rw-r--r--research/arrays-and-hashes.txt465
1 files changed, 465 insertions, 0 deletions
diff --git a/research/arrays-and-hashes.txt b/research/arrays-and-hashes.txt
new file mode 100644
index 0000000..eac18cd
--- /dev/null
+++ b/research/arrays-and-hashes.txt
@@ -0,0 +1,465 @@
+Arrays
+======
+
+Arrays are simple lists of values. Arrays are zero-indexed, but internally
+there is an offset that is affected by `shift` and `unshift` calls.
+
+Array keys are non-negative integers.
+
+An array has a length limited only by the highest non-negative integer in the
+shell's built-in arithmetic. An array is grown by calls to `push` and `unshift`
+as well as by assigning values with out-of-bounds keys. An array is shrunk by
+calls to `pop` and `shift` as well as by unsetting the 0th and last values.
+
+Array Expansion
+---------------
+
+Array expansion initializes an array. The format for array expansion is as
+follows:
+
+ @(values)
+
+where *values* is zero or more *word*s. A reference to the new array is
+substituted.
+
+eshtrans will translate array expansions into calls to `__a_new` (part of
+libeshtrans) with array references returned in registers allocated at compile
+time.
+
+For example:
+
+ cardinals=@("zero" "one" "two")
+
+compiles to:
+
+ __a_new __r0 "zero" "one" "two"
+ cardinals=${__r0}
+
+while:
+
+ cmd @("zero" "one" "two")
+
+compiles to:
+
+ __a_new __r0 "zero" "one" "two"
+ cmd ${__r0}
+
+and:
+
+ cardinals=@(@("zero" "cero") @("one" "uno") @("two" "dos"))
+
+compiles to:
+
+ __a_new __r0 "zero" "cero"
+ __a_new __r1 "one" "uno"
+ __a_new __r2 "two" "dos"
+ __a_new __r3 ${__r0} ${__r1} ${__r2}
+ cardinals=${__r3}
+
+Quoting the `@(` operator removes its special meaning.
+
+Array Value Assignment
+----------------------
+
+The format for array value assignment is as follows:
+
+ name[key]=word
+
+eshtrans will translate array value assignments into calls to `__a_set` (part of
+libeshtrans).
+
+For example:
+
+ cardinals[0]="zero"
+
+compiles to:
+
+ __a_set cardinals 0 "zero"
+
+Array Value Expansion
+---------------------
+
+The format for array value expansion is as follows:
+
+ ${name[key]}
+
+eshtrans will translate array value expansions into calls to `__a_get` (part of
+libeshtrans).
+
+For example:
+
+ zero=${cardinals[0]}
+
+compiles to:
+
+ __a_get __r0 cardinals 0
+ zero=${__r0}
+
+`unset`
+-------
+
+`unset` is a reserved word and a function that unsets variables, functions,
+array values, and arrays.
+
+The format for unsetting an array value is as follows:
+
+ unset name[key]
+
+The format for unsetting an array is as follows:
+
+ unset name
+
+eshtrans will check the types of all of the operands of `unset` commands and
+translate them into calls to `unset` (the shell's built-in utility),
+`__a_unset`, or `__a_delete` as appropriate.
+
+For example:
+
+ unset cardinals[1] cardinals[2]
+
+compiles to:
+
+ __a_unset cardinals 1
+ __a_unset cardinals 2
+
+and:
+
+ unset cardinals
+
+compiles to:
+
+ __a_delete cardinals
+
+libeshtrans Functions
+---------------------
+
+ # __a_new(__r [__v...])
+ # Creates a new array holding the given values and stores a reference to it
+ # in a register.
+ # Parameters:
+ # * __r: Register in which to store array reference
+ # * __v...: Zero or more initial values, stored at keys 0, 1, 2, ...
+ __a_new()
+ {
+ __r=${1}
+ shift 1
+
+ # Allocate a slot on the array heap.
+ # NB: __malloc is defined elsewhere; it is assumed here to leave the name
+ # of the new slot in __mr (TODO confirm). That name must be expanded, not
+ # copied as the literal string "__mr".
+ __malloc __a
+ __a=${__mr}
+
+ # Set values.
+ __k=0
+ for __v in "${@}"; do
+ eval "${__a}__${__k}=\${__v}"
+ __k=$((${__k} + 1))
+ done
+
+ # Set length.
+ eval "${__a}_l=\${__k}"
+
+ # Initialize the offset that push adds to keys, so that arithmetic on it is
+ # valid before any shift or unshift has happened.
+ eval "${__a}_o=0"
+
+ # Store the array reference in the specified return register.
+ eval "${__r}=\${__a}"
+ }
+
+ # __a_set(__a __k __v)
+ # Stores a value at a key of an array, growing the array if necessary.
+ # Prints an error and exits with status 1 if the key is illegal.
+ # Parameters:
+ # * __a: Array
+ # * __k: Key
+ # * __v: Value
+ __a_set()
+ {
+ __a=${1}
+ __k=${2}
+ __v=${3}
+
+ # Validate key (must be a non-empty string of decimal digits).
+ case ${__k} in ''|*[!0-9]*)
+ printf 'Error: Illegal array key: %s\n' "${__k}" 1>&2
+ exit 1
+ ;; esac
+
+ # Enlarge array if key is out-of-bounds. Keys are zero-indexed, so the new
+ # length is one more than the new highest key.
+ eval "__l=\${${__a}_l}"
+ if [ ${__k} -ge ${__l} ]; then
+ eval "${__a}_l=\$((\${__k} + 1))"
+ fi
+
+ # Offset key by the array's `_o` value (the one push uses), so that the
+ # value lands in the slot that offset-aware readers look up. An unset
+ # offset is treated as 0.
+ eval "__k=\$((\${__k} + \${${__a}_o:-0}))"
+
+ # Set value.
+ eval "${__a}__${__k}=\${__v}"
+ }
+
+ # __a_get(__r __a __k)
+ # Looks up the value at a key of an array and stores it in a register.
+ # Prints an error and exits with status 1 if the key is illegal.
+ # Parameters:
+ # * __r: Register in which to store value
+ # * __a: Array
+ # * __k: Key
+ __a_get()
+ {
+ __r=${1}
+ __a=${2}
+ __k=${3}
+
+ # Validate key (must be a non-empty string of decimal digits).
+ case ${__k} in ''|*[!0-9]*)
+ printf 'Error: Illegal array key: %s\n' "${__k}" 1>&2
+ exit 1
+ ;; esac
+
+ # Offset key. The offset lives in the array's `_o` variable, the same one
+ # push reads (and named like the `_l` length variable). An unset offset is
+ # treated as 0.
+ eval "__k=\$((\${__k} + \${${__a}_o:-0}))"
+
+ # Store the value in the specified return register.
+ # NB: If `set -u` is on, the shell will throw an error on undefined values
+ # or out-of-bound keys, as one would expect.
+ eval "${__r}=\${${__a}__${__k}}"
+ }
+
+ # __a_unset(__a __k)
+ # Target of the translation of `unset name[key]`; for example,
+ # `unset cardinals[1]` compiles to `__a_unset cardinals 1`.
+ # Parameters:
+ # * __a: Array
+ # * __k: Key
+ # NB: The body is a placeholder in this draft; only argument unpacking is
+ # written so far.
+ __a_unset()
+ {
+ __a=${1}
+ __k=${2}
+
+ # ...
+ }
+
+ # __a_delete(__a)
+ # Target of the translation of `unset name` when name is an array; for
+ # example, `unset cardinals` compiles to `__a_delete cardinals`.
+ # Parameters:
+ # * __a: Array
+ # NB: The body is a placeholder in this draft; only argument unpacking is
+ # written so far.
+ __a_delete()
+ {
+ __a=${1}
+
+ # ...
+ }
+
+libesh Functions
+----------------
+
+ # length(a)
+ # Returns the length recorded in the array's `_l` variable.
+ # Parameters:
+ # * a: Array
+ length(a)
+ {
+ l=
+
+ # Copy the array's `_l` variable into the local `l`.
+ # NB: `${&l}` presumably expands to the name under which `l` is actually
+ # stored after identifier mangling -- TODO confirm.
+ eval "${&l}=\${${a}_l}"
+
+ return "${l}"
+ }
+
+ # push(a v)
+ # Appends a value after the current last element and increments the length.
+ # Parameters:
+ # * a: Array
+ # * v: Value
+ # NB: Reads both the `_l` (length) and `_o` (offset) variables of the array in
+ # arithmetic, so both must already be set.
+ push(a v)
+ {
+ l=
+ k=
+
+ # Get the current length.
+ eval "${&l}=\${${a}_l}"
+ # Offset the length to get the new key.
+ eval "${&k}=\$((${l} + \${${a}_o}))"
+ # Set the new value.
+ # NB: `${&v}` presumably expands to the stored name of `v`, so the value is
+ # copied without being re-parsed by eval -- TODO confirm.
+ eval "${a}__${k}=\${${&v}}"
+ # Increment the length.
+ eval "${a}_l=\$((${l} + 1))"
+ }
+
+ # pop(a)
+ # Parameters:
+ # * a: Array
+ # NB: The body is a placeholder in this draft. Per the overview, calls to
+ # `pop` shrink the array.
+ pop(a)
+ {
+ # ...
+ }
+
+ # __shift(a)
+ # Parameters:
+ # * a: Array
+ # NB: The body is a placeholder in this draft. Per the overview, `shift`
+ # calls shrink the array and affect its internal offset. The `__` prefix is
+ # presumably there to avoid clashing with the shell's own `shift` -- TODO
+ # confirm.
+ __shift(a)
+ {
+ # ...
+ }
+
+ # unshift(a v)
+ # Parameters:
+ # * a: Array
+ # * v: Value
+ # NB: The body is a placeholder in this draft. Per the overview, `unshift`
+ # calls grow the array and affect its internal offset.
+ unshift(a v)
+ {
+ # ...
+ }
+
+ # split(fs str)
+ # Parameters:
+ # * fs: Field separator
+ # * str: String to split
+ # NB: The body is a placeholder that defers to split.sh. The return value
+ # is not specified here; presumably an array of the fields -- TODO confirm.
+ split(fs str)
+ {
+ # See split.sh
+ }
+
+ # join(str a)
+ # Returns the values of an array concatenated with a separator between them.
+ # Parameters:
+ # * str: Separator
+ # * a: Array to join
+ # str will be duplicated around unset values in the middle of the array.
+ join(str a)
+ {
+ l=
+ joined=
+
+ # Get the current length.
+ eval "${&l}=\${${a}_l}"
+
+ # Join all the array values.
+ # awk emits the keys 0 .. l-1, the inner printf turns each key into a
+ # "${<a>__<key>}${str}" reference, and eval expands the resulting string.
+ joined="$(eval "printf '%s' \"$(printf "\\\${${a}__%d}\${str}" \
+ $(awk -v end=${l} \
+ 'BEGIN { for (i = 0; i < end; ++i) print(i); }'))\"")"
+
+ # Strip the separator left after the last value.
+ return "${joined%${str}}"
+ }
+
+Hashes
+======
+
+Hashes are sets of key/value pairs.
+
+Hash keys may only consist of Latin letters, digits, and underscores.
+
+Each hash has a list of keys, which is returned by the `keys` function.
+
+Hash Expansion
+--------------
+
+Hash expansion initializes a hash. The format for hash expansion is as follows:
+
+ %(values)
+
+where *values* is zero or more *word*s of the following format:
+
+ key=value
+
+A reference to the new hash is substituted.
+
+eshtrans will translate hash expansions into calls to `__h_new` (part of
+libeshtrans) with hash references returned in registers allocated at compile
+time.
+
+For example:
+
+ numerals=%(zero=0 one=1 two=2)
+
+compiles to:
+
+ __h_new __r0 zero 0 one 1 two 2
+ numerals=${__r0}
+
+while:
+
+ cmd %(zero=0 one=1 two=2)
+
+compiles to:
+
+ __h_new __r0 zero 0 one 1 two 2
+ cmd ${__r0}
+
+and:
+
+ numerals=%(\
+ zero=%(arabic=0 roman=N) \
+ one=%(arabic=1 roman=i) \
+ two=%(arabic=2 roman=ii) \
+ )
+
+compiles to:
+
+ __h_new __r0 arabic 0 roman N
+ __h_new __r1 arabic 1 roman i
+ __h_new __r2 arabic 2 roman ii
+ __h_new __r3 zero ${__r0} one ${__r1} two ${__r2}
+ numerals=${__r3}
+
+Quoting the `%(` operator removes its special meaning.
+
+Hash Value Assignment
+---------------------
+
+The format for hash value assignment is as follows:
+
+ name{key}=word
+
+eshtrans will translate hash value assignments into calls to `__h_set` (part of
+libeshtrans).
+
+For example:
+
+ numerals{zero}=0
+
+compiles to:
+
+ __h_set numerals zero 0
+
+Hash Value Expansion
+--------------------
+
+The format for hash value expansion is as follows:
+
+ ${name{key}}
+
+eshtrans will translate hash value expansions into calls to `__h_get` (part of
+libeshtrans).
+
+For example:
+
+ zero=${numerals{zero}}
+
+compiles to:
+
+ __h_get __r0 numerals zero
+ zero=${__r0}
+
+`ENV` Hash
+----------
+
+Environment variables can be accessed through the `ENV` hash. This circumvents
+identifier mangling.
+
+Environment variables can be expanded as in the following example:
+
+ IFS=:
+ for dir in ${ENV{PATH}}; do
+ : ${dir:=.}
+ if [ -x "${dir}/${cmd}" ]; then
+ printf '%s\n' "${dir}/${cmd}"
+ break
+ fi
+ done
+
+Environment variables can be set as in the following example:
+
+ ENV{LC_ALL}=C
+
+TODO
+====
+
+ * Array function `each` (maintains an iterator)
+ * Hash value functions: `length`, `keys`, `values`, `each`
+ - `keys` `join`s the keys array with `${US}`, then returns the result?
+ + Maybe sets `IFS="${US}"`, uses the shell's field splitting to remove
+ `${US}`, and returns a string (bad: space-delimited string of keys that
+ may contain spaces)
+ + Or just return the array?
+ - `each` iterates over the keys array (skipping unset keys) and returns a
+ key and value, delimited somehow (or, a new array?)
+ * Unsetting hash values and hashes
+ - Will need to unset keys array value
+ - Maintain a key ID (or key key) for each hash key (i.e. the key in the keys
+ array)
+ - Keys array could become large and sparse with lots of unsetting and
+ setting; should be heap instead?
+ * As above, consider building the hash structure on a heap built on an array
+ * Reference type checking in functions