# Lexer state shared by the functions below.
#
# LF, HT, RS and US are used here but are not defined in this part of the
# file; presumably they hold a newline, a tab and two separator characters
# (TODO confirm against the file that defines them).
fname=		# Input name used in diagnostics ('-' is reported as "stdin").
lineno=		# Current line number (init_lexer starts it at 1).
c=		# Lookahead character; empty at end of input.
tok=		# Current token; words are "T_WORD${US}<text>".
tokens=		# Accepted tokens, each followed by ${RS}.

# Read one byte from standard input into ${c}.
#
# ${c} is empty once dd has nothing left to read.  The '.' appended inside
# the command substitution (and removed right after) keeps a newline byte
# from being dropped by command substitution.
pgetc() {
	c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
	c="${c%.}"
}

# Scan the next token from the input and store it in ${tok}.
#
# Blanks, comments and backslash-newline pairs are skipped.  ${lineno} is
# advanced for every newline consumed here.
next() {
	while :; do
		echo "parsing char '$c'" >&2
		case "${c}" in
		'')
			pgetc
			tok=T_EOF
			return
			;;
		"${LF}")
			pgetc
			lineno=$((${lineno} + 1))
			tok=T_NEWLINE
			return
			;;
		' '|"${HT}")
			pgetc
			continue
			;;
		\\)
			pgetc
			case "${c}" in
			"${LF}")
				# Line continuation.
				lineno=$((${lineno} + 1))
				pgetc
				continue
				;;
			esac
			# NOTE(review): the backslash has already been
			# consumed, so it does not become part of the word.
			next_word
			return
			;;
		'#')
			# Comment: skip up to (not including) the newline.
			pgetc
			while :; do
				case "${c}" in
				"${LF}"|'')
					break
					;;
				esac
				pgetc
			done
			continue
			;;
		'&')
			pgetc
			case "${c}" in
			'&')
				pgetc
				tok=T_AND_IF
				return
				;;
			esac
			tok=T_AND
			return
			;;
		'|')
			pgetc
			case "${c}" in
			'|')
				pgetc
				tok=T_OR_IF
				return
				;;
			esac
			tok=T_PIPE
			return
			;;
		';')
			pgetc
			case "${c}" in
			';')
				pgetc
				tok=T_DSEMI
				return
				;;
			esac
			tok=T_SEMI
			return
			;;
		'(')
			pgetc
			tok=T_LPAREN
			return
			;;
		')')
			pgetc
			tok=T_RPAREN
			return
			;;
		'<'|'>')
			next_io
			return
			;;
		*)
			next_word
			return
			;;
		esac
	done
}

# Scan a redirection operator starting at ${c} ('<' or '>') into ${tok}.
#
# Produces T_LESS, T_DLESS, T_DLESSDASH, T_GREAT or T_DGREAT.  Each longer
# match returns immediately so that the shorter fallback assignment cannot
# overwrite it.
next_io() {
	case "${c}" in
	'<')
		pgetc
		case "${c}" in
		'<')
			pgetc
			case "${c}" in
			'-')
				pgetc
				tok=T_DLESSDASH
				return
				;;
			esac
			tok=T_DLESS
			return
			;;
		esac
		tok=T_LESS
		;;
	'>')
		pgetc
		case "${c}" in
		'>')
			pgetc
			tok=T_DGREAT
			return
			;;
		esac
		tok=T_GREAT
		;;
	esac
}

# Scan a word token starting at ${c} and store it in ${tok} as
# "T_WORD${US}<text>".
next_word() {
	local res=
	local lineno_offset=
	local word=

	# scan_word() prints "<lines>${RS}<next char>${RS}<word>${RS}".
	res="$(scan_word false)"
	lineno_offset=${res%%${RS}*}
	res="${res#*${RS}}"
	c="${res%%${RS}*}"
	res="${res#*${RS}}"
	word="${res%%${RS}*}"

	# We must advance lineno because scan_word() was run in a subshell.
	lineno=$((${lineno} + ${lineno_offset}))

	tok="T_WORD${US}${word}"
}

# Scan the text of a word starting at ${c}.
#
# $1 - "true" when scanning the word part of a ${...} expansion: blanks and
#      newlines are then part of the word and an unquoted '}' ends it.
#
# Meant to be called through command substitution.  Prints
# "<newlines consumed>${RS}<lookahead char>${RS}<word>${RS}"; the final
# ${RS} keeps trailing newlines of the word from being stripped by the
# command substitution.
scan_word() {
	local in_param="${1}"
	local res=
	local word=
	local quoted=
	local lines=
	local lineno_offset=
	local wordexp=

	word=''
	quoted=false
	lines=0
	while :; do
		echo "parsing word char '$c'" >&2
		case "${c}" in
		'')
			break
			;;
		"${LF}")
			if ! ${in_param} && ! ${quoted}; then
				break
			fi
			lineno=$((${lineno} + 1))
			lines=$((${lines} + 1))
			word="${word}${c}"
			;;
		' '|"${HT}")
			if ! ${in_param} && ! ${quoted}; then
				break
			fi
			word="${word}${c}"
			;;
		'$')
			pgetc
			res=$(scan_wordexp)
			lineno_offset=${res%%${RS}*}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			res="${res#*${RS}}"
			wordexp="${res%%${RS}*}"

			# We must advance lineno because scan_wordexp()
			# was run in a subshell.  The same amount goes into
			# lines, which is what our own caller gets to see.
			lineno=$((${lineno} + ${lineno_offset}))
			lines=$((${lines} + ${lineno_offset}))

			word="${word}${wordexp}"

			# scan_wordexp() leaves behind an unused
			# character, so we should skip the pgetc() call
			# below.
			continue
			;;
		\')
			word="${word}${c}"
			while :; do
				pgetc
				case "${c}" in
				'')
					# End of input inside the quotes:
					# report it instead of looping.
					synerr 'Unterminated quoted string'
					break
					;;
				"${LF}")
					lineno=$((${lineno} + 1))
					lines=$((${lines} + 1))
					;;
				esac
				word="${word}${c}"
				case "${c}" in
				\')
					break
					;;
				esac
			done
			;;
		'"')
			word="${word}${c}"
			if ${quoted}; then
				quoted=false
			else
				quoted=true
			fi
			;;
		'}')
			if ${in_param} && ! ${quoted}; then
				break
			fi
			word="${word}${c}"
			;;
		*)
			word="${word}${c}"
			;;
		esac
		pgetc
	done

	printf "%d${RS}%s${RS}%s${RS}" ${lines} "${c}" "${word}"
}

# Scan a word expansion; ${c} is the character right after the '$'.
#
# Meant to be called through command substitution.  Prints
# "<newlines consumed>${RS}<lookahead char>${RS}<expansion text>${RS}".
# When the '$' does not start an expansion this function recognises, the
# expansion text is a literal '$' and ${c} is left untouched.
scan_wordexp() {
	local wordexp=
	local lineno_offset=
	local mod=
	local res=
	local word=
	local prefix=

	wordexp=''
	lineno_offset=0
	case "${c}" in
	'{')
		# Parameter expansion brace
		pgetc
		case "${c}" in
		'#')
			pgetc
			# '$' and '!' are listed as separate quoted patterns:
			# an unquoted "$!" inside the brackets would be
			# expanded by the shell.
			case "${c}" in
			'$'|'!'|[@*#?A-Za-z0-9_-])
				# String length expansion
				prefix='#'
				next_param
				;;
			*)
				# Special parameter "#"
				param='#'
				;;
			esac
			;;
		*)
			next_param
			;;
		esac
		wordexp="\${${prefix}${param}"

		# Check for modifications
		mod=false
		case "${c}" in
		':')
			mod=true
			wordexp="${wordexp}${c}"
			pgetc
			case "${c}" in
			'-'|'='|'?'|'+')
				wordexp="${wordexp}${c}"
				pgetc
				;;
			esac
			;;
		'-'|'='|'?'|'+')
			mod=true
			wordexp="${wordexp}${c}"
			pgetc
			;;
		'%')
			mod=true
			wordexp="${wordexp}${c}"
			pgetc
			case "${c}" in
			'%')
				wordexp="${wordexp}${c}"
				pgetc
				;;
			esac
			;;
		'#')
			mod=true
			wordexp="${wordexp}${c}"
			pgetc
			case "${c}" in
			'#')
				wordexp="${wordexp}${c}"
				pgetc
				;;
			esac
			;;
		esac

		if ${mod}; then
			# Get word.
			res="$(scan_word true)"
			lineno_offset=${res%%${RS}*}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			res="${res#*${RS}}"
			word="${res%%${RS}*}"

			# We must advance lineno because scan_word() was
			# run in a subshell.
			lineno=$((${lineno} + ${lineno_offset}))

			wordexp="${wordexp}${word}"
			echo "param mod word: '$word'" >&2
		fi

		# Check for right brace.
		case "${c}" in
		'}')
			wordexp="${wordexp}${c}"
			pgetc
			;;
		*)
			synerr 'Missing "}"'
			;;
		esac
		;;
	'$'|'!'|[@*#?A-Za-z0-9_-])
		next_param
		wordexp="\$${param}"
		;;
	'('|*)
		# "$(" is not scanned as a unit here, and any other character
		# does not start an expansion.  Keep the '$' as literal text so
		# it is not lost from the word.
		wordexp='$'
		;;
	esac

	printf "%d${RS}%s${RS}%s${RS}" ${lineno_offset} "${c}" "${wordexp}"
}

# Scan a parameter name starting at ${c} into the global ${param}.
#
# Accepts a single special parameter character, a run of digits starting
# with 1-9, or a name made of letters, digits and underscores.  ${param} is
# left empty when ${c} starts none of these.
next_param() {
	param=''
	case "${c}" in
	'$'|'!'|[@*#?0-])
		# Special parameter
		param="${c}"
		pgetc
		;;
	[1-9])
		# Positional parameter
		param="${param}${c}"
		pgetc
		while :; do
			# The empty pattern stops the loop at end of input;
			# a bracket expression never matches an empty ${c}.
			case "${c}" in
			''|[!0-9])
				break
				;;
			esac
			param="${param}${c}"
			pgetc
		done
		;;
	[A-Za-z_])
		# Parameter name
		param="${param}${c}"
		pgetc
		while :; do
			case "${c}" in
			''|[!A-Za-z0-9_])
				break
				;;
			esac
			param="${param}${c}"
			pgetc
		done
		;;
	esac
}

# Check the current token.  If its type (the part before ${US}) equals $1,
# append the token to ${tokens}, scan the next one and return 0; otherwise
# return 1.
accept() {
	local t="${1}"

	if [ "x${tok%%${US}*}" = "x${t}" ]; then
		echo "accept $t" >&2
		tokens="${tokens}${tok}${RS}"
		next
		return 0
	fi
	return 1
}

# Print a diagnostic to standard error, prefixed with the input name and the
# current line number.
#
# $1    - printf format string
# $2... - arguments for the format
error() {
	local fmt="${1}"
	shift 1

	case "${fname}" in
	'-')
		printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
		;;
	*)
		printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
		;;
	esac
}

# Reset the lexer state and scan the first token.
#
# $1 - input name used in diagnostics
init_lexer() {
	local fn="${1}"
	shift 1

	fname="${fn}"
	lineno=1
	tokens=''
	pgetc
	next
}

# Print the tokens accepted so far.
get_tokens() {
	printf '%s' "${tokens}"
	return 0
}

# Report that the current token is unexpected.
#
# $1 - token that was expected instead; may be empty
#
# tokname() is not defined in this part of the file.
synexp() {
	local t="${1}"
	shift 1

	if [ "x${t}" = 'x' ]; then
		synerr '%s unexpected' "$(tokname "${tok}")"
	else
		synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
			"$(tokname "${t}")"
	fi
}

# Report a syntax error through error().
#
# $1    - printf format string
# $2... - arguments for the format
synerr() {
	local fmt="${1}"
	shift 1

	error "Syntax error: ${fmt}" "${@}"
}