# Lexer state shared by the functions below.
#   fname   name of the input being lexed ('-' is reported as "stdin")
#   lineno  current line number, used in error messages
#   c       current lookahead character (empty at end of input)
#   tok     most recently scanned token; words are "T_WORD${US}<text>"
#   tokens  accepted tokens, each followed by ${RS}
fname=
lineno=
c=
tok=
tokens=

# Read the next byte of standard input into ${c}; ${c} is empty at EOF.
# The '.' sentinel stops command substitution from stripping a newline.
pgetc() {
	c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
	c="${c%.}"
}

# Scan the next token into ${tok}, leaving ${c} on the first character after
# it.  Blanks, comments and backslash-newline continuations are skipped.
next() {
	while :; do
		dbg "parsing char '$c' at lineno $lineno"
		case "${c}" in
		'')
			pgetc
			tok=T_EOF
			return
			;;
		"${LF}")
			pgetc
			lineno=$((${lineno} + 1))
			tok=T_NEWLINE
			return
			;;
		' '|"${HT}")
			pgetc
			continue
			;;
		\\)
			pgetc
			case "${c}" in
			"${LF}")
				# Line continuation.
				lineno=$((${lineno} + 1))
				pgetc
				continue
				;;
			esac
			# NOTE(review): the backslash has already been
			# consumed here and is not added to the word, so the
			# following character is scanned as if unescaped.
			next_word
			return
			;;
		'#')
			# Comment: discard up to (not including) the newline.
			pgetc
			while :; do
				case "${c}" in
				"${LF}"|'')
					break
					;;
				esac
				pgetc
			done
			continue
			;;
		'&')
			pgetc
			case "${c}" in
			'&')
				pgetc
				tok=T_AND_IF
				return
				;;
			esac
			tok=T_AND
			return
			;;
		'|')
			pgetc
			case "${c}" in
			'|')
				pgetc
				tok=T_OR_IF
				return
				;;
			esac
			tok=T_PIPE
			return
			;;
		';')
			pgetc
			case "${c}" in
			';')
				pgetc
				tok=T_DSEMI
				return
				;;
			esac
			tok=T_SEMI
			return
			;;
		'(')
			pgetc
			tok=T_LPAREN
			return
			;;
		')')
			pgetc
			tok=T_RPAREN
			return
			;;
		'<'|'>')
			next_io
			return
			;;
		*)
			next_word
			return
			;;
		esac
	done
}

# Scan a redirection operator starting at ${c} ('<' or '>') into ${tok}.
# Every arm assigns ${tok} exactly once, so a longer operator is never
# overwritten by the shorter one it starts with.
next_io() {
	case "${c}" in
	'<')
		pgetc
		case "${c}" in
		'<')
			pgetc
			case "${c}" in
			'-')
				pgetc
				tok=T_DLESSDASH
				;;
			*)
				tok=T_DLESS
				;;
			esac
			;;
		*)
			tok=T_LESS
			;;
		esac
		;;
	'>')
		pgetc
		case "${c}" in
		'>')
			pgetc
			tok=T_DGREAT
			;;
		*)
			tok=T_GREAT
			;;
		esac
		;;
	esac
}

# Scan a word starting at ${c} and store it in ${tok} as
# "T_WORD${US}<text>".  Exits with status 1 if the scan fails.
next_word() {
	local res=
	local lineno_offset=
	local word=

	if ! res="$(scan_word false)"; then
		exit 1
	fi
	lineno_offset=${res%%${RS}*}
	res="${res#*${RS}}"
	c="${res%%${RS}*}"
	res="${res#*${RS}}"
	word="${res%%${RS}*}"

	# We must advance lineno because scan_word() was run in a subshell.
	lineno=$((${lineno} + ${lineno_offset}))

	tok="T_WORD${US}${word}"
}

# scan_word IN_PARAM
#
# Scan a word starting at ${c}.  IN_PARAM is "true" when scanning the word
# part of a "${param<op>word}" expansion: blanks and newlines are then part
# of the word and an unquoted "}" ends it.  Otherwise an unquoted blank or
# newline ends the word.
#
# Meant to be called in a command substitution; prints
# "<lines>${RS}<lookahead>${RS}<word>", where <lines> is the number of
# newlines consumed (including those consumed by nested expansions).
#
# NOTE(review): backslash escapes are not interpreted here, and operator
# characters such as ";" or "|" do not end a word.  A word that ends in
# newlines loses them when the caller's command substitution strips trailing
# newlines.  Confirm whether these are intended.
scan_word() {
	local in_param="${1}"
	local res=
	local word=
	local quoted=
	local lines=
	local lineno_offset=
	local wordexp=

	word=''
	quoted=false
	lines=0

	while :; do
		dbg "parsing word char '$c' at lineno $lineno"
		case "${c}" in
		'')
			break
			;;
		"${LF}")
			if ! ${in_param} && ! ${quoted}; then
				break
			fi
			lineno=$((${lineno} + 1))
			lines=$((${lines} + 1))
			word="${word}${c}"
			;;
		' '|"${HT}")
			if ! ${in_param} && ! ${quoted}; then
				break
			fi
			word="${word}${c}"
			;;
		'$')
			pgetc
			if ! res=$(scan_wordexp); then
				exit 1
			fi
			lineno_offset=${res%%${RS}*}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			res="${res#*${RS}}"
			wordexp="${res%%${RS}*}"

			# We must advance lineno because scan_wordexp()
			# was run in a subshell.  The same amount is added to
			# lines so that our own caller sees it too.
			lineno=$((${lineno} + ${lineno_offset}))
			lines=$((${lines} + ${lineno_offset}))

			word="${word}${wordexp}"

			# scan_wordexp() leaves behind an unused
			# character, so we should skip the pgetc() call
			# below.
			continue
			;;
		'`')
			synerr 'Backquoted (old-style) %s' \
				'command substitution not supported'
			break
			;;
		\')
			word="${word}${c}"
			# Inside double quotes a single quote is an ordinary
			# character; only an unquoted one opens a literal span.
			if ! ${quoted}; then
				while :; do
					pgetc
					case "${c}" in
					'')
						# Without this check the loop
						# would spin forever at EOF.
						synerr 'Unterminated quoted string'
						;;
					"${LF}")
						lineno=$((${lineno} + 1))
						lines=$((${lines} + 1))
						;;
					esac
					word="${word}${c}"
					case "${c}" in
					\')
						break
						;;
					esac
				done
			fi
			;;
		'"')
			word="${word}${c}"
			if ${quoted}; then
				quoted=false
			else
				quoted=true
			fi
			;;
		'}')
			if ${in_param} && ! ${quoted}; then
				break
			fi
			word="${word}${c}"
			;;
		*)
			word="${word}${c}"
			;;
		esac
		pgetc
	done

	printf "%d${RS}%s${RS}%s" "${lines}" "${c}" "${word}"
}

# Scan the expansion that follows a "$" (already consumed; ${c} is the
# character after it).  Meant to be called in a command substitution; prints
# "<lines>${RS}<lookahead>${RS}<text>", where <text> is the expansion as
# written, including the leading "$".
scan_wordexp() {
	local wordexp=
	local ln_off=
	local total=
	local mod=
	local res=
	local param=
	local word=

	wordexp=''
	ln_off=0
	total=0

	case "${c}" in
	'{')
		# Parameter expansion brace
		mod=true
		pgetc
		case "${c}" in
		'#')
			pgetc
			case "${c}" in
			[@*#?\$!A-Za-z0-9_-])
				# String length expansion
				if ! res="$(scan_param)"; then
					exit 1
				fi
				ln_off=${res%%${RS}*}
				res="${res#*${RS}}"
				c="${res%%${RS}*}"
				res="${res#*${RS}}"
				# Keep the "#" so the text stays "${#name}".
				param="#${res%%${RS}*}"
				lineno=$((${lineno} + ${ln_off}))
				total=$((${total} + ${ln_off}))
				# Disable modifications.
				mod=false
				;;
			*)
				# Special parameter "#"
				param='#'
				;;
			esac
			;;
		*)
			if ! res="$(scan_param)"; then
				exit 1
			fi
			ln_off=${res%%${RS}*}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			res="${res#*${RS}}"
			param="${res%%${RS}*}"
			lineno=$((${lineno} + ${ln_off}))
			total=$((${total} + ${ln_off}))
			;;
		esac

		wordexp="\${${param}"

		if ${mod}; then
			# Check for modifications
			mod=false
			case "${c}" in
			':')
				mod=true
				wordexp="${wordexp}${c}"
				pgetc
				case "${c}" in
				'-'|'='|'?'|'+')
					wordexp="${wordexp}${c}"
					pgetc
					;;
				esac
				;;
			'-'|'='|'?'|'+')
				mod=true
				wordexp="${wordexp}${c}"
				pgetc
				;;
			'%')
				mod=true
				wordexp="${wordexp}${c}"
				pgetc
				case "${c}" in
				'%')
					wordexp="${wordexp}${c}"
					pgetc
					;;
				esac
				;;
			'#')
				mod=true
				wordexp="${wordexp}${c}"
				pgetc
				case "${c}" in
				'#')
					wordexp="${wordexp}${c}"
					pgetc
					;;
				esac
				;;
			esac
		fi

		if ${mod}; then
			# Get word.
			if ! res="$(scan_word true)"; then
				exit 1
			fi
			ln_off=${res%%${RS}*}
			res="${res#*${RS}}"
			c="${res%%${RS}*}"
			res="${res#*${RS}}"
			word="${res%%${RS}*}"

			# We must advance lineno because scan_word() was
			# run in a subshell.
			lineno=$((${lineno} + ${ln_off}))
			total=$((${total} + ${ln_off}))

			wordexp="${wordexp}${word}"
			dbg "param mod word: '$word'"
		fi

		# Check for right brace.
		case "${c}" in
		'}')
			wordexp="${wordexp}${c}"
			pgetc
			;;
		*)
			synerr 'Missing "}"'
			;;
		esac
		;;
	'(')
		# Arithmetic expansion or command substitution
		# NOTE(review): not implemented; nothing is emitted for the
		# "$" and the caller resumes scanning at "(".
		;;
	[@*#?\$!A-Za-z0-9_-])
		if ! res="$(scan_param)"; then
			exit 1
		fi
		ln_off=${res%%${RS}*}
		res="${res#*${RS}}"
		c="${res%%${RS}*}"
		res="${res#*${RS}}"
		param="${res%%${RS}*}"
		lineno=$((${lineno} + ${ln_off}))
		total=$((${total} + ${ln_off}))
		wordexp="\$${param}"
		;;
	*)
		# A "$" that does not introduce an expansion is kept
		# literally instead of being dropped.
		wordexp='$'
		;;
	esac

	printf "%d${RS}%s${RS}%s" "${total}" "${c}" "${wordexp}"
	return 0
}

# Scan a parameter name starting at ${c}: a special parameter, a positional
# parameter or a name.  Meant to be called in a command substitution; prints
# "0${RS}<lookahead>${RS}<name>".  Anything else is a syntax error.
scan_param() {
	local param=

	param=''

	case "${c}" in
	# The "$" is escaped so that "$!" is not expanded inside the pattern.
	[@*#?\$!0-])
		# Special parameter
		param="${c}"
		pgetc
		;;
	[1-9])
		# Positional parameter
		param="${param}${c}"
		pgetc
		while :; do
			case "${c}" in
			[!0-9])
				break
				;;
			esac
			param="${param}${c}"
			pgetc
		done
		;;
	[A-Za-z_])
		# Parameter name
		param="${param}${c}"
		pgetc
		while :; do
			case "${c}" in
			[!A-Za-z0-9_])
				break
				;;
			esac
			param="${param}${c}"
			pgetc
		done
		;;
	*)
		synerr 'Bad parameter name'
		;;
	esac

	printf "%d${RS}%s${RS}%s" 0 "${c}" "${param}"
	return 0
}

# Check the current token. If it matches, add it to the syntax array.
# accept TYPE: on a match the token is appended to ${tokens} (followed by
# ${RS}), the lexer is advanced with next() and 0 is returned; otherwise
# nothing changes and 1 is returned.  Only the part of ${tok} before the
# first ${US} is compared with TYPE.
accept() {
	local t="${1}"

	if [ "x${tok%%${US}*}" = "x${t}" ]; then
		dbg "accept $t"
		tokens="${tokens}${tok}${RS}"
		next
		return 0
	fi

	return 1
}

# expect TYPE: like accept(), but a mismatch is reported through synexp().
expect() {
	local t="${1}"

	if accept "${t}"; then
		return 0
	else
		synexp "${t}"
		return 1
	fi
}

# error FMT [ARG...]: print "<file>:<line>: <message>" to stderr, where
# <message> is FMT formatted printf-style with ARG..., then exit with
# status 1.  An input name of '-' is reported as "stdin".
error() {
	local fmt="${1}"
	local src="${fname}"
	shift 1

	case "${src}" in
	'-')
		src='stdin'
		;;
	esac

	# ${lineno} is quoted so that an empty value cannot shift the
	# remaining arguments into the wrong conversion.
	printf "%s:%d: ${fmt}\n" "${src}" "${lineno}" "${@}" >&2

	# The parser runs in a subshell, so this just returns up to the caller
	# like an exception.
	exit 1
}

# init_lexer FILENAME: reset the lexer state, remember FILENAME for error
# messages and scan the first token.
init_lexer() {
	# No shift here: the remaining arguments are unused, and shifting
	# past the end of the list is an error in some shells.
	local fn="${1-}"

	fname="${fn}"
	lineno=1
	tokens=''

	pgetc
	next
}

# Print the accepted tokens collected so far.
get_tokens() {
	printf '%s' "${tokens}"
	return 0
}

# synexp [TYPE]: report the current token as unexpected; when TYPE is given
# and non-empty it is named as the expected token.  Does not return.
synexp() {
	# "${1-}" and no shift: calling synexp without an argument must reach
	# the "unexpected" message below rather than fail on the shift.
	local t="${1-}"

	if [ "x${t}" = 'x' ]; then
		synerr '%s unexpected' "$(tokname "${tok}")"
	else
		synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
			"$(tokname "${t}")"
	fi
}

# synerr FMT [ARG...]: report a syntax error through error().  Does not
# return.
synerr() {
	local fmt="${1}"
	shift 1

	error "Syntax error: ${fmt}" "${@}"
}