# Lexer for the shell parser.
#
# Input is read from standard input one character at a time.  The functions
# below rely on names that are defined outside this section: the separator
# variables LF, HT, RS, US, STX and ETX, and the functions dbg, tokname and
# parse_sub.

# Lexer state, shared by all of the functions below.
fname=   # Name of the input; '-' is reported as "stdin" in diagnostics.
lineno=  # Current line number, used in diagnostics.
c=       # Look-ahead character; empty once no more input can be read.
tok=     # Current token, e.g. T_PIPE or "T_WORD${US}<text>".
tokens=  # Tokens accepted so far, each one followed by ${RS}.

# Read the next character of standard input into c.
# A '.' is appended inside the command substitution and stripped afterwards
# so that a newline character is not eaten by trailing-newline removal.
pgetc() {
  c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
  c="${c%.}"
}

# Scan the next token starting at the look-ahead character and store it in
# tok.  Blanks, comments and backslash-newline pairs are skipped.
next() {
  while :; do
    dbg "parsing char '$c' at lineno $lineno"
    case "${c}" in
    '')
      pgetc
      tok=T_EOF
      return
      ;;
    "${LF}")
      pgetc
      lineno=$((${lineno} + 1))
      tok=T_NEWLINE
      return
      ;;
    ' '|"${HT}")
      pgetc
      continue
      ;;
    \\)
      pgetc
      case "${c}" in
      "${LF}")
        # Line continuation: drop both characters.
        lineno=$((${lineno} + 1))
        pgetc
        continue
        ;;
      esac
      # NOTE: the word starts at the character after the backslash; the
      # backslash itself is not added to the word.
      next_word
      return
      ;;
    '#')
      # Comment: skip up to, but not including, the newline (or EOF).
      pgetc
      while :; do
        case "${c}" in
        "${LF}"|'') break ;;
        esac
        pgetc
      done
      continue
      ;;
    '&')
      pgetc
      case "${c}" in
      '&')
        pgetc
        tok=T_AND_IF
        return
        ;;
      esac
      tok=T_AND
      return
      ;;
    '|')
      pgetc
      case "${c}" in
      '|')
        pgetc
        tok=T_OR_IF
        return
        ;;
      esac
      tok=T_PIPE
      return
      ;;
    ';')
      pgetc
      case "${c}" in
      ';')
        pgetc
        tok=T_DSEMI
        return
        ;;
      esac
      dbg T_SEMI
      tok=T_SEMI
      return
      ;;
    '(')
      pgetc
      tok=T_LPAREN
      return
      ;;
    ')')
      pgetc
      tok=T_RPAREN
      return
      ;;
    '<'|'>')
      next_io
      return
      ;;
    *)
      next_word
      return
      ;;
    esac
  done
}

# Scan a redirection operator starting at '<' or '>' and store it in tok.
# The longest operator wins; each inner case falls back to the shorter
# operator only when no longer one matched.
next_io() {
  case "${c}" in
  '<')
    pgetc
    case "${c}" in
    '<')
      pgetc
      case "${c}" in
      '-')
        pgetc
        tok=T_DLESSDASH
        ;;
      *)
        tok=T_DLESS
        ;;
      esac
      ;;
    '&')
      pgetc
      tok=T_LESSAND
      ;;
    '>')
      pgetc
      tok=T_LESSGREAT
      ;;
    *)
      tok=T_LESS
      ;;
    esac
    ;;
  '>')
    pgetc
    case "${c}" in
    '>')
      pgetc
      tok=T_DGREAT
      ;;
    '&')
      pgetc
      tok=T_GREATAND
      ;;
    '|')
      pgetc
      tok=T_CLOBBER
      ;;
    *)
      tok=T_GREAT
      ;;
    esac
    ;;
  esac
}

# Scan a word and store it in tok as "T_WORD${US}<word>".
# Exits with status 1 if scan_word fails.
next_word() {
  local res=
  local lineno_offset=
  local word=

  if ! res="$(scan_word false)"; then
    exit 1
  fi

  # scan_word prints "<lines>${RS}<look-ahead>${RS}<word>".
  lineno_offset=${res%%${RS}*}
  res="${res#*${RS}}"
  c="${res%%${RS}*}"
  res="${res#*${RS}}"
  word="${res%%${RS}*}"

  # We must advance lineno because scan_word() was run in a subshell.
  lineno=$((${lineno} + ${lineno_offset}))

  tok="T_WORD${US}${word}"
}

# Scan one word starting at the look-ahead character.
#
# Arguments: $1 - "true" when scanning the word of a parameter expansion
#                 (blanks and operators do not end the word; an unquoted
#                 '}' does), otherwise "false".
# Outputs:   "<lines>${RS}<look-ahead>${RS}<word>" on stdout, where <lines>
#            is the number of newlines consumed.
# Intended to be called in a command substitution; syntax errors terminate
# that subshell with status 1.
scan_word() {
  local in_param="${1}"
  local res=
  local word=
  local quoted=
  local lines=
  local lineno_offset=
  local wordexp=

  word=''
  quoted=false
  lines=0

  while :; do
    dbg "parsing word char '$c' at lineno $lineno"
    case "${c}" in
    '')
      break
      ;;
    "${LF}")
      if ! ${in_param} && ! ${quoted}; then
        break
      fi
      lineno=$((${lineno} + 1))
      lines=$((${lines} + 1))
      word="${word}${c}"
      ;;
    ' '|"${HT}"|'&'|'|'|';'|'('|')'|'<'|'>')
      if ! ${in_param} && ! ${quoted}; then
        break
      fi
      word="${word}${c}"
      ;;
    '$')
      pgetc
      if ! res=$(scan_wordexp); then
        exit 1
      fi
      lineno_offset=${res%%${RS}*}
      res="${res#*${RS}}"
      c="${res%%${RS}*}"
      res="${res#*${RS}}"
      wordexp="${res%%${RS}*}"

      # We must advance lineno because scan_wordexp()
      # was run in a subshell.
      lineno=$((${lineno} + ${lineno_offset}))

      word="${word}${wordexp}"

      # scan_wordexp() leaves behind an unused
      # character, so we should skip the pgetc() call
      # below.
      continue
      ;;
    '`')
      synerr 'Backquoted (old-style) %s' \
        'command substitution not supported'
      break
      ;;
    \')
      word="${word}${c}"
      # Inside double quotes a single quote is an ordinary character.
      if ! ${quoted}; then
        while :; do
          pgetc
          case "${c}" in
          '')
            # Without this check the loop would never end at EOF.
            synerr 'Unterminated quoted string'
            ;;
          "${LF}")
            lineno=$((${lineno} + 1))
            lines=$((${lines} + 1))
            ;;
          esac
          word="${word}${c}"
          case "${c}" in
          \') break ;;
          esac
        done
      fi
      ;;
    '"')
      word="${word}${c}"
      if ${quoted}; then
        quoted=false
      else
        quoted=true
      fi
      ;;
    '}')
      if ${in_param} && ! ${quoted}; then
        break
      fi
      word="${word}${c}"
      ;;
    *)
      word="${word}${c}"
      ;;
    esac
    pgetc
  done

  printf "%d${RS}%s${RS}%s" ${lines} "${c}" "${word}"
}

# Scan a word expansion; the look-ahead is the character after the '$'.
#
# Handles "${...}" parameter expansions (with an optional modifier and
# word), "$(...)" command substitutions (via parse_sub) and plain "$name"
# parameters.  Arithmetic expansion is rejected with a syntax error.
#
# Outputs: "<lines>${RS}<look-ahead>${RS}<expansion text>" on stdout.
# Intended to be called in a command substitution; errors terminate that
# subshell with status 1.
scan_wordexp() {
  local wordexp=
  local ln_off=
  local mod=
  local res=
  local param=
  local word=
  local toks=

  wordexp=''
  ln_off=0

  # The '$' in the bracket patterns below is escaped on purpose: an
  # unescaped "$!" would be expanded as a special parameter.
  case "${c}" in
  '{')
    # Parameter expansion brace
    mod=true
    pgetc
    case "${c}" in
    '#')
      pgetc
      case "${c}" in
      [@*#?\$!A-Za-z0-9_-])
        # String length expansion
        if ! res="$(scan_param)"; then
          exit 1
        fi
        ln_off=${res%%${RS}*}
        res="${res#*${RS}}"
        c="${res%%${RS}*}"
        res="${res#*${RS}}"
        param="${res%%${RS}*}"
        lineno=$((${lineno} + ${ln_off}))

        # Disable modifications.
        mod=false
        ;;
      *)
        # Special parameter "#"
        param='#'
        ;;
      esac
      ;;
    *)
      if ! res="$(scan_param)"; then
        exit 1
      fi
      ln_off=${res%%${RS}*}
      res="${res#*${RS}}"
      c="${res%%${RS}*}"
      res="${res#*${RS}}"
      param="${res%%${RS}*}"
      lineno=$((${lineno} + ${ln_off}))
      ;;
    esac

    wordexp="\${${param}"

    if ${mod}; then
      # Check for modifications
      mod=false
      case "${c}" in
      ':')
        mod=true
        wordexp="${wordexp}${c}"
        pgetc
        case "${c}" in
        '-'|'='|'?'|'+')
          wordexp="${wordexp}${c}"
          pgetc
          ;;
        esac
        ;;
      '-'|'='|'?'|'+')
        mod=true
        wordexp="${wordexp}${c}"
        pgetc
        ;;
      '%')
        mod=true
        wordexp="${wordexp}${c}"
        pgetc
        case "${c}" in
        '%')
          wordexp="${wordexp}${c}"
          pgetc
          ;;
        esac
        ;;
      '#')
        mod=true
        wordexp="${wordexp}${c}"
        pgetc
        case "${c}" in
        '#')
          wordexp="${wordexp}${c}"
          pgetc
          ;;
        esac
        ;;
      esac
    fi

    if ${mod}; then
      # Get word.
      if ! res="$(scan_word true)"; then
        exit 1
      fi
      ln_off=${res%%${RS}*}
      res="${res#*${RS}}"
      c="${res%%${RS}*}"
      res="${res#*${RS}}"
      word="${res%%${RS}*}"

      # We must advance lineno because scan_word() was
      # run in a subshell.
      lineno=$((${lineno} + ${ln_off}))

      wordexp="${wordexp}${word}"
      dbg "param mod word: '$word'"
    fi

    # Check for right brace.
    case "${c}" in
    '}')
      wordexp="${wordexp}${c}"
      pgetc
      ;;
    *)
      synerr 'Missing "}"'
      ;;
    esac
    ;;
  '(')
    # Arithmetic expansion or command substitution
    pgetc
    case "${c}" in
    '(')
      # Arithmetic expansion
      synerr 'Arithmetic expansion is %s' \
        'not yet supported'
      ;;
    *)
      # Command substitution
      if ! res="$(parse_sub "${fname}" \
        ${lineno} "${c}" false)"
      then
        exit 1
      fi
      ln_off=${res%%${RS}*}
      res="${res#*${RS}}"
      c="${res%%${RS}*}"
      res="${res#*${RS}}"
      toks="${res%%${RS}*}"
      # NOTE(review): parse_sub's first field is stored in lineno as-is
      # and is also printed below as the offset that callers add to
      # their own lineno -- confirm against parse_sub that this does
      # not double-count lines.
      lineno=${ln_off}

      wordexp="\$(${STX}${toks}${ETX}"

      # Get ")"
      case "${c}" in
      ')')
        wordexp="${wordexp}${c}"
        pgetc
        ;;
      *)
        synerr 'Missing ")"'
        ;;
      esac
      ;;
    esac
    ;;
  [@*#?\$!A-Za-z0-9_-])
    if ! res="$(scan_param)"; then
      exit 1
    fi
    ln_off=${res%%${RS}*}
    res="${res#*${RS}}"
    c="${res%%${RS}*}"
    res="${res#*${RS}}"
    param="${res%%${RS}*}"
    lineno=$((${lineno} + ${ln_off}))

    wordexp="\$${param}"
    ;;
  esac

  printf "%d${RS}%s${RS}%s" ${ln_off} "${c}" "${wordexp}"
  return 0
}

# Scan a parameter: one special-parameter character, a positional parameter
# (digits, not starting with 0) or a name.
#
# Outputs: "0${RS}<look-ahead>${RS}<parameter>" on stdout.
# Anything else is a syntax error.
scan_param() {
  local param=

  param=''

  case "${c}" in
  [@*#?\$!0-])
    # Special parameter ('$' is escaped so "$!" is not expanded)
    param="${c}"
    pgetc
    ;;
  [1-9])
    # Positional parameter
    param="${param}${c}"
    pgetc
    while :; do
      case "${c}" in
      [!0-9]) break ;;
      esac
      param="${param}${c}"
      pgetc
    done
    ;;
  [A-Za-z_])
    # Parameter name
    param="${param}${c}"
    pgetc
    while :; do
      case "${c}" in
      [!A-Za-z0-9_]) break ;;
      esac
      param="${param}${c}"
      pgetc
    done
    ;;
  *)
    synerr 'Bad parameter name'
    ;;
  esac

  printf "%d${RS}%s${RS}%s" 0 "${c}" "${param}"
  return 0
}

# Check the current token.  If it matches, add it to the syntax array.
#
# Arguments: $1 - the token type wanted.  Reserved-word types, T_NAME,
#                 T_FNAME and T_CMDNAME are matched against T_WORD tokens;
#                 any other type must equal the current token's type.
# Returns:   0 after appending the token to tokens and scanning the next
#            one; 1 (without consuming input) if the token does not match.
#            For T_FNAME and T_CMDNAME a word that spells a reserved word
#            is rejected and tok is replaced by that reserved-word token.
accept() {
  local t="${1}"
  local rw=

  dbg "looking for $t, current tok ${tok%%${US}*}"
  case "${t}" in
  T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\
  T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\
  T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN)
    dbg "looking for reserved word $t, have '$tok'"
    if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
      # Reserved words are recognized as literal
      # T_WORDs.
      if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
        return 1
      fi

      # T_WORD data unit must match reserved word
      # exactly.
      if ! [ "x${tok#T_WORD${US}}" = \
        "x$(tokname "${t}")" ]; then
        return 1
      fi

      # If the token matches the reserved word,
      # replace it with the reserved word token.
      tok="${t}"
    fi
    ;;
  T_NAME)
    # Names are recognized as literal T_WORDs.
    if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
      return 1
    fi

    # Validate name: the word text (not the token type) must be
    # non-empty, must not start with a digit, and must contain only
    # letters, digits and underscores.
    case "${tok#T_WORD${US}}" in
    ''|[!A-Za-z_]*|*[!0-9A-Za-z_]*)
      return 1
      ;;
    esac

    tok="T_NAME${US}${tok#T_WORD${US}}"
    ;;
  T_FNAME)
    # Function names are recognized as literal T_WORDs.
    if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
      return 1
    fi

    # Validate name (same rule as T_NAME).
    case "${tok#T_WORD${US}}" in
    ''|[!A-Za-z_]*|*[!0-9A-Za-z_]*)
      return 1
      ;;
    esac

    # Verify that the function name doesn't match any
    # reserved words.
    for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
      T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
      T_LBRACE T_RBRACE T_BANG T_IN; do
      if [ "x${tok#T_WORD${US}}" = \
        "x$(tokname "${rw}")" ]; then
        tok="${rw}"
        return 1
      fi
    done

    tok="T_FNAME${US}${tok#T_WORD${US}}"
    ;;
  T_CMDNAME)
    # The first word of a simple command is to be checked
    # for reserved words.
    if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
      return 1
    fi

    # Verify that the word doesn't match any reserved words.
    for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
      T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
      T_LBRACE T_RBRACE T_BANG T_IN; do
      if [ "x${tok#T_WORD${US}}" = \
        "x$(tokname "${rw}")" ]; then
        tok="${rw}"
        return 1
      fi
    done
    ;;
  *)
    if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
      return 1
    fi
    ;;
  esac

  dbg "accept $t"
  tokens="${tokens}${tok}${RS}"
  next
  return 0
}

# Like accept, but a mismatch is reported as a syntax error through synexp.
expect() {
  local t="${1}"

  if accept "${t}"; then
    return 0
  else
    synexp "${t}"
    return 1
  fi
}

# Print a diagnostic prefixed with the input name and line number to
# stderr, then exit with status 1.
#
# Arguments: $1 - printf format; remaining arguments are its operands.
error() {
  local fmt="${1}"
  shift 1

  case "${fname}" in
  '-')
    printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
    ;;
  *)
    printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
    ;;
  esac

  # The parser runs in a subshell, so this just returns up to the caller
  # like an exception.
  exit 1
}

# Reset the lexer and scan the first token.
#
# Arguments: $1 - input name ('-' for stdin)
#            $2 - starting line number
#            $3 - initial look-ahead character; when empty, one is read
#                 from standard input
init_lexer() {
  local fn="${1}"
  local ln="${2}"
  local char="${3}"
  shift 3

  fname="${fn}"
  lineno=${ln}
  tokens=''

  case "${char}" in
  '')
    pgetc
    ;;
  *)
    c="${char}"
    ;;
  esac
  next
}

# Print the current line number.
get_lineno() {
  printf '%d' ${lineno}
  return 0
}

# Print the current look-ahead character.
get_lexer_char() {
  printf '%c' "${c}"
  return 0
}

# Print the accepted tokens.
get_tokens() {
  printf '%s' "${tokens}"
  return 0
}

# Report the current token as unexpected.
#
# Arguments: $1 - the token that was expected; may be empty.
synexp() {
  local t="${1}"
  shift 1

  if [ "x${t}" = 'x' ]; then
    synerr '%s unexpected' "$(tokname "${tok}")"
  else
    synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
      "$(tokname "${t}")"
  fi
}

# Report a syntax error through error(), which exits.
#
# Arguments: $1 - printf format; remaining arguments are its operands.
synerr() {
  local fmt="${1}"
  shift 1

  error "Syntax error: ${fmt}" "${@}"
}