From 3b934b8e6c9b43b9fb9599bed3ff4e50af3bc77d Mon Sep 17 00:00:00 2001 From: P. J. McDermott Date: Fri, 19 Feb 2016 21:36:29 -0500 Subject: Add more productions, special tokens, etc. --- diff --git a/parsing/lexer.sh b/parsing/lexer.sh index f92534a..c939d1c 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -455,13 +455,15 @@ scan_param() accept() { local t="${1}" + local rw= case "${t}" in T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\ T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\ T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN) + dbg "looking for reserved word $t, have '$tok'" # Reserved words are recognized as literal T_WORDs. - if ! [ "x${tok%%${US}*}" = T_WORD ]; then + if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then return 1 fi # T_WORD data unit must match reserved word exactly. @@ -473,6 +475,47 @@ accept() # with the reserved word token. tok="${t}" ;; + T_NAME) + # Names are recognized as literal T_WORDs. + if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then + return 1 + fi + # Validate name. + case "${tok%%${US}*}" in + [A-Za-z_][0-9A-Za-z_]*) + ;; + *) + return 1 + ;; + esac + tok="T_NAME${US}${tok#T_WORD${US}}" + ;; + T_FNAME) + # Function names are recognized as literal T_WORDs. + if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then + return 1 + fi + # Validate name. + case "${tok%%${US}*}" in + [A-Za-z_][0-9A-Za-z_]*) + ;; + *) + return 1 + ;; + esac + # Verify that the function name doesn't match any + # reserved words. + for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \ + T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \ + T_IN; do + if [ "x${tok#T_WORD${US}}" = \ + "x$(tokname "${rw}")" ]; then + tok="${rw}" + return 1 + fi + done + tok="T_FNAME${US}${tok#T_WORD${US}}" + ;; *) if ! [ "x${tok%%${US}*}" = "x${t}" ]; then return 1 diff --git a/parsing/parse.sh b/parsing/parse.sh index d85a42e..b39d84e 100644 --- a/parsing/parse.sh +++ b/parsing/parse.sh @@ -6,7 +6,7 @@ US="$(printf '\037.')"; US="${US%.}" dbg() { dbg=true - dbg=false + #dbg=false if ${dbg}; then printf 'DEBUG: %s\n' "${@}" >&2 fi @@ -72,11 +72,263 @@ pipe_sequence() command() { - # XXX: Unfinished - accept T_WORD + if compound_command; then + redirect_list + return 0 + elif function_defn; then + return 0 + elif simple_command; then + return 0 + fi + return 1 +} + +compound_command() +{ + if brace_group; then + return 0 + elif subshell; then + return 0 + elif for_clause; then + return 0 + elif case_clause; then + return 0 + elif if_clause; then + return 0 + elif while_clause; then + return 0 + elif until_clause; then + return 0 + fi + return 1 +} + +subshell() +{ + if accept T_LPAREN && compound_list && expect T_RPAREN; then + return 0 + fi + return 1 +} + +compound_list() +{ + newline_list + if term; then + separator + return 0 + fi + return 1 +} + +term() +{ + if and_or; then + while separator; do + if ! and_or; then + return 1 + fi + done + return 0 + fi + return 1 +} + +for_clause() +{ + dbg 'for_clause()' + if accept T_FOR; then + if expect T_NAME && linebreak; then + if accept T_IN; then + wordlist + if ! sequential_sep; then + return 1 + fi + fi + if do_group; then + return 0 + fi + fi + fi + return 1 +} + +wordlist() +{ + if accept T_WORD; then + while accept T_WORD; do :; done + return 0 + fi + return 1 +} + +case_clause() +{ + : TODO: Implement + return 1 +} + +case_list_ns() +{ + : TODO: Implement + return 1 +} + +case_list() +{ + : TODO: Implement + return 1 +} + +case_item_ns() +{ + : TODO: Implement + return 1 +} + +case_item() +{ + : TODO: Implement + return 1 +} + +pattern() +{ + : TODO: Implement + return 1 +} + +if_clause() +{ + : TODO: Implement + return 1 +} + +else_part() +{ + : TODO: Implement + return 1 +} + +while_clause() +{ + : TODO: Implement + return 1 +} + +until_clause() +{ + : TODO: Implement + return 1 +} + +function_defn() +{ + : TODO: Implement + return 1 +} + +function_body() +{ + : TODO: Implement + return 1 +} + +brace_group() +{ + if accept T_LBRACE && compound_list && expect T_RBRACE; then + return 0 + fi + return 1 +} + +do_group() +{ + if accept T_DO && compound_list && expect T_DONE; then + return 0 + fi + return 1 +} + +simple_command() +{ + if cmd_prefix; then + if cmd_word; then + cmd_suffix + fi + return 0 + elif cmd_name; then + cmd_suffix + return 0 + fi + return 1 +} + +cmd_name() +{ + # TODO: Assignment + if accept T_WORD; then + return 0 + fi + return 1 +} + +cmd_word() +{ + # TODO: Assignment + if accept T_WORD; then + return 0 + fi + return 1 +} + +cmd_prefix() +{ + : TODO: Implement + return 1 +} + +cmd_suffix() +{ + : TODO: Implement + return 1 } +redirect_list() +{ + : TODO: Implement + return 1 +} +io_redirect() +{ + : TODO: Implement + return 1 +} + +io_file() +{ + : TODO: Implement + return 1 +} + +filename() +{ + : TODO: Implement + return 1 +} + +io_here() +{ + : TODO: Implement + return 1 +} + +here_end() +{ + : TODO: Implement + return 1 +} newline_list() { @@ -88,6 +340,7 @@ newline_list() fi return 1 } + linebreak() { newline_list @@ -112,6 +365,18 @@ separator() return 1 } +sequential_sep() +{ + if accept T_SEMI; then + if linebreak; then + return 0 + fi + elif newline_list; then + return 0 + fi + return 1 +} + parse() { local fn="${1}" @@ -150,11 +415,15 @@ try() printf '\n\n' } -try '"foo bar" && $baz || qux' '${quux%uux quuux' -try '"foo bar" && $baz || qux' '${quux%uux } quuux' -try 'foo ${bar}' -try 'foo ${#bar}' -try 'foo ${bar#baz}' -try 'foo ${#bar#}' -try 'foo ${^}' -try 'foo `bar`' +#try '"foo bar" && $baz || qux' '${quux%uux quuux' +#try '"foo bar" && $baz || qux' '${quux%uux } quuux' +#try 'foo ${bar}' +#try 'foo ${#bar}' +#try 'foo ${bar#baz}' +#try 'foo ${#bar#}' +#try 'foo ${^}' +#try 'foo `bar`' +#try 'foo &&' +#try '{ foo ; }' +#try '( foo )' +try 'for i in 1 2 3 ; do stuff ; done' -- cgit v0.9.1