From 48996f2bfc32321dbd6c22d8fcdd0bb385f611cc Mon Sep 17 00:00:00 2001 From: P. J. McDermott Date: Fri, 19 Feb 2016 22:56:37 -0500 Subject: Improve reserved word recognition And fix term() to allow do_group() to find T_DONE. Also, ERMAHGERD DEBERG. --- diff --git a/parsing/lexer.sh b/parsing/lexer.sh index c939d1c..01bc721 100644 --- a/parsing/lexer.sh +++ b/parsing/lexer.sh @@ -82,6 +82,7 @@ next() return ;; esac + dbg T_SEMI tok=T_SEMI return ;; @@ -457,23 +458,28 @@ accept() local t="${1}" local rw= + dbg "looking for $t, current tok ${tok%%${US}*}" case "${t}" in T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\ T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\ T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN) dbg "looking for reserved word $t, have '$tok'" - # Reserved words are recognized as literal T_WORDs. - if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then - return 1 - fi - # T_WORD data unit must match reserved word exactly. - if ! [ "x${tok#T_WORD${US}}" = "x$(tokname "${t}")" ] - then - return 1 + if ! [ "x${tok%%${US}*}" = "x${t}" ]; then + # Reserved words are recognized as literal + # T_WORDs. + if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then + return 1 + fi + # T_WORD data unit must match reserved word + # exactly. + if ! [ "x${tok#T_WORD${US}}" = \ + "x$(tokname "${t}")" ]; then + return 1 + fi + # If the token matches the reserved word, + # replace it with the reserved word token. + tok="${t}" fi - # If the token matches the reserved word, replace it - # with the reserved word token. - tok="${t}" ;; T_NAME) # Names are recognized as literal T_WORDs. @@ -516,6 +522,21 @@ accept() done tok="T_FNAME${US}${tok#T_WORD${US}}" ;; + T_WORD) + if ! [ "x${tok%%${US}*}" = "x${t}" ]; then + return 1 + fi + # Verify that the word doesn't match any reserved words. + for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \ + T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \ + T_IN; do + if [ "x${tok#T_WORD${US}}" = \ + "x$(tokname "${rw}")" ]; then + tok="${rw}" + return 1 + fi + done + ;; *) if ! [ "x${tok%%${US}*}" = "x${t}" ]; then return 1 diff --git a/parsing/parse.sh b/parsing/parse.sh index b39d84e..ff6b5ad 100644 --- a/parsing/parse.sh +++ b/parsing/parse.sh @@ -26,6 +26,7 @@ complete_command() list() { + dbg 'list()' if and_or; then while separator_op; do if ! and_or; then @@ -38,6 +39,7 @@ list() } and_or() { + dbg 'and_or()' if pipeline; then while accept T_AND_IF || accept T_OR_IF; do if ! linebreak || ! pipeline; then @@ -50,6 +52,7 @@ and_or() } pipeline() { + dbg 'pipeline()' accept T_BANG if pipe_sequence; then return 0 @@ -59,6 +62,7 @@ pipeline() pipe_sequence() { + dbg 'pipe_sequence()' if command; then while accept T_PIPE; do if ! linebreak || ! command; then @@ -72,6 +76,7 @@ pipe_sequence() command() { + dbg 'command()' if compound_command; then redirect_list return 0 @@ -85,6 +90,7 @@ command() compound_command() { + dbg 'compound_command()' if brace_group; then return 0 elif subshell; then @@ -105,6 +111,7 @@ compound_command() subshell() { + dbg 'subshell()' if accept T_LPAREN && compound_list && expect T_RPAREN; then return 0 fi @@ -113,8 +120,10 @@ subshell() compound_list() { + dbg 'compound_list()' newline_list if term; then + dbg FOUND TERM separator return 0 fi @@ -123,11 +132,10 @@ compound_list() term() { + dbg 'term()' if and_or; then while separator; do - if ! and_or; then - return 1 - fi + and_or done return 0 fi @@ -155,6 +163,7 @@ for_clause() wordlist() { + dbg 'wordlist()' if accept T_WORD; then while accept T_WORD; do :; done return 0 @@ -236,6 +245,7 @@ function_body() brace_group() { + dbg 'brace_group()' if accept T_LBRACE && compound_list && expect T_RBRACE; then return 0 fi @@ -244,6 +254,7 @@ brace_group() do_group() { + dbg 'do_group()' if accept T_DO && compound_list && expect T_DONE; then return 0 fi @@ -252,6 +263,7 @@ do_group() simple_command() { + dbg 'simple_command()' if cmd_prefix; then if cmd_word; then cmd_suffix @@ -266,6 +278,7 @@ simple_command() cmd_name() { + dbg 'cmd_name()' # TODO: Assignment if accept T_WORD; then return 0 @@ -275,6 +288,7 @@ cmd_name() cmd_word() { + dbg 'cmd_word()' # TODO: Assignment if accept T_WORD; then return 0 @@ -367,6 +381,7 @@ separator() sequential_sep() { + dbg 'sequential_sep()' if accept T_SEMI; then if linebreak; then return 0 -- cgit v0.9.1