Copy everything into a new eshtrans/ directory

Split out and rename functions and variables where appropriate. Also add license headers. (The old scripts under parsing/ can be used under the same license.)
author: P. J. McDermott <pj@pehjota.net> 2016-02-21 04:39:39 (EST)
committer: P. J. McDermott <pj@pehjota.net> 2016-02-21 04:39:39 (EST)
commit: c9f95bf852092d8b1640b92f1c31e84420bb51dd (patch)
tree: b47ff3e2a91fa39f234df6ddfd9559b6d4714de2 /eshtrans/frontend
parent: 4e6bfd6fe0d48ddf49cd61bb8cb31881a1e5e369 (diff)
download: eggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.zip
eggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.tar.gz
eggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.tar.bz2
3 files changed, 1611 insertions, 0 deletions
diff --git a/eshtrans/frontend/lexer.esh b/eshtrans/frontend/lexer.esh
new file mode 100644
index 0000000..0991239
--- /dev/null
+++ b/eshtrans/frontend/lexer.esh
@@ -0,0 +1,990 @@
+# Eggshell lexer
+#
+# Copyright (C) 2016  Patrick "P. J." McDermott
+#
+# This file is part of the Eggshell Compiler.
+#
+# The Eggshell Compiler is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# The Eggshell Compiler is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the Eggshell Compiler.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+dbg=false
+
+fname=
+lineno=
+ln_off=
+start=
+c=
+wordexp=
+here_queue=
+here_awaiting_end=
+here_awaiting_word=
+tok=
+tokens=
+
+# Print each argument as a "DEBUG:" line on standard error, but only when
+# the ${dbg} flag is true.
+dbg()
+{
+	${dbg} || return 0
+	printf 'DEBUG: %s\n' "${@}" >&2
+}
+
+#
+# Error handling (used by scanning and interface functions)
+#
+
+# Print a formatted error message, prefixed with the input name and the
+# current line number, to standard error and exit with status 1.
+#
+# Arguments: $1 - printf format string; remaining arguments - format operands.
+error()
+{
+	local fmt="${1}"
+	shift 1
+
+	case "${fname}" in
+		'-')
+			# An input name of "-" is reported as "stdin".
+			printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
+			;;
+		*)
+			printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
+			;;
+	esac
+
+	# The parser and lexer run in a subshell, so this just returns up to the
+	# caller like an exception.
+	exit 1
+}
+
+synexp()
+{
+	local t="${1}"
+	shift 1
+
+	if [ "x${t}" = 'x' ]; then
+		synerr '%s unexpected' "$(tokname "${tok}")"
+	else
+		synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
+			"$(tokname "${t}")"
+	fi
+}
+
+synerr()
+{
+	local fmt="${1}"
+	shift 1
+
+	error "Syntax error: ${fmt}" "${@}"
+}
+
+#
+# Input reading
+#
+
+# Read one byte from standard input into ${c}; ${c} is empty at end of input.
+lgetc()
+{
+	local raw=
+
+	# Command substitution strips trailing newlines, so a "." is appended
+	# after the byte read by dd and removed again afterwards.  This keeps a
+	# newline character intact.
+	raw="$(dd bs=1 count=1 2>/dev/null; printf '.')"
+	c="${raw%.}"
+}
+
+#
+# Token recognition
+#
+
+# Recognize the next token, starting at the current character ${c}, and store
+# it in ${tok}.  On return ${c} holds the first character after the token.
+# If a here-document body is due, reading is delegated to next_here().
+next()
+{
+	if ${here_awaiting_word}; then
+		next_here
+		return
+	fi
+	while :; do
+		dbg "parsing char '$c' at lineno $lineno"
+		case "${c}" in
+			'')
+				# Empty ${c} means end of input.
+				lgetc
+				tok=T_EOF
+				return
+				;;
+			"${LF}")
+				if ${here_awaiting_end}; then
+					# A here-document operator was not
+					# followed by its delimiter word.
+					synexp ''
+				else
+					# A complete entry is queued, so the
+					# here-document body starts after this
+					# newline.
+					case "${here_queue}" in *"${RS}"*)
+						here_awaiting_end=false
+						here_awaiting_word=true
+						;;
+					esac
+				fi
+				lgetc
+				lineno=$((${lineno} + 1))
+				tok=T_NEWLINE
+				return
+				;;
+			' '|"${HT}")
+				# Skip blanks between tokens.
+				lgetc
+				continue
+				;;
+			\\)
+				lgetc
+				# Backslash-newline is a line continuation.
+				case "${c}" in "${LF}")
+					lineno=$((${lineno} + 1))
+					lgetc
+					continue
+					;;
+				esac
+				# Otherwise the backslash begins a word.
+				next_word \\
+				return
+				;;
+			'#')
+				# Skip a comment up to, but not including, the
+				# newline or end of input.
+				lgetc
+				while :; do
+					case "${c}" in "${LF}"|'')
+						break
+						;;
+					esac
+					lgetc
+				done
+				continue
+				;;
+			'&')
+				lgetc
+				case "${c}" in '&')
+					lgetc
+					tok=T_AND_IF
+					return
+					;;
+				esac
+				tok=T_AND
+				return
+				;;
+			'|')
+				lgetc
+				case "${c}" in '|')
+					lgetc
+					tok=T_OR_IF
+					return
+					;;
+				esac
+				tok=T_PIPE
+				return
+				;;
+			';')
+				lgetc
+				case "${c}" in ';')
+					lgetc
+					tok=T_DSEMI
+					return
+					;;
+				esac
+				dbg T_SEMI
+				tok=T_SEMI
+				return
+				;;
+			'(')
+				lgetc
+				tok=T_LPAREN
+				return
+				;;
+			')')
+				lgetc
+				tok=T_RPAREN
+				return
+				;;
+			'<'|'>')
+				next_io
+				return
+				;;
+			*)
+				next_word ''
+				return
+				;;
+		esac
+		# NOTE(review): every branch above returns or continues, so this
+		# call looks unreachable -- confirm before removing.
+		lgetc
+	done
+}
+
+# Read the body of the here-document at the head of ${here_queue} and store
+# it, including its terminating delimiter line, in ${tok} as a T_WORD token.
+# On return ${c} is the newline that ends the delimiter line.
+next_here()
+{
+	local here=
+	local here_strip_tabs=
+	local here_end=
+	local here_escaped=
+	local line=
+	local word=
+	local res=
+	local wordexp=
+
+	# Dequeue the here-document.
+	here="${here_queue%%${RS}*}"
+	here_strip_tabs="${here%%${US}*}"
+	here_end="${here%${US}*}"
+	# Remove quoting characters from the delimiter word.
+	here_end="$(printf '%s' "${here_end#*${US}}" | \
+		sed 's/\\//g; s/"//g; s/'\''//g;')"  # Stupid Vim: ')"
+	here_escaped="${here##*${US}}"
+	here_queue="${here_queue#*${RS}}"
+	here_awaiting_word=false
+
+	line=''
+	word=''
+	while :; do
+		case "${c}" in
+			'')
+				# Bash throws a warning when EOF occurs in a
+				# here document.  mksh throws an error.  dash,
+				# BusyBox ash, ksh93, and zsh accept EOF as a
+				# delimiter.  We aim for the lowest common
+				# denominator, so throw an error like mksh does.
+				synerr 'Here-document "%s" unclosed' \
+					"${here_end}"
+				;;
+			"${LF}")
+				word="${word}${line}"
+				case "${line}" in "${here_end}")
+					# The newline after the delimiter is
+					# left in ${c} and counted by next().
+					tok="T_WORD${US}${word}"
+					return
+					;;
+				esac
+				word="${word}${c}"
+				line=''
+				# This newline is consumed here rather than in
+				# next(), so it must be counted here to keep
+				# line numbers after the here-document correct.
+				lineno=$((${lineno} + 1))
+				;;
+			"${HT}")
+				if ${here_strip_tabs}; then
+					# "<<-": drop tabs at the start of a
+					# line only.
+					case "${line}" in
+						'')
+							;;
+						*)
+							line="${line}${c}"
+							;;
+					esac
+				else
+					line="${line}${c}"
+				fi
+				;;
+			'$')
+				if ! ${here_escaped}; then
+					# Unquoted delimiter: scan expansions.
+					lgetc
+					if ! res="$(scan_wordexp)"; then
+						exit 1
+					fi
+					ln_off=${res%%${RS}*}
+					res="${res#*${RS}}"
+					c="${res%%${RS}*}"
+					res="${res#*${RS}}"
+					wordexp="${res%%${RS}*}"
+					lineno=$((${lineno} + ${ln_off}))
+					line="${line}${wordexp}"
+					# scan_wordexp() leaves an unused
+					# character in ${c}; skip lgetc below.
+					continue
+				else
+					line="${line}${c}"
+				fi
+				;;
+			*)
+				line="${line}${c}"
+				;;
+		esac
+		lgetc
+	done
+}
+
+next_io()
+{
+	case "${c}" in
+		'<')
+			lgetc
+			case "${c}" in
+				'<')
+					lgetc
+					case "${c}" in '-')
+						lgetc
+						tok=T_DLESSDASH
+						here_queue="${here_queue}true"
+						here_awaiting_end=true
+						here_awaiting_word=false
+						break
+						;;
+					esac
+					tok=T_DLESS
+					here_queue="${here_queue}false"
+					here_awaiting_end=true
+					here_awaiting_word=false
+					break
+					;;
+				'&')
+					lgetc
+					tok=T_LESSAND
+					break
+					;;
+				'>')
+					lgetc
+					tok=T_LESSGREAT
+					break
+					;;
+			esac
+			tok=T_LESS
+			break
+			;;
+		'>')
+			lgetc
+			case "${c}" in
+				'>')
+					lgetc
+					tok=T_DGREAT
+					break
+					;;
+				'&')
+					lgetc
+					tok=T_GREATAND
+					break
+					;;
+				'|')
+					lgetc
+					tok=T_CLOBBER
+					break
+					;;
+			esac
+			tok=T_GREAT
+			break
+			;;
+	esac
+}
+
+# Scan a word with scan_word() and store it in ${tok} as a T_WORD token.
+# If the word follows a here-document operator, it also completes the pending
+# ${here_queue} entry.
+#
+# Arguments: $1 - text already consumed by the caller, prepended to the word.
+next_word()
+{
+	local prev_c="${1}"
+	shift 1
+	local res=
+	local word=
+
+	if ! res="$(scan_word false)"; then
+		exit 1
+	fi
+	# The result holds: line count, next character, word text, separated
+	# by ${RS}.
+	ln_off=${res%%${RS}*}
+	res="${res#*${RS}}"
+	c="${res%%${RS}*}"
+	res="${res#*${RS}}"
+	word="${prev_c}${res%%${RS}*}"
+
+	# We must advance lineno because scan_word() was run in a subshell.
+	lineno=$((${lineno} + ${ln_off}))
+	tok="T_WORD${US}${word}"
+
+	if ${here_awaiting_end}; then
+		# This word is a here-document delimiter: append it and a flag
+		# telling whether it contains any quoting character, then
+		# terminate the queue entry.
+		here_queue="${here_queue}${US}${word}"
+		case "${word}" in
+			*\\*|*'"'*|*"'"*)
+				here_queue="${here_queue}${US}true"
+				;;
+			*)
+				here_queue="${here_queue}${US}false"
+				;;
+		esac
+		here_queue="${here_queue}${RS}"
+		here_awaiting_end=false
+	fi
+}
+
+#
+# Token scanning
+#
+
+scan_word()
+{
+	local in_param="${1}"
+	shift 1
+	local res=
+	local word=
+	local quoted=
+	local lines=
+	local wordexp=
+
+	word=''
+	quoted=false
+	lines=0
+	while :; do
+		dbg "parsing word char '$c' at lineno $lineno"
+		case "${c}" in
+			'')
+				break
+				;;
+			"${LF}")
+				if ! ${in_param} && ! ${quoted}; then
+					break
+				fi
+				lineno=$((${lineno} + 1))
+				lines=$((${lines} + 1))
+				word="${word}${c}"
+				;;
+			' '|"${HT}"|'&'|'|'|';'|'('|')'|'<'|'>')
+				if ! ${in_param} && ! ${quoted}; then
+					break
+				fi
+				word="${word}${c}"
+				;;
+			'$')
+				case "${here_queue}" in *"${RS}"*)
+					if ${here_awaiting_end}; then
+						synerr '%s %s %s %s' \
+							'Word expansions' \
+							'not supported in' \
+							'here-document' \
+							'delimiters'
+					fi
+				esac
+				lgetc
+				if ! res=$(scan_wordexp); then
+					exit 1
+				fi
+				ln_off=${res%%${RS}*}
+				res="${res#*${RS}}"
+				c="${res%%${RS}*}"
+				res="${res#*${RS}}"
+				wordexp="${res%%${RS}*}"
+				# We must advance lineno because scan_wordexp()
+				# was run in a subshell.
+				lineno=$((${lineno} + ${ln_off}))
+				word="${word}${wordexp}"
+				# scan_wordexp() leaves behind an unused
+				# character, so we should skip the lgetc() call
+				# below.
+				continue
+				;;
+			'`')
+				synerr 'Backquoted (old-style) %s' \
+					'command substitution not supported'
+				break
+				;;
+			\\)
+				word="${word}${c}"
+				lgetc
+				case "${c}" in '')
+					# Bash, ksh93, mksh, and zsh ignore a
+					# backslash at the end of a file, but
+					# dash and BusyBox ash include it in the
+					# word.  To help with script
+					# portability, we'll throw an error
+					# (which is a reasonable thing to do
+					# anyway).
+					synerr 'Unexpected end of file %s' \
+						'after "\"'
+					;;
+				esac
+				word="${word}${c}"
+				;;
+			\')
+				word="${word}${c}"
+				while :; do
+					lgetc
+					word="${word}${c}"
+					case "${c}" in
+						'')
+							synerr '%s %s' \
+								'Unterminated' \
+								'quoted string'
+							;;
+						\')
+							break
+							;;
+					esac
+				done
+				;;
+			'"')
+				word="${word}${c}"
+				if ${quoted}; then
+					quoted=false
+				else
+					quoted=true
+				fi
+				;;
+			'}')
+				if ${in_param} && ! ${quoted}; then
+					break
+				fi
+				word="${word}${c}"
+				;;
+			*)
+				word="${word}${c}"
+				;;
+		esac
+		lgetc
+	done
+
+	if ${quoted}; then
+		synerr 'Unterminated quoted string'
+	fi
+
+	printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}"
+}
+
+# Scan a word expansion; ${c} is the character following the "$".  Meant to
+# be run in a command substitution: the result is printed as
+# "<line offset>${RS}<next character>${RS}<expansion text>".
+scan_wordexp()
+{
+	local res=
+	local toks=
+	local param=
+
+	wordexp=''
+	ln_off=0
+	case "${c}" in
+		'{')
+			# Parameter expansion brace
+			scan_wordexp_param_brace
+			;;
+		'(')
+			# Arithmetic expansion or command substitution
+			lgetc
+			case "${c}" in
+				'(')
+					# Arithmetic expansion
+					scan_wordexp_arith
+					;;
+				*)
+					# Command substitution
+					if ! res="$(run_sublexer "sub${fname}" \
+							${lineno} "${start}" \
+							"${c}")"; then
+						exit 1
+					fi
+					# run_sublexer() reports the line number
+					# it ended on, while our callers expect
+					# an offset to add to their own lineno.
+					ln_off=$((${res%%${RS}*} - ${lineno}))
+					res="${res#*${RS}}"
+					c="${res%%${RS}*}"
+					res="${res#*${RS}}"
+					toks="${res%%${RS}*}"
+					lineno=$((${lineno} + ${ln_off}))
+					wordexp="\$(${SOH}C${STX}${toks}"
+					wordexp="${wordexp}${ETX})"
+					# ")" is recognized in run_sublexer().
+					;;
+			esac
+			;;
+		[@*#?\$!A-Za-z0-9_-])
+			# The "$" in the pattern is escaped so that "$!" is not
+			# taken as a parameter expansion.
+			if ! res="$(scan_param)"; then
+				exit 1
+			fi
+			ln_off=${res%%${RS}*}
+			res="${res#*${RS}}"
+			c="${res%%${RS}*}"
+			res="${res#*${RS}}"
+			param="${res%%${RS}*}"
+			lineno=$((${lineno} + ${ln_off}))
+			wordexp="\$${param}"
+			;;
+		*)
+			# Not an expansion: keep the "$" as a literal character
+			# instead of dropping it.  ${c} is left for the caller.
+			wordexp='$'
+			;;
+	esac
+
+	printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}"
+	return 0
+}
+
+# Scan a braced parameter expansion; ${c} is the opening "{".  The expansion
+# text is stored in ${wordexp}, the number of lines consumed by a modifier
+# word in ${ln_off}, and ${c} is left on the character after the "}".
+scan_wordexp_param_brace()
+{
+	local mod=
+	local res=
+	local param=
+	local word=
+
+	mod=true
+
+	lgetc
+	case "${c}" in
+		'#')
+			lgetc
+			case "${c}" in
+				[@*#?\$!A-Za-z0-9_-])
+					# String length expansion
+					# (The "$" in the pattern is escaped so
+					# that "$!" is not expanded.)
+					if ! res="$(scan_param)"; then
+						exit 1
+					fi
+					ln_off=${res%%${RS}*}
+					res="${res#*${RS}}"
+					c="${res%%${RS}*}"
+					res="${res#*${RS}}"
+					param="${res%%${RS}*}"
+					lineno=$((${lineno} + ${ln_off}))
+					# Disable modifications.
+					mod=false
+					;;
+				*)
+					# Special parameter "#"
+					param='#'
+					;;
+			esac
+			;;
+		*)
+			if ! res="$(scan_param)"; then
+				exit 1
+			fi
+			ln_off=${res%%${RS}*}
+			res="${res#*${RS}}"
+			c="${res%%${RS}*}"
+			res="${res#*${RS}}"
+			param="${res%%${RS}*}"
+			lineno=$((${lineno} + ${ln_off}))
+			;;
+	esac
+	wordexp="\${${param}"
+
+	# If modifications are allowed
+	if ${mod}; then
+		# Check for modifications.
+		mod=false
+		case "${c}" in
+			':')
+				mod=true
+				wordexp="${wordexp}${c}"
+				lgetc
+				case "${c}" in '-'|'='|'?'|'+')
+					wordexp="${wordexp}${c}"
+					lgetc
+				;;
+				esac
+				;;
+			'-'|'='|'?'|'+')
+				mod=true
+				wordexp="${wordexp}${c}"
+				lgetc
+				;;
+			'%')
+				# "%" or "%%"
+				mod=true
+				wordexp="${wordexp}${c}"
+				lgetc
+				case "${c}" in '%')
+					wordexp="${wordexp}${c}"
+					lgetc
+					;;
+				esac
+				;;
+			'#')
+				# "#" or "##"
+				mod=true
+				wordexp="${wordexp}${c}"
+				lgetc
+				case "${c}" in '#')
+					wordexp="${wordexp}${c}"
+					lgetc
+					;;
+				esac
+				;;
+		esac
+	fi
+
+	# If a modification was found
+	if ${mod}; then
+		# Get word.
+		if ! res="$(scan_word true)"; then
+			exit 1
+		fi
+		ln_off=${res%%${RS}*}
+		res="${res#*${RS}}"
+		c="${res%%${RS}*}"
+		res="${res#*${RS}}"
+		word="${res%%${RS}*}"
+		# We must advance lineno because scan_word() was run in a
+		# subshell.
+		lineno=$((${lineno} + ${ln_off}))
+		wordexp="${wordexp}${word}"
+		dbg "param mod word: '$word'"
+	fi
+
+	# Check for right brace.
+	case "${c}" in
+		'}')
+			wordexp="${wordexp}${c}"
+			lgetc
+			;;
+		*)
+			synerr 'Missing "}"'
+			;;
+	esac
+
+	return 0
+}
+
+# Scan a parameter name starting at ${c}: a special parameter, a positional
+# parameter, or a variable name.  Meant to be run in a command substitution:
+# the result is printed as "0${RS}<next character>${RS}<parameter>".
+scan_param()
+{
+	local param=
+
+	param=''
+	case "${c}" in
+		[@*#?\$!0-])
+			# Special parameter
+			# (The "$" in the pattern is escaped so that "$!" is
+			# not taken as a parameter expansion.)
+			param="${c}"
+			lgetc
+			;;
+		[1-9])
+			# Positional parameter
+			param="${param}${c}"
+			lgetc
+			while :; do
+				case "${c}" in [!0-9])
+					break
+					;;
+				esac
+				param="${param}${c}"
+				lgetc
+			done
+			;;
+		[A-Za-z_])
+			# Parameter name
+			param="${param}${c}"
+			lgetc
+			while :; do
+				case "${c}" in [!A-Za-z0-9_])
+					break
+					;;
+				esac
+				param="${param}${c}"
+				lgetc
+			done
+			;;
+		*)
+			synerr 'Bad parameter name'
+			;;
+	esac
+
+	printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}"
+	return 0
+}
+
+# Scan an arithmetic expansion; ${c} is the second "(" of "$((".  The
+# expansion text is stored in ${wordexp}, the number of lines consumed is
+# added to ${ln_off}, and ${c} is left on the character after "))".
+scan_wordexp_arith()
+{
+	local arith=
+	local paren_lvl=
+	local res=
+	local sub_wordexp=
+	local sub_off=
+
+	arith=''
+	paren_lvl=0
+	# Step past the second "(".
+	lgetc
+	while :; do
+		case "${c}" in
+			'')
+				synerr 'end of file unexpected (%s)' \
+					'expecting "))"'
+				;;
+			"${LF}")
+				# Count lines like scan_word() does.
+				arith="${arith}${c}"
+				lineno=$((${lineno} + 1))
+				ln_off=$((${ln_off} + 1))
+				;;
+			'(')
+				arith="${arith}${c}"
+				paren_lvl=$((${paren_lvl} + 1))
+				;;
+			')')
+				if [ ${paren_lvl} -eq 0 ]; then
+					lgetc
+					case "${c}" in ')')
+						wordexp="\$((${arith}))"
+						lgetc
+						return 0
+						;;
+					esac
+					synerr 'Arithmetic expansion: ")" %s' \
+						'unexpected'
+				fi
+				arith="${arith}${c}"
+				paren_lvl=$((${paren_lvl} - 1))
+				;;
+			'$')
+				lgetc
+				if ! res=$(scan_wordexp); then
+					exit 1
+				fi
+				sub_off=${res%%${RS}*}
+				res="${res#*${RS}}"
+				c="${res%%${RS}*}"
+				res="${res#*${RS}}"
+				sub_wordexp="${res%%${RS}*}"
+				# We must advance lineno because scan_wordexp()
+				# was run in a subshell.
+				lineno=$((${lineno} + ${sub_off}))
+				ln_off=$((${ln_off} + ${sub_off}))
+				arith="${arith}${sub_wordexp}"
+				# scan_wordexp() leaves behind an unused
+				# character, which must be examined rather than
+				# discarded, so skip the lgetc() call below.
+				continue
+				;;
+			*)
+				arith="${arith}${c}"
+				;;
+		esac
+		lgetc
+	done
+}
+
+# Lex and parse the body of a command substitution.  Meant to be run in a
+# command substitution: the result is printed as
+# "<line number>${RS}<next character>${RS}<tokens>".
+#
+# Arguments: $1 - input name for error messages
+#            $2 - line number to start counting from
+#            $3 - name of the parser function to run
+#            $4 - first character of the body
+run_sublexer()
+{
+	local fn="${1}"
+	local ln="${2}"
+	local st="${3}"
+	local ch="${4}"
+	shift 4
+
+	# Initialize global variables.
+	fname="${fn}"
+	lineno=${ln}
+	start="${st}"
+	here_queue=''
+	here_awaiting_end=false
+	here_awaiting_word=false
+	tokens=''
+
+	# The first character was already read by the caller.
+	c="${ch}"
+	next
+
+	#dbg=true
+	# If this returns (does not exit), there are no errors.
+	${start}
+	# The body must be followed by the closing parenthesis.
+	case "${tok%${US}*}" in
+		T_RPAREN)
+			;;
+		*)
+			synerr 'Missing ")"'
+			;;
+	esac
+
+	# NOTE(review): the first field is the line number reached, not an
+	# offset, and ${tokens} itself contains ${RS} separators -- confirm that
+	# callers split this result as intended.
+	printf "%d${RS}%c${RS}%s" ${lineno} "${c}" "${tokens}"
+	return 0
+}
+
+#
+# Interface
+#
+
+# Lex and parse standard input and print the accepted tokens.
+#
+# Arguments: $1 - input name for error messages
+#            $2 - name of the parser function to run
+run_lexer()
+{
+	local fn="${1}"
+	local st="${2}"
+	shift 2
+
+	# Reset the lexer state.
+	fname="${fn}"
+	start="${st}"
+	lineno=1
+	tokens=''
+	here_queue=''
+	here_awaiting_end=false
+	here_awaiting_word=false
+
+	# Prime the lookahead character and the lookahead token.
+	lgetc
+	next
+
+	# A failing start rule means the input ended unexpectedly.
+	${start} || synexp ''
+	# Nothing may follow a complete parse.
+	accept T_EOF || synexp ''
+
+	# Return the tokens.
+	printf '%s' "${tokens}"
+
+	return 0
+}
+
+# If the current token matches the requested token type, append it to
+# ${tokens}, read the next token, and return 0.  Otherwise return 1.
+#
+# Reserved words, names, function names, and command names are all lexed as
+# T_WORD tokens; they are classified here according to what the parser asks
+# for.
+#
+# Arguments: $1 - requested token type
+accept()
+{
+	local t="${1}"
+	shift 1
+	local rw=
+
+	dbg "looking for $t, current tok ${tok%%${US}*}"
+	case "${t}" in
+		T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\
+		T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\
+		T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN)
+			dbg "looking for reserved word $t, have '$tok'"
+			if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
+				# Reserved words are recognized as literal
+				# T_WORDs.
+				if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+					return 1
+				fi
+				# T_WORD data unit must match reserved word
+				# exactly.
+				if ! [ "x${tok#T_WORD${US}}" = \
+						"x$(toktext "${t}")" ]; then
+					return 1
+				fi
+				# If the token matches the reserved word,
+				# replace it with the reserved word token.
+				tok="${t}"
+			fi
+			;;
+		T_NAME)
+			# Names are recognized as literal T_WORDs.
+			if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+				return 1
+			fi
+			# Validate name.  The word text, not the token type,
+			# must be checked: it may not be empty, start with
+			# anything but a letter or underscore, or contain any
+			# other character than letters, digits, and underscores.
+			case "${tok#T_WORD${US}}" in
+				''|[!A-Za-z_]*|*[!0-9A-Za-z_]*)
+					return 1
+					;;
+			esac
+			tok="T_NAME${US}${tok#T_WORD${US}}"
+			;;
+		T_FNAME)
+			# Function names are recognized as literal T_WORDs.
+			if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+				return 1
+			fi
+			# Validate name (same rules as for T_NAME).
+			case "${tok#T_WORD${US}}" in
+				''|[!A-Za-z_]*|*[!0-9A-Za-z_]*)
+					return 1
+					;;
+			esac
+			# Verify that the function name doesn't match any
+			# reserved words.
+			for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
+					T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
+					T_LBRACE T_RBRACE T_BANG T_IN; do
+				if [ "x${tok#T_WORD${US}}" = \
+						"x$(toktext "${rw}")" ]; then
+					tok="${rw}"
+					return 1
+				fi
+			done
+			tok="T_FNAME${US}${tok#T_WORD${US}}"
+			;;
+		T_CMDNAME)
+			# The first word of a simple command is to be checked
+			# for reserved words.
+			if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
+				return 1
+			fi
+			# Verify that the word doesn't match any reserved words.
+			for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
+					T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
+					T_LBRACE T_RBRACE T_BANG T_IN; do
+				if [ "x${tok#T_WORD${US}}" = \
+						"x$(toktext "${rw}")" ]; then
+					tok="${rw}"
+					return 1
+				fi
+			done
+			;;
+		*)
+			if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
+				return 1
+			fi
+			;;
+	esac
+
+	dbg "accept $t"
+	tokens="${tokens}${tok}${RS}"
+	next
+	return 0
+}
+
+expect()
+{
+	local t="${1}"
+	shift 1
+
+	if accept "${t}"; then
+		return 0
+	else
+		synexp "${t}"
+	fi
+}
diff --git a/eshtrans/frontend/main.esh b/eshtrans/frontend/main.esh
new file mode 100644
index 0000000..b9f93a6
--- /dev/null
+++ b/eshtrans/frontend/main.esh
@@ -0,0 +1,30 @@
+# Eggshell frontend interface
+#
+# Copyright (C) 2016  Patrick "P. J." McDermott
+#
+# This file is part of the Eggshell Compiler.
+#
+# The Eggshell Compiler is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# The Eggshell Compiler is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the Eggshell Compiler.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+esh_parse()
+{
+	local fn="${1}"
+	shift 1
+
+	if run_lexer "${fn}" complete_command; then
+		return 0
+	fi
+	return 1
+}
diff --git a/eshtrans/frontend/parser.esh b/eshtrans/frontend/parser.esh
new file mode 100644
index 0000000..d49fa77
--- /dev/null
+++ b/eshtrans/frontend/parser.esh
@@ -0,0 +1,591 @@
+# Eggshell parser
+#
+# Copyright (C) 2016  Patrick "P. J." McDermott
+#
+# This file is part of the Eggshell Compiler.
+#
+# The Eggshell Compiler is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# The Eggshell Compiler is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the Eggshell Compiler.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+ptrace=false
+
+#
+# Function tracing
+#
+
+ptrace_begn()
+{
+	local fn="${1}"
+	shift 1
+
+	if ${ptrace}; then
+		printf 'TRACE: BEGN %s()\n' "${fn}" >&2
+	fi
+}
+
+ptrace_pass()
+{
+	local fn="${1}"
+	shift 1
+
+	if ${ptrace}; then
+		printf 'TRACE: PASS %s()\n' "${fn}" >&2
+	fi
+}
+
+ptrace_fail()
+{
+	local fn="${1}"
+	shift 1
+
+	if ${ptrace}; then
+		printf 'TRACE: FAIL %s()\n' "${fn}" >&2
+	fi
+}
+
+#
+# Parser
+#
+
+# complete_command: list, optionally followed by a separator.
+complete_command()
+{
+	list || return 1
+	# The trailing separator is optional, so its status is ignored.
+	separator
+	return 0
+}
+
+# list: and_or, followed by any number of "separator and_or" pairs.
+list()
+{
+	ptrace_begn list
+	if ! and_or; then
+		ptrace_fail list
+		return 1
+	fi
+	while separator && and_or; do
+		:
+	done
+	ptrace_pass list
+	return 0
+}
+
+# and_or: pipelines joined by "&&" or "||", with optional newlines after
+# each operator.
+and_or()
+{
+	ptrace_begn and_or
+	if ! pipeline; then
+		ptrace_fail and_or
+		return 1
+	fi
+	while accept T_AND_IF || accept T_OR_IF; do
+		if linebreak && pipeline; then
+			continue
+		fi
+		# An operator must be followed by another pipeline.
+		ptrace_fail and_or
+		return 1
+	done
+	ptrace_pass and_or
+	return 0
+}
+
+# pipeline: optional "!" followed by a pipe_sequence.
+pipeline()
+{
+	ptrace_begn pipeline
+	# The "!" is optional, so the status of accept() is ignored.
+	accept T_BANG
+	if ! pipe_sequence; then
+		ptrace_fail pipeline
+		return 1
+	fi
+	ptrace_pass pipeline
+	return 0
+}
+
+# pipe_sequence: commands joined by "|", with optional newlines after each
+# "|".
+pipe_sequence()
+{
+	ptrace_begn pipe_sequence
+	if ! command; then
+		ptrace_fail pipe_sequence
+		return 1
+	fi
+	while accept T_PIPE; do
+		if linebreak && command; then
+			continue
+		fi
+		# A "|" must be followed by another command.
+		ptrace_fail pipe_sequence
+		return 1
+	done
+	ptrace_pass pipe_sequence
+	return 0
+}
+
+# command: a simple_command, or a compound_command with an optional
+# redirect_list.
+command()
+{
+	ptrace_begn command
+	if simple_command; then
+		:
+	elif compound_command; then
+		# Redirections are optional here.
+		redirect_list
+	else
+		ptrace_fail command
+		return 1
+	fi
+	ptrace_pass command
+	return 0
+}
+
+# compound_command: the first of brace_group, subshell, for_clause,
+# case_clause, if_clause, while_clause, or until_clause that matches.
+compound_command()
+{
+	ptrace_begn compound_command
+	if brace_group || subshell || for_clause || case_clause || \
+			if_clause || while_clause || until_clause; then
+		ptrace_pass compound_command
+		return 0
+	fi
+	ptrace_fail compound_command
+	return 1
+}
+
+# subshell: "(" compound_list ")".
+subshell()
+{
+	ptrace_begn subshell
+	if ! accept T_LPAREN || ! compound_list || ! expect T_RPAREN; then
+		ptrace_fail subshell
+		return 1
+	fi
+	ptrace_pass subshell
+	return 0
+}
+
+# compound_list: optional newlines, a term, and an optional separator.
+compound_list()
+{
+	ptrace_begn compound_list
+	# Leading newlines are optional.
+	newline_list
+	if ! term; then
+		ptrace_fail compound_list
+		return 1
+	fi
+	# The trailing separator is optional.
+	separator
+	ptrace_pass compound_list
+	return 0
+}
+
+term()
+{
+	ptrace_begn term
+	if and_or; then
+		while separator; do
+			and_or
+		done
+		ptrace_pass term
+		return 0
+	fi
+	ptrace_fail term
+	return 1
+}
+
+# for_clause: "for" name, optional newlines, an optional "in" word list
+# ended by a sequential separator, and a do_group.
+for_clause()
+{
+	ptrace_begn for_clause
+	if ! accept T_FOR || ! expect T_NAME || ! linebreak; then
+		ptrace_fail for_clause
+		return 1
+	fi
+	if accept T_IN; then
+		# The word list may be empty, so its status is ignored.
+		wordlist
+		if ! sequential_sep; then
+			ptrace_fail for_clause
+			return 1
+		fi
+	fi
+	if ! do_group; then
+		ptrace_fail for_clause
+		return 1
+	fi
+	ptrace_pass for_clause
+	return 0
+}
+
+# wordlist: one or more words.
+wordlist()
+{
+	ptrace_begn wordlist
+	if ! accept T_WORD; then
+		ptrace_fail wordlist
+		return 1
+	fi
+	while accept T_WORD; do
+		:
+	done
+	ptrace_pass wordlist
+	return 0
+}
+
+# case_clause: "case" word "in", an optional list of case items, and "esac".
+case_clause()
+{
+	accept T_CASE || return 1
+	if expect T_WORD && linebreak && expect T_IN && linebreak; then
+		# The item list is optional, so its status is ignored.
+		case_list || case_list_ns
+		expect T_ESAC
+		return 0
+	fi
+	return 1
+}
+
+# case_list_ns: an optional case_list followed by a case_item_ns.
+case_list_ns()
+{
+	if case_list && case_item_ns; then
+		return 0
+	fi
+	case_item_ns || return 1
+	return 0
+}
+
+# case_list: one or more case items.
+case_list()
+{
+	case_item || return 1
+	while case_item; do
+		:
+	done
+	return 0
+}
+
+# case_item_ns: a case item without a terminating ";;": an optional "(",
+# a pattern, ")", an optional compound_list, and optional newlines.
+case_item_ns()
+{
+	# The opening parenthesis is optional.
+	accept T_LPAREN
+	# The token type is T_RPAREN; a bare "RPAREN" can never match and would
+	# make every such item a syntax error.
+	if pattern && expect T_RPAREN; then
+		compound_list
+		if linebreak; then
+			return 0
+		fi
+	fi
+	return 1
+}
+
+# case_item: an optional "(", a pattern, ")", a compound_list or optional
+# newlines, ";;", and optional newlines.
+case_item()
+{
+	# The opening parenthesis is optional.
+	accept T_LPAREN
+	if ! pattern || ! expect T_RPAREN; then
+		return 1
+	fi
+	if ! compound_list && ! linebreak; then
+		return 1
+	fi
+	if ! expect T_DSEMI || ! linebreak; then
+		return 1
+	fi
+	return 0
+}
+
+# pattern: a word that is not a reserved word, followed by any number of
+# "|" word pairs.
+pattern()
+{
+	accept T_CMDNAME || return 1
+	while accept T_PIPE; do
+		expect T_WORD
+	done
+	return 0
+}
+
+# if_clause: "if" compound_list "then" compound_list, an optional else_part,
+# and "fi".
+if_clause()
+{
+	accept T_IF || return 1
+	if compound_list && expect T_THEN && compound_list; then
+		# The else part is optional, so its status is ignored.
+		else_part
+		expect T_FI
+		return 0
+	fi
+	return 1
+}
+
+# else_part: any number of "elif ... then ..." parts followed by an "else"
+# part.  Returns 0 only when an "else" part was parsed.
+else_part()
+{
+	while accept T_ELIF; do
+		if ! compound_list || ! expect T_THEN || ! compound_list; then
+			return 1
+		fi
+	done
+	if accept T_ELSE && compound_list; then
+		return 0
+	fi
+	return 1
+}
+
+# while_clause: "while" compound_list do_group.
+while_clause()
+{
+	if accept T_WHILE && compound_list && do_group; then
+		return 0
+	fi
+	return 1
+}
+
+# until_clause: "until" compound_list do_group.
+until_clause()
+{
+	if accept T_UNTIL && compound_list && do_group; then
+		return 0
+	fi
+	return 1
+}
+
+# function_body: a compound_command with an optional redirect_list.
+function_body()
+{
+	ptrace_begn function_body
+	if ! compound_command; then
+		ptrace_fail function_body
+		return 1
+	fi
+	# Redirections are optional here.
+	redirect_list
+	ptrace_pass function_body
+	return 0
+}
+
+# brace_group: "{" compound_list "}".
+brace_group()
+{
+	ptrace_begn brace_group
+	if ! accept T_LBRACE || ! compound_list || ! expect T_RBRACE; then
+		ptrace_fail brace_group
+		return 1
+	fi
+	ptrace_pass brace_group
+	return 0
+}
+
+# do_group: "do" compound_list "done".
+do_group()
+{
+	ptrace_begn do_group
+	if ! accept T_DO || ! compound_list || ! expect T_DONE; then
+		ptrace_fail do_group
+		return 1
+	fi
+	ptrace_pass do_group
+	return 0
+}
+
+simple_command()
+{
+	ptrace_begn simple_command
+	if cmd_prefix; then
+		if cmd_word; then
+			cmd_suffix
+		fi
+		ptrace_pass simple_command
+		return 0
+	elif accept T_FNAME; then
+		if accept T_LPAREN; then
+			expect T_RPAREN
+			if linebreak && function_body; then
+				ptrace_pass simple_command
+				return 0
+			fi
+		else
+			cmd_suffix
+			ptrace_pass simple_command
+			return 0
+		fi
+	elif cmd_name; then
+		cmd_suffix
+		ptrace_pass simple_command
+		return 0
+	fi
+	ptrace_fail simple_command
+	return 1
+}
+
+# cmd_name: a word that is not a reserved word.
+cmd_name()
+{
+	ptrace_begn cmd_name
+	# TODO: Assignment
+	if ! accept T_CMDNAME; then
+		ptrace_fail cmd_name
+		return 1
+	fi
+	ptrace_pass cmd_name
+	return 0
+}
+
+# cmd_word: a word.
+cmd_word()
+{
+	ptrace_begn cmd_word
+	# TODO: Assignment
+	if ! accept T_WORD; then
+		ptrace_fail cmd_word
+		return 1
+	fi
+	ptrace_pass cmd_word
+	return 0
+}
+
+# cmd_prefix: one or more redirections or assignment words.
+cmd_prefix()
+{
+	ptrace_begn cmd_prefix
+	if ! io_redirect && ! accept T_ASSIGNMENT_WORD; then
+		ptrace_fail cmd_prefix
+		return 1
+	fi
+	while io_redirect || accept T_ASSIGNMENT_WORD; do
+		:
+	done
+	ptrace_pass cmd_prefix
+	return 0
+}
+
+# cmd_suffix: one or more redirections or words.
+cmd_suffix()
+{
+	ptrace_begn cmd_suffix
+	if ! io_redirect && ! accept T_WORD; then
+		ptrace_fail cmd_suffix
+		return 1
+	fi
+	while io_redirect || accept T_WORD; do
+		:
+	done
+	ptrace_pass cmd_suffix
+	return 0
+}
+
+# redirect_list: one or more redirections.
+redirect_list()
+{
+	ptrace_begn redirect_list
+	if ! io_redirect; then
+		ptrace_fail redirect_list
+		return 1
+	fi
+	while io_redirect; do
+		:
+	done
+	ptrace_pass redirect_list
+	return 0
+}
+
+# io_redirect: a file redirection or a here-document redirection.
+io_redirect()
+{
+	ptrace_begn io_redirect
+	if ! io_file && ! io_here; then
+		ptrace_fail io_redirect
+		return 1
+	fi
+	ptrace_pass io_redirect
+	return 0
+}
+
+# io_file: a file redirection operator followed by a filename.
+io_file()
+{
+	if { accept T_LESS || accept T_LESSAND || accept T_GREAT || \
+			accept T_GREATAND || accept T_DGREAT || \
+			accept T_LESSGREAT || accept T_CLOBBER; } && \
+			filename; then
+		return 0
+	fi
+	return 1
+}
+
+# filename: a word.
+filename()
+{
+	accept T_WORD || return 1
+	return 0
+}
+
+# io_here: "<<" or "<<-" followed by the here-document delimiter.
+io_here()
+{
+	if { accept T_DLESS || accept T_DLESSDASH; } && here_end; then
+		return 0
+	fi
+	return 1
+}
+
+here_end()
+{
+	if accept T_WORD; then
+		return 0
+	fi
+	return 1
+}
+
+# newline_list: one or more newlines.
+newline_list()
+{
+	accept T_NEWLINE || return 1
+	while accept T_NEWLINE; do
+		:
+	done
+	return 0
+}
+
+# linebreak: zero or more newlines; always succeeds.
+linebreak()
+{
+	newline_list || :
+	return 0
+}
+
+# separator_op: "&" or ";".
+separator_op()
+{
+	accept T_AND || accept T_SEMI || return 1
+	return 0
+}
+
+# separator: a separator_op with optional newlines, or one or more newlines.
+separator()
+{
+	if separator_op && linebreak; then
+		return 0
+	fi
+	newline_list || return 1
+	return 0
+}
+
+# sequential_sep: ";" with optional newlines, or one or more newlines.
+sequential_sep()
+{
+	ptrace_begn sequential_sep
+	if accept T_SEMI; then
+		if ! linebreak; then
+			ptrace_fail sequential_sep
+			return 1
+		fi
+	elif ! newline_list; then
+		ptrace_fail sequential_sep
+		return 1
+	fi
+	ptrace_pass sequential_sep
+	return 0
+}
author	P. J. McDermott <pj@pehjota.net>	2016-02-21 04:39:39 (EST)
committer	P. J. McDermott <pj@pehjota.net>	2016-02-21 04:39:39 (EST)
commit	c9f95bf852092d8b1640b92f1c31e84420bb51dd (patch)
tree	b47ff3e2a91fa39f234df6ddfd9559b6d4714de2 /eshtrans/frontend
parent	4e6bfd6fe0d48ddf49cd61bb8cb31881a1e5e369 (diff)
download	eggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.zip eggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.tar.gz eggshell-c9f95bf852092d8b1640b92f1c31e84420bb51dd.tar.bz2