Remove old demo parsing code

author: P. J. McDermott <pj@pehjota.net> 2016-02-21 21:13:33 (EST)
committer: P. J. McDermott <pj@pehjota.net> 2016-02-21 21:13:33 (EST)
commit: 8a9a6865954ade85d4a55f955829ae08941c31b8 (patch)
tree: 966d1c1a8bc4c6f7ed0671b8c53f1272be77dc4f /parsing/lexer.sh
parent: 5649a9aa1ce56c0cfdcab088983f2d3a4cb32a4c (diff)
download: eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.zip
eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.tar.gz
eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.tar.bz2
1 files changed, 0 insertions, 958 deletions
diff --git a/parsing/lexer.sh b/parsing/lexer.sh
deleted file mode 100644
index 886e7f8..0000000
--- a/parsing/lexer.sh
+++ /dev/null
@@ -1,958 +0,0 @@
-fname=
-lineno=
-ln_off=
-start=
-c=
-wordexp=
-here_queue=
-here_awaiting_end=
-here_awaiting_word=
-tok=
-tokens=
-
-#
-# Error handling (used by scanning and interface functions)
-#
-
-error()
-{
-	local fmt="${1}"
-	shift 1
-
-	case "${fname}" in
-		'-')
-			printf "stdin:%d: ${fmt}\n" ${lineno} "${@}" >&2
-			;;
-		*)
-			printf "%s:%d: ${fmt}\n" "${fname}" ${lineno} "${@}" >&2
-			;;
-	esac
-
-	# The parser and lexer run in a subshell, so this just returns up to the
-	# caller like an exception.
-	exit 1
-}
-
-synexp()
-{
-	local t="${1}"
-	shift 1
-
-	if [ "x${t}" = 'x' ]; then
-		synerr '%s unexpected' "$(tokname "${tok}")"
-	else
-		synerr '%s unexpected (expecting %s)' "$(tokname "${tok}")" \
-			"$(tokname "${t}")"
-	fi
-}
-
-synerr()
-{
-	local fmt="${1}"
-	shift 1
-
-	error "Syntax error: ${fmt}" "${@}"
-}
-
-#
-# Input reading
-#
-
-lgetc()
-{
-	c="$(dd bs=1 count=1 2>/dev/null; printf '.')"
-	c="${c%.}"
-}
-
-#
-# Token recognition
-#
-
-# Recognize the next token in the input and store it in tok.
-# Blanks, comments and backslash-newline pairs are skipped.  When
-# here_awaiting_word is true, a here-document body is read instead (see
-# next_here()).  Operators are recognized here; everything else is handed to
-# next_io() or next_word().
-next()
-{
-	if ${here_awaiting_word}; then
-		next_here
-		return
-	fi
-	while :; do
-		dbg "parsing char '$c' at lineno $lineno"
-		case "${c}" in
-			'')
-				# Empty c means end of input.
-				lgetc
-				tok=T_EOF
-				return
-				;;
-			"${LF}")
-				if ${here_awaiting_end}; then
-					# A here-document operator must be
-					# followed by its delimiter word.
-					synexp ''
-				else
-					# A complete here-document record is
-					# queued, so its body starts after this
-					# newline.
-					case "${here_queue}" in *"${RS}"*)
-						here_awaiting_end=false
-						here_awaiting_word=true
-						;;
-					esac
-				fi
-				lgetc
-				lineno=$((${lineno} + 1))
-				tok=T_NEWLINE
-				return
-				;;
-			' '|"${HT}")
-				# Skip blanks between tokens.
-				lgetc
-				continue
-				;;
-			\\)
-				lgetc
-				# Backslash-newline is a line continuation.
-				case "${c}" in "${LF}")
-					lineno=$((${lineno} + 1))
-					lgetc
-					continue
-					;;
-				esac
-				# Otherwise the backslash starts a word; it is
-				# passed on so next_word() can prepend it.
-				next_word \\
-				return
-				;;
-			'#')
-				# Comment: discard up to (not including) the
-				# newline or end of input.
-				lgetc
-				while :; do
-					case "${c}" in "${LF}"|'')
-						break
-						;;
-					esac
-					lgetc
-				done
-				continue
-				;;
-			'&')
-				lgetc
-				case "${c}" in '&')
-					lgetc
-					tok=T_AND_IF
-					return
-					;;
-				esac
-				tok=T_AND
-				return
-				;;
-			'|')
-				lgetc
-				case "${c}" in '|')
-					lgetc
-					tok=T_OR_IF
-					return
-					;;
-				esac
-				tok=T_PIPE
-				return
-				;;
-			';')
-				lgetc
-				case "${c}" in ';')
-					lgetc
-					tok=T_DSEMI
-					return
-					;;
-				esac
-				dbg T_SEMI
-				tok=T_SEMI
-				return
-				;;
-			'(')
-				lgetc
-				tok=T_LPAREN
-				return
-				;;
-			')')
-				lgetc
-				tok=T_RPAREN
-				return
-				;;
-			'<'|'>')
-				next_io
-				return
-				;;
-			*)
-				next_word ''
-				return
-				;;
-		esac
-		# NOTE(review): every branch above returns or continues, so
-		# this call appears to be unreachable.
-		lgetc
-	done
-}
-
-# Read the body of the first queued here-document and store it in tok as a
-# T_WORD.  The record is removed from here_queue and here_awaiting_word is
-# cleared.  Reaching end of input before the delimiter line is a syntax error.
-next_here()
-{
-	local here=
-	local here_strip_tabs=
-	local here_end=
-	local here_escaped=
-	local line=
-	local word=
-	local res=
-	local wordexp=
-
-	# Dequeue the here-document.
-	# A record is "<strip tabs>${US}<delimiter word>${US}<quoted>", where
-	# the first and last fields are "true" or "false".
-	here="${here_queue%%${RS}*}"
-	here_strip_tabs="${here%%${US}*}"
-	here_end="${here%${US}*}"
-	# Drop backslashes and quote characters from the delimiter word.
-	here_end="$(printf '%s' "${here_end#*${US}}" | \
-		sed 's/\\//g; s/"//g; s/'\''//g;')"  # Stupid Vim: ')"
-	here_escaped="${here##*${US}}"
-	here_queue="${here_queue#*${RS}}"
-	here_awaiting_word=false
-
-	line=''
-	word=''
-	while :; do
-		case "${c}" in
-			'')
-				# Bash throws a warning when EOF occurs in a
-				# here document.  mksh throws an error.  dash,
-				# BusyBox ash, ksh93, and zsh accept EOF as a
-				# delimiter.  We aim for the lowest common
-				# denominator, so throw an error like mksh does.
-				synerr 'Here-document "%s" unclosed' \
-					"${here_end}"
-				;;
-			"${LF}")
-				# End of a line: append it to the body, then
-				# stop if the line is the delimiter.  c is left
-				# on the newline.
-				# NOTE(review): the delimiter line itself ends
-				# up in the returned word -- confirm intended.
-				word="${word}${line}"
-				case "${line}" in "${here_end}")
-					tok="T_WORD${US}${word}"
-					return
-					;;
-				esac
-				word="${word}${c}"
-				line=''
-				;;
-			"${HT}")
-				# With tab stripping enabled, tabs are dropped
-				# only while the current line is still empty.
-				if ${here_strip_tabs}; then
-					case "${line}" in
-						'')
-							;;
-						*)
-							line="${line}${c}"
-							;;
-					esac
-				else
-					line="${line}${c}"
-				fi
-				;;
-			'$')
-				# Expansions are scanned only when the delimiter
-				# word contained no quoting.
-				if ! ${here_escaped}; then
-					lgetc
-					if ! res="$(scan_wordexp)"; then
-						exit 1
-					fi
-					# Result record:
-					# "<line offset>${RS}<next char>${RS}<text>"
-					ln_off=${res%%${RS}*}
-					res="${res#*${RS}}"
-					c="${res%%${RS}*}"
-					res="${res#*${RS}}"
-					wordexp="${res%%${RS}*}"
-					lineno=$((${lineno} + ${ln_off}))
-					line="${line}${wordexp}"
-					# c already holds the next unused
-					# character, so skip the lgetc below.
-					continue
-				else
-					line="${line}${c}"
-				fi
-				;;
-			*)
-				line="${line}${c}"
-				;;
-		esac
-		lgetc
-	done
-}
-
-next_io()
-{
-	case "${c}" in
-		'<')
-			lgetc
-			case "${c}" in
-				'<')
-					lgetc
-					case "${c}" in '-')
-						lgetc
-						tok=T_DLESSDASH
-						here_queue="${here_queue}true"
-						here_awaiting_end=true
-						here_awaiting_word=false
-						break
-						;;
-					esac
-					tok=T_DLESS
-					here_queue="${here_queue}false"
-					here_awaiting_end=true
-					here_awaiting_word=false
-					break
-					;;
-				'&')
-					lgetc
-					tok=T_LESSAND
-					break
-					;;
-				'>')
-					lgetc
-					tok=T_LESSGREAT
-					break
-					;;
-			esac
-			tok=T_LESS
-			break
-			;;
-		'>')
-			lgetc
-			case "${c}" in
-				'>')
-					lgetc
-					tok=T_DGREAT
-					break
-					;;
-				'&')
-					lgetc
-					tok=T_GREATAND
-					break
-					;;
-				'|')
-					lgetc
-					tok=T_CLOBBER
-					break
-					;;
-			esac
-			tok=T_GREAT
-			break
-			;;
-	esac
-}
-
-next_word()
-{
-	local prev_c="${1}"
-	shift 1
-	local res=
-	local word=
-
-	if ! res="$(scan_word false)"; then
-		exit 1
-	fi
-	ln_off=${res%%${RS}*}
-	res="${res#*${RS}}"
-	c="${res%%${RS}*}"
-	res="${res#*${RS}}"
-	word="${prev_c}${res%%${RS}*}"
-
-	# We must advance lineno because scan_word() was run in a subshell.
-	lineno=$((${lineno} + ${ln_off}))
-	tok="T_WORD${US}${word}"
-
-	if ${here_awaiting_end}; then
-		here_queue="${here_queue}${US}${word}"
-		case "${word}" in
-			*\\*|*'"'*|*"'"*)
-				here_queue="${here_queue}${US}true"
-				;;
-			*)
-				here_queue="${here_queue}${US}false"
-				;;
-		esac
-		here_queue="${here_queue}${RS}"
-		here_awaiting_end=false
-	fi
-}
-
-#
-# Token scanning
-#
-
-# Scan one word starting at the current character and print the record
-# "<lines>${RS}<next char>${RS}<word>" on standard output, where <lines> is the
-# number of newlines included in the word and <next char> is the first
-# character that is not part of it.
-#   $1  "true" when scanning the word inside a "${...}" expansion: blanks,
-#       operators and newlines then belong to the word and an unquoted "}"
-#       ends it; "false" otherwise
-# Callers run this in a command substitution and read the state back from the
-# printed record.
-scan_word()
-{
-	local in_param="${1}"
-	local res=
-	local word=
-	local quoted=
-	local lines=
-	local wordexp=
-
-	word=''
-	quoted=false
-	lines=0
-	while :; do
-		dbg "parsing word char '$c' at lineno $lineno"
-		case "${c}" in
-			'')
-				# End of input ends the word.
-				break
-				;;
-			"${LF}")
-				if ! ${in_param} && ! ${quoted}; then
-					break
-				fi
-				lineno=$((${lineno} + 1))
-				lines=$((${lines} + 1))
-				word="${word}${c}"
-				;;
-			' '|"${HT}"|'&'|'|'|';'|'('|')'|'<'|'>')
-				# Blanks and operator characters end an unquoted
-				# word outside of a parameter expansion.
-				if ! ${in_param} && ! ${quoted}; then
-					break
-				fi
-				word="${word}${c}"
-				;;
-			'$')
-				# NOTE(review): this check only runs when
-				# here_queue already holds a complete record --
-				# confirm that is the intended condition.
-				case "${here_queue}" in *"${RS}"*)
-					if ${here_awaiting_end}; then
-						synerr '%s %s %s %s' \
-							'Word expansions' \
-							'not supported in' \
-							'here-document' \
-							'delimiters'
-					fi
-				esac
-				lgetc
-				if ! res=$(scan_wordexp); then
-					exit 1
-				fi
-				# Result record:
-				# "<line offset>${RS}<next char>${RS}<text>"
-				ln_off=${res%%${RS}*}
-				res="${res#*${RS}}"
-				c="${res%%${RS}*}"
-				res="${res#*${RS}}"
-				wordexp="${res%%${RS}*}"
-				# We must advance lineno because scan_wordexp()
-				# was run in a subshell.
-				lineno=$((${lineno} + ${ln_off}))
-				word="${word}${wordexp}"
-				# scan_wordexp() leaves behind an unused
-				# character, so we should skip the lgetc() call
-				# below.
-				continue
-				;;
-			'`')
-				synerr 'Backquoted (old-style) %s' \
-					'command substitution not supported'
-				break
-				;;
-			\\)
-				# Keep the backslash and the character it
-				# escapes together in the word.
-				word="${word}${c}"
-				lgetc
-				case "${c}" in '')
-					# Bash, ksh93, mksh, and zsh ignore a
-					# backslash at the end of a file, but
-					# dash and BusyBox ash include it in the
-					# word.  To help with script
-					# portability, we'll throw an error
-					# (which is a reasonable thing to do
-					# anyway).
-					synerr 'Unexpected end of file %s' \
-						'after "\"'
-					;;
-				esac
-				word="${word}${c}"
-				;;
-			\')
-				# Single-quoted string: copy everything up to
-				# and including the closing quote.
-				word="${word}${c}"
-				while :; do
-					lgetc
-					word="${word}${c}"
-					case "${c}" in
-						'')
-							synerr '%s %s' \
-								'Unterminated' \
-								'quoted string'
-							;;
-						\')
-							break
-							;;
-					esac
-				done
-				;;
-			'"')
-				# Double quotes toggle the quoted state; the
-				# quote characters stay in the word.
-				word="${word}${c}"
-				if ${quoted}; then
-					quoted=false
-				else
-					quoted=true
-				fi
-				;;
-			'}')
-				# An unquoted "}" ends the word of a parameter
-				# expansion.
-				if ${in_param} && ! ${quoted}; then
-					break
-				fi
-				word="${word}${c}"
-				;;
-			*)
-				word="${word}${c}"
-				;;
-		esac
-		lgetc
-	done
-
-	if ${quoted}; then
-		synerr 'Unterminated quoted string'
-	fi
-
-	printf "%d${RS}%c${RS}%s" ${lines} "${c}" "${word}"
-}
-
-scan_wordexp()
-{
-	local res=
-	local toks=
-	local param=
-
-	wordexp=''
-	ln_off=0
-	case "${c}" in
-		'{')
-			# Parameter expansion brace
-			scan_wordexp_param_brace
-			;;
-		'(')
-			# Arithmetic expansion or command substitution
-			lgetc
-			case "${c}" in
-				'(')
-					# Arithmetic expansion
-					scan_wordexp_arith
-					;;
-				*)
-					# Command substitution
-					if ! res="$(run_sublexer "sub${fname}" \
-							${lineno} "${start}" \
-							"${c}")"; then
-						exit 1
-					fi
-					ln_off=${res%%${RS}*}
-					res="${res#*${RS}}"
-					c="${res%%${RS}*}"
-					res="${res#*${RS}}"
-					toks="${res%%${RS}*}"
-					lineno=${ln_off}
-					wordexp="\$(${SOH}C${STX}${toks}"
-					wordexp="${wordexp}${ETX})"
-					# ")" is recognized in run_sublexer().
-					;;
-			esac
-			;;
-		[@*#?$!A-Za-z0-9_-])
-			if ! res="$(scan_param)"; then
-				exit 1
-			fi
-			ln_off=${res%%${RS}*}
-			res="${res#*${RS}}"
-			c="${res%%${RS}*}"
-			res="${res#*${RS}}"
-			param="${res%%${RS}*}"
-			lineno=$((${lineno} + ${ln_off}))
-			wordexp="\$${param}"
-			;;
-	esac
-
-	printf "%d${RS}%c${RS}%s" ${ln_off} "${c}" "${wordexp}"
-	return 0
-}
-
-# Scan a "${...}" parameter expansion; c is "{" on entry.  Sets wordexp to the
-# text of the expansion and leaves c on the character after the closing "}".
-# A missing "}" is a syntax error.
-scan_wordexp_param_brace()
-{
-	local mod=
-	local res=
-	local param=
-	local word=
-
-	# mod starts as "modifications allowed" and later means "a modification
-	# was found".
-	mod=true
-
-	lgetc
-	case "${c}" in
-		'#')
-			lgetc
-			case "${c}" in
-				[@*#?$!A-Za-z0-9_-])
-					# String length expansion
-					if ! res="$(scan_param)"; then
-						exit 1
-					fi
-					ln_off=${res%%${RS}*}
-					res="${res#*${RS}}"
-					c="${res%%${RS}*}"
-					res="${res#*${RS}}"
-					param="${res%%${RS}*}"
-					lineno=$((${lineno} + ${ln_off}))
-					# Disable modifications.
-					mod=false
-					;;
-				*)
-					# Special parameter "#"
-					param='#'
-					;;
-			esac
-			;;
-		*)
-			if ! res="$(scan_param)"; then
-				exit 1
-			fi
-			ln_off=${res%%${RS}*}
-			res="${res#*${RS}}"
-			c="${res%%${RS}*}"
-			res="${res#*${RS}}"
-			param="${res%%${RS}*}"
-			lineno=$((${lineno} + ${ln_off}))
-			;;
-	esac
-	# NOTE(review): in the string-length case the leading "#" is not added
-	# back to wordexp here -- confirm that is intended.
-	wordexp="\${${param}"
-
-	# If modifications are allowed
-	if ${mod}; then
-		# Check for modifications.
-		mod=false
-		case "${c}" in
-			':')
-				# ":" optionally followed by -, =, ? or +
-				mod=true
-				wordexp="${wordexp}${c}"
-				lgetc
-				case "${c}" in '-'|'='|'?'|'+')
-					wordexp="${wordexp}${c}"
-					lgetc
-				;;
-				esac
-				;;
-			'-'|'='|'?'|'+')
-				mod=true
-				wordexp="${wordexp}${c}"
-				lgetc
-				;;
-			'%')
-				# "%" or "%%"
-				mod=true
-				wordexp="${wordexp}${c}"
-				lgetc
-				case "${c}" in '%')
-					wordexp="${wordexp}${c}"
-					lgetc
-					;;
-				esac
-				;;
-			'#')
-				# "#" or "##"
-				mod=true
-				wordexp="${wordexp}${c}"
-				lgetc
-				case "${c}" in '#')
-					wordexp="${wordexp}${c}"
-					lgetc
-					;;
-				esac
-				;;
-		esac
-	fi
-
-	# If a modification was found
-	if ${mod}; then
-		# Get word.
-		if ! res="$(scan_word true)"; then
-			exit 1
-		fi
-		ln_off=${res%%${RS}*}
-		res="${res#*${RS}}"
-		c="${res%%${RS}*}"
-		res="${res#*${RS}}"
-		word="${res%%${RS}*}"
-		# We must advance lineno because scan_word() was run in a
-		# subshell.
-		lineno=$((${lineno} + ${ln_off}))
-		wordexp="${wordexp}${word}"
-		dbg "param mod word: '$word'"
-	fi
-
-	# Check for right brace.
-	case "${c}" in
-		'}')
-			wordexp="${wordexp}${c}"
-			lgetc
-			;;
-		*)
-			synerr 'Missing "}"'
-			;;
-	esac
-
-	return 0
-}
-
-scan_param()
-{
-	local param=
-
-	param=''
-	case "${c}" in
-		[@*#?$!0-])
-			# Special parameter
-			param="${c}"
-			lgetc
-			;;
-		[1-9])
-			# Positional parameter
-			param="${param}${c}"
-			lgetc
-			while :; do
-				case "${c}" in [!0-9])
-					break
-					;;
-				esac
-				param="${param}${c}"
-				lgetc
-			done
-			;;
-		[A-Za-z_])
-			# Parameter name
-			param="${param}${c}"
-			lgetc
-			while :; do
-				case "${c}" in [!A-Za-z0-9_])
-					break
-					;;
-				esac
-				param="${param}${c}"
-				lgetc
-			done
-			;;
-		*)
-			synerr 'Bad parameter name'
-			;;
-	esac
-
-	printf "%d${RS}%c${RS}%s" 0 "${c}" "${param}"
-	return 0
-}
-
-scan_wordexp_arith()
-{
-	local arith=
-	local paren_lvl=
-	local res=
-	local sub_wordexp=
-
-	arith=''
-	paren_lvl=0
-	while :; do
-		lgetc
-		case "${c}" in
-			'')
-				synerr 'end of file unexpected (%s)' \
-					'expecting "))"'
-				;;
-			'(')
-				arith="${arith}${c}"
-				paren_lvl=$((${paren_lvl} + 1))
-				;;
-			')')
-				if [ ${paren_lvl} -eq 0 ]; then
-					lgetc
-					case "${c}" in ')')
-						wordexp="\$((${arith}))"
-						lgetc
-						return 0
-						;;
-					esac
-					synerr 'Arithmetic expansion: ")" %s' \
-						'unexpected'
-				fi
-				arith="${arith}${c}"
-				paren_lvl=$((${paren_lvl} - 1))
-				;;
-			'$')
-				lgetc
-				if ! res=$(scan_wordexp); then
-					exit 1
-				fi
-				ln_off=${res%%${RS}*}
-				res="${res#*${RS}}"
-				c="${res%%${RS}*}"
-				res="${res#*${RS}}"
-				sub_wordexp="${res%%${RS}*}"
-				# We must advance lineno because scan_wordexp()
-				# was run in a subshell.
-				lineno=$((${lineno} + ${ln_off}))
-				arith="${arith}${sub_wordexp}"
-				;;
-			*)
-				arith="${arith}${c}"
-				;;
-		esac
-	done
-}
-
-#
-# Interface
-#
-
-# Check the current token.  If it matches, add it to the syntax array.
-#   $1  token type to look for
-# Returns 0 after appending tok to tokens and reading the next token, or 1 if
-# the current token does not match.
-# Reserved words, names, function names and command names all arrive as
-# T_WORD tokens and are converted here:
-#   reserved word  the word text must equal the reserved word's text
-#   T_NAME         the word text must match [A-Za-z_][0-9A-Za-z_]*
-#   T_FNAME        as T_NAME, and must not be a reserved word
-#   T_CMDNAME      must not be a reserved word
-# When a T_FNAME or T_CMDNAME word turns out to be a reserved word, tok is
-# replaced with that reserved word's token before returning 1.
-accept()
-{
-	local t="${1}"
-	local rw=
-	local name=
-
-	dbg "looking for $t, current tok ${tok%%${US}*}"
-	case "${t}" in
-		T_IF|T_THEN|T_ELSE|T_ELIF|T_FI|\
-		T_DO|T_DONE|T_CASE|T_ESAC|T_WHILE|T_UNTIL|\
-		T_FOR|T_LBRACE|T_RBRACE|T_BANG|T_IN)
-			dbg "looking for reserved word $t, have '$tok'"
-			if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
-				# Reserved words are recognized as literal
-				# T_WORDs.
-				if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
-					return 1
-				fi
-				# T_WORD data unit must match reserved word
-				# exactly.
-				if ! [ "x${tok#T_WORD${US}}" = \
-						"x$(toktext "${t}")" ]; then
-					return 1
-				fi
-				# If the token matches the reserved word,
-				# replace it with the reserved word token.
-				tok="${t}"
-			fi
-			;;
-		T_NAME)
-			# Names are recognized as literal T_WORDs.
-			if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
-				return 1
-			fi
-			# Validate name.  The word text (not the token type)
-			# is checked, and every character is tested because a
-			# trailing "*" in a pattern would accept any suffix.
-			name="${tok#T_WORD${US}}"
-			case "${name}" in
-				''|[!A-Za-z_]*|*[!0-9A-Za-z_]*)
-					return 1
-					;;
-			esac
-			tok="T_NAME${US}${name}"
-			;;
-		T_FNAME)
-			# Function names are recognized as literal T_WORDs.
-			if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
-				return 1
-			fi
-			name="${tok#T_WORD${US}}"
-			# Verify that the function name doesn't match any
-			# reserved words.  This runs before the name check so
-			# that "{", "}" and "!" are still converted to their
-			# reserved word tokens.
-			for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
-					T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
-					T_LBRACE T_RBRACE T_BANG T_IN; do
-				if [ "x${name}" = \
-						"x$(toktext "${rw}")" ]; then
-					tok="${rw}"
-					return 1
-				fi
-			done
-			# Validate name (see T_NAME above for the pattern).
-			case "${name}" in
-				''|[!A-Za-z_]*|*[!0-9A-Za-z_]*)
-					return 1
-					;;
-			esac
-			tok="T_FNAME${US}${name}"
-			;;
-		T_CMDNAME)
-			# The first word of a simple command is to be checked
-			# for reserved words.
-			if ! [ "x${tok%%${US}*}" = 'xT_WORD' ]; then
-				return 1
-			fi
-			# Verify that the word doesn't match any reserved words.
-			for rw in T_IF T_THEN T_ELSE T_ELIF T_FI T_DO T_DONE \
-					T_CASE T_ESAC T_WHILE T_UNTIL T_FOR \
-					T_LBRACE T_RBRACE T_BANG T_IN; do
-				if [ "x${tok#T_WORD${US}}" = \
-						"x$(toktext "${rw}")" ]; then
-					tok="${rw}"
-					return 1
-				fi
-			done
-			;;
-		*)
-			if ! [ "x${tok%%${US}*}" = "x${t}" ]; then
-				return 1
-			fi
-			;;
-	esac
-
-	dbg "accept $t"
-	tokens="${tokens}${tok}${RS}"
-	next
-	return 0
-}
-
-expect()
-{
-	local t="${1}"
-
-	if accept "${t}"; then
-		return 0
-	else
-		synexp "${t}"
-	fi
-}
-
-# Called by the lexer, not the parser
-run_sublexer()
-{
-	local fn="${1}"
-	local ln="${2}"
-	local st="${3}"
-	local ch="${4}"
-	shift 4
-
-	# Initialize global variables.
-	fname="${fn}"
-	lineno=${ln}
-	start="${st}"
-	here_queue=''
-	here_awaiting_end=false
-	here_awaiting_word=false
-	tokens=''
-
-	c="${ch}"
-	next
-
-	#dbg=true
-	# If this returns (does not exit), there are no errors.
-	${start}
-	case "${tok%${US}*}" in
-		T_RPAREN)
-			;;
-		*)
-			synerr 'Missing ")"'
-			;;
-	esac
-
-	printf "%d${RS}%c${RS}%s" ${lineno} "${c}" "${tokens}"
-	return 0
-}
-
-# Lex and parse standard input from line 1.
-#   $1  input name for error messages
-#   $2  parser start function, run as a command
-# The input must end with T_EOF once the start function returns.
-# Prints the accepted tokens on standard output.
-run_lexer()
-{
-	local fn="${1}"
-	local st="${2}"
-	shift 2
-
-	# Initialize global variables.
-	tokens=''
-	here_awaiting_word=false
-	here_awaiting_end=false
-	here_queue=''
-	start="${st}"
-	lineno=1
-	fname="${fn}"
-
-	# Read the first character and recognize the first token.
-	lgetc
-	next
-
-	# If this returns (does not exit), there are no errors.
-	${start}
-	accept T_EOF || synexp ''
-
-	# Return the tokens.
-	printf '%s' "${tokens}"
-
-	return 0
-}
author	P. J. McDermott <pj@pehjota.net>	2016-02-21 21:13:33 (EST)
committer	P. J. McDermott <pj@pehjota.net>	2016-02-21 21:13:33 (EST)
commit	8a9a6865954ade85d4a55f955829ae08941c31b8 (patch)
tree	966d1c1a8bc4c6f7ed0671b8c53f1272be77dc4f /parsing/lexer.sh
parent	5649a9aa1ce56c0cfdcab088983f2d3a4cb32a4c (diff)
download	eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.zip eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.tar.gz eggshell-8a9a6865954ade85d4a55f955829ae08941c31b8.tar.bz2