summaryrefslogtreecommitdiffstats
path: root/parsing/codegen.sh
blob: 8d9d2fdcd2e320769854af0d3223f56cd92f57b5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Most recently read input character; written by sgetc.
sc=

# sgetc: read a single byte from standard input and store it in the global
# `sc`.  `sc` ends up empty when dd produces no output (e.g. at end of input).
sgetc()
{
	local padded=

	# Command substitution drops trailing newlines, so a '.' sentinel is
	# printed after the byte and removed again below; that way a newline
	# byte survives the capture.
	padded="$(
		dd bs=1 count=1 2>/dev/null
		printf '.'
	)"
	sc="${padded%.}"
}

# codegen_sub TOKENS
#
# Write the text of every token in TOKENS to standard output.
#
# TOKENS is one string holding tokens separated by ${RS}.  Each token is handed
# to toktext and is then followed by a single space, unless the part of the
# token in front of its last ${US} is T_NEWLINE.
#
# Globals:   RS, US (read)
# Arguments: $1 - the ${RS}-separated token string
# Outputs:   whatever toktext writes, plus the separating spaces
# Returns:   0
codegen_sub()
{
	local array="${1}"
	local t=
	local restore_glob=
	# Declaring IFS local confines the separator change to this function, so
	# the caller's IFS is intact again on return.
	local IFS="${RS}"

	# Split the token string into the positional parameters with pathname
	# expansion switched off: tokens are data, and one that happens to form
	# a matching glob must not be replaced by file names.  The noglob option
	# is only turned back off if it was off on entry.
	case "$-" in
		*f*)
			;;
		*)
			restore_glob=1
			set -f
			;;
	esac
	# shellcheck disable=SC2086 -- field splitting on ${RS} is intended here
	set -- ${array}
	if [ -n "${restore_glob}" ]; then
		set +f
	fi

	for t in "$@"; do
		toktext "${t}"
		case "${t%"${US}"*}" in
			T_NEWLINE)
				;;
			*)
				printf ' '
				;;
		esac
	done
	return 0
}

# The token stack is encoded in a string in the following grammar:
#     Terminal symbols:
#         TOKEN
#     Production rules:
#         stack  = tokens [ '<SOH>' type '<STX>' stack '<ETX>' [ tokens ] ] ;
#         tokens = TOKEN { '<RS>' TOKEN } ;
#         type   = 'C' ;
# We need to recurse through this stack to get to all the tokens.
# Each nested stack element (an array of tokens) is run through the codegen,
# and the resulting text is spliced into the enclosing array at the position
# where the element appeared.
#
# parse_stack reads the encoded stack from standard input one character at a
# time (via sgetc), stops at end of input or at the ${ETX} that closes the
# current element, and finally passes the collected array to codegen_sub.
#
# Globals:   SOH, ETX (read); sc (written by sgetc)
# Inputs:    the encoded stack on standard input
# Outputs:   the output of codegen_sub for the collected array
# Returns:   the exit status of codegen_sub
parse_stack()
{
	local array=''

	while :; do
		sgetc
		case "${sc}" in
			'')
				# EOF
				break
				;;
			"${SOH}")
				# New stack element; the next character is its type.
				# A type other than 'C' is skipped without any
				# further handling.
				sgetc
				case "${sc}" in
					'C')
						# Command substitution
						sgetc  # STX
						# The nested call runs in a subshell
						# but shares this standard input, so it
						# consumes the element up to its ETX.
						# The '.' sentinel has to be printed
						# inside the substitution: trailing
						# newlines are stripped from whatever
						# the substitution captures, and the
						# sentinel is what keeps the nested
						# output's own trailing newlines from
						# being lost.
						array="${array}$(parse_stack; printf '.')"
						array="${array%.}"
						;;
				esac
				;;
			"${ETX}")
				# End of stack element
				break
				;;
			*)
				# Token character
				array="${array}${sc}"
				;;
		esac
	done
	codegen_sub "${array}"
}

# codegen TOKS
#
# Pipe the encoded token stack TOKS into parse_stack, which writes the
# generated text to standard output.
#
# Arguments: $1 - the encoded token stack
# Returns:   0 when the pipeline's exit status is zero, 1 otherwise
codegen()
{
	local toks="${1}"
	local status=1
	shift 1

	# Collapse whatever the pipeline returns into a plain 0/1 result.
	printf '%s' "${toks}" | parse_stack && status=0
	return "${status}"
}