Adds GNU awk implementation
[jackhill/mal.git] / gawk / reader.awk
# Decode the escape sequences of a double-quoted string token.
#
#   token  the raw token, including its opening and closing quote
#   (out, m, head are locals)
#
# Returns the decoded text with the closing quote removed and the opening
# quote still in front, or a string starting with `!"` describing the
# first backslash that is not followed by `"`, `n` or `\`.
function reader_read_string(token,    out, m, head)
{
	# Drop only the last character (the closing quote).
	token = substr(token, 1, length(token) - 1)

	while (match(token, /\\["n\\]?/, m)) {
		# A lone backslash means the next character is not a known escape.
		if (m[0] == "\\") {
			return "!\"Invalid escape character '" substr(token, RSTART, 2) "'."
		}

		head = substr(token, 1, RSTART - 1)
		if (m[0] == "\\n") {
			out = out head "\n"
		} else {
			# \" or \\ : keep the escaped character itself.
			out = out head substr(m[0], 2, 1)
		}

		# Continue scanning after the escape just consumed.
		token = substr(token, RSTART + RLENGTH)
	}

	return out token
}
19
# Convert a single non-structural token into its tagged value string.
#
#   true / false / nil        -> "#" followed by the token
#   token starting with ":"   -> ":" followed by the whole token
#   token starting with `"`   -> result of reader_read_string(token)
#   optional "-" then digits  -> "+" followed by the token
#   anything else             -> "'" followed by the token
function reader_read_atom(token)
{
	if (token == "true" || token == "false" || token == "nil") {
		return "#" token
	}
	if (token ~ /^:/) {
		return ":" token
	}
	if (token ~ /^"/) {
		return reader_read_string(token)
	}
	if (token ~ /^-?[0-9]+$/) {
		return "+" token
	}
	return "'" token
}
37
# Read forms from the token stream until the closing delimiter `end`.
#
#   reader  token array; reader["curidx"] is the index of the next token
#   type    prefix put in front of the allocated index in the result
#   end     closing delimiter token to stop at
#   (node, count, item are locals)
#
# On success the elements are stored in types_heap[node][0..count-1],
# types_heap[node]["len"] holds the count, the closing delimiter is
# consumed and `type node` is returned.  On failure the partially built
# object is released and a string starting with "!" is returned.
function reader_read_list(reader, type, end,    node, count, item)
{
	node = types_allocate()
	count = 0

	for (;;) {
		if (!(reader["curidx"] in reader)) {
			# Ran out of tokens before seeing the closing delimiter.
			item = "!\"expect " end ", got EOF"
			break
		}
		if (reader[reader["curidx"]] == end) {
			types_heap[node]["len"] = count
			reader["curidx"]++
			return type node
		}
		item = reader_read_from(reader)
		if (item ~ /^!/) {
			break
		}
		types_heap[node][count++] = item
	}

	# Shared failure exit: record the length so far before releasing.
	types_heap[node]["len"] = count
	types_release(type node)
	return item
}
60
# Read key/value pairs from the token stream until the closing "}".
#
#   reader  token array; reader["curidx"] is the index of the next token
#   (idx, key, val are locals)
#
# Each key must read as a value starting with `"` or ":"; values are
# stored as types_heap[idx][key].  On success the "}" is consumed and
# "{" idx is returned.  On any failure the partially built map is
# released and a string starting with "!" is returned.
#
# When the same key appears more than once the later value wins; the
# value it replaces is released first so it is not left orphaned.
function reader_read_hash(reader,    idx, key, val)
{
	idx = types_allocate()
	while (reader["curidx"] in reader) {
		if (reader[reader["curidx"]] == "}") {
			reader["curidx"]++
			return "{" idx
		}
		key = reader_read_from(reader)
		if (key ~ /^!/) {
			types_release("{" idx)
			return key
		}
		if (key !~ /^[":]/) {
			# The rejected key may itself be a compound value: release it.
			types_release(key)
			types_release("{" idx)
			return "!\"Hash-map key must be string or keyword."
		}
		if (!(reader["curidx"] in reader)) {
			types_release("{" idx)
			return "!\"Element count of hash-map must be even."
		}
		val = reader_read_from(reader)
		if (val ~ /^!/) {
			types_release("{" idx)
			return val
		}
		# Keys always start with `"` or ":", so this membership test only
		# hits entries stored by an earlier iteration of this loop.
		# NOTE(review): relies on types_allocate() having created
		# types_heap[idx] as a sub-array -- confirm against types.awk.
		if (key in types_heap[idx]) {
			types_release(types_heap[idx][key])
		}
		types_heap[idx][key] = val
	}
	types_release("{" idx)
	return "!\"expect }, got EOF"
}
93
# Expand a one-character reader abbreviation into a two-element list.
#
#   reader  token array positioned just after the abbreviation token
#   symbol  value stored as element 0 of the new list
#   (form, node are locals)
#
# Returns "(" followed by the new heap index, whose elements are
# `symbol` and the next form read; if reading that form fails, its
# "!"-prefixed error string is returned unchanged.
function reader_read_abbrev(reader, symbol,    form, node)
{
	form = reader_read_from(reader)
	if (form !~ /^!/) {
		node = types_allocate()
		types_heap[node][0] = symbol
		types_heap[node][1] = form
		types_heap[node]["len"] = 2
		return "(" node
	}
	return form
}
106
# Expand the "^" reader macro: read two forms (metadata first, then the
# value) and build the three-element list ('with-meta value metadata).
#
#   reader  token array positioned just after the "^" token
#   (meta, val, idx are locals)
#
# Returns "(" followed by the new heap index, or the "!"-prefixed error
# from whichever read failed.  If the second read fails, the metadata
# already read is released before returning.
function reader_read_with_meta(reader,    meta, val, idx)
{
	meta = reader_read_from(reader)
	if (meta ~ /^!/) {
		return meta
	}

	val = reader_read_from(reader)
	if (val !~ /^!/) {
		idx = types_allocate()
		types_heap[idx][0] = "'with-meta"
		types_heap[idx][1] = val
		types_heap[idx][2] = meta
		types_heap[idx]["len"] = 3
		return "(" idx
	}

	# The value failed to parse: do not leave the metadata dangling.
	types_release(meta)
	return val
}
125
# Read one complete form starting at the current token and return its
# tagged value string, or a string starting with "!" on error.
#
#   reader   token array; reader["curidx"] is the index of the next token
#            and is advanced past everything consumed
#   (current is a local)
#
# Dispatches on the current token: opening brackets go to the list/hash
# readers, stray closing brackets are errors, the reader-macro tokens
# (' ` ~ ~@ @ ^) wrap the following form(s), and everything else is
# handed to reader_read_atom.
function reader_read_from(reader,    current)
{
	# Running out of tokens (e.g. a trailing ' or ^ with nothing after it)
	# must be reported here.  Without this check the lookup below would
	# create an empty reader[...] element and parse it as a symbol with an
	# empty name.  The "+ 0" makes a never-assigned curidx behave as 0,
	# matching what the post-increment below does.
	if (!((reader["curidx"] + 0) in reader)) {
		return "!\"expect form, got EOF"
	}
	current = reader[reader["curidx"]++]
	switch (current) {
	case "(":
		return reader_read_list(reader, "(", ")")
	case "[":
		return reader_read_list(reader, "[", "]")
	case "{":
		return reader_read_hash(reader)
	case ")":
	case "]":
	case "}":
		return "!\"Unexpected token '" current "'."
	case "'":
		return reader_read_abbrev(reader, "'quote")
	case "`":
		return reader_read_abbrev(reader, "'quasiquote")
	case "~":
		return reader_read_abbrev(reader, "'unquote")
	case "~@":
		return reader_read_abbrev(reader, "'splice-unquote")
	case "@":
		return reader_read_abbrev(reader, "'deref")
	case "^":
		return reader_read_with_meta(reader)
	default:
		return reader_read_atom(current)
	}
}
156
# Split source text into tokens.
#
#   str     the text to tokenize
#   reader  array that receives the tokens at indices 0..n-1 and the
#           token count in reader["len"]; it is not cleared first
#   (count, m are locals)
#
# Leading blanks, tabs, CR, LF and commas are skipped before each token.
# Tokens that start with ";" are matched but not stored.  Returns "" on
# success; if text that cannot be matched is left over, returns a
# "!"-prefixed message quoting it (reader["len"] is not set then).
function reader_tokenizer(str, reader,    count, m)
{
	count = 0
	while (match(str, /^[ \t\r\n,]*(~@|[\[\]{}()'`~^@]|\"(\\[^\r\n]|[^\\"\r\n])*\"|;[^\r\n]*|[^ \t\r\n\[\]{}('"`,;)^~@][^ \t\r\n\[\]{}('"`,;)]*)/, m)) {
		# m[1] is the token without the skipped separators.
		if (m[1] !~ /^;/) {
			reader[count++] = m[1]
		}
		str = substr(str, RSTART + RLENGTH)
	}

	# Only separators may remain once no further token matches.
	if (str !~ /^[ \t\r\n,]*$/) {
		return "!\"Cannot tokenize '" str "'."
	}

	reader["len"] = count
	return ""
}
171
# Tokenize `str` and read exactly one form from it.
#
#   str     source text
#   reader  array used to hold the tokens and the read position
#   (result is a local)
#
# Returns the tagged value of the form, "#nil" when the text contains no
# tokens, or a "!"-prefixed error string.  Tokens left over after the
# first form are an error; the form already read is released in that case.
function reader_read_str(str, reader,    result)
{
	result = reader_tokenizer(str, reader)
	if (result != "") {
		return result
	}
	if (reader["len"] == 0) {
		return "#nil"
	}

	result = reader_read_from(reader)
	if (result !~ /^!/ && reader["len"] != reader["curidx"]) {
		# Something follows the first complete form.
		types_release(result)
		result = "!\"Unexpected token '" reader[reader["curidx"]] "'."
	}
	return result
}