awk: fix ignored escaped 'u' warning.
[jackhill/mal.git] / awk / reader.awk
# Convert a string token into the reader's internal string value.
#
# The token arrives with both surrounding double quotes.  The closing quote
# is dropped and the opening quote is kept as the first character of the
# result, where it serves as the marker for string values.  The escape
# sequences \\ (backslash), \" (double quote) and \n (newline) are decoded;
# any other backslash sequence is left exactly as written.
#
# Decoding is done in one left-to-right pass.  Routing escaped backslashes
# through a placeholder byte (0xF7) would corrupt any input string that
# already contains that byte, so no placeholder is used.
#
# token        raw token text, quotes included
# out, pos, c  locals
# Returns the tagged, unescaped string.
function reader_read_string(token,    out, pos, c)
{
    # Strip the closing quote only; the opening quote stays as the tag.
    token = substr(token, 1, length(token) - 1)

    out = ""
    while ((pos = index(token, "\\")) > 0) {
        # Copy everything before the backslash unchanged.
        out = out substr(token, 1, pos - 1)
        c = substr(token, pos + 1, 1)
        if (c == "\\" || c == "\"") {
            out = out c
        } else if (c == "n") {
            out = out "\n"
        } else {
            # Unknown escape (or a trailing lone backslash): keep verbatim.
            out = out "\\" c
        }
        # Continue after the two characters just consumed.
        token = substr(token, pos + 2)
    }
    return out token
}
10
# Turn one non-structural token into a tagged value.
#
#   true, false, nil          -> "#" followed by the token
#   token starting with ":"   -> ":" followed by the whole token
#   token starting with '"'   -> whatever reader_read_string() returns
#   optional "-" then digits  -> "+" followed by the token
#   anything else             -> "'" followed by the token
#
# The checks are tried in the order listed above.
function reader_read_atom(token)
{
    if (token == "true" || token == "false" || token == "nil") {
        return "#" token
    }
    if (token ~ /^:/) {
        return ":" token
    }
    if (token ~ /^"/) {
        return reader_read_string(token)
    }
    if (token ~ /^-?[0-9]+$/) {
        return "+" token
    }
    return "'" token
}
28
# Read forms up to the closing delimiter and collect them in a freshly
# allocated types_heap entry.
#
# reader   token array; reader["curidx"] is the index of the next token
# type     tag character prepended to the heap index in the result
# end      closing delimiter token to look for
# slot, count, item, failure   locals
#
# Returns type followed by the heap index on success.  On failure the
# partially built entry is released and an error value (starting with "!")
# is returned: either the error from a nested read, or an EOF error when
# the tokens run out before the closing delimiter.
function reader_read_list(reader, type, end,    slot, count, item, failure)
{
    slot = types_allocate()
    count = 0
    # Assume EOF until a nested read reports something more specific.
    failure = "!\"expect " end ", got EOF"

    while (reader["curidx"] in reader) {
        if (reader[reader["curidx"]] == end) {
            reader["curidx"]++
            types_heap[slot]["len"] = count
            return type slot
        }
        item = reader_read_from(reader)
        if (item ~ /^!/) {
            failure = item
            break
        }
        types_heap[slot][count++] = item
    }

    # Record the length before releasing the partially built entry.
    types_heap[slot]["len"] = count
    types_release(type slot)
    return failure
}
51
# Read key/value pairs up to the closing "}" and store them in a freshly
# allocated types_heap entry, keyed by the tagged key value.
#
# reader   token array; reader["curidx"] is the index of the next token
# idx, key, val, seen   locals (seen records which keys were already stored)
#
# Returns "{" followed by the heap index on success.  On failure the
# partially built entry is released and an error value (starting with "!")
# is returned:
#   - the error from a nested read of a key or value,
#   - an error if a key is neither a string nor a keyword,
#   - an error if the tokens run out right after a key,
#   - an EOF error if the tokens run out before "}".
#
# When the same key appears more than once the last value wins.  The value
# it displaces is released first, so it is not left behind without an owner.
# NOTE(review): this assumes types_release() is the correct way to drop a
# stored value, as the error paths below already do for keys.
function reader_read_hash(reader,    idx, key, val, seen)
{
    idx = types_allocate()
    while (reader["curidx"] in reader) {
        if (reader[reader["curidx"]] == "}") {
            reader["curidx"]++
            return "{" idx
        }
        key = reader_read_from(reader)
        if (key ~ /^!/) {
            types_release("{" idx)
            return key
        }
        # Only values tagged as string (") or keyword (:) may be keys.
        if (key !~ /^[":]/) {
            types_release(key)
            types_release("{" idx)
            return "!\"Hash-map key must be string or keyword."
        }
        # A key with no token after it means an odd number of elements.
        if (!(reader["curidx"] in reader)) {
            types_release("{" idx)
            return "!\"Element count of hash-map must be even."
        }
        val = reader_read_from(reader)
        if (val ~ /^!/) {
            types_release("{" idx)
            return val
        }
        # Duplicate key: release the value about to be overwritten.
        if (key in seen) {
            types_release(types_heap[idx][key])
        }
        seen[key] = 1
        types_heap[idx][key] = val
    }
    types_release("{" idx)
    return "!\"expect }, got EOF"
}
84
# Expand a reader-macro prefix: read the form that follows and wrap it in a
# two-element list whose first element is the given symbol.
#
# reader   token array; reader["curidx"] is the index of the next token
# symbol   tagged symbol to place at the head of the list
# form, cell   locals
#
# Returns "(" followed by the heap index of the new list, or the error
# value (starting with "!") produced while reading the following form.
function reader_read_abbrev(reader, symbol,    form, cell)
{
    form = reader_read_from(reader)
    if (form !~ /^!/) {
        cell = types_allocate()
        types_heap[cell][0] = symbol
        types_heap[cell][1] = form
        types_heap[cell]["len"] = 2
        form = "(" cell
    }
    return form
}
97
# Read two forms, metadata first and then the value, and build the list
# (with-meta value metadata).
#
# reader   token array; reader["curidx"] is the index of the next token
# md, target, node   locals
#
# Returns "(" followed by the heap index of the new list.  If reading the
# metadata fails its error is returned as is; if reading the value fails,
# the metadata already read is released before the error is returned.
function reader_read_with_meta(reader,    md, target, node)
{
    md = reader_read_from(reader)
    if (md !~ /^!/) {
        target = reader_read_from(reader)
        if (target ~ /^!/) {
            types_release(md)
            return target
        }
        node = types_allocate()
        types_heap[node][0] = "'with-meta"
        types_heap[node][1] = target
        types_heap[node][2] = md
        types_heap[node]["len"] = 3
        return "(" node
    }
    return md
}
116
# Read one complete form starting at the current token and advance
# reader["curidx"] past it.
#
# reader    token array; reader["curidx"] is the index of the next token
# current   local
#
# Dispatches on the current token:
#   ( [ {          -> list / vector / hash-map readers
#   ) ] }          -> "Unexpected token" error
#   ' ` ~ ~@ @     -> two-element list headed by quote, quasiquote,
#                     unquote, splice-unquote or deref
#   ^              -> with-meta form
#   anything else  -> atom
#
# Returns the tagged value, or an error value starting with "!".  If there
# is no token left to read, an EOF error is returned instead of indexing
# past the end of the token array, which would silently create an empty
# element there and yield an empty-name symbol.
function reader_read_from(reader,    current)
{
    # "+ 0" forces a numeric subscript: nothing in the visible code sets
    # curidx before the first read, and an unset value used directly as a
    # subscript is "" rather than 0.
    if (!((reader["curidx"] + 0) in reader)) {
        return "!\"Unexpected EOF."
    }
    current = reader[reader["curidx"]++]
    switch (current) {
    case "(":
        return reader_read_list(reader, "(", ")")
    case "[":
        return reader_read_list(reader, "[", "]")
    case "{":
        return reader_read_hash(reader)
    case ")":
    case "]":
    case "}":
        return "!\"Unexpected token '" current "'."
    case "'":
        return reader_read_abbrev(reader, "'quote")
    case "`":
        return reader_read_abbrev(reader, "'quasiquote")
    case "~":
        return reader_read_abbrev(reader, "'unquote")
    case "~@":
        return reader_read_abbrev(reader, "'splice-unquote")
    case "@":
        return reader_read_abbrev(reader, "'deref")
    case "^":
        return reader_read_with_meta(reader)
    default:
        return reader_read_atom(current)
    }
}
147
# Split source text into tokens and store them in reader[0..n-1].
#
# str      text to tokenize
# reader   array that receives the tokens; reader["len"] is set to the
#          token count on success
# len, r   locals (r receives the capture groups from match())
#
# Whitespace and commas between tokens are skipped, and tokens that start
# with ";" (comments) are dropped.  Returns "" on success.  If some text is
# left over that is neither a token nor whitespace/commas, an error value
# quoting that remainder is returned and reader["len"] is not set.
function reader_tokenizer(str, reader,    len, r)
{
    len = 0
    while (match(str, /^[ \t\r\n,]*(~@|[\[\]{}()'`~^@]|\"(\\[^\r\n]|[^\\"\r\n])*\"|;[^\r\n]*|[^ \t\r\n\[\]{}('"`,;)^~@][^ \t\r\n\[\]{}('"`,;)]*)/, r)) {
        # Consume the matched prefix before deciding what to do with it.
        str = substr(str, RSTART + RLENGTH)
        if (r[1] ~ /^;/) {
            continue
        }
        reader[len++] = r[1]
    }
    if (str ~ /^[ \t\r\n,]*$/) {
        reader["len"] = len
        return ""
    }
    return "!\"Cannot tokenize '" str "'."
}
162
# Entry point of the reader: tokenize str and read exactly one form.
#
# str      source text
# reader   token array filled by reader_tokenizer(); when a caller omits
#          it, it acts as a local scratch array
# result   local
#
# Returns:
#   - the tokenizer's error value if tokenizing fails,
#   - "#nil" if the text contains no tokens,
#   - the error value from reading the form, if any,
#   - an "Unexpected token" error (after releasing the form already read)
#     if tokens remain after the first complete form,
#   - otherwise the tagged value of the form.
function reader_read_str(str,    reader, result)
{
    result = reader_tokenizer(str, reader)
    if (result == "") {
        if (reader["len"] == 0) {
            return "#nil"
        }
        result = reader_read_from(reader)
        # A successful read must have consumed every token.
        if (result !~ /^!/ && reader["len"] != reader["curidx"]) {
            types_release(result)
            result = "!\"Unexpected token '" reader[reader["curidx"]] "'."
        }
    }
    return result
}