DISABLE FDs (REMOVE ME).
[jackhill/mal.git] / awk / reader.awk
CommitLineData
8c7587af
MK
# Decode the escape sequences inside a string token.
#
# token: raw token text, including the opening and closing double quote.
# v, r:  locals (current escaped character, decoded output so far);
#        callers do not pass them.
#
# Returns the decoded text with the closing quote removed and the opening
# quote kept (presumably it doubles as the string type tag -- confirm
# against the printer).  "\\" decodes to a backslash, "\"" to a double
# quote and "\n" to a newline; any other backslash pair is copied through
# unchanged.
function reader_read_string(token,    v, r)
{
	# Drop only the closing quote.
	token = substr(token, 1, length(token) - 1)
	r = ""
	# Decode in one left-to-right pass.  No placeholder character is
	# substituted for "\\", so every input character survives as-is.
	while (match(token, /\\./)) {
		v = substr(token, RSTART + 1, 1)
		r = r substr(token, 1, RSTART - 1)
		if (v == "n") {
			r = r "\n"
		} else if (v == "\\" || v == "\"") {
			r = r v
		} else {
			# Unknown escape: keep both characters.
			r = r "\\" v
		}
		token = substr(token, RSTART + 2)
	}
	return r token
}
10
# Turn one non-structural token into a tagged value.
#
# token: the token text.
#
# Returns the token prefixed with a one-character tag:
#   "#" for true / false / nil,
#   ":" for tokens that start with a colon,
#   "+" for optionally negative decimal integers,
#   "'" for everything else;
# tokens that start with a double quote are decoded by
# reader_read_string().  A quoted token that lacks a proper closing quote
# yields an error value (a string starting with "!").
function reader_read_atom(token)
{
	if (token == "true" || token == "false" || token == "nil") {
		return "#" token
	}
	if (token ~ /^:/) {
		return ":" token
	}
	if (token ~ /^"/) {
		# The tokenizer accepts a missing closing quote; reject it here.
		if (token !~ /^\"(\\.|[^\\"])*\"$/) {
			return "!\"Expected '\"', got EOF."
		}
		return reader_read_string(token)
	}
	if (token ~ /^-?[0-9]+$/) {
		return "+" token
	}
	return "'" token
}
32
# Read forms until the closing delimiter and collect them in a new heap
# object.
#
# reader: token array; reader["curidx"] is the index of the next token.
# type:   tag prepended to the heap index in the returned value.
# end:    the closing delimiter token to stop at.
# idx, len, ret: locals; callers do not pass them.
#
# Returns type followed by the heap index on success.  If an element fails
# to read, or the tokens run out before the delimiter, the partially built
# object is released and an error value (starting with "!") is returned.
function reader_read_list(reader, type, end,    idx, len, ret)
{
	idx = types_allocate()
	len = 0
	while (reader["curidx"] in reader) {
		if (reader[reader["curidx"]] == end) {
			reader["curidx"]++
			types_heap[idx]["len"] = len
			return type idx
		}
		ret = reader_read_from(reader)
		if (ret ~ /^!/) {
			break
		}
		types_heap[idx][len++] = ret
	}
	# Shared failure path: record the length so the release sees every
	# element stored so far, then drop the object.
	types_heap[idx]["len"] = len
	types_release(type idx)
	# ret starts with "!" only when the loop was left because an element
	# failed; otherwise the tokens ran out.
	if (ret ~ /^!/) {
		return ret
	}
	return "!\"expected '" end "', got EOF"
}
55
# Read key/value pairs until "}" and store them in a new heap object.
#
# reader: token array; reader["curidx"] is the index of the next token.
# idx, key, val: locals; callers do not pass them.
#
# Returns "{" followed by the heap index on success.  Returns an error
# value (starting with "!") and releases the partially built map when a
# key or value fails to read, a key's tagged value does not start with
# '"' or ':', a key has no value, or the tokens run out before "}".
function reader_read_hash(reader,    idx, key, val)
{
	idx = types_allocate()
	while (reader["curidx"] in reader) {
		if (reader[reader["curidx"]] == "}") {
			reader["curidx"]++
			return "{" idx
		}
		key = reader_read_from(reader)
		if (key ~ /^!/) {
			types_release("{" idx)
			return key
		}
		if (key !~ /^[":]/) {
			# The rejected key may itself be a heap object, so drop it too.
			types_release(key)
			types_release("{" idx)
			return "!\"Hash-map key must be string or keyword."
		}
		if (!(reader["curidx"] in reader)) {
			types_release("{" idx)
			return "!\"Element count of hash-map must be even."
		}
		val = reader_read_from(reader)
		if (val ~ /^!/) {
			types_release("{" idx)
			return val
		}
		# A repeated key replaces the earlier value; release the old one
		# first so it is not left orphaned on the heap.
		if (key in types_heap[idx]) {
			types_release(types_heap[idx][key])
		}
		types_heap[idx][key] = val
	}
	types_release("{" idx)
	return "!\"expected '}', got EOF"
}
88
# Expand a reader-macro prefix into a two-element list.
#
# reader: token array; reader["curidx"] is the index of the next token.
# symbol: tagged symbol to put in the first slot (e.g. "'quote").
# val, idx: locals; callers do not pass them.
#
# Returns "(" followed by the index of a new heap object holding
# [symbol, form], or the error value produced while reading the form.
function reader_read_abbrev(reader, symbol,    val, idx)
{
	val = reader_read_from(reader)
	if (val !~ /^!/) {
		idx = types_allocate()
		types_heap[idx][0] = symbol
		types_heap[idx][1] = val
		types_heap[idx]["len"] = 2
		val = "(" idx
	}
	return val
}
101
# Expand "^meta form" into the three-element list (with-meta form meta).
#
# reader: token array; reader["curidx"] is the index of the next token.
# meta, val, idx: locals; callers do not pass them.
#
# Returns "(" followed by the index of the new heap object, or the error
# value from whichever of the two reads failed.  If the second read fails,
# the metadata that was already read is released.
function reader_read_with_meta(reader,    meta, val, idx)
{
	meta = reader_read_from(reader)
	if (meta !~ /^!/) {
		val = reader_read_from(reader)
		if (val !~ /^!/) {
			idx = types_allocate()
			# The source order is meta then form; the list stores form
			# first and meta second.
			types_heap[idx][0] = "'with-meta"
			types_heap[idx][1] = val
			types_heap[idx][2] = meta
			types_heap[idx]["len"] = 3
			return "(" idx
		}
		types_release(meta)
		return val
	}
	return meta
}
120
# Read one complete form starting at the current token.
#
# reader: token array; reader["curidx"] is the index of the next token
#         and is advanced past everything consumed.
# current: local; callers do not pass it.
#
# Returns the tagged value of the form, or an error value (a string
# starting with "!") for a stray closing delimiter, a failure in a nested
# read, or running out of tokens.
function reader_read_from(reader,    current)
{
	# Guard against reading past the last token (e.g. input "'" or "^a").
	# Without it the array reference below would create an empty phantom
	# token and turn it into an empty-named symbol.  "+ 0" makes an unset
	# curidx act as index 0 instead of the empty subscript.
	if (!((reader["curidx"] + 0) in reader)) {
		return "!\"expected form, got EOF"
	}
	current = reader[reader["curidx"]++]
	switch (current) {
	case "(":
		return reader_read_list(reader, "(", ")")
	case "[":
		return reader_read_list(reader, "[", "]")
	case "{":
		return reader_read_hash(reader)
	case ")":
	case "]":
	case "}":
		# A closing delimiter is only valid where a list/hash reader is
		# looking for it.
		return "!\"Unexpected token '" current "'."
	case "'":
		return reader_read_abbrev(reader, "'quote")
	case "`":
		return reader_read_abbrev(reader, "'quasiquote")
	case "~":
		return reader_read_abbrev(reader, "'unquote")
	case "~@":
		return reader_read_abbrev(reader, "'splice-unquote")
	case "@":
		return reader_read_abbrev(reader, "'deref")
	case "^":
		return reader_read_with_meta(reader)
	default:
		return reader_read_atom(current)
	}
}
151
# Split source text into tokens.
#
# str:    the text to tokenize.
# reader: array that receives the tokens at indices 0..n-1 and the token
#         count in reader["len"].
# len, r: locals; callers do not pass them.
#
# Leading whitespace and commas are skipped before each token, and tokens
# starting with ";" (comments) are discarded.  Returns "" on success, or
# an error value (starting with "!") if text other than whitespace and
# commas is left over; in that case reader["len"] is not set.
function reader_tokenizer(str, reader,    len, r)
{
	len = 0
	while (match(str, /^[ \t\r\n,]*(~@|[\[\]{}()'`~^@]|\"(\\.|[^\\"])*\"?|;[^\r\n]*|[^ \t\r\n\[\]{}('"`,;)^~@][^ \t\r\n\[\]{}('"`,;)]*)/, r)) {
		# The pattern is anchored, so this drops exactly the matched prefix.
		str = substr(str, RSTART + RLENGTH)
		if (r[1] ~ /^;/) {
			continue
		}
		reader[len++] = r[1]
	}
	if (str ~ /^[ \t\r\n,]*$/) {
		reader["len"] = len
		return ""
	}
	return "!\"Cannot tokenize '" str "'."
}
166
# Tokenize a string and read exactly one form from it.
#
# str:    the source text.
# reader: array used for the token list and read position.
# ret:    local; callers do not pass it.
#
# Returns "#nil" when the text contains no tokens, the tagged value of the
# form on success, or an error value (starting with "!") when tokenizing
# or reading fails, or when tokens remain after the first form (the form
# already read is released in that case).
function reader_read_str(str, reader,    ret)
{
	ret = reader_tokenizer(str, reader)
	if (ret == "") {
		if (reader["len"] == 0) {
			return "#nil"
		}
		ret = reader_read_from(reader)
		# A successful read must have consumed every token.
		if (ret !~ /^!/ && reader["len"] != reader["curidx"]) {
			types_release(ret)
			ret = "!\"Unexpected token '" reader[reader["curidx"]] "'."
		}
	}
	return ret
}