Merged master into ada branch + fix Makefile
[jackhill/mal.git] / awk / reader.awk
# Convert a string token produced by the tokenizer into the reader's string
# value: the closing double quote is dropped, the opening one is kept as the
# first character of the result, and the escapes \\ , \" and \n are decoded.
#
# Decoding is done in ONE left-to-right pass so that every backslash pairs
# only with the character directly after it.  Decoding with three independent
# gsub() passes is wrong for input such as  \\n  (an escaped backslash followed
# by the letter n): the first pass leaves  \n , which a later pass then turns
# into a newline.  Backslash pairs other than the three above are copied
# through unchanged.
#
#   token    string token, including both surrounding double quotes
#   returns  the opening quote followed by the decoded contents
#
# out, pos and esc are locals.
function reader_read_string(token,    out, pos, esc)
{
	# Drop the closing quote; the opening quote stays at the front.
	token = substr(token, 1, length(token) - 1)
	out = ""
	while ((pos = index(token, "\\")) > 0) {
		esc = substr(token, pos + 1, 1)
		out = out substr(token, 1, pos - 1)
		if (esc == "n") {
			out = out "\n"
		} else if (esc == "\\" || esc == "\"") {
			out = out esc
		} else {
			# Unknown escape: keep the backslash and the character as written.
			out = out "\\" esc
		}
		# Skip the backslash AND the character it escaped.
		token = substr(token, pos + 2)
	}
	return out token
}
9
# Turn a single non-structural token into a tagged value.  The first
# character of the result tells the kind of value:
#   "#" + token   for the literals true, false and nil
#   ":" + token   for a token that starts with a colon
#   string value  for a token that starts with a double quote
#                 (decoded by reader_read_string)
#   "+" + token   for an optionally negative run of decimal digits
#   "'" + token   for everything else
function reader_read_atom(token)
{
	if (token == "true" || token == "false" || token == "nil") {
		return "#" token
	}
	if (token ~ /^:/) {
		return ":" token
	}
	if (token ~ /^"/) {
		return reader_read_string(token)
	}
	if (token ~ /^-?[0-9]+$/) {
		return "+" token
	}
	return "'" token
}
27
# Read the elements of a sequence whose opening token has already been
# consumed, up to and including the closing token `end`.
#
#   reader   token array; reader["curidx"] is the index of the next token
#   type     prefix put in front of the allocated index in the result
#   end      closing token that terminates the sequence
#   returns  type followed by the allocated index on success, otherwise an
#            error string (error strings start with "!")
#
# Elements are stored in types_heap[idx][0..n-1] and their count in
# types_heap[idx]["len"].  On any failure the count gathered so far is
# recorded before the partially built object is handed to types_release().
# idx, count, item and failure are locals.
function reader_read_list(reader, type, end,    idx, count, item, failure)
{
	idx = types_allocate()
	# Result used when the tokens run out before `end` is seen.
	failure = "!\"expect " end ", got EOF"
	for (count = 0; reader["curidx"] in reader; count++) {
		if (reader[reader["curidx"]] == end) {
			reader["curidx"]++
			types_heap[idx]["len"] = count
			return type idx
		}
		item = reader_read_from(reader)
		if (item ~ /^!/) {
			# Propagate the element's own error instead of the EOF one.
			failure = item
			break
		}
		types_heap[idx][count] = item
	}
	types_heap[idx]["len"] = count
	types_release(type idx)
	return failure
}
50
# Read the key/value pairs of a hash-map whose opening "{" has already been
# consumed, up to and including the closing "}".
#
#   reader   token array; reader["curidx"] is the index of the next token
#   returns  "{" followed by the allocated index on success, otherwise an
#            error string (error strings start with "!")
#
# Each pair is stored as types_heap[idx][key] = val.  Keys must be values
# whose first character is a double quote or a colon.  On every failure the
# partially built map is handed to types_release().
# idx, key and val are locals.
function reader_read_hash(reader,    idx, key, val)
{
	idx = types_allocate()
	while (reader["curidx"] in reader) {
		if (reader[reader["curidx"]] == "}") {
			reader["curidx"]++
			return "{" idx
		}
		key = reader_read_from(reader)
		if (key ~ /^!/) {
			types_release("{" idx)
			return key
		}
		if (key !~ /^[":]/) {
			types_release(key)
			types_release("{" idx)
			return "!\"Hash-map key must be string or keyword."
		}
		# A key with no value: either the input ends here or the map is
		# closed right after the key.  Checking for "}" as well makes an
		# input like {:a} report the odd element count instead of a generic
		# unexpected-token error.
		if (!(reader["curidx"] in reader) || reader[reader["curidx"]] == "}") {
			types_release("{" idx)
			return "!\"Element count of hash-map must be even."
		}
		val = reader_read_from(reader)
		if (val ~ /^!/) {
			types_release("{" idx)
			return val
		}
		# A repeated key replaces the earlier value; release the value being
		# dropped, as is done for every other discarded value in this file.
		if (key in types_heap[idx]) {
			types_release(types_heap[idx][key])
		}
		types_heap[idx][key] = val
	}
	types_release("{" idx)
	return "!\"expect }, got EOF"
}
83
# Expand a one-token reader abbreviation: read the form that follows and wrap
# it in a freshly allocated two-element list whose first element is `symbol`.
#
#   reader   token array positioned at the form to wrap
#   symbol   value stored as element 0 of the new list
#   returns  "(" followed by the allocated index, or the error string
#            (starting with "!") produced while reading the form
#
# form and cell are locals.
function reader_read_abbrev(reader, symbol,    form, cell)
{
	form = reader_read_from(reader)
	if (form !~ /^!/) {
		cell = types_allocate()
		types_heap[cell][0] = symbol
		types_heap[cell][1] = form
		types_heap[cell]["len"] = 2
		form = "(" cell
	}
	return form
}
96
# Handle the "^" reader form: read two forms, the first being the metadata
# and the second the value, and build the three-element list
# ('with-meta value metadata).
#
#   reader   token array positioned at the metadata form
#   returns  "(" followed by the allocated index, or the first error string
#            (starting with "!") produced while reading either form; when the
#            second read fails, the already-read metadata is released first
#
# annotation, target and cell are locals.
function reader_read_with_meta(reader,    annotation, target, cell)
{
	annotation = reader_read_from(reader)
	if (annotation !~ /^!/) {
		target = reader_read_from(reader)
		if (target !~ /^!/) {
			cell = types_allocate()
			types_heap[cell][0] = "'with-meta"
			types_heap[cell][1] = target
			types_heap[cell][2] = annotation
			types_heap[cell]["len"] = 3
			return "(" cell
		}
		types_release(annotation)
		return target
	}
	return annotation
}
115
# Read one complete form starting at the current token and advance
# reader["curidx"] past it.
#
#   reader   token array; reader["curidx"] is the index of the next token
#            (it may still be unset before the very first read)
#   returns  the value that was read, or an error string starting with "!"
#
# Dispatch is on the current token: opening brackets start a list, vector or
# hash-map; a stray closing bracket is an error; the quote-like tokens and
# "^" are expanded by reader_read_abbrev / reader_read_with_meta; anything
# else is handed to reader_read_atom.
#
# When there is no token left (for example the input is just "'" or "^ a"),
# an explicit EOF error is returned.  Without this guard the missing token
# would be read as an empty string, creating a phantom array element and
# yielding a symbol with an empty name.
# current and pos are locals.
function reader_read_from(reader,    current, pos)
{
	# "+ 0" turns a still-unset curidx into index 0.
	pos = reader["curidx"] + 0
	if (!(pos in reader)) {
		return "!\"expect form, got EOF"
	}
	reader["curidx"] = pos + 1
	current = reader[pos]
	switch (current) {
	case "(":
		return reader_read_list(reader, "(", ")")
	case "[":
		return reader_read_list(reader, "[", "]")
	case "{":
		return reader_read_hash(reader)
	case ")":
	case "]":
	case "}":
		return "!\"Unexpected token '" current "'."
	case "'":
		return reader_read_abbrev(reader, "'quote")
	case "`":
		return reader_read_abbrev(reader, "'quasiquote")
	case "~":
		return reader_read_abbrev(reader, "'unquote")
	case "~@":
		return reader_read_abbrev(reader, "'splice-unquote")
	case "@":
		return reader_read_abbrev(reader, "'deref")
	case "^":
		return reader_read_with_meta(reader)
	default:
		return reader_read_atom(current)
	}
}
146
# Split source text into tokens.
#
#   str      text to tokenize
#   reader   array that receives the tokens at indices 0..n-1 and, on
#            success, their count in reader["len"]
#   returns  "" on success, or an error string starting with "!" when some
#            non-blank text is left that no token pattern matches
#
# Whitespace and commas between tokens are skipped.  A token is one of: the
# two-character "~@", a single special character, a double-quoted string
# (with backslash escapes), a ";" comment running to the end of the line, or
# a run of ordinary characters.  Comments are matched but not stored.
# len and r are locals; r receives the match() groups (gawk extension).
function reader_tokenizer(str, reader,    len, r)
{
	len = 0
	while (match(str, /^[ \t\r\n,]*(~@|[\[\]{}()'`~^@]|\"(\\[^\r\n]|[^\\"\r\n])*\"|;[^\r\n]*|[^ \t\r\n\[\]{}('"`,;)^~@][^ \t\r\n\[\]{}('"`,;)]*)/, r)) {
		# Consume the matched prefix (separators plus the token itself).
		str = substr(str, RSTART + RLENGTH)
		if (r[1] ~ /^;/) {
			continue
		}
		reader[len] = r[1]
		len++
	}
	if (str ~ /^[ \t\r\n,]*$/) {
		reader["len"] = len
		return ""
	}
	return "!\"Cannot tokenize '" str "'."
}
161
# Entry point of the reader: tokenize `str` and read exactly one form from it.
#
#   str      source text
#   returns  the value that was read; "#nil" when the text contains no
#            tokens; or an error string starting with "!" when tokenizing
#            fails, reading fails, or tokens remain after the first form (in
#            which case the form that was read is released first)
#
# reader holds the token array and read position; ret holds the result.
function reader_read_str(str,    reader, ret)
{
	ret = reader_tokenizer(str, reader)
	if (ret == "") {
		if (reader["len"] == 0) {
			ret = "#nil"
		} else {
			ret = reader_read_from(reader)
			# A successful read must have consumed every token.
			if (ret !~ /^!/ && reader["len"] != reader["curidx"]) {
				types_release(ret)
				ret = "!\"Unexpected token '" reader[reader["curidx"]] "'."
			}
		}
	}
	return ret
}
179 return ret
180 }