Commit | Line | Data |
---|---|---|
8c7587af MK |
# Turn a quoted string token into the reader's string value.
#
# token: the raw token text, including its opening and closing double quote.
#
# Returns the token with its last character (the closing quote) removed and
# the escapes \\ , \" and \n decoded to a backslash, a double quote and a
# newline. The opening quote stays as the first character of the result.
# Any other backslash sequence is copied through unchanged.
#
# The body is decoded in one left-to-right scan. No placeholder character is
# substituted in and back out, so every input character, whatever its value,
# survives decoding untouched.
function reader_read_string(token,    out, pos, esc)
{
    # Drop the closing quote.
    token = substr(token, 1, length(token) - 1)

    out = ""
    while ((pos = index(token, "\\")) > 0) {
        # The character right after the backslash selects the replacement.
        esc = substr(token, pos + 1, 1)
        if (esc == "n") {
            esc = "\n"
        } else if (esc != "\\" && esc != "\"") {
            # Unknown escape (or a trailing lone backslash): keep it verbatim.
            esc = "\\" esc
        }
        out = out substr(token, 1, pos - 1) esc
        # Continue after the two characters just consumed, so a decoded
        # backslash can never be re-read as the start of another escape.
        token = substr(token, pos + 2)
    }
    return out token
}
10 | ||
# Classify a single non-structural token and return it with a one-character
# type prefix:
#   true / false / nil      -> "#" token
#   token starting with ":" -> ":" token
#   token starting with '"' -> result of reader_read_string(), or an error
#                              string (prefix "!") when the closing quote
#                              is missing
#   optional "-" and digits -> "+" token
#   anything else           -> "'" token
function reader_read_atom(token)
{
    if (token == "true" || token == "false" || token == "nil") {
        return "#" token
    }
    if (token ~ /^:/) {
        return ":" token
    }
    if (token ~ /^"/) {
        # Only a fully terminated string literal may be decoded.
        if (token !~ /^\"(\\.|[^\\"])*\"$/) {
            return "!\"Expected '\"', got EOF."
        }
        return reader_read_string(token)
    }
    if (token ~ /^-?[0-9]+$/) {
        return "+" token
    }
    return "'" token
}
32 | ||
# Read forms until the closing delimiter `end` and collect them in a newly
# allocated types_heap entry (elements at 0..len-1, count under "len").
#
# reader: token array with the current position in reader["curidx"].
# type:   prefix prepended to the heap index in the returned value.
# end:    token that closes this sequence.
#
# Returns type followed by the heap index on success. If an element fails to
# read, or the tokens run out before `end`, the partially built entry is
# released and an error string (prefix "!") is returned.
function reader_read_list(reader, type, end,    idx, count, item, err)
{
    idx = types_allocate()
    count = 0
    err = ""

    while (err == "" && (reader["curidx"] in reader)) {
        if (reader[reader["curidx"]] == end) {
            # Consume the closing delimiter and hand back the finished entry.
            reader["curidx"]++
            types_heap[idx]["len"] = count
            return type idx
        }
        item = reader_read_from(reader)
        if (item ~ /^!/) {
            err = item
        } else {
            types_heap[idx][count++] = item
        }
    }

    # Reaching this point with no element error means the input ended early.
    if (err == "") {
        err = "!\"expected '" end "', got EOF"
    }
    # Record the element count before releasing the partial entry.
    types_heap[idx]["len"] = count
    types_release(type idx)
    return err
}
55 | ||
# Read key/value pairs until "}" and store them in a newly allocated
# types_heap entry, indexed by key.
#
# reader: token array with the current position in reader["curidx"].
#
# Returns "{" followed by the heap index on success. On any failure the
# partially built entry is released and an error string (prefix "!") is
# returned: a key or value that failed to read, a key whose value does not
# start with '"' or ':', a key with no token after it, or input that ends
# before "}".
function reader_read_hash(reader,    idx, key, val)
{
    idx = types_allocate()
    while (reader["curidx"] in reader) {
        if (reader[reader["curidx"]] == "}") {
            reader["curidx"]++
            return "{" idx
        }

        key = reader_read_from(reader)
        if (key ~ /^!/) {
            types_release("{" idx)
            return key
        }
        if (key !~ /^[":]/) {
            # The rejected key may itself be a heap value; release it too.
            types_release(key)
            types_release("{" idx)
            return "!\"Hash-map key must be string or keyword."
        }

        # A key was read but no token is left to serve as its value.
        if (!(reader["curidx"] in reader)) {
            types_release("{" idx)
            return "!\"Element count of hash-map must be even."
        }
        val = reader_read_from(reader)
        if (val ~ /^!/) {
            types_release("{" idx)
            return val
        }

        # A repeated key replaces the earlier entry. Release the value that
        # is about to be overwritten so it is not left unreachable.
        # NOTE(review): assumes types_release() accepts any tagged value,
        # as it is already used that way on rejected keys above.
        if (key in types_heap[idx]) {
            types_release(types_heap[idx][key])
        }
        types_heap[idx][key] = val
    }
    types_release("{" idx)
    return "!\"expected '}', got EOF"
}
88 | ||
# Expand a reader abbreviation: read the next form and wrap it in a
# two-element list whose first element is `symbol`.
#
# Returns "(" followed by the new heap index, or the error string (prefix
# "!") from reading the wrapped form; nothing is allocated in that case.
function reader_read_abbrev(reader, symbol,    form, cell)
{
    form = reader_read_from(reader)
    if (form !~ /^!/) {
        cell = types_allocate()
        types_heap[cell][0] = symbol
        types_heap[cell][1] = form
        types_heap[cell]["len"] = 2
        form = "(" cell
    }
    return form
}
101 | ||
# Expand the "^" reader form: read a metadata form, then a value form, and
# build the three-element list (with-meta value metadata).
#
# Returns "(" followed by the new heap index. If either form fails to read,
# the error string (prefix "!") is returned instead; an already-read
# metadata form is released first when the value fails.
function reader_read_with_meta(reader,    meta, val, idx)
{
    meta = reader_read_from(reader)
    if (meta !~ /^!/) {
        val = reader_read_from(reader)
        if (val !~ /^!/) {
            idx = types_allocate()
            # The value comes before the metadata in the resulting list,
            # the reverse of the order in which they were read.
            types_heap[idx][0] = "'with-meta"
            types_heap[idx][1] = val
            types_heap[idx][2] = meta
            types_heap[idx]["len"] = 3
            return "(" idx
        }
        types_release(meta)
        return val
    }
    return meta
}
120 | ||
# Read one complete form starting at the token at reader["curidx"].
#
# reader: token array; reader["curidx"] is advanced past every token used.
#
# Returns the value produced by the matching sub-reader (list, vector,
# hash-map, abbreviation, with-meta or atom), or an error string (prefix
# "!") for a stray closing delimiter or when no token is left to read.
function reader_read_from(reader,    current)
{
    # Stop at end of input instead of indexing past the last token: doing
    # so would create an empty element in `reader` and yield an empty-named
    # symbol. reader["curidx"] may still be unset on the first call, so it
    # is forced numeric (unset -> 0) before the membership test.
    if (!((reader["curidx"] + 0) in reader)) {
        return "!\"expected form, got EOF"
    }

    current = reader[reader["curidx"]++]
    switch (current) {
    case "(":
        return reader_read_list(reader, "(", ")")
    case "[":
        return reader_read_list(reader, "[", "]")
    case "{":
        return reader_read_hash(reader)
    case ")":
    case "]":
    case "}":
        # A closing delimiter with no matching opener in progress.
        return "!\"Unexpected token '" current "'."
    case "'":
        return reader_read_abbrev(reader, "'quote")
    case "`":
        return reader_read_abbrev(reader, "'quasiquote")
    case "~":
        return reader_read_abbrev(reader, "'unquote")
    case "~@":
        return reader_read_abbrev(reader, "'splice-unquote")
    case "@":
        return reader_read_abbrev(reader, "'deref")
    case "^":
        return reader_read_with_meta(reader)
    default:
        return reader_read_atom(current)
    }
}
151 | ||
# Split source text into tokens.
#
# str:    text to tokenize.
# reader: array that receives the tokens at indices 0..n-1 and the token
#         count under "len".
#
# Leading whitespace and commas are skipped before each token, and tokens
# that begin with ";" (comments) are dropped. Returns "" on success, or an
# error string (prefix "!") if unmatched text other than whitespace and
# commas remains; reader["len"] is not set in that case.
function reader_tokenizer(str, reader,    len, r)
{
    len = 0
    # Each pass matches one token at the front of str; r[1] is the token
    # without the separators that preceded it.
    while (match(str, /^[ \t\r\n,]*(~@|[\[\]{}()'`~^@]|\"(\\.|[^\\"])*\"?|;[^\r\n]*|[^ \t\r\n\[\]{}('"`,;)^~@][^ \t\r\n\[\]{}('"`,;)]*)/, r)) {
        if (index(r[1], ";") != 1) {
            reader[len++] = r[1]
        }
        # Discard everything consumed by this match.
        str = substr(str, RSTART + RLENGTH)
    }

    if (str !~ /^[ \t\r\n,]*$/) {
        return "!\"Cannot tokenize '" str "'."
    }

    reader["len"] = len
    return ""
}
166 | ||
# Entry point of the reader: tokenize `str` and read exactly one form.
#
# Returns the value read, "#nil" when the text contains no tokens, or an
# error string (prefix "!") when tokenizing fails, reading fails, or tokens
# are left over after the first form (the form is released in that case).
function reader_read_str(str, reader, ret)
{
    ret = reader_tokenizer(str, reader)
    if (ret == "") {
        if (reader["len"] == 0) {
            return "#nil"
        }
        ret = reader_read_from(reader)
        # A successful read must have consumed every token.
        if (ret !~ /^!/ && reader["len"] != reader["curidx"]) {
            types_release(ret)
            ret = "!\"Unexpected token '" reader[reader["curidx"]] "'."
        }
    }
    return ret
}