All: don't ignore */mal. Fixes #99
[jackhill/mal.git] / c / reader.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4
5 //#include <glib/gregex.h>
6 //#include <glib-object.h>
7 #include <glib.h>
8
9 #include "types.h"
10 #include "reader.h"
11
12 // Declare
13 MalVal *read_form(Reader *reader);
14
15 Reader *reader_new() {
16 Reader *reader = (Reader*)malloc(sizeof(Reader));
17 reader->array = g_array_sized_new(TRUE, FALSE, sizeof(char *), 8);
18 reader->position = 0;
19 return reader;
20 }
21
22 int reader_append(Reader *reader, char* token) {
23 g_array_append_val(reader->array, token);
24 return TRUE;
25 }
26
27 char *reader_peek(Reader *reader) {
28 return g_array_index(reader->array, char*, reader->position);
29 }
30
31 char *reader_next(Reader *reader) {
32 if (reader->position >= reader->array->len) {
33 return NULL;
34 } else {
35 return g_array_index(reader->array, char*, reader->position++);
36 }
37 }
38
39 void reader_free(Reader *reader) {
40 int i;
41 for(i=0; i < reader->array->len; i++) {
42 free(g_array_index(reader->array, char*, i));
43 }
44 g_array_free(reader->array, TRUE);
45 free(reader);
46 }
47
48 Reader *tokenize(char *line) {
49 GRegex *regex;
50 GMatchInfo *matchInfo;
51 GError *err = NULL;
52
53 Reader *reader = reader_new();
54
55 regex = g_regex_new ("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)", 0, 0, &err);
56 g_regex_match (regex, line, 0, &matchInfo);
57
58 if (err != NULL) {
59 fprintf(stderr, "Tokenize error: %s\n", err->message);
60 return NULL;
61 }
62
63 while (g_match_info_matches(matchInfo)) {
64 gchar *result = g_match_info_fetch(matchInfo, 1);
65 if (result[0] != '\0' && result[0] != ';') {
66 reader_append(reader, result);
67 }
68 g_match_info_next(matchInfo, &err);
69 }
70 g_match_info_free(matchInfo);
71 g_regex_unref(regex);
72 if (reader->array->len == 0) {
73 reader_free(reader);
74 return NULL;
75 } else {
76 return reader;
77 }
78 }
79
80
// From http://creativeandcritical.net/str-replace-c/ - Laird Shaw
/*
 * Return a newly malloc'd copy of `str` in which every non-overlapping
 * occurrence of `old` is replaced by `new`, scanning left to right.
 *
 * An empty `old` matches nothing, so the result is a plain copy of `str`.
 * (Without this guard strstr() would match at every position without
 * advancing, and the scan would never terminate.)
 *
 * Returns NULL if the allocation fails. The caller frees the result.
 */
char *replace_str(const char *str, const char *old, const char *new)
{
    char *ret, *r;
    const char *p, *q;
    size_t oldlen = strlen(old);
    size_t newlen = strlen(new);
    size_t retlen;

    if (oldlen == 0) {
        retlen = strlen(str);
        ret = malloc(retlen + 1);
        if (ret != NULL) {
            memcpy(ret, str, retlen + 1);
        }
        return ret;
    }

    if (oldlen != newlen) {
        size_t count = 0;

        /* First pass: count occurrences to size the result exactly. */
        for (p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) {
            count++;
        }
        /* this is undefined if p - str > PTRDIFF_MAX */
        /* (newlen - oldlen) may wrap, but the unsigned sum is still exact. */
        retlen = (size_t)(p - str) + strlen(p) + count * (newlen - oldlen);
    } else {
        retlen = strlen(str);
    }

    if ((ret = malloc(retlen + 1)) == NULL) {
        return NULL;
    }

    /* Second pass: copy the text between matches, then the replacement. */
    for (r = ret, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) {
        /* this is undefined if q - p > PTRDIFF_MAX */
        size_t l = (size_t)(q - p);
        memcpy(r, p, l);
        r += l;
        memcpy(r, new, newlen);
        r += newlen;
    }
    /* Copy whatever follows the last match, including the terminator. */
    strcpy(r, p);

    return ret;
}
112
113
114 MalVal *read_atom(Reader *reader) {
115 char *token;
116 GRegex *regex;
117 GMatchInfo *matchInfo;
118 GError *err = NULL;
119 gint pos;
120 MalVal *atom;
121
122 token = reader_next(reader);
123 //g_print("read_atom token: %s\n", token);
124
125 regex = g_regex_new ("(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|:(.*)|(^[^\"]*$)", 0, 0, &err);
126 g_regex_match (regex, token, 0, &matchInfo);
127
128 if (g_match_info_fetch_pos(matchInfo, 1, &pos, NULL) && pos != -1) {
129 //g_print("read_atom integer\n");
130 atom = malval_new_integer(g_ascii_strtoll(token, NULL, 10));
131 } else if (g_match_info_fetch_pos(matchInfo, 2, &pos, NULL) && pos != -1) {
132 //g_print("read_atom float\n");
133 atom = malval_new_float(g_ascii_strtod(token, NULL));
134 } else if (g_match_info_fetch_pos(matchInfo, 3, &pos, NULL) && pos != -1) {
135 //g_print("read_atom nil\n");
136 atom = &mal_nil;
137 } else if (g_match_info_fetch_pos(matchInfo, 4, &pos, NULL) && pos != -1) {
138 //g_print("read_atom true\n");
139 atom = &mal_true;
140 } else if (g_match_info_fetch_pos(matchInfo, 5, &pos, NULL) && pos != -1) {
141 //g_print("read_atom false\n");
142 atom = &mal_false;
143 } else if (g_match_info_fetch_pos(matchInfo, 6, &pos, NULL) && pos != -1) {
144 //g_print("read_atom string: %s\n", token);
145 char *str_tmp = replace_str(g_match_info_fetch(matchInfo, 6), "\\\"", "\"");
146 atom = malval_new_string(str_tmp);
147 } else if (g_match_info_fetch_pos(matchInfo, 7, &pos, NULL) && pos != -1) {
148 //g_print("read_atom keyword\n");
149 atom = malval_new_keyword(g_match_info_fetch(matchInfo, 7));
150 } else if (g_match_info_fetch_pos(matchInfo, 8, &pos, NULL) && pos != -1) {
151 //g_print("read_atom symbol\n");
152 atom = malval_new_symbol(g_match_info_fetch(matchInfo, 8));
153 } else {
154 malval_free(atom);
155 atom = NULL;
156 }
157 return atom;
158 }
159
160 MalVal *read_list(Reader *reader, MalType type, char start, char end) {
161 MalVal *ast, *form;
162 char *token = reader_next(reader);
163 //g_print("read_list start token: %s\n", token);
164 if (token[0] != start) { abort("expected '(' or '['"); }
165
166 ast = malval_new_list(type, g_array_new(TRUE, TRUE, sizeof(MalVal*)));
167
168 while ((token = reader_peek(reader)) &&
169 token[0] != end) {
170 //g_print("read_list internal token %s\n", token);
171 form = read_form(reader);
172 if (!form) {
173 if (!mal_error) { abort("unknown read_list failure"); }
174 g_array_free(ast->val.array, TRUE);
175 malval_free(ast);
176 return NULL;
177 }
178 g_array_append_val(ast->val.array, form);
179 }
180 if (!token) { abort("expected ')' or ']', got EOF"); }
181 reader_next(reader);
182 //g_print("read_list end token: %s\n", token);
183 return ast;
184 }
185
186 MalVal *read_hash_map(Reader *reader) {
187 MalVal *lst = read_list(reader, MAL_LIST, '{', '}');
188 MalVal *hm = _hash_map(lst);
189 malval_free(lst);
190 return hm;
191 }
192
193
194 MalVal *read_form(Reader *reader) {
195 char *token;
196 MalVal *form = NULL, *tmp;
197
198 // while(token = reader_next(reader)) {
199 // printf("token: %s\n", token);
200 // }
201 // return NULL;
202
203 token = reader_peek(reader);
204
205 if (!token) { return NULL; }
206 //g_print("read_form token: %s\n", token);
207
208 switch (token[0]) {
209 case ';':
210 abort("comments not yet implemented");
211 break;
212 case '\'':
213 reader_next(reader);
214 form = _listX(2, malval_new_symbol("quote"),
215 read_form(reader));
216 break;
217 case '`':
218 reader_next(reader);
219 form = _listX(2, malval_new_symbol("quasiquote"),
220 read_form(reader));
221 break;
222 case '~':
223 reader_next(reader);
224 if (token[1] == '@') {
225 form = _listX(2, malval_new_symbol("splice-unquote"),
226 read_form(reader));
227 } else {
228 form = _listX(2, malval_new_symbol("unquote"),
229 read_form(reader));
230 };
231 break;
232 case '^':
233 reader_next(reader);
234 MalVal *meta = read_form(reader);
235 form = _listX(3, malval_new_symbol("with-meta"),
236 read_form(reader), meta);
237 break;
238 case '@':
239 reader_next(reader);
240 form = _listX(2, malval_new_symbol("deref"),
241 read_form(reader));
242 break;
243
244
245 // list
246 case ')':
247 abort("unexpected ')'");
248 break;
249 case '(':
250 form = read_list(reader, MAL_LIST, '(', ')');
251 break;
252
253 // vector
254 case ']':
255 abort("unexpected ']'");
256 break;
257 case '[':
258 form = read_list(reader, MAL_VECTOR, '[', ']');
259 break;
260
261 // hash-map
262 case '}':
263 abort("unexpected '}'");
264 break;
265 case '{':
266 form = read_hash_map(reader);
267 break;
268
269 default:
270 form = read_atom(reader);
271 break;
272 }
273 return form;
274
275 }
276
277 MalVal *read_str (char *str) {
278 Reader *reader;
279 char *token;
280 MalVal *ast = NULL;
281
282 reader = tokenize(str);
283 if (reader) {
284 ast = read_form(reader);
285 reader_free(reader);
286 }
287
288 return ast;
289 }