5 //#include <glib/gregex.h>
6 //#include <glib-object.h>
// Forward declaration: read_list() calls read_form() before read_form()
// is defined further down in this file.
13 MalVal
*read_form(Reader
*reader
);
// Allocates a Reader with malloc() and gives it a GArray of char* tokens.
// The array is created zero-terminated (first TRUE), without clearing new
// elements (FALSE), with room for 8 entries reserved up front.
// NOTE(review): the malloc() result is dereferenced without a NULL check.
// NOTE(review): the rest of this function is not visible in this view
// (presumably it initializes `position` and returns `reader`) - confirm.
15 Reader
*reader_new() {
16 Reader
*reader
= (Reader
*)malloc(sizeof(Reader
));
17 reader
->array
= g_array_sized_new(TRUE
, FALSE
, sizeof(char *), 8);
// Appends `token` to the end of the reader's token array. Only the pointer
// is stored; the string it points to is not copied here.
// NOTE(review): the return statement is not visible in this view.
22 int reader_append(Reader
*reader
, char* token
) {
23 g_array_append_val(reader
->array
, token
);
// Returns the token at the reader's current position without advancing.
// NOTE(review): there is no bounds check here; presumably this relies on
// the array having been created zero-terminated in reader_new(), so that
// position == len yields NULL - confirm position can never exceed len.
27 char *reader_peek(Reader
*reader
) {
28 return g_array_index(reader
->array
, char*, reader
->position
);
// Returns the token at the current position and then advances the position
// by one (post-increment).
31 char *reader_next(Reader
*reader
) {
// Guard for the position having reached (or passed) the end of the array.
// NOTE(review): the body of this branch is not visible in this view
// (presumably it returns NULL) - confirm.
32 if (reader
->position
>= reader
->array
->len
) {
35 return g_array_index(reader
->array
, char*, reader
->position
++);
// Releases the token strings held by the reader and then the GArray itself.
// NOTE(review): the declaration of `i` and any freeing of the Reader struct
// are not visible in this view.
39 void reader_free(Reader
*reader
) {
// Free each stored token string.
// NOTE(review): the tokens appended in tokenize() come from
// g_match_info_fetch(), which GLib documents as to be released with
// g_free(); free() is used here - confirm the allocators are compatible.
41 for(i
=0; i
< reader
->array
->len
; i
++) {
42 free(g_array_index(reader
->array
, char*, i
));
// TRUE also frees the array's element storage, not just the GArray wrapper.
44 g_array_free(reader
->array
, TRUE
);
// Splits `line` into tokens with a regular expression and collects them in
// a freshly created Reader. Capture group 1 of each match is the token text.
// NOTE(review): the declarations of `regex` and `err`, the error-handling
// branch, and the return paths are not visible in this view.
48 Reader
*tokenize(char *line
) {
50 GMatchInfo
*matchInfo
;
53 Reader
*reader
= reader_new();
// Group 1 alternatives, in order: "~@", a single special character,
// a double-quoted string with backslash escapes, a ';' comment running to
// end of line, or a run of non-special characters. Leading whitespace and
// commas are consumed outside the group.
55 regex
= g_regex_new ("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)", 0, 0, &err
);
// NOTE(review): the match is started before the error report below, so a
// failed g_regex_new() would pass a NULL regex here - confirm intent.
56 g_regex_match (regex
, line
, 0, &matchInfo
);
59 fprintf(stderr
, "Tokenize error: %s\n", err
->message
);
63 while (g_match_info_matches(matchInfo
)) {
64 gchar
*result
= g_match_info_fetch(matchInfo
, 1);
// Keep only non-empty tokens that are not ';' comments.
// NOTE(review): no release of a skipped `result` is visible here; if none
// exists in the missing lines, skipped strings leak.
65 if (result
[0] != '\0' && result
[0] != ';') {
66 reader_append(reader
, result
);
68 g_match_info_next(matchInfo
, &err
);
70 g_match_info_free(matchInfo
);
// Special case for input that produced no tokens; the branch body is not
// visible in this view.
72 if (reader
->array
->len
== 0) {
// From http://creativeandcritical.net/str-replace-c/ - Laird Shaw
/*
 * Returns a newly malloc()ed copy of `str` in which every non-overlapping
 * occurrence of `old` (scanning left to right) is replaced by `new`.
 *
 * The caller owns the returned buffer and must free() it.
 * Returns NULL if the allocation fails.
 *
 * An empty `old` matches nothing: the result is a plain copy of `str`.
 * (strstr() with an empty needle always returns its haystack, so the scan
 * below would otherwise never advance.)
 */
char *replace_str(const char *str, const char *old, const char *new)
{
    const size_t oldlen = strlen(old);
    const size_t newlen = strlen(new);
    const char *p, *q;
    char *ret, *r;
    size_t retlen;

    if (oldlen == 0) {
        /* Nothing to search for: hand back an unmodified copy. */
        retlen = strlen(str);
        if ((ret = malloc(retlen + 1)) == NULL)
            return NULL;
        memcpy(ret, str, retlen + 1);
        return ret;
    }

    if (oldlen != newlen) {
        size_t count = 0;

        /* First pass: count matches so the result can be sized exactly. */
        for (p = str; (q = strstr(p, old)) != NULL; p = q + oldlen)
            count++;
        /* this is undefined if p - str > PTRDIFF_MAX */
        /* Unsigned wrap-around keeps this exact even when newlen < oldlen. */
        retlen = (size_t)(p - str) + strlen(p) + count * (newlen - oldlen);
    } else {
        /* Same-length replacement cannot change the overall length. */
        retlen = strlen(str);
    }

    if ((ret = malloc(retlen + 1)) == NULL)
        return NULL;

    /* Second pass: copy each unmatched stretch, then the replacement. */
    for (r = ret, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) {
        /* this is undefined if q - p > PTRDIFF_MAX */
        size_t gap = (size_t)(q - p);

        memcpy(r, p, gap);
        r += gap;
        memcpy(r, new, newlen);
        r += newlen;
    }
    /* Copy the tail after the last match, including the terminator. */
    strcpy(r, p);

    return ret;
}
// Consumes the next token from the reader and converts it to an atom by
// matching it against one regular expression with eight alternative capture
// groups; whichever group matched decides the kind of value created.
// NOTE(review): the declarations of `token`, `regex`, `err`, `pos` and
// `atom`, the bodies of the nil/true/false branches, and the tail of the
// function are not visible in this view.
114 MalVal
*read_atom(Reader
*reader
) {
117 GMatchInfo
*matchInfo
;
122 token
= reader_next(reader
);
123 //g_print("read_atom token: %s\n", token);
// Groups: 1 integer, 2 number containing digits and dots, 3 nil, 4 true,
// 5 false, 6 contents of a double-quoted string, 7 text after ':',
// 8 anything without a double quote.
// NOTE(review): the regex is compiled on every call.
125 regex
= g_regex_new ("(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|:(.*)|(^[^\"]*$)", 0, 0, &err
);
126 g_regex_match (regex
, token
, 0, &matchInfo
);
// For each group, g_match_info_fetch_pos() sets pos to -1 when that group
// did not take part in the match, hence the `pos != -1` tests below.
128 if (g_match_info_fetch_pos(matchInfo
, 1, &pos
, NULL
) && pos
!= -1) {
129 //g_print("read_atom integer\n");
130 atom
= malval_new_integer(g_ascii_strtoll(token
, NULL
, 10));
131 } else if (g_match_info_fetch_pos(matchInfo
, 2, &pos
, NULL
) && pos
!= -1) {
132 //g_print("read_atom float\n");
133 atom
= malval_new_float(g_ascii_strtod(token
, NULL
));
134 } else if (g_match_info_fetch_pos(matchInfo
, 3, &pos
, NULL
) && pos
!= -1) {
135 //g_print("read_atom nil\n");
137 } else if (g_match_info_fetch_pos(matchInfo
, 4, &pos
, NULL
) && pos
!= -1) {
138 //g_print("read_atom true\n");
140 } else if (g_match_info_fetch_pos(matchInfo
, 5, &pos
, NULL
) && pos
!= -1) {
141 //g_print("read_atom false\n");
143 } else if (g_match_info_fetch_pos(matchInfo
, 6, &pos
, NULL
) && pos
!= -1) {
144 //g_print("read_atom string: %s\n", token);
// Unescape \" to " in the string contents; no other escape sequence is
// handled in the visible code.
// NOTE(review): the string returned by g_match_info_fetch() is passed
// straight into replace_str() and never bound to a variable, so it cannot
// be released afterwards.
145 char *str_tmp
= replace_str(g_match_info_fetch(matchInfo
, 6), "\\\"", "\"");
146 atom
= malval_new_string(str_tmp
);
147 } else if (g_match_info_fetch_pos(matchInfo
, 7, &pos
, NULL
) && pos
!= -1) {
148 //g_print("read_atom keyword\n");
149 atom
= malval_new_keyword(g_match_info_fetch(matchInfo
, 7));
150 } else if (g_match_info_fetch_pos(matchInfo
, 8, &pos
, NULL
) && pos
!= -1) {
151 //g_print("read_atom symbol\n");
152 atom
= malval_new_symbol(g_match_info_fetch(matchInfo
, 8));
// Reads a delimited sequence of forms: consumes the opening token, then
// reads forms with read_form() and appends them to a new list value of the
// given `type`.
// NOTE(review): `abort(...)` is called with a message argument, so it is
// presumably a project macro rather than the C library abort() - confirm.
// NOTE(review): the declarations of `ast` and `form`, the second half of
// the loop condition, the failure branch and the tail of the function are
// not visible in this view.
160 MalVal
*read_list(Reader
*reader
, MalType type
, char start
, char end
) {
162 char *token
= reader_next(reader
);
163 //g_print("read_list start token: %s\n", token);
// The message names only '(' and '[' although read_hash_map() also calls
// this function with '{'.
164 if (token
[0] != start
) { abort("expected '(' or '['"); }
// Backing array: zero-terminated, cleared on allocation, MalVal* elements.
166 ast
= malval_new_list(type
, g_array_new(TRUE
, TRUE
, sizeof(MalVal
*)));
168 while ((token
= reader_peek(reader
)) &&
170 //g_print("read_list internal token %s\n", token);
171 form
= read_form(reader
);
// NOTE(review): the next two statements presumably sit inside a branch
// taken when read_form() failed; the enclosing condition is not visible.
173 if (!mal_error
) { abort("unknown read_list failure"); }
174 g_array_free(ast
->val
.array
, TRUE
);
178 g_array_append_val(ast
->val
.array
, form
);
// A NULL token at this point means the tokens ran out before the closing
// delimiter was seen.
180 if (!token
) { abort("expected ')' or ']', got EOF"); }
182 //g_print("read_list end token: %s\n", token);
// Reads a `{ ... }` form as a plain list via read_list() and passes that
// list to _hash_map().
// NOTE(review): the rest of the function (what happens to `lst` and what is
// returned) is not visible in this view.
186 MalVal
*read_hash_map(Reader
*reader
) {
187 MalVal
*lst
= read_list(reader
, MAL_LIST
, '{', '}');
188 MalVal
*hm
= _hash_map(lst
);
// Reads one complete form from the reader. It peeks at the next token and,
// depending on that token, builds a reader-macro form, reads a list, vector
// or hash map, or falls back to read_atom(). Returns NULL when there is no
// token left.
// NOTE(review): the dispatch construct itself (presumably a switch on the
// token's first character), the second arguments of most _listX() calls and
// the tail of the function are not visible in this view.
194 MalVal
*read_form(Reader
*reader
) {
196 MalVal
*form
= NULL
, *tmp
;
198 // while(token = reader_next(reader)) {
199 // printf("token: %s\n", token);
203 token
= reader_peek(reader
);
205 if (!token
) { return NULL
; }
206 //g_print("read_form token: %s\n", token);
210 abort("comments not yet implemented");
// Reader macros: each wraps a form in a two-element list headed by a symbol.
214 form
= _listX(2, malval_new_symbol("quote"),
219 form
= _listX(2, malval_new_symbol("quasiquote"),
// A second character of '@' selects splice-unquote over plain unquote.
224 if (token
[1] == '@') {
225 form
= _listX(2, malval_new_symbol("splice-unquote"),
228 form
= _listX(2, malval_new_symbol("unquote"),
// with-meta: the metadata form is read first, then the target form; the
// resulting list is (with-meta <target> <meta>).
234 MalVal
*meta
= read_form(reader
);
235 form
= _listX(3, malval_new_symbol("with-meta"),
236 read_form(reader
), meta
);
240 form
= _listX(2, malval_new_symbol("deref"),
// A closing delimiter with no matching opener is reported as an error;
// an opening delimiter hands over to the matching collection reader.
247 abort("unexpected ')'");
250 form
= read_list(reader
, MAL_LIST
, '(', ')');
255 abort("unexpected ']'");
258 form
= read_list(reader
, MAL_VECTOR
, '[', ']');
263 abort("unexpected '}'");
266 form
= read_hash_map(reader
);
// Anything else is read as an atom.
270 form
= read_atom(reader
);
// Entry point for reading text: tokenizes `str` into a Reader and reads one
// form from it with read_form().
// NOTE(review): the declarations of `reader` and `ast`, any handling of a
// failed tokenize(), cleanup of the reader and the return statement are not
// visible in this view.
277 MalVal
*read_str (char *str
) {
282 reader
= tokenize(str
);
284 ast
= read_form(reader
);