Commit | Line | Data |
---|---|---|
21986733 DM |
1 | #include "yeti_regex.i" |
2 | require, "types.i" | |
3 | ||
// Token pattern: skips leading whitespace and commas, then captures one token
// in group 1.  The alternatives are tried in this order: "~@", a single
// special character, a double-quoted string (the closing quote is optional so
// that an unterminated string still yields a token), a ";" comment, or a run
// of ordinary characters.
// "^" is part of the special-character class so that it is always split off
// as its own token; read_form dispatches on a bare "^" token.
TOKENIZER_REGEXP = regcomp("[[:space:],]*(~@|[][{}()'`~^@]|\"([\\].|[^\\\"])*\"?|;.*|[^][[:space:]{}()'\"`~@,;]*)", newline=1)
21986733 DM |
5 | |
func tokenize(str)
/* DOCUMENT tokenize(str)
     Split STR into an array of token strings using TOKENIZER_REGEXP.

     The pattern is applied repeatedly, each time starting where the
     previous match ended.  Only the text captured by the first group is
     kept as a token.  An empty capture advances the scan position by one
     and is skipped; a token whose first character is ";" is dropped.

     Returns the collected tokens, or [] when none were found.
 */
{
  whole = ""
  group = ""
  tokens = []
  cursor = 1
  while (regmatch(TOKENIZER_REGEXP, str, whole, group, start=cursor, indices=1)) {
    first = group(1)
    next = group(2)   // used as the index just past the captured text
    if (next <= first) {
      // Nothing was captured: step forward and try again.
      cursor = next + 1
      continue
    }
    cursor = next
    token = strpart(str, first:next-1)
    if (strpart(token, 1:1) != ";") grow, tokens, [token]
  }
  return tokens
}
28 | ||
// Cursor over a token array.
struct Reader {
  pointer tokens   // points at the array of token strings
  int pos          // index of the next token to hand out (read_str starts it at 1)
}
33 | ||
func reader_peek(rdr)
/* DOCUMENT reader_peek(rdr)
     Return the token at RDR's current position without advancing.
     Returns string(0) when the position is past the last token.
 */
{
  if (rdr.pos <= numberof(*rdr.tokens)) return (*rdr.tokens)(rdr.pos)
  return string(0)
}
39 | ||
func reader_next(rdr)
/* DOCUMENT reader_next(rdr)
     Return the token at RDR's current position (as reader_peek does) and
     increment RDR.pos by one.
 */
{
  current = reader_peek(rdr)
  rdr.pos = rdr.pos + 1
  return current
}
46 | ||
// Token classifiers used by read_atom.

// An optional minus sign followed only by decimal digits.
NUMBER_REGEXP = regcomp("^-?[0-9]+$")

// A complete string literal: an opening quote, then any mix of
// backslash-escaped characters and characters that are neither a backslash
// nor a quote, then a closing quote.  Constraining the body this way keeps a
// token whose final quote is itself escaped from being accepted as a
// terminated string; such a token falls through to STR_BAD_REGEXP instead.
STR_REGEXP = regcomp("^\"([\\].|[^\\\"])*\"$")

// Anything that starts with a quote.  read_atom tests this only after
// STR_REGEXP has failed, so a match here means the string is unterminated.
STR_BAD_REGEXP = regcomp("^\".*$")
21986733 DM |
50 | |
func unescape(s)
/* DOCUMENT unescape(s)
     Turn the text of a quoted string token into the string it denotes.

     The first and last characters of S (the surrounding quotes) are
     dropped, then the two-character sequences backslash-n and
     backslash-quote become a newline and a quote.  Doubled backslashes are
     first parked as "\x01" so the other replacements cannot touch them,
     and are turned into a single backslash at the end.
 */
{
  body = strpart(s, 2:-1)
  body = streplaceall(streplaceall(streplaceall(body, "\\\\", "\x01"), "\\n", "\n"), "\\\"", "\"")
  return streplaceall(body, "\x01", "\\")
}
59 | ||
func read_atom(rdr)
/* DOCUMENT read_atom(rdr)
     Consume one token from RDR and convert it to a value.

     Checks are made in this order: the literals nil, true and false; an
     integer (NUMBER_REGEXP); a complete string (STR_REGEXP, unescaped with
     unescape); an unterminated string (STR_BAD_REGEXP), which yields a
     MalError; a keyword (leading ":", stored without the colon); anything
     else becomes a MalSymbol.
 */
{
  token = reader_next(rdr)

  if (token == "nil") return MAL_NIL
  if (token == "true") return MAL_TRUE
  if (token == "false") return MAL_FALSE

  if (regmatch(NUMBER_REGEXP, token)) return MalNumber(val=tonum(token))
  if (regmatch(STR_REGEXP, token)) return MalString(val=unescape(token))
  if (regmatch(STR_BAD_REGEXP, token)) return MalError(message=("expected '\"', got EOF"))

  if (strpart(token, 1:1) == ":") return MalKeyword(val=strpart(token, 2:))
  return MalSymbol(val=token)
}
72 | ||
func read_seq(rdr, start_char, end_char)
/* DOCUMENT read_seq(rdr, start_char, end_char)
     Read a delimited sequence of forms from RDR.  The next token must be
     START_CHAR; forms are then read with read_form until END_CHAR is the
     next token, and END_CHAR itself is consumed.

     Returns an array of pointers, one per form read ([] when the sequence
     is empty), or a MalError when the opening delimiter is missing, the
     tokens run out before END_CHAR is seen, or an element fails to read.
 */
{
  token = reader_next(rdr)
  if (token != start_char) {
    // Report what was actually found; only a missing token is an EOF.
    if (token == string(0)) found = "EOF"
    else found = "'" + token + "'"
    return MalError(message=("expected '" + start_char + "', got " + found))
  }

  elements = []
  token = reader_peek(rdr)
  while (token != end_char) {
    if (token == string(0)) {
      // reader_peek returns string(0) once the tokens are exhausted.
      return MalError(message=("expected '" + end_char + "', got EOF"))
    }
    e = read_form(rdr)
    if (structof(e) == MalError) return e
    grow, elements, [&e]
    token = reader_peek(rdr)
  }
  token = reader_next(rdr)  // consume the closing delimiter
  return elements
}
94 | ||
func read_list(rdr)
/* DOCUMENT read_list(rdr)
     Read a "(" ... ")" sequence from RDR with read_seq and wrap it in a
     MalList.  A MalError from read_seq is returned unchanged.
 */
{
  items = read_seq(rdr, "(", ")")
  if (structof(items) == MalError) {
    return items
  }
  return MalList(val=&items)
}
101 | ||
func read_vector(rdr)
/* DOCUMENT read_vector(rdr)
     Read a "[" ... "]" sequence from RDR with read_seq and wrap it in a
     MalVector.  A MalError from read_seq is returned unchanged.
 */
{
  items = read_seq(rdr, "[", "]")
  if (structof(items) == MalError) {
    return items
  }
  return MalVector(val=&items)
}
108 | ||
func read_hashmap(rdr)
/* DOCUMENT read_hashmap(rdr)
     Read a "{" ... "}" sequence from RDR with read_seq and pass the
     resulting array to array_to_hashmap.  A MalError from read_seq is
     returned unchanged.
 */
{
  items = read_seq(rdr, "{", "}")
  if (structof(items) == MalError) {
    return items
  }
  return array_to_hashmap(items)
}
115 | ||
func reader_macro(rdr, symbol_name)
/* DOCUMENT reader_macro(rdr, symbol_name)
     Expand a one-token reader shortcut.  The shortcut token is consumed,
     the following form is read, and the result is a two-element MalList:
     the symbol SYMBOL_NAME followed by that form.  If reading the form
     fails, the MalError is returned instead.
 */
{
  consumed = reader_next(rdr)   // drop the shortcut token itself
  form = read_form(rdr)
  if (structof(form) == MalError) {
    return form
  }
  head = MalSymbol(val=symbol_name)
  pair = [&head, &form]
  return MalList(val=&pair)
}
124 | ||
func reader_with_meta_macro(rdr)
/* DOCUMENT reader_with_meta_macro(rdr)
     Expand the metadata shortcut.  The shortcut token is consumed, then
     two forms are read: the metadata first, the target form second.  The
     result is the MalList (with-meta FORM META) -- note the swapped order.
     A MalError from either read is returned as soon as it occurs.
 */
{
  consumed = reader_next(rdr)   // drop the shortcut token itself

  meta = read_form(rdr)
  if (structof(meta) == MalError) {
    return meta
  }

  form = read_form(rdr)
  if (structof(form) == MalError) {
    return form
  }

  head = MalSymbol(val="with-meta")
  triple = [&head, &form, &meta]
  return MalList(val=&triple)
}
135 | ||
func read_form(rdr)
/* DOCUMENT read_form(rdr)
     Read one complete form from RDR, choosing the reader by looking at
     (but not consuming) the next token:
       '  `  ~  ~@  @   reader_macro with the matching symbol name
       ^                reader_with_meta_macro
       (  [  {          read_list, read_vector, read_hashmap
       )  ]  }          MalError, since no sequence is open at this point
       anything else    read_atom
 */
{
  token = reader_peek(rdr)

  // Reader shortcuts.
  if (token == "'") return reader_macro(rdr, "quote")
  if (token == "`") return reader_macro(rdr, "quasiquote")
  if (token == "~") return reader_macro(rdr, "unquote")
  if (token == "~@") return reader_macro(rdr, "splice-unquote")
  if (token == "@") return reader_macro(rdr, "deref")
  if (token == "^") return reader_with_meta_macro(rdr)

  // Sequences, each followed by its stray-closer error.
  if (token == "(") return read_list(rdr)
  if (token == ")") return MalError(message="unexpected ')'")
  if (token == "[") return read_vector(rdr)
  if (token == "]") return MalError(message="unexpected ']'")
  if (token == "{") return read_hashmap(rdr)
  if (token == "}") return MalError(message="unexpected '}'")

  return read_atom(rdr)
}
153 | ||
func read_str(str)
/* DOCUMENT read_str(str)
     Tokenize STR, position a Reader on the first token, and return the
     result of reading a single form with read_form.
 */
{
  token_list = tokenize(str)
  reader = Reader(tokens=&token_list, pos=1)
  return read_form(reader)
}