Merge pull request #337 from AndreaCrotti/upgrade-libraries
[jackhill/mal.git] / yorick / reader.i
CommitLineData
21986733
DM
1#include "yeti_regex.i"
2require, "types.i"
3
// Compiled pattern used by tokenize(). It skips any run of whitespace and
// commas, then captures one token in its first group: "~@", a single special
// character, a double-quoted string (closing quote optional), a ";" comment
// running to the end of the line, or a run of ordinary characters.
TOKENIZER_REGEXP = regcomp("[[:space:],]*(~@|[][{}()'`~@]|\"([\\].|[^\\\"])*\"?|;.*|[^][[:space:]{}()'\"`~@,;]*)", newline=1)
21986733
DM
5
// Split STR into an array of token strings using TOKENIZER_REGEXP.
// Comment tokens (those starting with ";") are dropped. Returns nil when no
// tokens are found.
func tokenize(str)
{
  whole = ""
  group = ""
  cursor = 1
  result = []
  while (1) {
    // With indices=1 the outputs hold index pairs; this code treats the
    // second entry as one past the last matched character.
    if (!regmatch(TOKENIZER_REGEXP, str, whole, group, start=cursor, indices=1)) break
    first = group(1)
    past = group(2)
    last = past - 1
    if (last < first) {
      // Empty token: step forward one character so the scan keeps advancing.
      cursor = past + 1
    } else {
      tok = strpart(str, first:last)
      cursor = past
      // Keep everything except comments.
      if (strpart(tok, 1:1) != ";") grow, result, [tok]
    }
  }
  return result
}
28
// Cursor over a token array: TOKENS points at the array of token strings,
// POS is the 1-based index of the next token to hand out.
struct Reader {
  pointer tokens;
  int pos;
}
33
// Return the token at the reader's current position without consuming it.
// Returns the null string, string(0), once the position is past the last
// token.
func reader_peek(rdr)
{
  if (rdr.pos <= numberof(*rdr.tokens)) {
    return (*rdr.tokens)(rdr.pos)
  }
  return string(0)
}
39
// Return the current token and advance the reader by one position.
// The position is incremented even when the reader is already exhausted, in
// which case the null string from reader_peek is returned.
func reader_next(rdr)
{
  current = reader_peek(rdr)
  rdr.pos += 1
  return current
}
46
// Integer literal: an optional leading minus sign followed by decimal digits.
NUMBER_REGEXP = regcomp("^-?[0-9]+$")
// Well-formed string token: an opening quote, then any mix of backslash escape
// pairs and characters that are neither backslash nor quote, then a closing
// quote. The body mirrors the string alternative of the tokenizer pattern so
// that a token ending in an escaped quote (e.g. "abc\") is NOT accepted as a
// terminated string.
STR_REGEXP = regcomp("^\"([\\].|[^\\\"])*\"$")
// Any token that begins with a double quote. read_atom tests this after
// STR_REGEXP, so a match here means the string was never properly closed.
STR_BAD_REGEXP = regcomp("^\".*$")
21986733
DM
50
// Convert a quoted string token into its runtime text: strip the surrounding
// quotes and translate the escapes \\, \n and \" into backslash, newline and
// double quote.
func unescape(s)
{
  inner = strpart(s, 2:-1) // drop the first and last character (the quotes)
  // Park escaped backslashes on a placeholder character first, so the
  // backslash they produce cannot be mistaken for the start of another escape.
  parked = streplaceall(inner, "\\\\", "\x01")
  with_newlines = streplaceall(parked, "\\n", "\n")
  with_quotes = streplaceall(with_newlines, "\\\"", "\"")
  // Finally turn the placeholder into a real backslash.
  return streplaceall(with_quotes, "\x01", "\\")
}
59
// Consume one token and turn it into an atomic value: nil, true, false, a
// number, a string, a keyword (leading ":") or, failing all of those, a
// symbol. A token that opens a string without matching STR_REGEXP yields a
// MalError instead.
func read_atom(rdr)
{
  tok = reader_next(rdr)

  // Literal constants.
  if (tok == "nil") return MAL_NIL
  if (tok == "true") return MAL_TRUE
  if (tok == "false") return MAL_FALSE

  // Integers.
  if (regmatch(NUMBER_REGEXP, tok)) return MalNumber(val=tonum(tok))

  // Strings: the complete form is tested first, so the second pattern only
  // catches tokens that start with a quote but are not complete strings.
  if (regmatch(STR_REGEXP, tok)) return MalString(val=unescape(tok))
  if (regmatch(STR_BAD_REGEXP, tok)) return MalError(message=("expected '\"', got EOF"))

  // Keywords keep everything after the leading colon.
  if (strpart(tok, 1:1) == ":") return MalKeyword(val=strpart(tok, 2:))

  return MalSymbol(val=tok)
}
72
// Read a delimited sequence of forms from the reader.
//   rdr        - Reader positioned on the opening delimiter
//   start_char - token that must open the sequence, e.g. "("
//   end_char   - token that closes the sequence, e.g. ")"
// Returns an array of pointers to the parsed forms (nil for an empty
// sequence), or a MalError if the opening token is wrong, the input ends
// before END_CHAR is seen, or a nested form fails to parse.
func read_seq(rdr, start_char, end_char)
{
  token = reader_next(rdr)
  if (token != start_char) {
    // Only a null token really means end of input; otherwise report the
    // token that was actually found instead of claiming EOF.
    if (token == string(0)) got = "EOF"
    else got = "'" + token + "'"
    return MalError(message=("expected '" + start_char + "', got " + got))
  }

  elements = []
  token = reader_peek(rdr)
  while (token != end_char) {
    if (token == string(0)) {
      // Ran out of tokens before the closing delimiter.
      return MalError(message=("expected '" + end_char + "', got EOF"))
    }
    e = read_form(rdr)
    // Propagate the first parse error from a nested form unchanged.
    if (structof(e) == MalError) return e
    grow, elements, [&e]
    token = reader_peek(rdr)
  }
  // Consume the closing delimiter.
  token = reader_next(rdr)
  return elements
}
94
// Parse a parenthesised sequence and wrap it in a MalList.
// A MalError produced while reading the sequence is returned as is.
func read_list(rdr)
{
  items = read_seq(rdr, "(", ")")
  if (structof(items) == MalError) {
    return items
  }
  return MalList(val=&items)
}
101
// Parse a square-bracketed sequence and wrap it in a MalVector.
// A MalError produced while reading the sequence is returned as is.
func read_vector(rdr)
{
  items = read_seq(rdr, "[", "]")
  if (structof(items) == MalError) {
    return items
  }
  return MalVector(val=&items)
}
108
// Parse a brace-delimited sequence and hand its elements to
// array_to_hashmap. A MalError produced while reading the sequence is
// returned as is.
func read_hashmap(rdr)
{
  items = read_seq(rdr, "{", "}")
  if (structof(items) == MalError) {
    return items
  }
  return array_to_hashmap(items)
}
115
// Expand a one-token reader shortcut: consume the shortcut token, read the
// form that follows it, and return the list (SYMBOL_NAME form).
// A MalError from reading the form is returned as is.
func reader_macro(rdr, symbol_name)
{
  // Discard the shortcut token itself; only the following form matters.
  skipped = reader_next(rdr)
  target = read_form(rdr)
  if (structof(target) == MalError) {
    return target
  }
  head = MalSymbol(val=symbol_name)
  pair = [&head, &target]
  return MalList(val=&pair)
}
124
// Expand the with-meta shortcut: consume the shortcut token, read the
// metadata form and then the target form, and return the list
// (with-meta target metadata). Note that the two forms are swapped relative
// to their order in the input. A MalError from either read is returned as is.
func reader_with_meta_macro(rdr)
{
  // Discard the shortcut token itself.
  skipped = reader_next(rdr)

  metadata = read_form(rdr)
  if (structof(metadata) == MalError) {
    return metadata
  }

  target = read_form(rdr)
  if (structof(target) == MalError) {
    return target
  }

  head = MalSymbol(val="with-meta")
  triple = [&head, &target, &metadata]
  return MalList(val=&triple)
}
135
// Parse one complete form, dispatching on the next token without consuming
// it: reader shortcuts, opening delimiters, stray closing delimiters (which
// yield a MalError), and everything else as an atom.
func read_form(rdr)
{
  tok = reader_peek(rdr)

  // Reader shortcuts that wrap the following form in a named call.
  if (tok == "'") return reader_macro(rdr, "quote")
  if (tok == "`") return reader_macro(rdr, "quasiquote")
  if (tok == "~") return reader_macro(rdr, "unquote")
  if (tok == "~@") return reader_macro(rdr, "splice-unquote")
  if (tok == "@") return reader_macro(rdr, "deref")
  if (tok == "^") return reader_with_meta_macro(rdr)

  // Collections; a closing delimiter seen here has no matching opener.
  if (tok == "(") return read_list(rdr)
  if (tok == ")") return MalError(message="unexpected ')'")
  if (tok == "[") return read_vector(rdr)
  if (tok == "]") return MalError(message="unexpected ']'")
  if (tok == "{") return read_hashmap(rdr)
  if (tok == "}") return MalError(message="unexpected '}'")

  return read_atom(rdr)
}
153
// Entry point: tokenize STR and parse the first form from the resulting
// tokens. Returns whatever read_form returns, which may be a MalError.
func read_str(str)
{
  toks = tokenize(str)
  // The reader starts on the first token (positions are 1-based).
  cursor = Reader(tokens=&toks, pos=1)
  return read_form(cursor)
}
159}