Merge pull request #400 from asarhaddon/improve-mal-impl-macro-no-meta
[jackhill/mal.git] / yorick / reader.i
1 #include "yeti_regex.i"
2 require, "types.i"
3
// Compiled pattern used by tokenize() to pull one token at a time out of the
// source text.  It skips any leading whitespace and commas, then captures
// (group 1) exactly one of:
//   - the two-character token ~@
//   - a single special character: ] [ { } ( ) ' ` ~ @
//   - a double-quoted string with backslash escapes; the closing quote is
//     optional, so an unterminated string still yields a token
//   - a comment: ';' followed by the rest of the matched text (.*)
//   - a (possibly empty) run of characters that are none of: whitespace,
//     ] [ { } ( ) ' " ` ~ @ , ;
// NOTE(review): '^' is not in the special-character class, so it is only
// split off as its own token when followed by one of the excluded characters.
TOKENIZER_REGEXP = regcomp("[[:space:],]*(~@|[][{}()'`~@]|\"([\\].|[^\\\"])*\"?|;.*|[^][[:space:]{}()'\"`~@,;]*)", newline=1)
5
func tokenize(str)
/* DOCUMENT tokenize(str)
     Split the source text STR into an array of token strings.

     TOKENIZER_REGEXP is applied repeatedly, each search starting where the
     previous one stopped.  The text covered by the first capture group is
     appended to the result, except that tokens whose first character is
     ";" (comments) are discarded.  When the capture group is empty the
     scan position is moved one past the reported end so the loop cannot
     stall on the same spot.

     Returns [] when no token was collected.
 */
{
  full_span = ""
  grp_span = ""
  scan_from = 1
  collected = []
  for (;;) {
    matched = regmatch(TOKENIZER_REGEXP, str, full_span, grp_span, start=scan_from, indices=1)
    if (!matched) break

    // grp_span holds the index pair of capture group 1; the second entry is
    // used as "one past the last character".
    tok_begin = grp_span(1)
    tok_end = grp_span(2) - 1

    if (tok_end < tok_begin) {
      // empty capture: step over one character and try again
      scan_from = grp_span(2) + 1
      continue
    }

    scan_from = grp_span(2)
    tok_text = strpart(str, tok_begin:tok_end)

    // comments are recognised by the tokenizer but never handed to the reader
    if (strpart(tok_text, 1:1) != ";") grow, collected, [tok_text]
  }
  return collected
}
28
// Cursor over a token array, as used by reader_peek() and reader_next():
//   tokens - pointer to the array of token strings (read_str() stores the
//            address of the tokenize() result here)
//   pos    - 1-based index of the next token to hand out; a value greater
//            than the number of tokens means the input is exhausted
struct Reader {
  pointer tokens
  int pos
}
33
func reader_peek(rdr)
/* DOCUMENT reader_peek(rdr)
     Return the token at the reader's current position without advancing.
     When the position lies beyond the last token, the nil string
     string(0) is returned as the end-of-input marker.
 */
{
  if (rdr.pos <= numberof(*rdr.tokens)) {
    return (*rdr.tokens)(rdr.pos)
  }
  return string(0)
}
39
func reader_next(rdr)
/* DOCUMENT reader_next(rdr)
     Return the token at the reader's current position (or string(0) at
     end of input, as reported by reader_peek) and then advance rdr.pos
     by one.  The position is incremented even when the input is already
     exhausted.
 */
{
  peeked_tok = reader_peek(rdr)
  rdr.pos += 1   // updates the member of the reader that was passed in
  return peeked_tok
}
46
// Patterns used by read_atom() to classify a single token.
// NUMBER_REGEXP:  the whole token is an optional '-' followed by one or more
//                 decimal digits.
NUMBER_REGEXP = regcomp("^-?[0-9]+$")
// STR_REGEXP:     the whole token is a complete double-quoted string whose
//                 body consists of backslash-escaped characters or characters
//                 other than backslash and double quote.
STR_REGEXP = regcomp("^\"([\\].|[^\\\"])*\"$")
// STR_BAD_REGEXP: the token merely starts with a double quote.  read_atom()
//                 tests it only after STR_REGEXP has failed, so a match there
//                 means the string is not properly terminated.
STR_BAD_REGEXP = regcomp("^\".*$")
50
func unescape(s)
/* DOCUMENT unescape(s)
     Convert the string token S (including its surrounding double quotes)
     into the text it denotes: the first and last characters are dropped
     and the escape sequences \\, \n and \" are translated to a backslash,
     a newline and a double quote respectively.

     Escaped backslashes are first swapped for the placeholder byte \x01 so
     that the later passes cannot misread them as the start of another
     escape; the placeholder is turned back into a single backslash at the
     end.  Consequently any \x01 byte already present in S also comes out
     as a backslash.

     Relies on streplaceall, which is defined outside this file.
 */
{
  unq_text = strpart(s, 2:-1)                          // strip the quotes
  unq_text = streplaceall(unq_text, "\\\\", "\x01")    // park escaped backslashes
  unq_text = streplaceall(unq_text, "\\n", "\n")
  unq_text = streplaceall(unq_text, "\\\"", "\"")
  unq_text = streplaceall(unq_text, "\x01", "\\")      // restore them as one backslash
  return unq_text
}
59
func read_atom(rdr)
/* DOCUMENT read_atom(rdr)
     Consume one token from RDR and turn it into a scalar value.  The
     checks are applied in this order:
       "nil" / "true" / "false"   -> MAL_NIL / MAL_TRUE / MAL_FALSE
       matches NUMBER_REGEXP      -> MalNumber built from tonum(token)
       matches STR_REGEXP         -> MalString holding the unescaped text
       matches STR_BAD_REGEXP     -> MalError (string without closing quote)
       starts with ":"            -> MalKeyword named by the rest of the token
       anything else              -> MalSymbol named by the token
 */
{
  atom_tok = reader_next(rdr)

  // literal constants
  if (atom_tok == "nil") return MAL_NIL
  if (atom_tok == "true") return MAL_TRUE
  if (atom_tok == "false") return MAL_FALSE

  if (regmatch(NUMBER_REGEXP, atom_tok)) {
    return MalNumber(val=tonum(atom_tok))
  }

  // a well-formed string must be tested before the "starts with a quote"
  // fallback, which flags unterminated strings
  if (regmatch(STR_REGEXP, atom_tok)) {
    return MalString(val=unescape(atom_tok))
  }
  if (regmatch(STR_BAD_REGEXP, atom_tok)) {
    return MalError(message=("expected '\"', got EOF"))
  }

  if (strpart(atom_tok, 1:1) == ":") {
    return MalKeyword(val=strpart(atom_tok, 2:))
  }
  return MalSymbol(val=atom_tok)
}
72
func read_seq(rdr, start_char, end_char)
/* DOCUMENT read_seq(rdr, start_char, end_char)
     Read a delimited sequence of forms from RDR.

     The next token must equal START_CHAR; forms are then read with
     read_form until END_CHAR is the upcoming token, which is consumed.

     Returns an array of pointers, one per form read ([] when the sequence
     is empty), or a MalError when
       - the first token is not START_CHAR (the message names the token
         actually found, or EOF when the input was exhausted),
       - the input ends before END_CHAR is seen, or
       - reading one of the elements produced a MalError (that error is
         passed through unchanged).
 */
{
  token = reader_next(rdr)
  if (token != start_char) {
    // Report what was really found; "EOF" only when the input ran out.
    if (token == string(0)) got = "EOF"
    else got = "'" + token + "'"
    return MalError(message=("expected '" + start_char + "', got " + got))
  }

  elements = []
  token = reader_peek(rdr)
  while (token != end_char) {
    if (token == string(0)) {
      return MalError(message=("expected '" + end_char + "', got EOF"))
    }
    e = read_form(rdr)
    if (structof(e) == MalError) return e
    grow, elements, [&e]
    token = reader_peek(rdr)
  }
  token = reader_next(rdr)   // consume the closing delimiter
  return elements
}
94
func read_list(rdr)
/* DOCUMENT read_list(rdr)
     Read a parenthesised sequence "( ... )" from RDR via read_seq and wrap
     the resulting element array in a MalList.  A MalError coming back from
     read_seq is returned unchanged.
 */
{
  list_items = read_seq(rdr, "(", ")")
  if (structof(list_items) != MalError) {
    return MalList(val=&list_items)
  }
  return list_items
}
101
func read_vector(rdr)
/* DOCUMENT read_vector(rdr)
     Read a bracketed sequence "[ ... ]" from RDR via read_seq and wrap the
     resulting element array in a MalVector.  A MalError coming back from
     read_seq is returned unchanged.
 */
{
  vec_items = read_seq(rdr, "[", "]")
  if (structof(vec_items) != MalError) {
    return MalVector(val=&vec_items)
  }
  return vec_items
}
108
func read_hashmap(rdr)
/* DOCUMENT read_hashmap(rdr)
     Read a braced sequence "{ ... }" from RDR via read_seq and hand the
     resulting element array to array_to_hashmap (defined outside this
     file), returning whatever that produces.  A MalError coming back from
     read_seq is returned unchanged.
 */
{
  kv_items = read_seq(rdr, "{", "}")
  if (structof(kv_items) != MalError) {
    return array_to_hashmap(kv_items)
  }
  return kv_items
}
115
func reader_macro(rdr, symbol_name)
/* DOCUMENT reader_macro(rdr, symbol_name)
     Expand a one-token reader shortcut.  The shortcut token itself is
     consumed, the following form is read, and the result is a two-element
     MalList: the symbol SYMBOL_NAME followed by that form.  If reading the
     form yields a MalError, the error is returned instead.
 */
{
  macro_tok = reader_next(rdr)   // skip the shortcut character(s)

  wrapped_form = read_form(rdr)
  if (structof(wrapped_form) == MalError) {
    return wrapped_form
  }

  macro_sym = MalSymbol(val=symbol_name)
  list_cells = [&macro_sym, &wrapped_form]
  return MalList(val=&list_cells)
}
124
func reader_with_meta_macro(rdr)
/* DOCUMENT reader_with_meta_macro(rdr)
     Expand the metadata shortcut.  After consuming the shortcut token, two
     forms are read in source order: first the metadata, then the form it
     applies to.  The result is the MalList (with-meta FORM META), i.e. the
     two forms appear in the list in the opposite order from the source.
     A MalError from either read is returned immediately.
 */
{
  macro_tok = reader_next(rdr)   // skip the shortcut character

  meta_form = read_form(rdr)
  if (structof(meta_form) == MalError) {
    return meta_form
  }

  target_form = read_form(rdr)
  if (structof(target_form) == MalError) {
    return target_form
  }

  macro_sym = MalSymbol(val="with-meta")
  list_cells = [&macro_sym, &target_form, &meta_form]
  return MalList(val=&list_cells)
}
135
func read_form(rdr)
/* DOCUMENT read_form(rdr)
     Read one complete form from RDR, choosing the reader routine from the
     upcoming token (which is only peeked at here; the chosen routine
     consumes it):
       ' ` ~ ~@ @   -> reader_macro with quote / quasiquote / unquote /
                       splice-unquote / deref
       ^            -> reader_with_meta_macro
       ( [ {        -> read_list / read_vector / read_hashmap
       ) ] }        -> MalError, since a closing delimiter cannot start a
                       form (the token is not consumed)
       anything else, including end of input -> read_atom
 */
{
  next_tok = reader_peek(rdr)

  // reader shortcuts
  if (next_tok == "'") return reader_macro(rdr, "quote")
  if (next_tok == "`") return reader_macro(rdr, "quasiquote")
  if (next_tok == "~") return reader_macro(rdr, "unquote")
  if (next_tok == "~@") return reader_macro(rdr, "splice-unquote")
  if (next_tok == "@") return reader_macro(rdr, "deref")
  if (next_tok == "^") return reader_with_meta_macro(rdr)

  // opening delimiters
  if (next_tok == "(") return read_list(rdr)
  if (next_tok == "[") return read_vector(rdr)
  if (next_tok == "{") return read_hashmap(rdr)

  // stray closing delimiters
  if (next_tok == ")") return MalError(message="unexpected ')'")
  if (next_tok == "]") return MalError(message="unexpected ']'")
  if (next_tok == "}") return MalError(message="unexpected '}'")

  return read_atom(rdr)
}
153
func read_str(str)
/* DOCUMENT read_str(str)
     Entry point of the reader: tokenize the source text STR, set up a
     Reader positioned on the first token, and return the result of
     reading a single form from it (a value or a MalError).  Tokens left
     over after that first form are not examined.
 */
{
  token_list = tokenize(str)
  rdr = Reader(pos=1, tokens=&token_list)
  return read_form(rdr)
}