c++11: step 2
[jackhill/mal.git] / cpp / Reader.cpp
1 #include "MAL.h"
2 #include "Types.h"
3
4 #include <regex>
5
// Short aliases for the regex machinery used by the reader.
using Regex = std::regex;
using RegexIter = std::sregex_iterator;

// One token per match; the token text is capture group 1. Leading whitespace
// and commas are consumed outside the group. The group accepts, in order:
//   ~@                      the two-character splice-unquote marker
//   one of [ ] { } ( ) ' ` ~ ^ @
//   "..."                   a double-quoted string with backslash escapes
//   ;...                    a comment running to the end of the line
//   anything else           a run of characters that are none of the above
static const Regex tokenRegex("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]{}('\"`,;)]+)");

// An optionally signed run of decimal digits.
static const Regex intRegex("^[-+]?\\d+$");

// Any single closing bracket: ')', ']' or '}'.
static const Regex closeRegex("[\\)\\]}]");
12
13 class Tokeniser
14 {
15 public:
16 Tokeniser(const String& input);
17
18 String peek() const {
19 ASSERT(!eof(), "Tokeniser reading past EOF in peek");
20 return m_iter->str(1);
21 }
22
23 String next() {
24 ASSERT(!eof(), "Tokeniser reading past EOF in next");
25 String ret = peek();
26 checkPrefix();
27 ++m_iter;
28 skipWhitespace();
29 return ret;
30 }
31
32 bool eof() const {
33 return m_iter == m_end;
34 }
35
36 private:
37 void skipWhitespace();
38 void checkPrefix();
39
40 RegexIter m_iter;
41 RegexIter m_end;
42 };
43
44 Tokeniser::Tokeniser(const String& input)
45 : m_iter(input.begin(), input.end(), tokenRegex)
46 , m_end()
47 {
48 skipWhitespace();
49 }
50
51 static bool isWhitespace(const String& token)
52 {
53 return token.empty() || (token[0] == ';');
54 }
55
56 void Tokeniser::checkPrefix()
57 {
58 // This is the unmatched portion before the match.
59 auto prefix = m_iter->prefix();
60
61 if (prefix.length() == 0) {
62 return;
63 }
64
65 const String& text = prefix.str();
66 if (text == "\"") {
67 ASSERT(false, "Expected \", got EOF");
68 }
69 ASSERT(false, "Unexpected \"%s\"", text.c_str());
70 }
71
72 void Tokeniser::skipWhitespace()
73 {
74 while (!eof() && isWhitespace(peek())) {
75 checkPrefix();
76 ++m_iter;
77 }
78 }
79
80 static malValuePtr readAtom(Tokeniser& tokeniser);
81 static malValuePtr readForm(Tokeniser& tokeniser);
82 static void readList(Tokeniser& tokeniser, malValueVec* items,
83 const String& end);
84 static malValuePtr processMacro(Tokeniser& tokeniser, const String& symbol);
85
86 malValuePtr readStr(const String& input)
87 {
88 Tokeniser tokeniser(input);
89 if (tokeniser.eof()) {
90 throw malEmptyInputException();
91 }
92 return readForm(tokeniser);
93 }
94
95 static malValuePtr readForm(Tokeniser& tokeniser)
96 {
97 ASSERT(!tokeniser.eof(), "Expected form, got EOF");
98 String token = tokeniser.peek();
99
100 ASSERT(!std::regex_match(token, closeRegex),
101 "Unexpected \"%s\"", token.c_str());
102
103 if (token == "(") {
104 tokeniser.next();
105 std::unique_ptr<malValueVec> items(new malValueVec);
106 readList(tokeniser, items.get(), ")");
107 return mal::list(items.release());
108 }
109 if (token == "[") {
110 tokeniser.next();
111 std::unique_ptr<malValueVec> items(new malValueVec);
112 readList(tokeniser, items.get(), "]");
113 return mal::vector(items.release());
114 }
115 if (token == "{") {
116 tokeniser.next();
117 std::unique_ptr<malValueVec> items(new malValueVec);
118 items->push_back(mal::symbol("hash-map"));
119 readList(tokeniser, items.get(), "}");
120 return mal::list(items.release());
121 }
122 return readAtom(tokeniser);
123 }
124
125 static malValuePtr readAtom(Tokeniser& tokeniser)
126 {
127 struct ReaderMacro {
128 const char* token;
129 const char* symbol;
130 };
131 ReaderMacro macroTable[] = {
132 { "@", "deref" },
133 { "`", "quasiquote" },
134 { "'", "quote" },
135 { "~@", "splice-unquote" },
136 { "~", "unquote" },
137 };
138 const ReaderMacro* macroTableEnd = macroTable + ARRAY_SIZE(macroTable);
139
140 struct Constant {
141 const char* token;
142 malValuePtr value;
143 };
144 Constant constTable[] = {
145 { "false", mal::falseValue() },
146 { "nil", mal::nilValue() },
147 { "true", mal::trueValue() },
148 };
149 const Constant* constTableEnd = constTable + ARRAY_SIZE(constTable);
150
151 String token = tokeniser.next();
152 if (token[0] == '"') {
153 return mal::string(unescape(token));
154 }
155 if (token[0] == ':') {
156 return mal::keyword(token);
157 }
158 if (token == "^") {
159 malValuePtr meta = readForm(tokeniser);
160 malValuePtr value = readForm(tokeniser);
161 // Note that meta and value switch places
162 return mal::list(mal::symbol("with-meta"), value, meta);
163 }
164 for (Constant* it = constTable; it != constTableEnd; ++it) {
165 if (token == it->token) {
166 return it->value;
167 }
168 }
169 for (ReaderMacro *it = macroTable; it < macroTableEnd; ++it) {
170 if (token == it->token) {
171 return processMacro(tokeniser, it->symbol);
172 }
173 }
174 if (std::regex_match(token, intRegex)) {
175 return mal::integer(token);
176 }
177 return mal::symbol(token);
178 }
179
180 static void readList(Tokeniser& tokeniser, malValueVec* items,
181 const String& end)
182 {
183 while (1) {
184 ASSERT(!tokeniser.eof(), "Expected \"%s\", got EOF", end.c_str());
185 if (tokeniser.peek() == end) {
186 tokeniser.next();
187 return;
188 }
189 items->push_back(readForm(tokeniser));
190 }
191 }
192
193 static malValuePtr processMacro(Tokeniser& tokeniser, const String& symbol)
194 {
195 return mal::list(mal::symbol(symbol), readForm(tokeniser));
196 }