// Alias so the rest of the file can write Regex instead of std::regex.
// NOTE(review): the leading number on this and later lines looks like
// line-number residue from extraction rather than code - confirm and strip.
6 typedef std::regex Regex
;
// Integer-literal pattern: an optional sign followed by one or more digits,
// anchored at both ends. readAtom tests tokens against it with
// std::regex_match before falling back to a symbol.
8 static const Regex
intRegex("^[-+]?\\d+$");
// Character class matching a single closing bracket: ), ] or }.
// readForm runs std::regex_match on the peeked token with this pattern and
// reports the token as unexpected when it matches.
9 static const Regex
closeRegex("[\\)\\]}]");
// Pattern consumed by Tokeniser::skipWhitespace. Two alternatives:
//   - a run of whitespace characters and/or commas, or
//   - a semicolon followed by whatever `.*` matches after it
//     (presumably a comment running to the end of the line - confirm).
11 static const Regex
whitespaceRegex("[\\s,]+|;.*");
// Table of token patterns that Tokeniser::nextToken iterates over.
// Visible entries, in order:
//   1. one special character out of  [ ] { } ( ) ' ` ~ ^ @
//   2. a double-quoted string; inside it a backslash escapes the next character
//   3. a run of characters that are none of: whitespace, brackets, braces,
//      parentheses, quotes, backtick, comma, semicolon
// NOTE(review): the source line just before the first visible entry and the
// closing of the initializer are not visible in this excerpt.
12 static const Regex tokenRegexes
[] = {
14 Regex("[\\[\\]{}()'`~^@]"),
15 Regex("\"(?:\\\\.|[^\\\\\"])*\""),
16 Regex("[^\\s\\[\\]{}('\"`,;)]+"),
// Fragments from the interior of the Tokeniser class; the class header and
// several member lines are not visible in this excerpt.

// Constructor declaration: takes the text to tokenise by const reference.
22 Tokeniser(const String
& input
);
// Guard whose message names peek: asserts the tokeniser is not at EOF.
25 ASSERT(!eof(), "Tokeniser reading past EOF in peek\n");
// Same guard, with a message naming next.
30 ASSERT(!eof(), "Tokeniser reading past EOF in next\n");
// True once the read position m_iter has reached m_end
// (presumably the body of eof() - the signature line is not visible).
37 return m_iter
== m_end
;
// Helper declaration; defined further down as Tokeniser::skipWhitespace.
41 void skipWhitespace();
// Helper declaration; defined further down as Tokeniser::matchRegex.
// Returns bool and takes the pattern to try by const reference.
44 bool matchRegex(const Regex
& regex
);
// Iterator type over String, named StringIter
// (presumably the type of m_iter and m_end - their declarations are not visible).
46 typedef String::const_iterator StringIter
;
// Constructor: starts the read position m_iter at the beginning of input.
// NOTE(review): m_iter is an iterator into input, which is received by
// reference, so the caller's string has to outlive the tokeniser.
// The remaining initialisers and the constructor body are not visible here.
53 Tokeniser::Tokeniser(const String
& input
)
54 : m_iter(input
.begin())
// Tries to match regex at the current read position and, on success, stores
// the matched text in m_token. Does not move m_iter.
// The declaration of match and the body of the failure branch are not
// visible in this excerpt.
60 bool Tokeniser::matchRegex(const Regex
& regex
)
// match_continuous restricts regex_search to matches that begin exactly at
// the first iterator, i.e. at m_iter.
67 auto flags
= std::regex_constants::match_continuous
;
// Search the unread input [m_iter, m_end); the if-body handles a failed match
// (presumably by returning false - not visible).
68 if (!std::regex_search(m_iter
, m_end
, match
, regex
, flags
)) {
// Sanity checks on a successful match: a single submatch (the whole match)...
// NOTE(review): the trailing argument of this ASSERT is on a line not visible here.
72 ASSERT(match
.size() == 1, "Should only have one submatch, not %lu\n",
// ...that starts at offset 0 of the searched range...
74 ASSERT(match
.position(0) == 0, "Need to match first character\n");
// ...and is not empty.
75 ASSERT(match
.length(0) > 0, "Need to match a non-empty string\n");
77 // Don't advance m_iter now, do it after we've consumed the token in
78 // next(). If we do it now, we hit eof() when there's still one token left.
// Remember the matched text; callers use m_token.size() to advance later.
79 m_token
= match
.str(0);
// Moves on to the following token and reports an error when the remaining
// input matches none of the token patterns.
// Several lines of this function are not visible in this excerpt.
84 void Tokeniser::nextToken()
// Step past the token stored by the previous successful match.
86 m_iter
+= m_token
.size();
// Walk the token pattern table; the loop body is not visible
// (presumably it calls matchRegex and returns on the first hit).
93 for (auto &it
: tokenRegexes
) {
// Copy of the input that is still unread, used for error reporting.
99 String
mismatch(m_iter
, m_end
);
// Remaining input that starts with a double quote is reported as a string
// that ran into EOF.
100 if (mismatch
[0] == '"') {
101 MAL_CHECK(false, "Expected \", got EOF");
// Anything else is reported verbatim as unexpected input.
104 MAL_CHECK(false, "Unexpected \"%s\"", mismatch
.c_str());
// Advances m_iter over everything whitespaceRegex matches at the current
// position, repeating until it no longer matches.
108 void Tokeniser::skipWhitespace()
110 while (matchRegex(whitespaceRegex
)) {
// matchRegex stored the matched text in m_token; skip exactly that many
// characters. Note that this overwrites m_token as a side effect.
111 m_iter
+= m_token
.size();
// Forward declarations of the file-local (static) reader helpers, which call
// each other: readForm calls readAtom and readList, and readAtom, readList
// and processMacro all call readForm.
115 static malValuePtr
readAtom(Tokeniser
& tokeniser
);
116 static malValuePtr
readForm(Tokeniser
& tokeniser
);
// NOTE(review): the rest of this declaration (the parameter after items) is
// on a line not visible in this excerpt.
117 static void readList(Tokeniser
& tokeniser
, malValueVec
* items
,
119 static malValuePtr
processMacro(Tokeniser
& tokeniser
, const String
& symbol
);
// Reader entry point (not static, unlike the helpers): parses one form from input.
// Throws malEmptyInputException when the tokeniser is at EOF straight after
// construction; otherwise returns whatever readForm produces.
121 malValuePtr
readStr(const String
& input
)
123 Tokeniser
tokeniser(input
);
// Nothing to read: signal it with a dedicated exception type.
124 if (tokeniser
.eof()) {
125 throw malEmptyInputException();
127 return readForm(tokeniser
);
// Reads one form: a list, vector or hash when the matching branch is taken,
// otherwise an atom.
// The conditions selecting each branch are on lines not visible in this
// excerpt (presumably they test for an opening bracket - confirm).
130 static malValuePtr
readForm(Tokeniser
& tokeniser
)
// A form is required here, so running out of input is an error.
132 MAL_CHECK(!tokeniser
.eof(), "Expected form, got EOF");
// Look at the token without consuming it.
133 String token
= tokeniser
.peek();
// A closing bracket cannot start a form.
135 MAL_CHECK(!std::regex_match(token
, closeRegex
),
136 "Unexpected \"%s\"", token
.c_str());
// Branch ending in a closing parenthesis: collect items, build a list.
// The vector is held by unique_ptr while readList runs, then released to
// mal::list (presumably that takes ownership - confirm).
140 std::unique_ptr
<malValueVec
> items(new malValueVec
);
141 readList(tokeniser
, items
.get(), ")");
142 return mal::list(items
.release());
// Branch ending in a closing square bracket: same pattern, builds a vector.
146 std::unique_ptr
<malValueVec
> items(new malValueVec
);
147 readList(tokeniser
, items
.get(), "]");
148 return mal::vector(items
.release());
// Branch ending in a closing brace: items is a local object here (its
// declaration is not visible) and mal::hash is given its iterator range.
153 readList(tokeniser
, &items
, "}");
154 return mal::hash(items
.begin(), items
.end(), false);
// No branch taken: the form is an atom.
156 return readAtom(tokeniser
);
// Reads one atom: consumes the next token and turns it into a value.
// Checks run in this order: string literal, keyword, a with-meta branch whose
// condition is not visible here, named constants, reader macros, integer,
// and finally a plain symbol.
// Several lines (the ReaderMacro and Constant type definitions, some table
// rows, closing braces) are not visible in this excerpt.
159 static malValuePtr
readAtom(Tokeniser
& tokeniser
)
// Reader-macro table: token text paired with the symbol it expands to.
165 ReaderMacro macroTable
[] = {
167 { "`", "quasiquote" },
169 { "~@", "splice-unquote" },
// Constant table: literal token text paired with the value it stands for.
177 Constant constantTable
[] = {
178 { "false", mal::falseValue() },
179 { "nil", mal::nilValue() },
180 { "true", mal::trueValue() },
// Consume the token; every check below looks at this copy.
183 String token
= tokeniser
.next();
// Leading double quote: string literal, passed through unescape.
184 if (token
[0] == '"') {
185 return mal::string(unescape(token
));
// Leading colon: keyword.
187 if (token
[0] == ':') {
188 return mal::keyword(token
);
// with-meta branch: two forms are read, meta first and then the value.
191 malValuePtr meta
= readForm(tokeniser
);
192 malValuePtr value
= readForm(tokeniser
);
193 // Note that meta and value switch places
194 return mal::list(mal::symbol("with-meta"), value
, meta
);
// Exact match against a named constant.
196 for (auto &constant
: constantTable
) {
197 if (token
== constant
.token
) {
198 return constant
.value
;
// Exact match against a reader macro: wrap the following form.
// The loop variable is spelled out as a reference named macro, which is the
// name the body below uses (the text had been mis-decoded).
201 for (auto &macro
: macroTable
) {
202 if (token
== macro
.token
) {
203 return processMacro(tokeniser
, macro
.symbol
);
// Whole token looks like an integer literal.
206 if (std::regex_match(token
, intRegex
)) {
207 return mal::integer(token
);
// Anything else is a symbol.
209 return mal::symbol(token
);
// Reads forms into *items until the token equal to end is peeked.
// NOTE(review): the declaration of the end parameter and the loop construct
// around the statements below are on lines not visible in this excerpt.
212 static void readList(Tokeniser
& tokeniser
, malValueVec
* items
,
// Running out of input before the closing token is an error that names it.
216 MAL_CHECK(!tokeniser
.eof(), "Expected \"%s\", got EOF", end
.c_str());
// Closing token reached; the branch body is not visible
// (presumably it consumes the token and returns - confirm).
217 if (tokeniser
.peek() == end
) {
// Otherwise read one more form and append it.
221 items
->push_back(readForm(tokeniser
));
// Expands a reader macro: reads the form that follows and returns a
// two-element list of the named symbol and that form.
225 static malValuePtr
processMacro(Tokeniser
& tokeniser
, const String
& symbol
)
227 return mal::list(mal::symbol(symbol
), readForm(tokeniser
));