// Short aliases for the narrow-string regex types used by the reader.
typedef std::regex Regex;
typedef std::sregex_iterator RegexIter;

// Matches one token, skipping any leading whitespace and commas.
// Capture group 1 holds the token text, which is one of (tried in order):
//   ~@                      the two-character splice-unquote marker
//   [ ] { } ( ) ' ` ~ ^ @   a single special character
//   "..."                   a double-quoted string with backslash escapes
//   ;...                    a comment running to the end of the line
//   anything else           a run of characters that are not whitespace,
//                           brackets, quotes, commas or semicolons
static const Regex tokenRegex("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]{}('\"`,;)]+)");

// Matches a whole token that is an optionally signed decimal integer.
static const Regex intRegex("^[-+]?\\d+$");

// Matches a single closing delimiter: ')', ']' or '}'.
static const Regex closeRegex("[\\)\\]}]");
// Fragments of the Tokeniser class. NOTE(review): the class header, the
// member function signatures and the data member declarations are not
// visible here, so the notes below are limited to what each line shows.

// Constructor declaration: takes the text to be tokenised.
16 Tokeniser(const String
& input
);
// Guard used when peeking: asserts that the tokeniser is not at EOF.
19 ASSERT(!eof(), "Tokeniser reading past EOF in peek");
// Yields capture group 1 of the current match (the token text itself).
20 return m_iter
->str(1);
// Guard used when advancing: asserts that the tokeniser is not at EOF.
24 ASSERT(!eof(), "Tokeniser reading past EOF in next");
// True once the iterator equals m_end; presumably the body of eof() - confirm.
33 return m_iter
== m_end
;
// Declaration of the helper defined later as Tokeniser::skipWhitespace().
37 void skipWhitespace();
// Tokeniser constructor: initialises m_iter to iterate over every match of
// tokenRegex in the input string.
// NOTE(review): the rest of the initialiser list and the constructor body
// are not visible here.
44 Tokeniser::Tokeniser(const String
& input
)
45 : m_iter(input
.begin(), input
.end(), tokenRegex
)
51 static bool isWhitespace(const String
& token
)
53 return token
.empty() || (token
[0] == ';');
// Inspects the text that lies between the previous match and the current
// one, i.e. input that tokenRegex could not consume.
// NOTE(review): several lines of this function (the body of the empty-prefix
// branch and the condition selecting between the two assertions) are not
// visible here.
56 void Tokeniser::checkPrefix()
58 // This is the unmatched portion before the match.
59 auto prefix
= m_iter
->prefix();
// Empty-prefix case; presumably nothing to report - confirm.
61 if (prefix
.length() == 0) {
// A non-empty prefix is converted to a string for error reporting.
65 const String
& text
= prefix
.str();
// Failure reported for what looks like an unterminated string literal.
67 ASSERT(false, "Expected \", got EOF");
// Failure reported for any other unmatched text.
69 ASSERT(false, "Unexpected \"%s\"", text
.c_str());
// Loops while tokens remain and the current token is one that
// isWhitespace() accepts (empty or a ';' comment).
// NOTE(review): the loop body is not visible here.
72 void Tokeniser::skipWhitespace()
74 while (!eof() && isWhitespace(peek())) {
// Forward declarations of the file-local reader helpers, which call one
// another recursively (readForm -> readList / readAtom -> readForm ...).
80 static malValuePtr
readAtom(Tokeniser
& tokeniser
);
81 static malValuePtr
readForm(Tokeniser
& tokeniser
);
// NOTE(review): the final parameter of readList is not visible here.
82 static void readList(Tokeniser
& tokeniser
, malValueVec
* items
,
84 static malValuePtr
processMacro(Tokeniser
& tokeniser
, const String
& symbol
);
86 malValuePtr
readStr(const String
& input
)
88 Tokeniser
tokeniser(input
);
89 if (tokeniser
.eof()) {
90 throw malEmptyInputException();
92 return readForm(tokeniser
);
// Reads one form from the tokeniser. Collections are gathered with
// readList() up to their closing delimiter; everything else is handed to
// readAtom().
// NOTE(review): the conditions selecting each branch are not visible here;
// the closing delimiters passed to readList suggest they test the opening
// "(", "[" and "{" tokens - confirm.
95 static malValuePtr
readForm(Tokeniser
& tokeniser
)
// A form is required here, so EOF is an error.
97 ASSERT(!tokeniser
.eof(), "Expected form, got EOF");
98 String token
= tokeniser
.peek();
// A closing delimiter with no matching opener is rejected.
100 ASSERT(!std::regex_match(token
, closeRegex
),
101 "Unexpected \"%s\"", token
.c_str());
// Branch ending at ")": builds a list. The vector is held by a unique_ptr
// while it is being filled and released to mal::list afterwards.
105 std::unique_ptr
<malValueVec
> items(new malValueVec
);
106 readList(tokeniser
, items
.get(), ")");
107 return mal::list(items
.release());
// Branch ending at "]": builds a vector the same way.
111 std::unique_ptr
<malValueVec
> items(new malValueVec
);
112 readList(tokeniser
, items
.get(), "]");
113 return mal::vector(items
.release());
// Branch ending at "}": produces a list whose first element is the symbol
// hash-map, followed by the items read.
117 std::unique_ptr
<malValueVec
> items(new malValueVec
);
118 items
->push_back(mal::symbol("hash-map"));
119 readList(tokeniser
, items
.get(), "}");
120 return mal::list(items
.release());
// Anything else is an atom.
122 return readAtom(tokeniser
);
// Consumes one token and converts it to a value: string, keyword,
// with-meta form, constant, reader-macro expansion, integer or symbol,
// checked in that order.
// NOTE(review): the ReaderMacro/Constant struct definitions, some table
// entries and some branch conditions are not visible here.
125 static malValuePtr
readAtom(Tokeniser
& tokeniser
)
// Reader-macro table: token text paired with the symbol it expands to.
131 ReaderMacro macroTable
[] = {
133 { "`", "quasiquote" },
135 { "~@", "splice-unquote" },
138 const ReaderMacro
* macroTableEnd
= macroTable
+ ARRAY_SIZE(macroTable
);
// Constant table: token text paired with its value.
144 Constant constTable
[] = {
145 { "false", mal::falseValue() },
146 { "nil", mal::nilValue() },
147 { "true", mal::trueValue() },
149 const Constant
* constTableEnd
= constTable
+ ARRAY_SIZE(constTable
);
// The token is consumed here (next() rather than peek()).
151 String token
= tokeniser
.next();
// A leading double quote marks a string literal; it goes through unescape().
152 if (token
[0] == '"') {
153 return mal::string(unescape(token
));
// A leading colon marks a keyword.
155 if (token
[0] == ':') {
156 return mal::keyword(token
);
// Metadata branch (condition not visible; presumably the "^" token -
// confirm): reads the metadata form first, then the value form.
159 malValuePtr meta
= readForm(tokeniser
);
160 malValuePtr value
= readForm(tokeniser
);
161 // Note that meta and value switch places
162 return mal::list(mal::symbol("with-meta"), value
, meta
);
// Linear search of the constant table for an exact token match.
164 for (Constant
* it
= constTable
; it
!= constTableEnd
; ++it
) {
165 if (token
== it
->token
) {
// Linear search of the reader-macro table; a hit expands via processMacro().
169 for (ReaderMacro
*it
= macroTable
; it
< macroTableEnd
; ++it
) {
170 if (token
== it
->token
) {
171 return processMacro(tokeniser
, it
->symbol
);
// A token that is entirely an optionally signed run of digits is an integer.
174 if (std::regex_match(token
, intRegex
)) {
175 return mal::integer(token
);
// Fallback: everything else is a symbol.
177 return mal::symbol(token
);
// Reads forms into *items until the closing delimiter `end` is the next
// token.
// NOTE(review): the declaration of `end`, the loop header and the code
// inside the terminating branch are not visible here.
180 static void readList(Tokeniser
& tokeniser
, malValueVec
* items
,
// Running out of tokens before the closing delimiter is an error.
184 ASSERT(!tokeniser
.eof(), "Expected \"%s\", got EOF", end
.c_str());
// The closing delimiter ends the sequence.
185 if (tokeniser
.peek() == end
) {
// Otherwise the next form is parsed and appended.
189 items
->push_back(readForm(tokeniser
));
193 static malValuePtr
processMacro(Tokeniser
& tokeniser
, const String
& symbol
)
195 return mal::list(mal::symbol(symbol
), readForm(tokeniser
));