Step 4 of Make-a-Lisp for Erlang
[jackhill/mal.git] / erlang / src / reader.erl
%%%
%%% Reader
%%%

-module(reader).

-export([read_str/1]).

-record(reader, {
    tokens=[], % the input tokens remaining
    tree       % the subtree parsed by a read_* function
}).

-spec read_str(string()) -> {ok, term()} | {error, term()}.
read_str(Input) ->
    case tokenize(Input) of
        {ok, []} -> {ok, none};
        {ok, Tokens} ->
            case read_form(#reader{tokens=Tokens}) of
                % extract the final result of parsing
                {ok, Reader} -> {ok, Reader#reader.tree};
                {error, Reason} -> {error, Reason}
            end;
        {error, Reason} -> {error, Reason}
    end.
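
% Usage sketch (example comments added here, not part of the upstream source;
% the result shapes are inferred from the read_* functions below):
%   reader:read_str("(+ 1 2)").
%   %=> {ok, {list, [{symbol,"+"}, {integer,1}, {integer,2}]}}
%   reader:read_str("").
%   %=> {ok, none}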

-spec read_form(#reader{}) -> {ok, #reader{}} | {error, term()}.
read_form(Reader) ->
    Token = peek(Reader),
    case Token of
        close_list -> {error, "unexpected ')'"};
        close_vector -> {error, "unexpected ']'"};
        close_map -> {error, "unexpected '}'"};
        open_list -> read_list(Reader);
        open_vector -> read_vector(Reader);
        open_map -> read_map(Reader);
        quote -> read_quoted(Reader, Token);
        quasiquote -> read_quoted(Reader, Token);
        unquote -> read_quoted(Reader, Token);
        'splice-unquote' -> read_quoted(Reader, Token);
        deref -> read_quoted(Reader, Token);
        'with-meta' -> read_meta(Reader);
        _ -> read_atom(Reader)
    end.

read_list(Reader) ->
    read_seq(Reader, $), open_list, close_list, list).

read_vector(Reader) ->
    % Erlang has no array/vector type, so just use a list
    read_seq(Reader, $], open_vector, close_vector, vector).

read_map(Reader) ->
    case read_seq(Reader, $}, open_map, close_map, map) of
        {ok, Reader1} ->
            {map, Map} = Reader1#reader.tree,
            NewMap = list_to_map(Map),
            Tokens = Reader1#reader.tokens,
            {ok, #reader{tokens=Tokens, tree={map, NewMap}}};
        {error, Reason} -> {error, Reason}
    end.
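
% Example (illustrative comment, inferred from the clauses above): a map
% literal reads as a {map, ...} tagged Erlang map, e.g.
%   reader:read_str("{:a 1}").
%   %=> {ok, {map, #{{keyword,"a"} => {integer,1}}}}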

read_seq(Reader, CloseChar, OpenDelim, CloseDelim, Type) ->
    {First, Reader1} = next(Reader),
    case First of
        OpenDelim ->
            case read_seq_tail(Reader1, CloseChar, CloseDelim, []) of
                {ok, Reader2} ->
                    % prepend our type tag to the result
                    Result = {Type, Reader2#reader.tree},
                    Reader3 = #reader{tokens=Reader2#reader.tokens, tree=Result},
                    {ok, Reader3};
                {error, Reason} -> {error, Reason}
            end;
        Bogey -> {error, io_lib:format("error in read_seq, expected ~p but got ~p",
                                       [OpenDelim, Bogey])}
    end.

read_seq_tail(Reader, CloseChar, CloseDelim, AccIn) ->
    Token = peek(Reader),
    case Token of
        [] -> {error, io_lib:format("expected '~c', got EOF", [CloseChar])};
        CloseDelim ->
            {_Token, Reader1} = next(Reader),
            Reader2 = #reader{tokens=Reader1#reader.tokens, tree=lists:reverse(AccIn)},
            {ok, Reader2};
        _ ->
            case read_form(Reader) of
                {ok, Reader3} ->
                    read_seq_tail(Reader3, CloseChar, CloseDelim, [Reader3#reader.tree|AccIn]);
                {error, Reason} -> {error, Reason}
            end
    end.

% Convert a list of key/value pairs into a map. The elements are not
% tuples; the keys are the odd-numbered members (1st, 3rd, ...) and the
% values are the even-numbered members. Returns an error tuple if the
% list has an odd number of members.
list_to_map(L) ->
    list_to_map(L, #{}).

list_to_map([], AccIn) ->
    AccIn;
list_to_map([_H], _AccIn) ->
    {error, "odd number of hash-map keys/values"};
list_to_map([K,V|T], AccIn) ->
    list_to_map(T, maps:put(K, V, AccIn)).
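
% Example (illustrative comment, inferred from the clauses above):
%   list_to_map([{keyword,"a"}, {integer,1}]).
%   %=> #{{keyword,"a"} => {integer,1}}
%   list_to_map([{keyword,"a"}]).
%   %=> {error, "odd number of hash-map keys/values"}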

% Convert reader-macro sugar into its normalized list form
% (e.g. 'form => (quote form), `form => (quasiquote form)).
read_quoted(Reader, Token) ->
    % discard the quote token
    {_T, Reader1} = next(Reader),
    case read_form(Reader1) of
        {ok, Reader2} ->
            Result = {list, [{symbol, atom_to_list(Token)}, Reader2#reader.tree]},
            {ok, #reader{tokens=Reader2#reader.tokens, tree=Result}};
        {error, Reason} -> {error, Reason}
    end.
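
% Example (illustrative comment, inferred from the clauses above): the token's
% atom name becomes the symbol at the head of the expanded list, e.g.
%   reader:read_str("'x").
%   %=> {ok, {list, [{symbol,"quote"}, {symbol,"x"}]}}
%   reader:read_str("~@xs").
%   %=> {ok, {list, [{symbol,"splice-unquote"}, {symbol,"xs"}]}}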

read_meta(Reader) ->
    % discard the meta token
    {_T, Reader1} = next(Reader),
    case read_form(Reader1) of
        {ok, Reader2} ->
            M = Reader2#reader.tree,
            case read_form(Reader2) of
                {ok, Reader3} ->
                    X = Reader3#reader.tree,
                    Result = {list, [{symbol, "with-meta"}, X, M]},
                    {ok, #reader{tokens=Reader3#reader.tokens, tree=Result}};
                {error, Reason} -> {error, Reason}
            end;
        {error, Reason} -> {error, Reason}
    end.
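
% Example (illustrative comment, inferred from the clauses above): ^M X reads
% as (with-meta X M), with the metadata argument last, e.g.
%   reader:read_str("^{:a 1} (1 2)").
%   %=> {ok, {list, [{symbol,"with-meta"},
%   %                {list, [{integer,1}, {integer,2}]},
%   %                {map, #{{keyword,"a"} => {integer,1}}}]}}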

read_atom(Reader) ->
    {Token, Reader1} = next(Reader),
    Result = case Token of
        {integer, Value} -> {integer, list_to_integer(Value)};
        {string, String} -> {string, String};
        {keyword, Keyword} -> {keyword, Keyword};
        {symbol, Symbol} ->
            case Symbol of
                "true" -> true;
                "false" -> false;
                "nil" -> nil;
                _ -> {symbol, Symbol}
            end
    end,
    {ok, #reader{tokens=Reader1#reader.tokens, tree=Result}}.
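
% Example (illustrative comment, inferred from the clauses above): the special
% symbols true, false and nil become bare atoms; everything else stays tagged:
%   reader:read_str("nil").
%   %=> {ok, nil}
%   reader:read_str("foo").
%   %=> {ok, {symbol,"foo"}}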

peek(Reader) ->
    case Reader#reader.tokens of
        [] -> [];
        [H|_T] -> H
    end.

next(Reader) ->
    [H|NewTokens] = Reader#reader.tokens,
    {H, #reader{tokens=NewTokens}}.

-spec tokenize(string()) -> {ok, [term()]} | {error, term()}.
tokenize(Input) ->
    tokenize(Input, []).

-spec tokenize(string(), [term()]) -> {ok, [term()]} | {error, term()}.
tokenize(Input, Tokens) ->
    case lex_single(Input) of
        eof -> {ok, lists:reverse(Tokens)};
        {error, Reason} -> {error, Reason};
        {ignored, Rest} -> tokenize(Rest, Tokens);
        {Token, Rest} -> tokenize(Rest, [Token|Tokens])
    end.
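
% Example (illustrative comment, inferred from the clauses below): tokens are
% either bare atoms (delimiters, reader macros) or tagged tuples; integers are
% still strings at this stage and are converted later by read_atom/1, e.g.
%   tokenize("(+ 1 2)").
%   %=> {ok, [open_list, {symbol,"+"}, {integer,"1"}, {integer,"2"}, close_list]}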

lex_single([]) ->
    eof;
lex_single([Char|Rest]) ->
    case Char of
        $( -> {open_list, Rest};
        $) -> {close_list, Rest};
        $[ -> {open_vector, Rest};
        $] -> {close_vector, Rest};
        ${ -> {open_map, Rest};
        $} -> {close_map, Rest};
        $" -> lex_string(Rest, []);
        $; -> lex_comment(Rest);
        $: -> lex_symbol(Rest, keyword);
        $' -> {quote, Rest};
        $` -> {quasiquote, Rest};
        $~ -> lex_unquote(Rest);
        $@ -> {deref, Rest};
        $^ -> {'with-meta', Rest};
        N when N >= $0, N =< $9 -> lex_number(Rest, [Char]);
        S when S == 32; S == $,; S == $\r; S == $\n; S == $\t -> lex_spaces(Rest);
        $\\ -> {error, io_lib:format("bare escape literal ~c~c", [Char, hd(Rest)])};
        $. -> {error, "bare dot (.) not supported"};
        _ -> lex_symbol([Char|Rest], symbol)
    end.

lex_comment([]) ->
    {ignored, []};
lex_comment([C|Rest]) when C == $\r; C == $\n ->
    {ignored, Rest};
lex_comment([_C|Rest]) ->
    lex_comment(Rest).

lex_spaces([C|Rest]) when C == 32; C == $,; C == $\r; C == $\n; C == $\t ->
    lex_spaces(Rest);
lex_spaces(Rest) ->
    {ignored, Rest}.

lex_string([], _String) ->
    {error, "expected '\"', got EOF"};
lex_string([$\\], _String) ->
    % a lone trailing backslash has nothing left to escape
    {error, "end of string reached in escape"};
lex_string([$\\,Escaped|Rest], String) ->
    % unescape the string while building it
    lex_string(Rest, [Escaped|String]);
lex_string([$"|Rest], String) ->
    {{string, lists:reverse(String)}, Rest};
lex_string([C|Rest], String) ->
    lex_string(Rest, [C|String]).
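
% Example (illustrative comment, inferred from the clauses above): the opening
% quote has already been consumed by lex_single/1, and escaped characters are
% copied through verbatim, e.g.
%   lex_string("hi\" tail", []).
%   %=> {{string,"hi"}, " tail"}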

lex_number([N|Rest], Number) when N >= $0, N =< $9 ->
    lex_number(Rest, [N|Number]);
lex_number(Rest, Number) ->
    {{integer, lists:reverse(Number)}, Rest}.

% Lex the remainder of either a keyword or a symbol. The Type is used as
% the tag for the returned tuple (e.g. the atoms keyword or symbol).
lex_symbol(Input, Type) ->
    IsSymbol = fun(C) ->
        is_letter(C) orelse is_digit(C) orelse is_symbol(C)
    end,
    Symbol = lists:takewhile(IsSymbol, Input),
    case Symbol of
        [] -> {error, io_lib:format("invalid symbol: ~10s", [Input])};
        _ -> {{Type, Symbol}, lists:sublist(Input, length(Symbol) + 1, length(Input))}
    end.
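
% Example (illustrative comment, inferred from the clauses above): lexing stops
% at the first character that is not a letter, digit or allowed symbol
% character, e.g.
%   lex_symbol("abc)", symbol).
%   %=> {{symbol,"abc"}, ")"}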

is_digit(C) ->
    C >= $0 andalso C =< $9.

is_letter(C) ->
    C >= $a andalso C =< $z orelse C >= $A andalso C =< $Z.

is_symbol(C) ->
    lists:member(C, "!#$%&*+-/:<=>?@^_|~").

lex_unquote([$@|Rest]) ->
    {'splice-unquote', Rest};
lex_unquote(Rest) ->
    {unquote, Rest}.
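
% Example (illustrative comment, inferred from the clauses above): a '~'
% followed by '@' is the splice-unquote reader macro, otherwise it is a plain
% unquote, e.g.
%   lex_unquote("@xs)").
%   %=> {'splice-unquote', "xs)"}
%   lex_unquote("x)").
%   %=> {unquote, "x)"}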