Merge pull request #191 from zmower/ada
[jackhill/mal.git] / ada / reader.adb
1 with Ada.IO_Exceptions;
2 with Ada.Characters.Latin_1;
3 with Ada.Exceptions;
4 with Ada.Strings.Maps.Constants;
5 with Ada.Strings.Unbounded;
6 with Ada.Text_IO;
7 with Smart_Pointers;
8 with Types.Vector;
9 with Types.Hash_Map;
10
11 package body Reader is
12
13 use Types;
14
15 package ACL renames Ada.Characters.Latin_1;
16
17 type Lexemes is (Ignored_Tok,
18 Start_List_Tok, Start_Vector_Tok, Start_Hash_Tok,
19 Meta_Tok, Deref_Tok,
20 Quote_Tok, Quasi_Quote_Tok, Splice_Unq_Tok, Unquote_Tok,
21 Int_Tok, Float_Tok,
22 Str_Tok, Sym_Tok);
23
24 type Token (ID : Lexemes := Ignored_Tok) is record
25 case ID is
26 when Int_Tok =>
27 Int_Val : Mal_Integer;
28 when Float_Tok =>
29 Float_Val : Mal_Float;
30 when Str_Tok | Sym_Tok =>
31 Start_Char, Stop_Char : Natural;
32 when others => null;
33 end case;
34 end record;
35
36 Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set :=
37 Ada.Strings.Maps.To_Set
38 (ACL.HT & ACL.LF & ACL.CR & ACL.Space & ACL.Comma);
39
40 -- [^\s\[\]{}('"`,;)]
41 Terminator_Syms : Ada.Strings.Maps.Character_Set :=
42 Ada.Strings.Maps."or"
43 (Lisp_Whitespace,
44 Ada.Strings.Maps.To_Set ("[]{}('""`,;)"));
45
46 -- The unterminated string error
47 String_Error : exception;
48
49
50 function Convert_String (S : String) return String is
51 use Ada.Strings.Unbounded;
52 Res : Unbounded_String;
53 I : Positive;
54 Str_Last : Natural;
55 begin
56 Str_Last := S'Last;
57 I := S'First;
58 while I <= Str_Last loop
59 if S (I) = '\' then
60 if I+1 > Str_Last then
61 Append (Res, S (I));
62 I := I + 1;
63 elsif S (I+1) = 'n' then
64 Append (Res, Ada.Characters.Latin_1.LF);
65 I := I + 2;
66 elsif S (I+1) = '"' then
67 Append (Res, S (I+1));
68 I := I + 2;
69 elsif S (I+1) = '\' then
70 Append (Res, S (I+1));
71 I := I + 2;
72 else
73 Append (Res, S (I));
74 I := I + 1;
75 end if;
76 else
77 Append (Res, S (I));
78 I := I + 1;
79 end if;
80 end loop;
81 return To_String (Res);
82 end Convert_String;
83
84 Str_Len : Natural := 0;
85 Saved_Line : Ada.Strings.Unbounded.Unbounded_String;
86 Char_To_Read : Natural := 1;
87
88 function Get_Token return Token is
89 Res : Token;
90 I, J : Natural;
91 use Ada.Strings.Unbounded;
92 begin
93
94 <<Tail_Call_Opt>>
95
96 -- Skip over whitespace...
97 I := Char_To_Read;
98 while I <= Str_Len and then
99 Ada.Strings.Maps.Is_In (Element (Saved_Line, I), Lisp_Whitespace) loop
100 I := I + 1;
101 end loop;
102
103 -- Filter out lines consisting of only whitespace
104 if I > Str_Len then
105 return (ID => Ignored_Tok);
106 end if;
107
108 J := I;
109
110 case Element (Saved_Line, J) is
111
112 when ''' => Res := (ID => Quote_Tok); Char_To_Read := J+1;
113
114 when '`' => Res := (ID => Quasi_Quote_Tok); Char_To_Read := J+1;
115
116 when '~' => -- Tilde
117
118 if J+1 <= Str_Len and then Element (Saved_Line, J+1) = '@' then
119 Res := (ID => Splice_Unq_Tok);
120 Char_To_Read := J+2;
121 else
122 -- Just a Tilde
123 Res := (ID => Unquote_Tok);
124 Char_To_Read := J+1;
125 end if;
126
127 when '(' => Res := (ID => Start_List_Tok); Char_To_Read := J+1;
128 when '[' => Res := (ID => Start_Vector_Tok); Char_To_Read := J+1;
129 when '{' => Res := (ID => Start_Hash_Tok); Char_To_Read := J+1;
130
131 when '^' => Res := (ID => Meta_Tok); Char_To_Read := J+1;
132 when '@' => Res := (ID => Deref_Tok); Char_To_Read := J+1;
133
134 when ']' | '}' | ')' =>
135
136 Res := (ID => Sym_Tok, Start_Char => J, Stop_Char => J);
137 Char_To_Read := J+1;
138
139 when '"' => -- a string
140
141 -- Skip over "
142 J := J + 1;
143 while J <= Str_Len and then
144 (Element (Saved_Line, J) /= '"' or else
145 Element (Saved_Line, J-1) = '\') loop
146 J := J + 1;
147 end loop;
148
149 -- So we either ran out of string..
150 if J > Str_Len then
151 raise String_Error;
152 end if;
153
154 -- or we reached an unescaped "
155 Res := (ID => Str_Tok, Start_Char => I, Stop_Char => J);
156 Char_To_Read := J + 1;
157
158 when ';' => -- a comment
159
160 -- Read to the end of the line or until
161 -- the saved_line string is exhausted.
162 -- NB if we reach the end we don't care
163 -- what the last char was.
164 while J < Str_Len and Element (Saved_Line, J) /= ACL.LF loop
165 J := J + 1;
166 end loop;
167 if J = Str_Len then
168 Res := (ID => Ignored_Tok);
169 else
170 Char_To_Read := J + 1;
171 -- was: Res := Get_Token;
172 goto Tail_Call_Opt;
173 end if;
174
175 when others => -- an atom
176
177 while J <= Str_Len and then
178 not Ada.Strings.Maps.Is_In (Element (Saved_Line, J), Terminator_Syms) loop
179 J := J + 1;
180 end loop;
181
182 -- Either we ran out of string or
183 -- the one at J was the start of a new token
184 Char_To_Read := J;
185 J := J - 1;
186
187 declare
188 Dots : Natural;
189 All_Digits : Boolean;
190 begin
191 -- check if all digits or .
192 Dots := 0;
193 All_Digits := True;
194 for K in I .. J loop
195 if (K = I and K /= J) and then Element (Saved_Line, K) = '-' then
196 null;
197 elsif Element (Saved_Line, K) = '.' then
198 Dots := Dots + 1;
199 elsif not (Element (Saved_Line, K) in '0' .. '9') then
200 All_Digits := False;
201 exit;
202 end if;
203 end loop;
204
205 if All_Digits then
206 if Dots = 0 then
207 Res :=
208 (ID => Int_Tok,
209 Int_Val => Mal_Integer'Value (Slice (Saved_Line, I, J)));
210 elsif Dots = 1 then
211 Res :=
212 (ID => Float_Tok,
213 Float_Val => Mal_Float'Value (Slice (Saved_Line, I, J)));
214 else
215 Res := (ID => Sym_Tok, Start_Char => I, Stop_Char => J);
216 end if;
217 else
218 Res := (ID => Sym_Tok, Start_Char => I, Stop_Char => J);
219 end if;
220
221 end;
222
223 end case;
224
225 return Res;
226
227 end Get_Token;
228
229
230 function Read_List (LT : Types.List_Types)
231 return Types.Mal_Handle is
232
233 MTA : Mal_Handle;
234
235 begin
236
237 MTA := Read_Form;
238
239 declare
240 List_SP : Mal_Handle;
241 List_P : List_Class_Ptr;
242 Close : String (1..1) := (1 => Types.Closing (LT));
243 begin
244
245 case LT is
246 when List_List => List_SP := New_List_Mal_Type (List_Type => LT);
247 when Vector_List => List_SP := Vector.New_Vector_Mal_Type;
248 when Hashed_List => List_SP := Hash_Map.New_Hash_Map_Mal_Type;
249 end case;
250
251 -- Need to append to a variable so...
252 List_P := Deref_List_Class (List_SP);
253
254 loop
255
256 if Is_Null (MTA) then
257 return New_Error_Mal_Type (Str => "expected '" & Close & "'");
258 end if;
259
260 exit when Deref (MTA).Sym_Type = Sym and then
261 Symbol_Mal_Type (Deref (MTA).all).Get_Sym = Close;
262
263 Append (List_P.all, MTA);
264
265 MTA := Read_Form;
266
267 end loop;
268
269 return List_SP;
270
271 end;
272
273 end Read_List;
274
275
276 function Read_Form return Types.Mal_Handle is
277 Tok : Token;
278 MTS : Mal_Handle;
279 use Ada.Strings.Unbounded;
280 begin
281
282 Tok := Get_Token;
283
284 case Tok.ID is
285
286 when Ignored_Tok => return Smart_Pointers.Null_Smart_Pointer;
287
288 when Int_Tok => return New_Int_Mal_Type (Tok.Int_Val);
289
290 when Float_Tok => return New_Float_Mal_Type (Tok.Float_Val);
291
292 when Start_List_Tok => return Read_List (List_List);
293
294 when Start_Vector_Tok => return Read_List (Vector_List);
295
296 when Start_Hash_Tok => return Read_List (Hashed_List);
297
298 when Meta_Tok =>
299
300 declare
301 Meta, Obj : Mal_Handle;
302 begin
303 Meta := Read_Form;
304 Obj := Read_Form;
305 return Make_New_List
306 ((1 => New_Symbol_Mal_Type ("with-meta"),
307 2 => Obj,
308 3 => Meta));
309 end;
310
311 when Deref_Tok =>
312
313 return Make_New_List
314 ((1 => New_Symbol_Mal_Type ("deref"),
315 2 => Read_Form));
316
317 when Quote_Tok =>
318
319 return Make_New_List
320 ((1 => New_Symbol_Mal_Type ("quote"),
321 2 => Read_Form));
322
323 when Quasi_Quote_Tok =>
324
325 return Make_New_List
326 ((1 => New_Symbol_Mal_Type ("quasiquote"),
327 2 => Read_Form));
328
329 when Splice_Unq_Tok =>
330
331 return Make_New_List
332 ((1 => New_Symbol_Mal_Type ("splice-unquote"),
333 2 => Read_Form));
334
335 when Unquote_Tok =>
336
337 return Make_New_List
338 ((1 => New_Symbol_Mal_Type ("unquote"),
339 2 => Read_Form));
340
341 when Str_Tok =>
342
343 -- +/-1 strips out the double quotes.
344 -- Convert_String converts backquoted charaters to raw format.
345 return New_String_Mal_Type
346 (Convert_String
347 (Slice (Saved_Line, Tok.Start_Char + 1, Tok.Stop_Char - 1)));
348
349 when Sym_Tok =>
350
351 -- Mal interpreter is required to know about true, false and nil.
352 declare
353 S : String := Slice (Saved_Line, Tok.Start_Char, Tok.Stop_Char);
354 begin
355 if S = "true" then
356 return New_Bool_Mal_Type (True);
357 elsif S = "false" then
358 return New_Bool_Mal_Type (False);
359 elsif S = "nil" then
360 return New_Nil_Mal_Type;
361 else
362 return New_Symbol_Mal_Type (S);
363 end if;
364 end;
365
366 end case;
367
368 end Read_Form;
369
370
371 procedure Lex_Init (S : String) is
372 begin
373 Str_Len := S'Length;
374 Saved_Line := Ada.Strings.Unbounded.To_Unbounded_String (S);
375 Char_To_Read := 1;
376 end Lex_Init;
377
378
379 function Read_Str (S : String) return Types.Mal_Handle is
380 I, Str_Len : Natural := S'Length;
381 begin
382
383 Lex_Init (S);
384
385 return Read_Form;
386
387 exception
388 when String_Error =>
389 return New_Error_Mal_Type (Str => "expected '""'");
390 end Read_Str;
391
392
393 end Reader;