Commit | Line | Data |
---|---|---|
673ee61c CM |
1 | with Ada.IO_Exceptions; |
2 | with Ada.Characters.Latin_1; | |
7895cb30 | 3 | with Ada.Exceptions; |
673ee61c CM |
4 | with Ada.Strings.Maps.Constants; |
5 | with Ada.Strings.Unbounded; | |
6 | with Ada.Text_IO; | |
acb927d4 | 7 | with Smart_Pointers; |
705f3d2c | 8 | with Types.Vector; |
874db2ac | 9 | with Types.Hash_Map; |
673ee61c CM |
10 | |
11 | package body Reader is | |
12 | ||
1d7d7515 CM |
13 | use Types; |
14 | ||
6a6d21b8 CM |
15 | package ACL renames Ada.Characters.Latin_1; |
16 | ||
ee927d07 CM |
17 | type Lexemes is (Ignored_Tok, |
18 | Start_List_Tok, Start_Vector_Tok, Start_Hash_Tok, | |
19 | Meta_Tok, Deref_Tok, | |
20 | Quote_Tok, Quasi_Quote_Tok, Splice_Unq_Tok, Unquote_Tok, | |
21 | Int_Tok, Float_Tok, | |
51fa7633 | 22 | Str_Tok, Sym_Tok); |
673ee61c | 23 | |
ee927d07 CM |
24 | type Token (ID : Lexemes := Ignored_Tok) is record |
25 | case ID is | |
26 | when Int_Tok => | |
27 | Int_Val : Mal_Integer; | |
28 | when Float_Tok => | |
29 | Float_Val : Mal_Float; | |
51fa7633 | 30 | when Str_Tok | Sym_Tok => |
3428be48 | 31 | Start_Char, Stop_Char : Natural; |
ee927d07 CM |
32 | when others => null; |
33 | end case; | |
34 | end record; | |
35 | ||
673ee61c | 36 | Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set := |
936693cf CM |
37 | Ada.Strings.Maps.To_Set |
38 | (ACL.HT & ACL.LF & ACL.CR & ACL.Space & ACL.Comma); | |
673ee61c | 39 | |
9e76408b CM |
40 | -- [^\s\[\]{}('"`,;)] |
41 | Terminator_Syms : Ada.Strings.Maps.Character_Set := | |
42 | Ada.Strings.Maps."or" | |
43 | (Lisp_Whitespace, | |
44 | Ada.Strings.Maps.To_Set ("[]{}('""`,;)")); | |
673ee61c | 45 | |
7895cb30 CM |
46 | -- The unterminated string error |
47 | String_Error : exception; | |
48 | ||
acb927d4 | 49 | |
a974463a CM |
50 | function Convert_String (S : String) return String is |
51 | use Ada.Strings.Unbounded; | |
52 | Res : Unbounded_String; | |
53 | I : Positive; | |
54 | Str_Last : Natural; | |
55 | begin | |
56 | Str_Last := S'Last; | |
57 | I := S'First; | |
58 | while I <= Str_Last loop | |
59 | if S (I) = '\' then | |
60 | if I+1 > Str_Last then | |
61 | Append (Res, S (I)); | |
62 | I := I + 1; | |
63 | elsif S (I+1) = 'n' then | |
64 | Append (Res, Ada.Characters.Latin_1.LF); | |
65 | I := I + 2; | |
66 | elsif S (I+1) = '"' then | |
67 | Append (Res, S (I+1)); | |
68 | I := I + 2; | |
69 | elsif S (I+1) = '\' then | |
70 | Append (Res, S (I+1)); | |
71 | I := I + 2; | |
72 | else | |
563eba42 NB |
73 | Append (Res, S (I .. I+1)); |
74 | I := I + 2; | |
a974463a CM |
75 | end if; |
76 | else | |
77 | Append (Res, S (I)); | |
78 | I := I + 1; | |
79 | end if; | |
80 | end loop; | |
81 | return To_String (Res); | |
82 | end Convert_String; | |
673ee61c | 83 | |
3428be48 CM |
84 | Str_Len : Natural := 0; |
85 | Saved_Line : Ada.Strings.Unbounded.Unbounded_String; | |
86 | Char_To_Read : Natural := 1; | |
7895cb30 | 87 | |
ee927d07 CM |
88 | function Get_Token return Token is |
89 | Res : Token; | |
3428be48 CM |
90 | I, J : Natural; |
91 | use Ada.Strings.Unbounded; | |
673ee61c | 92 | begin |
2ef4808f CM |
93 | |
94 | <<Tail_Call_Opt>> | |
ee927d07 CM |
95 | |
96 | -- Skip over whitespace... | |
9e76408b CM |
97 | I := Char_To_Read; |
98 | while I <= Str_Len and then | |
3428be48 | 99 | Ada.Strings.Maps.Is_In (Element (Saved_Line, I), Lisp_Whitespace) loop |
9e76408b CM |
100 | I := I + 1; |
101 | end loop; | |
7895cb30 | 102 | |
9e76408b CM |
103 | -- Filter out lines consisting of only whitespace |
104 | if I > Str_Len then | |
ee927d07 | 105 | return (ID => Ignored_Tok); |
9e76408b | 106 | end if; |
7895cb30 | 107 | |
9e76408b | 108 | J := I; |
ee927d07 | 109 | |
3428be48 | 110 | case Element (Saved_Line, J) is |
ee927d07 CM |
111 | |
112 | when ''' => Res := (ID => Quote_Tok); Char_To_Read := J+1; | |
113 | ||
114 | when '`' => Res := (ID => Quasi_Quote_Tok); Char_To_Read := J+1; | |
115 | ||
116 | when '~' => -- Tilde | |
117 | ||
3428be48 | 118 | if J+1 <= Str_Len and then Element (Saved_Line, J+1) = '@' then |
ee927d07 | 119 | Res := (ID => Splice_Unq_Tok); |
9e76408b CM |
120 | Char_To_Read := J+2; |
121 | else | |
ee927d07 CM |
122 | -- Just a Tilde |
123 | Res := (ID => Unquote_Tok); | |
9e76408b CM |
124 | Char_To_Read := J+1; |
125 | end if; | |
ee927d07 CM |
126 | |
127 | when '(' => Res := (ID => Start_List_Tok); Char_To_Read := J+1; | |
128 | when '[' => Res := (ID => Start_Vector_Tok); Char_To_Read := J+1; | |
129 | when '{' => Res := (ID => Start_Hash_Tok); Char_To_Read := J+1; | |
130 | ||
131 | when '^' => Res := (ID => Meta_Tok); Char_To_Read := J+1; | |
132 | when '@' => Res := (ID => Deref_Tok); Char_To_Read := J+1; | |
133 | ||
134 | when ']' | '}' | ')' => | |
9e76408b | 135 | |
51fa7633 | 136 | Res := (ID => Sym_Tok, Start_Char => J, Stop_Char => J); |
9e76408b CM |
137 | Char_To_Read := J+1; |
138 | ||
139 | when '"' => -- a string | |
140 | ||
141 | -- Skip over " | |
142 | J := J + 1; | |
143 | while J <= Str_Len and then | |
3428be48 CM |
144 | (Element (Saved_Line, J) /= '"' or else |
145 | Element (Saved_Line, J-1) = '\') loop | |
9e76408b CM |
146 | J := J + 1; |
147 | end loop; | |
7895cb30 | 148 | |
9e76408b CM |
149 | -- So we either ran out of string.. |
150 | if J > Str_Len then | |
7895cb30 | 151 | raise String_Error; |
9e76408b CM |
152 | end if; |
153 | ||
154 | -- or we reached an unescaped " | |
ee927d07 | 155 | Res := (ID => Str_Tok, Start_Char => I, Stop_Char => J); |
9e76408b CM |
156 | Char_To_Read := J + 1; |
157 | ||
158 | when ';' => -- a comment | |
159 | ||
4356e17f CM |
160 | -- Read to the end of the line or until |
161 | -- the saved_line string is exhausted. | |
162 | -- NB if we reach the end we don't care | |
163 | -- what the last char was. | |
3428be48 | 164 | while J < Str_Len and Element (Saved_Line, J) /= ACL.LF loop |
9e76408b CM |
165 | J := J + 1; |
166 | end loop; | |
4356e17f | 167 | if J = Str_Len then |
ee927d07 | 168 | Res := (ID => Ignored_Tok); |
4356e17f CM |
169 | else |
170 | Char_To_Read := J + 1; | |
2ef4808f CM |
171 | -- was: Res := Get_Token; |
172 | goto Tail_Call_Opt; | |
4356e17f | 173 | end if; |
9e76408b CM |
174 | |
175 | when others => -- an atom | |
176 | ||
177 | while J <= Str_Len and then | |
3428be48 | 178 | not Ada.Strings.Maps.Is_In (Element (Saved_Line, J), Terminator_Syms) loop |
9e76408b CM |
179 | J := J + 1; |
180 | end loop; | |
181 | ||
182 | -- Either we ran out of string or | |
183 | -- the one at J was the start of a new token | |
184 | Char_To_Read := J; | |
185 | J := J - 1; | |
186 | ||
ee927d07 CM |
187 | declare |
188 | Dots : Natural; | |
189 | All_Digits : Boolean; | |
190 | begin | |
191 | -- check if all digits or . | |
192 | Dots := 0; | |
193 | All_Digits := True; | |
194 | for K in I .. J loop | |
c2df4f13 CM |
195 | if (K = I and K /= J) and then Element (Saved_Line, K) = '-' then |
196 | null; | |
197 | elsif Element (Saved_Line, K) = '.' then | |
ee927d07 | 198 | Dots := Dots + 1; |
3428be48 | 199 | elsif not (Element (Saved_Line, K) in '0' .. '9') then |
ee927d07 CM |
200 | All_Digits := False; |
201 | exit; | |
202 | end if; | |
203 | end loop; | |
204 | ||
205 | if All_Digits then | |
206 | if Dots = 0 then | |
207 | Res := | |
208 | (ID => Int_Tok, | |
3428be48 | 209 | Int_Val => Mal_Integer'Value (Slice (Saved_Line, I, J))); |
ee927d07 CM |
210 | elsif Dots = 1 then |
211 | Res := | |
212 | (ID => Float_Tok, | |
3428be48 | 213 | Float_Val => Mal_Float'Value (Slice (Saved_Line, I, J))); |
ee927d07 | 214 | else |
51fa7633 | 215 | Res := (ID => Sym_Tok, Start_Char => I, Stop_Char => J); |
ee927d07 | 216 | end if; |
9e76408b | 217 | else |
51fa7633 | 218 | Res := (ID => Sym_Tok, Start_Char => I, Stop_Char => J); |
9e76408b | 219 | end if; |
ee927d07 CM |
220 | |
221 | end; | |
9e76408b CM |
222 | |
223 | end case; | |
224 | ||
225 | return Res; | |
7895cb30 | 226 | |
673ee61c CM |
227 | end Get_Token; |
228 | ||
229 | ||
e808554d | 230 | function Read_List (LT : Types.List_Types) |
fbad73cb | 231 | return Types.Mal_Handle is |
acb927d4 | 232 | |
0844c154 | 233 | MTA : Mal_Handle; |
acb927d4 | 234 | |
673ee61c | 235 | begin |
acb927d4 | 236 | |
13ce1681 CM |
237 | MTA := Read_Form; |
238 | ||
18e21187 CM |
239 | declare |
240 | List_SP : Mal_Handle; | |
241 | List_P : List_Class_Ptr; | |
242 | Close : String (1..1) := (1 => Types.Closing (LT)); | |
243 | begin | |
13ce1681 | 244 | |
18e21187 CM |
245 | case LT is |
246 | when List_List => List_SP := New_List_Mal_Type (List_Type => LT); | |
247 | when Vector_List => List_SP := Vector.New_Vector_Mal_Type; | |
248 | when Hashed_List => List_SP := Hash_Map.New_Hash_Map_Mal_Type; | |
249 | end case; | |
673ee61c | 250 | |
18e21187 CM |
251 | -- Need to append to a variable so... |
252 | List_P := Deref_List_Class (List_SP); | |
673ee61c | 253 | |
18e21187 | 254 | loop |
02840f75 | 255 | |
18e21187 | 256 | if Is_Null (MTA) then |
2adfa11c | 257 | return New_Error_Mal_Type (Str => "expected '" & Close & "', got EOF"); |
18e21187 | 258 | end if; |
02840f75 | 259 | |
18e21187 CM |
260 | exit when Deref (MTA).Sym_Type = Sym and then |
261 | Symbol_Mal_Type (Deref (MTA).all).Get_Sym = Close; | |
02840f75 | 262 | |
18e21187 CM |
263 | Append (List_P.all, MTA); |
264 | ||
265 | MTA := Read_Form; | |
266 | ||
267 | end loop; | |
268 | ||
269 | return List_SP; | |
270 | ||
271 | end; | |
272 | ||
273 | end Read_List; | |
ee927d07 CM |
274 | |
275 | ||
fbad73cb | 276 | function Read_Form return Types.Mal_Handle is |
ee927d07 | 277 | Tok : Token; |
fbad73cb | 278 | MTS : Mal_Handle; |
3428be48 | 279 | use Ada.Strings.Unbounded; |
673ee61c | 280 | begin |
6a6d21b8 | 281 | |
ee927d07 | 282 | Tok := Get_Token; |
6a6d21b8 | 283 | |
ee927d07 | 284 | case Tok.ID is |
6a6d21b8 | 285 | |
ee927d07 | 286 | when Ignored_Tok => return Smart_Pointers.Null_Smart_Pointer; |
9cbc9695 | 287 | |
ee927d07 | 288 | when Int_Tok => return New_Int_Mal_Type (Tok.Int_Val); |
38d0c57f | 289 | |
ee927d07 | 290 | when Float_Tok => return New_Float_Mal_Type (Tok.Float_Val); |
38d0c57f | 291 | |
ee927d07 | 292 | when Start_List_Tok => return Read_List (List_List); |
38d0c57f | 293 | |
ee927d07 CM |
294 | when Start_Vector_Tok => return Read_List (Vector_List); |
295 | ||
296 | when Start_Hash_Tok => return Read_List (Hashed_List); | |
38d0c57f | 297 | |
ee927d07 | 298 | when Meta_Tok => |
02840f75 | 299 | |
ee927d07 CM |
300 | declare |
301 | Meta, Obj : Mal_Handle; | |
302 | begin | |
303 | Meta := Read_Form; | |
304 | Obj := Read_Form; | |
18e21187 CM |
305 | return Make_New_List |
306 | ((1 => New_Symbol_Mal_Type ("with-meta"), | |
307 | 2 => Obj, | |
308 | 3 => Meta)); | |
ee927d07 | 309 | end; |
38d0c57f | 310 | |
18e21187 CM |
311 | when Deref_Tok => |
312 | ||
313 | return Make_New_List | |
314 | ((1 => New_Symbol_Mal_Type ("deref"), | |
315 | 2 => Read_Form)); | |
316 | ||
317 | when Quote_Tok => | |
318 | ||
319 | return Make_New_List | |
320 | ((1 => New_Symbol_Mal_Type ("quote"), | |
321 | 2 => Read_Form)); | |
322 | ||
323 | when Quasi_Quote_Tok => | |
324 | ||
325 | return Make_New_List | |
326 | ((1 => New_Symbol_Mal_Type ("quasiquote"), | |
327 | 2 => Read_Form)); | |
6a6d21b8 | 328 | |
18e21187 | 329 | when Splice_Unq_Tok => |
7895cb30 | 330 | |
18e21187 CM |
331 | return Make_New_List |
332 | ((1 => New_Symbol_Mal_Type ("splice-unquote"), | |
333 | 2 => Read_Form)); | |
7895cb30 | 334 | |
18e21187 | 335 | when Unquote_Tok => |
ee927d07 | 336 | |
18e21187 CM |
337 | return Make_New_List |
338 | ((1 => New_Symbol_Mal_Type ("unquote"), | |
339 | 2 => Read_Form)); | |
ee927d07 CM |
340 | |
341 | when Str_Tok => | |
342 | ||
564a4525 CM |
343 | -- +/-1 strips out the double quotes. |
344 | -- Convert_String converts backquoted charaters to raw format. | |
ee927d07 | 345 | return New_String_Mal_Type |
564a4525 CM |
346 | (Convert_String |
347 | (Slice (Saved_Line, Tok.Start_Char + 1, Tok.Stop_Char - 1))); | |
ee927d07 | 348 | |
51fa7633 | 349 | when Sym_Tok => |
ee927d07 | 350 | |
91285859 CM |
351 | -- Mal interpreter is required to know about true, false and nil. |
352 | declare | |
353 | S : String := Slice (Saved_Line, Tok.Start_Char, Tok.Stop_Char); | |
354 | begin | |
355 | if S = "true" then | |
356 | return New_Bool_Mal_Type (True); | |
357 | elsif S = "false" then | |
358 | return New_Bool_Mal_Type (False); | |
8083b525 CM |
359 | elsif S = "nil" then |
360 | return New_Nil_Mal_Type; | |
91285859 CM |
361 | else |
362 | return New_Symbol_Mal_Type (S); | |
363 | end if; | |
364 | end; | |
ee927d07 CM |
365 | |
366 | end case; | |
6a6d21b8 | 367 | |
673ee61c CM |
368 | end Read_Form; |
369 | ||
ee927d07 | 370 | |
8c49f5a7 CM |
371 | procedure Lex_Init (S : String) is |
372 | begin | |
9e76408b | 373 | Str_Len := S'Length; |
3428be48 | 374 | Saved_Line := Ada.Strings.Unbounded.To_Unbounded_String (S); |
9e76408b | 375 | Char_To_Read := 1; |
8c49f5a7 | 376 | end Lex_Init; |
673ee61c | 377 | |
ee927d07 | 378 | |
fbad73cb | 379 | function Read_Str (S : String) return Types.Mal_Handle is |
d32be19b | 380 | I, Str_Len : Natural := S'Length; |
673ee61c | 381 | begin |
8c49f5a7 CM |
382 | |
383 | Lex_Init (S); | |
384 | ||
673ee61c | 385 | return Read_Form; |
9e76408b | 386 | |
1d7d7515 CM |
387 | exception |
388 | when String_Error => | |
2adfa11c | 389 | return New_Error_Mal_Type (Str => "expected '""', got EOF"); |
673ee61c CM |
390 | end Read_Str; |
391 | ||
392 | ||
393 | end Reader; |