Commit | Line | Data |
---|---|---|
673ee61c CM |
1 | with Ada.IO_Exceptions; |
2 | with Ada.Characters.Latin_1; | |
7895cb30 | 3 | with Ada.Exceptions; |
673ee61c CM |
4 | with Ada.Strings.Maps.Constants; |
5 | with Ada.Strings.Unbounded; | |
6 | with Ada.Text_IO; | |
acb927d4 | 7 | with Smart_Pointers; |
705f3d2c | 8 | with Types.Vector; |
874db2ac | 9 | with Types.Hash_Map; |
673ee61c CM |
10 | |
11 | package body Reader is | |
12 | ||
1d7d7515 CM |
13 | use Types; |
14 | ||
6a6d21b8 CM |
15 | package ACL renames Ada.Characters.Latin_1; |
16 | ||
ee927d07 CM |
17 | type Lexemes is (Ignored_Tok, |
18 | Start_List_Tok, Start_Vector_Tok, Start_Hash_Tok, | |
19 | Meta_Tok, Deref_Tok, | |
20 | Quote_Tok, Quasi_Quote_Tok, Splice_Unq_Tok, Unquote_Tok, | |
21 | Int_Tok, Float_Tok, | |
51fa7633 | 22 | Str_Tok, Sym_Tok); |
673ee61c | 23 | |
ee927d07 CM |
24 | type Token (ID : Lexemes := Ignored_Tok) is record |
25 | case ID is | |
26 | when Int_Tok => | |
27 | Int_Val : Mal_Integer; | |
28 | when Float_Tok => | |
29 | Float_Val : Mal_Float; | |
51fa7633 | 30 | when Str_Tok | Sym_Tok => |
3428be48 | 31 | Start_Char, Stop_Char : Natural; |
ee927d07 CM |
32 | when others => null; |
33 | end case; | |
34 | end record; | |
35 | ||
673ee61c | 36 | Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set := |
936693cf CM |
37 | Ada.Strings.Maps.To_Set |
38 | (ACL.HT & ACL.LF & ACL.CR & ACL.Space & ACL.Comma); | |
673ee61c | 39 | |
9e76408b CM |
40 | -- [^\s\[\]{}('"`,;)] |
41 | Terminator_Syms : Ada.Strings.Maps.Character_Set := | |
42 | Ada.Strings.Maps."or" | |
43 | (Lisp_Whitespace, | |
44 | Ada.Strings.Maps.To_Set ("[]{}('""`,;)")); | |
673ee61c | 45 | |
7895cb30 CM |
46 | -- The unterminated string error |
47 | String_Error : exception; | |
48 | ||
acb927d4 | 49 | |
a974463a CM |
50 | function Convert_String (S : String) return String is |
51 | use Ada.Strings.Unbounded; | |
52 | Res : Unbounded_String; | |
53 | I : Positive; | |
54 | Str_Last : Natural; | |
55 | begin | |
56 | Str_Last := S'Last; | |
57 | I := S'First; | |
58 | while I <= Str_Last loop | |
59 | if S (I) = '\' then | |
60 | if I+1 > Str_Last then | |
61 | Append (Res, S (I)); | |
62 | I := I + 1; | |
63 | elsif S (I+1) = 'n' then | |
64 | Append (Res, Ada.Characters.Latin_1.LF); | |
65 | I := I + 2; | |
66 | elsif S (I+1) = '"' then | |
67 | Append (Res, S (I+1)); | |
68 | I := I + 2; | |
69 | elsif S (I+1) = '\' then | |
70 | Append (Res, S (I+1)); | |
71 | I := I + 2; | |
72 | else | |
7b2080df NB |
73 | Append (Res, S (I)); |
74 | I := I + 1; | |
a974463a CM |
75 | end if; |
76 | else | |
77 | Append (Res, S (I)); | |
78 | I := I + 1; | |
79 | end if; | |
80 | end loop; | |
81 | return To_String (Res); | |
82 | end Convert_String; | |
673ee61c | 83 | |
3428be48 CM |
84 | Str_Len : Natural := 0; |
85 | Saved_Line : Ada.Strings.Unbounded.Unbounded_String; | |
86 | Char_To_Read : Natural := 1; | |
7895cb30 | 87 | |
ee927d07 CM |
88 | function Get_Token return Token is |
89 | Res : Token; | |
3428be48 CM |
90 | I, J : Natural; |
91 | use Ada.Strings.Unbounded; | |
673ee61c | 92 | begin |
2ef4808f CM |
93 | |
94 | <<Tail_Call_Opt>> | |
ee927d07 CM |
95 | |
96 | -- Skip over whitespace... | |
9e76408b CM |
97 | I := Char_To_Read; |
98 | while I <= Str_Len and then | |
3428be48 | 99 | Ada.Strings.Maps.Is_In (Element (Saved_Line, I), Lisp_Whitespace) loop |
9e76408b CM |
100 | I := I + 1; |
101 | end loop; | |
7895cb30 | 102 | |
9e76408b CM |
103 | -- Filter out lines consisting of only whitespace |
104 | if I > Str_Len then | |
ee927d07 | 105 | return (ID => Ignored_Tok); |
9e76408b | 106 | end if; |
7895cb30 | 107 | |
9e76408b | 108 | J := I; |
ee927d07 | 109 | |
3428be48 | 110 | case Element (Saved_Line, J) is |
ee927d07 CM |
111 | |
112 | when ''' => Res := (ID => Quote_Tok); Char_To_Read := J+1; | |
113 | ||
114 | when '`' => Res := (ID => Quasi_Quote_Tok); Char_To_Read := J+1; | |
115 | ||
116 | when '~' => -- Tilde | |
117 | ||
3428be48 | 118 | if J+1 <= Str_Len and then Element (Saved_Line, J+1) = '@' then |
ee927d07 | 119 | Res := (ID => Splice_Unq_Tok); |
9e76408b CM |
120 | Char_To_Read := J+2; |
121 | else | |
ee927d07 CM |
122 | -- Just a Tilde |
123 | Res := (ID => Unquote_Tok); | |
9e76408b CM |
124 | Char_To_Read := J+1; |
125 | end if; | |
ee927d07 CM |
126 | |
127 | when '(' => Res := (ID => Start_List_Tok); Char_To_Read := J+1; | |
128 | when '[' => Res := (ID => Start_Vector_Tok); Char_To_Read := J+1; | |
129 | when '{' => Res := (ID => Start_Hash_Tok); Char_To_Read := J+1; | |
130 | ||
131 | when '^' => Res := (ID => Meta_Tok); Char_To_Read := J+1; | |
132 | when '@' => Res := (ID => Deref_Tok); Char_To_Read := J+1; | |
133 | ||
134 | when ']' | '}' | ')' => | |
9e76408b | 135 | |
51fa7633 | 136 | Res := (ID => Sym_Tok, Start_Char => J, Stop_Char => J); |
9e76408b CM |
137 | Char_To_Read := J+1; |
138 | ||
139 | when '"' => -- a string | |
140 | ||
7b2080df NB |
141 | loop |
142 | if Str_Len <= J then | |
143 | raise String_Error; | |
144 | end if; | |
9e76408b | 145 | J := J + 1; |
7b2080df NB |
146 | exit when Element (Saved_Line, J) = '"'; |
147 | if Element (Saved_Line, J) = '\' then | |
148 | J := J + 1; | |
149 | end if; | |
9e76408b | 150 | end loop; |
7895cb30 | 151 | |
ee927d07 | 152 | Res := (ID => Str_Tok, Start_Char => I, Stop_Char => J); |
9e76408b CM |
153 | Char_To_Read := J + 1; |
154 | ||
155 | when ';' => -- a comment | |
156 | ||
4356e17f CM |
157 | -- Read to the end of the line or until |
158 | -- the saved_line string is exhausted. | |
159 | -- NB if we reach the end we don't care | |
160 | -- what the last char was. | |
3428be48 | 161 | while J < Str_Len and Element (Saved_Line, J) /= ACL.LF loop |
9e76408b CM |
162 | J := J + 1; |
163 | end loop; | |
4356e17f | 164 | if J = Str_Len then |
ee927d07 | 165 | Res := (ID => Ignored_Tok); |
4356e17f CM |
166 | else |
167 | Char_To_Read := J + 1; | |
2ef4808f CM |
168 | -- was: Res := Get_Token; |
169 | goto Tail_Call_Opt; | |
4356e17f | 170 | end if; |
9e76408b CM |
171 | |
172 | when others => -- an atom | |
173 | ||
174 | while J <= Str_Len and then | |
3428be48 | 175 | not Ada.Strings.Maps.Is_In (Element (Saved_Line, J), Terminator_Syms) loop |
9e76408b CM |
176 | J := J + 1; |
177 | end loop; | |
178 | ||
179 | -- Either we ran out of string or | |
180 | -- the one at J was the start of a new token | |
181 | Char_To_Read := J; | |
182 | J := J - 1; | |
183 | ||
ee927d07 CM |
184 | declare |
185 | Dots : Natural; | |
186 | All_Digits : Boolean; | |
187 | begin | |
188 | -- check if all digits or . | |
189 | Dots := 0; | |
190 | All_Digits := True; | |
191 | for K in I .. J loop | |
c2df4f13 CM |
192 | if (K = I and K /= J) and then Element (Saved_Line, K) = '-' then |
193 | null; | |
194 | elsif Element (Saved_Line, K) = '.' then | |
ee927d07 | 195 | Dots := Dots + 1; |
3428be48 | 196 | elsif not (Element (Saved_Line, K) in '0' .. '9') then |
ee927d07 CM |
197 | All_Digits := False; |
198 | exit; | |
199 | end if; | |
200 | end loop; | |
201 | ||
202 | if All_Digits then | |
203 | if Dots = 0 then | |
204 | Res := | |
205 | (ID => Int_Tok, | |
3428be48 | 206 | Int_Val => Mal_Integer'Value (Slice (Saved_Line, I, J))); |
ee927d07 CM |
207 | elsif Dots = 1 then |
208 | Res := | |
209 | (ID => Float_Tok, | |
3428be48 | 210 | Float_Val => Mal_Float'Value (Slice (Saved_Line, I, J))); |
ee927d07 | 211 | else |
51fa7633 | 212 | Res := (ID => Sym_Tok, Start_Char => I, Stop_Char => J); |
ee927d07 | 213 | end if; |
9e76408b | 214 | else |
51fa7633 | 215 | Res := (ID => Sym_Tok, Start_Char => I, Stop_Char => J); |
9e76408b | 216 | end if; |
ee927d07 CM |
217 | |
218 | end; | |
9e76408b CM |
219 | |
220 | end case; | |
221 | ||
222 | return Res; | |
7895cb30 | 223 | |
673ee61c CM |
224 | end Get_Token; |
225 | ||
226 | ||
e808554d | 227 | function Read_List (LT : Types.List_Types) |
fbad73cb | 228 | return Types.Mal_Handle is |
acb927d4 | 229 | |
0844c154 | 230 | MTA : Mal_Handle; |
acb927d4 | 231 | |
673ee61c | 232 | begin |
acb927d4 | 233 | |
13ce1681 CM |
234 | MTA := Read_Form; |
235 | ||
18e21187 CM |
236 | declare |
237 | List_SP : Mal_Handle; | |
238 | List_P : List_Class_Ptr; | |
239 | Close : String (1..1) := (1 => Types.Closing (LT)); | |
240 | begin | |
13ce1681 | 241 | |
18e21187 CM |
242 | case LT is |
243 | when List_List => List_SP := New_List_Mal_Type (List_Type => LT); | |
244 | when Vector_List => List_SP := Vector.New_Vector_Mal_Type; | |
245 | when Hashed_List => List_SP := Hash_Map.New_Hash_Map_Mal_Type; | |
246 | end case; | |
673ee61c | 247 | |
18e21187 CM |
248 | -- Need to append to a variable so... |
249 | List_P := Deref_List_Class (List_SP); | |
673ee61c | 250 | |
18e21187 | 251 | loop |
02840f75 | 252 | |
18e21187 | 253 | if Is_Null (MTA) then |
2adfa11c | 254 | return New_Error_Mal_Type (Str => "expected '" & Close & "', got EOF"); |
18e21187 | 255 | end if; |
02840f75 | 256 | |
18e21187 CM |
257 | exit when Deref (MTA).Sym_Type = Sym and then |
258 | Symbol_Mal_Type (Deref (MTA).all).Get_Sym = Close; | |
02840f75 | 259 | |
18e21187 CM |
260 | Append (List_P.all, MTA); |
261 | ||
262 | MTA := Read_Form; | |
263 | ||
264 | end loop; | |
265 | ||
266 | return List_SP; | |
267 | ||
268 | end; | |
269 | ||
270 | end Read_List; | |
ee927d07 CM |
271 | |
272 | ||
fbad73cb | 273 | function Read_Form return Types.Mal_Handle is |
ee927d07 | 274 | Tok : Token; |
fbad73cb | 275 | MTS : Mal_Handle; |
3428be48 | 276 | use Ada.Strings.Unbounded; |
673ee61c | 277 | begin |
6a6d21b8 | 278 | |
ee927d07 | 279 | Tok := Get_Token; |
6a6d21b8 | 280 | |
ee927d07 | 281 | case Tok.ID is |
6a6d21b8 | 282 | |
ee927d07 | 283 | when Ignored_Tok => return Smart_Pointers.Null_Smart_Pointer; |
9cbc9695 | 284 | |
ee927d07 | 285 | when Int_Tok => return New_Int_Mal_Type (Tok.Int_Val); |
38d0c57f | 286 | |
ee927d07 | 287 | when Float_Tok => return New_Float_Mal_Type (Tok.Float_Val); |
38d0c57f | 288 | |
ee927d07 | 289 | when Start_List_Tok => return Read_List (List_List); |
38d0c57f | 290 | |
ee927d07 CM |
291 | when Start_Vector_Tok => return Read_List (Vector_List); |
292 | ||
293 | when Start_Hash_Tok => return Read_List (Hashed_List); | |
38d0c57f | 294 | |
ee927d07 | 295 | when Meta_Tok => |
02840f75 | 296 | |
ee927d07 CM |
297 | declare |
298 | Meta, Obj : Mal_Handle; | |
299 | begin | |
300 | Meta := Read_Form; | |
301 | Obj := Read_Form; | |
18e21187 CM |
302 | return Make_New_List |
303 | ((1 => New_Symbol_Mal_Type ("with-meta"), | |
304 | 2 => Obj, | |
305 | 3 => Meta)); | |
ee927d07 | 306 | end; |
38d0c57f | 307 | |
18e21187 CM |
308 | when Deref_Tok => |
309 | ||
310 | return Make_New_List | |
311 | ((1 => New_Symbol_Mal_Type ("deref"), | |
312 | 2 => Read_Form)); | |
313 | ||
314 | when Quote_Tok => | |
315 | ||
316 | return Make_New_List | |
317 | ((1 => New_Symbol_Mal_Type ("quote"), | |
318 | 2 => Read_Form)); | |
319 | ||
320 | when Quasi_Quote_Tok => | |
321 | ||
322 | return Make_New_List | |
323 | ((1 => New_Symbol_Mal_Type ("quasiquote"), | |
324 | 2 => Read_Form)); | |
6a6d21b8 | 325 | |
18e21187 | 326 | when Splice_Unq_Tok => |
7895cb30 | 327 | |
18e21187 CM |
328 | return Make_New_List |
329 | ((1 => New_Symbol_Mal_Type ("splice-unquote"), | |
330 | 2 => Read_Form)); | |
7895cb30 | 331 | |
18e21187 | 332 | when Unquote_Tok => |
ee927d07 | 333 | |
18e21187 CM |
334 | return Make_New_List |
335 | ((1 => New_Symbol_Mal_Type ("unquote"), | |
336 | 2 => Read_Form)); | |
ee927d07 CM |
337 | |
338 | when Str_Tok => | |
339 | ||
564a4525 CM |
340 | -- +/-1 strips out the double quotes. |
341 | -- Convert_String converts backquoted charaters to raw format. | |
ee927d07 | 342 | return New_String_Mal_Type |
564a4525 CM |
343 | (Convert_String |
344 | (Slice (Saved_Line, Tok.Start_Char + 1, Tok.Stop_Char - 1))); | |
ee927d07 | 345 | |
51fa7633 | 346 | when Sym_Tok => |
ee927d07 | 347 | |
91285859 CM |
348 | -- Mal interpreter is required to know about true, false and nil. |
349 | declare | |
350 | S : String := Slice (Saved_Line, Tok.Start_Char, Tok.Stop_Char); | |
351 | begin | |
352 | if S = "true" then | |
353 | return New_Bool_Mal_Type (True); | |
354 | elsif S = "false" then | |
355 | return New_Bool_Mal_Type (False); | |
8083b525 CM |
356 | elsif S = "nil" then |
357 | return New_Nil_Mal_Type; | |
91285859 CM |
358 | else |
359 | return New_Symbol_Mal_Type (S); | |
360 | end if; | |
361 | end; | |
ee927d07 CM |
362 | |
363 | end case; | |
6a6d21b8 | 364 | |
673ee61c CM |
365 | end Read_Form; |
366 | ||
ee927d07 | 367 | |
8c49f5a7 CM |
368 | procedure Lex_Init (S : String) is |
369 | begin | |
9e76408b | 370 | Str_Len := S'Length; |
3428be48 | 371 | Saved_Line := Ada.Strings.Unbounded.To_Unbounded_String (S); |
9e76408b | 372 | Char_To_Read := 1; |
8c49f5a7 | 373 | end Lex_Init; |
673ee61c | 374 | |
ee927d07 | 375 | |
fbad73cb | 376 | function Read_Str (S : String) return Types.Mal_Handle is |
d32be19b | 377 | I, Str_Len : Natural := S'Length; |
673ee61c | 378 | begin |
8c49f5a7 CM |
379 | |
380 | Lex_Init (S); | |
381 | ||
673ee61c | 382 | return Read_Form; |
9e76408b | 383 | |
1d7d7515 CM |
384 | exception |
385 | when String_Error => | |
2adfa11c | 386 | return New_Error_Mal_Type (Str => "expected '""', got EOF"); |
673ee61c CM |
387 | end Read_Str; |
388 | ||
389 | ||
390 | end Reader; |