Commit | Line | Data |
---|---|---|
673ee61c CM |
1 | with Ada.IO_Exceptions; |
2 | with Ada.Characters.Latin_1; | |
7895cb30 | 3 | with Ada.Exceptions; |
673ee61c CM |
4 | with Ada.Strings.Maps.Constants; |
5 | with Ada.Strings.Unbounded; | |
6 | with Ada.Text_IO; | |
acb927d4 | 7 | with Smart_Pointers; |
673ee61c CM |
8 | |
9 | package body Reader is | |
10 | ||
6a6d21b8 CM |
11 | package ACL renames Ada.Characters.Latin_1; |
12 | ||
6b28be38 CM |
13 | type Lexemes is (Whitespace, Comment, |
14 | Int, Float_Tok, Sym, | |
13ce1681 CM |
15 | Nil, True_Tok, False_Tok, |
16 | LE_Tok, GE_Tok, Exp_Tok, Splice_Unq, | |
6b28be38 | 17 | Str, Atom); |
673ee61c | 18 | |
673ee61c | 19 | Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set := |
936693cf CM |
20 | Ada.Strings.Maps.To_Set |
21 | (ACL.HT & ACL.LF & ACL.CR & ACL.Space & ACL.Comma); | |
673ee61c | 22 | |
9e76408b CM |
23 | -- [^\s\[\]{}('"`,;)] |
24 | Terminator_Syms : Ada.Strings.Maps.Character_Set := | |
25 | Ada.Strings.Maps."or" | |
26 | (Lisp_Whitespace, | |
27 | Ada.Strings.Maps.To_Set ("[]{}('""`,;)")); | |
673ee61c | 28 | |
7895cb30 CM |
29 | -- This is raised if an invalid character is encountered |
30 | Lexical_Error : exception; | |
31 | ||
32 | -- The unterminated string error | |
33 | String_Error : exception; | |
34 | ||
acb927d4 | 35 | |
a974463a CM |
36 | function Convert_String (S : String) return String is |
37 | use Ada.Strings.Unbounded; | |
38 | Res : Unbounded_String; | |
39 | I : Positive; | |
40 | Str_Last : Natural; | |
41 | begin | |
42 | Str_Last := S'Last; | |
43 | I := S'First; | |
44 | while I <= Str_Last loop | |
45 | if S (I) = '\' then | |
46 | if I+1 > Str_Last then | |
47 | Append (Res, S (I)); | |
48 | I := I + 1; | |
49 | elsif S (I+1) = 'n' then | |
50 | Append (Res, Ada.Characters.Latin_1.LF); | |
51 | I := I + 2; | |
52 | elsif S (I+1) = '"' then | |
53 | Append (Res, S (I+1)); | |
54 | I := I + 2; | |
55 | elsif S (I+1) = '\' then | |
56 | Append (Res, S (I+1)); | |
57 | I := I + 2; | |
58 | else | |
59 | Append (Res, S (I)); | |
60 | I := I + 1; | |
61 | end if; | |
62 | else | |
63 | Append (Res, S (I)); | |
64 | I := I + 1; | |
65 | end if; | |
66 | end loop; | |
67 | return To_String (Res); | |
68 | end Convert_String; | |
673ee61c | 69 | |
9e76408b CM |
70 | subtype String_Indices is Integer range 0 .. Max_Line_Len; |
71 | ||
72 | Str_Len : String_Indices := 0; | |
7895cb30 | 73 | Saved_Line : String (1..Max_Line_Len); |
9e76408b | 74 | Char_To_Read : String_Indices := 1; |
7895cb30 | 75 | |
fbad73cb | 76 | function Get_Token return Types.Mal_Handle is |
0429a8c1 | 77 | use Types; |
fbad73cb | 78 | Res : Types.Mal_Handle; |
9e76408b CM |
79 | I, J : String_Indices; |
80 | Dots : Natural; | |
81 | All_Digits : Boolean; | |
673ee61c | 82 | begin |
9e76408b CM |
83 | I := Char_To_Read; |
84 | while I <= Str_Len and then | |
85 | Ada.Strings.Maps.Is_In (Saved_Line (I), Lisp_Whitespace) loop | |
86 | I := I + 1; | |
87 | end loop; | |
7895cb30 | 88 | |
9e76408b CM |
89 | -- Filter out lines consisting of only whitespace |
90 | if I > Str_Len then | |
91 | return Smart_Pointers.Null_Smart_Pointer; | |
92 | end if; | |
7895cb30 | 93 | |
9e76408b CM |
94 | J := I; |
95 | case Saved_Line (J) is | |
96 | when '~' => -- Circumflex | |
97 | if J+1 <= Str_Len and then Saved_Line(J+1) = '@' then | |
98 | Res := New_Unitary_Mal_Type | |
99 | (Func => Splice_Unquote, | |
100 | Op => Smart_Pointers.Null_Smart_Pointer); | |
101 | Char_To_Read := J+2; | |
102 | else | |
103 | -- Just a circumflex | |
104 | Res := New_Atom_Mal_Type (Saved_Line (J..J)); | |
105 | Char_To_Read := J+1; | |
106 | end if; | |
107 | when '[' | ']' | | |
108 | '{' | '}' | | |
109 | '(' | ')' | | |
110 | ''' | '`' | | |
111 | '^' | '@' => | |
112 | ||
113 | Res := New_Atom_Mal_Type (Saved_Line (J..J)); | |
114 | Char_To_Read := J+1; | |
115 | ||
116 | when '"' => -- a string | |
117 | ||
118 | -- Skip over " | |
119 | J := J + 1; | |
120 | while J <= Str_Len and then | |
121 | (Saved_Line (J) /= '"' or else | |
122 | Saved_Line (J-1) = '\') loop | |
123 | J := J + 1; | |
124 | end loop; | |
7895cb30 | 125 | |
9e76408b CM |
126 | -- So we either ran out of string.. |
127 | if J > Str_Len then | |
7895cb30 | 128 | raise String_Error; |
9e76408b CM |
129 | end if; |
130 | ||
131 | -- or we reached an unescaped " | |
132 | Res := New_String_Mal_Type | |
133 | (Str => Convert_String (Saved_Line (I .. J))); | |
134 | Char_To_Read := J + 1; | |
135 | ||
136 | when ';' => -- a comment | |
137 | ||
138 | Res := Smart_Pointers.Null_Smart_Pointer; | |
139 | while Saved_Line (J) /= ACL.LF loop | |
140 | J := J + 1; | |
141 | end loop; | |
142 | Char_To_Read := J + 1; | |
143 | Res := Get_Token; | |
144 | ||
145 | when others => -- an atom | |
146 | ||
147 | while J <= Str_Len and then | |
148 | not Ada.Strings.Maps.Is_In (Saved_Line (J), Terminator_Syms) loop | |
149 | J := J + 1; | |
150 | end loop; | |
151 | ||
152 | -- Either we ran out of string or | |
153 | -- the one at J was the start of a new token | |
154 | Char_To_Read := J; | |
155 | J := J - 1; | |
156 | ||
157 | -- check if all digits or . | |
158 | Dots := 0; | |
159 | All_Digits := True; | |
160 | for K in I .. J loop | |
161 | if Saved_Line (K) = '.' then | |
162 | Dots := Dots + 1; | |
163 | elsif not (Saved_Line (K) in '0' .. '9') then | |
164 | All_Digits := False; | |
165 | exit; | |
166 | end if; | |
167 | end loop; | |
168 | ||
169 | if All_Digits then | |
170 | if Dots = 0 then | |
171 | Res := New_Int_Mal_Type | |
172 | (Int => Mal_Integer'Value (Saved_Line (I .. J))); | |
173 | elsif Dots = 1 then | |
174 | Res := New_Float_Mal_Type | |
175 | (Floating => Mal_Float'Value (Saved_Line (I..J))); | |
176 | else | |
177 | Res := New_Atom_Mal_Type (Saved_Line (I..J)); | |
178 | end if; | |
7895cb30 | 179 | else |
9e76408b | 180 | Res := New_Atom_Mal_Type (Saved_Line (I..J)); |
7895cb30 | 181 | end if; |
9e76408b CM |
182 | |
183 | end case; | |
184 | ||
185 | return Res; | |
7895cb30 | 186 | |
673ee61c CM |
187 | end Get_Token; |
188 | ||
189 | ||
190 | -- Parsing | |
fbad73cb | 191 | function Read_Form return Types.Mal_Handle; |
673ee61c | 192 | |
e808554d | 193 | function Read_List (LT : Types.List_Types) |
fbad73cb | 194 | return Types.Mal_Handle is |
acb927d4 | 195 | |
6a6d21b8 | 196 | use Types; |
0844c154 | 197 | MTA : Mal_Handle; |
acb927d4 | 198 | |
673ee61c | 199 | begin |
acb927d4 | 200 | |
13ce1681 CM |
201 | MTA := Read_Form; |
202 | ||
203 | if Deref (MTA).Sym_Type = Atom and then | |
204 | Deref_Atom (MTA).Get_Atom = "fn*" then | |
205 | ||
0844c154 CM |
206 | declare |
207 | Params, Expr, Close_Lambda : Mal_Handle; | |
208 | begin | |
209 | Params := Read_Form; | |
210 | Expr := Read_Form; | |
211 | Close_Lambda := Read_Form; -- the ) at the end of the lambda | |
212 | return New_Lambda_Mal_Type (Params, Expr); | |
213 | exception | |
214 | when Lexical_Error => | |
215 | ||
216 | -- List_MT about to go out of scope but its a Mal_Handle | |
217 | -- so it is automatically garbage collected. | |
218 | ||
219 | return New_Error_Mal_Type (Str => "Lexical error in fn*"); | |
220 | ||
221 | end; | |
13ce1681 CM |
222 | |
223 | else | |
224 | ||
0844c154 CM |
225 | declare |
226 | List_SP : Mal_Handle; | |
227 | List_P : List_Ptr; | |
228 | Close : String (1..1) := (1 => Types.Closing (LT)); | |
229 | begin | |
230 | List_SP := New_List_Mal_Type (List_Type => LT); | |
231 | ||
232 | -- Need to append to a variable so... | |
233 | List_P := Deref_List (List_SP); | |
234 | loop | |
235 | exit when Is_Null (MTA) or else | |
236 | (Deref (MTA).Sym_Type = Atom and then | |
237 | Atom_Mal_Type (Deref (MTA).all).Get_Atom = Close); | |
238 | Append (List_P.all, MTA); | |
239 | MTA := Read_Form; | |
240 | end loop; | |
241 | return List_SP; | |
242 | exception | |
243 | when Lexical_Error => | |
244 | ||
245 | -- List_MT about to go out of scope but its a Mal_Handle | |
246 | -- so it is automatically garbage collected. | |
247 | ||
248 | return New_Error_Mal_Type (Str => "expected '" & Close & "'"); | |
249 | ||
250 | end; | |
13ce1681 CM |
251 | end if; |
252 | ||
7895cb30 CM |
253 | exception |
254 | when Lexical_Error => | |
0429a8c1 | 255 | |
0844c154 | 256 | return New_Error_Mal_Type (Str => "Lexical error in Read_List"); |
acb927d4 | 257 | |
673ee61c CM |
258 | end Read_List; |
259 | ||
260 | ||
fbad73cb | 261 | function Read_Form return Types.Mal_Handle is |
673ee61c | 262 | use Types; |
fbad73cb | 263 | MTS : Mal_Handle; |
673ee61c | 264 | begin |
6a6d21b8 | 265 | |
0429a8c1 | 266 | MTS := Get_Token; |
6a6d21b8 | 267 | |
acb927d4 CM |
268 | if Is_Null (MTS) then |
269 | return Smart_Pointers.Null_Smart_Pointer; | |
6a6d21b8 CM |
270 | end if; |
271 | ||
9cbc9695 CM |
272 | if Deref (MTS).Sym_Type = Atom then |
273 | ||
274 | declare | |
275 | Symbol : String := Atom_Mal_Type (Deref (MTS).all).Get_Atom; | |
276 | begin | |
277 | -- Listy things and quoting... | |
278 | if Symbol = "(" then | |
279 | return Read_List (List_List); | |
280 | elsif Symbol = "[" then | |
281 | return Read_List (Vector_List); | |
282 | elsif Symbol = "{" then | |
283 | return Read_List (Hashed_List); | |
284 | elsif Symbol = "^" then | |
acb927d4 | 285 | declare |
9cbc9695 | 286 | Meta, Obj : Mal_Handle; |
acb927d4 | 287 | begin |
9cbc9695 CM |
288 | Meta := Read_Form; |
289 | Obj := Read_Form; | |
290 | declare | |
291 | MT : Mal_Ptr := Deref (Obj); | |
292 | begin | |
293 | Set_Meta (MT.all, Meta); | |
294 | end; | |
295 | return Obj; | |
acb927d4 | 296 | end; |
9cbc9695 | 297 | elsif Symbol = ACL.Apostrophe & "" then |
38d0c57f CM |
298 | |
299 | declare | |
300 | List_SP : Mal_Handle; | |
301 | List_P : List_Ptr; | |
302 | begin | |
303 | List_SP := New_List_Mal_Type (List_Type => List_List); | |
304 | List_P := Deref_List (List_SP); | |
305 | Append (List_P.all, New_Atom_Mal_Type ("quote")); | |
306 | Append (List_P.all, Read_Form); | |
307 | return List_SP; | |
308 | end; | |
309 | ||
9cbc9695 | 310 | elsif Symbol = ACL.Grave & "" then |
38d0c57f CM |
311 | |
312 | declare | |
313 | List_SP : Mal_Handle; | |
314 | List_P : List_Ptr; | |
315 | begin | |
316 | List_SP := New_List_Mal_Type (List_Type => List_List); | |
317 | List_P := Deref_List (List_SP); | |
318 | Append (List_P.all, New_Atom_Mal_Type ("quasiquote")); | |
319 | Append (List_P.all, Read_Form); | |
320 | return List_SP; | |
321 | end; | |
322 | ||
9cbc9695 | 323 | elsif Symbol = ACL.Tilde & "" then |
38d0c57f CM |
324 | |
325 | declare | |
326 | List_SP : Mal_Handle; | |
327 | List_P : List_Ptr; | |
328 | begin | |
329 | List_SP := New_List_Mal_Type (List_Type => List_List); | |
330 | List_P := Deref_List (List_SP); | |
331 | Append (List_P.all, New_Atom_Mal_Type ("unquote")); | |
332 | Append (List_P.all, Read_Form); | |
333 | return List_SP; | |
334 | end; | |
335 | ||
9cbc9695 CM |
336 | elsif Symbol = ACL.Commercial_At & "" then |
337 | return New_Unitary_Mal_Type (Func => Deref, Op => Read_Form); | |
338 | else | |
339 | return MTS; | |
340 | end if; | |
341 | end; | |
6a6d21b8 | 342 | |
acb927d4 CM |
343 | elsif Deref(MTS).Sym_Type = Unitary and then |
344 | Unitary_Mal_Type (Deref (MTS).all).Get_Func = Splice_Unquote then | |
7895cb30 | 345 | |
7afdd78d CM |
346 | declare |
347 | List_SP : Mal_Handle; | |
348 | List_P : List_Ptr; | |
349 | begin | |
350 | List_SP := New_List_Mal_Type (List_Type => List_List); | |
351 | List_P := Deref_List (List_SP); | |
352 | Append (List_P.all, New_Atom_Mal_Type ("splice-unquote")); | |
353 | Append (List_P.all, Read_Form); | |
354 | return List_SP; | |
355 | end; | |
7895cb30 | 356 | |
673ee61c | 357 | else |
0429a8c1 | 358 | return MTS; |
673ee61c | 359 | end if; |
6a6d21b8 | 360 | |
7895cb30 CM |
361 | exception |
362 | when String_Error => | |
acb927d4 | 363 | return New_Error_Mal_Type (Str => "expected '""'"); |
673ee61c CM |
364 | end Read_Form; |
365 | ||
8c49f5a7 CM |
366 | procedure Lex_Init (S : String) is |
367 | begin | |
9e76408b | 368 | Str_Len := S'Length; |
8c49f5a7 | 369 | Saved_Line (1..S'Length) := S; -- Needed for error recovery |
9e76408b | 370 | Char_To_Read := 1; |
8c49f5a7 | 371 | end Lex_Init; |
673ee61c | 372 | |
fbad73cb | 373 | function Read_Str (S : String) return Types.Mal_Handle is |
d32be19b | 374 | I, Str_Len : Natural := S'Length; |
673ee61c | 375 | begin |
8c49f5a7 CM |
376 | |
377 | Lex_Init (S); | |
378 | ||
673ee61c | 379 | return Read_Form; |
9e76408b | 380 | |
673ee61c CM |
381 | end Read_Str; |
382 | ||
383 | ||
384 | end Reader; |