Commit | Line | Data |
---|---|---|
673ee61c CM |
1 | with Ada.IO_Exceptions; |
2 | with Ada.Characters.Latin_1; | |
7895cb30 | 3 | with Ada.Exceptions; |
673ee61c CM |
4 | with Ada.Strings.Maps.Constants; |
5 | with Ada.Strings.Unbounded; | |
6 | with Ada.Text_IO; | |
7 | with Opentoken.Recognizer.Character_Set; | |
8 | with Opentoken.Recognizer.Identifier; | |
9 | with Opentoken.Recognizer.Integer; | |
10 | with Opentoken.Recognizer.Keyword; | |
11 | with Opentoken.Recognizer.Line_Comment; | |
c0f15c87 | 12 | with Opentoken.Recognizer.Real; |
673ee61c CM |
13 | with Opentoken.Recognizer.Separator; |
14 | with Opentoken.Recognizer.Single_Character_Set; | |
15 | with Opentoken.Recognizer.String; | |
16 | with OpenToken.Text_Feeder.String; | |
17 | with Opentoken.Token.Enumerated.Analyzer; | |
acb927d4 | 18 | with Smart_Pointers; |
673ee61c CM |
19 | |
20 | package body Reader is | |
21 | ||
6a6d21b8 CM |
22 | package ACL renames Ada.Characters.Latin_1; |
23 | ||
6b28be38 CM |
24 | type Lexemes is (Whitespace, Comment, |
25 | Int, Float_Tok, Sym, | |
13ce1681 CM |
26 | Nil, True_Tok, False_Tok, |
27 | LE_Tok, GE_Tok, Exp_Tok, Splice_Unq, | |
6b28be38 | 28 | Str, Atom); |
673ee61c | 29 | |
6b28be38 CM |
30 | package Lisp_Tokens is |
31 | new Opentoken.Token.Enumerated (Lexemes, Lexemes'Image, 10); | |
32 | ||
33 | package Tokenizer is new Lisp_Tokens.Analyzer (Int, Atom); | |
673ee61c | 34 | |
13ce1681 CM |
35 | LE_Recognizer : constant Tokenizer.Recognizable_Token := |
36 | Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("<=")); | |
37 | ||
38 | GE_Recognizer : constant Tokenizer.Recognizable_Token := | |
39 | Tokenizer.Get(Opentoken.Recognizer.Separator.Get (">=")); | |
40 | ||
673ee61c CM |
41 | Exp_Recognizer : constant Tokenizer.Recognizable_Token := |
42 | Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("**")); | |
43 | ||
6a6d21b8 CM |
44 | Splice_Unq_Recognizer : constant Tokenizer.Recognizable_Token := |
45 | Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("~@")); | |
46 | ||
673ee61c CM |
47 | Nil_Recognizer : constant Tokenizer.Recognizable_Token := |
48 | Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("nil")); | |
49 | ||
50 | True_Recognizer : constant Tokenizer.Recognizable_Token := | |
51 | Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("true")); | |
52 | ||
53 | False_Recognizer : constant Tokenizer.Recognizable_Token := | |
11714407 | 54 | Tokenizer.Get (Opentoken.Recognizer.Keyword.Get ("false")); |
673ee61c | 55 | |
673ee61c CM |
56 | Int_Recognizer : constant Tokenizer.Recognizable_Token := |
57 | Tokenizer.Get(Opentoken.Recognizer.Integer.Get); | |
58 | ||
c0f15c87 CM |
59 | Float_Recognizer : constant Tokenizer.Recognizable_Token := |
60 | Tokenizer.Get(Opentoken.Recognizer.Real.Get); | |
61 | ||
11714407 CM |
62 | -- Use the C style for escaped strings. |
63 | String_Recognizer : constant Tokenizer.Recognizable_Token := | |
64 | Tokenizer.Get | |
65 | (Opentoken.Recognizer.String.Get | |
66 | (Escapeable => True, | |
67 | Double_Delimiter => False)); | |
673ee61c CM |
68 | |
69 | -- Atom definition | |
c140811a | 70 | -- Note Start_Chars includes : for keywords. |
673ee61c | 71 | Start_Chars : Ada.Strings.Maps.Character_Set := |
c140811a CM |
72 | Ada.Strings.Maps."or" |
73 | (Ada.Strings.Maps.Constants.Letter_Set, | |
74 | Ada.Strings.Maps.To_Set (':')); | |
673ee61c CM |
75 | |
76 | Body_Chars : Ada.Strings.Maps.Character_Set := | |
77 | Ada.Strings.Maps."or" | |
78 | (Ada.Strings.Maps.Constants.Alphanumeric_Set, | |
13ce1681 | 79 | Ada.Strings.Maps.To_Set ("-!*?")); |
673ee61c CM |
80 | |
81 | Atom_Recognizer : constant Tokenizer.Recognizable_Token := | |
82 | Tokenizer.Get | |
83 | (Opentoken.Recognizer.Identifier.Get (Start_Chars, Body_Chars)); | |
84 | ||
85 | Lisp_Syms : constant Ada.Strings.Maps.Character_Set := | |
a90ea3c7 | 86 | Ada.Strings.Maps.To_Set ("[]{}()'`~^@&+-*/<>="); |
673ee61c CM |
87 | |
88 | Sym_Recognizer : constant Tokenizer.Recognizable_Token := | |
89 | Tokenizer.Get (Opentoken.Recognizer.Single_Character_Set.Get (Lisp_Syms)); | |
90 | ||
91 | Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set := | |
6a6d21b8 | 92 | Ada.Strings.Maps.To_Set (ACL.HT & ACL.Space & ACL.Comma); |
673ee61c CM |
93 | |
94 | Whitesp_Recognizer : constant Tokenizer.Recognizable_Token := | |
95 | Tokenizer.Get (Opentoken.Recognizer.Character_Set.Get (Lisp_Whitespace)); | |
96 | ||
97 | Comment_Recognizer : constant Tokenizer.Recognizable_Token := | |
98 | Tokenizer.Get(Opentoken.Recognizer.Line_Comment.Get (";")); | |
99 | ||
100 | Syntax : constant Tokenizer.Syntax := | |
6a6d21b8 | 101 | (Int => Int_Recognizer, |
c0f15c87 | 102 | Float_Tok => Float_Recognizer, |
6a6d21b8 CM |
103 | Sym => Sym_Recognizer, |
104 | Nil => Nil_Recognizer, | |
105 | True_Tok => True_Recognizer, | |
106 | False_Tok => False_Recognizer, | |
13ce1681 CM |
107 | LE_Tok => LE_Recognizer, |
108 | GE_Tok => GE_Recognizer, | |
6a6d21b8 CM |
109 | Exp_Tok => Exp_Recognizer, |
110 | Splice_Unq => Splice_Unq_Recognizer, | |
111 | Str => String_Recognizer, | |
112 | Atom => Atom_Recognizer, | |
113 | Whitespace => Whitesp_Recognizer, | |
114 | Comment => Comment_Recognizer); | |
673ee61c CM |
115 | |
116 | Input_Feeder : aliased OpenToken.Text_Feeder.String.Instance; | |
117 | ||
118 | Analyzer : Tokenizer.Instance := | |
119 | Tokenizer.Initialize (Syntax, Input_Feeder'access); | |
120 | ||
121 | ||
7895cb30 CM |
122 | -- This is raised if an invalid character is encountered |
123 | Lexical_Error : exception; | |
124 | ||
125 | -- The unterminated string error | |
126 | String_Error : exception; | |
127 | ||
acb927d4 | 128 | |
673ee61c CM |
129 | function Get_Token_String return String is |
130 | begin | |
131 | return Tokenizer.Lexeme (Analyzer); | |
132 | end Get_Token_String; | |
133 | ||
134 | ||
135 | function Get_Token_Char return Character is | |
136 | S : String := Tokenizer.Lexeme (Analyzer); | |
137 | begin | |
acb927d4 | 138 | return S (S'First); |
673ee61c CM |
139 | end Get_Token_Char; |
140 | ||
a974463a CM |
141 | function Convert_String (S : String) return String is |
142 | use Ada.Strings.Unbounded; | |
143 | Res : Unbounded_String; | |
144 | I : Positive; | |
145 | Str_Last : Natural; | |
146 | begin | |
147 | Str_Last := S'Last; | |
148 | I := S'First; | |
149 | while I <= Str_Last loop | |
150 | if S (I) = '\' then | |
151 | if I+1 > Str_Last then | |
152 | Append (Res, S (I)); | |
153 | I := I + 1; | |
154 | elsif S (I+1) = 'n' then | |
155 | Append (Res, Ada.Characters.Latin_1.LF); | |
156 | I := I + 2; | |
157 | elsif S (I+1) = '"' then | |
158 | Append (Res, S (I+1)); | |
159 | I := I + 2; | |
160 | elsif S (I+1) = '\' then | |
161 | Append (Res, S (I+1)); | |
162 | I := I + 2; | |
163 | else | |
164 | Append (Res, S (I)); | |
165 | I := I + 1; | |
166 | end if; | |
167 | else | |
168 | Append (Res, S (I)); | |
169 | I := I + 1; | |
170 | end if; | |
171 | end loop; | |
172 | return To_String (Res); | |
173 | end Convert_String; | |
673ee61c | 174 | |
acb927d4 | 175 | -- Saved_Line is needed to detect the unterminated string error. |
7895cb30 CM |
176 | Saved_Line : String (1..Max_Line_Len); |
177 | ||
fbad73cb | 178 | function Get_Token return Types.Mal_Handle is |
0429a8c1 | 179 | use Types; |
fbad73cb | 180 | Res : Types.Mal_Handle; |
673ee61c CM |
181 | begin |
182 | Tokenizer.Find_Next (Analyzer); | |
183 | case Tokenizer.ID (Analyzer) is | |
184 | when Int => | |
acb927d4 CM |
185 | Res := New_Int_Mal_Type |
186 | (Int => Mal_Integer'Value (Get_Token_String)); | |
c0f15c87 | 187 | when Float_Tok => |
acb927d4 CM |
188 | Res := New_Float_Mal_Type |
189 | (Floating => Mal_Float'Value (Get_Token_String)); | |
673ee61c | 190 | when Sym => |
9cbc9695 | 191 | Res := New_Atom_Mal_Type (Str => Get_Token_Char & ""); |
673ee61c | 192 | when Nil => |
acb927d4 | 193 | Res := New_Atom_Mal_Type (Str => Get_Token_String); |
673ee61c | 194 | when True_Tok => |
acb927d4 | 195 | Res := New_Atom_Mal_Type (Str => Get_Token_String); |
673ee61c | 196 | when False_Tok => |
acb927d4 | 197 | Res := New_Atom_Mal_Type (Str => Get_Token_String); |
13ce1681 CM |
198 | when LE_Tok => |
199 | Res := New_Atom_Mal_Type (Str => Get_Token_String); | |
200 | when GE_Tok => | |
201 | Res := New_Atom_Mal_Type (Str => Get_Token_String); | |
673ee61c | 202 | when Exp_Tok => |
acb927d4 | 203 | Res := New_Atom_Mal_Type (Str => Get_Token_String); |
6a6d21b8 | 204 | when Splice_Unq => |
acb927d4 CM |
205 | Res := New_Unitary_Mal_Type |
206 | (Func => Splice_Unquote, | |
207 | Op => Smart_Pointers.Null_Smart_Pointer); | |
673ee61c | 208 | when Str => |
a974463a CM |
209 | Res := New_String_Mal_Type |
210 | (Str => Convert_String (Get_Token_String)); | |
673ee61c | 211 | when Atom => |
acb927d4 | 212 | Res := New_Atom_Mal_Type (Str => Get_Token_String); |
673ee61c CM |
213 | end case; |
214 | return Res; | |
7895cb30 CM |
215 | |
216 | exception | |
217 | ||
218 | when E : OpenToken.Syntax_Error => | |
219 | ||
220 | -- Extra debug info | |
221 | -- declare | |
222 | -- Err_Pos : Integer := Analyzer.Column + 1; | |
223 | -- begin | |
224 | -- for J in 1..Err_Pos + 5 loop | |
225 | -- Ada.Text_IO.Put (Ada.Text_IO.Standard_Error, ' '); | |
226 | -- end loop; | |
227 | -- Ada.Text_IO.Put_Line (Ada.Text_IO.Standard_Error, "^"); | |
228 | -- end; | |
13ce1681 | 229 | -- |
7895cb30 CM |
230 | -- Ada.Text_IO.Put_Line |
231 | -- (Ada.Text_IO.Standard_Error, | |
232 | -- Ada.Exceptions.Exception_Information (E)); | |
233 | ||
234 | declare | |
235 | Col : Integer := Analyzer.Column; | |
236 | begin | |
237 | if Saved_Line (Col) ='"' then | |
238 | raise String_Error; | |
239 | else | |
240 | raise Lexical_Error; | |
241 | end if; | |
242 | end; | |
243 | ||
673ee61c CM |
244 | end Get_Token; |
245 | ||
246 | ||
247 | -- Parsing | |
fbad73cb | 248 | function Read_Form return Types.Mal_Handle; |
673ee61c | 249 | |
e808554d | 250 | function Read_List (LT : Types.List_Types) |
fbad73cb | 251 | return Types.Mal_Handle is |
acb927d4 | 252 | |
6a6d21b8 | 253 | use Types; |
13ce1681 | 254 | List_SP, MTA, Params, Expr, Close_Lambda : Mal_Handle; |
acb927d4 | 255 | List_P : List_Ptr; |
9cbc9695 | 256 | Close : String (1..1) := (1 => Types.Closing (LT)); |
acb927d4 | 257 | |
673ee61c | 258 | begin |
acb927d4 CM |
259 | |
260 | List_SP := New_List_Mal_Type (List_Type => LT); | |
261 | ||
262 | -- Need to append to a variable so... | |
263 | List_P := Deref_List (List_SP); | |
264 | ||
13ce1681 CM |
265 | MTA := Read_Form; |
266 | ||
267 | if Deref (MTA).Sym_Type = Atom and then | |
268 | Deref_Atom (MTA).Get_Atom = "fn*" then | |
269 | ||
270 | Params := Read_Form; | |
271 | Expr := Read_Form; | |
272 | Close_Lambda := Read_Form; -- the ) at the end of the lambda | |
273 | return New_Lambda_Mal_Type (Params, Expr); | |
274 | ||
275 | else | |
276 | ||
277 | loop | |
278 | exit when Is_Null (MTA) or else | |
279 | (Deref (MTA).Sym_Type = Atom and then | |
280 | Atom_Mal_Type (Deref (MTA).all).Get_Atom = Close); | |
281 | Append (List_P.all, MTA); | |
282 | MTA := Read_Form; | |
283 | end loop; | |
284 | return List_SP; | |
285 | end if; | |
286 | ||
7895cb30 CM |
287 | exception |
288 | when Lexical_Error => | |
0429a8c1 | 289 | |
fbad73cb | 290 | -- List_MT about to go out of scope but its a Mal_Handle |
0429a8c1 CM |
291 | -- so it is automatically garbage collected. |
292 | ||
acb927d4 CM |
293 | return New_Error_Mal_Type (Str => "expected '" & Close & "'"); |
294 | ||
673ee61c CM |
295 | end Read_List; |
296 | ||
297 | ||
fbad73cb | 298 | function Read_Form return Types.Mal_Handle is |
673ee61c | 299 | use Types; |
fbad73cb | 300 | MTS : Mal_Handle; |
673ee61c | 301 | begin |
6a6d21b8 | 302 | |
0429a8c1 | 303 | MTS := Get_Token; |
6a6d21b8 | 304 | |
acb927d4 CM |
305 | if Is_Null (MTS) then |
306 | return Smart_Pointers.Null_Smart_Pointer; | |
6a6d21b8 CM |
307 | end if; |
308 | ||
9cbc9695 CM |
309 | if Deref (MTS).Sym_Type = Atom then |
310 | ||
311 | declare | |
312 | Symbol : String := Atom_Mal_Type (Deref (MTS).all).Get_Atom; | |
313 | begin | |
314 | -- Listy things and quoting... | |
315 | if Symbol = "(" then | |
316 | return Read_List (List_List); | |
317 | elsif Symbol = "[" then | |
318 | return Read_List (Vector_List); | |
319 | elsif Symbol = "{" then | |
320 | return Read_List (Hashed_List); | |
321 | elsif Symbol = "^" then | |
acb927d4 | 322 | declare |
9cbc9695 | 323 | Meta, Obj : Mal_Handle; |
acb927d4 | 324 | begin |
9cbc9695 CM |
325 | Meta := Read_Form; |
326 | Obj := Read_Form; | |
327 | declare | |
328 | MT : Mal_Ptr := Deref (Obj); | |
329 | begin | |
330 | Set_Meta (MT.all, Meta); | |
331 | end; | |
332 | return Obj; | |
acb927d4 | 333 | end; |
9cbc9695 CM |
334 | elsif Symbol = ACL.Apostrophe & "" then |
335 | return New_Unitary_Mal_Type (Func => Quote, Op => Read_Form); | |
336 | elsif Symbol = ACL.Grave & "" then | |
337 | return New_Unitary_Mal_Type (Func => Quasiquote, Op => Read_Form); | |
338 | elsif Symbol = ACL.Tilde & "" then | |
339 | return New_Unitary_Mal_Type (Func => Unquote, Op => Read_Form); | |
340 | elsif Symbol = ACL.Commercial_At & "" then | |
341 | return New_Unitary_Mal_Type (Func => Deref, Op => Read_Form); | |
342 | else | |
343 | return MTS; | |
344 | end if; | |
345 | end; | |
6a6d21b8 | 346 | |
acb927d4 CM |
347 | elsif Deref(MTS).Sym_Type = Unitary and then |
348 | Unitary_Mal_Type (Deref (MTS).all).Get_Func = Splice_Unquote then | |
7895cb30 | 349 | |
acb927d4 | 350 | return New_Unitary_Mal_Type (Func => Splice_Unquote, Op => Read_Form); |
7895cb30 | 351 | |
673ee61c | 352 | else |
0429a8c1 | 353 | return MTS; |
673ee61c | 354 | end if; |
6a6d21b8 | 355 | |
7895cb30 CM |
356 | exception |
357 | when String_Error => | |
acb927d4 | 358 | return New_Error_Mal_Type (Str => "expected '""'"); |
673ee61c CM |
359 | end Read_Form; |
360 | ||
361 | ||
fbad73cb | 362 | function Read_Str (S : String) return Types.Mal_Handle is |
d32be19b | 363 | I, Str_Len : Natural := S'Length; |
673ee61c | 364 | begin |
d32be19b CM |
365 | -- Filter out lines consisting of only whitespace and/or comments |
366 | I := 1; | |
367 | while I <= Str_Len and then | |
368 | Ada.Strings.Maps.Is_In (S (I), Lisp_Whitespace) loop | |
369 | I := I + 1; | |
370 | end loop; | |
371 | if I > Str_Len or else S (I) = ';' then | |
acb927d4 | 372 | return Smart_Pointers.Null_Smart_Pointer; |
d32be19b CM |
373 | end if; |
374 | ||
673ee61c CM |
375 | Analyzer.Reset; |
376 | Input_Feeder.Set (S); | |
7895cb30 | 377 | Saved_Line (1..S'Length) := S; -- Needed for error recovery |
673ee61c | 378 | return Read_Form; |
673ee61c CM |
379 | end Read_Str; |
380 | ||
381 | ||
382 | end Reader; |