Ada: step6 mostly works
[jackhill/mal.git] / ada / reader.adb
1 with Ada.IO_Exceptions;
2 with Ada.Characters.Latin_1;
3 with Ada.Exceptions;
4 with Ada.Strings.Maps.Constants;
5 with Ada.Strings.Unbounded;
6 with Ada.Text_IO;
7 with Opentoken.Recognizer.Character_Set;
8 with Opentoken.Recognizer.Identifier;
9 with Opentoken.Recognizer.Integer;
10 with Opentoken.Recognizer.Keyword;
11 with Opentoken.Recognizer.Line_Comment;
12 with Opentoken.Recognizer.Real;
13 with Opentoken.Recognizer.Separator;
14 with Opentoken.Recognizer.Single_Character_Set;
15 with Opentoken.Recognizer.String;
16 with OpenToken.Text_Feeder.String;
17 with Opentoken.Token.Enumerated.Analyzer;
18 with Smart_Pointers;
19
20 package body Reader is
21
22 package ACL renames Ada.Characters.Latin_1;
23
24 type Lexemes is (Whitespace, Comment,
25 Int, Float_Tok, Sym,
26 Nil, True_Tok, False_Tok,
27 LE_Tok, GE_Tok, Exp_Tok, Splice_Unq,
28 Str, Atom);
29
30 package Lisp_Tokens is
31 new Opentoken.Token.Enumerated (Lexemes, Lexemes'Image, 10);
32
33 package Tokenizer is new Lisp_Tokens.Analyzer (Int, Atom);
34
35 LE_Recognizer : constant Tokenizer.Recognizable_Token :=
36 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("<="));
37
38 GE_Recognizer : constant Tokenizer.Recognizable_Token :=
39 Tokenizer.Get(Opentoken.Recognizer.Separator.Get (">="));
40
41 Exp_Recognizer : constant Tokenizer.Recognizable_Token :=
42 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("**"));
43
44 Splice_Unq_Recognizer : constant Tokenizer.Recognizable_Token :=
45 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("~@"));
46
47 Nil_Recognizer : constant Tokenizer.Recognizable_Token :=
48 Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("nil"));
49
50 True_Recognizer : constant Tokenizer.Recognizable_Token :=
51 Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("true"));
52
53 False_Recognizer : constant Tokenizer.Recognizable_Token :=
54 Tokenizer.Get (Opentoken.Recognizer.Keyword.Get ("false"));
55
56 Int_Recognizer : constant Tokenizer.Recognizable_Token :=
57 Tokenizer.Get(Opentoken.Recognizer.Integer.Get);
58
59 Float_Recognizer : constant Tokenizer.Recognizable_Token :=
60 Tokenizer.Get(Opentoken.Recognizer.Real.Get);
61
62 -- Use the C style for escaped strings.
63 String_Recognizer : constant Tokenizer.Recognizable_Token :=
64 Tokenizer.Get
65 (Opentoken.Recognizer.String.Get
66 (Escapeable => True,
67 Double_Delimiter => False));
68
69 -- Atom definition
70 -- Note Start_Chars includes : for keywords.
71 Start_Chars : Ada.Strings.Maps.Character_Set :=
72 Ada.Strings.Maps."or"
73 (Ada.Strings.Maps.Constants.Letter_Set,
74 Ada.Strings.Maps.To_Set (':'));
75
76 Body_Chars : Ada.Strings.Maps.Character_Set :=
77 Ada.Strings.Maps."or"
78 (Ada.Strings.Maps.Constants.Alphanumeric_Set,
79 Ada.Strings.Maps.To_Set ("-!*?"));
80
81 Atom_Recognizer : constant Tokenizer.Recognizable_Token :=
82 Tokenizer.Get
83 (Opentoken.Recognizer.Identifier.Get (Start_Chars, Body_Chars));
84
85 Lisp_Syms : constant Ada.Strings.Maps.Character_Set :=
86 Ada.Strings.Maps.To_Set ("[]{}()'`~^@&+-*/<>=");
87
88 Sym_Recognizer : constant Tokenizer.Recognizable_Token :=
89 Tokenizer.Get (Opentoken.Recognizer.Single_Character_Set.Get (Lisp_Syms));
90
91 Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set :=
92 Ada.Strings.Maps.To_Set (ACL.HT & ACL.LF & ACL.Space & ACL.Comma);
93
94 Whitesp_Recognizer : constant Tokenizer.Recognizable_Token :=
95 Tokenizer.Get (Opentoken.Recognizer.Character_Set.Get (Lisp_Whitespace));
96
97 Comment_Recognizer : constant Tokenizer.Recognizable_Token :=
98 Tokenizer.Get(Opentoken.Recognizer.Line_Comment.Get (";"));
99
100 Syntax : constant Tokenizer.Syntax :=
101 (Int => Int_Recognizer,
102 Float_Tok => Float_Recognizer,
103 Sym => Sym_Recognizer,
104 Nil => Nil_Recognizer,
105 True_Tok => True_Recognizer,
106 False_Tok => False_Recognizer,
107 LE_Tok => LE_Recognizer,
108 GE_Tok => GE_Recognizer,
109 Exp_Tok => Exp_Recognizer,
110 Splice_Unq => Splice_Unq_Recognizer,
111 Str => String_Recognizer,
112 Atom => Atom_Recognizer,
113 Whitespace => Whitesp_Recognizer,
114 Comment => Comment_Recognizer);
115
116 Input_Feeder : aliased OpenToken.Text_Feeder.String.Instance;
117
118 Analyzer : Tokenizer.Instance :=
119 Tokenizer.Initialize (Syntax, Input_Feeder'access);
120
121
122 -- This is raised if an invalid character is encountered
123 Lexical_Error : exception;
124
125 -- The unterminated string error
126 String_Error : exception;
127
128
129 function Get_Token_String return String is
130 begin
131 return Tokenizer.Lexeme (Analyzer);
132 end Get_Token_String;
133
134
135 function Get_Token_Char return Character is
136 S : String := Tokenizer.Lexeme (Analyzer);
137 begin
138 return S (S'First);
139 end Get_Token_Char;
140
141 function Convert_String (S : String) return String is
142 use Ada.Strings.Unbounded;
143 Res : Unbounded_String;
144 I : Positive;
145 Str_Last : Natural;
146 begin
147 Str_Last := S'Last;
148 I := S'First;
149 while I <= Str_Last loop
150 if S (I) = '\' then
151 if I+1 > Str_Last then
152 Append (Res, S (I));
153 I := I + 1;
154 elsif S (I+1) = 'n' then
155 Append (Res, Ada.Characters.Latin_1.LF);
156 I := I + 2;
157 elsif S (I+1) = '"' then
158 Append (Res, S (I+1));
159 I := I + 2;
160 elsif S (I+1) = '\' then
161 Append (Res, S (I+1));
162 I := I + 2;
163 else
164 Append (Res, S (I));
165 I := I + 1;
166 end if;
167 else
168 Append (Res, S (I));
169 I := I + 1;
170 end if;
171 end loop;
172 return To_String (Res);
173 end Convert_String;
174
175 -- Saved_Line is needed to detect the unterminated string error.
176 Saved_Line : String (1..Max_Line_Len);
177
178 function Get_Token return Types.Mal_Handle is
179 use Types;
180 Res : Types.Mal_Handle;
181 begin
182 Tokenizer.Find_Next (Analyzer);
183 case Tokenizer.ID (Analyzer) is
184 when Int =>
185 Res := New_Int_Mal_Type
186 (Int => Mal_Integer'Value (Get_Token_String));
187 when Float_Tok =>
188 Res := New_Float_Mal_Type
189 (Floating => Mal_Float'Value (Get_Token_String));
190 when Sym =>
191 Res := New_Atom_Mal_Type (Str => Get_Token_Char & "");
192 when Nil =>
193 Res := New_Atom_Mal_Type (Str => Get_Token_String);
194 when True_Tok =>
195 Res := New_Atom_Mal_Type (Str => Get_Token_String);
196 when False_Tok =>
197 Res := New_Atom_Mal_Type (Str => Get_Token_String);
198 when LE_Tok =>
199 Res := New_Atom_Mal_Type (Str => Get_Token_String);
200 when GE_Tok =>
201 Res := New_Atom_Mal_Type (Str => Get_Token_String);
202 when Exp_Tok =>
203 Res := New_Atom_Mal_Type (Str => Get_Token_String);
204 when Splice_Unq =>
205 Res := New_Unitary_Mal_Type
206 (Func => Splice_Unquote,
207 Op => Smart_Pointers.Null_Smart_Pointer);
208 when Str =>
209 Res := New_String_Mal_Type
210 (Str => Convert_String (Get_Token_String));
211 when Atom =>
212 Res := New_Atom_Mal_Type (Str => Get_Token_String);
213 end case;
214 return Res;
215
216 exception
217
218 when E : OpenToken.Syntax_Error =>
219
220 -- Extra debug info
221 -- declare
222 -- Err_Pos : Integer := Analyzer.Column + 1;
223 -- begin
224 -- for J in 1..Err_Pos + 5 loop
225 -- Ada.Text_IO.Put (Ada.Text_IO.Standard_Error, ' ');
226 -- end loop;
227 -- Ada.Text_IO.Put_Line (Ada.Text_IO.Standard_Error, "^");
228 -- end;
229 --
230 -- Ada.Text_IO.Put_Line
231 -- (Ada.Text_IO.Standard_Error,
232 -- Ada.Exceptions.Exception_Information (E));
233
234 declare
235 Col : Integer := Analyzer.Column;
236 begin
237 if Saved_Line (Col) ='"' then
238 raise String_Error;
239 else
240 raise Lexical_Error;
241 end if;
242 end;
243
244 end Get_Token;
245
246
247 -- Parsing
248 function Read_Form return Types.Mal_Handle;
249
250 function Read_List (LT : Types.List_Types)
251 return Types.Mal_Handle is
252
253 use Types;
254 List_SP, MTA, Params, Expr, Close_Lambda : Mal_Handle;
255 List_P : List_Ptr;
256 Close : String (1..1) := (1 => Types.Closing (LT));
257
258 begin
259
260 List_SP := New_List_Mal_Type (List_Type => LT);
261
262 -- Need to append to a variable so...
263 List_P := Deref_List (List_SP);
264
265 MTA := Read_Form;
266
267 if Deref (MTA).Sym_Type = Atom and then
268 Deref_Atom (MTA).Get_Atom = "fn*" then
269
270 Params := Read_Form;
271 Expr := Read_Form;
272 Close_Lambda := Read_Form; -- the ) at the end of the lambda
273 return New_Lambda_Mal_Type (Params, Expr);
274
275 else
276
277 loop
278 exit when Is_Null (MTA) or else
279 (Deref (MTA).Sym_Type = Atom and then
280 Atom_Mal_Type (Deref (MTA).all).Get_Atom = Close);
281 Append (List_P.all, MTA);
282 MTA := Read_Form;
283 end loop;
284 return List_SP;
285 end if;
286
287 exception
288 when Lexical_Error =>
289
290 -- List_MT about to go out of scope but its a Mal_Handle
291 -- so it is automatically garbage collected.
292
293 return New_Error_Mal_Type (Str => "expected '" & Close & "'");
294
295 end Read_List;
296
297
298 function Read_Form return Types.Mal_Handle is
299 use Types;
300 MTS : Mal_Handle;
301 begin
302
303 MTS := Get_Token;
304
305 if Is_Null (MTS) then
306 return Smart_Pointers.Null_Smart_Pointer;
307 end if;
308
309 if Deref (MTS).Sym_Type = Atom then
310
311 declare
312 Symbol : String := Atom_Mal_Type (Deref (MTS).all).Get_Atom;
313 begin
314 -- Listy things and quoting...
315 if Symbol = "(" then
316 return Read_List (List_List);
317 elsif Symbol = "[" then
318 return Read_List (Vector_List);
319 elsif Symbol = "{" then
320 return Read_List (Hashed_List);
321 elsif Symbol = "^" then
322 declare
323 Meta, Obj : Mal_Handle;
324 begin
325 Meta := Read_Form;
326 Obj := Read_Form;
327 declare
328 MT : Mal_Ptr := Deref (Obj);
329 begin
330 Set_Meta (MT.all, Meta);
331 end;
332 return Obj;
333 end;
334 elsif Symbol = ACL.Apostrophe & "" then
335 return New_Unitary_Mal_Type (Func => Quote, Op => Read_Form);
336 elsif Symbol = ACL.Grave & "" then
337 return New_Unitary_Mal_Type (Func => Quasiquote, Op => Read_Form);
338 elsif Symbol = ACL.Tilde & "" then
339 return New_Unitary_Mal_Type (Func => Unquote, Op => Read_Form);
340 elsif Symbol = ACL.Commercial_At & "" then
341 return New_Unitary_Mal_Type (Func => Deref, Op => Read_Form);
342 else
343 return MTS;
344 end if;
345 end;
346
347 elsif Deref(MTS).Sym_Type = Unitary and then
348 Unitary_Mal_Type (Deref (MTS).all).Get_Func = Splice_Unquote then
349
350 return New_Unitary_Mal_Type (Func => Splice_Unquote, Op => Read_Form);
351
352 else
353 return MTS;
354 end if;
355
356 exception
357 when String_Error =>
358 return New_Error_Mal_Type (Str => "expected '""'");
359 end Read_Form;
360
361 procedure Lex_Init (S : String) is
362 begin
363 Analyzer.Reset;
364 Input_Feeder.Set (S);
365 Saved_Line (1..S'Length) := S; -- Needed for error recovery
366 end Lex_Init;
367
368 function Read_Str (S : String) return Types.Mal_Handle is
369 I, Str_Len : Natural := S'Length;
370 begin
371 -- Filter out lines consisting of only whitespace and/or comments
372 I := 1;
373 while I <= Str_Len and then
374 Ada.Strings.Maps.Is_In (S (I), Lisp_Whitespace) loop
375 I := I + 1;
376 end loop;
377 if I > Str_Len or else S (I) = ';' then
378 return Smart_Pointers.Null_Smart_Pointer;
379 end if;
380
381 Lex_Init (S);
382
383 return Read_Form;
384 end Read_Str;
385
386
387 end Reader;