Ada: add step5.
[jackhill/mal.git] / ada / reader.adb
CommitLineData
673ee61c
CM
1with Ada.IO_Exceptions;
2with Ada.Characters.Latin_1;
7895cb30 3with Ada.Exceptions;
673ee61c
CM
4with Ada.Strings.Maps.Constants;
5with Ada.Strings.Unbounded;
6with Ada.Text_IO;
7with Opentoken.Recognizer.Character_Set;
8with Opentoken.Recognizer.Identifier;
9with Opentoken.Recognizer.Integer;
10with Opentoken.Recognizer.Keyword;
11with Opentoken.Recognizer.Line_Comment;
c0f15c87 12with Opentoken.Recognizer.Real;
673ee61c
CM
13with Opentoken.Recognizer.Separator;
14with Opentoken.Recognizer.Single_Character_Set;
15with Opentoken.Recognizer.String;
16with OpenToken.Text_Feeder.String;
17with Opentoken.Token.Enumerated.Analyzer;
acb927d4 18with Smart_Pointers;
673ee61c
CM
19
20package body Reader is
21
6a6d21b8
CM
22 package ACL renames Ada.Characters.Latin_1;
23
6b28be38
CM
24 type Lexemes is (Whitespace, Comment,
25 Int, Float_Tok, Sym,
13ce1681
CM
26 Nil, True_Tok, False_Tok,
27 LE_Tok, GE_Tok, Exp_Tok, Splice_Unq,
6b28be38 28 Str, Atom);
673ee61c 29
6b28be38
CM
30 package Lisp_Tokens is
31 new Opentoken.Token.Enumerated (Lexemes, Lexemes'Image, 10);
32
33 package Tokenizer is new Lisp_Tokens.Analyzer (Int, Atom);
673ee61c 34
13ce1681
CM
35 LE_Recognizer : constant Tokenizer.Recognizable_Token :=
36 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("<="));
37
38 GE_Recognizer : constant Tokenizer.Recognizable_Token :=
39 Tokenizer.Get(Opentoken.Recognizer.Separator.Get (">="));
40
673ee61c
CM
41 Exp_Recognizer : constant Tokenizer.Recognizable_Token :=
42 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("**"));
43
6a6d21b8
CM
44 Splice_Unq_Recognizer : constant Tokenizer.Recognizable_Token :=
45 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("~@"));
46
673ee61c
CM
47 Nil_Recognizer : constant Tokenizer.Recognizable_Token :=
48 Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("nil"));
49
50 True_Recognizer : constant Tokenizer.Recognizable_Token :=
51 Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("true"));
52
53 False_Recognizer : constant Tokenizer.Recognizable_Token :=
11714407 54 Tokenizer.Get (Opentoken.Recognizer.Keyword.Get ("false"));
673ee61c 55
673ee61c
CM
56 Int_Recognizer : constant Tokenizer.Recognizable_Token :=
57 Tokenizer.Get(Opentoken.Recognizer.Integer.Get);
58
c0f15c87
CM
59 Float_Recognizer : constant Tokenizer.Recognizable_Token :=
60 Tokenizer.Get(Opentoken.Recognizer.Real.Get);
61
11714407
CM
62 -- Use the C style for escaped strings.
63 String_Recognizer : constant Tokenizer.Recognizable_Token :=
64 Tokenizer.Get
65 (Opentoken.Recognizer.String.Get
66 (Escapeable => True,
67 Double_Delimiter => False));
673ee61c
CM
68
69 -- Atom definition
c140811a 70 -- Note Start_Chars includes : for keywords.
673ee61c 71 Start_Chars : Ada.Strings.Maps.Character_Set :=
c140811a
CM
72 Ada.Strings.Maps."or"
73 (Ada.Strings.Maps.Constants.Letter_Set,
74 Ada.Strings.Maps.To_Set (':'));
673ee61c
CM
75
76 Body_Chars : Ada.Strings.Maps.Character_Set :=
77 Ada.Strings.Maps."or"
78 (Ada.Strings.Maps.Constants.Alphanumeric_Set,
13ce1681 79 Ada.Strings.Maps.To_Set ("-!*?"));
673ee61c
CM
80
81 Atom_Recognizer : constant Tokenizer.Recognizable_Token :=
82 Tokenizer.Get
83 (Opentoken.Recognizer.Identifier.Get (Start_Chars, Body_Chars));
84
85 Lisp_Syms : constant Ada.Strings.Maps.Character_Set :=
a90ea3c7 86 Ada.Strings.Maps.To_Set ("[]{}()'`~^@&+-*/<>=");
673ee61c
CM
87
88 Sym_Recognizer : constant Tokenizer.Recognizable_Token :=
89 Tokenizer.Get (Opentoken.Recognizer.Single_Character_Set.Get (Lisp_Syms));
90
91 Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set :=
6a6d21b8 92 Ada.Strings.Maps.To_Set (ACL.HT & ACL.Space & ACL.Comma);
673ee61c
CM
93
94 Whitesp_Recognizer : constant Tokenizer.Recognizable_Token :=
95 Tokenizer.Get (Opentoken.Recognizer.Character_Set.Get (Lisp_Whitespace));
96
97 Comment_Recognizer : constant Tokenizer.Recognizable_Token :=
98 Tokenizer.Get(Opentoken.Recognizer.Line_Comment.Get (";"));
99
100 Syntax : constant Tokenizer.Syntax :=
6a6d21b8 101 (Int => Int_Recognizer,
c0f15c87 102 Float_Tok => Float_Recognizer,
6a6d21b8
CM
103 Sym => Sym_Recognizer,
104 Nil => Nil_Recognizer,
105 True_Tok => True_Recognizer,
106 False_Tok => False_Recognizer,
13ce1681
CM
107 LE_Tok => LE_Recognizer,
108 GE_Tok => GE_Recognizer,
6a6d21b8
CM
109 Exp_Tok => Exp_Recognizer,
110 Splice_Unq => Splice_Unq_Recognizer,
111 Str => String_Recognizer,
112 Atom => Atom_Recognizer,
113 Whitespace => Whitesp_Recognizer,
114 Comment => Comment_Recognizer);
673ee61c
CM
115
116 Input_Feeder : aliased OpenToken.Text_Feeder.String.Instance;
117
118 Analyzer : Tokenizer.Instance :=
119 Tokenizer.Initialize (Syntax, Input_Feeder'access);
120
121
7895cb30
CM
122 -- This is raised if an invalid character is encountered
123 Lexical_Error : exception;
124
125 -- The unterminated string error
126 String_Error : exception;
127
acb927d4 128
673ee61c
CM
129 function Get_Token_String return String is
130 begin
131 return Tokenizer.Lexeme (Analyzer);
132 end Get_Token_String;
133
134
135 function Get_Token_Char return Character is
136 S : String := Tokenizer.Lexeme (Analyzer);
137 begin
acb927d4 138 return S (S'First);
673ee61c
CM
139 end Get_Token_Char;
140
a974463a
CM
141 function Convert_String (S : String) return String is
142 use Ada.Strings.Unbounded;
143 Res : Unbounded_String;
144 I : Positive;
145 Str_Last : Natural;
146 begin
147 Str_Last := S'Last;
148 I := S'First;
149 while I <= Str_Last loop
150 if S (I) = '\' then
151 if I+1 > Str_Last then
152 Append (Res, S (I));
153 I := I + 1;
154 elsif S (I+1) = 'n' then
155 Append (Res, Ada.Characters.Latin_1.LF);
156 I := I + 2;
157 elsif S (I+1) = '"' then
158 Append (Res, S (I+1));
159 I := I + 2;
160 elsif S (I+1) = '\' then
161 Append (Res, S (I+1));
162 I := I + 2;
163 else
164 Append (Res, S (I));
165 I := I + 1;
166 end if;
167 else
168 Append (Res, S (I));
169 I := I + 1;
170 end if;
171 end loop;
172 return To_String (Res);
173 end Convert_String;
673ee61c 174
acb927d4 175 -- Saved_Line is needed to detect the unterminated string error.
7895cb30
CM
176 Saved_Line : String (1..Max_Line_Len);
177
fbad73cb 178 function Get_Token return Types.Mal_Handle is
0429a8c1 179 use Types;
fbad73cb 180 Res : Types.Mal_Handle;
673ee61c
CM
181 begin
182 Tokenizer.Find_Next (Analyzer);
183 case Tokenizer.ID (Analyzer) is
184 when Int =>
acb927d4
CM
185 Res := New_Int_Mal_Type
186 (Int => Mal_Integer'Value (Get_Token_String));
c0f15c87 187 when Float_Tok =>
acb927d4
CM
188 Res := New_Float_Mal_Type
189 (Floating => Mal_Float'Value (Get_Token_String));
673ee61c 190 when Sym =>
9cbc9695 191 Res := New_Atom_Mal_Type (Str => Get_Token_Char & "");
673ee61c 192 when Nil =>
acb927d4 193 Res := New_Atom_Mal_Type (Str => Get_Token_String);
673ee61c 194 when True_Tok =>
acb927d4 195 Res := New_Atom_Mal_Type (Str => Get_Token_String);
673ee61c 196 when False_Tok =>
acb927d4 197 Res := New_Atom_Mal_Type (Str => Get_Token_String);
13ce1681
CM
198 when LE_Tok =>
199 Res := New_Atom_Mal_Type (Str => Get_Token_String);
200 when GE_Tok =>
201 Res := New_Atom_Mal_Type (Str => Get_Token_String);
673ee61c 202 when Exp_Tok =>
acb927d4 203 Res := New_Atom_Mal_Type (Str => Get_Token_String);
6a6d21b8 204 when Splice_Unq =>
acb927d4
CM
205 Res := New_Unitary_Mal_Type
206 (Func => Splice_Unquote,
207 Op => Smart_Pointers.Null_Smart_Pointer);
673ee61c 208 when Str =>
a974463a
CM
209 Res := New_String_Mal_Type
210 (Str => Convert_String (Get_Token_String));
673ee61c 211 when Atom =>
acb927d4 212 Res := New_Atom_Mal_Type (Str => Get_Token_String);
673ee61c
CM
213 end case;
214 return Res;
7895cb30
CM
215
216 exception
217
218 when E : OpenToken.Syntax_Error =>
219
220-- Extra debug info
221-- declare
222-- Err_Pos : Integer := Analyzer.Column + 1;
223-- begin
224-- for J in 1..Err_Pos + 5 loop
225-- Ada.Text_IO.Put (Ada.Text_IO.Standard_Error, ' ');
226-- end loop;
227-- Ada.Text_IO.Put_Line (Ada.Text_IO.Standard_Error, "^");
228-- end;
13ce1681 229--
7895cb30
CM
230-- Ada.Text_IO.Put_Line
231-- (Ada.Text_IO.Standard_Error,
232-- Ada.Exceptions.Exception_Information (E));
233
234 declare
235 Col : Integer := Analyzer.Column;
236 begin
237 if Saved_Line (Col) ='"' then
238 raise String_Error;
239 else
240 raise Lexical_Error;
241 end if;
242 end;
243
673ee61c
CM
244 end Get_Token;
245
246
247 -- Parsing
fbad73cb 248 function Read_Form return Types.Mal_Handle;
673ee61c 249
e808554d 250 function Read_List (LT : Types.List_Types)
fbad73cb 251 return Types.Mal_Handle is
acb927d4 252
6a6d21b8 253 use Types;
13ce1681 254 List_SP, MTA, Params, Expr, Close_Lambda : Mal_Handle;
acb927d4 255 List_P : List_Ptr;
9cbc9695 256 Close : String (1..1) := (1 => Types.Closing (LT));
acb927d4 257
673ee61c 258 begin
acb927d4
CM
259
260 List_SP := New_List_Mal_Type (List_Type => LT);
261
262 -- Need to append to a variable so...
263 List_P := Deref_List (List_SP);
264
13ce1681
CM
265 MTA := Read_Form;
266
267 if Deref (MTA).Sym_Type = Atom and then
268 Deref_Atom (MTA).Get_Atom = "fn*" then
269
270 Params := Read_Form;
271 Expr := Read_Form;
272 Close_Lambda := Read_Form; -- the ) at the end of the lambda
273 return New_Lambda_Mal_Type (Params, Expr);
274
275 else
276
277 loop
278 exit when Is_Null (MTA) or else
279 (Deref (MTA).Sym_Type = Atom and then
280 Atom_Mal_Type (Deref (MTA).all).Get_Atom = Close);
281 Append (List_P.all, MTA);
282 MTA := Read_Form;
283 end loop;
284 return List_SP;
285 end if;
286
7895cb30
CM
287 exception
288 when Lexical_Error =>
0429a8c1 289
fbad73cb 290 -- List_MT about to go out of scope but its a Mal_Handle
0429a8c1
CM
291 -- so it is automatically garbage collected.
292
acb927d4
CM
293 return New_Error_Mal_Type (Str => "expected '" & Close & "'");
294
673ee61c
CM
295 end Read_List;
296
297
fbad73cb 298 function Read_Form return Types.Mal_Handle is
673ee61c 299 use Types;
fbad73cb 300 MTS : Mal_Handle;
673ee61c 301 begin
6a6d21b8 302
0429a8c1 303 MTS := Get_Token;
6a6d21b8 304
acb927d4
CM
305 if Is_Null (MTS) then
306 return Smart_Pointers.Null_Smart_Pointer;
6a6d21b8
CM
307 end if;
308
9cbc9695
CM
309 if Deref (MTS).Sym_Type = Atom then
310
311 declare
312 Symbol : String := Atom_Mal_Type (Deref (MTS).all).Get_Atom;
313 begin
314 -- Listy things and quoting...
315 if Symbol = "(" then
316 return Read_List (List_List);
317 elsif Symbol = "[" then
318 return Read_List (Vector_List);
319 elsif Symbol = "{" then
320 return Read_List (Hashed_List);
321 elsif Symbol = "^" then
acb927d4 322 declare
9cbc9695 323 Meta, Obj : Mal_Handle;
acb927d4 324 begin
9cbc9695
CM
325 Meta := Read_Form;
326 Obj := Read_Form;
327 declare
328 MT : Mal_Ptr := Deref (Obj);
329 begin
330 Set_Meta (MT.all, Meta);
331 end;
332 return Obj;
acb927d4 333 end;
9cbc9695
CM
334 elsif Symbol = ACL.Apostrophe & "" then
335 return New_Unitary_Mal_Type (Func => Quote, Op => Read_Form);
336 elsif Symbol = ACL.Grave & "" then
337 return New_Unitary_Mal_Type (Func => Quasiquote, Op => Read_Form);
338 elsif Symbol = ACL.Tilde & "" then
339 return New_Unitary_Mal_Type (Func => Unquote, Op => Read_Form);
340 elsif Symbol = ACL.Commercial_At & "" then
341 return New_Unitary_Mal_Type (Func => Deref, Op => Read_Form);
342 else
343 return MTS;
344 end if;
345 end;
6a6d21b8 346
acb927d4
CM
347 elsif Deref(MTS).Sym_Type = Unitary and then
348 Unitary_Mal_Type (Deref (MTS).all).Get_Func = Splice_Unquote then
7895cb30 349
acb927d4 350 return New_Unitary_Mal_Type (Func => Splice_Unquote, Op => Read_Form);
7895cb30 351
673ee61c 352 else
0429a8c1 353 return MTS;
673ee61c 354 end if;
6a6d21b8 355
7895cb30
CM
356 exception
357 when String_Error =>
acb927d4 358 return New_Error_Mal_Type (Str => "expected '""'");
673ee61c
CM
359 end Read_Form;
360
361
fbad73cb 362 function Read_Str (S : String) return Types.Mal_Handle is
d32be19b 363 I, Str_Len : Natural := S'Length;
673ee61c 364 begin
d32be19b
CM
365 -- Filter out lines consisting of only whitespace and/or comments
366 I := 1;
367 while I <= Str_Len and then
368 Ada.Strings.Maps.Is_In (S (I), Lisp_Whitespace) loop
369 I := I + 1;
370 end loop;
371 if I > Str_Len or else S (I) = ';' then
acb927d4 372 return Smart_Pointers.Null_Smart_Pointer;
d32be19b
CM
373 end if;
374
673ee61c
CM
375 Analyzer.Reset;
376 Input_Feeder.Set (S);
7895cb30 377 Saved_Line (1..S'Length) := S; -- Needed for error recovery
673ee61c 378 return Read_Form;
673ee61c
CM
379 end Read_Str;
380
381
382end Reader;