Ada: add keywords (well atoms beginning with :)
[jackhill/mal.git] / ada / reader.adb
CommitLineData
673ee61c
CM
1with Ada.IO_Exceptions;
2with Ada.Characters.Latin_1;
3with Ada.Strings.Maps.Constants;
4with Ada.Strings.Unbounded;
5with Ada.Text_IO;
6with Opentoken.Recognizer.Character_Set;
7with Opentoken.Recognizer.Identifier;
8with Opentoken.Recognizer.Integer;
9with Opentoken.Recognizer.Keyword;
10with Opentoken.Recognizer.Line_Comment;
c0f15c87 11with Opentoken.Recognizer.Real;
673ee61c
CM
12with Opentoken.Recognizer.Separator;
13with Opentoken.Recognizer.Single_Character_Set;
14with Opentoken.Recognizer.String;
15with OpenToken.Text_Feeder.String;
16with Opentoken.Token.Enumerated.Analyzer;
17
18package body Reader is
19
6a6d21b8
CM
20 package ACL renames Ada.Characters.Latin_1;
21
c0f15c87 22 type Lexemes is (Int, Float_Tok, Sym,
6a6d21b8 23 Nil, True_Tok, False_Tok, Exp_Tok, Splice_Unq,
673ee61c
CM
24 Str, Atom,
25 Whitespace, Comment);
26
27 package Lisp_Tokens is new Opentoken.Token.Enumerated (Lexemes);
28 package Tokenizer is new Lisp_Tokens.Analyzer;
29
30 Exp_Recognizer : constant Tokenizer.Recognizable_Token :=
31 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("**"));
32
6a6d21b8
CM
33 Splice_Unq_Recognizer : constant Tokenizer.Recognizable_Token :=
34 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("~@"));
35
673ee61c
CM
36 Nil_Recognizer : constant Tokenizer.Recognizable_Token :=
37 Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("nil"));
38
39 True_Recognizer : constant Tokenizer.Recognizable_Token :=
40 Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("true"));
41
42 False_Recognizer : constant Tokenizer.Recognizable_Token :=
11714407 43 Tokenizer.Get (Opentoken.Recognizer.Keyword.Get ("false"));
673ee61c
CM
44
45 ID_Recognizer : constant Tokenizer.Recognizable_Token :=
46 Tokenizer.Get(Opentoken.Recognizer.Identifier.Get);
47
48 Int_Recognizer : constant Tokenizer.Recognizable_Token :=
49 Tokenizer.Get(Opentoken.Recognizer.Integer.Get);
50
c0f15c87
CM
51 Float_Recognizer : constant Tokenizer.Recognizable_Token :=
52 Tokenizer.Get(Opentoken.Recognizer.Real.Get);
53
11714407
CM
54 -- Use the C style for escaped strings.
55 String_Recognizer : constant Tokenizer.Recognizable_Token :=
56 Tokenizer.Get
57 (Opentoken.Recognizer.String.Get
58 (Escapeable => True,
59 Double_Delimiter => False));
673ee61c
CM
60
61 -- Atom definition
c140811a 62 -- Note Start_Chars includes : for keywords.
673ee61c 63 Start_Chars : Ada.Strings.Maps.Character_Set :=
c140811a
CM
64 Ada.Strings.Maps."or"
65 (Ada.Strings.Maps.Constants.Letter_Set,
66 Ada.Strings.Maps.To_Set (':'));
673ee61c
CM
67
68 Body_Chars : Ada.Strings.Maps.Character_Set :=
69 Ada.Strings.Maps."or"
70 (Ada.Strings.Maps.Constants.Alphanumeric_Set,
71 Ada.Strings.Maps.To_Set ('-'));
72
73 Atom_Recognizer : constant Tokenizer.Recognizable_Token :=
74 Tokenizer.Get
75 (Opentoken.Recognizer.Identifier.Get (Start_Chars, Body_Chars));
76
77 Lisp_Syms : constant Ada.Strings.Maps.Character_Set :=
78 Ada.Strings.Maps.To_Set ("[]{}()'`~^@+-*/");
79
80 Sym_Recognizer : constant Tokenizer.Recognizable_Token :=
81 Tokenizer.Get (Opentoken.Recognizer.Single_Character_Set.Get (Lisp_Syms));
82
83 Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set :=
6a6d21b8 84 Ada.Strings.Maps.To_Set (ACL.HT & ACL.Space & ACL.Comma);
673ee61c
CM
85
86 Whitesp_Recognizer : constant Tokenizer.Recognizable_Token :=
87 Tokenizer.Get (Opentoken.Recognizer.Character_Set.Get (Lisp_Whitespace));
88
89 Comment_Recognizer : constant Tokenizer.Recognizable_Token :=
90 Tokenizer.Get(Opentoken.Recognizer.Line_Comment.Get (";"));
91
92 Syntax : constant Tokenizer.Syntax :=
6a6d21b8 93 (Int => Int_Recognizer,
c0f15c87 94 Float_Tok => Float_Recognizer,
6a6d21b8
CM
95 Sym => Sym_Recognizer,
96 Nil => Nil_Recognizer,
97 True_Tok => True_Recognizer,
98 False_Tok => False_Recognizer,
99 Exp_Tok => Exp_Recognizer,
100 Splice_Unq => Splice_Unq_Recognizer,
101 Str => String_Recognizer,
102 Atom => Atom_Recognizer,
103 Whitespace => Whitesp_Recognizer,
104 Comment => Comment_Recognizer);
673ee61c
CM
105
106 Input_Feeder : aliased OpenToken.Text_Feeder.String.Instance;
107
108 Analyzer : Tokenizer.Instance :=
109 Tokenizer.Initialize (Syntax, Input_Feeder'access);
110
111
112 function Get_Token_String return String is
113 begin
114 return Tokenizer.Lexeme (Analyzer);
115 end Get_Token_String;
116
117
118 function Get_Token_Char return Character is
119 S : String := Tokenizer.Lexeme (Analyzer);
120 begin
121 return S(S'First);
122 end Get_Token_Char;
123
124
125 function Get_Token return Types.Mal_Type_Access is
126 Res : Types.Mal_Type_Access;
127 begin
128 Tokenizer.Find_Next (Analyzer);
129 case Tokenizer.ID (Analyzer) is
130 when Int =>
131 Res := new Types.Mal_Type'
132 (Sym_Type => Types.Int,
133 Int_Val => Integer'Value (Get_Token_String));
c0f15c87
CM
134 when Float_Tok =>
135 Res := new Types.Mal_Type'
136 (Sym_Type => Types.Floating,
137 Float_Val => Float'Value (Get_Token_String));
673ee61c
CM
138 when Sym =>
139 Res := new Types.Mal_Type'
140 (Sym_Type => Types.Sym, Symbol => Get_Token_Char);
141 when Nil =>
142 Res := new Types.Mal_Type'
143 (Sym_Type => Types.Atom,
144 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
145 (Get_Token_String));
146 when True_Tok =>
147 Res := new Types.Mal_Type'
148 (Sym_Type => Types.Atom,
149 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
150 (Get_Token_String));
151 when False_Tok =>
152 Res := new Types.Mal_Type'
153 (Sym_Type => Types.Atom,
154 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
155 (Get_Token_String));
156 when Exp_Tok =>
157 Res := new Types.Mal_Type'
158 (Sym_Type => Types.Atom,
159 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
160 (Get_Token_String));
6a6d21b8
CM
161 when Splice_Unq =>
162 Res := new Types.Mal_Type'
163 (Sym_Type => Types.Unitary,
164 The_Function => Types.Splice_Unquote,
165 The_Operand => null);
673ee61c
CM
166 when Str =>
167 Res := new Types.Mal_Type'
168 (Sym_Type => Types.Str,
169 The_String => Ada.Strings.Unbounded.To_Unbounded_String
170 (Get_Token_String));
171 when Atom =>
172 Res := new Types.Mal_Type'
173 (Sym_Type => Types.Atom,
174 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
175 (Get_Token_String));
176 when Whitespace | Comment => null;
177 end case;
178 return Res;
179 end Get_Token;
180
181
182 -- Parsing
183 function Read_Form return Types.Mal_Type_Access;
184
e808554d
CM
185 function Read_List (LT : Types.List_Types)
186 return Types.Mal_Type_Access is
6a6d21b8
CM
187 use Types;
188 List_MT, MTA : Mal_Type_Access;
e808554d 189 Close : Character := Types.Closing (LT);
673ee61c 190 begin
6a6d21b8
CM
191 List_MT := new Mal_Type'
192 (Sym_Type => List,
e808554d 193 List_Type => LT,
6a6d21b8 194 The_List => Lists.Empty_List);
673ee61c
CM
195 loop
196 MTA := Read_Form;
197 exit when MTA = null or else
e808554d 198 MTA.all = (Sym_Type => Sym, Symbol => Close);
6a6d21b8 199 Lists.Append (List_MT.The_List, MTA);
673ee61c
CM
200 end loop;
201 return List_MT;
202 end Read_List;
203
204
205 function Read_Form return Types.Mal_Type_Access is
206 use Types;
207 MT : Types.Mal_Type_Access;
208 begin
6a6d21b8 209
673ee61c 210 MT := Get_Token;
6a6d21b8
CM
211
212 if MT = null then
213 return null;
214 end if;
215
216 if MT.Sym_Type = Sym then
217
218 if MT.Symbol = '(' then
e808554d
CM
219 return Read_List (List_List);
220 elsif MT.Symbol = '[' then
221 return Read_List (Vector_List);
222 elsif MT.Symbol = '{' then
223 return Read_List (Hashed_List);
6a6d21b8
CM
224 elsif MT.Symbol = ACL.Apostrophe then
225 return new Mal_Type'
226 (Sym_Type => Unitary,
227 The_Function => Quote,
228 The_Operand => Read_Form);
229 elsif MT.Symbol = ACL.Grave then
230 return new Mal_Type'
231 (Sym_Type => Unitary,
232 The_Function => Quasiquote,
233 The_Operand => Read_Form);
234 elsif MT.Symbol = ACL.Tilde then
235 return new Mal_Type'
236 (Sym_Type => Unitary,
237 The_Function => Unquote,
238 The_Operand => Read_Form);
239 else
240 return MT;
241 end if;
242
243 elsif MT.Sym_Type = Unitary and then
244 MT.The_Function = Splice_Unquote then
245 return new Mal_Type'
246 (Sym_Type => Unitary,
247 The_Function => Splice_Unquote,
248 The_Operand => Read_Form);
673ee61c
CM
249 else
250 return MT;
251 end if;
6a6d21b8 252
673ee61c
CM
253 end Read_Form;
254
255
256 function Read_Str (S : String) return Types.Mal_Type_Access is
257 begin
258 Analyzer.Reset;
259 Input_Feeder.Set (S);
260 return Read_Form;
261 exception
262 when OPENTOKEN.SYNTAX_ERROR =>
263 Ada.Text_IO.Put_Line
264 (Ada.Text_IO.Standard_Error,
265 "Lexical error at char " & Integer'Image (Analyzer.Line));
266 raise Ada.IO_Exceptions.End_Error;
267 end Read_Str;
268
269
270end Reader;