Ada: add quoting
[jackhill/mal.git] / ada / reader.adb
CommitLineData
673ee61c
CM
1with Ada.IO_Exceptions;
2with Ada.Characters.Latin_1;
3with Ada.Strings.Maps.Constants;
4with Ada.Strings.Unbounded;
5with Ada.Text_IO;
6with Opentoken.Recognizer.Character_Set;
7with Opentoken.Recognizer.Identifier;
8with Opentoken.Recognizer.Integer;
9with Opentoken.Recognizer.Keyword;
10with Opentoken.Recognizer.Line_Comment;
11with Opentoken.Recognizer.Separator;
12with Opentoken.Recognizer.Single_Character_Set;
13with Opentoken.Recognizer.String;
14with OpenToken.Text_Feeder.String;
15with Opentoken.Token.Enumerated.Analyzer;
16
17package body Reader is
18
6a6d21b8
CM
19 package ACL renames Ada.Characters.Latin_1;
20
673ee61c 21 type Lexemes is (Int, Sym,
6a6d21b8 22 Nil, True_Tok, False_Tok, Exp_Tok, Splice_Unq,
673ee61c
CM
23 Str, Atom,
24 Whitespace, Comment);
25
26 package Lisp_Tokens is new Opentoken.Token.Enumerated (Lexemes);
27 package Tokenizer is new Lisp_Tokens.Analyzer;
28
29 Exp_Recognizer : constant Tokenizer.Recognizable_Token :=
30 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("**"));
31
6a6d21b8
CM
32 Splice_Unq_Recognizer : constant Tokenizer.Recognizable_Token :=
33 Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("~@"));
34
673ee61c
CM
35 Nil_Recognizer : constant Tokenizer.Recognizable_Token :=
36 Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("nil"));
37
38 True_Recognizer : constant Tokenizer.Recognizable_Token :=
39 Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("true"));
40
41 False_Recognizer : constant Tokenizer.Recognizable_Token :=
11714407 42 Tokenizer.Get (Opentoken.Recognizer.Keyword.Get ("false"));
673ee61c
CM
43
44 ID_Recognizer : constant Tokenizer.Recognizable_Token :=
45 Tokenizer.Get(Opentoken.Recognizer.Identifier.Get);
46
47 Int_Recognizer : constant Tokenizer.Recognizable_Token :=
48 Tokenizer.Get(Opentoken.Recognizer.Integer.Get);
49
11714407
CM
50 -- Use the C style for escaped strings.
51 String_Recognizer : constant Tokenizer.Recognizable_Token :=
52 Tokenizer.Get
53 (Opentoken.Recognizer.String.Get
54 (Escapeable => True,
55 Double_Delimiter => False));
673ee61c
CM
56
57 -- Atom definition
58 Start_Chars : Ada.Strings.Maps.Character_Set :=
59 Ada.Strings.Maps.Constants.Letter_Set;
60
61 Body_Chars : Ada.Strings.Maps.Character_Set :=
62 Ada.Strings.Maps."or"
63 (Ada.Strings.Maps.Constants.Alphanumeric_Set,
64 Ada.Strings.Maps.To_Set ('-'));
65
66 Atom_Recognizer : constant Tokenizer.Recognizable_Token :=
67 Tokenizer.Get
68 (Opentoken.Recognizer.Identifier.Get (Start_Chars, Body_Chars));
69
70 Lisp_Syms : constant Ada.Strings.Maps.Character_Set :=
71 Ada.Strings.Maps.To_Set ("[]{}()'`~^@+-*/");
72
73 Sym_Recognizer : constant Tokenizer.Recognizable_Token :=
74 Tokenizer.Get (Opentoken.Recognizer.Single_Character_Set.Get (Lisp_Syms));
75
76 Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set :=
6a6d21b8 77 Ada.Strings.Maps.To_Set (ACL.HT & ACL.Space & ACL.Comma);
673ee61c
CM
78
79 Whitesp_Recognizer : constant Tokenizer.Recognizable_Token :=
80 Tokenizer.Get (Opentoken.Recognizer.Character_Set.Get (Lisp_Whitespace));
81
82 Comment_Recognizer : constant Tokenizer.Recognizable_Token :=
83 Tokenizer.Get(Opentoken.Recognizer.Line_Comment.Get (";"));
84
85 Syntax : constant Tokenizer.Syntax :=
6a6d21b8
CM
86 (Int => Int_Recognizer,
87 Sym => Sym_Recognizer,
88 Nil => Nil_Recognizer,
89 True_Tok => True_Recognizer,
90 False_Tok => False_Recognizer,
91 Exp_Tok => Exp_Recognizer,
92 Splice_Unq => Splice_Unq_Recognizer,
93 Str => String_Recognizer,
94 Atom => Atom_Recognizer,
95 Whitespace => Whitesp_Recognizer,
96 Comment => Comment_Recognizer);
673ee61c
CM
97
98 Input_Feeder : aliased OpenToken.Text_Feeder.String.Instance;
99
100 Analyzer : Tokenizer.Instance :=
101 Tokenizer.Initialize (Syntax, Input_Feeder'access);
102
103
104 function Get_Token_String return String is
105 begin
106 return Tokenizer.Lexeme (Analyzer);
107 end Get_Token_String;
108
109
110 function Get_Token_Char return Character is
111 S : String := Tokenizer.Lexeme (Analyzer);
112 begin
113 return S(S'First);
114 end Get_Token_Char;
115
116
117 function Get_Token return Types.Mal_Type_Access is
118 Res : Types.Mal_Type_Access;
119 begin
120 Tokenizer.Find_Next (Analyzer);
121 case Tokenizer.ID (Analyzer) is
122 when Int =>
123 Res := new Types.Mal_Type'
124 (Sym_Type => Types.Int,
125 Int_Val => Integer'Value (Get_Token_String));
126 when Sym =>
127 Res := new Types.Mal_Type'
128 (Sym_Type => Types.Sym, Symbol => Get_Token_Char);
129 when Nil =>
130 Res := new Types.Mal_Type'
131 (Sym_Type => Types.Atom,
132 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
133 (Get_Token_String));
134 when True_Tok =>
135 Res := new Types.Mal_Type'
136 (Sym_Type => Types.Atom,
137 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
138 (Get_Token_String));
139 when False_Tok =>
140 Res := new Types.Mal_Type'
141 (Sym_Type => Types.Atom,
142 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
143 (Get_Token_String));
144 when Exp_Tok =>
145 Res := new Types.Mal_Type'
146 (Sym_Type => Types.Atom,
147 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
148 (Get_Token_String));
6a6d21b8
CM
149 when Splice_Unq =>
150 Res := new Types.Mal_Type'
151 (Sym_Type => Types.Unitary,
152 The_Function => Types.Splice_Unquote,
153 The_Operand => null);
673ee61c
CM
154 when Str =>
155 Res := new Types.Mal_Type'
156 (Sym_Type => Types.Str,
157 The_String => Ada.Strings.Unbounded.To_Unbounded_String
158 (Get_Token_String));
159 when Atom =>
160 Res := new Types.Mal_Type'
161 (Sym_Type => Types.Atom,
162 The_Atom => Ada.Strings.Unbounded.To_Unbounded_String
163 (Get_Token_String));
164 when Whitespace | Comment => null;
165 end case;
166 return Res;
167 end Get_Token;
168
169
170 -- Parsing
171 function Read_Form return Types.Mal_Type_Access;
172
173 function Read_List return Types.Mal_Type_Access is
6a6d21b8
CM
174 use Types;
175 List_MT, MTA : Mal_Type_Access;
673ee61c 176 begin
6a6d21b8
CM
177 List_MT := new Mal_Type'
178 (Sym_Type => List,
179 The_List => Lists.Empty_List);
673ee61c
CM
180 loop
181 MTA := Read_Form;
182 exit when MTA = null or else
183 MTA.all = (Sym_Type => Sym, Symbol => ')');
6a6d21b8 184 Lists.Append (List_MT.The_List, MTA);
673ee61c
CM
185 end loop;
186 return List_MT;
187 end Read_List;
188
189
190 function Read_Form return Types.Mal_Type_Access is
191 use Types;
192 MT : Types.Mal_Type_Access;
193 begin
6a6d21b8 194
673ee61c 195 MT := Get_Token;
6a6d21b8
CM
196
197 if MT = null then
198 return null;
199 end if;
200
201 if MT.Sym_Type = Sym then
202
203 if MT.Symbol = '(' then
204 return Read_List;
205 elsif MT.Symbol = ACL.Apostrophe then
206 return new Mal_Type'
207 (Sym_Type => Unitary,
208 The_Function => Quote,
209 The_Operand => Read_Form);
210 elsif MT.Symbol = ACL.Grave then
211 return new Mal_Type'
212 (Sym_Type => Unitary,
213 The_Function => Quasiquote,
214 The_Operand => Read_Form);
215 elsif MT.Symbol = ACL.Tilde then
216 return new Mal_Type'
217 (Sym_Type => Unitary,
218 The_Function => Unquote,
219 The_Operand => Read_Form);
220 else
221 return MT;
222 end if;
223
224 elsif MT.Sym_Type = Unitary and then
225 MT.The_Function = Splice_Unquote then
226 return new Mal_Type'
227 (Sym_Type => Unitary,
228 The_Function => Splice_Unquote,
229 The_Operand => Read_Form);
673ee61c
CM
230 else
231 return MT;
232 end if;
6a6d21b8 233
673ee61c
CM
234 end Read_Form;
235
236
237 function Read_Str (S : String) return Types.Mal_Type_Access is
238 begin
239 Analyzer.Reset;
240 Input_Feeder.Set (S);
241 return Read_Form;
242 exception
243 when OPENTOKEN.SYNTAX_ERROR =>
244 Ada.Text_IO.Put_Line
245 (Ada.Text_IO.Standard_Error,
246 "Lexical error at char " & Integer'Image (Analyzer.Line));
247 raise Ada.IO_Exceptions.End_Error;
248 end Read_Str;
249
250
251end Reader;