Commit | Line | Data |
---|---|---|
673ee61c CM |
1 | with Ada.IO_Exceptions; |
2 | with Ada.Characters.Latin_1; | |
3 | with Ada.Strings.Maps.Constants; | |
4 | with Ada.Strings.Unbounded; | |
5 | with Ada.Text_IO; | |
6 | with Opentoken.Recognizer.Character_Set; | |
7 | with Opentoken.Recognizer.Identifier; | |
8 | with Opentoken.Recognizer.Integer; | |
9 | with Opentoken.Recognizer.Keyword; | |
10 | with Opentoken.Recognizer.Line_Comment; | |
c0f15c87 | 11 | with Opentoken.Recognizer.Real; |
673ee61c CM |
12 | with Opentoken.Recognizer.Separator; |
13 | with Opentoken.Recognizer.Single_Character_Set; | |
14 | with Opentoken.Recognizer.String; | |
15 | with OpenToken.Text_Feeder.String; | |
16 | with Opentoken.Token.Enumerated.Analyzer; | |
17 | ||
18 | package body Reader is | |
19 | ||
6a6d21b8 CM |
20 | package ACL renames Ada.Characters.Latin_1; |
21 | ||
c0f15c87 | 22 | type Lexemes is (Int, Float_Tok, Sym, |
6a6d21b8 | 23 | Nil, True_Tok, False_Tok, Exp_Tok, Splice_Unq, |
673ee61c CM |
24 | Str, Atom, |
25 | Whitespace, Comment); | |
26 | ||
27 | package Lisp_Tokens is new Opentoken.Token.Enumerated (Lexemes); | |
28 | package Tokenizer is new Lisp_Tokens.Analyzer; | |
29 | ||
30 | Exp_Recognizer : constant Tokenizer.Recognizable_Token := | |
31 | Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("**")); | |
32 | ||
6a6d21b8 CM |
33 | Splice_Unq_Recognizer : constant Tokenizer.Recognizable_Token := |
34 | Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("~@")); | |
35 | ||
673ee61c CM |
36 | Nil_Recognizer : constant Tokenizer.Recognizable_Token := |
37 | Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("nil")); | |
38 | ||
39 | True_Recognizer : constant Tokenizer.Recognizable_Token := | |
40 | Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("true")); | |
41 | ||
42 | False_Recognizer : constant Tokenizer.Recognizable_Token := | |
11714407 | 43 | Tokenizer.Get (Opentoken.Recognizer.Keyword.Get ("false")); |
673ee61c CM |
44 | |
45 | ID_Recognizer : constant Tokenizer.Recognizable_Token := | |
46 | Tokenizer.Get(Opentoken.Recognizer.Identifier.Get); | |
47 | ||
48 | Int_Recognizer : constant Tokenizer.Recognizable_Token := | |
49 | Tokenizer.Get(Opentoken.Recognizer.Integer.Get); | |
50 | ||
c0f15c87 CM |
51 | Float_Recognizer : constant Tokenizer.Recognizable_Token := |
52 | Tokenizer.Get(Opentoken.Recognizer.Real.Get); | |
53 | ||
11714407 CM |
54 | -- Use the C style for escaped strings. |
55 | String_Recognizer : constant Tokenizer.Recognizable_Token := | |
56 | Tokenizer.Get | |
57 | (Opentoken.Recognizer.String.Get | |
58 | (Escapeable => True, | |
59 | Double_Delimiter => False)); | |
673ee61c CM |
60 | |
61 | -- Atom definition | |
c140811a | 62 | -- Note Start_Chars includes : for keywords. |
673ee61c | 63 | Start_Chars : Ada.Strings.Maps.Character_Set := |
c140811a CM |
64 | Ada.Strings.Maps."or" |
65 | (Ada.Strings.Maps.Constants.Letter_Set, | |
66 | Ada.Strings.Maps.To_Set (':')); | |
673ee61c CM |
67 | |
68 | Body_Chars : Ada.Strings.Maps.Character_Set := | |
69 | Ada.Strings.Maps."or" | |
70 | (Ada.Strings.Maps.Constants.Alphanumeric_Set, | |
71 | Ada.Strings.Maps.To_Set ('-')); | |
72 | ||
73 | Atom_Recognizer : constant Tokenizer.Recognizable_Token := | |
74 | Tokenizer.Get | |
75 | (Opentoken.Recognizer.Identifier.Get (Start_Chars, Body_Chars)); | |
76 | ||
77 | Lisp_Syms : constant Ada.Strings.Maps.Character_Set := | |
78 | Ada.Strings.Maps.To_Set ("[]{}()'`~^@+-*/"); | |
79 | ||
80 | Sym_Recognizer : constant Tokenizer.Recognizable_Token := | |
81 | Tokenizer.Get (Opentoken.Recognizer.Single_Character_Set.Get (Lisp_Syms)); | |
82 | ||
83 | Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set := | |
6a6d21b8 | 84 | Ada.Strings.Maps.To_Set (ACL.HT & ACL.Space & ACL.Comma); |
673ee61c CM |
85 | |
86 | Whitesp_Recognizer : constant Tokenizer.Recognizable_Token := | |
87 | Tokenizer.Get (Opentoken.Recognizer.Character_Set.Get (Lisp_Whitespace)); | |
88 | ||
89 | Comment_Recognizer : constant Tokenizer.Recognizable_Token := | |
90 | Tokenizer.Get(Opentoken.Recognizer.Line_Comment.Get (";")); | |
91 | ||
92 | Syntax : constant Tokenizer.Syntax := | |
6a6d21b8 | 93 | (Int => Int_Recognizer, |
c0f15c87 | 94 | Float_Tok => Float_Recognizer, |
6a6d21b8 CM |
95 | Sym => Sym_Recognizer, |
96 | Nil => Nil_Recognizer, | |
97 | True_Tok => True_Recognizer, | |
98 | False_Tok => False_Recognizer, | |
99 | Exp_Tok => Exp_Recognizer, | |
100 | Splice_Unq => Splice_Unq_Recognizer, | |
101 | Str => String_Recognizer, | |
102 | Atom => Atom_Recognizer, | |
103 | Whitespace => Whitesp_Recognizer, | |
104 | Comment => Comment_Recognizer); | |
673ee61c CM |
105 | |
106 | Input_Feeder : aliased OpenToken.Text_Feeder.String.Instance; | |
107 | ||
108 | Analyzer : Tokenizer.Instance := | |
109 | Tokenizer.Initialize (Syntax, Input_Feeder'access); | |
110 | ||
111 | ||
112 | function Get_Token_String return String is | |
113 | begin | |
114 | return Tokenizer.Lexeme (Analyzer); | |
115 | end Get_Token_String; | |
116 | ||
117 | ||
118 | function Get_Token_Char return Character is | |
119 | S : String := Tokenizer.Lexeme (Analyzer); | |
120 | begin | |
121 | return S(S'First); | |
122 | end Get_Token_Char; | |
123 | ||
124 | ||
125 | function Get_Token return Types.Mal_Type_Access is | |
126 | Res : Types.Mal_Type_Access; | |
127 | begin | |
128 | Tokenizer.Find_Next (Analyzer); | |
129 | case Tokenizer.ID (Analyzer) is | |
130 | when Int => | |
131 | Res := new Types.Mal_Type' | |
132 | (Sym_Type => Types.Int, | |
133 | Int_Val => Integer'Value (Get_Token_String)); | |
c0f15c87 CM |
134 | when Float_Tok => |
135 | Res := new Types.Mal_Type' | |
136 | (Sym_Type => Types.Floating, | |
137 | Float_Val => Float'Value (Get_Token_String)); | |
673ee61c CM |
138 | when Sym => |
139 | Res := new Types.Mal_Type' | |
140 | (Sym_Type => Types.Sym, Symbol => Get_Token_Char); | |
141 | when Nil => | |
142 | Res := new Types.Mal_Type' | |
143 | (Sym_Type => Types.Atom, | |
144 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
145 | (Get_Token_String)); | |
146 | when True_Tok => | |
147 | Res := new Types.Mal_Type' | |
148 | (Sym_Type => Types.Atom, | |
149 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
150 | (Get_Token_String)); | |
151 | when False_Tok => | |
152 | Res := new Types.Mal_Type' | |
153 | (Sym_Type => Types.Atom, | |
154 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
155 | (Get_Token_String)); | |
156 | when Exp_Tok => | |
157 | Res := new Types.Mal_Type' | |
158 | (Sym_Type => Types.Atom, | |
159 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
160 | (Get_Token_String)); | |
6a6d21b8 CM |
161 | when Splice_Unq => |
162 | Res := new Types.Mal_Type' | |
163 | (Sym_Type => Types.Unitary, | |
164 | The_Function => Types.Splice_Unquote, | |
165 | The_Operand => null); | |
673ee61c CM |
166 | when Str => |
167 | Res := new Types.Mal_Type' | |
168 | (Sym_Type => Types.Str, | |
169 | The_String => Ada.Strings.Unbounded.To_Unbounded_String | |
170 | (Get_Token_String)); | |
171 | when Atom => | |
172 | Res := new Types.Mal_Type' | |
173 | (Sym_Type => Types.Atom, | |
174 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
175 | (Get_Token_String)); | |
176 | when Whitespace | Comment => null; | |
177 | end case; | |
178 | return Res; | |
179 | end Get_Token; | |
180 | ||
181 | ||
182 | -- Parsing | |
183 | function Read_Form return Types.Mal_Type_Access; | |
184 | ||
e808554d CM |
185 | function Read_List (LT : Types.List_Types) |
186 | return Types.Mal_Type_Access is | |
6a6d21b8 CM |
187 | use Types; |
188 | List_MT, MTA : Mal_Type_Access; | |
e808554d | 189 | Close : Character := Types.Closing (LT); |
673ee61c | 190 | begin |
6a6d21b8 CM |
191 | List_MT := new Mal_Type' |
192 | (Sym_Type => List, | |
e808554d | 193 | List_Type => LT, |
6a6d21b8 | 194 | The_List => Lists.Empty_List); |
673ee61c CM |
195 | loop |
196 | MTA := Read_Form; | |
197 | exit when MTA = null or else | |
e808554d | 198 | MTA.all = (Sym_Type => Sym, Symbol => Close); |
6a6d21b8 | 199 | Lists.Append (List_MT.The_List, MTA); |
673ee61c CM |
200 | end loop; |
201 | return List_MT; | |
202 | end Read_List; | |
203 | ||
204 | ||
205 | function Read_Form return Types.Mal_Type_Access is | |
206 | use Types; | |
207 | MT : Types.Mal_Type_Access; | |
208 | begin | |
6a6d21b8 | 209 | |
673ee61c | 210 | MT := Get_Token; |
6a6d21b8 CM |
211 | |
212 | if MT = null then | |
213 | return null; | |
214 | end if; | |
215 | ||
216 | if MT.Sym_Type = Sym then | |
217 | ||
218 | if MT.Symbol = '(' then | |
e808554d CM |
219 | return Read_List (List_List); |
220 | elsif MT.Symbol = '[' then | |
221 | return Read_List (Vector_List); | |
222 | elsif MT.Symbol = '{' then | |
223 | return Read_List (Hashed_List); | |
6a6d21b8 CM |
224 | elsif MT.Symbol = ACL.Apostrophe then |
225 | return new Mal_Type' | |
226 | (Sym_Type => Unitary, | |
227 | The_Function => Quote, | |
228 | The_Operand => Read_Form); | |
229 | elsif MT.Symbol = ACL.Grave then | |
230 | return new Mal_Type' | |
231 | (Sym_Type => Unitary, | |
232 | The_Function => Quasiquote, | |
233 | The_Operand => Read_Form); | |
234 | elsif MT.Symbol = ACL.Tilde then | |
235 | return new Mal_Type' | |
236 | (Sym_Type => Unitary, | |
237 | The_Function => Unquote, | |
238 | The_Operand => Read_Form); | |
239 | else | |
240 | return MT; | |
241 | end if; | |
242 | ||
243 | elsif MT.Sym_Type = Unitary and then | |
244 | MT.The_Function = Splice_Unquote then | |
245 | return new Mal_Type' | |
246 | (Sym_Type => Unitary, | |
247 | The_Function => Splice_Unquote, | |
248 | The_Operand => Read_Form); | |
673ee61c CM |
249 | else |
250 | return MT; | |
251 | end if; | |
6a6d21b8 | 252 | |
673ee61c CM |
253 | end Read_Form; |
254 | ||
255 | ||
256 | function Read_Str (S : String) return Types.Mal_Type_Access is | |
257 | begin | |
258 | Analyzer.Reset; | |
259 | Input_Feeder.Set (S); | |
260 | return Read_Form; | |
261 | exception | |
262 | when OPENTOKEN.SYNTAX_ERROR => | |
263 | Ada.Text_IO.Put_Line | |
264 | (Ada.Text_IO.Standard_Error, | |
265 | "Lexical error at char " & Integer'Image (Analyzer.Line)); | |
266 | raise Ada.IO_Exceptions.End_Error; | |
267 | end Read_Str; | |
268 | ||
269 | ||
270 | end Reader; |