Commit | Line | Data |
---|---|---|
673ee61c CM |
1 | with Ada.IO_Exceptions; |
2 | with Ada.Characters.Latin_1; | |
3 | with Ada.Strings.Maps.Constants; | |
4 | with Ada.Strings.Unbounded; | |
5 | with Ada.Text_IO; | |
6 | with Opentoken.Recognizer.Character_Set; | |
7 | with Opentoken.Recognizer.Identifier; | |
8 | with Opentoken.Recognizer.Integer; | |
9 | with Opentoken.Recognizer.Keyword; | |
10 | with Opentoken.Recognizer.Line_Comment; | |
11 | with Opentoken.Recognizer.Separator; | |
12 | with Opentoken.Recognizer.Single_Character_Set; | |
13 | with Opentoken.Recognizer.String; | |
14 | with OpenToken.Text_Feeder.String; | |
15 | with Opentoken.Token.Enumerated.Analyzer; | |
16 | ||
17 | package body Reader is | |
18 | ||
6a6d21b8 CM |
19 | package ACL renames Ada.Characters.Latin_1; |
20 | ||
673ee61c | 21 | type Lexemes is (Int, Sym, |
6a6d21b8 | 22 | Nil, True_Tok, False_Tok, Exp_Tok, Splice_Unq, |
673ee61c CM |
23 | Str, Atom, |
24 | Whitespace, Comment); | |
25 | ||
26 | package Lisp_Tokens is new Opentoken.Token.Enumerated (Lexemes); | |
27 | package Tokenizer is new Lisp_Tokens.Analyzer; | |
28 | ||
29 | Exp_Recognizer : constant Tokenizer.Recognizable_Token := | |
30 | Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("**")); | |
31 | ||
6a6d21b8 CM |
32 | Splice_Unq_Recognizer : constant Tokenizer.Recognizable_Token := |
33 | Tokenizer.Get(Opentoken.Recognizer.Separator.Get ("~@")); | |
34 | ||
673ee61c CM |
35 | Nil_Recognizer : constant Tokenizer.Recognizable_Token := |
36 | Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("nil")); | |
37 | ||
38 | True_Recognizer : constant Tokenizer.Recognizable_Token := | |
39 | Tokenizer.Get(Opentoken.Recognizer.Keyword.Get ("true")); | |
40 | ||
41 | False_Recognizer : constant Tokenizer.Recognizable_Token := | |
11714407 | 42 | Tokenizer.Get (Opentoken.Recognizer.Keyword.Get ("false")); |
673ee61c CM |
43 | |
44 | ID_Recognizer : constant Tokenizer.Recognizable_Token := | |
45 | Tokenizer.Get(Opentoken.Recognizer.Identifier.Get); | |
46 | ||
47 | Int_Recognizer : constant Tokenizer.Recognizable_Token := | |
48 | Tokenizer.Get(Opentoken.Recognizer.Integer.Get); | |
49 | ||
11714407 CM |
50 | -- Use the C style for escaped strings. |
51 | String_Recognizer : constant Tokenizer.Recognizable_Token := | |
52 | Tokenizer.Get | |
53 | (Opentoken.Recognizer.String.Get | |
54 | (Escapeable => True, | |
55 | Double_Delimiter => False)); | |
673ee61c CM |
56 | |
57 | -- Atom definition | |
58 | Start_Chars : Ada.Strings.Maps.Character_Set := | |
59 | Ada.Strings.Maps.Constants.Letter_Set; | |
60 | ||
61 | Body_Chars : Ada.Strings.Maps.Character_Set := | |
62 | Ada.Strings.Maps."or" | |
63 | (Ada.Strings.Maps.Constants.Alphanumeric_Set, | |
64 | Ada.Strings.Maps.To_Set ('-')); | |
65 | ||
66 | Atom_Recognizer : constant Tokenizer.Recognizable_Token := | |
67 | Tokenizer.Get | |
68 | (Opentoken.Recognizer.Identifier.Get (Start_Chars, Body_Chars)); | |
69 | ||
70 | Lisp_Syms : constant Ada.Strings.Maps.Character_Set := | |
71 | Ada.Strings.Maps.To_Set ("[]{}()'`~^@+-*/"); | |
72 | ||
73 | Sym_Recognizer : constant Tokenizer.Recognizable_Token := | |
74 | Tokenizer.Get (Opentoken.Recognizer.Single_Character_Set.Get (Lisp_Syms)); | |
75 | ||
76 | Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set := | |
6a6d21b8 | 77 | Ada.Strings.Maps.To_Set (ACL.HT & ACL.Space & ACL.Comma); |
673ee61c CM |
78 | |
79 | Whitesp_Recognizer : constant Tokenizer.Recognizable_Token := | |
80 | Tokenizer.Get (Opentoken.Recognizer.Character_Set.Get (Lisp_Whitespace)); | |
81 | ||
82 | Comment_Recognizer : constant Tokenizer.Recognizable_Token := | |
83 | Tokenizer.Get(Opentoken.Recognizer.Line_Comment.Get (";")); | |
84 | ||
85 | Syntax : constant Tokenizer.Syntax := | |
6a6d21b8 CM |
86 | (Int => Int_Recognizer, |
87 | Sym => Sym_Recognizer, | |
88 | Nil => Nil_Recognizer, | |
89 | True_Tok => True_Recognizer, | |
90 | False_Tok => False_Recognizer, | |
91 | Exp_Tok => Exp_Recognizer, | |
92 | Splice_Unq => Splice_Unq_Recognizer, | |
93 | Str => String_Recognizer, | |
94 | Atom => Atom_Recognizer, | |
95 | Whitespace => Whitesp_Recognizer, | |
96 | Comment => Comment_Recognizer); | |
673ee61c CM |
97 | |
98 | Input_Feeder : aliased OpenToken.Text_Feeder.String.Instance; | |
99 | ||
100 | Analyzer : Tokenizer.Instance := | |
101 | Tokenizer.Initialize (Syntax, Input_Feeder'access); | |
102 | ||
103 | ||
104 | function Get_Token_String return String is | |
105 | begin | |
106 | return Tokenizer.Lexeme (Analyzer); | |
107 | end Get_Token_String; | |
108 | ||
109 | ||
110 | function Get_Token_Char return Character is | |
111 | S : String := Tokenizer.Lexeme (Analyzer); | |
112 | begin | |
113 | return S(S'First); | |
114 | end Get_Token_Char; | |
115 | ||
116 | ||
117 | function Get_Token return Types.Mal_Type_Access is | |
118 | Res : Types.Mal_Type_Access; | |
119 | begin | |
120 | Tokenizer.Find_Next (Analyzer); | |
121 | case Tokenizer.ID (Analyzer) is | |
122 | when Int => | |
123 | Res := new Types.Mal_Type' | |
124 | (Sym_Type => Types.Int, | |
125 | Int_Val => Integer'Value (Get_Token_String)); | |
126 | when Sym => | |
127 | Res := new Types.Mal_Type' | |
128 | (Sym_Type => Types.Sym, Symbol => Get_Token_Char); | |
129 | when Nil => | |
130 | Res := new Types.Mal_Type' | |
131 | (Sym_Type => Types.Atom, | |
132 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
133 | (Get_Token_String)); | |
134 | when True_Tok => | |
135 | Res := new Types.Mal_Type' | |
136 | (Sym_Type => Types.Atom, | |
137 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
138 | (Get_Token_String)); | |
139 | when False_Tok => | |
140 | Res := new Types.Mal_Type' | |
141 | (Sym_Type => Types.Atom, | |
142 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
143 | (Get_Token_String)); | |
144 | when Exp_Tok => | |
145 | Res := new Types.Mal_Type' | |
146 | (Sym_Type => Types.Atom, | |
147 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
148 | (Get_Token_String)); | |
6a6d21b8 CM |
149 | when Splice_Unq => |
150 | Res := new Types.Mal_Type' | |
151 | (Sym_Type => Types.Unitary, | |
152 | The_Function => Types.Splice_Unquote, | |
153 | The_Operand => null); | |
673ee61c CM |
154 | when Str => |
155 | Res := new Types.Mal_Type' | |
156 | (Sym_Type => Types.Str, | |
157 | The_String => Ada.Strings.Unbounded.To_Unbounded_String | |
158 | (Get_Token_String)); | |
159 | when Atom => | |
160 | Res := new Types.Mal_Type' | |
161 | (Sym_Type => Types.Atom, | |
162 | The_Atom => Ada.Strings.Unbounded.To_Unbounded_String | |
163 | (Get_Token_String)); | |
164 | when Whitespace | Comment => null; | |
165 | end case; | |
166 | return Res; | |
167 | end Get_Token; | |
168 | ||
169 | ||
170 | -- Parsing | |
171 | function Read_Form return Types.Mal_Type_Access; | |
172 | ||
173 | function Read_List return Types.Mal_Type_Access is | |
6a6d21b8 CM |
174 | use Types; |
175 | List_MT, MTA : Mal_Type_Access; | |
673ee61c | 176 | begin |
6a6d21b8 CM |
177 | List_MT := new Mal_Type' |
178 | (Sym_Type => List, | |
179 | The_List => Lists.Empty_List); | |
673ee61c CM |
180 | loop |
181 | MTA := Read_Form; | |
182 | exit when MTA = null or else | |
183 | MTA.all = (Sym_Type => Sym, Symbol => ')'); | |
6a6d21b8 | 184 | Lists.Append (List_MT.The_List, MTA); |
673ee61c CM |
185 | end loop; |
186 | return List_MT; | |
187 | end Read_List; | |
188 | ||
189 | ||
190 | function Read_Form return Types.Mal_Type_Access is | |
191 | use Types; | |
192 | MT : Types.Mal_Type_Access; | |
193 | begin | |
6a6d21b8 | 194 | |
673ee61c | 195 | MT := Get_Token; |
6a6d21b8 CM |
196 | |
197 | if MT = null then | |
198 | return null; | |
199 | end if; | |
200 | ||
201 | if MT.Sym_Type = Sym then | |
202 | ||
203 | if MT.Symbol = '(' then | |
204 | return Read_List; | |
205 | elsif MT.Symbol = ACL.Apostrophe then | |
206 | return new Mal_Type' | |
207 | (Sym_Type => Unitary, | |
208 | The_Function => Quote, | |
209 | The_Operand => Read_Form); | |
210 | elsif MT.Symbol = ACL.Grave then | |
211 | return new Mal_Type' | |
212 | (Sym_Type => Unitary, | |
213 | The_Function => Quasiquote, | |
214 | The_Operand => Read_Form); | |
215 | elsif MT.Symbol = ACL.Tilde then | |
216 | return new Mal_Type' | |
217 | (Sym_Type => Unitary, | |
218 | The_Function => Unquote, | |
219 | The_Operand => Read_Form); | |
220 | else | |
221 | return MT; | |
222 | end if; | |
223 | ||
224 | elsif MT.Sym_Type = Unitary and then | |
225 | MT.The_Function = Splice_Unquote then | |
226 | return new Mal_Type' | |
227 | (Sym_Type => Unitary, | |
228 | The_Function => Splice_Unquote, | |
229 | The_Operand => Read_Form); | |
673ee61c CM |
230 | else |
231 | return MT; | |
232 | end if; | |
6a6d21b8 | 233 | |
673ee61c CM |
234 | end Read_Form; |
235 | ||
236 | ||
237 | function Read_Str (S : String) return Types.Mal_Type_Access is | |
238 | begin | |
239 | Analyzer.Reset; | |
240 | Input_Feeder.Set (S); | |
241 | return Read_Form; | |
242 | exception | |
243 | when OPENTOKEN.SYNTAX_ERROR => | |
244 | Ada.Text_IO.Put_Line | |
245 | (Ada.Text_IO.Standard_Error, | |
246 | "Lexical error at char " & Integer'Image (Analyzer.Line)); | |
247 | raise Ada.IO_Exceptions.End_Error; | |
248 | end Read_Str; | |
249 | ||
250 | ||
251 | end Reader; |