Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | (* MLton 20020329 (built Fri Mar 29 21:56:03 2002 on asv-058) *) |
2 | (* created this file on Wed Apr 3 11:12:56 2002. *) | |
3 | (* Do not edit this file. *) | |
4 | (* Flag settings: *) | |
5 | (* chunk: chunk per function *) | |
6 | (* debug: false *) | |
7 | (* defines: [] *) | |
8 | (* detect overflow: true *) | |
9 | (* drop passes: [] *) | |
10 | (* exn history: false *) | |
11 | (* fixed heap: None *) | |
12 | (* gc check: Limit *) | |
13 | (* host: self *) | |
14 | (* host type: Linux *) | |
15 | (* indentation: 3 *) | |
16 | (* includes: [mlton.h] *) | |
17 | (* inline: NonRecursive {product = 320,small = 60} *) | |
18 | (* input file: sources *) | |
19 | (* instrument: false *) | |
20 | (* instrument Sxml: false *) | |
21 | (* keepSSA: false *) | |
22 | (* keep diagnostics: [] *) | |
23 | (* keep dot: false *) | |
24 | (* keep passes: [] *) | |
25 | (* lib dir: /usr/local/lib/mlton/self *) | |
26 | (* limit check: loop headers (fullCFG = false, loopExits = true) *) | |
27 | (* limit check counts: false *) | |
28 | (* loop passes: 1 *) | |
29 | (* native: true *) | |
30 | (* native commented: 0 *) | |
31 | (* native live stack: false *) | |
32 | (* native optimize: 1 *) | |
33 | (* native move hoist: true *) | |
34 | (* native copy prop: true *) | |
35 | (* native cutoff: 100 *) | |
36 | (* native live transfer: 8 *) | |
37 | (* native future: 64 *) | |
38 | (* native ieee fp: false *) | |
39 | (* native split: Some (20000) *) | |
40 | (* new return: false *) | |
41 | (* polyvariance: Some ({rounds = 2,small = 30,product = 300}) *) | |
42 | (* print at fun entry: false *) | |
43 | (* profile: false *) | |
44 | (* safe: true *) | |
45 | (* show basis used: false *) | |
46 | (* show types: false *) | |
47 | (* stack cont: false *) | |
48 | (* static: false *) | |
49 | (* TextIO buffer size: 4096 *) | |
50 | (* type check: false *) | |
51 | (* use basis library: true *) | |
52 | (* verbosity: Silent *) | |
53 | (* start of FunctionalIO/srcSML/FunctionalIO_sig.sml *) | |
54 | (* | |
55 | This file is part of the FunctionalIO project - | |
56 | which provides functional input streams. | |
57 | ||
58 | Copyright (C) 2000 ANOQ of the Sun (alias Johnny Andersen). | |
59 | ||
60 | E-mail: anoq@HardcoreProcessing.com | |
61 | ||
62 | This library is free software; you can redistribute it and/or | |
63 | modify it under the terms of the GNU Library General Public | |
64 | License as published by the Free Software Foundation; either | |
65 | version 2 of the License, or (at your option) any later version. | |
66 | ||
67 | This library is distributed in the hope that it will be useful, | |
68 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
69 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
70 | Library General Public License for more details. | |
71 | ||
72 | As a special exception, if you do not do anything which is not in | |
73 | the spirit of the GNU Library General Public License, you are not | |
74 | required to physically compile this software into a separate library, | |
75 | since this is generally not possible with current Stanard ML compilers. | |
76 | However if you do something which is not in the spirit of the | |
77 | GNU Library General Public License you will have to follow the | |
78 | licence perpetually - thus disallowing you to use it for any | |
79 | commercial purposes at all. | |
80 | ||
81 | If you are interested in a warranty or commercial support for this | |
82 | software, contact Hardcore Processing <sales@HardcoreProcessing.com> | |
83 | for more information. | |
84 | ||
85 | You should have received a copy of the GNU Library General Public | |
86 | License along with this library; if not, write to the Free | |
87 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
88 | *) | |
89 | ||
90 | signature FUNCTIONAL_IO = | |
91 | sig | |
92 | include IO | |
93 | ||
94 | type vector | |
95 | type elem | |
96 | type instream | |
97 | type outstream | |
98 | ||
99 | val input : instream -> vector * instream | |
100 | val input1 : instream -> (elem * instream) option | |
101 | val inputN : (instream * int) -> vector * instream | |
102 | ||
103 | val closeIn : instream -> unit (* Closes file for further input. | |
104 | New end of file becomes the furthest | |
105 | position in the file that has been read | |
106 | internally. *) | |
107 | end | |
108 | ||
109 | signature FUNC_BIN_IO = | |
110 | sig | |
111 | include FUNCTIONAL_IO | |
112 | ||
113 | val openIn : string -> instream | |
114 | end | |
115 | ||
116 | (* For now we just keep things simple *) | |
117 | signature FUNC_TEXT_IO = FUNC_BIN_IO(* stop of FunctionalIO/srcSML/FunctionalIO_sig.sml *) | |
118 | (* start of FunctionalIO/srcSML/FunctionalIO.sml *) | |
119 | (* | |
120 | This file is part of the FunctionalIO project - | |
121 | which provides functional input streams. | |
122 | ||
123 | Copyright (C) 2000 ANOQ of the Sun (alias Johnny Andersen). | |
124 | ||
125 | E-mail: anoq@HardcoreProcessing.com | |
126 | ||
127 | This library is free software; you can redistribute it and/or | |
128 | modify it under the terms of the GNU Library General Public | |
129 | License as published by the Free Software Foundation; either | |
130 | version 2 of the License, or (at your option) any later version. | |
131 | ||
132 | This library is distributed in the hope that it will be useful, | |
133 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
134 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
135 | Library General Public License for more details. | |
136 | ||
137 | As a special exception, if you do not do anything which is not in | |
138 | the spirit of the GNU Library General Public License, you are not | |
139 | required to physically compile this software into a separate library, | |
140 | since this is generally not possible with current Stanard ML compilers. | |
141 | However if you do something which is not in the spirit of the | |
142 | GNU Library General Public License you will have to follow the | |
143 | licence perpetually - thus disallowing you to use it for any | |
144 | commercial purposes at all. | |
145 | ||
146 | If you are interested in a warranty or commercial support for this | |
147 | software, contact Hardcore Processing <sales@HardcoreProcessing.com> | |
148 | for more information. | |
149 | ||
150 | You should have received a copy of the GNU Library General Public | |
151 | License along with this library; if not, write to the Free | |
152 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
153 | *) | |
154 | ||
155 | (* Notice: This is not thread safe! *) | |
156 | functor FFunctionalIO(type vec | |
157 | type element | |
158 | type instream | |
159 | val impOpenIn : string -> instream | |
160 | val impCloseIn : instream -> unit | |
161 | val impInput : instream -> vec | |
162 | val vecConcat : vec list -> vec | |
163 | val vecExtract : (vec * int * int option) -> vec | |
164 | val vecSub : (vec * int) -> element | |
165 | val vecLength : vec -> int) = | |
166 | struct | |
167 | open IO | |
168 | ||
169 | type vector = vec | |
170 | type elem = element | |
171 | ||
172 | datatype impInChunk = | |
173 | ImpInChunkEnd | |
174 | | ImpInChunkVector of vector * (impInChunk ref) | |
175 | | ImpInChunkStream of instream | |
176 | ||
177 | (* The integer is a functional position in the first | |
178 | vector chunk - if there is a vector chunk at all. *) | |
179 | type instream = int * (impInChunk ref) | |
180 | type outstream = unit | |
181 | ||
182 | (* Internal function implementing inputN *) | |
183 | fun fInputN _ acc inStrRef ImpInChunkEnd n = | |
184 | (* (print "inputN, ChunkEnd\n"; *) | |
185 | (vecConcat (rev acc), (0, inStrRef)) | |
186 | | fInputN pos acc inStrRef (ImpInChunkVector (v, next)) n = | |
187 | let | |
188 | (* val _ = print "inputN, ChunkVector\n" *) | |
189 | val len = vecLength v - pos | |
190 | in | |
191 | if len = n then | |
192 | if pos = 0 then | |
193 | (vecConcat (rev (v::acc)), (0, next)) | |
194 | else | |
195 | let | |
196 | val data = vecExtract (v, pos, SOME(n)) | |
197 | in | |
198 | (vecConcat (rev (data::acc)), (0, next)) | |
199 | end | |
200 | else if len > n then | |
201 | let | |
202 | val data = vecExtract (v, pos, SOME(n)) | |
203 | in | |
204 | (vecConcat (rev (data::acc)), | |
205 | (pos + n, inStrRef)) | |
206 | end | |
207 | else (* i.e.: len < n *) | |
208 | let | |
209 | val data = vecExtract (v, pos, SOME(len)) | |
210 | in | |
211 | fInputN 0 (data::acc) next (!next) (n - len) | |
212 | end | |
213 | end | |
214 | | fInputN _ acc inStrRef (ImpInChunkStream inStr) n = | |
215 | let | |
216 | (* val _ = print "inputN, ChunkStream\n" *) | |
217 | val newVec = impInput inStr | |
218 | val _ = inStrRef := | |
219 | (case vecLength newVec of | |
220 | 0 => (impCloseIn inStr; | |
221 | ImpInChunkEnd) | |
222 | | _ => ImpInChunkVector | |
223 | (newVec, ref (ImpInChunkStream inStr))) | |
224 | in | |
225 | fInputN 0 acc inStrRef (!inStrRef) n | |
226 | end | |
227 | ||
228 | ||
229 | (* FIXME: Return 0-length vector for NONE? *) | |
230 | fun inputN ((pos, inStr), n) = | |
231 | if n < 0 then (* FIXME: No check for maxLen! *) | |
232 | raise Size | |
233 | else | |
234 | fInputN pos nil inStr (!inStr) n | |
235 | ||
236 | (* FIXME: Implement more efficiently... *) | |
237 | fun input1 inStr = | |
238 | let | |
239 | val (v, s) = inputN (inStr, 1) | |
240 | in | |
241 | if vecLength v >= 1 then | |
242 | SOME(vecSub (v, 0), s) | |
243 | else | |
244 | NONE | |
245 | end | |
246 | ||
247 | (* Internal function implementing input *) | |
248 | fun fInput pos inStrRef (ImpInChunkVector (v, next)) = | |
249 | (vecExtract (v, pos, NONE), (0, next)) | |
250 | | fInput pos inStrRef _ = | |
251 | (* FIXME: Implement more efficiently? *) | |
252 | fInputN 0 nil inStrRef (!inStrRef) 64 | |
253 | ||
254 | fun input (pos, inStr) = | |
255 | fInput pos inStr (!inStr) | |
256 | ||
257 | (* Internal function implementing closeIn | |
258 | Maybe we should make a more general parametrized | |
259 | traverse function... *) | |
260 | fun fCloseIn inStrRef (ImpInChunkStream inStr) = | |
261 | (impCloseIn inStr; | |
262 | inStrRef := ImpInChunkEnd) | |
263 | | fCloseIn _ ImpInChunkEnd = | |
264 | () | |
265 | | fCloseIn _ (ImpInChunkVector (_, next)) = | |
266 | fCloseIn next (!next) | |
267 | ||
268 | (* Closes file for further input. | |
269 | New end of file becomes the furthest | |
270 | position in the file that has been read | |
271 | internally. *) | |
272 | fun closeIn (_, inStr) = | |
273 | fCloseIn inStr (!inStr) | |
274 | ||
275 | fun openIn fileName = | |
276 | (0, | |
277 | ref (ImpInChunkStream | |
278 | (impOpenIn fileName))) | |
279 | end | |
280 | ||
281 | structure Word8Vector = | |
282 | struct | |
283 | open Word8Vector | |
284 | fun extract (arr, s, l) = | |
285 | Word8VectorSlice.vector (Word8VectorSlice.slice (arr, s, l)) | |
286 | end | |
287 | structure CharVector = | |
288 | struct | |
289 | open CharVector | |
290 | fun extract (arr, s, l) = | |
291 | CharVectorSlice.vector (CharVectorSlice.slice (arr, s, l)) | |
292 | end | |
293 | ||
294 | structure FuncBinIO = | |
295 | FFunctionalIO(type vec = Word8Vector.vector | |
296 | type element = Word8.word | |
297 | type instream = BinIO.instream | |
298 | val impOpenIn = BinIO.openIn | |
299 | val impCloseIn = BinIO.closeIn | |
300 | val impInput = BinIO.input | |
301 | val vecConcat = Word8Vector.concat | |
302 | val vecExtract = Word8Vector.extract | |
303 | val vecSub = Word8Vector.sub | |
304 | val vecLength = Word8Vector.length) | |
305 | :> FUNC_BIN_IO | |
306 | where type vector = Word8Vector.vector | |
307 | and type elem = Word8.word | |
308 | ||
309 | structure FuncTextIO = | |
310 | FFunctionalIO(type vec = CharVector.vector | |
311 | type element = Char.char | |
312 | type instream = TextIO.instream | |
313 | val impOpenIn = TextIO.openIn | |
314 | val impCloseIn = TextIO.closeIn | |
315 | val impInput = TextIO.input | |
316 | val vecConcat = CharVector.concat | |
317 | val vecExtract = CharVector.extract | |
318 | val vecSub = CharVector.sub | |
319 | val vecLength = CharVector.length) | |
320 | :> FUNC_TEXT_IO | |
321 | where type vector = CharVector.vector | |
322 | and type elem = Char.char(* stop of FunctionalIO/srcSML/FunctionalIO.sml *) | |
323 | (* start of ParsingToolkit/srcSML/ParserCombinators_sig.sml *) | |
324 | (* | |
325 | This file is part of the ParsingToolkit project - | |
326 | which provides combinator parsers for functional input streams. | |
327 | ||
328 | Copyright (C) 2000 ANOQ of the Sun (alias Johnny Andersen). | |
329 | ||
330 | Authors: Fritz Henglein <henglein@it.edu> | |
331 | ANOQ of the Sun (alias Johnny Andersen) | |
332 | <anoq@HardcoreProcessing.com> | |
333 | ||
334 | This library is free software; you can redistribute it and/or | |
335 | modify it under the terms of the GNU Library General Public | |
336 | License as published by the Free Software Foundation; either | |
337 | version 2 of the License, or (at your option) any later version. | |
338 | ||
339 | This library is distributed in the hope that it will be useful, | |
340 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
341 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
342 | Library General Public License for more details. | |
343 | ||
344 | As a special exception, if you do not do anything which is not in | |
345 | the spirit of the GNU Library General Public License, you are not | |
346 | required to physically compile this software into a separate library, | |
347 | since this is generally not possible with current Stanard ML compilers. | |
348 | However if you do something which is not in the spirit of the | |
349 | GNU Library General Public License you will have to follow the | |
350 | licence perpetually - thus disallowing you to use it for any | |
351 | commercial purposes at all. | |
352 | ||
353 | If you are interested in a warranty or commercial support for this | |
354 | software, contact Hardcore Processing <sales@HardcoreProcessing.com> | |
355 | for more information. | |
356 | ||
357 | You should have received a copy of the GNU Library General Public | |
358 | License along with this library; if not, write to the Free | |
359 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
360 | *) | |
361 | ||
362 | (* Built upon Fritz Henglein's implementation of | |
363 | parser combinators as found in Larry Paulson's | |
364 | "ML for the Working Programmer" *) | |
365 | ||
366 | signature PARSER_COMBINATORS = | |
367 | sig | |
368 | type instream (* This is FuncTextIO.instream in TextIOParserCombinators | |
369 | and FuncBinIO.instream in BinIOParserCombinators *) | |
370 | type vec (* This is string in TextIOParserCombinators | |
371 | and Word8Vector.vector in BinIOParserCombinators *) | |
372 | type elem (* This is char in TextIOParserCombinators | |
373 | and Word8.word in BinIOParserCombinators *) | |
374 | ||
375 | (* This is the type of a parser. A parser is a function | |
376 | taking a functional instream as argument. It returns | |
377 | a value that has been created by the parser during parsing, | |
378 | and a new functional instream with the stream position | |
379 | updated to where the parser stopped reading. *) | |
380 | type 'a parser = instream -> ('a * instream) | |
381 | ||
382 | (* SyntaxError is raised when a parser fails to parse. *) | |
383 | exception SyntaxError of string * instream | |
384 | ||
385 | (* Functions given to the >> combinator are expected to | |
386 | raise ValidityError on invalid arguments. *) | |
387 | exception ValidityError of string | |
388 | ||
389 | (* The combinators *) | |
390 | (* The purpose of this combinator is to try parsing with | |
391 | 2 parser functions and return the result of the | |
392 | first function that succeeds. *) | |
393 | val || : ('a parser) * ('a parser) -> ('a parser) | |
394 | ||
395 | (* This combinator will execute 2 parsers in sequence and | |
396 | return a pair of the results of the parsers. *) | |
397 | val -- : ('a parser) * ('b parser) -> (('a * 'b) parser) | |
398 | ||
399 | (* This combinator executes 2 parsers in sequence and | |
400 | ignores the result of the first parser. *) | |
401 | val $-- : ('a parser) * ('b parser) -> ('b parser) | |
402 | ||
403 | (* This combinator executes 2 parsers in sequence and | |
404 | ignores the result of the second parser. *) | |
405 | val --$ : ('a parser) * ('b parser) -> ('a parser) | |
406 | ||
407 | (* Execute a parser and run the result through a function. *) | |
408 | val >> : ('a parser) * ('a -> 'b) -> ('b parser) | |
409 | ||
410 | (* This combinator is for reading an verifying an expected keyword. *) | |
411 | val $$ : vec -> (vec parser) | |
412 | ||
413 | (* Some handy built-in parsers. *) | |
414 | ||
415 | (* Doesn't parse anything, just returns nil. *) | |
416 | val empty : ('a list) parser | |
417 | ||
418 | (* Given a predicate, returns a parser that will read an | |
419 | element from the stream if the predicate is true. *) | |
420 | val getIf : (elem -> bool) -> (elem parser) | |
421 | ||
422 | (* Given a parser, returns a parser that will read a list | |
423 | of values with the given parser. Parses as many values as possible. *) | |
424 | val repeat : ('a parser) -> (('a list) parser) | |
425 | ||
426 | (* Given a predicate, returns a parser that will read a list of | |
427 | elements, until the predicate is false. *) | |
428 | val repeatIf : (elem -> bool) -> ((elem list) parser) | |
429 | ||
430 | (* Given a number n and a parser, returns a parser that | |
431 | parses a list of n values with the given parser. *) | |
432 | val repeatN : int -> ('a parser) -> ('a list parser) | |
433 | ||
434 | (* Same as repeatIf, except that this will read at least one | |
435 | value - or fail. *) | |
436 | val repeatOneIf : (elem -> bool) -> ((elem list) parser) | |
437 | end | |
438 | (* stop of ParsingToolkit/srcSML/ParserCombinators_sig.sml *) | |
439 | (* start of ParsingToolkit/srcSML/ParserCombinators.sml *) | |
440 | (* | |
441 | This file is part of the ParsingToolkit project - | |
442 | which provides combinator parsers for functional input streams. | |
443 | ||
444 | Copyright (C) 2000 ANOQ of the Sun (alias Johnny Andersen). | |
445 | ||
446 | Authors: Fritz Henglein <henglein@it.edu> | |
447 | ANOQ of the Sun (alias Johnny Andersen) | |
448 | <anoq@HardcoreProcessing.com> | |
449 | ||
450 | This library is free software; you can redistribute it and/or | |
451 | modify it under the terms of the GNU Library General Public | |
452 | License as published by the Free Software Foundation; either | |
453 | version 2 of the License, or (at your option) any later version. | |
454 | ||
455 | This library is distributed in the hope that it will be useful, | |
456 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
457 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
458 | Library General Public License for more details. | |
459 | ||
460 | As a special exception, if you do not do anything which is not in | |
461 | the spirit of the GNU Library General Public License, you are not | |
462 | required to physically compile this software into a separate library, | |
463 | since this is generally not possible with current Stanard ML compilers. | |
464 | However if you do something which is not in the spirit of the | |
465 | GNU Library General Public License you will have to follow the | |
466 | licence perpetually - thus disallowing you to use it for any | |
467 | commercial purposes at all. | |
468 | ||
469 | If you are interested in a warranty or commercial support for this | |
470 | software, contact Hardcore Processing <sales@HardcoreProcessing.com> | |
471 | for more information. | |
472 | ||
473 | You should have received a copy of the GNU Library General Public | |
474 | License along with this library; if not, write to the Free | |
475 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
476 | *) | |
477 | ||
478 | (* Built upon Fritz Henglein's implementation of | |
479 | parser combinators as found in Larry Paulson's | |
480 | "ML for the Working Programmer" *) | |
481 | ||
482 | infix 6 $-- | |
483 | infix 6 --$ | |
484 | infix 5 -- | |
485 | infix 3 >> | |
486 | (* infix 1 // *) | |
487 | infix 0 || | |
488 | ||
489 | functor FParserCombinators(structure FIO : FUNCTIONAL_IO | |
490 | type vec | |
491 | type elem | |
492 | val elemToString : elem -> string | |
493 | val vecLength : vec -> int | |
494 | val vecEqual : (vec * vec) -> bool | |
495 | val vecToString : vec -> string | |
496 | sharing type elem = FIO.elem | |
497 | sharing type vec = FIO.vector) | |
498 | :> PARSER_COMBINATORS where type instream = FIO.instream | |
499 | and type vec = vec | |
500 | and type elem = elem = | |
501 | struct | |
502 | type instream = FIO.instream | |
503 | type vec = vec | |
504 | type elem = elem | |
505 | ||
506 | (* FIXME: Use FIO.StreamIO.PrimIO.compare instead... *) | |
507 | (* val csSize = Substring.size *) | |
508 | ||
509 | (* Exceptions for combinator errors *) | |
510 | exception SyntaxError of string * instream | |
511 | exception ValidityError of string | |
512 | ||
513 | type 'a parser = instream -> ('a * instream) | |
514 | ||
515 | fun (pf1 || pf2) stream = | |
516 | pf1 stream handle | |
517 | exn1 as (SyntaxError (_, stream1)) => | |
518 | (pf2 stream handle | |
519 | exn2 as (SyntaxError (_, stream2)) => | |
520 | raise exn1 | |
521 | (* FIXME: We could compare with FIO.StreamIO.PrimIO.compare if it was implemented *) | |
522 | (* if csSize stream1 < csSize stream2 then | |
523 | raise exn1 | |
524 | else | |
525 | raise exn2 *) ) | |
526 | ||
527 | fun (pf1 -- pf2) stream = | |
528 | let | |
529 | val (res1, stream1) = pf1 stream | |
530 | val (res2, stream2) = pf2 stream1 | |
531 | in | |
532 | ((res1, res2), stream2) | |
533 | end | |
534 | ||
535 | fun (pf1 $-- pf2) stream = | |
536 | let | |
537 | val (_, stream1) = pf1 stream | |
538 | in | |
539 | pf2 stream1 | |
540 | end | |
541 | ||
542 | fun (pf1 --$ pf2) stream = | |
543 | let | |
544 | val (res1, stream1) = pf1 stream | |
545 | val (_, stream2) = pf2 stream1 | |
546 | in | |
547 | (res1, stream2) | |
548 | end | |
549 | ||
550 | fun (pf >> f) stream = | |
551 | let | |
552 | val (res, stream') = pf stream | |
553 | in | |
554 | (f res, stream') handle | |
555 | ValidityError msg => | |
556 | raise SyntaxError (msg, stream') | |
557 | end | |
558 | ||
559 | fun $$ s stream = | |
560 | let | |
561 | val (v, stream2) = FIO.inputN (stream, vecLength s) | |
562 | in | |
563 | if vecEqual (v, s) then | |
564 | (s, stream2) | |
565 | else | |
566 | raise SyntaxError ((vecToString s) ^ " expected", stream) | |
567 | end | |
568 | ||
569 | fun empty stream = (nil, stream) | |
570 | ||
571 | (* Utility functions *) | |
572 | (* This implementation (and several others I've tried) runs | |
573 | out of memory on test.rib. I assume it is because of the | |
574 | stack of exceptions. *) | |
575 | (* | |
576 | fun repeat pf stream = | |
577 | (pf -- repeat pf >> op:: || empty) stream *) | |
578 | ||
579 | (* Working implementation of repeat - takes 1.45 sec for parsing | |
580 | test.rib when using SML/NJ 110.0.6 on Linux on a 166Mhz Pentium. | |
581 | It does not use huge amounts of memory when compared to the amount | |
582 | of data being read. *) | |
583 | fun repeat pf stream = | |
584 | let | |
585 | fun oneIter stream = | |
586 | let | |
587 | val (res, stream') = pf stream | |
588 | in | |
589 | (SOME(res), stream') | |
590 | end | |
591 | handle SyntaxError _ => | |
592 | (NONE, stream) | |
593 | ||
594 | fun rep acc (NONE, stream) = | |
595 | (rev acc, stream) | |
596 | | rep acc (SOME(res), stream) = | |
597 | rep (res :: acc) (oneIter stream) | |
598 | in | |
599 | rep nil (oneIter stream) | |
600 | end | |
601 | ||
602 | fun repeatN n pf stream = | |
603 | if n > 0 then | |
604 | let | |
605 | fun oneIter stream = | |
606 | let | |
607 | val (res, stream') = pf stream | |
608 | in | |
609 | (SOME(res), stream') | |
610 | end | |
611 | handle SyntaxError _ => | |
612 | (NONE, stream) | |
613 | ||
614 | fun rep n acc (NONE, stream) = | |
615 | raise SyntaxError ("Could not repeat parser the last " ^ (Int.toString n) ^ " times!", stream) | |
616 | | rep n acc (SOME(res), stream) = | |
617 | if n > 0 then | |
618 | rep (n - 1) (res :: acc) (oneIter stream) | |
619 | else | |
620 | (rev (res::acc), stream) | |
621 | in | |
622 | (* We subtract 1 from n because we call oneIter once already here *) | |
623 | rep (n - 1) nil (oneIter stream) | |
624 | end | |
625 | else | |
626 | (nil, stream) | |
627 | ||
628 | fun getIf pred stream = | |
629 | case FIO.input1 stream of | |
630 | SOME (res as (e, stream')) => | |
631 | if pred e then | |
632 | res | |
633 | else | |
634 | raise SyntaxError ((elemToString e) ^ " unexpected", stream) | |
635 | | NONE => | |
636 | raise SyntaxError ("Unexpected end of file", stream) | |
637 | ||
638 | fun repeatIf pred = | |
639 | repeat (getIf pred) | |
640 | ||
641 | fun repeatOneIf pred = | |
642 | getIf pred -- repeatIf pred >> op:: | |
643 | end | |
644 | ||
645 | structure TextIOParserCombinators = | |
646 | FParserCombinators(structure FIO = FuncTextIO | |
647 | type vec = string | |
648 | type elem = char | |
649 | val elemToString = Char.toString | |
650 | val vecLength = size | |
651 | val vecEqual = (fn (s1, s2) => s1 = s2) | |
652 | val vecToString = (fn s => s)) | |
653 | ||
654 | structure BinIOParserCombinators = | |
655 | FParserCombinators(structure FIO = FuncBinIO | |
656 | type vec = Word8Vector.vector | |
657 | type elem = Word8.word | |
658 | (* FIXME: These anonymous functions are ugly... | |
659 | implement them somewhere else *) | |
660 | val elemToString = (fn e => | |
661 | Char.toString | |
662 | (chr (Word8.toInt e))) | |
663 | val vecLength = Word8Vector.length | |
664 | val vecEqual = (fn (v1, v2) => | |
665 | #2 (Word8Vector.foldl | |
666 | (fn (e, (index, value)) => | |
667 | (index + 1, | |
668 | value andalso ((Word8.compare (e, Word8Vector.sub (v2, index))) = EQUAL))) | |
669 | (0, true) v1)) | |
670 | val vecToString = (fn v => | |
671 | String.concat | |
672 | (Word8Vector.foldr | |
673 | (fn (e, acc) => (elemToString e)::acc) | |
674 | nil v))) | |
675 | (* stop of ParsingToolkit/srcSML/ParserCombinators.sml *) | |
676 | (* start of ParsingToolkit/srcSML/TextIOParserCombExtra_sig.sml *) | |
677 | (* | |
678 | This file is part of the ParsingToolkit project - | |
679 | which provides combinator parsers for functional input streams. | |
680 | ||
681 | Copyright (C) 2000 ANOQ of the Sun (alias Johnny Andersen). | |
682 | ||
683 | Authors: Fritz Henglein <henglein@it.edu> | |
684 | ANOQ of the Sun (alias Johnny Andersen) | |
685 | <anoq@HardcoreProcessing.com> | |
686 | ||
687 | This library is free software; you can redistribute it and/or | |
688 | modify it under the terms of the GNU Library General Public | |
689 | License as published by the Free Software Foundation; either | |
690 | version 2 of the License, or (at your option) any later version. | |
691 | ||
692 | This library is distributed in the hope that it will be useful, | |
693 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
694 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
695 | Library General Public License for more details. | |
696 | ||
697 | As a special exception, if you do not do anything which is not in | |
698 | the spirit of the GNU Library General Public License, you are not | |
699 | required to physically compile this software into a separate library, | |
700 | since this is generally not possible with current Stanard ML compilers. | |
701 | However if you do something which is not in the spirit of the | |
702 | GNU Library General Public License you will have to follow the | |
703 | licence perpetually - thus disallowing you to use it for any | |
704 | commercial purposes at all. | |
705 | ||
706 | If you are interested in a warranty or commercial support for this | |
707 | software, contact Hardcore Processing <sales@HardcoreProcessing.com> | |
708 | for more information. | |
709 | ||
710 | You should have received a copy of the GNU Library General Public | |
711 | License along with this library; if not, write to the Free | |
712 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
713 | *) | |
714 | ||
715 | (* Built upon Fritz Henglein's implementation of | |
716 | parser combinators as found in Larry Paulson's | |
717 | "ML for the Working Programmer" *) | |
718 | ||
719 | signature TEXT_IO_PARSER_COMB_EXTRA = | |
720 | sig | |
721 | type instream = TextIOParserCombinators.instream | |
722 | type elem = TextIOParserCombinators.elem | |
723 | type 'a parser = 'a TextIOParserCombinators.parser | |
724 | ||
725 | val isWhitespaceChar : char -> bool | |
726 | val isLetterChar : char -> bool | |
727 | val isDigitChar : char -> bool | |
728 | ||
729 | val whitespaceForce : elem list parser | |
730 | val whitespace : elem list parser | |
731 | ||
732 | val getReal : instream -> real * instream | |
733 | val getRealWS : real parser | |
734 | end | |
735 | (* stop of ParsingToolkit/srcSML/TextIOParserCombExtra_sig.sml *) | |
736 | (* start of ParsingToolkit/srcSML/TextIOParserCombExtra.sml *) | |
737 | (* | |
738 | This file is part of the ParsingToolkit project - | |
739 | which provides combinator parsers for functional input streams. | |
740 | ||
741 | Copyright (C) 2000 ANOQ of the Sun (alias Johnny Andersen). | |
742 | ||
743 | Authors: Fritz Henglein <henglein@it.edu> | |
744 | ANOQ of the Sun (alias Johnny Andersen) | |
745 | <anoq@HardcoreProcessing.com> | |
746 | ||
747 | This library is free software; you can redistribute it and/or | |
748 | modify it under the terms of the GNU Library General Public | |
749 | License as published by the Free Software Foundation; either | |
750 | version 2 of the License, or (at your option) any later version. | |
751 | ||
752 | This library is distributed in the hope that it will be useful, | |
753 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
754 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
755 | Library General Public License for more details. | |
756 | ||
757 | As a special exception, if you do not do anything which is not in | |
758 | the spirit of the GNU Library General Public License, you are not | |
759 | required to physically compile this software into a separate library, | |
760 | since this is generally not possible with current Stanard ML compilers. | |
761 | However if you do something which is not in the spirit of the | |
762 | GNU Library General Public License you will have to follow the | |
763 | licence perpetually - thus disallowing you to use it for any | |
764 | commercial purposes at all. | |
765 | ||
766 | If you are interested in a warranty or commercial support for this | |
767 | software, contact Hardcore Processing <sales@HardcoreProcessing.com> | |
768 | for more information. | |
769 | ||
770 | You should have received a copy of the GNU Library General Public | |
771 | License along with this library; if not, write to the Free | |
772 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
773 | *) | |
774 | ||
775 | (* Built upon Fritz Henglein's implementation of | |
776 | parser combinators as found in Larry Paulson's | |
777 | "ML for the Working Programmer" *) | |
778 | ||
779 | functor FTextIOParserCombExtra() :> TEXT_IO_PARSER_COMB_EXTRA = | |
780 | struct | |
781 | open TextIOParserCombinators | |
782 | ||
783 | (* Character predicates *) | |
784 | ||
785 | fun isWhitespaceChar #" " = true | |
786 | | isWhitespaceChar #"\009" = true | |
787 | | isWhitespaceChar #"\r" = true | |
788 | | isWhitespaceChar #"\n" = true | |
789 | | isWhitespaceChar _ = false | |
790 | ||
791 | val isLetterChar = Char.isAlpha (* must be generalized! *) | |
792 | val isDigitChar = Char.isDigit | |
793 | ||
794 | (* Lexical scanner functions *) | |
795 | val whitespaceForce = | |
796 | repeatOneIf isWhitespaceChar | |
797 | ||
798 | val whitespace = | |
799 | repeatIf isWhitespaceChar | |
800 | ||
801 | fun getReal stream = | |
802 | let | |
803 | val (digits, stream1) = | |
804 | repeatOneIf (fn c => isDigitChar c orelse | |
805 | c = #"." orelse | |
806 | c = #"-") | |
807 | stream | |
808 | val realChars = map (fn c => if c = #"-" then #"~" else c) digits | |
809 | val realStr = implode realChars | |
810 | in | |
811 | case Real.fromString realStr of | |
812 | NONE => | |
813 | raise SyntaxError ("The string " ^ realStr ^ " is not a real number", stream1) | |
814 | | SOME r => | |
815 | (r, stream1) | |
816 | end | |
817 | ||
818 | val getRealWS = getReal --$ whitespace | |
819 | end | |
820 | ||
821 | structure TextIOParserCombExtra = FTextIOParserCombExtra()(* stop of ParsingToolkit/srcSML/TextIOParserCombExtra.sml *) | |
822 | (* start of RepeatParserCombinatorTest.sml *) | |
823 | (* Create a text file with a Standard ML Modules code snippet *) | |
824 | val outStr = TextIO.openOut "RepeatParserCombinator.txt" | |
825 | val _ = TextIO.output (outStr, "structure MyStruct = struct end") | |
826 | val _ = TextIO.closeOut outStr | |
827 | ||
828 | (* Now for the combinator parser test using the repeat and >> combinators *) | |
829 | structure FIO = FuncTextIO | |
830 | ||
831 | local | |
832 | open TextIOParserCombinators | |
833 | in | |
834 | (* White space *) | |
835 | fun isWhitespaceChar #" " = true | |
836 | | isWhitespaceChar #"\009" = true | |
837 | | isWhitespaceChar #"\r" = true | |
838 | | isWhitespaceChar #"\n" = true | |
839 | | isWhitespaceChar _ = false | |
840 | ||
841 | val whitespace = | |
842 | (repeatIf isWhitespaceChar) >> (fn _ => ()) | |
843 | (* End white *) | |
844 | ||
845 | val EQUALS = ($$ "=") -- whitespace | |
846 | ||
847 | fun isIdentChar c = | |
848 | Char.isAlphaNum c orelse c = #"'" orelse c = #"_" | |
849 | ||
850 | fun isIdentStartChar c = | |
851 | Char.isAlpha c orelse c = #"'" | |
852 | ||
853 | fun isSymbolChar (#"!") = true | |
854 | | isSymbolChar (#"%") = true | |
855 | | isSymbolChar (#"&") = true | |
856 | | isSymbolChar (#"$") = true | |
857 | | isSymbolChar (#"#") = true | |
858 | | isSymbolChar (#"+") = true | |
859 | | isSymbolChar (#"-") = true | |
860 | | isSymbolChar (#"/") = true | |
861 | | isSymbolChar (#":") = true | |
862 | | isSymbolChar (#"<") = true | |
863 | | isSymbolChar (#"=") = true | |
864 | | isSymbolChar (#">") = true | |
865 | | isSymbolChar (#"?") = true | |
866 | | isSymbolChar (#"@") = true | |
867 | | isSymbolChar (#"\\") = true | |
868 | | isSymbolChar (#"~") = true | |
869 | | isSymbolChar (#"`") = true | |
870 | | isSymbolChar (#"^") = true | |
871 | | isSymbolChar (#"|") = true | |
872 | | isSymbolChar (#"*") = true | |
873 | | isSymbolChar _ = false | |
874 | ||
875 | val alphaIdentNoWS = ((getIf isIdentStartChar) -- (repeatIf isIdentChar)) | |
876 | >> (fn (a, b) => implode (a::b)) | |
877 | val symbolIdentNoWS = (repeatOneIf isSymbolChar) >> implode | |
878 | ||
879 | val identNoWS = alphaIdentNoWS || symbolIdentNoWS | |
880 | val ident = identNoWS --$ whitespace | |
881 | ||
882 | fun validateReserved res str = | |
883 | if str = res then | |
884 | str | |
885 | else | |
886 | raise ValidityError ("Expected token " ^ res ^ | |
887 | " not found! Found " ^ str ^ | |
888 | " instead.") | |
889 | ||
890 | fun reserved parser res inStr = | |
891 | (parser >> (validateReserved res)) inStr | |
892 | ||
893 | (* And the next is really beautiful functional programming :) *) | |
894 | val STRUCTURE = reserved ident "structure" | |
895 | val STRUCT = reserved ident "struct" | |
896 | val END = reserved ident "end" | |
897 | ||
898 | fun parseStructure inStr = | |
899 | let | |
900 | val (result, inStr2) = | |
901 | ((((STRUCTURE $-- ident) --$ (EQUALS --$ STRUCT)) --$ END) | |
902 | >> (fn str => str)) | |
903 | inStr | |
904 | in | |
905 | result | |
906 | end | |
907 | handle (SyntaxError (msg, inStr)) => | |
908 | (print ("Syntax error while parsing:\n" ^ msg ^ "\n"); "") | |
909 | | _ => | |
910 | (print "Error during parsing!\n"; "") | |
911 | ||
912 | fun import fileName = | |
913 | let | |
914 | val inStr = FIO.openIn fileName | |
915 | val result = parseStructure inStr | |
916 | val _ = FIO.closeIn inStr | |
917 | in | |
918 | result | |
919 | end | |
920 | end (* end local *) | |
921 | ||
922 | val structureName = import "RepeatParserCombinator.txt" | |
923 | val _ = print ("The name of the structure is: " ^ structureName) | |
924 | val _ = print "\n" | |
925 | (* stop of RepeatParserCombinatorTest.sml *) |