Commit | Line | Data |
---|---|---|
1b706edf LC |
1 | ;;; EARLEY -- Earley's parser, written by Marc Feeley. |
2 | ||
3 | ; $Id: earley.sch,v 1.2 1999/07/12 18:05:19 lth Exp $ | |
4 | ; 990708 / lth -- changed 'main' to 'earley-benchmark'. | |
5 | ; | |
6 | ; (make-parser grammar lexer) is used to create a parser from the grammar | |
7 | ; description `grammar' and the lexer function `lexer'. | |
8 | ; | |
9 | ; A grammar is a list of definitions. Each definition defines a non-terminal | |
10 | ; by a set of rules. Thus a definition has the form: (nt rule1 rule2...). | |
11 | ; A given non-terminal can only be defined once. The first non-terminal | |
12 | ; defined is the grammar's goal. Each rule is a possibly empty list of | |
13 | ; non-terminals. Thus a rule has the form: (nt1 nt2...). A non-terminal | |
14 | ; can be any scheme value. Note that all grammar symbols are treated as | |
15 | ; non-terminals. This is fine though because the lexer will be outputing | |
16 | ; non-terminals. | |
17 | ; | |
18 | ; The lexer defines what a token is and the mapping between tokens and | |
19 | ; the grammar's non-terminals. It is a function of one argument, the input, | |
20 | ; that returns the list of tokens corresponding to the input. Each token is | |
21 | ; represented by a list. The first element is some `user-defined' information | |
22 | ; associated with the token and the rest represents the token's class(es) (as a | |
23 | ; list of non-terminals that this token corresponds to). | |
24 | ; | |
25 | ; The result of `make-parser' is a function that parses the single input it | |
26 | ; is given into the grammar's goal. The result is a `parse' which can be | |
27 | ; manipulated with the procedures: `parse->parsed?', `parse->trees' | |
28 | ; and `parse->nb-trees' (see below). | |
29 | ; | |
30 | ; Let's assume that we want a parser for the grammar | |
31 | ; | |
32 | ; S -> x = E | |
33 | ; E -> E + E | V | |
34 | ; V -> V y | | |
35 | ; | |
36 | ; and that the input to the parser is a string of characters. Also, assume we | |
37 | ; would like to map the characters `x', `y', `+' and `=' into the corresponding | |
38 | ; non-terminals in the grammar. Such a parser could be created with | |
39 | ; | |
40 | ; (make-parser | |
41 | ; '( | |
42 | ; (s (x = e)) | |
43 | ; (e (e + e) (v)) | |
44 | ; (v (v y) ()) | |
45 | ; ) | |
46 | ; (lambda (str) | |
47 | ; (map (lambda (char) | |
48 | ; (list char ; user-info = the character itself | |
49 | ; (case char | |
50 | ; ((#\x) 'x) | |
51 | ; ((#\y) 'y) | |
52 | ; ((#\+) '+) | |
53 | ; ((#\=) '=) | |
54 | ; (else (fatal-error "lexer error"))))) | |
55 | ; (string->list str))) | |
56 | ; ) | |
57 | ; | |
58 | ; An alternative definition (that does not check for lexical errors) is | |
59 | ; | |
60 | ; (make-parser | |
61 | ; '( | |
62 | ; (s (#\x #\= e)) | |
63 | ; (e (e #\+ e) (v)) | |
64 | ; (v (v #\y) ()) | |
65 | ; ) | |
66 | ; (lambda (str) (map (lambda (char) (list char char)) (string->list str))) | |
67 | ; ) | |
68 | ; | |
69 | ; To help with the rest of the discussion, here are a few definitions: | |
70 | ; | |
71 | ; An input pointer (for an input of `n' tokens) is a value between 0 and `n'. | |
72 | ; It indicates a point between two input tokens (0 = beginning, `n' = end). | |
73 | ; For example, if `n' = 4, there are 5 input pointers: | |
74 | ; | |
75 | ; input token1 token2 token3 token4 | |
76 | ; input pointers 0 1 2 3 4 | |
77 | ; | |
78 | ; A configuration indicates the extent to which a given rule is parsed (this | |
79 | ; is the common `dot notation'). For simplicity, a configuration is | |
80 | ; represented as an integer, with successive configurations in the same | |
81 | ; rule associated with successive integers. It is assumed that the grammar | |
82 | ; has been extended with rules to aid scanning. These rules are of the | |
83 | ; form `nt ->', and there is one such rule for every non-terminal. Note | |
84 | ; that these rules are special because they only apply when the corresponding | |
85 | ; non-terminal is returned by the lexer. | |
86 | ; | |
87 | ; A configuration set is a configuration grouped with the set of input pointers | |
88 | ; representing where the head non-terminal of the configuration was predicted. | |
89 | ; | |
90 | ; Here are the rules and configurations for the grammar given above: | |
91 | ; | |
92 | ; S -> . \ | |
93 | ; 0 | | |
94 | ; x -> . | | |
95 | ; 1 | | |
96 | ; = -> . | | |
97 | ; 2 | | |
98 | ; E -> . | | |
99 | ; 3 > special rules (for scanning) | |
100 | ; + -> . | | |
101 | ; 4 | | |
102 | ; V -> . | | |
103 | ; 5 | | |
104 | ; y -> . | | |
105 | ; 6 / | |
106 | ; S -> . x . = . E . | |
107 | ; 7 8 9 10 | |
108 | ; E -> . E . + . E . | |
109 | ; 11 12 13 14 | |
110 | ; E -> . V . | |
111 | ; 15 16 | |
112 | ; V -> . V . y . | |
113 | ; 17 18 19 | |
114 | ; V -> . | |
115 | ; 20 | |
116 | ; | |
117 | ; Starters of the non-terminal `nt' are configurations that are leftmost | |
118 | ; in a non-special rule for `nt'. Enders of the non-terminal `nt' are | |
119 | ; configurations that are rightmost in any rule for `nt'. Predictors of the | |
120 | ; non-terminal `nt' are configurations that are directly to the left of `nt' | |
121 | ; in any rule. | |
122 | ; | |
123 | ; For the grammar given above, | |
124 | ; | |
125 | ; Starters of V = (17 20) | |
126 | ; Enders of V = (5 19 20) | |
127 | ; Predictors of V = (15 17) | |
128 | ||
129 | (define (make-parser grammar lexer) | |
130 | ||
131 | (define (non-terminals grammar) ; return vector of non-terminals in grammar | |
132 | ||
133 | (define (add-nt nt nts) | |
134 | (if (member nt nts) nts (cons nt nts))) ; use equal? for equality tests | |
135 | ||
136 | (let def-loop ((defs grammar) (nts '())) | |
137 | (if (pair? defs) | |
138 | (let* ((def (car defs)) | |
139 | (head (car def))) | |
140 | (let rule-loop ((rules (cdr def)) | |
141 | (nts (add-nt head nts))) | |
142 | (if (pair? rules) | |
143 | (let ((rule (car rules))) | |
144 | (let loop ((l rule) (nts nts)) | |
145 | (if (pair? l) | |
146 | (let ((nt (car l))) | |
147 | (loop (cdr l) (add-nt nt nts))) | |
148 | (rule-loop (cdr rules) nts)))) | |
149 | (def-loop (cdr defs) nts)))) | |
150 | (list->vector (reverse nts))))) ; goal non-terminal must be at index 0 | |
151 | ||
152 | (define (ind nt nts) ; return index of non-terminal `nt' in `nts' | |
153 | (let loop ((i (- (vector-length nts) 1))) | |
154 | (if (>= i 0) | |
155 | (if (equal? (vector-ref nts i) nt) i (loop (- i 1))) | |
156 | #f))) | |
157 | ||
158 | (define (nb-configurations grammar) ; return nb of configurations in grammar | |
159 | (let def-loop ((defs grammar) (nb-confs 0)) | |
160 | (if (pair? defs) | |
161 | (let ((def (car defs))) | |
162 | (let rule-loop ((rules (cdr def)) (nb-confs nb-confs)) | |
163 | (if (pair? rules) | |
164 | (let ((rule (car rules))) | |
165 | (let loop ((l rule) (nb-confs nb-confs)) | |
166 | (if (pair? l) | |
167 | (loop (cdr l) (+ nb-confs 1)) | |
168 | (rule-loop (cdr rules) (+ nb-confs 1))))) | |
169 | (def-loop (cdr defs) nb-confs)))) | |
170 | nb-confs))) | |
171 | ||
172 | ; First, associate a numeric identifier to every non-terminal in the | |
173 | ; grammar (with the goal non-terminal associated with 0). | |
174 | ; | |
175 | ; So, for the grammar given above we get: | |
176 | ; | |
177 | ; s -> 0 x -> 1 = -> 4 e ->3 + -> 4 v -> 5 y -> 6 | |
178 | ||
179 | (let* ((nts (non-terminals grammar)) ; id map = list of non-terms | |
180 | (nb-nts (vector-length nts)) ; the number of non-terms | |
181 | (nb-confs (+ (nb-configurations grammar) nb-nts)) ; the nb of confs | |
182 | (starters (make-vector nb-nts '())) ; starters for every non-term | |
183 | (enders (make-vector nb-nts '())) ; enders for every non-term | |
184 | (predictors (make-vector nb-nts '())) ; predictors for every non-term | |
185 | (steps (make-vector nb-confs #f)) ; what to do in a given conf | |
186 | (names (make-vector nb-confs #f))) ; name of rules | |
187 | ||
188 | (define (setup-tables grammar nts starters enders predictors steps names) | |
189 | ||
190 | (define (add-conf conf nt nts class) | |
191 | (let ((i (ind nt nts))) | |
192 | (vector-set! class i (cons conf (vector-ref class i))))) | |
193 | ||
194 | (let ((nb-nts (vector-length nts))) | |
195 | ||
196 | (let nt-loop ((i (- nb-nts 1))) | |
197 | (if (>= i 0) | |
198 | (begin | |
199 | (vector-set! steps i (- i nb-nts)) | |
200 | (vector-set! names i (list (vector-ref nts i) 0)) | |
201 | (vector-set! enders i (list i)) | |
202 | (nt-loop (- i 1))))) | |
203 | ||
204 | (let def-loop ((defs grammar) (conf (vector-length nts))) | |
205 | (if (pair? defs) | |
206 | (let* ((def (car defs)) | |
207 | (head (car def))) | |
208 | (let rule-loop ((rules (cdr def)) (conf conf) (rule-num 1)) | |
209 | (if (pair? rules) | |
210 | (let ((rule (car rules))) | |
211 | (vector-set! names conf (list head rule-num)) | |
212 | (add-conf conf head nts starters) | |
213 | (let loop ((l rule) (conf conf)) | |
214 | (if (pair? l) | |
215 | (let ((nt (car l))) | |
216 | (vector-set! steps conf (ind nt nts)) | |
217 | (add-conf conf nt nts predictors) | |
218 | (loop (cdr l) (+ conf 1))) | |
219 | (begin | |
220 | (vector-set! steps conf (- (ind head nts) nb-nts)) | |
221 | (add-conf conf head nts enders) | |
222 | (rule-loop (cdr rules) (+ conf 1) (+ rule-num 1)))))) | |
223 | (def-loop (cdr defs) conf)))))))) | |
224 | ||
225 | ; Now, for each non-terminal, compute the starters, enders and predictors and | |
226 | ; the names and steps tables. | |
227 | ||
228 | (setup-tables grammar nts starters enders predictors steps names) | |
229 | ||
230 | ; Build the parser description | |
231 | ||
232 | (let ((parser-descr (vector lexer | |
233 | nts | |
234 | starters | |
235 | enders | |
236 | predictors | |
237 | steps | |
238 | names))) | |
239 | (lambda (input) | |
240 | ||
241 | (define (ind nt nts) ; return index of non-terminal `nt' in `nts' | |
242 | (let loop ((i (- (vector-length nts) 1))) | |
243 | (if (>= i 0) | |
244 | (if (equal? (vector-ref nts i) nt) i (loop (- i 1))) | |
245 | #f))) | |
246 | ||
247 | (define (comp-tok tok nts) ; transform token to parsing format | |
248 | (let loop ((l1 (cdr tok)) (l2 '())) | |
249 | (if (pair? l1) | |
250 | (let ((i (ind (car l1) nts))) | |
251 | (if i | |
252 | (loop (cdr l1) (cons i l2)) | |
253 | (loop (cdr l1) l2))) | |
254 | (cons (car tok) (reverse l2))))) | |
255 | ||
256 | (define (input->tokens input lexer nts) | |
257 | (list->vector (map (lambda (tok) (comp-tok tok nts)) (lexer input)))) | |
258 | ||
259 | (define (make-states nb-toks nb-confs) | |
260 | (let ((states (make-vector (+ nb-toks 1) #f))) | |
261 | (let loop ((i nb-toks)) | |
262 | (if (>= i 0) | |
263 | (let ((v (make-vector (+ nb-confs 1) #f))) | |
264 | (vector-set! v 0 -1) | |
265 | (vector-set! states i v) | |
266 | (loop (- i 1))) | |
267 | states)))) | |
268 | ||
269 | (define (conf-set-get state conf) | |
270 | (vector-ref state (+ conf 1))) | |
271 | ||
272 | (define (conf-set-get* state state-num conf) | |
273 | (let ((conf-set (conf-set-get state conf))) | |
274 | (if conf-set | |
275 | conf-set | |
276 | (let ((conf-set (make-vector (+ state-num 6) #f))) | |
277 | (vector-set! conf-set 1 -3) ; old elems tail (points to head) | |
278 | (vector-set! conf-set 2 -1) ; old elems head | |
279 | (vector-set! conf-set 3 -1) ; new elems tail (points to head) | |
280 | (vector-set! conf-set 4 -1) ; new elems head | |
281 | (vector-set! state (+ conf 1) conf-set) | |
282 | conf-set)))) | |
283 | ||
284 | (define (conf-set-merge-new! conf-set) | |
285 | (vector-set! conf-set | |
286 | (+ (vector-ref conf-set 1) 5) | |
287 | (vector-ref conf-set 4)) | |
288 | (vector-set! conf-set 1 (vector-ref conf-set 3)) | |
289 | (vector-set! conf-set 3 -1) | |
290 | (vector-set! conf-set 4 -1)) | |
291 | ||
292 | (define (conf-set-head conf-set) | |
293 | (vector-ref conf-set 2)) | |
294 | ||
295 | (define (conf-set-next conf-set i) | |
296 | (vector-ref conf-set (+ i 5))) | |
297 | ||
298 | (define (conf-set-member? state conf i) | |
299 | (let ((conf-set (vector-ref state (+ conf 1)))) | |
300 | (if conf-set | |
301 | (conf-set-next conf-set i) | |
302 | #f))) | |
303 | ||
304 | (define (conf-set-adjoin state conf-set conf i) | |
305 | (let ((tail (vector-ref conf-set 3))) ; put new element at tail | |
306 | (vector-set! conf-set (+ i 5) -1) | |
307 | (vector-set! conf-set (+ tail 5) i) | |
308 | (vector-set! conf-set 3 i) | |
309 | (if (< tail 0) | |
310 | (begin | |
311 | (vector-set! conf-set 0 (vector-ref state 0)) | |
312 | (vector-set! state 0 conf))))) | |
313 | ||
314 | (define (conf-set-adjoin* states state-num l i) | |
315 | (let ((state (vector-ref states state-num))) | |
316 | (let loop ((l1 l)) | |
317 | (if (pair? l1) | |
318 | (let* ((conf (car l1)) | |
319 | (conf-set (conf-set-get* state state-num conf))) | |
320 | (if (not (conf-set-next conf-set i)) | |
321 | (begin | |
322 | (conf-set-adjoin state conf-set conf i) | |
323 | (loop (cdr l1))) | |
324 | (loop (cdr l1)))))))) | |
325 | ||
326 | (define (conf-set-adjoin** states states* state-num conf i) | |
327 | (let ((state (vector-ref states state-num))) | |
328 | (if (conf-set-member? state conf i) | |
329 | (let* ((state* (vector-ref states* state-num)) | |
330 | (conf-set* (conf-set-get* state* state-num conf))) | |
331 | (if (not (conf-set-next conf-set* i)) | |
332 | (conf-set-adjoin state* conf-set* conf i)) | |
333 | #t) | |
334 | #f))) | |
335 | ||
336 | (define (conf-set-union state conf-set conf other-set) | |
337 | (let loop ((i (conf-set-head other-set))) | |
338 | (if (>= i 0) | |
339 | (if (not (conf-set-next conf-set i)) | |
340 | (begin | |
341 | (conf-set-adjoin state conf-set conf i) | |
342 | (loop (conf-set-next other-set i))) | |
343 | (loop (conf-set-next other-set i)))))) | |
344 | ||
345 | (define (forw states state-num starters enders predictors steps nts) | |
346 | ||
347 | (define (predict state state-num conf-set conf nt starters enders) | |
348 | ||
349 | ; add configurations which start the non-terminal `nt' to the | |
350 | ; right of the dot | |
351 | ||
352 | (let loop1 ((l (vector-ref starters nt))) | |
353 | (if (pair? l) | |
354 | (let* ((starter (car l)) | |
355 | (starter-set (conf-set-get* state state-num starter))) | |
356 | (if (not (conf-set-next starter-set state-num)) | |
357 | (begin | |
358 | (conf-set-adjoin state starter-set starter state-num) | |
359 | (loop1 (cdr l))) | |
360 | (loop1 (cdr l)))))) | |
361 | ||
362 | ; check for possible completion of the non-terminal `nt' to the | |
363 | ; right of the dot | |
364 | ||
365 | (let loop2 ((l (vector-ref enders nt))) | |
366 | (if (pair? l) | |
367 | (let ((ender (car l))) | |
368 | (if (conf-set-member? state ender state-num) | |
369 | (let* ((next (+ conf 1)) | |
370 | (next-set (conf-set-get* state state-num next))) | |
371 | (conf-set-union state next-set next conf-set) | |
372 | (loop2 (cdr l))) | |
373 | (loop2 (cdr l))))))) | |
374 | ||
375 | (define (reduce states state state-num conf-set head preds) | |
376 | ||
377 | ; a non-terminal is now completed so check for reductions that | |
378 | ; are now possible at the configurations `preds' | |
379 | ||
380 | (let loop1 ((l preds)) | |
381 | (if (pair? l) | |
382 | (let ((pred (car l))) | |
383 | (let loop2 ((i head)) | |
384 | (if (>= i 0) | |
385 | (let ((pred-set (conf-set-get (vector-ref states i) pred))) | |
386 | (if pred-set | |
387 | (let* ((next (+ pred 1)) | |
388 | (next-set (conf-set-get* state state-num next))) | |
389 | (conf-set-union state next-set next pred-set))) | |
390 | (loop2 (conf-set-next conf-set i))) | |
391 | (loop1 (cdr l)))))))) | |
392 | ||
393 | (let ((state (vector-ref states state-num)) | |
394 | (nb-nts (vector-length nts))) | |
395 | (let loop () | |
396 | (let ((conf (vector-ref state 0))) | |
397 | (if (>= conf 0) | |
398 | (let* ((step (vector-ref steps conf)) | |
399 | (conf-set (vector-ref state (+ conf 1))) | |
400 | (head (vector-ref conf-set 4))) | |
401 | (vector-set! state 0 (vector-ref conf-set 0)) | |
402 | (conf-set-merge-new! conf-set) | |
403 | (if (>= step 0) | |
404 | (predict state state-num conf-set conf step starters enders) | |
405 | (let ((preds (vector-ref predictors (+ step nb-nts)))) | |
406 | (reduce states state state-num conf-set head preds))) | |
407 | (loop))))))) | |
408 | ||
409 | (define (forward starters enders predictors steps nts toks) | |
410 | (let* ((nb-toks (vector-length toks)) | |
411 | (nb-confs (vector-length steps)) | |
412 | (states (make-states nb-toks nb-confs)) | |
413 | (goal-starters (vector-ref starters 0))) | |
414 | (conf-set-adjoin* states 0 goal-starters 0) ; predict goal | |
415 | (forw states 0 starters enders predictors steps nts) | |
416 | (let loop ((i 0)) | |
417 | (if (< i nb-toks) | |
418 | (let ((tok-nts (cdr (vector-ref toks i)))) | |
419 | (conf-set-adjoin* states (+ i 1) tok-nts i) ; scan token | |
420 | (forw states (+ i 1) starters enders predictors steps nts) | |
421 | (loop (+ i 1))))) | |
422 | states)) | |
423 | ||
424 | (define (produce conf i j enders steps toks states states* nb-nts) | |
425 | (let ((prev (- conf 1))) | |
426 | (if (and (>= conf nb-nts) (>= (vector-ref steps prev) 0)) | |
427 | (let loop1 ((l (vector-ref enders (vector-ref steps prev)))) | |
428 | (if (pair? l) | |
429 | (let* ((ender (car l)) | |
430 | (ender-set (conf-set-get (vector-ref states j) | |
431 | ender))) | |
432 | (if ender-set | |
433 | (let loop2 ((k (conf-set-head ender-set))) | |
434 | (if (>= k 0) | |
435 | (begin | |
436 | (and (>= k i) | |
437 | (conf-set-adjoin** states states* k prev i) | |
438 | (conf-set-adjoin** states states* j ender k)) | |
439 | (loop2 (conf-set-next ender-set k))) | |
440 | (loop1 (cdr l)))) | |
441 | (loop1 (cdr l))))))))) | |
442 | ||
443 | (define (back states states* state-num enders steps nb-nts toks) | |
444 | (let ((state* (vector-ref states* state-num))) | |
445 | (let loop1 () | |
446 | (let ((conf (vector-ref state* 0))) | |
447 | (if (>= conf 0) | |
448 | (let* ((conf-set (vector-ref state* (+ conf 1))) | |
449 | (head (vector-ref conf-set 4))) | |
450 | (vector-set! state* 0 (vector-ref conf-set 0)) | |
451 | (conf-set-merge-new! conf-set) | |
452 | (let loop2 ((i head)) | |
453 | (if (>= i 0) | |
454 | (begin | |
455 | (produce conf i state-num enders steps | |
456 | toks states states* nb-nts) | |
457 | (loop2 (conf-set-next conf-set i))) | |
458 | (loop1))))))))) | |
459 | ||
460 | (define (backward states enders steps nts toks) | |
461 | (let* ((nb-toks (vector-length toks)) | |
462 | (nb-confs (vector-length steps)) | |
463 | (nb-nts (vector-length nts)) | |
464 | (states* (make-states nb-toks nb-confs)) | |
465 | (goal-enders (vector-ref enders 0))) | |
466 | (let loop1 ((l goal-enders)) | |
467 | (if (pair? l) | |
468 | (let ((conf (car l))) | |
469 | (conf-set-adjoin** states states* nb-toks conf 0) | |
470 | (loop1 (cdr l))))) | |
471 | (let loop2 ((i nb-toks)) | |
472 | (if (>= i 0) | |
473 | (begin | |
474 | (back states states* i enders steps nb-nts toks) | |
475 | (loop2 (- i 1))))) | |
476 | states*)) | |
477 | ||
478 | (define (parsed? nt i j nts enders states) | |
479 | (let ((nt* (ind nt nts))) | |
480 | (if nt* | |
481 | (let ((nb-nts (vector-length nts))) | |
482 | (let loop ((l (vector-ref enders nt*))) | |
483 | (if (pair? l) | |
484 | (let ((conf (car l))) | |
485 | (if (conf-set-member? (vector-ref states j) conf i) | |
486 | #t | |
487 | (loop (cdr l)))) | |
488 | #f))) | |
489 | #f))) | |
490 | ||
491 | (define (deriv-trees conf i j enders steps names toks states nb-nts) | |
492 | (let ((name (vector-ref names conf))) | |
493 | ||
494 | (if name ; `conf' is at the start of a rule (either special or not) | |
495 | (if (< conf nb-nts) | |
496 | (list (list name (car (vector-ref toks i)))) | |
497 | (list (list name))) | |
498 | ||
499 | (let ((prev (- conf 1))) | |
500 | (let loop1 ((l1 (vector-ref enders (vector-ref steps prev))) | |
501 | (l2 '())) | |
502 | (if (pair? l1) | |
503 | (let* ((ender (car l1)) | |
504 | (ender-set (conf-set-get (vector-ref states j) | |
505 | ender))) | |
506 | (if ender-set | |
507 | (let loop2 ((k (conf-set-head ender-set)) (l2 l2)) | |
508 | (if (>= k 0) | |
509 | (if (and (>= k i) | |
510 | (conf-set-member? (vector-ref states k) | |
511 | prev i)) | |
512 | (let ((prev-trees | |
513 | (deriv-trees prev i k enders steps names | |
514 | toks states nb-nts)) | |
515 | (ender-trees | |
516 | (deriv-trees ender k j enders steps names | |
517 | toks states nb-nts))) | |
518 | (let loop3 ((l3 ender-trees) (l2 l2)) | |
519 | (if (pair? l3) | |
520 | (let ((ender-tree (list (car l3)))) | |
521 | (let loop4 ((l4 prev-trees) (l2 l2)) | |
522 | (if (pair? l4) | |
523 | (loop4 (cdr l4) | |
524 | (cons (append (car l4) | |
525 | ender-tree) | |
526 | l2)) | |
527 | (loop3 (cdr l3) l2)))) | |
528 | (loop2 (conf-set-next ender-set k) l2)))) | |
529 | (loop2 (conf-set-next ender-set k) l2)) | |
530 | (loop1 (cdr l1) l2))) | |
531 | (loop1 (cdr l1) l2))) | |
532 | l2)))))) | |
533 | ||
534 | (define (deriv-trees* nt i j nts enders steps names toks states) | |
535 | (let ((nt* (ind nt nts))) | |
536 | (if nt* | |
537 | (let ((nb-nts (vector-length nts))) | |
538 | (let loop ((l (vector-ref enders nt*)) (trees '())) | |
539 | (if (pair? l) | |
540 | (let ((conf (car l))) | |
541 | (if (conf-set-member? (vector-ref states j) conf i) | |
542 | (loop (cdr l) | |
543 | (append (deriv-trees conf i j enders steps names | |
544 | toks states nb-nts) | |
545 | trees)) | |
546 | (loop (cdr l) trees))) | |
547 | trees))) | |
548 | #f))) | |
549 | ||
550 | (define (nb-deriv-trees conf i j enders steps toks states nb-nts) | |
551 | (let ((prev (- conf 1))) | |
552 | (if (or (< conf nb-nts) (< (vector-ref steps prev) 0)) | |
553 | 1 | |
554 | (let loop1 ((l (vector-ref enders (vector-ref steps prev))) | |
555 | (n 0)) | |
556 | (if (pair? l) | |
557 | (let* ((ender (car l)) | |
558 | (ender-set (conf-set-get (vector-ref states j) | |
559 | ender))) | |
560 | (if ender-set | |
561 | (let loop2 ((k (conf-set-head ender-set)) (n n)) | |
562 | (if (>= k 0) | |
563 | (if (and (>= k i) | |
564 | (conf-set-member? (vector-ref states k) | |
565 | prev i)) | |
566 | (let ((nb-prev-trees | |
567 | (nb-deriv-trees prev i k enders steps | |
568 | toks states nb-nts)) | |
569 | (nb-ender-trees | |
570 | (nb-deriv-trees ender k j enders steps | |
571 | toks states nb-nts))) | |
572 | (loop2 (conf-set-next ender-set k) | |
573 | (+ n (* nb-prev-trees nb-ender-trees)))) | |
574 | (loop2 (conf-set-next ender-set k) n)) | |
575 | (loop1 (cdr l) n))) | |
576 | (loop1 (cdr l) n))) | |
577 | n))))) | |
578 | ||
579 | (define (nb-deriv-trees* nt i j nts enders steps toks states) | |
580 | (let ((nt* (ind nt nts))) | |
581 | (if nt* | |
582 | (let ((nb-nts (vector-length nts))) | |
583 | (let loop ((l (vector-ref enders nt*)) (nb-trees 0)) | |
584 | (if (pair? l) | |
585 | (let ((conf (car l))) | |
586 | (if (conf-set-member? (vector-ref states j) conf i) | |
587 | (loop (cdr l) | |
588 | (+ (nb-deriv-trees conf i j enders steps | |
589 | toks states nb-nts) | |
590 | nb-trees)) | |
591 | (loop (cdr l) nb-trees))) | |
592 | nb-trees))) | |
593 | #f))) | |
594 | ||
595 | (let* ((lexer (vector-ref parser-descr 0)) | |
596 | (nts (vector-ref parser-descr 1)) | |
597 | (starters (vector-ref parser-descr 2)) | |
598 | (enders (vector-ref parser-descr 3)) | |
599 | (predictors (vector-ref parser-descr 4)) | |
600 | (steps (vector-ref parser-descr 5)) | |
601 | (names (vector-ref parser-descr 6)) | |
602 | (toks (input->tokens input lexer nts))) | |
603 | ||
604 | (vector nts | |
605 | starters | |
606 | enders | |
607 | predictors | |
608 | steps | |
609 | names | |
610 | toks | |
611 | (backward (forward starters enders predictors steps nts toks) | |
612 | enders steps nts toks) | |
613 | parsed? | |
614 | deriv-trees* | |
615 | nb-deriv-trees*)))))) | |
616 | ||
617 | (define (parse->parsed? parse nt i j) | |
618 | (let* ((nts (vector-ref parse 0)) | |
619 | (enders (vector-ref parse 2)) | |
620 | (states (vector-ref parse 7)) | |
621 | (parsed? (vector-ref parse 8))) | |
622 | (parsed? nt i j nts enders states))) | |
623 | ||
624 | (define (parse->trees parse nt i j) | |
625 | (let* ((nts (vector-ref parse 0)) | |
626 | (enders (vector-ref parse 2)) | |
627 | (steps (vector-ref parse 4)) | |
628 | (names (vector-ref parse 5)) | |
629 | (toks (vector-ref parse 6)) | |
630 | (states (vector-ref parse 7)) | |
631 | (deriv-trees* (vector-ref parse 9))) | |
632 | (deriv-trees* nt i j nts enders steps names toks states))) | |
633 | ||
634 | (define (parse->nb-trees parse nt i j) | |
635 | (let* ((nts (vector-ref parse 0)) | |
636 | (enders (vector-ref parse 2)) | |
637 | (steps (vector-ref parse 4)) | |
638 | (toks (vector-ref parse 6)) | |
639 | (states (vector-ref parse 7)) | |
640 | (nb-deriv-trees* (vector-ref parse 10))) | |
641 | (nb-deriv-trees* nt i j nts enders steps toks states))) | |
642 | ||
643 | (define (test k) | |
644 | (let ((p (make-parser '( (s (a) (s s)) ) | |
645 | (lambda (l) (map (lambda (x) (list x x)) l))))) | |
646 | (let ((x (p (vector->list (make-vector k 'a))))) | |
647 | (length (parse->trees x 's 0 k))))) | |
648 | ||
649 | (define (earley-benchmark . args) | |
650 | (let ((k (if (null? args) 9 (car args)))) | |
651 | (run-benchmark | |
652 | "earley" | |
653 | 1 | |
654 | (lambda () (test k)) | |
655 | (lambda (result) | |
656 | (display result) | |
657 | (newline) | |
658 | #t)))) |