Merge commit '01a301d1b606b84d986b735049e7155d2f4cd6aa'
[bpt/guile.git] / test-suite / tests / peg.test
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ;;;;; PEG test suite.
3 ;; Tests the parsing capabilities of (ice-9 peg). Could use more
4 ;; tests for edge cases.
5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6
7 (define-module (test-suite test-peg)
8 :use-module (test-suite lib)
9 :use-module (ice-9 peg)
10 :use-module (ice-9 pretty-print)
11 :use-module (srfi srfi-1))
12
13 ;; Doubled up for pasting into REPL.
14 (use-modules (test-suite lib))
15 (use-modules (ice-9 peg))
16 (use-modules (ice-9 pretty-print))
17 (use-modules (srfi srfi-1))
18
19 ;; Evaluates an expression at the toplevel. Not the prettiest
20 ;; solution to runtime issues ever, but m3h. Runs at toplevel so that
21 ;; symbols are bound globally instead of in the scope of the pass-if
22 ;; expression.
23 (define (eeval exp)
24 (eval exp (interaction-environment)))
25 (define make-prec (@@ (ice-9 peg) make-prec))
26
27 ;; Maps the nonterminals defined in the PEG parser written as a PEG to
28 ;; the nonterminals defined in the PEG parser written with
29 ;; S-expressions.
30 (define grammar-mapping
31 '((grammar peg-grammar)
32 (pattern peg-pattern)
33 (alternative peg-alternative)
34 (suffix peg-suffix)
35 (primary peg-primary)
36 (literal peg-literal)
37 (charclass peg-charclass)
38 (CCrange charclass-range)
39 (CCsingle charclass-single)
40 (nonterminal peg-nonterminal)
41 (sp peg-sp)))
42
43 ;; Transforms the nonterminals defined in the PEG parser written as a PEG to the nonterminals defined in the PEG parser written with S-expressions.
44 (define (grammar-transform x)
45 (let ((res (assoc x grammar-mapping)))
46 (if res (cadr res) x)))
47
48 ;; Maps a function onto a tree (recurses until it finds atoms, then calls the function on the atoms).
49 (define (tree-map fn lst)
50 (if (list? lst)
51 (if (null? lst)
52 lst
53 (cons (tree-map fn (car lst))
54 (tree-map fn (cdr lst))))
55 (fn lst)))
56
57 ;; Tests to make sure that we can parse a PEG defining a grammar for
58 ;; PEGs, then uses that grammar to parse the same PEG again to make
59 ;; sure we get the same result (i.e. make sure our PEG grammar
60 ;; expressed as a PEG is equivalent to our PEG grammar expressed with
61 ;; S-expressions).
62 (with-test-prefix "PEG Grammar"
63 (pass-if
64 "defining PEGs with PEG"
65 (and (eeval `(define-peg-string-patterns ,(@@ (ice-9 peg) peg-as-peg))) #t))
66 (pass-if
67 "equivalence of definitions"
68 (equal?
69 (peg:tree (match-pattern (@@ (ice-9 peg) peg-grammar) (@@ (ice-9 peg) peg-as-peg)))
70 (tree-map
71 grammar-transform
72 (peg:tree (match-pattern grammar (@@ (ice-9 peg) peg-as-peg)))))))
73
74 ;; A grammar for pascal-style comments from Wikipedia.
75 (define comment-grammar
76 "Begin <-- '(*'
77 End <-- '*)'
78 C <- Begin N* End
79 N <- C / (!Begin !End Z)
80 Z <- .")
81
82 ;; A short /etc/passwd file.
83 (define *etc-passwd*
84 "root:x:0:0:root:/root:/bin/bash
85 daemon:x:1:1:daemon:/usr/sbin:/bin/sh
86 bin:x:2:2:bin:/bin:/bin/sh
87 sys:x:3:3:sys:/dev:/bin/sh
88 nobody:x:65534:65534:nobody:/nonexistent:/bin/sh
89 messagebus:x:103:107::/var/run/dbus:/bin/false
90 ")
91
92 ;; A grammar for parsing /etc/passwd files.
93 (define-peg-string-patterns
94 "passwd <-- entry* !.
95 entry <-- login CO pass CO uid CO gid CO nameORcomment CO homedir CO shell NL*
96 login <-- text
97 pass <-- text
98 uid <-- [0-9]*
99 gid <-- [0-9]*
100 nameORcomment <-- text
101 homedir <-- path
102 shell <-- path
103 path <-- (SLASH pathELEMENT)*
104 pathELEMENT <-- (!NL !CO !'/' .)*
105 text <- (!NL !CO .)*
106 CO < ':'
107 NL < '\n'
108 SLASH < '/'")
109
110 ;; Tests some actual parsing using PEGs.
111 (with-test-prefix "Parsing"
112 (eeval `(define-peg-string-patterns ,comment-grammar))
113 (pass-if
114 ;; Pascal-style comment parsing
115 "simple comment"
116 (equal?
117 (match-pattern C "(*blah*)")
118 (make-prec 0 8 "(*blah*)"
119 '((Begin "(*") "blah" (End "*)")))))
120 (pass-if
121 "simple comment padded"
122 (equal?
123 (match-pattern C "(*blah*)abc")
124 (make-prec 0 8 "(*blah*)abc"
125 '((Begin "(*") "blah" (End "*)")))))
126 (pass-if
127 "nested comment"
128 (equal?
129 (match-pattern C "(*1(*2*)*)")
130 (make-prec 0 10 "(*1(*2*)*)"
131 '((Begin "(*") ("1" ((Begin "(*") "2" (End "*)"))) (End "*)")))))
132 (pass-if
133 "early termination"
134 (not (match-pattern C "(*blah")))
135 (pass-if
136 "never starts"
137 (not (match-pattern C "blah")))
138 ;; /etc/passwd parsing
139 (pass-if
140 "/etc/passwd"
141 (equal?
142 (match-pattern passwd *etc-passwd*)
143 (make-prec 0 220 *etc-passwd*
144 '(passwd (entry (login "root") (pass "x") (uid "0") (gid "0") (nameORcomment "root") (homedir (path (pathELEMENT "root"))) (shell (path (pathELEMENT "bin") (pathELEMENT "bash")))) (entry (login "daemon") (pass "x") (uid "1") (gid "1") (nameORcomment "daemon") (homedir (path (pathELEMENT "usr") (pathELEMENT "sbin"))) (shell (path (pathELEMENT "bin") (pathELEMENT "sh")))) (entry (login "bin") (pass "x") (uid "2") (gid "2") (nameORcomment "bin") (homedir (path (pathELEMENT "bin"))) (shell (path (pathELEMENT "bin") (pathELEMENT "sh")))) (entry (login "sys") (pass "x") (uid "3") (gid "3") (nameORcomment "sys") (homedir (path (pathELEMENT "dev"))) (shell (path (pathELEMENT "bin") (pathELEMENT "sh")))) (entry (login "nobody") (pass "x") (uid "65534") (gid "65534") (nameORcomment "nobody") (homedir (path (pathELEMENT "nonexistent"))) (shell (path (pathELEMENT "bin") (pathELEMENT "sh")))) (entry (login "messagebus") (pass "x") (uid "103") (gid "107") nameORcomment (homedir (path (pathELEMENT "var") (pathELEMENT "run") (pathELEMENT "dbus"))) (shell (path (pathELEMENT "bin") (pathELEMENT "false")))))))))
145
146 ;; Tests the functions for pulling data out of PEG Match Records.
147 (with-test-prefix "PEG Match Records"
148 (define-peg-pattern bs all (peg "'b'+"))
149 (pass-if
150 "basic parameter extraction"
151 (equal?
152 (let ((pm (search-for-pattern bs "aabbcc")))
153 `((string ,(peg:string pm))
154 (start ,(peg:start pm))
155 (end ,(peg:end pm))
156 (substring ,(peg:substring pm))
157 (tree ,(peg:tree pm))
158 (record? ,(peg-record? pm))))
159 '((string "aabbcc")
160 (start 2)
161 (end 4)
162 (substring "bb")
163 (tree (bs "bb"))
164 (record? #t)))))
165
166 ;; PEG for parsing right-associative equations.
167 (define-peg-string-patterns
168 "expr <- sum
169 sum <-- (product ('+' / '-') sum) / product
170 product <-- (value ('*' / '/') product) / value
171 value <-- number / '(' expr ')'
172 number <-- [0-9]+")
173
174 ;; Functions to actually evaluate the equations parsed with the PEG.
175 (define (parse-sum sum left . rest)
176 (if (null? rest)
177 (apply parse-product left)
178 (list (string->symbol (car rest))
179 (apply parse-product left)
180 (apply parse-sum (cadr rest)))))
181
182 (define (parse-product product left . rest)
183 (if (null? rest)
184 (apply parse-value left)
185 (list (string->symbol (car rest))
186 (apply parse-value left)
187 (apply parse-product (cadr rest)))))
188
189 (define (parse-value value first . rest)
190 (if (null? rest)
191 (string->number (cadr first))
192 (apply parse-sum (car rest))))
193
194 (define parse-expr parse-sum)
195 (define (eq-parse str) (apply parse-expr (peg:tree (match-pattern expr str))))
196
197 (with-test-prefix "Parsing right-associative equations"
198 (pass-if
199 "1"
200 (equal? (eq-parse "1") 1))
201 (pass-if
202 "1+2"
203 (equal? (eq-parse "1+2") '(+ 1 2)))
204 (pass-if
205 "1+2+3"
206 (equal? (eq-parse "1+2+3") '(+ 1 (+ 2 3))))
207 (pass-if
208 "1+2*3+4"
209 (equal? (eq-parse "1+2*3+4") '(+ 1 (+ (* 2 3) 4))))
210 (pass-if
211 "1+2/3*(4+5)/6-7-8"
212 (equal? (eq-parse "1+2/3*(4+5)/6-7-8")
213 '(+ 1 (- (/ 2 (* 3 (/ (+ 4 5) 6))) (- 7 8)))))
214 (pass-if
215 "1+1/2*3+(1+1)/2"
216 (equal? (eq-parse "1+1/2*3+(1+1)/2")
217 '(+ 1 (+ (/ 1 (* 2 3)) (/ (+ 1 1) 2))))))
218
219 ;; PEG for parsing left-associative equations (normal ones).
220 (define-peg-string-patterns
221 "expr <- sum
222 sum <-- (product ('+' / '-'))* product
223 product <-- (value ('*' / '/'))* value
224 value <-- number / '(' expr ')'
225 number <-- [0-9]+")
226
227 ;; Functions to actually evaluate the equations parsed with the PEG.
228 (define (make-left-parser next-func)
229 (lambda (sum first . rest)
230 (if (null? rest)
231 (apply next-func first)
232 (if (string? (cadr first))
233 (list (string->symbol (cadr first))
234 (apply next-func (car first))
235 (apply next-func (car rest)))
236 (car
237 (reduce
238 (lambda (l r)
239 (list (list (cadr r) (car r) (apply next-func (car l)))
240 (string->symbol (cadr l))))
241 'ignore
242 (append
243 (list (list (apply next-func (caar first))
244 (string->symbol (cadar first))))
245 (cdr first)
246 (list (append rest '("done"))))))))))
247
248 (define (parse-value value first . rest)
249 (if (null? rest)
250 (string->number (cadr first))
251 (apply parse-sum (car rest))))
252 (define parse-product (make-left-parser parse-value))
253 (define parse-sum (make-left-parser parse-product))
254 (define parse-expr parse-sum)
255 (define (eq-parse str) (apply parse-expr (peg:tree (match-pattern expr str))))
256
257 (with-test-prefix "Parsing left-associative equations"
258 (pass-if
259 "1"
260 (equal? (eq-parse "1") 1))
261 (pass-if
262 "1+2"
263 (equal? (eq-parse "1+2") '(+ 1 2)))
264 (pass-if
265 "1+2+3"
266 (equal? (eq-parse "1+2+3") '(+ (+ 1 2) 3)))
267 (pass-if
268 "1+2*3+4"
269 (equal? (eq-parse "1+2*3+4") '(+ (+ 1 (* 2 3)) 4)))
270 (pass-if
271 "1+2/3*(4+5)/6-7-8"
272 (equal? (eq-parse "1+2/3*(4+5)/6-7-8")
273 '(- (- (+ 1 (/ (* (/ 2 3) (+ 4 5)) 6)) 7) 8)))
274 (pass-if
275 "1+1/2*3+(1+1)/2"
276 (equal? (eq-parse "1+1/2*3+(1+1)/2")
277 '(+ (+ 1 (* (/ 1 2) 3)) (/ (+ 1 1) 2)))))
278