Merge commit '01a301d1b606b84d986b735049e7155d2f4cd6aa'
[bpt/guile.git] / test-suite / tests / peg.test
CommitLineData
eee0877c
AW
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;;;;; PEG test suite.
3;; Tests the parsing capabilities of (ice-9 peg). Could use more
4;; tests for edge cases.
5;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6
7(define-module (test-suite test-peg)
8 :use-module (test-suite lib)
9 :use-module (ice-9 peg)
10 :use-module (ice-9 pretty-print)
11 :use-module (srfi srfi-1))
12
13;; Doubled up for pasting into REPL.
14(use-modules (test-suite lib))
15(use-modules (ice-9 peg))
16(use-modules (ice-9 pretty-print))
17(use-modules (srfi srfi-1))
18
19;; Evaluates an expression at the toplevel. Not the prettiest
20;; solution to runtime issues ever, but m3h. Runs at toplevel so that
21;; symbols are bound globally instead of in the scope of the pass-if
22;; expression.
23(define (eeval exp)
24 (eval exp (interaction-environment)))
25(define make-prec (@@ (ice-9 peg) make-prec))
26
27;; Maps the nonterminals defined in the PEG parser written as a PEG to
28;; the nonterminals defined in the PEG parser written with
29;; S-expressions.
30(define grammar-mapping
31 '((grammar peg-grammar)
32 (pattern peg-pattern)
33 (alternative peg-alternative)
34 (suffix peg-suffix)
35 (primary peg-primary)
36 (literal peg-literal)
37 (charclass peg-charclass)
38 (CCrange charclass-range)
39 (CCsingle charclass-single)
40 (nonterminal peg-nonterminal)
41 (sp peg-sp)))
42
43;; Transforms the nonterminals defined in the PEG parser written as a PEG to the nonterminals defined in the PEG parser written with S-expressions.
44(define (grammar-transform x)
45 (let ((res (assoc x grammar-mapping)))
46 (if res (cadr res) x)))
47
48;; Maps a function onto a tree (recurses until it finds atoms, then calls the function on the atoms).
49(define (tree-map fn lst)
50 (if (list? lst)
51 (if (null? lst)
52 lst
53 (cons (tree-map fn (car lst))
54 (tree-map fn (cdr lst))))
55 (fn lst)))
56
57;; Tests to make sure that we can parse a PEG defining a grammar for
58;; PEGs, then uses that grammar to parse the same PEG again to make
59;; sure we get the same result (i.e. make sure our PEG grammar
60;; expressed as a PEG is equivalent to our PEG grammar expressed with
61;; S-expressions).
62(with-test-prefix "PEG Grammar"
63 (pass-if
64 "defining PEGs with PEG"
3ebd5786 65 (and (eeval `(define-peg-string-patterns ,(@@ (ice-9 peg) peg-as-peg))) #t))
eee0877c
AW
66 (pass-if
67 "equivalence of definitions"
68 (equal?
8022f502 69 (peg:tree (match-pattern (@@ (ice-9 peg) peg-grammar) (@@ (ice-9 peg) peg-as-peg)))
eee0877c
AW
70 (tree-map
71 grammar-transform
8022f502 72 (peg:tree (match-pattern grammar (@@ (ice-9 peg) peg-as-peg)))))))
eee0877c
AW
73
74;; A grammar for pascal-style comments from Wikipedia.
75(define comment-grammar
76 "Begin <-- '(*'
77End <-- '*)'
78C <- Begin N* End
79N <- C / (!Begin !End Z)
80Z <- .")
81
82;; A short /etc/passwd file.
83(define *etc-passwd*
84 "root:x:0:0:root:/root:/bin/bash
85daemon:x:1:1:daemon:/usr/sbin:/bin/sh
86bin:x:2:2:bin:/bin:/bin/sh
87sys:x:3:3:sys:/dev:/bin/sh
88nobody:x:65534:65534:nobody:/nonexistent:/bin/sh
89messagebus:x:103:107::/var/run/dbus:/bin/false
90")
91
92;; A grammar for parsing /etc/passwd files.
3ebd5786 93(define-peg-string-patterns
eee0877c
AW
94 "passwd <-- entry* !.
95entry <-- login CO pass CO uid CO gid CO nameORcomment CO homedir CO shell NL*
96login <-- text
97pass <-- text
98uid <-- [0-9]*
99gid <-- [0-9]*
100nameORcomment <-- text
101homedir <-- path
102shell <-- path
103path <-- (SLASH pathELEMENT)*
104pathELEMENT <-- (!NL !CO !'/' .)*
105text <- (!NL !CO .)*
106CO < ':'
107NL < '\n'
108SLASH < '/'")
109
110;; Tests some actual parsing using PEGs.
111(with-test-prefix "Parsing"
3ebd5786 112 (eeval `(define-peg-string-patterns ,comment-grammar))
eee0877c
AW
113 (pass-if
114 ;; Pascal-style comment parsing
115 "simple comment"
116 (equal?
8022f502 117 (match-pattern C "(*blah*)")
eee0877c
AW
118 (make-prec 0 8 "(*blah*)"
119 '((Begin "(*") "blah" (End "*)")))))
120 (pass-if
121 "simple comment padded"
122 (equal?
8022f502 123 (match-pattern C "(*blah*)abc")
eee0877c
AW
124 (make-prec 0 8 "(*blah*)abc"
125 '((Begin "(*") "blah" (End "*)")))))
126 (pass-if
127 "nested comment"
128 (equal?
8022f502 129 (match-pattern C "(*1(*2*)*)")
eee0877c
AW
130 (make-prec 0 10 "(*1(*2*)*)"
131 '((Begin "(*") ("1" ((Begin "(*") "2" (End "*)"))) (End "*)")))))
132 (pass-if
133 "early termination"
8022f502 134 (not (match-pattern C "(*blah")))
eee0877c
AW
135 (pass-if
136 "never starts"
8022f502 137 (not (match-pattern C "blah")))
eee0877c
AW
138 ;; /etc/passwd parsing
139 (pass-if
140 "/etc/passwd"
141 (equal?
8022f502 142 (match-pattern passwd *etc-passwd*)
eee0877c
AW
143 (make-prec 0 220 *etc-passwd*
144 '(passwd (entry (login "root") (pass "x") (uid "0") (gid "0") (nameORcomment "root") (homedir (path (pathELEMENT "root"))) (shell (path (pathELEMENT "bin") (pathELEMENT "bash")))) (entry (login "daemon") (pass "x") (uid "1") (gid "1") (nameORcomment "daemon") (homedir (path (pathELEMENT "usr") (pathELEMENT "sbin"))) (shell (path (pathELEMENT "bin") (pathELEMENT "sh")))) (entry (login "bin") (pass "x") (uid "2") (gid "2") (nameORcomment "bin") (homedir (path (pathELEMENT "bin"))) (shell (path (pathELEMENT "bin") (pathELEMENT "sh")))) (entry (login "sys") (pass "x") (uid "3") (gid "3") (nameORcomment "sys") (homedir (path (pathELEMENT "dev"))) (shell (path (pathELEMENT "bin") (pathELEMENT "sh")))) (entry (login "nobody") (pass "x") (uid "65534") (gid "65534") (nameORcomment "nobody") (homedir (path (pathELEMENT "nonexistent"))) (shell (path (pathELEMENT "bin") (pathELEMENT "sh")))) (entry (login "messagebus") (pass "x") (uid "103") (gid "107") nameORcomment (homedir (path (pathELEMENT "var") (pathELEMENT "run") (pathELEMENT "dbus"))) (shell (path (pathELEMENT "bin") (pathELEMENT "false")))))))))
145
146;; Tests the functions for pulling data out of PEG Match Records.
147(with-test-prefix "PEG Match Records"
40ebbd64 148 (define-peg-pattern bs all (peg "'b'+"))
eee0877c
AW
149 (pass-if
150 "basic parameter extraction"
151 (equal?
d7e2f5e3 152 (let ((pm (search-for-pattern bs "aabbcc")))
eee0877c
AW
153 `((string ,(peg:string pm))
154 (start ,(peg:start pm))
155 (end ,(peg:end pm))
156 (substring ,(peg:substring pm))
157 (tree ,(peg:tree pm))
158 (record? ,(peg-record? pm))))
159 '((string "aabbcc")
160 (start 2)
161 (end 4)
162 (substring "bb")
163 (tree (bs "bb"))
164 (record? #t)))))
165
166;; PEG for parsing right-associative equations.
3ebd5786 167(define-peg-string-patterns
eee0877c
AW
168 "expr <- sum
169sum <-- (product ('+' / '-') sum) / product
170product <-- (value ('*' / '/') product) / value
171value <-- number / '(' expr ')'
172number <-- [0-9]+")
173
174;; Functions to actually evaluate the equations parsed with the PEG.
175(define (parse-sum sum left . rest)
176 (if (null? rest)
177 (apply parse-product left)
178 (list (string->symbol (car rest))
179 (apply parse-product left)
180 (apply parse-sum (cadr rest)))))
181
182(define (parse-product product left . rest)
183 (if (null? rest)
184 (apply parse-value left)
185 (list (string->symbol (car rest))
186 (apply parse-value left)
187 (apply parse-product (cadr rest)))))
188
189(define (parse-value value first . rest)
190 (if (null? rest)
191 (string->number (cadr first))
192 (apply parse-sum (car rest))))
193
194(define parse-expr parse-sum)
8022f502 195(define (eq-parse str) (apply parse-expr (peg:tree (match-pattern expr str))))
eee0877c
AW
196
197(with-test-prefix "Parsing right-associative equations"
198 (pass-if
199 "1"
200 (equal? (eq-parse "1") 1))
201 (pass-if
202 "1+2"
203 (equal? (eq-parse "1+2") '(+ 1 2)))
204 (pass-if
205 "1+2+3"
206 (equal? (eq-parse "1+2+3") '(+ 1 (+ 2 3))))
207 (pass-if
208 "1+2*3+4"
209 (equal? (eq-parse "1+2*3+4") '(+ 1 (+ (* 2 3) 4))))
210 (pass-if
211 "1+2/3*(4+5)/6-7-8"
212 (equal? (eq-parse "1+2/3*(4+5)/6-7-8")
213 '(+ 1 (- (/ 2 (* 3 (/ (+ 4 5) 6))) (- 7 8)))))
214 (pass-if
215 "1+1/2*3+(1+1)/2"
216 (equal? (eq-parse "1+1/2*3+(1+1)/2")
217 '(+ 1 (+ (/ 1 (* 2 3)) (/ (+ 1 1) 2))))))
218
219;; PEG for parsing left-associative equations (normal ones).
3ebd5786 220(define-peg-string-patterns
eee0877c
AW
221 "expr <- sum
222sum <-- (product ('+' / '-'))* product
223product <-- (value ('*' / '/'))* value
224value <-- number / '(' expr ')'
225number <-- [0-9]+")
226
227;; Functions to actually evaluate the equations parsed with the PEG.
228(define (make-left-parser next-func)
229 (lambda (sum first . rest)
230 (if (null? rest)
231 (apply next-func first)
232 (if (string? (cadr first))
233 (list (string->symbol (cadr first))
234 (apply next-func (car first))
235 (apply next-func (car rest)))
236 (car
237 (reduce
238 (lambda (l r)
239 (list (list (cadr r) (car r) (apply next-func (car l)))
240 (string->symbol (cadr l))))
241 'ignore
242 (append
243 (list (list (apply next-func (caar first))
244 (string->symbol (cadar first))))
245 (cdr first)
246 (list (append rest '("done"))))))))))
247
248(define (parse-value value first . rest)
249 (if (null? rest)
250 (string->number (cadr first))
251 (apply parse-sum (car rest))))
252(define parse-product (make-left-parser parse-value))
253(define parse-sum (make-left-parser parse-product))
254(define parse-expr parse-sum)
8022f502 255(define (eq-parse str) (apply parse-expr (peg:tree (match-pattern expr str))))
eee0877c
AW
256
257(with-test-prefix "Parsing left-associative equations"
258 (pass-if
259 "1"
260 (equal? (eq-parse "1") 1))
261 (pass-if
262 "1+2"
263 (equal? (eq-parse "1+2") '(+ 1 2)))
264 (pass-if
265 "1+2+3"
266 (equal? (eq-parse "1+2+3") '(+ (+ 1 2) 3)))
267 (pass-if
268 "1+2*3+4"
269 (equal? (eq-parse "1+2*3+4") '(+ (+ 1 (* 2 3)) 4)))
270 (pass-if
271 "1+2/3*(4+5)/6-7-8"
272 (equal? (eq-parse "1+2/3*(4+5)/6-7-8")
273 '(- (- (+ 1 (/ (* (/ 2 3) (+ 4 5)) 6)) 7) 8)))
274 (pass-if
275 "1+1/2*3+(1+1)/2"
276 (equal? (eq-parse "1+1/2*3+(1+1)/2")
277 '(+ (+ 1 (* (/ 1 2) 3)) (/ (+ 1 1) 2)))))
278