Merge commit 'f6ddf827f8f192af7a8cd255bd8374a0d38bbb74'
[bpt/guile.git] / module / ice-9 / peg / string-peg.scm
1 ;;;; string-peg.scm --- representing PEG grammars as strings
2 ;;;;
3 ;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 ;;;;
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 ;;;;
19
20 (define-module (ice-9 peg string-peg)
21 #:export (peg-as-peg
22 define-peg-string-patterns
23 peg-grammar)
24 #:use-module (ice-9 peg using-parsers)
25 #:use-module (ice-9 peg codegen)
26 #:use-module (ice-9 peg simplify-tree))
27
28 ;; Gets the left-hand depth of a list.
29 (define (depth lst)
30 (if (or (not (list? lst)) (null? lst))
31 0
32 (+ 1 (depth (car lst)))))
33
34 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
35 ;;;;; Parse string PEGs using sexp PEGs.
36 ;; See the variable PEG-AS-PEG for an easier-to-read syntax.
37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38
39 ;; Grammar for PEGs in PEG grammar.
40 (define peg-as-peg
41 "grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
42 pattern <-- alternative (SLASH sp alternative)*
43 alternative <-- ([!&]? sp suffix)+
44 suffix <-- primary ([*+?] sp)*
45 primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
46 literal <-- ['] (!['] .)* ['] sp
47 charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
48 CCrange <-- . '-' .
49 CCsingle <-- .
50 nonterminal <-- [a-zA-Z0-9-]+ sp
51 sp < [ \t\n]*
52 SLASH < '/'
53 LB < '['
54 RB < ']'
55 ")
56
57 (define-syntax define-sexp-parser
58 (lambda (x)
59 (syntax-case x ()
60 ((_ sym accum pat)
61 (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
62 (accumsym (syntax->datum #'accum))
63 (syn (wrap-parser-for-users x matchf accumsym #'sym)))
64 #`(define sym #,syn))))))
65
66 (define-sexp-parser peg-grammar all
67 (+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
68 (define-sexp-parser peg-pattern all
69 (and peg-alternative
70 (* (and (ignore "/") peg-sp peg-alternative))))
71 (define-sexp-parser peg-alternative all
72 (+ (and (? (or "!" "&")) peg-sp peg-suffix)))
73 (define-sexp-parser peg-suffix all
74 (and peg-primary (* (and (or "*" "+" "?") peg-sp))))
75 (define-sexp-parser peg-primary all
76 (or (and "(" peg-sp peg-pattern ")" peg-sp)
77 (and "." peg-sp)
78 peg-literal
79 peg-charclass
80 (and peg-nonterminal (not-followed-by "<"))))
81 (define-sexp-parser peg-literal all
82 (and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
83 (define-sexp-parser peg-charclass all
84 (and (ignore "[")
85 (* (and (not-followed-by "]")
86 (or charclass-range charclass-single)))
87 (ignore "]")
88 peg-sp))
89 (define-sexp-parser charclass-range all (and peg-any "-" peg-any))
90 (define-sexp-parser charclass-single all peg-any)
91 (define-sexp-parser peg-nonterminal all
92 (and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
93 (define-sexp-parser peg-sp none
94 (* (or " " "\t" "\n")))
95
96 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
97 ;;;;; PARSE STRING PEGS
98 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
99
100 ;; Takes a string representing a PEG grammar and returns syntax that
101 ;; will define all of the nonterminals in the grammar with equivalent
102 ;; PEG s-expressions.
103 (define (peg-parser str for-syntax)
104 (let ((parsed (match-pattern peg-grammar str)))
105 (if (not parsed)
106 (begin
107 ;; (display "Invalid PEG grammar!\n")
108 #f)
109 (let ((lst (peg:tree parsed)))
110 (cond
111 ((or (not (list? lst)) (null? lst))
112 lst)
113 ((eq? (car lst) 'peg-grammar)
114 #`(begin
115 #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
116 (context-flatten (lambda (lst) (<= (depth lst) 2))
117 (cdr lst))))))))))
118
119 ;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
120 ;; defines all the appropriate nonterminals.
121 (define-syntax define-peg-string-patterns
122 (lambda (x)
123 (syntax-case x ()
124 ((_ str)
125 (peg-parser (syntax->datum #'str) x)))))
126
127 ;; lst has format (nonterm grabber pattern), where
128 ;; nonterm is a symbol (the name of the nonterminal),
129 ;; grabber is a string (either "<", "<-" or "<--"), and
130 ;; pattern is the parse of a PEG pattern expressed as as string.
131 (define (peg-nonterm->defn lst for-syntax)
132 (let* ((nonterm (car lst))
133 (grabber (cadr lst))
134 (pattern (caddr lst))
135 (nonterm-name (datum->syntax for-syntax
136 (string->symbol (cadr nonterm)))))
137 #`(define-peg-pattern #,nonterm-name
138 #,(cond
139 ((string=? grabber "<--") (datum->syntax for-syntax 'all))
140 ((string=? grabber "<-") (datum->syntax for-syntax 'body))
141 (else (datum->syntax for-syntax 'none)))
142 #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
143
144 ;; lst has format ('peg-pattern ...).
145 ;; After the context-flatten, (cdr lst) has format
146 ;; (('peg-alternative ...) ...), where the outer list is a collection
147 ;; of elements from a '/' alternative.
148 (define (peg-pattern->defn lst for-syntax)
149 #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
150 (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
151 (cdr lst)))))
152
153 ;; lst has format ('peg-alternative ...).
154 ;; After the context-flatten, (cdr lst) has the format
155 ;; (item ...), where each item has format either ("!" ...), ("&" ...),
156 ;; or ('peg-suffix ...).
157 (define (peg-alternative->defn lst for-syntax)
158 #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
159 (context-flatten (lambda (x) (or (string? (car x))
160 (eq? (car x) 'peg-suffix)))
161 (cdr lst)))))
162
163 ;; lst has the format either
164 ;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
165 ;; ('peg-suffix ...).
166 (define (peg-body->defn lst for-syntax)
167 (cond
168 ((equal? (car lst) "&")
169 #`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
170 ((equal? (car lst) "!")
171 #`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
172 ((eq? (car lst) 'peg-suffix)
173 (peg-suffix->defn lst for-syntax))
174 (else `(peg-parse-body-fail ,lst))))
175
176 ;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
177 (define (peg-suffix->defn lst for-syntax)
178 (let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
179 (cond
180 ((null? (cddr lst))
181 inner-defn)
182 ((equal? (caddr lst) "*")
183 #`(* #,inner-defn))
184 ((equal? (caddr lst) "?")
185 #`(? #,inner-defn))
186 ((equal? (caddr lst) "+")
187 #`(+ #,inner-defn)))))
188
189 ;; Parse a primary.
190 (define (peg-primary->defn lst for-syntax)
191 (let ((el (cadr lst)))
192 (cond
193 ((list? el)
194 (cond
195 ((eq? (car el) 'peg-literal)
196 (peg-literal->defn el for-syntax))
197 ((eq? (car el) 'peg-charclass)
198 (peg-charclass->defn el for-syntax))
199 ((eq? (car el) 'peg-nonterminal)
200 (datum->syntax for-syntax (string->symbol (cadr el))))))
201 ((string? el)
202 (cond
203 ((equal? el "(")
204 (peg-pattern->defn (caddr lst) for-syntax))
205 ((equal? el ".")
206 (datum->syntax for-syntax 'peg-any))
207 (else (datum->syntax for-syntax
208 `(peg-parse-any unknown-string ,lst)))))
209 (else (datum->syntax for-syntax
210 `(peg-parse-any unknown-el ,lst))))))
211
212 ;; Trims characters off the front and end of STR.
213 ;; (trim-1chars "'ab'") -> "ab"
214 (define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
215
216 ;; Parses a literal.
217 (define (peg-literal->defn lst for-syntax)
218 (datum->syntax for-syntax (trim-1chars (cadr lst))))
219
220 ;; Parses a charclass.
221 (define (peg-charclass->defn lst for-syntax)
222 #`(or
223 #,@(map
224 (lambda (cc)
225 (cond
226 ((eq? (car cc) 'charclass-range)
227 #`(range #,(datum->syntax
228 for-syntax
229 (string-ref (cadr cc) 0))
230 #,(datum->syntax
231 for-syntax
232 (string-ref (cadr cc) 2))))
233 ((eq? (car cc) 'charclass-single)
234 (datum->syntax for-syntax (cadr cc)))))
235 (context-flatten
236 (lambda (x) (or (eq? (car x) 'charclass-range)
237 (eq? (car x) 'charclass-single)))
238 (cdr lst)))))
239
240 ;; Compresses a list to save the optimizer work.
241 ;; e.g. (or (and a)) -> a
242 (define (compressor-core lst)
243 (if (or (not (list? lst)) (null? lst))
244 lst
245 (cond
246 ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
247 (null? (cddr lst)))
248 (compressor-core (cadr lst)))
249 ((and (eq? (car lst) 'body)
250 (eq? (cadr lst) 'lit)
251 (eq? (cadddr lst) 1))
252 (compressor-core (caddr lst)))
253 (else (map compressor-core lst)))))
254
255 (define (compressor syn for-syntax)
256 (datum->syntax for-syntax
257 (compressor-core (syntax->datum syn))))
258
259 ;; Builds a lambda-expressions for the pattern STR using accum.
260 (define (peg-string-compile args accum)
261 (syntax-case args ()
262 ((str-stx) (string? (syntax->datum #'str-stx))
263 (let ((string (syntax->datum #'str-stx)))
264 (compile-peg-pattern
265 (compressor
266 (peg-pattern->defn
267 (peg:tree (match-pattern peg-pattern string)) #'str-stx)
268 #'str-stx)
269 (if (eq? accum 'all) 'body accum))))
270 (else (error "Bad embedded PEG string" args))))
271
272 (add-peg-compiler! 'peg peg-string-compile)
273