1 ;;;; string-peg.scm --- representing PEG grammars as strings
3 ;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 (define-module (ice-9 peg string-peg)
22 define-peg-string-patterns
24 #:use-module (ice-9 peg using-parsers)
25 #:use-module (ice-9 peg codegen)
26 #:use-module (ice-9 peg simplify-tree))
28 ;; Gets the left-hand depth of a list.
30 (if (or (not (list? lst)) (null? lst))
32 (+ 1 (depth (car lst)))))
34 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
35 ;;;;; Parse string PEGs using sexp PEGs.
36 ;; See the variable PEG-AS-PEG for an easier-to-read syntax.
37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39 ;; Grammar for PEGs in PEG grammar.
41 "grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
42 pattern <-- alternative (SLASH sp alternative)*
43 alternative <-- ([!&]? sp suffix)+
44 suffix <-- primary ([*+?] sp)*
45 primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
46 literal <-- ['] (!['] .)* ['] sp
47 charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
50 nonterminal <-- [a-zA-Z0-9-]+ sp
57 (define-syntax define-sexp-parser
61 (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
62 (accumsym (syntax->datum #'accum))
63 (syn (wrap-parser-for-users x matchf accumsym #'sym)))
64 #`(define sym #,syn))))))
66 (define-sexp-parser peg-grammar all
67 (+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
68 (define-sexp-parser peg-pattern all
70 (* (and (ignore "/") peg-sp peg-alternative))))
71 (define-sexp-parser peg-alternative all
72 (+ (and (? (or "!" "&")) peg-sp peg-suffix)))
73 (define-sexp-parser peg-suffix all
74 (and peg-primary (* (and (or "*" "+" "?") peg-sp))))
75 (define-sexp-parser peg-primary all
76 (or (and "(" peg-sp peg-pattern ")" peg-sp)
80 (and peg-nonterminal (not-followed-by "<"))))
81 (define-sexp-parser peg-literal all
82 (and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
83 (define-sexp-parser peg-charclass all
85 (* (and (not-followed-by "]")
86 (or charclass-range charclass-single)))
89 (define-sexp-parser charclass-range all (and peg-any "-" peg-any))
90 (define-sexp-parser charclass-single all peg-any)
91 (define-sexp-parser peg-nonterminal all
92 (and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
93 (define-sexp-parser peg-sp none
94 (* (or " " "\t" "\n")))
96 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
97 ;;;;; PARSE STRING PEGS
98 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
100 ;; Takes a string representing a PEG grammar and returns syntax that
101 ;; will define all of the nonterminals in the grammar with equivalent
102 ;; PEG s-expressions.
103 (define (peg-parser str for-syntax)
104 (let ((parsed (match-pattern peg-grammar str)))
107 ;; (display "Invalid PEG grammar!\n")
109 (let ((lst (peg:tree parsed)))
111 ((or (not (list? lst)) (null? lst))
113 ((eq? (car lst) 'peg-grammar)
115 #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
116 (context-flatten (lambda (lst) (<= (depth lst) 2))
119 ;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
120 ;; defines all the appropriate nonterminals.
121 (define-syntax define-peg-string-patterns
125 (peg-parser (syntax->datum #'str) x)))))
127 ;; lst has format (nonterm grabber pattern), where
128 ;; nonterm is a symbol (the name of the nonterminal),
129 ;; grabber is a string (either "<", "<-" or "<--"), and
130 ;; pattern is the parse of a PEG pattern expressed as as string.
131 (define (peg-nonterm->defn lst for-syntax)
132 (let* ((nonterm (car lst))
134 (pattern (caddr lst))
135 (nonterm-name (datum->syntax for-syntax
136 (string->symbol (cadr nonterm)))))
137 #`(define-peg-pattern #,nonterm-name
139 ((string=? grabber "<--") (datum->syntax for-syntax 'all))
140 ((string=? grabber "<-") (datum->syntax for-syntax 'body))
141 (else (datum->syntax for-syntax 'none)))
142 #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
144 ;; lst has format ('peg-pattern ...).
145 ;; After the context-flatten, (cdr lst) has format
146 ;; (('peg-alternative ...) ...), where the outer list is a collection
147 ;; of elements from a '/' alternative.
148 (define (peg-pattern->defn lst for-syntax)
149 #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
150 (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
153 ;; lst has format ('peg-alternative ...).
154 ;; After the context-flatten, (cdr lst) has the format
155 ;; (item ...), where each item has format either ("!" ...), ("&" ...),
156 ;; or ('peg-suffix ...).
157 (define (peg-alternative->defn lst for-syntax)
158 #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
159 (context-flatten (lambda (x) (or (string? (car x))
160 (eq? (car x) 'peg-suffix)))
163 ;; lst has the format either
164 ;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
165 ;; ('peg-suffix ...).
166 (define (peg-body->defn lst for-syntax)
168 ((equal? (car lst) "&")
169 #`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
170 ((equal? (car lst) "!")
171 #`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
172 ((eq? (car lst) 'peg-suffix)
173 (peg-suffix->defn lst for-syntax))
174 (else `(peg-parse-body-fail ,lst))))
176 ;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
177 (define (peg-suffix->defn lst for-syntax)
178 (let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
182 ((equal? (caddr lst) "*")
184 ((equal? (caddr lst) "?")
186 ((equal? (caddr lst) "+")
187 #`(+ #,inner-defn)))))
190 (define (peg-primary->defn lst for-syntax)
191 (let ((el (cadr lst)))
195 ((eq? (car el) 'peg-literal)
196 (peg-literal->defn el for-syntax))
197 ((eq? (car el) 'peg-charclass)
198 (peg-charclass->defn el for-syntax))
199 ((eq? (car el) 'peg-nonterminal)
200 (datum->syntax for-syntax (string->symbol (cadr el))))))
204 (peg-pattern->defn (caddr lst) for-syntax))
206 (datum->syntax for-syntax 'peg-any))
207 (else (datum->syntax for-syntax
208 `(peg-parse-any unknown-string ,lst)))))
209 (else (datum->syntax for-syntax
210 `(peg-parse-any unknown-el ,lst))))))
212 ;; Trims characters off the front and end of STR.
213 ;; (trim-1chars "'ab'") -> "ab"
214 (define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
217 (define (peg-literal->defn lst for-syntax)
218 (datum->syntax for-syntax (trim-1chars (cadr lst))))
220 ;; Parses a charclass.
221 (define (peg-charclass->defn lst for-syntax)
226 ((eq? (car cc) 'charclass-range)
227 #`(range #,(datum->syntax
229 (string-ref (cadr cc) 0))
232 (string-ref (cadr cc) 2))))
233 ((eq? (car cc) 'charclass-single)
234 (datum->syntax for-syntax (cadr cc)))))
236 (lambda (x) (or (eq? (car x) 'charclass-range)
237 (eq? (car x) 'charclass-single)))
240 ;; Compresses a list to save the optimizer work.
241 ;; e.g. (or (and a)) -> a
242 (define (compressor-core lst)
243 (if (or (not (list? lst)) (null? lst))
246 ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
248 (compressor-core (cadr lst)))
249 ((and (eq? (car lst) 'body)
250 (eq? (cadr lst) 'lit)
251 (eq? (cadddr lst) 1))
252 (compressor-core (caddr lst)))
253 (else (map compressor-core lst)))))
255 (define (compressor syn for-syntax)
256 (datum->syntax for-syntax
257 (compressor-core (syntax->datum syn))))
259 ;; Builds a lambda-expressions for the pattern STR using accum.
260 (define (peg-string-compile args accum)
262 ((str-stx) (string? (syntax->datum #'str-stx))
263 (let ((string (syntax->datum #'str-stx)))
267 (peg:tree (match-pattern peg-pattern string)) #'str-stx)
269 (if (eq? accum 'all) 'body accum))))
270 (else (error "Bad embedded PEG string" args))))
272 (add-peg-compiler! 'peg peg-string-compile)