Merge commit 'dc65b88d839c326889618112c4870ad3a64e9446'

[bpt/guile.git] / doc / ref / api-peg.texi
diff --git a/doc/ref/api-peg.texi b/doc/ref/api-peg.texi

index 8944441..0e16aab 100644 (file)
--- a/doc/ref/api-peg.texi
+++ b/doc/ref/api-peg.texi
@@ -19,14 +19,14 @@ familiarize yourself with the syntax:
  
  The module works by compiling PEGs down to lambda expressions.  These
  can either be stored in variables at compile-time by the define macros
-(@code{define-nonterm} and @code{define-grammar}) or calculated
+(@code{define-peg-pattern} and @code{define-peg-string-patterns}) or calculated
  explicitly at runtime with the compile functions
-(@code{peg-sexp-compile} and @code{peg-string-compile}).
+(@code{compile-peg-pattern} and @code{peg-string-compile}).
  
-They can then be used for either parsing (@code{peg-parse}) or matching
-(@code{peg-match}).  For convenience, @code{peg-match} also takes
-pattern literals in case you want to inline a simple search (people
-often use regular expressions this way).
+They can then be used for either parsing (@code{match-pattern}) or searching
+(@code{search-for-pattern}).  For convenience, @code{search-for-pattern}
+also takes pattern literals in case you want to inline a simple search
+(people often use regular expressions this way).
  
  The rest of this documentation consists of a syntax reference, an API
  reference, and a tutorial.
@@ -200,17 +200,17 @@ and
  The most straightforward way to define a PEG is by using one of the
  define macros (both of these macroexpand into @code{define}
  expressions).  These macros bind parsing functions to variables.  These
-parsing functions may be invoked by @code{peg-parse} or
-@code{peg-match}, which return a PEG match record.  Raw data can be
+parsing functions may be invoked by @code{match-pattern} or
+@code{search-for-pattern}, which return a PEG match record.  Raw data can be
  retrieved from this record with the PEG match deconstructor functions.
  More complicated (and perhaps enlightening) examples can be found in the
  tutorial.
  
-@deffn {Scheme Macro} define-grammar peg-string
+@deffn {Scheme Macro} define-peg-string-patterns peg-string
  Defines all the nonterminals in the PEG @var{peg-string}.  More
-precisely, @code{define-grammar} takes a superset of PEGs.  A normal PEG
+precisely, @code{define-peg-string-patterns} takes a superset of PEGs.  A normal PEG
  has a @code{<-} between the nonterminal and the pattern.
-@code{define-grammar} uses this symbol to determine what information it
+@code{define-peg-string-patterns} uses this symbol to determine what information it
  should propagate up the parse tree.  The normal @code{<-} propagates the
  matched text up the parse tree, @code{<--} propagates the matched text
  up the parse tree tagged with the name of the nonterminal, and @code{<}
@@ -220,20 +220,20 @@ character (in normal PEGs nonterminals can only be alphabetic).
  
  For example, if we:
  @lisp
-(define-grammar 
+(define-peg-string-patterns 
    "as <- 'a'+
  bs <- 'b'+
  as-or-bs <- as/bs")
-(define-grammar 
+(define-peg-string-patterns 
    "as-tag <-- 'a'+
  bs-tag <-- 'b'+
  as-or-bs-tag <-- as-tag/bs-tag")
  @end lisp
  Then:
  @lisp
-(peg-parse as-or-bs "aabbcc") @result{}
+(match-pattern as-or-bs "aabbcc") @result{}
  #<peg start: 0 end: 2 string: aabbcc tree: aa>
-(peg-parse as-or-bs-tag "aabbcc") @result{}
+(match-pattern as-or-bs-tag "aabbcc") @result{}
  #<peg start: 0 end: 2 string: aabbcc tree: (as-or-bs-tag (as-tag aa))>
  @end lisp
  
@@ -242,7 +242,7 @@ Note that in doing this, we have bound 6 variables at the toplevel
  @var{as-or-bs-tag}).
  @end deffn
  
-@deffn {Scheme Macro} define-nonterm name capture-type peg-sexp
+@deffn {Scheme Macro} define-peg-pattern name capture-type peg-sexp
  Defines a single nonterminal @var{name}.  @var{capture-type} determines
  how much information is passed up the parse tree.  @var{peg-sexp} is a
  PEG in S-expression form.
@@ -261,18 +261,18 @@ passes nothing up the parse tree.
  
  For example, if we:
  @lisp
-(define-nonterm as body (+ "a"))
-(define-nonterm bs body (+ "b"))
-(define-nonterm as-or-bs body (or as bs))
-(define-nonterm as-tag all (+ "a"))
-(define-nonterm bs-tag all (+ "b"))
-(define-nonterm as-or-bs-tag all (or as-tag bs-tag))
+(define-peg-pattern as body (+ "a"))
+(define-peg-pattern bs body (+ "b"))
+(define-peg-pattern as-or-bs body (or as bs))
+(define-peg-pattern as-tag all (+ "a"))
+(define-peg-pattern bs-tag all (+ "b"))
+(define-peg-pattern as-or-bs-tag all (or as-tag bs-tag))
  @end lisp
  Then:
  @lisp
-(peg-parse as-or-bs "aabbcc") @result{} 
+(match-pattern as-or-bs "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: aa>
-(peg-parse as-or-bs-tag "aabbcc") @result{} 
+(match-pattern as-or-bs-tag "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: (as-or-bs-tag (as-tag aa))>
  @end lisp
  
@@ -288,14 +288,14 @@ runtime.  These functions let you do that using either syntax.
  @deffn {Scheme Procedure} peg-string-compile peg-string capture-type
  Compiles the PEG pattern in @var{peg-string} propagating according to
  @var{capture-type} (capture-type can be any of the values from
-@code{define-nonterm}).
+@code{define-peg-pattern}).
  @end deffn
  
  
-@deffn {Scheme Procedure} peg-sexp-compile peg-sexp capture-type
+@deffn {Scheme Procedure} compile-peg-pattern peg-sexp capture-type
  Compiles the PEG pattern in @var{peg-sexp} propagating according to
  @var{capture-type} (capture-type can be any of the values from
-@code{define-nonterm}).
+@code{define-peg-pattern}).
  @end deffn
  
  The functions return syntax objects, which can be useful if you want to
@@ -304,13 +304,13 @@ can do the following:
  
  @lisp
  (define exp '(+ "a"))
-(define as (compile (peg-sexp-compile exp 'body)))
+(define as (compile (compile-peg-pattern exp 'body)))
  @end lisp
  
  You can use this nonterminal with all of the regular PEG functions:
  
  @lisp
-(peg-parse as "aaaaa") @result{}
+(match-pattern as "aaaaa") @result{}
  #<peg start: 0 end: 5 string: aaaaa tree: aaaaa>
  @end lisp
  
@@ -319,17 +319,17 @@ You can use this nonterminal with all of the regular PEG functions:
  For our purposes, ``parsing'' means parsing a string into a tree
  starting from the first character, while ``matching'' means searching
  through the string for a substring.  In practice, the only difference
-between the two functions is that @code{peg-parse} gives up if it can't
-find a valid substring starting at index 0 and @code{peg-match} keeps
+between the two functions is that @code{match-pattern} gives up if it can't
+find a valid substring starting at index 0 and @code{search-for-pattern} keeps
  looking.  They are both equally capable of ``parsing'' and ``matching''
  given those constraints.
  
-@deffn {Scheme Procedure} peg-parse nonterm string 
+@deffn {Scheme Procedure} match-pattern nonterm string 
  Parses @var{string} using the PEG stored in @var{nonterm}.  If no match
-was found, @code{peg-parse} returns false.  If a match was found, a PEG
+was found, @code{match-pattern} returns false.  If a match was found, a PEG
  match record is returned.
  
-The @code{capture-type} argument to @code{define-nonterm} allows you to
+The @code{capture-type} argument to @code{define-peg-pattern} allows you to
  choose what information to hold on to while parsing.  The options are:
  
  @table @code
@@ -342,108 +342,108 @@ nothing
  @end table
  
  @lisp
-(define-nonterm as all (+ "a"))
-(peg-parse as "aabbcc") @result{} 
+(define-peg-pattern as all (+ "a"))
+(match-pattern as "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: (as aa)>
  
-(define-nonterm as body (+ "a"))
-(peg-parse as "aabbcc") @result{} 
+(define-peg-pattern as body (+ "a"))
+(match-pattern as "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: aa>
  
-(define-nonterm as none (+ "a"))
-(peg-parse as "aabbcc") @result{} 
+(define-peg-pattern as none (+ "a"))
+(match-pattern as "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: ()>
  
-(define-nonterm bs body (+ "b"))
-(peg-parse bs "aabbcc") @result{} 
+(define-peg-pattern bs body (+ "b"))
+(match-pattern bs "aabbcc") @result{} 
  #f
  @end lisp
  @end deffn
  
-@deffn {Scheme Macro} peg-match nonterm-or-peg string
+@deffn {Scheme Macro} search-for-pattern nonterm-or-peg string
  Searches through @var{string} looking for a matching subexpression.
  @var{nonterm-or-peg} can either be a nonterminal or a literal PEG
-pattern.  When a literal PEG pattern is provided, @code{peg-match} works
+pattern.  When a literal PEG pattern is provided, @code{search-for-pattern} works
  very similarly to the regular expression searches many hackers are used
-to.  If no match was found, @code{peg-match} returns false.  If a match
+to.  If no match was found, @code{search-for-pattern} returns false.  If a match
  was found, a PEG match record is returned.
  
  @lisp
-(define-nonterm as body (+ "a"))
-(peg-match as "aabbcc") @result{} 
+(define-peg-pattern as body (+ "a"))
+(search-for-pattern as "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: aa>
-(peg-match (+ "a") "aabbcc") @result{} 
+(search-for-pattern (+ "a") "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: aa>
-(peg-match "'a'+" "aabbcc") @result{} 
+(search-for-pattern "'a'+" "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: aa>
  
-(define-nonterm as all (+ "a"))
-(peg-match as "aabbcc") @result{} 
+(define-peg-pattern as all (+ "a"))
+(search-for-pattern as "aabbcc") @result{} 
  #<peg start: 0 end: 2 string: aabbcc tree: (as aa)>
  
-(define-nonterm bs body (+ "b"))
-(peg-match bs "aabbcc") @result{} 
+(define-peg-pattern bs body (+ "b"))
+(search-for-pattern bs "aabbcc") @result{} 
  #<peg start: 2 end: 4 string: aabbcc tree: bb>
-(peg-match (+ "b") "aabbcc") @result{} 
+(search-for-pattern (+ "b") "aabbcc") @result{} 
  #<peg start: 2 end: 4 string: aabbcc tree: bb>
-(peg-match "'b'+" "aabbcc") @result{} 
+(search-for-pattern "'b'+" "aabbcc") @result{} 
  #<peg start: 2 end: 4 string: aabbcc tree: bb>
  
-(define-nonterm zs body (+ "z"))
-(peg-match zs "aabbcc") @result{} 
+(define-peg-pattern zs body (+ "z"))
+(search-for-pattern zs "aabbcc") @result{} 
  #f
-(peg-match (+ "z") "aabbcc") @result{} 
+(search-for-pattern (+ "z") "aabbcc") @result{} 
  #f
-(peg-match "'z'+" "aabbcc") @result{} 
+(search-for-pattern "'z'+" "aabbcc") @result{} 
  #f
  @end lisp
  @end deffn
  
  @subsubheading PEG Match Records
-The @code{peg-parse} and @code{peg-match} functions both return PEG
+The @code{match-pattern} and @code{search-for-pattern} functions both return PEG
  match records.  Actual information can be extracted from these with the
  following functions.
  
-@deffn {Scheme Procedure} peg:string peg-match
+@deffn {Scheme Procedure} peg:string match-record
  Returns the original string that was parsed in the creation of
-@code{peg-match}.
+@code{match-record}.
  @end deffn
  
-@deffn {Scheme Procedure} peg:start peg-match
+@deffn {Scheme Procedure} peg:start match-record
  Returns the index of the first parsed character in the original string
  (from @code{peg:string}).  If this is the same as @code{peg:end},
  nothing was parsed.
  @end deffn
  
-@deffn {Scheme Procedure} peg:end peg-match
+@deffn {Scheme Procedure} peg:end match-record
  Returns one more than the index of the last parsed character in the
  original string (from @code{peg:string}).  If this is the same as
  @code{peg:start}, nothing was parsed.
  @end deffn
  
-@deffn {Scheme Procedure} peg:substring peg-match
-Returns the substring parsed by @code{peg-match}.  This is equivalent to
-@code{(substring (peg:string peg-match) (peg:start peg-match) (peg:end
-peg-match))}.
+@deffn {Scheme Procedure} peg:substring match-record
+Returns the substring parsed by @code{match-record}.  This is equivalent to
+@code{(substring (peg:string match-record) (peg:start match-record) (peg:end
+match-record))}.
  @end deffn
  
-@deffn {Scheme Procedure} peg:tree peg-match
-Returns the tree parsed by @code{peg-match}.
+@deffn {Scheme Procedure} peg:tree match-record
+Returns the tree parsed by @code{match-record}.
  @end deffn
  
-@deffn {Scheme Procedure} peg-record? peg-match
-Returns true if @code{peg-match} is a PEG match record, or false
+@deffn {Scheme Procedure} peg-record? match-record
+Returns true if @code{match-record} is a PEG match record, or false
  otherwise.
  @end deffn
  
  Example:
  @lisp
-(define-nonterm bs all (peg "'b'+"))
+(define-peg-pattern bs all (peg "'b'+"))
  
-(peg-match bs "aabbcc") @result{}
+(search-for-pattern bs "aabbcc") @result{}
  #<peg start: 2 end: 4 string: aabbcc tree: (bs bb)>
  
-(let ((pm (peg-match bs "aabbcc")))
+(let ((pm (search-for-pattern bs "aabbcc")))
     `((string ,(peg:string pm))
       (start ,(peg:start pm))
       (end ,(peg:end pm))
@@ -513,7 +513,7 @@ As a first pass at this, we might want to have all the entries in
  
  Doing this with string-based PEG syntax would look like this:
  @lisp
-(define-grammar
+(define-peg-string-patterns
    "passwd <- entry* !.
  entry <-- (! NL .)* NL*
  NL < '\n'")
@@ -534,10 +534,10 @@ away the captured data.
  
  Here is the same PEG defined using S-expressions:
  @lisp
-(define-nonterm passwd body (and (* entry) (not-followed-by peg-any)))
-(define-nonterm entry all (and (* (and (not-followed-by NL) peg-any))
+(define-peg-pattern passwd body (and (* entry) (not-followed-by peg-any)))
+(define-peg-pattern entry all (and (* (and (not-followed-by NL) peg-any))
                                (* NL)))
-(define-nonterm NL none "\n")
+(define-peg-pattern NL none "\n")
  @end lisp
  
  Obviously this is much more verbose.  On the other hand, it's more
@@ -552,14 +552,14 @@ the @code{peg} keyword can be used to embed string syntax in
  S-expression syntax.  For instance, we could have written:
  
  @lisp
-(define-nonterm passwd body (peg "entry* !."))
+(define-peg-pattern passwd body (peg "entry* !."))
  @end lisp
  
  However we define it, parsing @code{*etc-passwd*} with the @code{passwd}
  nonterminal yields the same results:
  
  @lisp
-(peg:tree (peg-parse passwd *etc-passwd*)) @result{}
+(peg:tree (match-pattern passwd *etc-passwd*)) @result{}
  ((entry "root:x:0:0:root:/root:/bin/bash")
   (entry "daemon:x:1:1:daemon:/usr/sbin:/bin/sh")
   (entry "bin:x:2:2:bin:/bin:/bin/sh")
@@ -571,7 +571,7 @@ nonterminal yields the same results:
  However, here is something to be wary of:
  
  @lisp
-(peg:tree (peg-parse passwd "one entry")) @result{}
+(peg:tree (match-pattern passwd "one entry")) @result{}
  (entry "one entry")
  @end lisp
  
@@ -596,14 +596,14 @@ predicate that should indicate whether a given sublist is good enough
  
  What we want here is @code{keyword-flatten}.
  @lisp
-(keyword-flatten '(entry) (peg:tree (peg-parse passwd *etc-passwd*))) @result{}
+(keyword-flatten '(entry) (peg:tree (match-pattern passwd *etc-passwd*))) @result{}
  ((entry "root:x:0:0:root:/root:/bin/bash")
   (entry "daemon:x:1:1:daemon:/usr/sbin:/bin/sh")
   (entry "bin:x:2:2:bin:/bin:/bin/sh")
   (entry "sys:x:3:3:sys:/dev:/bin/sh")
   (entry "nobody:x:65534:65534:nobody:/nonexistent:/bin/sh")
   (entry "messagebus:x:103:107::/var/run/dbus:/bin/false"))
-(keyword-flatten '(entry) (peg:tree (peg-parse passwd "one entry"))) @result{}
+(keyword-flatten '(entry) (peg:tree (match-pattern passwd "one entry"))) @result{}
  ((entry "one entry"))
  @end lisp
  
@@ -613,8 +613,8 @@ probably just tag the @code{passwd} nonterminal to remove the ambiguity
  symbol for strings).
  
  @lisp
-(define-nonterm tag-passwd all (peg "entry* !."))
-(peg:tree (peg-parse tag-passwd *etc-passwd*)) @result{}
+(define-peg-pattern tag-passwd all (peg "entry* !."))
+(peg:tree (match-pattern tag-passwd *etc-passwd*)) @result{}
  (tag-passwd
    (entry "root:x:0:0:root:/root:/bin/bash")
    (entry "daemon:x:1:1:daemon:/usr/sbin:/bin/sh")
@@ -622,7 +622,7 @@ symbol for strings)..
    (entry "sys:x:3:3:sys:/dev:/bin/sh")
    (entry "nobody:x:65534:65534:nobody:/nonexistent:/bin/sh")
    (entry "messagebus:x:103:107::/var/run/dbus:/bin/false"))
-(peg:tree (peg-parse tag-passwd "one entry"))
+(peg:tree (match-pattern tag-passwd "one entry")) @result{}
  (tag-passwd 
    (entry "one entry"))
  @end lisp
@@ -644,7 +644,7 @@ Let's extend this example a bit more and actually pull some useful
  information out of the passwd file:
  
  @lisp
-(define-grammar
+(define-peg-string-patterns
    "passwd <-- entry* !.
  entry <-- login C pass C uid C gid C nameORcomment C homedir C shell NL*
  login <-- text
@@ -732,7 +732,7 @@ continue because it didn't have to match the nameORcomment to continue.
  We can parse simple mathematical expressions with the following PEG:
  
  @lisp
-(define-grammar
+(define-peg-string-patterns
    "expr <- sum
  sum <-- (product ('+' / '-') sum) / product
  product <-- (value ('*' / '/') product) / value
@@ -742,7 +742,7 @@ number <-- [0-9]+")
  
  Then:
  @lisp
-(peg:tree (peg-parse expr "1+1/2*3+(1+1)/2")) @result{}
+(peg:tree (match-pattern expr "1+1/2*3+(1+1)/2")) @result{}
  (sum (product (value (number "1")))
       "+"
       (sum (product
@@ -799,7 +799,7 @@ PEG, it would be worth abstracting.)
  
  Then:
  @lisp
-(apply parse-expr (peg:tree (peg-parse expr "1+1/2*3+(1+1)/2"))) @result{}
+(apply parse-expr (peg:tree (match-pattern expr "1+1/2*3+(1+1)/2"))) @result{}
  (+ 1 (+ (/ 1 (* 2 3)) (/ (+ 1 1) 2)))
  @end lisp
  
@@ -822,7 +822,7 @@ a good choice:
  @lisp
  (use-modules (srfi srfi-1))
  
-(define-grammar
+(define-peg-string-patterns
    "expr <- sum
  sum <-- (product ('+' / '-'))* product
  product <-- (value ('*' / '/'))* value
@@ -865,7 +865,7 @@ number <-- [0-9]+")
  
  Then:
  @lisp
-(apply parse-expr (peg:tree (peg-parse expr "1+1/2*3+(1+1)/2"))) @result{}
+(apply parse-expr (peg:tree (match-pattern expr "1+1/2*3+(1+1)/2"))) @result{}
  (+ (+ 1 (* (/ 1 2) 3)) (/ (+ 1 1) 2))
  @end lisp
  
@@ -881,7 +881,7 @@ For a more tantalizing example, consider the following grammar that
  parses (highly) simplified C functions:
  
  @lisp
-(define-grammar
+(define-peg-string-patterns
    "cfunc <-- cSP ctype cSP cname cSP cargs cLB cSP cbody cRB
  ctype <-- cidentifier
  cname <-- cidentifier
@@ -901,7 +901,7 @@ cSP < [ \t\n]*")
  
  Then:
  @lisp
-(peg-parse cfunc "int square(int a) @{ return a*a;@}") @result{}
+(match-pattern cfunc "int square(int a) @{ return a*a;@}") @result{}
  (32
   (cfunc (ctype "int")
          (cname "square")
@@ -911,7 +911,7 @@ Then:
  
  And:
  @lisp
-(peg-parse cfunc "int mod(int a, int b) @{ int c = a/b;return a-b*c; @}") @result{}
+(match-pattern cfunc "int mod(int a, int b) @{ int c = a/b;return a-b*c; @}") @result{}
  (52
   (cfunc (ctype "int")
          (cname "mod")
@@ -924,7 +924,7 @@ And:
  By wrapping all the @code{carg} nonterminals in a @code{cargs}
  nonterminal, we were able to remove any ambiguity in the parsing
  structure and avoid having to call @code{context-flatten} on the output
-of @code{peg-parse}.  We used the same trick with the @code{cstatement}
+of @code{match-pattern}.  We used the same trick with the @code{cstatement}
  nonterminals, wrapping them in a @code{cbody} nonterminal.
  
  The whitespace nonterminal @code{cSP} used here is a (very) useful
@@ -986,9 +986,9 @@ can be any other data the function wishes to return, or '() if it
  doesn't have any more data.
  
  The one caveat is that if the extra data it returns is a list, any
-adjacent strings in that list will be appended by @code{peg-parse}. For
+adjacent strings in that list will be appended by @code{match-pattern}. For
  instance, if a parsing function returns @code{(13 ("a" "b" "c"))},
-@code{peg-parse} will take @code{(13 ("abc"))} as its value.
+@code{match-pattern} will take @code{(13 ("abc"))} as its value.
  
  For example, here is a function to match ``ab'' using the actual
  interface.
@@ -1001,11 +1001,11 @@ interface.
  @end lisp
  
  The above function can be used to match a string by running
-@code{(peg-parse match-a-b "ab")}.
+@code{(match-pattern match-a-b "ab")}.
  
  @subsubheading Code Generators and Extensible Syntax
  
-PEG expressions, such as those in a @code{define-nonterm} form, are
+PEG expressions, such as those in a @code{define-peg-pattern} form, are
  interpreted internally in two steps.
  
  First, any string PEG is expanded into an s-expression PEG by the code
@@ -1013,10 +1013,10 @@ in the @code{(ice-9 peg string-peg)} module.
  
  Then, the s-expression PEG that results is compiled into a parsing
  function by the @code{(ice-9 peg codegen)} module. In particular, the
-function @code{peg-sexp-compile} is called on the s-expression. It then
+function @code{compile-peg-pattern} is called on the s-expression. It then
  decides what to do based on the form it is passed.
  
-The PEG syntax can be expanded by providing @code{peg-sexp-compile} more
+The PEG syntax can be expanded by providing @code{compile-peg-pattern} more
  options for what to do with its forms. The extended syntax will be
  associated with a symbol, for instance @code{my-parsing-form}, and will
  be called on all PEG expressions of the form
@@ -1027,7 +1027,7 @@ be called on all PEG expressions of the form
  The parsing function should take two arguments. The first will be a
  syntax object containing a list with all of the arguments to the form
  (but not the form's name), and the second will be the
-@code{capture-type} argument that is passed to @code{define-nonterm}.
+@code{capture-type} argument that is passed to @code{define-peg-pattern}.
  
  New functions can be registered by calling @code{(add-peg-compiler!
  symbol function)}, where @code{symbol} is the symbol that will indicate