module/ice-9/match.upstream.scm

   1 ;;;; match.scm -- portable hygienic pattern matcher -*- coding: utf-8 -*-
   2 ;;
   3 ;; This code is written by Alex Shinn and placed in the
   4 ;; Public Domain.  All warranties are disclaimed.
   5
   6 ;;> @example-import[(srfi 9)]
   7
   8 ;;> This is a full superset of the popular @hyperlink[
   9 ;;> "http://www.cs.indiana.edu/scheme-repository/code.match.html"]{match}
  10 ;;> package by Andrew Wright, written in fully portable @scheme{syntax-rules}
  11 ;;> and thus preserving hygiene.
  12
  13 ;;> The most notable extensions are the ability to use @emph{non-linear}
  14 ;;> patterns - patterns in which the same identifier occurs multiple
  15 ;;> times, tail patterns after ellipsis, and the experimental tree patterns.
  16
  17 ;;> @subsubsection{Patterns}
  18
  19 ;;> Patterns are written to look like the printed representation of
  20 ;;> the objects they match.  The basic usage is
  21
  22 ;;> @scheme{(match expr (pat body ...) ...)}
  23
  24 ;;> where the result of @var{expr} is matched against each pattern in
  25 ;;> turn, and the corresponding body is evaluated for the first to
  26 ;;> succeed.  Thus, a list of three elements matches a list of three
  27 ;;> elements.
  28
  29 ;;> @example{(let ((ls (list 1 2 3))) (match ls ((1 2 3) #t)))}
  30
  31 ;;> If no patterns match an error is signalled.
  32
  33 ;;> Identifiers will match anything, and make the corresponding
  34 ;;> binding available in the body.
  35
  36 ;;> @example{(match (list 1 2 3) ((a b c) b))}
  37
  38 ;;> If the same identifier occurs multiple times, the first instance
  39 ;;> will match anything, but subsequent instances must match a value
  40 ;;> which is @scheme{equal?} to the first.
  41
  42 ;;> @example{(match (list 1 2 1) ((a a b) 1) ((a b a) 2))}
  43
  44 ;;> The special identifier @scheme{_} matches anything, no matter how
  45 ;;> many times it is used, and does not bind the result in the body.
  46
  47 ;;> @example{(match (list 1 2 1) ((_ _ b) 1) ((a b a) 2))}
  48
  49 ;;> To match a literal identifier (or list or any other literal), use
  50 ;;> @scheme{quote}.
  51
  52 ;;> @example{(match 'a ('b 1) ('a 2))}
  53
  54 ;;> Analogous to its normal usage in scheme, @scheme{quasiquote} can
  55 ;;> be used to quote a mostly literally matching object with selected
  56 ;;> parts unquoted.
  57
  58 ;;> @example|{(match (list 1 2 3) (`(1 ,b ,c) (list b c)))}|
  59
  60 ;;> Often you want to match any number of a repeated pattern.  Inside
  61 ;;> a list pattern you can append @scheme{...} after an element to
  62 ;;> match zero or more of that pattern (like a regexp Kleene star).
  63
  64 ;;> @example{(match (list 1 2) ((1 2 3 ...) #t))}
  65 ;;> @example{(match (list 1 2 3) ((1 2 3 ...) #t))}
  66 ;;> @example{(match (list 1 2 3 3 3) ((1 2 3 ...) #t))}
  67
  68 ;;> Pattern variables matched inside the repeated pattern are bound to
  69 ;;> a list of each matching instance in the body.
  70
  71 ;;> @example{(match (list 1 2) ((a b c ...) c))}
  72 ;;> @example{(match (list 1 2 3) ((a b c ...) c))}
  73 ;;> @example{(match (list 1 2 3 4 5) ((a b c ...) c))}
  74
  75 ;;> More than one @scheme{...} may not be used in the same list, since
  76 ;;> this would require exponential backtracking in the general case.
  77 ;;> However, @scheme{...} need not be the final element in the list,
  78 ;;> and may be succeeded by a fixed number of patterns.
  79
  80 ;;> @example{(match (list 1 2 3 4) ((a b c ... d e) c))}
  81 ;;> @example{(match (list 1 2 3 4 5) ((a b c ... d e) c))}
  82 ;;> @example{(match (list 1 2 3 4 5 6 7) ((a b c ... d e) c))}
  83
  84 ;;> @scheme{___} is provided as an alias for @scheme{...} when it is
  85 ;;> inconvenient to use the ellipsis (as in a syntax-rules template).
  86
  87 ;;> The @scheme{..1} syntax is exactly like the @scheme{...} except
  88 ;;> that it matches one or more repetitions (like a regexp "+").
  89
  90 ;;> @example{(match (list 1 2) ((a b c ..1) c))}
  91 ;;> @example{(match (list 1 2 3) ((a b c ..1) c))}
  92
  93 ;;> The boolean operators @scheme{and}, @scheme{or} and @scheme{not}
  94 ;;> can be used to group and negate patterns analogously to their
  95 ;;> Scheme counterparts.
  96
  97 ;;> The @scheme{and} operator ensures that all subpatterns match.
  98 ;;> This operator is often used with the idiom @scheme{(and x pat)} to
  99 ;;> bind @var{x} to the entire value that matches @var{pat}
 100 ;;> (c.f. "as-patterns" in ML or Haskell).  Another common use is in
 101 ;;> conjunction with @scheme{not} patterns to match a general case
 102 ;;> with certain exceptions.
 103
 104 ;;> @example{(match 1 ((and) #t))}
 105 ;;> @example{(match 1 ((and x) x))}
 106 ;;> @example{(match 1 ((and x 1) x))}
 107
 108 ;;> The @scheme{or} operator ensures that at least one subpattern
 109 ;;> matches.  If the same identifier occurs in different subpatterns,
 110 ;;> it is matched independently.  All identifiers from all subpatterns
 111 ;;> are bound if the @scheme{or} operator matches, but the binding is
 112 ;;> only defined for identifiers from the subpattern which matched.
 113
 114 ;;> @example{(match 1 ((or) #t) (else #f))}
 115 ;;> @example{(match 1 ((or x) x))}
 116 ;;> @example{(match 1 ((or x 2) x))}
 117
 118 ;;> The @scheme{not} operator succeeds if the given pattern doesn't
 119 ;;> match.  None of the identifiers used are available in the body.
 120
 121 ;;> @example{(match 1 ((not 2) #t))}
 122
 123 ;;> The more general operator @scheme{?} can be used to provide a
 124 ;;> predicate.  The usage is @scheme{(? predicate pat ...)} where
 125 ;;> @var{predicate} is a Scheme expression evaluating to a predicate
 126 ;;> called on the value to match, and any optional patterns after the
 127 ;;> predicate are then matched as in an @scheme{and} pattern.
 128
 129 ;;> @example{(match 1 ((? odd? x) x))}
 130
 131 ;;> The field operator @scheme{=} is used to extract an arbitrary
 132 ;;> field and match against it.  It is useful for more complex or
 133 ;;> conditional destructuring that can't be more directly expressed in
 134 ;;> the pattern syntax.  The usage is @scheme{(= field pat)}, where
 135 ;;> @var{field} can be any expression, and should result in a
 136 ;;> procedure of one argument, which is applied to the value to match
 137 ;;> to generate a new value to match against @var{pat}.
 138
 139 ;;> Thus the pattern @scheme{(and (= car x) (= cdr y))} is equivalent
 140 ;;> to @scheme{(x . y)}, except it will result in an immediate error
 141 ;;> if the value isn't a pair.
 142
 143 ;;> @example{(match '(1 . 2) ((= car x) x))}
 144 ;;> @example{(match 4 ((= sqrt x) x))}
 145
 146 ;;> The record operator @scheme{$} is used as a concise way to match
 147 ;;> records defined by SRFI-9 (or SRFI-99).  The usage is
 148 ;;> @scheme{($ rtd field ...)}, where @var{rtd} should be the record
 149 ;;> type descriptor specified as the first argument to
 150 ;;> @scheme{define-record-type}, and each @var{field} is a subpattern
 151 ;;> matched against the fields of the record in order.  Not all fields
 152 ;;> must be present.
 153
 154 ;;> @example{
 155 ;;> (let ()
 156 ;;>   (define-record-type employee
 157 ;;>     (make-employee name title)
 158 ;;>     employee?
 159 ;;>     (name get-name)
 160 ;;>     (title get-title))
 161 ;;>   (match (make-employee "Bob" "Doctor")
 162 ;;>     (($ employee n t) (list t n))))
 163 ;;> }
 164
 165 ;;> The @scheme{set!} and @scheme{get!} operators are used to bind an
 166 ;;> identifier to the setter and getter of a field, respectively.  The
 167 ;;> setter is a procedure of one argument, which mutates the field to
 168 ;;> that argument.  The getter is a procedure of no arguments which
 169 ;;> returns the current value of the field.
 170
 171 ;;> @example{(let ((x (cons 1 2))) (match x ((1 . (set! s)) (s 3) x)))}
 172 ;;> @example{(match '(1 . 2) ((1 . (get! g)) (g)))}
 173
 174 ;;> The new operator @scheme{***} can be used to search a tree for
 175 ;;> subpatterns.  A pattern of the form @scheme{(x *** y)} represents
 176 ;;> the subpattern @var{y} located somewhere in a tree where the path
 177 ;;> from the current object to @var{y} can be seen as a list of the
 178 ;;> form @scheme{(x ...)}.  @var{y} can immediately match the current
 179 ;;> object in which case the path is the empty list.  In a sense it's
 180 ;;> a 2-dimensional version of the @scheme{...} pattern.
 181
 182 ;;> As a common case the pattern @scheme{(_ *** y)} can be used to
 183 ;;> search for @var{y} anywhere in a tree, regardless of the path
 184 ;;> used.
 185
 186 ;;> @example{(match '(a (a (a b))) ((x *** 'b) x))}
 187 ;;> @example{(match '(a (b) (c (d e) (f g))) ((x *** 'g) x))}
 188
 189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 190 ;; Notes
 191
 192 ;; The implementation is a simple generative pattern matcher - each
 193 ;; pattern is expanded into the required tests, calling a failure
 194 ;; continuation if the tests fail.  This makes the logic easy to
 195 ;; follow and extend, but produces sub-optimal code in cases where you
 196 ;; have many similar clauses due to repeating the same tests.
 197 ;; Nonetheless a smart compiler should be able to remove the redundant
 198 ;; tests.  For MATCH-LET and DESTRUCTURING-BIND type uses there is no
 199 ;; performance hit.
 200
 201 ;; The original version was written on 2006/11/29 and described in the
 202 ;; following Usenet post:
 203 ;;   http://groups.google.com/group/comp.lang.scheme/msg/0941234de7112ffd
 204 ;; and is still available at
 205 ;;   http://synthcode.com/scheme/match-simple.scm
 206 ;; It's just 80 lines for the core MATCH, and an extra 40 lines for
 207 ;; MATCH-LET, MATCH-LAMBDA and other syntactic sugar.
 208 ;;
 209 ;; A variant of this file which uses COND-EXPAND in a few places for
 210 ;; performance can be found at
 211 ;;   http://synthcode.com/scheme/match-cond-expand.scm
 212 ;;
 213 ;; 2012/05/23 - fixing combinatorial explosion of code in certain or patterns
 214 ;; 2011/09/25 - fixing bug when directly matching an identifier repeated in
 215 ;;              the pattern (thanks to Stefan Israelsson Tampe)
 216 ;; 2011/01/27 - fixing bug when matching tail patterns against improper lists
 217 ;; 2010/09/26 - adding `..1' patterns (thanks to Ludovic Courtès)
 218 ;; 2010/09/07 - fixing identifier extraction in some `...' and `***' patterns
 219 ;; 2009/11/25 - adding `***' tree search patterns
 220 ;; 2008/03/20 - fixing bug where (a ...) matched non-lists
 221 ;; 2008/03/15 - removing redundant check in vector patterns
 222 ;; 2008/03/06 - you can use `...' portably now (thanks to Taylor Campbell)
 223 ;; 2007/09/04 - fixing quasiquote patterns
 224 ;; 2007/07/21 - allowing ellipse patterns in non-final list positions
 225 ;; 2007/04/10 - fixing potential hygiene issue in match-check-ellipse
 226 ;;              (thanks to Taylor Campbell)
 227 ;; 2007/04/08 - clean up, commenting
 228 ;; 2006/12/24 - bugfixes
 229 ;; 2006/12/01 - non-linear patterns, shared variables in OR, get!/set!
 230
 231 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 232 ;; force compile-time syntax errors with useful messages
 233
 234 (define-syntax match-syntax-error
       ;; Every use in the visible part of this file passes a message
       ;; string (and sometimes the offending form).  Such a use matches
       ;; no rule below, so expanding it is itself a syntax error whose
       ;; report includes the message.
 235   (syntax-rules ()
         ;; The sole rule covers a bare use; it re-invokes the macro with a
         ;; message, which then fails to match in the same way.
 236     ((_) (match-syntax-error "invalid match-syntax-error usage"))))
 237
 238 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 239
 240 ;;> @subsubsection{Syntax}
 241
 242 ;;> @subsubsubsection{@rawcode{(match expr (pattern . body) ...)@br{}
 243 ;;> (match expr (pattern (=> failure) . body) ...)}}
 244
 245 ;;> The result of @var{expr} is matched against each @var{pattern} in
 246 ;;> turn, according to the pattern rules described in the previous
 247 ;;> section, until the first @var{pattern} matches.  When a match is
 248 ;;> found, the corresponding @var{body}s are evaluated in order,
 249 ;;> and the result of the last expression is returned as the result
 250 ;;> of the entire @scheme{match}.  If a @var{failure} is provided,
 251 ;;> then it is bound to a procedure of no arguments which continues
 252 ;;> processing at the next @var{pattern}.  If no @var{pattern} matches,
 253 ;;> an error is signalled.
 254
 255 ;; The basic interface.  MATCH just performs some basic syntax
 256 ;; validation, binds the match expression to a temporary variable `v',
 257 ;; and passes it on to MATCH-NEXT.  It's a constant throughout the
 258 ;; code below that the binding `v' is a direct variable reference, not
 259 ;; an expression.
 260
 261 (define-syntax match
       ;; (match expr clause ...)
       ;; Checks the overall shape of the use, evaluates EXPR once into a
       ;; fresh variable, and hands the clauses to MATCH-NEXT together with
       ;; a (getter setter) pair describing where the value came from.
 262   (syntax-rules ()
         ;; no expression at all
 263     ((_)
 264      (match-syntax-error "missing match expression"))
         ;; an expression but no clauses
 265     ((_ expr)
 266      (match-syntax-error "no match clauses"))
         ;; the expression is a compound form: it doubles as the getter,
         ;; and the setter is (set! <form>) awaiting its new value
 267     ((_ (form ...) (pattern . actions) ...)
 268      (let ((val (form ...)))
 269        (match-next val ((form ...) (set! (form ...))) (pattern . actions) ...)))
         ;; the expression is a vector literal: get/set go through the
         ;; temporary itself
 270     ((_ #(elt ...) (pattern . actions) ...)
 271      (let ((val #(elt ...)))
 272        (match-next val (val (set! val)) (pattern . actions) ...)))
         ;; anything else (an identifier or other atom)
 273     ((_ expr (pattern . actions) ...)
 274      (let ((val expr))
 275        (match-next val (expr (set! expr)) (pattern . actions) ...)))
 276     ))
 277
 278 ;; MATCH-NEXT passes each clause to MATCH-ONE in turn with its failure
 279 ;; thunk, which is expanded by recursing MATCH-NEXT on the remaining
 280 ;; clauses.  `g+s' is a list of two elements, the get! and set!
 281 ;; expressions respectively.
 282
 283 (define-syntax match-next
       ;; (match-next val accessors clause ...)
       ;; Expands the first clause with MATCH-ONE; its failure thunk is a
       ;; recursive MATCH-NEXT over the clauses that follow.
 284   (syntax-rules (=>)
 285     ;; clauses exhausted: nothing matched, so signal a run-time error
 286     ((_ val accessors)
 287      (error 'match "no matching pattern"))
 288     ;; the clause names its failure continuation with (=> name)
 289     ((_ val accessors (pattern (=> retry) . actions) . more)
 290      (let ((retry (lambda () (match-next val accessors . more))))
 291        ;; start with an empty list of bound identifiers
 292        (match-one val pattern accessors (match-drop-ids (begin . actions)) (retry) ())))
 293     ;; no (=> name) given: supply one and fall into the rule above
 294     ((_ val accessors (pattern . actions) . more)
 295      (match-next val accessors (pattern (=> failure) . actions) . more))))
 296
 297 ;; MATCH-ONE first checks for ellipse patterns, otherwise passes on to
 298 ;; MATCH-TWO.
 299
 300 (define-syntax match-one
       ;; (match-one val pattern accessors succeed fail ids)
       ;; Front end to MATCH-TWO that intercepts list patterns whose second
       ;; element is an ellipsis.
 301   (syntax-rules ()
 302     ;; A list pattern with at least two elements: let
 303     ;; MATCH-CHECK-ELLIPSE pick between the ellipsis expansion and
 304     ;; the ordinary MATCH-TWO expansion.
 305     ((_ val (head second . tail) accessors succeed fail ids)
 306      (match-check-ellipse
 307       second
 308       (match-extract-vars head (match-gen-ellipses val head tail  accessors succeed fail ids) ids ())
 309       (match-two val (head second . tail) accessors succeed fail ids)))
 310     ;; Every other shape is forwarded untouched.
 311     ((_ . args)
 312      (match-two . args))))
 313
 314 ;; This is the guts of the pattern matcher.  We are passed a lot of
 315 ;; information in the form:
 316 ;;
 317 ;;   (match-two var pattern (getter setter) success-k fail-k (ids ...))
 318 ;;
 319 ;; usually abbreviated
 320 ;;
 321 ;;   (match-two v p g+s sk fk i)
 322 ;;
 323 ;; where VAR is the symbol name of the current variable we are
 324 ;; matching, PATTERN is the current pattern, getter and setter are the
 325 ;; corresponding accessors (e.g. CAR and SET-CAR! of the pair holding
 326 ;; VAR), SUCCESS-K is the success continuation, FAIL-K is the failure
 327 ;; continuation (which is just a thunk call and is thus safe to expand
 328 ;; multiple times) and IDS are the list of identifiers bound in the
 329 ;; pattern so far.
     ;;
     ;; SUCCESS-K is a partial form: each rule finishes it by appending the
     ;; current identifier list, as in (sk ... i).  Rules are tried in the
     ;; order written, so the keyword forms must precede the generic pair
     ;; rule (p . q).
 330
 331 (define-syntax match-two
 332   (syntax-rules (_ ___ ..1 *** quote quasiquote ? $ = and or not set! get!)
         ;; empty list pattern
 333     ((match-two v () g+s (sk ...) fk i)
 334      (if (null? v) (sk ... i) fk))
         ;; quoted literal, compared with EQUAL?
 335     ((match-two v (quote p) g+s (sk ...) fk i)
 336      (if (equal? v 'p) (sk ... i) fk))
         ;; quasiquote patterns are handled by MATCH-QUASIQUOTE
 337     ((match-two v (quasiquote p) . x)
 338      (match-quasiquote v p . x))
         ;; (and) with no subpatterns always succeeds
 339     ((match-two v (and) g+s (sk ...) fk i) (sk ... i))
         ;; match the first subpattern, then the remaining ones against
         ;; the same value; the inner MATCH-ONE receives the updated ids
 340     ((match-two v (and p q ...) g+s sk fk i)
 341      (match-one v p g+s (match-one v (and q ...) g+s sk fk) fk i))
         ;; (or) with no subpatterns always fails
 342     ((match-two v (or) g+s sk fk i) fk)
         ;; a single alternative is just that pattern
 343     ((match-two v (or p) . x)
 344      (match-one v p . x))
         ;; several alternatives: collect their variables, then expand
         ;; with MATCH-GEN-OR
 345     ((match-two v (or p ...) g+s sk fk i)
 346      (match-extract-vars (or p ...) (match-gen-or v (p ...) g+s sk fk i) i ()))
         ;; negation swaps the continuations: a match of p runs fk (any
         ;; ids it bound are dropped), a failure runs sk with the
         ;; original ids
 347     ((match-two v (not p) g+s (sk ...) fk i)
 348      (match-one v p g+s (match-drop-ids fk) (sk ... i) i))
         ;; bind GETTER to a thunk around the getter expression
 349     ((match-two v (get! getter) (g s) (sk ...) fk i)
 350      (let ((getter (lambda () g))) (sk ... i)))
         ;; bind SETTER to a one-argument procedure that completes the
         ;; partial setter form with the new value
 351     ((match-two v (set! setter) (g (s ...)) (sk ...) fk i)
 352      (let ((setter (lambda (x) (s ... x)))) (sk ... i)))
         ;; predicate test; any patterns after it are matched as an AND
 353     ((match-two v (? pred . p) g+s sk fk i)
 354      (if (pred v) (match-one v (and . p) g+s sk fk i) fk))
         ;; apply PROC to the value and match the result against p
 355     ((match-two v (= proc p) . x)
 356      (let ((w (proc v))) (match-one w p . x)))
         ;; ___ in second position is expanded like an ellipsis
 357     ((match-two v (p ___ . r) g+s sk fk i)
 358      (match-extract-vars p (match-gen-ellipses v p r g+s sk fk i) i ()))
         ;; one-element list
 359     ((match-two v (p) g+s sk fk i)
 360      (if (and (pair? v) (null? (cdr v)))
 361          (let ((w (car v)))
 362            (match-one w p ((car v) (set-car! v)) sk fk i))
 363          fk))
         ;; tree search pattern
 364     ((match-two v (p *** q) g+s sk fk i)
 365      (match-extract-vars p (match-gen-search v p q g+s sk fk i) i ()))
         ;; *** must be followed by exactly one pattern
 366     ((match-two v (p *** . q) g+s sk fk i)
 367      (match-syntax-error "invalid use of ***" (p *** . q)))
         ;; one or more repetitions: require a pair, then treat as ___
 368     ((match-two v (p ..1) g+s sk fk i)
 369      (if (pair? v)
 370          (match-one v (p ___) g+s sk fk i)
 371          fk))
         ;; record pattern: test the type with IS-A?, then match the
         ;; field patterns starting at field index 0
 372     ((match-two v ($ rec p ...) g+s sk fk i)
 373      (if (is-a? v rec)
 374          (match-record-refs v rec 0 (p ...) g+s sk fk i)
 375          fk))
         ;; generic pair: match the car, then the cdr with the ids
 376     ((match-two v (p . q) g+s sk fk i)
 377      (if (pair? v)
 378          (let ((w (car v)) (x (cdr v)))
 379            (match-one w p ((car v) (set-car! v))
 380                       (match-one x q ((cdr v) (set-cdr! v)) sk fk)
 381                       fk
 382                       i))
 383          fk))
         ;; vector pattern: start at index 0 with no element patterns
         ;; collected yet (g+s is not passed on)
 384     ((match-two v #(p ...) g+s . x)
 385      (match-vector v 0 () (p ...) . x))
         ;; wildcard: succeeds without binding anything
 386     ((match-two v _ g+s (sk ...) fk i) (sk ... i))
 387     ;; Not a pair or vector or special literal, test to see if it's a
 388     ;; new symbol, in which case we just bind it, or if it's an
 389     ;; already bound symbol or some other literal, in which case we
 390     ;; compare it with EQUAL?.
         ;;
         ;; The test is done by a local macro whose literals are the ids
         ;; bound so far.  Its first rule uses x itself as the pattern: an
         ;; identifier that is not among those literals acts as a pattern
         ;; variable and so matches RANDOM-SYM-TO-MATCH, whereas a literal
         ;; identifier or a non-identifier datum does not match it and the
         ;; second rule is taken instead.
 391     ((match-two v x g+s (sk ...) fk (id ...))
 392      (let-syntax
 393          ((new-sym?
 394            (syntax-rules (id ...)
 395              ((new-sym? x sk2 fk2) sk2)
 396              ((new-sym? y sk2 fk2) fk2))))
 397        (new-sym? random-sym-to-match
 398                  (let ((x v)) (sk ... (id ... x)))
 399                  (if (equal? v x) (sk ... (id ...)) fk))))
 400     ))
 401
 402 ;; QUASIQUOTE patterns
 403
 404 (define-syntax match-quasiquote
       ;; (match-quasiquote v pattern g+s sk fk i . depth)
       ;; Matches V against the body of a quasiquote pattern.  DEPTH is a
       ;; list with one element per enclosing nested quasiquote; it is
       ;; empty at the outermost level.
 405   (syntax-rules (unquote unquote-splicing quasiquote)
         ;; unquote at the outermost level: p is an ordinary pattern
 406     ((_ v (unquote p) g+s sk fk i)
 407      (match-one v p g+s sk fk i))
         ;; unquote-splicing at the head of a list, outermost level.
         ;; NOTE(review): this MATCH-ONE use supplies five operands --
         ;; there is no g+s between the pattern and the success
         ;; continuation -- whereas the other MATCH-ONE uses in this macro
         ;; supply six.  Presumably g+s is missing; confirm before relying
         ;; on unquote-splicing patterns.
 408     ((_ v ((unquote-splicing p) . rest) g+s sk fk i)
 409      (if (pair? v)
 410        (match-one v
 411                   (p . tmp)
 412                   (match-quasiquote tmp rest g+s sk fk)
 413                   fk
 414                   i)
 415        fk))
         ;; nested quasiquote: go one level deeper
 416     ((_ v (quasiquote p) g+s sk fk i . depth)
 417      (match-quasiquote v p g+s sk fk i #f . depth))
         ;; unquote / unquote-splicing inside a nested quasiquote: come
         ;; back up one level
 418     ((_ v (unquote p) g+s sk fk i x . depth)
 419      (match-quasiquote v p g+s sk fk i . depth))
 420     ((_ v (unquote-splicing p) g+s sk fk i x . depth)
 421      (match-quasiquote v p g+s sk fk i . depth))
         ;; pair: match the car, then the cdr via MATCH-QUASIQUOTE-STEP,
         ;; which receives the updated ids as its last operand
 422     ((_ v (p . q) g+s sk fk i . depth)
 423      (if (pair? v)
 424        (let ((w (car v)) (x (cdr v)))
 425          (match-quasiquote
 426           w p g+s
 427           (match-quasiquote-step x q g+s sk fk depth)
 428           fk i . depth))
 429        fk))
         ;; vector: convert the value to a list and match the elements
         ;; as a list pattern
 430     ((_ v #(elt ...) g+s sk fk i . depth)
 431      (if (vector? v)
 432        (let ((ls (vector->list v)))
 433          (match-quasiquote ls (elt ...) g+s sk fk i . depth))
 434        fk))
         ;; anything else is matched as a quoted literal
 435     ((_ v x g+s sk fk i . depth)
 436      (match-one v 'x g+s sk fk i))))
 437
 438 (define-syntax match-quasiquote-step
       ;; Success continuation used while matching the car of a pair in a
       ;; quasiquote pattern.  The identifier list arrives as the final
       ;; operand; this moves it in front of the depth markers and goes on
       ;; to match the cdr with MATCH-QUASIQUOTE.
 439   (syntax-rules ()
 440     ((_ val pat accessors succeed fail levels ids)
 441      (match-quasiquote val pat accessors succeed fail ids . levels))))
 442
 443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 444 ;; Utilities
 445
 446 ;; Takes an expression followed by any number of ids and expands into
     ;; the expression alone, discarding the ids.
 447 (define-syntax match-drop-ids
 448   (syntax-rules ()
 449     ((match-drop-ids kept discarded ...) kept)))
 450
 451 (define-syntax match-tuck-ids
       ;; Takes a three-part form (keyword bindings (partial-call ...))
       ;; followed by extra operands, and appends those operands to the
       ;; end of the partial call inside the form.
 452   (syntax-rules ()
 453     ((match-tuck-ids (binder bindings (call ...)) extra ...)
 454      (binder bindings (call ... extra ...)))))
 455
 456 (define-syntax match-drop-first-arg
       ;; Takes exactly two operands and expands into the second one; the
       ;; first is discarded without being evaluated.
 457   (syntax-rules ()
 458     ((match-drop-first-arg ignored kept) kept)))
 459
 460 ;; To expand an OR group we try each clause in succession, passing the
 461 ;; first that succeeds to the success continuation.  On failure for
 462 ;; any clause, we just try the next clause, finally resorting to the
 463 ;; failure continuation fk if all clauses fail.  The only trick is
 464 ;; that we want to unify the identifiers, so that the success
 465 ;; continuation can refer to a variable from any of the OR clauses.
 466
 467 (define-syntax match-gen-or
       ;; Continuation of MATCH-EXTRACT-VARS for an OR pattern: the last
       ;; operand lists the variables found in the alternatives.  The
       ;; success continuation is wrapped once in a procedure of those
       ;; variables, so each alternative only emits a call to it.
 468   (syntax-rules ()
 469     ((match-gen-or val alts accessors (succeed ...) fail (bound ...) ((var var-ls) ...))
 470      (let ((join (lambda (var ...) (succeed ... (bound ... var ...)))))
 471        (match-gen-or-step val alts accessors (match-drop-ids (join var ...)) fail (bound ...))))))
 472
 473 (define-syntax match-gen-or-step
       ;; Walks the list of OR alternatives, chaining each one's failure
       ;; to an attempt at the alternatives after it.
 474   (syntax-rules ()
 475     ((match-gen-or-step val () accessors succeed fail . rest)
 476      ;; nothing left to try: expand into the failure continuation
 477      fail)
 478     ((match-gen-or-step val (only) . rest)
 479      ;; final (or sole) alternative: an ordinary match
 480      (match-one val only . rest))
 481     ((match-gen-or-step val (first . others) accessors succeed fail ids)
 482      ;; try FIRST; if it fails, fall back to the other alternatives
 483      (let ((try-others (lambda () (match-gen-or-step val others accessors succeed fail ids))))
 484        (match-one val first accessors succeed (try-others) ids)))
 485     ))
 486
 487 ;; We match a pattern (p ...) by matching the pattern p in a loop on
 488 ;; each element of the variable, accumulating the bound ids into lists.
 489
 490 ;; Look at the body of the simple case - it's just a named let loop,
 491 ;; matching each element in turn to the same pattern.  The only trick
 492 ;; is that we want to keep track of the lists of each extracted id, so
 493 ;; when the loop recurses we cons the ids onto their respective list
 494 ;; variables, and on success we bind the ids (what the user input and
 495 ;; expects to see in the success body) to the reversed accumulated
 496 ;; list IDs.
 497
 498 (define-syntax match-gen-ellipses
       ;; (match-gen-ellipses v p r g+s sk fk i ((id id-ls) ...))
       ;; P is the repeated pattern and R the list of patterns following
       ;; the ellipsis.  The final operand is supplied by
       ;; MATCH-EXTRACT-VARS: each ID is a variable of P and ID-LS the
       ;; temporary that accumulates its matches.
 499   (syntax-rules ()
         ;; nothing follows the ellipsis
 500     ((_ v p () g+s (sk ...) fk i ((id id-ls) ...))
           ;; MATCH-CHECK-IDENTIFIER is defined elsewhere; presumably it
           ;; selects the first form when p is an identifier and the
           ;; second otherwise -- confirm against its definition
 501      (match-check-identifier p
 502        ;; simplest case equivalent to (p ...), just bind the list
 503        (let ((p v))
 504          (if (list? p)
 505              (sk ... i)
 506              fk))
 507        ;; simple case, match all elements of the list
 508        (let loop ((ls v) (id-ls '()) ...)
 509          (cond
                ;; end of list: bind each id to its matches in order
 510            ((null? ls)
 511             (let ((id (reverse id-ls)) ...) (sk ... i)))
                ;; match one element, then loop with its bindings consed
                ;; onto the accumulators
 512            ((pair? ls)
 513             (let ((w (car ls)))
 514               (match-one w p ((car ls) (set-car! ls))
 515                          (match-drop-ids (loop (cdr ls) (cons id id-ls) ...))
 516                          fk i)))
                ;; improper tail
 517            (else
 518             fk)))))
 519     ((_ v p r g+s (sk ...) fk i ((id id-ls) ...))
 520      ;; general case, trailing patterns to match, keep track of the
 521      ;; remaining list length so we don't need any backtracking
 522      (match-verify-no-ellipses
 523       r
            ;; tail-len is the number of trailing patterns; len is the
            ;; length of the value, or #f when it is not a proper list
 524       (let* ((tail-len (length 'r))
 525              (ls v)
 526              (len (and (list? ls) (length ls))))
 527         (if (or (not len) (< len tail-len))
 528             fk
 529             (let loop ((ls ls) (n len) (id-ls '()) ...)
 530               (cond
                      ;; exactly tail-len elements remain: bind the ids
                      ;; and match the remainder against r, with #f in
                      ;; place of the getter and setter
 531                 ((= n tail-len)
 532                  (let ((id (reverse id-ls)) ...)
 533                    (match-one ls r (#f #f) (sk ...) fk i)))
 534                 ((pair? ls)
 535                  (let ((w (car ls)))
 536                    (match-one w p ((car ls) (set-car! ls))
 537                               (match-drop-ids
 538                                (loop (cdr ls) (- n 1) (cons id id-ls) ...))
 539                               fk
 540                               i)))
 541                 (else
 542                  fk)))))))))
 543
 544 ;; This is just a safety check.  Although unlike syntax-rules we allow
 545 ;; trailing patterns after an ellipses, we explicitly disable multiple
 546 ;; ellipses at the same level.  This is because in the general case
 547 ;; such patterns are exponential in the number of ellipses, and we
 548 ;; don't want to make it easy to construct very expensive operations
 549 ;; with simple looking patterns.  For example, it would be O(n^2) for
 550 ;; patterns like (a ... b ...) because we must consider every trailing
 551 ;; element for every possible break for the leading "a ...".
 552
 553 (define-syntax match-verify-no-ellipses
       ;; (match-verify-no-ellipses tail-patterns body)
       ;; Scans the patterns that follow an ellipsis.  Expands into BODY
       ;; when the scan reaches the end of a proper list; otherwise it
       ;; raises a syntax error.
 554   (syntax-rules ()
         ;; inspect the next element, then keep scanning the rest
 555     ((match-verify-no-ellipses (head . tail) body)
 556      (match-check-ellipse
 557       head
 558       (match-syntax-error
 559        "multiple ellipse patterns not allowed at same level")
 560       (match-verify-no-ellipses tail body)))
         ;; reached the end of the list
 561     ((match-verify-no-ellipses () body)
 562      body)
         ;; the patterns ended in a non-list tail
 563     ((match-verify-no-ellipses other body)
 564      (match-syntax-error "dotted tail not allowed after ellipse" other))))
 565
 566 ;; To implement the tree search, we use two recursive procedures.  TRY
 567 ;; attempts to match Y once, and on success it calls the normal SK on
 568 ;; the accumulated list ids as in MATCH-GEN-ELLIPSES.  On failure, we
 569 ;; call NEXT which first checks if the current value is a list
 570 ;; beginning with X, then calls TRY on each remaining element of the
 571 ;; list.  Since TRY will recursively call NEXT again on failure, this
 572 ;; effects a full depth-first search.
 573 ;;
 574 ;; The failure continuation throughout is a jump to the next step in
 575 ;; the tree search, initialized with the original failure continuation
 576 ;; FK.
 577
 578 (define-syntax match-gen-search
       ;; (match-gen-search v p q g+s sk fk i ((id id-ls) ...))
       ;; Expansion of the tree pattern (p *** q).  The final operand is
       ;; supplied by MATCH-EXTRACT-VARS: each ID is a variable of P and
       ;; ID-LS the temporary that accumulates its matches along the path.
 579   (syntax-rules ()
 580     ((match-gen-search v p q g+s sk fk i ((id id-ls) ...))
           ;; TRY matches w against q.  On success the ids are bound to
           ;; their accumulated lists in path order and MATCH-TUCK-IDS
           ;; appends the identifier list to sk; on failure the search
           ;; continues with NEXT.
 581      (letrec ((try (lambda (w fail id-ls ...)
 582                      (match-one w q g+s
 583                                 (match-tuck-ids
 584                                  (let ((id (reverse id-ls)) ...)
 585                                    sk))
 586                                 (next w fail id-ls ...) i)))
                    ;; NEXT requires w to be a pair whose car matches p,
                    ;; then calls TRY on each element of its cdr in turn.
                    ;; FAIL is called when w is not a pair, when the car
                    ;; does not match, or when the elements run out.
 587               (next (lambda (w fail id-ls ...)
 588                       (if (not (pair? w))
 589                           (fail)
 590                           (let ((u (car w)))
 591                             (match-one
 592                              u p ((car w) (set-car! w))
 593                              (match-drop-ids
 594                               ;; accumulate the head variables from
 595                               ;; the p pattern, and loop over the tail
 596                               (let ((id-ls (cons id id-ls)) ...)
 597                                 (let lp ((ls (cdr w)))
 598                                   (if (pair? ls)
                                            ;; if this element fails, move
                                            ;; on to the next one
 599                                       (try (car ls)
 600                                            (lambda () (lp (cdr ls)))
 601                                            id-ls ...)
 602                                       (fail)))))
 603                              (fail) i))))))
 604        ;; the initial id-ls binding here is a dummy to get the right
 605        ;; number of '()s
 606        (let ((id-ls '()) ...)
 607          (try v (lambda () fk) id-ls ...))))))
 608
 609 ;; Vector patterns are just more of the same, with the slight
 610 ;; exception that we pass around the current vector index being
 611 ;; matched.
 612
 613 (define-syntax match-vector
       ;; (match-vector vec index seen remaining . rest)
       ;; Dispatches on the element patterns not yet consumed: a final
       ;; pattern followed by an ellipsis goes to
       ;; MATCH-GEN-VECTOR-ELLIPSES, anything else to MATCH-VECTOR-TWO.
 614   (syntax-rules (___)
         ;; exactly two patterns left: the second may be an ellipsis
 615     ((match-vector vec index seen (elt after) . rest)
 616      (match-check-ellipse after
 617                           (match-gen-vector-ellipses vec index seen elt . rest)
 618                           (match-vector-two vec index seen (elt after) . rest)))
         ;; explicit ___ form of the ellipsis
 619     ((match-vector vec index seen (elt ___) succeed fail ids)
 620      (match-gen-vector-ellipses vec index seen elt succeed fail ids))
         ;; no ellipsis at this point
 621     ((match-vector . args)
 622      (match-vector-two . args))))
 623
 624 ;; Check the exact vector length, then check each element in turn.
 625
 626 (define-syntax match-vector-two
       ;; (match-vector-two vec count ((pattern position) ...) remaining . rest)
       ;; Pairs each element pattern with its index expression.  Once no
       ;; patterns remain it emits the vector and exact-length tests
       ;; followed by the per-element matches.
 627   (syntax-rules ()
         ;; all patterns consumed
 628     ((match-vector-two vec count ((elt-pat pos) ...) () succeed fail ids)
 629      (if (vector? vec)
 630          (let ((size (vector-length vec)))
 631            (if (= size count)
 632                (match-vector-step vec ((elt-pat pos) ...) succeed fail ids)
 633                fail))
 634          fail))
         ;; record the next pattern with the current index and advance,
         ;; going back through MATCH-VECTOR to re-check for an ellipsis
 635     ((match-vector-two vec count (seen ...) (head . more) . rest)
 636      (match-vector vec (+ count 1) (seen ... (head count)) more . rest))))
 637
 638 (define-syntax match-vector-step
       ;; (match-vector-step vec ((pattern position) ...) succeed fail ids)
       ;; Matches the listed positions of VEC one after another.
 639   (syntax-rules ()
         ;; no positions left: run the success continuation with the ids
 640     ((match-vector-step vec () (succeed ...) fail ids) (succeed ... ids))
         ;; match one element; the remaining positions become its success
         ;; continuation and receive the updated ids
 641     ((match-vector-step vec ((elt-pat pos) . more) succeed fail ids)
 642      (let ((elt (vector-ref vec pos)))
 643        (match-one elt elt-pat ((vector-ref vec pos) (vector-set! vec pos))
 644                   (match-vector-step vec more succeed fail)
 645                   fail ids)))))
 646
 647 ;; With a vector ellipse pattern we first check to see if the vector
 648 ;; length is at least the required length.
 649
 650 (define-syntax match-gen-vector-ellipses
       ;; (match-gen-vector-ellipses vec count ((pattern position) ...) repeated succeed fail ids)
       ;; COUNT fixed element patterns are followed by REPEATED with an
       ;; ellipsis.  Emits the vector and minimum-length tests, matches
       ;; the fixed positions, then hands the rest to MATCH-VECTOR-TAIL.
 651   (syntax-rules ()
 652     ((match-gen-vector-ellipses vec count ((elt-pat pos) ...) repeated succeed fail ids)
 653      (if (vector? vec)
 654        (let ((size (vector-length vec)))
 655          (if (>= size count)
 656            (match-vector-step vec ((elt-pat pos) ...)
 657                               (match-vector-tail vec repeated count size succeed fail)
 658                               fail ids)
 659            fail))
 660        fail))))
 661
 662 (define-syntax match-vector-tail
       ;; Collects the variables of the repeated pattern with
       ;; MATCH-EXTRACT-VARS, which then continues into
       ;; MATCH-VECTOR-TAIL-TWO with the variable list appended.
 663   (syntax-rules ()
 664     ((match-vector-tail vec repeated start size succeed fail ids)
 665      (match-extract-vars repeated (match-vector-tail-two vec repeated start size succeed fail ids) ids ()))))
 666
 667 (define-syntax match-vector-tail-two
       ;; (match-vector-tail-two v p n len sk fk i ((id id-ls) ...))
       ;; Matches the vector elements from index N up to (not including)
       ;; LEN against the repeated pattern P.  Each ID is a variable of P
       ;; and ID-LS the temporary that accumulates its matches; on success
       ;; every ID is bound to the list of its matches in element order.
 668   (syntax-rules ()
 669     ((_ v p n len (sk ...) fk i ((id id-ls) ...))
 670      (let loop ((j n) (id-ls '()) ...)
 671        (if (>= j len)
              ;; past the last element: bind the ids and succeed
 672          (let ((id (reverse id-ls)) ...) (sk ... i))
 673          (let ((w (vector-ref v j)))
                ;; the setter was misspelled `vetor-set!', an unbound name,
                ;; which broke (set! s) sub-patterns under a vector ellipsis
 674            (match-one w p ((vector-ref v j) (vector-set! v j))
 675                       (match-drop-ids (loop (+ j 1) (cons id id-ls) ...))
 676                       fk i)))))))
 677
 678 (define-syntax match-record-refs
       ;; (match-record-refs obj rtd slot (pattern ...) accessors succeed fail ids)
       ;; Matches the field patterns of a record pattern against
       ;; successive fields of OBJ, counting up from SLOT.  SLOT-REF and
       ;; SLOT-SET! are not defined in this part of the file.
 679   (syntax-rules ()
         ;; match the next field; the remaining fields become the success
         ;; continuation and receive the updated ids
 680     ((match-record-refs obj rtd slot (field-pat . more) accessors succeed fail ids)
 681      (let ((field (slot-ref rtd obj slot)))
 682        (match-one field field-pat ((slot-ref rtd obj slot) (slot-set! rtd obj slot))
 683                   (match-record-refs obj rtd (+ slot 1) more accessors succeed fail) fail ids)))
         ;; no field patterns left
 684     ((match-record-refs obj rtd slot () accessors (succeed ...) fail ids)
 685      (succeed ... ids))))
 686
 687 ;; Extract all identifiers in a pattern.  A little more complicated
 688 ;; than just looking for symbols, we need to ignore special keywords
 689 ;; and non-pattern forms (such as the predicate expression in ?
 690 ;; patterns), and also ignore previously bound identifiers.
 691 ;;
 692 ;; Calls the continuation with all new vars as a list of the form
 693 ;; ((orig-var tmp-name) ...), where tmp-name can be used to uniquely
 694 ;; pair with the original variable (e.g. it's used in the ellipse
 695 ;; generation for list variables).
 696 ;;
 697 ;; (match-extract-vars pattern continuation (ids ...) (new-vars ...))
 698
 699 (define-syntax match-extract-vars
       ;; The continuation K is a partial form; every terminal rule
       ;; finishes it by appending the list of new variables, as in
       ;; (k ... v).  Rules are tried in the order written, so the keyword
       ;; forms must precede the generic pair rules.
 700   (syntax-rules (_ ___ ..1 *** ? $ = quote quasiquote and or not get! set!)
         ;; skip the predicate, record type and field procedure: they are
         ;; expressions or names, not patterns
 701     ((match-extract-vars (? pred . p) . x)
 702      (match-extract-vars p . x))
 703     ((match-extract-vars ($ rec . p) . x)
 704      (match-extract-vars p . x))
 705     ((match-extract-vars (= proc p) . x)
 706      (match-extract-vars p . x))
         ;; quoted data contributes no variables
 707     ((match-extract-vars (quote x) (k ...) i v)
 708      (k ... v))
         ;; quasiquote bodies are scanned by a separate macro
 709     ((match-extract-vars (quasiquote x) k i v)
 710      (match-extract-quasiquote-vars x k i v (#t)))
         ;; and/or/not: drop the keyword and scan the subpatterns as a
         ;; list
 711     ((match-extract-vars (and . p) . x)
 712      (match-extract-vars p . x))
 713     ((match-extract-vars (or . p) . x)
 714      (match-extract-vars p . x))
 715     ((match-extract-vars (not . p) . x)
 716      (match-extract-vars p . x))
 717     ;; A non-keyword pair, expand the CAR with a continuation to
 718     ;; expand the CDR.
         ;; If the second element is an ellipsis it is left out and the
         ;; remaining list is scanned instead.
 719     ((match-extract-vars (p q . r) k i v)
 720      (match-check-ellipse
 721       q
 722       (match-extract-vars (p . r) k i v)
 723       (match-extract-vars p (match-extract-vars-step (q . r) k i v) i ())))
 724     ((match-extract-vars (p . q) k i v)
 725      (match-extract-vars p (match-extract-vars-step q k i v) i ()))
         ;; vector patterns are scanned like list patterns
 726     ((match-extract-vars #(p ...) . x)
 727      (match-extract-vars (p ...) . x))
         ;; the wildcard and the repetition/search markers bind nothing
 728     ((match-extract-vars _ (k ...) i v)    (k ... v))
 729     ((match-extract-vars ___ (k ...) i v)  (k ... v))
 730     ((match-extract-vars *** (k ...) i v)  (k ... v))
 731     ((match-extract-vars ..1 (k ...) i v)  (k ... v))
 732     ;; This is the main part, the only place where we might add a new
 733     ;; var if it's an unbound symbol.
         ;; The local macro's literals are the ids bound so far and its
         ;; first rule uses p itself as the pattern: an identifier outside
         ;; that list acts as a pattern variable and matches
         ;; RANDOM-SYM-TO-MATCH, so (p p-ls) is added; a literal identifier
         ;; or a non-identifier datum does not match and v is unchanged.
 734     ((match-extract-vars p (k ...) (i ...) v)
 735      (let-syntax
 736          ((new-sym?
 737            (syntax-rules (i ...)
 738              ((new-sym? p sk fk) sk)
 739              ((new-sym? any sk fk) fk))))
 740        (new-sym? random-sym-to-match
 741                  (k ... ((p p-ls) . v))
 742                  (k ... v))))
 743     ))
 744
 745 ;; Stepper used in the above so it can expand the CAR and CDR
 746 ;; separately.
 747
 748 (define-syntax match-extract-vars-step
       ;; (match-extract-vars-step rest k seen acc ((new new-ls) ...))
       ;;
       ;; Continuation used by `match-extract-vars' after it has scanned the
       ;; CAR of a pair.  The variables just found arrive as the final
       ;; operand; they are added both to the list of known identifiers
       ;; SEEN and to the accumulator ACC before scanning resumes on REST.
 749   (syntax-rules ()
 750     ((match-extract-vars-step rest k seen acc ((new new-ls) ...))
 751      (match-extract-vars rest k (new ... . seen) ((new new-ls) ... . acc)))
 752     ))
 753
 754 (define-syntax match-extract-quasiquote-vars
       ;; (match-extract-quasiquote-vars x (k ...) i v d)
       ;;
       ;; Variable extraction for the inside of a quasiquote pattern.  X is
       ;; the quasiquoted datum, K the continuation, I the identifiers known
       ;; so far, V the accumulator and D the nesting depth, written as a
       ;; list with one #t per enclosing quasiquote.  Only an `unquote' at
       ;; depth one, i.e. D = (#t), contains a real pattern; it is passed to
       ;; `match-extract-vars'.  Everything else is literal data and adds
       ;; nothing to V.
       ;;
       ;; Every recursive use must supply all five operands, and the depth
       ;; may only shrink when an `unquote' is crossed: the pair and vector
       ;; rules therefore pass D through unchanged.
 755   (syntax-rules (quasiquote unquote unquote-splicing)
         ;; A nested quasiquote goes one level deeper.
 756     ((match-extract-quasiquote-vars (quasiquote x) k i v d)
 757      (match-extract-quasiquote-vars x k i v (#t . d)))
         ;; For extraction purposes unquote-splicing is treated as unquote.
 758     ((match-extract-quasiquote-vars (unquote-splicing x) k i v d)
 759      (match-extract-quasiquote-vars (unquote x) k i v d))
         ;; Unquote at depth one: X is an ordinary pattern again.
 760     ((match-extract-quasiquote-vars (unquote x) k i v (#t))
 761      (match-extract-vars x k i v))
         ;; Unquote at a deeper level only cancels one quasiquote.
 762     ((match-extract-quasiquote-vars (unquote x) k i v (#t . d))
 763      (match-extract-quasiquote-vars x k i v d))
         ;; Pair: scan the CAR with a fresh accumulator at the same depth;
         ;; the step macro then resumes on the CDR, again at the same depth.
 764     ((match-extract-quasiquote-vars (x . y) k i v d)
 765      (match-extract-quasiquote-vars
 766       x
 767       (match-extract-quasiquote-vars-step y k i v d) i () d))
         ;; A vector is scanned as the list of its elements, depth unchanged.
 768     ((match-extract-quasiquote-vars #(x ...) k i v d)
 769      (match-extract-quasiquote-vars (x ...) k i v d))
         ;; Any other datum is literal: return the accumulator unchanged.
 770     ((match-extract-quasiquote-vars x (k ...) i v d)
 771      (k ... v))
 772     ))
 773
 774 (define-syntax match-extract-quasiquote-vars-step
       ;; (match-extract-quasiquote-vars-step rest k seen acc depth ((new new-ls) ...))
       ;;
       ;; Continuation used by `match-extract-quasiquote-vars' after the CAR
       ;; of a pair has been scanned.  The variables just found arrive as
       ;; the final operand and are added to both the known identifiers SEEN
       ;; and the accumulator ACC; scanning then resumes on REST using the
       ;; depth this macro was given.
 775   (syntax-rules ()
 776     ((match-extract-quasiquote-vars-step rest k seen acc depth ((new new-ls) ...))
 777      (match-extract-quasiquote-vars rest k (new ... . seen) ((new new-ls) ... . acc) depth))
 778     ))
 779
 780
 781 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 782 ;; Gimme some sugar baby.
 783
 784 ;;> Shortcut for @scheme{lambda} + @scheme{match}.  Creates a
 785 ;;> procedure of one argument, and matches that argument against each
 786 ;;> clause.
 787
 788 (define-syntax match-lambda
       ;; Expands to a procedure of exactly one argument; the argument is
       ;; handed to `match' together with the given (pattern . body) clauses.
 789   (syntax-rules ()
 790     ((match-lambda (pat . body) ...) (lambda (arg) (match arg (pat . body) ...)))))
 791
 792 ;;> Similar to @scheme{match-lambda}.  Creates a procedure of any
 793 ;;> number of arguments, and matches the argument list against each
 794 ;;> clause.
 795
 796 (define-syntax match-lambda*
       ;; Expands to a variadic procedure; the whole argument list is handed
       ;; to `match' together with the given (pattern . body) clauses.
 797   (syntax-rules ()
 798     ((match-lambda* (pat . body) ...) (lambda args (match args (pat . body) ...)))))
 799
 800 ;;> Matches each var to the corresponding expression, and evaluates
 801 ;;> the body with all match variables in scope.  Raises an error if
 802 ;;> any of the expressions fail to match.  Syntax analogous to named
 803 ;;> let can also be used for recursive functions which match on their
 804 ;;> arguments as in @scheme{match-lambda*}.
 805
 806 (define-syntax match-let
       ;; (match-let ((pat expr) ...) body ...)
       ;; (match-let name ((pat init) ...) body ...)
       ;;
       ;; The first form delegates to `match-let/helper' with `let' as the
       ;; binding construct and two empty accumulators.  The second, named
       ;; form delegates to `match-named-let'.  That macro takes, in order,
       ;; the loop name, an accumulator of processed bindings, the bindings
       ;; still to process, and the body, so the accumulator must be
       ;; supplied here as an explicit empty list.
       ;;
       ;; Rule order matters: a plain binding list has to be tried before
       ;; the named form, whose first operand pattern matches anything.
 807   (syntax-rules ()
 808     ((_ ((var value) ...) . body)
 809      (match-let/helper let () () ((var value) ...) . body))
 810     ((_ loop ((var init) ...) . body)
 811      (match-named-let loop () ((var init) ...) . body))))
 812
 813 ;;> Similar to @scheme{match-let}, but analogously to @scheme{letrec}
 814 ;;> matches and binds the variables with all match variables in scope.
 815
 816 (define-syntax match-letrec
       ;; Same expansion strategy as `match-let', except that
       ;; `match-let/helper' is told to use `letrec' as the binding construct.
 817   (syntax-rules ()
 818     ((match-letrec ((pat init) ...) . body)
 819      (match-let/helper letrec () () ((pat init) ...) . body))))
 820
 821 (define-syntax match-let/helper
       ;; (match-let/helper binder (simple ...) (deferred ...) (binding ...) . body)
       ;;
       ;; BINDER is the binding keyword to emit (callers in this file pass
       ;; `let' or `letrec').  The bindings are consumed left to right:
       ;;   - a pair or vector pattern gets a fresh temporary, which is bound
       ;;     to the expression in SIMPLE, while the (pattern temporary) pair
       ;;     is queued in DEFERRED;
       ;;   - anything else is bound directly in SIMPLE.
       ;; Once no bindings remain, the expansion is a single BINDER form,
       ;; wrapping a `match-let*' over DEFERRED when that list is not empty.
       ;; Rule order is significant: the all-empty rule must precede the
       ;; general terminating rule, and the catch-all binding rule is last.
 822   (syntax-rules ()
 823     ((match-let/helper binder ((name init) ...) () () . body)
 824      (binder ((name init) ...) . body))
 825     ((match-let/helper binder ((name init) ...) ((pat val) ...) () . body)
 826      (binder ((name init) ...)
 827        (match-let* ((pat val) ...)
 828          . body)))
 829     ((match-let/helper binder (simple ...) (deferred ...) (((head . tail) init) . more) . body)
 830      (match-let/helper
 831       binder (simple ... (fresh init)) (deferred ... ((head . tail) fresh)) more . body))
 832     ((match-let/helper binder (simple ...) (deferred ...) ((#(elt ...) init) . more) . body)
 833      (match-let/helper
 834       binder (simple ... (fresh init)) (deferred ... (#(elt ...) fresh)) more . body))
 835     ((match-let/helper binder (simple ...) (deferred ...) ((name init) . more) . body)
 836      (match-let/helper binder (simple ... (name init)) (deferred ...) more . body))))
 837
 838 (define-syntax match-named-let
       ;; (match-named-let name (done ...) ((pat init) ...) . body)
       ;;
       ;; Moves the bindings one at a time from the third operand into the
       ;; accumulator DONE, pairing each with a fresh loop argument.  When
       ;; none remain, the expansion is a named `let' over those arguments
       ;; whose body destructures them with `match-let'.  The two rules are
       ;; mutually exclusive on the third operand (pair versus empty list).
 839   (syntax-rules ()
 840     ((match-named-let name (done ...) ((pat init) . more) . body)
 841      (match-named-let name (done ... (pat init arg)) more . body))
 842     ((match-named-let name ((pat init arg) ...) () . body)
 843      (let name ((arg init) ...)
 844        (match-let ((pat arg) ...)
 845          . body)))))
 846
 847 ;;> @subsubsubsection{@rawcode{(match-let* ((var value) ...) body ...)}}
 848
 849 ;;> Similar to @scheme{match-let}, but analogously to @scheme{let*}
 850 ;;> matches and binds the variables in sequence, with preceding match
 851 ;;> variables in scope.
 852
 853 (define-syntax match-let*
       ;; Sequential variant: each binding becomes a single-clause `match'
       ;; whose body is the `match-let*' of the remaining bindings, so later
       ;; expressions see earlier pattern variables.  With no bindings left
       ;; the body is wrapped in `begin'.  The two rules are mutually
       ;; exclusive on the binding list (pair versus empty list).
 854   (syntax-rules ()
 855     ((match-let* ((pat init) . more) . body)
 856      (match init (pat (match-let* more . body))))
 857     ((match-let* () . body)
 858      (begin . body))))
 859
 860
 861 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 862 ;; Otherwise COND-EXPANDed bits.
 863
 864 ;; This *should* work, but doesn't :(
 865 ;;   (define-syntax match-check-ellipse
 866 ;;     (syntax-rules (...)
 867 ;;       ((_ ... sk fk) sk)
 868 ;;       ((_ x sk fk) fk)))
 869
 870 ;; This is a little more complicated, and introduces a new let-syntax,
 871 ;; but should work portably in any R[56]RS Scheme.  Taylor Campbell
 872 ;; originally came up with the idea.
 873 (define-syntax match-check-ellipse
       ;; (match-check-ellipse x success-k failure-k)
       ;;
       ;; Expands to SUCCESS-K when X is the ellipsis identifier `...' and to
       ;; FAILURE-K otherwise.  X is spliced into the pattern of a local
       ;; macro: if it is `...' the pattern becomes (foo ...), which matches a
       ;; list of any length; any other atom gives a two-element pattern,
       ;; which cannot match the three-element probe (a b c).
 874   (syntax-rules ()
 875     ;; these two aren't necessary but provide fast-case failures
 876     ((match-check-ellipse (a . b) success-k failure-k) failure-k)
 877     ((match-check-ellipse #(a ...) success-k failure-k) failure-k)
 878     ;; matching an atom
 879     ((match-check-ellipse id success-k failure-k)
 880      (let-syntax ((ellipse? (syntax-rules ()
 881                               ;; iff `id' is `...' here then this will
 882                               ;; match a list of any length
 883                               ((ellipse? (foo id) sk fk) sk)
 884                               ((ellipse? other sk fk) fk))))
 885        ;; this list of three elements will only match the (foo id) list
 886        ;; above if `id' is `...'
 887        (ellipse? (a b c) success-k failure-k)))))
 888
 889
 890 ;; This is portable but can be more efficient with non-portable
 891 ;; extensions.  This trick was originally discovered by Oleg Kiselyov.
 892
 893 (define-syntax match-check-identifier
       ;; (match-check-identifier x success-k failure-k)
       ;;
       ;; Expands to SUCCESS-K when X is an identifier and to FAILURE-K when
       ;; it is a pair, a vector or any other datum.  X is spliced into the
       ;; pattern of a local macro with no literals: as an identifier it is a
       ;; pattern variable and matches the probe symbol, whereas a
       ;; non-symbol datum used as a pattern cannot match that symbol.
 894   (syntax-rules ()
 895     ;; fast-case failures, lists and vectors are not identifiers
 896     ((_ (x . y) success-k failure-k) failure-k)
 897     ((_ #(x ...) success-k failure-k) failure-k)
 898     ;; x is an atom
 899     ((_ x success-k failure-k)
 900      (let-syntax
 901          ((sym?
 902            (syntax-rules ()
 903              ;; if the symbol `abracadabra' matches x, then x is a
 904              ;; symbol
 905              ((sym? x sk fk) sk)
 906              ;; otherwise x is a non-symbol datum
 907              ((sym? y sk fk) fk))))
 908        (sym? abracadabra success-k failure-k)))))