-;;;; Copyright (C) 1997, 1999, 2001, 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
+;;;; Copyright (C) 1997, 1999, 2001, 2004, 2005, 2006, 2008, 2010 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
-;;;; version 2.1 of the License, or (at your option) any later version.
+;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; POSIX regex support functions.
(define-module (ice-9 regex)
- :export (match:count match:string match:prefix match:suffix
- regexp-match? regexp-quote match:start match:end match:substring
- string-match regexp-substitute fold-matches list-matches
- regexp-substitute/global))
+ #:export (match:count match:string match:prefix match:suffix
+ regexp-match? regexp-quote match:start match:end match:substring
+ string-match regexp-substitute fold-matches list-matches
+ regexp-substitute/global))
;; References:
;;
(and (vector? match)
(string? (vector-ref match 0))
(let loop ((i 1))
- (cond ((>= i (vector-length match)) #t)
- ((and (pair? (vector-ref match i))
- (integer? (car (vector-ref match i)))
- (integer? (cdr (vector-ref match i))))
- (loop (+ 1 i)))
- (else #f)))))
+ (cond ((>= i (vector-length match)) #t)
+ ((and (pair? (vector-ref match i))
+ (integer? (car (vector-ref match i)))
+ (integer? (cdr (vector-ref match i))))
+ (loop (+ 1 i)))
+ (else #f)))))
;; * . \ ^ $ and [ are special in both regexp/basic and regexp/extended and
;; can be backslash escaped.
(call-with-output-string
(lambda (p)
(string-for-each (lambda (c)
- (case c
- ((#\* #\. #\\ #\^ #\$ #\[)
- (write-char #\\ p)
- (write-char c p))
- ((#\( #\) #\+ #\? #\{ #\} #\|)
- (write-char #\[ p)
- (write-char c p)
- (write-char #\] p))
- (else
- (write-char c p))))
- string))))
-
-(define (match:start match . args)
- (let* ((matchnum (if (pair? args)
- (+ 1 (car args))
- 1))
- (start (car (vector-ref match matchnum))))
+ (case c
+ ((#\* #\. #\\ #\^ #\$ #\[)
+ (write-char #\\ p)
+ (write-char c p))
+ ((#\( #\) #\+ #\? #\{ #\} #\|)
+ (write-char #\[ p)
+ (write-char c p)
+ (write-char #\] p))
+ (else
+ (write-char c p))))
+ string))))
+
+(define* (match:start match #:optional (n 0))
+ (let ((start (car (vector-ref match (1+ n)))))
(if (= start -1) #f start)))
-(define (match:end match . args)
- (let* ((matchnum (if (pair? args)
- (+ 1 (car args))
- 1))
- (end (cdr (vector-ref match matchnum))))
+(define* (match:end match #:optional (n 0))
+ (let* ((end (cdr (vector-ref match (1+ n)))))
(if (= end -1) #f end)))
-(define (match:substring match . args)
- (let* ((matchnum (if (pair? args)
- (car args)
- 0))
- (start (match:start match matchnum))
- (end (match:end match matchnum)))
+(define* (match:substring match #:optional (n 0))
+ (let* ((start (match:start match n))
+ (end (match:end match n)))
(and start end (substring (match:string match) start end))))
(define (string-match pattern str . args)
(let ((rx (make-regexp pattern))
- (start (if (pair? args) (car args) 0)))
+ (start (if (pair? args) (car args) 0)))
(regexp-exec rx str start)))
(define (regexp-substitute port match . items)
(if (not port)
(call-with-output-string
(lambda (p)
- (apply regexp-substitute p match items)))
+ (apply regexp-substitute p match items)))
;; Otherwise, process each substitution argument in `items'.
(for-each (lambda (obj)
- (cond ((string? obj) (display obj port))
- ((integer? obj) (display (match:substring match obj) port))
- ((eq? 'pre obj) (display (match:prefix match) port))
- ((eq? 'post obj) (display (match:suffix match) port))
- (else (error 'wrong-type-arg obj))))
- items)))
+ (cond ((string? obj) (display obj port))
+ ((integer? obj) (display (match:substring match obj) port))
+ ((eq? 'pre obj) (display (match:prefix match) port))
+ ((eq? 'post obj) (display (match:suffix match) port))
+ (else (error 'wrong-type-arg obj))))
+ items)))
;;; If we call fold-matches, below, with a regexp that can match the
;;; empty string, it's not obvious what "all the matches" means. How
;;; many empty strings are there in the string "a"? Our answer:
;;;
-;;; This function applies PROC to every non-overlapping, maximal
+;;; This function applies PROC to every non-overlapping, maximal
;;; match of REGEXP in STRING.
;;;
;;; "non-overlapping": There are two non-overlapping matches of "" in
;;; `b'. Around or within `xxx', only the match covering all three
;;; x's counts, because the rest are not maximal.
-(define (fold-matches regexp string init proc . flags)
- (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
- (flags (if (null? flags) 0 (car flags))))
+(define* (fold-matches regexp string init proc #:optional (flags 0))
+ (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp))))
(let loop ((start 0)
- (value init)
- (abuts #f)) ; True if start abuts a previous match.
+ (value init)
+ (abuts #f)) ; True if start abuts a previous match.
+ (define bol (if (zero? start) 0 regexp/notbol))
(let ((m (if (> start (string-length string)) #f
- (regexp-exec regexp string start flags))))
- (cond
- ((not m) value)
- ((and (= (match:start m) (match:end m)) abuts)
- ;; We matched an empty string, but that would overlap the
- ;; match immediately before. Try again at a position
- ;; further to the right.
- (loop (+ start 1) value #f))
- (else
- (loop (match:end m) (proc m value) #t)))))))
-
-(define (list-matches regexp string . flags)
- (reverse! (apply fold-matches regexp string '() cons flags)))
+ (regexp-exec regexp string start (logior flags bol)))))
+ (cond
+ ((not m) value)
+ ((and (= (match:start m) (match:end m)) abuts)
+ ;; We matched an empty string, but that would overlap the
+ ;; match immediately before. Try again at a position
+ ;; further to the right.
+ (loop (+ start 1) value #f))
+ (else
+ (loop (match:end m) (proc m value) #t)))))))
+
+(define* (list-matches regexp string #:optional (flags 0))
+ (reverse! (fold-matches regexp string '() cons flags)))
(define (regexp-substitute/global port regexp string . items)
(if (not port)
(call-with-output-string
(lambda (p)
- (apply regexp-substitute/global p regexp string items)))
+ (apply regexp-substitute/global p regexp string items)))
;; Walk the set of non-overlapping, maximal matches.
(let next-match ((matches (list-matches regexp string))
- (start 0))
- (if (null? matches)
- (display (substring string start) port)
- (let ((m (car matches)))
-
- ;; Process all of the items for this match. Don't use
- ;; for-each, because we need to make sure 'post at the
- ;; end of the item list is a tail call.
- (let next-item ((items items))
-
- (define (do-item item)
- (cond
- ((string? item) (display item port))
- ((integer? item) (display (match:substring m item) port))
- ((procedure? item) (display (item m) port))
- ((eq? item 'pre)
- (display
- (substring string start (match:start m))
- port))
- ((eq? item 'post)
- (next-match (cdr matches) (match:end m)))
- (else (error 'wrong-type-arg item))))
-
- (if (pair? items)
- (if (null? (cdr items))
- (do-item (car items)) ; This is a tail call.
- (begin
- (do-item (car items)) ; This is not.
- (next-item (cdr items)))))))))))
+ (start 0))
+ (if (null? matches)
+ (display (substring string start) port)
+ (let ((m (car matches)))
+
+ ;; Process all of the items for this match. Don't use
+ ;; for-each, because we need to make sure 'post at the
+ ;; end of the item list is a tail call.
+ (let next-item ((items items))
+
+ (define (do-item item)
+ (cond
+ ((string? item) (display item port))
+ ((integer? item) (display (match:substring m item) port))
+ ((procedure? item) (display (item m) port))
+ ((eq? item 'pre)
+ (display
+ (substring string start (match:start m))
+ port))
+ ((eq? item 'post)
+ (next-match (cdr matches) (match:end m)))
+ (else (error 'wrong-type-arg item))))
+
+ (if (pair? items)
+ (if (null? (cdr items))
+ (do-item (car items)) ; This is a tail call.
+ (begin
+ (do-item (car items)) ; This is not.
+ (next-item (cdr items)))))))))))