X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/a482f2cc7b2edaa2c5cd66efe81cbf4428919dbe..b81a5bf26d9980be1dfd9bd9c477b41c503d5124:/ice-9/regex.scm diff --git a/ice-9/regex.scm b/ice-9/regex.scm index 023c0b7bc..21beb1665 100644 --- a/ice-9/regex.scm +++ b/ice-9/regex.scm @@ -1,48 +1,52 @@ -;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc. -;;;; -;;;; This program is free software; you can redistribute it and/or modify -;;;; it under the terms of the GNU General Public License as published by -;;;; the Free Software Foundation; either version 2, or (at your option) -;;;; any later version. +;;;; Copyright (C) 1997, 1999, 2001, 2004, 2005, 2006 Free Software Foundation, Inc. +;;;; +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 2.1 of the License, or (at your option) any later version. ;;;; -;;;; This program is distributed in the hope that it will be useful, +;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;;; GNU General Public License for more details. +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. ;;;; -;;;; You should have received a copy of the GNU General Public License -;;;; along with this software; see the file COPYING. If not, write to -;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330, -;;;; Boston, MA 02111-1307 USA -;;;; -;;;; As a special exception, the Free Software Foundation gives permission -;;;; for additional uses of the text contained in its release of GUILE. -;;;; -;;;; The exception is that, if you link the GUILE library with other files -;;;; to produce an executable, this does not by itself cause the -;;;; resulting executable to be covered by the GNU General Public License. -;;;; Your use of that executable is in no way restricted on account of -;;;; linking the GUILE library code into it. -;;;; -;;;; This exception does not however invalidate any other reasons why -;;;; the executable file might be covered by the GNU General Public License. +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;;;; -;;;; This exception applies only to the code released by the -;;;; Free Software Foundation under the name GUILE. If you copy -;;;; code from other Free Software Foundation releases into a copy of -;;;; GUILE, as the General Public License permits, the exception does -;;;; not apply to the code that you add in this way. To avoid misleading -;;;; anyone as to the status of such modified files, you must delete -;;;; this exception notice from them. -;;;; -;;;; If you write modifications of your own for GUILE, it is your choice -;;;; whether to permit this exception to apply to your modifications. -;;;; If you do not wish that, delete this exception notice. -;;;; + +;;; Commentary: + +;; These procedures are exported: +;; (match:count match) +;; (match:string match) +;; (match:prefix match) +;; (match:suffix match) +;; (regexp-match? match) +;; (regexp-quote string) +;; (match:start match . submatch-num) +;; (match:end match . submatch-num) +;; (match:substring match . submatch-num) +;; (string-match pattern str . start) +;; (regexp-substitute port match . items) +;; (fold-matches regexp string init proc . flags) +;; (list-matches regexp string . flags) +;; (regexp-substitute/global port regexp string . items) + +;;; Code: ;;;; POSIX regex support functions. -(define-module (ice-9 regex)) +(define-module (ice-9 regex) + :export (match:count match:string match:prefix match:suffix + regexp-match? regexp-quote match:start match:end match:substring + string-match regexp-substitute fold-matches list-matches + regexp-substitute/global)) + +;; References: +;; +;; POSIX spec: +;; http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html ;;; FIXME: ;;; It is not clear what should happen if a `match' function @@ -53,22 +57,22 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; These procedures are not defined in SCSH, but I found them useful. -(define-public (match:count match) +(define (match:count match) (- (vector-length match) 1)) -(define-public (match:string match) +(define (match:string match) (vector-ref match 0)) -(define-public (match:prefix match) +(define (match:prefix match) (substring (match:string match) 0 (match:start match 0))) -(define-public (match:suffix match) +(define (match:suffix match) (substring (match:string match) (match:end match 0))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; SCSH compatibility routines. -(define-public (regexp-match? match) +(define (regexp-match? match) (and (vector? match) (string? (vector-ref match 0)) (let loop ((i 1)) @@ -79,33 +83,52 @@ (loop (+ 1 i))) (else #f))))) -(define-public (regexp-quote regexp) +;; * . \ ^ $ and [ are special in both regexp/basic and regexp/extended and +;; can be backslash escaped. +;; +;; ( ) + ? { } and | are special in regexp/extended so must be quoted. But +;; that can't be done with a backslash since in regexp/basic where they're +;; not special, adding a backslash makes them become special. Character +;; class forms [(] etc are used instead. +;; +;; ) is not special when not preceded by a (, and * and ? are not special at +;; the start of a string, but we quote all of these always, so the result +;; can be concatenated or merged into some larger regexp. +;; +;; ] is not special outside a [ ] character class, so doesn't need to be +;; quoted. +;; +(define (regexp-quote string) (call-with-output-string (lambda (p) - (let loop ((i 0)) - (and (< i (string-length regexp)) - (begin - (case (string-ref regexp i) - ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\}) - (write-char #\\ p))) - (write-char (string-ref regexp i) p) - (loop (1+ i)))))))) - -(define-public (match:start match . args) + (string-for-each (lambda (c) + (case c + ((#\* #\. #\\ #\^ #\$ #\[) + (write-char #\\ p) + (write-char c p)) + ((#\( #\) #\+ #\? #\{ #\} #\|) + (write-char #\[ p) + (write-char c p) + (write-char #\] p)) + (else + (write-char c p)))) + string)))) + +(define (match:start match . args) (let* ((matchnum (if (pair? args) (+ 1 (car args)) 1)) (start (car (vector-ref match matchnum)))) (if (= start -1) #f start))) -(define-public (match:end match . args) +(define (match:end match . args) (let* ((matchnum (if (pair? args) (+ 1 (car args)) 1)) (end (cdr (vector-ref match matchnum)))) (if (= end -1) #f end))) -(define-public (match:substring match . args) +(define (match:substring match . args) (let* ((matchnum (if (pair? args) (car args) 0)) @@ -113,12 +136,12 @@ (end (match:end match matchnum))) (and start end (substring (match:string match) start end)))) -(define-public (string-match pattern str . args) +(define (string-match pattern str . args) (let ((rx (make-regexp pattern)) (start (if (pair? args) (car args) 0))) (regexp-exec rx str start))) -(define-public (regexp-substitute port match . items) +(define (regexp-substitute port match . items) ;; If `port' is #f, send output to a string. (if (not port) (call-with-output-string @@ -153,7 +176,7 @@ ;;; `b'. Around or within `xxx', only the match covering all three ;;; x's counts, because the rest are not maximal. -(define-public (fold-matches regexp string init proc . flags) +(define (fold-matches regexp string init proc . flags) (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp))) (flags (if (null? flags) 0 flags))) (let loop ((start 0) @@ -171,10 +194,10 @@ (else (loop (match:end m) (proc m value) #t))))))) -(define-public (list-matches regexp string . flags) +(define (list-matches regexp string . flags) (reverse! (apply fold-matches regexp string '() cons flags))) -(define-public (regexp-substitute/global port regexp string . items) +(define (regexp-substitute/global port regexp string . items) ;; If `port' is #f, send output to a string. (if (not port) @@ -193,13 +216,13 @@ ;; for-each, because we need to make sure 'post at the ;; end of the item list is a tail call. (let next-item ((items items)) - + (define (do-item item) (cond ((string? item) (display item port)) ((integer? item) (display (match:substring m item) port)) ((procedure? item) (display (item m) port)) - ((eq? item 'pre) + ((eq? item 'pre) (display (substring string start (match:start m)) port))