;;;; regexp.test --- test Guile's regexps -*- coding: utf-8; mode: scheme -*- ;;;; Jim Blandy --- September 1999 ;;;; ;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010, ;;;; 2012, 2013, 2014 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public ;;;; License as published by the Free Software Foundation; either ;;;; version 3 of the License, or (at your option) any later version. ;;;; ;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;;; Lesser General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU Lesser General Public ;;;; License along with this library; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA (define-module (test-suite test-regexp) #:use-module (test-suite lib) #:use-module (srfi srfi-1) #:use-module (ice-9 regex)) (when (defined? 'setlocale) (setlocale LC_ALL "C")) ;; Don't fail if we can't display a test name to stdout/stderr. (set-port-conversion-strategy! (current-output-port) 'escape) (set-port-conversion-strategy! (current-error-port) 'escape) ;;; Run a regexp-substitute or regexp-substitute/global test, once ;;; providing a real port and once providing #f, requesting direct ;;; string output. (define (vary-port func expected . args) (pass-if "port is string port" (equal? expected (call-with-output-string (lambda (port) (apply func port args))))) (pass-if "port is #f" (equal? expected (apply func #f args)))) (define (object->string obj) (call-with-output-string (lambda (port) (write obj port)))) ;;; ;;; make-regexp ;;; (with-test-prefix "make-regexp" (pass-if-exception "no args" exception:wrong-num-args (make-regexp)) (pass-if-exception "bad pat arg" exception:wrong-type-arg (make-regexp 'blah)) ;; in guile prior to 1.6.5 make-regex didn't validate its flags args (pass-if-exception "bad arg 2" exception:wrong-type-arg (make-regexp "xyz" 'abc)) (pass-if-exception "bad arg 3" exception:wrong-type-arg (make-regexp "xyz" regexp/icase 'abc))) ;;; ;;; match:string ;;; (with-test-prefix "match:string" (pass-if "foo" (string=? "foo" (match:string (string-match ".*" "foo")))) (pass-if "foo offset 1" (string=? "foo" (match:string (string-match ".*" "foo" 1))))) ;;; ;;; regexp-exec ;;; (with-test-prefix "regexp-exec" (pass-if-exception "non-integer offset" exception:wrong-type-arg (let ((re (make-regexp "ab+"))) (regexp-exec re "aaaabbbb" 1.5 'bogus-flags-arg))) (pass-if-exception "non-string input" exception:wrong-type-arg (let ((re (make-regexp "ab+"))) (regexp-exec re 'not-a-string))) (pass-if-exception "non-string input, with offset" exception:wrong-type-arg (let ((re (make-regexp "ab+"))) (regexp-exec re 'not-a-string 5))) ;; in guile 1.8.1 and earlier, a #\nul character in the input string was ;; only detected in a critical section, and the resulting error throw ;; abort()ed the program (pass-if-exception "nul in input" exception:string-contains-nul (let ((re (make-regexp "ab+"))) (regexp-exec re (string #\a #\b (integer->char 0))))) ;; in guile 1.8.1 and earlier, a bogus flags argument was only detected ;; inside a critical section, and the resulting error throw abort()ed the ;; program (pass-if-exception "non-integer flags" exception:wrong-type-arg (let ((re (make-regexp "ab+"))) (regexp-exec re "aaaabbbb" 0 'bogus-flags-arg)))) ;;; ;;; fold-matches ;;; (with-test-prefix "fold-matches" (pass-if "without flags" (equal? '("hello") (fold-matches "^[a-z]+$" "hello" '() (lambda (match result) (cons (match:substring match) result))))) (pass-if "with flags" ;; Prior to 1.8.6, passing an additional flag would not work. (null? (fold-matches "^[a-z]+$" "hello" '() (lambda (match result) (cons (match:substring match) result)) (logior regexp/notbol regexp/noteol)))) (pass-if "regexp/notbol is set correctly" (equal? '("foo") (fold-matches "^foo" "foofoofoofoo" '() (lambda (match result) (cons (match:substring match) result)))))) ;;; ;;; regexp-quote ;;; (define-syntax with-ascii-or-latin1-locale (syntax-rules () ((_ chr body ...) (if (> chr 127) (with-latin1-locale body ...) (begin body ...))))) (define char-code-limit 256) (with-test-prefix "regexp-quote" (pass-if-exception "no args" exception:wrong-num-args (regexp-quote)) (pass-if-exception "bad string arg" exception:wrong-type-arg (regexp-quote 'blah)) (let ((lst `((regexp/basic ,regexp/basic) (regexp/extended ,regexp/extended))) ;; String of all latin-1 characters, except #\nul which doesn't ;; work because it's the usual end-of-string for the underlying ;; C regexec(). (allchars (list->string (map integer->char (cdr (iota 256)))))) (for-each (lambda (elem) (let ((name (car elem)) (flag (cadr elem))) (with-test-prefix name ;; Try on each individual latin-1 character, except #\nul. (do ((i 1 (1+ i))) ((>= i 256)) (let* ((c (integer->char i)) (s (string c))) (pass-if (list "char" i (format #f "~s ~s" c s)) (with-ascii-or-latin1-locale i (let* ((q (regexp-quote s)) (m (regexp-exec (make-regexp q flag) s))) (and (= 0 (match:start m)) (= 1 (match:end m)))))))) ;; Try on pattern "aX" where X is each latin-1 character, ;; except #\nul. This exposes things like "?" which are ;; special only when they follow a pattern to repeat or ;; whatever ("a" in this case). (do ((i 1 (1+ i))) ((>= i 256)) (let* ((c (integer->char i)) (s (string #\a c)) (q (regexp-quote s))) (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q)) (with-ascii-or-latin1-locale i (let* ((m (regexp-exec (make-regexp q flag) s))) (and (= 0 (match:start m)) (= 2 (match:end m)))))))) (pass-if "string of all chars" (with-latin1-locale (let ((m (regexp-exec (make-regexp (regexp-quote allchars) flag) allchars))) (and (= 0 (match:start m)) (= (string-length allchars) (match:end m))))))))) lst))) ;;; ;;; regexp-substitute ;;; (with-test-prefix "regexp-substitute" (let ((match (string-match "patleft(sub1)patmid(sub2)patright" "contleftpatleftsub1patmidsub2patrightcontright"))) (define (try expected . args) (with-test-prefix (object->string args) (apply vary-port regexp-substitute expected match args))) (try "") (try "string1" "string1") (try "string1string2" "string1" "string2") (try "patleftsub1patmidsub2patright" 0) (try "hi-patleftsub1patmidsub2patright-bye" "hi-" 0 "-bye") (try "sub1" 1) (try "hi-sub1-bye" "hi-" 1 "-bye") (try "hi-sub2-bye" "hi-" 2 "-bye") (try "contleft" 'pre) (try "contright" 'post) (try "contrightcontleft" 'post 'pre) (try "contrightcontleftcontrightcontleft" 'post 'pre 'post 'pre) (try "contrightsub2sub1contleft" 'post 2 1 'pre) (try "foosub1sub1sub1sub1bar" "foo" 1 1 1 1 "bar"))) (with-test-prefix "regexp-substitute/global" (define (try expected . args) (with-test-prefix (object->string args) (apply vary-port regexp-substitute/global expected args))) (try "hi" "a(x*)b" "ab" "hi") (try "" "a(x*)b" "ab" 1) (try "xx" "a(x*)b" "axxb" 1) (try "xx" "a(x*)b" "_axxb_" 1) (try "pre" "a(x*)b" "preaxxbpost" 'pre) (try "post" "a(x*)b" "preaxxbpost" 'post) (try "string" "x" "string" 'pre "y" 'post) (try "4" "a(x*)b" "_axxb_" (lambda (m) (number->string (match:end m 1)))) (try "_aybycyd_" "x+" "_axbxxcxxxd_" 'pre "y" 'post) ;; This should not go into an infinite loop, just because the regexp ;; can match the empty string. This test also kind of beats on our ;; definition of where a null string can match. (try "y_yaybycydy_y" "x*" "_axbxxcxxxd_" 'pre "y" 'post) ;; These kind of bother me. The extension from regexp-substitute to ;; regexp-substitute/global is only natural if your item list ;; includes both pre and post. If those are required, why bother ;; to include them at all? (try "4:7:12:_" "a(x*)b" "_axxbaxbaxxxb_" (lambda (m) (number->string (match:end m 1))) ":" 'post) (try "4:10:19:_:19:10:4" "a(x*)b" "_axxbaxxxxbaxxxxxxxb_" (lambda (m) (number->string (match:end m 1))) ":" 'post ":" (lambda (m) (number->string (match:end m 1)))) ;; Jan Nieuwenhuizen's bug, 2 Sep 1999 (try "" "_" (make-string 500 #\_) 'post)) (with-test-prefix "nonascii locales" (pass-if "match structures refer to char offsets" (with-locale "en_US.utf8" ;; bug #31650 (equal? (match:substring (string-match ".*" "calçot") 0) "calçot"))) (pass-if "match structures refer to char offsets, non-ASCII pattern" (with-locale "en_US.utf8" ;; bug #31650 (equal? (match:substring (string-match "λ: The Ultimate (.*)" "λ: The Ultimate GOTO") 1) "GOTO"))))