Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / test-suite / tests / regexp.test
CommitLineData
644c5165 1;;;; regexp.test --- test Guile's regexps -*- coding: utf-8; mode: scheme -*-
3dcdcfe8
JB
2;;;; Jim Blandy <jimb@red-bean.com> --- September 1999
3;;;;
1c242b37 4;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3dcdcfe8 5;;;;
53befeb7
NJ
6;;;; This library is free software; you can redistribute it and/or
7;;;; modify it under the terms of the GNU Lesser General Public
8;;;; License as published by the Free Software Foundation; either
9;;;; version 3 of the License, or (at your option) any later version.
3dcdcfe8 10;;;;
53befeb7 11;;;; This library is distributed in the hope that it will be useful,
3dcdcfe8 12;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
53befeb7
NJ
13;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14;;;; Lesser General Public License for more details.
3dcdcfe8 15;;;;
53befeb7
NJ
16;;;; You should have received a copy of the GNU Lesser General Public
17;;;; License along with this library; if not, write to the Free Software
18;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3dcdcfe8 19
76ed3e87
LC
20(define-module (test-suite test-regexp)
21 #:use-module (test-suite lib)
7583976b 22 #:use-module (srfi srfi-1)
76ed3e87 23 #:use-module (ice-9 regex))
3dcdcfe8 24
1c242b37
LC
25(if (defined? 'setlocale)
26 (setlocale LC_ALL "C"))
e354d768 27
dec84a0a
LC
28;; Don't fail if we can't display a test name to stdout/stderr.
29(set-port-conversion-strategy! (current-output-port) 'escape)
30(set-port-conversion-strategy! (current-error-port) 'escape)
31
76ed3e87 32\f
3dcdcfe8
JB
33;;; Run a regexp-substitute or regexp-substitute/global test, once
34;;; providing a real port and once providing #f, requesting direct
35;;; string output.
36(define (vary-port func expected . args)
37 (pass-if "port is string port"
38 (equal? expected
39 (call-with-output-string
40 (lambda (port)
41 (apply func port args)))))
42 (pass-if "port is #f"
43 (equal? expected
44 (apply func #f args))))
45
46(define (object->string obj)
47 (call-with-output-string
48 (lambda (port)
49 (write obj port))))
50
710491c5
KR
51;;;
52;;; make-regexp
53;;;
54
55(with-test-prefix "make-regexp"
56
57 (pass-if-exception "no args" exception:wrong-num-args
58 (make-regexp))
59
60 (pass-if-exception "bad pat arg" exception:wrong-type-arg
61 (make-regexp 'blah))
62
63 ;; in guile prior to 1.6.5 make-regex didn't validate its flags args
64 (pass-if-exception "bad arg 2" exception:wrong-type-arg
65 (make-regexp "xyz" 'abc))
66
67 (pass-if-exception "bad arg 3" exception:wrong-type-arg
68 (make-regexp "xyz" regexp/icase 'abc)))
69
38923713
KR
70;;;
71;;; match:string
72;;;
73
74(with-test-prefix "match:string"
75
76 (pass-if "foo"
77 (string=? "foo" (match:string (string-match ".*" "foo"))))
78
79 (pass-if "foo offset 1"
80 (string=? "foo" (match:string (string-match ".*" "foo" 1)))))
81
8e1973d9
KR
82;;;
83;;; regexp-exec
84;;;
85
86(with-test-prefix "regexp-exec"
87
88 (pass-if-exception "non-integer offset" exception:wrong-type-arg
89 (let ((re (make-regexp "ab+")))
90 (regexp-exec re "aaaabbbb" 1.5 'bogus-flags-arg)))
91
92 (pass-if-exception "non-string input" exception:wrong-type-arg
93 (let ((re (make-regexp "ab+")))
94 (regexp-exec re 'not-a-string)))
95
96 (pass-if-exception "non-string input, with offset" exception:wrong-type-arg
97 (let ((re (make-regexp "ab+")))
98 (regexp-exec re 'not-a-string 5)))
99
100 ;; in guile 1.8.1 and earlier, a #\nul character in the input string was
101 ;; only detected in a critical section, and the resulting error throw
102 ;; abort()ed the program
103 (pass-if-exception "nul in input" exception:string-contains-nul
104 (let ((re (make-regexp "ab+")))
105 (regexp-exec re (string #\a #\b (integer->char 0)))))
106
107 ;; in guile 1.8.1 and earlier, a bogus flags argument was only detected
108 ;; inside a critical section, and the resulting error throw abort()ed the
109 ;; program
110 (pass-if-exception "non-integer flags" exception:wrong-type-arg
111 (let ((re (make-regexp "ab+")))
112 (regexp-exec re "aaaabbbb" 0 'bogus-flags-arg))))
113
c6333102
LC
114;;;
115;;; fold-matches
116;;;
117
118(with-test-prefix "fold-matches"
119
120 (pass-if "without flags"
121 (equal? '("hello")
122 (fold-matches "^[a-z]+$" "hello" '()
123 (lambda (match result)
124 (cons (match:substring match)
125 result)))))
126
127 (pass-if "with flags"
128 ;; Prior to 1.8.6, passing an additional flag would not work.
129 (null?
130 (fold-matches "^[a-z]+$" "hello" '()
131 (lambda (match result)
132 (cons (match:substring match)
133 result))
134 (logior regexp/notbol regexp/noteol)))))
135
136
1df6de96
KR
137;;;
138;;; regexp-quote
139;;;
140
8a954f3d
LC
141(define-syntax with-ascii-or-latin1-locale
142 (syntax-rules ()
143 ((_ chr body ...)
144 (if (> chr 127)
145 (with-latin1-locale body ...)
146 (begin body ...)))))
147
a1a3a0dd
AW
148(define char-code-limit 256)
149
1df6de96
KR
150(with-test-prefix "regexp-quote"
151
e354d768
MG
152 (pass-if-exception "no args" exception:wrong-num-args
153 (regexp-quote))
154
155 (pass-if-exception "bad string arg" exception:wrong-type-arg
156 (regexp-quote 'blah))
157
158 (let ((lst `((regexp/basic ,regexp/basic)
159 (regexp/extended ,regexp/extended)))
160 ;; string of all characters, except #\nul which doesn't work because
161 ;; it's the usual end-of-string for the underlying C regexec()
162 (allchars (list->string (map integer->char
163 (cdr (iota char-code-limit))))))
164 (for-each
165 (lambda (elem)
166 (let ((name (car elem))
167 (flag (cadr elem)))
168
169 (with-test-prefix name
170
171 ;; try on each individual character, except #\nul
172 (do ((i 1 (1+ i)))
173 ((>= i char-code-limit))
174 (let* ((c (integer->char i))
1c242b37
LC
175 (s (string c)))
176 (pass-if (list "char" i (format #f "~s ~s" c s))
8a954f3d 177 (with-ascii-or-latin1-locale i
1c242b37
LC
178 (let* ((q (regexp-quote s))
179 (m (regexp-exec (make-regexp q flag) s)))
180 (and (= 0 (match:start m))
181 (= 1 (match:end m))))))))
e354d768
MG
182
183 ;; try on pattern "aX" where X is each character, except #\nul
184 ;; this exposes things like "?" which are special only when they
185 ;; follow a pattern to repeat or whatever ("a" in this case)
186 (do ((i 1 (1+ i)))
187 ((>= i char-code-limit))
188 (let* ((c (integer->char i))
189 (s (string #\a c))
190 (q (regexp-quote s)))
7583976b 191 (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q))
8a954f3d 192 (with-ascii-or-latin1-locale i
7583976b 193 (let* ((m (regexp-exec (make-regexp q flag) s)))
e354d768 194 (and (= 0 (match:start m))
1c242b37 195 (= 2 (match:end m))))))))
e354d768
MG
196
197 (pass-if "string of all chars"
1c242b37
LC
198 (with-latin1-locale
199 (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
200 flag) allchars)))
201 (and (= 0 (match:start m))
202 (= (string-length allchars) (match:end m)))))))))
e354d768 203 lst)))
1df6de96 204
710491c5
KR
205;;;
206;;; regexp-substitute
207;;;
208
3dcdcfe8
JB
209(with-test-prefix "regexp-substitute"
210 (let ((match
211 (string-match "patleft(sub1)patmid(sub2)patright"
212 "contleftpatleftsub1patmidsub2patrightcontright")))
213 (define (try expected . args)
214 (with-test-prefix (object->string args)
215 (apply vary-port regexp-substitute expected match args)))
216
217 (try "")
218 (try "string1" "string1")
219 (try "string1string2" "string1" "string2")
220 (try "patleftsub1patmidsub2patright" 0)
221 (try "hi-patleftsub1patmidsub2patright-bye" "hi-" 0 "-bye")
222 (try "sub1" 1)
223 (try "hi-sub1-bye" "hi-" 1 "-bye")
224 (try "hi-sub2-bye" "hi-" 2 "-bye")
225 (try "contleft" 'pre)
226 (try "contright" 'post)
227 (try "contrightcontleft" 'post 'pre)
228 (try "contrightcontleftcontrightcontleft" 'post 'pre 'post 'pre)
229 (try "contrightsub2sub1contleft" 'post 2 1 'pre)
230 (try "foosub1sub1sub1sub1bar" "foo" 1 1 1 1 "bar")))
231
232(with-test-prefix "regexp-substitute/global"
233
234 (define (try expected . args)
235 (with-test-prefix (object->string args)
236 (apply vary-port regexp-substitute/global expected args)))
237
3dcdcfe8
JB
238 (try "hi" "a(x*)b" "ab" "hi")
239 (try "" "a(x*)b" "ab" 1)
240 (try "xx" "a(x*)b" "axxb" 1)
241 (try "xx" "a(x*)b" "_axxb_" 1)
242 (try "pre" "a(x*)b" "preaxxbpost" 'pre)
243 (try "post" "a(x*)b" "preaxxbpost" 'post)
f88fdc6e 244 (try "string" "x" "string" 'pre "y" 'post)
3dcdcfe8
JB
245 (try "4" "a(x*)b" "_axxb_" (lambda (m)
246 (number->string (match:end m 1))))
247
248 (try "_aybycyd_" "x+" "_axbxxcxxxd_" 'pre "y" 'post)
249
250 ;; This should not go into an infinite loop, just because the regexp
251 ;; can match the empty string. This test also kind of beats on our
252 ;; definition of where a null string can match.
253 (try "y_yaybycydy_y" "x*" "_axbxxcxxxd_" 'pre "y" 'post)
254
255 ;; These kind of bother me. The extension from regexp-substitute to
256 ;; regexp-substitute/global is only natural if your item list
257 ;; includes both pre and post. If those are required, why bother
258 ;; to include them at all?
259 (try "4:7:12:_" "a(x*)b" "_axxbaxbaxxxb_"
260 (lambda (m) (number->string (match:end m 1))) ":"
261 'post)
262 (try "4:10:19:_:19:10:4" "a(x*)b" "_axxbaxxxxbaxxxxxxxb_"
263 (lambda (m) (number->string (match:end m 1))) ":"
264 'post
265 ":" (lambda (m) (number->string (match:end m 1))))
266
267 ;; Jan Nieuwenhuizen's bug, 2 Sep 1999
268 (try "" "_" (make-string 500 #\_)
269 'post))
644c5165
AW
270
271(with-test-prefix "nonascii locales"
43ecaffc
LC
272 (pass-if "match structures refer to char offsets"
273 (with-locale "en_US.utf8"
274 ;; bug #31650
644c5165
AW
275 (equal? (match:substring (string-match ".*" "calçot") 0)
276 "calçot"))))