Fix bytevector-copy when applied to SRFI-4 homogeneous numeric vectors.
[bpt/guile.git] / test-suite / tests / regexp.test
CommitLineData
644c5165 1;;;; regexp.test --- test Guile's regexps -*- coding: utf-8; mode: scheme -*-
3dcdcfe8
JB
2;;;; Jim Blandy <jimb@red-bean.com> --- September 1999
3;;;;
7aa394b5 4;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010,
0ce22459 5;;;; 2012, 2013, 2014 Free Software Foundation, Inc.
7aa394b5 6;;;;
53befeb7
NJ
7;;;; This library is free software; you can redistribute it and/or
8;;;; modify it under the terms of the GNU Lesser General Public
9;;;; License as published by the Free Software Foundation; either
10;;;; version 3 of the License, or (at your option) any later version.
3dcdcfe8 11;;;;
53befeb7 12;;;; This library is distributed in the hope that it will be useful,
3dcdcfe8 13;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
53befeb7
NJ
14;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;;;; Lesser General Public License for more details.
3dcdcfe8 16;;;;
53befeb7
NJ
17;;;; You should have received a copy of the GNU Lesser General Public
18;;;; License along with this library; if not, write to the Free Software
19;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3dcdcfe8 20
76ed3e87
LC
21(define-module (test-suite test-regexp)
22 #:use-module (test-suite lib)
7583976b 23 #:use-module (srfi srfi-1)
76ed3e87 24 #:use-module (ice-9 regex))
3dcdcfe8 25
0ce22459
MW
26(when (defined? 'setlocale)
27 (setlocale LC_ALL "C"))
e354d768 28
dec84a0a
LC
29;; Don't fail if we can't display a test name to stdout/stderr.
30(set-port-conversion-strategy! (current-output-port) 'escape)
31(set-port-conversion-strategy! (current-error-port) 'escape)
32
76ed3e87 33\f
3dcdcfe8
JB
34;;; Run a regexp-substitute or regexp-substitute/global test, once
35;;; providing a real port and once providing #f, requesting direct
36;;; string output.
37(define (vary-port func expected . args)
38 (pass-if "port is string port"
39 (equal? expected
40 (call-with-output-string
41 (lambda (port)
42 (apply func port args)))))
43 (pass-if "port is #f"
44 (equal? expected
45 (apply func #f args))))
46
47(define (object->string obj)
48 (call-with-output-string
49 (lambda (port)
50 (write obj port))))
51
710491c5
KR
52;;;
53;;; make-regexp
54;;;
55
56(with-test-prefix "make-regexp"
57
58 (pass-if-exception "no args" exception:wrong-num-args
59 (make-regexp))
60
61 (pass-if-exception "bad pat arg" exception:wrong-type-arg
62 (make-regexp 'blah))
63
64 ;; in guile prior to 1.6.5 make-regex didn't validate its flags args
65 (pass-if-exception "bad arg 2" exception:wrong-type-arg
66 (make-regexp "xyz" 'abc))
67
68 (pass-if-exception "bad arg 3" exception:wrong-type-arg
69 (make-regexp "xyz" regexp/icase 'abc)))
70
38923713
KR
71;;;
72;;; match:string
73;;;
74
75(with-test-prefix "match:string"
76
77 (pass-if "foo"
78 (string=? "foo" (match:string (string-match ".*" "foo"))))
79
80 (pass-if "foo offset 1"
81 (string=? "foo" (match:string (string-match ".*" "foo" 1)))))
82
8e1973d9
KR
83;;;
84;;; regexp-exec
85;;;
86
87(with-test-prefix "regexp-exec"
88
89 (pass-if-exception "non-integer offset" exception:wrong-type-arg
90 (let ((re (make-regexp "ab+")))
91 (regexp-exec re "aaaabbbb" 1.5 'bogus-flags-arg)))
92
93 (pass-if-exception "non-string input" exception:wrong-type-arg
94 (let ((re (make-regexp "ab+")))
95 (regexp-exec re 'not-a-string)))
96
97 (pass-if-exception "non-string input, with offset" exception:wrong-type-arg
98 (let ((re (make-regexp "ab+")))
99 (regexp-exec re 'not-a-string 5)))
100
101 ;; in guile 1.8.1 and earlier, a #\nul character in the input string was
102 ;; only detected in a critical section, and the resulting error throw
103 ;; abort()ed the program
104 (pass-if-exception "nul in input" exception:string-contains-nul
105 (let ((re (make-regexp "ab+")))
106 (regexp-exec re (string #\a #\b (integer->char 0)))))
107
108 ;; in guile 1.8.1 and earlier, a bogus flags argument was only detected
109 ;; inside a critical section, and the resulting error throw abort()ed the
110 ;; program
111 (pass-if-exception "non-integer flags" exception:wrong-type-arg
112 (let ((re (make-regexp "ab+")))
113 (regexp-exec re "aaaabbbb" 0 'bogus-flags-arg))))
114
c6333102
LC
115;;;
116;;; fold-matches
117;;;
118
119(with-test-prefix "fold-matches"
120
121 (pass-if "without flags"
122 (equal? '("hello")
123 (fold-matches "^[a-z]+$" "hello" '()
124 (lambda (match result)
125 (cons (match:substring match)
126 result)))))
127
128 (pass-if "with flags"
129 ;; Prior to 1.8.6, passing an additional flag would not work.
130 (null?
131 (fold-matches "^[a-z]+$" "hello" '()
132 (lambda (match result)
133 (cons (match:substring match)
134 result))
d6e1c8bf
CJY
135 (logior regexp/notbol regexp/noteol))))
136
137 (pass-if "regexp/notbol is set correctly"
138 (equal? '("foo")
139 (fold-matches "^foo" "foofoofoofoo" '()
140 (lambda (match result)
141 (cons (match:substring match)
142 result))))))
c6333102
LC
143
144
1df6de96
KR
145;;;
146;;; regexp-quote
147;;;
148
8a954f3d
LC
149(define-syntax with-ascii-or-latin1-locale
150 (syntax-rules ()
151 ((_ chr body ...)
152 (if (> chr 127)
153 (with-latin1-locale body ...)
154 (begin body ...)))))
155
211e71a1
LC
156;; Since `regexp-quote' uses string ports, and since it is used below
157;; with non-ASCII characters, these ports must be Unicode-capable.
158(define-syntax with-unicode
159 (syntax-rules ()
160 ((_ exp)
161 (with-fluids ((%default-port-encoding "UTF-8"))
162 exp))))
163
1df6de96
KR
164(with-test-prefix "regexp-quote"
165
e354d768
MG
166 (pass-if-exception "no args" exception:wrong-num-args
167 (regexp-quote))
168
169 (pass-if-exception "bad string arg" exception:wrong-type-arg
170 (regexp-quote 'blah))
171
172 (let ((lst `((regexp/basic ,regexp/basic)
173 (regexp/extended ,regexp/extended)))
921cd222
AW
174 ;; String of all latin-1 characters, except #\nul which doesn't
175 ;; work because it's the usual end-of-string for the underlying
176 ;; C regexec().
177 (allchars (list->string (map integer->char (cdr (iota 256))))))
e354d768
MG
178 (for-each
179 (lambda (elem)
180 (let ((name (car elem))
181 (flag (cadr elem)))
182
183 (with-test-prefix name
184
921cd222 185 ;; Try on each individual latin-1 character, except #\nul.
e354d768 186 (do ((i 1 (1+ i)))
921cd222 187 ((>= i 256))
e354d768 188 (let* ((c (integer->char i))
1c242b37
LC
189 (s (string c)))
190 (pass-if (list "char" i (format #f "~s ~s" c s))
8a954f3d 191 (with-ascii-or-latin1-locale i
211e71a1 192 (let* ((q (with-unicode (regexp-quote s)))
1c242b37
LC
193 (m (regexp-exec (make-regexp q flag) s)))
194 (and (= 0 (match:start m))
195 (= 1 (match:end m))))))))
e354d768 196
921cd222
AW
197 ;; Try on pattern "aX" where X is each latin-1 character,
198 ;; except #\nul. This exposes things like "?" which are
199 ;; special only when they follow a pattern to repeat or
200 ;; whatever ("a" in this case).
e354d768 201 (do ((i 1 (1+ i)))
921cd222 202 ((>= i 256))
e354d768
MG
203 (let* ((c (integer->char i))
204 (s (string #\a c))
211e71a1 205 (q (with-unicode (regexp-quote s))))
7583976b 206 (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q))
8a954f3d 207 (with-ascii-or-latin1-locale i
7583976b 208 (let* ((m (regexp-exec (make-regexp q flag) s)))
e354d768 209 (and (= 0 (match:start m))
1c242b37 210 (= 2 (match:end m))))))))
e354d768
MG
211
212 (pass-if "string of all chars"
1c242b37 213 (with-latin1-locale
211e71a1
LC
214 (let ((m (regexp-exec (make-regexp (with-unicode
215 (regexp-quote allchars))
1c242b37
LC
216 flag) allchars)))
217 (and (= 0 (match:start m))
218 (= (string-length allchars) (match:end m)))))))))
e354d768 219 lst)))
1df6de96 220
710491c5
KR
221;;;
222;;; regexp-substitute
223;;;
224
3dcdcfe8
JB
225(with-test-prefix "regexp-substitute"
226 (let ((match
227 (string-match "patleft(sub1)patmid(sub2)patright"
228 "contleftpatleftsub1patmidsub2patrightcontright")))
229 (define (try expected . args)
230 (with-test-prefix (object->string args)
231 (apply vary-port regexp-substitute expected match args)))
232
233 (try "")
234 (try "string1" "string1")
235 (try "string1string2" "string1" "string2")
236 (try "patleftsub1patmidsub2patright" 0)
237 (try "hi-patleftsub1patmidsub2patright-bye" "hi-" 0 "-bye")
238 (try "sub1" 1)
239 (try "hi-sub1-bye" "hi-" 1 "-bye")
240 (try "hi-sub2-bye" "hi-" 2 "-bye")
241 (try "contleft" 'pre)
242 (try "contright" 'post)
243 (try "contrightcontleft" 'post 'pre)
244 (try "contrightcontleftcontrightcontleft" 'post 'pre 'post 'pre)
245 (try "contrightsub2sub1contleft" 'post 2 1 'pre)
246 (try "foosub1sub1sub1sub1bar" "foo" 1 1 1 1 "bar")))
247
248(with-test-prefix "regexp-substitute/global"
249
250 (define (try expected . args)
251 (with-test-prefix (object->string args)
252 (apply vary-port regexp-substitute/global expected args)))
253
3dcdcfe8
JB
254 (try "hi" "a(x*)b" "ab" "hi")
255 (try "" "a(x*)b" "ab" 1)
256 (try "xx" "a(x*)b" "axxb" 1)
257 (try "xx" "a(x*)b" "_axxb_" 1)
258 (try "pre" "a(x*)b" "preaxxbpost" 'pre)
259 (try "post" "a(x*)b" "preaxxbpost" 'post)
f88fdc6e 260 (try "string" "x" "string" 'pre "y" 'post)
3dcdcfe8
JB
261 (try "4" "a(x*)b" "_axxb_" (lambda (m)
262 (number->string (match:end m 1))))
263
264 (try "_aybycyd_" "x+" "_axbxxcxxxd_" 'pre "y" 'post)
265
266 ;; This should not go into an infinite loop, just because the regexp
267 ;; can match the empty string. This test also kind of beats on our
268 ;; definition of where a null string can match.
269 (try "y_yaybycydy_y" "x*" "_axbxxcxxxd_" 'pre "y" 'post)
270
271 ;; These kind of bother me. The extension from regexp-substitute to
272 ;; regexp-substitute/global is only natural if your item list
273 ;; includes both pre and post. If those are required, why bother
274 ;; to include them at all?
275 (try "4:7:12:_" "a(x*)b" "_axxbaxbaxxxb_"
276 (lambda (m) (number->string (match:end m 1))) ":"
277 'post)
278 (try "4:10:19:_:19:10:4" "a(x*)b" "_axxbaxxxxbaxxxxxxxb_"
279 (lambda (m) (number->string (match:end m 1))) ":"
280 'post
281 ":" (lambda (m) (number->string (match:end m 1))))
282
283 ;; Jan Nieuwenhuizen's bug, 2 Sep 1999
284 (try "" "_" (make-string 500 #\_)
285 'post))
644c5165
AW
286
287(with-test-prefix "nonascii locales"
43ecaffc
LC
288 (pass-if "match structures refer to char offsets"
289 (with-locale "en_US.utf8"
290 ;; bug #31650
644c5165 291 (equal? (match:substring (string-match ".*" "calçot") 0)
7aa394b5
LC
292 "calçot")))
293
294 (pass-if "match structures refer to char offsets, non-ASCII pattern"
295 (with-locale "en_US.utf8"
296 ;; bug #31650
297 (equal? (match:substring (string-match "λ: The Ultimate (.*)"
298 "λ: The Ultimate GOTO")
299 1)
300 "GOTO"))))