Commit | Line | Data |
---|---|---|
644c5165 | 1 | ;;;; regexp.test --- test Guile's regexps -*- coding: utf-8; mode: scheme -*- |
3dcdcfe8 JB |
2 | ;;;; Jim Blandy <jimb@red-bean.com> --- September 1999 |
3 | ;;;; | |
7aa394b5 | 4 | ;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010, |
0ce22459 | 5 | ;;;; 2012, 2013, 2014 Free Software Foundation, Inc. |
7aa394b5 | 6 | ;;;; |
53befeb7 NJ |
7 | ;;;; This library is free software; you can redistribute it and/or |
8 | ;;;; modify it under the terms of the GNU Lesser General Public | |
9 | ;;;; License as published by the Free Software Foundation; either | |
10 | ;;;; version 3 of the License, or (at your option) any later version. | |
3dcdcfe8 | 11 | ;;;; |
53befeb7 | 12 | ;;;; This library is distributed in the hope that it will be useful, |
3dcdcfe8 | 13 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
53befeb7 NJ |
14 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | ;;;; Lesser General Public License for more details. | |
3dcdcfe8 | 16 | ;;;; |
53befeb7 NJ |
17 | ;;;; You should have received a copy of the GNU Lesser General Public |
18 | ;;;; License along with this library; if not, write to the Free Software | |
19 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
3dcdcfe8 | 20 | |
76ed3e87 LC |
21 | (define-module (test-suite test-regexp) |
22 | #:use-module (test-suite lib) | |
7583976b | 23 | #:use-module (srfi srfi-1) |
76ed3e87 | 24 | #:use-module (ice-9 regex)) |
3dcdcfe8 | 25 | |
0ce22459 MW |
26 | (when (defined? 'setlocale) |
27 | (setlocale LC_ALL "C")) | |
e354d768 | 28 | |
dec84a0a LC |
29 | ;; Don't fail if we can't display a test name to stdout/stderr. |
30 | (set-port-conversion-strategy! (current-output-port) 'escape) | |
31 | (set-port-conversion-strategy! (current-error-port) 'escape) | |
32 | ||
76ed3e87 | 33 | \f |
3dcdcfe8 JB |
34 | ;;; Run a regexp-substitute or regexp-substitute/global test, once |
35 | ;;; providing a real port and once providing #f, requesting direct | |
36 | ;;; string output. | |
37 | (define (vary-port func expected . args) | |
38 | (pass-if "port is string port" | |
39 | (equal? expected | |
40 | (call-with-output-string | |
41 | (lambda (port) | |
42 | (apply func port args))))) | |
43 | (pass-if "port is #f" | |
44 | (equal? expected | |
45 | (apply func #f args)))) | |
46 | ||
47 | (define (object->string obj) | |
48 | (call-with-output-string | |
49 | (lambda (port) | |
50 | (write obj port)))) | |
51 | ||
710491c5 KR |
52 | ;;; |
53 | ;;; make-regexp | |
54 | ;;; | |
55 | ||
56 | (with-test-prefix "make-regexp" | |
57 | ||
58 | (pass-if-exception "no args" exception:wrong-num-args | |
59 | (make-regexp)) | |
60 | ||
61 | (pass-if-exception "bad pat arg" exception:wrong-type-arg | |
62 | (make-regexp 'blah)) | |
63 | ||
64 | ;; in guile prior to 1.6.5 make-regex didn't validate its flags args | |
65 | (pass-if-exception "bad arg 2" exception:wrong-type-arg | |
66 | (make-regexp "xyz" 'abc)) | |
67 | ||
68 | (pass-if-exception "bad arg 3" exception:wrong-type-arg | |
69 | (make-regexp "xyz" regexp/icase 'abc))) | |
70 | ||
38923713 KR |
71 | ;;; |
72 | ;;; match:string | |
73 | ;;; | |
74 | ||
75 | (with-test-prefix "match:string" | |
76 | ||
77 | (pass-if "foo" | |
78 | (string=? "foo" (match:string (string-match ".*" "foo")))) | |
79 | ||
80 | (pass-if "foo offset 1" | |
81 | (string=? "foo" (match:string (string-match ".*" "foo" 1))))) | |
82 | ||
8e1973d9 KR |
83 | ;;; |
84 | ;;; regexp-exec | |
85 | ;;; | |
86 | ||
87 | (with-test-prefix "regexp-exec" | |
88 | ||
89 | (pass-if-exception "non-integer offset" exception:wrong-type-arg | |
90 | (let ((re (make-regexp "ab+"))) | |
91 | (regexp-exec re "aaaabbbb" 1.5 'bogus-flags-arg))) | |
92 | ||
93 | (pass-if-exception "non-string input" exception:wrong-type-arg | |
94 | (let ((re (make-regexp "ab+"))) | |
95 | (regexp-exec re 'not-a-string))) | |
96 | ||
97 | (pass-if-exception "non-string input, with offset" exception:wrong-type-arg | |
98 | (let ((re (make-regexp "ab+"))) | |
99 | (regexp-exec re 'not-a-string 5))) | |
100 | ||
101 | ;; in guile 1.8.1 and earlier, a #\nul character in the input string was | |
102 | ;; only detected in a critical section, and the resulting error throw | |
103 | ;; abort()ed the program | |
104 | (pass-if-exception "nul in input" exception:string-contains-nul | |
105 | (let ((re (make-regexp "ab+"))) | |
106 | (regexp-exec re (string #\a #\b (integer->char 0))))) | |
107 | ||
108 | ;; in guile 1.8.1 and earlier, a bogus flags argument was only detected | |
109 | ;; inside a critical section, and the resulting error throw abort()ed the | |
110 | ;; program | |
111 | (pass-if-exception "non-integer flags" exception:wrong-type-arg | |
112 | (let ((re (make-regexp "ab+"))) | |
113 | (regexp-exec re "aaaabbbb" 0 'bogus-flags-arg)))) | |
114 | ||
c6333102 LC |
115 | ;;; |
116 | ;;; fold-matches | |
117 | ;;; | |
118 | ||
119 | (with-test-prefix "fold-matches" | |
120 | ||
121 | (pass-if "without flags" | |
122 | (equal? '("hello") | |
123 | (fold-matches "^[a-z]+$" "hello" '() | |
124 | (lambda (match result) | |
125 | (cons (match:substring match) | |
126 | result))))) | |
127 | ||
128 | (pass-if "with flags" | |
129 | ;; Prior to 1.8.6, passing an additional flag would not work. | |
130 | (null? | |
131 | (fold-matches "^[a-z]+$" "hello" '() | |
132 | (lambda (match result) | |
133 | (cons (match:substring match) | |
134 | result)) | |
d6e1c8bf CJY |
135 | (logior regexp/notbol regexp/noteol)))) |
136 | ||
137 | (pass-if "regexp/notbol is set correctly" | |
138 | (equal? '("foo") | |
139 | (fold-matches "^foo" "foofoofoofoo" '() | |
140 | (lambda (match result) | |
141 | (cons (match:substring match) | |
142 | result)))))) | |
c6333102 LC |
143 | |
144 | ||
1df6de96 KR |
145 | ;;; |
146 | ;;; regexp-quote | |
147 | ;;; | |
148 | ||
8a954f3d LC |
149 | (define-syntax with-ascii-or-latin1-locale |
150 | (syntax-rules () | |
151 | ((_ chr body ...) | |
152 | (if (> chr 127) | |
153 | (with-latin1-locale body ...) | |
154 | (begin body ...))))) | |
155 | ||
211e71a1 LC |
156 | ;; Since `regexp-quote' uses string ports, and since it is used below |
157 | ;; with non-ASCII characters, these ports must be Unicode-capable. | |
158 | (define-syntax with-unicode | |
159 | (syntax-rules () | |
160 | ((_ exp) | |
161 | (with-fluids ((%default-port-encoding "UTF-8")) | |
162 | exp)))) | |
163 | ||
1df6de96 KR |
164 | (with-test-prefix "regexp-quote" |
165 | ||
e354d768 MG |
166 | (pass-if-exception "no args" exception:wrong-num-args |
167 | (regexp-quote)) | |
168 | ||
169 | (pass-if-exception "bad string arg" exception:wrong-type-arg | |
170 | (regexp-quote 'blah)) | |
171 | ||
172 | (let ((lst `((regexp/basic ,regexp/basic) | |
173 | (regexp/extended ,regexp/extended))) | |
921cd222 AW |
174 | ;; String of all latin-1 characters, except #\nul which doesn't |
175 | ;; work because it's the usual end-of-string for the underlying | |
176 | ;; C regexec(). | |
177 | (allchars (list->string (map integer->char (cdr (iota 256)))))) | |
e354d768 MG |
178 | (for-each |
179 | (lambda (elem) | |
180 | (let ((name (car elem)) | |
181 | (flag (cadr elem))) | |
182 | ||
183 | (with-test-prefix name | |
184 | ||
921cd222 | 185 | ;; Try on each individual latin-1 character, except #\nul. |
e354d768 | 186 | (do ((i 1 (1+ i))) |
921cd222 | 187 | ((>= i 256)) |
e354d768 | 188 | (let* ((c (integer->char i)) |
1c242b37 LC |
189 | (s (string c))) |
190 | (pass-if (list "char" i (format #f "~s ~s" c s)) | |
8a954f3d | 191 | (with-ascii-or-latin1-locale i |
211e71a1 | 192 | (let* ((q (with-unicode (regexp-quote s))) |
1c242b37 LC |
193 | (m (regexp-exec (make-regexp q flag) s))) |
194 | (and (= 0 (match:start m)) | |
195 | (= 1 (match:end m)))))))) | |
e354d768 | 196 | |
921cd222 AW |
197 | ;; Try on pattern "aX" where X is each latin-1 character, |
198 | ;; except #\nul. This exposes things like "?" which are | |
199 | ;; special only when they follow a pattern to repeat or | |
200 | ;; whatever ("a" in this case). | |
e354d768 | 201 | (do ((i 1 (1+ i))) |
921cd222 | 202 | ((>= i 256)) |
e354d768 MG |
203 | (let* ((c (integer->char i)) |
204 | (s (string #\a c)) | |
211e71a1 | 205 | (q (with-unicode (regexp-quote s)))) |
7583976b | 206 | (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q)) |
8a954f3d | 207 | (with-ascii-or-latin1-locale i |
7583976b | 208 | (let* ((m (regexp-exec (make-regexp q flag) s))) |
e354d768 | 209 | (and (= 0 (match:start m)) |
1c242b37 | 210 | (= 2 (match:end m)))))))) |
e354d768 MG |
211 | |
212 | (pass-if "string of all chars" | |
1c242b37 | 213 | (with-latin1-locale |
211e71a1 LC |
214 | (let ((m (regexp-exec (make-regexp (with-unicode |
215 | (regexp-quote allchars)) | |
1c242b37 LC |
216 | flag) allchars))) |
217 | (and (= 0 (match:start m)) | |
218 | (= (string-length allchars) (match:end m))))))))) | |
e354d768 | 219 | lst))) |
1df6de96 | 220 | |
710491c5 KR |
221 | ;;; |
222 | ;;; regexp-substitute | |
223 | ;;; | |
224 | ||
3dcdcfe8 JB |
225 | (with-test-prefix "regexp-substitute" |
226 | (let ((match | |
227 | (string-match "patleft(sub1)patmid(sub2)patright" | |
228 | "contleftpatleftsub1patmidsub2patrightcontright"))) | |
229 | (define (try expected . args) | |
230 | (with-test-prefix (object->string args) | |
231 | (apply vary-port regexp-substitute expected match args))) | |
232 | ||
233 | (try "") | |
234 | (try "string1" "string1") | |
235 | (try "string1string2" "string1" "string2") | |
236 | (try "patleftsub1patmidsub2patright" 0) | |
237 | (try "hi-patleftsub1patmidsub2patright-bye" "hi-" 0 "-bye") | |
238 | (try "sub1" 1) | |
239 | (try "hi-sub1-bye" "hi-" 1 "-bye") | |
240 | (try "hi-sub2-bye" "hi-" 2 "-bye") | |
241 | (try "contleft" 'pre) | |
242 | (try "contright" 'post) | |
243 | (try "contrightcontleft" 'post 'pre) | |
244 | (try "contrightcontleftcontrightcontleft" 'post 'pre 'post 'pre) | |
245 | (try "contrightsub2sub1contleft" 'post 2 1 'pre) | |
246 | (try "foosub1sub1sub1sub1bar" "foo" 1 1 1 1 "bar"))) | |
247 | ||
248 | (with-test-prefix "regexp-substitute/global" | |
249 | ||
250 | (define (try expected . args) | |
251 | (with-test-prefix (object->string args) | |
252 | (apply vary-port regexp-substitute/global expected args))) | |
253 | ||
3dcdcfe8 JB |
254 | (try "hi" "a(x*)b" "ab" "hi") |
255 | (try "" "a(x*)b" "ab" 1) | |
256 | (try "xx" "a(x*)b" "axxb" 1) | |
257 | (try "xx" "a(x*)b" "_axxb_" 1) | |
258 | (try "pre" "a(x*)b" "preaxxbpost" 'pre) | |
259 | (try "post" "a(x*)b" "preaxxbpost" 'post) | |
f88fdc6e | 260 | (try "string" "x" "string" 'pre "y" 'post) |
3dcdcfe8 JB |
261 | (try "4" "a(x*)b" "_axxb_" (lambda (m) |
262 | (number->string (match:end m 1)))) | |
263 | ||
264 | (try "_aybycyd_" "x+" "_axbxxcxxxd_" 'pre "y" 'post) | |
265 | ||
266 | ;; This should not go into an infinite loop, just because the regexp | |
267 | ;; can match the empty string. This test also kind of beats on our | |
268 | ;; definition of where a null string can match. | |
269 | (try "y_yaybycydy_y" "x*" "_axbxxcxxxd_" 'pre "y" 'post) | |
270 | ||
271 | ;; These kind of bother me. The extension from regexp-substitute to | |
272 | ;; regexp-substitute/global is only natural if your item list | |
273 | ;; includes both pre and post. If those are required, why bother | |
274 | ;; to include them at all? | |
275 | (try "4:7:12:_" "a(x*)b" "_axxbaxbaxxxb_" | |
276 | (lambda (m) (number->string (match:end m 1))) ":" | |
277 | 'post) | |
278 | (try "4:10:19:_:19:10:4" "a(x*)b" "_axxbaxxxxbaxxxxxxxb_" | |
279 | (lambda (m) (number->string (match:end m 1))) ":" | |
280 | 'post | |
281 | ":" (lambda (m) (number->string (match:end m 1)))) | |
282 | ||
283 | ;; Jan Nieuwenhuizen's bug, 2 Sep 1999 | |
284 | (try "" "_" (make-string 500 #\_) | |
285 | 'post)) | |
644c5165 AW |
286 | |
287 | (with-test-prefix "nonascii locales" | |
43ecaffc LC |
288 | (pass-if "match structures refer to char offsets" |
289 | (with-locale "en_US.utf8" | |
290 | ;; bug #31650 | |
644c5165 | 291 | (equal? (match:substring (string-match ".*" "calçot") 0) |
7aa394b5 LC |
292 | "calçot"))) |
293 | ||
294 | (pass-if "match structures refer to char offsets, non-ASCII pattern" | |
295 | (with-locale "en_US.utf8" | |
296 | ;; bug #31650 | |
297 | (equal? (match:substring (string-match "λ: The Ultimate (.*)" | |
298 | "λ: The Ultimate GOTO") | |
299 | 1) | |
300 | "GOTO")))) |