Commit | Line | Data |
---|---|---|
644c5165 | 1 | ;;;; regexp.test --- test Guile's regexps -*- coding: utf-8; mode: scheme -*- |
3dcdcfe8 JB |
2 | ;;;; Jim Blandy <jimb@red-bean.com> --- September 1999 |
3 | ;;;; | |
1c242b37 | 4 | ;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. |
3dcdcfe8 | 5 | ;;;; |
53befeb7 NJ |
6 | ;;;; This library is free software; you can redistribute it and/or |
7 | ;;;; modify it under the terms of the GNU Lesser General Public | |
8 | ;;;; License as published by the Free Software Foundation; either | |
9 | ;;;; version 3 of the License, or (at your option) any later version. | |
3dcdcfe8 | 10 | ;;;; |
53befeb7 | 11 | ;;;; This library is distributed in the hope that it will be useful, |
3dcdcfe8 | 12 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
53befeb7 NJ |
13 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | ;;;; Lesser General Public License for more details. | |
3dcdcfe8 | 15 | ;;;; |
53befeb7 NJ |
16 | ;;;; You should have received a copy of the GNU Lesser General Public |
17 | ;;;; License along with this library; if not, write to the Free Software | |
18 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
3dcdcfe8 | 19 | |
76ed3e87 LC |
20 | (define-module (test-suite test-regexp) |
21 | #:use-module (test-suite lib) | |
7583976b | 22 | #:use-module (srfi srfi-1) |
76ed3e87 | 23 | #:use-module (ice-9 regex)) |
3dcdcfe8 | 24 | |
1c242b37 LC |
25 | (if (defined? 'setlocale) |
26 | (setlocale LC_ALL "C")) | |
e354d768 | 27 | |
dec84a0a LC |
28 | ;; Don't fail if we can't display a test name to stdout/stderr. |
29 | (set-port-conversion-strategy! (current-output-port) 'escape) | |
30 | (set-port-conversion-strategy! (current-error-port) 'escape) | |
31 | ||
76ed3e87 | 32 | \f |
3dcdcfe8 JB |
33 | ;;; Run a regexp-substitute or regexp-substitute/global test, once |
34 | ;;; providing a real port and once providing #f, requesting direct | |
35 | ;;; string output. | |
36 | (define (vary-port func expected . args) | |
37 | (pass-if "port is string port" | |
38 | (equal? expected | |
39 | (call-with-output-string | |
40 | (lambda (port) | |
41 | (apply func port args))))) | |
42 | (pass-if "port is #f" | |
43 | (equal? expected | |
44 | (apply func #f args)))) | |
45 | ||
46 | (define (object->string obj) | |
47 | (call-with-output-string | |
48 | (lambda (port) | |
49 | (write obj port)))) | |
50 | ||
710491c5 KR |
51 | ;;; |
52 | ;;; make-regexp | |
53 | ;;; | |
54 | ||
55 | (with-test-prefix "make-regexp" | |
56 | ||
57 | (pass-if-exception "no args" exception:wrong-num-args | |
58 | (make-regexp)) | |
59 | ||
60 | (pass-if-exception "bad pat arg" exception:wrong-type-arg | |
61 | (make-regexp 'blah)) | |
62 | ||
63 | ;; in guile prior to 1.6.5 make-regex didn't validate its flags args | |
64 | (pass-if-exception "bad arg 2" exception:wrong-type-arg | |
65 | (make-regexp "xyz" 'abc)) | |
66 | ||
67 | (pass-if-exception "bad arg 3" exception:wrong-type-arg | |
68 | (make-regexp "xyz" regexp/icase 'abc))) | |
69 | ||
38923713 KR |
70 | ;;; |
71 | ;;; match:string | |
72 | ;;; | |
73 | ||
74 | (with-test-prefix "match:string" | |
75 | ||
76 | (pass-if "foo" | |
77 | (string=? "foo" (match:string (string-match ".*" "foo")))) | |
78 | ||
79 | (pass-if "foo offset 1" | |
80 | (string=? "foo" (match:string (string-match ".*" "foo" 1))))) | |
81 | ||
8e1973d9 KR |
82 | ;;; |
83 | ;;; regexp-exec | |
84 | ;;; | |
85 | ||
86 | (with-test-prefix "regexp-exec" | |
87 | ||
88 | (pass-if-exception "non-integer offset" exception:wrong-type-arg | |
89 | (let ((re (make-regexp "ab+"))) | |
90 | (regexp-exec re "aaaabbbb" 1.5 'bogus-flags-arg))) | |
91 | ||
92 | (pass-if-exception "non-string input" exception:wrong-type-arg | |
93 | (let ((re (make-regexp "ab+"))) | |
94 | (regexp-exec re 'not-a-string))) | |
95 | ||
96 | (pass-if-exception "non-string input, with offset" exception:wrong-type-arg | |
97 | (let ((re (make-regexp "ab+"))) | |
98 | (regexp-exec re 'not-a-string 5))) | |
99 | ||
100 | ;; in guile 1.8.1 and earlier, a #\nul character in the input string was | |
101 | ;; only detected in a critical section, and the resulting error throw | |
102 | ;; abort()ed the program | |
103 | (pass-if-exception "nul in input" exception:string-contains-nul | |
104 | (let ((re (make-regexp "ab+"))) | |
105 | (regexp-exec re (string #\a #\b (integer->char 0))))) | |
106 | ||
107 | ;; in guile 1.8.1 and earlier, a bogus flags argument was only detected | |
108 | ;; inside a critical section, and the resulting error throw abort()ed the | |
109 | ;; program | |
110 | (pass-if-exception "non-integer flags" exception:wrong-type-arg | |
111 | (let ((re (make-regexp "ab+"))) | |
112 | (regexp-exec re "aaaabbbb" 0 'bogus-flags-arg)))) | |
113 | ||
c6333102 LC |
114 | ;;; |
115 | ;;; fold-matches | |
116 | ;;; | |
117 | ||
118 | (with-test-prefix "fold-matches" | |
119 | ||
120 | (pass-if "without flags" | |
121 | (equal? '("hello") | |
122 | (fold-matches "^[a-z]+$" "hello" '() | |
123 | (lambda (match result) | |
124 | (cons (match:substring match) | |
125 | result))))) | |
126 | ||
127 | (pass-if "with flags" | |
128 | ;; Prior to 1.8.6, passing an additional flag would not work. | |
129 | (null? | |
130 | (fold-matches "^[a-z]+$" "hello" '() | |
131 | (lambda (match result) | |
132 | (cons (match:substring match) | |
133 | result)) | |
134 | (logior regexp/notbol regexp/noteol))))) | |
135 | ||
136 | ||
1df6de96 KR |
137 | ;;; |
138 | ;;; regexp-quote | |
139 | ;;; | |
140 | ||
8a954f3d LC |
141 | (define-syntax with-ascii-or-latin1-locale |
142 | (syntax-rules () | |
143 | ((_ chr body ...) | |
144 | (if (> chr 127) | |
145 | (with-latin1-locale body ...) | |
146 | (begin body ...))))) | |
147 | ||
a1a3a0dd AW |
148 | (define char-code-limit 256) |
149 | ||
1df6de96 KR |
150 | (with-test-prefix "regexp-quote" |
151 | ||
e354d768 MG |
152 | (pass-if-exception "no args" exception:wrong-num-args |
153 | (regexp-quote)) | |
154 | ||
155 | (pass-if-exception "bad string arg" exception:wrong-type-arg | |
156 | (regexp-quote 'blah)) | |
157 | ||
158 | (let ((lst `((regexp/basic ,regexp/basic) | |
159 | (regexp/extended ,regexp/extended))) | |
160 | ;; string of all characters, except #\nul which doesn't work because | |
161 | ;; it's the usual end-of-string for the underlying C regexec() | |
162 | (allchars (list->string (map integer->char | |
163 | (cdr (iota char-code-limit)))))) | |
164 | (for-each | |
165 | (lambda (elem) | |
166 | (let ((name (car elem)) | |
167 | (flag (cadr elem))) | |
168 | ||
169 | (with-test-prefix name | |
170 | ||
171 | ;; try on each individual character, except #\nul | |
172 | (do ((i 1 (1+ i))) | |
173 | ((>= i char-code-limit)) | |
174 | (let* ((c (integer->char i)) | |
1c242b37 LC |
175 | (s (string c))) |
176 | (pass-if (list "char" i (format #f "~s ~s" c s)) | |
8a954f3d | 177 | (with-ascii-or-latin1-locale i |
1c242b37 LC |
178 | (let* ((q (regexp-quote s)) |
179 | (m (regexp-exec (make-regexp q flag) s))) | |
180 | (and (= 0 (match:start m)) | |
181 | (= 1 (match:end m)))))))) | |
e354d768 MG |
182 | |
183 | ;; try on pattern "aX" where X is each character, except #\nul | |
184 | ;; this exposes things like "?" which are special only when they | |
185 | ;; follow a pattern to repeat or whatever ("a" in this case) | |
186 | (do ((i 1 (1+ i))) | |
187 | ((>= i char-code-limit)) | |
188 | (let* ((c (integer->char i)) | |
189 | (s (string #\a c)) | |
190 | (q (regexp-quote s))) | |
7583976b | 191 | (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q)) |
8a954f3d | 192 | (with-ascii-or-latin1-locale i |
7583976b | 193 | (let* ((m (regexp-exec (make-regexp q flag) s))) |
e354d768 | 194 | (and (= 0 (match:start m)) |
1c242b37 | 195 | (= 2 (match:end m)))))))) |
e354d768 MG |
196 | |
197 | (pass-if "string of all chars" | |
1c242b37 LC |
198 | (with-latin1-locale |
199 | (let ((m (regexp-exec (make-regexp (regexp-quote allchars) | |
200 | flag) allchars))) | |
201 | (and (= 0 (match:start m)) | |
202 | (= (string-length allchars) (match:end m))))))))) | |
e354d768 | 203 | lst))) |
1df6de96 | 204 | |
710491c5 KR |
205 | ;;; |
206 | ;;; regexp-substitute | |
207 | ;;; | |
208 | ||
3dcdcfe8 JB |
209 | (with-test-prefix "regexp-substitute" |
210 | (let ((match | |
211 | (string-match "patleft(sub1)patmid(sub2)patright" | |
212 | "contleftpatleftsub1patmidsub2patrightcontright"))) | |
213 | (define (try expected . args) | |
214 | (with-test-prefix (object->string args) | |
215 | (apply vary-port regexp-substitute expected match args))) | |
216 | ||
217 | (try "") | |
218 | (try "string1" "string1") | |
219 | (try "string1string2" "string1" "string2") | |
220 | (try "patleftsub1patmidsub2patright" 0) | |
221 | (try "hi-patleftsub1patmidsub2patright-bye" "hi-" 0 "-bye") | |
222 | (try "sub1" 1) | |
223 | (try "hi-sub1-bye" "hi-" 1 "-bye") | |
224 | (try "hi-sub2-bye" "hi-" 2 "-bye") | |
225 | (try "contleft" 'pre) | |
226 | (try "contright" 'post) | |
227 | (try "contrightcontleft" 'post 'pre) | |
228 | (try "contrightcontleftcontrightcontleft" 'post 'pre 'post 'pre) | |
229 | (try "contrightsub2sub1contleft" 'post 2 1 'pre) | |
230 | (try "foosub1sub1sub1sub1bar" "foo" 1 1 1 1 "bar"))) | |
231 | ||
232 | (with-test-prefix "regexp-substitute/global" | |
233 | ||
234 | (define (try expected . args) | |
235 | (with-test-prefix (object->string args) | |
236 | (apply vary-port regexp-substitute/global expected args))) | |
237 | ||
3dcdcfe8 JB |
238 | (try "hi" "a(x*)b" "ab" "hi") |
239 | (try "" "a(x*)b" "ab" 1) | |
240 | (try "xx" "a(x*)b" "axxb" 1) | |
241 | (try "xx" "a(x*)b" "_axxb_" 1) | |
242 | (try "pre" "a(x*)b" "preaxxbpost" 'pre) | |
243 | (try "post" "a(x*)b" "preaxxbpost" 'post) | |
f88fdc6e | 244 | (try "string" "x" "string" 'pre "y" 'post) |
3dcdcfe8 JB |
245 | (try "4" "a(x*)b" "_axxb_" (lambda (m) |
246 | (number->string (match:end m 1)))) | |
247 | ||
248 | (try "_aybycyd_" "x+" "_axbxxcxxxd_" 'pre "y" 'post) | |
249 | ||
250 | ;; This should not go into an infinite loop, just because the regexp | |
251 | ;; can match the empty string. This test also kind of beats on our | |
252 | ;; definition of where a null string can match. | |
253 | (try "y_yaybycydy_y" "x*" "_axbxxcxxxd_" 'pre "y" 'post) | |
254 | ||
255 | ;; These kind of bother me. The extension from regexp-substitute to | |
256 | ;; regexp-substitute/global is only natural if your item list | |
257 | ;; includes both pre and post. If those are required, why bother | |
258 | ;; to include them at all? | |
259 | (try "4:7:12:_" "a(x*)b" "_axxbaxbaxxxb_" | |
260 | (lambda (m) (number->string (match:end m 1))) ":" | |
261 | 'post) | |
262 | (try "4:10:19:_:19:10:4" "a(x*)b" "_axxbaxxxxbaxxxxxxxb_" | |
263 | (lambda (m) (number->string (match:end m 1))) ":" | |
264 | 'post | |
265 | ":" (lambda (m) (number->string (match:end m 1)))) | |
266 | ||
267 | ;; Jan Nieuwenhuizen's bug, 2 Sep 1999 | |
268 | (try "" "_" (make-string 500 #\_) | |
269 | 'post)) | |
644c5165 AW |
270 | |
271 | (with-test-prefix "nonascii locales" | |
43ecaffc LC |
272 | (pass-if "match structures refer to char offsets" |
273 | (with-locale "en_US.utf8" | |
274 | ;; bug #31650 | |
644c5165 AW |
275 | (equal? (match:substring (string-match ".*" "calçot") 0) |
276 | "calçot")))) |