Add R6RS character names
[bpt/guile.git] / test-suite / tests / strings.test
CommitLineData
9aa2c796
JB
1;;;; strings.test --- test suite for Guile's string functions -*- scheme -*-
2;;;; Jim Blandy <jimb@red-bean.com> --- August 1999
3;;;;
f5d7662f 4;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
9aa2c796 5;;;;
53befeb7
NJ
6;;;; This library is free software; you can redistribute it and/or
7;;;; modify it under the terms of the GNU Lesser General Public
8;;;; License as published by the Free Software Foundation; either
9;;;; version 3 of the License, or (at your option) any later version.
9aa2c796 10;;;;
53befeb7 11;;;; This library is distributed in the hope that it will be useful,
9aa2c796 12;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
53befeb7
NJ
13;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14;;;; Lesser General Public License for more details.
9aa2c796 15;;;;
53befeb7
NJ
16;;;; You should have received a copy of the GNU Lesser General Public
17;;;; License along with this library; if not, write to the Free Software
18;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
9aa2c796 19
6e7d5622
KR
20(define-module (test-strings)
21 #:use-module (test-suite lib))
22
d7e4c2da
MV
;; Descriptor for the error raised when a read-only string is modified:
;; a pair of the expected throw key and a message pattern (presumably a
;; regexp, given the leading "^" -- confirm against the test library).
(define exception:read-only-string
  (let ((key 'misc-error)
        (message-pattern "^string is read-only"))
    (cons key message-pattern)))
f5d7662f
MG
;; Descriptor for the reader error raised on a malformed backslash
;; escape: a pair of the expected throw key and a message pattern.
(define exception:illegal-escape
  (let ((key 'read-error)
        (message-pattern "illegal character in escape sequence"))
    (cons key message-pattern)))
2759c092
MG
27;; Wrong types may have either the 'wrong-type-arg key when
28;; interpreted or 'vm-error when compiled. This matches both.
;; The key slot is #t rather than a specific symbol so that the
;; descriptor is not tied to a single throw key; only the message
;; pattern is fixed.
(define exception:wrong-type-arg
  (let ((any-key #t)
        (message-pattern "Wrong type"))
    (cons any-key message-pattern)))
548b9252 31
6e7d5622
KR
32;; Create a string from integer char values, eg. (string-ints 65) => "A"
;; Build a string whose characters have the integer values ARGS, in
;; order.  With no arguments the result is the empty string.
(define (string-ints . args)
  (let ((chars (map integer->char args)))
    (apply string chars)))
35
f5d7662f
MG
36;;
37;; string internals
38;;
39
40;; Some abbreviations
41;; BMP - Basic Multilingual Plane (codepoints from U+0000 to U+FFFF)
42;; SMP - Supplementary Multilingual Plane (codepoints from U+10000 to U+1FFFF)
43
;; Checks on the internal representation of strings.  Every test
;; inspects the result of %string-dump with assq-ref, looking at the
;; 'start, 'stringbuf-length, 'stringbuf-chars, 'read-only, 'shared,
;; 'stringbuf-shared and 'stringbuf-wide entries.
(with-test-prefix "string internals"

  (pass-if "new string starts at 1st char in stringbuf"
    (let ((s "abc"))
      (= 0 (assq-ref (%string-dump s) 'start))))

  (pass-if "length of new string same as stringbuf"
    (let ((s "def"))
      (= (string-length s) (assq-ref (%string-dump s) 'stringbuf-length))))

  (pass-if "contents of new string same as stringbuf"
    (let ((s "ghi"))
      (string=? s (assq-ref (%string-dump s) 'stringbuf-chars))))

  (pass-if "writable strings are not read-only"
    (let ((s "zyx"))
      (not (assq-ref (%string-dump s) 'read-only))))

  (pass-if "read-only strings are read-only"
    (let ((s (substring/read-only "zyx" 0)))
      (assq-ref (%string-dump s) 'read-only)))

  (pass-if "new Latin-1 encoded strings are not shared"
    (let ((s "abc"))
      (not (assq-ref (%string-dump s) 'stringbuf-shared))))

  (pass-if "new UCS-4 encoded strings are not shared"
    (let ((s "\u0100bc"))
      (not (assq-ref (%string-dump s) 'stringbuf-shared))))

  ;; Should this be true?  It isn't currently true, so the test throws
  ;; 'untested before the comparison below is ever reached.
  (pass-if "null shared substrings are shared"
    (let* ((s1 "")
           (s2 (substring/shared s1 0 0)))
      (throw 'untested)
      (eq? (assq-ref (%string-dump s2) 'shared)
           s1)))

  (pass-if "ASCII shared substrings are shared"
    (let* ((s1 "foobar")
           (s2 (substring/shared s1 0 3)))
      (eq? (assq-ref (%string-dump s2) 'shared)
           s1)))

  (pass-if "BMP shared substrings are shared"
    (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
           (s2 (substring/shared s1 0 3)))
      (eq? (assq-ref (%string-dump s2) 'shared)
           s1)))

  (pass-if "null substrings are not shared"
    (let* ((s1 "")
           (s2 (substring s1 0 0)))
      (not (eq? (assq-ref (%string-dump s2) 'shared)
                s1))))

  (pass-if "ASCII substrings are not shared"
    (let* ((s1 "foobar")
           (s2 (substring s1 0 3)))
      (not (eq? (assq-ref (%string-dump s2) 'shared)
                s1))))

  (pass-if "BMP substrings are not shared"
    (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
           (s2 (substring s1 0 3)))
      (not (eq? (assq-ref (%string-dump s2) 'shared)
                s1))))

  ;; In the next two tests s2 is never read: it is bound only so that a
  ;; substring of s1 exists when s1 is dumped.
  (pass-if "ASCII substrings share stringbufs before copy-on-write"
    (let* ((s1 "foobar")
           (s2 (substring s1 0 3)))
      (assq-ref (%string-dump s1) 'stringbuf-shared)))

  (pass-if "BMP substrings share stringbufs before copy-on-write"
    (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
           (s2 (substring s1 0 3)))
      (assq-ref (%string-dump s1) 'stringbuf-shared)))

  ;; Writing to the substring is what is expected to end the sharing.
  (pass-if "ASCII substrings don't share stringbufs after copy-on-write"
    (let* ((s1 "foobar")
           (s2 (substring s1 0 3)))
      (string-set! s2 0 #\F)
      (not (assq-ref (%string-dump s2) 'stringbuf-shared))))

  (pass-if "BMP substrings don't share stringbufs after copy-on-write"
    (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
           (s2 (substring s1 0 3)))
      (string-set! s2 0 #\F)
      (not (assq-ref (%string-dump s2) 'stringbuf-shared))))

  (with-test-prefix "encodings"

    (pass-if "null strings are Latin-1 encoded"
      (let ((s ""))
        (not (assq-ref (%string-dump s) 'stringbuf-wide))))

    (pass-if "ASCII strings are Latin-1 encoded"
      (let ((s "jkl"))
        (not (assq-ref (%string-dump s) 'stringbuf-wide))))

    (pass-if "Latin-1 strings are Latin-1 encoded"
      (let ((s "\xC0\xC1\xC2"))
        (not (assq-ref (%string-dump s) 'stringbuf-wide))))

    ;; Every character is written with the four-digit \u escape.  A
    ;; two-digit \x escape followed by more digits would yield U+0001
    ;; plus literal digits rather than a BMP character.
    (pass-if "BMP strings are UCS-4 encoded"
      (let ((s "\u0100\u0101\u0102"))
        (assq-ref (%string-dump s) 'stringbuf-wide)))

    ;; Every character is written with the six-digit \U escape so that
    ;; the whole string really consists of SMP characters.
    (pass-if "SMP strings are UCS-4 encoded"
      (let ((s "\U010300\U010301\U010302"))
        (assq-ref (%string-dump s) 'stringbuf-wide)))

    (pass-if "null list->string is Latin-1 encoded"
      (let ((s (string-ints)))
        (not (assq-ref (%string-dump s) 'stringbuf-wide))))

    (pass-if "ASCII list->string is Latin-1 encoded"
      (let ((s (string-ints 65 66 67)))
        (not (assq-ref (%string-dump s) 'stringbuf-wide))))

    (pass-if "Latin-1 list->string is Latin-1 encoded"
      (let ((s (string-ints #xc0 #xc1 #xc2)))
        (not (assq-ref (%string-dump s) 'stringbuf-wide))))

    (pass-if "BMP list->string is UCS-4 encoded"
      (let ((s (string-ints #x0100 #x0101 #x0102)))
        (assq-ref (%string-dump s) 'stringbuf-wide)))

    (pass-if "SMP list->string is UCS-4 encoded"
      (let ((s (string-ints #x010300 #x010301 #x010302)))
        (assq-ref (%string-dump s) 'stringbuf-wide)))

    ;; U+0040 written with the six-digit escape must still be stored
    ;; narrow: the escape form must not influence the encoding.
    (pass-if "encoding of string not based on escape style"
      (let ((s "\U000040"))
        (not (assq-ref (%string-dump s) 'stringbuf-wide))))))
179
;; Backslash escapes in string literals: malformed hex escapes must be
;; rejected by the reader, and well-formed ones must denote the
;; expected characters.
(with-test-prefix "escapes"

  ;; Run `read' over TEXT, the source text of a string literal.
  (let ((read-literal (lambda (text)
                        (with-input-from-string text read))))

    (pass-if-exception "non-hex char in two-digit hex-escape"
      exception:illegal-escape
      (read-literal "\"\\x0g\""))

    (pass-if-exception "non-hex char in four-digit hex-escape"
      exception:illegal-escape
      (read-literal "\"\\u000g\""))

    (pass-if-exception "non-hex char in six-digit hex-escape"
      exception:illegal-escape
      (read-literal "\"\\U00000g\""))

    (pass-if-exception "premature termination of two-digit hex-escape"
      exception:illegal-escape
      (read-literal "\"\\x0\""))

    (pass-if-exception "premature termination of four-digit hex-escape"
      exception:illegal-escape
      (read-literal "\"\\u000\""))

    (pass-if-exception "premature termination of six-digit hex-escape"
      exception:illegal-escape
      (read-literal "\"\\U00000\"")))

  ;; In the next three tests the character at index 2 must be the one
  ;; named by the fixed-width escape; the trailing "f" is ordinary text.
  (pass-if "extra hex digits ignored for two-digit hex escape"
    (let ((expected (integer->char #xff)))
      (eqv? expected (string-ref "--\xfff--" 2))))

  (pass-if "extra hex digits ignored for four-digit hex escape"
    (let ((expected (integer->char #x0100)))
      (eqv? expected (string-ref "--\u0100f--" 2))))

  (pass-if "extra hex digits ignored for six-digit hex escape"
    (let ((expected (integer->char #x010300)))
      (eqv? expected (string-ref "--\U010300f--" 2))))

  (pass-if "escaped characters match non-escaped ASCII characters"
    (let ((escaped "\x41\u0042\U000043"))
      (string=? "ABC" escaped)))

  (pass-if "R5RS backslash escapes"
    (let ((built (string #\" #\\)))
      (string=? "\"\\" built)))

  (pass-if "Guile extensions backslash escapes"
    (let* ((codes '(0 7 12 10 13 9 11))
           (built (apply string (map integer->char codes))))
      (string=? "\0\a\f\n\r\t\v" built))))
227
228;;
229;; string?
230;;
;; The string? predicate accepts a string and rejects a symbol.
(with-test-prefix "string?"

  (pass-if "string"
    (let ((candidate "abc"))
      (string? candidate)))

  (pass-if "symbol"
    (let ((candidate 'abc))
      (not (string? candidate)))))
238
239;;
240;; string-null?
241;;
242
;; string-null? is true only for the empty string; a string holding a
;; single NUL character is not empty, and a symbol is a type error.
(with-test-prefix "string-null?"

  (pass-if "null string"
    (let ((empty ""))
      (string-null? empty)))

  (pass-if "non-null string"
    (let ((one-char "a"))
      (not (string-null? one-char))))

  (pass-if "respects \\0"
    (let ((nul-only "\0"))
      (not (string-null? nul-only))))

  (pass-if-exception "symbol"
    exception:wrong-type-arg
    (let ((not-a-string 'a))
      (string-null? not-a-string))))
6e7d5622
KR
257
258;;
259;; string=?
260;;
261
049fa449
DH
;; string=? must compare the full length of both arguments (a trailing
;; NUL counts as a character) and reject symbols in either position.
(with-test-prefix "string=?"

  (pass-if "respects 1st parameter's string length"
    (let ((with-nul "foo\0")
          (plain "foo"))
      (not (string=? with-nul plain))))

  (pass-if "respects 2nd paramter's string length"
    (let ((plain "foo")
          (with-nul "foo\0"))
      (not (string=? plain with-nul))))

  (with-test-prefix "wrong argument type"

    (pass-if-exception "1st argument symbol"
      exception:wrong-type-arg
      (let ((sym 'a))
        (string=? sym "a")))

    (pass-if-exception "2nd argument symbol"
      exception:wrong-type-arg
      (let ((sym 'b))
        (string=? "a" sym)))))
279
6e7d5622
KR
280;;
281;; string<?
282;;
283
049fa449
DH
;; string<? must look past an embedded NUL, reject symbols, and agree
;; with char<? when comparing one-character strings.
(with-test-prefix "string<?"

  (pass-if "respects string length"
    (let ((left "foo\0a")
          (same "foo\0a")
          (greater "foo\0b"))
      (if (string<? left same)
          #f
          (string<? left greater))))

  (with-test-prefix "wrong argument type"

    (pass-if-exception "1st argument symbol"
      exception:wrong-type-arg
      (let ((sym 'a))
        (string<? sym "a")))

    (pass-if-exception "2nd argument symbol"
      exception:wrong-type-arg
      (let ((sym 'b))
        (string<? "a" sym))))

  (pass-if "same as char<?"
    (let ((lo 0)
          (hi 255))
      (eq? (char<? (integer->char lo) (integer->char hi))
           (string<? (string-ints lo) (string-ints hi))))))
303
304;;
305;; string-ci<?
306;;
049fa449
DH
307
;; string-ci<? must look past an embedded NUL, reject symbols, and
;; agree with char-ci<? when comparing one-character strings.
(with-test-prefix "string-ci<?"

  (pass-if "respects string length"
    (let ((left "foo\0a")
          (same "foo\0a")
          (greater "foo\0b"))
      (if (string-ci<? left same)
          #f
          (string-ci<? left greater))))

  (with-test-prefix "wrong argument type"

    (pass-if-exception "1st argument symbol"
      exception:wrong-type-arg
      (let ((sym 'a))
        (string-ci<? sym "a")))

    (pass-if-exception "2nd argument symbol"
      exception:wrong-type-arg
      (let ((sym 'b))
        (string-ci<? "a" sym))))

  (pass-if "same as char-ci<?"
    (let ((lo 0)
          (hi 255))
      (eq? (char-ci<? (integer->char lo) (integer->char hi))
           (string-ci<? (string-ints lo) (string-ints hi))))))
327
328;;
329;; string<=?
330;;
331
;; On one-character strings string<=? must give the same answer as
;; char<=? gives for the two characters.
(with-test-prefix "string<=?"

  (pass-if "same as char<=?"
    (let ((lo 0)
          (hi 255))
      (eq? (char<=? (integer->char lo) (integer->char hi))
           (string<=? (string-ints lo) (string-ints hi))))))
337
338;;
339;; string-ci<=?
340;;
341
;; On one-character strings string-ci<=? must give the same answer as
;; char-ci<=? gives for the two characters.
(with-test-prefix "string-ci<=?"

  (pass-if "same as char-ci<=?"
    (let ((lo 0)
          (hi 255))
      (eq? (char-ci<=? (integer->char lo) (integer->char hi))
           (string-ci<=? (string-ints lo) (string-ints hi))))))
347
348;;
349;; string>?
350;;
351
;; On one-character strings string>? must give the same answer as
;; char>? gives for the two characters.
(with-test-prefix "string>?"

  (pass-if "same as char>?"
    (let ((lo 0)
          (hi 255))
      (eq? (char>? (integer->char lo) (integer->char hi))
           (string>? (string-ints lo) (string-ints hi))))))
357
358;;
359;; string-ci>?
360;;
361
;; On one-character strings string-ci>? must give the same answer as
;; char-ci>? gives for the two characters.
(with-test-prefix "string-ci>?"

  (pass-if "same as char-ci>?"
    (let ((lo 0)
          (hi 255))
      (eq? (char-ci>? (integer->char lo) (integer->char hi))
           (string-ci>? (string-ints lo) (string-ints hi))))))
367
368;;
369;; string>=?
370;;
371
;; On one-character strings string>=? must give the same answer as
;; char>=? gives for the two characters.
(with-test-prefix "string>=?"

  (pass-if "same as char>=?"
    (let ((lo 0)
          (hi 255))
      (eq? (char>=? (integer->char lo) (integer->char hi))
           (string>=? (string-ints lo) (string-ints hi))))))
377
378;;
379;; string-ci>=?
380;;
381
;; On one-character strings string-ci>=? must give the same answer as
;; char-ci>=? gives for the two characters.
(with-test-prefix "string-ci>=?"

  (pass-if "same as char-ci>=?"
    (let ((lo 0)
          (hi 255))
      (eq? (char-ci>=? (integer->char lo) (integer->char hi))
           (string-ci>=? (string-ints lo) (string-ints hi))))))
387
edb7bb47
JG
388;;
389;; Unicode string normalization forms
390;;
391
392;;
393;; string-normalize-nfd
394;;
395
;; NFD of U+00E9 must equal "e" followed by U+0301.
(with-test-prefix "string-normalize-nfd"

  (pass-if "canonical decomposition is equal?"
    (let ((input "\xe9")
          (expected "\x65\u0301"))
      (equal? (string-normalize-nfd input) expected))))
400
401;;
402;; string-normalize-nfkd
403;;
404
;; NFKD of U+1E9B U+0323 must equal "s" followed by U+0323 U+0307.
(with-test-prefix "string-normalize-nfkd"

  (pass-if "compatibility decomposition is equal?"
    (let ((input "\u1e9b\u0323")
          (expected "s\u0323\u0307"))
      (equal? (string-normalize-nfkd input) expected))))
409
410;;
411;; string-normalize-nfc
412;;
413
;; NFC of "e" followed by U+0301 must equal U+00E9.
(with-test-prefix "string-normalize-nfc"

  (pass-if "canonical composition is equal?"
    (let ((input "\x65\u0301")
          (expected "\xe9"))
      (equal? (string-normalize-nfc input) expected))))
418
419;;
420;; string-normalize-nfkc
421;;
422
;; NFKC of U+1E9B U+0323 must equal U+1E69.
(with-test-prefix "string-normalize-nfkc"

  (pass-if "compatibility composition is equal?"
    (let ((input "\u1e9b\u0323")
          (expected "\u1e69"))
      (equal? (string-normalize-nfkc input) expected))))
427
3ae3166b
LC
428;;
429;; string-ref
430;;
431
;; string-ref: an index outside the string must raise out-of-range, and
;; a valid index must yield the character written in the literal,
;; whichever escape form was used to write it.
(with-test-prefix "string-ref"

  (pass-if-exception "empty string"
    exception:out-of-range
    (let ((empty ""))
      (string-ref empty 0)))

  (pass-if-exception "empty string and non-zero index"
    exception:out-of-range
    (let ((empty ""))
      (string-ref empty 123)))

  (pass-if-exception "out of range"
    exception:out-of-range
    (let ((text "hello"))
      (string-ref text 123)))

  (pass-if-exception "negative index"
    exception:out-of-range
    (let ((text "hello"))
      (string-ref text -1)))

  (pass-if "regular string, ASCII char"
    (char=? #\G (string-ref "GNU Guile" 4)))

  (pass-if "regular string, hex escaped Latin-1 char"
    (let ((expected (integer->char #xff)))
      (char=? (string-ref "--\xff--" 2) expected)))

  (pass-if "regular string, hex escaped BMP char"
    (let ((expected (integer->char #x0100)))
      (char=? (string-ref "--\u0100--" 2) expected)))

  (pass-if "regular string, hex escaped SMP char"
    (let ((expected (integer->char #x010300)))
      (char=? (string-ref "--\U010300--" 2) expected))))
3ae3166b 464
6e7d5622
KR
465;;
466;; string-set!
467;;
049fa449
DH
468
;; string-set!: out-of-range indices and read-only targets must raise,
;; and a stored character must be read back unchanged by string-ref.
;; Every writable target is a fresh string-copy.
(with-test-prefix "string-set!"

  (pass-if-exception "empty string"
    exception:out-of-range
    (let ((buf (string-copy "")))
      (string-set! buf 0 #\x)))

  (pass-if-exception "empty string and non-zero index"
    exception:out-of-range
    (let ((buf (string-copy "")))
      (string-set! buf 123 #\x)))

  (pass-if-exception "out of range"
    exception:out-of-range
    (let ((buf (string-copy "hello")))
      (string-set! buf 123 #\x)))

  (pass-if-exception "negative index"
    exception:out-of-range
    (let ((buf (string-copy "hello")))
      (string-set! buf -1 #\x)))

  (pass-if-exception "read-only string"
    exception:read-only-string
    (let ((frozen (substring/read-only "abc" 0)))
      (string-set! frozen 1 #\space)))

  (pass-if "regular string, ASCII char"
    (let ((buf (string-copy "GNU guile"))
          (ch #\G))
      (string-set! buf 4 ch)
      (char=? (string-ref buf 4) ch)))

  (pass-if "regular string, Latin-1 char"
    (let ((buf (string-copy "GNU guile"))
          (ch (integer->char #xfe)))
      (string-set! buf 4 ch)
      (char=? (string-ref buf 4) ch)))

  (pass-if "regular string, BMP char"
    (let ((buf (string-copy "GNU guile"))
          (ch (integer->char #x0100)))
      (string-set! buf 4 ch)
      (char=? (string-ref buf 4) ch)))

  (pass-if "regular string, SMP char"
    (let ((buf (string-copy "GNU guile"))
          (ch (integer->char #x010300)))
      (string-set! buf 4 ch)
      (char=? (string-ref buf 4) ch))))
3ae3166b 510
3c7cf7f5
MG
511;;
512;; list->string (exercised here via `apply string')
513;;
;; Applying `string' to a circular list must raise a wrong-type error.
(with-test-prefix "string"

  (pass-if-exception "convert circular list to string"
    exception:wrong-type-arg
    (let* ((chars (list #\a #\b #\c))
           (tail (cddr chars)))
      ;; Point the last pair back at the second one, closing the loop.
      (set-cdr! tail (cdr chars))
      (apply string chars))))
521
50e20a60
KR
;; string-split must recognise a separator whose code is above 127.
(with-test-prefix "string-split"

  ;; in guile 1.6.7 and earlier, character >=128 wasn't matched in the string
  (pass-if "char 255"
    (let* ((input (string #\a (integer->char 255) #\b))
           (pieces (string-split input (integer->char 255))))
      (equal? '("a" "b") pieces))))
529
049fa449
DH
;; substring-move! must reject a source range whose start (3) lies
;; after its end (0).
(with-test-prefix "substring-move!"

  (pass-if-exception "substring-move! checks start and end correctly"
    exception:out-of-range
    (let ((source "sample")
          (target "test"))
      (substring-move! source 3 0 target 3))))
1c17f6b0
MV
535
;; Writes made through a substring/shared view must show up in the
;; string it was taken from, but must not reach a string that the
;; parent was merely copied from (string-copy or substring).
(with-test-prefix "substring/shared"

  (pass-if "modify indirectly"
    (let* ((whole (string-copy "foofoofoo"))
           (middle (substring/shared whole 3 6)))
      (string-upcase! middle)
      (string=? whole "fooFOOfoo")))

  (pass-if "modify cow indirectly"
    (let* ((original (string-copy "foofoofoo"))
           (duplicate (string-copy original))
           (middle (substring/shared duplicate 3 6)))
      (string-upcase! middle)
      (if (string=? original "foofoofoo")
          (string=? duplicate "fooFOOfoo")
          #f)))

  (pass-if "modify double indirectly"
    (let* ((whole (string-copy "foofoofoo"))
           (outer (substring/shared whole 2 7))
           (inner (substring/shared outer 1 4)))
      (string-upcase! inner)
      (string=? whole "fooFOOfoo")))

  (pass-if "modify cow double indirectly"
    (let* ((whole "foofoofoo")
           (copied (substring whole 2 7))
           (inner (substring/shared copied 1 4)))
      (string-upcase! inner)
      (if (string=? whole "foofoofoo")
          (string=? copied "oFOOf")
          #f))))