GOOPS cosmetics
[bpt/guile.git] / test-suite / tests / strings.test
1 ;;;; strings.test --- test suite for Guile's string functions -*- scheme -*-
2 ;;;; Jim Blandy <jimb@red-bean.com> --- August 1999
3 ;;;;
4 ;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009, 2010,
5 ;;;; 2011, 2013 Free Software Foundation, Inc.
6 ;;;;
7 ;;;; This library is free software; you can redistribute it and/or
8 ;;;; modify it under the terms of the GNU Lesser General Public
9 ;;;; License as published by the Free Software Foundation; either
10 ;;;; version 3 of the License, or (at your option) any later version.
11 ;;;;
12 ;;;; This library is distributed in the hope that it will be useful,
13 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;;;; Lesser General Public License for more details.
16 ;;;;
17 ;;;; You should have received a copy of the GNU Lesser General Public
18 ;;;; License along with this library; if not, write to the Free Software
19 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
21 (define-module (test-strings)
22 #:use-module ((system base compile) #:select (compile))
23 #:use-module (test-suite lib))
24
25 (define exception:read-only-string
26 (cons 'misc-error "^string is read-only"))
27 (define exception:illegal-escape
28 (cons 'read-error "illegal character in escape sequence"))
29 ;; Wrong types may have either the 'wrong-type-arg key when
30 ;; interpreted or 'vm-error when compiled. This matches both.
31 (define exception:wrong-type-arg
32 (cons #t "Wrong type"))
33
34 ;; Create a string from integer char values, eg. (string-ints 65) => "A"
35 (define (string-ints . args)
36 (apply string (map integer->char args)))
37
38 ;;
39 ;; string internals
40 ;;
41
42 ;; Some abbreviations
43 ;; BMP - Basic Multilingual Plane (codepoints below U+FFFF)
44 ;; SMP - Suplementary Multilingual Plane (codebpoints from U+10000 to U+1FFFF)
45
46 (with-test-prefix "string internals"
47
48 (pass-if "new string starts at 1st char in stringbuf"
49 (let ((s "abc"))
50 (= 0 (assq-ref (%string-dump s) 'start))))
51
52 (pass-if "length of new string same as stringbuf"
53 (let ((s "def"))
54 (= (string-length s) (assq-ref (%string-dump s) 'stringbuf-length))))
55
56 (pass-if "contents of new string same as stringbuf"
57 (let ((s "ghi"))
58 (string=? s (assq-ref (%string-dump s) 'stringbuf-chars))))
59
60 (pass-if "writable strings are not read-only"
61 (let ((s "zyx"))
62 (not (assq-ref (%string-dump s) 'read-only))))
63
64 (pass-if "read-only strings are read-only"
65 (let ((s (substring/read-only "zyx" 0)))
66 (assq-ref (%string-dump s) 'read-only)))
67
68 (pass-if "new Latin-1 encoded strings are not shared"
69 (let ((s "abc"))
70 (not (assq-ref (%string-dump s) 'stringbuf-shared))))
71
72 (pass-if "new UCS-4 encoded strings are not shared"
73 (let ((s "\u0100bc"))
74 (not (assq-ref (%string-dump s) 'stringbuf-shared))))
75
76 ;; Should this be true? It isn't currently true.
77 (pass-if "null shared substrings are shared"
78 (let* ((s1 "")
79 (s2 (substring/shared s1 0 0)))
80 (throw 'untested)
81 (eq? (assq-ref (%string-dump s2) 'shared)
82 s1)))
83
84 (pass-if "ASCII shared substrings are shared"
85 (let* ((s1 "foobar")
86 (s2 (substring/shared s1 0 3)))
87 (eq? (assq-ref (%string-dump s2) 'shared)
88 s1)))
89
90 (pass-if "BMP shared substrings are shared"
91 (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
92 (s2 (substring/shared s1 0 3)))
93 (eq? (assq-ref (%string-dump s2) 'shared)
94 s1)))
95
96 (pass-if "null substrings are not shared"
97 (let* ((s1 "")
98 (s2 (substring s1 0 0)))
99 (not (eq? (assq-ref (%string-dump s2) 'shared)
100 s1))))
101
102 (pass-if "ASCII substrings are not shared"
103 (let* ((s1 "foobar")
104 (s2 (substring s1 0 3)))
105 (not (eq? (assq-ref (%string-dump s2) 'shared)
106 s1))))
107
108 (pass-if "BMP substrings are not shared"
109 (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
110 (s2 (substring s1 0 3)))
111 (not (eq? (assq-ref (%string-dump s2) 'shared)
112 s1))))
113
114 (pass-if "ASCII substrings share stringbufs before copy-on-write"
115 (let* ((s1 "foobar")
116 (s2 (substring s1 0 3)))
117 (assq-ref (%string-dump s1) 'stringbuf-shared)))
118
119 (pass-if "BMP substrings share stringbufs before copy-on-write"
120 (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
121 (s2 (substring s1 0 3)))
122 (assq-ref (%string-dump s1) 'stringbuf-shared)))
123
124 (pass-if "ASCII substrings don't share stringbufs after copy-on-write"
125 (let* ((s1 "foobar")
126 (s2 (substring s1 0 3)))
127 (string-set! s2 0 #\F)
128 (not (assq-ref (%string-dump s2) 'stringbuf-shared))))
129
130 (pass-if "BMP substrings don't share stringbufs after copy-on-write"
131 (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
132 (s2 (substring s1 0 3)))
133 (string-set! s2 0 #\F)
134 (not (assq-ref (%string-dump s2) 'stringbuf-shared))))
135
136 (with-test-prefix "encodings"
137
138 (pass-if "null strings are Latin-1 encoded"
139 (let ((s ""))
140 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
141
142 (pass-if "ASCII strings are Latin-1 encoded"
143 (let ((s "jkl"))
144 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
145
146 (pass-if "Latin-1 strings are Latin-1 encoded"
147 (let ((s "\xC0\xC1\xC2"))
148 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
149
150 (pass-if "BMP strings are UCS-4 encoded"
151 (let ((s "\u0100\u0101\x0102"))
152 (assq-ref (%string-dump s) 'stringbuf-wide)))
153
154 (pass-if "SMP strings are UCS-4 encoded"
155 (let ((s "\U010300\u010301\x010302"))
156 (assq-ref (%string-dump s) 'stringbuf-wide)))
157
158 (pass-if "null list->string is Latin-1 encoded"
159 (let ((s (string-ints)))
160 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
161
162 (pass-if "ASCII list->string is Latin-1 encoded"
163 (let ((s (string-ints 65 66 67)))
164 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
165
166 (pass-if "Latin-1 list->string is Latin-1 encoded"
167 (let ((s (string-ints #xc0 #xc1 #xc2)))
168 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
169
170 (pass-if "BMP list->string is UCS-4 encoded"
171 (let ((s (string-ints #x0100 #x0101 #x0102)))
172 (assq-ref (%string-dump s) 'stringbuf-wide)))
173
174 (pass-if "SMP list->string is UCS-4 encoded"
175 (let ((s (string-ints #x010300 #x010301 #x010302)))
176 (assq-ref (%string-dump s) 'stringbuf-wide)))
177
178 (pass-if "encoding of string not based on escape style"
179 (let ((s "\U000040"))
180 (not (assq-ref (%string-dump s) 'stringbuf-wide))))))
181
182 (with-test-prefix "escapes"
183
184 (pass-if-exception "non-hex char in two-digit hex-escape"
185 exception:illegal-escape
186 (with-input-from-string "\"\\x0g\"" read))
187
188 (pass-if-exception "non-hex char in four-digit hex-escape"
189 exception:illegal-escape
190 (with-input-from-string "\"\\u000g\"" read))
191
192 (pass-if-exception "non-hex char in six-digit hex-escape"
193 exception:illegal-escape
194 (with-input-from-string "\"\\U00000g\"" read))
195
196 (pass-if-exception "premature termination of two-digit hex-escape"
197 exception:illegal-escape
198 (with-input-from-string "\"\\x0\"" read))
199
200 (pass-if-exception "premature termination of four-digit hex-escape"
201 exception:illegal-escape
202 (with-input-from-string "\"\\u000\"" read))
203
204 (pass-if-exception "premature termination of six-digit hex-escape"
205 exception:illegal-escape
206 (with-input-from-string "\"\\U00000\"" read))
207
208 (pass-if "extra hex digits ignored for two-digit hex escape"
209 (eqv? (string-ref "--\xfff--" 2)
210 (integer->char #xff)))
211
212 (pass-if "extra hex digits ignored for four-digit hex escape"
213 (eqv? (string-ref "--\u0100f--" 2)
214 (integer->char #x0100)))
215
216 (pass-if "extra hex digits ignored for six-digit hex escape"
217 (eqv? (string-ref "--\U010300f--" 2)
218 (integer->char #x010300)))
219
220 (pass-if "escaped characters match non-escaped ASCII characters"
221 (string=? "ABC" "\x41\u0042\U000043"))
222
223 (pass-if "R5RS backslash escapes"
224 (string=? "\"\\" (string #\" #\\)))
225
226 (pass-if "R6RS backslash escapes"
227 (string=? "\a\b\t\n\v\f\r"
228 (string #\alarm #\backspace #\tab #\newline #\vtab
229 #\page #\return)))
230
231 (pass-if "Guile extensions backslash escapes"
232 (string=? "\0" (string #\nul))))
233
234 ;;
235 ;; string?
236 ;;
237 (with-test-prefix "string?"
238
239 (pass-if "string"
240 (string? "abc"))
241
242 (pass-if "symbol"
243 (not (string? 'abc))))
244
245 ;;
246 ;; literals
247 ;;
248
249 (with-test-prefix "literals"
250
251 ;; The "Storage Model" section of R5RS reads: "In such systems literal
252 ;; constants and the strings returned by `symbol->string' are
253 ;; immutable objects". `eval' doesn't support it yet, but it doesn't
254 ;; really matter because `eval' doesn't coalesce repeated constants,
255 ;; unlike the bytecode compiler.
256
257 (pass-if-exception "literals are constant"
258 exception:read-only-string
259 (compile '(string-set! "literal string" 0 #\x)
260 #:from 'scheme
261 #:to 'value)))
262
263 ;;
264 ;; string-null?
265 ;;
266
267 (with-test-prefix "string-null?"
268
269 (pass-if "null string"
270 (string-null? ""))
271
272 (pass-if "non-null string"
273 (not (string-null? "a")))
274
275 (pass-if "respects \\0"
276 (not (string-null? "\0")))
277
278 (pass-if-exception "symbol"
279 exception:wrong-type-arg
280 (string-null? 'a)))
281
282 ;;
283 ;; string=?
284 ;;
285
286 (with-test-prefix "string=?"
287
288 (pass-if "respects 1st parameter's string length"
289 (not (string=? "foo\0" "foo")))
290
291 (pass-if "respects 2nd paramter's string length"
292 (not (string=? "foo" "foo\0")))
293
294 (with-test-prefix "wrong argument type"
295
296 (pass-if-exception "1st argument symbol"
297 exception:wrong-type-arg
298 (string=? 'a "a"))
299
300 (pass-if-exception "2nd argument symbol"
301 exception:wrong-type-arg
302 (string=? "a" 'b))
303
304 (pass-if-exception "1st argument EOF"
305 exception:wrong-type-arg
306 (string=? (with-input-from-string "" read) "b"))
307
308 (pass-if-exception "2nd argument EOF"
309 exception:wrong-type-arg
310 (string=? "a" (with-input-from-string "" read)))))
311
312 ;;
313 ;; string<?
314 ;;
315
316 (with-test-prefix "string<?"
317
318 (pass-if "respects string length"
319 (and (not (string<? "foo\0a" "foo\0a"))
320 (string<? "foo\0a" "foo\0b")))
321
322 (with-test-prefix "wrong argument type"
323
324 (pass-if-exception "1st argument symbol"
325 exception:wrong-type-arg
326 (string<? 'a "a"))
327
328 (pass-if-exception "2nd argument symbol"
329 exception:wrong-type-arg
330 (string<? "a" 'b)))
331
332 (pass-if "same as char<?"
333 (eq? (char<? (integer->char 0) (integer->char 255))
334 (string<? (string-ints 0) (string-ints 255)))))
335
336 ;;
337 ;; string-ci<?
338 ;;
339
340 (with-test-prefix "string-ci<?"
341
342 (pass-if "respects string length"
343 (and (not (string-ci<? "foo\0a" "foo\0a"))
344 (string-ci<? "foo\0a" "foo\0b")))
345
346 (with-test-prefix "wrong argument type"
347
348 (pass-if-exception "1st argument symbol"
349 exception:wrong-type-arg
350 (string-ci<? 'a "a"))
351
352 (pass-if-exception "2nd argument symbol"
353 exception:wrong-type-arg
354 (string-ci<? "a" 'b)))
355
356 (pass-if "same as char-ci<?"
357 (eq? (char-ci<? (integer->char 0) (integer->char 255))
358 (string-ci<? (string-ints 0) (string-ints 255)))))
359
360 ;;
361 ;; string<=?
362 ;;
363
364 (with-test-prefix "string<=?"
365
366 (pass-if "same as char<=?"
367 (eq? (char<=? (integer->char 0) (integer->char 255))
368 (string<=? (string-ints 0) (string-ints 255)))))
369
370 ;;
371 ;; string-ci<=?
372 ;;
373
374 (with-test-prefix "string-ci<=?"
375
376 (pass-if "same as char-ci<=?"
377 (eq? (char-ci<=? (integer->char 0) (integer->char 255))
378 (string-ci<=? (string-ints 0) (string-ints 255)))))
379
380 ;;
381 ;; string>?
382 ;;
383
384 (with-test-prefix "string>?"
385
386 (pass-if "same as char>?"
387 (eq? (char>? (integer->char 0) (integer->char 255))
388 (string>? (string-ints 0) (string-ints 255)))))
389
390 ;;
391 ;; string-ci>?
392 ;;
393
394 (with-test-prefix "string-ci>?"
395
396 (pass-if "same as char-ci>?"
397 (eq? (char-ci>? (integer->char 0) (integer->char 255))
398 (string-ci>? (string-ints 0) (string-ints 255)))))
399
400 ;;
401 ;; string>=?
402 ;;
403
404 (with-test-prefix "string>=?"
405
406 (pass-if "same as char>=?"
407 (eq? (char>=? (integer->char 0) (integer->char 255))
408 (string>=? (string-ints 0) (string-ints 255)))))
409
410 ;;
411 ;; string-ci>=?
412 ;;
413
414 (with-test-prefix "string-ci>=?"
415
416 (pass-if "same as char-ci>=?"
417 (eq? (char-ci>=? (integer->char 0) (integer->char 255))
418 (string-ci>=? (string-ints 0) (string-ints 255)))))
419
420 ;;
421 ;; Unicode string normalization forms
422 ;;
423
424 ;;
425 ;; string-normalize-nfd
426 ;;
427
428 (with-test-prefix "string-normalize-nfd"
429
430 (pass-if "canonical decomposition is equal?"
431 (equal? (string-normalize-nfd "\xe9") "\x65\u0301")))
432
433 ;;
434 ;; string-normalize-nfkd
435 ;;
436
437 (with-test-prefix "string-normalize-nfkd"
438
439 (pass-if "compatibility decomposition is equal?"
440 (equal? (string-normalize-nfkd "\u1e9b\u0323") "s\u0323\u0307")))
441
442 ;;
443 ;; string-normalize-nfc
444 ;;
445
446 (with-test-prefix "string-normalize-nfc"
447
448 (pass-if "canonical composition is equal?"
449 (equal? (string-normalize-nfc "\x65\u0301") "\xe9")))
450
451 ;;
452 ;; string-normalize-nfkc
453 ;;
454
455 (with-test-prefix "string-normalize-nfkc"
456
457 (pass-if "compatibility composition is equal?"
458 (equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
459
460 ;;
461 ;; string-ref
462 ;;
463
464 (with-test-prefix "string-ref"
465
466 (pass-if-exception "empty string"
467 exception:out-of-range
468 (string-ref "" 0))
469
470 (pass-if-exception "empty string and non-zero index"
471 exception:out-of-range
472 (string-ref "" 123))
473
474 (pass-if-exception "out of range"
475 exception:out-of-range
476 (string-ref "hello" 123))
477
478 (pass-if-exception "negative index"
479 exception:out-of-range
480 (string-ref "hello" -1))
481
482 (pass-if "regular string, ASCII char"
483 (char=? (string-ref "GNU Guile" 4) #\G))
484
485 (pass-if "regular string, hex escaped Latin-1 char"
486 (char=? (string-ref "--\xff--" 2)
487 (integer->char #xff)))
488
489 (pass-if "regular string, hex escaped BMP char"
490 (char=? (string-ref "--\u0100--" 2)
491 (integer->char #x0100)))
492
493 (pass-if "regular string, hex escaped SMP char"
494 (char=? (string-ref "--\U010300--" 2)
495 (integer->char #x010300))))
496
497 ;;
498 ;; string-set!
499 ;;
500
501 (with-test-prefix "string-set!"
502
503 (pass-if-exception "empty string"
504 exception:out-of-range
505 (string-set! (string-copy "") 0 #\x))
506
507 (pass-if-exception "empty string and non-zero index"
508 exception:out-of-range
509 (string-set! (string-copy "") 123 #\x))
510
511 (pass-if-exception "out of range"
512 exception:out-of-range
513 (string-set! (string-copy "hello") 123 #\x))
514
515 (pass-if-exception "negative index"
516 exception:out-of-range
517 (string-set! (string-copy "hello") -1 #\x))
518
519 (pass-if-exception "read-only string"
520 exception:read-only-string
521 (string-set! (substring/read-only "abc" 0) 1 #\space))
522
523 (pass-if "regular string, ASCII char"
524 (let ((s (string-copy "GNU guile")))
525 (string-set! s 4 #\G)
526 (char=? (string-ref s 4) #\G)))
527
528 (pass-if "regular string, Latin-1 char"
529 (let ((s (string-copy "GNU guile")))
530 (string-set! s 4 (integer->char #xfe))
531 (char=? (string-ref s 4) (integer->char #xfe))))
532
533 (pass-if "regular string, BMP char"
534 (let ((s (string-copy "GNU guile")))
535 (string-set! s 4 (integer->char #x0100))
536 (char=? (string-ref s 4) (integer->char #x0100))))
537
538 (pass-if "regular string, SMP char"
539 (let ((s (string-copy "GNU guile")))
540 (string-set! s 4 (integer->char #x010300))
541 (char=? (string-ref s 4) (integer->char #x010300)))))
542
543 ;;
544 ;; list->string
545 ;;
546 (with-test-prefix "string"
547
548 (pass-if-exception "convert circular list to string"
549 '(wrong-type-arg . "Apply to non-list")
550 (let ((foo (list #\a #\b #\c)))
551 (set-cdr! (cddr foo) (cdr foo))
552 (apply string foo))))
553
554 (with-test-prefix "string-split"
555
556 ;; in guile 1.6.7 and earlier, character >=128 wasn't matched in the string
557 (pass-if "char 255"
558 (equal? '("a" "b")
559 (string-split (string #\a (integer->char 255) #\b)
560 (integer->char 255))))
561
562 (pass-if "empty string - char"
563 (equal? '("")
564 (string-split "" #\:)))
565
566 (pass-if "non-empty - char - no delimiters"
567 (equal? '("foobarfrob")
568 (string-split "foobarfrob" #\:)))
569
570 (pass-if "non-empty - char - delimiters"
571 (equal? '("foo" "bar" "frob")
572 (string-split "foo:bar:frob" #\:)))
573
574 (pass-if "non-empty - char - leading delimiters"
575 (equal? '("" "" "foo" "bar" "frob")
576 (string-split "::foo:bar:frob" #\:)))
577
578 (pass-if "non-empty - char - trailing delimiters"
579 (equal? '("foo" "bar" "frob" "" "")
580 (string-split "foo:bar:frob::" #\:)))
581
582 (pass-if "empty string - charset"
583 (equal? '("")
584 (string-split "" (char-set #\:))))
585
586 (pass-if "non-empty - charset - no delimiters"
587 (equal? '("foobarfrob")
588 (string-split "foobarfrob" (char-set #\:))))
589
590 (pass-if "non-empty - charset - delimiters"
591 (equal? '("foo" "bar" "frob")
592 (string-split "foo:bar:frob" (char-set #\:))))
593
594 (pass-if "non-empty - charset - leading delimiters"
595 (equal? '("" "" "foo" "bar" "frob")
596 (string-split "::foo:bar:frob" (char-set #\:))))
597
598 (pass-if "non-empty - charset - trailing delimiters"
599 (equal? '("foo" "bar" "frob" "" "")
600 (string-split "foo:bar:frob::" (char-set #\:))))
601
602 (pass-if "empty string - pred"
603 (equal? '("")
604 (string-split "" (negate char-alphabetic?))))
605
606 (pass-if "non-empty - pred - no delimiters"
607 (equal? '("foobarfrob")
608 (string-split "foobarfrob" (negate char-alphabetic?))))
609
610 (pass-if "non-empty - pred - delimiters"
611 (equal? '("foo" "bar" "frob")
612 (string-split "foo:bar:frob" (negate char-alphabetic?))))
613
614 (pass-if "non-empty - pred - leading delimiters"
615 (equal? '("" "" "foo" "bar" "frob")
616 (string-split "::foo:bar:frob" (negate char-alphabetic?))))
617
618 (pass-if "non-empty - pred - trailing delimiters"
619 (equal? '("foo" "bar" "frob" "" "")
620 (string-split "foo:bar:frob::" (negate char-alphabetic?)))))
621
622 (with-test-prefix "substring-move!"
623
624 (pass-if-exception "substring-move! checks start and end correctly"
625 exception:out-of-range
626 (substring-move! "sample" 3 0 "test" 3)))
627
628 (with-test-prefix "substring/shared"
629
630 (pass-if "modify indirectly"
631 (let ((str (string-copy "foofoofoo")))
632 (string-upcase! (substring/shared str 3 6))
633 (string=? str "fooFOOfoo")))
634
635 (pass-if "modify cow indirectly"
636 (let* ((str1 (string-copy "foofoofoo"))
637 (str2 (string-copy str1)))
638 (string-upcase! (substring/shared str2 3 6))
639 (and (string=? str1 "foofoofoo")
640 (string=? str2 "fooFOOfoo"))))
641
642 (pass-if "modify double indirectly"
643 (let* ((str1 (string-copy "foofoofoo"))
644 (str2 (substring/shared str1 2 7)))
645 (string-upcase! (substring/shared str2 1 4))
646 (string=? str1 "fooFOOfoo")))
647
648 (pass-if "modify cow double indirectly"
649 (let* ((str1 "foofoofoo")
650 (str2 (substring str1 2 7)))
651 (string-upcase! (substring/shared str2 1 4))
652 (and (string=? str1 "foofoofoo")
653 (string=? str2 "oFOOf")))))