1 ;;;; strings.test --- test suite for Guile's string functions -*- scheme -*-
2 ;;;; Jim Blandy <jimb@red-bean.com> --- August 1999
4 ;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009, 2010,
5 ;;;; 2011, 2013 Free Software Foundation, Inc.
7 ;;;; This library is free software; you can redistribute it and/or
8 ;;;; modify it under the terms of the GNU Lesser General Public
9 ;;;; License as published by the Free Software Foundation; either
10 ;;;; version 3 of the License, or (at your option) any later version.
12 ;;;; This library is distributed in the hope that it will be useful,
13 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;;;; Lesser General Public License for more details.
17 ;;;; You should have received a copy of the GNU Lesser General Public
18 ;;;; License along with this library; if not, write to the Free Software
19 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 (define-module (test-strings)
22 #:use-module ((system base compile) #:select (compile))
23 #:use-module (test-suite lib))
25 (define exception:read-only-string
26 (cons 'misc-error "^string is read-only"))
27 (define exception:illegal-escape
28 (cons 'read-error "illegal character in escape sequence"))
29 ;; Wrong types may have either the 'wrong-type-arg key when
30 ;; interpreted or 'vm-error when compiled. This matches both.
31 (define exception:wrong-type-arg
32 (cons #t "Wrong type"))
34 ;; Create a string from integer char values, eg. (string-ints 65) => "A"
35 (define (string-ints . args)
36 (apply string (map integer->char args)))
43 ;; BMP - Basic Multilingual Plane (codepoints below U+FFFF)
44 ;; SMP - Suplementary Multilingual Plane (codebpoints from U+10000 to U+1FFFF)
46 (with-test-prefix "string internals"
48 (pass-if "new string starts at 1st char in stringbuf"
50 (= 0 (assq-ref (%string-dump s) 'start))))
52 (pass-if "length of new string same as stringbuf"
54 (= (string-length s) (assq-ref (%string-dump s) 'stringbuf-length))))
56 (pass-if "contents of new string same as stringbuf"
58 (string=? s (assq-ref (%string-dump s) 'stringbuf-chars))))
60 (pass-if "writable strings are not read-only"
62 (not (assq-ref (%string-dump s) 'read-only))))
64 (pass-if "read-only strings are read-only"
65 (let ((s (substring/read-only "zyx" 0)))
66 (assq-ref (%string-dump s) 'read-only)))
68 (pass-if "new Latin-1 encoded strings are not shared"
70 (not (assq-ref (%string-dump s) 'stringbuf-shared))))
72 (pass-if "new UCS-4 encoded strings are not shared"
74 (not (assq-ref (%string-dump s) 'stringbuf-shared))))
76 ;; Should this be true? It isn't currently true.
77 (pass-if "null shared substrings are shared"
79 (s2 (substring/shared s1 0 0)))
81 (eq? (assq-ref (%string-dump s2) 'shared)
84 (pass-if "ASCII shared substrings are shared"
86 (s2 (substring/shared s1 0 3)))
87 (eq? (assq-ref (%string-dump s2) 'shared)
90 (pass-if "BMP shared substrings are shared"
91 (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
92 (s2 (substring/shared s1 0 3)))
93 (eq? (assq-ref (%string-dump s2) 'shared)
96 (pass-if "null substrings are not shared"
98 (s2 (substring s1 0 0)))
99 (not (eq? (assq-ref (%string-dump s2) 'shared)
102 (pass-if "ASCII substrings are not shared"
104 (s2 (substring s1 0 3)))
105 (not (eq? (assq-ref (%string-dump s2) 'shared)
108 (pass-if "BMP substrings are not shared"
109 (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
110 (s2 (substring s1 0 3)))
111 (not (eq? (assq-ref (%string-dump s2) 'shared)
114 (pass-if "ASCII substrings share stringbufs before copy-on-write"
116 (s2 (substring s1 0 3)))
117 (assq-ref (%string-dump s1) 'stringbuf-shared)))
119 (pass-if "BMP substrings share stringbufs before copy-on-write"
120 (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
121 (s2 (substring s1 0 3)))
122 (assq-ref (%string-dump s1) 'stringbuf-shared)))
124 (pass-if "ASCII substrings don't share stringbufs after copy-on-write"
126 (s2 (substring s1 0 3)))
127 (string-set! s2 0 #\F)
128 (not (assq-ref (%string-dump s2) 'stringbuf-shared))))
130 (pass-if "BMP substrings don't share stringbufs after copy-on-write"
131 (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
132 (s2 (substring s1 0 3)))
133 (string-set! s2 0 #\F)
134 (not (assq-ref (%string-dump s2) 'stringbuf-shared))))
136 (with-test-prefix "encodings"
138 (pass-if "null strings are Latin-1 encoded"
140 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
142 (pass-if "ASCII strings are Latin-1 encoded"
144 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
146 (pass-if "Latin-1 strings are Latin-1 encoded"
147 (let ((s "\xC0\xC1\xC2"))
148 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
150 (pass-if "BMP strings are UCS-4 encoded"
151 (let ((s "\u0100\u0101\x0102"))
152 (assq-ref (%string-dump s) 'stringbuf-wide)))
154 (pass-if "SMP strings are UCS-4 encoded"
155 (let ((s "\U010300\u010301\x010302"))
156 (assq-ref (%string-dump s) 'stringbuf-wide)))
158 (pass-if "null list->string is Latin-1 encoded"
159 (let ((s (string-ints)))
160 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
162 (pass-if "ASCII list->string is Latin-1 encoded"
163 (let ((s (string-ints 65 66 67)))
164 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
166 (pass-if "Latin-1 list->string is Latin-1 encoded"
167 (let ((s (string-ints #xc0 #xc1 #xc2)))
168 (not (assq-ref (%string-dump s) 'stringbuf-wide))))
170 (pass-if "BMP list->string is UCS-4 encoded"
171 (let ((s (string-ints #x0100 #x0101 #x0102)))
172 (assq-ref (%string-dump s) 'stringbuf-wide)))
174 (pass-if "SMP list->string is UCS-4 encoded"
175 (let ((s (string-ints #x010300 #x010301 #x010302)))
176 (assq-ref (%string-dump s) 'stringbuf-wide)))
178 (pass-if "encoding of string not based on escape style"
179 (let ((s "\U000040"))
180 (not (assq-ref (%string-dump s) 'stringbuf-wide))))))
182 (with-test-prefix "escapes"
184 (pass-if-exception "non-hex char in two-digit hex-escape"
185 exception:illegal-escape
186 (with-input-from-string "\"\\x0g\"" read))
188 (pass-if-exception "non-hex char in four-digit hex-escape"
189 exception:illegal-escape
190 (with-input-from-string "\"\\u000g\"" read))
192 (pass-if-exception "non-hex char in six-digit hex-escape"
193 exception:illegal-escape
194 (with-input-from-string "\"\\U00000g\"" read))
196 (pass-if-exception "premature termination of two-digit hex-escape"
197 exception:illegal-escape
198 (with-input-from-string "\"\\x0\"" read))
200 (pass-if-exception "premature termination of four-digit hex-escape"
201 exception:illegal-escape
202 (with-input-from-string "\"\\u000\"" read))
204 (pass-if-exception "premature termination of six-digit hex-escape"
205 exception:illegal-escape
206 (with-input-from-string "\"\\U00000\"" read))
208 (pass-if "extra hex digits ignored for two-digit hex escape"
209 (eqv? (string-ref "--\xfff--" 2)
210 (integer->char #xff)))
212 (pass-if "extra hex digits ignored for four-digit hex escape"
213 (eqv? (string-ref "--\u0100f--" 2)
214 (integer->char #x0100)))
216 (pass-if "extra hex digits ignored for six-digit hex escape"
217 (eqv? (string-ref "--\U010300f--" 2)
218 (integer->char #x010300)))
220 (pass-if "escaped characters match non-escaped ASCII characters"
221 (string=? "ABC" "\x41\u0042\U000043"))
223 (pass-if "R5RS backslash escapes"
224 (string=? "\"\\" (string #\" #\\)))
226 (pass-if "R6RS backslash escapes"
227 (string=? "\a\b\t\n\v\f\r"
228 (string #\alarm #\backspace #\tab #\newline #\vtab
231 (pass-if "Guile extensions backslash escapes"
232 (string=? "\0" (string #\nul))))
237 (with-test-prefix "string?"
243 (not (string? 'abc))))
249 (with-test-prefix "literals"
251 ;; The "Storage Model" section of R5RS reads: "In such systems literal
252 ;; constants and the strings returned by `symbol->string' are
253 ;; immutable objects". `eval' doesn't support it yet, but it doesn't
254 ;; really matter because `eval' doesn't coalesce repeated constants,
255 ;; unlike the bytecode compiler.
257 (pass-if-exception "literals are constant"
258 exception:read-only-string
259 (compile '(string-set! "literal string" 0 #\x)
267 (with-test-prefix "string-null?"
269 (pass-if "null string"
272 (pass-if "non-null string"
273 (not (string-null? "a")))
275 (pass-if "respects \\0"
276 (not (string-null? "\0")))
278 (pass-if-exception "symbol"
279 exception:wrong-type-arg
286 (with-test-prefix "string=?"
288 (pass-if "respects 1st parameter's string length"
289 (not (string=? "foo\0" "foo")))
291 (pass-if "respects 2nd paramter's string length"
292 (not (string=? "foo" "foo\0")))
294 (with-test-prefix "wrong argument type"
296 (pass-if-exception "1st argument symbol"
297 exception:wrong-type-arg
300 (pass-if-exception "2nd argument symbol"
301 exception:wrong-type-arg
304 (pass-if-exception "1st argument EOF"
305 exception:wrong-type-arg
306 (string=? (with-input-from-string "" read) "b"))
308 (pass-if-exception "2nd argument EOF"
309 exception:wrong-type-arg
310 (string=? "a" (with-input-from-string "" read)))))
316 (with-test-prefix "string<?"
318 (pass-if "respects string length"
319 (and (not (string<? "foo\0a" "foo\0a"))
320 (string<? "foo\0a" "foo\0b")))
322 (with-test-prefix "wrong argument type"
324 (pass-if-exception "1st argument symbol"
325 exception:wrong-type-arg
328 (pass-if-exception "2nd argument symbol"
329 exception:wrong-type-arg
332 (pass-if "same as char<?"
333 (eq? (char<? (integer->char 0) (integer->char 255))
334 (string<? (string-ints 0) (string-ints 255)))))
340 (with-test-prefix "string-ci<?"
342 (pass-if "respects string length"
343 (and (not (string-ci<? "foo\0a" "foo\0a"))
344 (string-ci<? "foo\0a" "foo\0b")))
346 (with-test-prefix "wrong argument type"
348 (pass-if-exception "1st argument symbol"
349 exception:wrong-type-arg
350 (string-ci<? 'a "a"))
352 (pass-if-exception "2nd argument symbol"
353 exception:wrong-type-arg
354 (string-ci<? "a" 'b)))
356 (pass-if "same as char-ci<?"
357 (eq? (char-ci<? (integer->char 0) (integer->char 255))
358 (string-ci<? (string-ints 0) (string-ints 255)))))
364 (with-test-prefix "string<=?"
366 (pass-if "same as char<=?"
367 (eq? (char<=? (integer->char 0) (integer->char 255))
368 (string<=? (string-ints 0) (string-ints 255)))))
374 (with-test-prefix "string-ci<=?"
376 (pass-if "same as char-ci<=?"
377 (eq? (char-ci<=? (integer->char 0) (integer->char 255))
378 (string-ci<=? (string-ints 0) (string-ints 255)))))
384 (with-test-prefix "string>?"
386 (pass-if "same as char>?"
387 (eq? (char>? (integer->char 0) (integer->char 255))
388 (string>? (string-ints 0) (string-ints 255)))))
394 (with-test-prefix "string-ci>?"
396 (pass-if "same as char-ci>?"
397 (eq? (char-ci>? (integer->char 0) (integer->char 255))
398 (string-ci>? (string-ints 0) (string-ints 255)))))
404 (with-test-prefix "string>=?"
406 (pass-if "same as char>=?"
407 (eq? (char>=? (integer->char 0) (integer->char 255))
408 (string>=? (string-ints 0) (string-ints 255)))))
414 (with-test-prefix "string-ci>=?"
416 (pass-if "same as char-ci>=?"
417 (eq? (char-ci>=? (integer->char 0) (integer->char 255))
418 (string-ci>=? (string-ints 0) (string-ints 255)))))
421 ;; Unicode string normalization forms
425 ;; string-normalize-nfd
428 (with-test-prefix "string-normalize-nfd"
430 (pass-if "canonical decomposition is equal?"
431 (equal? (string-normalize-nfd "\xe9") "\x65\u0301")))
434 ;; string-normalize-nfkd
437 (with-test-prefix "string-normalize-nfkd"
439 (pass-if "compatibility decomposition is equal?"
440 (equal? (string-normalize-nfkd "\u1e9b\u0323") "s\u0323\u0307")))
443 ;; string-normalize-nfc
446 (with-test-prefix "string-normalize-nfc"
448 (pass-if "canonical composition is equal?"
449 (equal? (string-normalize-nfc "\x65\u0301") "\xe9")))
452 ;; string-normalize-nfkc
455 (with-test-prefix "string-normalize-nfkc"
457 (pass-if "compatibility composition is equal?"
458 (equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
464 (with-test-prefix "string-ref"
466 (pass-if-exception "empty string"
467 exception:out-of-range
470 (pass-if-exception "empty string and non-zero index"
471 exception:out-of-range
474 (pass-if-exception "out of range"
475 exception:out-of-range
476 (string-ref "hello" 123))
478 (pass-if-exception "negative index"
479 exception:out-of-range
480 (string-ref "hello" -1))
482 (pass-if "regular string, ASCII char"
483 (char=? (string-ref "GNU Guile" 4) #\G))
485 (pass-if "regular string, hex escaped Latin-1 char"
486 (char=? (string-ref "--\xff--" 2)
487 (integer->char #xff)))
489 (pass-if "regular string, hex escaped BMP char"
490 (char=? (string-ref "--\u0100--" 2)
491 (integer->char #x0100)))
493 (pass-if "regular string, hex escaped SMP char"
494 (char=? (string-ref "--\U010300--" 2)
495 (integer->char #x010300))))
501 (with-test-prefix "string-set!"
503 (pass-if-exception "empty string"
504 exception:out-of-range
505 (string-set! (string-copy "") 0 #\x))
507 (pass-if-exception "empty string and non-zero index"
508 exception:out-of-range
509 (string-set! (string-copy "") 123 #\x))
511 (pass-if-exception "out of range"
512 exception:out-of-range
513 (string-set! (string-copy "hello") 123 #\x))
515 (pass-if-exception "negative index"
516 exception:out-of-range
517 (string-set! (string-copy "hello") -1 #\x))
519 (pass-if-exception "read-only string"
520 exception:read-only-string
521 (string-set! (substring/read-only "abc" 0) 1 #\space))
523 (pass-if "regular string, ASCII char"
524 (let ((s (string-copy "GNU guile")))
525 (string-set! s 4 #\G)
526 (char=? (string-ref s 4) #\G)))
528 (pass-if "regular string, Latin-1 char"
529 (let ((s (string-copy "GNU guile")))
530 (string-set! s 4 (integer->char #xfe))
531 (char=? (string-ref s 4) (integer->char #xfe))))
533 (pass-if "regular string, BMP char"
534 (let ((s (string-copy "GNU guile")))
535 (string-set! s 4 (integer->char #x0100))
536 (char=? (string-ref s 4) (integer->char #x0100))))
538 (pass-if "regular string, SMP char"
539 (let ((s (string-copy "GNU guile")))
540 (string-set! s 4 (integer->char #x010300))
541 (char=? (string-ref s 4) (integer->char #x010300)))))
546 (with-test-prefix "string"
548 (pass-if-exception "convert circular list to string"
549 '(wrong-type-arg . "Apply to non-list")
550 (let ((foo (list #\a #\b #\c)))
551 (set-cdr! (cddr foo) (cdr foo))
552 (apply string foo))))
554 (with-test-prefix "string-split"
556 ;; in guile 1.6.7 and earlier, character >=128 wasn't matched in the string
559 (string-split (string #\a (integer->char 255) #\b)
560 (integer->char 255))))
562 (pass-if "empty string - char"
564 (string-split "" #\:)))
566 (pass-if "non-empty - char - no delimiters"
567 (equal? '("foobarfrob")
568 (string-split "foobarfrob" #\:)))
570 (pass-if "non-empty - char - delimiters"
571 (equal? '("foo" "bar" "frob")
572 (string-split "foo:bar:frob" #\:)))
574 (pass-if "non-empty - char - leading delimiters"
575 (equal? '("" "" "foo" "bar" "frob")
576 (string-split "::foo:bar:frob" #\:)))
578 (pass-if "non-empty - char - trailing delimiters"
579 (equal? '("foo" "bar" "frob" "" "")
580 (string-split "foo:bar:frob::" #\:)))
582 (pass-if "empty string - charset"
584 (string-split "" (char-set #\:))))
586 (pass-if "non-empty - charset - no delimiters"
587 (equal? '("foobarfrob")
588 (string-split "foobarfrob" (char-set #\:))))
590 (pass-if "non-empty - charset - delimiters"
591 (equal? '("foo" "bar" "frob")
592 (string-split "foo:bar:frob" (char-set #\:))))
594 (pass-if "non-empty - charset - leading delimiters"
595 (equal? '("" "" "foo" "bar" "frob")
596 (string-split "::foo:bar:frob" (char-set #\:))))
598 (pass-if "non-empty - charset - trailing delimiters"
599 (equal? '("foo" "bar" "frob" "" "")
600 (string-split "foo:bar:frob::" (char-set #\:))))
602 (pass-if "empty string - pred"
604 (string-split "" (negate char-alphabetic?))))
606 (pass-if "non-empty - pred - no delimiters"
607 (equal? '("foobarfrob")
608 (string-split "foobarfrob" (negate char-alphabetic?))))
610 (pass-if "non-empty - pred - delimiters"
611 (equal? '("foo" "bar" "frob")
612 (string-split "foo:bar:frob" (negate char-alphabetic?))))
614 (pass-if "non-empty - pred - leading delimiters"
615 (equal? '("" "" "foo" "bar" "frob")
616 (string-split "::foo:bar:frob" (negate char-alphabetic?))))
618 (pass-if "non-empty - pred - trailing delimiters"
619 (equal? '("foo" "bar" "frob" "" "")
620 (string-split "foo:bar:frob::" (negate char-alphabetic?)))))
622 (with-test-prefix "substring-move!"
624 (pass-if-exception "substring-move! checks start and end correctly"
625 exception:out-of-range
626 (substring-move! "sample" 3 0 "test" 3)))
628 (with-test-prefix "substring/shared"
630 (pass-if "modify indirectly"
631 (let ((str (string-copy "foofoofoo")))
632 (string-upcase! (substring/shared str 3 6))
633 (string=? str "fooFOOfoo")))
635 (pass-if "modify cow indirectly"
636 (let* ((str1 (string-copy "foofoofoo"))
637 (str2 (string-copy str1)))
638 (string-upcase! (substring/shared str2 3 6))
639 (and (string=? str1 "foofoofoo")
640 (string=? str2 "fooFOOfoo"))))
642 (pass-if "modify double indirectly"
643 (let* ((str1 (string-copy "foofoofoo"))
644 (str2 (substring/shared str1 2 7)))
645 (string-upcase! (substring/shared str2 1 4))
646 (string=? str1 "fooFOOfoo")))
648 (pass-if "modify cow double indirectly"
649 (let* ((str1 "foofoofoo")
650 (str2 (substring str1 2 7)))
651 (string-upcase! (substring/shared str2 1 4))
652 (and (string=? str1 "foofoofoo")
653 (string=? str2 "oFOOf")))))