Merge branch 'stable-2.0'
[bpt/guile.git] / test-suite / tests / encoding-utf8.test
CommitLineData
a5229ee8 1;;;; encoding-utf8.test --- test suite for Guile's string encodings -*- mode: scheme; coding: utf-8 -*-
889975e5 2;;;;
a3d7d5d5 3;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
a5229ee8
LC
4;;;;
5;;;; This library is free software; you can redistribute it and/or
6;;;; modify it under the terms of the GNU Lesser General Public
7;;;; License as published by the Free Software Foundation; either
8;;;; version 3 of the License, or (at your option) any later version.
9;;;;
10;;;; This library is distributed in the hope that it will be useful,
889975e5 11;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
a5229ee8
LC
12;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;;; Lesser General Public License for more details.
14;;;;
15;;;; You should have received a copy of the GNU Lesser General Public
16;;;; License along with this library; if not, write to the Free Software
17;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
889975e5
MG
18
19(define-module (test-strings)
20 #:use-module (test-suite lib)
21 #:use-module (srfi srfi-1))
22
889975e5
MG
23;; Create a string from integer char values, eg. (string-ints 65) => "A"
24(define (string-ints . args)
25 (apply string (map integer->char args)))
26
ce3ed012
MG
27(define oldlocale #f)
28(if (defined? 'setlocale)
29 (set! oldlocale (setlocale LC_ALL "")))
889975e5 30
bda0d85f
MG
31(define ascii-a (integer->char 65)) ; LATIN CAPITAL LETTER A
32(define a-acute (integer->char #x00c1)) ; LATIN CAPITAL LETTER A WITH ACUTE
33(define alpha (integer->char #x03b1)) ; GREEK SMALL LETTER ALPHA
34(define cherokee-a (integer->char #x13a0)) ; CHEROKEE LETTER A
35
36(with-test-prefix "characters"
37 (pass-if "input A"
38 (char=? ascii-a #\A))
39
40 (pass-if "input A acute"
41 (char=? a-acute #\Á))
42
43 (pass-if "input alpha"
44 (char=? alpha #\α))
45
46 (pass-if "input Cherokee A"
47 (char=? cherokee-a #\Ꭰ))
48
49 (pass-if "display A"
50 (let ((pt (open-output-string)))
51 (set-port-encoding! pt "UTF-8")
52 (set-port-conversion-strategy! pt 'substitute)
53 (display ascii-a pt)
54 (string=? "A"
55 (get-output-string pt))))
56
57 (pass-if "display A acute"
58 (let ((pt (open-output-string)))
59 (set-port-encoding! pt "UTF-8")
60 (set-port-conversion-strategy! pt 'substitute)
61 (display a-acute pt)
62 (string=? "Á"
63 (get-output-string pt))))
64
65 (pass-if "display alpha"
66 (let ((pt (open-output-string)))
67 (set-port-encoding! pt "UTF-8")
68 (set-port-conversion-strategy! pt 'substitute)
69 (display alpha pt)
70 (string-ci=? "α"
71 (get-output-string pt))))
72
73 (pass-if "display Cherokee A"
74 (let ((pt (open-output-string)))
75 (set-port-encoding! pt "UTF-8")
76 (set-port-conversion-strategy! pt 'substitute)
77 (display cherokee-a pt)
78 (string-ci=? "Ꭰ"
79 (get-output-string pt))))
80
81 (pass-if "write A"
82 (let ((pt (open-output-string)))
83 (set-port-encoding! pt "UTF-8")
84 (set-port-conversion-strategy! pt 'escape)
85 (write ascii-a pt)
86 (string=? "#\\A"
87 (get-output-string pt))))
88
89 (pass-if "write A acute"
90 (let ((pt (open-output-string)))
91 (set-port-encoding! pt "UTF-8")
92 (set-port-conversion-strategy! pt 'escape)
93 (write a-acute pt)
94 (string=? "#\\Á"
95 (get-output-string pt))))
96
33d92fe6
LC
97 (pass-if "write A followed by combining accent"
98 (let ((pt (open-output-string)))
99 (set-port-encoding! pt "UTF-8")
100 (set-port-conversion-strategy! pt 'escape)
101 (write (string #\A (integer->char #x030f)) pt)
102 (string-ci=? "\"Ȁ\""
103 (get-output-string pt))))
104
bda0d85f
MG
105 (pass-if "write alpha"
106 (let ((pt (open-output-string)))
107 (set-port-encoding! pt "UTF-8")
108 (set-port-conversion-strategy! pt 'escape)
109 (write alpha pt)
110 (string=? "#\\α"
111 (get-output-string pt))))
112
113 (pass-if "write Cherokee A"
114 (let ((pt (open-output-string)))
115 (set-port-encoding! pt "UTF-8")
116 (set-port-conversion-strategy! pt 'escape)
117 (write cherokee-a pt)
118 (string=? "#\\Ꭰ"
119 (get-output-string pt)))))
120
889975e5
MG
121(define s1 "última")
122(define s2 "cédula")
123(define s3 "años")
124(define s4 "羅生門")
125
126(with-test-prefix "string length"
127
128 (pass-if "última"
764246cf 129 (eqv? (string-length s1) 6))
889975e5
MG
130
131 (pass-if "cédula"
764246cf 132 (eqv? (string-length s2) 6))
889975e5
MG
133
134 (pass-if "años"
764246cf 135 (eqv? (string-length s3) 4))
889975e5
MG
136
137 (pass-if "羅生門"
764246cf 138 (eqv? (string-length s4) 3)))
889975e5
MG
139
140(with-test-prefix "internal encoding"
141
142 (pass-if "última"
143 (string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
144
145 (pass-if "cédula"
146 (string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
147
148 (pass-if "años"
149 (string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
150
151 (pass-if "羅生門"
152 (string=? s4 (string-ints #x7f85 #x751f #x9580))))
153
154(with-test-prefix "chars"
155
156 (pass-if "última"
157 (list= eqv? (string->list s1)
158 (list #\ú #\l #\t #\i #\m #\a)))
159
160 (pass-if "cédula"
161 (list= eqv? (string->list s2)
162 (list #\c #\é #\d #\u #\l #\a)))
163
164 (pass-if "años"
165 (list= eqv? (string->list s3)
166 (list #\a #\ñ #\o #\s)))
167
168 (pass-if "羅生門"
169 (list= eqv? (string->list s4)
170 (list #\羅 #\生 #\門))))
171
172(with-test-prefix "symbols == strings"
173
174 (pass-if "última"
175 (eq? (string->symbol s1) 'última))
176
177 (pass-if "cédula"
178 (eq? (string->symbol s2) 'cédula))
179
180 (pass-if "años"
181 (eq? (string->symbol s3) 'años))
182
183 (pass-if "羅生門"
184 (eq? (string->symbol s4) '羅生門)))
185
186(with-test-prefix "non-ascii variable names"
187
188 (pass-if "1"
189 (let ((芥川龍之介 1)
190 (ñ 2))
764246cf 191 (eqv? (+ 芥川龍之介 ñ) 3))))
889975e5 192
ce3ed012
MG
193(if (defined? 'setlocale)
194 (setlocale LC_ALL oldlocale))