Commit | Line | Data |
---|---|---|
8d138742 CE |
1 | /* |
2 | ** Copyright 2000-2003 Double Precision, Inc. | |
3 | ** See COPYING for distribution information. | |
4 | ** | |
5 | */ | |
6 | ||
7 | #include "unicode_config.h" | |
8 | #include "unicode.h" | |
9 | #include <string.h> | |
10 | #include <stdlib.h> | |
11 | ||
/* Revision-control identification string for this file. */
static const char rcsid[]="$Id: utf7imap.c,v 1.6 2004/05/23 14:28:25 mrsam Exp $";

/*
** The 64-character alphabet used for the base64 sections.  It ends in
** "+," rather than the "+/" of standard base64.
*/
static const char mbase64[]=
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

/*
** Reverse mapping, indexed by byte value: the position of that byte in
** mbase64[], or (char)-1 when the byte is not part of the alphabet.  The
** table is filled in lazily; mbase64_lookup_init is nonzero once that has
** happened.
*/
static char mbase64_lookup[256];
static int mbase64_lookup_init=0;
18 | ||
19 | unicode_char *unicode_modutf7touc(const char *s, int *err) | |
20 | { | |
21 | size_t l=strlen(s), i; | |
22 | unicode_char *uc=malloc(sizeof(unicode_char)*(l+1)); | |
23 | /* That's the worst case scenario, that's all. */ | |
24 | ||
25 | if (!uc) | |
26 | return (NULL); | |
27 | ||
28 | if (err) | |
29 | *err= -1; | |
30 | ||
31 | /* First time through - initialize fast lookup table */ | |
32 | ||
33 | if (!mbase64_lookup_init) | |
34 | { | |
35 | mbase64_lookup_init=1; | |
36 | ||
37 | for (i=0; i<256; i++) | |
38 | mbase64_lookup[i]= (char)-1; | |
39 | ||
40 | for (i=0; mbase64[i]; i++) | |
41 | mbase64_lookup[(int)mbase64[i]]=i; | |
42 | } | |
43 | i=0; | |
44 | ||
45 | for (l=0; s[l]; l++) | |
46 | { | |
47 | unicode_char uu; | |
48 | int bitcount; | |
49 | ||
50 | if ( s[l] < 0x20 || s[l] >= 0x7F ) | |
51 | { | |
52 | free(uc); | |
53 | if (err) *err=l; | |
54 | return (NULL); | |
55 | } | |
56 | ||
57 | if ( s[l] != '&' ) | |
58 | { | |
59 | uc[i++]= (int)(unsigned char)s[l]; | |
60 | continue; | |
61 | } | |
62 | ||
63 | if ( s[++l] == '-' ) | |
64 | { | |
65 | uc[i++]='&'; | |
66 | continue; | |
67 | } | |
68 | ||
69 | bitcount=0; | |
70 | uu=0; | |
71 | ||
72 | for ( ; s[l] != '-'; l++) | |
73 | { | |
74 | int bits; | |
75 | ||
76 | if ((char)(bits= | |
77 | mbase64_lookup[s[l] & 255]) == (char)-1) | |
78 | { | |
79 | free(uc); | |
80 | if (err) *err=l; | |
81 | return (0); | |
82 | } | |
83 | ||
84 | if (bitcount + 6 >= 16) | |
85 | /* These six more bits are enough for UCS2 */ | |
86 | { | |
87 | int n=bitcount + 6 - 16; /* Leftover */ | |
88 | ||
89 | uu = (uu << (6-n)) | (bits >> n); | |
90 | uc[i++] = (uu & 0xFFFF); | |
91 | ||
92 | uu = bits; /* The leftovers */ | |
93 | bitcount=n; | |
94 | } | |
95 | else | |
96 | { | |
97 | uu = (uu << 6) | bits; | |
98 | bitcount += 6; | |
99 | } | |
100 | } | |
101 | } | |
102 | uc[i]=0; | |
103 | return (uc); | |
104 | } | |
105 | ||
106 | static size_t uctoutf7_pass(const unicode_char *, const unicode_char *, | |
107 | char *); | |
108 | ||
109 | char *unicode_uctomodutf7(const unicode_char *p) | |
110 | { | |
111 | return unicode_uctomodutf7x(p, NULL); | |
112 | } | |
113 | ||
114 | char *unicode_uctomodutf7x(const unicode_char *p, | |
115 | const unicode_char *specials) | |
116 | { | |
117 | size_t n=uctoutf7_pass(p, specials, NULL); | |
118 | char *s=malloc(n); | |
119 | ||
120 | if (s) | |
121 | uctoutf7_pass(p, specials, s); | |
122 | return (s); | |
123 | } | |
124 | ||
125 | static int is_special(unicode_char uc, const unicode_char *specials) | |
126 | { | |
127 | while (specials && *specials) | |
128 | if (*specials++ == uc) | |
129 | return 1; | |
130 | ||
131 | return uc < 0x20 || uc >= 0x7F; | |
132 | } | |
133 | ||
134 | static size_t uctoutf7_pass(const unicode_char *uc, | |
135 | const unicode_char *specials, | |
136 | char *p) | |
137 | { | |
138 | size_t n=0; | |
139 | ||
140 | while (*uc) | |
141 | { | |
142 | unsigned bits, bitcount; | |
143 | ||
144 | if (!is_special(*uc, specials)) | |
145 | { | |
146 | /* Straightforward deal for straightforward ASCII */ | |
147 | ||
148 | if (p) | |
149 | *p++ = (char)*uc; | |
150 | ++n; | |
151 | ||
152 | if (*uc++ == '&') | |
153 | { | |
154 | if (p) *p++ = '-'; | |
155 | ++n; | |
156 | } | |
157 | continue; | |
158 | } | |
159 | ||
160 | if (p) *p++ = '&'; /* Begin modified base64 */ | |
161 | ++n; | |
162 | ||
163 | bits=bitcount=0; | |
164 | while ( *uc && is_special(*uc, specials)) | |
165 | { | |
166 | unicode_char uu= *uc++ & 0xFFFF; | |
167 | int counter=16; | |
168 | ||
169 | if (!uu) uu=0xFFFD; | |
170 | ||
171 | /* Process 16 bits */ | |
172 | ||
173 | while (counter) | |
174 | { | |
175 | int x; | |
176 | ||
177 | if (counter + bitcount < 6) | |
178 | { | |
179 | /* Add these bits, then we're done */ | |
180 | ||
181 | bits = (bits << counter) | | |
182 | (uu >> (16-counter)); | |
183 | bitcount += counter; | |
184 | break; | |
185 | } | |
186 | ||
187 | /* Have enough bits to encode */ | |
188 | ||
189 | x= 6 - bitcount; | |
190 | ||
191 | bits = (bits << x) | (uu >> (16-x)); | |
192 | uu = (uu << x) & 0xFFFF; | |
193 | counter -= x; | |
194 | ||
195 | if (p) | |
196 | *p++ = mbase64[bits]; | |
197 | ++n; | |
198 | bits=bitcount=0; | |
199 | } | |
200 | } | |
201 | ||
202 | if (bitcount) /* Leftovers */ | |
203 | { | |
204 | bits <<= (6-bitcount); | |
205 | if (p) | |
206 | *p++ = mbase64[bits]; | |
207 | ++n; | |
208 | } | |
209 | ||
210 | if (p) | |
211 | *p++ = '-'; | |
212 | ++n; | |
213 | /* End modified base64 */ | |
214 | } | |
215 | ||
216 | if (p) | |
217 | *p=0; | |
218 | ++n; | |
219 | return (n); | |
220 | } | |
221 | ||
222 | static char *toupper_func(const struct unicode_info *u, | |
223 | const char *cp, int *ip) | |
224 | { | |
225 | unicode_char *uc=unicode_modutf7touc(cp, ip), *p; | |
226 | char *s; | |
227 | ||
228 | if (!uc) return (0); | |
229 | ||
230 | for (p=uc; *p; p++) | |
231 | *p=unicode_uc(*p); | |
232 | ||
233 | s=unicode_uctomodutf7(uc); | |
234 | if (!s && ip) | |
235 | *ip=0; | |
236 | free(uc); | |
237 | return (s); | |
238 | } | |
239 | ||
240 | static char *tolower_func(const struct unicode_info *u, | |
241 | const char *cp, int *ip) | |
242 | { | |
243 | unicode_char *uc=unicode_modutf7touc(cp, ip), *p; | |
244 | char *s; | |
245 | ||
246 | if (!uc) return (0); | |
247 | ||
248 | for (p=uc; *p; p++) | |
249 | *p=unicode_lc(*p); | |
250 | ||
251 | s=unicode_uctomodutf7(uc); | |
252 | free(uc); | |
253 | if (!s && ip) | |
254 | *ip=0; | |
255 | return (s); | |
256 | } | |
257 | ||
258 | static char *totitle_func(const struct unicode_info *u, | |
259 | const char *cp, int *ip) | |
260 | { | |
261 | unicode_char *uc=unicode_modutf7touc(cp, ip), *p; | |
262 | char *s; | |
263 | ||
264 | if (!uc) return (0); | |
265 | ||
266 | for (p=uc; *p; p++) | |
267 | *p=unicode_tc(*p); | |
268 | ||
269 | s=unicode_uctomodutf7(uc); | |
270 | if (!s && ip) | |
271 | *ip=0; | |
272 | free(uc); | |
273 | return (s); | |
274 | } | |
275 | ||
276 | static unicode_char *tou(const struct unicode_info *ui, const char *cs, | |
277 | int *err) | |
278 | { | |
279 | return unicode_modutf7touc(cs, err); | |
280 | } | |
281 | ||
282 | ||
283 | static char *fromu(const struct unicode_info *ui, | |
284 | const unicode_char *uc, int *err) | |
285 | { | |
286 | if (err) *err= -1; | |
287 | return unicode_uctomodutf7(uc); | |
288 | } | |
289 | ||
290 | const struct unicode_info unicode_IMAP_MODUTF7 = { | |
291 | "X-IMAP-MODUTF-7", | |
292 | UNICODE_UTF | UNICODE_MB | | |
293 | UNICODE_HEADER_BASE64, | |
294 | tou, | |
295 | fromu, | |
296 | toupper_func, | |
297 | tolower_func, | |
298 | totitle_func}; |