Imported Upstream version 0.63.0
[hcoop/debian/courier-authlib.git] / unicode / unicode.c
1 /*
2 ** Copyright 2000-2003 Double Precision, Inc.
3 ** See COPYING for distribution information.
4 **
5 ** $Id: unicode.c,v 1.9 2004/02/08 04:59:15 mrsam Exp $
6 */
7
8 #include "unicode_config.h"
9 #include "unicode.h"
10 #include <string.h>
11 #include <ctype.h>
12 #include <stdlib.h>
13 #include <errno.h>
14
15 const char *unicode_default_chset()
16 {
17 const char *p=UNICODECHARSET;
18
19 if (unicode_find(p))
20 return (p);
21
22 return (unicode_ISO8859_1.chset);
23 }
24
25 const struct unicode_info *unicode_find(const char *chset)
26 {
27 char *p, *q;
28 int i;
29
30 if (!chset) /* Default character set */
31 return (&unicode_ISO8859_1);
32
33 p=strdup(chset);
34 if (!p)
35 return (0);
36
37 for (q=p; *q; q++)
38 *q=toupper(*q);
39
40 if (strcmp(unicode_ISO8859_1.chset, p) == 0)
41 {
42 free(p);
43 return (&unicode_ISO8859_1);
44 }
45
46 for (i=0; unicode_chsetlist[i].chsetname; i++)
47 if (strcmp(unicode_chsetlist[i].chsetname, p) == 0)
48 {
49 free(p);
50 return (unicode_chsetlist[i].ptr);
51 }
52 free(p);
53 return (0);
54 }
55
56 char *unicode_convert(const char *txt, const struct unicode_info *from,
57 const struct unicode_info *to)
58 {
59 unicode_char *uc;
60 char *s;
61 int dummy;
62
63 if (strcmp(from->chset, to->chset) == 0) /* NOOP */
64 return (strdup(txt));
65
66 uc=(*from->c2u)(from, txt, &dummy);
67 if (!uc)
68 {
69 if (dummy >= 0)
70 errno=EINVAL;
71
72 return (0);
73 }
74
75 s=(*to->u2c)(to, uc, &dummy);
76
77 free(uc);
78
79 if (dummy >= 0)
80 errno=EINVAL;
81
82 return (s);
83 }
84
/*
** Same as unicode_convert(), except that the source character set is given
** by name and resolved with unicode_find().
**
** Returns 0 with errno set to EINVAL when unicode_find() does not return an
** entry for the name; otherwise returns the result of unicode_convert().
*/
char *unicode_convert_fromchset(const char *txt, const char *from,
				const struct unicode_info *to)
{
	const struct unicode_info *src_info=unicode_find(from);

	if (src_info)
		return (unicode_convert(txt, src_info, to));

	errno=EINVAL;
	return (0);
}
97
/*
** Convert between character sets, except ignore errors.
*/
101
/*
** Growable character buffer: a heap pointer together with the number of
** bytes currently allocated for it.
*/
struct ux_buf {
	char *buffer;		/* Allocated storage, or 0 if none yet */
	size_t bufsize;		/* Size of the allocation, in bytes */
};
106
107 static int ux_alloc(struct ux_buf *p, size_t l)
108 {
109 char *newbuf;
110
111 if (l < p->bufsize)
112 return (0);
113
114 l += 64;
115
116 newbuf=p->buffer ? realloc(p->buffer, l):malloc(l);
117
118 if (!newbuf)
119 return (-1);
120
121 p->buffer=newbuf;
122 p->bufsize=l;
123 return (0);
124 }
125
126 char *unicode_xconvert(const char *txt, const struct unicode_info *from,
127 const struct unicode_info *to)
128 {
129 unicode_char *uc;
130 char *s, *cur_conv;
131 int dummy, dummy2;
132 struct ux_buf dst_str;
133
134 char *orig_str=strdup(txt);
135
136 if (!orig_str)
137 return (0);
138
139 if (strcmp(from->chset, to->chset) == 0) /* NOOP */
140 return (orig_str);
141
142 dst_str.bufsize=0;
143 dst_str.buffer=0;
144
145 if (ux_alloc(&dst_str, strlen(txt)*2))
146 {
147 free(orig_str);
148 return (NULL);
149 }
150
151 dst_str.buffer[0]=0;
152
153 cur_conv=orig_str;
154
155 while (*cur_conv)
156 {
157 size_t l;
158 unicode_char *ucptr;
159
160 l=strlen(cur_conv);
161
162 if (from->flags & UNICODE_REPLACEABLE)
163 {
164 uc=(*from->c2u)(from, cur_conv, NULL);
165 if (!uc)
166 {
167 free(orig_str);
168 free(dst_str.buffer);
169 return NULL;
170 }
171 }
172 else
173 uc=(*from->c2u)(from, cur_conv, &dummy);
174
175 if (!uc)
176 {
177 char save_char;
178
179 if (dummy < 0)
180 {
181 free(orig_str);
182 free(dst_str.buffer);
183 return (NULL);
184 }
185
186 /* Error converting original text to unicode.
187 ** Back up, and convert all the characters up until
188 ** the error character.
189 */
190
191 l=dummy;
192
193 save_char=cur_conv[dummy];
194
195 cur_conv[dummy]=0;
196
197 uc=(*from->c2u)(from, cur_conv, &dummy2);
198 cur_conv[dummy]=save_char;
199
200 if (!uc)
201 {
202 free(orig_str);
203 free(dst_str.buffer);
204 return (NULL);
205 }
206 }
207
208 /* Ok, now convert unicode to dest charset, using the same
209 ** trial-and-error process.
210 */
211
212 ucptr=uc;
213
214 while (*ucptr)
215 {
216 size_t cnt_done;
217
218 for (cnt_done=0; ucptr[cnt_done]; cnt_done++)
219 ;
220
221 if (to->flags & UNICODE_REPLACEABLE)
222 {
223 s=(*to->u2c)(to, ucptr, NULL);
224 if (!s)
225 {
226 free(orig_str);
227 free(dst_str.buffer);
228 free(uc);
229 return NULL;
230 }
231 }
232 else
233 s=(*to->u2c)(to, ucptr, &dummy);
234
235 if (!s)
236 {
237 unicode_char save_char;
238
239 if (dummy < 0)
240 {
241 free(orig_str);
242 free(dst_str.buffer);
243 free(uc);
244 return (NULL);
245 }
246
247 cnt_done=dummy;
248
249 save_char=ucptr[dummy];
250 ucptr[dummy]=0;
251 s=(*to->u2c)(to, ucptr, &dummy2);
252 ucptr[dummy]=save_char;
253
254 if (!s)
255 {
256 free(orig_str);
257 free(dst_str.buffer);
258 free(uc);
259 return (NULL);
260 }
261 }
262
263 if (ux_alloc(&dst_str,
264 strlen(dst_str.buffer)+strlen(s)+2))
265 {
266 free(s);
267 free(orig_str);
268 free(dst_str.buffer);
269 free(uc);
270 return (NULL);
271 }
272
273 strcat(dst_str.buffer, s);
274 free(s);
275 ucptr += cnt_done;
276 if (*ucptr)
277 {
278 strcat(dst_str.buffer, ".");
279 ++ucptr;
280 }
281 }
282
283 cur_conv += l;
284
285 if (*cur_conv)
286 {
287 char buf[2];
288
289 if (ux_alloc(&dst_str, strlen(dst_str.buffer)+1))
290 {
291 free(orig_str);
292 free(dst_str.buffer);
293 free(uc);
294 return (NULL);
295 }
296
297 buf[0]= *cur_conv++;
298 buf[1]=0;
299 strcat(dst_str.buffer, buf);
300 }
301 free(uc);
302 }
303
304 free(orig_str);
305 return (dst_str.buffer);
306 }