Commit | Line | Data |
---|---|---|
8d138742 CE |
1 | /* |
2 | ** Copyright 2000-2003 Double Precision, Inc. | |
3 | ** See COPYING for distribution information. | |
4 | ** | |
5 | ** $Id: unicode.c,v 1.9 2004/02/08 04:59:15 mrsam Exp $ | |
6 | */ | |
7 | ||
8 | #include "unicode_config.h" | |
9 | #include "unicode.h" | |
10 | #include <string.h> | |
11 | #include <ctype.h> | |
12 | #include <stdlib.h> | |
13 | #include <errno.h> | |
14 | ||
15 | const char *unicode_default_chset() | |
16 | { | |
17 | const char *p=UNICODECHARSET; | |
18 | ||
19 | if (unicode_find(p)) | |
20 | return (p); | |
21 | ||
22 | return (unicode_ISO8859_1.chset); | |
23 | } | |
24 | ||
25 | const struct unicode_info *unicode_find(const char *chset) | |
26 | { | |
27 | char *p, *q; | |
28 | int i; | |
29 | ||
30 | if (!chset) /* Default character set */ | |
31 | return (&unicode_ISO8859_1); | |
32 | ||
33 | p=strdup(chset); | |
34 | if (!p) | |
35 | return (0); | |
36 | ||
37 | for (q=p; *q; q++) | |
38 | *q=toupper(*q); | |
39 | ||
40 | if (strcmp(unicode_ISO8859_1.chset, p) == 0) | |
41 | { | |
42 | free(p); | |
43 | return (&unicode_ISO8859_1); | |
44 | } | |
45 | ||
46 | for (i=0; unicode_chsetlist[i].chsetname; i++) | |
47 | if (strcmp(unicode_chsetlist[i].chsetname, p) == 0) | |
48 | { | |
49 | free(p); | |
50 | return (unicode_chsetlist[i].ptr); | |
51 | } | |
52 | free(p); | |
53 | return (0); | |
54 | } | |
55 | ||
56 | char *unicode_convert(const char *txt, const struct unicode_info *from, | |
57 | const struct unicode_info *to) | |
58 | { | |
59 | unicode_char *uc; | |
60 | char *s; | |
61 | int dummy; | |
62 | ||
63 | if (strcmp(from->chset, to->chset) == 0) /* NOOP */ | |
64 | return (strdup(txt)); | |
65 | ||
66 | uc=(*from->c2u)(from, txt, &dummy); | |
67 | if (!uc) | |
68 | { | |
69 | if (dummy >= 0) | |
70 | errno=EINVAL; | |
71 | ||
72 | return (0); | |
73 | } | |
74 | ||
75 | s=(*to->u2c)(to, uc, &dummy); | |
76 | ||
77 | free(uc); | |
78 | ||
79 | if (dummy >= 0) | |
80 | errno=EINVAL; | |
81 | ||
82 | return (s); | |
83 | } | |
84 | ||
/*
** Same as unicode_convert(), except that the source character set is
** given by name.  If unicode_find() does not return a table for that
** name, errno is set to EINVAL and a null pointer is returned.
*/
char *unicode_convert_fromchset(const char *txt, const char *from,
				const struct unicode_info *to)
{
	const struct unicode_info *src=unicode_find(from);

	if (src)
		return (unicode_convert(txt, src, to));

	errno=EINVAL;
	return (0);
}
97 | ||
98 | /* | |
99 | ** Convert between character sets, except ignore errors. | |
100 | */ | |
101 | ||
/*
** Growable character buffer.
*/
struct ux_buf {
	char *buffer;		/* Heap block; null until first allocated */
	size_t bufsize;		/* Number of bytes allocated at buffer */
} ;

/*
** Make sure that p->buffer is strictly larger than l bytes.
**
** Nothing is done when the buffer is already big enough.  Otherwise
** the buffer is (re)allocated to l+64 bytes, the extra 64 bytes being
** slack so that a run of small appends does not reallocate each time.
**
** Returns 0 on success.  Returns -1 if the memory cannot be obtained,
** in which case p is left unchanged and its old buffer remains valid.
*/
static int ux_alloc(struct ux_buf *p, size_t l)
{
	char *newbuf;

	if (l < p->bufsize)
		return (0);

	/*
	** l+64 must not wrap around: a wrapped value would allocate a
	** tiny block while reporting success for a huge request.
	*/
	if (l > (size_t)-1 - 64)
	{
		errno=ENOMEM;
		return (-1);
	}

	l += 64;

	/* realloc() of a null pointer behaves like malloc() */
	newbuf=realloc(p->buffer, l);

	if (!newbuf)
		return (-1);

	p->buffer=newbuf;
	p->bufsize=l;
	return (0);
}
125 | ||
126 | char *unicode_xconvert(const char *txt, const struct unicode_info *from, | |
127 | const struct unicode_info *to) | |
128 | { | |
129 | unicode_char *uc; | |
130 | char *s, *cur_conv; | |
131 | int dummy, dummy2; | |
132 | struct ux_buf dst_str; | |
133 | ||
134 | char *orig_str=strdup(txt); | |
135 | ||
136 | if (!orig_str) | |
137 | return (0); | |
138 | ||
139 | if (strcmp(from->chset, to->chset) == 0) /* NOOP */ | |
140 | return (orig_str); | |
141 | ||
142 | dst_str.bufsize=0; | |
143 | dst_str.buffer=0; | |
144 | ||
145 | if (ux_alloc(&dst_str, strlen(txt)*2)) | |
146 | { | |
147 | free(orig_str); | |
148 | return (NULL); | |
149 | } | |
150 | ||
151 | dst_str.buffer[0]=0; | |
152 | ||
153 | cur_conv=orig_str; | |
154 | ||
155 | while (*cur_conv) | |
156 | { | |
157 | size_t l; | |
158 | unicode_char *ucptr; | |
159 | ||
160 | l=strlen(cur_conv); | |
161 | ||
162 | if (from->flags & UNICODE_REPLACEABLE) | |
163 | { | |
164 | uc=(*from->c2u)(from, cur_conv, NULL); | |
165 | if (!uc) | |
166 | { | |
167 | free(orig_str); | |
168 | free(dst_str.buffer); | |
169 | return NULL; | |
170 | } | |
171 | } | |
172 | else | |
173 | uc=(*from->c2u)(from, cur_conv, &dummy); | |
174 | ||
175 | if (!uc) | |
176 | { | |
177 | char save_char; | |
178 | ||
179 | if (dummy < 0) | |
180 | { | |
181 | free(orig_str); | |
182 | free(dst_str.buffer); | |
183 | return (NULL); | |
184 | } | |
185 | ||
186 | /* Error converting original text to unicode. | |
187 | ** Back up, and convert all the characters up until | |
188 | ** the error character. | |
189 | */ | |
190 | ||
191 | l=dummy; | |
192 | ||
193 | save_char=cur_conv[dummy]; | |
194 | ||
195 | cur_conv[dummy]=0; | |
196 | ||
197 | uc=(*from->c2u)(from, cur_conv, &dummy2); | |
198 | cur_conv[dummy]=save_char; | |
199 | ||
200 | if (!uc) | |
201 | { | |
202 | free(orig_str); | |
203 | free(dst_str.buffer); | |
204 | return (NULL); | |
205 | } | |
206 | } | |
207 | ||
208 | /* Ok, now convert unicode to dest charset, using the same | |
209 | ** trial-and-error process. | |
210 | */ | |
211 | ||
212 | ucptr=uc; | |
213 | ||
214 | while (*ucptr) | |
215 | { | |
216 | size_t cnt_done; | |
217 | ||
218 | for (cnt_done=0; ucptr[cnt_done]; cnt_done++) | |
219 | ; | |
220 | ||
221 | if (to->flags & UNICODE_REPLACEABLE) | |
222 | { | |
223 | s=(*to->u2c)(to, ucptr, NULL); | |
224 | if (!s) | |
225 | { | |
226 | free(orig_str); | |
227 | free(dst_str.buffer); | |
228 | free(uc); | |
229 | return NULL; | |
230 | } | |
231 | } | |
232 | else | |
233 | s=(*to->u2c)(to, ucptr, &dummy); | |
234 | ||
235 | if (!s) | |
236 | { | |
237 | unicode_char save_char; | |
238 | ||
239 | if (dummy < 0) | |
240 | { | |
241 | free(orig_str); | |
242 | free(dst_str.buffer); | |
243 | free(uc); | |
244 | return (NULL); | |
245 | } | |
246 | ||
247 | cnt_done=dummy; | |
248 | ||
249 | save_char=ucptr[dummy]; | |
250 | ucptr[dummy]=0; | |
251 | s=(*to->u2c)(to, ucptr, &dummy2); | |
252 | ucptr[dummy]=save_char; | |
253 | ||
254 | if (!s) | |
255 | { | |
256 | free(orig_str); | |
257 | free(dst_str.buffer); | |
258 | free(uc); | |
259 | return (NULL); | |
260 | } | |
261 | } | |
262 | ||
263 | if (ux_alloc(&dst_str, | |
264 | strlen(dst_str.buffer)+strlen(s)+2)) | |
265 | { | |
266 | free(s); | |
267 | free(orig_str); | |
268 | free(dst_str.buffer); | |
269 | free(uc); | |
270 | return (NULL); | |
271 | } | |
272 | ||
273 | strcat(dst_str.buffer, s); | |
274 | free(s); | |
275 | ucptr += cnt_done; | |
276 | if (*ucptr) | |
277 | { | |
278 | strcat(dst_str.buffer, "."); | |
279 | ++ucptr; | |
280 | } | |
281 | } | |
282 | ||
283 | cur_conv += l; | |
284 | ||
285 | if (*cur_conv) | |
286 | { | |
287 | char buf[2]; | |
288 | ||
289 | if (ux_alloc(&dst_str, strlen(dst_str.buffer)+1)) | |
290 | { | |
291 | free(orig_str); | |
292 | free(dst_str.buffer); | |
293 | free(uc); | |
294 | return (NULL); | |
295 | } | |
296 | ||
297 | buf[0]= *cur_conv++; | |
298 | buf[1]=0; | |
299 | strcat(dst_str.buffer, buf); | |
300 | } | |
301 | free(uc); | |
302 | } | |
303 | ||
304 | free(orig_str); | |
305 | return (dst_str.buffer); | |
306 | } |