Imported Upstream version 0.63.0
[hcoop/debian/courier-authlib.git] / unicode / eucjp.c
1 /*
2 * EUC-JP <=> Unicode translate functions.
3 * by Hatuka*nezumi - IKEDA Soji <nezumi@jca.apc.org>
4 *
5 */
6
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "unicode.h"
10
11 extern const unicode_char* jisx0208_to_uni_tbls[];
12 extern const unicode_char* jisx0212_to_uni_tbls[];
13 extern const unsigned* uni_to_jisx0208_tbls[];
14 extern const unsigned* uni_to_jisx0212_tbls[];
15
16 static unicode_char *c2u(const struct unicode_info *u,
17 const char *eucjp_str, int *err)
18 {
19 unicode_char *uc=0;
20 unsigned char hi=0, lo=0;
21 int len=0;
22 int i=0;
23 int pos=0;
24
25 if(err) *err = -1;
26
27 len = strlen(eucjp_str);
28 uc = (unicode_char*)malloc((len+1) * sizeof(unicode_char) *2);
29
30 if (!uc)
31 return NULL;
32
33 for(i=0; i<len;) {
34 /* US-ASCII */
35 if((unsigned char)eucjp_str[i] < 0x80)
36 {
37 uc[pos++] = (unicode_char)(eucjp_str[i]);
38 i++;
39 }
40 /* JIS X 0201 GR; SS2 */
41 else if ((unsigned char)eucjp_str[i] == 0x8e
42 && (unsigned char)eucjp_str[i+1] >= 0xa1
43 && (unsigned char)eucjp_str[i+1] <= 0xdf)
44 {
45 lo = (unsigned char)eucjp_str[i+1];
46
47 /* EUCJP -> JIS */
48 lo -= 0x80;
49
50 uc[pos++] = (unicode_char)(lo+(unsigned)0xff40);
51 i+=2;
52 }
53 /* JIS X 0212; SS3 */
54 else if ((unsigned char)eucjp_str[i] == 0x8f
55 && (unsigned char)eucjp_str[i+1] >= 0xa1
56 && (unsigned char)eucjp_str[i+2] >= 0xa1)
57 {
58 hi = (unsigned char)eucjp_str[i+1];
59 lo = (unsigned char)eucjp_str[i+2];
60
61 /* EUCJP -> JIS */
62 hi -= 0x80;
63 lo -= 0x80;
64
65 if (jisx0212_to_uni_tbls[hi-0x21] != NULL
66 && jisx0212_to_uni_tbls[hi-0x21][lo-0x21] != 0x003f)
67 uc[pos++] = jisx0212_to_uni_tbls[hi-0x21][lo-0x21];
68 else if (err)
69 {
70 *err = i;
71 free(uc);
72 return NULL;
73 }
74 else
75 uc[pos++] = (unicode_char)0xfffd;
76 i+=3;
77 }
78 /* JIS X 0208 */
79 else if ((unsigned char)eucjp_str[i] >= 0xa1
80 && (unsigned char)eucjp_str[i+1] >= 0xa1)
81 {
82 hi = (unsigned char)eucjp_str[i];
83 lo = (unsigned char)eucjp_str[i+1];
84
85 /* EUCJP -> JIS */
86 hi -= 0x80;
87 lo -= 0x80;
88
89 /* JIS -> Unicode */
90 if (jisx0208_to_uni_tbls[hi-0x21] != NULL
91 && jisx0208_to_uni_tbls[hi-0x21][lo-0x21] != 0x003f)
92 uc[pos++] = jisx0208_to_uni_tbls[hi-0x21][lo-0x21];
93
94 else if (err)
95 {
96 *err = i;
97 free(uc);
98 return NULL;
99 }
100 else
101 uc[pos++] = (unicode_char)0xfffd;
102 i+=2;
103 }
104 /* Not found */
105 else if (err)
106 {
107 *err = i;
108 free(uc);
109 return NULL;
110 }
111 else
112 {
113 uc[pos++] = (unicode_char)0xfffd;
114 i++;
115 }
116 }
117 uc[pos++] = 0;
118
119 return uc;
120 }
121
122 static char *u2c(const struct unicode_info *u,
123 const unicode_char *str, int *err)
124 {
125 int i=0;
126 int pos=0;
127 int len=0;
128 char* s;
129
130 if(err) *err = -1;
131
132 while(str[len])
133 len++;
134 s = malloc((len+1)*2);
135
136 if (!s)
137 return NULL;
138
139 for(i=0; str[i]; i++)
140 {
141 int jis_char = 0;
142 unsigned char hi=0, lo=0;
143
144 unsigned char str_i_high=str[i] >> 8;
145
146 /* EUC-JP is mapped inside BMP range. */
147 if (str[i] >= (unicode_char)0x10000)
148 {
149 if (err)
150 {
151 *err = i;
152 free(s);
153 return NULL;
154 }
155 s[pos++] = '?';
156 }
157 /* US-ASCII */
158 else if (str[i] < (unicode_char)0x0080)
159 s[pos++] = str[i];
160 /* For compatibility: 2 characters replaced by JIS X 0201 */
161 else if (str[i] == (unicode_char)0x00A5) /* YEN SIGN */
162 s[pos++] = 0x5C;
163 else if (str[i] == (unicode_char)0x203E) /* OVERLINE */
164 s[pos++] = 0x7E;
165 /* JIS X 0201 GR */
166 else if (str[i] >= (unicode_char)0xff61
167 && str[i] <= (unicode_char)0xff9f)
168 {
169 lo = (unsigned char)(str[i] - (unsigned)0xff40);
170 /* JIS -> EUCJP */
171 lo += 0x80;
172 s[pos++] = (char)0x8e;
173 s[pos++] = lo;
174 }
175 /* JIS X 0208 */
176 else if (uni_to_jisx0208_tbls[str_i_high] != NULL
177 && uni_to_jisx0208_tbls[str_i_high][str[i] & 0xff] != 0x003F)
178 {
179 /* Unicode -> JIS */
180 jis_char = uni_to_jisx0208_tbls[str_i_high][str[i] & 0xff];
181 hi = jis_char >> 8;
182 lo = jis_char & 0xff;
183
184 if (hi)
185 {
186 /* JIS -> EUCJP */
187 hi += 0x80;
188 lo += 0x80;
189
190 s[pos++] = hi;
191 s[pos++] = lo;
192 }
193 else if (err)
194 {
195 *err = i;
196 free(s);
197 return NULL;
198 }
199 else
200 s[pos++] = '?';
201 }
202 /* Otherwise, search on JIS X 0212 */
203 else if (uni_to_jisx0212_tbls[str_i_high] != NULL
204 && uni_to_jisx0212_tbls[str_i_high][str[i] & 0xff] != 0x003F)
205 {
206 /* Unicode -> JIS */
207 jis_char = uni_to_jisx0212_tbls[str_i_high][str[i] & 0xff];
208 hi = jis_char >> 8;
209 lo = jis_char & 0xff;
210
211 if (hi) {
212 /* JIS -> EUCJP */
213 hi += 0x80;
214 lo += 0x80;
215
216 s[pos++] = (char)0x8f;
217 s[pos++] = hi;
218 s[pos++] = lo;
219 }
220 else if (err)
221 {
222 *err = i;
223 free(s);
224 return NULL;
225 }
226 else
227 s[pos++] = '?';
228 }
229 /* Not found */
230 else if (err)
231 {
232 *err = i;
233 free(s);
234 return NULL;
235 }
236 else
237 s[pos++] = '?';
238 }
239 s[pos] = 0;
240
241 return s;
242 }
243
244 static char *toupper_func(const struct unicode_info *u,
245 const char *cp, int *ip)
246 {
247 unicode_char *uc = c2u(u, cp, ip);
248 char *s;
249 size_t i;
250
251 if (!uc)
252 return (NULL);
253
254 for (i=0; uc[i] && i<10000; i++) {
255 if ((unicode_char)'a' <= uc[i] && uc[i] <= (unicode_char)'z')
256 uc[i] = uc[i] - ((unicode_char)'a' - (unicode_char)'A');
257 }
258
259 s = u2c(u, uc, NULL);
260 free(uc);
261 return (s);
262 }
263
264 static char *tolower_func(const struct unicode_info *u,
265 const char *cp, int *ip)
266 {
267 unicode_char *uc = c2u(u, cp, ip);
268 char *s;
269 size_t i;
270
271 if (!uc)
272 return (NULL);
273
274 for (i=0; uc[i]; i++) {
275 if ((unicode_char)'A' <= uc[i] && uc[i] <= (unicode_char)'Z')
276 uc[i] = uc[i] + ((unicode_char)'a' - (unicode_char)'A');
277 }
278
279 s = u2c(u, uc, NULL);
280 free(uc);
281
282 return (s);
283 }
284
285
286 static char *totitle_func(const struct unicode_info *u,
287 const char *cp, int *ip)
288 {
289 unicode_char *uc = c2u(u, cp, ip);
290 char *s;
291
292 if (!uc)
293 return (NULL);
294
295 /* Uh, sorry, what's "title" char? */
296 /*
297 * for (i=0; uc[i]; i++)
298 * uc[i] = unicode_tc(uc[i]);
299 */
300
301 s = u2c(u, uc, NULL);
302 free(uc);
303 return (s);
304 }
305
306 extern const struct unicode_info unicode_UTF8;
307
308 const struct unicode_info unicode_EUC_JP = {
309 "EUC-JP",
310 UNICODE_MB | UNICODE_REPLACEABLE | UNICODE_USASCII |
311 UNICODE_HEADER_BASE64 | UNICODE_BODY_BASE64,
312 c2u,
313 u2c,
314 toupper_func,
315 tolower_func,
316 totitle_func,
317 &unicode_UTF8
318 };
319