2 * EUC-JP <=> Unicode translate functions.
3 * by Hatuka*nezumi - IKEDA Soji <nezumi@jca.apc.org>
/*
 * Conversion tables defined outside this file.  Each name is an array of
 * row pointers; the converters below compare a row pointer against NULL
 * before indexing into the row.
 *  - jisx0208_to_uni_tbls / jisx0212_to_uni_tbls: rows of unicode_char,
 *    indexed in c2u as [hi-0x21][lo-0x21].
 *  - uni_to_jisx0208_tbls / uni_to_jisx0212_tbls: rows of unsigned,
 *    indexed in u2c as [high byte of code point][code point & 0xff].
 * A cell equal to 0x003f is never used as a result by either converter;
 * presumably it marks an unmapped character -- confirm against the
 * table definitions.
 */
11 extern const unicode_char
* jisx0208_to_uni_tbls
[];
12 extern const unicode_char
* jisx0212_to_uni_tbls
[];
13 extern const unsigned* uni_to_jisx0208_tbls
[];
14 extern const unsigned* uni_to_jisx0212_tbls
[];
/*
 * c2u: convert an EUC-JP byte string into an array of unicode_char.
 *
 *   u          charset descriptor; not referenced by the statements
 *              visible in this listing.
 *   eucjp_str  input bytes; strlen() is applied to it, so it must be
 *              NUL-terminated.
 *   err        int out-parameter; how it is set is not visible here.
 *
 * Returns a buffer obtained from malloc().  The callers in this file
 * scan the result up to a zero element, so it is presumably
 * zero-terminated -- the terminating store is not visible here.
 *
 * NOTE(review): this listing omits some statements (the embedded line
 * numbers skip), including the loop header, the advance of i and any
 * adjustment of hi/lo between reading and use.
 */
16 static unicode_char
*c2u(const struct unicode_info
*u
,
17 const char *eucjp_str
, int *err
)
20 unsigned char hi
=0, lo
=0;
27 len
= strlen(eucjp_str
);
/*
 * Room for 2*(len+1) elements.  Every visible branch stores exactly one
 * element per input sequence, so at most len elements plus a terminator
 * are needed; the factor 2 looks like headroom.
 * NOTE(review): a NULL check of the malloc result is not visible here.
 */
28 uc
= (unicode_char
*)malloc((len
+1) * sizeof(unicode_char
) *2);
/* Byte below 0x80: stored as a code point of the same value. */
35 if((unsigned char)eucjp_str
[i
] < 0x80)
37 uc
[pos
++] = (unicode_char
)(eucjp_str
[i
]);
/*
 * Lead byte 0x8e followed by a byte in 0xa1..0xdf.  The && chain reads
 * eucjp_str[i+1] only after byte i matched, so for i < len the read
 * reaches the NUL terminator at worst.
 */
40 /* JIS X 0201 GR; SS2 */
41 else if ((unsigned char)eucjp_str
[i
] == 0x8e
42 && (unsigned char)eucjp_str
[i
+1] >= 0xa1
43 && (unsigned char)eucjp_str
[i
+1] <= 0xdf)
45 lo
= (unsigned char)eucjp_str
[i
+1];
/*
 * NOTE(review): with lo still in 0xa1..0xdf, lo + 0xff40 would be
 * 0xffe1..0x1001f.  u2c treats 0xff61..0xff9f as the inverse range, so
 * lo is presumably reduced by 0x80 in a statement missing from this
 * listing -- confirm.
 */
50 uc
[pos
++] = (unicode_char
)(lo
+(unsigned)0xff40);
/*
 * Lead byte 0x8f followed by two bytes >= 0xa1: three-byte sequence
 * looked up in the JIS X 0212 table.  Byte i+2 is read only after byte
 * i+1 passed the >= 0xa1 test, so it cannot lie past the terminator.
 */
54 else if ((unsigned char)eucjp_str
[i
] == 0x8f
55 && (unsigned char)eucjp_str
[i
+1] >= 0xa1
56 && (unsigned char)eucjp_str
[i
+2] >= 0xa1)
58 hi
= (unsigned char)eucjp_str
[i
+1];
59 lo
= (unsigned char)eucjp_str
[i
+2];
/*
 * Use the table cell only when the row exists and the cell is not
 * 0x003f.
 * NOTE(review): hi and lo are >= 0xa1 as read; whether the high bit is
 * stripped before the -0x21 indexing is not visible here -- confirm
 * against the table dimensions.
 */
65 if (jisx0212_to_uni_tbls
[hi
-0x21] != NULL
66 && jisx0212_to_uni_tbls
[hi
-0x21][lo
-0x21] != 0x003f)
67 uc
[pos
++] = jisx0212_to_uni_tbls
[hi
-0x21][lo
-0x21];
/* Stores U+FFFD; presumably the fallback when the lookup fails. */
75 uc
[pos
++] = (unicode_char
)0xfffd;
/* Two bytes >= 0xa1: looked up in the JIS X 0208 table. */
79 else if ((unsigned char)eucjp_str
[i
] >= 0xa1
80 && (unsigned char)eucjp_str
[i
+1] >= 0xa1)
82 hi
= (unsigned char)eucjp_str
[i
];
83 lo
= (unsigned char)eucjp_str
[i
+1];
/* Same row-exists and not-0x003f test as the JIS X 0212 branch. */
90 if (jisx0208_to_uni_tbls
[hi
-0x21] != NULL
91 && jisx0208_to_uni_tbls
[hi
-0x21][lo
-0x21] != 0x003f)
92 uc
[pos
++] = jisx0208_to_uni_tbls
[hi
-0x21][lo
-0x21];
/* Stores U+FFFD; presumably the fallback when the lookup fails. */
101 uc
[pos
++] = (unicode_char
)0xfffd;
/* Stores U+FFFD; presumably for bytes matching none of the branches. */
113 uc
[pos
++] = (unicode_char
)0xfffd;
/*
 * u2c: convert a zero-terminated unicode_char array into an EUC-JP byte
 * string.
 *
 *   u    charset descriptor; not referenced by the statements visible
 *        in this listing.
 *   str  input code points; the loop stops at the first zero element.
 *   err  int out-parameter; how it is set is not visible here.
 *
 * Returns a buffer obtained from malloc().
 *
 * NOTE(review): this listing omits some statements (the embedded line
 * numbers skip), including the computation of len, most branch bodies
 * and the final termination and return.
 */
122 static char *u2c(const struct unicode_info
*u
,
123 const unicode_char
*str
, int *err
)
/*
 * NOTE(review): (len+1)*2 bytes are allocated.  The JIS X 0212 branch
 * below emits a 0x8f prefix and presumably two more bytes, i.e. three
 * bytes for one input element.  If len counts input elements this
 * buffer can be too small -- confirm how len is computed and whether
 * the buffer is grown elsewhere.  A NULL check of the malloc result is
 * not visible here either.
 */
134 s
= malloc((len
+1)*2);
139 for(i
=0; str
[i
]; i
++)
142 unsigned char hi
=0, lo
=0;
/*
 * High byte of the code point, used as the row index of the tables.
 * The value is truncated to 8 bits; code points >= 0x10000 are caught
 * by the first branch below before any table lookup.
 */
144 unsigned char str_i_high
=str
[i
] >> 8;
146 /* EUC-JP is mapped inside BMP range. */
147 if (str
[i
] >= (unicode_char
)0x10000)
/* Code point below 0x80; the branch body is not visible here. */
158 else if (str
[i
] < (unicode_char
)0x0080)
160 /* For compatibility: 2 characters replaced by JIS X 0201 */
161 else if (str
[i
] == (unicode_char
)0x00A5) /* YEN SIGN */
163 else if (str
[i
] == (unicode_char
)0x203E) /* OVERLINE */
/*
 * U+FF61..U+FF9F: emitted with a 0x8e lead byte.  lo becomes
 * 0x21..0x5f here; setting its high bit before output presumably
 * happens in a statement missing from this listing.
 */
166 else if (str
[i
] >= (unicode_char
)0xff61
167 && str
[i
] <= (unicode_char
)0xff9f)
169 lo
= (unsigned char)(str
[i
] - (unsigned)0xff40);
172 s
[pos
++] = (char)0x8e;
/* JIS X 0208: use the cell when the row exists and is not 0x003F. */
176 else if (uni_to_jisx0208_tbls
[str_i_high
] != NULL
177 && uni_to_jisx0208_tbls
[str_i_high
][str
[i
] & 0xff] != 0x003F)
180 jis_char
= uni_to_jisx0208_tbls
[str_i_high
][str
[i
] & 0xff];
/* Low byte of the table value; the matching hi is not visible here. */
182 lo
= jis_char
& 0xff;
202 /* Otherwise, search on JIS X 0212 */
203 else if (uni_to_jisx0212_tbls
[str_i_high
] != NULL
204 && uni_to_jisx0212_tbls
[str_i_high
][str
[i
] & 0xff] != 0x003F)
207 jis_char
= uni_to_jisx0212_tbls
[str_i_high
][str
[i
] & 0xff];
209 lo
= jis_char
& 0xff;
/* 0x8f prefix byte introducing the JIS X 0212 sequence. */
216 s
[pos
++] = (char)0x8f;
/*
 * toupper_func: upper-case the ASCII letters of an EUC-JP string.
 *
 * Decodes cp with c2u (ip is passed through as its err argument),
 * maps code points a..z to A..Z in place, then re-encodes with u2c
 * using a NULL err pointer and keeps the result in s.
 *
 * NOTE(review): the loop stops after 10000 elements, so letters past
 * that index stay unchanged; tolower_func has no such cap -- confirm
 * whether the limit is intentional.
 * NOTE(review): the NULL check on uc, the release of uc and the return
 * statement are not visible in this listing.
 */
244 static char *toupper_func(const struct unicode_info
*u
,
245 const char *cp
, int *ip
)
247 unicode_char
*uc
= c2u(u
, cp
, ip
);
254 for (i
=0; uc
[i
] && i
<10000; i
++) {
255 if ((unicode_char
)'a' <= uc
[i
] && uc
[i
] <= (unicode_char
)'z')
256 uc
[i
] = uc
[i
] - ((unicode_char
)'a' - (unicode_char
)'A');
259 s
= u2c(u
, uc
, NULL
);
/*
 * tolower_func: lower-case the ASCII letters of an EUC-JP string.
 *
 * Decodes cp with c2u (ip is passed through as its err argument),
 * maps code points A..Z to a..z in place over the whole
 * zero-terminated array, then re-encodes with u2c using a NULL err
 * pointer and keeps the result in s.
 *
 * NOTE(review): the NULL check on uc, the release of uc and the return
 * statement are not visible in this listing.
 */
264 static char *tolower_func(const struct unicode_info
*u
,
265 const char *cp
, int *ip
)
267 unicode_char
*uc
= c2u(u
, cp
, ip
);
274 for (i
=0; uc
[i
]; i
++) {
275 if ((unicode_char
)'A' <= uc
[i
] && uc
[i
] <= (unicode_char
)'Z')
276 uc
[i
] = uc
[i
] + ((unicode_char
)'a' - (unicode_char
)'A');
279 s
= u2c(u
, uc
, NULL
);
/*
 * totitle_func: title-case entry point for EUC-JP strings.
 *
 * Decodes cp with c2u (ip is passed through as its err argument) and
 * re-encodes the array with u2c using a NULL err pointer.  No case
 * mapping is visible between the two calls: the only loop shown is the
 * disabled unicode_tc() loop kept as comment text below, so the text
 * appears to make a plain round trip through Unicode.
 *
 * NOTE(review): the NULL check on uc, the release of uc and the return
 * statement are not visible in this listing.
 */
286 static char *totitle_func(const struct unicode_info
*u
,
287 const char *cp
, int *ip
)
289 unicode_char
*uc
= c2u(u
, cp
, ip
);
295 /* Uh, sorry, what's "title" char? */
297 * for (i=0; uc[i]; i++)
298 * uc[i] = unicode_tc(uc[i]);
301 s
= u2c(u
, uc
, NULL
);
/*
 * Charset descriptor defined outside this file.  How it is used is not
 * visible in this listing; presumably the EUC-JP descriptor that
 * follows refers to it -- confirm.
 */
306 extern const struct unicode_info unicode_UTF8
;
308 const struct unicode_info unicode_EUC_JP
= {
310 UNICODE_MB
| UNICODE_REPLACEABLE
| UNICODE_USASCII
|
311 UNICODE_HEADER_BASE64
| UNICODE_BODY_BASE64
,