2 * Shift_JIS <=> Unicode translation functions.
3 * by Yu Kobayashi <mail@yukoba.jp>
4 * Modification for JIS X 0208:1997 Annex 1 implementation
5 * by Hatuka*nezumi - IKEDA Soji <nezumi@jca.apc.org>
/* JIS X 0208 -> Unicode lookup tables, defined elsewhere.  c2u indexes
 * this array with [hi-0x21], tests the selected entry against NULL, and
 * then indexes that entry with [lo-0x21]. */
15 extern const unicode_char
* jisx0208_to_uni_tbls
[];
/* Unicode -> JIS X 0208 lookup tables, defined elsewhere.  u2c indexes
 * this array with the high byte of a code point (entries may be NULL)
 * and the selected entry with the code point's low byte. */
16 extern const unsigned* uni_to_jisx0208_tbls
[];
/*
 * c2u: convert the NUL-terminated Shift_JIS string sjis_str into an
 * array of unicode_char allocated with malloc().
 *
 * Visible mappings:
 *   - byte 0x5C -> U+00A5 (YEN SIGN), byte 0x7E -> U+203E (OVERLINE);
 *   - any other byte below 0x80 is copied unchanged;
 *   - single bytes 0xA1..0xDF are mapped through an offset of 0xff40;
 *   - a lead byte 0x81..0x9F or 0xE0..0xEF followed by a trail byte
 *     0x40..0x7E or 0x80..0xFC is looked up in jisx0208_to_uni_tbls;
 *   - U+FFFD is stored on two further paths.
 *
 * NOTE(review): this listing omits parts of the function (the loop
 * header and index advancement, the Shift_JIS -> JIS arithmetic, any
 * use of u and err, the check of the malloc() result, termination of
 * the output and the return statement).  Confirm against the full file.
 */
18 static unicode_char
*c2u(const struct unicode_info
*u
,
19 const char *sjis_str
, int *err
)
22 unsigned char hi
=0, lo
=0;
/* Input length in bytes. */
29 len
= strlen(sjis_str
);
/* Room for twice (len+1) unicode_char elements. */
30 uc
= (unicode_char
*)malloc((len
+1) * sizeof(unicode_char
) *2);
36 /* 2 Characters replaced by JIS X 0201 */
37 if (sjis_str
[i
] == 0x5C) /* YEN SIGN */
39 uc
[pos
++] = (unicode_char
)0x00A5;
42 else if (sjis_str
[i
] == 0x7E) /* OVERLINE */
44 uc
[pos
++] = (unicode_char
)0x203E;
47 /* Other JIS X 0201 GL */
/* Where plain char is signed, a byte >= 0x80 becomes a very large value
 * after the cast to unsigned, so only 0x00..0x7F can satisfy this test. */
48 else if ((unsigned)sjis_str
[i
] < 0x80)
50 uc
[pos
++] = (unicode_char
)sjis_str
[i
];
/* Single-byte range 0xA1..0xDF. */
54 else if ((unsigned char)sjis_str
[i
] >= 0xa1
55 && (unsigned char)sjis_str
[i
] <= 0xdf)
57 lo
= (unsigned char)sjis_str
[i
];
59 /* SHIFT_JIS -> JIS */
/* NOTE(review): the conversion announced above is not visible in this
 * listing.  lo + 0xff40 only falls in U+FF61..U+FF9F (the range u2c
 * accepts for the reverse mapping) if lo has first been reduced to
 * 0x21..0x5F -- confirm against the full file. */
62 uc
[pos
++] = (unicode_char
)(lo
+(unsigned)0xff40);
65 /* 2 byte characters */
/* At the last byte of the string sjis_str[i+1] is the terminating NUL,
 * which is below 0x40 and therefore fails the trail-byte test. */
66 else if ((((unsigned char)sjis_str
[i
] >= 0x81
67 && (unsigned char)sjis_str
[i
] <= 0x9F)
68 || ((unsigned char)sjis_str
[i
] >= 0xE0
69 && (unsigned char)sjis_str
[i
] <= 0xEF))
70 && (((unsigned char)sjis_str
[i
+1] >= 0x40
71 && (unsigned char)sjis_str
[i
+1] <= 0x7E)
72 || ((unsigned char)sjis_str
[i
+1] >= 0x80
73 && (unsigned char)sjis_str
[i
+1] <= 0xFC)))
75 hi
= (unsigned char)sjis_str
[i
];
76 lo
= (unsigned char)sjis_str
[i
+1];
/* hi and lo are used as 0x21-based indexes below, so they are presumably
 * rewritten to JIS values by lines not shown here -- confirm.
 * The table value is stored only when a table exists for hi and its
 * entry is not 0x003F ('?'). */
106 if (jisx0208_to_uni_tbls
[hi
-0x21] != NULL
107 && jisx0208_to_uni_tbls
[hi
-0x21][lo
-0x21] !=
108 (unicode_char
)0x003F)
109 uc
[pos
++] = jisx0208_to_uni_tbls
[hi
-0x21][lo
-0x21];
/* U+FFFD; presumably the fallback when the lookup above fails (the
 * branch condition is not visible in this listing). */
117 uc
[pos
++] = (unicode_char
)0xFFFD;
/* U+FFFD again; presumably for input bytes matching none of the cases
 * above (the branch condition is not visible in this listing). */
129 uc
[pos
++] = (unicode_char
)0xFFFD;
/*
 * u2c: convert the 0-terminated unicode_char array str into a char
 * string allocated with malloc().
 *
 * Visible mappings:
 *   - code points below 0x0080 are copied as a single byte;
 *   - U+00A5 (YEN SIGN) -> 0x5C, U+203E (OVERLINE) -> 0x7E;
 *   - U+FF61..U+FF9F are mapped through an offset of 0xff40;
 *   - other code points are looked up in uni_to_jisx0208_tbls.
 *
 * NOTE(review): this listing omits parts of the function (how len is
 * computed, the bodies of the >= 0x10000 and unmappable branches, the
 * JIS -> Shift_JIS arithmetic, any use of u and err, the check of the
 * malloc() result, termination of the output and the return
 * statement).  Confirm against the full file.
 */
138 static char *u2c(const struct unicode_info
*u
,
139 const unicode_char
*str
, int *err
)
/* Two bytes per counted element, plus two spare bytes. */
150 s
= malloc((len
+1)*2);
155 for(i
=0; str
[i
]; i
++) {
157 unsigned char hi
=0, lo
=0;
/* Bits 8 and up of the code point, truncated to one byte.  Values of
 * 0x10000 or more are caught by the first branch below, so when this is
 * used as a table index it is the exact high byte of the code point. */
159 unsigned char str_i_high
=str
[i
] >> 8;
161 /* SHIFT_JIS is mapped inside BMP range */
162 if (str
[i
] >= (unicode_char
)0x10000)
172 /* JIS X 0201 GL or US-ASCII */
173 else if (str
[i
] < (unicode_char
)0x0080)
174 s
[pos
++] = (char)str
[i
];
175 /* 2 characters replaced by JIS X 0201 */
176 else if (str
[i
] == 0x00A5) /* YEN SIGN */
177 s
[pos
++] = (char)0x5C;
178 else if (str
[i
] == 0x203E) /* OVERLINE */
179 s
[pos
++] = (char)0x7E;
/* U+FF61..U+FF9F: subtracting 0xff40 leaves 0x21..0x5F in lo.  The step
 * announced by the comment below is not visible in this listing. */
181 else if (str
[i
] >= (unicode_char
)0xff61
182 && str
[i
] <= (unicode_char
)0xff9f)
184 lo
= (unsigned char)(str
[i
] - (unsigned)0xff40);
185 /* JIS -> SHIFT_JIS */
/* No table for this high byte, or the table entry is '?'.  The handling
 * of this case is not visible in this listing. */
190 else if (uni_to_jisx0208_tbls
[str_i_high
] == NULL
191 || uni_to_jisx0208_tbls
[str_i_high
][str
[i
] & 0xff] == '?')
201 /* 2 byte characters */
/* Fetch the table entry; its low byte becomes lo. */
204 jis_char
= uni_to_jisx0208_tbls
[str_i_high
][str
[i
] & 0xff];
206 lo
= jis_char
& 0xff;
/* Branches on whether hi is even; how hi is derived and what each branch
 * does is not visible in this listing. */
209 if( ( hi
% 2 ) == 0 )
/*
 * toupper_func: convert cp to Unicode with c2u(), replace each code
 * point in 'a'..'z' by the corresponding one in 'A'..'Z', and convert
 * the result back with u2c() (passing NULL for its err argument).
 * ip is forwarded to c2u() as its err argument.
 *
 * NOTE(review): the loop stops at a 0 element or after 10000 elements,
 * whichever comes first; tolower_func has no such cap, so any later
 * elements are passed to u2c() unchanged -- confirm this is intended.
 * NOTE(review): the check of c2u()'s result, the release of uc and the
 * return statement are not visible in this listing.
 */
235 static char *toupper_func(const struct unicode_info
*u
,
236 const char *cp
, int *ip
)
238 unicode_char
*uc
= c2u(u
, cp
, ip
);
245 for (i
=0; uc
[i
] && i
<10000; i
++) {
246 if ((unicode_char
)'a' <= uc
[i
] && uc
[i
] <= (unicode_char
)'z')
/* Shift down by the distance between 'a' and 'A'. */
247 uc
[i
] = uc
[i
] - ((unicode_char
)'a' - (unicode_char
)'A');
250 s
= u2c(u
, uc
, NULL
);
/*
 * tolower_func: convert cp to Unicode with c2u(), replace each code
 * point in 'A'..'Z' by the corresponding one in 'a'..'z', and convert
 * the result back with u2c() (passing NULL for its err argument).
 * ip is forwarded to c2u() as its err argument.
 *
 * The loop runs until the first 0 element of uc.
 * NOTE(review): the check of c2u()'s result, the release of uc and the
 * return statement are not visible in this listing.
 */
255 static char *tolower_func(const struct unicode_info
*u
,
256 const char *cp
, int *ip
)
258 unicode_char
*uc
= c2u(u
, cp
, ip
);
265 for (i
=0; uc
[i
]; i
++) {
266 if ((unicode_char
)'A' <= uc
[i
] && uc
[i
] <= (unicode_char
)'Z')
/* Shift up by the distance between 'A' and 'a'. */
267 uc
[i
] = uc
[i
] + ((unicode_char
)'a' - (unicode_char
)'A');
270 s
= u2c(u
, uc
, NULL
);
/*
 * totitle_func: convert cp to Unicode with c2u() and straight back with
 * u2c() (passing NULL for its err argument).  No case mapping is visible
 * between the two calls; the only per-character loop shown, which would
 * call unicode_tc(), appears in comment-style lines.
 * ip is forwarded to c2u() as its err argument.
 *
 * NOTE(review): the check of c2u()'s result, the release of uc and the
 * return statement are not visible in this listing.
 */
277 static char *totitle_func(const struct unicode_info
*u
,
278 const char *cp
, int *ip
)
280 unicode_char
*uc
= c2u(u
, cp
, ip
);
286 /* Uh, sorry, what's "title" char? */
288 * for (i=0; uc[i]; i++)
289 * uc[i] = unicode_tc(uc[i]);
292 s
= u2c(u
, uc
, NULL
);
297 extern const struct unicode_info unicode_UTF8
;
299 const struct unicode_info unicode_SHIFT_JIS
= {
301 UNICODE_MB
| UNICODE_REPLACEABLE
|
302 UNICODE_HEADER_BASE64
| UNICODE_BODY_BASE64
,