Imported Upstream version 0.63.0
[hcoop/debian/courier-authlib.git] / unicode / shiftjis.c
CommitLineData
8d138742
CE
1/*
2 * Shift_JIS <=> Unicode translate functions.
3 * by Yu Kobayashi <mail@yukoba.jp>
4 * Modification for JIS X 0208:1997 Annex 1 implementation
5 * by Hatuka*nezumi - IKEDA Soji <nezumi@jca.apc.org>
6 *
7 */
8
9#include <stdio.h>
10#include <string.h>
11#include "unicode.h"
12
/*
 * NOTE(review): SJIS_DEBUG is not referenced anywhere in the visible part
 * of this file.
 */
13#define SJIS_DEBUG 0
14
/*
 * JIS X 0208 -> Unicode lookup tables, defined in another translation unit.
 * c2u() indexes this as [row - 0x21][cell - 0x21], where row and cell are
 * the two JIS bytes; it skips NULL row pointers and treats an entry equal
 * to 0x003F as "no mapping".
 */
15extern const unicode_char* jisx0208_to_uni_tbls[];
/*
 * Unicode -> JIS X 0208 lookup tables, defined in another translation unit.
 * u2c() indexes this as [code point >> 8][code point & 0xff] for code points
 * below 0x10000; it skips NULL page pointers and treats an entry equal to
 * '?' as "no mapping".  Other entries are split by u2c() into a first byte
 * (entry >> 8) and a second byte (entry & 0xff).
 */
16extern const unsigned* uni_to_jisx0208_tbls[];
17
18static unicode_char *c2u(const struct unicode_info *u,
19 const char *sjis_str, int *err)
20{
21 unicode_char *uc=0;
22 unsigned char hi=0, lo=0;
23 int len=0;
24 int i=0;
25 int pos=0;
26
27 if(err) *err = -1;
28
29 len = strlen(sjis_str);
30 uc = (unicode_char*)malloc((len+1) * sizeof(unicode_char) *2);
31
32 if (!uc)
33 return NULL;
34
35 for(i=0; i<len;) {
36 /* 2 Characters replaced by JIS X 0201 */
37 if (sjis_str[i] == 0x5C) /* YEN SIGN */
38 {
39 uc[pos++] = (unicode_char)0x00A5;
40 i++;
41 }
42 else if (sjis_str[i] == 0x7E) /* OVERLINE */
43 {
44 uc[pos++] = (unicode_char)0x203E;
45 i++;
46 }
47 /* Other JIS X 0201 GL */
48 else if ((unsigned)sjis_str[i] < 0x80)
49 {
50 uc[pos++] = (unicode_char)sjis_str[i];
51 i++;
52 }
53 /* JIS X 0201 GR */
54 else if ((unsigned char)sjis_str[i] >= 0xa1
55 && (unsigned char)sjis_str[i] <= 0xdf)
56 {
57 lo = (unsigned char)sjis_str[i];
58
59 /* SHIFT_JIS -> JIS */
60 lo -= 0x80;
61
62 uc[pos++] = (unicode_char)(lo+(unsigned)0xff40);
63 i++;
64 }
65 /* 2 byte characters */
66 else if ((((unsigned char)sjis_str[i] >= 0x81
67 && (unsigned char)sjis_str[i] <= 0x9F)
68 || ((unsigned char)sjis_str[i] >= 0xE0
69 && (unsigned char)sjis_str[i] <= 0xEF))
70 && (((unsigned char)sjis_str[i+1] >= 0x40
71 && (unsigned char)sjis_str[i+1] <= 0x7E)
72 || ((unsigned char)sjis_str[i+1] >= 0x80
73 && (unsigned char)sjis_str[i+1] <= 0xFC)))
74 {
75 hi = (unsigned char)sjis_str[i];
76 lo = (unsigned char)sjis_str[i+1];
77
78 /* SJIS -> JIS */
79 if( lo < 0x9f ) {
80 if( hi < 0xa0 ) {
81 hi -= 0x81;
82 hi *= 2;
83 hi += 0x21;
84 } else {
85 hi -= 0xe0;
86 hi *= 2;
87 hi += 0x5f;
88 }
89 if( lo > 0x7f )
90 --lo;
91 lo -= 0x1f;
92 } else {
93 if( hi < 0xa0 ) {
94 hi -= 0x81;
95 hi *= 2;
96 hi += 0x22;
97 } else {
98 hi -= 0xe0;
99 hi *= 2;
100 hi += 0x60;
101 }
102 lo -= 0x7e;
103 }
104
105 /* JIS -> Unicode */
106 if (jisx0208_to_uni_tbls[hi-0x21] != NULL
107 && jisx0208_to_uni_tbls[hi-0x21][lo-0x21] !=
108 (unicode_char)0x003F)
109 uc[pos++] = jisx0208_to_uni_tbls[hi-0x21][lo-0x21];
110 else if (err)
111 {
112 *err = i;
113 free(uc);
114 return NULL;
115 }
116 else
117 uc[pos++] = (unicode_char)0xFFFD;
118
119 i+=2;
120 }
121 else if (err)
122 {
123 *err = i;
124 free(uc);
125 return NULL;
126 }
127 else
128 {
129 uc[pos++] = (unicode_char)0xFFFD;
130 i++;
131 }
132 }
133 uc[pos++] = 0;
134
135 return uc;
136}
137
138static char *u2c(const struct unicode_info *u,
139 const unicode_char *str, int *err)
140{
141 int i=0;
142 int pos=0;
143 int len=0;
144 char* s;
145
146 if(err) *err = -1;
147
148 while(str[len])
149 len++;
150 s = malloc((len+1)*2);
151
152 if (!s)
153 return NULL;
154
155 for(i=0; str[i]; i++) {
156 int jis_char = 0;
157 unsigned char hi=0, lo=0;
158
159 unsigned char str_i_high=str[i] >> 8;
160
161 /* SHIFT_JIS is mapped inside BMP range */
162 if (str[i] >= (unicode_char)0x10000)
163 {
164 if (err)
165 {
166 *err = i;
167 free(s);
168 return NULL;
169 }
170 s[pos++] = '?';
171 }
172 /* JIS X 0201 GL or US-ASCII */
173 else if (str[i] < (unicode_char)0x0080)
174 s[pos++] = (char)str[i];
175 /* 2 characters replaced by JIS X 0201 */
176 else if (str[i] == 0x00A5) /* YEN SIGN */
177 s[pos++] = (char)0x5C;
178 else if (str[i] == 0x203E) /* OVERLINE */
179 s[pos++] = (char)0x7E;
180 /* JIS X 0201 GR */
181 else if (str[i] >= (unicode_char)0xff61
182 && str[i] <= (unicode_char)0xff9f)
183 {
184 lo = (unsigned char)(str[i] - (unsigned)0xff40);
185 /* JIS -> SHIFT_JIS */
186 lo += 0x80;
187 s[pos++] = lo;
188 }
189 /* Not found */
190 else if (uni_to_jisx0208_tbls[str_i_high] == NULL
191 || uni_to_jisx0208_tbls[str_i_high][str[i] & 0xff] == '?')
192 {
193 if (err)
194 {
195 *err = i;
196 free(s);
197 return NULL;
198 }
199 s[pos++] = '?';
200 }
201 /* 2 byte characters */
202 else
203 {
204 jis_char = uni_to_jisx0208_tbls[str_i_high][str[i] & 0xff];
205 hi = jis_char >> 8;
206 lo = jis_char & 0xff;
207
208 /* JIS -> SJIS */
209 if( ( hi % 2 ) == 0 )
210 lo += 0x7d;
211 else
212 lo += 0x1f;
213
214 if( lo > 0x7e )
215 ++ lo;
216
217 if( hi < 0x5f ) {
218 ++hi;
219 hi /= 2;
220 hi += 0x70;
221 } else {
222 ++hi;
223 hi /= 2;
224 hi += 0xb0;
225 }
226 s[pos++] = hi;
227 s[pos++] = lo;
228 }
229 }
230 s[pos] = 0;
231
232 return s;
233}
234
235static char *toupper_func(const struct unicode_info *u,
236 const char *cp, int *ip)
237{
238 unicode_char *uc = c2u(u, cp, ip);
239 char *s;
240 size_t i;
241
242 if (!uc)
243 return (NULL);
244
245 for (i=0; uc[i] && i<10000; i++) {
246 if ((unicode_char)'a' <= uc[i] && uc[i] <= (unicode_char)'z')
247 uc[i] = uc[i] - ((unicode_char)'a' - (unicode_char)'A');
248 }
249
250 s = u2c(u, uc, NULL);
251 free(uc);
252 return (s);
253}
254
255static char *tolower_func(const struct unicode_info *u,
256 const char *cp, int *ip)
257{
258 unicode_char *uc = c2u(u, cp, ip);
259 char *s;
260 size_t i;
261
262 if (!uc)
263 return (NULL);
264
265 for (i=0; uc[i]; i++) {
266 if ((unicode_char)'A' <= uc[i] && uc[i] <= (unicode_char)'Z')
267 uc[i] = uc[i] + ((unicode_char)'a' - (unicode_char)'A');
268 }
269
270 s = u2c(u, uc, NULL);
271 free(uc);
272
273 return (s);
274}
275
276
277static char *totitle_func(const struct unicode_info *u,
278 const char *cp, int *ip)
279{
280 unicode_char *uc = c2u(u, cp, ip);
281 char *s;
282
283 if (!uc)
284 return (NULL);
285
286 /* Uh, sorry, what's "title" char? */
287 /*
288 * for (i=0; uc[i]; i++)
289 * uc[i] = unicode_tc(uc[i]);
290 */
291
292 s = u2c(u, uc, NULL);
293 free(uc);
294 return (s);
295}
296
/* Defined in another translation unit; referenced by the last initializer below. */
297extern const struct unicode_info unicode_UTF8;
298
/*
 * Character set descriptor for SHIFT_JIS.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of struct unicode_info, which is declared outside this
 * file.
 * NOTE(review): the per-entry remarks below are inferred from the values
 * themselves; confirm them against the struct declaration in unicode.h.
 */
299const struct unicode_info unicode_SHIFT_JIS = {
300 "SHIFT_JIS", /* presumably the character set name */
301 UNICODE_MB | UNICODE_REPLACEABLE | /* flag bits, defined outside this file */
302 UNICODE_HEADER_BASE64 | UNICODE_BODY_BASE64,
303 c2u, /* Shift_JIS -> unicode_char conversion */
304 u2c, /* unicode_char -> Shift_JIS conversion */
305 toupper_func,
306 tolower_func,
307 totitle_func,
308 &unicode_UTF8 /* pointer to the UTF-8 descriptor; purpose not visible here */
309};
310