| 1 | /* |
| 2 | * ISO-2022-JP <=> Unicode translate functions. |
| 3 | * by Norihisa Washitake <nori@washitake.com> |
| 4 | * US-ASCII/JIS X 0201/JIS X 0212 support |
| 5 | * by Hatuka*nezumi - IKEDA Soji <nezumi@jca.apc.org> |
| 6 | * |
| 7 | * $Id: iso2022jp.c,v 1.12 2004/05/23 14:28:24 mrsam Exp $ |
| 8 | * |
| 9 | * This conversion is highly expensive, so it is recommended |
| 10 | * that you do not include iso-2022-jp support unless you need it. |
| 11 | */ |
| 12 | |
| 13 | /* |
| 14 | * Debug Option. |
| 15 | * If you want to build the iso2022jp test application, |
| 16 | * set JIS_DEBUG to a non-zero value and define JIS_BUILD_APP. |
| 17 | */ |
| 18 | #define JIS_DEBUG 0 |
| 19 | /* #define JIS_BUILD_APP */ |
| 20 | |
| 21 | #include "iso2022jp.h" |
| 22 | |
| 23 | #if (JIS_DEBUG) > 0 |
| 24 | #ifdef JIS_BUILD_APP |
| 25 | #define JIS_OUT fprintf |
| 26 | #define JIS_OUT_FH stderr |
| 27 | #else |
| 28 | #include <syslog.h> |
| 29 | #define JIS_OUT syslog |
| 30 | #define JIS_OUT_FH (LOG_MAIL|LOG_DEBUG) |
| 31 | #endif |
| 32 | #endif |
| 33 | |
| 34 | |
| 35 | /* |
| 36 | * read_jis_char. |
| 37 | * -- from my second kanji conversion library in 2001. -- |
| 38 | * Arguments: |
| 39 | * src: text in iso-2022-jp. |
| 40 | * ch: character info of each character. |
| 41 | * Returns: |
| 42 | * characters to be skipped in original text. |
| 43 | * this value is at least 1. |
| 44 | */ |
| 45 | |
| 46 | static size_t read_jis_char(const char* src, struct jischar_t *ch) |
| 47 | { |
| 48 | /* |
| 49 | * In most cases, JIS characters are grouped in 0x20 |
| 50 | * characters. So we switch by value of src[0]/0x20. |
| 51 | */ |
| 52 | switch (src[0] >> 5) { |
| 53 | case 0: /* 0x00 to 0x1F */ |
| 54 | switch (src[0]) { |
| 55 | case JIS_CHAR_SI: |
| 56 | ch->type = JIS_TYPE_8BITKANA; |
| 57 | ch->value = 0; |
| 58 | return 1; |
| 59 | case JIS_CHAR_SO: |
| 60 | ch->type = JIS_TYPE_ASCII; |
| 61 | ch->value = 0; |
| 62 | return 1; |
| 63 | case JIS_CHAR_ESC: |
| 64 | ch->value = 0; |
| 65 | switch (src[1]) { |
| 66 | case '(': /* 94 character set (G0) */ |
| 67 | switch (src[2]) { |
| 68 | case 'B': /* US-ASCII */ |
| 69 | ch->type = JIS_TYPE_ASCII; |
| 70 | return 3; |
| 71 | case 'I': /* JIS X 0201 GR */ |
| 72 | ch->type = JIS_TYPE_7BITKANA; |
| 73 | return 3; |
| 74 | case 'J': /* JIS X 0201 GL */ |
| 75 | ch->type = JIS_TYPE_ROMAN; |
| 76 | return 3; |
| 77 | default: |
| 78 | ch->type = JIS_TYPE_ASCII; |
| 79 | ch->value = JIS_CHAR_ESC; |
| 80 | return 1; |
| 81 | } |
| 82 | case '$': /* 94/96n character set */ |
| 83 | switch (src[2]) { |
| 84 | case '@': /* JIS C 6226:1978 */ |
| 85 | ch->type = JIS_TYPE_JISX0208_1978; |
| 86 | return 3; |
| 87 | case 'B': /* JIS X 0208:1983/1990/1997 */ |
| 88 | ch->type = JIS_TYPE_JISX0208; |
| 89 | return 3; |
| 90 | case '(': |
| 91 | switch (src[3]) { |
| 92 | case '@': /* JIS C 6226:1978 */ |
| 93 | ch->type = JIS_TYPE_JISX0208_1978; |
| 94 | return 4; |
| 95 | case 'B': /* JIS X 0208:1983/1990/1997 */ |
| 96 | ch->type = JIS_TYPE_JISX0208; |
| 97 | return 4; |
| 98 | case 'D': /* JIS X 0212:1990 */ |
| 99 | ch->type = JIS_TYPE_JISX0212; |
| 100 | return 4; |
| 101 | default: |
| 102 | ch->type = JIS_TYPE_BINARY; |
| 103 | ch->value = JIS_CHAR_ESC; |
| 104 | return 1; |
| 105 | } |
| 106 | default: |
| 107 | ch->type = JIS_TYPE_BINARY; |
| 108 | ch->value = JIS_CHAR_ESC; |
| 109 | return 1; |
| 110 | } |
| 111 | case 'K': /* NEC KANJI (IN) */ |
| 112 | ch->type = JIS_TYPE_JISX0208_1978; |
| 113 | return 1; |
| 114 | case 'H': /* NEC KANJI (OUT) */ |
| 115 | ch->type = JIS_TYPE_ASCII; |
| 116 | return 1; |
| 117 | } |
| 118 | default: |
| 119 | ch->type = JIS_TYPE_BINARY; |
| 120 | ch->value = src[0]; |
| 121 | return 1; |
| 122 | } |
| 123 | case 1: /* 0x20 to 0x3F */ |
| 124 | case 2: /* 0x40 to 0x5F */ |
| 125 | if (ch->type == JIS_TYPE_7BITKANA) { |
| 126 | ch->value = src[0] + 0x80; |
| 127 | return 1; |
| 128 | } |
| 129 | /* Other than 7bit kana are passed to next */ |
| 130 | case 3: /* 0x60 to 0x7F */ |
| 131 | if (src[0] == 0x7F) { |
| 132 | ch->type = JIS_TYPE_BINARY; |
| 133 | ch->value = src[0]; |
| 134 | return 1; |
| 135 | } |
| 136 | if ((ch->type == JIS_TYPE_JISX0208 |
| 137 | || ch->type == JIS_TYPE_JISX0208_1978 |
| 138 | || ch->type == JIS_TYPE_JISX0212) && src[1]) { |
| 139 | ch->value = (src[0] * 0x100) + src[1]; |
| 140 | return 2; |
| 141 | } |
| 142 | ch->value = src[0]; |
| 143 | return 1; |
| 144 | case 4: /* 0x80 to 0x9F */ |
| 145 | ch->value = src[0]; |
| 146 | ch->type = JIS_TYPE_BINARY; |
| 147 | return 1; |
| 148 | case 5: /* 0xA0 to 0xBF */ |
| 149 | case 6: /* 0xC0 to 0xDF */ |
| 150 | if (ch->type == JIS_TYPE_8BITKANA) { |
| 151 | if (0xA0 < (unsigned)src[0] && (unsigned)src[0] <= 0xDF) { |
| 152 | ch->value = (unsigned char)src[0]; |
| 153 | return 1; |
| 154 | } |
| 155 | } |
| 156 | ch->type = JIS_TYPE_BINARY; |
| 157 | ch->value = (unsigned char)src[0]; |
| 158 | return 1; |
| 159 | case 7: /* 0xE0 to 0xFF */ |
| 160 | ch->value = (unsigned char)src[0]; |
| 161 | ch->type = JIS_TYPE_BINARY; |
| 162 | return 1; |
| 163 | default: |
| 164 | ch->value = (unsigned char)src[0]; |
| 165 | ch->type = JIS_TYPE_BINARY; |
| 166 | return 1; |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | static unicode_char c2u_conv(int j, int jis_type) |
| 171 | { |
| 172 | unsigned int upper = (j >> 8); |
| 173 | unsigned int lower = j & 0xFF; |
| 174 | const unicode_char **tbls; |
| 175 | |
| 176 | if (!upper) |
| 177 | { |
| 178 | switch (jis_type) |
| 179 | { |
| 180 | /* JIS X 0201 GR */ |
| 181 | case JIS_TYPE_7BITKANA: |
| 182 | case JIS_TYPE_8BITKANA: |
| 183 | if (0xA1 <= lower && lower <=0xDF) |
| 184 | return (unicode_char)(lower + (0xFF9F - 0xDF)); |
| 185 | else |
| 186 | return (unicode_char)0xFFFD; |
| 187 | break; |
| 188 | |
| 189 | /* JIS X 0201 GL */ |
| 190 | case JIS_TYPE_ROMAN: |
| 191 | /* 2 characters replaced by JIS X 0201 */ |
| 192 | if (lower == 0x5C) /* REVERSE SOLIDUS -> YEN SIGN */ |
| 193 | return (unicode_char)0x00A5; |
| 194 | if (lower == 0x7E) /* TILDE -> OVERLINE */ |
| 195 | return (unicode_char)0x203E; |
| 196 | /* break; */ |
| 197 | /* US-ASCII or Control characters */ |
| 198 | case JIS_TYPE_ASCII: |
| 199 | case JIS_TYPE_BINARY: |
| 200 | if (lower < 0x80) |
| 201 | return (unicode_char)lower; |
| 202 | else |
| 203 | return (unicode_char)0xFFFD; |
| 204 | break; |
| 205 | |
| 206 | /* Otherwise return REPLACEMENT CHARACTER. */ |
| 207 | default: |
| 208 | return (unicode_char)0xFFFD; |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | switch (jis_type) |
| 213 | { |
| 214 | /* JIS X 0208:1983/1990/1997 */ |
| 215 | case JIS_TYPE_JISX0208: |
| 216 | tbls = jisx0208_to_uni_tbls; |
| 217 | break; |
| 218 | |
| 219 | /* JIS C 6226:1978 */ |
| 220 | case JIS_TYPE_JISX0208_1978: |
| 221 | tbls = jisx0208_1978_to_uni_tbls; |
| 222 | break; |
| 223 | |
| 224 | /* JIS X 0212:1990 */ |
| 225 | case JIS_TYPE_JISX0212: |
| 226 | tbls = jisx0212_to_uni_tbls; |
| 227 | break; |
| 228 | |
| 229 | /* Otherwise return REPLACEMENT CHARACTER. */ |
| 230 | default: |
| 231 | return (unicode_char)0xFFFD; |
| 232 | break; |
| 233 | } |
| 234 | |
| 235 | if (0x20 < upper && upper < 0x7F |
| 236 | && 0x20 < lower && lower < 0x7F) |
| 237 | { |
| 238 | if (tbls[upper-0x21] != NULL |
| 239 | && tbls[upper-0x21][lower-0x21] != (unicode_char)0x003F) |
| 240 | { |
| 241 | if (tbls[upper-0x21][lower-0x21]) |
| 242 | return tbls[upper-0x21][lower-0x21]; |
| 243 | return (unicode_char)0xFFFD; |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | /* we should think of 8bit-JIS, maybe. */ |
| 248 | /* but currently returns the REPLACEMENT CHARACTER. */ |
| 249 | return (unicode_char)0xFFFD; |
| 250 | } |
| 251 | |
| 252 | static unicode_char *c2u(const struct unicode_info *u, |
| 253 | const char *jis_str, int *err) |
| 254 | { |
| 255 | size_t i, cnt, w; |
| 256 | unicode_char *uc; |
| 257 | struct jischar_t jchar; |
| 258 | |
| 259 | if (err) |
| 260 | *err = -1; |
| 261 | |
| 262 | /* Count the number of potential unicode characters first. */ |
| 263 | i = cnt = 0; |
| 264 | jchar.type = 0; |
| 265 | jchar.value = 0; |
| 266 | while (jis_str[i]) { |
| 267 | i += read_jis_char(&jis_str[i], &jchar); |
| 268 | if (jchar.value) |
| 269 | ++cnt; |
| 270 | } |
| 271 | |
| 272 | uc = malloc((cnt+1) * sizeof(unicode_char)); |
| 273 | #if JIS_DEBUG>0 |
| 274 | if (uc) |
| 275 | JIS_OUT(JIS_OUT_FH, "c2u: allocated heap; 0x%04X bytes.\n", cnt+1); |
| 276 | else |
| 277 | JIS_OUT(JIS_OUT_FH, "c2u: heap allocation failed; 0x%04X bytes.\n", cnt+1); |
| 278 | #endif |
| 279 | if (!uc) |
| 280 | return (NULL); |
| 281 | |
| 282 | i = cnt = 0; |
| 283 | jchar.type = 0; |
| 284 | jchar.value = 0; |
| 285 | while (jis_str[i]) { |
| 286 | w = read_jis_char(&jis_str[i], &jchar); |
| 287 | if (jchar.value) { |
| 288 | uc[cnt] = c2u_conv(jchar.value, jchar.type); |
| 289 | #if JIS_DEBUG > 1 |
| 290 | JIS_OUT(JIS_OUT_FH, "c2u: converted; JIS 0x%04X => U+%04X", jchar.value, uc[cnt]); |
| 291 | #endif |
| 292 | if (uc[cnt] == (unicode_char)0xFFFD && err) |
| 293 | { |
| 294 | *err = i; |
| 295 | free(uc); |
| 296 | return NULL; |
| 297 | } |
| 298 | ++cnt; |
| 299 | } |
| 300 | i+=w; |
| 301 | } |
| 302 | |
| 303 | uc[cnt] = 0; |
| 304 | #if JIS_DEBUG > 0 |
| 305 | JIS_OUT(JIS_OUT_FH, "c2u: end of heap; 0x%04X bytes.", cnt+1); |
| 306 | #endif |
| 307 | return (uc); |
| 308 | } |
| 309 | |
| 310 | static void revlookup(unicode_char u, struct jischar_t *ch) |
| 311 | { |
| 312 | unsigned int upper = u >> 8; |
| 313 | unsigned int lower = u & 0xff; |
| 314 | |
| 315 | /* ISO-2022-JP(-1) is mapped inside BMP range. */ |
| 316 | if (u >= (unicode_char)0x10000) |
| 317 | { |
| 318 | ch->type = JIS_TYPE_BINARY; |
| 319 | ch->value = 0x003F; |
| 320 | return; |
| 321 | } |
| 322 | |
| 323 | /* US-ASCII */ |
| 324 | if (u < (unicode_char)0x0080) |
| 325 | { |
| 326 | ch->type = JIS_TYPE_ASCII; |
| 327 | ch->value = (unsigned)u; |
| 328 | return; |
| 329 | } |
| 330 | |
| 331 | /* 2 Characters replaced by JIS X 0201 */ |
| 332 | if (u == (unicode_char)0x00a5) |
| 333 | { |
| 334 | ch->type = JIS_TYPE_ROMAN; |
| 335 | ch->value = 0x5C; |
| 336 | return; |
| 337 | } |
| 338 | if (u == (unicode_char)0x203E) |
| 339 | { |
| 340 | ch->type = JIS_TYPE_ROMAN; |
| 341 | ch->value = 0x7E; |
| 342 | return; |
| 343 | } |
| 344 | |
| 345 | /* JIS X 0201 GR */ |
| 346 | if ((unicode_char)0xFF61 <= u && u <= (unicode_char)0xFF9F) |
| 347 | { |
| 348 | ch->type = JIS_TYPE_8BITKANA; |
| 349 | ch->value = u - (unsigned)0xFF40 + (unsigned)0x80; |
| 350 | return; |
| 351 | } |
| 352 | |
| 353 | /* JIS X 0208/JIS X 0212 */ |
| 354 | if (uni_to_jisx0208_tbls[upper] != NULL |
| 355 | && uni_to_jisx0208_tbls[upper][lower] != 0x003F) |
| 356 | { |
| 357 | ch->type = JIS_TYPE_JISX0208; |
| 358 | ch->value = uni_to_jisx0208_tbls[upper][lower]; |
| 359 | return; |
| 360 | } |
| 361 | if (uni_to_jisx0212_tbls[upper] != NULL |
| 362 | && uni_to_jisx0212_tbls[upper][lower] != 0x003F) |
| 363 | { |
| 364 | ch->type = JIS_TYPE_JISX0212; |
| 365 | ch->value = uni_to_jisx0212_tbls[upper][lower]; |
| 366 | return; |
| 367 | } |
| 368 | |
| 369 | /* return 'unknown' character if unknown */ |
| 370 | ch->type = JIS_TYPE_BINARY; |
| 371 | ch->value = 0x003F; |
| 372 | return; |
| 373 | } |
| 374 | |
#if 0
/*
 * get_iso2022jp_type.
 * Disabled: this function is compiled out by the surrounding "#if 0".
 * It classifies a JIS code by numeric range: 0xA1..0xDF as 8-bit kana,
 * anything above 0xFF as kanji, everything else as ASCII.
 */
static int get_iso2022jp_type(unsigned j)
{
	if (0xA0 < j && j < 0xE0)
		return JIS_TYPE_8BITKANA;
	if (j > 0xff)
		return JIS_TYPE_KANJI;
	return JIS_TYPE_ASCII;
}
#endif
| 385 | |
| 386 | static char *u2c(const struct unicode_info *u, |
| 387 | const unicode_char *str, int *err) |
| 388 | { |
| 389 | size_t i, cnt; |
| 390 | int j; |
| 391 | int jtype = JIS_TYPE_ASCII; |
| 392 | int jt; |
| 393 | char *s; |
| 394 | struct jischar_t ch; |
| 395 | |
| 396 | if (err) |
| 397 | *err = -1; |
| 398 | |
| 399 | for (i = cnt = 0; str[i]; i++) { |
| 400 | revlookup(str[i], &ch); |
| 401 | jt = ch.type; |
| 402 | j = ch.value; |
| 403 | if (jt != jtype) { |
| 404 | cnt += ((jt == JIS_TYPE_JISX0212) ? 4 : 3); |
| 405 | jtype = jt; |
| 406 | } |
| 407 | cnt += ((jtype == JIS_TYPE_JISX0208 || jtype == JIS_TYPE_JISX0212) ? 2 : 1); |
| 408 | } |
| 409 | if (jtype != JIS_TYPE_ASCII && jtype != JIS_TYPE_BINARY) |
| 410 | cnt += 3; |
| 411 | |
| 412 | s = malloc(cnt+1); |
| 413 | #if JIS_DEBUG > 0 |
| 414 | if (s) |
| 415 | JIS_OUT(JIS_OUT_FH, "u2c: allocated heap; 0x%04X bytes.\n", cnt+1); |
| 416 | else |
| 417 | JIS_OUT(JIS_OUT_FH, "u2c: heap allocation failed; 0x%04X bytes.\n", cnt+1); |
| 418 | #endif |
| 419 | if (!s) |
| 420 | return (NULL); |
| 421 | |
| 422 | jtype = JIS_TYPE_ASCII; |
| 423 | for (i = cnt = 0; str[i]; i++) { |
| 424 | revlookup(str[i], &ch); |
| 425 | |
| 426 | jt = ch.type; |
| 427 | j = ch.value; |
| 428 | if (jt != jtype) { |
| 429 | switch (jt) { |
| 430 | case JIS_TYPE_JISX0208: |
| 431 | s[cnt++] = JIS_CHAR_ESC; |
| 432 | s[cnt++] = '$'; |
| 433 | s[cnt++] = 'B'; |
| 434 | #if JIS_DEBUG > 2 |
| 435 | JIS_OUT(JIS_OUT_FH, "u2c: changed map; JIS_TYPE_JISX0208.\n"); |
| 436 | #endif |
| 437 | break; |
| 438 | case JIS_TYPE_JISX0212: |
| 439 | s[cnt++] = JIS_CHAR_ESC; |
| 440 | s[cnt++] = '$'; |
| 441 | s[cnt++] = '('; |
| 442 | s[cnt++] = 'D'; |
| 443 | break; |
| 444 | case JIS_TYPE_7BITKANA: |
| 445 | case JIS_TYPE_8BITKANA: |
| 446 | s[cnt++] = JIS_CHAR_ESC; |
| 447 | s[cnt++] = '('; |
| 448 | s[cnt++] = 'I'; |
| 449 | #if JIS_DEBUG > 2 |
| 450 | JIS_OUT(JIS_OUT_FH, "u2c: changed map; JIS_TYPE_8BITKANA.\n"); |
| 451 | #endif |
| 452 | break; |
| 453 | case JIS_TYPE_ROMAN: |
| 454 | s[cnt++] = JIS_CHAR_ESC; |
| 455 | s[cnt++] = '('; |
| 456 | s[cnt++] = 'J'; |
| 457 | break; |
| 458 | default: |
| 459 | s[cnt++] = JIS_CHAR_ESC; |
| 460 | s[cnt++] = '('; |
| 461 | s[cnt++] = 'B'; |
| 462 | #if JIS_DEBUG > 2 |
| 463 | JIS_OUT(JIS_OUT_FH, "u2c: changed map; JIS_TYPE_ASCII.\n"); |
| 464 | #endif |
| 465 | break; |
| 466 | } |
| 467 | jtype = jt; |
| 468 | } |
| 469 | switch (jtype) { |
| 470 | case JIS_TYPE_JISX0208: |
| 471 | case JIS_TYPE_JISX0212: |
| 472 | s[cnt++] = (char)(j >> 8); |
| 473 | s[cnt++] = (char)(j & 0xff); |
| 474 | break; |
| 475 | case JIS_TYPE_7BITKANA: |
| 476 | case JIS_TYPE_8BITKANA: |
| 477 | s[cnt++] = (char)(j - 0x80); |
| 478 | break; |
| 479 | default: |
| 480 | s[cnt++] = (char)j; |
| 481 | break; |
| 482 | } |
| 483 | #if JIS_DEBUG > 1 |
| 484 | JIS_OUT(JIS_OUT_FH, "u2c: converted; U+%04X => JIS 0x%04X\n", str[i], j); |
| 485 | #endif |
| 486 | if (jtype == JIS_TYPE_BINARY && j == 0x003F) |
| 487 | if (err) |
| 488 | { |
| 489 | *err = i; |
| 490 | free(s); |
| 491 | return NULL; |
| 492 | } |
| 493 | } |
| 494 | if (jtype != JIS_TYPE_ASCII && jtype != JIS_TYPE_BINARY) { |
| 495 | s[cnt++] = JIS_CHAR_ESC; |
| 496 | s[cnt++] = '('; |
| 497 | s[cnt++] = 'B'; |
| 498 | } |
| 499 | s[cnt] = '\x0'; |
| 500 | |
| 501 | #if JIS_DEBUG > 0 |
| 502 | JIS_OUT(JIS_OUT_FH, "u2c: end of heap; 0x%04X bytes.\n", cnt+1); |
| 503 | #endif |
| 504 | return s; |
| 505 | } |
| 506 | |
| 507 | static char *toupper_func(const struct unicode_info *u, |
| 508 | const char *cp, int *ip) |
| 509 | { |
| 510 | unicode_char *uc = c2u(u, cp, ip); |
| 511 | char *s; |
| 512 | size_t i; |
| 513 | |
| 514 | if (!uc) |
| 515 | return (NULL); |
| 516 | |
| 517 | for (i=0; uc[i]; i++) { |
| 518 | if ((unicode_char)'a' <= uc[i] && uc[i] <= (unicode_char)'z') |
| 519 | uc[i] = uc[i] - ((unicode_char)'a' - (unicode_char)'A'); |
| 520 | } |
| 521 | |
| 522 | s = u2c(u, uc, NULL); |
| 523 | free(uc); |
| 524 | return (s); |
| 525 | } |
| 526 | |
| 527 | static char *tolower_func(const struct unicode_info *u, |
| 528 | const char *cp, int *ip) |
| 529 | { |
| 530 | unicode_char *uc = c2u(u, cp, ip); |
| 531 | char *s; |
| 532 | size_t i; |
| 533 | |
| 534 | if (!uc) |
| 535 | return (NULL); |
| 536 | |
| 537 | for (i=0; uc[i]; i++) { |
| 538 | if ((unicode_char)'A' <= uc[i] && uc[i] <= (unicode_char)'Z') |
| 539 | uc[i] = uc[i] + ((unicode_char)'a' - (unicode_char)'A'); |
| 540 | } |
| 541 | |
| 542 | s = u2c(u, uc, NULL); |
| 543 | free(uc); |
| 544 | |
| 545 | return (s); |
| 546 | } |
| 547 | |
| 548 | |
| 549 | static char *totitle_func(const struct unicode_info *u, |
| 550 | const char *cp, int *ip) |
| 551 | { |
| 552 | unicode_char *uc = c2u(u, cp, ip); |
| 553 | char *s; |
| 554 | |
| 555 | if (!uc) |
| 556 | return (NULL); |
| 557 | |
| 558 | /* Uh, sorry, what's "title" char? */ |
| 559 | /* |
| 560 | * for (i=0; uc[i]; i++) |
| 561 | * uc[i] = unicode_tc(uc[i]); |
| 562 | */ |
| 563 | |
| 564 | s = u2c(u, uc, NULL); |
| 565 | free(uc); |
| 566 | return (s); |
| 567 | } |
| 568 | |
/* Defined elsewhere; referenced by the descriptors below. */
extern const struct unicode_info unicode_UTF8;

/*
 * Character set descriptor for "ISO-2022-JP": name, flag bits, then the
 * decode (c2u), encode (u2c) and case-conversion functions of this file.
 * NOTE(review): the meaning of the final &unicode_UTF8 member is not
 * visible here -- confirm against the declaration of struct unicode_info.
 */
const struct unicode_info unicode_ISO2022_JP = {
	"ISO-2022-JP",
	UNICODE_MB | UNICODE_REPLACEABLE | UNICODE_SISO |
	UNICODE_HEADER_BASE64,
	c2u,
	u2c,
	toupper_func,
	tolower_func,
	totitle_func,
	&unicode_UTF8
};
| 582 | |
/*
 * Character set descriptor for "ISO-2022-JP-1".  Apart from the name it
 * is initialised identically to unicode_ISO2022_JP: same flag bits and
 * the same c2u / u2c / case-conversion functions.
 */
const struct unicode_info unicode_ISO2022_JP_1 = {
	"ISO-2022-JP-1",
	UNICODE_MB | UNICODE_REPLACEABLE | UNICODE_SISO |
	UNICODE_HEADER_BASE64,
	c2u,
	u2c,
	toupper_func,
	tolower_func,
	totitle_func,
	&unicode_UTF8
};
| 594 | |
#if (JIS_DEBUG > 0) && defined(JIS_BUILD_APP)
/*
 * Test driver, built only when JIS_DEBUG > 0 and JIS_BUILD_APP is defined.
 * Each file named on the command line (processed last to first) is read
 * as ISO-2022-JP text, converted to Unicode and back, and the result is
 * written to standard output.
 * Returns 0 when every file was processed, 1 on a usage error or when
 * any file could not be read or converted.
 */
int main(int argc, char** argv)
{
	int status = 0;

	if (argc<2) {
		JIS_OUT(JIS_OUT_FH, "usage: %s filename(s)\n", argv[0]);
		return 1;
	}

	while (argc > 1) {
		FILE *fp;
		int c;			/* int, so that EOF is distinguishable */
		size_t cnt = 0;
		size_t got;
		char *str;
		unicode_char *ustr;
		char *jstr;

		--argc;
		JIS_OUT(JIS_OUT_FH, "main: opening file %s.\n", argv[argc]);
		fp = fopen(argv[argc], "rb");
		if (!fp) {
			JIS_OUT(JIS_OUT_FH, "main: cannot open file %s.\n", argv[argc]);
			status = 1;
			continue;
		}

		/* Measure the file by reading it through once. */
		while ((c = fgetc(fp)) != EOF)
			cnt++;

		str = malloc(cnt+1);
		if (!str || fseek(fp, 0, SEEK_SET) != 0) {
			JIS_OUT(JIS_OUT_FH, "main: cannot read file %s.\n", argv[argc]);
			free(str);
			fclose(fp);
			status = 1;
			continue;
		}
		/* Terminate after what was actually read, not what was expected. */
		got = fread(str, 1, cnt, fp);
		fclose(fp);
		str[got] = 0;

		/* err == NULL: unconvertible characters are replaced, not fatal. */
		ustr = c2u(&unicode_ISO2022_JP, str, NULL);
		jstr = ustr ? u2c(&unicode_ISO2022_JP, ustr, NULL) : NULL;
		if (jstr) {
			fputs(jstr, stdout);
		} else {
			JIS_OUT(JIS_OUT_FH, "main: conversion failed for %s.\n", argv[argc]);
			status = 1;
		}

		free(jstr);
		free(ustr);
		free(str);
	}
	return status;
}
#endif /* defined(JIS_BUILD_APP) */