| 1 | # cp932.awk -- Add sort keys and append user defined area to CP932-2BYTE.map. |
| 2 | # Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 3 | # National Institute of Advanced Industrial Science and Technology (AIST) |
| 4 | # Registration Number H13PRO009 |
| 5 | |
| 6 | # This file is part of GNU Emacs. |
| 7 | |
| 8 | # GNU Emacs is free software: you can redistribute it and/or modify |
| 9 | # it under the terms of the GNU General Public License as published by |
| 10 | # the Free Software Foundation, either version 3 of the License, or |
| 11 | # (at your option) any later version. |
| 12 | |
| 13 | # GNU Emacs is distributed in the hope that it will be useful, |
| 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | # GNU General Public License for more details. |
| 17 | |
| 18 | # You should have received a copy of the GNU General Public License |
| 19 | # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 20 | |
| 21 | # Commentary: |
| 22 | |
# Add a sort key 0, 1, 2, 3, or 4 at the tail of each line as a comment
# so that the round-trip mapping to Unicode works as described on
# this page:
#	http://support.microsoft.com/default.aspx?scid=kb;EN-US;170559
# The sort keys have the following meanings:
#   0: JISX0208 characters.
#   1: NEC special characters.
#   2: IBM extension characters.
#   3: NEC selection of IBM extension characters.
#   4: User defined area.
| 33 | |
# Build the lookup table used by decode_hex: each uppercase hexadecimal
# letter maps to its numeric value ("A" -> 10 ... "F" -> 15).
BEGIN {
  hexletters = "ABCDEF";
  for (pos = 1; pos <= length(hexletters); pos++)
    tohex[substr(hexletters, pos, 1)] = 9 + pos;
}
| 42 | |
# Return the numeric value of the hexadecimal digit string STR.
# Letters A-F are accepted in either case; they are looked up in the
# tohex table filled in by the BEGIN rule.  A character that is not a
# hexadecimal digit contributes 0 and does not add an entry to tohex.
# N, LEN, I and C are local variables (awk declares locals as extra
# parameters), so callers pass only STR and their own variables of
# the same names are left untouched.
function decode_hex(str,    n, len, i, c) {
  n = 0;
  len = length(str);
  for (i = 1; i <= len; i++)
    {
      c = toupper(substr(str, i, 1));
      if (c >= "0" && c <= "9")
	n = n * 16 + (c - "0");
      else if (c in tohex)
	n = n * 16 + tohex[c];
      else
	n = n * 16;
    }
  return n;
}
| 56 | |
# Split the two-byte code CODE into its high byte (s1) and low byte
# (s2), convert them to the corresponding pair of JIS bytes, and
# return the first JIS byte minus 32 (the "ku" of the function name).
# The JIS bytes are left in the global variables j1 and j2, which the
# /^0x[89E]/ rule prints after calling this function.
function sjis_to_jis_ku(code)
{
  s1 = int(code / 256);
  s2 = code % 256;
  if (s2 < 159)			# s2 < 0x9F
    {
      # Subtract 0x161 when s1 >= 0xE0, otherwise 0xE1.
      j1 = s1 * 2 - (s1 >= 224 ? 353 : 225);
      # Subtract 0x20 when s2 >= 0x7F, otherwise 0x1F.
      j2 = s2 - (s2 >= 127 ? 32 : 31);
    }
  else
    {
      # Subtract 0x160 when s1 >= 0xE0, otherwise 0xE0.
      j1 = s1 * 2 - (s1 >= 224 ? 352 : 224);
      j2 = s2 - 126;		# j2 = s2 - 0x7E
    }
  return j1 - 32;
}
| 82 | |
# Lines whose first field starts with 0x8, 0x9 or 0xE: decode the four
# hex digits after "0x", convert to JIS, and append the sort key plus
# the JIS bytes (j1 and j2, set by sjis_to_jis_ku) as a trailing
# comment.  Ku 89..92 gets key 3, ku 13 gets key 1, anything else
# gets key 0.
/^0x[89E]/ {
  sjis = decode_hex(substr($1, 3, 4));
  ku = sjis_to_jis_ku(sjis);
  if (ku >= 89 && ku <= 92)
    printf "%s # 3 %02X%02X\n", $0, j1, j2;
  else if (ku == 13)
    printf "%s # 1 %02X%02X\n", $0, j1, j2;
  else
    printf "%s # 0 %02X%02X\n", $0, j1, j2;
  next;
}
| 94 | |
# Every line not consumed by an earlier rule ends up here.  Lines
# starting with 0xF are tagged with sort key 2; all other lines are
# copied to the output unchanged.
{
  if ($0 ~ /^0xF/)
    printf "%s # 2\n", $0;
  else
    print;
}
| 103 | |
# After all input has been processed, append the entries tagged with
# sort key 4.  The first byte runs over 0xF0..0xF9 and the second byte
# over 0x40..0xFC except 0x7F; the second column counts up from 0xE000
# by one per entry.
END {
  code = 57344;			# 0xE000
  for (i = 240; i <= 249; i++)
    for (j = 64; j <= 252; j++)
      {
	if (j == 127)		# 0x7F is never emitted as a second byte
	  continue;
	printf "0x%02X%02X 0x%04X # 4\n", i, j, code++;
      }
}
| 116 | |