| 1 | # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 2 | # National Institute of Advanced Industrial Science and Technology (AIST) |
| 3 | # Registration Number H13PRO009 |
| 4 | |
| 5 | # This file is part of GNU Emacs. |
| 6 | |
| 7 | # GNU Emacs is free software: you can redistribute it and/or modify |
| 8 | # it under the terms of the GNU General Public License as published by |
| 9 | # the Free Software Foundation, either version 3 of the License, or |
| 10 | # (at your option) any later version. |
| 11 | |
| 12 | # GNU Emacs is distributed in the hope that it will be useful, |
| 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | # GNU General Public License for more details. |
| 16 | |
| 17 | # You should have received a copy of the GNU General Public License |
| 18 | # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 19 | |
# Set up the hex-letter lookup table and the state of the run that is
# currently being accumulated.
BEGIN {
  # Letter digits, both cases.  Decimal digits are not in the table.
  tohex["A"] = tohex["a"] = 10;
  tohex["B"] = tohex["b"] = 11;
  tohex["C"] = tohex["c"] = 12;
  tohex["D"] = tohex["d"] = 13;
  tohex["E"] = tohex["e"] = 14;
  tohex["F"] = tohex["f"] = 15;

  # Pending run: GB indices from_gb..to_gb map to code points starting
  # at from_unicode.  to_gb < from_gb means no run has been started.
  from_gb = 0;
  from_unicode = 0;
  to_gb = -1;
  to_unicode = 0;
}
| 38 | |
# Return the numeric value of the hexadecimal digit string STR (given
# without any "0x" prefix).  Letters of either case are looked up in
# the global tohex table.  A character that is not a hex digit counts
# as 0 in its position.
#
# The names after STR in the parameter list are awk local variables,
# not arguments; callers pass STR only.  Keeping them local prevents
# this function from overwriting same-named variables of its callers.
function decode_hex(str,    n, len, i, c) {
  n = 0;
  len = length(str);
  for (i = 1; i <= len; i++)
    {
      c = substr(str, i, 1);
      if (c >= "0" && c <= "9")
        n = n * 16 + (c - "0");
      else if (c in tohex)
        n = n * 16 + tohex[c];
      else
        # Unknown character: value 0, and do not create tohex[c] as a
        # side effect of merely referencing it.
        n = n * 16;
    }
  return n;
}
| 52 | |
# Map the two-byte code GB (lead byte * 256 + trail byte) to a linear
# index: lead bytes are counted from 129 and trail bytes from 64, with
# 191 consecutive trail-byte slots per lead byte.  No trail-byte value
# is skipped, so index_to_gb is the exact inverse.
#
# b0 and b1 are awk local variables, not arguments.
function gb_to_index(gb,    b0, b1) {
  b0 = int(gb / 256);
  b1 = gb % 256;
  return (b0 - 129) * 191 + (b1 - 64);
}
| 61 | |
# Inverse of gb_to_index: turn the linear index IDX back into a
# two-byte code (lead byte * 256 + trail byte).  Each lead byte,
# counted from 129, owns 191 consecutive trail-byte slots counted
# from 64; no trail-byte value is skipped.
#
# b0 and b1 are awk local variables, not arguments.
function index_to_gb(idx,    b0, b1) {
  b0 = int(idx / 191) + 129;
  b1 = (idx % 191) + 64;
  return (b0 * 256 + b1);
}
| 69 | |
# Copy comment lines of the input through unchanged and skip all
# further processing for them.  "#" is an ordinary character in an
# extended regular expression, so it is written without a backslash
# (escaping it is undefined by POSIX and makes gawk warn).
/^#/ {
  print;
  next;
}
| 74 | |
# Data line: $1 is "0x" plus a 4-digit GB code, $2 is "0x" plus a
# Unicode code point.  Consecutive mappings are merged into a run
# (from_gb..to_gb starting at from_unicode); when a line breaks the
# run, the finished run is printed and a new one is started.
{
  gb = gb_to_index(decode_hex(substr($1, 3, 4)));
  unicode = decode_hex(substr($2, 3));

  if (gb == to_gb + 1 && unicode == to_unicode + 1)
    {
      # Both sides advance by exactly one: grow the pending run.
      to_gb += 1;
      to_unicode += 1;
    }
  else if (gb > to_gb)
    {
      # Lines with gb <= to_gb never reach this branch; gb == to_gb is
      # a duplicated entry and is ignored.
      #
      # Print the finished run: one code or a code range, with the
      # Unicode value in 4 hex digits up to 65535 and 8 above that.
      # Nothing is printed while from_gb > to_gb (no run started yet).
      if (from_gb == to_gb)
        printf(from_unicode <= 65535 ? "0x%04X 0x%04X\n" : "0x%04X 0x%08X\n",
               index_to_gb(from_gb), from_unicode);
      else if (from_gb < to_gb)
        printf(from_unicode <= 65535 ? "0x%04X-0x%04X 0x%04X\n" : "0x%04X-0x%04X 0x%08X\n",
               index_to_gb(from_gb), index_to_gb(to_gb), from_unicode);

      # Start a new run at the current line.
      to_gb = gb;
      from_gb = gb;
      to_unicode = unicode;
      from_unicode = unicode;
    }
}
| 105 | |
# Flush the run that is still pending at end of input, using the same
# output forms as for runs printed while reading: a single code when
# the run has one element, a range otherwise, and an 8-digit Unicode
# value above 65535.  Nothing is printed when from_gb > to_gb, i.e.
# when no data line was ever read.
END {
  if (from_gb == to_gb)
    {
      if (from_unicode <= 65535)
        printf "0x%04X 0x%04X\n", index_to_gb(from_gb), from_unicode;
      else
        printf "0x%04X 0x%08X\n", index_to_gb(from_gb), from_unicode;
    }
  else if (from_gb < to_gb)
    {
      if (from_unicode <= 65535)
        printf "0x%04X-0x%04X 0x%04X\n",
          index_to_gb(from_gb), index_to_gb(to_gb), from_unicode;
      else
        printf "0x%04X-0x%04X 0x%08X\n",
          index_to_gb(from_gb), index_to_gb(to_gb), from_unicode;
    }
}
| 111 | |