| 1 | # compact.awk -- Make charset map compact. |
| 2 | # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 3 | # National Institute of Advanced Industrial Science and Technology (AIST) |
| 4 | # Registration Number H13PRO009 |
| 5 | |
| 6 | # This file is part of GNU Emacs. |
| 7 | |
| 8 | # GNU Emacs is free software: you can redistribute it and/or modify |
| 9 | # it under the terms of the GNU General Public License as published by |
| 10 | # the Free Software Foundation, either version 3 of the License, or |
| 11 | # (at your option) any later version. |
| 12 | |
| 13 | # GNU Emacs is distributed in the hope that it will be useful, |
| 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | # GNU General Public License for more details. |
| 17 | |
| 18 | # You should have received a copy of the GNU General Public License |
| 19 | # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 20 | |
| 21 | # Commentary: |
| 22 | # Make a charset map compact by changing this kind of line sequence: |
| 23 | # 0x00 0x0000 |
| 24 | # 0x01 0x0001 |
| 25 | # ... |
| 26 | # 0x7F 0x007F |
| 27 | # to one line of this format: |
| 28 | # 0x00-0x7F 0x0000 |
| 29 | |
# Fill the global `tohex' table.  Each hexadecimal digit character (both
# upper and lower case) maps to its numeric value PLUS ONE, so that the
# value 0 -- what awk yields for a missing array element -- can be used
# by decode_hex to mean "not a hex digit".
function init_tohex(    upper, lower, pos) {
    upper = "0123456789ABCDEF";
    lower = "0123456789abcdef";
    for (pos = 1; pos <= 16; pos++) {
        # pos is 1-based, so it already equals (digit value + 1).
        tohex[substr(upper, pos, 1)] = pos;
        tohex[substr(lower, pos, 1)] = pos;
    }
}

# Set up the digit table and the state describing the run of consecutive
# mappings being accumulated: source codes from_code..to_code map to
# from_unicode..to_unicode.  to_code starts below from_code, i.e. no run
# has been collected yet.
BEGIN {
    init_tohex();
    from_code = 0;
    to_code = -1;
    to_unicode = 0;
    from_unicode = 0;
}
| 58 | |
# Decode the run of hexadecimal digits in STR that starts at the 1-based
# character position IDX, and return its numeric value.
#
# Scanning stops at the first character that is not a hex digit, or at
# the end of STR; if no digit is found at IDX the result is 0.
#
# Relies on the global table `tohex', which maps each hex digit to its
# value plus one; a lookup that yields 0 (missing element) therefore
# marks a non-digit.
#
# Callers pass only STR and IDX.  The names after the extra spaces in
# the parameter list are local variables (the awk idiom for locals), so
# calling this function no longer overwrites globals named n, len, i or c.
function decode_hex(str, idx,    n, len, i, c) {
    n = 0;
    len = length(str);
    for (i = idx; i <= len; i++) {
        c = tohex[substr(str, i, 1)];
        if (c == 0)
            break;              # not a hex digit: end of the number
        n = n * 16 + c - 1;     # c is (digit value + 1)
    }
    return n;
}
| 71 | |
# Copy comment lines (those whose first character is "#") to the output
# unchanged and skip the mapping rule for them.  The line is printed
# immediately, i.e. before any run that is still being accumulated.
#
# "#" is an ordinary character inside a regexp, so it is written without
# a backslash: "\#" has undefined meaning in POSIX EREs and makes gawk
# emit a warning.
/^#/ {
    print;
    next;
}
| 76 | |
# Main rule: fold one "0xCODE 0xUNICODE" mapping line into the current run.
#
# The run is held in the globals from_code..to_code (source codes) and
# from_unicode..to_unicode (their targets).  A line extends the run when
# both its code and its Unicode value are exactly one past the run's end;
# otherwise the finished run is printed and a new one-entry run starts
# at this line.
{
    code = decode_hex($1, 3);       # position 3 skips the leading "0x"
    unicode = decode_hex($2, 3);

    # Only an existing run (from_code <= to_code) may be extended.  Before
    # the first data line BEGIN leaves from_code = 0 and to_code = -1;
    # without this test a first line of "0x00 0x0001" would satisfy both
    # "+ 1" comparisons against those placeholders and later be written
    # out as "0x00 0x0000".
    if (from_code <= to_code \
        && code == to_code + 1 && unicode == to_unicode + 1) {
        to_code++;
        to_unicode++;
    } else {
        # Flush the finished run, if any.  Codes are printed with two hex
        # digits when the run ends below 256, otherwise with four.
        if (to_code < 256) {
            if (from_code == to_code)
                printf "0x%02X 0x%04X\n", from_code, from_unicode;
            else if (from_code < to_code)
                printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
        } else {
            if (from_code == to_code)
                printf "0x%04X 0x%04X\n", from_code, from_unicode;
            else if (from_code < to_code)
                printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
        }
        # Start a new run consisting of just this line.
        from_code = to_code = code;
        from_unicode = to_unicode = unicode;
    }
}
| 105 | |
# Flush the last run once all input has been read.
#
# Nothing is printed when no run exists (from_code > to_code), which is
# the state BEGIN sets up and which persists if the input had no data
# lines.  Previously that case fell into the range branch and printed a
# bogus entry whose end code was -1.
#
# Codes are printed with two hex digits when the run ends below 256,
# otherwise with four; a one-entry run is printed without the "-" range.
END {
    if (from_code <= to_code) {
        if (to_code < 256) {
            if (from_code == to_code)
                printf "0x%02X 0x%04X\n", from_code, from_unicode;
            else
                printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
        } else {
            if (from_code == to_code)
                printf "0x%04X 0x%04X\n", from_code, from_unicode;
            else
                printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
        }
    }
}
| 122 | |