Commit | Line | Data |
---|---|---|
463f5630 | 1 | # compact.awk -- Make charset map compact. |
5df4f04c | 2 | # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
463f5630 KH |
3 | # National Institute of Advanced Industrial Science and Technology (AIST) |
4 | # Registration Number H13PRO009 | |
9ad5de0c | 5 | |
463f5630 | 6 | # This file is part of GNU Emacs. |
9ad5de0c GM |
7 | |
8 | # GNU Emacs is free software: you can redistribute it and/or modify | |
463f5630 | 9 | # it under the terms of the GNU General Public License as published by |
9ad5de0c GM |
10 | # the Free Software Foundation, either version 3 of the License, or |
11 | # (at your option) any later version. | |
12 | ||
463f5630 KH |
13 | # GNU Emacs is distributed in the hope that it will be useful, |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU General Public License for more details. | |
9ad5de0c | 17 | |
463f5630 | 18 | # You should have received a copy of the GNU General Public License |
9ad5de0c | 19 | # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
463f5630 | 20 | |
9ad5de0c | 21 | # Commentary: |
463f5630 KH |
22 | # Make a charset map compact by changing this kind of line sequence: |
23 | # 0x00 0x0000 | |
24 | # 0x01 0x0001 | |
25 | # ... | |
26 | # 0x7F 0x007F | |
27 | # to one line of this format: | |
28 | # 0x00-0x7F 0x0000 | |
29 | ||
# Set up the hex-digit lookup table and the state of the pending run.
BEGIN {
  # tohex maps a hex digit character to its value PLUS ONE, so that a
  # missing entry (which evaluates to 0) can be told apart from the
  # digit "0".  decode_hex subtracts the 1 again.
  tohex["0"] = 1;
  tohex["1"] = 2;
  tohex["2"] = 3;
  tohex["3"] = 4;
  tohex["4"] = 5;
  tohex["5"] = 6;
  tohex["6"] = 7;
  tohex["7"] = 8;
  tohex["8"] = 9;
  tohex["9"] = 10;
  tohex["A"] = 11;
  tohex["B"] = 12;
  tohex["C"] = 13;
  tohex["D"] = 14;
  tohex["E"] = 15;
  tohex["F"] = 16;
  tohex["a"] = 11;
  tohex["b"] = 12;
  tohex["c"] = 13;
  tohex["d"] = 14;
  tohex["e"] = 15;
  tohex["f"] = 16;

  # Pending run of consecutive mappings: codes from_code..to_code map to
  # from_unicode..to_unicode.  from_code > to_code means "no run yet".
  from_code = 0;
  to_code = -1;
  from_unicode = 0;
  # Start to_unicode at -2 so that `unicode == to_unicode + 1' can never
  # hold for the first data line.  With the former value 0, a first line
  # "0x00 0x0001" was treated as continuing the (non-existent) run and
  # was later printed with from_unicode 0, i.e. with the wrong mapping.
  to_unicode = -2;
}
58 | ||
# Return the numeric value of the run of hexadecimal digits in STR that
# starts at the 1-based character position IDX.  Decoding stops at the
# first character that has no entry in tohex, or at the end of STR; the
# result is 0 when there is no hex digit at IDX.
#
# N, LEN, I and CH are local variables: awk declares locals as extra
# parameters, so callers keep passing only STR and IDX.  Previously
# these were implicit globals that every call overwrote.
function decode_hex(str, idx,    n, len, i, ch) {
  n = 0;
  len = length(str);
  for (i = idx; i <= len; i++)
    {
      ch = substr (str, i, 1);
      # Test membership with `in' so that a miss does not create an
      # empty tohex[ch] element as a side effect.
      if (! (ch in tohex))
	break;
      # tohex stores the digit value plus one.
      n = n * 16 + tohex[ch] - 1;
    }
  return n;
}
71 | ||
# Copy comment lines (those starting with `#') to the output unchanged
# and skip the remaining rules for them.
# `#' is an ordinary character inside a regexp, so it is written without
# a backslash: the meaning of `\#' is undefined in POSIX awk and gawk
# warns about it.
/^#/ {
  print;
  next;
}
76 | ||
# Data line: field 1 is the code and field 2 the Unicode value, each
# decoded from character 3 onward (presumably skipping a "0x" prefix,
# as in the examples in the file commentary).  Either extend the
# pending run by this line, or print the pending run and start a new
# one here.
{
  code = decode_hex($1, 3);
  unicode = decode_hex($2, 3);
  if (code != to_code + 1 || unicode != to_unicode + 1)
    {
      # This line does not continue the pending run, so flush it.
      # Nothing is printed while from_code > to_code.
      # Codes below 256 are printed with 2 hex digits, others with 4.
      if (from_code == to_code)
	{
	  if (to_code < 256)
	    printf "0x%02X 0x%04X\n", from_code, from_unicode;
	  else
	    printf "0x%04X 0x%04X\n", from_code, from_unicode;
	}
      else if (from_code < to_code)
	{
	  if (to_code < 256)
	    printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
	  else
	    printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
	}

      # Begin a new one-element run at the current line.
      to_code = code;
      from_code = code;
      to_unicode = unicode;
      from_unicode = unicode;
    }
  else
    {
      # Both values are exactly one past the end of the run: extend it.
      to_code += 1;
      to_unicode += 1;
    }
}
105 | ||
# Print the run that is still pending when the input ends.
END {
  # Codes below 256 are printed with 2 hex digits, others with 4.
  if (to_code < 256)
    {
      if (from_code == to_code)
	printf "0x%02X 0x%04X\n", from_code, from_unicode;
      # from_code > to_code only holds in the initial state set by BEGIN
      # (from_code = 0, to_code = -1), i.e. when no data line was read.
      # Print nothing then, as the per-line rule does, instead of a
      # bogus range ending at -1.
      else if (from_code < to_code)
	printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
    }
  else
    {
      if (from_code == to_code)
	printf "0x%04X 0x%04X\n", from_code, from_unicode;
      else if (from_code < to_code)
	printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
    }
}
21e99729 | 122 |