Commit | Line | Data |
---|---|---|
463f5630 | 1 | # compact.awk -- Make charset map compact. |
ed3be7aa | 2 | # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 |
463f5630 KH |
3 | # National Institute of Advanced Industrial Science and Technology (AIST) |
4 | # Registration Number H13PRO009 | |
5 | # | |
6 | # This file is part of GNU Emacs. | |
7 | # | |
8 | # GNU Emacs is free software; you can redistribute it and/or modify | |
9 | # it under the terms of the GNU General Public License as published by | |
ed3be7aa | 10 | # the Free Software Foundation; either version 3, or (at your option) |
463f5630 KH |
11 | # any later version. |
12 | # | |
13 | # GNU Emacs is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU General Public License for more details. | |
17 | # | |
18 | # You should have received a copy of the GNU General Public License | |
19 | # along with GNU Emacs; see the file COPYING. If not, write to the | |
ed3be7aa GM |
20 | # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
21 | # Boston, MA 02110-1301, USA. | |
463f5630 KH |
22 | |
23 | # Comment: | |
24 | # Make a charset map compact by changing this kind of line sequence: | |
25 | # 0x00 0x0000 | |
26 | # 0x01 0x0001 | |
27 | # ... | |
28 | # 0x7F 0x007F | |
29 | # to one line of this format: | |
30 | # 0x00-0x7F 0x0000 | |
31 | ||
# Fill the global tohex table.  tohex[d] holds the value of hex digit d
# PLUS ONE (so "0" -> 1 ... "F"/"f" -> 16); the offset means that a
# character with no entry, which compares equal to 0, can be told apart
# from the digit "0".  upper, lower and k are locals.
function init_tohex(    upper, lower, k) {
    upper = "0123456789ABCDEF";
    lower = "0123456789abcdef";
    for (k = 1; k <= 16; k++) {
        tohex[substr(upper, k, 1)] = k;
        tohex[substr(lower, k, 1)] = k;
    }
}

BEGIN {
    init_tohex();

    # from_code..to_code is the pending run of consecutive codes and
    # from_unicode..to_unicode the run of values it maps to.  Start with
    # an empty run (to = from - 1 on both sides).  to_unicode must start
    # at -1, not 0: otherwise a first line "0x00 0x0001" looks like the
    # continuation of a run that began at unicode 0 and would later be
    # printed with the wrong starting value 0x0000.
    from_code = 0;
    to_code = -1;
    from_unicode = 0;
    to_unicode = -1;
}
60 | ||
# Return the numeric value of the hexadecimal digits of STR, starting at
# the 1-based character position IDX.  Decoding stops at the end of STR
# or at the first character that has no entry in the global tohex table;
# if no digit is found the result is 0.
#
# tohex stores each digit's value plus one, hence the "- 1" below.
#
# n, len, i and ch are declared as extra parameters so that they are
# local to this function instead of leaking into the global namespace;
# callers keep passing just STR and IDX.
function decode_hex(str, idx,    n, len, i, ch) {
    n = 0;
    len = length(str);
    for (i = idx; i <= len; i++) {
        ch = substr(str, i, 1);
        # Use "in" rather than reading tohex[ch] directly: a plain
        # reference would create an empty element for every non-hex
        # character encountered.
        if (!(ch in tohex))
            break;
        n = n * 16 + tohex[ch] - 1;
    }
    return n;
}
73 | ||
# Copy comment lines (those whose first character is "#") to the output
# unchanged and skip the mapping logic for them.
# "#" is not a regexp metacharacter, so it is written without a
# backslash: the meaning of "\#" inside a regexp is undefined in POSIX
# awk and some implementations warn about it.
/^#/ {
    print;
    next;
}
78 | ||
# Mapping line: "$1" is the charset code and "$2" the value it maps to,
# both decoded from character position 3 onwards (i.e. after a two
# character prefix such as "0x").
{
    code = decode_hex($1, 3);
    unicode = decode_hex($2, 3);

    # The line continues the pending run on both sides: just grow the run.
    if (code == to_code + 1 && unicode == to_unicode + 1) {
        to_code = code;
        to_unicode = unicode;
        next;
    }

    # Otherwise flush the pending run, if there is one (from_code is
    # greater than to_code only before the first run has been started).
    # Codes are printed with 2 hex digits while the run's last code is
    # below 256, and with 4 hex digits otherwise.
    if (from_code == to_code) {
        if (to_code < 256)
            printf "0x%02X 0x%04X\n", from_code, from_unicode;
        else
            printf "0x%04X 0x%04X\n", from_code, from_unicode;
    } else if (from_code < to_code) {
        if (to_code < 256)
            printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
        else
            printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
    }

    # Start a new one-element run at the current line.
    from_code = to_code = code;
    from_unicode = to_unicode = unicode;
}
107 | ||
# Flush the run that is still pending when the input ends.
END {
    # BEGIN starts with from_code = 0 and to_code = -1, and that state is
    # only left once a mapping line has been processed.  If the input had
    # no mapping lines at all there is nothing to flush; printing anyway
    # would emit a bogus range built from the initial 0 / -1 values.
    if (from_code <= to_code) {
        # Same width rule as for runs flushed mid-file: 2 hex digits
        # while the last code is below 256, 4 hex digits otherwise.
        if (to_code < 256) {
            if (from_code == to_code)
                printf "0x%02X 0x%04X\n", from_code, from_unicode;
            else
                printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
        } else {
            if (from_code == to_code)
                printf "0x%04X 0x%04X\n", from_code, from_unicode;
            else
                printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
        }
    }
}
21e99729 MB |
124 | |
125 | # arch-tag: 7e6f57c3-8e62-4af3-8916-ca67bca3a0ce |