HCoop Git - bpt/emacs.git/blame_incremental - admin/charsets/compact.awk

... / ...

Commit	Line	Data
	1	# compact.awk -- Make charset map compact.
	2	# Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
	3	# National Institute of Advanced Industrial Science and Technology (AIST)
	4	# Registration Number H13PRO009
	5
	6	# This file is part of GNU Emacs.
	7
	8	# GNU Emacs is free software: you can redistribute it and/or modify
	9	# it under the terms of the GNU General Public License as published by
	10	# the Free Software Foundation, either version 3 of the License, or
	11	# (at your option) any later version.
	12
	13	# GNU Emacs is distributed in the hope that it will be useful,
	14	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	# GNU General Public License for more details.
	17
	18	# You should have received a copy of the GNU General Public License
	19	# along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
	20
	21	# Commentary:
	22	# Make a charset map compact by changing this kind of line sequence:
	23	# 0x00 0x0000
	24	# 0x01 0x0001
	25	# ...
	26	# 0x7F 0x007F
	27	# to one line of this format:
	28	# 0x00-0x7F 0x0000
	29
	BEGIN {
	  # tohex maps each hex digit character (either case) to its value
	  # plus one, so a lookup result of 0 marks a non-hex character.
	  upper_digits = "0123456789ABCDEF";
	  lower_digits = "0123456789abcdef";
	  for (pos = 1; pos <= 16; pos++)
	    {
	      tohex[substr (upper_digits, pos, 1)] = pos;
	      tohex[substr (lower_digits, pos, 1)] = pos;
	    }
	  # Run currently being accumulated: codes from_code..to_code map to
	  # from_unicode..to_unicode.  Starting with to_code below from_code
	  # means no run has been collected yet.
	  from_code = 0;
	  from_unicode = 0;
	  to_code = -1;
	  to_unicode = 0;
	}
	58
	# decode_hex -- return the number spelled by the hex digits of STR,
	# reading from 1-based index IDX up to the first character that is not
	# a hex digit (or the end of STR).  Returns 0 if no digit is found.
	# Relies on the tohex table (digit character -> value + 1).
	# The parameters after the wide gap are local variables; callers pass
	# only STR and IDX.
	function decode_hex(str, idx,    n, len, i, c) {
	  n = 0;
	  len = length(str);
	  for (i = idx; i <= len; i++)
	    {
	      c = substr (str, i, 1);
	      # Membership test, so unknown characters do not get inserted
	      # into tohex as empty elements.
	      if (! (c in tohex))
	        break;
	      n = n * 16 + tohex[c] - 1;
	    }
	  return n;
	}
	71
	# Copy comment lines (those starting with "#") to the output unchanged
	# and skip the remaining rules for them.  "#" needs no backslash inside
	# a regexp; "\#" is not a defined escape sequence.
	/^#/ {
	  print;
	  next;
	}
	76
	# emit_run -- print the pending run from_code..to_code, if there is one.
	# A single code is printed as "CODE UNICODE"; a longer run is printed as
	# "FROM-TO UNICODE", where UNICODE is the mapping of FROM.  Codes are
	# printed with 2 hex digits when to_code < 256, otherwise with 4.
	# Prints nothing while from_code > to_code, which is the state before
	# any data line has been read.
	function emit_run() {
	  if (from_code > to_code)
	    return;
	  if (to_code < 256)
	    {
	      if (from_code == to_code)
	        printf "0x%02X 0x%04X\n", from_code, from_unicode;
	      else
	        printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
	    }
	  else
	    {
	      if (from_code == to_code)
	        printf "0x%04X 0x%04X\n", from_code, from_unicode;
	      else
	        printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
	    }
	}
	# Data line: $1 is the charset code and $2 the Unicode value; both are
	# decoded as hex digits starting at the third character (after "0x").
	{
	  code = decode_hex($1, 3);
	  unicode = decode_hex($2, 3);
	  # Extend the pending run only when one exists and this line continues
	  # it on both sides.  Without the existence check, a first entry with
	  # code 0 and Unicode 1 would be merged into the initial placeholder
	  # state and printed with the wrong Unicode value.
	  if (from_code <= to_code && code == to_code + 1 && unicode == to_unicode + 1)
	    {
	      to_code++;
	      to_unicode++;
	    }
	  else
	    {
	      # Flush what was collected so far, then start a new run here.
	      emit_run();
	      from_code = to_code = code;
	      from_unicode = to_unicode = unicode;
	    }
	}
	# Flush the last run.  emit_run prints nothing if the input contained
	# no data lines at all.
	END {
	  emit_run();
	}
	122