[hcoop/debian/courier-authlib.git] / unicode / mkultcase.pl

# Copyright 2000-2004 Double Precision, Inc.
# See COPYING for distribution information.
#
# $Id: mkultcase.pl,v 1.3 2004/02/01 06:09:09 mrsam Exp $
#
# Generate unicode upper/lower/titlecase translations.

# Emit the fixed banner of the generated C source: the copyright comment
# followed by the include of unicode.h.  The heredoc is single-quoted so
# that nothing inside it (notably the $Id$ keyword) is interpolated.
print <<'EOT';
/*
** Copyright 2000-2004 Double Precision, Inc.
** See COPYING for distribution information.
**
** $Id: mkultcase.pl,v 1.3 2004/02/01 06:09:09 mrsam Exp $
*/

#include "unicode.h"

EOT

# First command line argument.  It is consumed here but nothing in this
# script reads it afterwards; kept so existing invocations are unaffected.
my $set=shift;

# Read UnicodeData.txt from the current directory and collect the case
# mappings of every code point that has at least one:
#
#   %UC   - code point => uppercase mapping (field 12)
#   %LC   - code point => lowercase mapping (field 13)
#   %TC   - code point => titlecase mapping (field 14)
#   %FLAG - code point => 1 when any of the three mappings is present
#
# Keys and values are plain numbers.  These are package variables because
# the table-generating code further down reads them.

my $datafile="UnicodeData.txt";

open (my $ucd, "<", $datafile) or die "$datafile: $!\n";

while (my $line=<$ucd>)
{
	# Strip the line terminator, including a CR if the data file was
	# saved with CRLF line endings.
	$line =~ s/[\r\n]+$//;

	my @fields=split /;/, $line;

	# Field 0 is the code point in hexadecimal.  Skip blank or malformed
	# lines instead of evaluating their content as Perl code.
	next unless defined $fields[0] && $fields[0] =~ /^[0-9A-Fa-f]+$/;

	my $code=hex($fields[0]);

	foreach my $mapping ([12, \%UC], [13, \%LC], [14, \%TC])
	{
		my ($fieldno, $table)=@$mapping;
		my $value=$fields[$fieldno];

		# split() drops trailing empty fields, so a missing mapping
		# shows up either as undef or as an empty string.
		next unless defined $value && $value =~ /^[0-9A-Fa-f]+$/;

		$table->{$code}=hex($value);
		$FLAG{$code}=1;
	}
}

close($ucd);

# Number of hash buckets; a code point lands in bucket (code % $tabsize).
my $tabsize=1024;

# Distribute every code point that has a case mapping into its bucket.
# Each bucket is kept as a newline-terminated list of decimal code points,
# which is the format the table-emitting code below splits apart.
#
# The keys are visited in ascending numeric order so that the generated
# table is identical on every run; plain "keys %FLAG" order is randomized
# by modern perls and would shuffle the entries within each bucket.
foreach my $code (sort { $a <=> $b } keys %FLAG)
{
    $bucket[ $code % $tabsize ] .= "$code\n";
}

# Find the size of the fullest bucket.
my $maxcnt=0;

foreach my $slot (0 .. $tabsize-1)
{
    next unless defined $bucket[$slot];	# empty bucket

    my @members=split (/\n/, $bucket[$slot]);
    my $cnt=scalar @members;

    $maxcnt=$cnt if $cnt > $maxcnt;
}

print "const unsigned unicode_case_hash=$tabsize;\n";

# Informational only: emitted as a C comment.  The value is the largest
# bucket size plus 2; the reason for that margin is not visible in this
# script (NOTE(review): confirm against the code that scans the table).
print "/* unicode_case_maxbucket="
    . ($maxcnt+2) . "*/\n";

# Emit unicode_case_tab: one {code, upper, lower, title} row per code
# point, grouped bucket by bucket, four rows per output line, terminated
# by an all-zero row.
print "const unicode_char unicode_case_tab[][4]={\n";

my $idx=0;	# number of rows written so far

foreach my $slot (0 .. $tabsize-1)
{
    # Row index at which this bucket's entries begin.
    $offset[$slot]=$idx;

    foreach my $code (split(/\n/, $bucket[$slot]))
    {
	my $upper=$UC{$code}+0;
	my $lower=$LC{$code}+0;
	my $title=$TC{$code}+0;

	next unless $upper || $lower || $title;

	# A missing upper/lowercase mapping means the character maps to
	# itself; a missing titlecase mapping falls back to uppercase.
	$upper=$code unless $upper;
	$lower=$code unless $lower;
	$title=$upper unless $title;

	printf("{0x%04x,0x%04x,0x%04x,0x%04x},",$code,$upper,$lower,$title);
	print "\n" if ($idx % 4) == 3;
	++$idx;
    }
}
print "{0,0,0,0}};

const unsigned unicode_case_offset[$tabsize]={
";

# Emit unicode_case_offset: the starting row of every bucket, sixteen
# values per output line, comma separated.
foreach my $slot (0 .. $tabsize-1)
{
    printf("%4d", $offset[$slot]);
    print "," if $slot < $tabsize-1;

    print "\n" if ($slot % 16) == 15;
}
print "};\n";
Commit	Line	Data
8d138742 CE	1	# Copyright 2000-2004 Double Precision, Inc.
	2	# See COPYING for distribution information.
	3	#
	4	# $Id: mkultcase.pl,v 1.3 2004/02/01 06:09:09 mrsam Exp $
	5	#
	6	# Generate unicode upper/lower/titlecase translations.
	7
	8	print '/*
	9	** Copyright 2000-2004 Double Precision, Inc.
	10	** See COPYING for distribution information.
	11	**
	12	** $Id: mkultcase.pl,v 1.3 2004/02/01 06:09:09 mrsam Exp $
	13	*/
	14
	15	#include "unicode.h"
	16
	17	';
	18
	19	my $set=shift;
	20
	21	open (U, "UnicodeData.txt") \|\| die "$!\n";
	22
	23	while (<U>)
	24	{
	25	chomp;
	26
	27	my @fields= split /;/;
	28
	29	my ($code, $uc, $lc, $tc);
	30
	31	$code="0x$fields[0]";
	32	eval "\$code=$code;";
	33
	34	$uc=$fields[12];
	35	if ($uc ne "")
	36	{
	37	eval "\$uc=0x$uc;";
	38	$UC{$code}=$uc;
	39	$FLAG{$code}=1;
	40	}
	41
	42	$lc=$fields[13];
	43	if ($lc ne "")
	44	{
	45	eval "\$lc=0x$lc;";
	46	$LC{$code}=$lc;
	47	$FLAG{$code}=1;
	48	}
	49
	50	$tc=$fields[14];
	51	if ($tc ne "")
	52	{
	53	eval "\$tc=0x$tc;";
	54	$TC{$code}=$tc;
	55	$FLAG{$code}=1;
	56	}
	57	}
	58
	59	close(U);
	60
	61	my $tabsize=1024;
	62
	63	grep ($bucket[ $_ % $tabsize ] .= "$_\n", keys %FLAG);
	64
65	my $maxcnt=0;
66
67	for ($i=0; $i < $tabsize; $i++)
68	{
69	my $cnt=0;
70
71	grep ( ++$cnt, split (/\n/, $bucket[$i]));
72
73	$maxcnt=$cnt if $cnt > $maxcnt;
74	}
75
76	print "const unsigned unicode_case_hash=$tabsize;\n";
77	print "/* unicode_case_maxbucket="
78	. ($maxcnt+2) . "*/\n";
79
80	print "const unicode_char unicode_case_tab[][4]={\n";
81
82	my $idx=0;
83
84	for ($i=0; $i<$tabsize; $i++)
85	{
86	$offset[$i]=$idx;
87
88	grep {
89	my $j=$_;
90	my $u=$UC{$j}+0;
91	my $l=$LC{$j}+0;
92	my $t=$TC{$j}+0;
93
94	if ($u \|\| $l \|\| $t)
95	{
96	$u=$j unless $u;
97	$l=$j unless $l;
98	$t=$u unless $t;
99
100	printf("{0x%04x,0x%04x,0x%04x,0x%04x},",$j,$u,$l,$t);
101	print "\n" if ($idx % 4) == 3;
102	++$idx;
103	}
104	} split(/\n/, $bucket[$i]);
105	}
106	print "{0,0,0,0}};
107
108	const unsigned unicode_case_offset[$tabsize]={
109	";
110
111	for ($i=0; $i<$tabsize;$i++)
112	{
113	printf("%4d", $offset[$i]);
114	print "," if $i < $tabsize-1;
115
116	print "\n" if ($i % 16) == 15;
117	}
118	print "};\n";