xref: /titanic_44/usr/src/cmd/localedef/data/convert_map.pl (revision b8afd3a780ce850ff107bb3be330465bf47f84bd)
1#! /usr/perl5/bin/perl
2#
3# This file and its contents are supplied under the terms of the
4# Common Development and Distribution License ("CDDL"), version 1.0.
5# You may only use this file in accordance with the terms of version
6# 1.0 of the CDDL.
7#
8# A full copy of the text of the CDDL should have accompanied this
9# source.  A copy is of the CDDL is also available via the Internet
10# at http://www.illumos.org/license/CDDL.
11#
12
13#
14# Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
15#
16
17# This converts MAPPING files to localedef character maps
18# suitable for use with the UTF-8 derived localedef data.
19
#
# Convert a UCS code point to its UTF-8 byte sequence, rendered in the
# notation used by localedef character maps: one "\xNN" token (upper-case
# hex) per encoded byte, concatenated without separators.
#
#   $ucs     - numeric code point.
#   returns  - the encoded string, e.g. 0x20AC yields "\xE2\x82\xAC".
#
# The original (pre-RFC 3629) UTF-8 scheme of up to six bytes is
# supported.  Code points above 0x7fffffff do not fit in six bytes and
# are not handled specially.
#
sub ucs_to_utf8
{
    my $ucs = shift;

    # Single byte (ASCII) code points are emitted as-is.
    return (sprintf("\\x%02X", $ucs)) if ($ucs <= 0x7f);

    # Pick the number of continuation bytes and the lead byte marker
    # according to the magnitude of the code point.
    my ($ntrail, $lead);
    if ($ucs <= 0x7ff) {
        ($ntrail, $lead) = (1, 0xc0);
    } elsif ($ucs <= 0xffff) {
        ($ntrail, $lead) = (2, 0xe0);
    } elsif ($ucs <= 0x1fffff) {
        ($ntrail, $lead) = (3, 0xf0);
    } elsif ($ucs <= 0x03ffffff) {
        ($ntrail, $lead) = (4, 0xf8);
    } else {
        # Six byte form: the lead byte marker is 0xfc (0xf8 is the
        # five byte marker and would yield an invalid sequence).
        ($ntrail, $lead) = (5, 0xfc);
    }

    # Continuation bytes carry six payload bits each and are produced
    # from the least significant end, so each one is prepended.
    my $utf8 = "";
    for (1 .. $ntrail) {
        $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80) . $utf8;
        $ucs >>= 6;
    }

    # Whatever bits remain belong in the lead byte.
    return (sprintf("\\x%02X", $ucs | $lead) . $utf8);
}
75
# %unames:  encoded value (e.g. "\xC3\xA9") => symbolic name(s); when
#           several names share one value they are joined with "\n".
# %uvalues: symbolic name => encoded value.
my %unames;
my %uvalues;

#
# This is not a general purpose Character Map parser, but it's good enough
# for the stock one supplied with CLDR.
#
# Reads the character map in $file and records, for every entry line, the
# first whitespace separated field as the symbolic name and the second
# field as its encoded value, filling %unames and %uvalues.  Comment
# lines, blank lines and the CHARMAP / END CHARMAP markers are skipped.
# Dies with the file name and system error if the file cannot be opened.
#
sub load_utf8_cm
{
    my $file = shift;

    open(my $fh, '<', $file) || die "open $file: $!";

    while (my $line = <$fh>) {
        next if ($line =~ /^#/);
        next if ($line =~ /^\s*$/);
        next if ($line =~ /^\s*CHARMAP\s*$/);
        next if ($line =~ /^\s*END\s*CHARMAP\s*$/);
        chomp($line);

        # Splitting on ' ' ignores leading whitespace, so an indented
        # entry does not produce an empty name field.
        my ($name, $utf8val) = split(' ', $line);

        # An entry without a value cannot be indexed; ignore it.
        next unless (defined($utf8val));

        if (defined($unames{$utf8val})) {
            $unames{$utf8val} .= "\n" . $name;
        } else {
            $unames{$utf8val} = $name;
        }
        $uvalues{$name} = $utf8val;
    }
    close($fh);
}
108
# %map: charset code as written in the MAPPING file (first field) =>
#       UTF-8 encoding(s) of the mapped code point(s) in "\xNN" notation;
#       multiple encodings for one code are separated by a single space.
my %map;

#
# Load a MAPPING file into %map.
#
# Each entry line holds the charset code in the first whitespace separated
# field and the UCS value (hexadecimal, with an optional 0x or \x prefix)
# in the second.  Lines starting with '#' and blank lines are skipped.
# Dies with the file name and system error if the file cannot be opened.
#
sub load_map
{
    my $file = shift;

    open(my $fh, '<', $file) || die "open $file: $!";

    while (my $line = <$fh>) {
        next if ($line =~ /^#/);
        next if ($line =~ /^\s*$/);
        chomp($line);

        my ($val, $ucs) = split(' ', $line);

        # Some tables list a code with no UCS value, followed directly
        # by a comment (e.g. "0x81   #UNDEFINED").  Such a code has no
        # mapping; do not let hex() silently turn it into U+0000.
        next if (!defined($ucs) || $ucs =~ /^#/);

        # Drop a "\x" prefix and leading zeros; hex() copes with "0x".
        $ucs =~ s/^\\x[0]*//;
        my $utf8 = ucs_to_utf8(hex($ucs));

        if (defined($map{$val})) {
            $map{$val} .= " " . $utf8;
        } else {
            $map{$val} = $utf8;
        }
    }
    close($fh);
}
133
#
# Render a hexadecimal charset code as a sequence of "\xNN" tokens
# (lower-case hex), one per byte, most significant byte first.  A value
# of zero is rendered as the single token "\x00".
#
sub mb_str
{
    my $code = hex(shift);

    return ("\\x00") if ($code == 0);

    # Peel bytes off the low end, placing each in front of the ones
    # already collected so the final order is big-endian.
    my @tokens;
    until ($code == 0) {
        unshift(@tokens, sprintf("\\x%02x", $code & 0xff));
        $code >>= 8;
    }
    return (join("", @tokens));
}
149
# Main: the single command line argument is the MAPPING file to convert.
# The UTF-8 reference character map is read from "UTF-8.cm" in the
# current directory, and the resulting character map goes to stdout.
my $mf = shift(@ARGV);
defined($mf) || die "usage: $0 <mapping-file>\n";

load_utf8_cm("UTF-8.cm");
load_map($mf);

print("CHARMAP\n");
foreach my $val (sort(keys(%map))) {
    foreach my $utf8 (split(/ /, $map{$val})) {
        my $ref = $unames{$utf8};

        # The reference character map has no name for this encoding,
        # so there is nothing to emit for it.
        next unless (defined($ref));

        foreach my $name (sort(split(/\n/, $ref))) {
            # Pad with tabs so that values line up at column 64.
            # Always emit at least one tab: for names of 72 or more
            # characters the computed count is zero or negative, which
            # would otherwise glue the name to its value or (when
            # negative) make a count-down loop run forever.
            my $nt = int((64 - length($name) + 7) / 8);
            $nt = 1 if ($nt < 1);

            print $name . ("\t" x $nt) . mb_str($val) . "\n";
        }
    }
}
print "END CHARMAP\n";
173