#! /usr/perl5/bin/perl
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy is of the CDDL is also available via the Internet
# at http://www.illumos.org/license/CDDL.
#

#
# Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
#

# This converts MAPPING files to localedef character maps
# suitable for use with the UTF-8 derived localedef data.
#
# Usage: <this script> MAPPING-FILE
# Reads "UTF-8.cm" from the current directory and writes the resulting
# CHARMAP section to standard output.

use strict;
use warnings;

#
# Encode one UCS code point as UTF-8 and return it in charmap notation:
# one "\xHH" group (upper-case hex) per encoded byte, e.g. 0x20AC yields
# the 12-character string \xE2\x82\xAC.
#
# The historical 5- and 6-byte forms are still produced for code points
# above 0x1fffff so that every input yields some output.
#
sub ucs_to_utf8
{
    my $ucs = shift;

    # Single-byte (ASCII) code points have no lead/continuation split.
    return sprintf("\\x%02X", $ucs) if ($ucs <= 0x7f);

    # [ largest code point, lead-byte marker, continuation byte count ]
    my @forms = (
        [ 0x7ff,     0xc0, 1 ],
        [ 0xffff,    0xe0, 2 ],
        [ 0x1fffff,  0xf0, 3 ],
        [ 0x3ffffff, 0xf8, 4 ],
    );

    # Anything larger uses the 6-byte form, whose lead marker is 0xfc
    # (0xf8 is the 5-byte marker and must not be reused here).
    my ($lead, $trail) = (0xfc, 5);
    foreach my $form (@forms) {
        if ($ucs <= $form->[0]) {
            ($lead, $trail) = @{$form}[1, 2];
            last;
        }
    }

    # Continuation bytes carry six payload bits each and are generated
    # from the least significant end, so each one is prepended.
    my $utf8 = "";
    for (1 .. $trail) {
        $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80) . $utf8;
        $ucs >>= 6;
    }

    return sprintf("\\x%02X", $ucs | $lead) . $utf8;
}

# UTF-8 byte string (charmap notation) -> newline-separated symbolic names.
my %unames;
# Symbolic name -> UTF-8 byte string.  Filled in by load_utf8_cm(); nothing
# else in this file reads it.
my %uvalues;

#
# This is not a general purpose Character Map parser, but it's good enough
# for the stock one supplied with CLDR.
#
# Reads the character map in $file and records every "<name> <bytes>" pair
# in %unames and %uvalues.  Comment lines, blank lines and the CHARMAP /
# END CHARMAP markers are ignored, as are lines that have no second column.
# Dies if the file cannot be opened.
#
sub load_utf8_cm
{
    my $file = shift;

    open(my $fh, '<', $file) or die "open $file: $!";

    while (my $line = <$fh>) {
        next if ($line =~ /^#/);
        next if ($line =~ /^\s*$/);
        next if ($line =~ /^\s*CHARMAP\s*$/);
        next if ($line =~ /^\s*END\s*CHARMAP\s*$/);
        chomp($line);

        my ($name, $utf8val) = split(/\s+/, $line);
        next unless (defined($utf8val));

        # Several names may share one encoding; keep them all.
        if (defined($unames{$utf8val})) {
            $unames{$utf8val} .= "\n" . $name;
        } else {
            $unames{$utf8val} = $name;
        }
        $uvalues{$name} = $utf8val;
    }
    close($fh);
}

# Encoded value (first column of the MAPPING file) -> space-separated list
# of UTF-8 byte strings it maps to.
my %map;

#
# Reads the MAPPING file $file.  Each data line holds an encoded value in
# the first column and a hexadecimal UCS code point in the second; the code
# point is converted with ucs_to_utf8() and appended to %map under the
# encoded value.  Comment lines, blank lines and lines without a second
# column are ignored.  Dies if the file cannot be opened.
#
sub load_map
{
    my $file = shift;

    open(my $fh, '<', $file) or die "open $file: $!";

    while (my $line = <$fh>) {
        next if ($line =~ /^#/);
        next if ($line =~ /^\s*$/);
        chomp($line);

        my ($val, $ucs) = split(/\s+/, $line);
        next unless (defined($ucs));

        # Drop a leading "\x" and zero padding before the hex conversion.
        $ucs =~ s/^\\x[0]*//;
        my $utf8 = ucs_to_utf8(hex($ucs));

        if (defined($map{$val})) {
            $map{$val} .= " " . $utf8;
        } else {
            $map{$val} = $utf8;
        }
    }
    close($fh);
}

#
# Convert a hexadecimal encoded value (e.g. "0x8140") into charmap byte
# notation, most significant byte first, using lower-case hex ("\x81\x40").
# Leading zero bytes are not emitted; a value of zero yields "\x00".
#
sub mb_str
{
    my $val = shift;
    my $code = hex($val);
    my $str = "";

    # do/while guarantees one byte of output even when the value is zero.
    do {
        $str = sprintf("\\x%02x", $code & 0xff) . $str;
        $code >>= 8;
    } while ($code);

    return ($str);
}

#
# Build one output line: the symbolic name, tab padding intended to bring
# the value to column 64 (assuming 8-column tab stops), the encoded value
# and a newline.  At least one tab is always emitted so that a long name
# can never run into the value; the unclamped count would be zero or
# negative for names of 64 characters or more.
#
sub charmap_line
{
    my ($name, $val) = @_;

    my $nt = int((64 - length($name) + 7) / 8);
    $nt = 1 if ($nt < 1);

    return $name . ("\t" x $nt) . mb_str($val) . "\n";
}

#
# Script entry point: load UTF-8.cm and the MAPPING file named on the
# command line, then print the CHARMAP section to standard output.
#
sub main
{
    my $mf = shift(@ARGV);
    die "usage: $0 mapping-file\n" unless (defined($mf));

    load_utf8_cm("UTF-8.cm");
    load_map($mf);

    print("CHARMAP\n");
    # Keys are sorted as strings, not numerically.
    foreach my $val (sort keys %map) {
        foreach my $utf8 (split(/ /, $map{$val})) {
            my $ref = $unames{$utf8};
            # Code points that UTF-8.cm does not name produce no output.
            next unless (defined($ref));

            foreach my $name (sort(split(/\n/, $ref))) {
                print charmap_line($name, $val);
            }
        }
    }
    print "END CHARMAP\n";
}

# Run the conversion only when executed as a script, so the helpers above
# can be loaded (e.g. by a test) without touching any files.
main() unless caller;

1;