1#!/usr/local/bin/perl -w 2 3# SPDX-License-Identifier: BSD-2-Clause 4# 5# Copyright 2009 Edwin Groothuis <edwin@FreeBSD.org> 6# Copyright 2015 John Marino <draco@marino.st> 7# Copyright 2020 Yuri Pankov <yuripv@FreeBSD.org> 8# 9# Redistribution and use in source and binary forms, with or without 10# modification, are permitted provided that the following conditions 11# are met: 12# 1. Redistributions of source code must retain the above copyright 13# notice, this list of conditions and the following disclaimer. 14# 2. Redistributions in binary form must reproduce the above copyright 15# notice, this list of conditions and the following disclaimer in the 16# documentation and/or other materials provided with the distribution. 17# 18# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28# SUCH DAMAGE. 
#

use strict;
use warnings;
use Encode qw(encode decode);

# Charmap symbol (for example "<U0041>") keyed by the upper-case hexadecimal
# code point produced by utf8to32().
my %utf8map = ();
# Path of the UTF-8 charmap to read (first command-line argument).
my $utf8charmap;
# Path of the width file to write (second command-line argument).
my $outfilename;
# Output handle: opened by generate_header(), written by make_widths(),
# closed by generate_footer().
my $fout;

# Run the generator only when executed as a script, so that the helper subs
# can be loaded from another file without touching @ARGV, STDIN or the
# output file.
main() unless caller;

############################

# Entry point.
#
# Usage: <script> charmap outfile < width-data
#
# Standard input carries a version line first (it is copied into the header
# of the generated file), then one "CODEPOINT WIDTH" pair per line, separated
# by a single space.
sub main {
    die("usage: $0 charmap outfile < width-data\n") if (@ARGV < 2);
    ($utf8charmap, $outfilename) = @ARGV;

    get_utf8map($utf8charmap);
    generate_header();
    make_widths($outfilename);
    generate_footer();
}

# Convert a charmap byte sequence written as '\xHH\xHH...' (UTF-8 bytes) into
# the upper-case hexadecimal code point it encodes, with leading zeros
# removed down to a minimum of four digits, e.g. '\xE2\x82\xAC' -> "20AC".
sub utf8to32 {
    my $seq = shift;
    my @kl = split /\\x/, $seq;

    # The text in front of the first "\x" shows up as an empty leading field.
    shift @kl if (@kl and $kl[0] eq '');
    my $k = pack('H2' x scalar @kl, @kl);
    my $ux = encode('UTF-32BE', decode('UTF-8', $k));
    my $u = uc(unpack('H*', $ux));

    # No BOM stripping here: the explicit-endian UTF-32BE encoding never
    # prepends one, so a leading 0000FEFF can only be the character U+FEFF
    # itself and removing it would turn that code point into an empty key.

    # Drop leading zeros but always keep at least four hex digits.
    $u =~ s/^0+(?=[0-9A-F]{4})//;

    return $u;
}

# Read the charmap in $file and fill %utf8map with one entry per line found
# between "CHARMAP" and "END CHARMAP".  Each entry maps the code point (as
# returned by utf8to32) to the symbolic name in the first column.
# Dies if the file cannot be opened.
sub get_utf8map {
    my $file = shift;

    open(my $fin, '<', $file)
        or die("can't read $file: $!\n");

    my $incharmap = 0;
    while (my $l = <$fin>) {
        chomp($l);
        $l =~ s/\r$//;
        next if ($l =~ /^\#/);
        next if ($l eq "");

        if ($l eq "CHARMAP") {
            $incharmap = 1;
            next;
        }

        next if (!$incharmap);
        last if ($l eq "END CHARMAP");

        # Skip anything that is not "<NAME>  \xHH[\xHH...]"; using $1/$2
        # after a failed match would silently reuse the previous line's data.
        next unless ($l =~ /^(<\S+>)\s+((?:\\x[0-9A-Fa-f]{2})+)/);
        my ($v, $bytes) = ($1, $2);

#       print STDERR "register: " . utf8to32($bytes) . " - $v\n";
        $utf8map{utf8to32($bytes)} = $v;
    }
    close($fin);
}

# Read the version line from standard input, open $outfilename for writing
# and emit the file header up to and including the "WIDTH" keyword.
# Dies if standard input is empty or the output file cannot be opened.
sub generate_header {
    my $version = <STDIN>;
    # Check before opening so an empty input does not truncate the old file.
    die("no version line on standard input\n") unless (defined $version);
    chomp($version);

    open($fout, ">", $outfilename)
        or die("can't write to $outfilename: $!\n");
    print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# utf8proc $version.
# -----------------------------------------------------------------------------
WIDTH
EOF
}

# Terminate the WIDTH section and close the output file.
# Buffered write errors surface at close time, so the result is checked.
sub generate_footer {
    print {$fout} "END WIDTH\n";
    close($fout)
        or die("can't close $outfilename: $!\n");
}

# Copy the remaining standard-input lines ("CODEPOINT WIDTH", separated by a
# single space) to the output as "<NAME>\tWIDTH" for every code point that
# has an entry in %utf8map.  The file-name argument passed by the caller is
# not used; output goes to the handle opened by generate_header().
sub make_widths {
    while (my $l = <STDIN>) {
        chomp($l);
        my ($wc, $wcw) = split(/ /, $l, -1);

        # Blank or single-field lines carry no usable pair.
        next unless (defined $wc and defined $wcw);
        next unless (exists $utf8map{$wc});

        print {$fout} "$utf8map{$wc}\t$wcw\n";
    }
}

# True value so the file can also be loaded with require/do.
1;