mkutable (a63915c2d7ff177ce364488f86eff99949402051) | mkutable (6f26c71d76bb795b30684affb3b57870a7926b26) |
---|---|
1#! /usr/bin/perl 2use strict; 3 4my $USAGE = <<__EOF__; 5 usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt 6 -n = take non-matching types 7 -f = zero-based type field (default 2) 8__EOF__ 9 | 1#! /usr/bin/perl 2use strict; 3 4my $USAGE = <<__EOF__; 5 usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt 6 -n = take non-matching types 7 -f = zero-based type field (default 2) 8__EOF__ 9 |
10use vars qw( $opt_f $opt_n ); | |
11use Getopt::Std; | 10use Getopt::Std; |
11use vars qw( $opt_f $opt_n ); 12 |
|
12my $type_field = 2; 13 14# Override Unicode tables for certain control chars 15# that are expected to be found in normal text files. 16my %force_space = ( 17 0x08 => 1, # backspace 18 0x09 => 1, # tab 19 0x0a => 1, # newline 20 0x0c => 1, # form feed 21 0x0d => 1, # carriage return 22); 23 | 13my $type_field = 2; 14 15# Override Unicode tables for certain control chars 16# that are expected to be found in normal text files. 17my %force_space = ( 18 0x08 => 1, # backspace 19 0x09 => 1, # tab 20 0x0a => 1, # newline 21 0x0c => 1, # form feed 22 0x0d => 1, # carriage return 23); 24 |
25# Hangul Jamo medial vowels and final consonants should be zero width. 26my @force_compose = ( 27 [0x1160, 0x11ff], 28 [0xd7b0, 0xd7c6], 29 [0xd7cb, 0xd7fb] 30); 31 |
|
24exit (main() ? 0 : 1); 25 26sub main { | 32exit (main() ? 0 : 1); 33 34sub main { |
27 my $date = `date`; 28 chomp $date; | |
29 my $args = join ' ', @ARGV; | 35 my $args = join ' ', @ARGV; |
30 my $header = "/* Generated by \"$0 $args\" on $date */\n"; 31 | |
32 die $USAGE if not getopts('f:n'); 33 $type_field = $opt_f if $opt_f; | 36 die $USAGE if not getopts('f:n'); 37 $type_field = $opt_f if $opt_f; |
38 |
|
34 my %types; 35 my $arg; 36 while ($arg = shift @ARGV) { 37 last if $arg eq '--'; 38 $types{$arg} = 1; 39 } 40 my %out = ( 'types' => \%types ); 41 | 39 my %types; 40 my $arg; 41 while ($arg = shift @ARGV) { 42 last if $arg eq '--'; 43 $types{$arg} = 1; 44 } 45 my %out = ( 'types' => \%types ); 46 |
42 print $header; | 47 my %force_compose; 48 foreach my $comp (@force_compose) { 49 my ($lo,$hi) = @$comp; 50 for (my $ch = $lo; $ch <= $hi; ++$ch) { 51 $force_compose{$ch} = 1; 52 } 53 } 54 55 my $date = `date`; 56 chomp $date; 57 print "/* Generated by \"$0 $args\" on $date */\n"; 58 |
43 my $last_code = 0; 44 while (<>) { 45 chomp; 46 s/#.*//; 47 my @fields = split /;/; 48 next if not @fields; 49 my ($lo_code, $hi_code); 50 my $codes = $fields[0]; 51 if ($codes =~ /(\w+)\.\.(\w+)/) { 52 $lo_code = hex $1; 53 $hi_code = hex $2; 54 } else { | 59 my $last_code = 0; 60 while (<>) { 61 chomp; 62 s/#.*//; 63 my @fields = split /;/; 64 next if not @fields; 65 my ($lo_code, $hi_code); 66 my $codes = $fields[0]; 67 if ($codes =~ /(\w+)\.\.(\w+)/) { 68 $lo_code = hex $1; 69 $hi_code = hex $2; 70 } else { |
55 $lo_code = $hi_code = hex $fields[0]; | 71 $lo_code = $hi_code = hex $codes; |
56 } 57 my $type = $fields[$type_field]; 58 $type =~ s/\s//g; 59 for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) { | 72 } 73 my $type = $fields[$type_field]; 74 $type =~ s/\s//g; 75 for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) { |
60 $type = 'Zs' if $force_space{$last_code}; 61 output(\%out, $last_code, $type); | 76 output(\%out, $last_code, 77 $force_space{$last_code} ? 'Zs' : $force_compose{$last_code} ? 'Mn' : $type); |
62 } 63 } 64 output(\%out, $last_code); 65 return 1; 66} 67 68sub output { 69 my ($out, $code, $type) = @_; --- 27 unchanged lines hidden --- | 78 } 79 } 80 output(\%out, $last_code); 81 return 1; 82} 83 84sub output { 85 my ($out, $code, $type) = @_; --- 27 unchanged lines hidden --- |