mkutable (a63915c2d7ff177ce364488f86eff99949402051) mkutable (6f26c71d76bb795b30684affb3b57870a7926b26)
#! /usr/bin/perl
use strict;

# Usage text; main() dies with this message when getopts() rejects the
# command line.
my $USAGE = <<__EOF__;
 usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
 -n = take non-matching types
 -f = zero-based type field (default 2)
__EOF__

use Getopt::Std;

# getopts('f:n') stores the -f value and the -n flag in these package
# variables; they must be declared for "use strict".
our ($opt_f, $opt_n);

# Zero-based index of the semicolon-separated field that holds the type.
# Overridden by -f.
my $type_field = 2;

# Override Unicode tables for certain control chars
# that are expected to be found in normal text files.
my %force_space = (
    0x08 => 1,    # backspace
    0x09 => 1,    # tab
    0x0a => 1,    # newline
    0x0c => 1,    # form feed
    0x0d => 1,    # carriage return
);

# Hangul Jamo medial vowels and final consonants should be zero width.
# Each entry is an inclusive [first, last] code point range.
my @force_compose = (
    [0x1160, 0x11ff],
    [0xd7b0, 0xd7c6],
    [0xd7cb, 0xd7fb],
);
31
24exit (main() ? 0 : 1);
25
# Entry point.  Parses options and type names from @ARGV, prints a
# "Generated by" header comment, then reads semicolon-separated records
# (with '#' starting a comment) from the remaining file arguments or
# stdin and passes every code point, with its type, to output().
#
# Command line: [-n] [-f#] type... [--] [files]
# Returns 1; the caller turns a true result into exit status 0.
# Dies with the usage text if getopts() rejects the options.
sub main {
    # Capture the command line before getopts() consumes the options, so
    # the generated header records exactly how the table was produced.
    my $args = join ' ', @ARGV;

    die $USAGE if not getopts('f:n');
    # The field index is zero-based, so "-f0" is a legitimate request:
    # test for definedness rather than truth.
    $type_field = $opt_f if defined $opt_f;

    # Everything up to an optional "--" is a type name.  Test definedness
    # so that a type spelled "0" does not end the list prematurely.
    my %types;
    while (defined(my $arg = shift @ARGV)) {
        last if $arg eq '--';
        $types{$arg} = 1;
    }
    my %out = ( 'types' => \%types );

    # Expand the [lo, hi] ranges into a per-code-point lookup table.
    my %force_compose;
    foreach my $comp (@force_compose) {
        my ($lo, $hi) = @$comp;
        $force_compose{$_} = 1 for $lo .. $hi;
    }

    my $date = `date`;
    chomp $date;
    print "/* Generated by \"$0 $args\" on $date */\n";

    my $last_code = 0;
    while (<>) {
        chomp;
        s/#.*//;
        my @fields = split /;/;
        next if not @fields;

        # The first field is either a single code point or a "lo..hi" range.
        my ($lo_code, $hi_code);
        my $codes = $fields[0];
        if ($codes =~ /(\w+)\.\.(\w+)/) {
            $lo_code = hex $1;
            $hi_code = hex $2;
        } else {
            $lo_code = $hi_code = hex $codes;
        }

        my $type = $fields[$type_field];
        $type =~ s/\s//g;

        # $last_code is deliberately not loop-scoped: after the loop it is
        # one past the last code point seen, which the final output() call
        # below relies on.
        for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) {
            # Forced overrides win over the type from the data file, and
            # %force_space wins over %force_compose.  The override is chosen
            # per code point so it never leaks into the rest of a range.
            my $effective_type =
                  $force_space{$last_code}   ? 'Zs'
                : $force_compose{$last_code} ? 'Mn'
                :                              $type;
            output(\%out, $last_code, $effective_type);
        }
    }

    # One more call without a type once the input is exhausted --
    # presumably this lets output() close the range still pending;
    # confirm against output().
    output(\%out, $last_code);
    return 1;
}
83
84sub output {
85 my ($out, $code, $type) = @_;

--- 27 unchanged lines hidden ---