xref: /freebsd/contrib/less/mkutable (revision f6b74a7d164b5fada266d00e723155a178a4529f)
1a15691bfSXin LI#! /usr/bin/perl
2a15691bfSXin LIuse strict;
3a15691bfSXin LI
4a15691bfSXin LImy $USAGE = <<__EOF__;
5a15691bfSXin LI   usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
6a15691bfSXin LI          -n = take non-matching types
7a15691bfSXin LI      -f = zero-based type field (default 2)
8a15691bfSXin LI__EOF__
9a15691bfSXin LI
10a15691bfSXin LIuse vars qw( $opt_f $opt_n );
11a15691bfSXin LIuse Getopt::Std;
12a15691bfSXin LImy $type_field = 2;
13a15691bfSXin LI
14*f6b74a7dSXin LIexit (main() ? 0 : 1);
15a15691bfSXin LI
16a15691bfSXin LIsub main {
17a15691bfSXin LI    my $date = `date`;
18a15691bfSXin LI    chomp $date;
19a15691bfSXin LI    my $args = join ' ', @ARGV;
20a15691bfSXin LI    my $header = "/* Generated by \"$0 $args\" on $date */\n";
21a15691bfSXin LI
22a15691bfSXin LI    die $USAGE if not getopts('f:n');
23a15691bfSXin LI    $type_field = $opt_f if $opt_f;
24a15691bfSXin LI    my %types;
25a15691bfSXin LI    my $arg;
26a15691bfSXin LI    while ($arg = shift @ARGV) {
27a15691bfSXin LI        last if $arg eq '--';
28a15691bfSXin LI        $types{$arg} = 1;
29a15691bfSXin LI    }
30a15691bfSXin LI    my %out = ( 'types' => \%types );
31a15691bfSXin LI
32a15691bfSXin LI    print $header;
33*f6b74a7dSXin LI    my $last_code = 0;
34a15691bfSXin LI    while (<>) {
35a15691bfSXin LI        chomp;
36a15691bfSXin LI        s/#.*//;
37a15691bfSXin LI        my @fields = split /;/;
38a15691bfSXin LI        next if not @fields;
39*f6b74a7dSXin LI        my ($lo_code, $hi_code);
40*f6b74a7dSXin LI        my $codes = $fields[0];
41*f6b74a7dSXin LI        if ($codes =~ /(\w+)\.\.(\w+)/) {
42*f6b74a7dSXin LI            $lo_code = hex $1;
43*f6b74a7dSXin LI            $hi_code = hex $2;
44*f6b74a7dSXin LI        } else {
45*f6b74a7dSXin LI            $lo_code = $hi_code = hex $fields[0];
46*f6b74a7dSXin LI        }
47a15691bfSXin LI        my $type = $fields[$type_field];
48a15691bfSXin LI        $type =~ s/\s//g;
49*f6b74a7dSXin LI        for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) {
50*f6b74a7dSXin LI            output(\%out, $last_code, $type);
51a15691bfSXin LI        }
52a15691bfSXin LI    }
53*f6b74a7dSXin LI    output(\%out, $last_code);
54*f6b74a7dSXin LI    return 1;
55a15691bfSXin LI}
56a15691bfSXin LI
57a15691bfSXin LIsub output {
58a15691bfSXin LI    my ($out, $code, $type) = @_;
59*f6b74a7dSXin LI    my $type_ok = ($type and ${${$out}{types}}{$type});
60*f6b74a7dSXin LI    $type_ok = not $type_ok if $opt_n;
61*f6b74a7dSXin LI    my $prev_code = $$out{prev_code};
62*f6b74a7dSXin LI
63*f6b74a7dSXin LI    if (not $type_ok) {
64*f6b74a7dSXin LI        end_run($out, $prev_code);
65*f6b74a7dSXin LI    } elsif (not $$out{in_run} or $type ne $$out{run_type} or $code != $prev_code+1) {
66*f6b74a7dSXin LI        end_run($out, $prev_code);
67a15691bfSXin LI        start_run($out, $code, $type);
68a15691bfSXin LI    }
69*f6b74a7dSXin LI    $$out{prev_code} = $code;
70a15691bfSXin LI}
71a15691bfSXin LI
72a15691bfSXin LIsub start_run {
73a15691bfSXin LI    my ($out, $code, $type) = @_;
74a15691bfSXin LI    $$out{start_code} = $code;
75*f6b74a7dSXin LI    $$out{prev_code} = $code;
76*f6b74a7dSXin LI    $$out{run_type} = $type;
77a15691bfSXin LI    $$out{in_run} = 1;
78a15691bfSXin LI}
79a15691bfSXin LI
80a15691bfSXin LIsub end_run {
81a15691bfSXin LI    my ($out, $code) = @_;
82a15691bfSXin LI    return if not $$out{in_run};
83*f6b74a7dSXin LI    printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{run_type};
84a15691bfSXin LI    $$out{in_run} = 0;
85a15691bfSXin LI}
86