xref: /freebsd/contrib/less/mkutable (revision d93a896ef95946b0bf1219866fcb324b78543444)
1#! /usr/bin/perl
2use strict;
3
4my $USAGE = <<__EOF__;
5   usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
6          -n = take non-matching types
7      -f = zero-based type field (default 2)
8__EOF__
9
10use vars qw( $opt_f $opt_n );
11use Getopt::Std;
12my $type_field = 2;
13
14exit (main() ? 0 : 1);
15
16sub main {
17    my $date = `date`;
18    chomp $date;
19    my $args = join ' ', @ARGV;
20    my $header = "/* Generated by \"$0 $args\" on $date */\n";
21
22    die $USAGE if not getopts('f:n');
23    $type_field = $opt_f if $opt_f;
24    my %types;
25    my $arg;
26    while ($arg = shift @ARGV) {
27        last if $arg eq '--';
28        $types{$arg} = 1;
29    }
30    my %out = ( 'types' => \%types );
31
32    print $header;
33    my $last_code = 0;
34    while (<>) {
35        chomp;
36        s/#.*//;
37        my @fields = split /;/;
38        next if not @fields;
39        my ($lo_code, $hi_code);
40        my $codes = $fields[0];
41        if ($codes =~ /(\w+)\.\.(\w+)/) {
42            $lo_code = hex $1;
43            $hi_code = hex $2;
44        } else {
45            $lo_code = $hi_code = hex $fields[0];
46        }
47        my $type = $fields[$type_field];
48        $type =~ s/\s//g;
49        for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) {
50            output(\%out, $last_code, $type);
51        }
52    }
53    output(\%out, $last_code);
54    return 1;
55}
56
57sub output {
58    my ($out, $code, $type) = @_;
59    my $type_ok = ($type and ${${$out}{types}}{$type});
60    $type_ok = not $type_ok if $opt_n;
61    my $prev_code = $$out{prev_code};
62
63    if (not $type_ok) {
64        end_run($out, $prev_code);
65    } elsif (not $$out{in_run} or $type ne $$out{run_type} or $code != $prev_code+1) {
66        end_run($out, $prev_code);
67        start_run($out, $code, $type);
68    }
69    $$out{prev_code} = $code;
70}
71
72sub start_run {
73    my ($out, $code, $type) = @_;
74    $$out{start_code} = $code;
75    $$out{prev_code} = $code;
76    $$out{run_type} = $type;
77    $$out{in_run} = 1;
78}
79
80sub end_run {
81    my ($out, $code) = @_;
82    return if not $$out{in_run};
83    printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{run_type};
84    $$out{in_run} = 0;
85}
86