xref: /freebsd/contrib/less/mkutable (revision fafb1ee7bdc5d8a7d07cd03b2fb0bbb76f7a9d7c)
1#! /usr/bin/perl
2use strict;
3
4my $USAGE = <<__EOF__;
5   usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
6          -n = take non-matching types
7	  -f = zero-based type field (default 2)
8__EOF__
9
10use vars qw( $opt_f $opt_n );
11use Getopt::Std;
12my $type_field = 2;
13
14exit (main() ? 1 : 0);
15
16sub main {
17    my $date = `date`;
18    chomp $date;
19    my $args = join ' ', @ARGV;
20    my $header = "/* Generated by \"$0 $args\" on $date */\n";
21
22    die $USAGE if not getopts('f:n');
23    $type_field = $opt_f if $opt_f;
24    my %types;
25    my $arg;
26    while ($arg = shift @ARGV) {
27        last if $arg eq '--';
28        $types{$arg} = 1;
29    }
30    my %out = ( 'types' => \%types );
31    my $last_code = 0;
32
33    print $header;
34    while (<>) {
35        chomp;
36        s/#.*//;
37        my @fields = split /;/;
38        next if not @fields;
39        my $code = hex $fields[0];
40        my $type = $fields[$type_field];
41        $type =~ s/\s//g;
42        while (++$last_code < $code) {
43            output(\%out, $last_code, '?');
44        }
45        output(\%out, $code, $type);
46    }
47    output(\%out, $last_code+1, '?');
48}
49
50sub output {
51    my ($out, $code, $type) = @_;
52    my $match = ${${$out}{types}}{$type};
53    my $type_change = (not $$out{start_type} or $type ne $$out{start_type});
54    $match = not $match if $opt_n;
55    if ($match and (not $$out{in_run} or $type_change)) {
56        end_run($out, $code-1);
57        start_run($out, $code, $type);
58    } elsif (not $match and $$out{in_run}) {
59        end_run($out, $code-1);
60    }
61}
62
63sub start_run {
64    my ($out, $code, $type) = @_;
65    $$out{start_code} = $code;
66    $$out{start_type} = $type;
67    $$out{in_run} = 1;
68}
69
70sub end_run {
71    my ($out, $code) = @_;
72    return if not $$out{in_run};
73    printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{start_type};
74    $$out{in_run} = 0;
75}
76