xref: /illumos-gate/usr/src/data/locale/tools/mkwidths.py (revision d70bcb7258b79267aad36309c42fd499e844458f)
1#!/bin/python
2"""
3
4This file and its contents are supplied under the terms of the
5Common Development and Distribution License ("CDDL"), version 1.0.
6You may only use this file in accordance with the terms of version
71.0 of the CDDL.
8
9A full copy of the text of the CDDL should have accompanied this
10source.  A copy of the CDDL is also available via the Internet at
11http://www.illumos.org/license/CDDL.
12
13Copyright 2013 DEY Storage Systems, Inc.
14
15Scratch script to produce the widths.cm content from the widths text
16files.  It converts numeric unicode to symbolic forms.
17"""
18
19# Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
20
21from __future__ import print_function
22
# Lookup table filled by load_utf8(): the key is the second field of a
# charmap line (the encoded value text) and the value is the first field
# (the symbolic name).  do_width_file() queries it with u8_str() output.
SYMBOLS = {}
24
25
# unichr() exists only on Python 2; Python 3 folded it into chr().  Pick the
# right callable once at import time instead of on every call.
try:
    _CODE_POINT_TO_CHAR = unichr
    # Python 2's UTF-8 codec encodes lone surrogates without complaint.
    _U8_ERRORS = 'strict'
except NameError:
    _CODE_POINT_TO_CHAR = chr
    # Python 3 rejects lone surrogates under 'strict'; 'surrogatepass'
    # yields the same bytes Python 2 produces for U+D800..U+DFFF.
    _U8_ERRORS = 'surrogatepass'


def u8_str(val):
    """
    Convert a numeric value to a string representing the UTF-8 encoding
    of the numeric value, which should be a valid Unicode code point.

    Each byte of the encoding is written as a backslash, the letter "x"
    and the byte value in upper-case hexadecimal, so 0x300 becomes the
    eight characters \\xCC\\x80.

    val: integer code point to encode.
    Returns the concatenated escape text as a string.
    Raises whatever the interpreter's code-point-to-character conversion
    raises for a value it does not accept.
    """
    encoded = _CODE_POINT_TO_CHAR(val).encode('utf-8', _U8_ERRORS)
    # Iterating a bytearray yields integers on both Python 2 and Python 3,
    # whereas iterating the encoded string yields 1-char strings on 2 and
    # ints on 3.
    # NOTE(review): %X does not zero-pad, so code points below 0x10 come out
    # with a single hex digit (e.g. \x9) -- confirm that is what the charmap
    # keys look like before relying on matches in that range.
    return "".join("\\x%X" % byte for byte in bytearray(encoded))
38
39
def load_utf8():
    """
    Load the UTF-8 character map file into the global SYMBOLS dictionary.

    The file "UTF-8.cm" is read from the current working directory.  A line
    is used only if it splits into exactly two whitespace-separated fields
    and its first field does not start with "#"; every other line is
    skipped.  For a used line the first field is the symbol and the second
    is its encoded value, stored as SYMBOLS[value] = symbol, so a value that
    appears more than once keeps the symbol from its last occurrence.

    Returns nothing.  Raises the usual I/O error if the file cannot be
    opened.
    """
    # The context manager guarantees the file is closed, even if a line
    # triggers an exception part-way through.
    with open("UTF-8.cm") as charmap:
        for line in charmap:
            items = line.split()
            if len(items) != 2 or items[0].startswith("#"):
                continue
            sym, val = items
            SYMBOLS[val] = sym
52
53
def do_width_file(width, filename):
    """
    Print a width entry for every named character listed in a width file.

    The file contains pairs of hexadecimal Unicode values; each pair is an
    inclusive range of code points that all have the given width.  Lines
    that start with "#" are skipped, and a trailing value without a partner
    on a line is ignored.  For each code point in a range the UTF-8 escape
    text from u8_str() is looked up in SYMBOLS; code points without an entry
    are skipped, the rest are printed as the symbol, a tab and the width.

    width: integer width printed next to every symbol from this file.
    filename: path of the width file to read.

    Returns nothing.  Raises ValueError if a field is not valid hexadecimal
    and the usual I/O error if the file cannot be opened.
    """
    # The context manager closes the file even if parsing or printing fails.
    with open(filename) as ranges:
        for line in ranges:
            if line.startswith("#"):
                continue
            vals = line.split()
            # zip() stops at the shorter slice, so an unpaired last value
            # is dropped just as the range list runs out.
            for first, last in zip(vals[0::2], vals[1::2]):
                start = int(first, 16)
                end = int(last, 16)
                for val in range(start, end + 1):
                    sym = SYMBOLS.get(u8_str(val))
                    if sym is None:
                        continue
                    print("%s\t%d" % (sym, width))
75
76
if __name__ == "__main__":
    # Emit the WIDTH section: header, one batch of entries per width file
    # (in the order listed), then the terminator.  The symbol table has to
    # be loaded before any width file is processed.
    print("WIDTH")
    load_utf8()
    for cell_width, source in ((0, "widths-0.txt"), (2, "widths-2.txt")):
        do_width_file(cell_width, source)
    print("END WIDTH")
83