xref: /illumos-gate/usr/src/data/locale/tools/mkwidths.py (revision e07d85f87c3920e032adb855fdc500e4616c7718)
#!/bin/python
"""

This file and its contents are supplied under the terms of the
Common Development and Distribution License ("CDDL"), version 1.0.
You may only use this file in accordance with the terms of version
1.0 of the CDDL.

A full copy of the text of the CDDL should have accompanied this
source.  A copy of the CDDL is also available via the Internet at
http://www.illumos.org/license/CDDL.

Copyright 2013 DEY Storage Systems, Inc.

Scratch script to produce the widths.cm content from the widths text
files.  It converts numeric unicode to symbolic forms.
"""
18
# Lookup table filled by load_utf8(): the key is the second field of a
# UTF-8.cm line and the value is the first field (the symbol).
# do_width_file() looks entries up by the string u8_str() returns.
SYMBOLS = {}
20
21
def u8_str(val):
    """
    Convert a numeric value to a string representing the UTF-8 encoding
    of the numeric value, which should be a valid Unicode code point.

    Each byte of the encoding is rendered as a backslash, the letter "x"
    and the byte value in uppercase hexadecimal without zero padding; the
    pieces are concatenated in byte order.

    val -- integer code point to encode

    Returns the escaped string.  Raises ValueError when val is outside the
    range accepted by unichr() (Python 2) or chr() (Python 3).
    """
    try:
        # Python 2: unichr() builds the one-character unicode string.
        encoded = unichr(val).encode('utf-8')
    except NameError:
        # Python 3 has no unichr(); chr() returns the character directly.
        # "surrogatepass" lets lone surrogates (U+D800..U+DFFF) be encoded
        # rather than raising, as the Python 2 codec does.
        encoded = chr(val).encode('utf-8', 'surrogatepass')
    # bytearray yields integers when iterated on both Python 2 and 3, so
    # no ord() call (which breaks on Python 3 bytes) is needed.
    # NOTE(review): bytes below 0x10 come out with a single hex digit;
    # kept as in the original -- confirm this matches the UTF-8.cm format.
    return "".join(["\\x%X" % byte for byte in bytearray(encoded)])
34
35
def load_utf8():
    """
    Load the UTF-8 character map file into the global SYMBOLS dictionary.

    The file "UTF-8.cm" is opened relative to the current working
    directory.  A line is used only when it splits into exactly two
    whitespace-separated fields and the first field does not start with
    "#"; it is then stored as SYMBOLS[second_field] = first_field.  When
    the same second field occurs more than once, the last line wins.
    All other lines are ignored.
    """
    # "with" guarantees the file is closed, even if reading fails midway.
    with open("UTF-8.cm") as charmap:
        for line in charmap:
            items = line.split()
            if len(items) != 2 or items[0].startswith("#"):
                continue
            sym, val = items
            SYMBOLS[val] = sym
48
49
def do_width_file(width, filename):
    """
    Print a "<symbol><TAB><width>" line for each known code point listed
    in a width file.

    The file contains pairs of Unicode values (hex); each pair is an
    inclusive range of code points that all have the given width.  Lines
    starting with "#" are skipped, and a trailing unpaired value on a
    line is ignored.  A code point whose u8_str() form is not a key in
    SYMBOLS produces no output.

    width    -- integer width printed next to every symbol
    filename -- path of the width file to read
    """
    # "with" guarantees the file is closed, even if parsing fails midway.
    with open(filename) as ranges:
        for line in ranges:
            if line.startswith("#"):
                continue
            vals = line.split()
            # Walk the fields two at a time: (start, end) of each range.
            # Stopping at len(vals) - 1 drops an unpaired last field.
            for idx in range(0, len(vals) - 1, 2):
                start = int(vals[idx], 16)
                end = int(vals[idx + 1], 16)
                for val in range(start, end + 1):
                    sym = SYMBOLS.get(u8_str(val))
                    if sym is None:
                        continue
                    # Single-argument print(...) behaves identically as a
                    # Python 2 statement and a Python 3 function call.
                    print("%s\t%d" % (sym, width))
71
72
if __name__ == "__main__":
    # Emit the WIDTH section on stdout: a "WIDTH" header, the width-0 and
    # width-2 entries, then the "END WIDTH" trailer.  The single-argument
    # print(...) form works as both a Python 2 statement and a Python 3
    # function call.
    print("WIDTH")
    load_utf8()
    do_width_file(0, "widths-0.txt")
    do_width_file(2, "widths-2.txt")
    print("END WIDTH")
79