#!/bin/python
# xref: /titanic_44/usr/src/cmd/localedef/mkwidths.py (revision 2da1cd3a39e2d3da7f9d15071ea9462919c011ac)
"""

This file and its contents are supplied under the terms of the
Common Development and Distribution License ("CDDL"), version 1.0.
You may only use this file in accordance with the terms of version
1.0 of the CDDL.

A full copy of the text of the CDDL should have accompanied this
source.  A copy of the CDDL is also available via the Internet at
http://www.illumos.org/license/CDDL.

Copyright 2013 DEY Storage Systems, Inc.

Scratch script to produce the widths.cm content from the widths text
files.  It converts numeric Unicode values to their symbolic forms.
"""
18*2da1cd3aSGarrett D'Amore
# Lookup table filled in by load_utf8(): the key is the second column of a
# UTF-8.cm line (looked up by do_width_file() using the text u8_str() builds),
# the value is the symbolic name from the first column.
SYMBOLS = {}
20*2da1cd3aSGarrett D'Amore
21*2da1cd3aSGarrett D'Amore
def u8_str(val):
    """
    Convert a numeric value to a string representing the UTF-8 encoding
    of the numeric value, which should be a valid Unicode code point.

    Every byte of the encoding is written as a backslash, the letter "x"
    and the byte value in upper-case hexadecimal without zero padding, so
    0x20AC yields the twelve characters "\\xE2\\x82\\xAC".

    The encoding is computed arithmetically instead of going through
    unichr(): that builtin rejects values above 0xFFFF on narrow Python 2
    builds and does not exist on Python 3.  Surrogate code points are
    encoded as plain three-byte sequences.

    Raises ValueError when val lies outside 0 .. 0x10FFFF.
    """
    if not 0 <= val <= 0x10FFFF:
        raise ValueError("code point out of range: %r" % (val,))

    if val < 0x80:
        # Single byte: 0xxxxxxx
        octets = [val]
    elif val < 0x800:
        # Two bytes: 110xxxxx 10xxxxxx
        octets = [0xC0 | (val >> 6),
                  0x80 | (val & 0x3F)]
    elif val < 0x10000:
        # Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        octets = [0xE0 | (val >> 12),
                  0x80 | ((val >> 6) & 0x3F),
                  0x80 | (val & 0x3F)]
    else:
        # Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        octets = [0xF0 | (val >> 18),
                  0x80 | ((val >> 12) & 0x3F),
                  0x80 | ((val >> 6) & 0x3F),
                  0x80 | (val & 0x3F)]

    # NOTE(review): "%X" does not zero-pad, so a byte below 0x10 renders
    # with one hex digit; confirm that matches the charmap's spelling.
    return "".join(["\\x%X" % octet for octet in octets])
34*2da1cd3aSGarrett D'Amore
35*2da1cd3aSGarrett D'Amore
def load_utf8():
    """
    Load the UTF-8 character map file "UTF-8.cm" from the current
    directory into the global SYMBOLS dictionary.

    Only lines made of exactly two whitespace-separated fields whose
    first field does not start with "#" are used; everything else is
    skipped.  The first field is the symbol and the second is its value;
    SYMBOLS maps value -> symbol, so a later line with the same value
    replaces an earlier one.
    """
    # The with-block closes the file even if an error occurs mid-read.
    with open("UTF-8.cm") as charmap:
        for line in charmap:
            items = line.split()
            if len(items) != 2 or items[0].startswith("#"):
                continue
            sym, val = items
            SYMBOLS[val] = sym
48*2da1cd3aSGarrett D'Amore
49*2da1cd3aSGarrett D'Amore
def do_width_file(width, filename):
    """
    Print a "symbol<TAB>width" line for every known symbol in a file of
    code point ranges.

    The file holds pairs of Unicode values (hex); each pair is an
    inclusive range of code points that all have the given width.  Lines
    starting with "#" are skipped, a line may carry several pairs, and a
    trailing unpaired value is ignored.  Code points whose u8_str() form
    has no entry in the global SYMBOLS dictionary produce no output.

    width    -- integer width printed next to each symbol
    filename -- path of the ranges file to read

    Raises ValueError if a field is not valid hexadecimal.
    """
    # The with-block closes the file even if a malformed line raises.
    with open(filename) as ranges:
        for line in ranges:
            if line.startswith("#"):
                continue
            fields = line.split()
            # Walk the fields two at a time: (start, end), (start, end), ...
            # zip() stops at the shorter slice, dropping an odd leftover.
            for first, last in zip(fields[0::2], fields[1::2]):
                for code in range(int(first, 16), int(last, 16) + 1):
                    sym = SYMBOLS.get(u8_str(code))
                    if sym is None:
                        continue
                    # Single parenthesised argument: prints the same text
                    # under both the Python 2 statement and Python 3 function.
                    print("%s\t%d" % (sym, width))
71*2da1cd3aSGarrett D'Amore
72*2da1cd3aSGarrett D'Amore
if __name__ == "__main__":
    # Write a complete WIDTH section to standard output: the opening
    # keyword, the entries from the width-0 and width-2 range files, and
    # the closing keyword.  The symbol table must be loaded first because
    # do_width_file() looks symbols up in it.
    print("WIDTH")
    load_utf8()
    do_width_file(0, "widths-0.txt")
    do_width_file(2, "widths-2.txt")
    print("END WIDTH")
79