cmd/localedef/mkwidths.py

#!/bin/python
"""

This file and its contents are supplied under the terms of the
Common Development and Distribution License ("CDDL"), version 1.0.
You may only use this file in accordance with the terms of version
1.0 of the CDDL.

A full copy of the text of the CDDL should have accompanied this
source.  A copy of the CDDL is also available via the Internet at
http://www.illumos.org/license/CDDL.

Copyright 2013 DEY Storage Systems, Inc.

Scratch script to produce the widths.cm content from the widths text
files.  It converts numeric unicode to symbolic forms.
"""
*2da1cd3aSGarrett D'Amore
# Lookup table filled by load_utf8(): the key is the second column of a
# character map line (the encoded form of a character) and the value is the
# first column (its symbolic name).  do_width_file() queries it with the
# strings produced by u8_str().
SYMBOLS = {}
*2da1cd3aSGarrett D'Amore
*2da1cd3aSGarrett D'Amore
def u8_str(val):
    """
    Convert a numeric value to a string representing the UTF-8 encoding
    of the numeric value, which should be a valid Unicode code point.

    Each byte of the encoding is written as a backslash, the letter x and
    the byte value in upper-case hexadecimal, with nothing between the
    bytes.  The hex value is not zero padded, so a byte below 0x10 (which
    only happens for code points below 0x10) yields a single hex digit.

    val -- integer code point to encode.

    Returns the escaped text as a str.
    """
    try:
        # Python 2: unichr() builds the one-character unicode string.
        encoded = unichr(val).encode('utf-8')
    except NameError:
        # Python 3 has no unichr(); chr() covers the full Unicode range.
        # 'surrogatepass' keeps lone surrogates encodable, which is what
        # the Python 2 UTF-8 codec does by default.
        encoded = chr(val).encode('utf-8', 'surrogatepass')
    # Iterating a bytearray yields integers on both Python 2 and 3, so no
    # ord() call is needed.  Example: 0x20AC -> \xE2\x82\xAC
    return "".join(["\\x%X" % byte for byte in bytearray(encoded)])
*2da1cd3aSGarrett D'Amore
*2da1cd3aSGarrett D'Amore
def load_utf8(filename="UTF-8.cm"):
    """
    Load a UTF-8 character map file into the global SYMBOLS dictionary.

    Every line that splits into exactly two whitespace-separated fields,
    and whose first field does not start with "#", is taken as a
    (symbol, value) pair and stored as SYMBOLS[value] = symbol.  All other
    lines are skipped.  If a value occurs more than once, the last symbol
    read for it wins.

    filename -- path of the character map to read; defaults to
                "UTF-8.cm" in the current directory.

    Returns nothing; the result is the updated SYMBOLS dictionary.
    """
    # The context manager closes the file even if a line fails to process.
    with open(filename) as charmap:
        for line in charmap:
            items = line.split()
            if len(items) != 2 or items[0].startswith("#"):
                continue
            sym, val = items
            SYMBOLS[val] = sym
*2da1cd3aSGarrett D'Amore
*2da1cd3aSGarrett D'Amore
def do_width_file(width, filename):
    """
    Print a "symbol<TAB>width" line for every known character in a
    widths file.

    The file holds pairs of hexadecimal Unicode values; each pair is an
    inclusive range of code points that all have the given width.  A line
    may carry several pairs; a trailing value without a partner is
    ignored.  Lines starting with "#" are skipped.  Code points whose
    u8_str() form is not a key of SYMBOLS produce no output.

    width    -- integer width printed for every character in the file.
    filename -- path of the widths text file to read.

    Raises ValueError if a field is not valid hexadecimal.
    """
    # The context manager closes the file even if parsing fails part way.
    with open(filename) as ranges:
        for line in ranges:
            if line.startswith("#"):
                continue
            vals = line.split()
            # Pair up fields 0/1, 2/3, ...; zip() stops at the shorter
            # slice, which drops an unpaired trailing field.
            for first, last in zip(vals[0::2], vals[1::2]):
                for val in range(int(first, 16), int(last, 16) + 1):
                    sym = SYMBOLS.get(u8_str(val))
                    if sym is not None:
                        # Single-argument print() behaves the same under
                        # Python 2 and Python 3.
                        print("%s\t%d" % (sym, width))
*2da1cd3aSGarrett D'Amore
*2da1cd3aSGarrett D'Amore
if __name__ == "__main__":
    # Emit a WIDTH ... END WIDTH section on standard output: the character
    # map is loaded first, then the width 0 and width 2 range files are
    # translated to symbolic names.  Single-argument print() is used so the
    # script parses under both Python 2 and Python 3.
    print("WIDTH")
    load_utf8()
    do_width_file(0, "widths-0.txt")
    do_width_file(2, "widths-2.txt")
    print("END WIDTH")