#!/bin/python
"""

This file and its contents are supplied under the terms of the
Common Development and Distribution License ("CDDL"), version 1.0.
You may only use this file in accordance with the terms of version
1.0 of the CDDL.

A full copy of the text of the CDDL should have accompanied this
source. A copy of the CDDL is also available via the Internet at
http://www.illumos.org/license/CDDL.

Copyright 2013 DEY Storage Systems, Inc.

Scratch script to produce the widths.cm content from the widths text
files. It converts numeric unicode to symbolic forms.
"""

# Maps the escaped UTF-8 byte string of a character (as produced by
# u8_str) to its symbolic name. Filled in by load_utf8().
SYMBOLS = {}

# The code-point-to-character builtin is named unichr on Python 2 and
# chr on Python 3; pick whichever exists so the script runs on both.
try:
    _unichr = unichr
except NameError:
    _unichr = chr


def u8_str(val):
    """
    Convert a numeric value to a string representing the UTF-8 encoding
    of the numeric value, which should be a valid Unicode code point.

    Each encoded byte is rendered as a backslash, an "x" and the byte
    in upper-case hexadecimal without zero padding, e.g. 0xE9 becomes
    the eight characters \\xC3\\xA9.

    Raises ValueError if val is outside the range the interpreter's
    code-point-to-character builtin accepts.
    """
    # Python 3 refuses to encode lone surrogates by default, whereas
    # Python 2 encoded them; "surrogatepass" keeps the Python 2 result.
    encoded = _unichr(val).encode("utf-8", "surrogatepass")
    # bytearray yields integers on both Python 2 and Python 3.
    return "".join("\\x%X" % byte for byte in bytearray(encoded))


def load_utf8(filename="UTF-8.cm"):
    """
    Load a character map file into the global SYMBOLS dictionary.

    Only lines that consist of exactly two whitespace-separated fields,
    and whose first field does not start with "#", are used. The first
    field is taken as the symbol and the second as its encoded value;
    SYMBOLS is keyed by the value. If a value occurs more than once the
    last symbol seen wins.

    filename defaults to "UTF-8.cm" in the current directory.
    """
    with open(filename) as handle:
        for line in handle:
            items = line.split()
            if len(items) != 2 or items[0].startswith("#"):
                continue
            sym, val = items
            SYMBOLS[val] = sym


def do_width_file(width, filename):
    """
    Print a "symbol<TAB>width" line for every known character in a
    widths file.

    The file holds pairs of hexadecimal Unicode values; each pair is an
    inclusive range of code points that all have the given width. Lines
    starting with "#" are skipped, and a trailing value without a
    partner is ignored. Code points whose encoding is not present in
    SYMBOLS produce no output.
    """
    with open(filename) as handle:
        for line in handle:
            if line.startswith("#"):
                continue
            vals = line.split()
            # Walk the fields two at a time: (start, end), (start, end), ...
            for idx in range(0, len(vals) - 1, 2):
                start = int(vals[idx], 16)
                end = int(vals[idx + 1], 16)
                for val in range(start, end + 1):
                    sym = SYMBOLS.get(u8_str(val))
                    if sym is None:
                        continue
                    # A single parenthesised argument prints the same
                    # text under both Python 2 and Python 3.
                    print("%s\t%d" % (sym, width))


def main():
    """
    Emit the complete WIDTH section on standard output, built from
    UTF-8.cm, widths-0.txt and widths-2.txt in the current directory.
    """
    print("WIDTH")
    load_utf8()
    do_width_file(0, "widths-0.txt")
    do_width_file(2, "widths-2.txt")
    print("END WIDTH")


if __name__ == "__main__":
    main()