#!/usr/local/bin/python
# -*- coding: iso-8859-1 -*-

# $Id$

# Copyright (c) 2004 Kungliga Tekniska Högskolan
# (Royal Institute of Technology, Stockholm, Sweden).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Institute nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

import re
import string
import sys

import generate
import UnicodeData
import util

# Generates normalize_table.h and normalize_table.c in the output directory.
#
# Command line: <UnicodeData.txt> <CompositionExclusions-3.2.0.txt> <out-dir>
#
# Two data sets are emitted:
#   * _wind_normalize_table / _wind_normalize_val_table: for every record
#     with a non-empty field 4, the list of hex values found in that field.
#   * _wind_canon_table / _wind_canon_next_table: a 16-way trie keyed on the
#     hex digits of those value lists, mapping them back to the record key.

if len(sys.argv) != 4:
    # Both literals must be part of ONE expression.  Previously the print
    # statement ended after the first literal, so the message was printed
    # with a raw "%s" and the second literal, formatted on its own without
    # any conversion specifier, raised TypeError before sys.exit(1).
    print("usage: %s UnicodeData.txt"
          " CompositionExclusions-3.2.0.txt out-dir" % sys.argv[0])
    sys.exit(1)

ud = UnicodeData.read(sys.argv[1])


def sortedKeys(d):
    """Return a sorted list of the keys of a dict"""
    return sorted(d.keys())


# key -> [field 4 with any "<tag>" marker stripped, field 0]; records whose
# field 4 is empty are skipped.
# NOTE(review): presumably field 4 is the decomposition mapping and field 0
# the character name -- confirm against UnicodeData.read.
trans = dict([(k, [re.sub('<[a-zA-Z]+>', '', v[4]), v[0]])
              for k, v in ud.items() if v[4]])

# Largest number of whitespace-separated values in any stripped field 4;
# exported to C as MAX_LENGTH_CANON.
maxLength = 0
for v in trans.values():
    maxLength = max(maxLength, len(v[0].split()))

normalize_h = generate.Header('%s/normalize_table.h' % sys.argv[3])
normalize_c = generate.Implementation('%s/normalize_table.c' % sys.argv[3])

normalize_h.file.write(
'''
#include <krb5-types.h>

#define MAX_LENGTH_CANON %u

struct translation {
  uint32_t key;
  unsigned short val_len;
  unsigned short val_offset;
};

extern const struct translation _wind_normalize_table[];

extern const uint32_t _wind_normalize_val_table[];

extern const size_t _wind_normalize_table_size;

struct canon_node {
  uint32_t val;
  unsigned char next_start;
  unsigned char next_end;
  unsigned short next_offset;
};

extern const struct canon_node _wind_canon_table[];

extern const unsigned short _wind_canon_next_table[];
''' % maxLength)

normalize_c.file.write(
'''
#include <stdlib.h>
#include "normalize_table.h"

const struct translation _wind_normalize_table[] = {
''')

# Flat pool of values shared by all entries of _wind_normalize_table; each
# entry refers to its values by (length, offset) into this pool.
normalizeValTable = []

for k in sortedKeys(trans):
    v = trans[k]
    (key, value, description) = k, v[0], v[1]
    vec = [int(x, 0x10) for x in value.split()]
    # Try to reuse an existing run of identical values in the pool.
    offset = util.subList(normalizeValTable, vec)
    # NOTE(review): a match at index 0 is falsy and is therefore treated as
    # "not found", which only costs a duplicated run in the pool.  Switching
    # to an explicit "is None" test needs util.subList's return convention
    # confirmed first.
    if not offset:
        offset = len(normalizeValTable)
        normalizeValTable.extend(vec)
    normalize_c.file.write(" {0x%x, %u, %u}, /* %s */\n"
                           % (key, len(vec), offset, description))

normalize_c.file.write(
'''};

''')

normalize_c.file.write(
    "const size_t _wind_normalize_table_size = %u;\n\n" % len(trans))

normalize_c.file.write("const uint32_t _wind_normalize_val_table[] = {\n")

for v in normalizeValTable:
    normalize_c.file.write(" 0x%x,\n" % v)

normalize_c.file.write("};\n\n")

exclusions = UnicodeData.read(sys.argv[2])

# Inverse mapping used to build the trie.  The dict key is the concatenation
# of every value of field 4 rendered as five hex digits; the dict value is
# [record key, field 0].  Records are left out when field 4 is empty, when it
# carries a "<tag>" marker, or when the record key is listed in exclusions.
inv = dict([(''.join(["%05x" % int(x, 0x10) for x in v[4].split(' ')]),
             [k, v[0]])
            for k, v in ud.items()
            if v[4]
            and not re.search('<[a-zA-Z]+> *', v[4])
            and not exclusions.has_key(k)])

# Id that the next call to createTable() hands out.
table = 0

# Trie nodes by id.  Each node is a 17-element list: slot 0 is the value
# stored at the node (0 when there is none) and slots 1..16 hold the id of
# the child node for hex digits 0..15 (None when there is no child).
tables = {}


def createTable():
    """add a new table"""
    global table, tables
    ret = table
    table += 1
    tables[ret] = [0] + [None] * 16
    return ret


def add(table, k, v):
    """add an entry (k, v) to table (recursively)

    table is a node list as stored in tables, k is the remaining string of
    hex digits and v is a sequence whose first element is stored at the node
    reached once k is exhausted.
    """
    if len(k) == 0:
        table[0] = v[0]
    else:
        # Slot 0 is reserved for the node value, hence the +1.
        i = int(k[0], 0x10) + 1
        if table[i] is None:
            table[i] = createTable()
        add(tables[table[i]], k[1:], v)


top = createTable()

for k, v in inv.items():
    add(tables[top], k, v)

# Child ids of all nodes, concatenated; each node only contributes the
# window [start, end) of its 16 child slots that actually contains children.
next_table = []
tableToNext = {}   # node id -> offset of its window inside next_table
tableEnd = {}      # node id -> one past the last used child slot
tableStart = {}    # node id -> first used child slot

for k in sortedKeys(tables):
    children = tables[k][1:]
    tableToNext[k] = len(next_table)
    # Trim unused slots from both ends; a node without children ends up
    # with start == end == 16 and contributes nothing.
    start = 0
    while start < 16 and children[start] is None:
        start += 1
    end = 16
    while end > start and children[end - 1] is None:
        end -= 1
    tableStart[k] = start
    tableEnd[k] = end
    # Holes inside the window are written as 0.
    next_table.extend([child or 0 for child in children[start:end]])

normalize_c.file.write("const struct canon_node _wind_canon_table[] = {\n")

for k in sortedKeys(tables):
    node = tables[k]
    normalize_c.file.write(" {0x%x, %u, %u, %u},\n" %
                           (node[0], tableStart[k], tableEnd[k],
                            tableToNext[k]))

normalize_c.file.write("};\n\n")

normalize_c.file.write("const unsigned short _wind_canon_next_table[] = {\n")

for k in next_table:
    normalize_c.file.write(" %u,\n" % k)

normalize_c.file.write("};\n\n")

normalize_h.close()
normalize_c.close()