#!/usr/local/bin/python
# -*- coding: iso-8859-1 -*-

# Id

# Copyright (c) 2004 Kungliga Tekniska Högskolan
# (Royal Institute of Technology, Stockholm, Sweden).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Institute nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

import re
import string
import sys

import generate
import UnicodeData
import util

# Expected arguments: the UnicodeData file, the composition exclusions file
# and the directory that receives normalize_table.h / normalize_table.c.
if len(sys.argv) != 4:
    print("usage: %s UnicodeData.txt"
          " CompositionExclusions-3.2.0.txt out-dir" % sys.argv[0])
    sys.exit(1)

ud = UnicodeData.read(sys.argv[1])


def sortedKeys(d):
    """Return a sorted list of the keys of a dict"""
    return sorted(d)


# key -> [field 4 with any "<tag>" marker stripped, field 0], restricted to
# entries whose field 4 is non-empty.  Field 4 is parsed below as a
# whitespace-separated list of hex numbers; field 0 is only echoed into a
# C comment.
trans = {k: [re.sub('<[a-zA-Z]+>', '', v[4]), v[0]]
         for k, v in ud.items() if v[4]}

# Longest mapping, counted in elements; exported as MAX_LENGTH_CANON.
maxLength = 0
for v in trans.values():
    maxLength = max(maxLength, len(v[0].split()))

normalize_h = generate.Header('%s/normalize_table.h' % sys.argv[3])
normalize_c = generate.Implementation('%s/normalize_table.c' % sys.argv[3])

normalize_h.file.write(
    '''
#include <krb5-types.h>

#define MAX_LENGTH_CANON %u

struct translation {
  uint32_t key;
  unsigned short val_len;
  unsigned short val_offset;
};

extern const struct translation _wind_normalize_table[];

extern const uint32_t _wind_normalize_val_table[];

extern const size_t _wind_normalize_table_size;

struct canon_node {
  uint32_t val;
  unsigned char next_start;
  unsigned char next_end;
  unsigned short next_offset;
};

extern const struct canon_node _wind_canon_table[];

extern const unsigned short _wind_canon_next_table[];
''' % maxLength)

normalize_c.file.write(
    '''
#include "normalize_table.h"
#include <stdlib.h>

const struct translation _wind_normalize_table[] = {
''')

# Flat pool of mapping values; each translation row refers to a
# (length, offset) slice of it so identical runs can be shared.
normalizeValTable = []

for k in sortedKeys(trans):
    value, description = trans[k]
    vec = [int(x, 0x10) for x in value.split()]
    offset = util.subList(normalizeValTable, vec)
    # Compare against None rather than testing truthiness: a match at
    # index 0 is a valid hit and must not be appended a second time.
    # NOTE(review): assumes util.subList returns None when vec is not
    # found -- confirm against util.py.
    if offset is None:
        offset = len(normalizeValTable)
        normalizeValTable.extend(vec)
    normalize_c.file.write(" {0x%x, %u, %u}, /* %s */\n"
                           % (k, len(vec), offset, description))

normalize_c.file.write(
    '''};

''')

normalize_c.file.write(
    "const size_t _wind_normalize_table_size = %u;\n\n" % len(trans))

normalize_c.file.write("const uint32_t _wind_normalize_val_table[] = {\n")

for v in normalizeValTable:
    normalize_c.file.write(" 0x%x,\n" % v)

normalize_c.file.write("};\n\n")

exclusions = UnicodeData.read(sys.argv[2])

# Inverse mapping used for composition: the key is the mapping written as
# concatenated 5-digit hex numbers, the value is [original key, field 0].
# Entries carrying a "<tag>" marker and keys listed in the exclusions file
# are left out.
inv = {''.join(["%05x" % int(x, 0x10) for x in v[4].split(' ')]): [k, v[0]]
       for k, v in ud.items()
       if v[4]
       and not re.search('<[a-zA-Z]+> *', v[4])
       and k not in exclusions}

# Index that the next table created by createTable() will receive.
table = 0

# Table index -> node.  A node is a 17-element list: slot 0 holds the value
# stored at that node (0 if none), slots 1..16 hold the index of the child
# table for hex digits 0x0..0xf, or None when there is no child.
tables = {}


def createTable():
    """add a new table"""
    global table, tables
    ret = table
    table += 1
    tables[ret] = [0] + [None] * 16
    return ret


def add(table, k, v):
    """add an entry (k, v) to table (recursively)

    k is consumed one hex digit at a time; when it is exhausted, v[0] is
    stored in slot 0 of the node that was reached.
    """
    if len(k) == 0:
        table[0] = v[0]
    else:
        i = int(k[0], 0x10) + 1
        if table[i] is None:
            table[i] = createTable()
        add(tables[table[i]], k[1:], v)


top = createTable()

for k, v in inv.items():
    add(tables[top], k, v)

# Flatten the nodes: for each table record the used child range
# [start, end) and where its children begin inside next_table.
next_table = []
tableToNext = {}
tableEnd = {}
tableStart = {}

for k in sortedKeys(tables):
    children = tables[k][1:]
    tableToNext[k] = len(next_table)
    # Trim unused slots from both ends so only the occupied span is stored.
    start = 0
    while start < 16 and children[start] is None:
        start += 1
    end = 16
    while end > start and children[end - 1] is None:
        end -= 1
    tableStart[k] = start
    tableEnd[k] = end
    # Holes inside the span are written as 0.
    next_table.extend(0 if x is None else x for x in children[start:end])

normalize_c.file.write("const struct canon_node _wind_canon_table[] = {\n")

for k in sortedKeys(tables):
    t = tables[k]
    normalize_c.file.write(" {0x%x, %u, %u, %u},\n" %
                           (t[0], tableStart[k], tableEnd[k], tableToNext[k]))

normalize_c.file.write("};\n\n")

# Emit the flattened child-index array, one entry per line, then close
# both generated files.
canon_next_chunks = ["const unsigned short _wind_canon_next_table[] = {\n"]
canon_next_chunks.extend(" %u,\n" % entry for entry in next_table)
canon_next_chunks.append("};\n\n")
normalize_c.file.write("".join(canon_next_chunks))

normalize_h.close()
normalize_c.close()
