#!/usr/local/bin/python
# -*- coding: iso-8859-1 -*-

# Id

# Copyright (c) 2004 Kungliga Tekniska Högskolan
# (Royal Institute of Technology, Stockholm, Sweden).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Institute nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

"""Generate normalize_table.h and normalize_table.c.

Usage: <script> UnicodeData.txt CompositionExclusions-3.2.0.txt out-dir

Two groups of C tables are written into out-dir:

* _wind_normalize_table / _wind_normalize_val_table: for every record that
  has a non-empty field 4, the key code point plus the offset and length of
  its value sequence inside one shared array of code points.
* _wind_canon_table / _wind_canon_next_table: a 16-way trie keyed by the
  hex digits of the value sequences of untagged, non-excluded records,
  whose terminal nodes hold the originating code point.
"""

import re
import string
import sys

import generate
import UnicodeData
import util

if len(sys.argv) != 4:
    print("usage: %s UnicodeData.txt"
          " CompositionExclusions-3.2.0.txt out-dir" % sys.argv[0])
    sys.exit(1)

ud = UnicodeData.read(sys.argv[1])


def sortedKeys(d):
    """Return a sorted list of the keys of a dict"""
    return sorted(d)


# code point -> [field 4 with any "<tag>" marker removed, field 0].
# Field 0 is only ever used as the text of a C comment below.
# NOTE(review): field 4 is presumably the decomposition mapping and field 0
# the character name, as laid out by UnicodeData.read -- confirm there.
trans = dict([(k, [re.sub('<[a-zA-Z]+>', '', v[4]), v[0]])
              for k, v in ud.items() if v[4]])

# Longest value sequence, counted in code points; exported to C as
# MAX_LENGTH_CANON.
maxLength = 0
for v in trans.values():
    maxLength = max(maxLength, len(v[0].split()))

normalize_h = generate.Header('%s/normalize_table.h' % sys.argv[3])
normalize_c = generate.Implementation('%s/normalize_table.c' % sys.argv[3])

normalize_h.file.write(
'''
#include <krb5-types.h>

#define MAX_LENGTH_CANON %u

struct translation {
  uint32_t key;
  unsigned short val_len;
  unsigned short val_offset;
};

extern const struct translation _wind_normalize_table[];

extern const uint32_t _wind_normalize_val_table[];

extern const size_t _wind_normalize_table_size;

struct canon_node {
  uint32_t val;
  unsigned char next_start;
  unsigned char next_end;
  unsigned short next_offset;
};

extern const struct canon_node _wind_canon_table[];

extern const unsigned short _wind_canon_next_table[];
''' % maxLength)

normalize_c.file.write(
'''
#include "normalize_table.h"
#include <stdlib.h>

const struct translation _wind_normalize_table[] = {
''')

# Shared pool of code points; each translation refers to a slice of it.
normalizeValTable = []

for k in sortedKeys(trans):
    value, description = trans[k]
    vec = [int(x, 0x10) for x in value.split()]
    # Reuse an existing occurrence of vec in the pool when there is one.
    offset = util.subList(normalizeValTable, vec)
    # Compare against None explicitly: a match at index 0 is a valid hit,
    # and a plain truthiness test would append the sequence a second time.
    # NOTE(review): assumes util.subList returns None when vec is absent --
    # confirm against util.py.
    if offset is None:
        offset = len(normalizeValTable)
        normalizeValTable.extend(vec)
    normalize_c.file.write(" {0x%x, %u, %u}, /* %s */\n"
                           % (k, len(vec), offset, description))

normalize_c.file.write(
'''};

''')

normalize_c.file.write(
    "const size_t _wind_normalize_table_size = %u;\n\n" % len(trans))

normalize_c.file.write("const uint32_t _wind_normalize_val_table[] = {\n")

for v in normalizeValTable:
    normalize_c.file.write(" 0x%x,\n" % v)

normalize_c.file.write("};\n\n")

exclusions = UnicodeData.read(sys.argv[2])

# Inverse mapping, restricted to records whose field 4 carries no "<tag>"
# and whose code point is not listed in the exclusions file.  The key is the
# value sequence with each code point rendered as five hex digits and all of
# them concatenated; the value is [code point, field 0].
inv = dict([(''.join(["%05x" % int(x, 0x10) for x in v[4].split(' ')]),
             [k, v[0]])
            for k, v in ud.items()
            if v[4] and not re.search('<[a-zA-Z]+> *', v[4])
            and k not in exclusions])

# Number that the next trie node created by createTable() will receive.
table = 0

# node number -> 17-element list: slot 0 is the node's value, slots 1..16
# are the numbers of the child nodes for hex digits 0x0..0xf (None if absent).
tables = {}


def createTable():
    """Allocate a new, empty trie node and return its number."""
    global table, tables
    ret = table
    table += 1
    tables[ret] = [0] + [None] * 16
    return ret


def add(table, k, v):
    """Insert entry (k, v) below the node `table`, recursively.

    table -- the node (a 17-element list taken from `tables`) to insert under
    k     -- the remaining hex-digit string of the key
    v     -- sequence whose first element is stored at the terminal node
    """
    if len(k) == 0:
        table[0] = v[0]
    else:
        # Slot 0 holds the node value, so digit d lives in slot d + 1.
        i = int(k[0], 0x10) + 1
        if table[i] is None:
            table[i] = createTable()
        add(tables[table[i]], k[1:], v)


top = createTable()

for k, v in inv.items():
    add(tables[top], k, v)

# Flattened child arrays of all nodes, plus, per node, where its slice
# starts in next_table and which digit range [start, end) the slice covers.
next_table = []
tableToNext = {}
tableEnd = {}
tableStart = {}

for k in sortedKeys(tables):
    children = tables[k][1:]
    tableToNext[k] = len(next_table)
    # Trim absent children from both ends so only the used range is stored.
    start = 0
    while start < 16 and children[start] is None:
        start += 1
    end = 16
    while end > start and children[end - 1] is None:
        end -= 1
    tableStart[k] = start
    tableEnd[k] = end
    # Absent children inside the range are written out as 0.
    next_table.extend([x or 0 for x in children[start:end]])

normalize_c.file.write("const struct canon_node _wind_canon_table[] = {\n")

for k in sortedKeys(tables):
    t = tables[k]
    normalize_c.file.write(" {0x%x, %u, %u, %u},\n" %
                           (t[0], tableStart[k], tableEnd[k], tableToNext[k]))

normalize_c.file.write("};\n\n")

normalize_c.file.write("const unsigned short _wind_canon_next_table[] = {\n")

for k in next_table:
    normalize_c.file.write(" %u,\n" % k)

normalize_c.file.write("};\n\n")

normalize_h.close()
normalize_c.close()