Home | History | Annotate | Line # | Download | only in wind
      1      1.1     elric #!/usr/local/bin/python
      2      1.1     elric # -*- coding: iso-8859-1 -*-
      3      1.1     elric 
      4  1.1.1.2     elric # Id
      5      1.1     elric 
      6      1.1     elric # Copyright (c) 2004 Kungliga Tekniska Högskolan
      7      1.1     elric # (Royal Institute of Technology, Stockholm, Sweden). 
      8      1.1     elric # All rights reserved. 
      9      1.1     elric # 
     10      1.1     elric # Redistribution and use in source and binary forms, with or without 
     11      1.1     elric # modification, are permitted provided that the following conditions 
     12      1.1     elric # are met: 
     13      1.1     elric # 
     14      1.1     elric # 1. Redistributions of source code must retain the above copyright 
     15      1.1     elric #    notice, this list of conditions and the following disclaimer. 
     16      1.1     elric # 
     17      1.1     elric # 2. Redistributions in binary form must reproduce the above copyright 
     18      1.1     elric #    notice, this list of conditions and the following disclaimer in the 
     19      1.1     elric #    documentation and/or other materials provided with the distribution. 
     20      1.1     elric # 
     21      1.1     elric # 3. Neither the name of the Institute nor the names of its contributors 
     22      1.1     elric #    may be used to endorse or promote products derived from this software 
     23      1.1     elric #    without specific prior written permission. 
     24      1.1     elric # 
     25      1.1     elric # THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND 
     26      1.1     elric # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
     27      1.1     elric # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
     28      1.1     elric # ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE 
     29      1.1     elric # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
     30      1.1     elric # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
     31      1.1     elric # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
     32      1.1     elric # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
     33      1.1     elric # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
     34      1.1     elric # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
     35      1.1     elric # SUCH DAMAGE. 
     36      1.1     elric 
     37      1.1     elric import re
     38      1.1     elric import string
     39      1.1     elric import sys
     40      1.1     elric 
     41      1.1     elric import generate
     42      1.1     elric import UnicodeData
     43      1.1     elric import util
     44      1.1     elric 
# Command line: <UnicodeData.txt> <CompositionExclusions file> <out-dir>.
# Anything other than exactly three operands prints the usage text and
# exits with status 1.
if len(sys.argv) != 4:
    print("usage: %s UnicodeData.txt"
          " CompositionExclusions-3.2.0.txt out-dir" % sys.argv[0])
    sys.exit(1)

# Parsed character database.  The rest of this script iterates it with
# .items() and reads field 0 (emitted as a C comment) and field 4 (the
# decomposition string) of every value.
# NOTE(review): the exact record layout is defined by UnicodeData.read,
# which is not visible here -- confirm against that module.
ud = UnicodeData.read(sys.argv[1])
     51      1.1     elric 
def sortedKeys(d):
    """Return a new list holding the keys of dict `d` in ascending order."""
    # sorted() iterates the dict's keys directly and always returns a
    # fresh list, so no explicit copy of d.keys() is needed.
    return sorted(d)
     57      1.1     elric 
# Map every key of `ud` that has a non-empty decomposition field (v[4]) to
# [decomposition with any "<tag>" marker removed, v[0]].  Entries whose
# decomposition field is empty are left out.
trans = {
    k: [re.sub('<[a-zA-Z]+>', '', v[4]), v[0]]
    for k, v in ud.items()
    if v[4]
}

# Largest number of whitespace-separated items in any decomposition; it is
# emitted as MAX_LENGTH_CANON in the generated header.  The leading [0]
# keeps the result at 0 when `trans` is empty.
maxLength = max([0] + [len(v[0].split()) for v in trans.values()])
     64      1.1     elric 
# Output files, both created inside the directory named by the third
# command-line operand.  Each object exposes a writable `.file` attribute
# and a `.close()` method, which is all this script uses.
normalize_h = generate.Header('%s/normalize_table.h' % sys.argv[3])
normalize_c = generate.Implementation('%s/normalize_table.c' % sys.argv[3])

# Header body: the MAX_LENGTH_CANON constant (filled in from maxLength),
# the two struct layouts, and extern declarations for every table that
# is written to normalize_table.c.
normalize_h.file.write(
'''
#include <krb5-types.h>

#define MAX_LENGTH_CANON %u

struct translation {
  uint32_t key;
  unsigned short val_len;
  unsigned short val_offset;
};

extern const struct translation _wind_normalize_table[];

extern const uint32_t _wind_normalize_val_table[];

extern const size_t _wind_normalize_table_size;

struct canon_node {
  uint32_t val;
  unsigned char next_start;
  unsigned char next_end;
  unsigned short next_offset;
};

extern const struct canon_node _wind_canon_table[];

extern const unsigned short _wind_canon_next_table[];
''' % maxLength)
     97      1.1     elric 
# Prologue of normalize_table.c and the opening of the translation table.
normalize_c.file.write(
'''
#include "normalize_table.h"
#include <stdlib.h>

const struct translation _wind_normalize_table[] = {
''')

# Flat pool of decomposition values shared by all translation entries.
# Each entry refers to its slice of the pool by (length, offset).
normalizeValTable = []

for key in sortedKeys(trans):
    value, description = trans[key]
    # The decomposition is a whitespace-separated list of hex numbers.
    vec = [int(x, 0x10) for x in value.split()]
    # Reuse an existing run in the pool when the same sequence is already
    # there.  The test must be `is None`: a match at index 0 is a valid
    # offset, and a plain truthiness test would treat it as a miss and
    # append a duplicate copy.
    # NOTE(review): assumes util.subList returns None when the sub-list is
    # not found -- confirm against util.py.
    offset = util.subList(normalizeValTable, vec)
    if offset is None:
        offset = len(normalizeValTable)
        normalizeValTable.extend(vec)
    # One `struct translation` initialiser: {key, val_len, val_offset}.
    normalize_c.file.write("  {0x%x, %u, %u}, /* %s */\n"
                           % (key, len(vec), offset, description))

normalize_c.file.write(
'''};

''')

# Number of entries written to _wind_normalize_table above.
normalize_c.file.write(
    "const size_t _wind_normalize_table_size = %u;\n\n" % len(trans))

# Dump the shared value pool, one element per line.
normalize_c.file.write("const uint32_t _wind_normalize_val_table[] = {\n")

for val in normalizeValTable:
    normalize_c.file.write("  0x%x,\n" % val)

normalize_c.file.write("};\n\n")
    133      1.1     elric 
# Keys that must not take part in composition, loaded from the second
# command-line operand with the same reader used for UnicodeData.txt.
exclusions = UnicodeData.read(sys.argv[2])

# Inverse mapping used to build the composition trie.  It maps the
# decomposition, rendered as fixed-width 5-digit lowercase hex numbers
# concatenated together, to [key, field 0].  An entry is skipped when its
# decomposition is empty, when it carries a "<tag>" marker, or when its
# key is listed in `exclusions`.  If two entries share a decomposition,
# the one iterated last wins.
inv = {}
for codepoint, fields in ud.items():
    decomposition = fields[4]
    if not decomposition:
        continue
    if re.search('<[a-zA-Z]+> *', decomposition):
        continue
    if codepoint in exclusions:
        continue
    # split() rather than split(' ') matches how `trans` tokenises the
    # same field, and tolerates repeated or trailing blanks.
    path = ''.join(["%05x" % int(x, 0x10) for x in decomposition.split()])
    inv[path] = [codepoint, fields[0]]
    140      1.1     elric 
# Number that will be handed to the next table created by createTable().
table = 0

# All trie nodes created so far, keyed by table number.  Each node is a
# 17-element list: slot 0 holds the node's value (0 until add() sets it),
# and slots 1..16 hold the table number of the child for hex digits
# 0x0..0xf, or None when there is no child.
tables = {}

def createTable():
    """Allocate a new empty table and return its number.

    The new table is stored in the module-level `tables` dict and the
    module-level `table` counter is advanced by one.
    """
    # Only the counter is rebound; `tables` is mutated in place and so
    # needs no global declaration.
    global table
    ret = table
    table += 1
    tables[ret] = [0] + [None] * 16
    return ret
    152      1.1     elric 
def add(table, k, v):
    """Insert the entry (k, v) into the trie rooted at `table`, recursively.

    table -- a node list as created by createTable().  This parameter
             shadows the module-level `table` counter inside the function.
    k     -- the remaining path, a string of hex digits; one digit is
             consumed per level.
    v     -- a sequence whose first element is stored in slot 0 of the
             node reached once `k` is exhausted.
    """
    if not k:
        # Path fully consumed: this node carries the value.
        table[0] = v[0]
        return
    # Slot 0 is the node's value, so the child for digit d lives at d + 1.
    i = int(k[0], 0x10) + 1
    if table[i] is None:
        table[i] = createTable()
    add(tables[table[i]], k[1:], v)
    162      1.1     elric 
# Root of the composition trie.  It is created first, so child slots in
# other tables never refer to it.
top = createTable()

# Insert every invertible decomposition.  The key is the hex-digit path,
# and the first element of the value ends up in the final node.
for path, entry in inv.items():
    add(tables[top], path, entry)
    167      1.1     elric 
# Flattened child-pointer array shared by all tables, plus per-table
# bookkeeping that says which slice of it belongs to which table.
next_table  = []
tableToNext = {}   # table number -> offset of its slice in next_table
tableEnd    = {}   # table number -> one past the last used child slot
tableStart  = {}   # table number -> first used child slot

for k in sortedKeys(tables):
    children = tables[k][1:]          # the 16 child slots, value dropped
    tableToNext[k] = len(next_table)

    # Trim unused slots at both ends so that only the occupied range
    # [start, end) is stored.  A table with no children ends up with
    # start == end == 16 and contributes nothing.
    start = 0
    while start < 16 and children[start] is None:
        start += 1
    end = 16
    while end > start and children[end - 1] is None:
        end -= 1
    tableStart[k] = start
    tableEnd[k]   = end

    # Empty slots inside the kept range are written as 0.
    next_table.extend([child or 0 for child in children[start:end]])
    193      1.1     elric 
# One `struct canon_node` initialiser per table, in table-number order:
# {val, next_start, next_end, next_offset}.
normalize_c.file.write("const struct canon_node _wind_canon_table[] = {\n")

for number in sortedKeys(tables):
    node = tables[number]
    normalize_c.file.write("  {0x%x, %u, %u, %u},\n" %
                           (node[0], tableStart[number], tableEnd[number],
                            tableToNext[number]))

normalize_c.file.write("};\n\n")

# The flattened child-pointer array, one element per line.
normalize_c.file.write("const unsigned short _wind_canon_next_table[] = {\n")

for child in next_table:
    normalize_c.file.write("  %u,\n" % child)

normalize_c.file.write("};\n\n")

# Finish both generated files.
normalize_h.close()
normalize_c.close()
    212