Home | History | Annotate | Line # | Download | only in ucdata
ucgendat.c revision 1.1.1.6.4.1
      1 /*	$NetBSD: ucgendat.c,v 1.1.1.6.4.1 2020/04/13 07:56:15 martin Exp $	*/
      2 
      3 /* $OpenLDAP$ */
      4 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
      5  *
      6  * Copyright 1998-2019 The OpenLDAP Foundation.
      7  * All rights reserved.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted only as authorized by the OpenLDAP
     11  * Public License.
     12  *
     13  * A copy of this license is available in file LICENSE in the
     14  * top-level directory of the distribution or, alternatively, at
     15  * <http://www.OpenLDAP.org/license.html>.
     16  */
     17 /* Copyright 2001 Computing Research Labs, New Mexico State University
     18  *
     19  * Permission is hereby granted, free of charge, to any person obtaining a
     20  * copy of this software and associated documentation files (the "Software"),
     21  * to deal in the Software without restriction, including without limitation
     22  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     23  * and/or sell copies of the Software, and to permit persons to whom the
     24  * Software is furnished to do so, subject to the following conditions:
     25  *
     26  * The above copyright notice and this permission notice shall be included in
     27  * all copies or substantial portions of the Software.
     28  *
     29  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     30  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     31  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     32  * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
     33  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
     34  * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
     35  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     36  */
     37 /* Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp " */
     38 
     39 #include <sys/cdefs.h>
     40 __RCSID("$NetBSD: ucgendat.c,v 1.1.1.6.4.1 2020/04/13 07:56:15 martin Exp $");
     41 
     42 #include "portable.h"
     43 #include "ldap_config.h"
     44 
     45 #include <stdio.h>
     46 #include <ac/ctype.h>
     47 #include <ac/stdlib.h>
     48 #include <ac/string.h>
     49 #include <ac/unistd.h>
     50 
     51 #include <ac/bytes.h>
     52 
     53 #include <lutil.h>
     54 
     55 #ifndef HARDCODE_DATA
     56 #define	HARDCODE_DATA	1
     57 #endif
     58 
/*
 * True when cc is an ASCII hexadecimal digit (0-9, A-F or a-f).  The
 * argument is evaluated more than once, so it must not have side effects.
 */
#undef ishdigit
#define ishdigit(cc) (('0' <= (cc) && (cc) <= '9') ||\
                      ('A' <= (cc) && (cc) <= 'F') ||\
                      ('a' <= (cc) && (cc) <= 'f'))
     63 
     64 /*
     65  * A header written to the output file with the byte-order-mark and the number
     66  * of property nodes.
     67  */
     68 static ac_uint2 hdr[2] = {0xfeff, 0};
     69 
/*
 * NUMPROPS is the number of entries in props[].  NEEDPROPS is NUMPROPS
 * advanced to the next multiple of four (a full four is added when NUMPROPS
 * is already a multiple of four).
 */
#define NUMPROPS 50
#define NEEDPROPS ((NUMPROPS) + 4 - ((NUMPROPS) & 3))

/*
 * A property name together with its length in characters.
 */
typedef struct {
    char *name;
    int len;
} _prop_t;

/*
 * List of properties expected to be found in the Unicode Character Database
 * including some implementation specific properties.  The position of a name
 * in this table is the index used for the matching slot in proptbl[].
 *
 * The implementation specific properties are:
 * Cm = Composed (can be decomposed)
 * Nb = Non-breaking
 * Sy = Symmetric (has left and right forms)
 * Hd = Hex digit
 * Qm = Quote marks
 * Mr = Mirroring
 * Ss = Space, other
 * Cp = Defined character
 */
static _prop_t props[NUMPROPS] = {
    /* Marks */
    {"Mn", 2}, {"Mc", 2}, {"Me", 2},
    /* Numbers */
    {"Nd", 2}, {"Nl", 2}, {"No", 2},
    /* Separators */
    {"Zs", 2}, {"Zl", 2}, {"Zp", 2},
    /* Other */
    {"Cc", 2}, {"Cf", 2}, {"Cs", 2}, {"Co", 2}, {"Cn", 2},
    /* Letters */
    {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2}, {"Lo", 2},
    /* Punctuation */
    {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
    /* Symbols */
    {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2},
    /* Directionality codes */
    {"L",  1}, {"R",  1}, {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2},
    {"CS", 2}, {"B",  1}, {"S",  1}, {"WS", 2}, {"ON", 2},
    /* Implementation specific properties */
    {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
    {"Ss", 2}, {"Cp", 2},
    /* Later additions */
    {"Pi", 2}, {"Pf", 2}, {"AL", 2}
};
    103 
    104 typedef struct {
    105     ac_uint4 *ranges;
    106     ac_uint2 used;
    107     ac_uint2 size;
    108 } _ranges_t;
    109 
    110 static _ranges_t proptbl[NUMPROPS];
    111 
    112 /*
    113  * Make sure this array is sized to be on a 4-byte boundary at compile time.
    114  */
    115 static ac_uint2 propcnt[NEEDPROPS];
    116 
    117 /*
    118  * Array used to collect a decomposition before adding it to the decomposition
    119  * table.
    120  */
    121 static ac_uint4 dectmp[64];
    122 static ac_uint4 dectmp_size;
    123 
    124 typedef struct {
    125     ac_uint4 code;
    126     ac_uint2 size;
    127     ac_uint2 used;
    128     ac_uint4 *decomp;
    129 } _decomp_t;
    130 
    131 /*
    132  * List of decomposition.  Created and expanded in order as the characters are
    133  * encountered. First list contains canonical mappings, second also includes
    134  * compatibility mappings.
    135  */
    136 static _decomp_t *decomps;
    137 static ac_uint4 decomps_used;
    138 static ac_uint4 decomps_size;
    139 
    140 static _decomp_t *kdecomps;
    141 static ac_uint4 kdecomps_used;
    142 static ac_uint4 kdecomps_size;
    143 
/*
 * Composition exclusion table stuff.
 *
 * compexs[] is used as a bitmap: code c is stored in word (c >> 5) at bit
 * (c & 31).  The mask is built from an unsigned constant because shifting a
 * signed 1 left by 31 positions overflows int, which is undefined behaviour.
 * Both macros evaluate their argument twice, so do not pass an expression
 * with side effects.
 */
#define COMPEX_SET(c) (compexs[(c) >> 5] |= (1UL << ((c) & 31)))
#define COMPEX_TEST(c) (compexs[(c) >> 5] & (1UL << ((c) & 31)))
    149 static ac_uint4 compexs[8192];
    150 
    151 /*
    152  * Struct for holding a composition pair, and array of composition pairs
    153  */
    154 typedef struct {
    155     ac_uint4 comp;
    156     ac_uint4 count;
    157     ac_uint4 code1;
    158     ac_uint4 code2;
    159 } _comp_t;
    160 
    161 static _comp_t *comps;
    162 static ac_uint4 comps_used;
    163 
    164 /*
    165  * Types and lists for handling lists of case mappings.
    166  */
    167 typedef struct {
    168     ac_uint4 key;
    169     ac_uint4 other1;
    170     ac_uint4 other2;
    171 } _case_t;
    172 
    173 static _case_t *upper;
    174 static _case_t *lower;
    175 static _case_t *title;
    176 static ac_uint4 upper_used;
    177 static ac_uint4 upper_size;
    178 static ac_uint4 lower_used;
    179 static ac_uint4 lower_size;
    180 static ac_uint4 title_used;
    181 static ac_uint4 title_size;
    182 
    183 /*
    184  * Array used to collect case mappings before adding them to a list.
    185  */
    186 static ac_uint4 cases[3];
    187 
    188 /*
    189  * An array to hold ranges for combining classes.
    190  */
    191 static ac_uint4 *ccl;
    192 static ac_uint4 ccl_used;
    193 static ac_uint4 ccl_size;
    194 
    195 /*
    196  * Structures for handling numbers.
    197  */
    198 typedef struct {
    199     ac_uint4 code;
    200     ac_uint4 idx;
    201 } _codeidx_t;
    202 
    203 typedef struct {
    204     short numerator;
    205     short denominator;
    206 } _num_t;
    207 
    208 /*
    209  * Arrays to hold the mapping of codes to numbers.
    210  */
    211 static _codeidx_t *ncodes;
    212 static ac_uint4 ncodes_used;
    213 static ac_uint4 ncodes_size;
    214 
    215 static _num_t *nums;
    216 static ac_uint4 nums_used;
    217 static ac_uint4 nums_size;
    218 
    219 /*
    220  * Array for holding numbers.
    221  */
    222 static _num_t *nums;
    223 static ac_uint4 nums_used;
    224 static ac_uint4 nums_size;
    225 
    226 static void
    227 add_range(ac_uint4 start, ac_uint4 end, char *p1, char *p2)
    228 {
    229     int i, j, k, len;
    230     _ranges_t *rlp;
    231     char *name;
    232 
    233     for (k = 0; k < 2; k++) {
    234         if (k == 0) {
    235             name = p1;
    236             len = 2;
    237         } else {
    238             if (p2 == 0)
    239               break;
    240 
    241             name = p2;
    242             len = 1;
    243         }
    244 
    245         for (i = 0; i < NUMPROPS; i++) {
    246             if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
    247               break;
    248         }
    249 
    250         if (i == NUMPROPS)
    251           continue;
    252 
    253         rlp = &proptbl[i];
    254 
    255         /*
    256          * Resize the range list if necessary.
    257          */
    258         if (rlp->used == rlp->size) {
    259             if (rlp->size == 0)
    260               rlp->ranges = (ac_uint4 *)
    261                   malloc(sizeof(ac_uint4) << 3);
    262             else
    263               rlp->ranges = (ac_uint4 *)
    264                   realloc((char *) rlp->ranges,
    265                           sizeof(ac_uint4) * (rlp->size + 8));
    266             rlp->size += 8;
    267         }
    268 
    269         /*
    270          * If this is the first code for this property list, just add it
    271          * and return.
    272          */
    273         if (rlp->used == 0) {
    274             rlp->ranges[0] = start;
    275             rlp->ranges[1] = end;
    276             rlp->used += 2;
    277             continue;
    278         }
    279 
    280         /*
    281          * Optimize the case of adding the range to the end.
    282          */
    283         j = rlp->used - 1;
    284         if (start > rlp->ranges[j]) {
    285             j = rlp->used;
    286             rlp->ranges[j++] = start;
    287             rlp->ranges[j++] = end;
    288             rlp->used = j;
    289             continue;
    290         }
    291 
    292         /*
    293          * Need to locate the insertion point.
    294          */
    295         for (i = 0;
    296              i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
    297 
    298         /*
    299          * If the start value lies in the current range, then simply set the
    300          * new end point of the range to the end value passed as a parameter.
    301          */
    302         if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
    303             rlp->ranges[i + 1] = end;
    304             return;
    305         }
    306 
    307         /*
    308          * Shift following values up by two.
    309          */
    310         for (j = rlp->used; j > i; j -= 2) {
    311             rlp->ranges[j] = rlp->ranges[j - 2];
    312             rlp->ranges[j + 1] = rlp->ranges[j - 1];
    313         }
    314 
    315         /*
    316          * Add the new range at the insertion point.
    317          */
    318         rlp->ranges[i] = start;
    319         rlp->ranges[i + 1] = end;
    320         rlp->used += 2;
    321     }
    322 }
    323 
    324 static void
    325 ordered_range_insert(ac_uint4 c, char *name, int len)
    326 {
    327     int i, j;
    328     ac_uint4 s, e;
    329     _ranges_t *rlp;
    330 
    331     if (len == 0)
    332       return;
    333 
    334     /*
    335      * Deal with directionality codes introduced in Unicode 3.0.
    336      */
    337     if ((len == 2 && memcmp(name, "BN", 2) == 0) ||
    338         (len == 3 &&
    339          (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 ||
    340           memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 ||
    341           memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) {
    342         /*
    343          * Mark all of these as Other Neutral to preserve compatibility with
    344          * older versions.
    345          */
    346         len = 2;
    347         name = "ON";
    348     }
    349 
    350     for (i = 0; i < NUMPROPS; i++) {
    351         if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
    352           break;
    353     }
    354 
    355     if (i == NUMPROPS)
    356       return;
    357 
    358     /*
    359      * Have a match, so insert the code in order.
    360      */
    361     rlp = &proptbl[i];
    362 
    363     /*
    364      * Resize the range list if necessary.
    365      */
    366     if (rlp->used == rlp->size) {
    367         if (rlp->size == 0)
    368           rlp->ranges = (ac_uint4 *)
    369               malloc(sizeof(ac_uint4) << 3);
    370         else
    371           rlp->ranges = (ac_uint4 *)
    372               realloc((char *) rlp->ranges,
    373                       sizeof(ac_uint4) * (rlp->size + 8));
    374         rlp->size += 8;
    375     }
    376 
    377     /*
    378      * If this is the first code for this property list, just add it
    379      * and return.
    380      */
    381     if (rlp->used == 0) {
    382         rlp->ranges[0] = rlp->ranges[1] = c;
    383         rlp->used += 2;
    384         return;
    385     }
    386 
    387     /*
    388      * Optimize the cases of extending the last range and adding new ranges to
    389      * the end.
    390      */
    391     j = rlp->used - 1;
    392     e = rlp->ranges[j];
    393     s = rlp->ranges[j - 1];
    394 
    395     if (c == e + 1) {
    396         /*
    397          * Extend the last range.
    398          */
    399         rlp->ranges[j] = c;
    400         return;
    401     }
    402 
    403     if (c > e + 1) {
    404         /*
    405          * Start another range on the end.
    406          */
    407         j = rlp->used;
    408         rlp->ranges[j] = rlp->ranges[j + 1] = c;
    409         rlp->used += 2;
    410         return;
    411     }
    412 
    413     if (c >= s)
    414       /*
    415        * The code is a duplicate of a code in the last range, so just return.
    416        */
    417       return;
    418 
    419     /*
    420      * The code should be inserted somewhere before the last range in the
    421      * list.  Locate the insertion point.
    422      */
    423     for (i = 0;
    424          i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
    425 
    426     s = rlp->ranges[i];
    427     e = rlp->ranges[i + 1];
    428 
    429     if (c == e + 1)
    430       /*
    431        * Simply extend the current range.
    432        */
    433       rlp->ranges[i + 1] = c;
    434     else if (c < s) {
    435         /*
    436          * Add a new entry before the current location.  Shift all entries
    437          * before the current one up by one to make room.
    438          */
    439         for (j = rlp->used; j > i; j -= 2) {
    440             rlp->ranges[j] = rlp->ranges[j - 2];
    441             rlp->ranges[j + 1] = rlp->ranges[j - 1];
    442         }
    443         rlp->ranges[i] = rlp->ranges[i + 1] = c;
    444 
    445         rlp->used += 2;
    446     }
    447 }
    448 
    449 static void
    450 add_decomp(ac_uint4 code, short compat)
    451 {
    452     ac_uint4 i, j, size;
    453     _decomp_t **pdecomps;
    454     ac_uint4 *pdecomps_used;
    455     ac_uint4 *pdecomps_size;
    456 
    457     if (compat) {
    458 	pdecomps = &kdecomps;
    459 	pdecomps_used = &kdecomps_used;
    460 	pdecomps_size = &kdecomps_size;
    461     } else {
    462 	pdecomps = &decomps;
    463 	pdecomps_used = &decomps_used;
    464 	pdecomps_size = &decomps_size;
    465     }
    466 
    467     /*
    468      * Add the code to the composite property.
    469      */
    470     if (!compat) {
    471 	ordered_range_insert(code, "Cm", 2);
    472     }
    473 
    474     /*
    475      * Locate the insertion point for the code.
    476      */
    477     for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ;
    478 
    479     /*
    480      * Allocate space for a new decomposition.
    481      */
    482     if (*pdecomps_used == *pdecomps_size) {
    483         if (*pdecomps_size == 0)
    484           *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
    485         else
    486           *pdecomps = (_decomp_t *)
    487               realloc((char *) *pdecomps,
    488                       sizeof(_decomp_t) * (*pdecomps_size + 8));
    489         (void) memset((char *) (*pdecomps + *pdecomps_size), '\0',
    490                       sizeof(_decomp_t) << 3);
    491         *pdecomps_size += 8;
    492     }
    493 
    494     if (i < *pdecomps_used && code != (*pdecomps)[i].code) {
    495         /*
    496          * Shift the decomps up by one if the codes don't match.
    497          */
    498         for (j = *pdecomps_used; j > i; j--)
    499           (void) AC_MEMCPY((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1],
    500                         sizeof(_decomp_t));
    501     }
    502 
    503     /*
    504      * Insert or replace a decomposition.
    505      */
    506     size = dectmp_size + (4 - (dectmp_size & 3));
    507     if ((*pdecomps)[i].size < size) {
    508         if ((*pdecomps)[i].size == 0)
    509           (*pdecomps)[i].decomp = (ac_uint4 *)
    510               malloc(sizeof(ac_uint4) * size);
    511         else
    512           (*pdecomps)[i].decomp = (ac_uint4 *)
    513               realloc((char *) (*pdecomps)[i].decomp,
    514                       sizeof(ac_uint4) * size);
    515         (*pdecomps)[i].size = size;
    516     }
    517 
    518     if ((*pdecomps)[i].code != code)
    519       (*pdecomps_used)++;
    520 
    521     (*pdecomps)[i].code = code;
    522     (*pdecomps)[i].used = dectmp_size;
    523     (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp,
    524                   sizeof(ac_uint4) * dectmp_size);
    525 
    526     /*
    527      * NOTICE: This needs changing later so it is more general than simply
    528      * pairs.  This calculation is done here to simplify allocation elsewhere.
    529      */
    530     if (!compat && dectmp_size == 2)
    531       comps_used++;
    532 }
    533 
    534 static void
    535 add_title(ac_uint4 code)
    536 {
    537     ac_uint4 i, j;
    538 
    539     /*
    540      * Always map the code to itself.
    541      */
    542     cases[2] = code;
    543 
    544     /*
    545      * If the upper case character is not present, then make it the same as
    546      * the title case.
    547      */
    548     if (cases[0] == 0)
    549       cases[0] = code;
    550 
    551     if (title_used == title_size) {
    552         if (title_size == 0)
    553           title = (_case_t *) malloc(sizeof(_case_t) << 3);
    554         else
    555           title = (_case_t *) realloc((char *) title,
    556                                       sizeof(_case_t) * (title_size + 8));
    557         title_size += 8;
    558     }
    559 
    560     /*
    561      * Locate the insertion point.
    562      */
    563     for (i = 0; i < title_used && code > title[i].key; i++) ;
    564 
    565     if (i < title_used) {
    566         /*
    567          * Shift the array up by one.
    568          */
    569         for (j = title_used; j > i; j--)
    570           (void) AC_MEMCPY((char *) &title[j], (char *) &title[j - 1],
    571                         sizeof(_case_t));
    572     }
    573 
    574     title[i].key = cases[2];    /* Title */
    575     title[i].other1 = cases[0]; /* Upper */
    576     title[i].other2 = cases[1]; /* Lower */
    577 
    578     title_used++;
    579 }
    580 
    581 static void
    582 add_upper(ac_uint4 code)
    583 {
    584     ac_uint4 i, j;
    585 
    586     /*
    587      * Always map the code to itself.
    588      */
    589     cases[0] = code;
    590 
    591     /*
    592      * If the title case character is not present, then make it the same as
    593      * the upper case.
    594      */
    595     if (cases[2] == 0)
    596       cases[2] = code;
    597 
    598     if (upper_used == upper_size) {
    599         if (upper_size == 0)
    600           upper = (_case_t *) malloc(sizeof(_case_t) << 3);
    601         else
    602           upper = (_case_t *) realloc((char *) upper,
    603                                       sizeof(_case_t) * (upper_size + 8));
    604         upper_size += 8;
    605     }
    606 
    607     /*
    608      * Locate the insertion point.
    609      */
    610     for (i = 0; i < upper_used && code > upper[i].key; i++) ;
    611 
    612     if (i < upper_used) {
    613         /*
    614          * Shift the array up by one.
    615          */
    616         for (j = upper_used; j > i; j--)
    617           (void) AC_MEMCPY((char *) &upper[j], (char *) &upper[j - 1],
    618                         sizeof(_case_t));
    619     }
    620 
    621     upper[i].key = cases[0];    /* Upper */
    622     upper[i].other1 = cases[1]; /* Lower */
    623     upper[i].other2 = cases[2]; /* Title */
    624 
    625     upper_used++;
    626 }
    627 
    628 static void
    629 add_lower(ac_uint4 code)
    630 {
    631     ac_uint4 i, j;
    632 
    633     /*
    634      * Always map the code to itself.
    635      */
    636     cases[1] = code;
    637 
    638     /*
    639      * If the title case character is empty, then make it the same as the
    640      * upper case.
    641      */
    642     if (cases[2] == 0)
    643       cases[2] = cases[0];
    644 
    645     if (lower_used == lower_size) {
    646         if (lower_size == 0)
    647           lower = (_case_t *) malloc(sizeof(_case_t) << 3);
    648         else
    649           lower = (_case_t *) realloc((char *) lower,
    650                                       sizeof(_case_t) * (lower_size + 8));
    651         lower_size += 8;
    652     }
    653 
    654     /*
    655      * Locate the insertion point.
    656      */
    657     for (i = 0; i < lower_used && code > lower[i].key; i++) ;
    658 
    659     if (i < lower_used) {
    660         /*
    661          * Shift the array up by one.
    662          */
    663         for (j = lower_used; j > i; j--)
    664           (void) AC_MEMCPY((char *) &lower[j], (char *) &lower[j - 1],
    665                         sizeof(_case_t));
    666     }
    667 
    668     lower[i].key = cases[1];    /* Lower */
    669     lower[i].other1 = cases[0]; /* Upper */
    670     lower[i].other2 = cases[2]; /* Title */
    671 
    672     lower_used++;
    673 }
    674 
    675 static void
    676 ordered_ccl_insert(ac_uint4 c, ac_uint4 ccl_code)
    677 {
    678     ac_uint4 i, j;
    679 
    680     if (ccl_used == ccl_size) {
    681         if (ccl_size == 0)
    682           ccl = (ac_uint4 *) malloc(sizeof(ac_uint4) * 24);
    683         else
    684           ccl = (ac_uint4 *)
    685               realloc((char *) ccl, sizeof(ac_uint4) * (ccl_size + 24));
    686         ccl_size += 24;
    687     }
    688 
    689     /*
    690      * Optimize adding the first item.
    691      */
    692     if (ccl_used == 0) {
    693         ccl[0] = ccl[1] = c;
    694         ccl[2] = ccl_code;
    695         ccl_used += 3;
    696         return;
    697     }
    698 
    699     /*
    700      * Handle the special case of extending the range on the end.  This
    701      * requires that the combining class codes are the same.
    702      */
    703     if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
    704         ccl[ccl_used - 2] = c;
    705         return;
    706     }
    707 
    708     /*
    709      * Handle the special case of adding another range on the end.
    710      */
    711     if (c > ccl[ccl_used - 2] + 1 ||
    712         (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
    713         ccl[ccl_used++] = c;
    714         ccl[ccl_used++] = c;
    715         ccl[ccl_used++] = ccl_code;
    716         return;
    717     }
    718 
    719     /*
    720      * Locate either the insertion point or range for the code.
    721      */
    722     for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
    723 
    724     if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
    725         /*
    726          * Extend an existing range.
    727          */
    728         ccl[i + 1] = c;
    729         return;
    730     } else if (c < ccl[i]) {
    731         /*
    732          * Start a new range before the current location.
    733          */
    734         for (j = ccl_used; j > i; j -= 3) {
    735             ccl[j] = ccl[j - 3];
    736             ccl[j - 1] = ccl[j - 4];
    737             ccl[j - 2] = ccl[j - 5];
    738         }
    739         ccl[i] = ccl[i + 1] = c;
    740         ccl[i + 2] = ccl_code;
    741     }
    742 }
    743 
    744 /*
    745  * Adds a number if it does not already exist and returns an index value
    746  * multiplied by 2.
    747  */
    748 static ac_uint4
    749 make_number(short num, short denom)
    750 {
    751     ac_uint4 n;
    752 
    753     /*
    754      * Determine if the number already exists.
    755      */
    756     for (n = 0; n < nums_used; n++) {
    757         if (nums[n].numerator == num && nums[n].denominator == denom)
    758           return n << 1;
    759     }
    760 
    761     if (nums_used == nums_size) {
    762         if (nums_size == 0)
    763           nums = (_num_t *) malloc(sizeof(_num_t) << 3);
    764         else
    765           nums = (_num_t *) realloc((char *) nums,
    766                                     sizeof(_num_t) * (nums_size + 8));
    767         nums_size += 8;
    768     }
    769 
    770     n = nums_used++;
    771     nums[n].numerator = num;
    772     nums[n].denominator = denom;
    773 
    774     return n << 1;
    775 }
    776 
    777 static void
    778 add_number(ac_uint4 code, short num, short denom)
    779 {
    780     ac_uint4 i, j;
    781 
    782     /*
    783      * Insert the code in order.
    784      */
    785     for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
    786 
    787     /*
    788      * Handle the case of the codes matching and simply replace the number
    789      * that was there before.
    790      */
    791     if (i < ncodes_used && code == ncodes[i].code) {
    792         ncodes[i].idx = make_number(num, denom);
    793         return;
    794     }
    795 
    796     /*
    797      * Resize the array if necessary.
    798      */
    799     if (ncodes_used == ncodes_size) {
    800         if (ncodes_size == 0)
    801           ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
    802         else
    803           ncodes = (_codeidx_t *)
    804               realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
    805 
    806         ncodes_size += 8;
    807     }
    808 
    809     /*
    810      * Shift things around to insert the code if necessary.
    811      */
    812     if (i < ncodes_used) {
    813         for (j = ncodes_used; j > i; j--) {
    814             ncodes[j].code = ncodes[j - 1].code;
    815             ncodes[j].idx = ncodes[j - 1].idx;
    816         }
    817     }
    818     ncodes[i].code = code;
    819     ncodes[i].idx = make_number(num, denom);
    820 
    821     ncodes_used++;
    822 }
    823 
    824 /*
    825  * This routine assumes that the line is a valid Unicode Character Database
    826  * entry.
    827  */
    828 static void
    829 read_cdata(FILE *in)
    830 {
    831     ac_uint4 i, lineno, skip, code, ccl_code;
    832     short wnum, neg, number[2], compat;
    833     char line[512], *s, *e, *first_prop;
    834 
    835     lineno = skip = 0;
    836     while (fgets(line, sizeof(line), in)) {
    837 	if( (s=strchr(line, '\n')) ) *s = '\0';
    838         lineno++;
    839 
    840         /*
    841          * Skip blank lines and lines that start with a '#'.
    842          */
    843         if (line[0] == 0 || line[0] == '#')
    844           continue;
    845 
    846         /*
    847          * If lines need to be skipped, do it here.
    848          */
    849         if (skip) {
    850             skip--;
    851             continue;
    852         }
    853 
    854         /*
    855          * Collect the code.  The code can be up to 6 hex digits in length to
    856          * allow surrogates to be specified.
    857          */
    858         for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
    859             code <<= 4;
    860             if (*s >= '0' && *s <= '9')
    861               code += *s - '0';
    862             else if (*s >= 'A' && *s <= 'F')
    863               code += (*s - 'A') + 10;
    864             else if (*s >= 'a' && *s <= 'f')
    865               code += (*s - 'a') + 10;
    866         }
    867 
    868         /*
    869          * Handle the following special cases:
    870          * 1. 4E00-9FA5 CJK Ideographs.
    871          * 2. AC00-D7A3 Hangul Syllables.
    872          * 3. D800-DFFF Surrogates.
    873          * 4. E000-F8FF Private Use Area.
    874          * 5. F900-FA2D Han compatibility.
    875 	 * ...Plus additional ranges in newer Unicode versions...
    876          */
    877         switch (code) {
    878 	  case 0x3400:
    879 	    /* CJK Ideograph Extension A */
    880             add_range(0x3400, 0x4db5, "Lo", "L");
    881 
    882             add_range(0x3400, 0x4db5, "Cp", 0);
    883 
    884 	    skip = 1;
    885 	    break;
    886           case 0x4e00:
    887             /*
    888              * The Han ideographs.
    889              */
    890             add_range(0x4e00, 0x9fff, "Lo", "L");
    891 
    892             /*
    893              * Add the characters to the defined category.
    894              */
    895             add_range(0x4e00, 0x9fa5, "Cp", 0);
    896 
    897             skip = 1;
    898             break;
    899           case 0xac00:
    900             /*
    901              * The Hangul syllables.
    902              */
    903             add_range(0xac00, 0xd7a3, "Lo", "L");
    904 
    905             /*
    906              * Add the characters to the defined category.
    907              */
    908             add_range(0xac00, 0xd7a3, "Cp", 0);
    909 
    910             skip = 1;
    911             break;
    912           case 0xd800:
    913             /*
    914              * Make a range of all surrogates and assume some default
    915              * properties.
    916              */
    917             add_range(0x010000, 0x10ffff, "Cs", "L");
    918             skip = 5;
    919             break;
    920           case 0xe000:
    921             /*
    922              * The Private Use area.  Add with a default set of properties.
    923              */
    924             add_range(0xe000, 0xf8ff, "Co", "L");
    925             skip = 1;
    926             break;
    927           case 0xf900:
    928             /*
    929              * The CJK compatibility area.
    930              */
    931             add_range(0xf900, 0xfaff, "Lo", "L");
    932 
    933             /*
    934              * Add the characters to the defined category.
    935              */
    936             add_range(0xf900, 0xfaff, "Cp", 0);
    937 
    938             skip = 1;
    939 	    break;
    940 	  case 0x20000:
    941 	    /* CJK Ideograph Extension B */
    942             add_range(0x20000, 0x2a6d6, "Lo", "L");
    943 
    944             add_range(0x20000, 0x2a6d6, "Cp", 0);
    945 
    946 	    skip = 1;
    947 	    break;
    948 	  case 0xf0000:
    949 	    /* Plane 15 private use */
    950 	    add_range(0xf0000, 0xffffd, "Co", "L");
    951 	    skip = 1;
    952 	    break;
    953 
    954 	  case 0x100000:
    955 	    /* Plane 16 private use */
    956 	    add_range(0x100000, 0x10fffd, "Co", "L");
    957 	    skip = 1;
    958 	    break;
    959         }
    960 
    961         if (skip)
    962           continue;
    963 
    964         /*
    965          * Add the code to the defined category.
    966          */
    967         ordered_range_insert(code, "Cp", 2);
    968 
    969         /*
    970          * Locate the first character property field.
    971          */
    972         for (i = 0; *s != 0 && i < 2; s++) {
    973             if (*s == ';')
    974               i++;
    975         }
    976         for (e = s; *e && *e != ';'; e++) ;
    977 
    978         first_prop = s;
    979 
    980         ordered_range_insert(code, s, e - s);
    981 
    982         /*
    983          * Locate the combining class code.
    984          */
    985         for (s = e; *s != 0 && i < 3; s++) {
    986             if (*s == ';')
    987               i++;
    988         }
    989 
    990         /*
    991          * Convert the combining class code from decimal.
    992          */
    993         for (ccl_code = 0, e = s; *e && *e != ';'; e++)
    994           ccl_code = (ccl_code * 10) + (*e - '0');
    995 
    996         /*
    997          * Add the code if it not 0.
    998          */
    999         if (ccl_code != 0)
   1000           ordered_ccl_insert(code, ccl_code);
   1001 
   1002         /*
   1003          * Locate the second character property field.
   1004          */
   1005         for (s = e; *s != 0 && i < 4; s++) {
   1006             if (*s == ';')
   1007               i++;
   1008         }
   1009         for (e = s; *e && *e != ';'; e++) ;
   1010 
   1011         ordered_range_insert(code, s, e - s);
   1012 
   1013         /*
   1014          * Check for a decomposition.
   1015          */
   1016         s = ++e;
   1017         if (*s != ';') {
   1018 	    compat = *s == '<';
   1019 	    if (compat) {
   1020 		/*
   1021 		 * Skip compatibility formatting tag.
   1022 		 */
   1023 		while (*s++ != '>');
   1024 	    }
   1025             /*
   1026              * Collect the codes of the decomposition.
   1027              */
   1028             for (dectmp_size = 0; *s != ';'; ) {
   1029                 /*
   1030                  * Skip all leading non-hex digits.
   1031                  */
   1032                 while (!ishdigit(*s))
   1033  		  s++;
   1034 
   1035                 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
   1036                     dectmp[dectmp_size] <<= 4;
   1037                     if (*s >= '0' && *s <= '9')
   1038                       dectmp[dectmp_size] += *s - '0';
   1039                     else if (*s >= 'A' && *s <= 'F')
   1040                       dectmp[dectmp_size] += (*s - 'A') + 10;
   1041                     else if (*s >= 'a' && *s <= 'f')
   1042                       dectmp[dectmp_size] += (*s - 'a') + 10;
   1043                 }
   1044                 dectmp_size++;
   1045             }
   1046 
   1047             /*
   1048              * If there are any codes in the temporary decomposition array,
   1049              * then add the character with its decomposition.
   1050              */
   1051             if (dectmp_size > 0) {
   1052 		if (!compat) {
   1053 		    add_decomp(code, 0);
   1054 		}
   1055 		add_decomp(code, 1);
   1056 	    }
   1057         }
   1058 
   1059         /*
   1060          * Skip to the number field.
   1061          */
   1062         for (i = 0; i < 3 && *s; s++) {
   1063             if (*s == ';')
   1064               i++;
   1065         }
   1066 
   1067         /*
   1068          * Scan the number in.
   1069          */
   1070         number[0] = number[1] = 0;
   1071         for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
   1072             if (*e == '-') {
   1073                 neg = 1;
   1074                 continue;
   1075             }
   1076 
   1077             if (*e == '/') {
   1078                 /*
   1079                  * Move the the denominator of the fraction.
   1080                  */
   1081                 if (neg)
   1082                   number[wnum] *= -1;
   1083                 neg = 0;
   1084                 e++;
   1085                 wnum++;
   1086             }
   1087             number[wnum] = (number[wnum] * 10) + (*e - '0');
   1088         }
   1089 
   1090         if (e > s) {
   1091             /*
   1092              * Adjust the denominator in case of integers and add the number.
   1093              */
   1094             if (wnum == 0)
   1095               number[1] = 1;
   1096 
   1097             add_number(code, number[0], number[1]);
   1098         }
   1099 
   1100         /*
   1101          * Skip to the start of the possible case mappings.
   1102          */
   1103         for (s = e, i = 0; i < 4 && *s; s++) {
   1104             if (*s == ';')
   1105               i++;
   1106         }
   1107 
   1108         /*
   1109          * Collect the case mappings.
   1110          */
   1111         cases[0] = cases[1] = cases[2] = 0;
   1112         for (i = 0; i < 3; i++) {
   1113             while (ishdigit(*s)) {
   1114                 cases[i] <<= 4;
   1115                 if (*s >= '0' && *s <= '9')
   1116                   cases[i] += *s - '0';
   1117                 else if (*s >= 'A' && *s <= 'F')
   1118                   cases[i] += (*s - 'A') + 10;
   1119                 else if (*s >= 'a' && *s <= 'f')
   1120                   cases[i] += (*s - 'a') + 10;
   1121                 s++;
   1122             }
   1123             if (*s == ';')
   1124               s++;
   1125         }
   1126         if (!strncmp(first_prop,"Lt",2) && (cases[0] || cases[1]))
   1127           /*
   1128            * Add the upper and lower mappings for a title case character.
   1129            */
   1130           add_title(code);
   1131         else if (cases[1])
   1132           /*
   1133            * Add the lower and title case mappings for the upper case
   1134            * character.
   1135            */
   1136           add_upper(code);
   1137         else if (cases[0])
   1138           /*
   1139            * Add the upper and title case mappings for the lower case
   1140            * character.
   1141            */
   1142           add_lower(code);
   1143     }
   1144 }
   1145 
   1146 static _decomp_t *
   1147 find_decomp(ac_uint4 code, short compat)
   1148 {
   1149     long l, r, m;
   1150     _decomp_t *decs;
   1151 
   1152     l = 0;
   1153     r = (compat ? kdecomps_used : decomps_used) - 1;
   1154     decs = compat ? kdecomps : decomps;
   1155     while (l <= r) {
   1156         m = (l + r) >> 1;
   1157         if (code > decs[m].code)
   1158           l = m + 1;
   1159         else if (code < decs[m].code)
   1160           r = m - 1;
   1161         else
   1162           return &decs[m];
   1163     }
   1164     return 0;
   1165 }
   1166 
   1167 static void
   1168 decomp_it(_decomp_t *d, short compat)
   1169 {
   1170     ac_uint4 i;
   1171     _decomp_t *dp;
   1172 
   1173     for (i = 0; i < d->used; i++) {
   1174         if ((dp = find_decomp(d->decomp[i], compat)) != 0)
   1175           decomp_it(dp, compat);
   1176         else
   1177           dectmp[dectmp_size++] = d->decomp[i];
   1178     }
   1179 }
   1180 
   1181 /*
   1182  * Expand all decompositions by recursively decomposing each character
   1183  * in the decomposition.
   1184  */
   1185 static void
   1186 expand_decomp(void)
   1187 {
   1188     ac_uint4 i;
   1189 
   1190     for (i = 0; i < decomps_used; i++) {
   1191         dectmp_size = 0;
   1192         decomp_it(&decomps[i], 0);
   1193         if (dectmp_size > 0)
   1194           add_decomp(decomps[i].code, 0);
   1195     }
   1196 
   1197     for (i = 0; i < kdecomps_used; i++) {
   1198         dectmp_size = 0;
   1199         decomp_it(&kdecomps[i], 1);
   1200         if (dectmp_size > 0)
   1201           add_decomp(kdecomps[i].code, 1);
   1202     }
   1203 }
   1204 
   1205 static int
   1206 cmpcomps(const void *v_comp1, const void *v_comp2)
   1207 {
   1208 	const _comp_t *comp1 = v_comp1, *comp2 = v_comp2;
   1209     long diff = comp1->code1 - comp2->code1;
   1210 
   1211     if (!diff)
   1212 	diff = comp1->code2 - comp2->code2;
   1213     return (int) diff;
   1214 }
   1215 
   1216 /*
   1217  * Load composition exclusion data
   1218  */
   1219 static void
   1220 read_compexdata(FILE *in)
   1221 {
   1222     ac_uint2 i;
   1223     ac_uint4 code;
   1224     char line[512], *s;
   1225 
   1226     (void) memset((char *) compexs, 0, sizeof(compexs));
   1227 
   1228     while (fgets(line, sizeof(line), in)) {
   1229 	if( (s=strchr(line, '\n')) ) *s = '\0';
   1230         /*
   1231          * Skip blank lines and lines that start with a '#'.
   1232          */
   1233         if (line[0] == 0 || line[0] == '#')
   1234 	    continue;
   1235 
   1236 	/*
   1237          * Collect the code.  Assume max 6 digits
   1238          */
   1239 
   1240 	for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) {
   1241 	    if (isspace((unsigned char)*s)) break;
   1242             code <<= 4;
   1243             if (*s >= '0' && *s <= '9')
   1244 		code += *s - '0';
   1245             else if (*s >= 'A' && *s <= 'F')
   1246 		code += (*s - 'A') + 10;
   1247             else if (*s >= 'a' && *s <= 'f')
   1248 		code += (*s - 'a') + 10;
   1249         }
   1250         COMPEX_SET(code);
   1251     }
   1252 }
   1253 
   1254 /*
   1255  * Creates array of compositions from decomposition array
   1256  */
   1257 static void
   1258 create_comps(void)
   1259 {
   1260     ac_uint4 i, cu;
   1261 
   1262     comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t));
   1263 
   1264     for (i = cu = 0; i < decomps_used; i++) {
   1265 	if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code))
   1266 	    continue;
   1267 	comps[cu].comp = decomps[i].code;
   1268 	comps[cu].count = 2;
   1269 	comps[cu].code1 = decomps[i].decomp[0];
   1270 	comps[cu].code2 = decomps[i].decomp[1];
   1271 	cu++;
   1272     }
   1273     comps_used = cu;
   1274     qsort(comps, comps_used, sizeof(_comp_t), cmpcomps);
   1275 }
   1276 
   1277 #if HARDCODE_DATA
   1278 static void
   1279 write_case(FILE *out, _case_t *tab, int num, int first)
   1280 {
   1281     int i;
   1282 
   1283     for (i=0; i<num; i++) {
   1284 	if (first) first = 0;
   1285 	else fprintf(out, ",");
   1286 	fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx",
   1287 		(unsigned long) tab[i].key, (unsigned long) tab[i].other1,
   1288 		(unsigned long) tab[i].other2);
   1289     }
   1290 }
   1291 
   1292 #define PREF "static const "
   1293 
   1294 #endif
   1295 
   1296 static void
   1297 write_cdata(char *opath)
   1298 {
   1299     FILE *out;
   1300 	ac_uint4 bytes;
   1301     ac_uint4 i, idx, nprops;
   1302 #if !(HARDCODE_DATA)
   1303     ac_uint2 casecnt[2];
   1304 #endif
   1305     char path[BUFSIZ];
   1306 #if HARDCODE_DATA
   1307     int j, k;
   1308 
   1309     /*****************************************************************
   1310      *
   1311      * Generate the ctype data.
   1312      *
   1313      *****************************************************************/
   1314 
   1315     /*
   1316      * Open the output file.
   1317      */
   1318     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "uctable.h", opath);
   1319     if ((out = fopen(path, "w")) == 0)
   1320       return;
   1321 #else
   1322     /*
   1323      * Open the ctype.dat file.
   1324      */
   1325     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath);
   1326     if ((out = fopen(path, "wb")) == 0)
   1327       return;
   1328 #endif
   1329 
   1330     /*
   1331      * Collect the offsets for the properties.  The offsets array is
   1332      * on a 4-byte boundary to keep things efficient for architectures
   1333      * that need such a thing.
   1334      */
   1335     for (i = idx = 0; i < NUMPROPS; i++) {
   1336         propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
   1337         idx += proptbl[i].used;
   1338     }
   1339 
   1340     /*
   1341      * Add the sentinel index which is used by the binary search as the upper
   1342      * bound for a search.
   1343      */
   1344     propcnt[i] = idx;
   1345 
   1346     /*
   1347      * Record the actual number of property lists.  This may be different than
   1348      * the number of offsets actually written because of aligning on a 4-byte
   1349      * boundary.
   1350      */
   1351     hdr[1] = NUMPROPS;
   1352 
   1353     /*
   1354      * Calculate the byte count needed and pad the property counts array to a
   1355      * 4-byte boundary.
   1356      */
   1357     if ((bytes = sizeof(ac_uint2) * (NUMPROPS + 1)) & 3)
   1358       bytes += 4 - (bytes & 3);
   1359     nprops = bytes / sizeof(ac_uint2);
   1360     bytes += sizeof(ac_uint4) * idx;
   1361 
   1362 #if HARDCODE_DATA
   1363     fprintf(out, PREF "ac_uint4 _ucprop_size = %d;\n\n", NUMPROPS);
   1364 
   1365     fprintf(out, PREF "ac_uint2 _ucprop_offsets[] = {");
   1366 
   1367     for (i = 0; i<nprops; i++) {
   1368        if (i) fprintf(out, ",");
   1369        if (!(i&7)) fprintf(out, "\n\t");
   1370        else fprintf(out, " ");
   1371        fprintf(out, "0x%04x", propcnt[i]);
   1372     }
   1373     fprintf(out, "\n};\n\n");
   1374 
   1375     fprintf(out, PREF "ac_uint4 _ucprop_ranges[] = {");
   1376 
   1377     k = 0;
   1378     for (i = 0; i < NUMPROPS; i++) {
   1379 	if (proptbl[i].used > 0) {
   1380 	  for (j=0; j<proptbl[i].used; j++) {
   1381 	    if (k) fprintf(out, ",");
   1382 	    if (!(k&3)) fprintf(out,"\n\t");
   1383 	    else fprintf(out, " ");
   1384 	    k++;
   1385 	    fprintf(out, "0x%08lx", (unsigned long) proptbl[i].ranges[j]);
   1386 	  }
   1387 	}
   1388     }
   1389     fprintf(out, "\n};\n\n");
   1390 #else
   1391     /*
   1392      * Write the header.
   1393      */
   1394     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
   1395 
   1396     /*
   1397      * Write the byte count.
   1398      */
   1399     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
   1400 
   1401     /*
   1402      * Write the property list counts.
   1403      */
   1404     fwrite((char *) propcnt, sizeof(ac_uint2), nprops, out);
   1405 
   1406     /*
   1407      * Write the property lists.
   1408      */
   1409     for (i = 0; i < NUMPROPS; i++) {
   1410         if (proptbl[i].used > 0)
   1411           fwrite((char *) proptbl[i].ranges, sizeof(ac_uint4),
   1412                  proptbl[i].used, out);
   1413     }
   1414 
   1415     fclose(out);
   1416 #endif
   1417 
   1418     /*****************************************************************
   1419      *
   1420      * Generate the case mapping data.
   1421      *
   1422      *****************************************************************/
   1423 
   1424 #if HARDCODE_DATA
   1425     fprintf(out, PREF "ac_uint4 _uccase_size = %ld;\n\n",
   1426         (long) (upper_used + lower_used + title_used));
   1427 
   1428     fprintf(out, PREF "ac_uint2 _uccase_len[2] = {%ld, %ld};\n\n",
   1429         (long) upper_used, (long) lower_used);
   1430     fprintf(out, PREF "ac_uint4 _uccase_map[] = {");
   1431 
   1432     if (upper_used > 0)
   1433       /*
   1434        * Write the upper case table.
   1435        */
   1436       write_case(out, upper, upper_used, 1);
   1437 
   1438     if (lower_used > 0)
   1439       /*
   1440        * Write the lower case table.
   1441        */
   1442       write_case(out, lower, lower_used, !upper_used);
   1443 
   1444     if (title_used > 0)
   1445       /*
   1446        * Write the title case table.
   1447        */
   1448       write_case(out, title, title_used, !(upper_used||lower_used));
   1449 
   1450     if (!(upper_used || lower_used || title_used))
   1451 	fprintf(out, "\t0");
   1452 
   1453     fprintf(out, "\n};\n\n");
   1454 #else
   1455     /*
   1456      * Open the case.dat file.
   1457      */
   1458     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath);
   1459     if ((out = fopen(path, "wb")) == 0)
   1460       return;
   1461 
   1462     /*
   1463      * Write the case mapping tables.
   1464      */
   1465     hdr[1] = upper_used + lower_used + title_used;
   1466     casecnt[0] = upper_used;
   1467     casecnt[1] = lower_used;
   1468 
   1469     /*
   1470      * Write the header.
   1471      */
   1472     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
   1473 
   1474     /*
   1475      * Write the upper and lower case table sizes.
   1476      */
   1477     fwrite((char *) casecnt, sizeof(ac_uint2), 2, out);
   1478 
   1479     if (upper_used > 0)
   1480       /*
   1481        * Write the upper case table.
   1482        */
   1483       fwrite((char *) upper, sizeof(_case_t), upper_used, out);
   1484 
   1485     if (lower_used > 0)
   1486       /*
   1487        * Write the lower case table.
   1488        */
   1489       fwrite((char *) lower, sizeof(_case_t), lower_used, out);
   1490 
   1491     if (title_used > 0)
   1492       /*
   1493        * Write the title case table.
   1494        */
   1495       fwrite((char *) title, sizeof(_case_t), title_used, out);
   1496 
   1497     fclose(out);
   1498 #endif
   1499 
   1500     /*****************************************************************
   1501      *
   1502      * Generate the composition data.
   1503      *
   1504      *****************************************************************/
   1505 
   1506     /*
   1507      * Create compositions from decomposition data
   1508      */
   1509     create_comps();
   1510 
   1511 #if HARDCODE_DATA
   1512     fprintf(out, PREF "ac_uint4 _uccomp_size = %ld;\n\n",
   1513         comps_used * 4L);
   1514 
   1515     fprintf(out, PREF "ac_uint4 _uccomp_data[] = {");
   1516 
   1517      /*
   1518       * Now, if comps exist, write them out.
   1519       */
   1520     if (comps_used > 0) {
   1521 	for (i=0; i<comps_used; i++) {
   1522 	    if (i) fprintf(out, ",");
   1523 	    fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx",
   1524 	        (unsigned long) comps[i].comp, (unsigned long) comps[i].count,
   1525 	        (unsigned long) comps[i].code1, (unsigned long) comps[i].code2);
   1526 	}
   1527     } else {
   1528 	fprintf(out, "\t0");
   1529     }
   1530     fprintf(out, "\n};\n\n");
   1531 #else
   1532     /*
   1533      * Open the comp.dat file.
   1534      */
   1535     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "comp.dat", opath);
   1536     if ((out = fopen(path, "wb")) == 0)
   1537 	return;
   1538 
   1539     /*
   1540      * Write the header.
   1541      */
   1542     hdr[1] = (ac_uint2) comps_used * 4;
   1543     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
   1544 
   1545     /*
   1546      * Write out the byte count to maintain header size.
   1547      */
   1548     bytes = comps_used * sizeof(_comp_t);
   1549     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
   1550 
   1551     /*
   1552      * Now, if comps exist, write them out.
   1553      */
   1554     if (comps_used > 0)
   1555         fwrite((char *) comps, sizeof(_comp_t), comps_used, out);
   1556 
   1557     fclose(out);
   1558 #endif
   1559 
   1560     /*****************************************************************
   1561      *
   1562      * Generate the decomposition data.
   1563      *
   1564      *****************************************************************/
   1565 
   1566     /*
   1567      * Fully expand all decompositions before generating the output file.
   1568      */
   1569     expand_decomp();
   1570 
   1571 #if HARDCODE_DATA
   1572     fprintf(out, PREF "ac_uint4 _ucdcmp_size = %ld;\n\n",
   1573         decomps_used * 2L);
   1574 
   1575     fprintf(out, PREF "ac_uint4 _ucdcmp_nodes[] = {");
   1576 
   1577     if (decomps_used) {
   1578 	/*
   1579 	 * Write the list of decomp nodes.
   1580 	 */
   1581 	for (i = idx = 0; i < decomps_used; i++) {
   1582 	    fprintf(out, "\n\t0x%08lx, 0x%08lx,",
   1583 	        (unsigned long) decomps[i].code, (unsigned long) idx);
   1584 	    idx += decomps[i].used;
   1585 	}
   1586 
   1587 	/*
   1588 	 * Write the sentinel index as the last decomp node.
   1589 	 */
   1590 	fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
   1591 
   1592 	fprintf(out, PREF "ac_uint4 _ucdcmp_decomp[] = {");
   1593 	/*
   1594 	 * Write the decompositions themselves.
   1595 	 */
   1596 	k = 0;
   1597 	for (i = 0; i < decomps_used; i++)
   1598 	  for (j=0; j<decomps[i].used; j++) {
   1599 	    if (k) fprintf(out, ",");
   1600 	    if (!(k&3)) fprintf(out,"\n\t");
   1601 	    else fprintf(out, " ");
   1602 	    k++;
   1603 	    fprintf(out, "0x%08lx", (unsigned long) decomps[i].decomp[j]);
   1604 	  }
   1605 	fprintf(out, "\n};\n\n");
   1606     }
   1607 #else
   1608     /*
   1609      * Open the decomp.dat file.
   1610      */
   1611     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "decomp.dat", opath);
   1612     if ((out = fopen(path, "wb")) == 0)
   1613       return;
   1614 
   1615     hdr[1] = decomps_used;
   1616 
   1617     /*
   1618      * Write the header.
   1619      */
   1620     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
   1621 
   1622     /*
   1623      * Write a temporary byte count which will be calculated as the
   1624      * decompositions are written out.
   1625      */
   1626     bytes = 0;
   1627     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
   1628 
   1629     if (decomps_used) {
   1630         /*
   1631          * Write the list of decomp nodes.
   1632          */
   1633         for (i = idx = 0; i < decomps_used; i++) {
   1634             fwrite((char *) &decomps[i].code, sizeof(ac_uint4), 1, out);
   1635             fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
   1636             idx += decomps[i].used;
   1637         }
   1638 
   1639         /*
   1640          * Write the sentinel index as the last decomp node.
   1641          */
   1642         fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
   1643 
   1644         /*
   1645          * Write the decompositions themselves.
   1646          */
   1647         for (i = 0; i < decomps_used; i++)
   1648           fwrite((char *) decomps[i].decomp, sizeof(ac_uint4),
   1649                  decomps[i].used, out);
   1650 
   1651         /*
   1652          * Seek back to the beginning and write the byte count.
   1653          */
   1654         bytes = (sizeof(ac_uint4) * idx) +
   1655             (sizeof(ac_uint4) * ((hdr[1] << 1) + 1));
   1656         fseek(out, sizeof(ac_uint2) << 1, 0L);
   1657         fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
   1658 
   1659         fclose(out);
   1660     }
   1661 #endif
   1662 
   1663 #ifdef HARDCODE_DATA
   1664     fprintf(out, PREF "ac_uint4 _uckdcmp_size = %ld;\n\n",
   1665         kdecomps_used * 2L);
   1666 
   1667     fprintf(out, PREF "ac_uint4 _uckdcmp_nodes[] = {");
   1668 
   1669     if (kdecomps_used) {
   1670 	/*
   1671 	 * Write the list of kdecomp nodes.
   1672 	 */
   1673 	for (i = idx = 0; i < kdecomps_used; i++) {
   1674 	    fprintf(out, "\n\t0x%08lx, 0x%08lx,",
   1675 	        (unsigned long) kdecomps[i].code, (unsigned long) idx);
   1676 	    idx += kdecomps[i].used;
   1677 	}
   1678 
   1679 	/*
   1680 	 * Write the sentinel index as the last decomp node.
   1681 	 */
   1682 	fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
   1683 
   1684 	fprintf(out, PREF "ac_uint4 _uckdcmp_decomp[] = {");
   1685 
   1686 	/*
   1687 	 * Write the decompositions themselves.
   1688 	 */
   1689 	k = 0;
   1690 	for (i = 0; i < kdecomps_used; i++)
   1691 	  for (j=0; j<kdecomps[i].used; j++) {
   1692 	    if (k) fprintf(out, ",");
   1693 	    if (!(k&3)) fprintf(out,"\n\t");
   1694 	    else fprintf(out, " ");
   1695 	    k++;
   1696 	    fprintf(out, "0x%08lx", (unsigned long) kdecomps[i].decomp[j]);
   1697 	  }
   1698 	fprintf(out, "\n};\n\n");
   1699     }
   1700 #else
   1701     /*
   1702      * Open the kdecomp.dat file.
   1703      */
   1704     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "kdecomp.dat", opath);
   1705     if ((out = fopen(path, "wb")) == 0)
   1706       return;
   1707 
   1708     hdr[1] = kdecomps_used;
   1709 
   1710     /*
   1711      * Write the header.
   1712      */
   1713     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
   1714 
   1715     /*
   1716      * Write a temporary byte count which will be calculated as the
   1717      * decompositions are written out.
   1718      */
   1719     bytes = 0;
   1720     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
   1721 
   1722     if (kdecomps_used) {
   1723         /*
   1724          * Write the list of kdecomp nodes.
   1725          */
   1726         for (i = idx = 0; i < kdecomps_used; i++) {
   1727             fwrite((char *) &kdecomps[i].code, sizeof(ac_uint4), 1, out);
   1728             fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
   1729             idx += kdecomps[i].used;
   1730         }
   1731 
   1732         /*
   1733          * Write the sentinel index as the last decomp node.
   1734          */
   1735         fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
   1736 
   1737         /*
   1738          * Write the decompositions themselves.
   1739          */
   1740         for (i = 0; i < kdecomps_used; i++)
   1741           fwrite((char *) kdecomps[i].decomp, sizeof(ac_uint4),
   1742                  kdecomps[i].used, out);
   1743 
   1744         /*
   1745          * Seek back to the beginning and write the byte count.
   1746          */
   1747         bytes = (sizeof(ac_uint4) * idx) +
   1748             (sizeof(ac_uint4) * ((hdr[1] << 1) + 1));
   1749         fseek(out, sizeof(ac_uint2) << 1, 0L);
   1750         fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
   1751 
   1752         fclose(out);
   1753     }
   1754 #endif
   1755 
   1756     /*****************************************************************
   1757      *
   1758      * Generate the combining class data.
   1759      *
   1760      *****************************************************************/
   1761 #ifdef HARDCODE_DATA
   1762     fprintf(out, PREF "ac_uint4 _uccmcl_size = %ld;\n\n", (long) ccl_used);
   1763 
   1764     fprintf(out, PREF "ac_uint4 _uccmcl_nodes[] = {");
   1765 
   1766     if (ccl_used > 0) {
   1767 	/*
   1768 	 * Write the combining class ranges out.
   1769 	 */
   1770 	for (i = 0; i<ccl_used; i++) {
   1771 	    if (i) fprintf(out, ",");
   1772 	    if (!(i&3)) fprintf(out, "\n\t");
   1773 	    else fprintf(out, " ");
   1774 	    fprintf(out, "0x%08lx", (unsigned long) ccl[i]);
   1775 	}
   1776     } else {
   1777 	fprintf(out, "\t0");
   1778     }
   1779     fprintf(out, "\n};\n\n");
   1780 #else
   1781     /*
   1782      * Open the cmbcl.dat file.
   1783      */
   1784     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "cmbcl.dat", opath);
   1785     if ((out = fopen(path, "wb")) == 0)
   1786       return;
   1787 
   1788     /*
   1789      * Set the number of ranges used.  Each range has a combining class which
   1790      * means each entry is a 3-tuple.
   1791      */
   1792     hdr[1] = ccl_used / 3;
   1793 
   1794     /*
   1795      * Write the header.
   1796      */
   1797     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
   1798 
   1799     /*
   1800      * Write out the byte count to maintain header size.
   1801      */
   1802     bytes = ccl_used * sizeof(ac_uint4);
   1803     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
   1804 
   1805     if (ccl_used > 0)
   1806       /*
   1807        * Write the combining class ranges out.
   1808        */
   1809       fwrite((char *) ccl, sizeof(ac_uint4), ccl_used, out);
   1810 
   1811     fclose(out);
   1812 #endif
   1813 
   1814     /*****************************************************************
   1815      *
   1816      * Generate the number data.
   1817      *
   1818      *****************************************************************/
   1819 
   1820 #if HARDCODE_DATA
   1821     fprintf(out, PREF "ac_uint4 _ucnum_size = %lu;\n\n",
   1822         (unsigned long)ncodes_used<<1);
   1823 
   1824     fprintf(out, PREF "ac_uint4 _ucnum_nodes[] = {");
   1825 
   1826     /*
   1827      * Now, if number mappings exist, write them out.
   1828      */
   1829     if (ncodes_used > 0) {
   1830 	for (i = 0; i<ncodes_used; i++) {
   1831 	    if (i) fprintf(out, ",");
   1832 	    if (!(i&1)) fprintf(out, "\n\t");
   1833 	    else fprintf(out, " ");
   1834 	    fprintf(out, "0x%08lx, 0x%08lx",
   1835 	        (unsigned long) ncodes[i].code, (unsigned long) ncodes[i].idx);
   1836 	}
   1837 	fprintf(out, "\n};\n\n");
   1838 
   1839 	fprintf(out, PREF "short _ucnum_vals[] = {");
   1840 	for (i = 0; i<nums_used; i++) {
   1841 	    if (i) fprintf(out, ",");
   1842 	    if (!(i&3)) fprintf(out, "\n\t");
   1843 	    else fprintf(out, " ");
   1844 	    if (nums[i].numerator < 0) {
   1845 		fprintf(out, "%6d, 0x%04x",
   1846 		  nums[i].numerator, nums[i].denominator);
   1847 	    } else {
   1848 		fprintf(out, "0x%04x, 0x%04x",
   1849 		  nums[i].numerator, nums[i].denominator);
   1850 	    }
   1851 	}
   1852 	fprintf(out, "\n};\n\n");
   1853     }
   1854 #else
   1855     /*
   1856      * Open the num.dat file.
   1857      */
   1858     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "num.dat", opath);
   1859     if ((out = fopen(path, "wb")) == 0)
   1860       return;
   1861 
   1862     /*
   1863      * The count part of the header will be the total number of codes that
   1864      * have numbers.
   1865      */
   1866     hdr[1] = (ac_uint2) (ncodes_used << 1);
   1867     bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
   1868 
   1869     /*
   1870      * Write the header.
   1871      */
   1872     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
   1873 
   1874     /*
   1875      * Write out the byte count to maintain header size.
   1876      */
   1877     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
   1878 
   1879     /*
   1880      * Now, if number mappings exist, write them out.
   1881      */
   1882     if (ncodes_used > 0) {
   1883         fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
   1884         fwrite((char *) nums, sizeof(_num_t), nums_used, out);
   1885     }
   1886 #endif
   1887 
   1888     fclose(out);
   1889 }
   1890 
/*
 * Print the command line synopsis and the description of the -o and -x
 * options to stderr, then terminate the program with exit status 1.
 *
 * prog - program name shown in the synopsis line.
 */
static void
usage(char *prog)
{
    fprintf(stderr,
            "Usage: %s [-o output-directory|-x composition-exclusions]"
            " datafile1 datafile2 ...\n\n",
            prog);

    fputs("-o output-directory\n\t\tWrite the output files to a different"
          " directory (default: .).\n",
          stderr);

    fputs("-x composition-exclusion\n\t\tFile of composition codes"
          " that should be excluded.\n",
          stderr);

    exit(1);
}
   1905 
/*
 * Program entry point.
 *
 * Arguments are processed strictly in the order given:
 *   -o DIR   remember DIR as the output directory (default ".").
 *   -x FILE  load composition exclusions from FILE via read_compexdata().
 *   other    treated as a data file and loaded via read_cdata().
 *
 * A file that cannot be opened is reported on stderr and skipped.  An
 * unknown option, or -o / -x without its argument, prints the usage text
 * and exits.  After all arguments are handled the tables are written with
 * write_cdata().  Always returns 0 when it returns at all.
 */
int
main(int argc, char *argv[])
{
    FILE *in;
    char *prog, *opath;

    prog = lutil_progname( "ucgendat", argc, argv );

    opath = 0;

    argc--;
    argv++;

    while (argc > 0) {
        if (argv[0][0] == '-') {
            switch (argv[0][1]) {
              case 'o':
                /*
                 * The option needs a value; without this check the code
                 * would step past the end of the argument list.
                 */
                if (argc < 2)
                  usage(prog);
                argc--;
                argv++;
                opath = argv[0];
                break;
              case 'x':
                /*
                 * Same check as for -o: never hand a missing file name
                 * to fopen().
                 */
                if (argc < 2)
                  usage(prog);
                argc--;
                argv++;
                if ((in = fopen(argv[0], "r")) == 0)
                  fprintf(stderr,
                          "%s: unable to open composition exclusion file %s\n",
                          prog, argv[0]);
                else {
                    read_compexdata(in);
                    fclose(in);
                }
                break;
              default:
                usage(prog);
            }
        } else {
            if ((in = fopen(argv[0], "r")) == 0)
              fprintf(stderr, "%s: unable to open ctype file %s\n",
                      prog, argv[0]);
            else {
                read_cdata(in);
                fclose(in);
            }
        }

        /*
         * Step over the argument just handled (for -o / -x this is the
         * option's value; the option itself was skipped above).
         */
        argc--;
        argv++;
    }

    if (opath == 0)
      opath = ".";
    write_cdata(opath);

    return 0;
}
   1966