1 1.3 christos /* $NetBSD: ucgendat.c,v 1.4 2025/09/05 21:16:22 christos Exp $ */ 2 1.2 christos 3 1.2 christos /* $OpenLDAP$ */ 4 1.1 lukem /* This work is part of OpenLDAP Software <http://www.openldap.org/>. 5 1.1 lukem * 6 1.4 christos * Copyright 1998-2024 The OpenLDAP Foundation. 7 1.1 lukem * All rights reserved. 8 1.1 lukem * 9 1.1 lukem * Redistribution and use in source and binary forms, with or without 10 1.1 lukem * modification, are permitted only as authorized by the OpenLDAP 11 1.1 lukem * Public License. 12 1.1 lukem * 13 1.1 lukem * A copy of this license is available in file LICENSE in the 14 1.1 lukem * top-level directory of the distribution or, alternatively, at 15 1.1 lukem * <http://www.OpenLDAP.org/license.html>. 16 1.1 lukem */ 17 1.1 lukem /* Copyright 2001 Computing Research Labs, New Mexico State University 18 1.1 lukem * 19 1.1 lukem * Permission is hereby granted, free of charge, to any person obtaining a 20 1.1 lukem * copy of this software and associated documentation files (the "Software"), 21 1.1 lukem * to deal in the Software without restriction, including without limitation 22 1.1 lukem * the rights to use, copy, modify, merge, publish, distribute, sublicense, 23 1.1 lukem * and/or sell copies of the Software, and to permit persons to whom the 24 1.1 lukem * Software is furnished to do so, subject to the following conditions: 25 1.1 lukem * 26 1.1 lukem * The above copyright notice and this permission notice shall be included in 27 1.1 lukem * all copies or substantial portions of the Software. 28 1.1 lukem * 29 1.1 lukem * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 1.1 lukem * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31 1.1 lukem * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 32 1.1 lukem * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY 33 1.1 lukem * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 34 1.1 lukem * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 35 1.1 lukem * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 36 1.1 lukem */ 37 1.2 christos /* Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp " */ 38 1.2 christos 39 1.2 christos #include <sys/cdefs.h> 40 1.3 christos __RCSID("$NetBSD: ucgendat.c,v 1.4 2025/09/05 21:16:22 christos Exp $"); 41 1.1 lukem 42 1.1 lukem #include "portable.h" 43 1.1 lukem #include "ldap_config.h" 44 1.1 lukem 45 1.1 lukem #include <stdio.h> 46 1.1 lukem #include <ac/ctype.h> 47 1.1 lukem #include <ac/stdlib.h> 48 1.1 lukem #include <ac/string.h> 49 1.1 lukem #include <ac/unistd.h> 50 1.1 lukem 51 1.1 lukem #include <ac/bytes.h> 52 1.1 lukem 53 1.1 lukem #include <lutil.h> 54 1.1 lukem 55 1.1 lukem #ifndef HARDCODE_DATA 56 1.1 lukem #define HARDCODE_DATA 1 57 1.1 lukem #endif 58 1.1 lukem 59 1.1 lukem #undef ishdigit 60 1.1 lukem #define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\ 61 1.1 lukem ((cc) >= 'A' && (cc) <= 'F') ||\ 62 1.1 lukem ((cc) >= 'a' && (cc) <= 'f')) 63 1.1 lukem 64 1.1 lukem /* 65 1.1 lukem * A header written to the output file with the byte-order-mark and the number 66 1.1 lukem * of property nodes. 67 1.1 lukem */ 68 1.1 lukem static ac_uint2 hdr[2] = {0xfeff, 0}; 69 1.1 lukem 70 1.1 lukem #define NUMPROPS 50 71 1.1 lukem #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3))) 72 1.1 lukem 73 1.1 lukem typedef struct { 74 1.1 lukem char *name; 75 1.1 lukem int len; 76 1.1 lukem } _prop_t; 77 1.1 lukem 78 1.1 lukem /* 79 1.1 lukem * List of properties expected to be found in the Unicode Character Database 80 1.1 lukem * including some implementation specific properties. 81 1.1 lukem * 82 1.1 lukem * The implementation specific properties are: 83 1.1 lukem * Cm = Composed (can be decomposed) 84 1.1 lukem * Nb = Non-breaking 85 1.1 lukem * Sy = Symmetric (has left and right forms) 86 1.1 lukem * Hd = Hex digit 87 1.1 lukem * Qm = Quote marks 88 1.1 lukem * Mr = Mirroring 89 1.1 lukem * Ss = Space, other 90 1.1 lukem * Cp = Defined character 91 1.1 lukem */ 92 1.1 lukem static _prop_t props[NUMPROPS] = { 93 1.1 lukem {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2}, 94 1.1 lukem {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2}, 95 1.1 lukem {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2}, 96 1.1 lukem {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2}, 97 1.1 lukem {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L", 1}, {"R", 1}, 98 1.1 lukem {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B", 1}, 99 1.1 lukem {"S", 1}, {"WS", 2}, {"ON", 2}, 100 1.1 lukem {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2}, 101 1.1 lukem {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2} 102 1.1 lukem }; 103 1.1 lukem 104 1.1 lukem typedef struct { 105 1.1 lukem ac_uint4 *ranges; 106 1.1 lukem ac_uint2 used; 107 1.1 lukem ac_uint2 size; 108 1.1 lukem } _ranges_t; 109 1.1 lukem 110 1.1 lukem static _ranges_t proptbl[NUMPROPS]; 111 1.1 lukem 112 1.1 lukem /* 113 1.1 lukem * Make sure this array is sized to be on a 4-byte boundary at compile time. 114 1.1 lukem */ 115 1.1 lukem static ac_uint2 propcnt[NEEDPROPS]; 116 1.1 lukem 117 1.1 lukem /* 118 1.1 lukem * Array used to collect a decomposition before adding it to the decomposition 119 1.1 lukem * table. 120 1.1 lukem */ 121 1.1 lukem static ac_uint4 dectmp[64]; 122 1.1 lukem static ac_uint4 dectmp_size; 123 1.1 lukem 124 1.1 lukem typedef struct { 125 1.1 lukem ac_uint4 code; 126 1.1 lukem ac_uint2 size; 127 1.1 lukem ac_uint2 used; 128 1.1 lukem ac_uint4 *decomp; 129 1.1 lukem } _decomp_t; 130 1.1 lukem 131 1.1 lukem /* 132 1.1 lukem * List of decomposition. Created and expanded in order as the characters are 133 1.1 lukem * encountered. First list contains canonical mappings, second also includes 134 1.1 lukem * compatibility mappings. 135 1.1 lukem */ 136 1.1 lukem static _decomp_t *decomps; 137 1.1 lukem static ac_uint4 decomps_used; 138 1.1 lukem static ac_uint4 decomps_size; 139 1.1 lukem 140 1.1 lukem static _decomp_t *kdecomps; 141 1.1 lukem static ac_uint4 kdecomps_used; 142 1.1 lukem static ac_uint4 kdecomps_size; 143 1.1 lukem 144 1.1 lukem /* 145 1.1 lukem * Composition exclusion table stuff. 146 1.1 lukem */ 147 1.1 lukem #define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31))) 148 1.1 lukem #define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31))) 149 1.1 lukem static ac_uint4 compexs[8192]; 150 1.1 lukem 151 1.1 lukem /* 152 1.1 lukem * Struct for holding a composition pair, and array of composition pairs 153 1.1 lukem */ 154 1.1 lukem typedef struct { 155 1.1 lukem ac_uint4 comp; 156 1.1 lukem ac_uint4 count; 157 1.1 lukem ac_uint4 code1; 158 1.1 lukem ac_uint4 code2; 159 1.1 lukem } _comp_t; 160 1.1 lukem 161 1.1 lukem static _comp_t *comps; 162 1.1 lukem static ac_uint4 comps_used; 163 1.1 lukem 164 1.1 lukem /* 165 1.1 lukem * Types and lists for handling lists of case mappings. 166 1.1 lukem */ 167 1.1 lukem typedef struct { 168 1.1 lukem ac_uint4 key; 169 1.1 lukem ac_uint4 other1; 170 1.1 lukem ac_uint4 other2; 171 1.1 lukem } _case_t; 172 1.1 lukem 173 1.1 lukem static _case_t *upper; 174 1.1 lukem static _case_t *lower; 175 1.1 lukem static _case_t *title; 176 1.1 lukem static ac_uint4 upper_used; 177 1.1 lukem static ac_uint4 upper_size; 178 1.1 lukem static ac_uint4 lower_used; 179 1.1 lukem static ac_uint4 lower_size; 180 1.1 lukem static ac_uint4 title_used; 181 1.1 lukem static ac_uint4 title_size; 182 1.1 lukem 183 1.1 lukem /* 184 1.1 lukem * Array used to collect case mappings before adding them to a list. 185 1.1 lukem */ 186 1.1 lukem static ac_uint4 cases[3]; 187 1.1 lukem 188 1.1 lukem /* 189 1.1 lukem * An array to hold ranges for combining classes. 190 1.1 lukem */ 191 1.1 lukem static ac_uint4 *ccl; 192 1.1 lukem static ac_uint4 ccl_used; 193 1.1 lukem static ac_uint4 ccl_size; 194 1.1 lukem 195 1.1 lukem /* 196 1.1 lukem * Structures for handling numbers. 197 1.1 lukem */ 198 1.1 lukem typedef struct { 199 1.1 lukem ac_uint4 code; 200 1.1 lukem ac_uint4 idx; 201 1.1 lukem } _codeidx_t; 202 1.1 lukem 203 1.1 lukem typedef struct { 204 1.1 lukem short numerator; 205 1.1 lukem short denominator; 206 1.1 lukem } _num_t; 207 1.1 lukem 208 1.1 lukem /* 209 1.1 lukem * Arrays to hold the mapping of codes to numbers. 210 1.1 lukem */ 211 1.1 lukem static _codeidx_t *ncodes; 212 1.1 lukem static ac_uint4 ncodes_used; 213 1.1 lukem static ac_uint4 ncodes_size; 214 1.1 lukem 215 1.1 lukem static _num_t *nums; 216 1.1 lukem static ac_uint4 nums_used; 217 1.1 lukem static ac_uint4 nums_size; 218 1.1 lukem 219 1.1 lukem /* 220 1.1 lukem * Array for holding numbers. 221 1.1 lukem */ 222 1.1 lukem static _num_t *nums; 223 1.1 lukem static ac_uint4 nums_used; 224 1.1 lukem static ac_uint4 nums_size; 225 1.1 lukem 226 1.1 lukem static void 227 1.1 lukem add_range(ac_uint4 start, ac_uint4 end, char *p1, char *p2) 228 1.1 lukem { 229 1.1 lukem int i, j, k, len; 230 1.1 lukem _ranges_t *rlp; 231 1.1 lukem char *name; 232 1.1 lukem 233 1.1 lukem for (k = 0; k < 2; k++) { 234 1.1 lukem if (k == 0) { 235 1.1 lukem name = p1; 236 1.1 lukem len = 2; 237 1.1 lukem } else { 238 1.1 lukem if (p2 == 0) 239 1.1 lukem break; 240 1.1 lukem 241 1.1 lukem name = p2; 242 1.1 lukem len = 1; 243 1.1 lukem } 244 1.1 lukem 245 1.1 lukem for (i = 0; i < NUMPROPS; i++) { 246 1.1 lukem if (props[i].len == len && memcmp(props[i].name, name, len) == 0) 247 1.1 lukem break; 248 1.1 lukem } 249 1.1 lukem 250 1.1 lukem if (i == NUMPROPS) 251 1.1 lukem continue; 252 1.1 lukem 253 1.1 lukem rlp = &proptbl[i]; 254 1.1 lukem 255 1.1 lukem /* 256 1.1 lukem * Resize the range list if necessary. 257 1.1 lukem */ 258 1.1 lukem if (rlp->used == rlp->size) { 259 1.1 lukem if (rlp->size == 0) 260 1.1 lukem rlp->ranges = (ac_uint4 *) 261 1.1 lukem malloc(sizeof(ac_uint4) << 3); 262 1.1 lukem else 263 1.1 lukem rlp->ranges = (ac_uint4 *) 264 1.1 lukem realloc((char *) rlp->ranges, 265 1.1 lukem sizeof(ac_uint4) * (rlp->size + 8)); 266 1.1 lukem rlp->size += 8; 267 1.1 lukem } 268 1.1 lukem 269 1.1 lukem /* 270 1.1 lukem * If this is the first code for this property list, just add it 271 1.1 lukem * and return. 272 1.1 lukem */ 273 1.1 lukem if (rlp->used == 0) { 274 1.1 lukem rlp->ranges[0] = start; 275 1.1 lukem rlp->ranges[1] = end; 276 1.1 lukem rlp->used += 2; 277 1.1 lukem continue; 278 1.1 lukem } 279 1.1 lukem 280 1.1 lukem /* 281 1.1 lukem * Optimize the case of adding the range to the end. 282 1.1 lukem */ 283 1.1 lukem j = rlp->used - 1; 284 1.1 lukem if (start > rlp->ranges[j]) { 285 1.1 lukem j = rlp->used; 286 1.1 lukem rlp->ranges[j++] = start; 287 1.1 lukem rlp->ranges[j++] = end; 288 1.1 lukem rlp->used = j; 289 1.1 lukem continue; 290 1.1 lukem } 291 1.1 lukem 292 1.1 lukem /* 293 1.1 lukem * Need to locate the insertion point. 294 1.1 lukem */ 295 1.1 lukem for (i = 0; 296 1.1 lukem i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ; 297 1.1 lukem 298 1.1 lukem /* 299 1.1 lukem * If the start value lies in the current range, then simply set the 300 1.1 lukem * new end point of the range to the end value passed as a parameter. 301 1.1 lukem */ 302 1.1 lukem if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) { 303 1.1 lukem rlp->ranges[i + 1] = end; 304 1.1 lukem return; 305 1.1 lukem } 306 1.1 lukem 307 1.1 lukem /* 308 1.1 lukem * Shift following values up by two. 309 1.1 lukem */ 310 1.1 lukem for (j = rlp->used; j > i; j -= 2) { 311 1.1 lukem rlp->ranges[j] = rlp->ranges[j - 2]; 312 1.1 lukem rlp->ranges[j + 1] = rlp->ranges[j - 1]; 313 1.1 lukem } 314 1.1 lukem 315 1.1 lukem /* 316 1.1 lukem * Add the new range at the insertion point. 317 1.1 lukem */ 318 1.1 lukem rlp->ranges[i] = start; 319 1.1 lukem rlp->ranges[i + 1] = end; 320 1.1 lukem rlp->used += 2; 321 1.1 lukem } 322 1.1 lukem } 323 1.1 lukem 324 1.1 lukem static void 325 1.1 lukem ordered_range_insert(ac_uint4 c, char *name, int len) 326 1.1 lukem { 327 1.1 lukem int i, j; 328 1.1 lukem ac_uint4 s, e; 329 1.1 lukem _ranges_t *rlp; 330 1.1 lukem 331 1.1 lukem if (len == 0) 332 1.1 lukem return; 333 1.1 lukem 334 1.1 lukem /* 335 1.1 lukem * Deal with directionality codes introduced in Unicode 3.0. 336 1.1 lukem */ 337 1.1 lukem if ((len == 2 && memcmp(name, "BN", 2) == 0) || 338 1.1 lukem (len == 3 && 339 1.1 lukem (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 || 340 1.1 lukem memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 || 341 1.1 lukem memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) { 342 1.1 lukem /* 343 1.1 lukem * Mark all of these as Other Neutral to preserve compatibility with 344 1.1 lukem * older versions. 345 1.1 lukem */ 346 1.1 lukem len = 2; 347 1.1 lukem name = "ON"; 348 1.1 lukem } 349 1.1 lukem 350 1.1 lukem for (i = 0; i < NUMPROPS; i++) { 351 1.1 lukem if (props[i].len == len && memcmp(props[i].name, name, len) == 0) 352 1.1 lukem break; 353 1.1 lukem } 354 1.1 lukem 355 1.1 lukem if (i == NUMPROPS) 356 1.1 lukem return; 357 1.1 lukem 358 1.1 lukem /* 359 1.1 lukem * Have a match, so insert the code in order. 360 1.1 lukem */ 361 1.1 lukem rlp = &proptbl[i]; 362 1.1 lukem 363 1.1 lukem /* 364 1.1 lukem * Resize the range list if necessary. 365 1.1 lukem */ 366 1.1 lukem if (rlp->used == rlp->size) { 367 1.1 lukem if (rlp->size == 0) 368 1.1 lukem rlp->ranges = (ac_uint4 *) 369 1.1 lukem malloc(sizeof(ac_uint4) << 3); 370 1.1 lukem else 371 1.1 lukem rlp->ranges = (ac_uint4 *) 372 1.1 lukem realloc((char *) rlp->ranges, 373 1.1 lukem sizeof(ac_uint4) * (rlp->size + 8)); 374 1.1 lukem rlp->size += 8; 375 1.1 lukem } 376 1.1 lukem 377 1.1 lukem /* 378 1.1 lukem * If this is the first code for this property list, just add it 379 1.1 lukem * and return. 380 1.1 lukem */ 381 1.1 lukem if (rlp->used == 0) { 382 1.1 lukem rlp->ranges[0] = rlp->ranges[1] = c; 383 1.1 lukem rlp->used += 2; 384 1.1 lukem return; 385 1.1 lukem } 386 1.1 lukem 387 1.1 lukem /* 388 1.1 lukem * Optimize the cases of extending the last range and adding new ranges to 389 1.1 lukem * the end. 390 1.1 lukem */ 391 1.1 lukem j = rlp->used - 1; 392 1.1 lukem e = rlp->ranges[j]; 393 1.1 lukem s = rlp->ranges[j - 1]; 394 1.1 lukem 395 1.1 lukem if (c == e + 1) { 396 1.1 lukem /* 397 1.1 lukem * Extend the last range. 398 1.1 lukem */ 399 1.1 lukem rlp->ranges[j] = c; 400 1.1 lukem return; 401 1.1 lukem } 402 1.1 lukem 403 1.1 lukem if (c > e + 1) { 404 1.1 lukem /* 405 1.1 lukem * Start another range on the end. 406 1.1 lukem */ 407 1.1 lukem j = rlp->used; 408 1.1 lukem rlp->ranges[j] = rlp->ranges[j + 1] = c; 409 1.1 lukem rlp->used += 2; 410 1.1 lukem return; 411 1.1 lukem } 412 1.1 lukem 413 1.1 lukem if (c >= s) 414 1.1 lukem /* 415 1.1 lukem * The code is a duplicate of a code in the last range, so just return. 416 1.1 lukem */ 417 1.1 lukem return; 418 1.1 lukem 419 1.1 lukem /* 420 1.1 lukem * The code should be inserted somewhere before the last range in the 421 1.1 lukem * list. Locate the insertion point. 422 1.1 lukem */ 423 1.1 lukem for (i = 0; 424 1.1 lukem i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ; 425 1.1 lukem 426 1.1 lukem s = rlp->ranges[i]; 427 1.1 lukem e = rlp->ranges[i + 1]; 428 1.1 lukem 429 1.1 lukem if (c == e + 1) 430 1.1 lukem /* 431 1.1 lukem * Simply extend the current range. 432 1.1 lukem */ 433 1.1 lukem rlp->ranges[i + 1] = c; 434 1.1 lukem else if (c < s) { 435 1.1 lukem /* 436 1.1 lukem * Add a new entry before the current location. Shift all entries 437 1.1 lukem * before the current one up by one to make room. 438 1.1 lukem */ 439 1.1 lukem for (j = rlp->used; j > i; j -= 2) { 440 1.1 lukem rlp->ranges[j] = rlp->ranges[j - 2]; 441 1.1 lukem rlp->ranges[j + 1] = rlp->ranges[j - 1]; 442 1.1 lukem } 443 1.1 lukem rlp->ranges[i] = rlp->ranges[i + 1] = c; 444 1.1 lukem 445 1.1 lukem rlp->used += 2; 446 1.1 lukem } 447 1.1 lukem } 448 1.1 lukem 449 1.1 lukem static void 450 1.1 lukem add_decomp(ac_uint4 code, short compat) 451 1.1 lukem { 452 1.1 lukem ac_uint4 i, j, size; 453 1.1 lukem _decomp_t **pdecomps; 454 1.1 lukem ac_uint4 *pdecomps_used; 455 1.1 lukem ac_uint4 *pdecomps_size; 456 1.1 lukem 457 1.1 lukem if (compat) { 458 1.1 lukem pdecomps = &kdecomps; 459 1.1 lukem pdecomps_used = &kdecomps_used; 460 1.1 lukem pdecomps_size = &kdecomps_size; 461 1.1 lukem } else { 462 1.1 lukem pdecomps = &decomps; 463 1.1 lukem pdecomps_used = &decomps_used; 464 1.1 lukem pdecomps_size = &decomps_size; 465 1.1 lukem } 466 1.1 lukem 467 1.1 lukem /* 468 1.1 lukem * Add the code to the composite property. 469 1.1 lukem */ 470 1.1 lukem if (!compat) { 471 1.1 lukem ordered_range_insert(code, "Cm", 2); 472 1.1 lukem } 473 1.1 lukem 474 1.1 lukem /* 475 1.1 lukem * Locate the insertion point for the code. 476 1.1 lukem */ 477 1.1 lukem for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ; 478 1.1 lukem 479 1.1 lukem /* 480 1.1 lukem * Allocate space for a new decomposition. 481 1.1 lukem */ 482 1.1 lukem if (*pdecomps_used == *pdecomps_size) { 483 1.1 lukem if (*pdecomps_size == 0) 484 1.1 lukem *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3); 485 1.1 lukem else 486 1.1 lukem *pdecomps = (_decomp_t *) 487 1.1 lukem realloc((char *) *pdecomps, 488 1.1 lukem sizeof(_decomp_t) * (*pdecomps_size + 8)); 489 1.1 lukem (void) memset((char *) (*pdecomps + *pdecomps_size), '\0', 490 1.1 lukem sizeof(_decomp_t) << 3); 491 1.1 lukem *pdecomps_size += 8; 492 1.1 lukem } 493 1.1 lukem 494 1.1 lukem if (i < *pdecomps_used && code != (*pdecomps)[i].code) { 495 1.1 lukem /* 496 1.1 lukem * Shift the decomps up by one if the codes don't match. 497 1.1 lukem */ 498 1.1 lukem for (j = *pdecomps_used; j > i; j--) 499 1.1 lukem (void) AC_MEMCPY((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1], 500 1.1 lukem sizeof(_decomp_t)); 501 1.1 lukem } 502 1.1 lukem 503 1.1 lukem /* 504 1.1 lukem * Insert or replace a decomposition. 505 1.1 lukem */ 506 1.1 lukem size = dectmp_size + (4 - (dectmp_size & 3)); 507 1.1 lukem if ((*pdecomps)[i].size < size) { 508 1.1 lukem if ((*pdecomps)[i].size == 0) 509 1.1 lukem (*pdecomps)[i].decomp = (ac_uint4 *) 510 1.1 lukem malloc(sizeof(ac_uint4) * size); 511 1.1 lukem else 512 1.1 lukem (*pdecomps)[i].decomp = (ac_uint4 *) 513 1.1 lukem realloc((char *) (*pdecomps)[i].decomp, 514 1.1 lukem sizeof(ac_uint4) * size); 515 1.1 lukem (*pdecomps)[i].size = size; 516 1.1 lukem } 517 1.1 lukem 518 1.1 lukem if ((*pdecomps)[i].code != code) 519 1.1 lukem (*pdecomps_used)++; 520 1.1 lukem 521 1.1 lukem (*pdecomps)[i].code = code; 522 1.1 lukem (*pdecomps)[i].used = dectmp_size; 523 1.1 lukem (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp, 524 1.1 lukem sizeof(ac_uint4) * dectmp_size); 525 1.1 lukem 526 1.1 lukem /* 527 1.1 lukem * NOTICE: This needs changing later so it is more general than simply 528 1.1 lukem * pairs. This calculation is done here to simplify allocation elsewhere. 529 1.1 lukem */ 530 1.1 lukem if (!compat && dectmp_size == 2) 531 1.1 lukem comps_used++; 532 1.1 lukem } 533 1.1 lukem 534 1.1 lukem static void 535 1.1 lukem add_title(ac_uint4 code) 536 1.1 lukem { 537 1.1 lukem ac_uint4 i, j; 538 1.1 lukem 539 1.1 lukem /* 540 1.1 lukem * Always map the code to itself. 541 1.1 lukem */ 542 1.1 lukem cases[2] = code; 543 1.1 lukem 544 1.2 christos /* 545 1.2 christos * If the upper case character is not present, then make it the same as 546 1.2 christos * the title case. 547 1.2 christos */ 548 1.2 christos if (cases[0] == 0) 549 1.2 christos cases[0] = code; 550 1.2 christos 551 1.1 lukem if (title_used == title_size) { 552 1.1 lukem if (title_size == 0) 553 1.1 lukem title = (_case_t *) malloc(sizeof(_case_t) << 3); 554 1.1 lukem else 555 1.1 lukem title = (_case_t *) realloc((char *) title, 556 1.1 lukem sizeof(_case_t) * (title_size + 8)); 557 1.1 lukem title_size += 8; 558 1.1 lukem } 559 1.1 lukem 560 1.1 lukem /* 561 1.1 lukem * Locate the insertion point. 562 1.1 lukem */ 563 1.1 lukem for (i = 0; i < title_used && code > title[i].key; i++) ; 564 1.1 lukem 565 1.1 lukem if (i < title_used) { 566 1.1 lukem /* 567 1.1 lukem * Shift the array up by one. 568 1.1 lukem */ 569 1.1 lukem for (j = title_used; j > i; j--) 570 1.1 lukem (void) AC_MEMCPY((char *) &title[j], (char *) &title[j - 1], 571 1.1 lukem sizeof(_case_t)); 572 1.1 lukem } 573 1.1 lukem 574 1.1 lukem title[i].key = cases[2]; /* Title */ 575 1.1 lukem title[i].other1 = cases[0]; /* Upper */ 576 1.1 lukem title[i].other2 = cases[1]; /* Lower */ 577 1.1 lukem 578 1.1 lukem title_used++; 579 1.1 lukem } 580 1.1 lukem 581 1.1 lukem static void 582 1.1 lukem add_upper(ac_uint4 code) 583 1.1 lukem { 584 1.1 lukem ac_uint4 i, j; 585 1.1 lukem 586 1.1 lukem /* 587 1.1 lukem * Always map the code to itself. 588 1.1 lukem */ 589 1.1 lukem cases[0] = code; 590 1.1 lukem 591 1.1 lukem /* 592 1.1 lukem * If the title case character is not present, then make it the same as 593 1.1 lukem * the upper case. 594 1.1 lukem */ 595 1.1 lukem if (cases[2] == 0) 596 1.1 lukem cases[2] = code; 597 1.1 lukem 598 1.1 lukem if (upper_used == upper_size) { 599 1.1 lukem if (upper_size == 0) 600 1.1 lukem upper = (_case_t *) malloc(sizeof(_case_t) << 3); 601 1.1 lukem else 602 1.1 lukem upper = (_case_t *) realloc((char *) upper, 603 1.1 lukem sizeof(_case_t) * (upper_size + 8)); 604 1.1 lukem upper_size += 8; 605 1.1 lukem } 606 1.1 lukem 607 1.1 lukem /* 608 1.1 lukem * Locate the insertion point. 609 1.1 lukem */ 610 1.1 lukem for (i = 0; i < upper_used && code > upper[i].key; i++) ; 611 1.1 lukem 612 1.1 lukem if (i < upper_used) { 613 1.1 lukem /* 614 1.1 lukem * Shift the array up by one. 615 1.1 lukem */ 616 1.1 lukem for (j = upper_used; j > i; j--) 617 1.1 lukem (void) AC_MEMCPY((char *) &upper[j], (char *) &upper[j - 1], 618 1.1 lukem sizeof(_case_t)); 619 1.1 lukem } 620 1.1 lukem 621 1.1 lukem upper[i].key = cases[0]; /* Upper */ 622 1.1 lukem upper[i].other1 = cases[1]; /* Lower */ 623 1.1 lukem upper[i].other2 = cases[2]; /* Title */ 624 1.1 lukem 625 1.1 lukem upper_used++; 626 1.1 lukem } 627 1.1 lukem 628 1.1 lukem static void 629 1.1 lukem add_lower(ac_uint4 code) 630 1.1 lukem { 631 1.1 lukem ac_uint4 i, j; 632 1.1 lukem 633 1.1 lukem /* 634 1.1 lukem * Always map the code to itself. 635 1.1 lukem */ 636 1.1 lukem cases[1] = code; 637 1.1 lukem 638 1.1 lukem /* 639 1.1 lukem * If the title case character is empty, then make it the same as the 640 1.1 lukem * upper case. 641 1.1 lukem */ 642 1.1 lukem if (cases[2] == 0) 643 1.1 lukem cases[2] = cases[0]; 644 1.1 lukem 645 1.1 lukem if (lower_used == lower_size) { 646 1.1 lukem if (lower_size == 0) 647 1.1 lukem lower = (_case_t *) malloc(sizeof(_case_t) << 3); 648 1.1 lukem else 649 1.1 lukem lower = (_case_t *) realloc((char *) lower, 650 1.1 lukem sizeof(_case_t) * (lower_size + 8)); 651 1.1 lukem lower_size += 8; 652 1.1 lukem } 653 1.1 lukem 654 1.1 lukem /* 655 1.1 lukem * Locate the insertion point. 656 1.1 lukem */ 657 1.1 lukem for (i = 0; i < lower_used && code > lower[i].key; i++) ; 658 1.1 lukem 659 1.1 lukem if (i < lower_used) { 660 1.1 lukem /* 661 1.1 lukem * Shift the array up by one. 662 1.1 lukem */ 663 1.1 lukem for (j = lower_used; j > i; j--) 664 1.1 lukem (void) AC_MEMCPY((char *) &lower[j], (char *) &lower[j - 1], 665 1.1 lukem sizeof(_case_t)); 666 1.1 lukem } 667 1.1 lukem 668 1.1 lukem lower[i].key = cases[1]; /* Lower */ 669 1.1 lukem lower[i].other1 = cases[0]; /* Upper */ 670 1.1 lukem lower[i].other2 = cases[2]; /* Title */ 671 1.1 lukem 672 1.1 lukem lower_used++; 673 1.1 lukem } 674 1.1 lukem 675 1.1 lukem static void 676 1.1 lukem ordered_ccl_insert(ac_uint4 c, ac_uint4 ccl_code) 677 1.1 lukem { 678 1.1 lukem ac_uint4 i, j; 679 1.1 lukem 680 1.1 lukem if (ccl_used == ccl_size) { 681 1.1 lukem if (ccl_size == 0) 682 1.1 lukem ccl = (ac_uint4 *) malloc(sizeof(ac_uint4) * 24); 683 1.1 lukem else 684 1.1 lukem ccl = (ac_uint4 *) 685 1.1 lukem realloc((char *) ccl, sizeof(ac_uint4) * (ccl_size + 24)); 686 1.1 lukem ccl_size += 24; 687 1.1 lukem } 688 1.1 lukem 689 1.1 lukem /* 690 1.1 lukem * Optimize adding the first item. 691 1.1 lukem */ 692 1.1 lukem if (ccl_used == 0) { 693 1.1 lukem ccl[0] = ccl[1] = c; 694 1.1 lukem ccl[2] = ccl_code; 695 1.1 lukem ccl_used += 3; 696 1.1 lukem return; 697 1.1 lukem } 698 1.1 lukem 699 1.1 lukem /* 700 1.1 lukem * Handle the special case of extending the range on the end. This 701 1.1 lukem * requires that the combining class codes are the same. 702 1.1 lukem */ 703 1.1 lukem if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) { 704 1.1 lukem ccl[ccl_used - 2] = c; 705 1.1 lukem return; 706 1.1 lukem } 707 1.1 lukem 708 1.1 lukem /* 709 1.1 lukem * Handle the special case of adding another range on the end. 710 1.1 lukem */ 711 1.1 lukem if (c > ccl[ccl_used - 2] + 1 || 712 1.1 lukem (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) { 713 1.1 lukem ccl[ccl_used++] = c; 714 1.1 lukem ccl[ccl_used++] = c; 715 1.1 lukem ccl[ccl_used++] = ccl_code; 716 1.1 lukem return; 717 1.1 lukem } 718 1.1 lukem 719 1.1 lukem /* 720 1.1 lukem * Locate either the insertion point or range for the code. 721 1.1 lukem */ 722 1.1 lukem for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ; 723 1.1 lukem 724 1.1 lukem if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) { 725 1.1 lukem /* 726 1.1 lukem * Extend an existing range. 727 1.1 lukem */ 728 1.1 lukem ccl[i + 1] = c; 729 1.1 lukem return; 730 1.1 lukem } else if (c < ccl[i]) { 731 1.1 lukem /* 732 1.1 lukem * Start a new range before the current location. 733 1.1 lukem */ 734 1.1 lukem for (j = ccl_used; j > i; j -= 3) { 735 1.1 lukem ccl[j] = ccl[j - 3]; 736 1.1 lukem ccl[j - 1] = ccl[j - 4]; 737 1.1 lukem ccl[j - 2] = ccl[j - 5]; 738 1.1 lukem } 739 1.1 lukem ccl[i] = ccl[i + 1] = c; 740 1.1 lukem ccl[i + 2] = ccl_code; 741 1.1 lukem } 742 1.1 lukem } 743 1.1 lukem 744 1.1 lukem /* 745 1.1 lukem * Adds a number if it does not already exist and returns an index value 746 1.1 lukem * multiplied by 2. 747 1.1 lukem */ 748 1.1 lukem static ac_uint4 749 1.1 lukem make_number(short num, short denom) 750 1.1 lukem { 751 1.1 lukem ac_uint4 n; 752 1.1 lukem 753 1.1 lukem /* 754 1.1 lukem * Determine if the number already exists. 755 1.1 lukem */ 756 1.1 lukem for (n = 0; n < nums_used; n++) { 757 1.1 lukem if (nums[n].numerator == num && nums[n].denominator == denom) 758 1.1 lukem return n << 1; 759 1.1 lukem } 760 1.1 lukem 761 1.1 lukem if (nums_used == nums_size) { 762 1.1 lukem if (nums_size == 0) 763 1.1 lukem nums = (_num_t *) malloc(sizeof(_num_t) << 3); 764 1.1 lukem else 765 1.1 lukem nums = (_num_t *) realloc((char *) nums, 766 1.1 lukem sizeof(_num_t) * (nums_size + 8)); 767 1.1 lukem nums_size += 8; 768 1.1 lukem } 769 1.1 lukem 770 1.1 lukem n = nums_used++; 771 1.1 lukem nums[n].numerator = num; 772 1.1 lukem nums[n].denominator = denom; 773 1.1 lukem 774 1.1 lukem return n << 1; 775 1.1 lukem } 776 1.1 lukem 777 1.1 lukem static void 778 1.1 lukem add_number(ac_uint4 code, short num, short denom) 779 1.1 lukem { 780 1.1 lukem ac_uint4 i, j; 781 1.1 lukem 782 1.1 lukem /* 783 1.1 lukem * Insert the code in order. 784 1.1 lukem */ 785 1.1 lukem for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ; 786 1.1 lukem 787 1.1 lukem /* 788 1.1 lukem * Handle the case of the codes matching and simply replace the number 789 1.1 lukem * that was there before. 790 1.1 lukem */ 791 1.1 lukem if (i < ncodes_used && code == ncodes[i].code) { 792 1.1 lukem ncodes[i].idx = make_number(num, denom); 793 1.1 lukem return; 794 1.1 lukem } 795 1.1 lukem 796 1.1 lukem /* 797 1.1 lukem * Resize the array if necessary. 798 1.1 lukem */ 799 1.1 lukem if (ncodes_used == ncodes_size) { 800 1.1 lukem if (ncodes_size == 0) 801 1.1 lukem ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3); 802 1.1 lukem else 803 1.1 lukem ncodes = (_codeidx_t *) 804 1.1 lukem realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8)); 805 1.1 lukem 806 1.1 lukem ncodes_size += 8; 807 1.1 lukem } 808 1.1 lukem 809 1.1 lukem /* 810 1.1 lukem * Shift things around to insert the code if necessary. 811 1.1 lukem */ 812 1.1 lukem if (i < ncodes_used) { 813 1.1 lukem for (j = ncodes_used; j > i; j--) { 814 1.1 lukem ncodes[j].code = ncodes[j - 1].code; 815 1.1 lukem ncodes[j].idx = ncodes[j - 1].idx; 816 1.1 lukem } 817 1.1 lukem } 818 1.1 lukem ncodes[i].code = code; 819 1.1 lukem ncodes[i].idx = make_number(num, denom); 820 1.1 lukem 821 1.1 lukem ncodes_used++; 822 1.1 lukem } 823 1.1 lukem 824 1.1 lukem /* 825 1.1 lukem * This routine assumes that the line is a valid Unicode Character Database 826 1.1 lukem * entry. 827 1.1 lukem */ 828 1.1 lukem static void 829 1.1 lukem read_cdata(FILE *in) 830 1.1 lukem { 831 1.1 lukem ac_uint4 i, lineno, skip, code, ccl_code; 832 1.1 lukem short wnum, neg, number[2], compat; 833 1.2 christos char line[512], *s, *e, *first_prop; 834 1.1 lukem 835 1.1 lukem lineno = skip = 0; 836 1.1 lukem while (fgets(line, sizeof(line), in)) { 837 1.1 lukem if( (s=strchr(line, '\n')) ) *s = '\0'; 838 1.1 lukem lineno++; 839 1.1 lukem 840 1.1 lukem /* 841 1.1 lukem * Skip blank lines and lines that start with a '#'. 842 1.1 lukem */ 843 1.1 lukem if (line[0] == 0 || line[0] == '#') 844 1.1 lukem continue; 845 1.1 lukem 846 1.1 lukem /* 847 1.1 lukem * If lines need to be skipped, do it here. 848 1.1 lukem */ 849 1.1 lukem if (skip) { 850 1.1 lukem skip--; 851 1.1 lukem continue; 852 1.1 lukem } 853 1.1 lukem 854 1.1 lukem /* 855 1.1 lukem * Collect the code. The code can be up to 6 hex digits in length to 856 1.1 lukem * allow surrogates to be specified. 857 1.1 lukem */ 858 1.1 lukem for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) { 859 1.1 lukem code <<= 4; 860 1.1 lukem if (*s >= '0' && *s <= '9') 861 1.1 lukem code += *s - '0'; 862 1.1 lukem else if (*s >= 'A' && *s <= 'F') 863 1.1 lukem code += (*s - 'A') + 10; 864 1.1 lukem else if (*s >= 'a' && *s <= 'f') 865 1.1 lukem code += (*s - 'a') + 10; 866 1.1 lukem } 867 1.1 lukem 868 1.1 lukem /* 869 1.1 lukem * Handle the following special cases: 870 1.1 lukem * 1. 4E00-9FA5 CJK Ideographs. 871 1.1 lukem * 2. AC00-D7A3 Hangul Syllables. 872 1.1 lukem * 3. D800-DFFF Surrogates. 873 1.1 lukem * 4. E000-F8FF Private Use Area. 874 1.1 lukem * 5. F900-FA2D Han compatibility. 875 1.1 lukem * ...Plus additional ranges in newer Unicode versions... 876 1.1 lukem */ 877 1.1 lukem switch (code) { 878 1.1 lukem case 0x3400: 879 1.1 lukem /* CJK Ideograph Extension A */ 880 1.1 lukem add_range(0x3400, 0x4db5, "Lo", "L"); 881 1.1 lukem 882 1.1 lukem add_range(0x3400, 0x4db5, "Cp", 0); 883 1.1 lukem 884 1.1 lukem skip = 1; 885 1.1 lukem break; 886 1.1 lukem case 0x4e00: 887 1.1 lukem /* 888 1.1 lukem * The Han ideographs. 889 1.1 lukem */ 890 1.1 lukem add_range(0x4e00, 0x9fff, "Lo", "L"); 891 1.1 lukem 892 1.1 lukem /* 893 1.1 lukem * Add the characters to the defined category. 894 1.1 lukem */ 895 1.1 lukem add_range(0x4e00, 0x9fa5, "Cp", 0); 896 1.1 lukem 897 1.1 lukem skip = 1; 898 1.1 lukem break; 899 1.1 lukem case 0xac00: 900 1.1 lukem /* 901 1.1 lukem * The Hangul syllables. 902 1.1 lukem */ 903 1.1 lukem add_range(0xac00, 0xd7a3, "Lo", "L"); 904 1.1 lukem 905 1.1 lukem /* 906 1.1 lukem * Add the characters to the defined category. 907 1.1 lukem */ 908 1.1 lukem add_range(0xac00, 0xd7a3, "Cp", 0); 909 1.1 lukem 910 1.1 lukem skip = 1; 911 1.1 lukem break; 912 1.1 lukem case 0xd800: 913 1.1 lukem /* 914 1.1 lukem * Make a range of all surrogates and assume some default 915 1.1 lukem * properties. 916 1.1 lukem */ 917 1.1 lukem add_range(0x010000, 0x10ffff, "Cs", "L"); 918 1.1 lukem skip = 5; 919 1.1 lukem break; 920 1.1 lukem case 0xe000: 921 1.1 lukem /* 922 1.1 lukem * The Private Use area. Add with a default set of properties. 923 1.1 lukem */ 924 1.1 lukem add_range(0xe000, 0xf8ff, "Co", "L"); 925 1.1 lukem skip = 1; 926 1.1 lukem break; 927 1.1 lukem case 0xf900: 928 1.1 lukem /* 929 1.1 lukem * The CJK compatibility area. 930 1.1 lukem */ 931 1.1 lukem add_range(0xf900, 0xfaff, "Lo", "L"); 932 1.1 lukem 933 1.1 lukem /* 934 1.1 lukem * Add the characters to the defined category. 935 1.1 lukem */ 936 1.1 lukem add_range(0xf900, 0xfaff, "Cp", 0); 937 1.1 lukem 938 1.1 lukem skip = 1; 939 1.1 lukem break; 940 1.1 lukem case 0x20000: 941 1.1 lukem /* CJK Ideograph Extension B */ 942 1.1 lukem add_range(0x20000, 0x2a6d6, "Lo", "L"); 943 1.1 lukem 944 1.1 lukem add_range(0x20000, 0x2a6d6, "Cp", 0); 945 1.1 lukem 946 1.1 lukem skip = 1; 947 1.1 lukem break; 948 1.1 lukem case 0xf0000: 949 1.1 lukem /* Plane 15 private use */ 950 1.1 lukem add_range(0xf0000, 0xffffd, "Co", "L"); 951 1.1 lukem skip = 1; 952 1.1 lukem break; 953 1.1 lukem 954 1.1 lukem case 0x100000: 955 1.1 lukem /* Plane 16 private use */ 956 1.1 lukem add_range(0x100000, 0x10fffd, "Co", "L"); 957 1.1 lukem skip = 1; 958 1.1 lukem break; 959 1.1 lukem } 960 1.1 lukem 961 1.1 lukem if (skip) 962 1.1 lukem continue; 963 1.1 lukem 964 1.1 lukem /* 965 1.1 lukem * Add the code to the defined category. 966 1.1 lukem */ 967 1.1 lukem ordered_range_insert(code, "Cp", 2); 968 1.1 lukem 969 1.1 lukem /* 970 1.1 lukem * Locate the first character property field. 971 1.1 lukem */ 972 1.1 lukem for (i = 0; *s != 0 && i < 2; s++) { 973 1.1 lukem if (*s == ';') 974 1.1 lukem i++; 975 1.1 lukem } 976 1.1 lukem for (e = s; *e && *e != ';'; e++) ; 977 1.2 christos 978 1.2 christos first_prop = s; 979 1.1 lukem 980 1.1 lukem ordered_range_insert(code, s, e - s); 981 1.1 lukem 982 1.1 lukem /* 983 1.1 lukem * Locate the combining class code. 984 1.1 lukem */ 985 1.1 lukem for (s = e; *s != 0 && i < 3; s++) { 986 1.1 lukem if (*s == ';') 987 1.1 lukem i++; 988 1.1 lukem } 989 1.1 lukem 990 1.1 lukem /* 991 1.1 lukem * Convert the combining class code from decimal. 992 1.1 lukem */ 993 1.1 lukem for (ccl_code = 0, e = s; *e && *e != ';'; e++) 994 1.1 lukem ccl_code = (ccl_code * 10) + (*e - '0'); 995 1.1 lukem 996 1.1 lukem /* 997 1.1 lukem * Add the code if it not 0. 998 1.1 lukem */ 999 1.1 lukem if (ccl_code != 0) 1000 1.1 lukem ordered_ccl_insert(code, ccl_code); 1001 1.1 lukem 1002 1.1 lukem /* 1003 1.1 lukem * Locate the second character property field. 1004 1.1 lukem */ 1005 1.1 lukem for (s = e; *s != 0 && i < 4; s++) { 1006 1.1 lukem if (*s == ';') 1007 1.1 lukem i++; 1008 1.1 lukem } 1009 1.1 lukem for (e = s; *e && *e != ';'; e++) ; 1010 1.1 lukem 1011 1.1 lukem ordered_range_insert(code, s, e - s); 1012 1.1 lukem 1013 1.1 lukem /* 1014 1.1 lukem * Check for a decomposition. 1015 1.1 lukem */ 1016 1.1 lukem s = ++e; 1017 1.1 lukem if (*s != ';') { 1018 1.1 lukem compat = *s == '<'; 1019 1.1 lukem if (compat) { 1020 1.1 lukem /* 1021 1.1 lukem * Skip compatibility formatting tag. 1022 1.1 lukem */ 1023 1.1 lukem while (*s++ != '>'); 1024 1.1 lukem } 1025 1.1 lukem /* 1026 1.1 lukem * Collect the codes of the decomposition. 1027 1.1 lukem */ 1028 1.1 lukem for (dectmp_size = 0; *s != ';'; ) { 1029 1.1 lukem /* 1030 1.1 lukem * Skip all leading non-hex digits. 1031 1.1 lukem */ 1032 1.1 lukem while (!ishdigit(*s)) 1033 1.1 lukem s++; 1034 1.1 lukem 1035 1.1 lukem for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) { 1036 1.1 lukem dectmp[dectmp_size] <<= 4; 1037 1.1 lukem if (*s >= '0' && *s <= '9') 1038 1.1 lukem dectmp[dectmp_size] += *s - '0'; 1039 1.1 lukem else if (*s >= 'A' && *s <= 'F') 1040 1.1 lukem dectmp[dectmp_size] += (*s - 'A') + 10; 1041 1.1 lukem else if (*s >= 'a' && *s <= 'f') 1042 1.1 lukem dectmp[dectmp_size] += (*s - 'a') + 10; 1043 1.1 lukem } 1044 1.1 lukem dectmp_size++; 1045 1.1 lukem } 1046 1.1 lukem 1047 1.1 lukem /* 1048 1.1 lukem * If there are any codes in the temporary decomposition array, 1049 1.1 lukem * then add the character with its decomposition. 1050 1.1 lukem */ 1051 1.1 lukem if (dectmp_size > 0) { 1052 1.1 lukem if (!compat) { 1053 1.1 lukem add_decomp(code, 0); 1054 1.1 lukem } 1055 1.1 lukem add_decomp(code, 1); 1056 1.1 lukem } 1057 1.1 lukem } 1058 1.1 lukem 1059 1.1 lukem /* 1060 1.1 lukem * Skip to the number field. 1061 1.1 lukem */ 1062 1.1 lukem for (i = 0; i < 3 && *s; s++) { 1063 1.1 lukem if (*s == ';') 1064 1.1 lukem i++; 1065 1.1 lukem } 1066 1.1 lukem 1067 1.1 lukem /* 1068 1.1 lukem * Scan the number in. 1069 1.1 lukem */ 1070 1.1 lukem number[0] = number[1] = 0; 1071 1.1 lukem for (e = s, neg = wnum = 0; *e && *e != ';'; e++) { 1072 1.1 lukem if (*e == '-') { 1073 1.1 lukem neg = 1; 1074 1.1 lukem continue; 1075 1.1 lukem } 1076 1.1 lukem 1077 1.1 lukem if (*e == '/') { 1078 1.1 lukem /* 1079 1.1 lukem * Move the the denominator of the fraction. 1080 1.1 lukem */ 1081 1.1 lukem if (neg) 1082 1.1 lukem number[wnum] *= -1; 1083 1.1 lukem neg = 0; 1084 1.1 lukem e++; 1085 1.1 lukem wnum++; 1086 1.1 lukem } 1087 1.1 lukem number[wnum] = (number[wnum] * 10) + (*e - '0'); 1088 1.1 lukem } 1089 1.1 lukem 1090 1.1 lukem if (e > s) { 1091 1.1 lukem /* 1092 1.1 lukem * Adjust the denominator in case of integers and add the number. 1093 1.1 lukem */ 1094 1.1 lukem if (wnum == 0) 1095 1.1 lukem number[1] = 1; 1096 1.1 lukem 1097 1.1 lukem add_number(code, number[0], number[1]); 1098 1.1 lukem } 1099 1.1 lukem 1100 1.1 lukem /* 1101 1.1 lukem * Skip to the start of the possible case mappings. 1102 1.1 lukem */ 1103 1.1 lukem for (s = e, i = 0; i < 4 && *s; s++) { 1104 1.1 lukem if (*s == ';') 1105 1.1 lukem i++; 1106 1.1 lukem } 1107 1.1 lukem 1108 1.1 lukem /* 1109 1.1 lukem * Collect the case mappings. 1110 1.1 lukem */ 1111 1.1 lukem cases[0] = cases[1] = cases[2] = 0; 1112 1.1 lukem for (i = 0; i < 3; i++) { 1113 1.1 lukem while (ishdigit(*s)) { 1114 1.1 lukem cases[i] <<= 4; 1115 1.1 lukem if (*s >= '0' && *s <= '9') 1116 1.1 lukem cases[i] += *s - '0'; 1117 1.1 lukem else if (*s >= 'A' && *s <= 'F') 1118 1.1 lukem cases[i] += (*s - 'A') + 10; 1119 1.1 lukem else if (*s >= 'a' && *s <= 'f') 1120 1.1 lukem cases[i] += (*s - 'a') + 10; 1121 1.1 lukem s++; 1122 1.1 lukem } 1123 1.1 lukem if (*s == ';') 1124 1.1 lukem s++; 1125 1.1 lukem } 1126 1.2 christos if (!strncmp(first_prop,"Lt",2) && (cases[0] || cases[1])) 1127 1.1 lukem /* 1128 1.1 lukem * Add the upper and lower mappings for a title case character. 1129 1.1 lukem */ 1130 1.1 lukem add_title(code); 1131 1.1 lukem else if (cases[1]) 1132 1.1 lukem /* 1133 1.1 lukem * Add the lower and title case mappings for the upper case 1134 1.1 lukem * character. 1135 1.1 lukem */ 1136 1.1 lukem add_upper(code); 1137 1.1 lukem else if (cases[0]) 1138 1.1 lukem /* 1139 1.1 lukem * Add the upper and title case mappings for the lower case 1140 1.1 lukem * character. 1141 1.1 lukem */ 1142 1.1 lukem add_lower(code); 1143 1.1 lukem } 1144 1.1 lukem } 1145 1.1 lukem 1146 1.1 lukem static _decomp_t * 1147 1.1 lukem find_decomp(ac_uint4 code, short compat) 1148 1.1 lukem { 1149 1.1 lukem long l, r, m; 1150 1.1 lukem _decomp_t *decs; 1151 1.1 lukem 1152 1.1 lukem l = 0; 1153 1.1 lukem r = (compat ? kdecomps_used : decomps_used) - 1; 1154 1.1 lukem decs = compat ? kdecomps : decomps; 1155 1.1 lukem while (l <= r) { 1156 1.1 lukem m = (l + r) >> 1; 1157 1.1 lukem if (code > decs[m].code) 1158 1.1 lukem l = m + 1; 1159 1.1 lukem else if (code < decs[m].code) 1160 1.1 lukem r = m - 1; 1161 1.1 lukem else 1162 1.1 lukem return &decs[m]; 1163 1.1 lukem } 1164 1.1 lukem return 0; 1165 1.1 lukem } 1166 1.1 lukem 1167 1.1 lukem static void 1168 1.1 lukem decomp_it(_decomp_t *d, short compat) 1169 1.1 lukem { 1170 1.1 lukem ac_uint4 i; 1171 1.1 lukem _decomp_t *dp; 1172 1.1 lukem 1173 1.1 lukem for (i = 0; i < d->used; i++) { 1174 1.1 lukem if ((dp = find_decomp(d->decomp[i], compat)) != 0) 1175 1.1 lukem decomp_it(dp, compat); 1176 1.1 lukem else 1177 1.1 lukem dectmp[dectmp_size++] = d->decomp[i]; 1178 1.1 lukem } 1179 1.1 lukem } 1180 1.1 lukem 1181 1.1 lukem /* 1182 1.1 lukem * Expand all decompositions by recursively decomposing each character 1183 1.1 lukem * in the decomposition. 1184 1.1 lukem */ 1185 1.1 lukem static void 1186 1.1 lukem expand_decomp(void) 1187 1.1 lukem { 1188 1.1 lukem ac_uint4 i; 1189 1.1 lukem 1190 1.1 lukem for (i = 0; i < decomps_used; i++) { 1191 1.1 lukem dectmp_size = 0; 1192 1.1 lukem decomp_it(&decomps[i], 0); 1193 1.1 lukem if (dectmp_size > 0) 1194 1.1 lukem add_decomp(decomps[i].code, 0); 1195 1.1 lukem } 1196 1.1 lukem 1197 1.1 lukem for (i = 0; i < kdecomps_used; i++) { 1198 1.1 lukem dectmp_size = 0; 1199 1.1 lukem decomp_it(&kdecomps[i], 1); 1200 1.1 lukem if (dectmp_size > 0) 1201 1.1 lukem add_decomp(kdecomps[i].code, 1); 1202 1.1 lukem } 1203 1.1 lukem } 1204 1.1 lukem 1205 1.1 lukem static int 1206 1.1 lukem cmpcomps(const void *v_comp1, const void *v_comp2) 1207 1.1 lukem { 1208 1.1 lukem const _comp_t *comp1 = v_comp1, *comp2 = v_comp2; 1209 1.1 lukem long diff = comp1->code1 - comp2->code1; 1210 1.1 lukem 1211 1.1 lukem if (!diff) 1212 1.1 lukem diff = comp1->code2 - comp2->code2; 1213 1.1 lukem return (int) diff; 1214 1.1 lukem } 1215 1.1 lukem 1216 1.1 lukem /* 1217 1.1 lukem * Load composition exclusion data 1218 1.1 lukem */ 1219 1.1 lukem static void 1220 1.1 lukem read_compexdata(FILE *in) 1221 1.1 lukem { 1222 1.1 lukem ac_uint2 i; 1223 1.1 lukem ac_uint4 code; 1224 1.1 lukem char line[512], *s; 1225 1.1 lukem 1226 1.1 lukem (void) memset((char *) compexs, 0, sizeof(compexs)); 1227 1.1 lukem 1228 1.1 lukem while (fgets(line, sizeof(line), in)) { 1229 1.1 lukem if( (s=strchr(line, '\n')) ) *s = '\0'; 1230 1.1 lukem /* 1231 1.1 lukem * Skip blank lines and lines that start with a '#'. 1232 1.1 lukem */ 1233 1.1 lukem if (line[0] == 0 || line[0] == '#') 1234 1.1 lukem continue; 1235 1.1 lukem 1236 1.1 lukem /* 1237 1.1 lukem * Collect the code. Assume max 6 digits 1238 1.1 lukem */ 1239 1.1 lukem 1240 1.1 lukem for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) { 1241 1.1 lukem if (isspace((unsigned char)*s)) break; 1242 1.1 lukem code <<= 4; 1243 1.1 lukem if (*s >= '0' && *s <= '9') 1244 1.1 lukem code += *s - '0'; 1245 1.1 lukem else if (*s >= 'A' && *s <= 'F') 1246 1.1 lukem code += (*s - 'A') + 10; 1247 1.1 lukem else if (*s >= 'a' && *s <= 'f') 1248 1.1 lukem code += (*s - 'a') + 10; 1249 1.1 lukem } 1250 1.1 lukem COMPEX_SET(code); 1251 1.1 lukem } 1252 1.1 lukem } 1253 1.1 lukem 1254 1.1 lukem /* 1255 1.1 lukem * Creates array of compositions from decomposition array 1256 1.1 lukem */ 1257 1.1 lukem static void 1258 1.1 lukem create_comps(void) 1259 1.1 lukem { 1260 1.1 lukem ac_uint4 i, cu; 1261 1.1 lukem 1262 1.1 lukem comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t)); 1263 1.1 lukem 1264 1.1 lukem for (i = cu = 0; i < decomps_used; i++) { 1265 1.1 lukem if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code)) 1266 1.1 lukem continue; 1267 1.1 lukem comps[cu].comp = decomps[i].code; 1268 1.1 lukem comps[cu].count = 2; 1269 1.1 lukem comps[cu].code1 = decomps[i].decomp[0]; 1270 1.1 lukem comps[cu].code2 = decomps[i].decomp[1]; 1271 1.1 lukem cu++; 1272 1.1 lukem } 1273 1.1 lukem comps_used = cu; 1274 1.1 lukem qsort(comps, comps_used, sizeof(_comp_t), cmpcomps); 1275 1.1 lukem } 1276 1.1 lukem 1277 1.1 lukem #if HARDCODE_DATA 1278 1.1 lukem static void 1279 1.1 lukem write_case(FILE *out, _case_t *tab, int num, int first) 1280 1.1 lukem { 1281 1.1 lukem int i; 1282 1.1 lukem 1283 1.1 lukem for (i=0; i<num; i++) { 1284 1.1 lukem if (first) first = 0; 1285 1.1 lukem else fprintf(out, ","); 1286 1.1 lukem fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx", 1287 1.1 lukem (unsigned long) tab[i].key, (unsigned long) tab[i].other1, 1288 1.1 lukem (unsigned long) tab[i].other2); 1289 1.1 lukem } 1290 1.1 lukem } 1291 1.1 lukem 1292 1.1 lukem #define PREF "static const " 1293 1.1 lukem 1294 1.1 lukem #endif 1295 1.1 lukem 1296 1.1 lukem static void 1297 1.1 lukem write_cdata(char *opath) 1298 1.1 lukem { 1299 1.1 lukem FILE *out; 1300 1.1 lukem ac_uint4 bytes; 1301 1.1 lukem ac_uint4 i, idx, nprops; 1302 1.1 lukem #if !(HARDCODE_DATA) 1303 1.1 lukem ac_uint2 casecnt[2]; 1304 1.1 lukem #endif 1305 1.1 lukem char path[BUFSIZ]; 1306 1.1 lukem #if HARDCODE_DATA 1307 1.1 lukem int j, k; 1308 1.1 lukem 1309 1.1 lukem /***************************************************************** 1310 1.1 lukem * 1311 1.1 lukem * Generate the ctype data. 1312 1.1 lukem * 1313 1.1 lukem *****************************************************************/ 1314 1.1 lukem 1315 1.1 lukem /* 1316 1.1 lukem * Open the output file. 1317 1.1 lukem */ 1318 1.1 lukem snprintf(path, sizeof path, "%s" LDAP_DIRSEP "uctable.h", opath); 1319 1.1 lukem if ((out = fopen(path, "w")) == 0) 1320 1.1 lukem return; 1321 1.1 lukem #else 1322 1.1 lukem /* 1323 1.1 lukem * Open the ctype.dat file. 1324 1.1 lukem */ 1325 1.1 lukem snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath); 1326 1.1 lukem if ((out = fopen(path, "wb")) == 0) 1327 1.1 lukem return; 1328 1.1 lukem #endif 1329 1.1 lukem 1330 1.1 lukem /* 1331 1.1 lukem * Collect the offsets for the properties. The offsets array is 1332 1.1 lukem * on a 4-byte boundary to keep things efficient for architectures 1333 1.1 lukem * that need such a thing. 1334 1.1 lukem */ 1335 1.1 lukem for (i = idx = 0; i < NUMPROPS; i++) { 1336 1.1 lukem propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff; 1337 1.1 lukem idx += proptbl[i].used; 1338 1.1 lukem } 1339 1.1 lukem 1340 1.1 lukem /* 1341 1.1 lukem * Add the sentinel index which is used by the binary search as the upper 1342 1.1 lukem * bound for a search. 1343 1.1 lukem */ 1344 1.1 lukem propcnt[i] = idx; 1345 1.1 lukem 1346 1.1 lukem /* 1347 1.1 lukem * Record the actual number of property lists. This may be different than 1348 1.1 lukem * the number of offsets actually written because of aligning on a 4-byte 1349 1.1 lukem * boundary. 1350 1.1 lukem */ 1351 1.1 lukem hdr[1] = NUMPROPS; 1352 1.1 lukem 1353 1.1 lukem /* 1354 1.1 lukem * Calculate the byte count needed and pad the property counts array to a 1355 1.1 lukem * 4-byte boundary. 1356 1.1 lukem */ 1357 1.1 lukem if ((bytes = sizeof(ac_uint2) * (NUMPROPS + 1)) & 3) 1358 1.1 lukem bytes += 4 - (bytes & 3); 1359 1.1 lukem nprops = bytes / sizeof(ac_uint2); 1360 1.1 lukem bytes += sizeof(ac_uint4) * idx; 1361 1.1 lukem 1362 1.1 lukem #if HARDCODE_DATA 1363 1.1 lukem fprintf(out, PREF "ac_uint4 _ucprop_size = %d;\n\n", NUMPROPS); 1364 1.1 lukem 1365 1.1 lukem fprintf(out, PREF "ac_uint2 _ucprop_offsets[] = {"); 1366 1.1 lukem 1367 1.1 lukem for (i = 0; i<nprops; i++) { 1368 1.1 lukem if (i) fprintf(out, ","); 1369 1.1 lukem if (!(i&7)) fprintf(out, "\n\t"); 1370 1.1 lukem else fprintf(out, " "); 1371 1.1 lukem fprintf(out, "0x%04x", propcnt[i]); 1372 1.1 lukem } 1373 1.1 lukem fprintf(out, "\n};\n\n"); 1374 1.1 lukem 1375 1.1 lukem fprintf(out, PREF "ac_uint4 _ucprop_ranges[] = {"); 1376 1.1 lukem 1377 1.1 lukem k = 0; 1378 1.1 lukem for (i = 0; i < NUMPROPS; i++) { 1379 1.1 lukem if (proptbl[i].used > 0) { 1380 1.1 lukem for (j=0; j<proptbl[i].used; j++) { 1381 1.1 lukem if (k) fprintf(out, ","); 1382 1.1 lukem if (!(k&3)) fprintf(out,"\n\t"); 1383 1.1 lukem else fprintf(out, " "); 1384 1.1 lukem k++; 1385 1.1 lukem fprintf(out, "0x%08lx", (unsigned long) proptbl[i].ranges[j]); 1386 1.1 lukem } 1387 1.1 lukem } 1388 1.1 lukem } 1389 1.1 lukem fprintf(out, "\n};\n\n"); 1390 1.1 lukem #else 1391 1.1 lukem /* 1392 1.1 lukem * Write the header. 1393 1.1 lukem */ 1394 1.1 lukem fwrite((char *) hdr, sizeof(ac_uint2), 2, out); 1395 1.1 lukem 1396 1.1 lukem /* 1397 1.1 lukem * Write the byte count. 1398 1.1 lukem */ 1399 1.1 lukem fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); 1400 1.1 lukem 1401 1.1 lukem /* 1402 1.1 lukem * Write the property list counts. 1403 1.1 lukem */ 1404 1.1 lukem fwrite((char *) propcnt, sizeof(ac_uint2), nprops, out); 1405 1.1 lukem 1406 1.1 lukem /* 1407 1.1 lukem * Write the property lists. 1408 1.1 lukem */ 1409 1.1 lukem for (i = 0; i < NUMPROPS; i++) { 1410 1.1 lukem if (proptbl[i].used > 0) 1411 1.1 lukem fwrite((char *) proptbl[i].ranges, sizeof(ac_uint4), 1412 1.1 lukem proptbl[i].used, out); 1413 1.1 lukem } 1414 1.1 lukem 1415 1.1 lukem fclose(out); 1416 1.1 lukem #endif 1417 1.1 lukem 1418 1.1 lukem /***************************************************************** 1419 1.1 lukem * 1420 1.1 lukem * Generate the case mapping data. 1421 1.1 lukem * 1422 1.1 lukem *****************************************************************/ 1423 1.1 lukem 1424 1.1 lukem #if HARDCODE_DATA 1425 1.1 lukem fprintf(out, PREF "ac_uint4 _uccase_size = %ld;\n\n", 1426 1.1 lukem (long) (upper_used + lower_used + title_used)); 1427 1.1 lukem 1428 1.1 lukem fprintf(out, PREF "ac_uint2 _uccase_len[2] = {%ld, %ld};\n\n", 1429 1.1 lukem (long) upper_used, (long) lower_used); 1430 1.1 lukem fprintf(out, PREF "ac_uint4 _uccase_map[] = {"); 1431 1.1 lukem 1432 1.1 lukem if (upper_used > 0) 1433 1.1 lukem /* 1434 1.1 lukem * Write the upper case table. 1435 1.1 lukem */ 1436 1.1 lukem write_case(out, upper, upper_used, 1); 1437 1.1 lukem 1438 1.1 lukem if (lower_used > 0) 1439 1.1 lukem /* 1440 1.1 lukem * Write the lower case table. 1441 1.1 lukem */ 1442 1.1 lukem write_case(out, lower, lower_used, !upper_used); 1443 1.1 lukem 1444 1.1 lukem if (title_used > 0) 1445 1.1 lukem /* 1446 1.1 lukem * Write the title case table. 1447 1.1 lukem */ 1448 1.1 lukem write_case(out, title, title_used, !(upper_used||lower_used)); 1449 1.1 lukem 1450 1.1 lukem if (!(upper_used || lower_used || title_used)) 1451 1.1 lukem fprintf(out, "\t0"); 1452 1.1 lukem 1453 1.1 lukem fprintf(out, "\n};\n\n"); 1454 1.1 lukem #else 1455 1.1 lukem /* 1456 1.1 lukem * Open the case.dat file. 1457 1.1 lukem */ 1458 1.1 lukem snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath); 1459 1.1 lukem if ((out = fopen(path, "wb")) == 0) 1460 1.1 lukem return; 1461 1.1 lukem 1462 1.1 lukem /* 1463 1.1 lukem * Write the case mapping tables. 1464 1.1 lukem */ 1465 1.1 lukem hdr[1] = upper_used + lower_used + title_used; 1466 1.1 lukem casecnt[0] = upper_used; 1467 1.1 lukem casecnt[1] = lower_used; 1468 1.1 lukem 1469 1.1 lukem /* 1470 1.1 lukem * Write the header. 1471 1.1 lukem */ 1472 1.1 lukem fwrite((char *) hdr, sizeof(ac_uint2), 2, out); 1473 1.1 lukem 1474 1.1 lukem /* 1475 1.1 lukem * Write the upper and lower case table sizes. 1476 1.1 lukem */ 1477 1.1 lukem fwrite((char *) casecnt, sizeof(ac_uint2), 2, out); 1478 1.1 lukem 1479 1.1 lukem if (upper_used > 0) 1480 1.1 lukem /* 1481 1.1 lukem * Write the upper case table. 1482 1.1 lukem */ 1483 1.1 lukem fwrite((char *) upper, sizeof(_case_t), upper_used, out); 1484 1.1 lukem 1485 1.1 lukem if (lower_used > 0) 1486 1.1 lukem /* 1487 1.1 lukem * Write the lower case table. 1488 1.1 lukem */ 1489 1.1 lukem fwrite((char *) lower, sizeof(_case_t), lower_used, out); 1490 1.1 lukem 1491 1.1 lukem if (title_used > 0) 1492 1.1 lukem /* 1493 1.1 lukem * Write the title case table. 1494 1.1 lukem */ 1495 1.1 lukem fwrite((char *) title, sizeof(_case_t), title_used, out); 1496 1.1 lukem 1497 1.1 lukem fclose(out); 1498 1.1 lukem #endif 1499 1.1 lukem 1500 1.1 lukem /***************************************************************** 1501 1.1 lukem * 1502 1.1 lukem * Generate the composition data. 1503 1.1 lukem * 1504 1.1 lukem *****************************************************************/ 1505 1.1 lukem 1506 1.1 lukem /* 1507 1.1 lukem * Create compositions from decomposition data 1508 1.1 lukem */ 1509 1.1 lukem create_comps(); 1510 1.1 lukem 1511 1.1 lukem #if HARDCODE_DATA 1512 1.1 lukem fprintf(out, PREF "ac_uint4 _uccomp_size = %ld;\n\n", 1513 1.1 lukem comps_used * 4L); 1514 1.1 lukem 1515 1.1 lukem fprintf(out, PREF "ac_uint4 _uccomp_data[] = {"); 1516 1.1 lukem 1517 1.1 lukem /* 1518 1.1 lukem * Now, if comps exist, write them out. 1519 1.1 lukem */ 1520 1.1 lukem if (comps_used > 0) { 1521 1.1 lukem for (i=0; i<comps_used; i++) { 1522 1.1 lukem if (i) fprintf(out, ","); 1523 1.1 lukem fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx", 1524 1.1 lukem (unsigned long) comps[i].comp, (unsigned long) comps[i].count, 1525 1.1 lukem (unsigned long) comps[i].code1, (unsigned long) comps[i].code2); 1526 1.1 lukem } 1527 1.1 lukem } else { 1528 1.1 lukem fprintf(out, "\t0"); 1529 1.1 lukem } 1530 1.1 lukem fprintf(out, "\n};\n\n"); 1531 1.1 lukem #else 1532 1.1 lukem /* 1533 1.1 lukem * Open the comp.dat file. 1534 1.1 lukem */ 1535 1.1 lukem snprintf(path, sizeof path, "%s" LDAP_DIRSEP "comp.dat", opath); 1536 1.1 lukem if ((out = fopen(path, "wb")) == 0) 1537 1.1 lukem return; 1538 1.1 lukem 1539 1.1 lukem /* 1540 1.1 lukem * Write the header. 1541 1.1 lukem */ 1542 1.1 lukem hdr[1] = (ac_uint2) comps_used * 4; 1543 1.1 lukem fwrite((char *) hdr, sizeof(ac_uint2), 2, out); 1544 1.1 lukem 1545 1.1 lukem /* 1546 1.1 lukem * Write out the byte count to maintain header size. 1547 1.1 lukem */ 1548 1.1 lukem bytes = comps_used * sizeof(_comp_t); 1549 1.1 lukem fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); 1550 1.1 lukem 1551 1.1 lukem /* 1552 1.1 lukem * Now, if comps exist, write them out. 1553 1.1 lukem */ 1554 1.1 lukem if (comps_used > 0) 1555 1.1 lukem fwrite((char *) comps, sizeof(_comp_t), comps_used, out); 1556 1.1 lukem 1557 1.1 lukem fclose(out); 1558 1.1 lukem #endif 1559 1.1 lukem 1560 1.1 lukem /***************************************************************** 1561 1.1 lukem * 1562 1.1 lukem * Generate the decomposition data. 1563 1.1 lukem * 1564 1.1 lukem *****************************************************************/ 1565 1.1 lukem 1566 1.1 lukem /* 1567 1.1 lukem * Fully expand all decompositions before generating the output file. 1568 1.1 lukem */ 1569 1.1 lukem expand_decomp(); 1570 1.1 lukem 1571 1.1 lukem #if HARDCODE_DATA 1572 1.1 lukem fprintf(out, PREF "ac_uint4 _ucdcmp_size = %ld;\n\n", 1573 1.1 lukem decomps_used * 2L); 1574 1.1 lukem 1575 1.1 lukem fprintf(out, PREF "ac_uint4 _ucdcmp_nodes[] = {"); 1576 1.1 lukem 1577 1.1 lukem if (decomps_used) { 1578 1.1 lukem /* 1579 1.1 lukem * Write the list of decomp nodes. 1580 1.1 lukem */ 1581 1.1 lukem for (i = idx = 0; i < decomps_used; i++) { 1582 1.1 lukem fprintf(out, "\n\t0x%08lx, 0x%08lx,", 1583 1.1 lukem (unsigned long) decomps[i].code, (unsigned long) idx); 1584 1.1 lukem idx += decomps[i].used; 1585 1.1 lukem } 1586 1.1 lukem 1587 1.1 lukem /* 1588 1.1 lukem * Write the sentinel index as the last decomp node. 1589 1.1 lukem */ 1590 1.1 lukem fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx); 1591 1.1 lukem 1592 1.1 lukem fprintf(out, PREF "ac_uint4 _ucdcmp_decomp[] = {"); 1593 1.1 lukem /* 1594 1.1 lukem * Write the decompositions themselves. 1595 1.1 lukem */ 1596 1.1 lukem k = 0; 1597 1.1 lukem for (i = 0; i < decomps_used; i++) 1598 1.1 lukem for (j=0; j<decomps[i].used; j++) { 1599 1.1 lukem if (k) fprintf(out, ","); 1600 1.1 lukem if (!(k&3)) fprintf(out,"\n\t"); 1601 1.1 lukem else fprintf(out, " "); 1602 1.1 lukem k++; 1603 1.1 lukem fprintf(out, "0x%08lx", (unsigned long) decomps[i].decomp[j]); 1604 1.1 lukem } 1605 1.1 lukem fprintf(out, "\n};\n\n"); 1606 1.1 lukem } 1607 1.1 lukem #else 1608 1.1 lukem /* 1609 1.1 lukem * Open the decomp.dat file. 1610 1.1 lukem */ 1611 1.1 lukem snprintf(path, sizeof path, "%s" LDAP_DIRSEP "decomp.dat", opath); 1612 1.1 lukem if ((out = fopen(path, "wb")) == 0) 1613 1.1 lukem return; 1614 1.1 lukem 1615 1.1 lukem hdr[1] = decomps_used; 1616 1.1 lukem 1617 1.1 lukem /* 1618 1.1 lukem * Write the header. 1619 1.1 lukem */ 1620 1.1 lukem fwrite((char *) hdr, sizeof(ac_uint2), 2, out); 1621 1.1 lukem 1622 1.1 lukem /* 1623 1.1 lukem * Write a temporary byte count which will be calculated as the 1624 1.1 lukem * decompositions are written out. 1625 1.1 lukem */ 1626 1.1 lukem bytes = 0; 1627 1.1 lukem fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); 1628 1.1 lukem 1629 1.1 lukem if (decomps_used) { 1630 1.1 lukem /* 1631 1.1 lukem * Write the list of decomp nodes. 1632 1.1 lukem */ 1633 1.1 lukem for (i = idx = 0; i < decomps_used; i++) { 1634 1.1 lukem fwrite((char *) &decomps[i].code, sizeof(ac_uint4), 1, out); 1635 1.1 lukem fwrite((char *) &idx, sizeof(ac_uint4), 1, out); 1636 1.1 lukem idx += decomps[i].used; 1637 1.1 lukem } 1638 1.1 lukem 1639 1.1 lukem /* 1640 1.1 lukem * Write the sentinel index as the last decomp node. 1641 1.1 lukem */ 1642 1.1 lukem fwrite((char *) &idx, sizeof(ac_uint4), 1, out); 1643 1.1 lukem 1644 1.1 lukem /* 1645 1.1 lukem * Write the decompositions themselves. 1646 1.1 lukem */ 1647 1.1 lukem for (i = 0; i < decomps_used; i++) 1648 1.1 lukem fwrite((char *) decomps[i].decomp, sizeof(ac_uint4), 1649 1.1 lukem decomps[i].used, out); 1650 1.1 lukem 1651 1.1 lukem /* 1652 1.1 lukem * Seek back to the beginning and write the byte count. 1653 1.1 lukem */ 1654 1.1 lukem bytes = (sizeof(ac_uint4) * idx) + 1655 1.1 lukem (sizeof(ac_uint4) * ((hdr[1] << 1) + 1)); 1656 1.1 lukem fseek(out, sizeof(ac_uint2) << 1, 0L); 1657 1.1 lukem fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); 1658 1.1 lukem 1659 1.1 lukem fclose(out); 1660 1.1 lukem } 1661 1.1 lukem #endif 1662 1.1 lukem 1663 1.4 christos #if HARDCODE_DATA 1664 1.1 lukem fprintf(out, PREF "ac_uint4 _uckdcmp_size = %ld;\n\n", 1665 1.1 lukem kdecomps_used * 2L); 1666 1.1 lukem 1667 1.1 lukem fprintf(out, PREF "ac_uint4 _uckdcmp_nodes[] = {"); 1668 1.1 lukem 1669 1.1 lukem if (kdecomps_used) { 1670 1.1 lukem /* 1671 1.1 lukem * Write the list of kdecomp nodes. 1672 1.1 lukem */ 1673 1.1 lukem for (i = idx = 0; i < kdecomps_used; i++) { 1674 1.1 lukem fprintf(out, "\n\t0x%08lx, 0x%08lx,", 1675 1.1 lukem (unsigned long) kdecomps[i].code, (unsigned long) idx); 1676 1.1 lukem idx += kdecomps[i].used; 1677 1.1 lukem } 1678 1.1 lukem 1679 1.1 lukem /* 1680 1.1 lukem * Write the sentinel index as the last decomp node. 1681 1.1 lukem */ 1682 1.1 lukem fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx); 1683 1.1 lukem 1684 1.1 lukem fprintf(out, PREF "ac_uint4 _uckdcmp_decomp[] = {"); 1685 1.1 lukem 1686 1.1 lukem /* 1687 1.1 lukem * Write the decompositions themselves. 1688 1.1 lukem */ 1689 1.1 lukem k = 0; 1690 1.1 lukem for (i = 0; i < kdecomps_used; i++) 1691 1.1 lukem for (j=0; j<kdecomps[i].used; j++) { 1692 1.1 lukem if (k) fprintf(out, ","); 1693 1.1 lukem if (!(k&3)) fprintf(out,"\n\t"); 1694 1.1 lukem else fprintf(out, " "); 1695 1.1 lukem k++; 1696 1.1 lukem fprintf(out, "0x%08lx", (unsigned long) kdecomps[i].decomp[j]); 1697 1.1 lukem } 1698 1.1 lukem fprintf(out, "\n};\n\n"); 1699 1.1 lukem } 1700 1.1 lukem #else 1701 1.1 lukem /* 1702 1.1 lukem * Open the kdecomp.dat file. 1703 1.1 lukem */ 1704 1.1 lukem snprintf(path, sizeof path, "%s" LDAP_DIRSEP "kdecomp.dat", opath); 1705 1.1 lukem if ((out = fopen(path, "wb")) == 0) 1706 1.1 lukem return; 1707 1.1 lukem 1708 1.1 lukem hdr[1] = kdecomps_used; 1709 1.1 lukem 1710 1.1 lukem /* 1711 1.1 lukem * Write the header. 1712 1.1 lukem */ 1713 1.1 lukem fwrite((char *) hdr, sizeof(ac_uint2), 2, out); 1714 1.1 lukem 1715 1.1 lukem /* 1716 1.1 lukem * Write a temporary byte count which will be calculated as the 1717 1.1 lukem * decompositions are written out. 1718 1.1 lukem */ 1719 1.1 lukem bytes = 0; 1720 1.1 lukem fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); 1721 1.1 lukem 1722 1.1 lukem if (kdecomps_used) { 1723 1.1 lukem /* 1724 1.1 lukem * Write the list of kdecomp nodes. 1725 1.1 lukem */ 1726 1.1 lukem for (i = idx = 0; i < kdecomps_used; i++) { 1727 1.1 lukem fwrite((char *) &kdecomps[i].code, sizeof(ac_uint4), 1, out); 1728 1.1 lukem fwrite((char *) &idx, sizeof(ac_uint4), 1, out); 1729 1.1 lukem idx += kdecomps[i].used; 1730 1.1 lukem } 1731 1.1 lukem 1732 1.1 lukem /* 1733 1.1 lukem * Write the sentinel index as the last decomp node. 1734 1.1 lukem */ 1735 1.1 lukem fwrite((char *) &idx, sizeof(ac_uint4), 1, out); 1736 1.1 lukem 1737 1.1 lukem /* 1738 1.1 lukem * Write the decompositions themselves. 1739 1.1 lukem */ 1740 1.1 lukem for (i = 0; i < kdecomps_used; i++) 1741 1.1 lukem fwrite((char *) kdecomps[i].decomp, sizeof(ac_uint4), 1742 1.1 lukem kdecomps[i].used, out); 1743 1.1 lukem 1744 1.1 lukem /* 1745 1.1 lukem * Seek back to the beginning and write the byte count. 1746 1.1 lukem */ 1747 1.1 lukem bytes = (sizeof(ac_uint4) * idx) + 1748 1.1 lukem (sizeof(ac_uint4) * ((hdr[1] << 1) + 1)); 1749 1.1 lukem fseek(out, sizeof(ac_uint2) << 1, 0L); 1750 1.1 lukem fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); 1751 1.1 lukem 1752 1.1 lukem fclose(out); 1753 1.1 lukem } 1754 1.1 lukem #endif 1755 1.1 lukem 1756 1.1 lukem /***************************************************************** 1757 1.1 lukem * 1758 1.1 lukem * Generate the combining class data. 1759 1.1 lukem * 1760 1.1 lukem *****************************************************************/ 1761 1.4 christos #if HARDCODE_DATA 1762 1.1 lukem fprintf(out, PREF "ac_uint4 _uccmcl_size = %ld;\n\n", (long) ccl_used); 1763 1.1 lukem 1764 1.1 lukem fprintf(out, PREF "ac_uint4 _uccmcl_nodes[] = {"); 1765 1.1 lukem 1766 1.1 lukem if (ccl_used > 0) { 1767 1.1 lukem /* 1768 1.1 lukem * Write the combining class ranges out. 1769 1.1 lukem */ 1770 1.1 lukem for (i = 0; i<ccl_used; i++) { 1771 1.1 lukem if (i) fprintf(out, ","); 1772 1.1 lukem if (!(i&3)) fprintf(out, "\n\t"); 1773 1.1 lukem else fprintf(out, " "); 1774 1.1 lukem fprintf(out, "0x%08lx", (unsigned long) ccl[i]); 1775 1.1 lukem } 1776 1.1 lukem } else { 1777 1.1 lukem fprintf(out, "\t0"); 1778 1.1 lukem } 1779 1.1 lukem fprintf(out, "\n};\n\n"); 1780 1.1 lukem #else 1781 1.1 lukem /* 1782 1.1 lukem * Open the cmbcl.dat file. 1783 1.1 lukem */ 1784 1.1 lukem snprintf(path, sizeof path, "%s" LDAP_DIRSEP "cmbcl.dat", opath); 1785 1.1 lukem if ((out = fopen(path, "wb")) == 0) 1786 1.1 lukem return; 1787 1.1 lukem 1788 1.1 lukem /* 1789 1.1 lukem * Set the number of ranges used. Each range has a combining class which 1790 1.1 lukem * means each entry is a 3-tuple. 1791 1.1 lukem */ 1792 1.1 lukem hdr[1] = ccl_used / 3; 1793 1.1 lukem 1794 1.1 lukem /* 1795 1.1 lukem * Write the header. 1796 1.1 lukem */ 1797 1.1 lukem fwrite((char *) hdr, sizeof(ac_uint2), 2, out); 1798 1.1 lukem 1799 1.1 lukem /* 1800 1.1 lukem * Write out the byte count to maintain header size. 1801 1.1 lukem */ 1802 1.1 lukem bytes = ccl_used * sizeof(ac_uint4); 1803 1.1 lukem fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); 1804 1.1 lukem 1805 1.1 lukem if (ccl_used > 0) 1806 1.1 lukem /* 1807 1.1 lukem * Write the combining class ranges out. 1808 1.1 lukem */ 1809 1.1 lukem fwrite((char *) ccl, sizeof(ac_uint4), ccl_used, out); 1810 1.1 lukem 1811 1.1 lukem fclose(out); 1812 1.1 lukem #endif 1813 1.1 lukem 1814 1.1 lukem /***************************************************************** 1815 1.1 lukem * 1816 1.1 lukem * Generate the number data. 1817 1.1 lukem * 1818 1.1 lukem *****************************************************************/ 1819 1.1 lukem 1820 1.1 lukem #if HARDCODE_DATA 1821 1.1 lukem fprintf(out, PREF "ac_uint4 _ucnum_size = %lu;\n\n", 1822 1.1 lukem (unsigned long)ncodes_used<<1); 1823 1.1 lukem 1824 1.1 lukem fprintf(out, PREF "ac_uint4 _ucnum_nodes[] = {"); 1825 1.1 lukem 1826 1.1 lukem /* 1827 1.1 lukem * Now, if number mappings exist, write them out. 1828 1.1 lukem */ 1829 1.1 lukem if (ncodes_used > 0) { 1830 1.1 lukem for (i = 0; i<ncodes_used; i++) { 1831 1.1 lukem if (i) fprintf(out, ","); 1832 1.1 lukem if (!(i&1)) fprintf(out, "\n\t"); 1833 1.1 lukem else fprintf(out, " "); 1834 1.1 lukem fprintf(out, "0x%08lx, 0x%08lx", 1835 1.1 lukem (unsigned long) ncodes[i].code, (unsigned long) ncodes[i].idx); 1836 1.1 lukem } 1837 1.1 lukem fprintf(out, "\n};\n\n"); 1838 1.1 lukem 1839 1.1 lukem fprintf(out, PREF "short _ucnum_vals[] = {"); 1840 1.1 lukem for (i = 0; i<nums_used; i++) { 1841 1.1 lukem if (i) fprintf(out, ","); 1842 1.1 lukem if (!(i&3)) fprintf(out, "\n\t"); 1843 1.1 lukem else fprintf(out, " "); 1844 1.1 lukem if (nums[i].numerator < 0) { 1845 1.1 lukem fprintf(out, "%6d, 0x%04x", 1846 1.1 lukem nums[i].numerator, nums[i].denominator); 1847 1.1 lukem } else { 1848 1.1 lukem fprintf(out, "0x%04x, 0x%04x", 1849 1.1 lukem nums[i].numerator, nums[i].denominator); 1850 1.1 lukem } 1851 1.1 lukem } 1852 1.1 lukem fprintf(out, "\n};\n\n"); 1853 1.1 lukem } 1854 1.1 lukem #else 1855 1.1 lukem /* 1856 1.1 lukem * Open the num.dat file. 1857 1.1 lukem */ 1858 1.1 lukem snprintf(path, sizeof path, "%s" LDAP_DIRSEP "num.dat", opath); 1859 1.1 lukem if ((out = fopen(path, "wb")) == 0) 1860 1.1 lukem return; 1861 1.1 lukem 1862 1.1 lukem /* 1863 1.1 lukem * The count part of the header will be the total number of codes that 1864 1.1 lukem * have numbers. 1865 1.1 lukem */ 1866 1.1 lukem hdr[1] = (ac_uint2) (ncodes_used << 1); 1867 1.1 lukem bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t)); 1868 1.1 lukem 1869 1.1 lukem /* 1870 1.1 lukem * Write the header. 1871 1.1 lukem */ 1872 1.1 lukem fwrite((char *) hdr, sizeof(ac_uint2), 2, out); 1873 1.1 lukem 1874 1.1 lukem /* 1875 1.1 lukem * Write out the byte count to maintain header size. 1876 1.1 lukem */ 1877 1.1 lukem fwrite((char *) &bytes, sizeof(ac_uint4), 1, out); 1878 1.1 lukem 1879 1.1 lukem /* 1880 1.1 lukem * Now, if number mappings exist, write them out. 1881 1.1 lukem */ 1882 1.1 lukem if (ncodes_used > 0) { 1883 1.1 lukem fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out); 1884 1.1 lukem fwrite((char *) nums, sizeof(_num_t), nums_used, out); 1885 1.1 lukem } 1886 1.1 lukem #endif 1887 1.1 lukem 1888 1.1 lukem fclose(out); 1889 1.1 lukem } 1890 1.1 lukem 1891 1.1 lukem static void 1892 1.1 lukem usage(char *prog) 1893 1.1 lukem { 1894 1.1 lukem fprintf(stderr, 1895 1.1 lukem "Usage: %s [-o output-directory|-x composition-exclusions]", prog); 1896 1.1 lukem fprintf(stderr, " datafile1 datafile2 ...\n\n"); 1897 1.1 lukem fprintf(stderr, 1898 1.1 lukem "-o output-directory\n\t\tWrite the output files to a different"); 1899 1.1 lukem fprintf(stderr, " directory (default: .).\n"); 1900 1.1 lukem fprintf(stderr, 1901 1.1 lukem "-x composition-exclusion\n\t\tFile of composition codes"); 1902 1.1 lukem fprintf(stderr, " that should be excluded.\n"); 1903 1.1 lukem exit(1); 1904 1.1 lukem } 1905 1.1 lukem 1906 1.1 lukem int 1907 1.1 lukem main(int argc, char *argv[]) 1908 1.1 lukem { 1909 1.1 lukem FILE *in; 1910 1.1 lukem char *prog, *opath; 1911 1.1 lukem 1912 1.1 lukem prog = lutil_progname( "ucgendat", argc, argv ); 1913 1.1 lukem 1914 1.1 lukem opath = 0; 1915 1.1 lukem in = stdin; 1916 1.1 lukem 1917 1.1 lukem argc--; 1918 1.1 lukem argv++; 1919 1.1 lukem 1920 1.1 lukem while (argc > 0) { 1921 1.1 lukem if (argv[0][0] == '-') { 1922 1.1 lukem switch (argv[0][1]) { 1923 1.1 lukem case 'o': 1924 1.1 lukem argc--; 1925 1.1 lukem argv++; 1926 1.1 lukem opath = argv[0]; 1927 1.1 lukem break; 1928 1.1 lukem case 'x': 1929 1.1 lukem argc--; 1930 1.1 lukem argv++; 1931 1.1 lukem if ((in = fopen(argv[0], "r")) == 0) 1932 1.1 lukem fprintf(stderr, 1933 1.1 lukem "%s: unable to open composition exclusion file %s\n", 1934 1.1 lukem prog, argv[0]); 1935 1.1 lukem else { 1936 1.1 lukem read_compexdata(in); 1937 1.1 lukem fclose(in); 1938 1.1 lukem in = 0; 1939 1.1 lukem } 1940 1.1 lukem break; 1941 1.1 lukem default: 1942 1.1 lukem usage(prog); 1943 1.1 lukem } 1944 1.1 lukem } else { 1945 1.1 lukem if (in != stdin && in != NULL) 1946 1.1 lukem fclose(in); 1947 1.1 lukem if ((in = fopen(argv[0], "r")) == 0) 1948 1.1 lukem fprintf(stderr, "%s: unable to open ctype file %s\n", 1949 1.1 lukem prog, argv[0]); 1950 1.1 lukem else { 1951 1.1 lukem read_cdata(in); 1952 1.1 lukem fclose(in); 1953 1.1 lukem in = 0; 1954 1.1 lukem } 1955 1.1 lukem } 1956 1.1 lukem argc--; 1957 1.1 lukem argv++; 1958 1.1 lukem } 1959 1.1 lukem 1960 1.1 lukem if (opath == 0) 1961 1.1 lukem opath = "."; 1962 1.1 lukem write_cdata(opath); 1963 1.1 lukem 1964 1.1 lukem return 0; 1965 1.1 lukem } 1966