/* fclang.c revision a6844aab */
1/* 2 * fontconfig/src/fclang.c 3 * 4 * Copyright © 2002 Keith Packard 5 * 6 * Permission to use, copy, modify, distribute, and sell this software and its 7 * documentation for any purpose is hereby granted without fee, provided that 8 * the above copyright notice appear in all copies and that both that 9 * copyright notice and this permission notice appear in supporting 10 * documentation, and that the name of Keith Packard not be used in 11 * advertising or publicity pertaining to distribution of the software without 12 * specific, written prior permission. Keith Packard makes no 13 * representations about the suitability of this software for any purpose. It 14 * is provided "as is" without express or implied warranty. 15 * 16 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO 18 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR 19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 22 * PERFORMANCE OF THIS SOFTWARE. 
23 */ 24 25#include "fcint.h" 26#include "fcftint.h" 27 28typedef struct { 29 const FcChar8 lang[8]; 30 const FcCharSet charset; 31} FcLangCharSet; 32 33typedef struct { 34 int begin; 35 int end; 36} FcLangCharSetRange; 37 38#include "../fc-lang/fclang.h" 39 40struct _FcLangSet { 41 FcStrSet *extra; 42 FcChar32 map_size; 43 FcChar32 map[NUM_LANG_SET_MAP]; 44}; 45 46static void 47FcLangSetBitSet (FcLangSet *ls, 48 unsigned int id) 49{ 50 int bucket; 51 52 id = fcLangCharSetIndices[id]; 53 bucket = id >> 5; 54 if (bucket >= ls->map_size) 55 return; /* shouldn't happen really */ 56 57 ls->map[bucket] |= ((FcChar32) 1 << (id & 0x1f)); 58} 59 60static FcBool 61FcLangSetBitGet (const FcLangSet *ls, 62 unsigned int id) 63{ 64 int bucket; 65 66 id = fcLangCharSetIndices[id]; 67 bucket = id >> 5; 68 if (bucket >= ls->map_size) 69 return FcFalse; 70 71 return ((ls->map[bucket] >> (id & 0x1f)) & 1) ? FcTrue : FcFalse; 72} 73 74FcLangSet * 75FcFreeTypeLangSet (const FcCharSet *charset, 76 const FcChar8 *exclusiveLang) 77{ 78 int i, j; 79 FcChar32 missing; 80 const FcCharSet *exclusiveCharset = 0; 81 FcLangSet *ls; 82 83 if (exclusiveLang) 84 exclusiveCharset = FcLangGetCharSet (exclusiveLang); 85 ls = FcLangSetCreate (); 86 if (!ls) 87 return 0; 88 if (FcDebug() & FC_DBG_LANGSET) 89 { 90 printf ("font charset"); 91 FcCharSetPrint (charset); 92 printf ("\n"); 93 } 94 for (i = 0; i < NUM_LANG_CHAR_SET; i++) 95 { 96 if (FcDebug() & FC_DBG_LANGSET) 97 { 98 printf ("%s charset", fcLangCharSets[i].lang); 99 FcCharSetPrint (&fcLangCharSets[i].charset); 100 printf ("\n"); 101 } 102 103 /* 104 * Check for Han charsets to make fonts 105 * which advertise support for a single language 106 * not support other Han languages 107 */ 108 if (exclusiveCharset && 109 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang)) 110 { 111 if (fcLangCharSets[i].charset.num != exclusiveCharset->num) 112 continue; 113 114 for (j = 0; j < fcLangCharSets[i].charset.num; j++) 115 if 
(FcCharSetLeaf(&fcLangCharSets[i].charset, j) != 116 FcCharSetLeaf(exclusiveCharset, j)) 117 continue; 118 } 119 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset); 120 if (FcDebug() & FC_DBG_SCANV) 121 { 122 if (missing && missing < 10) 123 { 124 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset, 125 charset); 126 FcChar32 ucs4; 127 FcChar32 map[FC_CHARSET_MAP_SIZE]; 128 FcChar32 next; 129 130 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing); 131 printf ("{"); 132 for (ucs4 = FcCharSetFirstPage (missed, map, &next); 133 ucs4 != FC_CHARSET_DONE; 134 ucs4 = FcCharSetNextPage (missed, map, &next)) 135 { 136 int i, j; 137 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++) 138 if (map[i]) 139 { 140 for (j = 0; j < 32; j++) 141 if (map[i] & (1 << j)) 142 printf (" %04x", ucs4 + i * 32 + j); 143 } 144 } 145 printf (" }\n\t"); 146 FcCharSetDestroy (missed); 147 } 148 else 149 printf ("%s(%u) ", fcLangCharSets[i].lang, missing); 150 } 151 if (!missing) 152 FcLangSetBitSet (ls, i); 153 } 154 155 if (FcDebug() & FC_DBG_SCANV) 156 printf ("\n"); 157 158 159 return ls; 160} 161 162#define FcLangEnd(c) ((c) == '-' || (c) == '\0') 163 164FcLangResult 165FcLangCompare (const FcChar8 *s1, const FcChar8 *s2) 166{ 167 FcChar8 c1, c2; 168 FcLangResult result = FcLangDifferentLang; 169 170 for (;;) 171 { 172 c1 = *s1++; 173 c2 = *s2++; 174 175 c1 = FcToLower (c1); 176 c2 = FcToLower (c2); 177 if (c1 != c2) 178 { 179 if (FcLangEnd (c1) && FcLangEnd (c2)) 180 result = FcLangDifferentTerritory; 181 return result; 182 } 183 else if (!c1) 184 return FcLangEqual; 185 else if (c1 == '-') 186 result = FcLangDifferentTerritory; 187 } 188} 189 190/* 191 * Return FcTrue when super contains sub. 
192 * 193 * super contains sub if super and sub have the same 194 * language and either the same country or one 195 * is missing the country 196 */ 197 198static FcBool 199FcLangContains (const FcChar8 *super, const FcChar8 *sub) 200{ 201 FcChar8 c1, c2; 202 203 for (;;) 204 { 205 c1 = *super++; 206 c2 = *sub++; 207 208 c1 = FcToLower (c1); 209 c2 = FcToLower (c2); 210 if (c1 != c2) 211 { 212 /* see if super has a country while sub is mising one */ 213 if (c1 == '-' && c2 == '\0') 214 return FcTrue; 215 /* see if sub has a country while super is mising one */ 216 if (c1 == '\0' && c2 == '-') 217 return FcTrue; 218 return FcFalse; 219 } 220 else if (!c1) 221 return FcTrue; 222 } 223} 224 225const FcCharSet * 226FcLangGetCharSet (const FcChar8 *lang) 227{ 228 int i; 229 int country = -1; 230 231 for (i = 0; i < NUM_LANG_CHAR_SET; i++) 232 { 233 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) { 234 case FcLangEqual: 235 return &fcLangCharSets[i].charset; 236 case FcLangDifferentTerritory: 237 if (country == -1) 238 country = i; 239 case FcLangDifferentLang: 240 default: 241 break; 242 } 243 } 244 if (country == -1) 245 return 0; 246 return &fcLangCharSets[country].charset; 247} 248 249FcStrSet * 250FcGetLangs (void) 251{ 252 FcStrSet *langs; 253 int i; 254 255 langs = FcStrSetCreate(); 256 if (!langs) 257 return 0; 258 259 for (i = 0; i < NUM_LANG_CHAR_SET; i++) 260 FcStrSetAdd (langs, fcLangCharSets[i].lang); 261 262 return langs; 263} 264 265FcLangSet * 266FcLangSetCreate (void) 267{ 268 FcLangSet *ls; 269 270 ls = malloc (sizeof (FcLangSet)); 271 if (!ls) 272 return 0; 273 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet)); 274 memset (ls->map, '\0', sizeof (ls->map)); 275 ls->map_size = NUM_LANG_SET_MAP; 276 ls->extra = 0; 277 return ls; 278} 279 280void 281FcLangSetDestroy (FcLangSet *ls) 282{ 283 if (ls->extra) 284 FcStrSetDestroy (ls->extra); 285 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet)); 286 free (ls); 287} 288 289FcLangSet * 290FcLangSetCopy (const 
FcLangSet *ls) 291{ 292 FcLangSet *new; 293 294 new = FcLangSetCreate (); 295 if (!new) 296 goto bail0; 297 memset (new->map, '\0', sizeof (new->map)); 298 memcpy (new->map, ls->map, FC_MIN (sizeof (new->map), ls->map_size * sizeof (ls->map[0]))); 299 if (ls->extra) 300 { 301 FcStrList *list; 302 FcChar8 *extra; 303 304 new->extra = FcStrSetCreate (); 305 if (!new->extra) 306 goto bail1; 307 308 list = FcStrListCreate (ls->extra); 309 if (!list) 310 goto bail1; 311 312 while ((extra = FcStrListNext (list))) 313 if (!FcStrSetAdd (new->extra, extra)) 314 { 315 FcStrListDone (list); 316 goto bail1; 317 } 318 FcStrListDone (list); 319 } 320 return new; 321bail1: 322 FcLangSetDestroy (new); 323bail0: 324 return 0; 325} 326 327static int 328FcLangSetIndex (const FcChar8 *lang) 329{ 330 int low, high, mid = 0; 331 int cmp = 0; 332 FcChar8 firstChar = FcToLower(lang[0]); 333 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0'; 334 335 if (firstChar < 'a') 336 { 337 low = 0; 338 high = fcLangCharSetRanges[0].begin; 339 } 340 else if(firstChar > 'z') 341 { 342 low = fcLangCharSetRanges[25].begin; 343 high = NUM_LANG_CHAR_SET - 1; 344 } 345 else 346 { 347 low = fcLangCharSetRanges[firstChar - 'a'].begin; 348 high = fcLangCharSetRanges[firstChar - 'a'].end; 349 /* no matches */ 350 if (low > high) 351 return -low; /* next entry after where it would be */ 352 } 353 354 while (low <= high) 355 { 356 mid = (high + low) >> 1; 357 if(fcLangCharSets[mid].lang[0] != firstChar) 358 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang); 359 else 360 { /* fast path for resolving 2-letter languages (by far the most common) after 361 * finding the first char (probably already true because of the hash table) */ 362 cmp = fcLangCharSets[mid].lang[1] - secondChar; 363 if (cmp == 0 && 364 (fcLangCharSets[mid].lang[2] != '\0' || 365 lang[2] != '\0')) 366 { 367 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2, 368 lang+2); 369 } 370 } 371 if (cmp == 0) 372 return mid; 373 if (cmp < 
0) 374 low = mid + 1; 375 else 376 high = mid - 1; 377 } 378 if (cmp < 0) 379 mid++; 380 return -(mid + 1); 381} 382 383FcBool 384FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang) 385{ 386 int id; 387 388 id = FcLangSetIndex (lang); 389 if (id >= 0) 390 { 391 FcLangSetBitSet (ls, id); 392 return FcTrue; 393 } 394 if (!ls->extra) 395 { 396 ls->extra = FcStrSetCreate (); 397 if (!ls->extra) 398 return FcFalse; 399 } 400 return FcStrSetAdd (ls->extra, lang); 401} 402 403FcLangResult 404FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang) 405{ 406 int id; 407 FcLangResult best, r; 408 int i; 409 410 id = FcLangSetIndex (lang); 411 if (id < 0) 412 id = -id - 1; 413 else if (FcLangSetBitGet (ls, id)) 414 return FcLangEqual; 415 best = FcLangDifferentLang; 416 for (i = id - 1; i >= 0; i--) 417 { 418 r = FcLangCompare (lang, fcLangCharSets[i].lang); 419 if (r == FcLangDifferentLang) 420 break; 421 if (FcLangSetBitGet (ls, i) && r < best) 422 best = r; 423 } 424 for (i = id; i < NUM_LANG_CHAR_SET; i++) 425 { 426 r = FcLangCompare (lang, fcLangCharSets[i].lang); 427 if (r == FcLangDifferentLang) 428 break; 429 if (FcLangSetBitGet (ls, i) && r < best) 430 best = r; 431 } 432 if (ls->extra) 433 { 434 FcStrList *list = FcStrListCreate (ls->extra); 435 FcChar8 *extra; 436 437 if (list) 438 { 439 while (best > FcLangEqual && (extra = FcStrListNext (list))) 440 { 441 r = FcLangCompare (lang, extra); 442 if (r < best) 443 best = r; 444 } 445 FcStrListDone (list); 446 } 447 } 448 return best; 449} 450 451static FcLangResult 452FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set) 453{ 454 FcStrList *list = FcStrListCreate (set); 455 FcLangResult r, best = FcLangDifferentLang; 456 FcChar8 *extra; 457 458 if (list) 459 { 460 while (best > FcLangEqual && (extra = FcStrListNext (list))) 461 { 462 r = FcLangSetHasLang (ls, extra); 463 if (r < best) 464 best = r; 465 } 466 FcStrListDone (list); 467 } 468 return best; 469} 470 471FcLangResult 472FcLangSetCompare (const 
FcLangSet *lsa, const FcLangSet *lsb) 473{ 474 int i, j, count; 475 FcLangResult best, r; 476 477 count = FC_MIN (lsa->map_size, lsb->map_size); 478 count = FC_MIN (NUM_LANG_SET_MAP, count); 479 for (i = 0; i < count; i++) 480 if (lsa->map[i] & lsb->map[i]) 481 return FcLangEqual; 482 best = FcLangDifferentLang; 483 for (j = 0; j < NUM_COUNTRY_SET; j++) 484 for (i = 0; i < count; i++) 485 if ((lsa->map[i] & fcLangCountrySets[j][i]) && 486 (lsb->map[i] & fcLangCountrySets[j][i])) 487 { 488 best = FcLangDifferentTerritory; 489 break; 490 } 491 if (lsa->extra) 492 { 493 r = FcLangSetCompareStrSet (lsb, lsa->extra); 494 if (r < best) 495 best = r; 496 } 497 if (best > FcLangEqual && lsb->extra) 498 { 499 r = FcLangSetCompareStrSet (lsa, lsb->extra); 500 if (r < best) 501 best = r; 502 } 503 return best; 504} 505 506/* 507 * Used in computing values -- mustn't allocate any storage 508 */ 509FcLangSet * 510FcLangSetPromote (const FcChar8 *lang) 511{ 512 static FcLangSet ls; 513 static FcStrSet strs; 514 static FcChar8 *str; 515 int id; 516 517 memset (ls.map, '\0', sizeof (ls.map)); 518 ls.extra = 0; 519 id = FcLangSetIndex (lang); 520 if (id > 0) 521 { 522 FcLangSetBitSet (&ls, id); 523 } 524 else 525 { 526 ls.extra = &strs; 527 strs.num = 1; 528 strs.size = 1; 529 strs.strs = &str; 530 strs.ref = 1; 531 str = (FcChar8 *) lang; 532 } 533 return &ls; 534} 535 536FcChar32 537FcLangSetHash (const FcLangSet *ls) 538{ 539 FcChar32 h = 0; 540 int i, count; 541 542 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP); 543 for (i = 0; i < count; i++) 544 h ^= ls->map[i]; 545 if (ls->extra) 546 h ^= ls->extra->num; 547 return h; 548} 549 550FcLangSet * 551FcNameParseLangSet (const FcChar8 *string) 552{ 553 FcChar8 lang[32], c = 0; 554 int i; 555 FcLangSet *ls; 556 557 ls = FcLangSetCreate (); 558 if (!ls) 559 goto bail0; 560 561 for(;;) 562 { 563 for(i = 0; i < 31;i++) 564 { 565 c = *string++; 566 if(c == '\0' || c == '|') 567 break; /* end of this code */ 568 lang[i] = c; 569 } 570 
lang[i] = '\0'; 571 if (!FcLangSetAdd (ls, lang)) 572 goto bail1; 573 if(c == '\0') 574 break; 575 } 576 return ls; 577bail1: 578 FcLangSetDestroy (ls); 579bail0: 580 return 0; 581} 582 583FcBool 584FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls) 585{ 586 int i, bit, count; 587 FcChar32 bits; 588 FcBool first = FcTrue; 589 590 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP); 591 for (i = 0; i < count; i++) 592 { 593 if ((bits = ls->map[i])) 594 { 595 for (bit = 0; bit <= 31; bit++) 596 if (bits & (1 << bit)) 597 { 598 int id = (i << 5) | bit; 599 if (!first) 600 if (!FcStrBufChar (buf, '|')) 601 return FcFalse; 602 if (!FcStrBufString (buf, fcLangCharSets[fcLangCharSetIndicesInv[id]].lang)) 603 return FcFalse; 604 first = FcFalse; 605 } 606 } 607 } 608 if (ls->extra) 609 { 610 FcStrList *list = FcStrListCreate (ls->extra); 611 FcChar8 *extra; 612 613 if (!list) 614 return FcFalse; 615 while ((extra = FcStrListNext (list))) 616 { 617 if (!first) 618 if (!FcStrBufChar (buf, '|')) 619 { 620 FcStrListDone (list); 621 return FcFalse; 622 } 623 if (!FcStrBufString (buf, extra)) 624 { 625 FcStrListDone (list); 626 return FcFalse; 627 } 628 first = FcFalse; 629 } 630 FcStrListDone (list); 631 } 632 return FcTrue; 633} 634 635FcBool 636FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb) 637{ 638 int i, count; 639 640 count = FC_MIN (lsa->map_size, lsb->map_size); 641 count = FC_MIN (NUM_LANG_SET_MAP, count); 642 for (i = 0; i < count; i++) 643 { 644 if (lsa->map[i] != lsb->map[i]) 645 return FcFalse; 646 } 647 if (!lsa->extra && !lsb->extra) 648 return FcTrue; 649 if (lsa->extra && lsb->extra) 650 return FcStrSetEqual (lsa->extra, lsb->extra); 651 return FcFalse; 652} 653 654static FcBool 655FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang) 656{ 657 int id; 658 int i; 659 660 id = FcLangSetIndex (lang); 661 if (id < 0) 662 id = -id - 1; 663 else if (FcLangSetBitGet (ls, id)) 664 return FcTrue; 665 /* 666 * search up and down among equal 
languages for a match 667 */ 668 for (i = id - 1; i >= 0; i--) 669 { 670 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang) 671 break; 672 if (FcLangSetBitGet (ls, i) && 673 FcLangContains (fcLangCharSets[i].lang, lang)) 674 return FcTrue; 675 } 676 for (i = id; i < NUM_LANG_CHAR_SET; i++) 677 { 678 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang) 679 break; 680 if (FcLangSetBitGet (ls, i) && 681 FcLangContains (fcLangCharSets[i].lang, lang)) 682 return FcTrue; 683 } 684 if (ls->extra) 685 { 686 FcStrList *list = FcStrListCreate (ls->extra); 687 FcChar8 *extra; 688 689 if (list) 690 { 691 while ((extra = FcStrListNext (list))) 692 { 693 if (FcLangContains (extra, lang)) 694 break; 695 } 696 FcStrListDone (list); 697 if (extra) 698 return FcTrue; 699 } 700 } 701 return FcFalse; 702} 703 704/* 705 * return FcTrue if lsa contains every language in lsb 706 */ 707FcBool 708FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb) 709{ 710 int i, j, count; 711 FcChar32 missing; 712 713 if (FcDebug() & FC_DBG_MATCHV) 714 { 715 printf ("FcLangSet "); FcLangSetPrint (lsa); 716 printf (" contains "); FcLangSetPrint (lsb); 717 printf ("\n"); 718 } 719 /* 720 * check bitmaps for missing language support 721 */ 722 count = FC_MIN (lsa->map_size, lsb->map_size); 723 count = FC_MIN (NUM_LANG_SET_MAP, count); 724 for (i = 0; i < count; i++) 725 { 726 missing = lsb->map[i] & ~lsa->map[i]; 727 if (missing) 728 { 729 for (j = 0; j < 32; j++) 730 if (missing & (1 << j)) 731 { 732 if (!FcLangSetContainsLang (lsa, 733 fcLangCharSets[fcLangCharSetIndicesInv[i*32 + j]].lang)) 734 { 735 if (FcDebug() & FC_DBG_MATCHV) 736 printf ("\tMissing bitmap %s\n", fcLangCharSets[fcLangCharSetIndicesInv[i*32+j]].lang); 737 return FcFalse; 738 } 739 } 740 } 741 } 742 if (lsb->extra) 743 { 744 FcStrList *list = FcStrListCreate (lsb->extra); 745 FcChar8 *extra; 746 747 if (list) 748 { 749 while ((extra = FcStrListNext (list))) 750 { 751 if 
(!FcLangSetContainsLang (lsa, extra)) 752 { 753 if (FcDebug() & FC_DBG_MATCHV) 754 printf ("\tMissing string %s\n", extra); 755 break; 756 } 757 } 758 FcStrListDone (list); 759 if (extra) 760 return FcFalse; 761 } 762 } 763 return FcTrue; 764} 765 766FcBool 767FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l) 768{ 769 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet))) 770 return FcFalse; 771 return FcTrue; 772} 773 774FcLangSet * 775FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l) 776{ 777 FcLangSet *l_serialize = FcSerializePtr (serialize, l); 778 779 if (!l_serialize) 780 return NULL; 781 memset (l_serialize->map, '\0', sizeof (l_serialize->map)); 782 memcpy (l_serialize->map, l->map, FC_MIN (sizeof (l_serialize->map), l->map_size * sizeof (l->map[0]))); 783 l_serialize->map_size = NUM_LANG_SET_MAP; 784 l_serialize->extra = NULL; /* We don't serialize ls->extra */ 785 return l_serialize; 786} 787 788FcStrSet * 789FcLangSetGetLangs (const FcLangSet *ls) 790{ 791 FcStrSet *langs; 792 int i; 793 794 langs = FcStrSetCreate(); 795 if (!langs) 796 return 0; 797 798 for (i = 0; i < NUM_LANG_CHAR_SET; i++) 799 if (FcLangSetBitGet (ls, i)) 800 FcStrSetAdd (langs, fcLangCharSets[i].lang); 801 802 if (ls->extra) 803 { 804 FcStrList *list = FcStrListCreate (ls->extra); 805 FcChar8 *extra; 806 807 if (list) 808 { 809 while ((extra = FcStrListNext (list))) 810 FcStrSetAdd (langs, extra); 811 812 FcStrListDone (list); 813 } 814 } 815 816 return langs; 817} 818 819#define __fclang__ 820#include "fcaliastail.h" 821#include "fcftaliastail.h" 822#undef __fclang__ 823