Home | History | Annotate | Line # | Download | only in libuniname
uniname.c revision 1.1
      1  1.1  christos /* Association between Unicode characters and their names.
      2  1.1  christos    Copyright (C) 2000-2002, 2005-2006 Free Software Foundation, Inc.
      3  1.1  christos 
      4  1.1  christos    This program is free software; you can redistribute it and/or modify
      5  1.1  christos    it under the terms of the GNU General Public License as published by
      6  1.1  christos    the Free Software Foundation; either version 2, or (at your option)
      7  1.1  christos    any later version.
      8  1.1  christos 
      9  1.1  christos    This program is distributed in the hope that it will be useful,
     10  1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11  1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12  1.1  christos    GNU General Public License for more details.
     13  1.1  christos 
     14  1.1  christos    You should have received a copy of the GNU General Public License
     15  1.1  christos    along with this program; if not, write to the Free Software Foundation,
     16  1.1  christos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
     17  1.1  christos 
     18  1.1  christos #ifdef HAVE_CONFIG_H
     19  1.1  christos # include <config.h>
     20  1.1  christos #endif
     21  1.1  christos 
     22  1.1  christos /* Specification.  */
     23  1.1  christos #include "uniname.h"
     24  1.1  christos 
     25  1.1  christos #include <assert.h>
     26  1.1  christos #include <stdbool.h>
     27  1.1  christos #include <stdio.h>
     28  1.1  christos #include <string.h>
     29  1.1  christos 
/* Number of elements of the array A.  A must be a real array, not a
   pointer.  The argument is parenthesized so that any array-valued
   expression can be passed safely.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
     31  1.1  christos 
     32  1.1  christos 
     33  1.1  christos /* Table of Unicode character names, derived from UnicodeData.txt.  */
     34  1.1  christos #define uint16_t unsigned short
     35  1.1  christos #define uint32_t unsigned int
     36  1.1  christos #include "uninames.h"
     37  1.1  christos /* It contains:
     38  1.1  christos   static const char unicode_name_words[34594] = ...;
     39  1.1  christos   #define UNICODE_CHARNAME_NUM_WORDS 5906
     40  1.1  christos   static const struct { uint16_t extra_offset; uint16_t ind_offset; } unicode_name_by_length[26] = ...;
     41  1.1  christos   #define UNICODE_CHARNAME_WORD_HANGUL 3624
     42  1.1  christos   #define UNICODE_CHARNAME_WORD_SYLLABLE 4654
     43  1.1  christos   #define UNICODE_CHARNAME_WORD_CJK 401
     44  1.1  christos   #define UNICODE_CHARNAME_WORD_COMPATIBILITY 5755
     45  1.1  christos   static const uint16_t unicode_names[62620] = ...;
     46  1.1  christos   static const struct { uint16_t code; uint16_t name; } unicode_name_to_code[15257] = ...;
     47  1.1  christos   static const struct { uint16_t code; uint16_t name; } unicode_code_to_name[15257] = ...;
     48  1.1  christos   #define UNICODE_CHARNAME_MAX_LENGTH 83
     49  1.1  christos   #define UNICODE_CHARNAME_MAX_WORDS 13
     50  1.1  christos */
     51  1.1  christos 
     52  1.1  christos /* Returns the word with a given index.  */
     53  1.1  christos static const char *
     54  1.1  christos unicode_name_word (unsigned int index, unsigned int *lengthp)
     55  1.1  christos {
     56  1.1  christos   unsigned int i1;
     57  1.1  christos   unsigned int i2;
     58  1.1  christos   unsigned int i;
     59  1.1  christos 
     60  1.1  christos   assert (index < UNICODE_CHARNAME_NUM_WORDS);
     61  1.1  christos 
     62  1.1  christos   /* Binary search for i with
     63  1.1  christos        unicode_name_by_length[i].ind_offset <= index
     64  1.1  christos      and
     65  1.1  christos        index < unicode_name_by_length[i+1].ind_offset
     66  1.1  christos    */
     67  1.1  christos 
     68  1.1  christos   i1 = 0;
     69  1.1  christos   i2 = SIZEOF (unicode_name_by_length) - 1;
     70  1.1  christos   while (i2 - i1 > 1)
     71  1.1  christos     {
     72  1.1  christos       unsigned int i = (i1 + i2) >> 1;
     73  1.1  christos       if (unicode_name_by_length[i].ind_offset <= index)
     74  1.1  christos 	i1 = i;
     75  1.1  christos       else
     76  1.1  christos 	i2 = i;
     77  1.1  christos     }
     78  1.1  christos   i = i1;
     79  1.1  christos   assert (unicode_name_by_length[i].ind_offset <= index
     80  1.1  christos 	  && index < unicode_name_by_length[i+1].ind_offset);
     81  1.1  christos   *lengthp = i;
     82  1.1  christos   return &unicode_name_words[unicode_name_by_length[i].extra_offset
     83  1.1  christos 			     + (index-unicode_name_by_length[i].ind_offset)*i];
     84  1.1  christos }
     85  1.1  christos 
     86  1.1  christos /* Looks up the index of a word.  */
     87  1.1  christos static int
     88  1.1  christos unicode_name_word_lookup (const char *word, unsigned int length)
     89  1.1  christos {
     90  1.1  christos   if (length > 0 && length < SIZEOF (unicode_name_by_length) - 1)
     91  1.1  christos     {
     92  1.1  christos       /* Binary search among the words of given length.  */
     93  1.1  christos       unsigned int extra_offset = unicode_name_by_length[length].extra_offset;
     94  1.1  christos       unsigned int i0 = unicode_name_by_length[length].ind_offset;
     95  1.1  christos       unsigned int i1 = i0;
     96  1.1  christos       unsigned int i2 = unicode_name_by_length[length+1].ind_offset;
     97  1.1  christos       while (i2 - i1 > 0)
     98  1.1  christos 	{
     99  1.1  christos 	  unsigned int i = (i1 + i2) >> 1;
    100  1.1  christos 	  const char *p = &unicode_name_words[extra_offset + (i-i0)*length];
    101  1.1  christos 	  const char *w = word;
    102  1.1  christos 	  unsigned int n = length;
    103  1.1  christos 	  for (;;)
    104  1.1  christos 	    {
    105  1.1  christos 	      if (*p < *w)
    106  1.1  christos 		{
    107  1.1  christos 		  if (i1 == i)
    108  1.1  christos 		    return -1;
    109  1.1  christos 		  /* Note here: i1 < i < i2.  */
    110  1.1  christos 		  i1 = i;
    111  1.1  christos 		  break;
    112  1.1  christos 		}
    113  1.1  christos 	      if (*p > *w)
    114  1.1  christos 		{
    115  1.1  christos 		  /* Note here: i1 <= i < i2.  */
    116  1.1  christos 		  i2 = i;
    117  1.1  christos 		  break;
    118  1.1  christos 		}
    119  1.1  christos 	      p++; w++; n--;
    120  1.1  christos 	      if (n == 0)
    121  1.1  christos 		return i;
    122  1.1  christos 	    }
    123  1.1  christos 	}
    124  1.1  christos     }
    125  1.1  christos   return -1;
    126  1.1  christos }
    127  1.1  christos 
/* Auxiliary tables for Hangul syllable names, see the Unicode 3.0 book,
   sections 3.11 and 4.4.
   Each table maps a jamo index to its short name.  A syllable name is
   "HANGUL SYLLABLE " followed by the short names of its initial consonant,
   its medial vowel and its final consonant, in this order.  The empty
   string stands for "no consonant in this position".  */

/* Short names of the 19 initial consonants (at most 2 letters each).  */
static const char jamo_initial_short_name[19][3] =
{
  "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", "SS", "", "J", "JJ",
  "C", "K", "T", "P", "H"
};

/* Short names of the 21 medial vowels (at most 3 letters each).  */
static const char jamo_medial_short_name[21][4] =
{
  "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA", "WAE", "OE", "YO",
  "U", "WEO", "WE", "WI", "YU", "EU", "YI", "I"
};

/* Short names of the 28 final consonants (at most 2 letters each).
   Index 5 is NIEUN-CIEUC (U+11AC), whose short name is "NJ": the name of
   U+AC05 is "HANGUL SYLLABLE GANJ".  */
static const char jamo_final_short_name[28][3] =
{
  "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", "LB", "LS", "LT",
  "LP", "LH", "M", "B", "BS", "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
};
    145  1.1  christos 
    146  1.1  christos /* Looks up the name of a Unicode character, in uppercase ASCII.
    147  1.1  christos    Returns the filled buf, or NULL if the character does not have a name.  */
    148  1.1  christos char *
    149  1.1  christos unicode_character_name (unsigned int c, char *buf)
    150  1.1  christos {
    151  1.1  christos   if (c >= 0xAC00 && c <= 0xD7A3)
    152  1.1  christos     {
    153  1.1  christos       /* Special case for Hangul syllables. Keeps the tables small.  */
    154  1.1  christos       char *ptr;
    155  1.1  christos       unsigned int tmp;
    156  1.1  christos       unsigned int index1;
    157  1.1  christos       unsigned int index2;
    158  1.1  christos       unsigned int index3;
    159  1.1  christos       const char *q;
    160  1.1  christos 
    161  1.1  christos       /* buf needs to have at least 16 + 7 bytes here.  */
    162  1.1  christos       memcpy (buf, "HANGUL SYLLABLE ", 16);
    163  1.1  christos       ptr = buf + 16;
    164  1.1  christos 
    165  1.1  christos       tmp = c - 0xAC00;
    166  1.1  christos       index3 = tmp % 28; tmp = tmp / 28;
    167  1.1  christos       index2 = tmp % 21; tmp = tmp / 21;
    168  1.1  christos       index1 = tmp;
    169  1.1  christos 
    170  1.1  christos       q = jamo_initial_short_name[index1];
    171  1.1  christos       while (*q != '\0')
    172  1.1  christos 	*ptr++ = *q++;
    173  1.1  christos       q = jamo_medial_short_name[index2];
    174  1.1  christos       while (*q != '\0')
    175  1.1  christos 	*ptr++ = *q++;
    176  1.1  christos       q = jamo_final_short_name[index3];
    177  1.1  christos       while (*q != '\0')
    178  1.1  christos 	*ptr++ = *q++;
    179  1.1  christos       *ptr = '\0';
    180  1.1  christos       return buf;
    181  1.1  christos     }
    182  1.1  christos   else if ((c >= 0xF900 && c <= 0xFA2D) || (c >= 0xFA30 && c <= 0xFA6A)
    183  1.1  christos 	   || (c >= 0xFA70 && c <= 0xFAD9) || (c >= 0x2F800 && c <= 0x2FA1D))
    184  1.1  christos     {
    185  1.1  christos       /* Special case for CJK compatibility ideographs. Keeps the tables
    186  1.1  christos 	 small.  */
    187  1.1  christos       char *ptr;
    188  1.1  christos       int i;
    189  1.1  christos 
    190  1.1  christos       /* buf needs to have at least 28 + 5 bytes here.  */
    191  1.1  christos       memcpy (buf, "CJK COMPATIBILITY IDEOGRAPH-", 28);
    192  1.1  christos       ptr = buf + 28;
    193  1.1  christos 
    194  1.1  christos       for (i = (c < 0x10000 ? 12 : 16); i >= 0; i -= 4)
    195  1.1  christos 	{
    196  1.1  christos 	  unsigned int x = (c >> i) & 0xf;
    197  1.1  christos 	  *ptr++ = (x < 10 ? '0' : 'A' - 10) + x;
    198  1.1  christos 	}
    199  1.1  christos       *ptr = '\0';
    200  1.1  christos       return buf;
    201  1.1  christos     }
    202  1.1  christos   else
    203  1.1  christos     {
    204  1.1  christos       const uint16_t *words;
    205  1.1  christos 
    206  1.1  christos       /* Transform the code so that it fits in 16 bits.  */
    207  1.1  christos       switch (c >> 12)
    208  1.1  christos 	{
    209  1.1  christos 	case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
    210  1.1  christos 	  break;
    211  1.1  christos 	case 0x0A:
    212  1.1  christos 	  c -= 0x05000;
    213  1.1  christos 	  break;
    214  1.1  christos 	case 0x0F:
    215  1.1  christos 	  c -= 0x09000;
    216  1.1  christos 	  break;
    217  1.1  christos 	case 0x10:
    218  1.1  christos 	  c -= 0x09000;
    219  1.1  christos 	  break;
    220  1.1  christos 	case 0x1D:
    221  1.1  christos 	  c -= 0x15000;
    222  1.1  christos 	  break;
    223  1.1  christos 	case 0x2F:
    224  1.1  christos 	  c -= 0x26000;
    225  1.1  christos 	  break;
    226  1.1  christos 	case 0xE0:
    227  1.1  christos 	  c -= 0xD6000;
    228  1.1  christos 	  break;
    229  1.1  christos 	default:
    230  1.1  christos 	  return NULL;
    231  1.1  christos 	}
    232  1.1  christos 
    233  1.1  christos       {
    234  1.1  christos 	/* Binary search in unicode_code_to_name.  */
    235  1.1  christos 	unsigned int i1 = 0;
    236  1.1  christos 	unsigned int i2 = SIZEOF (unicode_code_to_name);
    237  1.1  christos 	for (;;)
    238  1.1  christos 	  {
    239  1.1  christos 	    unsigned int i = (i1 + i2) >> 1;
    240  1.1  christos 	    if (unicode_code_to_name[i].code == c)
    241  1.1  christos 	      {
    242  1.1  christos 		words = &unicode_names[unicode_code_to_name[i].name];
    243  1.1  christos 		break;
    244  1.1  christos 	      }
    245  1.1  christos 	    else if (unicode_code_to_name[i].code < c)
    246  1.1  christos 	      {
    247  1.1  christos 		if (i1 == i)
    248  1.1  christos 		  {
    249  1.1  christos 		    words = NULL;
    250  1.1  christos 		    break;
    251  1.1  christos 		  }
    252  1.1  christos 		/* Note here: i1 < i < i2.  */
    253  1.1  christos 		i1 = i;
    254  1.1  christos 	      }
    255  1.1  christos 	    else if (unicode_code_to_name[i].code > c)
    256  1.1  christos 	      {
    257  1.1  christos 		if (i2 == i)
    258  1.1  christos 		  {
    259  1.1  christos 		    words = NULL;
    260  1.1  christos 		    break;
    261  1.1  christos 		  }
    262  1.1  christos 		/* Note here: i1 <= i < i2.  */
    263  1.1  christos 		i2 = i;
    264  1.1  christos 	      }
    265  1.1  christos 	  }
    266  1.1  christos       }
    267  1.1  christos       if (words != NULL)
    268  1.1  christos 	{
    269  1.1  christos 	  /* Found it in unicode_code_to_name. Now concatenate the words.  */
    270  1.1  christos 	  /* buf needs to have at least UNICODE_CHARNAME_MAX_LENGTH bytes.  */
    271  1.1  christos 	  char *ptr = buf;
    272  1.1  christos 	  for (;;)
    273  1.1  christos 	    {
    274  1.1  christos 	      unsigned int wordlen;
    275  1.1  christos 	      const char *word = unicode_name_word (*words>>1, &wordlen);
    276  1.1  christos 	      do
    277  1.1  christos 		*ptr++ = *word++;
    278  1.1  christos 	      while (--wordlen > 0);
    279  1.1  christos 	      if ((*words & 1) == 0)
    280  1.1  christos 		break;
    281  1.1  christos 	      *ptr++ = ' ';
    282  1.1  christos 	      words++;
    283  1.1  christos 	    }
    284  1.1  christos 	  *ptr = '\0';
    285  1.1  christos 	  return buf;
    286  1.1  christos 	}
    287  1.1  christos       return NULL;
    288  1.1  christos     }
    289  1.1  christos }
    290  1.1  christos 
    291  1.1  christos /* Looks up the Unicode character with a given name, in upper- or lowercase
    292  1.1  christos    ASCII.  Returns the character if found, or UNINAME_INVALID if not found.  */
    293  1.1  christos unsigned int
    294  1.1  christos unicode_name_character (const char *name)
    295  1.1  christos {
    296  1.1  christos   unsigned int len = strlen (name);
    297  1.1  christos   if (len > 1 && len <= UNICODE_CHARNAME_MAX_LENGTH)
    298  1.1  christos     {
    299  1.1  christos       /* Test for "word1 word2 ..." syntax.  */
    300  1.1  christos       char buf[UNICODE_CHARNAME_MAX_LENGTH];
    301  1.1  christos       char *ptr = buf;
    302  1.1  christos       for (;;)
    303  1.1  christos 	{
    304  1.1  christos 	  char c = *name++;
    305  1.1  christos 	  if (!(c >= ' ' && c <= '~'))
    306  1.1  christos 	    break;
    307  1.1  christos 	  *ptr++ = (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
    308  1.1  christos 	  if (--len == 0)
    309  1.1  christos 	    goto filled_buf;
    310  1.1  christos 	}
    311  1.1  christos       if (false)
    312  1.1  christos       filled_buf:
    313  1.1  christos 	{
    314  1.1  christos 	  /* Convert the constituents to uint16_t words.  */
    315  1.1  christos 	  uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
    316  1.1  christos 	  uint16_t *wordptr = words;
    317  1.1  christos 	  {
    318  1.1  christos 	    const char *p1 = buf;
    319  1.1  christos 	    for (;;)
    320  1.1  christos 	      {
    321  1.1  christos 		{
    322  1.1  christos 		  int word;
    323  1.1  christos 		  const char *p2 = p1;
    324  1.1  christos 		  while (p2 < ptr && *p2 != ' ')
    325  1.1  christos 		    p2++;
    326  1.1  christos 		  word = unicode_name_word_lookup (p1, p2 - p1);
    327  1.1  christos 		  if (word < 0)
    328  1.1  christos 		    break;
    329  1.1  christos 		  if (wordptr == &words[UNICODE_CHARNAME_MAX_WORDS])
    330  1.1  christos 		    break;
    331  1.1  christos 		  *wordptr++ = word;
    332  1.1  christos 		  if (p2 == ptr)
    333  1.1  christos 		    goto filled_words;
    334  1.1  christos 		  p1 = p2 + 1;
    335  1.1  christos 		}
    336  1.1  christos 		/* Special case for Hangul syllables. Keeps the tables small. */
    337  1.1  christos 		if (wordptr == &words[2]
    338  1.1  christos 		    && words[0] == UNICODE_CHARNAME_WORD_HANGUL
    339  1.1  christos 		    && words[1] == UNICODE_CHARNAME_WORD_SYLLABLE)
    340  1.1  christos 		  {
    341  1.1  christos 		    /* Split the last word [p1..ptr) into three parts:
    342  1.1  christos 			 1) [BCDGHJKMNPRST]
    343  1.1  christos 			 2) [AEIOUWY]
    344  1.1  christos 			 3) [BCDGHIJKLMNPST]
    345  1.1  christos 		     */
    346  1.1  christos 		    const char *p2;
    347  1.1  christos 		    const char *p3;
    348  1.1  christos 		    const char *p4;
    349  1.1  christos 
    350  1.1  christos 		    p2 = p1;
    351  1.1  christos 		    while (p2 < ptr
    352  1.1  christos 			   && (*p2 == 'B' || *p2 == 'C' || *p2 == 'D'
    353  1.1  christos 			       || *p2 == 'G' || *p2 == 'H' || *p2 == 'J'
    354  1.1  christos 			       || *p2 == 'K' || *p2 == 'M' || *p2 == 'N'
    355  1.1  christos 			       || *p2 == 'P' || *p2 == 'R' || *p2 == 'S'
    356  1.1  christos 			       || *p2 == 'T'))
    357  1.1  christos 		      p2++;
    358  1.1  christos 		    p3 = p2;
    359  1.1  christos 		    while (p3 < ptr
    360  1.1  christos 			   && (*p3 == 'A' || *p3 == 'E' || *p3 == 'I'
    361  1.1  christos 			       || *p3 == 'O' || *p3 == 'U' || *p3 == 'W'
    362  1.1  christos 			       || *p3 == 'Y'))
    363  1.1  christos 		      p3++;
    364  1.1  christos 		    p4 = p3;
    365  1.1  christos 		    while (p4 < ptr
    366  1.1  christos 			   && (*p4 == 'B' || *p4 == 'C' || *p4 == 'D'
    367  1.1  christos 			       || *p4 == 'G' || *p4 == 'H' || *p4 == 'I'
    368  1.1  christos 			       || *p4 == 'J' || *p4 == 'K' || *p4 == 'L'
    369  1.1  christos 			       || *p4 == 'M' || *p4 == 'N' || *p4 == 'P'
    370  1.1  christos 			       || *p4 == 'S' || *p4 == 'T'))
    371  1.1  christos 		      p4++;
    372  1.1  christos 		    if (p4 == ptr)
    373  1.1  christos 		      {
    374  1.1  christos 			unsigned int n1 = p2 - p1;
    375  1.1  christos 			unsigned int n2 = p3 - p2;
    376  1.1  christos 			unsigned int n3 = p4 - p3;
    377  1.1  christos 
    378  1.1  christos 			if (n1 <= 2 && (n2 >= 1 && n2 <= 3) && n3 <= 2)
    379  1.1  christos 			  {
    380  1.1  christos 			    unsigned int index1;
    381  1.1  christos 
    382  1.1  christos 			    for (index1 = 0; index1 < 19; index1++)
    383  1.1  christos 			      if (memcmp(jamo_initial_short_name[index1], p1, n1) == 0
    384  1.1  christos 				  && jamo_initial_short_name[index1][n1] == '\0')
    385  1.1  christos 				{
    386  1.1  christos 				  unsigned int index2;
    387  1.1  christos 
    388  1.1  christos 				  for (index2 = 0; index2 < 21; index2++)
    389  1.1  christos 				    if (memcmp(jamo_medial_short_name[index2], p2, n2) == 0
    390  1.1  christos 					&& jamo_medial_short_name[index2][n2] == '\0')
    391  1.1  christos 				      {
    392  1.1  christos 					unsigned int index3;
    393  1.1  christos 
    394  1.1  christos 					for (index3 = 0; index3 < 28; index3++)
    395  1.1  christos 					  if (memcmp(jamo_final_short_name[index3], p3, n3) == 0
    396  1.1  christos 					      && jamo_final_short_name[index3][n3] == '\0')
    397  1.1  christos 					    {
    398  1.1  christos 					      return 0xAC00 + (index1 * 21 + index2) * 28 + index3;
    399  1.1  christos 					    }
    400  1.1  christos 					break;
    401  1.1  christos 				      }
    402  1.1  christos 				  break;
    403  1.1  christos 				}
    404  1.1  christos 			  }
    405  1.1  christos 		      }
    406  1.1  christos 		  }
    407  1.1  christos 		/* Special case for CJK compatibility ideographs. Keeps the
    408  1.1  christos 		   tables small.  */
    409  1.1  christos 		if (wordptr == &words[2]
    410  1.1  christos 		    && words[0] == UNICODE_CHARNAME_WORD_CJK
    411  1.1  christos 		    && words[1] == UNICODE_CHARNAME_WORD_COMPATIBILITY
    412  1.1  christos 		    && p1 + 14 <= ptr
    413  1.1  christos 		    && p1 + 15 >= ptr
    414  1.1  christos 		    && memcmp (p1, "IDEOGRAPH-", 10) == 0)
    415  1.1  christos 		  {
    416  1.1  christos 		    const char *p2 = p1 + 10;
    417  1.1  christos 
    418  1.1  christos 		    if (*p2 != '0')
    419  1.1  christos 		      {
    420  1.1  christos 			unsigned int c = 0;
    421  1.1  christos 
    422  1.1  christos 			for (;;)
    423  1.1  christos 			  {
    424  1.1  christos 			    if (*p2 >= '0' && *p2 <= '9')
    425  1.1  christos 			      c += (*p2 - '0');
    426  1.1  christos 			    else if (*p2 >= 'A' && *p2 <= 'F')
    427  1.1  christos 			      c += (*p2 - 'A' + 10);
    428  1.1  christos 			    else
    429  1.1  christos 			      break;
    430  1.1  christos 			    p2++;
    431  1.1  christos 			    if (p2 == ptr)
    432  1.1  christos 			      {
    433  1.1  christos 				if ((c >= 0xF900 && c <= 0xFA2D)
    434  1.1  christos 				    || (c >= 0xFA30 && c <= 0xFA6A)
    435  1.1  christos 				    || (c >= 0xFA70 && c <= 0xFAD9)
    436  1.1  christos 				    || (c >= 0x2F800 && c <= 0x2FA1D))
    437  1.1  christos 				  return c;
    438  1.1  christos 				else
    439  1.1  christos 				  break;
    440  1.1  christos 			      }
    441  1.1  christos 			    c = c << 4;
    442  1.1  christos 			  }
    443  1.1  christos 		      }
    444  1.1  christos 		  }
    445  1.1  christos 	      }
    446  1.1  christos 	  }
    447  1.1  christos 	  if (false)
    448  1.1  christos 	  filled_words:
    449  1.1  christos 	    {
    450  1.1  christos 	      /* Multiply by 2, to simplify later comparisons.  */
    451  1.1  christos 	      unsigned int words_length = wordptr - words;
    452  1.1  christos 	      {
    453  1.1  christos 		int i = words_length - 1;
    454  1.1  christos 		words[i] = 2 * words[i];
    455  1.1  christos 		for (; --i >= 0; )
    456  1.1  christos 		  words[i] = 2 * words[i] + 1;
    457  1.1  christos 	      }
    458  1.1  christos 	      /* Binary search in unicode_name_to_code.  */
    459  1.1  christos 	      {
    460  1.1  christos 		unsigned int i1 = 0;
    461  1.1  christos 		unsigned int i2 = SIZEOF (unicode_name_to_code);
    462  1.1  christos 		for (;;)
    463  1.1  christos 		  {
    464  1.1  christos 		    unsigned int i = (i1 + i2) >> 1;
    465  1.1  christos 		    const uint16_t *w = words;
    466  1.1  christos 		    const uint16_t *p = &unicode_names[unicode_name_to_code[i].name];
    467  1.1  christos 		    unsigned int n = words_length;
    468  1.1  christos 		    for (;;)
    469  1.1  christos 		      {
    470  1.1  christos 			if (*p < *w)
    471  1.1  christos 			  {
    472  1.1  christos 			    if (i1 == i)
    473  1.1  christos 			      goto name_not_found;
    474  1.1  christos 			    /* Note here: i1 < i < i2.  */
    475  1.1  christos 			    i1 = i;
    476  1.1  christos 			    break;
    477  1.1  christos 			  }
    478  1.1  christos 			else if (*p > *w)
    479  1.1  christos 			  {
    480  1.1  christos 			    if (i2 == i)
    481  1.1  christos 			      goto name_not_found;
    482  1.1  christos 			    /* Note here: i1 <= i < i2.  */
    483  1.1  christos 			    i2 = i;
    484  1.1  christos 			    break;
    485  1.1  christos 			  }
    486  1.1  christos 			p++; w++; n--;
    487  1.1  christos 			if (n == 0)
    488  1.1  christos 			  {
    489  1.1  christos 			    unsigned int c = unicode_name_to_code[i].code;
    490  1.1  christos 
    491  1.1  christos 			    /* Undo the transformation to 16-bit space.  */
    492  1.1  christos 			    static const unsigned int offset[11] =
    493  1.1  christos 			      {
    494  1.1  christos 				0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
    495  1.1  christos 				0x05000, 0x09000, 0x09000, 0x15000, 0x26000,
    496  1.1  christos 				0xD6000
    497  1.1  christos 			      };
    498  1.1  christos 			    return c + offset[c >> 12];
    499  1.1  christos 			  }
    500  1.1  christos 		      }
    501  1.1  christos 		  }
    502  1.1  christos 	      }
    503  1.1  christos 	    name_not_found: ;
    504  1.1  christos 	    }
    505  1.1  christos 	}
    506  1.1  christos     }
    507  1.1  christos   return UNINAME_INVALID;
    508  1.1  christos }
    509