1 /* $NetBSD: rune.c,v 1.50 2025/09/15 00:11:54 riastradh Exp $ */ 2 /*- 3 * Copyright (c)2010 Citrus Project, 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/endian.h> 29 #include <sys/mman.h> 30 #include <sys/stat.h> 31 #include <assert.h> 32 #include <errno.h> 33 #include <fcntl.h> 34 #define __SETLOCALE_SOURCE__ 35 #include <locale.h> 36 #include <stddef.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <unistd.h> 41 #include <wchar.h> 42 43 #include "ctype_guard.h" 44 45 #include "setlocale_local.h" 46 47 #include "citrus_module.h" 48 #include "citrus_ctype.h" 49 50 #include "runetype_local.h" 51 52 #include "multibyte.h" 53 54 #include "_wctype_local.h" 55 #include "_wctrans_local.h" 56 57 typedef struct { 58 _RuneLocale rl; 59 #ifdef __CHAR_UNSIGNED__ 60 unsigned short rlp_ctype_tab [_CTYPE_NUM_CHARS + 1]; 61 short rlp_tolower_tab[_CTYPE_NUM_CHARS + 1]; 62 short rlp_toupper_tab[_CTYPE_NUM_CHARS + 1]; 63 #else 64 unsigned short *rlp_ctype_tab; 65 short *rlp_tolower_tab; 66 short *rlp_toupper_tab; 67 #endif 68 char rlp_codeset[33]; /* XXX */ 69 70 #ifdef __BUILD_LEGACY 71 unsigned char rlp_compat_bsdctype[_CTYPE_NUM_CHARS + 1]; 72 #endif 73 } _RuneLocalePriv; 74 75 #ifndef __CHAR_UNSIGNED__ 76 77 #define roundup(X, N) ((((X) + ((N) - 1))/(N))*(N)) 78 79 static void * 80 alloc_guarded(size_t elemsize, size_t nelem) 81 { 82 const unsigned long page_size = sysconf(_SC_PAGESIZE); 83 size_t nbytes = 0; 84 void *p = MAP_FAILED, *q = NULL; 85 86 _DIAGASSERT(elemsize != 0); 87 if (nelem > SIZE_MAX/elemsize) 88 goto fail; 89 nbytes = page_size + roundup(elemsize*nelem, page_size); 90 p = mmap(NULL, nbytes, PROT_READ|PROT_WRITE, MAP_ANON, 91 /*fd*/-1, /*offset*/0); 92 if (p == MAP_FAILED) 93 goto fail; 94 if (allow_ctype_abuse()) 95 memset(p, 0xff, page_size); 96 else if (mprotect(p, page_size, PROT_NONE) == -1) 97 goto fail; 98 q = (char *)p + page_size; 99 return q; 100 101 fail: if (p != MAP_FAILED) 102 (void)munmap(p, nbytes); 103 return NULL; 104 } 105 106 static void 107 free_guarded(void *q, size_t elemsize, size_t nelem) 108 { 109 const unsigned long page_size = sysconf(_SC_PAGESIZE); 110 size_t nbytes = 0; 111 void *p; 112 113 if (q == NULL) 114 return; 115 _DIAGASSERT(elemsize <= SIZE_MAX/nelem); 116 nbytes = page_size + roundup(elemsize*nelem, page_size); 117 p = (char *)q - page_size; 118 (void)munmap(p, nbytes); 119 } 120 121 #endif /* !__CHAR_UNSIGNED__ */ 122 123 static __inline void 124 _rune_wctype_init(_RuneLocale *rl) 125 { 126 memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype, 127 sizeof(rl->rl_wctype)); 128 } 129 130 static __inline void 131 _rune_wctrans_init(_RuneLocale *rl) 132 { 133 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name = "tolower"; 134 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0]; 135 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext; 136 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name = "toupper"; 137 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0]; 138 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext; 139 } 140 141 static __inline void 142 _rune_init_priv(_RuneLocalePriv *rlp) 143 { 144 #if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS 145 int i; 146 147 for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) { 148 rlp->rlp_ctype_tab [i + 1] = 0; 149 rlp->rlp_tolower_tab[i + 1] = i; 150 rlp->rlp_toupper_tab[i + 1] = i; 151 152 #ifdef __BUILD_LEGACY 153 rlp->rlp_compat_bsdctype[i + 1] = 0; 154 #endif 155 } 156 #endif 157 rlp->rlp_ctype_tab [0] = 0; 158 rlp->rlp_tolower_tab[0] = EOF; 159 rlp->rlp_toupper_tab[0] = EOF; 160 161 rlp->rl.rl_ctype_tab = (const unsigned short *)&rlp->rlp_ctype_tab[0]; 162 rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0]; 163 rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0]; 164 rlp->rl.rl_codeset = (const char *)&rlp->rlp_codeset[0]; 165 166 _rune_wctype_init(&rlp->rl); 167 _rune_wctrans_init(&rlp->rl); 168 169 #ifdef __BUILD_LEGACY 170 rlp->rlp_compat_bsdctype[0] = 0; 171 rlp->rl.rl_compat_bsdctype = (const unsigned char *) 172 &rlp->rlp_compat_bsdctype[0]; 173 #endif 174 } 175 176 static __inline void 177 _rune_find_codeset(char *s, size_t n, 178 char *var, size_t *plenvar) 179 { 180 size_t lenvar; 181 const char *endvar; 182 183 #define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1) 184 185 lenvar = *plenvar; 186 for (/**/; lenvar > _RUNE_CODESET_LEN; ++var, --lenvar) { 187 if (!memcmp(var, _RUNE_CODESET, _RUNE_CODESET_LEN)) { 188 *var = '\0'; 189 *plenvar -= lenvar; 190 endvar = &var[_RUNE_CODESET_LEN]; 191 while (n-- > 1 && lenvar-- > _RUNE_CODESET_LEN) { 192 if (*endvar == ' ' || *endvar == '\t') 193 break; 194 *s++ = *endvar++; 195 } 196 break; 197 } 198 } 199 *s = '\0'; 200 } 201 202 #ifdef __BUILD_LEGACY 203 static __inline int 204 _runetype_to_bsdctype(_RuneType bits) 205 { 206 int ret; 207 208 if (bits == (_RuneType)0) 209 return 0; 210 ret = 0; 211 if (bits & _RUNETYPE_U) 212 ret |= _COMPAT_U; 213 if (bits & _RUNETYPE_L) 214 ret |= _COMPAT_L; 215 if (bits & _RUNETYPE_D) 216 ret |= _COMPAT_N; 217 if (bits & _RUNETYPE_S) 218 ret |= _COMPAT_S; 219 if (bits & _RUNETYPE_P) 220 ret |= _COMPAT_P; 221 if (bits & _RUNETYPE_C) 222 ret |= _COMPAT_C; 223 if ((bits & (_RUNETYPE_X | _RUNETYPE_D)) == _RUNETYPE_X) 224 ret |= _COMPAT_X; 225 if ((bits & (_RUNETYPE_R | _RUNETYPE_G)) == _RUNETYPE_R) 226 ret |= _COMPAT_B; 227 return ret; 228 } 229 #endif /* __BUILD_LEGACY */ 230 231 static __inline int 232 _rune_read_file(const char * __restrict var, size_t lenvar, 233 _RuneLocale ** __restrict prl) 234 { 235 int ret, i; 236 const _FileRuneLocale *frl; 237 const _FileRuneEntry *fre; 238 const uint32_t *frune; 239 _RuneLocalePriv *rlp; 240 _RuneLocale *rl; 241 _RuneEntry *re; 242 uint32_t *rune; 243 uint32_t runetype_len, maplower_len, mapupper_len, variable_len; 244 size_t len, n; 245 246 if (lenvar < sizeof(*frl)) 247 return EFTYPE; 248 lenvar -= sizeof(*frl); 249 frl = (const _FileRuneLocale *)(const void *)var; 250 if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic))) 251 return EFTYPE; 252 253 runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges); 254 maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges); 255 mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges); 256 len = runetype_len + maplower_len + mapupper_len; 257 258 fre = (const _FileRuneEntry *)(const void *)(frl + 1); 259 frune = (const uint32_t *)(const void *)(fre + len); 260 261 variable_len = be32toh((uint32_t)frl->frl_variable_len); 262 263 n = len * sizeof(*fre); 264 if (lenvar < n) 265 return EFTYPE; 266 lenvar -= n; 267 268 n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar; 269 rlp = (_RuneLocalePriv *)malloc(n); 270 if (rlp == NULL) 271 return ENOMEM; 272 #ifndef __CHAR_UNSIGNED__ 273 rlp->rlp_ctype_tab = NULL; 274 rlp->rlp_tolower_tab = NULL; 275 rlp->rlp_toupper_tab = NULL; 276 if ((rlp->rlp_ctype_tab = alloc_guarded(sizeof(rlp->rlp_ctype_tab[0]), 277 _CTYPE_NUM_CHARS + 1)) == NULL || 278 (rlp->rlp_tolower_tab = 279 alloc_guarded(sizeof(rlp->rlp_tolower_tab[0]), 280 _CTYPE_NUM_CHARS + 1)) == NULL || 281 (rlp->rlp_toupper_tab = 282 alloc_guarded(sizeof(rlp->rlp_toupper_tab[0]), 283 _CTYPE_NUM_CHARS + 1)) == NULL) { 284 ret = ENOMEM; 285 goto err; 286 } 287 #endif /* !__CHAR_UNSIGNED__ */ 288 _rune_init_priv(rlp); 289 290 rl = &rlp->rl; 291 re = (_RuneEntry *)(void *)(rlp + 1); 292 rune = (uint32_t *)(void *)(re + len); 293 294 for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) { 295 rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]); 296 rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]); 297 rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]); 298 } 299 300 #define READ_RANGE(name) \ 301 do { \ 302 const _FileRuneEntry *end_fre; \ 303 const uint32_t *end_frune; \ 304 \ 305 rl->rl_##name##_ext.rr_nranges = name##_len; \ 306 rl->rl_##name##_ext.rr_rune_ranges = re; \ 307 \ 308 end_fre = fre + name##_len; \ 309 while (fre < end_fre) { \ 310 re->re_min = be32toh((uint32_t)fre->fre_min); \ 311 re->re_max = be32toh((uint32_t)fre->fre_max); \ 312 re->re_map = be32toh((uint32_t)fre->fre_map); \ 313 if (re->re_map != 0) { \ 314 re->re_rune_types = NULL; \ 315 } else { \ 316 re->re_rune_types = rune; \ 317 len = re->re_max - re->re_min + 1; \ 318 n = len * sizeof(*frune); \ 319 if (lenvar < n) { \ 320 ret = EFTYPE; \ 321 goto err; \ 322 } \ 323 lenvar -= n; \ 324 end_frune = frune + len; \ 325 while (frune < end_frune) \ 326 *rune++ = be32toh(*frune++); \ 327 } \ 328 ++fre, ++re; \ 329 } \ 330 } while (0) 331 332 READ_RANGE(runetype); 333 READ_RANGE(maplower); 334 READ_RANGE(mapupper); 335 336 if (lenvar < variable_len) { 337 ret = EFTYPE; 338 goto err; 339 } 340 341 memcpy((void *)rune, (void const *)frune, variable_len); 342 rl->rl_variable_len = variable_len; 343 rl->rl_variable = (void *)rune; 344 345 _rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset), 346 (char *)rl->rl_variable, &rl->rl_variable_len); 347 348 ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding, 349 rl->rl_variable, rl->rl_variable_len, _PRIVSIZE); 350 if (ret) 351 goto err; 352 if (__mb_len_max_runtime < 353 _citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) { 354 ret = EINVAL; 355 goto err; 356 } 357 358 for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) { 359 wint_t wc; 360 _RuneType rc; 361 362 ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc); 363 if (ret) 364 goto err; 365 if (wc == WEOF) { 366 rlp->rlp_ctype_tab[i + 1] = 0; 367 rlp->rlp_tolower_tab[i + 1] = i; 368 rlp->rlp_toupper_tab[i + 1] = i; 369 } else { 370 rc = _runetype_priv(rl, wc); 371 rlp->rlp_ctype_tab[i + 1] = (unsigned short) 372 ((rc & ~_RUNETYPE_SWM) >> 8); 373 374 #ifdef __BUILD_LEGACY 375 rlp->rlp_compat_bsdctype[i + 1] 376 = _runetype_to_bsdctype(rc); 377 #endif 378 379 #define CONVERT_MAP(name) \ 380 do { \ 381 wint_t map; \ 382 int c; \ 383 \ 384 map = _towctrans_priv(wc, _wctrans_##name(rl)); \ 385 if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype, \ 386 map, &c) || c == EOF)) \ 387 c = i; \ 388 rlp->rlp_to##name##_tab[i + 1] = (short)c; \ 389 } while (0) 390 391 CONVERT_MAP(lower); 392 CONVERT_MAP(upper); 393 } 394 } 395 *prl = rl; 396 return 0; 397 398 err: 399 #ifndef __CHAR_UNSIGNED__ 400 free_guarded(rlp->rlp_ctype_tab, sizeof(rlp->rlp_ctype_tab[0]), 401 _CTYPE_NUM_CHARS + 1); 402 free_guarded(rlp->rlp_tolower_tab, sizeof(rlp->rlp_tolower_tab[0]), 403 _CTYPE_NUM_CHARS + 1); 404 free_guarded(rlp->rlp_toupper_tab, sizeof(rlp->rlp_toupper_tab[0]), 405 _CTYPE_NUM_CHARS + 1); 406 #endif 407 free(rlp); 408 return ret; 409 } 410 411 int 412 _rune_load(const char * __restrict var, size_t lenvar, 413 _RuneLocale ** __restrict prl) 414 { 415 int ret; 416 417 _DIAGASSERT(var != NULL || lenvar < 1); 418 _DIAGASSERT(prl != NULL); 419 420 if (lenvar < 1) 421 return EFTYPE; 422 switch (*var) { 423 case 'R': 424 ret = _rune_read_file(var, lenvar, prl); 425 break; 426 default: 427 ret = EFTYPE; 428 } 429 return ret; 430 } 431