Home | History | Annotate | Line # | Download | only in locale
      1 /*	$NetBSD: rune.c,v 1.50 2025/09/15 00:11:54 riastradh Exp $	*/
      2 /*-
      3  * Copyright (c)2010 Citrus Project,
      4  * All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/endian.h>
     29 #include <sys/mman.h>
     30 #include <sys/stat.h>
     31 #include <assert.h>
     32 #include <errno.h>
     33 #include <fcntl.h>
     34 #define __SETLOCALE_SOURCE__
     35 #include <locale.h>
     36 #include <stddef.h>
     37 #include <stdio.h>
     38 #include <stdlib.h>
     39 #include <string.h>
     40 #include <unistd.h>
     41 #include <wchar.h>
     42 
     43 #include "ctype_guard.h"
     44 
     45 #include "setlocale_local.h"
     46 
     47 #include "citrus_module.h"
     48 #include "citrus_ctype.h"
     49 
     50 #include "runetype_local.h"
     51 
     52 #include "multibyte.h"
     53 
     54 #include "_wctype_local.h"
     55 #include "_wctrans_local.h"
     56 
     57 typedef struct {
     58 	_RuneLocale rl;
     59 #ifdef __CHAR_UNSIGNED__
     60 	unsigned short	rlp_ctype_tab  [_CTYPE_NUM_CHARS + 1];
     61 	short		rlp_tolower_tab[_CTYPE_NUM_CHARS + 1];
     62 	short		rlp_toupper_tab[_CTYPE_NUM_CHARS + 1];
     63 #else
     64 	unsigned short	*rlp_ctype_tab;
     65 	short		*rlp_tolower_tab;
     66 	short		*rlp_toupper_tab;
     67 #endif
     68 	char		rlp_codeset[33]; /* XXX */
     69 
     70 #ifdef __BUILD_LEGACY
     71 	unsigned char	rlp_compat_bsdctype[_CTYPE_NUM_CHARS + 1];
     72 #endif
     73 } _RuneLocalePriv;
     74 
     75 #ifndef __CHAR_UNSIGNED__
     76 
     77 #define	roundup(X, N)	((((X) + ((N) - 1))/(N))*(N))
     78 
     79 static void *
     80 alloc_guarded(size_t elemsize, size_t nelem)
     81 {
     82 	const unsigned long page_size = sysconf(_SC_PAGESIZE);
     83 	size_t nbytes = 0;
     84 	void *p = MAP_FAILED, *q = NULL;
     85 
     86 	_DIAGASSERT(elemsize != 0);
     87 	if (nelem > SIZE_MAX/elemsize)
     88 		goto fail;
     89 	nbytes = page_size + roundup(elemsize*nelem, page_size);
     90 	p = mmap(NULL, nbytes, PROT_READ|PROT_WRITE, MAP_ANON,
     91 	    /*fd*/-1, /*offset*/0);
     92 	if (p == MAP_FAILED)
     93 		goto fail;
     94 	if (allow_ctype_abuse())
     95 		memset(p, 0xff, page_size);
     96 	else if (mprotect(p, page_size, PROT_NONE) == -1)
     97 		goto fail;
     98 	q = (char *)p + page_size;
     99 	return q;
    100 
    101 fail:	if (p != MAP_FAILED)
    102 		(void)munmap(p, nbytes);
    103 	return NULL;
    104 }
    105 
/*
 * free_guarded(q, elemsize, nelem)
 *
 *	Unmap an array obtained from alloc_guarded(elemsize, nelem),
 *	together with the page that precedes it.  q may be NULL, in
 *	which case nothing happens.  elemsize and nelem must be the
 *	values that were passed to alloc_guarded(), since the mapping
 *	length is recomputed from them.
 */
static void
free_guarded(void *q, size_t elemsize, size_t nelem)
{
	const unsigned long page_size = sysconf(_SC_PAGESIZE);
	size_t nbytes = 0;
	void *p;

	if (q == NULL)
		return;
	/*
	 * Same overflow condition as alloc_guarded(): divide by elemsize,
	 * which is required to be nonzero, rather than by nelem, which
	 * alloc_guarded() allows to be zero.
	 */
	_DIAGASSERT(elemsize != 0);
	_DIAGASSERT(nelem <= SIZE_MAX/elemsize);
	nbytes = page_size + roundup(elemsize*nelem, page_size);
	/* The mapping starts one page before the pointer handed out. */
	p = (char *)q - page_size;
	(void)munmap(p, nbytes);
}
    120 
    121 #endif	/* !__CHAR_UNSIGNED__ */
    122 
    123 static __inline void
    124 _rune_wctype_init(_RuneLocale *rl)
    125 {
    126 	memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
    127 	    sizeof(rl->rl_wctype));
    128 }
    129 
    130 static __inline void
    131 _rune_wctrans_init(_RuneLocale *rl)
    132 {
    133 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name   = "tolower";
    134 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0];
    135 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext;
    136 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name   = "toupper";
    137 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0];
    138 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext;
    139 }
    140 
    141 static __inline void
    142 _rune_init_priv(_RuneLocalePriv *rlp)
    143 {
    144 #if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS
    145 	int i;
    146 
    147 	for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) {
    148 		rlp->rlp_ctype_tab  [i + 1] = 0;
    149 		rlp->rlp_tolower_tab[i + 1] = i;
    150 		rlp->rlp_toupper_tab[i + 1] = i;
    151 
    152 #ifdef __BUILD_LEGACY
    153 		rlp->rlp_compat_bsdctype[i + 1] = 0;
    154 #endif
    155 	}
    156 #endif
    157 	rlp->rlp_ctype_tab  [0] = 0;
    158 	rlp->rlp_tolower_tab[0] = EOF;
    159 	rlp->rlp_toupper_tab[0] = EOF;
    160 
    161 	rlp->rl.rl_ctype_tab   = (const unsigned short *)&rlp->rlp_ctype_tab[0];
    162 	rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0];
    163 	rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0];
    164 	rlp->rl.rl_codeset     = (const char *)&rlp->rlp_codeset[0];
    165 
    166 	_rune_wctype_init(&rlp->rl);
    167 	_rune_wctrans_init(&rlp->rl);
    168 
    169 #ifdef __BUILD_LEGACY
    170 	rlp->rlp_compat_bsdctype[0] = 0;
    171 	rlp->rl.rl_compat_bsdctype = (const unsigned char *)
    172 	    &rlp->rlp_compat_bsdctype[0];
    173 #endif
    174 }
    175 
    176 static __inline void
    177 _rune_find_codeset(char *s, size_t n,
    178     char *var, size_t *plenvar)
    179 {
    180 	size_t lenvar;
    181 	const char *endvar;
    182 
    183 #define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1)
    184 
    185 	lenvar = *plenvar;
    186 	for (/**/; lenvar > _RUNE_CODESET_LEN; ++var, --lenvar) {
    187 		if (!memcmp(var, _RUNE_CODESET, _RUNE_CODESET_LEN)) {
    188 			*var = '\0';
    189 			*plenvar -= lenvar;
    190 			endvar = &var[_RUNE_CODESET_LEN];
    191 			while (n-- > 1 && lenvar-- > _RUNE_CODESET_LEN) {
    192 				if (*endvar == ' ' || *endvar == '\t')
    193 					break;
    194 				*s++ = *endvar++;
    195 			}
    196 			break;
    197 		}
    198 	}
    199 	*s = '\0';
    200 }
    201 
    202 #ifdef __BUILD_LEGACY
    203 static __inline int
    204 _runetype_to_bsdctype(_RuneType bits)
    205 {
    206 	int ret;
    207 
    208 	if (bits == (_RuneType)0)
    209 		return 0;
    210 	ret = 0;
    211 	if (bits & _RUNETYPE_U)
    212 		ret |= _COMPAT_U;
    213 	if (bits & _RUNETYPE_L)
    214 		ret |= _COMPAT_L;
    215 	if (bits & _RUNETYPE_D)
    216 		ret |= _COMPAT_N;
    217 	if (bits & _RUNETYPE_S)
    218 		ret |= _COMPAT_S;
    219 	if (bits & _RUNETYPE_P)
    220 		ret |= _COMPAT_P;
    221 	if (bits & _RUNETYPE_C)
    222 		ret |= _COMPAT_C;
    223 	if ((bits & (_RUNETYPE_X | _RUNETYPE_D)) == _RUNETYPE_X)
    224 		ret |= _COMPAT_X;
    225 	if ((bits & (_RUNETYPE_R | _RUNETYPE_G)) == _RUNETYPE_R)
    226 		ret |= _COMPAT_B;
    227 	return ret;
    228 }
    229 #endif /* __BUILD_LEGACY */
    230 
    231 static __inline int
    232 _rune_read_file(const char * __restrict var, size_t lenvar,
    233     _RuneLocale ** __restrict prl)
    234 {
    235 	int ret, i;
    236 	const _FileRuneLocale *frl;
    237 	const _FileRuneEntry *fre;
    238 	const uint32_t *frune;
    239 	_RuneLocalePriv *rlp;
    240 	_RuneLocale *rl;
    241 	_RuneEntry *re;
    242 	uint32_t *rune;
    243 	uint32_t runetype_len, maplower_len, mapupper_len, variable_len;
    244 	size_t len, n;
    245 
    246 	if (lenvar < sizeof(*frl))
    247 		return EFTYPE;
    248 	lenvar -= sizeof(*frl);
    249 	frl = (const _FileRuneLocale *)(const void *)var;
    250 	if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic)))
    251 		return EFTYPE;
    252 
    253 	runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges);
    254 	maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges);
    255 	mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges);
    256 	len = runetype_len + maplower_len + mapupper_len;
    257 
    258 	fre = (const _FileRuneEntry *)(const void *)(frl + 1);
    259 	frune = (const uint32_t *)(const void *)(fre + len);
    260 
    261 	variable_len = be32toh((uint32_t)frl->frl_variable_len);
    262 
    263 	n = len * sizeof(*fre);
    264 	if (lenvar < n)
    265 		return EFTYPE;
    266 	lenvar -= n;
    267 
    268 	n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar;
    269 	rlp = (_RuneLocalePriv *)malloc(n);
    270 	if (rlp == NULL)
    271 		return ENOMEM;
    272 #ifndef __CHAR_UNSIGNED__
    273 	rlp->rlp_ctype_tab = NULL;
    274 	rlp->rlp_tolower_tab = NULL;
    275 	rlp->rlp_toupper_tab = NULL;
    276 	if ((rlp->rlp_ctype_tab = alloc_guarded(sizeof(rlp->rlp_ctype_tab[0]),
    277 		    _CTYPE_NUM_CHARS + 1)) == NULL ||
    278 	    (rlp->rlp_tolower_tab =
    279 		alloc_guarded(sizeof(rlp->rlp_tolower_tab[0]),
    280 		    _CTYPE_NUM_CHARS + 1)) == NULL ||
    281 	    (rlp->rlp_toupper_tab =
    282 		alloc_guarded(sizeof(rlp->rlp_toupper_tab[0]),
    283 		    _CTYPE_NUM_CHARS + 1)) == NULL) {
    284 		ret = ENOMEM;
    285 		goto err;
    286 	}
    287 #endif	/* !__CHAR_UNSIGNED__ */
    288 	_rune_init_priv(rlp);
    289 
    290 	rl = &rlp->rl;
    291 	re = (_RuneEntry *)(void *)(rlp + 1);
    292 	rune = (uint32_t *)(void *)(re + len);
    293 
    294 	for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
    295 		rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]);
    296 		rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]);
    297 		rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]);
    298 	}
    299 
    300 #define READ_RANGE(name)						\
    301 do {									\
    302 	const _FileRuneEntry *end_fre;					\
    303 	const uint32_t *end_frune;					\
    304 									\
    305 	rl->rl_##name##_ext.rr_nranges = name##_len;			\
    306 	rl->rl_##name##_ext.rr_rune_ranges = re;			\
    307 									\
    308 	end_fre = fre + name##_len;					\
    309 	while (fre < end_fre) {						\
    310 		re->re_min = be32toh((uint32_t)fre->fre_min);		\
    311 		re->re_max = be32toh((uint32_t)fre->fre_max);		\
    312 		re->re_map = be32toh((uint32_t)fre->fre_map);		\
    313 		if (re->re_map != 0) {					\
    314 			re->re_rune_types = NULL;			\
    315 		} else {						\
    316 			re->re_rune_types = rune;			\
    317 			len = re->re_max - re->re_min + 1;		\
    318 			n = len * sizeof(*frune);			\
    319 			if (lenvar < n) {				\
    320 				ret = EFTYPE;				\
    321 				goto err;				\
    322 			}						\
    323 			lenvar -= n;					\
    324 			end_frune = frune + len;			\
    325 			while (frune < end_frune)			\
    326 				*rune++ = be32toh(*frune++);		\
    327 		}							\
    328 		++fre, ++re;						\
    329 	}								\
    330 } while (0)
    331 
    332 	READ_RANGE(runetype);
    333 	READ_RANGE(maplower);
    334 	READ_RANGE(mapupper);
    335 
    336 	if (lenvar < variable_len) {
    337 		ret = EFTYPE;
    338 		goto err;
    339 	}
    340 
    341 	memcpy((void *)rune, (void const *)frune, variable_len);
    342 	rl->rl_variable_len = variable_len;
    343 	rl->rl_variable = (void *)rune;
    344 
    345 	_rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset),
    346 	    (char *)rl->rl_variable, &rl->rl_variable_len);
    347 
    348 	ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding,
    349 	    rl->rl_variable, rl->rl_variable_len, _PRIVSIZE);
    350 	if (ret)
    351 		goto err;
    352 	if (__mb_len_max_runtime <
    353 	    _citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) {
    354 		ret = EINVAL;
    355 		goto err;
    356 	}
    357 
    358 	for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
    359 		wint_t wc;
    360 		_RuneType rc;
    361 
    362 		ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc);
    363 		if (ret)
    364 			goto err;
    365 		if (wc == WEOF) {
    366 			rlp->rlp_ctype_tab[i + 1] = 0;
    367 			rlp->rlp_tolower_tab[i + 1] = i;
    368 			rlp->rlp_toupper_tab[i + 1] = i;
    369 		} else {
    370 			rc = _runetype_priv(rl, wc);
    371 			rlp->rlp_ctype_tab[i + 1] = (unsigned short)
    372 			    ((rc & ~_RUNETYPE_SWM) >> 8);
    373 
    374 #ifdef __BUILD_LEGACY
    375 			rlp->rlp_compat_bsdctype[i + 1]
    376 			  = _runetype_to_bsdctype(rc);
    377 #endif
    378 
    379 #define CONVERT_MAP(name)						\
    380 do {									\
    381 	wint_t map;							\
    382 	int c;								\
    383 									\
    384 	map = _towctrans_priv(wc, _wctrans_##name(rl));			\
    385 	if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype,	\
    386 	    map, &c)  || c == EOF))					\
    387 		c = i;							\
    388 	rlp->rlp_to##name##_tab[i + 1] = (short)c;			\
    389 } while (0)
    390 
    391 			CONVERT_MAP(lower);
    392 			CONVERT_MAP(upper);
    393 		}
    394 	}
    395 	*prl = rl;
    396 	return 0;
    397 
    398 err:
    399 #ifndef __CHAR_UNSIGNED__
    400 	free_guarded(rlp->rlp_ctype_tab, sizeof(rlp->rlp_ctype_tab[0]),
    401 	    _CTYPE_NUM_CHARS + 1);
    402 	free_guarded(rlp->rlp_tolower_tab, sizeof(rlp->rlp_tolower_tab[0]),
    403 	    _CTYPE_NUM_CHARS + 1);
    404 	free_guarded(rlp->rlp_toupper_tab, sizeof(rlp->rlp_toupper_tab[0]),
    405 	    _CTYPE_NUM_CHARS + 1);
    406 #endif
    407 	free(rlp);
    408 	return ret;
    409 }
    410 
    411 int
    412 _rune_load(const char * __restrict var, size_t lenvar,
    413     _RuneLocale ** __restrict prl)
    414 {
    415 	int ret;
    416 
    417 	_DIAGASSERT(var != NULL || lenvar < 1);
    418 	_DIAGASSERT(prl != NULL);
    419 
    420 	if (lenvar < 1)
    421 		return EFTYPE;
    422 	switch (*var) {
    423 	case 'R':
    424 		ret = _rune_read_file(var, lenvar, prl);
    425 		break;
    426 	default:
    427 		ret = EFTYPE;
    428 	}
    429 	return ret;
    430 }
    431