Home | History | Annotate | Line # | Download | only in gen
      1 /*	$NetBSD: ctype_guard.h,v 1.8 2025/09/15 00:11:54 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2025 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #ifndef	_LIBC_CTYPE_GUARD_H_
     30 #define	_LIBC_CTYPE_GUARD_H_
     31 
     32 #include <sys/cdefs.h>
     33 
     34 #include <stdbool.h>
     35 
     36 #include "ctype_local.h"
     37 
     38 /*
     39  * On platforms where char is signed, the ctype(3) functions have an
     40  * insidious trap where logic like
     41  *
     42  *	char *s = ...;
     43  *
     44  *	if (isspace(*s)) ...
     45  *
     46  * is undefined behaviour whenever the string s has any bytes outside
     47  * the 7-bit US-ASCII range.  The correct way to do this is to cast any
     48  * char inputs to unsigned char first:
     49  *
     50  *	if (isspace((unsigned char)*s)) ...
     51  *
     52  * Unfortunately, the buggy idiom without an unsigned char cast is
     53  * extremely prevalent in C code in the wild.  (See the ctype(3) man
     54  * page for details on why this idiom is required and why isspace
     55  * itself can't just be `fixed'.)
     56  *
     57  * The ctype(3) functions are implemented as macros that expand to
     58  * simple table lookups -- partly for performance, and partly to
     59  * deliberately trigger -Wchar-subscript compiler warnings for
     60  * suspicious code like the above.
     61  *
     62  * To noisily detect the undefined behaviour without incurring overhead
     63  * for the defined cases of the ctype(3) functions, we put a guard page
     64  * before the tables in memory so that (at least for inputs other than
     65  * 0xff, which as a signed char coincides with EOF = -1) attempts to use
     66  * the ctype(3) functions on invalid inputs will reliably -- and
     67  * safely! -- crash with SIGSEGV instead of sometimes returning
     68  * nondeterministic nonsense results.
     69  *
     70  * We do this by defining two symbols, one local and one global:
     71  *
     72  *	- The local (C `static') symbol _C_ctype_tab_guarded_ is
     73  *	  page-aligned and has an extra page-sized buffer at the
     74  *	  beginning for a guard page.  It is defined as an ordinary C
     75  *	  array to keep the source code legible.
     76  *
     77  *	- The global symbol _C_ctype_tab_ starts one page past the
     78  *	  start of _C_ctype_tab_guarded_.  It is defined, via some
     79  *	  macros for convenience, by the assembler directives:
     80  *
     81  *		.type _C_ctype_tab_,@object
     82  *		.global _C_ctype_tab_
     83  *		_C_ctype_tab_ = _C_ctype_tab_guarded_ + PAGE_SIZE
     84  *
     85  *	  (with PAGE_SIZE being replaced by the numeric value from
     86  *	  vmparam.h -- actually, we use MAX_PAGE_SIZE for the handful
     87  *	  of architectures that support different page sizes on
     88  *	  different machines).
     89  *
     90  * Then, at startup, we mprotect the guard page PROT_NONE.
     91  *
     92  * The global symbol _C_ctype_tab_ is an immutable part of the libc
     93  * ABI, and is used by, e.g., libstdc++.so, so we have to define it
     94  * compatibly -- this is why it must be defined as an ELF global symbol
     95  * in its own right, and not simply handled inside libc.so by
     96  * additional arithmetic relative to _C_ctype_tab_guarded_.
     97  */
     98 
        /*
         * _CTYPE_GUARD_PAGE is 1 only when char is signed (__CHAR_UNSIGNED__
         * is not defined) and _CTYPE_DYNAMIC is defined; in every other case
         * it is 0.  The `#if _CTYPE_GUARD_PAGE' test further down uses it to
         * choose between the guarded and unguarded table definitions.
         */
     99 #if defined(__CHAR_UNSIGNED__)	/* disable if char is unsigned */
    100 #  define	_CTYPE_GUARD_PAGE	0
    101 #elif defined(_CTYPE_DYNAMIC)	/* enable in shared libc */
    102 #  define	_CTYPE_GUARD_PAGE	1
    103 #else	/* static libc -- let's aim for space-efficiency for now */
    104 #  define	_CTYPE_GUARD_PAGE	0
    105 #endif
    106 
        /*
         * __ctype_table_object(name):
         *	Emit an assembler `.type' directive declaring the symbol
         *	`name' (passed through _C_LABEL_STRING) to be a data object.
         *	The two variants differ only in the type-tag prefix:
         *	`%object' when __arm__ is defined, `@object' otherwise.
         *	NOTE(review): presumably because `@' begins a comment in ARM
         *	assembler syntax -- confirm against the assembler manual.
         *
         *	The expansion has no trailing semicolon; the user supplies it.
         */
    107 #ifdef __arm__
    108 #  define	__ctype_table_object(name)				      \
    109 	__asm(".type " _C_LABEL_STRING(#name) ",%object")
    110 #else
    111 #  define	__ctype_table_object(name)				      \
    112 	__asm(".type " _C_LABEL_STRING(#name) ",@object")
    113 #endif
    114 
        /*
         * __ctype_table_size(name, guard, nelem, elemsize):
         *	Check at compile time that the array `guard' has elements of
         *	`elemsize' bytes and occupies exactly _CTYPE_GUARD_SIZE bytes
         *	plus `nelem' elements, then emit an assembler `.size'
         *	directive giving the symbol `name' a size of nelem*elemsize
         *	bytes, i.e. not counting the guard region.
         *
         *	_CTYPE_GUARD_SIZE is defined further down in this header; that
         *	is fine because it is only expanded where this macro is used.
         *	The final __asm has no trailing semicolon; the user supplies it.
         */
    115 #define	__ctype_table_size(name, guard, nelem, elemsize)		      \
    116 	__CTASSERT(sizeof((guard)[0]) == (elemsize));			      \
    117 	__CTASSERT(sizeof(guard) == _CTYPE_GUARD_SIZE + (nelem)*(elemsize));  \
    118 	__asm(".size " _C_LABEL_STRING(#name) ","			      \
    119 	    ___STRING((nelem) * (elemsize)))
    120 
        /*
         * Declared for both the guarded and unguarded configurations and
         * marked __dso_hidden; the definition is not in this header.
         * NOTE(review): the name suggests it reports whether invalid
         * ctype(3) inputs are to be tolerated -- confirm at the definition.
         */
    121 __dso_hidden bool allow_ctype_abuse(void);
    122 
    123 #if _CTYPE_GUARD_PAGE
    124 
        /* Source of PAGE_SIZE and, where available, MAX_PAGE_SIZE used below. */
    125 #  include <machine/vmparam.h>
    126 
        /*
         * Only declared when the guard page is enabled; the definition is not
         * in this header.
         * NOTE(review): presumably the variant of allow_ctype_abuse() that is
         * usable from the startup code which protects the guard page --
         * confirm at the definition.
         */
    127 __dso_hidden bool constructor_allow_ctype_abuse(void);
    128 
    129 /*
    130  * _CTYPE_GUARD_SIZE must be a macro so it will work through ___STRING
    131  * to produce a string for symbol arithmetic in __asm.
         *
         * It is the size in bytes of the guard region placed in front of each
         * table: MAX_PAGE_SIZE where that is defined, PAGE_SIZE otherwise.
    132  */
    133 #  ifdef MAX_PAGE_SIZE
    134 #    define	_CTYPE_GUARD_SIZE	MAX_PAGE_SIZE
    135 #  else
    136 #    define	_CTYPE_GUARD_SIZE	PAGE_SIZE
    137 #  endif
    138 
        /*
         * _CTYPE_GUARD_INIT(n, x):
         *	Expands to the range designator `[0 ... (n) - 1] = (x),' --
         *	a GNU C extension -- which initializes elements 0 through
         *	n-1 to x.  The trailing comma lets further initializers
         *	follow it directly in the same initializer list.
         */
    139 #  define	_CTYPE_GUARD_INIT(n, x)	[0 ... (n) - 1] = (x),
    140 
        /*
         * For each table, the number of elements of that table's element
         * type that fit in _CTYPE_GUARD_SIZE bytes.  The count for the
         * legacy BSD table exists only under __BUILD_LEGACY.
         * NOTE(review): presumably these are the `n' handed to
         * _CTYPE_GUARD_INIT and the length of the guard prefix of each
         * guarded array -- confirm at the table definitions.
         */
    141 enum {
    142 	_C_CTYPE_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_ctype_tab_[0]),
    143 #  ifdef __BUILD_LEGACY
    144 	_C_COMPAT_BSDCTYPE_GUARD =
    145 	    _CTYPE_GUARD_SIZE/sizeof(_C_compat_bsdctype[0]),
    146 #  endif
    147 	_C_TOLOWER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_tolower_tab_[0]),
    148 	_C_TOUPPER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_toupper_tab_[0]),
    149 };
    150 
        /*
         * __ctype_table:
         *	Alignment attribute, _CTYPE_GUARD_SIZE bytes.
         *	NOTE(review): presumably applied to the definition of each
         *	guarded array so the guard region starts on a page boundary
         *	-- confirm at the table definitions.
         *
         * __ctype_table_guarded(name, guard, nelem, elemsize):
         *	Guarded variant.  Marks `name' as an object, makes it a
         *	global symbol, and defines it by assembler symbol arithmetic
         *	as the address of `guard' plus _CTYPE_GUARD_SIZE; then
         *	checks and sizes it with __ctype_table_size.  The expansion
         *	has no trailing semicolon; the user supplies it.
         */
    151 #  define	__ctype_table	__aligned(_CTYPE_GUARD_SIZE)
    152 #  define	__ctype_table_guarded(name, guard, nelem, elemsize)	      \
    153 	__ctype_table_object(name);					      \
    154 	__asm(".global " _C_LABEL_STRING(#name));			      \
    155 	__asm(_C_LABEL_STRING(#name) " = " _C_LABEL_STRING(#guard) " + "      \
    156 	    ___STRING(_CTYPE_GUARD_SIZE));				      \
    157 	__ctype_table_size(name, guard, nelem, elemsize)
    158 
    159 #else  /* !_CTYPE_GUARD_PAGE */
    160 
        /* No guard region: zero bytes precede the table data. */
    161 #  define	_CTYPE_GUARD_SIZE	0
    162 
        /* With no guard there is nothing to add to the initializer list. */
    163 #  define	_CTYPE_GUARD_INIT(n, x)	/* empty */
    164 
        /*
         * Same enumerator names as in the guarded configuration, all zero
         * here because there is no guard region.
         */
    165 enum {
    166 	_C_CTYPE_TAB_GUARD = 0,
    167 #  ifdef __BUILD_LEGACY
    168 	_C_COMPAT_BSDCTYPE_GUARD = 0,
    169 #  endif
    170 	_C_TOLOWER_TAB_GUARD = 0,
    171 	_C_TOUPPER_TAB_GUARD = 0,
    172 };
    173 
    174 /* Compiler can't see into __strong_alias, so mark it __used. */
    175 #  define	__ctype_table	__used
        /*
         * __ctype_table_guarded(name, guard, nelem, elemsize):
         *	Unguarded variant.  Marks `name' as an object and makes it a
         *	strong alias for `guard' itself (no offset, since
         *	_CTYPE_GUARD_SIZE is 0 here), then checks and sizes it with
         *	__ctype_table_size.  Nothing separates __strong_alias(...)
         *	from the next macro, so this relies on __strong_alias
         *	expanding to a complete statement.
         */
    176 #  define	__ctype_table_guarded(name, guard, nelem, elemsize)	      \
    177 	__ctype_table_object(name);					      \
    178 	__strong_alias(name, guard)					      \
    179 	__ctype_table_size(name, guard, nelem, elemsize)
    180 
    181 #endif	/* _CTYPE_GUARD_PAGE */
    182 
    183 #endif	/* _LIBC_CTYPE_GUARD_H_ */
    184