/*	$NetBSD: ctype_guard.h,v 1.8 2025/09/15 00:11:54 riastradh Exp $	*/

/*-
 * Copyright (c) 2025 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef	_LIBC_CTYPE_GUARD_H_
#define	_LIBC_CTYPE_GUARD_H_

#include <sys/cdefs.h>

#include <stdbool.h>

#include "ctype_local.h"

/*
 * On platforms where char is signed, the ctype(3) functions have an
 * insidious trap where logic like
 *
 *	char *s = ...;
 *
 *	if (isspace(*s)) ...
 *
 * is undefined behaviour whenever the string s has any bytes outside
 * the 7-bit US-ASCII range.  The correct way to do this is to cast any
 * char inputs to unsigned char first:
 *
 *	if (isspace((unsigned char)*s)) ...
 *
 * Unfortunately, the buggy idiom without an unsigned char cast is
 * extremely prevalent in C code in the wild.  (See the ctype(3) man
 * page for details on why this idiom is required and why isspace
 * itself can't just be `fixed'.)
 *
 * The ctype(3) functions are implemented as macros that expand to
 * simple table lookups -- partly for performance, and partly to
 * deliberately trigger -Wchar-subscript compiler warnings for
 * suspicious code like the above.
 *
 * To noisily detect the undefined behaviour without incurring overhead
 * for the defined cases of the ctype(3) functions, we put a guard page
 * before the tables in memory so that (at least for inputs other than
 * 0xff, which coincides as signed char with EOF = -1) attempts to use
 * the ctype(3) functions on invalid inputs will reliably -- and
 * safely! -- crash with SIGSEGV instead of sometimes returning
 * nondeterministic nonsense results.
 *
 * We do this by defining two symbols, one local and one global:
 *
 * - The local (C `static') symbol _C_ctype_tab_guarded_ is
 *   page-aligned and has an extra page-sized buffer at the
 *   beginning for a guard page.  It is defined as an ordinary C
 *   array to keep the source code legible.
 *
 * - The global symbol _C_ctype_tab_ starts one page past the
 *   start of _C_ctype_tab_guarded_.  It is defined, via some
 *   macros for convenience, by the assembler directives:
 *
 *		.type _C_ctype_tab_,@object
 *		.global _C_ctype_tab_
 *		_C_ctype_tab_ = _C_ctype_tab_guarded_ + PAGE_SIZE
 *
 *   (with PAGE_SIZE being replaced by the numeric value from
 *   vmparam.h -- actually, we use MAX_PAGE_SIZE for the handful
 *   of architectures that support different page sizes on
 *   different machines).
 *
 * Then, at startup, we mprotect the guard page PROT_NONE.
 *
 * The global symbol _C_ctype_tab_ is an immutable part of the libc
 * ABI, and is used by, e.g., libstdc++.so, so we have to define it
 * compatibly -- this is why it must be defined as an ELF global symbol
 * in its own right, and not simply handled inside libc.so by
 * additional arithmetic relative to _C_ctype_tab_guarded_.
 */

/*
 * _CTYPE_GUARD_PAGE
 *
 *	Compile-time switch for the whole mechanism: 1 if the guard
 *	page is built in, 0 if not.  It is 1 only when char is signed
 *	(__CHAR_UNSIGNED__ is not defined) and _CTYPE_DYNAMIC is
 *	defined; every other combination yields 0.  Everything below
 *	that depends on the guard is selected by `#if _CTYPE_GUARD_PAGE'.
 */
#if defined(__CHAR_UNSIGNED__)	/* disable if char is unsigned */
#  define	_CTYPE_GUARD_PAGE	0
#elif defined(_CTYPE_DYNAMIC)	/* enable in shared libc */
#  define	_CTYPE_GUARD_PAGE	1
#else	/* static libc -- let's aim for space-efficiency for now */
#  define	_CTYPE_GUARD_PAGE	0
#endif

/*
 * __ctype_table_object(name)
 *
 *	Emit the assembler directive `.type <name>,@object' for the
 *	symbol `name' (passed through _C_LABEL_STRING to obtain the
 *	assembler-level spelling).  Under __arm__ the type operand is
 *	spelled `%object' instead of `@object'.
 *
 *	NOTE(review): presumably because `@' begins a comment in ARM
 *	assembler syntax -- confirm against the assembler manual.
 *
 *	The expansion is a single __asm(...) with no terminating
 *	semicolon; the use site supplies it.
 */
#ifdef __arm__
#  define	__ctype_table_object(name)				\
	__asm(".type " _C_LABEL_STRING(#name) ",%object")
#else
#  define	__ctype_table_object(name)				\
	__asm(".type " _C_LABEL_STRING(#name) ",@object")
#endif

/*
 * __ctype_table_size(name, guard, nelem, elemsize)
 *
 *	name		global symbol whose ELF size is being set
 *	guard		C array object that backs `name'
 *	nelem		number of elements visible through `name'
 *	elemsize	size in bytes of one element
 *
 *	First verify at compile time that the arguments agree with the
 *	array actually declared:
 *
 *	- each element of `guard' is `elemsize' bytes, and
 *	- `guard' is exactly _CTYPE_GUARD_SIZE bytes larger than the
 *	  nelem*elemsize bytes exposed through `name'.
 *
 *	Then emit `.size <name>,(nelem) * (elemsize)', so the size
 *	recorded for `name' excludes the guard region.
 *
 *	_CTYPE_GUARD_SIZE is defined further down, in whichever branch
 *	of `#if _CTYPE_GUARD_PAGE' is active; that is fine because it
 *	is only expanded where this macro is used.
 *
 *	The final __asm(...) is left without a semicolon; the use site
 *	supplies it.
 */
#define	__ctype_table_size(name, guard, nelem, elemsize)		\
	__CTASSERT(sizeof((guard)[0]) == (elemsize));			\
	__CTASSERT(sizeof(guard) == _CTYPE_GUARD_SIZE + (nelem)*(elemsize)); \
	__asm(".size " _C_LABEL_STRING(#name) ","			\
	    ___STRING((nelem) * (elemsize)))

/*
 * Declared whether or not the guard page is enabled.  Marked
 * __dso_hidden; the definition is not in this header.
 */
__dso_hidden bool allow_ctype_abuse(void);

#if _CTYPE_GUARD_PAGE

/* Needed for PAGE_SIZE / MAX_PAGE_SIZE, used for _CTYPE_GUARD_SIZE below. */
#  include <machine/vmparam.h>

/*
 * Declared only when the guard page is enabled.  Marked __dso_hidden;
 * the definition is not in this header.
 */
__dso_hidden bool constructor_allow_ctype_abuse(void);

/*
 * _CTYPE_GUARD_SIZE must be a macro so it will work through ___STRING
 * to produce a string for symbol arithmetic in __asm.
 *
 * MAX_PAGE_SIZE is preferred when the platform defines it; otherwise
 * PAGE_SIZE is used.
 */
#  ifdef MAX_PAGE_SIZE
#    define	_CTYPE_GUARD_SIZE	MAX_PAGE_SIZE
#  else
#    define	_CTYPE_GUARD_SIZE	PAGE_SIZE
#  endif

/*
 * _CTYPE_GUARD_INIT(n, x)
 *
 *	Initializer fragment that sets elements 0 through n-1 of an
 *	array to x, using a range designator (a GNU C extension).  The
 *	expansion carries its own trailing comma.
 *
 *	NOTE(review): presumably placed at the head of each guarded
 *	table's initializer list, with n being the matching *_GUARD
 *	constant from the enum below -- confirm at the use sites.
 */
#  define	_CTYPE_GUARD_INIT(n, x)	[0 ... (n) - 1] = (x),

/*
 * Length of the guard region expressed in elements of each table,
 * i.e. _CTYPE_GUARD_SIZE divided by that table's element size.  The
 * tables themselves are not declared in this header (presumably they
 * come from ctype_local.h).  The legacy table is covered only when
 * __BUILD_LEGACY is defined.
 */
enum {
	_C_CTYPE_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_ctype_tab_[0]),
#  ifdef __BUILD_LEGACY
	_C_COMPAT_BSDCTYPE_GUARD =
	    _CTYPE_GUARD_SIZE/sizeof(_C_compat_bsdctype[0]),
#  endif
	_C_TOLOWER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_tolower_tab_[0]),
	_C_TOUPPER_TAB_GUARD = _CTYPE_GUARD_SIZE/sizeof(_C_toupper_tab_[0]),
};

/*
 * __ctype_table
 *
 *	Attribute for the definition of a guarded table array: align
 *	it to _CTYPE_GUARD_SIZE.
 *
 * __ctype_table_guarded(name, guard, nelem, elemsize)
 *
 *	Define the global symbol `name' in terms of the array `guard',
 *	following the scheme described at the top of this header.  In
 *	order, it emits:
 *
 *	1. `.type <name>,@object' (via __ctype_table_object),
 *	2. `.global <name>',
 *	3. `<name> = <guard> + _CTYPE_GUARD_SIZE', with the size
 *	   stringified by ___STRING for the assembler, and
 *	4. the compile-time checks and `.size' directive from
 *	   __ctype_table_size.
 *
 *	Arguments are as for __ctype_table_size.  The expansion ends
 *	without a semicolon; the use site supplies it.
 */
#  define	__ctype_table	__aligned(_CTYPE_GUARD_SIZE)
#  define	__ctype_table_guarded(name, guard, nelem, elemsize)	\
	__ctype_table_object(name);					\
	__asm(".global " _C_LABEL_STRING(#name));			\
	__asm(_C_LABEL_STRING(#name) " = " _C_LABEL_STRING(#guard) " + " \
	    ___STRING(_CTYPE_GUARD_SIZE));				\
	__ctype_table_size(name, guard, nelem, elemsize)

#else  /* !_CTYPE_GUARD_PAGE */

/*
 * Guard disabled: the guard region is zero bytes long, so the size
 * check in __ctype_table_size reduces to
 * sizeof(guard) == nelem*elemsize.
 */
#  define	_CTYPE_GUARD_SIZE	0

/* Guard disabled: no guard elements to initialize. */
#  define	_CTYPE_GUARD_INIT(n, x)	/* empty */

/* Guard disabled: every guard length is zero elements. */
enum {
	_C_CTYPE_TAB_GUARD = 0,
#  ifdef __BUILD_LEGACY
	_C_COMPAT_BSDCTYPE_GUARD = 0,
#  endif
	_C_TOLOWER_TAB_GUARD = 0,
	_C_TOUPPER_TAB_GUARD = 0,
};

/* Compiler can't see into __strong_alias, so mark it __used. */
#  define	__ctype_table	__used

/*
 * __ctype_table_guarded(name, guard, nelem, elemsize)
 *
 *	Guard disabled: `name' is made a strong alias of `guard'
 *	itself, with no offset, after being typed as an object; the
 *	compile-time checks and `.size' directive from
 *	__ctype_table_size follow.
 *
 *	There is deliberately no semicolon after __strong_alias(...).
 *	NOTE(review): presumably the <sys/cdefs.h> definition of
 *	__strong_alias supplies its own terminator -- confirm.
 *
 *	The expansion ends without a semicolon; the use site supplies
 *	it.
 */
#  define	__ctype_table_guarded(name, guard, nelem, elemsize)	\
	__ctype_table_object(name);					\
	__strong_alias(name, guard)					\
	__ctype_table_size(name, guard, nelem, elemsize)

#endif	/* _CTYPE_GUARD_PAGE */

#endif	/* _LIBC_CTYPE_GUARD_H_ */