/*	$NetBSD: x86_pte_tester.c,v 1.3 2022/08/21 14:06:42 mlelstv Exp $	*/

/*
 * Copyright (c) 2016 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

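/*
 * Force the pmap features this module inspects. These defines must match
 * the options the running kernel was built with, so that the pmap headers
 * included below expose the direct map, the per-CPU area slot and the
 * SVS-related declarations.
 */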
#define __HAVE_DIRECT_MAP
#define __HAVE_PCPU_AREA
#define SVS

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <uvm/uvm.h>
#include <x86/pmap.h>

#if defined(__x86_64__)
# include <amd64/pmap.h>
# include <amd64/pmap_private.h>
# define NLEVEL 4
#else
# error "Unsupported configuration"
#endif

static struct {
	struct sysctllog *ctx_sysctllog;

	/*
	 * One scratch VA page per paging level; the page table pages under
	 * inspection are temporarily mapped here.
	 */
	vaddr_t levels[NLEVEL];

	/* Current position of the walk, one slot index per level. */
	struct {
		size_t l4;
		size_t l3;
		size_t l2;
		size_t l1;
	} coord;

	/* Counters and flags filled in by the scans, copied out via sysctl. */
	struct {
		size_t n_rwx;
		size_t n_shstk;
		bool kernel_map_with_low_ptes;
		bool pte_is_user_accessible;
		size_t n_user_space_is_kernel;
		size_t n_kernel_space_is_user;
		size_t n_svs_g_bit_set;
	} results;
} tester_ctx;

typedef enum {
	WALK_NEXT, /* go to the next level */
	WALK_SKIP, /* skip the next level, but keep iterating on the current one */
	WALK_STOP  /* stop the iteration on the current level */
} walk_type;

/* -------------------------------------------------------------------------- */

#define is_flag(__ent, __flag)	(((__ent) & (__flag)) != 0)
#define is_valid(__ent)		is_flag(__ent, PTE_P)
#define get_pa(__pde)		((__pde) & PTE_FRAME)

#define L4_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L3_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L2_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L1_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))

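/*
 * Per-level walkers. Each one temporarily maps the page table page at the
 * given physical address onto its reserved scratch VA (read-only), iterates
 * over the valid entries, hands each entry to the callback, and recurses
 * into the next level unless the callback says otherwise or the entry maps
 * a large page (PTE_PS).
 */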
static void
scan_l1(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[0];
	size_t i;

	pmap_kenter_pa(tester_ctx.levels[0], pa, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	for (i = 0; i < L1_MAX_NENTRIES; i++) {
		tester_ctx.coord.l1 = i;
		if (is_valid(pd[i])) {
			fn(pd[i], i, 1);
		}
	}

	pmap_kremove(tester_ctx.levels[0], PAGE_SIZE);
	pmap_update(pmap_kernel());
}

static void
scan_l2(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[1];
	walk_type ret;
	size_t i;

	pmap_kenter_pa(tester_ctx.levels[1], pa, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	for (i = 0; i < L2_MAX_NENTRIES; i++) {
		tester_ctx.coord.l2 = i;
		if (!is_valid(pd[i]))
			continue;
		ret = fn(pd[i], i, 2);
		if (ret == WALK_STOP)
			break;
		if (is_flag(pd[i], PTE_PS))
			continue;
		if (ret == WALK_NEXT)
			scan_l1(get_pa(pd[i]), fn);
	}

	pmap_kremove(tester_ctx.levels[1], PAGE_SIZE);
	pmap_update(pmap_kernel());
}

static void
scan_l3(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[2];
	walk_type ret;
	size_t i;

	pmap_kenter_pa(tester_ctx.levels[2], pa, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	for (i = 0; i < L3_MAX_NENTRIES; i++) {
		tester_ctx.coord.l3 = i;
		if (!is_valid(pd[i]))
			continue;
		ret = fn(pd[i], i, 3);
		if (ret == WALK_STOP)
			break;
		if (is_flag(pd[i], PTE_PS))
			continue;
		if (ret == WALK_NEXT)
			scan_l2(get_pa(pd[i]), fn);
	}

	pmap_kremove(tester_ctx.levels[2], PAGE_SIZE);
	pmap_update(pmap_kernel());
}

static void
scan_l4(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[3];
	walk_type ret;
	size_t i;

	pmap_kenter_pa(tester_ctx.levels[3], pa, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	for (i = 0; i < L4_MAX_NENTRIES; i++) {
		tester_ctx.coord.l4 = i;
		if (!is_valid(pd[i]))
			continue;
		ret = fn(pd[i], i, 4);
		if (ret == WALK_STOP)
			break;
		if (is_flag(pd[i], PTE_PS))
			continue;
		if (ret == WALK_NEXT)
			scan_l3(get_pa(pd[i]), fn);
	}

	pmap_kremove(tester_ctx.levels[3], PAGE_SIZE);
	pmap_update(pmap_kernel());
}

static void
scan_tree(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	scan_l4(pa, fn);
}

/* -------------------------------------------------------------------------- */

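/*
 * On amd64 the L4 page (512 slots) splits the VA space in two: slots 0-255
 * cover the lower (user) half, slots 256-511 the upper (kernel) half.
 * PDIR_SLOT_PTE is the recursive self-mapping slot, PDIR_SLOT_PCPU the
 * per-CPU area. The rules below rely on this layout.
 */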
/*
 * Rule: the number of kernel RWX pages should be zero.
 */
static walk_type
count_krwx(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot < 256) {
		return WALK_SKIP;
	}
	if (is_flag(pde, PTE_NX) || !is_flag(pde, PTE_W)) {
		return WALK_SKIP;
	}
	if (lvl != 1 && !is_flag(pde, PTE_PS)) {
		return WALK_NEXT;
	}

	if (lvl == 4) {
		tester_ctx.results.n_rwx += (NBPD_L4 / PAGE_SIZE);
	} else if (lvl == 3) {
		tester_ctx.results.n_rwx += (NBPD_L3 / PAGE_SIZE);
	} else if (lvl == 2) {
		tester_ctx.results.n_rwx += (NBPD_L2 / PAGE_SIZE);
	} else if (lvl == 1) {
		tester_ctx.results.n_rwx += (NBPD_L1 / PAGE_SIZE);
	}

	return WALK_NEXT;
}

/*
 * Rule: the number of kernel SHSTK pages should be zero.
 */
static walk_type
count_kshstk(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot < 256) {
		return WALK_SKIP;
	}

	if (is_flag(pde, PTE_PS) || lvl == 1) {
		if (!is_flag(pde, PTE_W) && is_flag(pde, PTE_D)) {
			if (lvl == 4) {
				tester_ctx.results.n_shstk += (NBPD_L4 / PAGE_SIZE);
			} else if (lvl == 3) {
				tester_ctx.results.n_shstk += (NBPD_L3 / PAGE_SIZE);
			} else if (lvl == 2) {
				tester_ctx.results.n_shstk += (NBPD_L2 / PAGE_SIZE);
			} else if (lvl == 1) {
				tester_ctx.results.n_shstk += (NBPD_L1 / PAGE_SIZE);
			}
		}
		return WALK_SKIP;
	}

	if (!is_flag(pde, PTE_W)) {
		return WALK_SKIP;
	}

	return WALK_NEXT;
}

/*
 * Rule: the lower half of the kernel map must be zero.
 */
static walk_type
check_kernel_map(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl != NLEVEL) {
		return WALK_STOP;
	}
	if (slot >= 256) {
		return WALK_SKIP;
	}
	if (pde != 0) {
		tester_ctx.results.kernel_map_with_low_ptes |= true;
	}
	return WALK_SKIP;
}

/*
 * Rule: the PTE space must not have user permissions.
 */
static walk_type
check_pte_space(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl != NLEVEL) {
		return WALK_STOP;
	}
	if (slot != PDIR_SLOT_PTE) {
		return WALK_SKIP;
	}
	if (is_flag(pde, PTE_U)) {
		tester_ctx.results.pte_is_user_accessible |= true;
	}
	return WALK_SKIP;
}

/*
 * Rule: each page in the lower half must have user permissions.
 */
static walk_type
check_user_space(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot >= 256) {
		return WALK_SKIP;
	}
	if (!is_flag(pde, PTE_U)) {
		tester_ctx.results.n_user_space_is_kernel += 1;
		return WALK_SKIP;
	}
	return WALK_NEXT;
}

/*
 * Rule: each page in the higher half must have kernel permissions.
 */
static walk_type
check_kernel_space(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot < 256) {
		return WALK_SKIP;
	}
	if (lvl == NLEVEL && slot == PDIR_SLOT_PTE) {
		return WALK_SKIP;
	}
	if (is_flag(pde, PTE_U)) {
		tester_ctx.results.n_kernel_space_is_user += 1;
		return WALK_SKIP;
	}
	return WALK_NEXT;
}

/*
 * Rule: the SVS map is allowed to use the G bit only on the PCPU area.
 */
static walk_type
check_svs_g_bit(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot == PDIR_SLOT_PCPU) {
		return WALK_SKIP;
	}
	if (is_flag(pde, PTE_G)) {
		tester_ctx.results.n_svs_g_bit_set += 1;
		return WALK_SKIP;
	}
	return WALK_NEXT;
}

/* -------------------------------------------------------------------------- */

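/*
 * Scan the SVS (Separate Virtual Space) user page tree of the current CPU.
 * SVS is NetBSD's kernel/user page table split (Meltdown mitigation); each
 * CPU has its own user-mode tree, rooted at ci_svs_updirpa.
 */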
static void
scan_svs(void)
{
	extern bool svs_enabled;
	paddr_t pa0;

	if (!svs_enabled) {
		/* Report (size_t)-1 so the consumer can tell "not applicable". */
		tester_ctx.results.n_svs_g_bit_set = -1;
		return;
	}

	kpreempt_disable();
	pa0 = curcpu()->ci_svs_updirpa;
	scan_tree(pa0, &check_user_space);
	scan_tree(pa0, &check_kernel_space);
	scan_tree(pa0, &check_svs_g_bit);
	kpreempt_enable();
}

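/*
 * Scan the page tree of a user process. The pmap lock is held so the tree
 * cannot change under our feet, and preemption stays disabled for the
 * duration of the walk.
 */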
static void
scan_proc(struct proc *p)
{
	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
	paddr_t pa0;

	mutex_enter(&pmap->pm_lock);

	kpreempt_disable();
	pa0 = (paddr_t)pmap->pm_pdirpa[0];
	scan_tree(pa0, &check_user_space);
	scan_tree(pa0, &check_kernel_space);
	scan_tree(pa0, &check_pte_space);
	kpreempt_enable();

	mutex_exit(&pmap->pm_lock);
}

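/*
 * Run all the scans: the current user process, the current CPU's SVS tree,
 * and the kernel map. Results accumulate in tester_ctx.results.
 */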
static void
x86_pte_run_scans(void)
{
	struct pmap *kpm = pmap_kernel();
	paddr_t pa0;

	memset(&tester_ctx.results, 0, sizeof(tester_ctx.results));

	/* Scan the current user process. */
	scan_proc(curproc);

	/* Scan the SVS mapping. */
	scan_svs();

	/* Scan the kernel map. */
	pa0 = (paddr_t)kpm->pm_pdirpa[0];
	scan_tree(pa0, &count_krwx);
	scan_tree(pa0, &count_kshstk);
	scan_tree(pa0, &check_kernel_map);
}

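/*
 * Reserve (and later release) one VA-only page per paging level; these are
 * the scratch windows the scan_l* functions map page table pages into.
 */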
static void
x86_pte_levels_init(void)
{
	size_t i;
	for (i = 0; i < NLEVEL; i++) {
		tester_ctx.levels[i] = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
		    UVM_KMF_VAONLY);
	}
}

static void
x86_pte_levels_destroy(void)
{
	size_t i;
	for (i = 0; i < NLEVEL; i++) {
		uvm_km_free(kernel_map, tester_ctx.levels[i], PAGE_SIZE,
		    UVM_KMF_VAONLY);
	}
}

/* -------------------------------------------------------------------------- */

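/*
 * Sysctl handler. Each read re-runs the scans; a read with oldp == NULL
 * only reports the size of the results structure, otherwise the structure
 * is copied out to the caller.
 */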
static int
x86_pte_sysctl_run(SYSCTLFN_ARGS)
{
	if (oldlenp == NULL)
		return EINVAL;

	x86_pte_run_scans();

	if (oldp == NULL) {
		*oldlenp = sizeof(tester_ctx.results);
		return 0;
	}

	if (*oldlenp < sizeof(tester_ctx.results))
		return ENOMEM;

	return copyout(&tester_ctx.results, oldp, sizeof(tester_ctx.results));
}

static int
x86_pte_sysctl_init(void)
{
	struct sysctllog **log = &tester_ctx.ctx_sysctllog;
	const struct sysctlnode *rnode, *cnode;
	int error;

	error = sysctl_createv(log, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "x86_pte_test",
	    SYSCTL_DESCR("x86_pte testing interface"),
	    NULL, 0, NULL, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	error = sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "test",
	    SYSCTL_DESCR("execute an x86_pte test"),
	    x86_pte_sysctl_run, 0, NULL, 0, CTL_CREATE, CTL_EOL);

out:
	if (error)
		sysctl_teardown(log);
	return error;
}
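
/*
 * A minimal userland consumer, as a sketch only: it assumes the node ends
 * up reachable as "kern.x86_pte_test.test" and that the caller declares a
 * struct mirroring the layout of tester_ctx.results (both are assumptions,
 * not guaranteed by this file):
 *
 *	struct results {
 *		size_t n_rwx, n_shstk;
 *		bool kernel_map_with_low_ptes, pte_is_user_accessible;
 *		size_t n_user_space_is_kernel, n_kernel_space_is_user;
 *		size_t n_svs_g_bit_set;
 *	} res;
 *	size_t len = sizeof(res);
 *
 *	if (sysctlbyname("kern.x86_pte_test.test", &res, &len, NULL, 0) == -1)
 *		err(EXIT_FAILURE, "sysctlbyname");
 */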

static void
x86_pte_sysctl_destroy(void)
{
	sysctl_teardown(&tester_ctx.ctx_sysctllog);
}

/* -------------------------------------------------------------------------- */

MODULE(MODULE_CLASS_MISC, x86_pte_tester, NULL);

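/*
 * Module entry point: allocate the scratch VA pages and attach the sysctl
 * tree on load, tear both down on unload.
 */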
static int
x86_pte_tester_modcmd(modcmd_t cmd, void *arg __unused)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
		x86_pte_levels_init();
		error = x86_pte_sysctl_init();
		break;
	case MODULE_CMD_FINI:
		x86_pte_sysctl_destroy();
		x86_pte_levels_destroy();
		break;
	default:
		error = ENOTTY;
		break;
	}

	return error;
}
    500