/*	$NetBSD: x86_pte_tester.c,v 1.3 2022/08/21 14:06:42 mlelstv Exp $	*/

/*
 * Copyright (c) 2016 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29 #define __HAVE_DIRECT_MAP 30 #define __HAVE_PCPU_AREA 31 #define SVS 32 33 #include <sys/cdefs.h> 34 #include <sys/param.h> 35 #include <sys/module.h> 36 #include <sys/proc.h> 37 #include <sys/sysctl.h> 38 #include <uvm/uvm.h> 39 #include <x86/pmap.h> 40 41 #if defined(__x86_64__) 42 # include <amd64/pmap.h> 43 # include <amd64/pmap_private.h> 44 # define NLEVEL 4 45 #else 46 # error "Unsupported configuration" 47 #endif 48 49 static struct { 50 struct sysctllog *ctx_sysctllog; 51 vaddr_t levels[NLEVEL]; 52 struct { 53 size_t l4; 54 size_t l3; 55 size_t l2; 56 size_t l1; 57 } coord; 58 struct { 59 size_t n_rwx; 60 size_t n_shstk; 61 bool kernel_map_with_low_ptes; 62 bool pte_is_user_accessible; 63 size_t n_user_space_is_kernel; 64 size_t n_kernel_space_is_user; 65 size_t n_svs_g_bit_set; 66 } results; 67 } tester_ctx; 68 69 typedef enum { 70 WALK_NEXT, /* go to the next level */ 71 WALK_SKIP, /* skip the next level, but keep iterating on the current one */ 72 WALK_STOP /* stop the iteration on the current level */ 73 } walk_type; 74 75 /* -------------------------------------------------------------------------- */ 76 77 #define is_flag(__ent, __flag) (((__ent) & __flag) != 0) 78 #define is_valid(__ent) is_flag(__ent, PTE_P) 79 #define get_pa(__pde) (__pde & PTE_FRAME) 80 81 #define L4_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t)) 82 #define L3_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t)) 83 #define L2_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t)) 84 #define L1_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t)) 85 86 static void 87 scan_l1(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl)) 88 { 89 pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[0]; 90 size_t i; 91 92 pmap_kenter_pa(tester_ctx.levels[0], pa, VM_PROT_READ, 0); 93 pmap_update(pmap_kernel()); 94 95 for (i = 0; i < L1_MAX_NENTRIES; i++) { 96 tester_ctx.coord.l1 = i; 97 if (is_valid(pd[i])) { 98 fn(pd[i], i, 1); 99 } 100 } 101 102 pmap_kremove(tester_ctx.levels[0], PAGE_SIZE); 103 
pmap_update(pmap_kernel()); 104 } 105 106 static void 107 scan_l2(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl)) 108 { 109 pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[1]; 110 walk_type ret; 111 size_t i; 112 113 pmap_kenter_pa(tester_ctx.levels[1], pa, VM_PROT_READ, 0); 114 pmap_update(pmap_kernel()); 115 116 for (i = 0; i < L2_MAX_NENTRIES; i++) { 117 tester_ctx.coord.l2 = i; 118 if (!is_valid(pd[i])) 119 continue; 120 ret = fn(pd[i], i, 2); 121 if (ret == WALK_STOP) 122 break; 123 if (is_flag(pd[i], PTE_PS)) 124 continue; 125 if (ret == WALK_NEXT) 126 scan_l1(get_pa(pd[i]), fn); 127 } 128 129 pmap_kremove(tester_ctx.levels[1], PAGE_SIZE); 130 pmap_update(pmap_kernel()); 131 } 132 133 static void 134 scan_l3(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl)) 135 { 136 pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[2]; 137 walk_type ret; 138 size_t i; 139 140 pmap_kenter_pa(tester_ctx.levels[2], pa, VM_PROT_READ, 0); 141 pmap_update(pmap_kernel()); 142 143 for (i = 0; i < L3_MAX_NENTRIES; i++) { 144 tester_ctx.coord.l3 = i; 145 if (!is_valid(pd[i])) 146 continue; 147 ret = fn(pd[i], i, 3); 148 if (ret == WALK_STOP) 149 break; 150 if (is_flag(pd[i], PTE_PS)) 151 continue; 152 if (ret == WALK_NEXT) 153 scan_l2(get_pa(pd[i]), fn); 154 } 155 156 pmap_kremove(tester_ctx.levels[2], PAGE_SIZE); 157 pmap_update(pmap_kernel()); 158 } 159 160 static void 161 scan_l4(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl)) 162 { 163 pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[3]; 164 walk_type ret; 165 size_t i; 166 167 pmap_kenter_pa(tester_ctx.levels[3], pa, VM_PROT_READ, 0); 168 pmap_update(pmap_kernel()); 169 170 for (i = 0; i < L4_MAX_NENTRIES; i++) { 171 tester_ctx.coord.l4 = i; 172 if (!is_valid(pd[i])) 173 continue; 174 ret = fn(pd[i], i, 4); 175 if (ret == WALK_STOP) 176 break; 177 if (is_flag(pd[i], PTE_PS)) 178 continue; 179 if (ret == WALK_NEXT) 180 scan_l3(get_pa(pd[i]), fn); 181 } 182 183 
pmap_kremove(tester_ctx.levels[3], PAGE_SIZE); 184 pmap_update(pmap_kernel()); 185 } 186 187 static void 188 scan_tree(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl)) 189 { 190 scan_l4(pa, fn); 191 } 192 193 /* -------------------------------------------------------------------------- */ 194 195 /* 196 * Rule: the number of kernel RWX pages should be zero. 197 */ 198 static walk_type 199 count_krwx(pd_entry_t pde, size_t slot, int lvl) 200 { 201 if (lvl == NLEVEL && slot < 256) { 202 return WALK_SKIP; 203 } 204 if (is_flag(pde, PTE_NX) || !is_flag(pde, PTE_W)) { 205 return WALK_SKIP; 206 } 207 if (lvl != 1 && !is_flag(pde, PTE_PS)) { 208 return WALK_NEXT; 209 } 210 211 if (lvl == 4) { 212 tester_ctx.results.n_rwx += (NBPD_L4 / PAGE_SIZE); 213 } else if (lvl == 3) { 214 tester_ctx.results.n_rwx += (NBPD_L3 / PAGE_SIZE); 215 } else if (lvl == 2) { 216 tester_ctx.results.n_rwx += (NBPD_L2 / PAGE_SIZE); 217 } else if (lvl == 1) { 218 tester_ctx.results.n_rwx += (NBPD_L1 / PAGE_SIZE); 219 } 220 221 return WALK_NEXT; 222 } 223 224 /* 225 * Rule: the number of kernel SHSTK pages should be zero. 226 */ 227 static walk_type 228 count_kshstk(pd_entry_t pde, size_t slot, int lvl) 229 { 230 if (lvl == NLEVEL && slot < 256) { 231 return WALK_SKIP; 232 } 233 234 if (is_flag(pde, PTE_PS) || lvl == 1) { 235 if (!is_flag(pde, PTE_W) && is_flag(pde, PTE_D)) { 236 if (lvl == 4) { 237 tester_ctx.results.n_shstk += (NBPD_L4 / PAGE_SIZE); 238 } else if (lvl == 3) { 239 tester_ctx.results.n_shstk += (NBPD_L3 / PAGE_SIZE); 240 } else if (lvl == 2) { 241 tester_ctx.results.n_shstk += (NBPD_L2 / PAGE_SIZE); 242 } else if (lvl == 1) { 243 tester_ctx.results.n_shstk += (NBPD_L1 / PAGE_SIZE); 244 } 245 } 246 return WALK_SKIP; 247 } 248 249 if (!is_flag(pde, PTE_W)) { 250 return WALK_SKIP; 251 } 252 253 return WALK_NEXT; 254 } 255 256 /* 257 * Rule: the lower half of the kernel map must be zero. 
258 */ 259 static walk_type 260 check_kernel_map(pd_entry_t pde, size_t slot, int lvl) 261 { 262 if (lvl != NLEVEL) { 263 return WALK_STOP; 264 } 265 if (slot >= 256) { 266 return WALK_SKIP; 267 } 268 if (pde != 0) { 269 tester_ctx.results.kernel_map_with_low_ptes |= true; 270 } 271 return WALK_SKIP; 272 } 273 274 /* 275 * Rule: the PTE space must not have user permissions. 276 */ 277 static walk_type 278 check_pte_space(pd_entry_t pde, size_t slot, int lvl) 279 { 280 if (lvl != NLEVEL) { 281 return WALK_STOP; 282 } 283 if (slot != PDIR_SLOT_PTE) { 284 return WALK_SKIP; 285 } 286 if (is_flag(pde, PTE_U)) { 287 tester_ctx.results.pte_is_user_accessible |= true; 288 } 289 return WALK_SKIP; 290 } 291 292 /* 293 * Rule: each page in the lower half must have user permissions. 294 */ 295 static walk_type 296 check_user_space(pd_entry_t pde, size_t slot, int lvl) 297 { 298 if (lvl == NLEVEL && slot >= 256) { 299 return WALK_SKIP; 300 } 301 if (!is_flag(pde, PTE_U)) { 302 tester_ctx.results.n_user_space_is_kernel += 1; 303 return WALK_SKIP; 304 } 305 return WALK_NEXT; 306 } 307 308 /* 309 * Rule: each page in the higher half must have kernel permissions. 310 */ 311 static walk_type 312 check_kernel_space(pd_entry_t pde, size_t slot, int lvl) 313 { 314 if (lvl == NLEVEL && slot < 256) { 315 return WALK_SKIP; 316 } 317 if (lvl == NLEVEL && slot == PDIR_SLOT_PTE) { 318 return WALK_SKIP; 319 } 320 if (is_flag(pde, PTE_U)) { 321 tester_ctx.results.n_kernel_space_is_user += 1; 322 return WALK_SKIP; 323 } 324 return WALK_NEXT; 325 } 326 327 /* 328 * Rule: the SVS map is allowed to use the G bit only on the PCPU area. 
329 */ 330 static walk_type 331 check_svs_g_bit(pd_entry_t pde, size_t slot, int lvl) 332 { 333 if (lvl == NLEVEL && slot == PDIR_SLOT_PCPU) { 334 return WALK_SKIP; 335 } 336 if (is_flag(pde, PTE_G)) { 337 tester_ctx.results.n_svs_g_bit_set += 1; 338 return WALK_SKIP; 339 } 340 return WALK_NEXT; 341 } 342 343 /* -------------------------------------------------------------------------- */ 344 345 static void 346 scan_svs(void) 347 { 348 extern bool svs_enabled; 349 paddr_t pa0; 350 351 if (!svs_enabled) { 352 tester_ctx.results.n_svs_g_bit_set = -1; 353 return; 354 } 355 356 kpreempt_disable(); 357 pa0 = curcpu()->ci_svs_updirpa; 358 scan_tree(pa0, &check_user_space); 359 scan_tree(pa0, &check_kernel_space); 360 scan_tree(pa0, &check_svs_g_bit); 361 kpreempt_enable(); 362 } 363 364 static void 365 scan_proc(struct proc *p) 366 { 367 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 368 paddr_t pa0; 369 370 mutex_enter(&pmap->pm_lock); 371 372 kpreempt_disable(); 373 pa0 = (paddr_t)pmap->pm_pdirpa[0]; 374 scan_tree(pa0, &check_user_space); 375 scan_tree(pa0, &check_kernel_space); 376 scan_tree(pa0, &check_pte_space); 377 kpreempt_enable(); 378 379 mutex_exit(&pmap->pm_lock); 380 } 381 382 static void 383 x86_pte_run_scans(void) 384 { 385 struct pmap *kpm = pmap_kernel(); 386 paddr_t pa0; 387 388 memset(&tester_ctx.results, 0, sizeof(tester_ctx.results)); 389 390 /* Scan the current user process. */ 391 scan_proc(curproc); 392 393 /* Scan the SVS mapping. */ 394 scan_svs(); 395 396 /* Scan the kernel map. 
*/ 397 pa0 = (paddr_t)kpm->pm_pdirpa[0]; 398 scan_tree(pa0, &count_krwx); 399 scan_tree(pa0, &count_kshstk); 400 scan_tree(pa0, &check_kernel_map); 401 } 402 403 static void 404 x86_pte_levels_init(void) 405 { 406 size_t i; 407 for (i = 0; i < NLEVEL; i++) { 408 tester_ctx.levels[i] = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 409 UVM_KMF_VAONLY); 410 } 411 } 412 413 static void 414 x86_pte_levels_destroy(void) 415 { 416 size_t i; 417 for (i = 0; i < NLEVEL; i++) { 418 uvm_km_free(kernel_map, tester_ctx.levels[i], PAGE_SIZE, 419 UVM_KMF_VAONLY); 420 } 421 } 422 423 /* -------------------------------------------------------------------------- */ 424 425 static int 426 x86_pte_sysctl_run(SYSCTLFN_ARGS) 427 { 428 if (oldlenp == NULL) 429 return EINVAL; 430 431 x86_pte_run_scans(); 432 433 if (oldp == NULL) { 434 *oldlenp = sizeof(tester_ctx.results); 435 return 0; 436 } 437 438 if (*oldlenp < sizeof(tester_ctx.results)) 439 return ENOMEM; 440 441 return copyout(&tester_ctx.results, oldp, sizeof(tester_ctx.results)); 442 } 443 444 static int 445 x86_pte_sysctl_init(void) 446 { 447 struct sysctllog **log = &tester_ctx.ctx_sysctllog; 448 const struct sysctlnode *rnode, *cnode; 449 int error; 450 451 error = sysctl_createv(log, 0, NULL, &rnode, CTLFLAG_PERMANENT, 452 CTLTYPE_NODE, "x86_pte_test", 453 SYSCTL_DESCR("x86_pte testing interface"), 454 NULL, 0, NULL, 0, CTL_KERN, CTL_CREATE, CTL_EOL); 455 if (error) 456 goto out; 457 458 error = sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_PERMANENT, 459 CTLTYPE_STRUCT, "test", 460 SYSCTL_DESCR("execute a x86_pte test"), 461 x86_pte_sysctl_run, 0, NULL, 0, CTL_CREATE, CTL_EOL); 462 463 out: 464 if (error) 465 sysctl_teardown(log); 466 return error; 467 } 468 469 static void 470 x86_pte_sysctl_destroy(void) 471 { 472 sysctl_teardown(&tester_ctx.ctx_sysctllog); 473 } 474 475 /* -------------------------------------------------------------------------- */ 476 477 MODULE(MODULE_CLASS_MISC, x86_pte_tester, NULL); 478 479 static int 
480 x86_pte_tester_modcmd(modcmd_t cmd, void *arg __unused) 481 { 482 int error = 0; 483 484 switch (cmd) { 485 case MODULE_CMD_INIT: 486 x86_pte_levels_init(); 487 error = x86_pte_sysctl_init(); 488 break; 489 case MODULE_CMD_FINI: 490 x86_pte_sysctl_destroy(); 491 x86_pte_levels_destroy(); 492 break; 493 default: 494 error = ENOTTY; 495 break; 496 } 497 498 return error; 499 } 500