1 1.14 riastrad /* $NetBSD: dtrace_subr.c,v 1.14 2022/08/21 18:58:45 riastradh Exp $ */ 2 1.2 darran 3 1.1 darran /* 4 1.1 darran * CDDL HEADER START 5 1.1 darran * 6 1.1 darran * The contents of this file are subject to the terms of the 7 1.1 darran * Common Development and Distribution License, Version 1.0 only 8 1.1 darran * (the "License"). You may not use this file except in compliance 9 1.1 darran * with the License. 10 1.1 darran * 11 1.1 darran * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 12 1.1 darran * or http://www.opensolaris.org/os/licensing. 13 1.1 darran * See the License for the specific language governing permissions 14 1.1 darran * and limitations under the License. 15 1.1 darran * 16 1.1 darran * When distributing Covered Code, include this CDDL HEADER in each 17 1.1 darran * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 18 1.1 darran * If applicable, add the following below this CDDL HEADER, with the 19 1.1 darran * fields enclosed by brackets "[]" replaced with your own identifying 20 1.1 darran * information: Portions Copyright [yyyy] [name of copyright owner] 21 1.1 darran * 22 1.1 darran * CDDL HEADER END 23 1.1 darran * 24 1.9 chs * $FreeBSD: head/sys/cddl/dev/dtrace/i386/dtrace_subr.c 313850 2017-02-17 03:27:20Z markj $ 25 1.1 darran * 26 1.1 darran */ 27 1.1 darran /* 28 1.1 darran * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 29 1.1 darran * Use is subject to license terms. 30 1.1 darran */ 31 1.1 darran 32 1.9 chs /* 33 1.9 chs * Copyright (c) 2011, Joyent, Inc. All rights reserved. 34 1.9 chs */ 35 1.9 chs 36 1.1 darran #include <sys/param.h> 37 1.1 darran #include <sys/systm.h> 38 1.1 darran #include <sys/types.h> 39 1.1 darran #include <sys/kernel.h> 40 1.1 darran #include <sys/malloc.h> 41 1.1 darran #include <sys/kmem.h> 42 1.2 darran #include <sys/xcall.h> 43 1.2 darran #include <sys/cpu.h> 44 1.2 darran #include <sys/cpuvar.h> 45 1.1 darran #include <sys/dtrace_impl.h> 46 1.1 darran #include <sys/dtrace_bsd.h> 47 1.2 darran #include <machine/cpu.h> 48 1.14 riastrad #include <machine/cpufunc.h> 49 1.1 darran #include <machine/clock.h> 50 1.1 darran #include <machine/frame.h> 51 1.2 darran #include <uvm/uvm_pglist.h> 52 1.2 darran #include <uvm/uvm_prot.h> 53 1.2 darran #include <uvm/uvm_pmap.h> 54 1.1 darran 55 1.3 tron #include <x86/include/cpu_counter.h> 56 1.3 tron 57 1.1 darran extern uintptr_t kernelbase; 58 1.9 chs 59 1.9 chs extern void dtrace_getnanotime(struct timespec *tsp); 60 1.1 darran 61 1.8 chs int dtrace_invop(uintptr_t, struct trapframe *, uintptr_t); 62 1.1 darran 63 1.1 darran typedef struct dtrace_invop_hdlr { 64 1.8 chs int (*dtih_func)(uintptr_t, struct trapframe *, uintptr_t); 65 1.1 darran struct dtrace_invop_hdlr *dtih_next; 66 1.1 darran } dtrace_invop_hdlr_t; 67 1.1 darran 68 1.1 darran dtrace_invop_hdlr_t *dtrace_invop_hdlr; 69 1.1 darran 70 1.3 tron void dtrace_gethrtime_init(void *arg); 71 1.3 tron 72 1.1 darran int 73 1.8 chs dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax) 74 1.1 darran { 75 1.1 darran dtrace_invop_hdlr_t *hdlr; 76 1.1 darran int rval; 77 1.1 darran 78 1.1 darran for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) 79 1.8 chs if ((rval = hdlr->dtih_func(addr, frame, eax)) != 0) 80 1.1 darran return (rval); 81 1.1 darran 82 1.1 darran return (0); 83 1.1 darran } 84 1.1 darran 85 1.1 darran void 86 1.8 chs dtrace_invop_add(int (*func)(uintptr_t, struct trapframe *, uintptr_t)) 87 1.1 darran { 88 1.1 darran dtrace_invop_hdlr_t *hdlr; 89 1.1 darran 90 1.13 simonb hdlr = kmem_alloc(sizeof(*hdlr), KM_SLEEP); 91 1.1 darran hdlr->dtih_func = func; 92 1.1 darran hdlr->dtih_next = dtrace_invop_hdlr; 93 1.1 darran dtrace_invop_hdlr = hdlr; 94 1.1 darran } 95 1.1 darran 96 1.1 darran void 97 1.8 chs dtrace_invop_remove(int (*func)(uintptr_t, struct trapframe *, uintptr_t)) 98 1.1 darran { 99 1.1 darran dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL; 100 1.1 darran 101 1.1 darran for (;;) { 102 1.1 darran if (hdlr == NULL) 103 1.1 darran panic("attempt to remove non-existent invop handler"); 104 1.1 darran 105 1.1 darran if (hdlr->dtih_func == func) 106 1.1 darran break; 107 1.1 darran 108 1.1 darran prev = hdlr; 109 1.1 darran hdlr = hdlr->dtih_next; 110 1.1 darran } 111 1.1 darran 112 1.1 darran if (prev == NULL) { 113 1.1 darran ASSERT(dtrace_invop_hdlr == hdlr); 114 1.1 darran dtrace_invop_hdlr = hdlr->dtih_next; 115 1.1 darran } else { 116 1.1 darran ASSERT(dtrace_invop_hdlr != hdlr); 117 1.1 darran prev->dtih_next = hdlr->dtih_next; 118 1.1 darran } 119 1.1 darran 120 1.13 simonb kmem_free(hdlr, sizeof(*hdlr)); 121 1.1 darran } 122 1.1 darran 123 1.1 darran void 124 1.1 darran dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) 125 1.1 darran { 126 1.1 darran (*func)(0, kernelbase); 127 1.1 darran } 128 1.1 darran 129 1.2 darran static void 130 1.2 darran xcall_func(void *arg0, void *arg1) 131 1.2 darran { 132 1.2 darran dtrace_xcall_t func = arg0; 133 1.2 darran 134 1.2 darran (*func)(arg1); 135 1.2 darran } 136 1.2 darran 137 1.1 darran void 138 1.7 chs dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) 139 1.1 darran { 140 1.2 darran uint64_t where; 141 1.1 darran 142 1.7 chs if (cpu == DTRACE_CPUALL) { 143 1.2 darran where = xc_broadcast(0, xcall_func, func, arg); 144 1.2 darran } else { 145 1.2 darran struct cpu_info *cinfo = cpu_lookup(cpu); 146 1.1 darran 147 1.2 darran KASSERT(cinfo != NULL); 148 1.2 darran where = xc_unicast(0, xcall_func, func, arg, cinfo); 149 1.1 darran } 150 1.2 darran xc_wait(where); 151 1.1 darran 152 1.2 darran /* XXX Q. Do we really need the other cpus to wait also? 153 1.2 darran * (see solaris:xc_sync()) 154 1.2 darran */ 155 1.1 darran } 156 1.1 darran 157 1.1 darran static void 158 1.1 darran dtrace_sync_func(void) 159 1.1 darran { 160 1.1 darran } 161 1.1 darran 162 1.1 darran void 163 1.1 darran dtrace_sync(void) 164 1.1 darran { 165 1.1 darran dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); 166 1.1 darran } 167 1.1 darran 168 1.1 darran #ifdef notyet 169 1.1 darran void 170 1.1 darran dtrace_safe_synchronous_signal(void) 171 1.1 darran { 172 1.1 darran kthread_t *t = curthread; 173 1.1 darran struct regs *rp = lwptoregs(ttolwp(t)); 174 1.1 darran size_t isz = t->t_dtrace_npc - t->t_dtrace_pc; 175 1.1 darran 176 1.1 darran ASSERT(t->t_dtrace_on); 177 1.1 darran 178 1.1 darran /* 179 1.1 darran * If we're not in the range of scratch addresses, we're not actually 180 1.1 darran * tracing user instructions so turn off the flags. If the instruction 181 1.1 darran * we copied out caused a synchonous trap, reset the pc back to its 182 1.1 darran * original value and turn off the flags. 183 1.1 darran */ 184 1.1 darran if (rp->r_pc < t->t_dtrace_scrpc || 185 1.1 darran rp->r_pc > t->t_dtrace_astpc + isz) { 186 1.1 darran t->t_dtrace_ft = 0; 187 1.1 darran } else if (rp->r_pc == t->t_dtrace_scrpc || 188 1.1 darran rp->r_pc == t->t_dtrace_astpc) { 189 1.1 darran rp->r_pc = t->t_dtrace_pc; 190 1.1 darran t->t_dtrace_ft = 0; 191 1.1 darran } 192 1.1 darran } 193 1.1 darran 194 1.1 darran int 195 1.1 darran dtrace_safe_defer_signal(void) 196 1.1 darran { 197 1.1 darran kthread_t *t = curthread; 198 1.1 darran struct regs *rp = lwptoregs(ttolwp(t)); 199 1.1 darran size_t isz = t->t_dtrace_npc - t->t_dtrace_pc; 200 1.1 darran 201 1.1 darran ASSERT(t->t_dtrace_on); 202 1.1 darran 203 1.1 darran /* 204 1.1 darran * If we're not in the range of scratch addresses, we're not actually 205 1.1 darran * tracing user instructions so turn off the flags. 206 1.1 darran */ 207 1.1 darran if (rp->r_pc < t->t_dtrace_scrpc || 208 1.1 darran rp->r_pc > t->t_dtrace_astpc + isz) { 209 1.1 darran t->t_dtrace_ft = 0; 210 1.1 darran return (0); 211 1.1 darran } 212 1.1 darran 213 1.1 darran /* 214 1.9 chs * If we have executed the original instruction, but we have performed 215 1.9 chs * neither the jmp back to t->t_dtrace_npc nor the clean up of any 216 1.9 chs * registers used to emulate %rip-relative instructions in 64-bit mode, 217 1.9 chs * we'll save ourselves some effort by doing that here and taking the 218 1.9 chs * signal right away. We detect this condition by seeing if the program 219 1.9 chs * counter is the range [scrpc + isz, astpc). 220 1.1 darran */ 221 1.9 chs if (rp->r_pc >= t->t_dtrace_scrpc + isz && 222 1.9 chs rp->r_pc < t->t_dtrace_astpc) { 223 1.1 darran #ifdef __amd64 224 1.1 darran /* 225 1.1 darran * If there is a scratch register and we're on the 226 1.1 darran * instruction immediately after the modified instruction, 227 1.1 darran * restore the value of that scratch register. 228 1.1 darran */ 229 1.1 darran if (t->t_dtrace_reg != 0 && 230 1.1 darran rp->r_pc == t->t_dtrace_scrpc + isz) { 231 1.1 darran switch (t->t_dtrace_reg) { 232 1.1 darran case REG_RAX: 233 1.1 darran rp->r_rax = t->t_dtrace_regv; 234 1.1 darran break; 235 1.1 darran case REG_RCX: 236 1.1 darran rp->r_rcx = t->t_dtrace_regv; 237 1.1 darran break; 238 1.1 darran case REG_R8: 239 1.1 darran rp->r_r8 = t->t_dtrace_regv; 240 1.1 darran break; 241 1.1 darran case REG_R9: 242 1.1 darran rp->r_r9 = t->t_dtrace_regv; 243 1.1 darran break; 244 1.1 darran } 245 1.1 darran } 246 1.1 darran #endif 247 1.1 darran rp->r_pc = t->t_dtrace_npc; 248 1.1 darran t->t_dtrace_ft = 0; 249 1.1 darran return (0); 250 1.1 darran } 251 1.1 darran 252 1.1 darran /* 253 1.1 darran * Otherwise, make sure we'll return to the kernel after executing 254 1.1 darran * the copied out instruction and defer the signal. 255 1.1 darran */ 256 1.1 darran if (!t->t_dtrace_step) { 257 1.1 darran ASSERT(rp->r_pc < t->t_dtrace_astpc); 258 1.1 darran rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc; 259 1.1 darran t->t_dtrace_step = 1; 260 1.1 darran } 261 1.1 darran 262 1.1 darran t->t_dtrace_ast = 1; 263 1.1 darran 264 1.1 darran return (1); 265 1.1 darran } 266 1.1 darran #endif 267 1.1 darran 268 1.1 darran static int64_t tgt_cpu_tsc; 269 1.1 darran static int64_t hst_cpu_tsc; 270 1.2 darran static int64_t tsc_skew[MAXCPUS]; 271 1.1 darran static uint64_t nsec_scale; 272 1.1 darran 273 1.1 darran /* See below for the explanation of this macro. */ 274 1.1 darran #define SCALE_SHIFT 28 275 1.1 darran 276 1.2 darran static __inline uint64_t 277 1.2 darran dtrace_rdtsc(void) 278 1.2 darran { 279 1.2 darran uint64_t rv; 280 1.2 darran 281 1.2 darran __asm __volatile("rdtsc" : "=A" (rv)); 282 1.2 darran return (rv); 283 1.2 darran } 284 1.2 darran 285 1.1 darran static void 286 1.1 darran dtrace_gethrtime_init_cpu(void *arg) 287 1.1 darran { 288 1.1 darran uintptr_t cpu = (uintptr_t) arg; 289 1.1 darran 290 1.2 darran if (cpu == cpu_number()) 291 1.2 darran tgt_cpu_tsc = dtrace_rdtsc(); 292 1.1 darran else 293 1.2 darran hst_cpu_tsc = dtrace_rdtsc(); 294 1.1 darran } 295 1.1 darran 296 1.2 darran void 297 1.1 darran dtrace_gethrtime_init(void *arg) 298 1.1 darran { 299 1.1 darran uint64_t tsc_f; 300 1.2 darran CPU_INFO_ITERATOR cpuind; 301 1.2 darran struct cpu_info *cinfo = curcpu(); 302 1.2 darran cpuid_t cur_cpuid = cpu_number(); /* current cpu id */ 303 1.1 darran 304 1.1 darran /* 305 1.1 darran * Get TSC frequency known at this moment. 306 1.1 darran * This should be constant if TSC is invariant. 307 1.1 darran * Otherwise tick->time conversion will be inaccurate, but 308 1.1 darran * will preserve monotonic property of TSC. 309 1.1 darran */ 310 1.2 darran tsc_f = cpu_frequency(cinfo); 311 1.1 darran 312 1.1 darran /* 313 1.1 darran * The following line checks that nsec_scale calculated below 314 1.1 darran * doesn't overflow 32-bit unsigned integer, so that it can multiply 315 1.1 darran * another 32-bit integer without overflowing 64-bit. 316 1.1 darran * Thus minimum supported TSC frequency is 62.5MHz. 317 1.1 darran */ 318 1.9 chs KASSERTMSG(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), 319 1.9 chs "TSC frequency is too low"); 320 1.1 darran 321 1.1 darran /* 322 1.1 darran * We scale up NANOSEC/tsc_f ratio to preserve as much precision 323 1.1 darran * as possible. 324 1.1 darran * 2^28 factor was chosen quite arbitrarily from practical 325 1.1 darran * considerations: 326 1.1 darran * - it supports TSC frequencies as low as 62.5MHz (see above); 327 1.1 darran * - it provides quite good precision (e < 0.01%) up to THz 328 1.1 darran * (terahertz) values; 329 1.1 darran */ 330 1.1 darran nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f; 331 1.1 darran 332 1.1 darran /* The current CPU is the reference one. */ 333 1.2 darran tsc_skew[cur_cpuid] = 0; 334 1.1 darran 335 1.2 darran for (CPU_INFO_FOREACH(cpuind, cinfo)) { 336 1.2 darran /* use skew relative to cpu 0 */ 337 1.2 darran tsc_skew[cpu_index(cinfo)] = cinfo->ci_data.cpu_cc_skew; 338 1.2 darran } 339 1.2 darran 340 1.2 darran /* Already handled in x86/tsc.c for ci_data.cpu_cc_skew */ 341 1.2 darran #if 0 342 1.9 chs /* The current CPU is the reference one. */ 343 1.9 chs sched_pin(); 344 1.9 chs tsc_skew[curcpu] = 0; 345 1.9 chs CPU_FOREACH(i) { 346 1.1 darran if (i == curcpu) 347 1.1 darran continue; 348 1.1 darran 349 1.9 chs pc = pcpu_find(i); 350 1.9 chs CPU_SETOF(PCPU_GET(cpuid), &map); 351 1.9 chs CPU_SET(pc->pc_cpuid, &map); 352 1.1 darran 353 1.9 chs smp_rendezvous_cpus(map, NULL, 354 1.1 darran dtrace_gethrtime_init_cpu, 355 1.1 darran smp_no_rendevous_barrier, (void *)(uintptr_t) i); 356 1.1 darran 357 1.1 darran tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc; 358 1.1 darran } 359 1.9 chs sched_unpin(); 360 1.2 darran #endif 361 1.1 darran } 362 1.1 darran 363 1.9 chs #ifdef __FreeBSD__ 364 1.9 chs #ifdef EARLY_AP_STARTUP 365 1.9 chs SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY, 366 1.9 chs dtrace_gethrtime_init, NULL); 367 1.9 chs #else 368 1.9 chs SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, 369 1.9 chs NULL); 370 1.9 chs #endif 371 1.9 chs #endif 372 1.9 chs 373 1.1 darran /* 374 1.1 darran * DTrace needs a high resolution time function which can 375 1.1 darran * be called from a probe context and guaranteed not to have 376 1.1 darran * instrumented with probes itself. 377 1.1 darran * 378 1.1 darran * Returns nanoseconds since boot. 379 1.1 darran */ 380 1.1 darran uint64_t 381 1.1 darran dtrace_gethrtime() 382 1.1 darran { 383 1.1 darran uint64_t tsc; 384 1.1 darran uint32_t lo; 385 1.1 darran uint32_t hi; 386 1.1 darran 387 1.1 darran /* 388 1.1 darran * We split TSC value into lower and higher 32-bit halves and separately 389 1.1 darran * scale them with nsec_scale, then we scale them down by 2^28 390 1.1 darran * (see nsec_scale calculations) taking into account 32-bit shift of 391 1.1 darran * the higher half and finally add. 392 1.1 darran */ 393 1.2 darran tsc = dtrace_rdtsc() + tsc_skew[cpu_number()]; 394 1.1 darran lo = tsc; 395 1.1 darran hi = tsc >> 32; 396 1.1 darran return (((lo * nsec_scale) >> SCALE_SHIFT) + 397 1.1 darran ((hi * nsec_scale) << (32 - SCALE_SHIFT))); 398 1.1 darran } 399 1.1 darran 400 1.1 darran uint64_t 401 1.1 darran dtrace_gethrestime(void) 402 1.1 darran { 403 1.9 chs struct timespec current_time; 404 1.9 chs 405 1.9 chs dtrace_getnanotime(¤t_time); 406 1.9 chs 407 1.9 chs return (current_time.tv_sec * 1000000000ULL + current_time.tv_nsec); 408 1.1 darran } 409 1.1 darran 410 1.1 darran /* Function to handle DTrace traps during probes. See i386/i386/trap.c */ 411 1.1 darran int 412 1.1 darran dtrace_trap(struct trapframe *frame, u_int type) 413 1.1 darran { 414 1.9 chs bool nofault; 415 1.2 darran cpuid_t cpuid = cpu_number(); /* current cpu id */ 416 1.2 darran 417 1.1 darran /* 418 1.1 darran * A trap can occur while DTrace executes a probe. Before 419 1.1 darran * executing the probe, DTrace blocks re-scheduling and sets 420 1.9 chs * a flag in its per-cpu flags to indicate that it doesn't 421 1.9 chs * want to fault. On returning from the probe, the no-fault 422 1.1 darran * flag is cleared and finally re-scheduling is enabled. 423 1.1 darran * 424 1.1 darran * Check if DTrace has enabled 'no-fault' mode: 425 1.1 darran */ 426 1.9 chs nofault = (cpu_core[cpuid].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0; 427 1.9 chs if (nofault) { 428 1.12 rin KASSERTMSG((x86_read_flags() & PSL_I) == 0, 429 1.12 rin "interrupts enabled"); 430 1.9 chs 431 1.1 darran /* 432 1.1 darran * There are only a couple of trap types that are expected. 433 1.1 darran * All the rest will be handled in the usual way. 434 1.1 darran */ 435 1.1 darran switch (type) { 436 1.1 darran /* General protection fault. */ 437 1.1 darran case T_PROTFLT: 438 1.1 darran /* Flag an illegal operation. */ 439 1.2 darran cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 440 1.1 darran 441 1.1 darran /* 442 1.1 darran * Offset the instruction pointer to the instruction 443 1.1 darran * following the one causing the fault. 444 1.1 darran */ 445 1.1 darran frame->tf_eip += dtrace_instr_size((u_char *) frame->tf_eip); 446 1.1 darran return (1); 447 1.1 darran /* Page fault. */ 448 1.1 darran case T_PAGEFLT: 449 1.1 darran /* Flag a bad address. */ 450 1.2 darran cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; 451 1.2 darran cpu_core[cpuid].cpuc_dtrace_illval = rcr2(); 452 1.1 darran 453 1.1 darran /* 454 1.1 darran * Offset the instruction pointer to the instruction 455 1.1 darran * following the one causing the fault. 456 1.1 darran */ 457 1.1 darran frame->tf_eip += dtrace_instr_size((u_char *) frame->tf_eip); 458 1.1 darran return (1); 459 1.1 darran default: 460 1.1 darran /* Handle all other traps in the usual way. */ 461 1.1 darran break; 462 1.1 darran } 463 1.1 darran } 464 1.1 darran 465 1.1 darran /* Handle the trap in the usual way. */ 466 1.1 darran return (0); 467 1.1 darran } 468