/*	$NetBSD: subr_prof.c,v 1.52 2026/01/04 03:20:46 riastradh Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)subr_prof.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_prof.c,v 1.52 2026/01/04 03:20:46 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#endif

#include <sys/param.h>
#include <sys/types.h>

#include <sys/cpu.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef GPROF
#include <sys/gmon.h>
#include <sys/malloc.h>
#include <sys/xcall.h>

MALLOC_DEFINE(M_GPROF, "gprof", "kernel profiling buffer");

static int sysctl_kern_profiling(SYSCTLFN_ARGS);
#ifdef MULTIPROCESSOR
void _gmonparam_merge(struct gmonparam *, struct gmonparam *);
#endif

/*
 * froms is actually an array of unsigned shorts indexing into tos.
 */
struct gmonparam _gmonparam = { .state = GMON_PROF_OFF };

/* Actual start of the kernel text segment. */
extern char kernel_text[];

extern char etext[];


void
kmstartup(void)
{
        char *cp;
        struct gmonparam *p = &_gmonparam;
        unsigned long size;

        /*
         * Round lowpc and highpc to multiples of the density we're using
         * so the rest of the scaling (here and in gprof) stays in ints.
         */
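        /*
         * Illustrative numbers only -- HISTFRACTION and HISTCOUNTER
         * are machine-dependent: with HISTFRACTION 2 and a 16-bit
         * HISTCOUNTER, the bounds are rounded to 4-byte multiples and
         * each kcount bucket then accounts for
         * HISTFRACTION * sizeof(HISTCOUNTER) = 4 bytes of kernel text.
         */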
        p->lowpc = rounddown(((u_long)kernel_text),
            HISTFRACTION * sizeof(HISTCOUNTER));
        p->highpc = roundup((u_long)etext,
            HISTFRACTION * sizeof(HISTCOUNTER));
        p->textsize = p->highpc - p->lowpc;
        printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
            p->textsize, p->lowpc, p->highpc);
        p->kcountsize = p->textsize / HISTFRACTION;
        p->hashfraction = HASHFRACTION;
        p->fromssize = p->textsize / HASHFRACTION;
        p->tolimit = p->textsize * ARCDENSITY / 100;
        if (p->tolimit < MINARCS)
                p->tolimit = MINARCS;
        else if (p->tolimit > MAXARCS)
                p->tolimit = MAXARCS;
        p->tossize = p->tolimit * sizeof(struct tostruct);

        size = p->kcountsize + p->fromssize + p->tossize;
#ifdef MULTIPROCESSOR
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        for (CPU_INFO_FOREACH(cii, ci)) {
                p = malloc(sizeof(struct gmonparam) + size, M_GPROF,
                    M_NOWAIT | M_ZERO);
                if (p == NULL) {
                        printf("No memory for profiling on %s\n",
                            cpu_name(ci));
                        /* cannot profile on this cpu */
                        continue;
                }
                memcpy(p, &_gmonparam, sizeof(_gmonparam));
                ci->ci_gmon = p;

                /*
                 * To allow profiling to be controlled solely by the
                 * global _gmonparam.state, default each CPU's state to
                 * GMON_PROF_ON; mcount() still does nothing unless
                 * _gmonparam.state is ON as well.  This preserves
                 * compatibility with the kgmon(8) kmem interface.
                 */
                p->state = GMON_PROF_ON;

                cp = (char *)(p + 1);
                p->tos = (struct tostruct *)cp;
                p->kcount = (u_short *)(cp + p->tossize);
                p->froms = (u_short *)(cp + p->tossize + p->kcountsize);
        }

        sysctl_createv(NULL, 0, NULL, NULL,
            0, CTLTYPE_NODE, "percpu",
            SYSCTL_DESCR("per cpu profiling information"),
            NULL, 0, NULL, 0,
            CTL_KERN, KERN_PROF, GPROF_PERCPU, CTL_EOL);

        for (CPU_INFO_FOREACH(cii, ci)) {
                if (ci->ci_gmon == NULL)
                        continue;

                sysctl_createv(NULL, 0, NULL, NULL,
                    0, CTLTYPE_NODE, cpu_name(ci),
                    NULL,
                    NULL, 0, NULL, 0,
                    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
                    CTL_EOL);

                sysctl_createv(NULL, 0, NULL, NULL,
                    CTLFLAG_READWRITE, CTLTYPE_INT, "state",
                    SYSCTL_DESCR("Profiling state"),
                    sysctl_kern_profiling, 0, (void *)ci, 0,
                    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
                    GPROF_STATE, CTL_EOL);
                sysctl_createv(NULL, 0, NULL, NULL,
                    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "count",
                    SYSCTL_DESCR("Array of statistical program counters"),
                    sysctl_kern_profiling, 0, (void *)ci, 0,
                    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
                    GPROF_COUNT, CTL_EOL);
                sysctl_createv(NULL, 0, NULL, NULL,
                    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "froms",
                    SYSCTL_DESCR("Array indexed by program counter of "
                    "call-from points"),
                    sysctl_kern_profiling, 0, (void *)ci, 0,
                    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
                    GPROF_FROMS, CTL_EOL);
                sysctl_createv(NULL, 0, NULL, NULL,
                    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "tos",
                    SYSCTL_DESCR("Array of structures describing "
                    "destination of calls and their counts"),
                    sysctl_kern_profiling, 0, (void *)ci, 0,
                    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
                    GPROF_TOS, CTL_EOL);
                sysctl_createv(NULL, 0, NULL, NULL,
                    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "gmonparam",
                    SYSCTL_DESCR("Structure giving the sizes of the above "
                    "arrays"),
                    sysctl_kern_profiling, 0, (void *)ci, 0,
                    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
                    GPROF_GMONPARAM, CTL_EOL);
        }
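
        /*
         * The loop above publishes one subtree per CPU; the resulting
         * node names look like this (illustrative, assuming the usual
         * "cpuN" names returned by cpu_name()):
         *
         *      kern.profiling.percpu.cpu0.state
         *      kern.profiling.percpu.cpu0.count
         *      kern.profiling.percpu.cpu0.froms
         *      kern.profiling.percpu.cpu0.tos
         *      kern.profiling.percpu.cpu0.gmonparam
         */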

        /*
         * For minimal compatibility with the kgmon(8) kmem interface,
         * _gmonparam shares its buffers with the boot CPU's ci_gmon.
         */
        p = curcpu()->ci_gmon;
        if (p != NULL) {
                _gmonparam.tos = p->tos;
                _gmonparam.kcount = p->kcount;
                _gmonparam.froms = p->froms;
        }
#else /* MULTIPROCESSOR */
        cp = malloc(size, M_GPROF, M_NOWAIT | M_ZERO);
        if (cp == NULL) {
                printf("No memory for profiling.\n");
                return;
        }
        p->tos = (struct tostruct *)cp;
        cp += p->tossize;
        p->kcount = (u_short *)cp;
        cp += p->kcountsize;
        p->froms = (u_short *)cp;
#endif /* MULTIPROCESSOR */
}

#ifdef MULTIPROCESSOR
static void
prof_set_state_xc(void *arg1, void *arg2 __unused)
{
        int state = PTRTOUINT64(arg1);
        struct gmonparam *gp = curcpu()->ci_gmon;

        if (gp != NULL)
                gp->state = state;
}
#endif /* MULTIPROCESSOR */

/*
 * sysctl helper routine for the kern.profiling subtree: returns kernel
 * profiling information, enables/disables kernel profiling, and gives
 * out copies of the profiling data.
 */
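/*
 * A userland consumer such as kgmon(8) would read these nodes roughly
 * like this (illustrative sketch only, not kernel code):
 *
 *      int mib[3] = { CTL_KERN, KERN_PROF, GPROF_GMONPARAM };
 *      struct gmonparam gp;
 *      size_t len = sizeof(gp);
 *
 *      if (sysctl(mib, 3, &gp, &len, NULL, 0) == -1)
 *              err(EXIT_FAILURE, "sysctl kern.profiling.gmonparam");
 */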
static int
sysctl_kern_profiling(SYSCTLFN_ARGS)
{
        struct sysctlnode node = *rnode;
        struct gmonparam *gp;
        int error;
#ifdef MULTIPROCESSOR
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci, *target_ci;
        uint64_t where;
        int state;
        bool prof_on, do_merge;

        target_ci = (struct cpu_info *)rnode->sysctl_data;
        do_merge = (oldp != NULL) && (target_ci == NULL) &&
            ((node.sysctl_num == GPROF_COUNT) ||
            (node.sysctl_num == GPROF_FROMS) ||
            (node.sysctl_num == GPROF_TOS));

        if (do_merge) {
                /* kern.profiling.{count,froms,tos} */
                unsigned long size;
                char *cp;

                /*
                 * Allocate a temporary gmonparam and merge the results
                 * of all CPUs into it.
                 */
                size = _gmonparam.kcountsize + _gmonparam.fromssize +
                    _gmonparam.tossize;
                gp = malloc(sizeof(struct gmonparam) + size, M_GPROF,
                    M_NOWAIT | M_ZERO);
                if (gp == NULL)
                        return SET_ERROR(ENOMEM);
                memcpy(gp, &_gmonparam, sizeof(_gmonparam));
                cp = (char *)(gp + 1);
                gp->tos = (struct tostruct *)cp;
                gp->kcount = (u_short *)(cp + gp->tossize);
                gp->froms = (u_short *)(cp + gp->tossize + gp->kcountsize);

                for (CPU_INFO_FOREACH(cii, ci)) {
                        if (ci->ci_gmon == NULL)
                                continue;
                        _gmonparam_merge(gp, ci->ci_gmon);
                }
        } else if (target_ci != NULL) {
                /* kern.profiling.percpu.* */
                gp = target_ci->ci_gmon;
        } else {
                /* kern.profiling.{state,gmonparam} */
                gp = &_gmonparam;
        }
#else /* MULTIPROCESSOR */
        gp = &_gmonparam;
#endif

        switch (node.sysctl_num) {
        case GPROF_STATE:
#ifdef MULTIPROCESSOR
                /*
                 * If _gmonparam.state is OFF, the state of each CPU is
                 * reported as OFF, even if it is actually ON.
                 */
                if (_gmonparam.state == GMON_PROF_OFF ||
                    gp->state == GMON_PROF_OFF)
                        state = GMON_PROF_OFF;
                else
                        state = GMON_PROF_ON;
                node.sysctl_data = &state;
#else
                node.sysctl_data = &gp->state;
#endif
                break;
        case GPROF_COUNT:
                node.sysctl_data = gp->kcount;
                node.sysctl_size = gp->kcountsize;
                break;
        case GPROF_FROMS:
                node.sysctl_data = gp->froms;
                node.sysctl_size = gp->fromssize;
                break;
        case GPROF_TOS:
                node.sysctl_data = gp->tos;
                node.sysctl_size = gp->tossize;
                break;
        case GPROF_GMONPARAM:
                node.sysctl_data = gp;
                node.sysctl_size = sizeof(*gp);
                break;
        default:
                return SET_ERROR(EOPNOTSUPP);
        }

        error = sysctl_lookup(SYSCTLFN_CALL(&node));
        if (error || newp == NULL)
                goto done;

#ifdef MULTIPROCESSOR
        switch (node.sysctl_num) {
        case GPROF_STATE:
                if (target_ci != NULL) {
                        where = xc_unicast(0, prof_set_state_xc,
                            UINT64TOPTR(state), NULL, target_ci);
                        xc_wait(where);

                        /*
                         * If even one CPU is being profiled, keep the
                         * profiling clock running.
                         */
                        prof_on = false;
                        for (CPU_INFO_FOREACH(cii, ci)) {
                                if (ci->ci_gmon == NULL)
                                        continue;
                                if (ci->ci_gmon->state != GMON_PROF_OFF) {
                                        prof_on = true;
                                        break;
                                }
                        }
                        mutex_spin_enter(&proc0.p_stmutex);
                        if (prof_on)
                                startprofclock(&proc0);
                        else
                                stopprofclock(&proc0);
                        mutex_spin_exit(&proc0.p_stmutex);

                        if (prof_on) {
                                _gmonparam.state = GMON_PROF_ON;
                        } else {
                                _gmonparam.state = GMON_PROF_OFF;
                                /*
                                 * When _gmonparam.state and the state
                                 * of every CPU are OFF, reset each
                                 * CPU's state to ON so that profiling
                                 * of all CPUs can again be controlled
                                 * by _gmonparam.state alone.
                                 */
                                for (CPU_INFO_FOREACH(cii, ci)) {
                                        if (ci->ci_gmon == NULL)
                                                continue;
                                        ci->ci_gmon->state = GMON_PROF_ON;
                                }
                        }
                } else {
                        _gmonparam.state = state;
                        where = xc_broadcast(0, prof_set_state_xc,
                            UINT64TOPTR(state), NULL);
                        xc_wait(where);

                        mutex_spin_enter(&proc0.p_stmutex);
                        if (state == GMON_PROF_OFF)
                                stopprofclock(&proc0);
                        else
                                startprofclock(&proc0);
                        mutex_spin_exit(&proc0.p_stmutex);
                }
                break;
        case GPROF_COUNT:
                /*
                 * If kern.profiling.{count,froms,tos} is written, the
                 * same data is copied to kern.profiling.percpu.cpuN.*.
                 */
                if (target_ci == NULL) {
                        for (CPU_INFO_FOREACH(cii, ci)) {
                                if (ci->ci_gmon == NULL)
                                        continue;
                                memmove(ci->ci_gmon->kcount, gp->kcount,
                                    newlen);
                        }
                }
                break;
        case GPROF_FROMS:
                if (target_ci == NULL) {
                        for (CPU_INFO_FOREACH(cii, ci)) {
                                if (ci->ci_gmon == NULL)
                                        continue;
                                memmove(ci->ci_gmon->froms, gp->froms,
                                    newlen);
                        }
                }
                break;
        case GPROF_TOS:
                if (target_ci == NULL) {
                        for (CPU_INFO_FOREACH(cii, ci)) {
                                if (ci->ci_gmon == NULL)
                                        continue;
                                memmove(ci->ci_gmon->tos, gp->tos, newlen);
                        }
                }
                break;
        }
#else
        if (node.sysctl_num == GPROF_STATE) {
                mutex_spin_enter(&proc0.p_stmutex);
                if (gp->state == GMON_PROF_OFF)
                        stopprofclock(&proc0);
                else
                        startprofclock(&proc0);
                mutex_spin_exit(&proc0.p_stmutex);
        }
#endif

done:
#ifdef MULTIPROCESSOR
        if (do_merge)
                free(gp, M_GPROF);
#endif
        return error;
}
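
/*
 * In practice these knobs are driven with kgmon(8) (usage sketch; see
 * kgmon(8) for the authoritative flags): "kgmon -b" resumes collection
 * of profile data, "kgmon -h" halts it, and "kgmon -p" dumps the
 * buffers into a gmon.out file for gprof(1).
 */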

SYSCTL_SETUP(sysctl_kern_gprof_setup, "sysctl kern.profiling subtree setup")
{

        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_NODE, "profiling",
            SYSCTL_DESCR("Profiling information (available)"),
            NULL, 0, NULL, 0,
            CTL_KERN, KERN_PROF, CTL_EOL);

        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
            CTLTYPE_INT, "state",
            SYSCTL_DESCR("Profiling state"),
            sysctl_kern_profiling, 0, NULL, 0,
            CTL_KERN, KERN_PROF, GPROF_STATE, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
            CTLTYPE_STRUCT, "count",
            SYSCTL_DESCR("Array of statistical program counters"),
            sysctl_kern_profiling, 0, NULL, 0,
            CTL_KERN, KERN_PROF, GPROF_COUNT, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
            CTLTYPE_STRUCT, "froms",
            SYSCTL_DESCR("Array indexed by program counter of "
            "call-from points"),
            sysctl_kern_profiling, 0, NULL, 0,
            CTL_KERN, KERN_PROF, GPROF_FROMS, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
            CTLTYPE_STRUCT, "tos",
            SYSCTL_DESCR("Array of structures describing "
            "destination of calls and their counts"),
            sysctl_kern_profiling, 0, NULL, 0,
            CTL_KERN, KERN_PROF, GPROF_TOS, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
            CTLFLAG_PERMANENT,
            CTLTYPE_STRUCT, "gmonparam",
            SYSCTL_DESCR("Structure giving the sizes of the above "
            "arrays"),
            sysctl_kern_profiling, 0, NULL, 0,
            CTL_KERN, KERN_PROF, GPROF_GMONPARAM, CTL_EOL);
}
#endif /* GPROF */

/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
 */
/* ARGSUSED */
int
sys_profil(struct lwp *l, const struct sys_profil_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(char *) samples;
                syscallarg(size_t) size;
                syscallarg(u_long) offset;
                syscallarg(u_int) scale;
        } */
        struct proc *p = l->l_proc;
        struct uprof *upp;

        if (SCARG(uap, scale) > (1 << 16))
                return SET_ERROR(EINVAL);
        if (SCARG(uap, scale) == 0) {
                mutex_spin_enter(&p->p_stmutex);
                stopprofclock(p);
                mutex_spin_exit(&p->p_stmutex);
                return 0;
        }
        upp = &p->p_stats->p_prof;

        /* Block profile interrupts while changing state. */
        mutex_spin_enter(&p->p_stmutex);
        upp->pr_off = SCARG(uap, offset);
        upp->pr_scale = SCARG(uap, scale);
        upp->pr_base = SCARG(uap, samples);
        upp->pr_size = SCARG(uap, size);
        startprofclock(p);
        mutex_spin_exit(&p->p_stmutex);

        return 0;
}

/*
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0.  pc is at most 32 bits, so the
 * intermediate result is at most 48 bits.
 */
#define PC_TO_INDEX(pc, prof) \
        ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
            (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
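
/*
 * Worked example (illustrative values): with pr_scale = 0x8000 (0.5)
 * and pc - pr_off = 0x1000, the index is (0x1000 * 0x8000) >> 16 =
 * 0x800; the final "& ~1" clears the low bit so the index stays
 * aligned for the 16-bit counters in the sample buffer.
 */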

/*
 * Collect user-level profiling statistics; called on a profiling tick,
 * when a process is running in user-mode.  This routine may be called
 * from an interrupt context.  We schedule an AST that will vector us
 * to trap() with a context in which copyin and copyout will work.
 * Trap will then call addupc_task().
 *
 * XXX We could use ufetch/ustore here if the profile buffers were
 * wired.
 *
 * Note that we may (rarely) not get around to the AST soon enough, and
 * lose profile ticks when the next tick overwrites this one, but in this
 * case the system is overloaded and the profile is probably already
 * inaccurate.
 */
void
addupc_intr(struct lwp *l, u_long pc)
{
        struct uprof *prof;
        struct proc *p;
        u_int i;

        p = l->l_proc;

        KASSERT(mutex_owned(&p->p_stmutex));

        prof = &p->p_stats->p_prof;
        if (pc < prof->pr_off ||
            (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
                return;                 /* out of range; ignore */

        mutex_spin_exit(&p->p_stmutex);

        /* XXXSMP */
        prof->pr_addr = pc;
        prof->pr_ticks++;
        cpu_need_proftick(l);

        mutex_spin_enter(&p->p_stmutex);
}

/*
 * Much like before, but we can afford to take faults here.  If the
 * update fails, we simply turn off profiling.
 */
void
addupc_task(struct lwp *l, u_long pc, u_int ticks)
{
        struct uprof *prof;
        struct proc *p;
        void *addr;
        int error;
        u_int i;
        u_short v;

        p = l->l_proc;

        if (ticks == 0)
                return;

        mutex_spin_enter(&p->p_stmutex);
        prof = &p->p_stats->p_prof;

        /* Testing PST_PROFIL may be unnecessary, but is certainly safe. */
        if ((p->p_stflag & PST_PROFIL) == 0 || pc < prof->pr_off ||
            (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) {
                mutex_spin_exit(&p->p_stmutex);
                return;
        }

        addr = prof->pr_base + i;
        mutex_spin_exit(&p->p_stmutex);
        if ((error = copyin(addr, (void *)&v, sizeof(v))) == 0) {
                v += ticks;
                error = copyout((void *)&v, addr, sizeof(v));
        }
        if (error != 0) {
                mutex_spin_enter(&p->p_stmutex);
                stopprofclock(p);
                mutex_spin_exit(&p->p_stmutex);
        }
}