kern_resource.c revision 1.141.2.1 1 /* $NetBSD: kern_resource.c,v 1.141.2.1 2008/05/10 23:49:04 wrstuden Exp $ */
2
3 /*-
4 * Copyright (c) 1982, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)kern_resource.c 8.8 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.141.2.1 2008/05/10 23:49:04 wrstuden Exp $");
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/file.h>
46 #include <sys/resourcevar.h>
47 #include <sys/malloc.h>
48 #include <sys/kmem.h>
49 #include <sys/namei.h>
50 #include <sys/pool.h>
51 #include <sys/proc.h>
52 #include <sys/sysctl.h>
53 #include <sys/timevar.h>
54 #include <sys/kauth.h>
55 #include <sys/atomic.h>
56 #include <sys/mount.h>
57 #include <sys/sa.h>
58 #include <sys/syscallargs.h>
59 #include <sys/atomic.h>
60
61 #include <uvm/uvm_extern.h>
62
63 /*
64 * Maximum process data and stack limits.
65 * They are variables so they are patchable.
66 */
67 rlim_t maxdmap = MAXDSIZ;
68 rlim_t maxsmap = MAXSSIZ;
69
70 static SLIST_HEAD(uihashhead, uidinfo) *uihashtbl;
71 static u_long uihash;
72
73 #define UIHASH(uid) (&uihashtbl[(uid) & uihash])
74
75 static pool_cache_t plimit_cache;
76 static pool_cache_t pstats_cache;
77
/*
 * resource_init:
 *
 *	One-time initialization of the resource accounting subsystem:
 *	pool caches for plimit/pstats structures and the uid hash table.
 */
void
resource_init(void)
{
	/*
	 * In case of MP system, SLIST_FOREACH would force a cache line
	 * write-back for every modified 'uidinfo', thus we try to keep the
	 * lists short.
	 */
	const u_int uihash_sz = (maxproc > 1 ? 1024 : 64);

	plimit_cache = pool_cache_init(sizeof(struct plimit), 0, 0, 0,
	    "plimitpl", NULL, IPL_NONE, NULL, NULL, NULL);
	pstats_cache = pool_cache_init(sizeof(struct pstats), 0, 0, 0,
	    "pstatspl", NULL, IPL_NONE, NULL, NULL, NULL);
	uihashtbl = hashinit(uihash_sz, HASH_SLIST, true, &uihash);
}
94
95 /*
96 * Resource controls and accounting.
97 */
98
/*
 * getpriority(2): report the lowest (most favourable) nice value among
 * the processes selected by "which"/"who".  proc_lock is held across
 * the whole scan; per-process credentials are read under p_lock.
 */
int
sys_getpriority(struct lwp *l, const struct sys_getpriority_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) which;
		syscallarg(id_t) who;
	} */
	struct proc *curp = l->l_proc, *p;
	/* Sentinel: one greater than any possible internal nice value. */
	int low = NZERO + PRIO_MAX + 1;
	int who = SCARG(uap, who);

	mutex_enter(proc_lock);
	switch (SCARG(uap, which)) {
	case PRIO_PROCESS:
		/* who == 0 means the calling process. */
		if (who == 0)
			p = curp;
		else
			p = p_find(who, PFIND_LOCKED);
		if (p != NULL)
			low = p->p_nice;
		break;

	case PRIO_PGRP: {
		/* Lowest nice of any member of the process group. */
		struct pgrp *pg;

		if (who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		/* who == 0 means the caller's effective uid. */
		if (who == 0)
			who = (int)kauth_cred_geteuid(l->l_cred);
		PROCLIST_FOREACH(p, &allproc) {
			/* Skip scheduler marker entries on the list. */
			if ((p->p_flag & PK_MARKER) != 0)
				continue;
			mutex_enter(p->p_lock);
			if (kauth_cred_geteuid(p->p_cred) ==
			    (uid_t)who && p->p_nice < low)
				low = p->p_nice;
			mutex_exit(p->p_lock);
		}
		break;

	default:
		mutex_exit(proc_lock);
		return (EINVAL);
	}
	mutex_exit(proc_lock);

	/* Sentinel untouched means no matching process was found. */
	if (low == NZERO + PRIO_MAX + 1)
		return (ESRCH);
	/* Convert internal [0, 2*NZERO) range to user-visible nice. */
	*retval = low - NZERO;
	return (0);
}
161
162 /* ARGSUSED */
163 int
164 sys_setpriority(struct lwp *l, const struct sys_setpriority_args *uap,
165 register_t *retval)
166 {
167 /* {
168 syscallarg(int) which;
169 syscallarg(id_t) who;
170 syscallarg(int) prio;
171 } */
172 struct proc *curp = l->l_proc, *p;
173 int found = 0, error = 0;
174 int who = SCARG(uap, who);
175
176 mutex_enter(proc_lock);
177 switch (SCARG(uap, which)) {
178 case PRIO_PROCESS:
179 if (who == 0)
180 p = curp;
181 else
182 p = p_find(who, PFIND_LOCKED);
183 if (p != 0) {
184 mutex_enter(p->p_lock);
185 error = donice(l, p, SCARG(uap, prio));
186 mutex_exit(p->p_lock);
187 }
188 found++;
189 break;
190
191 case PRIO_PGRP: {
192 struct pgrp *pg;
193
194 if (who == 0)
195 pg = curp->p_pgrp;
196 else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL)
197 break;
198 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
199 mutex_enter(p->p_lock);
200 error = donice(l, p, SCARG(uap, prio));
201 mutex_exit(p->p_lock);
202 found++;
203 }
204 break;
205 }
206
207 case PRIO_USER:
208 if (who == 0)
209 who = (int)kauth_cred_geteuid(l->l_cred);
210 PROCLIST_FOREACH(p, &allproc) {
211 if ((p->p_flag & PK_MARKER) != 0)
212 continue;
213 mutex_enter(p->p_lock);
214 if (kauth_cred_geteuid(p->p_cred) ==
215 (uid_t)SCARG(uap, who)) {
216 error = donice(l, p, SCARG(uap, prio));
217 found++;
218 }
219 mutex_exit(p->p_lock);
220 }
221 break;
222
223 default:
224 error = EINVAL;
225 break;
226 }
227 mutex_exit(proc_lock);
228 if (found == 0)
229 return (ESRCH);
230 return (error);
231 }
232
233 /*
234 * Renice a process.
235 *
236 * Call with the target process' credentials locked.
237 */
int
donice(struct lwp *l, struct proc *chgp, int n)
{
	kauth_cred_t cred = l->l_cred;

	/* Target process lock must be held by the caller. */
	KASSERT(mutex_owned(chgp->p_lock));

	/* Clamp the requested value to the valid user-visible range. */
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	/* Bias into the kernel's internal [0, 2*NZERO) representation. */
	n += NZERO;
	if (kauth_authorize_process(cred, KAUTH_PROCESS_NICE, chgp,
	    KAUTH_ARG(n), NULL, NULL))
		return (EACCES);
	sched_nice(chgp, n);
	return (0);
}
256
257 /* ARGSUSED */
258 int
259 sys_setrlimit(struct lwp *l, const struct sys_setrlimit_args *uap,
260 register_t *retval)
261 {
262 /* {
263 syscallarg(int) which;
264 syscallarg(const struct rlimit *) rlp;
265 } */
266 int which = SCARG(uap, which);
267 struct rlimit alim;
268 int error;
269
270 error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit));
271 if (error)
272 return (error);
273 return (dosetrlimit(l, l->l_proc, which, &alim));
274 }
275
/*
 * dosetrlimit:
 *
 *	Validate and install a new resource limit "which" for process
 *	"p"; "l" supplies the credentials for the authorization check.
 *	Shared with the sysctl proc.*.rlimit helper.
 */
int
dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	int error;

	/* Unsigned compare also rejects negative "which" values. */
	if ((u_int)which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * NOTE(review): if rlim_t is unsigned these < 0 tests can
	 * never fire -- confirm against <sys/types.h> before relying
	 * on them to reject "negative" limits.
	 */
	if (limp->rlim_cur < 0 || limp->rlim_max < 0)
		return (EINVAL);

	if (limp->rlim_cur > limp->rlim_max) {
		/*
		 * This is programming error. According to SUSv2, we should
		 * return error in this case.
		 */
		return (EINVAL);
	}

	alimp = &p->p_rlimit[which];
	/* if we don't change the value, no need to limcopy() */
	if (limp->rlim_cur == alimp->rlim_cur &&
	    limp->rlim_max == alimp->rlim_max)
		return 0;

	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
	    p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_SET), limp, KAUTH_ARG(which));
	if (error)
		return (error);

	/* Give the process a private, writeable copy of its limits. */
	lim_privatise(p, false);
	/* p->p_limit is now unchangeable */
	alimp = &p->p_rlimit[which];

	switch (which) {

	case RLIMIT_DATA:
		/* Clamp to the platform maximum data segment size. */
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		/* Clamp to the platform maximum stack size. */
		if (limp->rlim_cur > maxsmap)
			limp->rlim_cur = maxsmap;
		if (limp->rlim_max > maxsmap)
			limp->rlim_max = maxsmap;

		/*
		 * Return EINVAL if the new stack size limit is lower than
		 * current usage. Otherwise, the process would get SIGSEGV the
		 * moment it would try to access anything on it's current stack.
		 * This conforms to SUSv2.
		 */
		if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE
		    || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE) {
			return (EINVAL);
		}

		/*
		 * Stack is allocated to the max at exec time with
		 * only "rlim_cur" bytes accessible (In other words,
		 * allocates stack dividing two contiguous regions at
		 * "rlim_cur" bytes boundary).
		 *
		 * Since allocation is done in terms of page, roundup
		 * "rlim_cur" (otherwise, contiguous regions
		 * overlap). If stack limit is going up make more
		 * accessible, if going down make inaccessible.
		 */
		limp->rlim_cur = round_page(limp->rlim_cur);
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				/* Growing: make the new region accessible. */
				prot = VM_PROT_READ | VM_PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = (vaddr_t)p->p_vmspace->vm_minsaddr -
				    limp->rlim_cur;
			} else {
				/* Shrinking: revoke access to the excess. */
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = (vaddr_t)p->p_vmspace->vm_minsaddr -
				    alimp->rlim_cur;
			}
			(void) uvm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr+size, prot, false);
		}
		break;

	case RLIMIT_NOFILE:
		/* Clamp to the global open-file limit. */
		if (limp->rlim_cur > maxfiles)
			limp->rlim_cur = maxfiles;
		if (limp->rlim_max > maxfiles)
			limp->rlim_max = maxfiles;
		break;

	case RLIMIT_NPROC:
		/* Clamp to the global process limit. */
		if (limp->rlim_cur > maxproc)
			limp->rlim_cur = maxproc;
		if (limp->rlim_max > maxproc)
			limp->rlim_max = maxproc;
		break;
	}

	/* Publish the new limit under the plimit lock. */
	mutex_enter(&p->p_limit->pl_lock);
	*alimp = *limp;
	mutex_exit(&p->p_limit->pl_lock);
	return (0);
}
390
391 /* ARGSUSED */
392 int
393 sys_getrlimit(struct lwp *l, const struct sys_getrlimit_args *uap,
394 register_t *retval)
395 {
396 /* {
397 syscallarg(int) which;
398 syscallarg(struct rlimit *) rlp;
399 } */
400 struct proc *p = l->l_proc;
401 int which = SCARG(uap, which);
402 struct rlimit rl;
403
404 if ((u_int)which >= RLIM_NLIMITS)
405 return (EINVAL);
406
407 mutex_enter(p->p_lock);
408 memcpy(&rl, &p->p_rlimit[which], sizeof(rl));
409 mutex_exit(p->p_lock);
410
411 return copyout(&rl, SCARG(uap, rlp), sizeof(rl));
412 }
413
414 /*
415 * Transform the running time and tick information in proc p into user,
416 * system, and interrupt time usage.
417 *
418 * Should be called with p->p_lock held unless called from exit1().
419 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp,
    struct timeval *ip, struct timeval *rp)
{
	uint64_t u, st, ut, it, tot;
	struct lwp *l;
	struct bintime tm;
	struct timeval tv;

	/* Snapshot the statclock tick counters under the stat mutex. */
	mutex_spin_enter(&p->p_stmutex);
	st = p->p_sticks;
	ut = p->p_uticks;
	it = p->p_iticks;
	mutex_spin_exit(&p->p_stmutex);

	/* Start from run time already accumulated at the proc level. */
	tm = p->p_rtime;

	/* Add in the run time of each LWP, each under its own lock. */
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		lwp_lock(l);
		bintime_add(&tm, &l->l_rtime);
		if ((l->l_flag & LW_RUNNING) != 0) {
			struct bintime diff;
			/*
			 * Adjust for the current time slice. This is
			 * actually fairly important since the error
			 * here is on the order of a time quantum,
			 * which is much greater than the sampling
			 * error.
			 */
			binuptime(&diff);
			bintime_sub(&diff, &l->l_stime);
			bintime_add(&tm, &diff);
		}
		lwp_unlock(l);
	}

	tot = st + ut + it;
	bintime2timeval(&tm, &tv);
	/* u = total run time in microseconds. */
	u = (uint64_t)tv.tv_sec * 1000000ul + tv.tv_usec;

	if (tot == 0) {
		/* No ticks, so can't use to share time out, split 50-50 */
		st = ut = u / 2;
	} else {
		/* Apportion run time in proportion to observed ticks. */
		st = (u * st) / tot;
		ut = (u * ut) / tot;
	}
	if (sp != NULL) {
		sp->tv_sec = st / 1000000;
		sp->tv_usec = st % 1000000;
	}
	if (up != NULL) {
		up->tv_sec = ut / 1000000;
		up->tv_usec = ut % 1000000;
	}
	if (ip != NULL) {
		/* tot != 0 whenever it != 0, so this division is safe. */
		if (it != 0)
			it = (u * it) / tot;
		ip->tv_sec = it / 1000000;
		ip->tv_usec = it % 1000000;
	}
	if (rp != NULL) {
		*rp = tv;
	}
}
485
486 /* ARGSUSED */
487 int
488 sys_getrusage(struct lwp *l, const struct sys_getrusage_args *uap,
489 register_t *retval)
490 {
491 /* {
492 syscallarg(int) who;
493 syscallarg(struct rusage *) rusage;
494 } */
495 struct rusage ru;
496 struct proc *p = l->l_proc;
497
498 switch (SCARG(uap, who)) {
499 case RUSAGE_SELF:
500 mutex_enter(p->p_lock);
501 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
502 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL, NULL);
503 rulwps(p, &ru);
504 mutex_exit(p->p_lock);
505 break;
506
507 case RUSAGE_CHILDREN:
508 mutex_enter(p->p_lock);
509 memcpy(&ru, &p->p_stats->p_cru, sizeof(ru));
510 mutex_exit(p->p_lock);
511 break;
512
513 default:
514 return EINVAL;
515 }
516
517 return copyout(&ru, SCARG(uap, rusage), sizeof(ru));
518 }
519
520 void
521 ruadd(struct rusage *ru, struct rusage *ru2)
522 {
523 long *ip, *ip2;
524 int i;
525
526 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
527 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
528 if (ru->ru_maxrss < ru2->ru_maxrss)
529 ru->ru_maxrss = ru2->ru_maxrss;
530 ip = &ru->ru_first; ip2 = &ru2->ru_first;
531 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
532 *ip++ += *ip2++;
533 }
534
535 void
536 rulwps(proc_t *p, struct rusage *ru)
537 {
538 lwp_t *l;
539
540 KASSERT(mutex_owned(p->p_lock));
541
542 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
543 ruadd(ru, &l->l_ru);
544 ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw);
545 ru->ru_nivcsw += l->l_nivcsw;
546 }
547 }
548
549 /*
550 * Make a copy of the plimit structure.
551 * We share these structures copy-on-write after fork,
552 * and copy when a limit is changed.
553 *
554 * Unfortunately (due to PL_SHAREMOD) it is possibly for the structure
555 * we are copying to change beneath our feet!
556 */
struct plimit *
lim_copy(struct plimit *lim)
{
	struct plimit *newlim;
	char *corename;
	size_t alen, len;

	newlim = pool_cache_get(plimit_cache, PR_WAITOK);
	mutex_init(&newlim->pl_lock, MUTEX_DEFAULT, IPL_NONE);
	newlim->pl_flags = 0;
	newlim->pl_refcnt = 1;
	newlim->pl_sv_limit = NULL;

	mutex_enter(&lim->pl_lock);
	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);

	/*
	 * Copy the core name.  We cannot malloc() while holding
	 * pl_lock, so drop the lock to allocate and then re-check:
	 * the source name may have changed (or grown) meanwhile.
	 */
	alen = 0;
	corename = NULL;
	for (;;) {
		if (lim->pl_corename == defcorename) {
			/* Default name is static; share the pointer. */
			newlim->pl_corename = defcorename;
			break;
		}
		len = strlen(lim->pl_corename) + 1;
		if (len <= alen) {
			/* Buffer is large enough; copy under the lock. */
			newlim->pl_corename = corename;
			memcpy(corename, lim->pl_corename, len);
			corename = NULL;
			break;
		}
		/* Too small (or none yet): drop the lock and grow it. */
		mutex_exit(&lim->pl_lock);
		if (corename != NULL)
			free(corename, M_TEMP);
		alen = len;
		corename = malloc(alen, M_TEMP, M_WAITOK);
		mutex_enter(&lim->pl_lock);
	}
	mutex_exit(&lim->pl_lock);
	/* Free an allocation we ended up not using (name shrank). */
	if (corename != NULL)
		free(corename, M_TEMP);
	return newlim;
}
600
/*
 * lim_addref: grab a reference on a plimit structure.  Safe without
 * any lock held since the count is manipulated atomically.
 */
void
lim_addref(struct plimit *lim)
{
	atomic_inc_uint(&lim->pl_refcnt);
}
606
607 /*
608 * Give a process it's own private plimit structure.
609 * This will only be shared (in fork) if modifications are to be shared.
610 */
void
lim_privatise(struct proc *p, bool set_shared)
{
	struct plimit *lim, *newlim;

	/*
	 * Unlocked read: if the structure is already writeable it can
	 * never revert, so this fast path is safe.
	 */
	lim = p->p_limit;
	if (lim->pl_flags & PL_WRITEABLE) {
		if (set_shared)
			lim->pl_flags |= PL_SHAREMOD;
		return;
	}

	/* Modifications are to be shared: keep using the shared copy. */
	if (set_shared && lim->pl_flags & PL_SHAREMOD)
		return;

	/* Copy first (may sleep), then re-check under p_lock. */
	newlim = lim_copy(lim);

	mutex_enter(p->p_lock);
	if (p->p_limit->pl_flags & PL_WRITEABLE) {
		/* Someone crept in while we were busy */
		mutex_exit(p->p_lock);
		limfree(newlim);
		if (set_shared)
			p->p_limit->pl_flags |= PL_SHAREMOD;
		return;
	}

	/*
	 * Since most accesses to p->p_limit aren't locked, we must not
	 * delete the old limit structure yet.
	 */
	newlim->pl_sv_limit = p->p_limit;
	newlim->pl_flags |= PL_WRITEABLE;
	if (set_shared)
		newlim->pl_flags |= PL_SHAREMOD;
	p->p_limit = newlim;
	mutex_exit(p->p_lock);
}
649
/*
 * limfree: drop a reference on a plimit structure; on the last
 * reference, destroy it and then walk the chain of superseded
 * structures kept alive by lim_privatise(), dropping each in turn.
 */
void
limfree(struct plimit *lim)
{
	struct plimit *sv_lim;

	do {
		if (atomic_dec_uint_nv(&lim->pl_refcnt) > 0)
			return;
		/* Only a private (malloc'ed) core name is freed. */
		if (lim->pl_corename != defcorename)
			free(lim->pl_corename, M_TEMP);
		sv_lim = lim->pl_sv_limit;
		mutex_destroy(&lim->pl_lock);
		pool_cache_put(plimit_cache, lim);
	} while ((lim = sv_lim) != NULL);
}
665
666 struct pstats *
667 pstatscopy(struct pstats *ps)
668 {
669
670 struct pstats *newps;
671
672 newps = pool_cache_get(pstats_cache, PR_WAITOK);
673
674 memset(&newps->pstat_startzero, 0,
675 (unsigned) ((char *)&newps->pstat_endzero -
676 (char *)&newps->pstat_startzero));
677 memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy,
678 ((char *)&newps->pstat_endcopy -
679 (char *)&newps->pstat_startcopy));
680
681 return (newps);
682
683 }
684
/*
 * pstatsfree: release a pstats structure obtained from pstatscopy().
 */
void
pstatsfree(struct pstats *ps)
{

	pool_cache_put(pstats_cache, ps);
}
691
692 /*
693 * sysctl interface in five parts
694 */
695
696 /*
697 * a routine for sysctl proc subtree helpers that need to pick a valid
698 * process by pid.
699 */
700 static int
701 sysctl_proc_findproc(struct lwp *l, struct proc **p2, pid_t pid)
702 {
703 struct proc *ptmp;
704 int error = 0;
705
706 if (pid == PROC_CURPROC)
707 ptmp = l->l_proc;
708 else if ((ptmp = pfind(pid)) == NULL)
709 error = ESRCH;
710
711 *p2 = ptmp;
712 return (error);
713 }
714
715 /*
716 * sysctl helper routine for setting a process's specific corefile
717 * name. picks the process based on the given pid and checks the
718 * correctness of the new value.
719 */
720 static int
721 sysctl_proc_corename(SYSCTLFN_ARGS)
722 {
723 struct proc *ptmp;
724 struct plimit *lim;
725 int error = 0, len;
726 char *cname;
727 char *ocore;
728 char *tmp;
729 struct sysctlnode node;
730
731 /*
732 * is this all correct?
733 */
734 if (namelen != 0)
735 return (EINVAL);
736 if (name[-1] != PROC_PID_CORENAME)
737 return (EINVAL);
738
739 /*
740 * whom are we tweaking?
741 */
742 error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]);
743 if (error)
744 return (error);
745
746 /* XXX-elad */
747 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
748 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
749 if (error)
750 return (error);
751
752 if (newp == NULL) {
753 error = kauth_authorize_process(l->l_cred,
754 KAUTH_PROCESS_CORENAME, ptmp,
755 KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_GET), NULL, NULL);
756 if (error)
757 return (error);
758 }
759
760 /*
761 * let them modify a temporary copy of the core name
762 */
763 cname = PNBUF_GET();
764 lim = ptmp->p_limit;
765 mutex_enter(&lim->pl_lock);
766 strlcpy(cname, lim->pl_corename, MAXPATHLEN);
767 mutex_exit(&lim->pl_lock);
768
769 node = *rnode;
770 node.sysctl_data = cname;
771 error = sysctl_lookup(SYSCTLFN_CALL(&node));
772
773 /*
774 * if that failed, or they have nothing new to say, or we've
775 * heard it before...
776 */
777 if (error || newp == NULL)
778 goto done;
779 lim = ptmp->p_limit;
780 mutex_enter(&lim->pl_lock);
781 error = strcmp(cname, lim->pl_corename);
782 mutex_exit(&lim->pl_lock);
783 if (error == 0)
784 /* Unchanged */
785 goto done;
786
787 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CORENAME,
788 ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_SET), cname, NULL);
789 if (error)
790 return (error);
791
792 /*
793 * no error yet and cname now has the new core name in it.
794 * let's see if it looks acceptable. it must be either "core"
795 * or end in ".core" or "/core".
796 */
797 len = strlen(cname);
798 if (len < 4) {
799 error = EINVAL;
800 } else if (strcmp(cname + len - 4, "core") != 0) {
801 error = EINVAL;
802 } else if (len > 4 && cname[len - 5] != '/' && cname[len - 5] != '.') {
803 error = EINVAL;
804 }
805 if (error != 0) {
806 goto done;
807 }
808
809 /*
810 * hmm...looks good. now...where do we put it?
811 */
812 tmp = malloc(len + 1, M_TEMP, M_WAITOK|M_CANFAIL);
813 if (tmp == NULL) {
814 error = ENOMEM;
815 goto done;
816 }
817 memcpy(tmp, cname, len + 1);
818
819 lim_privatise(ptmp, false);
820 lim = ptmp->p_limit;
821 mutex_enter(&lim->pl_lock);
822 ocore = lim->pl_corename;
823 lim->pl_corename = tmp;
824 mutex_exit(&lim->pl_lock);
825 if (ocore != defcorename)
826 free(ocore, M_TEMP);
827
828 done:
829 PNBUF_PUT(cname);
830 return error;
831 }
832
833 /*
834 * sysctl helper routine for checking/setting a process's stop flags,
835 * one for fork and one for exec.
836 */
837 static int
838 sysctl_proc_stop(SYSCTLFN_ARGS)
839 {
840 struct proc *ptmp;
841 int i, f, error = 0;
842 struct sysctlnode node;
843
844 if (namelen != 0)
845 return (EINVAL);
846
847 error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]);
848 if (error)
849 return (error);
850
851 /* XXX-elad */
852 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
853 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
854 if (error)
855 return (error);
856
857 switch (rnode->sysctl_num) {
858 case PROC_PID_STOPFORK:
859 f = PS_STOPFORK;
860 break;
861 case PROC_PID_STOPEXEC:
862 f = PS_STOPEXEC;
863 break;
864 case PROC_PID_STOPEXIT:
865 f = PS_STOPEXIT;
866 break;
867 default:
868 return (EINVAL);
869 }
870
871 i = (ptmp->p_flag & f) ? 1 : 0;
872 node = *rnode;
873 node.sysctl_data = &i;
874 error = sysctl_lookup(SYSCTLFN_CALL(&node));
875 if (error || newp == NULL)
876 return (error);
877
878 mutex_enter(ptmp->p_lock);
879 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_STOPFLAG,
880 ptmp, KAUTH_ARG(f), NULL, NULL);
881 if (error)
882 return (error);
883 if (i)
884 ptmp->p_sflag |= f;
885 else
886 ptmp->p_sflag &= ~f;
887 mutex_exit(ptmp->p_lock);
888
889 return (0);
890 }
891
892 /*
893 * sysctl helper routine for a process's rlimits as exposed by sysctl.
894 */
/*
 * sysctl helper routine for a process's rlimits as exposed by sysctl.
 *
 * The node path is proc.<pid>.rlimit.<resource>.{soft,hard}; since
 * namelen is 0 at this depth, the components are recovered by
 * indexing backwards from the end of name[].
 */
static int
sysctl_proc_plimit(SYSCTLFN_ARGS)
{
	struct proc *ptmp;
	u_int limitno;
	int which, error = 0;
	struct rlimit alim;
	struct sysctlnode node;

	if (namelen != 0)
		return (EINVAL);

	/* name[-1]: soft or hard limit selector. */
	which = name[-1];
	if (which != PROC_PID_LIMIT_TYPE_SOFT &&
	    which != PROC_PID_LIMIT_TYPE_HARD)
		return (EINVAL);

	/* name[-2]: resource number (1-based in the sysctl tree). */
	limitno = name[-2] - 1;
	if (limitno >= RLIM_NLIMITS)
		return (EINVAL);

	if (name[-3] != PROC_PID_LIMIT)
		return (EINVAL);

	/* name[-4]: target pid (or PROC_CURPROC). */
	error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-4]);
	if (error)
		return (error);

	/* XXX-elad */
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
	if (error)
		return (error);

	/* Check if we can view limits. */
	if (newp == NULL) {
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
		    ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_GET), &alim,
		    KAUTH_ARG(which));
		if (error)
			return (error);
	}

	/* Expose a copy; sysctl_lookup() may write the new value into it. */
	node = *rnode;
	memcpy(&alim, &ptmp->p_rlimit[limitno], sizeof(alim));
	if (which == PROC_PID_LIMIT_TYPE_HARD)
		node.sysctl_data = &alim.rlim_max;
	else
		node.sysctl_data = &alim.rlim_cur;

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	/* Setting: validation and authorization happen in dosetrlimit(). */
	return (dosetrlimit(l, ptmp, limitno, &alim));
}
951
952 /*
953 * and finally, the actually glue that sticks it to the tree
954 */
SYSCTL_SETUP(sysctl_proc_setup, "sysctl proc subtree setup")
{

	/* Top-level proc node and the CTLFLAG_ANYNUMBER "curproc" node. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "proc", NULL,
		       NULL, 0, NULL, 0,
		       CTL_PROC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_ANYNUMBER,
		       CTLTYPE_NODE, "curproc",
		       SYSCTL_DESCR("Per-process settings"),
		       NULL, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, CTL_EOL);

	/* proc.<pid>.corename and the proc.<pid>.rlimit subtree root. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_STRING, "corename",
		       SYSCTL_DESCR("Core file name"),
		       sysctl_proc_corename, 0, NULL, MAXPATHLEN,
		       CTL_PROC, PROC_CURPROC, PROC_PID_CORENAME, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "rlimit",
		       SYSCTL_DESCR("Process limits"),
		       NULL, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, CTL_EOL);

/*
 * Helper: create one rlimit subtree ("s") with "soft" and "hard"
 * leaves, both served by sysctl_proc_plimit().
 */
#define create_proc_plimit(s, n) do {					\
	sysctl_createv(clog, 0, NULL, NULL,				\
		       CTLFLAG_PERMANENT,				\
		       CTLTYPE_NODE, s,					\
		       SYSCTL_DESCR("Process " s " limits"),		\
		       NULL, 0, NULL, 0,				\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
		       CTL_EOL);					\
	sysctl_createv(clog, 0, NULL, NULL,				\
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \
		       CTLTYPE_QUAD, "soft",				\
		       SYSCTL_DESCR("Process soft " s " limit"),	\
		       sysctl_proc_plimit, 0, NULL, 0,			\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
		       PROC_PID_LIMIT_TYPE_SOFT, CTL_EOL);		\
	sysctl_createv(clog, 0, NULL, NULL,				\
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \
		       CTLTYPE_QUAD, "hard",				\
		       SYSCTL_DESCR("Process hard " s " limit"),	\
		       sysctl_proc_plimit, 0, NULL, 0,			\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
		       PROC_PID_LIMIT_TYPE_HARD, CTL_EOL);		\
	} while (0/*CONSTCOND*/)

	create_proc_plimit("cputime",		PROC_PID_LIMIT_CPU);
	create_proc_plimit("filesize",		PROC_PID_LIMIT_FSIZE);
	create_proc_plimit("datasize",		PROC_PID_LIMIT_DATA);
	create_proc_plimit("stacksize",		PROC_PID_LIMIT_STACK);
	create_proc_plimit("coredumpsize",	PROC_PID_LIMIT_CORE);
	create_proc_plimit("memoryuse",		PROC_PID_LIMIT_RSS);
	create_proc_plimit("memorylocked",	PROC_PID_LIMIT_MEMLOCK);
	create_proc_plimit("maxproc",		PROC_PID_LIMIT_NPROC);
	create_proc_plimit("descriptors",	PROC_PID_LIMIT_NOFILE);
	create_proc_plimit("sbsize",		PROC_PID_LIMIT_SBSIZE);

#undef create_proc_plimit

	/* Stop-on-event debugging knobs, all served by sysctl_proc_stop(). */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopfork",
		       SYSCTL_DESCR("Stop process at fork(2)"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPFORK, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopexec",
		       SYSCTL_DESCR("Stop process at execve(2)"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXEC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopexit",
		       SYSCTL_DESCR("Stop process before completing exit"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXIT, CTL_EOL);
}
1039
/*
 * uid_init: pre-populate the uid hash table with uid 0.
 */
void
uid_init(void)
{

	/*
	 * Ensure that uid 0 is always in the user hash table, as
	 * sbreserve() expects it available from interrupt context.
	 */
	(void)uid_find(0);
}
1050
/*
 * uid_find: look up (and create on first use) the uidinfo record for
 * "uid".  Lock-free: insertion is done with a compare-and-swap on the
 * hash chain head, so this is callable from interrupt context.
 * Entries are never removed.
 */
struct uidinfo *
uid_find(uid_t uid)
{
	struct uidinfo *uip, *uip_first, *newuip;
	struct uihashhead *uipp;

	uipp = UIHASH(uid);
	newuip = NULL;

	/*
	 * To make insertion atomic, abstraction of SLIST will be violated.
	 */
	uip_first = uipp->slh_first;
 again:
	SLIST_FOREACH(uip, uipp, ui_hash) {
		if (uip->ui_uid != uid)
			continue;
		/* Found: discard any entry we speculatively allocated. */
		if (newuip != NULL)
			kmem_free(newuip, sizeof(*newuip));
		return uip;
	}
	if (newuip == NULL)
		newuip = kmem_zalloc(sizeof(*newuip), KM_SLEEP);
	newuip->ui_uid = uid;

	/*
	 * If atomic insert is unsuccessful, another thread might be
	 * allocated this 'uid', thus full re-check is needed.
	 */
	newuip->ui_hash.sle_next = uip_first;
	/* Order the link-pointer store before publication via CAS. */
	membar_producer();
	uip = atomic_cas_ptr(&uipp->slh_first, uip_first, newuip);
	if (uip != uip_first) {
		/* Lost the race: rescan from the new chain head. */
		uip_first = uip;
		goto again;
	}

	return newuip;
}
1090
1091 /*
1092 * Change the count associated with number of processes
1093 * a given user is using.
1094 */
1095 int
1096 chgproccnt(uid_t uid, int diff)
1097 {
1098 struct uidinfo *uip;
1099 long proccnt;
1100
1101 uip = uid_find(uid);
1102 proccnt = atomic_add_long_nv(&uip->ui_proccnt, diff);
1103 KASSERT(proccnt >= 0);
1104 return proccnt;
1105 }
1106
/*
 * chgsbsize: charge a change in socket buffer high-water mark (from
 * *hiwat to "to") against the user's total, enforcing the "xmax"
 * limit.  Returns 1 and updates *hiwat on success, 0 if the increase
 * would exceed the limit (in which case the addition is backed out).
 */
int
chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t xmax)
{
	rlim_t nsb;
	const long diff = to - *hiwat;

	/* Optimistically apply the delta, then check the limit. */
	nsb = atomic_add_long_nv((long *)&uip->ui_sbsize, diff);
	if (diff > 0 && nsb > xmax) {
		/* Over the limit: undo the addition and fail. */
		atomic_add_long((long *)&uip->ui_sbsize, -diff);
		return 0;
	}
	*hiwat = to;
	/* NOTE(review): vacuous if rlim_t is unsigned -- confirm type. */
	KASSERT(nsb >= 0);
	return 1;
}
1122