kern_resource.c revision 1.72 1 /* $NetBSD: kern_resource.c,v 1.72 2003/08/07 16:31:48 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1982, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)kern_resource.c 8.8 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.72 2003/08/07 16:31:48 agc Exp $");
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/file.h>
46 #include <sys/resourcevar.h>
47 #include <sys/malloc.h>
48 #include <sys/pool.h>
49 #include <sys/proc.h>
50
51 #include <sys/mount.h>
52 #include <sys/sa.h>
53 #include <sys/syscallargs.h>
54
55 #include <uvm/uvm_extern.h>
56
/*
 * Maximum process data and stack limits.
 * They are variables so they are patchable.
 *
 * XXXX Do we really need them to be patchable?
 */
rlim_t maxdmap = MAXDSIZ;	/* system-wide ceiling for RLIMIT_DATA */
rlim_t maxsmap = MAXSSIZ;	/* system-wide ceiling for RLIMIT_STACK */
65
66 /*
67 * Resource controls and accounting.
68 */
69
70 int
71 sys_getpriority(l, v, retval)
72 struct lwp *l;
73 void *v;
74 register_t *retval;
75 {
76 struct sys_getpriority_args /* {
77 syscallarg(int) which;
78 syscallarg(int) who;
79 } */ *uap = v;
80 struct proc *curp = l->l_proc, *p;
81 int low = NZERO + PRIO_MAX + 1;
82
83 switch (SCARG(uap, which)) {
84
85 case PRIO_PROCESS:
86 if (SCARG(uap, who) == 0)
87 p = curp;
88 else
89 p = pfind(SCARG(uap, who));
90 if (p == 0)
91 break;
92 low = p->p_nice;
93 break;
94
95 case PRIO_PGRP: {
96 struct pgrp *pg;
97
98 if (SCARG(uap, who) == 0)
99 pg = curp->p_pgrp;
100 else if ((pg = pgfind(SCARG(uap, who))) == NULL)
101 break;
102 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
103 if (p->p_nice < low)
104 low = p->p_nice;
105 }
106 break;
107 }
108
109 case PRIO_USER:
110 if (SCARG(uap, who) == 0)
111 SCARG(uap, who) = curp->p_ucred->cr_uid;
112 proclist_lock_read();
113 LIST_FOREACH(p, &allproc, p_list) {
114 if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who) &&
115 p->p_nice < low)
116 low = p->p_nice;
117 }
118 proclist_unlock_read();
119 break;
120
121 default:
122 return (EINVAL);
123 }
124 if (low == NZERO + PRIO_MAX + 1)
125 return (ESRCH);
126 *retval = low - NZERO;
127 return (0);
128 }
129
130 /* ARGSUSED */
131 int
132 sys_setpriority(l, v, retval)
133 struct lwp *l;
134 void *v;
135 register_t *retval;
136 {
137 struct sys_setpriority_args /* {
138 syscallarg(int) which;
139 syscallarg(int) who;
140 syscallarg(int) prio;
141 } */ *uap = v;
142 struct proc *curp = l->l_proc, *p;
143 int found = 0, error = 0;
144
145 switch (SCARG(uap, which)) {
146
147 case PRIO_PROCESS:
148 if (SCARG(uap, who) == 0)
149 p = curp;
150 else
151 p = pfind(SCARG(uap, who));
152 if (p == 0)
153 break;
154 error = donice(curp, p, SCARG(uap, prio));
155 found++;
156 break;
157
158 case PRIO_PGRP: {
159 struct pgrp *pg;
160
161 if (SCARG(uap, who) == 0)
162 pg = curp->p_pgrp;
163 else if ((pg = pgfind(SCARG(uap, who))) == NULL)
164 break;
165 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
166 error = donice(curp, p, SCARG(uap, prio));
167 found++;
168 }
169 break;
170 }
171
172 case PRIO_USER:
173 if (SCARG(uap, who) == 0)
174 SCARG(uap, who) = curp->p_ucred->cr_uid;
175 proclist_lock_read();
176 LIST_FOREACH(p, &allproc, p_list) {
177 if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who)) {
178 error = donice(curp, p, SCARG(uap, prio));
179 found++;
180 }
181 }
182 proclist_unlock_read();
183 break;
184
185 default:
186 return (EINVAL);
187 }
188 if (found == 0)
189 return (ESRCH);
190 return (error);
191 }
192
/*
 * Common back end for setpriority(2): apply nice value "n" to process
 * "chgp" on behalf of "curp", enforcing the usual permission rules.
 * Returns 0, EPERM, or EACCES.
 */
int
donice(curp, chgp, n)
	struct proc *curp, *chgp;
	int n;
{
	struct pcred *pcred = curp->p_cred;
	int s;

	/*
	 * A non-root caller may only renice processes whose owner
	 * matches the caller's effective or real uid.
	 */
	if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
	    pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
	    pcred->p_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	/* Clamp to the legal user range, then bias into kernel range. */
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	n += NZERO;
	/* Lowering a nice value (raising priority) needs superuser. */
	if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag))
		return (EACCES);
	chgp->p_nice = n;
	/* Recompute scheduling priority under the scheduler lock. */
	SCHED_LOCK(s);
	(void)resetprocpriority(chgp);
	SCHED_UNLOCK(s);
	return (0);
}
218
219 /* ARGSUSED */
220 int
221 sys_setrlimit(l, v, retval)
222 struct lwp *l;
223 void *v;
224 register_t *retval;
225 {
226 struct sys_setrlimit_args /* {
227 syscallarg(int) which;
228 syscallarg(const struct rlimit *) rlp;
229 } */ *uap = v;
230 struct proc *p = l->l_proc;
231 int which = SCARG(uap, which);
232 struct rlimit alim;
233 int error;
234
235 error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit));
236 if (error)
237 return (error);
238 return (dosetrlimit(p, p->p_cred, which, &alim));
239 }
240
/*
 * Validate and install a new resource limit "limp" of type "which"
 * for process p.  Raising a hard limit requires superuser credentials.
 * RLIMIT_STACK changes also adjust the protection of the stack region.
 * Returns 0 on success or an errno value.
 */
int
dosetrlimit(p, cred, which, limp)
	struct proc *p;
	struct pcred *cred;
	int which;
	struct rlimit *limp;
{
	struct rlimit *alimp;
	struct plimit *newplim;
	int error;

	if ((u_int)which >= RLIM_NLIMITS)
		return (EINVAL);

	if (limp->rlim_cur < 0 || limp->rlim_max < 0)
		return (EINVAL);

	alimp = &p->p_rlimit[which];
	/* if we don't change the value, no need to limcopy() */
	if (limp->rlim_cur == alimp->rlim_cur &&
	    limp->rlim_max == alimp->rlim_max)
		return 0;

	if (limp->rlim_cur > limp->rlim_max) {
		/*
		 * This is programming error. According to SUSv2, we should
		 * return error in this case.
		 */
		return (EINVAL);
	}
	/* Only the superuser may raise the hard limit. */
	if (limp->rlim_max > alimp->rlim_max
	    && (error = suser(cred->pc_ucred, &p->p_acflag)) != 0)
		return (error);

	/*
	 * The plimit structure is copy-on-write after fork: if it is
	 * shared and not share-modifiable, take a private copy before
	 * modifying it, and re-fetch alimp from the new copy.
	 */
	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		newplim = limcopy(p->p_limit);
		limfree(p->p_limit);
		p->p_limit = newplim;
		alimp = &p->p_rlimit[which];
	}

	switch (which) {

	case RLIMIT_DATA:
		/* Clamp to the patchable system-wide maximum. */
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		/* Clamp to the patchable system-wide maximum. */
		if (limp->rlim_cur > maxsmap)
			limp->rlim_cur = maxsmap;
		if (limp->rlim_max > maxsmap)
			limp->rlim_max = maxsmap;

		/*
		 * Return EINVAL if the new stack size limit is lower than
		 * current usage. Otherwise, the process would get SIGSEGV the
		 * moment it would try to access anything on it's current stack.
		 * This conforms to SUSv2.
		 */
		if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE
		    || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE)
			return (EINVAL);

		/*
		 * Stack is allocated to the max at exec time with
		 * only "rlim_cur" bytes accessible (In other words,
		 * allocates stack dividing two contiguous regions at
		 * "rlim_cur" bytes boundary).
		 *
		 * Since allocation is done in terms of page, roundup
		 * "rlim_cur" (otherwise, contiguous regions
		 * overlap). If stack limit is going up make more
		 * accessible, if going down make inaccessible.
		 */
		limp->rlim_cur = round_page(limp->rlim_cur);
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				/* Growing: make the new pages accessible. */
				prot = VM_PROT_ALL;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				/* Shrinking: revoke access to the pages. */
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			(void) uvm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr+size, prot, FALSE);
		}
		break;

	case RLIMIT_NOFILE:
		/* Clamp to the system-wide open file limit. */
		if (limp->rlim_cur > maxfiles)
			limp->rlim_cur = maxfiles;
		if (limp->rlim_max > maxfiles)
			limp->rlim_max = maxfiles;
		break;

	case RLIMIT_NPROC:
		/* Clamp to the system-wide process limit. */
		if (limp->rlim_cur > maxproc)
			limp->rlim_cur = maxproc;
		if (limp->rlim_max > maxproc)
			limp->rlim_max = maxproc;
		break;
	}
	/* Commit the (possibly clamped) new limits. */
	*alimp = *limp;
	return (0);
}
356
357 /* ARGSUSED */
358 int
359 sys_getrlimit(l, v, retval)
360 struct lwp *l;
361 void *v;
362 register_t *retval;
363 {
364 struct sys_getrlimit_args /* {
365 syscallarg(int) which;
366 syscallarg(struct rlimit *) rlp;
367 } */ *uap = v;
368 struct proc *p = l->l_proc;
369 int which = SCARG(uap, which);
370
371 if ((u_int)which >= RLIM_NLIMITS)
372 return (EINVAL);
373 return (copyout(&p->p_rlimit[which], SCARG(uap, rlp),
374 sizeof(struct rlimit)));
375 }
376
/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 *
 * The accumulated run time (p_rtime, plus the in-progress slice of any
 * currently running LWP) is apportioned among user/system/interrupt
 * time in the ratio of the statclock tick counts.  "ip" may be NULL
 * if the caller does not want interrupt time.
 */
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	u_quad_t u, st, ut, it, tot;
	unsigned long sec;
	long usec;
	int s;
	struct timeval tv;
	struct lwp *l;

	/* Snapshot the tick counters with the statclock blocked. */
	s = splstatclock();
	st = p->p_sticks;
	ut = p->p_uticks;
	it = p->p_iticks;
	splx(s);

	sec = p->p_rtime.tv_sec;
	usec = p->p_rtime.tv_usec;
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		if (l->l_stat == LSONPROC) {
			struct schedstate_percpu *spc;

			KDASSERT(l->l_cpu != NULL);
			spc = &l->l_cpu->ci_schedstate;

			/*
			 * Adjust for the current time slice. This is
			 * actually fairly important since the error
			 * here is on the order of a time quantum,
			 * which is much greater than the sampling
			 * error.
			 */
			microtime(&tv);
			sec += tv.tv_sec - spc->spc_runtime.tv_sec;
			usec += tv.tv_usec - spc->spc_runtime.tv_usec;
		}
	}

	tot = st + ut + it;
	/* Total run time in microseconds. */
	u = sec * 1000000ull + usec;

	if (tot == 0) {
		/* No ticks, so can't use to share time out, split 50-50 */
		st = ut = u / 2;
	} else {
		st = (u * st) / tot;
		ut = (u * ut) / tot;
	}
	sp->tv_sec = st / 1000000;
	sp->tv_usec = st % 1000000;
	up->tv_sec = ut / 1000000;
	up->tv_usec = ut % 1000000;
	if (ip != NULL) {
		/* it != 0 implies tot != 0, so the division is safe. */
		if (it != 0)
			it = (u * it) / tot;
		ip->tv_sec = it / 1000000;
		ip->tv_usec = it % 1000000;
	}
}
444
445 /* ARGSUSED */
446 int
447 sys_getrusage(l, v, retval)
448 struct lwp *l;
449 void *v;
450 register_t *retval;
451 {
452 struct sys_getrusage_args /* {
453 syscallarg(int) who;
454 syscallarg(struct rusage *) rusage;
455 } */ *uap = v;
456 struct rusage *rup;
457 struct proc *p = l->l_proc;
458
459 switch (SCARG(uap, who)) {
460
461 case RUSAGE_SELF:
462 rup = &p->p_stats->p_ru;
463 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
464 break;
465
466 case RUSAGE_CHILDREN:
467 rup = &p->p_stats->p_cru;
468 break;
469
470 default:
471 return (EINVAL);
472 }
473 return (copyout(rup, SCARG(uap, rusage), sizeof(struct rusage)));
474 }
475
476 void
477 ruadd(ru, ru2)
478 struct rusage *ru, *ru2;
479 {
480 long *ip, *ip2;
481 int i;
482
483 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
484 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
485 if (ru->ru_maxrss < ru2->ru_maxrss)
486 ru->ru_maxrss = ru2->ru_maxrss;
487 ip = &ru->ru_first; ip2 = &ru2->ru_first;
488 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
489 *ip++ += *ip2++;
490 }
491
492 /*
493 * Make a copy of the plimit structure.
494 * We share these structures copy-on-write after fork,
495 * and copy when a limit is changed.
496 */
497 struct plimit *
498 limcopy(lim)
499 struct plimit *lim;
500 {
501 struct plimit *newlim;
502 size_t l;
503
504 newlim = pool_get(&plimit_pool, PR_WAITOK);
505 memcpy(newlim->pl_rlimit, lim->pl_rlimit,
506 sizeof(struct rlimit) * RLIM_NLIMITS);
507 if (lim->pl_corename == defcorename) {
508 newlim->pl_corename = defcorename;
509 } else {
510 l = strlen(lim->pl_corename) + 1;
511 newlim->pl_corename = malloc(l, M_TEMP, M_WAITOK);
512 strlcpy(newlim->pl_corename, lim->pl_corename, l);
513 }
514 newlim->p_lflags = 0;
515 newlim->p_refcnt = 1;
516 return (newlim);
517 }
518
/*
 * Drop a reference to a plimit structure, releasing it (and any
 * private core name string) when the last reference goes away.
 */
void
limfree(lim)
	struct plimit *lim;
{

	if (--lim->p_refcnt > 0)
		return;
#ifdef DIAGNOSTIC
	if (lim->p_refcnt < 0)
		panic("limfree");
#endif
	/* The shared default core name must never be freed. */
	if (lim->pl_corename != defcorename)
		free(lim->pl_corename, M_TEMP);
	pool_put(&plimit_pool, lim);
}
534
/*
 * Allocate a fresh pstats structure derived from "ps": the range
 * delimited by the pstat_startzero/pstat_endzero marker fields is
 * cleared, and the pstat_startcopy/pstat_endcopy range is copied
 * from "ps".  (Presumably used at fork time to inherit the parent's
 * settings while resetting the accounting fields -- callers not
 * visible here.)
 */
struct pstats *
pstatscopy(ps)
	struct pstats *ps;
{

	struct pstats *newps;

	newps = pool_get(&pstats_pool, PR_WAITOK);

	/* Clear everything between the start/end "zero" markers. */
	memset(&newps->pstat_startzero, 0,
	    (unsigned) ((caddr_t)&newps->pstat_endzero -
	    (caddr_t)&newps->pstat_startzero));
	/* Copy everything between the start/end "copy" markers. */
	memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy,
	    ((caddr_t)&newps->pstat_endcopy -
	    (caddr_t)&newps->pstat_startcopy));

	return (newps);

}
554
/*
 * Return a pstats structure to its backing pool.
 */
void
pstatsfree(ps)
	struct pstats *ps;
{

	pool_put(&pstats_pool, ps);
}
562