sysv_sem.c revision 1.93 1 /* $NetBSD: sysv_sem.c,v 1.93 2015/05/13 01:00:16 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Implementation of SVID semaphores
35 *
36 * Author: Daniel Boulet
37 *
38 * This software is provided ``AS IS'' without any warranties of any kind.
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.93 2015/05/13 01:00:16 pgoyette Exp $");
43
44 #ifdef _KERNEL_OPT
45 #include "opt_sysv.h"
46 #endif
47
48 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/sem.h>
51 #include <sys/sysctl.h>
52 #include <sys/kmem.h>
53 #include <sys/mount.h> /* XXX for <sys/syscallargs.h> */
54 #include <sys/syscallargs.h>
55 #include <sys/kauth.h>
56
57 /*
58 * Memory areas:
59 * 1st: Pool of semaphore identifiers
60 * 2nd: Semaphores
61 * 3rd: Condition variables
62 * 4th: Undo structures
63 */
/*
 * Bases of the four areas; seminit() and semrealloc() carve all of them
 * out of a single wired allocation, in this order.
 */
64 struct semid_ds * sema __read_mostly;
65 static struct __sem * sem __read_mostly;
66 static kcondvar_t * semcv __read_mostly;
67 static int * semu __read_mostly;
68
/*
 * semlock is the mutex taken around accesses to the tables above.
 * sem_realloc_state is true while semrealloc() is replacing the tables;
 * sem_realloc_cv is used both by semop() callers waiting for that to end
 * and by semrealloc() waiting for sleeping semop() callers to leave.
 */
69 static kmutex_t semlock __cacheline_aligned;
70 static bool sem_realloc_state __read_mostly;
71 static kcondvar_t sem_realloc_cv;
72
73 /*
74 * List of active undo structures, total number of semaphores,
75 * and total number of semop waiters.
76 */
77 static struct sem_undo *semu_list __read_mostly;
78 static u_int semtot __cacheline_aligned;
79 static u_int sem_waiters __cacheline_aligned;
80
/*
 * Find the ix'th sem_undo vector in the undo area that starts at s.
 * Consecutive vectors are seminfo.semusz bytes apart.  Both arguments
 * are parenthesized so that expression arguments expand correctly.
 */
#define SEMU(s, ix)	((struct sem_undo *)(((long)(s)) + (ix) * seminfo.semusz))

/* Debug tracing: expands to a printf call only when SEM_DEBUG is defined. */
#ifdef SEM_DEBUG
#define SEM_PRINTF(a) printf a
#else
#define SEM_PRINTF(a)
#endif
89
90 void *hook; /* cookie from exithook_establish() */
91
/* Set to 1 by seminit() once the semaphore tables are ready. */
92 extern int kern_has_sysvsem;
93
/*
 * Undo-structure helpers.  Each of them asserts that semlock is held
 * by the caller.
 */
94 struct sem_undo *semu_alloc(struct proc *);
95 int semundo_adjust(struct proc *, struct sem_undo **, int, int, int);
96 void semundo_clear(int, int);
97
98 void
99 seminit(void)
100 {
101 int i, sz;
102 vaddr_t v;
103
104 mutex_init(&semlock, MUTEX_DEFAULT, IPL_NONE);
105 cv_init(&sem_realloc_cv, "semrealc");
106 sem_realloc_state = false;
107 semtot = 0;
108 sem_waiters = 0;
109
110 /* Allocate the wired memory for our structures */
111 sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
112 ALIGN(seminfo.semmns * sizeof(struct __sem)) +
113 ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
114 ALIGN(seminfo.semmnu * seminfo.semusz);
115 sz = round_page(sz);
116 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
117 if (v == 0)
118 panic("sysv_sem: cannot allocate memory");
119 sema = (void *)v;
120 sem = (void *)((uintptr_t)sema +
121 ALIGN(seminfo.semmni * sizeof(struct semid_ds)));
122 semcv = (void *)((uintptr_t)sem +
123 ALIGN(seminfo.semmns * sizeof(struct __sem)));
124 semu = (void *)((uintptr_t)semcv +
125 ALIGN(seminfo.semmni * sizeof(kcondvar_t)));
126
127 for (i = 0; i < seminfo.semmni; i++) {
128 sema[i]._sem_base = 0;
129 sema[i].sem_perm.mode = 0;
130 cv_init(&semcv[i], "semwait");
131 }
132 for (i = 0; i < seminfo.semmnu; i++) {
133 struct sem_undo *suptr = SEMU(semu, i);
134 suptr->un_proc = NULL;
135 }
136 semu_list = NULL;
137 exithook_establish(semexit, NULL);
138
139 kern_has_sysvsem = 1;
140
141 sysvipcinit();
142 }
143
144 static int
145 semrealloc(int newsemmni, int newsemmns, int newsemmnu)
146 {
147 struct semid_ds *new_sema, *old_sema;
148 struct __sem *new_sem;
149 struct sem_undo *new_semu_list, *suptr, *nsuptr;
150 int *new_semu;
151 kcondvar_t *new_semcv;
152 vaddr_t v;
153 int i, j, lsemid, nmnus, sz;
154
155 if (newsemmni < 1 || newsemmns < 1 || newsemmnu < 1)
156 return EINVAL;
157
158 /* Allocate the wired memory for our structures */
159 sz = ALIGN(newsemmni * sizeof(struct semid_ds)) +
160 ALIGN(newsemmns * sizeof(struct __sem)) +
161 ALIGN(newsemmni * sizeof(kcondvar_t)) +
162 ALIGN(newsemmnu * seminfo.semusz);
163 sz = round_page(sz);
164 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
165 if (v == 0)
166 return ENOMEM;
167
168 mutex_enter(&semlock);
169 if (sem_realloc_state) {
170 mutex_exit(&semlock);
171 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
172 return EBUSY;
173 }
174 sem_realloc_state = true;
175 if (sem_waiters) {
176 /*
177 * Mark reallocation state, wake-up all waiters,
178 * and wait while they will all exit.
179 */
180 for (i = 0; i < seminfo.semmni; i++)
181 cv_broadcast(&semcv[i]);
182 while (sem_waiters)
183 cv_wait(&sem_realloc_cv, &semlock);
184 }
185 old_sema = sema;
186
187 /* Get the number of last slot */
188 lsemid = 0;
189 for (i = 0; i < seminfo.semmni; i++)
190 if (sema[i].sem_perm.mode & SEM_ALLOC)
191 lsemid = i;
192
193 /* Get the number of currently used undo structures */
194 nmnus = 0;
195 for (i = 0; i < seminfo.semmnu; i++) {
196 suptr = SEMU(semu, i);
197 if (suptr->un_proc == NULL)
198 continue;
199 nmnus++;
200 }
201
202 /* We cannot reallocate less memory than we use */
203 if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) {
204 mutex_exit(&semlock);
205 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
206 return EBUSY;
207 }
208
209 new_sema = (void *)v;
210 new_sem = (void *)((uintptr_t)new_sema +
211 ALIGN(newsemmni * sizeof(struct semid_ds)));
212 new_semcv = (void *)((uintptr_t)new_sem +
213 ALIGN(newsemmns * sizeof(struct __sem)));
214 new_semu = (void *)((uintptr_t)new_semcv +
215 ALIGN(newsemmni * sizeof(kcondvar_t)));
216
217 /* Initialize all semaphore identifiers and condvars */
218 for (i = 0; i < newsemmni; i++) {
219 new_sema[i]._sem_base = 0;
220 new_sema[i].sem_perm.mode = 0;
221 cv_init(&new_semcv[i], "semwait");
222 }
223 for (i = 0; i < newsemmnu; i++) {
224 nsuptr = SEMU(new_semu, i);
225 nsuptr->un_proc = NULL;
226 }
227
228 /*
229 * Copy all identifiers, semaphores and list of the
230 * undo structures to the new memory allocation.
231 */
232 j = 0;
233 for (i = 0; i <= lsemid; i++) {
234 if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0)
235 continue;
236 memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds));
237 new_sema[i]._sem_base = &new_sem[j];
238 memcpy(new_sema[i]._sem_base, sema[i]._sem_base,
239 (sizeof(struct __sem) * sema[i].sem_nsems));
240 j += sema[i].sem_nsems;
241 }
242 KASSERT(j == semtot);
243
244 j = 0;
245 new_semu_list = NULL;
246 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
247 KASSERT(j < newsemmnu);
248 nsuptr = SEMU(new_semu, j);
249 memcpy(nsuptr, suptr, SEMUSZ);
250 nsuptr->un_next = new_semu_list;
251 new_semu_list = nsuptr;
252 j++;
253 }
254
255 for (i = 0; i < seminfo.semmni; i++) {
256 KASSERT(cv_has_waiters(&semcv[i]) == false);
257 cv_destroy(&semcv[i]);
258 }
259
260 sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
261 ALIGN(seminfo.semmns * sizeof(struct __sem)) +
262 ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
263 ALIGN(seminfo.semmnu * seminfo.semusz);
264 sz = round_page(sz);
265
266 /* Set the pointers and update the new values */
267 sema = new_sema;
268 sem = new_sem;
269 semcv = new_semcv;
270 semu = new_semu;
271 semu_list = new_semu_list;
272
273 seminfo.semmni = newsemmni;
274 seminfo.semmns = newsemmns;
275 seminfo.semmnu = newsemmnu;
276
277 /* Reallocation completed - notify all waiters, if any */
278 sem_realloc_state = false;
279 cv_broadcast(&sem_realloc_cv);
280 mutex_exit(&semlock);
281
282 uvm_km_free(kernel_map, (vaddr_t)old_sema, sz, UVM_KMF_WIRED);
283 return 0;
284 }
285
286 /*
287 * Placebo.
288 */
289
290 int
291 sys_semconfig(struct lwp *l, const struct sys_semconfig_args *uap, register_t *retval)
292 {
293
294 *retval = 0;
295 return 0;
296 }
297
298 /*
299 * Allocate a new sem_undo structure for a process.
300 * => Returns NULL on failure.
301 */
302 struct sem_undo *
303 semu_alloc(struct proc *p)
304 {
305 struct sem_undo *suptr, **supptr;
306 bool attempted = false;
307 int i;
308
309 KASSERT(mutex_owned(&semlock));
310 again:
311 /* Look for a free structure. */
312 for (i = 0; i < seminfo.semmnu; i++) {
313 suptr = SEMU(semu, i);
314 if (suptr->un_proc == NULL) {
315 /* Found. Fill it in and return. */
316 suptr->un_next = semu_list;
317 semu_list = suptr;
318 suptr->un_cnt = 0;
319 suptr->un_proc = p;
320 return suptr;
321 }
322 }
323
324 /* Not found. Attempt to free some structures. */
325 if (!attempted) {
326 bool freed = false;
327
328 attempted = true;
329 supptr = &semu_list;
330 while ((suptr = *supptr) != NULL) {
331 if (suptr->un_cnt == 0) {
332 suptr->un_proc = NULL;
333 *supptr = suptr->un_next;
334 freed = true;
335 } else {
336 supptr = &suptr->un_next;
337 }
338 }
339 if (freed) {
340 goto again;
341 }
342 }
343 return NULL;
344 }
345
346 /*
347 * Adjust a particular entry for a particular proc
348 */
349
350 int
351 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum,
352 int adjval)
353 {
354 struct sem_undo *suptr;
355 struct sem_undo_entry *sunptr;
356 int i;
357
358 KASSERT(mutex_owned(&semlock));
359
360 /*
361 * Look for and remember the sem_undo if the caller doesn't
362 * provide it
363 */
364
365 suptr = *supptr;
366 if (suptr == NULL) {
367 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
368 if (suptr->un_proc == p)
369 break;
370
371 if (suptr == NULL) {
372 suptr = semu_alloc(p);
373 if (suptr == NULL)
374 return (ENOSPC);
375 }
376 *supptr = suptr;
377 }
378
379 /*
380 * Look for the requested entry and adjust it (delete if
381 * adjval becomes 0).
382 */
383 sunptr = &suptr->un_ent[0];
384 for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
385 if (sunptr->un_id != semid || sunptr->un_num != semnum)
386 continue;
387 sunptr->un_adjval += adjval;
388 if (sunptr->un_adjval == 0) {
389 suptr->un_cnt--;
390 if (i < suptr->un_cnt)
391 suptr->un_ent[i] =
392 suptr->un_ent[suptr->un_cnt];
393 }
394 return (0);
395 }
396
397 /* Didn't find the right entry - create it */
398 if (suptr->un_cnt == SEMUME)
399 return (EINVAL);
400
401 sunptr = &suptr->un_ent[suptr->un_cnt];
402 suptr->un_cnt++;
403 sunptr->un_adjval = adjval;
404 sunptr->un_id = semid;
405 sunptr->un_num = semnum;
406 return (0);
407 }
408
409 void
410 semundo_clear(int semid, int semnum)
411 {
412 struct sem_undo *suptr;
413 struct sem_undo_entry *sunptr, *sunend;
414
415 KASSERT(mutex_owned(&semlock));
416
417 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
418 for (sunptr = &suptr->un_ent[0],
419 sunend = sunptr + suptr->un_cnt; sunptr < sunend;) {
420 if (sunptr->un_id == semid) {
421 if (semnum == -1 || sunptr->un_num == semnum) {
422 suptr->un_cnt--;
423 sunend--;
424 if (sunptr != sunend)
425 *sunptr = *sunend;
426 if (semnum != -1)
427 break;
428 else
429 continue;
430 }
431 }
432 sunptr++;
433 }
434 }
435
436 int
437 sys_____semctl50(struct lwp *l, const struct sys_____semctl50_args *uap,
438 register_t *retval)
439 {
440 /* {
441 syscallarg(int) semid;
442 syscallarg(int) semnum;
443 syscallarg(int) cmd;
444 syscallarg(union __semun *) arg;
445 } */
446 struct semid_ds sembuf;
447 int cmd, error;
448 void *pass_arg;
449 union __semun karg;
450
451 cmd = SCARG(uap, cmd);
452
453 pass_arg = get_semctl_arg(cmd, &sembuf, &karg);
454
455 if (pass_arg) {
456 error = copyin(SCARG(uap, arg), &karg, sizeof(karg));
457 if (error)
458 return error;
459 if (cmd == IPC_SET) {
460 error = copyin(karg.buf, &sembuf, sizeof(sembuf));
461 if (error)
462 return (error);
463 }
464 }
465
466 error = semctl1(l, SCARG(uap, semid), SCARG(uap, semnum), cmd,
467 pass_arg, retval);
468
469 if (error == 0 && cmd == IPC_STAT)
470 error = copyout(&sembuf, karg.buf, sizeof(sembuf));
471
472 return (error);
473 }
474
475 int
476 semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v,
477 register_t *retval)
478 {
479 kauth_cred_t cred = l->l_cred;
480 union __semun *arg = v;
481 struct semid_ds *sembuf = v, *semaptr;
482 int i, error, ix;
483
484 SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n",
485 semid, semnum, cmd, v));
486
487 mutex_enter(&semlock);
488
489 ix = IPCID_TO_IX(semid);
490 if (ix < 0 || ix >= seminfo.semmni) {
491 mutex_exit(&semlock);
492 return (EINVAL);
493 }
494
495 semaptr = &sema[ix];
496 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
497 semaptr->sem_perm._seq != IPCID_TO_SEQ(semid)) {
498 mutex_exit(&semlock);
499 return (EINVAL);
500 }
501
502 switch (cmd) {
503 case IPC_RMID:
504 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0)
505 break;
506 semaptr->sem_perm.cuid = kauth_cred_geteuid(cred);
507 semaptr->sem_perm.uid = kauth_cred_geteuid(cred);
508 semtot -= semaptr->sem_nsems;
509 for (i = semaptr->_sem_base - sem; i < semtot; i++)
510 sem[i] = sem[i + semaptr->sem_nsems];
511 for (i = 0; i < seminfo.semmni; i++) {
512 if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
513 sema[i]._sem_base > semaptr->_sem_base)
514 sema[i]._sem_base -= semaptr->sem_nsems;
515 }
516 semaptr->sem_perm.mode = 0;
517 semundo_clear(ix, -1);
518 cv_broadcast(&semcv[ix]);
519 break;
520
521 case IPC_SET:
522 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
523 break;
524 KASSERT(sembuf != NULL);
525 semaptr->sem_perm.uid = sembuf->sem_perm.uid;
526 semaptr->sem_perm.gid = sembuf->sem_perm.gid;
527 semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
528 (sembuf->sem_perm.mode & 0777);
529 semaptr->sem_ctime = time_second;
530 break;
531
532 case IPC_STAT:
533 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
534 break;
535 KASSERT(sembuf != NULL);
536 memcpy(sembuf, semaptr, sizeof(struct semid_ds));
537 sembuf->sem_perm.mode &= 0777;
538 break;
539
540 case GETNCNT:
541 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
542 break;
543 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
544 error = EINVAL;
545 break;
546 }
547 *retval = semaptr->_sem_base[semnum].semncnt;
548 break;
549
550 case GETPID:
551 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
552 break;
553 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
554 error = EINVAL;
555 break;
556 }
557 *retval = semaptr->_sem_base[semnum].sempid;
558 break;
559
560 case GETVAL:
561 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
562 break;
563 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
564 error = EINVAL;
565 break;
566 }
567 *retval = semaptr->_sem_base[semnum].semval;
568 break;
569
570 case GETALL:
571 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
572 break;
573 KASSERT(arg != NULL);
574 for (i = 0; i < semaptr->sem_nsems; i++) {
575 error = copyout(&semaptr->_sem_base[i].semval,
576 &arg->array[i], sizeof(arg->array[i]));
577 if (error != 0)
578 break;
579 }
580 break;
581
582 case GETZCNT:
583 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
584 break;
585 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
586 error = EINVAL;
587 break;
588 }
589 *retval = semaptr->_sem_base[semnum].semzcnt;
590 break;
591
592 case SETVAL:
593 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
594 break;
595 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
596 error = EINVAL;
597 break;
598 }
599 KASSERT(arg != NULL);
600 if ((unsigned int)arg->val > seminfo.semvmx) {
601 error = ERANGE;
602 break;
603 }
604 semaptr->_sem_base[semnum].semval = arg->val;
605 semundo_clear(ix, semnum);
606 cv_broadcast(&semcv[ix]);
607 break;
608
609 case SETALL:
610 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
611 break;
612 KASSERT(arg != NULL);
613 for (i = 0; i < semaptr->sem_nsems; i++) {
614 unsigned short semval;
615 error = copyin(&arg->array[i], &semval,
616 sizeof(arg->array[i]));
617 if (error != 0)
618 break;
619 if ((unsigned int)semval > seminfo.semvmx) {
620 error = ERANGE;
621 break;
622 }
623 semaptr->_sem_base[i].semval = semval;
624 }
625 semundo_clear(ix, -1);
626 cv_broadcast(&semcv[ix]);
627 break;
628
629 default:
630 error = EINVAL;
631 break;
632 }
633
634 mutex_exit(&semlock);
635 return (error);
636 }
637
638 int
639 sys_semget(struct lwp *l, const struct sys_semget_args *uap, register_t *retval)
640 {
641 /* {
642 syscallarg(key_t) key;
643 syscallarg(int) nsems;
644 syscallarg(int) semflg;
645 } */
646 int semid, error = 0;
647 int key = SCARG(uap, key);
648 int nsems = SCARG(uap, nsems);
649 int semflg = SCARG(uap, semflg);
650 kauth_cred_t cred = l->l_cred;
651
652 SEM_PRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
653
654 mutex_enter(&semlock);
655
656 if (key != IPC_PRIVATE) {
657 for (semid = 0; semid < seminfo.semmni; semid++) {
658 if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
659 sema[semid].sem_perm._key == key)
660 break;
661 }
662 if (semid < seminfo.semmni) {
663 SEM_PRINTF(("found public key\n"));
664 if ((error = ipcperm(cred, &sema[semid].sem_perm,
665 semflg & 0700)))
666 goto out;
667 if (nsems > 0 && sema[semid].sem_nsems < nsems) {
668 SEM_PRINTF(("too small\n"));
669 error = EINVAL;
670 goto out;
671 }
672 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
673 SEM_PRINTF(("not exclusive\n"));
674 error = EEXIST;
675 goto out;
676 }
677 goto found;
678 }
679 }
680
681 SEM_PRINTF(("need to allocate the semid_ds\n"));
682 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
683 if (nsems <= 0 || nsems > seminfo.semmsl) {
684 SEM_PRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
685 seminfo.semmsl));
686 error = EINVAL;
687 goto out;
688 }
689 if (nsems > seminfo.semmns - semtot) {
690 SEM_PRINTF(("not enough semaphores left "
691 "(need %d, got %d)\n",
692 nsems, seminfo.semmns - semtot));
693 error = ENOSPC;
694 goto out;
695 }
696 for (semid = 0; semid < seminfo.semmni; semid++) {
697 if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
698 break;
699 }
700 if (semid == seminfo.semmni) {
701 SEM_PRINTF(("no more semid_ds's available\n"));
702 error = ENOSPC;
703 goto out;
704 }
705 SEM_PRINTF(("semid %d is available\n", semid));
706 sema[semid].sem_perm._key = key;
707 sema[semid].sem_perm.cuid = kauth_cred_geteuid(cred);
708 sema[semid].sem_perm.uid = kauth_cred_geteuid(cred);
709 sema[semid].sem_perm.cgid = kauth_cred_getegid(cred);
710 sema[semid].sem_perm.gid = kauth_cred_getegid(cred);
711 sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
712 sema[semid].sem_perm._seq =
713 (sema[semid].sem_perm._seq + 1) & 0x7fff;
714 sema[semid].sem_nsems = nsems;
715 sema[semid].sem_otime = 0;
716 sema[semid].sem_ctime = time_second;
717 sema[semid]._sem_base = &sem[semtot];
718 semtot += nsems;
719 memset(sema[semid]._sem_base, 0,
720 sizeof(sema[semid]._sem_base[0]) * nsems);
721 SEM_PRINTF(("sembase = %p, next = %p\n", sema[semid]._sem_base,
722 &sem[semtot]));
723 } else {
724 SEM_PRINTF(("didn't find it and wasn't asked to create it\n"));
725 error = ENOENT;
726 goto out;
727 }
728
729 found:
730 *retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
731 out:
732 mutex_exit(&semlock);
733 return (error);
734 }
735
736 #define SMALL_SOPS 8
737
738 int
739 sys_semop(struct lwp *l, const struct sys_semop_args *uap, register_t *retval)
740 {
741 /* {
742 syscallarg(int) semid;
743 syscallarg(struct sembuf *) sops;
744 syscallarg(size_t) nsops;
745 } */
746 struct proc *p = l->l_proc;
747 int semid = SCARG(uap, semid), seq;
748 size_t nsops = SCARG(uap, nsops);
749 struct sembuf small_sops[SMALL_SOPS];
750 struct sembuf *sops;
751 struct semid_ds *semaptr;
752 struct sembuf *sopptr = NULL;
753 struct __sem *semptr = NULL;
754 struct sem_undo *suptr = NULL;
755 kauth_cred_t cred = l->l_cred;
756 int i, error;
757 int do_wakeup, do_undos;
758
759 SEM_PRINTF(("call to semop(%d, %p, %zd)\n", semid, SCARG(uap,sops), nsops));
760
761 if (__predict_false((p->p_flag & PK_SYSVSEM) == 0)) {
762 mutex_enter(p->p_lock);
763 p->p_flag |= PK_SYSVSEM;
764 mutex_exit(p->p_lock);
765 }
766
767 restart:
768 if (nsops <= SMALL_SOPS) {
769 sops = small_sops;
770 } else if (nsops <= seminfo.semopm) {
771 sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP);
772 } else {
773 SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n",
774 seminfo.semopm, nsops));
775 return (E2BIG);
776 }
777
778 error = copyin(SCARG(uap, sops), sops, nsops * sizeof(sops[0]));
779 if (error) {
780 SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error,
781 SCARG(uap, sops), &sops, nsops * sizeof(sops[0])));
782 if (sops != small_sops)
783 kmem_free(sops, nsops * sizeof(*sops));
784 return error;
785 }
786
787 mutex_enter(&semlock);
788 /* In case of reallocation, we will wait for completion */
789 while (__predict_false(sem_realloc_state))
790 cv_wait(&sem_realloc_cv, &semlock);
791
792 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */
793 if (semid < 0 || semid >= seminfo.semmni) {
794 error = EINVAL;
795 goto out;
796 }
797
798 semaptr = &sema[semid];
799 seq = IPCID_TO_SEQ(SCARG(uap, semid));
800 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
801 semaptr->sem_perm._seq != seq) {
802 error = EINVAL;
803 goto out;
804 }
805
806 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) {
807 SEM_PRINTF(("error = %d from ipaccess\n", error));
808 goto out;
809 }
810
811 for (i = 0; i < nsops; i++)
812 if (sops[i].sem_num >= semaptr->sem_nsems) {
813 error = EFBIG;
814 goto out;
815 }
816
817 /*
818 * Loop trying to satisfy the vector of requests.
819 * If we reach a point where we must wait, any requests already
820 * performed are rolled back and we go to sleep until some other
821 * process wakes us up. At this point, we start all over again.
822 *
823 * This ensures that from the perspective of other tasks, a set
824 * of requests is atomic (never partially satisfied).
825 */
826 do_undos = 0;
827
828 for (;;) {
829 do_wakeup = 0;
830
831 for (i = 0; i < nsops; i++) {
832 sopptr = &sops[i];
833 semptr = &semaptr->_sem_base[sopptr->sem_num];
834
835 SEM_PRINTF(("semop: semaptr=%p, sem_base=%p, "
836 "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n",
837 semaptr, semaptr->_sem_base, semptr,
838 sopptr->sem_num, semptr->semval, sopptr->sem_op,
839 (sopptr->sem_flg & IPC_NOWAIT) ?
840 "nowait" : "wait"));
841
842 if (sopptr->sem_op < 0) {
843 if ((int)(semptr->semval +
844 sopptr->sem_op) < 0) {
845 SEM_PRINTF(("semop: "
846 "can't do it now\n"));
847 break;
848 } else {
849 semptr->semval += sopptr->sem_op;
850 if (semptr->semval == 0 &&
851 semptr->semzcnt > 0)
852 do_wakeup = 1;
853 }
854 if (sopptr->sem_flg & SEM_UNDO)
855 do_undos = 1;
856 } else if (sopptr->sem_op == 0) {
857 if (semptr->semval > 0) {
858 SEM_PRINTF(("semop: not zero now\n"));
859 break;
860 }
861 } else {
862 if (semptr->semncnt > 0)
863 do_wakeup = 1;
864 semptr->semval += sopptr->sem_op;
865 if (sopptr->sem_flg & SEM_UNDO)
866 do_undos = 1;
867 }
868 }
869
870 /*
871 * Did we get through the entire vector?
872 */
873 if (i >= nsops)
874 goto done;
875
876 /*
877 * No ... rollback anything that we've already done
878 */
879 SEM_PRINTF(("semop: rollback 0 through %d\n", i - 1));
880 while (i-- > 0)
881 semaptr->_sem_base[sops[i].sem_num].semval -=
882 sops[i].sem_op;
883
884 /*
885 * If the request that we couldn't satisfy has the
886 * NOWAIT flag set then return with EAGAIN.
887 */
888 if (sopptr->sem_flg & IPC_NOWAIT) {
889 error = EAGAIN;
890 goto out;
891 }
892
893 if (sopptr->sem_op == 0)
894 semptr->semzcnt++;
895 else
896 semptr->semncnt++;
897
898 sem_waiters++;
899 SEM_PRINTF(("semop: good night!\n"));
900 error = cv_wait_sig(&semcv[semid], &semlock);
901 SEM_PRINTF(("semop: good morning (error=%d)!\n", error));
902 sem_waiters--;
903
904 /* Notify reallocator, if it is waiting */
905 cv_broadcast(&sem_realloc_cv);
906
907 /*
908 * Make sure that the semaphore still exists
909 */
910 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
911 semaptr->sem_perm._seq != seq) {
912 error = EIDRM;
913 goto out;
914 }
915
916 /*
917 * The semaphore is still alive. Readjust the count of
918 * waiting processes.
919 */
920 semptr = &semaptr->_sem_base[sopptr->sem_num];
921 if (sopptr->sem_op == 0)
922 semptr->semzcnt--;
923 else
924 semptr->semncnt--;
925
926 /* In case of such state, restart the call */
927 if (sem_realloc_state) {
928 mutex_exit(&semlock);
929 goto restart;
930 }
931
932 /* Is it really morning, or was our sleep interrupted? */
933 if (error != 0) {
934 error = EINTR;
935 goto out;
936 }
937 SEM_PRINTF(("semop: good morning!\n"));
938 }
939
940 done:
941 /*
942 * Process any SEM_UNDO requests.
943 */
944 if (do_undos) {
945 for (i = 0; i < nsops; i++) {
946 /*
947 * We only need to deal with SEM_UNDO's for non-zero
948 * op's.
949 */
950 int adjval;
951
952 if ((sops[i].sem_flg & SEM_UNDO) == 0)
953 continue;
954 adjval = sops[i].sem_op;
955 if (adjval == 0)
956 continue;
957 error = semundo_adjust(p, &suptr, semid,
958 sops[i].sem_num, -adjval);
959 if (error == 0)
960 continue;
961
962 /*
963 * Oh-Oh! We ran out of either sem_undo's or undo's.
964 * Rollback the adjustments to this point and then
965 * rollback the semaphore ups and down so we can return
966 * with an error with all structures restored. We
967 * rollback the undo's in the exact reverse order that
968 * we applied them. This guarantees that we won't run
969 * out of space as we roll things back out.
970 */
971 while (i-- > 0) {
972 if ((sops[i].sem_flg & SEM_UNDO) == 0)
973 continue;
974 adjval = sops[i].sem_op;
975 if (adjval == 0)
976 continue;
977 if (semundo_adjust(p, &suptr, semid,
978 sops[i].sem_num, adjval) != 0)
979 panic("semop - can't undo undos");
980 }
981
982 for (i = 0; i < nsops; i++)
983 semaptr->_sem_base[sops[i].sem_num].semval -=
984 sops[i].sem_op;
985
986 SEM_PRINTF(("error = %d from semundo_adjust\n", error));
987 goto out;
988 } /* loop through the sops */
989 } /* if (do_undos) */
990
991 /* We're definitely done - set the sempid's */
992 for (i = 0; i < nsops; i++) {
993 sopptr = &sops[i];
994 semptr = &semaptr->_sem_base[sopptr->sem_num];
995 semptr->sempid = p->p_pid;
996 }
997
998 /* Update sem_otime */
999 semaptr->sem_otime = time_second;
1000
1001 /* Do a wakeup if any semaphore was up'd. */
1002 if (do_wakeup) {
1003 SEM_PRINTF(("semop: doing wakeup\n"));
1004 cv_broadcast(&semcv[semid]);
1005 SEM_PRINTF(("semop: back from wakeup\n"));
1006 }
1007 SEM_PRINTF(("semop: done\n"));
1008 *retval = 0;
1009
1010 out:
1011 mutex_exit(&semlock);
1012 if (sops != small_sops)
1013 kmem_free(sops, nsops * sizeof(*sops));
1014 return error;
1015 }
1016
1017 /*
1018 * Go through the undo structures for this process and apply the
1019 * adjustments to semaphores.
1020 */
1021 /*ARGSUSED*/
1022 void
1023 semexit(struct proc *p, void *v)
1024 {
1025 struct sem_undo *suptr;
1026 struct sem_undo **supptr;
1027
1028 if ((p->p_flag & PK_SYSVSEM) == 0)
1029 return;
1030
1031 mutex_enter(&semlock);
1032
1033 /*
1034 * Go through the chain of undo vectors looking for one
1035 * associated with this process.
1036 */
1037
1038 for (supptr = &semu_list; (suptr = *supptr) != NULL;
1039 supptr = &suptr->un_next) {
1040 if (suptr->un_proc == p)
1041 break;
1042 }
1043
1044 /*
1045 * If there is no undo vector, skip to the end.
1046 */
1047
1048 if (suptr == NULL) {
1049 mutex_exit(&semlock);
1050 return;
1051 }
1052
1053 /*
1054 * We now have an undo vector for this process.
1055 */
1056
1057 SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p,
1058 suptr->un_cnt));
1059
1060 /*
1061 * If there are any active undo elements then process them.
1062 */
1063 if (suptr->un_cnt > 0) {
1064 int ix;
1065
1066 for (ix = 0; ix < suptr->un_cnt; ix++) {
1067 int semid = suptr->un_ent[ix].un_id;
1068 int semnum = suptr->un_ent[ix].un_num;
1069 int adjval = suptr->un_ent[ix].un_adjval;
1070 struct semid_ds *semaptr;
1071
1072 semaptr = &sema[semid];
1073 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
1074 panic("semexit - semid not allocated");
1075 if (semnum >= semaptr->sem_nsems)
1076 panic("semexit - semnum out of range");
1077
1078 SEM_PRINTF(("semexit: %p id=%d num=%d(adj=%d) ; "
1079 "sem=%d\n",
1080 suptr->un_proc, suptr->un_ent[ix].un_id,
1081 suptr->un_ent[ix].un_num,
1082 suptr->un_ent[ix].un_adjval,
1083 semaptr->_sem_base[semnum].semval));
1084
1085 if (adjval < 0 &&
1086 semaptr->_sem_base[semnum].semval < -adjval)
1087 semaptr->_sem_base[semnum].semval = 0;
1088 else
1089 semaptr->_sem_base[semnum].semval += adjval;
1090
1091 cv_broadcast(&semcv[semid]);
1092 SEM_PRINTF(("semexit: back from wakeup\n"));
1093 }
1094 }
1095
1096 /*
1097 * Deallocate the undo vector.
1098 */
1099 SEM_PRINTF(("removing vector\n"));
1100 suptr->un_proc = NULL;
1101 *supptr = suptr->un_next;
1102 mutex_exit(&semlock);
1103 }
1104
1105 /*
1106 * Sysctl initialization and nodes.
1107 */
1108
1109 static int
1110 sysctl_ipc_semmni(SYSCTLFN_ARGS)
1111 {
1112 int newsize, error;
1113 struct sysctlnode node;
1114 node = *rnode;
1115 node.sysctl_data = &newsize;
1116
1117 newsize = seminfo.semmni;
1118 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1119 if (error || newp == NULL)
1120 return error;
1121
1122 return semrealloc(newsize, seminfo.semmns, seminfo.semmnu);
1123 }
1124
1125 static int
1126 sysctl_ipc_semmns(SYSCTLFN_ARGS)
1127 {
1128 int newsize, error;
1129 struct sysctlnode node;
1130 node = *rnode;
1131 node.sysctl_data = &newsize;
1132
1133 newsize = seminfo.semmns;
1134 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1135 if (error || newp == NULL)
1136 return error;
1137
1138 return semrealloc(seminfo.semmni, newsize, seminfo.semmnu);
1139 }
1140
1141 static int
1142 sysctl_ipc_semmnu(SYSCTLFN_ARGS)
1143 {
1144 int newsize, error;
1145 struct sysctlnode node;
1146 node = *rnode;
1147 node.sysctl_data = &newsize;
1148
1149 newsize = seminfo.semmnu;
1150 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1151 if (error || newp == NULL)
1152 return error;
1153
1154 return semrealloc(seminfo.semmni, seminfo.semmns, newsize);
1155 }
1156
1157 SYSCTL_SETUP(sysctl_ipc_sem_setup, "sysctl kern.ipc subtree setup")
1158 {
1159 const struct sysctlnode *node = NULL;
1160
1161 sysctl_createv(clog, 0, NULL, &node,
1162 CTLFLAG_PERMANENT,
1163 CTLTYPE_NODE, "ipc",
1164 SYSCTL_DESCR("SysV IPC options"),
1165 NULL, 0, NULL, 0,
1166 CTL_KERN, KERN_SYSVIPC, CTL_EOL);
1167
1168 if (node == NULL)
1169 return;
1170
1171 sysctl_createv(clog, 0, &node, NULL,
1172 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1173 CTLTYPE_INT, "semmni",
1174 SYSCTL_DESCR("Max number of number of semaphore identifiers"),
1175 sysctl_ipc_semmni, 0, &seminfo.semmni, 0,
1176 CTL_CREATE, CTL_EOL);
1177 sysctl_createv(clog, 0, &node, NULL,
1178 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1179 CTLTYPE_INT, "semmns",
1180 SYSCTL_DESCR("Max number of number of semaphores in system"),
1181 sysctl_ipc_semmns, 0, &seminfo.semmns, 0,
1182 CTL_CREATE, CTL_EOL);
1183 sysctl_createv(clog, 0, &node, NULL,
1184 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1185 CTLTYPE_INT, "semmnu",
1186 SYSCTL_DESCR("Max number of undo structures in system"),
1187 sysctl_ipc_semmnu, 0, &seminfo.semmnu, 0,
1188 CTL_CREATE, CTL_EOL);
1189 }
1190