/* $NetBSD: sysv_shm.c,v 1.106 2008/04/12 20:49:22 rmind Exp $ */

/*-
 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1994 Adam Glass and Charles M. Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles M.
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.106 2008/04/12 20:49:22 rmind Exp $");

#define SYSVSHM

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/shm.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
#include <sys/syscallargs.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

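/*
 * Module-global state (a brief summary, derived from the code below):
 *
 *	shmsegs		array of shminfo.shmmni segment descriptors
 *	shm_nused	number of currently allocated segments
 *	shm_committed	pages committed to shared memory segments
 *	shm_cv		per-segment condition variables (SHMSEG_WANTED waits)
 *
 * All of the above, and the per-vmspace shm maps, are protected by
 * shm_lock.  The shm_realloc_* variables coordinate resizing of the
 * segment array (see shmrealloc()): syscalls wait on shm_realloc_cv
 * while a reallocation is in progress and bump shm_realloc_disable
 * across sections where they temporarily drop shm_lock.
 */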
int shm_nused;
struct shmid_ds *shmsegs;

struct shmmap_entry {
	SLIST_ENTRY(shmmap_entry) next;
	vaddr_t va;
	int shmid;
};

static kmutex_t		shm_lock;
static kcondvar_t *	shm_cv;
static struct pool	shmmap_entry_pool;
static int		shm_last_free, shm_use_phys;
static size_t		shm_committed;

static kcondvar_t	shm_realloc_cv;
static bool		shm_realloc_state;
static u_int		shm_realloc_disable;

struct shmmap_state {
	unsigned int nitems;
	unsigned int nrefs;
	SLIST_HEAD(, shmmap_entry) entries;
};

#ifdef SHMDEBUG
#define SHMPRINTF(a) printf a
#else
#define SHMPRINTF(a)
#endif

static int shmrealloc(int);

/*
 * Find the shared memory segment by the identifier.
 * => must be called with shm_lock held;
 */
static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shminfo.shmmni)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
		return NULL;
	if ((shmseg->shm_perm.mode &
	    (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
		return NULL;
	if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
		return NULL;

	return shmseg;
}

/*
 * Free memory segment.
 * => must be called with shm_lock held;
 */
static void
shm_free_segment(int segnum)
{
	struct shmid_ds *shmseg;
	size_t size;
	bool wanted;

	KASSERT(mutex_owned(&shm_lock));

	shmseg = &shmsegs[segnum];
	SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
	    shmseg->shm_perm._key, shmseg->shm_perm._seq));

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);

	shmseg->_shm_internal = NULL;
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
	shm_last_free = segnum;
	if (wanted == true)
		cv_broadcast(&shm_cv[segnum]);
}

/*
 * Delete entry from the shm map.
 * => must be called with shm_lock held;
 */
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
	struct uvm_object *uobj = NULL;
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmmap_se->shmid);
	shmseg = &shmsegs[segnum];
	SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
	shmmap_s->nitems--;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		uobj = shmseg->_shm_internal;
		shm_free_segment(segnum);
	}

	return uobj;
}

/*
 * Get a non-shared shm map for that vmspace.  Note that memory
 * allocation may be performed with the lock held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
	struct shmmap_state *oshmmap_s, *shmmap_s;
	struct shmmap_entry *oshmmap_se, *shmmap_se;

	KASSERT(mutex_owned(&shm_lock));

	/* 1. A shm map with refcnt = 1, used only by ourselves - return it */
	oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (oshmmap_s && oshmmap_s->nrefs == 1)
		return oshmmap_s;

	/* 2. No shm map present - create a fresh one */
	shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
	shmmap_s->nrefs = 1;
	SLIST_INIT(&shmmap_s->entries);
	p->p_vmspace->vm_shm = (void *)shmmap_s;

	if (oshmmap_s == NULL)
		return shmmap_s;

	SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
	    p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

	/* 3. A shared shm map, copy to a fresh one and adjust refcounts */
	SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
		shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK);
		shmmap_se->va = oshmmap_se->va;
		shmmap_se->shmid = oshmmap_se->shmid;
		SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	}
	shmmap_s->nitems = oshmmap_s->nitems;
	oshmmap_s->nrefs--;

	return shmmap_s;
}

/*
 * Lock/unlock the memory.
 * => must be called with shm_lock held;
 * => called from one place, thus, inline;
 */
static inline int
shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd)
{
	struct proc *p = l->l_proc;
	struct shmmap_entry *shmmap_se;
	struct shmmap_state *shmmap_s;
	size_t size;
	int error;

	KASSERT(mutex_owned(&shm_lock));
	shmmap_s = shmmap_getprivate(p);

	/* Find our shared memory address by shmid */
	SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
		if (shmmap_se->shmid != shmid)
			continue;

		size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;

		if (cmd == SHM_LOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
			/* Wire the object and map, then tag it */
			error = uobj_wirepages(shmseg->_shm_internal, 0,
			    round_page(shmseg->shm_segsz));
			if (error)
				return EIO;
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, false, 0);
			if (error) {
				uobj_unwirepages(shmseg->_shm_internal, 0,
				    round_page(shmseg->shm_segsz));
				if (error == EFAULT)
					error = ENOMEM;
				return error;
			}
			shmseg->shm_perm.mode |= SHMSEG_WIRED;

		} else if (cmd == SHM_UNLOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
			/* Unwire the object and map, then untag it */
			uobj_unwirepages(shmseg->_shm_internal, 0,
			    round_page(shmseg->shm_segsz));
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, true, 0);
			if (error)
				return EIO;
			shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
		}
	}

	return 0;
}

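/*
 * For reference, a minimal (illustrative, not authoritative) sketch of how
 * userland drives the interface implemented below: shmget() creates or looks
 * up a segment, shmat() maps it, shmdt() unmaps it, and shmctl(IPC_RMID)
 * marks it for removal once the last attach is gone.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *	#include <string.h>
 *
 *	int
 *	example(void)
 *	{
 *		int shmid;
 *		void *p;
 *
 *		// Create a one-page segment, private key, rw for the owner.
 *		shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *		if (shmid == -1)
 *			return -1;
 *		// Attach it at a kernel-chosen address, read/write.
 *		p = shmat(shmid, NULL, 0);
 *		if (p == (void *)-1)
 *			return -1;
 *		memset(p, 0, 4096);
 *		// Detach and mark the segment for removal.
 *		(void)shmdt(p);
 *		(void)shmctl(shmid, IPC_RMID, NULL);
 *		return 0;
 *	}
 */
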
/*
 * Unmap shared memory.
 */
int
sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) shmaddr;
	} */
	struct proc *p = l->l_proc;
	struct shmmap_state *shmmap_s1, *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct shmid_ds *shmseg;
	size_t size;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s1 == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	/* Find the map entry */
	SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
		if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
			break;
	if (shmmap_se == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	shmmap_s = shmmap_getprivate(p);
	if (shmmap_s != shmmap_s1) {
		/* Map has been copied, lookup entry in new map */
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
				break;
		if (shmmap_se == NULL) {
			mutex_exit(&shm_lock);
			return EINVAL;
		}
	}

	SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, shmmap_se->va));

	/* Delete the entry from shm map */
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	mutex_exit(&shm_lock);

	uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
	if (uobj != NULL)
		uao_detach(uobj);
	pool_put(&shmmap_entry_pool, shmmap_se);

	return 0;
}

/*
 * Map shared memory.
 */
int
sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(const void *) shmaddr;
		syscallarg(int) shmflg;
	} */
	int error, flags = 0;
	struct proc *p = l->l_proc;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct vmspace *vm;
	vaddr_t attach_va;
	vm_prot_t prot;
	vsize_t size;

	/* Allocate a new map entry and set it */
	shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK);

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
	if (shmseg == NULL) {
		error = EINVAL;
		goto err;
	}
	error = ipcperm(cred, &shmseg->shm_perm,
	    (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto err;

	vm = p->p_vmspace;
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
		error = EMFILE;
		goto err;
	}

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	prot = VM_PROT_READ;
	if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	if (SCARG(uap, shmaddr)) {
		flags |= UVM_FLAG_FIXED;
		if (SCARG(uap, shmflg) & SHM_RND)
			attach_va =
			    (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
		else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
			attach_va = (vaddr_t)SCARG(uap, shmaddr);
		else {
			error = EINVAL;
			goto err;
		}
	} else {
		/* This is just a hint to uvm_mmap() about where to put it. */
		attach_va = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)vm->vm_daddr, size);
	}

	/*
	 * Create a map entry, add it to the list and increase the counters.
	 * Since the lock will be dropped before the mapping is established,
	 * disable reallocation.
	 */
	shmmap_s = shmmap_getprivate(p);
	SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	shmmap_s->nitems++;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_nattch++;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/*
	 * Add a reference to the memory object, map it to the
	 * address space, and lock the memory, if needed.
	 */
	uobj = shmseg->_shm_internal;
	uao_reference(uobj);
	error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
	if (error)
		goto err_detach;
	if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) {
		error = uvm_map_pageable(&vm->vm_map, attach_va,
		    attach_va + size, false, 0);
		if (error) {
			if (error == EFAULT)
				error = ENOMEM;
			uvm_deallocate(&vm->vm_map, attach_va, size);
			goto err_detach;
		}
	}

	/* Set the new address, and update the time */
	mutex_enter(&shm_lock);
	shmmap_se->va = attach_va;
	shmmap_se->shmid = SCARG(uap, shmid);
	shmseg->shm_atime = time_second;
	shm_realloc_disable--;
	retval[0] = attach_va;
	SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, attach_va));
err:
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (error && shmmap_se)
		pool_put(&shmmap_entry_pool, shmmap_se);
	return error;

err_detach:
	uao_detach(uobj);
	mutex_enter(&shm_lock);
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	pool_put(&shmmap_entry_pool, shmmap_se);
	return error;
}

/*
 * Shared memory control operations.
 */
int
sys___shmctl13(struct lwp *l, const struct sys___shmctl13_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(int) cmd;
		syscallarg(struct shmid_ds *) buf;
	} */
	struct shmid_ds shmbuf;
	int cmd, error;

	cmd = SCARG(uap, cmd);
	if (cmd == IPC_SET) {
		error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
		if (error)
			return error;
	}

	error = shmctl1(l, SCARG(uap, shmid), cmd,
	    (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);

	if (error == 0 && cmd == IPC_STAT)
		error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));

	return error;
}

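/*
 * Perform a shmctl() operation on the segment identified by shmid;
 * shmbuf is used as the source (IPC_SET) or destination (IPC_STAT).
 * Not static, so it can also be called from elsewhere in the kernel.
 */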
int
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
	struct uvm_object *uobj = NULL;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	int error = 0;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	switch (cmd) {
	case IPC_STAT:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
			break;
		memcpy(shmbuf, shmseg, sizeof(struct shmid_ds));
		break;
	case IPC_SET:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
		shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (shmbuf->shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = time_second;
		break;
	case IPC_RMID:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm._key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->shm_nattch <= 0) {
			uobj = shmseg->_shm_internal;
			shm_free_segment(IPCID_TO_IX(shmid));
		}
		break;
	case SHM_LOCK:
	case SHM_UNLOCK:
		if ((error = kauth_authorize_generic(cred,
		    KAUTH_GENERIC_ISSUSER, NULL)) != 0)
			break;
		error = shm_memlock(l, shmseg, shmid, cmd);
		break;
	default:
		error = EINVAL;
	}

	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	return error;
}

/*
 * Try to take an already existing segment.
 * => must be called with shm_lock held;
 * => called from one place, thus, inline;
 */
static inline int
shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
    register_t *retval)
{
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	int segnum, error;
again:
	KASSERT(mutex_owned(&shm_lock));

	/* Find segment by key */
	for (segnum = 0; segnum < shminfo.shmmni; segnum++)
		if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[segnum].shm_perm._key == SCARG(uap, key))
			break;
	if (segnum == shminfo.shmmni) {
		/* Not found */
		return -1;
	}

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
		if (error)
			return error;
		goto again;
	}

	/* Check the permissions, the segment size and the appropriate flags */
	error = ipcperm(cred, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
		return EINVAL;
	if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
	    (IPC_CREAT | IPC_EXCL))
		return EEXIST;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

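/*
 * Get a shared memory segment: look up an existing segment by key, or
 * create a new one if IPC_CREAT was requested (always for IPC_PRIVATE).
 */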
int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
	/* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	key_t key = SCARG(uap, key);
	size_t size;
	int error, mode, segnum;
	bool lockmem;

	mode = SCARG(uap, shmflg) & ACCESSPERMS;
	if (SCARG(uap, shmflg) & _SHM_RMLINGER)
		mode |= SHMSEG_RMLINGER;

	SHMPRINTF(("shmget: key 0x%lx size 0x%x shmflg 0x%x mode 0x%x\n",
	    SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	if (key != IPC_PRIVATE) {
		error = shmget_existing(l, uap, mode, retval);
		if (error != -1) {
			mutex_exit(&shm_lock);
			return error;
		}
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
			mutex_exit(&shm_lock);
			return ENOENT;
		}
	}
	error = 0;

	/*
	 * Check for the limits.
	 */
	size = SCARG(uap, size);
	if (size < shminfo.shmmin || size > shminfo.shmmax) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	if (shm_nused >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return ENOSPC;
	}
	size = (size + PGOFSET) & ~PGOFSET;
	if (shm_committed + btoc(size) > shminfo.shmall) {
		mutex_exit(&shm_lock);
		return ENOMEM;
	}

	/* Find the first available segment */
	if (shm_last_free < 0) {
		for (segnum = 0; segnum < shminfo.shmmni; segnum++)
			if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
				break;
		KASSERT(segnum < shminfo.shmmni);
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}

	/*
	 * Initialize the segment.
	 * We will drop the lock while allocating the memory, thus mark the
	 * segment as present but removed, so that no other thread could
	 * take it.  Also, disable reallocation while the lock is dropped.
	 */
	shmseg = &shmsegs[segnum];
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shm_committed += btoc(size);
	shm_nused++;
	lockmem = shm_use_phys;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/* Allocate the memory object and lock it if needed */
	shmseg->_shm_internal = uao_create(size, 0);
	if (lockmem) {
		/* Wire the pages and tag it */
		error = uobj_wirepages(shmseg->_shm_internal, 0,
		    round_page(shmseg->shm_segsz));
		if (error) {
			mutex_enter(&shm_lock);
			shm_free_segment(segnum);
			shm_realloc_disable--;
			mutex_exit(&shm_lock);
			return error;
		}
	}

	/*
	 * Please note, while the segment is marked, there is no need to hold
	 * the lock while setting it (except shm_perm.mode).
	 */
	shmseg->shm_perm._key = SCARG(uap, key);
	shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
	shmseg->shm_segsz = SCARG(uap, size);
	shmseg->shm_cpid = l->l_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;

	/*
	 * The segment is initialized.
	 * Take the lock, mark it as allocated, and notify any waiters.
	 * Also, re-enable reallocation.
	 */
	mutex_enter(&shm_lock);
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
	    SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		cv_broadcast(&shm_cv[segnum]);
	}
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	return error;
}

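/*
 * Fork hook: the child shares the parent's shm map, so bump the reference
 * count on the map and the attach count of every mapped segment.
 */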
void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
	mutex_enter(&shm_lock);
	vm2->vm_shm = vm1->vm_shm;
	if (vm1->vm_shm) {
		shmmap_s = (struct shmmap_state *)vm1->vm_shm;
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
		shmmap_s->nrefs++;
	}
	mutex_exit(&shm_lock);
}

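/*
 * Exit hook: drop this vmspace's reference to its shm map.  If it was the
 * last reference, detach every entry: unmap the segments and release the
 * memory objects of segments already marked for removal.
 */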
void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object **uobj;
	size_t *size;
	u_int i, n;

	SLIST_HEAD(, shmmap_entry) tmp_entries;

	mutex_enter(&shm_lock);
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s == NULL) {
		mutex_exit(&shm_lock);
		return;
	}

	vm->vm_shm = NULL;

	if (--shmmap_s->nrefs > 0) {
		SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
		    vm, shmmap_s->nitems, shmmap_s->nrefs));
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
		mutex_exit(&shm_lock);
		return;
	}

	KASSERT(shmmap_s->nrefs == 0);
	n = shmmap_s->nitems;
	SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, n));
	mutex_exit(&shm_lock);
	if (n == 0) {
		kmem_free(shmmap_s, sizeof(struct shmmap_state));
		return;
	}

	/* Allocate the arrays */
	SLIST_INIT(&tmp_entries);
	uobj = kmem_zalloc(n * sizeof(void *), KM_SLEEP);
	size = kmem_zalloc(n * sizeof(size_t), KM_SLEEP);

	/* Delete the entries from the shm map */
	i = 0;
	mutex_enter(&shm_lock);
	while (!SLIST_EMPTY(&shmmap_s->entries)) {
		struct shmid_ds *shmseg;

		shmmap_se = SLIST_FIRST(&shmmap_s->entries);
		shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
		size[i] = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
		uobj[i] = shm_delete_mapping(shmmap_s, shmmap_se);
		SLIST_INSERT_HEAD(&tmp_entries, shmmap_se, next);
		i++;
	}
	mutex_exit(&shm_lock);

	/* Unmap all segments, free the entries */
	i = 0;
	while (!SLIST_EMPTY(&tmp_entries)) {
		KASSERT(i < n);
		shmmap_se = SLIST_FIRST(&tmp_entries);
		SLIST_REMOVE(&tmp_entries, shmmap_se, shmmap_entry, next);
		uvm_deallocate(&vm->vm_map, shmmap_se->va, size[i]);
		if (uobj[i] != NULL)
			uao_detach(uobj[i]);
		pool_put(&shmmap_entry_pool, shmmap_se);
		i++;
	}

	kmem_free(uobj, n * sizeof(void *));
	kmem_free(size, n * sizeof(size_t));
	kmem_free(shmmap_s, sizeof(struct shmmap_state));
}

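/*
 * Resize the segment array to newshmni entries (driven by the
 * kern.ipc.shmmni sysctl).  Allocate a new wired area, wait until no
 * syscall has reallocation disabled, copy the descriptors over and
 * free the old area.
 */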
static int
shmrealloc(int newshmni)
{
	vaddr_t v;
	struct shmid_ds *oldshmsegs, *newshmsegs;
	kcondvar_t *newshm_cv;
	size_t sz;
	int i, lsegid;

	if (newshmni < 1)
		return EINVAL;

	/* Allocate new memory area */
	sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
	    ALIGN(newshmni * sizeof(kcondvar_t));
	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		return ENOMEM;

	mutex_enter(&shm_lock);
	while (shm_realloc_state || shm_realloc_disable)
		cv_wait(&shm_realloc_cv, &shm_lock);

	/*
	 * Get the number of the last used segment.  Fail if we are trying
	 * to reallocate to fewer segments than are currently in use.
	 */
	lsegid = 0;
	for (i = 0; i < shminfo.shmmni; i++)
		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
			lsegid = i;
	if (lsegid >= newshmni) {
		mutex_exit(&shm_lock);
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}
	shm_realloc_state = true;

	newshmsegs = (void *)v;
	newshm_cv = (void *)(ALIGN(newshmsegs) +
	    newshmni * sizeof(struct shmid_ds));

	/* Copy all memory to the new area */
	for (i = 0; i < shm_nused; i++)
		(void)memcpy(&newshmsegs[i], &shmsegs[i],
		    sizeof(newshmsegs[0]));

	/* Mark all new segments as free, if there are any */
	for (; i < newshmni; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
		newshmsegs[i].shm_perm._seq = 0;
	}

	oldshmsegs = shmsegs;
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));

	shminfo.shmmni = newshmni;
	shmsegs = newshmsegs;
	shm_cv = newshm_cv;

	/* Reallocation completed - notify all waiters, if any */
	shm_realloc_state = false;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);
	return 0;
}

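/*
 * Initialize the SysV shared memory subsystem: set up the lock, the map
 * entry pool and the condition variables, and allocate wired memory for
 * the segment array sized according to shminfo.shmmni.
 */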
void
shminit(void)
{
	vaddr_t v;
	size_t sz;
	int i;

	mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
	pool_init(&shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0,
	    "shmmp", &pool_allocator_nointr, IPL_NONE);
	cv_init(&shm_realloc_cv, "shmrealc");

	/* Allocate the wired memory for our structures */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		panic("sysv_shm: cannot allocate memory");
	shmsegs = (void *)v;
	shm_cv = (void *)(ALIGN(shmsegs) +
	    shminfo.shmmni * sizeof(struct shmid_ds));

	shminfo.shmmax *= PAGE_SIZE;

	for (i = 0; i < shminfo.shmmni; i++) {
		cv_init(&shm_cv[i], "shmwait");
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm._seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shm_realloc_disable = 0;
	shm_realloc_state = false;
}

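/*
 * Sysctl handler for kern.ipc.shmmni: validate the new value and resize
 * the segment array via shmrealloc().
 */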
static int
sysctl_ipc_shmmni(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmni;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	sysctl_unlock();
	error = shmrealloc(newsize);
	sysctl_relock();
	return error;
}

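/*
 * Sysctl handler for kern.ipc.shmmaxpgs: update the limit on the total
 * amount of shared memory (in pages) and keep shmmax (in bytes) in sync.
 */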
static int
sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmall;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1)
		return EINVAL;

	shminfo.shmall = newsize;
	shminfo.shmmax = shminfo.shmall * PAGE_SIZE;

	return 0;
}

SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "kern", NULL,
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "ipc",
	    SYSCTL_DESCR("SysV IPC options"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_SYSVIPC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READONLY,
	    CTLTYPE_INT, "shmmax",
	    SYSCTL_DESCR("Max shared memory segment size in bytes"),
	    NULL, 0, &shminfo.shmmax, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmmni",
	    SYSCTL_DESCR("Max number of shared memory identifiers"),
	    sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmseg",
	    SYSCTL_DESCR("Max shared memory segments per process"),
	    NULL, 0, &shminfo.shmseg, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmmaxpgs",
	    SYSCTL_DESCR("Max amount of shared memory in pages"),
	    sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shm_use_phys",
	    SYSCTL_DESCR("Enable/disable locking of shared memory in "
		"physical memory"), NULL, 0, &shm_use_phys, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
}