/* $NetBSD: sysv_shm.c,v 1.128 2015/05/13 01:16:15 pgoyette Exp $ */

/*-
 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles M.
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.128 2015/05/13 01:16:15 pgoyette Exp $");

#ifdef _KERNEL_OPT
#include "opt_sysv.h"
#endif

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/shm.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
#include <sys/syscallargs.h>
#include <sys/queue.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

struct shmmap_entry {
	SLIST_ENTRY(shmmap_entry) next;
	vaddr_t va;
	int shmid;
};

int shm_nused __cacheline_aligned;
struct shmid_ds * shmsegs __read_mostly;

static kmutex_t shm_lock __cacheline_aligned;
static kcondvar_t * shm_cv __cacheline_aligned;
static int shm_last_free __cacheline_aligned;
static size_t shm_committed __cacheline_aligned;
static int shm_use_phys __read_mostly;

static kcondvar_t shm_realloc_cv;
static bool shm_realloc_state;
static u_int shm_realloc_disable;

struct shmmap_state {
	unsigned int nitems;
	unsigned int nrefs;
	SLIST_HEAD(, shmmap_entry) entries;
};

extern int kern_has_sysvshm;

#ifdef SHMDEBUG
#define SHMPRINTF(a) printf a
#else
#define SHMPRINTF(a)
#endif

static int shmrealloc(int);

/*
 * Find the shared memory segment by the identifier.
 * => must be called with shm_lock held;
 */
static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shminfo.shmmni)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
		return NULL;
	if ((shmseg->shm_perm.mode &
	    (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
		return NULL;
	if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
		return NULL;

	return shmseg;
}
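
/*
 * Illustrative sketch (not part of this file): a SysV shm identifier
 * packs the slot index in shmsegs[] together with the slot's sequence
 * number, so a stale identifier is rejected once the slot is reused.
 * Assuming the usual <sys/ipc.h> encoding (index in the low 16 bits,
 * sequence in the next 16 bits), a hypothetical decoder would be:
 *
 *	static void
 *	decode_shmid(int shmid)
 *	{
 *		int ix  = shmid & 0xffff;		// like IPCID_TO_IX()
 *		int seq = (shmid >> 16) & 0xffff;	// like IPCID_TO_SEQ()
 *		printf("slot %d, sequence %d\n", ix, seq);
 *	}
 *
 * shm_find_segment_by_shmid() above performs the same split and then
 * checks that the slot is allocated and the sequence still matches.
 */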

/*
 * Free memory segment.
 * => must be called with shm_lock held;
 */
static void
shm_free_segment(int segnum)
{
	struct shmid_ds *shmseg;
	size_t size;
	bool wanted;

	KASSERT(mutex_owned(&shm_lock));

	shmseg = &shmsegs[segnum];
	SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
	    shmseg->shm_perm._key, shmseg->shm_perm._seq));

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);

	shmseg->_shm_internal = NULL;
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
	shm_last_free = segnum;
	if (wanted == true)
		cv_broadcast(&shm_cv[segnum]);
}
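
/*
 * Worked example (illustrative, assuming a 4 KB page size): for a segment
 * with shm_segsz = 5000 bytes, (5000 + PGOFSET) & ~PGOFSET rounds the size
 * up to 8192 bytes, i.e. two pages, and btoc(8192) = 2 is the page count
 * that shm_committed is charged with when the segment is created and
 * credited back in shm_free_segment() above.
 */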

/*
 * Delete entry from the shm map.
 * => must be called with shm_lock held;
 */
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
	struct uvm_object *uobj = NULL;
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmmap_se->shmid);
	shmseg = &shmsegs[segnum];
	SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
	shmmap_s->nitems--;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		uobj = shmseg->_shm_internal;
		shm_free_segment(segnum);
	}

	return uobj;
}

/*
 * Get a non-shared shm map for that vmspace. Note that memory
 * allocation may be performed with the lock held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
	struct shmmap_state *oshmmap_s, *shmmap_s;
	struct shmmap_entry *oshmmap_se, *shmmap_se;

	KASSERT(mutex_owned(&shm_lock));

	/* 1. A shm map with refcnt = 1, used only by us - return it */
	oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (oshmmap_s && oshmmap_s->nrefs == 1)
		return oshmmap_s;

	/* 2. No shm map present - create a fresh one */
	shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
	shmmap_s->nrefs = 1;
	SLIST_INIT(&shmmap_s->entries);
	p->p_vmspace->vm_shm = (void *)shmmap_s;

	if (oshmmap_s == NULL)
		return shmmap_s;

	SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
	    p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

	/* 3. A shared shm map, copy to a fresh one and adjust refcounts */
	SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
		shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
		shmmap_se->va = oshmmap_se->va;
		shmmap_se->shmid = oshmmap_se->shmid;
		SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	}
	shmmap_s->nitems = oshmmap_s->nitems;
	oshmmap_s->nrefs--;

	return shmmap_s;
}

/*
 * Lock/unlock the memory.
 * => must be called with shm_lock held;
 * => called from one place only, hence inline;
 */
static inline int
shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd)
{
	struct proc *p = l->l_proc;
	struct shmmap_entry *shmmap_se;
	struct shmmap_state *shmmap_s;
	size_t size;
	int error;

	KASSERT(mutex_owned(&shm_lock));
	shmmap_s = shmmap_getprivate(p);

	/* Find our shared memory address by shmid */
	SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
		if (shmmap_se->shmid != shmid)
			continue;

		size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;

		if (cmd == SHM_LOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
			/* Wire the object and map, then tag it */
			error = uvm_obj_wirepages(shmseg->_shm_internal,
			    0, size, NULL);
			if (error)
				return EIO;
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, false, 0);
			if (error) {
				uvm_obj_unwirepages(shmseg->_shm_internal,
				    0, size);
				if (error == EFAULT)
					error = ENOMEM;
				return error;
			}
			shmseg->shm_perm.mode |= SHMSEG_WIRED;

		} else if (cmd == SHM_UNLOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
			/* Unwire the object and map, then untag it */
			uvm_obj_unwirepages(shmseg->_shm_internal, 0, size);
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, true, 0);
			if (error)
				return EIO;
			shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
		}
	}

	return 0;
}
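
/*
 * Userland sketch (illustrative, not part of this file): SHM_LOCK and
 * SHM_UNLOCK reach shm_memlock() through shmctl(2) and must pass the
 * kauth check in shmctl1() below, so the caller needs the appropriate
 * privilege. A minimal example:
 *
 *	#include <sys/shm.h>
 *	#include <err.h>
 *
 *	if (shmctl(shmid, SHM_LOCK, NULL) == -1)
 *		err(1, "SHM_LOCK");	// e.g. ENOMEM if wiring fails
 *
 * Only mappings already attached by the calling process are wired, since
 * the loop above walks that process's shmmap_state entries.
 */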

/*
 * Unmap shared memory.
 */
int
sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) shmaddr;
	} */
	struct proc *p = l->l_proc;
	struct shmmap_state *shmmap_s1, *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct shmid_ds *shmseg;
	size_t size;

	mutex_enter(&shm_lock);
	/* If a reallocation is in progress, wait for it to complete */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s1 == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	/* Find the map entry */
	SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
		if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
			break;
	if (shmmap_se == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	shmmap_s = shmmap_getprivate(p);
	if (shmmap_s != shmmap_s1) {
		/* Map has been copied, lookup entry in new map */
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
				break;
		if (shmmap_se == NULL) {
			mutex_exit(&shm_lock);
			return EINVAL;
		}
	}

	SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, shmmap_se->va));

	/* Delete the entry from shm map */
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	mutex_exit(&shm_lock);

	uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
	if (uobj != NULL) {
		uao_detach(uobj);
	}
	kmem_free(shmmap_se, sizeof(struct shmmap_entry));

	return 0;
}
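
/*
 * Userland sketch (illustrative, not part of this file): shmdt(2) takes
 * only the attach address returned earlier by shmat(2), which is why the
 * lookup above matches entries by their va:
 *
 *	#include <sys/shm.h>
 *	#include <err.h>
 *
 *	if (shmdt(addr) == -1)
 *		warn("shmdt");		// EINVAL if addr was never attached
 */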

/*
 * Map shared memory.
 */
int
sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(const void *) shmaddr;
		syscallarg(int) shmflg;
	} */
	int error, flags = 0;
	struct proc *p = l->l_proc;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct vmspace *vm;
	vaddr_t attach_va;
	vm_prot_t prot;
	vsize_t size;

	/* Allocate a new map entry and set it */
	shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
	shmmap_se->shmid = SCARG(uap, shmid);

	mutex_enter(&shm_lock);
	/* If a reallocation is in progress, wait for it to complete */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
	if (shmseg == NULL) {
		error = EINVAL;
		goto err;
	}
	error = ipcperm(cred, &shmseg->shm_perm,
	    (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto err;

	vm = p->p_vmspace;
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
		error = EMFILE;
		goto err;
	}

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	prot = VM_PROT_READ;
	if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	if (SCARG(uap, shmaddr)) {
		flags |= UVM_FLAG_FIXED;
		if (SCARG(uap, shmflg) & SHM_RND)
			attach_va =
			    (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
		else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
			attach_va = (vaddr_t)SCARG(uap, shmaddr);
		else {
			error = EINVAL;
			goto err;
		}
	} else {
		/* This is just a hint to uvm_map() about where to put it. */
		attach_va = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)vm->vm_daddr, size);
	}

	/*
	 * Create a map entry, add it to the list and increase the counters.
	 * Since the lock will be dropped before mapping, disable reallocation.
	 */
	shmmap_s = shmmap_getprivate(p);
	SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	shmmap_s->nitems++;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_nattch++;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/*
	 * Add a reference to the memory object, map it into the
	 * address space, and lock the memory, if needed.
	 */
	uobj = shmseg->_shm_internal;
	uao_reference(uobj);
	error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
	if (error)
		goto err_detach;
	if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) {
		error = uvm_map_pageable(&vm->vm_map, attach_va,
		    attach_va + size, false, 0);
		if (error) {
			if (error == EFAULT)
				error = ENOMEM;
			uvm_deallocate(&vm->vm_map, attach_va, size);
			goto err_detach;
		}
	}

	/* Set the new address, and update the time */
	mutex_enter(&shm_lock);
	shmmap_se->va = attach_va;
	shmseg->shm_atime = time_second;
	shm_realloc_disable--;
	retval[0] = attach_va;
	SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, attach_va));
err:
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (error && shmmap_se) {
		kmem_free(shmmap_se, sizeof(struct shmmap_entry));
	}
	return error;

err_detach:
	uao_detach(uobj);
	mutex_enter(&shm_lock);
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (uobj != NULL) {
		uao_detach(uobj);
	}
	kmem_free(shmmap_se, sizeof(struct shmmap_entry));
	return error;
}
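
/*
 * Userland sketch (illustrative, not part of this file): a typical attach
 * lets the kernel choose the address (shmaddr == NULL takes the
 * e_vm_default_addr() hint path above) and may ask for a read-only
 * mapping:
 *
 *	#include <sys/shm.h>
 *	#include <err.h>
 *
 *	void *addr = shmat(shmid, NULL, SHM_RDONLY);
 *	if (addr == (void *)-1)
 *		err(1, "shmat");
 *
 * An explicit shmaddr must be SHMLBA-aligned unless SHM_RND is also
 * given, in which case it is rounded down as in the code above.
 */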

/*
 * Shared memory control operations.
 */
int
sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(int) cmd;
		syscallarg(struct shmid_ds *) buf;
	} */
	struct shmid_ds shmbuf;
	int cmd, error;

	cmd = SCARG(uap, cmd);
	if (cmd == IPC_SET) {
		error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
		if (error)
			return error;
	}

	error = shmctl1(l, SCARG(uap, shmid), cmd,
	    (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);

	if (error == 0 && cmd == IPC_STAT)
		error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));

	return error;
}

int
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
	struct uvm_object *uobj = NULL;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	int error = 0;

	mutex_enter(&shm_lock);
	/* If a reallocation is in progress, wait for it to complete */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	switch (cmd) {
	case IPC_STAT:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
			break;
		memcpy(shmbuf, shmseg, sizeof(struct shmid_ds));
		break;
	case IPC_SET:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
		shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (shmbuf->shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = time_second;
		break;
	case IPC_RMID:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm._key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->shm_nattch <= 0) {
			uobj = shmseg->_shm_internal;
			shm_free_segment(IPCID_TO_IX(shmid));
		}
		break;
	case SHM_LOCK:
	case SHM_UNLOCK:
		if ((error = kauth_authorize_system(cred,
		    KAUTH_SYSTEM_SYSVIPC,
		    (cmd == SHM_LOCK) ? KAUTH_REQ_SYSTEM_SYSVIPC_SHM_LOCK :
		    KAUTH_REQ_SYSTEM_SYSVIPC_SHM_UNLOCK, NULL, NULL, NULL)) != 0)
			break;
		error = shm_memlock(l, shmseg, shmid, cmd);
		break;
	default:
		error = EINVAL;
	}

	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	return error;
}
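
/*
 * Userland sketch (illustrative, not part of this file): IPC_STAT copies
 * the whole shmid_ds back to the caller, so segment size and attach count
 * can be inspected:
 *
 *	#include <sys/shm.h>
 *	#include <stdio.h>
 *
 *	struct shmid_ds ds;
 *	if (shmctl(shmid, IPC_STAT, &ds) == 0)
 *		printf("%zu bytes, %u attaches\n",
 *		    (size_t)ds.shm_segsz, (unsigned int)ds.shm_nattch);
 */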

/*
 * Try to take an already existing segment.
 * => must be called with shm_lock held;
 * => called from one place only, hence inline;
 */
static inline int
shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
    register_t *retval)
{
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	int segnum, error;
again:
	KASSERT(mutex_owned(&shm_lock));

	/* Find segment by key */
	for (segnum = 0; segnum < shminfo.shmmni; segnum++)
		if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[segnum].shm_perm._key == SCARG(uap, key))
			break;
	if (segnum == shminfo.shmmni) {
		/* Not found */
		return -1;
	}

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated. Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
		if (error)
			return error;
		goto again;
	}

	/*
	 * First check the flags, to generate a useful error when a
	 * segment already exists.
	 */
	if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
	    (IPC_CREAT | IPC_EXCL))
		return EEXIST;

	/* Check the permission and segment size. */
	error = ipcperm(cred, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
		return EINVAL;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
	/* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	key_t key = SCARG(uap, key);
	size_t size;
	int error, mode, segnum;
	bool lockmem;

	mode = SCARG(uap, shmflg) & ACCESSPERMS;
	if (SCARG(uap, shmflg) & _SHM_RMLINGER)
		mode |= SHMSEG_RMLINGER;

	SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
	    SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

	mutex_enter(&shm_lock);
	/* If a reallocation is in progress, wait for it to complete */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	if (key != IPC_PRIVATE) {
		error = shmget_existing(l, uap, mode, retval);
		if (error != -1) {
			mutex_exit(&shm_lock);
			return error;
		}
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
			mutex_exit(&shm_lock);
			return ENOENT;
		}
	}
	error = 0;

	/*
	 * Check the limits.
	 */
	size = SCARG(uap, size);
	if (size < shminfo.shmmin || size > shminfo.shmmax) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	if (shm_nused >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return ENOSPC;
	}
	size = (size + PGOFSET) & ~PGOFSET;
	if (shm_committed + btoc(size) > shminfo.shmall) {
		mutex_exit(&shm_lock);
		return ENOMEM;
	}

	/* Find the first available segment */
	if (shm_last_free < 0) {
		for (segnum = 0; segnum < shminfo.shmmni; segnum++)
			if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
				break;
		KASSERT(segnum < shminfo.shmmni);
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}

	/*
	 * Initialize the segment.
	 * We will drop the lock while allocating the memory, thus mark the
	 * segment as present but removed, so that no other thread can take
	 * it.  Also, disable reallocation while the lock is dropped.
	 */
	shmseg = &shmsegs[segnum];
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shm_committed += btoc(size);
	shm_nused++;
	lockmem = shm_use_phys;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/* Allocate the memory object and lock it if needed */
	shmseg->_shm_internal = uao_create(size, 0);
	if (lockmem) {
		/* Wire the pages and tag it */
		error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size, NULL);
		if (error) {
			uao_detach(shmseg->_shm_internal);
			mutex_enter(&shm_lock);
			shm_free_segment(segnum);
			shm_realloc_disable--;
			mutex_exit(&shm_lock);
			return error;
		}
	}

	/*
	 * Note that while the segment is marked, there is no need to hold
	 * the lock while initializing it (except for shm_perm.mode).
	 */
	shmseg->shm_perm._key = SCARG(uap, key);
	shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
	shmseg->shm_segsz = SCARG(uap, size);
	shmseg->shm_cpid = l->l_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;

	/*
	 * The segment is initialized.
	 * Take the lock, mark it as allocated, notify waiters (if any)
	 * and re-enable reallocation.
	 */
	mutex_enter(&shm_lock);
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
	    SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		cv_broadcast(&shm_cv[segnum]);
	}
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	return error;
}
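
/*
 * Userland sketch (illustrative, not part of this file): a typical
 * create/attach/detach/remove cycle through the system calls implemented
 * in this file. The key and size below are arbitrary example values.
 *
 *	#include <sys/shm.h>
 *	#include <err.h>
 *
 *	int id = shmget((key_t)0x1234, 65536, IPC_CREAT | 0600);
 *	if (id == -1)
 *		err(1, "shmget");
 *	char *p = shmat(id, NULL, 0);
 *	if (p == (void *)-1)
 *		err(1, "shmat");
 *	p[0] = 'x';				// use the segment
 *	(void)shmdt(p);
 *	(void)shmctl(id, IPC_RMID, NULL);	// freed once the last attach goes
 */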

void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
	mutex_enter(&shm_lock);
	vm2->vm_shm = vm1->vm_shm;
	if (vm1->vm_shm) {
		shmmap_s = (struct shmmap_state *)vm1->vm_shm;
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
		shmmap_s->nrefs++;
	}
	mutex_exit(&shm_lock);
}

void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	mutex_enter(&shm_lock);
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s == NULL) {
		mutex_exit(&shm_lock);
		return;
	}
	vm->vm_shm = NULL;

	if (--shmmap_s->nrefs > 0) {
		SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
		    vm, shmmap_s->nitems, shmmap_s->nrefs));
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
		}
		mutex_exit(&shm_lock);
		return;
	}

	SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
	if (shmmap_s->nitems == 0) {
		mutex_exit(&shm_lock);
		kmem_free(shmmap_s, sizeof(struct shmmap_state));
		return;
	}

	/*
	 * Delete the entries from the shm map.
	 */
	for (;;) {
		struct shmid_ds *shmseg;
		struct uvm_object *uobj;
		size_t sz;

		shmmap_se = SLIST_FIRST(&shmmap_s->entries);
		KASSERT(shmmap_se != NULL);

		shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
		sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
		/* shm_delete_mapping() removes from the list. */
		uobj = shm_delete_mapping(shmmap_s, shmmap_se);
		mutex_exit(&shm_lock);

		uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
		if (uobj != NULL) {
			uao_detach(uobj);
		}
		kmem_free(shmmap_se, sizeof(struct shmmap_entry));

		if (SLIST_EMPTY(&shmmap_s->entries)) {
			break;
		}
		mutex_enter(&shm_lock);
		KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
	}
	kmem_free(shmmap_s, sizeof(struct shmmap_state));
}

static int
shmrealloc(int newshmni)
{
	vaddr_t v;
	struct shmid_ds *oldshmsegs, *newshmsegs;
	kcondvar_t *newshm_cv, *oldshm_cv;
	size_t sz;
	int i, lsegid, oldshmni;

	if (newshmni < 1)
		return EINVAL;

	/* Allocate new memory area */
	sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
	    ALIGN(newshmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		return ENOMEM;

	mutex_enter(&shm_lock);
	while (shm_realloc_state || shm_realloc_disable)
		cv_wait(&shm_realloc_cv, &shm_lock);

	/*
	 * Get the index of the last used segment. Fail if we are trying
	 * to reallocate to fewer segments than are currently in use.
	 */
	lsegid = 0;
	for (i = 0; i < shminfo.shmmni; i++)
		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
			lsegid = i;
	if (lsegid >= newshmni) {
		mutex_exit(&shm_lock);
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}
	shm_realloc_state = true;

	newshmsegs = (void *)v;
	newshm_cv = (void *)((uintptr_t)newshmsegs +
	    ALIGN(newshmni * sizeof(struct shmid_ds)));

	/* Copy all segments to the new area */
	for (i = 0; i < shm_nused; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		(void)memcpy(&newshmsegs[i], &shmsegs[i],
		    sizeof(newshmsegs[0]));
	}

	/* Mark all of the new segments as free, if there are any */
	for (; i < newshmni; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
		newshmsegs[i].shm_perm._seq = 0;
	}

	oldshmsegs = shmsegs;
	oldshmni = shminfo.shmmni;
	shminfo.shmmni = newshmni;
	shmsegs = newshmsegs;
	shm_cv = newshm_cv;

	/* Reallocation completed - notify all waiters, if any */
	shm_realloc_state = false;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	/* Release now unused resources. */
	oldshm_cv = (void *)((uintptr_t)oldshmsegs +
	    ALIGN(oldshmni * sizeof(struct shmid_ds)));
	for (i = 0; i < oldshmni; i++)
		cv_destroy(&oldshm_cv[i]);

	sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
	    ALIGN(oldshmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);

	return 0;
}

void
shminit(void)
{
	vaddr_t v;
	size_t sz;
	int i;

	mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&shm_realloc_cv, "shmrealc");

	/* Allocate the wired memory for our structures */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		panic("sysv_shm: cannot allocate memory");
	shmsegs = (void *)v;
	shm_cv = (void *)((uintptr_t)shmsegs +
	    ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));

	if (shminfo.shmmax == 0)
		shminfo.shmmax = max(physmem / 4, 1024) * PAGE_SIZE;
	else
		shminfo.shmmax *= PAGE_SIZE;
	shminfo.shmall = shminfo.shmmax / PAGE_SIZE;

	for (i = 0; i < shminfo.shmmni; i++) {
		cv_init(&shm_cv[i], "shmwait");
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm._seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shm_realloc_disable = 0;
	shm_realloc_state = false;

	kern_has_sysvshm = 1;

	sysvipcinit();
}
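
/*
 * Worked example (illustrative, assuming 4 KB pages and 1 GB of RAM, i.e.
 * physmem = 262144 pages): with shmmax left at 0, the default computed in
 * shminit() above is max(262144 / 4, 1024) * PAGE_SIZE = 65536 * 4096
 * bytes (256 MB), and shmall becomes 65536 pages.
 */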

int
shmfini(void)
{
	size_t sz;
	int i;
	vaddr_t v = (vaddr_t)shmsegs;

	mutex_enter(&shm_lock);
	if (shm_nused) {
		mutex_exit(&shm_lock);
		return 1;
	}

	/* Destroy all condvars */
	for (i = 0; i < shminfo.shmmni; i++)
		cv_destroy(&shm_cv[i]);
	cv_destroy(&shm_realloc_cv);

	/* Free the allocated/wired memory */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);

	/* Release and destroy our mutex */
	mutex_exit(&shm_lock);
	mutex_destroy(&shm_lock);

	kern_has_sysvshm = 0;

	return 0;
}

static int
sysctl_ipc_shmmni(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmni;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	sysctl_unlock();
	error = shmrealloc(newsize);
	sysctl_relock();
	return error;
}

static int
sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
{
	uint32_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmall;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1)
		return EINVAL;

	shminfo.shmall = newsize;
	shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE;

	return 0;
}

static int
sysctl_ipc_shmmax(SYSCTLFN_ARGS)
{
	uint64_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmax;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < PAGE_SIZE)
		return EINVAL;

	shminfo.shmmax = round_page(newsize);
	shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;

	return 0;
}

SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "ipc",
	    SYSCTL_DESCR("SysV IPC options"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_SYSVIPC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_QUAD, "shmmax",
	    SYSCTL_DESCR("Max shared memory segment size in bytes"),
	    sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmmni",
	    SYSCTL_DESCR("Max number of shared memory identifiers"),
	    sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmseg",
	    SYSCTL_DESCR("Max shared memory segments per process"),
	    NULL, 0, &shminfo.shmseg, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmmaxpgs",
	    SYSCTL_DESCR("Max amount of shared memory in pages"),
	    sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shm_use_phys",
	    SYSCTL_DESCR("Enable/disable locking of shared memory in "
	    "physical memory"), NULL, 0, &shm_use_phys, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
}
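
/*
 * Administration sketch (illustrative): the nodes created above live under
 * kern.ipc and can be tuned at run time with sysctl(8), e.g.
 *
 *	sysctl -w kern.ipc.shmmni=2048		# triggers shmrealloc()
 *	sysctl -w kern.ipc.shmmax=1073741824	# rounded to whole pages
 *	sysctl -w kern.ipc.shm_use_phys=1	# wire newly created segments
 *
 * The values are arbitrary examples; shmmni cannot be reduced below the
 * highest segment slot currently in use (shmrealloc() returns EBUSY).
 */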