/* $NetBSD: sysv_shm.c,v 1.139 2019/10/01 16:36:58 chs Exp $ */

/*-
 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by Adam Glass and Charles M.
 *    Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.139 2019/10/01 16:36:58 chs Exp $");

#ifdef _KERNEL_OPT
#include "opt_sysv.h"
#endif

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/shm.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
#include <sys/syscallargs.h>
#include <sys/queue.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

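/*
 * Per-process bookkeeping for attached segments: a shmmap_entry records
 * one attachment (mapped address, segment id, and whether a concurrent
 * shmat() is still mapping it), and shmmap_state below is the per-vmspace
 * list of such entries, shared by reference across fork() until a change
 * forces a private copy (see shmmap_getprivate()).
 */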
struct shmmap_entry {
        SLIST_ENTRY(shmmap_entry) next;
        vaddr_t va;
        int shmid;
        bool busy;
};

int shm_nused __cacheline_aligned;
struct shmid_ds *shmsegs __read_mostly;

static kmutex_t shm_lock __cacheline_aligned;
static kcondvar_t *shm_cv __cacheline_aligned;
static int shm_last_free __cacheline_aligned;
static size_t shm_committed __cacheline_aligned;
static int shm_use_phys __read_mostly;

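/*
 * Reallocation of the segment array (see shmrealloc()) is coordinated
 * through the three variables below: shm_realloc_state is true while the
 * arrays are being replaced, and shm_realloc_disable counts threads that
 * have dropped shm_lock in the middle of an operation and still hold
 * pointers into the current arrays; both are waited for on shm_realloc_cv.
 */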
static kcondvar_t shm_realloc_cv;
static bool shm_realloc_state;
static u_int shm_realloc_disable;

struct shmmap_state {
        unsigned int nitems;
        unsigned int nrefs;
        SLIST_HEAD(, shmmap_entry) entries;
};

extern int kern_has_sysvshm;

SYSCTL_SETUP_PROTO(sysctl_ipc_shm_setup);

#ifdef SHMDEBUG
#define SHMPRINTF(a) printf a
#else
#define SHMPRINTF(a)
#endif

static int shmrealloc(int);

/*
 * Find the permissions of the shared memory segment at the given index.
 * Only used by compat_linux to implement SHM_STAT.
 */
int
shm_find_segment_perm_by_index(int index, struct ipc_perm *perm)
{
        struct shmid_ds *shmseg;

        mutex_enter(&shm_lock);
        if (index < 0 || index >= shminfo.shmmni) {
                mutex_exit(&shm_lock);
                return EINVAL;
        }
        shmseg = &shmsegs[index];
        memcpy(perm, &shmseg->shm_perm, sizeof(*perm));
        mutex_exit(&shm_lock);
        return 0;
}

/*
 * Find the shared memory segment by the identifier.
 *  => must be called with shm_lock held;
 */
static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
        int segnum;
        struct shmid_ds *shmseg;

        KASSERT(mutex_owned(&shm_lock));

        segnum = IPCID_TO_IX(shmid);
        if (segnum < 0 || segnum >= shminfo.shmmni)
                return NULL;
        shmseg = &shmsegs[segnum];
        if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
                return NULL;
        if ((shmseg->shm_perm.mode &
            (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
                return NULL;
        if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
                return NULL;

        return shmseg;
}

/*
 * Free memory segment.
 *  => must be called with shm_lock held;
 */
static void
shm_free_segment(int segnum)
{
        struct shmid_ds *shmseg;
        size_t size;
        bool wanted;

        KASSERT(mutex_owned(&shm_lock));

        shmseg = &shmsegs[segnum];
        SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
            shmseg->shm_perm._key, shmseg->shm_perm._seq));

        size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
        wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);

        shmseg->_shm_internal = NULL;
        shm_committed -= btoc(size);
        shm_nused--;
        shmseg->shm_perm.mode = SHMSEG_FREE;
        shm_last_free = segnum;
        if (wanted == true)
                cv_broadcast(&shm_cv[segnum]);
}

/*
 * Delete entry from the shm map.
 *  => must be called with shm_lock held;
 */
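/*
 * When the last attach of a segment already marked SHMSEG_REMOVED is
 * dropped, the segment itself is freed and its uvm_object is returned,
 * so that the caller can uao_detach() it after releasing shm_lock.
 */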
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
        struct uvm_object *uobj = NULL;
        struct shmid_ds *shmseg;
        int segnum;

        KASSERT(mutex_owned(&shm_lock));

        segnum = IPCID_TO_IX(shmmap_se->shmid);
        shmseg = &shmsegs[segnum];
        SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
        shmmap_s->nitems--;
        shmseg->shm_dtime = time_second;
        if ((--shmseg->shm_nattch <= 0) &&
            (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
                uobj = shmseg->_shm_internal;
                shm_free_segment(segnum);
        }

        return uobj;
}

/*
 * Get a private (non-shared) shm map for this vmspace. Note that memory
 * allocation may be performed with the lock held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
        struct shmmap_state *oshmmap_s, *shmmap_s;
        struct shmmap_entry *oshmmap_se, *shmmap_se;

        KASSERT(mutex_owned(&shm_lock));

        /* 1. A shm map with refcnt == 1 is used only by ourselves - return it */
        oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
        if (oshmmap_s && oshmmap_s->nrefs == 1)
                return oshmmap_s;

        /* 2. No shm map present - create a fresh one */
        shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
        shmmap_s->nrefs = 1;
        SLIST_INIT(&shmmap_s->entries);
        p->p_vmspace->vm_shm = (void *)shmmap_s;

        if (oshmmap_s == NULL)
                return shmmap_s;

        SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
            p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

        /* 3. A shared shm map, copy to a fresh one and adjust refcounts */
        SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
                shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
                shmmap_se->va = oshmmap_se->va;
                shmmap_se->shmid = oshmmap_se->shmid;
                SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
        }
        shmmap_s->nitems = oshmmap_s->nitems;
        oshmmap_s->nrefs--;

        return shmmap_s;
}

/*
 * Lock/unlock the memory.
 *  => must be called with shm_lock held;
 */
static int
shm_memlock(struct shmid_ds *shmseg, int shmid, int cmd)
{
        size_t size;
        int error;

        KASSERT(mutex_owned(&shm_lock));

        size = round_page(shmseg->shm_segsz);

        if (cmd == SHM_LOCK && (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
                /* Wire the object and map, then tag it */
                error = uvm_obj_wirepages(shmseg->_shm_internal,
                    0, size, NULL);
                if (error)
                        return EIO;
                shmseg->shm_perm.mode |= SHMSEG_WIRED;

        } else if (cmd == SHM_UNLOCK &&
            (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
                /* Unwire the object, then untag it */
                uvm_obj_unwirepages(shmseg->_shm_internal, 0, size);
                shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
        }

        return 0;
}

/*
 * Unmap shared memory.
 */
int
sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
{
        /* {
                syscallarg(const void *) shmaddr;
        } */
        struct proc *p = l->l_proc;
        struct shmmap_state *shmmap_s1, *shmmap_s;
        struct shmmap_entry *shmmap_se;
        struct uvm_object *uobj;
        struct shmid_ds *shmseg;
        size_t size;
        int segnum;

        mutex_enter(&shm_lock);
restart:
        /* In case of reallocation, we will wait for completion */
        while (__predict_false(shm_realloc_state))
                cv_wait(&shm_realloc_cv, &shm_lock);

        shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
        if (shmmap_s1 == NULL) {
                mutex_exit(&shm_lock);
                return EINVAL;
        }

        /* Find the map entry */
        SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
                if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
                        break;
        if (shmmap_se == NULL) {
                mutex_exit(&shm_lock);
                return EINVAL;
        }

        shmmap_s = shmmap_getprivate(p);
        if (shmmap_s != shmmap_s1) {
                /* Map has been copied, lookup entry in new map */
                SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
                        if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
                                break;
                if (shmmap_se == NULL) {
                        mutex_exit(&shm_lock);
                        return EINVAL;
                }
        }

        segnum = IPCID_TO_IX(shmmap_se->shmid);
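        /*
         * If the entry is still marked busy, a concurrent shmat() has
         * inserted it but has not yet finished mapping the segment.
         * Wait for it to complete and rescan the map.
         */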
        if (shmmap_se->busy) {
                cv_wait(&shm_cv[segnum], &shm_lock);
                goto restart;
        }

        SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
            p->p_vmspace, shmmap_se->shmid, shmmap_se->va));

        /* Delete the entry from shm map */
        uobj = shm_delete_mapping(shmmap_s, shmmap_se);
        shmseg = &shmsegs[segnum];
        size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
        mutex_exit(&shm_lock);

        uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
        if (uobj != NULL) {
                uao_detach(uobj);
        }
        kmem_free(shmmap_se, sizeof(struct shmmap_entry));

        return 0;
}

/*
 * Map shared memory.
 */
int
sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
{
        /* {
                syscallarg(int) shmid;
                syscallarg(const void *) shmaddr;
                syscallarg(int) shmflg;
        } */
        int error, flags = 0;
        struct proc *p = l->l_proc;
        kauth_cred_t cred = l->l_cred;
        struct shmid_ds *shmseg;
        struct shmmap_state *shmmap_s;
        struct shmmap_entry *shmmap_se;
        struct uvm_object *uobj;
        struct vmspace *vm;
        vaddr_t attach_va;
        vm_prot_t prot;
        vsize_t size;
        int segnum;

        /* Allocate a new map entry and set it */
        shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
        shmmap_se->shmid = SCARG(uap, shmid);
        shmmap_se->busy = true;
        segnum = IPCID_TO_IX(shmmap_se->shmid);

        mutex_enter(&shm_lock);
        /* In case of reallocation, we will wait for completion */
        while (__predict_false(shm_realloc_state))
                cv_wait(&shm_realloc_cv, &shm_lock);

        shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
        if (shmseg == NULL) {
                error = EINVAL;
                goto err;
        }
        error = ipcperm(cred, &shmseg->shm_perm,
            (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
        if (error)
                goto err;

        vm = p->p_vmspace;
        shmmap_s = (struct shmmap_state *)vm->vm_shm;
        if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
                error = EMFILE;
                goto err;
        }

        size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
        prot = VM_PROT_READ;
        if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
                prot |= VM_PROT_WRITE;
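        /*
         * A user-supplied address must be SHMLBA-aligned: with SHM_RND it
         * is rounded down to the nearest SHMLBA boundary, otherwise a
         * misaligned address is rejected with EINVAL.
         */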
        if (SCARG(uap, shmaddr)) {
                flags |= UVM_FLAG_FIXED;
                if (SCARG(uap, shmflg) & SHM_RND)
                        attach_va =
                            (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
                else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
                        attach_va = (vaddr_t)SCARG(uap, shmaddr);
                else {
                        error = EINVAL;
                        goto err;
                }
        } else {
                /* This is just a hint to uvm_map() about where to put it. */
                attach_va = p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)vm->vm_daddr, size,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
        }

        /*
         * Create a map entry, add it to the list and increase the counters.
         * Since the lock will be dropped before the mapping, disable
         * reallocation.
         */
        shmmap_s = shmmap_getprivate(p);
        SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
        shmmap_s->nitems++;
        shmseg->shm_lpid = p->p_pid;
        shmseg->shm_nattch++;
        shm_realloc_disable++;

        /*
         * Add a reference to the uvm object while we hold the
         * shm_lock.
         */
        uobj = shmseg->_shm_internal;
        uao_reference(uobj);
        mutex_exit(&shm_lock);

        /*
         * Drop the shm_lock to map it into the address space, and lock
         * the memory, if needed (XXX where does this lock memory?).
         */
        error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
            UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
        if (error)
                goto err_detach;

        /* Set the new address, and update the time */
        mutex_enter(&shm_lock);
        shmmap_se->va = attach_va;
        shmmap_se->busy = false;
        shmseg->shm_atime = time_second;
        shm_realloc_disable--;
        retval[0] = attach_va;
        SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
            p->p_vmspace, shmmap_se->shmid, attach_va));
        cv_broadcast(&shm_cv[segnum]);
err:
        cv_broadcast(&shm_realloc_cv);
        mutex_exit(&shm_lock);
        if (error && shmmap_se) {
                kmem_free(shmmap_se, sizeof(struct shmmap_entry));
        }
        return error;

err_detach:
        uao_detach(uobj);
        mutex_enter(&shm_lock);
        uobj = shm_delete_mapping(shmmap_s, shmmap_se);
        shm_realloc_disable--;
        cv_broadcast(&shm_cv[segnum]);
        cv_broadcast(&shm_realloc_cv);
        mutex_exit(&shm_lock);
        if (uobj != NULL) {
                uao_detach(uobj);
        }
        kmem_free(shmmap_se, sizeof(struct shmmap_entry));
        return error;
}

/*
 * Shared memory control operations.
 */
int
sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(int) shmid;
                syscallarg(int) cmd;
                syscallarg(struct shmid_ds *) buf;
        } */
        struct shmid_ds shmbuf;
        int cmd, error;

        cmd = SCARG(uap, cmd);
        if (cmd == IPC_SET) {
                error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
                if (error)
                        return error;
        }

        error = shmctl1(l, SCARG(uap, shmid), cmd,
            (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);

        if (error == 0 && cmd == IPC_STAT)
                error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));

        return error;
}

int
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
        struct uvm_object *uobj = NULL;
        kauth_cred_t cred = l->l_cred;
        struct shmid_ds *shmseg;
        int error = 0;

        mutex_enter(&shm_lock);
        /* In case of reallocation, we will wait for completion */
        while (__predict_false(shm_realloc_state))
                cv_wait(&shm_realloc_cv, &shm_lock);

        shmseg = shm_find_segment_by_shmid(shmid);
        if (shmseg == NULL) {
                mutex_exit(&shm_lock);
                return EINVAL;
        }

        switch (cmd) {
        case IPC_STAT:
                if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
                        break;
                memset(shmbuf, 0, sizeof *shmbuf);
                shmbuf->shm_perm = shmseg->shm_perm;
                shmbuf->shm_perm.mode &= 0777;
                shmbuf->shm_segsz = shmseg->shm_segsz;
                shmbuf->shm_lpid = shmseg->shm_lpid;
                shmbuf->shm_cpid = shmseg->shm_cpid;
                shmbuf->shm_nattch = shmseg->shm_nattch;
                shmbuf->shm_atime = shmseg->shm_atime;
                shmbuf->shm_dtime = shmseg->shm_dtime;
                shmbuf->shm_ctime = shmseg->shm_ctime;
                break;
        case IPC_SET:
                if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
                        break;
                shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
                shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
                shmseg->shm_perm.mode =
                    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
                    (shmbuf->shm_perm.mode & ACCESSPERMS);
                shmseg->shm_ctime = time_second;
                break;
        case IPC_RMID:
                if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
                        break;
                shmseg->shm_perm._key = IPC_PRIVATE;
                shmseg->shm_perm.mode |= SHMSEG_REMOVED;
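                /*
                 * If nothing is attached, free the segment right away;
                 * otherwise it is reclaimed by shm_delete_mapping() once
                 * the last attach goes away.
                 */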
                if (shmseg->shm_nattch <= 0) {
                        uobj = shmseg->_shm_internal;
                        shm_free_segment(IPCID_TO_IX(shmid));
                }
                break;
        case SHM_LOCK:
        case SHM_UNLOCK:
                if ((error = kauth_authorize_system(cred,
                    KAUTH_SYSTEM_SYSVIPC,
                    (cmd == SHM_LOCK) ? KAUTH_REQ_SYSTEM_SYSVIPC_SHM_LOCK :
                    KAUTH_REQ_SYSTEM_SYSVIPC_SHM_UNLOCK, NULL, NULL, NULL)) != 0)
                        break;
                error = shm_memlock(shmseg, shmid, cmd);
                break;
        default:
                error = EINVAL;
        }

        mutex_exit(&shm_lock);
        if (uobj != NULL)
                uao_detach(uobj);
        return error;
}

/*
 * Try to take an already existing segment.
 *  => must be called with shm_lock held;
 *  => called from only one place, hence inline;
 */
static inline int
shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
    register_t *retval)
{
        struct shmid_ds *shmseg;
        kauth_cred_t cred = l->l_cred;
        int segnum, error;
again:
        KASSERT(mutex_owned(&shm_lock));

        /* Find segment by key */
        for (segnum = 0; segnum < shminfo.shmmni; segnum++)
                if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
                    shmsegs[segnum].shm_perm._key == SCARG(uap, key))
                        break;
        if (segnum == shminfo.shmmni) {
                /* Not found */
                return -1;
        }

        shmseg = &shmsegs[segnum];
        if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
                /*
                 * This segment is in the process of being allocated.  Wait
                 * until it's done, and look the key up again (in case the
                 * allocation failed or it was freed).
                 */
                shmseg->shm_perm.mode |= SHMSEG_WANTED;
                error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
                if (error)
                        return error;
                goto again;
        }

        /*
         * First check the flags, to generate a useful error when a
         * segment already exists.
         */
        if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
            (IPC_CREAT | IPC_EXCL))
                return EEXIST;

        /* Check the permission and segment size. */
        error = ipcperm(cred, &shmseg->shm_perm, mode);
        if (error)
                return error;
        if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
                return EINVAL;

        *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
        return 0;
}

int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
        /* {
                syscallarg(key_t) key;
                syscallarg(size_t) size;
                syscallarg(int) shmflg;
        } */
        struct shmid_ds *shmseg;
        kauth_cred_t cred = l->l_cred;
        key_t key = SCARG(uap, key);
        size_t size;
        int error, mode, segnum;
        bool lockmem;

        mode = SCARG(uap, shmflg) & ACCESSPERMS;
        if (SCARG(uap, shmflg) & _SHM_RMLINGER)
                mode |= SHMSEG_RMLINGER;

        SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
            SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

        mutex_enter(&shm_lock);
        /* In case of reallocation, we will wait for completion */
        while (__predict_false(shm_realloc_state))
                cv_wait(&shm_realloc_cv, &shm_lock);

        if (key != IPC_PRIVATE) {
                error = shmget_existing(l, uap, mode, retval);
                if (error != -1) {
                        mutex_exit(&shm_lock);
                        return error;
                }
                if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
                        mutex_exit(&shm_lock);
                        return ENOENT;
                }
        }
        error = 0;

        /*
         * Check against the limits.
         */
        size = SCARG(uap, size);
        if (size < shminfo.shmmin || size > shminfo.shmmax) {
                mutex_exit(&shm_lock);
                return EINVAL;
        }
        if (shm_nused >= shminfo.shmmni) {
                mutex_exit(&shm_lock);
                return ENOSPC;
        }
        size = round_page(size);
        if (shm_committed + btoc(size) > shminfo.shmall) {
                mutex_exit(&shm_lock);
                return ENOMEM;
        }

        /* Find the first available segment */
        if (shm_last_free < 0) {
                for (segnum = 0; segnum < shminfo.shmmni; segnum++)
                        if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
                                break;
                KASSERT(segnum < shminfo.shmmni);
        } else {
                segnum = shm_last_free;
                shm_last_free = -1;
        }

        /*
         * Initialize the segment.
         * We will drop the lock while allocating the memory, so mark the
         * segment as allocated but removed so that no other thread can
         * take it. Also, disable reallocation while the lock is dropped.
         */
        shmseg = &shmsegs[segnum];
        shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
        shm_committed += btoc(size);
        shm_nused++;
        lockmem = shm_use_phys;
        shm_realloc_disable++;
        mutex_exit(&shm_lock);

        /* Allocate the memory object and lock it if needed */
        shmseg->_shm_internal = uao_create(size, 0);
        if (lockmem) {
                /* Wire the pages and tag it */
                error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size, NULL);
                if (error) {
                        uao_detach(shmseg->_shm_internal);
                        mutex_enter(&shm_lock);
                        shm_free_segment(segnum);
                        shm_realloc_disable--;
                        mutex_exit(&shm_lock);
                        return error;
                }
        }

        /*
         * Note: while the segment is marked as removed, there is no need to
         * hold the lock while initializing it (except for shm_perm.mode).
         */
        shmseg->shm_perm._key = SCARG(uap, key);
        shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
        *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

        shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
        shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
        shmseg->shm_segsz = SCARG(uap, size);
        shmseg->shm_cpid = l->l_proc->p_pid;
        shmseg->shm_lpid = shmseg->shm_nattch = 0;
        shmseg->shm_atime = shmseg->shm_dtime = 0;
        shmseg->shm_ctime = time_second;

        /*
         * The segment is initialized.
         * Take the lock, mark it as allocated, and notify waiters (if any).
         * Also, re-enable reallocation.
         */
        mutex_enter(&shm_lock);
        shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
            (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
            SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
        if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
                shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
                cv_broadcast(&shm_cv[segnum]);
        }
        shm_realloc_disable--;
        cv_broadcast(&shm_realloc_cv);
        mutex_exit(&shm_lock);

        return error;
}

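/*
 * Fork hook: the child shares the parent's shm map by reference, so bump
 * the reference count of the map and the attach count of every segment
 * mapped through it.
 */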
void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
        struct shmmap_state *shmmap_s;
        struct shmmap_entry *shmmap_se;

        SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
        mutex_enter(&shm_lock);
        vm2->vm_shm = vm1->vm_shm;
        if (vm1->vm_shm) {
                shmmap_s = (struct shmmap_state *)vm1->vm_shm;
                SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
                        shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
                shmmap_s->nrefs++;
        }
        mutex_exit(&shm_lock);
}

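/*
 * Exit/exec hook: drop this vmspace's reference to its shm map and, if it
 * was the last reference, detach all remaining segments.
 */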
void
shmexit(struct vmspace *vm)
{
        struct shmmap_state *shmmap_s;
        struct shmmap_entry *shmmap_se;

        mutex_enter(&shm_lock);
        shmmap_s = (struct shmmap_state *)vm->vm_shm;
        if (shmmap_s == NULL) {
                mutex_exit(&shm_lock);
                return;
        }
        vm->vm_shm = NULL;

        if (--shmmap_s->nrefs > 0) {
                SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
                    vm, shmmap_s->nitems, shmmap_s->nrefs));
                SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
                        shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
                }
                mutex_exit(&shm_lock);
                return;
        }

        SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
        if (shmmap_s->nitems == 0) {
                mutex_exit(&shm_lock);
                kmem_free(shmmap_s, sizeof(struct shmmap_state));
                return;
        }

        /*
         * Delete the entry from shm map.
         */
        for (;;) {
                struct shmid_ds *shmseg;
                struct uvm_object *uobj;
                size_t sz;

                shmmap_se = SLIST_FIRST(&shmmap_s->entries);
                KASSERT(shmmap_se != NULL);

                shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
                sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
                /* shm_delete_mapping() removes from the list. */
                uobj = shm_delete_mapping(shmmap_s, shmmap_se);
                mutex_exit(&shm_lock);

                uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
                if (uobj != NULL) {
                        uao_detach(uobj);
                }
                kmem_free(shmmap_se, sizeof(struct shmmap_entry));

                if (SLIST_EMPTY(&shmmap_s->entries)) {
                        break;
                }
                mutex_enter(&shm_lock);
                KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
        }
        kmem_free(shmmap_s, sizeof(struct shmmap_state));
}

static int
shmrealloc(int newshmni)
{
        vaddr_t v;
        struct shmid_ds *oldshmsegs, *newshmsegs;
        kcondvar_t *newshm_cv, *oldshm_cv;
        size_t sz;
        int i, lsegid, oldshmni;

        if (newshmni < 1)
                return EINVAL;

        /* Allocate new memory area */
        sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
            ALIGN(newshmni * sizeof(kcondvar_t));
        sz = round_page(sz);
        v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
        if (v == 0)
                return ENOMEM;

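        /*
         * Wait until no other reallocation is in progress and no thread
         * holds pointers into the current shmsegs/shm_cv arrays across a
         * dropped shm_lock (shm_realloc_disable).
         */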
        mutex_enter(&shm_lock);
        while (shm_realloc_state || shm_realloc_disable)
                cv_wait(&shm_realloc_cv, &shm_lock);

        /*
         * Find the index of the last used segment. Fail if we are trying
         * to reallocate to fewer segments than are currently in use.
         */
        lsegid = 0;
        for (i = 0; i < shminfo.shmmni; i++)
                if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
                        lsegid = i;
        if (lsegid >= newshmni) {
                mutex_exit(&shm_lock);
                uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
                return EBUSY;
        }
        shm_realloc_state = true;

        newshmsegs = (void *)v;
        newshm_cv = (void *)((uintptr_t)newshmsegs +
            ALIGN(newshmni * sizeof(struct shmid_ds)));

        /* Copy all used segments to the new area */
        for (i = 0; i < shm_nused; i++) {
                cv_init(&newshm_cv[i], "shmwait");
                (void)memcpy(&newshmsegs[i], &shmsegs[i],
                    sizeof(newshmsegs[0]));
        }

        /* Mark all remaining segments as free, if there are any */
        for (; i < newshmni; i++) {
                cv_init(&newshm_cv[i], "shmwait");
                newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
                newshmsegs[i].shm_perm._seq = 0;
        }

        oldshmsegs = shmsegs;
        oldshmni = shminfo.shmmni;
        shminfo.shmmni = newshmni;
        shmsegs = newshmsegs;
        shm_cv = newshm_cv;

        /* Reallocation completed - notify all waiters, if any */
        shm_realloc_state = false;
        cv_broadcast(&shm_realloc_cv);
        mutex_exit(&shm_lock);

        /* Release now unused resources. */
        oldshm_cv = (void *)((uintptr_t)oldshmsegs +
            ALIGN(oldshmni * sizeof(struct shmid_ds)));
        for (i = 0; i < oldshmni; i++)
                cv_destroy(&oldshm_cv[i]);

        sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
            ALIGN(oldshmni * sizeof(kcondvar_t));
        sz = round_page(sz);
        uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);

        return 0;
}

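/*
 * Initialize the SysV shared memory subsystem: allocate the wired segment
 * and condvar arrays and establish the initial limits.
 */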
int
shminit(void)
{
        vaddr_t v;
        size_t sz;
        int i;

        mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
        cv_init(&shm_realloc_cv, "shmrealc");

        /* Allocate the wired memory for our structures */
        sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
            ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
        sz = round_page(sz);
        v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
        if (v == 0) {
                printf("sysv_shm: cannot allocate memory");
                return ENOMEM;
        }
        shmsegs = (void *)v;
        shm_cv = (void *)((uintptr_t)shmsegs +
            ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));

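        /*
         * At this point shminfo.shmmax is a number of pages (zero selects
         * the default of a quarter of physical memory, but at least 1024
         * pages); convert it to bytes and derive shmall from it.
         */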
        if (shminfo.shmmax == 0)
                shminfo.shmmax = uimax(physmem / 4, 1024) * PAGE_SIZE;
        else
                shminfo.shmmax *= PAGE_SIZE;
        shminfo.shmall = shminfo.shmmax / PAGE_SIZE;

        for (i = 0; i < shminfo.shmmni; i++) {
                cv_init(&shm_cv[i], "shmwait");
                shmsegs[i].shm_perm.mode = SHMSEG_FREE;
                shmsegs[i].shm_perm._seq = 0;
        }
        shm_last_free = 0;
        shm_nused = 0;
        shm_committed = 0;
        shm_realloc_disable = 0;
        shm_realloc_state = false;

        kern_has_sysvshm = 1;

        /* Load the callback function pointers for the uvm subsystem */
        uvm_shmexit = shmexit;
        uvm_shmfork = shmfork;

        return 0;
}

int
shmfini(void)
{
        size_t sz;
        int i;
        vaddr_t v = (vaddr_t)shmsegs;

        mutex_enter(&shm_lock);
        if (shm_nused) {
                mutex_exit(&shm_lock);
                return 1;
        }

        /* Clear the callback function pointers for the uvm subsystem */
        uvm_shmexit = NULL;
        uvm_shmfork = NULL;

        /* Destroy all condvars */
        for (i = 0; i < shminfo.shmmni; i++)
                cv_destroy(&shm_cv[i]);
        cv_destroy(&shm_realloc_cv);

        /* Free the allocated/wired memory */
        sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
            ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
        sz = round_page(sz);
        uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);

        /* Release and destroy our mutex */
        mutex_exit(&shm_lock);
        mutex_destroy(&shm_lock);

        kern_has_sysvshm = 0;

        return 0;
}

static int
sysctl_ipc_shmmni(SYSCTLFN_ARGS)
{
        int newsize, error;
        struct sysctlnode node;
        node = *rnode;
        node.sysctl_data = &newsize;

        newsize = shminfo.shmmni;
        error = sysctl_lookup(SYSCTLFN_CALL(&node));
        if (error || newp == NULL)
                return error;

        sysctl_unlock();
        error = shmrealloc(newsize);
        sysctl_relock();
        return error;
}

static int
sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
{
        uint32_t newsize;
        int error;
        struct sysctlnode node;
        node = *rnode;
        node.sysctl_data = &newsize;

        newsize = shminfo.shmall;
        error = sysctl_lookup(SYSCTLFN_CALL(&node));
        if (error || newp == NULL)
                return error;

        if (newsize < 1)
                return EINVAL;

        shminfo.shmall = newsize;
        shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE;

        return 0;
}

static int
sysctl_ipc_shmmax(SYSCTLFN_ARGS)
{
        uint64_t newsize;
        int error;
        struct sysctlnode node;
        node = *rnode;
        node.sysctl_data = &newsize;

        newsize = shminfo.shmmax;
        error = sysctl_lookup(SYSCTLFN_CALL(&node));
        if (error || newp == NULL)
                return error;

        if (newsize < PAGE_SIZE)
                return EINVAL;

        shminfo.shmmax = round_page(newsize);
        shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;

        return 0;
}

SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
{

        sysctl_createv(clog, 0, NULL, NULL,
                CTLFLAG_PERMANENT,
                CTLTYPE_NODE, "ipc",
                SYSCTL_DESCR("SysV IPC options"),
                NULL, 0, NULL, 0,
                CTL_KERN, KERN_SYSVIPC, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
                CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                CTLTYPE_QUAD, "shmmax",
                SYSCTL_DESCR("Max shared memory segment size in bytes"),
                sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
                CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
                CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                CTLTYPE_INT, "shmmni",
                SYSCTL_DESCR("Max number of shared memory identifiers"),
                sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
                CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
                CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                CTLTYPE_INT, "shmseg",
                SYSCTL_DESCR("Max shared memory segments per process"),
                NULL, 0, &shminfo.shmseg, 0,
                CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
                CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                CTLTYPE_INT, "shmmaxpgs",
                SYSCTL_DESCR("Max amount of shared memory in pages"),
                sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
                CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
        sysctl_createv(clog, 0, NULL, NULL,
                CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                CTLTYPE_INT, "shm_use_phys",
                SYSCTL_DESCR("Enable/disable locking of shared memory in "
                    "physical memory"), NULL, 0, &shm_use_phys, 0,
                CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
}