1 /* $NetBSD: sysv_shm.c,v 1.135.2.1 2019/09/10 16:14:53 martin Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by Adam Glass and Charles M.
47 * Hannum.
48 * 4. The names of the authors may not be used to endorse or promote products
49 * derived from this software without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
52 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
53 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
54 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
55 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
56 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
57 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
58 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
59 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
60 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.135.2.1 2019/09/10 16:14:53 martin Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_sysv.h"
68 #endif
69
70 #include <sys/param.h>
71 #include <sys/kernel.h>
72 #include <sys/kmem.h>
73 #include <sys/shm.h>
74 #include <sys/mutex.h>
75 #include <sys/mman.h>
76 #include <sys/stat.h>
77 #include <sys/sysctl.h>
78 #include <sys/mount.h> /* XXX for <sys/syscallargs.h> */
79 #include <sys/syscallargs.h>
80 #include <sys/queue.h>
81 #include <sys/kauth.h>
82
83 #include <uvm/uvm_extern.h>
84 #include <uvm/uvm_object.h>
85
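/*
 * A shmmap_entry describes one attachment of a shared memory segment
 * (shmid) at a virtual address (va) in a process's address space.
 */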
86 struct shmmap_entry {
87 SLIST_ENTRY(shmmap_entry) next;
88 vaddr_t va;
89 int shmid;
90 };
91
92 int shm_nused __cacheline_aligned;
93 struct shmid_ds * shmsegs __read_mostly;
94
95 static kmutex_t shm_lock __cacheline_aligned;
96 static kcondvar_t * shm_cv __cacheline_aligned;
97 static int shm_last_free __cacheline_aligned;
98 static size_t shm_committed __cacheline_aligned;
99 static int shm_use_phys __read_mostly;
100
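/*
 * State coordinating shmrealloc() with attach/detach/control operations:
 * reallocation is disabled while a mapping is in flight, and other
 * operations wait on shm_realloc_cv while a reallocation is in progress.
 */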
101 static kcondvar_t shm_realloc_cv;
102 static bool shm_realloc_state;
103 static u_int shm_realloc_disable;
104
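/*
 * Per-vmspace list of attachments.  The structure may be shared across
 * fork() (nrefs counts the vmspaces using it) and is copied on first
 * modification by shmmap_getprivate().
 */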
105 struct shmmap_state {
106 unsigned int nitems;
107 unsigned int nrefs;
108 SLIST_HEAD(, shmmap_entry) entries;
109 };
110
111 extern int kern_has_sysvshm;
112
113 SYSCTL_SETUP_PROTO(sysctl_ipc_shm_setup);
114
115 #ifdef SHMDEBUG
116 #define SHMPRINTF(a) printf a
117 #else
118 #define SHMPRINTF(a)
119 #endif
120
121 static int shmrealloc(int);
122
123 /*
124  * Find a shared memory segment by its identifier.
125  * => must be called with shm_lock held;
126  */
127 static struct shmid_ds *
128 shm_find_segment_by_shmid(int shmid)
129 {
130 int segnum;
131 struct shmid_ds *shmseg;
132
133 KASSERT(mutex_owned(&shm_lock));
134
135 segnum = IPCID_TO_IX(shmid);
136 if (segnum < 0 || segnum >= shminfo.shmmni)
137 return NULL;
138 shmseg = &shmsegs[segnum];
139 if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
140 return NULL;
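/* A removed segment stays visible only if it was created with _SHM_RMLINGER. */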
141 if ((shmseg->shm_perm.mode &
142 (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
143 return NULL;
144 if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
145 return NULL;
146
147 return shmseg;
148 }
149
150 /*
151 * Free memory segment.
152 * => must be called with shm_lock held;
153 */
154 static void
155 shm_free_segment(int segnum)
156 {
157 struct shmid_ds *shmseg;
158 size_t size;
159 bool wanted;
160
161 KASSERT(mutex_owned(&shm_lock));
162
163 shmseg = &shmsegs[segnum];
164 SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
165 shmseg->shm_perm._key, shmseg->shm_perm._seq));
166
167 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
168 wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);
169
170 shmseg->_shm_internal = NULL;
171 shm_committed -= btoc(size);
172 shm_nused--;
173 shmseg->shm_perm.mode = SHMSEG_FREE;
174 shm_last_free = segnum;
175 if (wanted)
176 cv_broadcast(&shm_cv[segnum]);
177 }
178
179 /*
180 * Delete entry from the shm map.
181 * => must be called with shm_lock held;
182 */
183 static struct uvm_object *
184 shm_delete_mapping(struct shmmap_state *shmmap_s,
185 struct shmmap_entry *shmmap_se)
186 {
187 struct uvm_object *uobj = NULL;
188 struct shmid_ds *shmseg;
189 int segnum;
190
191 KASSERT(mutex_owned(&shm_lock));
192
193 segnum = IPCID_TO_IX(shmmap_se->shmid);
194 shmseg = &shmsegs[segnum];
195 SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
196 shmmap_s->nitems--;
197 shmseg->shm_dtime = time_second;
198 if ((--shmseg->shm_nattch <= 0) &&
199 (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
200 uobj = shmseg->_shm_internal;
201 shm_free_segment(segnum);
202 }
203
204 return uobj;
205 }
206
207 /*
208  * Get a non-shared shm map for that vmspace.  Note that memory
209  * allocation may be performed while the lock is held.
210  */
211 static struct shmmap_state *
212 shmmap_getprivate(struct proc *p)
213 {
214 struct shmmap_state *oshmmap_s, *shmmap_s;
215 struct shmmap_entry *oshmmap_se, *shmmap_se;
216
217 KASSERT(mutex_owned(&shm_lock));
218
219 /* 1. A shm map with refcnt = 1 is used only by us - return it */
220 oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
221 if (oshmmap_s && oshmmap_s->nrefs == 1)
222 return oshmmap_s;
223
224 /* 2. No shm map present - create a fresh one */
225 shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
226 shmmap_s->nrefs = 1;
227 SLIST_INIT(&shmmap_s->entries);
228 p->p_vmspace->vm_shm = (void *)shmmap_s;
229
230 if (oshmmap_s == NULL)
231 return shmmap_s;
232
233 SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
234 p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));
235
236 /* 3. A shared shm map, copy to a fresh one and adjust refcounts */
237 SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
238 shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
239 shmmap_se->va = oshmmap_se->va;
240 shmmap_se->shmid = oshmmap_se->shmid;
241 SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
242 }
243 shmmap_s->nitems = oshmmap_s->nitems;
244 oshmmap_s->nrefs--;
245
246 return shmmap_s;
247 }
248
249 /*
250 * Lock/unlock the memory.
251 * => must be called with shm_lock held;
252 */
253 static int
254 shm_memlock(struct shmid_ds *shmseg, int shmid, int cmd)
255 {
256 size_t size;
257 int error;
258
259 KASSERT(mutex_owned(&shm_lock));
260
261 size = round_page(shmseg->shm_segsz);
262
263 if (cmd == SHM_LOCK && (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
264 /* Wire the object's pages, then mark the segment as wired */
265 error = uvm_obj_wirepages(shmseg->_shm_internal,
266 0, size, NULL);
267 if (error)
268 return EIO;
269 shmseg->shm_perm.mode |= SHMSEG_WIRED;
270
271 } else if (cmd == SHM_UNLOCK &&
272 (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
273 /* Unwire the object, then untag it */
274 uvm_obj_unwirepages(shmseg->_shm_internal, 0, size);
275 shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
276 }
277
278 return 0;
279 }
280
281 /*
282 * Unmap shared memory.
283 */
284 int
285 sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
286 {
287 /* {
288 syscallarg(const void *) shmaddr;
289 } */
290 struct proc *p = l->l_proc;
291 struct shmmap_state *shmmap_s1, *shmmap_s;
292 struct shmmap_entry *shmmap_se;
293 struct uvm_object *uobj;
294 struct shmid_ds *shmseg;
295 size_t size;
296
297 mutex_enter(&shm_lock);
298 /* In case of reallocation, we will wait for completion */
299 while (__predict_false(shm_realloc_state))
300 cv_wait(&shm_realloc_cv, &shm_lock);
301
302 shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
303 if (shmmap_s1 == NULL) {
304 mutex_exit(&shm_lock);
305 return EINVAL;
306 }
307
308 /* Find the map entry */
309 SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
310 if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
311 break;
312 if (shmmap_se == NULL) {
313 mutex_exit(&shm_lock);
314 return EINVAL;
315 }
316
317 shmmap_s = shmmap_getprivate(p);
318 if (shmmap_s != shmmap_s1) {
319 /* The map has been copied - look up the entry in the new map */
320 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
321 if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
322 break;
323 if (shmmap_se == NULL) {
324 mutex_exit(&shm_lock);
325 return EINVAL;
326 }
327 }
328
329 SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
330 p->p_vmspace, shmmap_se->shmid, shmmap_se->va));
331
332 /* Delete the entry from shm map */
333 uobj = shm_delete_mapping(shmmap_s, shmmap_se);
334 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
335 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
336 mutex_exit(&shm_lock);
337
338 uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
339 if (uobj != NULL) {
340 uao_detach(uobj);
341 }
342 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
343
344 return 0;
345 }
346
347 /*
348 * Map shared memory.
349 */
350 int
351 sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
352 {
353 /* {
354 syscallarg(int) shmid;
355 syscallarg(const void *) shmaddr;
356 syscallarg(int) shmflg;
357 } */
358 int error, flags = 0;
359 struct proc *p = l->l_proc;
360 kauth_cred_t cred = l->l_cred;
361 struct shmid_ds *shmseg;
362 struct shmmap_state *shmmap_s;
363 struct shmmap_entry *shmmap_se;
364 struct uvm_object *uobj;
365 struct vmspace *vm;
366 vaddr_t attach_va;
367 vm_prot_t prot;
368 vsize_t size;
369
370 /* Allocate a new map entry and set it */
371 shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
372 shmmap_se->shmid = SCARG(uap, shmid);
373
374 mutex_enter(&shm_lock);
375 /* In case of reallocation, we will wait for completion */
376 while (__predict_false(shm_realloc_state))
377 cv_wait(&shm_realloc_cv, &shm_lock);
378
379 shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
380 if (shmseg == NULL) {
381 error = EINVAL;
382 goto err;
383 }
384 error = ipcperm(cred, &shmseg->shm_perm,
385 (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
386 if (error)
387 goto err;
388
389 vm = p->p_vmspace;
390 shmmap_s = (struct shmmap_state *)vm->vm_shm;
391 if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
392 error = EMFILE;
393 goto err;
394 }
395
396 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
397 prot = VM_PROT_READ;
398 if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
399 prot |= VM_PROT_WRITE;
400 if (SCARG(uap, shmaddr)) {
401 flags |= UVM_FLAG_FIXED;
402 if (SCARG(uap, shmflg) & SHM_RND)
403 attach_va =
404 (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
405 else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
406 attach_va = (vaddr_t)SCARG(uap, shmaddr);
407 else {
408 error = EINVAL;
409 goto err;
410 }
411 } else {
412 /* This is just a hint to uvm_map() about where to put it. */
413 attach_va = p->p_emul->e_vm_default_addr(p,
414 (vaddr_t)vm->vm_daddr, size,
415 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
416 }
417
418 /*
419  * Create a map entry, add it to the list and increase the counters.
420  * The lock will be dropped before the mapping, so disable reallocation.
421  */
422 shmmap_s = shmmap_getprivate(p);
423 SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
424 shmmap_s->nitems++;
425 shmseg->shm_lpid = p->p_pid;
426 shmseg->shm_nattch++;
427 shm_realloc_disable++;
428
429 /*
430 * Add a reference to the uvm object while we hold the
431 * shm_lock.
432 */
433 uobj = shmseg->_shm_internal;
434 uao_reference(uobj);
435 mutex_exit(&shm_lock);
436
437 /*
438 * Drop the shm_lock to map it into the address space, and lock
439 * the memory, if needed (XXX where does this lock memory?).
440 */
441 error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
442 UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
443 if (error)
444 goto err_detach;
445
446 /* Set the new address, and update the time */
447 mutex_enter(&shm_lock);
448 shmmap_se->va = attach_va;
449 shmseg->shm_atime = time_second;
450 shm_realloc_disable--;
451 retval[0] = attach_va;
452 SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
453 p->p_vmspace, shmmap_se->shmid, attach_va));
454 err:
455 cv_broadcast(&shm_realloc_cv);
456 mutex_exit(&shm_lock);
457 if (error && shmmap_se) {
458 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
459 }
460 return error;
461
462 err_detach:
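/* Drop the reference taken on the uvm object before the lock was released. */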
463 uao_detach(uobj);
464 mutex_enter(&shm_lock);
465 uobj = shm_delete_mapping(shmmap_s, shmmap_se);
466 shm_realloc_disable--;
467 cv_broadcast(&shm_realloc_cv);
468 mutex_exit(&shm_lock);
469 if (uobj != NULL) {
470 uao_detach(uobj);
471 }
472 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
473 return error;
474 }
475
476 /*
477 * Shared memory control operations.
478 */
479 int
480 sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
481 register_t *retval)
482 {
483 /* {
484 syscallarg(int) shmid;
485 syscallarg(int) cmd;
486 syscallarg(struct shmid_ds *) buf;
487 } */
488 struct shmid_ds shmbuf;
489 int cmd, error;
490
491 cmd = SCARG(uap, cmd);
492 if (cmd == IPC_SET) {
493 error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
494 if (error)
495 return error;
496 }
497
498 error = shmctl1(l, SCARG(uap, shmid), cmd,
499 (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);
500
501 if (error == 0 && cmd == IPC_STAT)
502 error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));
503
504 return error;
505 }
506
507 int
508 shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
509 {
510 struct uvm_object *uobj = NULL;
511 kauth_cred_t cred = l->l_cred;
512 struct shmid_ds *shmseg;
513 int error = 0;
514
515 mutex_enter(&shm_lock);
516 /* In case of reallocation, we will wait for completion */
517 while (__predict_false(shm_realloc_state))
518 cv_wait(&shm_realloc_cv, &shm_lock);
519
520 shmseg = shm_find_segment_by_shmid(shmid);
521 if (shmseg == NULL) {
522 mutex_exit(&shm_lock);
523 return EINVAL;
524 }
525
526 switch (cmd) {
527 case IPC_STAT:
528 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
529 break;
530 memset(shmbuf, 0, sizeof *shmbuf);
531 shmbuf->shm_perm = shmseg->shm_perm;
532 shmbuf->shm_perm.mode &= 0777;
533 shmbuf->shm_segsz = shmseg->shm_segsz;
534 shmbuf->shm_lpid = shmseg->shm_lpid;
535 shmbuf->shm_cpid = shmseg->shm_cpid;
536 shmbuf->shm_nattch = shmseg->shm_nattch;
537 shmbuf->shm_atime = shmseg->shm_atime;
538 shmbuf->shm_dtime = shmseg->shm_dtime;
539 shmbuf->shm_ctime = shmseg->shm_ctime;
540 break;
541 case IPC_SET:
542 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
543 break;
544 shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
545 shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
546 shmseg->shm_perm.mode =
547 (shmseg->shm_perm.mode & ~ACCESSPERMS) |
548 (shmbuf->shm_perm.mode & ACCESSPERMS);
549 shmseg->shm_ctime = time_second;
550 break;
551 case IPC_RMID:
552 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
553 break;
554 shmseg->shm_perm._key = IPC_PRIVATE;
555 shmseg->shm_perm.mode |= SHMSEG_REMOVED;
556 if (shmseg->shm_nattch <= 0) {
557 uobj = shmseg->_shm_internal;
558 shm_free_segment(IPCID_TO_IX(shmid));
559 }
560 break;
561 case SHM_LOCK:
562 case SHM_UNLOCK:
563 if ((error = kauth_authorize_system(cred,
564 KAUTH_SYSTEM_SYSVIPC,
565 (cmd == SHM_LOCK) ? KAUTH_REQ_SYSTEM_SYSVIPC_SHM_LOCK :
566 KAUTH_REQ_SYSTEM_SYSVIPC_SHM_UNLOCK, NULL, NULL, NULL)) != 0)
567 break;
568 error = shm_memlock(shmseg, shmid, cmd);
569 break;
570 default:
571 error = EINVAL;
572 }
573
574 mutex_exit(&shm_lock);
575 if (uobj != NULL)
576 uao_detach(uobj);
577 return error;
578 }
579
580 /*
581  * Try to take an already existing segment.
582  * => must be called with shm_lock held;
583  * => called from only one place, thus inline;
584  */
585 static inline int
586 shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
587 register_t *retval)
588 {
589 struct shmid_ds *shmseg;
590 kauth_cred_t cred = l->l_cred;
591 int segnum, error;
592 again:
593 KASSERT(mutex_owned(&shm_lock));
594
595 /* Find segment by key */
596 for (segnum = 0; segnum < shminfo.shmmni; segnum++)
597 if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
598 shmsegs[segnum].shm_perm._key == SCARG(uap, key))
599 break;
600 if (segnum == shminfo.shmmni) {
601 /* Not found */
602 return -1;
603 }
604
605 shmseg = &shmsegs[segnum];
606 if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
607 /*
608 * This segment is in the process of being allocated. Wait
609 * until it's done, and look the key up again (in case the
610 * allocation failed or it was freed).
611 */
612 shmseg->shm_perm.mode |= SHMSEG_WANTED;
613 error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
614 if (error)
615 return error;
616 goto again;
617 }
618
619 /*
620 * First check the flags, to generate a useful error when a
621 * segment already exists.
622 */
623 if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
624 (IPC_CREAT | IPC_EXCL))
625 return EEXIST;
626
627 /* Check the permission and segment size. */
628 error = ipcperm(cred, &shmseg->shm_perm, mode);
629 if (error)
630 return error;
631 if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
632 return EINVAL;
633
634 *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
635 return 0;
636 }
637
638 int
639 sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
640 {
641 /* {
642 syscallarg(key_t) key;
643 syscallarg(size_t) size;
644 syscallarg(int) shmflg;
645 } */
646 struct shmid_ds *shmseg;
647 kauth_cred_t cred = l->l_cred;
648 key_t key = SCARG(uap, key);
649 size_t size;
650 int error, mode, segnum;
651 bool lockmem;
652
653 mode = SCARG(uap, shmflg) & ACCESSPERMS;
654 if (SCARG(uap, shmflg) & _SHM_RMLINGER)
655 mode |= SHMSEG_RMLINGER;
656
657 SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
658 SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));
659
660 mutex_enter(&shm_lock);
661 /* In case of reallocation, we will wait for completion */
662 while (__predict_false(shm_realloc_state))
663 cv_wait(&shm_realloc_cv, &shm_lock);
664
665 if (key != IPC_PRIVATE) {
666 error = shmget_existing(l, uap, mode, retval);
667 if (error != -1) {
668 mutex_exit(&shm_lock);
669 return error;
670 }
671 if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
672 mutex_exit(&shm_lock);
673 return ENOENT;
674 }
675 }
676 error = 0;
677
678 /*
679  * Check the limits.
680  */
681 size = SCARG(uap, size);
682 if (size < shminfo.shmmin || size > shminfo.shmmax) {
683 mutex_exit(&shm_lock);
684 return EINVAL;
685 }
686 if (shm_nused >= shminfo.shmmni) {
687 mutex_exit(&shm_lock);
688 return ENOSPC;
689 }
690 size = round_page(size);
691 if (shm_committed + btoc(size) > shminfo.shmall) {
692 mutex_exit(&shm_lock);
693 return ENOMEM;
694 }
695
696 /* Find the first available segment */
697 if (shm_last_free < 0) {
698 for (segnum = 0; segnum < shminfo.shmmni; segnum++)
699 if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
700 break;
701 KASSERT(segnum < shminfo.shmmni);
702 } else {
703 segnum = shm_last_free;
704 shm_last_free = -1;
705 }
706
707 /*
708  * Initialize the segment.
709  * We will drop the lock while allocating the memory, thus mark the
710  * segment present, but removed, so that no other thread can take it.
711  * Also, disable reallocation while the lock is dropped.
712  */
713 shmseg = &shmsegs[segnum];
714 shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
715 shm_committed += btoc(size);
716 shm_nused++;
717 lockmem = shm_use_phys;
718 shm_realloc_disable++;
719 mutex_exit(&shm_lock);
720
721 /* Allocate the memory object and lock it if needed */
722 shmseg->_shm_internal = uao_create(size, 0);
723 if (lockmem) {
724 /* Wire the pages and tag it */
725 error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size, NULL);
726 if (error) {
727 uao_detach(shmseg->_shm_internal);
728 mutex_enter(&shm_lock);
729 shm_free_segment(segnum);
730 shm_realloc_disable--;
731 mutex_exit(&shm_lock);
732 return error;
733 }
734 }
735
736 /*
737  * Note: while the segment is marked as allocated-but-removed, there is
738  * no need to hold the lock while initializing it (except shm_perm.mode).
739  */
740 shmseg->shm_perm._key = SCARG(uap, key);
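/*
 * Bump the sequence number so that stale shmids referring to this slot
 * are rejected by shm_find_segment_by_shmid().
 */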
741 shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
742 *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
743
744 shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
745 shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
746 shmseg->shm_segsz = SCARG(uap, size);
747 shmseg->shm_cpid = l->l_proc->p_pid;
748 shmseg->shm_lpid = shmseg->shm_nattch = 0;
749 shmseg->shm_atime = shmseg->shm_dtime = 0;
750 shmseg->shm_ctime = time_second;
751
752 /*
753  * The segment is now initialized.
754  * Take the lock, mark it as allocated, and notify waiters (if any).
755  * Also, re-enable reallocation.
756  */
757 mutex_enter(&shm_lock);
758 shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
759 (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
760 SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
761 if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
762 shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
763 cv_broadcast(&shm_cv[segnum]);
764 }
765 shm_realloc_disable--;
766 cv_broadcast(&shm_realloc_cv);
767 mutex_exit(&shm_lock);
768
769 return error;
770 }
771
772 void
773 shmfork(struct vmspace *vm1, struct vmspace *vm2)
774 {
775 struct shmmap_state *shmmap_s;
776 struct shmmap_entry *shmmap_se;
777
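/*
 * The child shares the parent's shm map: bump the reference count of
 * the map and the attach count of every mapped segment.
 */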
778 SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
779 mutex_enter(&shm_lock);
780 vm2->vm_shm = vm1->vm_shm;
781 if (vm1->vm_shm) {
782 shmmap_s = (struct shmmap_state *)vm1->vm_shm;
783 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
784 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
785 shmmap_s->nrefs++;
786 }
787 mutex_exit(&shm_lock);
788 }
789
790 void
791 shmexit(struct vmspace *vm)
792 {
793 struct shmmap_state *shmmap_s;
794 struct shmmap_entry *shmmap_se;
795
796 mutex_enter(&shm_lock);
797 shmmap_s = (struct shmmap_state *)vm->vm_shm;
798 if (shmmap_s == NULL) {
799 mutex_exit(&shm_lock);
800 return;
801 }
802 vm->vm_shm = NULL;
803
804 if (--shmmap_s->nrefs > 0) {
805 SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
806 vm, shmmap_s->nitems, shmmap_s->nrefs));
807 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
808 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
809 }
810 mutex_exit(&shm_lock);
811 return;
812 }
813
814 SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
815 if (shmmap_s->nitems == 0) {
816 mutex_exit(&shm_lock);
817 kmem_free(shmmap_s, sizeof(struct shmmap_state));
818 return;
819 }
820
821 /*
822  * Delete the entries from the shm map.
823  */
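/*
 * Note: shm_lock is dropped around uvm_deallocate(), which may block,
 * and re-taken before processing the next entry.
 */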
824 for (;;) {
825 struct shmid_ds *shmseg;
826 struct uvm_object *uobj;
827 size_t sz;
828
829 shmmap_se = SLIST_FIRST(&shmmap_s->entries);
830 KASSERT(shmmap_se != NULL);
831
832 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
833 sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
834 /* shm_delete_mapping() removes from the list. */
835 uobj = shm_delete_mapping(shmmap_s, shmmap_se);
836 mutex_exit(&shm_lock);
837
838 uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
839 if (uobj != NULL) {
840 uao_detach(uobj);
841 }
842 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
843
844 if (SLIST_EMPTY(&shmmap_s->entries)) {
845 break;
846 }
847 mutex_enter(&shm_lock);
848 KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
849 }
850 kmem_free(shmmap_s, sizeof(struct shmmap_state));
851 }
852
853 static int
854 shmrealloc(int newshmni)
855 {
856 vaddr_t v;
857 struct shmid_ds *oldshmsegs, *newshmsegs;
858 kcondvar_t *newshm_cv, *oldshm_cv;
859 size_t sz;
860 int i, lsegid, oldshmni;
861
862 if (newshmni < 1)
863 return EINVAL;
864
865 /* Allocate new memory area */
866 sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
867 ALIGN(newshmni * sizeof(kcondvar_t));
868 sz = round_page(sz);
869 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
870 if (v == 0)
871 return ENOMEM;
872
873 mutex_enter(&shm_lock);
874 while (shm_realloc_state || shm_realloc_disable)
875 cv_wait(&shm_realloc_cv, &shm_lock);
876
877 /*
878  * Get the index of the last used segment.  Fail if we are trying
879  * to reallocate to fewer segments than are currently in use.
880  */
881 lsegid = 0;
882 for (i = 0; i < shminfo.shmmni; i++)
883 if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
884 lsegid = i;
885 if (lsegid >= newshmni) {
886 mutex_exit(&shm_lock);
887 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
888 return EBUSY;
889 }
890 shm_realloc_state = true;
891
892 newshmsegs = (void *)v;
893 newshm_cv = (void *)((uintptr_t)newshmsegs +
894 ALIGN(newshmni * sizeof(struct shmid_ds)));
895
896 /* Copy the existing segments, up to the last used index, to the new area */
897 for (i = 0; i <= lsegid; i++) {
898 cv_init(&newshm_cv[i], "shmwait");
899 (void)memcpy(&newshmsegs[i], &shmsegs[i],
900 sizeof(newshmsegs[0]));
901 }
902
903 /* Mark all remaining segments as free, if there are any */
904 for (; i < newshmni; i++) {
905 cv_init(&newshm_cv[i], "shmwait");
906 newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
907 newshmsegs[i].shm_perm._seq = 0;
908 }
909
910 oldshmsegs = shmsegs;
911 oldshmni = shminfo.shmmni;
912 shminfo.shmmni = newshmni;
913 shmsegs = newshmsegs;
914 shm_cv = newshm_cv;
915
916 /* Reallocation completed - notify all waiters, if any */
917 shm_realloc_state = false;
918 cv_broadcast(&shm_realloc_cv);
919 mutex_exit(&shm_lock);
920
921 /* Release now unused resources. */
922 oldshm_cv = (void *)((uintptr_t)oldshmsegs +
923 ALIGN(oldshmni * sizeof(struct shmid_ds)));
924 for (i = 0; i < oldshmni; i++)
925 cv_destroy(&oldshm_cv[i]);
926
927 sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
928 ALIGN(oldshmni * sizeof(kcondvar_t));
929 sz = round_page(sz);
930 uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);
931
932 return 0;
933 }
934
935 int
936 shminit(struct sysctllog **clog)
937 {
938 vaddr_t v;
939 size_t sz;
940 int i;
941
942 mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
943 cv_init(&shm_realloc_cv, "shmrealc");
944
945 /* Allocate the wired memory for our structures */
946 sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
947 ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
948 sz = round_page(sz);
949 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
950 if (v == 0) {
951 printf("sysv_shm: cannot allocate memory\n");
952 return ENOMEM;
953 }
954 shmsegs = (void *)v;
955 shm_cv = (void *)((uintptr_t)shmsegs +
956 ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));
957
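/*
 * If no limit was configured, default shmmax to 1/4 of physical memory
 * (but at least 1024 pages); otherwise convert the configured value
 * from pages to bytes.
 */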
958 if (shminfo.shmmax == 0)
959 shminfo.shmmax = uimax(physmem / 4, 1024) * PAGE_SIZE;
960 else
961 shminfo.shmmax *= PAGE_SIZE;
962 shminfo.shmall = shminfo.shmmax / PAGE_SIZE;
963
964 for (i = 0; i < shminfo.shmmni; i++) {
965 cv_init(&shm_cv[i], "shmwait");
966 shmsegs[i].shm_perm.mode = SHMSEG_FREE;
967 shmsegs[i].shm_perm._seq = 0;
968 }
969 shm_last_free = 0;
970 shm_nused = 0;
971 shm_committed = 0;
972 shm_realloc_disable = 0;
973 shm_realloc_state = false;
974
975 kern_has_sysvshm = 1;
976
977 /* Load the callback function pointers for the uvm subsystem */
978 uvm_shmexit = shmexit;
979 uvm_shmfork = shmfork;
980
981 #ifdef _MODULE
982 if (clog)
983 sysctl_ipc_shm_setup(clog);
984 #endif
985 return 0;
986 }
987
988 int
989 shmfini(void)
990 {
991 size_t sz;
992 int i;
993 vaddr_t v = (vaddr_t)shmsegs;
994
995 mutex_enter(&shm_lock);
996 if (shm_nused) {
997 mutex_exit(&shm_lock);
998 return 1;
999 }
1000
1001 /* Clear the callback function pointers for the uvm subsystem */
1002 uvm_shmexit = NULL;
1003 uvm_shmfork = NULL;
1004
1005 /* Destroy all condvars */
1006 for (i = 0; i < shminfo.shmmni; i++)
1007 cv_destroy(&shm_cv[i]);
1008 cv_destroy(&shm_realloc_cv);
1009
1010 /* Free the allocated/wired memory */
1011 sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
1012 ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
1013 sz = round_page(sz);
1014 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
1015
1016 /* Release and destroy our mutex */
1017 mutex_exit(&shm_lock);
1018 mutex_destroy(&shm_lock);
1019
1020 kern_has_sysvshm = 0;
1021
1022 return 0;
1023 }
1024
1025 static int
1026 sysctl_ipc_shmmni(SYSCTLFN_ARGS)
1027 {
1028 int newsize, error;
1029 struct sysctlnode node;
1030 node = *rnode;
1031 node.sysctl_data = &newsize;
1032
1033 newsize = shminfo.shmmni;
1034 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1035 if (error || newp == NULL)
1036 return error;
1037
1038 sysctl_unlock();
1039 error = shmrealloc(newsize);
1040 sysctl_relock();
1041 return error;
1042 }
1043
1044 static int
1045 sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
1046 {
1047 uint32_t newsize;
1048 int error;
1049 struct sysctlnode node;
1050 node = *rnode;
1051 node.sysctl_data = &newsize;
1052
1053 newsize = shminfo.shmall;
1054 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1055 if (error || newp == NULL)
1056 return error;
1057
1058 if (newsize < 1)
1059 return EINVAL;
1060
1061 shminfo.shmall = newsize;
1062 shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE;
1063
1064 return 0;
1065 }
1066
1067 static int
1068 sysctl_ipc_shmmax(SYSCTLFN_ARGS)
1069 {
1070 uint64_t newsize;
1071 int error;
1072 struct sysctlnode node;
1073 node = *rnode;
1074 node.sysctl_data = &newsize;
1075
1076 newsize = shminfo.shmmax;
1077 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1078 if (error || newp == NULL)
1079 return error;
1080
1081 if (newsize < PAGE_SIZE)
1082 return EINVAL;
1083
1084 shminfo.shmmax = round_page(newsize);
1085 shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;
1086
1087 return 0;
1088 }
1089
1090 SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
1091 {
1092
1093 sysctl_createv(clog, 0, NULL, NULL,
1094 CTLFLAG_PERMANENT,
1095 CTLTYPE_NODE, "ipc",
1096 SYSCTL_DESCR("SysV IPC options"),
1097 NULL, 0, NULL, 0,
1098 CTL_KERN, KERN_SYSVIPC, CTL_EOL);
1099 sysctl_createv(clog, 0, NULL, NULL,
1100 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1101 CTLTYPE_QUAD, "shmmax",
1102 SYSCTL_DESCR("Max shared memory segment size in bytes"),
1103 sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
1104 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
1105 sysctl_createv(clog, 0, NULL, NULL,
1106 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1107 CTLTYPE_INT, "shmmni",
1108 SYSCTL_DESCR("Max number of shared memory identifiers"),
1109 sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
1110 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
1111 sysctl_createv(clog, 0, NULL, NULL,
1112 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1113 CTLTYPE_INT, "shmseg",
1114 SYSCTL_DESCR("Max shared memory segments per process"),
1115 NULL, 0, &shminfo.shmseg, 0,
1116 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
1117 sysctl_createv(clog, 0, NULL, NULL,
1118 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1119 CTLTYPE_INT, "shmmaxpgs",
1120 SYSCTL_DESCR("Max amount of shared memory in pages"),
1121 sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
1122 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
1123 sysctl_createv(clog, 0, NULL, NULL,
1124 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1125 CTLTYPE_INT, "shm_use_phys",
1126 SYSCTL_DESCR("Enable/disable locking of shared memory in "
1127 "physical memory"), NULL, 0, &shm_use_phys, 0,
1128 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
1129 }
1130