/* $NetBSD: sysv_shm.c,v 1.131.10.2 2020/01/21 18:12:54 martin Exp $ */

/*-
 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles M.
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.131.10.2 2020/01/21 18:12:54 martin Exp $");

#ifdef _KERNEL_OPT
#include "opt_sysv.h"
#endif

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/shm.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
#include <sys/syscallargs.h>
#include <sys/queue.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>
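
/*
 * Implementation notes:
 *
 * - The segment descriptors (shmsegs[]) and their per-segment condition
 *   variables (shm_cv[]), together with the counters declared below, are
 *   protected by shm_lock.
 *
 * - Each vmspace tracks its attached segments in a shmmap_state hanging
 *   off vm_shm: an SLIST of shmmap_entry records holding the attach
 *   address and shmid.  The state is shared across fork() via its
 *   reference count and is copied on first modification by
 *   shmmap_getprivate().
 *
 * - shmrealloc() resizes the arrays: it raises shm_realloc_state and waits
 *   for shm_realloc_disable to drain, while the system calls wait on
 *   shm_realloc_cv during a resize and bump shm_realloc_disable around
 *   sections where they temporarily drop shm_lock.
 */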

struct shmmap_entry {
	SLIST_ENTRY(shmmap_entry) next;
	vaddr_t va;
	int shmid;
};

int shm_nused __cacheline_aligned;
struct shmid_ds * shmsegs __read_mostly;

static kmutex_t shm_lock __cacheline_aligned;
static kcondvar_t * shm_cv __cacheline_aligned;
static int shm_last_free __cacheline_aligned;
static size_t shm_committed __cacheline_aligned;
static int shm_use_phys __read_mostly;

static kcondvar_t shm_realloc_cv;
static bool shm_realloc_state;
static u_int shm_realloc_disable;

struct shmmap_state {
	unsigned int nitems;
	unsigned int nrefs;
	SLIST_HEAD(, shmmap_entry) entries;
};

extern int kern_has_sysvshm;

SYSCTL_SETUP_PROTO(sysctl_ipc_shm_setup);

#ifdef SHMDEBUG
#define SHMPRINTF(a) printf a
#else
#define SHMPRINTF(a)
#endif

static int shmrealloc(int);

/*
 * Find the shared memory segment permission by the index. Only used by
 * compat_linux to implement SHM_STAT.
 */
int
shm_find_segment_perm_by_index(int index, struct ipc_perm *perm)
{
	struct shmid_ds *shmseg;

	mutex_enter(&shm_lock);
	if (index < 0 || index >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	shmseg = &shmsegs[index];
	memcpy(perm, &shmseg->shm_perm, sizeof(*perm));
	mutex_exit(&shm_lock);
	return 0;
}

/*
 * Find the shared memory segment by the identifier.
 * => must be called with shm_lock held;
 */
static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shminfo.shmmni)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
		return NULL;
	if ((shmseg->shm_perm.mode &
	    (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
		return NULL;
	if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
		return NULL;

	return shmseg;
}

/*
 * Free memory segment.
 * => must be called with shm_lock held;
 */
static void
shm_free_segment(int segnum)
{
	struct shmid_ds *shmseg;
	size_t size;
	bool wanted;

	KASSERT(mutex_owned(&shm_lock));

	shmseg = &shmsegs[segnum];
	SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
	    shmseg->shm_perm._key, shmseg->shm_perm._seq));

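	/* Round the segment size up to a whole number of pages. */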
	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);

	shmseg->_shm_internal = NULL;
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
	shm_last_free = segnum;
	if (wanted == true)
		cv_broadcast(&shm_cv[segnum]);
}

/*
 * Delete entry from the shm map.
 * => must be called with shm_lock held;
 */
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
	struct uvm_object *uobj = NULL;
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmmap_se->shmid);
	shmseg = &shmsegs[segnum];
	SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
	shmmap_s->nitems--;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		uobj = shmseg->_shm_internal;
		shm_free_segment(segnum);
	}

	return uobj;
}

/*
 * Get a non-shared shm map for the vmspace. Note that memory
 * allocation may be performed with the lock held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
	struct shmmap_state *oshmmap_s, *shmmap_s;
	struct shmmap_entry *oshmmap_se, *shmmap_se;

	KASSERT(mutex_owned(&shm_lock));

	/* 1. A shm map with refcnt = 1 is used only by us, thus return it */
	oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (oshmmap_s && oshmmap_s->nrefs == 1)
		return oshmmap_s;

	/* 2. No shm map present - create a fresh one */
	shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
	shmmap_s->nrefs = 1;
	SLIST_INIT(&shmmap_s->entries);
	p->p_vmspace->vm_shm = (void *)shmmap_s;

	if (oshmmap_s == NULL)
		return shmmap_s;

	SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
	    p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

	/* 3. A shared shm map, copy to a fresh one and adjust refcounts */
	SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
		shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
		shmmap_se->va = oshmmap_se->va;
		shmmap_se->shmid = oshmmap_se->shmid;
		SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	}
	shmmap_s->nitems = oshmmap_s->nitems;
	oshmmap_s->nrefs--;

	return shmmap_s;
}

/*
 * Lock/unlock the memory.
 * => must be called with shm_lock held;
 * => called from one place, thus, inline;
 */
static inline int
shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd)
{
	struct proc *p = l->l_proc;
	struct shmmap_entry *shmmap_se;
	struct shmmap_state *shmmap_s;
	size_t size;
	int error;

	KASSERT(mutex_owned(&shm_lock));
	shmmap_s = shmmap_getprivate(p);

	/* Find our shared memory address by shmid */
	SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
		if (shmmap_se->shmid != shmid)
			continue;

		size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;

		if (cmd == SHM_LOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
			/* Wire the object and map, then tag it */
			error = uvm_obj_wirepages(shmseg->_shm_internal,
			    0, size, NULL);
			if (error)
				return EIO;
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, false, 0);
			if (error) {
				uvm_obj_unwirepages(shmseg->_shm_internal,
				    0, size);
				if (error == EFAULT)
					error = ENOMEM;
				return error;
			}
			shmseg->shm_perm.mode |= SHMSEG_WIRED;

		} else if (cmd == SHM_UNLOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
			/* Unwire the object and map, then untag it */
			uvm_obj_unwirepages(shmseg->_shm_internal, 0, size);
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, true, 0);
			if (error)
				return EIO;
			shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
		}
	}

	return 0;
}

/*
 * Unmap shared memory.
 */
int
sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) shmaddr;
	} */
	struct proc *p = l->l_proc;
	struct shmmap_state *shmmap_s1, *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct shmid_ds *shmseg;
	size_t size;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s1 == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	/* Find the map entry */
	SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
		if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
			break;
	if (shmmap_se == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	shmmap_s = shmmap_getprivate(p);
	if (shmmap_s != shmmap_s1) {
		/* Map has been copied, lookup entry in new map */
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
				break;
		if (shmmap_se == NULL) {
			mutex_exit(&shm_lock);
			return EINVAL;
		}
	}

	SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, shmmap_se->va));

	/* Delete the entry from shm map */
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	mutex_exit(&shm_lock);

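	/* Unmap the region and drop the object reference without shm_lock held. */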
	uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
	if (uobj != NULL) {
		uao_detach(uobj);
	}
	kmem_free(shmmap_se, sizeof(struct shmmap_entry));

	return 0;
}

/*
 * Map shared memory.
 */
int
sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(const void *) shmaddr;
		syscallarg(int) shmflg;
	} */
	int error, flags = 0;
	struct proc *p = l->l_proc;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct vmspace *vm;
	vaddr_t attach_va;
	vm_prot_t prot;
	vsize_t size;

	/* Allocate a new map entry and set it */
	shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
	shmmap_se->shmid = SCARG(uap, shmid);

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
	if (shmseg == NULL) {
		error = EINVAL;
		goto err;
	}
	error = ipcperm(cred, &shmseg->shm_perm,
	    (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto err;

	vm = p->p_vmspace;
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
		error = EMFILE;
		goto err;
	}

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	prot = VM_PROT_READ;
	if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	if (SCARG(uap, shmaddr)) {
		flags |= UVM_FLAG_FIXED;
		if (SCARG(uap, shmflg) & SHM_RND)
			attach_va =
			    (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
		else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
			attach_va = (vaddr_t)SCARG(uap, shmaddr);
		else {
			error = EINVAL;
			goto err;
		}
	} else {
		/* This is just a hint to uvm_map() about where to put it. */
		attach_va = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)vm->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
	}

	/*
	 * Create a map entry, add it to the list and increase the counters.
	 * Since the lock will be dropped before the mapping is done,
	 * disable reallocation.
	 */
	shmmap_s = shmmap_getprivate(p);
	SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	shmmap_s->nitems++;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_nattch++;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/*
	 * Add a reference to the memory object, map it to the
	 * address space, and lock the memory, if needed.
	 */
	uobj = shmseg->_shm_internal;
	uao_reference(uobj);
	error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
	if (error)
		goto err_detach;
	if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) {
		error = uvm_map_pageable(&vm->vm_map, attach_va,
		    attach_va + size, false, 0);
		if (error) {
			if (error == EFAULT)
				error = ENOMEM;
			uvm_deallocate(&vm->vm_map, attach_va, size);
			goto err_detach;
		}
	}

	/* Set the new address, and update the time */
	mutex_enter(&shm_lock);
	shmmap_se->va = attach_va;
	shmseg->shm_atime = time_second;
	shm_realloc_disable--;
	retval[0] = attach_va;
	SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, attach_va));
err:
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (error && shmmap_se) {
		kmem_free(shmmap_se, sizeof(struct shmmap_entry));
	}
	return error;

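	/*
	 * Undo path for a failed uvm_map() or wiring: drop the reference
	 * taken above, then remove the entry under shm_lock; if that was
	 * the last attach of a removed segment, shm_delete_mapping()
	 * returns the object so it can be released as well.
	 */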
err_detach:
	uao_detach(uobj);
	mutex_enter(&shm_lock);
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (uobj != NULL) {
		uao_detach(uobj);
	}
	kmem_free(shmmap_se, sizeof(struct shmmap_entry));
	return error;
}

/*
 * Shared memory control operations.
 */
int
sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(int) cmd;
		syscallarg(struct shmid_ds *) buf;
	} */
	struct shmid_ds shmbuf;
	int cmd, error;

	cmd = SCARG(uap, cmd);
	if (cmd == IPC_SET) {
		error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
		if (error)
			return error;
	}

	error = shmctl1(l, SCARG(uap, shmid), cmd,
	    (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);

	if (error == 0 && cmd == IPC_STAT)
		error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));

	return error;
}

int
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
	struct uvm_object *uobj = NULL;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	int error = 0;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	switch (cmd) {
	case IPC_STAT:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
			break;
		memset(shmbuf, 0, sizeof *shmbuf);
		shmbuf->shm_perm = shmseg->shm_perm;
		shmbuf->shm_perm.mode &= 0777;
		shmbuf->shm_segsz = shmseg->shm_segsz;
		shmbuf->shm_lpid = shmseg->shm_lpid;
		shmbuf->shm_cpid = shmseg->shm_cpid;
		shmbuf->shm_nattch = shmseg->shm_nattch;
		shmbuf->shm_atime = shmseg->shm_atime;
		shmbuf->shm_dtime = shmseg->shm_dtime;
		shmbuf->shm_ctime = shmseg->shm_ctime;
		break;
	case IPC_SET:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
		shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (shmbuf->shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = time_second;
		break;
	case IPC_RMID:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm._key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->shm_nattch <= 0) {
			uobj = shmseg->_shm_internal;
			shm_free_segment(IPCID_TO_IX(shmid));
		}
		break;
	case SHM_LOCK:
	case SHM_UNLOCK:
		if ((error = kauth_authorize_system(cred,
		    KAUTH_SYSTEM_SYSVIPC,
		    (cmd == SHM_LOCK) ? KAUTH_REQ_SYSTEM_SYSVIPC_SHM_LOCK :
		    KAUTH_REQ_SYSTEM_SYSVIPC_SHM_UNLOCK, NULL, NULL, NULL)) != 0)
			break;
		error = shm_memlock(l, shmseg, shmid, cmd);
		break;
	default:
		error = EINVAL;
	}

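	/*
	 * If IPC_RMID freed the segment (it had no attaches), release its
	 * backing object after dropping the lock.
	 */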
	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	return error;
}

/*
 * Try to take an already existing segment.
 * => must be called with shm_lock held;
 * => called from one place, thus, inline;
 */
static inline int
shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
    register_t *retval)
{
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	int segnum, error;
again:
	KASSERT(mutex_owned(&shm_lock));

	/* Find segment by key */
	for (segnum = 0; segnum < shminfo.shmmni; segnum++)
		if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[segnum].shm_perm._key == SCARG(uap, key))
			break;
	if (segnum == shminfo.shmmni) {
		/* Not found */
		return -1;
	}

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated. Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
		if (error)
			return error;
		goto again;
	}

	/*
	 * First check the flags, to generate a useful error when a
	 * segment already exists.
	 */
	if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
	    (IPC_CREAT | IPC_EXCL))
		return EEXIST;

	/* Check the permission and segment size. */
	error = ipcperm(cred, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
		return EINVAL;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
	/* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	key_t key = SCARG(uap, key);
	size_t size;
	int error, mode, segnum;
	bool lockmem;

	mode = SCARG(uap, shmflg) & ACCESSPERMS;
	if (SCARG(uap, shmflg) & _SHM_RMLINGER)
		mode |= SHMSEG_RMLINGER;

	SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
	    SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	if (key != IPC_PRIVATE) {
		error = shmget_existing(l, uap, mode, retval);
		if (error != -1) {
			mutex_exit(&shm_lock);
			return error;
		}
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
			mutex_exit(&shm_lock);
			return ENOENT;
		}
	}
	error = 0;

	/*
	 * Check the limits.
	 */
	size = SCARG(uap, size);
	if (size < shminfo.shmmin || size > shminfo.shmmax) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	if (shm_nused >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return ENOSPC;
	}
	size = (size + PGOFSET) & ~PGOFSET;
	if (shm_committed + btoc(size) > shminfo.shmall) {
		mutex_exit(&shm_lock);
		return ENOMEM;
	}

	/* Find the first available segment */
	if (shm_last_free < 0) {
		for (segnum = 0; segnum < shminfo.shmmni; segnum++)
			if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
				break;
		KASSERT(segnum < shminfo.shmmni);
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}

	/*
	 * Initialize the segment.
	 * We will drop the lock while allocating the memory, thus mark the
	 * segment as present but removed, so that no other thread can take
	 * it. Also, disable reallocation while the lock is dropped.
	 */
	shmseg = &shmsegs[segnum];
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shm_committed += btoc(size);
	shm_nused++;
	lockmem = shm_use_phys;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/* Allocate the memory object and lock it if needed */
	shmseg->_shm_internal = uao_create(size, 0);
	if (lockmem) {
		/* Wire the pages and tag it */
		error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size, NULL);
		if (error) {
			uao_detach(shmseg->_shm_internal);
			mutex_enter(&shm_lock);
			shm_free_segment(segnum);
			shm_realloc_disable--;
			mutex_exit(&shm_lock);
			return error;
		}
	}

	/*
	 * Note: while the segment is marked, there is no need to hold the
	 * lock while setting its fields (except shm_perm.mode).
	 */
	shmseg->shm_perm._key = SCARG(uap, key);
	shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
	shmseg->shm_segsz = SCARG(uap, size);
	shmseg->shm_cpid = l->l_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;

	/*
	 * The segment is initialized.
	 * Take the lock, mark it as allocated and notify waiters (if any).
	 * Also, re-enable reallocation.
	 */
	mutex_enter(&shm_lock);
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
	    SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		cv_broadcast(&shm_cv[segnum]);
	}
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	return error;
}

void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

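	/*
	 * On fork, the child shares the parent's shm map: bump the map's
	 * reference count and each attached segment's nattch.
	 */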
828 SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
829 mutex_enter(&shm_lock);
830 vm2->vm_shm = vm1->vm_shm;
831 if (vm1->vm_shm) {
832 shmmap_s = (struct shmmap_state *)vm1->vm_shm;
833 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
834 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
835 shmmap_s->nrefs++;
836 }
837 mutex_exit(&shm_lock);
838 }
839
840 void
841 shmexit(struct vmspace *vm)
842 {
843 struct shmmap_state *shmmap_s;
844 struct shmmap_entry *shmmap_se;
845
846 mutex_enter(&shm_lock);
847 shmmap_s = (struct shmmap_state *)vm->vm_shm;
848 if (shmmap_s == NULL) {
849 mutex_exit(&shm_lock);
850 return;
851 }
852 vm->vm_shm = NULL;
853
854 if (--shmmap_s->nrefs > 0) {
855 SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
856 vm, shmmap_s->nitems, shmmap_s->nrefs));
857 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
858 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
859 }
860 mutex_exit(&shm_lock);
861 return;
862 }
863
864 SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
865 if (shmmap_s->nitems == 0) {
866 mutex_exit(&shm_lock);
867 kmem_free(shmmap_s, sizeof(struct shmmap_state));
868 return;
869 }
870
871 /*
872 * Delete the entry from shm map.
873 */
874 for (;;) {
875 struct shmid_ds *shmseg;
876 struct uvm_object *uobj;
877 size_t sz;
878
879 shmmap_se = SLIST_FIRST(&shmmap_s->entries);
880 KASSERT(shmmap_se != NULL);
881
882 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
883 sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
884 /* shm_delete_mapping() removes from the list. */
885 uobj = shm_delete_mapping(shmmap_s, shmmap_se);
886 mutex_exit(&shm_lock);
887
888 uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
889 if (uobj != NULL) {
890 uao_detach(uobj);
891 }
892 kmem_free(shmmap_se, sizeof(struct shmmap_entry));
893
894 if (SLIST_EMPTY(&shmmap_s->entries)) {
895 break;
896 }
897 mutex_enter(&shm_lock);
898 KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
899 }
900 kmem_free(shmmap_s, sizeof(struct shmmap_state));
901 }
902
903 static int
904 shmrealloc(int newshmni)
905 {
906 vaddr_t v;
907 struct shmid_ds *oldshmsegs, *newshmsegs;
908 kcondvar_t *newshm_cv, *oldshm_cv;
909 size_t sz;
910 int i, lsegid, oldshmni;
911
912 if (newshmni < 1)
913 return EINVAL;
914
915 /* Allocate new memory area */
916 sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
917 ALIGN(newshmni * sizeof(kcondvar_t));
918 sz = round_page(sz);
919 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
920 if (v == 0)
921 return ENOMEM;
922
923 mutex_enter(&shm_lock);
924 while (shm_realloc_state || shm_realloc_disable)
925 cv_wait(&shm_realloc_cv, &shm_lock);
926
	/*
	 * Find the index of the last used segment. Fail if we are trying
	 * to reallocate to fewer segments than are currently in use.
	 */
	lsegid = 0;
	for (i = 0; i < shminfo.shmmni; i++)
		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
			lsegid = i;
	if (lsegid >= newshmni) {
		mutex_exit(&shm_lock);
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}
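	/* Block new shm system calls while the arrays are being swapped. */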
	shm_realloc_state = true;

	newshmsegs = (void *)v;
	newshm_cv = (void *)((uintptr_t)newshmsegs +
	    ALIGN(newshmni * sizeof(struct shmid_ds)));

	/* Copy all memory to the new area */
	for (i = 0; i < shm_nused; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		(void)memcpy(&newshmsegs[i], &shmsegs[i],
		    sizeof(newshmsegs[0]));
	}

	/* Mark all new segments as free, if there are any */
	for (; i < newshmni; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
		newshmsegs[i].shm_perm._seq = 0;
	}

	oldshmsegs = shmsegs;
	oldshmni = shminfo.shmmni;
	shminfo.shmmni = newshmni;
	shmsegs = newshmsegs;
	shm_cv = newshm_cv;

	/* Reallocation completed - notify all waiters, if any */
	shm_realloc_state = false;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	/* Release now unused resources. */
	oldshm_cv = (void *)((uintptr_t)oldshmsegs +
	    ALIGN(oldshmni * sizeof(struct shmid_ds)));
	for (i = 0; i < oldshmni; i++)
		cv_destroy(&oldshm_cv[i]);

	sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
	    ALIGN(oldshmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);

	return 0;
}

void
shminit(struct sysctllog **clog)
{
	vaddr_t v;
	size_t sz;
	int i;

	mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&shm_realloc_cv, "shmrealc");

	/* Allocate the wired memory for our structures */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		panic("sysv_shm: cannot allocate memory");
	shmsegs = (void *)v;
	shm_cv = (void *)((uintptr_t)shmsegs +
	    ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));

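	/*
	 * shminfo.shmmax is configured in pages at this point; convert it
	 * to bytes, defaulting to 1/4 of physical memory (but at least
	 * 1024 pages) when it is unset.
	 */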
	if (shminfo.shmmax == 0)
		shminfo.shmmax = max(physmem / 4, 1024) * PAGE_SIZE;
	else
		shminfo.shmmax *= PAGE_SIZE;
	shminfo.shmall = shminfo.shmmax / PAGE_SIZE;

	for (i = 0; i < shminfo.shmmni; i++) {
		cv_init(&shm_cv[i], "shmwait");
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm._seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shm_realloc_disable = 0;
	shm_realloc_state = false;

	kern_has_sysvshm = 1;

	/* Load the callback function pointers for the uvm subsystem */
	uvm_shmexit = shmexit;
	uvm_shmfork = shmfork;

#ifdef _MODULE
	if (clog)
		sysctl_ipc_shm_setup(clog);
#endif
}

int
shmfini(void)
{
	size_t sz;
	int i;
	vaddr_t v = (vaddr_t)shmsegs;

	mutex_enter(&shm_lock);
	if (shm_nused) {
		mutex_exit(&shm_lock);
		return 1;
	}

	/* Clear the callback function pointers for the uvm subsystem */
	uvm_shmexit = NULL;
	uvm_shmfork = NULL;

	/* Destroy all condvars */
	for (i = 0; i < shminfo.shmmni; i++)
		cv_destroy(&shm_cv[i]);
	cv_destroy(&shm_realloc_cv);

	/* Free the allocated/wired memory */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);

	/* Release and destroy our mutex */
	mutex_exit(&shm_lock);
	mutex_destroy(&shm_lock);

	kern_has_sysvshm = 0;

	return 0;
}

static int
sysctl_ipc_shmmni(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmni;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	sysctl_unlock();
	error = shmrealloc(newsize);
	sysctl_relock();
	return error;
}

static int
sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
{
	uint32_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmall;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1)
		return EINVAL;

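	/* Keep shmmax (bytes) consistent with the new shmall (pages). */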
	shminfo.shmall = newsize;
	shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE;

	return 0;
}

static int
sysctl_ipc_shmmax(SYSCTLFN_ARGS)
{
	uint64_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmax;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < PAGE_SIZE)
		return EINVAL;

	shminfo.shmmax = round_page(newsize);
	shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;

	return 0;
}

SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "ipc",
	    SYSCTL_DESCR("SysV IPC options"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_SYSVIPC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_QUAD, "shmmax",
	    SYSCTL_DESCR("Max shared memory segment size in bytes"),
	    sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmmni",
	    SYSCTL_DESCR("Max number of shared memory identifiers"),
	    sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmseg",
	    SYSCTL_DESCR("Max shared memory segments per process"),
	    NULL, 0, &shminfo.shmseg, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmmaxpgs",
	    SYSCTL_DESCR("Max amount of shared memory in pages"),
	    sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shm_use_phys",
	    SYSCTL_DESCR("Enable/disable locking of shared memory in "
	    "physical memory"), NULL, 0, &shm_use_phys, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
}