/*	$NetBSD: sysv_shm.c,v 1.131.18.1 2019/06/10 22:09:03 christos Exp $	*/

/*-
 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1994 Adam Glass and Charles M. Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles M.
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.131.18.1 2019/06/10 22:09:03 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_sysv.h"
#endif

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/shm.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
#include <sys/syscallargs.h>
#include <sys/queue.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

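/*
 * A single attachment of a shared memory segment: the address at which
 * it is mapped into the process and the identifier of the segment.
 */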
struct shmmap_entry {
	SLIST_ENTRY(shmmap_entry) next;
	vaddr_t va;
	int shmid;
};

int			shm_nused		__cacheline_aligned;
struct shmid_ds *	shmsegs			__read_mostly;

static kmutex_t		shm_lock		__cacheline_aligned;
static kcondvar_t *	shm_cv			__cacheline_aligned;
static int		shm_last_free		__cacheline_aligned;
static size_t		shm_committed		__cacheline_aligned;
static int		shm_use_phys		__read_mostly;

static kcondvar_t	shm_realloc_cv;
static bool		shm_realloc_state;
static u_int		shm_realloc_disable;

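/*
 * Per-vmspace list of attached segments.  The structure may be shared by
 * several vmspaces after fork() (nrefs is the reference count);
 * shmmap_getprivate() makes a private copy before it is modified.
 */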
struct shmmap_state {
	unsigned int nitems;
	unsigned int nrefs;
	SLIST_HEAD(, shmmap_entry) entries;
};

extern int kern_has_sysvshm;

SYSCTL_SETUP_PROTO(sysctl_ipc_shm_setup);

#ifdef SHMDEBUG
#define SHMPRINTF(a) printf a
#else
#define SHMPRINTF(a)
#endif

static int shmrealloc(int);

/*
 * Find the shared memory segment by the identifier.
 * => must be called with shm_lock held;
 */
static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shminfo.shmmni)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
		return NULL;
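	/* A removed segment stays visible only if it was created to linger. */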
	if ((shmseg->shm_perm.mode &
	    (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
		return NULL;
	if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
		return NULL;

	return shmseg;
}

/*
 * Free memory segment.
 * => must be called with shm_lock held;
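 * => the segment's uvm object is not detached here; callers that need to
 *    release it do so after dropping the lock;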
 */
static void
shm_free_segment(int segnum)
{
	struct shmid_ds *shmseg;
	size_t size;
	bool wanted;

	KASSERT(mutex_owned(&shm_lock));

	shmseg = &shmsegs[segnum];
	SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
	    shmseg->shm_perm._key, shmseg->shm_perm._seq));

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);

	shmseg->_shm_internal = NULL;
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
	shm_last_free = segnum;
	if (wanted == true)
		cv_broadcast(&shm_cv[segnum]);
}

/*
 * Delete entry from the shm map.
 * => must be called with shm_lock held;
 */
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
	struct uvm_object *uobj = NULL;
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmmap_se->shmid);
	shmseg = &shmsegs[segnum];
	SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
	shmmap_s->nitems--;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		uobj = shmseg->_shm_internal;
		shm_free_segment(segnum);
	}

	return uobj;
}

/*
 * Get a non-shared shm map for that vmspace.  Note that memory
 * allocation may be performed with the lock held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
	struct shmmap_state *oshmmap_s, *shmmap_s;
	struct shmmap_entry *oshmmap_se, *shmmap_se;

	KASSERT(mutex_owned(&shm_lock));

	/* 1. A shm map with refcnt = 1, used by ourselves, thus return */
	oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (oshmmap_s && oshmmap_s->nrefs == 1)
		return oshmmap_s;

	/* 2. No shm map present - create a fresh one */
	shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
	shmmap_s->nrefs = 1;
	SLIST_INIT(&shmmap_s->entries);
	p->p_vmspace->vm_shm = (void *)shmmap_s;

	if (oshmmap_s == NULL)
		return shmmap_s;

	SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
	    p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

	/* 3. A shared shm map, copy to a fresh one and adjust refcounts */
	SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
		shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
		shmmap_se->va = oshmmap_se->va;
		shmmap_se->shmid = oshmmap_se->shmid;
		SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	}
	shmmap_s->nitems = oshmmap_s->nitems;
	oshmmap_s->nrefs--;

	return shmmap_s;
}

/*
 * Lock/unlock the memory.
 * => must be called with shm_lock held;
 */
static int
shm_memlock(struct shmid_ds *shmseg, int shmid, int cmd)
{
	size_t size;
	int error;

	KASSERT(mutex_owned(&shm_lock));

	size = round_page(shmseg->shm_segsz);

	if (cmd == SHM_LOCK && (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
		/* Wire the object and map, then tag it */
		error = uvm_obj_wirepages(shmseg->_shm_internal,
		    0, size, NULL);
		if (error)
			return EIO;
		shmseg->shm_perm.mode |= SHMSEG_WIRED;

	} else if (cmd == SHM_UNLOCK &&
	    (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
		/* Unwire the object, then untag it */
		uvm_obj_unwirepages(shmseg->_shm_internal, 0, size);
		shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
	}

	return 0;
}

/*
 * Unmap shared memory.
 */
int
sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) shmaddr;
	} */
	struct proc *p = l->l_proc;
	struct shmmap_state *shmmap_s1, *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct shmid_ds *shmseg;
	size_t size;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s1 == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	/* Find the map entry */
	SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
		if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
			break;
	if (shmmap_se == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	shmmap_s = shmmap_getprivate(p);
	if (shmmap_s != shmmap_s1) {
		/* Map has been copied, lookup entry in new map */
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
				break;
		if (shmmap_se == NULL) {
			mutex_exit(&shm_lock);
			return EINVAL;
		}
	}

	SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, shmmap_se->va));

	/* Delete the entry from shm map */
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	mutex_exit(&shm_lock);

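	/* Unmap the segment and drop the object reference with shm_lock released. */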
	uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
	if (uobj != NULL) {
		uao_detach(uobj);
	}
	kmem_free(shmmap_se, sizeof(struct shmmap_entry));

	return 0;
}

/*
 * Map shared memory.
 */
int
sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(const void *) shmaddr;
		syscallarg(int) shmflg;
	} */
	int error, flags = 0;
	struct proc *p = l->l_proc;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct vmspace *vm;
	vaddr_t attach_va;
	vm_prot_t prot;
	vsize_t size;

	/* Allocate a new map entry and set it */
	shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
	shmmap_se->shmid = SCARG(uap, shmid);

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
	if (shmseg == NULL) {
		error = EINVAL;
		goto err;
	}
	error = ipcperm(cred, &shmseg->shm_perm,
	    (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto err;

	vm = p->p_vmspace;
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
		error = EMFILE;
		goto err;
	}

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	prot = VM_PROT_READ;
	if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
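	/*
	 * A user-supplied address is mapped at that fixed location: SHM_RND
	 * rounds it down to an SHMLBA boundary, otherwise it must already be
	 * SHMLBA-aligned.
	 */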
	if (SCARG(uap, shmaddr)) {
		flags |= UVM_FLAG_FIXED;
		if (SCARG(uap, shmflg) & SHM_RND)
			attach_va =
			    (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
		else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
			attach_va = (vaddr_t)SCARG(uap, shmaddr);
		else {
			error = EINVAL;
			goto err;
		}
	} else {
		/* This is just a hint to uvm_map() about where to put it. */
		attach_va = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)vm->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
	}

	/*
	 * Create a map entry, add it to the list and increase the counters.
	 * The lock will be dropped before the mapping, so disable
	 * reallocation while it is released.
	 */
	shmmap_s = shmmap_getprivate(p);
	SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	shmmap_s->nitems++;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_nattch++;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/*
	 * Add a reference to the memory object and map it into the
	 * address space.
	 */
	uobj = shmseg->_shm_internal;
	uao_reference(uobj);
	error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
	if (error)
		goto err_detach;

	/* Set the new address, and update the time */
	mutex_enter(&shm_lock);
	shmmap_se->va = attach_va;
	shmseg->shm_atime = time_second;
	shm_realloc_disable--;
	retval[0] = attach_va;
	SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, attach_va));
err:
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (error && shmmap_se) {
		kmem_free(shmmap_se, sizeof(struct shmmap_entry));
	}
	return error;

err_detach:
	uao_detach(uobj);
	mutex_enter(&shm_lock);
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (uobj != NULL) {
		uao_detach(uobj);
	}
	kmem_free(shmmap_se, sizeof(struct shmmap_entry));
	return error;
}

/*
 * Shared memory control operations.
 */
int
sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(int) cmd;
		syscallarg(struct shmid_ds *) buf;
	} */
	struct shmid_ds shmbuf;
	int cmd, error;

	cmd = SCARG(uap, cmd);
	if (cmd == IPC_SET) {
		error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
		if (error)
			return error;
	}

	error = shmctl1(l, SCARG(uap, shmid), cmd,
	    (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);

	if (error == 0 && cmd == IPC_STAT)
		error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));

	return error;
}

int
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
	struct uvm_object *uobj = NULL;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	int error = 0;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	switch (cmd) {
	case IPC_STAT:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
			break;
		memset(shmbuf, 0, sizeof *shmbuf);
		shmbuf->shm_perm = shmseg->shm_perm;
		shmbuf->shm_perm.mode &= 0777;
		shmbuf->shm_segsz = shmseg->shm_segsz;
		shmbuf->shm_lpid = shmseg->shm_lpid;
		shmbuf->shm_cpid = shmseg->shm_cpid;
		shmbuf->shm_nattch = shmseg->shm_nattch;
		shmbuf->shm_atime = shmseg->shm_atime;
		shmbuf->shm_dtime = shmseg->shm_dtime;
		shmbuf->shm_ctime = shmseg->shm_ctime;
		break;
	case IPC_SET:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
		shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (shmbuf->shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = time_second;
		break;
	case IPC_RMID:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm._key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
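		/*
		 * Free the segment now if nothing is attached; otherwise
		 * the last detach will free it.
		 */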
		if (shmseg->shm_nattch <= 0) {
			uobj = shmseg->_shm_internal;
			shm_free_segment(IPCID_TO_IX(shmid));
		}
		break;
	case SHM_LOCK:
	case SHM_UNLOCK:
		if ((error = kauth_authorize_system(cred,
		    KAUTH_SYSTEM_SYSVIPC,
		    (cmd == SHM_LOCK) ? KAUTH_REQ_SYSTEM_SYSVIPC_SHM_LOCK :
		    KAUTH_REQ_SYSTEM_SYSVIPC_SHM_UNLOCK, NULL, NULL, NULL)) != 0)
			break;
		error = shm_memlock(shmseg, shmid, cmd);
		break;
	default:
		error = EINVAL;
	}

	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	return error;
}

/*
 * Try to take an already existing segment.
 * => must be called with shm_lock held;
 * => called from one place, thus, inline;
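 * => returns -1 if no segment matches the key, so that the caller may
 *    create one;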
 */
static inline int
shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
    register_t *retval)
{
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	int segnum, error;
again:
	KASSERT(mutex_owned(&shm_lock));

	/* Find segment by key */
	for (segnum = 0; segnum < shminfo.shmmni; segnum++)
		if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[segnum].shm_perm._key == SCARG(uap, key))
			break;
	if (segnum == shminfo.shmmni) {
		/* Not found */
		return -1;
	}

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
		if (error)
			return error;
		goto again;
	}

	/*
	 * First check the flags, to generate a useful error when a
	 * segment already exists.
	 */
	if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
	    (IPC_CREAT | IPC_EXCL))
		return EEXIST;

	/* Check the permission and segment size. */
	error = ipcperm(cred, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
		return EINVAL;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
	/* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	key_t key = SCARG(uap, key);
	size_t size;
	int error, mode, segnum;
	bool lockmem;

	mode = SCARG(uap, shmflg) & ACCESSPERMS;
	if (SCARG(uap, shmflg) & _SHM_RMLINGER)
		mode |= SHMSEG_RMLINGER;

	SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
	    SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	if (key != IPC_PRIVATE) {
		error = shmget_existing(l, uap, mode, retval);
		if (error != -1) {
			mutex_exit(&shm_lock);
			return error;
		}
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
			mutex_exit(&shm_lock);
			return ENOENT;
		}
	}
	error = 0;

	/*
	 * Check for the limits.
	 */
	size = SCARG(uap, size);
	if (size < shminfo.shmmin || size > shminfo.shmmax) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	if (shm_nused >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return ENOSPC;
	}
	size = round_page(size);
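	/* shm_committed and shminfo.shmall are both accounted in pages. */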
	if (shm_committed + btoc(size) > shminfo.shmall) {
		mutex_exit(&shm_lock);
		return ENOMEM;
	}

	/* Find the first available segment */
	if (shm_last_free < 0) {
		for (segnum = 0; segnum < shminfo.shmmni; segnum++)
			if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
				break;
		KASSERT(segnum < shminfo.shmmni);
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}

	/*
	 * Initialize the segment.
	 * We will drop the lock while allocating the memory, so mark the
	 * segment as present but removed, so that no other thread can take
	 * it.  Also, disable reallocation while the lock is dropped.
	 */
	shmseg = &shmsegs[segnum];
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shm_committed += btoc(size);
	shm_nused++;
	lockmem = shm_use_phys;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/* Allocate the memory object and lock it if needed */
	shmseg->_shm_internal = uao_create(size, 0);
	if (lockmem) {
		/* Wire the pages and tag it */
		error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size, NULL);
		if (error) {
			uao_detach(shmseg->_shm_internal);
			mutex_enter(&shm_lock);
			shm_free_segment(segnum);
			shm_realloc_disable--;
			mutex_exit(&shm_lock);
			return error;
		}
	}

	/*
	 * Note that while the segment is marked, there is no need to hold
	 * the lock while setting its fields (except shm_perm.mode).
	 */
	shmseg->shm_perm._key = SCARG(uap, key);
	shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
	shmseg->shm_segsz = SCARG(uap, size);
	shmseg->shm_cpid = l->l_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;

	/*
	 * Segment is initialized.
	 * Enter the lock, mark it as allocated and notify waiters (if any).
	 * Also, re-enable reallocation.
	 */
	mutex_enter(&shm_lock);
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
	    SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		cv_broadcast(&shm_cv[segnum]);
	}
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	return error;
}

void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
	mutex_enter(&shm_lock);
	vm2->vm_shm = vm1->vm_shm;
	if (vm1->vm_shm) {
		shmmap_s = (struct shmmap_state *)vm1->vm_shm;
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
		shmmap_s->nrefs++;
	}
	mutex_exit(&shm_lock);
}

void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	mutex_enter(&shm_lock);
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s == NULL) {
		mutex_exit(&shm_lock);
		return;
	}
	vm->vm_shm = NULL;

	if (--shmmap_s->nrefs > 0) {
		SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
		    vm, shmmap_s->nitems, shmmap_s->nrefs));
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
		}
		mutex_exit(&shm_lock);
		return;
	}

	SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
	if (shmmap_s->nitems == 0) {
		mutex_exit(&shm_lock);
		kmem_free(shmmap_s, sizeof(struct shmmap_state));
		return;
	}

	/*
	 * Delete the entries from the shm map and unmap them.
	 */
	for (;;) {
		struct shmid_ds *shmseg;
		struct uvm_object *uobj;
		size_t sz;

		shmmap_se = SLIST_FIRST(&shmmap_s->entries);
		KASSERT(shmmap_se != NULL);

		shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
		sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
		/* shm_delete_mapping() removes from the list. */
		uobj = shm_delete_mapping(shmmap_s, shmmap_se);
		mutex_exit(&shm_lock);

		uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
		if (uobj != NULL) {
			uao_detach(uobj);
		}
		kmem_free(shmmap_se, sizeof(struct shmmap_entry));

		if (SLIST_EMPTY(&shmmap_s->entries)) {
			break;
		}
		mutex_enter(&shm_lock);
		KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
	}
	kmem_free(shmmap_s, sizeof(struct shmmap_state));
}

static int
shmrealloc(int newshmni)
{
	vaddr_t v;
	struct shmid_ds *oldshmsegs, *newshmsegs;
	kcondvar_t *newshm_cv, *oldshm_cv;
	size_t sz;
	int i, lsegid, oldshmni;

	if (newshmni < 1)
		return EINVAL;

	/* Allocate new memory area */
	sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
	    ALIGN(newshmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		return ENOMEM;

	mutex_enter(&shm_lock);
	while (shm_realloc_state || shm_realloc_disable)
		cv_wait(&shm_realloc_cv, &shm_lock);

	/*
	 * Get the index of the last used segment.  Fail if we would
	 * reallocate to fewer slots than are currently in use.
	 */
	lsegid = 0;
	for (i = 0; i < shminfo.shmmni; i++)
		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
			lsegid = i;
	if (lsegid >= newshmni) {
		mutex_exit(&shm_lock);
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}
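	/* Stall shmat(), shmdt(), shmctl() and shmget() until the new arrays are in place. */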
	shm_realloc_state = true;

	newshmsegs = (void *)v;
	newshm_cv = (void *)((uintptr_t)newshmsegs +
	    ALIGN(newshmni * sizeof(struct shmid_ds)));

	/* Copy all memory to the new area */
	for (i = 0; i < shm_nused; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		(void)memcpy(&newshmsegs[i], &shmsegs[i],
		    sizeof(newshmsegs[0]));
	}

	/* Mark all new segments as free, if there are any */
	for (; i < newshmni; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
		newshmsegs[i].shm_perm._seq = 0;
	}

	oldshmsegs = shmsegs;
	oldshmni = shminfo.shmmni;
	shminfo.shmmni = newshmni;
	shmsegs = newshmsegs;
	shm_cv = newshm_cv;

	/* Reallocation completed - notify all waiters, if any */
	shm_realloc_state = false;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	/* Release now unused resources. */
	oldshm_cv = (void *)((uintptr_t)oldshmsegs +
	    ALIGN(oldshmni * sizeof(struct shmid_ds)));
	for (i = 0; i < oldshmni; i++)
		cv_destroy(&oldshm_cv[i]);

	sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
	    ALIGN(oldshmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);

	return 0;
}

int
shminit(struct sysctllog **clog)
{
	vaddr_t v;
	size_t sz;
	int i;

	mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&shm_realloc_cv, "shmrealc");

	/* Allocate the wired memory for our structures */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0) {
		printf("sysv_shm: cannot allocate memory");
		return ENOMEM;
	}
	shmsegs = (void *)v;
	shm_cv = (void *)((uintptr_t)shmsegs +
	    ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));

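	/* Up to this point shminfo.shmmax is expressed in pages; convert it to bytes. */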
	if (shminfo.shmmax == 0)
		shminfo.shmmax = uimax(physmem / 4, 1024) * PAGE_SIZE;
	else
		shminfo.shmmax *= PAGE_SIZE;
	shminfo.shmall = shminfo.shmmax / PAGE_SIZE;

	for (i = 0; i < shminfo.shmmni; i++) {
		cv_init(&shm_cv[i], "shmwait");
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm._seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shm_realloc_disable = 0;
	shm_realloc_state = false;

	kern_has_sysvshm = 1;

	/* Load the callback function pointers for the uvm subsystem */
	uvm_shmexit = shmexit;
	uvm_shmfork = shmfork;

#ifdef _MODULE
	if (clog)
		sysctl_ipc_shm_setup(clog);
#endif
	return 0;
}

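/*
 * Tear down all SysV shared memory state.  Fails (returns non-zero) if
 * any segment is still in use.
 */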
int
shmfini(void)
{
	size_t sz;
	int i;
	vaddr_t v = (vaddr_t)shmsegs;

	mutex_enter(&shm_lock);
	if (shm_nused) {
		mutex_exit(&shm_lock);
		return 1;
	}

	/* Clear the callback function pointers for the uvm subsystem */
	uvm_shmexit = NULL;
	uvm_shmfork = NULL;

	/* Destroy all condvars */
	for (i = 0; i < shminfo.shmmni; i++)
		cv_destroy(&shm_cv[i]);
	cv_destroy(&shm_realloc_cv);

	/* Free the allocated/wired memory */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);

	/* Release and destroy our mutex */
	mutex_exit(&shm_lock);
	mutex_destroy(&shm_lock);

	kern_has_sysvshm = 0;

	return 0;
}

static int
sysctl_ipc_shmmni(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmni;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	sysctl_unlock();
	error = shmrealloc(newsize);
	sysctl_relock();
	return error;
}

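/*
 * shmmax (bytes) and shmmaxpgs/shmall (pages) express the same limit;
 * each of the handlers below keeps the other value in sync.
 */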
static int
sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
{
	uint32_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmall;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1)
		return EINVAL;

	shminfo.shmall = newsize;
	shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE;

	return 0;
}

static int
sysctl_ipc_shmmax(SYSCTLFN_ARGS)
{
	uint64_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmax;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < PAGE_SIZE)
		return EINVAL;

	shminfo.shmmax = round_page(newsize);
	shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;

	return 0;
}

SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "ipc",
	    SYSCTL_DESCR("SysV IPC options"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_SYSVIPC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_QUAD, "shmmax",
	    SYSCTL_DESCR("Max shared memory segment size in bytes"),
	    sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmmni",
	    SYSCTL_DESCR("Max number of shared memory identifiers"),
	    sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmseg",
	    SYSCTL_DESCR("Max shared memory segments per process"),
	    NULL, 0, &shminfo.shmseg, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shmmaxpgs",
	    SYSCTL_DESCR("Max amount of shared memory in pages"),
	    sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "shm_use_phys",
	    SYSCTL_DESCR("Enable/disable locking of shared memory in "
	    "physical memory"), NULL, 0, &shm_use_phys, 0,
	    CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
}