uipc_usrreq.c revision 1.9 1 /*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * from: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
34 * $Id: uipc_usrreq.c,v 1.9 1994/06/08 11:28:47 mycroft Exp $
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/filedesc.h>
41 #include <sys/domain.h>
42 #include <sys/protosw.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/unpcb.h>
46 #include <sys/un.h>
47 #include <sys/namei.h>
48 #include <sys/vnode.h>
49 #include <sys/file.h>
50 #include <sys/stat.h>
51 #include <sys/mbuf.h>
52
53 /*
54 * Unix communications domain.
55 *
56 * TODO:
57 * SEQPACKET, RDM
58 * rethink name space problems
59 * need a proper out-of-band
60 */
/*
 * sun_noname is the address handed back when a socket (or its peer) has
 * no bound name: see PRU_ACCEPT and the datagram PRU_SEND path.
 * unp_ino is post-incremented to hand out an inode number the first
 * time a socket is stat'ed (PRU_SENSE).
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
ino_t	unp_ino;			/* prototype for fake inode numbers */
63
/*
 * Protocol user-request entry point for Unix-domain sockets.
 *
 *	so	socket the request applies to
 *	req	PRU_* request code selecting the operation
 *	m	request data; for PRU_SENSE it is used as a struct stat *
 *	nam	address mbuf; for PRU_CONNECT2 it is used as a struct socket *
 *	control	optional control data, accepted only with PRU_SEND
 *
 * Returns 0 or an errno value.  Except where a case returns directly
 * (PRU_CONTROL, PRU_SENSE, PRU_RCVOOB), whatever is still held in m and
 * control when the switch finishes is freed at "release" with m_freem().
 */
/*ARGSUSED*/
int
uipc_usrreq(so, req, m, nam, control)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
{
	struct unpcb *unp = sotounpcb(so);
	register struct socket *so2;
	register int error = 0;
	struct proc *p = curproc;		/* XXX */

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* Non-empty control data is only meaningful when sending. */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	/* Every request except attach needs an existing pcb. */
	if (unp == 0 && req != PRU_ATTACH) {
		error = EINVAL;
		goto release;
	}
	switch (req) {

	case PRU_ATTACH:
		if (unp) {
			error = EISCONN;
			break;
		}
		error = unp_attach(so);
		break;

	case PRU_DETACH:
		unp_detach(unp);
		break;

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* Only a socket that has been bound to a vnode may listen. */
		if (unp->unp_vnode == 0)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* nam carries the second socket pointer here, not an address. */
		error = unp_connect2(so, (struct socket *)nam);
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			nam->m_len = unp->unp_conn->unp_addr->m_len;
			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else {
			nam->m_len = sizeof(sun_noname);
			*(mtod(nam, struct sockaddr *)) = sun_noname;
		}
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
#define	rcv (&so->so_rcv)
#define snd (&so2->so_snd)
			if (unp->unp_conn == 0)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			/*
			 * unp_mbcnt/unp_cc hold the receive-buffer counts as
			 * of the last adjustment; the difference from the
			 * current counts is credited to the peer's send limits.
			 */
			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
			unp->unp_mbcnt = rcv->sb_mbcnt;
			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
			unp->unp_cc = rcv->sb_cc;
			sowwakeup(so2);
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		/* Turn descriptor numbers in control into file pointers. */
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			struct sockaddr *from;

			/*
			 * With a destination address the socket is connected
			 * just for this send and disconnected again below;
			 * without one it must already be connected.
			 */
			if (nam) {
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == 0) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			/*
			 * On success m and control are cleared so that the
			 * release code does not free them.
			 * NOTE(review): on ENOBUFS control is released with
			 * m_freem() although unp_internalize() has already
			 * bumped f_count/f_msgcount/unp_rights for the files
			 * it names -- confirm those references are dropped.
			 */
			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				m = 0;
				control = 0;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
#define	rcv (&so2->so_rcv)
#define	snd (&so->so_snd)
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == 0)
				panic("uipc 3");
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then reduce
			 * send buffer hiwater marks to maintain backpressure.
			 * Wake up readers.
			 */
			/*
			 * NOTE(review): m is cleared below even when
			 * sbappendcontrol() returns 0; whether m was consumed
			 * in that case cannot be told from here -- confirm.
			 */
			if (control) {
				if (sbappendcontrol(rcv, m, control))
					control = 0;
			} else
				sbappend(rcv, m);
			snd->sb_mbmax -=
			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
			unp->unp_conn->unp_cc = rcv->sb_cc;
			sorwakeup(so2);
			m = 0;
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 4");
		}
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE:
		/*
		 * m is treated as a struct stat here; this case returns
		 * directly and so bypasses the m_freem() calls at release.
		 */
		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
			so2 = unp->unp_conn->unp_socket;
			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
		}
		((struct stat *) m)->st_dev = NODEV;
		/* Assign a fake inode number on first use. */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		((struct stat *) m)->st_ino = unp->unp_ino;
		return (0);

	case PRU_RCVOOB:
		/* Returns directly: nothing is freed on this path. */
		return (EOPNOTSUPP);

	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		/* Copy out our own bound name; length 0 when unbound. */
		if (unp->unp_addr) {
			nam->m_len = unp->unp_addr->m_len;
			bcopy(mtod(unp->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_PEERADDR:
		/* Copy out the peer's bound name; length 0 when there is none. */
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			nam->m_len = unp->unp_conn->unp_addr->m_len;
			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("piusrreq");
	}
release:
	/* Free whatever the request did not hand off or clear. */
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	return (error);
}
301
302 /*
303 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
304 * for stream sockets, although the total for sender and receiver is
305 * actually only PIPSIZ.
306 * Datagram sockets really use the sendspace as the maximum datagram size,
307 * and don't really want to reserve the sendspace. Their recvspace should
308 * be large enough for at least one max-size datagram plus address.
309 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;	/* stream send space passed to soreserve */
u_long	unpst_recvspace = PIPSIZ;	/* stream receive space passed to soreserve */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;	/* datagram receive space passed to soreserve */

/*
 * Incremented per file in unp_internalize, decremented in
 * unp_externalize and unp_discard; unp_detach runs the garbage
 * collector only while it is non-zero.
 */
int	unp_rights;			/* file descriptors in flight */
317
318 int
319 unp_attach(so)
320 struct socket *so;
321 {
322 register struct mbuf *m;
323 register struct unpcb *unp;
324 int error;
325
326 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
327 switch (so->so_type) {
328
329 case SOCK_STREAM:
330 error = soreserve(so, unpst_sendspace, unpst_recvspace);
331 break;
332
333 case SOCK_DGRAM:
334 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
335 break;
336
337 default:
338 panic("unp_attach");
339 }
340 if (error)
341 return (error);
342 }
343 m = m_getclr(M_DONTWAIT, MT_PCB);
344 if (m == NULL)
345 return (ENOBUFS);
346 unp = mtod(m, struct unpcb *);
347 so->so_pcb = (caddr_t)unp;
348 unp->unp_socket = so;
349 return (0);
350 }
351
352 int
353 unp_detach(unp)
354 register struct unpcb *unp;
355 {
356
357 if (unp->unp_vnode) {
358 unp->unp_vnode->v_socket = 0;
359 vrele(unp->unp_vnode);
360 unp->unp_vnode = 0;
361 }
362 if (unp->unp_conn)
363 unp_disconnect(unp);
364 while (unp->unp_refs)
365 unp_drop(unp->unp_refs, ECONNRESET);
366 soisdisconnected(unp->unp_socket);
367 unp->unp_socket->so_pcb = 0;
368 m_freem(unp->unp_addr);
369 (void) m_free(dtom(unp));
370 if (unp_rights) {
371 /*
372 * Normally the receive buffer is flushed later,
373 * in sofree, but if our receive buffer holds references
374 * to descriptors that are now garbage, we will dispose
375 * of those descriptor references after the garbage collector
376 * gets them (resulting in a "panic: closef: count < 0").
377 */
378 sorflush(unp->unp_socket);
379 unp_gc();
380 }
381 }
382
383 int
384 unp_bind(unp, nam, p)
385 struct unpcb *unp;
386 struct mbuf *nam;
387 struct proc *p;
388 {
389 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
390 register struct vnode *vp;
391 struct vattr vattr;
392 int error;
393 struct nameidata nd;
394
395 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
396 soun->sun_path, p);
397 if (unp->unp_vnode != NULL)
398 return (EINVAL);
399 if (nam->m_len == MLEN) {
400 if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
401 return (EINVAL);
402 } else
403 *(mtod(nam, caddr_t) + nam->m_len) = 0;
404 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
405 if (error = namei(&nd))
406 return (error);
407 vp = nd.ni_vp;
408 if (vp != NULL) {
409 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
410 if (nd.ni_dvp == vp)
411 vrele(nd.ni_dvp);
412 else
413 vput(nd.ni_dvp);
414 vrele(vp);
415 return (EADDRINUSE);
416 }
417 VATTR_NULL(&vattr);
418 vattr.va_type = VSOCK;
419 vattr.va_mode = ACCESSPERMS;
420 LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
421 if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
422 return (error);
423 vp = nd.ni_vp;
424 vp->v_socket = unp->unp_socket;
425 unp->unp_vnode = vp;
426 unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
427 VOP_UNLOCK(vp);
428 return (0);
429 }
430
/*
 * Connect a socket to the socket bound at the pathname in nam.
 *
 * The path is NUL-terminated in place (EMSGSIZE if it fills the mbuf
 * unterminated) and looked up with the leaf locked.  The target must be
 * a VSOCK vnode writable by p with a socket of the same type attached.
 * For protocols with PR_CONNREQUIRED the target must be accepting
 * connections; a new socket is spawned from it, inherits its bound
 * address, and becomes the actual peer.
 *
 * Returns 0 or an errno value; the vnode is released on every path
 * after a successful lookup.
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	register struct socket *peer, *child;
	struct unpcb *lunp, *cunp;
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	if (nam->m_data + nam->m_len != &nam->m_dat[MLEN])	/* XXX */
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	else if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
		return (EMSGSIZE);

	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto bad;

	peer = vp->v_socket;
	if (peer == 0) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (peer->so_type != so->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		child = sonewconn(peer, 0);
		if (child == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		/* The spawned socket reports the listener's bound name. */
		lunp = sotounpcb(peer);
		cunp = sotounpcb(child);
		if (lunp->unp_addr)
			cunp->unp_addr =
			    m_copy(lunp->unp_addr, 0, (int)M_COPYALL);
		peer = child;
	}
	error = unp_connect2(so, peer);
bad:
	vput(vp);
	return (error);
}
486
487 int
488 unp_connect2(so, so2)
489 register struct socket *so;
490 register struct socket *so2;
491 {
492 register struct unpcb *unp = sotounpcb(so);
493 register struct unpcb *unp2;
494
495 if (so2->so_type != so->so_type)
496 return (EPROTOTYPE);
497 unp2 = sotounpcb(so2);
498 unp->unp_conn = unp2;
499 switch (so->so_type) {
500
501 case SOCK_DGRAM:
502 unp->unp_nextref = unp2->unp_refs;
503 unp2->unp_refs = unp;
504 soisconnected(so);
505 break;
506
507 case SOCK_STREAM:
508 unp2->unp_conn = unp;
509 soisconnected(so);
510 soisconnected(so2);
511 break;
512
513 default:
514 panic("unp_connect2");
515 }
516 return (0);
517 }
518
519 void
520 unp_disconnect(unp)
521 struct unpcb *unp;
522 {
523 register struct unpcb *unp2 = unp->unp_conn;
524
525 if (unp2 == 0)
526 return;
527 unp->unp_conn = 0;
528 switch (unp->unp_socket->so_type) {
529
530 case SOCK_DGRAM:
531 if (unp2->unp_refs == unp)
532 unp2->unp_refs = unp->unp_nextref;
533 else {
534 unp2 = unp2->unp_refs;
535 for (;;) {
536 if (unp2 == 0)
537 panic("unp_disconnect");
538 if (unp2->unp_nextref == unp)
539 break;
540 unp2 = unp2->unp_nextref;
541 }
542 unp2->unp_nextref = unp->unp_nextref;
543 }
544 unp->unp_nextref = 0;
545 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
546 break;
547
548 case SOCK_STREAM:
549 soisdisconnected(unp->unp_socket);
550 unp2->unp_conn = 0;
551 soisdisconnected(unp2->unp_socket);
552 break;
553 }
554 }
555
#ifdef notdef
/*
 * Compiled out.  Would abort a connection simply by detaching the pcb;
 * PRU_ABORT in uipc_usrreq calls unp_drop instead.
 */
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
564
565 void
566 unp_shutdown(unp)
567 struct unpcb *unp;
568 {
569 struct socket *so;
570
571 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
572 (so = unp->unp_conn->unp_socket))
573 socantrcvmore(so);
574 }
575
576 void
577 unp_drop(unp, errno)
578 struct unpcb *unp;
579 int errno;
580 {
581 struct socket *so = unp->unp_socket;
582
583 so->so_error = errno;
584 unp_disconnect(unp);
585 if (so->so_head) {
586 so->so_pcb = (caddr_t) 0;
587 m_freem(unp->unp_addr);
588 (void) m_free(dtom(unp));
589 sofree(so);
590 }
591 }
592
#ifdef notdef
/* Compiled out.  Empty placeholder; it does nothing. */
unp_drain()
{

}
#endif
599
600 int
601 unp_externalize(rights)
602 struct mbuf *rights;
603 {
604 struct proc *p = curproc; /* XXX */
605 register int i;
606 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
607 register struct file **rp = (struct file **)(cm + 1);
608 register struct file *fp;
609 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
610 int f;
611
612 if (!fdavail(p, newfds)) {
613 for (i = 0; i < newfds; i++) {
614 fp = *rp;
615 unp_discard(fp);
616 *rp++ = 0;
617 }
618 return (EMSGSIZE);
619 }
620 for (i = 0; i < newfds; i++) {
621 if (fdalloc(p, 0, &f))
622 panic("unp_externalize");
623 fp = *rp;
624 p->p_fd->fd_ofiles[f] = fp;
625 fp->f_msgcount--;
626 unp_rights--;
627 *(int *)rp++ = f;
628 }
629 return (0);
630 }
631
632 int
633 unp_internalize(control, p)
634 struct mbuf *control;
635 struct proc *p;
636 {
637 struct filedesc *fdp = p->p_fd;
638 register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
639 register struct file **rp;
640 register struct file *fp;
641 register int i, fd;
642 int oldfds;
643
644 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
645 cm->cmsg_len != control->m_len)
646 return (EINVAL);
647 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
648 rp = (struct file **)(cm + 1);
649 for (i = 0; i < oldfds; i++) {
650 fd = *(int *)rp++;
651 if ((unsigned)fd >= fdp->fd_nfiles ||
652 fdp->fd_ofiles[fd] == NULL)
653 return (EBADF);
654 }
655 rp = (struct file **)(cm + 1);
656 for (i = 0; i < oldfds; i++) {
657 fp = fdp->fd_ofiles[*(int *)rp];
658 *rp++ = fp;
659 fp->f_count++;
660 fp->f_msgcount++;
661 unp_rights++;
662 }
663 return (0);
664 }
665
/*
 * Garbage-collector state.  unp_defer counts files currently flagged
 * FDEFER (raised in unp_mark, lowered in unp_gc); unp_gcing is non-zero
 * while unp_gc is running so that a nested call returns at once.
 */
int	unp_defer, unp_gcing;
extern	struct domain unixdomain;
668
669 void
670 unp_gc()
671 {
672 register struct file *fp, *nextfp;
673 register struct socket *so;
674 struct file **extra_ref, **fpp;
675 int nunref, i;
676
677 if (unp_gcing)
678 return;
679 unp_gcing = 1;
680 unp_defer = 0;
681 for (fp = filehead; fp; fp = fp->f_filef)
682 fp->f_flag &= ~(FMARK|FDEFER);
683 do {
684 for (fp = filehead; fp; fp = fp->f_filef) {
685 if (fp->f_count == 0)
686 continue;
687 if (fp->f_flag & FDEFER) {
688 fp->f_flag &= ~FDEFER;
689 unp_defer--;
690 } else {
691 if (fp->f_flag & FMARK)
692 continue;
693 if (fp->f_count == fp->f_msgcount)
694 continue;
695 fp->f_flag |= FMARK;
696 }
697 if (fp->f_type != DTYPE_SOCKET ||
698 (so = (struct socket *)fp->f_data) == 0)
699 continue;
700 if (so->so_proto->pr_domain != &unixdomain ||
701 (so->so_proto->pr_flags&PR_RIGHTS) == 0)
702 continue;
703 #ifdef notdef
704 if (so->so_rcv.sb_flags & SB_LOCK) {
705 /*
706 * This is problematical; it's not clear
707 * we need to wait for the sockbuf to be
708 * unlocked (on a uniprocessor, at least),
709 * and it's also not clear what to do
710 * if sbwait returns an error due to receipt
711 * of a signal. If sbwait does return
712 * an error, we'll go into an infinite
713 * loop. Delete all of this for now.
714 */
715 (void) sbwait(&so->so_rcv);
716 goto restart;
717 }
718 #endif
719 unp_scan(so->so_rcv.sb_mb, unp_mark);
720 }
721 } while (unp_defer);
722 /*
723 * We grab an extra reference to each of the file table entries
724 * that are not otherwise accessible and then free the rights
725 * that are stored in messages on them.
726 *
727 * The bug in the orginal code is a little tricky, so I'll describe
728 * what's wrong with it here.
729 *
730 * It is incorrect to simply unp_discard each entry for f_msgcount
731 * times -- consider the case of sockets A and B that contain
732 * references to each other. On a last close of some other socket,
733 * we trigger a gc since the number of outstanding rights (unp_rights)
734 * is non-zero. If during the sweep phase the gc code un_discards,
735 * we end up doing a (full) closef on the descriptor. A closef on A
736 * results in the following chain. Closef calls soo_close, which
737 * calls soclose. Soclose calls first (through the switch
738 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
739 * returns because the previous instance had set unp_gcing, and
740 * we return all the way back to soclose, which marks the socket
741 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
742 * to free up the rights that are queued in messages on the socket A,
743 * i.e., the reference on B. The sorflush calls via the dom_dispose
744 * switch unp_dispose, which unp_scans with unp_discard. This second
745 * instance of unp_discard just calls closef on B.
746 *
747 * Well, a similar chain occurs on B, resulting in a sorflush on B,
748 * which results in another closef on A. Unfortunately, A is already
749 * being closed, and the descriptor has already been marked with
750 * SS_NOFDREF, and soclose panics at this point.
751 *
752 * Here, we first take an extra reference to each inaccessible
753 * descriptor. Then, we call sorflush ourself, since we know
754 * it is a Unix domain socket anyhow. After we destroy all the
755 * rights carried in messages, we do a last closef to get rid
756 * of our extra reference. This is the last close, and the
757 * unp_detach etc will shut down the socket.
758 *
759 * 91/09/19, bsy (at) cs.cmu.edu
760 */
761 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
762 for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
763 nextfp = fp->f_filef;
764 if (fp->f_count == 0)
765 continue;
766 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
767 *fpp++ = fp;
768 nunref++;
769 fp->f_count++;
770 }
771 }
772 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
773 sorflush((struct socket *)(*fpp)->f_data);
774 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
775 closef(*fpp);
776 free((caddr_t)extra_ref, M_FILE);
777 unp_gcing = 0;
778 }
779
780 void
781 unp_dispose(m)
782 struct mbuf *m;
783 {
784
785 if (m)
786 unp_scan(m, unp_discard);
787 }
788
789 void
790 unp_scan(m0, op)
791 register struct mbuf *m0;
792 void (*op) __P((struct file *));
793 {
794 register struct mbuf *m;
795 register struct file **rp;
796 register struct cmsghdr *cm;
797 register int i;
798 int qfds;
799
800 while (m0) {
801 for (m = m0; m; m = m->m_next)
802 if (m->m_type == MT_CONTROL &&
803 m->m_len >= sizeof(*cm)) {
804 cm = mtod(m, struct cmsghdr *);
805 if (cm->cmsg_level != SOL_SOCKET ||
806 cm->cmsg_type != SCM_RIGHTS)
807 continue;
808 qfds = (cm->cmsg_len - sizeof *cm)
809 / sizeof (struct file *);
810 rp = (struct file **)(cm + 1);
811 for (i = 0; i < qfds; i++)
812 (*op)(*rp++);
813 break; /* XXX, but saves time */
814 }
815 m0 = m0->m_act;
816 }
817 }
818
819 void
820 unp_mark(fp)
821 struct file *fp;
822 {
823
824 if (fp->f_flag & FMARK)
825 return;
826 unp_defer++;
827 fp->f_flag |= (FMARK|FDEFER);
828 }
829
830 void
831 unp_discard(fp)
832 struct file *fp;
833 {
834
835 fp->f_msgcount--;
836 unp_rights--;
837 (void) closef(fp, (struct proc *)NULL);
838 }
839