/*	$NetBSD: uipc_socket.c,v 1.54.2.6 2001/11/14 19:16:46 nathanw Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.54.2.6 2001/11/14 19:16:46 nathanw Exp $");

#include "opt_compat_sunos.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lwp.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>

struct pool	socket_pool;

extern int	somaxconn;		/* patchable (XXX sysctl) */
int		somaxconn = SOMAXCONN;

void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", 0, NULL, NULL, M_SOCKET);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc	*p;
	struct protosw	*prp;
	struct socket	*so;
	int		error, s;

	p = curproc->l_proc;		/* XXX */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	splx(s);
	*aso = so;
	return (0);
}
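
/*
 * Illustrative sketch of a typical caller (not part of the protocol
 * machinery itself): the domain/type/proto triple selects a protosw
 * entry, and any failure mode surfaces as the return value.
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, 0);
 *	if (error)
 *		return (error);		(e.g. EPROTONOSUPPORT, EPROTOTYPE)
 *	... use so; release with soclose(so) ...
 */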

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
	    nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (so->so_q.tqh_first == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0)
		backlog = 0;
	so->so_qlimit = min(backlog, somaxconn);
	splx(s);
	return (0);
}
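
/*
 * Illustrative sketch: the backlog is clamped, never rejected, so the
 * hypothetical values below all succeed once PRU_LISTEN does:
 *
 *	solisten(so, 1000);	=> so_qlimit = min(1000, somaxconn)
 *	solisten(so, -5);	=> so_qlimit = 0
 */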

void
sofree(struct socket *so)
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket	*so2;
	int		s, error;

	error = 0;
	s = splsoftnet();		/* conservative */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = so->so_q0.tqh_first) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = so->so_q.tqh_first) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
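
/*
 * Illustrative sketch of how SO_LINGER drives the wait above: with
 * l_onoff set and a hypothetical l_linger of 5, soclose() sleeps on
 * so_timeo for at most 5 * hz ticks waiting for the disconnect to
 * complete; with SS_NBIO also set, the wait is skipped and we drop
 * straight to PRU_DETACH.
 *
 *	struct linger l = { 1, 5 };	(l_onoff, l_linger)
 *	(copy l into an MT_SOOPTS mbuf and pass it to
 *	 sosetopt(so, SOL_SOCKET, SO_LINGER, m), then)
 *	soclose(so);			(may sleep up to ~5 seconds)
 */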

/*
 * Must be called at splsoftnet...
 */
int
soabort(struct socket *so)
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int	s, error;

	error = 0;
	s = splsoftnet();
	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
	else
		error = ECONNABORTED;

	splx(s);
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	struct proc	*p;
	int		s, error;

	p = curproc->l_proc;		/* XXX */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}
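
/*
 * Illustrative sketch of the "null address" escape hatch mentioned in
 * the comment above: for a connectionless protocol, an existing
 * association can be dissolved because the sodisconnect() branch runs
 * before EISCONN would be returned.  Whether AF_UNSPEC is then
 * accepted is up to the protocol's PRU_CONNECT handler; this is only
 * the shape of the call:
 *
 *	struct sockaddr sa;
 *
 *	memset(&sa, 0, sizeof(sa));
 *	sa.sa_family = AF_UNSPEC;
 *	(wrap sa in an mbuf nam, then)
 *	error = soconnect(so, nam);
 */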

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int	s, error;

	s = splsoftnet();
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
	    (struct proc *)0);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int	s, error;

	s = splsoftnet();
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0);
 bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{
	struct proc	*p;
	struct mbuf	**mp, *m;
	long		space, len, resid, clen, mlen;
	int		error, s, dontroute, atomic;

	p = curproc->l_proc;		/* XXX */
	clen = 0;
	atomic = sosendallatonce(so) || top;
	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = lmin(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
#endif
					space -= len;
				} else {
 nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
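
/*
 * Illustrative sketch of the short-count contract stated in the
 * comment above sosend(): a caller that tolerates partial writes
 * compares uio_resid before and after the call and suppresses
 * EINTR/ERESTART when some data was already accepted.
 *
 *	resid = auio.uio_resid;
 *	error = (*so->so_send)(so, NULL, &auio, NULL, NULL, 0);
 *	if ((error == EINTR || error == ERESTART) &&
 *	    auio.uio_resid != resid)
 *		error = 0;		(report the partial count instead)
 */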

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf	*m, **mp;
	int		flags, len, error, s, offset, moff, type, orig_resid;
	struct protosw	*pr;
	struct mbuf	*nextrecord;

	pr = so->so_proto;
	mp = mp0;
	type = 0;
	orig_resid = uio->uio_resid;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * The sockbuf must be consistent here (sb_mb points to
		 * the current mbuf, m_nextpkt to the next record) when
		 * we drop priority; we must note any additions to the
		 * sockbuf when we block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
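
/*
 * Illustrative sketch of the mp0 mode described in the comment above
 * soreceive(): when a chain pointer is supplied, the data comes back
 * as mbufs and the uio contributes only the byte count (the 65536
 * below is a hypothetical bound).
 *
 *	struct uio auio;
 *	struct mbuf *m = NULL;
 *
 *	memset(&auio, 0, sizeof(auio));
 *	auio.uio_resid = 65536;
 *	error = (*so->so_receive)(so, NULL, &auio, &m, NULL, NULL);
 *	(on success the caller owns m and must m_freem() it)
 */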

int
soshutdown(struct socket *so, int how)
{
	struct protosw	*pr;

	pr = so->so_proto;
	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how == SHUT_RD || how == SHUT_RDWR)
		sorflush(so);
	if (how == SHUT_WR || how == SHUT_RDWR)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	return (0);
}
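
/*
 * Illustrative sketch of how the SHUT_* values map onto the two
 * actions above:
 *
 *	soshutdown(so, SHUT_RD);	=> sorflush() only
 *	soshutdown(so, SHUT_WR);	=> PRU_SHUTDOWN only
 *	soshutdown(so, SHUT_RDWR);	=> both
 *	anything else			=> EINVAL
 */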

void
sorflush(struct socket *so)
{
	struct sockbuf	*sb, asb;
	struct protosw	*pr;
	int		s;

	sb = &so->so_rcv;
	pr = so->so_proto;
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int		error;
	struct mbuf	*m;

	error = 0;
	m = m0;
	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}
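
/*
 * Illustrative sketch: option values arrive packed in an mbuf that
 * sosetopt() consumes on every path, so the caller never frees it.
 * Setting SO_RCVBUF to a hypothetical 65536 bytes looks roughly like
 * the kernel's own setsockopt path:
 *
 *	struct mbuf *m;
 *
 *	m = m_get(M_WAIT, MT_SOOPTS);
 *	m->m_len = sizeof(int);
 *	*mtod(m, int *) = 65536;
 *	error = sosetopt(so, SOL_SOCKET, SO_RCVBUF, m);
 */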

int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf	*m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
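
/*
 * Illustrative sketch of the read side: for SOL_SOCKET options the
 * result comes back in a freshly allocated mbuf owned by the caller.
 * Note that fetching SO_ERROR also clears so_error, as above.
 *
 *	struct mbuf *m = NULL;
 *	int soerror;
 *
 *	error = sogetopt(so, SOL_SOCKET, SO_ERROR, &m);
 *	if (error == 0) {
 *		soerror = *mtod(m, int *);
 *		(void) m_free(m);
 *	}
 */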

void
sohasoutofband(struct socket *so)
{
	struct proc	*p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
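
/*
 * Illustrative sketch of the so_pgid sign convention used above,
 * which follows the usual F_SETOWN/SIOCSPGRP encoding:
 *
 *	so->so_pgid = 123;	=> psignal() SIGURG to pid 123
 *	so->so_pgid = -123;	=> gsignal() SIGURG to pgrp 123
 *	so->so_pgid = 0;	=> no signal; selwakeup() only
 */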