uipc_socket.c revision 1.39 1 /* $NetBSD: uipc_socket.c,v 1.39 1998/09/25 23:32:27 matt Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95
36 */
37
38 #include "opt_compat_sunos.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/domain.h>
47 #include <sys/kernel.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/resourcevar.h>
53 #include <sys/pool.h>
54
55 struct pool socket_pool;
56
57 void
58 soinit()
59 {
60
61 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
62 "sockpl", 0, NULL, NULL, M_SOCKET);
63 }
64
65 /*
66 * Socket operation routines.
67 * These routines are called by the routines in
68 * sys_socket.c or from a system process, and
69 * implement the semantics of socket operations by
70 * switching out to the protocol specific routines.
71 */
/*
 * Create a new socket of the given type in the given domain.
 * If "proto" is nonzero the protocol is looked up by number, otherwise
 * the domain's default protocol for "type" is used.  The socket is
 * allocated from socket_pool, zeroed, and attached to the protocol via
 * PRU_ATTACH.  On success *aso holds the new socket and 0 is returned;
 * on failure the socket is released and an errno value is returned.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;
	int s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);		/* incomplete connections */
	TAILQ_INIT(&so->so_q);		/* completed connections */
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		/*
		 * Mark the socket as having no file-descriptor reference
		 * so that sofree() will actually release it.
		 */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		/* SunOS datagram sockets come up with SO_BROADCAST set. */
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	splx(s);
	*aso = so;
	return (0);
}
122
123 int
124 sobind(so, nam)
125 struct socket *so;
126 struct mbuf *nam;
127 {
128 struct proc *p = curproc; /* XXX */
129 int s = splsoftnet();
130 int error;
131
132 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
133 nam, (struct mbuf *)0, p);
134 splx(s);
135 return (error);
136 }
137
138 int
139 solisten(so, backlog)
140 register struct socket *so;
141 int backlog;
142 {
143 int s = splsoftnet(), error;
144
145 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
146 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
147 if (error) {
148 splx(s);
149 return (error);
150 }
151 if (so->so_q.tqh_first == NULL)
152 so->so_options |= SO_ACCEPTCONN;
153 if (backlog < 0)
154 backlog = 0;
155 so->so_qlimit = min(backlog, SOMAXCONN);
156 splx(s);
157 return (0);
158 }
159
/*
 * Release a socket structure once nothing references it any more:
 * the protocol control block must already be detached and there must
 * be no remaining file-descriptor reference (SS_NOFDREF set).
 * Drains both buffers and returns the socket to socket_pool.
 */
void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * Still queued on a listening socket; it must be on
		 * either the incomplete (q0) or the complete (q)
		 * accept queue -- anything else is fatal.
		 */
		if (!soqremque(so, 0) && !soqremque(so, 1))
			panic("sofree dq");
		so->so_head = 0;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}
176
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 * Returns any error from the disconnect, linger wait, or detach.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splsoftnet();		/* conservative */
	int error = 0;

	/* A listening socket first aborts every queued connection. */
	if (so->so_options & SO_ACCEPTCONN) {
		while (so->so_q0.tqh_first)
			(void) soabort(so->so_q0.tqh_first);
		while (so->so_q.tqh_first)
			(void) soabort(so->so_q.tqh_first);
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/*
			 * SO_LINGER: wait (up to so_linger ticks, or
			 * forever if so_linger is 0) for the disconnect
			 * to complete -- unless the socket is
			 * non-blocking, in which case drop immediately.
			 */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
					       PSOCK | PCATCH, netcls,
					       so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		/* Detach from the protocol; keep any earlier error. */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
232
233 /*
234 * Must be called at splsoftnet...
235 */
236 int
237 soabort(so)
238 struct socket *so;
239 {
240
241 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
242 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
243 }
244
245 int
246 soaccept(so, nam)
247 register struct socket *so;
248 struct mbuf *nam;
249 {
250 int s = splsoftnet();
251 int error;
252
253 if ((so->so_state & SS_NOFDREF) == 0)
254 panic("soaccept: !NOFDREF");
255 so->so_state &= ~SS_NOFDREF;
256 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, (struct mbuf *)0,
257 nam, (struct mbuf *)0, (struct proc *)0);
258 splx(s);
259 return (error);
260 }
261
/*
 * Initiate a connection to the address in "nam".
 * Listening sockets cannot connect.  Returns an errno value,
 * EISCONN if an existing connection is in the way.
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	struct proc *p = curproc;		/* XXX */
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 *
	 * Note the short-circuit: sodisconnect() is only attempted for
	 * non-connection-oriented protocols, and if it fails its error
	 * is deliberately replaced by EISCONN below.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}
290
291 int
292 soconnect2(so1, so2)
293 register struct socket *so1;
294 struct socket *so2;
295 {
296 int s = splsoftnet();
297 int error;
298
299 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
300 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
301 (struct proc *)0);
302 splx(s);
303 return (error);
304 }
305
306 int
307 sodisconnect(so)
308 register struct socket *so;
309 {
310 int s = splsoftnet();
311 int error;
312
313 if ((so->so_state & SS_ISCONNECTED) == 0) {
314 error = ENOTCONN;
315 goto bad;
316 }
317 if (so->so_state & SS_ISDISCONNECTING) {
318 error = EALREADY;
319 goto bad;
320 }
321 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
322 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
323 (struct proc *)0);
324 bad:
325 splx(s);
326 return (error);
327 }
328
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Bail out of the send loop with "errno", dropping spl first. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	/* Serialise against other senders on this socket. */
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/*
				 * A control-only send (resid == 0 &&
				 * clen != 0) is allowed while confirming.
				 */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		/* Allow out-of-band data a little extra headroom. */
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			/* Not enough room: block or bail. */
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* Build the chain one mbuf at a time. */
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					/* Worth using a cluster. */
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = min(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = min(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = min(MCLBYTES, resid);
#endif
					space -= len;
				} else {
 nopages:
					len = min(min(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splsoftnet();		/* XXX */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* Chain and control now owned by the protocol. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
508
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		/* Out-of-band data bypasses the receive buffer entirely. */
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			/* Deliver buffered data before reporting the error. */
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB data must be delivered now. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		/* First mbuf of the record carries the sender's address. */
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Then any ancillary-data (control) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/*
				 * Passed file descriptors must be
				 * externalized into this process.
				 */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	/* Finally the data mbufs themselves. */
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix OOB and normal data in one receive. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed this whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: adjust in place (or copy for mp). */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Short count, no error -- see above. */
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Record-oriented: unread remainder is discarded. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/* Tell the protocol buffer space has been freed up. */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	/* Nothing transferred and no terminating condition: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
826
827 int
828 soshutdown(so, how)
829 struct socket *so;
830 int how;
831 {
832 struct protosw *pr = so->so_proto;
833
834 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
835 return (EINVAL);
836
837 if (how == SHUT_RD || how == SHUT_RDWR)
838 sorflush(so);
839 if (how == SHUT_WR || how == SHUT_RDWR)
840 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
841 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
842 return (0);
843 }
844
/*
 * Flush and discard everything in a socket's receive buffer, letting
 * the owning domain dispose of any in-flight access rights first.
 * Used by sofree() and by soshutdown() for the read side.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* SB_NOINTR ensures the sblock below cannot be interrupted. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/*
	 * Snapshot the buffer, then zero the original so that new data
	 * cannot land in the chain we are about to dispose of.
	 */
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	/* E.g. the unix domain closes file descriptors here. */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
866
/*
 * Set a socket option.  SOL_SOCKET options are handled here; any other
 * level is passed straight to the protocol's ctloutput routine.
 * The option mbuf "m0" is consumed in all paths (freed here unless
 * handed to the protocol).
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/*
			 * fall thru... the l_onoff word sits first in
			 * struct linger, so the boolean code below reads it.
			 */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/* Simple boolean options stored in so_options. */
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/*
			 * NOTE(review): tv->tv_sec * hz can overflow a
			 * signed int for large tv_sec before this range
			 * check runs -- consider dividing instead, e.g.
			 * tv_sec > (SHRT_MAX - tv_usec / tick) / hz.
			 */
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a chance to see SOL_SOCKET options too. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}
1007
/*
 * Get a socket option.  SOL_SOCKET options are answered here from the
 * socket structure; other levels are referred to the protocol's
 * ctloutput routine.  On success *mp holds a freshly allocated mbuf
 * with the option value, which the caller must free.
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);	/* default; overridden below */

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/* Boolean options: return the masked option bit. */
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Stored in clock ticks; convert back to timeval. */
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
				(val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
1093
1094 void
1095 sohasoutofband(so)
1096 register struct socket *so;
1097 {
1098 struct proc *p;
1099
1100 if (so->so_pgid < 0)
1101 gsignal(-so->so_pgid, SIGURG);
1102 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1103 psignal(p, SIGURG);
1104 selwakeup(&so->so_rcv.sb_sel);
1105 }
1106