uipc_socket.c revision 1.40 1 /* $NetBSD: uipc_socket.c,v 1.40 1998/12/16 00:26:10 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95
36 */
37
38 #include "opt_compat_sunos.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/domain.h>
47 #include <sys/kernel.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/resourcevar.h>
53 #include <sys/pool.h>
54
55 struct pool socket_pool;
56
57 void
58 soinit()
59 {
60
61 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
62 "sockpl", 0, NULL, NULL, M_SOCKET);
63 }
64
65 /*
66 * Socket operation routines.
67 * These routines are called by the routines in
68 * sys_socket.c or from a system process, and
69 * implement the semantics of socket operations by
70 * switching out to the protocol specific routines.
71 */
/*
 * socreate: create a new socket of the given domain, type and protocol.
 *
 * Looks up the protocol switch entry (by protocol number when "proto"
 * is nonzero, otherwise by socket type), allocates and zeroes a socket
 * from socket_pool, and attaches it to the protocol via PRU_ATTACH.
 * On success the new socket is returned through *aso.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;
	int s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
	/* Protocol attach; "proto" is passed in the control-mbuf slot. */
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		/* No file descriptor references it yet; free it outright. */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		/* SunOS binaries expect datagram sockets to broadcast. */
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	splx(s);
	*aso = so;
	return (0);
}
122
123 int
124 sobind(so, nam)
125 struct socket *so;
126 struct mbuf *nam;
127 {
128 struct proc *p = curproc; /* XXX */
129 int s = splsoftnet();
130 int error;
131
132 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
133 nam, (struct mbuf *)0, p);
134 splx(s);
135 return (error);
136 }
137
138 int
139 solisten(so, backlog)
140 register struct socket *so;
141 int backlog;
142 {
143 int s = splsoftnet(), error;
144
145 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
146 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
147 if (error) {
148 splx(s);
149 return (error);
150 }
151 if (so->so_q.tqh_first == NULL)
152 so->so_options |= SO_ACCEPTCONN;
153 if (backlog < 0)
154 backlog = 0;
155 so->so_qlimit = min(backlog, SOMAXCONN);
156 splx(s);
157 return (0);
158 }
159
/*
 * sofree: release a socket that has no PCB and no file references.
 *
 * A no-op unless the protocol control block is gone and SS_NOFDREF is
 * set; otherwise removes the socket from its listener's queues (if it
 * is still an incomplete/pending connection), frees both socket
 * buffers, and returns the socket to the pool.
 */
void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/* Must be on either the partial (q0) or complete (q) queue. */
		if (!soqremque(so, 0) && !soqremque(so, 1))
			panic("sofree dq");
		so->so_head = 0;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}
176
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splsoftnet();		/* conservative */
	int error = 0;

	/* Abort any connections still queued on a listening socket. */
	if (so->so_options & SO_ACCEPTCONN) {
		while (so->so_q0.tqh_first)
			(void) soabort(so->so_q0.tqh_first);
		while (so->so_q.tqh_first)
			(void) soabort(so->so_q.tqh_first);
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/*
			 * SO_LINGER: wait (up to so_linger seconds) for
			 * the disconnect to complete, unless the socket
			 * is non-blocking.  An interrupted sleep falls
			 * through with error set.
			 */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		/* Detach from the protocol; keep the first error seen. */
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
232
233 /*
234 * Must be called at splsoftnet...
235 */
236 int
237 soabort(so)
238 struct socket *so;
239 {
240
241 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
242 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
243 }
244
245 int
246 soaccept(so, nam)
247 register struct socket *so;
248 struct mbuf *nam;
249 {
250 int s = splsoftnet();
251 int error;
252
253 if ((so->so_state & SS_NOFDREF) == 0)
254 panic("soaccept: !NOFDREF");
255 so->so_state &= ~SS_NOFDREF;
256 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, (struct mbuf *)0,
257 nam, (struct mbuf *)0, (struct proc *)0);
258 splx(s);
259 return (error);
260 }
261
/*
 * soconnect: initiate a connection to the address held in "nam".
 *
 * Refused on listening sockets.  Note the condition below evaluates
 * sodisconnect() for its side effect: a connected connectionless
 * socket is disconnected first, so connecting to a null address acts
 * as a disconnect.
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	struct proc *p = curproc;		/* XXX */
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}
290
291 int
292 soconnect2(so1, so2)
293 register struct socket *so1;
294 struct socket *so2;
295 {
296 int s = splsoftnet();
297 int error;
298
299 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
300 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
301 (struct proc *)0);
302 splx(s);
303 return (error);
304 }
305
306 int
307 sodisconnect(so)
308 register struct socket *so;
309 {
310 int s = splsoftnet();
311 int error;
312
313 if ((so->so_state & SS_ISCONNECTED) == 0) {
314 error = ENOTCONN;
315 goto bad;
316 }
317 if (so->so_state & SS_ISDISCONNECTING) {
318 error = EALREADY;
319 goto bad;
320 }
321 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
322 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
323 (struct proc *)0);
324 bad:
325 splx(s);
326 return (error);
327 }
328
/* Mbuf wait policy for sblock(): don't sleep when MSG_DONTWAIT is set. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
	/* Bail out of the spl-raised region with "errno". */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/*
				 * Permit a data-less send with control
				 * data while the connection is still
				 * being confirmed.
				 */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			/* Wait for buffer space, then start over. */
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* Build the chain an mbuf at a time. */
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = min(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						/*
						 * Leave header room in the
						 * first cluster of a record.
						 */
						len = min(MCLBYTES - max_hdr, resid);
						m->m_data += max_hdr;
					} else
						len = min(MCLBYTES, resid);
#endif
					space -= len;
				} else {
nopages:
					len = min(min(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			s = splsoftnet();		/* XXX */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			/* The protocol consumed top and control. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
512
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data is fetched straight from the protocol. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* A record boundary or OOB data lets us proceed early. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		/* Sleep for more data, then rescan from the top. */
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	/* The record may begin with a sender-address mbuf (MT_SONAME). */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				/* Hand the address mbuf to the caller. */
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Next come any ancillary-data (MT_CONTROL) mbufs. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* Convert in-transit descriptors (fd passing). */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	/* Finally, the data mbufs of the record. */
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix in-band and out-of-band data in one pass. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance within it. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			/* Stop at the out-of-band mark. */
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				/* Short count, no error (see comment above). */
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Atomic protocols: discard the unread rest of the record. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/* Let the protocol update windows etc. */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		/* Nothing was transferred at all; try again. */
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
830
831 int
832 soshutdown(so, how)
833 struct socket *so;
834 int how;
835 {
836 struct protosw *pr = so->so_proto;
837
838 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
839 return (EINVAL);
840
841 if (how == SHUT_RD || how == SHUT_RDWR)
842 sorflush(so);
843 if (how == SHUT_WR || how == SHUT_RDWR)
844 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
845 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
846 return (0);
847 }
848
/*
 * sorflush: flush and invalidate a socket's receive buffer.
 *
 * Marks the socket unable to receive more, snapshots the sockbuf,
 * zeroes the live one, then releases the snapshot — disposing of any
 * in-transit access rights (passed descriptors) first.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Don't let the lock acquisition be interrupted by signals. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Work on a private copy so the real sockbuf is cleanly reset. */
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
870
/*
 * sosetopt: set a socket option.
 *
 * Options at a level other than SOL_SOCKET are passed straight to the
 * protocol's ctloutput routine.  Socket-level options are interpreted
 * here; afterwards they are also shown to the protocol so it may take
 * note (its return value is deliberately ignored).  The option mbuf
 * "m0" is consumed in all cases.
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... (l_onoff handled as a boolean below) */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			/* Boolean options map directly onto so_options bits. */
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* sb_timeo is a short tick count; reject overflow. */
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}
1011
/*
 * sogetopt: get the current value of a socket option.
 *
 * Non-SOL_SOCKET levels are forwarded to the protocol's ctloutput
 * routine.  Socket-level options are answered from the socket itself;
 * the reply is returned in a freshly allocated mbuf through *mp,
 * which the caller must free.
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			/* Boolean options live in so_options. */
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the pending error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert the stored tick count back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
1097
1098 void
1099 sohasoutofband(so)
1100 register struct socket *so;
1101 {
1102 struct proc *p;
1103
1104 if (so->so_pgid < 0)
1105 gsignal(-so->so_pgid, SIGURG);
1106 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1107 psignal(p, SIGURG);
1108 selwakeup(&so->so_rcv.sb_sel);
1109 }
1110