/*	$NetBSD: uipc_socket.c,v 1.19 1995/05/23 00:19:30 cgd Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/file.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/resourcevar.h>
50
51 /*
52 * Socket operation routines.
53 * These routines are called by the routines in
54 * sys_socket.c or from a system process, and
55 * implement the semantics of socket operations by
56 * switching out to the protocol specific routines.
57 */
/*
 * Create a new socket: look up the protocol switch entry for
 * (dom, type, proto), allocate and zero a socket structure, and
 * attach the protocol via PRU_ATTACH.  On success *aso holds the
 * new socket; on failure the half-built socket is freed.
 *
 * Returns 0 or an errno (EPROTONOSUPPORT, EPROTOTYPE, or whatever
 * the protocol's attach routine reports).
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	/*
	 * A nonzero proto selects an exact protocol; otherwise the
	 * domain's default protocol for this socket type is used.
	 */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	so->so_type = type;
	/* Sockets created by the superuser are marked privileged. */
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_proto = prp;
	/*
	 * Let the protocol allocate its control block.  proto is
	 * smuggled through the "nam" mbuf argument slot via a cast.
	 */
	error =
	    (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0);
	if (error) {
		/* Mark no-fd-reference so sofree() will really free it. */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		/* SunOS emulation: datagram sockets broadcast by default. */
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	*aso = so;
	return (0);
}
103
104 int
105 sobind(so, nam)
106 struct socket *so;
107 struct mbuf *nam;
108 {
109 int s = splnet();
110 int error;
111
112 error =
113 (*so->so_proto->pr_usrreq)(so, PRU_BIND,
114 (struct mbuf *)0, nam, (struct mbuf *)0);
115 splx(s);
116 return (error);
117 }
118
119 int
120 solisten(so, backlog)
121 register struct socket *so;
122 int backlog;
123 {
124 int s = splnet(), error;
125
126 error =
127 (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
128 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
129 if (error) {
130 splx(s);
131 return (error);
132 }
133 if (so->so_q == 0)
134 so->so_options |= SO_ACCEPTCONN;
135 if (backlog < 0)
136 backlog = 0;
137 so->so_qlimit = min(backlog, SOMAXCONN);
138 splx(s);
139 return (0);
140 }
141
/*
 * Free a socket once it is truly dead: the protocol control block
 * must already be detached (so_pcb == 0) and no file descriptor may
 * reference it (SS_NOFDREF set); otherwise this is a no-op.
 * Dequeues the socket from its listening parent's queues, releases
 * both socket buffers, and frees the structure.
 *
 * NOTE(review): declared int but every exit is a bare "return" with
 * no value -- this should probably be void; confirm against the
 * prototype in sys/socketvar.h before changing.
 */
int
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/* Remove from the parent's incomplete (q0) or complete
		 * (q) connection queue; it must be on one of them. */
		if (!soqremque(so, 0) && !soqremque(so, 1))
			panic("sofree dq");
		so->so_head = 0;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}
158
159 /*
160 * Close a socket on last file table reference removal.
161 * Initiate disconnect if connected.
162 * Free socket when disconnect complete.
163 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	/* Abort any connections still queued on a listening socket. */
	if (so->so_options & SO_ACCEPTCONN) {
		while (so->so_q0)
			(void) soabort(so->so_q0);
		while (so->so_q)
			(void) soabort(so->so_q);
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking socket with linger: don't wait
			 * for the disconnect to complete. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/* Sleep until disconnected, interrupted by a
			 * signal, or the so_linger timeout expires. */
			while (so->so_state & SS_ISCONNECTED)
				if (error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls, so->so_linger))
					break;
		}
	}
drop:
	if (so->so_pcb) {
		/* Detach the protocol control block; an earlier error
		 * takes precedence over the detach error. */
		int error2 =
		    (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
			(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
211
212 /*
213 * Must be called at splnet...
214 */
215 int
216 soabort(so)
217 struct socket *so;
218 {
219
220 return (
221 (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
222 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
223 }
224
225 int
226 soaccept(so, nam)
227 register struct socket *so;
228 struct mbuf *nam;
229 {
230 int s = splnet();
231 int error;
232
233 if ((so->so_state & SS_NOFDREF) == 0)
234 panic("soaccept: !NOFDREF");
235 so->so_state &= ~SS_NOFDREF;
236 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
237 (struct mbuf *)0, nam, (struct mbuf *)0);
238 splx(s);
239 return (error);
240 }
241
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 *
	 * Note: the assignment inside the condition is deliberate --
	 * if sodisconnect() fails its error is overridden by EISCONN.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0);
	splx(s);
	return (error);
}
269
270 int
271 soconnect2(so1, so2)
272 register struct socket *so1;
273 struct socket *so2;
274 {
275 int s = splnet();
276 int error;
277
278 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
279 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
280 splx(s);
281 return (error);
282 }
283
284 int
285 sodisconnect(so)
286 register struct socket *so;
287 {
288 int s = splnet();
289 int error;
290
291 if ((so->so_state & SS_ISCONNECTED) == 0) {
292 error = ENOTCONN;
293 goto bad;
294 }
295 if (so->so_state & SS_ISDISCONNECTING) {
296 error = EALREADY;
297 goto bad;
298 }
299 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
300 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
301 bad:
302 splx(s);
303 return (error);
304 }
305
/* Map MSG_DONTWAIT onto a non-blocking sockbuf lock request. */
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	/* atomic: the whole message must be handed down in one call. */
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0)
		return (EINVAL);
	/* MSG_DONTROUTE is honored only for atomic protocols, and only
	 * when SO_DONTROUTE is not already set (toggled around the send). */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
/* Error exit from inside the splnet section below. */
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				/* Allow control-only sends while a
				 * connection is being confirmed. */
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if (atomic && resid > so->so_snd.sb_hiwat ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		/* Not enough room: block (or fail if non-blocking)
		 * and retake the lock from the top. */
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				/* Build the outgoing chain one mbuf at a
				 * time, copying data in from the uio. */
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				/* Use a cluster when enough data and
				 * buffer space remain to justify it. */
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = min(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = min(MCLBYTES - max_hdr, resid);
						m->m_data += max_hdr;
					} else
						len = min(MCLBYTES, resid);
#endif
					space -= MCLBYTES;
				} else {
nopages:
					len = min(min(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splnet();				/* XXX */
			/* Hand the chain (and ownership of top/control)
			 * to the protocol. */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
483
484 /*
485 * Implement receive operations on a socket.
486 * We depend on the way that records are added to the sockbuf
487 * by sbappend*. In particular, each record (mbufs linked through m_next)
488 * must begin with an address if the protocol so specifies,
489 * followed by an optional mbuf or mbufs containing ancillary data,
490 * and then zero or more mbufs of data.
491 * In order to avoid blocking network interrupts for the entire time here,
492 * we splx() while doing the actual copy to user space.
493 * Although the sockbuf is locked, new data may still be appended,
494 * and thus we must maintain consistency of the sockbuf during that time.
495 *
496 * The caller may receive the data as a single mbuf chain by supplying
497 * an mbuf **mp0 for use in returning the chain. The uio is then used
498 * only for the count in uio_resid.
499 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type;
	/* Used to detect a pass that transferred nothing, so we retry. */
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data bypasses the receive buffer entirely:
	 * fetch it from the protocol via PRU_RCVOOB. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	/* While confirming a connection, let the protocol know we
	 * intend to receive. */
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0);

restart:
	if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			/* A peek must not consume the pending error. */
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* Deliver what we have if the record already contains
		 * out-of-band data or an end-of-record mark. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		/* Drop the lock, wait for data, and start over. */
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	/* Address-bearing protocols put the sender's name first in
	 * each record; peel it off (or copy it, when peeking). */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Next come any control (ancillary data) mbufs; passed file
	 * descriptors (SCM_RIGHTS) are externalized into this process. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: moff is our offset into the current mbuf,
	 * offset tracks distance to the OOB mark while peeking. */
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Don't mix in-band and out-of-band data in one call. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf: advance (peek) or
			 * unlink and free/hand back (normal read). */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: trim the consumed prefix. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		/* Stop exactly at the out-of-band mark so the caller can
		 * test for it with SIOCATMARK. */
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Short count, no error -- see above. */
				return (0);
			}
			if (m = so->so_rcv.sb_mb)
				nextrecord = m->m_nextpkt;
		}
	}

	/* Atomic protocols: any unread remainder of the record is
	 * dropped and the caller is told via MSG_TRUNC. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		/* NOTE(review): this PRU_RCVD call passes one more mbuf
		 * argument than the other pr_usrreq calls in this file --
		 * harmless under K&R calling conventions, but verify
		 * against the pr_usrreq prototype. */
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct mbuf *)0);
	}
	/* Nothing was transferred and nothing says stop: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
800
801 int
802 soshutdown(so, how)
803 register struct socket *so;
804 register int how;
805 {
806 register struct protosw *pr = so->so_proto;
807
808 how++;
809 if (how & FREAD)
810 sorflush(so);
811 if (how & FWRITE)
812 return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
813 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
814 return (0);
815 }
816
/*
 * Flush the receive side of a socket: mark it unable to receive
 * more, then atomically detach everything queued in the receive
 * buffer and release it outside the interrupt-protected section.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* Take the buffer lock uninterruptibly -- we must flush. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/* Snapshot the sockbuf and zero the original so arriving data
	 * goes into a fresh (empty) buffer while we tear down the copy. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	/* Let the domain dispose of any in-flight passed descriptors. */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
838
/*
 * Set a socket option.  Non-SOL_SOCKET levels are passed straight
 * through to the protocol's ctloutput routine (which then owns m0).
 * SOL_SOCKET options are handled here, and additionally echoed to
 * the protocol on success so it can react to option changes.
 * The option mbuf m0 is consumed in all cases.
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */
			/* (l_onoff is the first int of struct linger, so
			 * the shared on/off code below reads it directly) */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			/* Boolean options map directly onto so_options bits. */
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) *mtod(m, int *)) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = *mtod(m, int *);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = *mtod(m, int *);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Timeouts are stored in clock ticks in a short;
			 * reject values that would overflow it. */
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a chance to act on the new setting;
		 * it takes over (and frees) the option mbuf. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}
955
/*
 * Get a socket option.  Non-SOL_SOCKET levels are delegated to the
 * protocol's ctloutput routine.  SOL_SOCKET options are answered
 * here from the socket and sockbuf fields; on success *mp is set to
 * a newly allocated mbuf holding the value (caller frees).
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		/* Most options are a single int; cases that differ
		 * (SO_LINGER, timeouts) override m_len below. */
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			/* l_onoff reports the raw SO_LINGER bit, not 0/1. */
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the error clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Convert stored clock ticks back to a timeval. */
			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) / tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
1040
1041 void
1042 sohasoutofband(so)
1043 register struct socket *so;
1044 {
1045 struct proc *p;
1046
1047 if (so->so_pgid < 0)
1048 gsignal(-so->so_pgid, SIGURG);
1049 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1050 psignal(p, SIGURG);
1051 selwakeup(&so->so_rcv.sb_sel);
1052 }
1053