/*	uipc_socket.c revision 1.1.1.2	*/
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */
35
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
48
49 /*
50 * Socket operation routines.
51 * These routines are called by the routines in
52 * sys_socket.c or from a system process, and
53 * implement the semantics of socket operations by
54 * switching out to the protocol specific routines.
55 */
56 /*ARGSUSED*/
57 socreate(dom, aso, type, proto)
58 int dom;
59 struct socket **aso;
60 register int type;
61 int proto;
62 {
63 struct proc *p = curproc; /* XXX */
64 register struct protosw *prp;
65 register struct socket *so;
66 register int error;
67
68 if (proto)
69 prp = pffindproto(dom, proto, type);
70 else
71 prp = pffindtype(dom, type);
72 if (prp == 0 || prp->pr_usrreq == 0)
73 return (EPROTONOSUPPORT);
74 if (prp->pr_type != type)
75 return (EPROTOTYPE);
76 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
77 bzero((caddr_t)so, sizeof(*so));
78 so->so_type = type;
79 if (p->p_ucred->cr_uid == 0)
80 so->so_state = SS_PRIV;
81 so->so_proto = prp;
82 error =
83 (*prp->pr_usrreq)(so, PRU_ATTACH,
84 (struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
85 if (error) {
86 so->so_state |= SS_NOFDREF;
87 sofree(so);
88 return (error);
89 }
90 *aso = so;
91 return (0);
92 }
93
94 sobind(so, nam)
95 struct socket *so;
96 struct mbuf *nam;
97 {
98 int s = splnet();
99 int error;
100
101 error =
102 (*so->so_proto->pr_usrreq)(so, PRU_BIND,
103 (struct mbuf *)0, nam, (struct mbuf *)0);
104 splx(s);
105 return (error);
106 }
107
108 solisten(so, backlog)
109 register struct socket *so;
110 int backlog;
111 {
112 int s = splnet(), error;
113
114 error =
115 (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
116 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
117 if (error) {
118 splx(s);
119 return (error);
120 }
121 if (so->so_q == 0)
122 so->so_options |= SO_ACCEPTCONN;
123 if (backlog < 0)
124 backlog = 0;
125 so->so_qlimit = min(backlog, SOMAXCONN);
126 splx(s);
127 return (0);
128 }
129
130 sofree(so)
131 register struct socket *so;
132 {
133
134 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
135 return;
136 if (so->so_head) {
137 if (!soqremque(so, 0) && !soqremque(so, 1))
138 panic("sofree dq");
139 so->so_head = 0;
140 }
141 sbrelease(&so->so_snd);
142 sorflush(so);
143 FREE(so, M_SOCKET);
144 }
145
146 /*
147 * Close a socket on last file table reference removal.
148 * Initiate disconnect if connected.
149 * Free socket when disconnect complete.
150 */
151 soclose(so)
152 register struct socket *so;
153 {
154 int s = splnet(); /* conservative */
155 int error = 0;
156
157 if (so->so_options & SO_ACCEPTCONN) {
158 while (so->so_q0)
159 (void) soabort(so->so_q0);
160 while (so->so_q)
161 (void) soabort(so->so_q);
162 }
163 if (so->so_pcb == 0)
164 goto discard;
165 if (so->so_state & SS_ISCONNECTED) {
166 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
167 error = sodisconnect(so);
168 if (error)
169 goto drop;
170 }
171 if (so->so_options & SO_LINGER) {
172 if ((so->so_state & SS_ISDISCONNECTING) &&
173 (so->so_state & SS_NBIO))
174 goto drop;
175 while (so->so_state & SS_ISCONNECTED)
176 if (error = tsleep((caddr_t)&so->so_timeo,
177 PSOCK | PCATCH, netcls, so->so_linger))
178 break;
179 }
180 }
181 drop:
182 if (so->so_pcb) {
183 int error2 =
184 (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
185 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
186 if (error == 0)
187 error = error2;
188 }
189 discard:
190 if (so->so_state & SS_NOFDREF)
191 panic("soclose: NOFDREF");
192 so->so_state |= SS_NOFDREF;
193 sofree(so);
194 splx(s);
195 return (error);
196 }
197
198 /*
199 * Must be called at splnet...
200 */
201 soabort(so)
202 struct socket *so;
203 {
204
205 return (
206 (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
207 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
208 }
209
210 soaccept(so, nam)
211 register struct socket *so;
212 struct mbuf *nam;
213 {
214 int s = splnet();
215 int error;
216
217 if ((so->so_state & SS_NOFDREF) == 0)
218 panic("soaccept: !NOFDREF");
219 so->so_state &= ~SS_NOFDREF;
220 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
221 (struct mbuf *)0, nam, (struct mbuf *)0);
222 splx(s);
223 return (error);
224 }
225
226 soconnect(so, nam)
227 register struct socket *so;
228 struct mbuf *nam;
229 {
230 int s;
231 int error;
232
233 if (so->so_options & SO_ACCEPTCONN)
234 return (EOPNOTSUPP);
235 s = splnet();
236 /*
237 * If protocol is connection-based, can only connect once.
238 * Otherwise, if connected, try to disconnect first.
239 * This allows user to disconnect by connecting to, e.g.,
240 * a null address.
241 */
242 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
243 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
244 (error = sodisconnect(so))))
245 error = EISCONN;
246 else
247 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
248 (struct mbuf *)0, nam, (struct mbuf *)0);
249 splx(s);
250 return (error);
251 }
252
253 soconnect2(so1, so2)
254 register struct socket *so1;
255 struct socket *so2;
256 {
257 int s = splnet();
258 int error;
259
260 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
261 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
262 splx(s);
263 return (error);
264 }
265
266 sodisconnect(so)
267 register struct socket *so;
268 {
269 int s = splnet();
270 int error;
271
272 if ((so->so_state & SS_ISCONNECTED) == 0) {
273 error = ENOTCONN;
274 goto bad;
275 }
276 if (so->so_state & SS_ISDISCONNECTING) {
277 error = EALREADY;
278 goto bad;
279 }
280 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
281 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
282 bad:
283 splx(s);
284 return (error);
285 }
286
287 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
288 /*
289 * Send on a socket.
290 * If send must go all at once and message is larger than
291 * send buffering, then hard error.
292 * Lock against other senders.
293 * If must go all at once and not enough room now, then
294 * inform user that this would block and do nothing.
295 * Otherwise, if nonblocking, send as much as possible.
296 * The data to be sent is described by "uio" if nonzero,
297 * otherwise by the mbuf chain "top" (which must be null
298 * if uio is not). Data provided in mbuf chain must be small
299 * enough to send all at once.
300 *
301 * Returns nonzero on error, timeout or signal; callers
302 * must check for short counts if EINTR/ERESTART are returned.
303 * Data and control buffers are freed on return.
304 */
305 sosend(so, addr, uio, top, control, flags)
306 register struct socket *so;
307 struct mbuf *addr;
308 struct uio *uio;
309 struct mbuf *top;
310 struct mbuf *control;
311 int flags;
312 {
313 struct proc *p = curproc; /* XXX */
314 struct mbuf **mp;
315 register struct mbuf *m;
316 register long space, len, resid;
317 int clen = 0, error, s, dontroute, mlen;
318 int atomic = sosendallatonce(so) || top;
319
320 if (uio)
321 resid = uio->uio_resid;
322 else
323 resid = top->m_pkthdr.len;
324 /*
325 * In theory resid should be unsigned.
326 * However, space must be signed, as it might be less than 0
327 * if we over-committed, and we must use a signed comparison
328 * of space and resid. On the other hand, a negative resid
329 * causes us to loop sending 0-length segments to the protocol.
330 */
331 if (resid < 0)
332 return (EINVAL);
333 dontroute =
334 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
335 (so->so_proto->pr_flags & PR_ATOMIC);
336 p->p_stats->p_ru.ru_msgsnd++;
337 if (control)
338 clen = control->m_len;
339 #define snderr(errno) { error = errno; splx(s); goto release; }
340
341 restart:
342 if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
343 goto out;
344 do {
345 s = splnet();
346 if (so->so_state & SS_CANTSENDMORE)
347 snderr(EPIPE);
348 if (so->so_error)
349 snderr(so->so_error);
350 if ((so->so_state & SS_ISCONNECTED) == 0) {
351 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
352 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
353 !(resid == 0 && clen != 0))
354 snderr(ENOTCONN);
355 } else if (addr == 0)
356 snderr(EDESTADDRREQ);
357 }
358 space = sbspace(&so->so_snd);
359 if (flags & MSG_OOB)
360 space += 1024;
361 if (atomic && resid > so->so_snd.sb_hiwat ||
362 clen > so->so_snd.sb_hiwat)
363 snderr(EMSGSIZE);
364 if (space < resid + clen && uio &&
365 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
366 if (so->so_state & SS_NBIO)
367 snderr(EWOULDBLOCK);
368 sbunlock(&so->so_snd);
369 error = sbwait(&so->so_snd);
370 splx(s);
371 if (error)
372 goto out;
373 goto restart;
374 }
375 splx(s);
376 mp = ⊤
377 space -= clen;
378 do {
379 if (uio == NULL) {
380 /*
381 * Data is prepackaged in "top".
382 */
383 resid = 0;
384 if (flags & MSG_EOR)
385 top->m_flags |= M_EOR;
386 } else do {
387 if (top == 0) {
388 MGETHDR(m, M_WAIT, MT_DATA);
389 mlen = MHLEN;
390 m->m_pkthdr.len = 0;
391 m->m_pkthdr.rcvif = (struct ifnet *)0;
392 } else {
393 MGET(m, M_WAIT, MT_DATA);
394 mlen = MLEN;
395 }
396 if (resid >= MINCLSIZE && space >= MCLBYTES) {
397 MCLGET(m, M_WAIT);
398 if ((m->m_flags & M_EXT) == 0)
399 goto nopages;
400 mlen = MCLBYTES;
401 #ifdef MAPPED_MBUFS
402 len = min(MCLBYTES, resid);
403 #else
404 if (atomic && top == 0) {
405 len = min(MCLBYTES - max_hdr, resid);
406 m->m_data += max_hdr;
407 } else
408 len = min(MCLBYTES, resid);
409 #endif
410 space -= MCLBYTES;
411 } else {
412 nopages:
413 len = min(min(mlen, resid), space);
414 space -= len;
415 /*
416 * For datagram protocols, leave room
417 * for protocol headers in first mbuf.
418 */
419 if (atomic && top == 0 && len < mlen)
420 MH_ALIGN(m, len);
421 }
422 error = uiomove(mtod(m, caddr_t), (int)len, uio);
423 resid = uio->uio_resid;
424 m->m_len = len;
425 *mp = m;
426 top->m_pkthdr.len += len;
427 if (error)
428 goto release;
429 mp = &m->m_next;
430 if (resid <= 0) {
431 if (flags & MSG_EOR)
432 top->m_flags |= M_EOR;
433 break;
434 }
435 } while (space > 0 && atomic);
436 if (dontroute)
437 so->so_options |= SO_DONTROUTE;
438 s = splnet(); /* XXX */
439 error = (*so->so_proto->pr_usrreq)(so,
440 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
441 top, addr, control);
442 splx(s);
443 if (dontroute)
444 so->so_options &= ~SO_DONTROUTE;
445 clen = 0;
446 control = 0;
447 top = 0;
448 mp = ⊤
449 if (error)
450 goto release;
451 } while (resid && space > 0);
452 } while (resid);
453
454 release:
455 sbunlock(&so->so_snd);
456 out:
457 if (top)
458 m_freem(top);
459 if (control)
460 m_freem(control);
461 return (error);
462 }
463
464 /*
465 * Implement receive operations on a socket.
466 * We depend on the way that records are added to the sockbuf
467 * by sbappend*. In particular, each record (mbufs linked through m_next)
468 * must begin with an address if the protocol so specifies,
469 * followed by an optional mbuf or mbufs containing ancillary data,
470 * and then zero or more mbufs of data.
471 * In order to avoid blocking network interrupts for the entire time here,
472 * we splx() while doing the actual copy to user space.
473 * Although the sockbuf is locked, new data may still be appended,
474 * and thus we must maintain consistency of the sockbuf during that time.
475 *
476 * The caller may receive the data as a single mbuf chain by supplying
477 * an mbuf **mp0 for use in returning the chain. The uio is then used
478 * only for the count in uio_resid.
479 */
480 soreceive(so, paddr, uio, mp0, controlp, flagsp)
481 register struct socket *so;
482 struct mbuf **paddr;
483 struct uio *uio;
484 struct mbuf **mp0;
485 struct mbuf **controlp;
486 int *flagsp;
487 {
488 register struct mbuf *m, **mp;
489 register int flags, len, error, s, offset;
490 struct protosw *pr = so->so_proto;
491 struct mbuf *nextrecord;
492 int moff, type;
493 int orig_resid = uio->uio_resid;
494
495 mp = mp0;
496 if (paddr)
497 *paddr = 0;
498 if (controlp)
499 *controlp = 0;
500 if (flagsp)
501 flags = *flagsp &~ MSG_EOR;
502 else
503 flags = 0;
504 if (flags & MSG_OOB) {
505 m = m_get(M_WAIT, MT_DATA);
506 error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
507 m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
508 if (error)
509 goto bad;
510 do {
511 error = uiomove(mtod(m, caddr_t),
512 (int) min(uio->uio_resid, m->m_len), uio);
513 m = m_free(m);
514 } while (uio->uio_resid && error == 0 && m);
515 bad:
516 if (m)
517 m_freem(m);
518 return (error);
519 }
520 if (mp)
521 *mp = (struct mbuf *)0;
522 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
523 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
524 (struct mbuf *)0, (struct mbuf *)0);
525
526 restart:
527 if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
528 return (error);
529 s = splnet();
530
531 m = so->so_rcv.sb_mb;
532 /*
533 * If we have less data than requested, block awaiting more
534 * (subject to any timeout) if:
535 * 1. the current count is less than the low water mark, or
536 * 2. MSG_WAITALL is set, and it is possible to do the entire
537 * receive operation at once if we block (resid <= hiwat).
538 * 3. MSG_DONTWAIT is not set
539 * If MSG_WAITALL is set but resid is larger than the receive buffer,
540 * we have to do the receive in sections, and thus risk returning
541 * a short count if a timeout or signal occurs after we start.
542 */
543 if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
544 so->so_rcv.sb_cc < uio->uio_resid) &&
545 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
546 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
547 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
548 #ifdef DIAGNOSTIC
549 if (m == 0 && so->so_rcv.sb_cc)
550 panic("receive 1");
551 #endif
552 if (so->so_error) {
553 if (m)
554 goto dontblock;
555 error = so->so_error;
556 if ((flags & MSG_PEEK) == 0)
557 so->so_error = 0;
558 goto release;
559 }
560 if (so->so_state & SS_CANTRCVMORE) {
561 if (m)
562 goto dontblock;
563 else
564 goto release;
565 }
566 for (; m; m = m->m_next)
567 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
568 m = so->so_rcv.sb_mb;
569 goto dontblock;
570 }
571 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
572 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
573 error = ENOTCONN;
574 goto release;
575 }
576 if (uio->uio_resid == 0)
577 goto release;
578 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
579 error = EWOULDBLOCK;
580 goto release;
581 }
582 sbunlock(&so->so_rcv);
583 error = sbwait(&so->so_rcv);
584 splx(s);
585 if (error)
586 return (error);
587 goto restart;
588 }
589 dontblock:
590 if (uio->uio_procp)
591 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
592 nextrecord = m->m_nextpkt;
593 if (pr->pr_flags & PR_ADDR) {
594 #ifdef DIAGNOSTIC
595 if (m->m_type != MT_SONAME)
596 panic("receive 1a");
597 #endif
598 orig_resid = 0;
599 if (flags & MSG_PEEK) {
600 if (paddr)
601 *paddr = m_copy(m, 0, m->m_len);
602 m = m->m_next;
603 } else {
604 sbfree(&so->so_rcv, m);
605 if (paddr) {
606 *paddr = m;
607 so->so_rcv.sb_mb = m->m_next;
608 m->m_next = 0;
609 m = so->so_rcv.sb_mb;
610 } else {
611 MFREE(m, so->so_rcv.sb_mb);
612 m = so->so_rcv.sb_mb;
613 }
614 }
615 }
616 while (m && m->m_type == MT_CONTROL && error == 0) {
617 if (flags & MSG_PEEK) {
618 if (controlp)
619 *controlp = m_copy(m, 0, m->m_len);
620 m = m->m_next;
621 } else {
622 sbfree(&so->so_rcv, m);
623 if (controlp) {
624 if (pr->pr_domain->dom_externalize &&
625 mtod(m, struct cmsghdr *)->cmsg_type ==
626 SCM_RIGHTS)
627 error = (*pr->pr_domain->dom_externalize)(m);
628 *controlp = m;
629 so->so_rcv.sb_mb = m->m_next;
630 m->m_next = 0;
631 m = so->so_rcv.sb_mb;
632 } else {
633 MFREE(m, so->so_rcv.sb_mb);
634 m = so->so_rcv.sb_mb;
635 }
636 }
637 if (controlp) {
638 orig_resid = 0;
639 controlp = &(*controlp)->m_next;
640 }
641 }
642 if (m) {
643 if ((flags & MSG_PEEK) == 0)
644 m->m_nextpkt = nextrecord;
645 type = m->m_type;
646 if (type == MT_OOBDATA)
647 flags |= MSG_OOB;
648 }
649 moff = 0;
650 offset = 0;
651 while (m && uio->uio_resid > 0 && error == 0) {
652 if (m->m_type == MT_OOBDATA) {
653 if (type != MT_OOBDATA)
654 break;
655 } else if (type == MT_OOBDATA)
656 break;
657 #ifdef DIAGNOSTIC
658 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
659 panic("receive 3");
660 #endif
661 so->so_state &= ~SS_RCVATMARK;
662 len = uio->uio_resid;
663 if (so->so_oobmark && len > so->so_oobmark - offset)
664 len = so->so_oobmark - offset;
665 if (len > m->m_len - moff)
666 len = m->m_len - moff;
667 /*
668 * If mp is set, just pass back the mbufs.
669 * Otherwise copy them out via the uio, then free.
670 * Sockbuf must be consistent here (points to current mbuf,
671 * it points to next record) when we drop priority;
672 * we must note any additions to the sockbuf when we
673 * block interrupts again.
674 */
675 if (mp == 0) {
676 splx(s);
677 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
678 s = splnet();
679 } else
680 uio->uio_resid -= len;
681 if (len == m->m_len - moff) {
682 if (m->m_flags & M_EOR)
683 flags |= MSG_EOR;
684 if (flags & MSG_PEEK) {
685 m = m->m_next;
686 moff = 0;
687 } else {
688 nextrecord = m->m_nextpkt;
689 sbfree(&so->so_rcv, m);
690 if (mp) {
691 *mp = m;
692 mp = &m->m_next;
693 so->so_rcv.sb_mb = m = m->m_next;
694 *mp = (struct mbuf *)0;
695 } else {
696 MFREE(m, so->so_rcv.sb_mb);
697 m = so->so_rcv.sb_mb;
698 }
699 if (m)
700 m->m_nextpkt = nextrecord;
701 }
702 } else {
703 if (flags & MSG_PEEK)
704 moff += len;
705 else {
706 if (mp)
707 *mp = m_copym(m, 0, len, M_WAIT);
708 m->m_data += len;
709 m->m_len -= len;
710 so->so_rcv.sb_cc -= len;
711 }
712 }
713 if (so->so_oobmark) {
714 if ((flags & MSG_PEEK) == 0) {
715 so->so_oobmark -= len;
716 if (so->so_oobmark == 0) {
717 so->so_state |= SS_RCVATMARK;
718 break;
719 }
720 } else {
721 offset += len;
722 if (offset == so->so_oobmark)
723 break;
724 }
725 }
726 if (flags & MSG_EOR)
727 break;
728 /*
729 * If the MSG_WAITALL flag is set (for non-atomic socket),
730 * we must not quit until "uio->uio_resid == 0" or an error
731 * termination. If a signal/timeout occurs, return
732 * with a short count but without error.
733 * Keep sockbuf locked against other readers.
734 */
735 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
736 !sosendallatonce(so) && !nextrecord) {
737 if (so->so_error || so->so_state & SS_CANTRCVMORE)
738 break;
739 error = sbwait(&so->so_rcv);
740 if (error) {
741 sbunlock(&so->so_rcv);
742 splx(s);
743 return (0);
744 }
745 if (m = so->so_rcv.sb_mb)
746 nextrecord = m->m_nextpkt;
747 }
748 }
749
750 if (m && pr->pr_flags & PR_ATOMIC) {
751 flags |= MSG_TRUNC;
752 if ((flags & MSG_PEEK) == 0)
753 (void) sbdroprecord(&so->so_rcv);
754 }
755 if ((flags & MSG_PEEK) == 0) {
756 if (m == 0)
757 so->so_rcv.sb_mb = nextrecord;
758 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
759 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
760 (struct mbuf *)flags, (struct mbuf *)0,
761 (struct mbuf *)0);
762 }
763 if (orig_resid == uio->uio_resid && orig_resid &&
764 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
765 sbunlock(&so->so_rcv);
766 splx(s);
767 goto restart;
768 }
769
770 if (flagsp)
771 *flagsp |= flags;
772 release:
773 sbunlock(&so->so_rcv);
774 splx(s);
775 return (error);
776 }
777
778 soshutdown(so, how)
779 register struct socket *so;
780 register int how;
781 {
782 register struct protosw *pr = so->so_proto;
783
784 how++;
785 if (how & FREAD)
786 sorflush(so);
787 if (how & FWRITE)
788 return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
789 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
790 return (0);
791 }
792
793 sorflush(so)
794 register struct socket *so;
795 {
796 register struct sockbuf *sb = &so->so_rcv;
797 register struct protosw *pr = so->so_proto;
798 register int s;
799 struct sockbuf asb;
800
801 sb->sb_flags |= SB_NOINTR;
802 (void) sblock(sb, M_WAITOK);
803 s = splimp();
804 socantrcvmore(so);
805 sbunlock(sb);
806 asb = *sb;
807 bzero((caddr_t)sb, sizeof (*sb));
808 splx(s);
809 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
810 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
811 sbrelease(&asb);
812 }
813
814 sosetopt(so, level, optname, m0)
815 register struct socket *so;
816 int level, optname;
817 struct mbuf *m0;
818 {
819 int error = 0;
820 register struct mbuf *m = m0;
821
822 if (level != SOL_SOCKET) {
823 if (so->so_proto && so->so_proto->pr_ctloutput)
824 return ((*so->so_proto->pr_ctloutput)
825 (PRCO_SETOPT, so, level, optname, &m0));
826 error = ENOPROTOOPT;
827 } else {
828 switch (optname) {
829
830 case SO_LINGER:
831 if (m == NULL || m->m_len != sizeof (struct linger)) {
832 error = EINVAL;
833 goto bad;
834 }
835 so->so_linger = mtod(m, struct linger *)->l_linger;
836 /* fall thru... */
837
838 case SO_DEBUG:
839 case SO_KEEPALIVE:
840 case SO_DONTROUTE:
841 case SO_USELOOPBACK:
842 case SO_BROADCAST:
843 case SO_REUSEADDR:
844 case SO_REUSEPORT:
845 case SO_OOBINLINE:
846 if (m == NULL || m->m_len < sizeof (int)) {
847 error = EINVAL;
848 goto bad;
849 }
850 if (*mtod(m, int *))
851 so->so_options |= optname;
852 else
853 so->so_options &= ~optname;
854 break;
855
856 case SO_SNDBUF:
857 case SO_RCVBUF:
858 case SO_SNDLOWAT:
859 case SO_RCVLOWAT:
860 if (m == NULL || m->m_len < sizeof (int)) {
861 error = EINVAL;
862 goto bad;
863 }
864 switch (optname) {
865
866 case SO_SNDBUF:
867 case SO_RCVBUF:
868 if (sbreserve(optname == SO_SNDBUF ?
869 &so->so_snd : &so->so_rcv,
870 (u_long) *mtod(m, int *)) == 0) {
871 error = ENOBUFS;
872 goto bad;
873 }
874 break;
875
876 case SO_SNDLOWAT:
877 so->so_snd.sb_lowat = *mtod(m, int *);
878 break;
879 case SO_RCVLOWAT:
880 so->so_rcv.sb_lowat = *mtod(m, int *);
881 break;
882 }
883 break;
884
885 case SO_SNDTIMEO:
886 case SO_RCVTIMEO:
887 {
888 struct timeval *tv;
889 short val;
890
891 if (m == NULL || m->m_len < sizeof (*tv)) {
892 error = EINVAL;
893 goto bad;
894 }
895 tv = mtod(m, struct timeval *);
896 if (tv->tv_sec > SHRT_MAX / hz - hz) {
897 error = EDOM;
898 goto bad;
899 }
900 val = tv->tv_sec * hz + tv->tv_usec / tick;
901
902 switch (optname) {
903
904 case SO_SNDTIMEO:
905 so->so_snd.sb_timeo = val;
906 break;
907 case SO_RCVTIMEO:
908 so->so_rcv.sb_timeo = val;
909 break;
910 }
911 break;
912 }
913
914 default:
915 error = ENOPROTOOPT;
916 break;
917 }
918 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
919 (void) ((*so->so_proto->pr_ctloutput)
920 (PRCO_SETOPT, so, level, optname, &m0));
921 m = NULL; /* freed by protocol */
922 }
923 }
924 bad:
925 if (m)
926 (void) m_free(m);
927 return (error);
928 }
929
930 sogetopt(so, level, optname, mp)
931 register struct socket *so;
932 int level, optname;
933 struct mbuf **mp;
934 {
935 register struct mbuf *m;
936
937 if (level != SOL_SOCKET) {
938 if (so->so_proto && so->so_proto->pr_ctloutput) {
939 return ((*so->so_proto->pr_ctloutput)
940 (PRCO_GETOPT, so, level, optname, mp));
941 } else
942 return (ENOPROTOOPT);
943 } else {
944 m = m_get(M_WAIT, MT_SOOPTS);
945 m->m_len = sizeof (int);
946
947 switch (optname) {
948
949 case SO_LINGER:
950 m->m_len = sizeof (struct linger);
951 mtod(m, struct linger *)->l_onoff =
952 so->so_options & SO_LINGER;
953 mtod(m, struct linger *)->l_linger = so->so_linger;
954 break;
955
956 case SO_USELOOPBACK:
957 case SO_DONTROUTE:
958 case SO_DEBUG:
959 case SO_KEEPALIVE:
960 case SO_REUSEADDR:
961 case SO_REUSEPORT:
962 case SO_BROADCAST:
963 case SO_OOBINLINE:
964 *mtod(m, int *) = so->so_options & optname;
965 break;
966
967 case SO_TYPE:
968 *mtod(m, int *) = so->so_type;
969 break;
970
971 case SO_ERROR:
972 *mtod(m, int *) = so->so_error;
973 so->so_error = 0;
974 break;
975
976 case SO_SNDBUF:
977 *mtod(m, int *) = so->so_snd.sb_hiwat;
978 break;
979
980 case SO_RCVBUF:
981 *mtod(m, int *) = so->so_rcv.sb_hiwat;
982 break;
983
984 case SO_SNDLOWAT:
985 *mtod(m, int *) = so->so_snd.sb_lowat;
986 break;
987
988 case SO_RCVLOWAT:
989 *mtod(m, int *) = so->so_rcv.sb_lowat;
990 break;
991
992 case SO_SNDTIMEO:
993 case SO_RCVTIMEO:
994 {
995 int val = (optname == SO_SNDTIMEO ?
996 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
997
998 m->m_len = sizeof(struct timeval);
999 mtod(m, struct timeval *)->tv_sec = val / hz;
1000 mtod(m, struct timeval *)->tv_usec =
1001 (val % hz) / tick;
1002 break;
1003 }
1004
1005 default:
1006 (void)m_free(m);
1007 return (ENOPROTOOPT);
1008 }
1009 *mp = m;
1010 return (0);
1011 }
1012 }
1013
1014 sohasoutofband(so)
1015 register struct socket *so;
1016 {
1017 struct proc *p;
1018
1019 if (so->so_pgid < 0)
1020 gsignal(-so->so_pgid, SIGURG);
1021 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1022 psignal(p, SIGURG);
1023 selwakeup(&so->so_rcv.sb_sel);
1024 }
1025