/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
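/*
 * Dispatch convention (illustrative sketch, not a new entry point):
 * each operation below reduces to a call through the protocol switch,
 *
 *	error = (*so->so_proto->pr_usrreq)(so, PRU_xxx,
 *	    m, nam, control);
 *
 * with any unused mbuf argument passed as a null pointer.
 */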
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
        int dom;
        struct socket **aso;
        register int type;
        int proto;
{
        struct proc *p = curproc;               /* XXX */
        register struct protosw *prp;
        register struct socket *so;
        register int error;

        if (proto)
                prp = pffindproto(dom, proto, type);
        else
                prp = pffindtype(dom, type);
        if (prp == 0 || prp->pr_usrreq == 0)
                return (EPROTONOSUPPORT);
        if (prp->pr_type != type)
                return (EPROTOTYPE);
        MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
        bzero((caddr_t)so, sizeof(*so));
        so->so_type = type;
        if (p->p_ucred->cr_uid == 0)
                so->so_state = SS_PRIV;
        so->so_proto = prp;
        error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
            (struct mbuf *)(long)proto, (struct mbuf *)0);
        if (error) {
                so->so_state |= SS_NOFDREF;
                sofree(so);
                return (error);
        }
        *aso = so;
        return (0);
}

int
sobind(so, nam)
        struct socket *so;
        struct mbuf *nam;
{
        int s = splnet();
        int error;

        error =
            (*so->so_proto->pr_usrreq)(so, PRU_BIND,
                (struct mbuf *)0, nam, (struct mbuf *)0);
        splx(s);
        return (error);
}

int
solisten(so, backlog)
        register struct socket *so;
        int backlog;
{
        int s = splnet(), error;

        error =
            (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
                (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
        if (error) {
                splx(s);
                return (error);
        }
        if (so->so_q == 0)
                so->so_options |= SO_ACCEPTCONN;
        if (backlog < 0)
                backlog = 0;
        so->so_qlimit = min(backlog, SOMAXCONN);
        splx(s);
        return (0);
}
void
sofree(so)
        register struct socket *so;
{

        if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
                return;
        if (so->so_head) {
                if (!soqremque(so, 0) && !soqremque(so, 1))
                        panic("sofree dq");
                so->so_head = 0;
        }
        sbrelease(&so->so_snd);
        sorflush(so);
        FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
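/*
 * Illustrative (hypothetical) userland use of SO_LINGER, which makes
 * the tsleep() below wait up to so_linger seconds for the disconnect
 * to complete before close returns:
 *
 *	struct linger l = { 1, 10 };	(on, 10 second timeout)
 *	setsockopt(s, SOL_SOCKET, SO_LINGER, (char *)&l, sizeof (l));
 *	close(s);			(may now block in soclose())
 */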
int
soclose(so)
        register struct socket *so;
{
        int s = splnet();               /* conservative */
        int error = 0;

        if (so->so_options & SO_ACCEPTCONN) {
                while (so->so_q0)
                        (void) soabort(so->so_q0);
                while (so->so_q)
                        (void) soabort(so->so_q);
        }
        if (so->so_pcb == 0)
                goto discard;
        if (so->so_state & SS_ISCONNECTED) {
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
                        error = sodisconnect(so);
                        if (error)
                                goto drop;
                }
                if (so->so_options & SO_LINGER) {
                        if ((so->so_state & SS_ISDISCONNECTING) &&
                            (so->so_state & SS_NBIO))
                                goto drop;
                        while (so->so_state & SS_ISCONNECTED)
                                if (error = tsleep((caddr_t)&so->so_timeo,
                                    PSOCK | PCATCH, netcls, so->so_linger * hz))
                                        break;
                }
        }
drop:
        if (so->so_pcb) {
                int error2 =
                    (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
                        (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
                if (error == 0)
                        error = error2;
        }
discard:
        if (so->so_state & SS_NOFDREF)
                panic("soclose: NOFDREF");
        so->so_state |= SS_NOFDREF;
        sofree(so);
        splx(s);
        return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(so)
        struct socket *so;
{

        return (
            (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
                (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
}

int
soaccept(so, nam)
        register struct socket *so;
        struct mbuf *nam;
{
        int s = splnet();
        int error;

        if ((so->so_state & SS_NOFDREF) == 0)
                panic("soaccept: !NOFDREF");
        so->so_state &= ~SS_NOFDREF;
        error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
            (struct mbuf *)0, nam, (struct mbuf *)0);
        splx(s);
        return (error);
}

int
soconnect(so, nam)
        register struct socket *so;
        struct mbuf *nam;
{
        int s;
        int error;

        if (so->so_options & SO_ACCEPTCONN)
                return (EOPNOTSUPP);
        s = splnet();
        /*
         * If protocol is connection-based, can only connect once.
         * Otherwise, if connected, try to disconnect first.
         * This allows user to disconnect by connecting to, e.g.,
         * a null address.
         */
        if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
            ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
            (error = sodisconnect(so))))
                error = EISCONN;
        else
                error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
                    (struct mbuf *)0, nam, (struct mbuf *)0);
        splx(s);
        return (error);
}

int
soconnect2(so1, so2)
        register struct socket *so1;
        struct socket *so2;
{
        int s = splnet();
        int error;

        error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
            (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
        splx(s);
        return (error);
}

int
sodisconnect(so)
        register struct socket *so;
{
        int s = splnet();
        int error;

        if ((so->so_state & SS_ISCONNECTED) == 0) {
                error = ENOTCONN;
                goto bad;
        }
        if (so->so_state & SS_ISDISCONNECTING) {
                error = EALREADY;
                goto bad;
        }
        error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
            (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
bad:
        splx(s);
        return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
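/*
 * SBLOCKWAIT() maps the caller's MSG_DONTWAIT flag onto the sblock()
 * wait behavior: a non-blocking request must not sleep waiting for
 * the sockbuf lock either.
 */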
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
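/*
 * Illustrative call (assuming a soo_write()-style caller in
 * sys_socket.c): write plain data described by a uio, with no
 * destination address, no control data and no flags:
 *
 *	error = sosend(so, (struct mbuf *)0, uio,
 *	    (struct mbuf *)0, (struct mbuf *)0, 0);
 */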
int
sosend(so, addr, uio, top, control, flags)
        register struct socket *so;
        struct mbuf *addr;
        struct uio *uio;
        struct mbuf *top;
        struct mbuf *control;
        int flags;
{
        struct proc *p = curproc;               /* XXX */
        struct mbuf **mp;
        register struct mbuf *m;
        register long space, len, resid;
        int clen = 0, error, s, dontroute, mlen;
        int atomic = sosendallatonce(so) || top;

        if (uio)
                resid = uio->uio_resid;
        else
                resid = top->m_pkthdr.len;
        /*
         * In theory resid should be unsigned.
         * However, space must be signed, as it might be less than 0
         * if we over-committed, and we must use a signed comparison
         * of space and resid.  On the other hand, a negative resid
         * causes us to loop sending 0-length segments to the protocol.
         */
        if (resid < 0)
                return (EINVAL);
        dontroute =
            (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
            (so->so_proto->pr_flags & PR_ATOMIC);
        p->p_stats->p_ru.ru_msgsnd++;
        if (control)
                clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
        if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
                goto out;
        do {
                s = splnet();
                if (so->so_state & SS_CANTSENDMORE)
                        snderr(EPIPE);
                if (so->so_error)
                        snderr(so->so_error);
                if ((so->so_state & SS_ISCONNECTED) == 0) {
                        if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
                                if ((so->so_state & SS_ISCONFIRMING) == 0 &&
                                    !(resid == 0 && clen != 0))
                                        snderr(ENOTCONN);
                        } else if (addr == 0)
                                snderr(EDESTADDRREQ);
                }
                space = sbspace(&so->so_snd);
                if (flags & MSG_OOB)
                        space += 1024;
                if (atomic && resid > so->so_snd.sb_hiwat ||
                    clen > so->so_snd.sb_hiwat)
                        snderr(EMSGSIZE);
                if (space < resid + clen && uio &&
                    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
                        if (so->so_state & SS_NBIO)
                                snderr(EWOULDBLOCK);
                        sbunlock(&so->so_snd);
                        error = sbwait(&so->so_snd);
                        splx(s);
                        if (error)
                                goto out;
                        goto restart;
                }
                splx(s);
                mp = &top;
                space -= clen;
                do {
                        if (uio == NULL) {
                                /*
                                 * Data is prepackaged in "top".
                                 */
                                resid = 0;
                                if (flags & MSG_EOR)
                                        top->m_flags |= M_EOR;
                        } else do {
                                if (top == 0) {
                                        MGETHDR(m, M_WAIT, MT_DATA);
                                        mlen = MHLEN;
                                        m->m_pkthdr.len = 0;
                                        m->m_pkthdr.rcvif = (struct ifnet *)0;
                                } else {
                                        MGET(m, M_WAIT, MT_DATA);
                                        mlen = MLEN;
                                }
                                if (resid >= MINCLSIZE && space >= MCLBYTES) {
                                        MCLGET(m, M_WAIT);
                                        if ((m->m_flags & M_EXT) == 0)
                                                goto nopages;
                                        mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
                                        len = min(MCLBYTES, resid);
#else
                                        if (atomic && top == 0) {
                                                len = min(MCLBYTES - max_hdr, resid);
                                                m->m_data += max_hdr;
                                        } else
                                                len = min(MCLBYTES, resid);
#endif
                                        space -= MCLBYTES;
                                } else {
nopages:
                                        len = min(min(mlen, resid), space);
                                        space -= len;
                                        /*
                                         * For datagram protocols, leave room
                                         * for protocol headers in first mbuf.
                                         */
                                        if (atomic && top == 0 && len < mlen)
                                                MH_ALIGN(m, len);
                                }
                                error = uiomove(mtod(m, caddr_t), (int)len, uio);
                                resid = uio->uio_resid;
                                m->m_len = len;
                                *mp = m;
                                top->m_pkthdr.len += len;
                                if (error)
                                        goto release;
                                mp = &m->m_next;
                                if (resid <= 0) {
                                        if (flags & MSG_EOR)
                                                top->m_flags |= M_EOR;
                                        break;
                                }
                        } while (space > 0 && atomic);
                        if (dontroute)
                                so->so_options |= SO_DONTROUTE;
                        s = splnet();                           /* XXX */
                        error = (*so->so_proto->pr_usrreq)(so,
                            (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
                            top, addr, control);
                        splx(s);
                        if (dontroute)
                                so->so_options &= ~SO_DONTROUTE;
                        clen = 0;
                        control = 0;
                        top = 0;
                        mp = &top;
                        if (error)
                                goto release;
                } while (resid && space > 0);
        } while (resid);

release:
        sbunlock(&so->so_snd);
out:
        if (top)
                m_freem(top);
        if (control)
                m_freem(control);
        return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
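/*
 * Illustrative call (assuming a soo_read()-style caller in
 * sys_socket.c): copy data out through the uio only, discarding any
 * address or control mbufs and ignoring the returned flags:
 *
 *	error = soreceive(so, (struct mbuf **)0, uio,
 *	    (struct mbuf **)0, (struct mbuf **)0, (int *)0);
 */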
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
        register struct socket *so;
        struct mbuf **paddr;
        struct uio *uio;
        struct mbuf **mp0;
        struct mbuf **controlp;
        int *flagsp;
{
        register struct mbuf *m, **mp;
        register int flags, len, error, s, offset;
        struct protosw *pr = so->so_proto;
        struct mbuf *nextrecord;
        int moff, type;
        int orig_resid = uio->uio_resid;

        mp = mp0;
        if (paddr)
                *paddr = 0;
        if (controlp)
                *controlp = 0;
        if (flagsp)
                flags = *flagsp &~ MSG_EOR;
        else
                flags = 0;
        if (flags & MSG_OOB) {
                m = m_get(M_WAIT, MT_DATA);
                error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
                    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0);
                if (error)
                        goto bad;
                do {
                        error = uiomove(mtod(m, caddr_t),
                            (int) min(uio->uio_resid, m->m_len), uio);
                        m = m_free(m);
                } while (uio->uio_resid && error == 0 && m);
bad:
                if (m)
                        m_freem(m);
                return (error);
        }
        if (mp)
                *mp = (struct mbuf *)0;
        if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
                (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
                    (struct mbuf *)0, (struct mbuf *)0);

restart:
        if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
                return (error);
        s = splnet();

        m = so->so_rcv.sb_mb;
        /*
         * If we have less data than requested, block awaiting more
         * (subject to any timeout) if:
         *   1. the current count is less than the low water mark, or
         *   2. MSG_WAITALL is set, and it is possible to do the entire
         *	receive operation at once if we block (resid <= hiwat), or
         *   3. MSG_DONTWAIT is not set.
         * If MSG_WAITALL is set but resid is larger than the receive buffer,
         * we have to do the receive in sections, and thus risk returning
         * a short count if a timeout or signal occurs after we start.
         */
        if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
            so->so_rcv.sb_cc < uio->uio_resid) &&
            (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
            ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
            m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
#ifdef DIAGNOSTIC
                if (m == 0 && so->so_rcv.sb_cc)
                        panic("receive 1");
#endif
                if (so->so_error) {
                        if (m)
                                goto dontblock;
                        error = so->so_error;
                        if ((flags & MSG_PEEK) == 0)
                                so->so_error = 0;
                        goto release;
                }
                if (so->so_state & SS_CANTRCVMORE) {
                        if (m)
                                goto dontblock;
                        else
                                goto release;
                }
                for (; m; m = m->m_next)
                        if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
                                m = so->so_rcv.sb_mb;
                                goto dontblock;
                        }
                if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
                    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
                        error = ENOTCONN;
                        goto release;
                }
                if (uio->uio_resid == 0)
                        goto release;
                if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
                        error = EWOULDBLOCK;
                        goto release;
                }
                sbunlock(&so->so_rcv);
                error = sbwait(&so->so_rcv);
                splx(s);
                if (error)
                        return (error);
                goto restart;
        }
dontblock:
        if (uio->uio_procp)
                uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
        nextrecord = m->m_nextpkt;
        if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
                if (m->m_type != MT_SONAME)
                        panic("receive 1a");
#endif
                orig_resid = 0;
                if (flags & MSG_PEEK) {
                        if (paddr)
                                *paddr = m_copy(m, 0, m->m_len);
                        m = m->m_next;
                } else {
                        sbfree(&so->so_rcv, m);
                        if (paddr) {
                                *paddr = m;
                                so->so_rcv.sb_mb = m->m_next;
                                m->m_next = 0;
                                m = so->so_rcv.sb_mb;
                        } else {
                                MFREE(m, so->so_rcv.sb_mb);
                                m = so->so_rcv.sb_mb;
                        }
                }
        }
        while (m && m->m_type == MT_CONTROL && error == 0) {
                if (flags & MSG_PEEK) {
                        if (controlp)
                                *controlp = m_copy(m, 0, m->m_len);
                        m = m->m_next;
                } else {
                        sbfree(&so->so_rcv, m);
                        if (controlp) {
                                if (pr->pr_domain->dom_externalize &&
                                    mtod(m, struct cmsghdr *)->cmsg_type ==
                                    SCM_RIGHTS)
                                        error = (*pr->pr_domain->dom_externalize)(m);
                                *controlp = m;
                                so->so_rcv.sb_mb = m->m_next;
                                m->m_next = 0;
                                m = so->so_rcv.sb_mb;
                        } else {
                                MFREE(m, so->so_rcv.sb_mb);
                                m = so->so_rcv.sb_mb;
                        }
                }
                if (controlp) {
                        orig_resid = 0;
                        controlp = &(*controlp)->m_next;
                }
        }
        if (m) {
                if ((flags & MSG_PEEK) == 0)
                        m->m_nextpkt = nextrecord;
                type = m->m_type;
                if (type == MT_OOBDATA)
                        flags |= MSG_OOB;
        }
        moff = 0;
        offset = 0;
        while (m && uio->uio_resid > 0 && error == 0) {
                if (m->m_type == MT_OOBDATA) {
                        if (type != MT_OOBDATA)
                                break;
                } else if (type == MT_OOBDATA)
                        break;
#ifdef DIAGNOSTIC
                else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
                        panic("receive 3");
#endif
                so->so_state &= ~SS_RCVATMARK;
                len = uio->uio_resid;
                if (so->so_oobmark && len > so->so_oobmark - offset)
                        len = so->so_oobmark - offset;
                if (len > m->m_len - moff)
                        len = m->m_len - moff;
                /*
                 * If mp is set, just pass back the mbufs.
                 * Otherwise copy them out via the uio, then free.
                 * Sockbuf must be consistent here (points to current mbuf,
                 * it points to next record) when we drop priority;
                 * we must note any additions to the sockbuf when we
                 * block interrupts again.
                 */
                if (mp == 0) {
                        splx(s);
                        error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
                        s = splnet();
                } else
                        uio->uio_resid -= len;
                if (len == m->m_len - moff) {
                        if (m->m_flags & M_EOR)
                                flags |= MSG_EOR;
                        if (flags & MSG_PEEK) {
                                m = m->m_next;
                                moff = 0;
                        } else {
                                nextrecord = m->m_nextpkt;
                                sbfree(&so->so_rcv, m);
                                if (mp) {
                                        *mp = m;
                                        mp = &m->m_next;
                                        so->so_rcv.sb_mb = m = m->m_next;
                                        *mp = (struct mbuf *)0;
                                } else {
                                        MFREE(m, so->so_rcv.sb_mb);
                                        m = so->so_rcv.sb_mb;
                                }
                                if (m)
                                        m->m_nextpkt = nextrecord;
                        }
                } else {
                        if (flags & MSG_PEEK)
                                moff += len;
                        else {
                                if (mp)
                                        *mp = m_copym(m, 0, len, M_WAIT);
                                m->m_data += len;
                                m->m_len -= len;
                                so->so_rcv.sb_cc -= len;
                        }
                }
                if (so->so_oobmark) {
                        if ((flags & MSG_PEEK) == 0) {
                                so->so_oobmark -= len;
                                if (so->so_oobmark == 0) {
                                        so->so_state |= SS_RCVATMARK;
                                        break;
                                }
                        } else {
                                offset += len;
                                if (offset == so->so_oobmark)
                                        break;
                        }
                }
                if (flags & MSG_EOR)
                        break;
                /*
                 * If the MSG_WAITALL flag is set (for non-atomic socket),
                 * we must not quit until "uio->uio_resid == 0" or an error
                 * termination.  If a signal/timeout occurs, return
                 * with a short count but without error.
                 * Keep sockbuf locked against other readers.
                 */
                while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
                    !sosendallatonce(so) && !nextrecord) {
                        if (so->so_error || so->so_state & SS_CANTRCVMORE)
                                break;
                        error = sbwait(&so->so_rcv);
                        if (error) {
                                sbunlock(&so->so_rcv);
                                splx(s);
                                return (0);
                        }
                        if (m = so->so_rcv.sb_mb)
                                nextrecord = m->m_nextpkt;
                }
        }

        if (m && pr->pr_flags & PR_ATOMIC) {
                flags |= MSG_TRUNC;
                if ((flags & MSG_PEEK) == 0)
                        (void) sbdroprecord(&so->so_rcv);
        }
        if ((flags & MSG_PEEK) == 0) {
                if (m == 0)
                        so->so_rcv.sb_mb = nextrecord;
                if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
                        (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
                            (struct mbuf *)(long)flags, (struct mbuf *)0);
        }
        if (orig_resid == uio->uio_resid && orig_resid &&
            (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
                sbunlock(&so->so_rcv);
                splx(s);
                goto restart;
        }

        if (flagsp)
                *flagsp |= flags;
release:
        sbunlock(&so->so_rcv);
        splx(s);
        return (error);
}

int
soshutdown(so, how)
        register struct socket *so;
        register int how;
{
        register struct protosw *pr = so->so_proto;

        how++;          /* convert 0..2 to FREAD, FWRITE, FREAD|FWRITE */
        if (how & FREAD)
                sorflush(so);
        if (how & FWRITE)
                return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
                    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
        return (0);
}

void
sorflush(so)
        register struct socket *so;
{
        register struct sockbuf *sb = &so->so_rcv;
        register struct protosw *pr = so->so_proto;
        register int s;
        struct sockbuf asb;

        sb->sb_flags |= SB_NOINTR;
        (void) sblock(sb, M_WAITOK);
        s = splimp();
        socantrcvmore(so);
        sbunlock(sb);
        asb = *sb;
        bzero((caddr_t)sb, sizeof (*sb));
        splx(s);
        if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
                (*pr->pr_domain->dom_dispose)(asb.sb_mb);
        sbrelease(&asb);
}

int
sosetopt(so, level, optname, m0)
        register struct socket *so;
        int level, optname;
        struct mbuf *m0;
{
        int error = 0;
        register struct mbuf *m = m0;

        if (level != SOL_SOCKET) {
                if (so->so_proto && so->so_proto->pr_ctloutput)
                        return ((*so->so_proto->pr_ctloutput)
                                  (PRCO_SETOPT, so, level, optname, &m0));
                error = ENOPROTOOPT;
        } else {
                switch (optname) {

                case SO_LINGER:
                        if (m == NULL || m->m_len != sizeof (struct linger)) {
                                error = EINVAL;
                                goto bad;
                        }
                        so->so_linger = mtod(m, struct linger *)->l_linger;
                        /* fall thru... */

                case SO_DEBUG:
                case SO_KEEPALIVE:
                case SO_DONTROUTE:
                case SO_USELOOPBACK:
                case SO_BROADCAST:
                case SO_REUSEADDR:
                case SO_REUSEPORT:
                case SO_OOBINLINE:
                        if (m == NULL || m->m_len < sizeof (int)) {
                                error = EINVAL;
                                goto bad;
                        }
                        if (*mtod(m, int *))
                                so->so_options |= optname;
                        else
                                so->so_options &= ~optname;
                        break;

                case SO_SNDBUF:
                case SO_RCVBUF:
                case SO_SNDLOWAT:
                case SO_RCVLOWAT:
                        if (m == NULL || m->m_len < sizeof (int)) {
                                error = EINVAL;
                                goto bad;
                        }
                        switch (optname) {

                        case SO_SNDBUF:
                        case SO_RCVBUF:
                                if (sbreserve(optname == SO_SNDBUF ?
                                    &so->so_snd : &so->so_rcv,
                                    (u_long) *mtod(m, int *)) == 0) {
                                        error = ENOBUFS;
                                        goto bad;
                                }
                                break;

                        case SO_SNDLOWAT:
                                so->so_snd.sb_lowat = *mtod(m, int *);
                                break;
                        case SO_RCVLOWAT:
                                so->so_rcv.sb_lowat = *mtod(m, int *);
                                break;
                        }
                        break;

                case SO_SNDTIMEO:
                case SO_RCVTIMEO:
                    {
                        struct timeval *tv;
                        short val;

                        if (m == NULL || m->m_len < sizeof (*tv)) {
                                error = EINVAL;
                                goto bad;
                        }
                        tv = mtod(m, struct timeval *);
                        if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
                                error = EDOM;
                                goto bad;
                        }
                        val = tv->tv_sec * hz + tv->tv_usec / tick;

                        switch (optname) {

                        case SO_SNDTIMEO:
                                so->so_snd.sb_timeo = val;
                                break;
                        case SO_RCVTIMEO:
                                so->so_rcv.sb_timeo = val;
                                break;
                        }
                        break;
                    }

                default:
                        error = ENOPROTOOPT;
                        break;
                }
                if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
                        (void) ((*so->so_proto->pr_ctloutput)
                                  (PRCO_SETOPT, so, level, optname, &m0));
                        m = NULL;       /* freed by protocol */
                }
        }
bad:
        if (m)
                (void) m_free(m);
        return (error);
}

int
sogetopt(so, level, optname, mp)
        register struct socket *so;
        int level, optname;
        struct mbuf **mp;
{
        register struct mbuf *m;

        if (level != SOL_SOCKET) {
                if (so->so_proto && so->so_proto->pr_ctloutput) {
                        return ((*so->so_proto->pr_ctloutput)
                                  (PRCO_GETOPT, so, level, optname, mp));
                } else
                        return (ENOPROTOOPT);
        } else {
                m = m_get(M_WAIT, MT_SOOPTS);
                m->m_len = sizeof (int);

                switch (optname) {

                case SO_LINGER:
                        m->m_len = sizeof (struct linger);
                        mtod(m, struct linger *)->l_onoff =
                                so->so_options & SO_LINGER;
                        mtod(m, struct linger *)->l_linger = so->so_linger;
                        break;

                case SO_USELOOPBACK:
                case SO_DONTROUTE:
                case SO_DEBUG:
                case SO_KEEPALIVE:
                case SO_REUSEADDR:
                case SO_REUSEPORT:
                case SO_BROADCAST:
                case SO_OOBINLINE:
                        *mtod(m, int *) = so->so_options & optname;
                        break;

                case SO_TYPE:
                        *mtod(m, int *) = so->so_type;
                        break;

                case SO_ERROR:
                        *mtod(m, int *) = so->so_error;
                        so->so_error = 0;
                        break;

                case SO_SNDBUF:
                        *mtod(m, int *) = so->so_snd.sb_hiwat;
                        break;

                case SO_RCVBUF:
                        *mtod(m, int *) = so->so_rcv.sb_hiwat;
                        break;

                case SO_SNDLOWAT:
                        *mtod(m, int *) = so->so_snd.sb_lowat;
                        break;

                case SO_RCVLOWAT:
                        *mtod(m, int *) = so->so_rcv.sb_lowat;
                        break;

                case SO_SNDTIMEO:
                case SO_RCVTIMEO:
                    {
                        int val = (optname == SO_SNDTIMEO ?
                            so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

                        m->m_len = sizeof(struct timeval);
                        mtod(m, struct timeval *)->tv_sec = val / hz;
                        mtod(m, struct timeval *)->tv_usec =
                            (val % hz) * tick;
                        break;
                    }

                default:
                        (void)m_free(m);
                        return (ENOPROTOOPT);
                }
                *mp = m;
                return (0);
        }
}

void
sohasoutofband(so)
        register struct socket *so;
{
        struct proc *p;

        if (so->so_pgid < 0)
                gsignal(-so->so_pgid, SIGURG);
        else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
                psignal(p, SIGURG);
        selwakeup(&so->so_rcv.sb_sel);
}