uipc_socket.c revision 1.37 1 /* $NetBSD: uipc_socket.c,v 1.37 1998/08/02 04:53:12 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95
36 */
37
38 #include "opt_compat_sunos.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/domain.h>
47 #include <sys/kernel.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/resourcevar.h>
53 #include <sys/pool.h>
54
55 struct pool socket_pool;
56
57 void
58 soinit()
59 {
60
61 pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
62 "sockpl", 0, NULL, NULL, M_SOCKET);
63 }
64
65 /*
66 * Socket operation routines.
67 * These routines are called by the routines in
68 * sys_socket.c or from a system process, and
69 * implement the semantics of socket operations by
70 * switching out to the protocol specific routines.
71 */
72 /*ARGSUSED*/
73 int
74 socreate(dom, aso, type, proto)
75 int dom;
76 struct socket **aso;
77 register int type;
78 int proto;
79 {
80 struct proc *p = curproc; /* XXX */
81 register struct protosw *prp;
82 register struct socket *so;
83 register int error;
84
85 if (proto)
86 prp = pffindproto(dom, proto, type);
87 else
88 prp = pffindtype(dom, type);
89 if (prp == 0 || prp->pr_usrreq == 0)
90 return (EPROTONOSUPPORT);
91 if (prp->pr_type != type)
92 return (EPROTOTYPE);
93 so = pool_get(&socket_pool, PR_WAITOK);
94 bzero((caddr_t)so, sizeof(*so));
95 TAILQ_INIT(&so->so_q0);
96 TAILQ_INIT(&so->so_q);
97 so->so_type = type;
98 so->so_proto = prp;
99 so->so_send = sosend;
100 so->so_receive = soreceive;
101 error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
102 (struct mbuf *)(long)proto, (struct mbuf *)0, p);
103 if (error) {
104 so->so_state |= SS_NOFDREF;
105 sofree(so);
106 return (error);
107 }
108 #ifdef COMPAT_SUNOS
109 {
110 extern struct emul emul_sunos;
111 if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
112 so->so_options |= SO_BROADCAST;
113 }
114 #endif
115 *aso = so;
116 return (0);
117 }
118
119 int
120 sobind(so, nam)
121 struct socket *so;
122 struct mbuf *nam;
123 {
124 struct proc *p = curproc; /* XXX */
125 int s = splsoftnet();
126 int error;
127
128 error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
129 nam, (struct mbuf *)0, p);
130 splx(s);
131 return (error);
132 }
133
134 int
135 solisten(so, backlog)
136 register struct socket *so;
137 int backlog;
138 {
139 int s = splsoftnet(), error;
140
141 error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
142 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
143 if (error) {
144 splx(s);
145 return (error);
146 }
147 if (so->so_q.tqh_first == NULL)
148 so->so_options |= SO_ACCEPTCONN;
149 if (backlog < 0)
150 backlog = 0;
151 so->so_qlimit = min(backlog, SOMAXCONN);
152 splx(s);
153 return (0);
154 }
155
156 void
157 sofree(so)
158 register struct socket *so;
159 {
160
161 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
162 return;
163 if (so->so_head) {
164 if (!soqremque(so, 0) && !soqremque(so, 1))
165 panic("sofree dq");
166 so->so_head = 0;
167 }
168 sbrelease(&so->so_snd);
169 sorflush(so);
170 pool_put(&socket_pool, so);
171 }
172
173 /*
174 * Close a socket on last file table reference removal.
175 * Initiate disconnect if connected.
176 * Free socket when disconnect complete.
177 */
178 int
179 soclose(so)
180 register struct socket *so;
181 {
182 int s = splsoftnet(); /* conservative */
183 int error = 0;
184
185 if (so->so_options & SO_ACCEPTCONN) {
186 while (so->so_q0.tqh_first)
187 (void) soabort(so->so_q0.tqh_first);
188 while (so->so_q.tqh_first)
189 (void) soabort(so->so_q.tqh_first);
190 }
191 if (so->so_pcb == 0)
192 goto discard;
193 if (so->so_state & SS_ISCONNECTED) {
194 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
195 error = sodisconnect(so);
196 if (error)
197 goto drop;
198 }
199 if (so->so_options & SO_LINGER) {
200 if ((so->so_state & SS_ISDISCONNECTING) &&
201 (so->so_state & SS_NBIO))
202 goto drop;
203 while (so->so_state & SS_ISCONNECTED) {
204 error = tsleep((caddr_t)&so->so_timeo,
205 PSOCK | PCATCH, netcls,
206 so->so_linger * hz);
207 if (error)
208 break;
209 }
210 }
211 }
212 drop:
213 if (so->so_pcb) {
214 int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
215 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
216 (struct proc *)0);
217 if (error == 0)
218 error = error2;
219 }
220 discard:
221 if (so->so_state & SS_NOFDREF)
222 panic("soclose: NOFDREF");
223 so->so_state |= SS_NOFDREF;
224 sofree(so);
225 splx(s);
226 return (error);
227 }
228
229 /*
230 * Must be called at splsoftnet...
231 */
232 int
233 soabort(so)
234 struct socket *so;
235 {
236
237 return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
238 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
239 }
240
241 int
242 soaccept(so, nam)
243 register struct socket *so;
244 struct mbuf *nam;
245 {
246 int s = splsoftnet();
247 int error;
248
249 if ((so->so_state & SS_NOFDREF) == 0)
250 panic("soaccept: !NOFDREF");
251 so->so_state &= ~SS_NOFDREF;
252 error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, (struct mbuf *)0,
253 nam, (struct mbuf *)0, (struct proc *)0);
254 splx(s);
255 return (error);
256 }
257
258 int
259 soconnect(so, nam)
260 register struct socket *so;
261 struct mbuf *nam;
262 {
263 struct proc *p = curproc; /* XXX */
264 int s;
265 int error;
266
267 if (so->so_options & SO_ACCEPTCONN)
268 return (EOPNOTSUPP);
269 s = splsoftnet();
270 /*
271 * If protocol is connection-based, can only connect once.
272 * Otherwise, if connected, try to disconnect first.
273 * This allows user to disconnect by connecting to, e.g.,
274 * a null address.
275 */
276 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
277 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
278 (error = sodisconnect(so))))
279 error = EISCONN;
280 else
281 error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
282 (struct mbuf *)0, nam, (struct mbuf *)0, p);
283 splx(s);
284 return (error);
285 }
286
287 int
288 soconnect2(so1, so2)
289 register struct socket *so1;
290 struct socket *so2;
291 {
292 int s = splsoftnet();
293 int error;
294
295 error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
296 (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
297 (struct proc *)0);
298 splx(s);
299 return (error);
300 }
301
302 int
303 sodisconnect(so)
304 register struct socket *so;
305 {
306 int s = splsoftnet();
307 int error;
308
309 if ((so->so_state & SS_ISCONNECTED) == 0) {
310 error = ENOTCONN;
311 goto bad;
312 }
313 if (so->so_state & SS_ISDISCONNECTING) {
314 error = EALREADY;
315 goto bad;
316 }
317 error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
318 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
319 (struct proc *)0);
320 bad:
321 splx(s);
322 return (error);
323 }
324
325 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
326 /*
327 * Send on a socket.
328 * If send must go all at once and message is larger than
329 * send buffering, then hard error.
330 * Lock against other senders.
331 * If must go all at once and not enough room now, then
332 * inform user that this would block and do nothing.
333 * Otherwise, if nonblocking, send as much as possible.
334 * The data to be sent is described by "uio" if nonzero,
335 * otherwise by the mbuf chain "top" (which must be null
336 * if uio is not). Data provided in mbuf chain must be small
337 * enough to send all at once.
338 *
339 * Returns nonzero on error, timeout or signal; callers
340 * must check for short counts if EINTR/ERESTART are returned.
341 * Data and control buffers are freed on return.
342 */
343 int
344 sosend(so, addr, uio, top, control, flags)
345 register struct socket *so;
346 struct mbuf *addr;
347 struct uio *uio;
348 struct mbuf *top;
349 struct mbuf *control;
350 int flags;
351 {
352 struct proc *p = curproc; /* XXX */
353 struct mbuf **mp;
354 register struct mbuf *m;
355 register long space, len, resid;
356 int clen = 0, error, s, dontroute, mlen;
357 int atomic = sosendallatonce(so) || top;
358
359 if (uio)
360 resid = uio->uio_resid;
361 else
362 resid = top->m_pkthdr.len;
363 /*
364 * In theory resid should be unsigned.
365 * However, space must be signed, as it might be less than 0
366 * if we over-committed, and we must use a signed comparison
367 * of space and resid. On the other hand, a negative resid
368 * causes us to loop sending 0-length segments to the protocol.
369 */
370 if (resid < 0) {
371 error = EINVAL;
372 goto out;
373 }
374 dontroute =
375 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
376 (so->so_proto->pr_flags & PR_ATOMIC);
377 p->p_stats->p_ru.ru_msgsnd++;
378 if (control)
379 clen = control->m_len;
380 #define snderr(errno) { error = errno; splx(s); goto release; }
381
382 restart:
383 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
384 goto out;
385 do {
386 s = splsoftnet();
387 if (so->so_state & SS_CANTSENDMORE)
388 snderr(EPIPE);
389 if (so->so_error)
390 snderr(so->so_error);
391 if ((so->so_state & SS_ISCONNECTED) == 0) {
392 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
393 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
394 !(resid == 0 && clen != 0))
395 snderr(ENOTCONN);
396 } else if (addr == 0)
397 snderr(EDESTADDRREQ);
398 }
399 space = sbspace(&so->so_snd);
400 if (flags & MSG_OOB)
401 space += 1024;
402 if ((atomic && resid > so->so_snd.sb_hiwat) ||
403 clen > so->so_snd.sb_hiwat)
404 snderr(EMSGSIZE);
405 if (space < resid + clen && uio &&
406 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
407 if (so->so_state & SS_NBIO)
408 snderr(EWOULDBLOCK);
409 sbunlock(&so->so_snd);
410 error = sbwait(&so->so_snd);
411 splx(s);
412 if (error)
413 goto out;
414 goto restart;
415 }
416 splx(s);
417 mp = ⊤
418 space -= clen;
419 do {
420 if (uio == NULL) {
421 /*
422 * Data is prepackaged in "top".
423 */
424 resid = 0;
425 if (flags & MSG_EOR)
426 top->m_flags |= M_EOR;
427 } else do {
428 if (top == 0) {
429 MGETHDR(m, M_WAIT, MT_DATA);
430 mlen = MHLEN;
431 m->m_pkthdr.len = 0;
432 m->m_pkthdr.rcvif = (struct ifnet *)0;
433 } else {
434 MGET(m, M_WAIT, MT_DATA);
435 mlen = MLEN;
436 }
437 if (resid >= MINCLSIZE && space >= MCLBYTES) {
438 MCLGET(m, M_WAIT);
439 if ((m->m_flags & M_EXT) == 0)
440 goto nopages;
441 mlen = MCLBYTES;
442 #ifdef MAPPED_MBUFS
443 len = min(MCLBYTES, resid);
444 #else
445 if (atomic && top == 0) {
446 len = min(MCLBYTES - max_hdr, resid);
447 m->m_data += max_hdr;
448 } else
449 len = min(MCLBYTES, resid);
450 #endif
451 space -= len;
452 } else {
453 nopages:
454 len = min(min(mlen, resid), space);
455 space -= len;
456 /*
457 * For datagram protocols, leave room
458 * for protocol headers in first mbuf.
459 */
460 if (atomic && top == 0 && len < mlen)
461 MH_ALIGN(m, len);
462 }
463 error = uiomove(mtod(m, caddr_t), (int)len, uio);
464 resid = uio->uio_resid;
465 m->m_len = len;
466 *mp = m;
467 top->m_pkthdr.len += len;
468 if (error)
469 goto release;
470 mp = &m->m_next;
471 if (resid <= 0) {
472 if (flags & MSG_EOR)
473 top->m_flags |= M_EOR;
474 break;
475 }
476 } while (space > 0 && atomic);
477 if (dontroute)
478 so->so_options |= SO_DONTROUTE;
479 s = splsoftnet(); /* XXX */
480 error = (*so->so_proto->pr_usrreq)(so,
481 (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
482 top, addr, control, p);
483 splx(s);
484 if (dontroute)
485 so->so_options &= ~SO_DONTROUTE;
486 clen = 0;
487 control = 0;
488 top = 0;
489 mp = ⊤
490 if (error)
491 goto release;
492 } while (resid && space > 0);
493 } while (resid);
494
495 release:
496 sbunlock(&so->so_snd);
497 out:
498 if (top)
499 m_freem(top);
500 if (control)
501 m_freem(control);
502 return (error);
503 }
504
505 /*
506 * Implement receive operations on a socket.
507 * We depend on the way that records are added to the sockbuf
508 * by sbappend*. In particular, each record (mbufs linked through m_next)
509 * must begin with an address if the protocol so specifies,
510 * followed by an optional mbuf or mbufs containing ancillary data,
511 * and then zero or more mbufs of data.
512 * In order to avoid blocking network interrupts for the entire time here,
513 * we splx() while doing the actual copy to user space.
514 * Although the sockbuf is locked, new data may still be appended,
515 * and thus we must maintain consistency of the sockbuf during that time.
516 *
517 * The caller may receive the data as a single mbuf chain by supplying
518 * an mbuf **mp0 for use in returning the chain. The uio is then used
519 * only for the count in uio_resid.
 *
 * Arguments:
 *	so	 socket to receive from.
 *	paddr	 if non-null, set to the record's address mbuf (or a copy
 *		 when peeking) for PR_ADDR protocols; cleared on entry.
 *	uio	 destination of the data and, via uio_resid, the byte count.
 *	mp0	 if non-null, receives the data as an mbuf chain instead.
 *	controlp if non-null, receives the record's control mbufs;
 *		 cleared on entry.
 *	flagsp	 if non-null, supplies the MSG_* request flags (MSG_EOR is
 *		 masked off on input) and has the resulting flags OR'ed
 *		 into it on a normal return.
 *
 * Returns 0 or an errno value.
520 */
521 int
522 soreceive(so, paddr, uio, mp0, controlp, flagsp)
523 register struct socket *so;
524 struct mbuf **paddr;
525 struct uio *uio;
526 struct mbuf **mp0;
527 struct mbuf **controlp;
528 int *flagsp;
529 {
530 register struct mbuf *m, **mp;
531 register int flags, len, error, s, offset;
532 struct protosw *pr = so->so_proto;
533 struct mbuf *nextrecord;
534 int moff, type = 0;
535 int orig_resid = uio->uio_resid;
536
537 mp = mp0;
538 if (paddr)
539 *paddr = 0;
540 if (controlp)
541 *controlp = 0;
542 if (flagsp)
543 flags = *flagsp &~ MSG_EOR;
544 else
545 flags = 0;
	/*
	 * Out-of-band receive: fetch the data from the protocol with
	 * PRU_RCVOOB, copy it out through the uio and return without
	 * looking at the receive sockbuf.
	 */
546 if (flags & MSG_OOB) {
547 m = m_get(M_WAIT, MT_DATA);
548 error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
549 (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
550 (struct proc *)0);
551 if (error)
552 goto bad;
553 do {
554 error = uiomove(mtod(m, caddr_t),
555 (int) min(uio->uio_resid, m->m_len), uio);
556 m = m_free(m);
557 } while (uio->uio_resid && error == 0 && m);
558 bad:
559 if (m)
560 m_freem(m);
561 return (error);
562 }
563 if (mp)
564 *mp = (struct mbuf *)0;
565 if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
566 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
567 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
568
569 restart:
	/* Lock the receive sockbuf; MSG_DONTWAIT selects a non-waiting lock. */
570 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
571 return (error);
572 s = splsoftnet();
573
574 m = so->so_rcv.sb_mb;
575 /*
576 * If we have less data than requested, block awaiting more
577 * (subject to any timeout) if:
578 * 1. the current count is less than the low water mark,
579 * 2. MSG_WAITALL is set, and it is possible to do the entire
580 * receive operation at once if we block (resid <= hiwat), or
581 * 3. MSG_DONTWAIT is not set.
582 * If MSG_WAITALL is set but resid is larger than the receive buffer,
583 * we have to do the receive in sections, and thus risk returning
584 * a short count if a timeout or signal occurs after we start.
585 */
586 if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
587 so->so_rcv.sb_cc < uio->uio_resid) &&
588 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
589 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
590 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
591 #ifdef DIAGNOSTIC
592 if (m == 0 && so->so_rcv.sb_cc)
593 panic("receive 1");
594 #endif
	/*
	 * A pending socket error is reported only when no data is queued;
	 * MSG_PEEK leaves the error in place for the next caller.
	 */
595 if (so->so_error) {
596 if (m)
597 goto dontblock;
598 error = so->so_error;
599 if ((flags & MSG_PEEK) == 0)
600 so->so_error = 0;
601 goto release;
602 }
	/* No more data can arrive: deliver what is queued, else return 0. */
603 if (so->so_state & SS_CANTRCVMORE) {
604 if (m)
605 goto dontblock;
606 else
607 goto release;
608 }
	/*
	 * An OOB mbuf or an end-of-record mark in the first record means
	 * there is something to deliver now; restart from the record head.
	 */
609 for (; m; m = m->m_next)
610 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
611 m = so->so_rcv.sb_mb;
612 goto dontblock;
613 }
614 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
615 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
616 error = ENOTCONN;
617 goto release;
618 }
619 if (uio->uio_resid == 0)
620 goto release;
621 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
622 error = EWOULDBLOCK;
623 goto release;
624 }
	/* Nothing deliverable yet: unlock, sleep in sbwait(), start over. */
625 sbunlock(&so->so_rcv);
626 error = sbwait(&so->so_rcv);
627 splx(s);
628 if (error)
629 return (error);
630 goto restart;
631 }
632 dontblock:
633 #ifdef notyet /* XXXX */
634 if (uio->uio_procp)
635 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
636 #endif
	/* Remember the record that follows the one about to be consumed. */
637 nextrecord = m->m_nextpkt;
	/*
	 * PR_ADDR protocols start each record with an address mbuf.
	 * Give it (or a copy, when peeking) to the caller through paddr,
	 * or free it when the caller did not ask for it.
	 */
638 if (pr->pr_flags & PR_ADDR) {
639 #ifdef DIAGNOSTIC
640 if (m->m_type != MT_SONAME)
641 panic("receive 1a");
642 #endif
643 orig_resid = 0;
644 if (flags & MSG_PEEK) {
645 if (paddr)
646 *paddr = m_copy(m, 0, m->m_len);
647 m = m->m_next;
648 } else {
649 sbfree(&so->so_rcv, m);
650 if (paddr) {
651 *paddr = m;
652 so->so_rcv.sb_mb = m->m_next;
653 m->m_next = 0;
654 m = so->so_rcv.sb_mb;
655 } else {
656 MFREE(m, so->so_rcv.sb_mb);
657 m = so->so_rcv.sb_mb;
658 }
659 }
660 }
	/*
	 * Next come any control mbufs.  They are copied (peek), unlinked
	 * and chained onto *controlp, or freed.  SCM_RIGHTS messages are
	 * run through the domain's dom_externalize hook, when one exists,
	 * before being handed to the caller.
	 */
661 while (m && m->m_type == MT_CONTROL && error == 0) {
662 if (flags & MSG_PEEK) {
663 if (controlp)
664 *controlp = m_copy(m, 0, m->m_len);
665 m = m->m_next;
666 } else {
667 sbfree(&so->so_rcv, m);
668 if (controlp) {
669 if (pr->pr_domain->dom_externalize &&
670 mtod(m, struct cmsghdr *)->cmsg_type ==
671 SCM_RIGHTS)
672 error = (*pr->pr_domain->dom_externalize)(m);
673 *controlp = m;
674 so->so_rcv.sb_mb = m->m_next;
675 m->m_next = 0;
676 m = so->so_rcv.sb_mb;
677 } else {
678 MFREE(m, so->so_rcv.sb_mb);
679 m = so->so_rcv.sb_mb;
680 }
681 }
682 if (controlp) {
683 orig_resid = 0;
684 controlp = &(*controlp)->m_next;
685 }
686 }
	/*
	 * m is now the first data mbuf, if any.  Unless peeking, it has
	 * become the head of the record and must carry the record link.
	 */
687 if (m) {
688 if ((flags & MSG_PEEK) == 0)
689 m->m_nextpkt = nextrecord;
690 type = m->m_type;
691 if (type == MT_OOBDATA)
692 flags |= MSG_OOB;
693 }
	/*
	 * Data transfer loop.  moff is the offset into the current mbuf
	 * and offset the running distance toward so_oobmark; both are
	 * only advanced when peeking.
	 */
694 moff = 0;
695 offset = 0;
696 while (m && uio->uio_resid > 0 && error == 0) {
	/* Never mix OOB and ordinary data within one receive. */
697 if (m->m_type == MT_OOBDATA) {
698 if (type != MT_OOBDATA)
699 break;
700 } else if (type == MT_OOBDATA)
701 break;
702 #ifdef DIAGNOSTIC
703 else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
704 panic("receive 3");
705 #endif
706 so->so_state &= ~SS_RCVATMARK;
	/* Limit this step to the request, the OOB mark and the mbuf. */
707 len = uio->uio_resid;
708 if (so->so_oobmark && len > so->so_oobmark - offset)
709 len = so->so_oobmark - offset;
710 if (len > m->m_len - moff)
711 len = m->m_len - moff;
712 /*
713 * If mp is set, just pass back the mbufs.
714 * Otherwise copy them out via the uio, then free.
715 * Sockbuf must be consistent here (points to current mbuf,
716 * it points to next record) when we drop priority;
717 * we must note any additions to the sockbuf when we
718 * block interrupts again.
719 */
720 if (mp == 0) {
721 splx(s);
722 error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
723 s = splsoftnet();
724 } else
725 uio->uio_resid -= len;
	/* The rest of this mbuf was consumed: advance to the next one. */
726 if (len == m->m_len - moff) {
727 if (m->m_flags & M_EOR)
728 flags |= MSG_EOR;
729 if (flags & MSG_PEEK) {
730 m = m->m_next;
731 moff = 0;
732 } else {
733 nextrecord = m->m_nextpkt;
734 sbfree(&so->so_rcv, m);
735 if (mp) {
736 *mp = m;
737 mp = &m->m_next;
738 so->so_rcv.sb_mb = m = m->m_next;
739 *mp = (struct mbuf *)0;
740 } else {
741 MFREE(m, so->so_rcv.sb_mb);
742 m = so->so_rcv.sb_mb;
743 }
744 if (m)
745 m->m_nextpkt = nextrecord;
746 }
747 } else {
	/* Only part of the mbuf was used: skip or trim the consumed bytes. */
748 if (flags & MSG_PEEK)
749 moff += len;
750 else {
751 if (mp)
752 *mp = m_copym(m, 0, len, M_WAIT);
753 m->m_data += len;
754 m->m_len -= len;
755 so->so_rcv.sb_cc -= len;
756 }
757 }
	/* Stop at the out-of-band mark; a consuming read also flags it. */
758 if (so->so_oobmark) {
759 if ((flags & MSG_PEEK) == 0) {
760 so->so_oobmark -= len;
761 if (so->so_oobmark == 0) {
762 so->so_state |= SS_RCVATMARK;
763 break;
764 }
765 } else {
766 offset += len;
767 if (offset == so->so_oobmark)
768 break;
769 }
770 }
771 if (flags & MSG_EOR)
772 break;
773 /*
774 * If the MSG_WAITALL flag is set (for non-atomic socket),
775 * we must not quit until "uio->uio_resid == 0" or an error
776 * termination. If a signal/timeout occurs, return
777 * with a short count but without error.
778 * Keep sockbuf locked against other readers.
779 */
780 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
781 !sosendallatonce(so) && !nextrecord) {
782 if (so->so_error || so->so_state & SS_CANTRCVMORE)
783 break;
784 error = sbwait(&so->so_rcv);
785 if (error) {
786 sbunlock(&so->so_rcv);
787 splx(s);
788 return (0);
789 }
790 if ((m = so->so_rcv.sb_mb) != NULL)
791 nextrecord = m->m_nextpkt;
792 }
793 }
794
	/*
	 * Data left over in a record of an atomic protocol: report the
	 * truncation and, unless peeking, drop the rest of the record.
	 */
795 if (m && pr->pr_flags & PR_ATOMIC) {
796 flags |= MSG_TRUNC;
797 if ((flags & MSG_PEEK) == 0)
798 (void) sbdroprecord(&so->so_rcv);
799 }
	/*
	 * After a consuming read, step the sockbuf to the next record if
	 * this one is exhausted and tell PR_WANTRCVD protocols that data
	 * was taken.
	 */
800 if ((flags & MSG_PEEK) == 0) {
801 if (m == 0)
802 so->so_rcv.sb_mb = nextrecord;
803 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
804 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
805 (struct mbuf *)(long)flags, (struct mbuf *)0,
806 (struct proc *)0);
807 }
	/*
	 * Nothing was transferred although data was requested, no record
	 * end was seen and more may still arrive: try again from the top.
	 * (orig_resid is zeroed above once an address or control data has
	 * been processed, which disables this retry.)
	 */
808 if (orig_resid == uio->uio_resid && orig_resid &&
809 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
810 sbunlock(&so->so_rcv);
811 splx(s);
812 goto restart;
813 }
814
815 if (flagsp)
816 *flagsp |= flags;
817 release:
818 sbunlock(&so->so_rcv);
819 splx(s);
820 return (error);
821 }
822
823 int
824 soshutdown(so, how)
825 struct socket *so;
826 int how;
827 {
828 struct protosw *pr = so->so_proto;
829
830 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
831 return (EINVAL);
832
833 if (how == SHUT_RD || how == SHUT_RDWR)
834 sorflush(so);
835 if (how == SHUT_WR || how == SHUT_RDWR)
836 return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
837 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
838 return (0);
839 }
840
841 void
842 sorflush(so)
843 register struct socket *so;
844 {
845 register struct sockbuf *sb = &so->so_rcv;
846 register struct protosw *pr = so->so_proto;
847 register int s;
848 struct sockbuf asb;
849
850 sb->sb_flags |= SB_NOINTR;
851 (void) sblock(sb, M_WAITOK);
852 s = splimp();
853 socantrcvmore(so);
854 sbunlock(sb);
855 asb = *sb;
856 bzero((caddr_t)sb, sizeof(*sb));
857 splx(s);
858 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
859 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
860 sbrelease(&asb);
861 }
862
863 int
864 sosetopt(so, level, optname, m0)
865 register struct socket *so;
866 int level, optname;
867 struct mbuf *m0;
868 {
869 int error = 0;
870 register struct mbuf *m = m0;
871
872 if (level != SOL_SOCKET) {
873 if (so->so_proto && so->so_proto->pr_ctloutput)
874 return ((*so->so_proto->pr_ctloutput)
875 (PRCO_SETOPT, so, level, optname, &m0));
876 error = ENOPROTOOPT;
877 } else {
878 switch (optname) {
879
880 case SO_LINGER:
881 if (m == NULL || m->m_len != sizeof(struct linger)) {
882 error = EINVAL;
883 goto bad;
884 }
885 so->so_linger = mtod(m, struct linger *)->l_linger;
886 /* fall thru... */
887
888 case SO_DEBUG:
889 case SO_KEEPALIVE:
890 case SO_DONTROUTE:
891 case SO_USELOOPBACK:
892 case SO_BROADCAST:
893 case SO_REUSEADDR:
894 case SO_REUSEPORT:
895 case SO_OOBINLINE:
896 case SO_TIMESTAMP:
897 if (m == NULL || m->m_len < sizeof(int)) {
898 error = EINVAL;
899 goto bad;
900 }
901 if (*mtod(m, int *))
902 so->so_options |= optname;
903 else
904 so->so_options &= ~optname;
905 break;
906
907 case SO_SNDBUF:
908 case SO_RCVBUF:
909 case SO_SNDLOWAT:
910 case SO_RCVLOWAT:
911 {
912 int optval;
913
914 if (m == NULL || m->m_len < sizeof(int)) {
915 error = EINVAL;
916 goto bad;
917 }
918
919 /*
920 * Values < 1 make no sense for any of these
921 * options, so disallow them.
922 */
923 optval = *mtod(m, int *);
924 if (optval < 1) {
925 error = EINVAL;
926 goto bad;
927 }
928
929 switch (optname) {
930
931 case SO_SNDBUF:
932 case SO_RCVBUF:
933 if (sbreserve(optname == SO_SNDBUF ?
934 &so->so_snd : &so->so_rcv,
935 (u_long) optval) == 0) {
936 error = ENOBUFS;
937 goto bad;
938 }
939 break;
940
941 /*
942 * Make sure the low-water is never greater than
943 * the high-water.
944 */
945 case SO_SNDLOWAT:
946 so->so_snd.sb_lowat =
947 (optval > so->so_snd.sb_hiwat) ?
948 so->so_snd.sb_hiwat : optval;
949 break;
950 case SO_RCVLOWAT:
951 so->so_rcv.sb_lowat =
952 (optval > so->so_rcv.sb_hiwat) ?
953 so->so_rcv.sb_hiwat : optval;
954 break;
955 }
956 break;
957 }
958
959 case SO_SNDTIMEO:
960 case SO_RCVTIMEO:
961 {
962 struct timeval *tv;
963 short val;
964
965 if (m == NULL || m->m_len < sizeof(*tv)) {
966 error = EINVAL;
967 goto bad;
968 }
969 tv = mtod(m, struct timeval *);
970 if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
971 error = EDOM;
972 goto bad;
973 }
974 val = tv->tv_sec * hz + tv->tv_usec / tick;
975
976 switch (optname) {
977
978 case SO_SNDTIMEO:
979 so->so_snd.sb_timeo = val;
980 break;
981 case SO_RCVTIMEO:
982 so->so_rcv.sb_timeo = val;
983 break;
984 }
985 break;
986 }
987
988 default:
989 error = ENOPROTOOPT;
990 break;
991 }
992 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
993 (void) ((*so->so_proto->pr_ctloutput)
994 (PRCO_SETOPT, so, level, optname, &m0));
995 m = NULL; /* freed by protocol */
996 }
997 }
998 bad:
999 if (m)
1000 (void) m_free(m);
1001 return (error);
1002 }
1003
1004 int
1005 sogetopt(so, level, optname, mp)
1006 register struct socket *so;
1007 int level, optname;
1008 struct mbuf **mp;
1009 {
1010 register struct mbuf *m;
1011
1012 if (level != SOL_SOCKET) {
1013 if (so->so_proto && so->so_proto->pr_ctloutput) {
1014 return ((*so->so_proto->pr_ctloutput)
1015 (PRCO_GETOPT, so, level, optname, mp));
1016 } else
1017 return (ENOPROTOOPT);
1018 } else {
1019 m = m_get(M_WAIT, MT_SOOPTS);
1020 m->m_len = sizeof(int);
1021
1022 switch (optname) {
1023
1024 case SO_LINGER:
1025 m->m_len = sizeof(struct linger);
1026 mtod(m, struct linger *)->l_onoff =
1027 so->so_options & SO_LINGER;
1028 mtod(m, struct linger *)->l_linger = so->so_linger;
1029 break;
1030
1031 case SO_USELOOPBACK:
1032 case SO_DONTROUTE:
1033 case SO_DEBUG:
1034 case SO_KEEPALIVE:
1035 case SO_REUSEADDR:
1036 case SO_REUSEPORT:
1037 case SO_BROADCAST:
1038 case SO_OOBINLINE:
1039 case SO_TIMESTAMP:
1040 *mtod(m, int *) = so->so_options & optname;
1041 break;
1042
1043 case SO_TYPE:
1044 *mtod(m, int *) = so->so_type;
1045 break;
1046
1047 case SO_ERROR:
1048 *mtod(m, int *) = so->so_error;
1049 so->so_error = 0;
1050 break;
1051
1052 case SO_SNDBUF:
1053 *mtod(m, int *) = so->so_snd.sb_hiwat;
1054 break;
1055
1056 case SO_RCVBUF:
1057 *mtod(m, int *) = so->so_rcv.sb_hiwat;
1058 break;
1059
1060 case SO_SNDLOWAT:
1061 *mtod(m, int *) = so->so_snd.sb_lowat;
1062 break;
1063
1064 case SO_RCVLOWAT:
1065 *mtod(m, int *) = so->so_rcv.sb_lowat;
1066 break;
1067
1068 case SO_SNDTIMEO:
1069 case SO_RCVTIMEO:
1070 {
1071 int val = (optname == SO_SNDTIMEO ?
1072 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
1073
1074 m->m_len = sizeof(struct timeval);
1075 mtod(m, struct timeval *)->tv_sec = val / hz;
1076 mtod(m, struct timeval *)->tv_usec =
1077 (val % hz) * tick;
1078 break;
1079 }
1080
1081 default:
1082 (void)m_free(m);
1083 return (ENOPROTOOPT);
1084 }
1085 *mp = m;
1086 return (0);
1087 }
1088 }
1089
1090 void
1091 sohasoutofband(so)
1092 register struct socket *so;
1093 {
1094 struct proc *p;
1095
1096 if (so->so_pgid < 0)
1097 gsignal(-so->so_pgid, SIGURG);
1098 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1099 psignal(p, SIGURG);
1100 selwakeup(&so->so_rcv.sb_sel);
1101 }
1102