/*	$NetBSD: uipc_socket.c,v 1.54.2.10 2002/04/17 00:06:19 nathanw Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.54.2.10 2002/04/17 00:06:19 nathanw Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lwp.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>

struct pool	socket_pool;

extern int	somaxconn;		/* patchable (XXX sysctl) */
int		somaxconn = SOMAXCONN;

void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", NULL);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
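/*
 * Illustrative call sequence (a hypothetical in-kernel caller; "nam" is
 * assumed to be an mbuf holding a filled-in sockaddr and "p" the calling
 * process, with error handling abbreviated).  Each step funnels into the
 * protocol through its (*pr_usrreq)() entry:
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, 0);	-> PRU_ATTACH
 *	if (error == 0)
 *		error = sobind(so, nam, p);		-> PRU_BIND
 *	if (error == 0)
 *		error = solisten(so, 5);		-> PRU_LISTEN
 */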
/*ARGSUSED*/
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc	*p;
	struct protosw	*prp;
	struct socket	*so;
	int		error, s;

	p = curproc->l_proc;		/* XXX */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
	splx(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
	    nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_q))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0)
		backlog = 0;
	so->so_qlimit = min(backlog, somaxconn);
	splx(s);
	return (0);
}

void
sofree(struct socket *so)
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
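/*
 * For example (timing hypothetical): with SO_LINGER set, so_linger == 5
 * and a blocking socket, closing a still-connected socket below sleeps
 * in tsleep() for up to 5 * hz ticks waiting for SS_ISCONNECTED to
 * clear; with SS_NBIO set, the wait is skipped and the protocol is
 * detached immediately.
 */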
int
soclose(struct socket *so)
{
	struct socket	*so2;
	int		s, error;

	error = 0;
	s = splsoftnet();		/* conservative */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet...
 */
int
soabort(struct socket *so)
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int	s, error;

	error = 0;
	s = splsoftnet();
	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
	else
		error = ECONNABORTED;

	splx(s);
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	struct proc	*p;
	int		s, error;

	p = curproc->l_proc;		/* XXX */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int	s, error;

	s = splsoftnet();
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
	    (struct proc *)0);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int	s, error;

	s = splsoftnet();
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0);
 bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
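/*
 * Worked example of the "all at once" rule (numbers hypothetical):
 * on a datagram (PR_ATOMIC) socket with so_snd.sb_hiwat == 9216, a
 * 16384-byte send can never fit and fails with EMSGSIZE immediately,
 * while a 4096-byte send that merely finds the buffer full either
 * returns EWOULDBLOCK (SS_NBIO set) or sleeps in sbwait() until
 * space drains.
 */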
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{
	struct proc	*p;
	struct mbuf	**mp, *m;
	long		space, len, resid, clen, mlen;
	int		error, s, dontroute, atomic;

	p = curproc->l_proc;		/* XXX */
	clen = 0;
	atomic = sosendallatonce(so) || top;
	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = lmin(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
#endif
					space -= len;
				} else {
 nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
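/*
 * Sketch of one such record for a PR_ADDR protocol carrying ancillary
 * data (layout per the rules above; contents hypothetical).  soreceive()
 * peels it off front to back:
 *
 *	so_rcv.sb_mb -> [MT_SONAME] -> [MT_CONTROL] -> [MT_DATA] -> ...
 *	                     |
 *	                m_nextpkt
 *	                     |
 *	                     v
 *	               next record
 */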
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf	*m, **mp;
	int		flags, len, error, s, offset, moff, type, orig_resid;
	struct protosw	*pr;
	struct mbuf	*nextrecord;

	pr = so->so_proto;
	mp = mp0;
	type = 0;
	orig_resid = uio->uio_resid;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
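	/*
	 * E.g. (hypothetical numbers): with sb_lowat == 1, a 2048-byte
	 * request finding 1024 buffered bytes does not block unless
	 * MSG_WAITALL is set, and even then only while
	 * uio_resid <= sb_hiwat, since a larger request must be done
	 * in sections anyway.
	 */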
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * The sockbuf must be consistent here (sb_mb points to
		 * the current mbuf, whose m_nextpkt points to the next
		 * record) when we drop priority; we must note any
		 * additions to the sockbuf when we block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	struct protosw	*pr;

	pr = so->so_proto;
	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how == SHUT_RD || how == SHUT_RDWR)
		sorflush(so);
	if (how == SHUT_WR || how == SHUT_RDWR)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	return (0);
}

void
sorflush(struct socket *so)
{
	struct sockbuf	*sb, asb;
	struct protosw	*pr;
	int		s;

	sb = &so->so_rcv;
	pr = so->so_proto;
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int		error;
	struct mbuf	*m;

	error = 0;
	m = m0;
	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
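			/*
			 * E.g. (hypothetical clock values) with hz == 100
			 * and tick == 10000: a 1.5 second timeout gives
			 * val == 1 * 100 + 500000 / 10000 == 150 ticks.
			 */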

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}

int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf	*m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	struct proc	*p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}