/*	$NetBSD: uipc_socket.c,v 1.79 2003/04/09 18:38:03 thorpej Exp $	*/

/*-
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.79 2003/04/09 18:38:03 thorpej Exp $");

#include "opt_sock_counters.h"
#include "opt_sosend_loan.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/event.h>

#include <uvm/uvm.h>

struct pool	socket_pool;

MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");

extern int	somaxconn;		/* patchable (XXX sysctl) */
int		somaxconn = SOMAXCONN;

#ifdef SOSEND_COUNTERS
#include <sys/device.h>

struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "loan big");
struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "copy big");
struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "copy small");
struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "kva limit");

#define	SOSEND_COUNTER_INCR(ev)		(ev)->ev_count++

#else

#define	SOSEND_COUNTER_INCR(ev)		/* nothing */

#endif /* SOSEND_COUNTERS */

void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", NULL);

#ifdef SOSEND_COUNTERS
	evcnt_attach_static(&sosend_loan_big);
	evcnt_attach_static(&sosend_copy_big);
	evcnt_attach_static(&sosend_copy_small);
	evcnt_attach_static(&sosend_kvalimit);
#endif /* SOSEND_COUNTERS */
}

#ifdef SOSEND_NO_LOAN
int use_sosend_loan = 0;
#else
int use_sosend_loan = 1;
#endif

struct mbuf *so_pendfree;

int somaxkva = 16 * 1024 * 1024;
int socurkva;
int sokvawaiters;

#define	SOCK_LOAN_THRESH	4096
#define	SOCK_LOAN_CHUNK		65536
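
/*
 * With these values, a user write whose current iovec is at least
 * SOCK_LOAN_THRESH (4 KB) is a candidate for zero-copy page loaning
 * in sosend_loan() below, and each loan covers at most
 * SOCK_LOAN_CHUNK (64 KB) of the user's buffer per pass through the
 * sosend() loop.
 */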

static void
sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size)
{
	vaddr_t va, sva, eva;
	vsize_t len;
	paddr_t pa;
	int i, npgs;

	eva = round_page((vaddr_t) buf + size);
	sva = trunc_page((vaddr_t) buf);
	len = eva - sva;
	npgs = len >> PAGE_SHIFT;

	if (__predict_false(pgs == NULL)) {
		pgs = alloca(npgs * sizeof(*pgs));

		for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
			if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
				panic("sodoloanfree: va 0x%lx not mapped", va);
			pgs[i] = PHYS_TO_VM_PAGE(pa);
		}
	}

	pmap_kremove(sva, len);
	pmap_update(pmap_kernel());
	uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE);
	uvm_km_free(kernel_map, sva, len);
	socurkva -= len;
	if (sokvawaiters)
		wakeup(&socurkva);
}

static size_t
sodopendfree(struct socket *so)
{
	struct mbuf *m;
	size_t rv = 0;
	int s;

	s = splvm();

	for (;;) {
		m = so_pendfree;
		if (m == NULL)
			break;
		so_pendfree = m->m_next;
		splx(s);

		rv += m->m_ext.ext_size;
		sodoloanfree((m->m_flags & M_EXT_PAGES) ?
		    m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
		    m->m_ext.ext_size);
		s = splvm();
		pool_cache_put(&mbpool_cache, m);
	}

	for (;;) {
		m = so->so_pendfree;
		if (m == NULL)
			break;
		so->so_pendfree = m->m_next;
		splx(s);

		rv += m->m_ext.ext_size;
		sodoloanfree((m->m_flags & M_EXT_PAGES) ?
		    m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
		    m->m_ext.ext_size);
		s = splvm();
		pool_cache_put(&mbpool_cache, m);
	}

	splx(s);
	return (rv);
}

static void
soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg)
{
	struct socket *so = arg;
	int s;

	if (m == NULL) {
		sodoloanfree(NULL, buf, size);
		return;
	}

	s = splvm();
	m->m_next = so->so_pendfree;
	so->so_pendfree = m;
	splx(s);
	if (sokvawaiters)
		wakeup(&socurkva);
}

static long
sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space)
{
	struct iovec *iov = uio->uio_iov;
	vaddr_t sva, eva;
	vsize_t len;
	vaddr_t lva, va;
	int npgs, s, i, error;

	if (uio->uio_segflg != UIO_USERSPACE)
		return (0);

	if (iov->iov_len < (size_t) space)
		space = iov->iov_len;
	if (space > SOCK_LOAN_CHUNK)
		space = SOCK_LOAN_CHUNK;

	eva = round_page((vaddr_t) iov->iov_base + space);
	sva = trunc_page((vaddr_t) iov->iov_base);
	len = eva - sva;
	npgs = len >> PAGE_SHIFT;

	/* XXX KDASSERT */
	KASSERT(npgs <= M_EXT_MAXPAGES);

	while (socurkva + len > somaxkva) {
		if (sodopendfree(so))
			continue;
		SOSEND_COUNTER_INCR(&sosend_kvalimit);
		s = splvm();
		sokvawaiters++;
		(void) tsleep(&socurkva, PVM, "sokva", 0);
		sokvawaiters--;
		splx(s);
	}

	lva = uvm_km_valloc_wait(kernel_map, len);
	if (lva == 0)
		return (0);
	socurkva += len;

	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
	    m->m_ext.ext_pgs, UVM_LOAN_TOPAGE);
	if (error) {
		uvm_km_free(kernel_map, lva, len);
		socurkva -= len;
		return (0);
	}

	for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]),
		    VM_PROT_READ);
	pmap_update(pmap_kernel());

	lva += (vaddr_t) iov->iov_base & PAGE_MASK;

	MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
	m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP;

	uio->uio_resid -= space;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space;
	uio->uio_iov->iov_len -= space;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	return (space);
}
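
/*
 * Worked example of the address arithmetic above, assuming 4 KB pages:
 * for iov_base = 0x20001203 and space = 5000,
 *
 *	sva  = trunc_page(0x20001203)        = 0x20001000
 *	eva  = round_page(0x20001203 + 5000) = 0x20003000
 *	len  = eva - sva                     = 0x2000 (two pages)
 *	npgs = len >> PAGE_SHIFT             = 2
 *
 * The loaned pages are mapped read-only at lva, and the sub-page
 * offset of iov_base is added back to lva before MEXTADD() so that
 * the mbuf's external storage points at the user's data rather than
 * at the page boundary.
 */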

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc	*p;
	struct protosw	*prp;
	struct socket	*so;
	int		error, s;

	p = curproc;		/* XXX */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
#ifdef MBUFTRACE
	so->so_rcv.sb_mowner = &prp->pr_domain->dom_mowner;
	so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner;
	so->so_mowner = &prp->pr_domain->dom_mowner;
#endif
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
	splx(s);
	*aso = so;
	return (0);
}
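
/*
 * A minimal sketch of in-kernel use (hypothetical caller; error
 * handling abbreviated):
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, 0);
 *	if (error == 0)
 *		error = sobind(so, nam, curproc);
 *
 * where "nam" is an mbuf holding a struct sockaddr_in, as the
 * protocol's PRU_BIND usrreq expects.
 */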

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
	    nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_q))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0)
		backlog = 0;
	so->so_qlimit = min(backlog, somaxconn);
	splx(s);
	return (0);
}
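
/*
 * Example of the backlog clamp above: with somaxconn at its default
 * of SOMAXCONN (128), a listen(s, 1000) from user space arrives here
 * with backlog = 1000 and so_qlimit is clamped to 128; a negative
 * backlog is first raised to 0.
 */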

void
sofree(struct socket *so)
{
	struct mbuf *m;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	while ((m = so->so_pendfree) != NULL) {
		so->so_pendfree = m->m_next;
		m->m_next = so_pendfree;
		so_pendfree = m;
	}
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket	*so2;
	int		s, error;

	error = 0;
	s = splsoftnet();		/* conservative */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
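
/*
 * Example of the SO_LINGER interaction above: with l_onoff set and
 * l_linger = 5 on a blocking socket, soclose() sleeps in the
 * SS_ISCONNECTED loop with a timeout of so_linger * hz ticks (about
 * 5 seconds) waiting for the disconnect to complete; a non-blocking
 * socket (SS_NBIO) that is already disconnecting skips the wait and
 * goes straight to the protocol detach.
 */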

/*
 * Must be called at splsoftnet...
 */
int
soabort(struct socket *so)
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int	s, error;

	error = 0;
	s = splsoftnet();
	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
	else
		error = ECONNABORTED;

	splx(s);
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	struct proc	*p;
	int		s, error;

	p = curproc;		/* XXX */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}
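
/*
 * Example of the "disconnect by reconnecting" behaviour described
 * above: on a connected UDP socket (no PR_CONNREQUIRED), a second
 * connect(2) with a new address lands here with SS_ISCONNECTED set,
 * sodisconnect() is tried first, and only if that fails does the
 * caller get EISCONN; a TCP socket (PR_CONNREQUIRED) gets EISCONN
 * immediately.
 */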

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int	s, error;

	s = splsoftnet();
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
	    (struct proc *)0);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int	s, error;

	s = splsoftnet();
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0);
 bad:
	splx(s);
	sodopendfree(so);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{
	struct proc	*p;
	struct mbuf	**mp, *m;
	long		space, len, resid, clen, mlen;
	int		error, s, dontroute, atomic;

	sodopendfree(so);

	p = curproc;		/* XXX */
	clen = 0;
	atomic = sosendallatonce(so) || top;
	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					m = m_gethdr(M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					m = m_get(M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				MCLAIM(m, so->so_snd.sb_mowner);
				if (use_sosend_loan &&
				    uio->uio_iov->iov_len >= SOCK_LOAN_THRESH &&
				    space >= SOCK_LOAN_THRESH &&
				    (len = sosend_loan(so, uio, m,
				     space)) != 0) {
					SOSEND_COUNTER_INCR(&sosend_loan_big);
					space -= len;
					goto have_data;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					SOSEND_COUNTER_INCR(&sosend_copy_big);
					m_clget(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
					space -= len;
				} else {
 nopages:
					SOSEND_COUNTER_INCR(&sosend_copy_small);
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
 have_data:
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
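
/*
 * A minimal sketch of how the socket layer drives this routine
 * (hypothetical caller, modelled on the sendit()/sys_sendto() path;
 * setup abbreviated):
 *
 *	struct uio auio;
 *	struct iovec aiov;
 *
 *	aiov.iov_base = user_buf;
 *	aiov.iov_len = user_len;
 *	auio.uio_iov = &aiov;
 *	auio.uio_iovcnt = 1;
 *	auio.uio_resid = user_len;
 *	auio.uio_segflg = UIO_USERSPACE;
 *	error = (*so->so_send)(so, to, &auio, NULL, control, flags);
 *
 * Per the short-count caveat above, callers compare uio_resid before
 * and after the call to learn how much was actually queued.
 */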

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf	*m, **mp;
	int		flags, len, error, s, offset, moff, type, orig_resid;
	struct protosw	*pr;
	struct mbuf	*nextrecord;
	int		mbuf_removed = 0;

	pr = so->so_proto;
	mp = mp0;
	type = 0;
	orig_resid = uio->uio_resid;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;

	if ((flags & MSG_DONTWAIT) == 0)
		sodopendfree(so);

	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
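	/*
	 * Concrete example of the test below: a stream socket with
	 * sb_cc = 100, sb_lowat = 1, and a read of resid = 200 does
	 * not block (condition 1 fails and MSG_WAITALL is clear), so
	 * the caller gets a short read of 100 bytes; the same read
	 * with MSG_WAITALL set and sb_hiwat >= 200 does block until
	 * 200 bytes arrive, EOF, or an error (condition 2).
	 */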
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			mbuf_removed = 1;
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			mbuf_removed = 1;
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
			if (error) {
				/*
				 * If any part of the record has been removed
				 * (such as the MT_SONAME mbuf, which will
				 * happen when PR_ADDR, and thus also
				 * PR_ATOMIC, is set), then drop the entire
				 * record to maintain the atomicity of the
				 * receive operation.
				 *
				 * This avoids a later panic("receive 1a")
				 * when compiled with DIAGNOSTIC.
				 */
				if (m && mbuf_removed
				    && (pr->pr_flags & PR_ATOMIC))
					(void) sbdroprecord(&so->so_rcv);

				goto release;
			}
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			/*
			 * If we are peeking and the socket receive buffer is
			 * full, stop since we can't get more data to peek at.
			 */
			if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0)
				break;
			/*
			 * If we've drained the socket buffer, tell the
			 * protocol in case it needs to do something to
			 * get it filled again.
			 */
			if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
				(*pr->pr_usrreq)(so, PRU_RCVD,
				    (struct mbuf *)0,
				    (struct mbuf *)(long)flags,
				    (struct mbuf *)0,
				    (struct proc *)0);
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
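
/*
 * A minimal sketch of a caller (hypothetical, modelled on the
 * recvfrom(2) path): uio is set up as for sosend() above, and the
 * optional pointers select what is returned:
 *
 *	struct mbuf *from = NULL, *control = NULL;
 *	int flags = MSG_WAITALL;
 *
 *	error = (*so->so_receive)(so, &from, &auio, NULL,
 *	    &control, &flags);
 *
 * On return "from" holds the source address (if PR_ADDR), "control"
 * any ancillary data, and flags may have MSG_EOR, MSG_TRUNC, or
 * MSG_OOB ored in.
 */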

int
soshutdown(struct socket *so, int how)
{
	struct protosw	*pr;

	pr = so->so_proto;
	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how == SHUT_RD || how == SHUT_RDWR)
		sorflush(so);
	if (how == SHUT_WR || how == SHUT_RDWR)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	return (0);
}

void
sorflush(struct socket *so)
{
	struct sockbuf	*sb, asb;
	struct protosw	*pr;
	int		s;

	sb = &so->so_rcv;
	pr = so->so_proto;
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int		error;
	struct mbuf	*m;

	error = 0;
	m = m0;
	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
			if (val == 0 && tv->tv_usec != 0)
				val = 1;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}
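
/*
 * Worked example of the SO_SNDTIMEO/SO_RCVTIMEO conversion above,
 * assuming hz = 100 and tick = 10000 (microseconds per tick):
 * tv = { 1, 500000 } gives val = 1 * 100 + 500000 / 10000 = 150
 * ticks, and sogetopt() below converts it back to 1.5 seconds.
 * A non-zero timeout that rounds down to 0 ticks is forced up to 1
 * so it is not mistaken for "no timeout".
 */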

int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf	*m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}

static void
filt_sordetach(struct knote *kn)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;
	SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;
	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;
	SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist))
		so->so_snd.sb_flags &= ~SB_KNOTE;
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;
	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;

	/*
	 * Set kn_data to number of incoming connections, not
	 * counting partial (incomplete) connections.
	 */
	kn->kn_data = so->so_qlen;
	return (kn->kn_data > 0);
}

static const struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static const struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static const struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket	*so;
	struct sockbuf	*sb;

	so = (struct socket *)kn->kn_fp->f_data;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}
	SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	return (0);
}
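
/*
 * Example of the filter selection above: registering EVFILT_READ on
 * a listening socket attaches solisten_filtops, so the kevent fires
 * when so_qlen > 0 and kn_data reports the number of completed
 * connections; on a non-listening socket it attaches soread_filtops
 * and kn_data reports sb_cc, honouring NOTE_LOWAT if requested.
 */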