1 1.314 kre /* $NetBSD: uipc_socket.c,v 1.314 2025/07/16 19:14:13 kre Exp $ */ 2 1.64 thorpej 3 1.270 maxv /* 4 1.304 ad * Copyright (c) 2002, 2007, 2008, 2009, 2023 The NetBSD Foundation, Inc. 5 1.64 thorpej * All rights reserved. 6 1.64 thorpej * 7 1.64 thorpej * This code is derived from software contributed to The NetBSD Foundation 8 1.188 ad * by Jason R. Thorpe of Wasabi Systems, Inc, and by Andrew Doran. 9 1.64 thorpej * 10 1.64 thorpej * Redistribution and use in source and binary forms, with or without 11 1.64 thorpej * modification, are permitted provided that the following conditions 12 1.64 thorpej * are met: 13 1.64 thorpej * 1. Redistributions of source code must retain the above copyright 14 1.64 thorpej * notice, this list of conditions and the following disclaimer. 15 1.64 thorpej * 2. Redistributions in binary form must reproduce the above copyright 16 1.64 thorpej * notice, this list of conditions and the following disclaimer in the 17 1.64 thorpej * documentation and/or other materials provided with the distribution. 18 1.64 thorpej * 19 1.64 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.64 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.64 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.64 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.64 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.64 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.64 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.64 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.64 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.64 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.64 thorpej * POSSIBILITY OF SUCH DAMAGE. 30 1.64 thorpej */ 31 1.16 cgd 32 1.1 cgd /* 33 1.159 ad * Copyright (c) 2004 The FreeBSD Foundation 34 1.159 ad * Copyright (c) 2004 Robert Watson 35 1.15 mycroft * Copyright (c) 1982, 1986, 1988, 1990, 1993 36 1.15 mycroft * The Regents of the University of California. All rights reserved. 37 1.1 cgd * 38 1.1 cgd * Redistribution and use in source and binary forms, with or without 39 1.1 cgd * modification, are permitted provided that the following conditions 40 1.1 cgd * are met: 41 1.1 cgd * 1. Redistributions of source code must retain the above copyright 42 1.1 cgd * notice, this list of conditions and the following disclaimer. 43 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 44 1.1 cgd * notice, this list of conditions and the following disclaimer in the 45 1.1 cgd * documentation and/or other materials provided with the distribution. 46 1.85 agc * 3. Neither the name of the University nor the names of its contributors 47 1.1 cgd * may be used to endorse or promote products derived from this software 48 1.1 cgd * without specific prior written permission. 49 1.1 cgd * 50 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 1.1 cgd * SUCH DAMAGE. 61 1.1 cgd * 62 1.32 fvdl * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 63 1.1 cgd */ 64 1.59 lukem 65 1.222 rmind /* 66 1.222 rmind * Socket operation routines. 67 1.222 rmind * 68 1.222 rmind * These routines are called by the routines in sys_socket.c or from a 69 1.222 rmind * system process, and implement the semantics of socket operations by 70 1.222 rmind * switching out to the protocol specific routines. 71 1.222 rmind */ 72 1.222 rmind 73 1.59 lukem #include <sys/cdefs.h> 74 1.314 kre __KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.314 2025/07/16 19:14:13 kre Exp $"); 75 1.64 thorpej 76 1.246 pooka #ifdef _KERNEL_OPT 77 1.179 christos #include "opt_compat_netbsd.h" 78 1.81 martin #include "opt_mbuftrace.h" 79 1.167 ad #include "opt_multiprocessor.h" /* XXX */ 80 1.311 riastrad #include "opt_pipe.h" 81 1.247 rjs #include "opt_sctp.h" 82 1.311 riastrad #include "opt_sock_counters.h" 83 1.311 riastrad #include "opt_somaxkva.h" 84 1.311 riastrad #include "opt_sosend_loan.h" 85 1.246 pooka #endif 86 1.1 cgd 87 1.9 mycroft #include <sys/param.h> 88 1.311 riastrad #include <sys/types.h> 89 1.311 riastrad 90 1.311 riastrad #include <sys/compat_stub.h> 91 1.311 riastrad #include <sys/condvar.h> 92 1.311 riastrad #include <sys/domain.h> 93 1.311 riastrad #include <sys/event.h> 94 1.9 mycroft #include <sys/file.h> 95 1.142 dyoung #include <sys/filedesc.h> 96 1.311 riastrad #include <sys/kauth.h> 97 1.311 riastrad #include <sys/kernel.h> 98 1.173 plunky #include <sys/kmem.h> 99 1.311 riastrad #include <sys/kthread.h> 100 1.9 mycroft #include <sys/mbuf.h> 101 1.311 riastrad #include <sys/mutex.h> 102 1.311 riastrad #include <sys/poll.h> 103 1.311 riastrad #include <sys/proc.h> 104 1.9 mycroft #include <sys/protosw.h> 105 1.311 riastrad #include <sys/resourcevar.h> 106 1.313 riastrad #include <sys/sdt.h> 107 1.311 riastrad #include <sys/signalvar.h> 108 1.9 mycroft #include <sys/socket.h> 109 1.9 mycroft #include <sys/socketvar.h> 110 1.311 riastrad #include <sys/systm.h> 111 1.174 pooka #include <sys/uidinfo.h> 112 1.37 thorpej 113 1.311 riastrad #include <compat/sys/socket.h> 114 1.179 christos #include <compat/sys/time.h> 115 1.179 christos 116 1.202 uebayasi #include <uvm/uvm_extern.h> 117 1.202 uebayasi #include <uvm/uvm_loan.h> 118 1.202 uebayasi #include <uvm/uvm_page.h> 119 1.64 thorpej 120 1.281 pgoyette #ifdef SCTP 121 1.281 pgoyette #include <netinet/sctp_route.h> 122 1.281 pgoyette #endif 123 1.281 pgoyette 124 1.77 thorpej MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 125 1.37 thorpej 126 1.142 dyoung extern const struct fileops socketops; 127 1.142 dyoung 128 1.266 christos static int sooptions; 129 1.54 lukem extern int somaxconn; /* patchable (XXX sysctl) */ 130 1.54 lukem int somaxconn = SOMAXCONN; 131 1.160 ad kmutex_t *softnet_lock; 132 1.49 jonathan 133 1.64 thorpej #ifdef SOSEND_COUNTERS 134 1.64 thorpej #include <sys/device.h> 135 1.64 thorpej 136 1.113 thorpej static struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 137 1.64 thorpej NULL, "sosend", "loan big"); 138 1.113 thorpej static struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 139 1.64 thorpej NULL, "sosend", "copy big"); 140 1.113 thorpej static struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 141 1.64 thorpej NULL, "sosend", "copy small"); 142 1.113 thorpej static struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, 143 1.64 thorpej NULL, "sosend", "kva limit"); 144 1.64 thorpej 145 1.64 thorpej #define SOSEND_COUNTER_INCR(ev) (ev)->ev_count++ 146 1.64 thorpej 147 1.101 matt EVCNT_ATTACH_STATIC(sosend_loan_big); 148 1.101 matt EVCNT_ATTACH_STATIC(sosend_copy_big); 149 1.101 matt EVCNT_ATTACH_STATIC(sosend_copy_small); 150 1.101 matt EVCNT_ATTACH_STATIC(sosend_kvalimit); 151 1.64 thorpej #else 152 1.64 thorpej 153 1.64 thorpej #define SOSEND_COUNTER_INCR(ev) /* nothing */ 154 1.64 thorpej 155 1.64 thorpej #endif /* SOSEND_COUNTERS */ 156 1.64 thorpej 157 1.167 ad #if defined(SOSEND_NO_LOAN) || defined(MULTIPROCESSOR) 158 1.121 yamt int sock_loan_thresh = -1; 159 1.71 thorpej #else 160 1.121 yamt int sock_loan_thresh = 4096; 161 1.65 thorpej #endif 162 1.64 thorpej 163 1.136 ad static kmutex_t so_pendfree_lock; 164 1.205 bouyer static struct mbuf *so_pendfree = NULL; 165 1.64 thorpej 166 1.84 ragge #ifndef SOMAXKVA 167 1.84 ragge #define SOMAXKVA (16 * 1024 * 1024) 168 1.84 ragge #endif 169 1.84 ragge int somaxkva = SOMAXKVA; 170 1.113 thorpej static int socurkva; 171 1.136 ad static kcondvar_t socurkva_cv; 172 1.64 thorpej 173 1.292 mlelstv #ifndef SOFIXEDBUF 174 1.292 mlelstv #define SOFIXEDBUF true 175 1.292 mlelstv #endif 176 1.292 mlelstv bool sofixedbuf = SOFIXEDBUF; 177 1.292 mlelstv 178 1.191 elad static kauth_listener_t socket_listener; 179 1.191 elad 180 1.64 thorpej #define SOCK_LOAN_CHUNK 65536 181 1.64 thorpej 182 1.205 bouyer static void sopendfree_thread(void *); 183 1.205 bouyer static kcondvar_t pendfree_thread_cv; 184 1.205 bouyer static lwp_t *sopendfree_lwp; 185 1.93 yamt 186 1.212 pooka static void sysctl_kern_socket_setup(void); 187 1.178 pooka static struct sysctllog *socket_sysctllog; 188 1.178 pooka 189 1.113 thorpej static vsize_t 190 1.129 yamt sokvareserve(struct socket *so, vsize_t len) 191 1.80 yamt { 192 1.98 christos int error; 193 1.80 yamt 194 1.136 ad mutex_enter(&so_pendfree_lock); 195 1.80 yamt while (socurkva + len > somaxkva) { 196 1.80 yamt SOSEND_COUNTER_INCR(&sosend_kvalimit); 197 1.136 ad error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock); 198 1.98 christos if (error) { 199 1.98 christos len = 0; 200 1.98 christos break; 201 1.98 christos } 202 1.80 yamt } 203 1.93 yamt socurkva += len; 204 1.136 ad mutex_exit(&so_pendfree_lock); 205 1.98 christos return len; 206 1.95 yamt } 207 1.95 yamt 208 1.113 thorpej static void 209 1.95 yamt sokvaunreserve(vsize_t len) 210 1.95 yamt { 211 1.95 yamt 212 1.136 ad mutex_enter(&so_pendfree_lock); 213 1.95 yamt socurkva -= len; 214 1.136 ad cv_broadcast(&socurkva_cv); 215 1.136 ad mutex_exit(&so_pendfree_lock); 216 1.95 yamt } 217 1.95 yamt 218 1.95 yamt /* 219 1.95 yamt * sokvaalloc: allocate kva for loan. 220 1.95 yamt */ 221 1.95 yamt vaddr_t 222 1.209 matt sokvaalloc(vaddr_t sva, vsize_t len, struct socket *so) 223 1.95 yamt { 224 1.95 yamt vaddr_t lva; 225 1.95 yamt 226 1.98 christos if (sokvareserve(so, len) == 0) 227 1.98 christos return 0; 228 1.93 yamt 229 1.209 matt lva = uvm_km_alloc(kernel_map, len, atop(sva) & uvmexp.colormask, 230 1.209 matt UVM_KMF_COLORMATCH | UVM_KMF_VAONLY | UVM_KMF_WAITVA); 231 1.95 yamt if (lva == 0) { 232 1.95 yamt sokvaunreserve(len); 233 1.270 maxv return 0; 234 1.95 yamt } 235 1.80 yamt 236 1.80 yamt return lva; 237 1.80 yamt } 238 1.80 yamt 239 1.93 yamt /* 240 1.93 yamt * sokvafree: free kva for loan. 241 1.93 yamt */ 242 1.80 yamt void 243 1.80 yamt sokvafree(vaddr_t sva, vsize_t len) 244 1.80 yamt { 245 1.93 yamt 246 1.109 yamt uvm_km_free(kernel_map, sva, len, UVM_KMF_VAONLY); 247 1.95 yamt sokvaunreserve(len); 248 1.80 yamt } 249 1.80 yamt 250 1.64 thorpej static void 251 1.134 christos sodoloanfree(struct vm_page **pgs, void *buf, size_t size) 252 1.64 thorpej { 253 1.156 yamt vaddr_t sva, eva; 254 1.64 thorpej vsize_t len; 255 1.156 yamt int npgs; 256 1.156 yamt 257 1.156 yamt KASSERT(pgs != NULL); 258 1.64 thorpej 259 1.64 thorpej eva = round_page((vaddr_t) buf + size); 260 1.64 thorpej sva = trunc_page((vaddr_t) buf); 261 1.64 thorpej len = eva - sva; 262 1.64 thorpej npgs = len >> PAGE_SHIFT; 263 1.64 thorpej 264 1.64 thorpej pmap_kremove(sva, len); 265 1.64 thorpej pmap_update(pmap_kernel()); 266 1.64 thorpej uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE); 267 1.80 yamt sokvafree(sva, len); 268 1.64 thorpej } 269 1.64 thorpej 270 1.93 yamt /* 271 1.270 maxv * sopendfree_thread: free mbufs on "pendfree" list. Unlock and relock 272 1.270 maxv * so_pendfree_lock when freeing mbufs. 273 1.93 yamt */ 274 1.205 bouyer static void 275 1.205 bouyer sopendfree_thread(void *v) 276 1.93 yamt { 277 1.137 ad struct mbuf *m, *next; 278 1.205 bouyer size_t rv; 279 1.93 yamt 280 1.205 bouyer mutex_enter(&so_pendfree_lock); 281 1.64 thorpej 282 1.205 bouyer for (;;) { 283 1.205 bouyer rv = 0; 284 1.205 bouyer while (so_pendfree != NULL) { 285 1.205 bouyer m = so_pendfree; 286 1.205 bouyer so_pendfree = NULL; 287 1.205 bouyer mutex_exit(&so_pendfree_lock); 288 1.205 bouyer 289 1.205 bouyer for (; m != NULL; m = next) { 290 1.205 bouyer next = m->m_next; 291 1.253 ryo KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 292 1.253 ryo 0); 293 1.205 bouyer KASSERT(m->m_ext.ext_refcnt == 0); 294 1.205 bouyer 295 1.205 bouyer rv += m->m_ext.ext_size; 296 1.205 bouyer sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, 297 1.205 bouyer m->m_ext.ext_size); 298 1.205 bouyer pool_cache_put(mb_cache, m); 299 1.205 bouyer } 300 1.93 yamt 301 1.205 bouyer mutex_enter(&so_pendfree_lock); 302 1.93 yamt } 303 1.205 bouyer if (rv) 304 1.205 bouyer cv_broadcast(&socurkva_cv); 305 1.205 bouyer cv_wait(&pendfree_thread_cv, &so_pendfree_lock); 306 1.64 thorpej } 307 1.205 bouyer panic("sopendfree_thread"); 308 1.205 bouyer /* NOTREACHED */ 309 1.64 thorpej } 310 1.64 thorpej 311 1.80 yamt void 312 1.134 christos soloanfree(struct mbuf *m, void *buf, size_t size, void *arg) 313 1.64 thorpej { 314 1.64 thorpej 315 1.156 yamt KASSERT(m != NULL); 316 1.64 thorpej 317 1.93 yamt /* 318 1.93 yamt * postpone freeing mbuf. 319 1.93 yamt * 320 1.93 yamt * we can't do it in interrupt context 321 1.93 yamt * because we need to put kva back to kernel_map. 322 1.93 yamt */ 323 1.93 yamt 324 1.136 ad mutex_enter(&so_pendfree_lock); 325 1.92 yamt m->m_next = so_pendfree; 326 1.92 yamt so_pendfree = m; 327 1.205 bouyer cv_signal(&pendfree_thread_cv); 328 1.136 ad mutex_exit(&so_pendfree_lock); 329 1.64 thorpej } 330 1.64 thorpej 331 1.64 thorpej static long 332 1.64 thorpej sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space) 333 1.64 thorpej { 334 1.64 thorpej struct iovec *iov = uio->uio_iov; 335 1.64 thorpej vaddr_t sva, eva; 336 1.64 thorpej vsize_t len; 337 1.156 yamt vaddr_t lva; 338 1.156 yamt int npgs, error; 339 1.156 yamt vaddr_t va; 340 1.156 yamt int i; 341 1.64 thorpej 342 1.116 yamt if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) 343 1.270 maxv return 0; 344 1.64 thorpej 345 1.64 thorpej if (iov->iov_len < (size_t) space) 346 1.64 thorpej space = iov->iov_len; 347 1.64 thorpej if (space > SOCK_LOAN_CHUNK) 348 1.64 thorpej space = SOCK_LOAN_CHUNK; 349 1.64 thorpej 350 1.64 thorpej eva = round_page((vaddr_t) iov->iov_base + space); 351 1.64 thorpej sva = trunc_page((vaddr_t) iov->iov_base); 352 1.64 thorpej len = eva - sva; 353 1.64 thorpej npgs = len >> PAGE_SHIFT; 354 1.64 thorpej 355 1.79 thorpej KASSERT(npgs <= M_EXT_MAXPAGES); 356 1.79 thorpej 357 1.209 matt lva = sokvaalloc(sva, len, so); 358 1.64 thorpej if (lva == 0) 359 1.252 uwe return 0; 360 1.64 thorpej 361 1.116 yamt error = uvm_loan(&uio->uio_vmspace->vm_map, sva, len, 362 1.79 thorpej m->m_ext.ext_pgs, UVM_LOAN_TOPAGE); 363 1.64 thorpej if (error) { 364 1.80 yamt sokvafree(lva, len); 365 1.270 maxv return 0; 366 1.64 thorpej } 367 1.64 thorpej 368 1.64 thorpej for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) 369 1.79 thorpej pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), 370 1.194 cegger VM_PROT_READ, 0); 371 1.64 thorpej pmap_update(pmap_kernel()); 372 1.64 thorpej 373 1.64 thorpej lva += (vaddr_t) iov->iov_base & PAGE_MASK; 374 1.64 thorpej 375 1.134 christos MEXTADD(m, (void *) lva, space, M_MBUF, soloanfree, so); 376 1.79 thorpej m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP; 377 1.64 thorpej 378 1.64 thorpej uio->uio_resid -= space; 379 1.64 thorpej /* uio_offset not updated, not set/used for write(2) */ 380 1.134 christos uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + space; 381 1.64 thorpej uio->uio_iov->iov_len -= space; 382 1.64 thorpej if (uio->uio_iov->iov_len == 0) { 383 1.64 thorpej uio->uio_iov++; 384 1.64 thorpej uio->uio_iovcnt--; 385 1.64 thorpej } 386 1.64 thorpej 387 1.270 maxv return space; 388 1.64 thorpej } 389 1.64 thorpej 390 1.191 elad static int 391 1.191 elad socket_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 392 1.191 elad void *arg0, void *arg1, void *arg2, void *arg3) 393 1.191 elad { 394 1.191 elad int result; 395 1.191 elad enum kauth_network_req req; 396 1.191 elad 397 1.191 elad result = KAUTH_RESULT_DEFER; 398 1.287 joerg req = (enum kauth_network_req)(uintptr_t)arg0; 399 1.191 elad 400 1.193 elad if ((action != KAUTH_NETWORK_SOCKET) && 401 1.193 elad (action != KAUTH_NETWORK_BIND)) 402 1.191 elad return result; 403 1.191 elad 404 1.191 elad switch (req) { 405 1.193 elad case KAUTH_REQ_NETWORK_BIND_PORT: 406 1.193 elad result = KAUTH_RESULT_ALLOW; 407 1.193 elad break; 408 1.193 elad 409 1.191 elad case KAUTH_REQ_NETWORK_SOCKET_DROP: { 410 1.191 elad /* Normal users can only drop their own connections. */ 411 1.191 elad struct socket *so = (struct socket *)arg1; 412 1.191 elad 413 1.220 christos if (so->so_cred && proc_uidmatch(cred, so->so_cred) == 0) 414 1.191 elad result = KAUTH_RESULT_ALLOW; 415 1.191 elad 416 1.191 elad break; 417 1.191 elad } 418 1.191 elad 419 1.191 elad case KAUTH_REQ_NETWORK_SOCKET_OPEN: 420 1.191 elad /* We allow "raw" routing/bluetooth sockets to anyone. */ 421 1.254 christos switch ((u_long)arg1) { 422 1.254 christos case PF_ROUTE: 423 1.254 christos case PF_OROUTE: 424 1.254 christos case PF_BLUETOOTH: 425 1.255 bouyer case PF_CAN: 426 1.191 elad result = KAUTH_RESULT_ALLOW; 427 1.254 christos break; 428 1.254 christos default: 429 1.191 elad /* Privileged, let secmodel handle this. */ 430 1.191 elad if ((u_long)arg2 == SOCK_RAW) 431 1.191 elad break; 432 1.254 christos result = KAUTH_RESULT_ALLOW; 433 1.254 christos break; 434 1.191 elad } 435 1.191 elad break; 436 1.191 elad 437 1.192 elad case KAUTH_REQ_NETWORK_SOCKET_CANSEE: 438 1.192 elad result = KAUTH_RESULT_ALLOW; 439 1.192 elad 440 1.192 elad break; 441 1.192 elad 442 1.191 elad default: 443 1.191 elad break; 444 1.191 elad } 445 1.191 elad 446 1.191 elad return result; 447 1.191 elad } 448 1.191 elad 449 1.119 yamt void 450 1.119 yamt soinit(void) 451 1.119 yamt { 452 1.119 yamt 453 1.212 pooka sysctl_kern_socket_setup(); 454 1.178 pooka 455 1.281 pgoyette #ifdef SCTP 456 1.281 pgoyette /* Update the SCTP function hooks if necessary*/ 457 1.281 pgoyette 458 1.281 pgoyette vec_sctp_add_ip_address = sctp_add_ip_address; 459 1.312 riastrad vec_sctp_delete_ip_address = sctp_delete_ip_address; 460 1.281 pgoyette #endif 461 1.281 pgoyette 462 1.148 ad mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); 463 1.160 ad softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 464 1.136 ad cv_init(&socurkva_cv, "sokva"); 465 1.205 bouyer cv_init(&pendfree_thread_cv, "sopendfr"); 466 1.166 ad soinit2(); 467 1.136 ad 468 1.119 yamt /* Set the initial adjusted socket buffer size. */ 469 1.119 yamt if (sb_max_set(sb_max)) 470 1.119 yamt panic("bad initial sb_max value: %lu", sb_max); 471 1.119 yamt 472 1.191 elad socket_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 473 1.191 elad socket_listener_cb, NULL); 474 1.119 yamt } 475 1.119 yamt 476 1.205 bouyer void 477 1.205 bouyer soinit1(void) 478 1.205 bouyer { 479 1.205 bouyer int error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, 480 1.205 bouyer sopendfree_thread, NULL, &sopendfree_lwp, "sopendfree"); 481 1.205 bouyer if (error) 482 1.205 bouyer panic("soinit1 %d", error); 483 1.205 bouyer } 484 1.205 bouyer 485 1.1 cgd /* 486 1.222 rmind * socreate: create a new socket of the specified type and the protocol. 487 1.222 rmind * 488 1.222 rmind * => Caller may specify another socket for lock sharing (must not be held). 489 1.222 rmind * => Returns the new socket without lock held. 490 1.224 rmind */ 491 1.3 andrew int 492 1.160 ad socreate(int dom, struct socket **aso, int type, int proto, struct lwp *l, 493 1.270 maxv struct socket *lockso) 494 1.1 cgd { 495 1.270 maxv const struct protosw *prp; 496 1.270 maxv struct socket *so; 497 1.270 maxv uid_t uid; 498 1.270 maxv int error; 499 1.270 maxv kmutex_t *lock; 500 1.1 cgd 501 1.132 elad error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 502 1.132 elad KAUTH_REQ_NETWORK_SOCKET_OPEN, KAUTH_ARG(dom), KAUTH_ARG(type), 503 1.132 elad KAUTH_ARG(proto)); 504 1.140 dyoung if (error != 0) 505 1.140 dyoung return error; 506 1.127 elad 507 1.1 cgd if (proto) 508 1.1 cgd prp = pffindproto(dom, proto, type); 509 1.1 cgd else 510 1.1 cgd prp = pffindtype(dom, type); 511 1.140 dyoung if (prp == NULL) { 512 1.120 ginsbach /* no support for domain */ 513 1.120 ginsbach if (pffinddomain(dom) == 0) 514 1.313 riastrad return SET_ERROR(EAFNOSUPPORT); 515 1.120 ginsbach /* no support for socket type */ 516 1.120 ginsbach if (proto == 0 && type != 0) 517 1.313 riastrad return SET_ERROR(EPROTOTYPE); 518 1.313 riastrad return SET_ERROR(EPROTONOSUPPORT); 519 1.120 ginsbach } 520 1.223 rmind if (prp->pr_usrreqs == NULL) 521 1.313 riastrad return SET_ERROR(EPROTONOSUPPORT); 522 1.1 cgd if (prp->pr_type != type) 523 1.313 riastrad return SET_ERROR(EPROTOTYPE); 524 1.160 ad 525 1.160 ad so = soget(true); 526 1.1 cgd so->so_type = type; 527 1.1 cgd so->so_proto = prp; 528 1.33 matt so->so_send = sosend; 529 1.33 matt so->so_receive = soreceive; 530 1.266 christos so->so_options = sooptions; 531 1.78 matt #ifdef MBUFTRACE 532 1.78 matt so->so_rcv.sb_mowner = &prp->pr_domain->dom_mowner; 533 1.78 matt so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner; 534 1.78 matt so->so_mowner = &prp->pr_domain->dom_mowner; 535 1.78 matt #endif 536 1.138 rmind uid = kauth_cred_geteuid(l->l_cred); 537 1.115 yamt so->so_uidinfo = uid_find(uid); 538 1.291 christos so->so_egid = kauth_cred_getegid(l->l_cred); 539 1.168 yamt so->so_cpid = l->l_proc->p_pid; 540 1.224 rmind 541 1.224 rmind /* 542 1.224 rmind * Lock assigned and taken during PCB attach, unless we share 543 1.224 rmind * the lock with another socket, e.g. socketpair(2) case. 544 1.224 rmind */ 545 1.224 rmind if (lockso) { 546 1.302 riastrad /* 547 1.302 riastrad * lockso->so_lock should be stable at this point, so 548 1.302 riastrad * no need for atomic_load_*. 549 1.302 riastrad */ 550 1.160 ad lock = lockso->so_lock; 551 1.160 ad so->so_lock = lock; 552 1.160 ad mutex_obj_hold(lock); 553 1.160 ad mutex_enter(lock); 554 1.160 ad } 555 1.224 rmind 556 1.224 rmind /* Attach the PCB (returns with the socket lock held). */ 557 1.224 rmind error = (*prp->pr_usrreqs->pr_attach)(so, proto); 558 1.160 ad KASSERT(solocked(so)); 559 1.224 rmind 560 1.224 rmind if (error) { 561 1.222 rmind KASSERT(so->so_pcb == NULL); 562 1.1 cgd so->so_state |= SS_NOFDREF; 563 1.1 cgd sofree(so); 564 1.140 dyoung return error; 565 1.1 cgd } 566 1.305 ad so->so_cred = kauth_cred_hold(l->l_cred); 567 1.160 ad sounlock(so); 568 1.224 rmind 569 1.1 cgd *aso = so; 570 1.140 dyoung return 0; 571 1.1 cgd } 572 1.1 cgd 573 1.222 rmind /* 574 1.222 rmind * fsocreate: create a socket and a file descriptor associated with it. 575 1.308 jdolecek * Returns the allocated file structure in *fpp, but the descriptor 576 1.308 jdolecek * is not visible yet for the process. 577 1.308 jdolecek * Caller is responsible for calling fd_affix() for the returned *fpp once 578 1.308 jdolecek * it's socket initialization is finished successfully, or fd_abort() if it's 579 1.308 jdolecek * initialization fails. 580 1.312 riastrad * 581 1.222 rmind * 582 1.308 jdolecek * => On success, write file descriptor to *fdout and *fpp and return zero. 583 1.308 jdolecek * => On failure, return non-zero; *fdout and *fpp will be undefined. 584 1.142 dyoung */ 585 1.142 dyoung int 586 1.308 jdolecek fsocreate(int domain, struct socket **sop, int type, int proto, int *fdout, 587 1.308 jdolecek file_t **fpp, struct socket *lockso) 588 1.142 dyoung { 589 1.222 rmind lwp_t *l = curlwp; 590 1.222 rmind int error, fd, flags; 591 1.222 rmind struct socket *so; 592 1.308 jdolecek file_t *fp; 593 1.308 jdolecek 594 1.308 jdolecek flags = type & SOCK_FLAGS_MASK; 595 1.308 jdolecek type &= ~SOCK_FLAGS_MASK; 596 1.308 jdolecek error = socreate(domain, &so, type, proto, l, lockso); 597 1.308 jdolecek if (error) { 598 1.308 jdolecek return error; 599 1.308 jdolecek } 600 1.142 dyoung 601 1.222 rmind if ((error = fd_allocfile(&fp, &fd)) != 0) { 602 1.308 jdolecek soclose(so); 603 1.204 christos return error; 604 1.222 rmind } 605 1.204 christos fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); 606 1.314 kre fd_set_foclose(l, fd, (flags & SOCK_CLOFORK) != 0); 607 1.207 christos fp->f_flag = FREAD|FWRITE|((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)| 608 1.207 christos ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0); 609 1.142 dyoung fp->f_type = DTYPE_SOCKET; 610 1.142 dyoung fp->f_ops = &socketops; 611 1.222 rmind if (flags & SOCK_NONBLOCK) { 612 1.222 rmind so->so_state |= SS_NBIO; 613 1.222 rmind } 614 1.235 matt fp->f_socket = so; 615 1.222 rmind 616 1.222 rmind if (sop != NULL) { 617 1.222 rmind *sop = so; 618 1.142 dyoung } 619 1.222 rmind *fdout = fd; 620 1.308 jdolecek *fpp = fp; 621 1.142 dyoung return error; 622 1.142 dyoung } 623 1.142 dyoung 624 1.3 andrew int 625 1.190 dyoung sofamily(const struct socket *so) 626 1.190 dyoung { 627 1.190 dyoung const struct protosw *pr; 628 1.190 dyoung const struct domain *dom; 629 1.190 dyoung 630 1.190 dyoung if ((pr = so->so_proto) == NULL) 631 1.190 dyoung return AF_UNSPEC; 632 1.190 dyoung if ((dom = pr->pr_domain) == NULL) 633 1.190 dyoung return AF_UNSPEC; 634 1.190 dyoung return dom->dom_family; 635 1.190 dyoung } 636 1.190 dyoung 637 1.190 dyoung int 638 1.236 rtr sobind(struct socket *so, struct sockaddr *nam, struct lwp *l) 639 1.1 cgd { 640 1.270 maxv int error; 641 1.1 cgd 642 1.160 ad solock(so); 643 1.237 rtr if (nam->sa_family != so->so_proto->pr_domain->dom_family) { 644 1.237 rtr sounlock(so); 645 1.313 riastrad return SET_ERROR(EAFNOSUPPORT); 646 1.237 rtr } 647 1.231 rtr error = (*so->so_proto->pr_usrreqs->pr_bind)(so, nam, l); 648 1.160 ad sounlock(so); 649 1.140 dyoung return error; 650 1.1 cgd } 651 1.1 cgd 652 1.3 andrew int 653 1.150 elad solisten(struct socket *so, int backlog, struct lwp *l) 654 1.1 cgd { 655 1.270 maxv int error; 656 1.270 maxv short oldopt, oldqlimit; 657 1.1 cgd 658 1.160 ad solock(so); 659 1.253 ryo if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | 660 1.163 ad SS_ISDISCONNECTING)) != 0) { 661 1.222 rmind sounlock(so); 662 1.313 riastrad return SET_ERROR(EINVAL); 663 1.163 ad } 664 1.247 rjs oldopt = so->so_options; 665 1.247 rjs oldqlimit = so->so_qlimit; 666 1.247 rjs if (TAILQ_EMPTY(&so->so_q)) 667 1.247 rjs so->so_options |= SO_ACCEPTCONN; 668 1.247 rjs if (backlog < 0) 669 1.247 rjs backlog = 0; 670 1.265 riastrad so->so_qlimit = uimin(backlog, somaxconn); 671 1.247 rjs 672 1.231 rtr error = (*so->so_proto->pr_usrreqs->pr_listen)(so, l); 673 1.140 dyoung if (error != 0) { 674 1.247 rjs so->so_options = oldopt; 675 1.247 rjs so->so_qlimit = oldqlimit; 676 1.160 ad sounlock(so); 677 1.140 dyoung return error; 678 1.1 cgd } 679 1.160 ad sounlock(so); 680 1.140 dyoung return 0; 681 1.1 cgd } 682 1.1 cgd 683 1.21 christos void 684 1.54 lukem sofree(struct socket *so) 685 1.1 cgd { 686 1.161 ad u_int refs; 687 1.1 cgd 688 1.160 ad KASSERT(solocked(so)); 689 1.160 ad 690 1.160 ad if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { 691 1.160 ad sounlock(so); 692 1.1 cgd return; 693 1.160 ad } 694 1.43 mycroft if (so->so_head) { 695 1.43 mycroft /* 696 1.43 mycroft * We must not decommission a socket that's on the accept(2) 697 1.43 mycroft * queue. If we do, then accept(2) may hang after select(2) 698 1.43 mycroft * indicated that the listening socket was ready. 699 1.43 mycroft */ 700 1.160 ad if (!soqremque(so, 0)) { 701 1.160 ad sounlock(so); 702 1.43 mycroft return; 703 1.160 ad } 704 1.43 mycroft } 705 1.98 christos if (so->so_rcv.sb_hiwat) 706 1.110 christos (void)chgsbsize(so->so_uidinfo, &so->so_rcv.sb_hiwat, 0, 707 1.98 christos RLIM_INFINITY); 708 1.98 christos if (so->so_snd.sb_hiwat) 709 1.110 christos (void)chgsbsize(so->so_uidinfo, &so->so_snd.sb_hiwat, 0, 710 1.98 christos RLIM_INFINITY); 711 1.98 christos sbrelease(&so->so_snd, so); 712 1.160 ad KASSERT(!cv_has_waiters(&so->so_cv)); 713 1.160 ad KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv)); 714 1.160 ad KASSERT(!cv_has_waiters(&so->so_snd.sb_cv)); 715 1.1 cgd sorflush(so); 716 1.161 ad refs = so->so_aborting; /* XXX */ 717 1.303 andvar /* Remove accept filter if one is present. */ 718 1.170 tls if (so->so_accf != NULL) 719 1.177 ad (void)accept_filt_clear(so); 720 1.160 ad sounlock(so); 721 1.161 ad if (refs == 0) /* XXX */ 722 1.161 ad soput(so); 723 1.1 cgd } 724 1.1 cgd 725 1.1 cgd /* 726 1.222 rmind * soclose: close a socket on last file table reference removal. 727 1.222 rmind * Initiate disconnect if connected. Free socket when disconnect complete. 728 1.1 cgd */ 729 1.3 andrew int 730 1.54 lukem soclose(struct socket *so) 731 1.1 cgd { 732 1.222 rmind struct socket *so2; 733 1.222 rmind int error = 0; 734 1.1 cgd 735 1.160 ad solock(so); 736 1.1 cgd if (so->so_options & SO_ACCEPTCONN) { 737 1.172 ad for (;;) { 738 1.172 ad if ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) { 739 1.160 ad KASSERT(solocked2(so, so2)); 740 1.160 ad (void) soqremque(so2, 0); 741 1.160 ad /* soabort drops the lock. */ 742 1.160 ad (void) soabort(so2); 743 1.160 ad solock(so); 744 1.172 ad continue; 745 1.160 ad } 746 1.172 ad if ((so2 = TAILQ_FIRST(&so->so_q)) != 0) { 747 1.160 ad KASSERT(solocked2(so, so2)); 748 1.160 ad (void) soqremque(so2, 1); 749 1.160 ad /* soabort drops the lock. */ 750 1.160 ad (void) soabort(so2); 751 1.160 ad solock(so); 752 1.172 ad continue; 753 1.160 ad } 754 1.172 ad break; 755 1.172 ad } 756 1.1 cgd } 757 1.222 rmind if (so->so_pcb == NULL) 758 1.1 cgd goto discard; 759 1.1 cgd if (so->so_state & SS_ISCONNECTED) { 760 1.1 cgd if ((so->so_state & SS_ISDISCONNECTING) == 0) { 761 1.1 cgd error = sodisconnect(so); 762 1.1 cgd if (error) 763 1.1 cgd goto drop; 764 1.1 cgd } 765 1.1 cgd if (so->so_options & SO_LINGER) { 766 1.206 christos if ((so->so_state & (SS_ISDISCONNECTING|SS_NBIO)) == 767 1.206 christos (SS_ISDISCONNECTING|SS_NBIO)) 768 1.1 cgd goto drop; 769 1.21 christos while (so->so_state & SS_ISCONNECTED) { 770 1.185 yamt error = sowait(so, true, so->so_linger * hz); 771 1.21 christos if (error) 772 1.1 cgd break; 773 1.21 christos } 774 1.1 cgd } 775 1.1 cgd } 776 1.54 lukem drop: 777 1.1 cgd if (so->so_pcb) { 778 1.224 rmind KASSERT(solocked(so)); 779 1.224 rmind (*so->so_proto->pr_usrreqs->pr_detach)(so); 780 1.1 cgd } 781 1.54 lukem discard: 782 1.222 rmind KASSERT((so->so_state & SS_NOFDREF) == 0); 783 1.198 elad kauth_cred_free(so->so_cred); 784 1.273 maxv so->so_cred = NULL; 785 1.1 cgd so->so_state |= SS_NOFDREF; 786 1.1 cgd sofree(so); 787 1.222 rmind return error; 788 1.1 cgd } 789 1.1 cgd 790 1.1 cgd /* 791 1.160 ad * Must be called with the socket locked.. Will return with it unlocked. 792 1.1 cgd */ 793 1.3 andrew int 794 1.54 lukem soabort(struct socket *so) 795 1.1 cgd { 796 1.161 ad u_int refs; 797 1.139 yamt int error; 798 1.253 ryo 799 1.160 ad KASSERT(solocked(so)); 800 1.160 ad KASSERT(so->so_head == NULL); 801 1.1 cgd 802 1.161 ad so->so_aborting++; /* XXX */ 803 1.230 mrg error = (*so->so_proto->pr_usrreqs->pr_abort)(so); 804 1.161 ad refs = --so->so_aborting; /* XXX */ 805 1.164 drochner if (error || (refs == 0)) { 806 1.139 yamt sofree(so); 807 1.160 ad } else { 808 1.160 ad sounlock(so); 809 1.139 yamt } 810 1.139 yamt return error; 811 1.1 cgd } 812 1.1 cgd 813 1.3 andrew int 814 1.239 rtr soaccept(struct socket *so, struct sockaddr *nam) 815 1.1 cgd { 816 1.222 rmind int error; 817 1.160 ad 818 1.160 ad KASSERT(solocked(so)); 819 1.222 rmind KASSERT((so->so_state & SS_NOFDREF) != 0); 820 1.1 cgd 821 1.1 cgd so->so_state &= ~SS_NOFDREF; 822 1.55 thorpej if ((so->so_state & SS_ISDISCONNECTED) == 0 || 823 1.55 thorpej (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) 824 1.225 rtr error = (*so->so_proto->pr_usrreqs->pr_accept)(so, nam); 825 1.41 mycroft else 826 1.313 riastrad error = SET_ERROR(ECONNABORTED); 827 1.52 itojun 828 1.222 rmind return error; 829 1.1 cgd } 830 1.1 cgd 831 1.3 andrew int 832 1.240 rtr soconnect(struct socket *so, struct sockaddr *nam, struct lwp *l) 833 1.1 cgd { 834 1.222 rmind int error; 835 1.160 ad 836 1.160 ad KASSERT(solocked(so)); 837 1.1 cgd 838 1.1 cgd if (so->so_options & SO_ACCEPTCONN) 839 1.313 riastrad return SET_ERROR(EOPNOTSUPP); 840 1.1 cgd /* 841 1.1 cgd * If protocol is connection-based, can only connect once. 842 1.1 cgd * Otherwise, if connected, try to disconnect first. 843 1.1 cgd * This allows user to disconnect by connecting to, e.g., 844 1.1 cgd * a null address. 845 1.1 cgd */ 846 1.1 cgd if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 847 1.1 cgd ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 848 1.241 rtr (error = sodisconnect(so)))) { 849 1.313 riastrad error = SET_ERROR(EISCONN); 850 1.241 rtr } else { 851 1.242 rtr if (nam->sa_family != so->so_proto->pr_domain->dom_family) { 852 1.313 riastrad return SET_ERROR(EAFNOSUPPORT); 853 1.241 rtr } 854 1.231 rtr error = (*so->so_proto->pr_usrreqs->pr_connect)(so, nam, l); 855 1.241 rtr } 856 1.222 rmind 857 1.222 rmind return error; 858 1.1 cgd } 859 1.1 cgd 860 1.3 andrew int 861 1.54 lukem soconnect2(struct socket *so1, struct socket *so2) 862 1.1 cgd { 863 1.160 ad KASSERT(solocked2(so1, so2)); 864 1.1 cgd 865 1.234 rtr return (*so1->so_proto->pr_usrreqs->pr_connect2)(so1, so2); 866 1.1 cgd } 867 1.1 cgd 868 1.3 andrew int 869 1.54 lukem sodisconnect(struct socket *so) 870 1.1 cgd { 871 1.270 maxv int error; 872 1.160 ad 873 1.160 ad KASSERT(solocked(so)); 874 1.1 cgd 875 1.1 cgd if ((so->so_state & SS_ISCONNECTED) == 0) { 876 1.313 riastrad error = SET_ERROR(ENOTCONN); 877 1.160 ad } else if (so->so_state & SS_ISDISCONNECTING) { 878 1.313 riastrad error = SET_ERROR(EALREADY); 879 1.160 ad } else { 880 1.229 rtr error = (*so->so_proto->pr_usrreqs->pr_disconnect)(so); 881 1.1 cgd } 882 1.270 maxv return error; 883 1.1 cgd } 884 1.1 cgd 885 1.15 mycroft #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 886 1.1 cgd /* 887 1.1 cgd * Send on a socket. 888 1.1 cgd * If send must go all at once and message is larger than 889 1.1 cgd * send buffering, then hard error. 890 1.1 cgd * Lock against other senders. 891 1.1 cgd * If must go all at once and not enough room now, then 892 1.1 cgd * inform user that this would block and do nothing. 893 1.1 cgd * Otherwise, if nonblocking, send as much as possible. 894 1.1 cgd * The data to be sent is described by "uio" if nonzero, 895 1.1 cgd * otherwise by the mbuf chain "top" (which must be null 896 1.1 cgd * if uio is not). Data provided in mbuf chain must be small 897 1.1 cgd * enough to send all at once. 898 1.1 cgd * 899 1.1 cgd * Returns nonzero on error, timeout or signal; callers 900 1.1 cgd * must check for short counts if EINTR/ERESTART are returned. 901 1.1 cgd * Data and control buffers are freed on return. 902 1.1 cgd */ 903 1.3 andrew int 904 1.245 rtr sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, 905 1.245 rtr struct mbuf *top, struct mbuf *control, int flags, struct lwp *l) 906 1.1 cgd { 907 1.270 maxv struct mbuf **mp, *m; 908 1.270 maxv long space, len, resid, clen, mlen; 909 1.270 maxv int error, s, dontroute, atomic; 910 1.307 martin short wakeup_state = 0; 911 1.54 lukem 912 1.160 ad clen = 0; 913 1.64 thorpej 914 1.160 ad /* 915 1.160 ad * solock() provides atomicity of access. splsoftnet() prevents 916 1.160 ad * protocol processing soft interrupts from interrupting us and 917 1.160 ad * blocking (expensive). 918 1.160 ad */ 919 1.160 ad s = splsoftnet(); 920 1.160 ad solock(so); 921 1.54 lukem atomic = sosendallatonce(so) || top; 922 1.1 cgd if (uio) 923 1.1 cgd resid = uio->uio_resid; 924 1.1 cgd else 925 1.1 cgd resid = top->m_pkthdr.len; 926 1.7 cgd /* 927 1.7 cgd * In theory resid should be unsigned. 928 1.7 cgd * However, space must be signed, as it might be less than 0 929 1.7 cgd * if we over-committed, and we must use a signed comparison 930 1.7 cgd * of space and resid. On the other hand, a negative resid 931 1.7 cgd * causes us to loop sending 0-length segments to the protocol. 932 1.7 cgd */ 933 1.29 mycroft if (resid < 0) { 934 1.313 riastrad error = SET_ERROR(EINVAL); 935 1.29 mycroft goto out; 936 1.29 mycroft } 937 1.1 cgd dontroute = 938 1.1 cgd (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 939 1.1 cgd (so->so_proto->pr_flags & PR_ATOMIC); 940 1.165 christos l->l_ru.ru_msgsnd++; 941 1.1 cgd if (control) 942 1.1 cgd clen = control->m_len; 943 1.54 lukem restart: 944 1.21 christos if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) 945 1.1 cgd goto out; 946 1.1 cgd do { 947 1.160 ad if (so->so_state & SS_CANTSENDMORE) { 948 1.313 riastrad error = SET_ERROR(EPIPE); 949 1.160 ad goto release; 950 1.160 ad } 951 1.48 thorpej if (so->so_error) { 952 1.313 riastrad error = SET_ERROR(so->so_error); 953 1.282 christos if ((flags & MSG_PEEK) == 0) 954 1.282 christos so->so_error = 0; 955 1.48 thorpej goto release; 956 1.48 thorpej } 957 1.1 cgd if ((so->so_state & SS_ISCONNECTED) == 0) { 958 1.1 cgd if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 959 1.217 rmind if (resid || clen == 0) { 960 1.313 riastrad error = SET_ERROR(ENOTCONN); 961 1.160 ad goto release; 962 1.160 ad } 963 1.244 rtr } else if (addr == NULL) { 964 1.313 riastrad error = SET_ERROR(EDESTADDRREQ); 965 1.160 ad goto release; 966 1.160 ad } 967 1.1 cgd } 968 1.1 cgd space = sbspace(&so->so_snd); 969 1.1 cgd if (flags & MSG_OOB) 970 1.1 cgd space += 1024; 971 1.21 christos if ((atomic && resid > so->so_snd.sb_hiwat) || 972 1.160 ad clen > so->so_snd.sb_hiwat) { 973 1.313 riastrad error = SET_ERROR(EMSGSIZE); 974 1.160 ad goto release; 975 1.160 ad } 976 1.96 mycroft if (space < resid + clen && 977 1.1 cgd (atomic || space < so->so_snd.sb_lowat || space < clen)) { 978 1.206 christos if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) { 979 1.313 riastrad error = SET_ERROR(EWOULDBLOCK); 980 1.160 ad goto release; 981 1.160 ad } 982 1.1 cgd sbunlock(&so->so_snd); 983 1.307 martin if (wakeup_state & SS_RESTARTSYS) { 984 1.313 riastrad error = SET_ERROR(ERESTART); 985 1.307 martin goto out; 986 1.307 martin } 987 1.1 cgd error = sbwait(&so->so_snd); 988 1.1 cgd if (error) 989 1.1 cgd goto out; 990 1.307 martin wakeup_state = so->so_state; 991 1.1 cgd goto restart; 992 1.1 cgd } 993 1.307 martin wakeup_state = 0; 994 1.1 cgd mp = ⊤ 995 1.1 cgd space -= clen; 996 1.1 cgd do { 997 1.45 tv if (uio == NULL) { 998 1.45 tv /* 999 1.45 tv * Data is prepackaged in "top". 1000 1.45 tv */ 1001 1.45 tv resid = 0; 1002 1.45 tv if (flags & MSG_EOR) 1003 1.45 tv top->m_flags |= M_EOR; 1004 1.45 tv } else do { 1005 1.160 ad sounlock(so); 1006 1.160 ad splx(s); 1007 1.144 dyoung if (top == NULL) { 1008 1.78 matt m = m_gethdr(M_WAIT, MT_DATA); 1009 1.45 tv mlen = MHLEN; 1010 1.45 tv m->m_pkthdr.len = 0; 1011 1.248 ozaki m_reset_rcvif(m); 1012 1.45 tv } else { 1013 1.78 matt m = m_get(M_WAIT, MT_DATA); 1014 1.45 tv mlen = MLEN; 1015 1.45 tv } 1016 1.78 matt MCLAIM(m, so->so_snd.sb_mowner); 1017 1.121 yamt if (sock_loan_thresh >= 0 && 1018 1.121 yamt uio->uio_iov->iov_len >= sock_loan_thresh && 1019 1.121 yamt space >= sock_loan_thresh && 1020 1.64 thorpej (len = sosend_loan(so, uio, m, 1021 1.252 uwe space)) != 0) { 1022 1.64 thorpej SOSEND_COUNTER_INCR(&sosend_loan_big); 1023 1.64 thorpej space -= len; 1024 1.64 thorpej goto have_data; 1025 1.64 thorpej } 1026 1.45 tv if (resid >= MINCLSIZE && space >= MCLBYTES) { 1027 1.64 thorpej SOSEND_COUNTER_INCR(&sosend_copy_big); 1028 1.201 oki m_clget(m, M_DONTWAIT); 1029 1.45 tv if ((m->m_flags & M_EXT) == 0) 1030 1.45 tv goto nopages; 1031 1.45 tv mlen = MCLBYTES; 1032 1.45 tv if (atomic && top == 0) { 1033 1.58 jdolecek len = lmin(MCLBYTES - max_hdr, 1034 1.54 lukem resid); 1035 1.45 tv m->m_data += max_hdr; 1036 1.45 tv } else 1037 1.58 jdolecek len = lmin(MCLBYTES, resid); 1038 1.45 tv space -= len; 1039 1.45 tv } else { 1040 1.64 thorpej nopages: 1041 1.64 thorpej SOSEND_COUNTER_INCR(&sosend_copy_small); 1042 1.58 jdolecek len = lmin(lmin(mlen, resid), space); 1043 1.45 tv space -= len; 1044 1.45 tv /* 1045 1.45 tv * For datagram protocols, leave room 1046 1.45 tv * for protocol headers in first mbuf. 1047 1.45 tv */ 1048 1.45 tv if (atomic && top == 0 && len < mlen) 1049 1.268 maxv m_align(m, len); 1050 1.45 tv } 1051 1.144 dyoung error = uiomove(mtod(m, void *), (int)len, uio); 1052 1.64 thorpej have_data: 1053 1.45 tv resid = uio->uio_resid; 1054 1.45 tv m->m_len = len; 1055 1.45 tv *mp = m; 1056 1.45 tv top->m_pkthdr.len += len; 1057 1.160 ad s = splsoftnet(); 1058 1.160 ad solock(so); 1059 1.144 dyoung if (error != 0) 1060 1.45 tv goto release; 1061 1.45 tv mp = &m->m_next; 1062 1.45 tv if (resid <= 0) { 1063 1.45 tv if (flags & MSG_EOR) 1064 1.45 tv top->m_flags |= M_EOR; 1065 1.45 tv break; 1066 1.45 tv } 1067 1.45 tv } while (space > 0 && atomic); 1068 1.108 perry 1069 1.160 ad if (so->so_state & SS_CANTSENDMORE) { 1070 1.313 riastrad error = SET_ERROR(EPIPE); 1071 1.160 ad goto release; 1072 1.160 ad } 1073 1.45 tv if (dontroute) 1074 1.45 tv so->so_options |= SO_DONTROUTE; 1075 1.45 tv if (resid > 0) 1076 1.45 tv so->so_state |= SS_MORETOCOME; 1077 1.240 rtr if (flags & MSG_OOB) { 1078 1.253 ryo error = (*so->so_proto->pr_usrreqs->pr_sendoob)( 1079 1.253 ryo so, top, control); 1080 1.240 rtr } else { 1081 1.232 rtr error = (*so->so_proto->pr_usrreqs->pr_send)(so, 1082 1.245 rtr top, addr, control, l); 1083 1.240 rtr } 1084 1.45 tv if (dontroute) 1085 1.45 tv so->so_options &= ~SO_DONTROUTE; 1086 1.45 tv if (resid > 0) 1087 1.45 tv so->so_state &= ~SS_MORETOCOME; 1088 1.45 tv clen = 0; 1089 1.144 dyoung control = NULL; 1090 1.144 dyoung top = NULL; 1091 1.45 tv mp = ⊤ 1092 1.144 dyoung if (error != 0) 1093 1.1 cgd goto release; 1094 1.1 cgd } while (resid && space > 0); 1095 1.1 cgd } while (resid); 1096 1.1 cgd 1097 1.54 lukem release: 1098 1.1 cgd sbunlock(&so->so_snd); 1099 1.54 lukem out: 1100 1.160 ad sounlock(so); 1101 1.160 ad splx(s); 1102 1.310 rin m_freem(top); 1103 1.310 rin m_freem(control); 1104 1.270 maxv return error; 1105 1.1 cgd } 1106 1.1 cgd 1107 1.1 cgd /* 1108 1.159 ad * Following replacement or removal of the first mbuf on the first 1109 1.159 ad * mbuf chain of a socket buffer, push necessary state changes back 1110 1.159 ad * into the socket buffer so that other consumers see the values 1111 1.270 maxv * consistently. 'nextrecord' is the caller's locally stored value of 1112 1.159 ad * the original value of sb->sb_mb->m_nextpkt which must be restored 1113 1.159 ad * when the lead mbuf changes. NOTE: 'nextrecord' may be NULL. 1114 1.159 ad */ 1115 1.159 ad static void 1116 1.159 ad sbsync(struct sockbuf *sb, struct mbuf *nextrecord) 1117 1.159 ad { 1118 1.159 ad 1119 1.160 ad KASSERT(solocked(sb->sb_so)); 1120 1.160 ad 1121 1.159 ad /* 1122 1.159 ad * First, update for the new value of nextrecord. If necessary, 1123 1.159 ad * make it the first record. 1124 1.159 ad */ 1125 1.159 ad if (sb->sb_mb != NULL) 1126 1.159 ad sb->sb_mb->m_nextpkt = nextrecord; 1127 1.159 ad else 1128 1.159 ad sb->sb_mb = nextrecord; 1129 1.159 ad 1130 1.159 ad /* 1131 1.159 ad * Now update any dependent socket buffer fields to reflect 1132 1.159 ad * the new state. This is an inline of SB_EMPTY_FIXUP, with 1133 1.159 ad * the addition of a second clause that takes care of the 1134 1.159 ad * case where sb_mb has been updated, but remains the last 1135 1.159 ad * record. 1136 1.159 ad */ 1137 1.159 ad if (sb->sb_mb == NULL) { 1138 1.159 ad sb->sb_mbtail = NULL; 1139 1.159 ad sb->sb_lastrecord = NULL; 1140 1.159 ad } else if (sb->sb_mb->m_nextpkt == NULL) 1141 1.159 ad sb->sb_lastrecord = sb->sb_mb; 1142 1.159 ad } 1143 1.159 ad 1144 1.159 ad /* 1145 1.1 cgd * Implement receive operations on a socket. 1146 1.1 cgd * 1147 1.270 maxv * We depend on the way that records are added to the sockbuf by sbappend*. In 1148 1.270 maxv * particular, each record (mbufs linked through m_next) must begin with an 1149 1.270 maxv * address if the protocol so specifies, followed by an optional mbuf or mbufs 1150 1.270 maxv * containing ancillary data, and then zero or more mbufs of data. 1151 1.270 maxv * 1152 1.270 maxv * In order to avoid blocking network interrupts for the entire time here, we 1153 1.270 maxv * splx() while doing the actual copy to user space. Although the sockbuf is 1154 1.270 maxv * locked, new data may still be appended, and thus we must maintain 1155 1.270 maxv * consistency of the sockbuf during that time. 1156 1.270 maxv * 1157 1.270 maxv * The caller may receive the data as a single mbuf chain by supplying an mbuf 1158 1.270 maxv * **mp0 for use in returning the chain. The uio is then used only for the 1159 1.270 maxv * count in uio_resid. 1160 1.1 cgd */ 1161 1.3 andrew int 1162 1.54 lukem soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, 1163 1.270 maxv struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 1164 1.1 cgd { 1165 1.116 yamt struct lwp *l = curlwp; 1166 1.270 maxv struct mbuf *m, **mp, *mt; 1167 1.211 chs size_t len, offset, moff, orig_resid; 1168 1.211 chs int atomic, flags, error, s, type; 1169 1.270 maxv const struct protosw *pr; 1170 1.270 maxv struct mbuf *nextrecord; 1171 1.270 maxv int mbuf_removed = 0; 1172 1.146 dyoung const struct domain *dom; 1173 1.307 martin short wakeup_state = 0; 1174 1.64 thorpej 1175 1.54 lukem pr = so->so_proto; 1176 1.146 dyoung atomic = pr->pr_flags & PR_ATOMIC; 1177 1.146 dyoung dom = pr->pr_domain; 1178 1.1 cgd mp = mp0; 1179 1.54 lukem type = 0; 1180 1.54 lukem orig_resid = uio->uio_resid; 1181 1.102 jonathan 1182 1.144 dyoung if (paddr != NULL) 1183 1.144 dyoung *paddr = NULL; 1184 1.144 dyoung if (controlp != NULL) 1185 1.144 dyoung *controlp = NULL; 1186 1.144 dyoung if (flagsp != NULL) 1187 1.252 uwe flags = *flagsp &~ MSG_EOR; 1188 1.1 cgd else 1189 1.1 cgd flags = 0; 1190 1.66 enami 1191 1.1 cgd if (flags & MSG_OOB) { 1192 1.1 cgd m = m_get(M_WAIT, MT_DATA); 1193 1.160 ad solock(so); 1194 1.226 rtr error = (*pr->pr_usrreqs->pr_recvoob)(so, m, flags & MSG_PEEK); 1195 1.160 ad sounlock(so); 1196 1.1 cgd if (error) 1197 1.1 cgd goto bad; 1198 1.1 cgd do { 1199 1.134 christos error = uiomove(mtod(m, void *), 1200 1.211 chs MIN(uio->uio_resid, m->m_len), uio); 1201 1.1 cgd m = m_free(m); 1202 1.144 dyoung } while (uio->uio_resid > 0 && error == 0 && m); 1203 1.270 maxv bad: 1204 1.310 rin m_freem(m); 1205 1.144 dyoung return error; 1206 1.1 cgd } 1207 1.144 dyoung if (mp != NULL) 1208 1.140 dyoung *mp = NULL; 1209 1.160 ad 1210 1.160 ad /* 1211 1.160 ad * solock() provides atomicity of access. splsoftnet() prevents 1212 1.160 ad * protocol processing soft interrupts from interrupting us and 1213 1.160 ad * blocking (expensive). 1214 1.160 ad */ 1215 1.160 ad s = splsoftnet(); 1216 1.160 ad solock(so); 1217 1.270 maxv restart: 1218 1.160 ad if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) { 1219 1.160 ad sounlock(so); 1220 1.160 ad splx(s); 1221 1.144 dyoung return error; 1222 1.160 ad } 1223 1.270 maxv m = so->so_rcv.sb_mb; 1224 1.1 cgd 1225 1.1 cgd /* 1226 1.1 cgd * If we have less data than requested, block awaiting more 1227 1.1 cgd * (subject to any timeout) if: 1228 1.15 mycroft * 1. the current count is less than the low water mark, 1229 1.1 cgd * 2. MSG_WAITALL is set, and it is possible to do the entire 1230 1.15 mycroft * receive operation at once if we block (resid <= hiwat), or 1231 1.15 mycroft * 3. MSG_DONTWAIT is not set. 1232 1.1 cgd * If MSG_WAITALL is set but resid is larger than the receive buffer, 1233 1.1 cgd * we have to do the receive in sections, and thus risk returning 1234 1.1 cgd * a short count if a timeout or signal occurs after we start. 1235 1.1 cgd */ 1236 1.144 dyoung if (m == NULL || 1237 1.144 dyoung ((flags & MSG_DONTWAIT) == 0 && 1238 1.144 dyoung so->so_rcv.sb_cc < uio->uio_resid && 1239 1.144 dyoung (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 1240 1.144 dyoung ((flags & MSG_WAITALL) && 1241 1.144 dyoung uio->uio_resid <= so->so_rcv.sb_hiwat)) && 1242 1.146 dyoung m->m_nextpkt == NULL && !atomic)) { 1243 1.1 cgd #ifdef DIAGNOSTIC 1244 1.144 dyoung if (m == NULL && so->so_rcv.sb_cc) 1245 1.1 cgd panic("receive 1"); 1246 1.1 cgd #endif 1247 1.264 roy if (so->so_error || so->so_rerror) { 1248 1.282 christos u_short *e; 1249 1.144 dyoung if (m != NULL) 1250 1.15 mycroft goto dontblock; 1251 1.283 mlelstv e = so->so_error ? &so->so_error : &so->so_rerror; 1252 1.313 riastrad error = SET_ERROR(*e); 1253 1.282 christos if ((flags & MSG_PEEK) == 0) 1254 1.282 christos *e = 0; 1255 1.1 cgd goto release; 1256 1.1 cgd } 1257 1.1 cgd if (so->so_state & SS_CANTRCVMORE) { 1258 1.144 dyoung if (m != NULL) 1259 1.15 mycroft goto dontblock; 1260 1.1 cgd else 1261 1.1 cgd goto release; 1262 1.1 cgd } 1263 1.144 dyoung for (; m != NULL; m = m->m_next) 1264 1.1 cgd if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 1265 1.1 cgd m = so->so_rcv.sb_mb; 1266 1.1 cgd goto dontblock; 1267 1.1 cgd } 1268 1.1 cgd if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 1269 1.1 cgd (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 1270 1.313 riastrad error = SET_ERROR(ENOTCONN); 1271 1.1 cgd goto release; 1272 1.1 cgd } 1273 1.1 cgd if (uio->uio_resid == 0) 1274 1.1 cgd goto release; 1275 1.206 christos if ((so->so_state & SS_NBIO) || 1276 1.206 christos (flags & (MSG_DONTWAIT|MSG_NBIO))) { 1277 1.313 riastrad error = SET_ERROR(EWOULDBLOCK); 1278 1.1 cgd goto release; 1279 1.1 cgd } 1280 1.69 thorpej SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); 1281 1.69 thorpej SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); 1282 1.1 cgd sbunlock(&so->so_rcv); 1283 1.307 martin if (wakeup_state & SS_RESTARTSYS) 1284 1.313 riastrad error = SET_ERROR(ERESTART); 1285 1.307 martin else 1286 1.307 martin error = sbwait(&so->so_rcv); 1287 1.160 ad if (error != 0) { 1288 1.160 ad sounlock(so); 1289 1.160 ad splx(s); 1290 1.144 dyoung return error; 1291 1.160 ad } 1292 1.307 martin wakeup_state = so->so_state; 1293 1.1 cgd goto restart; 1294 1.1 cgd } 1295 1.270 maxv 1296 1.270 maxv dontblock: 1297 1.69 thorpej /* 1298 1.69 thorpej * On entry here, m points to the first record of the socket buffer. 1299 1.159 ad * From this point onward, we maintain 'nextrecord' as a cache of the 1300 1.159 ad * pointer to the next record in the socket buffer. We must keep the 1301 1.159 ad * various socket buffer pointers and local stack versions of the 1302 1.159 ad * pointers in sync, pushing out modifications before dropping the 1303 1.160 ad * socket lock, and re-reading them when picking it up. 1304 1.159 ad * 1305 1.159 ad * Otherwise, we will race with the network stack appending new data 1306 1.159 ad * or records onto the socket buffer by using inconsistent/stale 1307 1.159 ad * versions of the field, possibly resulting in socket buffer 1308 1.159 ad * corruption. 1309 1.159 ad * 1310 1.159 ad * By holding the high-level sblock(), we prevent simultaneous 1311 1.159 ad * readers from pulling off the front of the socket buffer. 1312 1.69 thorpej */ 1313 1.144 dyoung if (l != NULL) 1314 1.157 ad l->l_ru.ru_msgrcv++; 1315 1.69 thorpej KASSERT(m == so->so_rcv.sb_mb); 1316 1.69 thorpej SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); 1317 1.69 thorpej SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); 1318 1.1 cgd nextrecord = m->m_nextpkt; 1319 1.270 maxv 1320 1.1 cgd if (pr->pr_flags & PR_ADDR) { 1321 1.270 maxv KASSERT(m->m_type == MT_SONAME); 1322 1.3 andrew orig_resid = 0; 1323 1.1 cgd if (flags & MSG_PEEK) { 1324 1.1 cgd if (paddr) 1325 1.263 maxv *paddr = m_copym(m, 0, m->m_len, M_DONTWAIT); 1326 1.1 cgd m = m->m_next; 1327 1.1 cgd } else { 1328 1.1 cgd sbfree(&so->so_rcv, m); 1329 1.67 he mbuf_removed = 1; 1330 1.144 dyoung if (paddr != NULL) { 1331 1.1 cgd *paddr = m; 1332 1.1 cgd so->so_rcv.sb_mb = m->m_next; 1333 1.144 dyoung m->m_next = NULL; 1334 1.1 cgd m = so->so_rcv.sb_mb; 1335 1.1 cgd } else { 1336 1.249 christos m = so->so_rcv.sb_mb = m_free(m); 1337 1.1 cgd } 1338 1.159 ad sbsync(&so->so_rcv, nextrecord); 1339 1.1 cgd } 1340 1.1 cgd } 1341 1.270 maxv 1342 1.247 rjs if (pr->pr_flags & PR_ADDR_OPT) { 1343 1.247 rjs /* 1344 1.270 maxv * For SCTP we may be getting a whole message OR a partial 1345 1.270 maxv * delivery. 1346 1.247 rjs */ 1347 1.247 rjs if (m->m_type == MT_SONAME) { 1348 1.247 rjs orig_resid = 0; 1349 1.247 rjs if (flags & MSG_PEEK) { 1350 1.247 rjs if (paddr) 1351 1.263 maxv *paddr = m_copym(m, 0, m->m_len, M_DONTWAIT); 1352 1.247 rjs m = m->m_next; 1353 1.247 rjs } else { 1354 1.247 rjs sbfree(&so->so_rcv, m); 1355 1.289 jakllsch mbuf_removed = 1; 1356 1.247 rjs if (paddr) { 1357 1.247 rjs *paddr = m; 1358 1.247 rjs so->so_rcv.sb_mb = m->m_next; 1359 1.247 rjs m->m_next = 0; 1360 1.247 rjs m = so->so_rcv.sb_mb; 1361 1.247 rjs } else { 1362 1.249 christos m = so->so_rcv.sb_mb = m_free(m); 1363 1.247 rjs } 1364 1.289 jakllsch sbsync(&so->so_rcv, nextrecord); 1365 1.247 rjs } 1366 1.247 rjs } 1367 1.247 rjs } 1368 1.159 ad 1369 1.159 ad /* 1370 1.159 ad * Process one or more MT_CONTROL mbufs present before any data mbufs 1371 1.159 ad * in the first mbuf chain on the socket buffer. If MSG_PEEK, we 1372 1.159 ad * just copy the data; if !MSG_PEEK, we call into the protocol to 1373 1.159 ad * perform externalization (or freeing if controlp == NULL). 1374 1.159 ad */ 1375 1.159 ad if (__predict_false(m != NULL && m->m_type == MT_CONTROL)) { 1376 1.159 ad struct mbuf *cm = NULL, *cmn; 1377 1.159 ad struct mbuf **cme = &cm; 1378 1.159 ad 1379 1.159 ad do { 1380 1.159 ad if (flags & MSG_PEEK) { 1381 1.159 ad if (controlp != NULL) { 1382 1.263 maxv *controlp = m_copym(m, 0, m->m_len, M_DONTWAIT); 1383 1.301 riastrad controlp = (*controlp == NULL ? NULL : 1384 1.301 riastrad &(*controlp)->m_next); 1385 1.159 ad } 1386 1.159 ad m = m->m_next; 1387 1.159 ad } else { 1388 1.159 ad sbfree(&so->so_rcv, m); 1389 1.1 cgd so->so_rcv.sb_mb = m->m_next; 1390 1.144 dyoung m->m_next = NULL; 1391 1.159 ad *cme = m; 1392 1.159 ad cme = &(*cme)->m_next; 1393 1.1 cgd m = so->so_rcv.sb_mb; 1394 1.159 ad } 1395 1.159 ad } while (m != NULL && m->m_type == MT_CONTROL); 1396 1.159 ad if ((flags & MSG_PEEK) == 0) 1397 1.159 ad sbsync(&so->so_rcv, nextrecord); 1398 1.270 maxv 1399 1.159 ad for (; cm != NULL; cm = cmn) { 1400 1.159 ad cmn = cm->m_next; 1401 1.159 ad cm->m_next = NULL; 1402 1.159 ad type = mtod(cm, struct cmsghdr *)->cmsg_type; 1403 1.159 ad if (controlp != NULL) { 1404 1.159 ad if (dom->dom_externalize != NULL && 1405 1.159 ad type == SCM_RIGHTS) { 1406 1.160 ad sounlock(so); 1407 1.159 ad splx(s); 1408 1.204 christos error = (*dom->dom_externalize)(cm, l, 1409 1.314 kre ((flags & MSG_CMSG_CLOEXEC) ? 1410 1.314 kre O_CLOEXEC : 0) | 1411 1.314 kre ((flags & MSG_CMSG_CLOFORK) ? 1412 1.314 kre O_CLOFORK : 0)); 1413 1.159 ad s = splsoftnet(); 1414 1.160 ad solock(so); 1415 1.159 ad } 1416 1.159 ad *controlp = cm; 1417 1.159 ad while (*controlp != NULL) 1418 1.159 ad controlp = &(*controlp)->m_next; 1419 1.1 cgd } else { 1420 1.106 itojun /* 1421 1.106 itojun * Dispose of any SCM_RIGHTS message that went 1422 1.106 itojun * through the read path rather than recv. 1423 1.106 itojun */ 1424 1.159 ad if (dom->dom_dispose != NULL && 1425 1.159 ad type == SCM_RIGHTS) { 1426 1.253 ryo sounlock(so); 1427 1.159 ad (*dom->dom_dispose)(cm); 1428 1.160 ad solock(so); 1429 1.159 ad } 1430 1.159 ad m_freem(cm); 1431 1.1 cgd } 1432 1.1 cgd } 1433 1.159 ad if (m != NULL) 1434 1.159 ad nextrecord = so->so_rcv.sb_mb->m_nextpkt; 1435 1.159 ad else 1436 1.159 ad nextrecord = so->so_rcv.sb_mb; 1437 1.159 ad orig_resid = 0; 1438 1.1 cgd } 1439 1.69 thorpej 1440 1.159 ad /* If m is non-NULL, we have some data to read. */ 1441 1.159 ad if (__predict_true(m != NULL)) { 1442 1.1 cgd type = m->m_type; 1443 1.1 cgd if (type == MT_OOBDATA) 1444 1.1 cgd flags |= MSG_OOB; 1445 1.1 cgd } 1446 1.69 thorpej SBLASTRECORDCHK(&so->so_rcv, "soreceive 2"); 1447 1.69 thorpej SBLASTMBUFCHK(&so->so_rcv, "soreceive 2"); 1448 1.69 thorpej 1449 1.1 cgd moff = 0; 1450 1.1 cgd offset = 0; 1451 1.144 dyoung while (m != NULL && uio->uio_resid > 0 && error == 0) { 1452 1.272 maxv /* 1453 1.272 maxv * If the type of mbuf has changed, end the receive 1454 1.272 maxv * operation and do a short read. 1455 1.272 maxv */ 1456 1.1 cgd if (m->m_type == MT_OOBDATA) { 1457 1.1 cgd if (type != MT_OOBDATA) 1458 1.1 cgd break; 1459 1.270 maxv } else if (type == MT_OOBDATA) { 1460 1.1 cgd break; 1461 1.272 maxv } else if (m->m_type == MT_CONTROL) { 1462 1.272 maxv break; 1463 1.270 maxv } 1464 1.1 cgd #ifdef DIAGNOSTIC 1465 1.270 maxv else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) { 1466 1.272 maxv panic("%s: m_type=%d", __func__, m->m_type); 1467 1.270 maxv } 1468 1.1 cgd #endif 1469 1.270 maxv 1470 1.1 cgd so->so_state &= ~SS_RCVATMARK; 1471 1.307 martin wakeup_state = 0; 1472 1.1 cgd len = uio->uio_resid; 1473 1.1 cgd if (so->so_oobmark && len > so->so_oobmark - offset) 1474 1.1 cgd len = so->so_oobmark - offset; 1475 1.1 cgd if (len > m->m_len - moff) 1476 1.1 cgd len = m->m_len - moff; 1477 1.270 maxv 1478 1.1 cgd /* 1479 1.1 cgd * If mp is set, just pass back the mbufs. 1480 1.1 cgd * Otherwise copy them out via the uio, then free. 1481 1.1 cgd * Sockbuf must be consistent here (points to current mbuf, 1482 1.1 cgd * it points to next record) when we drop priority; 1483 1.1 cgd * we must note any additions to the sockbuf when we 1484 1.1 cgd * block interrupts again. 1485 1.1 cgd */ 1486 1.144 dyoung if (mp == NULL) { 1487 1.69 thorpej SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); 1488 1.69 thorpej SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); 1489 1.160 ad sounlock(so); 1490 1.1 cgd splx(s); 1491 1.211 chs error = uiomove(mtod(m, char *) + moff, len, uio); 1492 1.20 mycroft s = splsoftnet(); 1493 1.160 ad solock(so); 1494 1.144 dyoung if (error != 0) { 1495 1.67 he /* 1496 1.67 he * If any part of the record has been removed 1497 1.67 he * (such as the MT_SONAME mbuf, which will 1498 1.67 he * happen when PR_ADDR, and thus also 1499 1.67 he * PR_ATOMIC, is set), then drop the entire 1500 1.67 he * record to maintain the atomicity of the 1501 1.67 he * receive operation. 1502 1.67 he * 1503 1.67 he * This avoids a later panic("receive 1a") 1504 1.67 he * when compiled with DIAGNOSTIC. 1505 1.67 he */ 1506 1.146 dyoung if (m && mbuf_removed && atomic) 1507 1.67 he (void) sbdroprecord(&so->so_rcv); 1508 1.67 he 1509 1.57 jdolecek goto release; 1510 1.67 he } 1511 1.270 maxv } else { 1512 1.1 cgd uio->uio_resid -= len; 1513 1.270 maxv } 1514 1.270 maxv 1515 1.1 cgd if (len == m->m_len - moff) { 1516 1.1 cgd if (m->m_flags & M_EOR) 1517 1.1 cgd flags |= MSG_EOR; 1518 1.247 rjs #ifdef SCTP 1519 1.247 rjs if (m->m_flags & M_NOTIFICATION) 1520 1.247 rjs flags |= MSG_NOTIFICATION; 1521 1.270 maxv #endif 1522 1.1 cgd if (flags & MSG_PEEK) { 1523 1.1 cgd m = m->m_next; 1524 1.1 cgd moff = 0; 1525 1.1 cgd } else { 1526 1.1 cgd nextrecord = m->m_nextpkt; 1527 1.1 cgd sbfree(&so->so_rcv, m); 1528 1.1 cgd if (mp) { 1529 1.1 cgd *mp = m; 1530 1.1 cgd mp = &m->m_next; 1531 1.1 cgd so->so_rcv.sb_mb = m = m->m_next; 1532 1.140 dyoung *mp = NULL; 1533 1.1 cgd } else { 1534 1.249 christos m = so->so_rcv.sb_mb = m_free(m); 1535 1.1 cgd } 1536 1.69 thorpej /* 1537 1.69 thorpej * If m != NULL, we also know that 1538 1.69 thorpej * so->so_rcv.sb_mb != NULL. 1539 1.69 thorpej */ 1540 1.69 thorpej KASSERT(so->so_rcv.sb_mb == m); 1541 1.69 thorpej if (m) { 1542 1.1 cgd m->m_nextpkt = nextrecord; 1543 1.69 thorpej if (nextrecord == NULL) 1544 1.69 thorpej so->so_rcv.sb_lastrecord = m; 1545 1.69 thorpej } else { 1546 1.69 thorpej so->so_rcv.sb_mb = nextrecord; 1547 1.70 thorpej SB_EMPTY_FIXUP(&so->so_rcv); 1548 1.69 thorpej } 1549 1.69 thorpej SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); 1550 1.69 thorpej SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); 1551 1.1 cgd } 1552 1.270 maxv } else if (flags & MSG_PEEK) { 1553 1.144 dyoung moff += len; 1554 1.270 maxv } else { 1555 1.160 ad if (mp != NULL) { 1556 1.160 ad mt = m_copym(m, 0, len, M_NOWAIT); 1557 1.160 ad if (__predict_false(mt == NULL)) { 1558 1.160 ad sounlock(so); 1559 1.160 ad mt = m_copym(m, 0, len, M_WAIT); 1560 1.160 ad solock(so); 1561 1.160 ad } 1562 1.160 ad *mp = mt; 1563 1.160 ad } 1564 1.144 dyoung m->m_data += len; 1565 1.144 dyoung m->m_len -= len; 1566 1.144 dyoung so->so_rcv.sb_cc -= len; 1567 1.1 cgd } 1568 1.270 maxv 1569 1.1 cgd if (so->so_oobmark) { 1570 1.1 cgd if ((flags & MSG_PEEK) == 0) { 1571 1.1 cgd so->so_oobmark -= len; 1572 1.1 cgd if (so->so_oobmark == 0) { 1573 1.1 cgd so->so_state |= SS_RCVATMARK; 1574 1.1 cgd break; 1575 1.1 cgd } 1576 1.7 cgd } else { 1577 1.1 cgd offset += len; 1578 1.7 cgd if (offset == so->so_oobmark) 1579 1.7 cgd break; 1580 1.7 cgd } 1581 1.293 chs } else { 1582 1.293 chs so->so_state &= ~SS_POLLRDBAND; 1583 1.1 cgd } 1584 1.1 cgd if (flags & MSG_EOR) 1585 1.1 cgd break; 1586 1.270 maxv 1587 1.1 cgd /* 1588 1.1 cgd * If the MSG_WAITALL flag is set (for non-atomic socket), 1589 1.1 cgd * we must not quit until "uio->uio_resid == 0" or an error 1590 1.1 cgd * termination. If a signal/timeout occurs, return 1591 1.1 cgd * with a short count but without error. 1592 1.1 cgd * Keep sockbuf locked against other readers. 1593 1.1 cgd */ 1594 1.144 dyoung while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && 1595 1.3 andrew !sosendallatonce(so) && !nextrecord) { 1596 1.264 roy if (so->so_error || so->so_rerror || 1597 1.264 roy so->so_state & SS_CANTRCVMORE) 1598 1.1 cgd break; 1599 1.68 matt /* 1600 1.68 matt * If we are peeking and the socket receive buffer is 1601 1.68 matt * full, stop since we can't get more data to peek at. 1602 1.68 matt */ 1603 1.68 matt if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0) 1604 1.68 matt break; 1605 1.68 matt /* 1606 1.68 matt * If we've drained the socket buffer, tell the 1607 1.68 matt * protocol in case it needs to do something to 1608 1.68 matt * get it filled again. 1609 1.68 matt */ 1610 1.68 matt if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) 1611 1.233 rtr (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); 1612 1.69 thorpej SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); 1613 1.69 thorpej SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); 1614 1.307 martin if (wakeup_state & SS_RESTARTSYS) 1615 1.313 riastrad error = SET_ERROR(ERESTART); 1616 1.307 martin else 1617 1.307 martin error = sbwait(&so->so_rcv); 1618 1.144 dyoung if (error != 0) { 1619 1.1 cgd sbunlock(&so->so_rcv); 1620 1.160 ad sounlock(so); 1621 1.1 cgd splx(s); 1622 1.144 dyoung return 0; 1623 1.1 cgd } 1624 1.21 christos if ((m = so->so_rcv.sb_mb) != NULL) 1625 1.1 cgd nextrecord = m->m_nextpkt; 1626 1.307 martin wakeup_state = so->so_state; 1627 1.1 cgd } 1628 1.1 cgd } 1629 1.3 andrew 1630 1.146 dyoung if (m && atomic) { 1631 1.3 andrew flags |= MSG_TRUNC; 1632 1.3 andrew if ((flags & MSG_PEEK) == 0) 1633 1.3 andrew (void) sbdroprecord(&so->so_rcv); 1634 1.3 andrew } 1635 1.1 cgd if ((flags & MSG_PEEK) == 0) { 1636 1.144 dyoung if (m == NULL) { 1637 1.69 thorpej /* 1638 1.70 thorpej * First part is an inline SB_EMPTY_FIXUP(). Second 1639 1.69 thorpej * part makes sure sb_lastrecord is up-to-date if 1640 1.69 thorpej * there is still data in the socket buffer. 1641 1.69 thorpej */ 1642 1.1 cgd so->so_rcv.sb_mb = nextrecord; 1643 1.69 thorpej if (so->so_rcv.sb_mb == NULL) { 1644 1.69 thorpej so->so_rcv.sb_mbtail = NULL; 1645 1.69 thorpej so->so_rcv.sb_lastrecord = NULL; 1646 1.69 thorpej } else if (nextrecord->m_nextpkt == NULL) 1647 1.69 thorpej so->so_rcv.sb_lastrecord = nextrecord; 1648 1.69 thorpej } 1649 1.69 thorpej SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); 1650 1.69 thorpej SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); 1651 1.1 cgd if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 1652 1.233 rtr (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); 1653 1.1 cgd } 1654 1.3 andrew if (orig_resid == uio->uio_resid && orig_resid && 1655 1.3 andrew (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 1656 1.3 andrew sbunlock(&so->so_rcv); 1657 1.3 andrew goto restart; 1658 1.3 andrew } 1659 1.108 perry 1660 1.144 dyoung if (flagsp != NULL) 1661 1.1 cgd *flagsp |= flags; 1662 1.270 maxv release: 1663 1.1 cgd sbunlock(&so->so_rcv); 1664 1.160 ad sounlock(so); 1665 1.1 cgd splx(s); 1666 1.144 dyoung return error; 1667 1.1 cgd } 1668 1.1 cgd 1669 1.14 mycroft int 1670 1.54 lukem soshutdown(struct socket *so, int how) 1671 1.1 cgd { 1672 1.270 maxv const struct protosw *pr; 1673 1.270 maxv int error; 1674 1.160 ad 1675 1.160 ad KASSERT(solocked(so)); 1676 1.34 kleink 1677 1.54 lukem pr = so->so_proto; 1678 1.34 kleink if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 1679 1.313 riastrad return SET_ERROR(EINVAL); 1680 1.1 cgd 1681 1.160 ad if (how == SHUT_RD || how == SHUT_RDWR) { 1682 1.1 cgd sorflush(so); 1683 1.160 ad error = 0; 1684 1.160 ad } 1685 1.34 kleink if (how == SHUT_WR || how == SHUT_RDWR) 1686 1.229 rtr error = (*pr->pr_usrreqs->pr_shutdown)(so); 1687 1.160 ad 1688 1.160 ad return error; 1689 1.1 cgd } 1690 1.1 cgd 1691 1.195 dsl void 1692 1.196 dsl sorestart(struct socket *so) 1693 1.188 ad { 1694 1.196 dsl /* 1695 1.196 dsl * An application has called close() on an fd on which another 1696 1.196 dsl * of its threads has called a socket system call. 1697 1.196 dsl * Mark this and wake everyone up, and code that would block again 1698 1.196 dsl * instead returns ERESTART. 1699 1.196 dsl * On system call re-entry the fd is validated and EBADF returned. 1700 1.196 dsl * Any other fd will block again on the 2nd syscall. 1701 1.196 dsl */ 1702 1.188 ad solock(so); 1703 1.307 martin so->so_state |= SS_RESTARTSYS; 1704 1.307 martin cv_broadcast(&so->so_cv); 1705 1.307 martin cv_broadcast(&so->so_snd.sb_cv); 1706 1.307 martin cv_broadcast(&so->so_rcv.sb_cv); 1707 1.188 ad sounlock(so); 1708 1.188 ad } 1709 1.188 ad 1710 1.14 mycroft void 1711 1.54 lukem sorflush(struct socket *so) 1712 1.1 cgd { 1713 1.270 maxv struct sockbuf *sb, asb; 1714 1.270 maxv const struct protosw *pr; 1715 1.160 ad 1716 1.160 ad KASSERT(solocked(so)); 1717 1.1 cgd 1718 1.54 lukem sb = &so->so_rcv; 1719 1.54 lukem pr = so->so_proto; 1720 1.160 ad socantrcvmore(so); 1721 1.1 cgd sb->sb_flags |= SB_NOINTR; 1722 1.160 ad (void )sblock(sb, M_WAITOK); 1723 1.1 cgd sbunlock(sb); 1724 1.1 cgd asb = *sb; 1725 1.86 wrstuden /* 1726 1.86 wrstuden * Clear most of the sockbuf structure, but leave some of the 1727 1.86 wrstuden * fields valid. 1728 1.86 wrstuden */ 1729 1.86 wrstuden memset(&sb->sb_startzero, 0, 1730 1.86 wrstuden sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); 1731 1.160 ad if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) { 1732 1.160 ad sounlock(so); 1733 1.1 cgd (*pr->pr_domain->dom_dispose)(asb.sb_mb); 1734 1.160 ad solock(so); 1735 1.160 ad } 1736 1.98 christos sbrelease(&asb, so); 1737 1.1 cgd } 1738 1.1 cgd 1739 1.171 plunky /* 1740 1.171 plunky * internal set SOL_SOCKET options 1741 1.171 plunky */ 1742 1.142 dyoung static int 1743 1.171 plunky sosetopt1(struct socket *so, const struct sockopt *sopt) 1744 1.1 cgd { 1745 1.275 pgoyette int error, opt; 1746 1.219 christos int optval = 0; /* XXX: gcc */ 1747 1.171 plunky struct linger l; 1748 1.171 plunky struct timeval tv; 1749 1.142 dyoung 1750 1.275 pgoyette opt = sopt->sopt_name; 1751 1.275 pgoyette 1752 1.275 pgoyette switch (opt) { 1753 1.142 dyoung 1754 1.170 tls case SO_ACCEPTFILTER: 1755 1.177 ad error = accept_filt_setopt(so, sopt); 1756 1.177 ad KASSERT(solocked(so)); 1757 1.170 tls break; 1758 1.170 tls 1759 1.253 ryo case SO_LINGER: 1760 1.253 ryo error = sockopt_get(sopt, &l, sizeof(l)); 1761 1.177 ad solock(so); 1762 1.253 ryo if (error) 1763 1.253 ryo break; 1764 1.253 ryo if (l.l_linger < 0 || l.l_linger > USHRT_MAX || 1765 1.253 ryo l.l_linger > (INT_MAX / hz)) { 1766 1.313 riastrad error = SET_ERROR(EDOM); 1767 1.177 ad break; 1768 1.177 ad } 1769 1.253 ryo so->so_linger = l.l_linger; 1770 1.253 ryo if (l.l_onoff) 1771 1.253 ryo so->so_options |= SO_LINGER; 1772 1.253 ryo else 1773 1.253 ryo so->so_options &= ~SO_LINGER; 1774 1.253 ryo break; 1775 1.1 cgd 1776 1.142 dyoung case SO_DEBUG: 1777 1.142 dyoung case SO_KEEPALIVE: 1778 1.142 dyoung case SO_DONTROUTE: 1779 1.142 dyoung case SO_USELOOPBACK: 1780 1.142 dyoung case SO_BROADCAST: 1781 1.142 dyoung case SO_REUSEADDR: 1782 1.142 dyoung case SO_REUSEPORT: 1783 1.142 dyoung case SO_OOBINLINE: 1784 1.142 dyoung case SO_TIMESTAMP: 1785 1.207 christos case SO_NOSIGPIPE: 1786 1.266 christos case SO_RERROR: 1787 1.171 plunky error = sockopt_getint(sopt, &optval); 1788 1.177 ad solock(so); 1789 1.171 plunky if (error) 1790 1.177 ad break; 1791 1.171 plunky if (optval) 1792 1.179 christos so->so_options |= opt; 1793 1.142 dyoung else 1794 1.179 christos so->so_options &= ~opt; 1795 1.142 dyoung break; 1796 1.142 dyoung 1797 1.142 dyoung case SO_SNDBUF: 1798 1.142 dyoung case SO_RCVBUF: 1799 1.142 dyoung case SO_SNDLOWAT: 1800 1.142 dyoung case SO_RCVLOWAT: 1801 1.171 plunky error = sockopt_getint(sopt, &optval); 1802 1.177 ad solock(so); 1803 1.171 plunky if (error) 1804 1.177 ad break; 1805 1.1 cgd 1806 1.142 dyoung /* 1807 1.142 dyoung * Values < 1 make no sense for any of these 1808 1.142 dyoung * options, so disallow them. 1809 1.142 dyoung */ 1810 1.177 ad if (optval < 1) { 1811 1.313 riastrad error = SET_ERROR(EINVAL); 1812 1.177 ad break; 1813 1.177 ad } 1814 1.1 cgd 1815 1.179 christos switch (opt) { 1816 1.171 plunky case SO_SNDBUF: 1817 1.177 ad if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { 1818 1.313 riastrad error = SET_ERROR(ENOBUFS); 1819 1.177 ad break; 1820 1.177 ad } 1821 1.292 mlelstv if (sofixedbuf) 1822 1.292 mlelstv so->so_snd.sb_flags &= ~SB_AUTOSIZE; 1823 1.171 plunky break; 1824 1.1 cgd 1825 1.1 cgd case SO_RCVBUF: 1826 1.177 ad if (sbreserve(&so->so_rcv, (u_long)optval, so) == 0) { 1827 1.313 riastrad error = SET_ERROR(ENOBUFS); 1828 1.177 ad break; 1829 1.177 ad } 1830 1.292 mlelstv if (sofixedbuf) 1831 1.292 mlelstv so->so_rcv.sb_flags &= ~SB_AUTOSIZE; 1832 1.142 dyoung break; 1833 1.142 dyoung 1834 1.142 dyoung /* 1835 1.142 dyoung * Make sure the low-water is never greater than 1836 1.142 dyoung * the high-water. 1837 1.142 dyoung */ 1838 1.1 cgd case SO_SNDLOWAT: 1839 1.171 plunky if (optval > so->so_snd.sb_hiwat) 1840 1.171 plunky optval = so->so_snd.sb_hiwat; 1841 1.171 plunky 1842 1.171 plunky so->so_snd.sb_lowat = optval; 1843 1.142 dyoung break; 1844 1.171 plunky 1845 1.1 cgd case SO_RCVLOWAT: 1846 1.171 plunky if (optval > so->so_rcv.sb_hiwat) 1847 1.171 plunky optval = so->so_rcv.sb_hiwat; 1848 1.171 plunky 1849 1.171 plunky so->so_rcv.sb_lowat = optval; 1850 1.142 dyoung break; 1851 1.142 dyoung } 1852 1.142 dyoung break; 1853 1.28 thorpej 1854 1.142 dyoung case SO_SNDTIMEO: 1855 1.142 dyoung case SO_RCVTIMEO: 1856 1.177 ad solock(so); 1857 1.278 pgoyette error = sockopt_get(sopt, &tv, sizeof(tv)); 1858 1.171 plunky if (error) 1859 1.177 ad break; 1860 1.171 plunky 1861 1.274 maxv if (tv.tv_sec < 0 || tv.tv_usec < 0 || tv.tv_usec >= 1000000) { 1862 1.313 riastrad error = SET_ERROR(EDOM); 1863 1.274 maxv break; 1864 1.274 maxv } 1865 1.177 ad if (tv.tv_sec > (INT_MAX - tv.tv_usec / tick) / hz) { 1866 1.313 riastrad error = SET_ERROR(EDOM); 1867 1.177 ad break; 1868 1.177 ad } 1869 1.28 thorpej 1870 1.171 plunky optval = tv.tv_sec * hz + tv.tv_usec / tick; 1871 1.171 plunky if (optval == 0 && tv.tv_usec != 0) 1872 1.171 plunky optval = 1; 1873 1.28 thorpej 1874 1.179 christos switch (opt) { 1875 1.142 dyoung case SO_SNDTIMEO: 1876 1.171 plunky so->so_snd.sb_timeo = optval; 1877 1.1 cgd break; 1878 1.1 cgd case SO_RCVTIMEO: 1879 1.171 plunky so->so_rcv.sb_timeo = optval; 1880 1.142 dyoung break; 1881 1.142 dyoung } 1882 1.142 dyoung break; 1883 1.1 cgd 1884 1.142 dyoung default: 1885 1.278 pgoyette MODULE_HOOK_CALL(uipc_socket_50_setopt1_hook, 1886 1.278 pgoyette (opt, so, sopt), enosys(), error); 1887 1.278 pgoyette if (error == ENOSYS || error == EPASSTHROUGH) { 1888 1.278 pgoyette solock(so); 1889 1.313 riastrad error = SET_ERROR(ENOPROTOOPT); 1890 1.278 pgoyette } 1891 1.177 ad break; 1892 1.142 dyoung } 1893 1.177 ad KASSERT(solocked(so)); 1894 1.177 ad return error; 1895 1.142 dyoung } 1896 1.1 cgd 1897 1.142 dyoung int 1898 1.171 plunky sosetopt(struct socket *so, struct sockopt *sopt) 1899 1.142 dyoung { 1900 1.142 dyoung int error, prerr; 1901 1.1 cgd 1902 1.177 ad if (sopt->sopt_level == SOL_SOCKET) { 1903 1.171 plunky error = sosetopt1(so, sopt); 1904 1.177 ad KASSERT(solocked(so)); 1905 1.177 ad } else { 1906 1.313 riastrad error = SET_ERROR(ENOPROTOOPT); 1907 1.177 ad solock(so); 1908 1.177 ad } 1909 1.1 cgd 1910 1.142 dyoung if ((error == 0 || error == ENOPROTOOPT) && 1911 1.142 dyoung so->so_proto != NULL && so->so_proto->pr_ctloutput != NULL) { 1912 1.142 dyoung /* give the protocol stack a shot */ 1913 1.171 plunky prerr = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, sopt); 1914 1.142 dyoung if (prerr == 0) 1915 1.142 dyoung error = 0; 1916 1.142 dyoung else if (prerr != ENOPROTOOPT) 1917 1.142 dyoung error = prerr; 1918 1.171 plunky } 1919 1.160 ad sounlock(so); 1920 1.142 dyoung return error; 1921 1.1 cgd } 1922 1.1 cgd 1923 1.171 plunky /* 1924 1.171 plunky * so_setsockopt() is a wrapper providing a sockopt structure for sosetopt() 1925 1.171 plunky */ 1926 1.171 plunky int 1927 1.171 plunky so_setsockopt(struct lwp *l, struct socket *so, int level, int name, 1928 1.171 plunky const void *val, size_t valsize) 1929 1.171 plunky { 1930 1.171 plunky struct sockopt sopt; 1931 1.171 plunky int error; 1932 1.171 plunky 1933 1.171 plunky KASSERT(valsize == 0 || val != NULL); 1934 1.171 plunky 1935 1.171 plunky sockopt_init(&sopt, level, name, valsize); 1936 1.171 plunky sockopt_set(&sopt, val, valsize); 1937 1.171 plunky 1938 1.171 plunky error = sosetopt(so, &sopt); 1939 1.171 plunky 1940 1.171 plunky sockopt_destroy(&sopt); 1941 1.171 plunky 1942 1.171 plunky return error; 1943 1.171 plunky } 1944 1.253 ryo 1945 1.171 plunky /* 1946 1.171 plunky * internal get SOL_SOCKET options 1947 1.171 plunky */ 1948 1.171 plunky static int 1949 1.171 plunky sogetopt1(struct socket *so, struct sockopt *sopt) 1950 1.171 plunky { 1951 1.179 christos int error, optval, opt; 1952 1.171 plunky struct linger l; 1953 1.171 plunky struct timeval tv; 1954 1.171 plunky 1955 1.179 christos switch ((opt = sopt->sopt_name)) { 1956 1.171 plunky 1957 1.171 plunky case SO_ACCEPTFILTER: 1958 1.177 ad error = accept_filt_getopt(so, sopt); 1959 1.171 plunky break; 1960 1.171 plunky 1961 1.171 plunky case SO_LINGER: 1962 1.171 plunky l.l_onoff = (so->so_options & SO_LINGER) ? 1 : 0; 1963 1.171 plunky l.l_linger = so->so_linger; 1964 1.171 plunky 1965 1.171 plunky error = sockopt_set(sopt, &l, sizeof(l)); 1966 1.171 plunky break; 1967 1.171 plunky 1968 1.171 plunky case SO_USELOOPBACK: 1969 1.171 plunky case SO_DONTROUTE: 1970 1.171 plunky case SO_DEBUG: 1971 1.171 plunky case SO_KEEPALIVE: 1972 1.171 plunky case SO_REUSEADDR: 1973 1.171 plunky case SO_REUSEPORT: 1974 1.171 plunky case SO_BROADCAST: 1975 1.171 plunky case SO_OOBINLINE: 1976 1.171 plunky case SO_TIMESTAMP: 1977 1.207 christos case SO_NOSIGPIPE: 1978 1.266 christos case SO_RERROR: 1979 1.218 seanb case SO_ACCEPTCONN: 1980 1.179 christos error = sockopt_setint(sopt, (so->so_options & opt) ? 1 : 0); 1981 1.171 plunky break; 1982 1.171 plunky 1983 1.171 plunky case SO_TYPE: 1984 1.171 plunky error = sockopt_setint(sopt, so->so_type); 1985 1.171 plunky break; 1986 1.171 plunky 1987 1.171 plunky case SO_ERROR: 1988 1.267 hannken if (so->so_error == 0) { 1989 1.267 hannken so->so_error = so->so_rerror; 1990 1.267 hannken so->so_rerror = 0; 1991 1.267 hannken } 1992 1.171 plunky error = sockopt_setint(sopt, so->so_error); 1993 1.171 plunky so->so_error = 0; 1994 1.171 plunky break; 1995 1.171 plunky 1996 1.171 plunky case SO_SNDBUF: 1997 1.171 plunky error = sockopt_setint(sopt, so->so_snd.sb_hiwat); 1998 1.171 plunky break; 1999 1.171 plunky 2000 1.171 plunky case SO_RCVBUF: 2001 1.171 plunky error = sockopt_setint(sopt, so->so_rcv.sb_hiwat); 2002 1.171 plunky break; 2003 1.171 plunky 2004 1.171 plunky case SO_SNDLOWAT: 2005 1.171 plunky error = sockopt_setint(sopt, so->so_snd.sb_lowat); 2006 1.171 plunky break; 2007 1.171 plunky 2008 1.171 plunky case SO_RCVLOWAT: 2009 1.171 plunky error = sockopt_setint(sopt, so->so_rcv.sb_lowat); 2010 1.171 plunky break; 2011 1.171 plunky 2012 1.171 plunky case SO_SNDTIMEO: 2013 1.171 plunky case SO_RCVTIMEO: 2014 1.179 christos optval = (opt == SO_SNDTIMEO ? 2015 1.171 plunky so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 2016 1.171 plunky 2017 1.288 maxv memset(&tv, 0, sizeof(tv)); 2018 1.171 plunky tv.tv_sec = optval / hz; 2019 1.171 plunky tv.tv_usec = (optval % hz) * tick; 2020 1.171 plunky 2021 1.171 plunky error = sockopt_set(sopt, &tv, sizeof(tv)); 2022 1.171 plunky break; 2023 1.171 plunky 2024 1.171 plunky case SO_OVERFLOWED: 2025 1.171 plunky error = sockopt_setint(sopt, so->so_rcv.sb_overflowed); 2026 1.171 plunky break; 2027 1.171 plunky 2028 1.171 plunky default: 2029 1.275 pgoyette MODULE_HOOK_CALL(uipc_socket_50_getopt1_hook, 2030 1.278 pgoyette (opt, so, sopt), enosys(), error); 2031 1.275 pgoyette if (error) 2032 1.313 riastrad error = SET_ERROR(ENOPROTOOPT); 2033 1.171 plunky break; 2034 1.171 plunky } 2035 1.171 plunky 2036 1.270 maxv return error; 2037 1.171 plunky } 2038 1.171 plunky 2039 1.14 mycroft int 2040 1.171 plunky sogetopt(struct socket *so, struct sockopt *sopt) 2041 1.1 cgd { 2042 1.270 maxv int error; 2043 1.1 cgd 2044 1.160 ad solock(so); 2045 1.171 plunky if (sopt->sopt_level != SOL_SOCKET) { 2046 1.1 cgd if (so->so_proto && so->so_proto->pr_ctloutput) { 2047 1.160 ad error = ((*so->so_proto->pr_ctloutput) 2048 1.171 plunky (PRCO_GETOPT, so, sopt)); 2049 1.1 cgd } else 2050 1.313 riastrad error = SET_ERROR(ENOPROTOOPT); 2051 1.1 cgd } else { 2052 1.171 plunky error = sogetopt1(so, sopt); 2053 1.171 plunky } 2054 1.171 plunky sounlock(so); 2055 1.270 maxv return error; 2056 1.171 plunky } 2057 1.171 plunky 2058 1.171 plunky /* 2059 1.171 plunky * alloc sockopt data buffer buffer 2060 1.171 plunky * - will be released at destroy 2061 1.171 plunky */ 2062 1.176 plunky static int 2063 1.176 plunky sockopt_alloc(struct sockopt *sopt, size_t len, km_flag_t kmflag) 2064 1.171 plunky { 2065 1.290 maxv void *data; 2066 1.171 plunky 2067 1.171 plunky KASSERT(sopt->sopt_size == 0); 2068 1.171 plunky 2069 1.176 plunky if (len > sizeof(sopt->sopt_buf)) { 2070 1.290 maxv data = kmem_zalloc(len, kmflag); 2071 1.290 maxv if (data == NULL) 2072 1.313 riastrad return SET_ERROR(ENOMEM); 2073 1.290 maxv sopt->sopt_data = data; 2074 1.176 plunky } else 2075 1.171 plunky sopt->sopt_data = sopt->sopt_buf; 2076 1.171 plunky 2077 1.171 plunky sopt->sopt_size = len; 2078 1.176 plunky return 0; 2079 1.171 plunky } 2080 1.171 plunky 2081 1.171 plunky /* 2082 1.171 plunky * initialise sockopt storage 2083 1.176 plunky * - MAY sleep during allocation 2084 1.171 plunky */ 2085 1.171 plunky void 2086 1.171 plunky sockopt_init(struct sockopt *sopt, int level, int name, size_t size) 2087 1.171 plunky { 2088 1.1 cgd 2089 1.171 plunky memset(sopt, 0, sizeof(*sopt)); 2090 1.1 cgd 2091 1.171 plunky sopt->sopt_level = level; 2092 1.171 plunky sopt->sopt_name = name; 2093 1.176 plunky (void)sockopt_alloc(sopt, size, KM_SLEEP); 2094 1.171 plunky } 2095 1.171 plunky 2096 1.171 plunky /* 2097 1.171 plunky * destroy sockopt storage 2098 1.171 plunky * - will release any held memory references 2099 1.171 plunky */ 2100 1.171 plunky void 2101 1.171 plunky sockopt_destroy(struct sockopt *sopt) 2102 1.171 plunky { 2103 1.171 plunky 2104 1.171 plunky if (sopt->sopt_data != sopt->sopt_buf) 2105 1.173 plunky kmem_free(sopt->sopt_data, sopt->sopt_size); 2106 1.171 plunky 2107 1.171 plunky memset(sopt, 0, sizeof(*sopt)); 2108 1.171 plunky } 2109 1.171 plunky 2110 1.171 plunky /* 2111 1.171 plunky * set sockopt value 2112 1.171 plunky * - value is copied into sockopt 2113 1.253 ryo * - memory is allocated when necessary, will not sleep 2114 1.171 plunky */ 2115 1.171 plunky int 2116 1.171 plunky sockopt_set(struct sockopt *sopt, const void *buf, size_t len) 2117 1.171 plunky { 2118 1.176 plunky int error; 2119 1.171 plunky 2120 1.176 plunky if (sopt->sopt_size == 0) { 2121 1.176 plunky error = sockopt_alloc(sopt, len, KM_NOSLEEP); 2122 1.176 plunky if (error) 2123 1.176 plunky return error; 2124 1.176 plunky } 2125 1.171 plunky 2126 1.279 christos sopt->sopt_retsize = MIN(sopt->sopt_size, len); 2127 1.285 maxv if (sopt->sopt_retsize > 0) { 2128 1.285 maxv memcpy(sopt->sopt_data, buf, sopt->sopt_retsize); 2129 1.285 maxv } 2130 1.259 christos 2131 1.171 plunky return 0; 2132 1.171 plunky } 2133 1.171 plunky 2134 1.171 plunky /* 2135 1.171 plunky * common case of set sockopt integer value 2136 1.171 plunky */ 2137 1.171 plunky int 2138 1.171 plunky sockopt_setint(struct sockopt *sopt, int val) 2139 1.171 plunky { 2140 1.171 plunky 2141 1.171 plunky return sockopt_set(sopt, &val, sizeof(int)); 2142 1.171 plunky } 2143 1.171 plunky 2144 1.171 plunky /* 2145 1.171 plunky * get sockopt value 2146 1.171 plunky * - correct size must be given 2147 1.171 plunky */ 2148 1.171 plunky int 2149 1.171 plunky sockopt_get(const struct sockopt *sopt, void *buf, size_t len) 2150 1.171 plunky { 2151 1.170 tls 2152 1.171 plunky if (sopt->sopt_size != len) 2153 1.313 riastrad return SET_ERROR(EINVAL); 2154 1.1 cgd 2155 1.171 plunky memcpy(buf, sopt->sopt_data, len); 2156 1.171 plunky return 0; 2157 1.171 plunky } 2158 1.1 cgd 2159 1.171 plunky /* 2160 1.171 plunky * common case of get sockopt integer value 2161 1.171 plunky */ 2162 1.171 plunky int 2163 1.171 plunky sockopt_getint(const struct sockopt *sopt, int *valp) 2164 1.171 plunky { 2165 1.1 cgd 2166 1.171 plunky return sockopt_get(sopt, valp, sizeof(int)); 2167 1.171 plunky } 2168 1.1 cgd 2169 1.171 plunky /* 2170 1.171 plunky * set sockopt value from mbuf 2171 1.171 plunky * - ONLY for legacy code 2172 1.171 plunky * - mbuf is released by sockopt 2173 1.176 plunky * - will not sleep 2174 1.171 plunky */ 2175 1.171 plunky int 2176 1.171 plunky sockopt_setmbuf(struct sockopt *sopt, struct mbuf *m) 2177 1.171 plunky { 2178 1.171 plunky size_t len; 2179 1.176 plunky int error; 2180 1.1 cgd 2181 1.171 plunky len = m_length(m); 2182 1.1 cgd 2183 1.176 plunky if (sopt->sopt_size == 0) { 2184 1.176 plunky error = sockopt_alloc(sopt, len, KM_NOSLEEP); 2185 1.176 plunky if (error) 2186 1.176 plunky return error; 2187 1.176 plunky } 2188 1.1 cgd 2189 1.279 christos sopt->sopt_retsize = MIN(sopt->sopt_size, len); 2190 1.279 christos m_copydata(m, 0, sopt->sopt_retsize, sopt->sopt_data); 2191 1.171 plunky m_freem(m); 2192 1.1 cgd 2193 1.171 plunky return 0; 2194 1.171 plunky } 2195 1.1 cgd 2196 1.171 plunky /* 2197 1.171 plunky * get sockopt value into mbuf 2198 1.171 plunky * - ONLY for legacy code 2199 1.171 plunky * - mbuf to be released by the caller 2200 1.176 plunky * - will not sleep 2201 1.171 plunky */ 2202 1.171 plunky struct mbuf * 2203 1.171 plunky sockopt_getmbuf(const struct sockopt *sopt) 2204 1.171 plunky { 2205 1.171 plunky struct mbuf *m; 2206 1.107 darrenr 2207 1.176 plunky if (sopt->sopt_size > MCLBYTES) 2208 1.176 plunky return NULL; 2209 1.176 plunky 2210 1.176 plunky m = m_get(M_DONTWAIT, MT_SOOPTS); 2211 1.171 plunky if (m == NULL) 2212 1.171 plunky return NULL; 2213 1.171 plunky 2214 1.176 plunky if (sopt->sopt_size > MLEN) { 2215 1.176 plunky MCLGET(m, M_DONTWAIT); 2216 1.176 plunky if ((m->m_flags & M_EXT) == 0) { 2217 1.176 plunky m_free(m); 2218 1.176 plunky return NULL; 2219 1.176 plunky } 2220 1.1 cgd } 2221 1.176 plunky 2222 1.176 plunky memcpy(mtod(m, void *), sopt->sopt_data, sopt->sopt_size); 2223 1.176 plunky m->m_len = sopt->sopt_size; 2224 1.160 ad 2225 1.171 plunky return m; 2226 1.1 cgd } 2227 1.1 cgd 2228 1.14 mycroft void 2229 1.54 lukem sohasoutofband(struct socket *so) 2230 1.1 cgd { 2231 1.153 rmind 2232 1.293 chs so->so_state |= SS_POLLRDBAND; 2233 1.90 christos fownsignal(so->so_pgid, SIGURG, POLL_PRI, POLLPRI|POLLRDBAND, so); 2234 1.189 ad selnotify(&so->so_rcv.sb_sel, POLLPRI | POLLRDBAND, NOTE_SUBMIT); 2235 1.1 cgd } 2236 1.72 jdolecek 2237 1.72 jdolecek static void 2238 1.72 jdolecek filt_sordetach(struct knote *kn) 2239 1.72 jdolecek { 2240 1.270 maxv struct socket *so; 2241 1.72 jdolecek 2242 1.235 matt so = ((file_t *)kn->kn_obj)->f_socket; 2243 1.160 ad solock(so); 2244 1.297 thorpej if (selremove_knote(&so->so_rcv.sb_sel, kn)) 2245 1.297 thorpej so->so_rcv.sb_flags &= ~SB_KNOTE; 2246 1.160 ad sounlock(so); 2247 1.72 jdolecek } 2248 1.72 jdolecek 2249 1.72 jdolecek /*ARGSUSED*/ 2250 1.72 jdolecek static int 2251 1.129 yamt filt_soread(struct knote *kn, long hint) 2252 1.72 jdolecek { 2253 1.270 maxv struct socket *so; 2254 1.160 ad int rv; 2255 1.72 jdolecek 2256 1.235 matt so = ((file_t *)kn->kn_obj)->f_socket; 2257 1.160 ad if (hint != NOTE_SUBMIT) 2258 1.160 ad solock(so); 2259 1.72 jdolecek kn->kn_data = so->so_rcv.sb_cc; 2260 1.72 jdolecek if (so->so_state & SS_CANTRCVMORE) { 2261 1.299 thorpej knote_set_eof(kn, 0); 2262 1.72 jdolecek kn->kn_fflags = so->so_error; 2263 1.160 ad rv = 1; 2264 1.264 roy } else if (so->so_error || so->so_rerror) 2265 1.160 ad rv = 1; 2266 1.160 ad else if (kn->kn_sfflags & NOTE_LOWAT) 2267 1.160 ad rv = (kn->kn_data >= kn->kn_sdata); 2268 1.253 ryo else 2269 1.160 ad rv = (kn->kn_data >= so->so_rcv.sb_lowat); 2270 1.160 ad if (hint != NOTE_SUBMIT) 2271 1.160 ad sounlock(so); 2272 1.160 ad return rv; 2273 1.72 jdolecek } 2274 1.72 jdolecek 2275 1.72 jdolecek static void 2276 1.72 jdolecek filt_sowdetach(struct knote *kn) 2277 1.72 jdolecek { 2278 1.270 maxv struct socket *so; 2279 1.72 jdolecek 2280 1.235 matt so = ((file_t *)kn->kn_obj)->f_socket; 2281 1.160 ad solock(so); 2282 1.297 thorpej if (selremove_knote(&so->so_snd.sb_sel, kn)) 2283 1.297 thorpej so->so_snd.sb_flags &= ~SB_KNOTE; 2284 1.160 ad sounlock(so); 2285 1.72 jdolecek } 2286 1.72 jdolecek 2287 1.72 jdolecek /*ARGSUSED*/ 2288 1.72 jdolecek static int 2289 1.129 yamt filt_sowrite(struct knote *kn, long hint) 2290 1.72 jdolecek { 2291 1.270 maxv struct socket *so; 2292 1.160 ad int rv; 2293 1.72 jdolecek 2294 1.235 matt so = ((file_t *)kn->kn_obj)->f_socket; 2295 1.160 ad if (hint != NOTE_SUBMIT) 2296 1.160 ad solock(so); 2297 1.72 jdolecek kn->kn_data = sbspace(&so->so_snd); 2298 1.72 jdolecek if (so->so_state & SS_CANTSENDMORE) { 2299 1.299 thorpej knote_set_eof(kn, 0); 2300 1.72 jdolecek kn->kn_fflags = so->so_error; 2301 1.160 ad rv = 1; 2302 1.261 roy } else if (so->so_error) 2303 1.160 ad rv = 1; 2304 1.160 ad else if (((so->so_state & SS_ISCONNECTED) == 0) && 2305 1.72 jdolecek (so->so_proto->pr_flags & PR_CONNREQUIRED)) 2306 1.160 ad rv = 0; 2307 1.160 ad else if (kn->kn_sfflags & NOTE_LOWAT) 2308 1.160 ad rv = (kn->kn_data >= kn->kn_sdata); 2309 1.160 ad else 2310 1.160 ad rv = (kn->kn_data >= so->so_snd.sb_lowat); 2311 1.160 ad if (hint != NOTE_SUBMIT) 2312 1.160 ad sounlock(so); 2313 1.160 ad return rv; 2314 1.72 jdolecek } 2315 1.72 jdolecek 2316 1.300 thorpej static int 2317 1.300 thorpej filt_soempty(struct knote *kn, long hint) 2318 1.300 thorpej { 2319 1.300 thorpej struct socket *so; 2320 1.300 thorpej int rv; 2321 1.300 thorpej 2322 1.300 thorpej so = ((file_t *)kn->kn_obj)->f_socket; 2323 1.300 thorpej if (hint != NOTE_SUBMIT) 2324 1.300 thorpej solock(so); 2325 1.300 thorpej rv = (kn->kn_data = sbused(&so->so_snd)) == 0 || 2326 1.300 thorpej (so->so_options & SO_ACCEPTCONN) != 0; 2327 1.300 thorpej if (hint != NOTE_SUBMIT) 2328 1.300 thorpej sounlock(so); 2329 1.300 thorpej return rv; 2330 1.300 thorpej } 2331 1.300 thorpej 2332 1.72 jdolecek /*ARGSUSED*/ 2333 1.72 jdolecek static int 2334 1.129 yamt filt_solisten(struct knote *kn, long hint) 2335 1.72 jdolecek { 2336 1.270 maxv struct socket *so; 2337 1.160 ad int rv; 2338 1.72 jdolecek 2339 1.235 matt so = ((file_t *)kn->kn_obj)->f_socket; 2340 1.72 jdolecek 2341 1.72 jdolecek /* 2342 1.72 jdolecek * Set kn_data to number of incoming connections, not 2343 1.72 jdolecek * counting partial (incomplete) connections. 2344 1.108 perry */ 2345 1.160 ad if (hint != NOTE_SUBMIT) 2346 1.160 ad solock(so); 2347 1.72 jdolecek kn->kn_data = so->so_qlen; 2348 1.160 ad rv = (kn->kn_data > 0); 2349 1.160 ad if (hint != NOTE_SUBMIT) 2350 1.160 ad sounlock(so); 2351 1.160 ad return rv; 2352 1.72 jdolecek } 2353 1.72 jdolecek 2354 1.257 maya static const struct filterops solisten_filtops = { 2355 1.298 thorpej .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 2356 1.257 maya .f_attach = NULL, 2357 1.257 maya .f_detach = filt_sordetach, 2358 1.257 maya .f_event = filt_solisten, 2359 1.257 maya }; 2360 1.257 maya 2361 1.257 maya static const struct filterops soread_filtops = { 2362 1.298 thorpej .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 2363 1.257 maya .f_attach = NULL, 2364 1.257 maya .f_detach = filt_sordetach, 2365 1.257 maya .f_event = filt_soread, 2366 1.257 maya }; 2367 1.257 maya 2368 1.257 maya static const struct filterops sowrite_filtops = { 2369 1.298 thorpej .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 2370 1.257 maya .f_attach = NULL, 2371 1.257 maya .f_detach = filt_sowdetach, 2372 1.257 maya .f_event = filt_sowrite, 2373 1.257 maya }; 2374 1.72 jdolecek 2375 1.300 thorpej static const struct filterops soempty_filtops = { 2376 1.300 thorpej .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 2377 1.300 thorpej .f_attach = NULL, 2378 1.300 thorpej .f_detach = filt_sowdetach, 2379 1.300 thorpej .f_event = filt_soempty, 2380 1.300 thorpej }; 2381 1.300 thorpej 2382 1.72 jdolecek int 2383 1.129 yamt soo_kqfilter(struct file *fp, struct knote *kn) 2384 1.72 jdolecek { 2385 1.270 maxv struct socket *so; 2386 1.270 maxv struct sockbuf *sb; 2387 1.72 jdolecek 2388 1.235 matt so = ((file_t *)kn->kn_obj)->f_socket; 2389 1.160 ad solock(so); 2390 1.72 jdolecek switch (kn->kn_filter) { 2391 1.72 jdolecek case EVFILT_READ: 2392 1.72 jdolecek if (so->so_options & SO_ACCEPTCONN) 2393 1.72 jdolecek kn->kn_fop = &solisten_filtops; 2394 1.72 jdolecek else 2395 1.72 jdolecek kn->kn_fop = &soread_filtops; 2396 1.72 jdolecek sb = &so->so_rcv; 2397 1.72 jdolecek break; 2398 1.72 jdolecek case EVFILT_WRITE: 2399 1.72 jdolecek kn->kn_fop = &sowrite_filtops; 2400 1.72 jdolecek sb = &so->so_snd; 2401 1.309 jdolecek 2402 1.309 jdolecek #ifdef PIPE_SOCKETPAIR 2403 1.309 jdolecek if (so->so_state & SS_ISAPIPE) { 2404 1.309 jdolecek /* Other end of pipe has been closed. */ 2405 1.309 jdolecek if (so->so_state & SS_ISDISCONNECTED) { 2406 1.309 jdolecek sounlock(so); 2407 1.313 riastrad return SET_ERROR(EBADF); 2408 1.309 jdolecek } 2409 1.309 jdolecek } 2410 1.309 jdolecek #endif 2411 1.72 jdolecek break; 2412 1.300 thorpej case EVFILT_EMPTY: 2413 1.300 thorpej kn->kn_fop = &soempty_filtops; 2414 1.300 thorpej sb = &so->so_snd; 2415 1.300 thorpej break; 2416 1.72 jdolecek default: 2417 1.160 ad sounlock(so); 2418 1.313 riastrad return SET_ERROR(EINVAL); 2419 1.72 jdolecek } 2420 1.294 thorpej selrecord_knote(&sb->sb_sel, kn); 2421 1.72 jdolecek sb->sb_flags |= SB_KNOTE; 2422 1.160 ad sounlock(so); 2423 1.270 maxv return 0; 2424 1.72 jdolecek } 2425 1.72 jdolecek 2426 1.154 ad static int 2427 1.154 ad sodopoll(struct socket *so, int events) 2428 1.154 ad { 2429 1.154 ad int revents; 2430 1.154 ad 2431 1.154 ad revents = 0; 2432 1.154 ad 2433 1.154 ad if (events & (POLLIN | POLLRDNORM)) 2434 1.154 ad if (soreadable(so)) 2435 1.154 ad revents |= events & (POLLIN | POLLRDNORM); 2436 1.154 ad 2437 1.154 ad if (events & (POLLOUT | POLLWRNORM)) 2438 1.154 ad if (sowritable(so)) 2439 1.154 ad revents |= events & (POLLOUT | POLLWRNORM); 2440 1.154 ad 2441 1.154 ad if (events & (POLLPRI | POLLRDBAND)) 2442 1.293 chs if (so->so_state & SS_POLLRDBAND) 2443 1.154 ad revents |= events & (POLLPRI | POLLRDBAND); 2444 1.154 ad 2445 1.154 ad return revents; 2446 1.154 ad } 2447 1.154 ad 2448 1.154 ad int 2449 1.154 ad sopoll(struct socket *so, int events) 2450 1.154 ad { 2451 1.154 ad int revents = 0; 2452 1.154 ad 2453 1.160 ad #ifndef DIAGNOSTIC 2454 1.160 ad /* 2455 1.160 ad * Do a quick, unlocked check in expectation that the socket 2456 1.160 ad * will be ready for I/O. Don't do this check if DIAGNOSTIC, 2457 1.160 ad * as the solocked() assertions will fail. 2458 1.160 ad */ 2459 1.154 ad if ((revents = sodopoll(so, events)) != 0) 2460 1.154 ad return revents; 2461 1.160 ad #endif 2462 1.154 ad 2463 1.160 ad solock(so); 2464 1.154 ad if ((revents = sodopoll(so, events)) == 0) { 2465 1.154 ad if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { 2466 1.154 ad selrecord(curlwp, &so->so_rcv.sb_sel); 2467 1.160 ad so->so_rcv.sb_flags |= SB_NOTIFY; 2468 1.154 ad } 2469 1.154 ad 2470 1.154 ad if (events & (POLLOUT | POLLWRNORM)) { 2471 1.154 ad selrecord(curlwp, &so->so_snd.sb_sel); 2472 1.160 ad so->so_snd.sb_flags |= SB_NOTIFY; 2473 1.154 ad } 2474 1.154 ad } 2475 1.160 ad sounlock(so); 2476 1.154 ad 2477 1.154 ad return revents; 2478 1.154 ad } 2479 1.154 ad 2480 1.256 christos struct mbuf ** 2481 1.262 maxv sbsavetimestamp(int opt, struct mbuf **mp) 2482 1.256 christos { 2483 1.256 christos struct timeval tv; 2484 1.275 pgoyette int error; 2485 1.275 pgoyette 2486 1.295 chs memset(&tv, 0, sizeof(tv)); 2487 1.256 christos microtime(&tv); 2488 1.256 christos 2489 1.284 pgoyette MODULE_HOOK_CALL(uipc_socket_50_sbts_hook, (opt, &mp), enosys(), error); 2490 1.275 pgoyette if (error == 0) 2491 1.275 pgoyette return mp; 2492 1.256 christos 2493 1.256 christos if (opt & SO_TIMESTAMP) { 2494 1.256 christos *mp = sbcreatecontrol(&tv, sizeof(tv), 2495 1.256 christos SCM_TIMESTAMP, SOL_SOCKET); 2496 1.256 christos if (*mp) 2497 1.256 christos mp = &(*mp)->m_next; 2498 1.256 christos } 2499 1.256 christos return mp; 2500 1.256 christos } 2501 1.256 christos 2502 1.154 ad 2503 1.94 yamt #include <sys/sysctl.h> 2504 1.94 yamt 2505 1.94 yamt static int sysctl_kern_somaxkva(SYSCTLFN_PROTO); 2506 1.212 pooka static int sysctl_kern_sbmax(SYSCTLFN_PROTO); 2507 1.94 yamt 2508 1.94 yamt /* 2509 1.94 yamt * sysctl helper routine for kern.somaxkva. ensures that the given 2510 1.94 yamt * value is not too small. 2511 1.94 yamt * (XXX should we maybe make sure it's not too large as well?) 2512 1.94 yamt */ 2513 1.94 yamt static int 2514 1.94 yamt sysctl_kern_somaxkva(SYSCTLFN_ARGS) 2515 1.94 yamt { 2516 1.94 yamt int error, new_somaxkva; 2517 1.94 yamt struct sysctlnode node; 2518 1.94 yamt 2519 1.94 yamt new_somaxkva = somaxkva; 2520 1.94 yamt node = *rnode; 2521 1.94 yamt node.sysctl_data = &new_somaxkva; 2522 1.94 yamt error = sysctl_lookup(SYSCTLFN_CALL(&node)); 2523 1.94 yamt if (error || newp == NULL) 2524 1.270 maxv return error; 2525 1.94 yamt 2526 1.94 yamt if (new_somaxkva < (16 * 1024 * 1024)) /* sanity */ 2527 1.313 riastrad return SET_ERROR(EINVAL); 2528 1.94 yamt 2529 1.136 ad mutex_enter(&so_pendfree_lock); 2530 1.94 yamt somaxkva = new_somaxkva; 2531 1.136 ad cv_broadcast(&socurkva_cv); 2532 1.136 ad mutex_exit(&so_pendfree_lock); 2533 1.94 yamt 2534 1.270 maxv return error; 2535 1.94 yamt } 2536 1.94 yamt 2537 1.212 pooka /* 2538 1.212 pooka * sysctl helper routine for kern.sbmax. Basically just ensures that 2539 1.212 pooka * any new value is not too small. 2540 1.212 pooka */ 2541 1.212 pooka static int 2542 1.212 pooka sysctl_kern_sbmax(SYSCTLFN_ARGS) 2543 1.212 pooka { 2544 1.212 pooka int error, new_sbmax; 2545 1.212 pooka struct sysctlnode node; 2546 1.212 pooka 2547 1.212 pooka new_sbmax = sb_max; 2548 1.212 pooka node = *rnode; 2549 1.212 pooka node.sysctl_data = &new_sbmax; 2550 1.212 pooka error = sysctl_lookup(SYSCTLFN_CALL(&node)); 2551 1.212 pooka if (error || newp == NULL) 2552 1.270 maxv return error; 2553 1.212 pooka 2554 1.212 pooka KERNEL_LOCK(1, NULL); 2555 1.212 pooka error = sb_max_set(new_sbmax); 2556 1.212 pooka KERNEL_UNLOCK_ONE(NULL); 2557 1.212 pooka 2558 1.270 maxv return error; 2559 1.212 pooka } 2560 1.212 pooka 2561 1.266 christos /* 2562 1.266 christos * sysctl helper routine for kern.sooptions. Ensures that only allowed 2563 1.266 christos * options can be set. 2564 1.266 christos */ 2565 1.266 christos static int 2566 1.266 christos sysctl_kern_sooptions(SYSCTLFN_ARGS) 2567 1.266 christos { 2568 1.266 christos int error, new_options; 2569 1.266 christos struct sysctlnode node; 2570 1.266 christos 2571 1.266 christos new_options = sooptions; 2572 1.266 christos node = *rnode; 2573 1.266 christos node.sysctl_data = &new_options; 2574 1.266 christos error = sysctl_lookup(SYSCTLFN_CALL(&node)); 2575 1.266 christos if (error || newp == NULL) 2576 1.266 christos return error; 2577 1.266 christos 2578 1.266 christos if (new_options & ~SO_DEFOPTS) 2579 1.313 riastrad return SET_ERROR(EINVAL); 2580 1.266 christos 2581 1.266 christos sooptions = new_options; 2582 1.266 christos 2583 1.266 christos return 0; 2584 1.266 christos } 2585 1.266 christos 2586 1.178 pooka static void 2587 1.212 pooka sysctl_kern_socket_setup(void) 2588 1.94 yamt { 2589 1.94 yamt 2590 1.178 pooka KASSERT(socket_sysctllog == NULL); 2591 1.97 atatat 2592 1.178 pooka sysctl_createv(&socket_sysctllog, 0, NULL, NULL, 2593 1.97 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2594 1.103 atatat CTLTYPE_INT, "somaxkva", 2595 1.103 atatat SYSCTL_DESCR("Maximum amount of kernel memory to be " 2596 1.270 maxv "used for socket buffers"), 2597 1.94 yamt sysctl_kern_somaxkva, 0, NULL, 0, 2598 1.94 yamt CTL_KERN, KERN_SOMAXKVA, CTL_EOL); 2599 1.212 pooka 2600 1.212 pooka sysctl_createv(&socket_sysctllog, 0, NULL, NULL, 2601 1.212 pooka CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2602 1.292 mlelstv CTLTYPE_BOOL, "sofixedbuf", 2603 1.292 mlelstv SYSCTL_DESCR("Prevent scaling of fixed socket buffers"), 2604 1.292 mlelstv NULL, 0, &sofixedbuf, 0, 2605 1.292 mlelstv CTL_KERN, KERN_SOFIXEDBUF, CTL_EOL); 2606 1.292 mlelstv 2607 1.292 mlelstv sysctl_createv(&socket_sysctllog, 0, NULL, NULL, 2608 1.292 mlelstv CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2609 1.212 pooka CTLTYPE_INT, "sbmax", 2610 1.212 pooka SYSCTL_DESCR("Maximum socket buffer size"), 2611 1.212 pooka sysctl_kern_sbmax, 0, NULL, 0, 2612 1.212 pooka CTL_KERN, KERN_SBMAX, CTL_EOL); 2613 1.266 christos 2614 1.266 christos sysctl_createv(&socket_sysctllog, 0, NULL, NULL, 2615 1.266 christos CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2616 1.266 christos CTLTYPE_INT, "sooptions", 2617 1.266 christos SYSCTL_DESCR("Default socket options"), 2618 1.266 christos sysctl_kern_sooptions, 0, NULL, 0, 2619 1.266 christos CTL_KERN, CTL_CREATE, CTL_EOL); 2620 1.94 yamt } 2621