1 1.238 ozaki /* $NetBSD: tcp_usrreq.c,v 1.238 2022/11/04 09:01:53 ozaki-r Exp $ */ 2 1.40 itojun 3 1.40 itojun /* 4 1.40 itojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 1.40 itojun * All rights reserved. 6 1.71 itojun * 7 1.40 itojun * Redistribution and use in source and binary forms, with or without 8 1.40 itojun * modification, are permitted provided that the following conditions 9 1.40 itojun * are met: 10 1.40 itojun * 1. Redistributions of source code must retain the above copyright 11 1.40 itojun * notice, this list of conditions and the following disclaimer. 12 1.40 itojun * 2. Redistributions in binary form must reproduce the above copyright 13 1.40 itojun * notice, this list of conditions and the following disclaimer in the 14 1.40 itojun * documentation and/or other materials provided with the distribution. 15 1.40 itojun * 3. Neither the name of the project nor the names of its contributors 16 1.40 itojun * may be used to endorse or promote products derived from this software 17 1.40 itojun * without specific prior written permission. 18 1.71 itojun * 19 1.40 itojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 1.40 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 1.40 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 1.40 itojun * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 1.40 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 1.40 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 1.40 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 1.40 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 1.40 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 1.40 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 1.40 itojun * SUCH DAMAGE. 30 1.40 itojun */ 31 1.34 thorpej 32 1.34 thorpej /*- 33 1.120 rpaulo * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc. 34 1.34 thorpej * All rights reserved. 35 1.34 thorpej * 36 1.34 thorpej * This code is derived from software contributed to The NetBSD Foundation 37 1.34 thorpej * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 1.34 thorpej * Facility, NASA Ames Research Center. 39 1.95 mycroft * This code is derived from software contributed to The NetBSD Foundation 40 1.95 mycroft * by Charles M. Hannum. 41 1.120 rpaulo * This code is derived from software contributed to The NetBSD Foundation 42 1.120 rpaulo * by Rui Paulo. 43 1.34 thorpej * 44 1.34 thorpej * Redistribution and use in source and binary forms, with or without 45 1.34 thorpej * modification, are permitted provided that the following conditions 46 1.34 thorpej * are met: 47 1.34 thorpej * 1. Redistributions of source code must retain the above copyright 48 1.34 thorpej * notice, this list of conditions and the following disclaimer. 49 1.34 thorpej * 2. Redistributions in binary form must reproduce the above copyright 50 1.34 thorpej * notice, this list of conditions and the following disclaimer in the 51 1.34 thorpej * documentation and/or other materials provided with the distribution. 52 1.34 thorpej * 53 1.34 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 54 1.34 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 1.34 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 56 1.34 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 57 1.34 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 58 1.34 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 59 1.34 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 60 1.34 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 61 1.34 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 62 1.34 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 63 1.34 thorpej * POSSIBILITY OF SUCH DAMAGE. 64 1.34 thorpej */ 65 1.10 cgd 66 1.1 cgd /* 67 1.33 thorpej * Copyright (c) 1982, 1986, 1988, 1993, 1995 68 1.9 mycroft * The Regents of the University of California. All rights reserved. 69 1.1 cgd * 70 1.1 cgd * Redistribution and use in source and binary forms, with or without 71 1.1 cgd * modification, are permitted provided that the following conditions 72 1.1 cgd * are met: 73 1.1 cgd * 1. Redistributions of source code must retain the above copyright 74 1.1 cgd * notice, this list of conditions and the following disclaimer. 75 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 76 1.1 cgd * notice, this list of conditions and the following disclaimer in the 77 1.1 cgd * documentation and/or other materials provided with the distribution. 78 1.82 agc * 3. Neither the name of the University nor the names of its contributors 79 1.1 cgd * may be used to endorse or promote products derived from this software 80 1.1 cgd * without specific prior written permission. 81 1.1 cgd * 82 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 83 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 84 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 85 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 86 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 87 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 88 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 89 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 90 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 91 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 92 1.1 cgd * SUCH DAMAGE. 93 1.1 cgd * 94 1.33 thorpej * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 95 1.1 cgd */ 96 1.67 lukem 97 1.173 rmind /* 98 1.173 rmind * TCP protocol interface to socket abstraction. 99 1.173 rmind */ 100 1.173 rmind 101 1.67 lukem #include <sys/cdefs.h> 102 1.238 ozaki __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.238 2022/11/04 09:01:53 ozaki-r Exp $"); 103 1.1 cgd 104 1.209 pooka #ifdef _KERNEL_OPT 105 1.40 itojun #include "opt_inet.h" 106 1.63 abs #include "opt_tcp_debug.h" 107 1.77 martin #include "opt_mbuftrace.h" 108 1.209 pooka #include "opt_tcp_space.h" 109 1.213 knakahar #include "opt_net_mpsafe.h" 110 1.209 pooka #endif 111 1.162 tls 112 1.5 mycroft #include <sys/param.h> 113 1.5 mycroft #include <sys/systm.h> 114 1.13 glass #include <sys/kernel.h> 115 1.5 mycroft #include <sys/mbuf.h> 116 1.5 mycroft #include <sys/socket.h> 117 1.5 mycroft #include <sys/socketvar.h> 118 1.5 mycroft #include <sys/protosw.h> 119 1.5 mycroft #include <sys/errno.h> 120 1.5 mycroft #include <sys/stat.h> 121 1.20 christos #include <sys/proc.h> 122 1.40 itojun #include <sys/domain.h> 123 1.20 christos #include <sys/sysctl.h> 124 1.117 elad #include <sys/kauth.h> 125 1.203 he #include <sys/kernel.h> 126 1.149 pooka #include <sys/uidinfo.h> 127 1.1 cgd 128 1.5 mycroft #include <net/if.h> 129 1.1 cgd 130 1.5 mycroft #include <netinet/in.h> 131 1.5 mycroft #include <netinet/in_systm.h> 132 1.14 cgd #include <netinet/in_var.h> 133 1.5 mycroft #include <netinet/ip.h> 134 1.5 mycroft #include <netinet/in_pcb.h> 135 1.5 mycroft #include <netinet/ip_var.h> 136 1.108 yamt #include <netinet/in_offload.h> 137 1.40 itojun 138 1.40 itojun #ifdef INET6 139 1.40 itojun #include <netinet/ip6.h> 140 1.40 itojun #include <netinet6/in6_pcb.h> 141 1.40 itojun #include <netinet6/ip6_var.h> 142 1.135 christos #include <netinet6/scope6_var.h> 143 1.40 itojun #endif 144 1.40 itojun 145 1.5 mycroft #include <netinet/tcp.h> 146 1.5 mycroft #include <netinet/tcp_fsm.h> 147 1.5 mycroft #include <netinet/tcp_seq.h> 148 1.5 mycroft #include <netinet/tcp_timer.h> 149 1.5 mycroft #include <netinet/tcp_var.h> 150 1.142 thorpej #include <netinet/tcp_private.h> 151 1.124 rpaulo #include <netinet/tcp_congctl.h> 152 1.5 mycroft #include <netinet/tcp_debug.h> 153 1.159 dyoung #include <netinet/tcp_vtw.h> 154 1.232 ozaki #include <netinet/tcp_syncache.h> 155 1.26 thorpej 156 1.221 maxv static int 157 1.221 maxv tcp_debug_capture(struct tcpcb *tp, int req) 158 1.185 rtr { 159 1.185 rtr #ifdef TCP_DEBUG 160 1.185 rtr return tp->t_state; 161 1.185 rtr #endif 162 1.185 rtr return 0; 163 1.185 rtr } 164 1.185 rtr 165 1.185 rtr static inline void 166 1.185 rtr tcp_debug_trace(struct socket *so, struct tcpcb *tp, int ostate, int req) 167 1.221 maxv { 168 1.185 rtr #ifdef TCP_DEBUG 169 1.185 rtr if (tp && (so->so_options & SO_DEBUG)) 170 1.185 rtr tcp_trace(TA_USER, ostate, tp, NULL, req); 171 1.185 rtr #endif 172 1.185 rtr } 173 1.185 rtr 174 1.132 christos static void 175 1.132 christos change_keepalive(struct socket *so, struct tcpcb *tp) 176 1.132 christos { 177 1.225 riastrad tp->t_maxidle = tp->t_keepcnt * MIN(tp->t_keepintvl, 178 1.225 riastrad TCP_TIMER_MAXTICKS / tp->t_keepcnt); 179 1.132 christos TCP_TIMER_DISARM(tp, TCPT_KEEP); 180 1.132 christos TCP_TIMER_DISARM(tp, TCPT_2MSL); 181 1.132 christos 182 1.132 christos if (tp->t_state == TCPS_SYN_RECEIVED || 183 1.132 christos tp->t_state == TCPS_SYN_SENT) { 184 1.132 christos TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 185 1.221 maxv } else if (so->so_options & SO_KEEPALIVE && 186 1.132 christos tp->t_state <= TCPS_CLOSE_WAIT) { 187 1.132 christos TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl); 188 1.132 christos } else { 189 1.132 christos TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle); 190 1.132 christos } 191 1.132 christos 192 1.132 christos if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 193 1.132 christos TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 194 1.132 christos } 195 1.132 christos 196 1.203 he /* 197 1.203 he * Export TCP internal state information via a struct tcp_info, based on the 198 1.203 he * Linux 2.6 API. Not ABI compatible as our constants are mapped differently 199 1.203 he * (TCP state machine, etc). We export all information using FreeBSD-native 200 1.203 he * constants -- for example, the numeric values for tcpi_state will differ 201 1.203 he * from Linux. 202 1.203 he */ 203 1.203 he static void 204 1.203 he tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) 205 1.203 he { 206 1.203 he 207 1.203 he bzero(ti, sizeof(*ti)); 208 1.203 he 209 1.203 he ti->tcpi_state = tp->t_state; 210 1.203 he if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) 211 1.203 he ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; 212 1.203 he if (tp->t_flags & TF_SACK_PERMIT) 213 1.203 he ti->tcpi_options |= TCPI_OPT_SACK; 214 1.203 he if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { 215 1.203 he ti->tcpi_options |= TCPI_OPT_WSCALE; 216 1.203 he ti->tcpi_snd_wscale = tp->snd_scale; 217 1.203 he ti->tcpi_rcv_wscale = tp->rcv_scale; 218 1.203 he } 219 1.203 he if (tp->t_flags & TF_ECN_PERMIT) { 220 1.203 he ti->tcpi_options |= TCPI_OPT_ECN; 221 1.203 he } 222 1.203 he 223 1.203 he ti->tcpi_rto = tp->t_rxtcur * tick; 224 1.226 maxv ti->tcpi_last_data_recv = (long)(getticks() - 225 1.203 he (int)tp->t_rcvtime) * tick; 226 1.227 mlelstv ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick / PR_SLOWHZ) 227 1.227 mlelstv >> (TCP_RTT_SHIFT + 2); 228 1.227 mlelstv ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick / PR_SLOWHZ) 229 1.227 mlelstv >> (TCP_RTTVAR_SHIFT + 2); 230 1.203 he 231 1.203 he ti->tcpi_snd_ssthresh = tp->snd_ssthresh; 232 1.203 he /* Linux API wants these in # of segments, apparently */ 233 1.203 he ti->tcpi_snd_cwnd = tp->snd_cwnd / tp->t_segsz; 234 1.203 he ti->tcpi_snd_wnd = tp->snd_wnd / tp->t_segsz; 235 1.203 he 236 1.203 he /* 237 1.203 he * FreeBSD-specific extension fields for tcp_info. 238 1.203 he */ 239 1.203 he ti->tcpi_rcv_space = tp->rcv_wnd; 240 1.203 he ti->tcpi_rcv_nxt = tp->rcv_nxt; 241 1.203 he ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */ 242 1.203 he ti->tcpi_snd_nxt = tp->snd_nxt; 243 1.203 he ti->tcpi_snd_mss = tp->t_segsz; 244 1.203 he ti->tcpi_rcv_mss = tp->t_segsz; 245 1.203 he #ifdef TF_TOE 246 1.203 he if (tp->t_flags & TF_TOE) 247 1.203 he ti->tcpi_options |= TCPI_OPT_TOE; 248 1.203 he #endif 249 1.203 he /* From the redundant department of redundancies... */ 250 1.203 he ti->__tcpi_retransmits = ti->__tcpi_retrans = 251 1.203 he ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; 252 1.203 he 253 1.203 he ti->tcpi_rcv_ooopack = tp->t_rcvoopack; 254 1.203 he ti->tcpi_snd_zerowin = tp->t_sndzerowin; 255 1.203 he } 256 1.203 he 257 1.6 mycroft int 258 1.147 plunky tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt) 259 1.1 cgd { 260 1.9 mycroft int error = 0, s; 261 1.9 mycroft struct inpcb *inp; 262 1.49 augustss struct tcpcb *tp; 263 1.203 he struct tcp_info ti; 264 1.132 christos u_int ui; 265 1.40 itojun int family; /* family of the socket */ 266 1.147 plunky int level, optname, optval; 267 1.147 plunky 268 1.147 plunky level = sopt->sopt_level; 269 1.147 plunky optname = sopt->sopt_name; 270 1.40 itojun 271 1.40 itojun family = so->so_proto->pr_domain->dom_family; 272 1.1 cgd 273 1.16 mycroft s = splsoftnet(); 274 1.233 ozaki inp = sotoinpcb(so); 275 1.236 ozaki if (inp == NULL) { 276 1.236 ozaki splx(s); 277 1.236 ozaki return ECONNRESET; 278 1.236 ozaki } 279 1.9 mycroft if (level != IPPROTO_TCP) { 280 1.40 itojun switch (family) { 281 1.40 itojun case PF_INET: 282 1.147 plunky error = ip_ctloutput(op, so, sopt); 283 1.40 itojun break; 284 1.40 itojun #ifdef INET6 285 1.40 itojun case PF_INET6: 286 1.147 plunky error = ip6_ctloutput(op, so, sopt); 287 1.40 itojun break; 288 1.40 itojun #endif 289 1.40 itojun } 290 1.9 mycroft splx(s); 291 1.221 maxv return error; 292 1.9 mycroft } 293 1.233 ozaki tp = intotcpcb(inp); 294 1.1 cgd 295 1.1 cgd switch (op) { 296 1.1 cgd case PRCO_SETOPT: 297 1.1 cgd switch (optname) { 298 1.90 jonathan #ifdef TCP_SIGNATURE 299 1.90 jonathan case TCP_MD5SIG: 300 1.147 plunky error = sockopt_getint(sopt, &optval); 301 1.90 jonathan if (error) 302 1.90 jonathan break; 303 1.147 plunky if (optval > 0) 304 1.90 jonathan tp->t_flags |= TF_SIGNATURE; 305 1.91 itojun else 306 1.90 jonathan tp->t_flags &= ~TF_SIGNATURE; 307 1.90 jonathan break; 308 1.90 jonathan #endif /* TCP_SIGNATURE */ 309 1.90 jonathan 310 1.1 cgd case TCP_NODELAY: 311 1.147 plunky error = sockopt_getint(sopt, &optval); 312 1.147 plunky if (error) 313 1.147 plunky break; 314 1.147 plunky if (optval) 315 1.1 cgd tp->t_flags |= TF_NODELAY; 316 1.1 cgd else 317 1.1 cgd tp->t_flags &= ~TF_NODELAY; 318 1.1 cgd break; 319 1.1 cgd 320 1.9 mycroft case TCP_MAXSEG: 321 1.147 plunky error = sockopt_getint(sopt, &optval); 322 1.147 plunky if (error) 323 1.147 plunky break; 324 1.147 plunky if (optval > 0 && optval <= tp->t_peermss) 325 1.147 plunky tp->t_peermss = optval; /* limit on send size */ 326 1.9 mycroft else 327 1.9 mycroft error = EINVAL; 328 1.9 mycroft break; 329 1.124 rpaulo #ifdef notyet 330 1.124 rpaulo case TCP_CONGCTL: 331 1.147 plunky /* XXX string overflow XXX */ 332 1.147 plunky error = tcp_congctl_select(tp, sopt->sopt_data); 333 1.147 plunky break; 334 1.124 rpaulo #endif 335 1.9 mycroft 336 1.132 christos case TCP_KEEPIDLE: 337 1.147 plunky error = sockopt_get(sopt, &ui, sizeof(ui)); 338 1.147 plunky if (error) 339 1.147 plunky break; 340 1.225 riastrad if (ui > 0 && ui <= TCP_TIMER_MAXTICKS) { 341 1.132 christos tp->t_keepidle = ui; 342 1.132 christos change_keepalive(so, tp); 343 1.132 christos } else 344 1.132 christos error = EINVAL; 345 1.132 christos break; 346 1.132 christos 347 1.132 christos case TCP_KEEPINTVL: 348 1.147 plunky error = sockopt_get(sopt, &ui, sizeof(ui)); 349 1.147 plunky if (error) 350 1.147 plunky break; 351 1.225 riastrad if (ui > 0 && ui <= TCP_TIMER_MAXTICKS) { 352 1.132 christos tp->t_keepintvl = ui; 353 1.132 christos change_keepalive(so, tp); 354 1.132 christos } else 355 1.132 christos error = EINVAL; 356 1.132 christos break; 357 1.132 christos 358 1.132 christos case TCP_KEEPCNT: 359 1.147 plunky error = sockopt_get(sopt, &ui, sizeof(ui)); 360 1.147 plunky if (error) 361 1.147 plunky break; 362 1.225 riastrad if (ui > 0 && ui <= TCP_TIMER_MAXTICKS) { 363 1.132 christos tp->t_keepcnt = ui; 364 1.132 christos change_keepalive(so, tp); 365 1.132 christos } else 366 1.132 christos error = EINVAL; 367 1.132 christos break; 368 1.132 christos 369 1.132 christos case TCP_KEEPINIT: 370 1.147 plunky error = sockopt_get(sopt, &ui, sizeof(ui)); 371 1.147 plunky if (error) 372 1.147 plunky break; 373 1.225 riastrad if (ui > 0 && ui <= TCP_TIMER_MAXTICKS) { 374 1.132 christos tp->t_keepinit = ui; 375 1.132 christos change_keepalive(so, tp); 376 1.132 christos } else 377 1.132 christos error = EINVAL; 378 1.132 christos break; 379 1.132 christos 380 1.1 cgd default: 381 1.9 mycroft error = ENOPROTOOPT; 382 1.1 cgd break; 383 1.1 cgd } 384 1.1 cgd break; 385 1.1 cgd 386 1.1 cgd case PRCO_GETOPT: 387 1.1 cgd switch (optname) { 388 1.90 jonathan #ifdef TCP_SIGNATURE 389 1.90 jonathan case TCP_MD5SIG: 390 1.147 plunky optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 391 1.216 christos goto setval; 392 1.90 jonathan #endif 393 1.1 cgd case TCP_NODELAY: 394 1.147 plunky optval = tp->t_flags & TF_NODELAY; 395 1.216 christos goto setval; 396 1.1 cgd case TCP_MAXSEG: 397 1.147 plunky optval = tp->t_peermss; 398 1.216 christos goto setval; 399 1.203 he case TCP_INFO: 400 1.203 he tcp_fill_info(tp, &ti); 401 1.203 he error = sockopt_set(sopt, &ti, sizeof ti); 402 1.203 he break; 403 1.124 rpaulo #ifdef notyet 404 1.124 rpaulo case TCP_CONGCTL: 405 1.124 rpaulo break; 406 1.124 rpaulo #endif 407 1.216 christos case TCP_KEEPIDLE: 408 1.216 christos optval = tp->t_keepidle; 409 1.216 christos goto setval; 410 1.216 christos case TCP_KEEPINTVL: 411 1.216 christos optval = tp->t_keepintvl; 412 1.216 christos goto setval; 413 1.216 christos case TCP_KEEPCNT: 414 1.216 christos optval = tp->t_keepcnt; 415 1.216 christos goto setval; 416 1.216 christos case TCP_KEEPINIT: 417 1.230 christos optval = tp->t_keepinit; 418 1.216 christos setval: error = sockopt_set(sopt, &optval, sizeof(optval)); 419 1.216 christos break; 420 1.1 cgd default: 421 1.9 mycroft error = ENOPROTOOPT; 422 1.1 cgd break; 423 1.1 cgd } 424 1.1 cgd break; 425 1.1 cgd } 426 1.9 mycroft splx(s); 427 1.221 maxv return error; 428 1.1 cgd } 429 1.1 cgd 430 1.11 mycroft #ifndef TCP_SENDSPACE 431 1.84 tls #define TCP_SENDSPACE 1024*32 432 1.11 mycroft #endif 433 1.25 thorpej int tcp_sendspace = TCP_SENDSPACE; 434 1.11 mycroft #ifndef TCP_RECVSPACE 435 1.84 tls #define TCP_RECVSPACE 1024*32 436 1.11 mycroft #endif 437 1.25 thorpej int tcp_recvspace = TCP_RECVSPACE; 438 1.1 cgd 439 1.1 cgd /* 440 1.173 rmind * tcp_attach: attach TCP protocol to socket, allocating internet protocol 441 1.173 rmind * control block, TCP control block, buffer space and entering LISTEN state 442 1.173 rmind * if to accept connections. 443 1.1 cgd */ 444 1.173 rmind static int 445 1.173 rmind tcp_attach(struct socket *so, int proto) 446 1.1 cgd { 447 1.49 augustss struct tcpcb *tp; 448 1.1 cgd struct inpcb *inp; 449 1.173 rmind int s, error, family; 450 1.173 rmind 451 1.173 rmind /* Assign the lock (must happen even if we will error out). */ 452 1.173 rmind s = splsoftnet(); 453 1.173 rmind sosetlock(so); 454 1.173 rmind KASSERT(solocked(so)); 455 1.233 ozaki KASSERT(sotoinpcb(so) == NULL); 456 1.40 itojun 457 1.233 ozaki inp = sotoinpcb(so); 458 1.233 ozaki KASSERT(inp == NULL); 459 1.173 rmind 460 1.233 ozaki family = soaf(so); 461 1.1 cgd 462 1.75 matt #ifdef MBUFTRACE 463 1.130 yamt so->so_mowner = &tcp_sock_mowner; 464 1.130 yamt so->so_rcv.sb_mowner = &tcp_sock_rx_mowner; 465 1.130 yamt so->so_snd.sb_mowner = &tcp_sock_tx_mowner; 466 1.75 matt #endif 467 1.1 cgd if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 468 1.1 cgd error = soreserve(so, tcp_sendspace, tcp_recvspace); 469 1.1 cgd if (error) 470 1.173 rmind goto out; 471 1.1 cgd } 472 1.136 rmind 473 1.136 rmind so->so_rcv.sb_flags |= SB_AUTOSIZE; 474 1.136 rmind so->so_snd.sb_flags |= SB_AUTOSIZE; 475 1.136 rmind 476 1.237 ozaki error = inpcb_create(so, &tcbtable); 477 1.233 ozaki if (error) 478 1.173 rmind goto out; 479 1.233 ozaki inp = sotoinpcb(so); 480 1.40 itojun 481 1.233 ozaki tp = tcp_newtcpcb(family, inp); 482 1.173 rmind if (tp == NULL) { 483 1.1 cgd int nofd = so->so_state & SS_NOFDREF; /* XXX */ 484 1.1 cgd 485 1.1 cgd so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 486 1.237 ozaki inpcb_destroy(inp); 487 1.1 cgd so->so_state |= nofd; 488 1.173 rmind error = ENOBUFS; 489 1.173 rmind goto out; 490 1.1 cgd } 491 1.1 cgd tp->t_state = TCPS_CLOSED; 492 1.173 rmind if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 493 1.173 rmind so->so_linger = TCP_LINGERTIME; 494 1.173 rmind } 495 1.173 rmind out: 496 1.173 rmind KASSERT(solocked(so)); 497 1.173 rmind splx(s); 498 1.173 rmind return error; 499 1.173 rmind } 500 1.173 rmind 501 1.173 rmind static void 502 1.173 rmind tcp_detach(struct socket *so) 503 1.173 rmind { 504 1.233 ozaki struct inpcb *inp; 505 1.233 ozaki struct tcpcb *tp; 506 1.192 rtr int s; 507 1.173 rmind 508 1.233 ozaki inp = sotoinpcb(so); 509 1.235 ozaki if (inp == NULL) 510 1.235 ozaki return; 511 1.233 ozaki tp = intotcpcb(inp); 512 1.192 rtr 513 1.173 rmind s = splsoftnet(); 514 1.194 rtr (void)tcp_disconnect1(tp); 515 1.173 rmind splx(s); 516 1.1 cgd } 517 1.1 cgd 518 1.178 rtr static int 519 1.206 rtr tcp_accept(struct socket *so, struct sockaddr *nam) 520 1.186 rtr { 521 1.233 ozaki struct inpcb *inp; 522 1.233 ozaki struct tcpcb *tp; 523 1.186 rtr int ostate = 0; 524 1.195 rtr int s; 525 1.186 rtr 526 1.233 ozaki inp = sotoinpcb(so); 527 1.235 ozaki if (inp == NULL) 528 1.235 ozaki return EINVAL; 529 1.233 ozaki tp = intotcpcb(inp); 530 1.186 rtr 531 1.192 rtr ostate = tcp_debug_capture(tp, PRU_ACCEPT); 532 1.187 rmind 533 1.187 rmind /* 534 1.187 rmind * Accept a connection. Essentially all the work is 535 1.187 rmind * done at higher levels; just return the address 536 1.187 rmind * of the peer, storing through addr. 537 1.187 rmind */ 538 1.195 rtr s = splsoftnet(); 539 1.233 ozaki if (inp->inp_af == AF_INET) { 540 1.237 ozaki inpcb_fetch_peeraddr(inp, (struct sockaddr_in *)nam); 541 1.186 rtr } 542 1.186 rtr #ifdef INET6 543 1.233 ozaki else if (inp->inp_af == AF_INET6) { 544 1.238 ozaki in6pcb_fetch_peeraddr(inp, (struct sockaddr_in6 *)nam); 545 1.186 rtr } 546 1.186 rtr #endif 547 1.186 rtr tcp_debug_trace(so, tp, ostate, PRU_ACCEPT); 548 1.195 rtr splx(s); 549 1.192 rtr 550 1.186 rtr return 0; 551 1.186 rtr } 552 1.186 rtr 553 1.186 rtr static int 554 1.205 rtr tcp_bind(struct socket *so, struct sockaddr *nam, struct lwp *l) 555 1.190 rtr { 556 1.192 rtr struct inpcb *inp = NULL; 557 1.205 rtr struct sockaddr_in *sin = (struct sockaddr_in *)nam; 558 1.205 rtr #ifdef INET6 559 1.205 rtr struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 560 1.205 rtr #endif /* INET6 */ 561 1.233 ozaki struct tcpcb *tp; 562 1.190 rtr int s; 563 1.190 rtr int error = 0; 564 1.190 rtr int ostate = 0; 565 1.190 rtr 566 1.233 ozaki inp = sotoinpcb(so); 567 1.235 ozaki if (inp == NULL) 568 1.235 ozaki return EINVAL; 569 1.233 ozaki tp = intotcpcb(inp); 570 1.190 rtr 571 1.192 rtr ostate = tcp_debug_capture(tp, PRU_BIND); 572 1.190 rtr 573 1.190 rtr /* 574 1.190 rtr * Give the socket an address. 575 1.190 rtr */ 576 1.192 rtr s = splsoftnet(); 577 1.192 rtr switch (so->so_proto->pr_domain->dom_family) { 578 1.190 rtr case PF_INET: 579 1.237 ozaki error = inpcb_bind(inp, sin, l); 580 1.190 rtr break; 581 1.190 rtr #ifdef INET6 582 1.190 rtr case PF_INET6: 583 1.238 ozaki error = in6pcb_bind(inp, sin6, l); 584 1.190 rtr if (!error) { 585 1.190 rtr /* mapped addr case */ 586 1.234 ozaki if (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp))) 587 1.190 rtr tp->t_family = AF_INET; 588 1.190 rtr else 589 1.190 rtr tp->t_family = AF_INET6; 590 1.190 rtr } 591 1.190 rtr break; 592 1.190 rtr #endif 593 1.190 rtr } 594 1.190 rtr tcp_debug_trace(so, tp, ostate, PRU_BIND); 595 1.192 rtr splx(s); 596 1.190 rtr 597 1.192 rtr return error; 598 1.190 rtr } 599 1.190 rtr 600 1.190 rtr static int 601 1.196 rtr tcp_listen(struct socket *so, struct lwp *l) 602 1.190 rtr { 603 1.233 ozaki struct inpcb *inp; 604 1.233 ozaki struct tcpcb *tp; 605 1.190 rtr int error = 0; 606 1.190 rtr int ostate = 0; 607 1.195 rtr int s; 608 1.190 rtr 609 1.233 ozaki inp = sotoinpcb(so); 610 1.235 ozaki if (inp == NULL) 611 1.235 ozaki return EINVAL; 612 1.233 ozaki tp = intotcpcb(inp); 613 1.190 rtr 614 1.192 rtr ostate = tcp_debug_capture(tp, PRU_LISTEN); 615 1.190 rtr 616 1.190 rtr /* 617 1.190 rtr * Prepare to accept connections. 618 1.190 rtr */ 619 1.192 rtr s = splsoftnet(); 620 1.233 ozaki if (inp->inp_af == AF_INET && inp->inp_lport == 0) { 621 1.237 ozaki error = inpcb_bind(inp, NULL, l); 622 1.190 rtr if (error) 623 1.190 rtr goto release; 624 1.190 rtr } 625 1.190 rtr #ifdef INET6 626 1.233 ozaki if (inp->inp_af == AF_INET6 && inp->inp_lport == 0) { 627 1.238 ozaki error = in6pcb_bind(inp, NULL, l); 628 1.190 rtr if (error) 629 1.190 rtr goto release; 630 1.190 rtr } 631 1.190 rtr #endif 632 1.190 rtr tp->t_state = TCPS_LISTEN; 633 1.190 rtr 634 1.192 rtr release: 635 1.190 rtr tcp_debug_trace(so, tp, ostate, PRU_LISTEN); 636 1.192 rtr splx(s); 637 1.190 rtr 638 1.192 rtr return error; 639 1.190 rtr } 640 1.190 rtr 641 1.190 rtr static int 642 1.208 rtr tcp_connect(struct socket *so, struct sockaddr *nam, struct lwp *l) 643 1.193 rtr { 644 1.233 ozaki struct inpcb *inp; 645 1.233 ozaki struct tcpcb *tp; 646 1.193 rtr int s; 647 1.193 rtr int error = 0; 648 1.193 rtr int ostate = 0; 649 1.193 rtr 650 1.233 ozaki inp = sotoinpcb(so); 651 1.235 ozaki if (inp == NULL) 652 1.235 ozaki return EINVAL; 653 1.233 ozaki tp = intotcpcb(inp); 654 1.193 rtr 655 1.193 rtr ostate = tcp_debug_capture(tp, PRU_CONNECT); 656 1.193 rtr 657 1.193 rtr /* 658 1.193 rtr * Initiate connection to peer. 659 1.193 rtr * Create a template for use in transmissions on this connection. 660 1.193 rtr * Enter SYN_SENT state, and mark socket as connecting. 661 1.193 rtr * Start keep-alive timer, and seed output sequence space. 662 1.193 rtr * Send initial segment on connection. 663 1.193 rtr */ 664 1.195 rtr s = splsoftnet(); 665 1.217 maxv 666 1.233 ozaki if (inp->inp_af == AF_INET) { 667 1.193 rtr if (inp->inp_lport == 0) { 668 1.237 ozaki error = inpcb_bind(inp, NULL, l); 669 1.193 rtr if (error) 670 1.193 rtr goto release; 671 1.193 rtr } 672 1.237 ozaki error = inpcb_connect(inp, (struct sockaddr_in *)nam, l); 673 1.193 rtr } 674 1.193 rtr #ifdef INET6 675 1.233 ozaki if (inp->inp_af == AF_INET6) { 676 1.233 ozaki if (inp->inp_lport == 0) { 677 1.238 ozaki error = in6pcb_bind(inp, NULL, l); 678 1.193 rtr if (error) 679 1.193 rtr goto release; 680 1.193 rtr } 681 1.238 ozaki error = in6pcb_connect(inp, (struct sockaddr_in6 *)nam, l); 682 1.193 rtr if (!error) { 683 1.193 rtr /* mapped addr case */ 684 1.234 ozaki if (IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp))) 685 1.193 rtr tp->t_family = AF_INET; 686 1.193 rtr else 687 1.193 rtr tp->t_family = AF_INET6; 688 1.193 rtr } 689 1.193 rtr } 690 1.193 rtr #endif 691 1.193 rtr if (error) 692 1.193 rtr goto release; 693 1.193 rtr tp->t_template = tcp_template(tp); 694 1.193 rtr if (tp->t_template == 0) { 695 1.233 ozaki if (inp->inp_af == AF_INET) 696 1.237 ozaki inpcb_disconnect(inp); 697 1.193 rtr #ifdef INET6 698 1.233 ozaki else if (inp->inp_af == AF_INET6) 699 1.238 ozaki in6pcb_disconnect(inp); 700 1.193 rtr #endif 701 1.193 rtr error = ENOBUFS; 702 1.193 rtr goto release; 703 1.193 rtr } 704 1.193 rtr /* 705 1.193 rtr * Compute window scaling to request. 706 1.193 rtr * XXX: This should be moved to tcp_output(). 707 1.193 rtr */ 708 1.193 rtr while (tp->request_r_scale < TCP_MAX_WINSHIFT && 709 1.193 rtr (TCP_MAXWIN << tp->request_r_scale) < sb_max) 710 1.193 rtr tp->request_r_scale++; 711 1.193 rtr soisconnecting(so); 712 1.193 rtr TCP_STATINC(TCP_STAT_CONNATTEMPT); 713 1.193 rtr tp->t_state = TCPS_SYN_SENT; 714 1.193 rtr TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 715 1.229 christos tp->iss = tcp_new_iss(tp); 716 1.193 rtr tcp_sendseqinit(tp); 717 1.193 rtr error = tcp_output(tp); 718 1.193 rtr 719 1.193 rtr release: 720 1.193 rtr tcp_debug_trace(so, tp, ostate, PRU_CONNECT); 721 1.193 rtr splx(s); 722 1.193 rtr 723 1.193 rtr return error; 724 1.193 rtr } 725 1.193 rtr 726 1.193 rtr static int 727 1.200 rtr tcp_connect2(struct socket *so, struct socket *so2) 728 1.200 rtr { 729 1.233 ozaki struct inpcb *inp; 730 1.233 ozaki struct tcpcb *tp; 731 1.200 rtr int ostate = 0; 732 1.200 rtr 733 1.200 rtr KASSERT(solocked(so)); 734 1.200 rtr 735 1.233 ozaki inp = sotoinpcb(so); 736 1.235 ozaki if (inp == NULL) 737 1.235 ozaki return EINVAL; 738 1.233 ozaki tp = intotcpcb(inp); 739 1.200 rtr 740 1.200 rtr ostate = tcp_debug_capture(tp, PRU_CONNECT2); 741 1.200 rtr 742 1.200 rtr tcp_debug_trace(so, tp, ostate, PRU_CONNECT2); 743 1.200 rtr 744 1.200 rtr return EOPNOTSUPP; 745 1.200 rtr } 746 1.200 rtr 747 1.200 rtr static int 748 1.194 rtr tcp_disconnect(struct socket *so) 749 1.194 rtr { 750 1.233 ozaki struct inpcb *inp; 751 1.233 ozaki struct tcpcb *tp; 752 1.194 rtr int error = 0; 753 1.194 rtr int ostate = 0; 754 1.195 rtr int s; 755 1.194 rtr 756 1.233 ozaki inp = sotoinpcb(so); 757 1.235 ozaki if (inp == NULL) 758 1.235 ozaki return EINVAL; 759 1.233 ozaki tp = intotcpcb(inp); 760 1.194 rtr 761 1.194 rtr ostate = tcp_debug_capture(tp, PRU_DISCONNECT); 762 1.194 rtr 763 1.194 rtr /* 764 1.194 rtr * Initiate disconnect from peer. 765 1.194 rtr * If connection never passed embryonic stage, just drop; 766 1.194 rtr * else if don't need to let data drain, then can just drop anyways, 767 1.194 rtr * else have to begin TCP shutdown process: mark socket disconnecting, 768 1.194 rtr * drain unread data, state switch to reflect user close, and 769 1.194 rtr * send segment (e.g. FIN) to peer. Socket will be really disconnected 770 1.194 rtr * when peer sends FIN and acks ours. 771 1.194 rtr * 772 1.194 rtr * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 773 1.194 rtr */ 774 1.194 rtr s = splsoftnet(); 775 1.194 rtr tp = tcp_disconnect1(tp); 776 1.194 rtr tcp_debug_trace(so, tp, ostate, PRU_DISCONNECT); 777 1.194 rtr splx(s); 778 1.194 rtr 779 1.194 rtr return error; 780 1.194 rtr } 781 1.194 rtr 782 1.194 rtr static int 783 1.194 rtr tcp_shutdown(struct socket *so) 784 1.194 rtr { 785 1.233 ozaki struct inpcb *inp; 786 1.233 ozaki struct tcpcb *tp; 787 1.194 rtr int error = 0; 788 1.194 rtr int ostate = 0; 789 1.195 rtr int s; 790 1.194 rtr 791 1.233 ozaki inp = sotoinpcb(so); 792 1.235 ozaki if (inp == NULL) 793 1.235 ozaki return EINVAL; 794 1.233 ozaki tp = intotcpcb(inp); 795 1.194 rtr 796 1.194 rtr ostate = tcp_debug_capture(tp, PRU_SHUTDOWN); 797 1.194 rtr /* 798 1.194 rtr * Mark the connection as being incapable of further output. 799 1.194 rtr */ 800 1.194 rtr s = splsoftnet(); 801 1.194 rtr socantsendmore(so); 802 1.194 rtr tp = tcp_usrclosed(tp); 803 1.194 rtr if (tp) 804 1.194 rtr error = tcp_output(tp); 805 1.194 rtr tcp_debug_trace(so, tp, ostate, PRU_SHUTDOWN); 806 1.194 rtr splx(s); 807 1.194 rtr 808 1.194 rtr return error; 809 1.194 rtr } 810 1.194 rtr 811 1.194 rtr static int 812 1.194 rtr tcp_abort(struct socket *so) 813 1.194 rtr { 814 1.233 ozaki struct inpcb *inp; 815 1.233 ozaki struct tcpcb *tp; 816 1.194 rtr int error = 0; 817 1.194 rtr int ostate = 0; 818 1.195 rtr int s; 819 1.194 rtr 820 1.233 ozaki inp = sotoinpcb(so); 821 1.235 ozaki if (inp == NULL) 822 1.235 ozaki return EINVAL; 823 1.233 ozaki tp = intotcpcb(inp); 824 1.194 rtr 825 1.194 rtr ostate = tcp_debug_capture(tp, PRU_ABORT); 826 1.194 rtr 827 1.194 rtr /* 828 1.194 rtr * Abort the TCP. 829 1.194 rtr */ 830 1.194 rtr s = splsoftnet(); 831 1.194 rtr tp = tcp_drop(tp, ECONNABORTED); 832 1.194 rtr tcp_debug_trace(so, tp, ostate, PRU_ABORT); 833 1.194 rtr splx(s); 834 1.194 rtr 835 1.194 rtr return error; 836 1.194 rtr } 837 1.194 rtr 838 1.194 rtr static int 839 1.180 rtr tcp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp) 840 1.178 rtr { 841 1.178 rtr switch (so->so_proto->pr_domain->dom_family) { 842 1.178 rtr case PF_INET: 843 1.180 rtr return in_control(so, cmd, nam, ifp); 844 1.178 rtr #ifdef INET6 845 1.178 rtr case PF_INET6: 846 1.180 rtr return in6_control(so, cmd, nam, ifp); 847 1.178 rtr #endif 848 1.178 rtr default: 849 1.178 rtr return EAFNOSUPPORT; 850 1.178 rtr } 851 1.178 rtr } 852 1.178 rtr 853 1.181 rtr static int 854 1.181 rtr tcp_stat(struct socket *so, struct stat *ub) 855 1.181 rtr { 856 1.184 rtr KASSERT(solocked(so)); 857 1.184 rtr 858 1.183 rtr /* stat: don't bother with a blocksize. */ 859 1.183 rtr return 0; 860 1.181 rtr } 861 1.181 rtr 862 1.185 rtr static int 863 1.206 rtr tcp_peeraddr(struct socket *so, struct sockaddr *nam) 864 1.185 rtr { 865 1.233 ozaki struct inpcb *inp; 866 1.233 ozaki struct tcpcb *tp; 867 1.185 rtr int ostate = 0; 868 1.195 rtr int s; 869 1.185 rtr 870 1.233 ozaki inp = sotoinpcb(so); 871 1.235 ozaki if (inp == NULL) 872 1.235 ozaki return EINVAL; 873 1.233 ozaki tp = intotcpcb(inp); 874 1.185 rtr 875 1.192 rtr ostate = tcp_debug_capture(tp, PRU_PEERADDR); 876 1.185 rtr 877 1.195 rtr s = splsoftnet(); 878 1.233 ozaki if (inp->inp_af == AF_INET) { 879 1.237 ozaki inpcb_fetch_peeraddr(inp, (struct sockaddr_in *)nam); 880 1.206 rtr } 881 1.185 rtr #ifdef INET6 882 1.233 ozaki else if (inp->inp_af == AF_INET6) { 883 1.238 ozaki in6pcb_fetch_peeraddr(inp, (struct sockaddr_in6 *)nam); 884 1.206 rtr } 885 1.185 rtr #endif 886 1.185 rtr tcp_debug_trace(so, tp, ostate, PRU_PEERADDR); 887 1.195 rtr splx(s); 888 1.185 rtr 889 1.185 rtr return 0; 890 1.185 rtr } 891 1.185 rtr 892 1.185 rtr static int 893 1.206 rtr tcp_sockaddr(struct socket *so, struct sockaddr *nam) 894 1.185 rtr { 895 1.233 ozaki struct inpcb *inp; 896 1.233 ozaki struct tcpcb *tp; 897 1.185 rtr int ostate = 0; 898 1.195 rtr int s; 899 1.185 rtr 900 1.233 ozaki inp = sotoinpcb(so); 901 1.235 ozaki if (inp == NULL) 902 1.235 ozaki return EINVAL; 903 1.233 ozaki tp = intotcpcb(inp); 904 1.185 rtr 905 1.192 rtr ostate = tcp_debug_capture(tp, PRU_SOCKADDR); 906 1.185 rtr 907 1.195 rtr s = splsoftnet(); 908 1.233 ozaki if (inp->inp_af == AF_INET) { 909 1.237 ozaki inpcb_fetch_sockaddr(inp, (struct sockaddr_in *)nam); 910 1.206 rtr } 911 1.185 rtr #ifdef INET6 912 1.233 ozaki if (inp->inp_af == AF_INET6) { 913 1.238 ozaki in6pcb_fetch_sockaddr(inp, (struct sockaddr_in6 *)nam); 914 1.206 rtr } 915 1.185 rtr #endif 916 1.185 rtr tcp_debug_trace(so, tp, ostate, PRU_SOCKADDR); 917 1.195 rtr splx(s); 918 1.185 rtr 919 1.185 rtr return 0; 920 1.185 rtr } 921 1.185 rtr 922 1.189 rtr static int 923 1.199 rtr tcp_rcvd(struct socket *so, int flags, struct lwp *l) 924 1.199 rtr { 925 1.233 ozaki struct inpcb *inp; 926 1.233 ozaki struct tcpcb *tp; 927 1.199 rtr int ostate = 0; 928 1.199 rtr int s; 929 1.199 rtr 930 1.233 ozaki inp = sotoinpcb(so); 931 1.235 ozaki if (inp == NULL) 932 1.235 ozaki return EINVAL; 933 1.233 ozaki tp = intotcpcb(inp); 934 1.199 rtr 935 1.199 rtr ostate = tcp_debug_capture(tp, PRU_RCVD); 936 1.199 rtr 937 1.199 rtr /* 938 1.199 rtr * After a receive, possibly send window update to peer. 939 1.199 rtr * 940 1.199 rtr * soreceive() calls this function when a user receives 941 1.199 rtr * ancillary data on a listening socket. We don't call 942 1.199 rtr * tcp_output in such a case, since there is no header 943 1.199 rtr * template for a listening socket and hence the kernel 944 1.199 rtr * will panic. 945 1.199 rtr */ 946 1.199 rtr s = splsoftnet(); 947 1.199 rtr if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 948 1.199 rtr (void) tcp_output(tp); 949 1.199 rtr splx(s); 950 1.199 rtr 951 1.199 rtr tcp_debug_trace(so, tp, ostate, PRU_RCVD); 952 1.199 rtr 953 1.199 rtr return 0; 954 1.199 rtr } 955 1.199 rtr 956 1.199 rtr static int 957 1.189 rtr tcp_recvoob(struct socket *so, struct mbuf *m, int flags) 958 1.189 rtr { 959 1.233 ozaki struct inpcb *inp; 960 1.233 ozaki struct tcpcb *tp; 961 1.189 rtr int ostate = 0; 962 1.195 rtr int s; 963 1.189 rtr 964 1.233 ozaki inp = sotoinpcb(so); 965 1.235 ozaki if (inp == NULL) 966 1.235 ozaki return EINVAL; 967 1.233 ozaki tp = intotcpcb(inp); 968 1.189 rtr 969 1.192 rtr ostate = tcp_debug_capture(tp, PRU_RCVOOB); 970 1.189 rtr 971 1.195 rtr s = splsoftnet(); 972 1.189 rtr if ((so->so_oobmark == 0 && 973 1.189 rtr (so->so_state & SS_RCVATMARK) == 0) || 974 1.189 rtr so->so_options & SO_OOBINLINE || 975 1.195 rtr tp->t_oobflags & TCPOOB_HADDATA) { 976 1.195 rtr splx(s); 977 1.189 rtr return EINVAL; 978 1.195 rtr } 979 1.189 rtr 980 1.195 rtr if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 981 1.195 rtr splx(s); 982 1.189 rtr return EWOULDBLOCK; 983 1.195 rtr } 984 1.189 rtr 985 1.189 rtr m->m_len = 1; 986 1.189 rtr *mtod(m, char *) = tp->t_iobc; 987 1.228 chs if ((flags & MSG_PEEK) == 0) { 988 1.189 rtr tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 989 1.228 chs so->so_state &= ~SS_POLLRDBAND; 990 1.228 chs } 991 1.189 rtr 992 1.189 rtr tcp_debug_trace(so, tp, ostate, PRU_RCVOOB); 993 1.195 rtr splx(s); 994 1.189 rtr 995 1.189 rtr return 0; 996 1.189 rtr } 997 1.189 rtr 998 1.189 rtr static int 999 1.208 rtr tcp_send(struct socket *so, struct mbuf *m, struct sockaddr *nam, 1000 1.198 rtr struct mbuf *control, struct lwp *l) 1001 1.198 rtr { 1002 1.233 ozaki struct inpcb *inp; 1003 1.233 ozaki struct tcpcb *tp; 1004 1.198 rtr int ostate = 0; 1005 1.198 rtr int error = 0; 1006 1.198 rtr int s; 1007 1.198 rtr 1008 1.233 ozaki inp = sotoinpcb(so); 1009 1.235 ozaki if (inp == NULL) 1010 1.235 ozaki return EINVAL; 1011 1.233 ozaki tp = intotcpcb(inp); 1012 1.198 rtr 1013 1.198 rtr ostate = tcp_debug_capture(tp, PRU_SEND); 1014 1.198 rtr 1015 1.198 rtr /* 1016 1.198 rtr * Do a send by putting data in output queue and updating urgent 1017 1.198 rtr * marker if URG set. Possibly send more data. 1018 1.198 rtr */ 1019 1.198 rtr s = splsoftnet(); 1020 1.198 rtr if (control && control->m_len) { 1021 1.198 rtr m_freem(control); 1022 1.198 rtr m_freem(m); 1023 1.198 rtr tcp_debug_trace(so, tp, ostate, PRU_SEND); 1024 1.198 rtr splx(s); 1025 1.198 rtr return EINVAL; 1026 1.198 rtr } 1027 1.198 rtr 1028 1.198 rtr sbappendstream(&so->so_snd, m); 1029 1.198 rtr error = tcp_output(tp); 1030 1.198 rtr tcp_debug_trace(so, tp, ostate, PRU_SEND); 1031 1.198 rtr splx(s); 1032 1.198 rtr 1033 1.198 rtr return error; 1034 1.198 rtr } 1035 1.198 rtr 1036 1.198 rtr static int 1037 1.189 rtr tcp_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control) 1038 1.189 rtr { 1039 1.189 rtr struct inpcb *inp = NULL; 1040 1.189 rtr struct tcpcb *tp = NULL; 1041 1.189 rtr int ostate = 0; 1042 1.189 rtr int error = 0; 1043 1.195 rtr int s; 1044 1.189 rtr 1045 1.233 ozaki inp = sotoinpcb(so); 1046 1.235 ozaki if (inp == NULL) { 1047 1.235 ozaki m_freem(m); 1048 1.235 ozaki m_freem(control); 1049 1.235 ozaki return EINVAL; 1050 1.235 ozaki } 1051 1.233 ozaki tp = intotcpcb(inp); 1052 1.231 riastrad if (tp->t_template == NULL) { 1053 1.231 riastrad /* 1054 1.231 riastrad * XXX FreeBSD appears to open the connection 1055 1.231 riastrad * automagically in this case, but the socket address 1056 1.231 riastrad * isn't passed through here so we can't do that. 1057 1.231 riastrad */ 1058 1.231 riastrad m_freem(m); 1059 1.231 riastrad m_freem(control); 1060 1.231 riastrad return ENOTCONN; 1061 1.231 riastrad } 1062 1.189 rtr 1063 1.192 rtr ostate = tcp_debug_capture(tp, PRU_SENDOOB); 1064 1.189 rtr 1065 1.195 rtr s = splsoftnet(); 1066 1.222 christos if (sbspace_oob(&so->so_snd) == 0) { 1067 1.189 rtr m_freem(m); 1068 1.223 martin m_freem(control); 1069 1.195 rtr splx(s); 1070 1.189 rtr return ENOBUFS; 1071 1.189 rtr } 1072 1.189 rtr /* 1073 1.189 rtr * According to RFC961 (Assigned Protocols), 1074 1.189 rtr * the urgent pointer points to the last octet 1075 1.189 rtr * of urgent data. We continue, however, 1076 1.189 rtr * to consider it to indicate the first octet 1077 1.189 rtr * of data past the urgent section. 1078 1.189 rtr * Otherwise, snd_up should be one lower. 1079 1.189 rtr */ 1080 1.189 rtr sbappendstream(&so->so_snd, m); 1081 1.189 rtr tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 1082 1.189 rtr tp->t_force = 1; 1083 1.189 rtr error = tcp_output(tp); 1084 1.189 rtr tp->t_force = 0; 1085 1.189 rtr tcp_debug_trace(so, tp, ostate, PRU_SENDOOB); 1086 1.195 rtr splx(s); 1087 1.223 martin m_freem(control); 1088 1.189 rtr 1089 1.189 rtr return error; 1090 1.189 rtr } 1091 1.189 rtr 1092 1.200 rtr static int 1093 1.200 rtr tcp_purgeif(struct socket *so, struct ifnet *ifp) 1094 1.200 rtr { 1095 1.200 rtr int s; 1096 1.213 knakahar int error = 0; 1097 1.200 rtr 1098 1.200 rtr s = splsoftnet(); 1099 1.213 knakahar 1100 1.200 rtr mutex_enter(softnet_lock); 1101 1.200 rtr switch (so->so_proto->pr_domain->dom_family) { 1102 1.200 rtr case PF_INET: 1103 1.237 ozaki inpcb_purgeif0(&tcbtable, ifp); 1104 1.214 ozaki #ifdef NET_MPSAFE 1105 1.214 ozaki mutex_exit(softnet_lock); 1106 1.214 ozaki #endif 1107 1.200 rtr in_purgeif(ifp); 1108 1.214 ozaki #ifdef NET_MPSAFE 1109 1.214 ozaki mutex_enter(softnet_lock); 1110 1.214 ozaki #endif 1111 1.237 ozaki inpcb_purgeif(&tcbtable, ifp); 1112 1.200 rtr break; 1113 1.200 rtr #ifdef INET6 1114 1.200 rtr case PF_INET6: 1115 1.238 ozaki in6pcb_purgeif0(&tcbtable, ifp); 1116 1.214 ozaki #ifdef NET_MPSAFE 1117 1.214 ozaki mutex_exit(softnet_lock); 1118 1.214 ozaki #endif 1119 1.200 rtr in6_purgeif(ifp); 1120 1.214 ozaki #ifdef NET_MPSAFE 1121 1.214 ozaki mutex_enter(softnet_lock); 1122 1.214 ozaki #endif 1123 1.238 ozaki in6pcb_purgeif(&tcbtable, ifp); 1124 1.200 rtr break; 1125 1.200 rtr #endif 1126 1.200 rtr default: 1127 1.213 knakahar error = EAFNOSUPPORT; 1128 1.213 knakahar break; 1129 1.200 rtr } 1130 1.200 rtr mutex_exit(softnet_lock); 1131 1.200 rtr splx(s); 1132 1.200 rtr 1133 1.213 knakahar return error; 1134 1.200 rtr } 1135 1.200 rtr 1136 1.1 cgd /* 1137 1.1 cgd * Initiate (or continue) disconnect. 1138 1.1 cgd * If embryonic state, just send reset (once). 1139 1.1 cgd * If in ``let data drain'' option and linger null, just drop. 1140 1.1 cgd * Otherwise (hard), mark socket disconnecting and drop 1141 1.1 cgd * current input data; switch states based on user close, and 1142 1.1 cgd * send segment to peer (with FIN). 1143 1.1 cgd */ 1144 1.1 cgd struct tcpcb * 1145 1.194 rtr tcp_disconnect1(struct tcpcb *tp) 1146 1.1 cgd { 1147 1.40 itojun struct socket *so; 1148 1.40 itojun 1149 1.233 ozaki so = tp->t_inpcb->inp_socket; 1150 1.1 cgd 1151 1.12 mycroft if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 1152 1.1 cgd tp = tcp_close(tp); 1153 1.1 cgd else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 1154 1.1 cgd tp = tcp_drop(tp, 0); 1155 1.1 cgd else { 1156 1.1 cgd soisdisconnecting(so); 1157 1.1 cgd sbflush(&so->so_rcv); 1158 1.1 cgd tp = tcp_usrclosed(tp); 1159 1.1 cgd if (tp) 1160 1.1 cgd (void) tcp_output(tp); 1161 1.1 cgd } 1162 1.221 maxv return tp; 1163 1.1 cgd } 1164 1.1 cgd 1165 1.1 cgd /* 1166 1.1 cgd * User issued close, and wish to trail through shutdown states: 1167 1.1 cgd * if never received SYN, just forget it. If got a SYN from peer, 1168 1.1 cgd * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1169 1.1 cgd * If already got a FIN from peer, then almost done; go to LAST_ACK 1170 1.1 cgd * state. In all other cases, have already sent FIN to peer (e.g. 1171 1.1 cgd * after PRU_SHUTDOWN), and just have to play tedious game waiting 1172 1.1 cgd * for peer to send FIN or not respond to keep-alives, etc. 1173 1.1 cgd * We can let the user exit from the close as soon as the FIN is acked. 1174 1.1 cgd */ 1175 1.1 cgd struct tcpcb * 1176 1.94 perry tcp_usrclosed(struct tcpcb *tp) 1177 1.1 cgd { 1178 1.1 cgd 1179 1.1 cgd switch (tp->t_state) { 1180 1.1 cgd 1181 1.1 cgd case TCPS_CLOSED: 1182 1.1 cgd case TCPS_LISTEN: 1183 1.1 cgd case TCPS_SYN_SENT: 1184 1.1 cgd tp->t_state = TCPS_CLOSED; 1185 1.1 cgd tp = tcp_close(tp); 1186 1.1 cgd break; 1187 1.1 cgd 1188 1.1 cgd case TCPS_SYN_RECEIVED: 1189 1.1 cgd case TCPS_ESTABLISHED: 1190 1.1 cgd tp->t_state = TCPS_FIN_WAIT_1; 1191 1.1 cgd break; 1192 1.1 cgd 1193 1.1 cgd case TCPS_CLOSE_WAIT: 1194 1.1 cgd tp->t_state = TCPS_LAST_ACK; 1195 1.1 cgd break; 1196 1.1 cgd } 1197 1.18 mycroft if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1198 1.233 ozaki struct socket *so = tp->t_inpcb->inp_socket; 1199 1.115 christos if (so) 1200 1.115 christos soisdisconnected(so); 1201 1.19 mycroft /* 1202 1.19 mycroft * If we are in FIN_WAIT_2, we arrived here because the 1203 1.19 mycroft * application did a shutdown of the send side. Like the 1204 1.19 mycroft * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 1205 1.19 mycroft * a full close, we start a timer to make sure sockets are 1206 1.19 mycroft * not left in FIN_WAIT_2 forever. 1207 1.19 mycroft */ 1208 1.132 christos if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 1209 1.132 christos TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 1210 1.159 dyoung else if (tp->t_state == TCPS_TIME_WAIT 1211 1.233 ozaki && ((tp->t_inpcb->inp_af == AF_INET 1212 1.159 dyoung && (tcp4_vtw_enable & 1) 1213 1.159 dyoung && vtw_add(AF_INET, tp)) 1214 1.159 dyoung || 1215 1.233 ozaki (tp->t_inpcb->inp_af == AF_INET6 1216 1.159 dyoung && (tcp6_vtw_enable & 1) 1217 1.159 dyoung && vtw_add(AF_INET6, tp)))) { 1218 1.159 dyoung tp = 0; 1219 1.159 dyoung } 1220 1.18 mycroft } 1221 1.221 maxv return tp; 1222 1.17 thorpej } 1223 1.17 thorpej 1224 1.86 atatat /* 1225 1.86 atatat * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 1226 1.86 atatat * than 32. 1227 1.86 atatat */ 1228 1.86 atatat static int 1229 1.86 atatat sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 1230 1.86 atatat { 1231 1.86 atatat int error, mssdflt; 1232 1.86 atatat struct sysctlnode node; 1233 1.86 atatat 1234 1.86 atatat mssdflt = tcp_mssdflt; 1235 1.86 atatat node = *rnode; 1236 1.86 atatat node.sysctl_data = &mssdflt; 1237 1.86 atatat error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1238 1.86 atatat if (error || newp == NULL) 1239 1.221 maxv return error; 1240 1.86 atatat 1241 1.86 atatat if (mssdflt < 32) 1242 1.221 maxv return EINVAL; 1243 1.86 atatat tcp_mssdflt = mssdflt; 1244 1.86 atatat 1245 1.170 kefren mutex_enter(softnet_lock); 1246 1.170 kefren tcp_tcpcb_template(); 1247 1.170 kefren mutex_exit(softnet_lock); 1248 1.170 kefren 1249 1.221 maxv return 0; 1250 1.86 atatat } 1251 1.36 matt 1252 1.17 thorpej /* 1253 1.170 kefren * sysctl helper for TCP CB template update 1254 1.170 kefren */ 1255 1.170 kefren static int 1256 1.170 kefren sysctl_update_tcpcb_template(SYSCTLFN_ARGS) 1257 1.170 kefren { 1258 1.170 kefren int t, error; 1259 1.170 kefren struct sysctlnode node; 1260 1.170 kefren 1261 1.170 kefren /* follow procedures in sysctl(9) manpage */ 1262 1.170 kefren t = *(int *)rnode->sysctl_data; 1263 1.170 kefren node = *rnode; 1264 1.170 kefren node.sysctl_data = &t; 1265 1.170 kefren error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1266 1.170 kefren if (error || newp == NULL) 1267 1.170 kefren return error; 1268 1.170 kefren 1269 1.170 kefren if (t < 0) 1270 1.170 kefren return EINVAL; 1271 1.170 kefren 1272 1.170 kefren *(int *)rnode->sysctl_data = t; 1273 1.170 kefren 1274 1.170 kefren mutex_enter(softnet_lock); 1275 1.170 kefren tcp_tcpcb_template(); 1276 1.170 kefren mutex_exit(softnet_lock); 1277 1.170 kefren 1278 1.170 kefren return 0; 1279 1.170 kefren } 1280 1.170 kefren 1281 1.170 kefren /* 1282 1.86 atatat * sysctl helper routine for setting port related values under 1283 1.86 atatat * net.inet.ip and net.inet6.ip6. does basic range checking and does 1284 1.86 atatat * additional checks for each type. this code has placed in 1285 1.86 atatat * tcp_input.c since INET and INET6 both use the same tcp code. 1286 1.86 atatat * 1287 1.86 atatat * this helper is not static so that both inet and inet6 can use it. 1288 1.17 thorpej */ 1289 1.17 thorpej int 1290 1.86 atatat sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 1291 1.86 atatat { 1292 1.86 atatat int error, tmp; 1293 1.86 atatat int apmin, apmax; 1294 1.86 atatat #ifndef IPNOPRIVPORTS 1295 1.86 atatat int lpmin, lpmax; 1296 1.86 atatat #endif /* IPNOPRIVPORTS */ 1297 1.86 atatat struct sysctlnode node; 1298 1.86 atatat 1299 1.86 atatat if (namelen != 0) 1300 1.221 maxv return EINVAL; 1301 1.86 atatat 1302 1.86 atatat switch (name[-3]) { 1303 1.86 atatat case PF_INET: 1304 1.86 atatat apmin = anonportmin; 1305 1.86 atatat apmax = anonportmax; 1306 1.86 atatat #ifndef IPNOPRIVPORTS 1307 1.86 atatat lpmin = lowportmin; 1308 1.86 atatat lpmax = lowportmax; 1309 1.86 atatat #endif /* IPNOPRIVPORTS */ 1310 1.86 atatat break; 1311 1.86 atatat #ifdef INET6 1312 1.86 atatat case PF_INET6: 1313 1.86 atatat apmin = ip6_anonportmin; 1314 1.86 atatat apmax = ip6_anonportmax; 1315 1.86 atatat #ifndef IPNOPRIVPORTS 1316 1.86 atatat lpmin = ip6_lowportmin; 1317 1.86 atatat lpmax = ip6_lowportmax; 1318 1.86 atatat #endif /* IPNOPRIVPORTS */ 1319 1.86 atatat break; 1320 1.86 atatat #endif /* INET6 */ 1321 1.86 atatat default: 1322 1.221 maxv return EINVAL; 1323 1.86 atatat } 1324 1.86 atatat 1325 1.86 atatat /* 1326 1.86 atatat * insert temporary copy into node, perform lookup on 1327 1.86 atatat * temporary, then restore pointer 1328 1.86 atatat */ 1329 1.86 atatat node = *rnode; 1330 1.86 atatat tmp = *(int*)rnode->sysctl_data; 1331 1.86 atatat node.sysctl_data = &tmp; 1332 1.86 atatat error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1333 1.86 atatat if (error || newp == NULL) 1334 1.221 maxv return error; 1335 1.86 atatat 1336 1.86 atatat /* 1337 1.86 atatat * simple port range check 1338 1.86 atatat */ 1339 1.86 atatat if (tmp < 0 || tmp > 65535) 1340 1.221 maxv return EINVAL; 1341 1.86 atatat 1342 1.86 atatat /* 1343 1.86 atatat * per-node range checks 1344 1.86 atatat */ 1345 1.86 atatat switch (rnode->sysctl_num) { 1346 1.86 atatat case IPCTL_ANONPORTMIN: 1347 1.151 yamt case IPV6CTL_ANONPORTMIN: 1348 1.86 atatat if (tmp >= apmax) 1349 1.221 maxv return EINVAL; 1350 1.86 atatat #ifndef IPNOPRIVPORTS 1351 1.86 atatat if (tmp < IPPORT_RESERVED) 1352 1.221 maxv return EINVAL; 1353 1.86 atatat #endif /* IPNOPRIVPORTS */ 1354 1.86 atatat break; 1355 1.86 atatat 1356 1.86 atatat case IPCTL_ANONPORTMAX: 1357 1.151 yamt case IPV6CTL_ANONPORTMAX: 1358 1.86 atatat if (apmin >= tmp) 1359 1.221 maxv return EINVAL; 1360 1.86 atatat #ifndef IPNOPRIVPORTS 1361 1.86 atatat if (tmp < IPPORT_RESERVED) 1362 1.221 maxv return EINVAL; 1363 1.86 atatat #endif /* IPNOPRIVPORTS */ 1364 1.86 atatat break; 1365 1.86 atatat 1366 1.86 atatat #ifndef IPNOPRIVPORTS 1367 1.86 atatat case IPCTL_LOWPORTMIN: 1368 1.151 yamt case IPV6CTL_LOWPORTMIN: 1369 1.86 atatat if (tmp >= lpmax || 1370 1.86 atatat tmp > IPPORT_RESERVEDMAX || 1371 1.86 atatat tmp < IPPORT_RESERVEDMIN) 1372 1.221 maxv return EINVAL; 1373 1.86 atatat break; 1374 1.86 atatat 1375 1.86 atatat case IPCTL_LOWPORTMAX: 1376 1.151 yamt case IPV6CTL_LOWPORTMAX: 1377 1.86 atatat if (lpmin >= tmp || 1378 1.86 atatat tmp > IPPORT_RESERVEDMAX || 1379 1.86 atatat tmp < IPPORT_RESERVEDMIN) 1380 1.221 maxv return EINVAL; 1381 1.86 atatat break; 1382 1.86 atatat #endif /* IPNOPRIVPORTS */ 1383 1.86 atatat 1384 1.86 atatat default: 1385 1.221 maxv return EINVAL; 1386 1.86 atatat } 1387 1.86 atatat 1388 1.86 atatat *(int*)rnode->sysctl_data = tmp; 1389 1.86 atatat 1390 1.221 maxv return 0; 1391 1.86 atatat } 1392 1.86 atatat 1393 1.133 christos static inline int 1394 1.133 christos copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp) 1395 1.133 christos { 1396 1.168 christos if (oldp) { 1397 1.168 christos size_t sz; 1398 1.168 christos uid_t uid; 1399 1.168 christos int error; 1400 1.168 christos 1401 1.168 christos if (sockp->so_cred == NULL) 1402 1.168 christos return EPERM; 1403 1.133 christos 1404 1.168 christos uid = kauth_cred_geteuid(sockp->so_cred); 1405 1.133 christos sz = MIN(sizeof(uid), *oldlenp); 1406 1.168 christos if ((error = copyout(&uid, oldp, sz)) != 0) 1407 1.133 christos return error; 1408 1.133 christos } 1409 1.168 christos *oldlenp = sizeof(uid_t); 1410 1.133 christos return 0; 1411 1.133 christos } 1412 1.133 christos 1413 1.133 christos static inline int 1414 1.133 christos inet4_ident_core(struct in_addr raddr, u_int rport, 1415 1.133 christos struct in_addr laddr, u_int lport, 1416 1.133 christos void *oldp, size_t *oldlenp, 1417 1.133 christos struct lwp *l, int dodrop) 1418 1.133 christos { 1419 1.133 christos struct inpcb *inp; 1420 1.133 christos struct socket *sockp; 1421 1.133 christos 1422 1.237 ozaki inp = inpcb_lookup(&tcbtable, raddr, rport, laddr, lport, 0); 1423 1.221 maxv 1424 1.133 christos if (inp == NULL || (sockp = inp->inp_socket) == NULL) 1425 1.133 christos return ESRCH; 1426 1.133 christos 1427 1.133 christos if (dodrop) { 1428 1.133 christos struct tcpcb *tp; 1429 1.153 elad int error; 1430 1.221 maxv 1431 1.133 christos if (inp == NULL || (tp = intotcpcb(inp)) == NULL || 1432 1.133 christos (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0) 1433 1.133 christos return ESRCH; 1434 1.153 elad 1435 1.153 elad error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1436 1.153 elad KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL); 1437 1.153 elad if (error) 1438 1.221 maxv return error; 1439 1.221 maxv 1440 1.133 christos (void)tcp_drop(tp, ECONNABORTED); 1441 1.133 christos return 0; 1442 1.133 christos } 1443 1.221 maxv 1444 1.221 maxv return copyout_uid(sockp, oldp, oldlenp); 1445 1.133 christos } 1446 1.133 christos 1447 1.134 xtraeme #ifdef INET6 1448 1.133 christos static inline int 1449 1.133 christos inet6_ident_core(struct in6_addr *raddr, u_int rport, 1450 1.133 christos struct in6_addr *laddr, u_int lport, 1451 1.133 christos void *oldp, size_t *oldlenp, 1452 1.133 christos struct lwp *l, int dodrop) 1453 1.133 christos { 1454 1.233 ozaki struct inpcb *inp; 1455 1.133 christos struct socket *sockp; 1456 1.133 christos 1457 1.238 ozaki inp = in6pcb_lookup(&tcbtable, raddr, rport, laddr, lport, 0, 0); 1458 1.133 christos 1459 1.233 ozaki if (inp == NULL || (sockp = inp->inp_socket) == NULL) 1460 1.133 christos return ESRCH; 1461 1.221 maxv 1462 1.133 christos if (dodrop) { 1463 1.133 christos struct tcpcb *tp; 1464 1.153 elad int error; 1465 1.221 maxv 1466 1.233 ozaki if (inp == NULL || (tp = intotcpcb(inp)) == NULL || 1467 1.233 ozaki (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0) 1468 1.133 christos return ESRCH; 1469 1.133 christos 1470 1.153 elad error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1471 1.233 ozaki KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL); 1472 1.153 elad if (error) 1473 1.221 maxv return error; 1474 1.133 christos 1475 1.133 christos (void)tcp_drop(tp, ECONNABORTED); 1476 1.133 christos return 0; 1477 1.133 christos } 1478 1.221 maxv 1479 1.221 maxv return copyout_uid(sockp, oldp, oldlenp); 1480 1.133 christos } 1481 1.134 xtraeme #endif 1482 1.133 christos 1483 1.133 christos /* 1484 1.133 christos * sysctl helper routine for the net.inet.tcp.drop and 1485 1.133 christos * net.inet6.tcp6.drop nodes. 1486 1.133 christos */ 1487 1.133 christos #define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident 1488 1.133 christos 1489 1.133 christos /* 1490 1.86 atatat * sysctl helper routine for the net.inet.tcp.ident and 1491 1.86 atatat * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1492 1.86 atatat * old way of looking up the ident information for ipv4 which involves 1493 1.86 atatat * stuffing the port/addr pairs into the mib lookup. 1494 1.86 atatat */ 1495 1.86 atatat static int 1496 1.86 atatat sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1497 1.86 atatat { 1498 1.86 atatat struct sockaddr_in *si4[2]; 1499 1.86 atatat #ifdef INET6 1500 1.86 atatat struct sockaddr_in6 *si6[2]; 1501 1.217 maxv #endif 1502 1.86 atatat struct sockaddr_storage sa[2]; 1503 1.148 matt int error, pf, dodrop; 1504 1.86 atatat 1505 1.133 christos dodrop = name[-1] == TCPCTL_DROP; 1506 1.133 christos if (dodrop) { 1507 1.133 christos if (oldp != NULL || *oldlenp != 0) 1508 1.133 christos return EINVAL; 1509 1.133 christos if (newp == NULL) 1510 1.133 christos return EPERM; 1511 1.133 christos if (newlen < sizeof(sa)) 1512 1.133 christos return ENOMEM; 1513 1.133 christos } 1514 1.86 atatat if (namelen != 4 && namelen != 0) 1515 1.133 christos return EINVAL; 1516 1.86 atatat if (name[-2] != IPPROTO_TCP) 1517 1.133 christos return EINVAL; 1518 1.86 atatat pf = name[-3]; 1519 1.86 atatat 1520 1.86 atatat /* old style lookup, ipv4 only */ 1521 1.86 atatat if (namelen == 4) { 1522 1.88 atatat struct in_addr laddr, raddr; 1523 1.88 atatat u_int lport, rport; 1524 1.88 atatat 1525 1.86 atatat if (pf != PF_INET) 1526 1.133 christos return EPROTONOSUPPORT; 1527 1.86 atatat raddr.s_addr = (uint32_t)name[0]; 1528 1.86 atatat rport = (u_int)name[1]; 1529 1.86 atatat laddr.s_addr = (uint32_t)name[2]; 1530 1.86 atatat lport = (u_int)name[3]; 1531 1.221 maxv 1532 1.148 matt mutex_enter(softnet_lock); 1533 1.133 christos error = inet4_ident_core(raddr, rport, laddr, lport, 1534 1.133 christos oldp, oldlenp, l, dodrop); 1535 1.148 matt mutex_exit(softnet_lock); 1536 1.133 christos return error; 1537 1.86 atatat } 1538 1.86 atatat 1539 1.86 atatat if (newp == NULL || newlen != sizeof(sa)) 1540 1.133 christos return EINVAL; 1541 1.86 atatat error = copyin(newp, &sa, newlen); 1542 1.86 atatat if (error) 1543 1.133 christos return error; 1544 1.86 atatat 1545 1.86 atatat /* 1546 1.86 atatat * requested families must match 1547 1.86 atatat */ 1548 1.86 atatat if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1549 1.133 christos return EINVAL; 1550 1.86 atatat 1551 1.86 atatat switch (pf) { 1552 1.135 christos #ifdef INET6 1553 1.135 christos case PF_INET6: 1554 1.135 christos si6[0] = (struct sockaddr_in6*)&sa[0]; 1555 1.135 christos si6[1] = (struct sockaddr_in6*)&sa[1]; 1556 1.135 christos if (si6[0]->sin6_len != sizeof(*si6[0]) || 1557 1.135 christos si6[1]->sin6_len != sizeof(*si6[1])) 1558 1.135 christos return EINVAL; 1559 1.135 christos 1560 1.135 christos if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) && 1561 1.135 christos !IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) { 1562 1.135 christos error = sa6_embedscope(si6[0], ip6_use_defzone); 1563 1.135 christos if (error) 1564 1.135 christos return error; 1565 1.135 christos error = sa6_embedscope(si6[1], ip6_use_defzone); 1566 1.135 christos if (error) 1567 1.135 christos return error; 1568 1.135 christos 1569 1.148 matt mutex_enter(softnet_lock); 1570 1.135 christos error = inet6_ident_core(&si6[0]->sin6_addr, 1571 1.135 christos si6[0]->sin6_port, &si6[1]->sin6_addr, 1572 1.135 christos si6[1]->sin6_port, oldp, oldlenp, l, dodrop); 1573 1.148 matt mutex_exit(softnet_lock); 1574 1.135 christos return error; 1575 1.135 christos } 1576 1.135 christos 1577 1.135 christos if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) != 1578 1.135 christos IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) 1579 1.135 christos return EINVAL; 1580 1.135 christos 1581 1.135 christos in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]); 1582 1.135 christos in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]); 1583 1.224 mrg #endif /* INET6 */ 1584 1.135 christos /*FALLTHROUGH*/ 1585 1.133 christos case PF_INET: 1586 1.86 atatat si4[0] = (struct sockaddr_in*)&sa[0]; 1587 1.86 atatat si4[1] = (struct sockaddr_in*)&sa[1]; 1588 1.86 atatat if (si4[0]->sin_len != sizeof(*si4[0]) || 1589 1.135 christos si4[0]->sin_len != sizeof(*si4[1])) 1590 1.133 christos return EINVAL; 1591 1.221 maxv 1592 1.148 matt mutex_enter(softnet_lock); 1593 1.133 christos error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port, 1594 1.133 christos si4[1]->sin_addr, si4[1]->sin_port, 1595 1.133 christos oldp, oldlenp, l, dodrop); 1596 1.148 matt mutex_exit(softnet_lock); 1597 1.133 christos return error; 1598 1.133 christos default: 1599 1.133 christos return EPROTONOSUPPORT; 1600 1.86 atatat } 1601 1.76 christos } 1602 1.76 christos 1603 1.86 atatat /* 1604 1.97 atatat * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1605 1.97 atatat * inet/inet6, as well as raw pcbs for each. specifically not 1606 1.97 atatat * declared static so that raw sockets and udp/udp6 can use it as 1607 1.97 atatat * well. 1608 1.97 atatat */ 1609 1.97 atatat int 1610 1.97 atatat sysctl_inpcblist(SYSCTLFN_ARGS) 1611 1.97 atatat { 1612 1.220 maxv const bool allowaddr = get_expose_address(curproc); 1613 1.97 atatat struct sockaddr_in *in; 1614 1.104 christos const struct inpcb *inp; 1615 1.97 atatat #ifdef INET6 1616 1.97 atatat struct sockaddr_in6 *in6; 1617 1.97 atatat #endif 1618 1.104 christos struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1619 1.97 atatat struct tcpcb *tp; 1620 1.97 atatat struct kinfo_pcb pcb; 1621 1.97 atatat char *dp; 1622 1.97 atatat size_t len, needed, elem_size, out_size; 1623 1.97 atatat int error, elem_count, pf, proto, pf2; 1624 1.97 atatat 1625 1.97 atatat if (namelen != 4) 1626 1.221 maxv return EINVAL; 1627 1.97 atatat 1628 1.114 christos if (oldp != NULL) { 1629 1.114 christos len = *oldlenp; 1630 1.114 christos elem_size = name[2]; 1631 1.114 christos elem_count = name[3]; 1632 1.114 christos if (elem_size != sizeof(pcb)) 1633 1.114 christos return EINVAL; 1634 1.114 christos } else { 1635 1.114 christos len = 0; 1636 1.114 christos elem_count = INT_MAX; 1637 1.114 christos elem_size = sizeof(pcb); 1638 1.114 christos } 1639 1.97 atatat error = 0; 1640 1.97 atatat dp = oldp; 1641 1.114 christos out_size = elem_size; 1642 1.97 atatat needed = 0; 1643 1.97 atatat 1644 1.97 atatat if (namelen == 1 && name[0] == CTL_QUERY) 1645 1.105 atatat return (sysctl_query(SYSCTLFN_CALL(rnode))); 1646 1.97 atatat 1647 1.97 atatat if (name - oname != 4) 1648 1.221 maxv return EINVAL; 1649 1.97 atatat 1650 1.97 atatat pf = oname[1]; 1651 1.97 atatat proto = oname[2]; 1652 1.116 christos pf2 = (oldp != NULL) ? pf : 0; 1653 1.97 atatat 1654 1.148 matt mutex_enter(softnet_lock); 1655 1.148 matt 1656 1.233 ozaki TAILQ_FOREACH(inp, &pcbtbl->inpt_queue, inp_queue) { 1657 1.233 ozaki if (inp->inp_af != pf) 1658 1.97 atatat continue; 1659 1.97 atatat 1660 1.125 elad if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1661 1.233 ozaki KAUTH_REQ_NETWORK_SOCKET_CANSEE, inp->inp_socket, NULL, 1662 1.125 elad NULL) != 0) 1663 1.111 elad continue; 1664 1.111 elad 1665 1.97 atatat memset(&pcb, 0, sizeof(pcb)); 1666 1.97 atatat 1667 1.97 atatat pcb.ki_family = pf; 1668 1.97 atatat pcb.ki_type = proto; 1669 1.97 atatat 1670 1.97 atatat switch (pf2) { 1671 1.97 atatat case 0: 1672 1.97 atatat /* just probing for size */ 1673 1.97 atatat break; 1674 1.97 atatat case PF_INET: 1675 1.97 atatat pcb.ki_family = inp->inp_socket->so_proto-> 1676 1.97 atatat pr_domain->dom_family; 1677 1.97 atatat pcb.ki_type = inp->inp_socket->so_proto-> 1678 1.97 atatat pr_type; 1679 1.97 atatat pcb.ki_protocol = inp->inp_socket->so_proto-> 1680 1.97 atatat pr_protocol; 1681 1.97 atatat pcb.ki_pflags = inp->inp_flags; 1682 1.97 atatat 1683 1.97 atatat pcb.ki_sostate = inp->inp_socket->so_state; 1684 1.97 atatat pcb.ki_prstate = inp->inp_state; 1685 1.97 atatat if (proto == IPPROTO_TCP) { 1686 1.97 atatat tp = intotcpcb(inp); 1687 1.97 atatat pcb.ki_tstate = tp->t_state; 1688 1.97 atatat pcb.ki_tflags = tp->t_flags; 1689 1.97 atatat } 1690 1.97 atatat 1691 1.220 maxv COND_SET_VALUE(pcb.ki_pcbaddr, 1692 1.220 maxv PTRTOUINT64(inp), allowaddr); 1693 1.220 maxv COND_SET_VALUE(pcb.ki_ppcbaddr, 1694 1.220 maxv PTRTOUINT64(inp->inp_ppcb), allowaddr); 1695 1.220 maxv COND_SET_VALUE(pcb.ki_sockaddr, 1696 1.220 maxv PTRTOUINT64(inp->inp_socket), allowaddr); 1697 1.97 atatat 1698 1.97 atatat pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1699 1.97 atatat pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1700 1.97 atatat 1701 1.97 atatat in = satosin(&pcb.ki_src); 1702 1.97 atatat in->sin_len = sizeof(*in); 1703 1.97 atatat in->sin_family = pf; 1704 1.97 atatat in->sin_port = inp->inp_lport; 1705 1.234 ozaki in->sin_addr = const_in4p_laddr(inp); 1706 1.97 atatat if (pcb.ki_prstate >= INP_CONNECTED) { 1707 1.97 atatat in = satosin(&pcb.ki_dst); 1708 1.97 atatat in->sin_len = sizeof(*in); 1709 1.97 atatat in->sin_family = pf; 1710 1.97 atatat in->sin_port = inp->inp_fport; 1711 1.234 ozaki in->sin_addr = const_in4p_faddr(inp); 1712 1.97 atatat } 1713 1.97 atatat break; 1714 1.97 atatat #ifdef INET6 1715 1.97 atatat case PF_INET6: 1716 1.233 ozaki pcb.ki_family = inp->inp_socket->so_proto-> 1717 1.97 atatat pr_domain->dom_family; 1718 1.233 ozaki pcb.ki_type = inp->inp_socket->so_proto->pr_type; 1719 1.233 ozaki pcb.ki_protocol = inp->inp_socket->so_proto-> 1720 1.97 atatat pr_protocol; 1721 1.233 ozaki pcb.ki_pflags = inp->inp_flags; 1722 1.97 atatat 1723 1.233 ozaki pcb.ki_sostate = inp->inp_socket->so_state; 1724 1.233 ozaki pcb.ki_prstate = inp->inp_state; 1725 1.97 atatat if (proto == IPPROTO_TCP) { 1726 1.233 ozaki tp = intotcpcb(inp); 1727 1.97 atatat pcb.ki_tstate = tp->t_state; 1728 1.97 atatat pcb.ki_tflags = tp->t_flags; 1729 1.97 atatat } 1730 1.97 atatat 1731 1.220 maxv COND_SET_VALUE(pcb.ki_pcbaddr, 1732 1.233 ozaki PTRTOUINT64(inp), allowaddr); 1733 1.220 maxv COND_SET_VALUE(pcb.ki_ppcbaddr, 1734 1.233 ozaki PTRTOUINT64(inp->inp_ppcb), allowaddr); 1735 1.220 maxv COND_SET_VALUE(pcb.ki_sockaddr, 1736 1.233 ozaki PTRTOUINT64(inp->inp_socket), allowaddr); 1737 1.97 atatat 1738 1.233 ozaki pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1739 1.233 ozaki pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1740 1.97 atatat 1741 1.97 atatat in6 = satosin6(&pcb.ki_src); 1742 1.97 atatat in6->sin6_len = sizeof(*in6); 1743 1.97 atatat in6->sin6_family = pf; 1744 1.233 ozaki in6->sin6_port = inp->inp_lport; 1745 1.234 ozaki in6->sin6_flowinfo = const_in6p_flowinfo(inp); 1746 1.234 ozaki in6->sin6_addr = const_in6p_laddr(inp); 1747 1.97 atatat in6->sin6_scope_id = 0; /* XXX? */ 1748 1.97 atatat 1749 1.233 ozaki if (pcb.ki_prstate >= INP_CONNECTED) { 1750 1.97 atatat in6 = satosin6(&pcb.ki_dst); 1751 1.97 atatat in6->sin6_len = sizeof(*in6); 1752 1.97 atatat in6->sin6_family = pf; 1753 1.233 ozaki in6->sin6_port = inp->inp_fport; 1754 1.234 ozaki in6->sin6_flowinfo = const_in6p_flowinfo(inp); 1755 1.234 ozaki in6->sin6_addr = const_in6p_faddr(inp); 1756 1.97 atatat in6->sin6_scope_id = 0; /* XXX? */ 1757 1.97 atatat } 1758 1.97 atatat break; 1759 1.97 atatat #endif 1760 1.97 atatat } 1761 1.97 atatat 1762 1.97 atatat if (len >= elem_size && elem_count > 0) { 1763 1.97 atatat error = copyout(&pcb, dp, out_size); 1764 1.155 rmind if (error) { 1765 1.155 rmind mutex_exit(softnet_lock); 1766 1.221 maxv return error; 1767 1.155 rmind } 1768 1.97 atatat dp += elem_size; 1769 1.97 atatat len -= elem_size; 1770 1.97 atatat } 1771 1.152 mrg needed += elem_size; 1772 1.152 mrg if (elem_count > 0 && elem_count != INT_MAX) 1773 1.152 mrg elem_count--; 1774 1.97 atatat } 1775 1.97 atatat 1776 1.97 atatat *oldlenp = needed; 1777 1.97 atatat if (oldp == NULL) 1778 1.97 atatat *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1779 1.97 atatat 1780 1.148 matt mutex_exit(softnet_lock); 1781 1.148 matt 1782 1.221 maxv return error; 1783 1.97 atatat } 1784 1.97 atatat 1785 1.124 rpaulo static int 1786 1.124 rpaulo sysctl_tcp_congctl(SYSCTLFN_ARGS) 1787 1.124 rpaulo { 1788 1.124 rpaulo struct sysctlnode node; 1789 1.148 matt int error; 1790 1.124 rpaulo char newname[TCPCC_MAXLEN]; 1791 1.124 rpaulo 1792 1.124 rpaulo strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1); 1793 1.221 maxv 1794 1.124 rpaulo node = *rnode; 1795 1.124 rpaulo node.sysctl_data = newname; 1796 1.124 rpaulo node.sysctl_size = sizeof(newname); 1797 1.124 rpaulo 1798 1.124 rpaulo error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1799 1.221 maxv 1800 1.221 maxv if (error || 1801 1.124 rpaulo newp == NULL || 1802 1.124 rpaulo strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0) 1803 1.124 rpaulo return error; 1804 1.124 rpaulo 1805 1.148 matt mutex_enter(softnet_lock); 1806 1.148 matt error = tcp_congctl_select(NULL, newname); 1807 1.148 matt mutex_exit(softnet_lock); 1808 1.148 matt 1809 1.124 rpaulo return error; 1810 1.124 rpaulo } 1811 1.124 rpaulo 1812 1.132 christos static int 1813 1.166 christos sysctl_tcp_init_win(SYSCTLFN_ARGS) 1814 1.166 christos { 1815 1.166 christos int error; 1816 1.166 christos u_int iw; 1817 1.166 christos struct sysctlnode node; 1818 1.166 christos 1819 1.166 christos iw = *(u_int *)rnode->sysctl_data; 1820 1.166 christos node = *rnode; 1821 1.166 christos node.sysctl_data = &iw; 1822 1.166 christos node.sysctl_size = sizeof(iw); 1823 1.166 christos error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1824 1.166 christos if (error || newp == NULL) 1825 1.166 christos return error; 1826 1.166 christos 1827 1.166 christos if (iw >= __arraycount(tcp_init_win_max)) 1828 1.166 christos return EINVAL; 1829 1.166 christos *(u_int *)rnode->sysctl_data = iw; 1830 1.166 christos return 0; 1831 1.166 christos } 1832 1.166 christos 1833 1.166 christos static int 1834 1.132 christos sysctl_tcp_keep(SYSCTLFN_ARGS) 1835 1.221 maxv { 1836 1.132 christos int error; 1837 1.132 christos u_int tmp; 1838 1.132 christos struct sysctlnode node; 1839 1.132 christos 1840 1.132 christos node = *rnode; 1841 1.132 christos tmp = *(u_int *)rnode->sysctl_data; 1842 1.132 christos node.sysctl_data = &tmp; 1843 1.132 christos 1844 1.132 christos error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1845 1.132 christos if (error || newp == NULL) 1846 1.132 christos return error; 1847 1.132 christos 1848 1.225 riastrad if (!(tmp > 0 && tmp <= TCP_TIMER_MAXTICKS)) 1849 1.225 riastrad return EINVAL; 1850 1.225 riastrad 1851 1.148 matt mutex_enter(softnet_lock); 1852 1.148 matt 1853 1.132 christos *(u_int *)rnode->sysctl_data = tmp; 1854 1.132 christos tcp_tcpcb_template(); /* update the template */ 1855 1.148 matt 1856 1.148 matt mutex_exit(softnet_lock); 1857 1.132 christos return 0; 1858 1.132 christos } 1859 1.132 christos 1860 1.142 thorpej static int 1861 1.142 thorpej sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS) 1862 1.142 thorpej { 1863 1.142 thorpej 1864 1.146 thorpej return (NETSTAT_SYSCTL(tcpstat_percpu, TCP_NSTATS)); 1865 1.142 thorpej } 1866 1.132 christos 1867 1.97 atatat /* 1868 1.86 atatat * this (second stage) setup routine is a replacement for tcp_sysctl() 1869 1.86 atatat * (which is currently used for ipv4 and ipv6) 1870 1.86 atatat */ 1871 1.86 atatat static void 1872 1.87 atatat sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1873 1.87 atatat const char *tcpname) 1874 1.86 atatat { 1875 1.128 rpaulo const struct sysctlnode *sack_node; 1876 1.127 yamt const struct sysctlnode *abc_node; 1877 1.128 rpaulo const struct sysctlnode *ecn_node; 1878 1.128 rpaulo const struct sysctlnode *congctl_node; 1879 1.159 dyoung const struct sysctlnode *mslt_node; 1880 1.159 dyoung const struct sysctlnode *vtw_node; 1881 1.109 rpaulo #ifdef TCP_DEBUG 1882 1.109 rpaulo extern struct tcp_debug tcp_debug[TCP_NDEBUG]; 1883 1.109 rpaulo extern int tcp_debx; 1884 1.109 rpaulo #endif 1885 1.76 christos 1886 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1887 1.87 atatat CTLFLAG_PERMANENT, 1888 1.86 atatat CTLTYPE_NODE, pfname, NULL, 1889 1.86 atatat NULL, 0, NULL, 0, 1890 1.86 atatat CTL_NET, pf, CTL_EOL); 1891 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1892 1.87 atatat CTLFLAG_PERMANENT, 1893 1.92 atatat CTLTYPE_NODE, tcpname, 1894 1.92 atatat SYSCTL_DESCR("TCP related settings"), 1895 1.86 atatat NULL, 0, NULL, 0, 1896 1.86 atatat CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1897 1.86 atatat 1898 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1899 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1900 1.92 atatat CTLTYPE_INT, "rfc1323", 1901 1.92 atatat SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1902 1.170 kefren sysctl_update_tcpcb_template, 0, &tcp_do_rfc1323, 0, 1903 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1904 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1905 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1906 1.92 atatat CTLTYPE_INT, "sendspace", 1907 1.92 atatat SYSCTL_DESCR("Default TCP send buffer size"), 1908 1.86 atatat NULL, 0, &tcp_sendspace, 0, 1909 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1910 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1911 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1912 1.92 atatat CTLTYPE_INT, "recvspace", 1913 1.92 atatat SYSCTL_DESCR("Default TCP receive buffer size"), 1914 1.86 atatat NULL, 0, &tcp_recvspace, 0, 1915 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1916 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1917 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1918 1.92 atatat CTLTYPE_INT, "mssdflt", 1919 1.92 atatat SYSCTL_DESCR("Default maximum segment size"), 1920 1.86 atatat sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1921 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1922 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1923 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1924 1.136 rmind CTLTYPE_INT, "minmss", 1925 1.136 rmind SYSCTL_DESCR("Lower limit for TCP maximum segment size"), 1926 1.136 rmind NULL, 0, &tcp_minmss, 0, 1927 1.136 rmind CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1928 1.136 rmind sysctl_createv(clog, 0, NULL, NULL, 1929 1.136 rmind CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1930 1.156 darran CTLTYPE_INT, "msl", 1931 1.156 darran SYSCTL_DESCR("Maximum Segment Life"), 1932 1.156 darran NULL, 0, &tcp_msl, 0, 1933 1.156 darran CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSL, CTL_EOL); 1934 1.156 darran sysctl_createv(clog, 0, NULL, NULL, 1935 1.156 darran CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1936 1.92 atatat CTLTYPE_INT, "syn_cache_limit", 1937 1.92 atatat SYSCTL_DESCR("Maximum number of entries in the TCP " 1938 1.92 atatat "compressed state engine"), 1939 1.86 atatat NULL, 0, &tcp_syn_cache_limit, 0, 1940 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1941 1.86 atatat CTL_EOL); 1942 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1943 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1944 1.92 atatat CTLTYPE_INT, "syn_bucket_limit", 1945 1.92 atatat SYSCTL_DESCR("Maximum number of entries per hash " 1946 1.92 atatat "bucket in the TCP compressed state " 1947 1.92 atatat "engine"), 1948 1.86 atatat NULL, 0, &tcp_syn_bucket_limit, 0, 1949 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1950 1.86 atatat CTL_EOL); 1951 1.86 atatat #if 0 /* obsoleted */ 1952 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1953 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1954 1.92 atatat CTLTYPE_INT, "syn_cache_interval", 1955 1.92 atatat SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1956 1.86 atatat NULL, 0, &tcp_syn_cache_interval, 0, 1957 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1958 1.86 atatat CTL_EOL); 1959 1.86 atatat #endif 1960 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1961 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1962 1.92 atatat CTLTYPE_INT, "init_win", 1963 1.92 atatat SYSCTL_DESCR("Initial TCP congestion window"), 1964 1.166 christos sysctl_tcp_init_win, 0, &tcp_init_win, 0, 1965 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1966 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1967 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1968 1.92 atatat CTLTYPE_INT, "mss_ifmtu", 1969 1.92 atatat SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1970 1.86 atatat NULL, 0, &tcp_mss_ifmtu, 0, 1971 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1972 1.102 kurahone sysctl_createv(clog, 0, NULL, &sack_node, 1973 1.102 kurahone CTLFLAG_PERMANENT, 1974 1.102 kurahone CTLTYPE_NODE, "sack", 1975 1.102 kurahone SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1976 1.102 kurahone NULL, 0, NULL, 0, 1977 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1978 1.128 rpaulo 1979 1.128 rpaulo /* Congctl subtree */ 1980 1.128 rpaulo sysctl_createv(clog, 0, NULL, &congctl_node, 1981 1.124 rpaulo CTLFLAG_PERMANENT, 1982 1.124 rpaulo CTLTYPE_NODE, "congctl", 1983 1.124 rpaulo SYSCTL_DESCR("TCP Congestion Control"), 1984 1.128 rpaulo NULL, 0, NULL, 0, 1985 1.124 rpaulo CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1986 1.128 rpaulo sysctl_createv(clog, 0, &congctl_node, NULL, 1987 1.124 rpaulo CTLFLAG_PERMANENT, 1988 1.124 rpaulo CTLTYPE_STRING, "available", 1989 1.124 rpaulo SYSCTL_DESCR("Available Congestion Control Mechanisms"), 1990 1.165 dsl NULL, 0, tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL); 1991 1.128 rpaulo sysctl_createv(clog, 0, &congctl_node, NULL, 1992 1.124 rpaulo CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1993 1.124 rpaulo CTLTYPE_STRING, "selected", 1994 1.124 rpaulo SYSCTL_DESCR("Selected Congestion Control Mechanism"), 1995 1.124 rpaulo sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN, 1996 1.124 rpaulo CTL_CREATE, CTL_EOL); 1997 1.124 rpaulo 1998 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 1999 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2000 1.92 atatat CTLTYPE_INT, "win_scale", 2001 1.92 atatat SYSCTL_DESCR("Use RFC1323 window scale options"), 2002 1.170 kefren sysctl_update_tcpcb_template, 0, &tcp_do_win_scale, 0, 2003 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 2004 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2005 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2006 1.92 atatat CTLTYPE_INT, "timestamps", 2007 1.92 atatat SYSCTL_DESCR("Use RFC1323 time stamp options"), 2008 1.170 kefren sysctl_update_tcpcb_template, 0, &tcp_do_timestamps, 0, 2009 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 2010 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2011 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2012 1.92 atatat CTLTYPE_INT, "cwm", 2013 1.92 atatat SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 2014 1.92 atatat "Monitoring"), 2015 1.86 atatat NULL, 0, &tcp_cwm, 0, 2016 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 2017 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2018 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2019 1.92 atatat CTLTYPE_INT, "cwm_burstsize", 2020 1.92 atatat SYSCTL_DESCR("Congestion Window Monitoring allowed " 2021 1.92 atatat "burst count in packets"), 2022 1.86 atatat NULL, 0, &tcp_cwm_burstsize, 0, 2023 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 2024 1.86 atatat CTL_EOL); 2025 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2026 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2027 1.92 atatat CTLTYPE_INT, "ack_on_push", 2028 1.92 atatat SYSCTL_DESCR("Immediately return ACK when PSH is " 2029 1.92 atatat "received"), 2030 1.86 atatat NULL, 0, &tcp_ack_on_push, 0, 2031 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 2032 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2033 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2034 1.92 atatat CTLTYPE_INT, "keepidle", 2035 1.92 atatat SYSCTL_DESCR("Allowed connection idle ticks before a " 2036 1.92 atatat "keepalive probe is sent"), 2037 1.132 christos sysctl_tcp_keep, 0, &tcp_keepidle, 0, 2038 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 2039 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2040 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2041 1.92 atatat CTLTYPE_INT, "keepintvl", 2042 1.92 atatat SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 2043 1.132 christos sysctl_tcp_keep, 0, &tcp_keepintvl, 0, 2044 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 2045 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2046 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2047 1.92 atatat CTLTYPE_INT, "keepcnt", 2048 1.92 atatat SYSCTL_DESCR("Number of keepalive probes to send"), 2049 1.132 christos sysctl_tcp_keep, 0, &tcp_keepcnt, 0, 2050 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 2051 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2052 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 2053 1.92 atatat CTLTYPE_INT, "slowhz", 2054 1.92 atatat SYSCTL_DESCR("Keepalive ticks per second"), 2055 1.86 atatat NULL, PR_SLOWHZ, NULL, 0, 2056 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 2057 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2058 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2059 1.92 atatat CTLTYPE_INT, "log_refused", 2060 1.92 atatat SYSCTL_DESCR("Log refused TCP connections"), 2061 1.86 atatat NULL, 0, &tcp_log_refused, 0, 2062 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 2063 1.86 atatat #if 0 /* obsoleted */ 2064 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2065 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2066 1.86 atatat CTLTYPE_INT, "rstratelimit", NULL, 2067 1.86 atatat NULL, 0, &tcp_rst_ratelim, 0, 2068 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 2069 1.86 atatat #endif 2070 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2071 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2072 1.92 atatat CTLTYPE_INT, "rstppslimit", 2073 1.92 atatat SYSCTL_DESCR("Maximum number of RST packets to send " 2074 1.92 atatat "per second"), 2075 1.86 atatat NULL, 0, &tcp_rst_ppslim, 0, 2076 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 2077 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2078 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2079 1.92 atatat CTLTYPE_INT, "delack_ticks", 2080 1.92 atatat SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 2081 1.86 atatat NULL, 0, &tcp_delack_ticks, 0, 2082 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 2083 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2084 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2085 1.92 atatat CTLTYPE_INT, "init_win_local", 2086 1.92 atatat SYSCTL_DESCR("Initial TCP window size (in segments)"), 2087 1.166 christos sysctl_tcp_init_win, 0, &tcp_init_win_local, 0, 2088 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 2089 1.86 atatat CTL_EOL); 2090 1.87 atatat sysctl_createv(clog, 0, NULL, NULL, 2091 1.87 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2092 1.92 atatat CTLTYPE_STRUCT, "ident", 2093 1.92 atatat SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 2094 1.86 atatat sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 2095 1.86 atatat CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 2096 1.93 thorpej sysctl_createv(clog, 0, NULL, NULL, 2097 1.93 thorpej CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2098 1.93 thorpej CTLTYPE_INT, "do_loopback_cksum", 2099 1.93 thorpej SYSCTL_DESCR("Perform TCP checksum on loopback"), 2100 1.93 thorpej NULL, 0, &tcp_do_loopback_cksum, 0, 2101 1.93 thorpej CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 2102 1.93 thorpej CTL_EOL); 2103 1.97 atatat sysctl_createv(clog, 0, NULL, NULL, 2104 1.97 atatat CTLFLAG_PERMANENT, 2105 1.100 atatat CTLTYPE_STRUCT, "pcblist", 2106 1.97 atatat SYSCTL_DESCR("TCP protocol control block list"), 2107 1.97 atatat sysctl_inpcblist, 0, &tcbtable, 0, 2108 1.97 atatat CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 2109 1.97 atatat CTL_EOL); 2110 1.132 christos sysctl_createv(clog, 0, NULL, NULL, 2111 1.132 christos CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2112 1.132 christos CTLTYPE_INT, "keepinit", 2113 1.132 christos SYSCTL_DESCR("Ticks before initial tcp connection times out"), 2114 1.132 christos sysctl_tcp_keep, 0, &tcp_keepinit, 0, 2115 1.132 christos CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2116 1.102 kurahone 2117 1.136 rmind /* TCP socket buffers auto-sizing nodes */ 2118 1.136 rmind sysctl_createv(clog, 0, NULL, NULL, 2119 1.136 rmind CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2120 1.136 rmind CTLTYPE_INT, "recvbuf_auto", 2121 1.136 rmind SYSCTL_DESCR("Enable automatic receive " 2122 1.136 rmind "buffer sizing (experimental)"), 2123 1.136 rmind NULL, 0, &tcp_do_autorcvbuf, 0, 2124 1.136 rmind CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2125 1.136 rmind sysctl_createv(clog, 0, NULL, NULL, 2126 1.136 rmind CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2127 1.136 rmind CTLTYPE_INT, "recvbuf_inc", 2128 1.136 rmind SYSCTL_DESCR("Incrementor step size of " 2129 1.136 rmind "automatic receive buffer"), 2130 1.136 rmind NULL, 0, &tcp_autorcvbuf_inc, 0, 2131 1.136 rmind CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2132 1.136 rmind sysctl_createv(clog, 0, NULL, NULL, 2133 1.136 rmind CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2134 1.136 rmind CTLTYPE_INT, "recvbuf_max", 2135 1.136 rmind SYSCTL_DESCR("Max size of automatic receive buffer"), 2136 1.136 rmind NULL, 0, &tcp_autorcvbuf_max, 0, 2137 1.136 rmind CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2138 1.136 rmind 2139 1.136 rmind sysctl_createv(clog, 0, NULL, NULL, 2140 1.136 rmind CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2141 1.136 rmind CTLTYPE_INT, "sendbuf_auto", 2142 1.136 rmind SYSCTL_DESCR("Enable automatic send " 2143 1.136 rmind "buffer sizing (experimental)"), 2144 1.136 rmind NULL, 0, &tcp_do_autosndbuf, 0, 2145 1.136 rmind CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2146 1.136 rmind sysctl_createv(clog, 0, NULL, NULL, 2147 1.136 rmind CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2148 1.136 rmind CTLTYPE_INT, "sendbuf_inc", 2149 1.136 rmind SYSCTL_DESCR("Incrementor step size of " 2150 1.136 rmind "automatic send buffer"), 2151 1.136 rmind NULL, 0, &tcp_autosndbuf_inc, 0, 2152 1.136 rmind CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2153 1.136 rmind sysctl_createv(clog, 0, NULL, NULL, 2154 1.136 rmind CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2155 1.136 rmind CTLTYPE_INT, "sendbuf_max", 2156 1.136 rmind SYSCTL_DESCR("Max size of automatic send buffer"), 2157 1.136 rmind NULL, 0, &tcp_autosndbuf_max, 0, 2158 1.136 rmind CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2159 1.136 rmind 2160 1.128 rpaulo /* ECN subtree */ 2161 1.128 rpaulo sysctl_createv(clog, 0, NULL, &ecn_node, 2162 1.128 rpaulo CTLFLAG_PERMANENT, 2163 1.128 rpaulo CTLTYPE_NODE, "ecn", 2164 1.128 rpaulo SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"), 2165 1.128 rpaulo NULL, 0, NULL, 0, 2166 1.128 rpaulo CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2167 1.128 rpaulo sysctl_createv(clog, 0, &ecn_node, NULL, 2168 1.120 rpaulo CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2169 1.120 rpaulo CTLTYPE_INT, "enable", 2170 1.120 rpaulo SYSCTL_DESCR("Enable TCP Explicit Congestion " 2171 1.120 rpaulo "Notification"), 2172 1.128 rpaulo NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL); 2173 1.128 rpaulo sysctl_createv(clog, 0, &ecn_node, NULL, 2174 1.120 rpaulo CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2175 1.120 rpaulo CTLTYPE_INT, "maxretries", 2176 1.120 rpaulo SYSCTL_DESCR("Number of times to retry ECN setup " 2177 1.120 rpaulo "before disabling ECN on the connection"), 2178 1.128 rpaulo NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL); 2179 1.221 maxv 2180 1.201 snj /* SACK gets its own little subtree. */ 2181 1.102 kurahone sysctl_createv(clog, 0, NULL, &sack_node, 2182 1.102 kurahone CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2183 1.102 kurahone CTLTYPE_INT, "enable", 2184 1.102 kurahone SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 2185 1.102 kurahone NULL, 0, &tcp_do_sack, 0, 2186 1.102 kurahone CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2187 1.102 kurahone sysctl_createv(clog, 0, NULL, &sack_node, 2188 1.102 kurahone CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2189 1.102 kurahone CTLTYPE_INT, "maxholes", 2190 1.102 kurahone SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 2191 1.102 kurahone NULL, 0, &tcp_sack_tp_maxholes, 0, 2192 1.102 kurahone CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2193 1.102 kurahone sysctl_createv(clog, 0, NULL, &sack_node, 2194 1.102 kurahone CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2195 1.102 kurahone CTLTYPE_INT, "globalmaxholes", 2196 1.102 kurahone SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 2197 1.102 kurahone NULL, 0, &tcp_sack_globalmaxholes, 0, 2198 1.102 kurahone CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2199 1.102 kurahone sysctl_createv(clog, 0, NULL, &sack_node, 2200 1.102 kurahone CTLFLAG_PERMANENT, 2201 1.102 kurahone CTLTYPE_INT, "globalholes", 2202 1.102 kurahone SYSCTL_DESCR("Global number of TCP SACK holes"), 2203 1.102 kurahone NULL, 0, &tcp_sack_globalholes, 0, 2204 1.102 kurahone CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2205 1.107 elad 2206 1.107 elad sysctl_createv(clog, 0, NULL, NULL, 2207 1.107 elad CTLFLAG_PERMANENT, 2208 1.107 elad CTLTYPE_STRUCT, "stats", 2209 1.107 elad SYSCTL_DESCR("TCP statistics"), 2210 1.142 thorpej sysctl_net_inet_tcp_stats, 0, NULL, 0, 2211 1.107 elad CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 2212 1.107 elad CTL_EOL); 2213 1.159 dyoung sysctl_createv(clog, 0, NULL, NULL, 2214 1.159 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2215 1.159 dyoung CTLTYPE_INT, "local_by_rtt", 2216 1.159 dyoung SYSCTL_DESCR("Use RTT estimator to decide which hosts " 2217 1.159 dyoung "are local"), 2218 1.159 dyoung NULL, 0, &tcp_rttlocal, 0, 2219 1.159 dyoung CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2220 1.109 rpaulo #ifdef TCP_DEBUG 2221 1.109 rpaulo sysctl_createv(clog, 0, NULL, NULL, 2222 1.109 rpaulo CTLFLAG_PERMANENT, 2223 1.109 rpaulo CTLTYPE_STRUCT, "debug", 2224 1.109 rpaulo SYSCTL_DESCR("TCP sockets debug information"), 2225 1.109 rpaulo NULL, 0, &tcp_debug, sizeof(tcp_debug), 2226 1.109 rpaulo CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG, 2227 1.109 rpaulo CTL_EOL); 2228 1.109 rpaulo sysctl_createv(clog, 0, NULL, NULL, 2229 1.109 rpaulo CTLFLAG_PERMANENT, 2230 1.109 rpaulo CTLTYPE_INT, "debx", 2231 1.110 rpaulo SYSCTL_DESCR("Number of TCP debug sockets messages"), 2232 1.109 rpaulo NULL, 0, &tcp_debx, sizeof(tcp_debx), 2233 1.109 rpaulo CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX, 2234 1.109 rpaulo CTL_EOL); 2235 1.109 rpaulo #endif 2236 1.133 christos sysctl_createv(clog, 0, NULL, NULL, 2237 1.133 christos CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2238 1.133 christos CTLTYPE_STRUCT, "drop", 2239 1.133 christos SYSCTL_DESCR("TCP drop connection"), 2240 1.133 christos sysctl_net_inet_tcp_drop, 0, NULL, 0, 2241 1.133 christos CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL); 2242 1.126 rpaulo sysctl_createv(clog, 0, NULL, NULL, 2243 1.126 rpaulo CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2244 1.126 rpaulo CTLTYPE_INT, "iss_hash", 2245 1.126 rpaulo SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic " 2246 1.126 rpaulo "hash computation"), 2247 1.126 rpaulo NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948), 2248 1.126 rpaulo CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 2249 1.126 rpaulo CTL_EOL); 2250 1.109 rpaulo 2251 1.127 yamt /* ABC subtree */ 2252 1.127 yamt 2253 1.127 yamt sysctl_createv(clog, 0, NULL, &abc_node, 2254 1.127 yamt CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc", 2255 1.127 yamt SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"), 2256 1.127 yamt NULL, 0, NULL, 0, 2257 1.127 yamt CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2258 1.127 yamt sysctl_createv(clog, 0, &abc_node, NULL, 2259 1.127 yamt CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2260 1.127 yamt CTLTYPE_INT, "enable", 2261 1.127 yamt SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"), 2262 1.127 yamt NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL); 2263 1.127 yamt sysctl_createv(clog, 0, &abc_node, NULL, 2264 1.127 yamt CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2265 1.127 yamt CTLTYPE_INT, "aggressive", 2266 1.127 yamt SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"), 2267 1.127 yamt NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL); 2268 1.159 dyoung 2269 1.159 dyoung /* MSL tuning subtree */ 2270 1.159 dyoung 2271 1.159 dyoung sysctl_createv(clog, 0, NULL, &mslt_node, 2272 1.159 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE, "mslt", 2273 1.159 dyoung SYSCTL_DESCR("MSL Tuning for TIME_WAIT truncation"), 2274 1.159 dyoung NULL, 0, NULL, 0, 2275 1.159 dyoung CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2276 1.159 dyoung sysctl_createv(clog, 0, &mslt_node, NULL, 2277 1.159 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2278 1.159 dyoung CTLTYPE_INT, "enable", 2279 1.159 dyoung SYSCTL_DESCR("Enable TIME_WAIT truncation"), 2280 1.159 dyoung NULL, 0, &tcp_msl_enable, 0, CTL_CREATE, CTL_EOL); 2281 1.159 dyoung sysctl_createv(clog, 0, &mslt_node, NULL, 2282 1.159 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2283 1.159 dyoung CTLTYPE_INT, "loopback", 2284 1.159 dyoung SYSCTL_DESCR("MSL value to use for loopback connections"), 2285 1.159 dyoung NULL, 0, &tcp_msl_loop, 0, CTL_CREATE, CTL_EOL); 2286 1.159 dyoung sysctl_createv(clog, 0, &mslt_node, NULL, 2287 1.159 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2288 1.159 dyoung CTLTYPE_INT, "local", 2289 1.159 dyoung SYSCTL_DESCR("MSL value to use for local connections"), 2290 1.159 dyoung NULL, 0, &tcp_msl_local, 0, CTL_CREATE, CTL_EOL); 2291 1.159 dyoung sysctl_createv(clog, 0, &mslt_node, NULL, 2292 1.159 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2293 1.159 dyoung CTLTYPE_INT, "remote", 2294 1.159 dyoung SYSCTL_DESCR("MSL value to use for remote connections"), 2295 1.159 dyoung NULL, 0, &tcp_msl_remote, 0, CTL_CREATE, CTL_EOL); 2296 1.159 dyoung sysctl_createv(clog, 0, &mslt_node, NULL, 2297 1.159 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2298 1.159 dyoung CTLTYPE_INT, "remote_threshold", 2299 1.221 maxv SYSCTL_DESCR("RTT estimate value to promote local to remote"), 2300 1.159 dyoung NULL, 0, &tcp_msl_remote_threshold, 0, CTL_CREATE, CTL_EOL); 2301 1.159 dyoung 2302 1.159 dyoung /* vestigial TIME_WAIT tuning subtree */ 2303 1.159 dyoung 2304 1.159 dyoung sysctl_createv(clog, 0, NULL, &vtw_node, 2305 1.159 dyoung CTLFLAG_PERMANENT, CTLTYPE_NODE, "vtw", 2306 1.159 dyoung SYSCTL_DESCR("Tuning for Vestigial TIME_WAIT"), 2307 1.159 dyoung NULL, 0, NULL, 0, 2308 1.159 dyoung CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2309 1.159 dyoung sysctl_createv(clog, 0, &vtw_node, NULL, 2310 1.159 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2311 1.159 dyoung CTLTYPE_INT, "enable", 2312 1.159 dyoung SYSCTL_DESCR("Enable Vestigial TIME_WAIT"), 2313 1.160 dyoung sysctl_tcp_vtw_enable, 0, 2314 1.159 dyoung (pf == AF_INET) ? &tcp4_vtw_enable : &tcp6_vtw_enable, 2315 1.159 dyoung 0, CTL_CREATE, CTL_EOL); 2316 1.159 dyoung sysctl_createv(clog, 0, &vtw_node, NULL, 2317 1.159 dyoung CTLFLAG_PERMANENT|CTLFLAG_READONLY, 2318 1.159 dyoung CTLTYPE_INT, "entries", 2319 1.159 dyoung SYSCTL_DESCR("Maximum number of vestigial TIME_WAIT entries"), 2320 1.159 dyoung NULL, 0, &tcp_vtw_entries, 0, CTL_CREATE, CTL_EOL); 2321 1.86 atatat } 2322 1.86 atatat 2323 1.157 pooka void 2324 1.157 pooka tcp_usrreq_init(void) 2325 1.76 christos { 2326 1.76 christos 2327 1.157 pooka sysctl_net_inet_tcp_setup2(NULL, PF_INET, "inet", "tcp"); 2328 1.86 atatat #ifdef INET6 2329 1.157 pooka sysctl_net_inet_tcp_setup2(NULL, PF_INET6, "inet6", "tcp6"); 2330 1.157 pooka #endif 2331 1.1 cgd } 2332 1.172 rmind 2333 1.174 rmind PR_WRAP_USRREQS(tcp) 2334 1.174 rmind #define tcp_attach tcp_attach_wrapper 2335 1.174 rmind #define tcp_detach tcp_detach_wrapper 2336 1.186 rtr #define tcp_accept tcp_accept_wrapper 2337 1.190 rtr #define tcp_bind tcp_bind_wrapper 2338 1.190 rtr #define tcp_listen tcp_listen_wrapper 2339 1.193 rtr #define tcp_connect tcp_connect_wrapper 2340 1.200 rtr #define tcp_connect2 tcp_connect2_wrapper 2341 1.194 rtr #define tcp_disconnect tcp_disconnect_wrapper 2342 1.194 rtr #define tcp_shutdown tcp_shutdown_wrapper 2343 1.194 rtr #define tcp_abort tcp_abort_wrapper 2344 1.178 rtr #define tcp_ioctl tcp_ioctl_wrapper 2345 1.181 rtr #define tcp_stat tcp_stat_wrapper 2346 1.185 rtr #define tcp_peeraddr tcp_peeraddr_wrapper 2347 1.185 rtr #define tcp_sockaddr tcp_sockaddr_wrapper 2348 1.199 rtr #define tcp_rcvd tcp_rcvd_wrapper 2349 1.189 rtr #define tcp_recvoob tcp_recvoob_wrapper 2350 1.198 rtr #define tcp_send tcp_send_wrapper 2351 1.189 rtr #define tcp_sendoob tcp_sendoob_wrapper 2352 1.200 rtr #define tcp_purgeif tcp_purgeif_wrapper 2353 1.172 rmind 2354 1.172 rmind const struct pr_usrreqs tcp_usrreqs = { 2355 1.173 rmind .pr_attach = tcp_attach, 2356 1.173 rmind .pr_detach = tcp_detach, 2357 1.186 rtr .pr_accept = tcp_accept, 2358 1.190 rtr .pr_bind = tcp_bind, 2359 1.190 rtr .pr_listen = tcp_listen, 2360 1.193 rtr .pr_connect = tcp_connect, 2361 1.200 rtr .pr_connect2 = tcp_connect2, 2362 1.194 rtr .pr_disconnect = tcp_disconnect, 2363 1.194 rtr .pr_shutdown = tcp_shutdown, 2364 1.194 rtr .pr_abort = tcp_abort, 2365 1.178 rtr .pr_ioctl = tcp_ioctl, 2366 1.181 rtr .pr_stat = tcp_stat, 2367 1.185 rtr .pr_peeraddr = tcp_peeraddr, 2368 1.185 rtr .pr_sockaddr = tcp_sockaddr, 2369 1.199 rtr .pr_rcvd = tcp_rcvd, 2370 1.189 rtr .pr_recvoob = tcp_recvoob, 2371 1.198 rtr .pr_send = tcp_send, 2372 1.189 rtr .pr_sendoob = tcp_sendoob, 2373 1.200 rtr .pr_purgeif = tcp_purgeif, 2374 1.172 rmind }; 2375