1 1.222 rillig /* $NetBSD: tcp_output.c,v 1.222 2024/09/08 09:36:52 rillig Exp $ */ 2 1.48 itojun 3 1.48 itojun /* 4 1.48 itojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 1.48 itojun * All rights reserved. 6 1.82 itojun * 7 1.48 itojun * Redistribution and use in source and binary forms, with or without 8 1.48 itojun * modification, are permitted provided that the following conditions 9 1.48 itojun * are met: 10 1.48 itojun * 1. Redistributions of source code must retain the above copyright 11 1.48 itojun * notice, this list of conditions and the following disclaimer. 12 1.48 itojun * 2. Redistributions in binary form must reproduce the above copyright 13 1.48 itojun * notice, this list of conditions and the following disclaimer in the 14 1.48 itojun * documentation and/or other materials provided with the distribution. 15 1.48 itojun * 3. Neither the name of the project nor the names of its contributors 16 1.48 itojun * may be used to endorse or promote products derived from this software 17 1.48 itojun * without specific prior written permission. 18 1.82 itojun * 19 1.48 itojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 1.48 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 1.48 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 1.48 itojun * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 1.48 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 1.48 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 1.48 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 1.48 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 1.48 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 1.48 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 1.48 itojun * SUCH DAMAGE. 30 1.48 itojun */ 31 1.28 thorpej 32 1.77 itojun /* 33 1.77 itojun * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 34 1.82 itojun * 35 1.77 itojun * NRL grants permission for redistribution and use in source and binary 36 1.77 itojun * forms, with or without modification, of the software and documentation 37 1.77 itojun * created at NRL provided that the following conditions are met: 38 1.82 itojun * 39 1.77 itojun * 1. Redistributions of source code must retain the above copyright 40 1.77 itojun * notice, this list of conditions and the following disclaimer. 41 1.77 itojun * 2. Redistributions in binary form must reproduce the above copyright 42 1.77 itojun * notice, this list of conditions and the following disclaimer in the 43 1.77 itojun * documentation and/or other materials provided with the distribution. 44 1.77 itojun * 3. All advertising materials mentioning features or use of this software 45 1.77 itojun * must display the following acknowledgements: 46 1.77 itojun * This product includes software developed by the University of 47 1.77 itojun * California, Berkeley and its contributors. 48 1.77 itojun * This product includes software developed at the Information 49 1.77 itojun * Technology Division, US Naval Research Laboratory. 50 1.77 itojun * 4. 
Neither the name of the NRL nor the names of its contributors 51 1.77 itojun * may be used to endorse or promote products derived from this software 52 1.77 itojun * without specific prior written permission. 53 1.82 itojun * 54 1.77 itojun * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 55 1.77 itojun * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 56 1.77 itojun * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 57 1.77 itojun * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 58 1.77 itojun * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 59 1.77 itojun * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 60 1.77 itojun * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 61 1.77 itojun * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 62 1.77 itojun * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 63 1.77 itojun * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 64 1.77 itojun * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 1.82 itojun * 66 1.77 itojun * The views and conclusions contained in the software and documentation 67 1.77 itojun * are those of the authors and should not be interpreted as representing 68 1.77 itojun * official policies, either expressed or implied, of the US Naval 69 1.77 itojun * Research Laboratory (NRL). 70 1.77 itojun */ 71 1.77 itojun 72 1.28 thorpej /*- 73 1.143 rpaulo * Copyright (c) 1997, 1998, 2001, 2005, 2006 The NetBSD Foundation, Inc. 74 1.28 thorpej * All rights reserved. 75 1.28 thorpej * 76 1.28 thorpej * This code is derived from software contributed to The NetBSD Foundation 77 1.28 thorpej * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 78 1.28 thorpej * Facility, NASA Ames Research Center. 
79 1.119 mycroft * This code is derived from software contributed to The NetBSD Foundation 80 1.119 mycroft * by Charles M. Hannum. 81 1.143 rpaulo * This code is derived from software contributed to The NetBSD Foundation 82 1.143 rpaulo * by Rui Paulo. 83 1.28 thorpej * 84 1.28 thorpej * Redistribution and use in source and binary forms, with or without 85 1.28 thorpej * modification, are permitted provided that the following conditions 86 1.28 thorpej * are met: 87 1.28 thorpej * 1. Redistributions of source code must retain the above copyright 88 1.28 thorpej * notice, this list of conditions and the following disclaimer. 89 1.28 thorpej * 2. Redistributions in binary form must reproduce the above copyright 90 1.28 thorpej * notice, this list of conditions and the following disclaimer in the 91 1.28 thorpej * documentation and/or other materials provided with the distribution. 92 1.28 thorpej * 93 1.28 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 94 1.28 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 95 1.28 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 96 1.28 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 97 1.28 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 98 1.28 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 99 1.28 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 100 1.28 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 101 1.28 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 1.28 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 103 1.28 thorpej * POSSIBILITY OF SUCH DAMAGE. 
104 1.28 thorpej */ 105 1.10 cgd 106 1.1 cgd /* 107 1.27 thorpej * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 108 1.9 mycroft * The Regents of the University of California. All rights reserved. 109 1.1 cgd * 110 1.1 cgd * Redistribution and use in source and binary forms, with or without 111 1.1 cgd * modification, are permitted provided that the following conditions 112 1.1 cgd * are met: 113 1.1 cgd * 1. Redistributions of source code must retain the above copyright 114 1.1 cgd * notice, this list of conditions and the following disclaimer. 115 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 116 1.1 cgd * notice, this list of conditions and the following disclaimer in the 117 1.1 cgd * documentation and/or other materials provided with the distribution. 118 1.97 agc * 3. Neither the name of the University nor the names of its contributors 119 1.1 cgd * may be used to endorse or promote products derived from this software 120 1.1 cgd * without specific prior written permission. 121 1.1 cgd * 122 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 123 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 124 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 125 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 126 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 127 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 128 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 129 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 130 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 131 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 132 1.1 cgd * SUCH DAMAGE. 
133 1.1 cgd * 134 1.27 thorpej * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95 135 1.1 cgd */ 136 1.75 lukem 137 1.75 lukem #include <sys/cdefs.h> 138 1.222 rillig __KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.222 2024/09/08 09:36:52 rillig Exp $"); 139 1.1 cgd 140 1.185 pooka #ifdef _KERNEL_OPT 141 1.48 itojun #include "opt_inet.h" 142 1.51 thorpej #include "opt_ipsec.h" 143 1.67 abs #include "opt_tcp_debug.h" 144 1.185 pooka #endif 145 1.48 itojun 146 1.4 mycroft #include <sys/param.h> 147 1.4 mycroft #include <sys/systm.h> 148 1.4 mycroft #include <sys/mbuf.h> 149 1.4 mycroft #include <sys/protosw.h> 150 1.4 mycroft #include <sys/socket.h> 151 1.4 mycroft #include <sys/socketvar.h> 152 1.4 mycroft #include <sys/errno.h> 153 1.48 itojun #include <sys/domain.h> 154 1.74 thorpej #include <sys/kernel.h> 155 1.113 itojun #ifdef TCP_SIGNATURE 156 1.113 itojun #include <sys/md5.h> 157 1.113 itojun #endif 158 1.1 cgd 159 1.17 thorpej #include <net/if.h> 160 1.4 mycroft #include <net/route.h> 161 1.1 cgd 162 1.4 mycroft #include <netinet/in.h> 163 1.4 mycroft #include <netinet/in_systm.h> 164 1.4 mycroft #include <netinet/ip.h> 165 1.4 mycroft #include <netinet/in_pcb.h> 166 1.4 mycroft #include <netinet/ip_var.h> 167 1.48 itojun 168 1.48 itojun #ifdef INET6 169 1.48 itojun #include <netinet/ip6.h> 170 1.81 itojun #include <netinet6/in6_var.h> 171 1.81 itojun #include <netinet6/ip6_var.h> 172 1.48 itojun #include <netinet6/in6_pcb.h> 173 1.81 itojun #include <netinet6/nd6.h> 174 1.87 itojun #endif 175 1.87 itojun 176 1.175 christos #ifdef IPSEC 177 1.98 jonathan #include <netipsec/ipsec.h> 178 1.114 jonathan #include <netipsec/key.h> 179 1.154 degroote #ifdef INET6 180 1.154 degroote #include <netipsec/ipsec6.h> 181 1.154 degroote #endif 182 1.200 maxv #endif 183 1.48 itojun 184 1.4 mycroft #include <netinet/tcp.h> 185 1.1 cgd #define TCPOUTFLAGS 186 1.4 mycroft #include <netinet/tcp_fsm.h> 187 1.4 mycroft #include <netinet/tcp_seq.h> 188 1.4 mycroft #include 
<netinet/tcp_timer.h> 189 1.4 mycroft #include <netinet/tcp_var.h> 190 1.166 thorpej #include <netinet/tcp_private.h> 191 1.149 rpaulo #include <netinet/tcp_congctl.h> 192 1.4 mycroft #include <netinet/tcp_debug.h> 193 1.131 yamt #include <netinet/in_offload.h> 194 1.151 yamt #include <netinet6/in6_offload.h> 195 1.14 christos 196 1.33 thorpej /* 197 1.136 drochner * Knob to enable Congestion Window Monitoring, and control 198 1.36 thorpej * the burst size it allows. Default burst is 4 packets, per 199 1.36 thorpej * the Internet draft. 200 1.33 thorpej */ 201 1.83 thorpej int tcp_cwm = 0; 202 1.36 thorpej int tcp_cwm_burstsize = 4; 203 1.33 thorpej 204 1.169 pooka int tcp_do_autosndbuf = 1; 205 1.160 rmind int tcp_autosndbuf_inc = 8 * 1024; 206 1.160 rmind int tcp_autosndbuf_max = 256 * 1024; 207 1.160 rmind 208 1.79 thorpej #ifdef TCP_OUTPUT_COUNTERS 209 1.79 thorpej #include <sys/device.h> 210 1.79 thorpej 211 1.79 thorpej extern struct evcnt tcp_output_bigheader; 212 1.104 enami extern struct evcnt tcp_output_predict_hit; 213 1.104 enami extern struct evcnt tcp_output_predict_miss; 214 1.79 thorpej extern struct evcnt tcp_output_copysmall; 215 1.79 thorpej extern struct evcnt tcp_output_copybig; 216 1.79 thorpej extern struct evcnt tcp_output_refbig; 217 1.79 thorpej 218 1.79 thorpej #define TCP_OUTPUT_COUNTER_INCR(ev) (ev)->ev_count++ 219 1.79 thorpej #else 220 1.79 thorpej 221 1.79 thorpej #define TCP_OUTPUT_COUNTER_INCR(ev) /* nothing */ 222 1.79 thorpej 223 1.79 thorpej #endif /* TCP_OUTPUT_COUNTERS */ 224 1.79 thorpej 225 1.201 maxv static int 226 1.151 yamt tcp_segsize(struct tcpcb *tp, int *txsegsizep, int *rxsegsizep, 227 1.155 thorpej bool *alwaysfragp) 228 1.17 thorpej { 229 1.17 thorpej struct inpcb *inp = tp->t_inpcb; 230 1.88 scw struct socket *so = NULL; 231 1.17 thorpej struct rtentry *rt; 232 1.17 thorpej struct ifnet *ifp; 233 1.17 thorpej int size; 234 1.142 seanb int hdrlen; 235 1.76 jmcneill int optlen; 236 1.48 itojun 237 1.156 thorpej 
*alwaysfragp = false; 238 1.203 maxv size = tcp_mssdflt; 239 1.151 yamt 240 1.48 itojun switch (tp->t_family) { 241 1.48 itojun case AF_INET: 242 1.142 seanb hdrlen = sizeof(struct ip) + sizeof(struct tcphdr); 243 1.48 itojun break; 244 1.48 itojun #ifdef INET6 245 1.48 itojun case AF_INET6: 246 1.142 seanb hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 247 1.48 itojun break; 248 1.48 itojun #endif 249 1.48 itojun default: 250 1.212 mlelstv hdrlen = 1; /* prevent zero sized segments */ 251 1.48 itojun goto out; 252 1.48 itojun } 253 1.17 thorpej 254 1.217 ozaki rt = inpcb_rtentry(inp); 255 1.215 ozaki so = inp->inp_socket; 256 1.48 itojun if (rt == NULL) { 257 1.17 thorpej goto out; 258 1.17 thorpej } 259 1.17 thorpej 260 1.17 thorpej ifp = rt->rt_ifp; 261 1.17 thorpej 262 1.107 itojun if (tp->t_mtudisc && rt->rt_rmx.rmx_mtu != 0) { 263 1.107 itojun #ifdef INET6 264 1.215 ozaki if (inp->inp_af == AF_INET6 && rt->rt_rmx.rmx_mtu < IPV6_MMTU) { 265 1.107 itojun /* 266 1.107 itojun * RFC2460 section 5, last paragraph: if path MTU is 267 1.107 itojun * smaller than 1280, use 1280 as packet size and 268 1.107 itojun * attach fragment header. 
269 1.107 itojun */ 270 1.142 seanb size = IPV6_MMTU - hdrlen - sizeof(struct ip6_frag); 271 1.156 thorpej *alwaysfragp = true; 272 1.107 itojun } else 273 1.142 seanb size = rt->rt_rmx.rmx_mtu - hdrlen; 274 1.107 itojun #else 275 1.142 seanb size = rt->rt_rmx.rmx_mtu - hdrlen; 276 1.107 itojun #endif 277 1.107 itojun } else if (ifp->if_flags & IFF_LOOPBACK) 278 1.142 seanb size = ifp->if_mtu - hdrlen; 279 1.216 ozaki else if (inp->inp_af == AF_INET && tp->t_mtudisc) 280 1.142 seanb size = ifp->if_mtu - hdrlen; 281 1.216 ozaki else if (inp->inp_af == AF_INET && in_localaddr(in4p_faddr(inp))) 282 1.142 seanb size = ifp->if_mtu - hdrlen; 283 1.48 itojun #ifdef INET6 284 1.215 ozaki else if (inp->inp_af == AF_INET6) { 285 1.216 ozaki if (IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp))) { 286 1.48 itojun /* mapped addr case */ 287 1.48 itojun struct in_addr d; 288 1.216 ozaki memcpy(&d, &in6p_faddr(inp).s6_addr32[3], sizeof(d)); 289 1.80 itojun if (tp->t_mtudisc || in_localaddr(d)) 290 1.142 seanb size = ifp->if_mtu - hdrlen; 291 1.200 maxv } else { 292 1.65 itojun /* 293 1.65 itojun * for IPv6, path MTU discovery is always turned on, 294 1.65 itojun * or the node must use packet size <= 1280. 295 1.65 itojun */ 296 1.213 roy size = tp->t_mtudisc ? ifp->if_mtu : IPV6_MMTU; 297 1.142 seanb size -= hdrlen; 298 1.48 itojun } 299 1.48 itojun } 300 1.48 itojun #endif 301 1.217 ozaki inpcb_rtentry_unref(rt, inp); 302 1.78 thorpej out: 303 1.76 jmcneill /* 304 1.76 jmcneill * Now we must make room for whatever extra TCP/IP options are in 305 1.76 jmcneill * the packet. 306 1.76 jmcneill */ 307 1.76 jmcneill optlen = tcp_optlen(tp); 308 1.76 jmcneill 309 1.48 itojun /* 310 1.48 itojun * XXX tp->t_ourmss should have the right size, but without this code 311 1.48 itojun * fragmentation will occur... 
need more investigation 312 1.48 itojun */ 313 1.200 maxv 314 1.215 ozaki if (inp->inp_af == AF_INET) { 315 1.175 christos #if defined(IPSEC) 316 1.176 christos if (ipsec_used && 317 1.196 ozaki !ipsec_pcb_skip_ipsec(inp->inp_sp, IPSEC_DIR_OUTBOUND)) 318 1.108 thorpej optlen += ipsec4_hdrsiz_tcp(tp); 319 1.48 itojun #endif 320 1.76 jmcneill optlen += ip_optlen(inp); 321 1.48 itojun } 322 1.200 maxv 323 1.48 itojun #ifdef INET6 324 1.215 ozaki if (inp->inp_af == AF_INET6 && tp->t_family == AF_INET) { 325 1.175 christos #if defined(IPSEC) 326 1.176 christos if (ipsec_used && 327 1.215 ozaki !ipsec_pcb_skip_ipsec(inp->inp_sp, IPSEC_DIR_OUTBOUND)) 328 1.108 thorpej optlen += ipsec4_hdrsiz_tcp(tp); 329 1.48 itojun #endif 330 1.48 itojun /* XXX size -= ip_optlen(in6p); */ 331 1.215 ozaki } else if (inp->inp_af == AF_INET6) { 332 1.175 christos #if defined(IPSEC) 333 1.176 christos if (ipsec_used && 334 1.215 ozaki !ipsec_pcb_skip_ipsec(inp->inp_sp, IPSEC_DIR_OUTBOUND)) 335 1.108 thorpej optlen += ipsec6_hdrsiz_tcp(tp); 336 1.48 itojun #endif 337 1.215 ozaki optlen += ip6_optlen(inp); 338 1.48 itojun } 339 1.48 itojun #endif 340 1.76 jmcneill size -= optlen; 341 1.17 thorpej 342 1.204 maxv /* 343 1.204 maxv * There may not be any room for data if mtu is too small. This 344 1.204 maxv * includes zero-sized. 345 1.204 maxv */ 346 1.204 maxv if (size <= 0) { 347 1.201 maxv return EMSGSIZE; 348 1.204 maxv } 349 1.107 itojun 350 1.52 itojun /* 351 1.52 itojun * *rxsegsizep holds *estimated* inbound segment size (estimation 352 1.52 itojun * assumes that path MTU is the same for both ways). this is only 353 1.52 itojun * for silly window avoidance, do not use the value for other purposes. 354 1.52 itojun * 355 1.52 itojun * ipseclen is subtracted from both sides, this may not be right. 356 1.52 itojun * I'm not quite sure about this (could someone comment). 
357 1.52 itojun */ 358 1.209 riastrad *txsegsizep = uimin(tp->t_peermss - optlen, size); 359 1.209 riastrad *rxsegsizep = uimin(tp->t_ourmss - optlen, size); 360 1.203 maxv 361 1.85 thorpej /* 362 1.85 thorpej * Never send more than half a buffer full. This insures that we can 363 1.85 thorpej * always keep 2 packets on the wire, no matter what SO_SNDBUF is, and 364 1.86 mycroft * therefore acks will never be delayed unless we run out of data to 365 1.85 thorpej * transmit. 366 1.85 thorpej */ 367 1.203 maxv if (so) { 368 1.209 riastrad *txsegsizep = uimin(so->so_snd.sb_hiwat >> 1, *txsegsizep); 369 1.203 maxv } 370 1.21 kml 371 1.212 mlelstv /* 372 1.212 mlelstv * A segment must at least store header + options 373 1.212 mlelstv */ 374 1.212 mlelstv if (*txsegsizep < hdrlen + optlen) { 375 1.212 mlelstv return EMSGSIZE; 376 1.212 mlelstv } 377 1.212 mlelstv 378 1.21 kml if (*txsegsizep != tp->t_segsz) { 379 1.35 kml /* 380 1.82 itojun * If the new segment size is larger, we don't want to 381 1.35 kml * mess up the congestion window, but if it is smaller 382 1.35 kml * we'll have to reduce the congestion window to ensure 383 1.35 kml * that we don't get into trouble with initial windows 384 1.35 kml * and the rest. In any case, if the segment size 385 1.35 kml * has changed, chances are the path has, too, and 386 1.35 kml * our congestion window will be different. 
387 1.29 kml */ 388 1.35 kml if (*txsegsizep < tp->t_segsz) { 389 1.209 riastrad tp->snd_cwnd = uimax((tp->snd_cwnd / tp->t_segsz) 390 1.203 maxv * *txsegsizep, *txsegsizep); 391 1.209 riastrad tp->snd_ssthresh = uimax((tp->snd_ssthresh / tp->t_segsz) 392 1.203 maxv * *txsegsizep, *txsegsizep); 393 1.35 kml } 394 1.21 kml tp->t_segsz = *txsegsizep; 395 1.21 kml } 396 1.107 itojun 397 1.201 maxv return 0; 398 1.17 thorpej } 399 1.17 thorpej 400 1.201 maxv static int 401 1.70 thorpej tcp_build_datapkt(struct tcpcb *tp, struct socket *so, int off, 402 1.70 thorpej long len, int hdrlen, struct mbuf **mp) 403 1.70 thorpej { 404 1.95 ragge struct mbuf *m, *m0; 405 1.220 riastrad net_stat_ref_t tcps; 406 1.70 thorpej 407 1.166 thorpej tcps = TCP_STAT_GETREF(); 408 1.70 thorpej if (tp->t_force && len == 1) 409 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDPROBE); 410 1.70 thorpej else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { 411 1.180 he tp->t_sndrexmitpack++; 412 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDREXMITPACK); 413 1.220 riastrad _NET_STATADD_REF(tcps, TCP_STAT_SNDREXMITBYTE, len); 414 1.70 thorpej } else { 415 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDPACK); 416 1.220 riastrad _NET_STATADD_REF(tcps, TCP_STAT_SNDBYTE, len); 417 1.70 thorpej } 418 1.166 thorpej TCP_STAT_PUTREF(); 419 1.202 maxv 420 1.70 thorpej MGETHDR(m, M_DONTWAIT, MT_HEADER); 421 1.79 thorpej if (__predict_false(m == NULL)) 422 1.201 maxv return ENOBUFS; 423 1.89 matt MCLAIM(m, &tcp_tx_mowner); 424 1.79 thorpej 425 1.79 thorpej /* 426 1.79 thorpej * XXX Because other code assumes headers will fit in 427 1.79 thorpej * XXX one header mbuf. 428 1.79 thorpej * 429 1.79 thorpej * (This code should almost *never* be run.) 
430 1.79 thorpej */ 431 1.79 thorpej if (__predict_false((max_linkhdr + hdrlen) > MHLEN)) { 432 1.79 thorpej TCP_OUTPUT_COUNTER_INCR(&tcp_output_bigheader); 433 1.70 thorpej MCLGET(m, M_DONTWAIT); 434 1.70 thorpej if ((m->m_flags & M_EXT) == 0) { 435 1.70 thorpej m_freem(m); 436 1.201 maxv return ENOBUFS; 437 1.70 thorpej } 438 1.70 thorpej } 439 1.79 thorpej 440 1.70 thorpej m->m_data += max_linkhdr; 441 1.70 thorpej m->m_len = hdrlen; 442 1.95 ragge 443 1.95 ragge /* 444 1.95 ragge * To avoid traversing the whole sb_mb chain for correct 445 1.102 thorpej * data to send, remember last sent mbuf, its offset and 446 1.102 thorpej * the sent size. When called the next time, see if the 447 1.102 thorpej * data to send is directly following the previous transfer. 448 1.102 thorpej * This is important for large TCP windows. 449 1.95 ragge */ 450 1.106 ragge if (off == 0 || tp->t_lastm == NULL || 451 1.106 ragge (tp->t_lastoff + tp->t_lastlen) != off) { 452 1.103 thorpej TCP_OUTPUT_COUNTER_INCR(&tcp_output_predict_miss); 453 1.95 ragge /* 454 1.95 ragge * Either a new packet or a retransmit. 455 1.95 ragge * Start from the beginning. 
456 1.95 ragge */ 457 1.95 ragge tp->t_lastm = so->so_snd.sb_mb; 458 1.95 ragge tp->t_inoff = off; 459 1.102 thorpej } else { 460 1.103 thorpej TCP_OUTPUT_COUNTER_INCR(&tcp_output_predict_hit); 461 1.95 ragge tp->t_inoff += tp->t_lastlen; 462 1.102 thorpej } 463 1.95 ragge 464 1.95 ragge /* Traverse forward to next packet */ 465 1.95 ragge while (tp->t_inoff > 0) { 466 1.95 ragge if (tp->t_lastm == NULL) 467 1.95 ragge panic("tp->t_lastm == NULL"); 468 1.95 ragge if (tp->t_inoff < tp->t_lastm->m_len) 469 1.95 ragge break; 470 1.95 ragge tp->t_inoff -= tp->t_lastm->m_len; 471 1.95 ragge tp->t_lastm = tp->t_lastm->m_next; 472 1.95 ragge } 473 1.95 ragge 474 1.95 ragge tp->t_lastoff = off; 475 1.95 ragge tp->t_lastlen = len; 476 1.95 ragge m0 = tp->t_lastm; 477 1.95 ragge off = tp->t_inoff; 478 1.95 ragge 479 1.70 thorpej if (len <= M_TRAILINGSPACE(m)) { 480 1.201 maxv m_copydata(m0, off, (int)len, mtod(m, char *) + hdrlen); 481 1.70 thorpej m->m_len += len; 482 1.79 thorpej TCP_OUTPUT_COUNTER_INCR(&tcp_output_copysmall); 483 1.70 thorpej } else { 484 1.201 maxv m->m_next = m_copym(m0, off, (int)len, M_DONTWAIT); 485 1.70 thorpej if (m->m_next == NULL) { 486 1.70 thorpej m_freem(m); 487 1.201 maxv return ENOBUFS; 488 1.70 thorpej } 489 1.79 thorpej #ifdef TCP_OUTPUT_COUNTERS 490 1.79 thorpej if (m->m_next->m_flags & M_EXT) 491 1.79 thorpej TCP_OUTPUT_COUNTER_INCR(&tcp_output_refbig); 492 1.79 thorpej else 493 1.79 thorpej TCP_OUTPUT_COUNTER_INCR(&tcp_output_copybig); 494 1.201 maxv #endif 495 1.70 thorpej } 496 1.70 thorpej 497 1.70 thorpej *mp = m; 498 1.201 maxv return 0; 499 1.70 thorpej } 500 1.70 thorpej 501 1.1 cgd /* 502 1.1 cgd * Tcp output routine: figure out what should be sent and send it. 
503 1.1 cgd */ 504 1.6 mycroft int 505 1.116 perry tcp_output(struct tcpcb *tp) 506 1.1 cgd { 507 1.187 ozaki struct rtentry *rt = NULL; 508 1.48 itojun struct socket *so; 509 1.48 itojun struct route *ro; 510 1.47 thorpej long len, win; 511 1.1 cgd int off, flags, error; 512 1.56 augustss struct mbuf *m; 513 1.48 itojun struct ip *ip; 514 1.48 itojun #ifdef INET6 515 1.48 itojun struct ip6_hdr *ip6; 516 1.48 itojun #endif 517 1.56 augustss struct tcphdr *th; 518 1.191 christos u_char opt[MAX_TCPOPTLEN], *optp; 519 1.191 christos #define OPT_FITS(more) ((optlen + (more)) <= sizeof(opt)) 520 1.137 christos unsigned optlen, hdrlen, packetlen; 521 1.128 yamt unsigned int sack_numblks; 522 1.18 thorpej int idle, sendalot, txsegsize, rxsegsize; 523 1.127 yamt int txsegsize_nosack; 524 1.44 matt int maxburst = TCP_MAXBURST; 525 1.48 itojun int af; /* address family on the wire */ 526 1.48 itojun int iphdrlen; 527 1.151 yamt int has_tso4, has_tso6; 528 1.127 yamt int has_tso, use_tso; 529 1.155 thorpej bool alwaysfrag; 530 1.118 jonathan int sack_rxmit; 531 1.118 jonathan int sack_bytes_rxmt; 532 1.170 matt int ecn_tos; 533 1.118 jonathan struct sackhole *p; 534 1.110 jonathan #ifdef TCP_SIGNATURE 535 1.110 jonathan int sigoff = 0; 536 1.110 jonathan #endif 537 1.220 riastrad net_stat_ref_t tcps; 538 1.48 itojun 539 1.215 ozaki so = tp->t_inpcb->inp_socket; 540 1.215 ozaki ro = &tp->t_inpcb->inp_route; 541 1.48 itojun 542 1.48 itojun switch (af = tp->t_family) { 543 1.48 itojun case AF_INET: 544 1.215 ozaki case AF_INET6: 545 1.48 itojun if (tp->t_inpcb) 546 1.48 itojun break; 547 1.201 maxv return EINVAL; 548 1.48 itojun default: 549 1.201 maxv return EAFNOSUPPORT; 550 1.48 itojun } 551 1.17 thorpej 552 1.151 yamt if (tcp_segsize(tp, &txsegsize, &rxsegsize, &alwaysfrag)) 553 1.201 maxv return EMSGSIZE; 554 1.1 cgd 555 1.1 cgd idle = (tp->snd_max == tp->snd_una); 556 1.33 thorpej 557 1.41 thorpej /* 558 1.120 matt * Determine if we can use TCP segmentation offload: 559 
1.120 matt * - If we're using IPv4 560 1.120 matt * - If there is not an IPsec policy that prevents it 561 1.120 matt * - If the interface can do it 562 1.120 matt */ 563 1.156 thorpej has_tso4 = has_tso6 = false; 564 1.200 maxv 565 1.215 ozaki has_tso4 = tp->t_inpcb->inp_af == AF_INET && 566 1.175 christos #if defined(IPSEC) 567 1.196 ozaki (!ipsec_used || ipsec_pcb_skip_ipsec(tp->t_inpcb->inp_sp, 568 1.177 hikaru IPSEC_DIR_OUTBOUND)) && 569 1.120 matt #endif 570 1.176 christos (rt = rtcache_validate(&tp->t_inpcb->inp_route)) != NULL && 571 1.176 christos (rt->rt_ifp->if_capenable & IFCAP_TSOv4) != 0; 572 1.187 ozaki if (rt != NULL) { 573 1.187 ozaki rtcache_unref(rt, &tp->t_inpcb->inp_route); 574 1.187 ozaki rt = NULL; 575 1.187 ozaki } 576 1.200 maxv 577 1.151 yamt #if defined(INET6) 578 1.215 ozaki has_tso6 = tp->t_inpcb->inp_af == AF_INET6 && 579 1.175 christos #if defined(IPSEC) 580 1.215 ozaki (!ipsec_used || ipsec_pcb_skip_ipsec(tp->t_inpcb->inp_sp, 581 1.177 hikaru IPSEC_DIR_OUTBOUND)) && 582 1.151 yamt #endif 583 1.215 ozaki (rt = rtcache_validate(&tp->t_inpcb->inp_route)) != NULL && 584 1.176 christos (rt->rt_ifp->if_capenable & IFCAP_TSOv6) != 0; 585 1.187 ozaki if (rt != NULL) 586 1.215 ozaki rtcache_unref(rt, &tp->t_inpcb->inp_route); 587 1.151 yamt #endif /* defined(INET6) */ 588 1.151 yamt has_tso = (has_tso4 || has_tso6) && !alwaysfrag; 589 1.120 matt 590 1.120 matt /* 591 1.41 thorpej * Restart Window computation. From draft-floyd-incr-init-win-03: 592 1.41 thorpej * 593 1.41 thorpej * Optionally, a TCP MAY set the restart window to the 594 1.41 thorpej * minimum of the value used for the initial window and 595 1.41 thorpej * the current value of cwnd (in other words, using a 596 1.41 thorpej * larger value for the restart window should never increase 597 1.41 thorpej * the size of cwnd). 598 1.41 thorpej */ 599 1.33 thorpej if (tcp_cwm) { 600 1.1 cgd /* 601 1.33 thorpej * Hughes/Touch/Heidemann Congestion Window Monitoring. 
602 1.33 thorpej * Count the number of packets currently pending 603 1.33 thorpej * acknowledgement, and limit our congestion window 604 1.37 thorpej * to a pre-determined allowed burst size plus that count. 605 1.33 thorpej * This prevents bursting once all pending packets have 606 1.33 thorpej * been acknowledged (i.e. transmission is idle). 607 1.42 thorpej * 608 1.42 thorpej * XXX Link this to Initial Window? 609 1.1 cgd */ 610 1.209 riastrad tp->snd_cwnd = uimin(tp->snd_cwnd, 611 1.36 thorpej (tcp_cwm_burstsize * txsegsize) + 612 1.33 thorpej (tp->snd_nxt - tp->snd_una)); 613 1.33 thorpej } else { 614 1.73 thorpej if (idle && (tcp_now - tp->t_rcvtime) >= tp->t_rxtcur) { 615 1.33 thorpej /* 616 1.33 thorpej * We have been idle for "a while" and no acks are 617 1.33 thorpej * expected to clock out any data we send -- 618 1.33 thorpej * slow start to get ack "clock" running again. 619 1.33 thorpej */ 620 1.90 thorpej int ss = tcp_init_win; 621 1.215 ozaki if (tp->t_inpcb->inp_af == AF_INET && 622 1.216 ozaki in_localaddr(in4p_faddr(tp->t_inpcb))) 623 1.90 thorpej ss = tcp_init_win_local; 624 1.90 thorpej #ifdef INET6 625 1.215 ozaki else if (tp->t_inpcb->inp_af == AF_INET6 && 626 1.216 ozaki in6_localaddr(&in6p_faddr(tp->t_inpcb))) 627 1.90 thorpej ss = tcp_init_win_local; 628 1.90 thorpej #endif 629 1.209 riastrad tp->snd_cwnd = uimin(tp->snd_cwnd, 630 1.90 thorpej TCP_INITIAL_WINDOW(ss, txsegsize)); 631 1.33 thorpej } 632 1.33 thorpej } 633 1.33 thorpej 634 1.127 yamt txsegsize_nosack = txsegsize; 635 1.1 cgd again: 636 1.170 matt ecn_tos = 0; 637 1.128 yamt use_tso = has_tso; 638 1.148 yamt if ((tp->t_flags & (TF_ECN_SND_CWR|TF_ECN_SND_ECE)) != 0) { 639 1.148 yamt /* don't duplicate CWR/ECE. 
*/ 640 1.148 yamt use_tso = 0; 641 1.148 yamt } 642 1.129 yamt TCP_REASS_LOCK(tp); 643 1.128 yamt sack_numblks = tcp_sack_numblks(tp); 644 1.128 yamt if (sack_numblks) { 645 1.147 yamt int sackoptlen; 646 1.147 yamt 647 1.147 yamt sackoptlen = TCP_SACK_OPTLEN(sack_numblks); 648 1.147 yamt if (sackoptlen > txsegsize_nosack) { 649 1.147 yamt sack_numblks = 0; /* give up SACK */ 650 1.147 yamt txsegsize = txsegsize_nosack; 651 1.147 yamt } else { 652 1.147 yamt if ((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) { 653 1.147 yamt /* don't duplicate D-SACK. */ 654 1.147 yamt use_tso = 0; 655 1.147 yamt } 656 1.147 yamt txsegsize = txsegsize_nosack - sackoptlen; 657 1.128 yamt } 658 1.127 yamt } else { 659 1.128 yamt txsegsize = txsegsize_nosack; 660 1.127 yamt } 661 1.127 yamt 662 1.33 thorpej /* 663 1.33 thorpej * Determine length of data that should be transmitted, and 664 1.33 thorpej * flags that should be used. If there is some data or critical 665 1.33 thorpej * controls (SYN, RST) to send, then transmit; otherwise, 666 1.33 thorpej * investigate further. 667 1.118 jonathan * 668 1.118 jonathan * Readjust SACK information to avoid resending duplicate data. 669 1.33 thorpej */ 670 1.118 jonathan if (TCP_SACK_ENABLED(tp) && SEQ_LT(tp->snd_nxt, tp->snd_max)) 671 1.118 jonathan tcp_sack_adjust(tp); 672 1.1 cgd sendalot = 0; 673 1.1 cgd off = tp->snd_nxt - tp->snd_una; 674 1.209 riastrad win = uimin(tp->snd_wnd, tp->snd_cwnd); 675 1.1 cgd 676 1.9 mycroft flags = tcp_outflags[tp->t_state]; 677 1.118 jonathan 678 1.118 jonathan /* 679 1.118 jonathan * Send any SACK-generated retransmissions. If we're explicitly trying 680 1.118 jonathan * to send out new data (when sendalot is 1), bypass this function. 681 1.118 jonathan * If we retransmit in fast recovery mode, decrement snd_cwnd, since 682 1.118 jonathan * we're replacing a (future) new transmission with a retransmission 683 1.118 jonathan * now, and we previously incremented snd_cwnd in tcp_input(). 
684 1.118 jonathan */ 685 1.118 jonathan /* 686 1.201 maxv * Still in sack recovery, reset rxmit flag to zero. 687 1.118 jonathan */ 688 1.118 jonathan sack_rxmit = 0; 689 1.118 jonathan sack_bytes_rxmt = 0; 690 1.118 jonathan len = 0; 691 1.118 jonathan p = NULL; 692 1.121 matt do { 693 1.118 jonathan long cwin; 694 1.121 matt if (!TCP_SACK_ENABLED(tp)) 695 1.121 matt break; 696 1.201 maxv if (tp->t_partialacks < 0) 697 1.121 matt break; 698 1.121 matt p = tcp_sack_output(tp, &sack_bytes_rxmt); 699 1.121 matt if (p == NULL) 700 1.121 matt break; 701 1.201 maxv 702 1.209 riastrad cwin = uimin(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt; 703 1.118 jonathan if (cwin < 0) 704 1.118 jonathan cwin = 0; 705 1.118 jonathan /* Do not retransmit SACK segments beyond snd_recover */ 706 1.118 jonathan if (SEQ_GT(p->end, tp->snd_recover)) { 707 1.118 jonathan /* 708 1.118 jonathan * (At least) part of sack hole extends beyond 709 1.118 jonathan * snd_recover. Check to see if we can rexmit data 710 1.118 jonathan * for this hole. 711 1.118 jonathan */ 712 1.118 jonathan if (SEQ_GEQ(p->rxmit, tp->snd_recover)) { 713 1.118 jonathan /* 714 1.118 jonathan * Can't rexmit any more data for this hole. 715 1.118 jonathan * That data will be rexmitted in the next 716 1.118 jonathan * sack recovery episode, when snd_recover 717 1.118 jonathan * moves past p->rxmit. 
718 1.118 jonathan */ 719 1.118 jonathan p = NULL; 720 1.121 matt break; 721 1.121 matt } 722 1.121 matt /* Can rexmit part of the current hole */ 723 1.121 matt len = ((long)ulmin(cwin, tp->snd_recover - p->rxmit)); 724 1.118 jonathan } else 725 1.118 jonathan len = ((long)ulmin(cwin, p->end - p->rxmit)); 726 1.118 jonathan off = p->rxmit - tp->snd_una; 727 1.133 yamt if (off + len > so->so_snd.sb_cc) { 728 1.133 yamt /* 1 for TH_FIN */ 729 1.133 yamt KASSERT(off + len == so->so_snd.sb_cc + 1); 730 1.133 yamt KASSERT(p->rxmit + len == tp->snd_max); 731 1.133 yamt len = so->so_snd.sb_cc - off; 732 1.133 yamt } 733 1.118 jonathan if (len > 0) { 734 1.118 jonathan sack_rxmit = 1; 735 1.118 jonathan sendalot = 1; 736 1.118 jonathan } 737 1.123 thorpej } while (/*CONSTCOND*/0); 738 1.118 jonathan 739 1.1 cgd /* 740 1.1 cgd * If in persist timeout with window of 0, send 1 byte. 741 1.1 cgd * Otherwise, if window is small but nonzero 742 1.1 cgd * and timer expired, we will send what we can 743 1.1 cgd * and go to transmit state. 744 1.1 cgd */ 745 1.1 cgd if (tp->t_force) { 746 1.9 mycroft if (win == 0) { 747 1.9 mycroft /* 748 1.9 mycroft * If we still have some data to send, then 749 1.9 mycroft * clear the FIN bit. Usually this would 750 1.9 mycroft * happen below when it realizes that we 751 1.9 mycroft * aren't sending all the data. However, 752 1.9 mycroft * if we have exactly 1 byte of unset data, 753 1.9 mycroft * then it won't clear the FIN bit below, 754 1.9 mycroft * and if we are in persist state, we wind 755 1.9 mycroft * up sending the packet without recording 756 1.9 mycroft * that we sent the FIN bit. 757 1.9 mycroft * 758 1.9 mycroft * We can't just blindly clear the FIN bit, 759 1.9 mycroft * because if we don't have any more data 760 1.9 mycroft * to send then the probe will be the FIN 761 1.9 mycroft * itself. 
762 1.9 mycroft */ 763 1.9 mycroft if (off < so->so_snd.sb_cc) 764 1.9 mycroft flags &= ~TH_FIN; 765 1.1 cgd win = 1; 766 1.9 mycroft } else { 767 1.38 thorpej TCP_TIMER_DISARM(tp, TCPT_PERSIST); 768 1.1 cgd tp->t_rxtshift = 0; 769 1.1 cgd } 770 1.1 cgd } 771 1.1 cgd 772 1.146 yamt if (sack_rxmit == 0) { 773 1.150 yamt if (TCP_SACK_ENABLED(tp) && tp->t_partialacks >= 0) { 774 1.118 jonathan long cwin; 775 1.118 jonathan 776 1.118 jonathan /* 777 1.118 jonathan * We are inside of a SACK recovery episode and are 778 1.118 jonathan * sending new data, having retransmitted all the 779 1.118 jonathan * data possible in the scoreboard. 780 1.118 jonathan */ 781 1.132 yamt if (tp->snd_wnd < so->so_snd.sb_cc) { 782 1.132 yamt len = tp->snd_wnd - off; 783 1.132 yamt flags &= ~TH_FIN; 784 1.132 yamt } else { 785 1.132 yamt len = so->so_snd.sb_cc - off; 786 1.132 yamt } 787 1.132 yamt 788 1.118 jonathan /* 789 1.118 jonathan * From FreeBSD: 790 1.118 jonathan * Don't remove this (len > 0) check ! 791 1.201 maxv * We explicitly check for len > 0 here (although it 792 1.201 maxv * isn't really necessary), to work around a gcc 793 1.118 jonathan * optimization issue - to force gcc to compute 794 1.118 jonathan * len above. Without this check, the computation 795 1.118 jonathan * of len is bungled by the optimizer. 
796 1.118 jonathan */ 797 1.118 jonathan if (len > 0) { 798 1.201 maxv cwin = tp->snd_cwnd - 799 1.146 yamt (tp->snd_nxt - tp->sack_newdata) - 800 1.146 yamt sack_bytes_rxmt; 801 1.118 jonathan if (cwin < 0) 802 1.118 jonathan cwin = 0; 803 1.132 yamt if (cwin < len) { 804 1.132 yamt len = cwin; 805 1.132 yamt flags &= ~TH_FIN; 806 1.132 yamt } 807 1.118 jonathan } 808 1.118 jonathan } else if (win < so->so_snd.sb_cc) { 809 1.118 jonathan len = win - off; 810 1.118 jonathan flags &= ~TH_FIN; 811 1.146 yamt } else { 812 1.118 jonathan len = so->so_snd.sb_cc - off; 813 1.146 yamt } 814 1.118 jonathan } 815 1.1 cgd 816 1.1 cgd if (len < 0) { 817 1.1 cgd /* 818 1.1 cgd * If FIN has been sent but not acked, 819 1.1 cgd * but we haven't been called to retransmit, 820 1.1 cgd * len will be -1. Otherwise, window shrank 821 1.1 cgd * after we sent into it. If window shrank to 0, 822 1.25 thorpej * cancel pending retransmit, pull snd_nxt back 823 1.25 thorpej * to (closed) window, and set the persist timer 824 1.25 thorpej * if it isn't already going. If the window didn't 825 1.25 thorpej * close completely, just wait for an ACK. 826 1.43 mycroft * 827 1.43 mycroft * If we have a pending FIN, either it has already been 828 1.43 mycroft * transmitted or it is outside the window, so drop it. 829 1.43 mycroft * If the FIN has been transmitted, but this is not a 830 1.43 mycroft * retransmission, then len must be -1. Therefore we also 831 1.43 mycroft * prevent here the sending of `gratuitous FINs'. This 832 1.43 mycroft * eliminates the need to check for that case below (e.g. 833 1.43 mycroft * to back up snd_nxt before the FIN so that the sequence 834 1.43 mycroft * number is correct). 
835 1.1 cgd */ 836 1.1 cgd len = 0; 837 1.43 mycroft flags &= ~TH_FIN; 838 1.1 cgd if (win == 0) { 839 1.38 thorpej TCP_TIMER_DISARM(tp, TCPT_REXMT); 840 1.25 thorpej tp->t_rxtshift = 0; 841 1.1 cgd tp->snd_nxt = tp->snd_una; 842 1.38 thorpej if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) 843 1.25 thorpej tcp_setpersist(tp); 844 1.1 cgd } 845 1.1 cgd } 846 1.160 rmind 847 1.160 rmind /* 848 1.160 rmind * Automatic sizing enables the performance of large buffers 849 1.160 rmind * and most of the efficiency of small ones by only allocating 850 1.160 rmind * space when it is needed. 851 1.160 rmind * 852 1.160 rmind * The criteria to step up the send buffer one notch are: 853 1.160 rmind * 1. receive window of remote host is larger than send buffer 854 1.160 rmind * (with a fudge factor of 5/4th); 855 1.160 rmind * 2. send buffer is filled to 7/8th with data (so we actually 856 1.160 rmind * have data to make use of it); 857 1.160 rmind * 3. send buffer fill has not hit maximal automatic size; 858 1.160 rmind * 4. our send window (slow start and cogestion controlled) is 859 1.160 rmind * larger than sent but unacknowledged data in send buffer. 860 1.160 rmind * 861 1.160 rmind * The remote host receive window scaling factor may limit the 862 1.160 rmind * growing of the send buffer before it reaches its allowed 863 1.160 rmind * maximum. 864 1.160 rmind * 865 1.160 rmind * It scales directly with slow start or congestion window 866 1.160 rmind * and does at most one step per received ACK. This fast 867 1.160 rmind * scaling has the drawback of growing the send buffer beyond 868 1.160 rmind * what is strictly necessary to make full use of a given 869 1.214 andvar * delay*bandwidth product. However testing has shown this not 870 1.222 rillig * to be much of a problem. At worst we are trading wasting 871 1.214 andvar * of available bandwidth (the non-use of it) for wasting some 872 1.160 rmind * socket buffer memory. 
873 1.160 rmind * 874 1.160 rmind * TODO: Shrink send buffer during idle periods together 875 1.160 rmind * with congestion window. Requires another timer. 876 1.160 rmind */ 877 1.160 rmind if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) { 878 1.160 rmind if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat && 879 1.160 rmind so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) && 880 1.160 rmind so->so_snd.sb_cc < tcp_autosndbuf_max && 881 1.160 rmind win >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) { 882 1.160 rmind if (!sbreserve(&so->so_snd, 883 1.209 riastrad uimin(so->so_snd.sb_hiwat + tcp_autosndbuf_inc, 884 1.160 rmind tcp_autosndbuf_max), so)) 885 1.160 rmind so->so_snd.sb_flags &= ~SB_AUTOSIZE; 886 1.160 rmind } 887 1.160 rmind } 888 1.160 rmind 889 1.18 thorpej if (len > txsegsize) { 890 1.120 matt if (use_tso) { 891 1.120 matt /* 892 1.120 matt * Truncate TSO transfers to IP_MAXPACKET, and make 893 1.120 matt * sure that we send equal size transfers down the 894 1.120 matt * stack (rather than big-small-big-small-...). 895 1.120 matt */ 896 1.152 martin #ifdef INET6 897 1.171 yamt CTASSERT(IPV6_MAXPACKET == IP_MAXPACKET); 898 1.152 martin #endif 899 1.209 riastrad len = (uimin(len, IP_MAXPACKET) / txsegsize) * txsegsize; 900 1.126 yamt if (len <= txsegsize) { 901 1.126 yamt use_tso = 0; 902 1.126 yamt } 903 1.120 matt } else 904 1.120 matt len = txsegsize; 905 1.11 mycroft flags &= ~TH_FIN; 906 1.1 cgd sendalot = 1; 907 1.120 matt } else 908 1.120 matt use_tso = 0; 909 1.118 jonathan if (sack_rxmit) { 910 1.118 jonathan if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) 911 1.118 jonathan flags &= ~TH_FIN; 912 1.118 jonathan } 913 1.1 cgd 914 1.1 cgd win = sbspace(&so->so_rcv); 915 1.1 cgd 916 1.1 cgd /* 917 1.1 cgd * Sender silly window avoidance. 
If connection is idle 918 1.1 cgd * and can send all data, a maximum segment, 919 1.1 cgd * at least a maximum default-size segment do it, 920 1.1 cgd * or are forced, do it; otherwise don't bother. 921 1.1 cgd * If peer's buffer is tiny, then send 922 1.1 cgd * when window is at least half open. 923 1.1 cgd * If retransmitting (possibly after persist timer forced us 924 1.1 cgd * to send into a small window), then must resend. 925 1.1 cgd */ 926 1.1 cgd if (len) { 927 1.120 matt if (len >= txsegsize) 928 1.1 cgd goto send; 929 1.46 thorpej if ((so->so_state & SS_MORETOCOME) == 0 && 930 1.46 thorpej ((idle || tp->t_flags & TF_NODELAY) && 931 1.46 thorpej len + off >= so->so_snd.sb_cc)) 932 1.1 cgd goto send; 933 1.1 cgd if (tp->t_force) 934 1.1 cgd goto send; 935 1.1 cgd if (len >= tp->max_sndwnd / 2) 936 1.1 cgd goto send; 937 1.1 cgd if (SEQ_LT(tp->snd_nxt, tp->snd_max)) 938 1.1 cgd goto send; 939 1.118 jonathan if (sack_rxmit) 940 1.118 jonathan goto send; 941 1.1 cgd } 942 1.1 cgd 943 1.1 cgd /* 944 1.18 thorpej * Compare available window to amount of window known to peer 945 1.18 thorpej * (as advertised window less next expected input). If the 946 1.18 thorpej * difference is at least twice the size of the largest segment 947 1.18 thorpej * we expect to receive (i.e. two segments) or at least 50% of 948 1.18 thorpej * the maximum possible window, then want to send a window update 949 1.18 thorpej * to peer. 950 1.1 cgd */ 951 1.1 cgd if (win > 0) { 952 1.82 itojun /* 953 1.9 mycroft * "adv" is the amount we can increase the window, 954 1.9 mycroft * taking into account that we are limited by 955 1.9 mycroft * TCP_MAXWIN << tp->rcv_scale. 956 1.9 mycroft */ 957 1.209 riastrad long recwin = uimin(win, (long)TCP_MAXWIN << tp->rcv_scale); 958 1.207 uwe long oldwin, adv; 959 1.1 cgd 960 1.182 christos /* 961 1.207 uwe * rcv_nxt may overtake rcv_adv when we accept a 962 1.207 uwe * zero-window probe. 
963 1.182 christos */ 964 1.207 uwe if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) 965 1.207 uwe oldwin = tp->rcv_adv - tp->rcv_nxt; 966 1.207 uwe else 967 1.207 uwe oldwin = 0; 968 1.207 uwe 969 1.207 uwe /* 970 1.207 uwe * If the new window size ends up being the same as or 971 1.207 uwe * less than the old size when it is scaled, then 972 1.207 uwe * don't force a window update. 973 1.207 uwe */ 974 1.207 uwe if (recwin >> tp->rcv_scale <= oldwin >> tp->rcv_scale) 975 1.182 christos goto dontupdate; 976 1.207 uwe 977 1.207 uwe adv = recwin - oldwin; 978 1.18 thorpej if (adv >= (long) (2 * rxsegsize)) 979 1.1 cgd goto send; 980 1.1 cgd if (2 * adv >= (long) so->so_rcv.sb_hiwat) 981 1.1 cgd goto send; 982 1.1 cgd } 983 1.182 christos dontupdate: 984 1.1 cgd 985 1.1 cgd /* 986 1.1 cgd * Send if we owe peer an ACK. 987 1.1 cgd */ 988 1.1 cgd if (tp->t_flags & TF_ACKNOW) 989 1.1 cgd goto send; 990 1.43 mycroft if (flags & (TH_SYN|TH_FIN|TH_RST)) 991 1.1 cgd goto send; 992 1.1 cgd if (SEQ_GT(tp->snd_up, tp->snd_una)) 993 1.1 cgd goto send; 994 1.118 jonathan /* 995 1.118 jonathan * In SACK, it is possible for tcp_output to fail to send a segment 996 1.118 jonathan * after the retransmission timer has been turned off. Make sure 997 1.118 jonathan * that the retransmission timer is set. 998 1.118 jonathan */ 999 1.118 jonathan if (TCP_SACK_ENABLED(tp) && SEQ_GT(tp->snd_max, tp->snd_una) && 1000 1.118 jonathan !TCP_TIMER_ISARMED(tp, TCPT_REXMT) && 1001 1.118 jonathan !TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) { 1002 1.118 jonathan TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 1003 1.118 jonathan goto just_return; 1004 1.118 jonathan } 1005 1.1 cgd 1006 1.1 cgd /* 1007 1.1 cgd * TCP window updates are not reliable, rather a polling protocol 1008 1.1 cgd * using ``persist'' packets is used to insure receipt of window 1009 1.1 cgd * updates. 
The three ``states'' for the output side are: 1010 1.1 cgd * idle not doing retransmits or persists 1011 1.1 cgd * persisting to move a small or zero window 1012 1.1 cgd * (re)transmitting and thereby not persisting 1013 1.1 cgd * 1014 1.1 cgd * tp->t_timer[TCPT_PERSIST] 1015 1.1 cgd * is set when we are in persist state. 1016 1.1 cgd * tp->t_force 1017 1.1 cgd * is set when we are called to send a persist packet. 1018 1.1 cgd * tp->t_timer[TCPT_REXMT] 1019 1.1 cgd * is set when we are retransmitting 1020 1.1 cgd * The output side is idle when both timers are zero. 1021 1.1 cgd * 1022 1.1 cgd * If send window is too small, there is data to transmit, and no 1023 1.1 cgd * retransmit or persist is pending, then go to persist state. 1024 1.1 cgd * If nothing happens soon, send when timer expires: 1025 1.1 cgd * if window is nonzero, transmit what we can, 1026 1.1 cgd * otherwise force out a byte. 1027 1.1 cgd */ 1028 1.38 thorpej if (so->so_snd.sb_cc && TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0 && 1029 1.38 thorpej TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) { 1030 1.1 cgd tp->t_rxtshift = 0; 1031 1.1 cgd tcp_setpersist(tp); 1032 1.1 cgd } 1033 1.1 cgd 1034 1.1 cgd /* 1035 1.1 cgd * No reason to send a segment, just return. 1036 1.1 cgd */ 1037 1.118 jonathan just_return: 1038 1.129 yamt TCP_REASS_UNLOCK(tp); 1039 1.201 maxv return 0; 1040 1.1 cgd 1041 1.1 cgd send: 1042 1.1 cgd /* 1043 1.208 maxv * Before ESTABLISHED, force sending of initial options unless TCP set 1044 1.208 maxv * not to do any options. 
1045 1.208 maxv * 1046 1.208 maxv * Note: we assume that the IP/TCP header plus TCP options always fit 1047 1.208 maxv * in a single mbuf, leaving room for a maximum link header, i.e.: 1048 1.208 maxv * max_linkhdr + IP_header + TCP_header + optlen <= MCLBYTES 1049 1.1 cgd */ 1050 1.1 cgd optlen = 0; 1051 1.188 christos optp = opt; 1052 1.48 itojun switch (af) { 1053 1.48 itojun case AF_INET: 1054 1.48 itojun iphdrlen = sizeof(struct ip) + sizeof(struct tcphdr); 1055 1.48 itojun break; 1056 1.48 itojun #ifdef INET6 1057 1.48 itojun case AF_INET6: 1058 1.48 itojun iphdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 1059 1.48 itojun break; 1060 1.48 itojun #endif 1061 1.49 itojun default: /*pacify gcc*/ 1062 1.49 itojun iphdrlen = 0; 1063 1.49 itojun break; 1064 1.48 itojun } 1065 1.48 itojun hdrlen = iphdrlen; 1066 1.9 mycroft if (flags & TH_SYN) { 1067 1.163 dyoung struct rtentry *synrt; 1068 1.48 itojun 1069 1.217 ozaki synrt = inpcb_rtentry(tp->t_inpcb); 1070 1.9 mycroft tp->snd_nxt = tp->iss; 1071 1.163 dyoung tp->t_ourmss = tcp_mss_to_advertise(synrt != NULL ? 
1072 1.163 dyoung synrt->rt_ifp : NULL, af); 1073 1.217 ozaki inpcb_rtentry_unref(synrt, tp->t_inpcb); 1074 1.192 christos if ((tp->t_flags & TF_NOOPT) == 0 && OPT_FITS(TCPOLEN_MAXSEG)) { 1075 1.188 christos *optp++ = TCPOPT_MAXSEG; 1076 1.188 christos *optp++ = TCPOLEN_MAXSEG; 1077 1.188 christos *optp++ = (tp->t_ourmss >> 8) & 0xff; 1078 1.188 christos *optp++ = tp->t_ourmss & 0xff; 1079 1.188 christos optlen += TCPOLEN_MAXSEG; 1080 1.82 itojun 1081 1.9 mycroft if ((tp->t_flags & TF_REQ_SCALE) && 1082 1.9 mycroft ((flags & TH_ACK) == 0 || 1083 1.178 christos (tp->t_flags & TF_RCVD_SCALE)) && 1084 1.192 christos OPT_FITS(TCPOLEN_WINDOW + TCPOLEN_NOP)) { 1085 1.188 christos *((uint32_t *)optp) = htonl( 1086 1.9 mycroft TCPOPT_NOP << 24 | 1087 1.9 mycroft TCPOPT_WINDOW << 16 | 1088 1.9 mycroft TCPOLEN_WINDOW << 8 | 1089 1.9 mycroft tp->request_r_scale); 1090 1.188 christos optp += TCPOLEN_WINDOW + TCPOLEN_NOP; 1091 1.188 christos optlen += TCPOLEN_WINDOW + TCPOLEN_NOP; 1092 1.9 mycroft } 1093 1.192 christos if (tcp_do_sack && OPT_FITS(TCPOLEN_SACK_PERMITTED)) { 1094 1.188 christos *optp++ = TCPOPT_SACK_PERMITTED; 1095 1.188 christos *optp++ = TCPOLEN_SACK_PERMITTED; 1096 1.188 christos optlen += TCPOLEN_SACK_PERMITTED; 1097 1.118 jonathan } 1098 1.9 mycroft } 1099 1.82 itojun } 1100 1.82 itojun 1101 1.82 itojun /* 1102 1.82 itojun * Send a timestamp and echo-reply if this is a SYN and our side 1103 1.9 mycroft * wants to use timestamps (TF_REQ_TSTMP is set) or both our side 1104 1.9 mycroft * and our peer have sent timestamps in our SYN's. 
1105 1.82 itojun */ 1106 1.82 itojun if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && 1107 1.82 itojun (flags & TH_RST) == 0 && 1108 1.82 itojun ((flags & (TH_SYN|TH_ACK)) == TH_SYN || 1109 1.188 christos (tp->t_flags & TF_RCVD_TSTMP))) { 1110 1.188 christos int alen = 0; 1111 1.193 kre while (optlen % 4 != 2) { 1112 1.188 christos optlen += TCPOLEN_NOP; 1113 1.188 christos *optp++ = TCPOPT_NOP; 1114 1.188 christos alen++; 1115 1.188 christos } 1116 1.188 christos if (OPT_FITS(TCPOLEN_TIMESTAMP)) { 1117 1.188 christos *optp++ = TCPOPT_TIMESTAMP; 1118 1.188 christos *optp++ = TCPOLEN_TIMESTAMP; 1119 1.188 christos uint32_t *lp = (uint32_t *)optp; 1120 1.188 christos /* Form timestamp option (appendix A of RFC 1323) */ 1121 1.188 christos *lp++ = htonl(TCP_TIMESTAMP(tp)); 1122 1.188 christos *lp = htonl(tp->ts_recent); 1123 1.188 christos optp += TCPOLEN_TIMESTAMP - 2; 1124 1.188 christos optlen += TCPOLEN_TIMESTAMP; 1125 1.188 christos 1126 1.188 christos /* Set receive buffer autosizing timestamp. 
*/ 1127 1.188 christos if (tp->rfbuf_ts == 0 && 1128 1.188 christos (so->so_rcv.sb_flags & SB_AUTOSIZE)) 1129 1.188 christos tp->rfbuf_ts = TCP_TIMESTAMP(tp); 1130 1.188 christos } else { 1131 1.188 christos optp -= alen; 1132 1.188 christos optlen -= alen; 1133 1.188 christos } 1134 1.188 christos } 1135 1.9 mycroft 1136 1.188 christos #ifdef TCP_SIGNATURE 1137 1.188 christos if (tp->t_flags & TF_SIGNATURE) { 1138 1.188 christos /* 1139 1.188 christos * Initialize TCP-MD5 option (RFC2385) 1140 1.188 christos */ 1141 1.189 christos if (!OPT_FITS(TCPOLEN_SIGNATURE)) 1142 1.189 christos goto reset; 1143 1.201 maxv 1144 1.189 christos *optp++ = TCPOPT_SIGNATURE; 1145 1.189 christos *optp++ = TCPOLEN_SIGNATURE; 1146 1.189 christos sigoff = optlen + 2; 1147 1.189 christos memset(optp, 0, TCP_SIGLEN); 1148 1.189 christos optlen += TCPOLEN_SIGNATURE; 1149 1.189 christos optp += TCP_SIGLEN; 1150 1.82 itojun } 1151 1.201 maxv #endif 1152 1.82 itojun 1153 1.118 jonathan /* 1154 1.118 jonathan * Tack on the SACK block if it is necessary. 
1155 1.118 jonathan */ 1156 1.128 yamt if (sack_numblks) { 1157 1.188 christos int alen = 0; 1158 1.188 christos int sack_len = sack_numblks * 8; 1159 1.193 kre while (optlen % 4 != 2) { 1160 1.188 christos optlen += TCPOLEN_NOP; 1161 1.188 christos *optp++ = TCPOPT_NOP; 1162 1.188 christos alen++; 1163 1.188 christos } 1164 1.178 christos if (OPT_FITS(sack_len + 2)) { 1165 1.188 christos struct ipqent *tiqe; 1166 1.188 christos *optp++ = TCPOPT_SACK; 1167 1.188 christos *optp++ = sack_len + 2; 1168 1.188 christos uint32_t *lp = (uint32_t *)optp; 1169 1.178 christos if ((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) { 1170 1.178 christos sack_numblks--; 1171 1.178 christos *lp++ = htonl(tp->rcv_dsack_block.left); 1172 1.178 christos *lp++ = htonl(tp->rcv_dsack_block.right); 1173 1.178 christos tp->rcv_sack_flags &= ~TCPSACK_HAVED; 1174 1.178 christos } 1175 1.178 christos for (tiqe = TAILQ_FIRST(&tp->timeq); 1176 1.178 christos sack_numblks > 0; 1177 1.178 christos tiqe = TAILQ_NEXT(tiqe, ipqe_timeq)) { 1178 1.178 christos KASSERT(tiqe != NULL); 1179 1.178 christos sack_numblks--; 1180 1.178 christos *lp++ = htonl(tiqe->ipqe_seq); 1181 1.178 christos *lp++ = htonl(tiqe->ipqe_seq + tiqe->ipqe_len + 1182 1.178 christos ((tiqe->ipqe_flags & TH_FIN) != 0 ? 1 : 0)); 1183 1.178 christos } 1184 1.194 martin optlen += sack_len + 2; 1185 1.188 christos optp += sack_len; 1186 1.188 christos } else { 1187 1.188 christos optp -= alen; 1188 1.188 christos optlen -= alen; 1189 1.128 yamt } 1190 1.118 jonathan } 1191 1.118 jonathan 1192 1.188 christos /* Terminate and pad TCP options to a 4 byte boundary. 
*/ 1193 1.188 christos if (optlen % 4) { 1194 1.192 christos if (!OPT_FITS(TCPOLEN_EOL)) { 1195 1.190 christos reset: TCP_REASS_UNLOCK(tp); 1196 1.189 christos error = ECONNABORTED; 1197 1.189 christos goto out; 1198 1.189 christos } 1199 1.188 christos optlen += TCPOLEN_EOL; 1200 1.188 christos *optp++ = TCPOPT_EOL; 1201 1.188 christos } 1202 1.188 christos /* 1203 1.188 christos * According to RFC 793 (STD0007): 1204 1.188 christos * "The content of the header beyond the End-of-Option option 1205 1.188 christos * must be header padding (i.e., zero)." 1206 1.188 christos * and later: "The padding is composed of zeros." 1207 1.188 christos */ 1208 1.188 christos while (optlen % 4) { 1209 1.192 christos if (!OPT_FITS(TCPOLEN_PAD)) 1210 1.188 christos goto reset; 1211 1.188 christos optlen += TCPOLEN_PAD; 1212 1.188 christos *optp++ = TCPOPT_PAD; 1213 1.183 kefren } 1214 1.188 christos 1215 1.188 christos TCP_REASS_UNLOCK(tp); 1216 1.110 jonathan 1217 1.82 itojun hdrlen += optlen; 1218 1.82 itojun 1219 1.1 cgd #ifdef DIAGNOSTIC 1220 1.120 matt if (!use_tso && len > txsegsize) 1221 1.29 kml panic("tcp data to be sent is larger than segment"); 1222 1.120 matt else if (use_tso && len > IP_MAXPACKET) 1223 1.120 matt panic("tcp data to be sent is larger than max TSO size"); 1224 1.82 itojun if (max_linkhdr + hdrlen > MCLBYTES) 1225 1.9 mycroft panic("tcphdr too big"); 1226 1.1 cgd #endif 1227 1.1 cgd 1228 1.1 cgd /* 1229 1.1 cgd * Grab a header mbuf, attaching a copy of data to 1230 1.1 cgd * be transmitted, and initialize the header from 1231 1.1 cgd * the template for sends on this connection. 1232 1.1 cgd */ 1233 1.1 cgd if (len) { 1234 1.70 thorpej error = tcp_build_datapkt(tp, so, off, len, hdrlen, &m); 1235 1.70 thorpej if (error) 1236 1.1 cgd goto out; 1237 1.1 cgd /* 1238 1.1 cgd * If we're sending everything we've got, set PUSH. 
1239 1.1 cgd * (This will keep happy those implementations which only 1240 1.1 cgd * give data to the user when a buffer fills or 1241 1.1 cgd * a PUSH comes in.) 1242 1.1 cgd */ 1243 1.1 cgd if (off + len == so->so_snd.sb_cc) 1244 1.1 cgd flags |= TH_PUSH; 1245 1.1 cgd } else { 1246 1.166 thorpej tcps = TCP_STAT_GETREF(); 1247 1.1 cgd if (tp->t_flags & TF_ACKNOW) 1248 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDACKS); 1249 1.1 cgd else if (flags & (TH_SYN|TH_FIN|TH_RST)) 1250 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDCTRL); 1251 1.1 cgd else if (SEQ_GT(tp->snd_up, tp->snd_una)) 1252 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDURG); 1253 1.1 cgd else 1254 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDWINUP); 1255 1.166 thorpej TCP_STAT_PUTREF(); 1256 1.1 cgd 1257 1.1 cgd MGETHDR(m, M_DONTWAIT, MT_HEADER); 1258 1.54 itojun if (m != NULL && max_linkhdr + hdrlen > MHLEN) { 1259 1.48 itojun MCLGET(m, M_DONTWAIT); 1260 1.48 itojun if ((m->m_flags & M_EXT) == 0) { 1261 1.48 itojun m_freem(m); 1262 1.48 itojun m = NULL; 1263 1.48 itojun } 1264 1.48 itojun } 1265 1.1 cgd if (m == NULL) { 1266 1.1 cgd error = ENOBUFS; 1267 1.1 cgd goto out; 1268 1.1 cgd } 1269 1.89 matt MCLAIM(m, &tcp_tx_mowner); 1270 1.1 cgd m->m_data += max_linkhdr; 1271 1.1 cgd m->m_len = hdrlen; 1272 1.1 cgd } 1273 1.186 ozaki m_reset_rcvif(m); 1274 1.48 itojun switch (af) { 1275 1.48 itojun case AF_INET: 1276 1.48 itojun ip = mtod(m, struct ip *); 1277 1.48 itojun #ifdef INET6 1278 1.48 itojun ip6 = NULL; 1279 1.48 itojun #endif 1280 1.48 itojun th = (struct tcphdr *)(ip + 1); 1281 1.48 itojun break; 1282 1.48 itojun #ifdef INET6 1283 1.48 itojun case AF_INET6: 1284 1.48 itojun ip = NULL; 1285 1.48 itojun ip6 = mtod(m, struct ip6_hdr *); 1286 1.48 itojun th = (struct tcphdr *)(ip6 + 1); 1287 1.48 itojun break; 1288 1.48 itojun #endif 1289 1.49 itojun default: /*pacify gcc*/ 1290 1.49 itojun ip = NULL; 1291 1.50 fvdl #ifdef INET6 1292 1.49 itojun ip6 = NULL; 1293 1.50 fvdl #endif 
1294 1.49 itojun th = NULL; 1295 1.49 itojun break; 1296 1.48 itojun } 1297 1.201 maxv if (tp->t_template == NULL) 1298 1.211 maxv panic("%s: no template", __func__); 1299 1.48 itojun if (tp->t_template->m_len < iphdrlen) 1300 1.211 maxv panic("%s: %d < %d", __func__, tp->t_template->m_len, iphdrlen); 1301 1.157 christos bcopy(mtod(tp->t_template, void *), mtod(m, void *), iphdrlen); 1302 1.1 cgd 1303 1.1 cgd /* 1304 1.143 rpaulo * If we are starting a connection, send ECN setup 1305 1.143 rpaulo * SYN packet. If we are on a retransmit, we may 1306 1.143 rpaulo * resend those bits a number of times as per 1307 1.143 rpaulo * RFC 3168. 1308 1.143 rpaulo */ 1309 1.143 rpaulo if (tp->t_state == TCPS_SYN_SENT && tcp_do_ecn) { 1310 1.143 rpaulo if (tp->t_flags & TF_SYN_REXMT) { 1311 1.143 rpaulo if (tp->t_ecn_retries--) 1312 1.143 rpaulo flags |= TH_ECE|TH_CWR; 1313 1.143 rpaulo } else { 1314 1.143 rpaulo flags |= TH_ECE|TH_CWR; 1315 1.143 rpaulo tp->t_ecn_retries = tcp_ecn_maxretries; 1316 1.143 rpaulo } 1317 1.143 rpaulo } 1318 1.143 rpaulo 1319 1.143 rpaulo if (TCP_ECN_ALLOWED(tp)) { 1320 1.143 rpaulo /* 1321 1.143 rpaulo * If the peer has ECN, mark data packets 1322 1.143 rpaulo * ECN capable. Ignore pure ack packets, retransmissions 1323 1.143 rpaulo * and window probes. 1324 1.143 rpaulo */ 1325 1.143 rpaulo if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && 1326 1.143 rpaulo !(tp->t_force && len == 1)) { 1327 1.170 matt ecn_tos = IPTOS_ECN_ECT0; 1328 1.166 thorpej TCP_STATINC(TCP_STAT_ECN_ECT); 1329 1.143 rpaulo } 1330 1.143 rpaulo 1331 1.143 rpaulo /* 1332 1.143 rpaulo * Reply with proper ECN notifications. 
1333 1.143 rpaulo */ 1334 1.143 rpaulo if (tp->t_flags & TF_ECN_SND_CWR) { 1335 1.143 rpaulo flags |= TH_CWR; 1336 1.143 rpaulo tp->t_flags &= ~TF_ECN_SND_CWR; 1337 1.201 maxv } 1338 1.143 rpaulo if (tp->t_flags & TF_ECN_SND_ECE) { 1339 1.143 rpaulo flags |= TH_ECE; 1340 1.143 rpaulo } 1341 1.143 rpaulo } 1342 1.143 rpaulo 1343 1.143 rpaulo /* 1344 1.9 mycroft * If we are doing retransmissions, then snd_nxt will 1345 1.9 mycroft * not reflect the first unsent octet. For ACK only 1346 1.9 mycroft * packets, we do not want the sequence number of the 1347 1.9 mycroft * retransmitted packet, we want the sequence number 1348 1.9 mycroft * of the next unsent octet. So, if there is no data 1349 1.9 mycroft * (and no SYN or FIN), use snd_max instead of snd_nxt 1350 1.9 mycroft * when filling in ti_seq. But if we are in persist 1351 1.9 mycroft * state, snd_max might reflect one byte beyond the 1352 1.9 mycroft * right edge of the window, so use snd_nxt in that 1353 1.9 mycroft * case, since we know we aren't doing a retransmission. 1354 1.9 mycroft * (retransmit and persist are mutually exclusive...) 1355 1.9 mycroft */ 1356 1.118 jonathan if (TCP_SACK_ENABLED(tp) && sack_rxmit) { 1357 1.118 jonathan th->th_seq = htonl(p->rxmit); 1358 1.118 jonathan p->rxmit += len; 1359 1.118 jonathan } else { 1360 1.118 jonathan if (len || (flags & (TH_SYN|TH_FIN)) || 1361 1.118 jonathan TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) 1362 1.118 jonathan th->th_seq = htonl(tp->snd_nxt); 1363 1.118 jonathan else 1364 1.118 jonathan th->th_seq = htonl(tp->snd_max); 1365 1.118 jonathan } 1366 1.48 itojun th->th_ack = htonl(tp->rcv_nxt); 1367 1.1 cgd if (optlen) { 1368 1.205 maxv memcpy(th + 1, opt, optlen); 1369 1.48 itojun th->th_off = (sizeof (struct tcphdr) + optlen) >> 2; 1370 1.1 cgd } 1371 1.48 itojun th->th_flags = flags; 1372 1.1 cgd /* 1373 1.1 cgd * Calculate receive window. Don't shrink window, 1374 1.1 cgd * but avoid silly window syndrome. 
1375 1.1 cgd */ 1376 1.18 thorpej if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)rxsegsize) 1377 1.1 cgd win = 0; 1378 1.9 mycroft if (win > (long)TCP_MAXWIN << tp->rcv_scale) 1379 1.9 mycroft win = (long)TCP_MAXWIN << tp->rcv_scale; 1380 1.112 chs if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt)) 1381 1.112 chs win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt); 1382 1.48 itojun th->th_win = htons((u_int16_t) (win>>tp->rcv_scale)); 1383 1.180 he if (th->th_win == 0) { 1384 1.180 he tp->t_sndzerowin++; 1385 1.180 he } 1386 1.1 cgd if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { 1387 1.16 kml u_int32_t urp = tp->snd_up - tp->snd_nxt; 1388 1.16 kml if (urp > IP_MAXPACKET) 1389 1.16 kml urp = IP_MAXPACKET; 1390 1.48 itojun th->th_urp = htons((u_int16_t)urp); 1391 1.48 itojun th->th_flags |= TH_URG; 1392 1.1 cgd } else 1393 1.1 cgd /* 1394 1.1 cgd * If no urgent pointer to send, then we pull 1395 1.1 cgd * the urgent pointer to the left edge of the send window 1396 1.1 cgd * so that it doesn't drift into the send window on sequence 1397 1.1 cgd * number wraparound. 
1398 1.1 cgd */ 1399 1.1 cgd tp->snd_up = tp->snd_una; /* drag it along */ 1400 1.1 cgd 1401 1.110 jonathan #ifdef TCP_SIGNATURE 1402 1.113 itojun if (sigoff && (tp->t_flags & TF_SIGNATURE)) { 1403 1.113 itojun struct secasvar *sav; 1404 1.113 itojun u_int8_t *sigp; 1405 1.113 itojun 1406 1.198 maxv sav = tcp_signature_getsav(m); 1407 1.113 itojun if (sav == NULL) { 1408 1.221 rin m_freem(m); 1409 1.201 maxv return EPERM; 1410 1.113 itojun } 1411 1.113 itojun 1412 1.113 itojun m->m_pkthdr.len = hdrlen + len; 1413 1.159 riz sigp = (char *)th + sizeof(*th) + sigoff; 1414 1.159 riz tcp_signature(m, th, (char *)th - mtod(m, char *), sav, sigp); 1415 1.113 itojun 1416 1.113 itojun key_sa_recordxfer(sav, m); 1417 1.197 ozaki KEY_SA_UNREF(&sav); 1418 1.113 itojun } 1419 1.110 jonathan #endif 1420 1.110 jonathan 1421 1.1 cgd /* 1422 1.66 thorpej * Set ourselves up to be checksummed just before the packet 1423 1.130 yamt * hits the wire. 1424 1.1 cgd */ 1425 1.48 itojun switch (af) { 1426 1.48 itojun case AF_INET: 1427 1.125 matt m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1428 1.120 matt if (use_tso) { 1429 1.120 matt m->m_pkthdr.segsz = txsegsize; 1430 1.125 matt m->m_pkthdr.csum_flags = M_CSUM_TSOv4; 1431 1.120 matt } else { 1432 1.130 yamt m->m_pkthdr.csum_flags = M_CSUM_TCPv4; 1433 1.120 matt if (len + optlen) { 1434 1.120 matt /* Fixup the pseudo-header checksum. */ 1435 1.120 matt /* XXXJRT Not IP Jumbogram safe. 
*/ 1436 1.120 matt th->th_sum = in_cksum_addword(th->th_sum, 1437 1.120 matt htons((u_int16_t) (len + optlen))); 1438 1.120 matt } 1439 1.66 thorpej } 1440 1.48 itojun break; 1441 1.48 itojun #ifdef INET6 1442 1.48 itojun case AF_INET6: 1443 1.138 yamt m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1444 1.151 yamt if (use_tso) { 1445 1.151 yamt m->m_pkthdr.segsz = txsegsize; 1446 1.151 yamt m->m_pkthdr.csum_flags = M_CSUM_TSOv6; 1447 1.151 yamt } else { 1448 1.151 yamt m->m_pkthdr.csum_flags = M_CSUM_TCPv6; 1449 1.151 yamt if (len + optlen) { 1450 1.151 yamt /* Fixup the pseudo-header checksum. */ 1451 1.151 yamt /* XXXJRT: Not IPv6 Jumbogram safe. */ 1452 1.151 yamt th->th_sum = in_cksum_addword(th->th_sum, 1453 1.151 yamt htons((u_int16_t) (len + optlen))); 1454 1.151 yamt } 1455 1.66 thorpej } 1456 1.48 itojun break; 1457 1.48 itojun #endif 1458 1.48 itojun } 1459 1.1 cgd 1460 1.1 cgd /* 1461 1.1 cgd * In transmit state, time the transmission and arrange for 1462 1.1 cgd * the retransmit. In persist state, just set snd_max. 1463 1.1 cgd */ 1464 1.38 thorpej if (tp->t_force == 0 || TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) { 1465 1.1 cgd tcp_seq startseq = tp->snd_nxt; 1466 1.1 cgd 1467 1.1 cgd /* 1468 1.1 cgd * Advance snd_nxt over sequence space of this segment. 1469 1.43 mycroft * There are no states in which we send both a SYN and a FIN, 1470 1.43 mycroft * so we collapse the tests for these flags. 1471 1.1 cgd */ 1472 1.43 mycroft if (flags & (TH_SYN|TH_FIN)) 1473 1.43 mycroft tp->snd_nxt++; 1474 1.118 jonathan if (sack_rxmit) 1475 1.118 jonathan goto timer; 1476 1.1 cgd tp->snd_nxt += len; 1477 1.1 cgd if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { 1478 1.1 cgd tp->snd_max = tp->snd_nxt; 1479 1.1 cgd /* 1480 1.1 cgd * Time this transmission if not a retransmission and 1481 1.1 cgd * not currently timing anything. 
1482 1.1 cgd */ 1483 1.73 thorpej if (tp->t_rtttime == 0) { 1484 1.73 thorpej tp->t_rtttime = tcp_now; 1485 1.1 cgd tp->t_rtseq = startseq; 1486 1.166 thorpej TCP_STATINC(TCP_STAT_SEGSTIMED); 1487 1.1 cgd } 1488 1.1 cgd } 1489 1.1 cgd 1490 1.1 cgd /* 1491 1.1 cgd * Set retransmit timer if not currently set, 1492 1.1 cgd * and not doing an ack or a keep-alive probe. 1493 1.1 cgd * Initial value for retransmit timer is smoothed 1494 1.1 cgd * round-trip time + 2 * round-trip time variance. 1495 1.1 cgd * Initialize shift counter which is used for backoff 1496 1.1 cgd * of retransmit time. 1497 1.1 cgd */ 1498 1.118 jonathan timer: 1499 1.184 matt if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) { 1500 1.184 matt if ((sack_rxmit && tp->snd_nxt != tp->snd_max) 1501 1.184 matt || tp->snd_nxt != tp->snd_una) { 1502 1.184 matt if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) { 1503 1.184 matt TCP_TIMER_DISARM(tp, TCPT_PERSIST); 1504 1.184 matt tp->t_rxtshift = 0; 1505 1.184 matt } 1506 1.184 matt TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); 1507 1.184 matt } else if (len == 0 && so->so_snd.sb_cc > 0 1508 1.184 matt && TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) { 1509 1.184 matt /* 1510 1.184 matt * If we are sending a window probe and there's 1511 1.184 matt * unacked data in the socket, make sure at 1512 1.184 matt * least the persist timer is running. 1513 1.184 matt */ 1514 1.1 cgd tp->t_rxtshift = 0; 1515 1.184 matt tcp_setpersist(tp); 1516 1.1 cgd } 1517 1.1 cgd } 1518 1.1 cgd } else 1519 1.1 cgd if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) 1520 1.1 cgd tp->snd_max = tp->snd_nxt + len; 1521 1.1 cgd 1522 1.67 abs #ifdef TCP_DEBUG 1523 1.1 cgd /* 1524 1.1 cgd * Trace. 1525 1.1 cgd */ 1526 1.91 itojun if (so->so_options & SO_DEBUG) 1527 1.48 itojun tcp_trace(TA_OUTPUT, tp->t_state, tp, m, 0); 1528 1.67 abs #endif 1529 1.1 cgd 1530 1.1 cgd /* 1531 1.1 cgd * Fill in IP length and desired time to live and 1532 1.1 cgd * send to IP level. 
There should be a better way 1533 1.1 cgd * to handle ttl and tos; we could keep them in 1534 1.1 cgd * the template, but need a way to checksum without them. 1535 1.1 cgd */ 1536 1.1 cgd m->m_pkthdr.len = hdrlen + len; 1537 1.19 kml 1538 1.48 itojun switch (af) { 1539 1.48 itojun case AF_INET: 1540 1.84 itojun ip->ip_len = htons(m->m_pkthdr.len); 1541 1.137 christos packetlen = m->m_pkthdr.len; 1542 1.215 ozaki if (tp->t_inpcb->inp_af == AF_INET) { 1543 1.216 ozaki ip->ip_ttl = in4p_ip(tp->t_inpcb).ip_ttl; 1544 1.216 ozaki ip->ip_tos = in4p_ip(tp->t_inpcb).ip_tos | ecn_tos; 1545 1.48 itojun } 1546 1.48 itojun #ifdef INET6 1547 1.215 ozaki else if (tp->t_inpcb->inp_af == AF_INET6) { 1548 1.218 ozaki ip->ip_ttl = in6pcb_selecthlim(tp->t_inpcb, NULL); /*XXX*/ 1549 1.170 matt ip->ip_tos = ecn_tos; /*XXX*/ 1550 1.48 itojun } 1551 1.48 itojun #endif 1552 1.48 itojun break; 1553 1.48 itojun #ifdef INET6 1554 1.48 itojun case AF_INET6: 1555 1.137 christos packetlen = m->m_pkthdr.len; 1556 1.48 itojun ip6->ip6_nxt = IPPROTO_TCP; 1557 1.215 ozaki if (tp->t_family == AF_INET6) { 1558 1.53 itojun /* 1559 1.53 itojun * we separately set hoplimit for every segment, since 1560 1.53 itojun * the user might want to change the value via 1561 1.53 itojun * setsockopt. Also, desired default hop limit might 1562 1.53 itojun * be changed via Neighbor Discovery. 1563 1.53 itojun */ 1564 1.218 ozaki ip6->ip6_hlim = in6pcb_selecthlim_rt(tp->t_inpcb); 1565 1.53 itojun } 1566 1.170 matt ip6->ip6_flow |= htonl(ecn_tos << 20); 1567 1.170 matt /* ip6->ip6_flow = ??? (from template) */ 1568 1.48 itojun /* ip6_plen will be filled in ip6_output(). 
*/ 1569 1.48 itojun break; 1570 1.48 itojun #endif 1571 1.137 christos default: /*pacify gcc*/ 1572 1.137 christos packetlen = 0; 1573 1.137 christos break; 1574 1.48 itojun } 1575 1.48 itojun 1576 1.48 itojun switch (af) { 1577 1.48 itojun case AF_INET: 1578 1.48 itojun { 1579 1.48 itojun struct mbuf *opts; 1580 1.48 itojun 1581 1.216 ozaki if (tp->t_inpcb->inp_af == AF_INET) 1582 1.48 itojun opts = tp->t_inpcb->inp_options; 1583 1.48 itojun else 1584 1.48 itojun opts = NULL; 1585 1.48 itojun error = ip_output(m, opts, ro, 1586 1.80 itojun (tp->t_mtudisc ? IP_MTUDISC : 0) | 1587 1.195 ozaki (so->so_options & SO_DONTROUTE), NULL, tp->t_inpcb); 1588 1.48 itojun break; 1589 1.48 itojun } 1590 1.48 itojun #ifdef INET6 1591 1.48 itojun case AF_INET6: 1592 1.48 itojun { 1593 1.48 itojun struct ip6_pktopts *opts; 1594 1.48 itojun 1595 1.216 ozaki if (tp->t_inpcb->inp_af == AF_INET6) 1596 1.216 ozaki opts = in6p_outputopts(tp->t_inpcb); 1597 1.48 itojun else 1598 1.48 itojun opts = NULL; 1599 1.158 dyoung error = ip6_output(m, opts, ro, so->so_options & SO_DONTROUTE, 1600 1.215 ozaki NULL, tp->t_inpcb, NULL); 1601 1.48 itojun break; 1602 1.48 itojun } 1603 1.48 itojun #endif 1604 1.49 itojun default: 1605 1.49 itojun error = EAFNOSUPPORT; 1606 1.49 itojun break; 1607 1.48 itojun } 1608 1.1 cgd if (error) { 1609 1.1 cgd out: 1610 1.1 cgd if (error == ENOBUFS) { 1611 1.166 thorpej TCP_STATINC(TCP_STAT_SELFQUENCH); 1612 1.215 ozaki tcp_quench(tp->t_inpcb); 1613 1.71 thorpej error = 0; 1614 1.219 bouyer } else if ((error == EHOSTUNREACH || error == ENETDOWN || 1615 1.219 bouyer error == EHOSTDOWN) && TCPS_HAVERCVDSYN(tp->t_state)) { 1616 1.1 cgd tp->t_softerror = error; 1617 1.71 thorpej error = 0; 1618 1.1 cgd } 1619 1.71 thorpej 1620 1.199 khorben /* Back out the sequence number advance. */ 1621 1.118 jonathan if (sack_rxmit) 1622 1.118 jonathan p->rxmit -= len; 1623 1.118 jonathan 1624 1.71 thorpej /* Restart the delayed ACK timer, if necessary. 
*/ 1625 1.71 thorpej if (tp->t_flags & TF_DELACK) 1626 1.71 thorpej TCP_RESTART_DELACK(tp); 1627 1.71 thorpej 1628 1.201 maxv return error; 1629 1.1 cgd } 1630 1.137 christos 1631 1.137 christos if (packetlen > tp->t_pmtud_mtu_sent) 1632 1.137 christos tp->t_pmtud_mtu_sent = packetlen; 1633 1.201 maxv 1634 1.166 thorpej tcps = TCP_STAT_GETREF(); 1635 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_SNDTOTAL); 1636 1.23 thorpej if (tp->t_flags & TF_DELACK) 1637 1.220 riastrad _NET_STATINC_REF(tcps, TCP_STAT_DELACK); 1638 1.166 thorpej TCP_STAT_PUTREF(); 1639 1.1 cgd 1640 1.1 cgd /* 1641 1.1 cgd * Data sent (as far as we can tell). 1642 1.1 cgd * If this advertises a larger window than any other segment, 1643 1.1 cgd * then remember the size of the advertised window. 1644 1.1 cgd * Any pending ACK has now been sent. 1645 1.1 cgd */ 1646 1.1 cgd if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 1647 1.1 cgd tp->rcv_adv = tp->rcv_nxt + win; 1648 1.9 mycroft tp->last_ack_sent = tp->rcv_nxt; 1649 1.26 thorpej tp->t_flags &= ~TF_ACKNOW; 1650 1.26 thorpej TCP_CLEAR_DELACK(tp); 1651 1.44 matt #ifdef DIAGNOSTIC 1652 1.44 matt if (maxburst < 0) 1653 1.44 matt printf("tcp_output: maxburst exceeded by %d\n", -maxburst); 1654 1.44 matt #endif 1655 1.149 rpaulo if (sendalot && (tp->t_congctl == &tcp_reno_ctl || --maxburst)) 1656 1.1 cgd goto again; 1657 1.201 maxv return 0; 1658 1.1 cgd } 1659 1.1 cgd /* tcp_setpersist: arm the TCPT_PERSIST timer of tp. Panics if the TCPT_REXMT timer is currently armed. The base interval is ((t_srtt >> 2) + t_rttvar) >> 3, raised to at least t_rttmin, then multiplied by tcp_backoff[t_rxtshift] and passed through TCPT_RANGESET with TCPTV_PERSMIN and TCPTV_PERSMAX (macro defined elsewhere; presumably it clamps nticks into that range -- confirm). Finally t_rxtshift is incremented unless it has already reached TCP_MAXRXTSHIFT. Returns nothing. */ 1660 1.6 mycroft void 1661 1.116 perry tcp_setpersist(struct tcpcb *tp) 1662 1.1 cgd { 1663 1.56 augustss int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2); 1664 1.38 thorpej int nticks; 1665 1.1 cgd 1666 1.38 thorpej if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) 1667 1.1 cgd panic("tcp_output REXMT"); 1668 1.1 cgd /* 1669 1.1 cgd * Start/restart persistence timer. 
1670 1.1 cgd */ /* Do not let the base interval fall below tp->t_rttmin. */ 1671 1.30 kml if (t < tp->t_rttmin) 1672 1.30 kml t = tp->t_rttmin; 1673 1.38 thorpej TCPT_RANGESET(nticks, t * tcp_backoff[tp->t_rxtshift], 1674 1.1 cgd TCPTV_PERSMIN, TCPTV_PERSMAX); 1675 1.38 thorpej TCP_TIMER_ARM(tp, TCPT_PERSIST, nticks); /* Step the backoff index so a later call reads the next tcp_backoff entry; stop at TCP_MAXRXTSHIFT. */ 1676 1.1 cgd if (tp->t_rxtshift < TCP_MAXRXTSHIFT) 1677 1.1 cgd tp->t_rxtshift++; 1678 1.1 cgd } 1679