Home | History | Annotate | Line # | Download | only in netinet
tcp_sack.c revision 1.2
      1  1.2      yamt /* $NetBSD: tcp_sack.c,v 1.2 2005/03/06 23:05:20 yamt Exp $ */
      2  1.1  jonathan 
      3  1.1  jonathan /*
      4  1.1  jonathan  * Copyright (c) 2005 The NetBSD Foundation, Inc.
      5  1.1  jonathan  * All rights reserved.
      6  1.1  jonathan  *
      7  1.1  jonathan  * This code is derived from software contributed to The NetBSD Foundation
      8  1.1  jonathan  * by Kentaro A. Kurahone.
      9  1.1  jonathan  *
     10  1.1  jonathan  * Redistribution and use in source and binary forms, with or without
     11  1.1  jonathan  * modification, are permitted provided that the following conditions
     12  1.1  jonathan  * are met:
     13  1.1  jonathan  * 1. Redistributions of source code must retain the above copyright
     14  1.1  jonathan  *    notice, this list of conditions and the following disclaimer.
     15  1.1  jonathan  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1  jonathan  *    notice, this list of conditions and the following disclaimer in the
     17  1.1  jonathan  *    documentation and/or other materials provided with the distribution.
     18  1.1  jonathan  * 3. All advertising materials mentioning features or use of this software
     19  1.1  jonathan  *    must display the following acknowledgement:
     20  1.1  jonathan  *	This product includes software developed by the NetBSD
     21  1.1  jonathan  *	Foundation, Inc. and its contributors.
     22  1.1  jonathan  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  1.1  jonathan  *    contributors may be used to endorse or promote products derived
     24  1.1  jonathan  *    from this software without specific prior written permission.
     25  1.1  jonathan  *
     26  1.1  jonathan  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  1.1  jonathan  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  1.1  jonathan  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  1.1  jonathan  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  1.1  jonathan  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  1.1  jonathan  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  1.1  jonathan  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  1.1  jonathan  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  1.1  jonathan  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  1.1  jonathan  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  1.1  jonathan  * POSSIBILITY OF SUCH DAMAGE.
     37  1.1  jonathan  */
     38  1.1  jonathan 
     39  1.1  jonathan /*
     40  1.1  jonathan  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
     41  1.1  jonathan  *	The Regents of the University of California.  All rights reserved.
     42  1.1  jonathan  *
     43  1.1  jonathan  * Redistribution and use in source and binary forms, with or without
     44  1.1  jonathan  * modification, are permitted provided that the following conditions
     45  1.1  jonathan  * are met:
     46  1.1  jonathan  * 1. Redistributions of source code must retain the above copyright
     47  1.1  jonathan  *    notice, this list of conditions and the following disclaimer.
     48  1.1  jonathan  * 2. Redistributions in binary form must reproduce the above copyright
     49  1.1  jonathan  *    notice, this list of conditions and the following disclaimer in the
     50  1.1  jonathan  *    documentation and/or other materials provided with the distribution.
     51  1.1  jonathan  * 4. Neither the name of the University nor the names of its contributors
     52  1.1  jonathan  *    may be used to endorse or promote products derived from this software
     53  1.1  jonathan  *    without specific prior written permission.
     54  1.1  jonathan  *
     55  1.1  jonathan  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     56  1.1  jonathan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     57  1.1  jonathan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     58  1.1  jonathan  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     59  1.1  jonathan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     60  1.1  jonathan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     61  1.1  jonathan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     62  1.1  jonathan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     63  1.1  jonathan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     64  1.1  jonathan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     65  1.1  jonathan  * SUCH DAMAGE.
     66  1.1  jonathan  *
     67  1.1  jonathan  *	@(#)tcp_sack.c	8.12 (Berkeley) 5/24/95
     68  1.1  jonathan  * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
     69  1.1  jonathan  */
     70  1.1  jonathan 
     71  1.1  jonathan /*
     72  1.1  jonathan  *	@@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
     73  1.1  jonathan  *
     74  1.1  jonathan  * NRL grants permission for redistribution and use in source and binary
     75  1.1  jonathan  * forms, with or without modification, of the software and documentation
     76  1.1  jonathan  * created at NRL provided that the following conditions are met:
     77  1.1  jonathan  *
     78  1.1  jonathan  * 1. Redistributions of source code must retain the above copyright
     79  1.1  jonathan  *    notice, this list of conditions and the following disclaimer.
     80  1.1  jonathan  * 2. Redistributions in binary form must reproduce the above copyright
     81  1.1  jonathan  *    notice, this list of conditions and the following disclaimer in the
     82  1.1  jonathan  *    documentation and/or other materials provided with the distribution.
     83  1.1  jonathan  * 3. All advertising materials mentioning features or use of this software
     84  1.1  jonathan  *    must display the following acknowledgements:
     85  1.1  jonathan  *	This product includes software developed by the University of
     86  1.1  jonathan  *	California, Berkeley and its contributors.
     87  1.1  jonathan  *	This product includes software developed at the Information
     88  1.1  jonathan  *	Technology Division, US Naval Research Laboratory.
     89  1.1  jonathan  * 4. Neither the name of the NRL nor the names of its contributors
     90  1.1  jonathan  *    may be used to endorse or promote products derived from this software
     91  1.1  jonathan  *    without specific prior written permission.
     92  1.1  jonathan  *
     93  1.1  jonathan  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
     94  1.1  jonathan  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     95  1.1  jonathan  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
     96  1.1  jonathan  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
     97  1.1  jonathan  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     98  1.1  jonathan  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     99  1.1  jonathan  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    100  1.1  jonathan  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    101  1.1  jonathan  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    102  1.1  jonathan  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    103  1.1  jonathan  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    104  1.1  jonathan  *
    105  1.1  jonathan  * The views and conclusions contained in the software and documentation
    106  1.1  jonathan  * are those of the authors and should not be interpreted as representing
    107  1.1  jonathan  * official policies, either expressed or implied, of the US Naval
    108  1.1  jonathan  * Research Laboratory (NRL).
    109  1.1  jonathan  */
    110  1.1  jonathan 
    111  1.1  jonathan #include <sys/cdefs.h>
    112  1.2      yamt __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.2 2005/03/06 23:05:20 yamt Exp $");
    113  1.1  jonathan 
    114  1.1  jonathan #include "opt_inet.h"
    115  1.1  jonathan #include "opt_ipsec.h"
    116  1.1  jonathan #include "opt_inet_csum.h"
    117  1.1  jonathan #include "opt_tcp_debug.h"
    118  1.1  jonathan 
    119  1.1  jonathan #include <sys/param.h>
    120  1.1  jonathan #include <sys/systm.h>
    121  1.1  jonathan #include <sys/malloc.h>
    122  1.1  jonathan #include <sys/mbuf.h>
    123  1.1  jonathan #include <sys/protosw.h>
    124  1.1  jonathan #include <sys/socket.h>
    125  1.1  jonathan #include <sys/socketvar.h>
    126  1.1  jonathan #include <sys/errno.h>
    127  1.1  jonathan #include <sys/syslog.h>
    128  1.1  jonathan #include <sys/pool.h>
    129  1.1  jonathan #include <sys/domain.h>
    130  1.1  jonathan #include <sys/kernel.h>
    131  1.1  jonathan 
    132  1.1  jonathan #include <net/if.h>
    133  1.1  jonathan #include <net/route.h>
    134  1.1  jonathan #include <net/if_types.h>
    135  1.1  jonathan 
    136  1.1  jonathan #include <netinet/in.h>
    137  1.1  jonathan #include <netinet/in_systm.h>
    138  1.1  jonathan #include <netinet/ip.h>
    139  1.1  jonathan #include <netinet/in_pcb.h>
    140  1.1  jonathan #include <netinet/in_var.h>
    141  1.1  jonathan #include <netinet/ip_var.h>
    142  1.1  jonathan 
    143  1.1  jonathan #ifdef INET6
    144  1.1  jonathan #ifndef INET
    145  1.1  jonathan #include <netinet/in.h>
    146  1.1  jonathan #endif
    147  1.1  jonathan #include <netinet/ip6.h>
    148  1.1  jonathan #include <netinet6/ip6_var.h>
    149  1.1  jonathan #include <netinet6/in6_pcb.h>
    150  1.1  jonathan #include <netinet6/ip6_var.h>
    151  1.1  jonathan #include <netinet6/in6_var.h>
    152  1.1  jonathan #include <netinet/icmp6.h>
    153  1.1  jonathan #include <netinet6/nd6.h>
    154  1.1  jonathan #endif
    155  1.1  jonathan 
    156  1.1  jonathan #ifndef INET6
    157  1.1  jonathan /* always need ip6.h for IP6_EXTHDR_GET */
    158  1.1  jonathan #include <netinet/ip6.h>
    159  1.1  jonathan #endif
    160  1.1  jonathan 
    161  1.1  jonathan #include <netinet/tcp.h>
    162  1.1  jonathan #include <netinet/tcp_fsm.h>
    163  1.1  jonathan #include <netinet/tcp_seq.h>
    164  1.1  jonathan #include <netinet/tcp_timer.h>
    165  1.1  jonathan #include <netinet/tcp_var.h>
    166  1.1  jonathan #include <netinet/tcpip.h>
    167  1.1  jonathan #include <netinet/tcp_debug.h>
    168  1.1  jonathan 
    169  1.1  jonathan #include <machine/stdarg.h>
    170  1.1  jonathan 
    171  1.1  jonathan #define SEQ_MIN(a, b)	((SEQ_LT(a, b)) ? (a) : (b))
    172  1.1  jonathan #define SEQ_MAX(a, b)	((SEQ_GT(a, b)) ? (a) : (b))
    173  1.1  jonathan 
    174  1.1  jonathan /* SACK block pool. */
    175  1.1  jonathan POOL_INIT(sackhole_pool, sizeof(struct sackhole), 0, 0, 0, "sackholepl", NULL);
    176  1.1  jonathan 
    177  1.1  jonathan void
    178  1.1  jonathan tcp_update_sack_list(struct tcpcb *tp)
    179  1.1  jonathan {
    180  1.1  jonathan 	int i = 0;
    181  1.1  jonathan 	struct ipqent *tiqe = NULL;
    182  1.1  jonathan 
    183  1.1  jonathan 	if (!TCP_SACK_ENABLED(tp) || (tp->t_flags & TF_SIGNATURE)) {
    184  1.1  jonathan 		/* Can't SACK this connection. */
    185  1.1  jonathan 		return;
    186  1.1  jonathan 	}
    187  1.1  jonathan 
    188  1.1  jonathan 	/*
    189  1.1  jonathan 	 * If possible, tack on the D-SACK block. (RFC2883)
    190  1.1  jonathan 	 */
    191  1.1  jonathan 	if (tp->rcv_sack_flags & TCPSACK_HAVED) {
    192  1.1  jonathan 		tp->rcv_sack_block[0].left = tp->rcv_dsack_block.left;
    193  1.1  jonathan 		tp->rcv_sack_block[0].right = tp->rcv_dsack_block.right;
    194  1.1  jonathan 		tp->rcv_sack_flags &= ~TCPSACK_HAVED;
    195  1.1  jonathan 		i++;
    196  1.1  jonathan 	}
    197  1.1  jonathan 
    198  1.1  jonathan 	/*
    199  1.1  jonathan 	 * Build up a list of holes in the TCP space.  Note that
    200  1.1  jonathan 	 * the first SACK block is always the most recent segment
    201  1.1  jonathan 	 * received.
    202  1.1  jonathan 	 */
    203  1.1  jonathan 	TAILQ_FOREACH(tiqe, &tp->timeq, ipqe_timeq) {
    204  1.1  jonathan 		tp->rcv_sack_block[i].left = tiqe->ipqe_seq;
    205  1.1  jonathan 		tp->rcv_sack_block[i].right = tiqe->ipqe_seq + tiqe->ipqe_len;
    206  1.1  jonathan 		i++;
    207  1.1  jonathan 		if (i >= TCP_SACK_MAX) {
    208  1.1  jonathan 			break;
    209  1.1  jonathan 		}
    210  1.1  jonathan 	}
    211  1.1  jonathan 
    212  1.1  jonathan 	/* If we can SACK, do so. */
    213  1.1  jonathan 	tp->rcv_sack_num = i;
    214  1.1  jonathan }
    215  1.1  jonathan 
    216  1.1  jonathan void
    217  1.1  jonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
    218  1.1  jonathan {
    219  1.1  jonathan 	if (TCP_SACK_ENABLED(tp)) {
    220  1.1  jonathan 		tp->rcv_dsack_block.left = seq;
    221  1.1  jonathan 		tp->rcv_dsack_block.right = seq + len;
    222  1.1  jonathan 		tp->rcv_sack_flags |= TCPSACK_HAVED;
    223  1.1  jonathan 	}
    224  1.1  jonathan }
    225  1.1  jonathan 
    226  1.1  jonathan void
    227  1.1  jonathan tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
    228  1.1  jonathan {
    229  1.1  jonathan 	struct sackblk t_sack_block[TCP_SACK_MAX];
    230  1.1  jonathan 	struct sackblk *sack = NULL;
    231  1.1  jonathan 	struct sackhole *cur = NULL;
    232  1.1  jonathan 	struct sackhole *tmp = NULL;
    233  1.1  jonathan 	u_int32_t *lp = (u_int32_t *) (cp + 2);
    234  1.1  jonathan 	int i, j, num_sack_blks;
    235  1.1  jonathan 	tcp_seq left, right, acked;
    236  1.1  jonathan 
    237  1.1  jonathan 	/*
    238  1.1  jonathan 	 * If we aren't processing SACK responses, or the peer
    239  1.1  jonathan 	 * sends us a sack option with invalid length, don't
    240  1.1  jonathan 	 * update the scoreboard.
    241  1.1  jonathan 	 */
    242  1.1  jonathan 	if (!TCP_SACK_ENABLED(tp) ||
    243  1.1  jonathan 			(optlen % 8 != 2 || optlen < 10)) {
    244  1.1  jonathan 		return;
    245  1.1  jonathan 	}
    246  1.1  jonathan 
    247  1.1  jonathan 	/*
    248  1.1  jonathan 	 * Extract SACK blocks.
    249  1.1  jonathan 	 *
    250  1.1  jonathan 	 * Note that t_sack_block is sorted so that we only need to do
    251  1.1  jonathan 	 * one pass over the sequence number space. (SACK "fast-path")
    252  1.1  jonathan 	 */
    253  1.1  jonathan 	num_sack_blks = optlen / 8;
    254  1.1  jonathan 	acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
    255  1.1  jonathan 	for (i = 0; i < num_sack_blks; i++, lp += 2) {
    256  1.1  jonathan 		left = ntohl(*lp);
    257  1.1  jonathan 		right = ntohl(*(lp + 1));
    258  1.1  jonathan 
    259  1.1  jonathan 		if ((SEQ_LEQ(right, acked)) ||
    260  1.1  jonathan 				SEQ_GEQ(left, tp->snd_max) ||
    261  1.1  jonathan 				SEQ_GEQ(left, right)) {
    262  1.1  jonathan 			/* SACK entry that's old, or invalid. */
    263  1.1  jonathan 			i--;
    264  1.1  jonathan 			num_sack_blks--;
    265  1.1  jonathan 			continue;
    266  1.1  jonathan 		}
    267  1.1  jonathan 
    268  1.1  jonathan 		/* Insertion sort. */
    269  1.2      yamt 		for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
    270  1.2      yamt 		    j--) {
    271  1.1  jonathan 			t_sack_block[j].left = t_sack_block[j - 1].left;
    272  1.1  jonathan 			t_sack_block[j].right = t_sack_block[j - 1].right;
    273  1.1  jonathan 		}
    274  1.1  jonathan 		t_sack_block[j].left = left;
    275  1.1  jonathan 		t_sack_block[j].right = right;
    276  1.1  jonathan 	}
    277  1.1  jonathan 
    278  1.1  jonathan 	/* Update the scoreboard. */
    279  1.1  jonathan 	cur = TAILQ_FIRST(&tp->snd_holes);
    280  1.1  jonathan 	for (i = 0; i < num_sack_blks; i++) {
    281  1.1  jonathan 		sack = &t_sack_block[i];
    282  1.1  jonathan 		/*
    283  1.1  jonathan 		 * FACK TCP.  Update snd_fack so we can enter Fast
    284  1.1  jonathan 		 * Recovery early.
    285  1.1  jonathan 		 */
    286  1.1  jonathan 		if (SEQ_GEQ(sack->right, tp->snd_fack))
    287  1.1  jonathan 			tp->snd_fack = sack->right;
    288  1.1  jonathan 
    289  1.1  jonathan 		if (TAILQ_EMPTY(&tp->snd_holes)) {
    290  1.1  jonathan 			/* First hole. */
    291  1.2      yamt 			cur = (struct sackhole *)
    292  1.2      yamt 			    pool_get(&sackhole_pool, PR_NOWAIT);
    293  1.1  jonathan 			if (cur == NULL) {
    294  1.1  jonathan 				/* ENOBUFS, bail out*/
    295  1.1  jonathan 				return;
    296  1.1  jonathan 			}
    297  1.1  jonathan 			cur->start = th->th_ack;
    298  1.1  jonathan 			cur->end = sack->left;
    299  1.1  jonathan 			cur->rxmit = cur->start;
    300  1.1  jonathan 			tp->rcv_lastsack = sack->right;
    301  1.1  jonathan 			TAILQ_INSERT_HEAD(&tp->snd_holes, cur, sackhole_q);
    302  1.1  jonathan 			continue; /* With next sack block */
    303  1.1  jonathan 		}
    304  1.1  jonathan 
    305  1.1  jonathan 		/* Go through the list of holes. */
    306  1.1  jonathan 		while (cur) {
    307  1.1  jonathan 			if (SEQ_LEQ(sack->left, cur->start))
    308  1.1  jonathan 				/* SACKs data before the current hole */
    309  1.1  jonathan 				break; /* No use going through more holes */
    310  1.1  jonathan 
    311  1.1  jonathan 			if (SEQ_GEQ(sack->left, cur->end)) {
    312  1.1  jonathan 				/* SACKs data beyond the current hole */
    313  1.1  jonathan 				cur = TAILQ_NEXT(cur, sackhole_q);
    314  1.1  jonathan 				continue;
    315  1.1  jonathan 			}
    316  1.1  jonathan 
    317  1.1  jonathan 			if (SEQ_LEQ(sack->left, cur->start)) {
    318  1.1  jonathan 				/* Data acks at least the beginning of hole */
    319  1.1  jonathan 				if (SEQ_GEQ(sack->right, cur->end)) {
    320  1.1  jonathan 					/* Acks entire hole, so delete hole */
    321  1.1  jonathan 					tmp = cur;
    322  1.1  jonathan 					cur = TAILQ_NEXT(cur, sackhole_q);
    323  1.2      yamt 					TAILQ_REMOVE(&tp->snd_holes, tmp,
    324  1.2      yamt 					    sackhole_q);
    325  1.1  jonathan 					pool_put(&sackhole_pool, tmp);
    326  1.1  jonathan 					break;
    327  1.1  jonathan 				}
    328  1.1  jonathan 
    329  1.1  jonathan 				/* Otherwise, move start of hole forward */
    330  1.1  jonathan 				cur->start = sack->right;
    331  1.1  jonathan 				cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
    332  1.1  jonathan 				cur = TAILQ_NEXT(cur, sackhole_q);
    333  1.1  jonathan 				break;
    334  1.1  jonathan 			}
    335  1.1  jonathan 
    336  1.1  jonathan 			if (SEQ_GEQ(sack->right, cur->end)) {
    337  1.1  jonathan 				/* Move end of hole backward. */
    338  1.1  jonathan 				cur->end = sack->left;
    339  1.1  jonathan 				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
    340  1.1  jonathan 				cur = TAILQ_NEXT(cur, sackhole_q);
    341  1.1  jonathan 				break;
    342  1.1  jonathan 			}
    343  1.1  jonathan 
    344  1.1  jonathan 			if (SEQ_LT(cur->start, sack->left) &&
    345  1.1  jonathan 			    SEQ_GT(cur->end, sack->right)) {
    346  1.1  jonathan 				/*
    347  1.1  jonathan 				 * ACKs some data in middle of a hole; need to
    348  1.1  jonathan 				 * split current hole
    349  1.1  jonathan 				 */
    350  1.1  jonathan 				tmp = (struct sackhole *)
    351  1.2      yamt 				    pool_get(&sackhole_pool, PR_NOWAIT);
    352  1.1  jonathan 				if (tmp == NULL) {
    353  1.1  jonathan 					/* ENOBUFS, bail out. */
    354  1.1  jonathan 					return;
    355  1.1  jonathan 				}
    356  1.1  jonathan 				tmp->start = sack->right;
    357  1.1  jonathan 				tmp->end = cur->end;
    358  1.1  jonathan 				tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
    359  1.1  jonathan 				cur->end = sack->left;
    360  1.1  jonathan 				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
    361  1.1  jonathan 				TAILQ_INSERT_AFTER(&tp->snd_holes, cur, tmp,
    362  1.1  jonathan 						sackhole_q);
    363  1.1  jonathan 				cur = TAILQ_NEXT(tmp, sackhole_q);
    364  1.1  jonathan 				break;
    365  1.1  jonathan 			}
    366  1.1  jonathan 		}
    367  1.1  jonathan 
    368  1.1  jonathan 		/* At this point, we have reached the tail of the list. */
    369  1.1  jonathan 		if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
    370  1.1  jonathan 			/*
    371  1.1  jonathan 			 * Need to append new hole at end.
    372  1.1  jonathan 			 */
    373  1.1  jonathan 			tmp = (struct sackhole *)
    374  1.1  jonathan 				pool_get(&sackhole_pool, PR_NOWAIT);
    375  1.1  jonathan 			if (tmp == NULL)
    376  1.1  jonathan 				continue; /* ENOBUFS */
    377  1.1  jonathan 			tmp->start = tp->rcv_lastsack;
    378  1.1  jonathan 			tmp->end = sack->left;
    379  1.1  jonathan 			tmp->rxmit = tmp->start;
    380  1.1  jonathan 			tp->rcv_lastsack = sack->right;
    381  1.1  jonathan 			TAILQ_INSERT_TAIL(&tp->snd_holes, tmp, sackhole_q);
    382  1.1  jonathan 			cur = tmp;
    383  1.1  jonathan 		}
    384  1.1  jonathan 	}
    385  1.1  jonathan }
    386  1.1  jonathan 
    387  1.1  jonathan void
    388  1.1  jonathan tcp_del_sackholes(struct tcpcb *tp, struct tcphdr *th)
    389  1.1  jonathan {
    390  1.1  jonathan 	/* Max because this could be an older ack that just arrived. */
    391  1.1  jonathan 	tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
    392  1.1  jonathan 		th->th_ack : tp->snd_una;
    393  1.1  jonathan 	struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
    394  1.1  jonathan 	struct sackhole *tmp;
    395  1.1  jonathan 
    396  1.1  jonathan 	while (cur) {
    397  1.1  jonathan 		if (SEQ_LEQ(cur->end, lastack)) {
    398  1.1  jonathan 			tmp = cur;
    399  1.1  jonathan 			cur = TAILQ_NEXT(cur, sackhole_q);
    400  1.1  jonathan 			TAILQ_REMOVE(&tp->snd_holes, tmp, sackhole_q);
    401  1.1  jonathan 			pool_put(&sackhole_pool, tmp);
    402  1.1  jonathan 		} else if (SEQ_LT(cur->start, lastack)) {
    403  1.1  jonathan 			cur->start = lastack;
    404  1.1  jonathan 			if (SEQ_LT(cur->rxmit, cur->start))
    405  1.1  jonathan 				cur->rxmit = cur->start;
    406  1.1  jonathan 			break;
    407  1.1  jonathan 		} else
    408  1.1  jonathan 			break;
    409  1.1  jonathan 
    410  1.1  jonathan 	}
    411  1.1  jonathan }
    412  1.1  jonathan 
    413  1.1  jonathan void
    414  1.1  jonathan tcp_free_sackholes(struct tcpcb *tp)
    415  1.1  jonathan {
    416  1.1  jonathan 	struct sackhole *sack;
    417  1.1  jonathan 
    418  1.1  jonathan 	/* Free up the SACK hole list. */
    419  1.1  jonathan 	while (!TAILQ_EMPTY(&tp->snd_holes)) {
    420  1.1  jonathan 		sack = TAILQ_FIRST(&tp->snd_holes);
    421  1.1  jonathan 		TAILQ_REMOVE(&tp->snd_holes, sack, sackhole_q);
    422  1.1  jonathan 		pool_put(&sackhole_pool, sack);
    423  1.1  jonathan 	}
    424  1.1  jonathan }
    425  1.1  jonathan 
    426  1.1  jonathan /*
    427  1.1  jonathan  * Implements the SACK response to a new ack, checking for partial acks
    428  1.1  jonathan  * in fast recovery.
    429  1.1  jonathan  */
    430  1.1  jonathan void
    431  1.1  jonathan tcp_sack_newack(struct tcpcb *tp, struct tcphdr *th)
    432  1.1  jonathan {
    433  1.1  jonathan 	if (tp->t_partialacks < 0) {
    434  1.1  jonathan 		/*
    435  1.1  jonathan 		 * Not in fast recovery.  Reset the duplicate ack
    436  1.1  jonathan 		 * counter.
    437  1.1  jonathan 		 */
    438  1.1  jonathan 		tp->t_dupacks = 0;
    439  1.1  jonathan 	} else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
    440  1.1  jonathan 		/*
    441  1.1  jonathan 		 * Partial ack handling within a sack recovery episode.
    442  1.1  jonathan 		 * Keeping this very simple for now. When a partial ack
    443  1.1  jonathan 		 * is received, force snd_cwnd to a value that will allow
    444  1.1  jonathan 		 * the sender to transmit no more than 2 segments.
    445  1.1  jonathan 		 * If necessary, a fancier scheme can be adopted at a
    446  1.1  jonathan 		 * later point, but for now, the goal is to prevent the
    447  1.1  jonathan 		 * sender from bursting a large amount of data in the midst
    448  1.1  jonathan 		 * of sack recovery.
    449  1.1  jonathan 		 */
    450  1.1  jonathan 		int num_segs = 1;
    451  1.1  jonathan 		int sack_bytes_rxmt = 0;
    452  1.1  jonathan 
    453  1.1  jonathan 		tp->t_partialacks++;
    454  1.1  jonathan 		TCP_TIMER_DISARM(tp, TCPT_REXMT);
    455  1.1  jonathan 		tp->t_rtttime = 0;
    456  1.1  jonathan 
    457  1.2      yamt 	 	/*
    458  1.2      yamt 		 * send one or 2 segments based on how much new data was acked
    459  1.2      yamt 		 */
    460  1.1  jonathan  		if (((th->th_ack - tp->snd_una) / tp->t_segsz) > 2)
    461  1.1  jonathan  			num_segs = 2;
    462  1.1  jonathan 	 	(void)tcp_sack_output(tp, &sack_bytes_rxmt);
    463  1.2      yamt  		tp->snd_cwnd = sack_bytes_rxmt +
    464  1.2      yamt 		    (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_segsz;
    465  1.1  jonathan   		tp->t_flags |= TF_ACKNOW;
    466  1.1  jonathan 	  	(void) tcp_output(tp);
    467  1.1  jonathan 	} else {
    468  1.1  jonathan 		/*
    469  1.1  jonathan 		 * Complete ack, inflate the congestion window to
    470  1.1  jonathan                  * ssthresh and exit fast recovery.
    471  1.1  jonathan 		 *
    472  1.1  jonathan 		 * Window inflation should have left us with approx.
    473  1.1  jonathan 		 * snd_ssthresh outstanding data.  But in case we
    474  1.1  jonathan 		 * would be inclined to send a burst, better to do
    475  1.1  jonathan 		 * it via the slow start mechanism.
    476  1.1  jonathan 		 */
    477  1.1  jonathan 		if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
    478  1.1  jonathan 			tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
    479  1.1  jonathan 			    + tp->t_segsz;
    480  1.1  jonathan 		else
    481  1.1  jonathan 			tp->snd_cwnd = tp->snd_ssthresh;
    482  1.1  jonathan 		tp->t_partialacks = -1;
    483  1.1  jonathan 		tp->t_dupacks = 0;
    484  1.1  jonathan 		if (SEQ_GT(th->th_ack, tp->snd_fack))
    485  1.1  jonathan 			tp->snd_fack = th->th_ack;
    486  1.1  jonathan 	}
    487  1.1  jonathan }
    488  1.1  jonathan 
    489  1.1  jonathan /*
    490  1.1  jonathan  * Returns pointer to a sackhole if there are any pending retransmissions;
    491  1.1  jonathan  * NULL otherwise.
    492  1.1  jonathan  */
    493  1.1  jonathan struct sackhole *
    494  1.1  jonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
    495  1.1  jonathan {
    496  1.1  jonathan 	struct sackhole *cur = NULL;
    497  1.1  jonathan 
    498  1.1  jonathan 	if(!TCP_SACK_ENABLED(tp))
    499  1.1  jonathan 		return (NULL);
    500  1.1  jonathan 
    501  1.1  jonathan 	*sack_bytes_rexmt = 0;
    502  1.1  jonathan 	TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
    503  1.1  jonathan 		if (SEQ_LT(cur->rxmit, cur->end)) {
    504  1.2      yamt 			if (SEQ_LT(cur->rxmit, tp->snd_una)) {
    505  1.2      yamt 				/* old SACK hole */
    506  1.1  jonathan 				continue;
    507  1.1  jonathan 			}
    508  1.1  jonathan 			*sack_bytes_rexmt += (cur->rxmit - cur->start);
    509  1.1  jonathan 			break;
    510  1.1  jonathan 		}
    511  1.1  jonathan 		*sack_bytes_rexmt += (cur->rxmit - cur->start);
    512  1.1  jonathan 	}
    513  1.1  jonathan 
    514  1.1  jonathan 	return (cur);
    515  1.1  jonathan }
    516  1.1  jonathan 
    517  1.1  jonathan /*
    518  1.1  jonathan  * After a timeout, the SACK list may be rebuilt.  This SACK information
    519  1.1  jonathan  * should be used to avoid retransmitting SACKed data.  This function
    520  1.1  jonathan  * traverses the SACK list to see if snd_nxt should be moved forward.
    521  1.1  jonathan  */
    522  1.1  jonathan void
    523  1.1  jonathan tcp_sack_adjust(struct tcpcb *tp)
    524  1.1  jonathan {
    525  1.1  jonathan 	struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
    526  1.1  jonathan 	struct sackhole *n = NULL;
    527  1.1  jonathan 
    528  1.1  jonathan 	if (TAILQ_EMPTY(&tp->snd_holes))
    529  1.1  jonathan 		return; /* No holes */
    530  1.1  jonathan 	if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
    531  1.1  jonathan 		return; /* We're already beyond any SACKed blocks */
    532  1.1  jonathan 
    533  1.1  jonathan 	/*
    534  1.1  jonathan 	 * Two cases for which we want to advance snd_nxt:
    535  1.1  jonathan 	 * i) snd_nxt lies between end of one hole and beginning of another
    536  1.1  jonathan 	 * ii) snd_nxt lies between end of last hole and rcv_lastsack
    537  1.1  jonathan 	 */
    538  1.1  jonathan 	while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
    539  1.1  jonathan 		if (SEQ_LT(tp->snd_nxt, cur->end))
    540  1.1  jonathan 			return;
    541  1.1  jonathan 		if (SEQ_GEQ(tp->snd_nxt, n->start))
    542  1.1  jonathan 			cur = n;
    543  1.1  jonathan 		else {
    544  1.1  jonathan 			tp->snd_nxt = n->start;
    545  1.1  jonathan 			return;
    546  1.1  jonathan 		}
    547  1.1  jonathan 	}
    548  1.1  jonathan 	if (SEQ_LT(tp->snd_nxt, cur->end))
    549  1.1  jonathan 		return;
    550  1.1  jonathan 	tp->snd_nxt = tp->rcv_lastsack;
    551  1.1  jonathan 
    552  1.1  jonathan 	return;
    553  1.1  jonathan }
    554