Home | History | Annotate | Line # | Download | only in netinet
tcp_sack.c revision 1.12
      1  1.11  kurahone /* $NetBSD: tcp_sack.c,v 1.12 2005/04/05 01:07:17 kurahone Exp $ */
      2   1.1  jonathan 
      3   1.1  jonathan /*
      4   1.1  jonathan  * Copyright (c) 2005 The NetBSD Foundation, Inc.
      5   1.1  jonathan  * All rights reserved.
      6   1.1  jonathan  *
      7   1.1  jonathan  * This code is derived from software contributed to The NetBSD Foundation
      8   1.1  jonathan  * by Kentaro A. Kurahone.
      9   1.1  jonathan  *
     10   1.1  jonathan  * Redistribution and use in source and binary forms, with or without
     11   1.1  jonathan  * modification, are permitted provided that the following conditions
     12   1.1  jonathan  * are met:
     13   1.1  jonathan  * 1. Redistributions of source code must retain the above copyright
     14   1.1  jonathan  *    notice, this list of conditions and the following disclaimer.
     15   1.1  jonathan  * 2. Redistributions in binary form must reproduce the above copyright
     16   1.1  jonathan  *    notice, this list of conditions and the following disclaimer in the
     17   1.1  jonathan  *    documentation and/or other materials provided with the distribution.
     18   1.1  jonathan  * 3. All advertising materials mentioning features or use of this software
     19   1.1  jonathan  *    must display the following acknowledgement:
     20   1.1  jonathan  *	This product includes software developed by the NetBSD
     21   1.1  jonathan  *	Foundation, Inc. and its contributors.
     22   1.1  jonathan  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23   1.1  jonathan  *    contributors may be used to endorse or promote products derived
     24   1.1  jonathan  *    from this software without specific prior written permission.
     25   1.1  jonathan  *
     26   1.1  jonathan  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27   1.1  jonathan  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28   1.1  jonathan  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29   1.1  jonathan  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30   1.1  jonathan  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31   1.1  jonathan  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32   1.1  jonathan  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33   1.1  jonathan  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34   1.1  jonathan  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35   1.1  jonathan  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36   1.1  jonathan  * POSSIBILITY OF SUCH DAMAGE.
     37   1.1  jonathan  */
     38   1.1  jonathan 
     39   1.1  jonathan /*
     40   1.1  jonathan  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
     41   1.1  jonathan  *	The Regents of the University of California.  All rights reserved.
     42   1.1  jonathan  *
     43   1.1  jonathan  * Redistribution and use in source and binary forms, with or without
     44   1.1  jonathan  * modification, are permitted provided that the following conditions
     45   1.1  jonathan  * are met:
     46   1.1  jonathan  * 1. Redistributions of source code must retain the above copyright
     47   1.1  jonathan  *    notice, this list of conditions and the following disclaimer.
     48   1.1  jonathan  * 2. Redistributions in binary form must reproduce the above copyright
     49   1.1  jonathan  *    notice, this list of conditions and the following disclaimer in the
     50   1.1  jonathan  *    documentation and/or other materials provided with the distribution.
     51   1.1  jonathan  * 4. Neither the name of the University nor the names of its contributors
     52   1.1  jonathan  *    may be used to endorse or promote products derived from this software
     53   1.1  jonathan  *    without specific prior written permission.
     54   1.1  jonathan  *
     55   1.1  jonathan  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     56   1.1  jonathan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     57   1.1  jonathan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     58   1.1  jonathan  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     59   1.1  jonathan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     60   1.1  jonathan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     61   1.1  jonathan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     62   1.1  jonathan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     63   1.1  jonathan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     64   1.1  jonathan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     65   1.1  jonathan  * SUCH DAMAGE.
     66   1.1  jonathan  *
     67   1.1  jonathan  *	@(#)tcp_sack.c	8.12 (Berkeley) 5/24/95
     68   1.1  jonathan  * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
     69   1.1  jonathan  */
     70   1.1  jonathan 
     71   1.1  jonathan /*
     72   1.1  jonathan  *	@@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
     73   1.1  jonathan  *
     74   1.1  jonathan  * NRL grants permission for redistribution and use in source and binary
     75   1.1  jonathan  * forms, with or without modification, of the software and documentation
     76   1.1  jonathan  * created at NRL provided that the following conditions are met:
     77   1.1  jonathan  *
     78   1.1  jonathan  * 1. Redistributions of source code must retain the above copyright
     79   1.1  jonathan  *    notice, this list of conditions and the following disclaimer.
     80   1.1  jonathan  * 2. Redistributions in binary form must reproduce the above copyright
     81   1.1  jonathan  *    notice, this list of conditions and the following disclaimer in the
     82   1.1  jonathan  *    documentation and/or other materials provided with the distribution.
     83   1.1  jonathan  * 3. All advertising materials mentioning features or use of this software
     84   1.1  jonathan  *    must display the following acknowledgements:
     85   1.1  jonathan  *	This product includes software developed by the University of
     86   1.1  jonathan  *	California, Berkeley and its contributors.
     87   1.1  jonathan  *	This product includes software developed at the Information
     88   1.1  jonathan  *	Technology Division, US Naval Research Laboratory.
     89   1.1  jonathan  * 4. Neither the name of the NRL nor the names of its contributors
     90   1.1  jonathan  *    may be used to endorse or promote products derived from this software
     91   1.1  jonathan  *    without specific prior written permission.
     92   1.1  jonathan  *
     93   1.1  jonathan  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
     94   1.1  jonathan  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     95   1.1  jonathan  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
     96   1.1  jonathan  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
     97   1.1  jonathan  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     98   1.1  jonathan  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     99   1.1  jonathan  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    100   1.1  jonathan  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    101   1.1  jonathan  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    102   1.1  jonathan  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    103   1.1  jonathan  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    104   1.1  jonathan  *
    105   1.1  jonathan  * The views and conclusions contained in the software and documentation
    106   1.1  jonathan  * are those of the authors and should not be interpreted as representing
    107   1.1  jonathan  * official policies, either expressed or implied, of the US Naval
    108   1.1  jonathan  * Research Laboratory (NRL).
    109   1.1  jonathan  */
    110   1.1  jonathan 
    111   1.1  jonathan #include <sys/cdefs.h>
    112  1.11  kurahone __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.12 2005/04/05 01:07:17 kurahone Exp $");
    113   1.1  jonathan 
    114   1.1  jonathan #include "opt_inet.h"
    115   1.1  jonathan #include "opt_ipsec.h"
    116   1.1  jonathan #include "opt_inet_csum.h"
    117   1.1  jonathan #include "opt_tcp_debug.h"
    118   1.1  jonathan 
    119   1.1  jonathan #include <sys/param.h>
    120   1.1  jonathan #include <sys/systm.h>
    121   1.1  jonathan #include <sys/malloc.h>
    122   1.1  jonathan #include <sys/mbuf.h>
    123   1.1  jonathan #include <sys/protosw.h>
    124   1.1  jonathan #include <sys/socket.h>
    125   1.1  jonathan #include <sys/socketvar.h>
    126   1.1  jonathan #include <sys/errno.h>
    127   1.1  jonathan #include <sys/syslog.h>
    128   1.1  jonathan #include <sys/pool.h>
    129   1.1  jonathan #include <sys/domain.h>
    130   1.1  jonathan #include <sys/kernel.h>
    131   1.1  jonathan 
    132   1.1  jonathan #include <net/if.h>
    133   1.1  jonathan #include <net/route.h>
    134   1.1  jonathan #include <net/if_types.h>
    135   1.1  jonathan 
    136   1.1  jonathan #include <netinet/in.h>
    137   1.1  jonathan #include <netinet/in_systm.h>
    138   1.1  jonathan #include <netinet/ip.h>
    139   1.1  jonathan #include <netinet/in_pcb.h>
    140   1.1  jonathan #include <netinet/in_var.h>
    141   1.1  jonathan #include <netinet/ip_var.h>
    142   1.1  jonathan 
    143   1.1  jonathan #ifdef INET6
    144   1.1  jonathan #ifndef INET
    145   1.1  jonathan #include <netinet/in.h>
    146   1.1  jonathan #endif
    147   1.1  jonathan #include <netinet/ip6.h>
    148   1.1  jonathan #include <netinet6/ip6_var.h>
    149   1.1  jonathan #include <netinet6/in6_pcb.h>
    150   1.1  jonathan #include <netinet6/ip6_var.h>
    151   1.1  jonathan #include <netinet6/in6_var.h>
    152   1.1  jonathan #include <netinet/icmp6.h>
    153   1.1  jonathan #include <netinet6/nd6.h>
    154   1.1  jonathan #endif
    155   1.1  jonathan 
    156   1.1  jonathan #ifndef INET6
    157   1.1  jonathan /* always need ip6.h for IP6_EXTHDR_GET */
    158   1.1  jonathan #include <netinet/ip6.h>
    159   1.1  jonathan #endif
    160   1.1  jonathan 
    161   1.1  jonathan #include <netinet/tcp.h>
    162   1.1  jonathan #include <netinet/tcp_fsm.h>
    163   1.1  jonathan #include <netinet/tcp_seq.h>
    164   1.1  jonathan #include <netinet/tcp_timer.h>
    165   1.1  jonathan #include <netinet/tcp_var.h>
    166   1.1  jonathan #include <netinet/tcpip.h>
    167   1.1  jonathan #include <netinet/tcp_debug.h>
    168   1.1  jonathan 
    169   1.1  jonathan #include <machine/stdarg.h>
    170   1.1  jonathan 
     171   1.1  jonathan /* Pool of struct sackhole entries; the holes linked on each tp->snd_holes list are obtained from and returned to it. */
     172   1.1  jonathan POOL_INIT(sackhole_pool, sizeof(struct sackhole), 0, 0, 0, "sackholepl", NULL);
    173   1.1  jonathan 
    174   1.1  jonathan void
    175   1.1  jonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
    176   1.1  jonathan {
    177   1.1  jonathan 	if (TCP_SACK_ENABLED(tp)) {
    178   1.1  jonathan 		tp->rcv_dsack_block.left = seq;
    179   1.1  jonathan 		tp->rcv_dsack_block.right = seq + len;
    180   1.1  jonathan 		tp->rcv_sack_flags |= TCPSACK_HAVED;
    181   1.1  jonathan 	}
    182   1.1  jonathan }
    183   1.1  jonathan 
    184   1.1  jonathan void
    185   1.1  jonathan tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
    186   1.1  jonathan {
    187   1.5      yamt 	struct sackblk
    188   1.5      yamt 	    t_sack_block[(MAX_TCPOPTLEN - 2) / (sizeof(u_int32_t) * 2)];
    189   1.1  jonathan 	struct sackblk *sack = NULL;
    190   1.1  jonathan 	struct sackhole *cur = NULL;
    191   1.1  jonathan 	struct sackhole *tmp = NULL;
    192   1.1  jonathan 	u_int32_t *lp = (u_int32_t *) (cp + 2);
    193   1.1  jonathan 	int i, j, num_sack_blks;
    194   1.1  jonathan 	tcp_seq left, right, acked;
    195   1.1  jonathan 
    196   1.1  jonathan 	/*
    197  1.11  kurahone 	 * If we aren't processing SACK responses, this is not an ACK
    198  1.11  kurahone 	 * or the peer sends us a sack option with invalid length, don't
    199   1.1  jonathan 	 * update the scoreboard.
    200   1.1  jonathan 	 */
    201  1.11  kurahone 	if (!TCP_SACK_ENABLED(tp) || ((th->th_flags & TH_ACK) == 0) ||
    202  1.11  kurahone 			(optlen % 8 != 2 || optlen < 10)) {
    203   1.1  jonathan 		return;
    204   1.1  jonathan 	}
    205   1.1  jonathan 
    206  1.12  kurahone 	/*
    207  1.12  kurahone 	 * If we don't want any SACK holes to be allocated, just return.
    208  1.12  kurahone 	 */
    209  1.12  kurahone 	if (tcp_sack_globalmaxholes == 0 || tcp_sack_tp_maxholes == 0) {
    210  1.12  kurahone 		return;
    211  1.12  kurahone 	}
    212  1.12  kurahone 
    213  1.11  kurahone 	/* If the ACK is outside [snd_una, snd_max], ignore the SACK options. */
    214  1.11  kurahone 	if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
    215  1.11  kurahone 		return;
    216  1.11  kurahone 
    217   1.1  jonathan 	/*
    218   1.1  jonathan 	 * Extract SACK blocks.
    219   1.1  jonathan 	 *
    220   1.1  jonathan 	 * Note that t_sack_block is sorted so that we only need to do
    221   1.1  jonathan 	 * one pass over the sequence number space. (SACK "fast-path")
    222   1.1  jonathan 	 */
    223   1.1  jonathan 	num_sack_blks = optlen / 8;
    224   1.1  jonathan 	acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
    225   1.1  jonathan 	for (i = 0; i < num_sack_blks; i++, lp += 2) {
    226   1.3      yamt 		memcpy(&left, lp, sizeof(*lp));
    227   1.3      yamt 		memcpy(&right, lp + 1, sizeof(*lp));
    228   1.3      yamt 		left = ntohl(left);
    229   1.3      yamt 		right = ntohl(right);
    230   1.1  jonathan 
    231   1.4      yamt 		if (SEQ_LEQ(right, acked) || SEQ_GEQ(left, tp->snd_max) ||
    232   1.4      yamt 		    SEQ_GEQ(left, right)) {
    233   1.1  jonathan 			/* SACK entry that's old, or invalid. */
    234   1.1  jonathan 			i--;
    235   1.1  jonathan 			num_sack_blks--;
    236   1.1  jonathan 			continue;
    237   1.1  jonathan 		}
    238   1.1  jonathan 
    239   1.1  jonathan 		/* Insertion sort. */
    240   1.2      yamt 		for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
    241   1.2      yamt 		    j--) {
    242   1.1  jonathan 			t_sack_block[j].left = t_sack_block[j - 1].left;
    243   1.1  jonathan 			t_sack_block[j].right = t_sack_block[j - 1].right;
    244   1.1  jonathan 		}
    245   1.1  jonathan 		t_sack_block[j].left = left;
    246   1.1  jonathan 		t_sack_block[j].right = right;
    247   1.1  jonathan 	}
    248   1.1  jonathan 
    249   1.1  jonathan 	/* Update the scoreboard. */
    250   1.1  jonathan 	cur = TAILQ_FIRST(&tp->snd_holes);
    251   1.1  jonathan 	for (i = 0; i < num_sack_blks; i++) {
    252   1.1  jonathan 		sack = &t_sack_block[i];
    253   1.1  jonathan 		/*
    254   1.1  jonathan 		 * FACK TCP.  Update snd_fack so we can enter Fast
    255   1.1  jonathan 		 * Recovery early.
    256   1.1  jonathan 		 */
    257   1.1  jonathan 		if (SEQ_GEQ(sack->right, tp->snd_fack))
    258   1.1  jonathan 			tp->snd_fack = sack->right;
    259   1.1  jonathan 
    260   1.1  jonathan 		if (TAILQ_EMPTY(&tp->snd_holes)) {
    261   1.1  jonathan 			/* First hole. */
    262  1.12  kurahone 			if (tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
    263  1.12  kurahone 				return;
    264  1.12  kurahone 			}
    265   1.2      yamt 			cur = (struct sackhole *)
    266   1.2      yamt 			    pool_get(&sackhole_pool, PR_NOWAIT);
    267   1.1  jonathan 			if (cur == NULL) {
    268   1.1  jonathan 				/* ENOBUFS, bail out*/
    269   1.1  jonathan 				return;
    270   1.1  jonathan 			}
    271   1.1  jonathan 			cur->start = th->th_ack;
    272   1.1  jonathan 			cur->end = sack->left;
    273   1.1  jonathan 			cur->rxmit = cur->start;
    274   1.1  jonathan 			tp->rcv_lastsack = sack->right;
    275  1.12  kurahone 			tp->snd_numholes++;
    276  1.12  kurahone 			tcp_sack_globalholes++;
    277   1.1  jonathan 			TAILQ_INSERT_HEAD(&tp->snd_holes, cur, sackhole_q);
    278   1.1  jonathan 			continue; /* With next sack block */
    279   1.1  jonathan 		}
    280   1.1  jonathan 
    281   1.1  jonathan 		/* Go through the list of holes. */
    282   1.1  jonathan 		while (cur) {
    283   1.6      yamt 			if (SEQ_LEQ(sack->right, cur->start))
    284   1.1  jonathan 				/* SACKs data before the current hole */
    285   1.1  jonathan 				break; /* No use going through more holes */
    286   1.1  jonathan 
    287   1.1  jonathan 			if (SEQ_GEQ(sack->left, cur->end)) {
    288   1.1  jonathan 				/* SACKs data beyond the current hole */
    289   1.1  jonathan 				cur = TAILQ_NEXT(cur, sackhole_q);
    290   1.1  jonathan 				continue;
    291   1.1  jonathan 			}
    292   1.1  jonathan 
    293   1.1  jonathan 			if (SEQ_LEQ(sack->left, cur->start)) {
    294   1.1  jonathan 				/* Data acks at least the beginning of hole */
    295   1.1  jonathan 				if (SEQ_GEQ(sack->right, cur->end)) {
    296   1.1  jonathan 					/* Acks entire hole, so delete hole */
    297   1.1  jonathan 					tmp = cur;
    298   1.1  jonathan 					cur = TAILQ_NEXT(cur, sackhole_q);
    299  1.12  kurahone 					tp->snd_numholes--;
    300  1.12  kurahone 					tcp_sack_globalholes--;
    301   1.2      yamt 					TAILQ_REMOVE(&tp->snd_holes, tmp,
    302   1.2      yamt 					    sackhole_q);
    303   1.1  jonathan 					pool_put(&sackhole_pool, tmp);
    304   1.1  jonathan 					break;
    305   1.1  jonathan 				}
    306   1.1  jonathan 
    307   1.1  jonathan 				/* Otherwise, move start of hole forward */
    308   1.1  jonathan 				cur->start = sack->right;
    309   1.1  jonathan 				cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
    310   1.1  jonathan 				break;
    311   1.1  jonathan 			}
    312   1.1  jonathan 
    313   1.1  jonathan 			if (SEQ_GEQ(sack->right, cur->end)) {
    314   1.1  jonathan 				/* Move end of hole backward. */
    315   1.1  jonathan 				cur->end = sack->left;
    316   1.1  jonathan 				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
    317   1.1  jonathan 				cur = TAILQ_NEXT(cur, sackhole_q);
    318   1.1  jonathan 				break;
    319   1.1  jonathan 			}
    320   1.1  jonathan 
    321   1.1  jonathan 			if (SEQ_LT(cur->start, sack->left) &&
    322   1.1  jonathan 			    SEQ_GT(cur->end, sack->right)) {
    323   1.1  jonathan 				/*
    324   1.1  jonathan 				 * ACKs some data in middle of a hole; need to
    325   1.1  jonathan 				 * split current hole
    326   1.1  jonathan 				 */
    327  1.12  kurahone 				if (tcp_sack_globalholes >=
    328  1.12  kurahone 						tcp_sack_globalmaxholes ||
    329  1.12  kurahone 						tp->snd_numholes >=
    330  1.12  kurahone 						tcp_sack_tp_maxholes) {
    331  1.12  kurahone 					return;
    332  1.12  kurahone 				}
    333   1.1  jonathan 				tmp = (struct sackhole *)
    334   1.2      yamt 				    pool_get(&sackhole_pool, PR_NOWAIT);
    335   1.1  jonathan 				if (tmp == NULL) {
    336   1.1  jonathan 					/* ENOBUFS, bail out. */
    337   1.1  jonathan 					return;
    338   1.1  jonathan 				}
    339   1.1  jonathan 				tmp->start = sack->right;
    340   1.1  jonathan 				tmp->end = cur->end;
    341   1.1  jonathan 				tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
    342   1.1  jonathan 				cur->end = sack->left;
    343   1.1  jonathan 				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
    344  1.12  kurahone 				tp->snd_numholes++;
    345  1.12  kurahone 				tcp_sack_globalholes++;
    346   1.1  jonathan 				TAILQ_INSERT_AFTER(&tp->snd_holes, cur, tmp,
    347   1.1  jonathan 						sackhole_q);
    348   1.7      yamt 				cur = tmp;
    349   1.1  jonathan 				break;
    350   1.1  jonathan 			}
    351   1.1  jonathan 		}
    352   1.1  jonathan 
    353   1.1  jonathan 		/* At this point, we have reached the tail of the list. */
    354   1.1  jonathan 		if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
    355   1.1  jonathan 			/*
    356   1.1  jonathan 			 * Need to append new hole at end.
    357   1.1  jonathan 			 */
    358  1.12  kurahone 			if (tcp_sack_globalholes >=
    359  1.12  kurahone 					tcp_sack_globalmaxholes ||
    360  1.12  kurahone 					tp->snd_numholes >=
    361  1.12  kurahone 					tcp_sack_tp_maxholes) {
    362  1.12  kurahone 				return;
    363  1.12  kurahone 			}
    364   1.1  jonathan 			tmp = (struct sackhole *)
    365   1.4      yamt 			    pool_get(&sackhole_pool, PR_NOWAIT);
    366   1.1  jonathan 			if (tmp == NULL)
    367   1.1  jonathan 				continue; /* ENOBUFS */
    368   1.1  jonathan 			tmp->start = tp->rcv_lastsack;
    369   1.1  jonathan 			tmp->end = sack->left;
    370   1.1  jonathan 			tmp->rxmit = tmp->start;
    371  1.12  kurahone 			tp->snd_numholes++;
    372  1.12  kurahone 			tcp_sack_globalholes++;
    373   1.1  jonathan 			TAILQ_INSERT_TAIL(&tp->snd_holes, tmp, sackhole_q);
    374   1.1  jonathan 			cur = tmp;
    375   1.1  jonathan 		}
    376   1.8      yamt 		if (SEQ_LT(tp->rcv_lastsack, sack->right)) {
    377   1.8      yamt 			tp->rcv_lastsack = sack->right;
    378   1.8      yamt 		}
    379   1.1  jonathan 	}
    380   1.1  jonathan }
    381   1.1  jonathan 
    382   1.1  jonathan void
    383   1.1  jonathan tcp_del_sackholes(struct tcpcb *tp, struct tcphdr *th)
    384   1.1  jonathan {
    385   1.1  jonathan 	/* Max because this could be an older ack that just arrived. */
    386   1.1  jonathan 	tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
    387   1.1  jonathan 		th->th_ack : tp->snd_una;
    388   1.1  jonathan 	struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
    389   1.1  jonathan 	struct sackhole *tmp;
    390   1.1  jonathan 
    391   1.1  jonathan 	while (cur) {
    392   1.1  jonathan 		if (SEQ_LEQ(cur->end, lastack)) {
    393   1.1  jonathan 			tmp = cur;
    394   1.1  jonathan 			cur = TAILQ_NEXT(cur, sackhole_q);
    395  1.12  kurahone 			tp->snd_numholes--;
    396  1.12  kurahone 			tcp_sack_globalholes--;
    397   1.1  jonathan 			TAILQ_REMOVE(&tp->snd_holes, tmp, sackhole_q);
    398   1.1  jonathan 			pool_put(&sackhole_pool, tmp);
    399   1.1  jonathan 		} else if (SEQ_LT(cur->start, lastack)) {
    400   1.1  jonathan 			cur->start = lastack;
    401   1.1  jonathan 			if (SEQ_LT(cur->rxmit, cur->start))
    402   1.1  jonathan 				cur->rxmit = cur->start;
    403   1.1  jonathan 			break;
    404   1.1  jonathan 		} else
    405   1.1  jonathan 			break;
    406   1.1  jonathan 
    407   1.1  jonathan 	}
    408   1.1  jonathan }
    409   1.1  jonathan 
    410   1.1  jonathan void
    411   1.1  jonathan tcp_free_sackholes(struct tcpcb *tp)
    412   1.1  jonathan {
    413   1.1  jonathan 	struct sackhole *sack;
    414   1.1  jonathan 
    415   1.1  jonathan 	/* Free up the SACK hole list. */
    416   1.1  jonathan 	while (!TAILQ_EMPTY(&tp->snd_holes)) {
    417   1.1  jonathan 		sack = TAILQ_FIRST(&tp->snd_holes);
    418  1.12  kurahone 		tcp_sack_globalholes--;
    419   1.1  jonathan 		TAILQ_REMOVE(&tp->snd_holes, sack, sackhole_q);
    420   1.1  jonathan 		pool_put(&sackhole_pool, sack);
    421   1.1  jonathan 	}
    422  1.12  kurahone 
    423  1.12  kurahone 	tp->snd_numholes = 0;
    424   1.1  jonathan }
    425   1.1  jonathan 
    426   1.1  jonathan /*
    427   1.1  jonathan  * Implements the SACK response to a new ack, checking for partial acks
    428   1.1  jonathan  * in fast recovery.
    429   1.1  jonathan  */
    430   1.1  jonathan void
    431   1.1  jonathan tcp_sack_newack(struct tcpcb *tp, struct tcphdr *th)
    432   1.1  jonathan {
    433   1.1  jonathan 	if (tp->t_partialacks < 0) {
    434   1.1  jonathan 		/*
    435   1.1  jonathan 		 * Not in fast recovery.  Reset the duplicate ack
    436   1.1  jonathan 		 * counter.
    437   1.1  jonathan 		 */
    438   1.1  jonathan 		tp->t_dupacks = 0;
    439   1.1  jonathan 	} else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
    440   1.1  jonathan 		/*
    441   1.1  jonathan 		 * Partial ack handling within a sack recovery episode.
    442   1.1  jonathan 		 * Keeping this very simple for now. When a partial ack
    443   1.1  jonathan 		 * is received, force snd_cwnd to a value that will allow
    444   1.1  jonathan 		 * the sender to transmit no more than 2 segments.
    445   1.1  jonathan 		 * If necessary, a fancier scheme can be adopted at a
    446   1.1  jonathan 		 * later point, but for now, the goal is to prevent the
    447   1.1  jonathan 		 * sender from bursting a large amount of data in the midst
    448   1.1  jonathan 		 * of sack recovery.
    449   1.1  jonathan 		 */
    450   1.1  jonathan 		int num_segs = 1;
    451   1.1  jonathan 		int sack_bytes_rxmt = 0;
    452   1.1  jonathan 
    453   1.1  jonathan 		tp->t_partialacks++;
    454   1.1  jonathan 		TCP_TIMER_DISARM(tp, TCPT_REXMT);
    455   1.1  jonathan 		tp->t_rtttime = 0;
    456   1.1  jonathan 
    457   1.2      yamt 	 	/*
    458   1.2      yamt 		 * send one or 2 segments based on how much new data was acked
    459   1.2      yamt 		 */
    460   1.1  jonathan  		if (((th->th_ack - tp->snd_una) / tp->t_segsz) > 2)
    461   1.1  jonathan  			num_segs = 2;
    462   1.1  jonathan 	 	(void)tcp_sack_output(tp, &sack_bytes_rxmt);
    463   1.2      yamt  		tp->snd_cwnd = sack_bytes_rxmt +
    464   1.2      yamt 		    (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_segsz;
    465   1.1  jonathan   		tp->t_flags |= TF_ACKNOW;
    466   1.1  jonathan 	  	(void) tcp_output(tp);
    467   1.1  jonathan 	} else {
    468   1.1  jonathan 		/*
    469   1.1  jonathan 		 * Complete ack, inflate the congestion window to
    470   1.1  jonathan                  * ssthresh and exit fast recovery.
    471   1.1  jonathan 		 *
    472   1.1  jonathan 		 * Window inflation should have left us with approx.
    473   1.1  jonathan 		 * snd_ssthresh outstanding data.  But in case we
    474   1.1  jonathan 		 * would be inclined to send a burst, better to do
    475   1.1  jonathan 		 * it via the slow start mechanism.
    476   1.1  jonathan 		 */
    477   1.1  jonathan 		if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
    478   1.1  jonathan 			tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
    479   1.1  jonathan 			    + tp->t_segsz;
    480   1.1  jonathan 		else
    481   1.1  jonathan 			tp->snd_cwnd = tp->snd_ssthresh;
    482   1.1  jonathan 		tp->t_partialacks = -1;
    483   1.1  jonathan 		tp->t_dupacks = 0;
    484   1.1  jonathan 		if (SEQ_GT(th->th_ack, tp->snd_fack))
    485   1.1  jonathan 			tp->snd_fack = th->th_ack;
    486   1.1  jonathan 	}
    487   1.1  jonathan }
    488   1.1  jonathan 
    489   1.1  jonathan /*
    490   1.1  jonathan  * Returns pointer to a sackhole if there are any pending retransmissions;
    491   1.1  jonathan  * NULL otherwise.
    492   1.1  jonathan  */
    493   1.1  jonathan struct sackhole *
    494   1.1  jonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
    495   1.1  jonathan {
    496   1.1  jonathan 	struct sackhole *cur = NULL;
    497   1.1  jonathan 
    498   1.1  jonathan 	if(!TCP_SACK_ENABLED(tp))
    499   1.1  jonathan 		return (NULL);
    500   1.1  jonathan 
    501   1.1  jonathan 	*sack_bytes_rexmt = 0;
    502   1.1  jonathan 	TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
    503   1.1  jonathan 		if (SEQ_LT(cur->rxmit, cur->end)) {
    504   1.2      yamt 			if (SEQ_LT(cur->rxmit, tp->snd_una)) {
    505   1.2      yamt 				/* old SACK hole */
    506   1.1  jonathan 				continue;
    507   1.1  jonathan 			}
    508   1.1  jonathan 			*sack_bytes_rexmt += (cur->rxmit - cur->start);
    509   1.1  jonathan 			break;
    510   1.1  jonathan 		}
    511   1.1  jonathan 		*sack_bytes_rexmt += (cur->rxmit - cur->start);
    512   1.1  jonathan 	}
    513   1.1  jonathan 
    514   1.1  jonathan 	return (cur);
    515   1.1  jonathan }
    516   1.1  jonathan 
    517   1.1  jonathan /*
    518   1.1  jonathan  * After a timeout, the SACK list may be rebuilt.  This SACK information
    519   1.1  jonathan  * should be used to avoid retransmitting SACKed data.  This function
    520   1.1  jonathan  * traverses the SACK list to see if snd_nxt should be moved forward.
    521   1.1  jonathan  */
    522   1.1  jonathan void
    523   1.1  jonathan tcp_sack_adjust(struct tcpcb *tp)
    524   1.1  jonathan {
    525   1.1  jonathan 	struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
    526   1.1  jonathan 	struct sackhole *n = NULL;
    527   1.1  jonathan 
    528   1.1  jonathan 	if (TAILQ_EMPTY(&tp->snd_holes))
    529   1.1  jonathan 		return; /* No holes */
    530   1.1  jonathan 	if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
    531   1.1  jonathan 		return; /* We're already beyond any SACKed blocks */
    532   1.1  jonathan 
    533   1.1  jonathan 	/*
    534   1.1  jonathan 	 * Two cases for which we want to advance snd_nxt:
    535   1.1  jonathan 	 * i) snd_nxt lies between end of one hole and beginning of another
    536   1.1  jonathan 	 * ii) snd_nxt lies between end of last hole and rcv_lastsack
    537   1.1  jonathan 	 */
    538   1.1  jonathan 	while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
    539   1.1  jonathan 		if (SEQ_LT(tp->snd_nxt, cur->end))
    540   1.1  jonathan 			return;
    541   1.1  jonathan 		if (SEQ_GEQ(tp->snd_nxt, n->start))
    542   1.1  jonathan 			cur = n;
    543   1.1  jonathan 		else {
    544   1.1  jonathan 			tp->snd_nxt = n->start;
    545   1.1  jonathan 			return;
    546   1.1  jonathan 		}
    547   1.1  jonathan 	}
    548   1.1  jonathan 	if (SEQ_LT(tp->snd_nxt, cur->end))
    549   1.1  jonathan 		return;
    550   1.1  jonathan 	tp->snd_nxt = tp->rcv_lastsack;
    551   1.1  jonathan 
    552   1.1  jonathan 	return;
    553   1.1  jonathan }
    554   1.9      yamt 
    555   1.9      yamt int
    556  1.10      yamt tcp_sack_numblks(const struct tcpcb *tp)
    557   1.9      yamt {
    558  1.10      yamt 	int numblks;
    559   1.9      yamt 
    560  1.10      yamt 	if (!TCP_SACK_ENABLED(tp)) {
    561   1.9      yamt 		return 0;
    562   1.9      yamt 	}
    563   1.9      yamt 
    564  1.10      yamt 	numblks = (((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) ? 1 : 0) +
    565  1.10      yamt 	    tp->t_segqlen;
    566  1.10      yamt 
    567  1.10      yamt 	if (numblks == 0) {
    568  1.10      yamt 		return 0;
    569  1.10      yamt 	}
    570  1.10      yamt 
    571  1.10      yamt 	if (numblks > TCP_SACK_MAX) {
    572  1.10      yamt 		numblks = TCP_SACK_MAX;
    573  1.10      yamt 	}
    574  1.10      yamt 
    575  1.10      yamt 	return numblks;
    576   1.9      yamt }
    577