tcp_sack.c revision 1.12 1 1.11 kurahone /* $NetBSD: tcp_sack.c,v 1.12 2005/04/05 01:07:17 kurahone Exp $ */
2 1.1 jonathan
3 1.1 jonathan /*
4 1.1 jonathan * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 1.1 jonathan * All rights reserved.
6 1.1 jonathan *
7 1.1 jonathan * This code is derived from software contributed to The NetBSD Foundation
8 1.1 jonathan * by Kentaro A. Kurahone.
9 1.1 jonathan *
10 1.1 jonathan * Redistribution and use in source and binary forms, with or without
11 1.1 jonathan * modification, are permitted provided that the following conditions
12 1.1 jonathan * are met:
13 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
14 1.1 jonathan * notice, this list of conditions and the following disclaimer.
15 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
17 1.1 jonathan * documentation and/or other materials provided with the distribution.
18 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
19 1.1 jonathan * must display the following acknowledgement:
20 1.1 jonathan * This product includes software developed by the NetBSD
21 1.1 jonathan * Foundation, Inc. and its contributors.
22 1.1 jonathan * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 jonathan * contributors may be used to endorse or promote products derived
24 1.1 jonathan * from this software without specific prior written permission.
25 1.1 jonathan *
26 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 jonathan * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 jonathan * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 jonathan * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 jonathan * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 jonathan * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 jonathan * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 jonathan * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 jonathan * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 jonathan * POSSIBILITY OF SUCH DAMAGE.
37 1.1 jonathan */
38 1.1 jonathan
39 1.1 jonathan /*
40 1.1 jonathan * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
41 1.1 jonathan * The Regents of the University of California. All rights reserved.
42 1.1 jonathan *
43 1.1 jonathan * Redistribution and use in source and binary forms, with or without
44 1.1 jonathan * modification, are permitted provided that the following conditions
45 1.1 jonathan * are met:
46 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
47 1.1 jonathan * notice, this list of conditions and the following disclaimer.
48 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
49 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
50 1.1 jonathan * documentation and/or other materials provided with the distribution.
51 1.1 jonathan * 4. Neither the name of the University nor the names of its contributors
52 1.1 jonathan * may be used to endorse or promote products derived from this software
53 1.1 jonathan * without specific prior written permission.
54 1.1 jonathan *
55 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 1.1 jonathan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 1.1 jonathan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 1.1 jonathan * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 1.1 jonathan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 1.1 jonathan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 1.1 jonathan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 1.1 jonathan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 1.1 jonathan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 1.1 jonathan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 1.1 jonathan * SUCH DAMAGE.
66 1.1 jonathan *
67 1.1 jonathan * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
68 1.1 jonathan * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
69 1.1 jonathan */
70 1.1 jonathan
71 1.1 jonathan /*
72 1.1 jonathan * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
73 1.1 jonathan *
74 1.1 jonathan * NRL grants permission for redistribution and use in source and binary
75 1.1 jonathan * forms, with or without modification, of the software and documentation
76 1.1 jonathan * created at NRL provided that the following conditions are met:
77 1.1 jonathan *
78 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
79 1.1 jonathan * notice, this list of conditions and the following disclaimer.
80 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
81 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
82 1.1 jonathan * documentation and/or other materials provided with the distribution.
83 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
84 1.1 jonathan * must display the following acknowledgements:
85 1.1 jonathan * This product includes software developed by the University of
86 1.1 jonathan * California, Berkeley and its contributors.
87 1.1 jonathan * This product includes software developed at the Information
88 1.1 jonathan * Technology Division, US Naval Research Laboratory.
89 1.1 jonathan * 4. Neither the name of the NRL nor the names of its contributors
90 1.1 jonathan * may be used to endorse or promote products derived from this software
91 1.1 jonathan * without specific prior written permission.
92 1.1 jonathan *
93 1.1 jonathan * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
94 1.1 jonathan * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
95 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
96 1.1 jonathan * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
97 1.1 jonathan * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
98 1.1 jonathan * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
99 1.1 jonathan * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
100 1.1 jonathan * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
101 1.1 jonathan * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
102 1.1 jonathan * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
103 1.1 jonathan * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
104 1.1 jonathan *
105 1.1 jonathan * The views and conclusions contained in the software and documentation
106 1.1 jonathan * are those of the authors and should not be interpreted as representing
107 1.1 jonathan * official policies, either expressed or implied, of the US Naval
108 1.1 jonathan * Research Laboratory (NRL).
109 1.1 jonathan */
110 1.1 jonathan
111 1.1 jonathan #include <sys/cdefs.h>
112 1.11 kurahone __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.12 2005/04/05 01:07:17 kurahone Exp $");
113 1.1 jonathan
114 1.1 jonathan #include "opt_inet.h"
115 1.1 jonathan #include "opt_ipsec.h"
116 1.1 jonathan #include "opt_inet_csum.h"
117 1.1 jonathan #include "opt_tcp_debug.h"
118 1.1 jonathan
119 1.1 jonathan #include <sys/param.h>
120 1.1 jonathan #include <sys/systm.h>
121 1.1 jonathan #include <sys/malloc.h>
122 1.1 jonathan #include <sys/mbuf.h>
123 1.1 jonathan #include <sys/protosw.h>
124 1.1 jonathan #include <sys/socket.h>
125 1.1 jonathan #include <sys/socketvar.h>
126 1.1 jonathan #include <sys/errno.h>
127 1.1 jonathan #include <sys/syslog.h>
128 1.1 jonathan #include <sys/pool.h>
129 1.1 jonathan #include <sys/domain.h>
130 1.1 jonathan #include <sys/kernel.h>
131 1.1 jonathan
132 1.1 jonathan #include <net/if.h>
133 1.1 jonathan #include <net/route.h>
134 1.1 jonathan #include <net/if_types.h>
135 1.1 jonathan
136 1.1 jonathan #include <netinet/in.h>
137 1.1 jonathan #include <netinet/in_systm.h>
138 1.1 jonathan #include <netinet/ip.h>
139 1.1 jonathan #include <netinet/in_pcb.h>
140 1.1 jonathan #include <netinet/in_var.h>
141 1.1 jonathan #include <netinet/ip_var.h>
142 1.1 jonathan
143 1.1 jonathan #ifdef INET6
144 1.1 jonathan #ifndef INET
145 1.1 jonathan #include <netinet/in.h>
146 1.1 jonathan #endif
147 1.1 jonathan #include <netinet/ip6.h>
148 1.1 jonathan #include <netinet6/ip6_var.h>
149 1.1 jonathan #include <netinet6/in6_pcb.h>
150 1.1 jonathan #include <netinet6/ip6_var.h>
151 1.1 jonathan #include <netinet6/in6_var.h>
152 1.1 jonathan #include <netinet/icmp6.h>
153 1.1 jonathan #include <netinet6/nd6.h>
154 1.1 jonathan #endif
155 1.1 jonathan
156 1.1 jonathan #ifndef INET6
157 1.1 jonathan /* always need ip6.h for IP6_EXTHDR_GET */
158 1.1 jonathan #include <netinet/ip6.h>
159 1.1 jonathan #endif
160 1.1 jonathan
161 1.1 jonathan #include <netinet/tcp.h>
162 1.1 jonathan #include <netinet/tcp_fsm.h>
163 1.1 jonathan #include <netinet/tcp_seq.h>
164 1.1 jonathan #include <netinet/tcp_timer.h>
165 1.1 jonathan #include <netinet/tcp_var.h>
166 1.1 jonathan #include <netinet/tcpip.h>
167 1.1 jonathan #include <netinet/tcp_debug.h>
168 1.1 jonathan
169 1.1 jonathan #include <machine/stdarg.h>
170 1.1 jonathan
171 1.1 jonathan /* SACK block pool. */
172 1.1 jonathan POOL_INIT(sackhole_pool, sizeof(struct sackhole), 0, 0, 0, "sackholepl", NULL);
173 1.1 jonathan
174 1.1 jonathan void
175 1.1 jonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
176 1.1 jonathan {
177 1.1 jonathan if (TCP_SACK_ENABLED(tp)) {
178 1.1 jonathan tp->rcv_dsack_block.left = seq;
179 1.1 jonathan tp->rcv_dsack_block.right = seq + len;
180 1.1 jonathan tp->rcv_sack_flags |= TCPSACK_HAVED;
181 1.1 jonathan }
182 1.1 jonathan }
183 1.1 jonathan
184 1.1 jonathan void
185 1.1 jonathan tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
186 1.1 jonathan {
187 1.5 yamt struct sackblk
188 1.5 yamt t_sack_block[(MAX_TCPOPTLEN - 2) / (sizeof(u_int32_t) * 2)];
189 1.1 jonathan struct sackblk *sack = NULL;
190 1.1 jonathan struct sackhole *cur = NULL;
191 1.1 jonathan struct sackhole *tmp = NULL;
192 1.1 jonathan u_int32_t *lp = (u_int32_t *) (cp + 2);
193 1.1 jonathan int i, j, num_sack_blks;
194 1.1 jonathan tcp_seq left, right, acked;
195 1.1 jonathan
196 1.1 jonathan /*
197 1.11 kurahone * If we aren't processing SACK responses, this is not an ACK
198 1.11 kurahone * or the peer sends us a sack option with invalid length, don't
199 1.1 jonathan * update the scoreboard.
200 1.1 jonathan */
201 1.11 kurahone if (!TCP_SACK_ENABLED(tp) || ((th->th_flags & TH_ACK) == 0) ||
202 1.11 kurahone (optlen % 8 != 2 || optlen < 10)) {
203 1.1 jonathan return;
204 1.1 jonathan }
205 1.1 jonathan
206 1.12 kurahone /*
207 1.12 kurahone * If we don't want any SACK holes to be allocated, just return.
208 1.12 kurahone */
209 1.12 kurahone if (tcp_sack_globalmaxholes == 0 || tcp_sack_tp_maxholes == 0) {
210 1.12 kurahone return;
211 1.12 kurahone }
212 1.12 kurahone
213 1.11 kurahone /* If the ACK is outside [snd_una, snd_max], ignore the SACK options. */
214 1.11 kurahone if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
215 1.11 kurahone return;
216 1.11 kurahone
217 1.1 jonathan /*
218 1.1 jonathan * Extract SACK blocks.
219 1.1 jonathan *
220 1.1 jonathan * Note that t_sack_block is sorted so that we only need to do
221 1.1 jonathan * one pass over the sequence number space. (SACK "fast-path")
222 1.1 jonathan */
223 1.1 jonathan num_sack_blks = optlen / 8;
224 1.1 jonathan acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
225 1.1 jonathan for (i = 0; i < num_sack_blks; i++, lp += 2) {
226 1.3 yamt memcpy(&left, lp, sizeof(*lp));
227 1.3 yamt memcpy(&right, lp + 1, sizeof(*lp));
228 1.3 yamt left = ntohl(left);
229 1.3 yamt right = ntohl(right);
230 1.1 jonathan
231 1.4 yamt if (SEQ_LEQ(right, acked) || SEQ_GEQ(left, tp->snd_max) ||
232 1.4 yamt SEQ_GEQ(left, right)) {
233 1.1 jonathan /* SACK entry that's old, or invalid. */
234 1.1 jonathan i--;
235 1.1 jonathan num_sack_blks--;
236 1.1 jonathan continue;
237 1.1 jonathan }
238 1.1 jonathan
239 1.1 jonathan /* Insertion sort. */
240 1.2 yamt for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
241 1.2 yamt j--) {
242 1.1 jonathan t_sack_block[j].left = t_sack_block[j - 1].left;
243 1.1 jonathan t_sack_block[j].right = t_sack_block[j - 1].right;
244 1.1 jonathan }
245 1.1 jonathan t_sack_block[j].left = left;
246 1.1 jonathan t_sack_block[j].right = right;
247 1.1 jonathan }
248 1.1 jonathan
249 1.1 jonathan /* Update the scoreboard. */
250 1.1 jonathan cur = TAILQ_FIRST(&tp->snd_holes);
251 1.1 jonathan for (i = 0; i < num_sack_blks; i++) {
252 1.1 jonathan sack = &t_sack_block[i];
253 1.1 jonathan /*
254 1.1 jonathan * FACK TCP. Update snd_fack so we can enter Fast
255 1.1 jonathan * Recovery early.
256 1.1 jonathan */
257 1.1 jonathan if (SEQ_GEQ(sack->right, tp->snd_fack))
258 1.1 jonathan tp->snd_fack = sack->right;
259 1.1 jonathan
260 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes)) {
261 1.1 jonathan /* First hole. */
262 1.12 kurahone if (tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
263 1.12 kurahone return;
264 1.12 kurahone }
265 1.2 yamt cur = (struct sackhole *)
266 1.2 yamt pool_get(&sackhole_pool, PR_NOWAIT);
267 1.1 jonathan if (cur == NULL) {
268 1.1 jonathan /* ENOBUFS, bail out*/
269 1.1 jonathan return;
270 1.1 jonathan }
271 1.1 jonathan cur->start = th->th_ack;
272 1.1 jonathan cur->end = sack->left;
273 1.1 jonathan cur->rxmit = cur->start;
274 1.1 jonathan tp->rcv_lastsack = sack->right;
275 1.12 kurahone tp->snd_numholes++;
276 1.12 kurahone tcp_sack_globalholes++;
277 1.1 jonathan TAILQ_INSERT_HEAD(&tp->snd_holes, cur, sackhole_q);
278 1.1 jonathan continue; /* With next sack block */
279 1.1 jonathan }
280 1.1 jonathan
281 1.1 jonathan /* Go through the list of holes. */
282 1.1 jonathan while (cur) {
283 1.6 yamt if (SEQ_LEQ(sack->right, cur->start))
284 1.1 jonathan /* SACKs data before the current hole */
285 1.1 jonathan break; /* No use going through more holes */
286 1.1 jonathan
287 1.1 jonathan if (SEQ_GEQ(sack->left, cur->end)) {
288 1.1 jonathan /* SACKs data beyond the current hole */
289 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
290 1.1 jonathan continue;
291 1.1 jonathan }
292 1.1 jonathan
293 1.1 jonathan if (SEQ_LEQ(sack->left, cur->start)) {
294 1.1 jonathan /* Data acks at least the beginning of hole */
295 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
296 1.1 jonathan /* Acks entire hole, so delete hole */
297 1.1 jonathan tmp = cur;
298 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
299 1.12 kurahone tp->snd_numholes--;
300 1.12 kurahone tcp_sack_globalholes--;
301 1.2 yamt TAILQ_REMOVE(&tp->snd_holes, tmp,
302 1.2 yamt sackhole_q);
303 1.1 jonathan pool_put(&sackhole_pool, tmp);
304 1.1 jonathan break;
305 1.1 jonathan }
306 1.1 jonathan
307 1.1 jonathan /* Otherwise, move start of hole forward */
308 1.1 jonathan cur->start = sack->right;
309 1.1 jonathan cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
310 1.1 jonathan break;
311 1.1 jonathan }
312 1.1 jonathan
313 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
314 1.1 jonathan /* Move end of hole backward. */
315 1.1 jonathan cur->end = sack->left;
316 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
317 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
318 1.1 jonathan break;
319 1.1 jonathan }
320 1.1 jonathan
321 1.1 jonathan if (SEQ_LT(cur->start, sack->left) &&
322 1.1 jonathan SEQ_GT(cur->end, sack->right)) {
323 1.1 jonathan /*
324 1.1 jonathan * ACKs some data in middle of a hole; need to
325 1.1 jonathan * split current hole
326 1.1 jonathan */
327 1.12 kurahone if (tcp_sack_globalholes >=
328 1.12 kurahone tcp_sack_globalmaxholes ||
329 1.12 kurahone tp->snd_numholes >=
330 1.12 kurahone tcp_sack_tp_maxholes) {
331 1.12 kurahone return;
332 1.12 kurahone }
333 1.1 jonathan tmp = (struct sackhole *)
334 1.2 yamt pool_get(&sackhole_pool, PR_NOWAIT);
335 1.1 jonathan if (tmp == NULL) {
336 1.1 jonathan /* ENOBUFS, bail out. */
337 1.1 jonathan return;
338 1.1 jonathan }
339 1.1 jonathan tmp->start = sack->right;
340 1.1 jonathan tmp->end = cur->end;
341 1.1 jonathan tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
342 1.1 jonathan cur->end = sack->left;
343 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
344 1.12 kurahone tp->snd_numholes++;
345 1.12 kurahone tcp_sack_globalholes++;
346 1.1 jonathan TAILQ_INSERT_AFTER(&tp->snd_holes, cur, tmp,
347 1.1 jonathan sackhole_q);
348 1.7 yamt cur = tmp;
349 1.1 jonathan break;
350 1.1 jonathan }
351 1.1 jonathan }
352 1.1 jonathan
353 1.1 jonathan /* At this point, we have reached the tail of the list. */
354 1.1 jonathan if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
355 1.1 jonathan /*
356 1.1 jonathan * Need to append new hole at end.
357 1.1 jonathan */
358 1.12 kurahone if (tcp_sack_globalholes >=
359 1.12 kurahone tcp_sack_globalmaxholes ||
360 1.12 kurahone tp->snd_numholes >=
361 1.12 kurahone tcp_sack_tp_maxholes) {
362 1.12 kurahone return;
363 1.12 kurahone }
364 1.1 jonathan tmp = (struct sackhole *)
365 1.4 yamt pool_get(&sackhole_pool, PR_NOWAIT);
366 1.1 jonathan if (tmp == NULL)
367 1.1 jonathan continue; /* ENOBUFS */
368 1.1 jonathan tmp->start = tp->rcv_lastsack;
369 1.1 jonathan tmp->end = sack->left;
370 1.1 jonathan tmp->rxmit = tmp->start;
371 1.12 kurahone tp->snd_numholes++;
372 1.12 kurahone tcp_sack_globalholes++;
373 1.1 jonathan TAILQ_INSERT_TAIL(&tp->snd_holes, tmp, sackhole_q);
374 1.1 jonathan cur = tmp;
375 1.1 jonathan }
376 1.8 yamt if (SEQ_LT(tp->rcv_lastsack, sack->right)) {
377 1.8 yamt tp->rcv_lastsack = sack->right;
378 1.8 yamt }
379 1.1 jonathan }
380 1.1 jonathan }
381 1.1 jonathan
382 1.1 jonathan void
383 1.1 jonathan tcp_del_sackholes(struct tcpcb *tp, struct tcphdr *th)
384 1.1 jonathan {
385 1.1 jonathan /* Max because this could be an older ack that just arrived. */
386 1.1 jonathan tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
387 1.1 jonathan th->th_ack : tp->snd_una;
388 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
389 1.1 jonathan struct sackhole *tmp;
390 1.1 jonathan
391 1.1 jonathan while (cur) {
392 1.1 jonathan if (SEQ_LEQ(cur->end, lastack)) {
393 1.1 jonathan tmp = cur;
394 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
395 1.12 kurahone tp->snd_numholes--;
396 1.12 kurahone tcp_sack_globalholes--;
397 1.1 jonathan TAILQ_REMOVE(&tp->snd_holes, tmp, sackhole_q);
398 1.1 jonathan pool_put(&sackhole_pool, tmp);
399 1.1 jonathan } else if (SEQ_LT(cur->start, lastack)) {
400 1.1 jonathan cur->start = lastack;
401 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->start))
402 1.1 jonathan cur->rxmit = cur->start;
403 1.1 jonathan break;
404 1.1 jonathan } else
405 1.1 jonathan break;
406 1.1 jonathan
407 1.1 jonathan }
408 1.1 jonathan }
409 1.1 jonathan
410 1.1 jonathan void
411 1.1 jonathan tcp_free_sackholes(struct tcpcb *tp)
412 1.1 jonathan {
413 1.1 jonathan struct sackhole *sack;
414 1.1 jonathan
415 1.1 jonathan /* Free up the SACK hole list. */
416 1.1 jonathan while (!TAILQ_EMPTY(&tp->snd_holes)) {
417 1.1 jonathan sack = TAILQ_FIRST(&tp->snd_holes);
418 1.12 kurahone tcp_sack_globalholes--;
419 1.1 jonathan TAILQ_REMOVE(&tp->snd_holes, sack, sackhole_q);
420 1.1 jonathan pool_put(&sackhole_pool, sack);
421 1.1 jonathan }
422 1.12 kurahone
423 1.12 kurahone tp->snd_numholes = 0;
424 1.1 jonathan }
425 1.1 jonathan
426 1.1 jonathan /*
427 1.1 jonathan * Implements the SACK response to a new ack, checking for partial acks
428 1.1 jonathan * in fast recovery.
429 1.1 jonathan */
430 1.1 jonathan void
431 1.1 jonathan tcp_sack_newack(struct tcpcb *tp, struct tcphdr *th)
432 1.1 jonathan {
433 1.1 jonathan if (tp->t_partialacks < 0) {
434 1.1 jonathan /*
435 1.1 jonathan * Not in fast recovery. Reset the duplicate ack
436 1.1 jonathan * counter.
437 1.1 jonathan */
438 1.1 jonathan tp->t_dupacks = 0;
439 1.1 jonathan } else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
440 1.1 jonathan /*
441 1.1 jonathan * Partial ack handling within a sack recovery episode.
442 1.1 jonathan * Keeping this very simple for now. When a partial ack
443 1.1 jonathan * is received, force snd_cwnd to a value that will allow
444 1.1 jonathan * the sender to transmit no more than 2 segments.
445 1.1 jonathan * If necessary, a fancier scheme can be adopted at a
446 1.1 jonathan * later point, but for now, the goal is to prevent the
447 1.1 jonathan * sender from bursting a large amount of data in the midst
448 1.1 jonathan * of sack recovery.
449 1.1 jonathan */
450 1.1 jonathan int num_segs = 1;
451 1.1 jonathan int sack_bytes_rxmt = 0;
452 1.1 jonathan
453 1.1 jonathan tp->t_partialacks++;
454 1.1 jonathan TCP_TIMER_DISARM(tp, TCPT_REXMT);
455 1.1 jonathan tp->t_rtttime = 0;
456 1.1 jonathan
457 1.2 yamt /*
458 1.2 yamt * send one or 2 segments based on how much new data was acked
459 1.2 yamt */
460 1.1 jonathan if (((th->th_ack - tp->snd_una) / tp->t_segsz) > 2)
461 1.1 jonathan num_segs = 2;
462 1.1 jonathan (void)tcp_sack_output(tp, &sack_bytes_rxmt);
463 1.2 yamt tp->snd_cwnd = sack_bytes_rxmt +
464 1.2 yamt (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_segsz;
465 1.1 jonathan tp->t_flags |= TF_ACKNOW;
466 1.1 jonathan (void) tcp_output(tp);
467 1.1 jonathan } else {
468 1.1 jonathan /*
469 1.1 jonathan * Complete ack, inflate the congestion window to
470 1.1 jonathan * ssthresh and exit fast recovery.
471 1.1 jonathan *
472 1.1 jonathan * Window inflation should have left us with approx.
473 1.1 jonathan * snd_ssthresh outstanding data. But in case we
474 1.1 jonathan * would be inclined to send a burst, better to do
475 1.1 jonathan * it via the slow start mechanism.
476 1.1 jonathan */
477 1.1 jonathan if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
478 1.1 jonathan tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
479 1.1 jonathan + tp->t_segsz;
480 1.1 jonathan else
481 1.1 jonathan tp->snd_cwnd = tp->snd_ssthresh;
482 1.1 jonathan tp->t_partialacks = -1;
483 1.1 jonathan tp->t_dupacks = 0;
484 1.1 jonathan if (SEQ_GT(th->th_ack, tp->snd_fack))
485 1.1 jonathan tp->snd_fack = th->th_ack;
486 1.1 jonathan }
487 1.1 jonathan }
488 1.1 jonathan
489 1.1 jonathan /*
490 1.1 jonathan * Returns pointer to a sackhole if there are any pending retransmissions;
491 1.1 jonathan * NULL otherwise.
492 1.1 jonathan */
493 1.1 jonathan struct sackhole *
494 1.1 jonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
495 1.1 jonathan {
496 1.1 jonathan struct sackhole *cur = NULL;
497 1.1 jonathan
498 1.1 jonathan if(!TCP_SACK_ENABLED(tp))
499 1.1 jonathan return (NULL);
500 1.1 jonathan
501 1.1 jonathan *sack_bytes_rexmt = 0;
502 1.1 jonathan TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
503 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->end)) {
504 1.2 yamt if (SEQ_LT(cur->rxmit, tp->snd_una)) {
505 1.2 yamt /* old SACK hole */
506 1.1 jonathan continue;
507 1.1 jonathan }
508 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
509 1.1 jonathan break;
510 1.1 jonathan }
511 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
512 1.1 jonathan }
513 1.1 jonathan
514 1.1 jonathan return (cur);
515 1.1 jonathan }
516 1.1 jonathan
517 1.1 jonathan /*
518 1.1 jonathan * After a timeout, the SACK list may be rebuilt. This SACK information
519 1.1 jonathan * should be used to avoid retransmitting SACKed data. This function
520 1.1 jonathan * traverses the SACK list to see if snd_nxt should be moved forward.
521 1.1 jonathan */
522 1.1 jonathan void
523 1.1 jonathan tcp_sack_adjust(struct tcpcb *tp)
524 1.1 jonathan {
525 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
526 1.1 jonathan struct sackhole *n = NULL;
527 1.1 jonathan
528 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes))
529 1.1 jonathan return; /* No holes */
530 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
531 1.1 jonathan return; /* We're already beyond any SACKed blocks */
532 1.1 jonathan
533 1.1 jonathan /*
534 1.1 jonathan * Two cases for which we want to advance snd_nxt:
535 1.1 jonathan * i) snd_nxt lies between end of one hole and beginning of another
536 1.1 jonathan * ii) snd_nxt lies between end of last hole and rcv_lastsack
537 1.1 jonathan */
538 1.1 jonathan while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
539 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
540 1.1 jonathan return;
541 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, n->start))
542 1.1 jonathan cur = n;
543 1.1 jonathan else {
544 1.1 jonathan tp->snd_nxt = n->start;
545 1.1 jonathan return;
546 1.1 jonathan }
547 1.1 jonathan }
548 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
549 1.1 jonathan return;
550 1.1 jonathan tp->snd_nxt = tp->rcv_lastsack;
551 1.1 jonathan
552 1.1 jonathan return;
553 1.1 jonathan }
554 1.9 yamt
555 1.9 yamt int
556 1.10 yamt tcp_sack_numblks(const struct tcpcb *tp)
557 1.9 yamt {
558 1.10 yamt int numblks;
559 1.9 yamt
560 1.10 yamt if (!TCP_SACK_ENABLED(tp)) {
561 1.9 yamt return 0;
562 1.9 yamt }
563 1.9 yamt
564 1.10 yamt numblks = (((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) ? 1 : 0) +
565 1.10 yamt tp->t_segqlen;
566 1.10 yamt
567 1.10 yamt if (numblks == 0) {
568 1.10 yamt return 0;
569 1.10 yamt }
570 1.10 yamt
571 1.10 yamt if (numblks > TCP_SACK_MAX) {
572 1.10 yamt numblks = TCP_SACK_MAX;
573 1.10 yamt }
574 1.10 yamt
575 1.10 yamt return numblks;
576 1.9 yamt }
577