tcp_sack.c revision 1.2 1 1.2 yamt /* $NetBSD: tcp_sack.c,v 1.2 2005/03/06 23:05:20 yamt Exp $ */
2 1.1 jonathan
3 1.1 jonathan /*
4 1.1 jonathan * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 1.1 jonathan * All rights reserved.
6 1.1 jonathan *
7 1.1 jonathan * This code is derived from software contributed to The NetBSD Foundation
8 1.1 jonathan * by Kentaro A. Kurahone.
9 1.1 jonathan *
10 1.1 jonathan * Redistribution and use in source and binary forms, with or without
11 1.1 jonathan * modification, are permitted provided that the following conditions
12 1.1 jonathan * are met:
13 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
14 1.1 jonathan * notice, this list of conditions and the following disclaimer.
15 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
17 1.1 jonathan * documentation and/or other materials provided with the distribution.
18 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
19 1.1 jonathan * must display the following acknowledgement:
20 1.1 jonathan * This product includes software developed by the NetBSD
21 1.1 jonathan * Foundation, Inc. and its contributors.
22 1.1 jonathan * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 jonathan * contributors may be used to endorse or promote products derived
24 1.1 jonathan * from this software without specific prior written permission.
25 1.1 jonathan *
26 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 jonathan * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 jonathan * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 jonathan * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 jonathan * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 jonathan * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 jonathan * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 jonathan * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 jonathan * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 jonathan * POSSIBILITY OF SUCH DAMAGE.
37 1.1 jonathan */
38 1.1 jonathan
39 1.1 jonathan /*
40 1.1 jonathan * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
41 1.1 jonathan * The Regents of the University of California. All rights reserved.
42 1.1 jonathan *
43 1.1 jonathan * Redistribution and use in source and binary forms, with or without
44 1.1 jonathan * modification, are permitted provided that the following conditions
45 1.1 jonathan * are met:
46 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
47 1.1 jonathan * notice, this list of conditions and the following disclaimer.
48 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
49 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
50 1.1 jonathan * documentation and/or other materials provided with the distribution.
51 1.1 jonathan * 4. Neither the name of the University nor the names of its contributors
52 1.1 jonathan * may be used to endorse or promote products derived from this software
53 1.1 jonathan * without specific prior written permission.
54 1.1 jonathan *
55 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 1.1 jonathan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 1.1 jonathan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 1.1 jonathan * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 1.1 jonathan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 1.1 jonathan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 1.1 jonathan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 1.1 jonathan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 1.1 jonathan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 1.1 jonathan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 1.1 jonathan * SUCH DAMAGE.
66 1.1 jonathan *
67 1.1 jonathan * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
68 1.1 jonathan * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
69 1.1 jonathan */
70 1.1 jonathan
71 1.1 jonathan /*
72 1.1 jonathan * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
73 1.1 jonathan *
74 1.1 jonathan * NRL grants permission for redistribution and use in source and binary
75 1.1 jonathan * forms, with or without modification, of the software and documentation
76 1.1 jonathan * created at NRL provided that the following conditions are met:
77 1.1 jonathan *
78 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
79 1.1 jonathan * notice, this list of conditions and the following disclaimer.
80 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
81 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
82 1.1 jonathan * documentation and/or other materials provided with the distribution.
83 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
84 1.1 jonathan * must display the following acknowledgements:
85 1.1 jonathan * This product includes software developed by the University of
86 1.1 jonathan * California, Berkeley and its contributors.
87 1.1 jonathan * This product includes software developed at the Information
88 1.1 jonathan * Technology Division, US Naval Research Laboratory.
89 1.1 jonathan * 4. Neither the name of the NRL nor the names of its contributors
90 1.1 jonathan * may be used to endorse or promote products derived from this software
91 1.1 jonathan * without specific prior written permission.
92 1.1 jonathan *
93 1.1 jonathan * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
94 1.1 jonathan * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
95 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
96 1.1 jonathan * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
97 1.1 jonathan * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
98 1.1 jonathan * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
99 1.1 jonathan * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
100 1.1 jonathan * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
101 1.1 jonathan * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
102 1.1 jonathan * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
103 1.1 jonathan * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
104 1.1 jonathan *
105 1.1 jonathan * The views and conclusions contained in the software and documentation
106 1.1 jonathan * are those of the authors and should not be interpreted as representing
107 1.1 jonathan * official policies, either expressed or implied, of the US Naval
108 1.1 jonathan * Research Laboratory (NRL).
109 1.1 jonathan */
110 1.1 jonathan
111 1.1 jonathan #include <sys/cdefs.h>
112 1.2 yamt __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.2 2005/03/06 23:05:20 yamt Exp $");
113 1.1 jonathan
114 1.1 jonathan #include "opt_inet.h"
115 1.1 jonathan #include "opt_ipsec.h"
116 1.1 jonathan #include "opt_inet_csum.h"
117 1.1 jonathan #include "opt_tcp_debug.h"
118 1.1 jonathan
119 1.1 jonathan #include <sys/param.h>
120 1.1 jonathan #include <sys/systm.h>
121 1.1 jonathan #include <sys/malloc.h>
122 1.1 jonathan #include <sys/mbuf.h>
123 1.1 jonathan #include <sys/protosw.h>
124 1.1 jonathan #include <sys/socket.h>
125 1.1 jonathan #include <sys/socketvar.h>
126 1.1 jonathan #include <sys/errno.h>
127 1.1 jonathan #include <sys/syslog.h>
128 1.1 jonathan #include <sys/pool.h>
129 1.1 jonathan #include <sys/domain.h>
130 1.1 jonathan #include <sys/kernel.h>
131 1.1 jonathan
132 1.1 jonathan #include <net/if.h>
133 1.1 jonathan #include <net/route.h>
134 1.1 jonathan #include <net/if_types.h>
135 1.1 jonathan
136 1.1 jonathan #include <netinet/in.h>
137 1.1 jonathan #include <netinet/in_systm.h>
138 1.1 jonathan #include <netinet/ip.h>
139 1.1 jonathan #include <netinet/in_pcb.h>
140 1.1 jonathan #include <netinet/in_var.h>
141 1.1 jonathan #include <netinet/ip_var.h>
142 1.1 jonathan
143 1.1 jonathan #ifdef INET6
144 1.1 jonathan #ifndef INET
145 1.1 jonathan #include <netinet/in.h>
146 1.1 jonathan #endif
147 1.1 jonathan #include <netinet/ip6.h>
148 1.1 jonathan #include <netinet6/ip6_var.h>
149 1.1 jonathan #include <netinet6/in6_pcb.h>
150 1.1 jonathan #include <netinet6/ip6_var.h>
151 1.1 jonathan #include <netinet6/in6_var.h>
152 1.1 jonathan #include <netinet/icmp6.h>
153 1.1 jonathan #include <netinet6/nd6.h>
154 1.1 jonathan #endif
155 1.1 jonathan
156 1.1 jonathan #ifndef INET6
157 1.1 jonathan /* always need ip6.h for IP6_EXTHDR_GET */
158 1.1 jonathan #include <netinet/ip6.h>
159 1.1 jonathan #endif
160 1.1 jonathan
161 1.1 jonathan #include <netinet/tcp.h>
162 1.1 jonathan #include <netinet/tcp_fsm.h>
163 1.1 jonathan #include <netinet/tcp_seq.h>
164 1.1 jonathan #include <netinet/tcp_timer.h>
165 1.1 jonathan #include <netinet/tcp_var.h>
166 1.1 jonathan #include <netinet/tcpip.h>
167 1.1 jonathan #include <netinet/tcp_debug.h>
168 1.1 jonathan
169 1.1 jonathan #include <machine/stdarg.h>
170 1.1 jonathan
/*
 * Pick the earlier (SEQ_MIN) or later (SEQ_MAX) of two TCP sequence
 * numbers, using the SEQ_LT()/SEQ_GT() comparisons rather than plain
 * integer ordering.  When neither comparison holds, (b) is returned.
 * Both arguments may be evaluated more than once.
 */
#define SEQ_MIN(a, b)	(SEQ_LT((a), (b)) ? (a) : (b))
#define SEQ_MAX(a, b)	(SEQ_GT((a), (b)) ? (a) : (b))
173 1.1 jonathan
174 1.1 jonathan /* SACK block pool. */
175 1.1 jonathan POOL_INIT(sackhole_pool, sizeof(struct sackhole), 0, 0, 0, "sackholepl", NULL);
176 1.1 jonathan
177 1.1 jonathan void
178 1.1 jonathan tcp_update_sack_list(struct tcpcb *tp)
179 1.1 jonathan {
180 1.1 jonathan int i = 0;
181 1.1 jonathan struct ipqent *tiqe = NULL;
182 1.1 jonathan
183 1.1 jonathan if (!TCP_SACK_ENABLED(tp) || (tp->t_flags & TF_SIGNATURE)) {
184 1.1 jonathan /* Can't SACK this connection. */
185 1.1 jonathan return;
186 1.1 jonathan }
187 1.1 jonathan
188 1.1 jonathan /*
189 1.1 jonathan * If possible, tack on the D-SACK block. (RFC2883)
190 1.1 jonathan */
191 1.1 jonathan if (tp->rcv_sack_flags & TCPSACK_HAVED) {
192 1.1 jonathan tp->rcv_sack_block[0].left = tp->rcv_dsack_block.left;
193 1.1 jonathan tp->rcv_sack_block[0].right = tp->rcv_dsack_block.right;
194 1.1 jonathan tp->rcv_sack_flags &= ~TCPSACK_HAVED;
195 1.1 jonathan i++;
196 1.1 jonathan }
197 1.1 jonathan
198 1.1 jonathan /*
199 1.1 jonathan * Build up a list of holes in the TCP space. Note that
200 1.1 jonathan * the first SACK block is always the most recent segment
201 1.1 jonathan * received.
202 1.1 jonathan */
203 1.1 jonathan TAILQ_FOREACH(tiqe, &tp->timeq, ipqe_timeq) {
204 1.1 jonathan tp->rcv_sack_block[i].left = tiqe->ipqe_seq;
205 1.1 jonathan tp->rcv_sack_block[i].right = tiqe->ipqe_seq + tiqe->ipqe_len;
206 1.1 jonathan i++;
207 1.1 jonathan if (i >= TCP_SACK_MAX) {
208 1.1 jonathan break;
209 1.1 jonathan }
210 1.1 jonathan }
211 1.1 jonathan
212 1.1 jonathan /* If we can SACK, do so. */
213 1.1 jonathan tp->rcv_sack_num = i;
214 1.1 jonathan }
215 1.1 jonathan
216 1.1 jonathan void
217 1.1 jonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
218 1.1 jonathan {
219 1.1 jonathan if (TCP_SACK_ENABLED(tp)) {
220 1.1 jonathan tp->rcv_dsack_block.left = seq;
221 1.1 jonathan tp->rcv_dsack_block.right = seq + len;
222 1.1 jonathan tp->rcv_sack_flags |= TCPSACK_HAVED;
223 1.1 jonathan }
224 1.1 jonathan }
225 1.1 jonathan
226 1.1 jonathan void
227 1.1 jonathan tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
228 1.1 jonathan {
229 1.1 jonathan struct sackblk t_sack_block[TCP_SACK_MAX];
230 1.1 jonathan struct sackblk *sack = NULL;
231 1.1 jonathan struct sackhole *cur = NULL;
232 1.1 jonathan struct sackhole *tmp = NULL;
233 1.1 jonathan u_int32_t *lp = (u_int32_t *) (cp + 2);
234 1.1 jonathan int i, j, num_sack_blks;
235 1.1 jonathan tcp_seq left, right, acked;
236 1.1 jonathan
237 1.1 jonathan /*
238 1.1 jonathan * If we aren't processing SACK responses, or the peer
239 1.1 jonathan * sends us a sack option with invalid length, don't
240 1.1 jonathan * update the scoreboard.
241 1.1 jonathan */
242 1.1 jonathan if (!TCP_SACK_ENABLED(tp) ||
243 1.1 jonathan (optlen % 8 != 2 || optlen < 10)) {
244 1.1 jonathan return;
245 1.1 jonathan }
246 1.1 jonathan
247 1.1 jonathan /*
248 1.1 jonathan * Extract SACK blocks.
249 1.1 jonathan *
250 1.1 jonathan * Note that t_sack_block is sorted so that we only need to do
251 1.1 jonathan * one pass over the sequence number space. (SACK "fast-path")
252 1.1 jonathan */
253 1.1 jonathan num_sack_blks = optlen / 8;
254 1.1 jonathan acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
255 1.1 jonathan for (i = 0; i < num_sack_blks; i++, lp += 2) {
256 1.1 jonathan left = ntohl(*lp);
257 1.1 jonathan right = ntohl(*(lp + 1));
258 1.1 jonathan
259 1.1 jonathan if ((SEQ_LEQ(right, acked)) ||
260 1.1 jonathan SEQ_GEQ(left, tp->snd_max) ||
261 1.1 jonathan SEQ_GEQ(left, right)) {
262 1.1 jonathan /* SACK entry that's old, or invalid. */
263 1.1 jonathan i--;
264 1.1 jonathan num_sack_blks--;
265 1.1 jonathan continue;
266 1.1 jonathan }
267 1.1 jonathan
268 1.1 jonathan /* Insertion sort. */
269 1.2 yamt for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
270 1.2 yamt j--) {
271 1.1 jonathan t_sack_block[j].left = t_sack_block[j - 1].left;
272 1.1 jonathan t_sack_block[j].right = t_sack_block[j - 1].right;
273 1.1 jonathan }
274 1.1 jonathan t_sack_block[j].left = left;
275 1.1 jonathan t_sack_block[j].right = right;
276 1.1 jonathan }
277 1.1 jonathan
278 1.1 jonathan /* Update the scoreboard. */
279 1.1 jonathan cur = TAILQ_FIRST(&tp->snd_holes);
280 1.1 jonathan for (i = 0; i < num_sack_blks; i++) {
281 1.1 jonathan sack = &t_sack_block[i];
282 1.1 jonathan /*
283 1.1 jonathan * FACK TCP. Update snd_fack so we can enter Fast
284 1.1 jonathan * Recovery early.
285 1.1 jonathan */
286 1.1 jonathan if (SEQ_GEQ(sack->right, tp->snd_fack))
287 1.1 jonathan tp->snd_fack = sack->right;
288 1.1 jonathan
289 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes)) {
290 1.1 jonathan /* First hole. */
291 1.2 yamt cur = (struct sackhole *)
292 1.2 yamt pool_get(&sackhole_pool, PR_NOWAIT);
293 1.1 jonathan if (cur == NULL) {
294 1.1 jonathan /* ENOBUFS, bail out*/
295 1.1 jonathan return;
296 1.1 jonathan }
297 1.1 jonathan cur->start = th->th_ack;
298 1.1 jonathan cur->end = sack->left;
299 1.1 jonathan cur->rxmit = cur->start;
300 1.1 jonathan tp->rcv_lastsack = sack->right;
301 1.1 jonathan TAILQ_INSERT_HEAD(&tp->snd_holes, cur, sackhole_q);
302 1.1 jonathan continue; /* With next sack block */
303 1.1 jonathan }
304 1.1 jonathan
305 1.1 jonathan /* Go through the list of holes. */
306 1.1 jonathan while (cur) {
307 1.1 jonathan if (SEQ_LEQ(sack->left, cur->start))
308 1.1 jonathan /* SACKs data before the current hole */
309 1.1 jonathan break; /* No use going through more holes */
310 1.1 jonathan
311 1.1 jonathan if (SEQ_GEQ(sack->left, cur->end)) {
312 1.1 jonathan /* SACKs data beyond the current hole */
313 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
314 1.1 jonathan continue;
315 1.1 jonathan }
316 1.1 jonathan
317 1.1 jonathan if (SEQ_LEQ(sack->left, cur->start)) {
318 1.1 jonathan /* Data acks at least the beginning of hole */
319 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
320 1.1 jonathan /* Acks entire hole, so delete hole */
321 1.1 jonathan tmp = cur;
322 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
323 1.2 yamt TAILQ_REMOVE(&tp->snd_holes, tmp,
324 1.2 yamt sackhole_q);
325 1.1 jonathan pool_put(&sackhole_pool, tmp);
326 1.1 jonathan break;
327 1.1 jonathan }
328 1.1 jonathan
329 1.1 jonathan /* Otherwise, move start of hole forward */
330 1.1 jonathan cur->start = sack->right;
331 1.1 jonathan cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
332 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
333 1.1 jonathan break;
334 1.1 jonathan }
335 1.1 jonathan
336 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
337 1.1 jonathan /* Move end of hole backward. */
338 1.1 jonathan cur->end = sack->left;
339 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
340 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
341 1.1 jonathan break;
342 1.1 jonathan }
343 1.1 jonathan
344 1.1 jonathan if (SEQ_LT(cur->start, sack->left) &&
345 1.1 jonathan SEQ_GT(cur->end, sack->right)) {
346 1.1 jonathan /*
347 1.1 jonathan * ACKs some data in middle of a hole; need to
348 1.1 jonathan * split current hole
349 1.1 jonathan */
350 1.1 jonathan tmp = (struct sackhole *)
351 1.2 yamt pool_get(&sackhole_pool, PR_NOWAIT);
352 1.1 jonathan if (tmp == NULL) {
353 1.1 jonathan /* ENOBUFS, bail out. */
354 1.1 jonathan return;
355 1.1 jonathan }
356 1.1 jonathan tmp->start = sack->right;
357 1.1 jonathan tmp->end = cur->end;
358 1.1 jonathan tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
359 1.1 jonathan cur->end = sack->left;
360 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
361 1.1 jonathan TAILQ_INSERT_AFTER(&tp->snd_holes, cur, tmp,
362 1.1 jonathan sackhole_q);
363 1.1 jonathan cur = TAILQ_NEXT(tmp, sackhole_q);
364 1.1 jonathan break;
365 1.1 jonathan }
366 1.1 jonathan }
367 1.1 jonathan
368 1.1 jonathan /* At this point, we have reached the tail of the list. */
369 1.1 jonathan if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
370 1.1 jonathan /*
371 1.1 jonathan * Need to append new hole at end.
372 1.1 jonathan */
373 1.1 jonathan tmp = (struct sackhole *)
374 1.1 jonathan pool_get(&sackhole_pool, PR_NOWAIT);
375 1.1 jonathan if (tmp == NULL)
376 1.1 jonathan continue; /* ENOBUFS */
377 1.1 jonathan tmp->start = tp->rcv_lastsack;
378 1.1 jonathan tmp->end = sack->left;
379 1.1 jonathan tmp->rxmit = tmp->start;
380 1.1 jonathan tp->rcv_lastsack = sack->right;
381 1.1 jonathan TAILQ_INSERT_TAIL(&tp->snd_holes, tmp, sackhole_q);
382 1.1 jonathan cur = tmp;
383 1.1 jonathan }
384 1.1 jonathan }
385 1.1 jonathan }
386 1.1 jonathan
387 1.1 jonathan void
388 1.1 jonathan tcp_del_sackholes(struct tcpcb *tp, struct tcphdr *th)
389 1.1 jonathan {
390 1.1 jonathan /* Max because this could be an older ack that just arrived. */
391 1.1 jonathan tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
392 1.1 jonathan th->th_ack : tp->snd_una;
393 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
394 1.1 jonathan struct sackhole *tmp;
395 1.1 jonathan
396 1.1 jonathan while (cur) {
397 1.1 jonathan if (SEQ_LEQ(cur->end, lastack)) {
398 1.1 jonathan tmp = cur;
399 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
400 1.1 jonathan TAILQ_REMOVE(&tp->snd_holes, tmp, sackhole_q);
401 1.1 jonathan pool_put(&sackhole_pool, tmp);
402 1.1 jonathan } else if (SEQ_LT(cur->start, lastack)) {
403 1.1 jonathan cur->start = lastack;
404 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->start))
405 1.1 jonathan cur->rxmit = cur->start;
406 1.1 jonathan break;
407 1.1 jonathan } else
408 1.1 jonathan break;
409 1.1 jonathan
410 1.1 jonathan }
411 1.1 jonathan }
412 1.1 jonathan
413 1.1 jonathan void
414 1.1 jonathan tcp_free_sackholes(struct tcpcb *tp)
415 1.1 jonathan {
416 1.1 jonathan struct sackhole *sack;
417 1.1 jonathan
418 1.1 jonathan /* Free up the SACK hole list. */
419 1.1 jonathan while (!TAILQ_EMPTY(&tp->snd_holes)) {
420 1.1 jonathan sack = TAILQ_FIRST(&tp->snd_holes);
421 1.1 jonathan TAILQ_REMOVE(&tp->snd_holes, sack, sackhole_q);
422 1.1 jonathan pool_put(&sackhole_pool, sack);
423 1.1 jonathan }
424 1.1 jonathan }
425 1.1 jonathan
426 1.1 jonathan /*
427 1.1 jonathan * Implements the SACK response to a new ack, checking for partial acks
428 1.1 jonathan * in fast recovery.
429 1.1 jonathan */
430 1.1 jonathan void
431 1.1 jonathan tcp_sack_newack(struct tcpcb *tp, struct tcphdr *th)
432 1.1 jonathan {
433 1.1 jonathan if (tp->t_partialacks < 0) {
434 1.1 jonathan /*
435 1.1 jonathan * Not in fast recovery. Reset the duplicate ack
436 1.1 jonathan * counter.
437 1.1 jonathan */
438 1.1 jonathan tp->t_dupacks = 0;
439 1.1 jonathan } else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
440 1.1 jonathan /*
441 1.1 jonathan * Partial ack handling within a sack recovery episode.
442 1.1 jonathan * Keeping this very simple for now. When a partial ack
443 1.1 jonathan * is received, force snd_cwnd to a value that will allow
444 1.1 jonathan * the sender to transmit no more than 2 segments.
445 1.1 jonathan * If necessary, a fancier scheme can be adopted at a
446 1.1 jonathan * later point, but for now, the goal is to prevent the
447 1.1 jonathan * sender from bursting a large amount of data in the midst
448 1.1 jonathan * of sack recovery.
449 1.1 jonathan */
450 1.1 jonathan int num_segs = 1;
451 1.1 jonathan int sack_bytes_rxmt = 0;
452 1.1 jonathan
453 1.1 jonathan tp->t_partialacks++;
454 1.1 jonathan TCP_TIMER_DISARM(tp, TCPT_REXMT);
455 1.1 jonathan tp->t_rtttime = 0;
456 1.1 jonathan
457 1.2 yamt /*
458 1.2 yamt * send one or 2 segments based on how much new data was acked
459 1.2 yamt */
460 1.1 jonathan if (((th->th_ack - tp->snd_una) / tp->t_segsz) > 2)
461 1.1 jonathan num_segs = 2;
462 1.1 jonathan (void)tcp_sack_output(tp, &sack_bytes_rxmt);
463 1.2 yamt tp->snd_cwnd = sack_bytes_rxmt +
464 1.2 yamt (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_segsz;
465 1.1 jonathan tp->t_flags |= TF_ACKNOW;
466 1.1 jonathan (void) tcp_output(tp);
467 1.1 jonathan } else {
468 1.1 jonathan /*
469 1.1 jonathan * Complete ack, inflate the congestion window to
470 1.1 jonathan * ssthresh and exit fast recovery.
471 1.1 jonathan *
472 1.1 jonathan * Window inflation should have left us with approx.
473 1.1 jonathan * snd_ssthresh outstanding data. But in case we
474 1.1 jonathan * would be inclined to send a burst, better to do
475 1.1 jonathan * it via the slow start mechanism.
476 1.1 jonathan */
477 1.1 jonathan if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
478 1.1 jonathan tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
479 1.1 jonathan + tp->t_segsz;
480 1.1 jonathan else
481 1.1 jonathan tp->snd_cwnd = tp->snd_ssthresh;
482 1.1 jonathan tp->t_partialacks = -1;
483 1.1 jonathan tp->t_dupacks = 0;
484 1.1 jonathan if (SEQ_GT(th->th_ack, tp->snd_fack))
485 1.1 jonathan tp->snd_fack = th->th_ack;
486 1.1 jonathan }
487 1.1 jonathan }
488 1.1 jonathan
489 1.1 jonathan /*
490 1.1 jonathan * Returns pointer to a sackhole if there are any pending retransmissions;
491 1.1 jonathan * NULL otherwise.
492 1.1 jonathan */
493 1.1 jonathan struct sackhole *
494 1.1 jonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
495 1.1 jonathan {
496 1.1 jonathan struct sackhole *cur = NULL;
497 1.1 jonathan
498 1.1 jonathan if(!TCP_SACK_ENABLED(tp))
499 1.1 jonathan return (NULL);
500 1.1 jonathan
501 1.1 jonathan *sack_bytes_rexmt = 0;
502 1.1 jonathan TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
503 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->end)) {
504 1.2 yamt if (SEQ_LT(cur->rxmit, tp->snd_una)) {
505 1.2 yamt /* old SACK hole */
506 1.1 jonathan continue;
507 1.1 jonathan }
508 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
509 1.1 jonathan break;
510 1.1 jonathan }
511 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
512 1.1 jonathan }
513 1.1 jonathan
514 1.1 jonathan return (cur);
515 1.1 jonathan }
516 1.1 jonathan
517 1.1 jonathan /*
518 1.1 jonathan * After a timeout, the SACK list may be rebuilt. This SACK information
519 1.1 jonathan * should be used to avoid retransmitting SACKed data. This function
520 1.1 jonathan * traverses the SACK list to see if snd_nxt should be moved forward.
521 1.1 jonathan */
522 1.1 jonathan void
523 1.1 jonathan tcp_sack_adjust(struct tcpcb *tp)
524 1.1 jonathan {
525 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
526 1.1 jonathan struct sackhole *n = NULL;
527 1.1 jonathan
528 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes))
529 1.1 jonathan return; /* No holes */
530 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
531 1.1 jonathan return; /* We're already beyond any SACKed blocks */
532 1.1 jonathan
533 1.1 jonathan /*
534 1.1 jonathan * Two cases for which we want to advance snd_nxt:
535 1.1 jonathan * i) snd_nxt lies between end of one hole and beginning of another
536 1.1 jonathan * ii) snd_nxt lies between end of last hole and rcv_lastsack
537 1.1 jonathan */
538 1.1 jonathan while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
539 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
540 1.1 jonathan return;
541 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, n->start))
542 1.1 jonathan cur = n;
543 1.1 jonathan else {
544 1.1 jonathan tp->snd_nxt = n->start;
545 1.1 jonathan return;
546 1.1 jonathan }
547 1.1 jonathan }
548 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
549 1.1 jonathan return;
550 1.1 jonathan tp->snd_nxt = tp->rcv_lastsack;
551 1.1 jonathan
552 1.1 jonathan return;
553 1.1 jonathan }
554