tcp_sack.c revision 1.3 1 1.3 yamt /* $NetBSD: tcp_sack.c,v 1.3 2005/03/06 23:05:56 yamt Exp $ */
2 1.1 jonathan
3 1.1 jonathan /*
4 1.1 jonathan * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 1.1 jonathan * All rights reserved.
6 1.1 jonathan *
7 1.1 jonathan * This code is derived from software contributed to The NetBSD Foundation
8 1.1 jonathan * by Kentaro A. Kurahone.
9 1.1 jonathan *
10 1.1 jonathan * Redistribution and use in source and binary forms, with or without
11 1.1 jonathan * modification, are permitted provided that the following conditions
12 1.1 jonathan * are met:
13 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
14 1.1 jonathan * notice, this list of conditions and the following disclaimer.
15 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
17 1.1 jonathan * documentation and/or other materials provided with the distribution.
18 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
19 1.1 jonathan * must display the following acknowledgement:
20 1.1 jonathan * This product includes software developed by the NetBSD
21 1.1 jonathan * Foundation, Inc. and its contributors.
22 1.1 jonathan * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 jonathan * contributors may be used to endorse or promote products derived
24 1.1 jonathan * from this software without specific prior written permission.
25 1.1 jonathan *
26 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 jonathan * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 jonathan * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 jonathan * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 jonathan * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 jonathan * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 jonathan * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 jonathan * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 jonathan * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 jonathan * POSSIBILITY OF SUCH DAMAGE.
37 1.1 jonathan */
38 1.1 jonathan
39 1.1 jonathan /*
40 1.1 jonathan * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
41 1.1 jonathan * The Regents of the University of California. All rights reserved.
42 1.1 jonathan *
43 1.1 jonathan * Redistribution and use in source and binary forms, with or without
44 1.1 jonathan * modification, are permitted provided that the following conditions
45 1.1 jonathan * are met:
46 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
47 1.1 jonathan * notice, this list of conditions and the following disclaimer.
48 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
49 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
50 1.1 jonathan * documentation and/or other materials provided with the distribution.
51 1.1 jonathan * 4. Neither the name of the University nor the names of its contributors
52 1.1 jonathan * may be used to endorse or promote products derived from this software
53 1.1 jonathan * without specific prior written permission.
54 1.1 jonathan *
55 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 1.1 jonathan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 1.1 jonathan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 1.1 jonathan * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 1.1 jonathan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 1.1 jonathan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 1.1 jonathan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 1.1 jonathan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 1.1 jonathan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 1.1 jonathan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 1.1 jonathan * SUCH DAMAGE.
66 1.1 jonathan *
67 1.1 jonathan * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
68 1.1 jonathan * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
69 1.1 jonathan */
70 1.1 jonathan
71 1.1 jonathan /*
72 1.1 jonathan * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
73 1.1 jonathan *
74 1.1 jonathan * NRL grants permission for redistribution and use in source and binary
75 1.1 jonathan * forms, with or without modification, of the software and documentation
76 1.1 jonathan * created at NRL provided that the following conditions are met:
77 1.1 jonathan *
78 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
79 1.1 jonathan * notice, this list of conditions and the following disclaimer.
80 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
81 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
82 1.1 jonathan * documentation and/or other materials provided with the distribution.
83 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
84 1.1 jonathan * must display the following acknowledgements:
85 1.1 jonathan * This product includes software developed by the University of
86 1.1 jonathan * California, Berkeley and its contributors.
87 1.1 jonathan * This product includes software developed at the Information
88 1.1 jonathan * Technology Division, US Naval Research Laboratory.
89 1.1 jonathan * 4. Neither the name of the NRL nor the names of its contributors
90 1.1 jonathan * may be used to endorse or promote products derived from this software
91 1.1 jonathan * without specific prior written permission.
92 1.1 jonathan *
93 1.1 jonathan * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
94 1.1 jonathan * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
95 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
96 1.1 jonathan * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
97 1.1 jonathan * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
98 1.1 jonathan * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
99 1.1 jonathan * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
100 1.1 jonathan * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
101 1.1 jonathan * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
102 1.1 jonathan * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
103 1.1 jonathan * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
104 1.1 jonathan *
105 1.1 jonathan * The views and conclusions contained in the software and documentation
106 1.1 jonathan * are those of the authors and should not be interpreted as representing
107 1.1 jonathan * official policies, either expressed or implied, of the US Naval
108 1.1 jonathan * Research Laboratory (NRL).
109 1.1 jonathan */
110 1.1 jonathan
111 1.1 jonathan #include <sys/cdefs.h>
112 1.3 yamt __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.3 2005/03/06 23:05:56 yamt Exp $");
113 1.1 jonathan
114 1.1 jonathan #include "opt_inet.h"
115 1.1 jonathan #include "opt_ipsec.h"
116 1.1 jonathan #include "opt_inet_csum.h"
117 1.1 jonathan #include "opt_tcp_debug.h"
118 1.1 jonathan
119 1.1 jonathan #include <sys/param.h>
120 1.1 jonathan #include <sys/systm.h>
121 1.1 jonathan #include <sys/malloc.h>
122 1.1 jonathan #include <sys/mbuf.h>
123 1.1 jonathan #include <sys/protosw.h>
124 1.1 jonathan #include <sys/socket.h>
125 1.1 jonathan #include <sys/socketvar.h>
126 1.1 jonathan #include <sys/errno.h>
127 1.1 jonathan #include <sys/syslog.h>
128 1.1 jonathan #include <sys/pool.h>
129 1.1 jonathan #include <sys/domain.h>
130 1.1 jonathan #include <sys/kernel.h>
131 1.1 jonathan
132 1.1 jonathan #include <net/if.h>
133 1.1 jonathan #include <net/route.h>
134 1.1 jonathan #include <net/if_types.h>
135 1.1 jonathan
136 1.1 jonathan #include <netinet/in.h>
137 1.1 jonathan #include <netinet/in_systm.h>
138 1.1 jonathan #include <netinet/ip.h>
139 1.1 jonathan #include <netinet/in_pcb.h>
140 1.1 jonathan #include <netinet/in_var.h>
141 1.1 jonathan #include <netinet/ip_var.h>
142 1.1 jonathan
143 1.1 jonathan #ifdef INET6
144 1.1 jonathan #ifndef INET
145 1.1 jonathan #include <netinet/in.h>
146 1.1 jonathan #endif
147 1.1 jonathan #include <netinet/ip6.h>
148 1.1 jonathan #include <netinet6/ip6_var.h>
149 1.1 jonathan #include <netinet6/in6_pcb.h>
150 1.1 jonathan #include <netinet6/ip6_var.h>
151 1.1 jonathan #include <netinet6/in6_var.h>
152 1.1 jonathan #include <netinet/icmp6.h>
153 1.1 jonathan #include <netinet6/nd6.h>
154 1.1 jonathan #endif
155 1.1 jonathan
156 1.1 jonathan #ifndef INET6
157 1.1 jonathan /* always need ip6.h for IP6_EXTHDR_GET */
158 1.1 jonathan #include <netinet/ip6.h>
159 1.1 jonathan #endif
160 1.1 jonathan
161 1.1 jonathan #include <netinet/tcp.h>
162 1.1 jonathan #include <netinet/tcp_fsm.h>
163 1.1 jonathan #include <netinet/tcp_seq.h>
164 1.1 jonathan #include <netinet/tcp_timer.h>
165 1.1 jonathan #include <netinet/tcp_var.h>
166 1.1 jonathan #include <netinet/tcpip.h>
167 1.1 jonathan #include <netinet/tcp_debug.h>
168 1.1 jonathan
169 1.1 jonathan #include <machine/stdarg.h>
170 1.1 jonathan
171 1.1 jonathan #define SEQ_MIN(a, b) ((SEQ_LT(a, b)) ? (a) : (b))
172 1.1 jonathan #define SEQ_MAX(a, b) ((SEQ_GT(a, b)) ? (a) : (b))
173 1.1 jonathan
174 1.1 jonathan /* SACK block pool. */
175 1.1 jonathan POOL_INIT(sackhole_pool, sizeof(struct sackhole), 0, 0, 0, "sackholepl", NULL);
176 1.1 jonathan
177 1.1 jonathan void
178 1.1 jonathan tcp_update_sack_list(struct tcpcb *tp)
179 1.1 jonathan {
180 1.1 jonathan int i = 0;
181 1.1 jonathan struct ipqent *tiqe = NULL;
182 1.1 jonathan
183 1.1 jonathan if (!TCP_SACK_ENABLED(tp) || (tp->t_flags & TF_SIGNATURE)) {
184 1.1 jonathan /* Can't SACK this connection. */
185 1.1 jonathan return;
186 1.1 jonathan }
187 1.1 jonathan
188 1.1 jonathan /*
189 1.1 jonathan * If possible, tack on the D-SACK block. (RFC2883)
190 1.1 jonathan */
191 1.1 jonathan if (tp->rcv_sack_flags & TCPSACK_HAVED) {
192 1.1 jonathan tp->rcv_sack_block[0].left = tp->rcv_dsack_block.left;
193 1.1 jonathan tp->rcv_sack_block[0].right = tp->rcv_dsack_block.right;
194 1.1 jonathan tp->rcv_sack_flags &= ~TCPSACK_HAVED;
195 1.1 jonathan i++;
196 1.1 jonathan }
197 1.1 jonathan
198 1.1 jonathan /*
199 1.1 jonathan * Build up a list of holes in the TCP space. Note that
200 1.1 jonathan * the first SACK block is always the most recent segment
201 1.1 jonathan * received.
202 1.1 jonathan */
203 1.1 jonathan TAILQ_FOREACH(tiqe, &tp->timeq, ipqe_timeq) {
204 1.1 jonathan tp->rcv_sack_block[i].left = tiqe->ipqe_seq;
205 1.1 jonathan tp->rcv_sack_block[i].right = tiqe->ipqe_seq + tiqe->ipqe_len;
206 1.1 jonathan i++;
207 1.1 jonathan if (i >= TCP_SACK_MAX) {
208 1.1 jonathan break;
209 1.1 jonathan }
210 1.1 jonathan }
211 1.1 jonathan
212 1.1 jonathan /* If we can SACK, do so. */
213 1.1 jonathan tp->rcv_sack_num = i;
214 1.1 jonathan }
215 1.1 jonathan
216 1.1 jonathan void
217 1.1 jonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
218 1.1 jonathan {
219 1.1 jonathan if (TCP_SACK_ENABLED(tp)) {
220 1.1 jonathan tp->rcv_dsack_block.left = seq;
221 1.1 jonathan tp->rcv_dsack_block.right = seq + len;
222 1.1 jonathan tp->rcv_sack_flags |= TCPSACK_HAVED;
223 1.1 jonathan }
224 1.1 jonathan }
225 1.1 jonathan
226 1.1 jonathan void
227 1.1 jonathan tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
228 1.1 jonathan {
229 1.1 jonathan struct sackblk t_sack_block[TCP_SACK_MAX];
230 1.1 jonathan struct sackblk *sack = NULL;
231 1.1 jonathan struct sackhole *cur = NULL;
232 1.1 jonathan struct sackhole *tmp = NULL;
233 1.1 jonathan u_int32_t *lp = (u_int32_t *) (cp + 2);
234 1.1 jonathan int i, j, num_sack_blks;
235 1.1 jonathan tcp_seq left, right, acked;
236 1.1 jonathan
237 1.1 jonathan /*
238 1.1 jonathan * If we aren't processing SACK responses, or the peer
239 1.1 jonathan * sends us a sack option with invalid length, don't
240 1.1 jonathan * update the scoreboard.
241 1.1 jonathan */
242 1.1 jonathan if (!TCP_SACK_ENABLED(tp) ||
243 1.1 jonathan (optlen % 8 != 2 || optlen < 10)) {
244 1.1 jonathan return;
245 1.1 jonathan }
246 1.1 jonathan
247 1.1 jonathan /*
248 1.1 jonathan * Extract SACK blocks.
249 1.1 jonathan *
250 1.1 jonathan * Note that t_sack_block is sorted so that we only need to do
251 1.1 jonathan * one pass over the sequence number space. (SACK "fast-path")
252 1.1 jonathan */
253 1.1 jonathan num_sack_blks = optlen / 8;
254 1.1 jonathan acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
255 1.1 jonathan for (i = 0; i < num_sack_blks; i++, lp += 2) {
256 1.3 yamt memcpy(&left, lp, sizeof(*lp));
257 1.3 yamt memcpy(&right, lp + 1, sizeof(*lp));
258 1.3 yamt left = ntohl(left);
259 1.3 yamt right = ntohl(right);
260 1.1 jonathan
261 1.1 jonathan if ((SEQ_LEQ(right, acked)) ||
262 1.1 jonathan SEQ_GEQ(left, tp->snd_max) ||
263 1.1 jonathan SEQ_GEQ(left, right)) {
264 1.1 jonathan /* SACK entry that's old, or invalid. */
265 1.1 jonathan i--;
266 1.1 jonathan num_sack_blks--;
267 1.1 jonathan continue;
268 1.1 jonathan }
269 1.1 jonathan
270 1.1 jonathan /* Insertion sort. */
271 1.2 yamt for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
272 1.2 yamt j--) {
273 1.1 jonathan t_sack_block[j].left = t_sack_block[j - 1].left;
274 1.1 jonathan t_sack_block[j].right = t_sack_block[j - 1].right;
275 1.1 jonathan }
276 1.1 jonathan t_sack_block[j].left = left;
277 1.1 jonathan t_sack_block[j].right = right;
278 1.1 jonathan }
279 1.1 jonathan
280 1.1 jonathan /* Update the scoreboard. */
281 1.1 jonathan cur = TAILQ_FIRST(&tp->snd_holes);
282 1.1 jonathan for (i = 0; i < num_sack_blks; i++) {
283 1.1 jonathan sack = &t_sack_block[i];
284 1.1 jonathan /*
285 1.1 jonathan * FACK TCP. Update snd_fack so we can enter Fast
286 1.1 jonathan * Recovery early.
287 1.1 jonathan */
288 1.1 jonathan if (SEQ_GEQ(sack->right, tp->snd_fack))
289 1.1 jonathan tp->snd_fack = sack->right;
290 1.1 jonathan
291 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes)) {
292 1.1 jonathan /* First hole. */
293 1.2 yamt cur = (struct sackhole *)
294 1.2 yamt pool_get(&sackhole_pool, PR_NOWAIT);
295 1.1 jonathan if (cur == NULL) {
296 1.1 jonathan /* ENOBUFS, bail out*/
297 1.1 jonathan return;
298 1.1 jonathan }
299 1.1 jonathan cur->start = th->th_ack;
300 1.1 jonathan cur->end = sack->left;
301 1.1 jonathan cur->rxmit = cur->start;
302 1.1 jonathan tp->rcv_lastsack = sack->right;
303 1.1 jonathan TAILQ_INSERT_HEAD(&tp->snd_holes, cur, sackhole_q);
304 1.1 jonathan continue; /* With next sack block */
305 1.1 jonathan }
306 1.1 jonathan
307 1.1 jonathan /* Go through the list of holes. */
308 1.1 jonathan while (cur) {
309 1.1 jonathan if (SEQ_LEQ(sack->left, cur->start))
310 1.1 jonathan /* SACKs data before the current hole */
311 1.1 jonathan break; /* No use going through more holes */
312 1.1 jonathan
313 1.1 jonathan if (SEQ_GEQ(sack->left, cur->end)) {
314 1.1 jonathan /* SACKs data beyond the current hole */
315 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
316 1.1 jonathan continue;
317 1.1 jonathan }
318 1.1 jonathan
319 1.1 jonathan if (SEQ_LEQ(sack->left, cur->start)) {
320 1.1 jonathan /* Data acks at least the beginning of hole */
321 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
322 1.1 jonathan /* Acks entire hole, so delete hole */
323 1.1 jonathan tmp = cur;
324 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
325 1.2 yamt TAILQ_REMOVE(&tp->snd_holes, tmp,
326 1.2 yamt sackhole_q);
327 1.1 jonathan pool_put(&sackhole_pool, tmp);
328 1.1 jonathan break;
329 1.1 jonathan }
330 1.1 jonathan
331 1.1 jonathan /* Otherwise, move start of hole forward */
332 1.1 jonathan cur->start = sack->right;
333 1.1 jonathan cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
334 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
335 1.1 jonathan break;
336 1.1 jonathan }
337 1.1 jonathan
338 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
339 1.1 jonathan /* Move end of hole backward. */
340 1.1 jonathan cur->end = sack->left;
341 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
342 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
343 1.1 jonathan break;
344 1.1 jonathan }
345 1.1 jonathan
346 1.1 jonathan if (SEQ_LT(cur->start, sack->left) &&
347 1.1 jonathan SEQ_GT(cur->end, sack->right)) {
348 1.1 jonathan /*
349 1.1 jonathan * ACKs some data in middle of a hole; need to
350 1.1 jonathan * split current hole
351 1.1 jonathan */
352 1.1 jonathan tmp = (struct sackhole *)
353 1.2 yamt pool_get(&sackhole_pool, PR_NOWAIT);
354 1.1 jonathan if (tmp == NULL) {
355 1.1 jonathan /* ENOBUFS, bail out. */
356 1.1 jonathan return;
357 1.1 jonathan }
358 1.1 jonathan tmp->start = sack->right;
359 1.1 jonathan tmp->end = cur->end;
360 1.1 jonathan tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
361 1.1 jonathan cur->end = sack->left;
362 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
363 1.1 jonathan TAILQ_INSERT_AFTER(&tp->snd_holes, cur, tmp,
364 1.1 jonathan sackhole_q);
365 1.1 jonathan cur = TAILQ_NEXT(tmp, sackhole_q);
366 1.1 jonathan break;
367 1.1 jonathan }
368 1.1 jonathan }
369 1.1 jonathan
370 1.1 jonathan /* At this point, we have reached the tail of the list. */
371 1.1 jonathan if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
372 1.1 jonathan /*
373 1.1 jonathan * Need to append new hole at end.
374 1.1 jonathan */
375 1.1 jonathan tmp = (struct sackhole *)
376 1.1 jonathan pool_get(&sackhole_pool, PR_NOWAIT);
377 1.1 jonathan if (tmp == NULL)
378 1.1 jonathan continue; /* ENOBUFS */
379 1.1 jonathan tmp->start = tp->rcv_lastsack;
380 1.1 jonathan tmp->end = sack->left;
381 1.1 jonathan tmp->rxmit = tmp->start;
382 1.1 jonathan tp->rcv_lastsack = sack->right;
383 1.1 jonathan TAILQ_INSERT_TAIL(&tp->snd_holes, tmp, sackhole_q);
384 1.1 jonathan cur = tmp;
385 1.1 jonathan }
386 1.1 jonathan }
387 1.1 jonathan }
388 1.1 jonathan
389 1.1 jonathan void
390 1.1 jonathan tcp_del_sackholes(struct tcpcb *tp, struct tcphdr *th)
391 1.1 jonathan {
392 1.1 jonathan /* Max because this could be an older ack that just arrived. */
393 1.1 jonathan tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
394 1.1 jonathan th->th_ack : tp->snd_una;
395 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
396 1.1 jonathan struct sackhole *tmp;
397 1.1 jonathan
398 1.1 jonathan while (cur) {
399 1.1 jonathan if (SEQ_LEQ(cur->end, lastack)) {
400 1.1 jonathan tmp = cur;
401 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
402 1.1 jonathan TAILQ_REMOVE(&tp->snd_holes, tmp, sackhole_q);
403 1.1 jonathan pool_put(&sackhole_pool, tmp);
404 1.1 jonathan } else if (SEQ_LT(cur->start, lastack)) {
405 1.1 jonathan cur->start = lastack;
406 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->start))
407 1.1 jonathan cur->rxmit = cur->start;
408 1.1 jonathan break;
409 1.1 jonathan } else
410 1.1 jonathan break;
411 1.1 jonathan
412 1.1 jonathan }
413 1.1 jonathan }
414 1.1 jonathan
415 1.1 jonathan void
416 1.1 jonathan tcp_free_sackholes(struct tcpcb *tp)
417 1.1 jonathan {
418 1.1 jonathan struct sackhole *sack;
419 1.1 jonathan
420 1.1 jonathan /* Free up the SACK hole list. */
421 1.1 jonathan while (!TAILQ_EMPTY(&tp->snd_holes)) {
422 1.1 jonathan sack = TAILQ_FIRST(&tp->snd_holes);
423 1.1 jonathan TAILQ_REMOVE(&tp->snd_holes, sack, sackhole_q);
424 1.1 jonathan pool_put(&sackhole_pool, sack);
425 1.1 jonathan }
426 1.1 jonathan }
427 1.1 jonathan
428 1.1 jonathan /*
429 1.1 jonathan * Implements the SACK response to a new ack, checking for partial acks
430 1.1 jonathan * in fast recovery.
431 1.1 jonathan */
432 1.1 jonathan void
433 1.1 jonathan tcp_sack_newack(struct tcpcb *tp, struct tcphdr *th)
434 1.1 jonathan {
435 1.1 jonathan if (tp->t_partialacks < 0) {
436 1.1 jonathan /*
437 1.1 jonathan * Not in fast recovery. Reset the duplicate ack
438 1.1 jonathan * counter.
439 1.1 jonathan */
440 1.1 jonathan tp->t_dupacks = 0;
441 1.1 jonathan } else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
442 1.1 jonathan /*
443 1.1 jonathan * Partial ack handling within a sack recovery episode.
444 1.1 jonathan * Keeping this very simple for now. When a partial ack
445 1.1 jonathan * is received, force snd_cwnd to a value that will allow
446 1.1 jonathan * the sender to transmit no more than 2 segments.
447 1.1 jonathan * If necessary, a fancier scheme can be adopted at a
448 1.1 jonathan * later point, but for now, the goal is to prevent the
449 1.1 jonathan * sender from bursting a large amount of data in the midst
450 1.1 jonathan * of sack recovery.
451 1.1 jonathan */
452 1.1 jonathan int num_segs = 1;
453 1.1 jonathan int sack_bytes_rxmt = 0;
454 1.1 jonathan
455 1.1 jonathan tp->t_partialacks++;
456 1.1 jonathan TCP_TIMER_DISARM(tp, TCPT_REXMT);
457 1.1 jonathan tp->t_rtttime = 0;
458 1.1 jonathan
459 1.2 yamt /*
460 1.2 yamt * send one or 2 segments based on how much new data was acked
461 1.2 yamt */
462 1.1 jonathan if (((th->th_ack - tp->snd_una) / tp->t_segsz) > 2)
463 1.1 jonathan num_segs = 2;
464 1.1 jonathan (void)tcp_sack_output(tp, &sack_bytes_rxmt);
465 1.2 yamt tp->snd_cwnd = sack_bytes_rxmt +
466 1.2 yamt (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_segsz;
467 1.1 jonathan tp->t_flags |= TF_ACKNOW;
468 1.1 jonathan (void) tcp_output(tp);
469 1.1 jonathan } else {
470 1.1 jonathan /*
471 1.1 jonathan * Complete ack, inflate the congestion window to
472 1.1 jonathan * ssthresh and exit fast recovery.
473 1.1 jonathan *
474 1.1 jonathan * Window inflation should have left us with approx.
475 1.1 jonathan * snd_ssthresh outstanding data. But in case we
476 1.1 jonathan * would be inclined to send a burst, better to do
477 1.1 jonathan * it via the slow start mechanism.
478 1.1 jonathan */
479 1.1 jonathan if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
480 1.1 jonathan tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
481 1.1 jonathan + tp->t_segsz;
482 1.1 jonathan else
483 1.1 jonathan tp->snd_cwnd = tp->snd_ssthresh;
484 1.1 jonathan tp->t_partialacks = -1;
485 1.1 jonathan tp->t_dupacks = 0;
486 1.1 jonathan if (SEQ_GT(th->th_ack, tp->snd_fack))
487 1.1 jonathan tp->snd_fack = th->th_ack;
488 1.1 jonathan }
489 1.1 jonathan }
490 1.1 jonathan
491 1.1 jonathan /*
492 1.1 jonathan * Returns pointer to a sackhole if there are any pending retransmissions;
493 1.1 jonathan * NULL otherwise.
494 1.1 jonathan */
495 1.1 jonathan struct sackhole *
496 1.1 jonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
497 1.1 jonathan {
498 1.1 jonathan struct sackhole *cur = NULL;
499 1.1 jonathan
500 1.1 jonathan if(!TCP_SACK_ENABLED(tp))
501 1.1 jonathan return (NULL);
502 1.1 jonathan
503 1.1 jonathan *sack_bytes_rexmt = 0;
504 1.1 jonathan TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
505 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->end)) {
506 1.2 yamt if (SEQ_LT(cur->rxmit, tp->snd_una)) {
507 1.2 yamt /* old SACK hole */
508 1.1 jonathan continue;
509 1.1 jonathan }
510 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
511 1.1 jonathan break;
512 1.1 jonathan }
513 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
514 1.1 jonathan }
515 1.1 jonathan
516 1.1 jonathan return (cur);
517 1.1 jonathan }
518 1.1 jonathan
519 1.1 jonathan /*
520 1.1 jonathan * After a timeout, the SACK list may be rebuilt. This SACK information
521 1.1 jonathan * should be used to avoid retransmitting SACKed data. This function
522 1.1 jonathan * traverses the SACK list to see if snd_nxt should be moved forward.
523 1.1 jonathan */
524 1.1 jonathan void
525 1.1 jonathan tcp_sack_adjust(struct tcpcb *tp)
526 1.1 jonathan {
527 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
528 1.1 jonathan struct sackhole *n = NULL;
529 1.1 jonathan
530 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes))
531 1.1 jonathan return; /* No holes */
532 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
533 1.1 jonathan return; /* We're already beyond any SACKed blocks */
534 1.1 jonathan
535 1.1 jonathan /*
536 1.1 jonathan * Two cases for which we want to advance snd_nxt:
537 1.1 jonathan * i) snd_nxt lies between end of one hole and beginning of another
538 1.1 jonathan * ii) snd_nxt lies between end of last hole and rcv_lastsack
539 1.1 jonathan */
540 1.1 jonathan while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
541 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
542 1.1 jonathan return;
543 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, n->start))
544 1.1 jonathan cur = n;
545 1.1 jonathan else {
546 1.1 jonathan tp->snd_nxt = n->start;
547 1.1 jonathan return;
548 1.1 jonathan }
549 1.1 jonathan }
550 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
551 1.1 jonathan return;
552 1.1 jonathan tp->snd_nxt = tp->rcv_lastsack;
553 1.1 jonathan
554 1.1 jonathan return;
555 1.1 jonathan }
556