tcp_sack.c revision 1.20 1 1.20 reinoud /* $NetBSD: tcp_sack.c,v 1.20 2006/10/20 13:11:09 reinoud Exp $ */
2 1.1 jonathan
3 1.1 jonathan /*
4 1.1 jonathan * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 1.1 jonathan * All rights reserved.
6 1.1 jonathan *
7 1.1 jonathan * This code is derived from software contributed to The NetBSD Foundation
8 1.1 jonathan * by Kentaro A. Kurahone.
9 1.1 jonathan *
10 1.1 jonathan * Redistribution and use in source and binary forms, with or without
11 1.1 jonathan * modification, are permitted provided that the following conditions
12 1.1 jonathan * are met:
13 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
14 1.1 jonathan * notice, this list of conditions and the following disclaimer.
15 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
17 1.1 jonathan * documentation and/or other materials provided with the distribution.
18 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
19 1.1 jonathan * must display the following acknowledgement:
20 1.1 jonathan * This product includes software developed by the NetBSD
21 1.1 jonathan * Foundation, Inc. and its contributors.
22 1.1 jonathan * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 jonathan * contributors may be used to endorse or promote products derived
24 1.1 jonathan * from this software without specific prior written permission.
25 1.1 jonathan *
26 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 jonathan * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 jonathan * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 jonathan * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 jonathan * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 jonathan * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 jonathan * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 jonathan * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 jonathan * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 jonathan * POSSIBILITY OF SUCH DAMAGE.
37 1.1 jonathan */
38 1.1 jonathan
39 1.1 jonathan /*
40 1.1 jonathan * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
41 1.1 jonathan * The Regents of the University of California. All rights reserved.
42 1.1 jonathan *
43 1.1 jonathan * Redistribution and use in source and binary forms, with or without
44 1.1 jonathan * modification, are permitted provided that the following conditions
45 1.1 jonathan * are met:
46 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
47 1.1 jonathan * notice, this list of conditions and the following disclaimer.
48 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
49 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
50 1.1 jonathan * documentation and/or other materials provided with the distribution.
51 1.1 jonathan * 4. Neither the name of the University nor the names of its contributors
52 1.1 jonathan * may be used to endorse or promote products derived from this software
53 1.1 jonathan * without specific prior written permission.
54 1.1 jonathan *
55 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 1.1 jonathan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 1.1 jonathan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 1.1 jonathan * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 1.1 jonathan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 1.1 jonathan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 1.1 jonathan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 1.1 jonathan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 1.1 jonathan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 1.1 jonathan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 1.1 jonathan * SUCH DAMAGE.
66 1.1 jonathan *
67 1.1 jonathan * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
68 1.1 jonathan * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
69 1.1 jonathan */
70 1.1 jonathan
71 1.1 jonathan /*
72 1.1 jonathan * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
73 1.1 jonathan *
74 1.1 jonathan * NRL grants permission for redistribution and use in source and binary
75 1.1 jonathan * forms, with or without modification, of the software and documentation
76 1.1 jonathan * created at NRL provided that the following conditions are met:
77 1.1 jonathan *
78 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
79 1.1 jonathan * notice, this list of conditions and the following disclaimer.
80 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
81 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
82 1.1 jonathan * documentation and/or other materials provided with the distribution.
83 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
84 1.1 jonathan * must display the following acknowledgements:
85 1.1 jonathan * This product includes software developed by the University of
86 1.1 jonathan * California, Berkeley and its contributors.
87 1.1 jonathan * This product includes software developed at the Information
88 1.1 jonathan * Technology Division, US Naval Research Laboratory.
89 1.1 jonathan * 4. Neither the name of the NRL nor the names of its contributors
90 1.1 jonathan * may be used to endorse or promote products derived from this software
91 1.1 jonathan * without specific prior written permission.
92 1.1 jonathan *
93 1.1 jonathan * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
94 1.1 jonathan * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
95 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
96 1.1 jonathan * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
97 1.1 jonathan * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
98 1.1 jonathan * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
99 1.1 jonathan * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
100 1.1 jonathan * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
101 1.1 jonathan * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
102 1.1 jonathan * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
103 1.1 jonathan * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
104 1.1 jonathan *
105 1.1 jonathan * The views and conclusions contained in the software and documentation
106 1.1 jonathan * are those of the authors and should not be interpreted as representing
107 1.1 jonathan * official policies, either expressed or implied, of the US Naval
108 1.1 jonathan * Research Laboratory (NRL).
109 1.1 jonathan */
110 1.1 jonathan
111 1.1 jonathan #include <sys/cdefs.h>
112 1.20 reinoud __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.20 2006/10/20 13:11:09 reinoud Exp $");
113 1.1 jonathan
114 1.1 jonathan #include "opt_inet.h"
115 1.1 jonathan #include "opt_ipsec.h"
116 1.1 jonathan #include "opt_inet_csum.h"
117 1.1 jonathan #include "opt_tcp_debug.h"
118 1.1 jonathan
119 1.1 jonathan #include <sys/param.h>
120 1.1 jonathan #include <sys/systm.h>
121 1.1 jonathan #include <sys/malloc.h>
122 1.1 jonathan #include <sys/mbuf.h>
123 1.1 jonathan #include <sys/protosw.h>
124 1.1 jonathan #include <sys/socket.h>
125 1.1 jonathan #include <sys/socketvar.h>
126 1.1 jonathan #include <sys/errno.h>
127 1.1 jonathan #include <sys/syslog.h>
128 1.1 jonathan #include <sys/pool.h>
129 1.1 jonathan #include <sys/domain.h>
130 1.1 jonathan #include <sys/kernel.h>
131 1.1 jonathan
132 1.1 jonathan #include <net/if.h>
133 1.1 jonathan #include <net/route.h>
134 1.1 jonathan #include <net/if_types.h>
135 1.1 jonathan
136 1.1 jonathan #include <netinet/in.h>
137 1.1 jonathan #include <netinet/in_systm.h>
138 1.1 jonathan #include <netinet/ip.h>
139 1.1 jonathan #include <netinet/in_pcb.h>
140 1.1 jonathan #include <netinet/in_var.h>
141 1.1 jonathan #include <netinet/ip_var.h>
142 1.1 jonathan
143 1.1 jonathan #ifdef INET6
144 1.1 jonathan #ifndef INET
145 1.1 jonathan #include <netinet/in.h>
146 1.1 jonathan #endif
147 1.1 jonathan #include <netinet/ip6.h>
148 1.1 jonathan #include <netinet6/ip6_var.h>
149 1.1 jonathan #include <netinet6/in6_pcb.h>
150 1.1 jonathan #include <netinet6/ip6_var.h>
151 1.1 jonathan #include <netinet6/in6_var.h>
152 1.1 jonathan #include <netinet/icmp6.h>
153 1.1 jonathan #include <netinet6/nd6.h>
154 1.1 jonathan #endif
155 1.1 jonathan
156 1.1 jonathan #ifndef INET6
157 1.1 jonathan /* always need ip6.h for IP6_EXTHDR_GET */
158 1.1 jonathan #include <netinet/ip6.h>
159 1.1 jonathan #endif
160 1.1 jonathan
161 1.1 jonathan #include <netinet/tcp.h>
162 1.1 jonathan #include <netinet/tcp_fsm.h>
163 1.1 jonathan #include <netinet/tcp_seq.h>
164 1.1 jonathan #include <netinet/tcp_timer.h>
165 1.1 jonathan #include <netinet/tcp_var.h>
166 1.1 jonathan #include <netinet/tcpip.h>
167 1.1 jonathan #include <netinet/tcp_debug.h>
168 1.1 jonathan
169 1.1 jonathan #include <machine/stdarg.h>
170 1.1 jonathan
171 1.1 jonathan /* SACK block pool. */
172 1.19 yamt static POOL_INIT(sackhole_pool, sizeof(struct sackhole), 0, 0, 0, "sackholepl",
173 1.19 yamt NULL);
174 1.19 yamt
175 1.19 yamt static struct sackhole *
176 1.19 yamt sack_allochole(struct tcpcb *tp)
177 1.19 yamt {
178 1.19 yamt struct sackhole *hole;
179 1.19 yamt
180 1.19 yamt if (tp->snd_numholes >= tcp_sack_tp_maxholes ||
181 1.19 yamt tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
182 1.19 yamt return NULL;
183 1.19 yamt }
184 1.19 yamt hole = pool_get(&sackhole_pool, PR_NOWAIT);
185 1.19 yamt if (hole == NULL) {
186 1.19 yamt return NULL;
187 1.19 yamt }
188 1.19 yamt tp->snd_numholes++;
189 1.19 yamt tcp_sack_globalholes++;
190 1.19 yamt
191 1.19 yamt return hole;
192 1.19 yamt }
193 1.19 yamt
194 1.19 yamt static struct sackhole *
195 1.19 yamt sack_inserthole(struct tcpcb *tp, tcp_seq start, tcp_seq end,
196 1.19 yamt struct sackhole *prev)
197 1.19 yamt {
198 1.19 yamt struct sackhole *hole;
199 1.19 yamt
200 1.19 yamt hole = sack_allochole(tp);
201 1.19 yamt if (hole == NULL) {
202 1.19 yamt return NULL;
203 1.19 yamt }
204 1.19 yamt hole->start = hole->rxmit = start;
205 1.19 yamt hole->end = end;
206 1.19 yamt if (prev != NULL) {
207 1.19 yamt TAILQ_INSERT_AFTER(&tp->snd_holes, prev, hole, sackhole_q);
208 1.19 yamt } else {
209 1.19 yamt TAILQ_INSERT_TAIL(&tp->snd_holes, hole, sackhole_q);
210 1.19 yamt }
211 1.19 yamt return hole;
212 1.19 yamt }
213 1.19 yamt
214 1.19 yamt static struct sackhole *
215 1.19 yamt sack_removehole(struct tcpcb *tp, struct sackhole *hole)
216 1.19 yamt {
217 1.19 yamt struct sackhole *next;
218 1.19 yamt
219 1.19 yamt next = TAILQ_NEXT(hole, sackhole_q);
220 1.19 yamt tp->snd_numholes--;
221 1.19 yamt tcp_sack_globalholes--;
222 1.19 yamt TAILQ_REMOVE(&tp->snd_holes, hole, sackhole_q);
223 1.19 yamt pool_put(&sackhole_pool, hole);
224 1.19 yamt
225 1.19 yamt return next;
226 1.19 yamt }
227 1.1 jonathan
228 1.1 jonathan void
229 1.1 jonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
230 1.1 jonathan {
231 1.1 jonathan if (TCP_SACK_ENABLED(tp)) {
232 1.1 jonathan tp->rcv_dsack_block.left = seq;
233 1.1 jonathan tp->rcv_dsack_block.right = seq + len;
234 1.1 jonathan tp->rcv_sack_flags |= TCPSACK_HAVED;
235 1.1 jonathan }
236 1.1 jonathan }
237 1.1 jonathan
238 1.1 jonathan void
239 1.1 jonathan tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
240 1.1 jonathan {
241 1.5 yamt struct sackblk
242 1.5 yamt t_sack_block[(MAX_TCPOPTLEN - 2) / (sizeof(u_int32_t) * 2)];
243 1.1 jonathan struct sackblk *sack = NULL;
244 1.1 jonathan struct sackhole *cur = NULL;
245 1.1 jonathan struct sackhole *tmp = NULL;
246 1.20 reinoud char *lp = cp + 2;
247 1.18 yamt int i, j, num_sack_blks;
248 1.1 jonathan tcp_seq left, right, acked;
249 1.1 jonathan
250 1.1 jonathan /*
251 1.11 kurahone * If we aren't processing SACK responses, this is not an ACK
252 1.11 kurahone * or the peer sends us a sack option with invalid length, don't
253 1.1 jonathan * update the scoreboard.
254 1.1 jonathan */
255 1.11 kurahone if (!TCP_SACK_ENABLED(tp) || ((th->th_flags & TH_ACK) == 0) ||
256 1.11 kurahone (optlen % 8 != 2 || optlen < 10)) {
257 1.1 jonathan return;
258 1.1 jonathan }
259 1.1 jonathan
260 1.12 kurahone /*
261 1.12 kurahone * If we don't want any SACK holes to be allocated, just return.
262 1.12 kurahone */
263 1.12 kurahone if (tcp_sack_globalmaxholes == 0 || tcp_sack_tp_maxholes == 0) {
264 1.12 kurahone return;
265 1.12 kurahone }
266 1.12 kurahone
267 1.11 kurahone /* If the ACK is outside [snd_una, snd_max], ignore the SACK options. */
268 1.11 kurahone if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
269 1.11 kurahone return;
270 1.11 kurahone
271 1.1 jonathan /*
272 1.1 jonathan * Extract SACK blocks.
273 1.1 jonathan *
274 1.1 jonathan * Note that t_sack_block is sorted so that we only need to do
275 1.1 jonathan * one pass over the sequence number space. (SACK "fast-path")
276 1.1 jonathan */
277 1.1 jonathan num_sack_blks = optlen / 8;
278 1.1 jonathan acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
279 1.20 reinoud for (i = 0; i < num_sack_blks; i++, lp += sizeof(uint32_t) * 2) {
280 1.20 reinoud memcpy(&left, lp, sizeof(uint32_t));
281 1.20 reinoud memcpy(&right, lp + sizeof(uint32_t), sizeof(uint32_t));
282 1.3 yamt left = ntohl(left);
283 1.3 yamt right = ntohl(right);
284 1.1 jonathan
285 1.13 yamt if (SEQ_LEQ(right, acked) || SEQ_GT(right, tp->snd_max) ||
286 1.4 yamt SEQ_GEQ(left, right)) {
287 1.1 jonathan /* SACK entry that's old, or invalid. */
288 1.1 jonathan i--;
289 1.1 jonathan num_sack_blks--;
290 1.1 jonathan continue;
291 1.1 jonathan }
292 1.1 jonathan
293 1.1 jonathan /* Insertion sort. */
294 1.2 yamt for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
295 1.2 yamt j--) {
296 1.1 jonathan t_sack_block[j].left = t_sack_block[j - 1].left;
297 1.1 jonathan t_sack_block[j].right = t_sack_block[j - 1].right;
298 1.1 jonathan }
299 1.1 jonathan t_sack_block[j].left = left;
300 1.1 jonathan t_sack_block[j].right = right;
301 1.1 jonathan }
302 1.1 jonathan
303 1.1 jonathan /* Update the scoreboard. */
304 1.1 jonathan cur = TAILQ_FIRST(&tp->snd_holes);
305 1.1 jonathan for (i = 0; i < num_sack_blks; i++) {
306 1.1 jonathan sack = &t_sack_block[i];
307 1.1 jonathan /*
308 1.1 jonathan * FACK TCP. Update snd_fack so we can enter Fast
309 1.1 jonathan * Recovery early.
310 1.1 jonathan */
311 1.1 jonathan if (SEQ_GEQ(sack->right, tp->snd_fack))
312 1.1 jonathan tp->snd_fack = sack->right;
313 1.1 jonathan
314 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes)) {
315 1.1 jonathan /* First hole. */
316 1.19 yamt cur = sack_inserthole(tp, th->th_ack, sack->left, NULL);
317 1.1 jonathan if (cur == NULL) {
318 1.1 jonathan /* ENOBUFS, bail out*/
319 1.1 jonathan return;
320 1.1 jonathan }
321 1.1 jonathan tp->rcv_lastsack = sack->right;
322 1.1 jonathan continue; /* With next sack block */
323 1.1 jonathan }
324 1.1 jonathan
325 1.1 jonathan /* Go through the list of holes. */
326 1.1 jonathan while (cur) {
327 1.6 yamt if (SEQ_LEQ(sack->right, cur->start))
328 1.1 jonathan /* SACKs data before the current hole */
329 1.1 jonathan break; /* No use going through more holes */
330 1.1 jonathan
331 1.1 jonathan if (SEQ_GEQ(sack->left, cur->end)) {
332 1.1 jonathan /* SACKs data beyond the current hole */
333 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
334 1.1 jonathan continue;
335 1.1 jonathan }
336 1.1 jonathan
337 1.1 jonathan if (SEQ_LEQ(sack->left, cur->start)) {
338 1.1 jonathan /* Data acks at least the beginning of hole */
339 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
340 1.1 jonathan /* Acks entire hole, so delete hole */
341 1.19 yamt cur = sack_removehole(tp, cur);
342 1.1 jonathan break;
343 1.1 jonathan }
344 1.1 jonathan
345 1.1 jonathan /* Otherwise, move start of hole forward */
346 1.1 jonathan cur->start = sack->right;
347 1.1 jonathan cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
348 1.1 jonathan break;
349 1.1 jonathan }
350 1.1 jonathan
351 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
352 1.1 jonathan /* Move end of hole backward. */
353 1.1 jonathan cur->end = sack->left;
354 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
355 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
356 1.1 jonathan break;
357 1.1 jonathan }
358 1.1 jonathan
359 1.1 jonathan if (SEQ_LT(cur->start, sack->left) &&
360 1.1 jonathan SEQ_GT(cur->end, sack->right)) {
361 1.1 jonathan /*
362 1.1 jonathan * ACKs some data in middle of a hole; need to
363 1.1 jonathan * split current hole
364 1.1 jonathan */
365 1.19 yamt tmp = sack_inserthole(tp, sack->right, cur->end,
366 1.19 yamt cur);
367 1.1 jonathan if (tmp == NULL) {
368 1.1 jonathan return;
369 1.1 jonathan }
370 1.1 jonathan tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
371 1.1 jonathan cur->end = sack->left;
372 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
373 1.7 yamt cur = tmp;
374 1.1 jonathan break;
375 1.1 jonathan }
376 1.1 jonathan }
377 1.1 jonathan
378 1.1 jonathan /* At this point, we have reached the tail of the list. */
379 1.1 jonathan if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
380 1.1 jonathan /*
381 1.1 jonathan * Need to append new hole at end.
382 1.1 jonathan */
383 1.19 yamt cur = sack_inserthole(tp, tp->rcv_lastsack, sack->left,
384 1.19 yamt NULL);
385 1.19 yamt if (cur == NULL) {
386 1.12 kurahone return;
387 1.12 kurahone }
388 1.1 jonathan }
389 1.8 yamt if (SEQ_LT(tp->rcv_lastsack, sack->right)) {
390 1.8 yamt tp->rcv_lastsack = sack->right;
391 1.8 yamt }
392 1.1 jonathan }
393 1.1 jonathan }
394 1.1 jonathan
395 1.1 jonathan void
396 1.1 jonathan tcp_del_sackholes(struct tcpcb *tp, struct tcphdr *th)
397 1.1 jonathan {
398 1.1 jonathan /* Max because this could be an older ack that just arrived. */
399 1.1 jonathan tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
400 1.1 jonathan th->th_ack : tp->snd_una;
401 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
402 1.1 jonathan
403 1.1 jonathan while (cur) {
404 1.1 jonathan if (SEQ_LEQ(cur->end, lastack)) {
405 1.19 yamt cur = sack_removehole(tp, cur);
406 1.1 jonathan } else if (SEQ_LT(cur->start, lastack)) {
407 1.1 jonathan cur->start = lastack;
408 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->start))
409 1.1 jonathan cur->rxmit = cur->start;
410 1.1 jonathan break;
411 1.1 jonathan } else
412 1.1 jonathan break;
413 1.1 jonathan }
414 1.1 jonathan }
415 1.1 jonathan
416 1.1 jonathan void
417 1.1 jonathan tcp_free_sackholes(struct tcpcb *tp)
418 1.1 jonathan {
419 1.1 jonathan struct sackhole *sack;
420 1.1 jonathan
421 1.1 jonathan /* Free up the SACK hole list. */
422 1.19 yamt while ((sack = TAILQ_FIRST(&tp->snd_holes)) != NULL) {
423 1.19 yamt sack_removehole(tp, sack);
424 1.1 jonathan }
425 1.19 yamt KASSERT(tp->snd_numholes == 0);
426 1.1 jonathan }
427 1.1 jonathan
428 1.1 jonathan /*
429 1.1 jonathan * Implements the SACK response to a new ack, checking for partial acks
430 1.1 jonathan * in fast recovery.
431 1.1 jonathan */
432 1.1 jonathan void
433 1.1 jonathan tcp_sack_newack(struct tcpcb *tp, struct tcphdr *th)
434 1.1 jonathan {
435 1.1 jonathan if (tp->t_partialacks < 0) {
436 1.1 jonathan /*
437 1.1 jonathan * Not in fast recovery. Reset the duplicate ack
438 1.1 jonathan * counter.
439 1.1 jonathan */
440 1.1 jonathan tp->t_dupacks = 0;
441 1.1 jonathan } else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
442 1.1 jonathan /*
443 1.1 jonathan * Partial ack handling within a sack recovery episode.
444 1.1 jonathan * Keeping this very simple for now. When a partial ack
445 1.1 jonathan * is received, force snd_cwnd to a value that will allow
446 1.1 jonathan * the sender to transmit no more than 2 segments.
447 1.1 jonathan * If necessary, a fancier scheme can be adopted at a
448 1.1 jonathan * later point, but for now, the goal is to prevent the
449 1.1 jonathan * sender from bursting a large amount of data in the midst
450 1.1 jonathan * of sack recovery.
451 1.1 jonathan */
452 1.1 jonathan int num_segs = 1;
453 1.1 jonathan int sack_bytes_rxmt = 0;
454 1.1 jonathan
455 1.1 jonathan tp->t_partialacks++;
456 1.1 jonathan TCP_TIMER_DISARM(tp, TCPT_REXMT);
457 1.1 jonathan tp->t_rtttime = 0;
458 1.1 jonathan
459 1.2 yamt /*
460 1.2 yamt * send one or 2 segments based on how much new data was acked
461 1.2 yamt */
462 1.1 jonathan if (((th->th_ack - tp->snd_una) / tp->t_segsz) > 2)
463 1.1 jonathan num_segs = 2;
464 1.1 jonathan (void)tcp_sack_output(tp, &sack_bytes_rxmt);
465 1.2 yamt tp->snd_cwnd = sack_bytes_rxmt +
466 1.2 yamt (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_segsz;
467 1.1 jonathan tp->t_flags |= TF_ACKNOW;
468 1.1 jonathan (void) tcp_output(tp);
469 1.1 jonathan } else {
470 1.1 jonathan /*
471 1.1 jonathan * Complete ack, inflate the congestion window to
472 1.1 jonathan * ssthresh and exit fast recovery.
473 1.1 jonathan *
474 1.1 jonathan * Window inflation should have left us with approx.
475 1.1 jonathan * snd_ssthresh outstanding data. But in case we
476 1.1 jonathan * would be inclined to send a burst, better to do
477 1.1 jonathan * it via the slow start mechanism.
478 1.1 jonathan */
479 1.1 jonathan if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
480 1.1 jonathan tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
481 1.1 jonathan + tp->t_segsz;
482 1.1 jonathan else
483 1.1 jonathan tp->snd_cwnd = tp->snd_ssthresh;
484 1.1 jonathan tp->t_partialacks = -1;
485 1.1 jonathan tp->t_dupacks = 0;
486 1.1 jonathan if (SEQ_GT(th->th_ack, tp->snd_fack))
487 1.1 jonathan tp->snd_fack = th->th_ack;
488 1.1 jonathan }
489 1.1 jonathan }
490 1.1 jonathan
491 1.1 jonathan /*
492 1.1 jonathan * Returns pointer to a sackhole if there are any pending retransmissions;
493 1.1 jonathan * NULL otherwise.
494 1.1 jonathan */
495 1.1 jonathan struct sackhole *
496 1.1 jonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
497 1.1 jonathan {
498 1.1 jonathan struct sackhole *cur = NULL;
499 1.1 jonathan
500 1.17 yamt if (!TCP_SACK_ENABLED(tp))
501 1.1 jonathan return (NULL);
502 1.1 jonathan
503 1.1 jonathan *sack_bytes_rexmt = 0;
504 1.1 jonathan TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
505 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->end)) {
506 1.2 yamt if (SEQ_LT(cur->rxmit, tp->snd_una)) {
507 1.2 yamt /* old SACK hole */
508 1.1 jonathan continue;
509 1.1 jonathan }
510 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
511 1.1 jonathan break;
512 1.1 jonathan }
513 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
514 1.1 jonathan }
515 1.1 jonathan
516 1.1 jonathan return (cur);
517 1.1 jonathan }
518 1.1 jonathan
519 1.1 jonathan /*
520 1.1 jonathan * After a timeout, the SACK list may be rebuilt. This SACK information
521 1.1 jonathan * should be used to avoid retransmitting SACKed data. This function
522 1.1 jonathan * traverses the SACK list to see if snd_nxt should be moved forward.
523 1.1 jonathan */
524 1.1 jonathan void
525 1.1 jonathan tcp_sack_adjust(struct tcpcb *tp)
526 1.1 jonathan {
527 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
528 1.1 jonathan struct sackhole *n = NULL;
529 1.1 jonathan
530 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes))
531 1.1 jonathan return; /* No holes */
532 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
533 1.1 jonathan return; /* We're already beyond any SACKed blocks */
534 1.1 jonathan
535 1.1 jonathan /*
536 1.1 jonathan * Two cases for which we want to advance snd_nxt:
537 1.1 jonathan * i) snd_nxt lies between end of one hole and beginning of another
538 1.1 jonathan * ii) snd_nxt lies between end of last hole and rcv_lastsack
539 1.1 jonathan */
540 1.1 jonathan while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
541 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
542 1.1 jonathan return;
543 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, n->start))
544 1.1 jonathan cur = n;
545 1.1 jonathan else {
546 1.1 jonathan tp->snd_nxt = n->start;
547 1.1 jonathan return;
548 1.1 jonathan }
549 1.1 jonathan }
550 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
551 1.1 jonathan return;
552 1.1 jonathan tp->snd_nxt = tp->rcv_lastsack;
553 1.1 jonathan
554 1.1 jonathan return;
555 1.1 jonathan }
556 1.9 yamt
557 1.9 yamt int
558 1.10 yamt tcp_sack_numblks(const struct tcpcb *tp)
559 1.9 yamt {
560 1.10 yamt int numblks;
561 1.9 yamt
562 1.10 yamt if (!TCP_SACK_ENABLED(tp)) {
563 1.9 yamt return 0;
564 1.9 yamt }
565 1.9 yamt
566 1.10 yamt numblks = (((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) ? 1 : 0) +
567 1.10 yamt tp->t_segqlen;
568 1.10 yamt
569 1.10 yamt if (numblks == 0) {
570 1.10 yamt return 0;
571 1.10 yamt }
572 1.10 yamt
573 1.10 yamt if (numblks > TCP_SACK_MAX) {
574 1.10 yamt numblks = TCP_SACK_MAX;
575 1.10 yamt }
576 1.10 yamt
577 1.10 yamt return numblks;
578 1.9 yamt }
579