tcp_sack.c revision 1.17 1 1.16 yamt /* $NetBSD: tcp_sack.c,v 1.17 2006/10/07 19:26:45 yamt Exp $ */
2 1.1 jonathan
3 1.1 jonathan /*
4 1.1 jonathan * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 1.1 jonathan * All rights reserved.
6 1.1 jonathan *
7 1.1 jonathan * This code is derived from software contributed to The NetBSD Foundation
8 1.1 jonathan * by Kentaro A. Kurahone.
9 1.1 jonathan *
10 1.1 jonathan * Redistribution and use in source and binary forms, with or without
11 1.1 jonathan * modification, are permitted provided that the following conditions
12 1.1 jonathan * are met:
13 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
14 1.1 jonathan * notice, this list of conditions and the following disclaimer.
15 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
17 1.1 jonathan * documentation and/or other materials provided with the distribution.
18 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
19 1.1 jonathan * must display the following acknowledgement:
20 1.1 jonathan * This product includes software developed by the NetBSD
21 1.1 jonathan * Foundation, Inc. and its contributors.
22 1.1 jonathan * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 jonathan * contributors may be used to endorse or promote products derived
24 1.1 jonathan * from this software without specific prior written permission.
25 1.1 jonathan *
26 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 jonathan * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 jonathan * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 jonathan * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 jonathan * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 jonathan * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 jonathan * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 jonathan * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 jonathan * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 jonathan * POSSIBILITY OF SUCH DAMAGE.
37 1.1 jonathan */
38 1.1 jonathan
39 1.1 jonathan /*
40 1.1 jonathan * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
41 1.1 jonathan * The Regents of the University of California. All rights reserved.
42 1.1 jonathan *
43 1.1 jonathan * Redistribution and use in source and binary forms, with or without
44 1.1 jonathan * modification, are permitted provided that the following conditions
45 1.1 jonathan * are met:
46 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
47 1.1 jonathan * notice, this list of conditions and the following disclaimer.
48 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
49 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
50 1.1 jonathan * documentation and/or other materials provided with the distribution.
51 1.1 jonathan * 4. Neither the name of the University nor the names of its contributors
52 1.1 jonathan * may be used to endorse or promote products derived from this software
53 1.1 jonathan * without specific prior written permission.
54 1.1 jonathan *
55 1.1 jonathan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 1.1 jonathan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 1.1 jonathan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 1.1 jonathan * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 1.1 jonathan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 1.1 jonathan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 1.1 jonathan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 1.1 jonathan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 1.1 jonathan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 1.1 jonathan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 1.1 jonathan * SUCH DAMAGE.
66 1.1 jonathan *
67 1.1 jonathan * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
68 1.1 jonathan * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
69 1.1 jonathan */
70 1.1 jonathan
71 1.1 jonathan /*
72 1.1 jonathan * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
73 1.1 jonathan *
74 1.1 jonathan * NRL grants permission for redistribution and use in source and binary
75 1.1 jonathan * forms, with or without modification, of the software and documentation
76 1.1 jonathan * created at NRL provided that the following conditions are met:
77 1.1 jonathan *
78 1.1 jonathan * 1. Redistributions of source code must retain the above copyright
79 1.1 jonathan * notice, this list of conditions and the following disclaimer.
80 1.1 jonathan * 2. Redistributions in binary form must reproduce the above copyright
81 1.1 jonathan * notice, this list of conditions and the following disclaimer in the
82 1.1 jonathan * documentation and/or other materials provided with the distribution.
83 1.1 jonathan * 3. All advertising materials mentioning features or use of this software
84 1.1 jonathan * must display the following acknowledgements:
85 1.1 jonathan * This product includes software developed by the University of
86 1.1 jonathan * California, Berkeley and its contributors.
87 1.1 jonathan * This product includes software developed at the Information
88 1.1 jonathan * Technology Division, US Naval Research Laboratory.
89 1.1 jonathan * 4. Neither the name of the NRL nor the names of its contributors
90 1.1 jonathan * may be used to endorse or promote products derived from this software
91 1.1 jonathan * without specific prior written permission.
92 1.1 jonathan *
93 1.1 jonathan * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
94 1.1 jonathan * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
95 1.1 jonathan * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
96 1.1 jonathan * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
97 1.1 jonathan * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
98 1.1 jonathan * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
99 1.1 jonathan * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
100 1.1 jonathan * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
101 1.1 jonathan * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
102 1.1 jonathan * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
103 1.1 jonathan * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
104 1.1 jonathan *
105 1.1 jonathan * The views and conclusions contained in the software and documentation
106 1.1 jonathan * are those of the authors and should not be interpreted as representing
107 1.1 jonathan * official policies, either expressed or implied, of the US Naval
108 1.1 jonathan * Research Laboratory (NRL).
109 1.1 jonathan */
110 1.1 jonathan
111 1.1 jonathan #include <sys/cdefs.h>
112 1.16 yamt __KERNEL_RCSID(0, "$NetBSD: tcp_sack.c,v 1.17 2006/10/07 19:26:45 yamt Exp $");
113 1.1 jonathan
114 1.1 jonathan #include "opt_inet.h"
115 1.1 jonathan #include "opt_ipsec.h"
116 1.1 jonathan #include "opt_inet_csum.h"
117 1.1 jonathan #include "opt_tcp_debug.h"
118 1.1 jonathan
119 1.1 jonathan #include <sys/param.h>
120 1.1 jonathan #include <sys/systm.h>
121 1.1 jonathan #include <sys/malloc.h>
122 1.1 jonathan #include <sys/mbuf.h>
123 1.1 jonathan #include <sys/protosw.h>
124 1.1 jonathan #include <sys/socket.h>
125 1.1 jonathan #include <sys/socketvar.h>
126 1.1 jonathan #include <sys/errno.h>
127 1.1 jonathan #include <sys/syslog.h>
128 1.1 jonathan #include <sys/pool.h>
129 1.1 jonathan #include <sys/domain.h>
130 1.1 jonathan #include <sys/kernel.h>
131 1.1 jonathan
132 1.1 jonathan #include <net/if.h>
133 1.1 jonathan #include <net/route.h>
134 1.1 jonathan #include <net/if_types.h>
135 1.1 jonathan
136 1.1 jonathan #include <netinet/in.h>
137 1.1 jonathan #include <netinet/in_systm.h>
138 1.1 jonathan #include <netinet/ip.h>
139 1.1 jonathan #include <netinet/in_pcb.h>
140 1.1 jonathan #include <netinet/in_var.h>
141 1.1 jonathan #include <netinet/ip_var.h>
142 1.1 jonathan
143 1.1 jonathan #ifdef INET6
144 1.1 jonathan #ifndef INET
145 1.1 jonathan #include <netinet/in.h>
146 1.1 jonathan #endif
147 1.1 jonathan #include <netinet/ip6.h>
148 1.1 jonathan #include <netinet6/ip6_var.h>
149 1.1 jonathan #include <netinet6/in6_pcb.h>
150 1.1 jonathan #include <netinet6/ip6_var.h>
151 1.1 jonathan #include <netinet6/in6_var.h>
152 1.1 jonathan #include <netinet/icmp6.h>
153 1.1 jonathan #include <netinet6/nd6.h>
154 1.1 jonathan #endif
155 1.1 jonathan
156 1.1 jonathan #ifndef INET6
157 1.1 jonathan /* always need ip6.h for IP6_EXTHDR_GET */
158 1.1 jonathan #include <netinet/ip6.h>
159 1.1 jonathan #endif
160 1.1 jonathan
161 1.1 jonathan #include <netinet/tcp.h>
162 1.1 jonathan #include <netinet/tcp_fsm.h>
163 1.1 jonathan #include <netinet/tcp_seq.h>
164 1.1 jonathan #include <netinet/tcp_timer.h>
165 1.1 jonathan #include <netinet/tcp_var.h>
166 1.1 jonathan #include <netinet/tcpip.h>
167 1.1 jonathan #include <netinet/tcp_debug.h>
168 1.1 jonathan
169 1.1 jonathan #include <machine/stdarg.h>
170 1.1 jonathan
/*
 * Pool of struct sackhole entries.  The scoreboard code in this file
 * takes holes from it with pool_get() and returns them with pool_put().
 */
POOL_INIT(sackhole_pool, sizeof(struct sackhole), 0, 0, 0, "sackholepl", NULL);
173 1.1 jonathan
174 1.1 jonathan void
175 1.1 jonathan tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
176 1.1 jonathan {
177 1.1 jonathan if (TCP_SACK_ENABLED(tp)) {
178 1.1 jonathan tp->rcv_dsack_block.left = seq;
179 1.1 jonathan tp->rcv_dsack_block.right = seq + len;
180 1.1 jonathan tp->rcv_sack_flags |= TCPSACK_HAVED;
181 1.1 jonathan }
182 1.1 jonathan }
183 1.1 jonathan
184 1.1 jonathan void
185 1.1 jonathan tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
186 1.1 jonathan {
187 1.5 yamt struct sackblk
188 1.5 yamt t_sack_block[(MAX_TCPOPTLEN - 2) / (sizeof(u_int32_t) * 2)];
189 1.1 jonathan struct sackblk *sack = NULL;
190 1.1 jonathan struct sackhole *cur = NULL;
191 1.1 jonathan struct sackhole *tmp = NULL;
192 1.1 jonathan u_int32_t *lp = (u_int32_t *) (cp + 2);
193 1.15 tls int i, j, num_sack_blks, s;
194 1.1 jonathan tcp_seq left, right, acked;
195 1.1 jonathan
196 1.1 jonathan /*
197 1.11 kurahone * If we aren't processing SACK responses, this is not an ACK
198 1.11 kurahone * or the peer sends us a sack option with invalid length, don't
199 1.1 jonathan * update the scoreboard.
200 1.1 jonathan */
201 1.11 kurahone if (!TCP_SACK_ENABLED(tp) || ((th->th_flags & TH_ACK) == 0) ||
202 1.11 kurahone (optlen % 8 != 2 || optlen < 10)) {
203 1.1 jonathan return;
204 1.1 jonathan }
205 1.1 jonathan
206 1.12 kurahone /*
207 1.12 kurahone * If we don't want any SACK holes to be allocated, just return.
208 1.12 kurahone */
209 1.12 kurahone if (tcp_sack_globalmaxholes == 0 || tcp_sack_tp_maxholes == 0) {
210 1.12 kurahone return;
211 1.12 kurahone }
212 1.12 kurahone
213 1.11 kurahone /* If the ACK is outside [snd_una, snd_max], ignore the SACK options. */
214 1.11 kurahone if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
215 1.11 kurahone return;
216 1.11 kurahone
217 1.1 jonathan /*
218 1.1 jonathan * Extract SACK blocks.
219 1.1 jonathan *
220 1.1 jonathan * Note that t_sack_block is sorted so that we only need to do
221 1.1 jonathan * one pass over the sequence number space. (SACK "fast-path")
222 1.1 jonathan */
223 1.1 jonathan num_sack_blks = optlen / 8;
224 1.1 jonathan acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
225 1.1 jonathan for (i = 0; i < num_sack_blks; i++, lp += 2) {
226 1.3 yamt memcpy(&left, lp, sizeof(*lp));
227 1.3 yamt memcpy(&right, lp + 1, sizeof(*lp));
228 1.3 yamt left = ntohl(left);
229 1.3 yamt right = ntohl(right);
230 1.1 jonathan
231 1.13 yamt if (SEQ_LEQ(right, acked) || SEQ_GT(right, tp->snd_max) ||
232 1.4 yamt SEQ_GEQ(left, right)) {
233 1.1 jonathan /* SACK entry that's old, or invalid. */
234 1.1 jonathan i--;
235 1.1 jonathan num_sack_blks--;
236 1.1 jonathan continue;
237 1.1 jonathan }
238 1.1 jonathan
239 1.1 jonathan /* Insertion sort. */
240 1.2 yamt for (j = i; (j > 0) && SEQ_LT(left, t_sack_block[j - 1].left);
241 1.2 yamt j--) {
242 1.1 jonathan t_sack_block[j].left = t_sack_block[j - 1].left;
243 1.1 jonathan t_sack_block[j].right = t_sack_block[j - 1].right;
244 1.1 jonathan }
245 1.1 jonathan t_sack_block[j].left = left;
246 1.1 jonathan t_sack_block[j].right = right;
247 1.1 jonathan }
248 1.1 jonathan
249 1.15 tls /* XXX: Investigate making this a bit more fine-grained. */
250 1.15 tls s = splsoftnet();
251 1.15 tls
252 1.1 jonathan /* Update the scoreboard. */
253 1.1 jonathan cur = TAILQ_FIRST(&tp->snd_holes);
254 1.1 jonathan for (i = 0; i < num_sack_blks; i++) {
255 1.1 jonathan sack = &t_sack_block[i];
256 1.1 jonathan /*
257 1.1 jonathan * FACK TCP. Update snd_fack so we can enter Fast
258 1.1 jonathan * Recovery early.
259 1.1 jonathan */
260 1.1 jonathan if (SEQ_GEQ(sack->right, tp->snd_fack))
261 1.1 jonathan tp->snd_fack = sack->right;
262 1.1 jonathan
263 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes)) {
264 1.1 jonathan /* First hole. */
265 1.12 kurahone if (tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
266 1.15 tls splx(s);
267 1.12 kurahone return;
268 1.12 kurahone }
269 1.2 yamt cur = (struct sackhole *)
270 1.2 yamt pool_get(&sackhole_pool, PR_NOWAIT);
271 1.1 jonathan if (cur == NULL) {
272 1.1 jonathan /* ENOBUFS, bail out*/
273 1.15 tls splx(s);
274 1.1 jonathan return;
275 1.1 jonathan }
276 1.1 jonathan cur->start = th->th_ack;
277 1.1 jonathan cur->end = sack->left;
278 1.1 jonathan cur->rxmit = cur->start;
279 1.1 jonathan tp->rcv_lastsack = sack->right;
280 1.12 kurahone tp->snd_numholes++;
281 1.12 kurahone tcp_sack_globalholes++;
282 1.1 jonathan TAILQ_INSERT_HEAD(&tp->snd_holes, cur, sackhole_q);
283 1.1 jonathan continue; /* With next sack block */
284 1.1 jonathan }
285 1.1 jonathan
286 1.1 jonathan /* Go through the list of holes. */
287 1.1 jonathan while (cur) {
288 1.6 yamt if (SEQ_LEQ(sack->right, cur->start))
289 1.1 jonathan /* SACKs data before the current hole */
290 1.1 jonathan break; /* No use going through more holes */
291 1.1 jonathan
292 1.1 jonathan if (SEQ_GEQ(sack->left, cur->end)) {
293 1.1 jonathan /* SACKs data beyond the current hole */
294 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
295 1.1 jonathan continue;
296 1.1 jonathan }
297 1.1 jonathan
298 1.1 jonathan if (SEQ_LEQ(sack->left, cur->start)) {
299 1.1 jonathan /* Data acks at least the beginning of hole */
300 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
301 1.1 jonathan /* Acks entire hole, so delete hole */
302 1.1 jonathan tmp = cur;
303 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
304 1.12 kurahone tp->snd_numholes--;
305 1.12 kurahone tcp_sack_globalholes--;
306 1.2 yamt TAILQ_REMOVE(&tp->snd_holes, tmp,
307 1.2 yamt sackhole_q);
308 1.1 jonathan pool_put(&sackhole_pool, tmp);
309 1.1 jonathan break;
310 1.1 jonathan }
311 1.1 jonathan
312 1.1 jonathan /* Otherwise, move start of hole forward */
313 1.1 jonathan cur->start = sack->right;
314 1.1 jonathan cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
315 1.1 jonathan break;
316 1.1 jonathan }
317 1.1 jonathan
318 1.1 jonathan if (SEQ_GEQ(sack->right, cur->end)) {
319 1.1 jonathan /* Move end of hole backward. */
320 1.1 jonathan cur->end = sack->left;
321 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
322 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
323 1.1 jonathan break;
324 1.1 jonathan }
325 1.1 jonathan
326 1.1 jonathan if (SEQ_LT(cur->start, sack->left) &&
327 1.1 jonathan SEQ_GT(cur->end, sack->right)) {
328 1.1 jonathan /*
329 1.1 jonathan * ACKs some data in middle of a hole; need to
330 1.1 jonathan * split current hole
331 1.1 jonathan */
332 1.12 kurahone if (tcp_sack_globalholes >=
333 1.12 kurahone tcp_sack_globalmaxholes ||
334 1.12 kurahone tp->snd_numholes >=
335 1.12 kurahone tcp_sack_tp_maxholes) {
336 1.15 tls splx(s);
337 1.12 kurahone return;
338 1.12 kurahone }
339 1.1 jonathan tmp = (struct sackhole *)
340 1.2 yamt pool_get(&sackhole_pool, PR_NOWAIT);
341 1.1 jonathan if (tmp == NULL) {
342 1.1 jonathan /* ENOBUFS, bail out. */
343 1.15 tls splx(s);
344 1.1 jonathan return;
345 1.1 jonathan }
346 1.1 jonathan tmp->start = sack->right;
347 1.1 jonathan tmp->end = cur->end;
348 1.1 jonathan tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
349 1.1 jonathan cur->end = sack->left;
350 1.1 jonathan cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
351 1.12 kurahone tp->snd_numholes++;
352 1.12 kurahone tcp_sack_globalholes++;
353 1.1 jonathan TAILQ_INSERT_AFTER(&tp->snd_holes, cur, tmp,
354 1.1 jonathan sackhole_q);
355 1.7 yamt cur = tmp;
356 1.1 jonathan break;
357 1.1 jonathan }
358 1.1 jonathan }
359 1.1 jonathan
360 1.1 jonathan /* At this point, we have reached the tail of the list. */
361 1.1 jonathan if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
362 1.1 jonathan /*
363 1.1 jonathan * Need to append new hole at end.
364 1.1 jonathan */
365 1.12 kurahone if (tcp_sack_globalholes >=
366 1.12 kurahone tcp_sack_globalmaxholes ||
367 1.12 kurahone tp->snd_numholes >=
368 1.12 kurahone tcp_sack_tp_maxholes) {
369 1.15 tls splx(s);
370 1.12 kurahone return;
371 1.12 kurahone }
372 1.1 jonathan tmp = (struct sackhole *)
373 1.4 yamt pool_get(&sackhole_pool, PR_NOWAIT);
374 1.1 jonathan if (tmp == NULL)
375 1.1 jonathan continue; /* ENOBUFS */
376 1.1 jonathan tmp->start = tp->rcv_lastsack;
377 1.1 jonathan tmp->end = sack->left;
378 1.1 jonathan tmp->rxmit = tmp->start;
379 1.12 kurahone tp->snd_numholes++;
380 1.12 kurahone tcp_sack_globalholes++;
381 1.1 jonathan TAILQ_INSERT_TAIL(&tp->snd_holes, tmp, sackhole_q);
382 1.1 jonathan cur = tmp;
383 1.1 jonathan }
384 1.8 yamt if (SEQ_LT(tp->rcv_lastsack, sack->right)) {
385 1.8 yamt tp->rcv_lastsack = sack->right;
386 1.8 yamt }
387 1.1 jonathan }
388 1.15 tls
389 1.15 tls splx(s);
390 1.1 jonathan }
391 1.1 jonathan
392 1.1 jonathan void
393 1.1 jonathan tcp_del_sackholes(struct tcpcb *tp, struct tcphdr *th)
394 1.1 jonathan {
395 1.1 jonathan /* Max because this could be an older ack that just arrived. */
396 1.1 jonathan tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
397 1.1 jonathan th->th_ack : tp->snd_una;
398 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
399 1.1 jonathan struct sackhole *tmp;
400 1.15 tls int s;
401 1.15 tls
402 1.15 tls s = splsoftnet();
403 1.1 jonathan
404 1.1 jonathan while (cur) {
405 1.1 jonathan if (SEQ_LEQ(cur->end, lastack)) {
406 1.1 jonathan tmp = cur;
407 1.1 jonathan cur = TAILQ_NEXT(cur, sackhole_q);
408 1.12 kurahone tp->snd_numholes--;
409 1.12 kurahone tcp_sack_globalholes--;
410 1.1 jonathan TAILQ_REMOVE(&tp->snd_holes, tmp, sackhole_q);
411 1.1 jonathan pool_put(&sackhole_pool, tmp);
412 1.1 jonathan } else if (SEQ_LT(cur->start, lastack)) {
413 1.1 jonathan cur->start = lastack;
414 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->start))
415 1.1 jonathan cur->rxmit = cur->start;
416 1.1 jonathan break;
417 1.1 jonathan } else
418 1.1 jonathan break;
419 1.1 jonathan }
420 1.15 tls
421 1.15 tls splx(s);
422 1.1 jonathan }
423 1.1 jonathan
424 1.1 jonathan void
425 1.1 jonathan tcp_free_sackholes(struct tcpcb *tp)
426 1.1 jonathan {
427 1.1 jonathan struct sackhole *sack;
428 1.15 tls int s;
429 1.15 tls
430 1.15 tls s = splsoftnet();
431 1.1 jonathan
432 1.1 jonathan /* Free up the SACK hole list. */
433 1.1 jonathan while (!TAILQ_EMPTY(&tp->snd_holes)) {
434 1.1 jonathan sack = TAILQ_FIRST(&tp->snd_holes);
435 1.12 kurahone tcp_sack_globalholes--;
436 1.1 jonathan TAILQ_REMOVE(&tp->snd_holes, sack, sackhole_q);
437 1.1 jonathan pool_put(&sackhole_pool, sack);
438 1.1 jonathan }
439 1.12 kurahone
440 1.12 kurahone tp->snd_numholes = 0;
441 1.15 tls
442 1.15 tls splx(s);
443 1.1 jonathan }
444 1.1 jonathan
445 1.1 jonathan /*
446 1.1 jonathan * Implements the SACK response to a new ack, checking for partial acks
447 1.1 jonathan * in fast recovery.
448 1.1 jonathan */
449 1.1 jonathan void
450 1.1 jonathan tcp_sack_newack(struct tcpcb *tp, struct tcphdr *th)
451 1.1 jonathan {
452 1.1 jonathan if (tp->t_partialacks < 0) {
453 1.1 jonathan /*
454 1.1 jonathan * Not in fast recovery. Reset the duplicate ack
455 1.1 jonathan * counter.
456 1.1 jonathan */
457 1.1 jonathan tp->t_dupacks = 0;
458 1.1 jonathan } else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
459 1.1 jonathan /*
460 1.1 jonathan * Partial ack handling within a sack recovery episode.
461 1.1 jonathan * Keeping this very simple for now. When a partial ack
462 1.1 jonathan * is received, force snd_cwnd to a value that will allow
463 1.1 jonathan * the sender to transmit no more than 2 segments.
464 1.1 jonathan * If necessary, a fancier scheme can be adopted at a
465 1.1 jonathan * later point, but for now, the goal is to prevent the
466 1.1 jonathan * sender from bursting a large amount of data in the midst
467 1.1 jonathan * of sack recovery.
468 1.1 jonathan */
469 1.1 jonathan int num_segs = 1;
470 1.1 jonathan int sack_bytes_rxmt = 0;
471 1.1 jonathan
472 1.1 jonathan tp->t_partialacks++;
473 1.1 jonathan TCP_TIMER_DISARM(tp, TCPT_REXMT);
474 1.1 jonathan tp->t_rtttime = 0;
475 1.1 jonathan
476 1.2 yamt /*
477 1.2 yamt * send one or 2 segments based on how much new data was acked
478 1.2 yamt */
479 1.1 jonathan if (((th->th_ack - tp->snd_una) / tp->t_segsz) > 2)
480 1.1 jonathan num_segs = 2;
481 1.1 jonathan (void)tcp_sack_output(tp, &sack_bytes_rxmt);
482 1.2 yamt tp->snd_cwnd = sack_bytes_rxmt +
483 1.2 yamt (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_segsz;
484 1.1 jonathan tp->t_flags |= TF_ACKNOW;
485 1.1 jonathan (void) tcp_output(tp);
486 1.1 jonathan } else {
487 1.1 jonathan /*
488 1.1 jonathan * Complete ack, inflate the congestion window to
489 1.1 jonathan * ssthresh and exit fast recovery.
490 1.1 jonathan *
491 1.1 jonathan * Window inflation should have left us with approx.
492 1.1 jonathan * snd_ssthresh outstanding data. But in case we
493 1.1 jonathan * would be inclined to send a burst, better to do
494 1.1 jonathan * it via the slow start mechanism.
495 1.1 jonathan */
496 1.1 jonathan if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
497 1.1 jonathan tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
498 1.1 jonathan + tp->t_segsz;
499 1.1 jonathan else
500 1.1 jonathan tp->snd_cwnd = tp->snd_ssthresh;
501 1.1 jonathan tp->t_partialacks = -1;
502 1.1 jonathan tp->t_dupacks = 0;
503 1.1 jonathan if (SEQ_GT(th->th_ack, tp->snd_fack))
504 1.1 jonathan tp->snd_fack = th->th_ack;
505 1.1 jonathan }
506 1.1 jonathan }
507 1.1 jonathan
508 1.1 jonathan /*
509 1.1 jonathan * Returns pointer to a sackhole if there are any pending retransmissions;
510 1.1 jonathan * NULL otherwise.
511 1.1 jonathan */
512 1.1 jonathan struct sackhole *
513 1.1 jonathan tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
514 1.1 jonathan {
515 1.1 jonathan struct sackhole *cur = NULL;
516 1.1 jonathan
517 1.17 yamt if (!TCP_SACK_ENABLED(tp))
518 1.1 jonathan return (NULL);
519 1.1 jonathan
520 1.1 jonathan *sack_bytes_rexmt = 0;
521 1.1 jonathan TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
522 1.1 jonathan if (SEQ_LT(cur->rxmit, cur->end)) {
523 1.2 yamt if (SEQ_LT(cur->rxmit, tp->snd_una)) {
524 1.2 yamt /* old SACK hole */
525 1.1 jonathan continue;
526 1.1 jonathan }
527 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
528 1.1 jonathan break;
529 1.1 jonathan }
530 1.1 jonathan *sack_bytes_rexmt += (cur->rxmit - cur->start);
531 1.1 jonathan }
532 1.1 jonathan
533 1.1 jonathan return (cur);
534 1.1 jonathan }
535 1.1 jonathan
536 1.1 jonathan /*
537 1.1 jonathan * After a timeout, the SACK list may be rebuilt. This SACK information
538 1.1 jonathan * should be used to avoid retransmitting SACKed data. This function
539 1.1 jonathan * traverses the SACK list to see if snd_nxt should be moved forward.
540 1.1 jonathan */
541 1.1 jonathan void
542 1.1 jonathan tcp_sack_adjust(struct tcpcb *tp)
543 1.1 jonathan {
544 1.1 jonathan struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
545 1.1 jonathan struct sackhole *n = NULL;
546 1.1 jonathan
547 1.1 jonathan if (TAILQ_EMPTY(&tp->snd_holes))
548 1.1 jonathan return; /* No holes */
549 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
550 1.1 jonathan return; /* We're already beyond any SACKed blocks */
551 1.1 jonathan
552 1.1 jonathan /*
553 1.1 jonathan * Two cases for which we want to advance snd_nxt:
554 1.1 jonathan * i) snd_nxt lies between end of one hole and beginning of another
555 1.1 jonathan * ii) snd_nxt lies between end of last hole and rcv_lastsack
556 1.1 jonathan */
557 1.1 jonathan while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
558 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
559 1.1 jonathan return;
560 1.1 jonathan if (SEQ_GEQ(tp->snd_nxt, n->start))
561 1.1 jonathan cur = n;
562 1.1 jonathan else {
563 1.1 jonathan tp->snd_nxt = n->start;
564 1.1 jonathan return;
565 1.1 jonathan }
566 1.1 jonathan }
567 1.1 jonathan if (SEQ_LT(tp->snd_nxt, cur->end))
568 1.1 jonathan return;
569 1.1 jonathan tp->snd_nxt = tp->rcv_lastsack;
570 1.1 jonathan
571 1.1 jonathan return;
572 1.1 jonathan }
573 1.9 yamt
574 1.9 yamt int
575 1.10 yamt tcp_sack_numblks(const struct tcpcb *tp)
576 1.9 yamt {
577 1.10 yamt int numblks;
578 1.9 yamt
579 1.10 yamt if (!TCP_SACK_ENABLED(tp)) {
580 1.9 yamt return 0;
581 1.9 yamt }
582 1.9 yamt
583 1.10 yamt numblks = (((tp->rcv_sack_flags & TCPSACK_HAVED) != 0) ? 1 : 0) +
584 1.10 yamt tp->t_segqlen;
585 1.10 yamt
586 1.10 yamt if (numblks == 0) {
587 1.10 yamt return 0;
588 1.10 yamt }
589 1.10 yamt
590 1.10 yamt if (numblks > TCP_SACK_MAX) {
591 1.10 yamt numblks = TCP_SACK_MAX;
592 1.10 yamt }
593 1.10 yamt
594 1.10 yamt return numblks;
595 1.9 yamt }
596