dccp_tcplike.c revision 1.2 1 /* $KAME: dccp_tcplike.c,v 1.19 2005/07/27 06:27:25 nishida Exp $ */
2 /* $NetBSD: dccp_tcplike.c,v 1.2 2015/08/24 22:21:26 pooka Exp $ */
3
4 /*
5 * Copyright (c) 2003 Magnus Erixzon
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * TCP-like congestion control for DCCP
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: dccp_tcplike.c,v 1.2 2015/08/24 22:21:26 pooka Exp $");
37
38 #ifdef _KERNEL_OPT
39 #include "opt_dccp.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/proc.h>
50 #include <sys/protosw.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/mutex.h>
55 #include <sys/sysctl.h>
56 #include <sys/syslog.h>
57
58 #include <net/if.h>
59 #include <net/route.h>
60
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66
67 #include <netinet/ip_icmp.h>
68 #include <netinet/icmp_var.h>
69 #include <netinet/ip_var.h>
70
71 #include <netinet/dccp.h>
72 #include <netinet/dccp_var.h>
73 #include <netinet/dccp_tcplike.h>
74
75 #define TCPLIKE_DEBUG(args) dccp_log args
76 #define MALLOC_DEBUG(args) log args
77 #define CWND_DEBUG(args) dccp_log args
78 #define ACKRATIO_DEBUG(args) dccp_log args
79 #define LOSS_DEBUG(args) dccp_log args
80 #define TIMEOUT_DEBUG(args) dccp_log args
81
82 #if !defined(__FreeBSD__) || __FreeBSD_version < 500000
83 #define INP_INFO_LOCK_INIT(x,y)
84 #define INP_INFO_WLOCK(x)
85 #define INP_INFO_WUNLOCK(x)
86 #define INP_INFO_RLOCK(x)
87 #define INP_INFO_RUNLOCK(x)
88 #define INP_LOCK(x)
89 #define INP_UNLOCK(x)
90 #endif
91
92 /* Sender side */
93
94 void tcplike_rto_timeout(void *);
95 void tcplike_rtt_sample(struct tcplike_send_ccb *, u_int16_t);
96 void _add_to_cwndvector(struct tcplike_send_ccb *, u_int64_t);
97 void _remove_from_cwndvector(struct tcplike_send_ccb *, u_int64_t);
98 int _chop_cwndvector(struct tcplike_send_ccb *, u_int64_t);
99 int _cwndvector_size(struct tcplike_send_ccb *);
100 u_char _cwndvector_state(struct tcplike_send_ccb *, u_int64_t);
101
102 void tcplike_send_term(void *);
103 void tcplike_recv_term(void *);
104
105 void _avlist_add(struct tcplike_recv_ccb *, u_int64_t, u_int64_t);
106 u_int64_t _avlist_get(struct tcplike_recv_ccb *, u_int64_t);
107
108 /* extern Ack Vector functions */
109 extern void dccp_use_ackvector(struct dccpcb *);
110 extern void dccp_update_ackvector(struct dccpcb *, u_int64_t);
111 extern void dccp_increment_ackvector(struct dccpcb *, u_int64_t);
112 extern u_int16_t dccp_generate_ackvector(struct dccpcb *, u_char *);
113 extern u_char dccp_ackvector_state(struct dccpcb *, u_int32_t);
114
115 extern int dccp_get_option(char *, int, int, char *, int);
116 extern int dccp_remove_feature(struct dccpcb *, u_int8_t, u_int8_t);
117
118 /*
119 * RTO timer activated
120 */
121 void
122 tcplike_rto_timeout(void *ccb)
123 {
124 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
125 /*struct inpcb *inp;*/
126 int s;
127
128 mutex_enter(&(cb->mutex));
129
130 cb->ssthresh = cb->cwnd >>1;
131 cb->cwnd = 1; /* allowing 1 packet to be sent */
132 cb->outstanding = 0; /* is this correct? */
133 cb->rto_timer_callout = 0;
134 cb->rto = cb->rto << 1;
135 TIMEOUT_DEBUG((LOG_INFO, "RTO Timeout. New RTO = %u\n", cb->rto));
136
137 cb->sample_rtt = 0;
138
139 cb->ack_last = 0;
140 cb->ack_miss = 0;
141
142 cb->rcvr_ackratio = 1; /* Constraint 2 & 3. We need ACKs asap */
143 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
144 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
145 (char *) &cb->rcvr_ackratio, 1);
146 cb->acked_in_win = 0;
147 cb->acked_windows = 0;
148 cb->oldcwnd_ts = cb->pcb->seq_snd;
149
150 LOSS_DEBUG((LOG_INFO, "Timeout. CWND value: %u , OUTSTANDING value: %u\n",
151 cb->cwnd, cb->outstanding));
152 mutex_exit(&(cb->mutex));
153
154 /* lock'n run dccp_output */
155 s = splnet();
156 INP_INFO_RLOCK(&dccpbinfo);
157 /*inp = cb->pcb->d_inpcb;*/
158 INP_LOCK(inp);
159 INP_INFO_RUNLOCK(&dccpbinfo);
160
161 dccp_output(cb->pcb, 1);
162
163 INP_UNLOCK(inp);
164 splx(s);
165 }
166
167 void tcplike_rtt_sample(struct tcplike_send_ccb *cb, u_int16_t sample)
168 {
169 u_int16_t err;
170
171 if (cb->rtt == 0xffff) {
172 /* hmmmmm. */
173 cb->rtt = sample;
174 cb->rto = cb->rtt << 1;
175 return;
176 }
177
178 /* This is how the Linux implementation is doing it.. */
179 if (sample >= cb->rtt) {
180 err = sample - cb->rtt;
181 cb->rtt = cb->rtt + (err >> 3);
182 } else {
183 err = cb->rtt - sample;
184 cb->rtt = cb->rtt - (err >> 3);
185 }
186 cb->rtt_d = cb->rtt_d + ((err - cb->rtt_d) >> 2);
187 if (cb->rtt < TCPLIKE_MIN_RTT)
188 cb->rtt = TCPLIKE_MIN_RTT;
189 cb->rto = cb->rtt + (cb->rtt_d << 2);
190
191
192 /* 5 million ways to calculate RTT ...*/
193 #if 0
194 cb->srtt = ( 0.8 * cb->srtt ) + (0.2 * sample);
195 if (cb->srtt < TCPLIKE_MIN_RTT)
196 cb->srtt = TCPLIKE_MIN_RTT;
197 cb->rto = cb->srtt << 1;
198 #endif
199
200 LOSS_DEBUG((LOG_INFO, "RTT Sample: %u , New RTO: %u\n", sample, cb->rto));
201 }
202
203 /* Functions declared in struct dccp_cc_sw */
204
205 /*
206 * Initialises the sender side
207 * returns: pointer to a tfrc_send_ccb struct on success, otherwise 0
208 */
209 void *
210 tcplike_send_init(struct dccpcb* pcb)
211 {
212 struct tcplike_send_ccb *cb;
213
214 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_init()\n"));
215
216 cb = malloc(sizeof (struct tcplike_send_ccb), M_PCB, M_NOWAIT | M_ZERO);
217 if (cb == 0) {
218 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_send_ccb!\n"));
219 dccpstat.tcplikes_send_memerr++;
220 return 0;
221 }
222 memset(cb, 0, sizeof (struct tcplike_send_ccb));
223
224 /* init sender */
225 cb->pcb = pcb;
226
227 cb->cwnd = TCPLIKE_INITIAL_CWND;
228 cb->ssthresh = 0xafff; /* lim-> infinity */
229 cb->oldcwnd_ts = 0;
230 cb->outstanding = 0;
231 cb->rcvr_ackratio = 2; /* Ack Ratio */
232 cb->acked_in_win = 0;
233 cb->acked_windows = 0;
234
235 CWND_DEBUG((LOG_INFO, "Init. CWND value: %u , OUTSTANDING value: %u\n",
236 cb->cwnd, cb->outstanding));
237 cb->rtt = 0xffff;
238 cb->rto = TIMEOUT_UBOUND;
239 callout_init(&cb->rto_timer, 0);
240 callout_init(&cb->free_timer, 0);
241 cb->rto_timer_callout = 0;
242 cb->rtt_d = 0;
243 cb->timestamp = 0;
244
245 cb->sample_rtt = 1;
246
247 cb->cv_size = TCPLIKE_INITIAL_CWNDVECTOR;
248 /* 1 bit per entry */
249 cb->cwndvector = malloc(cb->cv_size / 8, M_PCB, M_NOWAIT | M_ZERO);
250 if (cb->cwndvector == NULL) {
251 MALLOC_DEBUG((LOG_INFO, "Unable to allocate memory for cwndvector\n"));
252 /* What to do now? */
253 cb->cv_size = 0;
254 dccpstat.tcplikes_send_memerr++;
255 return 0;
256 }
257 memset(cb->cwndvector, 0, cb->cv_size / 8);
258 cb->cv_hs = cb->cv_ts = 0;
259 cb->cv_hp = cb->cwndvector;
260
261 cb->ack_last = 0;
262 cb->ack_miss = 0;
263
264 mutex_init(&(cb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
265
266 TCPLIKE_DEBUG((LOG_INFO, "TCPlike sender initialised!\n"));
267 dccpstat.tcplikes_send_conn++;
268 return cb;
269 }
270
271 void tcplike_send_term(void *ccb)
272 {
273 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
274 if (ccb == 0)
275 return;
276
277 mutex_destroy(&(cb->mutex));
278
279 free(cb, M_PCB);
280 TCPLIKE_DEBUG((LOG_INFO, "TCP-like sender is destroyed\n"));
281 }
282
283 /*
284 * Free the sender side
285 * args: ccb - ccb of sender
286 */
287 void
288 tcplike_send_free(void *ccb)
289 {
290 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
291
292 LOSS_DEBUG((LOG_INFO, "Entering tcplike_send_free()\n"));
293
294 if (ccb == 0)
295 return;
296
297 mutex_enter(&(cb->mutex));
298
299 free(cb->cwndvector, M_PCB);
300 cb->cv_hs = cb->cv_ts = 0;
301
302 /* untimeout any active timer */
303 if (cb->rto_timer_callout) {
304 TCPLIKE_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
305 callout_stop(&cb->rto_timer);
306 cb->rto_timer_callout = 0;
307 }
308
309 mutex_exit(&(cb->mutex));
310
311 callout_reset(&cb->free_timer, 10 * hz, tcplike_send_term, (void *)cb);
312 }
313
314 /*
315 * Ask TCPlike wheter one can send a packet or not
316 * args: ccb - ccb block for current connection
317 * returns: 0 if ok, else <> 0.
318 */
319 int
320 tcplike_send_packet(void *ccb, long datasize)
321 {
322 /* check if one can send here */
323 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
324 long ticks;
325 char feature[1];
326
327 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet()\n"));
328
329 if (datasize == 0) {
330 TCPLIKE_DEBUG((LOG_INFO, "Sending pure ACK. Dont care about CC right now\n"));
331 return 1;
332 }
333
334 mutex_enter(&(cb->mutex));
335
336 if (cb->cwnd <= cb->outstanding) {
337 /* May not send. trigger RTO */
338 DCCP_DEBUG((LOG_INFO, "cwnd (%d) < outstanding (%d)\n", cb->cwnd, cb->outstanding));
339 if (!cb->rto_timer_callout) {
340 LOSS_DEBUG((LOG_INFO, "Trigger TCPlike RTO timeout timer. Ticks = %u\n", cb->rto));
341 ticks = (long)cb->rto;
342 callout_reset(&cb->rto_timer, ticks,
343 tcplike_rto_timeout, (void *)cb);
344 cb->rto_timer_callout = 1;
345 }
346 mutex_exit(&(cb->mutex));
347 return 0;
348 }
349
350 /* We're allowed to send */
351
352 feature[0] = 1;
353 if (cb->pcb->remote_ackvector == 0) {
354 ACK_DEBUG((LOG_INFO, "Adding Change(Use Ack Vector, 1) to outgoing packet\n"));
355 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR);
356 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR, feature, 1);
357 }
358
359 /* untimeout any active timer */
360 if (cb->rto_timer_callout) {
361 LOSS_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
362 callout_stop(&cb->rto_timer);
363 cb->rto_timer_callout = 0;
364 }
365
366 if (!cb->sample_rtt) {
367 struct timeval stamp;
368 microtime(&stamp);
369 cb->timestamp = ((stamp.tv_sec & 0x00000FFF) * 1000000) + stamp.tv_usec;
370 dccp_add_option(cb->pcb, DCCP_OPT_TIMESTAMP, (char*) &(cb->timestamp), 4);
371 /*LOSS_DEBUG((LOG_INFO, "Adding timestamp %u\n", cb->timestamp));*/
372 cb->sample_rtt = 1;
373 }
374
375 mutex_exit(&(cb->mutex));
376 return 1;
377
378 }
379
380 /*
381 * Notify sender that a packet has been sent
382 * args: ccb - ccb block for current connection
383 * moreToSend - if there exists more packets to send
384 */
385 void
386 tcplike_send_packet_sent(void *ccb, int moreToSend, long datasize)
387 {
388 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
389
390 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet_sent(,%i,%i)\n",moreToSend,(int) datasize));
391
392 if (datasize == 0) {
393 TCPLIKE_DEBUG((LOG_INFO, "Sent pure ACK. Dont care about cwnd-storing\n"));
394 return;
395 }
396
397 mutex_enter(&(cb->mutex));
398
399 cb->outstanding++;
400 TCPLIKE_DEBUG((LOG_INFO, "SENT. cwnd: %d, outstanding: %d\n",cb->cwnd, cb->outstanding));
401
402 /* stash the seqnr in cwndvector */
403 /* Dont do this if we're only sending an ACK ! */
404 _add_to_cwndvector(cb, cb->pcb->seq_snd);
405 CWND_DEBUG((LOG_INFO, "Sent. CWND value: %u , OUTSTANDING value: %u\n",cb->cwnd, cb->outstanding));
406
407 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
408 mutex_exit(&(cb->mutex));
409 }
410
411 /*
412 * Notify that an ack package was received
413 * args: ccb - ccb block for current connection
414 */
415 void
416 tcplike_send_packet_recv(void *ccb, char *options, int optlen)
417 {
418 dccp_seq acknum, lastok;
419 u_int16_t numlostpackets, avsize, i, prev_size;
420 u_int8_t length, state, numokpackets, ackratiocnt;
421 u_char av[10];
422 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
423
424 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_ack_recv()\n"));
425 mutex_enter(&(cb->mutex));
426
427 if (dccp_get_option(options, optlen, DCCP_OPT_TIMESTAMP_ECHO, av,10) > 0) {
428 u_int32_t echo, elapsed;
429
430 TCPLIKE_DEBUG((LOG_INFO, "Received TIMESTAMP ECHO\n"));
431 bcopy(av, &echo, 4);
432 bcopy(av + 4, &elapsed, 4);
433
434 if (echo == cb->timestamp) {
435 struct timeval time;
436 u_int32_t c_stamp;
437 u_int16_t diff;
438
439 microtime(&time);
440 c_stamp = ((time.tv_sec & 0x00000FFF) * 1000000) + time.tv_usec;
441
442 diff = (u_int16_t) c_stamp - cb->timestamp - elapsed;
443 diff = (u_int16_t)(diff / 1000);
444 TCPLIKE_DEBUG((LOG_INFO, "Got Timestamp Echo; Echo = %u, Elapsed = %u. DIFF = %u\n",
445 echo, elapsed, diff));
446 tcplike_rtt_sample(cb, diff);
447 }
448 }
449
450 if (cb->pcb->ack_rcv == 0) {
451 /* There was no Ack. There is no spoon */
452
453 /* We'll clear the missingacks data here, since the other host
454 * is also sending data.
455 * I guess we could deal with this, using the NDP field in the
456 * header. Let's stick a *TODO* mark here for now.
457 * The missingacks mechanism will activate if other host goes to
458 * only sending DCCP-Ack packets.
459 */
460 cb->ack_last = 0;
461 cb->ack_miss = 0;
462 ACKRATIO_DEBUG((LOG_INFO, "Clear Missing Acks state!\n"));
463 mutex_exit(&(cb->mutex));
464 return;
465 }
466
467 cb->sample_rtt = 0;
468
469 /* check ackVector for lost packets. cmp with cv_list */
470 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av,10);
471 if (avsize == 0)
472 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av,10);
473
474 if (avsize > 0)
475 dccpstat.tcplikes_send_ackrecv++;
476
477 acknum = cb->pcb->ack_rcv;
478 numlostpackets = 0;
479 numokpackets = 0;
480 lastok = 0;
481 prev_size = _cwndvector_size(cb);
482
483 TCPLIKE_DEBUG((LOG_INFO, "Start removing from cwndvector %d\n", avsize));
484 if (avsize == 0)
485 _remove_from_cwndvector(cb, acknum);
486
487 for (i=0; i < avsize; i++) {
488 state = (av[i] & 0xc0) >> 6;
489 length = (av[i] & 0x3f) +1;
490 while (length > 0) {
491 if (state == 0) {
492 CWND_DEBUG((LOG_INFO, "Packet %llu was OK\n", acknum));
493 numokpackets++;
494 lastok = acknum;
495 _remove_from_cwndvector(cb, acknum);
496 } else {
497 if (acknum > cb->oldcwnd_ts) {
498 LOSS_DEBUG((LOG_INFO, "Packet %llu was lost %llu state %d\n", acknum, cb->oldcwnd_ts, state));
499 numlostpackets++;
500 dccpstat.tcplikes_send_reploss++;
501 }
502 }
503 acknum--;
504 length--;
505 }
506 }
507 if (lastok)
508 if (_chop_cwndvector(cb, lastok-TCPLIKE_NUMDUPACK)) {
509 LOSS_DEBUG((LOG_INFO, "Packets were lost\n"));
510 if (lastok-TCPLIKE_NUMDUPACK > cb->oldcwnd_ts) {
511 numlostpackets++;
512 dccpstat.tcplikes_send_assloss++;
513 }
514 }
515
516 lastok = cb->cv_hs;
517 while (_cwndvector_state(cb, lastok) == 0x00 && lastok < cb->cv_ts)
518 lastok++;
519 if (lastok != cb->cv_hs)
520 _chop_cwndvector(cb, lastok);
521
522 cb->outstanding = _cwndvector_size(cb);
523 CWND_DEBUG((LOG_INFO, "Decrease outstanding. was = %u , now = %u\n", prev_size, cb->outstanding));
524 if (prev_size == cb->outstanding) {
525 /* Nothing dropped from cwndvector */
526 mutex_exit(&(cb->mutex));
527 return;
528 }
529
530 cb->acked_in_win += numokpackets;
531
532 if (cb->cwnd < cb->ssthresh) {
533 /* Slow start */
534
535 if (numlostpackets > 0) {
536 /* Packet loss */
537 LOSS_DEBUG((LOG_INFO, "Packet Loss in Slow Start\n"));
538 cb->cwnd = cb->cwnd>>1;
539 if (cb->cwnd < 1)
540 cb->cwnd = 1;
541 cb->ssthresh = cb->cwnd;
542 cb->acked_in_win = 0;
543 cb->acked_windows = 0;
544 cb->oldcwnd_ts = cb->pcb->seq_snd;
545
546 } else {
547 cb->cwnd++;
548 }
549
550 } else if (cb->cwnd >= cb->ssthresh) {
551
552 if (numlostpackets > 0) {
553 /* Packet loss */
554 LOSS_DEBUG((LOG_INFO, "Packet Loss in action\n"));
555 cb->cwnd = cb->cwnd>>1;
556 if (cb->cwnd < 1)
557 cb->cwnd = 1;
558 cb->ssthresh = cb->cwnd;
559 cb->acked_in_win = 0;
560 cb->acked_windows = 0;
561 cb->oldcwnd_ts = cb->pcb->seq_snd;
562
563 } else if (cb->acked_in_win > cb->cwnd) {
564 cb->cwnd++;
565 }
566 }
567
568 /* Ok let's check if there are missing Ack packets */
569 ACKRATIO_DEBUG((LOG_INFO, "Check Ack. seq_rcv: %u ,ack_last: %u ,ack_miss: %u\n",
570 cb->pcb->seq_rcv, cb->ack_last, cb->ack_miss));
571
572 if (cb->ack_last == 0) {
573 /* First received ack (or first after Data packet). Yey */
574 cb->ack_last = cb->pcb->seq_rcv;
575 cb->ack_miss = 0;
576 } else if (cb->pcb->seq_rcv == (cb->ack_last + 1)) {
577 /* This is correct, non-congestion, in-order behaviour */
578 cb->ack_last = cb->pcb->seq_rcv;
579
580 } else if (cb->pcb->seq_rcv < (cb->ack_last + 1)) {
581 /* Might be an Ack we've been missing */
582 /* This code has a flaw; If we miss 2 Ack packets, we only care
583 * about the older one. This means that the next-to-oldest one could
584 * be lost without any action beeing taken.
585 * Time will tell if that is going to be a Giant Problem(r)
586 */
587 if (cb->pcb->seq_rcv == cb->ack_miss) {
588 /* Yea it was. great */
589 cb->ack_miss = 0;
590 }
591
592 } else if (cb->pcb->seq_rcv > (cb->ack_last + 1)) {
593 /* There is a jump in Ack seqnums.. */
594 cb->ack_miss = cb->ack_last + 1;
595 cb->ack_last = cb->pcb->seq_rcv;
596 }
597
598 if (cb->ack_miss && ((cb->ack_miss + TCPLIKE_NUMDUPACK) < cb->ack_last)) {
599 /* Alert! Alert! Ack packets are MIA.
600 * Decrease Ack Ratio
601 */
602 cb->rcvr_ackratio = cb->rcvr_ackratio<<1;
603 if (cb->rcvr_ackratio > (cb->cwnd>>1)) {
604 /* Constraint 2 */
605 cb->rcvr_ackratio = cb->cwnd>>1;
606 }
607 if (cb->rcvr_ackratio == 0)
608 cb->rcvr_ackratio = 1;
609 ACKRATIO_DEBUG((LOG_INFO, "Increase Ack Ratio. Now = %u. (cwnd = %u)\n", cb->rcvr_ackratio, cb->cwnd));
610 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
611 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
612 (char *) &cb->rcvr_ackratio, 1);
613
614 cb->ack_miss = 0;
615 cb->acked_windows = 0;
616 cb->acked_in_win = 0;
617 dccpstat.tcplikes_send_missack++;
618
619 } else if (cb->acked_in_win > cb->cwnd) {
620 cb->acked_in_win = 0;
621 cb->acked_windows++;
622 if (cb->rcvr_ackratio == 1) {
623 /* Ack Ratio is 1. We cant decrease it more.. Lets wait for some
624 * heavy congestion so we can increase it
625 */
626 cb->acked_windows = 0;
627 }
628 }
629
630 if (cb->acked_windows >= 1) {
631 ackratiocnt = (cb->cwnd / ((cb->rcvr_ackratio*cb->rcvr_ackratio) - cb->rcvr_ackratio));
632 if (cb->acked_windows >= ackratiocnt) {
633 if (cb->rcvr_ackratio > 2 && cb->cwnd >= 4) {
634 /* Constraint 3 - AckRatio at least 2 for a cwnd >= 4 */
635 cb->rcvr_ackratio--;
636 ACKRATIO_DEBUG((LOG_INFO, "Decrease ackratio by 1, now: %u\n", cb->rcvr_ackratio));
637 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
638 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
639 (char *) &cb->rcvr_ackratio, 1);
640 }
641 cb->acked_in_win = 0;
642 cb->acked_windows = 0;
643 }
644 }
645
646 CWND_DEBUG((LOG_INFO, "Recvd. CWND value: %u , OUTSTANDING value: %u\n",
647 cb->cwnd, cb->outstanding));
648
649 if (cb->cwnd > cb->outstanding && cb->rto_timer_callout) {
650 LOSS_DEBUG((LOG_INFO, "Force DCCP_OUTPUT, CWND = %u Outstanding = %u\n",
651 cb->cwnd, cb->outstanding));
652 callout_stop(&cb->rto_timer);
653 cb->rto_timer_callout = 0;
654
655 mutex_exit(&(cb->mutex));
656 dccp_output(cb->pcb, 1);
657 return;
658 }
659 mutex_exit(&(cb->mutex));
660 }
661
662 int
663 _cwndvector_size(struct tcplike_send_ccb *cb)
664 {
665 u_int64_t gap, offset, seqnr;
666 u_int32_t cnt;
667 u_char *t;
668
669 TCPLIKE_DEBUG((LOG_INFO, "Enter cwndvector_size\n"));
670 cnt = 0;
671 for (seqnr = cb->cv_hs; seqnr < cb->cv_ts; seqnr++) {
672 gap = seqnr - cb->cv_hs;
673
674 offset = gap % 8;
675 t = cb->cv_hp + (gap/8);
676 if (t >= (cb->cwndvector + (cb->cv_size/8)))
677 t -= (cb->cv_size / 8); /* wrapped */
678
679 if (((*t & (0x01 << offset)) >> offset) == 0x01)
680 cnt++;
681 }
682 return cnt;
683 }
684
685 u_char
686 _cwndvector_state(struct tcplike_send_ccb *cb, u_int64_t seqnr)
687 {
688 u_int64_t gap, offset;
689 u_char *t;
690
691 /* Check for wrapping */
692 if (seqnr >= cb->cv_hs) {
693 /* Not wrapped */
694 gap = seqnr - cb->cv_hs;
695 } else {
696 /* Wrapped XXXXX */
697 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
698 }
699
700 if (gap >= cb->cv_size) {
701 /* gap is bigger than cwndvector size? baaad */
702 return 0x01;
703 }
704
705 offset = gap % 8;
706 t = cb->cv_hp + (gap/8);
707 if (t >= (cb->cwndvector + (cb->cv_size/8)))
708 t -= (cb->cv_size / 8); /* wrapped */
709
710 return ((*t & (0x01 << offset)) >> offset);
711 }
712
713 void
714 _add_to_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
715 {
716 u_int64_t offset, dc, gap;
717 u_char *t, *n;
718
719 TCPLIKE_DEBUG((LOG_INFO, "Entering add_to_cwndvector\n"));
720
721 if (cb->cv_hs == cb->cv_ts) {
722 /* Empty cwndvector */
723 cb->cv_hs = cb->cv_ts = seqnr;
724 }
725
726 /* Check for wrapping */
727 if (seqnr >= cb->cv_hs) {
728 /* Not wrapped */
729 gap = seqnr - cb->cv_hs;
730 } else {
731 /* Wrapped */
732 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
733 }
734
735 if (gap >= cb->cv_size) {
736 /* gap is bigger than cwndvector size? baaad */
737 /* maybe we should increase the cwndvector here */
738 CWND_DEBUG((LOG_INFO, "add cwndvector error. gap: %d, cv_size: %d, seqnr: %d\n",
739 gap, cb->cv_size, seqnr));
740 dccpstat.tcplikes_send_badseq++;
741 return;
742 }
743
744 offset = gap % 8; /* bit to mark */
745 t = cb->cv_hp + (gap/8);
746 if (t >= (cb->cwndvector + (cb->cv_size/8)))
747 t -= (cb->cv_size / 8); /* cwndvector wrapped */
748
749 *t = *t | (0x01 << offset); /* turn on bit */
750
751 cb->cv_ts = seqnr+1;
752 if (cb->cv_ts == 0x1000000000000LL)
753 cb->cv_ts = 0;
754
755 if (gap > (cb->cv_size - 128)) {
756 MALLOC_DEBUG((LOG_INFO, "INCREASE cwndVECTOR\n"));
757 n = malloc(cb->cv_size/4, M_PCB, M_NOWAIT); /* old size * 2 */
758 if (n == NULL) {
759 MALLOC_DEBUG((LOG_INFO, "Increase cwndvector FAILED\n"));
760 dccpstat.tcplikes_send_memerr++;
761 return;
762 }
763 memset (n+cb->cv_size/8,0x00,cb->cv_size/8); /* new half all missing */
764 dc = (cb->cwndvector + (cb->cv_size/8)) - cb->cv_hp;
765 memcpy (n,cb->cv_hp, dc); /* tail to end */
766 memcpy (n+dc,cb->cwndvector,cb->cv_hp - cb->cwndvector); /* start to tail */
767 cb->cv_size = cb->cv_size * 2; /* counted in items, so it';s a doubling */
768 free (cb->cwndvector, M_PCB);
769 cb->cv_hp = cb->cwndvector = n;
770 }
771 }
772
773 void
774 _remove_from_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
775 {
776 u_int64_t offset;
777 int64_t gap;
778 u_char *t;
779
780 DCCP_DEBUG((LOG_INFO, "Entering remove_from_cwndvector\n"));
781
782 if (cb->cv_hs == cb->cv_ts) {
783 /* Empty cwndvector */
784 return;
785 }
786
787 /* Check for wrapping */
788 if (seqnr >= cb->cv_hs) {
789 /* Not wrapped */
790 gap = seqnr - cb->cv_hs;
791 } else {
792 /* Wrapped */
793 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
794 }
795
796 if (gap >= cb->cv_size) {
797 /* gap is bigger than cwndvector size. has already been chopped */
798 return;
799 }
800
801 offset = gap % 8; /* hi or low 2 bits to mark */
802 t = cb->cv_hp + (gap/8);
803 if (t >= (cb->cwndvector + (cb->cv_size/8)))
804 t -= (cb->cv_size / 8); /* cwndvector wrapped */
805
806 *t = *t & (~(0x01 << offset)); /* turn off bits */
807 }
808
809 int
810 _chop_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
811 {
812 int64_t gap, bytegap;
813 u_char *t;
814
815 CWND_DEBUG((LOG_INFO,"Chop cwndvector at: %u\n", seqnr));
816
817 if (cb->cv_hs == cb->cv_ts)
818 return 0;
819
820 if (seqnr > cb->cv_hs) {
821 gap = seqnr - cb->cv_hs;
822 } else {
823 /* We received obsolete information */
824 return 0;
825 }
826
827 bytegap = gap/8;
828 if (bytegap == 0)
829 return 0;
830
831 t = cb->cv_hp + bytegap;
832 if (t >= (cb->cwndvector + (cb->cv_size/8)))
833 t -= (cb->cv_size / 8); /* ackvector wrapped */
834 cb->cv_hp = t;
835 cb->cv_hs += bytegap*8;
836 return 1;
837 }
838
839
840 /* Receiver side */
841
842
843 /* Functions declared in struct dccp_cc_sw */
844
845 /* Initialises the receiver side
846 * returns: pointer to a tcplike_recv_ccb struct on success, otherwise 0
847 */
848 void *
849 tcplike_recv_init(struct dccpcb *pcb)
850 {
851 struct tcplike_recv_ccb *ccb;
852
853 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_init()\n"));
854
855 ccb = malloc(sizeof (struct tcplike_recv_ccb), M_PCB, M_NOWAIT | M_ZERO);
856 if (ccb == 0) {
857 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_recv_ccb!\n"));
858 dccpstat.tcplikes_recv_memerr++;
859 return 0;
860 }
861
862 memset(ccb, 0, sizeof (struct tcplike_recv_ccb));
863
864 ccb->pcb = pcb;
865 ccb->unacked = 0;
866 ccb->pcb->ack_ratio = 2;
867
868 ccb->pcb->remote_ackvector = 1;
869 dccp_use_ackvector(ccb->pcb);
870
871 callout_init(&ccb->free_timer, 0);
872
873 mutex_init(&(ccb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
874
875 TCPLIKE_DEBUG((LOG_INFO, "TCPlike receiver initialised!\n"));
876 dccpstat.tcplikes_recv_conn++;
877 return ccb;
878 }
879
880 void tcplike_recv_term(void *ccb)
881 {
882 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
883 if (ccb == 0)
884 return;
885
886 mutex_destroy(&(cb->mutex));
887 free(cb, M_PCB);
888 TCPLIKE_DEBUG((LOG_INFO, "TCP-like receiver is destroyed\n"));
889 }
890
891 /* Free the receiver side
892 * args: ccb - ccb of recevier
893 */
894 void
895 tcplike_recv_free(void *ccb)
896 {
897 struct ack_list *a;
898 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
899
900 LOSS_DEBUG((LOG_INFO, "Entering tcplike_recv_free()\n"));
901
902 if (ccb == 0)
903 return;
904
905 mutex_enter(&(cb->mutex));
906
907 a = cb->av_list;
908 while (a) {
909 cb->av_list = a->next;
910 free(a, M_TEMP);
911 a = cb->av_list;
912 }
913
914 cb->pcb->av_size = 0;
915 free(cb->pcb->ackvector, M_PCB);
916
917 mutex_exit(&(cb->mutex));
918 callout_reset(&cb->free_timer, 10 * hz, tcplike_recv_term, (void *)cb);
919 }
920
921 /*
922 * Tell TCPlike that a packet has been received
923 * args: ccb - ccb block for current connection
924 */
925 void
926 tcplike_recv_packet_recv(void *ccb, char *options, int optlen)
927 {
928 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
929 u_char ackvector[16];
930 u_int16_t avsize;
931 u_char av_rcv[10];
932
933 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_packet()\n"));
934
935 mutex_enter(&(cb->mutex));
936
937 if (cb->pcb->type_rcv == DCCP_TYPE_DATA ||
938 cb->pcb->type_rcv == DCCP_TYPE_DATAACK)
939 dccpstat.tcplikes_recv_datarecv++;
940
941 /* Grab Ack Vector 0 or 1 */
942 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av_rcv,10);
943 if (avsize == 0)
944 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av_rcv,10);
945
946 /* We are only interested in acks-on-acks here.
947 * The "real" ack handling is done be the sender */
948 if (avsize == 0 && cb->pcb->ack_rcv) {
949 u_int64_t ackthru;
950 /* We got an Ack without an ackvector.
951 * This would mean it's an ack on an ack.
952 */
953 ackthru = _avlist_get(cb, cb->pcb->ack_rcv);
954 ACK_DEBUG((LOG_INFO, "GOT Ack without Ackvector; Ackthru: %u\n", ackthru));
955 if (ackthru) {
956 dccp_update_ackvector(cb->pcb, ackthru);
957 dccpstat.tcplikes_recv_ackack++;
958 }
959 } else if (avsize > 0 && cb->pcb->ack_rcv) {
960 /* We received an AckVector */
961 u_int32_t acknum, ackthru;
962 int i;
963 ACK_DEBUG((LOG_INFO, "GOT Ack with Ackvector\n"));
964 /* gotta loop through the ackvector */
965 acknum = cb->pcb->ack_rcv;
966 for (i=0; i<avsize; i++) {
967 u_int8_t state, len;
968 state = (av_rcv[i] & 0xc0) >> 6;
969 len = (av_rcv[i] & 0x2f) + 1;
970 if (state != 0) {
971 /* Drops in ackvector! Will be noted and taken care of by the sender part */
972 ACK_DEBUG((LOG_INFO, "Packets %u - %u are FUCKED\n",acknum-len, acknum));
973 continue;
974 }
975
976 while (len>0) {
977 ackthru = _avlist_get(cb, acknum);
978 ACK_DEBUG((LOG_INFO, "Ackthru: %u\n", ackthru));
979 if (ackthru) {
980 dccp_update_ackvector(cb->pcb, ackthru);
981 dccpstat.tcplikes_recv_ackack++;
982 }
983 acknum--;
984 len--;
985 }
986 }
987 }
988
989 ACK_DEBUG((LOG_INFO, "Adding %llu to local ackvector\n", cb->pcb->seq_rcv));
990 dccp_increment_ackvector(cb->pcb, cb->pcb->seq_rcv);
991 cb->unacked++;
992
993 if (cb->unacked >= cb->pcb->ack_ratio) {
994 /* Time to send an Ack */
995
996 avsize = dccp_generate_ackvector(cb->pcb, ackvector);
997 TCPLIKE_DEBUG((LOG_INFO, "recv_packet avsize %d ackvector %d\n", avsize, ackvector));
998 cb->unacked = 0;
999 if (avsize > 0) {
1000 dccp_add_option(cb->pcb, DCCP_OPT_ACK_VECTOR0, ackvector, avsize);
1001 cb->pcb->ack_snd = cb->pcb->seq_rcv;
1002 _avlist_add(cb, cb->pcb->seq_snd+1, cb->pcb->ack_snd);
1003 ACK_DEBUG((LOG_INFO, "Recvr: Sending Ack (%llu) w/ Ack Vector\n", cb->pcb->ack_snd));
1004 dccpstat.tcplikes_recv_acksent++;
1005 dccp_output(cb->pcb, 1);
1006 }
1007 }
1008 mutex_exit(&(cb->mutex));
1009 }
1010
1011 void
1012 _avlist_add(struct tcplike_recv_ccb *cb, u_int64_t localseq, u_int64_t ackthru)
1013 {
1014 struct ack_list *a;
1015 ACK_DEBUG((LOG_INFO,"Adding localseq %u - ackthru %u to avlist\n", localseq, ackthru));
1016 /*MALLOC_DEBUG((LOG_INFO, "New ack_list, %u\n", sizeof (struct ack_list)));*/
1017 a = malloc(sizeof(struct ack_list), M_TEMP, M_NOWAIT);
1018 if (a == NULL) {
1019 MALLOC_DEBUG((LOG_INFO, "avlist_add: FAILED\n"));
1020 dccpstat.tcplikes_recv_memerr++;
1021 return;
1022 }
1023 memset(a, 0, sizeof(struct ack_list));
1024 a->localseq = localseq;
1025 a->ackthru = ackthru;
1026 a->next = cb->av_list;
1027 cb->av_list = a;
1028 }
1029
1030 /*
1031 * Searches the av_list. if 'localseq' found, drop it from list and return
1032 * ackthru
1033 */
1034 u_int64_t
1035 _avlist_get(struct tcplike_recv_ccb *cb, u_int64_t localseq)
1036 {
1037 struct ack_list *a, *n, *p;
1038 u_int64_t ackthru;
1039
1040 ACK_DEBUG((LOG_INFO,"Getting localseq %u from avlist\n", localseq));
1041 a = cb->av_list;
1042 p = 0;
1043 while (a) {
1044 n = a->next;
1045 if (a->localseq == localseq) {
1046 if (p)
1047 p->next = n;
1048 else
1049 cb->av_list = n;
1050 ackthru = a->ackthru;
1051 /*MALLOC_DEBUG((LOG_INFO, "Freeing element %u in ack_list\n", a->localseq));*/
1052 free(a, M_TEMP);
1053 return ackthru;
1054 }
1055 p = a;
1056 a = n;
1057 }
1058 /* Not found. return 0 */
1059 return 0;
1060 }
1061
1062 /*
1063 int tcplike_option_recv(void);
1064 */
1065