dccp_tcplike.c revision 1.1 1 /* $KAME: dccp_tcplike.c,v 1.19 2005/07/27 06:27:25 nishida Exp $ */
2 /* $NetBSD: dccp_tcplike.c,v 1.1 2015/02/10 19:11:52 rjs Exp $ */
3
4 /*
5 * Copyright (c) 2003 Magnus Erixzon
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * TCP-like congestion control for DCCP
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: dccp_tcplike.c,v 1.1 2015/02/10 19:11:52 rjs Exp $");
37
38 #include "opt_dccp.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/domain.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/proc.h>
48 #include <sys/protosw.h>
49 #include <sys/signalvar.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/mutex.h>
53 #include <sys/sysctl.h>
54 #include <sys/syslog.h>
55
56 #include <net/if.h>
57 #include <net/route.h>
58
59 #include <netinet/in.h>
60 #include <netinet/in_systm.h>
61 #include <netinet/ip.h>
62 #include <netinet/in_pcb.h>
63 #include <netinet/in_var.h>
64
65 #include <netinet/ip_icmp.h>
66 #include <netinet/icmp_var.h>
67 #include <netinet/ip_var.h>
68
69 #include <netinet/dccp.h>
70 #include <netinet/dccp_var.h>
71 #include <netinet/dccp_tcplike.h>
72
/*
 * Debug hooks.  Each macro pastes its (already parenthesised) argument
 * list after the name of a logging routine, so call sites use double
 * parentheses: TCPLIKE_DEBUG((LOG_INFO, "fmt\n", ...)).
 */
#define TCPLIKE_DEBUG(args) dccp_log args
#define MALLOC_DEBUG(args) log args
#define CWND_DEBUG(args) dccp_log args
#define ACKRATIO_DEBUG(args) dccp_log args
#define LOSS_DEBUG(args) dccp_log args
#define TIMEOUT_DEBUG(args) dccp_log args

/*
 * Unless this is built on FreeBSD 5 or later, the inpcb locking macros
 * are defined to expand to nothing; their arguments are then never
 * evaluated (or even compiled).
 */
#if !defined(__FreeBSD__) || __FreeBSD_version < 500000
#define INP_INFO_LOCK_INIT(x,y)
#define INP_INFO_WLOCK(x)
#define INP_INFO_WUNLOCK(x)
#define INP_INFO_RLOCK(x)
#define INP_INFO_RUNLOCK(x)
#define INP_LOCK(x)
#define INP_UNLOCK(x)
#endif
89
90 /* Sender side */
91
/* RTO callout handler and RTT estimator */
void tcplike_rto_timeout(void *);
void tcplike_rtt_sample(struct tcplike_send_ccb *, u_int16_t);
/* cwndvector: one bit per sent data packet, addressed by sequence number */
void _add_to_cwndvector(struct tcplike_send_ccb *, u_int64_t);
void _remove_from_cwndvector(struct tcplike_send_ccb *, u_int64_t);
int _chop_cwndvector(struct tcplike_send_ccb *, u_int64_t);
int _cwndvector_size(struct tcplike_send_ccb *);
u_char _cwndvector_state(struct tcplike_send_ccb *, u_int64_t);

/* Deferred destructors, run from the ccb's free_timer callout */
void tcplike_send_term(void *);
void tcplike_recv_term(void *);

/* Receiver list mapping a local sequence number to the ack it carried */
void _avlist_add(struct tcplike_recv_ccb *, u_int64_t, u_int64_t);
u_int64_t _avlist_get(struct tcplike_recv_ccb *, u_int64_t);

/* extern Ack Vector functions */
extern void dccp_use_ackvector(struct dccpcb *);
extern void dccp_update_ackvector(struct dccpcb *, u_int64_t);
extern void dccp_increment_ackvector(struct dccpcb *, u_int64_t);
extern u_int16_t dccp_generate_ackvector(struct dccpcb *, u_char *);
extern u_char dccp_ackvector_state(struct dccpcb *, u_int32_t);

/* Option / feature helpers implemented outside this file */
extern int dccp_get_option(char *, int, int, char *, int);
extern int dccp_remove_feature(struct dccpcb *, u_int8_t, u_int8_t);
115
116 /*
117 * RTO timer activated
118 */
119 void
120 tcplike_rto_timeout(void *ccb)
121 {
122 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
123 /*struct inpcb *inp;*/
124 int s;
125
126 mutex_enter(&(cb->mutex));
127
128 cb->ssthresh = cb->cwnd >>1;
129 cb->cwnd = 1; /* allowing 1 packet to be sent */
130 cb->outstanding = 0; /* is this correct? */
131 cb->rto_timer_callout = 0;
132 cb->rto = cb->rto << 1;
133 TIMEOUT_DEBUG((LOG_INFO, "RTO Timeout. New RTO = %u\n", cb->rto));
134
135 cb->sample_rtt = 0;
136
137 cb->ack_last = 0;
138 cb->ack_miss = 0;
139
140 cb->rcvr_ackratio = 1; /* Constraint 2 & 3. We need ACKs asap */
141 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
142 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
143 (char *) &cb->rcvr_ackratio, 1);
144 cb->acked_in_win = 0;
145 cb->acked_windows = 0;
146 cb->oldcwnd_ts = cb->pcb->seq_snd;
147
148 LOSS_DEBUG((LOG_INFO, "Timeout. CWND value: %u , OUTSTANDING value: %u\n",
149 cb->cwnd, cb->outstanding));
150 mutex_exit(&(cb->mutex));
151
152 /* lock'n run dccp_output */
153 s = splnet();
154 INP_INFO_RLOCK(&dccpbinfo);
155 /*inp = cb->pcb->d_inpcb;*/
156 INP_LOCK(inp);
157 INP_INFO_RUNLOCK(&dccpbinfo);
158
159 dccp_output(cb->pcb, 1);
160
161 INP_UNLOCK(inp);
162 splx(s);
163 }
164
165 void tcplike_rtt_sample(struct tcplike_send_ccb *cb, u_int16_t sample)
166 {
167 u_int16_t err;
168
169 if (cb->rtt == 0xffff) {
170 /* hmmmmm. */
171 cb->rtt = sample;
172 cb->rto = cb->rtt << 1;
173 return;
174 }
175
176 /* This is how the Linux implementation is doing it.. */
177 if (sample >= cb->rtt) {
178 err = sample - cb->rtt;
179 cb->rtt = cb->rtt + (err >> 3);
180 } else {
181 err = cb->rtt - sample;
182 cb->rtt = cb->rtt - (err >> 3);
183 }
184 cb->rtt_d = cb->rtt_d + ((err - cb->rtt_d) >> 2);
185 if (cb->rtt < TCPLIKE_MIN_RTT)
186 cb->rtt = TCPLIKE_MIN_RTT;
187 cb->rto = cb->rtt + (cb->rtt_d << 2);
188
189
190 /* 5 million ways to calculate RTT ...*/
191 #if 0
192 cb->srtt = ( 0.8 * cb->srtt ) + (0.2 * sample);
193 if (cb->srtt < TCPLIKE_MIN_RTT)
194 cb->srtt = TCPLIKE_MIN_RTT;
195 cb->rto = cb->srtt << 1;
196 #endif
197
198 LOSS_DEBUG((LOG_INFO, "RTT Sample: %u , New RTO: %u\n", sample, cb->rto));
199 }
200
201 /* Functions declared in struct dccp_cc_sw */
202
203 /*
204 * Initialises the sender side
205 * returns: pointer to a tfrc_send_ccb struct on success, otherwise 0
206 */
207 void *
208 tcplike_send_init(struct dccpcb* pcb)
209 {
210 struct tcplike_send_ccb *cb;
211
212 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_init()\n"));
213
214 cb = malloc(sizeof (struct tcplike_send_ccb), M_PCB, M_NOWAIT | M_ZERO);
215 if (cb == 0) {
216 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_send_ccb!\n"));
217 dccpstat.tcplikes_send_memerr++;
218 return 0;
219 }
220 memset(cb, 0, sizeof (struct tcplike_send_ccb));
221
222 /* init sender */
223 cb->pcb = pcb;
224
225 cb->cwnd = TCPLIKE_INITIAL_CWND;
226 cb->ssthresh = 0xafff; /* lim-> infinity */
227 cb->oldcwnd_ts = 0;
228 cb->outstanding = 0;
229 cb->rcvr_ackratio = 2; /* Ack Ratio */
230 cb->acked_in_win = 0;
231 cb->acked_windows = 0;
232
233 CWND_DEBUG((LOG_INFO, "Init. CWND value: %u , OUTSTANDING value: %u\n",
234 cb->cwnd, cb->outstanding));
235 cb->rtt = 0xffff;
236 cb->rto = TIMEOUT_UBOUND;
237 callout_init(&cb->rto_timer, 0);
238 callout_init(&cb->free_timer, 0);
239 cb->rto_timer_callout = 0;
240 cb->rtt_d = 0;
241 cb->timestamp = 0;
242
243 cb->sample_rtt = 1;
244
245 cb->cv_size = TCPLIKE_INITIAL_CWNDVECTOR;
246 /* 1 bit per entry */
247 cb->cwndvector = malloc(cb->cv_size / 8, M_PCB, M_NOWAIT | M_ZERO);
248 if (cb->cwndvector == NULL) {
249 MALLOC_DEBUG((LOG_INFO, "Unable to allocate memory for cwndvector\n"));
250 /* What to do now? */
251 cb->cv_size = 0;
252 dccpstat.tcplikes_send_memerr++;
253 return 0;
254 }
255 memset(cb->cwndvector, 0, cb->cv_size / 8);
256 cb->cv_hs = cb->cv_ts = 0;
257 cb->cv_hp = cb->cwndvector;
258
259 cb->ack_last = 0;
260 cb->ack_miss = 0;
261
262 mutex_init(&(cb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
263
264 TCPLIKE_DEBUG((LOG_INFO, "TCPlike sender initialised!\n"));
265 dccpstat.tcplikes_send_conn++;
266 return cb;
267 }
268
269 void tcplike_send_term(void *ccb)
270 {
271 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
272 if (ccb == 0)
273 return;
274
275 mutex_destroy(&(cb->mutex));
276
277 free(cb, M_PCB);
278 TCPLIKE_DEBUG((LOG_INFO, "TCP-like sender is destroyed\n"));
279 }
280
281 /*
282 * Free the sender side
283 * args: ccb - ccb of sender
284 */
285 void
286 tcplike_send_free(void *ccb)
287 {
288 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
289
290 LOSS_DEBUG((LOG_INFO, "Entering tcplike_send_free()\n"));
291
292 if (ccb == 0)
293 return;
294
295 mutex_enter(&(cb->mutex));
296
297 free(cb->cwndvector, M_PCB);
298 cb->cv_hs = cb->cv_ts = 0;
299
300 /* untimeout any active timer */
301 if (cb->rto_timer_callout) {
302 TCPLIKE_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
303 callout_stop(&cb->rto_timer);
304 cb->rto_timer_callout = 0;
305 }
306
307 mutex_exit(&(cb->mutex));
308
309 callout_reset(&cb->free_timer, 10 * hz, tcplike_send_term, (void *)cb);
310 }
311
312 /*
313 * Ask TCPlike wheter one can send a packet or not
314 * args: ccb - ccb block for current connection
315 * returns: 0 if ok, else <> 0.
316 */
317 int
318 tcplike_send_packet(void *ccb, long datasize)
319 {
320 /* check if one can send here */
321 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
322 long ticks;
323 char feature[1];
324
325 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet()\n"));
326
327 if (datasize == 0) {
328 TCPLIKE_DEBUG((LOG_INFO, "Sending pure ACK. Dont care about CC right now\n"));
329 return 1;
330 }
331
332 mutex_enter(&(cb->mutex));
333
334 if (cb->cwnd <= cb->outstanding) {
335 /* May not send. trigger RTO */
336 DCCP_DEBUG((LOG_INFO, "cwnd (%d) < outstanding (%d)\n", cb->cwnd, cb->outstanding));
337 if (!cb->rto_timer_callout) {
338 LOSS_DEBUG((LOG_INFO, "Trigger TCPlike RTO timeout timer. Ticks = %u\n", cb->rto));
339 ticks = (long)cb->rto;
340 callout_reset(&cb->rto_timer, ticks,
341 tcplike_rto_timeout, (void *)cb);
342 cb->rto_timer_callout = 1;
343 }
344 mutex_exit(&(cb->mutex));
345 return 0;
346 }
347
348 /* We're allowed to send */
349
350 feature[0] = 1;
351 if (cb->pcb->remote_ackvector == 0) {
352 ACK_DEBUG((LOG_INFO, "Adding Change(Use Ack Vector, 1) to outgoing packet\n"));
353 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR);
354 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR, feature, 1);
355 }
356
357 /* untimeout any active timer */
358 if (cb->rto_timer_callout) {
359 LOSS_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
360 callout_stop(&cb->rto_timer);
361 cb->rto_timer_callout = 0;
362 }
363
364 if (!cb->sample_rtt) {
365 struct timeval stamp;
366 microtime(&stamp);
367 cb->timestamp = ((stamp.tv_sec & 0x00000FFF) * 1000000) + stamp.tv_usec;
368 dccp_add_option(cb->pcb, DCCP_OPT_TIMESTAMP, (char*) &(cb->timestamp), 4);
369 /*LOSS_DEBUG((LOG_INFO, "Adding timestamp %u\n", cb->timestamp));*/
370 cb->sample_rtt = 1;
371 }
372
373 mutex_exit(&(cb->mutex));
374 return 1;
375
376 }
377
378 /*
379 * Notify sender that a packet has been sent
380 * args: ccb - ccb block for current connection
381 * moreToSend - if there exists more packets to send
382 */
383 void
384 tcplike_send_packet_sent(void *ccb, int moreToSend, long datasize)
385 {
386 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
387
388 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet_sent(,%i,%i)\n",moreToSend,(int) datasize));
389
390 if (datasize == 0) {
391 TCPLIKE_DEBUG((LOG_INFO, "Sent pure ACK. Dont care about cwnd-storing\n"));
392 return;
393 }
394
395 mutex_enter(&(cb->mutex));
396
397 cb->outstanding++;
398 TCPLIKE_DEBUG((LOG_INFO, "SENT. cwnd: %d, outstanding: %d\n",cb->cwnd, cb->outstanding));
399
400 /* stash the seqnr in cwndvector */
401 /* Dont do this if we're only sending an ACK ! */
402 _add_to_cwndvector(cb, cb->pcb->seq_snd);
403 CWND_DEBUG((LOG_INFO, "Sent. CWND value: %u , OUTSTANDING value: %u\n",cb->cwnd, cb->outstanding));
404
405 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
406 mutex_exit(&(cb->mutex));
407 }
408
409 /*
410 * Notify that an ack package was received
411 * args: ccb - ccb block for current connection
412 */
413 void
414 tcplike_send_packet_recv(void *ccb, char *options, int optlen)
415 {
416 dccp_seq acknum, lastok;
417 u_int16_t numlostpackets, avsize, i, prev_size;
418 u_int8_t length, state, numokpackets, ackratiocnt;
419 u_char av[10];
420 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
421
422 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_ack_recv()\n"));
423 mutex_enter(&(cb->mutex));
424
425 if (dccp_get_option(options, optlen, DCCP_OPT_TIMESTAMP_ECHO, av,10) > 0) {
426 u_int32_t echo, elapsed;
427
428 TCPLIKE_DEBUG((LOG_INFO, "Received TIMESTAMP ECHO\n"));
429 bcopy(av, &echo, 4);
430 bcopy(av + 4, &elapsed, 4);
431
432 if (echo == cb->timestamp) {
433 struct timeval time;
434 u_int32_t c_stamp;
435 u_int16_t diff;
436
437 microtime(&time);
438 c_stamp = ((time.tv_sec & 0x00000FFF) * 1000000) + time.tv_usec;
439
440 diff = (u_int16_t) c_stamp - cb->timestamp - elapsed;
441 diff = (u_int16_t)(diff / 1000);
442 TCPLIKE_DEBUG((LOG_INFO, "Got Timestamp Echo; Echo = %u, Elapsed = %u. DIFF = %u\n",
443 echo, elapsed, diff));
444 tcplike_rtt_sample(cb, diff);
445 }
446 }
447
448 if (cb->pcb->ack_rcv == 0) {
449 /* There was no Ack. There is no spoon */
450
451 /* We'll clear the missingacks data here, since the other host
452 * is also sending data.
453 * I guess we could deal with this, using the NDP field in the
454 * header. Let's stick a *TODO* mark here for now.
455 * The missingacks mechanism will activate if other host goes to
456 * only sending DCCP-Ack packets.
457 */
458 cb->ack_last = 0;
459 cb->ack_miss = 0;
460 ACKRATIO_DEBUG((LOG_INFO, "Clear Missing Acks state!\n"));
461 mutex_exit(&(cb->mutex));
462 return;
463 }
464
465 cb->sample_rtt = 0;
466
467 /* check ackVector for lost packets. cmp with cv_list */
468 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av,10);
469 if (avsize == 0)
470 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av,10);
471
472 if (avsize > 0)
473 dccpstat.tcplikes_send_ackrecv++;
474
475 acknum = cb->pcb->ack_rcv;
476 numlostpackets = 0;
477 numokpackets = 0;
478 lastok = 0;
479 prev_size = _cwndvector_size(cb);
480
481 TCPLIKE_DEBUG((LOG_INFO, "Start removing from cwndvector %d\n", avsize));
482 if (avsize == 0)
483 _remove_from_cwndvector(cb, acknum);
484
485 for (i=0; i < avsize; i++) {
486 state = (av[i] & 0xc0) >> 6;
487 length = (av[i] & 0x3f) +1;
488 while (length > 0) {
489 if (state == 0) {
490 CWND_DEBUG((LOG_INFO, "Packet %llu was OK\n", acknum));
491 numokpackets++;
492 lastok = acknum;
493 _remove_from_cwndvector(cb, acknum);
494 } else {
495 if (acknum > cb->oldcwnd_ts) {
496 LOSS_DEBUG((LOG_INFO, "Packet %llu was lost %llu state %d\n", acknum, cb->oldcwnd_ts, state));
497 numlostpackets++;
498 dccpstat.tcplikes_send_reploss++;
499 }
500 }
501 acknum--;
502 length--;
503 }
504 }
505 if (lastok)
506 if (_chop_cwndvector(cb, lastok-TCPLIKE_NUMDUPACK)) {
507 LOSS_DEBUG((LOG_INFO, "Packets were lost\n"));
508 if (lastok-TCPLIKE_NUMDUPACK > cb->oldcwnd_ts) {
509 numlostpackets++;
510 dccpstat.tcplikes_send_assloss++;
511 }
512 }
513
514 lastok = cb->cv_hs;
515 while (_cwndvector_state(cb, lastok) == 0x00 && lastok < cb->cv_ts)
516 lastok++;
517 if (lastok != cb->cv_hs)
518 _chop_cwndvector(cb, lastok);
519
520 cb->outstanding = _cwndvector_size(cb);
521 CWND_DEBUG((LOG_INFO, "Decrease outstanding. was = %u , now = %u\n", prev_size, cb->outstanding));
522 if (prev_size == cb->outstanding) {
523 /* Nothing dropped from cwndvector */
524 mutex_exit(&(cb->mutex));
525 return;
526 }
527
528 cb->acked_in_win += numokpackets;
529
530 if (cb->cwnd < cb->ssthresh) {
531 /* Slow start */
532
533 if (numlostpackets > 0) {
534 /* Packet loss */
535 LOSS_DEBUG((LOG_INFO, "Packet Loss in Slow Start\n"));
536 cb->cwnd = cb->cwnd>>1;
537 if (cb->cwnd < 1)
538 cb->cwnd = 1;
539 cb->ssthresh = cb->cwnd;
540 cb->acked_in_win = 0;
541 cb->acked_windows = 0;
542 cb->oldcwnd_ts = cb->pcb->seq_snd;
543
544 } else {
545 cb->cwnd++;
546 }
547
548 } else if (cb->cwnd >= cb->ssthresh) {
549
550 if (numlostpackets > 0) {
551 /* Packet loss */
552 LOSS_DEBUG((LOG_INFO, "Packet Loss in action\n"));
553 cb->cwnd = cb->cwnd>>1;
554 if (cb->cwnd < 1)
555 cb->cwnd = 1;
556 cb->ssthresh = cb->cwnd;
557 cb->acked_in_win = 0;
558 cb->acked_windows = 0;
559 cb->oldcwnd_ts = cb->pcb->seq_snd;
560
561 } else if (cb->acked_in_win > cb->cwnd) {
562 cb->cwnd++;
563 }
564 }
565
566 /* Ok let's check if there are missing Ack packets */
567 ACKRATIO_DEBUG((LOG_INFO, "Check Ack. seq_rcv: %u ,ack_last: %u ,ack_miss: %u\n",
568 cb->pcb->seq_rcv, cb->ack_last, cb->ack_miss));
569
570 if (cb->ack_last == 0) {
571 /* First received ack (or first after Data packet). Yey */
572 cb->ack_last = cb->pcb->seq_rcv;
573 cb->ack_miss = 0;
574 } else if (cb->pcb->seq_rcv == (cb->ack_last + 1)) {
575 /* This is correct, non-congestion, in-order behaviour */
576 cb->ack_last = cb->pcb->seq_rcv;
577
578 } else if (cb->pcb->seq_rcv < (cb->ack_last + 1)) {
579 /* Might be an Ack we've been missing */
580 /* This code has a flaw; If we miss 2 Ack packets, we only care
581 * about the older one. This means that the next-to-oldest one could
582 * be lost without any action beeing taken.
583 * Time will tell if that is going to be a Giant Problem(r)
584 */
585 if (cb->pcb->seq_rcv == cb->ack_miss) {
586 /* Yea it was. great */
587 cb->ack_miss = 0;
588 }
589
590 } else if (cb->pcb->seq_rcv > (cb->ack_last + 1)) {
591 /* There is a jump in Ack seqnums.. */
592 cb->ack_miss = cb->ack_last + 1;
593 cb->ack_last = cb->pcb->seq_rcv;
594 }
595
596 if (cb->ack_miss && ((cb->ack_miss + TCPLIKE_NUMDUPACK) < cb->ack_last)) {
597 /* Alert! Alert! Ack packets are MIA.
598 * Decrease Ack Ratio
599 */
600 cb->rcvr_ackratio = cb->rcvr_ackratio<<1;
601 if (cb->rcvr_ackratio > (cb->cwnd>>1)) {
602 /* Constraint 2 */
603 cb->rcvr_ackratio = cb->cwnd>>1;
604 }
605 if (cb->rcvr_ackratio == 0)
606 cb->rcvr_ackratio = 1;
607 ACKRATIO_DEBUG((LOG_INFO, "Increase Ack Ratio. Now = %u. (cwnd = %u)\n", cb->rcvr_ackratio, cb->cwnd));
608 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
609 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
610 (char *) &cb->rcvr_ackratio, 1);
611
612 cb->ack_miss = 0;
613 cb->acked_windows = 0;
614 cb->acked_in_win = 0;
615 dccpstat.tcplikes_send_missack++;
616
617 } else if (cb->acked_in_win > cb->cwnd) {
618 cb->acked_in_win = 0;
619 cb->acked_windows++;
620 if (cb->rcvr_ackratio == 1) {
621 /* Ack Ratio is 1. We cant decrease it more.. Lets wait for some
622 * heavy congestion so we can increase it
623 */
624 cb->acked_windows = 0;
625 }
626 }
627
628 if (cb->acked_windows >= 1) {
629 ackratiocnt = (cb->cwnd / ((cb->rcvr_ackratio*cb->rcvr_ackratio) - cb->rcvr_ackratio));
630 if (cb->acked_windows >= ackratiocnt) {
631 if (cb->rcvr_ackratio > 2 && cb->cwnd >= 4) {
632 /* Constraint 3 - AckRatio at least 2 for a cwnd >= 4 */
633 cb->rcvr_ackratio--;
634 ACKRATIO_DEBUG((LOG_INFO, "Decrease ackratio by 1, now: %u\n", cb->rcvr_ackratio));
635 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
636 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
637 (char *) &cb->rcvr_ackratio, 1);
638 }
639 cb->acked_in_win = 0;
640 cb->acked_windows = 0;
641 }
642 }
643
644 CWND_DEBUG((LOG_INFO, "Recvd. CWND value: %u , OUTSTANDING value: %u\n",
645 cb->cwnd, cb->outstanding));
646
647 if (cb->cwnd > cb->outstanding && cb->rto_timer_callout) {
648 LOSS_DEBUG((LOG_INFO, "Force DCCP_OUTPUT, CWND = %u Outstanding = %u\n",
649 cb->cwnd, cb->outstanding));
650 callout_stop(&cb->rto_timer);
651 cb->rto_timer_callout = 0;
652
653 mutex_exit(&(cb->mutex));
654 dccp_output(cb->pcb, 1);
655 return;
656 }
657 mutex_exit(&(cb->mutex));
658 }
659
660 int
661 _cwndvector_size(struct tcplike_send_ccb *cb)
662 {
663 u_int64_t gap, offset, seqnr;
664 u_int32_t cnt;
665 u_char *t;
666
667 TCPLIKE_DEBUG((LOG_INFO, "Enter cwndvector_size\n"));
668 cnt = 0;
669 for (seqnr = cb->cv_hs; seqnr < cb->cv_ts; seqnr++) {
670 gap = seqnr - cb->cv_hs;
671
672 offset = gap % 8;
673 t = cb->cv_hp + (gap/8);
674 if (t >= (cb->cwndvector + (cb->cv_size/8)))
675 t -= (cb->cv_size / 8); /* wrapped */
676
677 if (((*t & (0x01 << offset)) >> offset) == 0x01)
678 cnt++;
679 }
680 return cnt;
681 }
682
683 u_char
684 _cwndvector_state(struct tcplike_send_ccb *cb, u_int64_t seqnr)
685 {
686 u_int64_t gap, offset;
687 u_char *t;
688
689 /* Check for wrapping */
690 if (seqnr >= cb->cv_hs) {
691 /* Not wrapped */
692 gap = seqnr - cb->cv_hs;
693 } else {
694 /* Wrapped XXXXX */
695 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
696 }
697
698 if (gap >= cb->cv_size) {
699 /* gap is bigger than cwndvector size? baaad */
700 return 0x01;
701 }
702
703 offset = gap % 8;
704 t = cb->cv_hp + (gap/8);
705 if (t >= (cb->cwndvector + (cb->cv_size/8)))
706 t -= (cb->cv_size / 8); /* wrapped */
707
708 return ((*t & (0x01 << offset)) >> offset);
709 }
710
711 void
712 _add_to_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
713 {
714 u_int64_t offset, dc, gap;
715 u_char *t, *n;
716
717 TCPLIKE_DEBUG((LOG_INFO, "Entering add_to_cwndvector\n"));
718
719 if (cb->cv_hs == cb->cv_ts) {
720 /* Empty cwndvector */
721 cb->cv_hs = cb->cv_ts = seqnr;
722 }
723
724 /* Check for wrapping */
725 if (seqnr >= cb->cv_hs) {
726 /* Not wrapped */
727 gap = seqnr - cb->cv_hs;
728 } else {
729 /* Wrapped */
730 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
731 }
732
733 if (gap >= cb->cv_size) {
734 /* gap is bigger than cwndvector size? baaad */
735 /* maybe we should increase the cwndvector here */
736 CWND_DEBUG((LOG_INFO, "add cwndvector error. gap: %d, cv_size: %d, seqnr: %d\n",
737 gap, cb->cv_size, seqnr));
738 dccpstat.tcplikes_send_badseq++;
739 return;
740 }
741
742 offset = gap % 8; /* bit to mark */
743 t = cb->cv_hp + (gap/8);
744 if (t >= (cb->cwndvector + (cb->cv_size/8)))
745 t -= (cb->cv_size / 8); /* cwndvector wrapped */
746
747 *t = *t | (0x01 << offset); /* turn on bit */
748
749 cb->cv_ts = seqnr+1;
750 if (cb->cv_ts == 0x1000000000000LL)
751 cb->cv_ts = 0;
752
753 if (gap > (cb->cv_size - 128)) {
754 MALLOC_DEBUG((LOG_INFO, "INCREASE cwndVECTOR\n"));
755 n = malloc(cb->cv_size/4, M_PCB, M_NOWAIT); /* old size * 2 */
756 if (n == NULL) {
757 MALLOC_DEBUG((LOG_INFO, "Increase cwndvector FAILED\n"));
758 dccpstat.tcplikes_send_memerr++;
759 return;
760 }
761 memset (n+cb->cv_size/8,0x00,cb->cv_size/8); /* new half all missing */
762 dc = (cb->cwndvector + (cb->cv_size/8)) - cb->cv_hp;
763 memcpy (n,cb->cv_hp, dc); /* tail to end */
764 memcpy (n+dc,cb->cwndvector,cb->cv_hp - cb->cwndvector); /* start to tail */
765 cb->cv_size = cb->cv_size * 2; /* counted in items, so it';s a doubling */
766 free (cb->cwndvector, M_PCB);
767 cb->cv_hp = cb->cwndvector = n;
768 }
769 }
770
771 void
772 _remove_from_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
773 {
774 u_int64_t offset;
775 int64_t gap;
776 u_char *t;
777
778 DCCP_DEBUG((LOG_INFO, "Entering remove_from_cwndvector\n"));
779
780 if (cb->cv_hs == cb->cv_ts) {
781 /* Empty cwndvector */
782 return;
783 }
784
785 /* Check for wrapping */
786 if (seqnr >= cb->cv_hs) {
787 /* Not wrapped */
788 gap = seqnr - cb->cv_hs;
789 } else {
790 /* Wrapped */
791 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
792 }
793
794 if (gap >= cb->cv_size) {
795 /* gap is bigger than cwndvector size. has already been chopped */
796 return;
797 }
798
799 offset = gap % 8; /* hi or low 2 bits to mark */
800 t = cb->cv_hp + (gap/8);
801 if (t >= (cb->cwndvector + (cb->cv_size/8)))
802 t -= (cb->cv_size / 8); /* cwndvector wrapped */
803
804 *t = *t & (~(0x01 << offset)); /* turn off bits */
805 }
806
807 int
808 _chop_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
809 {
810 int64_t gap, bytegap;
811 u_char *t;
812
813 CWND_DEBUG((LOG_INFO,"Chop cwndvector at: %u\n", seqnr));
814
815 if (cb->cv_hs == cb->cv_ts)
816 return 0;
817
818 if (seqnr > cb->cv_hs) {
819 gap = seqnr - cb->cv_hs;
820 } else {
821 /* We received obsolete information */
822 return 0;
823 }
824
825 bytegap = gap/8;
826 if (bytegap == 0)
827 return 0;
828
829 t = cb->cv_hp + bytegap;
830 if (t >= (cb->cwndvector + (cb->cv_size/8)))
831 t -= (cb->cv_size / 8); /* ackvector wrapped */
832 cb->cv_hp = t;
833 cb->cv_hs += bytegap*8;
834 return 1;
835 }
836
837
838 /* Receiver side */
839
840
841 /* Functions declared in struct dccp_cc_sw */
842
843 /* Initialises the receiver side
844 * returns: pointer to a tcplike_recv_ccb struct on success, otherwise 0
845 */
846 void *
847 tcplike_recv_init(struct dccpcb *pcb)
848 {
849 struct tcplike_recv_ccb *ccb;
850
851 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_init()\n"));
852
853 ccb = malloc(sizeof (struct tcplike_recv_ccb), M_PCB, M_NOWAIT | M_ZERO);
854 if (ccb == 0) {
855 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_recv_ccb!\n"));
856 dccpstat.tcplikes_recv_memerr++;
857 return 0;
858 }
859
860 memset(ccb, 0, sizeof (struct tcplike_recv_ccb));
861
862 ccb->pcb = pcb;
863 ccb->unacked = 0;
864 ccb->pcb->ack_ratio = 2;
865
866 ccb->pcb->remote_ackvector = 1;
867 dccp_use_ackvector(ccb->pcb);
868
869 callout_init(&ccb->free_timer, 0);
870
871 mutex_init(&(ccb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
872
873 TCPLIKE_DEBUG((LOG_INFO, "TCPlike receiver initialised!\n"));
874 dccpstat.tcplikes_recv_conn++;
875 return ccb;
876 }
877
878 void tcplike_recv_term(void *ccb)
879 {
880 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
881 if (ccb == 0)
882 return;
883
884 mutex_destroy(&(cb->mutex));
885 free(cb, M_PCB);
886 TCPLIKE_DEBUG((LOG_INFO, "TCP-like receiver is destroyed\n"));
887 }
888
889 /* Free the receiver side
890 * args: ccb - ccb of recevier
891 */
892 void
893 tcplike_recv_free(void *ccb)
894 {
895 struct ack_list *a;
896 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
897
898 LOSS_DEBUG((LOG_INFO, "Entering tcplike_recv_free()\n"));
899
900 if (ccb == 0)
901 return;
902
903 mutex_enter(&(cb->mutex));
904
905 a = cb->av_list;
906 while (a) {
907 cb->av_list = a->next;
908 free(a, M_TEMP);
909 a = cb->av_list;
910 }
911
912 cb->pcb->av_size = 0;
913 free(cb->pcb->ackvector, M_PCB);
914
915 mutex_exit(&(cb->mutex));
916 callout_reset(&cb->free_timer, 10 * hz, tcplike_recv_term, (void *)cb);
917 }
918
919 /*
920 * Tell TCPlike that a packet has been received
921 * args: ccb - ccb block for current connection
922 */
923 void
924 tcplike_recv_packet_recv(void *ccb, char *options, int optlen)
925 {
926 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
927 u_char ackvector[16];
928 u_int16_t avsize;
929 u_char av_rcv[10];
930
931 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_packet()\n"));
932
933 mutex_enter(&(cb->mutex));
934
935 if (cb->pcb->type_rcv == DCCP_TYPE_DATA ||
936 cb->pcb->type_rcv == DCCP_TYPE_DATAACK)
937 dccpstat.tcplikes_recv_datarecv++;
938
939 /* Grab Ack Vector 0 or 1 */
940 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av_rcv,10);
941 if (avsize == 0)
942 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av_rcv,10);
943
944 /* We are only interested in acks-on-acks here.
945 * The "real" ack handling is done be the sender */
946 if (avsize == 0 && cb->pcb->ack_rcv) {
947 u_int64_t ackthru;
948 /* We got an Ack without an ackvector.
949 * This would mean it's an ack on an ack.
950 */
951 ackthru = _avlist_get(cb, cb->pcb->ack_rcv);
952 ACK_DEBUG((LOG_INFO, "GOT Ack without Ackvector; Ackthru: %u\n", ackthru));
953 if (ackthru) {
954 dccp_update_ackvector(cb->pcb, ackthru);
955 dccpstat.tcplikes_recv_ackack++;
956 }
957 } else if (avsize > 0 && cb->pcb->ack_rcv) {
958 /* We received an AckVector */
959 u_int32_t acknum, ackthru;
960 int i;
961 ACK_DEBUG((LOG_INFO, "GOT Ack with Ackvector\n"));
962 /* gotta loop through the ackvector */
963 acknum = cb->pcb->ack_rcv;
964 for (i=0; i<avsize; i++) {
965 u_int8_t state, len;
966 state = (av_rcv[i] & 0xc0) >> 6;
967 len = (av_rcv[i] & 0x2f) + 1;
968 if (state != 0) {
969 /* Drops in ackvector! Will be noted and taken care of by the sender part */
970 ACK_DEBUG((LOG_INFO, "Packets %u - %u are FUCKED\n",acknum-len, acknum));
971 continue;
972 }
973
974 while (len>0) {
975 ackthru = _avlist_get(cb, acknum);
976 ACK_DEBUG((LOG_INFO, "Ackthru: %u\n", ackthru));
977 if (ackthru) {
978 dccp_update_ackvector(cb->pcb, ackthru);
979 dccpstat.tcplikes_recv_ackack++;
980 }
981 acknum--;
982 len--;
983 }
984 }
985 }
986
987 ACK_DEBUG((LOG_INFO, "Adding %llu to local ackvector\n", cb->pcb->seq_rcv));
988 dccp_increment_ackvector(cb->pcb, cb->pcb->seq_rcv);
989 cb->unacked++;
990
991 if (cb->unacked >= cb->pcb->ack_ratio) {
992 /* Time to send an Ack */
993
994 avsize = dccp_generate_ackvector(cb->pcb, ackvector);
995 TCPLIKE_DEBUG((LOG_INFO, "recv_packet avsize %d ackvector %d\n", avsize, ackvector));
996 cb->unacked = 0;
997 if (avsize > 0) {
998 dccp_add_option(cb->pcb, DCCP_OPT_ACK_VECTOR0, ackvector, avsize);
999 cb->pcb->ack_snd = cb->pcb->seq_rcv;
1000 _avlist_add(cb, cb->pcb->seq_snd+1, cb->pcb->ack_snd);
1001 ACK_DEBUG((LOG_INFO, "Recvr: Sending Ack (%llu) w/ Ack Vector\n", cb->pcb->ack_snd));
1002 dccpstat.tcplikes_recv_acksent++;
1003 dccp_output(cb->pcb, 1);
1004 }
1005 }
1006 mutex_exit(&(cb->mutex));
1007 }
1008
1009 void
1010 _avlist_add(struct tcplike_recv_ccb *cb, u_int64_t localseq, u_int64_t ackthru)
1011 {
1012 struct ack_list *a;
1013 ACK_DEBUG((LOG_INFO,"Adding localseq %u - ackthru %u to avlist\n", localseq, ackthru));
1014 /*MALLOC_DEBUG((LOG_INFO, "New ack_list, %u\n", sizeof (struct ack_list)));*/
1015 a = malloc(sizeof(struct ack_list), M_TEMP, M_NOWAIT);
1016 if (a == NULL) {
1017 MALLOC_DEBUG((LOG_INFO, "avlist_add: FAILED\n"));
1018 dccpstat.tcplikes_recv_memerr++;
1019 return;
1020 }
1021 memset(a, 0, sizeof(struct ack_list));
1022 a->localseq = localseq;
1023 a->ackthru = ackthru;
1024 a->next = cb->av_list;
1025 cb->av_list = a;
1026 }
1027
1028 /*
1029 * Searches the av_list. if 'localseq' found, drop it from list and return
1030 * ackthru
1031 */
1032 u_int64_t
1033 _avlist_get(struct tcplike_recv_ccb *cb, u_int64_t localseq)
1034 {
1035 struct ack_list *a, *n, *p;
1036 u_int64_t ackthru;
1037
1038 ACK_DEBUG((LOG_INFO,"Getting localseq %u from avlist\n", localseq));
1039 a = cb->av_list;
1040 p = 0;
1041 while (a) {
1042 n = a->next;
1043 if (a->localseq == localseq) {
1044 if (p)
1045 p->next = n;
1046 else
1047 cb->av_list = n;
1048 ackthru = a->ackthru;
1049 /*MALLOC_DEBUG((LOG_INFO, "Freeing element %u in ack_list\n", a->localseq));*/
1050 free(a, M_TEMP);
1051 return ackthru;
1052 }
1053 p = a;
1054 a = n;
1055 }
1056 /* Not found. return 0 */
1057 return 0;
1058 }
1059
1060 /*
1061 int tcplike_option_recv(void);
1062 */
1063