altq_red.c revision 1.13.8.2 1 /* $NetBSD: altq_red.c,v 1.13.8.2 2006/06/26 12:44:22 yamt Exp $ */
2 /* $KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $ */
3
4 /*
5 * Copyright (C) 1997-2000
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.8.2 2006/06/26 12:44:22 yamt Exp $");
65
66 #if defined(__FreeBSD__) || defined(__NetBSD__)
67 #include "opt_altq.h"
68 #if (__FreeBSD__ != 2)
69 #include "opt_inet.h"
70 #ifdef __FreeBSD__
71 #include "opt_inet6.h"
72 #endif
73 #endif
74 #endif /* __FreeBSD__ || __NetBSD__ */
75 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
76
77 #include <sys/param.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <sys/socket.h>
81 #include <sys/sockio.h>
82 #include <sys/systm.h>
83 #include <sys/proc.h>
84 #include <sys/errno.h>
85 #include <sys/kernel.h>
86 #include <sys/kauth.h>
87 #ifdef ALTQ_FLOWVALVE
88 #include <sys/queue.h>
89 #include <sys/time.h>
90 #endif
91
92 #include <net/if.h>
93 #include <net/if_types.h>
94
95 #include <netinet/in.h>
96 #include <netinet/in_systm.h>
97 #include <netinet/ip.h>
98 #ifdef INET6
99 #include <netinet/ip6.h>
100 #endif
101
102 #include <altq/altq.h>
103 #include <altq/altq_conf.h>
104 #include <altq/altq_red.h>
105 #ifdef ALTQ_FLOWVALVE
106 #include <altq/altq_flowvalve.h>
107 #endif
108
109 /*
110 * ALTQ/RED (Random Early Detection) implementation using 32-bit
111 * fixed-point calculation.
112 *
113 * written by kjc using the ns code as a reference.
114 * you can learn more about red and ns from Sally's home page at
115 * http://www-nrg.ee.lbl.gov/floyd/
116 *
117 * most of the red parameter values are fixed in this implementation
118 * to prevent fixed-point overflow/underflow.
119 * if you change the parameters, watch out for overflow/underflow!
120 *
121 * the parameters used are recommended values by Sally.
122 * the corresponding ns config looks:
123 * q_weight=0.00195
124 * minthresh=5 maxthresh=15 queue-size=60
125 * linterm=30
126 * dropmech=drop-tail
127 * bytes=false (can't be handled by 32-bit fixed-point)
128 * doubleq=false dqthresh=false
129 * wait=true
130 */
131 /*
132 * alternative red parameters for a slow link.
133 *
134 * assume the queue length becomes from zero to L and keeps L, it takes
135 * N packets for q_avg to reach 63% of L.
136 * when q_weight is 0.002, N is about 500 packets.
137 * for a slow link like dial-up, 500 packets takes more than 1 minute!
138 * when q_weight is 0.008, N is about 127 packets.
139 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
141 * are allowed for 0.016.
142 * see Sally's paper for more details.
143 */
144 /* normal red parameters */
145 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
146 /* q_weight = 0.00195 */
147
148 /* red parameters for a slow link */
149 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
150 /* q_weight = 0.0078125 */
151
152 /* red parameters for a very slow link (e.g., dialup) */
153 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
154 /* q_weight = 0.015625 */
155
156 /* fixed-point uses 12-bit decimal places */
157 #define FP_SHIFT 12 /* fixed-point shift */
158
159 /* red parameters for drop probability */
160 #define INV_P_MAX 10 /* inverse of max drop probability */
161 #define TH_MIN 5 /* min threshold */
162 #define TH_MAX 15 /* max threshold */
163
164 #define RED_LIMIT 60 /* default max queue length */
165
166 /*
167 * our default policy for forced-drop is drop-tail.
168 * (in altq-1.1.2 or earlier, the default was random-drop.
169 * but it makes more sense to punish the cause of the surge.)
170 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
171 */
172
173 #ifdef ALTQ_FLOWVALVE
174 /*
175 * flow-valve is an extension to protect red from unresponsive flows
176 * and to promote end-to-end congestion control.
177 * flow-valve observes the average drop rates of the flows that have
178 * experienced packet drops in the recent past.
179 * when the average drop rate exceeds the threshold, the flow is
180 * blocked by the flow-valve. the trapped flow should back off
181 * exponentially to escape from the flow-valve.
182 */
183 #ifdef RED_RANDOM_DROP
184 #error "random-drop can't be used with flow-valve!"
185 #endif
186 #endif /* ALTQ_FLOWVALVE */
187
188 /* red_list keeps all red_queue_t's allocated. */
189 static red_queue_t *red_list = NULL;
190
191 /* default red parameter values */
192 static int default_th_min = TH_MIN;
193 static int default_th_max = TH_MAX;
194 static int default_inv_pmax = INV_P_MAX;
195
196 /* internal function prototypes */
197 static int red_enqueue __P((struct ifaltq *, struct mbuf *,
198 struct altq_pktattr *));
199 static struct mbuf *red_dequeue __P((struct ifaltq *, int));
200 static int red_request __P((struct ifaltq *, int, void *));
201 static void red_purgeq __P((red_queue_t *));
202 static int red_detach __P((red_queue_t *));
203 #ifdef ALTQ_FLOWVALVE
204 static inline struct fve *flowlist_lookup __P((struct flowvalve *,
205 struct altq_pktattr *, struct timeval *));
206 static inline struct fve *flowlist_reclaim __P((struct flowvalve *,
207 struct altq_pktattr *));
208 static inline void flowlist_move_to_head __P((struct flowvalve *,
209 struct fve *));
210 static inline int fv_p2f __P((struct flowvalve *, int));
211 static struct flowvalve *fv_alloc __P((struct red *));
212 static void fv_destroy __P((struct flowvalve *));
213 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
214 struct fve **));
215 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
216 struct fve *));
217 #endif
218
219 /*
220 * red device interface
221 */
222 altqdev_decl(red);
223
224 int
225 redopen(dev, flag, fmt, l)
226 dev_t dev;
227 int flag, fmt;
228 struct lwp *l;
229 {
230 /* everything will be done when the queueing scheme is attached. */
231 return 0;
232 }
233
234 int
235 redclose(dev, flag, fmt, l)
236 dev_t dev;
237 int flag, fmt;
238 struct lwp *l;
239 {
240 red_queue_t *rqp;
241 int err, error = 0;
242
243 while ((rqp = red_list) != NULL) {
244 /* destroy all */
245 err = red_detach(rqp);
246 if (err != 0 && error == 0)
247 error = err;
248 }
249
250 return error;
251 }
252
253 int
254 redioctl(dev, cmd, addr, flag, l)
255 dev_t dev;
256 ioctlcmd_t cmd;
257 caddr_t addr;
258 int flag;
259 struct lwp *l;
260 {
261 red_queue_t *rqp;
262 struct red_interface *ifacep;
263 struct ifnet *ifp;
264 struct proc *p = l->l_proc;
265 int error = 0;
266
267 /* check super-user privilege */
268 switch (cmd) {
269 case RED_GETSTATS:
270 break;
271 default:
272 #if (__FreeBSD_version > 400000)
273 if ((error = suser(p)) != 0)
274 #else
275 if ((error = kauth_authorize_generic(p->p_cred,
276 KAUTH_GENERIC_ISSUSER,
277 &p->p_acflag)) != 0)
278 #endif
279 return (error);
280 break;
281 }
282
283 switch (cmd) {
284
285 case RED_ENABLE:
286 ifacep = (struct red_interface *)addr;
287 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
288 error = EBADF;
289 break;
290 }
291 error = altq_enable(rqp->rq_ifq);
292 break;
293
294 case RED_DISABLE:
295 ifacep = (struct red_interface *)addr;
296 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
297 error = EBADF;
298 break;
299 }
300 error = altq_disable(rqp->rq_ifq);
301 break;
302
303 case RED_IF_ATTACH:
304 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
305 if (ifp == NULL) {
306 error = ENXIO;
307 break;
308 }
309
310 /* allocate and initialize red_queue_t */
311 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
312 if (rqp == NULL) {
313 error = ENOMEM;
314 break;
315 }
316
317 rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
318 M_WAITOK|M_ZERO);
319 if (rqp->rq_q == NULL) {
320 free(rqp, M_DEVBUF);
321 error = ENOMEM;
322 break;
323 }
324
325 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
326 if (rqp->rq_red == NULL) {
327 free(rqp->rq_q, M_DEVBUF);
328 free(rqp, M_DEVBUF);
329 error = ENOMEM;
330 break;
331 }
332
333 rqp->rq_ifq = &ifp->if_snd;
334 qtail(rqp->rq_q) = NULL;
335 qlen(rqp->rq_q) = 0;
336 qlimit(rqp->rq_q) = RED_LIMIT;
337 qtype(rqp->rq_q) = Q_RED;
338
339 /*
340 * set RED to this ifnet structure.
341 */
342 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
343 red_enqueue, red_dequeue, red_request,
344 NULL, NULL);
345 if (error) {
346 red_destroy(rqp->rq_red);
347 free(rqp->rq_q, M_DEVBUF);
348 free(rqp, M_DEVBUF);
349 break;
350 }
351
352 /* add this state to the red list */
353 rqp->rq_next = red_list;
354 red_list = rqp;
355 break;
356
357 case RED_IF_DETACH:
358 ifacep = (struct red_interface *)addr;
359 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
360 error = EBADF;
361 break;
362 }
363 error = red_detach(rqp);
364 break;
365
366 case RED_GETSTATS:
367 do {
368 struct red_stats *q_stats;
369 red_t *rp;
370
371 q_stats = (struct red_stats *)addr;
372 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
373 ALTQT_RED)) == NULL) {
374 error = EBADF;
375 break;
376 }
377
378 q_stats->q_len = qlen(rqp->rq_q);
379 q_stats->q_limit = qlimit(rqp->rq_q);
380
381 rp = rqp->rq_red;
382 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
383 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
384 q_stats->drop_cnt = rp->red_stats.drop_cnt;
385 q_stats->drop_forced = rp->red_stats.drop_forced;
386 q_stats->drop_unforced = rp->red_stats.drop_unforced;
387 q_stats->marked_packets = rp->red_stats.marked_packets;
388
389 q_stats->weight = rp->red_weight;
390 q_stats->inv_pmax = rp->red_inv_pmax;
391 q_stats->th_min = rp->red_thmin;
392 q_stats->th_max = rp->red_thmax;
393
394 #ifdef ALTQ_FLOWVALVE
395 if (rp->red_flowvalve != NULL) {
396 struct flowvalve *fv = rp->red_flowvalve;
397 q_stats->fv_flows = fv->fv_flows;
398 q_stats->fv_pass = fv->fv_stats.pass;
399 q_stats->fv_predrop = fv->fv_stats.predrop;
400 q_stats->fv_alloc = fv->fv_stats.alloc;
401 q_stats->fv_escape = fv->fv_stats.escape;
402 } else {
403 #endif /* ALTQ_FLOWVALVE */
404 q_stats->fv_flows = 0;
405 q_stats->fv_pass = 0;
406 q_stats->fv_predrop = 0;
407 q_stats->fv_alloc = 0;
408 q_stats->fv_escape = 0;
409 #ifdef ALTQ_FLOWVALVE
410 }
411 #endif /* ALTQ_FLOWVALVE */
412 } while (0);
413 break;
414
415 case RED_CONFIG:
416 do {
417 struct red_conf *fc;
418 red_t *new;
419 int s, limit;
420
421 fc = (struct red_conf *)addr;
422 if ((rqp = altq_lookup(fc->iface.red_ifname,
423 ALTQT_RED)) == NULL) {
424 error = EBADF;
425 break;
426 }
427 new = red_alloc(fc->red_weight,
428 fc->red_inv_pmax,
429 fc->red_thmin,
430 fc->red_thmax,
431 fc->red_flags,
432 fc->red_pkttime);
433 if (new == NULL) {
434 error = ENOMEM;
435 break;
436 }
437
438 s = splnet();
439 red_purgeq(rqp);
440 limit = fc->red_limit;
441 if (limit < fc->red_thmax)
442 limit = fc->red_thmax;
443 qlimit(rqp->rq_q) = limit;
444 fc->red_limit = limit; /* write back the new value */
445
446 red_destroy(rqp->rq_red);
447 rqp->rq_red = new;
448
449 splx(s);
450
451 /* write back new values */
452 fc->red_limit = limit;
453 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
454 fc->red_thmin = rqp->rq_red->red_thmin;
455 fc->red_thmax = rqp->rq_red->red_thmax;
456
457 } while (0);
458 break;
459
460 case RED_SETDEFAULTS:
461 do {
462 struct redparams *rp;
463
464 rp = (struct redparams *)addr;
465
466 default_th_min = rp->th_min;
467 default_th_max = rp->th_max;
468 default_inv_pmax = rp->inv_pmax;
469 } while (0);
470 break;
471
472 default:
473 error = EINVAL;
474 break;
475 }
476 return error;
477 }
478
479 static int
480 red_detach(rqp)
481 red_queue_t *rqp;
482 {
483 red_queue_t *tmp;
484 int error = 0;
485
486 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
487 altq_disable(rqp->rq_ifq);
488
489 if ((error = altq_detach(rqp->rq_ifq)))
490 return (error);
491
492 if (red_list == rqp)
493 red_list = rqp->rq_next;
494 else {
495 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
496 if (tmp->rq_next == rqp) {
497 tmp->rq_next = rqp->rq_next;
498 break;
499 }
500 if (tmp == NULL)
501 printf("red_detach: no state found in red_list!\n");
502 }
503
504 red_destroy(rqp->rq_red);
505 free(rqp->rq_q, M_DEVBUF);
506 free(rqp, M_DEVBUF);
507 return (error);
508 }
509
510 /*
511 * red support routines
512 */
513
/*
 * red_alloc: allocate and initialize a red_t state.
 *
 * any parameter passed as 0 is replaced by a default: weight by
 * W_WEIGHT (further adjusted downward for slow links), inv_pmax,
 * th_min and th_max by the module defaults, and pkttime by the
 * transmission time of a 1000-byte packet at 10Mbps (in usec).
 * returns the new state, or NULL if allocation fails.
 */
red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int weight, inv_pmax, th_min, th_max;
	int flags, pkttime;
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	/* start idle with an empty average */
	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* round down to the nearest power of 2 and warn */
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared and refcounted per weight) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
	return (rp);
}
604
/*
 * red_destroy: free a red_t state allocated by red_alloc(), together
 * with its flowvalve (if any) and its reference on the weight table.
 */
void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
616
/*
 * red_getstats: copy the current RED counters into *sp.
 * the average queue length is unscaled from its fixed-point form.
 */
void
red_getstats(rp, sp)
	red_t *rp;
	struct redstats *sp;
{
	sp->q_avg		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}
629
630 /*
631 * enqueue routine:
632 *
633 * returns: 0 when successfully queued.
634 * ENOBUFS when drop occurs.
635 */
636 static int
637 red_enqueue(ifq, m, pktattr)
638 struct ifaltq *ifq;
639 struct mbuf *m;
640 struct altq_pktattr *pktattr;
641 {
642 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
643
644 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
645 return ENOBUFS;
646 ifq->ifq_len++;
647 return 0;
648 }
649
/*
 * red_addq: enqueue a packet on a RED-managed queue.
 *
 * updates the EWMA average queue length estimator, then decides
 * whether to enqueue the packet, ECN-mark it, or drop it (early
 * drop or forced drop).  returns 0 when the packet was queued,
 * -1 when it was dropped (the mbuf has been freed in that case).
 */
int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped unconditionally */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min: start a new count interval */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
786
787 /*
788 * early-drop probability is calculated as follows:
789 * prob = p_max * (avg - th_min) / (th_max - th_min)
790 * prob_a = prob / (2 - count*prob)
791 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
792 * here prob_a increases as successive undrop count increases.
793 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
794 * becomes 1 when (count >= (2 / prob))).
795 */
/*
 * drop_early: decide whether to early-drop (or mark) a packet.
 *
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 *
 * returns 1 to drop/mark, 0 otherwise.
 */
int
drop_early(fp_len, fp_probd, count)
	int fp_len;	/* (avg - TH_MIN) in fixed-point */
	int fp_probd;	/* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
	int count;	/* how many successive undropped packets */
{
	int denom;	/* denominator of drop-probability */

	denom = fp_probd - count * fp_len;
	if (denom <= 0) {
		/* count exceeds the hard limit: drop or mark */
		return (1);
	}

	/*
	 * now the range of denom is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / denom
	 */
	return ((random() % denom) < fp_len) ? 1 : 0;
}
822
823 /*
824 * try to mark CE bit to the packet.
825 * returns 1 if successfully marked, 0 otherwise.
826 */
/*
 * mark_ecn: try to set the CE (congestion experienced) bit on the
 * packet instead of dropping it.
 * returns 1 if successfully marked (or already marked), 0 otherwise
 * (not ECN-capable, stale header pointer, or marking disabled for
 * the packet's address family).
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale: invalidate and give up */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	    HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			/* the ECN bits live in the traffic class field
			 * of the (host-order) flow word, bits 20-21 */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked,  mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
912
913 /*
914 * dequeue routine:
915 * must be called in splnet.
916 *
917 * returns: mbuf dequeued.
918 * NULL when no packet is available in the queue.
919 */
920
921 static struct mbuf *
922 red_dequeue(ifq, op)
923 struct ifaltq *ifq;
924 int op;
925 {
926 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
927 struct mbuf *m;
928
929 if (op == ALTDQ_POLL)
930 return qhead(rqp->rq_q);
931
932 /* op == ALTDQ_REMOVE */
933 m = red_getq(rqp->rq_red, rqp->rq_q);
934 if (m != NULL)
935 ifq->ifq_len--;
936 return (m);
937 }
938
939 struct mbuf *
940 red_getq(rp, q)
941 red_t *rp;
942 class_queue_t *q;
943 {
944 struct mbuf *m;
945
946 if ((m = _getq(q)) == NULL) {
947 if (rp->red_idle == 0) {
948 rp->red_idle = 1;
949 microtime(&rp->red_last);
950 }
951 return NULL;
952 }
953
954 rp->red_idle = 0;
955 return (m);
956 }
957
958 static int
959 red_request(ifq, req, arg)
960 struct ifaltq *ifq;
961 int req;
962 void *arg;
963 {
964 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
965
966 switch (req) {
967 case ALTRQ_PURGE:
968 red_purgeq(rqp);
969 break;
970 }
971 return (0);
972 }
973
/*
 * red_purgeq: discard every packet in the queue.
 * the interface queue length is reset only while this discipline is
 * enabled, since ifq_len is only ours to maintain in that case.
 */
static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}
982
983
984 /*
985 * helper routine to calibrate avg during idle.
986 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
987 * here Wq = 1/weight and the code assumes Wq is close to zero.
988 *
989 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
990 */
991 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
992
993 struct wtab *
994 wtab_alloc(weight)
995 int weight;
996 {
997 struct wtab *w;
998 int i;
999
1000 for (w = wtab_list; w != NULL; w = w->w_next)
1001 if (w->w_weight == weight) {
1002 w->w_refcount++;
1003 return (w);
1004 }
1005
1006 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
1007 if (w == NULL)
1008 panic("wtab_alloc: malloc failed!");
1009 w->w_weight = weight;
1010 w->w_refcount = 1;
1011 w->w_next = wtab_list;
1012 wtab_list = w;
1013
1014 /* initialize the weight table */
1015 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
1016 for (i = 1; i < 32; i++) {
1017 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
1018 if (w->w_tab[i] == 0 && w->w_param_max == 0)
1019 w->w_param_max = 1 << i;
1020 }
1021
1022 return (w);
1023 }
1024
1025 int
1026 wtab_destroy(w)
1027 struct wtab *w;
1028 {
1029 struct wtab *prev;
1030
1031 if (--w->w_refcount > 0)
1032 return (0);
1033
1034 if (wtab_list == w)
1035 wtab_list = w->w_next;
1036 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
1037 if (prev->w_next == w) {
1038 prev->w_next = w->w_next;
1039 break;
1040 }
1041
1042 free(w, M_DEVBUF);
1043 return (0);
1044 }
1045
1046 int32_t
1047 pow_w(w, n)
1048 struct wtab *w;
1049 int n;
1050 {
1051 int i, bit;
1052 int32_t val;
1053
1054 if (n >= w->w_param_max)
1055 return (0);
1056
1057 val = 1 << FP_SHIFT;
1058 if (n <= 0)
1059 return (val);
1060
1061 bit = 1;
1062 i = 0;
1063 while (n) {
1064 if (n & bit) {
1065 val = (val * w->w_tab[i]) >> FP_SHIFT;
1066 n &= ~bit;
1067 }
1068 i++;
1069 bit <<= 1;
1070 }
1071 return (val);
1072 }
1073
1074 #ifdef ALTQ_FLOWVALVE
1075
1076 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1077 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1078 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1079 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1080 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1081 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1082
1083 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1084 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1085
1086 #define FV_N 10 /* update fve_f every FV_N packets */
1087
1088 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1089 #define FV_TTHRESH 3 /* time threshold to delete fve */
1090 #define FV_ALPHA 5 /* extra packet count */
1091
1092 #if (__FreeBSD_version > 300000) || defined(__HAVE_TIMECOUNTER)
1093 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1094 #else
1095 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1096 #endif
1097
1098 /*
1099 * Brtt table: 127 entry table to convert drop rate (p) to
1100 * the corresponding bandwidth fraction (f)
1101 * the following equation is implemented to use scaled values,
1102 * fve_p and fve_f, in the fixed point format.
1103 *
1104 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1105 * f = Brtt(p) / (max_th + alpha)
1106 */
1107 #define BRTT_SIZE 128
1108 #define BRTT_SHIFT 12
1109 #define BRTT_MASK 0x0007f000
1110 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1111
1112 const int brtt_tab[BRTT_SIZE] = {
1113 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1114 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1115 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1116 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1117 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1118 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1119 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1120 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1121 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1122 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1123 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1124 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1125 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1126 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1127 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1128 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1129 };
1130
/*
 * flowlist_lookup: find the flow entry matching the packet's
 * src/dst address pair on the flowvalve's LRU list.
 *
 * list entries are kept in LRU order; the scan stops early at the
 * first entry that never recorded a drop, or whose last drop is
 * older than FV_TTHRESH seconds (such an entry is invalidated on
 * the way).  on a miss, fv_flows is updated with the number of
 * active entries scanned and NULL is returned.
 */
static inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired entry: invalidate and stop */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired entry: invalidate and stop */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1200
1201 static inline struct fve *
1202 flowlist_reclaim(fv, pktattr)
1203 struct flowvalve *fv;
1204 struct altq_pktattr *pktattr;
1205 {
1206 struct fve *fve;
1207 struct ip *ip;
1208 #ifdef INET6
1209 struct ip6_hdr *ip6;
1210 #endif
1211
1212 /*
1213 * get an entry from the tail of the LRU list.
1214 */
1215 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1216
1217 switch (pktattr->pattr_af) {
1218 case AF_INET:
1219 ip = (struct ip *)pktattr->pattr_hdr;
1220 fve->fve_flow.flow_af = AF_INET;
1221 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1222 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1223 break;
1224 #ifdef INET6
1225 case AF_INET6:
1226 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1227 fve->fve_flow.flow_af = AF_INET6;
1228 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1229 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1230 break;
1231 #endif
1232 }
1233
1234 fve->fve_state = Green;
1235 fve->fve_p = 0.0;
1236 fve->fve_f = 0.0;
1237 fve->fve_ifseq = fv->fv_ifseq - 1;
1238 fve->fve_count = 0;
1239
1240 fv->fv_flows++;
1241 #ifdef FV_STATS
1242 fv->fv_stats.alloc++;
1243 #endif
1244 return (fve);
1245 }
1246
1247 static inline void
1248 flowlist_move_to_head(fv, fve)
1249 struct flowvalve *fv;
1250 struct fve *fve;
1251 {
1252 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1253 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1254 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1255 }
1256 }
1257
/*
 * allocate flowvalve structure
 *
 * Allocates the flowvalve itself, its array of FV_FLOWLISTSIZE flow
 * entries (linked into an LRU list), and the precomputed p-to-f
 * table derived from brtt_tab[] and the red parameters.
 * Returns NULL on allocation failure (with partial allocations freed).
 * NOTE(review): allocations use M_WAITOK, which on this platform
 * presumably never returns NULL — the NULL checks look defensive;
 * confirm before removing them.
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
	if (fv == NULL)
		return (NULL);

	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
	    M_WAITOK|M_ZERO);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;	/* 0 marks an unused entry */
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * note: entry 0 is left uninitialized on purpose; fv_p2f()
	 * never reads it (it indexes with 1, a nonzero masked index,
	 * or BRTT_SIZE-1).
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
1312
1313 static void fv_destroy(fv)
1314 struct flowvalve *fv;
1315 {
1316 free(fv->fv_p2ftab, M_DEVBUF);
1317 free(fv->fv_fves, M_DEVBUF);
1318 free(fv, M_DEVBUF);
1319 }
1320
1321 static inline int
1322 fv_p2f(fv, p)
1323 struct flowvalve *fv;
1324 int p;
1325 {
1326 int val, f;
1327
1328 if (p >= BRTT_PMAX)
1329 f = fv->fv_p2ftab[BRTT_SIZE-1];
1330 else if ((val = (p & BRTT_MASK)))
1331 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1332 else
1333 f = fv->fv_p2ftab[1];
1334 return (f);
1335 }
1336
/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 *
 * On a match, the flow entry is stored through *fcache so a
 * subsequent fv_dropbyred() for the same packet can skip the lookup.
 * *fcache is left untouched when no entry matches.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;		/* per-interface arrival sequence number */
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 *
		 * i.e. an EWMA of this flow's share of recent arrivals;
		 * subtracting FV_FUNSCALE(f) implements (1 - Wf) * f in
		 * fixed point.
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: when the flow's drop-rate estimate p exceeds
	 * the threshold, compare its arrival fraction f against the
	 * table-derived threshold; an over-share flow turns Red.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 *
	 * decay the drop-rate estimate; fv_dropbyred() cancels this
	 * decay and adds Wp when red actually drops the packet.
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1418
/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 *
 * Records the drop against the packet's flow entry: bumps the flow's
 * drop-rate estimate p, moves the entry to the head of the LRU list,
 * and stamps its last-drop time.  fcache, when non-NULL, is the entry
 * cached by fv_checkflow() for this same packet.
 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no entry for this flow; recycle the LRU tail entry */
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line cancels the update
	 * in fv_checkflow() and calculate
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}
1452
1453 #endif /* ALTQ_FLOWVALVE */
1454
#ifdef KLD_MODULE

/* device switch entry points for the loadable-module build */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

/* register this discipline (type ALTQT_RED) with the altq framework */
ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */
1463
1464 #endif /* ALTQ_RED */
1465