/*	$NetBSD: altq_red.c,v 1.13.4.1 2006/09/09 02:36:40 rpaulo Exp $	*/
/*	$KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $	*/

/*
 * Copyright (C) 1997-2000
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
/*
 * Copyright (c) 1990-1994 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Computer Systems
 *	Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.4.1 2006/09/09 02:36:40 rpaulo Exp $");

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#if (__FreeBSD__ != 2)
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RED		/* red is enabled by ALTQ_RED option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#ifdef ALTQ_FLOWVALVE
#include <sys/queue.h>
#include <sys/time.h>
#endif

#include <net/if.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <altq/altq.h>
#include <altq/altq_conf.h>
#include <altq/altq_red.h>
#ifdef ALTQ_FLOWVALVE
#include <altq/altq_flowvalve.h>
#endif

/*
 * ALTQ/RED (Random Early Detection) implementation using 32-bit
 * fixed-point calculation.
 *
 * written by kjc using the ns code as a reference.
 * you can learn more about red and ns from Sally's home page at
 * http://www-nrg.ee.lbl.gov/floyd/
 *
 * most of the red parameter values are fixed in this implementation
 * to prevent fixed-point overflow/underflow.
 * if you change the parameters, watch out for overflow/underflow!
 *
 * the parameters used are recommended values by Sally.
 * the corresponding ns config looks like:
 *	q_weight=0.00195
 *	minthresh=5 maxthresh=15 queue-size=60
 *	linterm=30
 *	dropmech=drop-tail
 *	bytes=false (can't be handled by 32-bit fixed-point)
 *	doubleq=false dqthresh=false
 *	wait=true
 */
/*
 * alternative red parameters for a slow link.
 *
 * assume the queue length jumps from zero to L and then stays at L;
 * it takes N packets for q_avg to reach 63% of L.
 * when q_weight is 0.002, N is about 500 packets.
 * for a slow link like dial-up, 500 packets takes more than 1 minute!
 * when q_weight is 0.008, N is about 127 packets.
 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
 * are allowed for 0.016.
 * see Sally's paper for more details.
 */
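/*
 * why 63%: starting from avg = 0 with the queue held at L, the EWMA
 * recurrence gives avg_n = L * (1 - (1 - Wq)^n), which reaches
 * (1 - 1/e), about 63%, of L once (1 - Wq)^N = 1/e, i.e., after
 * roughly N = 1/Wq packets (Wq = 0.002 -> ~500, 0.008 -> ~125,
 * 0.016 -> ~62, matching the figures above).
 */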
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* the fixed-point representation uses 12 bits for the fraction part */
#define	FP_SHIFT	12	/* fixed-point shift */
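/*
 * for example, 1.0 is represented as 1 << FP_SHIFT == 4096, and the
 * default q_weight of 1/512 is 4096/512 == 8 in this format.
 */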

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */

/*
 * our default policy for forced-drop is drop-tail.
 * (in altq-1.1.2 or earlier, the default was random-drop.
 * but it makes more sense to punish the cause of the surge.)
 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
 */

#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

/* default red parameter values */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;

/* internal function prototypes */
static int red_enqueue __P((struct ifaltq *, struct mbuf *,
    struct altq_pktattr *));
static struct mbuf *red_dequeue __P((struct ifaltq *, int));
static int red_request __P((struct ifaltq *, int, void *));
static void red_purgeq __P((red_queue_t *));
static int red_detach __P((red_queue_t *));
#ifdef ALTQ_FLOWVALVE
static inline struct fve *flowlist_lookup __P((struct flowvalve *,
    struct altq_pktattr *, struct timeval *));
static inline struct fve *flowlist_reclaim __P((struct flowvalve *,
    struct altq_pktattr *));
static inline void flowlist_move_to_head __P((struct flowvalve *,
    struct fve *));
static inline int fv_p2f __P((struct flowvalve *, int));
static struct flowvalve *fv_alloc __P((struct red *));
static void fv_destroy __P((struct flowvalve *));
static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
    struct fve **));
static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
    struct fve *));
#endif

/*
 * red device interface
 */
altqdev_decl(red);

int
redopen(dev, flag, fmt, l)
	dev_t dev;
	int flag, fmt;
	struct lwp *l;
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}

int
redclose(dev, flag, fmt, l)
	dev_t dev;
	int flag, fmt;
	struct lwp *l;
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

int
redioctl(dev, cmd, addr, flag, l)
	dev_t dev;
	ioctlcmd_t cmd;
	caddr_t addr;
	int flag;
	struct lwp *l;
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	int error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = kauth_authorize_generic(l->l_cred,
		    KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
		    red_enqueue, red_dequeue, red_request,
		    NULL, NULL);
		if (error) {
			red_destroy(rqp->rq_red);
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
			    ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len = qlen(rqp->rq_q);
			q_stats->q_limit = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			q_stats->q_avg = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt = rp->red_stats.drop_cnt;
			q_stats->drop_forced = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight = rp->red_weight;
			q_stats->inv_pmax = rp->red_inv_pmax;
			q_stats->th_min = rp->red_thmin;
			q_stats->th_max = rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows = fv->fv_flows;
				q_stats->fv_pass = fv->fv_stats.pass;
				q_stats->fv_predrop = fv->fv_stats.predrop;
				q_stats->fv_alloc = fv->fv_stats.alloc;
				q_stats->fv_escape = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows = 0;
				q_stats->fv_pass = 0;
				q_stats->fv_predrop = 0;
				q_stats->fv_alloc = 0;
				q_stats->fv_escape = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
			    ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			new = red_alloc(fc->red_weight,
			    fc->red_inv_pmax,
			    fc->red_thmin,
			    fc->red_thmax,
			    fc->red_flags,
			    fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

static int
red_detach(rqp)
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	free(rqp->rq_q, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}

/*
 * red support routines
 */

red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int weight, inv_pmax, th_min, th_max;
	int flags, pkttime;
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;
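	/*
	 * e.g., with the default parameters (th_min=5, th_max=15,
	 * inv_pmax=10, i.e., pmax=0.1), probd is 2 * 10 * 10 = 200
	 * in fixed-point, that is, 200 << FP_SHIFT.
	 */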

	/* allocate weight table */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
	return (rp);
}

void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}

void
red_getstats(rp, sp)
	red_t *rp;
	struct redstats *sp;
{
	sp->q_avg = rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt = rp->red_stats.xmit_cnt;
	sp->drop_cnt = rp->red_stats.drop_cnt;
	sp->drop_forced = rp->red_stats.drop_forced;
	sp->drop_unforced = rp->red_stats.drop_unforced;
	sp->marked_packets = rp->red_stats.marked_packets;
}

/*
 * enqueue routine:
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
red_enqueue(ifq, m, pktattr)
	struct ifaltq *ifq;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
		return ENOBUFS;
	ifq->ifq_len++;
	return 0;
}

int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflowing.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
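	/*
	 * red_avg holds avg scaled by 2^(red_wshift + FP_SHIFT).
	 * with Wq = 1/weight = 2^-wshift, the textbook update
	 *	avg = (1 - Wq) * avg + Wq * qlen
	 * becomes, in this scaled representation, the line below:
	 *	avg += (qlen << FP_SHIFT) - (avg >> wshift)
	 */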
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}

/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 */
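/*
 * worked example with the defaults (th_min=5, th_max=15, inv_pmax=10,
 * i.e., p_max=0.1): fp_probd is 200 in fixed-point.  when avg exceeds
 * th_min by 5 and count is 10, d = 200 - 10*5 = 150 and the drop
 * probability is 5/150 = 1/30; the formula above agrees:
 * prob = 0.1 * 5/10 = 0.05, prob_a = 0.05 / (2 - 10*0.05) = 1/30.
 */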
int
drop_early(fp_len, fp_probd, count)
	int fp_len;	/* (avg - TH_MIN) in fixed-point */
	int fp_probd;	/* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
	int count;	/* how many successive undropped packets */
{
	int d;		/* denominator of drop-probability */

	d = fp_probd - count * fp_len;
	if (d <= 0)
		/* count exceeds the hard limit: drop or mark */
		return (1);

	/*
	 * now the range of d is [1..600] in fixed-point.  (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */

	if ((random() % d) < fp_len) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}

/*
 * try to set the CE bit in the packet.
 * returns 1 if successfully marked, 0 otherwise.
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	HC' = ~(~HC + ~m + m')
			 */
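			/*
			 * here HC is the header checksum, m the old
			 * 16-bit word containing the tos byte and m'
			 * the new one; the additions below build the
			 * one's-complement sum and the two shift/add
			 * steps fold the carries back in.
			 */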
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);	/* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif /* INET6 */
	}

	/* not marked */
	return (0);
}

/*
 * dequeue routine:
 *	must be called in splnet.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

static struct mbuf *
red_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
	struct mbuf *m;

	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	/* op == ALTDQ_REMOVE */
	m = red_getq(rqp->rq_red, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return (m);
}

struct mbuf *
red_getq(rp, q)
	red_t *rp;
	class_queue_t *q;
{
	struct mbuf *m;

	if ((m = _getq(q)) == NULL) {
		if (rp->red_idle == 0) {
			rp->red_idle = 1;
			microtime(&rp->red_last);
		}
		return NULL;
	}

	rp->red_idle = 0;
	return (m);
}

static int
red_request(ifq, req, arg)
	struct ifaltq *ifq;
	int req;
	void *arg;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	switch (req) {
	case ALTRQ_PURGE:
		red_purgeq(rqp);
		break;
	}
	return (0);
}

static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}

/*
 * helper routine to calibrate avg during idle.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
 * here Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[i] holds ((1 - Wq)^(2^i)) in fixed-point.
 */
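/*
 * pow_w() evaluates (1 - Wq)^n by binary exponentiation: n is scanned
 * bit by bit and w_tab[i] is multiplied in for every set bit; e.g.,
 * n = 5 (binary 101) yields w_tab[0] * w_tab[2] = (1-Wq)^1 * (1-Wq)^4.
 */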
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */

struct wtab *
wtab_alloc(weight)
	int weight;
{
	struct wtab *w;
	int i;

	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}

int
wtab_destroy(w)
	struct wtab *w;
{
	struct wtab *prev;

	if (--w->w_refcount > 0)
		return (0);

	if (wtab_list == w)
		wtab_list = w->w_next;
	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
		if (prev->w_next == w) {
			prev->w_next = w->w_next;
			break;
		}

	free(w, M_DEVBUF);
	return (0);
}

int32_t
pow_w(w, n)
	struct wtab *w;
	int n;
{
	int i, bit;
	int32_t val;

	if (n >= w->w_param_max)
		return (0);

	val = 1 << FP_SHIFT;
	if (n <= 0)
		return (val);

	bit = 1;
	i = 0;
	while (n) {
		if (n & bit) {
			val = (val * w->w_tab[i]) >> FP_SHIFT;
			n &= ~bit;
		}
		i++;
		bit <<= 1;
	}
	return (val);
}

#ifdef ALTQ_FLOWVALVE

#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE	64		/* how many flows in flowlist */

#define	FV_N		10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH 1	/* backoff threshold interval in second */
#define	FV_TTHRESH	3	/* time threshold to delete fve */
#define	FV_ALPHA	5	/* extra packet count */

#if (__FreeBSD_version > 300000) || defined(__HAVE_TIMECOUNTER)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: a 127-entry table (index 0 is unused) to convert
 * drop rate (p) to the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 / (sqrt(4*p/3) + min(1, 3*sqrt(p*6/8)) * p * (1 + 32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};

static inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
			    &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
			    &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

static inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	fve->fve_p = 0;
	fve->fve_f = 0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}

static inline void
flowlist_move_to_head(fv, fve)
	struct flowvalve *fv;
	struct fve *fve;
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

/*
 * allocate flowvalve structure
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
	if (fv == NULL)
		return (NULL);

	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
	    M_WAITOK|M_ZERO);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}

static void
fv_destroy(fv)
	struct flowvalve *fv;
{
	free(fv->fv_p2ftab, M_DEVBUF);
	free(fv->fv_fves, M_DEVBUF);
	free(fv, M_DEVBUF);
}
1318
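/*
 * look up the bandwidth fraction for a scaled drop rate p: p is scaled
 * by 2^(FV_PSHIFT + FP_SHIFT) = 2^19, BRTT_MASK extracts the 7 bits
 * just below BRTT_PMAX, and (val >> BRTT_SHIFT) indexes the 127 usable
 * table entries.
 */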
static inline int
fv_p2f(fv, p)
	struct flowvalve *fv;
	int p;
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return (f);
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
		    (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
		    + fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}
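	/*
	 * fve_f is kept scaled by 2^(FV_FSHIFT + FP_SHIFT); with
	 * Wf = 2^-FV_FSHIFT, the first term of the update above,
	 * (FV_N << FP_SHIFT) / diff, is Wf * N/diff in that scale,
	 * and subtracting FV_FUNSCALE(fve_f) applies the (1 - Wf) decay.
	 */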

	/*
	 * overpumping test
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 */
static void
fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line, together with the decay applied
	 * in fv_checkflow(), calculates
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
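	/*
	 * fve_p is scaled by 2^(FV_PSHIFT + FP_SHIFT) and Wp is
	 * 2^-FV_PSHIFT, so adding Wp in this scaled representation is
	 * adding exactly 1 << FP_SHIFT, as done above.
	 */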

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ_RED */