altq_red.c revision 1.1.1.1 1 /* $KAME: altq_red.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $ */
2
3 /*
4 * Copyright (C) 1997-2000
5 * Sony Computer Science Laboratories Inc. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29 /*
30 * Copyright (c) 1990-1994 Regents of the University of California.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the Computer Systems
44 * Engineering Group at Lawrence Berkeley Laboratory.
45 * 4. Neither the name of the University nor of the Laboratory may be used
46 * to endorse or promote products derived from this software without
47 * specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 #if defined(__FreeBSD__) || defined(__NetBSD__)
63 #include "opt_altq.h"
64 #if (__FreeBSD__ != 2)
65 #include "opt_inet.h"
66 #ifdef __FreeBSD__
67 #include "opt_inet6.h"
68 #endif
69 #endif
70 #endif /* __FreeBSD__ || __NetBSD__ */
71 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/sockio.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/errno.h>
81 #include <sys/kernel.h>
82 #ifdef ALTQ_FLOWVALVE
83 #include <sys/queue.h>
84 #include <sys/time.h>
85 #endif
86
87 #include <net/if.h>
88 #include <net/if_types.h>
89
90 #include <netinet/in.h>
91 #include <netinet/in_systm.h>
92 #include <netinet/ip.h>
93 #ifdef INET6
94 #include <netinet/ip6.h>
95 #endif
96
97 #include <altq/altq.h>
98 #include <altq/altq_conf.h>
99 #include <altq/altq_red.h>
100 #ifdef ALTQ_FLOWVALVE
101 #include <altq/altq_flowvalve.h>
102 #endif
103
104 /*
105 * ALTQ/RED (Random Early Detection) implementation using 32-bit
106 * fixed-point calculation.
107 *
108 * written by kjc using the ns code as a reference.
109 * you can learn more about red and ns from Sally's home page at
110 * http://www-nrg.ee.lbl.gov/floyd/
111 *
112 * most of the red parameter values are fixed in this implementation
113 * to prevent fixed-point overflow/underflow.
114 * if you change the parameters, watch out for overflow/underflow!
115 *
116 * the parameters used are recommended values by Sally.
117 * the corresponding ns config looks:
118 * q_weight=0.00195
119 * minthresh=5 maxthresh=15 queue-size=60
120 * linterm=30
121 * dropmech=drop-tail
122 * bytes=false (can't be handled by 32-bit fixed-point)
123 * doubleq=false dqthresh=false
124 * wait=true
125 */
126 /*
127 * alternative red parameters for a slow link.
128 *
129 * assume the queue length becomes from zero to L and keeps L, it takes
130 * N packets for q_avg to reach 63% of L.
131 * when q_weight is 0.002, N is about 500 packets.
132 * for a slow link like dial-up, 500 packets takes more than 1 minute!
133 * when q_weight is 0.008, N is about 127 packets.
134 * when q_weight is 0.016, N is about 63 packets.
135 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
136 * are allowed for 0.016.
137 * see Sally's paper for more details.
138 */
139 /* normal red parameters */
140 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
141 /* q_weight = 0.00195 */
142
143 /* red parameters for a slow link */
144 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
145 /* q_weight = 0.0078125 */
146
147 /* red parameters for a very slow link (e.g., dialup) */
148 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
149 /* q_weight = 0.015625 */
150
151 /* fixed-point uses 12-bit decimal places */
152 #define FP_SHIFT 12 /* fixed-point shift */
153
154 /* red parameters for drop probability */
155 #define INV_P_MAX 10 /* inverse of max drop probability */
156 #define TH_MIN 5 /* min threshold */
157 #define TH_MAX 15 /* max threshold */
158
159 #define RED_LIMIT 60 /* default max queue length */
160 #define RED_STATS /* collect statistics */
161
162 /*
163 * our default policy for forced-drop is drop-tail.
164 * (in altq-1.1.2 or earlier, the default was random-drop.
165 * but it makes more sense to punish the cause of the surge.)
166 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
167 */
168
169 #ifdef ALTQ_FLOWVALVE
170 /*
171 * flow-valve is an extension to protect red from unresponsive flows
172 * and to promote end-to-end congestion control.
173 * flow-valve observes the average drop rates of the flows that have
174 * experienced packet drops in the recent past.
175 * when the average drop rate exceeds the threshold, the flow is
176 * blocked by the flow-valve. the trapped flow should back off
177 * exponentially to escape from the flow-valve.
178 */
179 #ifdef RED_RANDOM_DROP
180 #error "random-drop can't be used with flow-valve!"
181 #endif
182 #endif /* ALTQ_FLOWVALVE */
183
184 /* red_list keeps all red_queue_t's allocated. */
185 static red_queue_t *red_list = NULL;
186
187 /* default red parameter values */
188 static int default_th_min = TH_MIN;
189 static int default_th_max = TH_MAX;
190 static int default_inv_pmax = INV_P_MAX;
191
192 /* internal function prototypes */
193 static int red_enqueue __P((struct ifaltq *, struct mbuf *,
194 struct altq_pktattr *));
195 static struct mbuf *red_dequeue __P((struct ifaltq *, int));
196 static int red_request __P((struct ifaltq *, int, void *));
197 static void red_purgeq __P((red_queue_t *));
198 static int red_detach __P((red_queue_t *));
199 #ifdef ALTQ_FLOWVALVE
200 static __inline struct fve *flowlist_lookup __P((struct flowvalve *,
201 struct altq_pktattr *, struct timeval *));
202 static __inline struct fve *flowlist_reclaim __P((struct flowvalve *,
203 struct altq_pktattr *));
204 static __inline void flowlist_move_to_head __P((struct flowvalve *,
205 struct fve *));
206 static __inline int fv_p2f __P((struct flowvalve *, int));
207 static struct flowvalve *fv_alloc __P((struct red *));
208 static void fv_destroy __P((struct flowvalve *));
209 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
210 struct fve **));
211 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
212 struct fve *));
213 #endif
214
215 /*
216 * red device interface
217 */
218 altqdev_decl(red);
219
220 int
221 redopen(dev, flag, fmt, p)
222 dev_t dev;
223 int flag, fmt;
224 struct proc *p;
225 {
226 /* everything will be done when the queueing scheme is attached. */
227 return 0;
228 }
229
230 int
231 redclose(dev, flag, fmt, p)
232 dev_t dev;
233 int flag, fmt;
234 struct proc *p;
235 {
236 red_queue_t *rqp;
237 int err, error = 0;
238
239 while ((rqp = red_list) != NULL) {
240 /* destroy all */
241 err = red_detach(rqp);
242 if (err != 0 && error == 0)
243 error = err;
244 }
245
246 return error;
247 }
248
249 int
250 redioctl(dev, cmd, addr, flag, p)
251 dev_t dev;
252 ioctlcmd_t cmd;
253 caddr_t addr;
254 int flag;
255 struct proc *p;
256 {
257 red_queue_t *rqp;
258 struct red_interface *ifacep;
259 struct ifnet *ifp;
260 int error = 0;
261
262 /* check super-user privilege */
263 switch (cmd) {
264 case RED_GETSTATS:
265 break;
266 default:
267 #if (__FreeBSD_version > 400000)
268 if ((error = suser(p)) != 0)
269 #else
270 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
271 #endif
272 return (error);
273 break;
274 }
275
276 switch (cmd) {
277
278 case RED_ENABLE:
279 ifacep = (struct red_interface *)addr;
280 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
281 error = EBADF;
282 break;
283 }
284 error = altq_enable(rqp->rq_ifq);
285 break;
286
287 case RED_DISABLE:
288 ifacep = (struct red_interface *)addr;
289 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
290 error = EBADF;
291 break;
292 }
293 error = altq_disable(rqp->rq_ifq);
294 break;
295
296 case RED_IF_ATTACH:
297 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
298 if (ifp == NULL) {
299 error = ENXIO;
300 break;
301 }
302
303 /* allocate and initialize red_queue_t */
304 MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
305 if (rqp == NULL) {
306 error = ENOMEM;
307 break;
308 }
309 bzero(rqp, sizeof(red_queue_t));
310
311 MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
312 M_DEVBUF, M_WAITOK);
313 if (rqp->rq_q == NULL) {
314 FREE(rqp, M_DEVBUF);
315 error = ENOMEM;
316 break;
317 }
318 bzero(rqp->rq_q, sizeof(class_queue_t));
319
320 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
321 if (rqp->rq_red == NULL) {
322 FREE(rqp->rq_q, M_DEVBUF);
323 FREE(rqp, M_DEVBUF);
324 error = ENOMEM;
325 break;
326 }
327
328 rqp->rq_ifq = &ifp->if_snd;
329 qtail(rqp->rq_q) = NULL;
330 qlen(rqp->rq_q) = 0;
331 qlimit(rqp->rq_q) = RED_LIMIT;
332 qtype(rqp->rq_q) = Q_RED;
333
334 /*
335 * set RED to this ifnet structure.
336 */
337 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
338 red_enqueue, red_dequeue, red_request,
339 NULL, NULL);
340 if (error) {
341 red_destroy(rqp->rq_red);
342 FREE(rqp->rq_q, M_DEVBUF);
343 FREE(rqp, M_DEVBUF);
344 break;
345 }
346
347 /* add this state to the red list */
348 rqp->rq_next = red_list;
349 red_list = rqp;
350 break;
351
352 case RED_IF_DETACH:
353 ifacep = (struct red_interface *)addr;
354 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
355 error = EBADF;
356 break;
357 }
358 error = red_detach(rqp);
359 break;
360
361 case RED_GETSTATS:
362 do {
363 struct red_stats *q_stats;
364 red_t *rp;
365
366 q_stats = (struct red_stats *)addr;
367 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
368 ALTQT_RED)) == NULL) {
369 error = EBADF;
370 break;
371 }
372
373 q_stats->q_len = qlen(rqp->rq_q);
374 q_stats->q_limit = qlimit(rqp->rq_q);
375
376 rp = rqp->rq_red;
377 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
378 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
379 q_stats->drop_cnt = rp->red_stats.drop_cnt;
380 q_stats->drop_forced = rp->red_stats.drop_forced;
381 q_stats->drop_unforced = rp->red_stats.drop_unforced;
382 q_stats->marked_packets = rp->red_stats.marked_packets;
383
384 q_stats->weight = rp->red_weight;
385 q_stats->inv_pmax = rp->red_inv_pmax;
386 q_stats->th_min = rp->red_thmin;
387 q_stats->th_max = rp->red_thmax;
388
389 #ifdef ALTQ_FLOWVALVE
390 if (rp->red_flowvalve != NULL) {
391 struct flowvalve *fv = rp->red_flowvalve;
392 q_stats->fv_flows = fv->fv_flows;
393 q_stats->fv_pass = fv->fv_stats.pass;
394 q_stats->fv_predrop = fv->fv_stats.predrop;
395 q_stats->fv_alloc = fv->fv_stats.alloc;
396 q_stats->fv_escape = fv->fv_stats.escape;
397 } else {
398 #endif /* ALTQ_FLOWVALVE */
399 q_stats->fv_flows = 0;
400 q_stats->fv_pass = 0;
401 q_stats->fv_predrop = 0;
402 q_stats->fv_alloc = 0;
403 q_stats->fv_escape = 0;
404 #ifdef ALTQ_FLOWVALVE
405 }
406 #endif /* ALTQ_FLOWVALVE */
407 } while (0);
408 break;
409
410 case RED_CONFIG:
411 do {
412 struct red_conf *fc;
413 red_t *new;
414 int s, limit;
415
416 fc = (struct red_conf *)addr;
417 if ((rqp = altq_lookup(fc->iface.red_ifname,
418 ALTQT_RED)) == NULL) {
419 error = EBADF;
420 break;
421 }
422 new = red_alloc(fc->red_weight,
423 fc->red_inv_pmax,
424 fc->red_thmin,
425 fc->red_thmax,
426 fc->red_flags,
427 fc->red_pkttime);
428 if (new == NULL) {
429 error = ENOMEM;
430 break;
431 }
432
433 s = splimp();
434 red_purgeq(rqp);
435 limit = fc->red_limit;
436 if (limit < fc->red_thmax)
437 limit = fc->red_thmax;
438 qlimit(rqp->rq_q) = limit;
439 fc->red_limit = limit; /* write back the new value */
440
441 red_destroy(rqp->rq_red);
442 rqp->rq_red = new;
443
444 splx(s);
445
446 /* write back new values */
447 fc->red_limit = limit;
448 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
449 fc->red_thmin = rqp->rq_red->red_thmin;
450 fc->red_thmax = rqp->rq_red->red_thmax;
451
452 } while (0);
453 break;
454
455 case RED_SETDEFAULTS:
456 do {
457 struct redparams *rp;
458
459 rp = (struct redparams *)addr;
460
461 default_th_min = rp->th_min;
462 default_th_max = rp->th_max;
463 default_inv_pmax = rp->inv_pmax;
464 } while (0);
465 break;
466
467 default:
468 error = EINVAL;
469 break;
470 }
471 return error;
472 }
473
474 static int
475 red_detach(rqp)
476 red_queue_t *rqp;
477 {
478 red_queue_t *tmp;
479 int error = 0;
480
481 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
482 altq_disable(rqp->rq_ifq);
483
484 if ((error = altq_detach(rqp->rq_ifq)))
485 return (error);
486
487 if (red_list == rqp)
488 red_list = rqp->rq_next;
489 else {
490 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
491 if (tmp->rq_next == rqp) {
492 tmp->rq_next = rqp->rq_next;
493 break;
494 }
495 if (tmp == NULL)
496 printf("red_detach: no state found in red_list!\n");
497 }
498
499 red_destroy(rqp->rq_red);
500 FREE(rqp->rq_q, M_DEVBUF);
501 FREE(rqp, M_DEVBUF);
502 return (error);
503 }
504
505 /*
506 * red support routines
507 */
508
509 red_t *
510 red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
511 int weight, inv_pmax, th_min, th_max;
512 int flags, pkttime;
513 {
514 red_t *rp;
515 int w, i;
516 int npkts_per_sec;
517
518 MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
519 if (rp == NULL)
520 return (NULL);
521 bzero(rp, sizeof(red_t));
522
523 rp->red_avg = 0;
524 rp->red_idle = 1;
525
526 if (weight == 0)
527 rp->red_weight = W_WEIGHT;
528 else
529 rp->red_weight = weight;
530 if (inv_pmax == 0)
531 rp->red_inv_pmax = default_inv_pmax;
532 else
533 rp->red_inv_pmax = inv_pmax;
534 if (th_min == 0)
535 rp->red_thmin = default_th_min;
536 else
537 rp->red_thmin = th_min;
538 if (th_max == 0)
539 rp->red_thmax = default_th_max;
540 else
541 rp->red_thmax = th_max;
542
543 rp->red_flags = flags;
544
545 if (pkttime == 0)
546 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
547 rp->red_pkttime = 800;
548 else
549 rp->red_pkttime = pkttime;
550
551 if (weight == 0) {
552 /* when the link is very slow, adjust red parameters */
553 npkts_per_sec = 1000000 / rp->red_pkttime;
554 if (npkts_per_sec < 50) {
555 /* up to about 400Kbps */
556 rp->red_weight = W_WEIGHT_2;
557 } else if (npkts_per_sec < 300) {
558 /* up to about 2.4Mbps */
559 rp->red_weight = W_WEIGHT_1;
560 }
561 }
562
563 /* calculate wshift. weight must be power of 2 */
564 w = rp->red_weight;
565 for (i = 0; w > 1; i++)
566 w = w >> 1;
567 rp->red_wshift = i;
568 w = 1 << rp->red_wshift;
569 if (w != rp->red_weight) {
570 printf("invalid weight value %d for red! use %d\n",
571 rp->red_weight, w);
572 rp->red_weight = w;
573 }
574
575 /*
576 * thmin_s and thmax_s are scaled versions of th_min and th_max
577 * to be compared with avg.
578 */
579 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
580 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
581
582 /*
583 * precompute probability denominator
584 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
585 */
586 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
587 * rp->red_inv_pmax) << FP_SHIFT;
588
589 /* allocate weight table */
590 rp->red_wtab = wtab_alloc(rp->red_weight);
591
592 microtime(&rp->red_last);
593 #ifdef ALTQ_FLOWVALVE
594 if (flags & REDF_FLOWVALVE)
595 rp->red_flowvalve = fv_alloc(rp);
596 /* if fv_alloc failes, flowvalve is just disabled */
597 #endif
598 return (rp);
599 }
600
/*
 * red_destroy:
 *	free a red_t: release the flow-valve state (if any), drop the
 *	reference on the shared weight table, then free the structure.
 */
601 void
602 red_destroy(rp)
603 red_t *rp;
604 {
605 #ifdef ALTQ_FLOWVALVE
606 if (rp->red_flowvalve != NULL)
607 fv_destroy(rp->red_flowvalve);
608 #endif
609 wtab_destroy(rp->red_wtab);
610 FREE(rp, M_DEVBUF);
611 }
612
/*
 * red_getstats:
 *	copy the RED counters and the current average queue length out
 *	of rp into the caller-supplied redstats structure.
 */
613 void
614 red_getstats(rp, sp)
615 red_t *rp;
616 struct redstats *sp;
617 {
/* red_avg is kept scaled by the EWMA weight; shift it back down */
618 sp->q_avg = rp->red_avg >> rp->red_wshift;
619 sp->xmit_cnt = rp->red_stats.xmit_cnt;
620 sp->drop_cnt = rp->red_stats.drop_cnt;
621 sp->drop_forced = rp->red_stats.drop_forced;
622 sp->drop_unforced = rp->red_stats.drop_unforced;
623 sp->marked_packets = rp->red_stats.marked_packets;
624 }
625
626 /*
627 * enqueue routine:
628 *
629 * returns: 0 when successfully queued.
630 * ENOBUFS when drop occurs.
631 */
632 static int
633 red_enqueue(ifq, m, pktattr)
634 struct ifaltq *ifq;
635 struct mbuf *m;
636 struct altq_pktattr *pktattr;
637 {
638 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
639
640 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
641 return ENOBUFS;
642 ifq->ifq_len++;
643 return 0;
644 }
645
/*
 * red_addq:
 *	the RED enqueue decision.  updates the exponentially-weighted
 *	moving average of the queue length, then either queues the
 *	packet, ECN-marks it, or drops it (early or forced).
 *
 *	returns: 0 when the packet was queued, -1 when it was dropped
 *	(the mbuf is freed here in that case).
 */
646 int
647 red_addq(rp, q, m, pktattr)
648 red_t *rp;
649 class_queue_t *q;
650 struct mbuf *m;
651 struct altq_pktattr *pktattr;
652 {
653 int avg, droptype;
654 int n;
655 #ifdef ALTQ_FLOWVALVE
656 struct fve *fve = NULL;
657 
/* a flow currently trapped by the flow-valve is dropped up front */
658 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
659 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
660 m_freem(m);
661 return (-1);
662 }
663 #endif
664 
665 avg = rp->red_avg;
666 
667 /*
668 * if we were idle, we pretend that n packets arrived during
669 * the idle period.
670 */
671 if (rp->red_idle) {
672 struct timeval now;
673 int t;
674 
675 rp->red_idle = 0;
676 microtime(&now);
677 t = (now.tv_sec - rp->red_last.tv_sec);
678 if (t > 60) {
679 /*
680 * being idle for more than 1 minute, set avg to zero.
681 * this prevents t from overflow.
682 */
683 avg = 0;
684 } else {
685 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
686 n = t / rp->red_pkttime - 1;
687 
688 /* the following line does (avg = (1 - Wq)^n * avg) */
689 if (n > 0)
690 avg = (avg >> FP_SHIFT) *
691 pow_w(rp->red_wtab, n);
692 }
693 }
694 
695 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
696 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
697 rp->red_avg = avg; /* save the new value */
698 
699 /*
700 * red_count keeps a tally of arriving traffic that has not
701 * been dropped.
702 */
703 rp->red_count++;
704 
/* see if we drop early; never early-drop when <= 1 packet is queued */
705 /* see if we drop early */
706 droptype = DTYPE_NODROP;
707 if (avg >= rp->red_thmin_s && qlen(q) > 1) {
708 if (avg >= rp->red_thmax_s) {
709 /* avg >= th_max: forced drop */
710 droptype = DTYPE_FORCED;
711 } else if (rp->red_old == 0) {
712 /* first exceeds th_min */
713 rp->red_count = 1;
714 rp->red_old = 1;
715 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
716 rp->red_probd, rp->red_count)) {
717 /* mark or drop by red */
718 if ((rp->red_flags & REDF_ECN) &&
719 mark_ecn(m, pktattr, rp->red_flags)) {
720 /* successfully marked. do not drop. */
721 rp->red_count = 0;
722 #ifdef RED_STATS
723 rp->red_stats.marked_packets++;
724 #endif
725 } else {
726 /* unforced drop by red */
727 droptype = DTYPE_EARLY;
728 }
729 }
730 } else {
731 /* avg < th_min */
732 rp->red_old = 0;
733 }
734 
735 /*
736 * if the queue length hits the hard limit, it's a forced drop.
737 */
738 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
739 droptype = DTYPE_FORCED;
740 
741 #ifdef RED_RANDOM_DROP
742 /* if successful or forced drop, enqueue this packet. */
743 if (droptype != DTYPE_EARLY)
744 _addq(q, m);
745 #else
746 /* if successful, enqueue this packet. */
747 if (droptype == DTYPE_NODROP)
748 _addq(q, m);
749 #endif
750 if (droptype != DTYPE_NODROP) {
751 if (droptype == DTYPE_EARLY) {
752 /* drop the incoming packet */
753 #ifdef RED_STATS
754 rp->red_stats.drop_unforced++;
755 #endif
756 } else {
757 /* forced drop, select a victim packet in the queue. */
758 #ifdef RED_RANDOM_DROP
759 m = _getq_random(q);
760 #endif
761 #ifdef RED_STATS
762 rp->red_stats.drop_forced++;
763 #endif
764 }
765 #ifdef RED_STATS
766 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
767 #endif
768 rp->red_count = 0;
769 #ifdef ALTQ_FLOWVALVE
/* tell the flow-valve about the drop so it can penalize the flow */
770 if (rp->red_flowvalve != NULL)
771 fv_dropbyred(rp->red_flowvalve, pktattr, fve);
772 #endif
773 m_freem(m);
774 return (-1);
775 }
776 /* successfully queued */
777 #ifdef RED_STATS
778 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
779 #endif
780 return (0);
781 }
782
783 /*
784 * early-drop probability is calculated as follows:
785 * prob = p_max * (avg - th_min) / (th_max - th_min)
786 * prob_a = prob / (2 - count*prob)
787 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
788 * here prob_a increases as successive undrop count increases.
789 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
790 * becomes 1 when (count >= (2 / prob))).
791 */
/*
 * drop_early:
 *	probabilistically decide whether to drop (or mark) a packet,
 *	given the amount by which avg exceeds th_min and the number of
 *	packets accepted since the last drop.
 *
 *	fp_len:		(avg - TH_MIN) in fixed-point
 *	fp_probd:	(2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
 *	count:		how many successive undropped packets
 *
 *	returns 1 to drop/mark, 0 to accept.
 *
 *	the drop probability grows with count: prob_a starts at prob/2
 *	and reaches 1 once count >= 2/prob (see the comment above).
 */
int
drop_early(fp_len, fp_probd, count)
	int fp_len;
	int fp_probd;
	int count;
{
	int d;

	/* denominator of the drop probability */
	d = fp_probd - count * fp_len;
	if (d <= 0) {
		/* count exceeds the hard limit: drop or mark */
		return (1);
	}

	/*
	 * now the range of d is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */
	return (((random() % d) < fp_len) ? 1 : 0);
}
818
819 /*
820 * try to mark CE bit to the packet.
821 * returns 1 if successfully marked, 0 otherwise.
822 */
/*
 * mark_ecn:
 *	try to set the CE (congestion experienced) bit on the packet's
 *	IP/IPv6 header, updating the IPv4 header checksum incrementally.
 *	returns 1 if successfully marked, 0 otherwise (non-IP packet,
 *	stale header pointer, non-ECT sender, or marking disabled).
 */
823 int
824 mark_ecn(m, pktattr, flags)
825 struct mbuf *m;
826 struct altq_pktattr *pktattr;
827 int flags;
828 {
829 struct mbuf *m0;
830 
831 if (pktattr == NULL ||
832 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
833 return (0);
834 
835 /* verify that pattr_hdr is within the mbuf data */
836 for (m0 = m; m0 != NULL; m0 = m0->m_next)
837 if ((pktattr->pattr_hdr >= m0->m_data) &&
838 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
839 break;
840 if (m0 == NULL) {
841 /* ick, pattr_hdr is stale */
842 pktattr->pattr_af = AF_UNSPEC;
843 return (0);
844 }
845 
846 switch (pktattr->pattr_af) {
847 case AF_INET:
848 if (flags & REDF_ECN4) {
849 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
850 
851 if (ip->ip_v != 4)
852 return (0); /* version mismatch! */
/* only mark senders that advertised ECN capability (ECT set) */
853 if (ip->ip_tos & IPTOS_ECT) {
854 /* ECN-capable, mark ECN bit. */
855 if ((ip->ip_tos & IPTOS_CE) == 0) {
856 #if (IPTOS_CE == 0x01)
857 u_short sum;
858 
859 ip->ip_tos |= IPTOS_CE;
860 /*
861 * optimized version when IPTOS_CE
862 * is 0x01.
863 * HC' = HC -1 when HC > 0
864 * = 0xfffe when HC = 0
865 */
866 sum = ntohs(ip->ip_sum);
867 if (sum == 0)
868 sum = 0xfffe;
869 else
870 sum -= 1;
871 ip->ip_sum = htons(sum);
872 #else /* IPTOS_CE != 0x01 */
873 long sum;
874 
875 ip->ip_tos |= IPTOS_CE;
876 /*
877 * update checksum (from RFC1624)
878 * HC' = ~(~HC + ~m + m')
879 */
880 sum = ~ntohs(ip->ip_sum) & 0xffff;
881 sum += 0xffff + IPTOS_CE;
882 sum = (sum >> 16) + (sum & 0xffff);
883 sum += (sum >> 16); /* add carry */
884 
885 ip->ip_sum = htons(~sum & 0xffff);
886 #endif /* IPTOS_CE != 0x01 */
887 }
/* already CE, or just marked: either way report success */
888 return (1);
889 }
890 }
891 break;
892 #ifdef INET6
893 case AF_INET6:
894 if (flags & REDF_ECN6) {
895 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
896 u_int32_t flowlabel;
897 
/* IPv6 carries the traffic class inside the flow-label word */
898 flowlabel = ntohl(ip6->ip6_flow);
899 if ((flowlabel >> 28) != 6)
900 return (0); /* version mismatch! */
901 if (flowlabel & (IPTOS_ECT << 20)) {
902 /* ECN-capable, mark ECN bit. */
903 flowlabel |= (IPTOS_CE << 20);
904 ip6->ip6_flow = htonl(flowlabel);
905 return (1);
906 }
907 }
908 break;
909 #endif /* INET6 */
910 }
911 
912 /* not marked */
913 return (0);
914 }
915
916 /*
917 * dequeue routine:
918 * must be called in splimp.
919 *
920 * returns: mbuf dequeued.
921 * NULL when no packet is available in the queue.
922 */
923
924 static struct mbuf *
925 red_dequeue(ifq, op)
926 struct ifaltq *ifq;
927 int op;
928 {
929 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
930 struct mbuf *m;
931
932 if (op == ALTDQ_POLL)
933 return qhead(rqp->rq_q);
934
935 /* op == ALTDQ_REMOVE */
936 m = red_getq(rqp->rq_red, rqp->rq_q);
937 if (m != NULL)
938 ifq->ifq_len--;
939 return (m);
940 }
941
942 struct mbuf *
943 red_getq(rp, q)
944 red_t *rp;
945 class_queue_t *q;
946 {
947 struct mbuf *m;
948
949 if ((m = _getq(q)) == NULL) {
950 if (rp->red_idle == 0) {
951 rp->red_idle = 1;
952 microtime(&rp->red_last);
953 }
954 return NULL;
955 }
956
957 rp->red_idle = 0;
958 return (m);
959 }
960
961 static int
962 red_request(ifq, req, arg)
963 struct ifaltq *ifq;
964 int req;
965 void *arg;
966 {
967 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
968
969 switch (req) {
970 case ALTRQ_PURGE:
971 red_purgeq(rqp);
972 break;
973 }
974 return (0);
975 }
976
977 static void
978 red_purgeq(rqp)
979 red_queue_t *rqp;
980 {
981 _flushq(rqp->rq_q);
982 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
983 rqp->rq_ifq->ifq_len = 0;
984 }
985
986
987 /*
988 * helper routine to calibrate avg during idle.
989 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
990 * here Wq = 1/weight and the code assumes Wq is close to zero.
991 *
992 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
993 */
994 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
995
996 struct wtab *
997 wtab_alloc(weight)
998 int weight;
999 {
1000 struct wtab *w;
1001 int i;
1002
1003 for (w = wtab_list; w != NULL; w = w->w_next)
1004 if (w->w_weight == weight) {
1005 w->w_refcount++;
1006 return (w);
1007 }
1008
1009 MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
1010 if (w == NULL)
1011 panic("wtab_alloc: malloc failed!");
1012 bzero(w, sizeof(struct wtab));
1013 w->w_weight = weight;
1014 w->w_refcount = 1;
1015 w->w_next = wtab_list;
1016 wtab_list = w;
1017
1018 /* initialize the weight table */
1019 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
1020 for (i = 1; i < 32; i++) {
1021 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
1022 if (w->w_tab[i] == 0 && w->w_param_max == 0)
1023 w->w_param_max = 1 << i;
1024 }
1025
1026 return (w);
1027 }
1028
1029 int
1030 wtab_destroy(w)
1031 struct wtab *w;
1032 {
1033 struct wtab *prev;
1034
1035 if (--w->w_refcount > 0)
1036 return (0);
1037
1038 if (wtab_list == w)
1039 wtab_list = w->w_next;
1040 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
1041 if (prev->w_next == w) {
1042 prev->w_next = w->w_next;
1043 break;
1044 }
1045
1046 FREE(w, M_DEVBUF);
1047 return (0);
1048 }
1049
1050 int32_t
1051 pow_w(w, n)
1052 struct wtab *w;
1053 int n;
1054 {
1055 int i, bit;
1056 int32_t val;
1057
1058 if (n >= w->w_param_max)
1059 return (0);
1060
1061 val = 1 << FP_SHIFT;
1062 if (n <= 0)
1063 return (val);
1064
1065 bit = 1;
1066 i = 0;
1067 while (n) {
1068 if (n & bit) {
1069 val = (val * w->w_tab[i]) >> FP_SHIFT;
1070 n &= ~bit;
1071 }
1072 i++;
1073 bit <<= 1;
1074 }
1075 return (val);
1076 }
1077
1078 #ifdef ALTQ_FLOWVALVE
1079
1080 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1081 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1082 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1083 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1084 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1085 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1086
1087 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1088 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1089
1090 #define FV_N 10 /* update fve_f every FV_N packets */
1091
1092 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1093 #define FV_TTHRESH 3 /* time threshold to delete fve */
1094 #define FV_ALPHA 5 /* extra packet count */
1095
1096 #define FV_STATS
1097
1098 #if (__FreeBSD_version > 300000)
1099 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1100 #else
1101 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1102 #endif
1103
1104 /*
1105 * Brtt table: 127 entry table to convert drop rate (p) to
1106 * the corresponding bandwidth fraction (f)
1107 * the following equation is implemented to use scaled values,
1108 * fve_p and fve_f, in the fixed point format.
1109 *
1110 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1111 * f = Brtt(p) / (max_th + alpha)
1112 */
1113 #define BRTT_SIZE 128
1114 #define BRTT_SHIFT 12
1115 #define BRTT_MASK 0x0007f000
1116 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1117
1118 const int brtt_tab[BRTT_SIZE] = {
1119 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1120 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1121 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1122 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1123 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1124 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1125 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1126 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1127 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1128 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1129 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1130 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1131 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1132 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1133 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1134 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1135 };
1136
/*
 * flowlist_lookup:
 *	search the flow-valve LRU list for an entry matching the
 *	packet's src/dst address pair.  entries older than FV_TTHRESH
 *	are expired on the way.  as a side effect, fv->fv_flows is
 *	refreshed with the number of live entries scanned when no
 *	match is found.
 *
 *	NOTE(review): an entry with fve_lastdrop.tv_sec == 0 appears to
 *	act as a free/unused sentinel terminating the live portion of
 *	the list — confirm against fv_alloc/flowlist_reclaim.
 *
 *	returns the matching fve, or NULL if none (or unknown protocol).
 */
1137 static __inline struct fve *
1138 flowlist_lookup(fv, pktattr, now)
1139 struct flowvalve *fv;
1140 struct altq_pktattr *pktattr;
1141 struct timeval *now;
1142 {
1143 struct fve *fve;
1144 int flows;
1145 struct ip *ip;
1146 #ifdef INET6
1147 struct ip6_hdr *ip6;
1148 #endif
1149 struct timeval tthresh;
1150 
1151 if (pktattr == NULL)
1152 return (NULL);
1153 
/* entries that last dropped before tthresh are considered stale */
1154 tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
1155 flows = 0;
1156 /*
1157 * search the flow list
1158 */
1159 switch (pktattr->pattr_af) {
1160 case AF_INET:
1161 ip = (struct ip *)pktattr->pattr_hdr;
1162 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1163 if (fve->fve_lastdrop.tv_sec == 0)
1164 break;
1165 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
/* expire the stale entry and stop: the list is LRU-ordered */
1166 fve->fve_lastdrop.tv_sec = 0;
1167 break;
1168 }
1169 if (fve->fve_flow.flow_af == AF_INET &&
1170 fve->fve_flow.flow_ip.ip_src.s_addr ==
1171 ip->ip_src.s_addr &&
1172 fve->fve_flow.flow_ip.ip_dst.s_addr ==
1173 ip->ip_dst.s_addr)
1174 return (fve);
1175 flows++;
1176 }
1177 break;
1178 #ifdef INET6
1179 case AF_INET6:
1180 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1181 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1182 if (fve->fve_lastdrop.tv_sec == 0)
1183 break;
1184 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1185 fve->fve_lastdrop.tv_sec = 0;
1186 break;
1187 }
1188 if (fve->fve_flow.flow_af == AF_INET6 &&
1189 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
1190 &ip6->ip6_src) &&
1191 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
1192 &ip6->ip6_dst))
1193 return (fve);
1194 flows++;
1195 }
1196 break;
1197 #endif /* INET6 */
1198 
1199 default:
1200 /* unknown protocol. no drop. */
1201 return (NULL);
1202 }
1203 fv->fv_flows = flows; /* save the number of active fve's */
1204 return (NULL);
1205 }
1206
1207 static __inline struct fve *
1208 flowlist_reclaim(fv, pktattr)
1209 struct flowvalve *fv;
1210 struct altq_pktattr *pktattr;
1211 {
1212 struct fve *fve;
1213 struct ip *ip;
1214 #ifdef INET6
1215 struct ip6_hdr *ip6;
1216 #endif
1217
1218 /*
1219 * get an entry from the tail of the LRU list.
1220 */
1221 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1222
1223 switch (pktattr->pattr_af) {
1224 case AF_INET:
1225 ip = (struct ip *)pktattr->pattr_hdr;
1226 fve->fve_flow.flow_af = AF_INET;
1227 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1228 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1229 break;
1230 #ifdef INET6
1231 case AF_INET6:
1232 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1233 fve->fve_flow.flow_af = AF_INET6;
1234 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1235 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1236 break;
1237 #endif
1238 }
1239
1240 fve->fve_state = Green;
1241 fve->fve_p = 0.0;
1242 fve->fve_f = 0.0;
1243 fve->fve_ifseq = fv->fv_ifseq - 1;
1244 fve->fve_count = 0;
1245
1246 fv->fv_flows++;
1247 #ifdef FV_STATS
1248 fv->fv_stats.alloc++;
1249 #endif
1250 return (fve);
1251 }
1252
1253 static __inline void
1254 flowlist_move_to_head(fv, fve)
1255 struct flowvalve *fv;
1256 struct fve *fve;
1257 {
1258 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1259 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1260 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1261 }
1262 }
1263
1264 /*
1265 * allocate flowvalve structure
1266 */
1267 static struct flowvalve *
1268 fv_alloc(rp)
1269 struct red *rp;
1270 {
1271 struct flowvalve *fv;
1272 struct fve *fve;
1273 int i, num;
1274
1275 num = FV_FLOWLISTSIZE;
1276 MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
1277 M_DEVBUF, M_WAITOK);
1278 if (fv == NULL)
1279 return (NULL);
1280 bzero(fv, sizeof(struct flowvalve));
1281
1282 MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
1283 M_DEVBUF, M_WAITOK);
1284 if (fv->fv_fves == NULL) {
1285 FREE(fv, M_DEVBUF);
1286 return (NULL);
1287 }
1288 bzero(fv->fv_fves, sizeof(struct fve) * num);
1289
1290 fv->fv_flows = 0;
1291 TAILQ_INIT(&fv->fv_flowlist);
1292 for (i = 0; i < num; i++) {
1293 fve = &fv->fv_fves[i];
1294 fve->fve_lastdrop.tv_sec = 0;
1295 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1296 }
1297
1298 /* initialize drop rate threshold in scaled fixed-point */
1299 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1300
1301 /* initialize drop rate to fraction table */
1302 MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
1303 M_DEVBUF, M_WAITOK);
1304 if (fv->fv_p2ftab == NULL) {
1305 FREE(fv->fv_fves, M_DEVBUF);
1306 FREE(fv, M_DEVBUF);
1307 return (NULL);
1308 }
1309 /*
1310 * create the p2f table.
1311 * (shift is used to keep the precision)
1312 */
1313 for (i = 1; i < BRTT_SIZE; i++) {
1314 int f;
1315
1316 f = brtt_tab[i] << 8;
1317 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1318 }
1319
1320 return (fv);
1321 }
1322
1323 static void fv_destroy(fv)
1324 struct flowvalve *fv;
1325 {
1326 FREE(fv->fv_p2ftab, M_DEVBUF);
1327 FREE(fv->fv_fves, M_DEVBUF);
1328 FREE(fv, M_DEVBUF);
1329 }
1330
1331 static __inline int
1332 fv_p2f(fv, p)
1333 struct flowvalve *fv;
1334 int p;
1335 {
1336 int val, f;
1337
1338 if (p >= BRTT_PMAX)
1339 f = fv->fv_p2ftab[BRTT_SIZE-1];
1340 else if ((val = (p & BRTT_MASK)))
1341 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1342 else
1343 f = fv->fv_p2ftab[1];
1344 return (f);
1345 }
1346
1347 /*
1348 * check if an arriving packet should be pre-dropped.
1349 * called from red_addq() when a packet arrives.
1350 * returns 1 when the packet should be pre-dropped.
1351 * should be called in splimp.
1352 */
1353 static int
1354 fv_checkflow(fv, pktattr, fcache)
1355 struct flowvalve *fv;
1356 struct altq_pktattr *pktattr;
1357 struct fve **fcache;
1358 {
1359 struct fve *fve;
1360 struct timeval now;
1361
1362 fv->fv_ifseq++;
1363 FV_TIMESTAMP(&now);
1364
1365 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1366 /* no matching entry in the flowlist */
1367 return (0);
1368
1369 *fcache = fve;
1370
1371 /* update fraction f for every FV_N packets */
1372 if (++fve->fve_count == FV_N) {
1373 /*
1374 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1375 */
1376 fve->fve_f =
1377 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1378 + fve->fve_f - FV_FUNSCALE(fve->fve_f);
1379 fve->fve_ifseq = fv->fv_ifseq;
1380 fve->fve_count = 0;
1381 }
1382
1383 /*
1384 * overpumping test
1385 */
1386 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1387 int fthresh;
1388
1389 /* calculate a threshold */
1390 fthresh = fv_p2f(fv, fve->fve_p);
1391 if (fve->fve_f > fthresh)
1392 fve->fve_state = Red;
1393 }
1394
1395 if (fve->fve_state == Red) {
1396 /*
1397 * backoff test
1398 */
1399 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1400 /* no drop for at least FV_BACKOFFTHRESH sec */
1401 fve->fve_p = 0;
1402 fve->fve_state = Green;
1403 #ifdef FV_STATS
1404 fv->fv_stats.escape++;
1405 #endif
1406 } else {
1407 /* block this flow */
1408 flowlist_move_to_head(fv, fve);
1409 fve->fve_lastdrop = now;
1410 #ifdef FV_STATS
1411 fv->fv_stats.predrop++;
1412 #endif
1413 return (1);
1414 }
1415 }
1416
1417 /*
1418 * p = (1 - Wp) * p
1419 */
1420 fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1421 if (fve->fve_p < 0)
1422 fve->fve_p = 0;
1423 #ifdef FV_STATS
1424 fv->fv_stats.pass++;
1425 #endif
1426 return (0);
1427 }
1428
1429 /*
1430 * called from red_addq when a packet is dropped by red.
1431 * should be called in splimp.
1432 */
1433 static void fv_dropbyred(fv, pktattr, fcache)
1434 struct flowvalve *fv;
1435 struct altq_pktattr *pktattr;
1436 struct fve *fcache;
1437 {
1438 struct fve *fve;
1439 struct timeval now;
1440
1441 if (pktattr == NULL)
1442 return;
1443 FV_TIMESTAMP(&now);
1444
1445 if (fcache != NULL)
1446 /* the fve of this packet is already cached */
1447 fve = fcache;
1448 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1449 fve = flowlist_reclaim(fv, pktattr);
1450
1451 flowlist_move_to_head(fv, fve);
1452
1453 /*
1454 * update p: the following line cancels the update
1455 * in fv_checkflow() and calculate
1456 * p = Wp + (1 - Wp) * p
1457 */
1458 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1459
1460 fve->fve_lastdrop = now;
1461 }
1462
1463 #endif /* ALTQ_FLOWVALVE */
1464
#ifdef KLD_MODULE

/*
 * loadable module glue: the device switch entries for this
 * discipline -- presumably {name, open, close, ioctl} per
 * struct altqsw (declared elsewhere; confirm).  the module is
 * registered as discipline type ALTQT_RED.
 */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */
1473
1474 #endif /* ALTQ_RED */
1475