altq_red.c revision 1.14 1 /* $NetBSD: altq_red.c,v 1.14 2006/04/23 06:46:40 christos Exp $ */
2 /* $KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $ */
3
4 /*
5 * Copyright (C) 1997-2000
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.14 2006/04/23 06:46:40 christos Exp $");
65
66 #if defined(__FreeBSD__) || defined(__NetBSD__)
67 #include "opt_altq.h"
68 #if (__FreeBSD__ != 2)
69 #include "opt_inet.h"
70 #ifdef __FreeBSD__
71 #include "opt_inet6.h"
72 #endif
73 #endif
74 #endif /* __FreeBSD__ || __NetBSD__ */
75 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
76
77 #include <sys/param.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <sys/socket.h>
81 #include <sys/sockio.h>
82 #include <sys/systm.h>
83 #include <sys/proc.h>
84 #include <sys/errno.h>
85 #include <sys/kernel.h>
86 #ifdef ALTQ_FLOWVALVE
87 #include <sys/queue.h>
88 #include <sys/time.h>
89 #endif
90
91 #include <net/if.h>
92 #include <net/if_types.h>
93
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #ifdef INET6
98 #include <netinet/ip6.h>
99 #endif
100
101 #include <altq/altq.h>
102 #include <altq/altq_conf.h>
103 #include <altq/altq_red.h>
104 #ifdef ALTQ_FLOWVALVE
105 #include <altq/altq_flowvalve.h>
106 #endif
107
108 /*
109 * ALTQ/RED (Random Early Detection) implementation using 32-bit
110 * fixed-point calculation.
111 *
112 * written by kjc using the ns code as a reference.
113 * you can learn more about red and ns from Sally's home page at
114 * http://www-nrg.ee.lbl.gov/floyd/
115 *
116 * most of the red parameter values are fixed in this implementation
117 * to prevent fixed-point overflow/underflow.
118 * if you change the parameters, watch out for overflow/underflow!
119 *
120 * the parameters used are recommended values by Sally.
121 * the corresponding ns config looks:
122 * q_weight=0.00195
123 * minthresh=5 maxthresh=15 queue-size=60
124 * linterm=30
125 * dropmech=drop-tail
126 * bytes=false (can't be handled by 32-bit fixed-point)
127 * doubleq=false dqthresh=false
128 * wait=true
129 */
130 /*
131 * alternative red parameters for a slow link.
132 *
133 * assume the queue length becomes from zero to L and keeps L, it takes
134 * N packets for q_avg to reach 63% of L.
135 * when q_weight is 0.002, N is about 500 packets.
136 * for a slow link like dial-up, 500 packets takes more than 1 minute!
137 * when q_weight is 0.008, N is about 127 packets.
138 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
140 * are allowed for 0.016.
141 * see Sally's paper for more details.
142 */
143 /* normal red parameters */
144 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
145 /* q_weight = 0.00195 */
146
147 /* red parameters for a slow link */
148 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
149 /* q_weight = 0.0078125 */
150
151 /* red parameters for a very slow link (e.g., dialup) */
152 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
153 /* q_weight = 0.015625 */
154
155 /* fixed-point uses 12-bit decimal places */
156 #define FP_SHIFT 12 /* fixed-point shift */
157
158 /* red parameters for drop probability */
159 #define INV_P_MAX 10 /* inverse of max drop probability */
160 #define TH_MIN 5 /* min threshold */
161 #define TH_MAX 15 /* max threshold */
162
163 #define RED_LIMIT 60 /* default max queue length */
164
165 /*
166 * our default policy for forced-drop is drop-tail.
167 * (in altq-1.1.2 or earlier, the default was random-drop.
168 * but it makes more sense to punish the cause of the surge.)
169 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
170 */
171
172 #ifdef ALTQ_FLOWVALVE
173 /*
174 * flow-valve is an extension to protect red from unresponsive flows
175 * and to promote end-to-end congestion control.
176 * flow-valve observes the average drop rates of the flows that have
177 * experienced packet drops in the recent past.
178 * when the average drop rate exceeds the threshold, the flow is
179 * blocked by the flow-valve. the trapped flow should back off
180 * exponentially to escape from the flow-valve.
181 */
182 #ifdef RED_RANDOM_DROP
183 #error "random-drop can't be used with flow-valve!"
184 #endif
185 #endif /* ALTQ_FLOWVALVE */
186
187 /* red_list keeps all red_queue_t's allocated. */
188 static red_queue_t *red_list = NULL;
189
190 /* default red parameter values */
191 static int default_th_min = TH_MIN;
192 static int default_th_max = TH_MAX;
193 static int default_inv_pmax = INV_P_MAX;
194
195 /* internal function prototypes */
196 static int red_enqueue __P((struct ifaltq *, struct mbuf *,
197 struct altq_pktattr *));
198 static struct mbuf *red_dequeue __P((struct ifaltq *, int));
199 static int red_request __P((struct ifaltq *, int, void *));
200 static void red_purgeq __P((red_queue_t *));
201 static int red_detach __P((red_queue_t *));
202 #ifdef ALTQ_FLOWVALVE
203 static inline struct fve *flowlist_lookup __P((struct flowvalve *,
204 struct altq_pktattr *, struct timeval *));
205 static inline struct fve *flowlist_reclaim __P((struct flowvalve *,
206 struct altq_pktattr *));
207 static inline void flowlist_move_to_head __P((struct flowvalve *,
208 struct fve *));
209 static inline int fv_p2f __P((struct flowvalve *, int));
210 static struct flowvalve *fv_alloc __P((struct red *));
211 static void fv_destroy __P((struct flowvalve *));
212 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
213 struct fve **));
214 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
215 struct fve *));
216 #endif
217
218 /*
219 * red device interface
220 */
221 altqdev_decl(red);
222
223 int
224 redopen(dev, flag, fmt, l)
225 dev_t dev;
226 int flag, fmt;
227 struct lwp *l;
228 {
229 /* everything will be done when the queueing scheme is attached. */
230 return 0;
231 }
232
233 int
234 redclose(dev, flag, fmt, l)
235 dev_t dev;
236 int flag, fmt;
237 struct lwp *l;
238 {
239 red_queue_t *rqp;
240 int err, error = 0;
241
242 while ((rqp = red_list) != NULL) {
243 /* destroy all */
244 err = red_detach(rqp);
245 if (err != 0 && error == 0)
246 error = err;
247 }
248
249 return error;
250 }
251
/*
 * redioctl: ioctl entry point for the RED control device.
 *
 * Handles attach/detach of the discipline to a network interface,
 * enable/disable, statistics retrieval, runtime reconfiguration and
 * updating of the global default parameters.
 *
 * Returns 0 on success or an errno value.
 */
int
redioctl(dev, cmd, addr, flag, l)
	dev_t dev;
	ioctlcmd_t cmd;
	caddr_t addr;
	int flag;
	struct lwp *l;
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	struct proc *p = l->l_proc;
	int error = 0;

	/* check super-user privilege (reading statistics is unprivileged) */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		/* start running the discipline on the named interface */
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		/* stop the discipline; state is kept until RED_IF_DETACH */
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		/* all-zero arguments select the built-in default parameters */
		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
		    red_enqueue, red_dequeue, red_request,
		    NULL, NULL);
		if (error) {
			/* undo the allocations made above */
			red_destroy(rqp->rq_red);
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len	   = qlen(rqp->rq_q);
			q_stats->q_limit   = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			/* unscale the EWMA from its fixed-point representation */
			q_stats->q_avg	   = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
			q_stats->drop_forced   = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight		= rp->red_weight;
			q_stats->inv_pmax	= rp->red_inv_pmax;
			q_stats->th_min		= rp->red_thmin;
			q_stats->th_max		= rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows    = fv->fv_flows;
				q_stats->fv_pass     = fv->fv_stats.pass;
				q_stats->fv_predrop  = fv->fv_stats.predrop;
				q_stats->fv_alloc    = fv->fv_stats.alloc;
				q_stats->fv_escape   = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				/* flow-valve not in use: report zeroes */
				q_stats->fv_flows    = 0;
				q_stats->fv_pass     = 0;
				q_stats->fv_predrop  = 0;
				q_stats->fv_alloc    = 0;
				q_stats->fv_escape   = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			/* build the replacement state before taking splnet */
			new = red_alloc(fc->red_weight,
					fc->red_inv_pmax,
					fc->red_thmin,
					fc->red_thmax,
					fc->red_flags,
					fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			/* the hard limit must not be below th_max */
			limit = fc->red_limit;
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			fc->red_limit = limit;	/* write back the new value */

			/* swap in the new red state */
			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (0);
		break;

	case RED_SETDEFAULTS:
		/* update defaults used by subsequent red_alloc(0,...) calls */
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}
475
476 static int
477 red_detach(rqp)
478 red_queue_t *rqp;
479 {
480 red_queue_t *tmp;
481 int error = 0;
482
483 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
484 altq_disable(rqp->rq_ifq);
485
486 if ((error = altq_detach(rqp->rq_ifq)))
487 return (error);
488
489 if (red_list == rqp)
490 red_list = rqp->rq_next;
491 else {
492 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
493 if (tmp->rq_next == rqp) {
494 tmp->rq_next = rqp->rq_next;
495 break;
496 }
497 if (tmp == NULL)
498 printf("red_detach: no state found in red_list!\n");
499 }
500
501 red_destroy(rqp->rq_red);
502 FREE(rqp->rq_q, M_DEVBUF);
503 FREE(rqp, M_DEVBUF);
504 return (error);
505 }
506
507 /*
508 * red support routines
509 */
510
/*
 * red_alloc: allocate and initialize a red_t state block.
 *
 * Any argument passed as 0 selects a default: W_WEIGHT (adjusted for
 * slow links), the module-wide default thresholds/inv_pmax, or the
 * 10Mbps packet time.  The weight is forced to a power of two so the
 * EWMA can be computed with shifts.
 *
 * Returns the new red_t, or NULL if allocation fails.
 */
red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int	weight, inv_pmax, th_min, th_max;
	int	flags, pkttime;
{
	red_t 	*rp;
	int	w, i;
	int	npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* round a non-power-of-2 weight down and warn */
		printf("invalid weight value %d for red! use %d\n",
		       rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
			 * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared/refcounted per weight value) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	/* record time-of-creation so idle decay starts from here */
	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
	return (rp);
}
601
/*
 * red_destroy: release a red_t obtained from red_alloc(), dropping
 * the reference on the shared weight table and destroying any
 * attached flow-valve state.
 */
void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	/* wtab_destroy only frees when the last reference goes away */
	wtab_destroy(rp->red_wtab);
	FREE(rp, M_DEVBUF);
}
613
614 void
615 red_getstats(rp, sp)
616 red_t *rp;
617 struct redstats *sp;
618 {
619 sp->q_avg = rp->red_avg >> rp->red_wshift;
620 sp->xmit_cnt = rp->red_stats.xmit_cnt;
621 sp->drop_cnt = rp->red_stats.drop_cnt;
622 sp->drop_forced = rp->red_stats.drop_forced;
623 sp->drop_unforced = rp->red_stats.drop_unforced;
624 sp->marked_packets = rp->red_stats.marked_packets;
625 }
626
627 /*
628 * enqueue routine:
629 *
630 * returns: 0 when successfully queued.
631 * ENOBUFS when drop occurs.
632 */
633 static int
634 red_enqueue(ifq, m, pktattr)
635 struct ifaltq *ifq;
636 struct mbuf *m;
637 struct altq_pktattr *pktattr;
638 {
639 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
640
641 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
642 return ENOBUFS;
643 ifq->ifq_len++;
644 return 0;
645 }
646
/*
 * red_addq: the core RED drop/mark decision.
 *
 * Updates the exponentially-weighted average queue length, decides
 * whether to enqueue, ECN-mark, early-drop, or force-drop the packet,
 * and maintains the drop statistics and flow-valve state.
 *
 * Returns 0 when the packet was queued, -1 when it was dropped
 * (the mbuf is freed here in the drop case).
 */
int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped unconditionally */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
					pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		/* a drop resets the undropped-arrival tally */
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
783
784 /*
785 * early-drop probability is calculated as follows:
786 * prob = p_max * (avg - th_min) / (th_max - th_min)
787 * prob_a = prob / (2 - count*prob)
788 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
789 * here prob_a increases as successive undrop count increases.
790 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
791 * becomes 1 when (count >= (2 / prob))).
792 */
/*
 * Decide whether to early-drop (or mark) a packet.
 *
 * Implements prob_a = prob / (2 - count*prob), where
 * prob = p_max * (avg - th_min) / (th_max - th_min); see the comment
 * block above for the derivation.  The probability grows with each
 * successive undropped packet.
 *
 * fp_len:   (avg - TH_MIN) in fixed-point
 * fp_probd: (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
 * count:    number of successive undropped packets
 *
 * Returns 1 to drop/mark, 0 to pass the packet.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int denom;	/* denominator of the drop probability */

	denom = fp_probd - count * fp_len;

	/*
	 * count pushed the denominator to or below zero: the
	 * probability has saturated, always drop or mark.
	 */
	if (denom <= 0)
		return (1);

	/*
	 * denom is now in roughly [1..600] fixed-point (with
	 * th_max-th_min=10, p_max=1/30).  Perform a Bernoulli trial
	 * with drop probability (avg - TH_MIN) / denom.
	 */
	return ((random() % denom) < fp_len) ? 1 : 0;
}
819
820 /*
821 * try to mark CE bit to the packet.
822 * returns 1 if successfully marked, 0 otherwise.
823 */
/*
 * mark_ecn: try to set the CE (congestion experienced) codepoint on
 * the packet's IP/IPv6 header instead of dropping it.
 *
 * Only packets whose ECN field says ECN-capable are marked; the IPv4
 * checksum is incrementally updated per RFC 1624.
 *
 * Returns 1 if the packet is (or already was) marked, 0 otherwise.
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			/* the ECN bits live in the traffic-class field,
			 * bits 20-27 of the host-order ip6_flow word */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked,  mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
909
910 /*
911 * dequeue routine:
912 * must be called in splnet.
913 *
914 * returns: mbuf dequeued.
915 * NULL when no packet is available in the queue.
916 */
917
918 static struct mbuf *
919 red_dequeue(ifq, op)
920 struct ifaltq *ifq;
921 int op;
922 {
923 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
924 struct mbuf *m;
925
926 if (op == ALTDQ_POLL)
927 return qhead(rqp->rq_q);
928
929 /* op == ALTDQ_REMOVE */
930 m = red_getq(rqp->rq_red, rqp->rq_q);
931 if (m != NULL)
932 ifq->ifq_len--;
933 return (m);
934 }
935
936 struct mbuf *
937 red_getq(rp, q)
938 red_t *rp;
939 class_queue_t *q;
940 {
941 struct mbuf *m;
942
943 if ((m = _getq(q)) == NULL) {
944 if (rp->red_idle == 0) {
945 rp->red_idle = 1;
946 microtime(&rp->red_last);
947 }
948 return NULL;
949 }
950
951 rp->red_idle = 0;
952 return (m);
953 }
954
955 static int
956 red_request(ifq, req, arg)
957 struct ifaltq *ifq;
958 int req;
959 void *arg;
960 {
961 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
962
963 switch (req) {
964 case ALTRQ_PURGE:
965 red_purgeq(rqp);
966 break;
967 }
968 return (0);
969 }
970
971 static void
972 red_purgeq(rqp)
973 red_queue_t *rqp;
974 {
975 _flushq(rqp->rq_q);
976 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
977 rqp->rq_ifq->ifq_len = 0;
978 }
979
980
981 /*
982 * helper routine to calibrate avg during idle.
983 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
984 * here Wq = 1/weight and the code assumes Wq is close to zero.
985 *
986 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
987 */
988 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
989
990 struct wtab *
991 wtab_alloc(weight)
992 int weight;
993 {
994 struct wtab *w;
995 int i;
996
997 for (w = wtab_list; w != NULL; w = w->w_next)
998 if (w->w_weight == weight) {
999 w->w_refcount++;
1000 return (w);
1001 }
1002
1003 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
1004 if (w == NULL)
1005 panic("wtab_alloc: malloc failed!");
1006 w->w_weight = weight;
1007 w->w_refcount = 1;
1008 w->w_next = wtab_list;
1009 wtab_list = w;
1010
1011 /* initialize the weight table */
1012 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
1013 for (i = 1; i < 32; i++) {
1014 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
1015 if (w->w_tab[i] == 0 && w->w_param_max == 0)
1016 w->w_param_max = 1 << i;
1017 }
1018
1019 return (w);
1020 }
1021
1022 int
1023 wtab_destroy(w)
1024 struct wtab *w;
1025 {
1026 struct wtab *prev;
1027
1028 if (--w->w_refcount > 0)
1029 return (0);
1030
1031 if (wtab_list == w)
1032 wtab_list = w->w_next;
1033 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
1034 if (prev->w_next == w) {
1035 prev->w_next = w->w_next;
1036 break;
1037 }
1038
1039 FREE(w, M_DEVBUF);
1040 return (0);
1041 }
1042
1043 int32_t
1044 pow_w(w, n)
1045 struct wtab *w;
1046 int n;
1047 {
1048 int i, bit;
1049 int32_t val;
1050
1051 if (n >= w->w_param_max)
1052 return (0);
1053
1054 val = 1 << FP_SHIFT;
1055 if (n <= 0)
1056 return (val);
1057
1058 bit = 1;
1059 i = 0;
1060 while (n) {
1061 if (n & bit) {
1062 val = (val * w->w_tab[i]) >> FP_SHIFT;
1063 n &= ~bit;
1064 }
1065 i++;
1066 bit <<= 1;
1067 }
1068 return (val);
1069 }
1070
1071 #ifdef ALTQ_FLOWVALVE
1072
1073 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1074 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1075 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1076 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1077 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1078 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1079
1080 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1081 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1082
1083 #define FV_N 10 /* update fve_f every FV_N packets */
1084
1085 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1086 #define FV_TTHRESH 3 /* time threshold to delete fve */
1087 #define FV_ALPHA 5 /* extra packet count */
1088
1089 #if (__FreeBSD_version > 300000)
1090 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1091 #else
1092 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1093 #endif
1094
1095 /*
1096 * Brtt table: 127 entry table to convert drop rate (p) to
1097 * the corresponding bandwidth fraction (f)
1098 * the following equation is implemented to use scaled values,
1099 * fve_p and fve_f, in the fixed point format.
1100 *
1101 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1102 * f = Brtt(p) / (max_th + alpha)
1103 */
1104 #define BRTT_SIZE 128
1105 #define BRTT_SHIFT 12
1106 #define BRTT_MASK 0x0007f000
1107 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1108
/* precomputed Brtt(p) values for the equation above, 128 entries */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1127
/*
 * flowlist_lookup: search the flow-valve LRU list for the entry
 * matching the packet's source/destination address pair.
 *
 * Entries are ordered most-recently-dropped first, so the scan stops
 * at the first entry that is unused (lastdrop == 0) or whose last
 * drop is older than FV_TTHRESH (that entry is invalidated on the
 * way out).  When no match is found, fv_flows is refreshed with the
 * number of still-active entries scanned.
 *
 * Returns the matching fve, or NULL if none (or unknown protocol).
 */
static inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries whose last drop predates tthresh are considered stale */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* too old: invalidate and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* too old: invalidate and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1197
1198 static inline struct fve *
1199 flowlist_reclaim(fv, pktattr)
1200 struct flowvalve *fv;
1201 struct altq_pktattr *pktattr;
1202 {
1203 struct fve *fve;
1204 struct ip *ip;
1205 #ifdef INET6
1206 struct ip6_hdr *ip6;
1207 #endif
1208
1209 /*
1210 * get an entry from the tail of the LRU list.
1211 */
1212 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1213
1214 switch (pktattr->pattr_af) {
1215 case AF_INET:
1216 ip = (struct ip *)pktattr->pattr_hdr;
1217 fve->fve_flow.flow_af = AF_INET;
1218 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1219 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1220 break;
1221 #ifdef INET6
1222 case AF_INET6:
1223 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1224 fve->fve_flow.flow_af = AF_INET6;
1225 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1226 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1227 break;
1228 #endif
1229 }
1230
1231 fve->fve_state = Green;
1232 fve->fve_p = 0.0;
1233 fve->fve_f = 0.0;
1234 fve->fve_ifseq = fv->fv_ifseq - 1;
1235 fve->fve_count = 0;
1236
1237 fv->fv_flows++;
1238 #ifdef FV_STATS
1239 fv->fv_stats.alloc++;
1240 #endif
1241 return (fve);
1242 }
1243
1244 static inline void
1245 flowlist_move_to_head(fv, fve)
1246 struct flowvalve *fv;
1247 struct fve *fve;
1248 {
1249 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1250 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1251 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1252 }
1253 }
1254
1255 /*
1256 * allocate flowvalve structure
1257 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
	if (fv == NULL)
		return (NULL);

	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
	    M_WAITOK|M_ZERO);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		/* tv_sec == 0 marks the entry as unused */
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * note: entry 0 is left uninitialized; fv_p2f() never reads it
	 * (it falls back to entry 1 for small p).
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
1309
1310 static void fv_destroy(fv)
1311 struct flowvalve *fv;
1312 {
1313 FREE(fv->fv_p2ftab, M_DEVBUF);
1314 FREE(fv->fv_fves, M_DEVBUF);
1315 FREE(fv, M_DEVBUF);
1316 }
1317
1318 static inline int
1319 fv_p2f(fv, p)
1320 struct flowvalve *fv;
1321 int p;
1322 {
1323 int val, f;
1324
1325 if (p >= BRTT_PMAX)
1326 f = fv->fv_p2ftab[BRTT_SIZE-1];
1327 else if ((val = (p & BRTT_MASK)))
1328 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1329 else
1330 f = fv->fv_p2ftab[1];
1331 return (f);
1332 }
1333
1334 /*
1335 * check if an arriving packet should be pre-dropped.
1336 * called from red_addq() when a packet arrives.
1337 * returns 1 when the packet should be pre-dropped.
1338 * should be called in splnet.
1339 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	/* count every arriving packet on this interface */
	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* cache the entry for a possible later fv_dropbyred() call */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose drop probability exceeds
	 * the threshold gets its packet fraction checked against the
	 * Brtt-derived limit, and turns Red if it is over it.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay; fv_dropbyred() cancels this when red drops the packet)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1415
1416 /*
1417 * called from red_addq when a packet is dropped by red.
1418 * should be called in splnet.
1419 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no entry for this flow; recycle the LRU entry */
		fve = flowlist_reclaim(fv, pktattr);

	/* a drop makes this flow the most-recently-used entry */
	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line cancels the update
	 * in fv_checkflow() and calculate
	 * p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}
1449
1450 #endif /* ALTQ_FLOWVALVE */
1451
#ifdef KLD_MODULE

/* switch table (name and open/close/ioctl handlers) for the red discipline */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

/* register the red discipline with the ALTQ module framework */
ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */
1460
1461 #endif /* ALTQ_RED */
1462