/*	$NetBSD: altq_red.c,v 1.13.10.3 2006/05/11 23:26:17 elad Exp $	*/
/*	$KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $	*/

/*
 * Copyright (C) 1997-2000
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
/*
 * Copyright (c) 1990-1994 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Computer Systems
 *	Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.10.3 2006/05/11 23:26:17 elad Exp $");

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#if (__FreeBSD__ != 2)
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#ifdef ALTQ_FLOWVALVE
#include <sys/queue.h>
#include <sys/time.h>
#endif

#include <net/if.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <altq/altq.h>
#include <altq/altq_conf.h>
#include <altq/altq_red.h>
#ifdef ALTQ_FLOWVALVE
#include <altq/altq_flowvalve.h>
#endif

/*
 * ALTQ/RED (Random Early Detection) implementation using 32-bit
 * fixed-point calculation.
 *
 * written by kjc using the ns code as a reference.
 * you can learn more about red and ns from Sally's home page at
 * http://www-nrg.ee.lbl.gov/floyd/
 *
 * most of the red parameter values are fixed in this implementation
 * to prevent fixed-point overflow/underflow.
 * if you change the parameters, watch out for overflow/underflow!
 *
 * the parameters used are recommended values by Sally.
 * the corresponding ns config looks like:
 *	q_weight=0.00195
 *	minthresh=5 maxthresh=15 queue-size=60
 *	linterm=30
 *	dropmech=drop-tail
 *	bytes=false (can't be handled by 32-bit fixed-point)
 *	doubleq=false dqthresh=false
 *	wait=true
 */
/*
 * alternative red parameters for a slow link.
 *
 * assume the queue length goes from zero to L and then stays at L; it
 * takes N packet arrivals for q_avg to reach 63% of L.
 * when q_weight is 0.002, N is about 500 packets.
 * for a slow link like dial-up, 500 packets takes more than 1 minute!
 * when q_weight is 0.008, N is about 127 packets.
 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
 * are allowed for 0.016.
 * see Sally's paper for more details.
 */
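/*
 * (the 63% figure above is the EWMA time constant: after N updates
 * toward a constant L, q_avg = L * (1 - (1 - q_weight)^N), and
 * (1 - q_weight)^N is roughly e^-1 = 0.37 when N = 1/q_weight.
 * so N is simply the inverse of q_weight, which is what the
 * W_WEIGHT constants below encode.)
 */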
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */
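/*
 * (e.g., with FP_SHIFT = 12 the real value 1.0 is stored as
 * 1 << 12 = 4096.  multiplying two fixed-point values needs a
 * >> FP_SHIFT afterwards to renormalize, as done throughout
 * this file.)
 */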

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */

/*
 * our default policy for forced-drop is drop-tail.
 * (in altq-1.1.2 or earlier, the default was random-drop.
 * but it makes more sense to punish the cause of the surge.)
 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
 */

#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

/* default red parameter values */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;

/* internal function prototypes */
static int red_enqueue __P((struct ifaltq *, struct mbuf *,
    struct altq_pktattr *));
static struct mbuf *red_dequeue __P((struct ifaltq *, int));
static int red_request __P((struct ifaltq *, int, void *));
static void red_purgeq __P((red_queue_t *));
static int red_detach __P((red_queue_t *));
#ifdef ALTQ_FLOWVALVE
static inline struct fve *flowlist_lookup __P((struct flowvalve *,
    struct altq_pktattr *, struct timeval *));
static inline struct fve *flowlist_reclaim __P((struct flowvalve *,
    struct altq_pktattr *));
static inline void flowlist_move_to_head __P((struct flowvalve *,
    struct fve *));
static inline int fv_p2f __P((struct flowvalve *, int));
static struct flowvalve *fv_alloc __P((struct red *));
static void fv_destroy __P((struct flowvalve *));
static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
    struct fve **));
static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
    struct fve *));
#endif

/*
 * red device interface
 */
altqdev_decl(red);

int
redopen(dev, flag, fmt, l)
	dev_t dev;
	int flag, fmt;
	struct lwp *l;
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}

int
redclose(dev, flag, fmt, l)
	dev_t dev;
	int flag, fmt;
	struct lwp *l;
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

int
redioctl(dev, cmd, addr, flag, l)
	dev_t dev;
	ioctlcmd_t cmd;
	caddr_t addr;
	int flag;
	struct lwp *l;
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	struct proc *p = l->l_proc;
	int error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = kauth_authorize_generic(p->p_cred,
		    KAUTH_GENERIC_ISSUSER,
		    &p->p_acflag)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
		    red_enqueue, red_dequeue, red_request,
		    NULL, NULL);
		if (error) {
			red_destroy(rqp->rq_red);
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
			    ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len = qlen(rqp->rq_q);
			q_stats->q_limit = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			q_stats->q_avg = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt = rp->red_stats.drop_cnt;
			q_stats->drop_forced = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight = rp->red_weight;
			q_stats->inv_pmax = rp->red_inv_pmax;
			q_stats->th_min = rp->red_thmin;
			q_stats->th_max = rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows = fv->fv_flows;
				q_stats->fv_pass = fv->fv_stats.pass;
				q_stats->fv_predrop = fv->fv_stats.predrop;
				q_stats->fv_alloc = fv->fv_stats.alloc;
				q_stats->fv_escape = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows = 0;
				q_stats->fv_pass = 0;
				q_stats->fv_predrop = 0;
				q_stats->fv_alloc = 0;
				q_stats->fv_escape = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
			    ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			new = red_alloc(fc->red_weight,
			    fc->red_inv_pmax,
			    fc->red_thmin,
			    fc->red_thmax,
			    fc->red_flags,
			    fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back the new values */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

static int
red_detach(rqp)
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	free(rqp->rq_q, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}

/*
 * red support routines
 */

red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int weight, inv_pmax, th_min, th_max;
	int flags, pkttime;
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be a power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;
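
	/*
	 * (e.g., with the defaults th_min=5, th_max=15 and inv_pmax=10,
	 * probd = (2 * 10 * 10) << 12 = 200 << 12.  drop_early() compares
	 * this against count * (avg - th_min) to scale the drop
	 * probability; see the comment above drop_early() below.)
	 */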

	/* allocate weight table */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
	return (rp);
}

void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}

void
red_getstats(rp, sp)
	red_t *rp;
	struct redstats *sp;
{
	sp->q_avg = rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt = rp->red_stats.xmit_cnt;
	sp->drop_cnt = rp->red_stats.drop_cnt;
	sp->drop_forced = rp->red_stats.drop_forced;
	sp->drop_unforced = rp->red_stats.drop_unforced;
	sp->marked_packets = rp->red_stats.marked_packets;
}

/*
 * enqueue routine:
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
red_enqueue(ifq, m, pktattr)
	struct ifaltq *ifq;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
		return ENOBUFS;
	ifq->ifq_len++;
	return 0;
}

int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflowing.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */
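	/*
	 * (dividing the scaled update above by 2^(wshift + FP_SHIFT)
	 * recovers the textbook EWMA form
	 *	avg = (1 - Wq) * avg + Wq * qlen,  where Wq = 1/2^wshift,
	 * so the scaled representation needs no multiply or divide.)
	 */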

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}

/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 */
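/*
 * (worked example with the defaults, th_min=5, th_max=15, p_max=1/10:
 * when avg is one packet above th_min, prob = (1/10) * (1/10) = 0.01,
 * so prob_a starts around 0.005 and reaches 1 once roughly 200
 * consecutive packets have been accepted without a drop.)
 */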
int
drop_early(fp_len, fp_probd, count)
	int fp_len;	/* (avg - TH_MIN) in fixed-point */
	int fp_probd;	/* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
	int count;	/* how many successive undropped packets */
{
	int d;		/* denominator of drop-probability */

	d = fp_probd - count * fp_len;
	if (d <= 0)
		/* count exceeds the hard limit: drop or mark */
		return (1);

	/*
	 * now the range of d is [1..600] in fixed-point.  (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */

	if ((random() % d) < fp_len) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}

/*
 * try to set the CE bit in the packet.
 *    returns 1 if successfully marked, 0 otherwise.
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
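			/*
			 * (the sum is kept unfolded in an int: adding
			 * ~otos + tos applies the one's-complement
			 * difference of the changed octet, and the two
			 * lines after it fold the carries back into
			 * 16 bits before complementing.)
			 */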
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}

/*
 * dequeue routine:
 *	must be called in splnet.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

static struct mbuf *
red_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
	struct mbuf *m;

	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	/* op == ALTDQ_REMOVE */
	m = red_getq(rqp->rq_red, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return (m);
}

struct mbuf *
red_getq(rp, q)
	red_t *rp;
	class_queue_t *q;
{
	struct mbuf *m;

	if ((m = _getq(q)) == NULL) {
		if (rp->red_idle == 0) {
			rp->red_idle = 1;
			microtime(&rp->red_last);
		}
		return NULL;
	}

	rp->red_idle = 0;
	return (m);
}

static int
red_request(ifq, req, arg)
	struct ifaltq *ifq;
	int req;
	void *arg;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	switch (req) {
	case ALTRQ_PURGE:
		red_purgeq(rqp);
		break;
	}
	return (0);
}

static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}


/*
 * helper routine to calibrate avg during idle.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
 * here Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
 */
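/*
 * (e.g., pow_w(w, 5) decomposes n = 5 into binary 101 and multiplies
 * w_tab[0] * w_tab[2], i.e., (1-Wq)^1 * (1-Wq)^4 = (1-Wq)^5, so any
 * exponent costs at most 32 fixed-point multiplies.)
 */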
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */

struct wtab *
wtab_alloc(weight)
	int weight;
{
	struct wtab *w;
	int i;

	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
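	/*
	 * (square successively: w_tab[i] = (1 - Wq)^(2^i) in fixed-point.
	 * w_param_max records the first n = 2^i at which the entry
	 * underflows to zero; pow_w() returns 0 for any larger n.)
	 */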
	for (i = 1; i < 32; i++) {
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}

int
wtab_destroy(w)
	struct wtab *w;
{
	struct wtab *prev;

	if (--w->w_refcount > 0)
		return (0);

	if (wtab_list == w)
		wtab_list = w->w_next;
	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
		if (prev->w_next == w) {
			prev->w_next = w->w_next;
			break;
		}

	free(w, M_DEVBUF);
	return (0);
}

int32_t
pow_w(w, n)
	struct wtab *w;
	int n;
{
	int i, bit;
	int32_t val;

	if (n >= w->w_param_max)
		return (0);

	val = 1 << FP_SHIFT;
	if (n <= 0)
		return (val);

	bit = 1;
	i = 0;
	while (n) {
		if (n & bit) {
			val = (val * w->w_tab[i]) >> FP_SHIFT;
			n &= ~bit;
		}
		i++;
		bit <<= 1;
	}
	return (val);
}

#ifdef ALTQ_FLOWVALVE

#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE	64	/* how many flows in flowlist */

#define	FV_N		10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH 1	/* backoff threshold interval in seconds */
#define	FV_TTHRESH	3	/* time threshold to delete fve */
#define	FV_ALPHA	5	/* extra packet count */

#if (__FreeBSD_version > 300000)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: 127-entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
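/*
 * (fv_p2f() indexes this table with bits 12-18 of the scaled drop rate
 * p: BRTT_MASK extracts them and BRTT_SHIFT drops the fractional bits,
 * so the 128 entries cover p up to BRTT_PMAX.)
 */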

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};

static inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
			    &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
			    &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

static inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	fve->fve_p = 0;
	fve->fve_f = 0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}

static inline void
flowlist_move_to_head(fv, fve)
	struct flowvalve *fv;
	struct fve *fve;
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

/*
 * allocate flowvalve structure
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
	if (fv == NULL)
		return (NULL);

	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
	    M_WAITOK|M_ZERO);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}

static void
fv_destroy(fv)
	struct flowvalve *fv;
{
	free(fv->fv_p2ftab, M_DEVBUF);
	free(fv->fv_fves, M_DEVBUF);
	free(fv, M_DEVBUF);
}

static inline int
fv_p2f(fv, p)
	struct flowvalve *fv;
	int p;
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return (f);
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 *  f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
		    (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
		    + fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 */
static void
fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line cancels the update
	 * in fv_checkflow() and calculates
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
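	/*
	 * (with p scaled by FV_PSCALE and FP_SHIFT, the constant
	 * Wp = 1/128 scales to exactly 1 << FP_SHIFT, which is why a
	 * bare constant is added above: fv_checkflow() has already
	 * applied the (1 - Wp) factor for this packet.)
	 */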

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ_RED */