altq_red.c revision 1.2.2.3 1 /* $NetBSD: altq_red.c,v 1.2.2.3 2001/04/21 17:46:12 bouyer Exp $ */
2 /* $KAME: altq_red.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $ */
3
4 /*
5 * Copyright (C) 1997-2000
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #if defined(__FreeBSD__) || defined(__NetBSD__)
64 #include "opt_altq.h"
65 #if (__FreeBSD__ != 2)
66 #include "opt_inet.h"
67 #ifdef __FreeBSD__
68 #include "opt_inet6.h"
69 #endif
70 #endif
71 #endif /* __FreeBSD__ || __NetBSD__ */
72 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
73
74 #include <sys/param.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/sockio.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/errno.h>
82 #include <sys/kernel.h>
83 #ifdef ALTQ_FLOWVALVE
84 #include <sys/queue.h>
85 #include <sys/time.h>
86 #endif
87
88 #include <net/if.h>
89 #include <net/if_types.h>
90
91 #include <netinet/in.h>
92 #include <netinet/in_systm.h>
93 #include <netinet/ip.h>
94 #ifdef INET6
95 #include <netinet/ip6.h>
96 #endif
97
98 #include <altq/altq.h>
99 #include <altq/altq_conf.h>
100 #include <altq/altq_red.h>
101 #ifdef ALTQ_FLOWVALVE
102 #include <altq/altq_flowvalve.h>
103 #endif
104
105 /*
106 * ALTQ/RED (Random Early Detection) implementation using 32-bit
107 * fixed-point calculation.
108 *
109 * written by kjc using the ns code as a reference.
110 * you can learn more about red and ns from Sally's home page at
111 * http://www-nrg.ee.lbl.gov/floyd/
112 *
113 * most of the red parameter values are fixed in this implementation
114 * to prevent fixed-point overflow/underflow.
115 * if you change the parameters, watch out for overflow/underflow!
116 *
117 * the parameters used are recommended values by Sally.
118 * the corresponding ns config looks:
119 * q_weight=0.00195
120 * minthresh=5 maxthresh=15 queue-size=60
121 * linterm=30
122 * dropmech=drop-tail
123 * bytes=false (can't be handled by 32-bit fixed-point)
124 * doubleq=false dqthresh=false
125 * wait=true
126 */
127 /*
128 * alternative red parameters for a slow link.
129 *
130 * assume the queue length becomes from zero to L and keeps L, it takes
131 * N packets for q_avg to reach 63% of L.
132 * when q_weight is 0.002, N is about 500 packets.
133 * for a slow link like dial-up, 500 packets takes more than 1 minute!
134 * when q_weight is 0.008, N is about 127 packets.
135 * when q_weight is 0.016, N is about 63 packets.
136 * bursts of 50 packets are allowd for 0.002, bursts of 25 packets
137 * are allowed for 0.016.
138 * see Sally's paper for more details.
139 */
140 /* normal red parameters */
141 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
142 /* q_weight = 0.00195 */
143
144 /* red parameters for a slow link */
145 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
146 /* q_weight = 0.0078125 */
147
148 /* red parameters for a very slow link (e.g., dialup) */
149 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
150 /* q_weight = 0.015625 */
151
152 /* fixed-point uses 12-bit decimal places */
153 #define FP_SHIFT 12 /* fixed-point shift */
154
155 /* red parameters for drop probability */
156 #define INV_P_MAX 10 /* inverse of max drop probability */
157 #define TH_MIN 5 /* min threshold */
158 #define TH_MAX 15 /* max threshold */
159
160 #define RED_LIMIT 60 /* default max queue lenght */
161 #define RED_STATS /* collect statistics */
162
163 /*
164 * our default policy for forced-drop is drop-tail.
165 * (in altq-1.1.2 or earlier, the default was random-drop.
166 * but it makes more sense to punish the cause of the surge.)
167 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
168 */
169
170 #ifdef ALTQ_FLOWVALVE
171 /*
172 * flow-valve is an extention to protect red from unresponsive flows
173 * and to promote end-to-end congestion control.
174 * flow-valve observes the average drop rates of the flows that have
175 * experienced packet drops in the recent past.
176 * when the average drop rate exceeds the threshold, the flow is
177 * blocked by the flow-valve. the trapped flow should back off
178 * exponentially to escape from the flow-valve.
179 */
180 #ifdef RED_RANDOM_DROP
181 #error "random-drop can't be used with flow-valve!"
182 #endif
183 #endif /* ALTQ_FLOWVALVE */
184
185 /* red_list keeps all red_queue_t's allocated. */
186 static red_queue_t *red_list = NULL;
187
188 /* default red parameter values */
189 static int default_th_min = TH_MIN;
190 static int default_th_max = TH_MAX;
191 static int default_inv_pmax = INV_P_MAX;
192
193 /* internal function prototypes */
194 static int red_enqueue __P((struct ifaltq *, struct mbuf *,
195 struct altq_pktattr *));
196 static struct mbuf *red_dequeue __P((struct ifaltq *, int));
197 static int red_request __P((struct ifaltq *, int, void *));
198 static void red_purgeq __P((red_queue_t *));
199 static int red_detach __P((red_queue_t *));
200 #ifdef ALTQ_FLOWVALVE
201 static __inline struct fve *flowlist_lookup __P((struct flowvalve *,
202 struct altq_pktattr *, struct timeval *));
203 static __inline struct fve *flowlist_reclaim __P((struct flowvalve *,
204 struct altq_pktattr *));
205 static __inline void flowlist_move_to_head __P((struct flowvalve *,
206 struct fve *));
207 static __inline int fv_p2f __P((struct flowvalve *, int));
208 static struct flowvalve *fv_alloc __P((struct red *));
209 static void fv_destroy __P((struct flowvalve *));
210 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
211 struct fve **));
212 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
213 struct fve *));
214 #endif
215
216 /*
217 * red device interface
218 */
219 altqdev_decl(red);
220
/*
 * Device open handler for the red device.
 * No per-open state is needed; all real setup happens when the
 * queueing scheme is attached via RED_IF_ATTACH.
 */
int
redopen(dev_t dev, int flag, int fmt, struct proc *p)
{
	/* everything will be done when the queueing scheme is attached. */
	return (0);
}
230
231 int
232 redclose(dev, flag, fmt, p)
233 dev_t dev;
234 int flag, fmt;
235 struct proc *p;
236 {
237 red_queue_t *rqp;
238 int err, error = 0;
239
240 while ((rqp = red_list) != NULL) {
241 /* destroy all */
242 err = red_detach(rqp);
243 if (err != 0 && error == 0)
244 error = err;
245 }
246
247 return error;
248 }
249
/*
 * Ioctl handler for the red device.  Dispatches the RED_* commands
 * used by userland to enable/disable RED on an interface, attach or
 * detach RED state, read statistics, reconfigure parameters, and
 * change the global defaults.
 */
int
redioctl(dev, cmd, addr, flag, p)
	dev_t dev;
	ioctlcmd_t cmd;
	caddr_t addr;
	int flag;
	struct proc *p;
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	int error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		/* reading statistics requires no privilege */
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		/* start RED on an already-attached interface */
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		/* stop RED but keep the state attached */
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}
		bzero(rqp, sizeof(red_queue_t));

		MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
		       M_DEVBUF, M_WAITOK);
		if (rqp->rq_q == NULL) {
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}
		bzero(rqp->rq_q, sizeof(class_queue_t));

		/* all-zero arguments: use the default red parameters */
		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
				    red_enqueue, red_dequeue, red_request,
				    NULL, NULL);
		if (error) {
			/* roll back everything allocated above */
			red_destroy(rqp->rq_red);
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len = qlen(rqp->rq_q);
			q_stats->q_limit = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			/* descale the fixed-point EWMA average for export */
			q_stats->q_avg = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt = rp->red_stats.drop_cnt;
			q_stats->drop_forced = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight = rp->red_weight;
			q_stats->inv_pmax = rp->red_inv_pmax;
			q_stats->th_min = rp->red_thmin;
			q_stats->th_max = rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows = fv->fv_flows;
				q_stats->fv_pass = fv->fv_stats.pass;
				q_stats->fv_predrop = fv->fv_stats.predrop;
				q_stats->fv_alloc = fv->fv_stats.alloc;
				q_stats->fv_escape = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				/* flowvalve disabled or not compiled in */
				q_stats->fv_flows = 0;
				q_stats->fv_pass = 0;
				q_stats->fv_predrop = 0;
				q_stats->fv_alloc = 0;
				q_stats->fv_escape = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			new = red_alloc(fc->red_weight,
					fc->red_inv_pmax,
					fc->red_thmin,
					fc->red_thmax,
					fc->red_flags,
					fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			/* swap in the new red state at splnet */
			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			/* the hard queue limit must not be below th_max */
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			fc->red_limit = limit;	/* write back the new value */

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values */
			/*
			 * NOTE(review): red_limit was already written back
			 * above; this second store is redundant but harmless.
			 */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (0);
		break;

	case RED_SETDEFAULTS:
		/* change the defaults used by subsequent red_alloc calls */
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}
474
/*
 * Detach RED state from its interface: disable and detach the altq
 * hooks, unlink rqp from the global red_list, then free everything.
 * Returns 0 on success or the altq_detach error (in which case the
 * state is left intact).
 */
static int
red_detach(rqp)
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	/* if the altq layer refuses to detach, keep our state intact */
	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	/* unlink rqp from the singly-linked red_list */
	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	FREE(rqp->rq_q, M_DEVBUF);
	FREE(rqp, M_DEVBUF);
	return (error);
}
505
506 /*
507 * red support routines
508 */
509
/*
 * Allocate and initialize a red_t.
 * Any parameter passed as 0 selects its default:
 *   weight	inverse of the EWMA weight (must be a power of 2)
 *   inv_pmax	inverse of the maximum drop probability
 *   th_min	minimum average-queue threshold (packets)
 *   th_max	maximum average-queue threshold (packets)
 *   flags	REDF_* behavior flags
 *   pkttime	average packet transmission time in usec
 * Returns the new red_t, or NULL if allocation fails.
 */
red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int weight, inv_pmax, th_min, th_max;
	int flags, pkttime;
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
	if (rp == NULL)
		return (NULL);
	bzero(rp, sizeof(red_t));

	rp->red_avg = 0;
	rp->red_idle = 1;	/* the queue starts out idle */

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* non-power-of-2 weight: round down and warn */
		printf("invalid weight value %d for red! use %d\n",
		       rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *   probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
			 * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared, refcounted per weight value) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
	return (rp);
}
601
602 void
603 red_destroy(rp)
604 red_t *rp;
605 {
606 #ifdef ALTQ_FLOWVALVE
607 if (rp->red_flowvalve != NULL)
608 fv_destroy(rp->red_flowvalve);
609 #endif
610 wtab_destroy(rp->red_wtab);
611 FREE(rp, M_DEVBUF);
612 }
613
614 void
615 red_getstats(rp, sp)
616 red_t *rp;
617 struct redstats *sp;
618 {
619 sp->q_avg = rp->red_avg >> rp->red_wshift;
620 sp->xmit_cnt = rp->red_stats.xmit_cnt;
621 sp->drop_cnt = rp->red_stats.drop_cnt;
622 sp->drop_forced = rp->red_stats.drop_forced;
623 sp->drop_unforced = rp->red_stats.drop_unforced;
624 sp->marked_packets = rp->red_stats.marked_packets;
625 }
626
627 /*
628 * enqueue routine:
629 *
630 * returns: 0 when successfully queued.
631 * ENOBUFS when drop occurs.
632 */
633 static int
634 red_enqueue(ifq, m, pktattr)
635 struct ifaltq *ifq;
636 struct mbuf *m;
637 struct altq_pktattr *pktattr;
638 {
639 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
640
641 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
642 return ENOBUFS;
643 ifq->ifq_len++;
644 return 0;
645 }
646
/*
 * red_addq: the RED enqueue decision.
 * Updates the EWMA average queue length (compensating for idle
 * periods), then decides whether to enqueue the packet, ECN-mark it,
 * or drop it (early/unforced or forced), maintaining statistics.
 *
 * returns: 0  when the packet was queued.
 *	    -1 when the packet was dropped (the mbuf is freed here).
 */
int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			/* this flow is blocked by the flowvalve: drop */
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
					pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first time avg exceeds th_min: restart the count */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
783
784 /*
785 * early-drop probability is calculated as follows:
786 * prob = p_max * (avg - th_min) / (th_max - th_min)
787 * prob_a = prob / (2 - count*prob)
788 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
789 * here prob_a increases as successive undrop count increases.
790 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
791 * becomes 1 when (count >= (2 / prob))).
792 */
/*
 * Bernoulli trial for RED early drop.
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * prob_a increases as the run of undropped packets grows: it starts
 * at prob/2, reaches prob at count == 1/prob, and hits 1 at
 * count >= 2/prob.
 *
 * returns 1 to drop (or mark), 0 to pass the packet through.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int denom;		/* denominator of the drop probability */

	denom = fp_probd - count * fp_len;
	if (denom <= 0) {
		/* the undropped run exceeded the hard limit: drop or mark */
		return (1);
	}

	/*
	 * denom ranges over [1..600] in fixed-point (for
	 * th_max-th_min=10 and p_max=1/30); drop with
	 * probability (avg - TH_MIN) / denom.
	 */
	return ((random() % denom) < fp_len) ? 1 : 0;
}
819
820 /*
821 * try to mark CE bit to the packet.
822 * returns 1 if successfully marked, 0 otherwise.
823 */
/*
 * try to mark CE bit to the packet.
 *    returns 1 if successfully marked, 0 otherwise.
 * Only packets that advertise ECN capability (ECT set) are marked;
 * for IPv4 the header checksum is updated incrementally.
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale: invalidate it so it isn't reused */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */
			if (ip->ip_tos & IPTOS_ECT) {
				/* ECN-capable, mark ECN bit. */
				if ((ip->ip_tos & IPTOS_CE) == 0) {
#if (IPTOS_CE == 0x01)
					u_short sum;

					ip->ip_tos |= IPTOS_CE;
					/*
					 * optimized version when IPTOS_CE
					 * is 0x01.
					 *   HC' = HC -1   when HC > 0
					 *       = 0xfffe  when HC = 0
					 */
					sum = ntohs(ip->ip_sum);
					if (sum == 0)
						sum = 0xfffe;
					else
						sum -= 1;
					ip->ip_sum = htons(sum);
#else /* IPTOS_CE != 0x01 */
					long sum;

					ip->ip_tos |= IPTOS_CE;
					/*
					 * update checksum (from RFC1624)
					 *   HC' = ~(~HC + ~m + m')
					 */
					sum = ~ntohs(ip->ip_sum) & 0xffff;
					sum += 0xffff + IPTOS_CE;
					/* fold the carries back in */
					sum = (sum >> 16) + (sum & 0xffff);
					sum += (sum >> 16);  /* add carry */

					ip->ip_sum = htons(~sum & 0xffff);
#endif /* IPTOS_CE != 0x01 */
				}
				return (1);
			}
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			/* the ECN bits live in the traffic-class field
			 * inside the flow word */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if (flowlabel & (IPTOS_ECT << 20)) {
				/* ECN-capable, mark ECN bit. */
				flowlabel |= (IPTOS_CE << 20);
				ip6->ip6_flow = htonl(flowlabel);
				return (1);
			}
		}
		break;
#endif /* INET6 */
	}

	/* not marked */
	return (0);
}
916
917 /*
918 * dequeue routine:
919 * must be called in splnet.
920 *
921 * returns: mbuf dequeued.
922 * NULL when no packet is available in the queue.
923 */
924
925 static struct mbuf *
926 red_dequeue(ifq, op)
927 struct ifaltq *ifq;
928 int op;
929 {
930 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
931 struct mbuf *m;
932
933 if (op == ALTDQ_POLL)
934 return qhead(rqp->rq_q);
935
936 /* op == ALTDQ_REMOVE */
937 m = red_getq(rqp->rq_red, rqp->rq_q);
938 if (m != NULL)
939 ifq->ifq_len--;
940 return (m);
941 }
942
943 struct mbuf *
944 red_getq(rp, q)
945 red_t *rp;
946 class_queue_t *q;
947 {
948 struct mbuf *m;
949
950 if ((m = _getq(q)) == NULL) {
951 if (rp->red_idle == 0) {
952 rp->red_idle = 1;
953 microtime(&rp->red_last);
954 }
955 return NULL;
956 }
957
958 rp->red_idle = 0;
959 return (m);
960 }
961
962 static int
963 red_request(ifq, req, arg)
964 struct ifaltq *ifq;
965 int req;
966 void *arg;
967 {
968 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
969
970 switch (req) {
971 case ALTRQ_PURGE:
972 red_purgeq(rqp);
973 break;
974 }
975 return (0);
976 }
977
978 static void
979 red_purgeq(rqp)
980 red_queue_t *rqp;
981 {
982 _flushq(rqp->rq_q);
983 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
984 rqp->rq_ifq->ifq_len = 0;
985 }
986
987
988 /*
989 * helper routine to calibrate avg during idle.
990 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
991 * here Wq = 1/weight and the code assumes Wq is close to zero.
992 *
993 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
994 */
995 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
996
/*
 * Look up or create the weight table for `weight'.  Tables are
 * reference-counted and kept on wtab_list so all red queues using
 * the same weight share a single table.
 * Panics if memory allocation fails.
 */
struct wtab *
wtab_alloc(weight)
	int weight;
{
	struct wtab *w;
	int i;

	/* reuse an existing table for this weight if one is cached */
	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	bzero(w, sizeof(struct wtab));
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table: w_tab[i] = (1 - 1/weight)^(2^i) */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		/* w_param_max: smallest 2^i whose entry underflows to 0 */
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}
1029
1030 int
1031 wtab_destroy(w)
1032 struct wtab *w;
1033 {
1034 struct wtab *prev;
1035
1036 if (--w->w_refcount > 0)
1037 return (0);
1038
1039 if (wtab_list == w)
1040 wtab_list = w->w_next;
1041 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
1042 if (prev->w_next == w) {
1043 prev->w_next = w->w_next;
1044 break;
1045 }
1046
1047 FREE(w, M_DEVBUF);
1048 return (0);
1049 }
1050
1051 int32_t
1052 pow_w(w, n)
1053 struct wtab *w;
1054 int n;
1055 {
1056 int i, bit;
1057 int32_t val;
1058
1059 if (n >= w->w_param_max)
1060 return (0);
1061
1062 val = 1 << FP_SHIFT;
1063 if (n <= 0)
1064 return (val);
1065
1066 bit = 1;
1067 i = 0;
1068 while (n) {
1069 if (n & bit) {
1070 val = (val * w->w_tab[i]) >> FP_SHIFT;
1071 n &= ~bit;
1072 }
1073 i++;
1074 bit <<= 1;
1075 }
1076 return (val);
1077 }
1078
1079 #ifdef ALTQ_FLOWVALVE
1080
1081 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1082 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1083 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1084 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1085 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1086 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1087
1088 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1089 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1090
1091 #define FV_N 10 /* update fve_f every FV_N packets */
1092
1093 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1094 #define FV_TTHRESH 3 /* time threshold to delete fve */
1095 #define FV_ALPHA 5 /* extra packet count */
1096
1097 #define FV_STATS
1098
1099 #if (__FreeBSD_version > 300000)
1100 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1101 #else
1102 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1103 #endif
1104
1105 /*
1106 * Brtt table: 127 entry table to convert drop rate (p) to
1107 * the corresponding bandwidth fraction (f)
1108 * the following equation is implemented to use scaled values,
1109 * fve_p and fve_f, in the fixed point format.
1110 *
1111 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1112 * f = Brtt(p) / (max_th + alpha)
1113 */
1114 #define BRTT_SIZE 128
1115 #define BRTT_SHIFT 12
1116 #define BRTT_MASK 0x0007f000
1117 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1118
/*
 * precomputed Brtt(p) values in fixed-point (see the equation above);
 * presumably indexed by the scaled drop rate via BRTT_SHIFT/BRTT_MASK
 * — confirm against fv_p2f.
 */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1137
/*
 * flowlist_lookup: search the LRU flow list for the entry matching
 * the packet's src/dst address pair.  An entry whose last drop is
 * older than FV_TTHRESH seconds is expired (tv_sec reset to 0) and
 * the scan stops there, since the list is kept in LRU order.
 * Returns the matching fve, or NULL; on a full miss the count of
 * active entries is saved back into fv->fv_flows.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* unused entry: end of active part */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* too old: expire it and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1207
1208 static __inline struct fve *
1209 flowlist_reclaim(fv, pktattr)
1210 struct flowvalve *fv;
1211 struct altq_pktattr *pktattr;
1212 {
1213 struct fve *fve;
1214 struct ip *ip;
1215 #ifdef INET6
1216 struct ip6_hdr *ip6;
1217 #endif
1218
1219 /*
1220 * get an entry from the tail of the LRU list.
1221 */
1222 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1223
1224 switch (pktattr->pattr_af) {
1225 case AF_INET:
1226 ip = (struct ip *)pktattr->pattr_hdr;
1227 fve->fve_flow.flow_af = AF_INET;
1228 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1229 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1230 break;
1231 #ifdef INET6
1232 case AF_INET6:
1233 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1234 fve->fve_flow.flow_af = AF_INET6;
1235 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1236 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1237 break;
1238 #endif
1239 }
1240
1241 fve->fve_state = Green;
1242 fve->fve_p = 0.0;
1243 fve->fve_f = 0.0;
1244 fve->fve_ifseq = fv->fv_ifseq - 1;
1245 fve->fve_count = 0;
1246
1247 fv->fv_flows++;
1248 #ifdef FV_STATS
1249 fv->fv_stats.alloc++;
1250 #endif
1251 return (fve);
1252 }
1253
1254 static __inline void
1255 flowlist_move_to_head(fv, fve)
1256 struct flowvalve *fv;
1257 struct fve *fve;
1258 {
1259 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1260 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1261 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1262 }
1263 }
1264
1265 /*
1266 * allocate flowvalve structure
1267 */
1268 static struct flowvalve *
1269 fv_alloc(rp)
1270 struct red *rp;
1271 {
1272 struct flowvalve *fv;
1273 struct fve *fve;
1274 int i, num;
1275
1276 num = FV_FLOWLISTSIZE;
1277 MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
1278 M_DEVBUF, M_WAITOK);
1279 if (fv == NULL)
1280 return (NULL);
1281 bzero(fv, sizeof(struct flowvalve));
1282
1283 MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
1284 M_DEVBUF, M_WAITOK);
1285 if (fv->fv_fves == NULL) {
1286 FREE(fv, M_DEVBUF);
1287 return (NULL);
1288 }
1289 bzero(fv->fv_fves, sizeof(struct fve) * num);
1290
1291 fv->fv_flows = 0;
1292 TAILQ_INIT(&fv->fv_flowlist);
1293 for (i = 0; i < num; i++) {
1294 fve = &fv->fv_fves[i];
1295 fve->fve_lastdrop.tv_sec = 0;
1296 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1297 }
1298
1299 /* initialize drop rate threshold in scaled fixed-point */
1300 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1301
1302 /* initialize drop rate to fraction table */
1303 MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
1304 M_DEVBUF, M_WAITOK);
1305 if (fv->fv_p2ftab == NULL) {
1306 FREE(fv->fv_fves, M_DEVBUF);
1307 FREE(fv, M_DEVBUF);
1308 return (NULL);
1309 }
1310 /*
1311 * create the p2f table.
1312 * (shift is used to keep the precision)
1313 */
1314 for (i = 1; i < BRTT_SIZE; i++) {
1315 int f;
1316
1317 f = brtt_tab[i] << 8;
1318 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1319 }
1320
1321 return (fv);
1322 }
1323
/*
 * fv_destroy: release a flowvalve structure allocated by fv_alloc(),
 * including the fve array and the p-to-f table.
 */
static void fv_destroy(fv)
	struct flowvalve *fv;
{
	FREE(fv->fv_p2ftab, M_DEVBUF);
	FREE(fv->fv_fves, M_DEVBUF);
	FREE(fv, M_DEVBUF);
}
1331
1332 static __inline int
1333 fv_p2f(fv, p)
1334 struct flowvalve *fv;
1335 int p;
1336 {
1337 int val, f;
1338
1339 if (p >= BRTT_PMAX)
1340 f = fv->fv_p2ftab[BRTT_SIZE-1];
1341 else if ((val = (p & BRTT_MASK)))
1342 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1343 else
1344 f = fv->fv_p2ftab[1];
1345 return (f);
1346 }
1347
/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 *
 * when the packet's flow has an entry, *fcache is set to it so that
 * fv_dropbyred() can skip a second list lookup; *fcache is left
 * untouched otherwise.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;		/* interface-wide packet sequence number */
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose drop probability exceeds
	 * the threshold turns Red when its arrival fraction f is above
	 * the fair share derived from p.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec;
			 * forgive the flow and let it back to Green */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * decay the drop probability on every accepted packet:
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1429
/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 *
 * fcache, when non-NULL, is the flow entry already looked up by
 * fv_checkflow() for this packet, saving a second list scan.
 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no entry for this flow; recycle the LRU entry */
		fve = flowlist_reclaim(fv, pktattr);

	/* this flow becomes the most recently penalized entry */
	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line cancels the update
	 * in fv_checkflow() and calculate
	 *   p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;	/* remember the drop time */
}
1463
1464 #endif /* ALTQ_FLOWVALVE */
1465
#ifdef KLD_MODULE

/* device switch entry points for the loadable red queueing module */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

/* register this discipline with the ALTQ module framework */
ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */
1474
1475 #endif /* ALTQ_RED */
1476