/*	$NetBSD: altq_red.c,v 1.13 2005/12/24 20:10:16 perry Exp $	*/
/*	$KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $	*/

/*
 * Copyright (C) 1997-2000
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
/*
 * Copyright (c) 1990-1994 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Computer Systems
 *	Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13 2005/12/24 20:10:16 perry Exp $");

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#if (__FreeBSD__ != 2)
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#ifdef ALTQ_FLOWVALVE
#include <sys/queue.h>
#include <sys/time.h>
#endif

#include <net/if.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <altq/altq.h>
#include <altq/altq_conf.h>
#include <altq/altq_red.h>
#ifdef ALTQ_FLOWVALVE
#include <altq/altq_flowvalve.h>
#endif

/*
 * ALTQ/RED (Random Early Detection) implementation using 32-bit
 * fixed-point calculation.
 *
 * written by kjc using the ns code as a reference.
 * you can learn more about red and ns from Sally's home page at
 * http://www-nrg.ee.lbl.gov/floyd/
 *
 * most of the red parameter values are fixed in this implementation
 * to prevent fixed-point overflow/underflow.
 * if you change the parameters, watch out for overflow/underflow!
 *
 * the parameters used are recommended values by Sally.
 * the corresponding ns config looks like:
 *	q_weight=0.00195
 *	minthresh=5 maxthresh=15 queue-size=60
 *	linterm=30
 *	dropmech=drop-tail
 *	bytes=false (can't be handled by 32-bit fixed-point)
 *	doubleq=false dqthresh=false
 *	wait=true
 */
/*
 * alternative red parameters for a slow link.
 *
 * assume the queue length grows from zero to L and then stays at L;
 * it takes N packets for q_avg to reach 63% of L.
 * when q_weight is 0.002, N is about 500 packets.
 * for a slow link like dial-up, 500 packets takes more than 1 minute!
 * when q_weight is 0.008, N is about 127 packets.
 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
 * are allowed for 0.016.
 * see Sally's paper for more details.
 */
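/*
 * editorial sketch (not from the original sources): the 63% figure
 * above follows from the EWMA recurrence avg += Wq * (qlen - avg).
 * holding qlen at L gives avg = L * (1 - (1 - Wq)^N) after N packets,
 * and since (1 - Wq)^(1/Wq) ~= 1/e, N ~= 1/Wq yields avg ~= 0.63 * L;
 * e.g., Wq = 1/512 needs about 512 packets, matching the 500 quoted.
 */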
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */
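/*
 * editorial example (sketch): with FP_SHIFT = 12 the scale factor is
 * 1 << 12 = 4096, so 1.0 is stored as 4096, 0.5 as 2048, and a
 * q_weight of 1/512 as 4096 / 512 = 8.
 */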

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */

/*
 * our default policy for forced-drop is drop-tail.
 * (in altq-1.1.2 or earlier, the default was random-drop.
 * but it makes more sense to punish the cause of the surge.)
 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
 */

#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

/* default red parameter values */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;

/* internal function prototypes */
static int red_enqueue __P((struct ifaltq *, struct mbuf *,
	struct altq_pktattr *));
static struct mbuf *red_dequeue __P((struct ifaltq *, int));
static int red_request __P((struct ifaltq *, int, void *));
static void red_purgeq __P((red_queue_t *));
static int red_detach __P((red_queue_t *));
#ifdef ALTQ_FLOWVALVE
static inline struct fve *flowlist_lookup __P((struct flowvalve *,
	struct altq_pktattr *, struct timeval *));
static inline struct fve *flowlist_reclaim __P((struct flowvalve *,
	struct altq_pktattr *));
static inline void flowlist_move_to_head __P((struct flowvalve *,
	struct fve *));
static inline int fv_p2f __P((struct flowvalve *, int));
static struct flowvalve *fv_alloc __P((struct red *));
static void fv_destroy __P((struct flowvalve *));
static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
	struct fve **));
static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
	struct fve *));
#endif

/*
 * red device interface
 */
altqdev_decl(red);

int
redopen(dev, flag, fmt, l)
	dev_t dev;
	int flag, fmt;
	struct lwp *l;
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}

int
redclose(dev, flag, fmt, l)
	dev_t dev;
	int flag, fmt;
	struct lwp *l;
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

int
redioctl(dev, cmd, addr, flag, l)
	dev_t dev;
	ioctlcmd_t cmd;
	caddr_t addr;
	int flag;
	struct lwp *l;
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	struct proc *p = l->l_proc;
	int error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}
		(void)memset(rqp, 0, sizeof(red_queue_t));

		MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
		       M_DEVBUF, M_WAITOK);
		if (rqp->rq_q == NULL) {
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}
		(void)memset(rqp->rq_q, 0, sizeof(class_queue_t));

		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
				    red_enqueue, red_dequeue, red_request,
				    NULL, NULL);
		if (error) {
			red_destroy(rqp->rq_red);
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len = qlen(rqp->rq_q);
			q_stats->q_limit = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			q_stats->q_avg = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt = rp->red_stats.drop_cnt;
			q_stats->drop_forced = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight = rp->red_weight;
			q_stats->inv_pmax = rp->red_inv_pmax;
			q_stats->th_min = rp->red_thmin;
			q_stats->th_max = rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows = fv->fv_flows;
				q_stats->fv_pass = fv->fv_stats.pass;
				q_stats->fv_predrop = fv->fv_stats.predrop;
				q_stats->fv_alloc = fv->fv_stats.alloc;
				q_stats->fv_escape = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows = 0;
				q_stats->fv_pass = 0;
				q_stats->fv_predrop = 0;
				q_stats->fv_alloc = 0;
				q_stats->fv_escape = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			new = red_alloc(fc->red_weight,
					fc->red_inv_pmax,
					fc->red_thmin,
					fc->red_thmax,
					fc->red_flags,
					fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			fc->red_limit = limit;	/* write back the new value */

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

static int
red_detach(rqp)
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	FREE(rqp->rq_q, M_DEVBUF);
	FREE(rqp, M_DEVBUF);
	return (error);
}

/*
 * red support routines
 */

red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int weight, inv_pmax, th_min, th_max;
	int flags, pkttime;
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
	if (rp == NULL)
		return (NULL);
	(void)memset(rp, 0, sizeof(red_t));

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		       rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
			 * rp->red_inv_pmax) << FP_SHIFT;
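	/*
	 * editorial example (sketch): with the defaults th_min = 5,
	 * th_max = 15 and inv_pmax = 10, probd = 2 * 10 * 10 << 12,
	 * i.e. the fixed-point constant 200.0 handed to drop_early().
	 */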

	/* allocate weight table */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
	return (rp);
}
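
/*
 * usage sketch (editorial, mirrors the RED_IF_ATTACH case above):
 *
 *	red_t *rp = red_alloc(0, 0, 0, 0, 0, 0);
 *
 * all-zero arguments select the defaults: W_WEIGHT (or a slower-link
 * weight once pkttime is known), default_th_min, default_th_max and
 * default_inv_pmax.
 */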

void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	wtab_destroy(rp->red_wtab);
	FREE(rp, M_DEVBUF);
}

void
red_getstats(rp, sp)
	red_t *rp;
	struct redstats *sp;
{
	sp->q_avg = rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt = rp->red_stats.xmit_cnt;
	sp->drop_cnt = rp->red_stats.drop_cnt;
	sp->drop_forced = rp->red_stats.drop_forced;
	sp->drop_unforced = rp->red_stats.drop_unforced;
	sp->marked_packets = rp->red_stats.marked_packets;
}

/*
 * enqueue routine:
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
red_enqueue(ifq, m, pktattr)
	struct ifaltq *ifq;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
		return ENOBUFS;
	ifq->ifq_len++;
	return 0;
}

int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */
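	/*
	 * editorial derivation (sketch): red_avg holds avg scaled by
	 * weight in fixed-point, i.e. avg << (wshift + FP_SHIFT).
	 * under that scaling the textbook update
	 *	avg += Wq * (qlen - avg)
	 * becomes
	 *	red_avg += (qlen << FP_SHIFT) - (red_avg >> wshift)
	 * which is the line above; no multiplication is needed.
	 */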

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}

/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as the count of successive undropped packets
 * increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 */
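/*
 * editorial worked example (sketch, using the defaults): if avg sits
 * one packet above th_min, fp_len = 1 << 12 and fp_probd = 200 << 12,
 * so d = (200 - count) << 12 and "random() % d < fp_len" approximates
 * prob_a = 1 / (200 - count); the drop becomes certain (d <= 0) once
 * 200 packets pass undropped.
 */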
int
drop_early(fp_len, fp_probd, count)
	int fp_len;	/* (avg - TH_MIN) in fixed-point */
	int fp_probd;	/* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
	int count;	/* how many successive undropped packets */
{
	int d;		/* denominator of drop-probability */

	d = fp_probd - count * fp_len;
	if (d <= 0)
		/* count exceeds the hard limit: drop or mark */
		return (1);

	/*
	 * now the range of d is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */

	if ((random() % d) < fp_len) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}

/*
 * try to set the CE bit in the packet.
 * returns 1 if successfully marked, 0 otherwise.
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
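			/*
			 * editorial worked example (sketch): marking
			 * ECT(0) (tos 0x02) as CE (tos 0x03) gives
			 * ~m + m' = 0xfffd + 0x0003 = 0x10000, and the
			 * carry fold below turns that into +1 on the
			 * one's-complement sum ~HC.
			 */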
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);	/* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif /* INET6 */
	}

	/* not marked */
	return (0);
}

/*
 * dequeue routine:
 *	must be called in splnet.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

static struct mbuf *
red_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
	struct mbuf *m;

	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	/* op == ALTDQ_REMOVE */
	m = red_getq(rqp->rq_red, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return (m);
}

struct mbuf *
red_getq(rp, q)
	red_t *rp;
	class_queue_t *q;
{
	struct mbuf *m;

	if ((m = _getq(q)) == NULL) {
		if (rp->red_idle == 0) {
			rp->red_idle = 1;
			microtime(&rp->red_last);
		}
		return NULL;
	}

	rp->red_idle = 0;
	return (m);
}

static int
red_request(ifq, req, arg)
	struct ifaltq *ifq;
	int req;
	void *arg;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	switch (req) {
	case ALTRQ_PURGE:
		red_purgeq(rqp);
		break;
	}
	return (0);
}

static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}


/*
 * helper routine to calibrate avg during idle.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
 * here Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
 */
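/*
 * editorial note (sketch): pow_w() below decomposes n into its binary
 * digits and multiplies in w_tab[i] for each set bit i, i.e.
 *	(1 - Wq)^n = product over set bits i of (1 - Wq)^(2^i)
 * so an idle period of n packet-times costs only O(log n) fixed-point
 * multiplies.
 */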
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */

struct wtab *
wtab_alloc(weight)
	int weight;
{
	struct wtab *w;
	int i;

	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	(void)memset(w, 0, sizeof(struct wtab));
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}

int
wtab_destroy(w)
	struct wtab *w;
{
	struct wtab *prev;

	if (--w->w_refcount > 0)
		return (0);

	if (wtab_list == w)
		wtab_list = w->w_next;
	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
		if (prev->w_next == w) {
			prev->w_next = w->w_next;
			break;
		}

	FREE(w, M_DEVBUF);
	return (0);
}

int32_t
pow_w(w, n)
	struct wtab *w;
	int n;
{
	int i, bit;
	int32_t val;

	if (n >= w->w_param_max)
		return (0);

	val = 1 << FP_SHIFT;
	if (n <= 0)
		return (val);

	bit = 1;
	i = 0;
	while (n) {
		if (n & bit) {
			val = (val * w->w_tab[i]) >> FP_SHIFT;
			n &= ~bit;
		}
		i++;
		bit <<= 1;
	}
	return (val);
}

#ifdef ALTQ_FLOWVALVE

#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE	64	/* how many flows in flowlist */

#define	FV_N		10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH	1	/* backoff threshold interval in second */
#define	FV_TTHRESH		3	/* time threshold to delete fve */
#define	FV_ALPHA		5	/* extra packet count */

#if (__FreeBSD_version > 300000)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
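/*
 * editorial note (sketch): fve_p is kept scaled by
 * 2^(FV_PSHIFT + FP_SHIFT), so BRTT_PMAX is 1.0 in that format.
 * fv_p2f() below indexes the table with bits 12..18 of p
 * (BRTT_MASK >> BRTT_SHIFT), giving 127 usable slots below p = 1.0.
 */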

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};

static inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

static inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	fve->fve_p = 0;
	fve->fve_f = 0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}

static inline void
flowlist_move_to_head(fv, fve)
	struct flowvalve *fv;
	struct fve *fve;
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

/*
 * allocate flowvalve structure
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	(void)memset(fv, 0, sizeof(struct flowvalve));

	MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	(void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		FREE(fv->fv_fves, M_DEVBUF);
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}

static void
fv_destroy(fv)
	struct flowvalve *fv;
{
	FREE(fv->fv_p2ftab, M_DEVBUF);
	FREE(fv->fv_fves, M_DEVBUF);
	FREE(fv, M_DEVBUF);
}

static inline int
fv_p2f(fv, p)
	struct flowvalve *fv;
	int p;
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return (f);
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
		    (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
		    + fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}
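	/*
	 * editorial scaling note (sketch): fve_f is kept FV_FSCALE'd,
	 * so (1 - Wf) * f is written f - FV_FUNSCALE(f) above, and the
	 * Wf = 1/32 factor on the new sample exactly cancels the scale,
	 * letting the fraction term be added unscaled.
	 */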

	/*
	 * overpumping test
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 */
static void
fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line cancels the update
	 * in fv_checkflow() and calculates
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
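	/*
	 * editorial scaling note (sketch): fve_p is FV_PSCALE'd
	 * fixed-point, so Wp = 1/128 in that format,
	 * FV_PSCALE((1 << FP_SHIFT) >> FV_PSHIFT), works out to exactly
	 * 1 << FP_SHIFT, the constant added above; the (1 - Wp) * p
	 * decay was already applied in fv_checkflow().
	 */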

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ_RED */