altq_red.c revision 1.13.10.1 1 /* $NetBSD: altq_red.c,v 1.13.10.1 2006/03/08 00:23:46 elad Exp $ */
2 /* $KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $ */
3
4 /*
5 * Copyright (C) 1997-2000
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.10.1 2006/03/08 00:23:46 elad Exp $");
65
66 #if defined(__FreeBSD__) || defined(__NetBSD__)
67 #include "opt_altq.h"
68 #if (__FreeBSD__ != 2)
69 #include "opt_inet.h"
70 #ifdef __FreeBSD__
71 #include "opt_inet6.h"
72 #endif
73 #endif
74 #endif /* __FreeBSD__ || __NetBSD__ */
75 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
76
77 #include <sys/param.h>
78 #include <sys/malloc.h>
79 #include <sys/mbuf.h>
80 #include <sys/socket.h>
81 #include <sys/sockio.h>
82 #include <sys/systm.h>
83 #include <sys/proc.h>
84 #include <sys/errno.h>
85 #include <sys/kernel.h>
86 #ifdef ALTQ_FLOWVALVE
87 #include <sys/queue.h>
88 #include <sys/time.h>
89 #endif
90
91 #include <net/if.h>
92 #include <net/if_types.h>
93
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #ifdef INET6
98 #include <netinet/ip6.h>
99 #endif
100
101 #include <altq/altq.h>
102 #include <altq/altq_conf.h>
103 #include <altq/altq_red.h>
104 #ifdef ALTQ_FLOWVALVE
105 #include <altq/altq_flowvalve.h>
106 #endif
107
108 /*
109 * ALTQ/RED (Random Early Detection) implementation using 32-bit
110 * fixed-point calculation.
111 *
112 * written by kjc using the ns code as a reference.
113 * you can learn more about red and ns from Sally's home page at
114 * http://www-nrg.ee.lbl.gov/floyd/
115 *
116 * most of the red parameter values are fixed in this implementation
117 * to prevent fixed-point overflow/underflow.
118 * if you change the parameters, watch out for overflow/underflow!
119 *
120 * the parameters used are recommended values by Sally.
121 * the corresponding ns config looks:
122 * q_weight=0.00195
123 * minthresh=5 maxthresh=15 queue-size=60
124 * linterm=30
125 * dropmech=drop-tail
126 * bytes=false (can't be handled by 32-bit fixed-point)
127 * doubleq=false dqthresh=false
128 * wait=true
129 */
130 /*
131 * alternative red parameters for a slow link.
132 *
133 * assume the queue length becomes from zero to L and keeps L, it takes
134 * N packets for q_avg to reach 63% of L.
135 * when q_weight is 0.002, N is about 500 packets.
136 * for a slow link like dial-up, 500 packets takes more than 1 minute!
137 * when q_weight is 0.008, N is about 127 packets.
138 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
140 * are allowed for 0.016.
141 * see Sally's paper for more details.
142 */
143 /* normal red parameters */
144 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
145 /* q_weight = 0.00195 */
146
147 /* red parameters for a slow link */
148 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
149 /* q_weight = 0.0078125 */
150
151 /* red parameters for a very slow link (e.g., dialup) */
152 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
153 /* q_weight = 0.015625 */
154
155 /* fixed-point uses 12-bit decimal places */
156 #define FP_SHIFT 12 /* fixed-point shift */
157
158 /* red parameters for drop probability */
159 #define INV_P_MAX 10 /* inverse of max drop probability */
160 #define TH_MIN 5 /* min threshold */
161 #define TH_MAX 15 /* max threshold */
162
163 #define RED_LIMIT 60 /* default max queue length */
164
165 /*
166 * our default policy for forced-drop is drop-tail.
167 * (in altq-1.1.2 or earlier, the default was random-drop.
168 * but it makes more sense to punish the cause of the surge.)
169 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
170 */
171
172 #ifdef ALTQ_FLOWVALVE
173 /*
174 * flow-valve is an extension to protect red from unresponsive flows
175 * and to promote end-to-end congestion control.
176 * flow-valve observes the average drop rates of the flows that have
177 * experienced packet drops in the recent past.
178 * when the average drop rate exceeds the threshold, the flow is
179 * blocked by the flow-valve. the trapped flow should back off
180 * exponentially to escape from the flow-valve.
181 */
182 #ifdef RED_RANDOM_DROP
183 #error "random-drop can't be used with flow-valve!"
184 #endif
185 #endif /* ALTQ_FLOWVALVE */
186
187 /* red_list keeps all red_queue_t's allocated. */
188 static red_queue_t *red_list = NULL;
189
190 /* default red parameter values */
191 static int default_th_min = TH_MIN;
192 static int default_th_max = TH_MAX;
193 static int default_inv_pmax = INV_P_MAX;
194
195 /* internal function prototypes */
196 static int red_enqueue __P((struct ifaltq *, struct mbuf *,
197 struct altq_pktattr *));
198 static struct mbuf *red_dequeue __P((struct ifaltq *, int));
199 static int red_request __P((struct ifaltq *, int, void *));
200 static void red_purgeq __P((red_queue_t *));
201 static int red_detach __P((red_queue_t *));
202 #ifdef ALTQ_FLOWVALVE
203 static inline struct fve *flowlist_lookup __P((struct flowvalve *,
204 struct altq_pktattr *, struct timeval *));
205 static inline struct fve *flowlist_reclaim __P((struct flowvalve *,
206 struct altq_pktattr *));
207 static inline void flowlist_move_to_head __P((struct flowvalve *,
208 struct fve *));
209 static inline int fv_p2f __P((struct flowvalve *, int));
210 static struct flowvalve *fv_alloc __P((struct red *));
211 static void fv_destroy __P((struct flowvalve *));
212 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
213 struct fve **));
214 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
215 struct fve *));
216 #endif
217
218 /*
219 * red device interface
220 */
221 altqdev_decl(red);
222
223 int
224 redopen(dev, flag, fmt, l)
225 dev_t dev;
226 int flag, fmt;
227 struct lwp *l;
228 {
229 /* everything will be done when the queueing scheme is attached. */
230 return 0;
231 }
232
233 int
234 redclose(dev, flag, fmt, l)
235 dev_t dev;
236 int flag, fmt;
237 struct lwp *l;
238 {
239 red_queue_t *rqp;
240 int err, error = 0;
241
242 while ((rqp = red_list) != NULL) {
243 /* destroy all */
244 err = red_detach(rqp);
245 if (err != 0 && error == 0)
246 error = err;
247 }
248
249 return error;
250 }
251
252 int
253 redioctl(dev, cmd, addr, flag, l)
254 dev_t dev;
255 ioctlcmd_t cmd;
256 caddr_t addr;
257 int flag;
258 struct lwp *l;
259 {
260 red_queue_t *rqp;
261 struct red_interface *ifacep;
262 struct ifnet *ifp;
263 struct proc *p = l->l_proc;
264 int error = 0;
265
266 /* check super-user privilege */
267 switch (cmd) {
268 case RED_GETSTATS:
269 break;
270 default:
271 #if (__FreeBSD_version > 400000)
272 if ((error = suser(p)) != 0)
273 #else
274 if ((error = generic_authorize(p->p_cred,
275 KAUTH_GENERIC_ISSUSER,
276 &p->p_acflag)) != 0)
277 #endif
278 return (error);
279 break;
280 }
281
282 switch (cmd) {
283
284 case RED_ENABLE:
285 ifacep = (struct red_interface *)addr;
286 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
287 error = EBADF;
288 break;
289 }
290 error = altq_enable(rqp->rq_ifq);
291 break;
292
293 case RED_DISABLE:
294 ifacep = (struct red_interface *)addr;
295 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
296 error = EBADF;
297 break;
298 }
299 error = altq_disable(rqp->rq_ifq);
300 break;
301
302 case RED_IF_ATTACH:
303 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
304 if (ifp == NULL) {
305 error = ENXIO;
306 break;
307 }
308
309 /* allocate and initialize red_queue_t */
310 MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
311 if (rqp == NULL) {
312 error = ENOMEM;
313 break;
314 }
315 (void)memset(rqp, 0, sizeof(red_queue_t));
316
317 MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
318 M_DEVBUF, M_WAITOK);
319 if (rqp->rq_q == NULL) {
320 FREE(rqp, M_DEVBUF);
321 error = ENOMEM;
322 break;
323 }
324 (void)memset(rqp->rq_q, 0, sizeof(class_queue_t));
325
326 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
327 if (rqp->rq_red == NULL) {
328 FREE(rqp->rq_q, M_DEVBUF);
329 FREE(rqp, M_DEVBUF);
330 error = ENOMEM;
331 break;
332 }
333
334 rqp->rq_ifq = &ifp->if_snd;
335 qtail(rqp->rq_q) = NULL;
336 qlen(rqp->rq_q) = 0;
337 qlimit(rqp->rq_q) = RED_LIMIT;
338 qtype(rqp->rq_q) = Q_RED;
339
340 /*
341 * set RED to this ifnet structure.
342 */
343 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
344 red_enqueue, red_dequeue, red_request,
345 NULL, NULL);
346 if (error) {
347 red_destroy(rqp->rq_red);
348 FREE(rqp->rq_q, M_DEVBUF);
349 FREE(rqp, M_DEVBUF);
350 break;
351 }
352
353 /* add this state to the red list */
354 rqp->rq_next = red_list;
355 red_list = rqp;
356 break;
357
358 case RED_IF_DETACH:
359 ifacep = (struct red_interface *)addr;
360 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
361 error = EBADF;
362 break;
363 }
364 error = red_detach(rqp);
365 break;
366
367 case RED_GETSTATS:
368 do {
369 struct red_stats *q_stats;
370 red_t *rp;
371
372 q_stats = (struct red_stats *)addr;
373 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
374 ALTQT_RED)) == NULL) {
375 error = EBADF;
376 break;
377 }
378
379 q_stats->q_len = qlen(rqp->rq_q);
380 q_stats->q_limit = qlimit(rqp->rq_q);
381
382 rp = rqp->rq_red;
383 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
384 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
385 q_stats->drop_cnt = rp->red_stats.drop_cnt;
386 q_stats->drop_forced = rp->red_stats.drop_forced;
387 q_stats->drop_unforced = rp->red_stats.drop_unforced;
388 q_stats->marked_packets = rp->red_stats.marked_packets;
389
390 q_stats->weight = rp->red_weight;
391 q_stats->inv_pmax = rp->red_inv_pmax;
392 q_stats->th_min = rp->red_thmin;
393 q_stats->th_max = rp->red_thmax;
394
395 #ifdef ALTQ_FLOWVALVE
396 if (rp->red_flowvalve != NULL) {
397 struct flowvalve *fv = rp->red_flowvalve;
398 q_stats->fv_flows = fv->fv_flows;
399 q_stats->fv_pass = fv->fv_stats.pass;
400 q_stats->fv_predrop = fv->fv_stats.predrop;
401 q_stats->fv_alloc = fv->fv_stats.alloc;
402 q_stats->fv_escape = fv->fv_stats.escape;
403 } else {
404 #endif /* ALTQ_FLOWVALVE */
405 q_stats->fv_flows = 0;
406 q_stats->fv_pass = 0;
407 q_stats->fv_predrop = 0;
408 q_stats->fv_alloc = 0;
409 q_stats->fv_escape = 0;
410 #ifdef ALTQ_FLOWVALVE
411 }
412 #endif /* ALTQ_FLOWVALVE */
413 } while (0);
414 break;
415
416 case RED_CONFIG:
417 do {
418 struct red_conf *fc;
419 red_t *new;
420 int s, limit;
421
422 fc = (struct red_conf *)addr;
423 if ((rqp = altq_lookup(fc->iface.red_ifname,
424 ALTQT_RED)) == NULL) {
425 error = EBADF;
426 break;
427 }
428 new = red_alloc(fc->red_weight,
429 fc->red_inv_pmax,
430 fc->red_thmin,
431 fc->red_thmax,
432 fc->red_flags,
433 fc->red_pkttime);
434 if (new == NULL) {
435 error = ENOMEM;
436 break;
437 }
438
439 s = splnet();
440 red_purgeq(rqp);
441 limit = fc->red_limit;
442 if (limit < fc->red_thmax)
443 limit = fc->red_thmax;
444 qlimit(rqp->rq_q) = limit;
445 fc->red_limit = limit; /* write back the new value */
446
447 red_destroy(rqp->rq_red);
448 rqp->rq_red = new;
449
450 splx(s);
451
452 /* write back new values */
453 fc->red_limit = limit;
454 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
455 fc->red_thmin = rqp->rq_red->red_thmin;
456 fc->red_thmax = rqp->rq_red->red_thmax;
457
458 } while (0);
459 break;
460
461 case RED_SETDEFAULTS:
462 do {
463 struct redparams *rp;
464
465 rp = (struct redparams *)addr;
466
467 default_th_min = rp->th_min;
468 default_th_max = rp->th_max;
469 default_inv_pmax = rp->inv_pmax;
470 } while (0);
471 break;
472
473 default:
474 error = EINVAL;
475 break;
476 }
477 return error;
478 }
479
480 static int
481 red_detach(rqp)
482 red_queue_t *rqp;
483 {
484 red_queue_t *tmp;
485 int error = 0;
486
487 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
488 altq_disable(rqp->rq_ifq);
489
490 if ((error = altq_detach(rqp->rq_ifq)))
491 return (error);
492
493 if (red_list == rqp)
494 red_list = rqp->rq_next;
495 else {
496 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
497 if (tmp->rq_next == rqp) {
498 tmp->rq_next = rqp->rq_next;
499 break;
500 }
501 if (tmp == NULL)
502 printf("red_detach: no state found in red_list!\n");
503 }
504
505 red_destroy(rqp->rq_red);
506 FREE(rqp->rq_q, M_DEVBUF);
507 FREE(rqp, M_DEVBUF);
508 return (error);
509 }
510
511 /*
512 * red support routines
513 */
514
/*
 * red_alloc: allocate and initialize a red state.
 *
 * any parameter given as 0 selects a default: weight/pkttime defaults
 * are further adjusted for slow links, and inv_pmax/th_min/th_max use
 * the module-wide defaults settable via RED_SETDEFAULTS.
 * returns the new state, or NULL on allocation failure.
 */
red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int	weight, inv_pmax, th_min, th_max;
	int	flags, pkttime;
{
	red_t	*rp;
	int	w, i;
	int	npkts_per_sec;

	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
	if (rp == NULL)
		return (NULL);
	(void)memset(rp, 0, sizeof(red_t));

	/* start idle with an empty average */
	rp->red_avg = 0;
	rp->red_idle = 1;

	/* fall back to defaults for any zero parameter */
	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* a non-power-of-2 weight is rounded down with a warning */
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared across states with equal weight) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
	return (rp);
}
606
/*
 * red_destroy: free a red state allocated by red_alloc.
 * drops the reference on the (possibly shared) weight table before
 * releasing the state itself.
 */
void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	wtab_destroy(rp->red_wtab);
	FREE(rp, M_DEVBUF);
}
618
/*
 * red_getstats: copy the current red counters into *sp.
 * the EWMA average is shifted down by red_wshift to strip the
 * weight scaling before export.
 */
void
red_getstats(rp, sp)
	red_t *rp;
	struct redstats *sp;
{
	sp->q_avg		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}
631
632 /*
633 * enqueue routine:
634 *
635 * returns: 0 when successfully queued.
636 * ENOBUFS when drop occurs.
637 */
638 static int
639 red_enqueue(ifq, m, pktattr)
640 struct ifaltq *ifq;
641 struct mbuf *m;
642 struct altq_pktattr *pktattr;
643 {
644 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
645
646 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
647 return ENOBUFS;
648 ifq->ifq_len++;
649 return 0;
650 }
651
/*
 * red_addq: add a packet to the queue, applying the red algorithm.
 *
 * updates the EWMA average queue length (compensating for idle time),
 * then decides whether to queue the packet, ECN-mark it, or drop it
 * (early probabilistic drop, or forced drop when avg >= th_max or the
 * queue is full).  the mbuf is freed here on drop.
 *
 * returns: 0 when the packet was queued, -1 when it was dropped.
 */
int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped up front */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		/* a drop resets the undropped-arrival tally */
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
788
789 /*
790 * early-drop probability is calculated as follows:
791 * prob = p_max * (avg - th_min) / (th_max - th_min)
792 * prob_a = prob / (2 - count*prob)
793 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
794 * here prob_a increases as successive undrop count increases.
795 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
796 * becomes 1 when (count >= (2 / prob))).
797 */
/*
 * drop_early: decide whether to early-drop (or mark) the packet.
 *
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 *
 * returns 1 to drop/mark, 0 to pass the packet.
 */
int
drop_early(fp_len, fp_probd, count)
	int fp_len;	/* (avg - TH_MIN) in fixed-point */
	int fp_probd;	/* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
	int count;	/* how many successive undropped packets */
{
	int denom;	/* denominator of drop-probability */

	denom = fp_probd - count * fp_len;
	if (denom <= 0) {
		/* count exceeds the hard limit: drop or mark */
		return (1);
	}

	/*
	 * now the range of denom is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / denom
	 */
	if ((random() % denom) < fp_len)
		return (1);	/* drop or mark */

	return (0);		/* no drop/mark */
}
824
825 /*
826 * try to mark CE bit to the packet.
827 * returns 1 if successfully marked, 0 otherwise.
828 */
/*
 * mark_ecn: try to set the CE (congestion experienced) codepoint on
 * the packet's IP/IPv6 header instead of dropping it.
 *
 * only acts when the corresponding REDF_ECN4/REDF_ECN6 flag is set
 * and the packet advertises ECN capability; for IPv4 the header
 * checksum is updated incrementally (RFC 1624).
 * returns 1 if the packet is (or already was) marked, 0 otherwise.
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			/* fold the 32-bit sum back into 16 bits */
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			/* the ECN bits live in the traffic class field,
			 * bits 20-27 of the host-order ip6_flow word */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
914
915 /*
916 * dequeue routine:
917 * must be called in splnet.
918 *
919 * returns: mbuf dequeued.
920 * NULL when no packet is available in the queue.
921 */
922
923 static struct mbuf *
924 red_dequeue(ifq, op)
925 struct ifaltq *ifq;
926 int op;
927 {
928 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
929 struct mbuf *m;
930
931 if (op == ALTDQ_POLL)
932 return qhead(rqp->rq_q);
933
934 /* op == ALTDQ_REMOVE */
935 m = red_getq(rqp->rq_red, rqp->rq_q);
936 if (m != NULL)
937 ifq->ifq_len--;
938 return (m);
939 }
940
941 struct mbuf *
942 red_getq(rp, q)
943 red_t *rp;
944 class_queue_t *q;
945 {
946 struct mbuf *m;
947
948 if ((m = _getq(q)) == NULL) {
949 if (rp->red_idle == 0) {
950 rp->red_idle = 1;
951 microtime(&rp->red_last);
952 }
953 return NULL;
954 }
955
956 rp->red_idle = 0;
957 return (m);
958 }
959
960 static int
961 red_request(ifq, req, arg)
962 struct ifaltq *ifq;
963 int req;
964 void *arg;
965 {
966 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
967
968 switch (req) {
969 case ALTRQ_PURGE:
970 red_purgeq(rqp);
971 break;
972 }
973 return (0);
974 }
975
/*
 * red_purgeq: discard all packets held in the class queue, and
 * zero the interface queue length if red is currently enabled.
 */
static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}
984
985
986 /*
987 * helper routine to calibrate avg during idle.
988 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
989 * here Wq = 1/weight and the code assumes Wq is close to zero.
990 *
991 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
992 */
993 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
994
995 struct wtab *
996 wtab_alloc(weight)
997 int weight;
998 {
999 struct wtab *w;
1000 int i;
1001
1002 for (w = wtab_list; w != NULL; w = w->w_next)
1003 if (w->w_weight == weight) {
1004 w->w_refcount++;
1005 return (w);
1006 }
1007
1008 MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
1009 if (w == NULL)
1010 panic("wtab_alloc: malloc failed!");
1011 (void)memset(w, 0, sizeof(struct wtab));
1012 w->w_weight = weight;
1013 w->w_refcount = 1;
1014 w->w_next = wtab_list;
1015 wtab_list = w;
1016
1017 /* initialize the weight table */
1018 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
1019 for (i = 1; i < 32; i++) {
1020 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
1021 if (w->w_tab[i] == 0 && w->w_param_max == 0)
1022 w->w_param_max = 1 << i;
1023 }
1024
1025 return (w);
1026 }
1027
1028 int
1029 wtab_destroy(w)
1030 struct wtab *w;
1031 {
1032 struct wtab *prev;
1033
1034 if (--w->w_refcount > 0)
1035 return (0);
1036
1037 if (wtab_list == w)
1038 wtab_list = w->w_next;
1039 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
1040 if (prev->w_next == w) {
1041 prev->w_next = w->w_next;
1042 break;
1043 }
1044
1045 FREE(w, M_DEVBUF);
1046 return (0);
1047 }
1048
1049 int32_t
1050 pow_w(w, n)
1051 struct wtab *w;
1052 int n;
1053 {
1054 int i, bit;
1055 int32_t val;
1056
1057 if (n >= w->w_param_max)
1058 return (0);
1059
1060 val = 1 << FP_SHIFT;
1061 if (n <= 0)
1062 return (val);
1063
1064 bit = 1;
1065 i = 0;
1066 while (n) {
1067 if (n & bit) {
1068 val = (val * w->w_tab[i]) >> FP_SHIFT;
1069 n &= ~bit;
1070 }
1071 i++;
1072 bit <<= 1;
1073 }
1074 return (val);
1075 }
1076
1077 #ifdef ALTQ_FLOWVALVE
1078
1079 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1080 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1081 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1082 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1083 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1084 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1085
1086 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1087 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1088
1089 #define FV_N 10 /* update fve_f every FV_N packets */
1090
1091 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1092 #define FV_TTHRESH 3 /* time threshold to delete fve */
1093 #define FV_ALPHA 5 /* extra packet count */
1094
1095 #if (__FreeBSD_version > 300000)
1096 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1097 #else
1098 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1099 #endif
1100
1101 /*
1102 * Brtt table: 127 entry table to convert drop rate (p) to
1103 * the corresponding bandwidth fraction (f)
1104 * the following equation is implemented to use scaled values,
1105 * fve_p and fve_f, in the fixed point format.
1106 *
1107 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1108 * f = Brtt(p) / (max_th + alpha)
1109 */
1110 #define BRTT_SIZE 128
1111 #define BRTT_SHIFT 12
1112 #define BRTT_MASK 0x0007f000
1113 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1114
1115 const int brtt_tab[BRTT_SIZE] = {
1116 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1117 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1118 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1119 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1120 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1121 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1122 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1123 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1124 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1125 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1126 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1127 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1128 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1129 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1130 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1131 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1132 };
1133
/*
 * flowlist_lookup: find the flow entry matching the packet's
 * src/dst address pair.
 *
 * the list appears to be kept most-recently-used first (see
 * flowlist_move_to_head), so the scan stops at the first free entry
 * (fve_lastdrop.tv_sec == 0) or at the first entry whose last drop
 * is older than FV_TTHRESH, which is invalidated on the spot.
 * on a miss, fv->fv_flows is refreshed with the number of entries
 * scanned (the active flow count).
 * returns the matching entry, or NULL if none.
 */
static inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* free entry */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* stale: invalidate and stop searching */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* free entry */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* stale: invalidate and stop searching */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1203
1204 static inline struct fve *
1205 flowlist_reclaim(fv, pktattr)
1206 struct flowvalve *fv;
1207 struct altq_pktattr *pktattr;
1208 {
1209 struct fve *fve;
1210 struct ip *ip;
1211 #ifdef INET6
1212 struct ip6_hdr *ip6;
1213 #endif
1214
1215 /*
1216 * get an entry from the tail of the LRU list.
1217 */
1218 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1219
1220 switch (pktattr->pattr_af) {
1221 case AF_INET:
1222 ip = (struct ip *)pktattr->pattr_hdr;
1223 fve->fve_flow.flow_af = AF_INET;
1224 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1225 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1226 break;
1227 #ifdef INET6
1228 case AF_INET6:
1229 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1230 fve->fve_flow.flow_af = AF_INET6;
1231 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1232 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1233 break;
1234 #endif
1235 }
1236
1237 fve->fve_state = Green;
1238 fve->fve_p = 0.0;
1239 fve->fve_f = 0.0;
1240 fve->fve_ifseq = fv->fv_ifseq - 1;
1241 fve->fve_count = 0;
1242
1243 fv->fv_flows++;
1244 #ifdef FV_STATS
1245 fv->fv_stats.alloc++;
1246 #endif
1247 return (fve);
1248 }
1249
1250 static inline void
1251 flowlist_move_to_head(fv, fve)
1252 struct flowvalve *fv;
1253 struct fve *fve;
1254 {
1255 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1256 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1257 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1258 }
1259 }
1260
1261 /*
1262 * allocate flowvalve structure
1263 */
1264 static struct flowvalve *
1265 fv_alloc(rp)
1266 struct red *rp;
1267 {
1268 struct flowvalve *fv;
1269 struct fve *fve;
1270 int i, num;
1271
1272 num = FV_FLOWLISTSIZE;
1273 MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
1274 M_DEVBUF, M_WAITOK);
1275 if (fv == NULL)
1276 return (NULL);
1277 (void)memset(fv, 0, sizeof(struct flowvalve));
1278
1279 MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
1280 M_DEVBUF, M_WAITOK);
1281 if (fv->fv_fves == NULL) {
1282 FREE(fv, M_DEVBUF);
1283 return (NULL);
1284 }
1285 (void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);
1286
1287 fv->fv_flows = 0;
1288 TAILQ_INIT(&fv->fv_flowlist);
1289 for (i = 0; i < num; i++) {
1290 fve = &fv->fv_fves[i];
1291 fve->fve_lastdrop.tv_sec = 0;
1292 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1293 }
1294
1295 /* initialize drop rate threshold in scaled fixed-point */
1296 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1297
1298 /* initialize drop rate to fraction table */
1299 MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
1300 M_DEVBUF, M_WAITOK);
1301 if (fv->fv_p2ftab == NULL) {
1302 FREE(fv->fv_fves, M_DEVBUF);
1303 FREE(fv, M_DEVBUF);
1304 return (NULL);
1305 }
1306 /*
1307 * create the p2f table.
1308 * (shift is used to keep the precision)
1309 */
1310 for (i = 1; i < BRTT_SIZE; i++) {
1311 int f;
1312
1313 f = brtt_tab[i] << 8;
1314 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1315 }
1316
1317 return (fv);
1318 }
1319
1320 static void fv_destroy(fv)
1321 struct flowvalve *fv;
1322 {
1323 FREE(fv->fv_p2ftab, M_DEVBUF);
1324 FREE(fv->fv_fves, M_DEVBUF);
1325 FREE(fv, M_DEVBUF);
1326 }
1327
1328 static inline int
1329 fv_p2f(fv, p)
1330 struct flowvalve *fv;
1331 int p;
1332 {
1333 int val, f;
1334
1335 if (p >= BRTT_PMAX)
1336 f = fv->fv_p2ftab[BRTT_SIZE-1];
1337 else if ((val = (p & BRTT_MASK)))
1338 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1339 else
1340 f = fv->fv_p2ftab[1];
1341 return (f);
1342 }
1343
1344 /*
1345 * check if an arriving packet should be pre-dropped.
1346 * called from red_addq() when a packet arrives.
1347 * returns 1 when the packet should be pre-dropped.
1348 * should be called in splnet.
1349 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;		/* out: matched entry, for fv_dropbyred() */
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;		/* per-interface arrival sequence number */
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* cache the entry so a subsequent drop need not search again */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (scaled fixed-point; FV_FUNSCALE strips the Wf weight)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose drop rate exceeds the
	 * threshold is compared against the bandwidth-fraction limit.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay; fv_dropbyred() adds Wp back if this packet gets dropped)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1425
1426 /*
1427 * called from red_addq when a packet is dropped by red.
1428 * should be called in splnet.
1429 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;	/* entry cached by fv_checkflow(), or NULL */
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no entry for this flow; recycle the LRU tail for it */
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line cancels the update
	 * in fv_checkflow() and calculate
	 * p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;	/* record the drop time for aging */
}
1459
1460 #endif /* ALTQ_FLOWVALVE */
1461
#ifdef KLD_MODULE

/* altq device-switch entry used when RED is built as a loadable module */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */
1470
1471 #endif /* ALTQ_RED */
1472