altq_red.c revision 1.22 1 /* $NetBSD: altq_red.c,v 1.22 2006/10/13 09:57:28 peter Exp $ */
2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.22 2006/10/13 09:57:28 peter Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_altq.h"
68 #include "opt_inet.h"
69 #include "pf.h"
70 #endif
71
72 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
73
74 #include <sys/param.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/systm.h>
79 #include <sys/errno.h>
80 #include <sys/kauth.h>
81 #if 1 /* ALTQ3_COMPAT */
82 #include <sys/sockio.h>
83 #include <sys/proc.h>
84 #include <sys/kernel.h>
85 #include <sys/kauth.h>
86 #ifdef ALTQ_FLOWVALVE
87 #include <sys/queue.h>
88 #include <sys/time.h>
89 #endif
90 #endif /* ALTQ3_COMPAT */
91
92 #include <net/if.h>
93
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #ifdef INET6
98 #include <netinet/ip6.h>
99 #endif
100
101 #if NPF > 0
102 #include <net/pfvar.h>
103 #endif
104 #include <altq/altq.h>
105 #include <altq/altq_red.h>
106 #ifdef ALTQ3_COMPAT
107 #include <altq/altq_conf.h>
108 #ifdef ALTQ_FLOWVALVE
109 #include <altq/altq_flowvalve.h>
110 #endif
111 #endif
112
113 /*
114 * ALTQ/RED (Random Early Detection) implementation using 32-bit
115 * fixed-point calculation.
116 *
117 * written by kjc using the ns code as a reference.
118 * you can learn more about red and ns from Sally's home page at
119 * http://www-nrg.ee.lbl.gov/floyd/
120 *
121 * most of the red parameter values are fixed in this implementation
122 * to prevent fixed-point overflow/underflow.
123 * if you change the parameters, watch out for overflow/underflow!
124 *
125 * the parameters used are recommended values by Sally.
126 * the corresponding ns config looks:
127 * q_weight=0.00195
128 * minthresh=5 maxthresh=15 queue-size=60
129 * linterm=30
130 * dropmech=drop-tail
131 * bytes=false (can't be handled by 32-bit fixed-point)
132 * doubleq=false dqthresh=false
133 * wait=true
134 */
135 /*
136 * alternative red parameters for a slow link.
137 *
138 * assume the queue length becomes from zero to L and keeps L, it takes
139 * N packets for q_avg to reach 63% of L.
140 * when q_weight is 0.002, N is about 500 packets.
141 * for a slow link like dial-up, 500 packets takes more than 1 minute!
142 * when q_weight is 0.008, N is about 127 packets.
143 * when q_weight is 0.016, N is about 63 packets.
144 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
145 * are allowed for 0.016.
146 * see Sally's paper for more details.
147 */
148 /* normal red parameters */
149 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
150 /* q_weight = 0.00195 */
151
152 /* red parameters for a slow link */
153 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
154 /* q_weight = 0.0078125 */
155
156 /* red parameters for a very slow link (e.g., dialup) */
157 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
158 /* q_weight = 0.015625 */
159
160 /* fixed-point uses 12-bit decimal places */
161 #define FP_SHIFT 12 /* fixed-point shift */
162
163 /* red parameters for drop probability */
164 #define INV_P_MAX 10 /* inverse of max drop probability */
165 #define TH_MIN 5 /* min threshold */
166 #define TH_MAX 15 /* max threshold */
167
#define RED_LIMIT	60	/* default max queue length */
169 #define RED_STATS /* collect statistics */
170
171 /*
172 * our default policy for forced-drop is drop-tail.
173 * (in altq-1.1.2 or earlier, the default was random-drop.
174 * but it makes more sense to punish the cause of the surge.)
175 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
176 */
177
178 #ifdef ALTQ3_COMPAT
179 #ifdef ALTQ_FLOWVALVE
180 /*
 * flow-valve is an extension to protect red from unresponsive flows
182 * and to promote end-to-end congestion control.
183 * flow-valve observes the average drop rates of the flows that have
184 * experienced packet drops in the recent past.
185 * when the average drop rate exceeds the threshold, the flow is
186 * blocked by the flow-valve. the trapped flow should back off
187 * exponentially to escape from the flow-valve.
188 */
189 #ifdef RED_RANDOM_DROP
190 #error "random-drop can't be used with flow-valve!"
191 #endif
192 #endif /* ALTQ_FLOWVALVE */
193
194 /* red_list keeps all red_queue_t's allocated. */
195 static red_queue_t *red_list = NULL;
196
197 #endif /* ALTQ3_COMPAT */
198
199 /* default red parameter values */
200 static int default_th_min = TH_MIN;
201 static int default_th_max = TH_MAX;
202 static int default_inv_pmax = INV_P_MAX;
203
204 #ifdef ALTQ3_COMPAT
205 /* internal function prototypes */
206 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
207 static struct mbuf *red_dequeue(struct ifaltq *, int);
208 static int red_request(struct ifaltq *, int, void *);
209 static void red_purgeq(red_queue_t *);
210 static int red_detach(red_queue_t *);
211 #ifdef ALTQ_FLOWVALVE
212 static inline struct fve *flowlist_lookup(struct flowvalve *,
213 struct altq_pktattr *, struct timeval *);
214 static inline struct fve *flowlist_reclaim(struct flowvalve *,
215 struct altq_pktattr *);
216 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
217 static inline int fv_p2f(struct flowvalve *, int);
218 static struct flowvalve *fv_alloc(struct red *);
219 static void fv_destroy(struct flowvalve *);
220 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
221 struct fve **);
222 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
223 struct fve *);
224 #endif
225 #endif /* ALTQ3_COMPAT */
226
227 /*
228 * red support routines
229 */
/*
 * Allocate and initialize a RED state block.
 *
 * weight:   inverse of the EWMA weight (must be a power of 2);
 *           0 selects a default scaled to the link speed.
 * inv_pmax: inverse of the maximum drop probability; 0 selects default.
 * th_min:   minimum average-queue threshold; 0 selects default.
 * th_max:   maximum average-queue threshold; 0 selects default.
 * flags:    REDF_* option flags (ECN marking, flow-valve, ...).
 * pkttime:  average packet transmission time in usec; 0 selects default.
 *
 * Returns the new red_t on success, NULL on allocation failure.
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
{
	red_t 	*rp;
	int	 w, i;
	int	 npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	rp->red_avg = 0;
	rp->red_idle = 1;

	/* substitute defaults for any parameter passed as 0 */
	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* round a non-power-of-2 weight down and complain */
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared between red instances) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
#endif /* ALTQ3_COMPAT */
	return (rp);
}
321
322 void
323 red_destroy(red_t *rp)
324 {
325 #ifdef ALTQ3_COMPAT
326 #ifdef ALTQ_FLOWVALVE
327 if (rp->red_flowvalve != NULL)
328 fv_destroy(rp->red_flowvalve);
329 #endif
330 #endif /* ALTQ3_COMPAT */
331 wtab_destroy(rp->red_wtab);
332 free(rp, M_DEVBUF);
333 }
334
335 void
336 red_getstats(red_t *rp, struct redstats *sp)
337 {
338 sp->q_avg = rp->red_avg >> rp->red_wshift;
339 sp->xmit_cnt = rp->red_stats.xmit_cnt;
340 sp->drop_cnt = rp->red_stats.drop_cnt;
341 sp->drop_forced = rp->red_stats.drop_forced;
342 sp->drop_unforced = rp->red_stats.drop_unforced;
343 sp->marked_packets = rp->red_stats.marked_packets;
344 }
345
/*
 * red_addq: RED enqueue decision for one arriving packet.
 *
 * Updates the EWMA average queue length, then decides between three
 * outcomes: enqueue (DTYPE_NODROP), probabilistic early drop/ECN mark
 * (DTYPE_EARLY), or forced drop when avg >= th_max or the queue is at
 * its hard limit (DTYPE_FORCED).
 *
 * Returns 0 if the packet was queued, -1 if it was dropped (the mbuf
 * is freed on drop).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped unconditionally */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			/* t in usec; n = number of pkttimes spent idle */
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min: start a new counting period */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min: leave the "above threshold" state */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		/* let the flow-valve account this drop against the flow */
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
483
484 /*
485 * early-drop probability is calculated as follows:
486 * prob = p_max * (avg - th_min) / (th_max - th_min)
487 * prob_a = prob / (2 - count*prob)
488 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
489 * here prob_a increases as successive undrop count increases.
490 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
491 * becomes 1 when (count >= (2 / prob))).
492 */
493 int
494 drop_early(int fp_len, int fp_probd, int count)
495 {
496 int d; /* denominator of drop-probability */
497
498 d = fp_probd - count * fp_len;
499 if (d <= 0)
500 /* count exceeds the hard limit: drop or mark */
501 return (1);
502
503 /*
504 * now the range of d is [1..600] in fixed-point. (when
505 * th_max-th_min=10 and p_max=1/30)
506 * drop probability = (avg - TH_MIN) / d
507 */
508
509 if ((arc4random() % d) < fp_len) {
510 /* drop or mark */
511 return (1);
512 }
513 /* no drop/mark */
514 return (0);
515 }
516
517 /*
518 * try to mark CE bit to the packet.
519 * returns 1 if successfully marked, 0 otherwise.
520 */
521 int
522 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
523 {
524 struct mbuf *m0;
525 struct m_tag *t;
526 struct altq_tag *at;
527 void *hdr;
528 int af;
529
530 t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
531 if (t != NULL) {
532 at = (struct altq_tag *)(t + 1);
533 if (at == NULL)
534 return (0);
535 af = at->af;
536 hdr = at->hdr;
537 #ifdef ALTQ3_COMPAT
538 } else if (pktattr != NULL) {
539 af = pktattr->pattr_af;
540 hdr = pktattr->pattr_hdr;
541 #endif /* ALTQ3_COMPAT */
542 } else
543 return (0);
544
545 if (af != AF_INET && af != AF_INET6)
546 return (0);
547
548 /* verify that pattr_hdr is within the mbuf data */
549 for (m0 = m; m0 != NULL; m0 = m0->m_next)
550 if (((caddr_t)hdr >= m0->m_data) &&
551 ((caddr_t)hdr < m0->m_data + m0->m_len))
552 break;
553 if (m0 == NULL) {
554 /* ick, tag info is stale */
555 return (0);
556 }
557
558 switch (af) {
559 case AF_INET:
560 if (flags & REDF_ECN4) {
561 struct ip *ip = hdr;
562 u_int8_t otos;
563 int sum;
564
565 if (ip->ip_v != 4)
566 return (0); /* version mismatch! */
567
568 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
569 return (0); /* not-ECT */
570 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
571 return (1); /* already marked */
572
573 /*
574 * ecn-capable but not marked,
575 * mark CE and update checksum
576 */
577 otos = ip->ip_tos;
578 ip->ip_tos |= IPTOS_ECN_CE;
579 /*
580 * update checksum (from RFC1624)
581 * HC' = ~(~HC + ~m + m')
582 */
583 sum = ~ntohs(ip->ip_sum) & 0xffff;
584 sum += (~otos & 0xffff) + ip->ip_tos;
585 sum = (sum >> 16) + (sum & 0xffff);
586 sum += (sum >> 16); /* add carry */
587 ip->ip_sum = htons(~sum & 0xffff);
588 return (1);
589 }
590 break;
591 #ifdef INET6
592 case AF_INET6:
593 if (flags & REDF_ECN6) {
594 struct ip6_hdr *ip6 = hdr;
595 u_int32_t flowlabel;
596
597 flowlabel = ntohl(ip6->ip6_flow);
598 if ((flowlabel >> 28) != 6)
599 return (0); /* version mismatch! */
600 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
601 (IPTOS_ECN_NOTECT << 20))
602 return (0); /* not-ECT */
603 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
604 (IPTOS_ECN_CE << 20))
605 return (1); /* already marked */
606 /*
607 * ecn-capable but not marked, mark CE
608 */
609 flowlabel |= (IPTOS_ECN_CE << 20);
610 ip6->ip6_flow = htonl(flowlabel);
611 return (1);
612 }
613 break;
614 #endif /* INET6 */
615 }
616
617 /* not marked */
618 return (0);
619 }
620
621 struct mbuf *
622 red_getq(red_t *rp, class_queue_t *q)
623 {
624 struct mbuf *m;
625
626 if ((m = _getq(q)) == NULL) {
627 if (rp->red_idle == 0) {
628 rp->red_idle = 1;
629 microtime(&rp->red_last);
630 }
631 return NULL;
632 }
633
634 rp->red_idle = 0;
635 return (m);
636 }
637
638 /*
639 * helper routine to calibrate avg during idle.
640 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
641 * here Wq = 1/weight and the code assumes Wq is close to zero.
642 *
643 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
644 */
645 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
646
647 struct wtab *
648 wtab_alloc(int weight)
649 {
650 struct wtab *w;
651 int i;
652
653 for (w = wtab_list; w != NULL; w = w->w_next)
654 if (w->w_weight == weight) {
655 w->w_refcount++;
656 return (w);
657 }
658
659 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
660 if (w == NULL)
661 panic("wtab_alloc: malloc failed!");
662 w->w_weight = weight;
663 w->w_refcount = 1;
664 w->w_next = wtab_list;
665 wtab_list = w;
666
667 /* initialize the weight table */
668 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
669 for (i = 1; i < 32; i++) {
670 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
671 if (w->w_tab[i] == 0 && w->w_param_max == 0)
672 w->w_param_max = 1 << i;
673 }
674
675 return (w);
676 }
677
678 int
679 wtab_destroy(struct wtab *w)
680 {
681 struct wtab *prev;
682
683 if (--w->w_refcount > 0)
684 return (0);
685
686 if (wtab_list == w)
687 wtab_list = w->w_next;
688 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
689 if (prev->w_next == w) {
690 prev->w_next = w->w_next;
691 break;
692 }
693
694 free(w, M_DEVBUF);
695 return (0);
696 }
697
698 int32_t
699 pow_w(struct wtab *w, int n)
700 {
701 int i, bit;
702 int32_t val;
703
704 if (n >= w->w_param_max)
705 return (0);
706
707 val = 1 << FP_SHIFT;
708 if (n <= 0)
709 return (val);
710
711 bit = 1;
712 i = 0;
713 while (n) {
714 if (n & bit) {
715 val = (val * w->w_tab[i]) >> FP_SHIFT;
716 n &= ~bit;
717 }
718 i++;
719 bit <<= 1;
720 }
721 return (val);
722 }
723
724 #ifdef ALTQ3_COMPAT
725 /*
726 * red device interface
727 */
728 altqdev_decl(red);
729
730 int
731 redopen(dev_t dev __unused, int flag __unused, int fmt __unused,
732 struct lwp *l __unused)
733 {
734 /* everything will be done when the queueing scheme is attached. */
735 return 0;
736 }
737
738 int
739 redclose(dev_t dev __unused, int flag __unused, int fmt __unused,
740 struct lwp *l __unused)
741 {
742 red_queue_t *rqp;
743 int err, error = 0;
744
745 while ((rqp = red_list) != NULL) {
746 /* destroy all */
747 err = red_detach(rqp);
748 if (err != 0 && error == 0)
749 error = err;
750 }
751
752 return error;
753 }
754
755 int
756 redioctl(dev_t dev __unused, ioctlcmd_t cmd, caddr_t addr, int flag __unused,
757 struct lwp *l)
758 {
759 red_queue_t *rqp;
760 struct red_interface *ifacep;
761 struct ifnet *ifp;
762 struct proc *p = l->l_proc;
763 int error = 0;
764
765 /* check super-user privilege */
766 switch (cmd) {
767 case RED_GETSTATS:
768 break;
769 default:
770 #if (__FreeBSD_version > 400000)
771 if ((error = suser(p)) != 0)
772 #else
773 if ((error = kauth_authorize_generic(p->p_cred,
774 KAUTH_GENERIC_ISSUSER, &p->p_acflag)) != 0)
775 #endif
776 return (error);
777 break;
778 }
779
780 switch (cmd) {
781
782 case RED_ENABLE:
783 ifacep = (struct red_interface *)addr;
784 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
785 error = EBADF;
786 break;
787 }
788 error = altq_enable(rqp->rq_ifq);
789 break;
790
791 case RED_DISABLE:
792 ifacep = (struct red_interface *)addr;
793 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
794 error = EBADF;
795 break;
796 }
797 error = altq_disable(rqp->rq_ifq);
798 break;
799
800 case RED_IF_ATTACH:
801 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
802 if (ifp == NULL) {
803 error = ENXIO;
804 break;
805 }
806
807 /* allocate and initialize red_queue_t */
808 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
809 if (rqp == NULL) {
810 error = ENOMEM;
811 break;
812 }
813
814 rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
815 M_WAITOK|M_ZERO);
816 if (rqp->rq_q == NULL) {
817 free(rqp, M_DEVBUF);
818 error = ENOMEM;
819 break;
820 }
821
822 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
823 if (rqp->rq_red == NULL) {
824 free(rqp->rq_q, M_DEVBUF);
825 free(rqp, M_DEVBUF);
826 error = ENOMEM;
827 break;
828 }
829
830 rqp->rq_ifq = &ifp->if_snd;
831 qtail(rqp->rq_q) = NULL;
832 qlen(rqp->rq_q) = 0;
833 qlimit(rqp->rq_q) = RED_LIMIT;
834 qtype(rqp->rq_q) = Q_RED;
835
836 /*
837 * set RED to this ifnet structure.
838 */
839 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
840 red_enqueue, red_dequeue, red_request,
841 NULL, NULL);
842 if (error) {
843 red_destroy(rqp->rq_red);
844 free(rqp->rq_q, M_DEVBUF);
845 free(rqp, M_DEVBUF);
846 break;
847 }
848
849 /* add this state to the red list */
850 rqp->rq_next = red_list;
851 red_list = rqp;
852 break;
853
854 case RED_IF_DETACH:
855 ifacep = (struct red_interface *)addr;
856 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
857 error = EBADF;
858 break;
859 }
860 error = red_detach(rqp);
861 break;
862
863 case RED_GETSTATS:
864 do {
865 struct red_stats *q_stats;
866 red_t *rp;
867
868 q_stats = (struct red_stats *)addr;
869 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
870 ALTQT_RED)) == NULL) {
871 error = EBADF;
872 break;
873 }
874
875 q_stats->q_len = qlen(rqp->rq_q);
876 q_stats->q_limit = qlimit(rqp->rq_q);
877
878 rp = rqp->rq_red;
879 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
880 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
881 q_stats->drop_cnt = rp->red_stats.drop_cnt;
882 q_stats->drop_forced = rp->red_stats.drop_forced;
883 q_stats->drop_unforced = rp->red_stats.drop_unforced;
884 q_stats->marked_packets = rp->red_stats.marked_packets;
885
886 q_stats->weight = rp->red_weight;
887 q_stats->inv_pmax = rp->red_inv_pmax;
888 q_stats->th_min = rp->red_thmin;
889 q_stats->th_max = rp->red_thmax;
890
891 #ifdef ALTQ_FLOWVALVE
892 if (rp->red_flowvalve != NULL) {
893 struct flowvalve *fv = rp->red_flowvalve;
894 q_stats->fv_flows = fv->fv_flows;
895 q_stats->fv_pass = fv->fv_stats.pass;
896 q_stats->fv_predrop = fv->fv_stats.predrop;
897 q_stats->fv_alloc = fv->fv_stats.alloc;
898 q_stats->fv_escape = fv->fv_stats.escape;
899 } else {
900 #endif /* ALTQ_FLOWVALVE */
901 q_stats->fv_flows = 0;
902 q_stats->fv_pass = 0;
903 q_stats->fv_predrop = 0;
904 q_stats->fv_alloc = 0;
905 q_stats->fv_escape = 0;
906 #ifdef ALTQ_FLOWVALVE
907 }
908 #endif /* ALTQ_FLOWVALVE */
909 } while (/*CONSTCOND*/ 0);
910 break;
911
912 case RED_CONFIG:
913 do {
914 struct red_conf *fc;
915 red_t *new;
916 int s, limit;
917
918 fc = (struct red_conf *)addr;
919 if ((rqp = altq_lookup(fc->iface.red_ifname,
920 ALTQT_RED)) == NULL) {
921 error = EBADF;
922 break;
923 }
924 new = red_alloc(fc->red_weight,
925 fc->red_inv_pmax,
926 fc->red_thmin,
927 fc->red_thmax,
928 fc->red_flags,
929 fc->red_pkttime);
930 if (new == NULL) {
931 error = ENOMEM;
932 break;
933 }
934
935 s = splnet();
936 red_purgeq(rqp);
937 limit = fc->red_limit;
938 if (limit < fc->red_thmax)
939 limit = fc->red_thmax;
940 qlimit(rqp->rq_q) = limit;
941 fc->red_limit = limit; /* write back the new value */
942
943 red_destroy(rqp->rq_red);
944 rqp->rq_red = new;
945
946 splx(s);
947
948 /* write back new values */
949 fc->red_limit = limit;
950 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
951 fc->red_thmin = rqp->rq_red->red_thmin;
952 fc->red_thmax = rqp->rq_red->red_thmax;
953
954 } while (/*CONSTCOND*/ 0);
955 break;
956
957 case RED_SETDEFAULTS:
958 do {
959 struct redparams *rp;
960
961 rp = (struct redparams *)addr;
962
963 default_th_min = rp->th_min;
964 default_th_max = rp->th_max;
965 default_inv_pmax = rp->inv_pmax;
966 } while (/*CONSTCOND*/ 0);
967 break;
968
969 default:
970 error = EINVAL;
971 break;
972 }
973 return error;
974 }
975
976 static int
977 red_detach(red_queue_t *rqp)
978 {
979 red_queue_t *tmp;
980 int error = 0;
981
982 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
983 altq_disable(rqp->rq_ifq);
984
985 if ((error = altq_detach(rqp->rq_ifq)))
986 return (error);
987
988 if (red_list == rqp)
989 red_list = rqp->rq_next;
990 else {
991 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
992 if (tmp->rq_next == rqp) {
993 tmp->rq_next = rqp->rq_next;
994 break;
995 }
996 if (tmp == NULL)
997 printf("red_detach: no state found in red_list!\n");
998 }
999
1000 red_destroy(rqp->rq_red);
1001 free(rqp->rq_q, M_DEVBUF);
1002 free(rqp, M_DEVBUF);
1003 return (error);
1004 }
1005
1006 /*
1007 * enqueue routine:
1008 *
1009 * returns: 0 when successfully queued.
1010 * ENOBUFS when drop occurs.
1011 */
1012 static int
1013 red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
1014 {
1015 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1016
1017 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1018 return ENOBUFS;
1019 ifq->ifq_len++;
1020 return 0;
1021 }
1022
1023 /*
1024 * dequeue routine:
1025 * must be called in splnet.
1026 *
1027 * returns: mbuf dequeued.
1028 * NULL when no packet is available in the queue.
1029 */
1030
1031 static struct mbuf *
1032 red_dequeue(struct ifaltq *ifq, int op)
1033 {
1034 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1035 struct mbuf *m;
1036
1037 if (op == ALTDQ_POLL)
1038 return qhead(rqp->rq_q);
1039
1040 /* op == ALTDQ_REMOVE */
1041 m = red_getq(rqp->rq_red, rqp->rq_q);
1042 if (m != NULL)
1043 ifq->ifq_len--;
1044 return (m);
1045 }
1046
1047 static int
1048 red_request(struct ifaltq *ifq, int req, void *arg __unused)
1049 {
1050 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1051
1052 switch (req) {
1053 case ALTRQ_PURGE:
1054 red_purgeq(rqp);
1055 break;
1056 }
1057 return (0);
1058 }
1059
1060 static void
1061 red_purgeq(red_queue_t *rqp)
1062 {
1063 _flushq(rqp->rq_q);
1064 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1065 rqp->rq_ifq->ifq_len = 0;
1066 }
1067
1068 #ifdef ALTQ_FLOWVALVE
1069
1070 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1071 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1072 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1073 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1074 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1075 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1076
1077 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1078 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1079
1080 #define FV_N 10 /* update fve_f every FV_N packets */
1081
1082 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1083 #define FV_TTHRESH 3 /* time threshold to delete fve */
1084 #define FV_ALPHA 5 /* extra packet count */
1085
1086 #define FV_STATS
1087
1088 #if (__FreeBSD_version > 300000) || defined(__HAVE_TIMECOUNTER)
1089 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1090 #else
1091 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1092 #endif
1093
1094 /*
1095 * Brtt table: 127 entry table to convert drop rate (p) to
1096 * the corresponding bandwidth fraction (f)
1097 * the following equation is implemented to use scaled values,
1098 * fve_p and fve_f, in the fixed point format.
1099 *
1100 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1101 * f = Brtt(p) / (max_th + alpha)
1102 */
1103 #define BRTT_SIZE 128
1104 #define BRTT_SHIFT 12
1105 #define BRTT_MASK 0x0007f000
1106 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1107
/* precomputed Brtt(p) values (see formula above), indexed by scaled p */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1126
/*
 * flowlist_lookup: search the flow-valve LRU list for the entry that
 * matches the flow (src/dst address pair) of the given packet.
 *
 * entries whose last drop is older than FV_TTHRESH seconds are treated
 * as free; the scan stops at the first free entry (the list is kept in
 * LRU order, so everything behind it is free too).  when no match is
 * found, fv->fv_flows is refreshed with the count of active entries.
 *
 * returns the matching fve, or NULL if the flow is not on the list or
 * the protocol family is unknown.
 */
static inline struct fve *
flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct timeval *now)
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries that last dropped before tthresh are expired */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* free entry: end of active list */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: mark free and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* free entry: end of active list */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: mark free and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
			    &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
			    &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1194
1195 static inline struct fve *
1196 flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
1197 {
1198 struct fve *fve;
1199 struct ip *ip;
1200 #ifdef INET6
1201 struct ip6_hdr *ip6;
1202 #endif
1203
1204 /*
1205 * get an entry from the tail of the LRU list.
1206 */
1207 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1208
1209 switch (pktattr->pattr_af) {
1210 case AF_INET:
1211 ip = (struct ip *)pktattr->pattr_hdr;
1212 fve->fve_flow.flow_af = AF_INET;
1213 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1214 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1215 break;
1216 #ifdef INET6
1217 case AF_INET6:
1218 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1219 fve->fve_flow.flow_af = AF_INET6;
1220 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1221 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1222 break;
1223 #endif
1224 }
1225
1226 fve->fve_state = Green;
1227 fve->fve_p = 0.0;
1228 fve->fve_f = 0.0;
1229 fve->fve_ifseq = fv->fv_ifseq - 1;
1230 fve->fve_count = 0;
1231
1232 fv->fv_flows++;
1233 #ifdef FV_STATS
1234 fv->fv_stats.alloc++;
1235 #endif
1236 return (fve);
1237 }
1238
1239 static inline void
1240 flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
1241 {
1242 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1243 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1244 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1245 }
1246 }
1247
1248 /*
1249 * allocate flowvalve structure
1250 */
1251 static struct flowvalve *
1252 fv_alloc(struct red *rp)
1253 {
1254 struct flowvalve *fv;
1255 struct fve *fve;
1256 int i, num;
1257
1258 num = FV_FLOWLISTSIZE;
1259 fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
1260 if (fv == NULL)
1261 return (NULL);
1262
1263 fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
1264 M_WAITOK|M_ZERO);
1265 if (fv->fv_fves == NULL) {
1266 free(fv, M_DEVBUF);
1267 return (NULL);
1268 }
1269
1270 fv->fv_flows = 0;
1271 TAILQ_INIT(&fv->fv_flowlist);
1272 for (i = 0; i < num; i++) {
1273 fve = &fv->fv_fves[i];
1274 fve->fve_lastdrop.tv_sec = 0;
1275 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1276 }
1277
1278 /* initialize drop rate threshold in scaled fixed-point */
1279 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1280
1281 /* initialize drop rate to fraction table */
1282 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
1283 if (fv->fv_p2ftab == NULL) {
1284 free(fv->fv_fves, M_DEVBUF);
1285 free(fv, M_DEVBUF);
1286 return (NULL);
1287 }
1288 /*
1289 * create the p2f table.
1290 * (shift is used to keep the precision)
1291 */
1292 for (i = 1; i < BRTT_SIZE; i++) {
1293 int f;
1294
1295 f = brtt_tab[i] << 8;
1296 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1297 }
1298
1299 return (fv);
1300 }
1301
1302 static void
1303 fv_destroy(struct flowvalve *fv)
1304 {
1305 free(fv->fv_p2ftab, M_DEVBUF);
1306 free(fv->fv_fves, M_DEVBUF);
1307 free(fv, M_DEVBUF);
1308 }
1309
1310 static inline int
1311 fv_p2f(struct flowvalve *fv, int p)
1312 {
1313 int val, f;
1314
1315 if (p >= BRTT_PMAX)
1316 f = fv->fv_p2ftab[BRTT_SIZE-1];
1317 else if ((val = (p & BRTT_MASK)))
1318 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1319 else
1320 f = fv->fv_p2ftab[1];
1321 return (f);
1322 }
1323
1324 /*
1325 * check if an arriving packet should be pre-dropped.
1326 * called from red_addq() when a packet arrives.
1327 * returns 1 when the packet should be pre-dropped.
1328 * should be called in splnet.
1329 */
1330 static int
1331 fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
1332 struct fve **fcache)
1333 {
1334 struct fve *fve;
1335 struct timeval now;
1336
1337 fv->fv_ifseq++;
1338 FV_TIMESTAMP(&now);
1339
1340 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1341 /* no matching entry in the flowlist */
1342 return (0);
1343
1344 *fcache = fve;
1345
1346 /* update fraction f for every FV_N packets */
1347 if (++fve->fve_count == FV_N) {
1348 /*
1349 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1350 */
1351 fve->fve_f =
1352 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1353 + fve->fve_f - FV_FUNSCALE(fve->fve_f);
1354 fve->fve_ifseq = fv->fv_ifseq;
1355 fve->fve_count = 0;
1356 }
1357
1358 /*
1359 * overpumping test
1360 */
1361 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1362 int fthresh;
1363
1364 /* calculate a threshold */
1365 fthresh = fv_p2f(fv, fve->fve_p);
1366 if (fve->fve_f > fthresh)
1367 fve->fve_state = Red;
1368 }
1369
1370 if (fve->fve_state == Red) {
1371 /*
1372 * backoff test
1373 */
1374 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1375 /* no drop for at least FV_BACKOFFTHRESH sec */
1376 fve->fve_p = 0;
1377 fve->fve_state = Green;
1378 #ifdef FV_STATS
1379 fv->fv_stats.escape++;
1380 #endif
1381 } else {
1382 /* block this flow */
1383 flowlist_move_to_head(fv, fve);
1384 fve->fve_lastdrop = now;
1385 #ifdef FV_STATS
1386 fv->fv_stats.predrop++;
1387 #endif
1388 return (1);
1389 }
1390 }
1391
1392 /*
1393 * p = (1 - Wp) * p
1394 */
1395 fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1396 if (fve->fve_p < 0)
1397 fve->fve_p = 0;
1398 #ifdef FV_STATS
1399 fv->fv_stats.pass++;
1400 #endif
1401 return (0);
1402 }
1403
1404 /*
1405 * called from red_addq when a packet is dropped by red.
1406 * should be called in splnet.
1407 */
1408 static void
1409 fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
1410 struct fve *fcache)
1411 {
1412 struct fve *fve;
1413 struct timeval now;
1414
1415 if (pktattr == NULL)
1416 return;
1417 FV_TIMESTAMP(&now);
1418
1419 if (fcache != NULL)
1420 /* the fve of this packet is already cached */
1421 fve = fcache;
1422 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1423 fve = flowlist_reclaim(fv, pktattr);
1424
1425 flowlist_move_to_head(fv, fve);
1426
1427 /*
1428 * update p: the following line cancels the update
1429 * in fv_checkflow() and calculate
1430 * p = Wp + (1 - Wp) * p
1431 */
1432 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1433
1434 fve->fve_lastdrop = now;
1435 }
1436
1437 #endif /* ALTQ_FLOWVALVE */
1438
#ifdef KLD_MODULE

/* switch table entry for the loadable-module build of the RED discipline */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
1448 #endif /* ALTQ3_COMPAT */
1449
1450 #endif /* ALTQ_RED */
1451