altq_red.c revision 1.21 1 /* $NetBSD: altq_red.c,v 1.21 2006/10/12 19:59:08 peter Exp $ */
2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.21 2006/10/12 19:59:08 peter Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_altq.h"
68 #include "opt_inet.h"
69 #endif
70
71 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/systm.h>
78 #include <sys/errno.h>
79 #include <sys/kauth.h>
80 #if 1 /* ALTQ3_COMPAT */
81 #include <sys/sockio.h>
82 #include <sys/proc.h>
83 #include <sys/kernel.h>
84 #include <sys/kauth.h>
85 #ifdef ALTQ_FLOWVALVE
86 #include <sys/queue.h>
87 #include <sys/time.h>
88 #endif
89 #endif /* ALTQ3_COMPAT */
90
91 #include <net/if.h>
92
93 #include <netinet/in.h>
94 #include <netinet/in_systm.h>
95 #include <netinet/ip.h>
96 #ifdef INET6
97 #include <netinet/ip6.h>
98 #endif
99
100 #include <net/pfvar.h>
101 #include <altq/altq.h>
102 #include <altq/altq_red.h>
103 #ifdef ALTQ3_COMPAT
104 #include <altq/altq_conf.h>
105 #ifdef ALTQ_FLOWVALVE
106 #include <altq/altq_flowvalve.h>
107 #endif
108 #endif
109
110 /*
111 * ALTQ/RED (Random Early Detection) implementation using 32-bit
112 * fixed-point calculation.
113 *
114 * written by kjc using the ns code as a reference.
115 * you can learn more about red and ns from Sally's home page at
116 * http://www-nrg.ee.lbl.gov/floyd/
117 *
118 * most of the red parameter values are fixed in this implementation
119 * to prevent fixed-point overflow/underflow.
120 * if you change the parameters, watch out for overflow/underflow!
121 *
122 * the parameters used are recommended values by Sally.
123 * the corresponding ns config looks:
124 * q_weight=0.00195
125 * minthresh=5 maxthresh=15 queue-size=60
126 * linterm=30
127 * dropmech=drop-tail
128 * bytes=false (can't be handled by 32-bit fixed-point)
129 * doubleq=false dqthresh=false
130 * wait=true
131 */
132 /*
133 * alternative red parameters for a slow link.
134 *
135 * assume the queue length becomes from zero to L and keeps L, it takes
136 * N packets for q_avg to reach 63% of L.
137 * when q_weight is 0.002, N is about 500 packets.
138 * for a slow link like dial-up, 500 packets takes more than 1 minute!
139 * when q_weight is 0.008, N is about 127 packets.
140 * when q_weight is 0.016, N is about 63 packets.
141 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
142 * are allowed for 0.016.
143 * see Sally's paper for more details.
144 */
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */
#define	RED_STATS		/* collect statistics */
167
168 /*
169 * our default policy for forced-drop is drop-tail.
170 * (in altq-1.1.2 or earlier, the default was random-drop.
171 * but it makes more sense to punish the cause of the surge.)
172 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
173 */
174
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

#endif /* ALTQ3_COMPAT */

/* default red parameter values (overridable via RED_SETDEFAULTS ioctl) */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;
200
201 #ifdef ALTQ3_COMPAT
202 /* internal function prototypes */
203 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
204 static struct mbuf *red_dequeue(struct ifaltq *, int);
205 static int red_request(struct ifaltq *, int, void *);
206 static void red_purgeq(red_queue_t *);
207 static int red_detach(red_queue_t *);
208 #ifdef ALTQ_FLOWVALVE
209 static inline struct fve *flowlist_lookup(struct flowvalve *,
210 struct altq_pktattr *, struct timeval *);
211 static inline struct fve *flowlist_reclaim(struct flowvalve *,
212 struct altq_pktattr *);
213 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
214 static inline int fv_p2f(struct flowvalve *, int);
215 static struct flowvalve *fv_alloc(struct red *);
216 static void fv_destroy(struct flowvalve *);
217 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
218 struct fve **);
219 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
220 struct fve *);
221 #endif
222 #endif /* ALTQ3_COMPAT */
223
224 /*
225 * red support routines
226 */
/*
 * red_alloc: allocate and initialize a red state object.
 *
 * weight:	inverse of the EWMA weight (must be a power of 2);
 *		0 selects a default based on the link speed.
 * inv_pmax:	inverse of the maximum drop probability; 0 selects default.
 * th_min, th_max: queue-length thresholds in packets; 0 selects defaults.
 * flags:	REDF_* flags (ECN marking, flowvalve, ...).
 * pkttime:	average packet transmission time in usec; 0 selects default.
 *
 * returns the new red_t, or NULL on allocation failure.
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	rp->red_avg = 0;
	rp->red_idle = 1;

	/* fall back to compiled-in defaults for any zero parameter */
	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	/* if weight was not a power of 2, round down and warn */
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared with other users of same weight) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
#endif /* ALTQ3_COMPAT */
	return (rp);
}
318
/*
 * red_destroy: free a red state object allocated by red_alloc,
 * including its (possibly shared) weight table and any flowvalve.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	/* wtab_destroy only frees the table when the last user is gone */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
331
332 void
333 red_getstats(red_t *rp, struct redstats *sp)
334 {
335 sp->q_avg = rp->red_avg >> rp->red_wshift;
336 sp->xmit_cnt = rp->red_stats.xmit_cnt;
337 sp->drop_cnt = rp->red_stats.drop_cnt;
338 sp->drop_forced = rp->red_stats.drop_forced;
339 sp->drop_unforced = rp->red_stats.drop_unforced;
340 sp->marked_packets = rp->red_stats.marked_packets;
341 }
342
/*
 * red_addq: RED enqueue decision for one arriving packet.
 *
 * updates the EWMA average queue length, then either queues the
 * packet, ECN-marks it, or drops it (early or forced).
 * returns 0 if the packet was queued, -1 if it was dropped
 * (the mbuf is freed here in the drop case).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped unconditionally */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
480
481 /*
482 * early-drop probability is calculated as follows:
483 * prob = p_max * (avg - th_min) / (th_max - th_min)
484 * prob_a = prob / (2 - count*prob)
485 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
486 * here prob_a increases as successive undrop count increases.
487 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
488 * becomes 1 when (count >= (2 / prob))).
489 */
490 int
491 drop_early(int fp_len, int fp_probd, int count)
492 {
493 int d; /* denominator of drop-probability */
494
495 d = fp_probd - count * fp_len;
496 if (d <= 0)
497 /* count exceeds the hard limit: drop or mark */
498 return (1);
499
500 /*
501 * now the range of d is [1..600] in fixed-point. (when
502 * th_max-th_min=10 and p_max=1/30)
503 * drop probability = (avg - TH_MIN) / d
504 */
505
506 if ((arc4random() % d) < fp_len) {
507 /* drop or mark */
508 return (1);
509 }
510 /* no drop/mark */
511 return (0);
512 }
513
514 /*
515 * try to mark CE bit to the packet.
516 * returns 1 if successfully marked, 0 otherwise.
517 */
/*
 * mark_ecn: try to set the CE (congestion experienced) bit in the
 * packet's IP/IPv6 header instead of dropping it.
 * returns 1 if successfully marked, 0 otherwise.
 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf *m0;
	struct m_tag *t;
	struct altq_tag *at;
	void *hdr;
	int af;

	/* prefer header info from the pf mbuf tag; fall back to pktattr */
	t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
	if (t != NULL) {
		at = (struct altq_tag *)(t + 1);
		/* NOTE(review): (t + 1) cannot be NULL; defensive check */
		if (at == NULL)
			return (0);
		af = at->af;
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return (0);

	if (af != AF_INET && af != AF_INET6)
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)hdr >= m0->m_data) &&
		    ((caddr_t)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return (0);
	}

	switch (af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum incrementally (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			/* the ECN field lives in bits 20-21 of ip6_flow */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 * (no checksum update needed for IPv6)
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
617
618 struct mbuf *
619 red_getq(red_t *rp, class_queue_t *q)
620 {
621 struct mbuf *m;
622
623 if ((m = _getq(q)) == NULL) {
624 if (rp->red_idle == 0) {
625 rp->red_idle = 1;
626 microtime(&rp->red_last);
627 }
628 return NULL;
629 }
630
631 rp->red_idle = 0;
632 return (m);
633 }
634
635 /*
636 * helper routine to calibrate avg during idle.
637 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
638 * here Wq = 1/weight and the code assumes Wq is close to zero.
639 *
640 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
641 */
642 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
643
644 struct wtab *
645 wtab_alloc(int weight)
646 {
647 struct wtab *w;
648 int i;
649
650 for (w = wtab_list; w != NULL; w = w->w_next)
651 if (w->w_weight == weight) {
652 w->w_refcount++;
653 return (w);
654 }
655
656 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
657 if (w == NULL)
658 panic("wtab_alloc: malloc failed!");
659 w->w_weight = weight;
660 w->w_refcount = 1;
661 w->w_next = wtab_list;
662 wtab_list = w;
663
664 /* initialize the weight table */
665 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
666 for (i = 1; i < 32; i++) {
667 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
668 if (w->w_tab[i] == 0 && w->w_param_max == 0)
669 w->w_param_max = 1 << i;
670 }
671
672 return (w);
673 }
674
675 int
676 wtab_destroy(struct wtab *w)
677 {
678 struct wtab *prev;
679
680 if (--w->w_refcount > 0)
681 return (0);
682
683 if (wtab_list == w)
684 wtab_list = w->w_next;
685 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
686 if (prev->w_next == w) {
687 prev->w_next = w->w_next;
688 break;
689 }
690
691 free(w, M_DEVBUF);
692 return (0);
693 }
694
695 int32_t
696 pow_w(struct wtab *w, int n)
697 {
698 int i, bit;
699 int32_t val;
700
701 if (n >= w->w_param_max)
702 return (0);
703
704 val = 1 << FP_SHIFT;
705 if (n <= 0)
706 return (val);
707
708 bit = 1;
709 i = 0;
710 while (n) {
711 if (n & bit) {
712 val = (val * w->w_tab[i]) >> FP_SHIFT;
713 n &= ~bit;
714 }
715 i++;
716 bit <<= 1;
717 }
718 return (val);
719 }
720
721 #ifdef ALTQ3_COMPAT
722 /*
723 * red device interface
724 */
725 altqdev_decl(red);
726
/*
 * redopen: character-device open entry point; intentionally a no-op.
 */
int
redopen(dev_t dev __unused, int flag __unused, int fmt __unused,
    struct lwp *l __unused)
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
734
735 int
736 redclose(dev_t dev __unused, int flag __unused, int fmt __unused,
737 struct lwp *l __unused)
738 {
739 red_queue_t *rqp;
740 int err, error = 0;
741
742 while ((rqp = red_list) != NULL) {
743 /* destroy all */
744 err = red_detach(rqp);
745 if (err != 0 && error == 0)
746 error = err;
747 }
748
749 return error;
750 }
751
752 int
753 redioctl(dev_t dev __unused, ioctlcmd_t cmd, caddr_t addr, int flag __unused,
754 struct lwp *l)
755 {
756 red_queue_t *rqp;
757 struct red_interface *ifacep;
758 struct ifnet *ifp;
759 struct proc *p = l->l_proc;
760 int error = 0;
761
762 /* check super-user privilege */
763 switch (cmd) {
764 case RED_GETSTATS:
765 break;
766 default:
767 #if (__FreeBSD_version > 400000)
768 if ((error = suser(p)) != 0)
769 #else
770 if ((error = kauth_authorize_generic(p->p_cred,
771 KAUTH_GENERIC_ISSUSER, &p->p_acflag)) != 0)
772 #endif
773 return (error);
774 break;
775 }
776
777 switch (cmd) {
778
779 case RED_ENABLE:
780 ifacep = (struct red_interface *)addr;
781 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
782 error = EBADF;
783 break;
784 }
785 error = altq_enable(rqp->rq_ifq);
786 break;
787
788 case RED_DISABLE:
789 ifacep = (struct red_interface *)addr;
790 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
791 error = EBADF;
792 break;
793 }
794 error = altq_disable(rqp->rq_ifq);
795 break;
796
797 case RED_IF_ATTACH:
798 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
799 if (ifp == NULL) {
800 error = ENXIO;
801 break;
802 }
803
804 /* allocate and initialize red_queue_t */
805 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
806 if (rqp == NULL) {
807 error = ENOMEM;
808 break;
809 }
810
811 rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
812 M_WAITOK|M_ZERO);
813 if (rqp->rq_q == NULL) {
814 free(rqp, M_DEVBUF);
815 error = ENOMEM;
816 break;
817 }
818
819 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
820 if (rqp->rq_red == NULL) {
821 free(rqp->rq_q, M_DEVBUF);
822 free(rqp, M_DEVBUF);
823 error = ENOMEM;
824 break;
825 }
826
827 rqp->rq_ifq = &ifp->if_snd;
828 qtail(rqp->rq_q) = NULL;
829 qlen(rqp->rq_q) = 0;
830 qlimit(rqp->rq_q) = RED_LIMIT;
831 qtype(rqp->rq_q) = Q_RED;
832
833 /*
834 * set RED to this ifnet structure.
835 */
836 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
837 red_enqueue, red_dequeue, red_request,
838 NULL, NULL);
839 if (error) {
840 red_destroy(rqp->rq_red);
841 free(rqp->rq_q, M_DEVBUF);
842 free(rqp, M_DEVBUF);
843 break;
844 }
845
846 /* add this state to the red list */
847 rqp->rq_next = red_list;
848 red_list = rqp;
849 break;
850
851 case RED_IF_DETACH:
852 ifacep = (struct red_interface *)addr;
853 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
854 error = EBADF;
855 break;
856 }
857 error = red_detach(rqp);
858 break;
859
860 case RED_GETSTATS:
861 do {
862 struct red_stats *q_stats;
863 red_t *rp;
864
865 q_stats = (struct red_stats *)addr;
866 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
867 ALTQT_RED)) == NULL) {
868 error = EBADF;
869 break;
870 }
871
872 q_stats->q_len = qlen(rqp->rq_q);
873 q_stats->q_limit = qlimit(rqp->rq_q);
874
875 rp = rqp->rq_red;
876 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
877 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
878 q_stats->drop_cnt = rp->red_stats.drop_cnt;
879 q_stats->drop_forced = rp->red_stats.drop_forced;
880 q_stats->drop_unforced = rp->red_stats.drop_unforced;
881 q_stats->marked_packets = rp->red_stats.marked_packets;
882
883 q_stats->weight = rp->red_weight;
884 q_stats->inv_pmax = rp->red_inv_pmax;
885 q_stats->th_min = rp->red_thmin;
886 q_stats->th_max = rp->red_thmax;
887
888 #ifdef ALTQ_FLOWVALVE
889 if (rp->red_flowvalve != NULL) {
890 struct flowvalve *fv = rp->red_flowvalve;
891 q_stats->fv_flows = fv->fv_flows;
892 q_stats->fv_pass = fv->fv_stats.pass;
893 q_stats->fv_predrop = fv->fv_stats.predrop;
894 q_stats->fv_alloc = fv->fv_stats.alloc;
895 q_stats->fv_escape = fv->fv_stats.escape;
896 } else {
897 #endif /* ALTQ_FLOWVALVE */
898 q_stats->fv_flows = 0;
899 q_stats->fv_pass = 0;
900 q_stats->fv_predrop = 0;
901 q_stats->fv_alloc = 0;
902 q_stats->fv_escape = 0;
903 #ifdef ALTQ_FLOWVALVE
904 }
905 #endif /* ALTQ_FLOWVALVE */
906 } while (/*CONSTCOND*/ 0);
907 break;
908
909 case RED_CONFIG:
910 do {
911 struct red_conf *fc;
912 red_t *new;
913 int s, limit;
914
915 fc = (struct red_conf *)addr;
916 if ((rqp = altq_lookup(fc->iface.red_ifname,
917 ALTQT_RED)) == NULL) {
918 error = EBADF;
919 break;
920 }
921 new = red_alloc(fc->red_weight,
922 fc->red_inv_pmax,
923 fc->red_thmin,
924 fc->red_thmax,
925 fc->red_flags,
926 fc->red_pkttime);
927 if (new == NULL) {
928 error = ENOMEM;
929 break;
930 }
931
932 s = splnet();
933 red_purgeq(rqp);
934 limit = fc->red_limit;
935 if (limit < fc->red_thmax)
936 limit = fc->red_thmax;
937 qlimit(rqp->rq_q) = limit;
938 fc->red_limit = limit; /* write back the new value */
939
940 red_destroy(rqp->rq_red);
941 rqp->rq_red = new;
942
943 splx(s);
944
945 /* write back new values */
946 fc->red_limit = limit;
947 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
948 fc->red_thmin = rqp->rq_red->red_thmin;
949 fc->red_thmax = rqp->rq_red->red_thmax;
950
951 } while (/*CONSTCOND*/ 0);
952 break;
953
954 case RED_SETDEFAULTS:
955 do {
956 struct redparams *rp;
957
958 rp = (struct redparams *)addr;
959
960 default_th_min = rp->th_min;
961 default_th_max = rp->th_max;
962 default_inv_pmax = rp->inv_pmax;
963 } while (/*CONSTCOND*/ 0);
964 break;
965
966 default:
967 error = EINVAL;
968 break;
969 }
970 return error;
971 }
972
973 static int
974 red_detach(red_queue_t *rqp)
975 {
976 red_queue_t *tmp;
977 int error = 0;
978
979 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
980 altq_disable(rqp->rq_ifq);
981
982 if ((error = altq_detach(rqp->rq_ifq)))
983 return (error);
984
985 if (red_list == rqp)
986 red_list = rqp->rq_next;
987 else {
988 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
989 if (tmp->rq_next == rqp) {
990 tmp->rq_next = rqp->rq_next;
991 break;
992 }
993 if (tmp == NULL)
994 printf("red_detach: no state found in red_list!\n");
995 }
996
997 red_destroy(rqp->rq_red);
998 free(rqp->rq_q, M_DEVBUF);
999 free(rqp, M_DEVBUF);
1000 return (error);
1001 }
1002
1003 /*
1004 * enqueue routine:
1005 *
1006 * returns: 0 when successfully queued.
1007 * ENOBUFS when drop occurs.
1008 */
1009 static int
1010 red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
1011 {
1012 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1013
1014 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1015 return ENOBUFS;
1016 ifq->ifq_len++;
1017 return 0;
1018 }
1019
1020 /*
1021 * dequeue routine:
1022 * must be called in splnet.
1023 *
1024 * returns: mbuf dequeued.
1025 * NULL when no packet is available in the queue.
1026 */
1027
1028 static struct mbuf *
1029 red_dequeue(struct ifaltq *ifq, int op)
1030 {
1031 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1032 struct mbuf *m;
1033
1034 if (op == ALTDQ_POLL)
1035 return qhead(rqp->rq_q);
1036
1037 /* op == ALTDQ_REMOVE */
1038 m = red_getq(rqp->rq_red, rqp->rq_q);
1039 if (m != NULL)
1040 ifq->ifq_len--;
1041 return (m);
1042 }
1043
1044 static int
1045 red_request(struct ifaltq *ifq, int req, void *arg __unused)
1046 {
1047 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1048
1049 switch (req) {
1050 case ALTRQ_PURGE:
1051 red_purgeq(rqp);
1052 break;
1053 }
1054 return (0);
1055 }
1056
1057 static void
1058 red_purgeq(red_queue_t *rqp)
1059 {
1060 _flushq(rqp->rq_q);
1061 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1062 rqp->rq_ifq->ifq_len = 0;
1063 }
1064
1065 #ifdef ALTQ_FLOWVALVE
1066
1067 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1068 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1069 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1070 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1071 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1072 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1073
1074 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1075 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1076
1077 #define FV_N 10 /* update fve_f every FV_N packets */
1078
1079 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1080 #define FV_TTHRESH 3 /* time threshold to delete fve */
1081 #define FV_ALPHA 5 /* extra packet count */
1082
1083 #define FV_STATS
1084
1085 #if (__FreeBSD_version > 300000) || defined(__HAVE_TIMECOUNTER)
1086 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1087 #else
1088 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1089 #endif
1090
1091 /*
1092 * Brtt table: 127 entry table to convert drop rate (p) to
1093 * the corresponding bandwidth fraction (f)
1094 * the following equation is implemented to use scaled values,
1095 * fve_p and fve_f, in the fixed point format.
1096 *
1097 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1098 * f = Brtt(p) / (max_th + alpha)
1099 */
1100 #define BRTT_SIZE 128
1101 #define BRTT_SHIFT 12
1102 #define BRTT_MASK 0x0007f000
1103 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1104
1105 const int brtt_tab[BRTT_SIZE] = {
1106 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1107 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1108 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1109 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1110 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1111 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1112 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1113 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1114 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1115 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1116 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1117 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1118 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1119 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1120 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1121 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1122 };
1123
/*
 * flowlist_lookup: search the flowvalve LRU list for the entry that
 * matches the src/dst addresses of this packet.
 * the list is in LRU order, so the scan stops at the first entry whose
 * last drop is older than FV_TTHRESH (everything behind it is older
 * still); such an entry is invalidated on the way out.
 * as a side effect, fv_flows is refreshed with the number of entries
 * scanned when no match is found.
 * returns the matching fve, or NULL.
 */
static inline struct fve *
flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct timeval *now)
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			/* tv_sec == 0 marks an unused entry */
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			/* tv_sec == 0 marks an unused entry */
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
			    &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
			    &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1191
1192 static inline struct fve *
1193 flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
1194 {
1195 struct fve *fve;
1196 struct ip *ip;
1197 #ifdef INET6
1198 struct ip6_hdr *ip6;
1199 #endif
1200
1201 /*
1202 * get an entry from the tail of the LRU list.
1203 */
1204 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1205
1206 switch (pktattr->pattr_af) {
1207 case AF_INET:
1208 ip = (struct ip *)pktattr->pattr_hdr;
1209 fve->fve_flow.flow_af = AF_INET;
1210 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1211 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1212 break;
1213 #ifdef INET6
1214 case AF_INET6:
1215 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1216 fve->fve_flow.flow_af = AF_INET6;
1217 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1218 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1219 break;
1220 #endif
1221 }
1222
1223 fve->fve_state = Green;
1224 fve->fve_p = 0.0;
1225 fve->fve_f = 0.0;
1226 fve->fve_ifseq = fv->fv_ifseq - 1;
1227 fve->fve_count = 0;
1228
1229 fv->fv_flows++;
1230 #ifdef FV_STATS
1231 fv->fv_stats.alloc++;
1232 #endif
1233 return (fve);
1234 }
1235
1236 static inline void
1237 flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
1238 {
1239 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1240 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1241 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1242 }
1243 }
1244
1245 /*
1246 * allocate flowvalve structure
1247 */
1248 static struct flowvalve *
1249 fv_alloc(struct red *rp)
1250 {
1251 struct flowvalve *fv;
1252 struct fve *fve;
1253 int i, num;
1254
1255 num = FV_FLOWLISTSIZE;
1256 fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
1257 if (fv == NULL)
1258 return (NULL);
1259
1260 fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
1261 M_WAITOK|M_ZERO);
1262 if (fv->fv_fves == NULL) {
1263 free(fv, M_DEVBUF);
1264 return (NULL);
1265 }
1266
1267 fv->fv_flows = 0;
1268 TAILQ_INIT(&fv->fv_flowlist);
1269 for (i = 0; i < num; i++) {
1270 fve = &fv->fv_fves[i];
1271 fve->fve_lastdrop.tv_sec = 0;
1272 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1273 }
1274
1275 /* initialize drop rate threshold in scaled fixed-point */
1276 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1277
1278 /* initialize drop rate to fraction table */
1279 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
1280 if (fv->fv_p2ftab == NULL) {
1281 free(fv->fv_fves, M_DEVBUF);
1282 free(fv, M_DEVBUF);
1283 return (NULL);
1284 }
1285 /*
1286 * create the p2f table.
1287 * (shift is used to keep the precision)
1288 */
1289 for (i = 1; i < BRTT_SIZE; i++) {
1290 int f;
1291
1292 f = brtt_tab[i] << 8;
1293 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1294 }
1295
1296 return (fv);
1297 }
1298
/*
 * release all resources of a flowvalve allocated by fv_alloc().
 * fv must be a fully initialized, non-NULL flowvalve.
 */
static void
fv_destroy(struct flowvalve *fv)
{
	free(fv->fv_p2ftab, M_DEVBUF);
	free(fv->fv_fves, M_DEVBUF);
	free(fv, M_DEVBUF);
}
1306
1307 static inline int
1308 fv_p2f(struct flowvalve *fv, int p)
1309 {
1310 int val, f;
1311
1312 if (p >= BRTT_PMAX)
1313 f = fv->fv_p2ftab[BRTT_SIZE-1];
1314 else if ((val = (p & BRTT_MASK)))
1315 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1316 else
1317 f = fv->fv_p2ftab[1];
1318 return (f);
1319 }
1320
1321 /*
1322 * check if an arriving packet should be pre-dropped.
1323 * called from red_addq() when a packet arrives.
1324 * returns 1 when the packet should be pre-dropped.
1325 * should be called in splnet.
1326 */
1327 static int
1328 fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
1329 struct fve **fcache)
1330 {
1331 struct fve *fve;
1332 struct timeval now;
1333
1334 fv->fv_ifseq++;
1335 FV_TIMESTAMP(&now);
1336
1337 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1338 /* no matching entry in the flowlist */
1339 return (0);
1340
1341 *fcache = fve;
1342
1343 /* update fraction f for every FV_N packets */
1344 if (++fve->fve_count == FV_N) {
1345 /*
1346 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1347 */
1348 fve->fve_f =
1349 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1350 + fve->fve_f - FV_FUNSCALE(fve->fve_f);
1351 fve->fve_ifseq = fv->fv_ifseq;
1352 fve->fve_count = 0;
1353 }
1354
1355 /*
1356 * overpumping test
1357 */
1358 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1359 int fthresh;
1360
1361 /* calculate a threshold */
1362 fthresh = fv_p2f(fv, fve->fve_p);
1363 if (fve->fve_f > fthresh)
1364 fve->fve_state = Red;
1365 }
1366
1367 if (fve->fve_state == Red) {
1368 /*
1369 * backoff test
1370 */
1371 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1372 /* no drop for at least FV_BACKOFFTHRESH sec */
1373 fve->fve_p = 0;
1374 fve->fve_state = Green;
1375 #ifdef FV_STATS
1376 fv->fv_stats.escape++;
1377 #endif
1378 } else {
1379 /* block this flow */
1380 flowlist_move_to_head(fv, fve);
1381 fve->fve_lastdrop = now;
1382 #ifdef FV_STATS
1383 fv->fv_stats.predrop++;
1384 #endif
1385 return (1);
1386 }
1387 }
1388
1389 /*
1390 * p = (1 - Wp) * p
1391 */
1392 fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1393 if (fve->fve_p < 0)
1394 fve->fve_p = 0;
1395 #ifdef FV_STATS
1396 fv->fv_stats.pass++;
1397 #endif
1398 return (0);
1399 }
1400
1401 /*
1402 * called from red_addq when a packet is dropped by red.
1403 * should be called in splnet.
1404 */
1405 static void
1406 fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
1407 struct fve *fcache)
1408 {
1409 struct fve *fve;
1410 struct timeval now;
1411
1412 if (pktattr == NULL)
1413 return;
1414 FV_TIMESTAMP(&now);
1415
1416 if (fcache != NULL)
1417 /* the fve of this packet is already cached */
1418 fve = fcache;
1419 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1420 fve = flowlist_reclaim(fv, pktattr);
1421
1422 flowlist_move_to_head(fv, fve);
1423
1424 /*
1425 * update p: the following line cancels the update
1426 * in fv_checkflow() and calculate
1427 * p = Wp + (1 - Wp) * p
1428 */
1429 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1430
1431 fve->fve_lastdrop = now;
1432 }
1433
1434 #endif /* ALTQ_FLOWVALVE */
1435
1436 #ifdef KLD_MODULE
1437
/*
 * entry points for the red discipline when built as a loadable kernel
 * module: device switch plus module registration/version glue.
 */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);
1443
1444 #endif /* KLD_MODULE */
1445 #endif /* ALTQ3_COMPAT */
1446
1447 #endif /* ALTQ_RED */
1448