altq_red.c revision 1.13.12.3 1 /* $NetBSD: altq_red.c,v 1.13.12.3 2006/06/09 19:52:35 peter Exp $ */
2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.12.3 2006/06/09 19:52:35 peter Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_altq.h"
68 #include "opt_inet.h"
69 #endif
70
71 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/systm.h>
78 #include <sys/errno.h>
79 #include <sys/kauth.h>
80 #if 1 /* ALTQ3_COMPAT */
81 #include <sys/sockio.h>
82 #include <sys/proc.h>
83 #include <sys/kernel.h>
84 #ifdef ALTQ_FLOWVALVE
85 #include <sys/queue.h>
86 #include <sys/time.h>
87 #endif
88 #endif /* ALTQ3_COMPAT */
89
90 #include <net/if.h>
91
92 #include <netinet/in.h>
93 #include <netinet/in_systm.h>
94 #include <netinet/ip.h>
95 #ifdef INET6
96 #include <netinet/ip6.h>
97 #endif
98
99 #include <net/pfvar.h>
100 #include <altq/altq.h>
101 #include <altq/altq_red.h>
102 #ifdef ALTQ3_COMPAT
103 #include <altq/altq_conf.h>
104 #ifdef ALTQ_FLOWVALVE
105 #include <altq/altq_flowvalve.h>
106 #endif
107 #endif
108
109 /*
110 * ALTQ/RED (Random Early Detection) implementation using 32-bit
111 * fixed-point calculation.
112 *
113 * written by kjc using the ns code as a reference.
114 * you can learn more about red and ns from Sally's home page at
115 * http://www-nrg.ee.lbl.gov/floyd/
116 *
117 * most of the red parameter values are fixed in this implementation
118 * to prevent fixed-point overflow/underflow.
119 * if you change the parameters, watch out for overflow/underflow!
120 *
121 * the parameters used are recommended values by Sally.
122 * the corresponding ns config looks:
123 * q_weight=0.00195
124 * minthresh=5 maxthresh=15 queue-size=60
125 * linterm=30
126 * dropmech=drop-tail
127 * bytes=false (can't be handled by 32-bit fixed-point)
128 * doubleq=false dqthresh=false
129 * wait=true
130 */
131 /*
132 * alternative red parameters for a slow link.
133 *
134 * assume the queue length becomes from zero to L and keeps L, it takes
135 * N packets for q_avg to reach 63% of L.
136 * when q_weight is 0.002, N is about 500 packets.
137 * for a slow link like dial-up, 500 packets takes more than 1 minute!
138 * when q_weight is 0.008, N is about 127 packets.
139 * when q_weight is 0.016, N is about 63 packets.
140 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
141 * are allowed for 0.016.
142 * see Sally's paper for more details.
143 */
144 /* normal red parameters */
145 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
146 /* q_weight = 0.00195 */
147
148 /* red parameters for a slow link */
149 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
150 /* q_weight = 0.0078125 */
151
152 /* red parameters for a very slow link (e.g., dialup) */
153 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
154 /* q_weight = 0.015625 */
155
156 /* fixed-point uses 12-bit decimal places */
157 #define FP_SHIFT 12 /* fixed-point shift */
158
159 /* red parameters for drop probability */
160 #define INV_P_MAX 10 /* inverse of max drop probability */
161 #define TH_MIN 5 /* min threshold */
162 #define TH_MAX 15 /* max threshold */
163
164 #define RED_LIMIT 60 /* default max queue lenght */
165 #define RED_STATS /* collect statistics */
166
167 /*
168 * our default policy for forced-drop is drop-tail.
169 * (in altq-1.1.2 or earlier, the default was random-drop.
170 * but it makes more sense to punish the cause of the surge.)
171 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
172 */
173
174 #ifdef ALTQ3_COMPAT
175 #ifdef ALTQ_FLOWVALVE
176 /*
177 * flow-valve is an extention to protect red from unresponsive flows
178 * and to promote end-to-end congestion control.
179 * flow-valve observes the average drop rates of the flows that have
180 * experienced packet drops in the recent past.
181 * when the average drop rate exceeds the threshold, the flow is
182 * blocked by the flow-valve. the trapped flow should back off
183 * exponentially to escape from the flow-valve.
184 */
185 #ifdef RED_RANDOM_DROP
186 #error "random-drop can't be used with flow-valve!"
187 #endif
188 #endif /* ALTQ_FLOWVALVE */
189
190 /* red_list keeps all red_queue_t's allocated. */
191 static red_queue_t *red_list = NULL;
192
193 #endif /* ALTQ3_COMPAT */
194
195 /* default red parameter values */
196 static int default_th_min = TH_MIN;
197 static int default_th_max = TH_MAX;
198 static int default_inv_pmax = INV_P_MAX;
199
200 #ifdef ALTQ3_COMPAT
201 /* internal function prototypes */
202 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
203 static struct mbuf *red_dequeue(struct ifaltq *, int);
204 static int red_request(struct ifaltq *, int, void *);
205 static void red_purgeq(red_queue_t *);
206 static int red_detach(red_queue_t *);
207 #ifdef ALTQ_FLOWVALVE
208 static inline struct fve *flowlist_lookup(struct flowvalve *,
209 struct altq_pktattr *, struct timeval *);
210 static inline struct fve *flowlist_reclaim(struct flowvalve *,
211 struct altq_pktattr *);
212 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
213 static inline int fv_p2f(struct flowvalve *, int);
214 static struct flowvalve *fv_alloc(struct red *);
215 static void fv_destroy(struct flowvalve *);
216 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
217 struct fve **);
218 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
219 struct fve *);
220 #endif
221 #endif /* ALTQ3_COMPAT */
222
223 /*
224 * red support routines
225 */
/*
 * Allocate and initialize a RED state structure.
 *
 * weight:   inverse of the EWMA weight; 0 selects a default, possibly
 *           adjusted for slow links based on pkttime.  should be a
 *           power of 2 (otherwise rounded down with a warning).
 * inv_pmax: inverse of the maximum drop probability; 0 => default.
 * th_min:   minimum average-queue threshold in packets; 0 => default.
 * th_max:   maximum average-queue threshold in packets; 0 => default.
 * flags:    REDF_* behavior flags (ECN marking, flow-valve, ...).
 * pkttime:  average packet transmission time in usec; 0 => default.
 *
 * Returns the new red_t, or NULL if allocation fails.
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	/* start idle with a zero average queue length */
	rp->red_avg = 0;
	rp->red_idle = 1;

	/* substitute compiled-in defaults for any unspecified parameter */
	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift. weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* weight was not a power of 2: round down and warn */
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared/refcounted per weight value) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	/* timestamp the start of the initial idle period */
	microtime(&rp->red_last);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
#endif /* ALTQ3_COMPAT */
	return (rp);
}
317
/*
 * Release a RED state structure allocated by red_alloc(): tears down
 * the flow-valve state (if configured), drops the reference on the
 * shared weight table, and frees the structure itself.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
330
331 void
332 red_getstats(red_t *rp, struct redstats *sp)
333 {
334 sp->q_avg = rp->red_avg >> rp->red_wshift;
335 sp->xmit_cnt = rp->red_stats.xmit_cnt;
336 sp->drop_cnt = rp->red_stats.drop_cnt;
337 sp->drop_forced = rp->red_stats.drop_forced;
338 sp->drop_unforced = rp->red_stats.drop_unforced;
339 sp->marked_packets = rp->red_stats.marked_packets;
340 }
341
/*
 * Enqueue m on q subject to RED early dropping/marking.
 *
 * Updates the EWMA average queue length, then classifies the arrival
 * as no-drop, unforced (early, probabilistic) drop, or forced drop.
 * When REDF_ECN is set, an ECN-capable packet selected for early drop
 * is CE-marked and queued instead of being dropped.
 *
 * Returns 0 if the packet was queued, -1 if a packet was dropped
 * (the dropped mbuf is freed here).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped unconditionally */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			/* idle time in usec, converted to a packet count */
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		/* restart the tally of undropped arrivals */
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
479
480 /*
481 * early-drop probability is calculated as follows:
482 * prob = p_max * (avg - th_min) / (th_max - th_min)
483 * prob_a = prob / (2 - count*prob)
484 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
485 * here prob_a increases as successive undrop count increases.
486 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
487 * becomes 1 when (count >= (2 / prob))).
488 */
489 int
490 drop_early(int fp_len, int fp_probd, int count)
491 {
492 int d; /* denominator of drop-probability */
493
494 d = fp_probd - count * fp_len;
495 if (d <= 0)
496 /* count exceeds the hard limit: drop or mark */
497 return (1);
498
499 /*
500 * now the range of d is [1..600] in fixed-point. (when
501 * th_max-th_min=10 and p_max=1/30)
502 * drop probability = (avg - TH_MIN) / d
503 */
504
505 if ((arc4random() % d) < fp_len) {
506 /* drop or mark */
507 return (1);
508 }
509 /* no drop/mark */
510 return (0);
511 }
512
513 /*
514 * try to mark CE bit to the packet.
515 * returns 1 if successfully marked, 0 otherwise.
516 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf *m0;
	struct m_tag *t;
	struct altq_tag *at;
	void *hdr;
	int af;

	/*
	 * locate the IP header: prefer the pf mbuf tag, fall back to the
	 * ALTQ3 packet attributes.
	 */
	t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
	if (t != NULL) {
		at = (struct altq_tag *)(t + 1);
		/* NOTE(review): (t + 1) can never be NULL; dead check */
		if (at == NULL)
			return (0);
		af = at->af;
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return (0);

	if (af != AF_INET && af != AF_INET6)
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)hdr >= m0->m_data) &&
		    ((caddr_t)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return (0);
	}

	switch (af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			/* only ECN-capable transports may be marked */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 * where m is the old TOS byte and m' the new one.
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			/* the ECN bits sit at bits 20-21 of ip6_flow */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 * (no checksum update needed for IPv6)
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
616
617 struct mbuf *
618 red_getq(rp, q)
619 red_t *rp;
620 class_queue_t *q;
621 {
622 struct mbuf *m;
623
624 if ((m = _getq(q)) == NULL) {
625 if (rp->red_idle == 0) {
626 rp->red_idle = 1;
627 microtime(&rp->red_last);
628 }
629 return NULL;
630 }
631
632 rp->red_idle = 0;
633 return (m);
634 }
635
/*
 * helper routine to calibrate avg during idle.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
 * here Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
 *
 * weight tables are shared (refcounted) between all red instances
 * that use the same weight value.
 */
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
644
645 struct wtab *
646 wtab_alloc(int weight)
647 {
648 struct wtab *w;
649 int i;
650
651 for (w = wtab_list; w != NULL; w = w->w_next)
652 if (w->w_weight == weight) {
653 w->w_refcount++;
654 return (w);
655 }
656
657 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
658 if (w == NULL)
659 panic("wtab_alloc: malloc failed!");
660 w->w_weight = weight;
661 w->w_refcount = 1;
662 w->w_next = wtab_list;
663 wtab_list = w;
664
665 /* initialize the weight table */
666 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
667 for (i = 1; i < 32; i++) {
668 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
669 if (w->w_tab[i] == 0 && w->w_param_max == 0)
670 w->w_param_max = 1 << i;
671 }
672
673 return (w);
674 }
675
676 int
677 wtab_destroy(struct wtab *w)
678 {
679 struct wtab *prev;
680
681 if (--w->w_refcount > 0)
682 return (0);
683
684 if (wtab_list == w)
685 wtab_list = w->w_next;
686 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
687 if (prev->w_next == w) {
688 prev->w_next = w->w_next;
689 break;
690 }
691
692 free(w, M_DEVBUF);
693 return (0);
694 }
695
696 int32_t
697 pow_w(struct wtab *w, int n)
698 {
699 int i, bit;
700 int32_t val;
701
702 if (n >= w->w_param_max)
703 return (0);
704
705 val = 1 << FP_SHIFT;
706 if (n <= 0)
707 return (val);
708
709 bit = 1;
710 i = 0;
711 while (n) {
712 if (n & bit) {
713 val = (val * w->w_tab[i]) >> FP_SHIFT;
714 n &= ~bit;
715 }
716 i++;
717 bit <<= 1;
718 }
719 return (val);
720 }
721
722 #ifdef ALTQ3_COMPAT
723 /*
724 * red device interface
725 */
726 altqdev_decl(red);
727
/*
 * Device open for the red control device.  All real initialization
 * happens when the queueing scheme is attached, so open always
 * succeeds.  (Converted from the obsolescent K&R definition to an
 * ANSI prototype, matching the rest of the file.)
 */
int
redopen(dev_t dev, int flag, int fmt, struct lwp *l)
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
737
738 int
739 redclose(dev, flag, fmt, l)
740 dev_t dev;
741 int flag, fmt;
742 struct lwp *l;
743 {
744 red_queue_t *rqp;
745 int err, error = 0;
746
747 while ((rqp = red_list) != NULL) {
748 /* destroy all */
749 err = red_detach(rqp);
750 if (err != 0 && error == 0)
751 error = err;
752 }
753
754 return error;
755 }
756
757 int
758 redioctl(dev, cmd, addr, flag, l)
759 dev_t dev;
760 ioctlcmd_t cmd;
761 caddr_t addr;
762 int flag;
763 struct lwp *l;
764 {
765 red_queue_t *rqp;
766 struct red_interface *ifacep;
767 struct ifnet *ifp;
768 struct proc *p = l->l_proc;
769 int error = 0;
770
771 /* check super-user privilege */
772 switch (cmd) {
773 case RED_GETSTATS:
774 break;
775 default:
776 #if (__FreeBSD_version > 400000)
777 if ((error = suser(p)) != 0)
778 #else
779 if ((error = kauth_authorize_generic(p->p_cred,
780 KAUTH_GENERIC_ISSUSER, &p->p_acflag)) != 0)
781 #endif
782 return (error);
783 break;
784 }
785
786 switch (cmd) {
787
788 case RED_ENABLE:
789 ifacep = (struct red_interface *)addr;
790 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
791 error = EBADF;
792 break;
793 }
794 error = altq_enable(rqp->rq_ifq);
795 break;
796
797 case RED_DISABLE:
798 ifacep = (struct red_interface *)addr;
799 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
800 error = EBADF;
801 break;
802 }
803 error = altq_disable(rqp->rq_ifq);
804 break;
805
806 case RED_IF_ATTACH:
807 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
808 if (ifp == NULL) {
809 error = ENXIO;
810 break;
811 }
812
813 /* allocate and initialize red_queue_t */
814 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
815 if (rqp == NULL) {
816 error = ENOMEM;
817 break;
818 }
819
820 rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
821 M_WAITOK|M_ZERO);
822 if (rqp->rq_q == NULL) {
823 free(rqp, M_DEVBUF);
824 error = ENOMEM;
825 break;
826 }
827
828 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
829 if (rqp->rq_red == NULL) {
830 free(rqp->rq_q, M_DEVBUF);
831 free(rqp, M_DEVBUF);
832 error = ENOMEM;
833 break;
834 }
835
836 rqp->rq_ifq = &ifp->if_snd;
837 qtail(rqp->rq_q) = NULL;
838 qlen(rqp->rq_q) = 0;
839 qlimit(rqp->rq_q) = RED_LIMIT;
840 qtype(rqp->rq_q) = Q_RED;
841
842 /*
843 * set RED to this ifnet structure.
844 */
845 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
846 red_enqueue, red_dequeue, red_request,
847 NULL, NULL);
848 if (error) {
849 red_destroy(rqp->rq_red);
850 free(rqp->rq_q, M_DEVBUF);
851 free(rqp, M_DEVBUF);
852 break;
853 }
854
855 /* add this state to the red list */
856 rqp->rq_next = red_list;
857 red_list = rqp;
858 break;
859
860 case RED_IF_DETACH:
861 ifacep = (struct red_interface *)addr;
862 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
863 error = EBADF;
864 break;
865 }
866 error = red_detach(rqp);
867 break;
868
869 case RED_GETSTATS:
870 do {
871 struct red_stats *q_stats;
872 red_t *rp;
873
874 q_stats = (struct red_stats *)addr;
875 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
876 ALTQT_RED)) == NULL) {
877 error = EBADF;
878 break;
879 }
880
881 q_stats->q_len = qlen(rqp->rq_q);
882 q_stats->q_limit = qlimit(rqp->rq_q);
883
884 rp = rqp->rq_red;
885 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
886 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
887 q_stats->drop_cnt = rp->red_stats.drop_cnt;
888 q_stats->drop_forced = rp->red_stats.drop_forced;
889 q_stats->drop_unforced = rp->red_stats.drop_unforced;
890 q_stats->marked_packets = rp->red_stats.marked_packets;
891
892 q_stats->weight = rp->red_weight;
893 q_stats->inv_pmax = rp->red_inv_pmax;
894 q_stats->th_min = rp->red_thmin;
895 q_stats->th_max = rp->red_thmax;
896
897 #ifdef ALTQ_FLOWVALVE
898 if (rp->red_flowvalve != NULL) {
899 struct flowvalve *fv = rp->red_flowvalve;
900 q_stats->fv_flows = fv->fv_flows;
901 q_stats->fv_pass = fv->fv_stats.pass;
902 q_stats->fv_predrop = fv->fv_stats.predrop;
903 q_stats->fv_alloc = fv->fv_stats.alloc;
904 q_stats->fv_escape = fv->fv_stats.escape;
905 } else {
906 #endif /* ALTQ_FLOWVALVE */
907 q_stats->fv_flows = 0;
908 q_stats->fv_pass = 0;
909 q_stats->fv_predrop = 0;
910 q_stats->fv_alloc = 0;
911 q_stats->fv_escape = 0;
912 #ifdef ALTQ_FLOWVALVE
913 }
914 #endif /* ALTQ_FLOWVALVE */
915 } while (/*CONSTCOND*/ 0);
916 break;
917
918 case RED_CONFIG:
919 do {
920 struct red_conf *fc;
921 red_t *new;
922 int s, limit;
923
924 fc = (struct red_conf *)addr;
925 if ((rqp = altq_lookup(fc->iface.red_ifname,
926 ALTQT_RED)) == NULL) {
927 error = EBADF;
928 break;
929 }
930 new = red_alloc(fc->red_weight,
931 fc->red_inv_pmax,
932 fc->red_thmin,
933 fc->red_thmax,
934 fc->red_flags,
935 fc->red_pkttime);
936 if (new == NULL) {
937 error = ENOMEM;
938 break;
939 }
940
941 s = splnet();
942 red_purgeq(rqp);
943 limit = fc->red_limit;
944 if (limit < fc->red_thmax)
945 limit = fc->red_thmax;
946 qlimit(rqp->rq_q) = limit;
947 fc->red_limit = limit; /* write back the new value */
948
949 red_destroy(rqp->rq_red);
950 rqp->rq_red = new;
951
952 splx(s);
953
954 /* write back new values */
955 fc->red_limit = limit;
956 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
957 fc->red_thmin = rqp->rq_red->red_thmin;
958 fc->red_thmax = rqp->rq_red->red_thmax;
959
960 } while (/*CONSTCOND*/ 0);
961 break;
962
963 case RED_SETDEFAULTS:
964 do {
965 struct redparams *rp;
966
967 rp = (struct redparams *)addr;
968
969 default_th_min = rp->th_min;
970 default_th_max = rp->th_max;
971 default_inv_pmax = rp->inv_pmax;
972 } while (/*CONSTCOND*/ 0);
973 break;
974
975 default:
976 error = EINVAL;
977 break;
978 }
979 return error;
980 }
981
982 static int
983 red_detach(rqp)
984 red_queue_t *rqp;
985 {
986 red_queue_t *tmp;
987 int error = 0;
988
989 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
990 altq_disable(rqp->rq_ifq);
991
992 if ((error = altq_detach(rqp->rq_ifq)))
993 return (error);
994
995 if (red_list == rqp)
996 red_list = rqp->rq_next;
997 else {
998 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
999 if (tmp->rq_next == rqp) {
1000 tmp->rq_next = rqp->rq_next;
1001 break;
1002 }
1003 if (tmp == NULL)
1004 printf("red_detach: no state found in red_list!\n");
1005 }
1006
1007 red_destroy(rqp->rq_red);
1008 free(rqp->rq_q, M_DEVBUF);
1009 free(rqp, M_DEVBUF);
1010 return (error);
1011 }
1012
1013 /*
1014 * enqueue routine:
1015 *
1016 * returns: 0 when successfully queued.
1017 * ENOBUFS when drop occurs.
1018 */
1019 static int
1020 red_enqueue(ifq, m, pktattr)
1021 struct ifaltq *ifq;
1022 struct mbuf *m;
1023 struct altq_pktattr *pktattr;
1024 {
1025 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1026
1027 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1028 return ENOBUFS;
1029 ifq->ifq_len++;
1030 return 0;
1031 }
1032
1033 /*
1034 * dequeue routine:
1035 * must be called in splnet.
1036 *
1037 * returns: mbuf dequeued.
1038 * NULL when no packet is available in the queue.
1039 */
1040
1041 static struct mbuf *
1042 red_dequeue(ifq, op)
1043 struct ifaltq *ifq;
1044 int op;
1045 {
1046 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1047 struct mbuf *m;
1048
1049 if (op == ALTDQ_POLL)
1050 return qhead(rqp->rq_q);
1051
1052 /* op == ALTDQ_REMOVE */
1053 m = red_getq(rqp->rq_red, rqp->rq_q);
1054 if (m != NULL)
1055 ifq->ifq_len--;
1056 return (m);
1057 }
1058
1059 static int
1060 red_request(ifq, req, arg)
1061 struct ifaltq *ifq;
1062 int req;
1063 void *arg;
1064 {
1065 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1066
1067 switch (req) {
1068 case ALTRQ_PURGE:
1069 red_purgeq(rqp);
1070 break;
1071 }
1072 return (0);
1073 }
1074
1075 static void
1076 red_purgeq(rqp)
1077 red_queue_t *rqp;
1078 {
1079 _flushq(rqp->rq_q);
1080 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1081 rqp->rq_ifq->ifq_len = 0;
1082 }
1083
1084 #ifdef ALTQ_FLOWVALVE
1085
1086 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1087 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1088 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1089 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1090 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1091 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1092
1093 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1094 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1095
1096 #define FV_N 10 /* update fve_f every FV_N packets */
1097
1098 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1099 #define FV_TTHRESH 3 /* time threshold to delete fve */
1100 #define FV_ALPHA 5 /* extra packet count */
1101
1102 #define FV_STATS
1103
1104 #if (__FreeBSD_version > 300000)
1105 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1106 #else
1107 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1108 #endif
1109
1110 /*
1111 * Brtt table: 127 entry table to convert drop rate (p) to
1112 * the corresponding bandwidth fraction (f)
1113 * the following equation is implemented to use scaled values,
1114 * fve_p and fve_f, in the fixed point format.
1115 *
1116 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1117 * f = Brtt(p) / (max_th + alpha)
1118 */
1119 #define BRTT_SIZE 128
1120 #define BRTT_SHIFT 12
1121 #define BRTT_MASK 0x0007f000
1122 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1123
/* precomputed Brtt(p) values (see formula above), indexed by scaled p */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1142
/*
 * Look up the flow-valve entry matching this packet's flow.
 *
 * Walks the LRU-ordered flow list, matching on the IPv4 or IPv6
 * src/dst address pair from the packet attributes.  The scan stops
 * early at the first unused entry, or at the first entry whose last
 * drop is older than FV_TTHRESH seconds (which is then invalidated) —
 * later entries are at least as old because of the LRU ordering.
 * When the scan completes without a match, fv->fv_flows is refreshed
 * with the number of active entries seen.
 *
 * Returns the matching fve, or NULL if none was found (or the packet
 * is neither IPv4 nor IPv6).
 */
static inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries last hit before tthresh are considered expired */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* unused entry: end of active part */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate and stop (LRU order) */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* unused entry: end of active part */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate and stop (LRU order) */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
			    &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
			    &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1212
1213 static inline struct fve *
1214 flowlist_reclaim(fv, pktattr)
1215 struct flowvalve *fv;
1216 struct altq_pktattr *pktattr;
1217 {
1218 struct fve *fve;
1219 struct ip *ip;
1220 #ifdef INET6
1221 struct ip6_hdr *ip6;
1222 #endif
1223
1224 /*
1225 * get an entry from the tail of the LRU list.
1226 */
1227 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1228
1229 switch (pktattr->pattr_af) {
1230 case AF_INET:
1231 ip = (struct ip *)pktattr->pattr_hdr;
1232 fve->fve_flow.flow_af = AF_INET;
1233 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1234 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1235 break;
1236 #ifdef INET6
1237 case AF_INET6:
1238 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1239 fve->fve_flow.flow_af = AF_INET6;
1240 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1241 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1242 break;
1243 #endif
1244 }
1245
1246 fve->fve_state = Green;
1247 fve->fve_p = 0.0;
1248 fve->fve_f = 0.0;
1249 fve->fve_ifseq = fv->fv_ifseq - 1;
1250 fve->fve_count = 0;
1251
1252 fv->fv_flows++;
1253 #ifdef FV_STATS
1254 fv->fv_stats.alloc++;
1255 #endif
1256 return (fve);
1257 }
1258
1259 static inline void
1260 flowlist_move_to_head(fv, fve)
1261 struct flowvalve *fv;
1262 struct fve *fve;
1263 {
1264 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1265 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1266 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1267 }
1268 }
1269
1270 /*
1271 * allocate flowvalve structure
1272 */
1273 static struct flowvalve *
1274 fv_alloc(rp)
1275 struct red *rp;
1276 {
1277 struct flowvalve *fv;
1278 struct fve *fve;
1279 int i, num;
1280
1281 num = FV_FLOWLISTSIZE;
1282 fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
1283 if (fv == NULL)
1284 return (NULL);
1285
1286 fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
1287 M_WAITOK|M_ZERO);
1288 if (fv->fv_fves == NULL) {
1289 free(fv, M_DEVBUF);
1290 return (NULL);
1291 }
1292
1293 fv->fv_flows = 0;
1294 TAILQ_INIT(&fv->fv_flowlist);
1295 for (i = 0; i < num; i++) {
1296 fve = &fv->fv_fves[i];
1297 fve->fve_lastdrop.tv_sec = 0;
1298 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1299 }
1300
1301 /* initialize drop rate threshold in scaled fixed-point */
1302 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1303
1304 /* initialize drop rate to fraction table */
1305 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
1306 if (fv->fv_p2ftab == NULL) {
1307 free(fv->fv_fves, M_DEVBUF);
1308 free(fv, M_DEVBUF);
1309 return (NULL);
1310 }
1311 /*
1312 * create the p2f table.
1313 * (shift is used to keep the precision)
1314 */
1315 for (i = 1; i < BRTT_SIZE; i++) {
1316 int f;
1317
1318 f = brtt_tab[i] << 8;
1319 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1320 }
1321
1322 return (fv);
1323 }
1324
1325 static void fv_destroy(fv)
1326 struct flowvalve *fv;
1327 {
1328 free(fv->fv_p2ftab, M_DEVBUF);
1329 free(fv->fv_fves, M_DEVBUF);
1330 free(fv, M_DEVBUF);
1331 }
1332
1333 static inline int
1334 fv_p2f(fv, p)
1335 struct flowvalve *fv;
1336 int p;
1337 {
1338 int val, f;
1339
1340 if (p >= BRTT_PMAX)
1341 f = fv->fv_p2ftab[BRTT_SIZE-1];
1342 else if ((val = (p & BRTT_MASK)))
1343 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1344 else
1345 f = fv->fv_p2ftab[1];
1346 return (f);
1347 }
1348
1349 /*
1350 * check if an arriving packet should be pre-dropped.
1351 * called from red_addq() when a packet arrives.
1352 * returns 1 when the packet should be pre-dropped.
1353 * should be called in splnet.
1354 */
1355 static int
1356 fv_checkflow(fv, pktattr, fcache)
1357 struct flowvalve *fv;
1358 struct altq_pktattr *pktattr;
1359 struct fve **fcache;
1360 {
1361 struct fve *fve;
1362 struct timeval now;
1363
1364 fv->fv_ifseq++;
1365 FV_TIMESTAMP(&now);
1366
1367 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1368 /* no matching entry in the flowlist */
1369 return (0);
1370
1371 *fcache = fve;
1372
1373 /* update fraction f for every FV_N packets */
1374 if (++fve->fve_count == FV_N) {
1375 /*
1376 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1377 */
1378 fve->fve_f =
1379 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1380 + fve->fve_f - FV_FUNSCALE(fve->fve_f);
1381 fve->fve_ifseq = fv->fv_ifseq;
1382 fve->fve_count = 0;
1383 }
1384
1385 /*
1386 * overpumping test
1387 */
1388 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1389 int fthresh;
1390
1391 /* calculate a threshold */
1392 fthresh = fv_p2f(fv, fve->fve_p);
1393 if (fve->fve_f > fthresh)
1394 fve->fve_state = Red;
1395 }
1396
1397 if (fve->fve_state == Red) {
1398 /*
1399 * backoff test
1400 */
1401 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1402 /* no drop for at least FV_BACKOFFTHRESH sec */
1403 fve->fve_p = 0;
1404 fve->fve_state = Green;
1405 #ifdef FV_STATS
1406 fv->fv_stats.escape++;
1407 #endif
1408 } else {
1409 /* block this flow */
1410 flowlist_move_to_head(fv, fve);
1411 fve->fve_lastdrop = now;
1412 #ifdef FV_STATS
1413 fv->fv_stats.predrop++;
1414 #endif
1415 return (1);
1416 }
1417 }
1418
1419 /*
1420 * p = (1 - Wp) * p
1421 */
1422 fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1423 if (fve->fve_p < 0)
1424 fve->fve_p = 0;
1425 #ifdef FV_STATS
1426 fv->fv_stats.pass++;
1427 #endif
1428 return (0);
1429 }
1430
1431 /*
1432 * called from red_addq when a packet is dropped by red.
1433 * should be called in splnet.
1434 */
1435 static void fv_dropbyred(fv, pktattr, fcache)
1436 struct flowvalve *fv;
1437 struct altq_pktattr *pktattr;
1438 struct fve *fcache;
1439 {
1440 struct fve *fve;
1441 struct timeval now;
1442
1443 if (pktattr == NULL)
1444 return;
1445 FV_TIMESTAMP(&now);
1446
1447 if (fcache != NULL)
1448 /* the fve of this packet is already cached */
1449 fve = fcache;
1450 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1451 fve = flowlist_reclaim(fv, pktattr);
1452
1453 flowlist_move_to_head(fv, fve);
1454
1455 /*
1456 * update p: the following line cancels the update
1457 * in fv_checkflow() and calculate
1458 * p = Wp + (1 - Wp) * p
1459 */
1460 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1461
1462 fve->fve_lastdrop = now;
1463 }
1464
1465 #endif /* ALTQ_FLOWVALVE */
1466
#ifdef KLD_MODULE

/* device switch entry used when red is built as a loadable module */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
1476 #endif /* ALTQ3_COMPAT */
1477
1478 #endif /* ALTQ_RED */
1479