altq_red.c revision 1.13.12.2 1 /* $NetBSD: altq_red.c,v 1.13.12.2 2006/03/18 18:18:53 peter Exp $ */
2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.12.2 2006/03/18 18:18:53 peter Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_altq.h"
68 #include "opt_inet.h"
69 #endif
70
71 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/systm.h>
78 #include <sys/errno.h>
79 #if 1 /* ALTQ3_COMPAT */
80 #include <sys/sockio.h>
81 #include <sys/proc.h>
82 #include <sys/kernel.h>
83 #ifdef ALTQ_FLOWVALVE
84 #include <sys/queue.h>
85 #include <sys/time.h>
86 #endif
87 #endif /* ALTQ3_COMPAT */
88
89 #include <net/if.h>
90
91 #include <netinet/in.h>
92 #include <netinet/in_systm.h>
93 #include <netinet/ip.h>
94 #ifdef INET6
95 #include <netinet/ip6.h>
96 #endif
97
98 #include <net/pfvar.h>
99 #include <altq/altq.h>
100 #include <altq/altq_red.h>
101 #ifdef ALTQ3_COMPAT
102 #include <altq/altq_conf.h>
103 #ifdef ALTQ_FLOWVALVE
104 #include <altq/altq_flowvalve.h>
105 #endif
106 #endif
107
108 /*
109 * ALTQ/RED (Random Early Detection) implementation using 32-bit
110 * fixed-point calculation.
111 *
112 * written by kjc using the ns code as a reference.
113 * you can learn more about red and ns from Sally's home page at
114 * http://www-nrg.ee.lbl.gov/floyd/
115 *
116 * most of the red parameter values are fixed in this implementation
117 * to prevent fixed-point overflow/underflow.
118 * if you change the parameters, watch out for overflow/underflow!
119 *
120 * the parameters used are recommended values by Sally.
121 * the corresponding ns config looks:
122 * q_weight=0.00195
123 * minthresh=5 maxthresh=15 queue-size=60
124 * linterm=30
125 * dropmech=drop-tail
126 * bytes=false (can't be handled by 32-bit fixed-point)
127 * doubleq=false dqthresh=false
128 * wait=true
129 */
130 /*
131 * alternative red parameters for a slow link.
132 *
133 * assume the queue length becomes from zero to L and keeps L, it takes
134 * N packets for q_avg to reach 63% of L.
135 * when q_weight is 0.002, N is about 500 packets.
136 * for a slow link like dial-up, 500 packets takes more than 1 minute!
137 * when q_weight is 0.008, N is about 127 packets.
138 * when q_weight is 0.016, N is about 63 packets.
139 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
140 * are allowed for 0.016.
141 * see Sally's paper for more details.
142 */
143 /* normal red parameters */
144 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
145 /* q_weight = 0.00195 */
146
147 /* red parameters for a slow link */
148 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
149 /* q_weight = 0.0078125 */
150
151 /* red parameters for a very slow link (e.g., dialup) */
152 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
153 /* q_weight = 0.015625 */
154
155 /* fixed-point uses 12-bit decimal places */
156 #define FP_SHIFT 12 /* fixed-point shift */
157
158 /* red parameters for drop probability */
159 #define INV_P_MAX 10 /* inverse of max drop probability */
160 #define TH_MIN 5 /* min threshold */
161 #define TH_MAX 15 /* max threshold */
162
#define RED_LIMIT	60	/* default max queue length */
164 #define RED_STATS /* collect statistics */
165
166 /*
167 * our default policy for forced-drop is drop-tail.
168 * (in altq-1.1.2 or earlier, the default was random-drop.
169 * but it makes more sense to punish the cause of the surge.)
170 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
171 */
172
173 #ifdef ALTQ3_COMPAT
174 #ifdef ALTQ_FLOWVALVE
175 /*
 * flow-valve is an extension to protect red from unresponsive flows
177 * and to promote end-to-end congestion control.
178 * flow-valve observes the average drop rates of the flows that have
179 * experienced packet drops in the recent past.
180 * when the average drop rate exceeds the threshold, the flow is
181 * blocked by the flow-valve. the trapped flow should back off
182 * exponentially to escape from the flow-valve.
183 */
184 #ifdef RED_RANDOM_DROP
185 #error "random-drop can't be used with flow-valve!"
186 #endif
187 #endif /* ALTQ_FLOWVALVE */
188
189 /* red_list keeps all red_queue_t's allocated. */
190 static red_queue_t *red_list = NULL;
191
192 #endif /* ALTQ3_COMPAT */
193
194 /* default red parameter values */
195 static int default_th_min = TH_MIN;
196 static int default_th_max = TH_MAX;
197 static int default_inv_pmax = INV_P_MAX;
198
199 #ifdef ALTQ3_COMPAT
200 /* internal function prototypes */
201 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
202 static struct mbuf *red_dequeue(struct ifaltq *, int);
203 static int red_request(struct ifaltq *, int, void *);
204 static void red_purgeq(red_queue_t *);
205 static int red_detach(red_queue_t *);
206 #ifdef ALTQ_FLOWVALVE
207 static inline struct fve *flowlist_lookup(struct flowvalve *,
208 struct altq_pktattr *, struct timeval *);
209 static inline struct fve *flowlist_reclaim(struct flowvalve *,
210 struct altq_pktattr *);
211 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
212 static inline int fv_p2f(struct flowvalve *, int);
213 static struct flowvalve *fv_alloc(struct red *);
214 static void fv_destroy(struct flowvalve *);
215 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
216 struct fve **);
217 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
218 struct fve *);
219 #endif
220 #endif /* ALTQ3_COMPAT */
221
222 /*
223 * red support routines
224 */
225 red_t *
226 red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
227 int pkttime)
228 {
229 red_t *rp;
230 int w, i;
231 int npkts_per_sec;
232
233 MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
234 if (rp == NULL)
235 return (NULL);
236 (void)memset(rp, 0, sizeof(red_t));
237
238 rp->red_avg = 0;
239 rp->red_idle = 1;
240
241 if (weight == 0)
242 rp->red_weight = W_WEIGHT;
243 else
244 rp->red_weight = weight;
245 if (inv_pmax == 0)
246 rp->red_inv_pmax = default_inv_pmax;
247 else
248 rp->red_inv_pmax = inv_pmax;
249 if (th_min == 0)
250 rp->red_thmin = default_th_min;
251 else
252 rp->red_thmin = th_min;
253 if (th_max == 0)
254 rp->red_thmax = default_th_max;
255 else
256 rp->red_thmax = th_max;
257
258 rp->red_flags = flags;
259
260 if (pkttime == 0)
261 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
262 rp->red_pkttime = 800;
263 else
264 rp->red_pkttime = pkttime;
265
266 if (weight == 0) {
267 /* when the link is very slow, adjust red parameters */
268 npkts_per_sec = 1000000 / rp->red_pkttime;
269 if (npkts_per_sec < 50) {
270 /* up to about 400Kbps */
271 rp->red_weight = W_WEIGHT_2;
272 } else if (npkts_per_sec < 300) {
273 /* up to about 2.4Mbps */
274 rp->red_weight = W_WEIGHT_1;
275 }
276 }
277
278 /* calculate wshift. weight must be power of 2 */
279 w = rp->red_weight;
280 for (i = 0; w > 1; i++)
281 w = w >> 1;
282 rp->red_wshift = i;
283 w = 1 << rp->red_wshift;
284 if (w != rp->red_weight) {
285 printf("invalid weight value %d for red! use %d\n",
286 rp->red_weight, w);
287 rp->red_weight = w;
288 }
289
290 /*
291 * thmin_s and thmax_s are scaled versions of th_min and th_max
292 * to be compared with avg.
293 */
294 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
295 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
296
297 /*
298 * precompute probability denominator
299 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
300 */
301 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
302 * rp->red_inv_pmax) << FP_SHIFT;
303
304 /* allocate weight table */
305 rp->red_wtab = wtab_alloc(rp->red_weight);
306
307 microtime(&rp->red_last);
308 #ifdef ALTQ3_COMPAT
309 #ifdef ALTQ_FLOWVALVE
310 if (flags & REDF_FLOWVALVE)
311 rp->red_flowvalve = fv_alloc(rp);
312 /* if fv_alloc failes, flowvalve is just disabled */
313 #endif
314 #endif /* ALTQ3_COMPAT */
315 return (rp);
316 }
317
318 void
319 red_destroy(red_t *rp)
320 {
321 #ifdef ALTQ3_COMPAT
322 #ifdef ALTQ_FLOWVALVE
323 if (rp->red_flowvalve != NULL)
324 fv_destroy(rp->red_flowvalve);
325 #endif
326 #endif /* ALTQ3_COMPAT */
327 wtab_destroy(rp->red_wtab);
328 FREE(rp, M_DEVBUF);
329 }
330
331 void
332 red_getstats(red_t *rp, struct redstats *sp)
333 {
334 sp->q_avg = rp->red_avg >> rp->red_wshift;
335 sp->xmit_cnt = rp->red_stats.xmit_cnt;
336 sp->drop_cnt = rp->red_stats.drop_cnt;
337 sp->drop_forced = rp->red_stats.drop_forced;
338 sp->drop_unforced = rp->red_stats.drop_unforced;
339 sp->marked_packets = rp->red_stats.marked_packets;
340 }
341
342 int
343 red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
344 struct altq_pktattr *pktattr)
345 {
346 int avg, droptype;
347 int n;
348 #ifdef ALTQ3_COMPAT
349 #ifdef ALTQ_FLOWVALVE
350 struct fve *fve = NULL;
351
352 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
353 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
354 m_freem(m);
355 return (-1);
356 }
357 #endif
358 #endif /* ALTQ3_COMPAT */
359
360 avg = rp->red_avg;
361
362 /*
363 * if we were idle, we pretend that n packets arrived during
364 * the idle period.
365 */
366 if (rp->red_idle) {
367 struct timeval now;
368 int t;
369
370 rp->red_idle = 0;
371 microtime(&now);
372 t = (now.tv_sec - rp->red_last.tv_sec);
373 if (t > 60) {
374 /*
375 * being idle for more than 1 minute, set avg to zero.
376 * this prevents t from overflow.
377 */
378 avg = 0;
379 } else {
380 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
381 n = t / rp->red_pkttime - 1;
382
383 /* the following line does (avg = (1 - Wq)^n * avg) */
384 if (n > 0)
385 avg = (avg >> FP_SHIFT) *
386 pow_w(rp->red_wtab, n);
387 }
388 }
389
390 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
391 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
392 rp->red_avg = avg; /* save the new value */
393
394 /*
395 * red_count keeps a tally of arriving traffic that has not
396 * been dropped.
397 */
398 rp->red_count++;
399
400 /* see if we drop early */
401 droptype = DTYPE_NODROP;
402 if (avg >= rp->red_thmin_s && qlen(q) > 1) {
403 if (avg >= rp->red_thmax_s) {
404 /* avg >= th_max: forced drop */
405 droptype = DTYPE_FORCED;
406 } else if (rp->red_old == 0) {
407 /* first exceeds th_min */
408 rp->red_count = 1;
409 rp->red_old = 1;
410 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
411 rp->red_probd, rp->red_count)) {
412 /* mark or drop by red */
413 if ((rp->red_flags & REDF_ECN) &&
414 mark_ecn(m, pktattr, rp->red_flags)) {
415 /* successfully marked. do not drop. */
416 rp->red_count = 0;
417 #ifdef RED_STATS
418 rp->red_stats.marked_packets++;
419 #endif
420 } else {
421 /* unforced drop by red */
422 droptype = DTYPE_EARLY;
423 }
424 }
425 } else {
426 /* avg < th_min */
427 rp->red_old = 0;
428 }
429
430 /*
431 * if the queue length hits the hard limit, it's a forced drop.
432 */
433 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
434 droptype = DTYPE_FORCED;
435
436 #ifdef RED_RANDOM_DROP
437 /* if successful or forced drop, enqueue this packet. */
438 if (droptype != DTYPE_EARLY)
439 _addq(q, m);
440 #else
441 /* if successful, enqueue this packet. */
442 if (droptype == DTYPE_NODROP)
443 _addq(q, m);
444 #endif
445 if (droptype != DTYPE_NODROP) {
446 if (droptype == DTYPE_EARLY) {
447 /* drop the incoming packet */
448 #ifdef RED_STATS
449 rp->red_stats.drop_unforced++;
450 #endif
451 } else {
452 /* forced drop, select a victim packet in the queue. */
453 #ifdef RED_RANDOM_DROP
454 m = _getq_random(q);
455 #endif
456 #ifdef RED_STATS
457 rp->red_stats.drop_forced++;
458 #endif
459 }
460 #ifdef RED_STATS
461 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
462 #endif
463 rp->red_count = 0;
464 #ifdef ALTQ3_COMPAT
465 #ifdef ALTQ_FLOWVALVE
466 if (rp->red_flowvalve != NULL)
467 fv_dropbyred(rp->red_flowvalve, pktattr, fve);
468 #endif
469 #endif /* ALTQ3_COMPAT */
470 m_freem(m);
471 return (-1);
472 }
473 /* successfully queued */
474 #ifdef RED_STATS
475 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
476 #endif
477 return (0);
478 }
479
480 /*
481 * early-drop probability is calculated as follows:
482 * prob = p_max * (avg - th_min) / (th_max - th_min)
483 * prob_a = prob / (2 - count*prob)
484 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
485 * here prob_a increases as successive undrop count increases.
486 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
487 * becomes 1 when (count >= (2 / prob))).
488 */
489 int
490 drop_early(int fp_len, int fp_probd, int count)
491 {
492 int d; /* denominator of drop-probability */
493
494 d = fp_probd - count * fp_len;
495 if (d <= 0)
496 /* count exceeds the hard limit: drop or mark */
497 return (1);
498
499 /*
500 * now the range of d is [1..600] in fixed-point. (when
501 * th_max-th_min=10 and p_max=1/30)
502 * drop probability = (avg - TH_MIN) / d
503 */
504
505 if ((arc4random() % d) < fp_len) {
506 /* drop or mark */
507 return (1);
508 }
509 /* no drop/mark */
510 return (0);
511 }
512
513 /*
514 * try to mark CE bit to the packet.
515 * returns 1 if successfully marked, 0 otherwise.
516 */
517 int
518 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
519 {
520 struct mbuf *m0;
521 struct m_tag *t;
522 struct altq_tag *at;
523 void *hdr;
524 int af;
525
526 t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
527 if (t != NULL) {
528 at = (struct altq_tag *)(t + 1);
529 if (at == NULL)
530 return (0);
531 af = at->af;
532 hdr = at->hdr;
533 #ifdef ALTQ3_COMPAT
534 } else if (pktattr != NULL) {
535 af = pktattr->pattr_af;
536 hdr = pktattr->pattr_hdr;
537 #endif /* ALTQ3_COMPAT */
538 } else
539 return (0);
540
541 if (af != AF_INET && af != AF_INET6)
542 return (0);
543
544 /* verify that pattr_hdr is within the mbuf data */
545 for (m0 = m; m0 != NULL; m0 = m0->m_next)
546 if (((caddr_t)hdr >= m0->m_data) &&
547 ((caddr_t)hdr < m0->m_data + m0->m_len))
548 break;
549 if (m0 == NULL) {
550 /* ick, tag info is stale */
551 return (0);
552 }
553
554 switch (af) {
555 case AF_INET:
556 if (flags & REDF_ECN4) {
557 struct ip *ip = hdr;
558 u_int8_t otos;
559 int sum;
560
561 if (ip->ip_v != 4)
562 return (0); /* version mismatch! */
563
564 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
565 return (0); /* not-ECT */
566 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
567 return (1); /* already marked */
568
569 /*
570 * ecn-capable but not marked,
571 * mark CE and update checksum
572 */
573 otos = ip->ip_tos;
574 ip->ip_tos |= IPTOS_ECN_CE;
575 /*
576 * update checksum (from RFC1624)
577 * HC' = ~(~HC + ~m + m')
578 */
579 sum = ~ntohs(ip->ip_sum) & 0xffff;
580 sum += (~otos & 0xffff) + ip->ip_tos;
581 sum = (sum >> 16) + (sum & 0xffff);
582 sum += (sum >> 16); /* add carry */
583 ip->ip_sum = htons(~sum & 0xffff);
584 return (1);
585 }
586 break;
587 #ifdef INET6
588 case AF_INET6:
589 if (flags & REDF_ECN6) {
590 struct ip6_hdr *ip6 = hdr;
591 u_int32_t flowlabel;
592
593 flowlabel = ntohl(ip6->ip6_flow);
594 if ((flowlabel >> 28) != 6)
595 return (0); /* version mismatch! */
596 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
597 (IPTOS_ECN_NOTECT << 20))
598 return (0); /* not-ECT */
599 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
600 (IPTOS_ECN_CE << 20))
601 return (1); /* already marked */
602 /*
603 * ecn-capable but not marked, mark CE
604 */
605 flowlabel |= (IPTOS_ECN_CE << 20);
606 ip6->ip6_flow = htonl(flowlabel);
607 return (1);
608 }
609 break;
610 #endif /* INET6 */
611 }
612
613 /* not marked */
614 return (0);
615 }
616
617 struct mbuf *
618 red_getq(rp, q)
619 red_t *rp;
620 class_queue_t *q;
621 {
622 struct mbuf *m;
623
624 if ((m = _getq(q)) == NULL) {
625 if (rp->red_idle == 0) {
626 rp->red_idle = 1;
627 microtime(&rp->red_last);
628 }
629 return NULL;
630 }
631
632 rp->red_idle = 0;
633 return (m);
634 }
635
636 /*
637 * helper routine to calibrate avg during idle.
638 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
639 * here Wq = 1/weight and the code assumes Wq is close to zero.
640 *
641 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
642 */
643 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
644
645 struct wtab *
646 wtab_alloc(int weight)
647 {
648 struct wtab *w;
649 int i;
650
651 for (w = wtab_list; w != NULL; w = w->w_next)
652 if (w->w_weight == weight) {
653 w->w_refcount++;
654 return (w);
655 }
656
657 MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
658 if (w == NULL)
659 panic("wtab_alloc: malloc failed!");
660 (void)memset(w, 0, sizeof(struct wtab));
661 w->w_weight = weight;
662 w->w_refcount = 1;
663 w->w_next = wtab_list;
664 wtab_list = w;
665
666 /* initialize the weight table */
667 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
668 for (i = 1; i < 32; i++) {
669 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
670 if (w->w_tab[i] == 0 && w->w_param_max == 0)
671 w->w_param_max = 1 << i;
672 }
673
674 return (w);
675 }
676
677 int
678 wtab_destroy(struct wtab *w)
679 {
680 struct wtab *prev;
681
682 if (--w->w_refcount > 0)
683 return (0);
684
685 if (wtab_list == w)
686 wtab_list = w->w_next;
687 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
688 if (prev->w_next == w) {
689 prev->w_next = w->w_next;
690 break;
691 }
692
693 FREE(w, M_DEVBUF);
694 return (0);
695 }
696
697 int32_t
698 pow_w(struct wtab *w, int n)
699 {
700 int i, bit;
701 int32_t val;
702
703 if (n >= w->w_param_max)
704 return (0);
705
706 val = 1 << FP_SHIFT;
707 if (n <= 0)
708 return (val);
709
710 bit = 1;
711 i = 0;
712 while (n) {
713 if (n & bit) {
714 val = (val * w->w_tab[i]) >> FP_SHIFT;
715 n &= ~bit;
716 }
717 i++;
718 bit <<= 1;
719 }
720 return (val);
721 }
722
723 #ifdef ALTQ3_COMPAT
724 /*
725 * red device interface
726 */
727 altqdev_decl(red);
728
729 int
730 redopen(dev, flag, fmt, l)
731 dev_t dev;
732 int flag, fmt;
733 struct lwp *l;
734 {
735 /* everything will be done when the queueing scheme is attached. */
736 return 0;
737 }
738
739 int
740 redclose(dev, flag, fmt, l)
741 dev_t dev;
742 int flag, fmt;
743 struct lwp *l;
744 {
745 red_queue_t *rqp;
746 int err, error = 0;
747
748 while ((rqp = red_list) != NULL) {
749 /* destroy all */
750 err = red_detach(rqp);
751 if (err != 0 && error == 0)
752 error = err;
753 }
754
755 return error;
756 }
757
758 int
759 redioctl(dev, cmd, addr, flag, l)
760 dev_t dev;
761 ioctlcmd_t cmd;
762 caddr_t addr;
763 int flag;
764 struct lwp *l;
765 {
766 red_queue_t *rqp;
767 struct red_interface *ifacep;
768 struct ifnet *ifp;
769 struct proc *p = l->l_proc;
770 int error = 0;
771
772 /* check super-user privilege */
773 switch (cmd) {
774 case RED_GETSTATS:
775 break;
776 default:
777 #if (__FreeBSD_version > 400000)
778 if ((error = suser(p)) != 0)
779 #else
780 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
781 #endif
782 return (error);
783 break;
784 }
785
786 switch (cmd) {
787
788 case RED_ENABLE:
789 ifacep = (struct red_interface *)addr;
790 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
791 error = EBADF;
792 break;
793 }
794 error = altq_enable(rqp->rq_ifq);
795 break;
796
797 case RED_DISABLE:
798 ifacep = (struct red_interface *)addr;
799 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
800 error = EBADF;
801 break;
802 }
803 error = altq_disable(rqp->rq_ifq);
804 break;
805
806 case RED_IF_ATTACH:
807 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
808 if (ifp == NULL) {
809 error = ENXIO;
810 break;
811 }
812
813 /* allocate and initialize red_queue_t */
814 MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
815 if (rqp == NULL) {
816 error = ENOMEM;
817 break;
818 }
819 (void)memset(rqp, 0, sizeof(red_queue_t));
820
821 MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
822 M_DEVBUF, M_WAITOK);
823 if (rqp->rq_q == NULL) {
824 FREE(rqp, M_DEVBUF);
825 error = ENOMEM;
826 break;
827 }
828 (void)memset(rqp->rq_q, 0, sizeof(class_queue_t));
829
830 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
831 if (rqp->rq_red == NULL) {
832 FREE(rqp->rq_q, M_DEVBUF);
833 FREE(rqp, M_DEVBUF);
834 error = ENOMEM;
835 break;
836 }
837
838 rqp->rq_ifq = &ifp->if_snd;
839 qtail(rqp->rq_q) = NULL;
840 qlen(rqp->rq_q) = 0;
841 qlimit(rqp->rq_q) = RED_LIMIT;
842 qtype(rqp->rq_q) = Q_RED;
843
844 /*
845 * set RED to this ifnet structure.
846 */
847 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
848 red_enqueue, red_dequeue, red_request,
849 NULL, NULL);
850 if (error) {
851 red_destroy(rqp->rq_red);
852 FREE(rqp->rq_q, M_DEVBUF);
853 FREE(rqp, M_DEVBUF);
854 break;
855 }
856
857 /* add this state to the red list */
858 rqp->rq_next = red_list;
859 red_list = rqp;
860 break;
861
862 case RED_IF_DETACH:
863 ifacep = (struct red_interface *)addr;
864 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
865 error = EBADF;
866 break;
867 }
868 error = red_detach(rqp);
869 break;
870
871 case RED_GETSTATS:
872 do {
873 struct red_stats *q_stats;
874 red_t *rp;
875
876 q_stats = (struct red_stats *)addr;
877 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
878 ALTQT_RED)) == NULL) {
879 error = EBADF;
880 break;
881 }
882
883 q_stats->q_len = qlen(rqp->rq_q);
884 q_stats->q_limit = qlimit(rqp->rq_q);
885
886 rp = rqp->rq_red;
887 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
888 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
889 q_stats->drop_cnt = rp->red_stats.drop_cnt;
890 q_stats->drop_forced = rp->red_stats.drop_forced;
891 q_stats->drop_unforced = rp->red_stats.drop_unforced;
892 q_stats->marked_packets = rp->red_stats.marked_packets;
893
894 q_stats->weight = rp->red_weight;
895 q_stats->inv_pmax = rp->red_inv_pmax;
896 q_stats->th_min = rp->red_thmin;
897 q_stats->th_max = rp->red_thmax;
898
899 #ifdef ALTQ_FLOWVALVE
900 if (rp->red_flowvalve != NULL) {
901 struct flowvalve *fv = rp->red_flowvalve;
902 q_stats->fv_flows = fv->fv_flows;
903 q_stats->fv_pass = fv->fv_stats.pass;
904 q_stats->fv_predrop = fv->fv_stats.predrop;
905 q_stats->fv_alloc = fv->fv_stats.alloc;
906 q_stats->fv_escape = fv->fv_stats.escape;
907 } else {
908 #endif /* ALTQ_FLOWVALVE */
909 q_stats->fv_flows = 0;
910 q_stats->fv_pass = 0;
911 q_stats->fv_predrop = 0;
912 q_stats->fv_alloc = 0;
913 q_stats->fv_escape = 0;
914 #ifdef ALTQ_FLOWVALVE
915 }
916 #endif /* ALTQ_FLOWVALVE */
917 } while (/*CONSTCOND*/ 0);
918 break;
919
920 case RED_CONFIG:
921 do {
922 struct red_conf *fc;
923 red_t *new;
924 int s, limit;
925
926 fc = (struct red_conf *)addr;
927 if ((rqp = altq_lookup(fc->iface.red_ifname,
928 ALTQT_RED)) == NULL) {
929 error = EBADF;
930 break;
931 }
932 new = red_alloc(fc->red_weight,
933 fc->red_inv_pmax,
934 fc->red_thmin,
935 fc->red_thmax,
936 fc->red_flags,
937 fc->red_pkttime);
938 if (new == NULL) {
939 error = ENOMEM;
940 break;
941 }
942
943 s = splnet();
944 red_purgeq(rqp);
945 limit = fc->red_limit;
946 if (limit < fc->red_thmax)
947 limit = fc->red_thmax;
948 qlimit(rqp->rq_q) = limit;
949 fc->red_limit = limit; /* write back the new value */
950
951 red_destroy(rqp->rq_red);
952 rqp->rq_red = new;
953
954 splx(s);
955
956 /* write back new values */
957 fc->red_limit = limit;
958 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
959 fc->red_thmin = rqp->rq_red->red_thmin;
960 fc->red_thmax = rqp->rq_red->red_thmax;
961
962 } while (/*CONSTCOND*/ 0);
963 break;
964
965 case RED_SETDEFAULTS:
966 do {
967 struct redparams *rp;
968
969 rp = (struct redparams *)addr;
970
971 default_th_min = rp->th_min;
972 default_th_max = rp->th_max;
973 default_inv_pmax = rp->inv_pmax;
974 } while (/*CONSTCOND*/ 0);
975 break;
976
977 default:
978 error = EINVAL;
979 break;
980 }
981 return error;
982 }
983
984 static int
985 red_detach(rqp)
986 red_queue_t *rqp;
987 {
988 red_queue_t *tmp;
989 int error = 0;
990
991 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
992 altq_disable(rqp->rq_ifq);
993
994 if ((error = altq_detach(rqp->rq_ifq)))
995 return (error);
996
997 if (red_list == rqp)
998 red_list = rqp->rq_next;
999 else {
1000 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1001 if (tmp->rq_next == rqp) {
1002 tmp->rq_next = rqp->rq_next;
1003 break;
1004 }
1005 if (tmp == NULL)
1006 printf("red_detach: no state found in red_list!\n");
1007 }
1008
1009 red_destroy(rqp->rq_red);
1010 FREE(rqp->rq_q, M_DEVBUF);
1011 FREE(rqp, M_DEVBUF);
1012 return (error);
1013 }
1014
1015 /*
1016 * enqueue routine:
1017 *
1018 * returns: 0 when successfully queued.
1019 * ENOBUFS when drop occurs.
1020 */
1021 static int
1022 red_enqueue(ifq, m, pktattr)
1023 struct ifaltq *ifq;
1024 struct mbuf *m;
1025 struct altq_pktattr *pktattr;
1026 {
1027 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1028
1029 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1030 return ENOBUFS;
1031 ifq->ifq_len++;
1032 return 0;
1033 }
1034
1035 /*
1036 * dequeue routine:
1037 * must be called in splnet.
1038 *
1039 * returns: mbuf dequeued.
1040 * NULL when no packet is available in the queue.
1041 */
1042
1043 static struct mbuf *
1044 red_dequeue(ifq, op)
1045 struct ifaltq *ifq;
1046 int op;
1047 {
1048 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1049 struct mbuf *m;
1050
1051 if (op == ALTDQ_POLL)
1052 return qhead(rqp->rq_q);
1053
1054 /* op == ALTDQ_REMOVE */
1055 m = red_getq(rqp->rq_red, rqp->rq_q);
1056 if (m != NULL)
1057 ifq->ifq_len--;
1058 return (m);
1059 }
1060
1061 static int
1062 red_request(ifq, req, arg)
1063 struct ifaltq *ifq;
1064 int req;
1065 void *arg;
1066 {
1067 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1068
1069 switch (req) {
1070 case ALTRQ_PURGE:
1071 red_purgeq(rqp);
1072 break;
1073 }
1074 return (0);
1075 }
1076
1077 static void
1078 red_purgeq(rqp)
1079 red_queue_t *rqp;
1080 {
1081 _flushq(rqp->rq_q);
1082 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1083 rqp->rq_ifq->ifq_len = 0;
1084 }
1085
1086 #ifdef ALTQ_FLOWVALVE
1087
1088 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1089 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1090 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1091 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1092 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1093 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1094
1095 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1096 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1097
1098 #define FV_N 10 /* update fve_f every FV_N packets */
1099
1100 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1101 #define FV_TTHRESH 3 /* time threshold to delete fve */
1102 #define FV_ALPHA 5 /* extra packet count */
1103
1104 #define FV_STATS
1105
1106 #if (__FreeBSD_version > 300000)
1107 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1108 #else
1109 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1110 #endif
1111
1112 /*
1113 * Brtt table: 127 entry table to convert drop rate (p) to
1114 * the corresponding bandwidth fraction (f)
1115 * the following equation is implemented to use scaled values,
1116 * fve_p and fve_f, in the fixed point format.
1117 *
1118 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1119 * f = Brtt(p) / (max_th + alpha)
1120 */
1121 #define BRTT_SIZE 128
1122 #define BRTT_SHIFT 12
1123 #define BRTT_MASK 0x0007f000
1124 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1125
1126 const int brtt_tab[BRTT_SIZE] = {
1127 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1128 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1129 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1130 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1131 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1132 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1133 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1134 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1135 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1136 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1137 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1138 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1139 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1140 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1141 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1142 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1143 };
1144
1145 static inline struct fve *
1146 flowlist_lookup(fv, pktattr, now)
1147 struct flowvalve *fv;
1148 struct altq_pktattr *pktattr;
1149 struct timeval *now;
1150 {
1151 struct fve *fve;
1152 int flows;
1153 struct ip *ip;
1154 #ifdef INET6
1155 struct ip6_hdr *ip6;
1156 #endif
1157 struct timeval tthresh;
1158
1159 if (pktattr == NULL)
1160 return (NULL);
1161
1162 tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
1163 flows = 0;
1164 /*
1165 * search the flow list
1166 */
1167 switch (pktattr->pattr_af) {
1168 case AF_INET:
1169 ip = (struct ip *)pktattr->pattr_hdr;
1170 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1171 if (fve->fve_lastdrop.tv_sec == 0)
1172 break;
1173 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1174 fve->fve_lastdrop.tv_sec = 0;
1175 break;
1176 }
1177 if (fve->fve_flow.flow_af == AF_INET &&
1178 fve->fve_flow.flow_ip.ip_src.s_addr ==
1179 ip->ip_src.s_addr &&
1180 fve->fve_flow.flow_ip.ip_dst.s_addr ==
1181 ip->ip_dst.s_addr)
1182 return (fve);
1183 flows++;
1184 }
1185 break;
1186 #ifdef INET6
1187 case AF_INET6:
1188 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1189 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1190 if (fve->fve_lastdrop.tv_sec == 0)
1191 break;
1192 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1193 fve->fve_lastdrop.tv_sec = 0;
1194 break;
1195 }
1196 if (fve->fve_flow.flow_af == AF_INET6 &&
1197 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
1198 &ip6->ip6_src) &&
1199 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
1200 &ip6->ip6_dst))
1201 return (fve);
1202 flows++;
1203 }
1204 break;
1205 #endif /* INET6 */
1206
1207 default:
1208 /* unknown protocol. no drop. */
1209 return (NULL);
1210 }
1211 fv->fv_flows = flows; /* save the number of active fve's */
1212 return (NULL);
1213 }
1214
1215 static inline struct fve *
1216 flowlist_reclaim(fv, pktattr)
1217 struct flowvalve *fv;
1218 struct altq_pktattr *pktattr;
1219 {
1220 struct fve *fve;
1221 struct ip *ip;
1222 #ifdef INET6
1223 struct ip6_hdr *ip6;
1224 #endif
1225
1226 /*
1227 * get an entry from the tail of the LRU list.
1228 */
1229 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1230
1231 switch (pktattr->pattr_af) {
1232 case AF_INET:
1233 ip = (struct ip *)pktattr->pattr_hdr;
1234 fve->fve_flow.flow_af = AF_INET;
1235 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1236 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1237 break;
1238 #ifdef INET6
1239 case AF_INET6:
1240 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1241 fve->fve_flow.flow_af = AF_INET6;
1242 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1243 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1244 break;
1245 #endif
1246 }
1247
1248 fve->fve_state = Green;
1249 fve->fve_p = 0.0;
1250 fve->fve_f = 0.0;
1251 fve->fve_ifseq = fv->fv_ifseq - 1;
1252 fve->fve_count = 0;
1253
1254 fv->fv_flows++;
1255 #ifdef FV_STATS
1256 fv->fv_stats.alloc++;
1257 #endif
1258 return (fve);
1259 }
1260
1261 static inline void
1262 flowlist_move_to_head(fv, fve)
1263 struct flowvalve *fv;
1264 struct fve *fve;
1265 {
1266 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1267 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1268 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1269 }
1270 }
1271
1272 /*
1273 * allocate flowvalve structure
1274 */
1275 static struct flowvalve *
1276 fv_alloc(rp)
1277 struct red *rp;
1278 {
1279 struct flowvalve *fv;
1280 struct fve *fve;
1281 int i, num;
1282
1283 num = FV_FLOWLISTSIZE;
1284 MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
1285 M_DEVBUF, M_WAITOK);
1286 if (fv == NULL)
1287 return (NULL);
1288 (void)memset(fv, 0, sizeof(struct flowvalve));
1289
1290 MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
1291 M_DEVBUF, M_WAITOK);
1292 if (fv->fv_fves == NULL) {
1293 FREE(fv, M_DEVBUF);
1294 return (NULL);
1295 }
1296 (void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);
1297
1298 fv->fv_flows = 0;
1299 TAILQ_INIT(&fv->fv_flowlist);
1300 for (i = 0; i < num; i++) {
1301 fve = &fv->fv_fves[i];
1302 fve->fve_lastdrop.tv_sec = 0;
1303 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1304 }
1305
1306 /* initialize drop rate threshold in scaled fixed-point */
1307 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1308
1309 /* initialize drop rate to fraction table */
1310 MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
1311 M_DEVBUF, M_WAITOK);
1312 if (fv->fv_p2ftab == NULL) {
1313 FREE(fv->fv_fves, M_DEVBUF);
1314 FREE(fv, M_DEVBUF);
1315 return (NULL);
1316 }
1317 /*
1318 * create the p2f table.
1319 * (shift is used to keep the precision)
1320 */
1321 for (i = 1; i < BRTT_SIZE; i++) {
1322 int f;
1323
1324 f = brtt_tab[i] << 8;
1325 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1326 }
1327
1328 return (fv);
1329 }
1330
1331 static void fv_destroy(fv)
1332 struct flowvalve *fv;
1333 {
1334 FREE(fv->fv_p2ftab, M_DEVBUF);
1335 FREE(fv->fv_fves, M_DEVBUF);
1336 FREE(fv, M_DEVBUF);
1337 }
1338
1339 static inline int
1340 fv_p2f(fv, p)
1341 struct flowvalve *fv;
1342 int p;
1343 {
1344 int val, f;
1345
1346 if (p >= BRTT_PMAX)
1347 f = fv->fv_p2ftab[BRTT_SIZE-1];
1348 else if ((val = (p & BRTT_MASK)))
1349 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1350 else
1351 f = fv->fv_p2ftab[1];
1352 return (f);
1353 }
1354
1355 /*
1356 * check if an arriving packet should be pre-dropped.
1357 * called from red_addq() when a packet arrives.
1358 * returns 1 when the packet should be pre-dropped.
1359 * should be called in splnet.
1360 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	/* per-interface packet sequence number, used for the f estimate */
	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* cache the entry so fv_dropbyred() can skip a second lookup */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (scaled fixed-point; fv_ifseq - fve_ifseq is the
		 * interface packet count since this flow's last update)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose drop rate p exceeds the
	 * pre-drop threshold and whose bandwidth fraction f exceeds the
	 * Brtt-derived fair share turns Red
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow: pre-drop the packet */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay applied on every pass; if RED drops this packet,
	 * fv_dropbyred() compensates by adding a full 1 << FP_SHIFT)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1436
1437 /*
1438 * called from red_addq when a packet is dropped by red.
1439 * should be called in splnet.
1440 */
1441 static void fv_dropbyred(fv, pktattr, fcache)
1442 struct flowvalve *fv;
1443 struct altq_pktattr *pktattr;
1444 struct fve *fcache;
1445 {
1446 struct fve *fve;
1447 struct timeval now;
1448
1449 if (pktattr == NULL)
1450 return;
1451 FV_TIMESTAMP(&now);
1452
1453 if (fcache != NULL)
1454 /* the fve of this packet is already cached */
1455 fve = fcache;
1456 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1457 fve = flowlist_reclaim(fv, pktattr);
1458
1459 flowlist_move_to_head(fv, fve);
1460
1461 /*
1462 * update p: the following line cancels the update
1463 * in fv_checkflow() and calculate
1464 * p = Wp + (1 - Wp) * p
1465 */
1466 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1467
1468 fve->fve_lastdrop = now;
1469 }
1470
1471 #endif /* ALTQ_FLOWVALVE */
1472
1473 #ifdef KLD_MODULE
1474
/* device switch entry and registration for the loadable-module build */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);
1480
1481 #endif /* KLD_MODULE */
1482 #endif /* ALTQ3_COMPAT */
1483
1484 #endif /* ALTQ_RED */
1485