altq_red.c revision 1.28.30.1 1 /* $NetBSD: altq_red.c,v 1.28.30.1 2012/04/17 00:05:51 yamt Exp $ */
2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.28.30.1 2012/04/17 00:05:51 yamt Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_altq.h"
68 #include "opt_inet.h"
69 #include "pf.h"
70 #endif
71
72 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
73
74 #include <sys/param.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/systm.h>
79 #include <sys/errno.h>
80 #include <sys/kauth.h>
81 #if 1 /* ALTQ3_COMPAT */
82 #include <sys/sockio.h>
83 #include <sys/proc.h>
84 #include <sys/kernel.h>
85 #ifdef ALTQ_FLOWVALVE
86 #include <sys/queue.h>
87 #include <sys/time.h>
88 #endif
89 #endif /* ALTQ3_COMPAT */
90 #include <sys/cprng.h>
91
92 #include <net/if.h>
93
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #ifdef INET6
98 #include <netinet/ip6.h>
99 #endif
100
101 #if NPF > 0
102 #include <net/pfvar.h>
103 #endif
104 #include <altq/altq.h>
105 #include <altq/altq_red.h>
106 #ifdef ALTQ3_COMPAT
107 #include <altq/altq_conf.h>
108 #ifdef ALTQ_FLOWVALVE
109 #include <altq/altq_flowvalve.h>
110 #endif
111 #endif
112
113 /*
114 * ALTQ/RED (Random Early Detection) implementation using 32-bit
115 * fixed-point calculation.
116 *
117 * written by kjc using the ns code as a reference.
118 * you can learn more about red and ns from Sally's home page at
119 * http://www-nrg.ee.lbl.gov/floyd/
120 *
121 * most of the red parameter values are fixed in this implementation
122 * to prevent fixed-point overflow/underflow.
123 * if you change the parameters, watch out for overflow/underflow!
124 *
125 * the parameters used are recommended values by Sally.
126 * the corresponding ns config looks:
127 * q_weight=0.00195
128 * minthresh=5 maxthresh=15 queue-size=60
129 * linterm=30
130 * dropmech=drop-tail
131 * bytes=false (can't be handled by 32-bit fixed-point)
132 * doubleq=false dqthresh=false
133 * wait=true
134 */
135 /*
136 * alternative red parameters for a slow link.
137 *
 138 * assume the queue length goes from zero to L and then stays at L;
 139 * it takes N packets for q_avg to reach 63% of L.
140 * when q_weight is 0.002, N is about 500 packets.
141 * for a slow link like dial-up, 500 packets takes more than 1 minute!
142 * when q_weight is 0.008, N is about 127 packets.
143 * when q_weight is 0.016, N is about 63 packets.
144 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
145 * are allowed for 0.016.
146 * see Sally's paper for more details.
147 */
148 /* normal red parameters */
149 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
150 /* q_weight = 0.00195 */
151
152 /* red parameters for a slow link */
153 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
154 /* q_weight = 0.0078125 */
155
156 /* red parameters for a very slow link (e.g., dialup) */
157 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
158 /* q_weight = 0.015625 */
159
160 /* fixed-point uses 12-bit decimal places */
161 #define FP_SHIFT 12 /* fixed-point shift */
162
163 /* red parameters for drop probability */
164 #define INV_P_MAX 10 /* inverse of max drop probability */
165 #define TH_MIN 5 /* min threshold */
166 #define TH_MAX 15 /* max threshold */
167
 168 #define RED_LIMIT 60 /* default max queue length */
169 #define RED_STATS /* collect statistics */
170
171 /*
172 * our default policy for forced-drop is drop-tail.
173 * (in altq-1.1.2 or earlier, the default was random-drop.
174 * but it makes more sense to punish the cause of the surge.)
175 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
176 */
177
178 #ifdef ALTQ3_COMPAT
179 #ifdef ALTQ_FLOWVALVE
180 /*
 181 * flow-valve is an extension to protect red from unresponsive flows
182 * and to promote end-to-end congestion control.
183 * flow-valve observes the average drop rates of the flows that have
184 * experienced packet drops in the recent past.
185 * when the average drop rate exceeds the threshold, the flow is
186 * blocked by the flow-valve. the trapped flow should back off
187 * exponentially to escape from the flow-valve.
188 */
189 #ifdef RED_RANDOM_DROP
190 #error "random-drop can't be used with flow-valve!"
191 #endif
192 #endif /* ALTQ_FLOWVALVE */
193
194 /* red_list keeps all red_queue_t's allocated. */
195 static red_queue_t *red_list = NULL;
196
197 #endif /* ALTQ3_COMPAT */
198
199 /* default red parameter values */
200 static int default_th_min = TH_MIN;
201 static int default_th_max = TH_MAX;
202 static int default_inv_pmax = INV_P_MAX;
203
204 #ifdef ALTQ3_COMPAT
205 /* internal function prototypes */
206 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
207 static struct mbuf *red_dequeue(struct ifaltq *, int);
208 static int red_request(struct ifaltq *, int, void *);
209 static void red_purgeq(red_queue_t *);
210 static int red_detach(red_queue_t *);
211 #ifdef ALTQ_FLOWVALVE
212 static inline struct fve *flowlist_lookup(struct flowvalve *,
213 struct altq_pktattr *, struct timeval *);
214 static inline struct fve *flowlist_reclaim(struct flowvalve *,
215 struct altq_pktattr *);
216 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
217 static inline int fv_p2f(struct flowvalve *, int);
218 static struct flowvalve *fv_alloc(struct red *);
219 static void fv_destroy(struct flowvalve *);
220 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
221 struct fve **);
222 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
223 struct fve *);
224 #endif
225 #endif /* ALTQ3_COMPAT */
226
227 /*
228 * red support routines
229 */
230 red_t *
231 red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
232 int pkttime)
233 {
234 red_t *rp;
235 int w, i;
236 int npkts_per_sec;
237
238 rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
239 if (rp == NULL)
240 return (NULL);
241
242 rp->red_avg = 0;
243 rp->red_idle = 1;
244
245 if (weight == 0)
246 rp->red_weight = W_WEIGHT;
247 else
248 rp->red_weight = weight;
249 if (inv_pmax == 0)
250 rp->red_inv_pmax = default_inv_pmax;
251 else
252 rp->red_inv_pmax = inv_pmax;
253 if (th_min == 0)
254 rp->red_thmin = default_th_min;
255 else
256 rp->red_thmin = th_min;
257 if (th_max == 0)
258 rp->red_thmax = default_th_max;
259 else
260 rp->red_thmax = th_max;
261
262 rp->red_flags = flags;
263
264 if (pkttime == 0)
265 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
266 rp->red_pkttime = 800;
267 else
268 rp->red_pkttime = pkttime;
269
270 if (weight == 0) {
271 /* when the link is very slow, adjust red parameters */
272 npkts_per_sec = 1000000 / rp->red_pkttime;
273 if (npkts_per_sec < 50) {
274 /* up to about 400Kbps */
275 rp->red_weight = W_WEIGHT_2;
276 } else if (npkts_per_sec < 300) {
277 /* up to about 2.4Mbps */
278 rp->red_weight = W_WEIGHT_1;
279 }
280 }
281
282 /* calculate wshift. weight must be power of 2 */
283 w = rp->red_weight;
284 for (i = 0; w > 1; i++)
285 w = w >> 1;
286 rp->red_wshift = i;
287 w = 1 << rp->red_wshift;
288 if (w != rp->red_weight) {
289 printf("invalid weight value %d for red! use %d\n",
290 rp->red_weight, w);
291 rp->red_weight = w;
292 }
293
294 /*
295 * thmin_s and thmax_s are scaled versions of th_min and th_max
296 * to be compared with avg.
297 */
298 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
299 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
300
301 /*
302 * precompute probability denominator
303 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
304 */
305 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
306 * rp->red_inv_pmax) << FP_SHIFT;
307
308 /* allocate weight table */
309 rp->red_wtab = wtab_alloc(rp->red_weight);
310
311 microtime(&rp->red_last);
312 #ifdef ALTQ3_COMPAT
313 #ifdef ALTQ_FLOWVALVE
314 if (flags & REDF_FLOWVALVE)
315 rp->red_flowvalve = fv_alloc(rp);
316 /* if fv_alloc failes, flowvalve is just disabled */
317 #endif
318 #endif /* ALTQ3_COMPAT */
319 return (rp);
320 }
321
322 void
323 red_destroy(red_t *rp)
324 {
325 #ifdef ALTQ3_COMPAT
326 #ifdef ALTQ_FLOWVALVE
327 if (rp->red_flowvalve != NULL)
328 fv_destroy(rp->red_flowvalve);
329 #endif
330 #endif /* ALTQ3_COMPAT */
331 wtab_destroy(rp->red_wtab);
332 free(rp, M_DEVBUF);
333 }
334
335 void
336 red_getstats(red_t *rp, struct redstats *sp)
337 {
338 sp->q_avg = rp->red_avg >> rp->red_wshift;
339 sp->xmit_cnt = rp->red_stats.xmit_cnt;
340 sp->drop_cnt = rp->red_stats.drop_cnt;
341 sp->drop_forced = rp->red_stats.drop_forced;
342 sp->drop_unforced = rp->red_stats.drop_unforced;
343 sp->marked_packets = rp->red_stats.marked_packets;
344 }
345
/*
 * red_addq: run the RED algorithm on an arriving packet.
 *
 * Updates the EWMA of the queue length (compensating for idle time),
 * then either enqueues the packet, ECN-marks it, or drops it (an
 * "early" probabilistic drop or a "forced" drop above th_max / at the
 * hard queue limit).
 *
 * Returns 0 when the packet was queued, -1 when it was dropped; on
 * drop the mbuf is freed here (caller must not touch it again).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped unconditionally */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflowing when converted
			 * to microseconds below.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first time avg exceeds th_min: start a new round */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			/* random-drop policy: m now points at the victim */
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		/* a drop resets the undropped-arrival tally */
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
483
/*
 * drop_early: decide probabilistically whether to drop (or mark) a
 * packet while avg is between th_min and th_max.
 *
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *          = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as the successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 *
 * fp_len is (avg - th_min) unscaled from the EWMA weight, fp_probd is
 * the precomputed denominator, count is the run of undropped arrivals.
 * returns 1 to drop/mark, 0 to pass.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int denom;

	/* denominator shrinks as count grows, ramping prob_a up */
	denom = fp_probd - count * fp_len;
	if (denom <= 0) {
		/* count exceeds the hard limit: drop or mark */
		return (1);
	}

	/*
	 * now the range of denom is [1..600] in fixed-point (when
	 * th_max-th_min=10 and p_max=1/30).
	 * drop probability = (avg - TH_MIN) / denom
	 */
	return ((cprng_fast32() % denom) < fp_len) ? 1 : 0;
}
516
/*
 * mark_ecn: try to set the CE (congestion experienced) bit on the
 * packet's IP or IPv6 header instead of dropping it.
 *
 * The header location is taken from the ALTQ mbuf tag, or (ALTQ3
 * compat) from pktattr.  The pointer is validated to lie inside the
 * mbuf chain before it is dereferenced, since tag info can go stale.
 *
 * returns 1 if successfully marked (or already marked), 0 otherwise
 * (not ECN-capable, wrong AF/version, stale header pointer, or the
 * relevant REDF_ECN4/REDF_ECN6 flag is not set).
 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf *m0;
	struct m_tag *t;
	struct altq_tag *at;
	void *hdr;
	int af;

	t = m_tag_find(m, PACKET_TAG_ALTQ_QID, NULL);
	if (t != NULL) {
		at = (struct altq_tag *)(t + 1);
		/* NOTE(review): t + 1 can never be NULL here, so this
		 * check looks dead -- harmless defensive leftover. */
		if (at == NULL)
			return (0);
		af = at->af;
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return (0);

	if (af != AF_INET && af != AF_INET6)
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)hdr >= m0->m_data) &&
		    ((char *)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return (0);
	}

	switch (af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * incremental checksum update (from RFC1624)
			 *   HC' = ~(~HC + ~m + m')
			 * where m is the old TOS byte and m' the new one
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			/* the ECN bits live at bits 20-21 of ip6_flow */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 * (no checksum to fix: IPv6 has no header checksum)
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
620
621 struct mbuf *
622 red_getq(red_t *rp, class_queue_t *q)
623 {
624 struct mbuf *m;
625
626 if ((m = _getq(q)) == NULL) {
627 if (rp->red_idle == 0) {
628 rp->red_idle = 1;
629 microtime(&rp->red_last);
630 }
631 return NULL;
632 }
633
634 rp->red_idle = 0;
635 return (m);
636 }
637
/*
 * helper routines to calibrate avg during idle periods.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point,
 * where Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
 */
/* global list of refcounted weight tables, shared across red queues */
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
646
647 struct wtab *
648 wtab_alloc(int weight)
649 {
650 struct wtab *w;
651 int i;
652
653 for (w = wtab_list; w != NULL; w = w->w_next)
654 if (w->w_weight == weight) {
655 w->w_refcount++;
656 return (w);
657 }
658
659 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
660 if (w == NULL)
661 panic("wtab_alloc: malloc failed!");
662 w->w_weight = weight;
663 w->w_refcount = 1;
664 w->w_next = wtab_list;
665 wtab_list = w;
666
667 /* initialize the weight table */
668 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
669 for (i = 1; i < 32; i++) {
670 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
671 if (w->w_tab[i] == 0 && w->w_param_max == 0)
672 w->w_param_max = 1 << i;
673 }
674
675 return (w);
676 }
677
678 int
679 wtab_destroy(struct wtab *w)
680 {
681 struct wtab *prev;
682
683 if (--w->w_refcount > 0)
684 return (0);
685
686 if (wtab_list == w)
687 wtab_list = w->w_next;
688 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
689 if (prev->w_next == w) {
690 prev->w_next = w->w_next;
691 break;
692 }
693
694 free(w, M_DEVBUF);
695 return (0);
696 }
697
698 int32_t
699 pow_w(struct wtab *w, int n)
700 {
701 int i, bit;
702 int32_t val;
703
704 if (n >= w->w_param_max)
705 return (0);
706
707 val = 1 << FP_SHIFT;
708 if (n <= 0)
709 return (val);
710
711 bit = 1;
712 i = 0;
713 while (n) {
714 if (n & bit) {
715 val = (val * w->w_tab[i]) >> FP_SHIFT;
716 n &= ~bit;
717 }
718 i++;
719 bit <<= 1;
720 }
721 return (val);
722 }
723
724 #ifdef ALTQ3_COMPAT
725 /*
726 * red device interface
727 */
728 altqdev_decl(red);
729
730 int
731 redopen(dev_t dev, int flag, int fmt,
732 struct lwp *l)
733 {
734 /* everything will be done when the queueing scheme is attached. */
735 return 0;
736 }
737
738 int
739 redclose(dev_t dev, int flag, int fmt,
740 struct lwp *l)
741 {
742 red_queue_t *rqp;
743 int err, error = 0;
744
745 while ((rqp = red_list) != NULL) {
746 /* destroy all */
747 err = red_detach(rqp);
748 if (err != 0 && error == 0)
749 error = err;
750 }
751
752 return error;
753 }
754
/*
 * redioctl: character-device ioctl handler for ALTQ3-style red
 * configuration (enable/disable, interface attach/detach, statistics
 * retrieval, runtime reconfiguration, and default-parameter setting).
 *
 * All commands except RED_GETSTATS require network-configuration
 * privilege.  Returns 0 on success or an errno value.
 */
int
redioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    struct lwp *l)
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	struct proc *p = l->l_proc;
	int error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = kauth_authorize_network(p->p_cred,
		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL,
		    NULL, NULL)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		/* start feeding the interface's queue through red */
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		/* all-zero arguments select the built-in red defaults */
		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
		    red_enqueue, red_dequeue, red_request,
		    NULL, NULL);
		if (error) {
			/* unwind the allocations made above */
			red_destroy(rqp->rq_red);
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
			    ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len = qlen(rqp->rq_q);
			q_stats->q_limit = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			/* unscale the EWMA for reporting */
			q_stats->q_avg = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt = rp->red_stats.drop_cnt;
			q_stats->drop_forced = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight = rp->red_weight;
			q_stats->inv_pmax = rp->red_inv_pmax;
			q_stats->th_min = rp->red_thmin;
			q_stats->th_max = rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows = fv->fv_flows;
				q_stats->fv_pass = fv->fv_stats.pass;
				q_stats->fv_predrop = fv->fv_stats.predrop;
				q_stats->fv_alloc = fv->fv_stats.alloc;
				q_stats->fv_escape = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows = 0;
				q_stats->fv_pass = 0;
				q_stats->fv_predrop = 0;
				q_stats->fv_alloc = 0;
				q_stats->fv_escape = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (/*CONSTCOND*/ 0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
			    ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			/* build the replacement state before touching the
			 * live queue, so failure leaves it untouched */
			new = red_alloc(fc->red_weight,
			    fc->red_inv_pmax,
			    fc->red_thmin,
			    fc->red_thmax,
			    fc->red_flags,
			    fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			/* the hard limit must not be below th_max */
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			/* NOTE(review): fc->red_limit is written back again
			 * below; this first store looks redundant. */
			fc->red_limit = limit;	/* write back the new value */

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (/*CONSTCOND*/ 0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			/* affects red_alloc() calls from now on */
			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (/*CONSTCOND*/ 0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}
976
977 static int
978 red_detach(red_queue_t *rqp)
979 {
980 red_queue_t *tmp;
981 int error = 0;
982
983 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
984 altq_disable(rqp->rq_ifq);
985
986 if ((error = altq_detach(rqp->rq_ifq)))
987 return (error);
988
989 if (red_list == rqp)
990 red_list = rqp->rq_next;
991 else {
992 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
993 if (tmp->rq_next == rqp) {
994 tmp->rq_next = rqp->rq_next;
995 break;
996 }
997 if (tmp == NULL)
998 printf("red_detach: no state found in red_list!\n");
999 }
1000
1001 red_destroy(rqp->rq_red);
1002 free(rqp->rq_q, M_DEVBUF);
1003 free(rqp, M_DEVBUF);
1004 return (error);
1005 }
1006
1007 /*
1008 * enqueue routine:
1009 *
1010 * returns: 0 when successfully queued.
1011 * ENOBUFS when drop occurs.
1012 */
1013 static int
1014 red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
1015 {
1016 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1017
1018 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1019 return ENOBUFS;
1020 ifq->ifq_len++;
1021 return 0;
1022 }
1023
1024 /*
1025 * dequeue routine:
1026 * must be called in splnet.
1027 *
1028 * returns: mbuf dequeued.
1029 * NULL when no packet is available in the queue.
1030 */
1031
1032 static struct mbuf *
1033 red_dequeue(struct ifaltq *ifq, int op)
1034 {
1035 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1036 struct mbuf *m;
1037
1038 if (op == ALTDQ_POLL)
1039 return qhead(rqp->rq_q);
1040
1041 /* op == ALTDQ_REMOVE */
1042 m = red_getq(rqp->rq_red, rqp->rq_q);
1043 if (m != NULL)
1044 ifq->ifq_len--;
1045 return (m);
1046 }
1047
1048 static int
1049 red_request(struct ifaltq *ifq, int req, void *arg)
1050 {
1051 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1052
1053 switch (req) {
1054 case ALTRQ_PURGE:
1055 red_purgeq(rqp);
1056 break;
1057 }
1058 return (0);
1059 }
1060
1061 static void
1062 red_purgeq(red_queue_t *rqp)
1063 {
1064 _flushq(rqp->rq_q);
1065 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1066 rqp->rq_ifq->ifq_len = 0;
1067 }
1068
1069 #ifdef ALTQ_FLOWVALVE
1070
1071 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1072 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1073 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1074 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1075 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1076 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1077
1078 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1079 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1080
1081 #define FV_N 10 /* update fve_f every FV_N packets */
1082
1083 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1084 #define FV_TTHRESH 3 /* time threshold to delete fve */
1085 #define FV_ALPHA 5 /* extra packet count */
1086
1087 #define FV_STATS
1088
1089 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1090
1091 /*
1092 * Brtt table: 127 entry table to convert drop rate (p) to
1093 * the corresponding bandwidth fraction (f)
1094 * the following equation is implemented to use scaled values,
1095 * fve_p and fve_f, in the fixed point format.
1096 *
1097 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1098 * f = Brtt(p) / (max_th + alpha)
1099 */
1100 #define BRTT_SIZE 128
1101 #define BRTT_SHIFT 12
1102 #define BRTT_MASK 0x0007f000
1103 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1104
/* precomputed, scaled values of the Brtt(p) formula above, indexed by
 * the scaled drop probability (see BRTT_MASK/BRTT_SHIFT) */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1123
/*
 * flowlist_lookup: search the flow-valve LRU list for the entry whose
 * src/dst address pair matches the packet described by pktattr.
 *
 * Entries with fve_lastdrop.tv_sec == 0 act as end-of-valid-data
 * sentinels; an entry whose last drop is older than FV_TTHRESH is
 * expired in place (its tv_sec zeroed) and also terminates the scan.
 * On a miss, fv->fv_flows is refreshed with the number of live
 * entries scanned.
 *
 * returns the matching fve, or NULL if none (or unknown protocol).
 */
static inline struct fve *
flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct timeval *now)
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries whose last drop predates this are considered stale */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expire this stale entry and stop */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expire this stale entry and stop */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
			    &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
			    &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1191
1192 static inline struct fve *
1193 flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
1194 {
1195 struct fve *fve;
1196 struct ip *ip;
1197 #ifdef INET6
1198 struct ip6_hdr *ip6;
1199 #endif
1200
1201 /*
1202 * get an entry from the tail of the LRU list.
1203 */
1204 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1205
1206 switch (pktattr->pattr_af) {
1207 case AF_INET:
1208 ip = (struct ip *)pktattr->pattr_hdr;
1209 fve->fve_flow.flow_af = AF_INET;
1210 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1211 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1212 break;
1213 #ifdef INET6
1214 case AF_INET6:
1215 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1216 fve->fve_flow.flow_af = AF_INET6;
1217 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1218 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1219 break;
1220 #endif
1221 }
1222
1223 fve->fve_state = Green;
1224 fve->fve_p = 0.0;
1225 fve->fve_f = 0.0;
1226 fve->fve_ifseq = fv->fv_ifseq - 1;
1227 fve->fve_count = 0;
1228
1229 fv->fv_flows++;
1230 #ifdef FV_STATS
1231 fv->fv_stats.alloc++;
1232 #endif
1233 return (fve);
1234 }
1235
1236 static inline void
1237 flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
1238 {
1239 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1240 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1241 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1242 }
1243 }
1244
1245 /*
1246 * allocate flowvalve structure
1247 */
1248 static struct flowvalve *
1249 fv_alloc(struct red *rp)
1250 {
1251 struct flowvalve *fv;
1252 struct fve *fve;
1253 int i, num;
1254
1255 num = FV_FLOWLISTSIZE;
1256 fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
1257 if (fv == NULL)
1258 return (NULL);
1259
1260 fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
1261 M_WAITOK|M_ZERO);
1262 if (fv->fv_fves == NULL) {
1263 free(fv, M_DEVBUF);
1264 return (NULL);
1265 }
1266
1267 fv->fv_flows = 0;
1268 TAILQ_INIT(&fv->fv_flowlist);
1269 for (i = 0; i < num; i++) {
1270 fve = &fv->fv_fves[i];
1271 fve->fve_lastdrop.tv_sec = 0;
1272 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1273 }
1274
1275 /* initialize drop rate threshold in scaled fixed-point */
1276 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1277
1278 /* initialize drop rate to fraction table */
1279 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
1280 if (fv->fv_p2ftab == NULL) {
1281 free(fv->fv_fves, M_DEVBUF);
1282 free(fv, M_DEVBUF);
1283 return (NULL);
1284 }
1285 /*
1286 * create the p2f table.
1287 * (shift is used to keep the precision)
1288 */
1289 for (i = 1; i < BRTT_SIZE; i++) {
1290 int f;
1291
1292 f = brtt_tab[i] << 8;
1293 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1294 }
1295
1296 return (fv);
1297 }
1298
/*
 * free a flowvalve structure and everything fv_alloc() allocated for it.
 */
static void
fv_destroy(struct flowvalve *fv)
{
	free(fv->fv_p2ftab, M_DEVBUF);
	free(fv->fv_fves, M_DEVBUF);
	free(fv, M_DEVBUF);
}
1306
1307 static inline int
1308 fv_p2f(struct flowvalve *fv, int p)
1309 {
1310 int val, f;
1311
1312 if (p >= BRTT_PMAX)
1313 f = fv->fv_p2ftab[BRTT_SIZE-1];
1314 else if ((val = (p & BRTT_MASK)))
1315 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1316 else
1317 f = fv->fv_p2ftab[1];
1318 return (f);
1319 }
1320
1321 /*
1322 * check if an arriving packet should be pre-dropped.
1323 * called from red_addq() when a packet arrives.
1324 * returns 1 when the packet should be pre-dropped.
1325 * should be called in splnet.
1326 */
1327 static int
1328 fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
1329 struct fve **fcache)
1330 {
1331 struct fve *fve;
1332 struct timeval now;
1333
1334 fv->fv_ifseq++;
1335 FV_TIMESTAMP(&now);
1336
1337 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1338 /* no matching entry in the flowlist */
1339 return (0);
1340
1341 *fcache = fve;
1342
1343 /* update fraction f for every FV_N packets */
1344 if (++fve->fve_count == FV_N) {
1345 /*
1346 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1347 */
1348 fve->fve_f =
1349 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1350 + fve->fve_f - FV_FUNSCALE(fve->fve_f);
1351 fve->fve_ifseq = fv->fv_ifseq;
1352 fve->fve_count = 0;
1353 }
1354
1355 /*
1356 * overpumping test
1357 */
1358 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1359 int fthresh;
1360
1361 /* calculate a threshold */
1362 fthresh = fv_p2f(fv, fve->fve_p);
1363 if (fve->fve_f > fthresh)
1364 fve->fve_state = Red;
1365 }
1366
1367 if (fve->fve_state == Red) {
1368 /*
1369 * backoff test
1370 */
1371 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1372 /* no drop for at least FV_BACKOFFTHRESH sec */
1373 fve->fve_p = 0;
1374 fve->fve_state = Green;
1375 #ifdef FV_STATS
1376 fv->fv_stats.escape++;
1377 #endif
1378 } else {
1379 /* block this flow */
1380 flowlist_move_to_head(fv, fve);
1381 fve->fve_lastdrop = now;
1382 #ifdef FV_STATS
1383 fv->fv_stats.predrop++;
1384 #endif
1385 return (1);
1386 }
1387 }
1388
1389 /*
1390 * p = (1 - Wp) * p
1391 */
1392 fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1393 if (fve->fve_p < 0)
1394 fve->fve_p = 0;
1395 #ifdef FV_STATS
1396 fv->fv_stats.pass++;
1397 #endif
1398 return (0);
1399 }
1400
1401 /*
1402 * called from red_addq when a packet is dropped by red.
1403 * should be called in splnet.
1404 */
1405 static void
1406 fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
1407 struct fve *fcache)
1408 {
1409 struct fve *fve;
1410 struct timeval now;
1411
1412 if (pktattr == NULL)
1413 return;
1414 FV_TIMESTAMP(&now);
1415
1416 if (fcache != NULL)
1417 /* the fve of this packet is already cached */
1418 fve = fcache;
1419 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1420 fve = flowlist_reclaim(fv, pktattr);
1421
1422 flowlist_move_to_head(fv, fve);
1423
1424 /*
1425 * update p: the following line cancels the update
1426 * in fv_checkflow() and calculate
1427 * p = Wp + (1 - Wp) * p
1428 */
1429 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1430
1431 fve->fve_lastdrop = now;
1432 }
1433
1434 #endif /* ALTQ_FLOWVALVE */
1435
#ifdef KLD_MODULE

/* altq device switch entry used when red is built as a loadable module */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
1445 #endif /* ALTQ3_COMPAT */
1446
1447 #endif /* ALTQ_RED */
1448