altq_red.c revision 1.13.12.2 1 1.13.12.2 peter /* $NetBSD: altq_red.c,v 1.13.12.2 2006/03/18 18:18:53 peter Exp $ */
2 1.13.12.1 peter /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */
3 1.1 thorpej
4 1.1 thorpej /*
5 1.13.12.1 peter * Copyright (C) 1997-2003
6 1.1 thorpej * Sony Computer Science Laboratories Inc. All rights reserved.
7 1.1 thorpej *
8 1.1 thorpej * Redistribution and use in source and binary forms, with or without
9 1.1 thorpej * modification, are permitted provided that the following conditions
10 1.1 thorpej * are met:
11 1.1 thorpej * 1. Redistributions of source code must retain the above copyright
12 1.1 thorpej * notice, this list of conditions and the following disclaimer.
13 1.1 thorpej * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 thorpej * notice, this list of conditions and the following disclaimer in the
15 1.1 thorpej * documentation and/or other materials provided with the distribution.
16 1.1 thorpej *
17 1.1 thorpej * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 1.1 thorpej * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 1.1 thorpej * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 1.1 thorpej * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 1.1 thorpej * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 1.1 thorpej * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 1.1 thorpej * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 1.1 thorpej * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 1.1 thorpej * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 1.1 thorpej * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 1.1 thorpej * SUCH DAMAGE.
28 1.1 thorpej *
29 1.1 thorpej */
30 1.1 thorpej /*
31 1.1 thorpej * Copyright (c) 1990-1994 Regents of the University of California.
32 1.1 thorpej * All rights reserved.
33 1.1 thorpej *
34 1.1 thorpej * Redistribution and use in source and binary forms, with or without
35 1.1 thorpej * modification, are permitted provided that the following conditions
36 1.1 thorpej * are met:
37 1.1 thorpej * 1. Redistributions of source code must retain the above copyright
38 1.1 thorpej * notice, this list of conditions and the following disclaimer.
39 1.1 thorpej * 2. Redistributions in binary form must reproduce the above copyright
40 1.1 thorpej * notice, this list of conditions and the following disclaimer in the
41 1.1 thorpej * documentation and/or other materials provided with the distribution.
42 1.1 thorpej * 3. All advertising materials mentioning features or use of this software
43 1.1 thorpej * must display the following acknowledgement:
44 1.1 thorpej * This product includes software developed by the Computer Systems
45 1.1 thorpej * Engineering Group at Lawrence Berkeley Laboratory.
46 1.1 thorpej * 4. Neither the name of the University nor of the Laboratory may be used
47 1.1 thorpej * to endorse or promote products derived from this software without
48 1.1 thorpej * specific prior written permission.
49 1.1 thorpej *
50 1.1 thorpej * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 1.1 thorpej * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 1.1 thorpej * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 1.1 thorpej * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 1.1 thorpej * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 1.1 thorpej * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 1.1 thorpej * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 1.1 thorpej * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 1.1 thorpej * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 1.1 thorpej * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 1.1 thorpej * SUCH DAMAGE.
61 1.1 thorpej */
62 1.5 lukem
63 1.5 lukem #include <sys/cdefs.h>
64 1.13.12.2 peter __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.12.2 2006/03/18 18:18:53 peter Exp $");
65 1.1 thorpej
66 1.13.12.1 peter #ifdef _KERNEL_OPT
67 1.1 thorpej #include "opt_altq.h"
68 1.1 thorpej #include "opt_inet.h"
69 1.1 thorpej #endif
70 1.13.12.1 peter
71 1.1 thorpej #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
72 1.1 thorpej
73 1.1 thorpej #include <sys/param.h>
74 1.1 thorpej #include <sys/malloc.h>
75 1.1 thorpej #include <sys/mbuf.h>
76 1.1 thorpej #include <sys/socket.h>
77 1.1 thorpej #include <sys/systm.h>
78 1.1 thorpej #include <sys/errno.h>
79 1.13.12.1 peter #if 1 /* ALTQ3_COMPAT */
80 1.13.12.1 peter #include <sys/sockio.h>
81 1.13.12.1 peter #include <sys/proc.h>
82 1.1 thorpej #include <sys/kernel.h>
83 1.1 thorpej #ifdef ALTQ_FLOWVALVE
84 1.1 thorpej #include <sys/queue.h>
85 1.1 thorpej #include <sys/time.h>
86 1.1 thorpej #endif
87 1.13.12.1 peter #endif /* ALTQ3_COMPAT */
88 1.1 thorpej
89 1.1 thorpej #include <net/if.h>
90 1.1 thorpej
91 1.1 thorpej #include <netinet/in.h>
92 1.1 thorpej #include <netinet/in_systm.h>
93 1.1 thorpej #include <netinet/ip.h>
94 1.1 thorpej #ifdef INET6
95 1.1 thorpej #include <netinet/ip6.h>
96 1.1 thorpej #endif
97 1.1 thorpej
98 1.13.12.1 peter #include <net/pfvar.h>
99 1.1 thorpej #include <altq/altq.h>
100 1.1 thorpej #include <altq/altq_red.h>
101 1.13.12.1 peter #ifdef ALTQ3_COMPAT
102 1.13.12.1 peter #include <altq/altq_conf.h>
103 1.1 thorpej #ifdef ALTQ_FLOWVALVE
104 1.1 thorpej #include <altq/altq_flowvalve.h>
105 1.1 thorpej #endif
106 1.13.12.1 peter #endif
107 1.1 thorpej
108 1.1 thorpej /*
109 1.1 thorpej * ALTQ/RED (Random Early Detection) implementation using 32-bit
110 1.1 thorpej * fixed-point calculation.
111 1.1 thorpej *
112 1.1 thorpej * written by kjc using the ns code as a reference.
113 1.1 thorpej * you can learn more about red and ns from Sally's home page at
114 1.1 thorpej * http://www-nrg.ee.lbl.gov/floyd/
115 1.1 thorpej *
116 1.1 thorpej * most of the red parameter values are fixed in this implementation
117 1.1 thorpej * to prevent fixed-point overflow/underflow.
118 1.1 thorpej * if you change the parameters, watch out for overflow/underflow!
119 1.1 thorpej *
120 1.1 thorpej * the parameters used are recommended values by Sally.
121 1.1 thorpej * the corresponding ns config looks:
122 1.1 thorpej * q_weight=0.00195
123 1.1 thorpej * minthresh=5 maxthresh=15 queue-size=60
124 1.1 thorpej * linterm=30
125 1.1 thorpej * dropmech=drop-tail
126 1.1 thorpej * bytes=false (can't be handled by 32-bit fixed-point)
127 1.11 perry * doubleq=false dqthresh=false
128 1.1 thorpej * wait=true
129 1.1 thorpej */
130 1.1 thorpej /*
131 1.1 thorpej * alternative red parameters for a slow link.
132 1.1 thorpej *
133 1.1 thorpej * assume the queue length becomes from zero to L and keeps L, it takes
134 1.1 thorpej * N packets for q_avg to reach 63% of L.
135 1.1 thorpej * when q_weight is 0.002, N is about 500 packets.
136 1.1 thorpej * for a slow link like dial-up, 500 packets takes more than 1 minute!
137 1.1 thorpej * when q_weight is 0.008, N is about 127 packets.
138 1.1 thorpej * when q_weight is 0.016, N is about 63 packets.
139 1.13.12.1 peter * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
140 1.1 thorpej * are allowed for 0.016.
141 1.1 thorpej * see Sally's paper for more details.
142 1.1 thorpej */
143 1.1 thorpej /* normal red parameters */
144 1.1 thorpej #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
145 1.1 thorpej /* q_weight = 0.00195 */
146 1.1 thorpej
147 1.1 thorpej /* red parameters for a slow link */
148 1.1 thorpej #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
149 1.1 thorpej /* q_weight = 0.0078125 */
150 1.1 thorpej
151 1.1 thorpej /* red parameters for a very slow link (e.g., dialup) */
152 1.1 thorpej #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
153 1.1 thorpej /* q_weight = 0.015625 */
154 1.1 thorpej
155 1.1 thorpej /* fixed-point uses 12-bit decimal places */
156 1.1 thorpej #define FP_SHIFT 12 /* fixed-point shift */
157 1.1 thorpej
158 1.1 thorpej /* red parameters for drop probability */
159 1.1 thorpej #define INV_P_MAX 10 /* inverse of max drop probability */
160 1.1 thorpej #define TH_MIN 5 /* min threshold */
161 1.1 thorpej #define TH_MAX 15 /* max threshold */
162 1.1 thorpej
163 1.13.12.1 peter #define RED_LIMIT 60 /* default max queue lenght */
164 1.13.12.1 peter #define RED_STATS /* collect statistics */
165 1.1 thorpej
166 1.1 thorpej /*
167 1.1 thorpej * our default policy for forced-drop is drop-tail.
168 1.1 thorpej * (in altq-1.1.2 or earlier, the default was random-drop.
169 1.1 thorpej * but it makes more sense to punish the cause of the surge.)
170 1.1 thorpej * to switch to the random-drop policy, define "RED_RANDOM_DROP".
171 1.1 thorpej */
172 1.1 thorpej
173 1.13.12.1 peter #ifdef ALTQ3_COMPAT
174 1.1 thorpej #ifdef ALTQ_FLOWVALVE
175 1.1 thorpej /*
176 1.13.12.1 peter * flow-valve is an extention to protect red from unresponsive flows
177 1.1 thorpej * and to promote end-to-end congestion control.
178 1.1 thorpej * flow-valve observes the average drop rates of the flows that have
179 1.1 thorpej * experienced packet drops in the recent past.
180 1.1 thorpej * when the average drop rate exceeds the threshold, the flow is
181 1.1 thorpej * blocked by the flow-valve. the trapped flow should back off
182 1.1 thorpej * exponentially to escape from the flow-valve.
183 1.1 thorpej */
184 1.1 thorpej #ifdef RED_RANDOM_DROP
185 1.1 thorpej #error "random-drop can't be used with flow-valve!"
186 1.1 thorpej #endif
187 1.1 thorpej #endif /* ALTQ_FLOWVALVE */
188 1.1 thorpej
189 1.1 thorpej /* red_list keeps all red_queue_t's allocated. */
190 1.1 thorpej static red_queue_t *red_list = NULL;
191 1.1 thorpej
192 1.13.12.1 peter #endif /* ALTQ3_COMPAT */
193 1.13.12.1 peter
194 1.1 thorpej /* default red parameter values */
195 1.1 thorpej static int default_th_min = TH_MIN;
196 1.1 thorpej static int default_th_max = TH_MAX;
197 1.1 thorpej static int default_inv_pmax = INV_P_MAX;
198 1.1 thorpej
199 1.13.12.1 peter #ifdef ALTQ3_COMPAT
200 1.1 thorpej /* internal function prototypes */
201 1.13.12.1 peter static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
202 1.13.12.1 peter static struct mbuf *red_dequeue(struct ifaltq *, int);
203 1.13.12.1 peter static int red_request(struct ifaltq *, int, void *);
204 1.13.12.1 peter static void red_purgeq(red_queue_t *);
205 1.13.12.1 peter static int red_detach(red_queue_t *);
206 1.1 thorpej #ifdef ALTQ_FLOWVALVE
207 1.13.12.1 peter static inline struct fve *flowlist_lookup(struct flowvalve *,
208 1.13.12.1 peter struct altq_pktattr *, struct timeval *);
209 1.13.12.1 peter static inline struct fve *flowlist_reclaim(struct flowvalve *,
210 1.13.12.1 peter struct altq_pktattr *);
211 1.13.12.1 peter static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
212 1.13.12.1 peter static inline int fv_p2f(struct flowvalve *, int);
213 1.13.12.1 peter static struct flowvalve *fv_alloc(struct red *);
214 1.13.12.1 peter static void fv_destroy(struct flowvalve *);
215 1.13.12.1 peter static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
216 1.13.12.1 peter struct fve **);
217 1.13.12.1 peter static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
218 1.13.12.1 peter struct fve *);
219 1.1 thorpej #endif
220 1.13.12.1 peter #endif /* ALTQ3_COMPAT */
221 1.1 thorpej
222 1.1 thorpej /*
223 1.13.12.1 peter * red support routines
224 1.1 thorpej */
225 1.13.12.1 peter red_t *
226 1.13.12.1 peter red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
227 1.13.12.1 peter int pkttime)
228 1.1 thorpej {
229 1.13.12.1 peter red_t *rp;
230 1.13.12.1 peter int w, i;
231 1.13.12.1 peter int npkts_per_sec;
232 1.1 thorpej
233 1.13.12.1 peter MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
234 1.13.12.1 peter if (rp == NULL)
235 1.13.12.1 peter return (NULL);
236 1.13.12.1 peter (void)memset(rp, 0, sizeof(red_t));
237 1.1 thorpej
238 1.13.12.1 peter rp->red_avg = 0;
239 1.13.12.1 peter rp->red_idle = 1;
240 1.1 thorpej
241 1.13.12.1 peter if (weight == 0)
242 1.13.12.1 peter rp->red_weight = W_WEIGHT;
243 1.13.12.1 peter else
244 1.13.12.1 peter rp->red_weight = weight;
245 1.13.12.1 peter if (inv_pmax == 0)
246 1.13.12.1 peter rp->red_inv_pmax = default_inv_pmax;
247 1.13.12.1 peter else
248 1.13.12.1 peter rp->red_inv_pmax = inv_pmax;
249 1.13.12.1 peter if (th_min == 0)
250 1.13.12.1 peter rp->red_thmin = default_th_min;
251 1.13.12.1 peter else
252 1.13.12.1 peter rp->red_thmin = th_min;
253 1.13.12.1 peter if (th_max == 0)
254 1.13.12.1 peter rp->red_thmax = default_th_max;
255 1.13.12.1 peter else
256 1.13.12.1 peter rp->red_thmax = th_max;
257 1.1 thorpej
258 1.13.12.1 peter rp->red_flags = flags;
259 1.11 perry
260 1.13.12.1 peter if (pkttime == 0)
261 1.13.12.1 peter /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
262 1.13.12.1 peter rp->red_pkttime = 800;
263 1.13.12.1 peter else
264 1.13.12.1 peter rp->red_pkttime = pkttime;
265 1.1 thorpej
266 1.13.12.1 peter if (weight == 0) {
267 1.13.12.1 peter /* when the link is very slow, adjust red parameters */
268 1.13.12.1 peter npkts_per_sec = 1000000 / rp->red_pkttime;
269 1.13.12.1 peter if (npkts_per_sec < 50) {
270 1.13.12.1 peter /* up to about 400Kbps */
271 1.13.12.1 peter rp->red_weight = W_WEIGHT_2;
272 1.13.12.1 peter } else if (npkts_per_sec < 300) {
273 1.13.12.1 peter /* up to about 2.4Mbps */
274 1.13.12.1 peter rp->red_weight = W_WEIGHT_1;
275 1.1 thorpej }
276 1.13.12.1 peter }
277 1.1 thorpej
278 1.13.12.1 peter /* calculate wshift. weight must be power of 2 */
279 1.13.12.1 peter w = rp->red_weight;
280 1.13.12.1 peter for (i = 0; w > 1; i++)
281 1.13.12.1 peter w = w >> 1;
282 1.13.12.1 peter rp->red_wshift = i;
283 1.13.12.1 peter w = 1 << rp->red_wshift;
284 1.13.12.1 peter if (w != rp->red_weight) {
285 1.13.12.1 peter printf("invalid weight value %d for red! use %d\n",
286 1.13.12.1 peter rp->red_weight, w);
287 1.13.12.1 peter rp->red_weight = w;
288 1.13.12.1 peter }
289 1.1 thorpej
290 1.13.12.1 peter /*
291 1.13.12.1 peter * thmin_s and thmax_s are scaled versions of th_min and th_max
292 1.13.12.1 peter * to be compared with avg.
293 1.13.12.1 peter */
294 1.13.12.1 peter rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
295 1.13.12.1 peter rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
296 1.1 thorpej
297 1.13.12.1 peter /*
298 1.13.12.1 peter * precompute probability denominator
299 1.13.12.1 peter * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
300 1.13.12.1 peter */
301 1.13.12.1 peter rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
302 1.13.12.1 peter * rp->red_inv_pmax) << FP_SHIFT;
303 1.1 thorpej
304 1.13.12.1 peter /* allocate weight table */
305 1.13.12.1 peter rp->red_wtab = wtab_alloc(rp->red_weight);
306 1.1 thorpej
307 1.13.12.1 peter microtime(&rp->red_last);
308 1.13.12.2 peter #ifdef ALTQ3_COMPAT
309 1.13.12.1 peter #ifdef ALTQ_FLOWVALVE
310 1.13.12.1 peter if (flags & REDF_FLOWVALVE)
311 1.13.12.1 peter rp->red_flowvalve = fv_alloc(rp);
312 1.13.12.1 peter /* if fv_alloc failes, flowvalve is just disabled */
313 1.13.12.1 peter #endif
314 1.13.12.2 peter #endif /* ALTQ3_COMPAT */
315 1.13.12.1 peter return (rp);
316 1.13.12.1 peter }
317 1.1 thorpej
318 1.13.12.1 peter void
319 1.13.12.1 peter red_destroy(red_t *rp)
320 1.13.12.1 peter {
321 1.13.12.1 peter #ifdef ALTQ3_COMPAT
322 1.13.12.1 peter #ifdef ALTQ_FLOWVALVE
323 1.13.12.1 peter if (rp->red_flowvalve != NULL)
324 1.13.12.1 peter fv_destroy(rp->red_flowvalve);
325 1.13.12.1 peter #endif
326 1.13.12.1 peter #endif /* ALTQ3_COMPAT */
327 1.13.12.1 peter wtab_destroy(rp->red_wtab);
328 1.13.12.1 peter FREE(rp, M_DEVBUF);
329 1.13.12.1 peter }
330 1.1 thorpej
331 1.13.12.1 peter void
332 1.13.12.1 peter red_getstats(red_t *rp, struct redstats *sp)
333 1.13.12.1 peter {
334 1.13.12.1 peter sp->q_avg = rp->red_avg >> rp->red_wshift;
335 1.13.12.1 peter sp->xmit_cnt = rp->red_stats.xmit_cnt;
336 1.13.12.1 peter sp->drop_cnt = rp->red_stats.drop_cnt;
337 1.13.12.1 peter sp->drop_forced = rp->red_stats.drop_forced;
338 1.13.12.1 peter sp->drop_unforced = rp->red_stats.drop_unforced;
339 1.13.12.1 peter sp->marked_packets = rp->red_stats.marked_packets;
340 1.13.12.1 peter }
341 1.1 thorpej
342 1.13.12.1 peter int
343 1.13.12.1 peter red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
344 1.13.12.1 peter struct altq_pktattr *pktattr)
345 1.13.12.1 peter {
346 1.13.12.1 peter int avg, droptype;
347 1.13.12.1 peter int n;
348 1.13.12.1 peter #ifdef ALTQ3_COMPAT
349 1.13.12.1 peter #ifdef ALTQ_FLOWVALVE
350 1.13.12.1 peter struct fve *fve = NULL;
351 1.1 thorpej
352 1.13.12.1 peter if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
353 1.13.12.1 peter if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
354 1.13.12.1 peter m_freem(m);
355 1.13.12.1 peter return (-1);
356 1.1 thorpej }
357 1.13.12.1 peter #endif
358 1.13.12.1 peter #endif /* ALTQ3_COMPAT */
359 1.1 thorpej
360 1.13.12.1 peter avg = rp->red_avg;
361 1.1 thorpej
362 1.13.12.1 peter /*
363 1.13.12.1 peter * if we were idle, we pretend that n packets arrived during
364 1.13.12.1 peter * the idle period.
365 1.13.12.1 peter */
366 1.13.12.1 peter if (rp->red_idle) {
367 1.13.12.1 peter struct timeval now;
368 1.13.12.1 peter int t;
369 1.1 thorpej
370 1.13.12.1 peter rp->red_idle = 0;
371 1.13.12.1 peter microtime(&now);
372 1.13.12.1 peter t = (now.tv_sec - rp->red_last.tv_sec);
373 1.13.12.1 peter if (t > 60) {
374 1.13.12.1 peter /*
375 1.13.12.1 peter * being idle for more than 1 minute, set avg to zero.
376 1.13.12.1 peter * this prevents t from overflow.
377 1.13.12.1 peter */
378 1.13.12.1 peter avg = 0;
379 1.13.12.1 peter } else {
380 1.13.12.1 peter t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
381 1.13.12.1 peter n = t / rp->red_pkttime - 1;
382 1.1 thorpej
383 1.13.12.1 peter /* the following line does (avg = (1 - Wq)^n * avg) */
384 1.13.12.1 peter if (n > 0)
385 1.13.12.1 peter avg = (avg >> FP_SHIFT) *
386 1.13.12.1 peter pow_w(rp->red_wtab, n);
387 1.13.12.1 peter }
388 1.13.12.1 peter }
389 1.1 thorpej
390 1.13.12.1 peter /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
391 1.13.12.1 peter avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
392 1.13.12.1 peter rp->red_avg = avg; /* save the new value */
393 1.1 thorpej
394 1.13.12.1 peter /*
395 1.13.12.1 peter * red_count keeps a tally of arriving traffic that has not
396 1.13.12.1 peter * been dropped.
397 1.13.12.1 peter */
398 1.13.12.1 peter rp->red_count++;
399 1.1 thorpej
400 1.13.12.1 peter /* see if we drop early */
401 1.13.12.1 peter droptype = DTYPE_NODROP;
402 1.13.12.1 peter if (avg >= rp->red_thmin_s && qlen(q) > 1) {
403 1.13.12.1 peter if (avg >= rp->red_thmax_s) {
404 1.13.12.1 peter /* avg >= th_max: forced drop */
405 1.13.12.1 peter droptype = DTYPE_FORCED;
406 1.13.12.1 peter } else if (rp->red_old == 0) {
407 1.13.12.1 peter /* first exceeds th_min */
408 1.13.12.1 peter rp->red_count = 1;
409 1.13.12.1 peter rp->red_old = 1;
410 1.13.12.1 peter } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
411 1.13.12.1 peter rp->red_probd, rp->red_count)) {
412 1.13.12.1 peter /* mark or drop by red */
413 1.13.12.1 peter if ((rp->red_flags & REDF_ECN) &&
414 1.13.12.1 peter mark_ecn(m, pktattr, rp->red_flags)) {
415 1.13.12.1 peter /* successfully marked. do not drop. */
416 1.13.12.1 peter rp->red_count = 0;
417 1.13.12.1 peter #ifdef RED_STATS
418 1.13.12.1 peter rp->red_stats.marked_packets++;
419 1.13.12.1 peter #endif
420 1.13.12.1 peter } else {
421 1.13.12.1 peter /* unforced drop by red */
422 1.13.12.1 peter droptype = DTYPE_EARLY;
423 1.1 thorpej }
424 1.13.12.1 peter }
425 1.13.12.1 peter } else {
426 1.13.12.1 peter /* avg < th_min */
427 1.13.12.1 peter rp->red_old = 0;
428 1.13.12.1 peter }
429 1.1 thorpej
430 1.13.12.1 peter /*
431 1.13.12.1 peter * if the queue length hits the hard limit, it's a forced drop.
432 1.13.12.1 peter */
433 1.13.12.1 peter if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
434 1.13.12.1 peter droptype = DTYPE_FORCED;
435 1.11 perry
436 1.13.12.1 peter #ifdef RED_RANDOM_DROP
437 1.13.12.1 peter /* if successful or forced drop, enqueue this packet. */
438 1.13.12.1 peter if (droptype != DTYPE_EARLY)
439 1.13.12.1 peter _addq(q, m);
440 1.13.12.1 peter #else
441 1.13.12.1 peter /* if successful, enqueue this packet. */
442 1.13.12.1 peter if (droptype == DTYPE_NODROP)
443 1.13.12.1 peter _addq(q, m);
444 1.13.12.1 peter #endif
445 1.13.12.1 peter if (droptype != DTYPE_NODROP) {
446 1.13.12.1 peter if (droptype == DTYPE_EARLY) {
447 1.13.12.1 peter /* drop the incoming packet */
448 1.13.12.1 peter #ifdef RED_STATS
449 1.13.12.1 peter rp->red_stats.drop_unforced++;
450 1.13.12.1 peter #endif
451 1.13.12.1 peter } else {
452 1.13.12.1 peter /* forced drop, select a victim packet in the queue. */
453 1.13.12.1 peter #ifdef RED_RANDOM_DROP
454 1.13.12.1 peter m = _getq_random(q);
455 1.13.12.1 peter #endif
456 1.13.12.1 peter #ifdef RED_STATS
457 1.13.12.1 peter rp->red_stats.drop_forced++;
458 1.13.12.1 peter #endif
459 1.13.12.1 peter }
460 1.13.12.1 peter #ifdef RED_STATS
461 1.13.12.1 peter PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
462 1.13.12.1 peter #endif
463 1.13.12.1 peter rp->red_count = 0;
464 1.13.12.1 peter #ifdef ALTQ3_COMPAT
465 1.13.12.1 peter #ifdef ALTQ_FLOWVALVE
466 1.13.12.1 peter if (rp->red_flowvalve != NULL)
467 1.13.12.1 peter fv_dropbyred(rp->red_flowvalve, pktattr, fve);
468 1.13.12.1 peter #endif
469 1.13.12.1 peter #endif /* ALTQ3_COMPAT */
470 1.13.12.1 peter m_freem(m);
471 1.13.12.1 peter return (-1);
472 1.1 thorpej }
473 1.13.12.1 peter /* successfully queued */
474 1.13.12.1 peter #ifdef RED_STATS
475 1.13.12.1 peter PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
476 1.13.12.1 peter #endif
477 1.13.12.1 peter return (0);
478 1.1 thorpej }
479 1.1 thorpej
480 1.13.12.1 peter /*
481 1.13.12.1 peter * early-drop probability is calculated as follows:
482 1.13.12.1 peter * prob = p_max * (avg - th_min) / (th_max - th_min)
483 1.13.12.1 peter * prob_a = prob / (2 - count*prob)
484 1.13.12.1 peter * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
485 1.13.12.1 peter * here prob_a increases as successive undrop count increases.
486 1.13.12.1 peter * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
487 1.13.12.1 peter * becomes 1 when (count >= (2 / prob))).
488 1.13.12.1 peter */
489 1.13.12.1 peter int
490 1.13.12.1 peter drop_early(int fp_len, int fp_probd, int count)
491 1.1 thorpej {
492 1.13.12.1 peter int d; /* denominator of drop-probability */
493 1.1 thorpej
494 1.13.12.1 peter d = fp_probd - count * fp_len;
495 1.13.12.1 peter if (d <= 0)
496 1.13.12.1 peter /* count exceeds the hard limit: drop or mark */
497 1.13.12.1 peter return (1);
498 1.1 thorpej
499 1.13.12.1 peter /*
500 1.13.12.1 peter * now the range of d is [1..600] in fixed-point. (when
501 1.13.12.1 peter * th_max-th_min=10 and p_max=1/30)
502 1.13.12.1 peter * drop probability = (avg - TH_MIN) / d
503 1.13.12.1 peter */
504 1.1 thorpej
505 1.13.12.1 peter if ((arc4random() % d) < fp_len) {
506 1.13.12.1 peter /* drop or mark */
507 1.13.12.1 peter return (1);
508 1.1 thorpej }
509 1.13.12.1 peter /* no drop/mark */
510 1.13.12.1 peter return (0);
511 1.1 thorpej }
512 1.1 thorpej
513 1.1 thorpej /*
514 1.13.12.1 peter * try to mark CE bit to the packet.
515 1.13.12.1 peter * returns 1 if successfully marked, 0 otherwise.
516 1.1 thorpej */
517 1.13.12.1 peter int
518 1.13.12.1 peter mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
519 1.13.12.1 peter {
520 1.13.12.1 peter struct mbuf *m0;
521 1.13.12.1 peter struct m_tag *t;
522 1.13.12.1 peter struct altq_tag *at;
523 1.13.12.1 peter void *hdr;
524 1.13.12.1 peter int af;
525 1.13.12.1 peter
526 1.13.12.1 peter t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
527 1.13.12.1 peter if (t != NULL) {
528 1.13.12.1 peter at = (struct altq_tag *)(t + 1);
529 1.13.12.1 peter if (at == NULL)
530 1.13.12.1 peter return (0);
531 1.13.12.1 peter af = at->af;
532 1.13.12.1 peter hdr = at->hdr;
533 1.13.12.1 peter #ifdef ALTQ3_COMPAT
534 1.13.12.1 peter } else if (pktattr != NULL) {
535 1.13.12.1 peter af = pktattr->pattr_af;
536 1.13.12.1 peter hdr = pktattr->pattr_hdr;
537 1.13.12.1 peter #endif /* ALTQ3_COMPAT */
538 1.13.12.1 peter } else
539 1.13.12.1 peter return (0);
540 1.1 thorpej
541 1.13.12.1 peter if (af != AF_INET && af != AF_INET6)
542 1.13.12.1 peter return (0);
543 1.11 perry
544 1.13.12.1 peter /* verify that pattr_hdr is within the mbuf data */
545 1.13.12.1 peter for (m0 = m; m0 != NULL; m0 = m0->m_next)
546 1.13.12.1 peter if (((caddr_t)hdr >= m0->m_data) &&
547 1.13.12.1 peter ((caddr_t)hdr < m0->m_data + m0->m_len))
548 1.13.12.1 peter break;
549 1.13.12.1 peter if (m0 == NULL) {
550 1.13.12.1 peter /* ick, tag info is stale */
551 1.13.12.1 peter return (0);
552 1.13.12.1 peter }
553 1.1 thorpej
554 1.13.12.1 peter switch (af) {
555 1.13.12.1 peter case AF_INET:
556 1.13.12.1 peter if (flags & REDF_ECN4) {
557 1.13.12.1 peter struct ip *ip = hdr;
558 1.13.12.1 peter u_int8_t otos;
559 1.13.12.1 peter int sum;
560 1.1 thorpej
561 1.13.12.1 peter if (ip->ip_v != 4)
562 1.13.12.1 peter return (0); /* version mismatch! */
563 1.1 thorpej
564 1.13.12.1 peter if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
565 1.13.12.1 peter return (0); /* not-ECT */
566 1.13.12.1 peter if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
567 1.13.12.1 peter return (1); /* already marked */
568 1.1 thorpej
569 1.13.12.1 peter /*
570 1.13.12.1 peter * ecn-capable but not marked,
571 1.13.12.1 peter * mark CE and update checksum
572 1.13.12.1 peter */
573 1.13.12.1 peter otos = ip->ip_tos;
574 1.13.12.1 peter ip->ip_tos |= IPTOS_ECN_CE;
575 1.13.12.1 peter /*
576 1.13.12.1 peter * update checksum (from RFC1624)
577 1.13.12.1 peter * HC' = ~(~HC + ~m + m')
578 1.13.12.1 peter */
579 1.13.12.1 peter sum = ~ntohs(ip->ip_sum) & 0xffff;
580 1.13.12.1 peter sum += (~otos & 0xffff) + ip->ip_tos;
581 1.13.12.1 peter sum = (sum >> 16) + (sum & 0xffff);
582 1.13.12.1 peter sum += (sum >> 16); /* add carry */
583 1.13.12.1 peter ip->ip_sum = htons(~sum & 0xffff);
584 1.13.12.1 peter return (1);
585 1.1 thorpej }
586 1.13.12.1 peter break;
587 1.13.12.1 peter #ifdef INET6
588 1.13.12.1 peter case AF_INET6:
589 1.13.12.1 peter if (flags & REDF_ECN6) {
590 1.13.12.1 peter struct ip6_hdr *ip6 = hdr;
591 1.13.12.1 peter u_int32_t flowlabel;
592 1.1 thorpej
593 1.13.12.1 peter flowlabel = ntohl(ip6->ip6_flow);
594 1.13.12.1 peter if ((flowlabel >> 28) != 6)
595 1.13.12.1 peter return (0); /* version mismatch! */
596 1.13.12.1 peter if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
597 1.13.12.1 peter (IPTOS_ECN_NOTECT << 20))
598 1.13.12.1 peter return (0); /* not-ECT */
599 1.13.12.1 peter if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
600 1.13.12.1 peter (IPTOS_ECN_CE << 20))
601 1.13.12.1 peter return (1); /* already marked */
602 1.13.12.1 peter /*
603 1.13.12.1 peter * ecn-capable but not marked, mark CE
604 1.13.12.1 peter */
605 1.13.12.1 peter flowlabel |= (IPTOS_ECN_CE << 20);
606 1.13.12.1 peter ip6->ip6_flow = htonl(flowlabel);
607 1.13.12.1 peter return (1);
608 1.13.12.1 peter }
609 1.13.12.1 peter break;
610 1.13.12.1 peter #endif /* INET6 */
611 1.1 thorpej }
612 1.11 perry
613 1.13.12.1 peter /* not marked */
614 1.13.12.1 peter return (0);
615 1.1 thorpej }
616 1.1 thorpej
617 1.13.12.1 peter struct mbuf *
618 1.13.12.1 peter red_getq(rp, q)
619 1.1 thorpej red_t *rp;
620 1.13.12.1 peter class_queue_t *q;
621 1.1 thorpej {
622 1.13.12.1 peter struct mbuf *m;
623 1.1 thorpej
624 1.13.12.1 peter if ((m = _getq(q)) == NULL) {
625 1.13.12.1 peter if (rp->red_idle == 0) {
626 1.13.12.1 peter rp->red_idle = 1;
627 1.13.12.1 peter microtime(&rp->red_last);
628 1.13.12.1 peter }
629 1.13.12.1 peter return NULL;
630 1.13.12.1 peter }
631 1.13.12.1 peter
632 1.13.12.1 peter rp->red_idle = 0;
633 1.13.12.1 peter return (m);
634 1.1 thorpej }
635 1.1 thorpej
636 1.1 thorpej /*
637 1.13.12.1 peter * helper routine to calibrate avg during idle.
638 1.13.12.1 peter * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
639 1.13.12.1 peter * here Wq = 1/weight and the code assumes Wq is close to zero.
640 1.1 thorpej *
641 1.13.12.1 peter * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
642 1.1 thorpej */
643 1.13.12.1 peter static struct wtab *wtab_list = NULL; /* pointer to wtab list */
644 1.13.12.1 peter
645 1.13.12.1 peter struct wtab *
646 1.13.12.1 peter wtab_alloc(int weight)
647 1.1 thorpej {
648 1.13.12.1 peter struct wtab *w;
649 1.13.12.1 peter int i;
650 1.1 thorpej
651 1.13.12.1 peter for (w = wtab_list; w != NULL; w = w->w_next)
652 1.13.12.1 peter if (w->w_weight == weight) {
653 1.13.12.1 peter w->w_refcount++;
654 1.13.12.1 peter return (w);
655 1.13.12.1 peter }
656 1.13.12.1 peter
657 1.13.12.1 peter MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
658 1.13.12.1 peter if (w == NULL)
659 1.13.12.1 peter panic("wtab_alloc: malloc failed!");
660 1.13.12.1 peter (void)memset(w, 0, sizeof(struct wtab));
661 1.13.12.1 peter w->w_weight = weight;
662 1.13.12.1 peter w->w_refcount = 1;
663 1.13.12.1 peter w->w_next = wtab_list;
664 1.13.12.1 peter wtab_list = w;
665 1.13.12.1 peter
666 1.13.12.1 peter /* initialize the weight table */
667 1.13.12.1 peter w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
668 1.13.12.1 peter for (i = 1; i < 32; i++) {
669 1.13.12.1 peter w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
670 1.13.12.1 peter if (w->w_tab[i] == 0 && w->w_param_max == 0)
671 1.13.12.1 peter w->w_param_max = 1 << i;
672 1.13.12.1 peter }
673 1.13.12.1 peter
674 1.13.12.1 peter return (w);
675 1.1 thorpej }
676 1.1 thorpej
677 1.1 thorpej int
678 1.13.12.1 peter wtab_destroy(struct wtab *w)
679 1.1 thorpej {
680 1.13.12.1 peter struct wtab *prev;
681 1.1 thorpej
682 1.13.12.1 peter if (--w->w_refcount > 0)
683 1.13.12.1 peter return (0);
684 1.13.12.1 peter
685 1.13.12.1 peter if (wtab_list == w)
686 1.13.12.1 peter wtab_list = w->w_next;
687 1.13.12.1 peter else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
688 1.13.12.1 peter if (prev->w_next == w) {
689 1.13.12.1 peter prev->w_next = w->w_next;
690 1.13.12.1 peter break;
691 1.1 thorpej }
692 1.1 thorpej
693 1.13.12.1 peter FREE(w, M_DEVBUF);
694 1.13.12.1 peter return (0);
695 1.13.12.1 peter }
696 1.1 thorpej
697 1.13.12.1 peter int32_t
698 1.13.12.1 peter pow_w(struct wtab *w, int n)
699 1.13.12.1 peter {
700 1.13.12.1 peter int i, bit;
701 1.13.12.1 peter int32_t val;
702 1.1 thorpej
703 1.13.12.1 peter if (n >= w->w_param_max)
704 1.13.12.1 peter return (0);
705 1.1 thorpej
706 1.13.12.1 peter val = 1 << FP_SHIFT;
707 1.13.12.1 peter if (n <= 0)
708 1.13.12.1 peter return (val);
709 1.13.12.1 peter
710 1.13.12.1 peter bit = 1;
711 1.13.12.1 peter i = 0;
712 1.13.12.1 peter while (n) {
713 1.13.12.1 peter if (n & bit) {
714 1.13.12.1 peter val = (val * w->w_tab[i]) >> FP_SHIFT;
715 1.13.12.1 peter n &= ~bit;
716 1.1 thorpej }
717 1.13.12.1 peter i++;
718 1.13.12.1 peter bit <<= 1;
719 1.1 thorpej }
720 1.13.12.1 peter return (val);
721 1.13.12.1 peter }
722 1.1 thorpej
723 1.13.12.1 peter #ifdef ALTQ3_COMPAT
724 1.13.12.1 peter /*
725 1.13.12.1 peter * red device interface
726 1.13.12.1 peter */
727 1.13.12.1 peter altqdev_decl(red);
728 1.1 thorpej
729 1.13.12.1 peter int
730 1.13.12.1 peter redopen(dev, flag, fmt, l)
731 1.13.12.1 peter dev_t dev;
732 1.13.12.1 peter int flag, fmt;
733 1.13.12.1 peter struct lwp *l;
734 1.13.12.1 peter {
735 1.13.12.1 peter /* everything will be done when the queueing scheme is attached. */
736 1.13.12.1 peter return 0;
737 1.13.12.1 peter }
738 1.13.12.1 peter
739 1.13.12.1 peter int
740 1.13.12.1 peter redclose(dev, flag, fmt, l)
741 1.13.12.1 peter dev_t dev;
742 1.13.12.1 peter int flag, fmt;
743 1.13.12.1 peter struct lwp *l;
744 1.13.12.1 peter {
745 1.13.12.1 peter red_queue_t *rqp;
746 1.13.12.1 peter int err, error = 0;
747 1.13.12.1 peter
748 1.13.12.1 peter while ((rqp = red_list) != NULL) {
749 1.13.12.1 peter /* destroy all */
750 1.13.12.1 peter err = red_detach(rqp);
751 1.13.12.1 peter if (err != 0 && error == 0)
752 1.13.12.1 peter error = err;
753 1.1 thorpej }
754 1.1 thorpej
755 1.13.12.1 peter return error;
756 1.13.12.1 peter }
757 1.1 thorpej
758 1.13.12.1 peter int
759 1.13.12.1 peter redioctl(dev, cmd, addr, flag, l)
760 1.13.12.1 peter dev_t dev;
761 1.13.12.1 peter ioctlcmd_t cmd;
762 1.13.12.1 peter caddr_t addr;
763 1.13.12.1 peter int flag;
764 1.13.12.1 peter struct lwp *l;
765 1.13.12.1 peter {
766 1.13.12.1 peter red_queue_t *rqp;
767 1.13.12.1 peter struct red_interface *ifacep;
768 1.13.12.1 peter struct ifnet *ifp;
769 1.13.12.1 peter struct proc *p = l->l_proc;
770 1.13.12.1 peter int error = 0;
771 1.13.12.1 peter
772 1.13.12.1 peter /* check super-user privilege */
773 1.13.12.1 peter switch (cmd) {
774 1.13.12.1 peter case RED_GETSTATS:
775 1.13.12.1 peter break;
776 1.13.12.1 peter default:
777 1.13.12.1 peter #if (__FreeBSD_version > 400000)
778 1.13.12.1 peter if ((error = suser(p)) != 0)
779 1.1 thorpej #else
780 1.13.12.1 peter if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
781 1.1 thorpej #endif
782 1.13.12.1 peter return (error);
783 1.13.12.1 peter break;
784 1.13.12.1 peter }
785 1.13.12.1 peter
786 1.13.12.1 peter switch (cmd) {
787 1.13.12.1 peter
788 1.13.12.1 peter case RED_ENABLE:
789 1.13.12.1 peter ifacep = (struct red_interface *)addr;
790 1.13.12.1 peter if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
791 1.13.12.1 peter error = EBADF;
792 1.13.12.1 peter break;
793 1.1 thorpej }
794 1.13.12.1 peter error = altq_enable(rqp->rq_ifq);
795 1.13.12.1 peter break;
796 1.13.12.1 peter
797 1.13.12.1 peter case RED_DISABLE:
798 1.13.12.1 peter ifacep = (struct red_interface *)addr;
799 1.13.12.1 peter if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
800 1.13.12.1 peter error = EBADF;
801 1.13.12.1 peter break;
802 1.13.12.1 peter }
803 1.13.12.1 peter error = altq_disable(rqp->rq_ifq);
804 1.13.12.1 peter break;
805 1.13.12.1 peter
806 1.13.12.1 peter case RED_IF_ATTACH:
807 1.13.12.1 peter ifp = ifunit(((struct red_interface *)addr)->red_ifname);
808 1.13.12.1 peter if (ifp == NULL) {
809 1.13.12.1 peter error = ENXIO;
810 1.13.12.1 peter break;
811 1.13.12.1 peter }
812 1.13.12.1 peter
813 1.13.12.1 peter /* allocate and initialize red_queue_t */
814 1.13.12.1 peter MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
815 1.13.12.1 peter if (rqp == NULL) {
816 1.13.12.1 peter error = ENOMEM;
817 1.13.12.1 peter break;
818 1.13.12.1 peter }
819 1.13.12.1 peter (void)memset(rqp, 0, sizeof(red_queue_t));
820 1.13.12.1 peter
821 1.13.12.1 peter MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
822 1.13.12.1 peter M_DEVBUF, M_WAITOK);
823 1.13.12.1 peter if (rqp->rq_q == NULL) {
824 1.13.12.1 peter FREE(rqp, M_DEVBUF);
825 1.13.12.1 peter error = ENOMEM;
826 1.13.12.1 peter break;
827 1.13.12.1 peter }
828 1.13.12.1 peter (void)memset(rqp->rq_q, 0, sizeof(class_queue_t));
829 1.13.12.1 peter
830 1.13.12.1 peter rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
831 1.13.12.1 peter if (rqp->rq_red == NULL) {
832 1.13.12.1 peter FREE(rqp->rq_q, M_DEVBUF);
833 1.13.12.1 peter FREE(rqp, M_DEVBUF);
834 1.13.12.1 peter error = ENOMEM;
835 1.13.12.1 peter break;
836 1.13.12.1 peter }
837 1.13.12.1 peter
838 1.13.12.1 peter rqp->rq_ifq = &ifp->if_snd;
839 1.13.12.1 peter qtail(rqp->rq_q) = NULL;
840 1.13.12.1 peter qlen(rqp->rq_q) = 0;
841 1.13.12.1 peter qlimit(rqp->rq_q) = RED_LIMIT;
842 1.13.12.1 peter qtype(rqp->rq_q) = Q_RED;
843 1.13.12.1 peter
844 1.13.12.1 peter /*
845 1.13.12.1 peter * set RED to this ifnet structure.
846 1.13.12.1 peter */
847 1.13.12.1 peter error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
848 1.13.12.1 peter red_enqueue, red_dequeue, red_request,
849 1.13.12.1 peter NULL, NULL);
850 1.13.12.1 peter if (error) {
851 1.13.12.1 peter red_destroy(rqp->rq_red);
852 1.13.12.1 peter FREE(rqp->rq_q, M_DEVBUF);
853 1.13.12.1 peter FREE(rqp, M_DEVBUF);
854 1.13.12.1 peter break;
855 1.13.12.1 peter }
856 1.13.12.1 peter
857 1.13.12.1 peter /* add this state to the red list */
858 1.13.12.1 peter rqp->rq_next = red_list;
859 1.13.12.1 peter red_list = rqp;
860 1.13.12.1 peter break;
861 1.13.12.1 peter
862 1.13.12.1 peter case RED_IF_DETACH:
863 1.13.12.1 peter ifacep = (struct red_interface *)addr;
864 1.13.12.1 peter if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
865 1.13.12.1 peter error = EBADF;
866 1.13.12.1 peter break;
867 1.13.12.1 peter }
868 1.13.12.1 peter error = red_detach(rqp);
869 1.13.12.1 peter break;
870 1.13.12.1 peter
871 1.13.12.1 peter case RED_GETSTATS:
872 1.13.12.1 peter do {
873 1.13.12.1 peter struct red_stats *q_stats;
874 1.13.12.1 peter red_t *rp;
875 1.13.12.1 peter
876 1.13.12.1 peter q_stats = (struct red_stats *)addr;
877 1.13.12.1 peter if ((rqp = altq_lookup(q_stats->iface.red_ifname,
878 1.13.12.1 peter ALTQT_RED)) == NULL) {
879 1.13.12.1 peter error = EBADF;
880 1.13.12.1 peter break;
881 1.13.12.1 peter }
882 1.13.12.1 peter
883 1.13.12.1 peter q_stats->q_len = qlen(rqp->rq_q);
884 1.13.12.1 peter q_stats->q_limit = qlimit(rqp->rq_q);
885 1.13.12.1 peter
886 1.13.12.1 peter rp = rqp->rq_red;
887 1.13.12.1 peter q_stats->q_avg = rp->red_avg >> rp->red_wshift;
888 1.13.12.1 peter q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
889 1.13.12.1 peter q_stats->drop_cnt = rp->red_stats.drop_cnt;
890 1.13.12.1 peter q_stats->drop_forced = rp->red_stats.drop_forced;
891 1.13.12.1 peter q_stats->drop_unforced = rp->red_stats.drop_unforced;
892 1.13.12.1 peter q_stats->marked_packets = rp->red_stats.marked_packets;
893 1.13.12.1 peter
894 1.13.12.1 peter q_stats->weight = rp->red_weight;
895 1.13.12.1 peter q_stats->inv_pmax = rp->red_inv_pmax;
896 1.13.12.1 peter q_stats->th_min = rp->red_thmin;
897 1.13.12.1 peter q_stats->th_max = rp->red_thmax;
898 1.13.12.1 peter
899 1.1 thorpej #ifdef ALTQ_FLOWVALVE
900 1.13.12.1 peter if (rp->red_flowvalve != NULL) {
901 1.13.12.1 peter struct flowvalve *fv = rp->red_flowvalve;
902 1.13.12.1 peter q_stats->fv_flows = fv->fv_flows;
903 1.13.12.1 peter q_stats->fv_pass = fv->fv_stats.pass;
904 1.13.12.1 peter q_stats->fv_predrop = fv->fv_stats.predrop;
905 1.13.12.1 peter q_stats->fv_alloc = fv->fv_stats.alloc;
906 1.13.12.1 peter q_stats->fv_escape = fv->fv_stats.escape;
907 1.13.12.1 peter } else {
908 1.13.12.1 peter #endif /* ALTQ_FLOWVALVE */
909 1.13.12.1 peter q_stats->fv_flows = 0;
910 1.13.12.1 peter q_stats->fv_pass = 0;
911 1.13.12.1 peter q_stats->fv_predrop = 0;
912 1.13.12.1 peter q_stats->fv_alloc = 0;
913 1.13.12.1 peter q_stats->fv_escape = 0;
914 1.13.12.1 peter #ifdef ALTQ_FLOWVALVE
915 1.13.12.1 peter }
916 1.13.12.1 peter #endif /* ALTQ_FLOWVALVE */
917 1.13.12.1 peter } while (/*CONSTCOND*/ 0);
918 1.13.12.1 peter break;
919 1.13.12.1 peter
920 1.13.12.1 peter case RED_CONFIG:
921 1.13.12.1 peter do {
922 1.13.12.1 peter struct red_conf *fc;
923 1.13.12.1 peter red_t *new;
924 1.13.12.1 peter int s, limit;
925 1.13.12.1 peter
926 1.13.12.1 peter fc = (struct red_conf *)addr;
927 1.13.12.1 peter if ((rqp = altq_lookup(fc->iface.red_ifname,
928 1.13.12.1 peter ALTQT_RED)) == NULL) {
929 1.13.12.1 peter error = EBADF;
930 1.13.12.1 peter break;
931 1.13.12.1 peter }
932 1.13.12.1 peter new = red_alloc(fc->red_weight,
933 1.13.12.1 peter fc->red_inv_pmax,
934 1.13.12.1 peter fc->red_thmin,
935 1.13.12.1 peter fc->red_thmax,
936 1.13.12.1 peter fc->red_flags,
937 1.13.12.1 peter fc->red_pkttime);
938 1.13.12.1 peter if (new == NULL) {
939 1.13.12.1 peter error = ENOMEM;
940 1.13.12.1 peter break;
941 1.13.12.1 peter }
942 1.13.12.1 peter
943 1.13.12.1 peter s = splnet();
944 1.13.12.1 peter red_purgeq(rqp);
945 1.13.12.1 peter limit = fc->red_limit;
946 1.13.12.1 peter if (limit < fc->red_thmax)
947 1.13.12.1 peter limit = fc->red_thmax;
948 1.13.12.1 peter qlimit(rqp->rq_q) = limit;
949 1.13.12.1 peter fc->red_limit = limit; /* write back the new value */
950 1.13.12.1 peter
951 1.13.12.1 peter red_destroy(rqp->rq_red);
952 1.13.12.1 peter rqp->rq_red = new;
953 1.13.12.1 peter
954 1.13.12.1 peter splx(s);
955 1.13.12.1 peter
956 1.13.12.1 peter /* write back new values */
957 1.13.12.1 peter fc->red_limit = limit;
958 1.13.12.1 peter fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
959 1.13.12.1 peter fc->red_thmin = rqp->rq_red->red_thmin;
960 1.13.12.1 peter fc->red_thmax = rqp->rq_red->red_thmax;
961 1.13.12.1 peter
962 1.13.12.1 peter } while (/*CONSTCOND*/ 0);
963 1.13.12.1 peter break;
964 1.13.12.1 peter
965 1.13.12.1 peter case RED_SETDEFAULTS:
966 1.13.12.1 peter do {
967 1.13.12.1 peter struct redparams *rp;
968 1.13.12.1 peter
969 1.13.12.1 peter rp = (struct redparams *)addr;
970 1.13.12.1 peter
971 1.13.12.1 peter default_th_min = rp->th_min;
972 1.13.12.1 peter default_th_max = rp->th_max;
973 1.13.12.1 peter default_inv_pmax = rp->inv_pmax;
974 1.13.12.1 peter } while (/*CONSTCOND*/ 0);
975 1.13.12.1 peter break;
976 1.13.12.1 peter
977 1.13.12.1 peter default:
978 1.13.12.1 peter error = EINVAL;
979 1.13.12.1 peter break;
980 1.1 thorpej }
981 1.13.12.1 peter return error;
982 1.1 thorpej }
983 1.1 thorpej
984 1.13.12.1 peter static int
985 1.13.12.1 peter red_detach(rqp)
986 1.13.12.1 peter red_queue_t *rqp;
987 1.1 thorpej {
988 1.13.12.1 peter red_queue_t *tmp;
989 1.13.12.1 peter int error = 0;
990 1.1 thorpej
991 1.13.12.1 peter if (ALTQ_IS_ENABLED(rqp->rq_ifq))
992 1.13.12.1 peter altq_disable(rqp->rq_ifq);
993 1.1 thorpej
994 1.13.12.1 peter if ((error = altq_detach(rqp->rq_ifq)))
995 1.13.12.1 peter return (error);
996 1.1 thorpej
997 1.13.12.1 peter if (red_list == rqp)
998 1.13.12.1 peter red_list = rqp->rq_next;
999 1.13.12.1 peter else {
1000 1.13.12.1 peter for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1001 1.13.12.1 peter if (tmp->rq_next == rqp) {
1002 1.13.12.1 peter tmp->rq_next = rqp->rq_next;
1003 1.13.12.1 peter break;
1004 1.13.12.1 peter }
1005 1.13.12.1 peter if (tmp == NULL)
1006 1.13.12.1 peter printf("red_detach: no state found in red_list!\n");
1007 1.1 thorpej }
1008 1.13.12.1 peter
1009 1.13.12.1 peter red_destroy(rqp->rq_red);
1010 1.13.12.1 peter FREE(rqp->rq_q, M_DEVBUF);
1011 1.13.12.1 peter FREE(rqp, M_DEVBUF);
1012 1.13.12.1 peter return (error);
1013 1.1 thorpej }
1014 1.1 thorpej
1015 1.1 thorpej /*
1016 1.13.12.1 peter * enqueue routine:
1017 1.13.12.1 peter *
1018 1.13.12.1 peter * returns: 0 when successfully queued.
1019 1.13.12.1 peter * ENOBUFS when drop occurs.
1020 1.1 thorpej */
1021 1.13.12.1 peter static int
1022 1.13.12.1 peter red_enqueue(ifq, m, pktattr)
1023 1.13.12.1 peter struct ifaltq *ifq;
1024 1.1 thorpej struct mbuf *m;
1025 1.1 thorpej struct altq_pktattr *pktattr;
1026 1.1 thorpej {
1027 1.13.12.1 peter red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1028 1.1 thorpej
1029 1.13.12.1 peter if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1030 1.13.12.1 peter return ENOBUFS;
1031 1.13.12.1 peter ifq->ifq_len++;
1032 1.13.12.1 peter return 0;
1033 1.1 thorpej }
1034 1.1 thorpej
1035 1.1 thorpej /*
1036 1.1 thorpej * dequeue routine:
1037 1.3 thorpej * must be called in splnet.
1038 1.1 thorpej *
1039 1.1 thorpej * returns: mbuf dequeued.
1040 1.1 thorpej * NULL when no packet is available in the queue.
1041 1.1 thorpej */
1042 1.1 thorpej
1043 1.1 thorpej static struct mbuf *
1044 1.1 thorpej red_dequeue(ifq, op)
1045 1.1 thorpej struct ifaltq *ifq;
1046 1.1 thorpej int op;
1047 1.1 thorpej {
1048 1.1 thorpej red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1049 1.1 thorpej struct mbuf *m;
1050 1.1 thorpej
1051 1.1 thorpej if (op == ALTDQ_POLL)
1052 1.1 thorpej return qhead(rqp->rq_q);
1053 1.1 thorpej
1054 1.1 thorpej /* op == ALTDQ_REMOVE */
1055 1.1 thorpej m = red_getq(rqp->rq_red, rqp->rq_q);
1056 1.1 thorpej if (m != NULL)
1057 1.1 thorpej ifq->ifq_len--;
1058 1.1 thorpej return (m);
1059 1.1 thorpej }
1060 1.1 thorpej
1061 1.1 thorpej static int
1062 1.1 thorpej red_request(ifq, req, arg)
1063 1.1 thorpej struct ifaltq *ifq;
1064 1.1 thorpej int req;
1065 1.1 thorpej void *arg;
1066 1.1 thorpej {
1067 1.1 thorpej red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1068 1.1 thorpej
1069 1.1 thorpej switch (req) {
1070 1.1 thorpej case ALTRQ_PURGE:
1071 1.1 thorpej red_purgeq(rqp);
1072 1.1 thorpej break;
1073 1.1 thorpej }
1074 1.1 thorpej return (0);
1075 1.1 thorpej }
1076 1.1 thorpej
1077 1.1 thorpej static void
1078 1.1 thorpej red_purgeq(rqp)
1079 1.1 thorpej red_queue_t *rqp;
1080 1.1 thorpej {
1081 1.1 thorpej _flushq(rqp->rq_q);
1082 1.1 thorpej if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1083 1.1 thorpej rqp->rq_ifq->ifq_len = 0;
1084 1.1 thorpej }
1085 1.1 thorpej
1086 1.1 thorpej #ifdef ALTQ_FLOWVALVE
1087 1.1 thorpej
1088 1.1 thorpej #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1089 1.1 thorpej #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1090 1.1 thorpej #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1091 1.1 thorpej #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1092 1.1 thorpej #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1093 1.1 thorpej #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1094 1.1 thorpej
1095 1.1 thorpej #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1096 1.1 thorpej #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1097 1.1 thorpej
1098 1.1 thorpej #define FV_N 10 /* update fve_f every FV_N packets */
1099 1.1 thorpej
1100 1.1 thorpej #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1101 1.11 perry #define FV_TTHRESH 3 /* time threshold to delete fve */
1102 1.1 thorpej #define FV_ALPHA 5 /* extra packet count */
1103 1.1 thorpej
1104 1.13.12.1 peter #define FV_STATS
1105 1.13.12.1 peter
1106 1.1 thorpej #if (__FreeBSD_version > 300000)
1107 1.1 thorpej #define FV_TIMESTAMP(tp) getmicrotime(tp)
1108 1.1 thorpej #else
1109 1.1 thorpej #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1110 1.1 thorpej #endif
1111 1.1 thorpej
1112 1.1 thorpej /*
1113 1.1 thorpej * Brtt table: 127 entry table to convert drop rate (p) to
1114 1.1 thorpej * the corresponding bandwidth fraction (f)
1115 1.1 thorpej * the following equation is implemented to use scaled values,
1116 1.1 thorpej * fve_p and fve_f, in the fixed point format.
1117 1.1 thorpej *
1118 1.1 thorpej * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1119 1.1 thorpej * f = Brtt(p) / (max_th + alpha)
1120 1.1 thorpej */
1121 1.1 thorpej #define BRTT_SIZE 128
1122 1.1 thorpej #define BRTT_SHIFT 12
1123 1.1 thorpej #define BRTT_MASK 0x0007f000
1124 1.1 thorpej #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1125 1.1 thorpej
1126 1.1 thorpej const int brtt_tab[BRTT_SIZE] = {
1127 1.11 perry 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1128 1.11 perry 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1129 1.11 perry 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1130 1.11 perry 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1131 1.11 perry 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1132 1.11 perry 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1133 1.11 perry 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1134 1.11 perry 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1135 1.11 perry 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1136 1.11 perry 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1137 1.11 perry 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1138 1.11 perry 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1139 1.11 perry 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1140 1.11 perry 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1141 1.11 perry 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1142 1.1 thorpej 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1143 1.1 thorpej };
1144 1.1 thorpej
1145 1.13 perry static inline struct fve *
1146 1.1 thorpej flowlist_lookup(fv, pktattr, now)
1147 1.1 thorpej struct flowvalve *fv;
1148 1.1 thorpej struct altq_pktattr *pktattr;
1149 1.1 thorpej struct timeval *now;
1150 1.1 thorpej {
1151 1.1 thorpej struct fve *fve;
1152 1.1 thorpej int flows;
1153 1.1 thorpej struct ip *ip;
1154 1.1 thorpej #ifdef INET6
1155 1.1 thorpej struct ip6_hdr *ip6;
1156 1.1 thorpej #endif
1157 1.1 thorpej struct timeval tthresh;
1158 1.1 thorpej
1159 1.1 thorpej if (pktattr == NULL)
1160 1.1 thorpej return (NULL);
1161 1.1 thorpej
1162 1.1 thorpej tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
1163 1.1 thorpej flows = 0;
1164 1.1 thorpej /*
1165 1.1 thorpej * search the flow list
1166 1.1 thorpej */
1167 1.1 thorpej switch (pktattr->pattr_af) {
1168 1.1 thorpej case AF_INET:
1169 1.1 thorpej ip = (struct ip *)pktattr->pattr_hdr;
1170 1.1 thorpej TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1171 1.1 thorpej if (fve->fve_lastdrop.tv_sec == 0)
1172 1.1 thorpej break;
1173 1.1 thorpej if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1174 1.1 thorpej fve->fve_lastdrop.tv_sec = 0;
1175 1.1 thorpej break;
1176 1.1 thorpej }
1177 1.1 thorpej if (fve->fve_flow.flow_af == AF_INET &&
1178 1.1 thorpej fve->fve_flow.flow_ip.ip_src.s_addr ==
1179 1.1 thorpej ip->ip_src.s_addr &&
1180 1.1 thorpej fve->fve_flow.flow_ip.ip_dst.s_addr ==
1181 1.1 thorpej ip->ip_dst.s_addr)
1182 1.1 thorpej return (fve);
1183 1.1 thorpej flows++;
1184 1.1 thorpej }
1185 1.1 thorpej break;
1186 1.1 thorpej #ifdef INET6
1187 1.1 thorpej case AF_INET6:
1188 1.1 thorpej ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1189 1.1 thorpej TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1190 1.1 thorpej if (fve->fve_lastdrop.tv_sec == 0)
1191 1.1 thorpej break;
1192 1.1 thorpej if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1193 1.1 thorpej fve->fve_lastdrop.tv_sec = 0;
1194 1.1 thorpej break;
1195 1.1 thorpej }
1196 1.1 thorpej if (fve->fve_flow.flow_af == AF_INET6 &&
1197 1.1 thorpej IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
1198 1.1 thorpej &ip6->ip6_src) &&
1199 1.1 thorpej IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
1200 1.1 thorpej &ip6->ip6_dst))
1201 1.1 thorpej return (fve);
1202 1.1 thorpej flows++;
1203 1.1 thorpej }
1204 1.1 thorpej break;
1205 1.1 thorpej #endif /* INET6 */
1206 1.1 thorpej
1207 1.1 thorpej default:
1208 1.1 thorpej /* unknown protocol. no drop. */
1209 1.1 thorpej return (NULL);
1210 1.1 thorpej }
1211 1.1 thorpej fv->fv_flows = flows; /* save the number of active fve's */
1212 1.1 thorpej return (NULL);
1213 1.1 thorpej }
1214 1.1 thorpej
1215 1.13 perry static inline struct fve *
1216 1.1 thorpej flowlist_reclaim(fv, pktattr)
1217 1.1 thorpej struct flowvalve *fv;
1218 1.1 thorpej struct altq_pktattr *pktattr;
1219 1.1 thorpej {
1220 1.1 thorpej struct fve *fve;
1221 1.1 thorpej struct ip *ip;
1222 1.1 thorpej #ifdef INET6
1223 1.1 thorpej struct ip6_hdr *ip6;
1224 1.1 thorpej #endif
1225 1.1 thorpej
1226 1.1 thorpej /*
1227 1.1 thorpej * get an entry from the tail of the LRU list.
1228 1.1 thorpej */
1229 1.1 thorpej fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1230 1.1 thorpej
1231 1.1 thorpej switch (pktattr->pattr_af) {
1232 1.1 thorpej case AF_INET:
1233 1.1 thorpej ip = (struct ip *)pktattr->pattr_hdr;
1234 1.1 thorpej fve->fve_flow.flow_af = AF_INET;
1235 1.1 thorpej fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1236 1.1 thorpej fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1237 1.1 thorpej break;
1238 1.1 thorpej #ifdef INET6
1239 1.1 thorpej case AF_INET6:
1240 1.1 thorpej ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1241 1.1 thorpej fve->fve_flow.flow_af = AF_INET6;
1242 1.1 thorpej fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1243 1.1 thorpej fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1244 1.1 thorpej break;
1245 1.1 thorpej #endif
1246 1.1 thorpej }
1247 1.1 thorpej
1248 1.1 thorpej fve->fve_state = Green;
1249 1.1 thorpej fve->fve_p = 0.0;
1250 1.1 thorpej fve->fve_f = 0.0;
1251 1.1 thorpej fve->fve_ifseq = fv->fv_ifseq - 1;
1252 1.1 thorpej fve->fve_count = 0;
1253 1.1 thorpej
1254 1.1 thorpej fv->fv_flows++;
1255 1.1 thorpej #ifdef FV_STATS
1256 1.1 thorpej fv->fv_stats.alloc++;
1257 1.1 thorpej #endif
1258 1.1 thorpej return (fve);
1259 1.1 thorpej }
1260 1.1 thorpej
1261 1.13 perry static inline void
1262 1.1 thorpej flowlist_move_to_head(fv, fve)
1263 1.1 thorpej struct flowvalve *fv;
1264 1.1 thorpej struct fve *fve;
1265 1.1 thorpej {
1266 1.1 thorpej if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1267 1.1 thorpej TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1268 1.1 thorpej TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1269 1.1 thorpej }
1270 1.1 thorpej }
1271 1.1 thorpej
1272 1.1 thorpej /*
1273 1.1 thorpej * allocate flowvalve structure
1274 1.1 thorpej */
1275 1.1 thorpej static struct flowvalve *
1276 1.1 thorpej fv_alloc(rp)
1277 1.1 thorpej struct red *rp;
1278 1.1 thorpej {
1279 1.1 thorpej struct flowvalve *fv;
1280 1.1 thorpej struct fve *fve;
1281 1.1 thorpej int i, num;
1282 1.1 thorpej
1283 1.1 thorpej num = FV_FLOWLISTSIZE;
1284 1.1 thorpej MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
1285 1.1 thorpej M_DEVBUF, M_WAITOK);
1286 1.1 thorpej if (fv == NULL)
1287 1.1 thorpej return (NULL);
1288 1.9 christos (void)memset(fv, 0, sizeof(struct flowvalve));
1289 1.1 thorpej
1290 1.1 thorpej MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
1291 1.1 thorpej M_DEVBUF, M_WAITOK);
1292 1.1 thorpej if (fv->fv_fves == NULL) {
1293 1.1 thorpej FREE(fv, M_DEVBUF);
1294 1.1 thorpej return (NULL);
1295 1.1 thorpej }
1296 1.9 christos (void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);
1297 1.1 thorpej
1298 1.1 thorpej fv->fv_flows = 0;
1299 1.1 thorpej TAILQ_INIT(&fv->fv_flowlist);
1300 1.1 thorpej for (i = 0; i < num; i++) {
1301 1.1 thorpej fve = &fv->fv_fves[i];
1302 1.1 thorpej fve->fve_lastdrop.tv_sec = 0;
1303 1.1 thorpej TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1304 1.1 thorpej }
1305 1.1 thorpej
1306 1.1 thorpej /* initialize drop rate threshold in scaled fixed-point */
1307 1.1 thorpej fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1308 1.1 thorpej
1309 1.1 thorpej /* initialize drop rate to fraction table */
1310 1.1 thorpej MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
1311 1.1 thorpej M_DEVBUF, M_WAITOK);
1312 1.1 thorpej if (fv->fv_p2ftab == NULL) {
1313 1.1 thorpej FREE(fv->fv_fves, M_DEVBUF);
1314 1.1 thorpej FREE(fv, M_DEVBUF);
1315 1.1 thorpej return (NULL);
1316 1.1 thorpej }
1317 1.1 thorpej /*
1318 1.1 thorpej * create the p2f table.
1319 1.1 thorpej * (shift is used to keep the precision)
1320 1.1 thorpej */
1321 1.1 thorpej for (i = 1; i < BRTT_SIZE; i++) {
1322 1.1 thorpej int f;
1323 1.1 thorpej
1324 1.1 thorpej f = brtt_tab[i] << 8;
1325 1.1 thorpej fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1326 1.1 thorpej }
1327 1.1 thorpej
1328 1.1 thorpej return (fv);
1329 1.1 thorpej }
1330 1.1 thorpej
1331 1.1 thorpej static void fv_destroy(fv)
1332 1.1 thorpej struct flowvalve *fv;
1333 1.1 thorpej {
1334 1.1 thorpej FREE(fv->fv_p2ftab, M_DEVBUF);
1335 1.1 thorpej FREE(fv->fv_fves, M_DEVBUF);
1336 1.1 thorpej FREE(fv, M_DEVBUF);
1337 1.1 thorpej }
1338 1.1 thorpej
1339 1.13 perry static inline int
1340 1.1 thorpej fv_p2f(fv, p)
1341 1.1 thorpej struct flowvalve *fv;
1342 1.1 thorpej int p;
1343 1.1 thorpej {
1344 1.1 thorpej int val, f;
1345 1.1 thorpej
1346 1.1 thorpej if (p >= BRTT_PMAX)
1347 1.1 thorpej f = fv->fv_p2ftab[BRTT_SIZE-1];
1348 1.1 thorpej else if ((val = (p & BRTT_MASK)))
1349 1.1 thorpej f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1350 1.1 thorpej else
1351 1.1 thorpej f = fv->fv_p2ftab[1];
1352 1.1 thorpej return (f);
1353 1.1 thorpej }
1354 1.1 thorpej
1355 1.1 thorpej /*
1356 1.1 thorpej * check if an arriving packet should be pre-dropped.
1357 1.1 thorpej * called from red_addq() when a packet arrives.
1358 1.1 thorpej * returns 1 when the packet should be pre-dropped.
1359 1.3 thorpej * should be called in splnet.
1360 1.1 thorpej */
1361 1.1 thorpej static int
1362 1.1 thorpej fv_checkflow(fv, pktattr, fcache)
1363 1.1 thorpej struct flowvalve *fv;
1364 1.1 thorpej struct altq_pktattr *pktattr;
1365 1.1 thorpej struct fve **fcache;
1366 1.1 thorpej {
1367 1.1 thorpej struct fve *fve;
1368 1.1 thorpej struct timeval now;
1369 1.1 thorpej
1370 1.1 thorpej fv->fv_ifseq++;
1371 1.1 thorpej FV_TIMESTAMP(&now);
1372 1.1 thorpej
1373 1.1 thorpej if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1374 1.1 thorpej /* no matching entry in the flowlist */
1375 1.1 thorpej return (0);
1376 1.1 thorpej
1377 1.1 thorpej *fcache = fve;
1378 1.1 thorpej
1379 1.1 thorpej /* update fraction f for every FV_N packets */
1380 1.1 thorpej if (++fve->fve_count == FV_N) {
1381 1.1 thorpej /*
1382 1.1 thorpej * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1383 1.1 thorpej */
1384 1.1 thorpej fve->fve_f =
1385 1.1 thorpej (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1386 1.1 thorpej + fve->fve_f - FV_FUNSCALE(fve->fve_f);
1387 1.1 thorpej fve->fve_ifseq = fv->fv_ifseq;
1388 1.1 thorpej fve->fve_count = 0;
1389 1.1 thorpej }
1390 1.1 thorpej
1391 1.1 thorpej /*
1392 1.1 thorpej * overpumping test
1393 1.1 thorpej */
1394 1.1 thorpej if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1395 1.1 thorpej int fthresh;
1396 1.1 thorpej
1397 1.1 thorpej /* calculate a threshold */
1398 1.1 thorpej fthresh = fv_p2f(fv, fve->fve_p);
1399 1.1 thorpej if (fve->fve_f > fthresh)
1400 1.1 thorpej fve->fve_state = Red;
1401 1.1 thorpej }
1402 1.1 thorpej
1403 1.1 thorpej if (fve->fve_state == Red) {
1404 1.1 thorpej /*
1405 1.1 thorpej * backoff test
1406 1.1 thorpej */
1407 1.1 thorpej if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1408 1.1 thorpej /* no drop for at least FV_BACKOFFTHRESH sec */
1409 1.1 thorpej fve->fve_p = 0;
1410 1.1 thorpej fve->fve_state = Green;
1411 1.1 thorpej #ifdef FV_STATS
1412 1.1 thorpej fv->fv_stats.escape++;
1413 1.1 thorpej #endif
1414 1.1 thorpej } else {
1415 1.1 thorpej /* block this flow */
1416 1.1 thorpej flowlist_move_to_head(fv, fve);
1417 1.1 thorpej fve->fve_lastdrop = now;
1418 1.1 thorpej #ifdef FV_STATS
1419 1.1 thorpej fv->fv_stats.predrop++;
1420 1.1 thorpej #endif
1421 1.1 thorpej return (1);
1422 1.1 thorpej }
1423 1.1 thorpej }
1424 1.1 thorpej
1425 1.1 thorpej /*
1426 1.1 thorpej * p = (1 - Wp) * p
1427 1.1 thorpej */
1428 1.1 thorpej fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1429 1.1 thorpej if (fve->fve_p < 0)
1430 1.1 thorpej fve->fve_p = 0;
1431 1.1 thorpej #ifdef FV_STATS
1432 1.1 thorpej fv->fv_stats.pass++;
1433 1.1 thorpej #endif
1434 1.1 thorpej return (0);
1435 1.1 thorpej }
1436 1.1 thorpej
1437 1.1 thorpej /*
1438 1.1 thorpej * called from red_addq when a packet is dropped by red.
1439 1.3 thorpej * should be called in splnet.
1440 1.1 thorpej */
1441 1.1 thorpej static void fv_dropbyred(fv, pktattr, fcache)
1442 1.1 thorpej struct flowvalve *fv;
1443 1.1 thorpej struct altq_pktattr *pktattr;
1444 1.1 thorpej struct fve *fcache;
1445 1.1 thorpej {
1446 1.1 thorpej struct fve *fve;
1447 1.1 thorpej struct timeval now;
1448 1.1 thorpej
1449 1.1 thorpej if (pktattr == NULL)
1450 1.1 thorpej return;
1451 1.1 thorpej FV_TIMESTAMP(&now);
1452 1.1 thorpej
1453 1.1 thorpej if (fcache != NULL)
1454 1.1 thorpej /* the fve of this packet is already cached */
1455 1.1 thorpej fve = fcache;
1456 1.1 thorpej else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1457 1.1 thorpej fve = flowlist_reclaim(fv, pktattr);
1458 1.1 thorpej
1459 1.1 thorpej flowlist_move_to_head(fv, fve);
1460 1.1 thorpej
1461 1.1 thorpej /*
1462 1.1 thorpej * update p: the following line cancels the update
1463 1.1 thorpej * in fv_checkflow() and calculate
1464 1.1 thorpej * p = Wp + (1 - Wp) * p
1465 1.1 thorpej */
1466 1.1 thorpej fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1467 1.1 thorpej
1468 1.1 thorpej fve->fve_lastdrop = now;
1469 1.1 thorpej }
1470 1.1 thorpej
1471 1.1 thorpej #endif /* ALTQ_FLOWVALVE */
1472 1.1 thorpej
1473 1.1 thorpej #ifdef KLD_MODULE
1474 1.1 thorpej
1475 1.1 thorpej static struct altqsw red_sw =
1476 1.1 thorpej {"red", redopen, redclose, redioctl};
1477 1.1 thorpej
1478 1.1 thorpej ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
1479 1.13.12.1 peter MODULE_VERSION(altq_red, 1);
1480 1.1 thorpej
1481 1.1 thorpej #endif /* KLD_MODULE */
1482 1.13.12.1 peter #endif /* ALTQ3_COMPAT */
1483 1.1 thorpej
1484 1.1 thorpej #endif /* ALTQ_RED */
1485