/* altq_red.c revision 1.1.1.1 */
      1 /*	$KAME: altq_red.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1997-2000
      5  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  *
     28  */
     29 /*
     30  * Copyright (c) 1990-1994 Regents of the University of California.
     31  * All rights reserved.
     32  *
     33  * Redistribution and use in source and binary forms, with or without
     34  * modification, are permitted provided that the following conditions
     35  * are met:
     36  * 1. Redistributions of source code must retain the above copyright
     37  *    notice, this list of conditions and the following disclaimer.
     38  * 2. Redistributions in binary form must reproduce the above copyright
     39  *    notice, this list of conditions and the following disclaimer in the
     40  *    documentation and/or other materials provided with the distribution.
     41  * 3. All advertising materials mentioning features or use of this software
     42  *    must display the following acknowledgement:
     43  *	This product includes software developed by the Computer Systems
     44  *	Engineering Group at Lawrence Berkeley Laboratory.
     45  * 4. Neither the name of the University nor of the Laboratory may be used
     46  *    to endorse or promote products derived from this software without
     47  *    specific prior written permission.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     59  * SUCH DAMAGE.
     60  */
     61 
     62 #if defined(__FreeBSD__) || defined(__NetBSD__)
     63 #include "opt_altq.h"
     64 #if (__FreeBSD__ != 2)
     65 #include "opt_inet.h"
     66 #ifdef __FreeBSD__
     67 #include "opt_inet6.h"
     68 #endif
     69 #endif
     70 #endif /* __FreeBSD__ || __NetBSD__ */
     71 #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
     72 
     73 #include <sys/param.h>
     74 #include <sys/malloc.h>
     75 #include <sys/mbuf.h>
     76 #include <sys/socket.h>
     77 #include <sys/sockio.h>
     78 #include <sys/systm.h>
     79 #include <sys/proc.h>
     80 #include <sys/errno.h>
     81 #include <sys/kernel.h>
     82 #ifdef ALTQ_FLOWVALVE
     83 #include <sys/queue.h>
     84 #include <sys/time.h>
     85 #endif
     86 
     87 #include <net/if.h>
     88 #include <net/if_types.h>
     89 
     90 #include <netinet/in.h>
     91 #include <netinet/in_systm.h>
     92 #include <netinet/ip.h>
     93 #ifdef INET6
     94 #include <netinet/ip6.h>
     95 #endif
     96 
     97 #include <altq/altq.h>
     98 #include <altq/altq_conf.h>
     99 #include <altq/altq_red.h>
    100 #ifdef ALTQ_FLOWVALVE
    101 #include <altq/altq_flowvalve.h>
    102 #endif
    103 
    104 /*
    105  * ALTQ/RED (Random Early Detection) implementation using 32-bit
    106  * fixed-point calculation.
    107  *
    108  * written by kjc using the ns code as a reference.
    109  * you can learn more about red and ns from Sally's home page at
    110  * http://www-nrg.ee.lbl.gov/floyd/
    111  *
    112  * most of the red parameter values are fixed in this implementation
    113  * to prevent fixed-point overflow/underflow.
    114  * if you change the parameters, watch out for overflow/underflow!
    115  *
    116  * the parameters used are recommended values by Sally.
    117  * the corresponding ns config looks:
    118  *	q_weight=0.00195
    119  *	minthresh=5 maxthresh=15 queue-size=60
    120  *	linterm=30
    121  *	dropmech=drop-tail
    122  *	bytes=false (can't be handled by 32-bit fixed-point)
    123  *	doubleq=false dqthresh=false
    124  *	wait=true
    125  */
    126 /*
    127  * alternative red parameters for a slow link.
    128  *
    129  * assume the queue length becomes from zero to L and keeps L, it takes
    130  * N packets for q_avg to reach 63% of L.
    131  * when q_weight is 0.002, N is about 500 packets.
    132  * for a slow link like dial-up, 500 packets takes more than 1 minute!
    133  * when q_weight is 0.008, N is about 127 packets.
    134  * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
    136  * are allowed for 0.016.
    137  * see Sally's paper for more details.
    138  */
    139 /* normal red parameters */
    140 #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    141 				/* q_weight = 0.00195 */
    142 
    143 /* red parameters for a slow link */
    144 #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    145 				/* q_weight = 0.0078125 */
    146 
    147 /* red parameters for a very slow link (e.g., dialup) */
    148 #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    149 				/* q_weight = 0.015625 */
    150 
    151 /* fixed-point uses 12-bit decimal places */
    152 #define	FP_SHIFT	12	/* fixed-point shift */
    153 
    154 /* red parameters for drop probability */
    155 #define	INV_P_MAX	10	/* inverse of max drop probability */
    156 #define	TH_MIN		5	/* min threshold */
    157 #define	TH_MAX		15	/* max threshold */
    158 
#define	RED_LIMIT	60	/* default max queue length */
    160 #define	RED_STATS		/* collect statistics */
    161 
    162 /*
    163  * our default policy for forced-drop is drop-tail.
    164  * (in altq-1.1.2 or earlier, the default was random-drop.
    165  * but it makes more sense to punish the cause of the surge.)
    166  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
    167  */
    168 
    169 #ifdef ALTQ_FLOWVALVE
    170 /*
 * flow-valve is an extension to protect red from unresponsive flows
    172  * and to promote end-to-end congestion control.
    173  * flow-valve observes the average drop rates of the flows that have
    174  * experienced packet drops in the recent past.
    175  * when the average drop rate exceeds the threshold, the flow is
    176  * blocked by the flow-valve.  the trapped flow should back off
    177  * exponentially to escape from the flow-valve.
    178  */
    179 #ifdef RED_RANDOM_DROP
    180 #error "random-drop can't be used with flow-valve!"
    181 #endif
    182 #endif /* ALTQ_FLOWVALVE */
    183 
    184 /* red_list keeps all red_queue_t's allocated. */
    185 static red_queue_t *red_list = NULL;
    186 
    187 /* default red parameter values */
    188 static int default_th_min = TH_MIN;
    189 static int default_th_max = TH_MAX;
    190 static int default_inv_pmax = INV_P_MAX;
    191 
    192 /* internal function prototypes */
    193 static int red_enqueue __P((struct ifaltq *, struct mbuf *,
    194 			    struct altq_pktattr *));
    195 static struct mbuf *red_dequeue __P((struct ifaltq *, int));
    196 static int red_request __P((struct ifaltq *, int, void *));
    197 static void red_purgeq __P((red_queue_t *));
    198 static int red_detach __P((red_queue_t *));
    199 #ifdef ALTQ_FLOWVALVE
    200 static __inline struct fve *flowlist_lookup __P((struct flowvalve *,
    201 			 struct altq_pktattr *, struct timeval *));
    202 static __inline struct fve *flowlist_reclaim __P((struct flowvalve *,
    203 						  struct altq_pktattr *));
    204 static __inline void flowlist_move_to_head __P((struct flowvalve *,
    205 						struct fve *));
    206 static __inline int fv_p2f __P((struct flowvalve *, int));
    207 static struct flowvalve *fv_alloc __P((struct red *));
    208 static void fv_destroy __P((struct flowvalve *));
    209 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
    210 			     struct fve **));
    211 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
    212 			      struct fve *));
    213 #endif
    214 
    215 /*
    216  * red device interface
    217  */
    218 altqdev_decl(red);
    219 
    220 int
    221 redopen(dev, flag, fmt, p)
    222 	dev_t dev;
    223 	int flag, fmt;
    224 	struct proc *p;
    225 {
    226 	/* everything will be done when the queueing scheme is attached. */
    227 	return 0;
    228 }
    229 
    230 int
    231 redclose(dev, flag, fmt, p)
    232 	dev_t dev;
    233 	int flag, fmt;
    234 	struct proc *p;
    235 {
    236 	red_queue_t *rqp;
    237 	int err, error = 0;
    238 
    239 	while ((rqp = red_list) != NULL) {
    240 		/* destroy all */
    241 		err = red_detach(rqp);
    242 		if (err != 0 && error == 0)
    243 			error = err;
    244 	}
    245 
    246 	return error;
    247 }
    248 
    249 int
    250 redioctl(dev, cmd, addr, flag, p)
    251 	dev_t dev;
    252 	ioctlcmd_t cmd;
    253 	caddr_t addr;
    254 	int flag;
    255 	struct proc *p;
    256 {
    257 	red_queue_t *rqp;
    258 	struct red_interface *ifacep;
    259 	struct ifnet *ifp;
    260 	int	error = 0;
    261 
    262 	/* check super-user privilege */
    263 	switch (cmd) {
    264 	case RED_GETSTATS:
    265 		break;
    266 	default:
    267 #if (__FreeBSD_version > 400000)
    268 		if ((error = suser(p)) != 0)
    269 #else
    270 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    271 #endif
    272 			return (error);
    273 		break;
    274 	}
    275 
    276 	switch (cmd) {
    277 
    278 	case RED_ENABLE:
    279 		ifacep = (struct red_interface *)addr;
    280 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    281 			error = EBADF;
    282 			break;
    283 		}
    284 		error = altq_enable(rqp->rq_ifq);
    285 		break;
    286 
    287 	case RED_DISABLE:
    288 		ifacep = (struct red_interface *)addr;
    289 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    290 			error = EBADF;
    291 			break;
    292 		}
    293 		error = altq_disable(rqp->rq_ifq);
    294 		break;
    295 
    296 	case RED_IF_ATTACH:
    297 		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
    298 		if (ifp == NULL) {
    299 			error = ENXIO;
    300 			break;
    301 		}
    302 
    303 		/* allocate and initialize red_queue_t */
    304 		MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
    305 		if (rqp == NULL) {
    306 			error = ENOMEM;
    307 			break;
    308 		}
    309 		bzero(rqp, sizeof(red_queue_t));
    310 
    311 		MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
    312 		       M_DEVBUF, M_WAITOK);
    313 		if (rqp->rq_q == NULL) {
    314 			FREE(rqp, M_DEVBUF);
    315 			error = ENOMEM;
    316 			break;
    317 		}
    318 		bzero(rqp->rq_q, sizeof(class_queue_t));
    319 
    320 		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
    321 		if (rqp->rq_red == NULL) {
    322 			FREE(rqp->rq_q, M_DEVBUF);
    323 			FREE(rqp, M_DEVBUF);
    324 			error = ENOMEM;
    325 			break;
    326 		}
    327 
    328 		rqp->rq_ifq = &ifp->if_snd;
    329 		qtail(rqp->rq_q) = NULL;
    330 		qlen(rqp->rq_q) = 0;
    331 		qlimit(rqp->rq_q) = RED_LIMIT;
    332 		qtype(rqp->rq_q) = Q_RED;
    333 
    334 		/*
    335 		 * set RED to this ifnet structure.
    336 		 */
    337 		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
    338 				    red_enqueue, red_dequeue, red_request,
    339 				    NULL, NULL);
    340 		if (error) {
    341 			red_destroy(rqp->rq_red);
    342 			FREE(rqp->rq_q, M_DEVBUF);
    343 			FREE(rqp, M_DEVBUF);
    344 			break;
    345 		}
    346 
    347 		/* add this state to the red list */
    348 		rqp->rq_next = red_list;
    349 		red_list = rqp;
    350 		break;
    351 
    352 	case RED_IF_DETACH:
    353 		ifacep = (struct red_interface *)addr;
    354 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    355 			error = EBADF;
    356 			break;
    357 		}
    358 		error = red_detach(rqp);
    359 		break;
    360 
    361 	case RED_GETSTATS:
    362 		do {
    363 			struct red_stats *q_stats;
    364 			red_t *rp;
    365 
    366 			q_stats = (struct red_stats *)addr;
    367 			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
    368 					     ALTQT_RED)) == NULL) {
    369 				error = EBADF;
    370 				break;
    371 			}
    372 
    373 			q_stats->q_len 	   = qlen(rqp->rq_q);
    374 			q_stats->q_limit   = qlimit(rqp->rq_q);
    375 
    376 			rp = rqp->rq_red;
    377 			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
    378 			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
    379 			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
    380 			q_stats->drop_forced   = rp->red_stats.drop_forced;
    381 			q_stats->drop_unforced = rp->red_stats.drop_unforced;
    382 			q_stats->marked_packets = rp->red_stats.marked_packets;
    383 
    384 			q_stats->weight		= rp->red_weight;
    385 			q_stats->inv_pmax	= rp->red_inv_pmax;
    386 			q_stats->th_min		= rp->red_thmin;
    387 			q_stats->th_max		= rp->red_thmax;
    388 
    389 #ifdef ALTQ_FLOWVALVE
    390 			if (rp->red_flowvalve != NULL) {
    391 				struct flowvalve *fv = rp->red_flowvalve;
    392 				q_stats->fv_flows    = fv->fv_flows;
    393 				q_stats->fv_pass     = fv->fv_stats.pass;
    394 				q_stats->fv_predrop  = fv->fv_stats.predrop;
    395 				q_stats->fv_alloc    = fv->fv_stats.alloc;
    396 				q_stats->fv_escape   = fv->fv_stats.escape;
    397 			} else {
    398 #endif /* ALTQ_FLOWVALVE */
    399 				q_stats->fv_flows    = 0;
    400 				q_stats->fv_pass     = 0;
    401 				q_stats->fv_predrop  = 0;
    402 				q_stats->fv_alloc    = 0;
    403 				q_stats->fv_escape   = 0;
    404 #ifdef ALTQ_FLOWVALVE
    405 			}
    406 #endif /* ALTQ_FLOWVALVE */
    407 		} while (0);
    408 		break;
    409 
    410 	case RED_CONFIG:
    411 		do {
    412 			struct red_conf *fc;
    413 			red_t *new;
    414 			int s, limit;
    415 
    416 			fc = (struct red_conf *)addr;
    417 			if ((rqp = altq_lookup(fc->iface.red_ifname,
    418 					       ALTQT_RED)) == NULL) {
    419 				error = EBADF;
    420 				break;
    421 			}
    422 			new = red_alloc(fc->red_weight,
    423 					fc->red_inv_pmax,
    424 					fc->red_thmin,
    425 					fc->red_thmax,
    426 					fc->red_flags,
    427 					fc->red_pkttime);
    428 			if (new == NULL) {
    429 				error = ENOMEM;
    430 				break;
    431 			}
    432 
    433 			s = splimp();
    434 			red_purgeq(rqp);
    435 			limit = fc->red_limit;
    436 			if (limit < fc->red_thmax)
    437 				limit = fc->red_thmax;
    438 			qlimit(rqp->rq_q) = limit;
    439 			fc->red_limit = limit;	/* write back the new value */
    440 
    441 			red_destroy(rqp->rq_red);
    442 			rqp->rq_red = new;
    443 
    444 			splx(s);
    445 
    446 			/* write back new values */
    447 			fc->red_limit = limit;
    448 			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
    449 			fc->red_thmin = rqp->rq_red->red_thmin;
    450 			fc->red_thmax = rqp->rq_red->red_thmax;
    451 
    452 		} while (0);
    453 		break;
    454 
    455 	case RED_SETDEFAULTS:
    456 		do {
    457 			struct redparams *rp;
    458 
    459 			rp = (struct redparams *)addr;
    460 
    461 			default_th_min = rp->th_min;
    462 			default_th_max = rp->th_max;
    463 			default_inv_pmax = rp->inv_pmax;
    464 		} while (0);
    465 		break;
    466 
    467 	default:
    468 		error = EINVAL;
    469 		break;
    470 	}
    471 	return error;
    472 }
    473 
    474 static int
    475 red_detach(rqp)
    476 	red_queue_t *rqp;
    477 {
    478 	red_queue_t *tmp;
    479 	int error = 0;
    480 
    481 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    482 		altq_disable(rqp->rq_ifq);
    483 
    484 	if ((error = altq_detach(rqp->rq_ifq)))
    485 		return (error);
    486 
    487 	if (red_list == rqp)
    488 		red_list = rqp->rq_next;
    489 	else {
    490 		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
    491 			if (tmp->rq_next == rqp) {
    492 				tmp->rq_next = rqp->rq_next;
    493 				break;
    494 			}
    495 		if (tmp == NULL)
    496 			printf("red_detach: no state found in red_list!\n");
    497 	}
    498 
    499 	red_destroy(rqp->rq_red);
    500 	FREE(rqp->rq_q, M_DEVBUF);
    501 	FREE(rqp, M_DEVBUF);
    502 	return (error);
    503 }
    504 
    505 /*
    506  * red support routines
    507  */
    508 
    509 red_t *
    510 red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
    511 	int	weight, inv_pmax, th_min, th_max;
    512 	int	flags, pkttime;
    513 {
    514 	red_t 	*rp;
    515 	int	w, i;
    516 	int	npkts_per_sec;
    517 
    518 	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
    519 	if (rp == NULL)
    520 		return (NULL);
    521 	bzero(rp, sizeof(red_t));
    522 
    523 	rp->red_avg = 0;
    524 	rp->red_idle = 1;
    525 
    526 	if (weight == 0)
    527 		rp->red_weight = W_WEIGHT;
    528 	else
    529 		rp->red_weight = weight;
    530 	if (inv_pmax == 0)
    531 		rp->red_inv_pmax = default_inv_pmax;
    532 	else
    533 		rp->red_inv_pmax = inv_pmax;
    534 	if (th_min == 0)
    535 		rp->red_thmin = default_th_min;
    536 	else
    537 		rp->red_thmin = th_min;
    538 	if (th_max == 0)
    539 		rp->red_thmax = default_th_max;
    540 	else
    541 		rp->red_thmax = th_max;
    542 
    543 	rp->red_flags = flags;
    544 
    545 	if (pkttime == 0)
    546 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    547 		rp->red_pkttime = 800;
    548 	else
    549 		rp->red_pkttime = pkttime;
    550 
    551 	if (weight == 0) {
    552 		/* when the link is very slow, adjust red parameters */
    553 		npkts_per_sec = 1000000 / rp->red_pkttime;
    554 		if (npkts_per_sec < 50) {
    555 			/* up to about 400Kbps */
    556 			rp->red_weight = W_WEIGHT_2;
    557 		} else if (npkts_per_sec < 300) {
    558 			/* up to about 2.4Mbps */
    559 			rp->red_weight = W_WEIGHT_1;
    560 		}
    561 	}
    562 
    563 	/* calculate wshift.  weight must be power of 2 */
    564 	w = rp->red_weight;
    565 	for (i = 0; w > 1; i++)
    566 		w = w >> 1;
    567 	rp->red_wshift = i;
    568 	w = 1 << rp->red_wshift;
    569 	if (w != rp->red_weight) {
    570 		printf("invalid weight value %d for red! use %d\n",
    571 		       rp->red_weight, w);
    572 		rp->red_weight = w;
    573 	}
    574 
    575 	/*
    576 	 * thmin_s and thmax_s are scaled versions of th_min and th_max
    577 	 * to be compared with avg.
    578 	 */
    579 	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
    580 	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
    581 
    582 	/*
    583 	 * precompute probability denominator
    584 	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
    585 	 */
    586 	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
    587 			 * rp->red_inv_pmax) << FP_SHIFT;
    588 
    589 	/* allocate weight table */
    590 	rp->red_wtab = wtab_alloc(rp->red_weight);
    591 
    592 	microtime(&rp->red_last);
    593 #ifdef ALTQ_FLOWVALVE
    594 	if (flags & REDF_FLOWVALVE)
    595 		rp->red_flowvalve = fv_alloc(rp);
    596 	/* if fv_alloc failes, flowvalve is just disabled */
    597 #endif
    598 	return (rp);
    599 }
    600 
/*
 * red_destroy: free a red state allocated by red_alloc, releasing
 *	the flowvalve (if one was attached) and dropping the reference
 *	on the shared weight table before freeing the state itself.
 */
void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	wtab_destroy(rp->red_wtab);
	FREE(rp, M_DEVBUF);
}
    612 
/*
 * red_getstats: copy the current red counters into *sp.
 *	q_avg is descaled from its fixed-point EWMA representation
 *	(shifted right by the weight shift) before export.
 */
void
red_getstats(rp, sp)
	red_t *rp;
	struct redstats *sp;
{
	sp->q_avg 		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}
    625 
    626 /*
    627  * enqueue routine:
    628  *
    629  *	returns: 0 when successfully queued.
    630  *		 ENOBUFS when drop occurs.
    631  */
    632 static int
    633 red_enqueue(ifq, m, pktattr)
    634 	struct ifaltq *ifq;
    635 	struct mbuf *m;
    636 	struct altq_pktattr *pktattr;
    637 {
    638 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
    639 
    640 	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
    641 		return ENOBUFS;
    642 	ifq->ifq_len++;
    643 	return 0;
    644 }
    645 
/*
 * red_addq: run the RED algorithm for one arriving packet.
 *	updates the fixed-point EWMA of the queue length and decides
 *	whether to enqueue, ECN-mark, early-drop, or force-drop.
 *	returns 0 when the packet was queued (or marked and queued),
 *	-1 when a packet was dropped (the mbuf is freed here).
 */
int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow currently blocked by the flow-valve is dropped outright */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
    782 
    783 /*
    784  * early-drop probability is calculated as follows:
    785  *   prob = p_max * (avg - th_min) / (th_max - th_min)
    786  *   prob_a = prob / (2 - count*prob)
    787  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
    788  * here prob_a increases as successive undrop count increases.
    789  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
    790  * becomes 1 when (count >= (2 / prob))).
    791  */
/*
 * drop_early: probabilistic early-drop decision.
 *	returns 1 when the packet should be dropped or marked,
 *	0 when it should pass.
 */
int
drop_early(fp_len, fp_probd, count)
	int fp_len;	/* (avg - TH_MIN) in fixed-point */
	int fp_probd;	/* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
	int count;	/* how many successive undropped packets */
{
	int denom;	/* denominator of drop-probability */

	/*
	 * the denominator shrinks as the run of undropped packets
	 * grows, which raises the effective drop probability.
	 */
	denom = fp_probd - count * fp_len;
	if (denom <= 0)
		/* count exceeds the hard limit: drop or mark */
		return (1);

	/*
	 * now the range of denom is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / denom
	 */
	return ((random() % denom) < fp_len) ? 1 : 0;
}
    818 
    819 /*
    820  * try to mark CE bit to the packet.
    821  *    returns 1 if successfully marked, 0 otherwise.
    822  */
/*
 * mark_ecn: set the CE (congestion experienced) bit in the packet's
 *	IP header instead of dropping it.
 *	returns 1 if successfully marked, 0 otherwise (non-IP packet,
 *	stale header pointer, ECN not enabled, or non-ECT sender).
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;		/* red flags; REDF_ECN4 / REDF_ECN6 select per-family marking */
{
	struct mbuf *m0;

	/* only packets classified as IPv4/IPv6 carry a usable header pointer */
	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */
			if (ip->ip_tos & IPTOS_ECT) {
				/* ECN-capable, mark ECN bit. */
				if ((ip->ip_tos & IPTOS_CE) == 0) {
					/*
					 * the header checksum must be patched
					 * incrementally after flipping tos bits.
					 */
#if (IPTOS_CE == 0x01)
					u_short sum;

					ip->ip_tos |= IPTOS_CE;
					/*
					 * optimized version when IPTOS_CE
					 * is 0x01.
					 *   HC' = HC -1   when HC > 0
					 *       = 0xfffe  when HC = 0
					 */
					sum = ntohs(ip->ip_sum);
					if (sum == 0)
						sum = 0xfffe;
					else
						sum -= 1;
					ip->ip_sum = htons(sum);
#else /* IPTOS_CE != 0x01 */
					long sum;

					ip->ip_tos |= IPTOS_CE;
					/*
					 * update checksum (from RFC1624)
					 *	   HC' = ~(~HC + ~m + m')
					 */
					sum = ~ntohs(ip->ip_sum) & 0xffff;
					sum += 0xffff + IPTOS_CE;
					sum = (sum >> 16) + (sum & 0xffff);
					sum += (sum >> 16);  /* add carry */

					ip->ip_sum = htons(~sum & 0xffff);
#endif /* IPTOS_CE != 0x01 */
				}
				return (1);
			}
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			/*
			 * for IPv6 the tos bits sit in the traffic-class
			 * field of the first 32-bit header word; no
			 * checksum update is needed.
			 */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if (flowlabel & (IPTOS_ECT << 20)) {
				/* ECN-capable, mark ECN bit. */
				flowlabel |= (IPTOS_CE << 20);
				ip6->ip6_flow = htonl(flowlabel);
				return (1);
			}
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
    915 
    916 /*
    917  * dequeue routine:
    918  *	must be called in splimp.
    919  *
    920  *	returns: mbuf dequeued.
    921  *		 NULL when no packet is available in the queue.
    922  */
    923 
    924 static struct mbuf *
    925 red_dequeue(ifq, op)
    926 	struct ifaltq *ifq;
    927 	int op;
    928 {
    929 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
    930 	struct mbuf *m;
    931 
    932 	if (op == ALTDQ_POLL)
    933 		return qhead(rqp->rq_q);
    934 
    935 	/* op == ALTDQ_REMOVE */
    936 	m =  red_getq(rqp->rq_red, rqp->rq_q);
    937 	if (m != NULL)
    938 		ifq->ifq_len--;
    939 	return (m);
    940 }
    941 
    942 struct mbuf *
    943 red_getq(rp, q)
    944 	red_t *rp;
    945 	class_queue_t *q;
    946 {
    947 	struct mbuf *m;
    948 
    949 	if ((m = _getq(q)) == NULL) {
    950 		if (rp->red_idle == 0) {
    951 			rp->red_idle = 1;
    952 			microtime(&rp->red_last);
    953 		}
    954 		return NULL;
    955 	}
    956 
    957 	rp->red_idle = 0;
    958 	return (m);
    959 }
    960 
    961 static int
    962 red_request(ifq, req, arg)
    963 	struct ifaltq *ifq;
    964 	int req;
    965 	void *arg;
    966 {
    967 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
    968 
    969 	switch (req) {
    970 	case ALTRQ_PURGE:
    971 		red_purgeq(rqp);
    972 		break;
    973 	}
    974 	return (0);
    975 }
    976 
    977 static void
    978 red_purgeq(rqp)
    979 	red_queue_t *rqp;
    980 {
    981 	_flushq(rqp->rq_q);
    982 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    983 		rqp->rq_ifq->ifq_len = 0;
    984 }
    985 
    986 
    987 /*
    988  * helper routine to calibrate avg during idle.
    989  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
    990  * here Wq = 1/weight and the code assumes Wq is close to zero.
    991  *
    992  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
    993  */
/* head of the singly-linked list of shared, refcounted weight tables */
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
    995 
/*
 * get a weight table for the given weight, creating it if needed.
 * tables are shared and reference-counted; release with wtab_destroy().
 * NOTE(review): assumes weight > 0 — weight == 0 would divide by zero
 * below; confirm callers validate this.
 */
struct wtab *
wtab_alloc(weight)
	int weight;
{
	struct wtab *w;
	int i;

	/* reuse an existing table computed for the same weight */
	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	bzero(w, sizeof(struct wtab));
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	/* w_tab[0] = (1 - 1/weight) in fixed-point */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		/* repeated squaring: w_tab[i] = (1 - 1/weight)^(2^i) */
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		/* once an entry underflows to 0, any n >= 2^i gives 0;
		 * record that bound for pow_w() to short-circuit on */
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}
   1028 
   1029 int
   1030 wtab_destroy(w)
   1031 	struct wtab *w;
   1032 {
   1033 	struct wtab *prev;
   1034 
   1035 	if (--w->w_refcount > 0)
   1036 		return (0);
   1037 
   1038 	if (wtab_list == w)
   1039 		wtab_list = w->w_next;
   1040 	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
   1041 		if (prev->w_next == w) {
   1042 			prev->w_next = w->w_next;
   1043 			break;
   1044 		}
   1045 
   1046 	FREE(w, M_DEVBUF);
   1047 	return (0);
   1048 }
   1049 
   1050 int32_t
   1051 pow_w(w, n)
   1052 	struct wtab *w;
   1053 	int n;
   1054 {
   1055 	int	i, bit;
   1056 	int32_t	val;
   1057 
   1058 	if (n >= w->w_param_max)
   1059 		return (0);
   1060 
   1061 	val = 1 << FP_SHIFT;
   1062 	if (n <= 0)
   1063 		return (val);
   1064 
   1065 	bit = 1;
   1066 	i = 0;
   1067 	while (n) {
   1068 		if (n & bit) {
   1069 			val = (val * w->w_tab[i]) >> FP_SHIFT;
   1070 			n &= ~bit;
   1071 		}
   1072 		i++;
   1073 		bit <<=  1;
   1074 	}
   1075 	return (val);
   1076 }
   1077 
   1078 #ifdef ALTQ_FLOWVALVE
   1079 
   1080 #define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
   1081 #define	FV_PSCALE(x)	((x) << FV_PSHIFT)
   1082 #define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
   1083 #define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
   1084 #define	FV_FSCALE(x)	((x) << FV_FSHIFT)
   1085 #define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
   1086 
   1087 #define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
   1088 #define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
   1089 
   1090 #define	FV_N			10	/* update fve_f every FV_N packets */
   1091 
   1092 #define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
   1093 #define	FV_TTHRESH		3  /* time threshold to delete fve */
   1094 #define	FV_ALPHA		5  /* extra packet count */
   1095 
   1096 #define	FV_STATS
   1097 
   1098 #if (__FreeBSD_version > 300000)
   1099 #define	FV_TIMESTAMP(tp)	getmicrotime(tp)
   1100 #else
   1101 #define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
   1102 #endif
   1103 
   1104 /*
   1105  * Brtt table: 127 entry table to convert drop rate (p) to
   1106  * the corresponding bandwidth fraction (f)
   1107  * the following equation is implemented to use scaled values,
   1108  * fve_p and fve_f, in the fixed point format.
   1109  *
   1110  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
   1111  *   f = Brtt(p) / (max_th + alpha)
   1112  */
   1113 #define	BRTT_SIZE	128
   1114 #define	BRTT_SHIFT	12
   1115 #define	BRTT_MASK	0x0007f000
   1116 #define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
   1117 
/* precomputed Brtt(p) values per the formula above; entry 0 is a
 * placeholder — fv_p2f() clamps its index to at least 1. */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
   1136 
/*
 * look up the flow entry matching the packet's src/dst addresses.
 * returns NULL when no live entry matches (or the address family is
 * unknown); as a side effect, updates fv->fv_flows with the count of
 * active entries seen.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries whose last drop is older than this are stale */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 *
	 * flowlist_move_to_head() keeps recently-dropped-on entries at
	 * the front, so once an unused entry (tv_sec == 0) or a stale
	 * one is met, everything after it is at least as old and the
	 * search can stop; stale entries are lazily invalidated here.
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
   1206 
   1207 static __inline struct fve *
   1208 flowlist_reclaim(fv, pktattr)
   1209 	struct flowvalve *fv;
   1210 	struct altq_pktattr *pktattr;
   1211 {
   1212 	struct fve *fve;
   1213 	struct ip *ip;
   1214 #ifdef INET6
   1215 	struct ip6_hdr *ip6;
   1216 #endif
   1217 
   1218 	/*
   1219 	 * get an entry from the tail of the LRU list.
   1220 	 */
   1221 	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
   1222 
   1223 	switch (pktattr->pattr_af) {
   1224 	case AF_INET:
   1225 		ip = (struct ip *)pktattr->pattr_hdr;
   1226 		fve->fve_flow.flow_af = AF_INET;
   1227 		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
   1228 		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
   1229 		break;
   1230 #ifdef INET6
   1231 	case AF_INET6:
   1232 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1233 		fve->fve_flow.flow_af = AF_INET6;
   1234 		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
   1235 		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
   1236 		break;
   1237 #endif
   1238 	}
   1239 
   1240 	fve->fve_state = Green;
   1241 	fve->fve_p = 0.0;
   1242 	fve->fve_f = 0.0;
   1243 	fve->fve_ifseq = fv->fv_ifseq - 1;
   1244 	fve->fve_count = 0;
   1245 
   1246 	fv->fv_flows++;
   1247 #ifdef FV_STATS
   1248 	fv->fv_stats.alloc++;
   1249 #endif
   1250 	return (fve);
   1251 }
   1252 
   1253 static __inline void
   1254 flowlist_move_to_head(fv, fve)
   1255 	struct flowvalve *fv;
   1256 	struct fve *fve;
   1257 {
   1258 	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
   1259 		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
   1260 		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
   1261 	}
   1262 }
   1263 
   1264 /*
   1265  * allocate flowvalve structure
   1266  */
/*
 * allocate and initialize a flowvalve: the flow-entry array (threaded
 * onto an LRU list), the drop-rate threshold, and the p->f lookup
 * table.  returns NULL if any allocation fails, releasing whatever was
 * already allocated.
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	/* array of flow entries, managed through the LRU list below */
	MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		/* unwind the first allocation */
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		/* tv_sec == 0 marks an unused entry (see flowlist_lookup) */
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		/* unwind both earlier allocations */
		FREE(fv->fv_fves, M_DEVBUF);
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * the loop starts at 1: entry 0 is left uninitialized, but
	 * fv_p2f() never reads index 0.
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
   1322 
/* release a flowvalve and everything it owns (p2f table, flow entries) */
static void fv_destroy(fv)
	struct flowvalve *fv;
{
	FREE(fv->fv_p2ftab, M_DEVBUF);
	FREE(fv->fv_fves, M_DEVBUF);
	FREE(fv, M_DEVBUF);
}
   1330 
   1331 static __inline int
   1332 fv_p2f(fv, p)
   1333 	struct flowvalve	*fv;
   1334 	int	p;
   1335 {
   1336 	int val, f;
   1337 
   1338 	if (p >= BRTT_PMAX)
   1339 		f = fv->fv_p2ftab[BRTT_SIZE-1];
   1340 	else if ((val = (p & BRTT_MASK)))
   1341 		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
   1342 	else
   1343 		f = fv->fv_p2ftab[1];
   1344 	return (f);
   1345 }
   1346 
   1347 /*
   1348  * check if an arriving packet should be pre-dropped.
   1349  * called from red_addq() when a packet arrives.
   1350  * returns 1 when the packet should be pre-dropped.
   1351  * should be called in splimp.
   1352  */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	/* per-interface packet sequence number, used to estimate f below */
	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* cache the entry so fv_dropbyred() can skip a second lookup */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * EWMA of this flow's share of arrivals:
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose average drop rate exceeds
	 * the threshold and whose bandwidth fraction exceeds what that
	 * drop rate should allow (Brtt) is declared Red.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * decay the average drop rate on every accepted packet:
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
   1428 
   1429 /*
   1430  * called from red_addq when a packet is dropped by red.
   1431  * should be called in splimp.
   1432  */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* unknown flow: evict the LRU entry and take it over */
		fve = flowlist_reclaim(fv, pktattr);

	/* a drop makes this entry most-recently-used */
	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 *	      in fv_checkflow() and calculate
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}
   1462 
   1463 #endif /* ALTQ_FLOWVALVE */
   1464 
   1465 #ifdef KLD_MODULE
   1466 
/* device-switch entry and module glue for loading RED as a KLD */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
   1471 
   1472 #endif /* KLD_MODULE */
   1473 
   1474 #endif /* ALTQ_RED */
   1475