Home | History | Annotate | Line # | Download | only in altq
altq_red.c revision 1.28.30.1
      1  1.28.30.1      yamt /*	$NetBSD: altq_red.c,v 1.28.30.1 2012/04/17 00:05:51 yamt Exp $	*/
      2       1.21     peter /*	$KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $	*/
      3        1.1   thorpej 
      4        1.1   thorpej /*
      5       1.21     peter  * Copyright (C) 1997-2003
      6        1.1   thorpej  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7        1.1   thorpej  *
      8        1.1   thorpej  * Redistribution and use in source and binary forms, with or without
      9        1.1   thorpej  * modification, are permitted provided that the following conditions
     10        1.1   thorpej  * are met:
     11        1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     12        1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     13        1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     14        1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     15        1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     16        1.1   thorpej  *
     17        1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18        1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19        1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20        1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21        1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22        1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23        1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24        1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25        1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26        1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27        1.1   thorpej  * SUCH DAMAGE.
     28        1.1   thorpej  *
     29        1.1   thorpej  */
     30        1.1   thorpej /*
     31        1.1   thorpej  * Copyright (c) 1990-1994 Regents of the University of California.
     32        1.1   thorpej  * All rights reserved.
     33        1.1   thorpej  *
     34        1.1   thorpej  * Redistribution and use in source and binary forms, with or without
     35        1.1   thorpej  * modification, are permitted provided that the following conditions
     36        1.1   thorpej  * are met:
     37        1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     38        1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     39        1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     40        1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     41        1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     42        1.1   thorpej  * 3. All advertising materials mentioning features or use of this software
     43        1.1   thorpej  *    must display the following acknowledgement:
     44        1.1   thorpej  *	This product includes software developed by the Computer Systems
     45        1.1   thorpej  *	Engineering Group at Lawrence Berkeley Laboratory.
     46        1.1   thorpej  * 4. Neither the name of the University nor of the Laboratory may be used
     47        1.1   thorpej  *    to endorse or promote products derived from this software without
     48        1.1   thorpej  *    specific prior written permission.
     49        1.1   thorpej  *
     50        1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     51        1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     52        1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     53        1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     54        1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     55        1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     56        1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     57        1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     58        1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     59        1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     60        1.1   thorpej  * SUCH DAMAGE.
     61        1.1   thorpej  */
     62        1.5     lukem 
     63        1.5     lukem #include <sys/cdefs.h>
     64  1.28.30.1      yamt __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.28.30.1 2012/04/17 00:05:51 yamt Exp $");
     65        1.1   thorpej 
     66       1.21     peter #ifdef _KERNEL_OPT
     67        1.1   thorpej #include "opt_altq.h"
     68        1.1   thorpej #include "opt_inet.h"
     69       1.22     peter #include "pf.h"
     70        1.1   thorpej #endif
     71       1.21     peter 
     72        1.1   thorpej #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
     73        1.1   thorpej 
     74        1.1   thorpej #include <sys/param.h>
     75        1.1   thorpej #include <sys/malloc.h>
     76        1.1   thorpej #include <sys/mbuf.h>
     77        1.1   thorpej #include <sys/socket.h>
     78       1.21     peter #include <sys/systm.h>
     79       1.21     peter #include <sys/errno.h>
     80       1.21     peter #include <sys/kauth.h>
     81       1.21     peter #if 1 /* ALTQ3_COMPAT */
     82        1.1   thorpej #include <sys/sockio.h>
     83        1.1   thorpej #include <sys/proc.h>
     84        1.1   thorpej #include <sys/kernel.h>
     85        1.1   thorpej #ifdef ALTQ_FLOWVALVE
     86        1.1   thorpej #include <sys/queue.h>
     87        1.1   thorpej #include <sys/time.h>
     88        1.1   thorpej #endif
     89       1.21     peter #endif /* ALTQ3_COMPAT */
     90  1.28.30.1      yamt #include <sys/cprng.h>
     91        1.1   thorpej 
     92        1.1   thorpej #include <net/if.h>
     93        1.1   thorpej 
     94        1.1   thorpej #include <netinet/in.h>
     95        1.1   thorpej #include <netinet/in_systm.h>
     96        1.1   thorpej #include <netinet/ip.h>
     97        1.1   thorpej #ifdef INET6
     98        1.1   thorpej #include <netinet/ip6.h>
     99        1.1   thorpej #endif
    100        1.1   thorpej 
    101       1.22     peter #if NPF > 0
    102       1.21     peter #include <net/pfvar.h>
    103       1.22     peter #endif
    104        1.1   thorpej #include <altq/altq.h>
    105       1.21     peter #include <altq/altq_red.h>
    106       1.21     peter #ifdef ALTQ3_COMPAT
    107        1.1   thorpej #include <altq/altq_conf.h>
    108        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    109        1.1   thorpej #include <altq/altq_flowvalve.h>
    110        1.1   thorpej #endif
    111       1.21     peter #endif
    112        1.1   thorpej 
    113        1.1   thorpej /*
    114        1.1   thorpej  * ALTQ/RED (Random Early Detection) implementation using 32-bit
    115        1.1   thorpej  * fixed-point calculation.
    116        1.1   thorpej  *
    117        1.1   thorpej  * written by kjc using the ns code as a reference.
    118        1.1   thorpej  * you can learn more about red and ns from Sally's home page at
    119        1.1   thorpej  * http://www-nrg.ee.lbl.gov/floyd/
    120        1.1   thorpej  *
    121        1.1   thorpej  * most of the red parameter values are fixed in this implementation
    122        1.1   thorpej  * to prevent fixed-point overflow/underflow.
    123        1.1   thorpej  * if you change the parameters, watch out for overflow/underflow!
    124        1.1   thorpej  *
    125        1.1   thorpej  * the parameters used are recommended values by Sally.
    126        1.1   thorpej  * the corresponding ns config looks:
    127        1.1   thorpej  *	q_weight=0.00195
    128        1.1   thorpej  *	minthresh=5 maxthresh=15 queue-size=60
    129        1.1   thorpej  *	linterm=30
    130        1.1   thorpej  *	dropmech=drop-tail
    131        1.1   thorpej  *	bytes=false (can't be handled by 32-bit fixed-point)
    132       1.11     perry  *	doubleq=false dqthresh=false
    133        1.1   thorpej  *	wait=true
    134        1.1   thorpej  */
    135        1.1   thorpej /*
    136        1.1   thorpej  * alternative red parameters for a slow link.
    137        1.1   thorpej  *
    138        1.1   thorpej  * assume the queue length becomes from zero to L and keeps L, it takes
    139        1.1   thorpej  * N packets for q_avg to reach 63% of L.
    140        1.1   thorpej  * when q_weight is 0.002, N is about 500 packets.
    141        1.1   thorpej  * for a slow link like dial-up, 500 packets takes more than 1 minute!
    142        1.1   thorpej  * when q_weight is 0.008, N is about 127 packets.
    143        1.1   thorpej  * when q_weight is 0.016, N is about 63 packets.
    144       1.21     peter  * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
    145        1.1   thorpej  * are allowed for 0.016.
    146        1.1   thorpej  * see Sally's paper for more details.
    147        1.1   thorpej  */
    148        1.1   thorpej /* normal red parameters */
    149        1.1   thorpej #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    150        1.1   thorpej 				/* q_weight = 0.00195 */
    151        1.1   thorpej 
    152        1.1   thorpej /* red parameters for a slow link */
    153        1.1   thorpej #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    154        1.1   thorpej 				/* q_weight = 0.0078125 */
    155        1.1   thorpej 
    156        1.1   thorpej /* red parameters for a very slow link (e.g., dialup) */
    157        1.1   thorpej #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    158        1.1   thorpej 				/* q_weight = 0.015625 */
    159        1.1   thorpej 
    160        1.1   thorpej /* fixed-point uses 12-bit decimal places */
    161        1.1   thorpej #define	FP_SHIFT	12	/* fixed-point shift */
    162        1.1   thorpej 
    163        1.1   thorpej /* red parameters for drop probability */
    164        1.1   thorpej #define	INV_P_MAX	10	/* inverse of max drop probability */
    165        1.1   thorpej #define	TH_MIN		5	/* min threshold */
    166        1.1   thorpej #define	TH_MAX		15	/* max threshold */
    167        1.1   thorpej 
    168       1.21     peter #define	RED_LIMIT	60	/* default max queue lenght */
    169       1.21     peter #define	RED_STATS		/* collect statistics */
    170        1.1   thorpej 
    171        1.1   thorpej /*
    172        1.1   thorpej  * our default policy for forced-drop is drop-tail.
    173        1.1   thorpej  * (in altq-1.1.2 or earlier, the default was random-drop.
    174        1.1   thorpej  * but it makes more sense to punish the cause of the surge.)
    175        1.1   thorpej  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
    176        1.1   thorpej  */
    177        1.1   thorpej 
    178       1.21     peter #ifdef ALTQ3_COMPAT
    179        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    180        1.1   thorpej /*
    181       1.21     peter  * flow-valve is an extention to protect red from unresponsive flows
    182        1.1   thorpej  * and to promote end-to-end congestion control.
    183        1.1   thorpej  * flow-valve observes the average drop rates of the flows that have
    184        1.1   thorpej  * experienced packet drops in the recent past.
    185        1.1   thorpej  * when the average drop rate exceeds the threshold, the flow is
    186        1.1   thorpej  * blocked by the flow-valve.  the trapped flow should back off
    187        1.1   thorpej  * exponentially to escape from the flow-valve.
    188        1.1   thorpej  */
    189        1.1   thorpej #ifdef RED_RANDOM_DROP
    190        1.1   thorpej #error "random-drop can't be used with flow-valve!"
    191        1.1   thorpej #endif
    192        1.1   thorpej #endif /* ALTQ_FLOWVALVE */
    193        1.1   thorpej 
    194        1.1   thorpej /* red_list keeps all red_queue_t's allocated. */
    195        1.1   thorpej static red_queue_t *red_list = NULL;
    196        1.1   thorpej 
    197       1.21     peter #endif /* ALTQ3_COMPAT */
    198       1.21     peter 
    199        1.1   thorpej /* default red parameter values */
    200        1.1   thorpej static int default_th_min = TH_MIN;
    201        1.1   thorpej static int default_th_max = TH_MAX;
    202        1.1   thorpej static int default_inv_pmax = INV_P_MAX;
    203        1.1   thorpej 
    204       1.21     peter #ifdef ALTQ3_COMPAT
    205        1.1   thorpej /* internal function prototypes */
    206       1.21     peter static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
    207       1.21     peter static struct mbuf *red_dequeue(struct ifaltq *, int);
    208       1.21     peter static int red_request(struct ifaltq *, int, void *);
    209       1.21     peter static void red_purgeq(red_queue_t *);
    210       1.21     peter static int red_detach(red_queue_t *);
    211        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    212       1.21     peter static inline struct fve *flowlist_lookup(struct flowvalve *,
    213       1.21     peter 			 struct altq_pktattr *, struct timeval *);
    214       1.21     peter static inline struct fve *flowlist_reclaim(struct flowvalve *,
    215       1.21     peter 					     struct altq_pktattr *);
    216       1.21     peter static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
    217       1.21     peter static inline int fv_p2f(struct flowvalve *, int);
    218       1.21     peter static struct flowvalve *fv_alloc(struct red *);
    219       1.21     peter static void fv_destroy(struct flowvalve *);
    220       1.21     peter static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
    221       1.21     peter 			struct fve **);
    222       1.21     peter static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
    223       1.21     peter 			 struct fve *);
    224        1.1   thorpej #endif
    225       1.21     peter #endif /* ALTQ3_COMPAT */
    226        1.1   thorpej 
    227        1.1   thorpej /*
    228       1.21     peter  * red support routines
    229        1.1   thorpej  */
    230       1.21     peter red_t *
    231       1.21     peter red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    232       1.21     peter    int pkttime)
    233       1.21     peter {
    234       1.21     peter 	red_t	*rp;
    235       1.21     peter 	int	 w, i;
    236       1.21     peter 	int	 npkts_per_sec;
    237       1.21     peter 
    238       1.21     peter 	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
    239       1.21     peter 	if (rp == NULL)
    240       1.21     peter 		return (NULL);
    241       1.21     peter 
    242       1.21     peter 	rp->red_avg = 0;
    243       1.21     peter 	rp->red_idle = 1;
    244       1.21     peter 
    245       1.21     peter 	if (weight == 0)
    246       1.21     peter 		rp->red_weight = W_WEIGHT;
    247       1.21     peter 	else
    248       1.21     peter 		rp->red_weight = weight;
    249       1.21     peter 	if (inv_pmax == 0)
    250       1.21     peter 		rp->red_inv_pmax = default_inv_pmax;
    251       1.21     peter 	else
    252       1.21     peter 		rp->red_inv_pmax = inv_pmax;
    253       1.21     peter 	if (th_min == 0)
    254       1.21     peter 		rp->red_thmin = default_th_min;
    255       1.21     peter 	else
    256       1.21     peter 		rp->red_thmin = th_min;
    257       1.21     peter 	if (th_max == 0)
    258       1.21     peter 		rp->red_thmax = default_th_max;
    259       1.21     peter 	else
    260       1.21     peter 		rp->red_thmax = th_max;
    261       1.21     peter 
    262       1.21     peter 	rp->red_flags = flags;
    263        1.1   thorpej 
    264       1.21     peter 	if (pkttime == 0)
    265       1.21     peter 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    266       1.21     peter 		rp->red_pkttime = 800;
    267       1.21     peter 	else
    268       1.21     peter 		rp->red_pkttime = pkttime;
    269        1.1   thorpej 
    270       1.21     peter 	if (weight == 0) {
    271       1.21     peter 		/* when the link is very slow, adjust red parameters */
    272       1.21     peter 		npkts_per_sec = 1000000 / rp->red_pkttime;
    273       1.21     peter 		if (npkts_per_sec < 50) {
    274       1.21     peter 			/* up to about 400Kbps */
    275       1.21     peter 			rp->red_weight = W_WEIGHT_2;
    276       1.21     peter 		} else if (npkts_per_sec < 300) {
    277       1.21     peter 			/* up to about 2.4Mbps */
    278       1.21     peter 			rp->red_weight = W_WEIGHT_1;
    279       1.21     peter 		}
    280       1.21     peter 	}
    281        1.1   thorpej 
    282       1.21     peter 	/* calculate wshift.  weight must be power of 2 */
    283       1.21     peter 	w = rp->red_weight;
    284       1.21     peter 	for (i = 0; w > 1; i++)
    285       1.21     peter 		w = w >> 1;
    286       1.21     peter 	rp->red_wshift = i;
    287       1.21     peter 	w = 1 << rp->red_wshift;
    288       1.21     peter 	if (w != rp->red_weight) {
    289       1.21     peter 		printf("invalid weight value %d for red! use %d\n",
    290       1.21     peter 		       rp->red_weight, w);
    291       1.21     peter 		rp->red_weight = w;
    292        1.1   thorpej 	}
    293        1.1   thorpej 
    294       1.21     peter 	/*
    295       1.21     peter 	 * thmin_s and thmax_s are scaled versions of th_min and th_max
    296       1.21     peter 	 * to be compared with avg.
    297       1.21     peter 	 */
    298       1.21     peter 	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
    299       1.21     peter 	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
    300       1.21     peter 
    301       1.21     peter 	/*
    302       1.21     peter 	 * precompute probability denominator
    303       1.21     peter 	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
    304       1.21     peter 	 */
    305       1.21     peter 	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
    306       1.21     peter 			 * rp->red_inv_pmax) << FP_SHIFT;
    307       1.21     peter 
    308       1.21     peter 	/* allocate weight table */
    309       1.21     peter 	rp->red_wtab = wtab_alloc(rp->red_weight);
    310       1.21     peter 
    311       1.21     peter 	microtime(&rp->red_last);
    312       1.21     peter #ifdef ALTQ3_COMPAT
    313       1.21     peter #ifdef ALTQ_FLOWVALVE
    314       1.21     peter 	if (flags & REDF_FLOWVALVE)
    315       1.21     peter 		rp->red_flowvalve = fv_alloc(rp);
    316       1.21     peter 	/* if fv_alloc failes, flowvalve is just disabled */
    317       1.21     peter #endif
    318       1.21     peter #endif /* ALTQ3_COMPAT */
    319       1.21     peter 	return (rp);
    320        1.1   thorpej }
    321        1.1   thorpej 
    322       1.21     peter void
    323       1.21     peter red_destroy(red_t *rp)
    324        1.1   thorpej {
    325       1.21     peter #ifdef ALTQ3_COMPAT
    326       1.21     peter #ifdef ALTQ_FLOWVALVE
    327       1.21     peter 	if (rp->red_flowvalve != NULL)
    328       1.21     peter 		fv_destroy(rp->red_flowvalve);
    329        1.1   thorpej #endif
    330       1.21     peter #endif /* ALTQ3_COMPAT */
    331       1.21     peter 	wtab_destroy(rp->red_wtab);
    332       1.21     peter 	free(rp, M_DEVBUF);
    333       1.21     peter }
    334       1.11     perry 
    335       1.21     peter void
    336       1.21     peter red_getstats(red_t *rp, struct redstats *sp)
    337       1.21     peter {
    338       1.21     peter 	sp->q_avg		= rp->red_avg >> rp->red_wshift;
    339       1.21     peter 	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
    340       1.21     peter 	sp->drop_cnt		= rp->red_stats.drop_cnt;
    341       1.21     peter 	sp->drop_forced		= rp->red_stats.drop_forced;
    342       1.21     peter 	sp->drop_unforced	= rp->red_stats.drop_unforced;
    343       1.21     peter 	sp->marked_packets	= rp->red_stats.marked_packets;
    344       1.21     peter }
    345        1.1   thorpej 
    346       1.21     peter int
    347       1.21     peter red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    348       1.21     peter     struct altq_pktattr *pktattr)
    349       1.21     peter {
    350       1.21     peter 	int avg, droptype;
    351       1.21     peter 	int n;
    352       1.21     peter #ifdef ALTQ3_COMPAT
    353       1.21     peter #ifdef ALTQ_FLOWVALVE
    354       1.21     peter 	struct fve *fve = NULL;
    355        1.1   thorpej 
    356       1.21     peter 	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
    357       1.21     peter 		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
    358       1.21     peter 			m_freem(m);
    359       1.21     peter 			return (-1);
    360        1.1   thorpej 		}
    361       1.21     peter #endif
    362       1.21     peter #endif /* ALTQ3_COMPAT */
    363        1.1   thorpej 
    364       1.21     peter 	avg = rp->red_avg;
    365        1.1   thorpej 
    366       1.21     peter 	/*
    367       1.21     peter 	 * if we were idle, we pretend that n packets arrived during
    368       1.21     peter 	 * the idle period.
    369       1.21     peter 	 */
    370       1.21     peter 	if (rp->red_idle) {
    371       1.21     peter 		struct timeval now;
    372       1.21     peter 		int t;
    373        1.1   thorpej 
    374       1.21     peter 		rp->red_idle = 0;
    375       1.21     peter 		microtime(&now);
    376       1.21     peter 		t = (now.tv_sec - rp->red_last.tv_sec);
    377       1.21     peter 		if (t > 60) {
    378       1.21     peter 			/*
    379       1.21     peter 			 * being idle for more than 1 minute, set avg to zero.
    380       1.21     peter 			 * this prevents t from overflow.
    381       1.21     peter 			 */
    382       1.21     peter 			avg = 0;
    383       1.21     peter 		} else {
    384       1.21     peter 			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
    385       1.21     peter 			n = t / rp->red_pkttime - 1;
    386        1.1   thorpej 
    387       1.21     peter 			/* the following line does (avg = (1 - Wq)^n * avg) */
    388       1.21     peter 			if (n > 0)
    389       1.21     peter 				avg = (avg >> FP_SHIFT) *
    390       1.21     peter 				    pow_w(rp->red_wtab, n);
    391        1.1   thorpej 		}
    392       1.21     peter 	}
    393        1.1   thorpej 
    394       1.21     peter 	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
    395       1.21     peter 	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
    396       1.21     peter 	rp->red_avg = avg;		/* save the new value */
    397        1.1   thorpej 
    398       1.21     peter 	/*
    399       1.21     peter 	 * red_count keeps a tally of arriving traffic that has not
    400       1.21     peter 	 * been dropped.
    401       1.21     peter 	 */
    402       1.21     peter 	rp->red_count++;
    403        1.1   thorpej 
    404       1.21     peter 	/* see if we drop early */
    405       1.21     peter 	droptype = DTYPE_NODROP;
    406       1.21     peter 	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
    407       1.21     peter 		if (avg >= rp->red_thmax_s) {
    408       1.21     peter 			/* avg >= th_max: forced drop */
    409       1.21     peter 			droptype = DTYPE_FORCED;
    410       1.21     peter 		} else if (rp->red_old == 0) {
    411       1.21     peter 			/* first exceeds th_min */
    412       1.21     peter 			rp->red_count = 1;
    413       1.21     peter 			rp->red_old = 1;
    414       1.21     peter 		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
    415       1.21     peter 				      rp->red_probd, rp->red_count)) {
    416       1.21     peter 			/* mark or drop by red */
    417       1.21     peter 			if ((rp->red_flags & REDF_ECN) &&
    418       1.21     peter 			    mark_ecn(m, pktattr, rp->red_flags)) {
    419       1.21     peter 				/* successfully marked.  do not drop. */
    420       1.21     peter 				rp->red_count = 0;
    421       1.21     peter #ifdef RED_STATS
    422       1.21     peter 				rp->red_stats.marked_packets++;
    423       1.21     peter #endif
    424       1.21     peter 			} else {
    425       1.21     peter 				/* unforced drop by red */
    426       1.21     peter 				droptype = DTYPE_EARLY;
    427       1.21     peter 			}
    428        1.1   thorpej 		}
    429       1.21     peter 	} else {
    430       1.21     peter 		/* avg < th_min */
    431       1.21     peter 		rp->red_old = 0;
    432       1.21     peter 	}
    433        1.1   thorpej 
    434       1.21     peter 	/*
    435       1.21     peter 	 * if the queue length hits the hard limit, it's a forced drop.
    436       1.21     peter 	 */
    437       1.21     peter 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
    438       1.21     peter 		droptype = DTYPE_FORCED;
    439        1.1   thorpej 
    440       1.21     peter #ifdef RED_RANDOM_DROP
    441       1.21     peter 	/* if successful or forced drop, enqueue this packet. */
    442       1.21     peter 	if (droptype != DTYPE_EARLY)
    443       1.21     peter 		_addq(q, m);
    444       1.21     peter #else
    445       1.21     peter 	/* if successful, enqueue this packet. */
    446       1.21     peter 	if (droptype == DTYPE_NODROP)
    447       1.21     peter 		_addq(q, m);
    448       1.21     peter #endif
    449       1.21     peter 	if (droptype != DTYPE_NODROP) {
    450       1.21     peter 		if (droptype == DTYPE_EARLY) {
    451       1.21     peter 			/* drop the incoming packet */
    452       1.21     peter #ifdef RED_STATS
    453       1.21     peter 			rp->red_stats.drop_unforced++;
    454       1.21     peter #endif
    455       1.21     peter 		} else {
    456       1.21     peter 			/* forced drop, select a victim packet in the queue. */
    457       1.21     peter #ifdef RED_RANDOM_DROP
    458       1.21     peter 			m = _getq_random(q);
    459       1.21     peter #endif
    460       1.21     peter #ifdef RED_STATS
    461       1.21     peter 			rp->red_stats.drop_forced++;
    462       1.21     peter #endif
    463       1.21     peter 		}
    464       1.21     peter #ifdef RED_STATS
    465       1.21     peter 		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
    466       1.21     peter #endif
    467       1.21     peter 		rp->red_count = 0;
    468       1.21     peter #ifdef ALTQ3_COMPAT
    469        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    470       1.21     peter 		if (rp->red_flowvalve != NULL)
    471       1.21     peter 			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
    472       1.21     peter #endif
    473       1.21     peter #endif /* ALTQ3_COMPAT */
    474       1.21     peter 		m_freem(m);
    475       1.21     peter 		return (-1);
    476        1.1   thorpej 	}
    477       1.21     peter 	/* successfully queued */
    478       1.21     peter #ifdef RED_STATS
    479       1.21     peter 	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
    480       1.21     peter #endif
    481       1.21     peter 	return (0);
    482        1.1   thorpej }
    483        1.1   thorpej 
    484       1.21     peter /*
    485       1.21     peter  * early-drop probability is calculated as follows:
    486       1.21     peter  *   prob = p_max * (avg - th_min) / (th_max - th_min)
    487       1.21     peter  *   prob_a = prob / (2 - count*prob)
    488       1.21     peter  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
    489       1.21     peter  * here prob_a increases as successive undrop count increases.
    490       1.21     peter  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
    491       1.21     peter  * becomes 1 when (count >= (2 / prob))).
    492       1.21     peter  */
    493       1.21     peter int
    494       1.21     peter drop_early(int fp_len, int fp_probd, int count)
    495        1.1   thorpej {
    496       1.21     peter 	int	d;		/* denominator of drop-probability */
    497        1.1   thorpej 
    498       1.21     peter 	d = fp_probd - count * fp_len;
    499       1.21     peter 	if (d <= 0)
    500       1.21     peter 		/* count exceeds the hard limit: drop or mark */
    501       1.21     peter 		return (1);
    502        1.1   thorpej 
    503       1.21     peter 	/*
    504       1.21     peter 	 * now the range of d is [1..600] in fixed-point. (when
    505       1.21     peter 	 * th_max-th_min=10 and p_max=1/30)
    506       1.21     peter 	 * drop probability = (avg - TH_MIN) / d
    507       1.21     peter 	 */
    508        1.1   thorpej 
    509  1.28.30.1      yamt 	if ((cprng_fast32() % d) < fp_len) {
    510       1.21     peter 		/* drop or mark */
    511       1.21     peter 		return (1);
    512        1.1   thorpej 	}
    513       1.21     peter 	/* no drop/mark */
    514       1.21     peter 	return (0);
    515        1.1   thorpej }
    516        1.1   thorpej 
    517        1.1   thorpej /*
    518       1.21     peter  * try to mark CE bit to the packet.
    519       1.21     peter  *    returns 1 if successfully marked, 0 otherwise.
    520        1.1   thorpej  */
    521       1.21     peter int
    522       1.21     peter mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
    523       1.21     peter {
    524       1.21     peter 	struct mbuf	*m0;
    525       1.21     peter 	struct m_tag	*t;
    526       1.21     peter 	struct altq_tag	*at;
    527       1.21     peter 	void		*hdr;
    528       1.21     peter 	int		 af;
    529       1.21     peter 
    530       1.28      yamt 	t = m_tag_find(m, PACKET_TAG_ALTQ_QID, NULL);
    531       1.21     peter 	if (t != NULL) {
    532       1.21     peter 		at = (struct altq_tag *)(t + 1);
    533       1.21     peter 		if (at == NULL)
    534       1.21     peter 			return (0);
    535       1.21     peter 		af = at->af;
    536       1.21     peter 		hdr = at->hdr;
    537       1.21     peter #ifdef ALTQ3_COMPAT
    538       1.21     peter 	} else if (pktattr != NULL) {
    539       1.21     peter 		af = pktattr->pattr_af;
    540       1.21     peter 		hdr = pktattr->pattr_hdr;
    541       1.21     peter #endif /* ALTQ3_COMPAT */
    542       1.21     peter 	} else
    543       1.21     peter 		return (0);
    544       1.21     peter 
    545       1.21     peter 	if (af != AF_INET && af != AF_INET6)
    546       1.21     peter 		return (0);
    547       1.21     peter 
    548       1.21     peter 	/* verify that pattr_hdr is within the mbuf data */
    549       1.21     peter 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
    550       1.25  christos 		if (((char *)hdr >= m0->m_data) &&
    551       1.25  christos 		    ((char *)hdr < m0->m_data + m0->m_len))
    552       1.21     peter 			break;
    553       1.21     peter 	if (m0 == NULL) {
    554       1.21     peter 		/* ick, tag info is stale */
    555       1.21     peter 		return (0);
    556       1.21     peter 	}
    557        1.1   thorpej 
    558       1.21     peter 	switch (af) {
    559       1.21     peter 	case AF_INET:
    560       1.21     peter 		if (flags & REDF_ECN4) {
    561       1.21     peter 			struct ip *ip = hdr;
    562       1.21     peter 			u_int8_t otos;
    563       1.21     peter 			int sum;
    564       1.11     perry 
    565       1.21     peter 			if (ip->ip_v != 4)
    566       1.21     peter 				return (0);	/* version mismatch! */
    567        1.1   thorpej 
    568       1.21     peter 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
    569       1.21     peter 				return (0);	/* not-ECT */
    570       1.21     peter 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
    571       1.21     peter 				return (1);	/* already marked */
    572        1.1   thorpej 
    573       1.21     peter 			/*
    574       1.21     peter 			 * ecn-capable but not marked,
    575       1.21     peter 			 * mark CE and update checksum
    576       1.21     peter 			 */
    577       1.21     peter 			otos = ip->ip_tos;
    578       1.21     peter 			ip->ip_tos |= IPTOS_ECN_CE;
    579       1.21     peter 			/*
    580       1.21     peter 			 * update checksum (from RFC1624)
    581       1.21     peter 			 *	   HC' = ~(~HC + ~m + m')
    582       1.21     peter 			 */
    583       1.21     peter 			sum = ~ntohs(ip->ip_sum) & 0xffff;
    584       1.21     peter 			sum += (~otos & 0xffff) + ip->ip_tos;
    585       1.21     peter 			sum = (sum >> 16) + (sum & 0xffff);
    586       1.21     peter 			sum += (sum >> 16);  /* add carry */
    587       1.21     peter 			ip->ip_sum = htons(~sum & 0xffff);
    588       1.21     peter 			return (1);
    589       1.21     peter 		}
    590       1.21     peter 		break;
    591       1.21     peter #ifdef INET6
    592       1.21     peter 	case AF_INET6:
    593       1.21     peter 		if (flags & REDF_ECN6) {
    594       1.21     peter 			struct ip6_hdr *ip6 = hdr;
    595       1.21     peter 			u_int32_t flowlabel;
    596       1.21     peter 
    597       1.21     peter 			flowlabel = ntohl(ip6->ip6_flow);
    598       1.21     peter 			if ((flowlabel >> 28) != 6)
    599       1.21     peter 				return (0);	/* version mismatch! */
    600       1.21     peter 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
    601       1.21     peter 			    (IPTOS_ECN_NOTECT << 20))
    602       1.21     peter 				return (0);	/* not-ECT */
    603       1.21     peter 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
    604       1.21     peter 			    (IPTOS_ECN_CE << 20))
    605       1.21     peter 				return (1);	/* already marked */
    606       1.21     peter 			/*
    607       1.21     peter 			 * ecn-capable but not marked,  mark CE
    608       1.21     peter 			 */
    609       1.21     peter 			flowlabel |= (IPTOS_ECN_CE << 20);
    610       1.21     peter 			ip6->ip6_flow = htonl(flowlabel);
    611       1.21     peter 			return (1);
    612       1.21     peter 		}
    613       1.21     peter 		break;
    614       1.21     peter #endif  /* INET6 */
    615       1.21     peter 	}
    616        1.1   thorpej 
    617       1.21     peter 	/* not marked */
    618       1.21     peter 	return (0);
    619       1.21     peter }
    620        1.1   thorpej 
    621       1.21     peter struct mbuf *
    622       1.21     peter red_getq(red_t *rp, class_queue_t *q)
    623       1.21     peter {
    624       1.21     peter 	struct mbuf *m;
    625        1.1   thorpej 
    626       1.21     peter 	if ((m = _getq(q)) == NULL) {
    627       1.21     peter 		if (rp->red_idle == 0) {
    628       1.21     peter 			rp->red_idle = 1;
    629       1.21     peter 			microtime(&rp->red_last);
    630        1.1   thorpej 		}
    631       1.21     peter 		return NULL;
    632        1.1   thorpej 	}
    633        1.1   thorpej 
    634       1.21     peter 	rp->red_idle = 0;
    635       1.21     peter 	return (m);
    636       1.21     peter }
    637       1.21     peter 
    638       1.21     peter /*
    639       1.21     peter  * helper routine to calibrate avg during idle.
    640       1.21     peter  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
    641       1.21     peter  * here Wq = 1/weight and the code assumes Wq is close to zero.
    642       1.21     peter  *
    643       1.21     peter  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
    644       1.21     peter  */
    645       1.21     peter static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
    646       1.21     peter 
    647       1.21     peter struct wtab *
    648       1.21     peter wtab_alloc(int weight)
    649       1.21     peter {
    650       1.21     peter 	struct wtab	*w;
    651       1.21     peter 	int		 i;
    652       1.11     perry 
    653       1.21     peter 	for (w = wtab_list; w != NULL; w = w->w_next)
    654       1.21     peter 		if (w->w_weight == weight) {
    655       1.21     peter 			w->w_refcount++;
    656       1.21     peter 			return (w);
    657       1.21     peter 		}
    658        1.1   thorpej 
    659       1.21     peter 	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
    660       1.21     peter 	if (w == NULL)
    661       1.21     peter 		panic("wtab_alloc: malloc failed!");
    662       1.21     peter 	w->w_weight = weight;
    663       1.21     peter 	w->w_refcount = 1;
    664       1.21     peter 	w->w_next = wtab_list;
    665       1.21     peter 	wtab_list = w;
    666        1.1   thorpej 
    667       1.21     peter 	/* initialize the weight table */
    668       1.21     peter 	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
    669       1.21     peter 	for (i = 1; i < 32; i++) {
    670       1.21     peter 		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
    671       1.21     peter 		if (w->w_tab[i] == 0 && w->w_param_max == 0)
    672       1.21     peter 			w->w_param_max = 1 << i;
    673       1.21     peter 	}
    674        1.1   thorpej 
    675       1.21     peter 	return (w);
    676        1.1   thorpej }
    677        1.1   thorpej 
    678       1.21     peter int
    679       1.21     peter wtab_destroy(struct wtab *w)
    680        1.1   thorpej {
    681       1.21     peter 	struct wtab	*prev;
    682       1.21     peter 
    683       1.21     peter 	if (--w->w_refcount > 0)
    684       1.21     peter 		return (0);
    685       1.21     peter 
    686       1.21     peter 	if (wtab_list == w)
    687       1.21     peter 		wtab_list = w->w_next;
    688       1.21     peter 	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
    689       1.21     peter 		if (prev->w_next == w) {
    690       1.21     peter 			prev->w_next = w->w_next;
    691       1.21     peter 			break;
    692       1.21     peter 		}
    693       1.21     peter 
    694       1.21     peter 	free(w, M_DEVBUF);
    695       1.21     peter 	return (0);
    696        1.1   thorpej }
    697        1.1   thorpej 
    698       1.21     peter int32_t
    699       1.21     peter pow_w(struct wtab *w, int n)
    700        1.1   thorpej {
    701       1.21     peter 	int	i, bit;
    702       1.21     peter 	int32_t	val;
    703       1.21     peter 
    704       1.21     peter 	if (n >= w->w_param_max)
    705       1.21     peter 		return (0);
    706       1.21     peter 
    707       1.21     peter 	val = 1 << FP_SHIFT;
    708       1.21     peter 	if (n <= 0)
    709       1.21     peter 		return (val);
    710       1.21     peter 
    711       1.21     peter 	bit = 1;
    712       1.21     peter 	i = 0;
    713       1.21     peter 	while (n) {
    714       1.21     peter 		if (n & bit) {
    715       1.21     peter 			val = (val * w->w_tab[i]) >> FP_SHIFT;
    716       1.21     peter 			n &= ~bit;
    717       1.21     peter 		}
    718       1.21     peter 		i++;
    719       1.21     peter 		bit <<=  1;
    720       1.21     peter 	}
    721       1.21     peter 	return (val);
    722        1.1   thorpej }
    723        1.1   thorpej 
    724       1.21     peter #ifdef ALTQ3_COMPAT
    725        1.1   thorpej /*
    726       1.21     peter  * red device interface
    727        1.1   thorpej  */
    728       1.21     peter altqdev_decl(red);
    729       1.21     peter 
    730       1.21     peter int
    731       1.24  christos redopen(dev_t dev, int flag, int fmt,
    732       1.24  christos     struct lwp *l)
    733        1.1   thorpej {
    734       1.21     peter 	/* everything will be done when the queueing scheme is attached. */
    735        1.1   thorpej 	return 0;
    736        1.1   thorpej }
    737        1.1   thorpej 
    738        1.1   thorpej int
    739       1.24  christos redclose(dev_t dev, int flag, int fmt,
    740       1.24  christos     struct lwp *l)
    741        1.1   thorpej {
    742       1.21     peter 	red_queue_t *rqp;
    743       1.21     peter 	int err, error = 0;
    744       1.21     peter 
    745       1.21     peter 	while ((rqp = red_list) != NULL) {
    746       1.21     peter 		/* destroy all */
    747       1.21     peter 		err = red_detach(rqp);
    748       1.21     peter 		if (err != 0 && error == 0)
    749       1.21     peter 			error = err;
    750       1.21     peter 	}
    751        1.1   thorpej 
    752       1.21     peter 	return error;
    753       1.21     peter }
    754        1.1   thorpej 
    755       1.21     peter int
    756       1.25  christos redioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    757       1.21     peter     struct lwp *l)
    758       1.21     peter {
    759       1.21     peter 	red_queue_t *rqp;
    760       1.21     peter 	struct red_interface *ifacep;
    761       1.21     peter 	struct ifnet *ifp;
    762       1.21     peter 	struct proc *p = l->l_proc;
    763       1.21     peter 	int	error = 0;
    764        1.1   thorpej 
    765       1.21     peter 	/* check super-user privilege */
    766       1.21     peter 	switch (cmd) {
    767       1.21     peter 	case RED_GETSTATS:
    768       1.21     peter 		break;
    769       1.21     peter 	default:
    770       1.21     peter #if (__FreeBSD_version > 400000)
    771       1.21     peter 		if ((error = suser(p)) != 0)
    772       1.21     peter #else
    773       1.23      elad 		if ((error = kauth_authorize_network(p->p_cred,
    774       1.23      elad 		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL,
    775       1.23      elad 		    NULL, NULL)) != 0)
    776       1.21     peter #endif
    777       1.21     peter 			return (error);
    778       1.21     peter 		break;
    779       1.21     peter 	}
    780       1.21     peter 
    781       1.21     peter 	switch (cmd) {
    782       1.21     peter 
    783       1.21     peter 	case RED_ENABLE:
    784       1.21     peter 		ifacep = (struct red_interface *)addr;
    785       1.21     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    786       1.21     peter 			error = EBADF;
    787       1.21     peter 			break;
    788       1.21     peter 		}
    789       1.21     peter 		error = altq_enable(rqp->rq_ifq);
    790       1.21     peter 		break;
    791       1.21     peter 
    792       1.21     peter 	case RED_DISABLE:
    793       1.21     peter 		ifacep = (struct red_interface *)addr;
    794       1.21     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    795       1.21     peter 			error = EBADF;
    796       1.21     peter 			break;
    797       1.21     peter 		}
    798       1.21     peter 		error = altq_disable(rqp->rq_ifq);
    799       1.21     peter 		break;
    800       1.21     peter 
    801       1.21     peter 	case RED_IF_ATTACH:
    802       1.21     peter 		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
    803       1.21     peter 		if (ifp == NULL) {
    804       1.21     peter 			error = ENXIO;
    805       1.21     peter 			break;
    806       1.21     peter 		}
    807       1.21     peter 
    808       1.21     peter 		/* allocate and initialize red_queue_t */
    809       1.21     peter 		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
    810       1.21     peter 		if (rqp == NULL) {
    811       1.21     peter 			error = ENOMEM;
    812       1.21     peter 			break;
    813       1.21     peter 		}
    814       1.21     peter 
    815       1.21     peter 		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
    816       1.21     peter 		    M_WAITOK|M_ZERO);
    817       1.21     peter 		if (rqp->rq_q == NULL) {
    818       1.21     peter 			free(rqp, M_DEVBUF);
    819       1.21     peter 			error = ENOMEM;
    820       1.21     peter 			break;
    821       1.21     peter 		}
    822       1.21     peter 
    823       1.21     peter 		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
    824       1.21     peter 		if (rqp->rq_red == NULL) {
    825       1.21     peter 			free(rqp->rq_q, M_DEVBUF);
    826       1.21     peter 			free(rqp, M_DEVBUF);
    827       1.21     peter 			error = ENOMEM;
    828       1.21     peter 			break;
    829       1.21     peter 		}
    830       1.21     peter 
    831       1.21     peter 		rqp->rq_ifq = &ifp->if_snd;
    832       1.21     peter 		qtail(rqp->rq_q) = NULL;
    833       1.21     peter 		qlen(rqp->rq_q) = 0;
    834       1.21     peter 		qlimit(rqp->rq_q) = RED_LIMIT;
    835       1.21     peter 		qtype(rqp->rq_q) = Q_RED;
    836       1.21     peter 
    837       1.21     peter 		/*
    838       1.21     peter 		 * set RED to this ifnet structure.
    839       1.21     peter 		 */
    840       1.21     peter 		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
    841       1.21     peter 				    red_enqueue, red_dequeue, red_request,
    842       1.21     peter 				    NULL, NULL);
    843       1.21     peter 		if (error) {
    844       1.21     peter 			red_destroy(rqp->rq_red);
    845       1.21     peter 			free(rqp->rq_q, M_DEVBUF);
    846       1.21     peter 			free(rqp, M_DEVBUF);
    847       1.21     peter 			break;
    848       1.21     peter 		}
    849        1.1   thorpej 
    850       1.21     peter 		/* add this state to the red list */
    851       1.21     peter 		rqp->rq_next = red_list;
    852       1.21     peter 		red_list = rqp;
    853       1.21     peter 		break;
    854        1.1   thorpej 
    855       1.21     peter 	case RED_IF_DETACH:
    856       1.21     peter 		ifacep = (struct red_interface *)addr;
    857       1.21     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    858       1.21     peter 			error = EBADF;
    859       1.21     peter 			break;
    860        1.1   thorpej 		}
    861       1.21     peter 		error = red_detach(rqp);
    862       1.21     peter 		break;
    863       1.21     peter 
    864       1.21     peter 	case RED_GETSTATS:
    865       1.21     peter 		do {
    866       1.21     peter 			struct red_stats *q_stats;
    867       1.21     peter 			red_t *rp;
    868       1.21     peter 
    869       1.21     peter 			q_stats = (struct red_stats *)addr;
    870       1.21     peter 			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
    871       1.21     peter 					     ALTQT_RED)) == NULL) {
    872       1.21     peter 				error = EBADF;
    873       1.21     peter 				break;
    874       1.21     peter 			}
    875       1.21     peter 
    876       1.21     peter 			q_stats->q_len 	   = qlen(rqp->rq_q);
    877       1.21     peter 			q_stats->q_limit   = qlimit(rqp->rq_q);
    878        1.1   thorpej 
    879       1.21     peter 			rp = rqp->rq_red;
    880       1.21     peter 			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
    881       1.21     peter 			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
    882       1.21     peter 			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
    883       1.21     peter 			q_stats->drop_forced   = rp->red_stats.drop_forced;
    884       1.21     peter 			q_stats->drop_unforced = rp->red_stats.drop_unforced;
    885       1.21     peter 			q_stats->marked_packets = rp->red_stats.marked_packets;
    886        1.1   thorpej 
    887       1.21     peter 			q_stats->weight		= rp->red_weight;
    888       1.21     peter 			q_stats->inv_pmax	= rp->red_inv_pmax;
    889       1.21     peter 			q_stats->th_min		= rp->red_thmin;
    890       1.21     peter 			q_stats->th_max		= rp->red_thmax;
    891        1.1   thorpej 
    892       1.21     peter #ifdef ALTQ_FLOWVALVE
    893       1.21     peter 			if (rp->red_flowvalve != NULL) {
    894       1.21     peter 				struct flowvalve *fv = rp->red_flowvalve;
    895       1.21     peter 				q_stats->fv_flows    = fv->fv_flows;
    896       1.21     peter 				q_stats->fv_pass     = fv->fv_stats.pass;
    897       1.21     peter 				q_stats->fv_predrop  = fv->fv_stats.predrop;
    898       1.21     peter 				q_stats->fv_alloc    = fv->fv_stats.alloc;
    899       1.21     peter 				q_stats->fv_escape   = fv->fv_stats.escape;
    900       1.11     perry 			} else {
    901       1.21     peter #endif /* ALTQ_FLOWVALVE */
    902       1.21     peter 				q_stats->fv_flows    = 0;
    903       1.21     peter 				q_stats->fv_pass     = 0;
    904       1.21     peter 				q_stats->fv_predrop  = 0;
    905       1.21     peter 				q_stats->fv_alloc    = 0;
    906       1.21     peter 				q_stats->fv_escape   = 0;
    907       1.21     peter #ifdef ALTQ_FLOWVALVE
    908       1.21     peter 			}
    909       1.21     peter #endif /* ALTQ_FLOWVALVE */
    910       1.21     peter 		} while (/*CONSTCOND*/ 0);
    911       1.21     peter 		break;
    912       1.21     peter 
    913       1.21     peter 	case RED_CONFIG:
    914       1.21     peter 		do {
    915       1.21     peter 			struct red_conf *fc;
    916       1.21     peter 			red_t *new;
    917       1.21     peter 			int s, limit;
    918       1.21     peter 
    919       1.21     peter 			fc = (struct red_conf *)addr;
    920       1.21     peter 			if ((rqp = altq_lookup(fc->iface.red_ifname,
    921       1.21     peter 					       ALTQT_RED)) == NULL) {
    922       1.21     peter 				error = EBADF;
    923       1.21     peter 				break;
    924       1.21     peter 			}
    925       1.21     peter 			new = red_alloc(fc->red_weight,
    926       1.21     peter 					fc->red_inv_pmax,
    927       1.21     peter 					fc->red_thmin,
    928       1.21     peter 					fc->red_thmax,
    929       1.21     peter 					fc->red_flags,
    930       1.21     peter 					fc->red_pkttime);
    931       1.21     peter 			if (new == NULL) {
    932       1.21     peter 				error = ENOMEM;
    933       1.21     peter 				break;
    934        1.1   thorpej 			}
    935        1.1   thorpej 
    936       1.21     peter 			s = splnet();
    937       1.21     peter 			red_purgeq(rqp);
    938       1.21     peter 			limit = fc->red_limit;
    939       1.21     peter 			if (limit < fc->red_thmax)
    940       1.21     peter 				limit = fc->red_thmax;
    941       1.21     peter 			qlimit(rqp->rq_q) = limit;
    942       1.21     peter 			fc->red_limit = limit;	/* write back the new value */
    943       1.21     peter 
    944       1.21     peter 			red_destroy(rqp->rq_red);
    945       1.21     peter 			rqp->rq_red = new;
    946       1.21     peter 
    947       1.21     peter 			splx(s);
    948       1.21     peter 
    949       1.21     peter 			/* write back new values */
    950       1.21     peter 			fc->red_limit = limit;
    951       1.21     peter 			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
    952       1.21     peter 			fc->red_thmin = rqp->rq_red->red_thmin;
    953       1.21     peter 			fc->red_thmax = rqp->rq_red->red_thmax;
    954       1.21     peter 
    955       1.21     peter 		} while (/*CONSTCOND*/ 0);
    956       1.21     peter 		break;
    957       1.21     peter 
    958       1.21     peter 	case RED_SETDEFAULTS:
    959       1.21     peter 		do {
    960       1.21     peter 			struct redparams *rp;
    961       1.21     peter 
    962       1.21     peter 			rp = (struct redparams *)addr;
    963       1.21     peter 
    964       1.21     peter 			default_th_min = rp->th_min;
    965       1.21     peter 			default_th_max = rp->th_max;
    966       1.21     peter 			default_inv_pmax = rp->inv_pmax;
    967       1.21     peter 		} while (/*CONSTCOND*/ 0);
    968       1.21     peter 		break;
    969        1.1   thorpej 
    970       1.21     peter 	default:
    971       1.21     peter 		error = EINVAL;
    972       1.21     peter 		break;
    973        1.1   thorpej 	}
    974       1.21     peter 	return error;
    975        1.1   thorpej }
    976        1.1   thorpej 
    977       1.21     peter static int
    978       1.21     peter red_detach(red_queue_t *rqp)
    979        1.1   thorpej {
    980       1.21     peter 	red_queue_t *tmp;
    981       1.21     peter 	int error = 0;
    982        1.1   thorpej 
    983       1.21     peter 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    984       1.21     peter 		altq_disable(rqp->rq_ifq);
    985        1.1   thorpej 
    986       1.21     peter 	if ((error = altq_detach(rqp->rq_ifq)))
    987       1.21     peter 		return (error);
    988        1.1   thorpej 
    989       1.21     peter 	if (red_list == rqp)
    990       1.21     peter 		red_list = rqp->rq_next;
    991       1.21     peter 	else {
    992       1.21     peter 		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
    993       1.21     peter 			if (tmp->rq_next == rqp) {
    994       1.21     peter 				tmp->rq_next = rqp->rq_next;
    995       1.21     peter 				break;
    996       1.21     peter 			}
    997       1.21     peter 		if (tmp == NULL)
    998       1.21     peter 			printf("red_detach: no state found in red_list!\n");
    999        1.1   thorpej 	}
   1000       1.21     peter 
   1001       1.21     peter 	red_destroy(rqp->rq_red);
   1002       1.21     peter 	free(rqp->rq_q, M_DEVBUF);
   1003       1.21     peter 	free(rqp, M_DEVBUF);
   1004       1.21     peter 	return (error);
   1005        1.1   thorpej }
   1006        1.1   thorpej 
   1007        1.1   thorpej /*
   1008       1.21     peter  * enqueue routine:
   1009       1.21     peter  *
   1010       1.21     peter  *	returns: 0 when successfully queued.
   1011       1.21     peter  *		 ENOBUFS when drop occurs.
   1012        1.1   thorpej  */
   1013       1.21     peter static int
   1014       1.21     peter red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
   1015        1.1   thorpej {
   1016       1.21     peter 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1017        1.1   thorpej 
   1018       1.21     peter 	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
   1019       1.21     peter 		return ENOBUFS;
   1020       1.21     peter 	ifq->ifq_len++;
   1021       1.21     peter 	return 0;
   1022        1.1   thorpej }
   1023        1.1   thorpej 
   1024        1.1   thorpej /*
   1025        1.1   thorpej  * dequeue routine:
   1026        1.3   thorpej  *	must be called in splnet.
   1027        1.1   thorpej  *
   1028        1.1   thorpej  *	returns: mbuf dequeued.
   1029        1.1   thorpej  *		 NULL when no packet is available in the queue.
   1030        1.1   thorpej  */
   1031        1.1   thorpej 
   1032        1.1   thorpej static struct mbuf *
   1033       1.21     peter red_dequeue(struct ifaltq *ifq, int op)
   1034        1.1   thorpej {
   1035        1.1   thorpej 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1036        1.1   thorpej 	struct mbuf *m;
   1037        1.1   thorpej 
   1038        1.1   thorpej 	if (op == ALTDQ_POLL)
   1039        1.1   thorpej 		return qhead(rqp->rq_q);
   1040        1.1   thorpej 
   1041        1.1   thorpej 	/* op == ALTDQ_REMOVE */
   1042        1.1   thorpej 	m =  red_getq(rqp->rq_red, rqp->rq_q);
   1043        1.1   thorpej 	if (m != NULL)
   1044        1.1   thorpej 		ifq->ifq_len--;
   1045        1.1   thorpej 	return (m);
   1046        1.1   thorpej }
   1047        1.1   thorpej 
   1048        1.1   thorpej static int
   1049       1.24  christos red_request(struct ifaltq *ifq, int req, void *arg)
   1050        1.1   thorpej {
   1051        1.1   thorpej 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1052        1.1   thorpej 
   1053        1.1   thorpej 	switch (req) {
   1054        1.1   thorpej 	case ALTRQ_PURGE:
   1055        1.1   thorpej 		red_purgeq(rqp);
   1056        1.1   thorpej 		break;
   1057        1.1   thorpej 	}
   1058        1.1   thorpej 	return (0);
   1059        1.1   thorpej }
   1060        1.1   thorpej 
   1061        1.1   thorpej static void
   1062       1.21     peter red_purgeq(red_queue_t *rqp)
   1063        1.1   thorpej {
   1064        1.1   thorpej 	_flushq(rqp->rq_q);
   1065        1.1   thorpej 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
   1066        1.1   thorpej 		rqp->rq_ifq->ifq_len = 0;
   1067        1.1   thorpej }
   1068        1.1   thorpej 
   1069        1.1   thorpej #ifdef ALTQ_FLOWVALVE
   1070        1.1   thorpej 
   1071        1.1   thorpej #define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
   1072        1.1   thorpej #define	FV_PSCALE(x)	((x) << FV_PSHIFT)
   1073        1.1   thorpej #define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
   1074        1.1   thorpej #define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
   1075        1.1   thorpej #define	FV_FSCALE(x)	((x) << FV_FSHIFT)
   1076        1.1   thorpej #define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
   1077        1.1   thorpej 
   1078        1.1   thorpej #define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
   1079        1.1   thorpej #define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
   1080        1.1   thorpej 
   1081        1.1   thorpej #define	FV_N			10	/* update fve_f every FV_N packets */
   1082        1.1   thorpej 
   1083        1.1   thorpej #define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
   1084       1.11     perry #define	FV_TTHRESH		3  /* time threshold to delete fve */
   1085        1.1   thorpej #define	FV_ALPHA		5  /* extra packet count */
   1086        1.1   thorpej 
   1087       1.21     peter #define	FV_STATS
   1088       1.21     peter 
   1089        1.1   thorpej #define	FV_TIMESTAMP(tp)	getmicrotime(tp)
   1090        1.1   thorpej 
   1091        1.1   thorpej /*
   1092        1.1   thorpej  * Brtt table: 127 entry table to convert drop rate (p) to
   1093        1.1   thorpej  * the corresponding bandwidth fraction (f)
   1094        1.1   thorpej  * the following equation is implemented to use scaled values,
   1095        1.1   thorpej  * fve_p and fve_f, in the fixed point format.
   1096        1.1   thorpej  *
   1097        1.1   thorpej  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
   1098        1.1   thorpej  *   f = Brtt(p) / (max_th + alpha)
   1099        1.1   thorpej  */
   1100        1.1   thorpej #define	BRTT_SIZE	128
   1101        1.1   thorpej #define	BRTT_SHIFT	12
   1102        1.1   thorpej #define	BRTT_MASK	0x0007f000
   1103        1.1   thorpej #define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
   1104        1.1   thorpej 
   1105        1.1   thorpej const int brtt_tab[BRTT_SIZE] = {
   1106       1.11     perry 	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
   1107       1.11     perry 	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
   1108       1.11     perry 	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
   1109       1.11     perry 	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
   1110       1.11     perry 	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
   1111       1.11     perry 	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
   1112       1.11     perry 	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
   1113       1.11     perry 	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
   1114       1.11     perry 	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
   1115       1.11     perry 	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
   1116       1.11     perry 	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
   1117       1.11     perry 	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
   1118       1.11     perry 	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
   1119       1.11     perry 	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
   1120       1.11     perry 	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
   1121        1.1   thorpej 	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
   1122        1.1   thorpej };
   1123        1.1   thorpej 
   1124       1.13     perry static inline struct fve *
   1125       1.21     peter flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
   1126       1.21     peter     struct timeval *now)
   1127        1.1   thorpej {
   1128        1.1   thorpej 	struct fve *fve;
   1129        1.1   thorpej 	int flows;
   1130        1.1   thorpej 	struct ip *ip;
   1131        1.1   thorpej #ifdef INET6
   1132        1.1   thorpej 	struct ip6_hdr *ip6;
   1133        1.1   thorpej #endif
   1134        1.1   thorpej 	struct timeval tthresh;
   1135        1.1   thorpej 
   1136        1.1   thorpej 	if (pktattr == NULL)
   1137        1.1   thorpej 		return (NULL);
   1138        1.1   thorpej 
   1139        1.1   thorpej 	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
   1140        1.1   thorpej 	flows = 0;
   1141        1.1   thorpej 	/*
   1142        1.1   thorpej 	 * search the flow list
   1143        1.1   thorpej 	 */
   1144        1.1   thorpej 	switch (pktattr->pattr_af) {
   1145        1.1   thorpej 	case AF_INET:
   1146        1.1   thorpej 		ip = (struct ip *)pktattr->pattr_hdr;
   1147        1.1   thorpej 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
   1148        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec == 0)
   1149        1.1   thorpej 				break;
   1150        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
   1151        1.1   thorpej 				fve->fve_lastdrop.tv_sec = 0;
   1152        1.1   thorpej 				break;
   1153        1.1   thorpej 			}
   1154        1.1   thorpej 			if (fve->fve_flow.flow_af == AF_INET &&
   1155        1.1   thorpej 			    fve->fve_flow.flow_ip.ip_src.s_addr ==
   1156        1.1   thorpej 			    ip->ip_src.s_addr &&
   1157        1.1   thorpej 			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
   1158        1.1   thorpej 			    ip->ip_dst.s_addr)
   1159        1.1   thorpej 				return (fve);
   1160        1.1   thorpej 			flows++;
   1161        1.1   thorpej 		}
   1162        1.1   thorpej 		break;
   1163        1.1   thorpej #ifdef INET6
   1164        1.1   thorpej 	case AF_INET6:
   1165        1.1   thorpej 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1166        1.1   thorpej 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
   1167        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec == 0)
   1168        1.1   thorpej 				break;
   1169        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
   1170        1.1   thorpej 				fve->fve_lastdrop.tv_sec = 0;
   1171        1.1   thorpej 				break;
   1172        1.1   thorpej 			}
   1173        1.1   thorpej 			if (fve->fve_flow.flow_af == AF_INET6 &&
   1174        1.1   thorpej 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
   1175        1.1   thorpej 					       &ip6->ip6_src) &&
   1176        1.1   thorpej 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
   1177        1.1   thorpej 					       &ip6->ip6_dst))
   1178        1.1   thorpej 				return (fve);
   1179        1.1   thorpej 			flows++;
   1180        1.1   thorpej 		}
   1181        1.1   thorpej 		break;
   1182        1.1   thorpej #endif /* INET6 */
   1183        1.1   thorpej 
   1184        1.1   thorpej 	default:
   1185        1.1   thorpej 		/* unknown protocol.  no drop. */
   1186        1.1   thorpej 		return (NULL);
   1187        1.1   thorpej 	}
   1188        1.1   thorpej 	fv->fv_flows = flows;	/* save the number of active fve's */
   1189        1.1   thorpej 	return (NULL);
   1190        1.1   thorpej }
   1191        1.1   thorpej 
   1192       1.13     perry static inline struct fve *
   1193       1.21     peter flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
   1194        1.1   thorpej {
   1195        1.1   thorpej 	struct fve *fve;
   1196        1.1   thorpej 	struct ip *ip;
   1197        1.1   thorpej #ifdef INET6
   1198        1.1   thorpej 	struct ip6_hdr *ip6;
   1199        1.1   thorpej #endif
   1200        1.1   thorpej 
   1201        1.1   thorpej 	/*
   1202        1.1   thorpej 	 * get an entry from the tail of the LRU list.
   1203        1.1   thorpej 	 */
   1204        1.1   thorpej 	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
   1205        1.1   thorpej 
   1206        1.1   thorpej 	switch (pktattr->pattr_af) {
   1207        1.1   thorpej 	case AF_INET:
   1208        1.1   thorpej 		ip = (struct ip *)pktattr->pattr_hdr;
   1209        1.1   thorpej 		fve->fve_flow.flow_af = AF_INET;
   1210        1.1   thorpej 		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
   1211        1.1   thorpej 		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
   1212        1.1   thorpej 		break;
   1213        1.1   thorpej #ifdef INET6
   1214        1.1   thorpej 	case AF_INET6:
   1215        1.1   thorpej 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1216        1.1   thorpej 		fve->fve_flow.flow_af = AF_INET6;
   1217        1.1   thorpej 		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
   1218        1.1   thorpej 		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
   1219        1.1   thorpej 		break;
   1220        1.1   thorpej #endif
   1221        1.1   thorpej 	}
   1222        1.1   thorpej 
   1223        1.1   thorpej 	fve->fve_state = Green;
   1224        1.1   thorpej 	fve->fve_p = 0.0;
   1225        1.1   thorpej 	fve->fve_f = 0.0;
   1226        1.1   thorpej 	fve->fve_ifseq = fv->fv_ifseq - 1;
   1227        1.1   thorpej 	fve->fve_count = 0;
   1228        1.1   thorpej 
   1229        1.1   thorpej 	fv->fv_flows++;
   1230        1.1   thorpej #ifdef FV_STATS
   1231        1.1   thorpej 	fv->fv_stats.alloc++;
   1232        1.1   thorpej #endif
   1233        1.1   thorpej 	return (fve);
   1234        1.1   thorpej }
   1235        1.1   thorpej 
   1236       1.13     perry static inline void
   1237       1.21     peter flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
   1238        1.1   thorpej {
   1239        1.1   thorpej 	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
   1240        1.1   thorpej 		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
   1241        1.1   thorpej 		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
   1242        1.1   thorpej 	}
   1243        1.1   thorpej }
   1244        1.1   thorpej 
   1245        1.1   thorpej /*
   1246        1.1   thorpej  * allocate flowvalve structure
   1247        1.1   thorpej  */
   1248        1.1   thorpej static struct flowvalve *
   1249       1.21     peter fv_alloc(struct red *rp)
   1250        1.1   thorpej {
   1251        1.1   thorpej 	struct flowvalve *fv;
   1252        1.1   thorpej 	struct fve *fve;
   1253        1.1   thorpej 	int i, num;
   1254        1.1   thorpej 
   1255        1.1   thorpej 	num = FV_FLOWLISTSIZE;
   1256       1.14  christos 	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
   1257        1.1   thorpej 	if (fv == NULL)
   1258        1.1   thorpej 		return (NULL);
   1259        1.1   thorpej 
   1260       1.14  christos 	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
   1261       1.14  christos 	    M_WAITOK|M_ZERO);
   1262        1.1   thorpej 	if (fv->fv_fves == NULL) {
   1263       1.14  christos 		free(fv, M_DEVBUF);
   1264        1.1   thorpej 		return (NULL);
   1265        1.1   thorpej 	}
   1266        1.1   thorpej 
   1267        1.1   thorpej 	fv->fv_flows = 0;
   1268        1.1   thorpej 	TAILQ_INIT(&fv->fv_flowlist);
   1269        1.1   thorpej 	for (i = 0; i < num; i++) {
   1270        1.1   thorpej 		fve = &fv->fv_fves[i];
   1271        1.1   thorpej 		fve->fve_lastdrop.tv_sec = 0;
   1272        1.1   thorpej 		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
   1273        1.1   thorpej 	}
   1274        1.1   thorpej 
   1275        1.1   thorpej 	/* initialize drop rate threshold in scaled fixed-point */
   1276        1.1   thorpej 	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
   1277        1.1   thorpej 
   1278        1.1   thorpej 	/* initialize drop rate to fraction table */
   1279       1.14  christos 	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
   1280        1.1   thorpej 	if (fv->fv_p2ftab == NULL) {
   1281       1.14  christos 		free(fv->fv_fves, M_DEVBUF);
   1282       1.14  christos 		free(fv, M_DEVBUF);
   1283        1.1   thorpej 		return (NULL);
   1284        1.1   thorpej 	}
   1285        1.1   thorpej 	/*
   1286        1.1   thorpej 	 * create the p2f table.
   1287        1.1   thorpej 	 * (shift is used to keep the precision)
   1288        1.1   thorpej 	 */
   1289        1.1   thorpej 	for (i = 1; i < BRTT_SIZE; i++) {
   1290        1.1   thorpej 		int f;
   1291        1.1   thorpej 
   1292        1.1   thorpej 		f = brtt_tab[i] << 8;
   1293        1.1   thorpej 		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
   1294        1.1   thorpej 	}
   1295        1.1   thorpej 
   1296        1.1   thorpej 	return (fv);
   1297        1.1   thorpej }
   1298        1.1   thorpej 
   1299       1.21     peter static void
   1300       1.21     peter fv_destroy(struct flowvalve *fv)
   1301        1.1   thorpej {
   1302       1.15  christos 	free(fv->fv_p2ftab, M_DEVBUF);
   1303       1.15  christos 	free(fv->fv_fves, M_DEVBUF);
   1304       1.15  christos 	free(fv, M_DEVBUF);
   1305        1.1   thorpej }
   1306        1.1   thorpej 
   1307       1.13     perry static inline int
   1308       1.21     peter fv_p2f(struct flowvalve *fv, int p)
   1309        1.1   thorpej {
   1310        1.1   thorpej 	int val, f;
   1311        1.1   thorpej 
   1312        1.1   thorpej 	if (p >= BRTT_PMAX)
   1313        1.1   thorpej 		f = fv->fv_p2ftab[BRTT_SIZE-1];
   1314        1.1   thorpej 	else if ((val = (p & BRTT_MASK)))
   1315        1.1   thorpej 		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
   1316        1.1   thorpej 	else
   1317        1.1   thorpej 		f = fv->fv_p2ftab[1];
   1318        1.1   thorpej 	return (f);
   1319        1.1   thorpej }
   1320        1.1   thorpej 
   1321        1.1   thorpej /*
   1322        1.1   thorpej  * check if an arriving packet should be pre-dropped.
   1323        1.1   thorpej  * called from red_addq() when a packet arrives.
   1324        1.1   thorpej  * returns 1 when the packet should be pre-dropped.
   1325        1.3   thorpej  * should be called in splnet.
   1326        1.1   thorpej  */
   1327        1.1   thorpej static int
   1328       1.21     peter fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
   1329       1.21     peter     struct fve **fcache)
   1330        1.1   thorpej {
   1331        1.1   thorpej 	struct fve *fve;
   1332        1.1   thorpej 	struct timeval now;
   1333        1.1   thorpej 
   1334        1.1   thorpej 	fv->fv_ifseq++;
   1335        1.1   thorpej 	FV_TIMESTAMP(&now);
   1336        1.1   thorpej 
   1337        1.1   thorpej 	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
   1338        1.1   thorpej 		/* no matching entry in the flowlist */
   1339        1.1   thorpej 		return (0);
   1340        1.1   thorpej 
   1341        1.1   thorpej 	*fcache = fve;
   1342        1.1   thorpej 
   1343        1.1   thorpej 	/* update fraction f for every FV_N packets */
   1344        1.1   thorpej 	if (++fve->fve_count == FV_N) {
   1345        1.1   thorpej 		/*
   1346        1.1   thorpej 		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
   1347        1.1   thorpej 		 */
   1348        1.1   thorpej 		fve->fve_f =
   1349        1.1   thorpej 			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
   1350        1.1   thorpej 			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
   1351        1.1   thorpej 		fve->fve_ifseq = fv->fv_ifseq;
   1352        1.1   thorpej 		fve->fve_count = 0;
   1353        1.1   thorpej 	}
   1354        1.1   thorpej 
   1355        1.1   thorpej 	/*
   1356        1.1   thorpej 	 * overpumping test
   1357        1.1   thorpej 	 */
   1358        1.1   thorpej 	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
   1359        1.1   thorpej 		int fthresh;
   1360        1.1   thorpej 
   1361        1.1   thorpej 		/* calculate a threshold */
   1362        1.1   thorpej 		fthresh = fv_p2f(fv, fve->fve_p);
   1363        1.1   thorpej 		if (fve->fve_f > fthresh)
   1364        1.1   thorpej 			fve->fve_state = Red;
   1365        1.1   thorpej 	}
   1366        1.1   thorpej 
   1367        1.1   thorpej 	if (fve->fve_state == Red) {
   1368        1.1   thorpej 		/*
   1369        1.1   thorpej 		 * backoff test
   1370        1.1   thorpej 		 */
   1371        1.1   thorpej 		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
   1372        1.1   thorpej 			/* no drop for at least FV_BACKOFFTHRESH sec */
   1373        1.1   thorpej 			fve->fve_p = 0;
   1374        1.1   thorpej 			fve->fve_state = Green;
   1375        1.1   thorpej #ifdef FV_STATS
   1376        1.1   thorpej 			fv->fv_stats.escape++;
   1377        1.1   thorpej #endif
   1378        1.1   thorpej 		} else {
   1379        1.1   thorpej 			/* block this flow */
   1380        1.1   thorpej 			flowlist_move_to_head(fv, fve);
   1381        1.1   thorpej 			fve->fve_lastdrop = now;
   1382        1.1   thorpej #ifdef FV_STATS
   1383        1.1   thorpej 			fv->fv_stats.predrop++;
   1384        1.1   thorpej #endif
   1385        1.1   thorpej 			return (1);
   1386        1.1   thorpej 		}
   1387        1.1   thorpej 	}
   1388        1.1   thorpej 
   1389        1.1   thorpej 	/*
   1390        1.1   thorpej 	 * p = (1 - Wp) * p
   1391        1.1   thorpej 	 */
   1392        1.1   thorpej 	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
   1393        1.1   thorpej 	if (fve->fve_p < 0)
   1394        1.1   thorpej 		fve->fve_p = 0;
   1395        1.1   thorpej #ifdef FV_STATS
   1396        1.1   thorpej 	fv->fv_stats.pass++;
   1397        1.1   thorpej #endif
   1398        1.1   thorpej 	return (0);
   1399        1.1   thorpej }
   1400        1.1   thorpej 
   1401        1.1   thorpej /*
   1402        1.1   thorpej  * called from red_addq when a packet is dropped by red.
   1403        1.3   thorpej  * should be called in splnet.
   1404        1.1   thorpej  */
   1405       1.21     peter static void
   1406       1.21     peter fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
   1407       1.21     peter     struct fve *fcache)
   1408        1.1   thorpej {
   1409        1.1   thorpej 	struct fve *fve;
   1410        1.1   thorpej 	struct timeval now;
   1411        1.1   thorpej 
   1412        1.1   thorpej 	if (pktattr == NULL)
   1413        1.1   thorpej 		return;
   1414        1.1   thorpej 	FV_TIMESTAMP(&now);
   1415        1.1   thorpej 
   1416        1.1   thorpej 	if (fcache != NULL)
   1417        1.1   thorpej 		/* the fve of this packet is already cached */
   1418        1.1   thorpej 		fve = fcache;
   1419        1.1   thorpej 	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
   1420        1.1   thorpej 		fve = flowlist_reclaim(fv, pktattr);
   1421        1.1   thorpej 
   1422        1.1   thorpej 	flowlist_move_to_head(fv, fve);
   1423        1.1   thorpej 
   1424        1.1   thorpej 	/*
   1425        1.1   thorpej 	 * update p:  the following line cancels the update
   1426        1.1   thorpej 	 *	      in fv_checkflow() and calculate
   1427        1.1   thorpej 	 *	p = Wp + (1 - Wp) * p
   1428        1.1   thorpej 	 */
   1429        1.1   thorpej 	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
   1430        1.1   thorpej 
   1431        1.1   thorpej 	fve->fve_lastdrop = now;
   1432        1.1   thorpej }
   1433        1.1   thorpej 
   1434        1.1   thorpej #endif /* ALTQ_FLOWVALVE */
   1435        1.1   thorpej 
   1436        1.1   thorpej #ifdef KLD_MODULE
   1437        1.1   thorpej 
   1438        1.1   thorpej static struct altqsw red_sw =
   1439        1.1   thorpej 	{"red", redopen, redclose, redioctl};
   1440        1.1   thorpej 
   1441        1.1   thorpej ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
   1442       1.21     peter MODULE_VERSION(altq_red, 1);
   1443        1.1   thorpej 
   1444        1.1   thorpej #endif /* KLD_MODULE */
   1445       1.21     peter #endif /* ALTQ3_COMPAT */
   1446        1.1   thorpej 
   1447        1.1   thorpej #endif /* ALTQ_RED */
   1448