Home | History | Annotate | Line # | Download | only in altq
altq_red.c revision 1.13.12.2
      1  1.13.12.2     peter /*	$NetBSD: altq_red.c,v 1.13.12.2 2006/03/18 18:18:53 peter Exp $	*/
      2  1.13.12.1     peter /*	$KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $	*/
      3        1.1   thorpej 
      4        1.1   thorpej /*
      5  1.13.12.1     peter  * Copyright (C) 1997-2003
      6        1.1   thorpej  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7        1.1   thorpej  *
      8        1.1   thorpej  * Redistribution and use in source and binary forms, with or without
      9        1.1   thorpej  * modification, are permitted provided that the following conditions
     10        1.1   thorpej  * are met:
     11        1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     12        1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     13        1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     14        1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     15        1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     16        1.1   thorpej  *
     17        1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18        1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19        1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20        1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21        1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22        1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23        1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24        1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25        1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26        1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27        1.1   thorpej  * SUCH DAMAGE.
     28        1.1   thorpej  *
     29        1.1   thorpej  */
     30        1.1   thorpej /*
     31        1.1   thorpej  * Copyright (c) 1990-1994 Regents of the University of California.
     32        1.1   thorpej  * All rights reserved.
     33        1.1   thorpej  *
     34        1.1   thorpej  * Redistribution and use in source and binary forms, with or without
     35        1.1   thorpej  * modification, are permitted provided that the following conditions
     36        1.1   thorpej  * are met:
     37        1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     38        1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     39        1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     40        1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     41        1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     42        1.1   thorpej  * 3. All advertising materials mentioning features or use of this software
     43        1.1   thorpej  *    must display the following acknowledgement:
     44        1.1   thorpej  *	This product includes software developed by the Computer Systems
     45        1.1   thorpej  *	Engineering Group at Lawrence Berkeley Laboratory.
     46        1.1   thorpej  * 4. Neither the name of the University nor of the Laboratory may be used
     47        1.1   thorpej  *    to endorse or promote products derived from this software without
     48        1.1   thorpej  *    specific prior written permission.
     49        1.1   thorpej  *
     50        1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     51        1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     52        1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     53        1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     54        1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     55        1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     56        1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     57        1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     58        1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     59        1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     60        1.1   thorpej  * SUCH DAMAGE.
     61        1.1   thorpej  */
     62        1.5     lukem 
     63        1.5     lukem #include <sys/cdefs.h>
     64  1.13.12.2     peter __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.12.2 2006/03/18 18:18:53 peter Exp $");
     65        1.1   thorpej 
     66  1.13.12.1     peter #ifdef _KERNEL_OPT
     67        1.1   thorpej #include "opt_altq.h"
     68        1.1   thorpej #include "opt_inet.h"
     69        1.1   thorpej #endif
     70  1.13.12.1     peter 
     71        1.1   thorpej #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
     72        1.1   thorpej 
     73        1.1   thorpej #include <sys/param.h>
     74        1.1   thorpej #include <sys/malloc.h>
     75        1.1   thorpej #include <sys/mbuf.h>
     76        1.1   thorpej #include <sys/socket.h>
     77        1.1   thorpej #include <sys/systm.h>
     78        1.1   thorpej #include <sys/errno.h>
     79  1.13.12.1     peter #if 1 /* ALTQ3_COMPAT */
     80  1.13.12.1     peter #include <sys/sockio.h>
     81  1.13.12.1     peter #include <sys/proc.h>
     82        1.1   thorpej #include <sys/kernel.h>
     83        1.1   thorpej #ifdef ALTQ_FLOWVALVE
     84        1.1   thorpej #include <sys/queue.h>
     85        1.1   thorpej #include <sys/time.h>
     86        1.1   thorpej #endif
     87  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
     88        1.1   thorpej 
     89        1.1   thorpej #include <net/if.h>
     90        1.1   thorpej 
     91        1.1   thorpej #include <netinet/in.h>
     92        1.1   thorpej #include <netinet/in_systm.h>
     93        1.1   thorpej #include <netinet/ip.h>
     94        1.1   thorpej #ifdef INET6
     95        1.1   thorpej #include <netinet/ip6.h>
     96        1.1   thorpej #endif
     97        1.1   thorpej 
     98  1.13.12.1     peter #include <net/pfvar.h>
     99        1.1   thorpej #include <altq/altq.h>
    100        1.1   thorpej #include <altq/altq_red.h>
    101  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    102  1.13.12.1     peter #include <altq/altq_conf.h>
    103        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    104        1.1   thorpej #include <altq/altq_flowvalve.h>
    105        1.1   thorpej #endif
    106  1.13.12.1     peter #endif
    107        1.1   thorpej 
    108        1.1   thorpej /*
    109        1.1   thorpej  * ALTQ/RED (Random Early Detection) implementation using 32-bit
    110        1.1   thorpej  * fixed-point calculation.
    111        1.1   thorpej  *
    112        1.1   thorpej  * written by kjc using the ns code as a reference.
    113        1.1   thorpej  * you can learn more about red and ns from Sally's home page at
    114        1.1   thorpej  * http://www-nrg.ee.lbl.gov/floyd/
    115        1.1   thorpej  *
    116        1.1   thorpej  * most of the red parameter values are fixed in this implementation
    117        1.1   thorpej  * to prevent fixed-point overflow/underflow.
    118        1.1   thorpej  * if you change the parameters, watch out for overflow/underflow!
    119        1.1   thorpej  *
    120        1.1   thorpej  * the parameters used are recommended values by Sally.
    121        1.1   thorpej  * the corresponding ns config looks:
    122        1.1   thorpej  *	q_weight=0.00195
    123        1.1   thorpej  *	minthresh=5 maxthresh=15 queue-size=60
    124        1.1   thorpej  *	linterm=30
    125        1.1   thorpej  *	dropmech=drop-tail
    126        1.1   thorpej  *	bytes=false (can't be handled by 32-bit fixed-point)
    127       1.11     perry  *	doubleq=false dqthresh=false
    128        1.1   thorpej  *	wait=true
    129        1.1   thorpej  */
    130        1.1   thorpej /*
    131        1.1   thorpej  * alternative red parameters for a slow link.
    132        1.1   thorpej  *
    133        1.1   thorpej  * assume the queue length becomes from zero to L and keeps L, it takes
    134        1.1   thorpej  * N packets for q_avg to reach 63% of L.
    135        1.1   thorpej  * when q_weight is 0.002, N is about 500 packets.
    136        1.1   thorpej  * for a slow link like dial-up, 500 packets takes more than 1 minute!
    137        1.1   thorpej  * when q_weight is 0.008, N is about 127 packets.
    138        1.1   thorpej  * when q_weight is 0.016, N is about 63 packets.
    139  1.13.12.1     peter  * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
    140        1.1   thorpej  * are allowed for 0.016.
    141        1.1   thorpej  * see Sally's paper for more details.
    142        1.1   thorpej  */
    143        1.1   thorpej /* normal red parameters */
    144        1.1   thorpej #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    145        1.1   thorpej 				/* q_weight = 0.00195 */
    146        1.1   thorpej 
    147        1.1   thorpej /* red parameters for a slow link */
    148        1.1   thorpej #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    149        1.1   thorpej 				/* q_weight = 0.0078125 */
    150        1.1   thorpej 
    151        1.1   thorpej /* red parameters for a very slow link (e.g., dialup) */
    152        1.1   thorpej #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    153        1.1   thorpej 				/* q_weight = 0.015625 */
    154        1.1   thorpej 
    155        1.1   thorpej /* fixed-point uses 12-bit decimal places */
    156        1.1   thorpej #define	FP_SHIFT	12	/* fixed-point shift */
    157        1.1   thorpej 
    158        1.1   thorpej /* red parameters for drop probability */
    159        1.1   thorpej #define	INV_P_MAX	10	/* inverse of max drop probability */
    160        1.1   thorpej #define	TH_MIN		5	/* min threshold */
    161        1.1   thorpej #define	TH_MAX		15	/* max threshold */
    162        1.1   thorpej 
    163  1.13.12.1     peter #define	RED_LIMIT	60	/* default max queue lenght */
    164  1.13.12.1     peter #define	RED_STATS		/* collect statistics */
    165        1.1   thorpej 
    166        1.1   thorpej /*
    167        1.1   thorpej  * our default policy for forced-drop is drop-tail.
    168        1.1   thorpej  * (in altq-1.1.2 or earlier, the default was random-drop.
    169        1.1   thorpej  * but it makes more sense to punish the cause of the surge.)
    170        1.1   thorpej  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
    171        1.1   thorpej  */
    172        1.1   thorpej 
    173  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    174        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    175        1.1   thorpej /*
    176  1.13.12.1     peter  * flow-valve is an extention to protect red from unresponsive flows
    177        1.1   thorpej  * and to promote end-to-end congestion control.
    178        1.1   thorpej  * flow-valve observes the average drop rates of the flows that have
    179        1.1   thorpej  * experienced packet drops in the recent past.
    180        1.1   thorpej  * when the average drop rate exceeds the threshold, the flow is
    181        1.1   thorpej  * blocked by the flow-valve.  the trapped flow should back off
    182        1.1   thorpej  * exponentially to escape from the flow-valve.
    183        1.1   thorpej  */
    184        1.1   thorpej #ifdef RED_RANDOM_DROP
    185        1.1   thorpej #error "random-drop can't be used with flow-valve!"
    186        1.1   thorpej #endif
    187        1.1   thorpej #endif /* ALTQ_FLOWVALVE */
    188        1.1   thorpej 
    189        1.1   thorpej /* red_list keeps all red_queue_t's allocated. */
    190        1.1   thorpej static red_queue_t *red_list = NULL;
    191        1.1   thorpej 
    192  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    193  1.13.12.1     peter 
    194        1.1   thorpej /* default red parameter values */
    195        1.1   thorpej static int default_th_min = TH_MIN;
    196        1.1   thorpej static int default_th_max = TH_MAX;
    197        1.1   thorpej static int default_inv_pmax = INV_P_MAX;
    198        1.1   thorpej 
    199  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    200        1.1   thorpej /* internal function prototypes */
    201  1.13.12.1     peter static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
    202  1.13.12.1     peter static struct mbuf *red_dequeue(struct ifaltq *, int);
    203  1.13.12.1     peter static int red_request(struct ifaltq *, int, void *);
    204  1.13.12.1     peter static void red_purgeq(red_queue_t *);
    205  1.13.12.1     peter static int red_detach(red_queue_t *);
    206        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    207  1.13.12.1     peter static inline struct fve *flowlist_lookup(struct flowvalve *,
    208  1.13.12.1     peter 			 struct altq_pktattr *, struct timeval *);
    209  1.13.12.1     peter static inline struct fve *flowlist_reclaim(struct flowvalve *,
    210  1.13.12.1     peter 					     struct altq_pktattr *);
    211  1.13.12.1     peter static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
    212  1.13.12.1     peter static inline int fv_p2f(struct flowvalve *, int);
    213  1.13.12.1     peter static struct flowvalve *fv_alloc(struct red *);
    214  1.13.12.1     peter static void fv_destroy(struct flowvalve *);
    215  1.13.12.1     peter static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
    216  1.13.12.1     peter 			struct fve **);
    217  1.13.12.1     peter static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
    218  1.13.12.1     peter 			 struct fve *);
    219        1.1   thorpej #endif
    220  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    221        1.1   thorpej 
    222        1.1   thorpej /*
    223  1.13.12.1     peter  * red support routines
    224        1.1   thorpej  */
    225  1.13.12.1     peter red_t *
    226  1.13.12.1     peter red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    227  1.13.12.1     peter    int pkttime)
    228        1.1   thorpej {
    229  1.13.12.1     peter 	red_t	*rp;
    230  1.13.12.1     peter 	int	 w, i;
    231  1.13.12.1     peter 	int	 npkts_per_sec;
    232        1.1   thorpej 
    233  1.13.12.1     peter 	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
    234  1.13.12.1     peter 	if (rp == NULL)
    235  1.13.12.1     peter 		return (NULL);
    236  1.13.12.1     peter 	(void)memset(rp, 0, sizeof(red_t));
    237        1.1   thorpej 
    238  1.13.12.1     peter 	rp->red_avg = 0;
    239  1.13.12.1     peter 	rp->red_idle = 1;
    240        1.1   thorpej 
    241  1.13.12.1     peter 	if (weight == 0)
    242  1.13.12.1     peter 		rp->red_weight = W_WEIGHT;
    243  1.13.12.1     peter 	else
    244  1.13.12.1     peter 		rp->red_weight = weight;
    245  1.13.12.1     peter 	if (inv_pmax == 0)
    246  1.13.12.1     peter 		rp->red_inv_pmax = default_inv_pmax;
    247  1.13.12.1     peter 	else
    248  1.13.12.1     peter 		rp->red_inv_pmax = inv_pmax;
    249  1.13.12.1     peter 	if (th_min == 0)
    250  1.13.12.1     peter 		rp->red_thmin = default_th_min;
    251  1.13.12.1     peter 	else
    252  1.13.12.1     peter 		rp->red_thmin = th_min;
    253  1.13.12.1     peter 	if (th_max == 0)
    254  1.13.12.1     peter 		rp->red_thmax = default_th_max;
    255  1.13.12.1     peter 	else
    256  1.13.12.1     peter 		rp->red_thmax = th_max;
    257        1.1   thorpej 
    258  1.13.12.1     peter 	rp->red_flags = flags;
    259       1.11     perry 
    260  1.13.12.1     peter 	if (pkttime == 0)
    261  1.13.12.1     peter 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    262  1.13.12.1     peter 		rp->red_pkttime = 800;
    263  1.13.12.1     peter 	else
    264  1.13.12.1     peter 		rp->red_pkttime = pkttime;
    265        1.1   thorpej 
    266  1.13.12.1     peter 	if (weight == 0) {
    267  1.13.12.1     peter 		/* when the link is very slow, adjust red parameters */
    268  1.13.12.1     peter 		npkts_per_sec = 1000000 / rp->red_pkttime;
    269  1.13.12.1     peter 		if (npkts_per_sec < 50) {
    270  1.13.12.1     peter 			/* up to about 400Kbps */
    271  1.13.12.1     peter 			rp->red_weight = W_WEIGHT_2;
    272  1.13.12.1     peter 		} else if (npkts_per_sec < 300) {
    273  1.13.12.1     peter 			/* up to about 2.4Mbps */
    274  1.13.12.1     peter 			rp->red_weight = W_WEIGHT_1;
    275        1.1   thorpej 		}
    276  1.13.12.1     peter 	}
    277        1.1   thorpej 
    278  1.13.12.1     peter 	/* calculate wshift.  weight must be power of 2 */
    279  1.13.12.1     peter 	w = rp->red_weight;
    280  1.13.12.1     peter 	for (i = 0; w > 1; i++)
    281  1.13.12.1     peter 		w = w >> 1;
    282  1.13.12.1     peter 	rp->red_wshift = i;
    283  1.13.12.1     peter 	w = 1 << rp->red_wshift;
    284  1.13.12.1     peter 	if (w != rp->red_weight) {
    285  1.13.12.1     peter 		printf("invalid weight value %d for red! use %d\n",
    286  1.13.12.1     peter 		       rp->red_weight, w);
    287  1.13.12.1     peter 		rp->red_weight = w;
    288  1.13.12.1     peter 	}
    289        1.1   thorpej 
    290  1.13.12.1     peter 	/*
    291  1.13.12.1     peter 	 * thmin_s and thmax_s are scaled versions of th_min and th_max
    292  1.13.12.1     peter 	 * to be compared with avg.
    293  1.13.12.1     peter 	 */
    294  1.13.12.1     peter 	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
    295  1.13.12.1     peter 	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
    296        1.1   thorpej 
    297  1.13.12.1     peter 	/*
    298  1.13.12.1     peter 	 * precompute probability denominator
    299  1.13.12.1     peter 	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
    300  1.13.12.1     peter 	 */
    301  1.13.12.1     peter 	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
    302  1.13.12.1     peter 			 * rp->red_inv_pmax) << FP_SHIFT;
    303        1.1   thorpej 
    304  1.13.12.1     peter 	/* allocate weight table */
    305  1.13.12.1     peter 	rp->red_wtab = wtab_alloc(rp->red_weight);
    306        1.1   thorpej 
    307  1.13.12.1     peter 	microtime(&rp->red_last);
    308  1.13.12.2     peter #ifdef ALTQ3_COMPAT
    309  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    310  1.13.12.1     peter 	if (flags & REDF_FLOWVALVE)
    311  1.13.12.1     peter 		rp->red_flowvalve = fv_alloc(rp);
    312  1.13.12.1     peter 	/* if fv_alloc failes, flowvalve is just disabled */
    313  1.13.12.1     peter #endif
    314  1.13.12.2     peter #endif /* ALTQ3_COMPAT */
    315  1.13.12.1     peter 	return (rp);
    316  1.13.12.1     peter }
    317        1.1   thorpej 
    318  1.13.12.1     peter void
    319  1.13.12.1     peter red_destroy(red_t *rp)
    320  1.13.12.1     peter {
    321  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    322  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    323  1.13.12.1     peter 	if (rp->red_flowvalve != NULL)
    324  1.13.12.1     peter 		fv_destroy(rp->red_flowvalve);
    325  1.13.12.1     peter #endif
    326  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    327  1.13.12.1     peter 	wtab_destroy(rp->red_wtab);
    328  1.13.12.1     peter 	FREE(rp, M_DEVBUF);
    329  1.13.12.1     peter }
    330        1.1   thorpej 
    331  1.13.12.1     peter void
    332  1.13.12.1     peter red_getstats(red_t *rp, struct redstats *sp)
    333  1.13.12.1     peter {
    334  1.13.12.1     peter 	sp->q_avg		= rp->red_avg >> rp->red_wshift;
    335  1.13.12.1     peter 	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
    336  1.13.12.1     peter 	sp->drop_cnt		= rp->red_stats.drop_cnt;
    337  1.13.12.1     peter 	sp->drop_forced		= rp->red_stats.drop_forced;
    338  1.13.12.1     peter 	sp->drop_unforced	= rp->red_stats.drop_unforced;
    339  1.13.12.1     peter 	sp->marked_packets	= rp->red_stats.marked_packets;
    340  1.13.12.1     peter }
    341        1.1   thorpej 
    342  1.13.12.1     peter int
    343  1.13.12.1     peter red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    344  1.13.12.1     peter     struct altq_pktattr *pktattr)
    345  1.13.12.1     peter {
    346  1.13.12.1     peter 	int avg, droptype;
    347  1.13.12.1     peter 	int n;
    348  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    349  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    350  1.13.12.1     peter 	struct fve *fve = NULL;
    351        1.1   thorpej 
    352  1.13.12.1     peter 	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
    353  1.13.12.1     peter 		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
    354  1.13.12.1     peter 			m_freem(m);
    355  1.13.12.1     peter 			return (-1);
    356        1.1   thorpej 		}
    357  1.13.12.1     peter #endif
    358  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    359        1.1   thorpej 
    360  1.13.12.1     peter 	avg = rp->red_avg;
    361        1.1   thorpej 
    362  1.13.12.1     peter 	/*
    363  1.13.12.1     peter 	 * if we were idle, we pretend that n packets arrived during
    364  1.13.12.1     peter 	 * the idle period.
    365  1.13.12.1     peter 	 */
    366  1.13.12.1     peter 	if (rp->red_idle) {
    367  1.13.12.1     peter 		struct timeval now;
    368  1.13.12.1     peter 		int t;
    369        1.1   thorpej 
    370  1.13.12.1     peter 		rp->red_idle = 0;
    371  1.13.12.1     peter 		microtime(&now);
    372  1.13.12.1     peter 		t = (now.tv_sec - rp->red_last.tv_sec);
    373  1.13.12.1     peter 		if (t > 60) {
    374  1.13.12.1     peter 			/*
    375  1.13.12.1     peter 			 * being idle for more than 1 minute, set avg to zero.
    376  1.13.12.1     peter 			 * this prevents t from overflow.
    377  1.13.12.1     peter 			 */
    378  1.13.12.1     peter 			avg = 0;
    379  1.13.12.1     peter 		} else {
    380  1.13.12.1     peter 			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
    381  1.13.12.1     peter 			n = t / rp->red_pkttime - 1;
    382        1.1   thorpej 
    383  1.13.12.1     peter 			/* the following line does (avg = (1 - Wq)^n * avg) */
    384  1.13.12.1     peter 			if (n > 0)
    385  1.13.12.1     peter 				avg = (avg >> FP_SHIFT) *
    386  1.13.12.1     peter 				    pow_w(rp->red_wtab, n);
    387  1.13.12.1     peter 		}
    388  1.13.12.1     peter 	}
    389        1.1   thorpej 
    390  1.13.12.1     peter 	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
    391  1.13.12.1     peter 	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
    392  1.13.12.1     peter 	rp->red_avg = avg;		/* save the new value */
    393        1.1   thorpej 
    394  1.13.12.1     peter 	/*
    395  1.13.12.1     peter 	 * red_count keeps a tally of arriving traffic that has not
    396  1.13.12.1     peter 	 * been dropped.
    397  1.13.12.1     peter 	 */
    398  1.13.12.1     peter 	rp->red_count++;
    399        1.1   thorpej 
    400  1.13.12.1     peter 	/* see if we drop early */
    401  1.13.12.1     peter 	droptype = DTYPE_NODROP;
    402  1.13.12.1     peter 	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
    403  1.13.12.1     peter 		if (avg >= rp->red_thmax_s) {
    404  1.13.12.1     peter 			/* avg >= th_max: forced drop */
    405  1.13.12.1     peter 			droptype = DTYPE_FORCED;
    406  1.13.12.1     peter 		} else if (rp->red_old == 0) {
    407  1.13.12.1     peter 			/* first exceeds th_min */
    408  1.13.12.1     peter 			rp->red_count = 1;
    409  1.13.12.1     peter 			rp->red_old = 1;
    410  1.13.12.1     peter 		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
    411  1.13.12.1     peter 				      rp->red_probd, rp->red_count)) {
    412  1.13.12.1     peter 			/* mark or drop by red */
    413  1.13.12.1     peter 			if ((rp->red_flags & REDF_ECN) &&
    414  1.13.12.1     peter 			    mark_ecn(m, pktattr, rp->red_flags)) {
    415  1.13.12.1     peter 				/* successfully marked.  do not drop. */
    416  1.13.12.1     peter 				rp->red_count = 0;
    417  1.13.12.1     peter #ifdef RED_STATS
    418  1.13.12.1     peter 				rp->red_stats.marked_packets++;
    419  1.13.12.1     peter #endif
    420  1.13.12.1     peter 			} else {
    421  1.13.12.1     peter 				/* unforced drop by red */
    422  1.13.12.1     peter 				droptype = DTYPE_EARLY;
    423        1.1   thorpej 			}
    424  1.13.12.1     peter 		}
    425  1.13.12.1     peter 	} else {
    426  1.13.12.1     peter 		/* avg < th_min */
    427  1.13.12.1     peter 		rp->red_old = 0;
    428  1.13.12.1     peter 	}
    429        1.1   thorpej 
    430  1.13.12.1     peter 	/*
    431  1.13.12.1     peter 	 * if the queue length hits the hard limit, it's a forced drop.
    432  1.13.12.1     peter 	 */
    433  1.13.12.1     peter 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
    434  1.13.12.1     peter 		droptype = DTYPE_FORCED;
    435       1.11     perry 
    436  1.13.12.1     peter #ifdef RED_RANDOM_DROP
    437  1.13.12.1     peter 	/* if successful or forced drop, enqueue this packet. */
    438  1.13.12.1     peter 	if (droptype != DTYPE_EARLY)
    439  1.13.12.1     peter 		_addq(q, m);
    440  1.13.12.1     peter #else
    441  1.13.12.1     peter 	/* if successful, enqueue this packet. */
    442  1.13.12.1     peter 	if (droptype == DTYPE_NODROP)
    443  1.13.12.1     peter 		_addq(q, m);
    444  1.13.12.1     peter #endif
    445  1.13.12.1     peter 	if (droptype != DTYPE_NODROP) {
    446  1.13.12.1     peter 		if (droptype == DTYPE_EARLY) {
    447  1.13.12.1     peter 			/* drop the incoming packet */
    448  1.13.12.1     peter #ifdef RED_STATS
    449  1.13.12.1     peter 			rp->red_stats.drop_unforced++;
    450  1.13.12.1     peter #endif
    451  1.13.12.1     peter 		} else {
    452  1.13.12.1     peter 			/* forced drop, select a victim packet in the queue. */
    453  1.13.12.1     peter #ifdef RED_RANDOM_DROP
    454  1.13.12.1     peter 			m = _getq_random(q);
    455  1.13.12.1     peter #endif
    456  1.13.12.1     peter #ifdef RED_STATS
    457  1.13.12.1     peter 			rp->red_stats.drop_forced++;
    458  1.13.12.1     peter #endif
    459  1.13.12.1     peter 		}
    460  1.13.12.1     peter #ifdef RED_STATS
    461  1.13.12.1     peter 		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
    462  1.13.12.1     peter #endif
    463  1.13.12.1     peter 		rp->red_count = 0;
    464  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    465  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    466  1.13.12.1     peter 		if (rp->red_flowvalve != NULL)
    467  1.13.12.1     peter 			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
    468  1.13.12.1     peter #endif
    469  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    470  1.13.12.1     peter 		m_freem(m);
    471  1.13.12.1     peter 		return (-1);
    472        1.1   thorpej 	}
    473  1.13.12.1     peter 	/* successfully queued */
    474  1.13.12.1     peter #ifdef RED_STATS
    475  1.13.12.1     peter 	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
    476  1.13.12.1     peter #endif
    477  1.13.12.1     peter 	return (0);
    478        1.1   thorpej }
    479        1.1   thorpej 
    480  1.13.12.1     peter /*
    481  1.13.12.1     peter  * early-drop probability is calculated as follows:
    482  1.13.12.1     peter  *   prob = p_max * (avg - th_min) / (th_max - th_min)
    483  1.13.12.1     peter  *   prob_a = prob / (2 - count*prob)
    484  1.13.12.1     peter  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
    485  1.13.12.1     peter  * here prob_a increases as successive undrop count increases.
    486  1.13.12.1     peter  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
    487  1.13.12.1     peter  * becomes 1 when (count >= (2 / prob))).
    488  1.13.12.1     peter  */
    489  1.13.12.1     peter int
    490  1.13.12.1     peter drop_early(int fp_len, int fp_probd, int count)
    491        1.1   thorpej {
    492  1.13.12.1     peter 	int	d;		/* denominator of drop-probability */
    493        1.1   thorpej 
    494  1.13.12.1     peter 	d = fp_probd - count * fp_len;
    495  1.13.12.1     peter 	if (d <= 0)
    496  1.13.12.1     peter 		/* count exceeds the hard limit: drop or mark */
    497  1.13.12.1     peter 		return (1);
    498        1.1   thorpej 
    499  1.13.12.1     peter 	/*
    500  1.13.12.1     peter 	 * now the range of d is [1..600] in fixed-point. (when
    501  1.13.12.1     peter 	 * th_max-th_min=10 and p_max=1/30)
    502  1.13.12.1     peter 	 * drop probability = (avg - TH_MIN) / d
    503  1.13.12.1     peter 	 */
    504        1.1   thorpej 
    505  1.13.12.1     peter 	if ((arc4random() % d) < fp_len) {
    506  1.13.12.1     peter 		/* drop or mark */
    507  1.13.12.1     peter 		return (1);
    508        1.1   thorpej 	}
    509  1.13.12.1     peter 	/* no drop/mark */
    510  1.13.12.1     peter 	return (0);
    511        1.1   thorpej }
    512        1.1   thorpej 
    513        1.1   thorpej /*
    514  1.13.12.1     peter  * try to mark CE bit to the packet.
    515  1.13.12.1     peter  *    returns 1 if successfully marked, 0 otherwise.
    516        1.1   thorpej  */
    517  1.13.12.1     peter int
    518  1.13.12.1     peter mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
    519  1.13.12.1     peter {
    520  1.13.12.1     peter 	struct mbuf	*m0;
    521  1.13.12.1     peter 	struct m_tag	*t;
    522  1.13.12.1     peter 	struct altq_tag	*at;
    523  1.13.12.1     peter 	void		*hdr;
    524  1.13.12.1     peter 	int		 af;
    525  1.13.12.1     peter 
    526  1.13.12.1     peter 	t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
    527  1.13.12.1     peter 	if (t != NULL) {
    528  1.13.12.1     peter 		at = (struct altq_tag *)(t + 1);
    529  1.13.12.1     peter 		if (at == NULL)
    530  1.13.12.1     peter 			return (0);
    531  1.13.12.1     peter 		af = at->af;
    532  1.13.12.1     peter 		hdr = at->hdr;
    533  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    534  1.13.12.1     peter 	} else if (pktattr != NULL) {
    535  1.13.12.1     peter 		af = pktattr->pattr_af;
    536  1.13.12.1     peter 		hdr = pktattr->pattr_hdr;
    537  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    538  1.13.12.1     peter 	} else
    539  1.13.12.1     peter 		return (0);
    540        1.1   thorpej 
    541  1.13.12.1     peter 	if (af != AF_INET && af != AF_INET6)
    542  1.13.12.1     peter 		return (0);
    543       1.11     perry 
    544  1.13.12.1     peter 	/* verify that pattr_hdr is within the mbuf data */
    545  1.13.12.1     peter 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
    546  1.13.12.1     peter 		if (((caddr_t)hdr >= m0->m_data) &&
    547  1.13.12.1     peter 		    ((caddr_t)hdr < m0->m_data + m0->m_len))
    548  1.13.12.1     peter 			break;
    549  1.13.12.1     peter 	if (m0 == NULL) {
    550  1.13.12.1     peter 		/* ick, tag info is stale */
    551  1.13.12.1     peter 		return (0);
    552  1.13.12.1     peter 	}
    553        1.1   thorpej 
    554  1.13.12.1     peter 	switch (af) {
    555  1.13.12.1     peter 	case AF_INET:
    556  1.13.12.1     peter 		if (flags & REDF_ECN4) {
    557  1.13.12.1     peter 			struct ip *ip = hdr;
    558  1.13.12.1     peter 			u_int8_t otos;
    559  1.13.12.1     peter 			int sum;
    560        1.1   thorpej 
    561  1.13.12.1     peter 			if (ip->ip_v != 4)
    562  1.13.12.1     peter 				return (0);	/* version mismatch! */
    563        1.1   thorpej 
    564  1.13.12.1     peter 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
    565  1.13.12.1     peter 				return (0);	/* not-ECT */
    566  1.13.12.1     peter 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
    567  1.13.12.1     peter 				return (1);	/* already marked */
    568        1.1   thorpej 
    569  1.13.12.1     peter 			/*
    570  1.13.12.1     peter 			 * ecn-capable but not marked,
    571  1.13.12.1     peter 			 * mark CE and update checksum
    572  1.13.12.1     peter 			 */
    573  1.13.12.1     peter 			otos = ip->ip_tos;
    574  1.13.12.1     peter 			ip->ip_tos |= IPTOS_ECN_CE;
    575  1.13.12.1     peter 			/*
    576  1.13.12.1     peter 			 * update checksum (from RFC1624)
    577  1.13.12.1     peter 			 *	   HC' = ~(~HC + ~m + m')
    578  1.13.12.1     peter 			 */
    579  1.13.12.1     peter 			sum = ~ntohs(ip->ip_sum) & 0xffff;
    580  1.13.12.1     peter 			sum += (~otos & 0xffff) + ip->ip_tos;
    581  1.13.12.1     peter 			sum = (sum >> 16) + (sum & 0xffff);
    582  1.13.12.1     peter 			sum += (sum >> 16);  /* add carry */
    583  1.13.12.1     peter 			ip->ip_sum = htons(~sum & 0xffff);
    584  1.13.12.1     peter 			return (1);
    585        1.1   thorpej 		}
    586  1.13.12.1     peter 		break;
    587  1.13.12.1     peter #ifdef INET6
    588  1.13.12.1     peter 	case AF_INET6:
    589  1.13.12.1     peter 		if (flags & REDF_ECN6) {
    590  1.13.12.1     peter 			struct ip6_hdr *ip6 = hdr;
    591  1.13.12.1     peter 			u_int32_t flowlabel;
    592        1.1   thorpej 
    593  1.13.12.1     peter 			flowlabel = ntohl(ip6->ip6_flow);
    594  1.13.12.1     peter 			if ((flowlabel >> 28) != 6)
    595  1.13.12.1     peter 				return (0);	/* version mismatch! */
    596  1.13.12.1     peter 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
    597  1.13.12.1     peter 			    (IPTOS_ECN_NOTECT << 20))
    598  1.13.12.1     peter 				return (0);	/* not-ECT */
    599  1.13.12.1     peter 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
    600  1.13.12.1     peter 			    (IPTOS_ECN_CE << 20))
    601  1.13.12.1     peter 				return (1);	/* already marked */
    602  1.13.12.1     peter 			/*
    603  1.13.12.1     peter 			 * ecn-capable but not marked,  mark CE
    604  1.13.12.1     peter 			 */
    605  1.13.12.1     peter 			flowlabel |= (IPTOS_ECN_CE << 20);
    606  1.13.12.1     peter 			ip6->ip6_flow = htonl(flowlabel);
    607  1.13.12.1     peter 			return (1);
    608  1.13.12.1     peter 		}
    609  1.13.12.1     peter 		break;
    610  1.13.12.1     peter #endif  /* INET6 */
    611        1.1   thorpej 	}
    612       1.11     perry 
    613  1.13.12.1     peter 	/* not marked */
    614  1.13.12.1     peter 	return (0);
    615        1.1   thorpej }
    616        1.1   thorpej 
    617  1.13.12.1     peter struct mbuf *
    618  1.13.12.1     peter red_getq(rp, q)
    619        1.1   thorpej 	red_t *rp;
    620  1.13.12.1     peter 	class_queue_t *q;
    621        1.1   thorpej {
    622  1.13.12.1     peter 	struct mbuf *m;
    623        1.1   thorpej 
    624  1.13.12.1     peter 	if ((m = _getq(q)) == NULL) {
    625  1.13.12.1     peter 		if (rp->red_idle == 0) {
    626  1.13.12.1     peter 			rp->red_idle = 1;
    627  1.13.12.1     peter 			microtime(&rp->red_last);
    628  1.13.12.1     peter 		}
    629  1.13.12.1     peter 		return NULL;
    630  1.13.12.1     peter 	}
    631  1.13.12.1     peter 
    632  1.13.12.1     peter 	rp->red_idle = 0;
    633  1.13.12.1     peter 	return (m);
    634        1.1   thorpej }
    635        1.1   thorpej 
    636        1.1   thorpej /*
    637  1.13.12.1     peter  * helper routine to calibrate avg during idle.
    638  1.13.12.1     peter  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
    639  1.13.12.1     peter  * here Wq = 1/weight and the code assumes Wq is close to zero.
    640        1.1   thorpej  *
    641  1.13.12.1     peter  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
    642        1.1   thorpej  */
    643  1.13.12.1     peter static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
    644  1.13.12.1     peter 
    645  1.13.12.1     peter struct wtab *
    646  1.13.12.1     peter wtab_alloc(int weight)
    647        1.1   thorpej {
    648  1.13.12.1     peter 	struct wtab	*w;
    649  1.13.12.1     peter 	int		 i;
    650        1.1   thorpej 
    651  1.13.12.1     peter 	for (w = wtab_list; w != NULL; w = w->w_next)
    652  1.13.12.1     peter 		if (w->w_weight == weight) {
    653  1.13.12.1     peter 			w->w_refcount++;
    654  1.13.12.1     peter 			return (w);
    655  1.13.12.1     peter 		}
    656  1.13.12.1     peter 
    657  1.13.12.1     peter 	MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
    658  1.13.12.1     peter 	if (w == NULL)
    659  1.13.12.1     peter 		panic("wtab_alloc: malloc failed!");
    660  1.13.12.1     peter 	(void)memset(w, 0, sizeof(struct wtab));
    661  1.13.12.1     peter 	w->w_weight = weight;
    662  1.13.12.1     peter 	w->w_refcount = 1;
    663  1.13.12.1     peter 	w->w_next = wtab_list;
    664  1.13.12.1     peter 	wtab_list = w;
    665  1.13.12.1     peter 
    666  1.13.12.1     peter 	/* initialize the weight table */
    667  1.13.12.1     peter 	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
    668  1.13.12.1     peter 	for (i = 1; i < 32; i++) {
    669  1.13.12.1     peter 		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
    670  1.13.12.1     peter 		if (w->w_tab[i] == 0 && w->w_param_max == 0)
    671  1.13.12.1     peter 			w->w_param_max = 1 << i;
    672  1.13.12.1     peter 	}
    673  1.13.12.1     peter 
    674  1.13.12.1     peter 	return (w);
    675        1.1   thorpej }
    676        1.1   thorpej 
    677        1.1   thorpej int
    678  1.13.12.1     peter wtab_destroy(struct wtab *w)
    679        1.1   thorpej {
    680  1.13.12.1     peter 	struct wtab	*prev;
    681        1.1   thorpej 
    682  1.13.12.1     peter 	if (--w->w_refcount > 0)
    683  1.13.12.1     peter 		return (0);
    684  1.13.12.1     peter 
    685  1.13.12.1     peter 	if (wtab_list == w)
    686  1.13.12.1     peter 		wtab_list = w->w_next;
    687  1.13.12.1     peter 	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
    688  1.13.12.1     peter 		if (prev->w_next == w) {
    689  1.13.12.1     peter 			prev->w_next = w->w_next;
    690  1.13.12.1     peter 			break;
    691        1.1   thorpej 		}
    692        1.1   thorpej 
    693  1.13.12.1     peter 	FREE(w, M_DEVBUF);
    694  1.13.12.1     peter 	return (0);
    695  1.13.12.1     peter }
    696        1.1   thorpej 
    697  1.13.12.1     peter int32_t
    698  1.13.12.1     peter pow_w(struct wtab *w, int n)
    699  1.13.12.1     peter {
    700  1.13.12.1     peter 	int	i, bit;
    701  1.13.12.1     peter 	int32_t	val;
    702        1.1   thorpej 
    703  1.13.12.1     peter 	if (n >= w->w_param_max)
    704  1.13.12.1     peter 		return (0);
    705        1.1   thorpej 
    706  1.13.12.1     peter 	val = 1 << FP_SHIFT;
    707  1.13.12.1     peter 	if (n <= 0)
    708  1.13.12.1     peter 		return (val);
    709  1.13.12.1     peter 
    710  1.13.12.1     peter 	bit = 1;
    711  1.13.12.1     peter 	i = 0;
    712  1.13.12.1     peter 	while (n) {
    713  1.13.12.1     peter 		if (n & bit) {
    714  1.13.12.1     peter 			val = (val * w->w_tab[i]) >> FP_SHIFT;
    715  1.13.12.1     peter 			n &= ~bit;
    716        1.1   thorpej 		}
    717  1.13.12.1     peter 		i++;
    718  1.13.12.1     peter 		bit <<=  1;
    719        1.1   thorpej 	}
    720  1.13.12.1     peter 	return (val);
    721  1.13.12.1     peter }
    722        1.1   thorpej 
    723  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    724  1.13.12.1     peter /*
    725  1.13.12.1     peter  * red device interface
    726  1.13.12.1     peter  */
    727  1.13.12.1     peter altqdev_decl(red);
    728        1.1   thorpej 
    729  1.13.12.1     peter int
    730  1.13.12.1     peter redopen(dev, flag, fmt, l)
    731  1.13.12.1     peter 	dev_t dev;
    732  1.13.12.1     peter 	int flag, fmt;
    733  1.13.12.1     peter 	struct lwp *l;
    734  1.13.12.1     peter {
    735  1.13.12.1     peter 	/* everything will be done when the queueing scheme is attached. */
    736  1.13.12.1     peter 	return 0;
    737  1.13.12.1     peter }
    738  1.13.12.1     peter 
    739  1.13.12.1     peter int
    740  1.13.12.1     peter redclose(dev, flag, fmt, l)
    741  1.13.12.1     peter 	dev_t dev;
    742  1.13.12.1     peter 	int flag, fmt;
    743  1.13.12.1     peter 	struct lwp *l;
    744  1.13.12.1     peter {
    745  1.13.12.1     peter 	red_queue_t *rqp;
    746  1.13.12.1     peter 	int err, error = 0;
    747  1.13.12.1     peter 
    748  1.13.12.1     peter 	while ((rqp = red_list) != NULL) {
    749  1.13.12.1     peter 		/* destroy all */
    750  1.13.12.1     peter 		err = red_detach(rqp);
    751  1.13.12.1     peter 		if (err != 0 && error == 0)
    752  1.13.12.1     peter 			error = err;
    753        1.1   thorpej 	}
    754        1.1   thorpej 
    755  1.13.12.1     peter 	return error;
    756  1.13.12.1     peter }
    757        1.1   thorpej 
    758  1.13.12.1     peter int
    759  1.13.12.1     peter redioctl(dev, cmd, addr, flag, l)
    760  1.13.12.1     peter 	dev_t dev;
    761  1.13.12.1     peter 	ioctlcmd_t cmd;
    762  1.13.12.1     peter 	caddr_t addr;
    763  1.13.12.1     peter 	int flag;
    764  1.13.12.1     peter 	struct lwp *l;
    765  1.13.12.1     peter {
    766  1.13.12.1     peter 	red_queue_t *rqp;
    767  1.13.12.1     peter 	struct red_interface *ifacep;
    768  1.13.12.1     peter 	struct ifnet *ifp;
    769  1.13.12.1     peter 	struct proc *p = l->l_proc;
    770  1.13.12.1     peter 	int	error = 0;
    771  1.13.12.1     peter 
    772  1.13.12.1     peter 	/* check super-user privilege */
    773  1.13.12.1     peter 	switch (cmd) {
    774  1.13.12.1     peter 	case RED_GETSTATS:
    775  1.13.12.1     peter 		break;
    776  1.13.12.1     peter 	default:
    777  1.13.12.1     peter #if (__FreeBSD_version > 400000)
    778  1.13.12.1     peter 		if ((error = suser(p)) != 0)
    779        1.1   thorpej #else
    780  1.13.12.1     peter 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    781        1.1   thorpej #endif
    782  1.13.12.1     peter 			return (error);
    783  1.13.12.1     peter 		break;
    784  1.13.12.1     peter 	}
    785  1.13.12.1     peter 
    786  1.13.12.1     peter 	switch (cmd) {
    787  1.13.12.1     peter 
    788  1.13.12.1     peter 	case RED_ENABLE:
    789  1.13.12.1     peter 		ifacep = (struct red_interface *)addr;
    790  1.13.12.1     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    791  1.13.12.1     peter 			error = EBADF;
    792  1.13.12.1     peter 			break;
    793        1.1   thorpej 		}
    794  1.13.12.1     peter 		error = altq_enable(rqp->rq_ifq);
    795  1.13.12.1     peter 		break;
    796  1.13.12.1     peter 
    797  1.13.12.1     peter 	case RED_DISABLE:
    798  1.13.12.1     peter 		ifacep = (struct red_interface *)addr;
    799  1.13.12.1     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    800  1.13.12.1     peter 			error = EBADF;
    801  1.13.12.1     peter 			break;
    802  1.13.12.1     peter 		}
    803  1.13.12.1     peter 		error = altq_disable(rqp->rq_ifq);
    804  1.13.12.1     peter 		break;
    805  1.13.12.1     peter 
    806  1.13.12.1     peter 	case RED_IF_ATTACH:
    807  1.13.12.1     peter 		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
    808  1.13.12.1     peter 		if (ifp == NULL) {
    809  1.13.12.1     peter 			error = ENXIO;
    810  1.13.12.1     peter 			break;
    811  1.13.12.1     peter 		}
    812  1.13.12.1     peter 
    813  1.13.12.1     peter 		/* allocate and initialize red_queue_t */
    814  1.13.12.1     peter 		MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
    815  1.13.12.1     peter 		if (rqp == NULL) {
    816  1.13.12.1     peter 			error = ENOMEM;
    817  1.13.12.1     peter 			break;
    818  1.13.12.1     peter 		}
    819  1.13.12.1     peter 		(void)memset(rqp, 0, sizeof(red_queue_t));
    820  1.13.12.1     peter 
    821  1.13.12.1     peter 		MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
    822  1.13.12.1     peter 		       M_DEVBUF, M_WAITOK);
    823  1.13.12.1     peter 		if (rqp->rq_q == NULL) {
    824  1.13.12.1     peter 			FREE(rqp, M_DEVBUF);
    825  1.13.12.1     peter 			error = ENOMEM;
    826  1.13.12.1     peter 			break;
    827  1.13.12.1     peter 		}
    828  1.13.12.1     peter 		(void)memset(rqp->rq_q, 0, sizeof(class_queue_t));
    829  1.13.12.1     peter 
    830  1.13.12.1     peter 		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
    831  1.13.12.1     peter 		if (rqp->rq_red == NULL) {
    832  1.13.12.1     peter 			FREE(rqp->rq_q, M_DEVBUF);
    833  1.13.12.1     peter 			FREE(rqp, M_DEVBUF);
    834  1.13.12.1     peter 			error = ENOMEM;
    835  1.13.12.1     peter 			break;
    836  1.13.12.1     peter 		}
    837  1.13.12.1     peter 
    838  1.13.12.1     peter 		rqp->rq_ifq = &ifp->if_snd;
    839  1.13.12.1     peter 		qtail(rqp->rq_q) = NULL;
    840  1.13.12.1     peter 		qlen(rqp->rq_q) = 0;
    841  1.13.12.1     peter 		qlimit(rqp->rq_q) = RED_LIMIT;
    842  1.13.12.1     peter 		qtype(rqp->rq_q) = Q_RED;
    843  1.13.12.1     peter 
    844  1.13.12.1     peter 		/*
    845  1.13.12.1     peter 		 * set RED to this ifnet structure.
    846  1.13.12.1     peter 		 */
    847  1.13.12.1     peter 		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
    848  1.13.12.1     peter 				    red_enqueue, red_dequeue, red_request,
    849  1.13.12.1     peter 				    NULL, NULL);
    850  1.13.12.1     peter 		if (error) {
    851  1.13.12.1     peter 			red_destroy(rqp->rq_red);
    852  1.13.12.1     peter 			FREE(rqp->rq_q, M_DEVBUF);
    853  1.13.12.1     peter 			FREE(rqp, M_DEVBUF);
    854  1.13.12.1     peter 			break;
    855  1.13.12.1     peter 		}
    856  1.13.12.1     peter 
    857  1.13.12.1     peter 		/* add this state to the red list */
    858  1.13.12.1     peter 		rqp->rq_next = red_list;
    859  1.13.12.1     peter 		red_list = rqp;
    860  1.13.12.1     peter 		break;
    861  1.13.12.1     peter 
    862  1.13.12.1     peter 	case RED_IF_DETACH:
    863  1.13.12.1     peter 		ifacep = (struct red_interface *)addr;
    864  1.13.12.1     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    865  1.13.12.1     peter 			error = EBADF;
    866  1.13.12.1     peter 			break;
    867  1.13.12.1     peter 		}
    868  1.13.12.1     peter 		error = red_detach(rqp);
    869  1.13.12.1     peter 		break;
    870  1.13.12.1     peter 
    871  1.13.12.1     peter 	case RED_GETSTATS:
    872  1.13.12.1     peter 		do {
    873  1.13.12.1     peter 			struct red_stats *q_stats;
    874  1.13.12.1     peter 			red_t *rp;
    875  1.13.12.1     peter 
    876  1.13.12.1     peter 			q_stats = (struct red_stats *)addr;
    877  1.13.12.1     peter 			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
    878  1.13.12.1     peter 					     ALTQT_RED)) == NULL) {
    879  1.13.12.1     peter 				error = EBADF;
    880  1.13.12.1     peter 				break;
    881  1.13.12.1     peter 			}
    882  1.13.12.1     peter 
    883  1.13.12.1     peter 			q_stats->q_len 	   = qlen(rqp->rq_q);
    884  1.13.12.1     peter 			q_stats->q_limit   = qlimit(rqp->rq_q);
    885  1.13.12.1     peter 
    886  1.13.12.1     peter 			rp = rqp->rq_red;
    887  1.13.12.1     peter 			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
    888  1.13.12.1     peter 			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
    889  1.13.12.1     peter 			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
    890  1.13.12.1     peter 			q_stats->drop_forced   = rp->red_stats.drop_forced;
    891  1.13.12.1     peter 			q_stats->drop_unforced = rp->red_stats.drop_unforced;
    892  1.13.12.1     peter 			q_stats->marked_packets = rp->red_stats.marked_packets;
    893  1.13.12.1     peter 
    894  1.13.12.1     peter 			q_stats->weight		= rp->red_weight;
    895  1.13.12.1     peter 			q_stats->inv_pmax	= rp->red_inv_pmax;
    896  1.13.12.1     peter 			q_stats->th_min		= rp->red_thmin;
    897  1.13.12.1     peter 			q_stats->th_max		= rp->red_thmax;
    898  1.13.12.1     peter 
    899        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    900  1.13.12.1     peter 			if (rp->red_flowvalve != NULL) {
    901  1.13.12.1     peter 				struct flowvalve *fv = rp->red_flowvalve;
    902  1.13.12.1     peter 				q_stats->fv_flows    = fv->fv_flows;
    903  1.13.12.1     peter 				q_stats->fv_pass     = fv->fv_stats.pass;
    904  1.13.12.1     peter 				q_stats->fv_predrop  = fv->fv_stats.predrop;
    905  1.13.12.1     peter 				q_stats->fv_alloc    = fv->fv_stats.alloc;
    906  1.13.12.1     peter 				q_stats->fv_escape   = fv->fv_stats.escape;
    907  1.13.12.1     peter 			} else {
    908  1.13.12.1     peter #endif /* ALTQ_FLOWVALVE */
    909  1.13.12.1     peter 				q_stats->fv_flows    = 0;
    910  1.13.12.1     peter 				q_stats->fv_pass     = 0;
    911  1.13.12.1     peter 				q_stats->fv_predrop  = 0;
    912  1.13.12.1     peter 				q_stats->fv_alloc    = 0;
    913  1.13.12.1     peter 				q_stats->fv_escape   = 0;
    914  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    915  1.13.12.1     peter 			}
    916  1.13.12.1     peter #endif /* ALTQ_FLOWVALVE */
    917  1.13.12.1     peter 		} while (/*CONSTCOND*/ 0);
    918  1.13.12.1     peter 		break;
    919  1.13.12.1     peter 
    920  1.13.12.1     peter 	case RED_CONFIG:
    921  1.13.12.1     peter 		do {
    922  1.13.12.1     peter 			struct red_conf *fc;
    923  1.13.12.1     peter 			red_t *new;
    924  1.13.12.1     peter 			int s, limit;
    925  1.13.12.1     peter 
    926  1.13.12.1     peter 			fc = (struct red_conf *)addr;
    927  1.13.12.1     peter 			if ((rqp = altq_lookup(fc->iface.red_ifname,
    928  1.13.12.1     peter 					       ALTQT_RED)) == NULL) {
    929  1.13.12.1     peter 				error = EBADF;
    930  1.13.12.1     peter 				break;
    931  1.13.12.1     peter 			}
    932  1.13.12.1     peter 			new = red_alloc(fc->red_weight,
    933  1.13.12.1     peter 					fc->red_inv_pmax,
    934  1.13.12.1     peter 					fc->red_thmin,
    935  1.13.12.1     peter 					fc->red_thmax,
    936  1.13.12.1     peter 					fc->red_flags,
    937  1.13.12.1     peter 					fc->red_pkttime);
    938  1.13.12.1     peter 			if (new == NULL) {
    939  1.13.12.1     peter 				error = ENOMEM;
    940  1.13.12.1     peter 				break;
    941  1.13.12.1     peter 			}
    942  1.13.12.1     peter 
    943  1.13.12.1     peter 			s = splnet();
    944  1.13.12.1     peter 			red_purgeq(rqp);
    945  1.13.12.1     peter 			limit = fc->red_limit;
    946  1.13.12.1     peter 			if (limit < fc->red_thmax)
    947  1.13.12.1     peter 				limit = fc->red_thmax;
    948  1.13.12.1     peter 			qlimit(rqp->rq_q) = limit;
    949  1.13.12.1     peter 			fc->red_limit = limit;	/* write back the new value */
    950  1.13.12.1     peter 
    951  1.13.12.1     peter 			red_destroy(rqp->rq_red);
    952  1.13.12.1     peter 			rqp->rq_red = new;
    953  1.13.12.1     peter 
    954  1.13.12.1     peter 			splx(s);
    955  1.13.12.1     peter 
    956  1.13.12.1     peter 			/* write back new values */
    957  1.13.12.1     peter 			fc->red_limit = limit;
    958  1.13.12.1     peter 			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
    959  1.13.12.1     peter 			fc->red_thmin = rqp->rq_red->red_thmin;
    960  1.13.12.1     peter 			fc->red_thmax = rqp->rq_red->red_thmax;
    961  1.13.12.1     peter 
    962  1.13.12.1     peter 		} while (/*CONSTCOND*/ 0);
    963  1.13.12.1     peter 		break;
    964  1.13.12.1     peter 
    965  1.13.12.1     peter 	case RED_SETDEFAULTS:
    966  1.13.12.1     peter 		do {
    967  1.13.12.1     peter 			struct redparams *rp;
    968  1.13.12.1     peter 
    969  1.13.12.1     peter 			rp = (struct redparams *)addr;
    970  1.13.12.1     peter 
    971  1.13.12.1     peter 			default_th_min = rp->th_min;
    972  1.13.12.1     peter 			default_th_max = rp->th_max;
    973  1.13.12.1     peter 			default_inv_pmax = rp->inv_pmax;
    974  1.13.12.1     peter 		} while (/*CONSTCOND*/ 0);
    975  1.13.12.1     peter 		break;
    976  1.13.12.1     peter 
    977  1.13.12.1     peter 	default:
    978  1.13.12.1     peter 		error = EINVAL;
    979  1.13.12.1     peter 		break;
    980        1.1   thorpej 	}
    981  1.13.12.1     peter 	return error;
    982        1.1   thorpej }
    983        1.1   thorpej 
    984  1.13.12.1     peter static int
    985  1.13.12.1     peter red_detach(rqp)
    986  1.13.12.1     peter 	red_queue_t *rqp;
    987        1.1   thorpej {
    988  1.13.12.1     peter 	red_queue_t *tmp;
    989  1.13.12.1     peter 	int error = 0;
    990        1.1   thorpej 
    991  1.13.12.1     peter 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    992  1.13.12.1     peter 		altq_disable(rqp->rq_ifq);
    993        1.1   thorpej 
    994  1.13.12.1     peter 	if ((error = altq_detach(rqp->rq_ifq)))
    995  1.13.12.1     peter 		return (error);
    996        1.1   thorpej 
    997  1.13.12.1     peter 	if (red_list == rqp)
    998  1.13.12.1     peter 		red_list = rqp->rq_next;
    999  1.13.12.1     peter 	else {
   1000  1.13.12.1     peter 		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
   1001  1.13.12.1     peter 			if (tmp->rq_next == rqp) {
   1002  1.13.12.1     peter 				tmp->rq_next = rqp->rq_next;
   1003  1.13.12.1     peter 				break;
   1004  1.13.12.1     peter 			}
   1005  1.13.12.1     peter 		if (tmp == NULL)
   1006  1.13.12.1     peter 			printf("red_detach: no state found in red_list!\n");
   1007        1.1   thorpej 	}
   1008  1.13.12.1     peter 
   1009  1.13.12.1     peter 	red_destroy(rqp->rq_red);
   1010  1.13.12.1     peter 	FREE(rqp->rq_q, M_DEVBUF);
   1011  1.13.12.1     peter 	FREE(rqp, M_DEVBUF);
   1012  1.13.12.1     peter 	return (error);
   1013        1.1   thorpej }
   1014        1.1   thorpej 
   1015        1.1   thorpej /*
   1016  1.13.12.1     peter  * enqueue routine:
   1017  1.13.12.1     peter  *
   1018  1.13.12.1     peter  *	returns: 0 when successfully queued.
   1019  1.13.12.1     peter  *		 ENOBUFS when drop occurs.
   1020        1.1   thorpej  */
   1021  1.13.12.1     peter static int
   1022  1.13.12.1     peter red_enqueue(ifq, m, pktattr)
   1023  1.13.12.1     peter 	struct ifaltq *ifq;
   1024        1.1   thorpej 	struct mbuf *m;
   1025        1.1   thorpej 	struct altq_pktattr *pktattr;
   1026        1.1   thorpej {
   1027  1.13.12.1     peter 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1028        1.1   thorpej 
   1029  1.13.12.1     peter 	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
   1030  1.13.12.1     peter 		return ENOBUFS;
   1031  1.13.12.1     peter 	ifq->ifq_len++;
   1032  1.13.12.1     peter 	return 0;
   1033        1.1   thorpej }
   1034        1.1   thorpej 
   1035        1.1   thorpej /*
   1036        1.1   thorpej  * dequeue routine:
   1037        1.3   thorpej  *	must be called in splnet.
   1038        1.1   thorpej  *
   1039        1.1   thorpej  *	returns: mbuf dequeued.
   1040        1.1   thorpej  *		 NULL when no packet is available in the queue.
   1041        1.1   thorpej  */
   1042        1.1   thorpej 
   1043        1.1   thorpej static struct mbuf *
   1044        1.1   thorpej red_dequeue(ifq, op)
   1045        1.1   thorpej 	struct ifaltq *ifq;
   1046        1.1   thorpej 	int op;
   1047        1.1   thorpej {
   1048        1.1   thorpej 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1049        1.1   thorpej 	struct mbuf *m;
   1050        1.1   thorpej 
   1051        1.1   thorpej 	if (op == ALTDQ_POLL)
   1052        1.1   thorpej 		return qhead(rqp->rq_q);
   1053        1.1   thorpej 
   1054        1.1   thorpej 	/* op == ALTDQ_REMOVE */
   1055        1.1   thorpej 	m =  red_getq(rqp->rq_red, rqp->rq_q);
   1056        1.1   thorpej 	if (m != NULL)
   1057        1.1   thorpej 		ifq->ifq_len--;
   1058        1.1   thorpej 	return (m);
   1059        1.1   thorpej }
   1060        1.1   thorpej 
   1061        1.1   thorpej static int
   1062        1.1   thorpej red_request(ifq, req, arg)
   1063        1.1   thorpej 	struct ifaltq *ifq;
   1064        1.1   thorpej 	int req;
   1065        1.1   thorpej 	void *arg;
   1066        1.1   thorpej {
   1067        1.1   thorpej 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1068        1.1   thorpej 
   1069        1.1   thorpej 	switch (req) {
   1070        1.1   thorpej 	case ALTRQ_PURGE:
   1071        1.1   thorpej 		red_purgeq(rqp);
   1072        1.1   thorpej 		break;
   1073        1.1   thorpej 	}
   1074        1.1   thorpej 	return (0);
   1075        1.1   thorpej }
   1076        1.1   thorpej 
   1077        1.1   thorpej static void
   1078        1.1   thorpej red_purgeq(rqp)
   1079        1.1   thorpej 	red_queue_t *rqp;
   1080        1.1   thorpej {
   1081        1.1   thorpej 	_flushq(rqp->rq_q);
   1082        1.1   thorpej 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
   1083        1.1   thorpej 		rqp->rq_ifq->ifq_len = 0;
   1084        1.1   thorpej }
   1085        1.1   thorpej 
   1086        1.1   thorpej #ifdef ALTQ_FLOWVALVE
   1087        1.1   thorpej 
   1088        1.1   thorpej #define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
   1089        1.1   thorpej #define	FV_PSCALE(x)	((x) << FV_PSHIFT)
   1090        1.1   thorpej #define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
   1091        1.1   thorpej #define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
   1092        1.1   thorpej #define	FV_FSCALE(x)	((x) << FV_FSHIFT)
   1093        1.1   thorpej #define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
   1094        1.1   thorpej 
   1095        1.1   thorpej #define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
   1096        1.1   thorpej #define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
   1097        1.1   thorpej 
   1098        1.1   thorpej #define	FV_N			10	/* update fve_f every FV_N packets */
   1099        1.1   thorpej 
   1100        1.1   thorpej #define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
   1101       1.11     perry #define	FV_TTHRESH		3  /* time threshold to delete fve */
   1102        1.1   thorpej #define	FV_ALPHA		5  /* extra packet count */
   1103        1.1   thorpej 
   1104  1.13.12.1     peter #define	FV_STATS
   1105  1.13.12.1     peter 
   1106        1.1   thorpej #if (__FreeBSD_version > 300000)
   1107        1.1   thorpej #define	FV_TIMESTAMP(tp)	getmicrotime(tp)
   1108        1.1   thorpej #else
   1109        1.1   thorpej #define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
   1110        1.1   thorpej #endif
   1111        1.1   thorpej 
   1112        1.1   thorpej /*
   1113        1.1   thorpej  * Brtt table: 127 entry table to convert drop rate (p) to
   1114        1.1   thorpej  * the corresponding bandwidth fraction (f)
   1115        1.1   thorpej  * the following equation is implemented to use scaled values,
   1116        1.1   thorpej  * fve_p and fve_f, in the fixed point format.
   1117        1.1   thorpej  *
   1118        1.1   thorpej  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
   1119        1.1   thorpej  *   f = Brtt(p) / (max_th + alpha)
   1120        1.1   thorpej  */
   1121        1.1   thorpej #define	BRTT_SIZE	128
   1122        1.1   thorpej #define	BRTT_SHIFT	12
   1123        1.1   thorpej #define	BRTT_MASK	0x0007f000
   1124        1.1   thorpej #define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
   1125        1.1   thorpej 
   1126        1.1   thorpej const int brtt_tab[BRTT_SIZE] = {
   1127       1.11     perry 	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
   1128       1.11     perry 	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
   1129       1.11     perry 	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
   1130       1.11     perry 	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
   1131       1.11     perry 	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
   1132       1.11     perry 	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
   1133       1.11     perry 	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
   1134       1.11     perry 	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
   1135       1.11     perry 	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
   1136       1.11     perry 	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
   1137       1.11     perry 	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
   1138       1.11     perry 	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
   1139       1.11     perry 	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
   1140       1.11     perry 	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
   1141       1.11     perry 	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
   1142        1.1   thorpej 	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
   1143        1.1   thorpej };
   1144        1.1   thorpej 
   1145       1.13     perry static inline struct fve *
   1146        1.1   thorpej flowlist_lookup(fv, pktattr, now)
   1147        1.1   thorpej 	struct flowvalve *fv;
   1148        1.1   thorpej 	struct altq_pktattr *pktattr;
   1149        1.1   thorpej 	struct timeval *now;
   1150        1.1   thorpej {
   1151        1.1   thorpej 	struct fve *fve;
   1152        1.1   thorpej 	int flows;
   1153        1.1   thorpej 	struct ip *ip;
   1154        1.1   thorpej #ifdef INET6
   1155        1.1   thorpej 	struct ip6_hdr *ip6;
   1156        1.1   thorpej #endif
   1157        1.1   thorpej 	struct timeval tthresh;
   1158        1.1   thorpej 
   1159        1.1   thorpej 	if (pktattr == NULL)
   1160        1.1   thorpej 		return (NULL);
   1161        1.1   thorpej 
   1162        1.1   thorpej 	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
   1163        1.1   thorpej 	flows = 0;
   1164        1.1   thorpej 	/*
   1165        1.1   thorpej 	 * search the flow list
   1166        1.1   thorpej 	 */
   1167        1.1   thorpej 	switch (pktattr->pattr_af) {
   1168        1.1   thorpej 	case AF_INET:
   1169        1.1   thorpej 		ip = (struct ip *)pktattr->pattr_hdr;
   1170        1.1   thorpej 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
   1171        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec == 0)
   1172        1.1   thorpej 				break;
   1173        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
   1174        1.1   thorpej 				fve->fve_lastdrop.tv_sec = 0;
   1175        1.1   thorpej 				break;
   1176        1.1   thorpej 			}
   1177        1.1   thorpej 			if (fve->fve_flow.flow_af == AF_INET &&
   1178        1.1   thorpej 			    fve->fve_flow.flow_ip.ip_src.s_addr ==
   1179        1.1   thorpej 			    ip->ip_src.s_addr &&
   1180        1.1   thorpej 			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
   1181        1.1   thorpej 			    ip->ip_dst.s_addr)
   1182        1.1   thorpej 				return (fve);
   1183        1.1   thorpej 			flows++;
   1184        1.1   thorpej 		}
   1185        1.1   thorpej 		break;
   1186        1.1   thorpej #ifdef INET6
   1187        1.1   thorpej 	case AF_INET6:
   1188        1.1   thorpej 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1189        1.1   thorpej 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
   1190        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec == 0)
   1191        1.1   thorpej 				break;
   1192        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
   1193        1.1   thorpej 				fve->fve_lastdrop.tv_sec = 0;
   1194        1.1   thorpej 				break;
   1195        1.1   thorpej 			}
   1196        1.1   thorpej 			if (fve->fve_flow.flow_af == AF_INET6 &&
   1197        1.1   thorpej 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
   1198        1.1   thorpej 					       &ip6->ip6_src) &&
   1199        1.1   thorpej 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
   1200        1.1   thorpej 					       &ip6->ip6_dst))
   1201        1.1   thorpej 				return (fve);
   1202        1.1   thorpej 			flows++;
   1203        1.1   thorpej 		}
   1204        1.1   thorpej 		break;
   1205        1.1   thorpej #endif /* INET6 */
   1206        1.1   thorpej 
   1207        1.1   thorpej 	default:
   1208        1.1   thorpej 		/* unknown protocol.  no drop. */
   1209        1.1   thorpej 		return (NULL);
   1210        1.1   thorpej 	}
   1211        1.1   thorpej 	fv->fv_flows = flows;	/* save the number of active fve's */
   1212        1.1   thorpej 	return (NULL);
   1213        1.1   thorpej }
   1214        1.1   thorpej 
   1215       1.13     perry static inline struct fve *
   1216        1.1   thorpej flowlist_reclaim(fv, pktattr)
   1217        1.1   thorpej 	struct flowvalve *fv;
   1218        1.1   thorpej 	struct altq_pktattr *pktattr;
   1219        1.1   thorpej {
   1220        1.1   thorpej 	struct fve *fve;
   1221        1.1   thorpej 	struct ip *ip;
   1222        1.1   thorpej #ifdef INET6
   1223        1.1   thorpej 	struct ip6_hdr *ip6;
   1224        1.1   thorpej #endif
   1225        1.1   thorpej 
   1226        1.1   thorpej 	/*
   1227        1.1   thorpej 	 * get an entry from the tail of the LRU list.
   1228        1.1   thorpej 	 */
   1229        1.1   thorpej 	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
   1230        1.1   thorpej 
   1231        1.1   thorpej 	switch (pktattr->pattr_af) {
   1232        1.1   thorpej 	case AF_INET:
   1233        1.1   thorpej 		ip = (struct ip *)pktattr->pattr_hdr;
   1234        1.1   thorpej 		fve->fve_flow.flow_af = AF_INET;
   1235        1.1   thorpej 		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
   1236        1.1   thorpej 		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
   1237        1.1   thorpej 		break;
   1238        1.1   thorpej #ifdef INET6
   1239        1.1   thorpej 	case AF_INET6:
   1240        1.1   thorpej 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1241        1.1   thorpej 		fve->fve_flow.flow_af = AF_INET6;
   1242        1.1   thorpej 		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
   1243        1.1   thorpej 		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
   1244        1.1   thorpej 		break;
   1245        1.1   thorpej #endif
   1246        1.1   thorpej 	}
   1247        1.1   thorpej 
   1248        1.1   thorpej 	fve->fve_state = Green;
   1249        1.1   thorpej 	fve->fve_p = 0.0;
   1250        1.1   thorpej 	fve->fve_f = 0.0;
   1251        1.1   thorpej 	fve->fve_ifseq = fv->fv_ifseq - 1;
   1252        1.1   thorpej 	fve->fve_count = 0;
   1253        1.1   thorpej 
   1254        1.1   thorpej 	fv->fv_flows++;
   1255        1.1   thorpej #ifdef FV_STATS
   1256        1.1   thorpej 	fv->fv_stats.alloc++;
   1257        1.1   thorpej #endif
   1258        1.1   thorpej 	return (fve);
   1259        1.1   thorpej }
   1260        1.1   thorpej 
   1261       1.13     perry static inline void
   1262        1.1   thorpej flowlist_move_to_head(fv, fve)
   1263        1.1   thorpej 	struct flowvalve *fv;
   1264        1.1   thorpej 	struct fve *fve;
   1265        1.1   thorpej {
   1266        1.1   thorpej 	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
   1267        1.1   thorpej 		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
   1268        1.1   thorpej 		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
   1269        1.1   thorpej 	}
   1270        1.1   thorpej }
   1271        1.1   thorpej 
   1272        1.1   thorpej /*
   1273        1.1   thorpej  * allocate flowvalve structure
   1274        1.1   thorpej  */
   1275        1.1   thorpej static struct flowvalve *
   1276        1.1   thorpej fv_alloc(rp)
   1277        1.1   thorpej 	struct red *rp;
   1278        1.1   thorpej {
   1279        1.1   thorpej 	struct flowvalve *fv;
   1280        1.1   thorpej 	struct fve *fve;
   1281        1.1   thorpej 	int i, num;
   1282        1.1   thorpej 
   1283        1.1   thorpej 	num = FV_FLOWLISTSIZE;
   1284        1.1   thorpej 	MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
   1285        1.1   thorpej 	       M_DEVBUF, M_WAITOK);
   1286        1.1   thorpej 	if (fv == NULL)
   1287        1.1   thorpej 		return (NULL);
   1288        1.9  christos 	(void)memset(fv, 0, sizeof(struct flowvalve));
   1289        1.1   thorpej 
   1290        1.1   thorpej 	MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
   1291        1.1   thorpej 	       M_DEVBUF, M_WAITOK);
   1292        1.1   thorpej 	if (fv->fv_fves == NULL) {
   1293        1.1   thorpej 		FREE(fv, M_DEVBUF);
   1294        1.1   thorpej 		return (NULL);
   1295        1.1   thorpej 	}
   1296        1.9  christos 	(void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);
   1297        1.1   thorpej 
   1298        1.1   thorpej 	fv->fv_flows = 0;
   1299        1.1   thorpej 	TAILQ_INIT(&fv->fv_flowlist);
   1300        1.1   thorpej 	for (i = 0; i < num; i++) {
   1301        1.1   thorpej 		fve = &fv->fv_fves[i];
   1302        1.1   thorpej 		fve->fve_lastdrop.tv_sec = 0;
   1303        1.1   thorpej 		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
   1304        1.1   thorpej 	}
   1305        1.1   thorpej 
   1306        1.1   thorpej 	/* initialize drop rate threshold in scaled fixed-point */
   1307        1.1   thorpej 	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
   1308        1.1   thorpej 
   1309        1.1   thorpej 	/* initialize drop rate to fraction table */
   1310        1.1   thorpej 	MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
   1311        1.1   thorpej 	       M_DEVBUF, M_WAITOK);
   1312        1.1   thorpej 	if (fv->fv_p2ftab == NULL) {
   1313        1.1   thorpej 		FREE(fv->fv_fves, M_DEVBUF);
   1314        1.1   thorpej 		FREE(fv, M_DEVBUF);
   1315        1.1   thorpej 		return (NULL);
   1316        1.1   thorpej 	}
   1317        1.1   thorpej 	/*
   1318        1.1   thorpej 	 * create the p2f table.
   1319        1.1   thorpej 	 * (shift is used to keep the precision)
   1320        1.1   thorpej 	 */
   1321        1.1   thorpej 	for (i = 1; i < BRTT_SIZE; i++) {
   1322        1.1   thorpej 		int f;
   1323        1.1   thorpej 
   1324        1.1   thorpej 		f = brtt_tab[i] << 8;
   1325        1.1   thorpej 		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
   1326        1.1   thorpej 	}
   1327        1.1   thorpej 
   1328        1.1   thorpej 	return (fv);
   1329        1.1   thorpej }
   1330        1.1   thorpej 
   1331        1.1   thorpej static void fv_destroy(fv)
   1332        1.1   thorpej 	struct flowvalve *fv;
   1333        1.1   thorpej {
   1334        1.1   thorpej 	FREE(fv->fv_p2ftab, M_DEVBUF);
   1335        1.1   thorpej 	FREE(fv->fv_fves, M_DEVBUF);
   1336        1.1   thorpej 	FREE(fv, M_DEVBUF);
   1337        1.1   thorpej }
   1338        1.1   thorpej 
   1339       1.13     perry static inline int
   1340        1.1   thorpej fv_p2f(fv, p)
   1341        1.1   thorpej 	struct flowvalve	*fv;
   1342        1.1   thorpej 	int	p;
   1343        1.1   thorpej {
   1344        1.1   thorpej 	int val, f;
   1345        1.1   thorpej 
   1346        1.1   thorpej 	if (p >= BRTT_PMAX)
   1347        1.1   thorpej 		f = fv->fv_p2ftab[BRTT_SIZE-1];
   1348        1.1   thorpej 	else if ((val = (p & BRTT_MASK)))
   1349        1.1   thorpej 		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
   1350        1.1   thorpej 	else
   1351        1.1   thorpej 		f = fv->fv_p2ftab[1];
   1352        1.1   thorpej 	return (f);
   1353        1.1   thorpej }
   1354        1.1   thorpej 
   1355        1.1   thorpej /*
   1356        1.1   thorpej  * check if an arriving packet should be pre-dropped.
   1357        1.1   thorpej  * called from red_addq() when a packet arrives.
   1358        1.1   thorpej  * returns 1 when the packet should be pre-dropped.
   1359        1.3   thorpej  * should be called in splnet.
   1360        1.1   thorpej  */
   1361        1.1   thorpej static int
   1362        1.1   thorpej fv_checkflow(fv, pktattr, fcache)
   1363        1.1   thorpej 	struct flowvalve *fv;
   1364        1.1   thorpej 	struct altq_pktattr *pktattr;
   1365        1.1   thorpej 	struct fve **fcache;
   1366        1.1   thorpej {
   1367        1.1   thorpej 	struct fve *fve;
   1368        1.1   thorpej 	struct timeval now;
   1369        1.1   thorpej 
   1370        1.1   thorpej 	fv->fv_ifseq++;
   1371        1.1   thorpej 	FV_TIMESTAMP(&now);
   1372        1.1   thorpej 
   1373        1.1   thorpej 	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
   1374        1.1   thorpej 		/* no matching entry in the flowlist */
   1375        1.1   thorpej 		return (0);
   1376        1.1   thorpej 
   1377        1.1   thorpej 	*fcache = fve;
   1378        1.1   thorpej 
   1379        1.1   thorpej 	/* update fraction f for every FV_N packets */
   1380        1.1   thorpej 	if (++fve->fve_count == FV_N) {
   1381        1.1   thorpej 		/*
   1382        1.1   thorpej 		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
   1383        1.1   thorpej 		 */
   1384        1.1   thorpej 		fve->fve_f =
   1385        1.1   thorpej 			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
   1386        1.1   thorpej 			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
   1387        1.1   thorpej 		fve->fve_ifseq = fv->fv_ifseq;
   1388        1.1   thorpej 		fve->fve_count = 0;
   1389        1.1   thorpej 	}
   1390        1.1   thorpej 
   1391        1.1   thorpej 	/*
   1392        1.1   thorpej 	 * overpumping test
   1393        1.1   thorpej 	 */
   1394        1.1   thorpej 	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
   1395        1.1   thorpej 		int fthresh;
   1396        1.1   thorpej 
   1397        1.1   thorpej 		/* calculate a threshold */
   1398        1.1   thorpej 		fthresh = fv_p2f(fv, fve->fve_p);
   1399        1.1   thorpej 		if (fve->fve_f > fthresh)
   1400        1.1   thorpej 			fve->fve_state = Red;
   1401        1.1   thorpej 	}
   1402        1.1   thorpej 
   1403        1.1   thorpej 	if (fve->fve_state == Red) {
   1404        1.1   thorpej 		/*
   1405        1.1   thorpej 		 * backoff test
   1406        1.1   thorpej 		 */
   1407        1.1   thorpej 		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
   1408        1.1   thorpej 			/* no drop for at least FV_BACKOFFTHRESH sec */
   1409        1.1   thorpej 			fve->fve_p = 0;
   1410        1.1   thorpej 			fve->fve_state = Green;
   1411        1.1   thorpej #ifdef FV_STATS
   1412        1.1   thorpej 			fv->fv_stats.escape++;
   1413        1.1   thorpej #endif
   1414        1.1   thorpej 		} else {
   1415        1.1   thorpej 			/* block this flow */
   1416        1.1   thorpej 			flowlist_move_to_head(fv, fve);
   1417        1.1   thorpej 			fve->fve_lastdrop = now;
   1418        1.1   thorpej #ifdef FV_STATS
   1419        1.1   thorpej 			fv->fv_stats.predrop++;
   1420        1.1   thorpej #endif
   1421        1.1   thorpej 			return (1);
   1422        1.1   thorpej 		}
   1423        1.1   thorpej 	}
   1424        1.1   thorpej 
   1425        1.1   thorpej 	/*
   1426        1.1   thorpej 	 * p = (1 - Wp) * p
   1427        1.1   thorpej 	 */
   1428        1.1   thorpej 	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
   1429        1.1   thorpej 	if (fve->fve_p < 0)
   1430        1.1   thorpej 		fve->fve_p = 0;
   1431        1.1   thorpej #ifdef FV_STATS
   1432        1.1   thorpej 	fv->fv_stats.pass++;
   1433        1.1   thorpej #endif
   1434        1.1   thorpej 	return (0);
   1435        1.1   thorpej }
   1436        1.1   thorpej 
   1437        1.1   thorpej /*
   1438        1.1   thorpej  * called from red_addq when a packet is dropped by red.
   1439        1.3   thorpej  * should be called in splnet.
   1440        1.1   thorpej  */
   1441        1.1   thorpej static void fv_dropbyred(fv, pktattr, fcache)
   1442        1.1   thorpej 	struct flowvalve *fv;
   1443        1.1   thorpej 	struct altq_pktattr *pktattr;
   1444        1.1   thorpej 	struct fve *fcache;
   1445        1.1   thorpej {
   1446        1.1   thorpej 	struct fve *fve;
   1447        1.1   thorpej 	struct timeval now;
   1448        1.1   thorpej 
   1449        1.1   thorpej 	if (pktattr == NULL)
   1450        1.1   thorpej 		return;
   1451        1.1   thorpej 	FV_TIMESTAMP(&now);
   1452        1.1   thorpej 
   1453        1.1   thorpej 	if (fcache != NULL)
   1454        1.1   thorpej 		/* the fve of this packet is already cached */
   1455        1.1   thorpej 		fve = fcache;
   1456        1.1   thorpej 	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
   1457        1.1   thorpej 		fve = flowlist_reclaim(fv, pktattr);
   1458        1.1   thorpej 
   1459        1.1   thorpej 	flowlist_move_to_head(fv, fve);
   1460        1.1   thorpej 
   1461        1.1   thorpej 	/*
   1462        1.1   thorpej 	 * update p:  the following line cancels the update
   1463        1.1   thorpej 	 *	      in fv_checkflow() and calculate
   1464        1.1   thorpej 	 *	p = Wp + (1 - Wp) * p
   1465        1.1   thorpej 	 */
   1466        1.1   thorpej 	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
   1467        1.1   thorpej 
   1468        1.1   thorpej 	fve->fve_lastdrop = now;
   1469        1.1   thorpej }
   1470        1.1   thorpej 
   1471        1.1   thorpej #endif /* ALTQ_FLOWVALVE */
   1472        1.1   thorpej 
   1473        1.1   thorpej #ifdef KLD_MODULE
   1474        1.1   thorpej 
   1475        1.1   thorpej static struct altqsw red_sw =
   1476        1.1   thorpej 	{"red", redopen, redclose, redioctl};
   1477        1.1   thorpej 
   1478        1.1   thorpej ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
   1479  1.13.12.1     peter MODULE_VERSION(altq_red, 1);
   1480        1.1   thorpej 
   1481        1.1   thorpej #endif /* KLD_MODULE */
   1482  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
   1483        1.1   thorpej 
   1484        1.1   thorpej #endif /* ALTQ_RED */
   1485