altq_red.c revision 1.13.12.1       1  1.13.12.1     peter /*	$NetBSD: altq_red.c,v 1.13.12.1 2006/03/18 12:08:18 peter Exp $	*/
      2  1.13.12.1     peter /*	$KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $	*/
      3        1.1   thorpej 
      4        1.1   thorpej /*
      5  1.13.12.1     peter  * Copyright (C) 1997-2003
      6        1.1   thorpej  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7        1.1   thorpej  *
      8        1.1   thorpej  * Redistribution and use in source and binary forms, with or without
      9        1.1   thorpej  * modification, are permitted provided that the following conditions
     10        1.1   thorpej  * are met:
     11        1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     12        1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     13        1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     14        1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     15        1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     16        1.1   thorpej  *
     17        1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18        1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19        1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20        1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21        1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22        1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23        1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24        1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25        1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26        1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27        1.1   thorpej  * SUCH DAMAGE.
     28        1.1   thorpej  *
     29        1.1   thorpej  */
     30        1.1   thorpej /*
     31        1.1   thorpej  * Copyright (c) 1990-1994 Regents of the University of California.
     32        1.1   thorpej  * All rights reserved.
     33        1.1   thorpej  *
     34        1.1   thorpej  * Redistribution and use in source and binary forms, with or without
     35        1.1   thorpej  * modification, are permitted provided that the following conditions
     36        1.1   thorpej  * are met:
     37        1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     38        1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     39        1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     40        1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     41        1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     42        1.1   thorpej  * 3. All advertising materials mentioning features or use of this software
     43        1.1   thorpej  *    must display the following acknowledgement:
     44        1.1   thorpej  *	This product includes software developed by the Computer Systems
     45        1.1   thorpej  *	Engineering Group at Lawrence Berkeley Laboratory.
     46        1.1   thorpej  * 4. Neither the name of the University nor of the Laboratory may be used
     47        1.1   thorpej  *    to endorse or promote products derived from this software without
     48        1.1   thorpej  *    specific prior written permission.
     49        1.1   thorpej  *
     50        1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     51        1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     52        1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     53        1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     54        1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     55        1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     56        1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     57        1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     58        1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     59        1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     60        1.1   thorpej  * SUCH DAMAGE.
     61        1.1   thorpej  */
     62        1.5     lukem 
     63        1.5     lukem #include <sys/cdefs.h>
     64  1.13.12.1     peter __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.13.12.1 2006/03/18 12:08:18 peter Exp $");
     65        1.1   thorpej 
     66  1.13.12.1     peter #ifdef _KERNEL_OPT
     67        1.1   thorpej #include "opt_altq.h"
     68        1.1   thorpej #include "opt_inet.h"
     69        1.1   thorpej #endif
     70  1.13.12.1     peter 
     71        1.1   thorpej #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
     72        1.1   thorpej 
     73        1.1   thorpej #include <sys/param.h>
     74        1.1   thorpej #include <sys/malloc.h>
     75        1.1   thorpej #include <sys/mbuf.h>
     76        1.1   thorpej #include <sys/socket.h>
     77        1.1   thorpej #include <sys/systm.h>
     78        1.1   thorpej #include <sys/errno.h>
     79  1.13.12.1     peter #if 1 /* ALTQ3_COMPAT */
     80  1.13.12.1     peter #include <sys/sockio.h>
     81  1.13.12.1     peter #include <sys/proc.h>
     82        1.1   thorpej #include <sys/kernel.h>
     83        1.1   thorpej #ifdef ALTQ_FLOWVALVE
     84        1.1   thorpej #include <sys/queue.h>
     85        1.1   thorpej #include <sys/time.h>
     86        1.1   thorpej #endif
     87  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
     88        1.1   thorpej 
     89        1.1   thorpej #include <net/if.h>
     90        1.1   thorpej 
     91        1.1   thorpej #include <netinet/in.h>
     92        1.1   thorpej #include <netinet/in_systm.h>
     93        1.1   thorpej #include <netinet/ip.h>
     94        1.1   thorpej #ifdef INET6
     95        1.1   thorpej #include <netinet/ip6.h>
     96        1.1   thorpej #endif
     97        1.1   thorpej 
     98  1.13.12.1     peter #include <net/pfvar.h>
     99        1.1   thorpej #include <altq/altq.h>
    100        1.1   thorpej #include <altq/altq_red.h>
    101  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    102  1.13.12.1     peter #include <altq/altq_conf.h>
    103        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    104        1.1   thorpej #include <altq/altq_flowvalve.h>
    105        1.1   thorpej #endif
    106  1.13.12.1     peter #endif
    107        1.1   thorpej 
    108        1.1   thorpej /*
    109        1.1   thorpej  * ALTQ/RED (Random Early Detection) implementation using 32-bit
    110        1.1   thorpej  * fixed-point calculation.
    111        1.1   thorpej  *
    112        1.1   thorpej  * written by kjc using the ns code as a reference.
    113        1.1   thorpej  * you can learn more about red and ns from Sally's home page at
    114        1.1   thorpej  * http://www-nrg.ee.lbl.gov/floyd/
    115        1.1   thorpej  *
    116        1.1   thorpej  * most of the red parameter values are fixed in this implementation
    117        1.1   thorpej  * to prevent fixed-point overflow/underflow.
    118        1.1   thorpej  * if you change the parameters, watch out for overflow/underflow!
    119        1.1   thorpej  *
    120        1.1   thorpej  * the parameters used are recommended values by Sally.
    121        1.1   thorpej  * the corresponding ns config looks:
    122        1.1   thorpej  *	q_weight=0.00195
    123        1.1   thorpej  *	minthresh=5 maxthresh=15 queue-size=60
    124        1.1   thorpej  *	linterm=30
    125        1.1   thorpej  *	dropmech=drop-tail
    126        1.1   thorpej  *	bytes=false (can't be handled by 32-bit fixed-point)
    127       1.11     perry  *	doubleq=false dqthresh=false
    128        1.1   thorpej  *	wait=true
    129        1.1   thorpej  */
    130        1.1   thorpej /*
    131        1.1   thorpej  * alternative red parameters for a slow link.
    132        1.1   thorpej  *
    133        1.1   thorpej  * assume the queue length becomes from zero to L and keeps L, it takes
    134        1.1   thorpej  * N packets for q_avg to reach 63% of L.
    135        1.1   thorpej  * when q_weight is 0.002, N is about 500 packets.
    136        1.1   thorpej  * for a slow link like dial-up, 500 packets takes more than 1 minute!
    137        1.1   thorpej  * when q_weight is 0.008, N is about 127 packets.
    138        1.1   thorpej  * when q_weight is 0.016, N is about 63 packets.
    139  1.13.12.1     peter  * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
    140        1.1   thorpej  * are allowed for 0.016.
    141        1.1   thorpej  * see Sally's paper for more details.
    142        1.1   thorpej  */
    143        1.1   thorpej /* normal red parameters */
    144        1.1   thorpej #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    145        1.1   thorpej 				/* q_weight = 0.00195 */
    146        1.1   thorpej 
    147        1.1   thorpej /* red parameters for a slow link */
    148        1.1   thorpej #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    149        1.1   thorpej 				/* q_weight = 0.0078125 */
    150        1.1   thorpej 
    151        1.1   thorpej /* red parameters for a very slow link (e.g., dialup) */
    152        1.1   thorpej #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    153        1.1   thorpej 				/* q_weight = 0.015625 */
    154        1.1   thorpej 
    155        1.1   thorpej /* fixed-point uses 12-bit decimal places */
    156        1.1   thorpej #define	FP_SHIFT	12	/* fixed-point shift */
    157        1.1   thorpej 
    158        1.1   thorpej /* red parameters for drop probability */
    159        1.1   thorpej #define	INV_P_MAX	10	/* inverse of max drop probability */
    160        1.1   thorpej #define	TH_MIN		5	/* min threshold */
    161        1.1   thorpej #define	TH_MAX		15	/* max threshold */
    162        1.1   thorpej 
    163  1.13.12.1     peter #define	RED_LIMIT	60	/* default max queue lenght */
    164  1.13.12.1     peter #define	RED_STATS		/* collect statistics */
    165        1.1   thorpej 
    166        1.1   thorpej /*
    167        1.1   thorpej  * our default policy for forced-drop is drop-tail.
    168        1.1   thorpej  * (in altq-1.1.2 or earlier, the default was random-drop.
    169        1.1   thorpej  * but it makes more sense to punish the cause of the surge.)
    170        1.1   thorpej  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
    171        1.1   thorpej  */
    172        1.1   thorpej 
    173  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    174        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    175        1.1   thorpej /*
    176  1.13.12.1     peter  * flow-valve is an extention to protect red from unresponsive flows
    177        1.1   thorpej  * and to promote end-to-end congestion control.
    178        1.1   thorpej  * flow-valve observes the average drop rates of the flows that have
    179        1.1   thorpej  * experienced packet drops in the recent past.
    180        1.1   thorpej  * when the average drop rate exceeds the threshold, the flow is
    181        1.1   thorpej  * blocked by the flow-valve.  the trapped flow should back off
    182        1.1   thorpej  * exponentially to escape from the flow-valve.
    183        1.1   thorpej  */
    184        1.1   thorpej #ifdef RED_RANDOM_DROP
    185        1.1   thorpej #error "random-drop can't be used with flow-valve!"
    186        1.1   thorpej #endif
    187        1.1   thorpej #endif /* ALTQ_FLOWVALVE */
    188        1.1   thorpej 
    189        1.1   thorpej /* red_list keeps all red_queue_t's allocated. */
    190        1.1   thorpej static red_queue_t *red_list = NULL;
    191        1.1   thorpej 
    192  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    193  1.13.12.1     peter 
    194        1.1   thorpej /* default red parameter values */
    195        1.1   thorpej static int default_th_min = TH_MIN;
    196        1.1   thorpej static int default_th_max = TH_MAX;
    197        1.1   thorpej static int default_inv_pmax = INV_P_MAX;
    198        1.1   thorpej 
    199  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    200        1.1   thorpej /* internal function prototypes */
    201  1.13.12.1     peter static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
    202  1.13.12.1     peter static struct mbuf *red_dequeue(struct ifaltq *, int);
    203  1.13.12.1     peter static int red_request(struct ifaltq *, int, void *);
    204  1.13.12.1     peter static void red_purgeq(red_queue_t *);
    205  1.13.12.1     peter static int red_detach(red_queue_t *);
    206        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    207  1.13.12.1     peter static inline struct fve *flowlist_lookup(struct flowvalve *,
    208  1.13.12.1     peter 			 struct altq_pktattr *, struct timeval *);
    209  1.13.12.1     peter static inline struct fve *flowlist_reclaim(struct flowvalve *,
    210  1.13.12.1     peter 					     struct altq_pktattr *);
    211  1.13.12.1     peter static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
    212  1.13.12.1     peter static inline int fv_p2f(struct flowvalve *, int);
    213  1.13.12.1     peter static struct flowvalve *fv_alloc(struct red *);
    214  1.13.12.1     peter static void fv_destroy(struct flowvalve *);
    215  1.13.12.1     peter static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
    216  1.13.12.1     peter 			struct fve **);
    217  1.13.12.1     peter static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
    218  1.13.12.1     peter 			 struct fve *);
    219        1.1   thorpej #endif
    220  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    221        1.1   thorpej 
    222        1.1   thorpej /*
    223  1.13.12.1     peter  * red support routines
    224        1.1   thorpej  */
    225  1.13.12.1     peter red_t *
    226  1.13.12.1     peter red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    227  1.13.12.1     peter    int pkttime)
    228        1.1   thorpej {
    229  1.13.12.1     peter 	red_t	*rp;
    230  1.13.12.1     peter 	int	 w, i;
    231  1.13.12.1     peter 	int	 npkts_per_sec;
    232        1.1   thorpej 
    233  1.13.12.1     peter 	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
    234  1.13.12.1     peter 	if (rp == NULL)
    235  1.13.12.1     peter 		return (NULL);
    236  1.13.12.1     peter 	(void)memset(rp, 0, sizeof(red_t));
    237        1.1   thorpej 
    238  1.13.12.1     peter 	rp->red_avg = 0;
    239  1.13.12.1     peter 	rp->red_idle = 1;
    240        1.1   thorpej 
    241  1.13.12.1     peter 	if (weight == 0)
    242  1.13.12.1     peter 		rp->red_weight = W_WEIGHT;
    243  1.13.12.1     peter 	else
    244  1.13.12.1     peter 		rp->red_weight = weight;
    245  1.13.12.1     peter 	if (inv_pmax == 0)
    246  1.13.12.1     peter 		rp->red_inv_pmax = default_inv_pmax;
    247  1.13.12.1     peter 	else
    248  1.13.12.1     peter 		rp->red_inv_pmax = inv_pmax;
    249  1.13.12.1     peter 	if (th_min == 0)
    250  1.13.12.1     peter 		rp->red_thmin = default_th_min;
    251  1.13.12.1     peter 	else
    252  1.13.12.1     peter 		rp->red_thmin = th_min;
    253  1.13.12.1     peter 	if (th_max == 0)
    254  1.13.12.1     peter 		rp->red_thmax = default_th_max;
    255  1.13.12.1     peter 	else
    256  1.13.12.1     peter 		rp->red_thmax = th_max;
    257        1.1   thorpej 
    258  1.13.12.1     peter 	rp->red_flags = flags;
    259       1.11     perry 
    260  1.13.12.1     peter 	if (pkttime == 0)
    261  1.13.12.1     peter 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    262  1.13.12.1     peter 		rp->red_pkttime = 800;
    263  1.13.12.1     peter 	else
    264  1.13.12.1     peter 		rp->red_pkttime = pkttime;
    265        1.1   thorpej 
    266  1.13.12.1     peter 	if (weight == 0) {
    267  1.13.12.1     peter 		/* when the link is very slow, adjust red parameters */
    268  1.13.12.1     peter 		npkts_per_sec = 1000000 / rp->red_pkttime;
    269  1.13.12.1     peter 		if (npkts_per_sec < 50) {
    270  1.13.12.1     peter 			/* up to about 400Kbps */
    271  1.13.12.1     peter 			rp->red_weight = W_WEIGHT_2;
    272  1.13.12.1     peter 		} else if (npkts_per_sec < 300) {
    273  1.13.12.1     peter 			/* up to about 2.4Mbps */
    274  1.13.12.1     peter 			rp->red_weight = W_WEIGHT_1;
    275        1.1   thorpej 		}
    276  1.13.12.1     peter 	}
    277        1.1   thorpej 
    278  1.13.12.1     peter 	/* calculate wshift.  weight must be power of 2 */
    279  1.13.12.1     peter 	w = rp->red_weight;
    280  1.13.12.1     peter 	for (i = 0; w > 1; i++)
    281  1.13.12.1     peter 		w = w >> 1;
    282  1.13.12.1     peter 	rp->red_wshift = i;
    283  1.13.12.1     peter 	w = 1 << rp->red_wshift;
    284  1.13.12.1     peter 	if (w != rp->red_weight) {
    285  1.13.12.1     peter 		printf("invalid weight value %d for red! use %d\n",
    286  1.13.12.1     peter 		       rp->red_weight, w);
    287  1.13.12.1     peter 		rp->red_weight = w;
    288  1.13.12.1     peter 	}
    289        1.1   thorpej 
    290  1.13.12.1     peter 	/*
    291  1.13.12.1     peter 	 * thmin_s and thmax_s are scaled versions of th_min and th_max
    292  1.13.12.1     peter 	 * to be compared with avg.
    293  1.13.12.1     peter 	 */
    294  1.13.12.1     peter 	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
    295  1.13.12.1     peter 	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
    296        1.1   thorpej 
    297  1.13.12.1     peter 	/*
    298  1.13.12.1     peter 	 * precompute probability denominator
    299  1.13.12.1     peter 	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
    300  1.13.12.1     peter 	 */
    301  1.13.12.1     peter 	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
    302  1.13.12.1     peter 			 * rp->red_inv_pmax) << FP_SHIFT;
    303        1.1   thorpej 
    304  1.13.12.1     peter 	/* allocate weight table */
    305  1.13.12.1     peter 	rp->red_wtab = wtab_alloc(rp->red_weight);
    306        1.1   thorpej 
    307  1.13.12.1     peter 	microtime(&rp->red_last);
    308  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    309  1.13.12.1     peter 	if (flags & REDF_FLOWVALVE)
    310  1.13.12.1     peter 		rp->red_flowvalve = fv_alloc(rp);
    311  1.13.12.1     peter 	/* if fv_alloc failes, flowvalve is just disabled */
    312  1.13.12.1     peter #endif
    313  1.13.12.1     peter 	return (rp);
    314  1.13.12.1     peter }
    315        1.1   thorpej 
    316  1.13.12.1     peter void
    317  1.13.12.1     peter red_destroy(red_t *rp)
    318  1.13.12.1     peter {
    319  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    320  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    321  1.13.12.1     peter 	if (rp->red_flowvalve != NULL)
    322  1.13.12.1     peter 		fv_destroy(rp->red_flowvalve);
    323  1.13.12.1     peter #endif
    324  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    325  1.13.12.1     peter 	wtab_destroy(rp->red_wtab);
    326  1.13.12.1     peter 	FREE(rp, M_DEVBUF);
    327  1.13.12.1     peter }
    328        1.1   thorpej 
    329  1.13.12.1     peter void
    330  1.13.12.1     peter red_getstats(red_t *rp, struct redstats *sp)
    331  1.13.12.1     peter {
    332  1.13.12.1     peter 	sp->q_avg		= rp->red_avg >> rp->red_wshift;
    333  1.13.12.1     peter 	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
    334  1.13.12.1     peter 	sp->drop_cnt		= rp->red_stats.drop_cnt;
    335  1.13.12.1     peter 	sp->drop_forced		= rp->red_stats.drop_forced;
    336  1.13.12.1     peter 	sp->drop_unforced	= rp->red_stats.drop_unforced;
    337  1.13.12.1     peter 	sp->marked_packets	= rp->red_stats.marked_packets;
    338  1.13.12.1     peter }
    339        1.1   thorpej 
    340  1.13.12.1     peter int
    341  1.13.12.1     peter red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    342  1.13.12.1     peter     struct altq_pktattr *pktattr)
    343  1.13.12.1     peter {
    344  1.13.12.1     peter 	int avg, droptype;
    345  1.13.12.1     peter 	int n;
    346  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    347  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    348  1.13.12.1     peter 	struct fve *fve = NULL;
    349        1.1   thorpej 
    350  1.13.12.1     peter 	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
    351  1.13.12.1     peter 		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
    352  1.13.12.1     peter 			m_freem(m);
    353  1.13.12.1     peter 			return (-1);
    354        1.1   thorpej 		}
    355  1.13.12.1     peter #endif
    356  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    357        1.1   thorpej 
    358  1.13.12.1     peter 	avg = rp->red_avg;
    359        1.1   thorpej 
    360  1.13.12.1     peter 	/*
    361  1.13.12.1     peter 	 * if we were idle, we pretend that n packets arrived during
    362  1.13.12.1     peter 	 * the idle period.
    363  1.13.12.1     peter 	 */
    364  1.13.12.1     peter 	if (rp->red_idle) {
    365  1.13.12.1     peter 		struct timeval now;
    366  1.13.12.1     peter 		int t;
    367        1.1   thorpej 
    368  1.13.12.1     peter 		rp->red_idle = 0;
    369  1.13.12.1     peter 		microtime(&now);
    370  1.13.12.1     peter 		t = (now.tv_sec - rp->red_last.tv_sec);
    371  1.13.12.1     peter 		if (t > 60) {
    372  1.13.12.1     peter 			/*
    373  1.13.12.1     peter 			 * being idle for more than 1 minute, set avg to zero.
    374  1.13.12.1     peter 			 * this prevents t from overflow.
    375  1.13.12.1     peter 			 */
    376  1.13.12.1     peter 			avg = 0;
    377  1.13.12.1     peter 		} else {
    378  1.13.12.1     peter 			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
    379  1.13.12.1     peter 			n = t / rp->red_pkttime - 1;
    380        1.1   thorpej 
    381  1.13.12.1     peter 			/* the following line does (avg = (1 - Wq)^n * avg) */
    382  1.13.12.1     peter 			if (n > 0)
    383  1.13.12.1     peter 				avg = (avg >> FP_SHIFT) *
    384  1.13.12.1     peter 				    pow_w(rp->red_wtab, n);
    385  1.13.12.1     peter 		}
    386  1.13.12.1     peter 	}
    387        1.1   thorpej 
    388  1.13.12.1     peter 	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
    389  1.13.12.1     peter 	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
    390  1.13.12.1     peter 	rp->red_avg = avg;		/* save the new value */
    391        1.1   thorpej 
    392  1.13.12.1     peter 	/*
    393  1.13.12.1     peter 	 * red_count keeps a tally of arriving traffic that has not
    394  1.13.12.1     peter 	 * been dropped.
    395  1.13.12.1     peter 	 */
    396  1.13.12.1     peter 	rp->red_count++;
    397        1.1   thorpej 
    398  1.13.12.1     peter 	/* see if we drop early */
    399  1.13.12.1     peter 	droptype = DTYPE_NODROP;
    400  1.13.12.1     peter 	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
    401  1.13.12.1     peter 		if (avg >= rp->red_thmax_s) {
    402  1.13.12.1     peter 			/* avg >= th_max: forced drop */
    403  1.13.12.1     peter 			droptype = DTYPE_FORCED;
    404  1.13.12.1     peter 		} else if (rp->red_old == 0) {
    405  1.13.12.1     peter 			/* first exceeds th_min */
    406  1.13.12.1     peter 			rp->red_count = 1;
    407  1.13.12.1     peter 			rp->red_old = 1;
    408  1.13.12.1     peter 		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
    409  1.13.12.1     peter 				      rp->red_probd, rp->red_count)) {
    410  1.13.12.1     peter 			/* mark or drop by red */
    411  1.13.12.1     peter 			if ((rp->red_flags & REDF_ECN) &&
    412  1.13.12.1     peter 			    mark_ecn(m, pktattr, rp->red_flags)) {
    413  1.13.12.1     peter 				/* successfully marked.  do not drop. */
    414  1.13.12.1     peter 				rp->red_count = 0;
    415  1.13.12.1     peter #ifdef RED_STATS
    416  1.13.12.1     peter 				rp->red_stats.marked_packets++;
    417  1.13.12.1     peter #endif
    418  1.13.12.1     peter 			} else {
    419  1.13.12.1     peter 				/* unforced drop by red */
    420  1.13.12.1     peter 				droptype = DTYPE_EARLY;
    421        1.1   thorpej 			}
    422  1.13.12.1     peter 		}
    423  1.13.12.1     peter 	} else {
    424  1.13.12.1     peter 		/* avg < th_min */
    425  1.13.12.1     peter 		rp->red_old = 0;
    426  1.13.12.1     peter 	}
    427        1.1   thorpej 
    428  1.13.12.1     peter 	/*
    429  1.13.12.1     peter 	 * if the queue length hits the hard limit, it's a forced drop.
    430  1.13.12.1     peter 	 */
    431  1.13.12.1     peter 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
    432  1.13.12.1     peter 		droptype = DTYPE_FORCED;
    433       1.11     perry 
    434  1.13.12.1     peter #ifdef RED_RANDOM_DROP
    435  1.13.12.1     peter 	/* if successful or forced drop, enqueue this packet. */
    436  1.13.12.1     peter 	if (droptype != DTYPE_EARLY)
    437  1.13.12.1     peter 		_addq(q, m);
    438  1.13.12.1     peter #else
    439  1.13.12.1     peter 	/* if successful, enqueue this packet. */
    440  1.13.12.1     peter 	if (droptype == DTYPE_NODROP)
    441  1.13.12.1     peter 		_addq(q, m);
    442  1.13.12.1     peter #endif
    443  1.13.12.1     peter 	if (droptype != DTYPE_NODROP) {
    444  1.13.12.1     peter 		if (droptype == DTYPE_EARLY) {
    445  1.13.12.1     peter 			/* drop the incoming packet */
    446  1.13.12.1     peter #ifdef RED_STATS
    447  1.13.12.1     peter 			rp->red_stats.drop_unforced++;
    448  1.13.12.1     peter #endif
    449  1.13.12.1     peter 		} else {
    450  1.13.12.1     peter 			/* forced drop, select a victim packet in the queue. */
    451  1.13.12.1     peter #ifdef RED_RANDOM_DROP
    452  1.13.12.1     peter 			m = _getq_random(q);
    453  1.13.12.1     peter #endif
    454  1.13.12.1     peter #ifdef RED_STATS
    455  1.13.12.1     peter 			rp->red_stats.drop_forced++;
    456  1.13.12.1     peter #endif
    457  1.13.12.1     peter 		}
    458  1.13.12.1     peter #ifdef RED_STATS
    459  1.13.12.1     peter 		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
    460  1.13.12.1     peter #endif
    461  1.13.12.1     peter 		rp->red_count = 0;
    462  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    463  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    464  1.13.12.1     peter 		if (rp->red_flowvalve != NULL)
    465  1.13.12.1     peter 			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
    466  1.13.12.1     peter #endif
    467  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    468  1.13.12.1     peter 		m_freem(m);
    469  1.13.12.1     peter 		return (-1);
    470        1.1   thorpej 	}
    471  1.13.12.1     peter 	/* successfully queued */
    472  1.13.12.1     peter #ifdef RED_STATS
    473  1.13.12.1     peter 	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
    474  1.13.12.1     peter #endif
    475  1.13.12.1     peter 	return (0);
    476        1.1   thorpej }
    477        1.1   thorpej 
    478  1.13.12.1     peter /*
    479  1.13.12.1     peter  * early-drop probability is calculated as follows:
    480  1.13.12.1     peter  *   prob = p_max * (avg - th_min) / (th_max - th_min)
    481  1.13.12.1     peter  *   prob_a = prob / (2 - count*prob)
    482  1.13.12.1     peter  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
    483  1.13.12.1     peter  * here prob_a increases as successive undrop count increases.
    484  1.13.12.1     peter  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
    485  1.13.12.1     peter  * becomes 1 when (count >= (2 / prob))).
    486  1.13.12.1     peter  */
    487  1.13.12.1     peter int
    488  1.13.12.1     peter drop_early(int fp_len, int fp_probd, int count)
    489        1.1   thorpej {
    490  1.13.12.1     peter 	int	d;		/* denominator of drop-probability */
    491        1.1   thorpej 
    492  1.13.12.1     peter 	d = fp_probd - count * fp_len;
    493  1.13.12.1     peter 	if (d <= 0)
    494  1.13.12.1     peter 		/* count exceeds the hard limit: drop or mark */
    495  1.13.12.1     peter 		return (1);
    496        1.1   thorpej 
    497  1.13.12.1     peter 	/*
    498  1.13.12.1     peter 	 * now the range of d is [1..600] in fixed-point. (when
    499  1.13.12.1     peter 	 * th_max-th_min=10 and p_max=1/30)
    500  1.13.12.1     peter 	 * drop probability = (avg - TH_MIN) / d
    501  1.13.12.1     peter 	 */
    502        1.1   thorpej 
    503  1.13.12.1     peter 	if ((arc4random() % d) < fp_len) {
    504  1.13.12.1     peter 		/* drop or mark */
    505  1.13.12.1     peter 		return (1);
    506        1.1   thorpej 	}
    507  1.13.12.1     peter 	/* no drop/mark */
    508  1.13.12.1     peter 	return (0);
    509        1.1   thorpej }
    510        1.1   thorpej 
    511        1.1   thorpej /*
    512  1.13.12.1     peter  * try to mark CE bit to the packet.
    513  1.13.12.1     peter  *    returns 1 if successfully marked, 0 otherwise.
    514        1.1   thorpej  */
    515  1.13.12.1     peter int
    516  1.13.12.1     peter mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
    517  1.13.12.1     peter {
    518  1.13.12.1     peter 	struct mbuf	*m0;
    519  1.13.12.1     peter 	struct m_tag	*t;
    520  1.13.12.1     peter 	struct altq_tag	*at;
    521  1.13.12.1     peter 	void		*hdr;
    522  1.13.12.1     peter 	int		 af;
    523  1.13.12.1     peter 
    524  1.13.12.1     peter 	t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
    525  1.13.12.1     peter 	if (t != NULL) {
    526  1.13.12.1     peter 		at = (struct altq_tag *)(t + 1);
    527  1.13.12.1     peter 		if (at == NULL)
    528  1.13.12.1     peter 			return (0);
    529  1.13.12.1     peter 		af = at->af;
    530  1.13.12.1     peter 		hdr = at->hdr;
    531  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    532  1.13.12.1     peter 	} else if (pktattr != NULL) {
    533  1.13.12.1     peter 		af = pktattr->pattr_af;
    534  1.13.12.1     peter 		hdr = pktattr->pattr_hdr;
    535  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
    536  1.13.12.1     peter 	} else
    537  1.13.12.1     peter 		return (0);
    538        1.1   thorpej 
    539  1.13.12.1     peter 	if (af != AF_INET && af != AF_INET6)
    540  1.13.12.1     peter 		return (0);
    541       1.11     perry 
    542  1.13.12.1     peter 	/* verify that pattr_hdr is within the mbuf data */
    543  1.13.12.1     peter 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
    544  1.13.12.1     peter 		if (((caddr_t)hdr >= m0->m_data) &&
    545  1.13.12.1     peter 		    ((caddr_t)hdr < m0->m_data + m0->m_len))
    546  1.13.12.1     peter 			break;
    547  1.13.12.1     peter 	if (m0 == NULL) {
    548  1.13.12.1     peter 		/* ick, tag info is stale */
    549  1.13.12.1     peter 		return (0);
    550  1.13.12.1     peter 	}
    551        1.1   thorpej 
    552  1.13.12.1     peter 	switch (af) {
    553  1.13.12.1     peter 	case AF_INET:
    554  1.13.12.1     peter 		if (flags & REDF_ECN4) {
    555  1.13.12.1     peter 			struct ip *ip = hdr;
    556  1.13.12.1     peter 			u_int8_t otos;
    557  1.13.12.1     peter 			int sum;
    558        1.1   thorpej 
    559  1.13.12.1     peter 			if (ip->ip_v != 4)
    560  1.13.12.1     peter 				return (0);	/* version mismatch! */
    561        1.1   thorpej 
    562  1.13.12.1     peter 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
    563  1.13.12.1     peter 				return (0);	/* not-ECT */
    564  1.13.12.1     peter 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
    565  1.13.12.1     peter 				return (1);	/* already marked */
    566        1.1   thorpej 
    567  1.13.12.1     peter 			/*
    568  1.13.12.1     peter 			 * ecn-capable but not marked,
    569  1.13.12.1     peter 			 * mark CE and update checksum
    570  1.13.12.1     peter 			 */
    571  1.13.12.1     peter 			otos = ip->ip_tos;
    572  1.13.12.1     peter 			ip->ip_tos |= IPTOS_ECN_CE;
    573  1.13.12.1     peter 			/*
    574  1.13.12.1     peter 			 * update checksum (from RFC1624)
    575  1.13.12.1     peter 			 *	   HC' = ~(~HC + ~m + m')
    576  1.13.12.1     peter 			 */
    577  1.13.12.1     peter 			sum = ~ntohs(ip->ip_sum) & 0xffff;
    578  1.13.12.1     peter 			sum += (~otos & 0xffff) + ip->ip_tos;
    579  1.13.12.1     peter 			sum = (sum >> 16) + (sum & 0xffff);
    580  1.13.12.1     peter 			sum += (sum >> 16);  /* add carry */
    581  1.13.12.1     peter 			ip->ip_sum = htons(~sum & 0xffff);
    582  1.13.12.1     peter 			return (1);
    583        1.1   thorpej 		}
    584  1.13.12.1     peter 		break;
    585  1.13.12.1     peter #ifdef INET6
    586  1.13.12.1     peter 	case AF_INET6:
    587  1.13.12.1     peter 		if (flags & REDF_ECN6) {
    588  1.13.12.1     peter 			struct ip6_hdr *ip6 = hdr;
    589  1.13.12.1     peter 			u_int32_t flowlabel;
    590        1.1   thorpej 
    591  1.13.12.1     peter 			flowlabel = ntohl(ip6->ip6_flow);
    592  1.13.12.1     peter 			if ((flowlabel >> 28) != 6)
    593  1.13.12.1     peter 				return (0);	/* version mismatch! */
    594  1.13.12.1     peter 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
    595  1.13.12.1     peter 			    (IPTOS_ECN_NOTECT << 20))
    596  1.13.12.1     peter 				return (0);	/* not-ECT */
    597  1.13.12.1     peter 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
    598  1.13.12.1     peter 			    (IPTOS_ECN_CE << 20))
    599  1.13.12.1     peter 				return (1);	/* already marked */
    600  1.13.12.1     peter 			/*
    601  1.13.12.1     peter 			 * ecn-capable but not marked,  mark CE
    602  1.13.12.1     peter 			 */
    603  1.13.12.1     peter 			flowlabel |= (IPTOS_ECN_CE << 20);
    604  1.13.12.1     peter 			ip6->ip6_flow = htonl(flowlabel);
    605  1.13.12.1     peter 			return (1);
    606  1.13.12.1     peter 		}
    607  1.13.12.1     peter 		break;
    608  1.13.12.1     peter #endif  /* INET6 */
    609        1.1   thorpej 	}
    610       1.11     perry 
    611  1.13.12.1     peter 	/* not marked */
    612  1.13.12.1     peter 	return (0);
    613        1.1   thorpej }
    614        1.1   thorpej 
    615  1.13.12.1     peter struct mbuf *
    616  1.13.12.1     peter red_getq(rp, q)
    617        1.1   thorpej 	red_t *rp;
    618  1.13.12.1     peter 	class_queue_t *q;
    619        1.1   thorpej {
    620  1.13.12.1     peter 	struct mbuf *m;
    621        1.1   thorpej 
    622  1.13.12.1     peter 	if ((m = _getq(q)) == NULL) {
    623  1.13.12.1     peter 		if (rp->red_idle == 0) {
    624  1.13.12.1     peter 			rp->red_idle = 1;
    625  1.13.12.1     peter 			microtime(&rp->red_last);
    626  1.13.12.1     peter 		}
    627  1.13.12.1     peter 		return NULL;
    628  1.13.12.1     peter 	}
    629  1.13.12.1     peter 
    630  1.13.12.1     peter 	rp->red_idle = 0;
    631  1.13.12.1     peter 	return (m);
    632        1.1   thorpej }
    633        1.1   thorpej 
    634        1.1   thorpej /*
    635  1.13.12.1     peter  * helper routine to calibrate avg during idle.
    636  1.13.12.1     peter  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
    637  1.13.12.1     peter  * here Wq = 1/weight and the code assumes Wq is close to zero.
    638        1.1   thorpej  *
    639  1.13.12.1     peter  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
    640        1.1   thorpej  */
    641  1.13.12.1     peter static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
    642  1.13.12.1     peter 
    643  1.13.12.1     peter struct wtab *
    644  1.13.12.1     peter wtab_alloc(int weight)
    645        1.1   thorpej {
    646  1.13.12.1     peter 	struct wtab	*w;
    647  1.13.12.1     peter 	int		 i;
    648        1.1   thorpej 
    649  1.13.12.1     peter 	for (w = wtab_list; w != NULL; w = w->w_next)
    650  1.13.12.1     peter 		if (w->w_weight == weight) {
    651  1.13.12.1     peter 			w->w_refcount++;
    652  1.13.12.1     peter 			return (w);
    653  1.13.12.1     peter 		}
    654  1.13.12.1     peter 
    655  1.13.12.1     peter 	MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
    656  1.13.12.1     peter 	if (w == NULL)
    657  1.13.12.1     peter 		panic("wtab_alloc: malloc failed!");
    658  1.13.12.1     peter 	(void)memset(w, 0, sizeof(struct wtab));
    659  1.13.12.1     peter 	w->w_weight = weight;
    660  1.13.12.1     peter 	w->w_refcount = 1;
    661  1.13.12.1     peter 	w->w_next = wtab_list;
    662  1.13.12.1     peter 	wtab_list = w;
    663  1.13.12.1     peter 
    664  1.13.12.1     peter 	/* initialize the weight table */
    665  1.13.12.1     peter 	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
    666  1.13.12.1     peter 	for (i = 1; i < 32; i++) {
    667  1.13.12.1     peter 		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
    668  1.13.12.1     peter 		if (w->w_tab[i] == 0 && w->w_param_max == 0)
    669  1.13.12.1     peter 			w->w_param_max = 1 << i;
    670  1.13.12.1     peter 	}
    671  1.13.12.1     peter 
    672  1.13.12.1     peter 	return (w);
    673        1.1   thorpej }
    674        1.1   thorpej 
    675        1.1   thorpej int
    676  1.13.12.1     peter wtab_destroy(struct wtab *w)
    677        1.1   thorpej {
    678  1.13.12.1     peter 	struct wtab	*prev;
    679        1.1   thorpej 
    680  1.13.12.1     peter 	if (--w->w_refcount > 0)
    681  1.13.12.1     peter 		return (0);
    682  1.13.12.1     peter 
    683  1.13.12.1     peter 	if (wtab_list == w)
    684  1.13.12.1     peter 		wtab_list = w->w_next;
    685  1.13.12.1     peter 	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
    686  1.13.12.1     peter 		if (prev->w_next == w) {
    687  1.13.12.1     peter 			prev->w_next = w->w_next;
    688  1.13.12.1     peter 			break;
    689        1.1   thorpej 		}
    690        1.1   thorpej 
    691  1.13.12.1     peter 	FREE(w, M_DEVBUF);
    692  1.13.12.1     peter 	return (0);
    693  1.13.12.1     peter }
    694        1.1   thorpej 
    695  1.13.12.1     peter int32_t
    696  1.13.12.1     peter pow_w(struct wtab *w, int n)
    697  1.13.12.1     peter {
    698  1.13.12.1     peter 	int	i, bit;
    699  1.13.12.1     peter 	int32_t	val;
    700        1.1   thorpej 
    701  1.13.12.1     peter 	if (n >= w->w_param_max)
    702  1.13.12.1     peter 		return (0);
    703        1.1   thorpej 
    704  1.13.12.1     peter 	val = 1 << FP_SHIFT;
    705  1.13.12.1     peter 	if (n <= 0)
    706  1.13.12.1     peter 		return (val);
    707  1.13.12.1     peter 
    708  1.13.12.1     peter 	bit = 1;
    709  1.13.12.1     peter 	i = 0;
    710  1.13.12.1     peter 	while (n) {
    711  1.13.12.1     peter 		if (n & bit) {
    712  1.13.12.1     peter 			val = (val * w->w_tab[i]) >> FP_SHIFT;
    713  1.13.12.1     peter 			n &= ~bit;
    714        1.1   thorpej 		}
    715  1.13.12.1     peter 		i++;
    716  1.13.12.1     peter 		bit <<=  1;
    717        1.1   thorpej 	}
    718  1.13.12.1     peter 	return (val);
    719  1.13.12.1     peter }
    720        1.1   thorpej 
    721  1.13.12.1     peter #ifdef ALTQ3_COMPAT
    722  1.13.12.1     peter /*
    723  1.13.12.1     peter  * red device interface
    724  1.13.12.1     peter  */
    725  1.13.12.1     peter altqdev_decl(red);
    726        1.1   thorpej 
    727  1.13.12.1     peter int
    728  1.13.12.1     peter redopen(dev, flag, fmt, l)
    729  1.13.12.1     peter 	dev_t dev;
    730  1.13.12.1     peter 	int flag, fmt;
    731  1.13.12.1     peter 	struct lwp *l;
    732  1.13.12.1     peter {
    733  1.13.12.1     peter 	/* everything will be done when the queueing scheme is attached. */
    734  1.13.12.1     peter 	return 0;
    735  1.13.12.1     peter }
    736  1.13.12.1     peter 
    737  1.13.12.1     peter int
    738  1.13.12.1     peter redclose(dev, flag, fmt, l)
    739  1.13.12.1     peter 	dev_t dev;
    740  1.13.12.1     peter 	int flag, fmt;
    741  1.13.12.1     peter 	struct lwp *l;
    742  1.13.12.1     peter {
    743  1.13.12.1     peter 	red_queue_t *rqp;
    744  1.13.12.1     peter 	int err, error = 0;
    745  1.13.12.1     peter 
    746  1.13.12.1     peter 	while ((rqp = red_list) != NULL) {
    747  1.13.12.1     peter 		/* destroy all */
    748  1.13.12.1     peter 		err = red_detach(rqp);
    749  1.13.12.1     peter 		if (err != 0 && error == 0)
    750  1.13.12.1     peter 			error = err;
    751        1.1   thorpej 	}
    752        1.1   thorpej 
    753  1.13.12.1     peter 	return error;
    754  1.13.12.1     peter }
    755        1.1   thorpej 
    756  1.13.12.1     peter int
    757  1.13.12.1     peter redioctl(dev, cmd, addr, flag, l)
    758  1.13.12.1     peter 	dev_t dev;
    759  1.13.12.1     peter 	ioctlcmd_t cmd;
    760  1.13.12.1     peter 	caddr_t addr;
    761  1.13.12.1     peter 	int flag;
    762  1.13.12.1     peter 	struct lwp *l;
    763  1.13.12.1     peter {
    764  1.13.12.1     peter 	red_queue_t *rqp;
    765  1.13.12.1     peter 	struct red_interface *ifacep;
    766  1.13.12.1     peter 	struct ifnet *ifp;
    767  1.13.12.1     peter 	struct proc *p = l->l_proc;
    768  1.13.12.1     peter 	int	error = 0;
    769  1.13.12.1     peter 
    770  1.13.12.1     peter 	/* check super-user privilege */
    771  1.13.12.1     peter 	switch (cmd) {
    772  1.13.12.1     peter 	case RED_GETSTATS:
    773  1.13.12.1     peter 		break;
    774  1.13.12.1     peter 	default:
    775  1.13.12.1     peter #if (__FreeBSD_version > 400000)
    776  1.13.12.1     peter 		if ((error = suser(p)) != 0)
    777        1.1   thorpej #else
    778  1.13.12.1     peter 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
    779        1.1   thorpej #endif
    780  1.13.12.1     peter 			return (error);
    781  1.13.12.1     peter 		break;
    782  1.13.12.1     peter 	}
    783  1.13.12.1     peter 
    784  1.13.12.1     peter 	switch (cmd) {
    785  1.13.12.1     peter 
    786  1.13.12.1     peter 	case RED_ENABLE:
    787  1.13.12.1     peter 		ifacep = (struct red_interface *)addr;
    788  1.13.12.1     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    789  1.13.12.1     peter 			error = EBADF;
    790  1.13.12.1     peter 			break;
    791        1.1   thorpej 		}
    792  1.13.12.1     peter 		error = altq_enable(rqp->rq_ifq);
    793  1.13.12.1     peter 		break;
    794  1.13.12.1     peter 
    795  1.13.12.1     peter 	case RED_DISABLE:
    796  1.13.12.1     peter 		ifacep = (struct red_interface *)addr;
    797  1.13.12.1     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    798  1.13.12.1     peter 			error = EBADF;
    799  1.13.12.1     peter 			break;
    800  1.13.12.1     peter 		}
    801  1.13.12.1     peter 		error = altq_disable(rqp->rq_ifq);
    802  1.13.12.1     peter 		break;
    803  1.13.12.1     peter 
    804  1.13.12.1     peter 	case RED_IF_ATTACH:
    805  1.13.12.1     peter 		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
    806  1.13.12.1     peter 		if (ifp == NULL) {
    807  1.13.12.1     peter 			error = ENXIO;
    808  1.13.12.1     peter 			break;
    809  1.13.12.1     peter 		}
    810  1.13.12.1     peter 
    811  1.13.12.1     peter 		/* allocate and initialize red_queue_t */
    812  1.13.12.1     peter 		MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
    813  1.13.12.1     peter 		if (rqp == NULL) {
    814  1.13.12.1     peter 			error = ENOMEM;
    815  1.13.12.1     peter 			break;
    816  1.13.12.1     peter 		}
    817  1.13.12.1     peter 		(void)memset(rqp, 0, sizeof(red_queue_t));
    818  1.13.12.1     peter 
    819  1.13.12.1     peter 		MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
    820  1.13.12.1     peter 		       M_DEVBUF, M_WAITOK);
    821  1.13.12.1     peter 		if (rqp->rq_q == NULL) {
    822  1.13.12.1     peter 			FREE(rqp, M_DEVBUF);
    823  1.13.12.1     peter 			error = ENOMEM;
    824  1.13.12.1     peter 			break;
    825  1.13.12.1     peter 		}
    826  1.13.12.1     peter 		(void)memset(rqp->rq_q, 0, sizeof(class_queue_t));
    827  1.13.12.1     peter 
    828  1.13.12.1     peter 		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
    829  1.13.12.1     peter 		if (rqp->rq_red == NULL) {
    830  1.13.12.1     peter 			FREE(rqp->rq_q, M_DEVBUF);
    831  1.13.12.1     peter 			FREE(rqp, M_DEVBUF);
    832  1.13.12.1     peter 			error = ENOMEM;
    833  1.13.12.1     peter 			break;
    834  1.13.12.1     peter 		}
    835  1.13.12.1     peter 
    836  1.13.12.1     peter 		rqp->rq_ifq = &ifp->if_snd;
    837  1.13.12.1     peter 		qtail(rqp->rq_q) = NULL;
    838  1.13.12.1     peter 		qlen(rqp->rq_q) = 0;
    839  1.13.12.1     peter 		qlimit(rqp->rq_q) = RED_LIMIT;
    840  1.13.12.1     peter 		qtype(rqp->rq_q) = Q_RED;
    841  1.13.12.1     peter 
    842  1.13.12.1     peter 		/*
    843  1.13.12.1     peter 		 * set RED to this ifnet structure.
    844  1.13.12.1     peter 		 */
    845  1.13.12.1     peter 		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
    846  1.13.12.1     peter 				    red_enqueue, red_dequeue, red_request,
    847  1.13.12.1     peter 				    NULL, NULL);
    848  1.13.12.1     peter 		if (error) {
    849  1.13.12.1     peter 			red_destroy(rqp->rq_red);
    850  1.13.12.1     peter 			FREE(rqp->rq_q, M_DEVBUF);
    851  1.13.12.1     peter 			FREE(rqp, M_DEVBUF);
    852  1.13.12.1     peter 			break;
    853  1.13.12.1     peter 		}
    854  1.13.12.1     peter 
    855  1.13.12.1     peter 		/* add this state to the red list */
    856  1.13.12.1     peter 		rqp->rq_next = red_list;
    857  1.13.12.1     peter 		red_list = rqp;
    858  1.13.12.1     peter 		break;
    859  1.13.12.1     peter 
    860  1.13.12.1     peter 	case RED_IF_DETACH:
    861  1.13.12.1     peter 		ifacep = (struct red_interface *)addr;
    862  1.13.12.1     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    863  1.13.12.1     peter 			error = EBADF;
    864  1.13.12.1     peter 			break;
    865  1.13.12.1     peter 		}
    866  1.13.12.1     peter 		error = red_detach(rqp);
    867  1.13.12.1     peter 		break;
    868  1.13.12.1     peter 
    869  1.13.12.1     peter 	case RED_GETSTATS:
    870  1.13.12.1     peter 		do {
    871  1.13.12.1     peter 			struct red_stats *q_stats;
    872  1.13.12.1     peter 			red_t *rp;
    873  1.13.12.1     peter 
    874  1.13.12.1     peter 			q_stats = (struct red_stats *)addr;
    875  1.13.12.1     peter 			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
    876  1.13.12.1     peter 					     ALTQT_RED)) == NULL) {
    877  1.13.12.1     peter 				error = EBADF;
    878  1.13.12.1     peter 				break;
    879  1.13.12.1     peter 			}
    880  1.13.12.1     peter 
    881  1.13.12.1     peter 			q_stats->q_len 	   = qlen(rqp->rq_q);
    882  1.13.12.1     peter 			q_stats->q_limit   = qlimit(rqp->rq_q);
    883  1.13.12.1     peter 
    884  1.13.12.1     peter 			rp = rqp->rq_red;
    885  1.13.12.1     peter 			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
    886  1.13.12.1     peter 			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
    887  1.13.12.1     peter 			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
    888  1.13.12.1     peter 			q_stats->drop_forced   = rp->red_stats.drop_forced;
    889  1.13.12.1     peter 			q_stats->drop_unforced = rp->red_stats.drop_unforced;
    890  1.13.12.1     peter 			q_stats->marked_packets = rp->red_stats.marked_packets;
    891  1.13.12.1     peter 
    892  1.13.12.1     peter 			q_stats->weight		= rp->red_weight;
    893  1.13.12.1     peter 			q_stats->inv_pmax	= rp->red_inv_pmax;
    894  1.13.12.1     peter 			q_stats->th_min		= rp->red_thmin;
    895  1.13.12.1     peter 			q_stats->th_max		= rp->red_thmax;
    896  1.13.12.1     peter 
    897        1.1   thorpej #ifdef ALTQ_FLOWVALVE
    898  1.13.12.1     peter 			if (rp->red_flowvalve != NULL) {
    899  1.13.12.1     peter 				struct flowvalve *fv = rp->red_flowvalve;
    900  1.13.12.1     peter 				q_stats->fv_flows    = fv->fv_flows;
    901  1.13.12.1     peter 				q_stats->fv_pass     = fv->fv_stats.pass;
    902  1.13.12.1     peter 				q_stats->fv_predrop  = fv->fv_stats.predrop;
    903  1.13.12.1     peter 				q_stats->fv_alloc    = fv->fv_stats.alloc;
    904  1.13.12.1     peter 				q_stats->fv_escape   = fv->fv_stats.escape;
    905  1.13.12.1     peter 			} else {
    906  1.13.12.1     peter #endif /* ALTQ_FLOWVALVE */
    907  1.13.12.1     peter 				q_stats->fv_flows    = 0;
    908  1.13.12.1     peter 				q_stats->fv_pass     = 0;
    909  1.13.12.1     peter 				q_stats->fv_predrop  = 0;
    910  1.13.12.1     peter 				q_stats->fv_alloc    = 0;
    911  1.13.12.1     peter 				q_stats->fv_escape   = 0;
    912  1.13.12.1     peter #ifdef ALTQ_FLOWVALVE
    913  1.13.12.1     peter 			}
    914  1.13.12.1     peter #endif /* ALTQ_FLOWVALVE */
    915  1.13.12.1     peter 		} while (/*CONSTCOND*/ 0);
    916  1.13.12.1     peter 		break;
    917  1.13.12.1     peter 
    918  1.13.12.1     peter 	case RED_CONFIG:
    919  1.13.12.1     peter 		do {
    920  1.13.12.1     peter 			struct red_conf *fc;
    921  1.13.12.1     peter 			red_t *new;
    922  1.13.12.1     peter 			int s, limit;
    923  1.13.12.1     peter 
    924  1.13.12.1     peter 			fc = (struct red_conf *)addr;
    925  1.13.12.1     peter 			if ((rqp = altq_lookup(fc->iface.red_ifname,
    926  1.13.12.1     peter 					       ALTQT_RED)) == NULL) {
    927  1.13.12.1     peter 				error = EBADF;
    928  1.13.12.1     peter 				break;
    929  1.13.12.1     peter 			}
    930  1.13.12.1     peter 			new = red_alloc(fc->red_weight,
    931  1.13.12.1     peter 					fc->red_inv_pmax,
    932  1.13.12.1     peter 					fc->red_thmin,
    933  1.13.12.1     peter 					fc->red_thmax,
    934  1.13.12.1     peter 					fc->red_flags,
    935  1.13.12.1     peter 					fc->red_pkttime);
    936  1.13.12.1     peter 			if (new == NULL) {
    937  1.13.12.1     peter 				error = ENOMEM;
    938  1.13.12.1     peter 				break;
    939  1.13.12.1     peter 			}
    940  1.13.12.1     peter 
    941  1.13.12.1     peter 			s = splnet();
    942  1.13.12.1     peter 			red_purgeq(rqp);
    943  1.13.12.1     peter 			limit = fc->red_limit;
    944  1.13.12.1     peter 			if (limit < fc->red_thmax)
    945  1.13.12.1     peter 				limit = fc->red_thmax;
    946  1.13.12.1     peter 			qlimit(rqp->rq_q) = limit;
    947  1.13.12.1     peter 			fc->red_limit = limit;	/* write back the new value */
    948  1.13.12.1     peter 
    949  1.13.12.1     peter 			red_destroy(rqp->rq_red);
    950  1.13.12.1     peter 			rqp->rq_red = new;
    951  1.13.12.1     peter 
    952  1.13.12.1     peter 			splx(s);
    953  1.13.12.1     peter 
    954  1.13.12.1     peter 			/* write back new values */
    955  1.13.12.1     peter 			fc->red_limit = limit;
    956  1.13.12.1     peter 			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
    957  1.13.12.1     peter 			fc->red_thmin = rqp->rq_red->red_thmin;
    958  1.13.12.1     peter 			fc->red_thmax = rqp->rq_red->red_thmax;
    959  1.13.12.1     peter 
    960  1.13.12.1     peter 		} while (/*CONSTCOND*/ 0);
    961  1.13.12.1     peter 		break;
    962  1.13.12.1     peter 
    963  1.13.12.1     peter 	case RED_SETDEFAULTS:
    964  1.13.12.1     peter 		do {
    965  1.13.12.1     peter 			struct redparams *rp;
    966  1.13.12.1     peter 
    967  1.13.12.1     peter 			rp = (struct redparams *)addr;
    968  1.13.12.1     peter 
    969  1.13.12.1     peter 			default_th_min = rp->th_min;
    970  1.13.12.1     peter 			default_th_max = rp->th_max;
    971  1.13.12.1     peter 			default_inv_pmax = rp->inv_pmax;
    972  1.13.12.1     peter 		} while (/*CONSTCOND*/ 0);
    973  1.13.12.1     peter 		break;
    974  1.13.12.1     peter 
    975  1.13.12.1     peter 	default:
    976  1.13.12.1     peter 		error = EINVAL;
    977  1.13.12.1     peter 		break;
    978        1.1   thorpej 	}
    979  1.13.12.1     peter 	return error;
    980        1.1   thorpej }
    981        1.1   thorpej 
    982  1.13.12.1     peter static int
    983  1.13.12.1     peter red_detach(rqp)
    984  1.13.12.1     peter 	red_queue_t *rqp;
    985        1.1   thorpej {
    986  1.13.12.1     peter 	red_queue_t *tmp;
    987  1.13.12.1     peter 	int error = 0;
    988        1.1   thorpej 
    989  1.13.12.1     peter 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    990  1.13.12.1     peter 		altq_disable(rqp->rq_ifq);
    991        1.1   thorpej 
    992  1.13.12.1     peter 	if ((error = altq_detach(rqp->rq_ifq)))
    993  1.13.12.1     peter 		return (error);
    994        1.1   thorpej 
    995  1.13.12.1     peter 	if (red_list == rqp)
    996  1.13.12.1     peter 		red_list = rqp->rq_next;
    997  1.13.12.1     peter 	else {
    998  1.13.12.1     peter 		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
    999  1.13.12.1     peter 			if (tmp->rq_next == rqp) {
   1000  1.13.12.1     peter 				tmp->rq_next = rqp->rq_next;
   1001  1.13.12.1     peter 				break;
   1002  1.13.12.1     peter 			}
   1003  1.13.12.1     peter 		if (tmp == NULL)
   1004  1.13.12.1     peter 			printf("red_detach: no state found in red_list!\n");
   1005        1.1   thorpej 	}
   1006  1.13.12.1     peter 
   1007  1.13.12.1     peter 	red_destroy(rqp->rq_red);
   1008  1.13.12.1     peter 	FREE(rqp->rq_q, M_DEVBUF);
   1009  1.13.12.1     peter 	FREE(rqp, M_DEVBUF);
   1010  1.13.12.1     peter 	return (error);
   1011        1.1   thorpej }
   1012        1.1   thorpej 
   1013        1.1   thorpej /*
   1014  1.13.12.1     peter  * enqueue routine:
   1015  1.13.12.1     peter  *
   1016  1.13.12.1     peter  *	returns: 0 when successfully queued.
   1017  1.13.12.1     peter  *		 ENOBUFS when drop occurs.
   1018        1.1   thorpej  */
   1019  1.13.12.1     peter static int
   1020  1.13.12.1     peter red_enqueue(ifq, m, pktattr)
   1021  1.13.12.1     peter 	struct ifaltq *ifq;
   1022        1.1   thorpej 	struct mbuf *m;
   1023        1.1   thorpej 	struct altq_pktattr *pktattr;
   1024        1.1   thorpej {
   1025  1.13.12.1     peter 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1026        1.1   thorpej 
   1027  1.13.12.1     peter 	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
   1028  1.13.12.1     peter 		return ENOBUFS;
   1029  1.13.12.1     peter 	ifq->ifq_len++;
   1030  1.13.12.1     peter 	return 0;
   1031        1.1   thorpej }
   1032        1.1   thorpej 
   1033        1.1   thorpej /*
   1034        1.1   thorpej  * dequeue routine:
   1035        1.3   thorpej  *	must be called in splnet.
   1036        1.1   thorpej  *
   1037        1.1   thorpej  *	returns: mbuf dequeued.
   1038        1.1   thorpej  *		 NULL when no packet is available in the queue.
   1039        1.1   thorpej  */
   1040        1.1   thorpej 
   1041        1.1   thorpej static struct mbuf *
   1042        1.1   thorpej red_dequeue(ifq, op)
   1043        1.1   thorpej 	struct ifaltq *ifq;
   1044        1.1   thorpej 	int op;
   1045        1.1   thorpej {
   1046        1.1   thorpej 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1047        1.1   thorpej 	struct mbuf *m;
   1048        1.1   thorpej 
   1049        1.1   thorpej 	if (op == ALTDQ_POLL)
   1050        1.1   thorpej 		return qhead(rqp->rq_q);
   1051        1.1   thorpej 
   1052        1.1   thorpej 	/* op == ALTDQ_REMOVE */
   1053        1.1   thorpej 	m =  red_getq(rqp->rq_red, rqp->rq_q);
   1054        1.1   thorpej 	if (m != NULL)
   1055        1.1   thorpej 		ifq->ifq_len--;
   1056        1.1   thorpej 	return (m);
   1057        1.1   thorpej }
   1058        1.1   thorpej 
   1059        1.1   thorpej static int
   1060        1.1   thorpej red_request(ifq, req, arg)
   1061        1.1   thorpej 	struct ifaltq *ifq;
   1062        1.1   thorpej 	int req;
   1063        1.1   thorpej 	void *arg;
   1064        1.1   thorpej {
   1065        1.1   thorpej 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1066        1.1   thorpej 
   1067        1.1   thorpej 	switch (req) {
   1068        1.1   thorpej 	case ALTRQ_PURGE:
   1069        1.1   thorpej 		red_purgeq(rqp);
   1070        1.1   thorpej 		break;
   1071        1.1   thorpej 	}
   1072        1.1   thorpej 	return (0);
   1073        1.1   thorpej }
   1074        1.1   thorpej 
   1075        1.1   thorpej static void
   1076        1.1   thorpej red_purgeq(rqp)
   1077        1.1   thorpej 	red_queue_t *rqp;
   1078        1.1   thorpej {
   1079        1.1   thorpej 	_flushq(rqp->rq_q);
   1080        1.1   thorpej 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
   1081        1.1   thorpej 		rqp->rq_ifq->ifq_len = 0;
   1082        1.1   thorpej }
   1083        1.1   thorpej 
   1084        1.1   thorpej #ifdef ALTQ_FLOWVALVE
   1085        1.1   thorpej 
   1086        1.1   thorpej #define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
   1087        1.1   thorpej #define	FV_PSCALE(x)	((x) << FV_PSHIFT)
   1088        1.1   thorpej #define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
   1089        1.1   thorpej #define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
   1090        1.1   thorpej #define	FV_FSCALE(x)	((x) << FV_FSHIFT)
   1091        1.1   thorpej #define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
   1092        1.1   thorpej 
   1093        1.1   thorpej #define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
   1094        1.1   thorpej #define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
   1095        1.1   thorpej 
   1096        1.1   thorpej #define	FV_N			10	/* update fve_f every FV_N packets */
   1097        1.1   thorpej 
   1098        1.1   thorpej #define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
   1099       1.11     perry #define	FV_TTHRESH		3  /* time threshold to delete fve */
   1100        1.1   thorpej #define	FV_ALPHA		5  /* extra packet count */
   1101        1.1   thorpej 
   1102  1.13.12.1     peter #define	FV_STATS
   1103  1.13.12.1     peter 
   1104        1.1   thorpej #if (__FreeBSD_version > 300000)
   1105        1.1   thorpej #define	FV_TIMESTAMP(tp)	getmicrotime(tp)
   1106        1.1   thorpej #else
   1107        1.1   thorpej #define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
   1108        1.1   thorpej #endif
   1109        1.1   thorpej 
   1110        1.1   thorpej /*
   1111        1.1   thorpej  * Brtt table: 127 entry table to convert drop rate (p) to
   1112        1.1   thorpej  * the corresponding bandwidth fraction (f)
   1113        1.1   thorpej  * the following equation is implemented to use scaled values,
   1114        1.1   thorpej  * fve_p and fve_f, in the fixed point format.
   1115        1.1   thorpej  *
   1116        1.1   thorpej  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
   1117        1.1   thorpej  *   f = Brtt(p) / (max_th + alpha)
   1118        1.1   thorpej  */
   1119        1.1   thorpej #define	BRTT_SIZE	128
   1120        1.1   thorpej #define	BRTT_SHIFT	12
   1121        1.1   thorpej #define	BRTT_MASK	0x0007f000
   1122        1.1   thorpej #define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
   1123        1.1   thorpej 
   1124        1.1   thorpej const int brtt_tab[BRTT_SIZE] = {
   1125       1.11     perry 	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
   1126       1.11     perry 	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
   1127       1.11     perry 	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
   1128       1.11     perry 	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
   1129       1.11     perry 	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
   1130       1.11     perry 	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
   1131       1.11     perry 	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
   1132       1.11     perry 	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
   1133       1.11     perry 	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
   1134       1.11     perry 	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
   1135       1.11     perry 	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
   1136       1.11     perry 	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
   1137       1.11     perry 	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
   1138       1.11     perry 	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
   1139       1.11     perry 	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
   1140        1.1   thorpej 	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
   1141        1.1   thorpej };
   1142        1.1   thorpej 
   1143       1.13     perry static inline struct fve *
   1144        1.1   thorpej flowlist_lookup(fv, pktattr, now)
   1145        1.1   thorpej 	struct flowvalve *fv;
   1146        1.1   thorpej 	struct altq_pktattr *pktattr;
   1147        1.1   thorpej 	struct timeval *now;
   1148        1.1   thorpej {
   1149        1.1   thorpej 	struct fve *fve;
   1150        1.1   thorpej 	int flows;
   1151        1.1   thorpej 	struct ip *ip;
   1152        1.1   thorpej #ifdef INET6
   1153        1.1   thorpej 	struct ip6_hdr *ip6;
   1154        1.1   thorpej #endif
   1155        1.1   thorpej 	struct timeval tthresh;
   1156        1.1   thorpej 
   1157        1.1   thorpej 	if (pktattr == NULL)
   1158        1.1   thorpej 		return (NULL);
   1159        1.1   thorpej 
   1160        1.1   thorpej 	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
   1161        1.1   thorpej 	flows = 0;
   1162        1.1   thorpej 	/*
   1163        1.1   thorpej 	 * search the flow list
   1164        1.1   thorpej 	 */
   1165        1.1   thorpej 	switch (pktattr->pattr_af) {
   1166        1.1   thorpej 	case AF_INET:
   1167        1.1   thorpej 		ip = (struct ip *)pktattr->pattr_hdr;
   1168        1.1   thorpej 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
   1169        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec == 0)
   1170        1.1   thorpej 				break;
   1171        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
   1172        1.1   thorpej 				fve->fve_lastdrop.tv_sec = 0;
   1173        1.1   thorpej 				break;
   1174        1.1   thorpej 			}
   1175        1.1   thorpej 			if (fve->fve_flow.flow_af == AF_INET &&
   1176        1.1   thorpej 			    fve->fve_flow.flow_ip.ip_src.s_addr ==
   1177        1.1   thorpej 			    ip->ip_src.s_addr &&
   1178        1.1   thorpej 			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
   1179        1.1   thorpej 			    ip->ip_dst.s_addr)
   1180        1.1   thorpej 				return (fve);
   1181        1.1   thorpej 			flows++;
   1182        1.1   thorpej 		}
   1183        1.1   thorpej 		break;
   1184        1.1   thorpej #ifdef INET6
   1185        1.1   thorpej 	case AF_INET6:
   1186        1.1   thorpej 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1187        1.1   thorpej 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
   1188        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec == 0)
   1189        1.1   thorpej 				break;
   1190        1.1   thorpej 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
   1191        1.1   thorpej 				fve->fve_lastdrop.tv_sec = 0;
   1192        1.1   thorpej 				break;
   1193        1.1   thorpej 			}
   1194        1.1   thorpej 			if (fve->fve_flow.flow_af == AF_INET6 &&
   1195        1.1   thorpej 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
   1196        1.1   thorpej 					       &ip6->ip6_src) &&
   1197        1.1   thorpej 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
   1198        1.1   thorpej 					       &ip6->ip6_dst))
   1199        1.1   thorpej 				return (fve);
   1200        1.1   thorpej 			flows++;
   1201        1.1   thorpej 		}
   1202        1.1   thorpej 		break;
   1203        1.1   thorpej #endif /* INET6 */
   1204        1.1   thorpej 
   1205        1.1   thorpej 	default:
   1206        1.1   thorpej 		/* unknown protocol.  no drop. */
   1207        1.1   thorpej 		return (NULL);
   1208        1.1   thorpej 	}
   1209        1.1   thorpej 	fv->fv_flows = flows;	/* save the number of active fve's */
   1210        1.1   thorpej 	return (NULL);
   1211        1.1   thorpej }
   1212        1.1   thorpej 
   1213       1.13     perry static inline struct fve *
   1214        1.1   thorpej flowlist_reclaim(fv, pktattr)
   1215        1.1   thorpej 	struct flowvalve *fv;
   1216        1.1   thorpej 	struct altq_pktattr *pktattr;
   1217        1.1   thorpej {
   1218        1.1   thorpej 	struct fve *fve;
   1219        1.1   thorpej 	struct ip *ip;
   1220        1.1   thorpej #ifdef INET6
   1221        1.1   thorpej 	struct ip6_hdr *ip6;
   1222        1.1   thorpej #endif
   1223        1.1   thorpej 
   1224        1.1   thorpej 	/*
   1225        1.1   thorpej 	 * get an entry from the tail of the LRU list.
   1226        1.1   thorpej 	 */
   1227        1.1   thorpej 	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
   1228        1.1   thorpej 
   1229        1.1   thorpej 	switch (pktattr->pattr_af) {
   1230        1.1   thorpej 	case AF_INET:
   1231        1.1   thorpej 		ip = (struct ip *)pktattr->pattr_hdr;
   1232        1.1   thorpej 		fve->fve_flow.flow_af = AF_INET;
   1233        1.1   thorpej 		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
   1234        1.1   thorpej 		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
   1235        1.1   thorpej 		break;
   1236        1.1   thorpej #ifdef INET6
   1237        1.1   thorpej 	case AF_INET6:
   1238        1.1   thorpej 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1239        1.1   thorpej 		fve->fve_flow.flow_af = AF_INET6;
   1240        1.1   thorpej 		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
   1241        1.1   thorpej 		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
   1242        1.1   thorpej 		break;
   1243        1.1   thorpej #endif
   1244        1.1   thorpej 	}
   1245        1.1   thorpej 
   1246        1.1   thorpej 	fve->fve_state = Green;
   1247        1.1   thorpej 	fve->fve_p = 0.0;
   1248        1.1   thorpej 	fve->fve_f = 0.0;
   1249        1.1   thorpej 	fve->fve_ifseq = fv->fv_ifseq - 1;
   1250        1.1   thorpej 	fve->fve_count = 0;
   1251        1.1   thorpej 
   1252        1.1   thorpej 	fv->fv_flows++;
   1253        1.1   thorpej #ifdef FV_STATS
   1254        1.1   thorpej 	fv->fv_stats.alloc++;
   1255        1.1   thorpej #endif
   1256        1.1   thorpej 	return (fve);
   1257        1.1   thorpej }
   1258        1.1   thorpej 
   1259       1.13     perry static inline void
   1260        1.1   thorpej flowlist_move_to_head(fv, fve)
   1261        1.1   thorpej 	struct flowvalve *fv;
   1262        1.1   thorpej 	struct fve *fve;
   1263        1.1   thorpej {
   1264        1.1   thorpej 	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
   1265        1.1   thorpej 		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
   1266        1.1   thorpej 		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
   1267        1.1   thorpej 	}
   1268        1.1   thorpej }
   1269        1.1   thorpej 
   1270        1.1   thorpej /*
   1271        1.1   thorpej  * allocate flowvalve structure
   1272        1.1   thorpej  */
   1273        1.1   thorpej static struct flowvalve *
   1274        1.1   thorpej fv_alloc(rp)
   1275        1.1   thorpej 	struct red *rp;
   1276        1.1   thorpej {
   1277        1.1   thorpej 	struct flowvalve *fv;
   1278        1.1   thorpej 	struct fve *fve;
   1279        1.1   thorpej 	int i, num;
   1280        1.1   thorpej 
   1281        1.1   thorpej 	num = FV_FLOWLISTSIZE;
   1282        1.1   thorpej 	MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
   1283        1.1   thorpej 	       M_DEVBUF, M_WAITOK);
   1284        1.1   thorpej 	if (fv == NULL)
   1285        1.1   thorpej 		return (NULL);
   1286        1.9  christos 	(void)memset(fv, 0, sizeof(struct flowvalve));
   1287        1.1   thorpej 
   1288        1.1   thorpej 	MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
   1289        1.1   thorpej 	       M_DEVBUF, M_WAITOK);
   1290        1.1   thorpej 	if (fv->fv_fves == NULL) {
   1291        1.1   thorpej 		FREE(fv, M_DEVBUF);
   1292        1.1   thorpej 		return (NULL);
   1293        1.1   thorpej 	}
   1294        1.9  christos 	(void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);
   1295        1.1   thorpej 
   1296        1.1   thorpej 	fv->fv_flows = 0;
   1297        1.1   thorpej 	TAILQ_INIT(&fv->fv_flowlist);
   1298        1.1   thorpej 	for (i = 0; i < num; i++) {
   1299        1.1   thorpej 		fve = &fv->fv_fves[i];
   1300        1.1   thorpej 		fve->fve_lastdrop.tv_sec = 0;
   1301        1.1   thorpej 		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
   1302        1.1   thorpej 	}
   1303        1.1   thorpej 
   1304        1.1   thorpej 	/* initialize drop rate threshold in scaled fixed-point */
   1305        1.1   thorpej 	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
   1306        1.1   thorpej 
   1307        1.1   thorpej 	/* initialize drop rate to fraction table */
   1308        1.1   thorpej 	MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
   1309        1.1   thorpej 	       M_DEVBUF, M_WAITOK);
   1310        1.1   thorpej 	if (fv->fv_p2ftab == NULL) {
   1311        1.1   thorpej 		FREE(fv->fv_fves, M_DEVBUF);
   1312        1.1   thorpej 		FREE(fv, M_DEVBUF);
   1313        1.1   thorpej 		return (NULL);
   1314        1.1   thorpej 	}
   1315        1.1   thorpej 	/*
   1316        1.1   thorpej 	 * create the p2f table.
   1317        1.1   thorpej 	 * (shift is used to keep the precision)
   1318        1.1   thorpej 	 */
   1319        1.1   thorpej 	for (i = 1; i < BRTT_SIZE; i++) {
   1320        1.1   thorpej 		int f;
   1321        1.1   thorpej 
   1322        1.1   thorpej 		f = brtt_tab[i] << 8;
   1323        1.1   thorpej 		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
   1324        1.1   thorpej 	}
   1325        1.1   thorpej 
   1326        1.1   thorpej 	return (fv);
   1327        1.1   thorpej }
   1328        1.1   thorpej 
   1329        1.1   thorpej static void fv_destroy(fv)
   1330        1.1   thorpej 	struct flowvalve *fv;
   1331        1.1   thorpej {
   1332        1.1   thorpej 	FREE(fv->fv_p2ftab, M_DEVBUF);
   1333        1.1   thorpej 	FREE(fv->fv_fves, M_DEVBUF);
   1334        1.1   thorpej 	FREE(fv, M_DEVBUF);
   1335        1.1   thorpej }
   1336        1.1   thorpej 
   1337       1.13     perry static inline int
   1338        1.1   thorpej fv_p2f(fv, p)
   1339        1.1   thorpej 	struct flowvalve	*fv;
   1340        1.1   thorpej 	int	p;
   1341        1.1   thorpej {
   1342        1.1   thorpej 	int val, f;
   1343        1.1   thorpej 
   1344        1.1   thorpej 	if (p >= BRTT_PMAX)
   1345        1.1   thorpej 		f = fv->fv_p2ftab[BRTT_SIZE-1];
   1346        1.1   thorpej 	else if ((val = (p & BRTT_MASK)))
   1347        1.1   thorpej 		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
   1348        1.1   thorpej 	else
   1349        1.1   thorpej 		f = fv->fv_p2ftab[1];
   1350        1.1   thorpej 	return (f);
   1351        1.1   thorpej }
   1352        1.1   thorpej 
   1353        1.1   thorpej /*
   1354        1.1   thorpej  * check if an arriving packet should be pre-dropped.
   1355        1.1   thorpej  * called from red_addq() when a packet arrives.
   1356        1.1   thorpej  * returns 1 when the packet should be pre-dropped.
   1357        1.3   thorpej  * should be called in splnet.
   1358        1.1   thorpej  */
   1359        1.1   thorpej static int
   1360        1.1   thorpej fv_checkflow(fv, pktattr, fcache)
   1361        1.1   thorpej 	struct flowvalve *fv;
   1362        1.1   thorpej 	struct altq_pktattr *pktattr;
   1363        1.1   thorpej 	struct fve **fcache;
   1364        1.1   thorpej {
   1365        1.1   thorpej 	struct fve *fve;
   1366        1.1   thorpej 	struct timeval now;
   1367        1.1   thorpej 
   1368        1.1   thorpej 	fv->fv_ifseq++;
   1369        1.1   thorpej 	FV_TIMESTAMP(&now);
   1370        1.1   thorpej 
   1371        1.1   thorpej 	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
   1372        1.1   thorpej 		/* no matching entry in the flowlist */
   1373        1.1   thorpej 		return (0);
   1374        1.1   thorpej 
   1375        1.1   thorpej 	*fcache = fve;
   1376        1.1   thorpej 
   1377        1.1   thorpej 	/* update fraction f for every FV_N packets */
   1378        1.1   thorpej 	if (++fve->fve_count == FV_N) {
   1379        1.1   thorpej 		/*
   1380        1.1   thorpej 		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
   1381        1.1   thorpej 		 */
   1382        1.1   thorpej 		fve->fve_f =
   1383        1.1   thorpej 			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
   1384        1.1   thorpej 			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
   1385        1.1   thorpej 		fve->fve_ifseq = fv->fv_ifseq;
   1386        1.1   thorpej 		fve->fve_count = 0;
   1387        1.1   thorpej 	}
   1388        1.1   thorpej 
   1389        1.1   thorpej 	/*
   1390        1.1   thorpej 	 * overpumping test
   1391        1.1   thorpej 	 */
   1392        1.1   thorpej 	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
   1393        1.1   thorpej 		int fthresh;
   1394        1.1   thorpej 
   1395        1.1   thorpej 		/* calculate a threshold */
   1396        1.1   thorpej 		fthresh = fv_p2f(fv, fve->fve_p);
   1397        1.1   thorpej 		if (fve->fve_f > fthresh)
   1398        1.1   thorpej 			fve->fve_state = Red;
   1399        1.1   thorpej 	}
   1400        1.1   thorpej 
   1401        1.1   thorpej 	if (fve->fve_state == Red) {
   1402        1.1   thorpej 		/*
   1403        1.1   thorpej 		 * backoff test
   1404        1.1   thorpej 		 */
   1405        1.1   thorpej 		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
   1406        1.1   thorpej 			/* no drop for at least FV_BACKOFFTHRESH sec */
   1407        1.1   thorpej 			fve->fve_p = 0;
   1408        1.1   thorpej 			fve->fve_state = Green;
   1409        1.1   thorpej #ifdef FV_STATS
   1410        1.1   thorpej 			fv->fv_stats.escape++;
   1411        1.1   thorpej #endif
   1412        1.1   thorpej 		} else {
   1413        1.1   thorpej 			/* block this flow */
   1414        1.1   thorpej 			flowlist_move_to_head(fv, fve);
   1415        1.1   thorpej 			fve->fve_lastdrop = now;
   1416        1.1   thorpej #ifdef FV_STATS
   1417        1.1   thorpej 			fv->fv_stats.predrop++;
   1418        1.1   thorpej #endif
   1419        1.1   thorpej 			return (1);
   1420        1.1   thorpej 		}
   1421        1.1   thorpej 	}
   1422        1.1   thorpej 
   1423        1.1   thorpej 	/*
   1424        1.1   thorpej 	 * p = (1 - Wp) * p
   1425        1.1   thorpej 	 */
   1426        1.1   thorpej 	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
   1427        1.1   thorpej 	if (fve->fve_p < 0)
   1428        1.1   thorpej 		fve->fve_p = 0;
   1429        1.1   thorpej #ifdef FV_STATS
   1430        1.1   thorpej 	fv->fv_stats.pass++;
   1431        1.1   thorpej #endif
   1432        1.1   thorpej 	return (0);
   1433        1.1   thorpej }
   1434        1.1   thorpej 
   1435        1.1   thorpej /*
   1436        1.1   thorpej  * called from red_addq when a packet is dropped by red.
   1437        1.3   thorpej  * should be called in splnet.
   1438        1.1   thorpej  */
   1439        1.1   thorpej static void fv_dropbyred(fv, pktattr, fcache)
   1440        1.1   thorpej 	struct flowvalve *fv;
   1441        1.1   thorpej 	struct altq_pktattr *pktattr;
   1442        1.1   thorpej 	struct fve *fcache;
   1443        1.1   thorpej {
   1444        1.1   thorpej 	struct fve *fve;
   1445        1.1   thorpej 	struct timeval now;
   1446        1.1   thorpej 
   1447        1.1   thorpej 	if (pktattr == NULL)
   1448        1.1   thorpej 		return;
   1449        1.1   thorpej 	FV_TIMESTAMP(&now);
   1450        1.1   thorpej 
   1451        1.1   thorpej 	if (fcache != NULL)
   1452        1.1   thorpej 		/* the fve of this packet is already cached */
   1453        1.1   thorpej 		fve = fcache;
   1454        1.1   thorpej 	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
   1455        1.1   thorpej 		fve = flowlist_reclaim(fv, pktattr);
   1456        1.1   thorpej 
   1457        1.1   thorpej 	flowlist_move_to_head(fv, fve);
   1458        1.1   thorpej 
   1459        1.1   thorpej 	/*
   1460        1.1   thorpej 	 * update p:  the following line cancels the update
   1461        1.1   thorpej 	 *	      in fv_checkflow() and calculate
   1462        1.1   thorpej 	 *	p = Wp + (1 - Wp) * p
   1463        1.1   thorpej 	 */
   1464        1.1   thorpej 	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
   1465        1.1   thorpej 
   1466        1.1   thorpej 	fve->fve_lastdrop = now;
   1467        1.1   thorpej }
   1468        1.1   thorpej 
   1469        1.1   thorpej #endif /* ALTQ_FLOWVALVE */
   1470        1.1   thorpej 
   1471        1.1   thorpej #ifdef KLD_MODULE
   1472        1.1   thorpej 
   1473        1.1   thorpej static struct altqsw red_sw =
   1474        1.1   thorpej 	{"red", redopen, redclose, redioctl};
   1475        1.1   thorpej 
   1476        1.1   thorpej ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
   1477  1.13.12.1     peter MODULE_VERSION(altq_red, 1);
   1478        1.1   thorpej 
   1479        1.1   thorpej #endif /* KLD_MODULE */
   1480  1.13.12.1     peter #endif /* ALTQ3_COMPAT */
   1481        1.1   thorpej 
   1482        1.1   thorpej #endif /* ALTQ_RED */
   1483