Home | History | Annotate | Line # | Download | only in altq
altq_red.c revision 1.28
      1  1.28      yamt /*	$NetBSD: altq_red.c,v 1.28 2008/06/18 09:06:27 yamt Exp $	*/
      2  1.21     peter /*	$KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $	*/
      3   1.1   thorpej 
      4   1.1   thorpej /*
      5  1.21     peter  * Copyright (C) 1997-2003
      6   1.1   thorpej  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7   1.1   thorpej  *
      8   1.1   thorpej  * Redistribution and use in source and binary forms, with or without
      9   1.1   thorpej  * modification, are permitted provided that the following conditions
     10   1.1   thorpej  * are met:
     11   1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     12   1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     13   1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     14   1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     15   1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     16   1.1   thorpej  *
     17   1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18   1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19   1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20   1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21   1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22   1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23   1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24   1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25   1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26   1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27   1.1   thorpej  * SUCH DAMAGE.
     28   1.1   thorpej  *
     29   1.1   thorpej  */
     30   1.1   thorpej /*
     31   1.1   thorpej  * Copyright (c) 1990-1994 Regents of the University of California.
     32   1.1   thorpej  * All rights reserved.
     33   1.1   thorpej  *
     34   1.1   thorpej  * Redistribution and use in source and binary forms, with or without
     35   1.1   thorpej  * modification, are permitted provided that the following conditions
     36   1.1   thorpej  * are met:
     37   1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     38   1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     39   1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     40   1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     41   1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     42   1.1   thorpej  * 3. All advertising materials mentioning features or use of this software
     43   1.1   thorpej  *    must display the following acknowledgement:
     44   1.1   thorpej  *	This product includes software developed by the Computer Systems
     45   1.1   thorpej  *	Engineering Group at Lawrence Berkeley Laboratory.
     46   1.1   thorpej  * 4. Neither the name of the University nor of the Laboratory may be used
     47   1.1   thorpej  *    to endorse or promote products derived from this software without
     48   1.1   thorpej  *    specific prior written permission.
     49   1.1   thorpej  *
     50   1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     51   1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     52   1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     53   1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     54   1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     55   1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     56   1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     57   1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     58   1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     59   1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     60   1.1   thorpej  * SUCH DAMAGE.
     61   1.1   thorpej  */
     62   1.5     lukem 
     63   1.5     lukem #include <sys/cdefs.h>
     64  1.28      yamt __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.28 2008/06/18 09:06:27 yamt Exp $");
     65   1.1   thorpej 
     66  1.21     peter #ifdef _KERNEL_OPT
     67   1.1   thorpej #include "opt_altq.h"
     68   1.1   thorpej #include "opt_inet.h"
     69  1.22     peter #include "pf.h"
     70   1.1   thorpej #endif
     71  1.21     peter 
     72   1.1   thorpej #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
     73   1.1   thorpej 
     74   1.1   thorpej #include <sys/param.h>
     75   1.1   thorpej #include <sys/malloc.h>
     76   1.1   thorpej #include <sys/mbuf.h>
     77   1.1   thorpej #include <sys/socket.h>
     78  1.21     peter #include <sys/systm.h>
     79  1.21     peter #include <sys/errno.h>
     80  1.21     peter #include <sys/kauth.h>
     81  1.21     peter #if 1 /* ALTQ3_COMPAT */
     82   1.1   thorpej #include <sys/sockio.h>
     83   1.1   thorpej #include <sys/proc.h>
     84   1.1   thorpej #include <sys/kernel.h>
     85   1.1   thorpej #ifdef ALTQ_FLOWVALVE
     86   1.1   thorpej #include <sys/queue.h>
     87   1.1   thorpej #include <sys/time.h>
     88   1.1   thorpej #endif
     89  1.21     peter #endif /* ALTQ3_COMPAT */
     90   1.1   thorpej 
     91   1.1   thorpej #include <net/if.h>
     92   1.1   thorpej 
     93   1.1   thorpej #include <netinet/in.h>
     94   1.1   thorpej #include <netinet/in_systm.h>
     95   1.1   thorpej #include <netinet/ip.h>
     96   1.1   thorpej #ifdef INET6
     97   1.1   thorpej #include <netinet/ip6.h>
     98   1.1   thorpej #endif
     99   1.1   thorpej 
    100  1.22     peter #if NPF > 0
    101  1.21     peter #include <net/pfvar.h>
    102  1.22     peter #endif
    103   1.1   thorpej #include <altq/altq.h>
    104  1.21     peter #include <altq/altq_red.h>
    105  1.21     peter #ifdef ALTQ3_COMPAT
    106   1.1   thorpej #include <altq/altq_conf.h>
    107   1.1   thorpej #ifdef ALTQ_FLOWVALVE
    108   1.1   thorpej #include <altq/altq_flowvalve.h>
    109   1.1   thorpej #endif
    110  1.21     peter #endif
    111   1.1   thorpej 
    112   1.1   thorpej /*
    113   1.1   thorpej  * ALTQ/RED (Random Early Detection) implementation using 32-bit
    114   1.1   thorpej  * fixed-point calculation.
    115   1.1   thorpej  *
    116   1.1   thorpej  * written by kjc using the ns code as a reference.
    117   1.1   thorpej  * you can learn more about red and ns from Sally's home page at
    118   1.1   thorpej  * http://www-nrg.ee.lbl.gov/floyd/
    119   1.1   thorpej  *
    120   1.1   thorpej  * most of the red parameter values are fixed in this implementation
    121   1.1   thorpej  * to prevent fixed-point overflow/underflow.
    122   1.1   thorpej  * if you change the parameters, watch out for overflow/underflow!
    123   1.1   thorpej  *
    124   1.1   thorpej  * the parameters used are recommended values by Sally.
    125   1.1   thorpej  * the corresponding ns config looks:
    126   1.1   thorpej  *	q_weight=0.00195
    127   1.1   thorpej  *	minthresh=5 maxthresh=15 queue-size=60
    128   1.1   thorpej  *	linterm=30
    129   1.1   thorpej  *	dropmech=drop-tail
    130   1.1   thorpej  *	bytes=false (can't be handled by 32-bit fixed-point)
    131  1.11     perry  *	doubleq=false dqthresh=false
    132   1.1   thorpej  *	wait=true
    133   1.1   thorpej  */
    134   1.1   thorpej /*
    135   1.1   thorpej  * alternative red parameters for a slow link.
    136   1.1   thorpej  *
    137   1.1   thorpej  * assume the queue length becomes from zero to L and keeps L, it takes
    138   1.1   thorpej  * N packets for q_avg to reach 63% of L.
    139   1.1   thorpej  * when q_weight is 0.002, N is about 500 packets.
    140   1.1   thorpej  * for a slow link like dial-up, 500 packets takes more than 1 minute!
    141   1.1   thorpej  * when q_weight is 0.008, N is about 127 packets.
    142   1.1   thorpej  * when q_weight is 0.016, N is about 63 packets.
    143  1.21     peter  * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
    144   1.1   thorpej  * are allowed for 0.016.
    145   1.1   thorpej  * see Sally's paper for more details.
    146   1.1   thorpej  */
    147   1.1   thorpej /* normal red parameters */
    148   1.1   thorpej #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    149   1.1   thorpej 				/* q_weight = 0.00195 */
    150   1.1   thorpej 
    151   1.1   thorpej /* red parameters for a slow link */
    152   1.1   thorpej #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    153   1.1   thorpej 				/* q_weight = 0.0078125 */
    154   1.1   thorpej 
    155   1.1   thorpej /* red parameters for a very slow link (e.g., dialup) */
    156   1.1   thorpej #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    157   1.1   thorpej 				/* q_weight = 0.015625 */
    158   1.1   thorpej 
    159   1.1   thorpej /* fixed-point uses 12-bit decimal places */
    160   1.1   thorpej #define	FP_SHIFT	12	/* fixed-point shift */
    161   1.1   thorpej 
    162   1.1   thorpej /* red parameters for drop probability */
    163   1.1   thorpej #define	INV_P_MAX	10	/* inverse of max drop probability */
    164   1.1   thorpej #define	TH_MIN		5	/* min threshold */
    165   1.1   thorpej #define	TH_MAX		15	/* max threshold */
    166   1.1   thorpej 
    167  1.21     peter #define	RED_LIMIT	60	/* default max queue lenght */
    168  1.21     peter #define	RED_STATS		/* collect statistics */
    169   1.1   thorpej 
    170   1.1   thorpej /*
    171   1.1   thorpej  * our default policy for forced-drop is drop-tail.
    172   1.1   thorpej  * (in altq-1.1.2 or earlier, the default was random-drop.
    173   1.1   thorpej  * but it makes more sense to punish the cause of the surge.)
    174   1.1   thorpej  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
    175   1.1   thorpej  */
    176   1.1   thorpej 
    177  1.21     peter #ifdef ALTQ3_COMPAT
    178   1.1   thorpej #ifdef ALTQ_FLOWVALVE
    179   1.1   thorpej /*
    180  1.21     peter  * flow-valve is an extention to protect red from unresponsive flows
    181   1.1   thorpej  * and to promote end-to-end congestion control.
    182   1.1   thorpej  * flow-valve observes the average drop rates of the flows that have
    183   1.1   thorpej  * experienced packet drops in the recent past.
    184   1.1   thorpej  * when the average drop rate exceeds the threshold, the flow is
    185   1.1   thorpej  * blocked by the flow-valve.  the trapped flow should back off
    186   1.1   thorpej  * exponentially to escape from the flow-valve.
    187   1.1   thorpej  */
    188   1.1   thorpej #ifdef RED_RANDOM_DROP
    189   1.1   thorpej #error "random-drop can't be used with flow-valve!"
    190   1.1   thorpej #endif
    191   1.1   thorpej #endif /* ALTQ_FLOWVALVE */
    192   1.1   thorpej 
    193   1.1   thorpej /* red_list keeps all red_queue_t's allocated. */
    194   1.1   thorpej static red_queue_t *red_list = NULL;
    195   1.1   thorpej 
    196  1.21     peter #endif /* ALTQ3_COMPAT */
    197  1.21     peter 
    198   1.1   thorpej /* default red parameter values */
    199   1.1   thorpej static int default_th_min = TH_MIN;
    200   1.1   thorpej static int default_th_max = TH_MAX;
    201   1.1   thorpej static int default_inv_pmax = INV_P_MAX;
    202   1.1   thorpej 
    203  1.21     peter #ifdef ALTQ3_COMPAT
    204   1.1   thorpej /* internal function prototypes */
    205  1.21     peter static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
    206  1.21     peter static struct mbuf *red_dequeue(struct ifaltq *, int);
    207  1.21     peter static int red_request(struct ifaltq *, int, void *);
    208  1.21     peter static void red_purgeq(red_queue_t *);
    209  1.21     peter static int red_detach(red_queue_t *);
    210   1.1   thorpej #ifdef ALTQ_FLOWVALVE
    211  1.21     peter static inline struct fve *flowlist_lookup(struct flowvalve *,
    212  1.21     peter 			 struct altq_pktattr *, struct timeval *);
    213  1.21     peter static inline struct fve *flowlist_reclaim(struct flowvalve *,
    214  1.21     peter 					     struct altq_pktattr *);
    215  1.21     peter static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
    216  1.21     peter static inline int fv_p2f(struct flowvalve *, int);
    217  1.21     peter static struct flowvalve *fv_alloc(struct red *);
    218  1.21     peter static void fv_destroy(struct flowvalve *);
    219  1.21     peter static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
    220  1.21     peter 			struct fve **);
    221  1.21     peter static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
    222  1.21     peter 			 struct fve *);
    223   1.1   thorpej #endif
    224  1.21     peter #endif /* ALTQ3_COMPAT */
    225   1.1   thorpej 
    226   1.1   thorpej /*
    227  1.21     peter  * red support routines
    228   1.1   thorpej  */
    229  1.21     peter red_t *
    230  1.21     peter red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    231  1.21     peter    int pkttime)
    232  1.21     peter {
    233  1.21     peter 	red_t	*rp;
    234  1.21     peter 	int	 w, i;
    235  1.21     peter 	int	 npkts_per_sec;
    236  1.21     peter 
    237  1.21     peter 	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
    238  1.21     peter 	if (rp == NULL)
    239  1.21     peter 		return (NULL);
    240  1.21     peter 
    241  1.21     peter 	rp->red_avg = 0;
    242  1.21     peter 	rp->red_idle = 1;
    243  1.21     peter 
    244  1.21     peter 	if (weight == 0)
    245  1.21     peter 		rp->red_weight = W_WEIGHT;
    246  1.21     peter 	else
    247  1.21     peter 		rp->red_weight = weight;
    248  1.21     peter 	if (inv_pmax == 0)
    249  1.21     peter 		rp->red_inv_pmax = default_inv_pmax;
    250  1.21     peter 	else
    251  1.21     peter 		rp->red_inv_pmax = inv_pmax;
    252  1.21     peter 	if (th_min == 0)
    253  1.21     peter 		rp->red_thmin = default_th_min;
    254  1.21     peter 	else
    255  1.21     peter 		rp->red_thmin = th_min;
    256  1.21     peter 	if (th_max == 0)
    257  1.21     peter 		rp->red_thmax = default_th_max;
    258  1.21     peter 	else
    259  1.21     peter 		rp->red_thmax = th_max;
    260  1.21     peter 
    261  1.21     peter 	rp->red_flags = flags;
    262   1.1   thorpej 
    263  1.21     peter 	if (pkttime == 0)
    264  1.21     peter 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    265  1.21     peter 		rp->red_pkttime = 800;
    266  1.21     peter 	else
    267  1.21     peter 		rp->red_pkttime = pkttime;
    268   1.1   thorpej 
    269  1.21     peter 	if (weight == 0) {
    270  1.21     peter 		/* when the link is very slow, adjust red parameters */
    271  1.21     peter 		npkts_per_sec = 1000000 / rp->red_pkttime;
    272  1.21     peter 		if (npkts_per_sec < 50) {
    273  1.21     peter 			/* up to about 400Kbps */
    274  1.21     peter 			rp->red_weight = W_WEIGHT_2;
    275  1.21     peter 		} else if (npkts_per_sec < 300) {
    276  1.21     peter 			/* up to about 2.4Mbps */
    277  1.21     peter 			rp->red_weight = W_WEIGHT_1;
    278  1.21     peter 		}
    279  1.21     peter 	}
    280   1.1   thorpej 
    281  1.21     peter 	/* calculate wshift.  weight must be power of 2 */
    282  1.21     peter 	w = rp->red_weight;
    283  1.21     peter 	for (i = 0; w > 1; i++)
    284  1.21     peter 		w = w >> 1;
    285  1.21     peter 	rp->red_wshift = i;
    286  1.21     peter 	w = 1 << rp->red_wshift;
    287  1.21     peter 	if (w != rp->red_weight) {
    288  1.21     peter 		printf("invalid weight value %d for red! use %d\n",
    289  1.21     peter 		       rp->red_weight, w);
    290  1.21     peter 		rp->red_weight = w;
    291   1.1   thorpej 	}
    292   1.1   thorpej 
    293  1.21     peter 	/*
    294  1.21     peter 	 * thmin_s and thmax_s are scaled versions of th_min and th_max
    295  1.21     peter 	 * to be compared with avg.
    296  1.21     peter 	 */
    297  1.21     peter 	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
    298  1.21     peter 	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
    299  1.21     peter 
    300  1.21     peter 	/*
    301  1.21     peter 	 * precompute probability denominator
    302  1.21     peter 	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
    303  1.21     peter 	 */
    304  1.21     peter 	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
    305  1.21     peter 			 * rp->red_inv_pmax) << FP_SHIFT;
    306  1.21     peter 
    307  1.21     peter 	/* allocate weight table */
    308  1.21     peter 	rp->red_wtab = wtab_alloc(rp->red_weight);
    309  1.21     peter 
    310  1.21     peter 	microtime(&rp->red_last);
    311  1.21     peter #ifdef ALTQ3_COMPAT
    312  1.21     peter #ifdef ALTQ_FLOWVALVE
    313  1.21     peter 	if (flags & REDF_FLOWVALVE)
    314  1.21     peter 		rp->red_flowvalve = fv_alloc(rp);
    315  1.21     peter 	/* if fv_alloc failes, flowvalve is just disabled */
    316  1.21     peter #endif
    317  1.21     peter #endif /* ALTQ3_COMPAT */
    318  1.21     peter 	return (rp);
    319   1.1   thorpej }
    320   1.1   thorpej 
    321  1.21     peter void
    322  1.21     peter red_destroy(red_t *rp)
    323   1.1   thorpej {
    324  1.21     peter #ifdef ALTQ3_COMPAT
    325  1.21     peter #ifdef ALTQ_FLOWVALVE
    326  1.21     peter 	if (rp->red_flowvalve != NULL)
    327  1.21     peter 		fv_destroy(rp->red_flowvalve);
    328   1.1   thorpej #endif
    329  1.21     peter #endif /* ALTQ3_COMPAT */
    330  1.21     peter 	wtab_destroy(rp->red_wtab);
    331  1.21     peter 	free(rp, M_DEVBUF);
    332  1.21     peter }
    333  1.11     perry 
    334  1.21     peter void
    335  1.21     peter red_getstats(red_t *rp, struct redstats *sp)
    336  1.21     peter {
    337  1.21     peter 	sp->q_avg		= rp->red_avg >> rp->red_wshift;
    338  1.21     peter 	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
    339  1.21     peter 	sp->drop_cnt		= rp->red_stats.drop_cnt;
    340  1.21     peter 	sp->drop_forced		= rp->red_stats.drop_forced;
    341  1.21     peter 	sp->drop_unforced	= rp->red_stats.drop_unforced;
    342  1.21     peter 	sp->marked_packets	= rp->red_stats.marked_packets;
    343  1.21     peter }
    344   1.1   thorpej 
    345  1.21     peter int
    346  1.21     peter red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    347  1.21     peter     struct altq_pktattr *pktattr)
    348  1.21     peter {
    349  1.21     peter 	int avg, droptype;
    350  1.21     peter 	int n;
    351  1.21     peter #ifdef ALTQ3_COMPAT
    352  1.21     peter #ifdef ALTQ_FLOWVALVE
    353  1.21     peter 	struct fve *fve = NULL;
    354   1.1   thorpej 
    355  1.21     peter 	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
    356  1.21     peter 		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
    357  1.21     peter 			m_freem(m);
    358  1.21     peter 			return (-1);
    359   1.1   thorpej 		}
    360  1.21     peter #endif
    361  1.21     peter #endif /* ALTQ3_COMPAT */
    362   1.1   thorpej 
    363  1.21     peter 	avg = rp->red_avg;
    364   1.1   thorpej 
    365  1.21     peter 	/*
    366  1.21     peter 	 * if we were idle, we pretend that n packets arrived during
    367  1.21     peter 	 * the idle period.
    368  1.21     peter 	 */
    369  1.21     peter 	if (rp->red_idle) {
    370  1.21     peter 		struct timeval now;
    371  1.21     peter 		int t;
    372   1.1   thorpej 
    373  1.21     peter 		rp->red_idle = 0;
    374  1.21     peter 		microtime(&now);
    375  1.21     peter 		t = (now.tv_sec - rp->red_last.tv_sec);
    376  1.21     peter 		if (t > 60) {
    377  1.21     peter 			/*
    378  1.21     peter 			 * being idle for more than 1 minute, set avg to zero.
    379  1.21     peter 			 * this prevents t from overflow.
    380  1.21     peter 			 */
    381  1.21     peter 			avg = 0;
    382  1.21     peter 		} else {
    383  1.21     peter 			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
    384  1.21     peter 			n = t / rp->red_pkttime - 1;
    385   1.1   thorpej 
    386  1.21     peter 			/* the following line does (avg = (1 - Wq)^n * avg) */
    387  1.21     peter 			if (n > 0)
    388  1.21     peter 				avg = (avg >> FP_SHIFT) *
    389  1.21     peter 				    pow_w(rp->red_wtab, n);
    390   1.1   thorpej 		}
    391  1.21     peter 	}
    392   1.1   thorpej 
    393  1.21     peter 	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
    394  1.21     peter 	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
    395  1.21     peter 	rp->red_avg = avg;		/* save the new value */
    396   1.1   thorpej 
    397  1.21     peter 	/*
    398  1.21     peter 	 * red_count keeps a tally of arriving traffic that has not
    399  1.21     peter 	 * been dropped.
    400  1.21     peter 	 */
    401  1.21     peter 	rp->red_count++;
    402   1.1   thorpej 
    403  1.21     peter 	/* see if we drop early */
    404  1.21     peter 	droptype = DTYPE_NODROP;
    405  1.21     peter 	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
    406  1.21     peter 		if (avg >= rp->red_thmax_s) {
    407  1.21     peter 			/* avg >= th_max: forced drop */
    408  1.21     peter 			droptype = DTYPE_FORCED;
    409  1.21     peter 		} else if (rp->red_old == 0) {
    410  1.21     peter 			/* first exceeds th_min */
    411  1.21     peter 			rp->red_count = 1;
    412  1.21     peter 			rp->red_old = 1;
    413  1.21     peter 		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
    414  1.21     peter 				      rp->red_probd, rp->red_count)) {
    415  1.21     peter 			/* mark or drop by red */
    416  1.21     peter 			if ((rp->red_flags & REDF_ECN) &&
    417  1.21     peter 			    mark_ecn(m, pktattr, rp->red_flags)) {
    418  1.21     peter 				/* successfully marked.  do not drop. */
    419  1.21     peter 				rp->red_count = 0;
    420  1.21     peter #ifdef RED_STATS
    421  1.21     peter 				rp->red_stats.marked_packets++;
    422  1.21     peter #endif
    423  1.21     peter 			} else {
    424  1.21     peter 				/* unforced drop by red */
    425  1.21     peter 				droptype = DTYPE_EARLY;
    426  1.21     peter 			}
    427   1.1   thorpej 		}
    428  1.21     peter 	} else {
    429  1.21     peter 		/* avg < th_min */
    430  1.21     peter 		rp->red_old = 0;
    431  1.21     peter 	}
    432   1.1   thorpej 
    433  1.21     peter 	/*
    434  1.21     peter 	 * if the queue length hits the hard limit, it's a forced drop.
    435  1.21     peter 	 */
    436  1.21     peter 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
    437  1.21     peter 		droptype = DTYPE_FORCED;
    438   1.1   thorpej 
    439  1.21     peter #ifdef RED_RANDOM_DROP
    440  1.21     peter 	/* if successful or forced drop, enqueue this packet. */
    441  1.21     peter 	if (droptype != DTYPE_EARLY)
    442  1.21     peter 		_addq(q, m);
    443  1.21     peter #else
    444  1.21     peter 	/* if successful, enqueue this packet. */
    445  1.21     peter 	if (droptype == DTYPE_NODROP)
    446  1.21     peter 		_addq(q, m);
    447  1.21     peter #endif
    448  1.21     peter 	if (droptype != DTYPE_NODROP) {
    449  1.21     peter 		if (droptype == DTYPE_EARLY) {
    450  1.21     peter 			/* drop the incoming packet */
    451  1.21     peter #ifdef RED_STATS
    452  1.21     peter 			rp->red_stats.drop_unforced++;
    453  1.21     peter #endif
    454  1.21     peter 		} else {
    455  1.21     peter 			/* forced drop, select a victim packet in the queue. */
    456  1.21     peter #ifdef RED_RANDOM_DROP
    457  1.21     peter 			m = _getq_random(q);
    458  1.21     peter #endif
    459  1.21     peter #ifdef RED_STATS
    460  1.21     peter 			rp->red_stats.drop_forced++;
    461  1.21     peter #endif
    462  1.21     peter 		}
    463  1.21     peter #ifdef RED_STATS
    464  1.21     peter 		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
    465  1.21     peter #endif
    466  1.21     peter 		rp->red_count = 0;
    467  1.21     peter #ifdef ALTQ3_COMPAT
    468   1.1   thorpej #ifdef ALTQ_FLOWVALVE
    469  1.21     peter 		if (rp->red_flowvalve != NULL)
    470  1.21     peter 			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
    471  1.21     peter #endif
    472  1.21     peter #endif /* ALTQ3_COMPAT */
    473  1.21     peter 		m_freem(m);
    474  1.21     peter 		return (-1);
    475   1.1   thorpej 	}
    476  1.21     peter 	/* successfully queued */
    477  1.21     peter #ifdef RED_STATS
    478  1.21     peter 	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
    479  1.21     peter #endif
    480  1.21     peter 	return (0);
    481   1.1   thorpej }
    482   1.1   thorpej 
    483  1.21     peter /*
    484  1.21     peter  * early-drop probability is calculated as follows:
    485  1.21     peter  *   prob = p_max * (avg - th_min) / (th_max - th_min)
    486  1.21     peter  *   prob_a = prob / (2 - count*prob)
    487  1.21     peter  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
    488  1.21     peter  * here prob_a increases as successive undrop count increases.
    489  1.21     peter  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
    490  1.21     peter  * becomes 1 when (count >= (2 / prob))).
    491  1.21     peter  */
    492  1.21     peter int
    493  1.21     peter drop_early(int fp_len, int fp_probd, int count)
    494   1.1   thorpej {
    495  1.21     peter 	int	d;		/* denominator of drop-probability */
    496   1.1   thorpej 
    497  1.21     peter 	d = fp_probd - count * fp_len;
    498  1.21     peter 	if (d <= 0)
    499  1.21     peter 		/* count exceeds the hard limit: drop or mark */
    500  1.21     peter 		return (1);
    501   1.1   thorpej 
    502  1.21     peter 	/*
    503  1.21     peter 	 * now the range of d is [1..600] in fixed-point. (when
    504  1.21     peter 	 * th_max-th_min=10 and p_max=1/30)
    505  1.21     peter 	 * drop probability = (avg - TH_MIN) / d
    506  1.21     peter 	 */
    507   1.1   thorpej 
    508  1.21     peter 	if ((arc4random() % d) < fp_len) {
    509  1.21     peter 		/* drop or mark */
    510  1.21     peter 		return (1);
    511   1.1   thorpej 	}
    512  1.21     peter 	/* no drop/mark */
    513  1.21     peter 	return (0);
    514   1.1   thorpej }
    515   1.1   thorpej 
    516   1.1   thorpej /*
    517  1.21     peter  * try to mark CE bit to the packet.
    518  1.21     peter  *    returns 1 if successfully marked, 0 otherwise.
    519   1.1   thorpej  */
    520  1.21     peter int
    521  1.21     peter mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
    522  1.21     peter {
    523  1.21     peter 	struct mbuf	*m0;
    524  1.21     peter 	struct m_tag	*t;
    525  1.21     peter 	struct altq_tag	*at;
    526  1.21     peter 	void		*hdr;
    527  1.21     peter 	int		 af;
    528  1.21     peter 
    529  1.28      yamt 	t = m_tag_find(m, PACKET_TAG_ALTQ_QID, NULL);
    530  1.21     peter 	if (t != NULL) {
    531  1.21     peter 		at = (struct altq_tag *)(t + 1);
    532  1.21     peter 		if (at == NULL)
    533  1.21     peter 			return (0);
    534  1.21     peter 		af = at->af;
    535  1.21     peter 		hdr = at->hdr;
    536  1.21     peter #ifdef ALTQ3_COMPAT
    537  1.21     peter 	} else if (pktattr != NULL) {
    538  1.21     peter 		af = pktattr->pattr_af;
    539  1.21     peter 		hdr = pktattr->pattr_hdr;
    540  1.21     peter #endif /* ALTQ3_COMPAT */
    541  1.21     peter 	} else
    542  1.21     peter 		return (0);
    543  1.21     peter 
    544  1.21     peter 	if (af != AF_INET && af != AF_INET6)
    545  1.21     peter 		return (0);
    546  1.21     peter 
    547  1.21     peter 	/* verify that pattr_hdr is within the mbuf data */
    548  1.21     peter 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
    549  1.25  christos 		if (((char *)hdr >= m0->m_data) &&
    550  1.25  christos 		    ((char *)hdr < m0->m_data + m0->m_len))
    551  1.21     peter 			break;
    552  1.21     peter 	if (m0 == NULL) {
    553  1.21     peter 		/* ick, tag info is stale */
    554  1.21     peter 		return (0);
    555  1.21     peter 	}
    556   1.1   thorpej 
    557  1.21     peter 	switch (af) {
    558  1.21     peter 	case AF_INET:
    559  1.21     peter 		if (flags & REDF_ECN4) {
    560  1.21     peter 			struct ip *ip = hdr;
    561  1.21     peter 			u_int8_t otos;
    562  1.21     peter 			int sum;
    563  1.11     perry 
    564  1.21     peter 			if (ip->ip_v != 4)
    565  1.21     peter 				return (0);	/* version mismatch! */
    566   1.1   thorpej 
    567  1.21     peter 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
    568  1.21     peter 				return (0);	/* not-ECT */
    569  1.21     peter 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
    570  1.21     peter 				return (1);	/* already marked */
    571   1.1   thorpej 
    572  1.21     peter 			/*
    573  1.21     peter 			 * ecn-capable but not marked,
    574  1.21     peter 			 * mark CE and update checksum
    575  1.21     peter 			 */
    576  1.21     peter 			otos = ip->ip_tos;
    577  1.21     peter 			ip->ip_tos |= IPTOS_ECN_CE;
    578  1.21     peter 			/*
    579  1.21     peter 			 * update checksum (from RFC1624)
    580  1.21     peter 			 *	   HC' = ~(~HC + ~m + m')
    581  1.21     peter 			 */
    582  1.21     peter 			sum = ~ntohs(ip->ip_sum) & 0xffff;
    583  1.21     peter 			sum += (~otos & 0xffff) + ip->ip_tos;
    584  1.21     peter 			sum = (sum >> 16) + (sum & 0xffff);
    585  1.21     peter 			sum += (sum >> 16);  /* add carry */
    586  1.21     peter 			ip->ip_sum = htons(~sum & 0xffff);
    587  1.21     peter 			return (1);
    588  1.21     peter 		}
    589  1.21     peter 		break;
    590  1.21     peter #ifdef INET6
    591  1.21     peter 	case AF_INET6:
    592  1.21     peter 		if (flags & REDF_ECN6) {
    593  1.21     peter 			struct ip6_hdr *ip6 = hdr;
    594  1.21     peter 			u_int32_t flowlabel;
    595  1.21     peter 
    596  1.21     peter 			flowlabel = ntohl(ip6->ip6_flow);
    597  1.21     peter 			if ((flowlabel >> 28) != 6)
    598  1.21     peter 				return (0);	/* version mismatch! */
    599  1.21     peter 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
    600  1.21     peter 			    (IPTOS_ECN_NOTECT << 20))
    601  1.21     peter 				return (0);	/* not-ECT */
    602  1.21     peter 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
    603  1.21     peter 			    (IPTOS_ECN_CE << 20))
    604  1.21     peter 				return (1);	/* already marked */
    605  1.21     peter 			/*
    606  1.21     peter 			 * ecn-capable but not marked,  mark CE
    607  1.21     peter 			 */
    608  1.21     peter 			flowlabel |= (IPTOS_ECN_CE << 20);
    609  1.21     peter 			ip6->ip6_flow = htonl(flowlabel);
    610  1.21     peter 			return (1);
    611  1.21     peter 		}
    612  1.21     peter 		break;
    613  1.21     peter #endif  /* INET6 */
    614  1.21     peter 	}
    615   1.1   thorpej 
    616  1.21     peter 	/* not marked */
    617  1.21     peter 	return (0);
    618  1.21     peter }
    619   1.1   thorpej 
    620  1.21     peter struct mbuf *
    621  1.21     peter red_getq(red_t *rp, class_queue_t *q)
    622  1.21     peter {
    623  1.21     peter 	struct mbuf *m;
    624   1.1   thorpej 
    625  1.21     peter 	if ((m = _getq(q)) == NULL) {
    626  1.21     peter 		if (rp->red_idle == 0) {
    627  1.21     peter 			rp->red_idle = 1;
    628  1.21     peter 			microtime(&rp->red_last);
    629   1.1   thorpej 		}
    630  1.21     peter 		return NULL;
    631   1.1   thorpej 	}
    632   1.1   thorpej 
    633  1.21     peter 	rp->red_idle = 0;
    634  1.21     peter 	return (m);
    635  1.21     peter }
    636  1.21     peter 
    637  1.21     peter /*
    638  1.21     peter  * helper routine to calibrate avg during idle.
    639  1.21     peter  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
    640  1.21     peter  * here Wq = 1/weight and the code assumes Wq is close to zero.
    641  1.21     peter  *
    642  1.21     peter  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
    643  1.21     peter  */
    644  1.21     peter static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
    645  1.21     peter 
    646  1.21     peter struct wtab *
    647  1.21     peter wtab_alloc(int weight)
    648  1.21     peter {
    649  1.21     peter 	struct wtab	*w;
    650  1.21     peter 	int		 i;
    651  1.11     perry 
    652  1.21     peter 	for (w = wtab_list; w != NULL; w = w->w_next)
    653  1.21     peter 		if (w->w_weight == weight) {
    654  1.21     peter 			w->w_refcount++;
    655  1.21     peter 			return (w);
    656  1.21     peter 		}
    657   1.1   thorpej 
    658  1.21     peter 	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
    659  1.21     peter 	if (w == NULL)
    660  1.21     peter 		panic("wtab_alloc: malloc failed!");
    661  1.21     peter 	w->w_weight = weight;
    662  1.21     peter 	w->w_refcount = 1;
    663  1.21     peter 	w->w_next = wtab_list;
    664  1.21     peter 	wtab_list = w;
    665   1.1   thorpej 
    666  1.21     peter 	/* initialize the weight table */
    667  1.21     peter 	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
    668  1.21     peter 	for (i = 1; i < 32; i++) {
    669  1.21     peter 		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
    670  1.21     peter 		if (w->w_tab[i] == 0 && w->w_param_max == 0)
    671  1.21     peter 			w->w_param_max = 1 << i;
    672  1.21     peter 	}
    673   1.1   thorpej 
    674  1.21     peter 	return (w);
    675   1.1   thorpej }
    676   1.1   thorpej 
    677  1.21     peter int
    678  1.21     peter wtab_destroy(struct wtab *w)
    679   1.1   thorpej {
    680  1.21     peter 	struct wtab	*prev;
    681  1.21     peter 
    682  1.21     peter 	if (--w->w_refcount > 0)
    683  1.21     peter 		return (0);
    684  1.21     peter 
    685  1.21     peter 	if (wtab_list == w)
    686  1.21     peter 		wtab_list = w->w_next;
    687  1.21     peter 	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
    688  1.21     peter 		if (prev->w_next == w) {
    689  1.21     peter 			prev->w_next = w->w_next;
    690  1.21     peter 			break;
    691  1.21     peter 		}
    692  1.21     peter 
    693  1.21     peter 	free(w, M_DEVBUF);
    694  1.21     peter 	return (0);
    695   1.1   thorpej }
    696   1.1   thorpej 
    697  1.21     peter int32_t
    698  1.21     peter pow_w(struct wtab *w, int n)
    699   1.1   thorpej {
    700  1.21     peter 	int	i, bit;
    701  1.21     peter 	int32_t	val;
    702  1.21     peter 
    703  1.21     peter 	if (n >= w->w_param_max)
    704  1.21     peter 		return (0);
    705  1.21     peter 
    706  1.21     peter 	val = 1 << FP_SHIFT;
    707  1.21     peter 	if (n <= 0)
    708  1.21     peter 		return (val);
    709  1.21     peter 
    710  1.21     peter 	bit = 1;
    711  1.21     peter 	i = 0;
    712  1.21     peter 	while (n) {
    713  1.21     peter 		if (n & bit) {
    714  1.21     peter 			val = (val * w->w_tab[i]) >> FP_SHIFT;
    715  1.21     peter 			n &= ~bit;
    716  1.21     peter 		}
    717  1.21     peter 		i++;
    718  1.21     peter 		bit <<=  1;
    719  1.21     peter 	}
    720  1.21     peter 	return (val);
    721   1.1   thorpej }
    722   1.1   thorpej 
    723  1.21     peter #ifdef ALTQ3_COMPAT
    724   1.1   thorpej /*
    725  1.21     peter  * red device interface
    726   1.1   thorpej  */
    727  1.21     peter altqdev_decl(red);
    728  1.21     peter 
    729  1.21     peter int
    730  1.24  christos redopen(dev_t dev, int flag, int fmt,
    731  1.24  christos     struct lwp *l)
    732   1.1   thorpej {
    733  1.21     peter 	/* everything will be done when the queueing scheme is attached. */
    734   1.1   thorpej 	return 0;
    735   1.1   thorpej }
    736   1.1   thorpej 
    737   1.1   thorpej int
    738  1.24  christos redclose(dev_t dev, int flag, int fmt,
    739  1.24  christos     struct lwp *l)
    740   1.1   thorpej {
    741  1.21     peter 	red_queue_t *rqp;
    742  1.21     peter 	int err, error = 0;
    743  1.21     peter 
    744  1.21     peter 	while ((rqp = red_list) != NULL) {
    745  1.21     peter 		/* destroy all */
    746  1.21     peter 		err = red_detach(rqp);
    747  1.21     peter 		if (err != 0 && error == 0)
    748  1.21     peter 			error = err;
    749  1.21     peter 	}
    750   1.1   thorpej 
    751  1.21     peter 	return error;
    752  1.21     peter }
    753   1.1   thorpej 
    754  1.21     peter int
    755  1.25  christos redioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    756  1.21     peter     struct lwp *l)
    757  1.21     peter {
    758  1.21     peter 	red_queue_t *rqp;
    759  1.21     peter 	struct red_interface *ifacep;
    760  1.21     peter 	struct ifnet *ifp;
    761  1.21     peter 	struct proc *p = l->l_proc;
    762  1.21     peter 	int	error = 0;
    763   1.1   thorpej 
    764  1.21     peter 	/* check super-user privilege */
    765  1.21     peter 	switch (cmd) {
    766  1.21     peter 	case RED_GETSTATS:
    767  1.21     peter 		break;
    768  1.21     peter 	default:
    769  1.21     peter #if (__FreeBSD_version > 400000)
    770  1.21     peter 		if ((error = suser(p)) != 0)
    771  1.21     peter #else
    772  1.23      elad 		if ((error = kauth_authorize_network(p->p_cred,
    773  1.23      elad 		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL,
    774  1.23      elad 		    NULL, NULL)) != 0)
    775  1.21     peter #endif
    776  1.21     peter 			return (error);
    777  1.21     peter 		break;
    778  1.21     peter 	}
    779  1.21     peter 
    780  1.21     peter 	switch (cmd) {
    781  1.21     peter 
    782  1.21     peter 	case RED_ENABLE:
    783  1.21     peter 		ifacep = (struct red_interface *)addr;
    784  1.21     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    785  1.21     peter 			error = EBADF;
    786  1.21     peter 			break;
    787  1.21     peter 		}
    788  1.21     peter 		error = altq_enable(rqp->rq_ifq);
    789  1.21     peter 		break;
    790  1.21     peter 
    791  1.21     peter 	case RED_DISABLE:
    792  1.21     peter 		ifacep = (struct red_interface *)addr;
    793  1.21     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    794  1.21     peter 			error = EBADF;
    795  1.21     peter 			break;
    796  1.21     peter 		}
    797  1.21     peter 		error = altq_disable(rqp->rq_ifq);
    798  1.21     peter 		break;
    799  1.21     peter 
    800  1.21     peter 	case RED_IF_ATTACH:
    801  1.21     peter 		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
    802  1.21     peter 		if (ifp == NULL) {
    803  1.21     peter 			error = ENXIO;
    804  1.21     peter 			break;
    805  1.21     peter 		}
    806  1.21     peter 
    807  1.21     peter 		/* allocate and initialize red_queue_t */
    808  1.21     peter 		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
    809  1.21     peter 		if (rqp == NULL) {
    810  1.21     peter 			error = ENOMEM;
    811  1.21     peter 			break;
    812  1.21     peter 		}
    813  1.21     peter 
    814  1.21     peter 		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
    815  1.21     peter 		    M_WAITOK|M_ZERO);
    816  1.21     peter 		if (rqp->rq_q == NULL) {
    817  1.21     peter 			free(rqp, M_DEVBUF);
    818  1.21     peter 			error = ENOMEM;
    819  1.21     peter 			break;
    820  1.21     peter 		}
    821  1.21     peter 
    822  1.21     peter 		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
    823  1.21     peter 		if (rqp->rq_red == NULL) {
    824  1.21     peter 			free(rqp->rq_q, M_DEVBUF);
    825  1.21     peter 			free(rqp, M_DEVBUF);
    826  1.21     peter 			error = ENOMEM;
    827  1.21     peter 			break;
    828  1.21     peter 		}
    829  1.21     peter 
    830  1.21     peter 		rqp->rq_ifq = &ifp->if_snd;
    831  1.21     peter 		qtail(rqp->rq_q) = NULL;
    832  1.21     peter 		qlen(rqp->rq_q) = 0;
    833  1.21     peter 		qlimit(rqp->rq_q) = RED_LIMIT;
    834  1.21     peter 		qtype(rqp->rq_q) = Q_RED;
    835  1.21     peter 
    836  1.21     peter 		/*
    837  1.21     peter 		 * set RED to this ifnet structure.
    838  1.21     peter 		 */
    839  1.21     peter 		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
    840  1.21     peter 				    red_enqueue, red_dequeue, red_request,
    841  1.21     peter 				    NULL, NULL);
    842  1.21     peter 		if (error) {
    843  1.21     peter 			red_destroy(rqp->rq_red);
    844  1.21     peter 			free(rqp->rq_q, M_DEVBUF);
    845  1.21     peter 			free(rqp, M_DEVBUF);
    846  1.21     peter 			break;
    847  1.21     peter 		}
    848   1.1   thorpej 
    849  1.21     peter 		/* add this state to the red list */
    850  1.21     peter 		rqp->rq_next = red_list;
    851  1.21     peter 		red_list = rqp;
    852  1.21     peter 		break;
    853   1.1   thorpej 
    854  1.21     peter 	case RED_IF_DETACH:
    855  1.21     peter 		ifacep = (struct red_interface *)addr;
    856  1.21     peter 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
    857  1.21     peter 			error = EBADF;
    858  1.21     peter 			break;
    859   1.1   thorpej 		}
    860  1.21     peter 		error = red_detach(rqp);
    861  1.21     peter 		break;
    862  1.21     peter 
    863  1.21     peter 	case RED_GETSTATS:
    864  1.21     peter 		do {
    865  1.21     peter 			struct red_stats *q_stats;
    866  1.21     peter 			red_t *rp;
    867  1.21     peter 
    868  1.21     peter 			q_stats = (struct red_stats *)addr;
    869  1.21     peter 			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
    870  1.21     peter 					     ALTQT_RED)) == NULL) {
    871  1.21     peter 				error = EBADF;
    872  1.21     peter 				break;
    873  1.21     peter 			}
    874  1.21     peter 
    875  1.21     peter 			q_stats->q_len 	   = qlen(rqp->rq_q);
    876  1.21     peter 			q_stats->q_limit   = qlimit(rqp->rq_q);
    877   1.1   thorpej 
    878  1.21     peter 			rp = rqp->rq_red;
    879  1.21     peter 			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
    880  1.21     peter 			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
    881  1.21     peter 			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
    882  1.21     peter 			q_stats->drop_forced   = rp->red_stats.drop_forced;
    883  1.21     peter 			q_stats->drop_unforced = rp->red_stats.drop_unforced;
    884  1.21     peter 			q_stats->marked_packets = rp->red_stats.marked_packets;
    885   1.1   thorpej 
    886  1.21     peter 			q_stats->weight		= rp->red_weight;
    887  1.21     peter 			q_stats->inv_pmax	= rp->red_inv_pmax;
    888  1.21     peter 			q_stats->th_min		= rp->red_thmin;
    889  1.21     peter 			q_stats->th_max		= rp->red_thmax;
    890   1.1   thorpej 
    891  1.21     peter #ifdef ALTQ_FLOWVALVE
    892  1.21     peter 			if (rp->red_flowvalve != NULL) {
    893  1.21     peter 				struct flowvalve *fv = rp->red_flowvalve;
    894  1.21     peter 				q_stats->fv_flows    = fv->fv_flows;
    895  1.21     peter 				q_stats->fv_pass     = fv->fv_stats.pass;
    896  1.21     peter 				q_stats->fv_predrop  = fv->fv_stats.predrop;
    897  1.21     peter 				q_stats->fv_alloc    = fv->fv_stats.alloc;
    898  1.21     peter 				q_stats->fv_escape   = fv->fv_stats.escape;
    899  1.11     perry 			} else {
    900  1.21     peter #endif /* ALTQ_FLOWVALVE */
    901  1.21     peter 				q_stats->fv_flows    = 0;
    902  1.21     peter 				q_stats->fv_pass     = 0;
    903  1.21     peter 				q_stats->fv_predrop  = 0;
    904  1.21     peter 				q_stats->fv_alloc    = 0;
    905  1.21     peter 				q_stats->fv_escape   = 0;
    906  1.21     peter #ifdef ALTQ_FLOWVALVE
    907  1.21     peter 			}
    908  1.21     peter #endif /* ALTQ_FLOWVALVE */
    909  1.21     peter 		} while (/*CONSTCOND*/ 0);
    910  1.21     peter 		break;
    911  1.21     peter 
    912  1.21     peter 	case RED_CONFIG:
    913  1.21     peter 		do {
    914  1.21     peter 			struct red_conf *fc;
    915  1.21     peter 			red_t *new;
    916  1.21     peter 			int s, limit;
    917  1.21     peter 
    918  1.21     peter 			fc = (struct red_conf *)addr;
    919  1.21     peter 			if ((rqp = altq_lookup(fc->iface.red_ifname,
    920  1.21     peter 					       ALTQT_RED)) == NULL) {
    921  1.21     peter 				error = EBADF;
    922  1.21     peter 				break;
    923  1.21     peter 			}
    924  1.21     peter 			new = red_alloc(fc->red_weight,
    925  1.21     peter 					fc->red_inv_pmax,
    926  1.21     peter 					fc->red_thmin,
    927  1.21     peter 					fc->red_thmax,
    928  1.21     peter 					fc->red_flags,
    929  1.21     peter 					fc->red_pkttime);
    930  1.21     peter 			if (new == NULL) {
    931  1.21     peter 				error = ENOMEM;
    932  1.21     peter 				break;
    933   1.1   thorpej 			}
    934   1.1   thorpej 
    935  1.21     peter 			s = splnet();
    936  1.21     peter 			red_purgeq(rqp);
    937  1.21     peter 			limit = fc->red_limit;
    938  1.21     peter 			if (limit < fc->red_thmax)
    939  1.21     peter 				limit = fc->red_thmax;
    940  1.21     peter 			qlimit(rqp->rq_q) = limit;
    941  1.21     peter 			fc->red_limit = limit;	/* write back the new value */
    942  1.21     peter 
    943  1.21     peter 			red_destroy(rqp->rq_red);
    944  1.21     peter 			rqp->rq_red = new;
    945  1.21     peter 
    946  1.21     peter 			splx(s);
    947  1.21     peter 
    948  1.21     peter 			/* write back new values */
    949  1.21     peter 			fc->red_limit = limit;
    950  1.21     peter 			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
    951  1.21     peter 			fc->red_thmin = rqp->rq_red->red_thmin;
    952  1.21     peter 			fc->red_thmax = rqp->rq_red->red_thmax;
    953  1.21     peter 
    954  1.21     peter 		} while (/*CONSTCOND*/ 0);
    955  1.21     peter 		break;
    956  1.21     peter 
    957  1.21     peter 	case RED_SETDEFAULTS:
    958  1.21     peter 		do {
    959  1.21     peter 			struct redparams *rp;
    960  1.21     peter 
    961  1.21     peter 			rp = (struct redparams *)addr;
    962  1.21     peter 
    963  1.21     peter 			default_th_min = rp->th_min;
    964  1.21     peter 			default_th_max = rp->th_max;
    965  1.21     peter 			default_inv_pmax = rp->inv_pmax;
    966  1.21     peter 		} while (/*CONSTCOND*/ 0);
    967  1.21     peter 		break;
    968   1.1   thorpej 
    969  1.21     peter 	default:
    970  1.21     peter 		error = EINVAL;
    971  1.21     peter 		break;
    972   1.1   thorpej 	}
    973  1.21     peter 	return error;
    974   1.1   thorpej }
    975   1.1   thorpej 
    976  1.21     peter static int
    977  1.21     peter red_detach(red_queue_t *rqp)
    978   1.1   thorpej {
    979  1.21     peter 	red_queue_t *tmp;
    980  1.21     peter 	int error = 0;
    981   1.1   thorpej 
    982  1.21     peter 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    983  1.21     peter 		altq_disable(rqp->rq_ifq);
    984   1.1   thorpej 
    985  1.21     peter 	if ((error = altq_detach(rqp->rq_ifq)))
    986  1.21     peter 		return (error);
    987   1.1   thorpej 
    988  1.21     peter 	if (red_list == rqp)
    989  1.21     peter 		red_list = rqp->rq_next;
    990  1.21     peter 	else {
    991  1.21     peter 		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
    992  1.21     peter 			if (tmp->rq_next == rqp) {
    993  1.21     peter 				tmp->rq_next = rqp->rq_next;
    994  1.21     peter 				break;
    995  1.21     peter 			}
    996  1.21     peter 		if (tmp == NULL)
    997  1.21     peter 			printf("red_detach: no state found in red_list!\n");
    998   1.1   thorpej 	}
    999  1.21     peter 
   1000  1.21     peter 	red_destroy(rqp->rq_red);
   1001  1.21     peter 	free(rqp->rq_q, M_DEVBUF);
   1002  1.21     peter 	free(rqp, M_DEVBUF);
   1003  1.21     peter 	return (error);
   1004   1.1   thorpej }
   1005   1.1   thorpej 
   1006   1.1   thorpej /*
   1007  1.21     peter  * enqueue routine:
   1008  1.21     peter  *
   1009  1.21     peter  *	returns: 0 when successfully queued.
   1010  1.21     peter  *		 ENOBUFS when drop occurs.
   1011   1.1   thorpej  */
   1012  1.21     peter static int
   1013  1.21     peter red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
   1014   1.1   thorpej {
   1015  1.21     peter 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1016   1.1   thorpej 
   1017  1.21     peter 	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
   1018  1.21     peter 		return ENOBUFS;
   1019  1.21     peter 	ifq->ifq_len++;
   1020  1.21     peter 	return 0;
   1021   1.1   thorpej }
   1022   1.1   thorpej 
   1023   1.1   thorpej /*
   1024   1.1   thorpej  * dequeue routine:
   1025   1.3   thorpej  *	must be called in splnet.
   1026   1.1   thorpej  *
   1027   1.1   thorpej  *	returns: mbuf dequeued.
   1028   1.1   thorpej  *		 NULL when no packet is available in the queue.
   1029   1.1   thorpej  */
   1030   1.1   thorpej 
   1031   1.1   thorpej static struct mbuf *
   1032  1.21     peter red_dequeue(struct ifaltq *ifq, int op)
   1033   1.1   thorpej {
   1034   1.1   thorpej 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1035   1.1   thorpej 	struct mbuf *m;
   1036   1.1   thorpej 
   1037   1.1   thorpej 	if (op == ALTDQ_POLL)
   1038   1.1   thorpej 		return qhead(rqp->rq_q);
   1039   1.1   thorpej 
   1040   1.1   thorpej 	/* op == ALTDQ_REMOVE */
   1041   1.1   thorpej 	m =  red_getq(rqp->rq_red, rqp->rq_q);
   1042   1.1   thorpej 	if (m != NULL)
   1043   1.1   thorpej 		ifq->ifq_len--;
   1044   1.1   thorpej 	return (m);
   1045   1.1   thorpej }
   1046   1.1   thorpej 
   1047   1.1   thorpej static int
   1048  1.24  christos red_request(struct ifaltq *ifq, int req, void *arg)
   1049   1.1   thorpej {
   1050   1.1   thorpej 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
   1051   1.1   thorpej 
   1052   1.1   thorpej 	switch (req) {
   1053   1.1   thorpej 	case ALTRQ_PURGE:
   1054   1.1   thorpej 		red_purgeq(rqp);
   1055   1.1   thorpej 		break;
   1056   1.1   thorpej 	}
   1057   1.1   thorpej 	return (0);
   1058   1.1   thorpej }
   1059   1.1   thorpej 
   1060   1.1   thorpej static void
   1061  1.21     peter red_purgeq(red_queue_t *rqp)
   1062   1.1   thorpej {
   1063   1.1   thorpej 	_flushq(rqp->rq_q);
   1064   1.1   thorpej 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
   1065   1.1   thorpej 		rqp->rq_ifq->ifq_len = 0;
   1066   1.1   thorpej }
   1067   1.1   thorpej 
   1068   1.1   thorpej #ifdef ALTQ_FLOWVALVE
   1069   1.1   thorpej 
   1070   1.1   thorpej #define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
   1071   1.1   thorpej #define	FV_PSCALE(x)	((x) << FV_PSHIFT)
   1072   1.1   thorpej #define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
   1073   1.1   thorpej #define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
   1074   1.1   thorpej #define	FV_FSCALE(x)	((x) << FV_FSHIFT)
   1075   1.1   thorpej #define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
   1076   1.1   thorpej 
   1077   1.1   thorpej #define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
   1078   1.1   thorpej #define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
   1079   1.1   thorpej 
   1080   1.1   thorpej #define	FV_N			10	/* update fve_f every FV_N packets */
   1081   1.1   thorpej 
   1082   1.1   thorpej #define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
   1083  1.11     perry #define	FV_TTHRESH		3  /* time threshold to delete fve */
   1084   1.1   thorpej #define	FV_ALPHA		5  /* extra packet count */
   1085   1.1   thorpej 
   1086  1.21     peter #define	FV_STATS
   1087  1.21     peter 
   1088   1.1   thorpej #define	FV_TIMESTAMP(tp)	getmicrotime(tp)
   1089   1.1   thorpej 
   1090   1.1   thorpej /*
   1091   1.1   thorpej  * Brtt table: 127 entry table to convert drop rate (p) to
   1092   1.1   thorpej  * the corresponding bandwidth fraction (f)
   1093   1.1   thorpej  * the following equation is implemented to use scaled values,
   1094   1.1   thorpej  * fve_p and fve_f, in the fixed point format.
   1095   1.1   thorpej  *
   1096   1.1   thorpej  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
   1097   1.1   thorpej  *   f = Brtt(p) / (max_th + alpha)
   1098   1.1   thorpej  */
   1099   1.1   thorpej #define	BRTT_SIZE	128
   1100   1.1   thorpej #define	BRTT_SHIFT	12
   1101   1.1   thorpej #define	BRTT_MASK	0x0007f000
   1102   1.1   thorpej #define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
   1103   1.1   thorpej 
   1104   1.1   thorpej const int brtt_tab[BRTT_SIZE] = {
   1105  1.11     perry 	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
   1106  1.11     perry 	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
   1107  1.11     perry 	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
   1108  1.11     perry 	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
   1109  1.11     perry 	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
   1110  1.11     perry 	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
   1111  1.11     perry 	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
   1112  1.11     perry 	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
   1113  1.11     perry 	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
   1114  1.11     perry 	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
   1115  1.11     perry 	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
   1116  1.11     perry 	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
   1117  1.11     perry 	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
   1118  1.11     perry 	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
   1119  1.11     perry 	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
   1120   1.1   thorpej 	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
   1121   1.1   thorpej };
   1122   1.1   thorpej 
   1123  1.13     perry static inline struct fve *
   1124  1.21     peter flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
   1125  1.21     peter     struct timeval *now)
   1126   1.1   thorpej {
   1127   1.1   thorpej 	struct fve *fve;
   1128   1.1   thorpej 	int flows;
   1129   1.1   thorpej 	struct ip *ip;
   1130   1.1   thorpej #ifdef INET6
   1131   1.1   thorpej 	struct ip6_hdr *ip6;
   1132   1.1   thorpej #endif
   1133   1.1   thorpej 	struct timeval tthresh;
   1134   1.1   thorpej 
   1135   1.1   thorpej 	if (pktattr == NULL)
   1136   1.1   thorpej 		return (NULL);
   1137   1.1   thorpej 
   1138   1.1   thorpej 	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
   1139   1.1   thorpej 	flows = 0;
   1140   1.1   thorpej 	/*
   1141   1.1   thorpej 	 * search the flow list
   1142   1.1   thorpej 	 */
   1143   1.1   thorpej 	switch (pktattr->pattr_af) {
   1144   1.1   thorpej 	case AF_INET:
   1145   1.1   thorpej 		ip = (struct ip *)pktattr->pattr_hdr;
   1146   1.1   thorpej 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
   1147   1.1   thorpej 			if (fve->fve_lastdrop.tv_sec == 0)
   1148   1.1   thorpej 				break;
   1149   1.1   thorpej 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
   1150   1.1   thorpej 				fve->fve_lastdrop.tv_sec = 0;
   1151   1.1   thorpej 				break;
   1152   1.1   thorpej 			}
   1153   1.1   thorpej 			if (fve->fve_flow.flow_af == AF_INET &&
   1154   1.1   thorpej 			    fve->fve_flow.flow_ip.ip_src.s_addr ==
   1155   1.1   thorpej 			    ip->ip_src.s_addr &&
   1156   1.1   thorpej 			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
   1157   1.1   thorpej 			    ip->ip_dst.s_addr)
   1158   1.1   thorpej 				return (fve);
   1159   1.1   thorpej 			flows++;
   1160   1.1   thorpej 		}
   1161   1.1   thorpej 		break;
   1162   1.1   thorpej #ifdef INET6
   1163   1.1   thorpej 	case AF_INET6:
   1164   1.1   thorpej 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1165   1.1   thorpej 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
   1166   1.1   thorpej 			if (fve->fve_lastdrop.tv_sec == 0)
   1167   1.1   thorpej 				break;
   1168   1.1   thorpej 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
   1169   1.1   thorpej 				fve->fve_lastdrop.tv_sec = 0;
   1170   1.1   thorpej 				break;
   1171   1.1   thorpej 			}
   1172   1.1   thorpej 			if (fve->fve_flow.flow_af == AF_INET6 &&
   1173   1.1   thorpej 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
   1174   1.1   thorpej 					       &ip6->ip6_src) &&
   1175   1.1   thorpej 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
   1176   1.1   thorpej 					       &ip6->ip6_dst))
   1177   1.1   thorpej 				return (fve);
   1178   1.1   thorpej 			flows++;
   1179   1.1   thorpej 		}
   1180   1.1   thorpej 		break;
   1181   1.1   thorpej #endif /* INET6 */
   1182   1.1   thorpej 
   1183   1.1   thorpej 	default:
   1184   1.1   thorpej 		/* unknown protocol.  no drop. */
   1185   1.1   thorpej 		return (NULL);
   1186   1.1   thorpej 	}
   1187   1.1   thorpej 	fv->fv_flows = flows;	/* save the number of active fve's */
   1188   1.1   thorpej 	return (NULL);
   1189   1.1   thorpej }
   1190   1.1   thorpej 
   1191  1.13     perry static inline struct fve *
   1192  1.21     peter flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
   1193   1.1   thorpej {
   1194   1.1   thorpej 	struct fve *fve;
   1195   1.1   thorpej 	struct ip *ip;
   1196   1.1   thorpej #ifdef INET6
   1197   1.1   thorpej 	struct ip6_hdr *ip6;
   1198   1.1   thorpej #endif
   1199   1.1   thorpej 
   1200   1.1   thorpej 	/*
   1201   1.1   thorpej 	 * get an entry from the tail of the LRU list.
   1202   1.1   thorpej 	 */
   1203   1.1   thorpej 	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
   1204   1.1   thorpej 
   1205   1.1   thorpej 	switch (pktattr->pattr_af) {
   1206   1.1   thorpej 	case AF_INET:
   1207   1.1   thorpej 		ip = (struct ip *)pktattr->pattr_hdr;
   1208   1.1   thorpej 		fve->fve_flow.flow_af = AF_INET;
   1209   1.1   thorpej 		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
   1210   1.1   thorpej 		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
   1211   1.1   thorpej 		break;
   1212   1.1   thorpej #ifdef INET6
   1213   1.1   thorpej 	case AF_INET6:
   1214   1.1   thorpej 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1215   1.1   thorpej 		fve->fve_flow.flow_af = AF_INET6;
   1216   1.1   thorpej 		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
   1217   1.1   thorpej 		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
   1218   1.1   thorpej 		break;
   1219   1.1   thorpej #endif
   1220   1.1   thorpej 	}
   1221   1.1   thorpej 
   1222   1.1   thorpej 	fve->fve_state = Green;
   1223   1.1   thorpej 	fve->fve_p = 0.0;
   1224   1.1   thorpej 	fve->fve_f = 0.0;
   1225   1.1   thorpej 	fve->fve_ifseq = fv->fv_ifseq - 1;
   1226   1.1   thorpej 	fve->fve_count = 0;
   1227   1.1   thorpej 
   1228   1.1   thorpej 	fv->fv_flows++;
   1229   1.1   thorpej #ifdef FV_STATS
   1230   1.1   thorpej 	fv->fv_stats.alloc++;
   1231   1.1   thorpej #endif
   1232   1.1   thorpej 	return (fve);
   1233   1.1   thorpej }
   1234   1.1   thorpej 
   1235  1.13     perry static inline void
   1236  1.21     peter flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
   1237   1.1   thorpej {
   1238   1.1   thorpej 	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
   1239   1.1   thorpej 		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
   1240   1.1   thorpej 		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
   1241   1.1   thorpej 	}
   1242   1.1   thorpej }
   1243   1.1   thorpej 
   1244   1.1   thorpej /*
   1245   1.1   thorpej  * allocate flowvalve structure
   1246   1.1   thorpej  */
   1247   1.1   thorpej static struct flowvalve *
   1248  1.21     peter fv_alloc(struct red *rp)
   1249   1.1   thorpej {
   1250   1.1   thorpej 	struct flowvalve *fv;
   1251   1.1   thorpej 	struct fve *fve;
   1252   1.1   thorpej 	int i, num;
   1253   1.1   thorpej 
   1254   1.1   thorpej 	num = FV_FLOWLISTSIZE;
   1255  1.14  christos 	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
   1256   1.1   thorpej 	if (fv == NULL)
   1257   1.1   thorpej 		return (NULL);
   1258   1.1   thorpej 
   1259  1.14  christos 	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
   1260  1.14  christos 	    M_WAITOK|M_ZERO);
   1261   1.1   thorpej 	if (fv->fv_fves == NULL) {
   1262  1.14  christos 		free(fv, M_DEVBUF);
   1263   1.1   thorpej 		return (NULL);
   1264   1.1   thorpej 	}
   1265   1.1   thorpej 
   1266   1.1   thorpej 	fv->fv_flows = 0;
   1267   1.1   thorpej 	TAILQ_INIT(&fv->fv_flowlist);
   1268   1.1   thorpej 	for (i = 0; i < num; i++) {
   1269   1.1   thorpej 		fve = &fv->fv_fves[i];
   1270   1.1   thorpej 		fve->fve_lastdrop.tv_sec = 0;
   1271   1.1   thorpej 		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
   1272   1.1   thorpej 	}
   1273   1.1   thorpej 
   1274   1.1   thorpej 	/* initialize drop rate threshold in scaled fixed-point */
   1275   1.1   thorpej 	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
   1276   1.1   thorpej 
   1277   1.1   thorpej 	/* initialize drop rate to fraction table */
   1278  1.14  christos 	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
   1279   1.1   thorpej 	if (fv->fv_p2ftab == NULL) {
   1280  1.14  christos 		free(fv->fv_fves, M_DEVBUF);
   1281  1.14  christos 		free(fv, M_DEVBUF);
   1282   1.1   thorpej 		return (NULL);
   1283   1.1   thorpej 	}
   1284   1.1   thorpej 	/*
   1285   1.1   thorpej 	 * create the p2f table.
   1286   1.1   thorpej 	 * (shift is used to keep the precision)
   1287   1.1   thorpej 	 */
   1288   1.1   thorpej 	for (i = 1; i < BRTT_SIZE; i++) {
   1289   1.1   thorpej 		int f;
   1290   1.1   thorpej 
   1291   1.1   thorpej 		f = brtt_tab[i] << 8;
   1292   1.1   thorpej 		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
   1293   1.1   thorpej 	}
   1294   1.1   thorpej 
   1295   1.1   thorpej 	return (fv);
   1296   1.1   thorpej }
   1297   1.1   thorpej 
   1298  1.21     peter static void
   1299  1.21     peter fv_destroy(struct flowvalve *fv)
   1300   1.1   thorpej {
   1301  1.15  christos 	free(fv->fv_p2ftab, M_DEVBUF);
   1302  1.15  christos 	free(fv->fv_fves, M_DEVBUF);
   1303  1.15  christos 	free(fv, M_DEVBUF);
   1304   1.1   thorpej }
   1305   1.1   thorpej 
   1306  1.13     perry static inline int
   1307  1.21     peter fv_p2f(struct flowvalve *fv, int p)
   1308   1.1   thorpej {
   1309   1.1   thorpej 	int val, f;
   1310   1.1   thorpej 
   1311   1.1   thorpej 	if (p >= BRTT_PMAX)
   1312   1.1   thorpej 		f = fv->fv_p2ftab[BRTT_SIZE-1];
   1313   1.1   thorpej 	else if ((val = (p & BRTT_MASK)))
   1314   1.1   thorpej 		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
   1315   1.1   thorpej 	else
   1316   1.1   thorpej 		f = fv->fv_p2ftab[1];
   1317   1.1   thorpej 	return (f);
   1318   1.1   thorpej }
   1319   1.1   thorpej 
   1320   1.1   thorpej /*
   1321   1.1   thorpej  * check if an arriving packet should be pre-dropped.
   1322   1.1   thorpej  * called from red_addq() when a packet arrives.
   1323   1.1   thorpej  * returns 1 when the packet should be pre-dropped.
   1324   1.3   thorpej  * should be called in splnet.
   1325   1.1   thorpej  */
   1326   1.1   thorpej static int
   1327  1.21     peter fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
   1328  1.21     peter     struct fve **fcache)
   1329   1.1   thorpej {
   1330   1.1   thorpej 	struct fve *fve;
   1331   1.1   thorpej 	struct timeval now;
   1332   1.1   thorpej 
   1333   1.1   thorpej 	fv->fv_ifseq++;
   1334   1.1   thorpej 	FV_TIMESTAMP(&now);
   1335   1.1   thorpej 
   1336   1.1   thorpej 	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
   1337   1.1   thorpej 		/* no matching entry in the flowlist */
   1338   1.1   thorpej 		return (0);
   1339   1.1   thorpej 
   1340   1.1   thorpej 	*fcache = fve;
   1341   1.1   thorpej 
   1342   1.1   thorpej 	/* update fraction f for every FV_N packets */
   1343   1.1   thorpej 	if (++fve->fve_count == FV_N) {
   1344   1.1   thorpej 		/*
   1345   1.1   thorpej 		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
   1346   1.1   thorpej 		 */
   1347   1.1   thorpej 		fve->fve_f =
   1348   1.1   thorpej 			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
   1349   1.1   thorpej 			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
   1350   1.1   thorpej 		fve->fve_ifseq = fv->fv_ifseq;
   1351   1.1   thorpej 		fve->fve_count = 0;
   1352   1.1   thorpej 	}
   1353   1.1   thorpej 
   1354   1.1   thorpej 	/*
   1355   1.1   thorpej 	 * overpumping test
   1356   1.1   thorpej 	 */
   1357   1.1   thorpej 	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
   1358   1.1   thorpej 		int fthresh;
   1359   1.1   thorpej 
   1360   1.1   thorpej 		/* calculate a threshold */
   1361   1.1   thorpej 		fthresh = fv_p2f(fv, fve->fve_p);
   1362   1.1   thorpej 		if (fve->fve_f > fthresh)
   1363   1.1   thorpej 			fve->fve_state = Red;
   1364   1.1   thorpej 	}
   1365   1.1   thorpej 
   1366   1.1   thorpej 	if (fve->fve_state == Red) {
   1367   1.1   thorpej 		/*
   1368   1.1   thorpej 		 * backoff test
   1369   1.1   thorpej 		 */
   1370   1.1   thorpej 		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
   1371   1.1   thorpej 			/* no drop for at least FV_BACKOFFTHRESH sec */
   1372   1.1   thorpej 			fve->fve_p = 0;
   1373   1.1   thorpej 			fve->fve_state = Green;
   1374   1.1   thorpej #ifdef FV_STATS
   1375   1.1   thorpej 			fv->fv_stats.escape++;
   1376   1.1   thorpej #endif
   1377   1.1   thorpej 		} else {
   1378   1.1   thorpej 			/* block this flow */
   1379   1.1   thorpej 			flowlist_move_to_head(fv, fve);
   1380   1.1   thorpej 			fve->fve_lastdrop = now;
   1381   1.1   thorpej #ifdef FV_STATS
   1382   1.1   thorpej 			fv->fv_stats.predrop++;
   1383   1.1   thorpej #endif
   1384   1.1   thorpej 			return (1);
   1385   1.1   thorpej 		}
   1386   1.1   thorpej 	}
   1387   1.1   thorpej 
   1388   1.1   thorpej 	/*
   1389   1.1   thorpej 	 * p = (1 - Wp) * p
   1390   1.1   thorpej 	 */
   1391   1.1   thorpej 	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
   1392   1.1   thorpej 	if (fve->fve_p < 0)
   1393   1.1   thorpej 		fve->fve_p = 0;
   1394   1.1   thorpej #ifdef FV_STATS
   1395   1.1   thorpej 	fv->fv_stats.pass++;
   1396   1.1   thorpej #endif
   1397   1.1   thorpej 	return (0);
   1398   1.1   thorpej }
   1399   1.1   thorpej 
   1400   1.1   thorpej /*
   1401   1.1   thorpej  * called from red_addq when a packet is dropped by red.
   1402   1.3   thorpej  * should be called in splnet.
   1403   1.1   thorpej  */
   1404  1.21     peter static void
   1405  1.21     peter fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
   1406  1.21     peter     struct fve *fcache)
   1407   1.1   thorpej {
   1408   1.1   thorpej 	struct fve *fve;
   1409   1.1   thorpej 	struct timeval now;
   1410   1.1   thorpej 
   1411   1.1   thorpej 	if (pktattr == NULL)
   1412   1.1   thorpej 		return;
   1413   1.1   thorpej 	FV_TIMESTAMP(&now);
   1414   1.1   thorpej 
   1415   1.1   thorpej 	if (fcache != NULL)
   1416   1.1   thorpej 		/* the fve of this packet is already cached */
   1417   1.1   thorpej 		fve = fcache;
   1418   1.1   thorpej 	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
   1419   1.1   thorpej 		fve = flowlist_reclaim(fv, pktattr);
   1420   1.1   thorpej 
   1421   1.1   thorpej 	flowlist_move_to_head(fv, fve);
   1422   1.1   thorpej 
   1423   1.1   thorpej 	/*
   1424   1.1   thorpej 	 * update p:  the following line cancels the update
   1425   1.1   thorpej 	 *	      in fv_checkflow() and calculate
   1426   1.1   thorpej 	 *	p = Wp + (1 - Wp) * p
   1427   1.1   thorpej 	 */
   1428   1.1   thorpej 	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
   1429   1.1   thorpej 
   1430   1.1   thorpej 	fve->fve_lastdrop = now;
   1431   1.1   thorpej }
   1432   1.1   thorpej 
   1433   1.1   thorpej #endif /* ALTQ_FLOWVALVE */
   1434   1.1   thorpej 
   1435   1.1   thorpej #ifdef KLD_MODULE
   1436   1.1   thorpej 
   1437   1.1   thorpej static struct altqsw red_sw =
   1438   1.1   thorpej 	{"red", redopen, redclose, redioctl};
   1439   1.1   thorpej 
   1440   1.1   thorpej ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
   1441  1.21     peter MODULE_VERSION(altq_red, 1);
   1442   1.1   thorpej 
   1443   1.1   thorpej #endif /* KLD_MODULE */
   1444  1.21     peter #endif /* ALTQ3_COMPAT */
   1445   1.1   thorpej 
   1446   1.1   thorpej #endif /* ALTQ_RED */
   1447