Home | History | Annotate | Line # | Download | only in altq
altq_rio.c revision 1.21.40.2
      1  1.21.40.2     skrll /*	$NetBSD: altq_rio.c,v 1.21.40.2 2016/05/29 08:44:15 skrll Exp $	*/
      2       1.14     peter /*	$KAME: altq_rio.c,v 1.19 2005/04/13 03:44:25 suz Exp $	*/
      3        1.1   thorpej 
      4        1.1   thorpej /*
      5       1.14     peter  * Copyright (C) 1998-2003
      6        1.1   thorpej  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7        1.1   thorpej  *
      8        1.1   thorpej  * Redistribution and use in source and binary forms, with or without
      9        1.1   thorpej  * modification, are permitted provided that the following conditions
     10        1.1   thorpej  * are met:
     11        1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     12        1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     13        1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     14        1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     15        1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     16        1.1   thorpej  *
     17        1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18        1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19        1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20        1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21        1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22        1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23        1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24        1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25        1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26        1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27        1.1   thorpej  * SUCH DAMAGE.
     28        1.1   thorpej  */
     29        1.1   thorpej /*
     30        1.1   thorpej  * Copyright (c) 1990-1994 Regents of the University of California.
     31        1.1   thorpej  * All rights reserved.
     32        1.1   thorpej  *
     33        1.1   thorpej  * Redistribution and use in source and binary forms, with or without
     34        1.1   thorpej  * modification, are permitted provided that the following conditions
     35        1.1   thorpej  * are met:
     36        1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     37        1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     38        1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     39        1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     40        1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     41        1.1   thorpej  * 3. All advertising materials mentioning features or use of this software
     42        1.1   thorpej  *    must display the following acknowledgement:
     43        1.1   thorpej  *	This product includes software developed by the Computer Systems
     44        1.1   thorpej  *	Engineering Group at Lawrence Berkeley Laboratory.
     45        1.1   thorpej  * 4. Neither the name of the University nor of the Laboratory may be used
     46        1.1   thorpej  *    to endorse or promote products derived from this software without
     47        1.1   thorpej  *    specific prior written permission.
     48        1.1   thorpej  *
     49        1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     50        1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     51        1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     52        1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     53        1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     54        1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     55        1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     56        1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     57        1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     58        1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     59        1.1   thorpej  * SUCH DAMAGE.
     60        1.1   thorpej  */
     61        1.4     lukem 
     62        1.4     lukem #include <sys/cdefs.h>
     63  1.21.40.2     skrll __KERNEL_RCSID(0, "$NetBSD: altq_rio.c,v 1.21.40.2 2016/05/29 08:44:15 skrll Exp $");
     64        1.1   thorpej 
     65       1.14     peter #ifdef _KERNEL_OPT
     66        1.1   thorpej #include "opt_altq.h"
     67        1.1   thorpej #include "opt_inet.h"
     68       1.15     peter #include "pf.h"
     69        1.1   thorpej #endif
     70       1.14     peter 
     71        1.1   thorpej #ifdef ALTQ_RIO	/* rio is enabled by ALTQ_RIO option in opt_altq.h */
     72        1.1   thorpej 
     73        1.1   thorpej #include <sys/param.h>
     74        1.1   thorpej #include <sys/malloc.h>
     75        1.1   thorpej #include <sys/mbuf.h>
     76        1.1   thorpej #include <sys/socket.h>
     77        1.1   thorpej #include <sys/systm.h>
     78       1.14     peter #include <sys/errno.h>
     79       1.14     peter #include <sys/kauth.h>
     80       1.14     peter #if 1 /* ALTQ3_COMPAT */
     81        1.1   thorpej #include <sys/proc.h>
     82       1.14     peter #include <sys/sockio.h>
     83        1.1   thorpej #include <sys/kernel.h>
     84       1.14     peter #endif
     85        1.1   thorpej 
     86        1.1   thorpej #include <net/if.h>
     87        1.1   thorpej 
     88        1.1   thorpej #include <netinet/in.h>
     89        1.1   thorpej #include <netinet/in_systm.h>
     90        1.1   thorpej #include <netinet/ip.h>
     91        1.1   thorpej #ifdef INET6
     92        1.1   thorpej #include <netinet/ip6.h>
     93        1.1   thorpej #endif
     94        1.1   thorpej 
     95       1.15     peter #if NPF > 0
     96       1.14     peter #include <net/pfvar.h>
     97       1.15     peter #endif
     98        1.1   thorpej #include <altq/altq.h>
     99        1.1   thorpej #include <altq/altq_cdnr.h>
    100        1.1   thorpej #include <altq/altq_red.h>
    101        1.1   thorpej #include <altq/altq_rio.h>
    102       1.14     peter #ifdef ALTQ3_COMPAT
    103       1.14     peter #include <altq/altq_conf.h>
    104       1.14     peter #endif
    105        1.1   thorpej 
    106        1.1   thorpej /*
    107        1.1   thorpej  * RIO: RED with IN/OUT bit
    108        1.1   thorpej  *   described in
    109        1.1   thorpej  *	"Explicit Allocation of Best Effort Packet Delivery Service"
    110        1.1   thorpej  *	David D. Clark and Wenjia Fang, MIT Lab for Computer Science
    111        1.1   thorpej  *	http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
    112        1.1   thorpej  *
    113        1.1   thorpej  * this implementation is extended to support more than 2 drop precedence
    114        1.1   thorpej  * values as described in RFC2597 (Assured Forwarding PHB Group).
    115        1.1   thorpej  *
    116        1.1   thorpej  */
    117        1.1   thorpej /*
    118        1.1   thorpej  * AF DS (differentiated service) codepoints.
    119        1.1   thorpej  * (classes can be mapped to CBQ or H-FSC classes.)
    120        1.7     perry  *
    121        1.1   thorpej  *      0   1   2   3   4   5   6   7
    122        1.1   thorpej  *    +---+---+---+---+---+---+---+---+
    123        1.1   thorpej  *    |   CLASS   |DropPre| 0 |  CU   |
    124        1.1   thorpej  *    +---+---+---+---+---+---+---+---+
    125        1.1   thorpej  *
    126        1.1   thorpej  *    class 1: 001
    127        1.1   thorpej  *    class 2: 010
    128        1.1   thorpej  *    class 3: 011
    129        1.1   thorpej  *    class 4: 100
    130        1.1   thorpej  *
    131        1.1   thorpej  *    low drop prec:    01
    132        1.1   thorpej  *    medium drop prec: 10
    133       1.14     peter  *    high drop prec:   11
    134        1.1   thorpej  */
    135        1.1   thorpej 
    136        1.1   thorpej /* normal red parameters */
    137        1.1   thorpej #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    138        1.1   thorpej 				/* q_weight = 0.00195 */
    139        1.1   thorpej 
    140        1.1   thorpej /* red parameters for a slow link */
    141        1.1   thorpej #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    142        1.1   thorpej 				/* q_weight = 0.0078125 */
    143        1.1   thorpej 
    144        1.1   thorpej /* red parameters for a very slow link (e.g., dialup) */
    145        1.1   thorpej #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    146        1.1   thorpej 				/* q_weight = 0.015625 */
    147        1.1   thorpej 
    148        1.1   thorpej /* fixed-point uses 12-bit decimal places */
    149        1.1   thorpej #define	FP_SHIFT	12	/* fixed-point shift */
    150        1.1   thorpej 
    151        1.1   thorpej /* red parameters for drop probability */
    152        1.1   thorpej #define	INV_P_MAX	10	/* inverse of max drop probability */
    153        1.1   thorpej #define	TH_MIN		 5	/* min threshold */
    154        1.1   thorpej #define	TH_MAX		15	/* max threshold */
    155        1.1   thorpej 
    156       1.14     peter #define	RIO_LIMIT	60	/* default max queue length */
    157       1.14     peter #define	RIO_STATS		/* collect statistics */
    158        1.1   thorpej 
    159        1.1   thorpej #define	TV_DELTA(a, b, delta) {					\
    160        1.1   thorpej 	register int	xxs;					\
    161        1.1   thorpej 								\
    162        1.1   thorpej 	delta = (a)->tv_usec - (b)->tv_usec; 			\
    163        1.1   thorpej 	if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { 		\
    164        1.1   thorpej 		if (xxs < 0) { 					\
    165        1.1   thorpej 			delta = 60000000;			\
    166        1.1   thorpej 		} else if (xxs > 4)  {				\
    167        1.1   thorpej 			if (xxs > 60)				\
    168        1.1   thorpej 				delta = 60000000;		\
    169        1.1   thorpej 			else					\
    170        1.1   thorpej 				delta += xxs * 1000000;		\
    171        1.1   thorpej 		} else while (xxs > 0) {			\
    172        1.1   thorpej 			delta += 1000000;			\
    173        1.1   thorpej 			xxs--;					\
    174        1.1   thorpej 		}						\
    175        1.1   thorpej 	}							\
    176        1.1   thorpej }
    177        1.1   thorpej 
    178       1.14     peter #ifdef ALTQ3_COMPAT
    179        1.1   thorpej /* rio_list keeps all rio_queue_t's allocated. */
    180        1.1   thorpej static rio_queue_t *rio_list = NULL;
    181       1.14     peter #endif
    182        1.1   thorpej /* default rio parameter values */
    183        1.1   thorpej static struct redparams default_rio_params[RIO_NDROPPREC] = {
    184        1.1   thorpej   /* th_min,		 th_max,     inv_pmax */
    185        1.1   thorpej   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
    186        1.1   thorpej   { TH_MAX + TH_MIN,	 TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
    187        1.1   thorpej   { TH_MIN,		 TH_MAX,     INV_P_MAX }  /* high drop precedence */
    188        1.1   thorpej };
    189        1.1   thorpej 
    190        1.1   thorpej /* internal function prototypes */
    191       1.14     peter static int dscp2index(u_int8_t);
    192       1.14     peter #ifdef ALTQ3_COMPAT
    193  1.21.40.1     skrll static int rio_enqueue(struct ifaltq *, struct mbuf *);
    194       1.14     peter static struct mbuf *rio_dequeue(struct ifaltq *, int);
    195       1.14     peter static int rio_request(struct ifaltq *, int, void *);
    196       1.14     peter static int rio_detach(rio_queue_t *);
    197       1.14     peter 
    198       1.14     peter /*
    199       1.14     peter  * rio device interface
    200       1.14     peter  */
    201       1.14     peter altqdev_decl(rio);
    202       1.14     peter 
    203       1.14     peter #endif /* ALTQ3_COMPAT */
    204       1.14     peter 
    205       1.14     peter rio_t *
    206       1.14     peter rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
    207       1.14     peter {
    208       1.14     peter 	rio_t	*rp;
    209       1.14     peter 	int	 w, i;
    210       1.14     peter 	int	 npkts_per_sec;
    211       1.14     peter 
    212       1.14     peter 	rp = malloc(sizeof(rio_t), M_DEVBUF, M_WAITOK|M_ZERO);
    213       1.14     peter 	if (rp == NULL)
    214       1.14     peter 		return (NULL);
    215       1.14     peter 
    216       1.14     peter 	rp->rio_flags = flags;
    217       1.14     peter 	if (pkttime == 0)
    218       1.14     peter 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    219       1.14     peter 		rp->rio_pkttime = 800;
    220       1.14     peter 	else
    221       1.14     peter 		rp->rio_pkttime = pkttime;
    222       1.14     peter 
    223       1.14     peter 	if (weight != 0)
    224       1.14     peter 		rp->rio_weight = weight;
    225       1.14     peter 	else {
    226       1.14     peter 		/* use default */
    227       1.14     peter 		rp->rio_weight = W_WEIGHT;
    228       1.14     peter 
    229       1.14     peter 		/* when the link is very slow, adjust red parameters */
    230       1.14     peter 		npkts_per_sec = 1000000 / rp->rio_pkttime;
    231       1.14     peter 		if (npkts_per_sec < 50) {
    232       1.14     peter 			/* up to about 400Kbps */
    233       1.14     peter 			rp->rio_weight = W_WEIGHT_2;
    234       1.14     peter 		} else if (npkts_per_sec < 300) {
    235       1.14     peter 			/* up to about 2.4Mbps */
    236       1.14     peter 			rp->rio_weight = W_WEIGHT_1;
    237       1.14     peter 		}
    238       1.14     peter 	}
    239       1.14     peter 
    240       1.14     peter 	/* calculate wshift.  weight must be power of 2 */
    241       1.14     peter 	w = rp->rio_weight;
    242       1.14     peter 	for (i = 0; w > 1; i++)
    243       1.14     peter 		w = w >> 1;
    244       1.14     peter 	rp->rio_wshift = i;
    245       1.14     peter 	w = 1 << rp->rio_wshift;
    246       1.14     peter 	if (w != rp->rio_weight) {
    247       1.14     peter 		printf("invalid weight value %d for red! use %d\n",
    248       1.14     peter 		       rp->rio_weight, w);
    249       1.14     peter 		rp->rio_weight = w;
    250       1.14     peter 	}
    251       1.14     peter 
    252       1.14     peter 	/* allocate weight table */
    253       1.14     peter 	rp->rio_wtab = wtab_alloc(rp->rio_weight);
    254       1.14     peter 
    255       1.14     peter 	for (i = 0; i < RIO_NDROPPREC; i++) {
    256       1.14     peter 		struct dropprec_state *prec = &rp->rio_precstate[i];
    257       1.14     peter 
    258       1.14     peter 		prec->avg = 0;
    259       1.14     peter 		prec->idle = 1;
    260       1.14     peter 
    261       1.14     peter 		if (params == NULL || params[i].inv_pmax == 0)
    262       1.14     peter 			prec->inv_pmax = default_rio_params[i].inv_pmax;
    263       1.14     peter 		else
    264       1.14     peter 			prec->inv_pmax = params[i].inv_pmax;
    265       1.14     peter 		if (params == NULL || params[i].th_min == 0)
    266       1.14     peter 			prec->th_min = default_rio_params[i].th_min;
    267       1.14     peter 		else
    268       1.14     peter 			prec->th_min = params[i].th_min;
    269       1.14     peter 		if (params == NULL || params[i].th_max == 0)
    270       1.14     peter 			prec->th_max = default_rio_params[i].th_max;
    271       1.14     peter 		else
    272       1.14     peter 			prec->th_max = params[i].th_max;
    273       1.14     peter 
    274       1.14     peter 		/*
    275       1.14     peter 		 * th_min_s and th_max_s are scaled versions of th_min
    276       1.14     peter 		 * and th_max to be compared with avg.
    277       1.14     peter 		 */
    278       1.14     peter 		prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
    279       1.14     peter 		prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
    280       1.14     peter 
    281       1.14     peter 		/*
    282       1.14     peter 		 * precompute probability denominator
    283       1.14     peter 		 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
    284       1.14     peter 		 */
    285       1.14     peter 		prec->probd = (2 * (prec->th_max - prec->th_min)
    286       1.14     peter 			       * prec->inv_pmax) << FP_SHIFT;
    287       1.14     peter 
    288       1.14     peter 		microtime(&prec->last);
    289       1.14     peter 	}
    290       1.14     peter 
    291       1.14     peter 	return (rp);
    292       1.14     peter }
    293       1.14     peter 
    294       1.14     peter void
    295       1.14     peter rio_destroy(rio_t *rp)
    296       1.14     peter {
    297       1.14     peter 	wtab_destroy(rp->rio_wtab);
    298       1.14     peter 	free(rp, M_DEVBUF);
    299       1.14     peter }
    300       1.14     peter 
    301       1.14     peter void
    302       1.14     peter rio_getstats(rio_t *rp, struct redstats *sp)
    303       1.14     peter {
    304       1.14     peter 	int	i;
    305       1.14     peter 
    306       1.14     peter 	for (i = 0; i < RIO_NDROPPREC; i++) {
    307       1.21   tsutsui 		memcpy(sp, &rp->q_stats[i], sizeof(struct redstats));
    308       1.14     peter 		sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
    309       1.14     peter 		sp++;
    310       1.14     peter 	}
    311       1.14     peter }
    312       1.14     peter 
    313       1.14     peter #if (RIO_NDROPPREC == 3)
    314       1.14     peter /*
    315       1.14     peter  * internally, a drop precedence value is converted to an index
    316       1.14     peter  * starting from 0.
    317       1.14     peter  */
    318       1.14     peter static int
    319       1.14     peter dscp2index(u_int8_t dscp)
    320       1.14     peter {
    321       1.14     peter 	int	dpindex = dscp & AF_DROPPRECMASK;
    322       1.14     peter 
    323       1.14     peter 	if (dpindex == 0)
    324       1.14     peter 		return (0);
    325       1.14     peter 	return ((dpindex >> 3) - 1);
    326       1.14     peter }
    327       1.14     peter #endif
    328       1.14     peter 
    329       1.14     peter int
    330       1.14     peter rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
    331       1.14     peter     struct altq_pktattr *pktattr)
    332       1.14     peter {
    333       1.14     peter 	int			 avg, droptype;
    334       1.14     peter 	u_int8_t		 dsfield, odsfield;
    335       1.14     peter 	int			 dpindex, i, n, t;
    336       1.14     peter 	struct timeval		 now;
    337       1.14     peter 	struct dropprec_state	*prec;
    338       1.14     peter 
    339       1.14     peter 	dsfield = odsfield = read_dsfield(m, pktattr);
    340       1.14     peter 	dpindex = dscp2index(dsfield);
    341       1.14     peter 
    342       1.14     peter 	/*
    343       1.14     peter 	 * update avg of the precedence states whose drop precedence
    344       1.14     peter 	 * is larger than or equal to the drop precedence of the packet
    345       1.14     peter 	 */
    346       1.14     peter 	now.tv_sec = 0;
    347       1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
    348       1.14     peter 		prec = &rp->rio_precstate[i];
    349       1.14     peter 		avg = prec->avg;
    350       1.14     peter 		if (prec->idle) {
    351       1.14     peter 			prec->idle = 0;
    352       1.14     peter 			if (now.tv_sec == 0)
    353       1.14     peter 				microtime(&now);
    354       1.14     peter 			t = (now.tv_sec - prec->last.tv_sec);
    355       1.14     peter 			if (t > 60)
    356       1.14     peter 				avg = 0;
    357       1.14     peter 			else {
    358       1.14     peter 				t = t * 1000000 +
    359       1.14     peter 					(now.tv_usec - prec->last.tv_usec);
    360       1.14     peter 				n = t / rp->rio_pkttime;
    361       1.14     peter 				/* calculate (avg = (1 - Wq)^n * avg) */
    362       1.14     peter 				if (n > 0)
    363       1.14     peter 					avg = (avg >> FP_SHIFT) *
    364       1.14     peter 						pow_w(rp->rio_wtab, n);
    365       1.14     peter 			}
    366       1.14     peter 		}
    367       1.14     peter 
    368       1.14     peter 		/* run estimator. (avg is scaled by WEIGHT in fixed-point) */
    369       1.14     peter 		avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
    370       1.14     peter 		prec->avg = avg;		/* save the new value */
    371       1.14     peter 		/*
    372       1.14     peter 		 * count keeps a tally of arriving traffic that has not
    373       1.14     peter 		 * been dropped.
    374       1.14     peter 		 */
    375       1.14     peter 		prec->count++;
    376       1.14     peter 	}
    377       1.14     peter 
    378       1.14     peter 	prec = &rp->rio_precstate[dpindex];
    379       1.14     peter 	avg = prec->avg;
    380       1.14     peter 
    381       1.14     peter 	/* see if we drop early */
    382       1.14     peter 	droptype = DTYPE_NODROP;
    383       1.14     peter 	if (avg >= prec->th_min_s && prec->qlen > 1) {
    384       1.14     peter 		if (avg >= prec->th_max_s) {
    385       1.14     peter 			/* avg >= th_max: forced drop */
    386       1.14     peter 			droptype = DTYPE_FORCED;
    387       1.14     peter 		} else if (prec->old == 0) {
    388       1.14     peter 			/* first exceeds th_min */
    389       1.14     peter 			prec->count = 1;
    390       1.14     peter 			prec->old = 1;
    391       1.14     peter 		} else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
    392       1.14     peter 				      prec->probd, prec->count)) {
    393       1.14     peter 			/* unforced drop by red */
    394       1.14     peter 			droptype = DTYPE_EARLY;
    395       1.14     peter 		}
    396       1.14     peter 	} else {
    397       1.14     peter 		/* avg < th_min */
    398       1.14     peter 		prec->old = 0;
    399       1.14     peter 	}
    400       1.14     peter 
    401       1.14     peter 	/*
    402       1.14     peter 	 * if the queue length hits the hard limit, it's a forced drop.
    403       1.14     peter 	 */
    404       1.14     peter 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
    405       1.14     peter 		droptype = DTYPE_FORCED;
    406       1.14     peter 
    407       1.14     peter 	if (droptype != DTYPE_NODROP) {
    408       1.14     peter 		/* always drop incoming packet (as opposed to randomdrop) */
    409       1.14     peter 		for (i = dpindex; i < RIO_NDROPPREC; i++)
    410       1.14     peter 			rp->rio_precstate[i].count = 0;
    411       1.14     peter #ifdef RIO_STATS
    412       1.14     peter 		if (droptype == DTYPE_EARLY)
    413       1.14     peter 			rp->q_stats[dpindex].drop_unforced++;
    414       1.14     peter 		else
    415       1.14     peter 			rp->q_stats[dpindex].drop_forced++;
    416       1.14     peter 		PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
    417       1.14     peter #endif
    418       1.14     peter 		m_freem(m);
    419       1.14     peter 		return (-1);
    420       1.14     peter 	}
    421       1.14     peter 
    422       1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++)
    423       1.14     peter 		rp->rio_precstate[i].qlen++;
    424       1.14     peter 
    425       1.14     peter 	/* save drop precedence index in mbuf hdr */
    426  1.21.40.2     skrll 	M_SETCTX(m, (intptr_t)dpindex);
    427       1.14     peter 
    428       1.14     peter 	if (rp->rio_flags & RIOF_CLEARDSCP)
    429       1.14     peter 		dsfield &= ~DSCP_MASK;
    430       1.14     peter 
    431       1.14     peter 	if (dsfield != odsfield)
    432       1.14     peter 		write_dsfield(m, pktattr, dsfield);
    433       1.14     peter 
    434       1.14     peter 	_addq(q, m);
    435       1.14     peter 
    436       1.14     peter #ifdef RIO_STATS
    437       1.14     peter 	PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
    438       1.14     peter #endif
    439       1.14     peter 	return (0);
    440       1.14     peter }
    441       1.14     peter 
    442       1.14     peter struct mbuf *
    443       1.14     peter rio_getq(rio_t *rp, class_queue_t *q)
    444       1.14     peter {
    445       1.14     peter 	struct mbuf	*m;
    446       1.14     peter 	int		 dpindex, i;
    447       1.14     peter 
    448       1.14     peter 	if ((m = _getq(q)) == NULL)
    449       1.14     peter 		return NULL;
    450       1.14     peter 
    451  1.21.40.2     skrll 	dpindex = M_GETCTX(m, intptr_t);
    452       1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
    453       1.14     peter 		if (--rp->rio_precstate[i].qlen == 0) {
    454       1.14     peter 			if (rp->rio_precstate[i].idle == 0) {
    455       1.14     peter 				rp->rio_precstate[i].idle = 1;
    456       1.14     peter 				microtime(&rp->rio_precstate[i].last);
    457       1.14     peter 			}
    458       1.14     peter 		}
    459       1.14     peter 	}
    460       1.14     peter 	return (m);
    461       1.14     peter }
    462        1.1   thorpej 
    463       1.14     peter #ifdef ALTQ3_COMPAT
    464        1.1   thorpej int
    465       1.17  christos rioopen(dev_t dev, int flag, int fmt,
    466       1.17  christos     struct lwp *l)
    467        1.1   thorpej {
    468        1.1   thorpej 	/* everything will be done when the queueing scheme is attached. */
    469        1.1   thorpej 	return 0;
    470        1.1   thorpej }
    471        1.1   thorpej 
    472        1.1   thorpej int
    473       1.17  christos rioclose(dev_t dev, int flag, int fmt,
    474       1.17  christos     struct lwp *l)
    475        1.1   thorpej {
    476        1.1   thorpej 	rio_queue_t *rqp;
    477        1.1   thorpej 	int err, error = 0;
    478        1.1   thorpej 
    479        1.1   thorpej 	while ((rqp = rio_list) != NULL) {
    480        1.1   thorpej 		/* destroy all */
    481        1.1   thorpej 		err = rio_detach(rqp);
    482        1.1   thorpej 		if (err != 0 && error == 0)
    483        1.1   thorpej 			error = err;
    484        1.1   thorpej 	}
    485        1.1   thorpej 
    486        1.1   thorpej 	return error;
    487        1.1   thorpej }
    488        1.1   thorpej 
    489        1.1   thorpej int
    490       1.19  christos rioioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    491       1.13  christos     struct lwp *l)
    492        1.1   thorpej {
    493        1.1   thorpej 	rio_queue_t *rqp;
    494        1.1   thorpej 	struct rio_interface *ifacep;
    495        1.1   thorpej 	struct ifnet *ifp;
    496        1.1   thorpej 	int	error = 0;
    497        1.1   thorpej 
    498        1.1   thorpej 	/* check super-user privilege */
    499        1.1   thorpej 	switch (cmd) {
    500        1.1   thorpej 	case RIO_GETSTATS:
    501        1.1   thorpej 		break;
    502        1.1   thorpej 	default:
    503        1.1   thorpej #if (__FreeBSD_version > 400000)
    504        1.1   thorpej 		if ((error = suser(p)) != 0)
    505        1.1   thorpej 			return (error);
    506        1.1   thorpej #else
    507       1.16      elad 		if ((error = kauth_authorize_network(l->l_cred,
    508       1.16      elad 		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RIO, NULL,
    509       1.16      elad 		    NULL, NULL)) != 0)
    510        1.1   thorpej 			return (error);
    511        1.1   thorpej #endif
    512        1.1   thorpej 		break;
    513        1.1   thorpej 	}
    514        1.7     perry 
    515        1.1   thorpej 	switch (cmd) {
    516        1.1   thorpej 
    517        1.1   thorpej 	case RIO_ENABLE:
    518        1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    519        1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    520        1.1   thorpej 			error = EBADF;
    521        1.1   thorpej 			break;
    522        1.1   thorpej 		}
    523        1.1   thorpej 		error = altq_enable(rqp->rq_ifq);
    524        1.1   thorpej 		break;
    525        1.1   thorpej 
    526        1.1   thorpej 	case RIO_DISABLE:
    527        1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    528        1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    529        1.1   thorpej 			error = EBADF;
    530        1.1   thorpej 			break;
    531        1.1   thorpej 		}
    532        1.1   thorpej 		error = altq_disable(rqp->rq_ifq);
    533        1.1   thorpej 		break;
    534        1.1   thorpej 
    535        1.1   thorpej 	case RIO_IF_ATTACH:
    536        1.1   thorpej 		ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
    537        1.1   thorpej 		if (ifp == NULL) {
    538        1.1   thorpej 			error = ENXIO;
    539        1.1   thorpej 			break;
    540        1.1   thorpej 		}
    541        1.1   thorpej 
    542        1.1   thorpej 		/* allocate and initialize rio_queue_t */
    543        1.9  christos 		rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
    544        1.1   thorpej 		if (rqp == NULL) {
    545        1.1   thorpej 			error = ENOMEM;
    546        1.1   thorpej 			break;
    547        1.1   thorpej 		}
    548        1.1   thorpej 
    549        1.9  christos 		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
    550        1.9  christos 		    M_WAITOK|M_ZERO);
    551        1.1   thorpej 		if (rqp->rq_q == NULL) {
    552        1.9  christos 			free(rqp, M_DEVBUF);
    553        1.1   thorpej 			error = ENOMEM;
    554        1.1   thorpej 			break;
    555        1.1   thorpej 		}
    556        1.1   thorpej 
    557        1.1   thorpej 		rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
    558        1.1   thorpej 		if (rqp->rq_rio == NULL) {
    559        1.9  christos 			free(rqp->rq_q, M_DEVBUF);
    560        1.9  christos 			free(rqp, M_DEVBUF);
    561        1.1   thorpej 			error = ENOMEM;
    562        1.1   thorpej 			break;
    563        1.1   thorpej 		}
    564        1.1   thorpej 
    565        1.1   thorpej 		rqp->rq_ifq = &ifp->if_snd;
    566        1.1   thorpej 		qtail(rqp->rq_q) = NULL;
    567        1.1   thorpej 		qlen(rqp->rq_q) = 0;
    568        1.1   thorpej 		qlimit(rqp->rq_q) = RIO_LIMIT;
    569        1.1   thorpej 		qtype(rqp->rq_q) = Q_RIO;
    570        1.1   thorpej 
    571        1.1   thorpej 		/*
    572        1.1   thorpej 		 * set RIO to this ifnet structure.
    573        1.1   thorpej 		 */
    574        1.1   thorpej 		error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
    575        1.1   thorpej 				    rio_enqueue, rio_dequeue, rio_request,
    576        1.1   thorpej 				    NULL, NULL);
    577        1.1   thorpej 		if (error) {
    578        1.1   thorpej 			rio_destroy(rqp->rq_rio);
    579        1.9  christos 			free(rqp->rq_q, M_DEVBUF);
    580        1.9  christos 			free(rqp, M_DEVBUF);
    581        1.1   thorpej 			break;
    582        1.1   thorpej 		}
    583        1.1   thorpej 
    584        1.1   thorpej 		/* add this state to the rio list */
    585        1.1   thorpej 		rqp->rq_next = rio_list;
    586        1.1   thorpej 		rio_list = rqp;
    587        1.1   thorpej 		break;
    588        1.1   thorpej 
    589        1.1   thorpej 	case RIO_IF_DETACH:
    590        1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    591        1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    592        1.1   thorpej 			error = EBADF;
    593        1.1   thorpej 			break;
    594        1.1   thorpej 		}
    595        1.1   thorpej 		error = rio_detach(rqp);
    596        1.1   thorpej 		break;
    597        1.1   thorpej 
    598        1.1   thorpej 	case RIO_GETSTATS:
    599        1.1   thorpej 		do {
    600        1.1   thorpej 			struct rio_stats *q_stats;
    601        1.1   thorpej 			rio_t *rp;
    602        1.1   thorpej 			int i;
    603        1.1   thorpej 
    604        1.1   thorpej 			q_stats = (struct rio_stats *)addr;
    605        1.1   thorpej 			if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
    606        1.1   thorpej 					       ALTQT_RIO)) == NULL) {
    607        1.1   thorpej 				error = EBADF;
    608        1.1   thorpej 				break;
    609        1.1   thorpej 			}
    610        1.1   thorpej 
    611        1.1   thorpej 			rp = rqp->rq_rio;
    612        1.1   thorpej 
    613        1.1   thorpej 			q_stats->q_limit = qlimit(rqp->rq_q);
    614        1.1   thorpej 			q_stats->weight	= rp->rio_weight;
    615        1.1   thorpej 			q_stats->flags = rp->rio_flags;
    616        1.1   thorpej 
    617        1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++) {
    618        1.1   thorpej 				q_stats->q_len[i] = rp->rio_precstate[i].qlen;
    619       1.21   tsutsui 				memcpy(&q_stats->q_stats[i], &rp->q_stats[i],
    620       1.14     peter 				      sizeof(struct redstats));
    621        1.1   thorpej 				q_stats->q_stats[i].q_avg =
    622        1.1   thorpej 				    rp->rio_precstate[i].avg >> rp->rio_wshift;
    623        1.1   thorpej 
    624        1.1   thorpej 				q_stats->q_params[i].inv_pmax
    625        1.1   thorpej 					= rp->rio_precstate[i].inv_pmax;
    626        1.1   thorpej 				q_stats->q_params[i].th_min
    627        1.1   thorpej 					= rp->rio_precstate[i].th_min;
    628        1.1   thorpej 				q_stats->q_params[i].th_max
    629        1.1   thorpej 					= rp->rio_precstate[i].th_max;
    630        1.1   thorpej 			}
    631       1.14     peter 		} while (/*CONSTCOND*/ 0);
    632        1.1   thorpej 		break;
    633        1.1   thorpej 
    634        1.1   thorpej 	case RIO_CONFIG:
    635        1.1   thorpej 		do {
    636        1.1   thorpej 			struct rio_conf *fc;
    637        1.1   thorpej 			rio_t	*new;
    638        1.1   thorpej 			int s, limit, i;
    639        1.1   thorpej 
    640        1.1   thorpej 			fc = (struct rio_conf *)addr;
    641        1.1   thorpej 			if ((rqp = altq_lookup(fc->iface.rio_ifname,
    642        1.1   thorpej 					       ALTQT_RIO)) == NULL) {
    643        1.1   thorpej 				error = EBADF;
    644        1.1   thorpej 				break;
    645        1.1   thorpej 			}
    646        1.1   thorpej 
    647        1.1   thorpej 			new = rio_alloc(fc->rio_weight, &fc->q_params[0],
    648        1.1   thorpej 					fc->rio_flags, fc->rio_pkttime);
    649        1.1   thorpej 			if (new == NULL) {
    650        1.1   thorpej 				error = ENOMEM;
    651        1.1   thorpej 				break;
    652        1.1   thorpej 			}
    653        1.1   thorpej 
    654        1.3   thorpej 			s = splnet();
    655        1.1   thorpej 			_flushq(rqp->rq_q);
    656        1.1   thorpej 			limit = fc->rio_limit;
    657        1.1   thorpej 			if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
    658        1.1   thorpej 				limit = fc->q_params[RIO_NDROPPREC-1].th_max;
    659        1.1   thorpej 			qlimit(rqp->rq_q) = limit;
    660        1.1   thorpej 
    661        1.1   thorpej 			rio_destroy(rqp->rq_rio);
    662        1.1   thorpej 			rqp->rq_rio = new;
    663        1.1   thorpej 
    664        1.1   thorpej 			splx(s);
    665        1.1   thorpej 
    666        1.1   thorpej 			/* write back new values */
    667        1.1   thorpej 			fc->rio_limit = limit;
    668        1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++) {
    669        1.1   thorpej 				fc->q_params[i].inv_pmax =
    670        1.1   thorpej 					rqp->rq_rio->rio_precstate[i].inv_pmax;
    671        1.1   thorpej 				fc->q_params[i].th_min =
    672        1.1   thorpej 					rqp->rq_rio->rio_precstate[i].th_min;
    673        1.1   thorpej 				fc->q_params[i].th_max =
    674        1.1   thorpej 					rqp->rq_rio->rio_precstate[i].th_max;
    675        1.1   thorpej 			}
    676       1.14     peter 		} while (/*CONSTCOND*/ 0);
    677        1.1   thorpej 		break;
    678        1.1   thorpej 
    679        1.1   thorpej 	case RIO_SETDEFAULTS:
    680        1.1   thorpej 		do {
    681        1.1   thorpej 			struct redparams *rp;
    682        1.1   thorpej 			int i;
    683        1.1   thorpej 
    684        1.1   thorpej 			rp = (struct redparams *)addr;
    685        1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++)
    686        1.1   thorpej 				default_rio_params[i] = rp[i];
    687       1.14     peter 		} while (/*CONSTCOND*/ 0);
    688        1.1   thorpej 		break;
    689        1.1   thorpej 
    690        1.1   thorpej 	default:
    691        1.1   thorpej 		error = EINVAL;
    692        1.1   thorpej 		break;
    693        1.1   thorpej 	}
    694        1.1   thorpej 
    695        1.1   thorpej 	return error;
    696        1.1   thorpej }
    697        1.1   thorpej 
    698        1.1   thorpej static int
    699       1.14     peter rio_detach(rio_queue_t *rqp)
    700        1.1   thorpej {
    701        1.1   thorpej 	rio_queue_t *tmp;
    702        1.1   thorpej 	int error = 0;
    703        1.1   thorpej 
    704        1.1   thorpej 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    705        1.1   thorpej 		altq_disable(rqp->rq_ifq);
    706        1.1   thorpej 
    707        1.1   thorpej 	if ((error = altq_detach(rqp->rq_ifq)))
    708        1.1   thorpej 		return (error);
    709        1.1   thorpej 
    710        1.1   thorpej 	if (rio_list == rqp)
    711        1.1   thorpej 		rio_list = rqp->rq_next;
    712        1.1   thorpej 	else {
    713        1.1   thorpej 		for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
    714        1.1   thorpej 			if (tmp->rq_next == rqp) {
    715        1.1   thorpej 				tmp->rq_next = rqp->rq_next;
    716        1.1   thorpej 				break;
    717        1.1   thorpej 			}
    718        1.1   thorpej 		if (tmp == NULL)
    719        1.1   thorpej 			printf("rio_detach: no state found in rio_list!\n");
    720        1.1   thorpej 	}
    721        1.1   thorpej 
    722        1.1   thorpej 	rio_destroy(rqp->rq_rio);
    723        1.9  christos 	free(rqp->rq_q, M_DEVBUF);
    724        1.9  christos 	free(rqp, M_DEVBUF);
    725        1.1   thorpej 	return (error);
    726        1.1   thorpej }
    727        1.1   thorpej 
    728        1.1   thorpej /*
    729        1.1   thorpej  * rio support routines
    730        1.1   thorpej  */
    731        1.1   thorpej static int
    732       1.17  christos rio_request(struct ifaltq *ifq, int req, void *arg)
    733        1.1   thorpej {
    734        1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    735        1.1   thorpej 
    736        1.1   thorpej 	switch (req) {
    737        1.1   thorpej 	case ALTRQ_PURGE:
    738        1.1   thorpej 		_flushq(rqp->rq_q);
    739        1.1   thorpej 		if (ALTQ_IS_ENABLED(ifq))
    740        1.1   thorpej 			ifq->ifq_len = 0;
    741        1.1   thorpej 		break;
    742        1.1   thorpej 	}
    743        1.1   thorpej 	return (0);
    744        1.1   thorpej }
    745        1.1   thorpej 
    746        1.1   thorpej /*
    747        1.1   thorpej  * enqueue routine:
    748        1.1   thorpej  *
    749        1.1   thorpej  *	returns: 0 when successfully queued.
    750        1.1   thorpej  *		 ENOBUFS when drop occurs.
    751        1.1   thorpej  */
    752        1.1   thorpej static int
    753  1.21.40.1     skrll rio_enqueue(struct ifaltq *ifq, struct mbuf *m)
    754        1.1   thorpej {
    755  1.21.40.1     skrll 	struct altq_pktattr pktattr;
    756        1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    757        1.1   thorpej 	int error = 0;
    758        1.1   thorpej 
    759  1.21.40.1     skrll 	pktattr.pattr_class = m->m_pkthdr.pattr_class;
    760  1.21.40.1     skrll 	pktattr.pattr_af = m->m_pkthdr.pattr_af;
    761  1.21.40.1     skrll 	pktattr.pattr_hdr = m->m_pkthdr.pattr_hdr;
    762  1.21.40.1     skrll 
    763  1.21.40.1     skrll 	if (rio_addq(rqp->rq_rio, rqp->rq_q, m, &pktattr) == 0)
    764        1.1   thorpej 		ifq->ifq_len++;
    765        1.1   thorpej 	else
    766        1.1   thorpej 		error = ENOBUFS;
    767        1.1   thorpej 	return error;
    768        1.1   thorpej }
    769        1.1   thorpej 
    770        1.1   thorpej /*
    771        1.1   thorpej  * dequeue routine:
    772        1.3   thorpej  *	must be called in splnet.
    773        1.1   thorpej  *
    774        1.1   thorpej  *	returns: mbuf dequeued.
    775        1.1   thorpej  *		 NULL when no packet is available in the queue.
    776        1.1   thorpej  */
    777        1.1   thorpej 
    778        1.1   thorpej static struct mbuf *
    779       1.14     peter rio_dequeue(struct ifaltq *ifq, int op)
    780        1.1   thorpej {
    781        1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    782        1.1   thorpej 	struct mbuf *m = NULL;
    783        1.1   thorpej 
    784        1.1   thorpej 	if (op == ALTDQ_POLL)
    785        1.1   thorpej 		return qhead(rqp->rq_q);
    786        1.1   thorpej 
    787        1.1   thorpej 	m = rio_getq(rqp->rq_rio, rqp->rq_q);
    788        1.1   thorpej 	if (m != NULL)
    789        1.1   thorpej 		ifq->ifq_len--;
    790        1.1   thorpej 	return m;
    791        1.1   thorpej }
    792        1.1   thorpej 
    793        1.1   thorpej #ifdef KLD_MODULE
    794        1.1   thorpej 
    795        1.1   thorpej static struct altqsw rio_sw =
    796        1.1   thorpej 	{"rio", rioopen, rioclose, rioioctl};
    797        1.1   thorpej 
    798        1.1   thorpej ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
    799       1.14     peter MODULE_VERSION(altq_rio, 1);
    800       1.14     peter MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);
    801        1.1   thorpej 
    802        1.1   thorpej #endif /* KLD_MODULE */
    803       1.14     peter #endif /* ALTQ3_COMPAT */
    804        1.1   thorpej 
    805        1.1   thorpej #endif /* ALTQ_RIO */
    806