Home | History | Annotate | Line # | Download | only in altq
      1  1.26       joe /*	$NetBSD: altq_rio.c,v 1.26 2025/01/08 13:00:04 joe Exp $	*/
      2  1.14     peter /*	$KAME: altq_rio.c,v 1.19 2005/04/13 03:44:25 suz Exp $	*/
      3   1.1   thorpej 
      4   1.1   thorpej /*
      5  1.14     peter  * Copyright (C) 1998-2003
      6   1.1   thorpej  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7   1.1   thorpej  *
      8   1.1   thorpej  * Redistribution and use in source and binary forms, with or without
      9   1.1   thorpej  * modification, are permitted provided that the following conditions
     10   1.1   thorpej  * are met:
     11   1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     12   1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     13   1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     14   1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     15   1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     16   1.1   thorpej  *
     17   1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18   1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19   1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20   1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21   1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22   1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23   1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24   1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25   1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26   1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27   1.1   thorpej  * SUCH DAMAGE.
     28   1.1   thorpej  */
     29   1.1   thorpej /*
     30   1.1   thorpej  * Copyright (c) 1990-1994 Regents of the University of California.
     31   1.1   thorpej  * All rights reserved.
     32   1.1   thorpej  *
     33   1.1   thorpej  * Redistribution and use in source and binary forms, with or without
     34   1.1   thorpej  * modification, are permitted provided that the following conditions
     35   1.1   thorpej  * are met:
     36   1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     37   1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     38   1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     39   1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     40   1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     41   1.1   thorpej  * 3. All advertising materials mentioning features or use of this software
     42   1.1   thorpej  *    must display the following acknowledgement:
     43   1.1   thorpej  *	This product includes software developed by the Computer Systems
     44   1.1   thorpej  *	Engineering Group at Lawrence Berkeley Laboratory.
     45   1.1   thorpej  * 4. Neither the name of the University nor of the Laboratory may be used
     46   1.1   thorpej  *    to endorse or promote products derived from this software without
     47   1.1   thorpej  *    specific prior written permission.
     48   1.1   thorpej  *
     49   1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     50   1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     51   1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     52   1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     53   1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     54   1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     55   1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     56   1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     57   1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     58   1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     59   1.1   thorpej  * SUCH DAMAGE.
     60   1.1   thorpej  */
     61   1.4     lukem 
     62   1.4     lukem #include <sys/cdefs.h>
     63  1.26       joe __KERNEL_RCSID(0, "$NetBSD: altq_rio.c,v 1.26 2025/01/08 13:00:04 joe Exp $");
     64   1.1   thorpej 
     65  1.14     peter #ifdef _KERNEL_OPT
     66   1.1   thorpej #include "opt_altq.h"
     67   1.1   thorpej #include "opt_inet.h"
     68  1.15     peter #include "pf.h"
     69   1.1   thorpej #endif
     70  1.14     peter 
     71   1.1   thorpej #ifdef ALTQ_RIO	/* rio is enabled by ALTQ_RIO option in opt_altq.h */
     72   1.1   thorpej 
     73   1.1   thorpej #include <sys/param.h>
     74   1.1   thorpej #include <sys/malloc.h>
     75   1.1   thorpej #include <sys/mbuf.h>
     76   1.1   thorpej #include <sys/socket.h>
     77   1.1   thorpej #include <sys/systm.h>
     78  1.14     peter #include <sys/errno.h>
     79  1.14     peter #include <sys/kauth.h>
     80  1.14     peter #if 1 /* ALTQ3_COMPAT */
     81   1.1   thorpej #include <sys/proc.h>
     82  1.14     peter #include <sys/sockio.h>
     83   1.1   thorpej #include <sys/kernel.h>
     84  1.14     peter #endif
     85   1.1   thorpej 
     86   1.1   thorpej #include <net/if.h>
     87   1.1   thorpej 
     88   1.1   thorpej #include <netinet/in.h>
     89   1.1   thorpej #include <netinet/in_systm.h>
     90   1.1   thorpej #include <netinet/ip.h>
     91   1.1   thorpej #ifdef INET6
     92   1.1   thorpej #include <netinet/ip6.h>
     93   1.1   thorpej #endif
     94   1.1   thorpej 
     95  1.15     peter #if NPF > 0
     96  1.14     peter #include <net/pfvar.h>
     97  1.15     peter #endif
     98   1.1   thorpej #include <altq/altq.h>
     99   1.1   thorpej #include <altq/altq_cdnr.h>
    100   1.1   thorpej #include <altq/altq_red.h>
    101   1.1   thorpej #include <altq/altq_rio.h>
    102  1.14     peter #ifdef ALTQ3_COMPAT
    103  1.14     peter #include <altq/altq_conf.h>
    104  1.14     peter #endif
    105   1.1   thorpej 
    106   1.1   thorpej /*
    107   1.1   thorpej  * RIO: RED with IN/OUT bit
    108   1.1   thorpej  *   described in
    109   1.1   thorpej  *	"Explicit Allocation of Best Effort Packet Delivery Service"
    110   1.1   thorpej  *	David D. Clark and Wenjia Fang, MIT Lab for Computer Science
    111   1.1   thorpej  *	http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
    112   1.1   thorpej  *
    113   1.1   thorpej  * this implementation is extended to support more than 2 drop precedence
    114   1.1   thorpej  * values as described in RFC2597 (Assured Forwarding PHB Group).
    115   1.1   thorpej  *
    116   1.1   thorpej  */
    117   1.1   thorpej /*
    118   1.1   thorpej  * AF DS (differentiated service) codepoints.
    119   1.1   thorpej  * (classes can be mapped to CBQ or H-FSC classes.)
    120   1.7     perry  *
    121   1.1   thorpej  *      0   1   2   3   4   5   6   7
    122   1.1   thorpej  *    +---+---+---+---+---+---+---+---+
    123   1.1   thorpej  *    |   CLASS   |DropPre| 0 |  CU   |
    124   1.1   thorpej  *    +---+---+---+---+---+---+---+---+
    125   1.1   thorpej  *
    126   1.1   thorpej  *    class 1: 001
    127   1.1   thorpej  *    class 2: 010
    128   1.1   thorpej  *    class 3: 011
    129   1.1   thorpej  *    class 4: 100
    130   1.1   thorpej  *
    131   1.1   thorpej  *    low drop prec:    01
    132   1.1   thorpej  *    medium drop prec: 10
    133  1.14     peter  *    high drop prec:   11
    134   1.1   thorpej  */
    135   1.1   thorpej 
    136   1.1   thorpej /* normal red parameters */
                        /*
                         * W_WEIGHT is the weight rio_alloc() falls back to when its caller
                         * passes weight == 0 and the estimated packet rate is 300 packets/sec
                         * or more.
                         */
    137   1.1   thorpej #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    138   1.1   thorpej 				/* q_weight = 0.00195 */
    139   1.1   thorpej 
    140   1.1   thorpej /* red parameters for a slow link */
                        /* chosen by rio_alloc() when 1000000 / pkttime is in [50, 300) */
    141   1.1   thorpej #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    142   1.1   thorpej 				/* q_weight = 0.0078125 */
    143   1.1   thorpej 
    144   1.1   thorpej /* red parameters for a very slow link (e.g., dialup) */
                        /* chosen by rio_alloc() when 1000000 / pkttime is below 50 */
    145   1.1   thorpej #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    146   1.1   thorpej 				/* q_weight = 0.015625 */
    147   1.1   thorpej 
    148   1.1   thorpej /* fixed-point uses 12-bit decimal places */
                        /*
                         * FP_SHIFT is applied, together with the per-instance rio_wshift, to
                         * the thresholds in rio_alloc() and to the queue length in rio_addq().
                         */
    149   1.1   thorpej #define	FP_SHIFT	12	/* fixed-point shift */
    150   1.1   thorpej 
    151   1.1   thorpej /* red parameters for drop probability */
                        /*
                         * INV_P_MAX, TH_MIN and TH_MAX are only used to build the
                         * default_rio_params[] table; the thresholds are compared against
                         * averaged queue lengths counted in packets.
                         */
    152   1.1   thorpej #define	INV_P_MAX	10	/* inverse of max drop probability */
    153   1.1   thorpej #define	TH_MIN		 5	/* min threshold */
    154   1.1   thorpej #define	TH_MAX		15	/* max threshold */
    155   1.1   thorpej 
                        /* RIO_LIMIT is installed as qlimit() when RIO_IF_ATTACH creates a queue */
    156  1.24   msaitoh #define	RIO_LIMIT	60	/* default max queue length */
                        /* RIO_STATS enables the drop/xmit counters updated in rio_addq() */
    157  1.14     peter #define	RIO_STATS		/* collect statistics */
    158   1.1   thorpej 
    /*
     * TV_DELTA(a, b, delta): store in `delta' the difference a - b, in
     * microseconds, of the two struct timeval pointers `a' and `b'.
     *
     * The result is clamped to 60000000 (60 seconds) when a->tv_sec is
     * smaller than b->tv_sec or when the seconds differ by more than 60.
     * Differences of 1 to 4 seconds are added one second at a time in a
     * loop; larger ones (5 to 60) use a multiplication.
     *
     * Usage notes:
     *  - `delta' must be an assignable lvalue; `a' and `b' are evaluated
     *    several times, so they must be free of side effects.
     *  - The macro expands to a bare brace block rather than
     *    do { } while (0), so "if (x) TV_DELTA(...); else ..." will not
     *    parse as intended.
     *  - It declares a local named `xxs', which would shadow a caller
     *    variable of the same name.
     *  - NOTE(review): no use of this macro appears in the visible part of
     *    this file -- confirm whether it is still needed.
     */
    159   1.1   thorpej #define	TV_DELTA(a, b, delta) {					\
    160   1.1   thorpej 	register int	xxs;					\
    161   1.1   thorpej 								\
    162   1.1   thorpej 	delta = (a)->tv_usec - (b)->tv_usec; 			\
    163   1.1   thorpej 	if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { 		\
    164   1.1   thorpej 		if (xxs < 0) { 					\
    165   1.1   thorpej 			delta = 60000000;			\
    166   1.1   thorpej 		} else if (xxs > 4)  {				\
    167   1.1   thorpej 			if (xxs > 60)				\
    168   1.1   thorpej 				delta = 60000000;		\
    169   1.1   thorpej 			else					\
    170   1.1   thorpej 				delta += xxs * 1000000;		\
    171   1.1   thorpej 		} else while (xxs > 0) {			\
    172   1.1   thorpej 			delta += 1000000;			\
    173   1.1   thorpej 			xxs--;					\
    174   1.1   thorpej 		}						\
    175   1.1   thorpej 	}							\
    176   1.1   thorpej }
    177   1.1   thorpej 
    178  1.14     peter #ifdef ALTQ3_COMPAT
    179   1.1   thorpej /* rio_list keeps all rio_queue_t's allocated. */
                        /*
                         * Singly linked through rq_next: RIO_IF_ATTACH pushes new entries at
                         * the head and rioclose() walks it to detach everything.
                         */
    180   1.1   thorpej static rio_queue_t *rio_list = NULL;
    181  1.14     peter #endif
    182   1.1   thorpej /* default rio parameter values */
                        /*
                         * One entry per drop precedence, indexed by the value dscp2index()
                         * returns (0 = low ... 2 = high).  With TH_MIN = 5, TH_MAX = 15 and
                         * INV_P_MAX = 10 the rows expand to {35, 45, 10}, {20, 30, 10} and
                         * {5, 15, 10}.  rio_alloc() uses a row's field whenever the caller
                         * passes params == NULL or leaves that field zero.
                         *
                         * The table is deliberately not const: the RIO_SETDEFAULTS ioctl
                         * overwrites it.
                         */
    183   1.1   thorpej static struct redparams default_rio_params[RIO_NDROPPREC] = {
    184   1.1   thorpej   /* th_min,		 th_max,     inv_pmax */
    185   1.1   thorpej   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
    186   1.1   thorpej   { TH_MAX + TH_MIN,	 TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
    187   1.1   thorpej   { TH_MIN,		 TH_MAX,     INV_P_MAX }  /* high drop precedence */
    188   1.1   thorpej };
    189   1.1   thorpej 
    190   1.1   thorpej /* internal function prototypes */
                        /*
                         * dscp2index() is only defined when RIO_NDROPPREC == 3 (see its
                         * definition below).
                         */
    191  1.14     peter static int dscp2index(u_int8_t);
    192  1.14     peter #ifdef ALTQ3_COMPAT
                        /*
                         * The enqueue/dequeue/request handlers below are the callbacks handed
                         * to altq_attach() by the RIO_IF_ATTACH ioctl; rio_detach() is used by
                         * rioclose() and the RIO_IF_DETACH ioctl.
                         */
    193  1.22  knakahar static int rio_enqueue(struct ifaltq *, struct mbuf *);
    194  1.14     peter static struct mbuf *rio_dequeue(struct ifaltq *, int);
    195  1.14     peter static int rio_request(struct ifaltq *, int, void *);
    196  1.14     peter static int rio_detach(rio_queue_t *);
    197  1.14     peter 
    198  1.14     peter /*
    199  1.14     peter  * rio device interface
    200  1.14     peter  */
                        /*
                         * NOTE(review): altqdev_decl() is defined outside this file;
                         * presumably it declares rioopen/rioclose/rioioctl -- confirm.
                         */
    201  1.14     peter altqdev_decl(rio);
    202  1.14     peter 
    203  1.14     peter #endif /* ALTQ3_COMPAT */
    204  1.14     peter 
    /*
     * rio_alloc: allocate and initialise a RIO state block.
     *
     *   weight   inverse of the EWMA weight.  0 selects a default: W_WEIGHT,
     *            or W_WEIGHT_1 / W_WEIGHT_2 when 1000000 / pkttime is below
     *            300 / 50.  A value that is not a power of two is replaced
     *            by the largest power of two not above it (1 for values
     *            below 1) and a message is printed.
     *   params   array of RIO_NDROPPREC redparams, or NULL.  Any field that
     *            is zero (or every field when params is NULL) is taken from
     *            default_rio_params[].
     *   flags    stored verbatim in rio_flags (rio_addq() tests
     *            RIOF_CLEARDSCP).
     *   pkttime  stored in rio_pkttime; 0 selects 800.  rio_addq() divides
     *            an idle interval measured in microseconds by this value,
     *            so it is the time per packet in microseconds.
     *
     * Returns the new rio_t, or NULL if malloc() returned NULL.  The caller
     * releases it with rio_destroy().
     *
     * NOTE(review): weight, pkttime and the params fields are used without
     * range checks; the shifts by (rio_wshift + FP_SHIFT) are performed on
     * the threshold values as stored, so very large weights or thresholds
     * may overflow -- confirm that callers validate user-supplied values.
     */
    205  1.14     peter rio_t *
    206  1.14     peter rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
    207  1.14     peter {
    208  1.14     peter 	rio_t	*rp;
    209  1.14     peter 	int	 w, i;
    210  1.14     peter 	int	 npkts_per_sec;
    211  1.14     peter 
                        	/*
                        	 * NOTE(review): the allocation uses M_WAITOK, which is expected
                        	 * to sleep rather than fail, so the NULL check below is likely
                        	 * never taken -- confirm against malloc(9).  M_ZERO means every
                        	 * field not set below starts out as zero.
                        	 */
    212  1.14     peter 	rp = malloc(sizeof(rio_t), M_DEVBUF, M_WAITOK|M_ZERO);
    213  1.14     peter 	if (rp == NULL)
    214  1.26       joe 		return NULL;
    215  1.14     peter 
    216  1.14     peter 	rp->rio_flags = flags;
    217  1.14     peter 	if (pkttime == 0)
    218  1.14     peter 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    219  1.14     peter 		rp->rio_pkttime = 800;
    220  1.14     peter 	else
    221  1.14     peter 		rp->rio_pkttime = pkttime;
    222  1.14     peter 
    223  1.14     peter 	if (weight != 0)
    224  1.14     peter 		rp->rio_weight = weight;
    225  1.14     peter 	else {
    226  1.14     peter 		/* use default */
    227  1.14     peter 		rp->rio_weight = W_WEIGHT;
    228  1.14     peter 
    229  1.14     peter 		/* when the link is very slow, adjust red parameters */
                        		/* rio_pkttime cannot be 0 here: 0 was replaced by 800 above */
    230  1.14     peter 		npkts_per_sec = 1000000 / rp->rio_pkttime;
    231  1.14     peter 		if (npkts_per_sec < 50) {
    232  1.14     peter 			/* up to about 400Kbps */
    233  1.14     peter 			rp->rio_weight = W_WEIGHT_2;
    234  1.14     peter 		} else if (npkts_per_sec < 300) {
    235  1.14     peter 			/* up to about 2.4Mbps */
    236  1.14     peter 			rp->rio_weight = W_WEIGHT_1;
    237  1.14     peter 		}
    238  1.14     peter 	}
    239  1.14     peter 
    240  1.14     peter 	/* calculate wshift.  weight must be power of 2 */
                        	/*
                        	 * i ends up as floor(log2(weight)) for weight >= 1 and as 0
                        	 * otherwise; the weight is then rewritten as 1 << i if it was
                        	 * not already that value.
                        	 */
    241  1.14     peter 	w = rp->rio_weight;
    242  1.14     peter 	for (i = 0; w > 1; i++)
    243  1.14     peter 		w = w >> 1;
    244  1.14     peter 	rp->rio_wshift = i;
    245  1.14     peter 	w = 1 << rp->rio_wshift;
    246  1.14     peter 	if (w != rp->rio_weight) {
    247  1.14     peter 		printf("invalid weight value %d for red! use %d\n",
    248  1.14     peter 		       rp->rio_weight, w);
    249  1.14     peter 		rp->rio_weight = w;
    250  1.14     peter 	}
    251  1.14     peter 
    252  1.14     peter 	/* allocate weight table */
                        	/* NOTE(review): the result of wtab_alloc() is not checked here */
    253  1.14     peter 	rp->rio_wtab = wtab_alloc(rp->rio_weight);
    254  1.14     peter 
    255  1.14     peter 	for (i = 0; i < RIO_NDROPPREC; i++) {
    256  1.14     peter 		struct dropprec_state *prec = &rp->rio_precstate[i];
    257  1.14     peter 
    258  1.14     peter 		prec->avg = 0;
    259  1.14     peter 		prec->idle = 1;
    260  1.14     peter 
                        		/* each field independently falls back to the default table */
    261  1.14     peter 		if (params == NULL || params[i].inv_pmax == 0)
    262  1.14     peter 			prec->inv_pmax = default_rio_params[i].inv_pmax;
    263  1.14     peter 		else
    264  1.14     peter 			prec->inv_pmax = params[i].inv_pmax;
    265  1.14     peter 		if (params == NULL || params[i].th_min == 0)
    266  1.14     peter 			prec->th_min = default_rio_params[i].th_min;
    267  1.14     peter 		else
    268  1.14     peter 			prec->th_min = params[i].th_min;
    269  1.14     peter 		if (params == NULL || params[i].th_max == 0)
    270  1.14     peter 			prec->th_max = default_rio_params[i].th_max;
    271  1.14     peter 		else
    272  1.14     peter 			prec->th_max = params[i].th_max;
    273  1.14     peter 
    274  1.14     peter 		/*
    275  1.14     peter 		 * th_min_s and th_max_s are scaled versions of th_min
    276  1.14     peter 		 * and th_max to be compared with avg.
    277  1.14     peter 		 */
    278  1.14     peter 		prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
    279  1.14     peter 		prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
    280  1.14     peter 
    281  1.14     peter 		/*
    282  1.14     peter 		 * precompute probability denominator
    283  1.14     peter 		 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
    284  1.14     peter 		 */
                        		/*
                        		 * The code multiplies by inv_pmax, i.e. by 1/pmax, which is
                        		 * the division written in the formula above.
                        		 */
    285  1.14     peter 		prec->probd = (2 * (prec->th_max - prec->th_min)
    286  1.14     peter 			       * prec->inv_pmax) << FP_SHIFT;
    287  1.14     peter 
                        		/* start the idle period now; rio_addq() measures from `last' */
    288  1.14     peter 		microtime(&prec->last);
    289  1.14     peter 	}
    290  1.14     peter 
    291  1.26       joe 	return rp;
    292  1.14     peter }
    293  1.14     peter 
    /*
     * rio_destroy: release a RIO state block obtained from rio_alloc().
     *
     * Hands the weight table to wtab_destroy() and then frees `rp' itself.
     * `rp' is dereferenced unconditionally, so it must not be NULL.
     */
    294  1.14     peter void
    295  1.14     peter rio_destroy(rio_t *rp)
    296  1.14     peter {
    297  1.14     peter 	wtab_destroy(rp->rio_wtab);
    298  1.14     peter 	free(rp, M_DEVBUF);
    299  1.14     peter }
    300  1.14     peter 
    /*
     * rio_getstats: copy the per-drop-precedence statistics of `rp' to `sp'.
     *
     * `sp' must point to an array with room for RIO_NDROPPREC redstats
     * entries; entry i receives a copy of rp->q_stats[i] with q_avg
     * replaced by the current average of precedence i shifted right by
     * rio_wshift.
     */
    301  1.14     peter void
    302  1.14     peter rio_getstats(rio_t *rp, struct redstats *sp)
    303  1.14     peter {
    304  1.14     peter 	int	i;
    305  1.14     peter 
    306  1.14     peter 	for (i = 0; i < RIO_NDROPPREC; i++) {
    307  1.21   tsutsui 		memcpy(sp, &rp->q_stats[i], sizeof(struct redstats));
                        		/* overwrite the copied q_avg with the live average */
    308  1.14     peter 		sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
    309  1.14     peter 		sp++;
    310  1.14     peter 	}
    311  1.14     peter }
    312  1.14     peter 
    313  1.14     peter #if (RIO_NDROPPREC == 3)
    314  1.14     peter /*
    315  1.14     peter  * internally, a drop precedence value is converted to an index
    316  1.14     peter  * starting from 0.
    317  1.14     peter  */
                        /*
                         * dscp2index: map the DS field `dscp' to a drop-precedence index.
                         *
                         * The drop-precedence bits are isolated with AF_DROPPRECMASK.  When
                         * they are all zero the packet is given index 0; otherwise the result
                         * is (bits >> 3) - 1.
                         *
                         * NOTE(review): AF_DROPPRECMASK is defined outside this file; assuming
                         * it selects the two DropPre bits shown in the codepoint diagram near
                         * the top of the file, the encodings 01/10/11 map to 0/1/2 -- confirm.
                         *
                         * This function only exists when RIO_NDROPPREC == 3.
                         */
    318  1.14     peter static int
    319  1.14     peter dscp2index(u_int8_t dscp)
    320  1.14     peter {
    321  1.14     peter 	int	dpindex = dscp & AF_DROPPRECMASK;
    322  1.14     peter 
    323  1.14     peter 	if (dpindex == 0)
    324  1.26       joe 		return 0;
    325  1.14     peter 	return ((dpindex >> 3) - 1);
    326  1.14     peter }
    327  1.14     peter #endif
    328  1.14     peter 
    /*
     * rio_addq: apply the RIO drop decision to packet `m' and, if it
     * survives, append it to queue `q'.
     *
     *   rp       RIO state for the queue
     *   q        class queue the packet is added to
     *   m        packet; freed with m_freem() when it is dropped
     *   pktattr  passed through to read_dsfield()/write_dsfield()
     *
     * Returns 0 when the packet was queued and -1 when it was dropped.
     *
     * Outline:
     *  1. Read the DS field and derive the drop-precedence index.
     *  2. For that precedence and every higher index, update the average
     *     queue length (decaying it first if the state was idle) and bump
     *     the arrival count.
     *  3. Compare the packet's own precedence average against its scaled
     *     thresholds to pick DTYPE_NODROP / DTYPE_EARLY / DTYPE_FORCED.
     *  4. A full queue (qlen >= qlimit) turns DTYPE_NODROP into
     *     DTYPE_FORCED.
     *  5. On a drop: reset the counts, update statistics, free the mbuf.
     *     Otherwise: bump the per-precedence queue lengths, remember the
     *     index in the mbuf for rio_getq(), optionally clear the DSCP, and
     *     enqueue.
     */
    329  1.14     peter int
    330  1.14     peter rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
    331  1.14     peter     struct altq_pktattr *pktattr)
    332  1.14     peter {
    333  1.14     peter 	int			 avg, droptype;
    334  1.14     peter 	u_int8_t		 dsfield, odsfield;
    335  1.14     peter 	int			 dpindex, i, n, t;
    336  1.14     peter 	struct timeval		 now;
    337  1.14     peter 	struct dropprec_state	*prec;
    338  1.14     peter 
    339  1.14     peter 	dsfield = odsfield = read_dsfield(m, pktattr);
    340  1.14     peter 	dpindex = dscp2index(dsfield);
    341  1.14     peter 
    342  1.14     peter 	/*
    343  1.14     peter 	 * update avg of the precedence states whose drop precedence
    344  1.14     peter 	 * is larger than or equal to the drop precedence of the packet
    345  1.14     peter 	 */
                        	/*
                        	 * now.tv_sec == 0 serves as "clock not read yet", so microtime()
                        	 * is called at most once per packet and only if some state is
                        	 * idle.
                        	 */
    346  1.14     peter 	now.tv_sec = 0;
    347  1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
    348  1.14     peter 		prec = &rp->rio_precstate[i];
    349  1.14     peter 		avg = prec->avg;
    350  1.14     peter 		if (prec->idle) {
    351  1.14     peter 			prec->idle = 0;
    352  1.14     peter 			if (now.tv_sec == 0)
    353  1.14     peter 				microtime(&now);
    354  1.14     peter 			t = (now.tv_sec - prec->last.tv_sec);
                        			/* idle for more than 60 seconds: forget the average */
    355  1.14     peter 			if (t > 60)
    356  1.14     peter 				avg = 0;
    357  1.14     peter 			else {
                        				/*
                        				 * t becomes the idle time in microseconds and
                        				 * n the number of packet times that elapsed.
                        				 */
    358  1.14     peter 				t = t * 1000000 +
    359  1.14     peter 					(now.tv_usec - prec->last.tv_usec);
    360  1.14     peter 				n = t / rp->rio_pkttime;
    361  1.14     peter 				/* calculate (avg = (1 - Wq)^n * avg) */
    362  1.14     peter 				if (n > 0)
    363  1.14     peter 					avg = (avg >> FP_SHIFT) *
    364  1.14     peter 						pow_w(rp->rio_wtab, n);
    365  1.14     peter 			}
    366  1.14     peter 		}
    367  1.14     peter 
    368  1.14     peter 		/* run estimator. (avg is scaled by WEIGHT in fixed-point) */
    369  1.14     peter 		avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
    370  1.14     peter 		prec->avg = avg;		/* save the new value */
    371  1.14     peter 		/*
    372  1.14     peter 		 * count keeps a tally of arriving traffic that has not
    373  1.14     peter 		 * been dropped.
    374  1.14     peter 		 */
    375  1.14     peter 		prec->count++;
    376  1.14     peter 	}
    377  1.14     peter 
                        	/* the drop decision uses only the packet's own precedence state */
    378  1.14     peter 	prec = &rp->rio_precstate[dpindex];
    379  1.14     peter 	avg = prec->avg;
    380  1.14     peter 
    381  1.14     peter 	/* see if we drop early */
    382  1.14     peter 	droptype = DTYPE_NODROP;
    383  1.14     peter 	if (avg >= prec->th_min_s && prec->qlen > 1) {
    384  1.14     peter 		if (avg >= prec->th_max_s) {
    385  1.14     peter 			/* avg >= th_max: forced drop */
    386  1.14     peter 			droptype = DTYPE_FORCED;
    387  1.14     peter 		} else if (prec->old == 0) {
    388  1.14     peter 			/* first exceeds th_min */
    389  1.14     peter 			prec->count = 1;
    390  1.14     peter 			prec->old = 1;
    391  1.14     peter 		} else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
    392  1.14     peter 				      prec->probd, prec->count)) {
    393  1.14     peter 			/* unforced drop by red */
    394  1.14     peter 			droptype = DTYPE_EARLY;
    395  1.14     peter 		}
    396  1.14     peter 	} else {
    397  1.14     peter 		/* avg < th_min */
                        		/* (this branch is also taken when prec->qlen <= 1) */
    398  1.14     peter 		prec->old = 0;
    399  1.14     peter 	}
    400  1.14     peter 
    401  1.14     peter 	/*
    402  1.14     peter 	 * if the queue length hits the hard limit, it's a forced drop.
    403  1.14     peter 	 */
    404  1.14     peter 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
    405  1.14     peter 		droptype = DTYPE_FORCED;
    406  1.14     peter 
    407  1.14     peter 	if (droptype != DTYPE_NODROP) {
    408  1.14     peter 		/* always drop incoming packet (as opposed to randomdrop) */
    409  1.14     peter 		for (i = dpindex; i < RIO_NDROPPREC; i++)
    410  1.14     peter 			rp->rio_precstate[i].count = 0;
    411  1.14     peter #ifdef RIO_STATS
    412  1.14     peter 		if (droptype == DTYPE_EARLY)
    413  1.14     peter 			rp->q_stats[dpindex].drop_unforced++;
    414  1.14     peter 		else
    415  1.14     peter 			rp->q_stats[dpindex].drop_forced++;
                        		/* the length is read before the mbuf is freed below */
    416  1.14     peter 		PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
    417  1.14     peter #endif
    418  1.14     peter 		m_freem(m);
    419  1.26       joe 		return -1;
    420  1.14     peter 	}
    421  1.14     peter 
                        	/*
                        	 * the packet counts towards its own precedence and every higher
                        	 * index; rio_getq() undoes this on dequeue
                        	 */
    422  1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++)
    423  1.14     peter 		rp->rio_precstate[i].qlen++;
    424  1.14     peter 
    425  1.14     peter 	/* save drop precedence index in mbuf hdr */
    426  1.23     ozaki 	M_SETCTX(m, (intptr_t)dpindex);
    427  1.14     peter 
    428  1.14     peter 	if (rp->rio_flags & RIOF_CLEARDSCP)
    429  1.14     peter 		dsfield &= ~DSCP_MASK;
    430  1.14     peter 
                        	/* only rewrite the header when the DS field actually changed */
    431  1.14     peter 	if (dsfield != odsfield)
    432  1.14     peter 		write_dsfield(m, pktattr, dsfield);
    433  1.14     peter 
    434  1.14     peter 	_addq(q, m);
    435  1.14     peter 
    436  1.14     peter #ifdef RIO_STATS
    437  1.14     peter 	PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
    438  1.14     peter #endif
    439  1.26       joe 	return 0;
    440  1.14     peter }
    441  1.14     peter 
    /*
     * rio_getq: remove the next packet from `q' and update the RIO
     * accounting in `rp'.
     *
     * Returns the dequeued mbuf, or NULL when _getq() finds nothing.
     *
     * The drop-precedence index stored in the mbuf by rio_addq() is read
     * back, and the queue length of that precedence and every higher index
     * is decremented.  A state whose length reaches zero is marked idle
     * and its `last' timestamp is set, unless it was already idle.
     */
    442  1.14     peter struct mbuf *
    443  1.14     peter rio_getq(rio_t *rp, class_queue_t *q)
    444  1.14     peter {
    445  1.14     peter 	struct mbuf	*m;
    446  1.14     peter 	int		 dpindex, i;
    447  1.14     peter 
    448  1.14     peter 	if ((m = _getq(q)) == NULL)
    449  1.14     peter 		return NULL;
    450  1.14     peter 
    451  1.23     ozaki 	dpindex = M_GETCTX(m, intptr_t);
    452  1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
    453  1.14     peter 		if (--rp->rio_precstate[i].qlen == 0) {
    454  1.14     peter 			if (rp->rio_precstate[i].idle == 0) {
    455  1.14     peter 				rp->rio_precstate[i].idle = 1;
                        				/* rio_addq() measures the idle time from here */
    456  1.14     peter 				microtime(&rp->rio_precstate[i].last);
    457  1.14     peter 			}
    458  1.14     peter 		}
    459  1.14     peter 	}
    460  1.26       joe 	return m;
    461  1.14     peter }
    462   1.1   thorpej 
    463  1.14     peter #ifdef ALTQ3_COMPAT
    /*
     * rioopen: open entry point of the rio device.
     *
     * Does nothing and always returns 0; none of the arguments is used.
     */
    464   1.1   thorpej int
    465  1.17  christos rioopen(dev_t dev, int flag, int fmt,
    466  1.17  christos     struct lwp *l)
    467   1.1   thorpej {
    468   1.1   thorpej 	/* everything will be done when the queueing scheme is attached. */
    469   1.1   thorpej 	return 0;
    470   1.1   thorpej }
    471   1.1   thorpej 
    /*
     * rioclose: close entry point of the rio device.
     *
     * Calls rio_detach() on the head of rio_list until the list is empty.
     * Returns 0 if every detach succeeded, otherwise the first non-zero
     * value rio_detach() returned.  None of the arguments is used.
     *
     * NOTE(review): the loop re-reads rio_list each time and never advances
     * on its own, so it only terminates if rio_detach() unlinks the entry
     * even when it reports an error.  rio_detach() is not visible here --
     * confirm.
     */
    472   1.1   thorpej int
    473  1.17  christos rioclose(dev_t dev, int flag, int fmt,
    474  1.17  christos     struct lwp *l)
    475   1.1   thorpej {
    476   1.1   thorpej 	rio_queue_t *rqp;
    477   1.1   thorpej 	int err, error = 0;
    478   1.1   thorpej 
    479   1.1   thorpej 	while ((rqp = rio_list) != NULL) {
    480   1.1   thorpej 		/* destroy all */
    481   1.1   thorpej 		err = rio_detach(rqp);
                        		/* remember only the first failure, keep detaching the rest */
    482   1.1   thorpej 		if (err != 0 && error == 0)
    483   1.1   thorpej 			error = err;
    484   1.1   thorpej 	}
    485   1.1   thorpej 
    486   1.1   thorpej 	return error;
    487   1.1   thorpej }
    488   1.1   thorpej 
    489   1.1   thorpej int
    490  1.19  christos rioioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    491  1.13  christos     struct lwp *l)
    492   1.1   thorpej {
    493   1.1   thorpej 	rio_queue_t *rqp;
    494   1.1   thorpej 	struct rio_interface *ifacep;
    495   1.1   thorpej 	struct ifnet *ifp;
    496   1.1   thorpej 	int	error = 0;
    497   1.1   thorpej 
    498   1.1   thorpej 	/* check super-user privilege */
    499   1.1   thorpej 	switch (cmd) {
    500   1.1   thorpej 	case RIO_GETSTATS:
    501   1.1   thorpej 		break;
    502   1.1   thorpej 	default:
    503  1.16      elad 		if ((error = kauth_authorize_network(l->l_cred,
    504  1.16      elad 		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RIO, NULL,
    505  1.16      elad 		    NULL, NULL)) != 0)
    506  1.26       joe 			return error;
    507   1.1   thorpej 		break;
    508   1.1   thorpej 	}
    509   1.7     perry 
    510   1.1   thorpej 	switch (cmd) {
    511   1.1   thorpej 
    512   1.1   thorpej 	case RIO_ENABLE:
    513   1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    514   1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    515   1.1   thorpej 			error = EBADF;
    516   1.1   thorpej 			break;
    517   1.1   thorpej 		}
    518   1.1   thorpej 		error = altq_enable(rqp->rq_ifq);
    519   1.1   thorpej 		break;
    520   1.1   thorpej 
    521   1.1   thorpej 	case RIO_DISABLE:
    522   1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    523   1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    524   1.1   thorpej 			error = EBADF;
    525   1.1   thorpej 			break;
    526   1.1   thorpej 		}
    527   1.1   thorpej 		error = altq_disable(rqp->rq_ifq);
    528   1.1   thorpej 		break;
    529   1.1   thorpej 
    530   1.1   thorpej 	case RIO_IF_ATTACH:
    531   1.1   thorpej 		ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
    532   1.1   thorpej 		if (ifp == NULL) {
    533   1.1   thorpej 			error = ENXIO;
    534   1.1   thorpej 			break;
    535   1.1   thorpej 		}
    536   1.1   thorpej 
    537   1.1   thorpej 		/* allocate and initialize rio_queue_t */
    538   1.9  christos 		rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
    539   1.1   thorpej 		if (rqp == NULL) {
    540   1.1   thorpej 			error = ENOMEM;
    541   1.1   thorpej 			break;
    542   1.1   thorpej 		}
    543   1.1   thorpej 
    544   1.9  christos 		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
    545   1.9  christos 		    M_WAITOK|M_ZERO);
    546   1.1   thorpej 		if (rqp->rq_q == NULL) {
    547   1.9  christos 			free(rqp, M_DEVBUF);
    548   1.1   thorpej 			error = ENOMEM;
    549   1.1   thorpej 			break;
    550   1.1   thorpej 		}
    551   1.1   thorpej 
    552   1.1   thorpej 		rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
    553   1.1   thorpej 		if (rqp->rq_rio == NULL) {
    554   1.9  christos 			free(rqp->rq_q, M_DEVBUF);
    555   1.9  christos 			free(rqp, M_DEVBUF);
    556   1.1   thorpej 			error = ENOMEM;
    557   1.1   thorpej 			break;
    558   1.1   thorpej 		}
    559   1.1   thorpej 
    560   1.1   thorpej 		rqp->rq_ifq = &ifp->if_snd;
    561   1.1   thorpej 		qtail(rqp->rq_q) = NULL;
    562   1.1   thorpej 		qlen(rqp->rq_q) = 0;
    563   1.1   thorpej 		qlimit(rqp->rq_q) = RIO_LIMIT;
    564   1.1   thorpej 		qtype(rqp->rq_q) = Q_RIO;
    565   1.1   thorpej 
    566   1.1   thorpej 		/*
    567   1.1   thorpej 		 * set RIO to this ifnet structure.
    568   1.1   thorpej 		 */
    569   1.1   thorpej 		error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
    570   1.1   thorpej 				    rio_enqueue, rio_dequeue, rio_request,
    571   1.1   thorpej 				    NULL, NULL);
    572   1.1   thorpej 		if (error) {
    573   1.1   thorpej 			rio_destroy(rqp->rq_rio);
    574   1.9  christos 			free(rqp->rq_q, M_DEVBUF);
    575   1.9  christos 			free(rqp, M_DEVBUF);
    576   1.1   thorpej 			break;
    577   1.1   thorpej 		}
    578   1.1   thorpej 
    579   1.1   thorpej 		/* add this state to the rio list */
    580   1.1   thorpej 		rqp->rq_next = rio_list;
    581   1.1   thorpej 		rio_list = rqp;
    582   1.1   thorpej 		break;
    583   1.1   thorpej 
    584   1.1   thorpej 	case RIO_IF_DETACH:
    585   1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    586   1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    587   1.1   thorpej 			error = EBADF;
    588   1.1   thorpej 			break;
    589   1.1   thorpej 		}
    590   1.1   thorpej 		error = rio_detach(rqp);
    591   1.1   thorpej 		break;
    592   1.1   thorpej 
    593   1.1   thorpej 	case RIO_GETSTATS:
    594   1.1   thorpej 		do {
    595   1.1   thorpej 			struct rio_stats *q_stats;
    596   1.1   thorpej 			rio_t *rp;
    597   1.1   thorpej 			int i;
    598   1.1   thorpej 
    599   1.1   thorpej 			q_stats = (struct rio_stats *)addr;
    600   1.1   thorpej 			if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
    601   1.1   thorpej 					       ALTQT_RIO)) == NULL) {
    602   1.1   thorpej 				error = EBADF;
    603   1.1   thorpej 				break;
    604   1.1   thorpej 			}
    605   1.1   thorpej 
    606   1.1   thorpej 			rp = rqp->rq_rio;
    607   1.1   thorpej 
    608   1.1   thorpej 			q_stats->q_limit = qlimit(rqp->rq_q);
    609   1.1   thorpej 			q_stats->weight	= rp->rio_weight;
    610   1.1   thorpej 			q_stats->flags = rp->rio_flags;
    611   1.1   thorpej 
    612   1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++) {
    613   1.1   thorpej 				q_stats->q_len[i] = rp->rio_precstate[i].qlen;
    614  1.21   tsutsui 				memcpy(&q_stats->q_stats[i], &rp->q_stats[i],
    615  1.14     peter 				      sizeof(struct redstats));
    616   1.1   thorpej 				q_stats->q_stats[i].q_avg =
    617   1.1   thorpej 				    rp->rio_precstate[i].avg >> rp->rio_wshift;
    618   1.1   thorpej 
    619   1.1   thorpej 				q_stats->q_params[i].inv_pmax
    620   1.1   thorpej 					= rp->rio_precstate[i].inv_pmax;
    621   1.1   thorpej 				q_stats->q_params[i].th_min
    622   1.1   thorpej 					= rp->rio_precstate[i].th_min;
    623   1.1   thorpej 				q_stats->q_params[i].th_max
    624   1.1   thorpej 					= rp->rio_precstate[i].th_max;
    625   1.1   thorpej 			}
    626  1.14     peter 		} while (/*CONSTCOND*/ 0);
    627   1.1   thorpej 		break;
    628   1.1   thorpej 
    629   1.1   thorpej 	case RIO_CONFIG:
    630   1.1   thorpej 		do {
    631   1.1   thorpej 			struct rio_conf *fc;
    632   1.1   thorpej 			rio_t	*new;
    633   1.1   thorpej 			int s, limit, i;
    634   1.1   thorpej 
    635   1.1   thorpej 			fc = (struct rio_conf *)addr;
    636   1.1   thorpej 			if ((rqp = altq_lookup(fc->iface.rio_ifname,
    637   1.1   thorpej 					       ALTQT_RIO)) == NULL) {
    638   1.1   thorpej 				error = EBADF;
    639   1.1   thorpej 				break;
    640   1.1   thorpej 			}
    641   1.1   thorpej 
    642   1.1   thorpej 			new = rio_alloc(fc->rio_weight, &fc->q_params[0],
    643   1.1   thorpej 					fc->rio_flags, fc->rio_pkttime);
    644   1.1   thorpej 			if (new == NULL) {
    645   1.1   thorpej 				error = ENOMEM;
    646   1.1   thorpej 				break;
    647   1.1   thorpej 			}
    648   1.1   thorpej 
    649   1.3   thorpej 			s = splnet();
    650   1.1   thorpej 			_flushq(rqp->rq_q);
    651   1.1   thorpej 			limit = fc->rio_limit;
    652   1.1   thorpej 			if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
    653   1.1   thorpej 				limit = fc->q_params[RIO_NDROPPREC-1].th_max;
    654   1.1   thorpej 			qlimit(rqp->rq_q) = limit;
    655   1.1   thorpej 
    656   1.1   thorpej 			rio_destroy(rqp->rq_rio);
    657   1.1   thorpej 			rqp->rq_rio = new;
    658   1.1   thorpej 
    659   1.1   thorpej 			splx(s);
    660   1.1   thorpej 
    661   1.1   thorpej 			/* write back new values */
    662   1.1   thorpej 			fc->rio_limit = limit;
    663   1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++) {
    664   1.1   thorpej 				fc->q_params[i].inv_pmax =
    665   1.1   thorpej 					rqp->rq_rio->rio_precstate[i].inv_pmax;
    666   1.1   thorpej 				fc->q_params[i].th_min =
    667   1.1   thorpej 					rqp->rq_rio->rio_precstate[i].th_min;
    668   1.1   thorpej 				fc->q_params[i].th_max =
    669   1.1   thorpej 					rqp->rq_rio->rio_precstate[i].th_max;
    670   1.1   thorpej 			}
    671  1.14     peter 		} while (/*CONSTCOND*/ 0);
    672   1.1   thorpej 		break;
    673   1.1   thorpej 
    674   1.1   thorpej 	case RIO_SETDEFAULTS:
    675   1.1   thorpej 		do {
    676   1.1   thorpej 			struct redparams *rp;
    677   1.1   thorpej 			int i;
    678   1.1   thorpej 
    679   1.1   thorpej 			rp = (struct redparams *)addr;
    680   1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++)
    681   1.1   thorpej 				default_rio_params[i] = rp[i];
    682  1.14     peter 		} while (/*CONSTCOND*/ 0);
    683   1.1   thorpej 		break;
    684   1.1   thorpej 
    685   1.1   thorpej 	default:
    686   1.1   thorpej 		error = EINVAL;
    687   1.1   thorpej 		break;
    688   1.1   thorpej 	}
    689   1.1   thorpej 
    690   1.1   thorpej 	return error;
    691   1.1   thorpej }
    692   1.1   thorpej 
    693   1.1   thorpej static int
    694  1.14     peter rio_detach(rio_queue_t *rqp)
    695   1.1   thorpej {
    696   1.1   thorpej 	rio_queue_t *tmp;
    697   1.1   thorpej 	int error = 0;
    698   1.1   thorpej 
    699   1.1   thorpej 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    700   1.1   thorpej 		altq_disable(rqp->rq_ifq);
    701   1.1   thorpej 
    702   1.1   thorpej 	if ((error = altq_detach(rqp->rq_ifq)))
    703  1.26       joe 		return error;
    704   1.1   thorpej 
    705   1.1   thorpej 	if (rio_list == rqp)
    706   1.1   thorpej 		rio_list = rqp->rq_next;
    707   1.1   thorpej 	else {
    708   1.1   thorpej 		for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
    709   1.1   thorpej 			if (tmp->rq_next == rqp) {
    710   1.1   thorpej 				tmp->rq_next = rqp->rq_next;
    711   1.1   thorpej 				break;
    712   1.1   thorpej 			}
    713   1.1   thorpej 		if (tmp == NULL)
    714   1.1   thorpej 			printf("rio_detach: no state found in rio_list!\n");
    715   1.1   thorpej 	}
    716   1.1   thorpej 
    717   1.1   thorpej 	rio_destroy(rqp->rq_rio);
    718   1.9  christos 	free(rqp->rq_q, M_DEVBUF);
    719   1.9  christos 	free(rqp, M_DEVBUF);
    720  1.26       joe 	return error;
    721   1.1   thorpej }
    722   1.1   thorpej 
    723   1.1   thorpej /*
    724   1.1   thorpej  * rio support routines
    725   1.1   thorpej  */
    726   1.1   thorpej static int
    727  1.17  christos rio_request(struct ifaltq *ifq, int req, void *arg)
    728   1.1   thorpej {
    729   1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    730   1.1   thorpej 
    731   1.1   thorpej 	switch (req) {
    732   1.1   thorpej 	case ALTRQ_PURGE:
    733   1.1   thorpej 		_flushq(rqp->rq_q);
    734   1.1   thorpej 		if (ALTQ_IS_ENABLED(ifq))
    735   1.1   thorpej 			ifq->ifq_len = 0;
    736   1.1   thorpej 		break;
    737   1.1   thorpej 	}
    738  1.26       joe 	return 0;
    739   1.1   thorpej }
    740   1.1   thorpej 
    741   1.1   thorpej /*
    742   1.1   thorpej  * enqueue routine:
    743   1.1   thorpej  *
    744   1.1   thorpej  *	returns: 0 when successfully queued.
    745   1.1   thorpej  *		 ENOBUFS when drop occurs.
    746   1.1   thorpej  */
    747   1.1   thorpej static int
    748  1.22  knakahar rio_enqueue(struct ifaltq *ifq, struct mbuf *m)
    749   1.1   thorpej {
    750  1.22  knakahar 	struct altq_pktattr pktattr;
    751   1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    752   1.1   thorpej 	int error = 0;
    753   1.1   thorpej 
    754  1.22  knakahar 	pktattr.pattr_class = m->m_pkthdr.pattr_class;
    755  1.22  knakahar 	pktattr.pattr_af = m->m_pkthdr.pattr_af;
    756  1.22  knakahar 	pktattr.pattr_hdr = m->m_pkthdr.pattr_hdr;
    757  1.22  knakahar 
    758  1.22  knakahar 	if (rio_addq(rqp->rq_rio, rqp->rq_q, m, &pktattr) == 0)
    759   1.1   thorpej 		ifq->ifq_len++;
    760   1.1   thorpej 	else
    761   1.1   thorpej 		error = ENOBUFS;
    762   1.1   thorpej 	return error;
    763   1.1   thorpej }
    764   1.1   thorpej 
    765   1.1   thorpej /*
    766   1.1   thorpej  * dequeue routine:
    767   1.3   thorpej  *	must be called in splnet.
    768   1.1   thorpej  *
    769   1.1   thorpej  *	returns: mbuf dequeued.
    770   1.1   thorpej  *		 NULL when no packet is available in the queue.
    771   1.1   thorpej  */
    772   1.1   thorpej 
    773   1.1   thorpej static struct mbuf *
    774  1.14     peter rio_dequeue(struct ifaltq *ifq, int op)
    775   1.1   thorpej {
    776   1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    777   1.1   thorpej 	struct mbuf *m = NULL;
    778   1.1   thorpej 
    779   1.1   thorpej 	if (op == ALTDQ_POLL)
    780   1.1   thorpej 		return qhead(rqp->rq_q);
    781   1.1   thorpej 
    782   1.1   thorpej 	m = rio_getq(rqp->rq_rio, rqp->rq_q);
    783   1.1   thorpej 	if (m != NULL)
    784   1.1   thorpej 		ifq->ifq_len--;
    785   1.1   thorpej 	return m;
    786   1.1   thorpej }
    787   1.1   thorpej 
#ifdef KLD_MODULE

/*
 * Switch table handed to ALTQ_MODULE() below: the name "rio" followed
 * by the rioopen, rioclose and rioioctl entry points.
 */
static struct altqsw rio_sw = {
	"rio",
	rioopen,
	rioclose,
	rioioctl
};

ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
MODULE_VERSION(altq_rio, 1);
MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);

#endif /* KLD_MODULE */
    798  1.14     peter #endif /* ALTQ3_COMPAT */
    799   1.1   thorpej 
    800   1.1   thorpej #endif /* ALTQ_RIO */
    801