Home | History | Annotate | Line # | Download | only in altq
altq_rio.c revision 1.18.2.1
      1  1.18.2.1     rmind /*	$NetBSD: altq_rio.c,v 1.18.2.1 2007/03/12 05:45:06 rmind Exp $	*/
      2      1.14     peter /*	$KAME: altq_rio.c,v 1.19 2005/04/13 03:44:25 suz Exp $	*/
      3       1.1   thorpej 
      4       1.1   thorpej /*
      5      1.14     peter  * Copyright (C) 1998-2003
      6       1.1   thorpej  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7       1.1   thorpej  *
      8       1.1   thorpej  * Redistribution and use in source and binary forms, with or without
      9       1.1   thorpej  * modification, are permitted provided that the following conditions
     10       1.1   thorpej  * are met:
     11       1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     12       1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     13       1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     14       1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     15       1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     16       1.1   thorpej  *
     17       1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18       1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19       1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20       1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21       1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22       1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23       1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24       1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25       1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26       1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27       1.1   thorpej  * SUCH DAMAGE.
     28       1.1   thorpej  */
     29       1.1   thorpej /*
     30       1.1   thorpej  * Copyright (c) 1990-1994 Regents of the University of California.
     31       1.1   thorpej  * All rights reserved.
     32       1.1   thorpej  *
     33       1.1   thorpej  * Redistribution and use in source and binary forms, with or without
     34       1.1   thorpej  * modification, are permitted provided that the following conditions
     35       1.1   thorpej  * are met:
     36       1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     37       1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     38       1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     39       1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     40       1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     41       1.1   thorpej  * 3. All advertising materials mentioning features or use of this software
     42       1.1   thorpej  *    must display the following acknowledgement:
     43       1.1   thorpej  *	This product includes software developed by the Computer Systems
     44       1.1   thorpej  *	Engineering Group at Lawrence Berkeley Laboratory.
     45       1.1   thorpej  * 4. Neither the name of the University nor of the Laboratory may be used
     46       1.1   thorpej  *    to endorse or promote products derived from this software without
     47       1.1   thorpej  *    specific prior written permission.
     48       1.1   thorpej  *
     49       1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     50       1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     51       1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     52       1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     53       1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     54       1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     55       1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     56       1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     57       1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     58       1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     59       1.1   thorpej  * SUCH DAMAGE.
     60       1.1   thorpej  */
     61       1.4     lukem 
     62       1.4     lukem #include <sys/cdefs.h>
     63  1.18.2.1     rmind __KERNEL_RCSID(0, "$NetBSD: altq_rio.c,v 1.18.2.1 2007/03/12 05:45:06 rmind Exp $");
     64       1.1   thorpej 
     65      1.14     peter #ifdef _KERNEL_OPT
     66       1.1   thorpej #include "opt_altq.h"
     67       1.1   thorpej #include "opt_inet.h"
     68      1.15     peter #include "pf.h"
     69       1.1   thorpej #endif
     70      1.14     peter 
     71       1.1   thorpej #ifdef ALTQ_RIO	/* rio is enabled by ALTQ_RIO option in opt_altq.h */
     72       1.1   thorpej 
     73       1.1   thorpej #include <sys/param.h>
     74       1.1   thorpej #include <sys/malloc.h>
     75       1.1   thorpej #include <sys/mbuf.h>
     76       1.1   thorpej #include <sys/socket.h>
     77       1.1   thorpej #include <sys/systm.h>
     78      1.14     peter #include <sys/errno.h>
     79      1.14     peter #include <sys/kauth.h>
     80      1.14     peter #if 1 /* ALTQ3_COMPAT */
     81       1.1   thorpej #include <sys/proc.h>
     82      1.14     peter #include <sys/sockio.h>
     83       1.1   thorpej #include <sys/kernel.h>
     84      1.14     peter #endif
     85       1.1   thorpej 
     86       1.1   thorpej #include <net/if.h>
     87       1.1   thorpej 
     88       1.1   thorpej #include <netinet/in.h>
     89       1.1   thorpej #include <netinet/in_systm.h>
     90       1.1   thorpej #include <netinet/ip.h>
     91       1.1   thorpej #ifdef INET6
     92       1.1   thorpej #include <netinet/ip6.h>
     93       1.1   thorpej #endif
     94       1.1   thorpej 
     95      1.15     peter #if NPF > 0
     96      1.14     peter #include <net/pfvar.h>
     97      1.15     peter #endif
     98       1.1   thorpej #include <altq/altq.h>
     99       1.1   thorpej #include <altq/altq_cdnr.h>
    100       1.1   thorpej #include <altq/altq_red.h>
    101       1.1   thorpej #include <altq/altq_rio.h>
    102      1.14     peter #ifdef ALTQ3_COMPAT
    103      1.14     peter #include <altq/altq_conf.h>
    104      1.14     peter #endif
    105       1.1   thorpej 
    106       1.1   thorpej /*
    107       1.1   thorpej  * RIO: RED with IN/OUT bit
    108       1.1   thorpej  *   described in
    109       1.1   thorpej  *	"Explicit Allocation of Best Effort Packet Delivery Service"
    110       1.1   thorpej  *	David D. Clark and Wenjia Fang, MIT Lab for Computer Science
    111       1.1   thorpej  *	http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
    112       1.1   thorpej  *
    113       1.1   thorpej  * this implementation is extended to support more than 2 drop precedence
    114       1.1   thorpej  * values as described in RFC2597 (Assured Forwarding PHB Group).
    115       1.1   thorpej  *
    116       1.1   thorpej  */
    117       1.1   thorpej /*
    118       1.1   thorpej  * AF DS (differentiated service) codepoints.
    119       1.1   thorpej  * (classes can be mapped to CBQ or H-FSC classes.)
    120       1.7     perry  *
    121       1.1   thorpej  *      0   1   2   3   4   5   6   7
    122       1.1   thorpej  *    +---+---+---+---+---+---+---+---+
    123       1.1   thorpej  *    |   CLASS   |DropPre| 0 |  CU   |
    124       1.1   thorpej  *    +---+---+---+---+---+---+---+---+
    125       1.1   thorpej  *
    126       1.1   thorpej  *    class 1: 001
    127       1.1   thorpej  *    class 2: 010
    128       1.1   thorpej  *    class 3: 011
    129       1.1   thorpej  *    class 4: 100
    130       1.1   thorpej  *
    131       1.1   thorpej  *    low drop prec:    01
    132       1.1   thorpej  *    medium drop prec: 10
    133      1.14     peter  *    high drop prec:   11
    134       1.1   thorpej  */
    135       1.1   thorpej 
    136       1.1   thorpej /* default red parameters (used by rio_alloc when no weight is given) */
    137       1.1   thorpej #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    138       1.1   thorpej 				/* q_weight = 0.00195 */
    139       1.1   thorpej 
    140       1.1   thorpej /* red parameters for a slow link (rio_alloc: fewer than 300 packets/sec) */
    141       1.1   thorpej #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    142       1.1   thorpej 				/* q_weight = 0.0078125 */
    143       1.1   thorpej 
    144       1.1   thorpej /* red parameters for a very slow link (e.g., dialup) */
                            /* (rio_alloc: fewer than 50 packets/sec) */
    145       1.1   thorpej #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    146       1.1   thorpej 				/* q_weight = 0.015625 */
    147       1.1   thorpej 
    148       1.1   thorpej /* fixed-point values carry 12 fractional bits */
    149       1.1   thorpej #define	FP_SHIFT	12	/* fixed-point shift */
    150       1.1   thorpej 
    151       1.1   thorpej /* red parameters for drop probability (inputs to default_rio_params) */
    152       1.1   thorpej #define	INV_P_MAX	10	/* inverse of max drop probability */
    153       1.1   thorpej #define	TH_MIN		 5	/* min threshold */
    154       1.1   thorpej #define	TH_MAX		15	/* max threshold */
    155       1.1   thorpej 
    156      1.14     peter #define	RIO_LIMIT	60	/* default max queue length */
    157      1.14     peter #define	RIO_STATS		/* collect statistics */
    158       1.1   thorpej 
                            /*
                             * TV_DELTA(a, b, delta): store in delta the difference (a - b), in
                             * microseconds, between two pointers to structures with tv_sec and
                             * tv_usec members (presumably struct timeval).
                             *
                             *  - a is earlier than b by at least a whole second: delta = 60000000
                             *  - a is more than 60 seconds after b: delta = 60000000
                             *  - 5 to 60 seconds apart: the seconds are added with one multiply
                             *  - 1 to 4 seconds apart: the seconds are added one at a time
                             *
                             * The expansion is a brace block that declares its own local (xxs).
                             * NOTE(review): no use of this macro is visible in this part of the
                             * file.
                             */
    159       1.1   thorpej #define	TV_DELTA(a, b, delta) {					\
    160       1.1   thorpej 	register int	xxs;					\
    161       1.1   thorpej 								\
    162       1.1   thorpej 	delta = (a)->tv_usec - (b)->tv_usec; 			\
    163       1.1   thorpej 	if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { 		\
    164       1.1   thorpej 		if (xxs < 0) { 					\
    165       1.1   thorpej 			delta = 60000000;			\
    166       1.1   thorpej 		} else if (xxs > 4)  {				\
    167       1.1   thorpej 			if (xxs > 60)				\
    168       1.1   thorpej 				delta = 60000000;		\
    169       1.1   thorpej 			else					\
    170       1.1   thorpej 				delta += xxs * 1000000;		\
    171       1.1   thorpej 		} else while (xxs > 0) {			\
    172       1.1   thorpej 			delta += 1000000;			\
    173       1.1   thorpej 			xxs--;					\
    174       1.1   thorpej 		}						\
    175       1.1   thorpej 	}							\
    176       1.1   thorpej }
    177       1.1   thorpej 
    178      1.14     peter #ifdef ALTQ3_COMPAT
    179       1.1   thorpej /*
                             * rio_list keeps all rio_queue_t's allocated.  Entries are linked
                             * through rq_next; the RIO_IF_ATTACH ioctl pushes new entries at the
                             * head and rioclose() detaches every entry on the list.
                             */
    180       1.1   thorpej static rio_queue_t *rio_list = NULL;
    181      1.14     peter #endif
    182       1.1   thorpej /*
                             * default rio parameter values, one row per drop precedence index.
                             * rio_alloc() uses a row's field whenever the caller passes no
                             * parameter array or leaves that field zero.
                             */
    183       1.1   thorpej static struct redparams default_rio_params[RIO_NDROPPREC] = {
    184       1.1   thorpej   /* th_min,		 th_max,     inv_pmax */
    185       1.1   thorpej   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence: 35, 45, 10 */
    186       1.1   thorpej   { TH_MAX + TH_MIN,	 TH_MAX * 2, INV_P_MAX }, /* medium drop precedence: 20, 30, 10 */
    187       1.1   thorpej   { TH_MIN,		 TH_MAX,     INV_P_MAX }  /* high drop precedence: 5, 15, 10 */
    188       1.1   thorpej };
    189       1.1   thorpej 
    190       1.1   thorpej /* internal function prototypes */
                            /* dscp2index is only defined below when RIO_NDROPPREC == 3 */
    191      1.14     peter static int dscp2index(u_int8_t);
    192      1.14     peter #ifdef ALTQ3_COMPAT
                            /* handlers passed to altq_attach() by the RIO_IF_ATTACH ioctl */
    193      1.14     peter static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
    194      1.14     peter static struct mbuf *rio_dequeue(struct ifaltq *, int);
    195      1.14     peter static int rio_request(struct ifaltq *, int, void *);
                            /* tear-down helper used by rioclose() and the RIO_IF_DETACH ioctl */
    196      1.14     peter static int rio_detach(rio_queue_t *);
    197      1.14     peter 
    198      1.14     peter /*
    199      1.14     peter  * rio device interface
    200      1.14     peter  */
    201      1.14     peter altqdev_decl(rio);
    202      1.14     peter 
    203      1.14     peter #endif /* ALTQ3_COMPAT */
    204      1.14     peter 
                            /*
                             * rio_alloc: allocate a zeroed rio_t and fill in its parameters.
                             *
                             *   weight   inverse of the EWMA weight.  0 selects a default: W_WEIGHT,
                             *            or W_WEIGHT_1 / W_WEIGHT_2 when the packet rate derived
                             *            from the packet time is below 300 / 50 packets per second.
                             *            A positive value that is not a power of two is rounded down
                             *            to one (values below 1 become 1) and a message is printed.
                             *   params   array of RIO_NDROPPREC entries, or NULL.  For each entry,
                             *            a zero field (or a NULL array) falls back to the matching
                             *            field of default_rio_params.
                             *   flags    stored as rio_flags (rio_addq tests RIOF_CLEARDSCP).
                             *   pkttime  stored as rio_pkttime; 0 selects 800.  Apparently in
                             *            microseconds per packet, since the packet rate is computed
                             *            as 1000000 / rio_pkttime.
                             *
                             * Returns the new rio_t, or NULL if malloc() returned NULL.
                             * Every drop precedence state starts idle with avg 0 and its
                             * timestamp set to the current time.
                             *
                             * NOTE(review): the result of wtab_alloc() is stored without being
                             * checked.
                             */
    205      1.14     peter rio_t *
    206      1.14     peter rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
    207      1.14     peter {
    208      1.14     peter 	rio_t	*rp;
    209      1.14     peter 	int	 w, i;
    210      1.14     peter 	int	 npkts_per_sec;
    211      1.14     peter 
    212      1.14     peter 	rp = malloc(sizeof(rio_t), M_DEVBUF, M_WAITOK|M_ZERO);
    213      1.14     peter 	if (rp == NULL)
    214      1.14     peter 		return (NULL);
    215      1.14     peter 
    216      1.14     peter 	rp->rio_flags = flags;
    217      1.14     peter 	if (pkttime == 0)
    218      1.14     peter 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    219      1.14     peter 		rp->rio_pkttime = 800;
    220      1.14     peter 	else
    221      1.14     peter 		rp->rio_pkttime = pkttime;
    222      1.14     peter 
    223      1.14     peter 	if (weight != 0)
    224      1.14     peter 		rp->rio_weight = weight;
    225      1.14     peter 	else {
    226      1.14     peter 		/* use default */
    227      1.14     peter 		rp->rio_weight = W_WEIGHT;
    228      1.14     peter 
    229      1.14     peter 		/* when the link is very slow, adjust red parameters */
    230      1.14     peter 		npkts_per_sec = 1000000 / rp->rio_pkttime;
    231      1.14     peter 		if (npkts_per_sec < 50) {
    232      1.14     peter 			/* up to about 400Kbps */
    233      1.14     peter 			rp->rio_weight = W_WEIGHT_2;
    234      1.14     peter 		} else if (npkts_per_sec < 300) {
    235      1.14     peter 			/* up to about 2.4Mbps */
    236      1.14     peter 			rp->rio_weight = W_WEIGHT_1;
    237      1.14     peter 		}
    238      1.14     peter 	}
    239      1.14     peter 
    240      1.14     peter 	/* calculate wshift.  weight must be power of 2 */
                            	/* (the loop counts how many times w can be halved before reaching 1) */
    241      1.14     peter 	w = rp->rio_weight;
    242      1.14     peter 	for (i = 0; w > 1; i++)
    243      1.14     peter 		w = w >> 1;
    244      1.14     peter 	rp->rio_wshift = i;
                            	/* rebuild the weight from the shift; a mismatch means it was rounded */
    245      1.14     peter 	w = 1 << rp->rio_wshift;
    246      1.14     peter 	if (w != rp->rio_weight) {
    247      1.14     peter 		printf("invalid weight value %d for red! use %d\n",
    248      1.14     peter 		       rp->rio_weight, w);
    249      1.14     peter 		rp->rio_weight = w;
    250      1.14     peter 	}
    251      1.14     peter 
    252      1.14     peter 	/* allocate weight table */
    253      1.14     peter 	rp->rio_wtab = wtab_alloc(rp->rio_weight);
    254      1.14     peter 
    255      1.14     peter 	for (i = 0; i < RIO_NDROPPREC; i++) {
    256      1.14     peter 		struct dropprec_state *prec = &rp->rio_precstate[i];
    257      1.14     peter 
    258      1.14     peter 		prec->avg = 0;
    259      1.14     peter 		prec->idle = 1;
    260      1.14     peter 
                            		/* each field independently falls back to its default */
    261      1.14     peter 		if (params == NULL || params[i].inv_pmax == 0)
    262      1.14     peter 			prec->inv_pmax = default_rio_params[i].inv_pmax;
    263      1.14     peter 		else
    264      1.14     peter 			prec->inv_pmax = params[i].inv_pmax;
    265      1.14     peter 		if (params == NULL || params[i].th_min == 0)
    266      1.14     peter 			prec->th_min = default_rio_params[i].th_min;
    267      1.14     peter 		else
    268      1.14     peter 			prec->th_min = params[i].th_min;
    269      1.14     peter 		if (params == NULL || params[i].th_max == 0)
    270      1.14     peter 			prec->th_max = default_rio_params[i].th_max;
    271      1.14     peter 		else
    272      1.14     peter 			prec->th_max = params[i].th_max;
    273      1.14     peter 
    274      1.14     peter 		/*
    275      1.14     peter 		 * th_min_s and th_max_s are scaled versions of th_min
    276      1.14     peter 		 * and th_max to be compared with avg.
    277      1.14     peter 		 */
    278      1.14     peter 		prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
    279      1.14     peter 		prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
    280      1.14     peter 
    281      1.14     peter 		/*
    282      1.14     peter 		 * precompute probability denominator
    283      1.14     peter 		 *  probd = (2 * (th_max - th_min) * inv_pmax) in fixed-point
    284      1.14     peter 		 */
    285      1.14     peter 		prec->probd = (2 * (prec->th_max - prec->th_min)
    286      1.14     peter 			       * prec->inv_pmax) << FP_SHIFT;
    287      1.14     peter 
                            		/* start of the initial idle period for this state */
    288      1.14     peter 		microtime(&prec->last);
    289      1.14     peter 	}
    290      1.14     peter 
    291      1.14     peter 	return (rp);
    292      1.14     peter }
    293      1.14     peter 
                            /*
                             * rio_destroy: release a rio_t.  Hands the weight table to
                             * wtab_destroy() and then frees the structure itself.  rp is
                             * dereferenced unconditionally, so it must not be NULL.
                             */
    294      1.14     peter void
    295      1.14     peter rio_destroy(rio_t *rp)
    296      1.14     peter {
    297      1.14     peter 	wtab_destroy(rp->rio_wtab);
    298      1.14     peter 	free(rp, M_DEVBUF);
    299      1.14     peter }
    300      1.14     peter 
                            /*
                             * rio_getstats: copy the statistics of every drop precedence into the
                             * array starting at sp.  The loop writes RIO_NDROPPREC consecutive
                             * struct redstats entries, so the caller must provide that many.
                             */
    301      1.14     peter void
    302      1.14     peter rio_getstats(rio_t *rp, struct redstats *sp)
    303      1.14     peter {
    304      1.14     peter 	int	i;
    305      1.14     peter 
    306      1.14     peter 	for (i = 0; i < RIO_NDROPPREC; i++) {
    307      1.14     peter 		bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
                            		/* overwrite q_avg with the live average, weight scaling removed */
    308      1.14     peter 		sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
    309      1.14     peter 		sp++;
    310      1.14     peter 	}
    311      1.14     peter }
    312      1.14     peter 
    313      1.14     peter #if (RIO_NDROPPREC == 3)
    314      1.14     peter /*
    315      1.14     peter  * internally, a drop precedence value is converted to an index
    316      1.14     peter  * starting from 0.
                             *
                             * The DS field is masked with AF_DROPPRECMASK.  A masked value of 0
                             * maps to index 0; any other value maps to (masked >> 3) - 1.
                             * Assuming the mask selects the two DropPre bits shown in the
                             * codepoint diagram near the top of this file (TODO confirm; the mask
                             * is defined elsewhere), drop precedences 01/10/11 give 0/1/2.
    317      1.14     peter  */
    318      1.14     peter static int
    319      1.14     peter dscp2index(u_int8_t dscp)
    320      1.14     peter {
    321      1.14     peter 	int	dpindex = dscp & AF_DROPPRECMASK;
    322      1.14     peter 
    323      1.14     peter 	if (dpindex == 0)
    324      1.14     peter 		return (0);
    325      1.14     peter 	return ((dpindex >> 3) - 1);
    326      1.14     peter }
    327      1.14     peter #endif
    328      1.14     peter 
    329      1.14     peter #if 1
    330      1.14     peter /*
    331      1.14     peter  * kludge: when a packet is dequeued, we need to know its drop precedence
    332      1.14     peter  * in order to keep the queue length of each drop precedence.
    333      1.14     peter  * use m_pkthdr.rcvif to pass this info.
                             *
                             * RIOM_SET_PRECINDEX stores the index, cast to a pointer, in rcvif.
                             * RIOM_GET_PRECINDEX yields the stored value as a long and resets
                             * rcvif to NULL; it is written as a ({ ... }) statement expression,
                             * which is a compiler extension rather than standard C.
    334      1.14     peter  */
    335      1.14     peter #define	RIOM_SET_PRECINDEX(m, idx)	\
    336      1.14     peter 	do { (m)->m_pkthdr.rcvif = (struct ifnet *)((long)(idx)); } while (0)
    337      1.14     peter #define	RIOM_GET_PRECINDEX(m)	\
    338      1.14     peter 	({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
    339      1.14     peter 	(m)->m_pkthdr.rcvif = NULL; idx; })
    340      1.14     peter #endif
    341      1.14     peter 
                            /*
                             * rio_addq: run the RIO drop decision for packet m and, if it is not
                             * dropped, append it to queue q.
                             *
                             *  1. The drop precedence index is derived from the packet's DS field.
                             *  2. The average queue estimate (and the arrival count) is updated for
                             *     that index and every higher index.
                             *  3. The state of the packet's own index decides: a forced drop when
                             *     avg has reached th_max_s, an early drop when drop_early() says
                             *     so, and additionally a forced drop when q is at its limit.
                             *  4. On a drop the counts of the affected states are reset, drop
                             *     statistics are updated, m is freed and -1 is returned.
                             *  5. Otherwise the per-precedence queue lengths are incremented, the
                             *     index is stashed in the mbuf header for rio_getq(), the DSCP is
                             *     cleared if RIOF_CLEARDSCP is set, m is queued and 0 is returned.
                             *
                             * The caller must not touch m after a -1 return.
                             */
    342      1.14     peter int
    343      1.14     peter rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
    344      1.14     peter     struct altq_pktattr *pktattr)
    345      1.14     peter {
    346      1.14     peter 	int			 avg, droptype;
    347      1.14     peter 	u_int8_t		 dsfield, odsfield;
    348      1.14     peter 	int			 dpindex, i, n, t;
    349      1.14     peter 	struct timeval		 now;
    350      1.14     peter 	struct dropprec_state	*prec;
    351      1.14     peter 
                            	/* odsfield keeps the original value so a change can be detected later */
    352      1.14     peter 	dsfield = odsfield = read_dsfield(m, pktattr);
    353      1.14     peter 	dpindex = dscp2index(dsfield);
    354      1.14     peter 
    355      1.14     peter 	/*
    356      1.14     peter 	 * update avg of the precedence states whose drop precedence
    357      1.14     peter 	 * is larger than or equal to the drop precedence of the packet
    358      1.14     peter 	 */
                            	/* tv_sec == 0 marks "now" as not fetched yet; see the idle branch */
    359      1.14     peter 	now.tv_sec = 0;
    360      1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
    361      1.14     peter 		prec = &rp->rio_precstate[i];
    362      1.14     peter 		avg = prec->avg;
    363      1.14     peter 		if (prec->idle) {
                            			/* leaving an idle period: decay avg for the time spent idle */
    364      1.14     peter 			prec->idle = 0;
    365      1.14     peter 			if (now.tv_sec == 0)
    366      1.14     peter 				microtime(&now);
    367      1.14     peter 			t = (now.tv_sec - prec->last.tv_sec);
    368      1.14     peter 			if (t > 60)
                            				/* idle for more than a minute: restart from zero */
    369      1.14     peter 				avg = 0;
    370      1.14     peter 			else {
                            				/* n = idle time expressed in packet times */
    371      1.14     peter 				t = t * 1000000 +
    372      1.14     peter 					(now.tv_usec - prec->last.tv_usec);
    373      1.14     peter 				n = t / rp->rio_pkttime;
    374      1.14     peter 				/* calculate (avg = (1 - Wq)^n * avg) */
    375      1.14     peter 				if (n > 0)
    376      1.14     peter 					avg = (avg >> FP_SHIFT) *
    377      1.14     peter 						pow_w(rp->rio_wtab, n);
    378      1.14     peter 			}
    379      1.14     peter 		}
    380      1.14     peter 
    381      1.14     peter 		/* run estimator. (avg is scaled by WEIGHT in fixed-point) */
    382      1.14     peter 		avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
    383      1.14     peter 		prec->avg = avg;		/* save the new value */
    384      1.14     peter 		/*
    385      1.14     peter 		 * count keeps a tally of arriving traffic that has not
    386      1.14     peter 		 * been dropped.
    387      1.14     peter 		 */
    388      1.14     peter 		prec->count++;
    389      1.14     peter 	}
    390      1.14     peter 
                            	/* the decision itself only looks at the packet's own precedence */
    391      1.14     peter 	prec = &rp->rio_precstate[dpindex];
    392      1.14     peter 	avg = prec->avg;
    393      1.14     peter 
    394      1.14     peter 	/* see if we drop early */
    395      1.14     peter 	droptype = DTYPE_NODROP;
    396      1.14     peter 	if (avg >= prec->th_min_s && prec->qlen > 1) {
    397      1.14     peter 		if (avg >= prec->th_max_s) {
    398      1.14     peter 			/* avg >= th_max: forced drop */
    399      1.14     peter 			droptype = DTYPE_FORCED;
    400      1.14     peter 		} else if (prec->old == 0) {
    401      1.14     peter 			/* first exceeds th_min */
    402      1.14     peter 			prec->count = 1;
    403      1.14     peter 			prec->old = 1;
    404      1.14     peter 		} else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
    405      1.14     peter 				      prec->probd, prec->count)) {
    406      1.14     peter 			/* unforced drop by red */
    407      1.14     peter 			droptype = DTYPE_EARLY;
    408      1.14     peter 		}
    409      1.14     peter 	} else {
    410      1.14     peter 		/* avg < th_min, or at most one packet of this precedence queued */
    411      1.14     peter 		prec->old = 0;
    412      1.14     peter 	}
    413      1.14     peter 
    414      1.14     peter 	/*
    415      1.14     peter 	 * if the queue length hits the hard limit, it's a forced drop.
    416      1.14     peter 	 */
    417      1.14     peter 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
    418      1.14     peter 		droptype = DTYPE_FORCED;
    419      1.14     peter 
    420      1.14     peter 	if (droptype != DTYPE_NODROP) {
    421      1.14     peter 		/* always drop incoming packet (as opposed to randomdrop) */
    422      1.14     peter 		for (i = dpindex; i < RIO_NDROPPREC; i++)
    423      1.14     peter 			rp->rio_precstate[i].count = 0;
    424      1.14     peter #ifdef RIO_STATS
                            		/* anything that is not an early drop is counted as forced */
    425      1.14     peter 		if (droptype == DTYPE_EARLY)
    426      1.14     peter 			rp->q_stats[dpindex].drop_unforced++;
    427      1.14     peter 		else
    428      1.14     peter 			rp->q_stats[dpindex].drop_forced++;
    429      1.14     peter 		PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
    430      1.14     peter #endif
    431      1.14     peter 		m_freem(m);
    432      1.14     peter 		return (-1);
    433      1.14     peter 	}
    434      1.14     peter 
                            	/* accepted: the packet counts toward its own and all higher indexes */
    435      1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++)
    436      1.14     peter 		rp->rio_precstate[i].qlen++;
    437      1.14     peter 
    438      1.14     peter 	/* save drop precedence index in mbuf hdr */
    439      1.14     peter 	RIOM_SET_PRECINDEX(m, dpindex);
    440      1.14     peter 
    441      1.14     peter 	if (rp->rio_flags & RIOF_CLEARDSCP)
    442      1.14     peter 		dsfield &= ~DSCP_MASK;
    443      1.14     peter 
                            	/* only rewrite the header when the value actually changed */
    444      1.14     peter 	if (dsfield != odsfield)
    445      1.14     peter 		write_dsfield(m, pktattr, dsfield);
    446      1.14     peter 
    447      1.14     peter 	_addq(q, m);
    448      1.14     peter 
    449      1.14     peter #ifdef RIO_STATS
    450      1.14     peter 	PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
    451      1.14     peter #endif
    452      1.14     peter 	return (0);
    453      1.14     peter }
    454      1.14     peter 
                            /*
                             * rio_getq: take the next packet off q, or return NULL when _getq()
                             * has nothing to give.
                             *
                             * The drop precedence index saved by rio_addq() is read back from the
                             * mbuf header (which also resets rcvif to NULL), and the queue length
                             * of that index and every higher one is decremented.  A state whose
                             * length reaches zero and that is not already idle is marked idle and
                             * stamped with the current time, which rio_addq() later uses to decay
                             * its average.
                             */
    455      1.14     peter struct mbuf *
    456      1.14     peter rio_getq(rio_t *rp, class_queue_t *q)
    457      1.14     peter {
    458      1.14     peter 	struct mbuf	*m;
    459      1.14     peter 	int		 dpindex, i;
    460      1.14     peter 
    461      1.14     peter 	if ((m = _getq(q)) == NULL)
    462      1.14     peter 		return NULL;
    463      1.14     peter 
    464      1.14     peter 	dpindex = RIOM_GET_PRECINDEX(m);
    465      1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
    466      1.14     peter 		if (--rp->rio_precstate[i].qlen == 0) {
    467      1.14     peter 			if (rp->rio_precstate[i].idle == 0) {
    468      1.14     peter 				rp->rio_precstate[i].idle = 1;
    469      1.14     peter 				microtime(&rp->rio_precstate[i].last);
    470      1.14     peter 			}
    471      1.14     peter 		}
    472      1.14     peter 	}
    473      1.14     peter 	return (m);
    474      1.14     peter }
    475       1.1   thorpej 
    476      1.14     peter #ifdef ALTQ3_COMPAT
    477       1.1   thorpej int
    478      1.17  christos rioopen(dev_t dev, int flag, int fmt,
    479      1.17  christos     struct lwp *l)
    480       1.1   thorpej {
                            	/*
                            	 * open entry point of the rio device: none of the arguments is
                            	 * used and the call always succeeds (returns 0).
                            	 */
    481       1.1   thorpej 	/* everything will be done when the queueing scheme is attached. */
    482       1.1   thorpej 	return 0;
    483       1.1   thorpej }
    484       1.1   thorpej 
                            /*
                             * rioclose: close entry point of the rio device.  Calls rio_detach()
                             * on the head of rio_list until the list is empty.  Returns 0, or the
                             * first non-zero value returned by rio_detach(); later failures do
                             * not replace it.  None of the arguments is used.
                             *
                             * NOTE(review): the loop only ends if rio_detach() unlinks the entry
                             * from rio_list even when it reports an error; rio_detach() is not
                             * visible in this part of the file, so confirm.
                             */
    485       1.1   thorpej int
    486      1.17  christos rioclose(dev_t dev, int flag, int fmt,
    487      1.17  christos     struct lwp *l)
    488       1.1   thorpej {
    489       1.1   thorpej 	rio_queue_t *rqp;
    490       1.1   thorpej 	int err, error = 0;
    491       1.1   thorpej 
    492       1.1   thorpej 	while ((rqp = rio_list) != NULL) {
    493       1.1   thorpej 		/* destroy all */
    494       1.1   thorpej 		err = rio_detach(rqp);
    495       1.1   thorpej 		if (err != 0 && error == 0)
    496       1.1   thorpej 			error = err;
    497       1.1   thorpej 	}
    498       1.1   thorpej 
    499       1.1   thorpej 	return error;
    500       1.1   thorpej }
    501       1.1   thorpej 
    502       1.1   thorpej int
    503  1.18.2.1     rmind rioioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    504      1.13  christos     struct lwp *l)
    505       1.1   thorpej {
    506       1.1   thorpej 	rio_queue_t *rqp;
    507       1.1   thorpej 	struct rio_interface *ifacep;
    508       1.1   thorpej 	struct ifnet *ifp;
    509       1.1   thorpej 	int	error = 0;
    510       1.1   thorpej 
    511       1.1   thorpej 	/* check super-user privilege */
    512       1.1   thorpej 	switch (cmd) {
    513       1.1   thorpej 	case RIO_GETSTATS:
    514       1.1   thorpej 		break;
    515       1.1   thorpej 	default:
    516       1.1   thorpej #if (__FreeBSD_version > 400000)
    517       1.1   thorpej 		if ((error = suser(p)) != 0)
    518       1.1   thorpej 			return (error);
    519       1.1   thorpej #else
    520      1.16      elad 		if ((error = kauth_authorize_network(l->l_cred,
    521      1.16      elad 		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RIO, NULL,
    522      1.16      elad 		    NULL, NULL)) != 0)
    523       1.1   thorpej 			return (error);
    524       1.1   thorpej #endif
    525       1.1   thorpej 		break;
    526       1.1   thorpej 	}
    527       1.7     perry 
    528       1.1   thorpej 	switch (cmd) {
    529       1.1   thorpej 
    530       1.1   thorpej 	case RIO_ENABLE:
    531       1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    532       1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    533       1.1   thorpej 			error = EBADF;
    534       1.1   thorpej 			break;
    535       1.1   thorpej 		}
    536       1.1   thorpej 		error = altq_enable(rqp->rq_ifq);
    537       1.1   thorpej 		break;
    538       1.1   thorpej 
    539       1.1   thorpej 	case RIO_DISABLE:
    540       1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    541       1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    542       1.1   thorpej 			error = EBADF;
    543       1.1   thorpej 			break;
    544       1.1   thorpej 		}
    545       1.1   thorpej 		error = altq_disable(rqp->rq_ifq);
    546       1.1   thorpej 		break;
    547       1.1   thorpej 
    548       1.1   thorpej 	case RIO_IF_ATTACH:
    549       1.1   thorpej 		ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
    550       1.1   thorpej 		if (ifp == NULL) {
    551       1.1   thorpej 			error = ENXIO;
    552       1.1   thorpej 			break;
    553       1.1   thorpej 		}
    554       1.1   thorpej 
    555       1.1   thorpej 		/* allocate and initialize rio_queue_t */
    556       1.9  christos 		rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
    557       1.1   thorpej 		if (rqp == NULL) {
    558       1.1   thorpej 			error = ENOMEM;
    559       1.1   thorpej 			break;
    560       1.1   thorpej 		}
    561       1.1   thorpej 
    562       1.9  christos 		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
    563       1.9  christos 		    M_WAITOK|M_ZERO);
    564       1.1   thorpej 		if (rqp->rq_q == NULL) {
    565       1.9  christos 			free(rqp, M_DEVBUF);
    566       1.1   thorpej 			error = ENOMEM;
    567       1.1   thorpej 			break;
    568       1.1   thorpej 		}
    569       1.1   thorpej 
    570       1.1   thorpej 		rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
    571       1.1   thorpej 		if (rqp->rq_rio == NULL) {
    572       1.9  christos 			free(rqp->rq_q, M_DEVBUF);
    573       1.9  christos 			free(rqp, M_DEVBUF);
    574       1.1   thorpej 			error = ENOMEM;
    575       1.1   thorpej 			break;
    576       1.1   thorpej 		}
    577       1.1   thorpej 
    578       1.1   thorpej 		rqp->rq_ifq = &ifp->if_snd;
    579       1.1   thorpej 		qtail(rqp->rq_q) = NULL;
    580       1.1   thorpej 		qlen(rqp->rq_q) = 0;
    581       1.1   thorpej 		qlimit(rqp->rq_q) = RIO_LIMIT;
    582       1.1   thorpej 		qtype(rqp->rq_q) = Q_RIO;
    583       1.1   thorpej 
    584       1.1   thorpej 		/*
    585       1.1   thorpej 		 * set RIO to this ifnet structure.
    586       1.1   thorpej 		 */
    587       1.1   thorpej 		error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
    588       1.1   thorpej 				    rio_enqueue, rio_dequeue, rio_request,
    589       1.1   thorpej 				    NULL, NULL);
    590       1.1   thorpej 		if (error) {
    591       1.1   thorpej 			rio_destroy(rqp->rq_rio);
    592       1.9  christos 			free(rqp->rq_q, M_DEVBUF);
    593       1.9  christos 			free(rqp, M_DEVBUF);
    594       1.1   thorpej 			break;
    595       1.1   thorpej 		}
    596       1.1   thorpej 
    597       1.1   thorpej 		/* add this state to the rio list */
    598       1.1   thorpej 		rqp->rq_next = rio_list;
    599       1.1   thorpej 		rio_list = rqp;
    600       1.1   thorpej 		break;
    601       1.1   thorpej 
    602       1.1   thorpej 	case RIO_IF_DETACH:
    603       1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    604       1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    605       1.1   thorpej 			error = EBADF;
    606       1.1   thorpej 			break;
    607       1.1   thorpej 		}
    608       1.1   thorpej 		error = rio_detach(rqp);
    609       1.1   thorpej 		break;
    610       1.1   thorpej 
    611       1.1   thorpej 	case RIO_GETSTATS:
    612       1.1   thorpej 		do {
    613       1.1   thorpej 			struct rio_stats *q_stats;
    614       1.1   thorpej 			rio_t *rp;
    615       1.1   thorpej 			int i;
    616       1.1   thorpej 
    617       1.1   thorpej 			q_stats = (struct rio_stats *)addr;
    618       1.1   thorpej 			if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
    619       1.1   thorpej 					       ALTQT_RIO)) == NULL) {
    620       1.1   thorpej 				error = EBADF;
    621       1.1   thorpej 				break;
    622       1.1   thorpej 			}
    623       1.1   thorpej 
    624       1.1   thorpej 			rp = rqp->rq_rio;
    625       1.1   thorpej 
    626       1.1   thorpej 			q_stats->q_limit = qlimit(rqp->rq_q);
    627       1.1   thorpej 			q_stats->weight	= rp->rio_weight;
    628       1.1   thorpej 			q_stats->flags = rp->rio_flags;
    629       1.1   thorpej 
    630       1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++) {
    631       1.1   thorpej 				q_stats->q_len[i] = rp->rio_precstate[i].qlen;
    632      1.14     peter 				bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
    633      1.14     peter 				      sizeof(struct redstats));
    634       1.1   thorpej 				q_stats->q_stats[i].q_avg =
    635       1.1   thorpej 				    rp->rio_precstate[i].avg >> rp->rio_wshift;
    636       1.1   thorpej 
    637       1.1   thorpej 				q_stats->q_params[i].inv_pmax
    638       1.1   thorpej 					= rp->rio_precstate[i].inv_pmax;
    639       1.1   thorpej 				q_stats->q_params[i].th_min
    640       1.1   thorpej 					= rp->rio_precstate[i].th_min;
    641       1.1   thorpej 				q_stats->q_params[i].th_max
    642       1.1   thorpej 					= rp->rio_precstate[i].th_max;
    643       1.1   thorpej 			}
    644      1.14     peter 		} while (/*CONSTCOND*/ 0);
    645       1.1   thorpej 		break;
    646       1.1   thorpej 
    647       1.1   thorpej 	case RIO_CONFIG:
    648       1.1   thorpej 		do {
    649       1.1   thorpej 			struct rio_conf *fc;
    650       1.1   thorpej 			rio_t	*new;
    651       1.1   thorpej 			int s, limit, i;
    652       1.1   thorpej 
    653       1.1   thorpej 			fc = (struct rio_conf *)addr;
    654       1.1   thorpej 			if ((rqp = altq_lookup(fc->iface.rio_ifname,
    655       1.1   thorpej 					       ALTQT_RIO)) == NULL) {
    656       1.1   thorpej 				error = EBADF;
    657       1.1   thorpej 				break;
    658       1.1   thorpej 			}
    659       1.1   thorpej 
    660       1.1   thorpej 			new = rio_alloc(fc->rio_weight, &fc->q_params[0],
    661       1.1   thorpej 					fc->rio_flags, fc->rio_pkttime);
    662       1.1   thorpej 			if (new == NULL) {
    663       1.1   thorpej 				error = ENOMEM;
    664       1.1   thorpej 				break;
    665       1.1   thorpej 			}
    666       1.1   thorpej 
    667       1.3   thorpej 			s = splnet();
    668       1.1   thorpej 			_flushq(rqp->rq_q);
    669       1.1   thorpej 			limit = fc->rio_limit;
    670       1.1   thorpej 			if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
    671       1.1   thorpej 				limit = fc->q_params[RIO_NDROPPREC-1].th_max;
    672       1.1   thorpej 			qlimit(rqp->rq_q) = limit;
    673       1.1   thorpej 
    674       1.1   thorpej 			rio_destroy(rqp->rq_rio);
    675       1.1   thorpej 			rqp->rq_rio = new;
    676       1.1   thorpej 
    677       1.1   thorpej 			splx(s);
    678       1.1   thorpej 
    679       1.1   thorpej 			/* write back new values */
    680       1.1   thorpej 			fc->rio_limit = limit;
    681       1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++) {
    682       1.1   thorpej 				fc->q_params[i].inv_pmax =
    683       1.1   thorpej 					rqp->rq_rio->rio_precstate[i].inv_pmax;
    684       1.1   thorpej 				fc->q_params[i].th_min =
    685       1.1   thorpej 					rqp->rq_rio->rio_precstate[i].th_min;
    686       1.1   thorpej 				fc->q_params[i].th_max =
    687       1.1   thorpej 					rqp->rq_rio->rio_precstate[i].th_max;
    688       1.1   thorpej 			}
    689      1.14     peter 		} while (/*CONSTCOND*/ 0);
    690       1.1   thorpej 		break;
    691       1.1   thorpej 
    692       1.1   thorpej 	case RIO_SETDEFAULTS:
    693       1.1   thorpej 		do {
    694       1.1   thorpej 			struct redparams *rp;
    695       1.1   thorpej 			int i;
    696       1.1   thorpej 
    697       1.1   thorpej 			rp = (struct redparams *)addr;
    698       1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++)
    699       1.1   thorpej 				default_rio_params[i] = rp[i];
    700      1.14     peter 		} while (/*CONSTCOND*/ 0);
    701       1.1   thorpej 		break;
    702       1.1   thorpej 
    703       1.1   thorpej 	default:
    704       1.1   thorpej 		error = EINVAL;
    705       1.1   thorpej 		break;
    706       1.1   thorpej 	}
    707       1.1   thorpej 
    708       1.1   thorpej 	return error;
    709       1.1   thorpej }
    710       1.1   thorpej 
    711       1.1   thorpej static int
    712      1.14     peter rio_detach(rio_queue_t *rqp)
    713       1.1   thorpej {
    714       1.1   thorpej 	rio_queue_t *tmp;
    715       1.1   thorpej 	int error = 0;
    716       1.1   thorpej 
    717       1.1   thorpej 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    718       1.1   thorpej 		altq_disable(rqp->rq_ifq);
    719       1.1   thorpej 
    720       1.1   thorpej 	if ((error = altq_detach(rqp->rq_ifq)))
    721       1.1   thorpej 		return (error);
    722       1.1   thorpej 
    723       1.1   thorpej 	if (rio_list == rqp)
    724       1.1   thorpej 		rio_list = rqp->rq_next;
    725       1.1   thorpej 	else {
    726       1.1   thorpej 		for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
    727       1.1   thorpej 			if (tmp->rq_next == rqp) {
    728       1.1   thorpej 				tmp->rq_next = rqp->rq_next;
    729       1.1   thorpej 				break;
    730       1.1   thorpej 			}
    731       1.1   thorpej 		if (tmp == NULL)
    732       1.1   thorpej 			printf("rio_detach: no state found in rio_list!\n");
    733       1.1   thorpej 	}
    734       1.1   thorpej 
    735       1.1   thorpej 	rio_destroy(rqp->rq_rio);
    736       1.9  christos 	free(rqp->rq_q, M_DEVBUF);
    737       1.9  christos 	free(rqp, M_DEVBUF);
    738       1.1   thorpej 	return (error);
    739       1.1   thorpej }
    740       1.1   thorpej 
    741       1.1   thorpej /*
    742       1.1   thorpej  * rio support routines
    743       1.1   thorpej  */
    744       1.1   thorpej static int
    745      1.17  christos rio_request(struct ifaltq *ifq, int req, void *arg)
    746       1.1   thorpej {
    747       1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    748       1.1   thorpej 
    749       1.1   thorpej 	switch (req) {
    750       1.1   thorpej 	case ALTRQ_PURGE:
    751       1.1   thorpej 		_flushq(rqp->rq_q);
    752       1.1   thorpej 		if (ALTQ_IS_ENABLED(ifq))
    753       1.1   thorpej 			ifq->ifq_len = 0;
    754       1.1   thorpej 		break;
    755       1.1   thorpej 	}
    756       1.1   thorpej 	return (0);
    757       1.1   thorpej }
    758       1.1   thorpej 
    759       1.1   thorpej /*
    760       1.1   thorpej  * enqueue routine:
    761       1.1   thorpej  *
    762       1.1   thorpej  *	returns: 0 when successfully queued.
    763       1.1   thorpej  *		 ENOBUFS when drop occurs.
    764       1.1   thorpej  */
    765       1.1   thorpej static int
    766      1.14     peter rio_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
    767       1.1   thorpej {
    768       1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    769       1.1   thorpej 	int error = 0;
    770       1.1   thorpej 
    771       1.1   thorpej 	if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0)
    772       1.1   thorpej 		ifq->ifq_len++;
    773       1.1   thorpej 	else
    774       1.1   thorpej 		error = ENOBUFS;
    775       1.1   thorpej 	return error;
    776       1.1   thorpej }
    777       1.1   thorpej 
    778       1.1   thorpej /*
    779       1.1   thorpej  * dequeue routine:
    780       1.3   thorpej  *	must be called in splnet.
    781       1.1   thorpej  *
    782       1.1   thorpej  *	returns: mbuf dequeued.
    783       1.1   thorpej  *		 NULL when no packet is available in the queue.
    784       1.1   thorpej  */
    785       1.1   thorpej 
    786       1.1   thorpej static struct mbuf *
    787      1.14     peter rio_dequeue(struct ifaltq *ifq, int op)
    788       1.1   thorpej {
    789       1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    790       1.1   thorpej 	struct mbuf *m = NULL;
    791       1.1   thorpej 
    792       1.1   thorpej 	if (op == ALTDQ_POLL)
    793       1.1   thorpej 		return qhead(rqp->rq_q);
    794       1.1   thorpej 
    795       1.1   thorpej 	m = rio_getq(rqp->rq_rio, rqp->rq_q);
    796       1.1   thorpej 	if (m != NULL)
    797       1.1   thorpej 		ifq->ifq_len--;
    798       1.1   thorpej 	return m;
    799       1.1   thorpej }
    800       1.1   thorpej 
    801       1.1   thorpej #ifdef KLD_MODULE
    802       1.1   thorpej 
    /*
     * loadable-module glue; only compiled when KLD_MODULE is defined.
     * rio_sw names the discipline "rio" and lists the rioopen, rioclose
     * and rioioctl entry points.
     * NOTE(review): ALTQ_MODULE, MODULE_VERSION and MODULE_DEPEND are
     * defined outside this file; presumably they register the module,
     * give it version 1 and declare a dependency on altq_red -- confirm.
     */
    803       1.1   thorpej static struct altqsw rio_sw =
    804       1.1   thorpej 	{"rio", rioopen, rioclose, rioioctl};
    805       1.1   thorpej 
    806       1.1   thorpej ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
    807      1.14     peter MODULE_VERSION(altq_rio, 1);
    808      1.14     peter MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);
    809       1.1   thorpej 
    810       1.1   thorpej #endif /* KLD_MODULE */
    811      1.14     peter #endif /* ALTQ3_COMPAT */
    812       1.1   thorpej 
    813       1.1   thorpej #endif /* ALTQ_RIO */
    814