Home | History | Annotate | Line # | Download | only in altq
altq_rio.c revision 1.14
      1  1.14     peter /*	$NetBSD: altq_rio.c,v 1.14 2006/10/12 19:59:08 peter Exp $	*/
      2  1.14     peter /*	$KAME: altq_rio.c,v 1.19 2005/04/13 03:44:25 suz Exp $	*/
      3   1.1   thorpej 
      4   1.1   thorpej /*
      5  1.14     peter  * Copyright (C) 1998-2003
      6   1.1   thorpej  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7   1.1   thorpej  *
      8   1.1   thorpej  * Redistribution and use in source and binary forms, with or without
      9   1.1   thorpej  * modification, are permitted provided that the following conditions
     10   1.1   thorpej  * are met:
     11   1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     12   1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     13   1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     14   1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     15   1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     16   1.1   thorpej  *
     17   1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18   1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19   1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20   1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21   1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22   1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23   1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24   1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25   1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26   1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27   1.1   thorpej  * SUCH DAMAGE.
     28   1.1   thorpej  */
     29   1.1   thorpej /*
     30   1.1   thorpej  * Copyright (c) 1990-1994 Regents of the University of California.
     31   1.1   thorpej  * All rights reserved.
     32   1.1   thorpej  *
     33   1.1   thorpej  * Redistribution and use in source and binary forms, with or without
     34   1.1   thorpej  * modification, are permitted provided that the following conditions
     35   1.1   thorpej  * are met:
     36   1.1   thorpej  * 1. Redistributions of source code must retain the above copyright
     37   1.1   thorpej  *    notice, this list of conditions and the following disclaimer.
     38   1.1   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     39   1.1   thorpej  *    notice, this list of conditions and the following disclaimer in the
     40   1.1   thorpej  *    documentation and/or other materials provided with the distribution.
     41   1.1   thorpej  * 3. All advertising materials mentioning features or use of this software
     42   1.1   thorpej  *    must display the following acknowledgement:
     43   1.1   thorpej  *	This product includes software developed by the Computer Systems
     44   1.1   thorpej  *	Engineering Group at Lawrence Berkeley Laboratory.
     45   1.1   thorpej  * 4. Neither the name of the University nor of the Laboratory may be used
     46   1.1   thorpej  *    to endorse or promote products derived from this software without
     47   1.1   thorpej  *    specific prior written permission.
     48   1.1   thorpej  *
     49   1.1   thorpej  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     50   1.1   thorpej  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     51   1.1   thorpej  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     52   1.1   thorpej  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     53   1.1   thorpej  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     54   1.1   thorpej  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     55   1.1   thorpej  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     56   1.1   thorpej  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     57   1.1   thorpej  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     58   1.1   thorpej  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     59   1.1   thorpej  * SUCH DAMAGE.
     60   1.1   thorpej  */
     61   1.4     lukem 
     62   1.4     lukem #include <sys/cdefs.h>
     63  1.14     peter __KERNEL_RCSID(0, "$NetBSD: altq_rio.c,v 1.14 2006/10/12 19:59:08 peter Exp $");
     64   1.1   thorpej 
     65  1.14     peter #ifdef _KERNEL_OPT
     66   1.1   thorpej #include "opt_altq.h"
     67   1.1   thorpej #include "opt_inet.h"
     68   1.1   thorpej #endif
     69  1.14     peter 
     70   1.1   thorpej #ifdef ALTQ_RIO	/* rio is enabled by ALTQ_RIO option in opt_altq.h */
     71   1.1   thorpej 
     72   1.1   thorpej #include <sys/param.h>
     73   1.1   thorpej #include <sys/malloc.h>
     74   1.1   thorpej #include <sys/mbuf.h>
     75   1.1   thorpej #include <sys/socket.h>
     76   1.1   thorpej #include <sys/systm.h>
     77  1.14     peter #include <sys/errno.h>
     78  1.14     peter #include <sys/kauth.h>
     79  1.14     peter #if 1 /* ALTQ3_COMPAT */
     80   1.1   thorpej #include <sys/proc.h>
     81  1.14     peter #include <sys/sockio.h>
     82   1.1   thorpej #include <sys/kernel.h>
     83  1.14     peter #endif
     84  1.11  christos #include <sys/kauth.h>
     85   1.1   thorpej 
     86   1.1   thorpej #include <net/if.h>
     87   1.1   thorpej 
     88   1.1   thorpej #include <netinet/in.h>
     89   1.1   thorpej #include <netinet/in_systm.h>
     90   1.1   thorpej #include <netinet/ip.h>
     91   1.1   thorpej #ifdef INET6
     92   1.1   thorpej #include <netinet/ip6.h>
     93   1.1   thorpej #endif
     94   1.1   thorpej 
     95  1.14     peter #include <net/pfvar.h>
     96   1.1   thorpej #include <altq/altq.h>
     97   1.1   thorpej #include <altq/altq_cdnr.h>
     98   1.1   thorpej #include <altq/altq_red.h>
     99   1.1   thorpej #include <altq/altq_rio.h>
    100  1.14     peter #ifdef ALTQ3_COMPAT
    101  1.14     peter #include <altq/altq_conf.h>
    102  1.14     peter #endif
    103   1.1   thorpej 
    104   1.1   thorpej /*
    105   1.1   thorpej  * RIO: RED with IN/OUT bit
    106   1.1   thorpej  *   described in
    107   1.1   thorpej  *	"Explicit Allocation of Best Effort Packet Delivery Service"
    108   1.1   thorpej  *	David D. Clark and Wenjia Fang, MIT Lab for Computer Science
    109   1.1   thorpej  *	http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
    110   1.1   thorpej  *
    111   1.1   thorpej  * this implementation is extended to support more than 2 drop precedence
    112   1.1   thorpej  * values as described in RFC2597 (Assured Forwarding PHB Group).
    113   1.1   thorpej  *
    114   1.1   thorpej  */
    115   1.1   thorpej /*
    116   1.1   thorpej  * AF DS (differentiated service) codepoints.
    117   1.1   thorpej  * (classes can be mapped to CBQ or H-FSC classes.)
    118   1.7     perry  *
    119   1.1   thorpej  *      0   1   2   3   4   5   6   7
    120   1.1   thorpej  *    +---+---+---+---+---+---+---+---+
    121   1.1   thorpej  *    |   CLASS   |DropPre| 0 |  CU   |
    122   1.1   thorpej  *    +---+---+---+---+---+---+---+---+
    123   1.1   thorpej  *
    124   1.1   thorpej  *    class 1: 001
    125   1.1   thorpej  *    class 2: 010
    126   1.1   thorpej  *    class 3: 011
    127   1.1   thorpej  *    class 4: 100
    128   1.1   thorpej  *
    129   1.1   thorpej  *    low drop prec:    01
    130   1.1   thorpej  *    medium drop prec: 10
    131  1.14     peter  *    high drop prec:   11
    132   1.1   thorpej  */
    133   1.1   thorpej 
    134   1.1   thorpej /* normal red parameters */
    135   1.1   thorpej #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
    136   1.1   thorpej 				/* q_weight = 0.00195 */
    137   1.1   thorpej 
    138   1.1   thorpej /* red parameters for a slow link */
    139   1.1   thorpej #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
    140   1.1   thorpej 				/* q_weight = 0.0078125 */
    141   1.1   thorpej 
    142   1.1   thorpej /* red parameters for a very slow link (e.g., dialup) */
    143   1.1   thorpej #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
    144   1.1   thorpej 				/* q_weight = 0.015625 */
    145   1.1   thorpej 
    146   1.1   thorpej /* fixed-point uses 12-bit decimal places */
    147   1.1   thorpej #define	FP_SHIFT	12	/* fixed-point shift */
    148   1.1   thorpej 
    149   1.1   thorpej /* red parameters for drop probability */
    150   1.1   thorpej #define	INV_P_MAX	10	/* inverse of max drop probability */
    151   1.1   thorpej #define	TH_MIN		 5	/* min threshold */
    152   1.1   thorpej #define	TH_MAX		15	/* max threshold */
    153   1.1   thorpej 
    154  1.14     peter #define	RIO_LIMIT	60	/* default max queue length */
    155  1.14     peter #define	RIO_STATS		/* collect statistics */
    156   1.1   thorpej 
    157   1.1   thorpej #define	TV_DELTA(a, b, delta) {					\
    158   1.1   thorpej 	register int	xxs;					\
    159   1.1   thorpej 								\
    160   1.1   thorpej 	delta = (a)->tv_usec - (b)->tv_usec; 			\
    161   1.1   thorpej 	if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { 		\
    162   1.1   thorpej 		if (xxs < 0) { 					\
    163   1.1   thorpej 			delta = 60000000;			\
    164   1.1   thorpej 		} else if (xxs > 4)  {				\
    165   1.1   thorpej 			if (xxs > 60)				\
    166   1.1   thorpej 				delta = 60000000;		\
    167   1.1   thorpej 			else					\
    168   1.1   thorpej 				delta += xxs * 1000000;		\
    169   1.1   thorpej 		} else while (xxs > 0) {			\
    170   1.1   thorpej 			delta += 1000000;			\
    171   1.1   thorpej 			xxs--;					\
    172   1.1   thorpej 		}						\
    173   1.1   thorpej 	}							\
    174   1.1   thorpej }
    175   1.1   thorpej 
    176  1.14     peter #ifdef ALTQ3_COMPAT
    177   1.1   thorpej /* rio_list keeps all rio_queue_t's allocated. */
    178   1.1   thorpej static rio_queue_t *rio_list = NULL;
    179  1.14     peter #endif
    180   1.1   thorpej /* default rio parameter values */
    181   1.1   thorpej static struct redparams default_rio_params[RIO_NDROPPREC] = {
    182   1.1   thorpej   /* th_min,		 th_max,     inv_pmax */
    183   1.1   thorpej   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
    184   1.1   thorpej   { TH_MAX + TH_MIN,	 TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
    185   1.1   thorpej   { TH_MIN,		 TH_MAX,     INV_P_MAX }  /* high drop precedence */
    186   1.1   thorpej };
    187   1.1   thorpej 
    188   1.1   thorpej /* internal function prototypes */
    189  1.14     peter static int dscp2index(u_int8_t);
    190  1.14     peter #ifdef ALTQ3_COMPAT
    191  1.14     peter static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
    192  1.14     peter static struct mbuf *rio_dequeue(struct ifaltq *, int);
    193  1.14     peter static int rio_request(struct ifaltq *, int, void *);
    194  1.14     peter static int rio_detach(rio_queue_t *);
    195  1.14     peter 
    196  1.14     peter /*
    197  1.14     peter  * rio device interface
    198  1.14     peter  */
    199  1.14     peter altqdev_decl(rio);
    200  1.14     peter 
    201  1.14     peter #endif /* ALTQ3_COMPAT */
    202  1.14     peter 
    203  1.14     peter rio_t *
    204  1.14     peter rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
    205  1.14     peter {
    206  1.14     peter 	rio_t	*rp;
    207  1.14     peter 	int	 w, i;
    208  1.14     peter 	int	 npkts_per_sec;
    209  1.14     peter 
    210  1.14     peter 	rp = malloc(sizeof(rio_t), M_DEVBUF, M_WAITOK|M_ZERO);
    211  1.14     peter 	if (rp == NULL)
    212  1.14     peter 		return (NULL);
    213  1.14     peter 
    214  1.14     peter 	rp->rio_flags = flags;
    215  1.14     peter 	if (pkttime == 0)
    216  1.14     peter 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
    217  1.14     peter 		rp->rio_pkttime = 800;
    218  1.14     peter 	else
    219  1.14     peter 		rp->rio_pkttime = pkttime;
    220  1.14     peter 
    221  1.14     peter 	if (weight != 0)
    222  1.14     peter 		rp->rio_weight = weight;
    223  1.14     peter 	else {
    224  1.14     peter 		/* use default */
    225  1.14     peter 		rp->rio_weight = W_WEIGHT;
    226  1.14     peter 
    227  1.14     peter 		/* when the link is very slow, adjust red parameters */
    228  1.14     peter 		npkts_per_sec = 1000000 / rp->rio_pkttime;
    229  1.14     peter 		if (npkts_per_sec < 50) {
    230  1.14     peter 			/* up to about 400Kbps */
    231  1.14     peter 			rp->rio_weight = W_WEIGHT_2;
    232  1.14     peter 		} else if (npkts_per_sec < 300) {
    233  1.14     peter 			/* up to about 2.4Mbps */
    234  1.14     peter 			rp->rio_weight = W_WEIGHT_1;
    235  1.14     peter 		}
    236  1.14     peter 	}
    237  1.14     peter 
    238  1.14     peter 	/* calculate wshift.  weight must be power of 2 */
    239  1.14     peter 	w = rp->rio_weight;
    240  1.14     peter 	for (i = 0; w > 1; i++)
    241  1.14     peter 		w = w >> 1;
    242  1.14     peter 	rp->rio_wshift = i;
    243  1.14     peter 	w = 1 << rp->rio_wshift;
    244  1.14     peter 	if (w != rp->rio_weight) {
    245  1.14     peter 		printf("invalid weight value %d for red! use %d\n",
    246  1.14     peter 		       rp->rio_weight, w);
    247  1.14     peter 		rp->rio_weight = w;
    248  1.14     peter 	}
    249  1.14     peter 
    250  1.14     peter 	/* allocate weight table */
    251  1.14     peter 	rp->rio_wtab = wtab_alloc(rp->rio_weight);
    252  1.14     peter 
    253  1.14     peter 	for (i = 0; i < RIO_NDROPPREC; i++) {
    254  1.14     peter 		struct dropprec_state *prec = &rp->rio_precstate[i];
    255  1.14     peter 
    256  1.14     peter 		prec->avg = 0;
    257  1.14     peter 		prec->idle = 1;
    258  1.14     peter 
    259  1.14     peter 		if (params == NULL || params[i].inv_pmax == 0)
    260  1.14     peter 			prec->inv_pmax = default_rio_params[i].inv_pmax;
    261  1.14     peter 		else
    262  1.14     peter 			prec->inv_pmax = params[i].inv_pmax;
    263  1.14     peter 		if (params == NULL || params[i].th_min == 0)
    264  1.14     peter 			prec->th_min = default_rio_params[i].th_min;
    265  1.14     peter 		else
    266  1.14     peter 			prec->th_min = params[i].th_min;
    267  1.14     peter 		if (params == NULL || params[i].th_max == 0)
    268  1.14     peter 			prec->th_max = default_rio_params[i].th_max;
    269  1.14     peter 		else
    270  1.14     peter 			prec->th_max = params[i].th_max;
    271  1.14     peter 
    272  1.14     peter 		/*
    273  1.14     peter 		 * th_min_s and th_max_s are scaled versions of th_min
    274  1.14     peter 		 * and th_max to be compared with avg.
    275  1.14     peter 		 */
    276  1.14     peter 		prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
    277  1.14     peter 		prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
    278  1.14     peter 
    279  1.14     peter 		/*
    280  1.14     peter 		 * precompute probability denominator
    281  1.14     peter 		 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
    282  1.14     peter 		 */
    283  1.14     peter 		prec->probd = (2 * (prec->th_max - prec->th_min)
    284  1.14     peter 			       * prec->inv_pmax) << FP_SHIFT;
    285  1.14     peter 
    286  1.14     peter 		microtime(&prec->last);
    287  1.14     peter 	}
    288  1.14     peter 
    289  1.14     peter 	return (rp);
    290  1.14     peter }
    291  1.14     peter 
    292  1.14     peter void
    293  1.14     peter rio_destroy(rio_t *rp)
    294  1.14     peter {
    295  1.14     peter 	wtab_destroy(rp->rio_wtab);
    296  1.14     peter 	free(rp, M_DEVBUF);
    297  1.14     peter }
    298  1.14     peter 
    299  1.14     peter void
    300  1.14     peter rio_getstats(rio_t *rp, struct redstats *sp)
    301  1.14     peter {
    302  1.14     peter 	int	i;
    303  1.14     peter 
    304  1.14     peter 	for (i = 0; i < RIO_NDROPPREC; i++) {
    305  1.14     peter 		bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
    306  1.14     peter 		sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
    307  1.14     peter 		sp++;
    308  1.14     peter 	}
    309  1.14     peter }
    310  1.14     peter 
    311  1.14     peter #if (RIO_NDROPPREC == 3)
    312  1.14     peter /*
    313  1.14     peter  * internally, a drop precedence value is converted to an index
    314  1.14     peter  * starting from 0.
    315  1.14     peter  */
    316  1.14     peter static int
    317  1.14     peter dscp2index(u_int8_t dscp)
    318  1.14     peter {
    319  1.14     peter 	int	dpindex = dscp & AF_DROPPRECMASK;
    320  1.14     peter 
    321  1.14     peter 	if (dpindex == 0)
    322  1.14     peter 		return (0);
    323  1.14     peter 	return ((dpindex >> 3) - 1);
    324  1.14     peter }
    325  1.14     peter #endif
    326  1.14     peter 
    327  1.14     peter #if 1
    328  1.14     peter /*
    329  1.14     peter  * kludge: when a packet is dequeued, we need to know its drop precedence
    330  1.14     peter  * in order to keep the queue length of each drop precedence.
    331  1.14     peter  * use m_pkthdr.rcvif to pass this info.
    332  1.14     peter  */
    333  1.14     peter #define	RIOM_SET_PRECINDEX(m, idx)	\
    334  1.14     peter 	do { (m)->m_pkthdr.rcvif = (struct ifnet *)((long)(idx)); } while (0)
    335  1.14     peter #define	RIOM_GET_PRECINDEX(m)	\
    336  1.14     peter 	({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
    337  1.14     peter 	(m)->m_pkthdr.rcvif = NULL; idx; })
    338  1.14     peter #endif
    339  1.14     peter 
    340  1.14     peter int
    341  1.14     peter rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
    342  1.14     peter     struct altq_pktattr *pktattr)
    343  1.14     peter {
    344  1.14     peter 	int			 avg, droptype;
    345  1.14     peter 	u_int8_t		 dsfield, odsfield;
    346  1.14     peter 	int			 dpindex, i, n, t;
    347  1.14     peter 	struct timeval		 now;
    348  1.14     peter 	struct dropprec_state	*prec;
    349  1.14     peter 
    350  1.14     peter 	dsfield = odsfield = read_dsfield(m, pktattr);
    351  1.14     peter 	dpindex = dscp2index(dsfield);
    352  1.14     peter 
    353  1.14     peter 	/*
    354  1.14     peter 	 * update avg of the precedence states whose drop precedence
    355  1.14     peter 	 * is larger than or equal to the drop precedence of the packet
    356  1.14     peter 	 */
    357  1.14     peter 	now.tv_sec = 0;
    358  1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
    359  1.14     peter 		prec = &rp->rio_precstate[i];
    360  1.14     peter 		avg = prec->avg;
    361  1.14     peter 		if (prec->idle) {
    362  1.14     peter 			prec->idle = 0;
    363  1.14     peter 			if (now.tv_sec == 0)
    364  1.14     peter 				microtime(&now);
    365  1.14     peter 			t = (now.tv_sec - prec->last.tv_sec);
    366  1.14     peter 			if (t > 60)
    367  1.14     peter 				avg = 0;
    368  1.14     peter 			else {
    369  1.14     peter 				t = t * 1000000 +
    370  1.14     peter 					(now.tv_usec - prec->last.tv_usec);
    371  1.14     peter 				n = t / rp->rio_pkttime;
    372  1.14     peter 				/* calculate (avg = (1 - Wq)^n * avg) */
    373  1.14     peter 				if (n > 0)
    374  1.14     peter 					avg = (avg >> FP_SHIFT) *
    375  1.14     peter 						pow_w(rp->rio_wtab, n);
    376  1.14     peter 			}
    377  1.14     peter 		}
    378  1.14     peter 
    379  1.14     peter 		/* run estimator. (avg is scaled by WEIGHT in fixed-point) */
    380  1.14     peter 		avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
    381  1.14     peter 		prec->avg = avg;		/* save the new value */
    382  1.14     peter 		/*
    383  1.14     peter 		 * count keeps a tally of arriving traffic that has not
    384  1.14     peter 		 * been dropped.
    385  1.14     peter 		 */
    386  1.14     peter 		prec->count++;
    387  1.14     peter 	}
    388  1.14     peter 
    389  1.14     peter 	prec = &rp->rio_precstate[dpindex];
    390  1.14     peter 	avg = prec->avg;
    391  1.14     peter 
    392  1.14     peter 	/* see if we drop early */
    393  1.14     peter 	droptype = DTYPE_NODROP;
    394  1.14     peter 	if (avg >= prec->th_min_s && prec->qlen > 1) {
    395  1.14     peter 		if (avg >= prec->th_max_s) {
    396  1.14     peter 			/* avg >= th_max: forced drop */
    397  1.14     peter 			droptype = DTYPE_FORCED;
    398  1.14     peter 		} else if (prec->old == 0) {
    399  1.14     peter 			/* first exceeds th_min */
    400  1.14     peter 			prec->count = 1;
    401  1.14     peter 			prec->old = 1;
    402  1.14     peter 		} else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
    403  1.14     peter 				      prec->probd, prec->count)) {
    404  1.14     peter 			/* unforced drop by red */
    405  1.14     peter 			droptype = DTYPE_EARLY;
    406  1.14     peter 		}
    407  1.14     peter 	} else {
    408  1.14     peter 		/* avg < th_min */
    409  1.14     peter 		prec->old = 0;
    410  1.14     peter 	}
    411  1.14     peter 
    412  1.14     peter 	/*
    413  1.14     peter 	 * if the queue length hits the hard limit, it's a forced drop.
    414  1.14     peter 	 */
    415  1.14     peter 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
    416  1.14     peter 		droptype = DTYPE_FORCED;
    417  1.14     peter 
    418  1.14     peter 	if (droptype != DTYPE_NODROP) {
    419  1.14     peter 		/* always drop incoming packet (as opposed to randomdrop) */
    420  1.14     peter 		for (i = dpindex; i < RIO_NDROPPREC; i++)
    421  1.14     peter 			rp->rio_precstate[i].count = 0;
    422  1.14     peter #ifdef RIO_STATS
    423  1.14     peter 		if (droptype == DTYPE_EARLY)
    424  1.14     peter 			rp->q_stats[dpindex].drop_unforced++;
    425  1.14     peter 		else
    426  1.14     peter 			rp->q_stats[dpindex].drop_forced++;
    427  1.14     peter 		PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
    428  1.14     peter #endif
    429  1.14     peter 		m_freem(m);
    430  1.14     peter 		return (-1);
    431  1.14     peter 	}
    432  1.14     peter 
    433  1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++)
    434  1.14     peter 		rp->rio_precstate[i].qlen++;
    435  1.14     peter 
    436  1.14     peter 	/* save drop precedence index in mbuf hdr */
    437  1.14     peter 	RIOM_SET_PRECINDEX(m, dpindex);
    438  1.14     peter 
    439  1.14     peter 	if (rp->rio_flags & RIOF_CLEARDSCP)
    440  1.14     peter 		dsfield &= ~DSCP_MASK;
    441  1.14     peter 
    442  1.14     peter 	if (dsfield != odsfield)
    443  1.14     peter 		write_dsfield(m, pktattr, dsfield);
    444  1.14     peter 
    445  1.14     peter 	_addq(q, m);
    446  1.14     peter 
    447  1.14     peter #ifdef RIO_STATS
    448  1.14     peter 	PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
    449  1.14     peter #endif
    450  1.14     peter 	return (0);
    451  1.14     peter }
    452  1.14     peter 
    453  1.14     peter struct mbuf *
    454  1.14     peter rio_getq(rio_t *rp, class_queue_t *q)
    455  1.14     peter {
    456  1.14     peter 	struct mbuf	*m;
    457  1.14     peter 	int		 dpindex, i;
    458  1.14     peter 
    459  1.14     peter 	if ((m = _getq(q)) == NULL)
    460  1.14     peter 		return NULL;
    461  1.14     peter 
    462  1.14     peter 	dpindex = RIOM_GET_PRECINDEX(m);
    463  1.14     peter 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
    464  1.14     peter 		if (--rp->rio_precstate[i].qlen == 0) {
    465  1.14     peter 			if (rp->rio_precstate[i].idle == 0) {
    466  1.14     peter 				rp->rio_precstate[i].idle = 1;
    467  1.14     peter 				microtime(&rp->rio_precstate[i].last);
    468  1.14     peter 			}
    469  1.14     peter 		}
    470  1.14     peter 	}
    471  1.14     peter 	return (m);
    472  1.14     peter }
    473   1.1   thorpej 
    474  1.14     peter #ifdef ALTQ3_COMPAT
    475   1.1   thorpej int
    476  1.13  christos rioopen(dev_t dev __unused, int flag __unused, int fmt __unused,
    477  1.13  christos     struct lwp *l __unused)
    478   1.1   thorpej {
    479   1.1   thorpej 	/* everything will be done when the queueing scheme is attached. */
    480   1.1   thorpej 	return 0;
    481   1.1   thorpej }
    482   1.1   thorpej 
    483   1.1   thorpej int
    484  1.13  christos rioclose(dev_t dev __unused, int flag __unused, int fmt __unused,
    485  1.13  christos     struct lwp *l __unused)
    486   1.1   thorpej {
    487   1.1   thorpej 	rio_queue_t *rqp;
    488   1.1   thorpej 	int err, error = 0;
    489   1.1   thorpej 
    490   1.1   thorpej 	while ((rqp = rio_list) != NULL) {
    491   1.1   thorpej 		/* destroy all */
    492   1.1   thorpej 		err = rio_detach(rqp);
    493   1.1   thorpej 		if (err != 0 && error == 0)
    494   1.1   thorpej 			error = err;
    495   1.1   thorpej 	}
    496   1.1   thorpej 
    497   1.1   thorpej 	return error;
    498   1.1   thorpej }
    499   1.1   thorpej 
    500   1.1   thorpej int
    501  1.13  christos rioioctl(dev_t dev __unused, ioctlcmd_t cmd, caddr_t addr, int flag __unused,
    502  1.13  christos     struct lwp *l)
    503   1.1   thorpej {
    504   1.1   thorpej 	rio_queue_t *rqp;
    505   1.1   thorpej 	struct rio_interface *ifacep;
    506   1.1   thorpej 	struct ifnet *ifp;
    507   1.1   thorpej 	int	error = 0;
    508   1.1   thorpej 
    509   1.1   thorpej 	/* check super-user privilege */
    510   1.1   thorpej 	switch (cmd) {
    511   1.1   thorpej 	case RIO_GETSTATS:
    512   1.1   thorpej 		break;
    513   1.1   thorpej 	default:
    514   1.1   thorpej #if (__FreeBSD_version > 400000)
    515   1.1   thorpej 		if ((error = suser(p)) != 0)
    516   1.1   thorpej 			return (error);
    517   1.1   thorpej #else
    518  1.12        ad 		if ((error = kauth_authorize_generic(l->l_cred,
    519  1.12        ad 		    KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)
    520   1.1   thorpej 			return (error);
    521   1.1   thorpej #endif
    522   1.1   thorpej 		break;
    523   1.1   thorpej 	}
    524   1.7     perry 
    525   1.1   thorpej 	switch (cmd) {
    526   1.1   thorpej 
    527   1.1   thorpej 	case RIO_ENABLE:
    528   1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    529   1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    530   1.1   thorpej 			error = EBADF;
    531   1.1   thorpej 			break;
    532   1.1   thorpej 		}
    533   1.1   thorpej 		error = altq_enable(rqp->rq_ifq);
    534   1.1   thorpej 		break;
    535   1.1   thorpej 
    536   1.1   thorpej 	case RIO_DISABLE:
    537   1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    538   1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    539   1.1   thorpej 			error = EBADF;
    540   1.1   thorpej 			break;
    541   1.1   thorpej 		}
    542   1.1   thorpej 		error = altq_disable(rqp->rq_ifq);
    543   1.1   thorpej 		break;
    544   1.1   thorpej 
    545   1.1   thorpej 	case RIO_IF_ATTACH:
    546   1.1   thorpej 		ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
    547   1.1   thorpej 		if (ifp == NULL) {
    548   1.1   thorpej 			error = ENXIO;
    549   1.1   thorpej 			break;
    550   1.1   thorpej 		}
    551   1.1   thorpej 
    552   1.1   thorpej 		/* allocate and initialize rio_queue_t */
    553   1.9  christos 		rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
    554   1.1   thorpej 		if (rqp == NULL) {
    555   1.1   thorpej 			error = ENOMEM;
    556   1.1   thorpej 			break;
    557   1.1   thorpej 		}
    558   1.1   thorpej 
    559   1.9  christos 		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
    560   1.9  christos 		    M_WAITOK|M_ZERO);
    561   1.1   thorpej 		if (rqp->rq_q == NULL) {
    562   1.9  christos 			free(rqp, M_DEVBUF);
    563   1.1   thorpej 			error = ENOMEM;
    564   1.1   thorpej 			break;
    565   1.1   thorpej 		}
    566   1.1   thorpej 
    567   1.1   thorpej 		rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
    568   1.1   thorpej 		if (rqp->rq_rio == NULL) {
    569   1.9  christos 			free(rqp->rq_q, M_DEVBUF);
    570   1.9  christos 			free(rqp, M_DEVBUF);
    571   1.1   thorpej 			error = ENOMEM;
    572   1.1   thorpej 			break;
    573   1.1   thorpej 		}
    574   1.1   thorpej 
    575   1.1   thorpej 		rqp->rq_ifq = &ifp->if_snd;
    576   1.1   thorpej 		qtail(rqp->rq_q) = NULL;
    577   1.1   thorpej 		qlen(rqp->rq_q) = 0;
    578   1.1   thorpej 		qlimit(rqp->rq_q) = RIO_LIMIT;
    579   1.1   thorpej 		qtype(rqp->rq_q) = Q_RIO;
    580   1.1   thorpej 
    581   1.1   thorpej 		/*
    582   1.1   thorpej 		 * set RIO to this ifnet structure.
    583   1.1   thorpej 		 */
    584   1.1   thorpej 		error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
    585   1.1   thorpej 				    rio_enqueue, rio_dequeue, rio_request,
    586   1.1   thorpej 				    NULL, NULL);
    587   1.1   thorpej 		if (error) {
    588   1.1   thorpej 			rio_destroy(rqp->rq_rio);
    589   1.9  christos 			free(rqp->rq_q, M_DEVBUF);
    590   1.9  christos 			free(rqp, M_DEVBUF);
    591   1.1   thorpej 			break;
    592   1.1   thorpej 		}
    593   1.1   thorpej 
    594   1.1   thorpej 		/* add this state to the rio list */
    595   1.1   thorpej 		rqp->rq_next = rio_list;
    596   1.1   thorpej 		rio_list = rqp;
    597   1.1   thorpej 		break;
    598   1.1   thorpej 
    599   1.1   thorpej 	case RIO_IF_DETACH:
    600   1.1   thorpej 		ifacep = (struct rio_interface *)addr;
    601   1.1   thorpej 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
    602   1.1   thorpej 			error = EBADF;
    603   1.1   thorpej 			break;
    604   1.1   thorpej 		}
    605   1.1   thorpej 		error = rio_detach(rqp);
    606   1.1   thorpej 		break;
    607   1.1   thorpej 
    608   1.1   thorpej 	case RIO_GETSTATS:
    609   1.1   thorpej 		do {
    610   1.1   thorpej 			struct rio_stats *q_stats;
    611   1.1   thorpej 			rio_t *rp;
    612   1.1   thorpej 			int i;
    613   1.1   thorpej 
    614   1.1   thorpej 			q_stats = (struct rio_stats *)addr;
    615   1.1   thorpej 			if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
    616   1.1   thorpej 					       ALTQT_RIO)) == NULL) {
    617   1.1   thorpej 				error = EBADF;
    618   1.1   thorpej 				break;
    619   1.1   thorpej 			}
    620   1.1   thorpej 
    621   1.1   thorpej 			rp = rqp->rq_rio;
    622   1.1   thorpej 
    623   1.1   thorpej 			q_stats->q_limit = qlimit(rqp->rq_q);
    624   1.1   thorpej 			q_stats->weight	= rp->rio_weight;
    625   1.1   thorpej 			q_stats->flags = rp->rio_flags;
    626   1.1   thorpej 
    627   1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++) {
    628   1.1   thorpej 				q_stats->q_len[i] = rp->rio_precstate[i].qlen;
    629  1.14     peter 				bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
    630  1.14     peter 				      sizeof(struct redstats));
    631   1.1   thorpej 				q_stats->q_stats[i].q_avg =
    632   1.1   thorpej 				    rp->rio_precstate[i].avg >> rp->rio_wshift;
    633   1.1   thorpej 
    634   1.1   thorpej 				q_stats->q_params[i].inv_pmax
    635   1.1   thorpej 					= rp->rio_precstate[i].inv_pmax;
    636   1.1   thorpej 				q_stats->q_params[i].th_min
    637   1.1   thorpej 					= rp->rio_precstate[i].th_min;
    638   1.1   thorpej 				q_stats->q_params[i].th_max
    639   1.1   thorpej 					= rp->rio_precstate[i].th_max;
    640   1.1   thorpej 			}
    641  1.14     peter 		} while (/*CONSTCOND*/ 0);
    642   1.1   thorpej 		break;
    643   1.1   thorpej 
    644   1.1   thorpej 	case RIO_CONFIG:
    645   1.1   thorpej 		do {
    646   1.1   thorpej 			struct rio_conf *fc;
    647   1.1   thorpej 			rio_t	*new;
    648   1.1   thorpej 			int s, limit, i;
    649   1.1   thorpej 
    650   1.1   thorpej 			fc = (struct rio_conf *)addr;
    651   1.1   thorpej 			if ((rqp = altq_lookup(fc->iface.rio_ifname,
    652   1.1   thorpej 					       ALTQT_RIO)) == NULL) {
    653   1.1   thorpej 				error = EBADF;
    654   1.1   thorpej 				break;
    655   1.1   thorpej 			}
    656   1.1   thorpej 
    657   1.1   thorpej 			new = rio_alloc(fc->rio_weight, &fc->q_params[0],
    658   1.1   thorpej 					fc->rio_flags, fc->rio_pkttime);
    659   1.1   thorpej 			if (new == NULL) {
    660   1.1   thorpej 				error = ENOMEM;
    661   1.1   thorpej 				break;
    662   1.1   thorpej 			}
    663   1.1   thorpej 
    664   1.3   thorpej 			s = splnet();
    665   1.1   thorpej 			_flushq(rqp->rq_q);
    666   1.1   thorpej 			limit = fc->rio_limit;
    667   1.1   thorpej 			if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
    668   1.1   thorpej 				limit = fc->q_params[RIO_NDROPPREC-1].th_max;
    669   1.1   thorpej 			qlimit(rqp->rq_q) = limit;
    670   1.1   thorpej 
    671   1.1   thorpej 			rio_destroy(rqp->rq_rio);
    672   1.1   thorpej 			rqp->rq_rio = new;
    673   1.1   thorpej 
    674   1.1   thorpej 			splx(s);
    675   1.1   thorpej 
    676   1.1   thorpej 			/* write back new values */
    677   1.1   thorpej 			fc->rio_limit = limit;
    678   1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++) {
    679   1.1   thorpej 				fc->q_params[i].inv_pmax =
    680   1.1   thorpej 					rqp->rq_rio->rio_precstate[i].inv_pmax;
    681   1.1   thorpej 				fc->q_params[i].th_min =
    682   1.1   thorpej 					rqp->rq_rio->rio_precstate[i].th_min;
    683   1.1   thorpej 				fc->q_params[i].th_max =
    684   1.1   thorpej 					rqp->rq_rio->rio_precstate[i].th_max;
    685   1.1   thorpej 			}
    686  1.14     peter 		} while (/*CONSTCOND*/ 0);
    687   1.1   thorpej 		break;
    688   1.1   thorpej 
    689   1.1   thorpej 	case RIO_SETDEFAULTS:
    690   1.1   thorpej 		do {
    691   1.1   thorpej 			struct redparams *rp;
    692   1.1   thorpej 			int i;
    693   1.1   thorpej 
    694   1.1   thorpej 			rp = (struct redparams *)addr;
    695   1.1   thorpej 			for (i = 0; i < RIO_NDROPPREC; i++)
    696   1.1   thorpej 				default_rio_params[i] = rp[i];
    697  1.14     peter 		} while (/*CONSTCOND*/ 0);
    698   1.1   thorpej 		break;
    699   1.1   thorpej 
    700   1.1   thorpej 	default:
    701   1.1   thorpej 		error = EINVAL;
    702   1.1   thorpej 		break;
    703   1.1   thorpej 	}
    704   1.1   thorpej 
    705   1.1   thorpej 	return error;
    706   1.1   thorpej }
    707   1.1   thorpej 
    708   1.1   thorpej static int
    709  1.14     peter rio_detach(rio_queue_t *rqp)
    710   1.1   thorpej {
    711   1.1   thorpej 	rio_queue_t *tmp;
    712   1.1   thorpej 	int error = 0;
    713   1.1   thorpej 
    714   1.1   thorpej 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
    715   1.1   thorpej 		altq_disable(rqp->rq_ifq);
    716   1.1   thorpej 
    717   1.1   thorpej 	if ((error = altq_detach(rqp->rq_ifq)))
    718   1.1   thorpej 		return (error);
    719   1.1   thorpej 
    720   1.1   thorpej 	if (rio_list == rqp)
    721   1.1   thorpej 		rio_list = rqp->rq_next;
    722   1.1   thorpej 	else {
    723   1.1   thorpej 		for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
    724   1.1   thorpej 			if (tmp->rq_next == rqp) {
    725   1.1   thorpej 				tmp->rq_next = rqp->rq_next;
    726   1.1   thorpej 				break;
    727   1.1   thorpej 			}
    728   1.1   thorpej 		if (tmp == NULL)
    729   1.1   thorpej 			printf("rio_detach: no state found in rio_list!\n");
    730   1.1   thorpej 	}
    731   1.1   thorpej 
    732   1.1   thorpej 	rio_destroy(rqp->rq_rio);
    733   1.9  christos 	free(rqp->rq_q, M_DEVBUF);
    734   1.9  christos 	free(rqp, M_DEVBUF);
    735   1.1   thorpej 	return (error);
    736   1.1   thorpej }
    737   1.1   thorpej 
    738   1.1   thorpej /*
    739   1.1   thorpej  * rio support routines
    740   1.1   thorpej  */
    741   1.1   thorpej static int
    742  1.13  christos rio_request(struct ifaltq *ifq, int req, void *arg __unused)
    743   1.1   thorpej {
    744   1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    745   1.1   thorpej 
    746   1.1   thorpej 	switch (req) {
    747   1.1   thorpej 	case ALTRQ_PURGE:
    748   1.1   thorpej 		_flushq(rqp->rq_q);
    749   1.1   thorpej 		if (ALTQ_IS_ENABLED(ifq))
    750   1.1   thorpej 			ifq->ifq_len = 0;
    751   1.1   thorpej 		break;
    752   1.1   thorpej 	}
    753   1.1   thorpej 	return (0);
    754   1.1   thorpej }
    755   1.1   thorpej 
    756   1.1   thorpej /*
    757   1.1   thorpej  * enqueue routine:
    758   1.1   thorpej  *
    759   1.1   thorpej  *	returns: 0 when successfully queued.
    760   1.1   thorpej  *		 ENOBUFS when drop occurs.
    761   1.1   thorpej  */
    762   1.1   thorpej static int
    763  1.14     peter rio_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
    764   1.1   thorpej {
    765   1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    766   1.1   thorpej 	int error = 0;
    767   1.1   thorpej 
    768   1.1   thorpej 	if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0)
    769   1.1   thorpej 		ifq->ifq_len++;
    770   1.1   thorpej 	else
    771   1.1   thorpej 		error = ENOBUFS;
    772   1.1   thorpej 	return error;
    773   1.1   thorpej }
    774   1.1   thorpej 
    775   1.1   thorpej /*
    776   1.1   thorpej  * dequeue routine:
    777   1.3   thorpej  *	must be called in splnet.
    778   1.1   thorpej  *
    779   1.1   thorpej  *	returns: mbuf dequeued.
    780   1.1   thorpej  *		 NULL when no packet is available in the queue.
    781   1.1   thorpej  */
    782   1.1   thorpej 
    783   1.1   thorpej static struct mbuf *
    784  1.14     peter rio_dequeue(struct ifaltq *ifq, int op)
    785   1.1   thorpej {
    786   1.1   thorpej 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
    787   1.1   thorpej 	struct mbuf *m = NULL;
    788   1.1   thorpej 
    789   1.1   thorpej 	if (op == ALTDQ_POLL)
    790   1.1   thorpej 		return qhead(rqp->rq_q);
    791   1.1   thorpej 
    792   1.1   thorpej 	m = rio_getq(rqp->rq_rio, rqp->rq_q);
    793   1.1   thorpej 	if (m != NULL)
    794   1.1   thorpej 		ifq->ifq_len--;
    795   1.1   thorpej 	return m;
    796   1.1   thorpej }
    797   1.1   thorpej 
#ifdef KLD_MODULE

/*
 * loadable module glue, compiled only when KLD_MODULE is defined.
 * rio_sw lists the discipline name followed by the rioopen, rioclose
 * and rioioctl entry points, and is handed to ALTQ_MODULE below.
 */
static struct altqsw rio_sw = {
	"rio",
	rioopen,
	rioclose,
	rioioctl
};

ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
MODULE_VERSION(altq_rio, 1);
/* NOTE(review): presumably requires altq_red at exactly version 1 - confirm */
MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);

#endif /* KLD_MODULE */
    808  1.14     peter #endif /* ALTQ3_COMPAT */
    809   1.1   thorpej 
    810   1.1   thorpej #endif /* ALTQ_RIO */
    811