Home | History | Annotate | Line # | Download | only in altq
altq_subr.c revision 1.12.4.1
      1 /*	$NetBSD: altq_subr.c,v 1.12.4.1 2006/06/21 14:47:46 yamt Exp $	*/
      2 /*	$KAME: altq_subr.c,v 1.11 2002/01/11 08:11:49 kjc Exp $	*/
      3 
      4 /*
      5  * Copyright (C) 1997-2002
      6  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27  * SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.12.4.1 2006/06/21 14:47:46 yamt Exp $");
     32 
     33 #if defined(__FreeBSD__) || defined(__NetBSD__)
     34 #include "opt_altq.h"
     35 #if (__FreeBSD__ != 2)
     36 #include "opt_inet.h"
     37 #ifdef __FreeBSD__
     38 #include "opt_inet6.h"
     39 #endif
     40 #endif
     41 #endif /* __FreeBSD__ || __NetBSD__ */
     42 
     43 #include <sys/param.h>
     44 #include <sys/malloc.h>
     45 #include <sys/mbuf.h>
     46 #include <sys/systm.h>
     47 #include <sys/proc.h>
     48 #include <sys/socket.h>
     49 #include <sys/socketvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/errno.h>
     52 #include <sys/syslog.h>
     53 #include <sys/sysctl.h>
     54 #include <sys/queue.h>
     55 
     56 #include <net/if.h>
     57 #include <net/if_dl.h>
     58 #include <net/if_types.h>
     59 
     60 #include <netinet/in.h>
     61 #include <netinet/in_systm.h>
     62 #include <netinet/ip.h>
     63 #ifdef INET6
     64 #include <netinet/ip6.h>
     65 #endif
     66 #include <netinet/tcp.h>
     67 #include <netinet/udp.h>
     68 
     69 #include <altq/altq.h>
     70 #include <altq/altq_conf.h>
     71 
     72 /* machine dependent clock related includes */
     73 #ifdef __FreeBSD__
     74 #include "opt_cpu.h"	/* for FreeBSD-2.2.8 to get i586_ctr_freq */
     75 #include <machine/clock.h>
     76 #endif
     77 #if defined(__i386__)
     78 #include <machine/specialreg.h>		/* for CPUID_TSC */
     79 #ifdef __FreeBSD__
     80 #include <machine/md_var.h>		/* for cpu_feature */
     81 #elif defined(__NetBSD__) || defined(__OpenBSD__)
     82 #include <machine/cpu.h>		/* for cpu_feature */
     83 #endif
     84 #endif /* __i386__ */
     85 
     86 /*
     87  * internal function prototypes
     88  */
     89 static void	tbr_timeout __P((void *));
     90 static int 	extract_ports4 __P((struct mbuf *, struct ip *,
     91 				    struct flowinfo_in *));
     92 #ifdef INET6
     93 static int 	extract_ports6 __P((struct mbuf *, struct ip6_hdr *,
     94 				    struct flowinfo_in6 *));
     95 #endif
     96 static int	apply_filter4 __P((u_int32_t, struct flow_filter *,
     97 				   struct flowinfo_in *));
     98 static int	apply_ppfilter4 __P((u_int32_t, struct flow_filter *,
     99 				     struct flowinfo_in *));
    100 #ifdef INET6
    101 static int	apply_filter6 __P((u_int32_t, struct flow_filter6 *,
    102 					   struct flowinfo_in6 *));
    103 #endif
    104 static int	apply_tosfilter4 __P((u_int32_t, struct flow_filter *,
    105 					     struct flowinfo_in *));
    106 static u_long	get_filt_handle __P((struct acc_classifier *, int));
    107 static struct acc_filter *filth_to_filtp __P((struct acc_classifier *,
    108 					      u_long));
    109 static u_int32_t filt2fibmask __P((struct flow_filter *));
    110 
    111 static void 	ip4f_cache __P((struct ip *, struct flowinfo_in *));
    112 static int 	ip4f_lookup __P((struct ip *, struct flowinfo_in *));
    113 static int 	ip4f_init __P((void));
    114 static struct ip4_frag	*ip4f_alloc __P((void));
    115 static void 	ip4f_free __P((struct ip4_frag *));
    116 
    117 int (*altq_input) __P((struct mbuf *, int)) = NULL;
    118 static int tbr_timer = 0;	/* token bucket regulator timer */
    119 static struct callout tbr_callout = CALLOUT_INITIALIZER;
    120 
    121 /*
    122  * alternate queueing support routines
    123  */
    124 
    125 /* look up the queue state by the interface name and the queuing type. */
    126 void *
    127 altq_lookup(name, type)
    128 	char *name;
    129 	int type;
    130 {
    131 	struct ifnet *ifp;
    132 
    133 	if ((ifp = ifunit(name)) != NULL) {
    134 		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
    135 			return (ifp->if_snd.altq_disc);
    136 	}
    137 
    138 	return NULL;
    139 }
    140 
    141 int
    142 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
    143 	struct ifaltq *ifq;
    144 	int type;
    145 	void *discipline;
    146 	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
    147 	struct mbuf *(*dequeue)(struct ifaltq *, int);
    148 	int (*request)(struct ifaltq *, int, void *);
    149 	void *clfier;
    150 	void *(*classify)(void *, struct mbuf *, int);
    151 {
    152 	if (!ALTQ_IS_READY(ifq))
    153 		return ENXIO;
    154 	if (ALTQ_IS_ENABLED(ifq))
    155 		return EBUSY;
    156 	if (ALTQ_IS_ATTACHED(ifq))
    157 		return EEXIST;
    158 	ifq->altq_type     = type;
    159 	ifq->altq_disc     = discipline;
    160 	ifq->altq_enqueue  = enqueue;
    161 	ifq->altq_dequeue  = dequeue;
    162 	ifq->altq_request  = request;
    163 	ifq->altq_clfier   = clfier;
    164 	ifq->altq_classify = classify;
    165 	ifq->altq_flags &= ALTQF_CANTCHANGE;
    166 #ifdef ALTQ_KLD
    167 	altq_module_incref(type);
    168 #endif
    169 	return 0;
    170 }
    171 
    172 int
    173 altq_detach(ifq)
    174 	struct ifaltq *ifq;
    175 {
    176 	if (!ALTQ_IS_READY(ifq))
    177 		return ENXIO;
    178 	if (ALTQ_IS_ENABLED(ifq))
    179 		return EBUSY;
    180 	if (!ALTQ_IS_ATTACHED(ifq))
    181 		return (0);
    182 
    183 #ifdef ALTQ_KLD
    184 	altq_module_declref(ifq->altq_type);
    185 #endif
    186 	ifq->altq_type     = ALTQT_NONE;
    187 	ifq->altq_disc     = NULL;
    188 	ifq->altq_enqueue  = NULL;
    189 	ifq->altq_dequeue  = NULL;
    190 	ifq->altq_request  = NULL;
    191 	ifq->altq_clfier   = NULL;
    192 	ifq->altq_classify = NULL;
    193 	ifq->altq_flags &= ALTQF_CANTCHANGE;
    194 	return 0;
    195 }
    196 
    197 int
    198 altq_enable(ifq)
    199 	struct ifaltq *ifq;
    200 {
    201 	int s;
    202 
    203 	if (!ALTQ_IS_READY(ifq))
    204 		return ENXIO;
    205 	if (ALTQ_IS_ENABLED(ifq))
    206 		return 0;
    207 
    208 	s = splnet();
    209 	IFQ_PURGE(ifq);
    210 	ASSERT(ifq->ifq_len == 0);
    211 	ifq->altq_flags |= ALTQF_ENABLED;
    212 	if (ifq->altq_clfier != NULL)
    213 		ifq->altq_flags |= ALTQF_CLASSIFY;
    214 	splx(s);
    215 
    216 	return 0;
    217 }
    218 
    219 int
    220 altq_disable(ifq)
    221 	struct ifaltq *ifq;
    222 {
    223 	int s;
    224 
    225 	if (!ALTQ_IS_ENABLED(ifq))
    226 		return 0;
    227 
    228 	s = splnet();
    229 	IFQ_PURGE(ifq);
    230 	ASSERT(ifq->ifq_len == 0);
    231 	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
    232 	splx(s);
    233 	return 0;
    234 }
    235 
/*
 * report a failed ASSERT() and panic; called via the ASSERT macro,
 * never returns.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
    246 
    247 /*
    248  * internal representation of token bucket parameters
    249  *	rate: 	byte_per_unittime << 32
    250  *		(((bits_per_sec) / 8) << 32) / machclk_freq
    251  *	depth:	byte << 32
    252  *
    253  */
    254 #define	TBR_SHIFT	32
    255 #define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
    256 #define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
    257 
/*
 * tbr_dequeue: dequeue from ifq subject to its token bucket regulator.
 * op is ALTDQ_POLL or ALTDQ_REMOVE.  returns the mbuf, or NULL when the
 * bucket is out of tokens or the underlying queue is empty.
 */
struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			/* after filluptime ticks the bucket is simply full */
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				/* credit tokens for the elapsed ticks,
				   clipped to the bucket depth */
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	/* dequeue via the discipline when enabled, else via the plain FIFO */
	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			IF_POLL(ifq, m);
		else
			IF_DEQUEUE(ifq, m);
	}

	/* charge the bucket only when a packet actually leaves the queue */
	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}
    304 
    305 /*
    306  * set a token bucket regulator.
    307  * if the specified rate is zero, the token bucket regulator is deleted.
    308  */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	/* the regulator is driven by the machine clock; initialize lazily */
	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no CPU clock available!\n");
		return (ENXIO);
	}

	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL)
			return (ENOENT);
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_WAITOK|M_ZERO);
	if (tbr == NULL)
		return (ENOMEM);

	/*
	 * convert the profile to the scaled internal representation:
	 * rate in bytes per machclk tick << 32, depth in bytes << 32
	 * (see the TBR_SHIFT comment above).
	 */
	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		/* zero scaled rate: bucket effectively never refills */
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	/* start with a full bucket */
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		/* first tbr in the system: make sure the timer is running */
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	return (0);
}
    359 
    360 /*
    361  * tbr_timeout goes through the interface list, and kicks the drivers
    362  * if necessary.
    363  */
static void
tbr_timeout(arg)
	void *arg;
{
	struct ifnet *ifp;
	int active, s;

	active = 0;
	s = splnet();
	/* interface-list traversal differs per OS/version */
#ifdef __FreeBSD__
#if (__FreeBSD_version < 300000)
	for (ifp = ifnet; ifp; ifp = ifp->if_next)
#else
	for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_link.tqe_next)
#endif
#else /* !FreeBSD */
	for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_list.tqe_next)
#endif
	{
		/* only interfaces with a token bucket regulator matter */
		if (!TBR_IS_ENABLED(&ifp->if_snd))
			continue;
		active++;
		/* kick the driver if packets are waiting to be sent */
		if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL)
			(*ifp->if_start)(ifp);
	}
	splx(s);
	/* re-arm while any interface still has a tbr; otherwise stop */
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
#if defined(__alpha__) && !defined(ALTQ_NOPCC)
	{
		/*
		 * XXX read out the machine dependent clock once a second
		 * to detect counter wrap-around.
		 */
		static u_int cnt;

		if (++cnt >= hz) {
			(void)read_machclk();
			cnt = 0;
		}
	}
#endif /* __alpha__ && !ALTQ_NOPCC */
}
    409 
    410 /*
    411  * get token bucket regulator profile
    412  */
    413 int
    414 tbr_get(ifq, profile)
    415 	struct ifaltq *ifq;
    416 	struct tb_profile *profile;
    417 {
    418 	struct tb_regulator *tbr;
    419 
    420 	if ((tbr = ifq->altq_tbr) == NULL) {
    421 		profile->rate = 0;
    422 		profile->depth = 0;
    423 	} else {
    424 		profile->rate =
    425 		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
    426 		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
    427 	}
    428 	return (0);
    429 }
    430 
    431 
    432 #ifndef IPPROTO_ESP
    433 #define	IPPROTO_ESP	50		/* encapsulating security payload */
    434 #endif
    435 #ifndef IPPROTO_AH
    436 #define	IPPROTO_AH	51		/* authentication header */
    437 #endif
    438 
    439 /*
    440  * extract flow information from a given packet.
    441  * filt_mask shows flowinfo fields required.
    442  * we assume the ip header is in one mbuf, and addresses and ports are
    443  * in network byte order.
    444  */
/* returns 1 on success; 0 when the flow cannot be parsed (flow is then
   marked AF_UNSPEC). */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t	filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		/* traffic class: bits 20-27 of the host-order flow word */
		fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		/* flow label: low 20 bits of ip6_flow (network order) */
		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}
    530 
    531 /*
    532  * helper routine to extract port numbers
    533  */
/*
 * structure for ipsec and ipv6 option header template.
 * overlays the first 8 bytes of AH and of the ipv6 extension headers
 * (next header, length, then data); ah_spi lines up with the SPI field
 * of an authentication header.
 */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
    542 
    543 /*
    544  * extract port numbers from a ipv4 packet.
    545  */
/*
 * extract port numbers from a ipv4 packet.
 * fills fin's sport/dport (and gpi for ipsec headers) and returns 1;
 * returns 0 when the header chain cannot be followed.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int 	off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	/* off: offset of the transport header within m0 */
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	/* walk forward to the mbuf that actually holds offset 'off' */
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);  /* bogus ip_hl! */
	}
	/* need at least 4 contiguous bytes (port pair or SPI) */
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		/* tcp and udp both begin with src/dst port */
		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0){
			u_int32_t *gpi;

			/* first 32-bit word of ESP is the SPI */
			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi   = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* AH length is in 4-byte units past the first 8 */
			off += 8 + (opt6->opt6_hlen * 4);
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* goto the next header */
		goto again;
#endif  /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}
    642 
    643 #ifdef INET6
/*
 * extract port numbers from an ipv6 packet by walking the extension
 * header chain.  fills fi6_sport/fi6_dport (and fi6_gpi for ipsec)
 * and returns 1; returns 0 when the chain cannot be followed.
 */
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* advance to the mbuf that actually holds offset 'off' */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/* need at least 4 contiguous bytes at this offset */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* tcp and udp both begin with src/dst port */
			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				/* first 32-bit word of ESP is the SPI */
				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi   = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* AH length is in 4-byte units past the first 8 */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* option header length is in 8-byte units */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
    739 #endif /* INET6 */
    740 
    741 /*
    742  * altq common classifier
    743  */
/*
 * add a filter to the classifier; the new filter's handle is returned
 * through phandle.  returns 0 on success or an errno value.
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK|M_ZERO);
	if (afp == NULL)
		return (ENOMEM);

	afp->f_filter = *filter;	/* private copy of the filter */
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses  */
		   filter4->ff_flow.fi_dst.s_addr &=
		       filter4->ff_mask.mask_dst.s_addr;
		   filter4->ff_flow.fi_src.s_addr &=
		       filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		/* same wildcard/full-mask defaulting as the inet case */
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses  */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		/* ipv6 filters hash on the flow label, not the dst addr */
		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}
    864 
    865 int
    866 acc_delete_filter(classifier, handle)
    867 	struct acc_classifier *classifier;
    868 	u_long handle;
    869 {
    870 	struct acc_filter *afp;
    871 	int	s;
    872 
    873 	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
    874 		return (EINVAL);
    875 
    876 	s = splnet();
    877 	LIST_REMOVE(afp, f_chain);
    878 	splx(s);
    879 
    880 	free(afp, M_DEVBUF);
    881 
    882 	/* todo: update filt_bmask */
    883 
    884 	return (0);
    885 }
    886 
    887 /*
    888  * delete filters referencing to the specified class.
    889  * if the all flag is not 0, delete all the filters.
    890  */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void	*class;
	int	all;
{
	struct acc_filter *afp;
	int	i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			/*
			 * freeing an entry invalidates the list iterator,
			 * so each removal breaks out and rescans from the
			 * head; the bucket is done when a full scan finds
			 * no match (afp == NULL).
			 */
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}
    919 
/*
 * classify a packet: extract its flow info, then search the filter
 * lists; returns the class of the first matching filter, or NULL when
 * no filter matches.
 */
void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlable can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
   1019 
   1020 static int
   1021 apply_filter4(fbmask, filt, pkt)
   1022 	u_int32_t	fbmask;
   1023 	struct flow_filter *filt;
   1024 	struct flowinfo_in *pkt;
   1025 {
   1026 	if (filt->ff_flow.fi_family != AF_INET)
   1027 		return (0);
   1028 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
   1029 		return (0);
   1030 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
   1031 		return (0);
   1032 	if ((fbmask & FIMB4_DADDR) &&
   1033 	    filt->ff_flow.fi_dst.s_addr !=
   1034 	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
   1035 		return (0);
   1036 	if ((fbmask & FIMB4_SADDR) &&
   1037 	    filt->ff_flow.fi_src.s_addr !=
   1038 	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
   1039 		return (0);
   1040 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
   1041 		return (0);
   1042 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
   1043 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
   1044 		return (0);
   1045 	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
   1046 		return (0);
   1047 	/* match */
   1048 	return (1);
   1049 }
   1050 
   1051 /*
   1052  * filter matching function optimized for a common case that checks
   1053  * only protocol and port numbers
   1054  */
   1055 static int
   1056 apply_ppfilter4(fbmask, filt, pkt)
   1057 	u_int32_t	fbmask;
   1058 	struct flow_filter *filt;
   1059 	struct flowinfo_in *pkt;
   1060 {
   1061 	if (filt->ff_flow.fi_family != AF_INET)
   1062 		return (0);
   1063 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
   1064 		return (0);
   1065 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
   1066 		return (0);
   1067 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
   1068 		return (0);
   1069 	/* match */
   1070 	return (1);
   1071 }
   1072 
   1073 /*
   1074  * filter matching function only for tos field.
   1075  */
   1076 static int
   1077 apply_tosfilter4(fbmask, filt, pkt)
   1078 	u_int32_t	fbmask;
   1079 	struct flow_filter *filt;
   1080 	struct flowinfo_in *pkt;
   1081 {
   1082 	if (filt->ff_flow.fi_family != AF_INET)
   1083 		return (0);
   1084 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
   1085 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
   1086 		return (0);
   1087 	/* match */
   1088 	return (1);
   1089 }
   1090 
   1091 #ifdef INET6
   1092 static int
   1093 apply_filter6(fbmask, filt, pkt)
   1094 	u_int32_t	fbmask;
   1095 	struct flow_filter6 *filt;
   1096 	struct flowinfo_in6 *pkt;
   1097 {
   1098 	int i;
   1099 
   1100 	if (filt->ff_flow6.fi6_family != AF_INET6)
   1101 		return (0);
   1102 	if ((fbmask & FIMB6_FLABEL) &&
   1103 	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
   1104 		return (0);
   1105 	if ((fbmask & FIMB6_PROTO) &&
   1106 	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
   1107 		return (0);
   1108 	if ((fbmask & FIMB6_SPORT) &&
   1109 	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
   1110 		return (0);
   1111 	if ((fbmask & FIMB6_DPORT) &&
   1112 	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
   1113 		return (0);
   1114 	if (fbmask & FIMB6_SADDR) {
   1115 		for (i = 0; i < 4; i++)
   1116 			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
   1117 			    (pkt->fi6_src.s6_addr32[i] &
   1118 			     filt->ff_mask6.mask6_src.s6_addr32[i]))
   1119 				return (0);
   1120 	}
   1121 	if (fbmask & FIMB6_DADDR) {
   1122 		for (i = 0; i < 4; i++)
   1123 			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
   1124 			    (pkt->fi6_dst.s6_addr32[i] &
   1125 			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
   1126 				return (0);
   1127 	}
   1128 	if ((fbmask & FIMB6_TCLASS) &&
   1129 	    filt->ff_flow6.fi6_tclass !=
   1130 	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
   1131 		return (0);
   1132 	if ((fbmask & FIMB6_GPI) &&
   1133 	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
   1134 		return (0);
   1135 	/* match */
   1136 	return (1);
   1137 }
   1138 #endif /* INET6 */
   1139 
   1140 /*
   1141  *  filter handle:
   1142  *	bit 20-28: index to the filter hash table
   1143  *	bit  0-19: unique id in the hash bucket.
   1144  */
   1145 static u_long
   1146 get_filt_handle(classifier, i)
   1147 	struct acc_classifier *classifier;
   1148 	int	i;
   1149 {
   1150 	static u_long handle_number = 1;
   1151 	u_long 	handle;
   1152 	struct acc_filter *afp;
   1153 
   1154 	while (1) {
   1155 		handle = handle_number++ & 0x000fffff;
   1156 
   1157 		if (LIST_EMPTY(&classifier->acc_filters[i]))
   1158 			break;
   1159 
   1160 		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
   1161 			if ((afp->f_handle & 0x000fffff) == handle)
   1162 				break;
   1163 		if (afp == NULL)
   1164 			break;
   1165 		/* this handle is already used, try again */
   1166 	}
   1167 
   1168 	return ((i << 20) | handle);
   1169 }
   1170 
   1171 /* convert filter handle to filter pointer */
   1172 static struct acc_filter *
   1173 filth_to_filtp(classifier, handle)
   1174 	struct acc_classifier *classifier;
   1175 	u_long handle;
   1176 {
   1177 	struct acc_filter *afp;
   1178 	int	i;
   1179 
   1180 	i = ACC_GET_HINDEX(handle);
   1181 
   1182 	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
   1183 		if (afp->f_handle == handle)
   1184 			return (afp);
   1185 
   1186 	return (NULL);
   1187 }
   1188 
   1189 /* create flowinfo bitmask */
   1190 static u_int32_t
   1191 filt2fibmask(filt)
   1192 	struct flow_filter *filt;
   1193 {
   1194 	u_int32_t mask = 0;
   1195 #ifdef INET6
   1196 	struct flow_filter6 *filt6;
   1197 #endif
   1198 
   1199 	switch (filt->ff_flow.fi_family) {
   1200 	case AF_INET:
   1201 		if (filt->ff_flow.fi_proto != 0)
   1202 			mask |= FIMB4_PROTO;
   1203 		if (filt->ff_flow.fi_tos != 0)
   1204 			mask |= FIMB4_TOS;
   1205 		if (filt->ff_flow.fi_dst.s_addr != 0)
   1206 			mask |= FIMB4_DADDR;
   1207 		if (filt->ff_flow.fi_src.s_addr != 0)
   1208 			mask |= FIMB4_SADDR;
   1209 		if (filt->ff_flow.fi_sport != 0)
   1210 			mask |= FIMB4_SPORT;
   1211 		if (filt->ff_flow.fi_dport != 0)
   1212 			mask |= FIMB4_DPORT;
   1213 		if (filt->ff_flow.fi_gpi != 0)
   1214 			mask |= FIMB4_GPI;
   1215 		break;
   1216 #ifdef INET6
   1217 	case AF_INET6:
   1218 		filt6 = (struct flow_filter6 *)filt;
   1219 
   1220 		if (filt6->ff_flow6.fi6_proto != 0)
   1221 			mask |= FIMB6_PROTO;
   1222 		if (filt6->ff_flow6.fi6_tclass != 0)
   1223 			mask |= FIMB6_TCLASS;
   1224 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
   1225 			mask |= FIMB6_DADDR;
   1226 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
   1227 			mask |= FIMB6_SADDR;
   1228 		if (filt6->ff_flow6.fi6_sport != 0)
   1229 			mask |= FIMB6_SPORT;
   1230 		if (filt6->ff_flow6.fi6_dport != 0)
   1231 			mask |= FIMB6_DPORT;
   1232 		if (filt6->ff_flow6.fi6_gpi != 0)
   1233 			mask |= FIMB6_GPI;
   1234 		if (filt6->ff_flow6.fi6_flowlabel != 0)
   1235 			mask |= FIMB6_FLABEL;
   1236 		break;
   1237 #endif /* INET6 */
   1238 	}
   1239 	return (mask);
   1240 }
   1241 
   1242 
   1243 /*
   1244  * helper functions to handle IPv4 fragments.
   1245  * currently only in-sequence fragments are handled.
   1246  *	- fragment info is cached in a LRU list.
   1247  *	- when a first fragment is found, cache its flow info.
   1248  *	- when a non-first fragment is found, lookup the cache.
   1249  */
   1250 
/*
 * an entry of the IPv4 fragment cache.  entries live on a fixed-size
 * LRU list (ip4f_list below); ip4f_valid distinguishes live entries
 * from recycled ones.
 */
struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* LRU list linkage */
    char    ip4f_valid;			/* nonzero if this entry is in use */
    u_short ip4f_id;			/* ip_id of the fragmented datagram */
    struct flowinfo_in ip4f_info;	/* cached flow info (ports, gpi) */
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
   1261 
   1262 
   1263 static void
   1264 ip4f_cache(ip, fin)
   1265 	struct ip *ip;
   1266 	struct flowinfo_in *fin;
   1267 {
   1268 	struct ip4_frag *fp;
   1269 
   1270 	if (TAILQ_EMPTY(&ip4f_list)) {
   1271 		/* first time call, allocate fragment cache entries. */
   1272 		if (ip4f_init() < 0)
   1273 			/* allocation failed! */
   1274 			return;
   1275 	}
   1276 
   1277 	fp = ip4f_alloc();
   1278 	fp->ip4f_id = ip->ip_id;
   1279 	fp->ip4f_info.fi_proto = ip->ip_p;
   1280 	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
   1281 	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
   1282 
   1283 	/* save port numbers */
   1284 	fp->ip4f_info.fi_sport = fin->fi_sport;
   1285 	fp->ip4f_info.fi_dport = fin->fi_dport;
   1286 	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
   1287 }
   1288 
/*
 * Look up the fragment cache for a non-first fragment.  On a hit the
 * cached port/GPI info is copied into *fin and 1 is returned; on a
 * miss, 0.  The cache entry is released when the last fragment
 * (IP_MF clear) is seen.
 */
static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	/*
	 * valid entries are kept at the head of the LRU list, so the
	 * scan can stop at the first invalid entry.
	 */
	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi   = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}
   1319 
   1320 static int
   1321 ip4f_init(void)
   1322 {
   1323 	struct ip4_frag *fp;
   1324 	int i;
   1325 
   1326 	TAILQ_INIT(&ip4f_list);
   1327 	for (i=0; i<IP4F_TABSIZE; i++) {
   1328 		fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT);
   1329 		if (fp == NULL) {
   1330 			printf("ip4f_init: can't alloc %dth entry!\n", i);
   1331 			if (i == 0)
   1332 				return (-1);
   1333 			return (0);
   1334 		}
   1335 		fp->ip4f_valid = 0;
   1336 		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
   1337 	}
   1338 	return (0);
   1339 }
   1340 
   1341 static struct ip4_frag *
   1342 ip4f_alloc(void)
   1343 {
   1344 	struct ip4_frag *fp;
   1345 
   1346 	/* reclaim an entry at the tail, put it at the head */
   1347 	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
   1348 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
   1349 	fp->ip4f_valid = 1;
   1350 	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
   1351 	return (fp);
   1352 }
   1353 
/*
 * Release a cache entry: invalidate it and move it to the list tail so
 * it becomes the next entry reclaimed by ip4f_alloc().
 */
static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}
   1362 
   1363 /*
   1364  * read and write diffserv field in IPv4 or IPv6 header
   1365  */
   1366 u_int8_t
   1367 read_dsfield(m, pktattr)
   1368 	struct mbuf *m;
   1369 	struct altq_pktattr *pktattr;
   1370 {
   1371 	struct mbuf *m0;
   1372 	u_int8_t ds_field = 0;
   1373 
   1374 	if (pktattr == NULL ||
   1375 	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
   1376 		return ((u_int8_t)0);
   1377 
   1378 	/* verify that pattr_hdr is within the mbuf data */
   1379 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
   1380 		if ((pktattr->pattr_hdr >= m0->m_data) &&
   1381 		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
   1382 			break;
   1383 	if (m0 == NULL) {
   1384 		/* ick, pattr_hdr is stale */
   1385 		pktattr->pattr_af = AF_UNSPEC;
   1386 #ifdef ALTQ_DEBUG
   1387 		printf("read_dsfield: can't locate header!\n");
   1388 #endif
   1389 		return ((u_int8_t)0);
   1390 	}
   1391 
   1392 	if (pktattr->pattr_af == AF_INET) {
   1393 		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
   1394 
   1395 		if (ip->ip_v != 4)
   1396 			return ((u_int8_t)0);	/* version mismatch! */
   1397 		ds_field = ip->ip_tos;
   1398 	}
   1399 #ifdef INET6
   1400 	else if (pktattr->pattr_af == AF_INET6) {
   1401 		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1402 		u_int32_t flowlabel;
   1403 
   1404 		flowlabel = ntohl(ip6->ip6_flow);
   1405 		if ((flowlabel >> 28) != 6)
   1406 			return ((u_int8_t)0);	/* version mismatch! */
   1407 		ds_field = (flowlabel >> 20) & 0xff;
   1408 	}
   1409 #endif
   1410 	return (ds_field);
   1411 }
   1412 
/*
 * write the diffserv field (IPv4 TOS byte or IPv6 traffic class) into
 * the packet header cached in pktattr, preserving the two low-order
 * CU/ECN bits of the existing value.  The IPv4 header checksum is
 * updated incrementally.  Silently returns for a NULL/non-IP pktattr,
 * a stale pattr_hdr, or an IP version mismatch.
 */
void
write_dsfield(m, pktattr, dsfield)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	u_int8_t dsfield;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;		/* nothing changed: skip checksum work */
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 * m is the 16-bit word (ver/hl << 8 | tos); the ver/hl
		 * byte is unchanged, so ~m + m' contributes a constant
		 * 0xff00 for it plus the old/new TOS bytes.
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		/*
		 * traffic class is bits 20-27.  the 0xf03fffff mask keeps
		 * the version and flow label plus bits 20-21 (the CU/ECN
		 * bits), mirroring the IPv4 "leave CU bits" behavior above.
		 */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
   1476 
   1477 
   1478 /*
   1479  * high resolution clock support taking advantage of a machine dependent
   1480  * high resolution time counter (e.g., timestamp counter of intel pentium).
   1481  * we assume
   1482  *  - 64-bit-long monotonically-increasing counter
   1483  *  - frequency range is 100M-4GHz (CPU speed)
   1484  */
u_int32_t machclk_freq = 0;	/* machine clock frequency in Hz; 0 until init_machclk() runs */
u_int32_t machclk_per_tick = 0;	/* machine clock cycles per kernel clock tick */
   1487 
   1488 #if (defined(__i386__) || defined(__alpha__)) && !defined(ALTQ_NOPCC)
   1489 
   1490 #if defined(__FreeBSD__) && defined(SMP)
   1491 #error SMP system!  use ALTQ_NOPCC option.
   1492 #endif
   1493 
   1494 #ifdef __alpha__
   1495 #ifdef __FreeBSD__
   1496 extern u_int32_t cycles_per_sec;	/* alpha CPU clock frequency */
   1497 #elif defined(__NetBSD__) || defined(__OpenBSD__)
   1498 extern u_int64_t cycles_per_usec;	/* alpha CPU clock frequency */
   1499 #endif
   1500 #endif /* __alpha__ */
   1501 
/*
 * Determine machclk_freq (and machclk_per_tick) for the high
 * resolution clock.  First try the platform-provided TSC/PCC
 * frequency; if unknown, calibrate by comparing the machine clock
 * delta against a microtime() interval of about one second.
 */
void
init_machclk(void)
{
	/* sanity check */
#ifdef __i386__
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0) {
		printf("altq: TSC isn't available! use ALTQ_NOPCC option.\n");
		return;
	}
#endif

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#ifdef __i386__
#ifdef __FreeBSD__
#if (__FreeBSD_version > 300000)
	machclk_freq = tsc_freq;
#else
	machclk_freq = i586_ctr_freq;
#endif
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)curcpu()->ci_tsc_freq;
#elif defined(__OpenBSD__)
	machclk_freq = pentium_mhz * 1000000;
#endif
#elif defined(__alpha__)
#ifdef __FreeBSD__
	machclk_freq = cycles_per_sec;
#elif defined(__NetBSD__) || defined(__OpenBSD__)
	machclk_freq = (u_int32_t)(cycles_per_usec * 1000000);
#endif
#endif /* __alpha__ */

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		/*
		 * elapsed microseconds are measured with microtime(), so
		 * an early wakeup (PCATCH) only reduces precision, not
		 * correctness: freq = cycles * 1e6 / elapsed_usec.
		 */
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	/* NOTE(review): if calibration failed, machclk_per_tick stays 0 */
	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}
   1565 
   1566 #ifdef __alpha__
   1567 /*
   1568  * make a 64bit counter value out of the 32bit alpha processor cycle counter.
   1569  * read_machclk must be called within a half of its wrap-around cycle
   1570  * (about 5 sec for 400MHz CPU) to properly detect a counter wrap-around.
   1571  * tbr_timeout calls read_machclk once a second.
   1572  */
u_int64_t
read_machclk(void)
{
	/* upper: count of 32-bit counter wrap-arounds observed so far */
	static u_int32_t last_pcc, upper;
	u_int32_t pcc;

	pcc = (u_int32_t)alpha_rpcc();
	/*
	 * a non-increasing reading is taken as a wrap-around; this is
	 * valid only if calls are at most half a wrap period apart
	 * (see the comment above this function).
	 */
	if (pcc <= last_pcc)
		upper++;
	last_pcc = pcc;
	return (((u_int64_t)upper << 32) + pcc);
}
   1585 #endif /* __alpha__ */
   1586 #else /* !i386  && !alpha */
   1587 /* use microtime() for now */
/*
 * No usable CPU cycle counter on this platform: emulate a machine
 * clock by scaling microtime()'s 1MHz resolution up by MACHCLK_SHIFT
 * bits (MACHCLK_SHIFT is presumably defined in the ALTQ headers --
 * not visible in this file).
 */
void
init_machclk(void)
{
	machclk_freq = 1000000 << MACHCLK_SHIFT;
	machclk_per_tick = machclk_freq / hz;
	printf("altq: emulate %uHz CPU clock\n", machclk_freq);
}
   1595 #endif /* !i386 && !alpha */
   1596