      1 /*	$NetBSD: altq_subr.c,v 1.5.2.2 2002/01/10 19:35:58 thorpej Exp $	*/
      2 /*	$KAME: altq_subr.c,v 1.9 2001/09/04 06:31:15 kjc Exp $	*/
      3 
      4 /*
      5  * Copyright (C) 1997-2000
      6  *	Sony Computer Science Laboratories Inc.  All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
     18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
     21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27  * SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.5.2.2 2002/01/10 19:35:58 thorpej Exp $");
     32 
     33 #if defined(__FreeBSD__) || defined(__NetBSD__)
     34 #include "opt_altq.h"
     35 #if (__FreeBSD__ != 2)
     36 #include "opt_inet.h"
     37 #ifdef __FreeBSD__
     38 #include "opt_inet6.h"
     39 #endif
     40 #endif
     41 #endif /* __FreeBSD__ || __NetBSD__ */
     42 
     43 #include <sys/param.h>
     44 #include <sys/malloc.h>
     45 #include <sys/mbuf.h>
     46 #include <sys/systm.h>
     47 #include <sys/proc.h>
     48 #include <sys/socket.h>
     49 #include <sys/socketvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/errno.h>
     52 #include <sys/syslog.h>
     53 #include <sys/sysctl.h>
     54 #include <sys/queue.h>
     55 
     56 #include <net/if.h>
     57 #include <net/if_dl.h>
     58 #include <net/if_types.h>
     59 
     60 #include <netinet/in.h>
     61 #include <netinet/in_systm.h>
     62 #include <netinet/ip.h>
     63 #ifdef INET6
     64 #include <netinet/ip6.h>
     65 #endif
     66 #include <netinet/tcp.h>
     67 #include <netinet/udp.h>
     68 
     69 #include <altq/altq.h>
     70 #include <altq/altq_conf.h>
     71 
     72 #ifdef __FreeBSD__
     73 #include "opt_cpu.h"	/* for FreeBSD-2.2.8 to get i586_ctr_freq */
     74 #include <machine/clock.h>
     75 #endif
     76 
     77 /*
     78  * internal function prototypes
     79  */
     80 static void	tbr_timeout __P((void *));
     81 static int 	extract_ports4 __P((struct mbuf *, struct ip *,
     82 				    struct flowinfo_in *));
     83 #ifdef INET6
     84 static int 	extract_ports6 __P((struct mbuf *, struct ip6_hdr *,
     85 				    struct flowinfo_in6 *));
     86 #endif
     87 static int	apply_filter4 __P((u_int32_t, struct flow_filter *,
     88 				   struct flowinfo_in *));
     89 static int	apply_ppfilter4 __P((u_int32_t, struct flow_filter *,
     90 				     struct flowinfo_in *));
     91 #ifdef INET6
     92 static int	apply_filter6 __P((u_int32_t, struct flow_filter6 *,
     93 					   struct flowinfo_in6 *));
     94 #endif
     95 static int	apply_tosfilter4 __P((u_int32_t, struct flow_filter *,
     96 					     struct flowinfo_in *));
     97 static u_long	get_filt_handle __P((struct acc_classifier *, int));
     98 static struct acc_filter *filth_to_filtp __P((struct acc_classifier *,
     99 					      u_long));
    100 static u_int32_t filt2fibmask __P((struct flow_filter *));
    101 
    102 static void 	ip4f_cache __P((struct ip *, struct flowinfo_in *));
    103 static int 	ip4f_lookup __P((struct ip *, struct flowinfo_in *));
    104 static int 	ip4f_init __P((void));
    105 static struct ip4_frag	*ip4f_alloc __P((void));
    106 static void 	ip4f_free __P((struct ip4_frag *));
    107 
    108 int (*altq_input) __P((struct mbuf *, int)) = NULL;
    109 static int tbr_timer = 0;	/* token bucket regulator timer */
    110 static struct callout tbr_callout = CALLOUT_INITIALIZER;
    111 
    112 /*
    113  * alternate queueing support routines
    114  */
    115 
    116 /* look up the queue state by the interface name and the queuing type. */
    117 void *
    118 altq_lookup(name, type)
    119 	char *name;
    120 	int type;
    121 {
    122 	struct ifnet *ifp;
    123 
    124 	if ((ifp = ifunit(name)) != NULL) {
    125 		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
    126 			return (ifp->if_snd.altq_disc);
    127 	}
    128 
    129 	return NULL;
    130 }
    131 
    132 int
    133 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
    134 	struct ifaltq *ifq;
    135 	int type;
    136 	void *discipline;
    137 	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
    138 	struct mbuf *(*dequeue)(struct ifaltq *, int);
    139 	int (*request)(struct ifaltq *, int, void *);
    140 	void *clfier;
    141 	void *(*classify)(void *, struct mbuf *, int);
    142 {
    143 	if (!ALTQ_IS_READY(ifq))
    144 		return ENXIO;
    145 	if (ALTQ_IS_ENABLED(ifq))
    146 		return EBUSY;
    147 	if (ALTQ_IS_ATTACHED(ifq))
    148 		return EEXIST;
    149 	ifq->altq_type     = type;
    150 	ifq->altq_disc     = discipline;
    151 	ifq->altq_enqueue  = enqueue;
    152 	ifq->altq_dequeue  = dequeue;
    153 	ifq->altq_request  = request;
    154 	ifq->altq_clfier   = clfier;
    155 	ifq->altq_classify = classify;
    156 	ifq->altq_flags &= ALTQF_CANTCHANGE;
    157 #ifdef ALTQ_KLD
    158 	altq_module_incref(type);
    159 #endif
    160 	return 0;
    161 }
    162 
    163 int
    164 altq_detach(ifq)
    165 	struct ifaltq *ifq;
    166 {
    167 	if (!ALTQ_IS_READY(ifq))
    168 		return ENXIO;
    169 	if (ALTQ_IS_ENABLED(ifq))
    170 		return EBUSY;
    171 	if (!ALTQ_IS_ATTACHED(ifq))
    172 		return (0);
    173 
    174 #ifdef ALTQ_KLD
    175 	altq_module_declref(ifq->altq_type);
    176 #endif
    177 	ifq->altq_type     = ALTQT_NONE;
    178 	ifq->altq_disc     = NULL;
    179 	ifq->altq_enqueue  = NULL;
    180 	ifq->altq_dequeue  = NULL;
    181 	ifq->altq_request  = NULL;
    182 	ifq->altq_clfier   = NULL;
    183 	ifq->altq_classify = NULL;
    184 	ifq->altq_flags &= ALTQF_CANTCHANGE;
    185 	return 0;
    186 }
    187 
    188 int
    189 altq_enable(ifq)
    190 	struct ifaltq *ifq;
    191 {
    192 	int s;
    193 
    194 	if (!ALTQ_IS_READY(ifq))
    195 		return ENXIO;
    196 	if (ALTQ_IS_ENABLED(ifq))
    197 		return 0;
    198 
    199 	s = splnet();
    200 	IFQ_PURGE(ifq);
    201 	ASSERT(ifq->ifq_len == 0);
    202 	ifq->altq_flags |= ALTQF_ENABLED;
    203 	if (ifq->altq_clfier != NULL)
    204 		ifq->altq_flags |= ALTQF_CLASSIFY;
    205 	splx(s);
    206 
    207 	return 0;
    208 }
    209 
    210 int
    211 altq_disable(ifq)
    212 	struct ifaltq *ifq;
    213 {
    214 	int s;
    215 
    216 	if (!ALTQ_IS_ENABLED(ifq))
    217 		return 0;
    218 
    219 	s = splnet();
    220 	IFQ_PURGE(ifq);
    221 	ASSERT(ifq->ifq_len == 0);
    222 	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
    223 	splx(s);
    224 	return 0;
    225 }
    226 
    227 void
    228 altq_assert(file, line, failedexpr)
    229 	const char *file, *failedexpr;
    230 	int line;
    231 {
    232 	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
    233 		     failedexpr, file, line);
    234 	panic("altq assertion");
    235 	/* NOTREACHED */
    236 }
    237 
    238 /*
    239  * internal representation of token bucket parameters
    240  *	rate: 	byte_per_unittime << 32
    241  *		(((bits_per_sec) / 8) << 32) / machclk_freq
    242  *	depth:	byte << 32
    243  *
    244  */
    245 #define	TBR_SHIFT	32
    246 #define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
    247 #define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
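         /*
          * Illustrative example (figures added for clarity, not from the
          * original source): with machclk_freq at 1GHz, a 10Mbps profile
          * (1,250,000 bytes/sec) yields
          *	tbr_rate = TBR_SCALE(1250000) / 10^9 ~= 5368709,
          * i.e. about 0.00125 bytes per machine clock scaled by 2^32, and a
          * 1500-byte depth gives
          *	tbr_filluptime = TBR_SCALE(1500) / 5368709 ~= 1200000 clocks,
          * i.e. roughly 1.2msec to refill an empty bucket at 1GHz.
          */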
    248 
    249 struct mbuf *
    250 tbr_dequeue(ifq, op)
    251 	struct ifaltq *ifq;
    252 	int op;
    253 {
    254 	struct tb_regulator *tbr;
    255 	struct mbuf *m;
    256 	int64_t interval;
    257 	u_int64_t now;
    258 
    259 	tbr = ifq->altq_tbr;
    260 	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
    261 		/* if this is a remove after poll, bypass tbr check */
    262 	} else {
     263 		/* update the token only when it is depleted (<= 0) */
    264 		if (tbr->tbr_token <= 0) {
    265 			now = read_machclk();
    266 			interval = now - tbr->tbr_last;
    267 			if (interval >= tbr->tbr_filluptime)
    268 				tbr->tbr_token = tbr->tbr_depth;
    269 			else {
    270 				tbr->tbr_token += interval * tbr->tbr_rate;
    271 				if (tbr->tbr_token > tbr->tbr_depth)
    272 					tbr->tbr_token = tbr->tbr_depth;
    273 			}
    274 			tbr->tbr_last = now;
    275 		}
    276 		/* if token is still negative, don't allow dequeue */
    277 		if (tbr->tbr_token <= 0)
    278 			return (NULL);
    279 	}
    280 
    281 	if (ALTQ_IS_ENABLED(ifq))
    282 		m = (*ifq->altq_dequeue)(ifq, op);
    283 	else {
    284 		if (op == ALTDQ_POLL)
    285 			IF_POLL(ifq, m);
    286 		else
    287 			IF_DEQUEUE(ifq, m);
    288 	}
    289 
    290 	if (m != NULL && op == ALTDQ_REMOVE)
    291 		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
    292 	tbr->tbr_lastop = op;
    293 	return (m);
    294 }
    295 
    296 /*
    297  * set a token bucket regulator.
    298  * if the specified rate is zero, the token bucket regulator is deleted.
    299  */
    300 int
    301 tbr_set(ifq, profile)
    302 	struct ifaltq *ifq;
    303 	struct tb_profile *profile;
    304 {
    305 	struct tb_regulator *tbr, *otbr;
    306 
    307 	if (machclk_freq == 0)
    308 		init_machclk();
    309 	if (machclk_freq == 0) {
    310 		printf("tbr_set: no cpu clock available!\n");
    311 		return (ENXIO);
    312 	}
    313 
    314 	if (profile->rate == 0) {
    315 		/* delete this tbr */
    316 		if ((tbr = ifq->altq_tbr) == NULL)
    317 			return (ENOENT);
    318 		ifq->altq_tbr = NULL;
    319 		FREE(tbr, M_DEVBUF);
    320 		return (0);
    321 	}
    322 
    323 	MALLOC(tbr, struct tb_regulator *, sizeof(struct tb_regulator),
    324 	       M_DEVBUF, M_WAITOK);
    325 	if (tbr == NULL)
    326 		return (ENOMEM);
    327 	bzero(tbr, sizeof(struct tb_regulator));
    328 
    329 	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
    330 	tbr->tbr_depth = TBR_SCALE(profile->depth);
    331 	if (tbr->tbr_rate > 0)
    332 		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
    333 	else
    334 		tbr->tbr_filluptime = 0xffffffffffffffffLL;
    335 	tbr->tbr_token = tbr->tbr_depth;
    336 	tbr->tbr_last = read_machclk();
    337 	tbr->tbr_lastop = ALTDQ_REMOVE;
    338 
    339 	otbr = ifq->altq_tbr;
    340 	ifq->altq_tbr = tbr;	/* set the new tbr */
    341 
    342 	if (otbr != NULL)
    343 		FREE(otbr, M_DEVBUF);
    344 	else {
    345 		if (tbr_timer == 0) {
    346 			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
    347 			tbr_timer = 1;
    348 		}
    349 	}
    350 	return (0);
    351 }
    352 
    353 /*
    354  * tbr_timeout goes through the interface list, and kicks the drivers
    355  * if necessary.
    356  */
    357 static void
    358 tbr_timeout(arg)
    359 	void *arg;
    360 {
    361 	struct ifnet *ifp;
    362 	int active, s;
    363 
    364 	active = 0;
    365 	s = splnet();
    366 #ifdef __FreeBSD__
    367 #if (__FreeBSD_version < 300000)
    368 	for (ifp = ifnet; ifp; ifp = ifp->if_next)
    369 #else
    370 	for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_link.tqe_next)
    371 #endif
    372 #else /* !FreeBSD */
    373 	for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_list.tqe_next)
    374 #endif
    375 	{
    376 		if (!TBR_IS_ENABLED(&ifp->if_snd))
    377 			continue;
    378 		active++;
    379 		if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL)
    380 			(*ifp->if_start)(ifp);
    381 	}
    382 	splx(s);
    383 	if (active > 0)
    384 		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
    385 	else
    386 		tbr_timer = 0;	/* don't need tbr_timer anymore */
    387 #if defined(__alpha__) && !defined(ALTQ_NOPCC)
    388 	{
    389 		/*
    390 		 * XXX read out the machine dependent clock once a second
    391 		 * to detect counter wrap-around.
    392 		 */
    393 		static u_int cnt;
    394 
    395 		if (++cnt >= hz) {
    396 			(void)read_machclk();
    397 			cnt = 0;
    398 		}
    399 	}
    400 #endif /* __alpha__ && !ALTQ_NOPCC */
    401 }
    402 
    403 /*
    404  * get token bucket regulator profile
    405  */
    406 int
    407 tbr_get(ifq, profile)
    408 	struct ifaltq *ifq;
    409 	struct tb_profile *profile;
    410 {
    411 	struct tb_regulator *tbr;
    412 
    413 	if ((tbr = ifq->altq_tbr) == NULL) {
    414 		profile->rate = 0;
    415 		profile->depth = 0;
    416 	} else {
    417 		profile->rate =
    418 		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
    419 		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
    420 	}
    421 	return (0);
    422 }
    423 
    424 
    425 #ifndef IPPROTO_ESP
    426 #define	IPPROTO_ESP	50		/* encapsulating security payload */
    427 #endif
    428 #ifndef IPPROTO_AH
    429 #define	IPPROTO_AH	51		/* authentication header */
    430 #endif
    431 
    432 /*
    433  * extract flow information from a given packet.
    434  * filt_mask shows flowinfo fields required.
    435  * we assume the ip header is in one mbuf, and addresses and ports are
    436  * in network byte order.
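          * (for example, a classifier interested only in IPv4 port numbers
          * passes a filt_bmask containing FIMB4_PORTS, which makes this
          * routine call extract_ports4() to fill in the port fields.)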
    437  */
    438 int
    439 altq_extractflow(m, af, flow, filt_bmask)
    440 	struct mbuf *m;
    441 	int af;
    442 	struct flowinfo *flow;
    443 	u_int32_t	filt_bmask;
    444 {
    445 
    446 	switch (af) {
    447 	case PF_INET: {
    448 		struct flowinfo_in *fin;
    449 		struct ip *ip;
    450 
    451 		ip = mtod(m, struct ip *);
    452 
    453 		if (ip->ip_v != 4)
    454 			break;
    455 
    456 		fin = (struct flowinfo_in *)flow;
    457 		fin->fi_len = sizeof(struct flowinfo_in);
    458 		fin->fi_family = AF_INET;
    459 
    460 		fin->fi_proto = ip->ip_p;
    461 		fin->fi_tos = ip->ip_tos;
    462 
    463 		fin->fi_src.s_addr = ip->ip_src.s_addr;
    464 		fin->fi_dst.s_addr = ip->ip_dst.s_addr;
    465 
    466 		if (filt_bmask & FIMB4_PORTS)
    467 			/* if port info is required, extract port numbers */
    468 			extract_ports4(m, ip, fin);
    469 		else {
    470 			fin->fi_sport = 0;
    471 			fin->fi_dport = 0;
    472 			fin->fi_gpi = 0;
    473 		}
    474 		return (1);
    475 	}
    476 
    477 #ifdef INET6
    478 	case PF_INET6: {
    479 		struct flowinfo_in6 *fin6;
    480 		struct ip6_hdr *ip6;
    481 
    482 		ip6 = mtod(m, struct ip6_hdr *);
    483 		/* should we check the ip version? */
    484 
    485 		fin6 = (struct flowinfo_in6 *)flow;
    486 		fin6->fi6_len = sizeof(struct flowinfo_in6);
    487 		fin6->fi6_family = AF_INET6;
    488 
    489 		fin6->fi6_proto = ip6->ip6_nxt;
    490 		fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    491 
    492 		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
    493 		fin6->fi6_src = ip6->ip6_src;
    494 		fin6->fi6_dst = ip6->ip6_dst;
    495 
    496 		if ((filt_bmask & FIMB6_PORTS) ||
    497 		    ((filt_bmask & FIMB6_PROTO)
    498 		     && ip6->ip6_nxt > IPPROTO_IPV6))
    499 			/*
    500 			 * if port info is required, or proto is required
    501 			 * but there are option headers, extract port
    502 			 * and protocol numbers.
    503 			 */
    504 			extract_ports6(m, ip6, fin6);
    505 		else {
    506 			fin6->fi6_sport = 0;
    507 			fin6->fi6_dport = 0;
    508 			fin6->fi6_gpi = 0;
    509 		}
    510 		return (1);
    511 	}
    512 #endif /* INET6 */
    513 
    514 	default:
    515 		break;
    516 	}
    517 
    518 	/* failed */
    519 	flow->fi_len = sizeof(struct flowinfo);
    520 	flow->fi_family = AF_UNSPEC;
    521 	return (0);
    522 }
    523 
    524 /*
    525  * helper routine to extract port numbers
    526  */
    527 /* structure for ipsec and ipv6 option header template */
    528 struct _opt6 {
    529 	u_int8_t	opt6_nxt;	/* next header */
    530 	u_int8_t	opt6_hlen;	/* header extension length */
    531 	u_int16_t	_pad;
    532 	u_int32_t	ah_spi;		/* security parameter index
    533 					   for authentication header */
    534 };
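         /*
          * note: this template is reused below with different length units.
          * for AH, opt6_hlen counts 32-bit words beyond the first two (hence
          * "8 + opt6_hlen * 4"), while for the IPv6 hop-by-hop, routing and
          * destination option headers it counts 8-octet units beyond the
          * first (hence "(opt6_hlen + 1) * 8").
          */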
    535 
    536 /*
     537  * extract port numbers from an IPv4 packet.
    538  */
    539 static int
    540 extract_ports4(m, ip, fin)
    541 	struct mbuf *m;
    542 	struct ip *ip;
    543 	struct flowinfo_in *fin;
    544 {
    545 	struct mbuf *m0;
    546 	u_short ip_off;
    547 	u_int8_t proto;
    548 	int 	off;
    549 
    550 	fin->fi_sport = 0;
    551 	fin->fi_dport = 0;
    552 	fin->fi_gpi = 0;
    553 
    554 	ip_off = ntohs(ip->ip_off);
    555 	/* if it is a fragment, try cached fragment info */
    556 	if (ip_off & IP_OFFMASK) {
    557 		ip4f_lookup(ip, fin);
    558 		return (1);
    559 	}
    560 
    561 	/* locate the mbuf containing the protocol header */
    562 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
    563 		if (((caddr_t)ip >= m0->m_data) &&
    564 		    ((caddr_t)ip < m0->m_data + m0->m_len))
    565 			break;
    566 	if (m0 == NULL) {
    567 #ifdef ALTQ_DEBUG
    568 		printf("extract_ports4: can't locate header! ip=%p\n", ip);
    569 #endif
    570 		return (0);
    571 	}
    572 	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
    573 	proto = ip->ip_p;
    574 
    575 #ifdef ALTQ_IPSEC
    576  again:
    577 #endif
    578 	while (off >= m0->m_len) {
    579 		off -= m0->m_len;
    580 		m0 = m0->m_next;
    581 	}
    582 	ASSERT(m0->m_len >= off + 4);
    583 
    584 	switch (proto) {
    585 	case IPPROTO_TCP:
    586 	case IPPROTO_UDP: {
    587 		struct udphdr *udp;
    588 
    589 		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
    590 		fin->fi_sport = udp->uh_sport;
    591 		fin->fi_dport = udp->uh_dport;
    592 		fin->fi_proto = proto;
    593 		}
    594 		break;
    595 
    596 #ifdef ALTQ_IPSEC
    597 	case IPPROTO_ESP:
    598 		if (fin->fi_gpi == 0){
    599 			u_int32_t *gpi;
    600 
    601 			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
    602 			fin->fi_gpi   = *gpi;
    603 		}
    604 		fin->fi_proto = proto;
    605 		break;
    606 
    607 	case IPPROTO_AH: {
    608 			/* get next header and header length */
    609 			struct _opt6 *opt6;
    610 
    611 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
    612 			proto = opt6->opt6_nxt;
    613 			off += 8 + (opt6->opt6_hlen * 4);
    614 			if (fin->fi_gpi == 0)
    615 				fin->fi_gpi = opt6->ah_spi;
    616 		}
    617 		/* goto the next header */
    618 		goto again;
    619 #endif  /* ALTQ_IPSEC */
    620 
    621 	default:
    622 		fin->fi_proto = proto;
    623 		return (0);
    624 	}
    625 
     626 	/* if this is the first fragment, cache it. */
    627 	if (ip_off & IP_MF)
    628 		ip4f_cache(ip, fin);
    629 
    630 	return (1);
    631 }
    632 
    633 #ifdef INET6
    634 static int
    635 extract_ports6(m, ip6, fin6)
    636 	struct mbuf *m;
    637 	struct ip6_hdr *ip6;
    638 	struct flowinfo_in6 *fin6;
    639 {
    640 	struct mbuf *m0;
    641 	int	off;
    642 	u_int8_t proto;
    643 
    644 	fin6->fi6_gpi   = 0;
    645 	fin6->fi6_sport = 0;
    646 	fin6->fi6_dport = 0;
    647 
    648 	/* locate the mbuf containing the protocol header */
    649 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
    650 		if (((caddr_t)ip6 >= m0->m_data) &&
    651 		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
    652 			break;
    653 	if (m0 == NULL) {
    654 #ifdef ALTQ_DEBUG
    655 		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
    656 #endif
    657 		return (0);
    658 	}
    659 	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
    660 
    661 	proto = ip6->ip6_nxt;
    662 	do {
    663 		while (off >= m0->m_len) {
    664 			off -= m0->m_len;
    665 			m0 = m0->m_next;
    666 		}
    667 		ASSERT(m0->m_len >= off + 4);
    668 
    669 		switch (proto) {
    670 		case IPPROTO_TCP:
    671 		case IPPROTO_UDP: {
    672 			struct udphdr *udp;
    673 
    674 			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
    675 			fin6->fi6_sport = udp->uh_sport;
    676 			fin6->fi6_dport = udp->uh_dport;
    677 			fin6->fi6_proto = proto;
    678 			}
    679 			return (1);
    680 
    681 		case IPPROTO_ESP:
    682 			if (fin6->fi6_gpi == 0) {
    683 				u_int32_t *gpi;
    684 
    685 				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
    686 				fin6->fi6_gpi   = *gpi;
    687 			}
    688 			fin6->fi6_proto = proto;
    689 			return (1);
    690 
    691 		case IPPROTO_AH: {
    692 			/* get next header and header length */
    693 			struct _opt6 *opt6;
    694 
    695 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
    696 			if (fin6->fi6_gpi == 0)
    697 				fin6->fi6_gpi = opt6->ah_spi;
    698 			proto = opt6->opt6_nxt;
    699 			off += 8 + (opt6->opt6_hlen * 4);
     700 			/* go to the next header */
    701 			break;
    702 			}
    703 
    704 		case IPPROTO_HOPOPTS:
    705 		case IPPROTO_ROUTING:
    706 		case IPPROTO_DSTOPTS: {
    707 			/* get next header and header length */
    708 			struct _opt6 *opt6;
    709 
    710 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
    711 			proto = opt6->opt6_nxt;
    712 			off += (opt6->opt6_hlen + 1) * 8;
     713 			/* go to the next header */
    714 			break;
    715 			}
    716 
    717 		case IPPROTO_FRAGMENT:
     718 			/* IPv6 fragmentation is not supported yet */
    719 		default:
    720 			fin6->fi6_proto = proto;
    721 			return (0);
    722 		}
    723 	} while (1);
    724 	/*NOTREACHED*/
    725 }
    726 #endif /* INET6 */
    727 
    728 /*
    729  * altq common classifier
    730  */
    731 int
    732 acc_add_filter(classifier, filter, class, phandle)
    733 	struct acc_classifier *classifier;
    734 	struct flow_filter *filter;
    735 	void	*class;
    736 	u_long	*phandle;
    737 {
    738 	struct acc_filter *afp, *prev, *tmp;
    739 	int	i, s;
    740 
    741 #ifdef INET6
    742 	if (filter->ff_flow.fi_family != AF_INET &&
    743 	    filter->ff_flow.fi_family != AF_INET6)
    744 		return (EINVAL);
    745 #else
    746 	if (filter->ff_flow.fi_family != AF_INET)
    747 		return (EINVAL);
    748 #endif
    749 
    750 	MALLOC(afp, struct acc_filter *, sizeof(struct acc_filter),
    751 	       M_DEVBUF, M_WAITOK);
    752 	if (afp == NULL)
    753 		return (ENOMEM);
    754 	bzero(afp, sizeof(struct acc_filter));
    755 
    756 	afp->f_filter = *filter;
    757 	afp->f_class = class;
    758 
    759 	i = ACC_WILDCARD_INDEX;
    760 	if (filter->ff_flow.fi_family == AF_INET) {
    761 		struct flow_filter *filter4 = &afp->f_filter;
    762 
    763 		/*
    764 		 * if address is 0, it's a wildcard.  if address mask
    765 		 * isn't set, use full mask.
    766 		 */
    767 		if (filter4->ff_flow.fi_dst.s_addr == 0)
    768 			filter4->ff_mask.mask_dst.s_addr = 0;
    769 		else if (filter4->ff_mask.mask_dst.s_addr == 0)
    770 			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
    771 		if (filter4->ff_flow.fi_src.s_addr == 0)
    772 			filter4->ff_mask.mask_src.s_addr = 0;
    773 		else if (filter4->ff_mask.mask_src.s_addr == 0)
    774 			filter4->ff_mask.mask_src.s_addr = 0xffffffff;
    775 
    776 		/* clear extra bits in addresses  */
    777 		   filter4->ff_flow.fi_dst.s_addr &=
    778 		       filter4->ff_mask.mask_dst.s_addr;
    779 		   filter4->ff_flow.fi_src.s_addr &=
    780 		       filter4->ff_mask.mask_src.s_addr;
    781 
    782 		/*
    783 		 * if dst address is a wildcard, use hash-entry
    784 		 * ACC_WILDCARD_INDEX.
    785 		 */
    786 		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
    787 			i = ACC_WILDCARD_INDEX;
    788 		else
    789 			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
    790 	}
    791 #ifdef INET6
    792 	else if (filter->ff_flow.fi_family == AF_INET6) {
    793 		struct flow_filter6 *filter6 =
    794 			(struct flow_filter6 *)&afp->f_filter;
    795 #ifndef IN6MASK0 /* taken from kame ipv6 */
    796 #define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
    797 #define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
    798 		const struct in6_addr in6mask0 = IN6MASK0;
    799 		const struct in6_addr in6mask128 = IN6MASK128;
    800 #endif
    801 
    802 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
    803 			filter6->ff_mask6.mask6_dst = in6mask0;
    804 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
    805 			filter6->ff_mask6.mask6_dst = in6mask128;
    806 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
    807 			filter6->ff_mask6.mask6_src = in6mask0;
    808 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
    809 			filter6->ff_mask6.mask6_src = in6mask128;
    810 
    811 		/* clear extra bits in addresses  */
    812 		for (i = 0; i < 16; i++)
    813 			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
    814 			    filter6->ff_mask6.mask6_dst.s6_addr[i];
    815 		for (i = 0; i < 16; i++)
    816 			filter6->ff_flow6.fi6_src.s6_addr[i] &=
    817 			    filter6->ff_mask6.mask6_src.s6_addr[i];
    818 
    819 		if (filter6->ff_flow6.fi6_flowlabel == 0)
    820 			i = ACC_WILDCARD_INDEX;
    821 		else
    822 			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
    823 	}
    824 #endif /* INET6 */
    825 
    826 	afp->f_handle = get_filt_handle(classifier, i);
    827 
    828 	/* update filter bitmask */
    829 	afp->f_fbmask = filt2fibmask(filter);
    830 	classifier->acc_fbmask |= afp->f_fbmask;
    831 
    832 	/*
    833 	 * add this filter to the filter list.
    834 	 * filters are ordered from the highest rule number.
    835 	 */
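         	/*
         	 * (the loop below finds the last filter with a larger rule
         	 * number and inserts the new one right after it, keeping the
         	 * list sorted in descending ff_ruleno order.)
         	 */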
    836 	s = splnet();
    837 	prev = NULL;
    838 	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
    839 		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
    840 			prev = tmp;
    841 		else
    842 			break;
    843 	}
    844 	if (prev == NULL)
    845 		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
    846 	else
    847 		LIST_INSERT_AFTER(prev, afp, f_chain);
    848 	splx(s);
    849 
    850 	*phandle = afp->f_handle;
    851 	return (0);
    852 }
    853 
    854 int
    855 acc_delete_filter(classifier, handle)
    856 	struct acc_classifier *classifier;
    857 	u_long handle;
    858 {
    859 	struct acc_filter *afp;
    860 	int	s;
    861 
    862 	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
    863 		return (EINVAL);
    864 
    865 	s = splnet();
    866 	LIST_REMOVE(afp, f_chain);
    867 	splx(s);
    868 
    869 	FREE(afp, M_DEVBUF);
    870 
    871 	/* todo: update filt_bmask */
    872 
    873 	return (0);
    874 }
    875 
    876 /*
     877  * delete filters referencing the specified class.
    878  * if the all flag is not 0, delete all the filters.
    879  */
    880 int
    881 acc_discard_filters(classifier, class, all)
    882 	struct acc_classifier *classifier;
    883 	void	*class;
    884 	int	all;
    885 {
    886 	struct acc_filter *afp;
    887 	int	i, s;
    888 
    889 	s = splnet();
    890 	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
    891 		do {
    892 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
    893 				if (all || afp->f_class == class) {
    894 					LIST_REMOVE(afp, f_chain);
    895 					FREE(afp, M_DEVBUF);
    896 					/* start again from the head */
    897 					break;
    898 				}
    899 		} while (afp != NULL);
    900 	}
    901 	splx(s);
    902 
    903 	if (all)
    904 		classifier->acc_fbmask = 0;
    905 
    906 	return (0);
    907 }
    908 
    909 void *
    910 acc_classify(clfier, m, af)
    911 	void *clfier;
    912 	struct mbuf *m;
    913 	int af;
    914 {
    915 	struct acc_classifier *classifier;
    916 	struct flowinfo flow;
    917 	struct acc_filter *afp;
    918 	int	i;
    919 
    920 	classifier = (struct acc_classifier *)clfier;
    921 	altq_extractflow(m, af, &flow, classifier->acc_fbmask);
    922 
    923 	if (flow.fi_family == AF_INET) {
    924 		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
    925 
    926 		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
    927 			/* only tos is used */
    928 			LIST_FOREACH(afp,
    929 				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
    930 				 f_chain)
    931 				if (apply_tosfilter4(afp->f_fbmask,
    932 						     &afp->f_filter, fp))
    933 					/* filter matched */
    934 					return (afp->f_class);
    935 		} else if ((classifier->acc_fbmask &
    936 			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
    937 		    == 0) {
    938 			/* only proto and ports are used */
    939 			LIST_FOREACH(afp,
    940 				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
    941 				 f_chain)
    942 				if (apply_ppfilter4(afp->f_fbmask,
    943 						    &afp->f_filter, fp))
    944 					/* filter matched */
    945 					return (afp->f_class);
    946 		} else {
    947 			/* get the filter hash entry from its dest address */
    948 			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
    949 			do {
    950 				/*
    951 				 * go through this loop twice.  first for dst
    952 				 * hash, second for wildcards.
    953 				 */
    954 				LIST_FOREACH(afp, &classifier->acc_filters[i],
    955 					     f_chain)
    956 					if (apply_filter4(afp->f_fbmask,
    957 							  &afp->f_filter, fp))
    958 						/* filter matched */
    959 						return (afp->f_class);
    960 
    961 				/*
    962 				 * check again for filters with a dst addr
    963 				 * wildcard.
    964 				 * (daddr == 0 || dmask != 0xffffffff).
    965 				 */
    966 				if (i != ACC_WILDCARD_INDEX)
    967 					i = ACC_WILDCARD_INDEX;
    968 				else
    969 					break;
    970 			} while (1);
    971 		}
    972 	}
    973 #ifdef INET6
    974 	else if (flow.fi_family == AF_INET6) {
    975 		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
    976 
    977 		/* get the filter hash entry from its flow ID */
    978 		if (fp6->fi6_flowlabel != 0)
    979 			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
    980 		else
     981 			/* flowlabel can be zero */
    982 			i = ACC_WILDCARD_INDEX;
    983 
    984 		/* go through this loop twice.  first for flow hash, second
    985 		   for wildcards. */
    986 		do {
    987 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
    988 				if (apply_filter6(afp->f_fbmask,
    989 					(struct flow_filter6 *)&afp->f_filter,
    990 					fp6))
    991 					/* filter matched */
    992 					return (afp->f_class);
    993 
    994 			/*
    995 			 * check again for filters with a wildcard.
    996 			 */
    997 			if (i != ACC_WILDCARD_INDEX)
    998 				i = ACC_WILDCARD_INDEX;
    999 			else
   1000 				break;
   1001 		} while (1);
   1002 	}
   1003 #endif /* INET6 */
   1004 
   1005 	/* no filter matched */
   1006 	return (NULL);
   1007 }
   1008 
   1009 static int
   1010 apply_filter4(fbmask, filt, pkt)
   1011 	u_int32_t	fbmask;
   1012 	struct flow_filter *filt;
   1013 	struct flowinfo_in *pkt;
   1014 {
   1015 	if (filt->ff_flow.fi_family != AF_INET)
   1016 		return (0);
   1017 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
   1018 		return (0);
   1019 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
   1020 		return (0);
   1021 	if ((fbmask & FIMB4_DADDR) &&
   1022 	    filt->ff_flow.fi_dst.s_addr !=
   1023 	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
   1024 		return (0);
   1025 	if ((fbmask & FIMB4_SADDR) &&
   1026 	    filt->ff_flow.fi_src.s_addr !=
   1027 	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
   1028 		return (0);
   1029 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
   1030 		return (0);
   1031 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
   1032 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
   1033 		return (0);
   1034 	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
   1035 		return (0);
   1036 	/* match */
   1037 	return (1);
   1038 }
   1039 
   1040 /*
   1041  * filter matching function optimized for a common case that checks
   1042  * only protocol and port numbers
   1043  */
   1044 static int
   1045 apply_ppfilter4(fbmask, filt, pkt)
   1046 	u_int32_t	fbmask;
   1047 	struct flow_filter *filt;
   1048 	struct flowinfo_in *pkt;
   1049 {
   1050 	if (filt->ff_flow.fi_family != AF_INET)
   1051 		return (0);
   1052 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
   1053 		return (0);
   1054 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
   1055 		return (0);
   1056 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
   1057 		return (0);
   1058 	/* match */
   1059 	return (1);
   1060 }
   1061 
   1062 /*
   1063  * filter matching function only for tos field.
   1064  */
   1065 static int
   1066 apply_tosfilter4(fbmask, filt, pkt)
   1067 	u_int32_t	fbmask;
   1068 	struct flow_filter *filt;
   1069 	struct flowinfo_in *pkt;
   1070 {
   1071 	if (filt->ff_flow.fi_family != AF_INET)
   1072 		return (0);
   1073 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
   1074 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
   1075 		return (0);
   1076 	/* match */
   1077 	return (1);
   1078 }
   1079 
   1080 #ifdef INET6
   1081 static int
   1082 apply_filter6(fbmask, filt, pkt)
   1083 	u_int32_t	fbmask;
   1084 	struct flow_filter6 *filt;
   1085 	struct flowinfo_in6 *pkt;
   1086 {
   1087 	int i;
   1088 
   1089 	if (filt->ff_flow6.fi6_family != AF_INET6)
   1090 		return (0);
   1091 	if ((fbmask & FIMB6_FLABEL) &&
   1092 	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
   1093 		return (0);
   1094 	if ((fbmask & FIMB6_PROTO) &&
   1095 	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
   1096 		return (0);
   1097 	if ((fbmask & FIMB6_SPORT) &&
   1098 	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
   1099 		return (0);
   1100 	if ((fbmask & FIMB6_DPORT) &&
   1101 	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
   1102 		return (0);
   1103 	if (fbmask & FIMB6_SADDR) {
   1104 		for (i = 0; i < 4; i++)
   1105 			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
   1106 			    (pkt->fi6_src.s6_addr32[i] &
   1107 			     filt->ff_mask6.mask6_src.s6_addr32[i]))
   1108 				return (0);
   1109 	}
   1110 	if (fbmask & FIMB6_DADDR) {
   1111 		for (i = 0; i < 4; i++)
   1112 			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
   1113 			    (pkt->fi6_dst.s6_addr32[i] &
   1114 			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
   1115 				return (0);
   1116 	}
   1117 	if ((fbmask & FIMB6_TCLASS) &&
   1118 	    filt->ff_flow6.fi6_tclass !=
   1119 	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
   1120 		return (0);
   1121 	if ((fbmask & FIMB6_GPI) &&
   1122 	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
   1123 		return (0);
   1124 	/* match */
   1125 	return (1);
   1126 }
   1127 #endif /* INET6 */
   1128 
   1129 /*
   1130  *  filter handle:
   1131  *	bit 20-28: index to the filter hash table
   1132  *	bit  0-19: unique id in the hash bucket.
   1133  */
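         /*
          * (illustrative example: a filter placed in hash bucket 3 that gets
          * unique id 0x2a receives handle (3 << 20) | 0x2a = 0x30002a; the
          * bucket index is later recovered from the upper bits by
          * ACC_GET_HINDEX() in filth_to_filtp().)
          */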
   1134 static u_long
   1135 get_filt_handle(classifier, i)
   1136 	struct acc_classifier *classifier;
   1137 	int	i;
   1138 {
   1139 	static u_long handle_number = 1;
   1140 	u_long 	handle;
   1141 	struct acc_filter *afp;
   1142 
   1143 	while (1) {
   1144 		handle = handle_number++ & 0x000fffff;
   1145 
   1146 		if (LIST_EMPTY(&classifier->acc_filters[i]))
   1147 			break;
   1148 
   1149 		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
   1150 			if ((afp->f_handle & 0x000fffff) == handle)
   1151 				break;
   1152 		if (afp == NULL)
   1153 			break;
   1154 		/* this handle is already used, try again */
   1155 	}
   1156 
   1157 	return ((i << 20) | handle);
   1158 }
   1159 
   1160 /* convert filter handle to filter pointer */
   1161 static struct acc_filter *
   1162 filth_to_filtp(classifier, handle)
   1163 	struct acc_classifier *classifier;
   1164 	u_long handle;
   1165 {
   1166 	struct acc_filter *afp;
   1167 	int	i;
   1168 
   1169 	i = ACC_GET_HINDEX(handle);
   1170 
   1171 	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
   1172 		if (afp->f_handle == handle)
   1173 			return (afp);
   1174 
   1175 	return (NULL);
   1176 }
   1177 
   1178 /* create flowinfo bitmask */
   1179 static u_int32_t
   1180 filt2fibmask(filt)
   1181 	struct flow_filter *filt;
   1182 {
   1183 	u_int32_t mask = 0;
   1184 #ifdef INET6
   1185 	struct flow_filter6 *filt6;
   1186 #endif
   1187 
   1188 	switch (filt->ff_flow.fi_family) {
   1189 	case AF_INET:
   1190 		if (filt->ff_flow.fi_proto != 0)
   1191 			mask |= FIMB4_PROTO;
   1192 		if (filt->ff_flow.fi_tos != 0)
   1193 			mask |= FIMB4_TOS;
   1194 		if (filt->ff_flow.fi_dst.s_addr != 0)
   1195 			mask |= FIMB4_DADDR;
   1196 		if (filt->ff_flow.fi_src.s_addr != 0)
   1197 			mask |= FIMB4_SADDR;
   1198 		if (filt->ff_flow.fi_sport != 0)
   1199 			mask |= FIMB4_SPORT;
   1200 		if (filt->ff_flow.fi_dport != 0)
   1201 			mask |= FIMB4_DPORT;
   1202 		if (filt->ff_flow.fi_gpi != 0)
   1203 			mask |= FIMB4_GPI;
   1204 		break;
   1205 #ifdef INET6
   1206 	case AF_INET6:
   1207 		filt6 = (struct flow_filter6 *)filt;
   1208 
   1209 		if (filt6->ff_flow6.fi6_proto != 0)
   1210 			mask |= FIMB6_PROTO;
   1211 		if (filt6->ff_flow6.fi6_tclass != 0)
   1212 			mask |= FIMB6_TCLASS;
   1213 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
   1214 			mask |= FIMB6_DADDR;
   1215 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
   1216 			mask |= FIMB6_SADDR;
   1217 		if (filt6->ff_flow6.fi6_sport != 0)
   1218 			mask |= FIMB6_SPORT;
   1219 		if (filt6->ff_flow6.fi6_dport != 0)
   1220 			mask |= FIMB6_DPORT;
   1221 		if (filt6->ff_flow6.fi6_gpi != 0)
   1222 			mask |= FIMB6_GPI;
   1223 		if (filt6->ff_flow6.fi6_flowlabel != 0)
   1224 			mask |= FIMB6_FLABEL;
   1225 		break;
   1226 #endif /* INET6 */
   1227 	}
   1228 	return (mask);
   1229 }
   1230 
   1231 
   1232 /*
   1233  * helper functions to handle IPv4 fragments.
   1234  * currently only in-sequence fragments are handled.
    1235  *	- fragment info is cached in an LRU list.
   1236  *	- when a first fragment is found, cache its flow info.
   1237  *	- when a non-first fragment is found, lookup the cache.
   1238  */
   1239 
   1240 struct ip4_frag {
   1241     TAILQ_ENTRY(ip4_frag) ip4f_chain;
   1242     char    ip4f_valid;
   1243     u_short ip4f_id;
   1244     struct flowinfo_in ip4f_info;
   1245 };
   1246 
   1247 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
   1248 
   1249 #define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
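         /*
          * the list is kept in LRU order: ip4f_alloc() recycles the entry at
          * the tail and moves it to the head, and ip4f_free() marks an entry
          * invalid and moves it back to the tail, so valid entries always
          * precede invalid ones.
          */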
   1250 
   1251 
   1252 static void
   1253 ip4f_cache(ip, fin)
   1254 	struct ip *ip;
   1255 	struct flowinfo_in *fin;
   1256 {
   1257 	struct ip4_frag *fp;
   1258 
   1259 	if (TAILQ_EMPTY(&ip4f_list)) {
   1260 		/* first time call, allocate fragment cache entries. */
   1261 		if (ip4f_init() < 0)
   1262 			/* allocation failed! */
   1263 			return;
   1264 	}
   1265 
   1266 	fp = ip4f_alloc();
   1267 	fp->ip4f_id = ip->ip_id;
   1268 	fp->ip4f_info.fi_proto = ip->ip_p;
   1269 	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
   1270 	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
   1271 
   1272 	/* save port numbers */
   1273 	fp->ip4f_info.fi_sport = fin->fi_sport;
   1274 	fp->ip4f_info.fi_dport = fin->fi_dport;
   1275 	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
   1276 }
   1277 
   1278 static int
   1279 ip4f_lookup(ip, fin)
   1280 	struct ip *ip;
   1281 	struct flowinfo_in *fin;
   1282 {
   1283 	struct ip4_frag *fp;
   1284 
   1285 	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
   1286 	     fp = TAILQ_NEXT(fp, ip4f_chain))
   1287 		if (ip->ip_id == fp->ip4f_id &&
   1288 		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
   1289 		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
   1290 		    ip->ip_p == fp->ip4f_info.fi_proto) {
   1291 
   1292 			/* found the matching entry */
   1293 			fin->fi_sport = fp->ip4f_info.fi_sport;
   1294 			fin->fi_dport = fp->ip4f_info.fi_dport;
   1295 			fin->fi_gpi   = fp->ip4f_info.fi_gpi;
   1296 
   1297 			if ((ntohs(ip->ip_off) & IP_MF) == 0)
   1298 				/* this is the last fragment,
   1299 				   release the entry. */
   1300 				ip4f_free(fp);
   1301 
   1302 			return (1);
   1303 		}
   1304 
   1305 	/* no matching entry found */
   1306 	return (0);
   1307 }
   1308 
   1309 static int
   1310 ip4f_init(void)
   1311 {
   1312 	struct ip4_frag *fp;
   1313 	int i;
   1314 
   1315 	TAILQ_INIT(&ip4f_list);
   1316 	for (i=0; i<IP4F_TABSIZE; i++) {
   1317 		MALLOC(fp, struct ip4_frag *, sizeof(struct ip4_frag),
   1318 		       M_DEVBUF, M_NOWAIT);
   1319 		if (fp == NULL) {
   1320 			printf("ip4f_init: can't alloc %dth entry!\n", i);
   1321 			if (i == 0)
   1322 				return (-1);
   1323 			return (0);
   1324 		}
   1325 		fp->ip4f_valid = 0;
   1326 		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
   1327 	}
   1328 	return (0);
   1329 }
   1330 
   1331 static struct ip4_frag *
   1332 ip4f_alloc(void)
   1333 {
   1334 	struct ip4_frag *fp;
   1335 
   1336 	/* reclaim an entry at the tail, put it at the head */
   1337 	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
   1338 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
   1339 	fp->ip4f_valid = 1;
   1340 	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
   1341 	return (fp);
   1342 }
   1343 
   1344 static void
   1345 ip4f_free(fp)
   1346 	struct ip4_frag *fp;
   1347 {
   1348 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
   1349 	fp->ip4f_valid = 0;
   1350 	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
   1351 }
   1352 
   1353 /*
   1354  * read and write diffserv field in IPv4 or IPv6 header
   1355  */
   1356 u_int8_t
   1357 read_dsfield(m, pktattr)
   1358 	struct mbuf *m;
   1359 	struct altq_pktattr *pktattr;
   1360 {
   1361 	struct mbuf *m0;
   1362 	u_int8_t ds_field = 0;
   1363 
   1364 	if (pktattr == NULL ||
   1365 	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
   1366 		return ((u_int8_t)0);
   1367 
   1368 	/* verify that pattr_hdr is within the mbuf data */
   1369 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
   1370 		if ((pktattr->pattr_hdr >= m0->m_data) &&
   1371 		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
   1372 			break;
   1373 	if (m0 == NULL) {
   1374 		/* ick, pattr_hdr is stale */
   1375 		pktattr->pattr_af = AF_UNSPEC;
   1376 #ifdef ALTQ_DEBUG
   1377 		printf("read_dsfield: can't locate header!\n");
   1378 #endif
   1379 		return ((u_int8_t)0);
   1380 	}
   1381 
   1382 	if (pktattr->pattr_af == AF_INET) {
   1383 		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
   1384 
   1385 		if (ip->ip_v != 4)
   1386 			return ((u_int8_t)0);	/* version mismatch! */
   1387 		ds_field = ip->ip_tos;
   1388 	}
   1389 #ifdef INET6
   1390 	else if (pktattr->pattr_af == AF_INET6) {
   1391 		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1392 		u_int32_t flowlabel;
   1393 
   1394 		flowlabel = ntohl(ip6->ip6_flow);
   1395 		if ((flowlabel >> 28) != 6)
   1396 			return ((u_int8_t)0);	/* version mismatch! */
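         		/*
         		 * in host byte order the traffic class occupies bits
         		 * 20-27 of the version/class/flowlabel word, so shift
         		 * and mask it out.
         		 */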
   1397 		ds_field = (flowlabel >> 20) & 0xff;
   1398 	}
   1399 #endif
   1400 	return (ds_field);
   1401 }
   1402 
   1403 void
   1404 write_dsfield(m, pktattr, dsfield)
   1405 	struct mbuf *m;
   1406 	struct altq_pktattr *pktattr;
   1407 	u_int8_t dsfield;
   1408 {
   1409 	struct mbuf *m0;
   1410 
   1411 	if (pktattr == NULL ||
   1412 	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
   1413 		return;
   1414 
   1415 	/* verify that pattr_hdr is within the mbuf data */
   1416 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
   1417 		if ((pktattr->pattr_hdr >= m0->m_data) &&
   1418 		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
   1419 			break;
   1420 	if (m0 == NULL) {
   1421 		/* ick, pattr_hdr is stale */
   1422 		pktattr->pattr_af = AF_UNSPEC;
   1423 #ifdef ALTQ_DEBUG
   1424 		printf("write_dsfield: can't locate header!\n");
   1425 #endif
   1426 		return;
   1427 	}
   1428 
   1429 	if (pktattr->pattr_af == AF_INET) {
   1430 		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
   1431 		u_int8_t old;
   1432 		int32_t sum;
   1433 
   1434 		if (ip->ip_v != 4)
   1435 			return;		/* version mismatch! */
   1436 		old = ip->ip_tos;
   1437 		dsfield |= old & 3;	/* leave CU bits */
   1438 		if (old == dsfield)
   1439 			return;
   1440 		ip->ip_tos = dsfield;
   1441 		/*
   1442 		 * update checksum (from RFC1624)
   1443 		 *	   HC' = ~(~HC + ~m + m')
   1444 		 */
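         		/*
         		 * here m is the old tos byte taken as a 16-bit value
         		 * with a zero high byte, so ~m = 0xff00 + (~old & 0xff)
         		 * and m' is dsfield; the carries are folded back in and
         		 * the result is complemented to form the new checksum.
         		 */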
   1445 		sum = ~ntohs(ip->ip_sum) & 0xffff;
   1446 		sum += 0xff00 + (~old & 0xff) + dsfield;
   1447 		sum = (sum >> 16) + (sum & 0xffff);
   1448 		sum += (sum >> 16);  /* add carry */
   1449 
   1450 		ip->ip_sum = htons(~sum & 0xffff);
   1451 	}
   1452 #ifdef INET6
   1453 	else if (pktattr->pattr_af == AF_INET6) {
   1454 		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
   1455 		u_int32_t flowlabel;
   1456 
   1457 		flowlabel = ntohl(ip6->ip6_flow);
   1458 		if ((flowlabel >> 28) != 6)
   1459 			return;		/* version mismatch! */
   1460 		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
   1461 		ip6->ip6_flow = htonl(flowlabel);
   1462 	}
   1463 #endif
   1464 	return;
   1465 }
   1466 
   1467 
   1468 /*
   1469  * high resolution clock support taking advantage of a machine dependent
    1470  * high resolution time counter (e.g., the timestamp counter of an Intel Pentium).
   1471  * we assume
   1472  *  - 64-bit-long monotonically-increasing counter
   1473  *  - frequency range is 100M-4GHz (CPU speed)
   1474  */
   1475 u_int32_t machclk_freq = 0;
   1476 u_int32_t machclk_per_tick = 0;
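         /*
          * machclk_freq is in counts per second, so (for example) an elapsed
          * time in microseconds can be recovered from two read_machclk()
          * samples as (end - start) * 1000000 / machclk_freq; the calibrating
          * init_machclk() below applies the same relation in reverse.
          */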
   1477 
   1478 #if (defined(__i386__) || defined(__alpha__)) && !defined(ALTQ_NOPCC)
   1479 #ifdef __FreeBSD__
   1480 /* freebsd makes clock frequency accessible */
   1481 #ifdef __alpha__
   1482 extern u_int32_t cycles_per_sec;	/* alpha cpu clock frequency */
   1483 #endif
   1484 void
   1485 init_machclk(void)
   1486 {
   1487 #if defined(__i386__)
   1488 #if (__FreeBSD_version > 300000)
   1489 	machclk_freq = tsc_freq;
   1490 #else
   1491 	machclk_freq = i586_ctr_freq;
   1492 #endif
   1493 #elif defined(__alpha__)
   1494 	machclk_freq = cycles_per_sec;
   1495 #endif /* __alpha__ */
   1496 	machclk_per_tick = machclk_freq / hz;
   1497 }
   1498 #else /* !__FreeBSD__ */
   1499 /*
   1500  * measure Pentium TSC or Alpha PCC clock frequency
   1501  */
   1502 void
   1503 init_machclk(void)
   1504 {
   1505 	static int	wait;
   1506 	struct timeval	tv_start, tv_end;
   1507 	u_int64_t	start, end, diff;
   1508 	int		timo;
   1509 
   1510 	microtime(&tv_start);
   1511 	start = read_machclk();
   1512 	timo = hz;	/* 1 sec */
   1513 	(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
   1514 	microtime(&tv_end);
   1515 	end = read_machclk();
   1516 	diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
   1517 		+ tv_end.tv_usec - tv_start.tv_usec;
   1518 	if (diff != 0)
   1519 		machclk_freq = (u_int)((end - start) * 1000000 / diff);
   1520 	machclk_per_tick = machclk_freq / hz;
   1521 
   1522 	printf("altq: CPU clock: %uHz\n", machclk_freq);
   1523 }
   1524 #endif /* !__FreeBSD__ */
   1525 #ifdef __alpha__
   1526 /*
   1527  * make a 64bit counter value out of the 32bit alpha processor cycle counter.
    1528  * read_machclk must be called at least once per half wrap-around period
    1529  * (about 5 sec for a 400MHz cpu) to properly detect a counter wrap-around.
   1530  * tbr_timeout calls read_machclk once a second.
   1531  */
   1532 u_int64_t
   1533 read_machclk(void)
   1534 {
   1535 	static u_int32_t last_pcc, upper;
   1536 	u_int32_t pcc;
   1537 
   1538 	pcc = (u_int32_t)alpha_rpcc();
   1539 	if (pcc <= last_pcc)
   1540 		upper++;
   1541 	last_pcc = pcc;
   1542 	return (((u_int64_t)upper << 32) + pcc);
   1543 }
   1544 #endif /* __alpha__ */
   1545 #else /* !i386  && !alpha */
   1546 /* use microtime() for now */
   1547 void
   1548 init_machclk(void)
   1549 {
   1550 	machclk_freq = 1000000 << MACHCLK_SHIFT;
   1551 	machclk_per_tick = machclk_freq / hz;
   1552 	printf("altq: emulate %uHz cpu clock\n", machclk_freq);
   1553 }
   1554 #endif /* !i386 && !alpha */
   1555