Home | History | Annotate | Line # | Download | only in net
pf_norm.c revision 1.1
      1 /*	$OpenBSD: pf_norm.c,v 1.80 2004/03/09 21:44:41 mcbride Exp $ */
      2 
      3 /*
      4  * Copyright 2001 Niels Provos <provos (at) citi.umich.edu>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "pflog.h"
     29 
     30 #include <sys/param.h>
     31 #include <sys/systm.h>
     32 #include <sys/mbuf.h>
     33 #include <sys/filio.h>
     34 #include <sys/fcntl.h>
     35 #include <sys/socket.h>
     36 #include <sys/kernel.h>
     37 #include <sys/time.h>
     38 #include <sys/pool.h>
     39 
     40 #include <dev/rndvar.h>
     41 #include <net/if.h>
     42 #include <net/if_types.h>
     43 #include <net/bpf.h>
     44 #include <net/route.h>
     45 #include <net/if_pflog.h>
     46 
     47 #include <netinet/in.h>
     48 #include <netinet/in_var.h>
     49 #include <netinet/in_systm.h>
     50 #include <netinet/ip.h>
     51 #include <netinet/ip_var.h>
     52 #include <netinet/tcp.h>
     53 #include <netinet/tcp_seq.h>
     54 #include <netinet/udp.h>
     55 #include <netinet/ip_icmp.h>
     56 
     57 #ifdef INET6
     58 #include <netinet/ip6.h>
     59 #endif /* INET6 */
     60 
     61 #include <net/pfvar.h>
     62 
/*
 * One buffered IP fragment held for full reassembly: keeps the mbuf
 * (and a pointer to its IP header) until the whole packet is complete.
 */
struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;	/* fragment list, ordered by offset */
	struct ip *fr_ip;		/* IP header of this fragment */
	struct mbuf *fr_m;		/* mbuf chain holding the fragment */
};
     68 
/*
 * One byte range already seen by the non-buffering fragment cache.
 * Offsets are in bytes within the reassembled packet payload.
 */
struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;	/* range list, ordered by fr_off */
	uint16_t	fr_off;		/* first byte of the cached range */
	uint16_t	fr_end;		/* byte just past the cached range */
};
     74 
     75 #define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
     76 #define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
     77 #define PFFRAG_DROP	0x0004		/* Drop all fragments */
     78 #define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
     79 
/*
 * Per-packet fragment state, keyed by (id, proto, src, dst).  Lives in
 * either the buffering tree/queue (pf_frag_tree/pf_fragqueue) or the
 * non-buffering cache (pf_cache_tree/pf_cachequeue), selected by
 * PFFRAG_NOBUFFER in fr_flags; the union member used follows suit.
 */
struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;		/* lookup tree linkage */
	TAILQ_ENTRY(pf_fragment) frag_next;	/* LRU queue linkage */
	struct in_addr	fr_src;
	struct in_addr	fr_dst;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_id;		/* fragment id for reassemble */
	u_int16_t	fr_max;		/* fragment data max */
	u_int32_t	fr_timeout;	/* last-use time, for expiry */
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};
     97 
     98 TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
     99 TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;
    100 
    101 static __inline int	 pf_frag_compare(struct pf_fragment *,
    102 			    struct pf_fragment *);
    103 RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
    104 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
    105 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
    106 
    107 /* Private prototypes */
    108 void			 pf_ip2key(struct pf_fragment *, struct ip *);
    109 void			 pf_remove_fragment(struct pf_fragment *);
    110 void			 pf_flush_fragments(void);
    111 void			 pf_free_fragment(struct pf_fragment *);
    112 struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
    113 struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
    114 			    struct pf_frent *, int);
    115 struct mbuf		*pf_fragcache(struct mbuf **, struct ip*,
    116 			    struct pf_fragment **, int, int, int *);
    117 u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t);
    118 int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
    119 			    struct tcphdr *, int);
    120 
/*
 * Debug printf, active only when pf debugging is PF_DEBUG_MISC or
 * higher.  The argument must be a parenthesized printf argument list,
 * e.g. DPFPRINTF(("off %d\n", off)).  Wrapped in do { } while (0) so
 * the macro expands to a single statement; the previous bare-if form
 * had a dangling-else hazard if used under an unbraced if/else.
 */
#define	DPFPRINTF(x)	do {						\
	if (pf_status.debug >= PF_DEBUG_MISC) {				\
		printf("%s: ", __func__);				\
		printf x;						\
	}								\
} while (0)
    123 
    124 /* Globals */
    125 struct pool		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
    126 struct pool		 pf_state_scrub_pl;
    127 int			 pf_nfrents, pf_ncache;
    128 
/*
 * One-time initialization of the normalizer: create the memory pools
 * for fragment state, apply the configured high-water limits, and set
 * up the two LRU queues.  Called once at pf attach time.
 */
void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	/* Soft limit on fragment descriptors; hard limits on the rest. */
	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}
    151 
    152 static __inline int
    153 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
    154 {
    155 	int	diff;
    156 
    157 	if ((diff = a->fr_id - b->fr_id))
    158 		return (diff);
    159 	else if ((diff = a->fr_p - b->fr_p))
    160 		return (diff);
    161 	else if (a->fr_src.s_addr < b->fr_src.s_addr)
    162 		return (-1);
    163 	else if (a->fr_src.s_addr > b->fr_src.s_addr)
    164 		return (1);
    165 	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
    166 		return (-1);
    167 	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
    168 		return (1);
    169 	return (0);
    170 }
    171 
/*
 * Expire fragment state older than the configured PFTM_FRAG timeout.
 * Both LRU queues keep the most recently used entry at the head, so we
 * free from the tail until we find an entry young enough to keep.
 */
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	u_int32_t		 expire = time.tv_sec -
				    pf_default_rule.timeout[PFTM_FRAG];

	/* Buffering queue: entries here must be in buffering mode. */
	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		KASSERT(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	/* Cache queue: entries here must be non-buffering. */
	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		KASSERT(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		/* pf_free_fragment() must have unlinked the entry. */
		KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}
    199 
    200 /*
    201  * Try to flush old fragments to make space for new ones
    202  */
    203 
    204 void
    205 pf_flush_fragments(void)
    206 {
    207 	struct pf_fragment	*frag;
    208 	int			 goal;
    209 
    210 	goal = pf_nfrents * 9 / 10;
    211 	DPFPRINTF(("trying to free > %d frents\n",
    212 	    pf_nfrents - goal));
    213 	while (goal < pf_nfrents) {
    214 		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
    215 		if (frag == NULL)
    216 			break;
    217 		pf_free_fragment(frag);
    218 	}
    219 
    220 
    221 	goal = pf_ncache * 9 / 10;
    222 	DPFPRINTF(("trying to free > %d cache entries\n",
    223 	    pf_ncache - goal));
    224 	while (goal < pf_ncache) {
    225 		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
    226 		if (frag == NULL)
    227 			break;
    228 		pf_free_fragment(frag);
    229 	}
    230 }
    231 
    232 /* Frees the fragments and all associated entries */
    233 
    234 void
    235 pf_free_fragment(struct pf_fragment *frag)
    236 {
    237 	struct pf_frent		*frent;
    238 	struct pf_frcache	*frcache;
    239 
    240 	/* Free all fragments */
    241 	if (BUFFER_FRAGMENTS(frag)) {
    242 		for (frent = LIST_FIRST(&frag->fr_queue); frent;
    243 		    frent = LIST_FIRST(&frag->fr_queue)) {
    244 			LIST_REMOVE(frent, fr_next);
    245 
    246 			m_freem(frent->fr_m);
    247 			pool_put(&pf_frent_pl, frent);
    248 			pf_nfrents--;
    249 		}
    250 	} else {
    251 		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
    252 		    frcache = LIST_FIRST(&frag->fr_cache)) {
    253 			LIST_REMOVE(frcache, fr_next);
    254 
    255 			KASSERT(LIST_EMPTY(&frag->fr_cache) ||
    256 			    LIST_FIRST(&frag->fr_cache)->fr_off >
    257 			    frcache->fr_end);
    258 
    259 			pool_put(&pf_cent_pl, frcache);
    260 			pf_ncache--;
    261 		}
    262 	}
    263 
    264 	pf_remove_fragment(frag);
    265 }
    266 
    267 void
    268 pf_ip2key(struct pf_fragment *key, struct ip *ip)
    269 {
    270 	key->fr_p = ip->ip_p;
    271 	key->fr_id = ip->ip_id;
    272 	key->fr_src.s_addr = ip->ip_src.s_addr;
    273 	key->fr_dst.s_addr = ip->ip_dst.s_addr;
    274 }
    275 
    276 struct pf_fragment *
    277 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
    278 {
    279 	struct pf_fragment	 key;
    280 	struct pf_fragment	*frag;
    281 
    282 	pf_ip2key(&key, ip);
    283 
    284 	frag = RB_FIND(pf_frag_tree, tree, &key);
    285 	if (frag != NULL) {
    286 		/* XXX Are we sure we want to update the timeout? */
    287 		frag->fr_timeout = time.tv_sec;
    288 		if (BUFFER_FRAGMENTS(frag)) {
    289 			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
    290 			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
    291 		} else {
    292 			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
    293 			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
    294 		}
    295 	}
    296 
    297 	return (frag);
    298 }
    299 
    300 /* Removes a fragment from the fragment queue and frees the fragment */
    301 
/* Removes a fragment from the fragment queue and frees the fragment */

/*
 * Unlink a descriptor from its tree and LRU queue and return it to the
 * proper pool.  Buffering entries live in pf_frag_tree/pf_fragqueue,
 * cache entries in pf_cache_tree/pf_cachequeue; the caller must have
 * already freed any attached pf_frent/pf_frcache entries.
 */
void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}
    315 
    316 #define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
/*
 * Buffering reassembly.  Insert the fragment 'frent' (whose mbuf is
 * *m0) into the queue of *frag, trimming away any byte ranges that
 * overlap already-queued fragments; earlier data wins on overlap.
 * 'mff' is the IP more-fragments flag of this fragment.
 *
 * Returns the fully reassembled packet once all pieces are present,
 * or NULL while the packet is still incomplete (or on drop/error, in
 * which case the fragment's mbuf has been freed).  On successful
 * reassembly *frag is cleared and its state torn down.  Allocates a
 * new descriptor on first fragment, flushing old state if the pool
 * is exhausted.
 */
struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t	 max = ip_len + off;

	KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* Try once more after reclaiming old state. */
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time.tv_sec;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	KASSERT(frep != NULL || frea != NULL);

	/* Trim the front of this fragment where it overlaps 'frep'. */
	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	/* Trim or discard queued fragments this one overlaps from behind. */
	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next)
	{
		u_int16_t	aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4)
		{
			/* Partial overlap: chop the front of 'frea'. */
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, loose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

 insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			/* Gap found: not all fragments have arrived yet. */
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		/* Reassembled packet would exceed the IP maximum; drop all. */
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	/* Concatenate the remaining fragments' mbufs onto the first. */
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	/* Restore the IP header that was stripped at entry. */
	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

 drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}
    515 
/*
 * Non-buffering fragment cache.  Instead of holding fragments, track
 * the byte ranges already passed for this packet and forward each
 * fragment immediately, trimming any bytes that duplicate ranges
 * already seen (earlier data wins).  'drop' selects frag-drop policy:
 * overlapping fragments mark the whole packet for dropping instead of
 * being trimmed.  Returns the (possibly trimmed) mbuf to forward, or
 * NULL when the fragment was consumed; *nomem is set on allocation
 * failure.  Note: 'off' here is ntohs(ip_off) << 3 without masking —
 * the flag bits are shifted out of the 16-bit result, so the value is
 * the byte offset.
 */
struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf		*m = *m0;
	struct pf_frcache	*frp, *fra, *cur = NULL;
	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	u_int16_t		 off = ntohs(h->ip_off) << 3;
	u_int16_t		 max = ip_len + off;
	int			 hosed = 0;	/* set when 'drop' policy trims */

	KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* Try once more after reclaiming old state. */
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = h->ip_src;
		(*frag)->fr_dst = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = time.tv_sec;

		cur->fr_off = off;
		cur->fr_end = max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	KASSERT(frp != NULL || fra != NULL);

	/* Handle overlap with the preceding cached range 'frp'. */
	if (frp != NULL) {
		int	precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			frp->fr_end = max;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    max));

			off += precut;
			max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = max;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				KASSERT((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					/* Recompute pkthdr length after trim. */
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}


				h = mtod(m, struct ip *);


				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	/* Handle overlap with the following cached range 'fra'. */
	if (fra != NULL) {
		int	aftercut;
		int	merge = 0;	/* 'fra' was extended to cover us */

		aftercut = max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					/* Recompute pkthdr length after trim. */
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}


		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;

			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				KASSERT(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;

			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}


 pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

 no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

 drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}
    811 
/*
 * IPv4 scrub entry point.  Match the packet against the scrub
 * ruleset; on a match, sanity-check the header, normalize fragments
 * (full reassembly by default, or the non-buffering cache when the
 * rule uses frag-crop/frag-drop) and apply no-df/min-ttl/random-id
 * rule options.  Returns PF_PASS or PF_DROP; on reassembly *m0 may be
 * replaced with the reassembled packet.
 * (NOTE(review): function body continues past the visible end of this
 * chunk.)
 */
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 ip_off;

	/* Find the first matching scrub rule, using skip steps. */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.not))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.not))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL)
		return (PF_PASS);
	else
		r->packets++;

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF)
		h->ip_off &= htons(~IP_DF);

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment(h, &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		/* Header may have moved during reassembly. */
		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int	nomem = 0;

		if (dir == PF_OUT) {
			if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) !=
			    NULL) {
				/* Already passed the fragment cache in the
				 * input direction.  If we continued, it would
				 * appear to be a dup and would be dropped.
				 */
				goto fragment_pass;
			}
		}

		frag = pf_find_fragment(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		if (dir == PF_IN) {
			/* Tag so the outbound pass skips the cache. */
			struct m_tag	*mtag;

			mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT);
			if (mtag == NULL)
				goto no_mem;
			m_tag_prepend(m, mtag);
		}
		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

 no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	h->ip_off &= htons(IP_DF);

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl)
		h->ip_ttl = r->min_ttl;

	if (r->rule_flag & PFRULE_RANDOMID)
		h->ip_id = ip_randomid();

	return (PF_PASS);

 fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl)
		h->ip_ttl = r->min_ttl;

	return (PF_PASS);
    990 
    991  no_mem:
    992 	REASON_SET(reason, PFRES_MEMORY);
    993 	if (r != NULL && r->log)
    994 		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);
    995 	return (PF_DROP);
    996 
    997  drop:
    998 	REASON_SET(reason, PFRES_NORM);
    999 	if (r != NULL && r->log)
   1000 		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);
   1001 	return (PF_DROP);
   1002 
   1003  bad:
   1004 	DPFPRINTF(("dropping bad fragment\n"));
   1005 
   1006 	/* Free associated fragments */
   1007 	if (frag != NULL)
   1008 		pf_free_fragment(frag);
   1009 
   1010 	REASON_SET(reason, PFRES_FRAG);
   1011 	if (r != NULL && r->log)
   1012 		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);
   1013 
   1014 	return (PF_DROP);
   1015 }
   1016 
   1017 #ifdef INET6
/*
 * Normalize and sanity-check an IPv6 packet against the first matching
 * scrub rule.  Walks the extension header chain, validates hop-by-hop
 * options (including the jumbo payload option), cross-checks the payload
 * length against the mbuf, and enforces the rule's minimum hop limit.
 * Fragments are only bounds-checked here; no reassembly is performed
 * (see the "do something about it" stub below).
 * Returns PF_PASS or PF_DROP; *reason is set on drop.
 */
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
	int			 off;
	struct ip6_ext		 ext;
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_frag		 frag;
	u_int32_t		 jumbolen = 0, plen;
	u_int16_t		 fragoff = 0;
	int			 optend;
	int			 ooff;
	u_int8_t		 proto;
	int			 terminal;

	/* Find the first matching scrub rule, using the skip steps. */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.not))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.not))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	/* No scrub rule matched: nothing to normalize. */
	if (r == NULL)
		return (PF_PASS);
	else
		r->packets++;

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	/*
	 * Walk the extension header chain until a terminal (non-extension)
	 * header or a fragment header is reached.  A short pull at any
	 * point means the chain runs past the packet -> shortpkt.
	 */
	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			/* AH encodes its length in 4-byte units (minus 2),
			 * the other extension headers in 8-byte units. */
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			/* Scan the hop-by-hop options, mainly to validate a
			 * jumbo payload option if present. */
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				/* PAD1 is a single octet with no length */
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				/* Option must fit inside the options area */
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					/* ip6_plen must be 0 in a jumbogram */
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					/* Jumbo length must exceed what a
					 * plain ip6_plen could express, and
					 * must match the actual mbuf size. */
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

 fragment:
	/* A fragment cannot also be a jumbogram */
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	/* Masking with IP6F_OFF_MASK yields the offset in bytes directly */
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	/* Reassembled size must not exceed the maximum IPv6 packet size */
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	return (PF_PASS);

 shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
	return (PF_DROP);

 badfrag:
	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
	return (PF_DROP);
}
   1190 #endif
   1191 
   1192 int
   1193 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
   1194     int off, void *h, struct pf_pdesc *pd)
   1195 {
   1196 	struct pf_rule	*r, *rm = NULL;
   1197 	struct tcphdr	*th = pd->hdr.tcp;
   1198 	int		 rewrite = 0;
   1199 	u_short		 reason;
   1200 	u_int8_t	 flags;
   1201 	sa_family_t	 af = pd->af;
   1202 
   1203 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
   1204 	while (r != NULL) {
   1205 		r->evaluations++;
   1206 		if (r->kif != NULL &&
   1207 		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
   1208 			r = r->skip[PF_SKIP_IFP].ptr;
   1209 		else if (r->direction && r->direction != dir)
   1210 			r = r->skip[PF_SKIP_DIR].ptr;
   1211 		else if (r->af && r->af != af)
   1212 			r = r->skip[PF_SKIP_AF].ptr;
   1213 		else if (r->proto && r->proto != pd->proto)
   1214 			r = r->skip[PF_SKIP_PROTO].ptr;
   1215 		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
   1216 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
   1217 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
   1218 			    r->src.port[0], r->src.port[1], th->th_sport))
   1219 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
   1220 		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
   1221 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
   1222 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
   1223 			    r->dst.port[0], r->dst.port[1], th->th_dport))
   1224 			r = r->skip[PF_SKIP_DST_PORT].ptr;
   1225 		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
   1226 			    pf_osfp_fingerprint(pd, m, off, th),
   1227 			    r->os_fingerprint))
   1228 			r = TAILQ_NEXT(r, entries);
   1229 		else {
   1230 			rm = r;
   1231 			break;
   1232 		}
   1233 	}
   1234 
   1235 	if (rm == NULL)
   1236 		return (PF_PASS);
   1237 	else
   1238 		r->packets++;
   1239 
   1240 	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
   1241 		pd->flags |= PFDESC_TCP_NORM;
   1242 
   1243 	flags = th->th_flags;
   1244 	if (flags & TH_SYN) {
   1245 		/* Illegal packet */
   1246 		if (flags & TH_RST)
   1247 			goto tcp_drop;
   1248 
   1249 		if (flags & TH_FIN)
   1250 			flags &= ~TH_FIN;
   1251 	} else {
   1252 		/* Illegal packet */
   1253 		if (!(flags & (TH_ACK|TH_RST)))
   1254 			goto tcp_drop;
   1255 	}
   1256 
   1257 	if (!(flags & TH_ACK)) {
   1258 		/* These flags are only valid if ACK is set */
   1259 		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
   1260 			goto tcp_drop;
   1261 	}
   1262 
   1263 	/* Check for illegal header length */
   1264 	if (th->th_off < (sizeof(struct tcphdr) >> 2))
   1265 		goto tcp_drop;
   1266 
   1267 	/* If flags changed, or reserved data set, then adjust */
   1268 	if (flags != th->th_flags || th->th_x2 != 0) {
   1269 		u_int16_t	ov, nv;
   1270 
   1271 		ov = *(u_int16_t *)(&th->th_ack + 1);
   1272 		th->th_flags = flags;
   1273 		th->th_x2 = 0;
   1274 		nv = *(u_int16_t *)(&th->th_ack + 1);
   1275 
   1276 		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv);
   1277 		rewrite = 1;
   1278 	}
   1279 
   1280 	/* Remove urgent pointer, if TH_URG is not set */
   1281 	if (!(flags & TH_URG) && th->th_urp) {
   1282 		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0);
   1283 		th->th_urp = 0;
   1284 		rewrite = 1;
   1285 	}
   1286 
   1287 	/* Process options */
   1288 	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
   1289 		rewrite = 1;
   1290 
   1291 	/* copy back packet headers if we sanitized */
   1292 	if (rewrite)
   1293 		m_copyback(m, off, sizeof(*th), th);
   1294 
   1295 	return (PF_PASS);
   1296 
   1297  tcp_drop:
   1298 	REASON_SET(&reason, PFRES_NORM);
   1299 	if (rm != NULL && r->log)
   1300 		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL);
   1301 	return (PF_DROP);
   1302 }
   1303 
   1304 int
   1305 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
   1306     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
   1307 {
   1308 	u_int8_t hdr[60];
   1309 	u_int8_t *opt;
   1310 
   1311 	KASSERT(src->scrub == NULL);
   1312 
   1313 	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
   1314 	if (src->scrub == NULL)
   1315 		return (1);
   1316 	bzero(src->scrub, sizeof(*src->scrub));
   1317 
   1318 	switch (pd->af) {
   1319 #ifdef INET
   1320 	case AF_INET: {
   1321 		struct ip *h = mtod(m, struct ip *);
   1322 		src->scrub->pfss_ttl = h->ip_ttl;
   1323 		break;
   1324 	}
   1325 #endif /* INET */
   1326 #ifdef INET6
   1327 	case AF_INET6: {
   1328 		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
   1329 		src->scrub->pfss_ttl = h->ip6_hlim;
   1330 		break;
   1331 	}
   1332 #endif /* INET6 */
   1333 	}
   1334 
   1335 
   1336 	/*
   1337 	 * All normalizations below are only begun if we see the start of
   1338 	 * the connections.  They must all set an enabled bit in pfss_flags
   1339 	 */
   1340 	if ((th->th_flags & TH_SYN) == 0)
   1341 		return (0);
   1342 
   1343 
   1344 	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
   1345 	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
   1346 		/* Diddle with TCP options */
   1347 		int hlen;
   1348 		opt = hdr + sizeof(struct tcphdr);
   1349 		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
   1350 		while (hlen >= TCPOLEN_TIMESTAMP) {
   1351 			switch (*opt) {
   1352 			case TCPOPT_EOL:	/* FALLTHROUGH */
   1353 			case TCPOPT_NOP:
   1354 				opt++;
   1355 				hlen--;
   1356 				break;
   1357 			case TCPOPT_TIMESTAMP:
   1358 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
   1359 					src->scrub->pfss_flags |=
   1360 					    PFSS_TIMESTAMP;
   1361 					src->scrub->pfss_ts_mod = arc4random();
   1362 				}
   1363 				/* FALLTHROUGH */
   1364 			default:
   1365 				hlen -= opt[1];
   1366 				opt += opt[1];
   1367 				break;
   1368 			}
   1369 		}
   1370 	}
   1371 
   1372 	return (0);
   1373 }
   1374 
   1375 void
   1376 pf_normalize_tcp_cleanup(struct pf_state *state)
   1377 {
   1378 	if (state->src.scrub)
   1379 		pool_put(&pf_state_scrub_pl, state->src.scrub);
   1380 	if (state->dst.scrub)
   1381 		pool_put(&pf_state_scrub_pl, state->dst.scrub);
   1382 
   1383 	/* Someday... flush the TCP segment reassembly descriptors. */
   1384 }
   1385 
   1386 int
   1387 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
   1388     u_short *reason, struct tcphdr *th, struct pf_state_peer *src,
   1389     struct pf_state_peer *dst, int *writeback)
   1390 {
   1391 	u_int8_t hdr[60];
   1392 	u_int8_t *opt;
   1393 	int copyback = 0;
   1394 
   1395 	KASSERT(src->scrub || dst->scrub);
   1396 
   1397 	/*
   1398 	 * Enforce the minimum TTL seen for this connection.  Negate a common
   1399 	 * technique to evade an intrusion detection system and confuse
   1400 	 * firewall state code.
   1401 	 */
   1402 	switch (pd->af) {
   1403 #ifdef INET
   1404 	case AF_INET: {
   1405 		if (src->scrub) {
   1406 			struct ip *h = mtod(m, struct ip *);
   1407 			if (h->ip_ttl > src->scrub->pfss_ttl)
   1408 				src->scrub->pfss_ttl = h->ip_ttl;
   1409 			h->ip_ttl = src->scrub->pfss_ttl;
   1410 		}
   1411 		break;
   1412 	}
   1413 #endif /* INET */
   1414 #ifdef INET6
   1415 	case AF_INET6: {
   1416 		if (src->scrub) {
   1417 			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
   1418 			if (h->ip6_hlim > src->scrub->pfss_ttl)
   1419 				src->scrub->pfss_ttl = h->ip6_hlim;
   1420 			h->ip6_hlim = src->scrub->pfss_ttl;
   1421 		}
   1422 		break;
   1423 	}
   1424 #endif /* INET6 */
   1425 	}
   1426 
   1427 	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
   1428 	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
   1429 	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
   1430 	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
   1431 		/* Diddle with TCP options */
   1432 		int hlen;
   1433 		opt = hdr + sizeof(struct tcphdr);
   1434 		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
   1435 		while (hlen >= TCPOLEN_TIMESTAMP) {
   1436 			switch (*opt) {
   1437 			case TCPOPT_EOL:	/* FALLTHROUGH */
   1438 			case TCPOPT_NOP:
   1439 				opt++;
   1440 				hlen--;
   1441 				break;
   1442 			case TCPOPT_TIMESTAMP:
   1443 				/* Modulate the timestamps.  Can be used for
   1444 				 * NAT detection, OS uptime determination or
   1445 				 * reboot detection.
   1446 				 */
   1447 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
   1448 					u_int32_t ts_value;
   1449 					if (src->scrub &&
   1450 					    (src->scrub->pfss_flags &
   1451 					    PFSS_TIMESTAMP)) {
   1452 						memcpy(&ts_value, &opt[2],
   1453 						    sizeof(u_int32_t));
   1454 						ts_value = htonl(ntohl(ts_value)
   1455 						    + src->scrub->pfss_ts_mod);
   1456 						pf_change_a(&opt[2],
   1457 						    &th->th_sum, ts_value, 0);
   1458 						copyback = 1;
   1459 					}
   1460 
   1461 					/* Modulate TS reply iff valid (!0) */
   1462 					memcpy(&ts_value, &opt[6],
   1463 					    sizeof(u_int32_t));
   1464 					if (ts_value && dst->scrub &&
   1465 					    (dst->scrub->pfss_flags &
   1466 					    PFSS_TIMESTAMP)) {
   1467 						ts_value = htonl(ntohl(ts_value)
   1468 						    - dst->scrub->pfss_ts_mod);
   1469 						pf_change_a(&opt[6],
   1470 						    &th->th_sum, ts_value, 0);
   1471 						copyback = 1;
   1472 					}
   1473 				}
   1474 				/* FALLTHROUGH */
   1475 			default:
   1476 				hlen -= opt[1];
   1477 				opt += opt[1];
   1478 				break;
   1479 			}
   1480 		}
   1481 		if (copyback) {
   1482 			/* Copyback the options, caller copys back header */
   1483 			*writeback = 1;
   1484 			m_copyback(m, off + sizeof(struct tcphdr),
   1485 			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
   1486 			    sizeof(struct tcphdr));
   1487 		}
   1488 	}
   1489 
   1490 
   1491 	/* I have a dream....  TCP segment reassembly.... */
   1492 	return (0);
   1493 }
   1494 int
   1495 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
   1496     int off)
   1497 {
   1498 	u_int16_t	*mss;
   1499 	int		 thoff;
   1500 	int		 opt, cnt, optlen = 0;
   1501 	int		 rewrite = 0;
   1502 	u_char		*optp;
   1503 
   1504 	thoff = th->th_off << 2;
   1505 	cnt = thoff - sizeof(struct tcphdr);
   1506 	optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
   1507 
   1508 	for (; cnt > 0; cnt -= optlen, optp += optlen) {
   1509 		opt = optp[0];
   1510 		if (opt == TCPOPT_EOL)
   1511 			break;
   1512 		if (opt == TCPOPT_NOP)
   1513 			optlen = 1;
   1514 		else {
   1515 			if (cnt < 2)
   1516 				break;
   1517 			optlen = optp[1];
   1518 			if (optlen < 2 || optlen > cnt)
   1519 				break;
   1520 		}
   1521 		switch (opt) {
   1522 		case TCPOPT_MAXSEG:
   1523 			mss = (u_int16_t *)(optp + 2);
   1524 			if ((ntohs(*mss)) > r->max_mss) {
   1525 				th->th_sum = pf_cksum_fixup(th->th_sum,
   1526 				    *mss, htons(r->max_mss));
   1527 				*mss = htons(r->max_mss);
   1528 				rewrite = 1;
   1529 			}
   1530 			break;
   1531 		default:
   1532 			break;
   1533 		}
   1534 	}
   1535 
   1536 	return (rewrite);
   1537 }
   1538