/*	$NetBSD: pf_norm.c,v 1.15.8.1 2007/03/13 16:51:29 ad Exp $	*/
/*	$OpenBSD: pf_norm.c,v 1.97 2004/09/21 16:59:12 aaron Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef _KERNEL_OPT
#include "opt_inet.h"
#endif

#include "pflog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>

#ifdef __OpenBSD__
#include <dev/rndvar.h>
#else
#include <sys/rnd.h>
#endif
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <net/pfvar.h>

struct pf_frent {
	LIST_ENTRY(pf_frent)	fr_next;
	struct ip		*fr_ip;
	struct mbuf		*fr_m;
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache)	fr_next;
	uint16_t		fr_off;
	uint16_t		fr_end;
};

#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this packet */
#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004		/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))

struct pf_fragment {
	RB_ENTRY(pf_fragment)	fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct in_addr	fr_src;
	struct in_addr	fr_dst;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_id;		/* fragment id for reassemble */
	u_int16_t	fr_max;		/* fragment data max */
	u_int32_t	fr_timeout;
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};
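/*
 * A pf_fragment is tracked in one of two modes.  Buffering mode queues
 * every fragment (fr_queue) until the full packet can be reassembled.
 * The non-buffering cache (fr_cache, marked with PFFRAG_NOBUFFER) only
 * records the byte ranges already seen, so overlaps can be trimmed or
 * dropped without holding on to the data.
 */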

TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_ip2key(struct pf_fragment *, struct ip *);
void			 pf_remove_fragment(struct pf_fragment *);
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);
struct mbuf		*pf_fragcache(struct mbuf **, struct ip *,
			    struct pf_fragment **, int, int, int *);
int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
			    struct tcphdr *, int);

#define	DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while(0)

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
struct pool		 pf_state_scrub_pl;
int			 pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
#ifdef __NetBSD__
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL, IPL_SOFTNET);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL, IPL_SOFTNET);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL, IPL_SOFTNET);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL, IPL_SOFTNET);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL, IPL_SOFTNET);
#else
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);
#endif

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}

#ifdef _LKM
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else if (a->fr_src.s_addr < b->fr_src.s_addr)
		return (-1);
	else if (a->fr_src.s_addr > b->fr_src.s_addr)
		return (1);
	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
		return (-1);
	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
		return (1);
	return (0);
}

void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	u_int32_t		 expire = time_second -
				    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		KASSERT(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		KASSERT(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}

/*
 * Try to flush old fragments to make space for new ones
 */

void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag;
	int			 goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/* Frees the fragments and all associated entries */

void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent		*frent;
	struct pf_frcache	*frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			KASSERT(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}

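/* Build the RB-tree lookup key for a fragment: (id, proto, src, dst). */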
void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_src.s_addr = ip->ip_src.s_addr;
	key->fr_dst.s_addr = ip->ip_dst.s_addr;
}

struct pf_fragment *
pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment	 key;
	struct pf_fragment	*frag;

	pf_ip2key(&key, ip);

	frag = RB_FIND(pf_frag_tree, tree, &key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = time_second;
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}

/* Removes a fragment from the fragment queue and frees the fragment */

void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}

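/* Byte offset of a queued fragment; ip_off counts 8-byte units. */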
#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t	 max = ip_len + off;

	KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time_second;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	KASSERT(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}
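
	/*
	 * Worked example: if frep covers bytes 0-63 and this fragment
	 * starts at off 56 with 32 bytes of payload, precut is 8: the
	 * first 8 payload bytes are trimmed, ip_off advances by one
	 * 8-byte unit and off becomes 64.
	 */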
430
431 for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
432 frea = next)
433 {
434 u_int16_t aftercut;
435
436 aftercut = ip_len + off - FR_IP_OFF(frea);
437 DPFPRINTF(("adjust overlap %d\n", aftercut));
438 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
439 * 4)
440 {
441 frea->fr_ip->ip_len =
442 htons(ntohs(frea->fr_ip->ip_len) - aftercut);
443 frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
444 (aftercut >> 3));
445 m_adj(frea->fr_m, aftercut);
446 break;
447 }
448
		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}
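
	/*
	 * The loop above handles the mirror case: a fragment ending at
	 * byte 128 that overlaps a successor starting at 120 trims 8
	 * bytes (aftercut) off that successor's head; successors it
	 * covers entirely are freed.
	 */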

 insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
#if defined(__NetBSD__)
		m->m_pkthdr.csum_flags = 0;
#endif /* defined(__NetBSD__) */
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

 drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}

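/*
 * The non-buffering fragment cache: remember which byte ranges of the
 * packet have already been passed, trim the parts of a new fragment
 * that overlap a range seen before (or mark the whole packet for drop
 * when the rule says so), and let the trimmed fragment continue on.
 * Returns the possibly-rewritten mbuf, or NULL if it was consumed.
 */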
struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf		*m = *m0;
	struct pf_frcache	*frp, *fra, *cur = NULL;
	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
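	/*
	 * Note: assigning (ntohs(h->ip_off) << 3) to a 16-bit variable
	 * shifts the IP_MF/IP_DF flag bits out of the result, so off
	 * ends up as the pure byte offset.
	 */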
	u_int16_t		 off = ntohs(h->ip_off) << 3;
	u_int16_t		 max = ip_len + off;
	int			 hosed = 0;

	KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = h->ip_src;
		(*frag)->fr_dst = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = time_second;

		cur->fr_off = off;
		cur->fr_end = max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
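	/*
	 * After the scan below, frp is the last cached range that starts
	 * at or before off (or NULL) and fra is the first range that
	 * starts after off (or NULL); the new fragment is checked
	 * against both neighbors.
	 */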
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	KASSERT(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int	precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			frp->fr_end = max;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    max));

			off += precut;
			max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = max;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				KASSERT((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}

				h = mtod(m, struct ip *);

				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int	aftercut;
		int	merge = 0;

		aftercut = max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				KASSERT(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

 pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

 no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

 drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}

int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 ip_off;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL)
		return (PF_PASS);
	else
		r->packets++;

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF)
		h->ip_off &= htons(~IP_DF);

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment(h, &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int	nomem = 0;

		if (dir == PF_OUT) {
			if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) !=
			    NULL) {
				/* Already passed the fragment cache in the
				 * input direction.  If we continued, it would
				 * appear to be a dup and would be dropped.
				 */
				goto fragment_pass;
			}
		}

		frag = pf_find_fragment(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		if (dir == PF_IN) {
			struct m_tag	*mtag;

			mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT);
			if (mtag == NULL)
				goto no_mem;
			m_tag_prepend(m, mtag);
		}
		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

 no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	h->ip_off &= htons(IP_DF);

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl)
		h->ip_ttl = r->min_ttl;

	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

 fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl)
		h->ip_ttl = r->min_ttl;
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

 no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);
	return (PF_DROP);

 bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL);

	return (PF_DROP);
}

#ifdef INET6
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
	int			 off;
	struct ip6_ext		 ext;
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_frag		 frag;
	u_int32_t		 jumbolen = 0, plen;
	u_int16_t		 fragoff = 0;
	int			 optend;
	int			 ooff;
	u_int8_t		 proto;
	int			 terminal;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL)
		return (PF_PASS);
	else
		r->packets++;

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
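			/*
			 * ip6e_len counts 32-bit words, less two, for AH
			 * (total (ip6e_len + 2) * 4 bytes) and 8-byte
			 * units, less one, for the other extensions
			 * (total (ip6e_len + 1) * 8 bytes).
			 */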
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

 fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

 shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
	return (PF_DROP);

 badfrag:
	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL);
	return (PF_DROP);
}
#endif /* INET6 */

int
pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m,
    int ipoff, int off, void *h, struct pf_pdesc *pd)
{
	struct pf_rule	*r, *rm = NULL;
	struct tcphdr	*th = pd->hdr.tcp;
	int		 rewrite = 0;
	u_short		 reason;
	u_int8_t	 flags;
	sa_family_t	 af = pd->af;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (r->kif != NULL &&
		    (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], th->th_sport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], th->th_dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
		    pf_osfp_fingerprint(pd, m, off, th),
		    r->os_fingerprint))
			r = TAILQ_NEXT(r, entries);
		else {
			rm = r;
			break;
		}
	}

	if (rm == NULL || rm->action == PF_NOSCRUB)
		return (PF_PASS);
	else
		r->packets++;

	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
		pd->flags |= PFDESC_TCP_NORM;

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof(struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* Process options */
	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
		rewrite = 1;

	/* copy back packet headers if we sanitized */
	if (rewrite)
		m_copyback(m, off, sizeof(*th), th);

	return (PF_PASS);

 tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL);
	return (PF_DROP);
}

int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src,
    struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];
	u_int8_t *opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof(*src->scrub));

	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection.  They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	if (state->dst.scrub)
		pool_put(&pf_state_scrub_pl, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}

int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval	 uptime;
	u_int32_t	 tsval, tsecr;
	u_int		 tsval_from_last;
	u_int8_t	 hdr[60];
	u_int8_t	*opt;
	int		 copyback = 0;
	int		 got_ts = 0;

	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/* Modulate the timestamps.  Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copies back header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_second - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4GB).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval	delta_ts;
		int		ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1 kHz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
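
		/*
		 * Worked example: with TS_MAXFREQ 1100 (1 kHz plus 10%
		 * skew), a packet arriving 2 seconds after the previous
		 * one with a 30 second fudge may advance tsval by up to
		 * (2 + 30) * 1100 = 35200 ticks.  The check below then
		 * drops the packet if tsval is below the peer's last
		 * echoed value ('0' in the debug output), tsval runs
		 * ahead of our last tsval by more than that allowance
		 * ('1'), or tsecr falls outside the window between the
		 * peer's first and largest tsval ('2' and '3').
		 */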
		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
			DPFPRINTF((" tsval: %" PRIu32 " tsecr: %" PRIu32
			    " +ticks: %" PRIu32 " idle: %lus %lums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %" PRIu32 " tsecr: %" PRIu32
			    "\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %" PRIu32 " tsecr: %" PRIu32
			    " tsval0: %" PRIu32 "\n",
			    dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets be
	 * timestamped.  But I think there are middle-man devices that hijack
	 * TCP streams immediately after the 3whs and don't timestamp their
	 * packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}

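/*
 * Enforce the scrub rule's max-mss: walk the TCP options and clamp any
 * MSS option larger than r->max_mss, fixing up the checksum
 * incrementally.  Returns 1 if the header was rewritten.
 */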
int
pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
    int off)
{
	u_int16_t	*mss;
	int		 thoff;
	int		 opt, cnt, optlen = 0;
	int		 rewrite = 0;
	u_char		*optp;

	thoff = th->th_off << 2;
	cnt = thoff - sizeof(struct tcphdr);
	optp = mtod(m, u_char *) + off + sizeof(struct tcphdr);

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG:
			mss = (u_int16_t *)(optp + 2);
			if ((ntohs(*mss)) > r->max_mss) {
				th->th_sum = pf_cksum_fixup(th->th_sum,
				    *mss, htons(r->max_mss), 0);
				*mss = htons(r->max_mss);
				rewrite = 1;
			}
			break;
		default:
			break;
		}
	}

	return (rewrite);
}