/*	$NetBSD: pf_norm.c,v 1.26 2011/11/28 08:05:05 tls Exp $	*/
/*	$OpenBSD: pf_norm.c,v 1.109 2007/05/28 17:16:39 henning Exp $ */
3
4 /*
5 * Copyright 2001 Niels Provos <provos (at) citi.umich.edu>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: pf_norm.c,v 1.26 2011/11/28 08:05:05 tls Exp $");
31
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35
36 #include "pflog.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/mbuf.h>
41 #include <sys/filio.h>
42 #include <sys/fcntl.h>
43 #include <sys/socket.h>
44 #include <sys/kernel.h>
45 #include <sys/time.h>
46 #include <sys/pool.h>
47
48 #ifdef __NetBSD__
49 #include <sys/rnd.h>
50 #include <sys/cprng.h>
51 #else
52 #include <dev/rndvar.h>
53 #endif /* !__NetBSD__ */
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/bpf.h>
57 #include <net/route.h>
58 #include <net/if_pflog.h>
59
60 #include <netinet/in.h>
61 #include <netinet/in_var.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/tcp.h>
66 #include <netinet/tcp_seq.h>
67 #include <netinet/udp.h>
68 #include <netinet/ip_icmp.h>
69
70 #ifdef INET6
71 #include <netinet/ip6.h>
72 #endif /* INET6 */
73
74 #include <net/pfvar.h>
75
76 struct pf_frent {
77 LIST_ENTRY(pf_frent) fr_next;
78 struct ip *fr_ip;
79 struct mbuf *fr_m;
80 };
81
82 struct pf_frcache {
83 LIST_ENTRY(pf_frcache) fr_next;
84 uint16_t fr_off;
85 uint16_t fr_end;
86 };
87
#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this packet */
#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004		/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
92
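/*
 * One pf_fragment exists per (source, destination, protocol, id) tuple.
 * When buffering (PFFRAG_NOBUFFER clear) fru_queue holds one pf_frent,
 * with its mbuf, per received fragment until the packet is reassembled.
 * In the non-buffering cache, fru_cache only records the byte ranges
 * (pf_frcache) that have already been passed on.
 */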
93 struct pf_fragment {
94 RB_ENTRY(pf_fragment) fr_entry;
95 TAILQ_ENTRY(pf_fragment) frag_next;
96 struct in_addr fr_src;
97 struct in_addr fr_dst;
98 u_int8_t fr_p; /* protocol of this fragment */
99 u_int8_t fr_flags; /* status flags */
100 u_int16_t fr_id; /* fragment id for reassemble */
101 u_int16_t fr_max; /* fragment data max */
102 u_int32_t fr_timeout;
103 #define fr_queue fr_u.fru_queue
104 #define fr_cache fr_u.fru_cache
105 union {
106 LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */
107 LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */
108 } fr_u;
109 };
110
111 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue;
112 TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue;
113
114 static __inline int pf_frag_compare(struct pf_fragment *,
115 struct pf_fragment *);
116 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree;
117 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
118 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
119
120 /* Private prototypes */
121 void pf_ip2key(struct pf_fragment *, struct ip *);
122 void pf_remove_fragment(struct pf_fragment *);
123 void pf_flush_fragments(void);
124 void pf_free_fragment(struct pf_fragment *);
125 struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *);
126 struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
127 struct pf_frent *, int);
128 struct mbuf *pf_fragcache(struct mbuf **, struct ip*,
129 struct pf_fragment **, int, int, int *);
130 int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
131 struct tcphdr *, int);
132
133 #define DPFPRINTF(x) do { \
134 if (pf_status.debug >= PF_DEBUG_MISC) { \
135 printf("%s: ", __func__); \
136 printf x ; \
137 } \
138 } while(0)
139
140 /* Globals */
141 struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
142 struct pool pf_state_scrub_pl;
143 int pf_nfrents, pf_ncache;
144
145 void
146 pf_normalize_init(void)
147 {
148 #ifdef __NetBSD__
149 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
150 NULL, IPL_SOFTNET);
151 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
152 NULL, IPL_SOFTNET);
153 pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
154 "pffrcache", NULL, IPL_SOFTNET);
155 pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
156 NULL, IPL_SOFTNET);
157 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
158 "pfstscr", NULL, IPL_SOFTNET);
159 #else
160 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
161 NULL);
162 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
163 NULL);
164 pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
165 "pffrcache", NULL);
166 pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
167 NULL);
168 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
169 "pfstscr", NULL);
170 #endif /* !__NetBSD__ */
171
172 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
173 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
174 pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
175 pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
176
177 TAILQ_INIT(&pf_fragqueue);
178 TAILQ_INIT(&pf_cachequeue);
179 }
180
181 #ifdef _MODULE
182 void
183 pf_normalize_destroy(void)
184 {
185 pool_destroy(&pf_state_scrub_pl);
186 pool_destroy(&pf_cent_pl);
187 pool_destroy(&pf_cache_pl);
188 pool_destroy(&pf_frag_pl);
189 pool_destroy(&pf_frent_pl);
190 }
191 #endif /* _MODULE */
192
193 static __inline int
194 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
195 {
196 int diff;
197
198 if ((diff = a->fr_id - b->fr_id))
199 return (diff);
200 else if ((diff = a->fr_p - b->fr_p))
201 return (diff);
202 else if (a->fr_src.s_addr < b->fr_src.s_addr)
203 return (-1);
204 else if (a->fr_src.s_addr > b->fr_src.s_addr)
205 return (1);
206 else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
207 return (-1);
208 else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
209 return (1);
210 return (0);
211 }
212
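/*
 * Both queues are kept in LRU order: new fragments are inserted at the
 * head and pf_find_fragment() moves hits back to the head, so expiring
 * from the tail until a fresh entry is found is sufficient.
 */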
213 void
214 pf_purge_expired_fragments(void)
215 {
216 struct pf_fragment *frag;
217 u_int32_t expire = time_second -
218 pf_default_rule.timeout[PFTM_FRAG];
219
220 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
221 KASSERT(BUFFER_FRAGMENTS(frag));
222 if (frag->fr_timeout > expire)
223 break;
224
225 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
226 pf_free_fragment(frag);
227 }
228
229 while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
230 KASSERT(!BUFFER_FRAGMENTS(frag));
231 if (frag->fr_timeout > expire)
232 break;
233
234 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
235 pf_free_fragment(frag);
236 KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
237 TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
238 }
239 }
240
241 /*
242 * Try to flush old fragments to make space for new ones
243 */
244
245 void
246 pf_flush_fragments(void)
247 {
248 struct pf_fragment *frag;
249 int goal;
250
251 goal = pf_nfrents * 9 / 10;
252 DPFPRINTF(("trying to free > %d frents\n",
253 pf_nfrents - goal));
254 while (goal < pf_nfrents) {
255 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
256 if (frag == NULL)
257 break;
258 pf_free_fragment(frag);
259 }
260
261
262 goal = pf_ncache * 9 / 10;
263 DPFPRINTF(("trying to free > %d cache entries\n",
264 pf_ncache - goal));
265 while (goal < pf_ncache) {
266 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
267 if (frag == NULL)
268 break;
269 pf_free_fragment(frag);
270 }
271 }
272
273 /* Frees the fragments and all associated entries */
274
275 void
276 pf_free_fragment(struct pf_fragment *frag)
277 {
278 struct pf_frent *frent;
279 struct pf_frcache *frcache;
280
281 /* Free all fragments */
282 if (BUFFER_FRAGMENTS(frag)) {
283 for (frent = LIST_FIRST(&frag->fr_queue); frent;
284 frent = LIST_FIRST(&frag->fr_queue)) {
285 LIST_REMOVE(frent, fr_next);
286
287 m_freem(frent->fr_m);
288 pool_put(&pf_frent_pl, frent);
289 pf_nfrents--;
290 }
291 } else {
292 for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
293 frcache = LIST_FIRST(&frag->fr_cache)) {
294 LIST_REMOVE(frcache, fr_next);
295
296 KASSERT(LIST_EMPTY(&frag->fr_cache) ||
297 LIST_FIRST(&frag->fr_cache)->fr_off >
298 frcache->fr_end);
299
300 pool_put(&pf_cent_pl, frcache);
301 pf_ncache--;
302 }
303 }
304
305 pf_remove_fragment(frag);
306 }
307
308 void
309 pf_ip2key(struct pf_fragment *key, struct ip *ip)
310 {
311 key->fr_p = ip->ip_p;
312 key->fr_id = ip->ip_id;
313 key->fr_src.s_addr = ip->ip_src.s_addr;
314 key->fr_dst.s_addr = ip->ip_dst.s_addr;
315 }
316
317 struct pf_fragment *
318 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
319 {
320 struct pf_fragment key;
321 struct pf_fragment *frag;
322
323 pf_ip2key(&key, ip);
324
325 frag = RB_FIND(pf_frag_tree, tree, &key);
326 if (frag != NULL) {
327 /* XXX Are we sure we want to update the timeout? */
328 frag->fr_timeout = time_second;
329 if (BUFFER_FRAGMENTS(frag)) {
330 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
331 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
332 } else {
333 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
334 TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
335 }
336 }
337
338 return (frag);
339 }
340
341 /* Removes a fragment from the fragment queue and frees the fragment */
342
343 void
344 pf_remove_fragment(struct pf_fragment *frag)
345 {
346 if (BUFFER_FRAGMENTS(frag)) {
347 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
348 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
349 pool_put(&pf_frag_pl, frag);
350 } else {
351 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
352 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
353 pool_put(&pf_cache_pl, frag);
354 }
355 }
356
357 #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
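/*
 * Buffering reassembly: link frent into the offset-sorted queue of *frag,
 * trimming any overlap with its neighbours, and return the reassembled
 * packet once the last fragment has been seen and no holes remain.
 * Returns NULL while the packet is still incomplete or when the fragment
 * had to be dropped.  FR_IP_OFF() converts the 8-byte fragment offset
 * units to bytes, e.g. an offset field of 185 is 185 * 8 = 1480 bytes.
 */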
358 struct mbuf *
359 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
360 struct pf_frent *frent, int mff)
361 {
362 struct mbuf *m = *m0, *m2;
363 struct pf_frent *frea, *next;
364 struct pf_frent *frep = NULL;
365 struct ip *ip = frent->fr_ip;
366 int hlen = ip->ip_hl << 2;
367 u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
368 u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
369 u_int16_t frmax = ip_len + off;
370
371 KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));
372
373 /* Strip off ip header */
374 m->m_data += hlen;
375 m->m_len -= hlen;
376
377 /* Create a new reassembly queue for this packet */
378 if (*frag == NULL) {
379 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
380 if (*frag == NULL) {
381 pf_flush_fragments();
382 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
383 if (*frag == NULL)
384 goto drop_fragment;
385 }
386
387 (*frag)->fr_flags = 0;
388 (*frag)->fr_max = 0;
389 (*frag)->fr_src = frent->fr_ip->ip_src;
390 (*frag)->fr_dst = frent->fr_ip->ip_dst;
391 (*frag)->fr_p = frent->fr_ip->ip_p;
392 (*frag)->fr_id = frent->fr_ip->ip_id;
393 (*frag)->fr_timeout = time_second;
394 LIST_INIT(&(*frag)->fr_queue);
395
396 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
397 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
398
399 /* We do not have a previous fragment */
400 frep = NULL;
401 goto insert;
402 }
403
404 /*
405 * Find a fragment after the current one:
406 * - off contains the real shifted offset.
407 */
408 LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
409 if (FR_IP_OFF(frea) > off)
410 break;
411 frep = frea;
412 }
413
414 KASSERT(frep != NULL || frea != NULL);
415
416 if (frep != NULL &&
417 FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
418 4 > off)
419 {
420 u_int16_t precut;
421
422 precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
423 frep->fr_ip->ip_hl * 4 - off;
424 if (precut >= ip_len)
425 goto drop_fragment;
426 m_adj(frent->fr_m, precut);
427 DPFPRINTF(("overlap -%d\n", precut));
428 /* Enforce 8 byte boundaries */
429 ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
430 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
431 ip_len -= precut;
432 ip->ip_len = htons(ip_len);
433 }
434
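	/*
	 * Trim or discard queued fragments that the new fragment overlaps
	 * from the front; aftercut is how far it reaches into them.
	 */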
435 for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
436 frea = next)
437 {
438 u_int16_t aftercut;
439
440 aftercut = ip_len + off - FR_IP_OFF(frea);
441 DPFPRINTF(("adjust overlap %d\n", aftercut));
442 if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
443 * 4)
444 {
445 frea->fr_ip->ip_len =
446 htons(ntohs(frea->fr_ip->ip_len) - aftercut);
447 frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
448 (aftercut >> 3));
449 m_adj(frea->fr_m, aftercut);
450 break;
451 }
452
453 /* This fragment is completely overlapped, lose it */
454 next = LIST_NEXT(frea, fr_next);
455 m_freem(frea->fr_m);
456 LIST_REMOVE(frea, fr_next);
457 pool_put(&pf_frent_pl, frea);
458 pf_nfrents--;
459 }
460
461 insert:
462 /* Update maximum data size */
463 if ((*frag)->fr_max < frmax)
464 (*frag)->fr_max = frmax;
465 /* This is the last segment */
466 if (!mff)
467 (*frag)->fr_flags |= PFFRAG_SEENLAST;
468
469 if (frep == NULL)
470 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
471 else
472 LIST_INSERT_AFTER(frep, frent, fr_next);
473
474 /* Check if we are completely reassembled */
475 if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
476 return (NULL);
477
478 /* Check if we have all the data */
479 off = 0;
480 for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
481 next = LIST_NEXT(frep, fr_next);
482
483 off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
484 if (off < (*frag)->fr_max &&
485 (next == NULL || FR_IP_OFF(next) != off))
486 {
487 DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
488 off, next == NULL ? -1 : FR_IP_OFF(next),
489 (*frag)->fr_max));
490 return (NULL);
491 }
492 }
493 DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
494 if (off < (*frag)->fr_max)
495 return (NULL);
496
497 /* We have all the data */
498 frent = LIST_FIRST(&(*frag)->fr_queue);
499 KASSERT(frent != NULL);
500 if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
501 DPFPRINTF(("drop: too big: %d\n", off));
502 pf_free_fragment(*frag);
503 *frag = NULL;
504 return (NULL);
505 }
506 next = LIST_NEXT(frent, fr_next);
507
508 /* Magic from ip_input */
509 ip = frent->fr_ip;
510 m = frent->fr_m;
511 m2 = m->m_next;
512 m->m_next = NULL;
513 m_cat(m, m2);
514 pool_put(&pf_frent_pl, frent);
515 pf_nfrents--;
516 for (frent = next; frent != NULL; frent = next) {
517 next = LIST_NEXT(frent, fr_next);
518
519 m2 = frent->fr_m;
520 pool_put(&pf_frent_pl, frent);
521 pf_nfrents--;
522 m_cat(m, m2);
523 }
524
525 ip->ip_src = (*frag)->fr_src;
526 ip->ip_dst = (*frag)->fr_dst;
527
528 /* Remove from fragment queue */
529 pf_remove_fragment(*frag);
530 *frag = NULL;
531
532 hlen = ip->ip_hl << 2;
533 ip->ip_len = htons(off + hlen);
534 m->m_len += hlen;
535 m->m_data -= hlen;
536
537 /* some debugging cruft by sklower, below, will go away soon */
538 /* XXX this should be done elsewhere */
539 if (m->m_flags & M_PKTHDR) {
540 int plen = 0;
541 for (m2 = m; m2; m2 = m2->m_next)
542 plen += m2->m_len;
543 m->m_pkthdr.len = plen;
544 #ifdef __NetBSD__
545 m->m_pkthdr.csum_flags = 0;
546 #endif /* __NetBSD__ */
547 }
548
549 DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
550 return (m);
551
552 drop_fragment:
553 /* Oops - fail safe - drop packet */
554 pool_put(&pf_frent_pl, frent);
555 pf_nfrents--;
556 m_freem(m);
557 return (NULL);
558 }
559
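/*
 * Non-buffering fragment cache: every fragment is forwarded individually
 * while pf only remembers the byte ranges already seen.  Overlapping data
 * is trimmed from the mbuf (fragment crop), or with the drop option the
 * whole fragment chain is marked PFFRAG_DROP.  Returns the (possibly
 * trimmed) mbuf, or NULL if the fragment was dropped or memory ran out.
 */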
560 struct mbuf *
561 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
562 int drop, int *nomem)
563 {
564 struct mbuf *m = *m0;
565 struct pf_frcache *frp, *fra, *cur = NULL;
566 int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
567 u_int16_t off = ntohs(h->ip_off) << 3;
568 u_int16_t frmax = ip_len + off;
569 int hosed = 0;
570
571 KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
572
573 /* Create a new range queue for this packet */
574 if (*frag == NULL) {
575 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
576 if (*frag == NULL) {
577 pf_flush_fragments();
578 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
579 if (*frag == NULL)
580 goto no_mem;
581 }
582
583 /* Get an entry for the queue */
584 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
585 if (cur == NULL) {
586 pool_put(&pf_cache_pl, *frag);
587 *frag = NULL;
588 goto no_mem;
589 }
590 pf_ncache++;
591
592 (*frag)->fr_flags = PFFRAG_NOBUFFER;
593 (*frag)->fr_max = 0;
594 (*frag)->fr_src = h->ip_src;
595 (*frag)->fr_dst = h->ip_dst;
596 (*frag)->fr_p = h->ip_p;
597 (*frag)->fr_id = h->ip_id;
598 (*frag)->fr_timeout = time_second;
599
600 cur->fr_off = off;
601 cur->fr_end = frmax;
602 LIST_INIT(&(*frag)->fr_cache);
603 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
604
605 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
606 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
607
608 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, frmax));
609
610 goto pass;
611 }
612
613 /*
614 * Find a fragment after the current one:
615 * - off contains the real shifted offset.
616 */
617 frp = NULL;
618 LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
619 if (fra->fr_off > off)
620 break;
621 frp = fra;
622 }
623
624 KASSERT(frp != NULL || fra != NULL);
625
626 if (frp != NULL) {
627 int precut;
628
629 precut = frp->fr_end - off;
630 if (precut >= ip_len) {
631 /* Fragment is entirely a duplicate */
632 DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
633 h->ip_id, frp->fr_off, frp->fr_end, off, frmax));
634 goto drop_fragment;
635 }
636 if (precut == 0) {
637 /* They are adjacent. Fixup cache entry */
638 DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
639 h->ip_id, frp->fr_off, frp->fr_end, off, frmax));
640 frp->fr_end = frmax;
641 } else if (precut > 0) {
642 /* The first part of this payload overlaps with a
643 * fragment that has already been passed.
644 * Need to trim off the first part of the payload.
645 * But to do so easily, we need to create another
646 * mbuf to throw the original header into.
647 */
648
649 DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
650 h->ip_id, precut, frp->fr_off, frp->fr_end, off,
651 frmax));
652
653 off += precut;
654 frmax -= precut;
655 /* Update the previous frag to encompass this one */
656 frp->fr_end = frmax;
657
658 if (!drop) {
659 /* XXX Optimization opportunity
660 * This is a very heavy way to trim the payload.
661 * we could do it much faster by diddling mbuf
662 * internals but that would be even less legible
663 * than this mbuf magic. For my next trick,
664 * I'll pull a rabbit out of my laptop.
665 */
666 *m0 = m_dup(m, 0, h->ip_hl << 2, M_NOWAIT);
667 if (*m0 == NULL)
668 goto no_mem;
669 KASSERT((*m0)->m_next == NULL);
670 m_adj(m, precut + (h->ip_hl << 2));
671 m_cat(*m0, m);
672 m = *m0;
673 if (m->m_flags & M_PKTHDR) {
674 int plen = 0;
675 struct mbuf *t;
676 for (t = m; t; t = t->m_next)
677 plen += t->m_len;
678 m->m_pkthdr.len = plen;
679 }
680
681
682 h = mtod(m, struct ip *);
683
684
685 KASSERT((int)m->m_len ==
686 ntohs(h->ip_len) - precut);
687 h->ip_off = htons(ntohs(h->ip_off) +
688 (precut >> 3));
689 h->ip_len = htons(ntohs(h->ip_len) - precut);
690 } else {
691 hosed++;
692 }
693 } else {
694 /* There is a gap between fragments */
695
696 DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
697 h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
698 frmax));
699
700 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
701 if (cur == NULL)
702 goto no_mem;
703 pf_ncache++;
704
705 cur->fr_off = off;
706 cur->fr_end = frmax;
707 LIST_INSERT_AFTER(frp, cur, fr_next);
708 }
709 }
710
711 if (fra != NULL) {
712 int aftercut;
713 int merge = 0;
714
715 aftercut = frmax - fra->fr_off;
716 if (aftercut == 0) {
717 /* Adjacent fragments */
718 DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
719 h->ip_id, off, frmax, fra->fr_off, fra->fr_end));
720 fra->fr_off = off;
721 merge = 1;
722 } else if (aftercut > 0) {
723 /* Need to chop off the tail of this fragment */
724 DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
725 h->ip_id, aftercut, off, frmax, fra->fr_off,
726 fra->fr_end));
727 fra->fr_off = off;
728 frmax -= aftercut;
729
730 merge = 1;
731
732 if (!drop) {
733 m_adj(m, -aftercut);
734 if (m->m_flags & M_PKTHDR) {
735 int plen = 0;
736 struct mbuf *t;
737 for (t = m; t; t = t->m_next)
738 plen += t->m_len;
739 m->m_pkthdr.len = plen;
740 }
741 h = mtod(m, struct ip *);
742 KASSERT((int)m->m_len ==
743 ntohs(h->ip_len) - aftercut);
744 h->ip_len = htons(ntohs(h->ip_len) - aftercut);
745 } else {
746 hosed++;
747 }
748 } else if (frp == NULL) {
749 /* There is a gap between fragments */
750 DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
751 h->ip_id, -aftercut, off, frmax, fra->fr_off,
752 fra->fr_end));
753
754 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
755 if (cur == NULL)
756 goto no_mem;
757 pf_ncache++;
758
759 cur->fr_off = off;
760 cur->fr_end = frmax;
761 LIST_INSERT_BEFORE(fra, cur, fr_next);
762 }
763
764
765 /* Need to glue together two separate fragment descriptors */
766 if (merge) {
767 if (cur && fra->fr_off <= cur->fr_end) {
768 /* Need to merge in a previous 'cur' */
769 DPFPRINTF(("fragcache[%d]: adjacent(merge "
770 "%d-%d) %d-%d (%d-%d)\n",
771 h->ip_id, cur->fr_off, cur->fr_end, off,
772 frmax, fra->fr_off, fra->fr_end));
773 fra->fr_off = cur->fr_off;
774 LIST_REMOVE(cur, fr_next);
775 pool_put(&pf_cent_pl, cur);
776 pf_ncache--;
777 cur = NULL;
778
779 } else if (frp && fra->fr_off <= frp->fr_end) {
780 /* Need to merge in a modified 'frp' */
781 KASSERT(cur == NULL);
782 DPFPRINTF(("fragcache[%d]: adjacent(merge "
783 "%d-%d) %d-%d (%d-%d)\n",
784 h->ip_id, frp->fr_off, frp->fr_end, off,
785 frmax, fra->fr_off, fra->fr_end));
786 fra->fr_off = frp->fr_off;
787 LIST_REMOVE(frp, fr_next);
788 pool_put(&pf_cent_pl, frp);
789 pf_ncache--;
790 frp = NULL;
791
792 }
793 }
794 }
795
796 if (hosed) {
797 /*
798 * We must keep tracking the overall fragment even when
799 * we're going to drop it anyway so that we know when to
800 * free the overall descriptor. Thus we drop the frag late.
801 */
802 goto drop_fragment;
803 }
804
805
806 pass:
807 /* Update maximum data size */
808 if ((*frag)->fr_max < frmax)
809 (*frag)->fr_max = frmax;
810
811 /* This is the last segment */
812 if (!mff)
813 (*frag)->fr_flags |= PFFRAG_SEENLAST;
814
815 /* Check if we are completely reassembled */
816 if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
817 LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
818 LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
819 /* Remove from fragment queue */
820 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
821 (*frag)->fr_max));
822 pf_free_fragment(*frag);
823 *frag = NULL;
824 }
825
826 return (m);
827
828 no_mem:
829 *nomem = 1;
830
831 /* Still need to pay attention to !IP_MF */
832 if (!mff && *frag != NULL)
833 (*frag)->fr_flags |= PFFRAG_SEENLAST;
834
835 m_freem(m);
836 return (NULL);
837
838 drop_fragment:
839
840 /* Still need to pay attention to !IP_MF */
841 if (!mff && *frag != NULL)
842 (*frag)->fr_flags |= PFFRAG_SEENLAST;
843
844 if (drop) {
		/* This fragment has been deemed bad. Don't reassemble */
846 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
847 DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
848 h->ip_id));
849 (*frag)->fr_flags |= PFFRAG_DROP;
850 }
851
852 m_freem(m);
853 return (NULL);
854 }
855
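/*
 * IPv4 normalization entry point: find the matching scrub rule, sanity
 * check the header, optionally clear DF (no-df) and apply min-ttl and
 * random-id, and hand fragments to either full reassembly or the
 * non-buffering cache depending on the rule's fragment modifier.
 */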
856 int
857 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
858 struct pf_pdesc *pd)
859 {
860 struct mbuf *m = *m0;
861 struct pf_rule *r;
862 struct pf_frent *frent;
863 struct pf_fragment *frag = NULL;
864 struct ip *h = mtod(m, struct ip *);
865 int mff = (ntohs(h->ip_off) & IP_MF);
866 int hlen = h->ip_hl << 2;
867 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
868 u_int16_t frmax;
869 int ip_len;
870 int ip_off;
871
872 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
873 while (r != NULL) {
874 r->evaluations++;
875 if (pfi_kif_match(r->kif, kif) == r->ifnot)
876 r = r->skip[PF_SKIP_IFP].ptr;
877 else if (r->direction && r->direction != dir)
878 r = r->skip[PF_SKIP_DIR].ptr;
879 else if (r->af && r->af != AF_INET)
880 r = r->skip[PF_SKIP_AF].ptr;
881 else if (r->proto && r->proto != h->ip_p)
882 r = r->skip[PF_SKIP_PROTO].ptr;
883 else if (PF_MISMATCHAW(&r->src.addr,
884 (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
885 r->src.neg, kif))
886 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
887 else if (PF_MISMATCHAW(&r->dst.addr,
888 (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
889 r->dst.neg, NULL))
890 r = r->skip[PF_SKIP_DST_ADDR].ptr;
891 else
892 break;
893 }
894
895 if (r == NULL || r->action == PF_NOSCRUB)
896 return (PF_PASS);
897 else {
898 r->packets[dir == PF_OUT]++;
899 r->bytes[dir == PF_OUT] += pd->tot_len;
900 }
901
902 /* Check for illegal packets */
903 if (hlen < (int)sizeof(struct ip))
904 goto drop;
905
906 if (hlen > ntohs(h->ip_len))
907 goto drop;
908
909 /* Clear IP_DF if the rule uses the no-df option */
910 if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
911 u_int16_t off = h->ip_off;
912
913 h->ip_off &= htons(~IP_DF);
914 h->ip_sum = pf_cksum_fixup(h->ip_sum, off, h->ip_off, 0);
915 }
916
917 /* We will need other tests here */
918 if (!fragoff && !mff)
919 goto no_fragment;
920
921 /* We're dealing with a fragment now. Don't allow fragments
922 * with IP_DF to enter the cache. If the flag was cleared by
923 * no-df above, fine. Otherwise drop it.
924 */
925 if (h->ip_off & htons(IP_DF)) {
926 DPFPRINTF(("IP_DF\n"));
927 goto bad;
928 }
929
930 ip_len = ntohs(h->ip_len) - hlen;
931 ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
932
933 /* All fragments are 8 byte aligned */
934 if (mff && (ip_len & 0x7)) {
935 DPFPRINTF(("mff and %d\n", ip_len));
936 goto bad;
937 }
938
939 /* Respect maximum length */
940 if (fragoff + ip_len > IP_MAXPACKET) {
941 DPFPRINTF(("max packet %d\n", fragoff + ip_len));
942 goto bad;
943 }
944 frmax = fragoff + ip_len;
945
946 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
947 /* Fully buffer all of the fragments */
948
949 frag = pf_find_fragment(h, &pf_frag_tree);
950
951 /* Check if we saw the last fragment already */
952 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
953 frmax > frag->fr_max)
954 goto bad;
955
956 /* Get an entry for the fragment queue */
957 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
958 if (frent == NULL) {
959 REASON_SET(reason, PFRES_MEMORY);
960 return (PF_DROP);
961 }
962 pf_nfrents++;
963 frent->fr_ip = h;
964 frent->fr_m = m;
965
966 /* Might return a completely reassembled mbuf, or NULL */
967 DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, frmax));
968 *m0 = m = pf_reassemble(m0, &frag, frent, mff);
969
970 if (m == NULL)
971 return (PF_DROP);
972
973 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
974 goto drop;
975
976 h = mtod(m, struct ip *);
977 } else {
978 /* non-buffering fragment cache (drops or masks overlaps) */
979 int nomem = 0;
980
981 #ifdef __NetBSD__
982 struct pf_mtag *pf_mtag = pf_find_mtag(m);
983 KASSERT(pf_mtag != NULL);
984
985 if (dir == PF_OUT && pf_mtag->flags & PF_TAG_FRAGCACHE) {
986 #else
987 if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
988 #endif /* !__NetBSD__ */
989 /*
990 * Already passed the fragment cache in the
991 * input direction. If we continued, it would
992 * appear to be a dup and would be dropped.
993 */
994 goto fragment_pass;
995 }
996
997 frag = pf_find_fragment(h, &pf_cache_tree);
998
999 /* Check if we saw the last fragment already */
1000 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
1001 frmax > frag->fr_max) {
1002 if (r->rule_flag & PFRULE_FRAGDROP)
1003 frag->fr_flags |= PFFRAG_DROP;
1004 goto bad;
1005 }
1006
1007 *m0 = m = pf_fragcache(m0, h, &frag, mff,
1008 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
1009 if (m == NULL) {
1010 if (nomem)
1011 goto no_mem;
1012 goto drop;
1013 }
1014
		if (dir == PF_IN) {
#ifdef __NetBSD__
			pf_mtag = pf_find_mtag(m);
			KASSERT(pf_mtag != NULL);

			pf_mtag->flags |= PF_TAG_FRAGCACHE;
#else
			m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;
#endif /* !__NetBSD__ */
		}
1024
1025 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
1026 goto drop;
1027 goto fragment_pass;
1028 }
1029
1030 no_fragment:
1031 /* At this point, only IP_DF is allowed in ip_off */
1032 if (h->ip_off & ~htons(IP_DF)) {
1033 u_int16_t off = h->ip_off;
1034
1035 h->ip_off &= htons(IP_DF);
1036 h->ip_sum = pf_cksum_fixup(h->ip_sum, off, h->ip_off, 0);
1037 }
1038
1039 /* Enforce a minimum ttl, may cause endless packet loops */
1040 if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1041 u_int16_t ip_ttl = h->ip_ttl;
1042
1043 h->ip_ttl = r->min_ttl;
1044 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1045 }
1046
1047 if (r->rule_flag & PFRULE_RANDOMID) {
1048 u_int16_t id = h->ip_id;
1049
1050 h->ip_id = ip_randomid(ip_ids, 0);
1051 h->ip_sum = pf_cksum_fixup(h->ip_sum, id, h->ip_id, 0);
1052 }
1053 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1054 pd->flags |= PFDESC_IP_REAS;
1055
1056 return (PF_PASS);
1057
1058 fragment_pass:
1059 /* Enforce a minimum ttl, may cause endless packet loops */
1060 if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1061 u_int16_t ip_ttl = h->ip_ttl;
1062
1063 h->ip_ttl = r->min_ttl;
1064 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1065 }
1066 if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1067 pd->flags |= PFDESC_IP_REAS;
1068 return (PF_PASS);
1069
1070 no_mem:
1071 REASON_SET(reason, PFRES_MEMORY);
1072 if (r != NULL && r->log)
1073 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1074 return (PF_DROP);
1075
1076 drop:
1077 REASON_SET(reason, PFRES_NORM);
1078 if (r != NULL && r->log)
1079 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1080 return (PF_DROP);
1081
1082 bad:
1083 DPFPRINTF(("dropping bad fragment\n"));
1084
1085 /* Free associated fragments */
1086 if (frag != NULL)
1087 pf_free_fragment(frag);
1088
1089 REASON_SET(reason, PFRES_FRAG);
1090 if (r != NULL && r->log)
1091 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1092
1093 return (PF_DROP);
1094 }
1095
1096 #ifdef INET6
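/*
 * IPv6 normalization: walk the extension header chain, validating option
 * and jumbo payload lengths, and sanity check any fragment header.  No
 * IPv6 reassembly is performed yet; fragments that pass the checks are
 * let through as-is.
 */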
1097 int
1098 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1099 u_short *reason, struct pf_pdesc *pd)
1100 {
1101 struct mbuf *m = *m0;
1102 struct pf_rule *r;
1103 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1104 int off;
1105 struct ip6_ext ext;
1106 struct ip6_opt opt;
1107 struct ip6_opt_jumbo jumbo;
1108 struct ip6_frag frag;
1109 u_int32_t jumbolen = 0, plen;
1110 u_int16_t fragoff = 0;
1111 int optend;
1112 int ooff;
1113 u_int8_t proto;
1114 int terminal;
1115
1116 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1117 while (r != NULL) {
1118 r->evaluations++;
1119 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1120 r = r->skip[PF_SKIP_IFP].ptr;
1121 else if (r->direction && r->direction != dir)
1122 r = r->skip[PF_SKIP_DIR].ptr;
1123 else if (r->af && r->af != AF_INET6)
1124 r = r->skip[PF_SKIP_AF].ptr;
1125 #if 0 /* header chain! */
1126 else if (r->proto && r->proto != h->ip6_nxt)
1127 r = r->skip[PF_SKIP_PROTO].ptr;
1128 #endif
1129 else if (PF_MISMATCHAW(&r->src.addr,
1130 (struct pf_addr *)&h->ip6_src, AF_INET6,
1131 r->src.neg, kif))
1132 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1133 else if (PF_MISMATCHAW(&r->dst.addr,
1134 (struct pf_addr *)&h->ip6_dst, AF_INET6,
1135 r->dst.neg, NULL))
1136 r = r->skip[PF_SKIP_DST_ADDR].ptr;
1137 else
1138 break;
1139 }
1140
1141 if (r == NULL || r->action == PF_NOSCRUB)
1142 return (PF_PASS);
1143 else {
1144 r->packets[dir == PF_OUT]++;
1145 r->bytes[dir == PF_OUT] += pd->tot_len;
1146 }
1147
1148 /* Check for illegal packets */
1149 if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
1150 goto drop;
1151
1152 off = sizeof(struct ip6_hdr);
1153 proto = h->ip6_nxt;
1154 terminal = 0;
1155 do {
1156 switch (proto) {
1157 case IPPROTO_FRAGMENT:
1158 goto fragment;
1159 break;
1160 case IPPROTO_AH:
1161 case IPPROTO_ROUTING:
1162 case IPPROTO_DSTOPTS:
1163 if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1164 NULL, AF_INET6))
1165 goto shortpkt;
1166 if (proto == IPPROTO_AH)
1167 off += (ext.ip6e_len + 2) * 4;
1168 else
1169 off += (ext.ip6e_len + 1) * 8;
1170 proto = ext.ip6e_nxt;
1171 break;
1172 case IPPROTO_HOPOPTS:
1173 if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1174 NULL, AF_INET6))
1175 goto shortpkt;
1176 optend = off + (ext.ip6e_len + 1) * 8;
1177 ooff = off + sizeof(ext);
1178 do {
1179 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1180 sizeof(opt.ip6o_type), NULL, NULL,
1181 AF_INET6))
1182 goto shortpkt;
1183 if (opt.ip6o_type == IP6OPT_PAD1) {
1184 ooff++;
1185 continue;
1186 }
1187 if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
1188 NULL, NULL, AF_INET6))
1189 goto shortpkt;
1190 if (ooff + sizeof(opt) + opt.ip6o_len > optend)
1191 goto drop;
1192 switch (opt.ip6o_type) {
1193 case IP6OPT_JUMBO:
1194 if (h->ip6_plen != 0)
1195 goto drop;
1196 if (!pf_pull_hdr(m, ooff, &jumbo,
1197 sizeof(jumbo), NULL, NULL,
1198 AF_INET6))
1199 goto shortpkt;
1200 memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1201 sizeof(jumbolen));
1202 jumbolen = ntohl(jumbolen);
1203 if (jumbolen <= IPV6_MAXPACKET)
1204 goto drop;
1205 if (sizeof(struct ip6_hdr) + jumbolen !=
1206 m->m_pkthdr.len)
1207 goto drop;
1208 break;
1209 default:
1210 break;
1211 }
1212 ooff += sizeof(opt) + opt.ip6o_len;
1213 } while (ooff < optend);
1214
1215 off = optend;
1216 proto = ext.ip6e_nxt;
1217 break;
1218 default:
1219 terminal = 1;
1220 break;
1221 }
1222 } while (!terminal);
1223
1224 /* jumbo payload option must be present, or plen > 0 */
1225 if (ntohs(h->ip6_plen) == 0)
1226 plen = jumbolen;
1227 else
1228 plen = ntohs(h->ip6_plen);
1229 if (plen == 0)
1230 goto drop;
1231 if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
1232 goto shortpkt;
1233
1234 /* Enforce a minimum ttl, may cause endless packet loops */
1235 if (r->min_ttl && h->ip6_hlim < r->min_ttl)
1236 h->ip6_hlim = r->min_ttl;
1237
1238 return (PF_PASS);
1239
1240 fragment:
1241 if (ntohs(h->ip6_plen) == 0 || jumbolen)
1242 goto drop;
1243 plen = ntohs(h->ip6_plen);
1244
1245 if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
1246 goto shortpkt;
1247 fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
1248 if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
1249 goto badfrag;
1250
1251 /* do something about it */
1252 /* remember to set pd->flags |= PFDESC_IP_REAS */
1253 return (PF_PASS);
1254
1255 shortpkt:
1256 REASON_SET(reason, PFRES_SHORT);
1257 if (r != NULL && r->log)
1258 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1259 return (PF_DROP);
1260
1261 drop:
1262 REASON_SET(reason, PFRES_NORM);
1263 if (r != NULL && r->log)
1264 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1265 return (PF_DROP);
1266
1267 badfrag:
1268 REASON_SET(reason, PFRES_FRAG);
1269 if (r != NULL && r->log)
1270 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1271 return (PF_DROP);
1272 }
1273 #endif /* INET6 */
1274
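/*
 * TCP normalization: match the scrub rule, drop impossible flag
 * combinations (SYN+RST, FIN/PSH/URG without ACK), clear the reserved
 * header bits, zero the urgent pointer when URG is not set and clamp
 * the MSS option, copying the header back if anything was rewritten.
 */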
1275 int
1276 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m,
1277 int ipoff, int off, void *h, struct pf_pdesc *pd)
1278 {
1279 struct pf_rule *r, *rm = NULL;
1280 struct tcphdr *th = pd->hdr.tcp;
1281 int rewrite = 0;
1282 u_short reason;
1283 u_int8_t flags;
1284 sa_family_t af = pd->af;
1285
1286 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1287 while (r != NULL) {
1288 r->evaluations++;
1289 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1290 r = r->skip[PF_SKIP_IFP].ptr;
1291 else if (r->direction && r->direction != dir)
1292 r = r->skip[PF_SKIP_DIR].ptr;
1293 else if (r->af && r->af != af)
1294 r = r->skip[PF_SKIP_AF].ptr;
1295 else if (r->proto && r->proto != pd->proto)
1296 r = r->skip[PF_SKIP_PROTO].ptr;
1297 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1298 r->src.neg, kif))
1299 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1300 else if (r->src.port_op && !pf_match_port(r->src.port_op,
1301 r->src.port[0], r->src.port[1], th->th_sport))
1302 r = r->skip[PF_SKIP_SRC_PORT].ptr;
1303 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1304 r->dst.neg, NULL))
1305 r = r->skip[PF_SKIP_DST_ADDR].ptr;
1306 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1307 r->dst.port[0], r->dst.port[1], th->th_dport))
1308 r = r->skip[PF_SKIP_DST_PORT].ptr;
1309 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1310 pf_osfp_fingerprint(pd, m, off, th),
1311 r->os_fingerprint))
1312 r = TAILQ_NEXT(r, entries);
1313 else {
1314 rm = r;
1315 break;
1316 }
1317 }
1318
1319 if (rm == NULL || rm->action == PF_NOSCRUB)
1320 return (PF_PASS);
1321 else {
1322 r->packets[dir == PF_OUT]++;
1323 r->bytes[dir == PF_OUT] += pd->tot_len;
1324 }
1325
1326 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1327 pd->flags |= PFDESC_TCP_NORM;
1328
1329 flags = th->th_flags;
1330 if (flags & TH_SYN) {
1331 /* Illegal packet */
1332 if (flags & TH_RST)
1333 goto tcp_drop;
1334
1335 if (flags & TH_FIN)
1336 flags &= ~TH_FIN;
1337 } else {
1338 /* Illegal packet */
1339 if (!(flags & (TH_ACK|TH_RST)))
1340 goto tcp_drop;
1341 }
1342
1343 if (!(flags & TH_ACK)) {
1344 /* These flags are only valid if ACK is set */
1345 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1346 goto tcp_drop;
1347 }
1348
1349 /* Check for illegal header length */
1350 if (th->th_off < (sizeof(struct tcphdr) >> 2))
1351 goto tcp_drop;
1352
1353 /* If flags changed, or reserved data set, then adjust */
1354 if (flags != th->th_flags || th->th_x2 != 0) {
1355 u_int16_t ov, nv;
1356
1357 ov = *(u_int16_t *)(&th->th_ack + 1);
1358 th->th_flags = flags;
1359 th->th_x2 = 0;
1360 nv = *(u_int16_t *)(&th->th_ack + 1);
1361
1362 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
1363 rewrite = 1;
1364 }
1365
1366 /* Remove urgent pointer, if TH_URG is not set */
1367 if (!(flags & TH_URG) && th->th_urp) {
1368 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
1369 th->th_urp = 0;
1370 rewrite = 1;
1371 }
1372
1373 /* Process options */
1374 if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
1375 rewrite = 1;
1376
1377 /* copy back packet headers if we sanitized */
1378 if (rewrite)
1379 m_copyback(m, off, sizeof(*th), th);
1380
1381 return (PF_PASS);
1382
1383 tcp_drop:
1384 REASON_SET(&reason, PFRES_NORM);
1385 if (rm != NULL && r->log)
1386 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
1387 return (PF_DROP);
1388 }
1389
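/*
 * Allocate and set up the per-peer scrub state: remember the initial TTL
 * and, if the SYN carries a timestamp option, enable timestamp modulation
 * (pfss_ts_mod) and record the initial tsval/tsecr for the PAWS checks
 * done in pf_normalize_tcp_stateful().
 */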
1390 int
1391 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
1392 struct tcphdr *th, struct pf_state_peer *src,
1393 struct pf_state_peer *dst)
1394 {
1395 u_int32_t tsval, tsecr;
1396 u_int8_t hdr[60];
1397 u_int8_t *opt;
1398
1399 KASSERT(src->scrub == NULL);
1400
1401 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
1402 if (src->scrub == NULL)
1403 return (1);
1404 bzero(src->scrub, sizeof(*src->scrub));
1405
1406 switch (pd->af) {
1407 #ifdef INET
1408 case AF_INET: {
1409 struct ip *h = mtod(m, struct ip *);
1410 src->scrub->pfss_ttl = h->ip_ttl;
1411 break;
1412 }
1413 #endif /* INET */
1414 #ifdef INET6
1415 case AF_INET6: {
1416 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1417 src->scrub->pfss_ttl = h->ip6_hlim;
1418 break;
1419 }
1420 #endif /* INET6 */
1421 }
1422
1423
1424 /*
1425 * All normalizations below are only begun if we see the start of
	 * the connection. They must all set an enabled bit in pfss_flags
1427 */
1428 if ((th->th_flags & TH_SYN) == 0)
1429 return (0);
1430
1431
1432 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
1433 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1434 /* Diddle with TCP options */
1435 int hlen;
1436 opt = hdr + sizeof(struct tcphdr);
1437 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1438 while (hlen >= TCPOLEN_TIMESTAMP) {
1439 switch (*opt) {
1440 case TCPOPT_EOL: /* FALLTHROUGH */
1441 case TCPOPT_NOP:
1442 opt++;
1443 hlen--;
1444 break;
1445 case TCPOPT_TIMESTAMP:
1446 if (opt[1] >= TCPOLEN_TIMESTAMP) {
1447 src->scrub->pfss_flags |=
1448 PFSS_TIMESTAMP;
1449 src->scrub->pfss_ts_mod =
1450 htonl(cprng_fast32());
1451
1452 /* note PFSS_PAWS not set yet */
1453 memcpy(&tsval, &opt[2],
1454 sizeof(u_int32_t));
1455 memcpy(&tsecr, &opt[6],
1456 sizeof(u_int32_t));
1457 src->scrub->pfss_tsval0 = ntohl(tsval);
1458 src->scrub->pfss_tsval = ntohl(tsval);
1459 src->scrub->pfss_tsecr = ntohl(tsecr);
1460 getmicrouptime(&src->scrub->pfss_last);
1461 }
1462 /* FALLTHROUGH */
1463 default:
1464 hlen -= MAX(opt[1], 2);
1465 opt += MAX(opt[1], 2);
1466 break;
1467 }
1468 }
1469 }
1470
1471 return (0);
1472 }
1473
1474 void
1475 pf_normalize_tcp_cleanup(struct pf_state *state)
1476 {
1477 if (state->src.scrub)
1478 pool_put(&pf_state_scrub_pl, state->src.scrub);
1479 if (state->dst.scrub)
1480 pool_put(&pf_state_scrub_pl, state->dst.scrub);
1481
1482 /* Someday... flush the TCP segment reassembly descriptors. */
1483 }
1484
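/*
 * Per-packet stateful TCP scrubbing: pin the TTL to the highest value
 * seen from each peer, modulate TCP timestamps and apply PAWS-style
 * in-window checks on tsval/tsecr to reject stale or injected segments.
 */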
1485 int
1486 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
1487 u_short *reason, struct tcphdr *th, struct pf_state *state,
1488 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
1489 {
1490 struct timeval uptime;
1491 u_int32_t tsval = 0, tsecr = 0;
1492 u_int tsval_from_last;
1493 u_int8_t hdr[60];
1494 u_int8_t *opt;
1495 int copyback = 0;
1496 int got_ts = 0;
1497
1498 KASSERT(src->scrub || dst->scrub);
1499
1500 /*
1501 * Enforce the minimum TTL seen for this connection. Negate a common
1502 * technique to evade an intrusion detection system and confuse
1503 * firewall state code.
1504 */
1505 switch (pd->af) {
1506 #ifdef INET
1507 case AF_INET: {
1508 if (src->scrub) {
1509 struct ip *h = mtod(m, struct ip *);
1510 if (h->ip_ttl > src->scrub->pfss_ttl)
1511 src->scrub->pfss_ttl = h->ip_ttl;
1512 h->ip_ttl = src->scrub->pfss_ttl;
1513 }
1514 break;
1515 }
1516 #endif /* INET */
1517 #ifdef INET6
1518 case AF_INET6: {
1519 if (src->scrub) {
1520 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1521 if (h->ip6_hlim > src->scrub->pfss_ttl)
1522 src->scrub->pfss_ttl = h->ip6_hlim;
1523 h->ip6_hlim = src->scrub->pfss_ttl;
1524 }
1525 break;
1526 }
1527 #endif /* INET6 */
1528 }
1529
1530 if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
1531 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
1532 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
1533 pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1534 /* Diddle with TCP options */
1535 int hlen;
1536 opt = hdr + sizeof(struct tcphdr);
1537 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1538 while (hlen >= TCPOLEN_TIMESTAMP) {
1539 switch (*opt) {
1540 case TCPOPT_EOL: /* FALLTHROUGH */
1541 case TCPOPT_NOP:
1542 opt++;
1543 hlen--;
1544 break;
1545 case TCPOPT_TIMESTAMP:
1546 /* Modulate the timestamps. Can be used for
1547 * NAT detection, OS uptime determination or
1548 * reboot detection.
1549 */
1550
1551 if (got_ts) {
1552 /* Huh? Multiple timestamps!? */
1553 if (pf_status.debug >= PF_DEBUG_MISC) {
1554 DPFPRINTF(("multiple TS??"));
1555 pf_print_state(state);
1556 printf("\n");
1557 }
1558 REASON_SET(reason, PFRES_TS);
1559 return (PF_DROP);
1560 }
1561 if (opt[1] >= TCPOLEN_TIMESTAMP) {
1562 memcpy(&tsval, &opt[2],
1563 sizeof(u_int32_t));
1564 if (tsval && src->scrub &&
1565 (src->scrub->pfss_flags &
1566 PFSS_TIMESTAMP)) {
1567 tsval = ntohl(tsval);
1568 pf_change_a(&opt[2],
1569 &th->th_sum,
1570 htonl(tsval +
1571 src->scrub->pfss_ts_mod),
1572 0);
1573 copyback = 1;
1574 }
1575
1576 /* Modulate TS reply iff valid (!0) */
1577 memcpy(&tsecr, &opt[6],
1578 sizeof(u_int32_t));
1579 if (tsecr && dst->scrub &&
1580 (dst->scrub->pfss_flags &
1581 PFSS_TIMESTAMP)) {
1582 tsecr = ntohl(tsecr)
1583 - dst->scrub->pfss_ts_mod;
1584 pf_change_a(&opt[6],
1585 &th->th_sum, htonl(tsecr),
1586 0);
1587 copyback = 1;
1588 }
1589 got_ts = 1;
1590 }
1591 /* FALLTHROUGH */
1592 default:
1593 hlen -= MAX(opt[1], 2);
1594 opt += MAX(opt[1], 2);
1595 break;
1596 }
1597 }
1598 if (copyback) {
			/* Copy back the options; the caller copies back the header */
1600 *writeback = 1;
1601 m_copyback(m, off + sizeof(struct tcphdr),
1602 (th->th_off << 2) - sizeof(struct tcphdr), hdr +
1603 sizeof(struct tcphdr));
1604 }
1605 }
1606
1607
1608 /*
1609 * Must invalidate PAWS checks on connections idle for too long.
1610 * The fastest allowed timestamp clock is 1ms. That turns out to
1611 * be about 24 days before it wraps. XXX Right now our lowerbound
1612 * TS echo check only works for the first 12 days of a connection
1613 * when the TS has exhausted half its 32bit space
1614 */
1615 #define TS_MAX_IDLE (24*24*60*60)
1616 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
1617
1618 getmicrouptime(&uptime);
1619 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
1620 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
1621 time_second - state->creation > TS_MAX_CONN)) {
1622 if (pf_status.debug >= PF_DEBUG_MISC) {
1623 DPFPRINTF(("src idled out of PAWS\n"));
1624 pf_print_state(state);
1625 printf("\n");
1626 }
1627 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
1628 | PFSS_PAWS_IDLED;
1629 }
1630 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
1631 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
1632 if (pf_status.debug >= PF_DEBUG_MISC) {
1633 DPFPRINTF(("dst idled out of PAWS\n"));
1634 pf_print_state(state);
1635 printf("\n");
1636 }
1637 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
1638 | PFSS_PAWS_IDLED;
1639 }
1640
1641 if (got_ts && src->scrub && dst->scrub &&
1642 (src->scrub->pfss_flags & PFSS_PAWS) &&
1643 (dst->scrub->pfss_flags & PFSS_PAWS)) {
1644 /* Validate that the timestamps are "in-window".
1645 * RFC1323 describes TCP Timestamp options that allow
1646 * measurement of RTT (round trip time) and PAWS
1647 * (protection against wrapped sequence numbers). PAWS
1648 * gives us a set of rules for rejecting packets on
1649 * long fat pipes (packets that were somehow delayed
1650 * in transit longer than the time it took to send the
1651 * full TCP sequence space of 4Gb). We can use these
1652 * rules and infer a few others that will let us treat
1653 * the 32bit timestamp and the 32bit echoed timestamp
1654 * as sequence numbers to prevent a blind attacker from
1655 * inserting packets into a connection.
1656 *
1657 * RFC1323 tells us:
1658 * - The timestamp on this packet must be greater than
1659 * or equal to the last value echoed by the other
1660 * endpoint. The RFC says those will be discarded
1661 * since it is a dup that has already been acked.
1662 * This gives us a lowerbound on the timestamp.
1663 * timestamp >= other last echoed timestamp
1664 * - The timestamp will be less than or equal to
1665 * the last timestamp plus the time between the
1666 * last packet and now. The RFC defines the max
1667 * clock rate as 1ms. We will allow clocks to be
1668 * up to 10% fast and will allow a total difference
		 * of 30 seconds due to a route change. And this
1670 * gives us an upperbound on the timestamp.
1671 * timestamp <= last timestamp + max ticks
1672 * We have to be careful here. Windows will send an
1673 * initial timestamp of zero and then initialize it
1674 * to a random value after the 3whs; presumably to
1675 * avoid a DoS by having to call an expensive RNG
		 * during a SYN flood. Proof that MS has at least one
1677 * good security geek.
1678 *
1679 * - The TCP timestamp option must also echo the other
		 * endpoint's timestamp. The timestamp echoed is the
1681 * one carried on the earliest unacknowledged segment
1682 * on the left edge of the sequence window. The RFC
1683 * states that the host will reject any echoed
1684 * timestamps that were larger than any ever sent.
1685 * This gives us an upperbound on the TS echo.
		 *     tsecr <= largest_tsval
1687 * - The lowerbound on the TS echo is a little more
1688 * tricky to determine. The other endpoint's echoed
1689 * values will not decrease. But there may be
1690 * network conditions that re-order packets and
1691 * cause our view of them to decrease. For now the
1692 * only lowerbound we can safely determine is that
1693 * the TS echo will never be less than the original
1694 * TS. XXX There is probably a better lowerbound.
1695 * Remove TS_MAX_CONN with better lowerbound check.
		 *     tsecr >= other original TS
1697 *
1698 * It is also important to note that the fastest
1699 * timestamp clock of 1ms will wrap its 32bit space in
1700 * 24 days. So we just disable TS checking after 24
1701 * days of idle time. We actually must use a 12d
1702 * connection limit until we can come up with a better
1703 * lowerbound to the TS echo check.
1704 */
1705 struct timeval delta_ts;
1706 int ts_fudge;
1707
1708
1709 /*
1710 * PFTM_TS_DIFF is how many seconds of leeway to allow
1711 * a host's timestamp. This can happen if the previous
1712 * packet got delayed in transit for much longer than
1713 * this packet.
1714 */
1715 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
1716 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
1717
1718
1719 /* Calculate max ticks since the last timestamp */
1720 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1 kHz + 10% skew */
1721 #define TS_MICROSECS 1000000 /* microseconds per second */
1722 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
1723 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
1724 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
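		/*
		 * e.g. 2.5 s idle with a 30 s fudge allows the peer's
		 * timestamp to advance by at most
		 * (2 + 30) * 1100 + 500000 / 909 = 35750 ticks.
		 */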
1725
1726
1727 if ((src->state >= TCPS_ESTABLISHED &&
1728 dst->state >= TCPS_ESTABLISHED) &&
1729 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
1730 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
1731 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
1732 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
1733 /* Bad RFC1323 implementation or an insertion attack.
1734 *
1735 * - Solaris 2.6 and 2.7 are known to send another ACK
1736 * after the FIN,FIN|ACK,ACK closing that carries
1737 * an old timestamp.
1738 */
1739
1740 DPFPRINTF(("Timestamp failed %c%c%c%c\n",
1741 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
1742 SEQ_GT(tsval, src->scrub->pfss_tsval +
1743 tsval_from_last) ? '1' : ' ',
1744 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
1745 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
1746 DPFPRINTF((" tsval: %" PRIu32 " tsecr: %" PRIu32
1747 " +ticks: %" PRIu32 " idle: %"PRIx64"s %ums\n",
1748 tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
1749 delta_ts.tv_usec / 1000U));
1750 DPFPRINTF((" src->tsval: %" PRIu32 " tsecr: %" PRIu32
1751 "\n",
1752 src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
1753 DPFPRINTF((" dst->tsval: %" PRIu32 " tsecr: %" PRIu32
1754 " tsval0: %" PRIu32 "\n",
1755 dst->scrub->pfss_tsval,
1756 dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
1757 if (pf_status.debug >= PF_DEBUG_MISC) {
1758 pf_print_state(state);
1759 pf_print_flags(th->th_flags);
1760 printf("\n");
1761 }
1762 REASON_SET(reason, PFRES_TS);
1763 return (PF_DROP);
1764 }
1765
1766 /* XXX I'd really like to require tsecr but it's optional */
1767
1768 } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
1769 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
1770 || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
1771 src->scrub && dst->scrub &&
1772 (src->scrub->pfss_flags & PFSS_PAWS) &&
1773 (dst->scrub->pfss_flags & PFSS_PAWS)) {
1774 /* Didn't send a timestamp. Timestamps aren't really useful
1775 * when:
1776 * - connection opening or closing (often not even sent).
		 * but we must not let an attacker put a FIN on a
1778 * data packet to sneak it through our ESTABLISHED check.
1779 * - on a TCP reset. RFC suggests not even looking at TS.
1780 * - on an empty ACK. The TS will not be echoed so it will
1781 * probably not help keep the RTT calculation in sync and
1782 * there isn't as much danger when the sequence numbers
1783 * got wrapped. So some stacks don't include TS on empty
1784 * ACKs :-(
1785 *
1786 * To minimize the disruption to mostly RFC1323 conformant
1787 * stacks, we will only require timestamps on data packets.
1788 *
1789 * And what do ya know, we cannot require timestamps on data
1790 * packets. There appear to be devices that do legitimate
1791 * TCP connection hijacking. There are HTTP devices that allow
1792 * a 3whs (with timestamps) and then buffer the HTTP request.
1793 * If the intermediate device has the HTTP response cache, it
1794 * will spoof the response but not bother timestamping its
1795 * packets. So we can look for the presence of a timestamp in
1796 * the first data packet and if there, require it in all future
1797 * packets.
1798 */
1799
1800 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
1801 /*
1802 * Hey! Someone tried to sneak a packet in. Or the
1803 * stack changed its RFC1323 behavior?!?!
1804 */
1805 if (pf_status.debug >= PF_DEBUG_MISC) {
1806 DPFPRINTF(("Did not receive expected RFC1323 "
1807 "timestamp\n"));
1808 pf_print_state(state);
1809 pf_print_flags(th->th_flags);
1810 printf("\n");
1811 }
1812 REASON_SET(reason, PFRES_TS);
1813 return (PF_DROP);
1814 }
1815 }
1816
1817
1818 /*
	 * We will note whether a host sends its data packets with or without
	 * timestamps, and require all data packets to contain a timestamp
	 * if the first one does. PAWS implicitly requires that all data packets be
1822 * timestamped. But I think there are middle-man devices that hijack
1823 * TCP streams immediately after the 3whs and don't timestamp their
1824 * packets (seen in a WWW accelerator or cache).
1825 */
1826 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
1827 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
1828 if (got_ts)
1829 src->scrub->pfss_flags |= PFSS_DATA_TS;
1830 else {
1831 src->scrub->pfss_flags |= PFSS_DATA_NOTS;
1832 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
1833 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
1834 /* Don't warn if other host rejected RFC1323 */
1835 DPFPRINTF(("Broken RFC1323 stack did not "
1836 "timestamp data packet. Disabled PAWS "
1837 "security.\n"));
1838 pf_print_state(state);
1839 pf_print_flags(th->th_flags);
1840 printf("\n");
1841 }
1842 }
1843 }
1844
1845
1846 /*
1847 * Update PAWS values
1848 */
1849 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
1850 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
1851 getmicrouptime(&src->scrub->pfss_last);
1852 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
1853 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
1854 src->scrub->pfss_tsval = tsval;
1855
1856 if (tsecr) {
1857 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
1858 (src->scrub->pfss_flags & PFSS_PAWS) == 0)
1859 src->scrub->pfss_tsecr = tsecr;
1860
1861 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
1862 (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
1863 src->scrub->pfss_tsval0 == 0)) {
1864 /* tsval0 MUST be the lowest timestamp */
1865 src->scrub->pfss_tsval0 = tsval;
1866 }
1867
1868 /* Only fully initialized after a TS gets echoed */
1869 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
1870 src->scrub->pfss_flags |= PFSS_PAWS;
1871 }
1872 }
1873
1874 /* I have a dream.... TCP segment reassembly.... */
1875 return (0);
1876 }
1877
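/*
 * Walk the TCP options following the header at offset 'off' and clamp
 * any MSS option larger than the rule's max-mss, fixing up the checksum.
 * Returns 1 if the header was modified and must be copied back.
 */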
1878 int
1879 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
1880 int off)
1881 {
1882 u_int16_t *mss;
1883 int thoff;
1884 int opt, cnt, optlen = 0;
1885 int rewrite = 0;
1886 u_char *optp;
1887
1888 thoff = th->th_off << 2;
1889 cnt = thoff - sizeof(struct tcphdr);
1890 optp = mtod(m, u_char *) + off + sizeof(struct tcphdr);
1891
1892 for (; cnt > 0; cnt -= optlen, optp += optlen) {
1893 opt = optp[0];
1894 if (opt == TCPOPT_EOL)
1895 break;
1896 if (opt == TCPOPT_NOP)
1897 optlen = 1;
1898 else {
1899 if (cnt < 2)
1900 break;
1901 optlen = optp[1];
1902 if (optlen < 2 || optlen > cnt)
1903 break;
1904 }
1905 switch (opt) {
1906 case TCPOPT_MAXSEG:
1907 mss = (u_int16_t *)(optp + 2);
1908 if ((ntohs(*mss)) > r->max_mss) {
1909 th->th_sum = pf_cksum_fixup(th->th_sum,
1910 *mss, htons(r->max_mss), 0);
1911 *mss = htons(r->max_mss);
1912 rewrite = 1;
1913 }
1914 break;
1915 default:
1916 break;
1917 }
1918 }
1919
1920 return (rewrite);
1921 }
1922