/*	$NetBSD: pf_norm.c,v 1.26.8.1 2014/08/20 00:03:52 tls Exp $	*/
/*	$OpenBSD: pf_norm.c,v 1.109 2007/05/28 17:16:39 henning Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pf_norm.c,v 1.26.8.1 2014/08/20 00:03:52 tls Exp $");

#ifdef _KERNEL_OPT
#include "opt_inet.h"
#endif

#include "pflog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>

#ifdef __NetBSD__
#include <sys/rnd.h>
#include <sys/cprng.h>
#else
#include <dev/rndvar.h>
#endif /* !__NetBSD__ */
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <net/pfvar.h>

struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;
	struct ip *fr_ip;
	struct mbuf *fr_m;
};

struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next;
	uint16_t	fr_off;
	uint16_t	fr_end;
};

#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment of this packet */
#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004		/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
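
/*
 * A pf_fragment tracks one IP datagram in one of two modes, selected by
 * PFFRAG_NOBUFFER.  In buffering mode (pf_reassemble) the mbufs of all
 * fragments are held on fru_queue until the datagram is complete and can
 * be passed on as one packet.  In non-buffering mode (pf_fragcache) each
 * fragment is forwarded immediately and only the byte ranges already seen
 * are remembered on fru_cache, so overlaps can be trimmed or dropped
 * without holding on to any data.
 */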

struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct in_addr	fr_src;
	struct in_addr	fr_dst;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
	u_int16_t	fr_id;		/* fragment id for reassemble */
	u_int16_t	fr_max;		/* fragment data max */
	u_int32_t	fr_timeout;
#define fr_queue	fr_u.fru_queue
#define fr_cache	fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;	/* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;	/* non-buf */
	} fr_u;
};

TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_ip2key(struct pf_fragment *, struct ip *);
void			 pf_remove_fragment(struct pf_fragment *);
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);
struct mbuf		*pf_fragcache(struct mbuf **, struct ip*,
			    struct pf_fragment **, int, int, int *);
int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
			    struct tcphdr *, int);

#define DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while(0)
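
/*
 * Note the double parentheses at every DPFPRINTF call site: the inner
 * pair turns the whole printf argument list into the single macro
 * parameter x, e.g.
 *
 *	DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 */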

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
struct pool		 pf_state_scrub_pl;
int			 pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
#ifdef __NetBSD__
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL, IPL_SOFTNET);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL, IPL_SOFTNET);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL, IPL_SOFTNET);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL, IPL_SOFTNET);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL, IPL_SOFTNET);
#else
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
	    "pffrcache", NULL);
	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);
#endif /* !__NetBSD__ */

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
	TAILQ_INIT(&pf_cachequeue);
}

#ifdef _MODULE
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif /* _MODULE */

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else if (a->fr_src.s_addr < b->fr_src.s_addr)
		return (-1);
	else if (a->fr_src.s_addr > b->fr_src.s_addr)
		return (1);
	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
		return (-1);
	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
		return (1);
	return (0);
}

void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	u_int32_t		 expire = time_second -
				    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		KASSERT(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		KASSERT(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		KASSERT(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}

/*
 * Try to flush old fragments to make space for new ones
 */

void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag;
	int			 goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}


	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/* Frees the fragments and all associated entries */

void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent		*frent;
	struct pf_frcache	*frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			pool_put(&pf_frent_pl, frent);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			KASSERT(LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end);

			pool_put(&pf_cent_pl, frcache);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}

void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_src.s_addr = ip->ip_src.s_addr;
	key->fr_dst.s_addr = ip->ip_dst.s_addr;
}

struct pf_fragment *
pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment	 key;
	struct pf_fragment	*frag;

	pf_ip2key(&key, ip);

	frag = RB_FIND(pf_frag_tree, tree, &key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = time_second;
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}

/* Removes a fragment from the fragment queue and frees the fragment */

void
pf_remove_fragment(struct pf_fragment *frag)
{
	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		pool_put(&pf_frag_pl, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
		pool_put(&pf_cache_pl, frag);
	}
}

#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
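/*
 * ip_off carries the fragment offset in units of 8 bytes in its low 13
 * bits; masking with IP_OFFMASK and shifting left by 3 yields the byte
 * offset of a queued fragment's payload within the original datagram.
 */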
struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t	 frmax = ip_len + off;

	KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time_second;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	KASSERT(frep != NULL || frea != NULL);

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next)
	{
		u_int16_t	aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4)
		{
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

 insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < frmax)
		(*frag)->fr_max = frmax;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
#ifdef __NetBSD__
		m->m_pkthdr.csum_flags = 0;
#endif /* __NetBSD__ */
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

 drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}
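
/*
 * Worked example of the bookkeeping above: a 2000-byte payload sent
 * through a 1500-byte MTU arrives as two fragments.  The first has
 * ip_hl 5, ip_len 1500, offset 0 and IP_MF set, contributing 1480
 * payload bytes; the second has ip_len 540, an offset field of 185
 * (185 * 8 = 1480 bytes) and IP_MF clear.  Once both are queued,
 * fr_max is 1480 + 520 = 2000, the "all the data" walk accumulates
 * off = 2000 with no gap, and the mbuf chains are concatenated into
 * a single packet with ip_len = 2000 + 20.
 */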

struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf		*m = *m0;
	struct pf_frcache	*frp, *fra, *cur = NULL;
	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	/*
	 * The left shift both scales the 8-byte fragment-offset units to
	 * bytes and pushes the three flag bits out of the truncated
	 * 16-bit result, so no explicit IP_OFFMASK is needed here.
	 */
	u_int16_t		 off = ntohs(h->ip_off) << 3;
	u_int16_t		 frmax = ip_len + off;
	int			 hosed = 0;

	KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = h->ip_src;
		(*frag)->fr_dst = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = time_second;

		cur->fr_off = off;
		cur->fr_end = frmax;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, frmax));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	KASSERT(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int	precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, frmax));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, frmax));
			frp->fr_end = frmax;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    frmax));

			off += precut;
			frmax -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = frmax;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_dup(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				KASSERT((*m0)->m_next == NULL);
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}


				h = mtod(m, struct ip *);


				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    frmax));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = frmax;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int	aftercut;
		int	merge = 0;

		aftercut = frmax - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, frmax, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, frmax, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			frmax -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				KASSERT((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, frmax, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = frmax;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}


		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    frmax, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;

			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				KASSERT(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    frmax, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;

			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}


 pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < frmax)
		(*frag)->fr_max = frmax;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

 no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

 drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reassemble */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}
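
/*
 * Example of the range cache at work: fragments covering bytes
 * [0,1480) and [1480,2000) of a datagram are forwarded as they arrive;
 * the cache records [0,1480) and then the precut == 0 adjacency case
 * extends that entry to [0,2000).  Once PFFRAG_SEENLAST is set and the
 * single remaining entry spans 0..fr_max, the completion test above
 * tears down the descriptor, no payload ever having been buffered.
 */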

int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 frmax;
	int			 ip_len;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}
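
	/*
	 * The skip[] pointers are computed when the ruleset is loaded:
	 * when a rule fails one of the cheap tests above, evaluation
	 * jumps straight to the next rule that differs in that field
	 * instead of re-testing every intervening rule.
	 */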

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, off, h->ip_off, 0);
	}
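
	/*
	 * pf_cksum_fixup() updates the checksum incrementally in the
	 * style of RFC 1624: only the difference between the old and the
	 * new 16-bit word is folded into ip_sum, which is cheaper than
	 * recomputing the checksum over the entire header.
	 */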

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	frmax = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment(h, &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    frmax > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, frmax));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int	nomem = 0;

#ifdef __NetBSD__
		struct pf_mtag *pf_mtag = pf_find_mtag(m);
		KASSERT(pf_mtag != NULL);

		if (dir == PF_OUT && pf_mtag->flags & PF_TAG_FRAGCACHE) {
#else
		if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
#endif /* !__NetBSD__ */
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment(h, &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    frmax > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		if (dir == PF_IN) {
#ifdef __NetBSD__
			pf_mtag = pf_find_mtag(m);
			KASSERT(pf_mtag != NULL);
			pf_mtag->flags |= PF_TAG_FRAGCACHE;
#else
			m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;
#endif /* !__NetBSD__ */
		}

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

 no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t off = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t id = h->ip_id;

		h->ip_id = ip_randomid(ip_ids, 0);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, id, h->ip_id, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

 fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

 no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);

	return (PF_DROP);
}

#ifdef INET6
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
	int			 off;
	struct ip6_ext		 ext;
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_frag		 frag;
	u_int32_t		 jumbolen = 0, plen;
	u_int16_t		 fragoff = 0;
	int			 optend;
	int			 ooff;
	u_int8_t		 proto;
	int			 terminal;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

 fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

 shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);

 badfrag:
	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	return (PF_DROP);
}
#endif /* INET6 */

int
pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m,
    int ipoff, int off, void *h, struct pf_pdesc *pd)
{
	struct pf_rule	*r, *rm = NULL;
	struct tcphdr	*th = pd->hdr.tcp;
	int		 rewrite = 0;
	u_short		 reason;
	u_int8_t	 flags;
	sa_family_t	 af = pd->af;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], th->th_sport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], th->th_dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
		    pf_osfp_fingerprint(pd, m, off, th),
		    r->os_fingerprint))
			r = TAILQ_NEXT(r, entries);
		else {
			rm = r;
			break;
		}
	}

	if (rm == NULL || rm->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
		pd->flags |= PFDESC_TCP_NORM;

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof(struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}
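
	/*
	 * The pointer arithmetic above leans on the struct tcphdr layout:
	 * th_off, th_x2 and th_flags occupy the 16-bit word immediately
	 * following th_ack, so *(u_int16_t *)(&th->th_ack + 1) captures
	 * the old and new values of that word for the checksum fixup.
	 */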

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* Process options */
	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
		rewrite = 1;

	/* copy back packet headers if we sanitized */
	if (rewrite)
		m_copyback(m, off, sizeof(*th), th);

	return (PF_PASS);

 tcp_drop:
	REASON_SET_NOPTR(&reason, PFRES_NORM);
	if (rm != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
	return (PF_DROP);
}

int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src,
    struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];
	u_int8_t *opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof(*src->scrub));

	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}


	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection.  They must all set an enabled bit in pfss_flags.
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);


	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(cprng_fast32());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}
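
/*
 * The modulator initialized above is the core of timestamp scrubbing:
 * a random per-state offset is added to every timestamp this peer sends
 * and subtracted again from the values echoed back to it (see
 * pf_normalize_tcp_stateful), so an observer cannot recover the host's
 * raw timestamp clock, which would otherwise leak its uptime.
 */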

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	if (state->dst.scrub)
		pool_put(&pf_state_scrub_pl, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}

int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval = 0, tsecr = 0;
	u_int tsval_from_last;
	u_int8_t hdr[60];
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/* Modulate the timestamps.  Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copy back the options; the caller copies back the header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}


	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  At that rate half
	 * of the 32-bit timestamp space is consumed in about 24 days.
	 * XXX Right now our lowerbound TS echo check only works for the
	 * first 12 days of a connection, before the TS has consumed half
	 * its 32-bit space.
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */
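
/*
 * Arithmetic behind these limits: a 1 kHz timestamp clock wraps its
 * 32-bit space in 2^32 ms, about 49.7 days, so the half space usable
 * for the SEQ_LT/SEQ_GT style comparisons below covers roughly 24.8
 * days, rounded down to the 24-day idle cutoff.  The 12-day connection
 * cap halves that again to compensate for the weak lowerbound on the
 * echoed timestamp.
 */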

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_second - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 * of 30 seconds due to a route change.  And this
		 * gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 * We have to be careful here.  Windows will send an
		 * initial timestamp of zero and then initialize it
		 * to a random value after the 3whs; presumably to
		 * avoid a DoS by having to call an expensive RNG
		 * during a SYN flood.  Proof MS has at least one
		 * good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoints timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tescr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tescr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval	delta_ts;
		int		ts_fudge;


		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];


		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1 kHz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
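		/*
		 * Illustrative arithmetic (the numbers are examples, not
		 * values taken from this file): with a 30 second
		 * PFTM_TS_DIFF fudge and 60 seconds since the last
		 * timestamp, the allowance is (60 + 30) * 1100 = 99000
		 * ticks plus the scaled sub-second remainder, i.e. a peer
		 * clock may run up to 10% fast and still fall inside the
		 * window checked below.
		 */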


		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
			DPFPRINTF((" tsval: %" PRIu32 "  tsecr: %" PRIu32
			    "  +ticks: %" PRIu32 "  idle: %"PRIx64"s %ums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000U));
			DPFPRINTF((" src->tsval: %" PRIu32 "  tsecr: %" PRIu32
			    "\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %" PRIu32 "  tsecr: %" PRIu32
			    "  tsval0: %" PRIu32 "\n",
			    dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}


	/*
	 * We note whether a host sends its data packets with or without
	 * timestamps, and require all subsequent data packets to carry a
	 * timestamp if the first one does.  PAWS implicitly requires that
	 * all data packets be timestamped, but there are middle-man devices
	 * that hijack TCP streams immediately after the 3whs and don't
	 * timestamp their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}


	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}

int
pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
    int off)
{
	u_int16_t	*mss;
	int		 thoff;
	int		 opt, cnt, optlen = 0;
	int		 rewrite = 0;
	u_char		*optp;

	thoff = th->th_off << 2;
	cnt = thoff - sizeof(struct tcphdr);
	optp = mtod(m, u_char *) + off + sizeof(struct tcphdr);

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG:
			mss = (u_int16_t *)(optp + 2);
			if ((ntohs(*mss)) > r->max_mss) {
				th->th_sum = pf_cksum_fixup(th->th_sum,
				    *mss, htons(r->max_mss), 0);
				*mss = htons(r->max_mss);
				rewrite = 1;
			}
			break;
		default:
			break;
		}
	}

	return (rewrite);
}
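
/*
 * This implements the "max-mss" scrub option.  For example, with
 * max_mss set to 1440, a SYN advertising MSS 1460 has the option value
 * rewritten in place and the TCP checksum fixed up incrementally, which
 * keeps full-sized segments of, say, tunnelled connections under the
 * reduced path MTU.
 */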