/*	$NetBSD: ip_reass.c,v 1.2 2010/07/19 14:09:45 rmind Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 */

/*
 * IP reassembly.
 *
 * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for IP
 * reassembly queue buffer management.
 *
 * We keep a count of total IP fragments (NB: not fragmented packets)
 * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments.
 * If ip_nfrags exceeds the ip_maxfrags limit, we drop half the total
 * fragments in the reassembly queues.  This AIMD policy avoids repeatedly
 * deleting single packets under heavy fragmentation load (e.g., from lossy
 * NFS peers).
 */
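
/*
 * Illustrative numbers only (not taken from the code): with ip_maxfrags
 * at 512 and some 600 fragments queued, the next slow timeout computes
 * the median fragment TTL and expires every fragment at or below it --
 * roughly 300 in one sweep -- rather than evicting a single reassembly
 * queue per newly arrived fragment.
 */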

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ip_reass.c,v 1.2 2010/07/19 14:09:45 rmind Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/in_proto.h>
#include <netinet/ip_private.h>
#include <netinet/in_var.h>

/*
 * IP datagram reassembly hashed queues, pool, lock and counters.
 */
#define	IPREASS_HASH_SHIFT	6
#define	IPREASS_HASH_SIZE	(1 << IPREASS_HASH_SHIFT)
#define	IPREASS_HASH_MASK	(IPREASS_HASH_SIZE - 1)
#define	IPREASS_HASH(x, y) \
    (((((x) & 0xf) | ((((x) >> 8) & 0xf) << 4)) ^ (y)) & IPREASS_HASH_MASK)
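
/*
 * Example with made-up values: for ip_src.s_addr = 0xc0a80105 and
 * ip_id = 0x231c, the low nibble (0x5) and bits 8..11 (0x1) of the
 * source address combine to 0x15; XORed with the id and masked with
 * IPREASS_HASH_MASK, this selects bucket 0x09.
 */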

struct ipqhead		ipq[IPREASS_HASH_SIZE];
struct pool		ipqent_pool;
static int		ipq_locked;

static int		ip_nfragpackets;	/* packets in reass queue */
static int		ip_nfrags;		/* total fragments in reass queues */

static int		ip_maxfragpackets;	/* limit on packets. XXX sysctl */
static int		ip_maxfrags;		/* limit on fragments. XXX sysctl */

/*
 * IP reassembly queue structure.  Each fragment being reassembled is
 * attached to one of these structures.  They are timed out after ipq_ttl
 * drops to 0, and may also be reclaimed if memory becomes tight.
 */
struct ipq {
	LIST_ENTRY(ipq)	ipq_q;		/* to other reass headers */
	uint8_t		ipq_ttl;	/* time for reass q to live */
	uint8_t		ipq_p;		/* protocol of this fragment */
	uint16_t	ipq_id;		/* sequence id for reassembly */
	struct ipqehead	ipq_fragq;	/* to ip fragment queue */
	struct in_addr	ipq_src;
	struct in_addr	ipq_dst;
	uint16_t	ipq_nfrags;	/* frags in this queue entry */
	uint8_t		ipq_tos;	/* TOS of this fragment */
};

/*
 * Cached copy of nmbclusters.  If nmbclusters is different,
 * recalculate IP parameters derived from nmbclusters.
 */
static int		ip_nmbclusters;	/* copy of nmbclusters */

/*
 * IP reassembly TTL machinery for multiplicative drop.
 */
static u_int		fragttl_histo[IPFRAGTTL + 1];

void			sysctl_ip_reass_setup(void);
static void		ip_nmbclusters_changed(void);

static struct ipq *	ip_reass_lookup(struct ip *, u_int *);
static struct mbuf *	ip_reass(struct ipqent *, struct ipq *, u_int);
static u_int		ip_reass_ttl_decr(u_int ticks);
static void		ip_reass_drophalf(void);
static void		ip_freef(struct ipq *);

/*
 * ip_reass_init:
 *
 *	Initialization of IP reassembly mechanism.
 */
void
ip_reass_init(void)
{
	int i;

	pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl",
	    NULL, IPL_VM);

	for (i = 0; i < IPREASS_HASH_SIZE; i++) {
		LIST_INIT(&ipq[i]);
	}
	ip_maxfragpackets = 200;
	ip_maxfrags = 0;
	ip_nmbclusters_changed();

	sysctl_ip_reass_setup();
}

static struct sysctllog *ip_reass_sysctllog;

void
sysctl_ip_reass_setup(void)
{

	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "net", NULL,
		NULL, 0, NULL, 0,
		CTL_NET, CTL_EOL);
	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "inet",
		SYSCTL_DESCR("PF_INET related settings"),
		NULL, 0, NULL, 0,
		CTL_NET, PF_INET, CTL_EOL);
	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "ip",
		SYSCTL_DESCR("IPv4 related settings"),
		NULL, 0, NULL, 0,
		CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);

	sysctl_createv(&ip_reass_sysctllog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		CTLTYPE_INT, "maxfragpackets",
		SYSCTL_DESCR("Maximum number of fragments to retain for "
			     "possible reassembly"),
		NULL, 0, &ip_maxfragpackets, 0,
		CTL_NET, PF_INET, IPPROTO_IP, IPCTL_MAXFRAGPACKETS, CTL_EOL);
}

#define	CHECK_NMBCLUSTER_PARAMS()				\
do {								\
	if (__predict_false(ip_nmbclusters != nmbclusters))	\
		ip_nmbclusters_changed();			\
} while (/*CONSTCOND*/0)

/*
 * Compute IP limits derived from the value of nmbclusters.
 */
static void
ip_nmbclusters_changed(void)
{
	ip_maxfrags = nmbclusters / 4;
	ip_nmbclusters = nmbclusters;
}
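
/*
 * For instance (hypothetical values): booting with nmbclusters = 1024
 * yields ip_maxfrags = 256; if nmbclusters is later raised to 4096, the
 * next CHECK_NMBCLUSTER_PARAMS() in ip_reass_slowtimo() notices the
 * change and lifts ip_maxfrags to 1024.
 */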

static inline int	ipq_lock_try(void);
static inline void	ipq_unlock(void);

static inline int
ipq_lock_try(void)
{
	int s;

	/*
	 * Use splvm() -- we're blocking things that would cause
	 * mbuf allocation.
	 */
	s = splvm();
	if (ipq_locked) {
		splx(s);
		return (0);
	}
	ipq_locked = 1;
	splx(s);
	return (1);
}

static inline void
ipq_unlock(void)
{
	int s;

	s = splvm();
	ipq_locked = 0;
	splx(s);
}

#ifdef DIAGNOSTIC
#define	IPQ_LOCK()							\
do {									\
	if (ipq_lock_try() == 0) {					\
		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
		panic("ipq_lock");					\
	}								\
} while (/*CONSTCOND*/ 0)
#define	IPQ_LOCK_CHECK()						\
do {									\
	if (ipq_locked == 0) {						\
		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
		panic("ipq lock check");				\
	}								\
} while (/*CONSTCOND*/ 0)
#else
#define	IPQ_LOCK()		(void) ipq_lock_try()
#define	IPQ_LOCK_CHECK()	/* nothing */
#endif

#define	IPQ_UNLOCK()		ipq_unlock()
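
/*
 * Usage sketch (illustrative; mirrors ip_reass_drain() below):
 *
 *	if (ipq_lock_try() != 0) {
 *		... work on the reassembly queues ...
 *		IPQ_UNLOCK();
 *	}
 *
 * Code that may block on the queues outright, such as
 * ip_reass_slowtimo(), simply brackets its work with
 * IPQ_LOCK() / IPQ_UNLOCK().
 */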

/*
 * ip_reass_lookup:
 *
 *	Look for queue of fragments of this datagram.
 */
static struct ipq *
ip_reass_lookup(struct ip *ip, u_int *hashp)
{
	struct ipq *fp;
	u_int hash;

	IPQ_LOCK();
	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
	LIST_FOREACH(fp, &ipq[hash], ipq_q) {
		if (ip->ip_id != fp->ipq_id)
			continue;
		if (!in_hosteq(ip->ip_src, fp->ipq_src))
			continue;
		if (!in_hosteq(ip->ip_dst, fp->ipq_dst))
			continue;
		if (ip->ip_p != fp->ipq_p)
			continue;
		break;
	}
	*hashp = hash;
	return fp;
}

/*
 * ip_reass:
 *
 *	Take incoming datagram fragment and try to reassemble it into whole
 *	datagram.  If a chain for reassembly of this datagram already exists,
 *	then it is given as 'fp'; otherwise have to make a chain.
 */
struct mbuf *
ip_reass(struct ipqent *ipqe, struct ipq *fp, u_int hash)
{
	struct ipqhead *ipqhead = &ipq[hash];
	const int hlen = ipqe->ipqe_ip->ip_hl << 2;
	struct mbuf *m = ipqe->ipqe_m, *t;
	struct ipqent *nq, *p, *q;
	struct ip *ip;
	int i, next, s;

	IPQ_LOCK_CHECK();

	/*
	 * The presence of the IP header in the mbuf would confuse the
	 * code below, so trim it off.
	 */
	m->m_data += hlen;
	m->m_len -= hlen;

#ifdef notyet
	/* Make sure fragment limit is up-to-date. */
	CHECK_NMBCLUSTER_PARAMS();

	/* If we have too many fragments, drop the older half. */
	if (ip_nfrags >= ip_maxfrags) {
		ip_reass_drophalf();
	}
#endif

	/*
	 * We are about to add a fragment; increment frag count.
	 */
	ip_nfrags++;

	/*
	 * If first fragment to arrive, create a reassembly queue.
	 */
	if (fp == NULL) {
		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly: a) if ip_maxfragpackets
		 * is 0, never accept fragments; b) if ip_maxfragpackets is
		 * -1, accept all fragments without limitation.
		 */
		if (ip_maxfragpackets < 0)
			;
		else if (ip_nfragpackets >= ip_maxfragpackets) {
			goto dropfrag;
		}
		ip_nfragpackets++;
		fp = malloc(sizeof(struct ipq), M_FTABLE, M_NOWAIT);
		if (fp == NULL) {
			goto dropfrag;
		}
		LIST_INSERT_HEAD(ipqhead, fp, ipq_q);
		fp->ipq_nfrags = 1;
		fp->ipq_ttl = IPFRAGTTL;
		fp->ipq_p = ipqe->ipqe_ip->ip_p;
		fp->ipq_id = ipqe->ipqe_ip->ip_id;
		fp->ipq_tos = ipqe->ipqe_ip->ip_tos;
		TAILQ_INIT(&fp->ipq_fragq);
		fp->ipq_src = ipqe->ipqe_ip->ip_src;
		fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
		p = NULL;
		goto insert;
	} else {
		fp->ipq_nfrags++;
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
	    p = q, q = TAILQ_NEXT(q, ipqe_q))
		if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
			break;

	/*
	 * If there is a preceding segment, it may provide some of our
	 * data already.  If so, drop the data from the incoming segment.
	 * If it provides all of our data, drop us.
	 */
	if (p != NULL) {
		i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
		    ntohs(ipqe->ipqe_ip->ip_off);
		if (i > 0) {
			if (i >= ntohs(ipqe->ipqe_ip->ip_len)) {
				goto dropfrag;
			}
			m_adj(ipqe->ipqe_m, i);
			ipqe->ipqe_ip->ip_off =
			    htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
			ipqe->ipqe_ip->ip_len =
			    htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
		}
	}

	/*
	 * While we overlap succeeding segments trim them or, if they are
	 * completely covered, dequeue them.
	 */
	for (; q != NULL &&
	    ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
	    ntohs(q->ipqe_ip->ip_off); q = nq) {
		i = (ntohs(ipqe->ipqe_ip->ip_off) +
		    ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
		if (i < ntohs(q->ipqe_ip->ip_len)) {
			q->ipqe_ip->ip_len =
			    htons(ntohs(q->ipqe_ip->ip_len) - i);
			q->ipqe_ip->ip_off =
			    htons(ntohs(q->ipqe_ip->ip_off) + i);
			m_adj(q->ipqe_m, i);
			break;
		}
		nq = TAILQ_NEXT(q, ipqe_q);
		m_freem(q->ipqe_m);
		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
		s = splvm();
		pool_put(&ipqent_pool, q);
		splx(s);
		fp->ipq_nfrags--;
		ip_nfrags--;
	}

insert:
	/*
	 * Stick new segment in its place; check for complete reassembly.
	 */
	if (p == NULL) {
		TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
	} else {
		TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
	}
	next = 0;
	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
	    p = q, q = TAILQ_NEXT(q, ipqe_q)) {
		if (ntohs(q->ipqe_ip->ip_off) != next) {
			IPQ_UNLOCK();
			return NULL;
		}
		next += ntohs(q->ipqe_ip->ip_len);
	}
	if (p->ipqe_mff) {
		IPQ_UNLOCK();
		return NULL;
	}
	/*
	 * Reassembly is complete.  Check for a bogus message size and
	 * concatenate fragments.
	 */
	q = TAILQ_FIRST(&fp->ipq_fragq);
	ip = q->ipqe_ip;
	if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
		IP_STATINC(IP_STAT_TOOLONG);
		ip_freef(fp);
		IPQ_UNLOCK();
		return NULL;
	}
	m = q->ipqe_m;
	t = m->m_next;
	m->m_next = NULL;
	m_cat(m, t);
	nq = TAILQ_NEXT(q, ipqe_q);
	s = splvm();
	pool_put(&ipqent_pool, q);
	splx(s);
	for (q = nq; q != NULL; q = nq) {
		t = q->ipqe_m;
		nq = TAILQ_NEXT(q, ipqe_q);
		s = splvm();
		pool_put(&ipqent_pool, q);
		splx(s);
		m_cat(m, t);
	}
	ip_nfrags -= fp->ipq_nfrags;

	/*
	 * Create header for new packet by modifying header of first
	 * packet.  Dequeue and discard fragment reassembly header.  Make
	 * header visible.
	 */
	ip->ip_len = htons((ip->ip_hl << 2) + next);
	ip->ip_src = fp->ipq_src;
	ip->ip_dst = fp->ipq_dst;

	LIST_REMOVE(fp, ipq_q);
	free(fp, M_FTABLE);
	ip_nfragpackets--;
	m->m_len += (ip->ip_hl << 2);
	m->m_data -= (ip->ip_hl << 2);
	/* some debugging cruft by sklower, below, will go away soon */
	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
		int plen = 0;
		for (t = m; t; t = t->m_next) {
			plen += t->m_len;
		}
		m->m_pkthdr.len = plen;
		m->m_pkthdr.csum_flags = 0;
	}
	IPQ_UNLOCK();
	return m;

dropfrag:
	if (fp != NULL) {
		fp->ipq_nfrags--;
	}
	ip_nfrags--;
	IP_STATINC(IP_STAT_FRAGDROPPED);
	m_freem(m);
	s = splvm();
	pool_put(&ipqent_pool, ipqe);
	splx(s);
	IPQ_UNLOCK();
	return NULL;
}

/*
 * ip_freef:
 *
 *	Free a fragment reassembly header and all associated datagrams.
 */
static void
ip_freef(struct ipq *fp)
{
	struct ipqent *q, *p;
	u_int nfrags = 0;
	int s;

	IPQ_LOCK_CHECK();

	for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
		p = TAILQ_NEXT(q, ipqe_q);
		m_freem(q->ipqe_m);
		nfrags++;
		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
		s = splvm();
		pool_put(&ipqent_pool, q);
		splx(s);
	}

	if (nfrags != fp->ipq_nfrags) {
		printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags);
	}
	ip_nfrags -= nfrags;
	LIST_REMOVE(fp, ipq_q);
	free(fp, M_FTABLE);
	ip_nfragpackets--;
}

/*
 * ip_reass_ttl_decr:
 *
 *	Decrement TTL of all reassembly queue entries by `ticks'.  Count
 *	number of distinct fragments (as opposed to partial, fragmented
 *	datagrams) in the reassembly queue.  While we traverse the entire
 *	reassembly queue, compute and return the median TTL over all
 *	fragments.
 */
static u_int
ip_reass_ttl_decr(u_int ticks)
{
	u_int nfrags, median, dropfraction, keepfraction;
	struct ipq *fp, *nfp;
	int i;

	nfrags = 0;
	memset(fragttl_histo, 0, sizeof(fragttl_histo));

	for (i = 0; i < IPREASS_HASH_SIZE; i++) {
		for (fp = LIST_FIRST(&ipq[i]); fp != NULL; fp = nfp) {
			fp->ipq_ttl = ((fp->ipq_ttl <= ticks) ?
			    0 : fp->ipq_ttl - ticks);
			nfp = LIST_NEXT(fp, ipq_q);
			if (fp->ipq_ttl == 0) {
				IP_STATINC(IP_STAT_FRAGTIMEOUT);
				ip_freef(fp);
			} else {
				nfrags += fp->ipq_nfrags;
				fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags;
			}
		}
	}

	KASSERT(ip_nfrags == nfrags);

	/* Find median (or other drop fraction) in histogram. */
	dropfraction = (ip_nfrags / 2);
	keepfraction = ip_nfrags - dropfraction;
	for (i = IPFRAGTTL, median = 0; i >= 0; i--) {
		median += fragttl_histo[i];
		if (median >= keepfraction)
			break;
	}

	/* Return TTL of median (or other fraction). */
	return (u_int)i;
}
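
/*
 * Worked example (made-up histogram): say 9 fragments sit in queues at
 * TTL 3, 2 at TTL 7 and 9 at TTL 12, so ip_nfrags == 20, dropfraction
 * == 10 and keepfraction == 10.  Scanning down from IPFRAGTTL, the
 * running sum reaches 11 at TTL 7, so 7 is returned; a following call
 * ip_reass_ttl_decr(7) then expires the 11 fragments at TTL 7 or below,
 * which is how ip_reass_drophalf() sheds roughly half the load.
 */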

static void
ip_reass_drophalf(void)
{
	u_int median_ticks;

	/*
	 * Compute median TTL of all fragments, and count frags
	 * with that TTL or lower (roughly half of all fragments).
	 */
	median_ticks = ip_reass_ttl_decr(0);

	/* Drop half. */
	median_ticks = ip_reass_ttl_decr(median_ticks);
}

/*
 * ip_reass_drain: drain off all datagram fragments.  Do not acquire
 * softnet_lock, as this can be called from hardware interrupt context.
 */
void
ip_reass_drain(void)
{

	/*
	 * We may be called from a device's interrupt context.  If
	 * the ipq is already busy, just bail out now.
	 */
	if (ipq_lock_try() != 0) {
		/*
		 * Drop half the total fragments now.  If more mbufs are
		 * needed, we will be called again soon.
		 */
		ip_reass_drophalf();
		IPQ_UNLOCK();
	}
}

/*
 * ip_reass_slowtimo:
 *
 *	If a timer expires on a reassembly queue, discard it.
 */
void
ip_reass_slowtimo(void)
{
	static u_int dropscanidx = 0;
	u_int i, median_ttl;

	IPQ_LOCK();

	/* Age TTL of all fragments by 1 tick. */
	median_ttl = ip_reass_ttl_decr(1);

	/* Make sure fragment limit is up-to-date. */
	CHECK_NMBCLUSTER_PARAMS();

	/* If we have too many fragments, drop the older half. */
	if (ip_nfrags > ip_maxfrags) {
		ip_reass_ttl_decr(median_ttl);
	}

	/*
	 * If we are over the maximum number of fragmented packets (due to
	 * the limit being lowered), drain off enough to get down to the
	 * new limit.  Start draining from the reassembly hashqueue most
	 * recently drained.
	 */
	if (ip_maxfragpackets < 0)
		;
	else {
		int wrapped = 0;

		i = dropscanidx;
		while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) {
			while (LIST_FIRST(&ipq[i]) != NULL) {
				ip_freef(LIST_FIRST(&ipq[i]));
			}
			if (++i >= IPREASS_HASH_SIZE) {
				i = 0;
			}
			/*
			 * Do not scan forever even if fragment counters are
			 * wrong: stop after scanning entire reassembly queue.
			 */
			if (i == dropscanidx) {
				wrapped = 1;
			}
		}
		dropscanidx = i;
	}
	IPQ_UNLOCK();
}

/*
 * ip_reass_packet: generic routine to perform IP reassembly.
 *
 * => The passed fragment should have the IP_MF flag and/or a non-zero
 *    offset set.
 * => The fragment should have no IP flags set other than IP_MF.
 *
 * => Returns 0 on success, or an error code otherwise.  When reassembly
 *    is complete, *m_final is set to the reconstructed final packet.
 */
int
ip_reass_packet(struct mbuf *m, struct ip *ip, bool mff, struct mbuf **m_final)
{
	struct ipq *fp;
	struct ipqent *ipqe;
	u_int hash;

	/* Look for queue of fragments of this datagram. */
	fp = ip_reass_lookup(ip, &hash);

	/* Make sure that TOS matches previous fragments. */
	if (fp && fp->ipq_tos != ip->ip_tos) {
		IP_STATINC(IP_STAT_BADFRAGS);
		IPQ_UNLOCK();
		return EINVAL;
	}

	/*
	 * Create new entry and attempt reassembly.
	 */
	IP_STATINC(IP_STAT_FRAGMENTS);
	int s = splvm();
	ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
	splx(s);
	if (ipqe == NULL) {
		IP_STATINC(IP_STAT_RCVMEMDROP);
		IPQ_UNLOCK();
		return ENOMEM;
	}
	ipqe->ipqe_mff = mff;
	ipqe->ipqe_m = m;
	ipqe->ipqe_ip = ip;

	*m_final = ip_reass(ipqe, fp, hash);
	if (*m_final) {
		/* Note if finally reassembled. */
		IP_STATINC(IP_STAT_REASSEMBLED);
	}
	return 0;
}
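
/*
 * Caller sketch (illustrative only; not the actual ip_input() code):
 * the fragment is handed over together with its "more fragments" bit,
 * and processing continues only once a complete datagram is returned.
 * Details such as stripping the flag bits from ip_off beforehand are
 * omitted here.
 *
 *	bool mff = (ntohs(ip->ip_off) & IP_MF) != 0;
 *	struct mbuf *whole;
 *
 *	if (ip_reass_packet(m, ip, mff, &whole) != 0)
 *		return;		(fragment rejected or dropped)
 *	if (whole == NULL)
 *		return;		(more fragments still expected)
 *	(whole now carries the fully reassembled datagram)
 */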