uipc_mbuf.c revision 1.105 1 /* $NetBSD: uipc_mbuf.c,v 1.105 2006/01/24 13:02:58 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Copyright (c) 1982, 1986, 1988, 1991, 1993
42 * The Regents of the University of California. All rights reserved.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 *
68 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95
69 */
70
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.105 2006/01/24 13:02:58 yamt Exp $");
73
74 #include "opt_mbuftrace.h"
75 #include "opt_ddb.h"
76
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/malloc.h>
81 #define MBTYPES
82 #include <sys/mbuf.h>
83 #include <sys/kernel.h>
84 #include <sys/syslog.h>
85 #include <sys/domain.h>
86 #include <sys/protosw.h>
87 #include <sys/pool.h>
88 #include <sys/socket.h>
89 #include <sys/sysctl.h>
90
91 #include <net/if.h>
92
93 #include <uvm/uvm.h>
94
95
96 struct pool mbpool; /* mbuf pool */
97 struct pool mclpool; /* mbuf cluster pool */
98
99 struct pool_cache mbpool_cache;
100 struct pool_cache mclpool_cache;
101
102 struct mbstat mbstat;
103 int max_linkhdr;
104 int max_protohdr;
105 int max_hdr;
106 int max_datalen;
107
108 static int mb_ctor(void *, void *, int);
109
110 static void *mclpool_alloc(struct pool *, int);
111 static void mclpool_release(struct pool *, void *);
112
113 static struct pool_allocator mclpool_allocator = {
114 mclpool_alloc, mclpool_release, 0,
115 };
116
117 static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
118 static struct mbuf *m_split0(struct mbuf *, int, int, int);
119 static int m_copyback0(struct mbuf **, int, int, const void *, int, int);
120
121 /* flags for m_copyback0 */
122 #define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */
123 #define M_COPYBACK0_PRESERVE 0x0002 /* preserve original data */
124 #define M_COPYBACK0_COW 0x0004 /* do copy-on-write */
125 #define M_COPYBACK0_EXTEND 0x0008 /* extend chain */
126
127 static const char mclpool_warnmsg[] =
128 "WARNING: mclpool limit reached; increase NMBCLUSTERS";
129
130 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
131
132 #ifdef MBUFTRACE
133 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
134 struct mowner unknown_mowners[] = {
135 { "unknown", "free" },
136 { "unknown", "data" },
137 { "unknown", "header" },
138 { "unknown", "soname" },
139 { "unknown", "soopts" },
140 { "unknown", "ftable" },
141 { "unknown", "control" },
142 { "unknown", "oobdata" },
143 };
144 struct mowner revoked_mowner = { "revoked", "" };
145 #endif
146
147 /*
148 * Initialize the mbuf allocator.
149 */
150 void
151 mbinit(void)
152 {
153
154 KASSERT(sizeof(struct _m_ext) <= MHLEN);
155 KASSERT(sizeof(struct mbuf) == MSIZE);
156
157 pool_init(&mbpool, msize, 0, 0, 0, "mbpl", NULL);
158 pool_init(&mclpool, mclbytes, 0, 0, 0, "mclpl", &mclpool_allocator);
159
160 pool_set_drain_hook(&mbpool, m_reclaim, NULL);
161 pool_set_drain_hook(&mclpool, m_reclaim, NULL);
162
163 pool_cache_init(&mbpool_cache, &mbpool, mb_ctor, NULL, NULL);
164 pool_cache_init(&mclpool_cache, &mclpool, NULL, NULL, NULL);
165
166 /*
167 * Set the hard limit on the mclpool to the number of
168 * mbuf clusters the kernel is to support. Log the limit
169 * reached message max once a minute.
170 */
171 pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60);
172
173 /*
174 * Set a low water mark for both mbufs and clusters. This should
175 * help ensure that they can be allocated in a memory starvation
176 * situation. This is important for e.g. diskless systems which
177 * must allocate mbufs in order for the pagedaemon to clean pages.
178 */
179 pool_setlowat(&mbpool, mblowat);
180 pool_setlowat(&mclpool, mcllowat);
181
182 #ifdef MBUFTRACE
183 {
184 /*
185 * Attach the unknown mowners.
186 */
187 int i;
188 MOWNER_ATTACH(&revoked_mowner);
189 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
190 i-- > 0; )
191 MOWNER_ATTACH(&unknown_mowners[i]);
192 }
193 #endif
194 }
195
196 /*
197 * sysctl helper routine for the kern.mbuf subtree. nmbclusters may
198 * or may not be writable, and mblowat and mcllowat need range
199 * checking and pool tweaking after being reset.
200 */
201 static int
202 sysctl_kern_mbuf(SYSCTLFN_ARGS)
203 {
204 int error, newval;
205 struct sysctlnode node;
206
207 node = *rnode;
208 node.sysctl_data = &newval;
209 switch (rnode->sysctl_num) {
210 case MBUF_NMBCLUSTERS:
211 if (mb_map != NULL) {
212 node.sysctl_flags &= ~CTLFLAG_READWRITE;
213 node.sysctl_flags |= CTLFLAG_READONLY;
214 }
215 /* FALLTHROUGH */
216 case MBUF_MBLOWAT:
217 case MBUF_MCLLOWAT:
218 newval = *(int*)rnode->sysctl_data;
219 break;
220 default:
221 return (EOPNOTSUPP);
222 }
223
224 error = sysctl_lookup(SYSCTLFN_CALL(&node));
225 if (error || newp == NULL)
226 return (error);
227 if (newval < 0)
228 return (EINVAL);
229
230 switch (node.sysctl_num) {
231 case MBUF_NMBCLUSTERS:
232 if (newval < nmbclusters)
233 return (EINVAL);
234 nmbclusters = newval;
235 pool_sethardlimit(&mclpool, nmbclusters, mclpool_warnmsg, 60);
236 break;
237 case MBUF_MBLOWAT:
238 mblowat = newval;
239 pool_setlowat(&mbpool, mblowat);
240 break;
241 case MBUF_MCLLOWAT:
242 mcllowat = newval;
243 pool_setlowat(&mclpool, mcllowat);
244 break;
245 }
246
247 return (0);
248 }
249
#ifdef MBUFTRACE
/*
 * sysctl helper for the read-only kern.mbuf "mowners" node.
 *
 * Copies every struct mowner on the mowners list out to oldp, one after
 * another.  With oldp == NULL only the required size is computed.  On
 * success *oldlenp is set to the total size; on error it is left alone.
 *
 * Returns EINVAL if extra name components were given, EPERM on a write
 * attempt, ENOMEM if the caller's buffer is too small, or the error
 * from copyout().
 */
static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *owner;
	size_t used;
	int rc;

	if (namelen != 0)
		return (EINVAL);
	if (newp != NULL)
		return (EPERM);

	used = 0;
	rc = 0;
	LIST_FOREACH(owner, &mowners, mo_link) {
		if (oldp != NULL) {
			if (*oldlenp - used < sizeof(*owner)) {
				rc = ENOMEM;
				break;
			}
			rc = copyout(owner, (caddr_t) oldp + used,
			    sizeof(*owner));
			if (rc)
				break;
		}
		used += sizeof(*owner);
	}

	if (rc != 0)
		return (rc);

	*oldlenp = used;
	return (0);
}
#endif /* MBUFTRACE */
283
284 SYSCTL_SETUP(sysctl_kern_mbuf_setup, "sysctl kern.mbuf subtree setup")
285 {
286
287 sysctl_createv(clog, 0, NULL, NULL,
288 CTLFLAG_PERMANENT,
289 CTLTYPE_NODE, "kern", NULL,
290 NULL, 0, NULL, 0,
291 CTL_KERN, CTL_EOL);
292 sysctl_createv(clog, 0, NULL, NULL,
293 CTLFLAG_PERMANENT,
294 CTLTYPE_NODE, "mbuf",
295 SYSCTL_DESCR("mbuf control variables"),
296 NULL, 0, NULL, 0,
297 CTL_KERN, KERN_MBUF, CTL_EOL);
298
299 sysctl_createv(clog, 0, NULL, NULL,
300 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
301 CTLTYPE_INT, "msize",
302 SYSCTL_DESCR("mbuf base size"),
303 NULL, msize, NULL, 0,
304 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
305 sysctl_createv(clog, 0, NULL, NULL,
306 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
307 CTLTYPE_INT, "mclbytes",
308 SYSCTL_DESCR("mbuf cluster size"),
309 NULL, mclbytes, NULL, 0,
310 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
311 sysctl_createv(clog, 0, NULL, NULL,
312 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
313 CTLTYPE_INT, "nmbclusters",
314 SYSCTL_DESCR("Limit on the number of mbuf clusters"),
315 sysctl_kern_mbuf, 0, &nmbclusters, 0,
316 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
317 sysctl_createv(clog, 0, NULL, NULL,
318 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
319 CTLTYPE_INT, "mblowat",
320 SYSCTL_DESCR("mbuf low water mark"),
321 sysctl_kern_mbuf, 0, &mblowat, 0,
322 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
323 sysctl_createv(clog, 0, NULL, NULL,
324 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
325 CTLTYPE_INT, "mcllowat",
326 SYSCTL_DESCR("mbuf cluster low water mark"),
327 sysctl_kern_mbuf, 0, &mcllowat, 0,
328 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
329 sysctl_createv(clog, 0, NULL, NULL,
330 CTLFLAG_PERMANENT,
331 CTLTYPE_STRUCT, "stats",
332 SYSCTL_DESCR("mbuf allocation statistics"),
333 NULL, 0, &mbstat, sizeof(mbstat),
334 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
335 #ifdef MBUFTRACE
336 sysctl_createv(clog, 0, NULL, NULL,
337 CTLFLAG_PERMANENT,
338 CTLTYPE_STRUCT, "mowners",
339 SYSCTL_DESCR("Information about mbuf owners"),
340 sysctl_kern_mbuf_mowners, 0, NULL, 0,
341 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
342 #endif /* MBUFTRACE */
343 }
344
345 static void *
346 mclpool_alloc(struct pool *pp, int flags)
347 {
348 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
349
350 return ((void *)uvm_km_alloc_poolpage(mb_map, waitok));
351 }
352
353 static void
354 mclpool_release(struct pool *pp, void *v)
355 {
356
357 uvm_km_free_poolpage(mb_map, (vaddr_t)v);
358 }
359
360 /*ARGSUSED*/
361 static int
362 mb_ctor(void *arg, void *object, int flags)
363 {
364 struct mbuf *m = object;
365
366 #ifdef POOL_VTOPHYS
367 m->m_paddr = POOL_VTOPHYS(m);
368 #else
369 m->m_paddr = M_PADDR_INVALID;
370 #endif
371 return (0);
372 }
373
374 void
375 m_reclaim(void *arg, int flags)
376 {
377 struct domain *dp;
378 const struct protosw *pr;
379 struct ifnet *ifp;
380 int s = splvm();
381
382 DOMAIN_FOREACH(dp) {
383 for (pr = dp->dom_protosw;
384 pr < dp->dom_protoswNPROTOSW; pr++)
385 if (pr->pr_drain)
386 (*pr->pr_drain)();
387 }
388 IFNET_FOREACH(ifp) {
389 if (ifp->if_drain)
390 (*ifp->if_drain)(ifp);
391 }
392 splx(s);
393 mbstat.m_drain++;
394 }
395
/*
 * Space allocation routines.
 * Function versions of the allocation macros; critical paths may use
 * the macros directly.
 */

/*
 * Function form of MGET(): allocate one mbuf of the given type.
 * Returns whatever MGET() stored in the mbuf pointer.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *mb;

	MGET(mb, nowait, type);
	return (mb);
}
409
/*
 * Function form of MGETHDR(): allocate one packet-header mbuf of the
 * given type.  Returns whatever MGETHDR() stored in the mbuf pointer.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *mb;

	MGETHDR(mb, nowait, type);
	return (mb);
}
418
419 struct mbuf *
420 m_getclr(int nowait, int type)
421 {
422 struct mbuf *m;
423
424 MGET(m, nowait, type);
425 if (m == 0)
426 return (NULL);
427 memset(mtod(m, caddr_t), 0, MLEN);
428 return (m);
429 }
430
/*
 * Function form of MCLGET(): try to attach a cluster to "m".  This
 * function reports nothing; callers in this file check M_EXT in
 * m->m_flags afterwards.
 */
void
m_clget(struct mbuf *m, int nowait)
{

	MCLGET(m, nowait);
}
437
/*
 * Function form of MFREE(): free the single mbuf "m" and return the
 * mbuf that MFREE() reports as its successor.
 */
struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *next;

	MFREE(m, next);
	return (next);
}
446
447 void
448 m_freem(struct mbuf *m)
449 {
450 struct mbuf *n;
451
452 if (m == NULL)
453 return;
454 do {
455 MFREE(m, n);
456 m = n;
457 } while (m);
458 }
459
#ifdef MBUFTRACE
/*
 * Claim ownership, on behalf of "mo", of every mbuf in the chain "m".
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	while (m != NULL) {
		MCLAIM(m, mo);
		m = m->m_next;
	}
}
#endif
472
473 /*
474 * Mbuffer utility routines.
475 */
476
477 /*
478 * Lesser-used path for M_PREPEND:
479 * allocate new mbuf to prepend to chain,
480 * copy junk along.
481 */
482 struct mbuf *
483 m_prepend(struct mbuf *m, int len, int how)
484 {
485 struct mbuf *mn;
486
487 MGET(mn, how, m->m_type);
488 if (mn == (struct mbuf *)NULL) {
489 m_freem(m);
490 return ((struct mbuf *)NULL);
491 }
492 if (m->m_flags & M_PKTHDR) {
493 M_MOVE_PKTHDR(mn, m);
494 } else {
495 MCLAIM(mn, m->m_owner);
496 }
497 mn->m_next = m;
498 m = mn;
499 if (len < MHLEN)
500 MH_ALIGN(m, len);
501 m->m_len = len;
502 return (m);
503 }
504
/*
 * Copy "len" bytes of an mbuf chain, beginning "off0" bytes from its
 * start; len == M_COPYALL copies through the end of the chain.  "wait"
 * is the caller's choice of M_WAIT or M_DONTWAIT.  External storage is
 * shared with the original rather than duplicated.
 *
 * MCFail counts failed chain copies.
 */
int MCFail;

struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *copy;

	/* deep == 0: shallow copy on M_EXT */
	copy = m_copym0(m, off0, len, wait, 0);
	return copy;
}
518
/*
 * Same arguments as m_copym(), but the data held in external storage
 * is copied as well (deep copy) instead of being shared.
 */
struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *copy;

	/* deep == 1: deep copy */
	copy = m_copym0(m, off0, len, wait, 1);
	return copy;
}
525
526 static struct mbuf *
527 m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
528 {
529 struct mbuf *n, **np;
530 int off = off0;
531 struct mbuf *top;
532 int copyhdr = 0;
533
534 if (off < 0 || len < 0)
535 panic("m_copym: off %d, len %d", off, len);
536 if (off == 0 && m->m_flags & M_PKTHDR)
537 copyhdr = 1;
538 while (off > 0) {
539 if (m == 0)
540 panic("m_copym: m == 0, off %d", off);
541 if (off < m->m_len)
542 break;
543 off -= m->m_len;
544 m = m->m_next;
545 }
546 np = ⊤
547 top = 0;
548 while (len > 0) {
549 if (m == 0) {
550 if (len != M_COPYALL)
551 panic("m_copym: m == 0, len %d [!COPYALL]",
552 len);
553 break;
554 }
555 MGET(n, wait, m->m_type);
556 *np = n;
557 if (n == 0)
558 goto nospace;
559 MCLAIM(n, m->m_owner);
560 if (copyhdr) {
561 M_COPY_PKTHDR(n, m);
562 if (len == M_COPYALL)
563 n->m_pkthdr.len -= off0;
564 else
565 n->m_pkthdr.len = len;
566 copyhdr = 0;
567 }
568 n->m_len = min(len, m->m_len - off);
569 if (m->m_flags & M_EXT) {
570 if (!deep) {
571 n->m_data = m->m_data + off;
572 n->m_ext = m->m_ext;
573 MCLADDREFERENCE(m, n);
574 } else {
575 /*
576 * we are unsure about the way m was allocated.
577 * copy into multiple MCLBYTES cluster mbufs.
578 */
579 MCLGET(n, wait);
580 n->m_len = 0;
581 n->m_len = M_TRAILINGSPACE(n);
582 n->m_len = min(n->m_len, len);
583 n->m_len = min(n->m_len, m->m_len - off);
584 memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
585 (unsigned)n->m_len);
586 }
587 } else
588 memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off,
589 (unsigned)n->m_len);
590 if (len != M_COPYALL)
591 len -= n->m_len;
592 off += n->m_len;
593 #ifdef DIAGNOSTIC
594 if (off > m->m_len)
595 panic("m_copym0 overrun");
596 #endif
597 if (off == m->m_len) {
598 m = m->m_next;
599 off = 0;
600 }
601 np = &n->m_next;
602 }
603 if (top == 0)
604 MCFail++;
605 return (top);
606 nospace:
607 m_freem(top);
608 MCFail++;
609 return (NULL);
610 }
611
612 /*
613 * Copy an entire packet, including header (which must be present).
614 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
615 */
616 struct mbuf *
617 m_copypacket(struct mbuf *m, int how)
618 {
619 struct mbuf *top, *n, *o;
620
621 MGET(n, how, m->m_type);
622 top = n;
623 if (!n)
624 goto nospace;
625
626 MCLAIM(n, m->m_owner);
627 M_COPY_PKTHDR(n, m);
628 n->m_len = m->m_len;
629 if (m->m_flags & M_EXT) {
630 n->m_data = m->m_data;
631 n->m_ext = m->m_ext;
632 MCLADDREFERENCE(m, n);
633 } else {
634 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
635 }
636
637 m = m->m_next;
638 while (m) {
639 MGET(o, how, m->m_type);
640 if (!o)
641 goto nospace;
642
643 MCLAIM(o, m->m_owner);
644 n->m_next = o;
645 n = n->m_next;
646
647 n->m_len = m->m_len;
648 if (m->m_flags & M_EXT) {
649 n->m_data = m->m_data;
650 n->m_ext = m->m_ext;
651 MCLADDREFERENCE(m, n);
652 } else {
653 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
654 }
655
656 m = m->m_next;
657 }
658 return top;
659 nospace:
660 m_freem(top);
661 MCFail++;
662 return NULL;
663 }
664
665 /*
666 * Copy data from an mbuf chain starting "off" bytes from the beginning,
667 * continuing for "len" bytes, into the indicated buffer.
668 */
669 void
670 m_copydata(struct mbuf *m, int off, int len, void *vp)
671 {
672 unsigned count;
673 caddr_t cp = vp;
674
675 if (off < 0 || len < 0)
676 panic("m_copydata: off %d, len %d", off, len);
677 while (off > 0) {
678 if (m == NULL)
679 panic("m_copydata: m == NULL, off %d", off);
680 if (off < m->m_len)
681 break;
682 off -= m->m_len;
683 m = m->m_next;
684 }
685 while (len > 0) {
686 if (m == NULL)
687 panic("m_copydata: m == NULL, len %d", len);
688 count = min(m->m_len - off, len);
689 memcpy(cp, mtod(m, caddr_t) + off, count);
690 len -= count;
691 cp += count;
692 off = 0;
693 m = m->m_next;
694 }
695 }
696
697 /*
698 * Concatenate mbuf chain n to m.
699 * n might be copied into m (when n->m_len is small), therefore data portion of
700 * n could be copied into an mbuf of different mbuf type.
701 * Any m_pkthdr is not updated.
702 */
703 void
704 m_cat(struct mbuf *m, struct mbuf *n)
705 {
706
707 while (m->m_next)
708 m = m->m_next;
709 while (n) {
710 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
711 /* just join the two chains */
712 m->m_next = n;
713 return;
714 }
715 /* splat the data from one into the other */
716 memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
717 (u_int)n->m_len);
718 m->m_len += n->m_len;
719 n = m_free(n);
720 }
721 }
722
723 void
724 m_adj(struct mbuf *mp, int req_len)
725 {
726 int len = req_len;
727 struct mbuf *m;
728 int count;
729
730 if ((m = mp) == NULL)
731 return;
732 if (len >= 0) {
733 /*
734 * Trim from head.
735 */
736 while (m != NULL && len > 0) {
737 if (m->m_len <= len) {
738 len -= m->m_len;
739 m->m_len = 0;
740 m = m->m_next;
741 } else {
742 m->m_len -= len;
743 m->m_data += len;
744 len = 0;
745 }
746 }
747 m = mp;
748 if (mp->m_flags & M_PKTHDR)
749 m->m_pkthdr.len -= (req_len - len);
750 } else {
751 /*
752 * Trim from tail. Scan the mbuf chain,
753 * calculating its length and finding the last mbuf.
754 * If the adjustment only affects this mbuf, then just
755 * adjust and return. Otherwise, rescan and truncate
756 * after the remaining size.
757 */
758 len = -len;
759 count = 0;
760 for (;;) {
761 count += m->m_len;
762 if (m->m_next == (struct mbuf *)0)
763 break;
764 m = m->m_next;
765 }
766 if (m->m_len >= len) {
767 m->m_len -= len;
768 if (mp->m_flags & M_PKTHDR)
769 mp->m_pkthdr.len -= len;
770 return;
771 }
772 count -= len;
773 if (count < 0)
774 count = 0;
775 /*
776 * Correct length for chain is "count".
777 * Find the mbuf with last data, adjust its length,
778 * and toss data from remaining mbufs on chain.
779 */
780 m = mp;
781 if (m->m_flags & M_PKTHDR)
782 m->m_pkthdr.len = count;
783 for (; m; m = m->m_next) {
784 if (m->m_len >= count) {
785 m->m_len = count;
786 break;
787 }
788 count -= m->m_len;
789 }
790 while (m->m_next)
791 (m = m->m_next) ->m_len = 0;
792 }
793 }
794
795 /*
796 * Rearrange an mbuf chain so that len bytes are contiguous
797 * and in the data area of an mbuf (so that mtod and dtom
798 * will work for a structure of size len). Returns the resulting
799 * mbuf chain on success, frees it and returns null on failure.
800 * If there is room, it will add up to max_protohdr-len extra bytes to the
801 * contiguous region in an attempt to avoid being called next time.
802 */
803 int MPFail;
804
805 struct mbuf *
806 m_pullup(struct mbuf *n, int len)
807 {
808 struct mbuf *m;
809 int count;
810 int space;
811
812 /*
813 * If first mbuf has no cluster, and has room for len bytes
814 * without shifting current data, pullup into it,
815 * otherwise allocate a new mbuf to prepend to the chain.
816 */
817 if ((n->m_flags & M_EXT) == 0 &&
818 n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
819 if (n->m_len >= len)
820 return (n);
821 m = n;
822 n = n->m_next;
823 len -= m->m_len;
824 } else {
825 if (len > MHLEN)
826 goto bad;
827 MGET(m, M_DONTWAIT, n->m_type);
828 if (m == 0)
829 goto bad;
830 MCLAIM(m, n->m_owner);
831 m->m_len = 0;
832 if (n->m_flags & M_PKTHDR) {
833 M_MOVE_PKTHDR(m, n);
834 }
835 }
836 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
837 do {
838 count = min(min(max(len, max_protohdr), space), n->m_len);
839 memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
840 (unsigned)count);
841 len -= count;
842 m->m_len += count;
843 n->m_len -= count;
844 space -= count;
845 if (n->m_len)
846 n->m_data += count;
847 else
848 n = m_free(n);
849 } while (len > 0 && n);
850 if (len > 0) {
851 (void) m_free(m);
852 goto bad;
853 }
854 m->m_next = n;
855 return (m);
856 bad:
857 m_freem(n);
858 MPFail++;
859 return (NULL);
860 }
861
862 /*
863 * Like m_pullup(), except a new mbuf is always allocated, and we allow
864 * the amount of empty space before the data in the new mbuf to be specified
865 * (in the event that the caller expects to prepend later).
866 */
867 int MSFail;
868
869 struct mbuf *
870 m_copyup(struct mbuf *n, int len, int dstoff)
871 {
872 struct mbuf *m;
873 int count, space;
874
875 if (len > (MHLEN - dstoff))
876 goto bad;
877 MGET(m, M_DONTWAIT, n->m_type);
878 if (m == NULL)
879 goto bad;
880 MCLAIM(m, n->m_owner);
881 m->m_len = 0;
882 if (n->m_flags & M_PKTHDR) {
883 M_MOVE_PKTHDR(m, n);
884 }
885 m->m_data += dstoff;
886 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
887 do {
888 count = min(min(max(len, max_protohdr), space), n->m_len);
889 memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
890 (unsigned)count);
891 len -= count;
892 m->m_len += count;
893 n->m_len -= count;
894 space -= count;
895 if (n->m_len)
896 n->m_data += count;
897 else
898 n = m_free(n);
899 } while (len > 0 && n);
900 if (len > 0) {
901 (void) m_free(m);
902 goto bad;
903 }
904 m->m_next = n;
905 return (m);
906 bad:
907 m_freem(n);
908 MSFail++;
909 return (NULL);
910 }
911
/*
 * Split an mbuf chain in two after its first len0 bytes and return the
 * tail; the head keeps the first len0 bytes.  On failure NULL is
 * returned and an attempt is made to leave the chain as it was.
 * The packet header, if present, is carried over to the tail.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *tail;

	/* copyhdr == 1 */
	tail = m_split0(m0, len0, wait, 1);
	return tail;
}
923
924 static struct mbuf *
925 m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
926 {
927 struct mbuf *m, *n;
928 unsigned len = len0, remain, len_save;
929
930 for (m = m0; m && len > m->m_len; m = m->m_next)
931 len -= m->m_len;
932 if (m == 0)
933 return (NULL);
934 remain = m->m_len - len;
935 if (copyhdr && (m0->m_flags & M_PKTHDR)) {
936 MGETHDR(n, wait, m0->m_type);
937 if (n == 0)
938 return (NULL);
939 MCLAIM(m, m0->m_owner);
940 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
941 n->m_pkthdr.len = m0->m_pkthdr.len - len0;
942 len_save = m0->m_pkthdr.len;
943 m0->m_pkthdr.len = len0;
944 if (m->m_flags & M_EXT)
945 goto extpacket;
946 if (remain > MHLEN) {
947 /* m can't be the lead packet */
948 MH_ALIGN(n, 0);
949 n->m_next = m_split(m, len, wait);
950 if (n->m_next == 0) {
951 (void) m_free(n);
952 m0->m_pkthdr.len = len_save;
953 return (NULL);
954 } else
955 return (n);
956 } else
957 MH_ALIGN(n, remain);
958 } else if (remain == 0) {
959 n = m->m_next;
960 m->m_next = 0;
961 return (n);
962 } else {
963 MGET(n, wait, m->m_type);
964 if (n == 0)
965 return (NULL);
966 MCLAIM(n, m->m_owner);
967 M_ALIGN(n, remain);
968 }
969 extpacket:
970 if (m->m_flags & M_EXT) {
971 n->m_ext = m->m_ext;
972 MCLADDREFERENCE(m, n);
973 n->m_data = m->m_data + len;
974 } else {
975 memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
976 }
977 n->m_len = remain;
978 m->m_len = len;
979 n->m_next = m->m_next;
980 m->m_next = 0;
981 return (n);
982 }
983 /*
984 * Routine to copy from device local memory into mbufs.
985 */
986 struct mbuf *
987 m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
988 void (*copy)(const void *from, void *to, size_t len))
989 {
990 struct mbuf *m;
991 struct mbuf *top = 0, **mp = ⊤
992 int off = off0, len;
993 char *cp;
994 char *epkt;
995
996 cp = buf;
997 epkt = cp + totlen;
998 if (off) {
999 /*
1000 * If 'off' is non-zero, packet is trailer-encapsulated,
1001 * so we have to skip the type and length fields.
1002 */
1003 cp += off + 2 * sizeof(uint16_t);
1004 totlen -= 2 * sizeof(uint16_t);
1005 }
1006 MGETHDR(m, M_DONTWAIT, MT_DATA);
1007 if (m == 0)
1008 return (NULL);
1009 m->m_pkthdr.rcvif = ifp;
1010 m->m_pkthdr.len = totlen;
1011 m->m_len = MHLEN;
1012
1013 while (totlen > 0) {
1014 if (top) {
1015 MGET(m, M_DONTWAIT, MT_DATA);
1016 if (m == 0) {
1017 m_freem(top);
1018 return (NULL);
1019 }
1020 m->m_len = MLEN;
1021 }
1022 len = min(totlen, epkt - cp);
1023 if (len >= MINCLSIZE) {
1024 MCLGET(m, M_DONTWAIT);
1025 if ((m->m_flags & M_EXT) == 0) {
1026 m_free(m);
1027 m_freem(top);
1028 return (NULL);
1029 }
1030 m->m_len = len = min(len, MCLBYTES);
1031 } else {
1032 /*
1033 * Place initial small packet/header at end of mbuf.
1034 */
1035 if (len < m->m_len) {
1036 if (top == 0 && len + max_linkhdr <= m->m_len)
1037 m->m_data += max_linkhdr;
1038 m->m_len = len;
1039 } else
1040 len = m->m_len;
1041 }
1042 if (copy)
1043 copy(cp, mtod(m, caddr_t), (size_t)len);
1044 else
1045 memcpy(mtod(m, caddr_t), cp, (size_t)len);
1046 cp += len;
1047 *mp = m;
1048 mp = &m->m_next;
1049 totlen -= len;
1050 if (cp == epkt)
1051 cp = buf;
1052 }
1053 return (top);
1054 }
1055
1056 /*
1057 * Copy data from a buffer back into the indicated mbuf chain,
1058 * starting "off" bytes from the beginning, extending the mbuf
1059 * chain if necessary.
1060 */
1061 void
1062 m_copyback(struct mbuf *m0, int off, int len, const void *cp)
1063 {
1064 #if defined(DEBUG)
1065 struct mbuf *origm = m0;
1066 int error;
1067 #endif /* defined(DEBUG) */
1068
1069 if (m0 == NULL)
1070 return;
1071
1072 #if defined(DEBUG)
1073 error =
1074 #endif /* defined(DEBUG) */
1075 m_copyback0(&m0, off, len, cp,
1076 M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);
1077
1078 #if defined(DEBUG)
1079 if (error != 0 || (m0 != NULL && origm != m0))
1080 panic("m_copyback");
1081 #endif /* defined(DEBUG) */
1082 }
1083
1084 struct mbuf *
1085 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
1086 {
1087 int error;
1088
1089 /* don't support chain expansion */
1090 KDASSERT(off + len <= m_length(m0));
1091
1092 error = m_copyback0(&m0, off, len, cp,
1093 M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
1094 if (error) {
1095 /*
1096 * no way to recover from partial success.
1097 * just free the chain.
1098 */
1099 m_freem(m0);
1100 return NULL;
1101 }
1102 return m0;
1103 }
1104
1105 /*
1106 * m_makewritable: ensure the specified range writable.
1107 */
1108 int
1109 m_makewritable(struct mbuf **mp, int off, int len, int how)
1110 {
1111 int error;
1112 #if defined(DEBUG)
1113 struct mbuf *n;
1114 int origlen, reslen;
1115
1116 origlen = m_length(*mp);
1117 #endif /* defined(DEBUG) */
1118
1119 #if 0 /* M_COPYALL is large enough */
1120 if (len == M_COPYALL)
1121 len = m_length(*mp) - off; /* XXX */
1122 #endif
1123
1124 error = m_copyback0(mp, off, len, NULL,
1125 M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);
1126
1127 #if defined(DEBUG)
1128 reslen = 0;
1129 for (n = *mp; n; n = n->m_next)
1130 reslen += n->m_len;
1131 if (origlen != reslen)
1132 panic("m_makewritable: length changed");
1133 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
1134 panic("m_makewritable: inconsist");
1135 #endif /* defined(DEBUG) */
1136
1137 return error;
1138 }
1139
1140 int
1141 m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
1142 int how)
1143 {
1144 int mlen;
1145 struct mbuf *m, *n;
1146 struct mbuf **mp;
1147 int totlen = 0;
1148 const char *cp = vp;
1149
1150 KASSERT(mp0 != NULL);
1151 KASSERT(*mp0 != NULL);
1152 KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
1153 KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);
1154
1155 mp = mp0;
1156 m = *mp;
1157 while (off > (mlen = m->m_len)) {
1158 off -= mlen;
1159 totlen += mlen;
1160 if (m->m_next == 0) {
1161 if ((flags & M_COPYBACK0_EXTEND) == 0)
1162 goto out;
1163 n = m_getclr(how, m->m_type);
1164 if (n == 0)
1165 goto out;
1166 n->m_len = min(MLEN, len + off);
1167 m->m_next = n;
1168 }
1169 mp = &m->m_next;
1170 m = m->m_next;
1171 }
1172 while (len > 0) {
1173 mlen = m->m_len - off;
1174 if (mlen != 0 && M_READONLY(m)) {
1175 char *datap;
1176 int eatlen;
1177
1178 /*
1179 * this mbuf is read-only.
1180 * allocate a new writable mbuf and try again.
1181 */
1182
1183 #if defined(DIAGNOSTIC)
1184 if ((flags & M_COPYBACK0_COW) == 0)
1185 panic("m_copyback0: read-only");
1186 #endif /* defined(DIAGNOSTIC) */
1187
1188 /*
1189 * if we're going to write into the middle of
1190 * a mbuf, split it first.
1191 */
1192 if (off > 0 && len < mlen) {
1193 n = m_split0(m, off, how, 0);
1194 if (n == NULL)
1195 goto enobufs;
1196 m->m_next = n;
1197 mp = &m->m_next;
1198 m = n;
1199 off = 0;
1200 continue;
1201 }
1202
1203 /*
1204 * XXX TODO coalesce into the trailingspace of
1205 * the previous mbuf when possible.
1206 */
1207
1208 /*
1209 * allocate a new mbuf. copy packet header if needed.
1210 */
1211 MGET(n, how, m->m_type);
1212 if (n == NULL)
1213 goto enobufs;
1214 MCLAIM(n, m->m_owner);
1215 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
1216 M_MOVE_PKTHDR(n, m);
1217 n->m_len = MHLEN;
1218 } else {
1219 if (len >= MINCLSIZE)
1220 MCLGET(n, M_DONTWAIT);
1221 n->m_len =
1222 (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
1223 }
1224 if (n->m_len > len)
1225 n->m_len = len;
1226
1227 /*
1228 * free the region which has been overwritten.
1229 * copying data from old mbufs if requested.
1230 */
1231 if (flags & M_COPYBACK0_PRESERVE)
1232 datap = mtod(n, char *);
1233 else
1234 datap = NULL;
1235 eatlen = n->m_len;
1236 KDASSERT(off == 0 || eatlen >= mlen);
1237 if (off > 0) {
1238 KDASSERT(len >= mlen);
1239 m->m_len = off;
1240 m->m_next = n;
1241 if (datap) {
1242 m_copydata(m, off, mlen, datap);
1243 datap += mlen;
1244 }
1245 eatlen -= mlen;
1246 mp = &m->m_next;
1247 m = m->m_next;
1248 }
1249 while (m != NULL && M_READONLY(m) &&
1250 n->m_type == m->m_type && eatlen > 0) {
1251 mlen = min(eatlen, m->m_len);
1252 if (datap) {
1253 m_copydata(m, 0, mlen, datap);
1254 datap += mlen;
1255 }
1256 m->m_data += mlen;
1257 m->m_len -= mlen;
1258 eatlen -= mlen;
1259 if (m->m_len == 0)
1260 *mp = m = m_free(m);
1261 }
1262 if (eatlen > 0)
1263 n->m_len -= eatlen;
1264 n->m_next = m;
1265 *mp = m = n;
1266 continue;
1267 }
1268 mlen = min(mlen, len);
1269 if (flags & M_COPYBACK0_COPYBACK) {
1270 memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
1271 cp += mlen;
1272 }
1273 len -= mlen;
1274 mlen += off;
1275 off = 0;
1276 totlen += mlen;
1277 if (len == 0)
1278 break;
1279 if (m->m_next == 0) {
1280 if ((flags & M_COPYBACK0_EXTEND) == 0)
1281 goto out;
1282 n = m_get(how, m->m_type);
1283 if (n == 0)
1284 break;
1285 n->m_len = min(MLEN, len);
1286 m->m_next = n;
1287 }
1288 mp = &m->m_next;
1289 m = m->m_next;
1290 }
1291 out: if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
1292 m->m_pkthdr.len = totlen;
1293
1294 return 0;
1295
1296 enobufs:
1297 return ENOBUFS;
1298 }
1299
1300 void
1301 m_move_pkthdr(struct mbuf *to, struct mbuf *from)
1302 {
1303
1304 KASSERT((to->m_flags & M_EXT) == 0);
1305 KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL);
1306 KASSERT((from->m_flags & M_PKTHDR) != 0);
1307
1308 to->m_pkthdr = from->m_pkthdr;
1309 to->m_flags = from->m_flags & M_COPYFLAGS;
1310 to->m_data = to->m_pktdat;
1311
1312 from->m_flags &= ~M_PKTHDR;
1313 }
1314
1315 /*
1316 * Apply function f to the data in an mbuf chain starting "off" bytes from the
1317 * beginning, continuing for "len" bytes.
1318 */
1319 int
1320 m_apply(struct mbuf *m, int off, int len,
1321 int (*f)(void *, caddr_t, unsigned int), void *arg)
1322 {
1323 unsigned int count;
1324 int rval;
1325
1326 KASSERT(len >= 0);
1327 KASSERT(off >= 0);
1328
1329 while (off > 0) {
1330 KASSERT(m != NULL);
1331 if (off < m->m_len)
1332 break;
1333 off -= m->m_len;
1334 m = m->m_next;
1335 }
1336 while (len > 0) {
1337 KASSERT(m != NULL);
1338 count = min(m->m_len - off, len);
1339
1340 rval = (*f)(arg, mtod(m, caddr_t) + off, count);
1341 if (rval)
1342 return (rval);
1343
1344 len -= count;
1345 off = 0;
1346 m = m->m_next;
1347 }
1348
1349 return (0);
1350 }
1351
1352 /*
1353 * Return a pointer to mbuf/offset of location in mbuf chain.
1354 */
1355 struct mbuf *
1356 m_getptr(struct mbuf *m, int loc, int *off)
1357 {
1358
1359 while (loc >= 0) {
1360 /* Normal end of search */
1361 if (m->m_len > loc) {
1362 *off = loc;
1363 return (m);
1364 } else {
1365 loc -= m->m_len;
1366
1367 if (m->m_next == NULL) {
1368 if (loc == 0) {
1369 /* Point at the end of valid data */
1370 *off = m->m_len;
1371 return (m);
1372 } else
1373 return (NULL);
1374 } else
1375 m = m->m_next;
1376 }
1377 }
1378
1379 return (NULL);
1380 }
1381
#if defined(DDB)
/*
 * m_print: dump the contents of an mbuf header through a printf-like
 * callback (only built when DDB is defined).
 *
 *	m	mbuf to describe
 *	modif	string of modifier characters; 'c' makes the function
 *		follow m_next and print every mbuf in the chain, any
 *		other character is ignored
 *	pr	printf-style output routine
 *
 * For each mbuf printed: the generic header fields, the packet header
 * fields if M_PKTHDR is set, the external storage fields if M_EXT is set,
 * and the page array if both M_EXT and M_EXT_PAGES are set.
 */
void
m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
{
	char ch;
	boolean_t opt_c = FALSE;
	char buf[512];

	while ((ch = *(modif++)) != '\0') {
		switch (ch) {
		case 'c':
			opt_c = TRUE;
			break;
		default:
			/* unknown modifiers are silently ignored */
			break;
		}
	}

	do {
		(*pr)("MBUF %p\n", m);
		bitmask_snprintf(m->m_flags, M_FLAGS_BITS, buf, sizeof(buf));
		(*pr)(" data=%p, len=%d, type=%d, flags=0x%s\n",
		    m->m_data, m->m_len, m->m_type, buf);
		(*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner,
		    m->m_next, m->m_nextpkt);
		/* The casts match the %u conversions used in the format. */
		(*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n",
		    (unsigned int)M_LEADINGSPACE(m),
		    (unsigned int)M_TRAILINGSPACE(m),
		    (unsigned int)M_READONLY(m));
		if ((m->m_flags & M_PKTHDR) != 0) {
			bitmask_snprintf(m->m_pkthdr.csum_flags, M_CSUM_BITS,
			    buf, sizeof(buf));
			(*pr)(" pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%"
			    PRIx32 ", segsz=%u\n",
			    m->m_pkthdr.len, m->m_pkthdr.rcvif,
			    buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
		}
		if ((m->m_flags & M_EXT)) {
			(*pr)(" shared=%u, ext_buf=%p, ext_size=%zd, "
			    "ext_free=%p, ext_arg=%p\n",
			    (unsigned int)MCLISREFERENCED(m),
			    m->m_ext.ext_buf, m->m_ext.ext_size,
			    m->m_ext.ext_free, m->m_ext.ext_arg);
		}
		if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
			int i;	/* byte offset into the external buffer */
			int pg;	/* index of the page covering that offset */

			/*
			 * Step through the buffer one page at a time and
			 * index ext_pgs[] by page number.  Using the byte
			 * offset itself as the index would read far beyond
			 * the array after the first entry.
			 *
			 * NOTE(review): this prints one entry per PAGE_SIZE
			 * bytes of ext_size; if ext_buf is not page aligned
			 * the buffer may touch one more page than is shown
			 * here -- confirm against how ext_pgs[] is filled.
			 */
			(*pr)(" pages:");
			for (i = 0, pg = 0; i < m->m_ext.ext_size;
			    i += PAGE_SIZE, pg++) {
				(*pr)(" %p", m->m_ext.ext_pgs[pg]);
			}
			(*pr)("\n");
		}

		/* Without the 'c' modifier only the first mbuf is shown. */
		if (!opt_c)
			break;
		m = m->m_next;
	} while (m != NULL);
}
#endif /* defined(DDB) */
1441