1 /* $NetBSD: uipc_mbuf.c,v 1.249 2023/03/31 19:22:56 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 1999, 2001, 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and Maxime Villard.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1988, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.249 2023/03/31 19:22:56 riastradh Exp $");
66
67 #ifdef _KERNEL_OPT
68 #include "opt_mbuftrace.h"
69 #include "opt_nmbclusters.h"
70 #include "opt_ddb.h"
71 #include "ether.h"
72 #endif
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/atomic.h>
77 #include <sys/cpu.h>
78 #include <sys/proc.h>
79 #include <sys/mbuf.h>
80 #include <sys/kernel.h>
81 #include <sys/syslog.h>
82 #include <sys/domain.h>
83 #include <sys/protosw.h>
84 #include <sys/percpu.h>
85 #include <sys/pool.h>
86 #include <sys/socket.h>
87 #include <sys/sysctl.h>
88
89 #include <net/if.h>
90
91 pool_cache_t mb_cache; /* mbuf cache */
92 static pool_cache_t mcl_cache; /* mbuf cluster cache */
93
94 struct mbstat mbstat;
95 int max_linkhdr;
96 int max_protohdr;
97 int max_hdr;
98 int max_datalen;
99
100 static void mb_drain(void *, int);
101 static int mb_ctor(void *, void *, int);
102
103 static void sysctl_kern_mbuf_setup(void);
104
105 static struct sysctllog *mbuf_sysctllog;
106
107 static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool);
108 static struct mbuf *m_split_internal(struct mbuf *, int, int, bool);
109 static int m_copyback_internal(struct mbuf **, int, int, const void *,
110 int, int);
111
112 /* Flags for m_copyback_internal. */
113 #define CB_COPYBACK 0x0001 /* copyback from cp */
114 #define CB_PRESERVE 0x0002 /* preserve original data */
115 #define CB_COW 0x0004 /* do copy-on-write */
116 #define CB_EXTEND 0x0008 /* extend chain */
117
118 static const char mclpool_warnmsg[] =
119 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters";
120
121 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
122
123 static percpu_t *mbstat_percpu;
124
125 #ifdef MBUFTRACE
126 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
127 struct mowner unknown_mowners[] = {
128 MOWNER_INIT("unknown", "free"),
129 MOWNER_INIT("unknown", "data"),
130 MOWNER_INIT("unknown", "header"),
131 MOWNER_INIT("unknown", "soname"),
132 MOWNER_INIT("unknown", "soopts"),
133 MOWNER_INIT("unknown", "ftable"),
134 MOWNER_INIT("unknown", "control"),
135 MOWNER_INIT("unknown", "oobdata"),
136 };
137 struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
138 #endif
139
140 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m))
141
142 #define MCLADDREFERENCE(o, n) \
143 do { \
144 KASSERT(((o)->m_flags & M_EXT) != 0); \
145 KASSERT(((n)->m_flags & M_EXT) == 0); \
146 KASSERT((o)->m_ext.ext_refcnt >= 1); \
147 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \
148 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \
149 (n)->m_ext_ref = (o)->m_ext_ref; \
150 mowner_ref((n), (n)->m_flags); \
151 } while (/* CONSTCOND */ 0)
152
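/*
 * Compute the upper bound for nmbclusters: roughly a quarter of physical
 * memory (and, when pool pages are not direct-mapped, no more than a
 * quarter of the kmem arena), expressed in clusters and optionally capped
 * by NMBCLUSTERS_MAX.
 */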
153 static int
154 nmbclusters_limit(void)
155 {
156 #if defined(PMAP_MAP_POOLPAGE)
157 /* direct mapping, doesn't use space in kmem_arena */
158 vsize_t max_size = physmem / 4;
159 #else
160 vsize_t max_size = MIN(physmem / 4, nkmempages / 4);
161 #endif
162
163 max_size = max_size * PAGE_SIZE / MCLBYTES;
164 #ifdef NMBCLUSTERS_MAX
165 max_size = MIN(max_size, NMBCLUSTERS_MAX);
166 #endif
167
168 return max_size;
169 }
170
171 /*
172 * Initialize the mbuf allocator.
173 */
174 void
175 mbinit(void)
176 {
177
178 CTASSERT(sizeof(struct _m_ext) <= MHLEN);
179 CTASSERT(sizeof(struct mbuf) == MSIZE);
180
181 sysctl_kern_mbuf_setup();
182
183 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl",
184 NULL, IPL_VM, mb_ctor, NULL, NULL);
185 KASSERT(mb_cache != NULL);
186
187 mcl_cache = pool_cache_init(mclbytes, COHERENCY_UNIT, 0, 0, "mclpl",
188 NULL, IPL_VM, NULL, NULL, NULL);
189 KASSERT(mcl_cache != NULL);
190
191 pool_cache_set_drain_hook(mb_cache, mb_drain, NULL);
192 pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL);
193
194 /*
195 * Set an arbitrary default limit on the number of mbuf clusters.
196 */
197 #ifdef NMBCLUSTERS
198 nmbclusters = MIN(NMBCLUSTERS, nmbclusters_limit());
199 #else
200 nmbclusters = MAX(1024,
201 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16);
202 nmbclusters = MIN(nmbclusters, nmbclusters_limit());
203 #endif
204
205 /*
206 * Set the hard limit on the mclpool to the number of
207 * mbuf clusters the kernel is to support. Log the limit-reached
208 * message at most once a minute.
209 */
210 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);
211
212 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));
213
214 /*
215 * Set a low water mark for both mbufs and clusters. This should
216 * help ensure that they can be allocated in a memory starvation
217 * situation. This is important for e.g. diskless systems which
218 * must allocate mbufs in order for the pagedaemon to clean pages.
219 */
220 pool_cache_setlowat(mb_cache, mblowat);
221 pool_cache_setlowat(mcl_cache, mcllowat);
222
223 #ifdef MBUFTRACE
224 {
225 /*
226 * Attach the unknown mowners.
227 */
228 int i;
229 MOWNER_ATTACH(&revoked_mowner);
230 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
231 i-- > 0; )
232 MOWNER_ATTACH(&unknown_mowners[i]);
233 }
234 #endif
235 }
236
237 static void
238 mb_drain(void *arg, int flags)
239 {
240 struct domain *dp;
241 const struct protosw *pr;
242 struct ifnet *ifp;
243 int s;
244
245 KERNEL_LOCK(1, NULL);
246 s = splvm();
247 DOMAIN_FOREACH(dp) {
248 for (pr = dp->dom_protosw;
249 pr < dp->dom_protoswNPROTOSW; pr++)
250 if (pr->pr_drain)
251 (*pr->pr_drain)();
252 }
253 /* XXX we cannot use psref in H/W interrupt */
254 if (!cpu_intr_p()) {
255 int bound = curlwp_bind();
256 IFNET_READER_FOREACH(ifp) {
257 struct psref psref;
258
259 if_acquire(ifp, &psref);
260
261 if (ifp->if_drain)
262 (*ifp->if_drain)(ifp);
263
264 if_release(ifp, &psref);
265 }
266 curlwp_bindx(bound);
267 }
268 splx(s);
269 mbstat.m_drain++;
270 KERNEL_UNLOCK_ONE(NULL);
271 }
272
273 /*
274 * sysctl helper routine for the kern.mbuf subtree.
275 * nmbclusters, mblowat and mcllowat need range
276 * checking and pool tweaking after being reset.
277 */
278 static int
279 sysctl_kern_mbuf(SYSCTLFN_ARGS)
280 {
281 int error, newval;
282 struct sysctlnode node;
283
284 node = *rnode;
285 node.sysctl_data = &newval;
286 switch (rnode->sysctl_num) {
287 case MBUF_NMBCLUSTERS:
288 case MBUF_MBLOWAT:
289 case MBUF_MCLLOWAT:
290 newval = *(int*)rnode->sysctl_data;
291 break;
292 case MBUF_NMBCLUSTERS_LIMIT:
293 newval = nmbclusters_limit();
294 break;
295 default:
296 return EOPNOTSUPP;
297 }
298
299 error = sysctl_lookup(SYSCTLFN_CALL(&node));
300 if (error || newp == NULL)
301 return error;
302 if (newval < 0)
303 return EINVAL;
304
305 switch (node.sysctl_num) {
306 case MBUF_NMBCLUSTERS:
307 if (newval < nmbclusters)
308 return EINVAL;
309 if (newval > nmbclusters_limit())
310 return EINVAL;
311 nmbclusters = newval;
312 pool_cache_sethardlimit(mcl_cache, nmbclusters,
313 mclpool_warnmsg, 60);
314 break;
315 case MBUF_MBLOWAT:
316 mblowat = newval;
317 pool_cache_setlowat(mb_cache, mblowat);
318 break;
319 case MBUF_MCLLOWAT:
320 mcllowat = newval;
321 pool_cache_setlowat(mcl_cache, mcllowat);
322 break;
323 }
324
325 return 0;
326 }
327
328 #ifdef MBUFTRACE
329 static void
330 mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
331 {
332 struct mowner_counter *mc = v1;
333 struct mowner_user *mo_user = v2;
334 int i;
335
336 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
337 mo_user->mo_counter[i] += mc->mc_counter[i];
338 }
339 }
340
341 static void
342 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
343 {
344
345 memset(mo_user, 0, sizeof(*mo_user));
346 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
347 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
348 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
349 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
350 percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user);
351 }
352
353 static int
354 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
355 {
356 struct mowner *mo;
357 size_t len = 0;
358 int error = 0;
359
360 if (namelen != 0)
361 return EINVAL;
362 if (newp != NULL)
363 return EPERM;
364
365 LIST_FOREACH(mo, &mowners, mo_link) {
366 struct mowner_user mo_user;
367
368 mowner_convert_to_user(mo, &mo_user);
369
370 if (oldp != NULL) {
371 if (*oldlenp - len < sizeof(mo_user)) {
372 error = ENOMEM;
373 break;
374 }
375 error = copyout(&mo_user, (char *)oldp + len,
376 sizeof(mo_user));
377 if (error)
378 break;
379 }
380 len += sizeof(mo_user);
381 }
382
383 if (error == 0)
384 *oldlenp = len;
385
386 return error;
387 }
388 #endif /* MBUFTRACE */
389
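/*
 * Adjust the per-CPU count of mbufs of the given type by "diff".
 */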
390 void
391 mbstat_type_add(int type, int diff)
392 {
393 struct mbstat_cpu *mb;
394 int s;
395
396 s = splvm();
397 mb = percpu_getref(mbstat_percpu);
398 mb->m_mtypes[type] += diff;
399 percpu_putref(mbstat_percpu);
400 splx(s);
401 }
402
403 static void
404 mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
405 {
406 struct mbstat_cpu *mbsc = v1;
407 struct mbstat *mbs = v2;
408 int i;
409
410 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
411 mbs->m_mtypes[i] += mbsc->m_mtypes[i];
412 }
413 }
414
415 static void
416 mbstat_convert_to_user(struct mbstat *mbs)
417 {
418
419 memset(mbs, 0, sizeof(*mbs));
420 mbs->m_drain = mbstat.m_drain;
421 percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs);
422 }
423
424 static int
425 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
426 {
427 struct sysctlnode node;
428 struct mbstat mbs;
429
430 mbstat_convert_to_user(&mbs);
431 node = *rnode;
432 node.sysctl_data = &mbs;
433 node.sysctl_size = sizeof(mbs);
434 return sysctl_lookup(SYSCTLFN_CALL(&node));
435 }
436
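/*
 * Create the kern.mbuf sysctl subtree.
 */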
437 static void
438 sysctl_kern_mbuf_setup(void)
439 {
440
441 KASSERT(mbuf_sysctllog == NULL);
442 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
443 CTLFLAG_PERMANENT,
444 CTLTYPE_NODE, "mbuf",
445 SYSCTL_DESCR("mbuf control variables"),
446 NULL, 0, NULL, 0,
447 CTL_KERN, KERN_MBUF, CTL_EOL);
448
449 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
450 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
451 CTLTYPE_INT, "msize",
452 SYSCTL_DESCR("mbuf base size"),
453 NULL, msize, NULL, 0,
454 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
455 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
456 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
457 CTLTYPE_INT, "mclbytes",
458 SYSCTL_DESCR("mbuf cluster size"),
459 NULL, mclbytes, NULL, 0,
460 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
461 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
462 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
463 CTLTYPE_INT, "nmbclusters",
464 SYSCTL_DESCR("Limit on the number of mbuf clusters"),
465 sysctl_kern_mbuf, 0, &nmbclusters, 0,
466 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
467 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
468 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
469 CTLTYPE_INT, "mblowat",
470 SYSCTL_DESCR("mbuf low water mark"),
471 sysctl_kern_mbuf, 0, &mblowat, 0,
472 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
473 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
474 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
475 CTLTYPE_INT, "mcllowat",
476 SYSCTL_DESCR("mbuf cluster low water mark"),
477 sysctl_kern_mbuf, 0, &mcllowat, 0,
478 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
479 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
480 CTLFLAG_PERMANENT,
481 CTLTYPE_STRUCT, "stats",
482 SYSCTL_DESCR("mbuf allocation statistics"),
483 sysctl_kern_mbuf_stats, 0, NULL, 0,
484 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
485 #ifdef MBUFTRACE
486 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
487 CTLFLAG_PERMANENT,
488 CTLTYPE_STRUCT, "mowners",
489 SYSCTL_DESCR("Information about mbuf owners"),
490 sysctl_kern_mbuf_mowners, 0, NULL, 0,
491 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
492 #endif
493 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
494 CTLFLAG_PERMANENT|CTLFLAG_READONLY,
495 CTLTYPE_INT, "nmbclusters_limit",
496 SYSCTL_DESCR("Limit of nmbclusters"),
497 sysctl_kern_mbuf, 0, NULL, 0,
498 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS_LIMIT, CTL_EOL);
499 }
500
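/*
 * Pool cache constructor for mbufs: record the mbuf's physical address
 * if POOL_VTOPHYS is available, otherwise mark it invalid.
 */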
501 static int
502 mb_ctor(void *arg, void *object, int flags)
503 {
504 struct mbuf *m = object;
505
506 #ifdef POOL_VTOPHYS
507 m->m_paddr = POOL_VTOPHYS(m);
508 #else
509 m->m_paddr = M_PADDR_INVALID;
510 #endif
511 return 0;
512 }
513
514 /*
515 * Add mbuf to the end of a chain
516 */
517 struct mbuf *
518 m_add(struct mbuf *c, struct mbuf *m)
519 {
520 struct mbuf *n;
521
522 if (c == NULL)
523 return m;
524
525 for (n = c; n->m_next != NULL; n = n->m_next)
526 continue;
527 n->m_next = m;
528 return c;
529 }
530
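/*
 * Allocate a plain (non-header) mbuf of the given type and initialize
 * its fields. Returns NULL if the pool cache allocation fails.
 */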
531 struct mbuf *
532 m_get(int how, int type)
533 {
534 struct mbuf *m;
535
536 KASSERT(type != MT_FREE);
537
538 m = pool_cache_get(mb_cache,
539 how == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT);
540 if (m == NULL)
541 return NULL;
542 KASSERTMSG(((vaddr_t)m->m_dat & PAGE_MASK) + MLEN <= PAGE_SIZE,
543 "m=%p m->m_dat=0x%p"
544 " MLEN=%u PAGE_MASK=0x%x PAGE_SIZE=%u",
545 m, m->m_dat,
546 (unsigned)MLEN, (unsigned)PAGE_MASK, (unsigned)PAGE_SIZE);
547
548 mbstat_type_add(type, 1);
549
550 mowner_init(m, type);
551 m->m_ext_ref = m; /* default */
552 m->m_type = type;
553 m->m_len = 0;
554 m->m_next = NULL;
555 m->m_nextpkt = NULL; /* default */
556 m->m_data = m->m_dat;
557 m->m_flags = 0; /* default */
558
559 return m;
560 }
561
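/*
 * Allocate an mbuf with a packet header (M_PKTHDR set) and initialize
 * the header fields.
 */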
562 struct mbuf *
563 m_gethdr(int how, int type)
564 {
565 struct mbuf *m;
566
567 m = m_get(how, type);
568 if (m == NULL)
569 return NULL;
570
571 m->m_data = m->m_pktdat;
572 m->m_flags = M_PKTHDR;
573
574 m_reset_rcvif(m);
575 m->m_pkthdr.len = 0;
576 m->m_pkthdr.csum_flags = 0;
577 m->m_pkthdr.csum_data = 0;
578 m->m_pkthdr.segsz = 0;
579 m->m_pkthdr.ether_vtag = 0;
580 m->m_pkthdr.pkthdr_flags = 0;
581 SLIST_INIT(&m->m_pkthdr.tags);
582
583 m->m_pkthdr.pattr_class = NULL;
584 m->m_pkthdr.pattr_af = AF_UNSPEC;
585 m->m_pkthdr.pattr_hdr = NULL;
586
587 return m;
588 }
589
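/*
 * Attach a cluster to the given mbuf as external storage. On failure,
 * the mbuf is left without M_EXT set; callers must check for it.
 */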
590 void
591 m_clget(struct mbuf *m, int how)
592 {
593 m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache,
594 how == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT,
595 &m->m_ext_storage.ext_paddr);
596
597 if (m->m_ext_storage.ext_buf == NULL)
598 return;
599
600 KASSERTMSG((((vaddr_t)m->m_ext_storage.ext_buf & PAGE_MASK) + mclbytes
601 <= PAGE_SIZE),
602 "m=%p m->m_ext_storage.ext_buf=0x%p"
603 " mclbytes=%u PAGE_MASK=0x%x PAGE_SIZE=%u",
604 m, m->m_ext_storage.ext_buf,
605 (unsigned)mclbytes, (unsigned)PAGE_MASK, (unsigned)PAGE_SIZE);
606
607 MCLINITREFERENCE(m);
608 m->m_data = m->m_ext.ext_buf;
609 m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) |
610 M_EXT|M_EXT_CLUSTER|M_EXT_RW;
611 m->m_ext.ext_size = MCLBYTES;
612 m->m_ext.ext_free = NULL;
613 m->m_ext.ext_arg = NULL;
614 /* ext_paddr initialized above */
615
616 mowner_ref(m, M_EXT|M_EXT_CLUSTER);
617 }
618
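/*
 * Allocate an mbuf (with a packet header if M_PKTHDR is set in flags)
 * and attach a cluster to it. Returns NULL if either allocation fails.
 */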
619 struct mbuf *
620 m_getcl(int how, int type, int flags)
621 {
622 struct mbuf *mp;
623
624 if ((flags & M_PKTHDR) != 0)
625 mp = m_gethdr(how, type);
626 else
627 mp = m_get(how, type);
628
629 if (mp == NULL)
630 return NULL;
631
632 MCLGET(mp, how);
633 if ((mp->m_flags & M_EXT) != 0)
634 return mp;
635
636 m_free(mp);
637 return NULL;
638 }
639
640 /*
641 * Utility function for M_PREPEND. Do *NOT* use it directly.
642 */
643 struct mbuf *
644 m_prepend(struct mbuf *m, int len, int how)
645 {
646 struct mbuf *mn;
647
648 if (__predict_false(len > MHLEN)) {
649 panic("%s: len > MHLEN", __func__);
650 }
651
652 KASSERT(len != M_COPYALL);
653 mn = m_get(how, m->m_type);
654 if (mn == NULL) {
655 m_freem(m);
656 return NULL;
657 }
658
659 if (m->m_flags & M_PKTHDR) {
660 m_move_pkthdr(mn, m);
661 } else {
662 MCLAIM(mn, m->m_owner);
663 }
664 mn->m_next = m;
665 m = mn;
666
667 if (m->m_flags & M_PKTHDR) {
668 if (len < MHLEN)
669 m_align(m, len);
670 } else {
671 if (len < MLEN)
672 m_align(m, len);
673 }
674
675 m->m_len = len;
676 return m;
677 }
678
679 struct mbuf *
680 m_copym(struct mbuf *m, int off, int len, int wait)
681 {
682 /* Shallow copy on M_EXT. */
683 return m_copy_internal(m, off, len, wait, false);
684 }
685
686 struct mbuf *
687 m_dup(struct mbuf *m, int off, int len, int wait)
688 {
689 /* Deep copy. */
690 return m_copy_internal(m, off, len, wait, true);
691 }
692
693 static inline int
694 m_copylen(int len, int copylen)
695 {
696 return (len == M_COPYALL) ? copylen : uimin(len, copylen);
697 }
698
699 static struct mbuf *
700 m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep)
701 {
702 struct mbuf *m0 __diagused = m;
703 int len0 __diagused = len;
704 struct mbuf *n, **np;
705 int off = off0;
706 struct mbuf *top;
707 int copyhdr = 0;
708
709 if (off < 0 || (len != M_COPYALL && len < 0))
710 panic("%s: off %d, len %d", __func__, off, len);
711 if (off == 0 && m->m_flags & M_PKTHDR)
712 copyhdr = 1;
713 while (off > 0) {
714 if (m == NULL)
715 panic("%s: m == NULL, off %d", __func__, off);
716 if (off < m->m_len)
717 break;
718 off -= m->m_len;
719 m = m->m_next;
720 }
721
722 np = &top;
723 top = NULL;
724 while (len == M_COPYALL || len > 0) {
725 if (m == NULL) {
726 if (len != M_COPYALL)
727 panic("%s: m == NULL, len %d [!COPYALL]",
728 __func__, len);
729 break;
730 }
731
732 n = m_get(wait, m->m_type);
733 *np = n;
734 if (n == NULL)
735 goto nospace;
736 MCLAIM(n, m->m_owner);
737
738 if (copyhdr) {
739 m_copy_pkthdr(n, m);
740 if (len == M_COPYALL)
741 n->m_pkthdr.len -= off0;
742 else
743 n->m_pkthdr.len = len;
744 copyhdr = 0;
745 }
746 n->m_len = m_copylen(len, m->m_len - off);
747
748 if (m->m_flags & M_EXT) {
749 if (!deep) {
750 n->m_data = m->m_data + off;
751 MCLADDREFERENCE(m, n);
752 } else {
753 /*
754 * We don't care if MCLGET fails. n->m_len is
755 * recomputed and handles that.
756 */
757 MCLGET(n, wait);
758 n->m_len = 0;
759 n->m_len = M_TRAILINGSPACE(n);
760 n->m_len = m_copylen(len, n->m_len);
761 n->m_len = uimin(n->m_len, m->m_len - off);
762 memcpy(mtod(n, void *), mtod(m, char *) + off,
763 (unsigned)n->m_len);
764 }
765 } else {
766 memcpy(mtod(n, void *), mtod(m, char *) + off,
767 (unsigned)n->m_len);
768 }
769
770 if (len != M_COPYALL)
771 len -= n->m_len;
772 off += n->m_len;
773
774 KASSERTMSG(off <= m->m_len,
775 "m=%p m->m_len=%d off=%d len=%d m0=%p off0=%d len0=%d",
776 m, m->m_len, off, len, m0, off0, len0);
777
778 if (off == m->m_len) {
779 m = m->m_next;
780 off = 0;
781 }
782 np = &n->m_next;
783 }
784
785 return top;
786
787 nospace:
788 m_freem(top);
789 return NULL;
790 }
791
792 /*
793 * Copy an entire packet, including header (which must be present).
794 * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'.
795 */
796 struct mbuf *
797 m_copypacket(struct mbuf *m, int how)
798 {
799 struct mbuf *top, *n, *o;
800
801 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) {
802 panic("%s: no header (m = %p)", __func__, m);
803 }
804
805 n = m_get(how, m->m_type);
806 top = n;
807 if (!n)
808 goto nospace;
809
810 MCLAIM(n, m->m_owner);
811 m_copy_pkthdr(n, m);
812 n->m_len = m->m_len;
813 if (m->m_flags & M_EXT) {
814 n->m_data = m->m_data;
815 MCLADDREFERENCE(m, n);
816 } else {
817 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
818 }
819
820 m = m->m_next;
821 while (m) {
822 o = m_get(how, m->m_type);
823 if (!o)
824 goto nospace;
825
826 MCLAIM(o, m->m_owner);
827 n->m_next = o;
828 n = n->m_next;
829
830 n->m_len = m->m_len;
831 if (m->m_flags & M_EXT) {
832 n->m_data = m->m_data;
833 MCLADDREFERENCE(m, n);
834 } else {
835 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
836 }
837
838 m = m->m_next;
839 }
840 return top;
841
842 nospace:
843 m_freem(top);
844 return NULL;
845 }
846
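/*
 * Copy "len" bytes of data, starting "off" bytes from the beginning of
 * the mbuf chain, into the buffer "cp". Panics if the chain is too short.
 */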
847 void
848 m_copydata(struct mbuf *m, int off, int len, void *cp)
849 {
850 unsigned int count;
851 struct mbuf *m0 = m;
852 int len0 = len;
853 int off0 = off;
854 void *cp0 = cp;
855
856 KASSERT(len != M_COPYALL);
857 if (off < 0 || len < 0)
858 panic("m_copydata: off %d, len %d", off, len);
859 while (off > 0) {
860 if (m == NULL)
861 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)",
862 m0, len0, off0, cp0, off, off0 - off);
863 if (off < m->m_len)
864 break;
865 off -= m->m_len;
866 m = m->m_next;
867 }
868 while (len > 0) {
869 if (m == NULL)
870 panic("m_copydata(%p,%d,%d,%p): "
871 "m=NULL, off=%d (%d), len=%d (%d)",
872 m0, len0, off0, cp0,
873 off, off0 - off, len, len0 - len);
874 count = uimin(m->m_len - off, len);
875 memcpy(cp, mtod(m, char *) + off, count);
876 len -= count;
877 cp = (char *)cp + count;
878 off = 0;
879 m = m->m_next;
880 }
881 }
882
883 /*
884 * Concatenate mbuf chain n to m.
885 * n might be copied into m (when n->m_len is small), so the data portion of
886 * n could end up in an mbuf of a different mbuf type.
887 * The m_pkthdr is not updated.
888 */
889 void
890 m_cat(struct mbuf *m, struct mbuf *n)
891 {
892
893 while (m->m_next)
894 m = m->m_next;
895 while (n) {
896 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
897 /* just join the two chains */
898 m->m_next = n;
899 return;
900 }
901 /* splat the data from one into the other */
902 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
903 (u_int)n->m_len);
904 m->m_len += n->m_len;
905 n = m_free(n);
906 }
907 }
908
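/*
 * Trim "req_len" bytes from the mbuf chain: from the head if req_len is
 * positive, from the tail if it is negative. The packet header length,
 * if present, is adjusted accordingly.
 */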
909 void
910 m_adj(struct mbuf *mp, int req_len)
911 {
912 int len = req_len;
913 struct mbuf *m;
914 int count;
915
916 if ((m = mp) == NULL)
917 return;
918 if (len >= 0) {
919 /*
920 * Trim from head.
921 */
922 while (m != NULL && len > 0) {
923 if (m->m_len <= len) {
924 len -= m->m_len;
925 m->m_len = 0;
926 m = m->m_next;
927 } else {
928 m->m_len -= len;
929 m->m_data += len;
930 len = 0;
931 }
932 }
933 if (mp->m_flags & M_PKTHDR)
934 mp->m_pkthdr.len -= (req_len - len);
935 } else {
936 /*
937 * Trim from tail. Scan the mbuf chain,
938 * calculating its length and finding the last mbuf.
939 * If the adjustment only affects this mbuf, then just
940 * adjust and return. Otherwise, rescan and truncate
941 * after the remaining size.
942 */
943 len = -len;
944 count = 0;
945 for (;;) {
946 count += m->m_len;
947 if (m->m_next == NULL)
948 break;
949 m = m->m_next;
950 }
951 if (m->m_len >= len) {
952 m->m_len -= len;
953 if (mp->m_flags & M_PKTHDR)
954 mp->m_pkthdr.len -= len;
955 return;
956 }
957
958 count -= len;
959 if (count < 0)
960 count = 0;
961
962 /*
963 * Correct length for chain is "count".
964 * Find the mbuf with last data, adjust its length,
965 * and toss data from remaining mbufs on chain.
966 */
967 m = mp;
968 if (m->m_flags & M_PKTHDR)
969 m->m_pkthdr.len = count;
970 for (; m; m = m->m_next) {
971 if (m->m_len >= count) {
972 m->m_len = count;
973 break;
974 }
975 count -= m->m_len;
976 }
977 if (m) {
978 while (m->m_next)
979 (m = m->m_next)->m_len = 0;
980 }
981 }
982 }
983
984 /*
985 * m_ensure_contig: rearrange an mbuf chain so that the given length of bytes
986 * is contiguous and in the data area of an mbuf (therefore, mtod() would
987 * work for a structure of the given length).
988 *
989 * => On success, returns true and leaves the resulting chain in *m0; false otherwise.
990 * => The mbuf chain may change, but is always left valid.
991 */
992 bool
993 m_ensure_contig(struct mbuf **m0, int len)
994 {
995 struct mbuf *n = *m0, *m;
996 size_t count, space;
997
998 KASSERT(len != M_COPYALL);
999 /*
1000 * If first mbuf has no cluster, and has room for len bytes
1001 * without shifting current data, pullup into it,
1002 * otherwise allocate a new mbuf to prepend to the chain.
1003 */
1004 if ((n->m_flags & M_EXT) == 0 &&
1005 n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
1006 if (n->m_len >= len) {
1007 return true;
1008 }
1009 m = n;
1010 n = n->m_next;
1011 len -= m->m_len;
1012 } else {
1013 if (len > MHLEN) {
1014 return false;
1015 }
1016 m = m_get(M_DONTWAIT, n->m_type);
1017 if (m == NULL) {
1018 return false;
1019 }
1020 MCLAIM(m, n->m_owner);
1021 if (n->m_flags & M_PKTHDR) {
1022 m_move_pkthdr(m, n);
1023 }
1024 }
1025 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
1026 do {
1027 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len);
1028 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
1029 (unsigned)count);
1030 len -= count;
1031 m->m_len += count;
1032 n->m_len -= count;
1033 space -= count;
1034 if (n->m_len)
1035 n->m_data += count;
1036 else
1037 n = m_free(n);
1038 } while (len > 0 && n);
1039
1040 m->m_next = n;
1041 *m0 = m;
1042
1043 return len <= 0;
1044 }
1045
1046 /*
1047 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error.
1048 */
1049 struct mbuf *
1050 m_pullup(struct mbuf *n, int len)
1051 {
1052 struct mbuf *m = n;
1053
1054 KASSERT(len != M_COPYALL);
1055 if (!m_ensure_contig(&m, len)) {
1056 KASSERT(m != NULL);
1057 m_freem(m);
1058 m = NULL;
1059 }
1060 return m;
1061 }
1062
1063 /*
1064 * ensure that [off, off + len) is contiguous on the mbuf chain "m".
1065 * packet chain before "off" is kept untouched.
1066 * if offp == NULL, the target will start at <retval, 0> on resulting chain.
1067 * if offp != NULL, the target will start at <retval, *offp> on resulting chain.
1068 *
1069 * on error return (NULL return value), original "m" will be freed.
1070 *
1071 * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster)
1072 */
1073 struct mbuf *
1074 m_pulldown(struct mbuf *m, int off, int len, int *offp)
1075 {
1076 struct mbuf *n, *o;
1077 int hlen, tlen, olen;
1078 int sharedcluster;
1079
1080 /* Check invalid arguments. */
1081 if (m == NULL)
1082 panic("%s: m == NULL", __func__);
1083 if (len > MCLBYTES) {
1084 m_freem(m);
1085 return NULL;
1086 }
1087
1088 n = m;
1089 while (n != NULL && off > 0) {
1090 if (n->m_len > off)
1091 break;
1092 off -= n->m_len;
1093 n = n->m_next;
1094 }
1095 /* Be sure to point at a non-empty mbuf. */
1096 while (n != NULL && n->m_len == 0)
1097 n = n->m_next;
1098 if (!n) {
1099 m_freem(m);
1100 return NULL; /* mbuf chain too short */
1101 }
1102
1103 sharedcluster = M_READONLY(n);
1104
1105 /*
1106 * The target data is on <n, off>. If we got enough data on the mbuf
1107 * "n", we're done.
1108 */
1109 #ifdef __NO_STRICT_ALIGNMENT
1110 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster)
1111 #else
1112 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster &&
1113 ALIGNED_POINTER((mtod(n, char *) + off), uint32_t))
1114 #endif
1115 goto ok;
1116
1117 /*
1118 * When (len <= n->m_len - off) and (off != 0), it is a special case.
1119 * Len bytes from <n, off> sit in a single mbuf, but the caller does
1120 * not like the starting position (off).
1121 *
1122 * Chop the current mbuf into two pieces, set off to 0.
1123 */
1124 if (len <= n->m_len - off) {
1125 struct mbuf *mlast;
1126
1127 o = m_dup(n, off, n->m_len - off, M_DONTWAIT);
1128 if (o == NULL) {
1129 m_freem(m);
1130 return NULL; /* ENOBUFS */
1131 }
1132 KASSERTMSG(o->m_len >= len, "o=%p o->m_len=%d len=%d",
1133 o, o->m_len, len);
1134 for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next)
1135 ;
1136 n->m_len = off;
1137 mlast->m_next = n->m_next;
1138 n->m_next = o;
1139 n = o;
1140 off = 0;
1141 goto ok;
1142 }
1143
1144 /*
1145 * We need to take hlen from <n, off> and tlen from <n->m_next, 0>,
1146 * and construct contiguous mbuf with m_len == len.
1147 *
1148 * Note that hlen + tlen == len, and tlen > 0.
1149 */
1150 hlen = n->m_len - off;
1151 tlen = len - hlen;
1152
1153 /*
1154 * Ensure that we have enough trailing data on mbuf chain. If not,
1155 * we can do nothing about the chain.
1156 */
1157 olen = 0;
1158 for (o = n->m_next; o != NULL; o = o->m_next)
1159 olen += o->m_len;
1160 if (hlen + olen < len) {
1161 m_freem(m);
1162 return NULL; /* mbuf chain too short */
1163 }
1164
1165 /*
1166 * Easy cases first. We need to use m_copydata() to get data from
1167 * <n->m_next, 0>.
1168 */
1169 if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen &&
1170 !sharedcluster) {
1171 m_copydata(n->m_next, 0, tlen, mtod(n, char *) + n->m_len);
1172 n->m_len += tlen;
1173 m_adj(n->m_next, tlen);
1174 goto ok;
1175 }
1176 if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen &&
1177 #ifndef __NO_STRICT_ALIGNMENT
1178 ALIGNED_POINTER((n->m_next->m_data - hlen), uint32_t) &&
1179 #endif
1180 !sharedcluster && n->m_next->m_len >= tlen) {
1181 n->m_next->m_data -= hlen;
1182 n->m_next->m_len += hlen;
1183 memcpy(mtod(n->m_next, void *), mtod(n, char *) + off, hlen);
1184 n->m_len -= hlen;
1185 n = n->m_next;
1186 off = 0;
1187 goto ok;
1188 }
1189
1190 /*
1191 * Now we need to do it the hard way. We can't just copy, since
1192 * there is no room on either end.
1193 */
1194 o = m_get(M_DONTWAIT, m->m_type);
1195 if (o && len > MLEN) {
1196 MCLGET(o, M_DONTWAIT);
1197 if ((o->m_flags & M_EXT) == 0) {
1198 m_free(o);
1199 o = NULL;
1200 }
1201 }
1202 if (!o) {
1203 m_freem(m);
1204 return NULL; /* ENOBUFS */
1205 }
1206 /* get hlen from <n, off> into <o, 0> */
1207 o->m_len = hlen;
1208 memcpy(mtod(o, void *), mtod(n, char *) + off, hlen);
1209 n->m_len -= hlen;
1210 /* get tlen from <n->m_next, 0> into <o, hlen> */
1211 m_copydata(n->m_next, 0, tlen, mtod(o, char *) + o->m_len);
1212 o->m_len += tlen;
1213 m_adj(n->m_next, tlen);
1214 o->m_next = n->m_next;
1215 n->m_next = o;
1216 n = o;
1217 off = 0;
1218
1219 ok:
1220 if (offp)
1221 *offp = off;
1222 return n;
1223 }
1224
1225 /*
1226 * Like m_pullup(), except a new mbuf is always allocated, and we allow
1227 * the amount of empty space before the data in the new mbuf to be specified
1228 * (in the event that the caller expects to prepend later).
1229 */
1230 struct mbuf *
1231 m_copyup(struct mbuf *n, int len, int dstoff)
1232 {
1233 struct mbuf *m;
1234 int count, space;
1235
1236 KASSERT(len != M_COPYALL);
1237 if (len > ((int)MHLEN - dstoff))
1238 goto bad;
1239 m = m_get(M_DONTWAIT, n->m_type);
1240 if (m == NULL)
1241 goto bad;
1242 MCLAIM(m, n->m_owner);
1243 if (n->m_flags & M_PKTHDR) {
1244 m_move_pkthdr(m, n);
1245 }
1246 m->m_data += dstoff;
1247 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
1248 do {
1249 count = uimin(uimin(uimax(len, max_protohdr), space), n->m_len);
1250 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
1251 (unsigned)count);
1252 len -= count;
1253 m->m_len += count;
1254 n->m_len -= count;
1255 space -= count;
1256 if (n->m_len)
1257 n->m_data += count;
1258 else
1259 n = m_free(n);
1260 } while (len > 0 && n);
1261 if (len > 0) {
1262 (void) m_free(m);
1263 goto bad;
1264 }
1265 m->m_next = n;
1266 return m;
1267 bad:
1268 m_freem(n);
1269 return NULL;
1270 }
1271
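/*
 * Split the chain at "len" bytes from the beginning and return the tail;
 * the original chain keeps the first "len" bytes. Returns NULL on failure.
 */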
1272 struct mbuf *
1273 m_split(struct mbuf *m0, int len, int wait)
1274 {
1275 return m_split_internal(m0, len, wait, true);
1276 }
1277
1278 static struct mbuf *
1279 m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr)
1280 {
1281 struct mbuf *m, *n;
1282 unsigned len = len0, remain, len_save;
1283
1284 KASSERT(len0 != M_COPYALL);
1285 for (m = m0; m && len > m->m_len; m = m->m_next)
1286 len -= m->m_len;
1287 if (m == NULL)
1288 return NULL;
1289
1290 remain = m->m_len - len;
1291 if (copyhdr && (m0->m_flags & M_PKTHDR)) {
1292 n = m_gethdr(wait, m0->m_type);
1293 if (n == NULL)
1294 return NULL;
1295
1296 MCLAIM(n, m0->m_owner);
1297 m_copy_rcvif(n, m0);
1298 n->m_pkthdr.len = m0->m_pkthdr.len - len0;
1299 len_save = m0->m_pkthdr.len;
1300 m0->m_pkthdr.len = len0;
1301
1302 if (m->m_flags & M_EXT)
1303 goto extpacket;
1304
1305 if (remain > MHLEN) {
1306 /* m can't be the lead packet */
1307 m_align(n, 0);
1308 n->m_len = 0;
1309 n->m_next = m_split(m, len, wait);
1310 if (n->m_next == NULL) {
1311 (void)m_free(n);
1312 m0->m_pkthdr.len = len_save;
1313 return NULL;
1314 }
1315 return n;
1316 } else {
1317 m_align(n, remain);
1318 }
1319 } else if (remain == 0) {
1320 n = m->m_next;
1321 m->m_next = NULL;
1322 return n;
1323 } else {
1324 n = m_get(wait, m->m_type);
1325 if (n == NULL)
1326 return NULL;
1327 MCLAIM(n, m->m_owner);
1328 m_align(n, remain);
1329 }
1330
1331 extpacket:
1332 if (m->m_flags & M_EXT) {
1333 n->m_data = m->m_data + len;
1334 MCLADDREFERENCE(m, n);
1335 } else {
1336 memcpy(mtod(n, void *), mtod(m, char *) + len, remain);
1337 }
1338
1339 n->m_len = remain;
1340 m->m_len = len;
1341 n->m_next = m->m_next;
1342 m->m_next = NULL;
1343 return n;
1344 }
1345
1346 /*
1347 * Routine to copy from device local memory into mbufs.
1348 */
1349 struct mbuf *
1350 m_devget(char *buf, int totlen, int off, struct ifnet *ifp)
1351 {
1352 struct mbuf *m;
1353 struct mbuf *top = NULL, **mp = &top;
1354 char *cp, *epkt;
1355 int len;
1356
1357 cp = buf;
1358 epkt = cp + totlen;
1359 if (off) {
1360 /*
1361 * If 'off' is non-zero, packet is trailer-encapsulated,
1362 * so we have to skip the type and length fields.
1363 */
1364 cp += off + 2 * sizeof(uint16_t);
1365 totlen -= 2 * sizeof(uint16_t);
1366 }
1367
1368 m = m_gethdr(M_DONTWAIT, MT_DATA);
1369 if (m == NULL)
1370 return NULL;
1371 m_set_rcvif(m, ifp);
1372 m->m_pkthdr.len = totlen;
1373 m->m_len = MHLEN;
1374
1375 while (totlen > 0) {
1376 if (top) {
1377 m = m_get(M_DONTWAIT, MT_DATA);
1378 if (m == NULL) {
1379 m_freem(top);
1380 return NULL;
1381 }
1382 m->m_len = MLEN;
1383 }
1384
1385 len = uimin(totlen, epkt - cp);
1386
1387 if (len >= MINCLSIZE) {
1388 MCLGET(m, M_DONTWAIT);
1389 if ((m->m_flags & M_EXT) == 0) {
1390 m_free(m);
1391 m_freem(top);
1392 return NULL;
1393 }
1394 m->m_len = len = uimin(len, MCLBYTES);
1395 } else {
1396 /*
1397 * Place initial small packet/header at end of mbuf.
1398 */
1399 if (len < m->m_len) {
1400 if (top == NULL && len + max_linkhdr <= m->m_len)
1401 m->m_data += max_linkhdr;
1402 m->m_len = len;
1403 } else
1404 len = m->m_len;
1405 }
1406
1407 memcpy(mtod(m, void *), cp, (size_t)len);
1408
1409 cp += len;
1410 *mp = m;
1411 mp = &m->m_next;
1412 totlen -= len;
1413 if (cp == epkt)
1414 cp = buf;
1415 }
1416
1417 return top;
1418 }
1419
1420 /*
1421 * Copy data from a buffer back into the indicated mbuf chain,
1422 * starting "off" bytes from the beginning, extending the mbuf
1423 * chain if necessary.
1424 */
1425 void
1426 m_copyback(struct mbuf *m0, int off, int len, const void *cp)
1427 {
1428 #if defined(DEBUG)
1429 struct mbuf *origm = m0;
1430 int error;
1431 #endif
1432
1433 if (m0 == NULL)
1434 return;
1435
1436 #if defined(DEBUG)
1437 error =
1438 #endif
1439 m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND,
1440 M_DONTWAIT);
1441
1442 #if defined(DEBUG)
1443 if (error != 0 || (m0 != NULL && origm != m0))
1444 panic("m_copyback");
1445 #endif
1446 }
1447
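/*
 * Copy data back into the chain like m_copyback(), but perform
 * copy-on-write of read-only mbufs instead of writing in place, and do
 * not extend the chain. On failure the chain is freed and NULL returned.
 */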
1448 struct mbuf *
1449 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
1450 {
1451 int error;
1452
1453 /* don't support chain expansion */
1454 KASSERT(len != M_COPYALL);
1455 KDASSERT(off + len <= m_length(m0));
1456
1457 error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW,
1458 how);
1459 if (error) {
1460 /*
1461 * no way to recover from partial success.
1462 * just free the chain.
1463 */
1464 m_freem(m0);
1465 return NULL;
1466 }
1467 return m0;
1468 }
1469
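/*
 * Make the bytes in the range [off, off + len) of the mbuf chain
 * writable, performing copy-on-write where needed while preserving the
 * original data. Returns 0 on success, otherwise an error number.
 */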
1470 int
1471 m_makewritable(struct mbuf **mp, int off, int len, int how)
1472 {
1473 int error;
1474 #if defined(DEBUG)
1475 int origlen = m_length(*mp);
1476 #endif
1477
1478 error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW,
1479 how);
1480 if (error)
1481 return error;
1482
1483 #if defined(DEBUG)
1484 int reslen = 0;
1485 for (struct mbuf *n = *mp; n; n = n->m_next)
1486 reslen += n->m_len;
1487 if (origlen != reslen)
1488 panic("m_makewritable: length changed");
1489 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
1490 panic("m_makewritable: inconsist");
1491 #endif
1492
1493 return 0;
1494 }
1495
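/*
 * Common backend for m_copyback(), m_copyback_cow() and m_makewritable(),
 * controlled by the CB_* flags.
 */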
1496 static int
1497 m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp,
1498 int flags, int how)
1499 {
1500 int mlen;
1501 struct mbuf *m, *n;
1502 struct mbuf **mp;
1503 int totlen = 0;
1504 const char *cp = vp;
1505
1506 KASSERT(mp0 != NULL);
1507 KASSERT(*mp0 != NULL);
1508 KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL);
1509 KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL);
1510
1511 if (len == M_COPYALL)
1512 len = m_length(*mp0) - off;
1513
1514 /*
1515 * we don't bother to update "totlen" in the case of CB_COW,
1516 * assuming that CB_EXTEND and CB_COW are exclusive.
1517 */
1518
1519 KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0);
1520
1521 mp = mp0;
1522 m = *mp;
1523 while (off > (mlen = m->m_len)) {
1524 off -= mlen;
1525 totlen += mlen;
1526 if (m->m_next == NULL) {
1527 int tspace;
1528 extend:
1529 if ((flags & CB_EXTEND) == 0)
1530 goto out;
1531
1532 /*
1533 * try to make some space at the end of "m".
1534 */
1535
1536 mlen = m->m_len;
1537 if (off + len >= MINCLSIZE &&
1538 (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
1539 MCLGET(m, how);
1540 }
1541 tspace = M_TRAILINGSPACE(m);
1542 if (tspace > 0) {
1543 tspace = uimin(tspace, off + len);
1544 KASSERT(tspace > 0);
1545 memset(mtod(m, char *) + m->m_len, 0,
1546 uimin(off, tspace));
1547 m->m_len += tspace;
1548 off += mlen;
1549 totlen -= mlen;
1550 continue;
1551 }
1552
1553 /*
1554 * need to allocate an mbuf.
1555 */
1556
1557 if (off + len >= MINCLSIZE) {
1558 n = m_getcl(how, m->m_type, 0);
1559 } else {
1560 n = m_get(how, m->m_type);
1561 }
1562 if (n == NULL) {
1563 goto out;
1564 }
1565 n->m_len = uimin(M_TRAILINGSPACE(n), off + len);
1566 memset(mtod(n, char *), 0, uimin(n->m_len, off));
1567 m->m_next = n;
1568 }
1569 mp = &m->m_next;
1570 m = m->m_next;
1571 }
1572 while (len > 0) {
1573 mlen = m->m_len - off;
1574 if (mlen != 0 && M_READONLY(m)) {
1575 /*
1576 * This mbuf is read-only. Allocate a new writable
1577 * mbuf and try again.
1578 */
1579 char *datap;
1580 int eatlen;
1581
1582 KASSERT((flags & CB_COW) != 0);
1583
1584 /*
1585 * if we're going to write into the middle of
1586 * an mbuf, split it first.
1587 */
1588 if (off > 0) {
1589 n = m_split_internal(m, off, how, false);
1590 if (n == NULL)
1591 goto enobufs;
1592 m->m_next = n;
1593 mp = &m->m_next;
1594 m = n;
1595 off = 0;
1596 continue;
1597 }
1598
1599 /*
1600 * XXX TODO coalesce into the trailingspace of
1601 * the previous mbuf when possible.
1602 */
1603
1604 /*
1605 * allocate a new mbuf. copy packet header if needed.
1606 */
1607 n = m_get(how, m->m_type);
1608 if (n == NULL)
1609 goto enobufs;
1610 MCLAIM(n, m->m_owner);
1611 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
1612 m_move_pkthdr(n, m);
1613 n->m_len = MHLEN;
1614 } else {
1615 if (len >= MINCLSIZE)
1616 MCLGET(n, M_DONTWAIT);
1617 n->m_len =
1618 (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
1619 }
1620 if (n->m_len > len)
1621 n->m_len = len;
1622
1623 /*
1624 * free the region which has been overwritten,
1625 * copying data from old mbufs if requested.
1626 */
1627 if (flags & CB_PRESERVE)
1628 datap = mtod(n, char *);
1629 else
1630 datap = NULL;
1631 eatlen = n->m_len;
1632 while (m != NULL && M_READONLY(m) &&
1633 n->m_type == m->m_type && eatlen > 0) {
1634 mlen = uimin(eatlen, m->m_len);
1635 if (datap) {
1636 m_copydata(m, 0, mlen, datap);
1637 datap += mlen;
1638 }
1639 m->m_data += mlen;
1640 m->m_len -= mlen;
1641 eatlen -= mlen;
1642 if (m->m_len == 0)
1643 *mp = m = m_free(m);
1644 }
1645 if (eatlen > 0)
1646 n->m_len -= eatlen;
1647 n->m_next = m;
1648 *mp = m = n;
1649 continue;
1650 }
1651 mlen = uimin(mlen, len);
1652 if (flags & CB_COPYBACK) {
1653 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
1654 cp += mlen;
1655 }
1656 len -= mlen;
1657 mlen += off;
1658 off = 0;
1659 totlen += mlen;
1660 if (len == 0)
1661 break;
1662 if (m->m_next == NULL) {
1663 goto extend;
1664 }
1665 mp = &m->m_next;
1666 m = m->m_next;
1667 }
1668
1669 out:
1670 if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
1671 KASSERT((flags & CB_EXTEND) != 0);
1672 m->m_pkthdr.len = totlen;
1673 }
1674
1675 return 0;
1676
1677 enobufs:
1678 return ENOBUFS;
1679 }
1680
1681 /*
1682 * Compress the mbuf chain. Return the new mbuf chain on success, NULL on
1683 * failure. The first mbuf is preserved, and on success the pointer returned
1684 * is the same as the one passed.
1685 */
1686 struct mbuf *
1687 m_defrag(struct mbuf *m, int how)
1688 {
1689 struct mbuf *m0, *mn, *n;
1690 int sz;
1691
1692 KASSERT((m->m_flags & M_PKTHDR) != 0);
1693
1694 if (m->m_next == NULL)
1695 return m;
1696
1697 /* Defrag to single mbuf if at all possible */
1698 if ((m->m_flags & M_EXT) == 0 && m->m_pkthdr.len <= MCLBYTES) {
1699 if (m->m_pkthdr.len <= MHLEN) {
1700 if (M_TRAILINGSPACE(m) < (m->m_pkthdr.len - m->m_len)) {
1701 KASSERTMSG(M_LEADINGSPACE(m) +
1702 M_TRAILINGSPACE(m) >=
1703 (m->m_pkthdr.len - m->m_len),
1704 "too small leading %d trailing %d ro? %d"
1705 " pkthdr.len %d mlen %d",
1706 (int)M_LEADINGSPACE(m),
1707 (int)M_TRAILINGSPACE(m),
1708 M_READONLY(m),
1709 m->m_pkthdr.len, m->m_len);
1710
1711 memmove(m->m_pktdat, m->m_data, m->m_len);
1712 m->m_data = m->m_pktdat;
1713
1714 KASSERT(M_TRAILINGSPACE(m) >=
1715 (m->m_pkthdr.len - m->m_len));
1716 }
1717 } else {
1718 /* Must copy data before adding cluster */
1719 m0 = m_get(how, MT_DATA);
1720 if (m0 == NULL)
1721 return NULL;
1722 KASSERTMSG(m->m_len <= MHLEN,
1723 "m=%p m->m_len=%d MHLEN=%u",
1724 m, m->m_len, (unsigned)MHLEN);
1725 m_copydata(m, 0, m->m_len, mtod(m0, void *));
1726
1727 MCLGET(m, how);
1728 if ((m->m_flags & M_EXT) == 0) {
1729 m_free(m0);
1730 return NULL;
1731 }
1732 memcpy(m->m_data, mtod(m0, void *), m->m_len);
1733 m_free(m0);
1734 }
1735 KASSERTMSG(M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len),
1736 "m=%p M_TRAILINGSPACE(m)=%zd m->m_pkthdr.len=%d"
1737 " m->m_len=%d",
1738 m, M_TRAILINGSPACE(m), m->m_pkthdr.len, m->m_len);
1739 m_copydata(m->m_next, 0, m->m_pkthdr.len - m->m_len,
1740 mtod(m, char *) + m->m_len);
1741 m->m_len = m->m_pkthdr.len;
1742 m_freem(m->m_next);
1743 m->m_next = NULL;
1744 return m;
1745 }
1746
1747 m0 = m_get(how, MT_DATA);
1748 if (m0 == NULL)
1749 return NULL;
1750 mn = m0;
1751
1752 sz = m->m_pkthdr.len - m->m_len;
1753 KASSERT(sz >= 0);
1754
1755 do {
1756 if (sz > MLEN) {
1757 MCLGET(mn, how);
1758 if ((mn->m_flags & M_EXT) == 0) {
1759 m_freem(m0);
1760 return NULL;
1761 }
1762 }
1763
1764 mn->m_len = MIN(sz, MCLBYTES);
1765
1766 m_copydata(m, m->m_pkthdr.len - sz, mn->m_len,
1767 mtod(mn, void *));
1768
1769 sz -= mn->m_len;
1770
1771 if (sz > 0) {
1772 /* need more mbufs */
1773 n = m_get(how, MT_DATA);
1774 if (n == NULL) {
1775 m_freem(m0);
1776 return NULL;
1777 }
1778
1779 mn->m_next = n;
1780 mn = n;
1781 }
1782 } while (sz > 0);
1783
1784 m_freem(m->m_next);
1785 m->m_next = m0;
1786
1787 return m;
1788 }
1789
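/*
 * Strip the packet header from an mbuf: delete its tag chain, clear
 * M_PKTHDR, and zero the header area.
 */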
1790 void
1791 m_remove_pkthdr(struct mbuf *m)
1792 {
1793 KASSERT(m->m_flags & M_PKTHDR);
1794
1795 m_tag_delete_chain(m);
1796 m->m_flags &= ~M_PKTHDR;
1797 memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
1798 }
1799
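/*
 * Copy the packet header, including a copy of the tag chain, from "from"
 * to "to". "from" keeps its header and tags.
 */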
1800 void
1801 m_copy_pkthdr(struct mbuf *to, struct mbuf *from)
1802 {
1803 KASSERT((to->m_flags & M_EXT) == 0);
1804 KASSERT((to->m_flags & M_PKTHDR) == 0 ||
1805 SLIST_FIRST(&to->m_pkthdr.tags) == NULL);
1806 KASSERT((from->m_flags & M_PKTHDR) != 0);
1807
1808 to->m_pkthdr = from->m_pkthdr;
1809 to->m_flags = from->m_flags & M_COPYFLAGS;
1810 to->m_data = to->m_pktdat;
1811
1812 SLIST_INIT(&to->m_pkthdr.tags);
1813 m_tag_copy_chain(to, from);
1814 }
1815
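/*
 * Move the packet header, including ownership of the tag chain, from
 * "from" to "to", clearing M_PKTHDR on "from".
 */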
1816 void
1817 m_move_pkthdr(struct mbuf *to, struct mbuf *from)
1818 {
1819 KASSERT((to->m_flags & M_EXT) == 0);
1820 KASSERT((to->m_flags & M_PKTHDR) == 0 ||
1821 SLIST_FIRST(&to->m_pkthdr.tags) == NULL);
1822 KASSERT((from->m_flags & M_PKTHDR) != 0);
1823
1824 to->m_pkthdr = from->m_pkthdr;
1825 to->m_flags = from->m_flags & M_COPYFLAGS;
1826 to->m_data = to->m_pktdat;
1827
1828 from->m_flags &= ~M_PKTHDR;
1829 }
1830
1831 /*
1832 * Set the m_data pointer of a newly-allocated mbuf to place an object of the
1833 * specified size at the end of the mbuf, longword aligned.
1834 */
1835 void
1836 m_align(struct mbuf *m, int len)
1837 {
1838 int buflen, adjust;
1839
1840 KASSERT(len != M_COPYALL);
1841 KASSERTMSG(M_LEADINGSPACE(m) == 0, "m=%p M_LEADINGSPACE(m)=%zd",
1842 m, M_LEADINGSPACE(m));
1843
1844 buflen = M_BUFSIZE(m);
1845
1846 KASSERTMSG(len <= buflen, "m=%p len=%d buflen=%d", m, len, buflen);
1847 adjust = buflen - len;
1848 m->m_data += adjust &~ (sizeof(long)-1);
1849 }
1850
1851 /*
1852 * Apply function f to the data in an mbuf chain starting "off" bytes from the
1853 * beginning, continuing for "len" bytes.
1854 */
1855 int
1856 m_apply(struct mbuf *m, int off, int len,
1857 int (*f)(void *, void *, unsigned int), void *arg)
1858 {
1859 unsigned int count;
1860 int rval;
1861
1862 KASSERT(len != M_COPYALL);
1863 KASSERT(len >= 0);
1864 KASSERT(off >= 0);
1865
1866 while (off > 0) {
1867 KASSERT(m != NULL);
1868 if (off < m->m_len)
1869 break;
1870 off -= m->m_len;
1871 m = m->m_next;
1872 }
1873 while (len > 0) {
1874 KASSERT(m != NULL);
1875 count = uimin(m->m_len - off, len);
1876
1877 rval = (*f)(arg, mtod(m, char *) + off, count);
1878 if (rval)
1879 return rval;
1880
1881 len -= count;
1882 off = 0;
1883 m = m->m_next;
1884 }
1885
1886 return 0;
1887 }
1888
1889 /*
1890 * Return a pointer to mbuf/offset of location in mbuf chain.
1891 */
1892 struct mbuf *
1893 m_getptr(struct mbuf *m, int loc, int *off)
1894 {
1895
1896 while (loc >= 0) {
1897 /* Normal end of search */
1898 if (m->m_len > loc) {
1899 *off = loc;
1900 return m;
1901 }
1902
1903 loc -= m->m_len;
1904
1905 if (m->m_next == NULL) {
1906 if (loc == 0) {
1907 /* Point at the end of valid data */
1908 *off = m->m_len;
1909 return m;
1910 }
1911 return NULL;
1912 } else {
1913 m = m->m_next;
1914 }
1915 }
1916
1917 return NULL;
1918 }
1919
1920 /*
1921 * Release a reference to the mbuf external storage.
1922 *
1923 * => free the mbuf m itself as well.
1924 */
1925 static void
1926 m_ext_free(struct mbuf *m)
1927 {
1928 const bool embedded = MEXT_ISEMBEDDED(m);
1929 bool dofree = true;
1930 u_int refcnt;
1931
1932 KASSERT((m->m_flags & M_EXT) != 0);
1933 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
1934 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
1935 KASSERT((m->m_flags & M_EXT_CLUSTER) ==
1936 (m->m_ext_ref->m_flags & M_EXT_CLUSTER));
1937
1938 if (__predict_false(m->m_type == MT_FREE)) {
1939 panic("mbuf %p already freed", m);
1940 }
1941
1942 if (__predict_true(m->m_ext.ext_refcnt == 1)) {
1943 refcnt = m->m_ext.ext_refcnt = 0;
1944 } else {
1945 membar_release();
1946 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
1947 }
1948
1949 if (refcnt > 0) {
1950 if (embedded) {
1951 /*
1952 * some other mbuf's m_ext_ref still points to us.
1953 */
1954 dofree = false;
1955 } else {
1956 m->m_ext_ref = m;
1957 }
1958 } else {
1959 /*
1960 * dropping the last reference
1961 */
1962 membar_acquire();
1963 if (!embedded) {
1964 m->m_ext.ext_refcnt++; /* XXX */
1965 m_ext_free(m->m_ext_ref);
1966 m->m_ext_ref = m;
1967 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
1968 pool_cache_put_paddr(mcl_cache,
1969 m->m_ext.ext_buf, m->m_ext.ext_paddr);
1970 } else if (m->m_ext.ext_free) {
1971 (*m->m_ext.ext_free)(m,
1972 m->m_ext.ext_buf, m->m_ext.ext_size,
1973 m->m_ext.ext_arg);
1974 /*
1975 * 'm' is already freed by the ext_free callback.
1976 */
1977 dofree = false;
1978 } else {
1979 free(m->m_ext.ext_buf, 0);
1980 }
1981 }
1982
1983 if (dofree) {
1984 m->m_type = MT_FREE;
1985 m->m_data = NULL;
1986 pool_cache_put(mb_cache, m);
1987 }
1988 }
1989
1990 /*
1991 * Free a single mbuf and associated external storage. Return the
1992 * successor, if any.
1993 */
1994 struct mbuf *
1995 m_free(struct mbuf *m)
1996 {
1997 struct mbuf *n;
1998
1999 mowner_revoke(m, 1, m->m_flags);
2000 mbstat_type_add(m->m_type, -1);
2001
2002 if (m->m_flags & M_PKTHDR)
2003 m_tag_delete_chain(m);
2004
2005 n = m->m_next;
2006
2007 if (m->m_flags & M_EXT) {
2008 m_ext_free(m);
2009 } else {
2010 if (__predict_false(m->m_type == MT_FREE)) {
2011 panic("mbuf %p already freed", m);
2012 }
2013 m->m_type = MT_FREE;
2014 m->m_data = NULL;
2015 pool_cache_put(mb_cache, m);
2016 }
2017
2018 return n;
2019 }
2020
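/*
 * Free an entire mbuf chain.
 */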
2021 void
2022 m_freem(struct mbuf *m)
2023 {
2024 if (m == NULL)
2025 return;
2026 do {
2027 m = m_free(m);
2028 } while (m);
2029 }
2030
2031 #if defined(DDB)
2032 void
2033 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
2034 {
2035 char ch;
2036 bool opt_c = false;
2037 bool opt_d = false;
2038 #if NETHER > 0
2039 bool opt_v = false;
2040 const struct mbuf *m0 = NULL;
2041 #endif
2042 int no = 0;
2043 char buf[512];
2044
2045 while ((ch = *(modif++)) != '\0') {
2046 switch (ch) {
2047 case 'c':
2048 opt_c = true;
2049 break;
2050 case 'd':
2051 opt_d = true;
2052 break;
2053 #if NETHER > 0
2054 case 'v':
2055 opt_v = true;
2056 m0 = m;
2057 break;
2058 #endif
2059 default:
2060 break;
2061 }
2062 }
2063
2064 nextchain:
2065 (*pr)("MBUF(%d) %p\n", no, m);
2066 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags);
2067 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n",
2068 m->m_data, m->m_len, m->m_type, buf);
2069 if (opt_d) {
2070 int i;
2071 unsigned char *p = m->m_data;
2072
2073 (*pr)(" data:");
2074
2075 for (i = 0; i < m->m_len; i++) {
2076 if (i % 16 == 0)
2077 (*pr)("\n");
2078 (*pr)(" %02x", p[i]);
2079 }
2080
2081 (*pr)("\n");
2082 }
2083 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next,
2084 m->m_nextpkt);
2085 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n",
2086 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m),
2087 (int)M_READONLY(m));
2088 if ((m->m_flags & M_PKTHDR) != 0) {
2089 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags);
2090 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%"
2091 PRIx32 ", segsz=%u\n",
2092 m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m),
2093 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
2094 }
2095 if ((m->m_flags & M_EXT)) {
2096 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, "
2097 "ext_free=%p, ext_arg=%p\n",
2098 m->m_ext.ext_refcnt,
2099 m->m_ext.ext_buf, m->m_ext.ext_size,
2100 m->m_ext.ext_free, m->m_ext.ext_arg);
2101 }
2102 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
2103 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf;
2104 vaddr_t eva = sva + m->m_ext.ext_size;
2105 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT;
2106 int i;
2107
2108 (*pr)(" pages:");
2109 for (i = 0; i < n; i ++) {
2110 (*pr)(" %p", m->m_ext.ext_pgs[i]);
2111 }
2112 (*pr)("\n");
2113 }
2114
2115 if (opt_c) {
2116 m = m->m_next;
2117 if (m != NULL) {
2118 no++;
2119 goto nextchain;
2120 }
2121 }
2122
2123 #if NETHER > 0
2124 if (opt_v && m0)
2125 m_examine(m0, AF_ETHER, modif, pr);
2126 #endif
2127 }
2128 #endif /* defined(DDB) */
2129
2130 #if defined(MBUFTRACE)
2131 void
2132 mowner_init_owner(struct mowner *mo, const char *name, const char *descr)
2133 {
2134 memset(mo, 0, sizeof(*mo));
2135 strlcpy(mo->mo_name, name, sizeof(mo->mo_name));
2136 strlcpy(mo->mo_descr, descr, sizeof(mo->mo_descr));
2137 }
2138
2139 void
2140 mowner_attach(struct mowner *mo)
2141 {
2142
2143 KASSERT(mo->mo_counters == NULL);
2144 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));
2145
2146 /* XXX lock */
2147 LIST_INSERT_HEAD(&mowners, mo, mo_link);
2148 }
2149
2150 void
2151 mowner_detach(struct mowner *mo)
2152 {
2153
2154 KASSERT(mo->mo_counters != NULL);
2155
2156 /* XXX lock */
2157 LIST_REMOVE(mo, mo_link);
2158
2159 percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
2160 mo->mo_counters = NULL;
2161 }
2162
2163 void
2164 mowner_init(struct mbuf *m, int type)
2165 {
2166 struct mowner_counter *mc;
2167 struct mowner *mo;
2168 int s;
2169
2170 m->m_owner = mo = &unknown_mowners[type];
2171 s = splvm();
2172 mc = percpu_getref(mo->mo_counters);
2173 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
2174 percpu_putref(mo->mo_counters);
2175 splx(s);
2176 }
2177
2178 void
2179 mowner_ref(struct mbuf *m, int flags)
2180 {
2181 struct mowner *mo = m->m_owner;
2182 struct mowner_counter *mc;
2183 int s;
2184
2185 s = splvm();
2186 mc = percpu_getref(mo->mo_counters);
2187 if ((flags & M_EXT) != 0)
2188 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
2189 if ((flags & M_EXT_CLUSTER) != 0)
2190 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
2191 percpu_putref(mo->mo_counters);
2192 splx(s);
2193 }
2194
2195 void
2196 mowner_revoke(struct mbuf *m, bool all, int flags)
2197 {
2198 struct mowner *mo = m->m_owner;
2199 struct mowner_counter *mc;
2200 int s;
2201
2202 s = splvm();
2203 mc = percpu_getref(mo->mo_counters);
2204 if ((flags & M_EXT) != 0)
2205 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
2206 if ((flags & M_EXT_CLUSTER) != 0)
2207 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
2208 if (all)
2209 mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
2210 percpu_putref(mo->mo_counters);
2211 splx(s);
2212 if (all)
2213 m->m_owner = &revoked_mowner;
2214 }
2215
2216 static void
2217 mowner_claim(struct mbuf *m, struct mowner *mo)
2218 {
2219 struct mowner_counter *mc;
2220 int flags = m->m_flags;
2221 int s;
2222
2223 s = splvm();
2224 mc = percpu_getref(mo->mo_counters);
2225 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
2226 if ((flags & M_EXT) != 0)
2227 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
2228 if ((flags & M_EXT_CLUSTER) != 0)
2229 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
2230 percpu_putref(mo->mo_counters);
2231 splx(s);
2232 m->m_owner = mo;
2233 }
2234
2235 void
2236 m_claim(struct mbuf *m, struct mowner *mo)
2237 {
2238
2239 if (m->m_owner == mo || mo == NULL)
2240 return;
2241
2242 mowner_revoke(m, true, m->m_flags);
2243 mowner_claim(m, mo);
2244 }
2245
2246 void
2247 m_claimm(struct mbuf *m, struct mowner *mo)
2248 {
2249
2250 for (; m != NULL; m = m->m_next)
2251 m_claim(m, mo);
2252 }
2253 #endif /* defined(MBUFTRACE) */
2254
2255 #ifdef DIAGNOSTIC
2256 /*
2257 * Verify that the mbuf chain is not malformed. Used only for diagnostics.
2258 * Panics on error.
2259 */
2260 void
2261 m_verify_packet(struct mbuf *m)
2262 {
2263 struct mbuf *n = m;
2264 char *low, *high, *dat;
2265 int totlen = 0, len;
2266
2267 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) {
2268 panic("%s: mbuf doesn't have M_PKTHDR", __func__);
2269 }
2270
2271 while (n != NULL) {
2272 if (__predict_false(n->m_type == MT_FREE)) {
2273 panic("%s: mbuf already freed (n = %p)", __func__, n);
2274 }
2275 #if 0
2276 /*
2277 * This ought to be a rule of the mbuf API. Unfortunately,
2278 * many places don't respect that rule.
2279 */
2280 if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) {
2281 panic("%s: M_PKTHDR set on secondary mbuf", __func__);
2282 }
2283 #endif
2284 if (__predict_false(n->m_nextpkt != NULL)) {
2285 panic("%s: m_nextpkt not null (m_nextpkt = %p)",
2286 __func__, n->m_nextpkt);
2287 }
2288
2289 dat = n->m_data;
2290 len = n->m_len;
2291 if (__predict_false(len < 0)) {
2292 panic("%s: incorrect length (len = %d)", __func__, len);
2293 }
2294
2295 low = M_BUFADDR(n);
2296 high = low + M_BUFSIZE(n);
2297 if (__predict_false((dat < low) || (dat + len > high))) {
2298 panic("%s: m_data not in packet"
2299 "(dat = %p, len = %d, low = %p, high = %p)",
2300 __func__, dat, len, low, high);
2301 }
2302
2303 totlen += len;
2304 n = n->m_next;
2305 }
2306
2307 if (__predict_false(totlen != m->m_pkthdr.len)) {
2308 panic("%s: inconsistent mbuf length (%d != %d)", __func__,
2309 totlen, m->m_pkthdr.len);
2310 }
2311 }
2312 #endif
2313
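/*
 * Allocate a packet tag with "len" bytes of payload following the m_tag
 * header. Returns NULL on failure or if len is negative.
 */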
2314 struct m_tag *
2315 m_tag_get(int type, int len, int wait)
2316 {
2317 struct m_tag *t;
2318
2319 if (len < 0)
2320 return NULL;
2321 t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait);
2322 if (t == NULL)
2323 return NULL;
2324 t->m_tag_id = type;
2325 t->m_tag_len = len;
2326 return t;
2327 }
2328
2329 void
2330 m_tag_free(struct m_tag *t)
2331 {
2332 free(t, M_PACKET_TAGS);
2333 }
2334
2335 void
2336 m_tag_prepend(struct mbuf *m, struct m_tag *t)
2337 {
2338 KASSERT((m->m_flags & M_PKTHDR) != 0);
2339 SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link);
2340 }
2341
2342 void
2343 m_tag_unlink(struct mbuf *m, struct m_tag *t)
2344 {
2345 KASSERT((m->m_flags & M_PKTHDR) != 0);
2346 SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link);
2347 }
2348
2349 void
2350 m_tag_delete(struct mbuf *m, struct m_tag *t)
2351 {
2352 m_tag_unlink(m, t);
2353 m_tag_free(t);
2354 }
2355
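/*
 * Delete and free all tags attached to the packet header of "m".
 */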
2356 void
2357 m_tag_delete_chain(struct mbuf *m)
2358 {
2359 struct m_tag *p, *q;
2360
2361 KASSERT((m->m_flags & M_PKTHDR) != 0);
2362
2363 p = SLIST_FIRST(&m->m_pkthdr.tags);
2364 if (p == NULL)
2365 return;
2366 while ((q = SLIST_NEXT(p, m_tag_link)) != NULL)
2367 m_tag_delete(m, q);
2368 m_tag_delete(m, p);
2369 }
2370
2371 struct m_tag *
2372 m_tag_find(const struct mbuf *m, int type)
2373 {
2374 struct m_tag *p;
2375
2376 KASSERT((m->m_flags & M_PKTHDR) != 0);
2377
2378 p = SLIST_FIRST(&m->m_pkthdr.tags);
2379 while (p != NULL) {
2380 if (p->m_tag_id == type)
2381 return p;
2382 p = SLIST_NEXT(p, m_tag_link);
2383 }
2384 return NULL;
2385 }
2386
2387 struct m_tag *
2388 m_tag_copy(struct m_tag *t)
2389 {
2390 struct m_tag *p;
2391
2392 p = m_tag_get(t->m_tag_id, t->m_tag_len, M_NOWAIT);
2393 if (p == NULL)
2394 return NULL;
2395 memcpy(p + 1, t + 1, t->m_tag_len);
2396 return p;
2397 }
2398
2399 /*
2400 * Copy two tag chains. The destination mbuf (to) loses any attached
2401 * tags even if the operation fails. This should not be a problem, as
2402 * m_tag_copy_chain() is typically called with a newly-allocated
2403 * destination mbuf.
2404 */
2405 int
2406 m_tag_copy_chain(struct mbuf *to, struct mbuf *from)
2407 {
2408 struct m_tag *p, *t, *tprev = NULL;
2409
2410 KASSERT((from->m_flags & M_PKTHDR) != 0);
2411
2412 m_tag_delete_chain(to);
2413 SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) {
2414 t = m_tag_copy(p);
2415 if (t == NULL) {
2416 m_tag_delete_chain(to);
2417 return 0;
2418 }
2419 if (tprev == NULL)
2420 SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link);
2421 else
2422 SLIST_INSERT_AFTER(tprev, t, m_tag_link);
2423 tprev = t;
2424 }
2425 return 1;
2426 }
2427