2 /* $NetBSD: uipc_mbuf.c,v 1.254 2024/12/06 18:44:00 riastradh Exp $ */
3
4 /*
5 * Copyright (c) 1999, 2001, 2018 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center, and Maxime Villard.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * Copyright (c) 1982, 1986, 1988, 1991, 1993
36 * The Regents of the University of California. All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95
63 */
64
65 #include <sys/cdefs.h>
66 __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.254 2024/12/06 18:44:00 riastradh Exp $");
67
68 #ifdef _KERNEL_OPT
69 #include "ether.h"
70 #include "opt_ddb.h"
71 #include "opt_mbuftrace.h"
72 #include "opt_nmbclusters.h"
73 #endif
74
75 #include <sys/param.h>
76 #include <sys/types.h>
77
78 #include <sys/atomic.h>
79 #include <sys/cpu.h>
80 #include <sys/domain.h>
81 #include <sys/kernel.h>
82 #include <sys/mbuf.h>
83 #include <sys/percpu.h>
84 #include <sys/pool.h>
85 #include <sys/proc.h>
86 #include <sys/protosw.h>
87 #include <sys/sdt.h>
88 #include <sys/socket.h>
89 #include <sys/sysctl.h>
90 #include <sys/syslog.h>
91 #include <sys/systm.h>
92
93 #include <net/if.h>
94
95 pool_cache_t mb_cache; /* mbuf cache */
96 static pool_cache_t mcl_cache; /* mbuf cluster cache */
97
98 struct mbstat mbstat;
99 int max_linkhdr;
100 int max_protohdr;
101 int max_hdr;
102 int max_datalen;
103
104 static void mb_drain(void *, int);
105 static int mb_ctor(void *, void *, int);
106
107 static void sysctl_kern_mbuf_setup(void);
108
109 static struct sysctllog *mbuf_sysctllog;
110
111 static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool);
112 static struct mbuf *m_split_internal(struct mbuf *, int, int, bool);
113 static int m_copyback_internal(struct mbuf **, int, int, const void *,
114 int, int);
115
116 /* Flags for m_copyback_internal. */
117 #define CB_COPYBACK 0x0001 /* copyback from cp */
118 #define CB_PRESERVE 0x0002 /* preserve original data */
119 #define CB_COW 0x0004 /* do copy-on-write */
120 #define CB_EXTEND 0x0008 /* extend chain */
121
122 static const char mclpool_warnmsg[] =
123 "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters";
124
125 MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
126
127 static percpu_t *mbstat_percpu;
128
129 #ifdef MBUFTRACE
130 struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
131 struct mowner unknown_mowners[] = {
132 MOWNER_INIT("unknown", "free"),
133 MOWNER_INIT("unknown", "data"),
134 MOWNER_INIT("unknown", "header"),
135 MOWNER_INIT("unknown", "soname"),
136 MOWNER_INIT("unknown", "soopts"),
137 MOWNER_INIT("unknown", "ftable"),
138 MOWNER_INIT("unknown", "control"),
139 MOWNER_INIT("unknown", "oobdata"),
140 };
141 struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
142 #endif
143
144 #define MEXT_ISEMBEDDED(m) ((m)->m_ext_ref == (m))
145
146 #define MCLADDREFERENCE(o, n) \
147 do { \
148 KASSERT(((o)->m_flags & M_EXT) != 0); \
149 KASSERT(((n)->m_flags & M_EXT) == 0); \
150 KASSERT((o)->m_ext.ext_refcnt >= 1); \
151 (n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS); \
152 atomic_inc_uint(&(o)->m_ext.ext_refcnt); \
153 (n)->m_ext_ref = (o)->m_ext_ref; \
154 mowner_ref((n), (n)->m_flags); \
155 } while (/* CONSTCOND */ 0)
156
157 static int
158 nmbclusters_limit(void)
159 {
160 #if defined(PMAP_MAP_POOLPAGE)
161 /* direct mapping, doesn't use space in kmem_arena */
162 vsize_t max_size = physmem / 4;
163 #else
164 vsize_t max_size = MIN(physmem / 4, nkmempages / 4);
165 #endif
166
167 max_size = max_size * PAGE_SIZE / MCLBYTES;
168 #ifdef NMBCLUSTERS_MAX
169 max_size = MIN(max_size, NMBCLUSTERS_MAX);
170 #endif
171
172 return max_size;
173 }
174
175 /*
176 * Initialize the mbuf allocator.
177 */
178 void
179 mbinit(void)
180 {
181
182 CTASSERT(sizeof(struct _m_ext) <= MHLEN);
183 CTASSERT(sizeof(struct mbuf) == MSIZE);
184
185 sysctl_kern_mbuf_setup();
186
187 mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl",
188 NULL, IPL_VM, mb_ctor, NULL, NULL);
189 KASSERT(mb_cache != NULL);
190
191 mcl_cache = pool_cache_init(mclbytes, COHERENCY_UNIT, 0, 0, "mclpl",
192 NULL, IPL_VM, NULL, NULL, NULL);
193 KASSERT(mcl_cache != NULL);
194
195 pool_cache_set_drain_hook(mb_cache, mb_drain, NULL);
196 pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL);
197
198 /*
199 * Set an arbitrary default limit on the number of mbuf clusters.
200 */
201 #ifdef NMBCLUSTERS
202 nmbclusters = MIN(NMBCLUSTERS, nmbclusters_limit());
203 #else
204 nmbclusters = MAX(1024,
205 (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16);
206 nmbclusters = MIN(nmbclusters, nmbclusters_limit());
207 #endif
208
209 /*
210 * Set the hard limit on the mclpool to the number of
211 	 * mbuf clusters the kernel is to support.  Log the limit-reached
212 	 * message at most once a minute.
213 */
214 pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);
215
216 mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));
217
218 /*
219 * Set a low water mark for both mbufs and clusters. This should
220 * help ensure that they can be allocated in a memory starvation
221 * situation. This is important for e.g. diskless systems which
222 * must allocate mbufs in order for the pagedaemon to clean pages.
223 */
224 pool_cache_setlowat(mb_cache, mblowat);
225 pool_cache_setlowat(mcl_cache, mcllowat);
226
227 #ifdef MBUFTRACE
228 {
229 /*
230 * Attach the unknown mowners.
231 */
232 int i;
233 MOWNER_ATTACH(&revoked_mowner);
234 for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
235 i-- > 0; )
236 MOWNER_ATTACH(&unknown_mowners[i]);
237 }
238 #endif
239 }
240
241 static void
242 mb_drain(void *arg, int flags)
243 {
244 struct domain *dp;
245 const struct protosw *pr;
246 struct ifnet *ifp;
247 int s;
248
249 KERNEL_LOCK(1, NULL);
250 s = splvm();
251 DOMAIN_FOREACH(dp) {
252 for (pr = dp->dom_protosw;
253 pr < dp->dom_protoswNPROTOSW; pr++)
254 if (pr->pr_drain)
255 (*pr->pr_drain)();
256 }
257 /* XXX we cannot use psref in H/W interrupt */
258 if (!cpu_intr_p()) {
259 int bound = curlwp_bind();
260 IFNET_READER_FOREACH(ifp) {
261 struct psref psref;
262
263 if_acquire(ifp, &psref);
264
265 if (ifp->if_drain)
266 (*ifp->if_drain)(ifp);
267
268 if_release(ifp, &psref);
269 }
270 curlwp_bindx(bound);
271 }
272 splx(s);
273 mbstat.m_drain++;
274 KERNEL_UNLOCK_ONE(NULL);
275 }
276
277 /*
278 * sysctl helper routine for the kern.mbuf subtree.
279 * nmbclusters, mblowat and mcllowat need range
280 * checking and pool tweaking after being reset.
281 */
282 static int
283 sysctl_kern_mbuf(SYSCTLFN_ARGS)
284 {
285 int error, newval;
286 struct sysctlnode node;
287
288 node = *rnode;
289 node.sysctl_data = &newval;
290 switch (rnode->sysctl_num) {
291 case MBUF_NMBCLUSTERS:
292 case MBUF_MBLOWAT:
293 case MBUF_MCLLOWAT:
294 newval = *(int*)rnode->sysctl_data;
295 break;
296 case MBUF_NMBCLUSTERS_LIMIT:
297 newval = nmbclusters_limit();
298 break;
299 default:
300 return SET_ERROR(EOPNOTSUPP);
301 }
302
303 error = sysctl_lookup(SYSCTLFN_CALL(&node));
304 if (error || newp == NULL)
305 return error;
306 if (newval < 0)
307 return SET_ERROR(EINVAL);
308
309 switch (node.sysctl_num) {
310 case MBUF_NMBCLUSTERS:
311 if (newval < nmbclusters)
312 return SET_ERROR(EINVAL);
313 if (newval > nmbclusters_limit())
314 return SET_ERROR(EINVAL);
315 nmbclusters = newval;
316 pool_cache_sethardlimit(mcl_cache, nmbclusters,
317 mclpool_warnmsg, 60);
318 break;
319 case MBUF_MBLOWAT:
320 mblowat = newval;
321 pool_cache_setlowat(mb_cache, mblowat);
322 break;
323 case MBUF_MCLLOWAT:
324 mcllowat = newval;
325 pool_cache_setlowat(mcl_cache, mcllowat);
326 break;
327 }
328
329 return 0;
330 }
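
/*
 * Example (illustrative, not part of the source): from userland these
 * limits can be inspected and, within the bounds checked above, raised
 * at run time via sysctl(8):
 *
 *	$ sysctl kern.mbuf.nmbclusters
 *	# sysctl -w kern.mbuf.nmbclusters=262144
 *
 * Lowering nmbclusters below its current value, or raising it above
 * kern.mbuf.nmbclusters_limit, is rejected with EINVAL.
 */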
331
332 #ifdef MBUFTRACE
333 static void
334 mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
335 {
336 struct mowner_counter *mc = v1;
337 struct mowner_user *mo_user = v2;
338 int i;
339
340 for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
341 mo_user->mo_counter[i] += mc->mc_counter[i];
342 }
343 }
344
345 static void
346 mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
347 {
348
349 memset(mo_user, 0, sizeof(*mo_user));
350 CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
351 CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
352 memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
353 memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
354 percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user);
355 }
356
357 static int
358 sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
359 {
360 struct mowner *mo;
361 size_t len = 0;
362 int error = 0;
363
364 if (namelen != 0)
365 return SET_ERROR(EINVAL);
366 if (newp != NULL)
367 return SET_ERROR(EPERM);
368
369 LIST_FOREACH(mo, &mowners, mo_link) {
370 struct mowner_user mo_user;
371
372 mowner_convert_to_user(mo, &mo_user);
373
374 if (oldp != NULL) {
375 if (*oldlenp - len < sizeof(mo_user)) {
376 error = SET_ERROR(ENOMEM);
377 break;
378 }
379 error = copyout(&mo_user, (char *)oldp + len,
380 sizeof(mo_user));
381 if (error)
382 break;
383 }
384 len += sizeof(mo_user);
385 }
386
387 if (error == 0)
388 *oldlenp = len;
389
390 return error;
391 }
392 #endif /* MBUFTRACE */
393
394 void
395 mbstat_type_add(int type, int diff)
396 {
397 struct mbstat_cpu *mb;
398 int s;
399
400 s = splvm();
401 mb = percpu_getref(mbstat_percpu);
402 mb->m_mtypes[type] += diff;
403 percpu_putref(mbstat_percpu);
404 splx(s);
405 }
406
407 static void
408 mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
409 {
410 struct mbstat_cpu *mbsc = v1;
411 struct mbstat *mbs = v2;
412 int i;
413
414 for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
415 mbs->m_mtypes[i] += mbsc->m_mtypes[i];
416 }
417 }
418
419 static void
420 mbstat_convert_to_user(struct mbstat *mbs)
421 {
422
423 memset(mbs, 0, sizeof(*mbs));
424 mbs->m_drain = mbstat.m_drain;
425 percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs);
426 }
427
428 static int
429 sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
430 {
431 struct sysctlnode node;
432 struct mbstat mbs;
433
434 mbstat_convert_to_user(&mbs);
435 node = *rnode;
436 node.sysctl_data = &mbs;
437 node.sysctl_size = sizeof(mbs);
438 return sysctl_lookup(SYSCTLFN_CALL(&node));
439 }
440
441 static void
442 sysctl_kern_mbuf_setup(void)
443 {
444
445 KASSERT(mbuf_sysctllog == NULL);
446 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
447 CTLFLAG_PERMANENT,
448 CTLTYPE_NODE, "mbuf",
449 SYSCTL_DESCR("mbuf control variables"),
450 NULL, 0, NULL, 0,
451 CTL_KERN, KERN_MBUF, CTL_EOL);
452
453 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
454 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
455 CTLTYPE_INT, "msize",
456 SYSCTL_DESCR("mbuf base size"),
457 NULL, msize, NULL, 0,
458 CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
459 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
460 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
461 CTLTYPE_INT, "mclbytes",
462 SYSCTL_DESCR("mbuf cluster size"),
463 NULL, mclbytes, NULL, 0,
464 CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
465 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
466 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
467 CTLTYPE_INT, "nmbclusters",
468 SYSCTL_DESCR("Limit on the number of mbuf clusters"),
469 sysctl_kern_mbuf, 0, &nmbclusters, 0,
470 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
471 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
472 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
473 CTLTYPE_INT, "mblowat",
474 SYSCTL_DESCR("mbuf low water mark"),
475 sysctl_kern_mbuf, 0, &mblowat, 0,
476 CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
477 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
478 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
479 CTLTYPE_INT, "mcllowat",
480 SYSCTL_DESCR("mbuf cluster low water mark"),
481 sysctl_kern_mbuf, 0, &mcllowat, 0,
482 CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
483 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
484 CTLFLAG_PERMANENT,
485 CTLTYPE_STRUCT, "stats",
486 SYSCTL_DESCR("mbuf allocation statistics"),
487 sysctl_kern_mbuf_stats, 0, NULL, 0,
488 CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
489 #ifdef MBUFTRACE
490 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
491 CTLFLAG_PERMANENT,
492 CTLTYPE_STRUCT, "mowners",
493 SYSCTL_DESCR("Information about mbuf owners"),
494 sysctl_kern_mbuf_mowners, 0, NULL, 0,
495 CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
496 #endif
497 sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
498 CTLFLAG_PERMANENT|CTLFLAG_READONLY,
499 CTLTYPE_INT, "nmbclusters_limit",
500 SYSCTL_DESCR("Limit of nmbclusters"),
501 sysctl_kern_mbuf, 0, NULL, 0,
502 CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS_LIMIT, CTL_EOL);
503 }
504
505 static int
506 mb_ctor(void *arg, void *object, int flags)
507 {
508 struct mbuf *m = object;
509
510 #ifdef POOL_VTOPHYS
511 m->m_paddr = POOL_VTOPHYS(m);
512 #else
513 m->m_paddr = M_PADDR_INVALID;
514 #endif
515 return 0;
516 }
517
518 /*
519 * Add mbuf to the end of a chain
520 */
521 struct mbuf *
522 m_add(struct mbuf *c, struct mbuf *m)
523 {
524 struct mbuf *n;
525
526 if (c == NULL)
527 return m;
528
529 for (n = c; n->m_next != NULL; n = n->m_next)
530 continue;
531 n->m_next = m;
532 return c;
533 }
534
535 struct mbuf *
536 m_get(int how, int type)
537 {
538 struct mbuf *m;
539
540 KASSERT(type != MT_FREE);
541
542 m = pool_cache_get(mb_cache,
543 how == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT);
544 if (m == NULL)
545 return NULL;
546 KASSERTMSG(((vaddr_t)m->m_dat & PAGE_MASK) + MLEN <= PAGE_SIZE,
547 "m=%p m->m_dat=%p"
548 " MLEN=%u PAGE_MASK=0x%x PAGE_SIZE=%u",
549 m, m->m_dat,
550 (unsigned)MLEN, (unsigned)PAGE_MASK, (unsigned)PAGE_SIZE);
551
552 mbstat_type_add(type, 1);
553
554 mowner_init(m, type);
555 m->m_ext_ref = m; /* default */
556 m->m_type = type;
557 m->m_len = 0;
558 m->m_next = NULL;
559 m->m_nextpkt = NULL; /* default */
560 m->m_data = m->m_dat;
561 m->m_flags = 0; /* default */
562
563 return m;
564 }
565
566 struct mbuf *
567 m_gethdr(int how, int type)
568 {
569 struct mbuf *m;
570
571 m = m_get(how, type);
572 if (m == NULL)
573 return NULL;
574
575 m->m_data = m->m_pktdat;
576 m->m_flags = M_PKTHDR;
577
578 m_reset_rcvif(m);
579 m->m_pkthdr.len = 0;
580 m->m_pkthdr.csum_flags = 0;
581 m->m_pkthdr.csum_data = 0;
582 m->m_pkthdr.segsz = 0;
583 m->m_pkthdr.ether_vtag = 0;
584 m->m_pkthdr.pkthdr_flags = 0;
585 SLIST_INIT(&m->m_pkthdr.tags);
586
587 m->m_pkthdr.pattr_class = NULL;
588 m->m_pkthdr.pattr_af = AF_UNSPEC;
589 m->m_pkthdr.pattr_hdr = NULL;
590
591 return m;
592 }
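
/*
 * Example (illustrative sketch, not part of the source): a typical
 * caller allocates a packet header mbuf, copies a small payload into
 * the internal storage, and keeps m_len and m_pkthdr.len in sync.
 * "payload" and "payloadlen" are hypothetical; payloadlen must not
 * exceed MHLEN here.
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	memcpy(mtod(m, void *), payload, payloadlen);
 *	m->m_len = m->m_pkthdr.len = payloadlen;
 */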
593
594 struct mbuf *
595 m_get_n(int how, int type, size_t alignbytes, size_t nbytes)
596 {
597 struct mbuf *m;
598
599 if (alignbytes > MCLBYTES || nbytes > MCLBYTES - alignbytes)
600 return NULL;
601 if ((m = m_get(how, type)) == NULL)
602 return NULL;
603 if (nbytes + alignbytes > MLEN) {
604 m_clget(m, how);
605 if ((m->m_flags & M_EXT) == 0) {
606 m_free(m);
607 return NULL;
608 }
609 }
610 m->m_len = alignbytes + nbytes;
611 m_adj(m, alignbytes);
612
613 return m;
614 }
615
616 struct mbuf *
617 m_gethdr_n(int how, int type, size_t alignbytes, size_t nbytes)
618 {
619 struct mbuf *m;
620
621 if (nbytes > MCLBYTES || nbytes > MCLBYTES - alignbytes)
622 return NULL;
623 if ((m = m_gethdr(how, type)) == NULL)
624 return NULL;
625 if (alignbytes + nbytes > MHLEN) {
626 m_clget(m, how);
627 if ((m->m_flags & M_EXT) == 0) {
628 m_free(m);
629 return NULL;
630 }
631 }
632 m->m_len = m->m_pkthdr.len = alignbytes + nbytes;
633 m_adj(m, alignbytes);
634
635 return m;
636 }
637
638 void
639 m_clget(struct mbuf *m, int how)
640 {
641 m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache,
642 how == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT,
643 &m->m_ext_storage.ext_paddr);
644
645 if (m->m_ext_storage.ext_buf == NULL)
646 return;
647
648 KASSERTMSG((((vaddr_t)m->m_ext_storage.ext_buf & PAGE_MASK) + mclbytes
649 <= PAGE_SIZE),
650 "m=%p m->m_ext_storage.ext_buf=%p"
651 " mclbytes=%u PAGE_MASK=0x%x PAGE_SIZE=%u",
652 	    m, m->m_ext_storage.ext_buf,
653 (unsigned)mclbytes, (unsigned)PAGE_MASK, (unsigned)PAGE_SIZE);
654
655 MCLINITREFERENCE(m);
656 m->m_data = m->m_ext.ext_buf;
657 m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) |
658 M_EXT|M_EXT_CLUSTER|M_EXT_RW;
659 m->m_ext.ext_size = MCLBYTES;
660 m->m_ext.ext_free = NULL;
661 m->m_ext.ext_arg = NULL;
662 /* ext_paddr initialized above */
663
664 mowner_ref(m, M_EXT|M_EXT_CLUSTER);
665 }
666
667 struct mbuf *
668 m_getcl(int how, int type, int flags)
669 {
670 struct mbuf *mp;
671
672 if ((flags & M_PKTHDR) != 0)
673 mp = m_gethdr(how, type);
674 else
675 mp = m_get(how, type);
676
677 if (mp == NULL)
678 return NULL;
679
680 MCLGET(mp, how);
681 if ((mp->m_flags & M_EXT) != 0)
682 return mp;
683
684 m_free(mp);
685 return NULL;
686 }
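
/*
 * Example (illustrative sketch): a receive path can use m_getcl() to
 * obtain a packet header mbuf with a cluster already attached, which
 * holds up to MCLBYTES of data.  "frame" and "framelen" are
 * hypothetical names for the received data.
 *
 *	struct mbuf *m;
 *
 *	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	memcpy(mtod(m, void *), frame, framelen);
 *	m->m_len = m->m_pkthdr.len = framelen;
 */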
687
688 /*
689 * Utility function for M_PREPEND. Do *NOT* use it directly.
690 */
691 struct mbuf *
692 m_prepend(struct mbuf *m, int len, int how)
693 {
694 struct mbuf *mn;
695
696 if (__predict_false(len > MHLEN)) {
697 panic("%s: len > MHLEN", __func__);
698 }
699
700 KASSERT(len != M_COPYALL);
701 mn = m_get(how, m->m_type);
702 if (mn == NULL) {
703 m_freem(m);
704 return NULL;
705 }
706
707 if (m->m_flags & M_PKTHDR) {
708 m_move_pkthdr(mn, m);
709 } else {
710 MCLAIM(mn, m->m_owner);
711 }
712 mn->m_next = m;
713 m = mn;
714
715 if (m->m_flags & M_PKTHDR) {
716 if (len < MHLEN)
717 m_align(m, len);
718 } else {
719 if (len < MLEN)
720 m_align(m, len);
721 }
722
723 m->m_len = len;
724 return m;
725 }
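
/*
 * Example (illustrative sketch): protocol output paths prepend headers
 * through the M_PREPEND() macro, which reuses leading space when there
 * is any and only falls back to m_prepend() when it has to.  On
 * failure the chain has been freed and m is NULL.  "struct myhdr" is a
 * made-up header type.
 *
 *	M_PREPEND(m, sizeof(struct myhdr), M_DONTWAIT);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	hdr = mtod(m, struct myhdr *);
 */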
726
727 struct mbuf *
728 m_copym(struct mbuf *m, int off, int len, int wait)
729 {
730 /* Shallow copy on M_EXT. */
731 return m_copy_internal(m, off, len, wait, false);
732 }
733
734 struct mbuf *
735 m_dup(struct mbuf *m, int off, int len, int wait)
736 {
737 /* Deep copy. */
738 return m_copy_internal(m, off, len, wait, true);
739 }
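
/*
 * Example (illustrative sketch): m_copym() with M_COPYALL creates a
 * reference-counted copy that shares any external storage with the
 * original (cheap, but read-only), whereas m_dup() copies the data
 * itself and the result may therefore be modified afterwards.
 *
 *	struct mbuf *shared, *writable;
 *
 *	shared   = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	writable = m_dup(m, 0, M_COPYALL, M_DONTWAIT);
 */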
740
741 static inline int
742 m_copylen(int len, int copylen)
743 {
744 return (len == M_COPYALL) ? copylen : uimin(len, copylen);
745 }
746
747 static struct mbuf *
748 m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep)
749 {
750 struct mbuf *m0 __diagused = m;
751 int len0 __diagused = len;
752 struct mbuf *n, **np;
753 int off = off0;
754 struct mbuf *top;
755 int copyhdr = 0;
756
757 if (off < 0 || (len != M_COPYALL && len < 0))
758 panic("%s: off %d, len %d", __func__, off, len);
759 if (off == 0 && m->m_flags & M_PKTHDR)
760 copyhdr = 1;
761 while (off > 0) {
762 if (m == NULL)
763 panic("%s: m == NULL, off %d", __func__, off);
764 if (off < m->m_len)
765 break;
766 off -= m->m_len;
767 m = m->m_next;
768 }
769
770 np = ⊤
771 top = NULL;
772 while (len == M_COPYALL || len > 0) {
773 if (m == NULL) {
774 if (len != M_COPYALL)
775 panic("%s: m == NULL, len %d [!COPYALL]",
776 __func__, len);
777 break;
778 }
779
780 n = m_get(wait, m->m_type);
781 *np = n;
782 if (n == NULL)
783 goto nospace;
784 MCLAIM(n, m->m_owner);
785
786 if (copyhdr) {
787 m_copy_pkthdr(n, m);
788 if (len == M_COPYALL)
789 n->m_pkthdr.len -= off0;
790 else
791 n->m_pkthdr.len = len;
792 copyhdr = 0;
793 }
794 n->m_len = m_copylen(len, m->m_len - off);
795
796 if (m->m_flags & M_EXT) {
797 if (!deep) {
798 n->m_data = m->m_data + off;
799 MCLADDREFERENCE(m, n);
800 } else {
801 /*
802 * We don't care if MCLGET fails. n->m_len is
803 * recomputed and handles that.
804 */
805 MCLGET(n, wait);
806 n->m_len = 0;
807 n->m_len = M_TRAILINGSPACE(n);
808 n->m_len = m_copylen(len, n->m_len);
809 n->m_len = uimin(n->m_len, m->m_len - off);
810 memcpy(mtod(n, void *), mtod(m, char *) + off,
811 (unsigned)n->m_len);
812 }
813 } else {
814 memcpy(mtod(n, void *), mtod(m, char *) + off,
815 (unsigned)n->m_len);
816 }
817
818 if (len != M_COPYALL)
819 len -= n->m_len;
820 off += n->m_len;
821
822 KASSERTMSG(off <= m->m_len,
823 "m=%p m->m_len=%d off=%d len=%d m0=%p off0=%d len0=%d",
824 m, m->m_len, off, len, m0, off0, len0);
825
826 if (off == m->m_len) {
827 m = m->m_next;
828 off = 0;
829 }
830 np = &n->m_next;
831 }
832
833 return top;
834
835 nospace:
836 m_freem(top);
837 return NULL;
838 }
839
840 /*
841 * Copy an entire packet, including header (which must be present).
842 * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'.
843 */
844 struct mbuf *
845 m_copypacket(struct mbuf *m, int how)
846 {
847 struct mbuf *top, *n, *o;
848
849 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) {
850 panic("%s: no header (m = %p)", __func__, m);
851 }
852
853 n = m_get(how, m->m_type);
854 top = n;
855 if (!n)
856 goto nospace;
857
858 MCLAIM(n, m->m_owner);
859 m_copy_pkthdr(n, m);
860 n->m_len = m->m_len;
861 if (m->m_flags & M_EXT) {
862 n->m_data = m->m_data;
863 MCLADDREFERENCE(m, n);
864 } else {
865 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
866 }
867
868 m = m->m_next;
869 while (m) {
870 o = m_get(how, m->m_type);
871 if (!o)
872 goto nospace;
873
874 MCLAIM(o, m->m_owner);
875 n->m_next = o;
876 n = n->m_next;
877
878 n->m_len = m->m_len;
879 if (m->m_flags & M_EXT) {
880 n->m_data = m->m_data;
881 MCLADDREFERENCE(m, n);
882 } else {
883 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
884 }
885
886 m = m->m_next;
887 }
888 return top;
889
890 nospace:
891 m_freem(top);
892 return NULL;
893 }
894
895 void
896 m_copydata(struct mbuf *m, int off, int len, void *cp)
897 {
898 unsigned int count;
899 struct mbuf *m0 = m;
900 int len0 = len;
901 int off0 = off;
902 void *cp0 = cp;
903
904 KASSERT(len != M_COPYALL);
905 if (off < 0 || len < 0)
906 panic("m_copydata: off %d, len %d", off, len);
907 while (off > 0) {
908 if (m == NULL)
909 panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)",
910 m0, len0, off0, cp0, off, off0 - off);
911 if (off < m->m_len)
912 break;
913 off -= m->m_len;
914 m = m->m_next;
915 }
916 while (len > 0) {
917 if (m == NULL)
918 panic("m_copydata(%p,%d,%d,%p): "
919 "m=NULL, off=%d (%d), len=%d (%d)",
920 m0, len0, off0, cp0,
921 off, off0 - off, len, len0 - len);
922 count = uimin(m->m_len - off, len);
923 memcpy(cp, mtod(m, char *) + off, count);
924 len -= count;
925 cp = (char *)cp + count;
926 off = 0;
927 m = m->m_next;
928 }
929 }
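
/*
 * Example (illustrative sketch): copy a fixed-size header out of the
 * chain into a local structure, regardless of how the chain happens to
 * be fragmented.  The struct ip type is only used for illustration and
 * assumes the usual <netinet/ip.h> definitions.
 *
 *	struct ip iph;
 *
 *	if (m->m_pkthdr.len < sizeof(iph))
 *		return EINVAL;
 *	m_copydata(m, 0, sizeof(iph), &iph);
 */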
930
931 /*
932 * Concatenate mbuf chain n to m.
933  * n may be copied into m (when n->m_len is small), so the data portion of
934  * n can end up in an mbuf of a different mbuf type.
935  * The m_pkthdr, if any, is not updated.
936 */
937 void
938 m_cat(struct mbuf *m, struct mbuf *n)
939 {
940
941 while (m->m_next)
942 m = m->m_next;
943 while (n) {
944 if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
945 /* just join the two chains */
946 m->m_next = n;
947 return;
948 }
949 /* splat the data from one into the other */
950 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
951 (u_int)n->m_len);
952 m->m_len += n->m_len;
953 n = m_free(n);
954 }
955 }
956
957 void
958 m_adj(struct mbuf *mp, int req_len)
959 {
960 int len = req_len;
961 struct mbuf *m;
962 int count;
963
964 if ((m = mp) == NULL)
965 return;
966 if (len >= 0) {
967 /*
968 * Trim from head.
969 */
970 while (m != NULL && len > 0) {
971 if (m->m_len <= len) {
972 len -= m->m_len;
973 m->m_len = 0;
974 m = m->m_next;
975 } else {
976 m->m_len -= len;
977 m->m_data += len;
978 len = 0;
979 }
980 }
981 if (mp->m_flags & M_PKTHDR)
982 mp->m_pkthdr.len -= (req_len - len);
983 } else {
984 /*
985 * Trim from tail. Scan the mbuf chain,
986 * calculating its length and finding the last mbuf.
987 * If the adjustment only affects this mbuf, then just
988 * adjust and return. Otherwise, rescan and truncate
989 * after the remaining size.
990 */
991 len = -len;
992 count = 0;
993 for (;;) {
994 count += m->m_len;
995 if (m->m_next == NULL)
996 break;
997 m = m->m_next;
998 }
999 if (m->m_len >= len) {
1000 m->m_len -= len;
1001 if (mp->m_flags & M_PKTHDR)
1002 mp->m_pkthdr.len -= len;
1003 return;
1004 }
1005
1006 count -= len;
1007 if (count < 0)
1008 count = 0;
1009
1010 /*
1011 * Correct length for chain is "count".
1012 * Find the mbuf with last data, adjust its length,
1013 * and toss data from remaining mbufs on chain.
1014 */
1015 m = mp;
1016 if (m->m_flags & M_PKTHDR)
1017 m->m_pkthdr.len = count;
1018 for (; m; m = m->m_next) {
1019 if (m->m_len >= count) {
1020 m->m_len = count;
1021 break;
1022 }
1023 count -= m->m_len;
1024 }
1025 if (m) {
1026 while (m->m_next)
1027 (m = m->m_next)->m_len = 0;
1028 }
1029 }
1030 }
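
/*
 * Example (illustrative sketch): a positive length trims from the head
 * of the chain, a negative length from the tail, and m_pkthdr.len is
 * kept in sync when the first mbuf has M_PKTHDR.  For instance, to
 * strip an Ethernet header and a trailing FCS (assuming the usual
 * <net/if_ether.h> constants):
 *
 *	m_adj(m, ETHER_HDR_LEN);
 *	m_adj(m, -ETHER_CRC_LEN);
 */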
1031
1032 /*
1033  * m_ensure_contig: rearrange an mbuf chain so that the given number of bytes
1034  * is contiguous and resides in the data area of an mbuf (therefore, mtod()
1035  * works for a structure of the given length).
1036  *
1037  * => Returns true on success, false otherwise; the resulting chain is in *m0.
1038 * => The mbuf chain may change, but is always preserved valid.
1039 */
1040 bool
1041 m_ensure_contig(struct mbuf **m0, int len)
1042 {
1043 struct mbuf *n = *m0, *m;
1044 size_t count, space;
1045
1046 KASSERT(len != M_COPYALL);
1047 /*
1048 * If first mbuf has no cluster, and has room for len bytes
1049 * without shifting current data, pullup into it,
1050 * otherwise allocate a new mbuf to prepend to the chain.
1051 */
1052 if ((n->m_flags & M_EXT) == 0 &&
1053 n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
1054 if (n->m_len >= len) {
1055 return true;
1056 }
1057 m = n;
1058 n = n->m_next;
1059 len -= m->m_len;
1060 } else {
1061 if (len > MHLEN) {
1062 return false;
1063 }
1064 m = m_get(M_DONTWAIT, n->m_type);
1065 if (m == NULL) {
1066 return false;
1067 }
1068 MCLAIM(m, n->m_owner);
1069 if (n->m_flags & M_PKTHDR) {
1070 m_move_pkthdr(m, n);
1071 }
1072 }
1073 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
1074 do {
1075 count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len);
1076 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
1077 (unsigned)count);
1078 len -= count;
1079 m->m_len += count;
1080 n->m_len -= count;
1081 space -= count;
1082 if (n->m_len)
1083 n->m_data += count;
1084 else
1085 n = m_free(n);
1086 } while (len > 0 && n);
1087
1088 m->m_next = n;
1089 *m0 = m;
1090
1091 return len <= 0;
1092 }
1093
1094 /*
1095 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error.
1096 */
1097 struct mbuf *
1098 m_pullup(struct mbuf *n, int len)
1099 {
1100 struct mbuf *m = n;
1101
1102 KASSERT(len != M_COPYALL);
1103 if (!m_ensure_contig(&m, len)) {
1104 KASSERT(m != NULL);
1105 m_freem(m);
1106 m = NULL;
1107 }
1108 return m;
1109 }
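
/*
 * Example (illustrative sketch): the classic use of m_pullup() before
 * casting m_data to a header structure.  m_pullup() may replace the
 * first mbuf, so only the returned pointer may be used afterwards; on
 * failure the whole chain has already been freed.  struct ip is only
 * for illustration.
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */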
1110
1111 /*
1112  * Ensure that [off, off + len) is contiguous on the mbuf chain "m".
1113  * The packet chain before "off" is kept untouched.
1114  * If offp == NULL, the target starts at <retval, 0> on the resulting chain.
1115  * If offp != NULL, the target starts at <retval, *offp> on the resulting chain.
1116  *
1117  * On error (NULL return value), the original "m" is freed.
1118 *
1119 * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster)
1120 */
1121 struct mbuf *
1122 m_pulldown(struct mbuf *m, int off, int len, int *offp)
1123 {
1124 struct mbuf *n, *o;
1125 int hlen, tlen, olen;
1126 int sharedcluster;
1127
1128 /* Check invalid arguments. */
1129 if (m == NULL)
1130 panic("%s: m == NULL", __func__);
1131 if (len > MCLBYTES) {
1132 m_freem(m);
1133 return NULL;
1134 }
1135
1136 n = m;
1137 while (n != NULL && off > 0) {
1138 if (n->m_len > off)
1139 break;
1140 off -= n->m_len;
1141 n = n->m_next;
1142 }
1143 	/* Make sure n points at a non-empty mbuf. */
1144 while (n != NULL && n->m_len == 0)
1145 n = n->m_next;
1146 if (!n) {
1147 m_freem(m);
1148 return NULL; /* mbuf chain too short */
1149 }
1150
1151 sharedcluster = M_READONLY(n);
1152
1153 /*
1154 * The target data is on <n, off>. If we got enough data on the mbuf
1155 * "n", we're done.
1156 */
1157 #ifdef __NO_STRICT_ALIGNMENT
1158 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster)
1159 #else
1160 if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster &&
1161 ALIGNED_POINTER((mtod(n, char *) + off), uint32_t))
1162 #endif
1163 goto ok;
1164
1165 /*
1166 * When (len <= n->m_len - off) and (off != 0), it is a special case.
1167 	 * Len bytes from <n, off> sit in a single mbuf, but the caller does
1168 * not like the starting position (off).
1169 *
1170 * Chop the current mbuf into two pieces, set off to 0.
1171 */
1172 if (len <= n->m_len - off) {
1173 struct mbuf *mlast;
1174
1175 o = m_dup(n, off, n->m_len - off, M_DONTWAIT);
1176 if (o == NULL) {
1177 m_freem(m);
1178 return NULL; /* ENOBUFS */
1179 }
1180 KASSERTMSG(o->m_len >= len, "o=%p o->m_len=%d len=%d",
1181 o, o->m_len, len);
1182 for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next)
1183 ;
1184 n->m_len = off;
1185 mlast->m_next = n->m_next;
1186 n->m_next = o;
1187 n = o;
1188 off = 0;
1189 goto ok;
1190 }
1191
1192 /*
1193 * We need to take hlen from <n, off> and tlen from <n->m_next, 0>,
1194 * and construct contiguous mbuf with m_len == len.
1195 *
1196 * Note that hlen + tlen == len, and tlen > 0.
1197 */
1198 hlen = n->m_len - off;
1199 tlen = len - hlen;
1200
1201 /*
1202 	 * Ensure that we have enough trailing data on the mbuf chain. If not,
1203 * we can do nothing about the chain.
1204 */
1205 olen = 0;
1206 for (o = n->m_next; o != NULL; o = o->m_next)
1207 olen += o->m_len;
1208 if (hlen + olen < len) {
1209 m_freem(m);
1210 return NULL; /* mbuf chain too short */
1211 }
1212
1213 /*
1214 * Easy cases first. We need to use m_copydata() to get data from
1215 * <n->m_next, 0>.
1216 */
1217 if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen &&
1218 !sharedcluster) {
1219 m_copydata(n->m_next, 0, tlen, mtod(n, char *) + n->m_len);
1220 n->m_len += tlen;
1221 m_adj(n->m_next, tlen);
1222 goto ok;
1223 }
1224 if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen &&
1225 #ifndef __NO_STRICT_ALIGNMENT
1226 ALIGNED_POINTER((n->m_next->m_data - hlen), uint32_t) &&
1227 #endif
1228 !sharedcluster && n->m_next->m_len >= tlen) {
1229 n->m_next->m_data -= hlen;
1230 n->m_next->m_len += hlen;
1231 memcpy(mtod(n->m_next, void *), mtod(n, char *) + off, hlen);
1232 n->m_len -= hlen;
1233 n = n->m_next;
1234 off = 0;
1235 goto ok;
1236 }
1237
1238 /*
1239 	 * Now we need to do it the hard way: there is no room to copy at
1240 	 * either end, so allocate a new mbuf and copy into it.
1241 */
1242 o = m_get(M_DONTWAIT, m->m_type);
1243 if (o && len > MLEN) {
1244 MCLGET(o, M_DONTWAIT);
1245 if ((o->m_flags & M_EXT) == 0) {
1246 m_free(o);
1247 o = NULL;
1248 }
1249 }
1250 if (!o) {
1251 m_freem(m);
1252 return NULL; /* ENOBUFS */
1253 }
1254 /* get hlen from <n, off> into <o, 0> */
1255 o->m_len = hlen;
1256 memcpy(mtod(o, void *), mtod(n, char *) + off, hlen);
1257 n->m_len -= hlen;
1258 /* get tlen from <n->m_next, 0> into <o, hlen> */
1259 m_copydata(n->m_next, 0, tlen, mtod(o, char *) + o->m_len);
1260 o->m_len += tlen;
1261 m_adj(n->m_next, tlen);
1262 o->m_next = n->m_next;
1263 n->m_next = o;
1264 n = o;
1265 off = 0;
1266
1267 ok:
1268 if (offp)
1269 *offp = off;
1270 return n;
1271 }
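
/*
 * Example (illustrative sketch, modelled on KAME-style usage): make a
 * header at offset "off" contiguous without touching the data before
 * it, then locate it through the returned mbuf and *offp.  The
 * icmp6_hdr type is only for illustration.
 *
 *	struct mbuf *n;
 *	int newoff;
 *
 *	n = m_pulldown(m, off, sizeof(struct icmp6_hdr), &newoff);
 *	if (n == NULL)
 *		return;
 *	icmp6 = (struct icmp6_hdr *)(mtod(n, char *) + newoff);
 *
 * Note that when m_pulldown() fails it has already freed "m".
 */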
1272
1273 /*
1274 * Like m_pullup(), except a new mbuf is always allocated, and we allow
1275 * the amount of empty space before the data in the new mbuf to be specified
1276 * (in the event that the caller expects to prepend later).
1277 */
1278 struct mbuf *
1279 m_copyup(struct mbuf *n, int len, int dstoff)
1280 {
1281 struct mbuf *m;
1282 int count, space;
1283
1284 KASSERT(len != M_COPYALL);
1285 if (len > ((int)MHLEN - dstoff))
1286 goto bad;
1287 m = m_get(M_DONTWAIT, n->m_type);
1288 if (m == NULL)
1289 goto bad;
1290 MCLAIM(m, n->m_owner);
1291 if (n->m_flags & M_PKTHDR) {
1292 m_move_pkthdr(m, n);
1293 }
1294 m->m_data += dstoff;
1295 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
1296 do {
1297 count = uimin(uimin(uimax(len, max_protohdr), space), n->m_len);
1298 memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
1299 (unsigned)count);
1300 len -= count;
1301 m->m_len += count;
1302 n->m_len -= count;
1303 space -= count;
1304 if (n->m_len)
1305 n->m_data += count;
1306 else
1307 n = m_free(n);
1308 } while (len > 0 && n);
1309 if (len > 0) {
1310 (void) m_free(m);
1311 goto bad;
1312 }
1313 m->m_next = n;
1314 return m;
1315 bad:
1316 m_freem(n);
1317 return NULL;
1318 }
1319
1320 struct mbuf *
1321 m_split(struct mbuf *m0, int len, int wait)
1322 {
1323 return m_split_internal(m0, len, wait, true);
1324 }
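
/*
 * Example (illustrative sketch): split a packet after its first
 * "hdrlen" bytes (a hypothetical count); "m" keeps the front part and
 * the return value is the remainder, carrying its own packet header
 * when "m" had one.  On failure the original chain is left intact.
 *
 *	struct mbuf *rest;
 *
 *	rest = m_split(m, hdrlen, M_DONTWAIT);
 *	if (rest == NULL)
 *		return ENOBUFS;
 */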
1325
1326 static struct mbuf *
1327 m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr)
1328 {
1329 struct mbuf *m, *n;
1330 unsigned len = len0, remain, len_save;
1331
1332 KASSERT(len0 != M_COPYALL);
1333 for (m = m0; m && len > m->m_len; m = m->m_next)
1334 len -= m->m_len;
1335 if (m == NULL)
1336 return NULL;
1337
1338 remain = m->m_len - len;
1339 if (copyhdr && (m0->m_flags & M_PKTHDR)) {
1340 n = m_gethdr(wait, m0->m_type);
1341 if (n == NULL)
1342 return NULL;
1343
1344 MCLAIM(n, m0->m_owner);
1345 m_copy_rcvif(n, m0);
1346 n->m_pkthdr.len = m0->m_pkthdr.len - len0;
1347 len_save = m0->m_pkthdr.len;
1348 m0->m_pkthdr.len = len0;
1349
1350 if ((m->m_flags & M_EXT) == 0 && remain > MHLEN) {
1351 /* m can't be the lead packet */
1352 m_align(n, 0);
1353 n->m_len = 0;
1354 n->m_next = m_split(m, len, wait);
1355 if (n->m_next == NULL) {
1356 (void)m_free(n);
1357 m0->m_pkthdr.len = len_save;
1358 return NULL;
1359 }
1360 return n;
1361 }
1362 } else if (remain == 0) {
1363 n = m->m_next;
1364 m->m_next = NULL;
1365 return n;
1366 } else {
1367 n = m_get(wait, m->m_type);
1368 if (n == NULL)
1369 return NULL;
1370 MCLAIM(n, m->m_owner);
1371 }
1372
1373 if (m->m_flags & M_EXT) {
1374 n->m_data = m->m_data + len;
1375 MCLADDREFERENCE(m, n);
1376 } else {
1377 m_align(n, remain);
1378 memcpy(mtod(n, void *), mtod(m, char *) + len, remain);
1379 }
1380
1381 n->m_len = remain;
1382 m->m_len = len;
1383 n->m_next = m->m_next;
1384 m->m_next = NULL;
1385 return n;
1386 }
1387
1388 /*
1389 * Routine to copy from device local memory into mbufs.
1390 */
1391 struct mbuf *
1392 m_devget(char *buf, int totlen, int off, struct ifnet *ifp)
1393 {
1394 struct mbuf *m;
1395 struct mbuf *top = NULL, **mp = ⊤
1396 char *cp, *epkt;
1397 int len;
1398
1399 cp = buf;
1400 epkt = cp + totlen;
1401 if (off) {
1402 /*
1403 * If 'off' is non-zero, packet is trailer-encapsulated,
1404 * so we have to skip the type and length fields.
1405 */
1406 cp += off + 2 * sizeof(uint16_t);
1407 totlen -= 2 * sizeof(uint16_t);
1408 }
1409
1410 m = m_gethdr(M_DONTWAIT, MT_DATA);
1411 if (m == NULL)
1412 return NULL;
1413 m_set_rcvif(m, ifp);
1414 m->m_pkthdr.len = totlen;
1415 m->m_len = MHLEN;
1416
1417 while (totlen > 0) {
1418 if (top) {
1419 m = m_get(M_DONTWAIT, MT_DATA);
1420 if (m == NULL) {
1421 m_freem(top);
1422 return NULL;
1423 }
1424 m->m_len = MLEN;
1425 }
1426
1427 len = uimin(totlen, epkt - cp);
1428
1429 if (len >= MINCLSIZE) {
1430 MCLGET(m, M_DONTWAIT);
1431 if ((m->m_flags & M_EXT) == 0) {
1432 m_free(m);
1433 m_freem(top);
1434 return NULL;
1435 }
1436 m->m_len = len = uimin(len, MCLBYTES);
1437 } else {
1438 /*
1439 * Place initial small packet/header at end of mbuf.
1440 */
1441 if (len < m->m_len) {
1442 				if (top == NULL && len + max_linkhdr <= m->m_len)
1443 m->m_data += max_linkhdr;
1444 m->m_len = len;
1445 } else
1446 len = m->m_len;
1447 }
1448
1449 memcpy(mtod(m, void *), cp, (size_t)len);
1450
1451 cp += len;
1452 *mp = m;
1453 mp = &m->m_next;
1454 totlen -= len;
1455 if (cp == epkt)
1456 cp = buf;
1457 }
1458
1459 return top;
1460 }
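
/*
 * Example (illustrative sketch): an old-style driver copying a received
 * frame out of device memory into a fresh chain.  "sc->sc_rxbuf" and
 * "framelen" are hypothetical driver-specific names.
 *
 *	struct mbuf *m;
 *
 *	m = m_devget(sc->sc_rxbuf, framelen, 0, ifp);
 *	if (m == NULL)
 *		return;		// out of mbufs, drop the frame
 *	// ... hand "m" to the network stack ...
 */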
1461
1462 /*
1463 * Copy data from a buffer back into the indicated mbuf chain,
1464 * starting "off" bytes from the beginning, extending the mbuf
1465 * chain if necessary.
1466 */
1467 void
1468 m_copyback(struct mbuf *m0, int off, int len, const void *cp)
1469 {
1470 #if defined(DEBUG)
1471 struct mbuf *origm = m0;
1472 int error;
1473 #endif
1474
1475 if (m0 == NULL)
1476 return;
1477
1478 #if defined(DEBUG)
1479 error =
1480 #endif
1481 m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND,
1482 M_DONTWAIT);
1483
1484 #if defined(DEBUG)
1485 if (error != 0 || (m0 != NULL && origm != m0))
1486 panic("m_copyback");
1487 #endif
1488 }
1489
1490 struct mbuf *
1491 m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
1492 {
1493 int error;
1494
1495 /* don't support chain expansion */
1496 KASSERT(len != M_COPYALL);
1497 KDASSERT(off + len <= m_length(m0));
1498
1499 error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW,
1500 how);
1501 if (error) {
1502 /*
1503 * no way to recover from partial success.
1504 * just free the chain.
1505 */
1506 m_freem(m0);
1507 return NULL;
1508 }
1509 return m0;
1510 }
1511
1512 int
1513 m_makewritable(struct mbuf **mp, int off, int len, int how)
1514 {
1515 int error;
1516 #if defined(DEBUG)
1517 int origlen = m_length(*mp);
1518 #endif
1519
1520 error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW,
1521 how);
1522 if (error)
1523 return error;
1524
1525 #if defined(DEBUG)
1526 int reslen = 0;
1527 for (struct mbuf *n = *mp; n; n = n->m_next)
1528 reslen += n->m_len;
1529 if (origlen != reslen)
1530 panic("m_makewritable: length changed");
1531 if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
1532 panic("m_makewritable: inconsist");
1533 #endif
1534
1535 return 0;
1536 }
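
/*
 * Example (illustrative sketch): before rewriting bytes in place (for
 * instance a checksum field), make the affected range writable first,
 * since the chain may reference shared read-only clusters.  "off",
 * "csum" and the "dropit" label are hypothetical.
 *
 *	if (m_makewritable(&m, off, sizeof(csum), M_DONTWAIT))
 *		goto dropit;
 *	m_copyback(m, off, sizeof(csum), &csum);
 */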
1537
1538 static int
1539 m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp,
1540 int flags, int how)
1541 {
1542 int mlen;
1543 struct mbuf *m, *n;
1544 struct mbuf **mp;
1545 int totlen = 0;
1546 const char *cp = vp;
1547
1548 KASSERT(mp0 != NULL);
1549 KASSERT(*mp0 != NULL);
1550 KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL);
1551 KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL);
1552
1553 if (len == M_COPYALL)
1554 len = m_length(*mp0) - off;
1555
1556 /*
1557 * we don't bother to update "totlen" in the case of CB_COW,
1558 * assuming that CB_EXTEND and CB_COW are exclusive.
1559 */
1560
1561 KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0);
1562
1563 mp = mp0;
1564 m = *mp;
1565 while (off > (mlen = m->m_len)) {
1566 off -= mlen;
1567 totlen += mlen;
1568 if (m->m_next == NULL) {
1569 int tspace;
1570 extend:
1571 if ((flags & CB_EXTEND) == 0)
1572 goto out;
1573
1574 /*
1575 * try to make some space at the end of "m".
1576 */
1577
1578 mlen = m->m_len;
1579 if (off + len >= MINCLSIZE &&
1580 (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
1581 MCLGET(m, how);
1582 }
1583 tspace = M_TRAILINGSPACE(m);
1584 if (tspace > 0) {
1585 tspace = uimin(tspace, off + len);
1586 KASSERT(tspace > 0);
1587 memset(mtod(m, char *) + m->m_len, 0,
1588 uimin(off, tspace));
1589 m->m_len += tspace;
1590 off += mlen;
1591 totlen -= mlen;
1592 continue;
1593 }
1594
1595 /*
1596 * need to allocate an mbuf.
1597 */
1598
1599 if (off + len >= MINCLSIZE) {
1600 n = m_getcl(how, m->m_type, 0);
1601 } else {
1602 n = m_get(how, m->m_type);
1603 }
1604 if (n == NULL) {
1605 goto out;
1606 }
1607 n->m_len = uimin(M_TRAILINGSPACE(n), off + len);
1608 memset(mtod(n, char *), 0, uimin(n->m_len, off));
1609 m->m_next = n;
1610 }
1611 mp = &m->m_next;
1612 m = m->m_next;
1613 }
1614 while (len > 0) {
1615 mlen = m->m_len - off;
1616 if (mlen != 0 && M_READONLY(m)) {
1617 /*
1618 * This mbuf is read-only. Allocate a new writable
1619 * mbuf and try again.
1620 */
1621 char *datap;
1622 int eatlen;
1623
1624 KASSERT((flags & CB_COW) != 0);
1625
1626 /*
1627 * if we're going to write into the middle of
1628 			 * an mbuf, split it first.
1629 */
1630 if (off > 0) {
1631 n = m_split_internal(m, off, how, false);
1632 if (n == NULL)
1633 goto enobufs;
1634 m->m_next = n;
1635 mp = &m->m_next;
1636 m = n;
1637 off = 0;
1638 continue;
1639 }
1640
1641 /*
1642 * XXX TODO coalesce into the trailingspace of
1643 * the previous mbuf when possible.
1644 */
1645
1646 /*
1647 * allocate a new mbuf. copy packet header if needed.
1648 */
1649 n = m_get(how, m->m_type);
1650 if (n == NULL)
1651 goto enobufs;
1652 MCLAIM(n, m->m_owner);
1653 if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
1654 m_move_pkthdr(n, m);
1655 n->m_len = MHLEN;
1656 } else {
1657 if (len >= MINCLSIZE)
1658 MCLGET(n, M_DONTWAIT);
1659 n->m_len =
1660 (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
1661 }
1662 if (n->m_len > len)
1663 n->m_len = len;
1664
1665 /*
1666 			 * Free the region which has been overwritten,
1667 			 * copying data from the old mbufs if requested.
1668 */
1669 if (flags & CB_PRESERVE)
1670 datap = mtod(n, char *);
1671 else
1672 datap = NULL;
1673 eatlen = n->m_len;
1674 while (m != NULL && M_READONLY(m) &&
1675 n->m_type == m->m_type && eatlen > 0) {
1676 mlen = uimin(eatlen, m->m_len);
1677 if (datap) {
1678 m_copydata(m, 0, mlen, datap);
1679 datap += mlen;
1680 }
1681 m->m_data += mlen;
1682 m->m_len -= mlen;
1683 eatlen -= mlen;
1684 if (m->m_len == 0)
1685 *mp = m = m_free(m);
1686 }
1687 if (eatlen > 0)
1688 n->m_len -= eatlen;
1689 n->m_next = m;
1690 *mp = m = n;
1691 continue;
1692 }
1693 mlen = uimin(mlen, len);
1694 if (flags & CB_COPYBACK) {
1695 memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
1696 cp += mlen;
1697 }
1698 len -= mlen;
1699 mlen += off;
1700 off = 0;
1701 totlen += mlen;
1702 if (len == 0)
1703 break;
1704 if (m->m_next == NULL) {
1705 goto extend;
1706 }
1707 mp = &m->m_next;
1708 m = m->m_next;
1709 }
1710
1711 out:
1712 if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
1713 KASSERT((flags & CB_EXTEND) != 0);
1714 m->m_pkthdr.len = totlen;
1715 }
1716
1717 return 0;
1718
1719 enobufs:
1720 return SET_ERROR(ENOBUFS);
1721 }
1722
1723 /*
1724 * Compress the mbuf chain. Return the new mbuf chain on success, NULL on
1725 * failure. The first mbuf is preserved, and on success the pointer returned
1726 * is the same as the one passed.
1727 */
1728 struct mbuf *
1729 m_defrag(struct mbuf *m, int how)
1730 {
1731 struct mbuf *m0, *mn, *n;
1732 int sz;
1733
1734 KASSERT((m->m_flags & M_PKTHDR) != 0);
1735
1736 if (m->m_next == NULL)
1737 return m;
1738
1739 /* Defrag to single mbuf if at all possible */
1740 if ((m->m_flags & M_EXT) == 0 && m->m_pkthdr.len <= MCLBYTES) {
1741 if (m->m_pkthdr.len <= MHLEN) {
1742 if (M_TRAILINGSPACE(m) < (m->m_pkthdr.len - m->m_len)) {
1743 KASSERTMSG(M_LEADINGSPACE(m) +
1744 M_TRAILINGSPACE(m) >=
1745 (m->m_pkthdr.len - m->m_len),
1746 "too small leading %d trailing %d ro? %d"
1747 " pkthdr.len %d mlen %d",
1748 (int)M_LEADINGSPACE(m),
1749 (int)M_TRAILINGSPACE(m),
1750 M_READONLY(m),
1751 m->m_pkthdr.len, m->m_len);
1752
1753 memmove(m->m_pktdat, m->m_data, m->m_len);
1754 m->m_data = m->m_pktdat;
1755
1756 KASSERT(M_TRAILINGSPACE(m) >=
1757 (m->m_pkthdr.len - m->m_len));
1758 }
1759 } else {
1760 /* Must copy data before adding cluster */
1761 m0 = m_get(how, MT_DATA);
1762 if (m0 == NULL)
1763 return NULL;
1764 KASSERTMSG(m->m_len <= MHLEN,
1765 "m=%p m->m_len=%d MHLEN=%u",
1766 m, m->m_len, (unsigned)MHLEN);
1767 m_copydata(m, 0, m->m_len, mtod(m0, void *));
1768
1769 MCLGET(m, how);
1770 if ((m->m_flags & M_EXT) == 0) {
1771 m_free(m0);
1772 return NULL;
1773 }
1774 memcpy(m->m_data, mtod(m0, void *), m->m_len);
1775 m_free(m0);
1776 }
1777 KASSERTMSG(M_TRAILINGSPACE(m) >= (m->m_pkthdr.len - m->m_len),
1778 "m=%p M_TRAILINGSPACE(m)=%zd m->m_pkthdr.len=%d"
1779 " m->m_len=%d",
1780 m, M_TRAILINGSPACE(m), m->m_pkthdr.len, m->m_len);
1781 m_copydata(m->m_next, 0, m->m_pkthdr.len - m->m_len,
1782 mtod(m, char *) + m->m_len);
1783 m->m_len = m->m_pkthdr.len;
1784 m_freem(m->m_next);
1785 m->m_next = NULL;
1786 return m;
1787 }
1788
1789 m0 = m_get(how, MT_DATA);
1790 if (m0 == NULL)
1791 return NULL;
1792 mn = m0;
1793
1794 sz = m->m_pkthdr.len - m->m_len;
1795 KASSERT(sz >= 0);
1796
1797 do {
1798 if (sz > MLEN) {
1799 MCLGET(mn, how);
1800 if ((mn->m_flags & M_EXT) == 0) {
1801 m_freem(m0);
1802 return NULL;
1803 }
1804 }
1805
1806 mn->m_len = MIN(sz, MCLBYTES);
1807
1808 m_copydata(m, m->m_pkthdr.len - sz, mn->m_len,
1809 mtod(mn, void *));
1810
1811 sz -= mn->m_len;
1812
1813 if (sz > 0) {
1814 /* need more mbufs */
1815 n = m_get(how, MT_DATA);
1816 if (n == NULL) {
1817 m_freem(m0);
1818 return NULL;
1819 }
1820
1821 mn->m_next = n;
1822 mn = n;
1823 }
1824 } while (sz > 0);
1825
1826 m_freem(m->m_next);
1827 m->m_next = m0;
1828
1829 return m;
1830 }
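
/*
 * Example (illustrative sketch): drivers whose DMA engines accept only
 * a limited number of segments commonly retry with m_defrag() when
 * bus_dmamap_load_mbuf(9) fails with EFBIG.  "sc->sc_dmat", "map" and
 * the "dropit" label are hypothetical driver-specific names.
 *
 *	error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT);
 *	if (error == EFBIG) {
 *		if (m_defrag(m, M_DONTWAIT) == NULL)
 *			goto dropit;
 *		error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
 *		    BUS_DMA_NOWAIT);
 *	}
 */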
1831
1832 void
1833 m_remove_pkthdr(struct mbuf *m)
1834 {
1835 KASSERT(m->m_flags & M_PKTHDR);
1836
1837 m_tag_delete_chain(m);
1838 m->m_flags &= ~M_PKTHDR;
1839 memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
1840 }
1841
1842 void
1843 m_copy_pkthdr(struct mbuf *to, struct mbuf *from)
1844 {
1845 KASSERT((to->m_flags & M_EXT) == 0);
1846 KASSERT((to->m_flags & M_PKTHDR) == 0 ||
1847 SLIST_FIRST(&to->m_pkthdr.tags) == NULL);
1848 KASSERT((from->m_flags & M_PKTHDR) != 0);
1849
1850 to->m_pkthdr = from->m_pkthdr;
1851 to->m_flags = from->m_flags & M_COPYFLAGS;
1852 to->m_data = to->m_pktdat;
1853
1854 SLIST_INIT(&to->m_pkthdr.tags);
1855 m_tag_copy_chain(to, from);
1856 }
1857
1858 void
1859 m_move_pkthdr(struct mbuf *to, struct mbuf *from)
1860 {
1861 KASSERT((to->m_flags & M_EXT) == 0);
1862 KASSERT((to->m_flags & M_PKTHDR) == 0 ||
1863 SLIST_FIRST(&to->m_pkthdr.tags) == NULL);
1864 KASSERT((from->m_flags & M_PKTHDR) != 0);
1865
1866 to->m_pkthdr = from->m_pkthdr;
1867 to->m_flags = from->m_flags & M_COPYFLAGS;
1868 to->m_data = to->m_pktdat;
1869
1870 from->m_flags &= ~M_PKTHDR;
1871 }
1872
1873 /*
1874 * Set the m_data pointer of a newly-allocated mbuf to place an object of the
1875 * specified size at the end of the mbuf, longword aligned.
1876 */
1877 void
1878 m_align(struct mbuf *m, int len)
1879 {
1880 int buflen, adjust;
1881
1882 KASSERT(len != M_COPYALL);
1883 KASSERTMSG(M_LEADINGSPACE(m) == 0, "m=%p M_LEADINGSPACE(m)=%zd",
1884 m, M_LEADINGSPACE(m));
1885
1886 buflen = M_BUFSIZE(m);
1887
1888 KASSERTMSG(len <= buflen, "m=%p len=%d buflen=%d", m, len, buflen);
1889 adjust = buflen - len;
1890 m->m_data += adjust &~ (sizeof(long)-1);
1891 }
1892
1893 /*
1894 * Apply function f to the data in an mbuf chain starting "off" bytes from the
1895 * beginning, continuing for "len" bytes.
1896 */
1897 int
1898 m_apply(struct mbuf *m, int off, int len,
1899 int (*f)(void *, void *, unsigned int), void *arg)
1900 {
1901 unsigned int count;
1902 int rval;
1903
1904 KASSERT(len != M_COPYALL);
1905 KASSERT(len >= 0);
1906 KASSERT(off >= 0);
1907
1908 while (off > 0) {
1909 KASSERT(m != NULL);
1910 if (off < m->m_len)
1911 break;
1912 off -= m->m_len;
1913 m = m->m_next;
1914 }
1915 while (len > 0) {
1916 KASSERT(m != NULL);
1917 count = uimin(m->m_len - off, len);
1918
1919 rval = (*f)(arg, mtod(m, char *) + off, count);
1920 if (rval)
1921 return rval;
1922
1923 len -= count;
1924 off = 0;
1925 m = m->m_next;
1926 }
1927
1928 return 0;
1929 }
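
/*
 * Example (illustrative sketch): m_apply() walks the data in place and
 * avoids a flat intermediate copy.  A hypothetical digest callback,
 * assuming the kernel MD5 routines from <sys/md5.h>, might look like:
 *
 *	static int
 *	md5_update_cb(void *arg, void *data, unsigned int len)
 *	{
 *		MD5Update(arg, data, len);
 *		return 0;	// nonzero would abort the walk
 *	}
 *
 *	error = m_apply(m, 0, m->m_pkthdr.len, md5_update_cb, &ctx);
 */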
1930
1931 /*
1932 * Return a pointer to mbuf/offset of location in mbuf chain.
1933 */
1934 struct mbuf *
1935 m_getptr(struct mbuf *m, int loc, int *off)
1936 {
1937
1938 while (loc >= 0) {
1939 /* Normal end of search */
1940 if (m->m_len > loc) {
1941 *off = loc;
1942 return m;
1943 }
1944
1945 loc -= m->m_len;
1946
1947 if (m->m_next == NULL) {
1948 if (loc == 0) {
1949 /* Point at the end of valid data */
1950 *off = m->m_len;
1951 return m;
1952 }
1953 return NULL;
1954 } else {
1955 m = m->m_next;
1956 }
1957 }
1958
1959 return NULL;
1960 }
1961
1962 /*
1963 * Release a reference to the mbuf external storage.
1964 *
1965 * => free the mbuf m itself as well.
1966 */
1967 static void
1968 m_ext_free(struct mbuf *m)
1969 {
1970 const bool embedded = MEXT_ISEMBEDDED(m);
1971 bool dofree = true;
1972 u_int refcnt;
1973
1974 KASSERT((m->m_flags & M_EXT) != 0);
1975 KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
1976 KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
1977 KASSERT((m->m_flags & M_EXT_CLUSTER) ==
1978 (m->m_ext_ref->m_flags & M_EXT_CLUSTER));
1979
1980 if (__predict_false(m->m_type == MT_FREE)) {
1981 panic("mbuf %p already freed", m);
1982 }
1983
1984 if (__predict_true(m->m_ext.ext_refcnt == 1)) {
1985 refcnt = m->m_ext.ext_refcnt = 0;
1986 } else {
1987 membar_release();
1988 refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
1989 }
1990
1991 if (refcnt > 0) {
1992 if (embedded) {
1993 /*
1994 * other mbuf's m_ext_ref still points to us.
1995 */
1996 dofree = false;
1997 } else {
1998 m->m_ext_ref = m;
1999 }
2000 } else {
2001 /*
2002 * dropping the last reference
2003 */
2004 membar_acquire();
2005 if (!embedded) {
2006 m->m_ext.ext_refcnt++; /* XXX */
2007 m_ext_free(m->m_ext_ref);
2008 m->m_ext_ref = m;
2009 } else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
2010 pool_cache_put_paddr(mcl_cache,
2011 m->m_ext.ext_buf, m->m_ext.ext_paddr);
2012 } else if (m->m_ext.ext_free) {
2013 (*m->m_ext.ext_free)(m,
2014 m->m_ext.ext_buf, m->m_ext.ext_size,
2015 m->m_ext.ext_arg);
2016 /*
2017 * 'm' is already freed by the ext_free callback.
2018 */
2019 dofree = false;
2020 } else {
2021 free(m->m_ext.ext_buf, 0);
2022 }
2023 }
2024
2025 if (dofree) {
2026 m->m_type = MT_FREE;
2027 m->m_data = NULL;
2028 pool_cache_put(mb_cache, m);
2029 }
2030 }
2031
2032 /*
2033 * Free a single mbuf and associated external storage. Return the
2034 * successor, if any.
2035 */
2036 struct mbuf *
2037 m_free(struct mbuf *m)
2038 {
2039 struct mbuf *n;
2040
2041 mowner_revoke(m, 1, m->m_flags);
2042 mbstat_type_add(m->m_type, -1);
2043
2044 if (m->m_flags & M_PKTHDR)
2045 m_tag_delete_chain(m);
2046
2047 n = m->m_next;
2048
2049 if (m->m_flags & M_EXT) {
2050 m_ext_free(m);
2051 } else {
2052 if (__predict_false(m->m_type == MT_FREE)) {
2053 panic("mbuf %p already freed", m);
2054 }
2055 m->m_type = MT_FREE;
2056 m->m_data = NULL;
2057 pool_cache_put(mb_cache, m);
2058 }
2059
2060 return n;
2061 }
2062
2063 void
2064 m_freem(struct mbuf *m)
2065 {
2066 if (m == NULL)
2067 return;
2068 do {
2069 m = m_free(m);
2070 } while (m);
2071 }
2072
2073 #if defined(DDB)
2074 void
2075 m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
2076 {
2077 char ch;
2078 bool opt_c = false;
2079 bool opt_d = false;
2080 #if NETHER > 0
2081 bool opt_v = false;
2082 const struct mbuf *m0 = NULL;
2083 #endif
2084 int no = 0;
2085 char buf[512];
2086
2087 while ((ch = *(modif++)) != '\0') {
2088 switch (ch) {
2089 case 'c':
2090 opt_c = true;
2091 break;
2092 case 'd':
2093 opt_d = true;
2094 break;
2095 #if NETHER > 0
2096 case 'v':
2097 opt_v = true;
2098 m0 = m;
2099 break;
2100 #endif
2101 default:
2102 break;
2103 }
2104 }
2105
2106 nextchain:
2107 (*pr)("MBUF(%d) %p\n", no, m);
2108 snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags);
2109 (*pr)(" data=%p, len=%d, type=%d, flags=%s\n",
2110 m->m_data, m->m_len, m->m_type, buf);
2111 if (opt_d) {
2112 int i;
2113 unsigned char *p = m->m_data;
2114
2115 (*pr)(" data:");
2116
2117 for (i = 0; i < m->m_len; i++) {
2118 if (i % 16 == 0)
2119 (*pr)("\n");
2120 (*pr)(" %02x", p[i]);
2121 }
2122
2123 (*pr)("\n");
2124 }
2125 (*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next,
2126 m->m_nextpkt);
2127 (*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n",
2128 (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m),
2129 (int)M_READONLY(m));
2130 if ((m->m_flags & M_PKTHDR) != 0) {
2131 snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags);
2132 (*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%"
2133 PRIx32 ", segsz=%u\n",
2134 m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m),
2135 buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
2136 }
2137 if ((m->m_flags & M_EXT)) {
2138 (*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, "
2139 "ext_free=%p, ext_arg=%p\n",
2140 m->m_ext.ext_refcnt,
2141 m->m_ext.ext_buf, m->m_ext.ext_size,
2142 m->m_ext.ext_free, m->m_ext.ext_arg);
2143 }
2144 if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
2145 vaddr_t sva = (vaddr_t)m->m_ext.ext_buf;
2146 vaddr_t eva = sva + m->m_ext.ext_size;
2147 int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT;
2148 int i;
2149
2150 (*pr)(" pages:");
2151 for (i = 0; i < n; i ++) {
2152 (*pr)(" %p", m->m_ext.ext_pgs[i]);
2153 }
2154 (*pr)("\n");
2155 }
2156
2157 if (opt_c) {
2158 m = m->m_next;
2159 if (m != NULL) {
2160 no++;
2161 goto nextchain;
2162 }
2163 }
2164
2165 #if NETHER > 0
2166 if (opt_v && m0)
2167 m_examine(m0, AF_ETHER, modif, pr);
2168 #endif
2169 }
2170 #endif /* defined(DDB) */
2171
2172 #if defined(MBUFTRACE)
2173 void
2174 mowner_init_owner(struct mowner *mo, const char *name, const char *descr)
2175 {
2176 memset(mo, 0, sizeof(*mo));
2177 strlcpy(mo->mo_name, name, sizeof(mo->mo_name));
2178 strlcpy(mo->mo_descr, descr, sizeof(mo->mo_descr));
2179 }
2180
2181 void
2182 mowner_attach(struct mowner *mo)
2183 {
2184
2185 KASSERT(mo->mo_counters == NULL);
2186 mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));
2187
2188 /* XXX lock */
2189 LIST_INSERT_HEAD(&mowners, mo, mo_link);
2190 }
2191
2192 void
2193 mowner_detach(struct mowner *mo)
2194 {
2195
2196 KASSERT(mo->mo_counters != NULL);
2197
2198 /* XXX lock */
2199 LIST_REMOVE(mo, mo_link);
2200
2201 percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
2202 mo->mo_counters = NULL;
2203 }
2204
2205 void
2206 mowner_init(struct mbuf *m, int type)
2207 {
2208 struct mowner_counter *mc;
2209 struct mowner *mo;
2210 int s;
2211
2212 m->m_owner = mo = &unknown_mowners[type];
2213 s = splvm();
2214 mc = percpu_getref(mo->mo_counters);
2215 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
2216 percpu_putref(mo->mo_counters);
2217 splx(s);
2218 }
2219
2220 void
2221 mowner_ref(struct mbuf *m, int flags)
2222 {
2223 struct mowner *mo = m->m_owner;
2224 struct mowner_counter *mc;
2225 int s;
2226
2227 s = splvm();
2228 mc = percpu_getref(mo->mo_counters);
2229 if ((flags & M_EXT) != 0)
2230 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
2231 if ((flags & M_EXT_CLUSTER) != 0)
2232 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
2233 percpu_putref(mo->mo_counters);
2234 splx(s);
2235 }
2236
2237 void
2238 mowner_revoke(struct mbuf *m, bool all, int flags)
2239 {
2240 struct mowner *mo = m->m_owner;
2241 struct mowner_counter *mc;
2242 int s;
2243
2244 s = splvm();
2245 mc = percpu_getref(mo->mo_counters);
2246 if ((flags & M_EXT) != 0)
2247 mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
2248 if ((flags & M_EXT_CLUSTER) != 0)
2249 mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
2250 if (all)
2251 mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
2252 percpu_putref(mo->mo_counters);
2253 splx(s);
2254 if (all)
2255 m->m_owner = &revoked_mowner;
2256 }
2257
2258 static void
2259 mowner_claim(struct mbuf *m, struct mowner *mo)
2260 {
2261 struct mowner_counter *mc;
2262 int flags = m->m_flags;
2263 int s;
2264
2265 s = splvm();
2266 mc = percpu_getref(mo->mo_counters);
2267 mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
2268 if ((flags & M_EXT) != 0)
2269 mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
2270 if ((flags & M_EXT_CLUSTER) != 0)
2271 mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
2272 percpu_putref(mo->mo_counters);
2273 splx(s);
2274 m->m_owner = mo;
2275 }
2276
2277 void
2278 m_claim(struct mbuf *m, struct mowner *mo)
2279 {
2280
2281 if (m->m_owner == mo || mo == NULL)
2282 return;
2283
2284 mowner_revoke(m, true, m->m_flags);
2285 mowner_claim(m, mo);
2286 }
2287
2288 void
2289 m_claimm(struct mbuf *m, struct mowner *mo)
2290 {
2291
2292 for (; m != NULL; m = m->m_next)
2293 m_claim(m, mo);
2294 }
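
/*
 * Example (illustrative sketch): a subsystem that wants per-owner mbuf
 * statistics under MBUFTRACE declares an owner, attaches it once at
 * initialization time, and claims the mbufs it handles with MCLAIM().
 * "example" is a made-up owner name.
 *
 *	static struct mowner example_mowner = MOWNER_INIT("example", "rx");
 *
 *	mowner_attach(&example_mowner);
 *	...
 *	MCLAIM(m, &example_mowner);
 */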
2295 #endif /* defined(MBUFTRACE) */
2296
2297 #ifdef DIAGNOSTIC
2298 /*
2299  * Verify that the mbuf chain is not malformed. Used only for diagnostics.
2300 * Panics on error.
2301 */
2302 void
2303 m_verify_packet(struct mbuf *m)
2304 {
2305 struct mbuf *n = m;
2306 char *low, *high, *dat;
2307 int totlen = 0, len;
2308
2309 if (__predict_false((m->m_flags & M_PKTHDR) == 0)) {
2310 panic("%s: mbuf doesn't have M_PKTHDR", __func__);
2311 }
2312
2313 while (n != NULL) {
2314 if (__predict_false(n->m_type == MT_FREE)) {
2315 panic("%s: mbuf already freed (n = %p)", __func__, n);
2316 }
2317 #if 0
2318 /*
2319 * This ought to be a rule of the mbuf API. Unfortunately,
2320 * many places don't respect that rule.
2321 */
2322 if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) {
2323 panic("%s: M_PKTHDR set on secondary mbuf", __func__);
2324 }
2325 #endif
2326 if (__predict_false(n->m_nextpkt != NULL)) {
2327 panic("%s: m_nextpkt not null (m_nextpkt = %p)",
2328 __func__, n->m_nextpkt);
2329 }
2330
2331 dat = n->m_data;
2332 len = n->m_len;
2333 if (__predict_false(len < 0)) {
2334 panic("%s: incorrect length (len = %d)", __func__, len);
2335 }
2336
2337 low = M_BUFADDR(n);
2338 high = low + M_BUFSIZE(n);
2339 if (__predict_false((dat < low) || (dat + len > high))) {
2340 panic("%s: m_data not in packet"
2341 "(dat = %p, len = %d, low = %p, high = %p)",
2342 __func__, dat, len, low, high);
2343 }
2344
2345 totlen += len;
2346 n = n->m_next;
2347 }
2348
2349 if (__predict_false(totlen != m->m_pkthdr.len)) {
2350 panic("%s: inconsistent mbuf length (%d != %d)", __func__,
2351 totlen, m->m_pkthdr.len);
2352 }
2353 }
2354 #endif
2355
2356 struct m_tag *
2357 m_tag_get(int type, int len, int wait)
2358 {
2359 struct m_tag *t;
2360
2361 if (len < 0)
2362 return NULL;
2363 t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait);
2364 if (t == NULL)
2365 return NULL;
2366 t->m_tag_id = type;
2367 t->m_tag_len = len;
2368 return t;
2369 }
2370
2371 void
2372 m_tag_free(struct m_tag *t)
2373 {
2374 free(t, M_PACKET_TAGS);
2375 }
2376
2377 void
2378 m_tag_prepend(struct mbuf *m, struct m_tag *t)
2379 {
2380 KASSERT((m->m_flags & M_PKTHDR) != 0);
2381 SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link);
2382 }
2383
2384 void
2385 m_tag_unlink(struct mbuf *m, struct m_tag *t)
2386 {
2387 KASSERT((m->m_flags & M_PKTHDR) != 0);
2388 SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link);
2389 }
2390
2391 void
2392 m_tag_delete(struct mbuf *m, struct m_tag *t)
2393 {
2394 m_tag_unlink(m, t);
2395 m_tag_free(t);
2396 }
2397
2398 void
2399 m_tag_delete_chain(struct mbuf *m)
2400 {
2401 struct m_tag *p, *q;
2402
2403 KASSERT((m->m_flags & M_PKTHDR) != 0);
2404
2405 p = SLIST_FIRST(&m->m_pkthdr.tags);
2406 if (p == NULL)
2407 return;
2408 while ((q = SLIST_NEXT(p, m_tag_link)) != NULL)
2409 m_tag_delete(m, q);
2410 m_tag_delete(m, p);
2411 }
2412
2413 struct m_tag *
2414 m_tag_find(const struct mbuf *m, int type)
2415 {
2416 struct m_tag *p;
2417
2418 KASSERT((m->m_flags & M_PKTHDR) != 0);
2419
2420 p = SLIST_FIRST(&m->m_pkthdr.tags);
2421 while (p != NULL) {
2422 if (p->m_tag_id == type)
2423 return p;
2424 p = SLIST_NEXT(p, m_tag_link);
2425 }
2426 return NULL;
2427 }
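
/*
 * Example (illustrative sketch): attach a small piece of metadata to a
 * packet and look it up again later.  PACKET_TAG_EXAMPLE stands in for
 * one of the real PACKET_TAG_* constants from <sys/mbuf.h>; the tag
 * payload is stored directly after the struct m_tag.
 *
 *	struct m_tag *mtag;
 *
 *	mtag = m_tag_get(PACKET_TAG_EXAMPLE, sizeof(uint32_t), M_NOWAIT);
 *	if (mtag == NULL)
 *		return ENOBUFS;
 *	*(uint32_t *)(mtag + 1) = cookie;
 *	m_tag_prepend(m, mtag);
 *	...
 *	mtag = m_tag_find(m, PACKET_TAG_EXAMPLE);
 *	if (mtag != NULL)
 *		cookie = *(uint32_t *)(mtag + 1);
 */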
2428
2429 struct m_tag *
2430 m_tag_copy(struct m_tag *t)
2431 {
2432 struct m_tag *p;
2433
2434 p = m_tag_get(t->m_tag_id, t->m_tag_len, M_NOWAIT);
2435 if (p == NULL)
2436 return NULL;
2437 memcpy(p + 1, t + 1, t->m_tag_len);
2438 return p;
2439 }
2440
2441 /*
2442  * Copy the tag chain from one mbuf to another. The destination mbuf (to)
2443  * loses any attached tags even if the operation fails. This should not be a
2444  * problem, as m_tag_copy_chain() is typically called with a newly-allocated
2445 * destination mbuf.
2446 */
2447 int
2448 m_tag_copy_chain(struct mbuf *to, struct mbuf *from)
2449 {
2450 struct m_tag *p, *t, *tprev = NULL;
2451
2452 KASSERT((from->m_flags & M_PKTHDR) != 0);
2453
2454 m_tag_delete_chain(to);
2455 SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) {
2456 t = m_tag_copy(p);
2457 if (t == NULL) {
2458 m_tag_delete_chain(to);
2459 return 0;
2460 }
2461 if (tprev == NULL)
2462 SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link);
2463 else
2464 SLIST_INSERT_AFTER(tprev, t, m_tag_link);
2465 tprev = t;
2466 }
2467 return 1;
2468 }
2469