/*	$NetBSD: uipc_mbuf.c,v 1.239 2020/04/24 22:50:55 jdolecek Exp $	*/

/*
 * Copyright (c) 1999, 2001, 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.239 2020/04/24 22:50:55 jdolecek Exp $");

#ifdef _KERNEL_OPT
#include "opt_mbuftrace.h"
#include "opt_nmbclusters.h"
#include "opt_ddb.h"
#include "ether.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>

pool_cache_t mb_cache;		/* mbuf cache */
static pool_cache_t mcl_cache;	/* mbuf cluster cache */

struct mbstat mbstat;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

static void mb_drain(void *, int);
static int mb_ctor(void *, void *, int);

static void sysctl_kern_mbuf_setup(void);

static struct sysctllog *mbuf_sysctllog;

static struct mbuf *m_copy_internal(struct mbuf *, int, int, int, bool);
static struct mbuf *m_split_internal(struct mbuf *, int, int, bool);
static int m_copyback_internal(struct mbuf **, int, int, const void *,
    int, int);

/* Flags for m_copyback_internal. */
#define	CB_COPYBACK	0x0001	/* copyback from cp */
#define	CB_PRESERVE	0x0002	/* preserve original data */
#define	CB_COW		0x0004	/* do copy-on-write */
#define	CB_EXTEND	0x0008	/* extend chain */

static const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase kern.mbuf.nmbclusters";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

static percpu_t *mbstat_percpu;

#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	MOWNER_INIT("unknown", "free"),
	MOWNER_INIT("unknown", "data"),
	MOWNER_INIT("unknown", "header"),
	MOWNER_INIT("unknown", "soname"),
	MOWNER_INIT("unknown", "soopts"),
	MOWNER_INIT("unknown", "ftable"),
	MOWNER_INIT("unknown", "control"),
	MOWNER_INIT("unknown", "oobdata"),
};
struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
#endif

#define	MEXT_ISEMBEDDED(m)	((m)->m_ext_ref == (m))

#define	MCLADDREFERENCE(o, n)					\
do {								\
	KASSERT(((o)->m_flags & M_EXT) != 0);			\
	KASSERT(((n)->m_flags & M_EXT) == 0);			\
	KASSERT((o)->m_ext.ext_refcnt >= 1);			\
	(n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS);	\
	atomic_inc_uint(&(o)->m_ext.ext_refcnt);		\
	(n)->m_ext_ref = (o)->m_ext_ref;			\
	mowner_ref((n), (n)->m_flags);				\
} while (/* CONSTCOND */ 0)

static int
nmbclusters_limit(void)
{
#if defined(PMAP_MAP_POOLPAGE)
	/* direct mapping, doesn't use space in kmem_arena */
	vsize_t max_size = physmem / 4;
#else
	vsize_t max_size = MIN(physmem / 4, nkmempages / 4);
#endif

	max_size = max_size * PAGE_SIZE / MCLBYTES;
#ifdef NMBCLUSTERS_MAX
	max_size = MIN(max_size, NMBCLUSTERS_MAX);
#endif

#ifdef NMBCLUSTERS
	return MIN(max_size, NMBCLUSTERS);
#else
	return max_size;
#endif
}

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

	CTASSERT(sizeof(struct _m_ext) <= MHLEN);
	CTASSERT(sizeof(struct mbuf) == MSIZE);

	sysctl_kern_mbuf_setup();

	mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl",
	    NULL, IPL_VM, mb_ctor, NULL, NULL);
	KASSERT(mb_cache != NULL);

	mcl_cache = pool_cache_init(mclbytes, COHERENCY_UNIT, 0, 0, "mclpl",
	    NULL, IPL_VM, NULL, NULL, NULL);
	KASSERT(mcl_cache != NULL);

	pool_cache_set_drain_hook(mb_cache, mb_drain, NULL);
	pool_cache_set_drain_hook(mcl_cache, mb_drain, NULL);

	/*
	 * Set an arbitrary default limit on the number of mbuf clusters.
	 */
#ifdef NMBCLUSTERS
	nmbclusters = nmbclusters_limit();
#else
	nmbclusters = MAX(1024,
	    (vsize_t)physmem * PAGE_SIZE / MCLBYTES / 16);
	nmbclusters = MIN(nmbclusters, nmbclusters_limit());
#endif

	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the limit
	 * reached message at most once a minute.
	 */
	pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);

	mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));

	/*
	 * Set a low water mark for both mbufs and clusters.  This should
	 * help ensure that they can be allocated in a memory starvation
	 * situation.  This is important for e.g. diskless systems which
	 * must allocate mbufs in order for the pagedaemon to clean pages.
	 */
	pool_cache_setlowat(mb_cache, mblowat);
	pool_cache_setlowat(mcl_cache, mcllowat);

#ifdef MBUFTRACE
	{
		/*
		 * Attach the unknown mowners.
		 */
		int i;
		MOWNER_ATTACH(&revoked_mowner);
		for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
		     i-- > 0; )
			MOWNER_ATTACH(&unknown_mowners[i]);
	}
#endif
}
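
/*
 * Once mbinit() has run, the caches above back all mbuf traffic.  A
 * minimal usage sketch (hypothetical driver receive path, not part of
 * this file; "frame" and "framelen" are assumed inputs):
 *
 *	struct mbuf *m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m != NULL && framelen > MHLEN) {
 *		MCLGET(m, M_DONTWAIT);
 *		if ((m->m_flags & M_EXT) == 0) {
 *			m_freem(m);
 *			m = NULL;
 *		}
 *	}
 *	if (m != NULL) {
 *		m->m_len = m->m_pkthdr.len = framelen;
 *		memcpy(mtod(m, void *), frame, framelen);
 *	}
 */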

static void
mb_drain(void *arg, int flags)
{
	struct domain *dp;
	const struct protosw *pr;
	struct ifnet *ifp;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splvm();
	DOMAIN_FOREACH(dp) {
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	}
	/* XXX we cannot use psref in H/W interrupt */
	if (!cpu_intr_p()) {
		int bound = curlwp_bind();
		IFNET_READER_FOREACH(ifp) {
			struct psref psref;

			if_acquire(ifp, &psref);

			if (ifp->if_drain)
				(*ifp->if_drain)(ifp);

			if_release(ifp, &psref);
		}
		curlwp_bindx(bound);
	}
	splx(s);
	mbstat.m_drain++;
	KERNEL_UNLOCK_ONE(NULL);
}

/*
 * sysctl helper routine for the kern.mbuf subtree.
 * nmbclusters, mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
	int error, newval;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	switch (rnode->sysctl_num) {
	case MBUF_NMBCLUSTERS:
	case MBUF_MBLOWAT:
	case MBUF_MCLLOWAT:
		newval = *(int*)rnode->sysctl_data;
		break;
	default:
		return EOPNOTSUPP;
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;
	if (newval < 0)
		return EINVAL;

	switch (node.sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (newval < nmbclusters)
			return EINVAL;
		if (newval > nmbclusters_limit())
			return EINVAL;
		nmbclusters = newval;
		pool_cache_sethardlimit(mcl_cache, nmbclusters,
		    mclpool_warnmsg, 60);
		break;
	case MBUF_MBLOWAT:
		mblowat = newval;
		pool_cache_setlowat(mb_cache, mblowat);
		break;
	case MBUF_MCLLOWAT:
		mcllowat = newval;
		pool_cache_setlowat(mcl_cache, mcllowat);
		break;
	}

	return 0;
}

#ifdef MBUFTRACE
static void
mowner_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mowner_counter *mc = v1;
	struct mowner_user *mo_user = v2;
	int i;

	for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
		mo_user->mo_counter[i] += mc->mc_counter[i];
	}
}

static void
mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
{

	memset(mo_user, 0, sizeof(*mo_user));
	CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
	CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
	memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
	memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
	percpu_foreach(mo->mo_counters, mowner_convert_to_user_cb, mo_user);
}

static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *mo;
	size_t len = 0;
	int error = 0;

	if (namelen != 0)
		return EINVAL;
	if (newp != NULL)
		return EPERM;

	LIST_FOREACH(mo, &mowners, mo_link) {
		struct mowner_user mo_user;

		mowner_convert_to_user(mo, &mo_user);

		if (oldp != NULL) {
			if (*oldlenp - len < sizeof(mo_user)) {
				error = ENOMEM;
				break;
			}
			error = copyout(&mo_user, (char *)oldp + len,
			    sizeof(mo_user));
			if (error)
				break;
		}
		len += sizeof(mo_user);
	}

	if (error == 0)
		*oldlenp = len;

	return error;
}
#endif /* MBUFTRACE */

void
mbstat_type_add(int type, int diff)
{
	struct mbstat_cpu *mb;
	int s;

	s = splvm();
	mb = percpu_getref(mbstat_percpu);
	mb->m_mtypes[type] += diff;
	percpu_putref(mbstat_percpu);
	splx(s);
}

static void
mbstat_convert_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mbstat_cpu *mbsc = v1;
	struct mbstat *mbs = v2;
	int i;

	for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
		mbs->m_mtypes[i] += mbsc->m_mtypes[i];
	}
}

static void
mbstat_convert_to_user(struct mbstat *mbs)
{

	memset(mbs, 0, sizeof(*mbs));
	mbs->m_drain = mbstat.m_drain;
	percpu_foreach(mbstat_percpu, mbstat_convert_to_user_cb, mbs);
}

static int
sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct mbstat mbs;

	mbstat_convert_to_user(&mbs);
	node = *rnode;
	node.sysctl_data = &mbs;
	node.sysctl_size = sizeof(mbs);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static void
sysctl_kern_mbuf_setup(void)
{

	KASSERT(mbuf_sysctllog == NULL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "mbuf",
	    SYSCTL_DESCR("mbuf control variables"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "msize",
	    SYSCTL_DESCR("mbuf base size"),
	    NULL, msize, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
	    CTLTYPE_INT, "mclbytes",
	    SYSCTL_DESCR("mbuf cluster size"),
	    NULL, mclbytes, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "nmbclusters",
	    SYSCTL_DESCR("Limit on the number of mbuf clusters"),
	    sysctl_kern_mbuf, 0, &nmbclusters, 0,
	    CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "mblowat",
	    SYSCTL_DESCR("mbuf low water mark"),
	    sysctl_kern_mbuf, 0, &mblowat, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "mcllowat",
	    SYSCTL_DESCR("mbuf cluster low water mark"),
	    sysctl_kern_mbuf, 0, &mcllowat, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "stats",
	    SYSCTL_DESCR("mbuf allocation statistics"),
	    sysctl_kern_mbuf_stats, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "mowners",
	    SYSCTL_DESCR("Information about mbuf owners"),
	    sysctl_kern_mbuf_mowners, 0, NULL, 0,
	    CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif
}

static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return 0;
}

/*
 * Add an mbuf to the end of a chain.
 */
struct mbuf *
m_add(struct mbuf *c, struct mbuf *m)
{
	struct mbuf *n;

	if (c == NULL)
		return m;

	for (n = c; n->m_next != NULL; n = n->m_next)
		continue;
	n->m_next = m;
	return c;
}
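
/*
 * Usage sketch (hypothetical caller): accumulate mbufs into one chain.
 * m_add() walks to the tail on every call, so repeated appends to a
 * long chain cost O(n) each; "frag1"/"frag2" are assumed inputs:
 *
 *	struct mbuf *chain = NULL;
 *	chain = m_add(chain, frag1);
 *	chain = m_add(chain, frag2);
 */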

struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;

	KASSERT(type != MT_FREE);

	m = pool_cache_get(mb_cache,
	    how == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : PR_NOWAIT);
	if (m == NULL)
		return NULL;
	KASSERT(((vaddr_t)m->m_dat & PAGE_MASK) + MLEN <= PAGE_SIZE);

	mbstat_type_add(type, 1);

	mowner_init(m, type);
	m->m_ext_ref = m;	/* default */
	m->m_type = type;
	m->m_len = 0;
	m->m_next = NULL;
	m->m_nextpkt = NULL;	/* default */
	m->m_data = m->m_dat;
	m->m_flags = 0;		/* default */

	return m;
}

struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;

	m = m_get(how, type);
	if (m == NULL)
		return NULL;

	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;

	m_reset_rcvif(m);
	m->m_pkthdr.len = 0;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	m->m_pkthdr.segsz = 0;
	m->m_pkthdr.ether_vtag = 0;
	m->m_pkthdr.pkthdr_flags = 0;
	SLIST_INIT(&m->m_pkthdr.tags);

	m->m_pkthdr.pattr_class = NULL;
	m->m_pkthdr.pattr_af = AF_UNSPEC;
	m->m_pkthdr.pattr_hdr = NULL;

	return m;
}

void
m_clget(struct mbuf *m, int how)
{
	m->m_ext_storage.ext_buf = (char *)pool_cache_get_paddr(mcl_cache,
	    how == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : PR_NOWAIT,
	    &m->m_ext_storage.ext_paddr);

	if (m->m_ext_storage.ext_buf == NULL)
		return;

	KASSERT(((vaddr_t)m->m_ext_storage.ext_buf & PAGE_MASK) + mclbytes
	    <= PAGE_SIZE);

	MCLINITREFERENCE(m);
	m->m_data = m->m_ext.ext_buf;
	m->m_flags = (m->m_flags & ~M_EXTCOPYFLAGS) |
	    M_EXT|M_EXT_CLUSTER|M_EXT_RW;
	m->m_ext.ext_size = MCLBYTES;
	m->m_ext.ext_free = NULL;
	m->m_ext.ext_arg = NULL;
	/* ext_paddr initialized above */

	mowner_ref(m, M_EXT|M_EXT_CLUSTER);
}

struct mbuf *
m_getcl(int how, int type, int flags)
{
	struct mbuf *mp;

	if ((flags & M_PKTHDR) != 0)
		mp = m_gethdr(how, type);
	else
		mp = m_get(how, type);

	if (mp == NULL)
		return NULL;

	MCLGET(mp, how);
	if ((mp->m_flags & M_EXT) != 0)
		return mp;

	m_free(mp);
	return NULL;
}
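
/*
 * Sketch: m_getcl(how, type, M_PKTHDR) behaves like m_gethdr() followed
 * by MCLGET(), except that the mbuf is freed for the caller when the
 * cluster allocation fails:
 *
 *	struct mbuf *m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 *	if (m == NULL)
 *		return ENOBUFS;	// both mbuf and cluster are present here
 */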

/*
 * Utility function for M_PREPEND. Do *NOT* use it directly.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (__predict_false(len > MHLEN)) {
		panic("%s: len > MHLEN", __func__);
	}

	KASSERT(len != M_COPYALL);
	mn = m_get(how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return NULL;
	}

	if (m->m_flags & M_PKTHDR) {
		m_move_pkthdr(mn, m);
	} else {
		MCLAIM(mn, m->m_owner);
	}
	mn->m_next = m;
	m = mn;

	if (m->m_flags & M_PKTHDR) {
		if (len < MHLEN)
			m_align(m, len);
	} else {
		if (len < MLEN)
			m_align(m, len);
	}

	m->m_len = len;
	return m;
}
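
/*
 * Callers go through the M_PREPEND() macro rather than calling
 * m_prepend() directly.  A typical sketch, making room for a
 * hypothetical protocol header "struct myproto_hdr":
 *
 *	M_PREPEND(m, sizeof(struct myproto_hdr), M_DONTWAIT);
 *	if (m == NULL)
 *		return ENOBUFS;	// the chain was freed on failure
 *	hdr = mtod(m, struct myproto_hdr *);
 */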

struct mbuf *
m_copym(struct mbuf *m, int off, int len, int wait)
{
	/* Shallow copy on M_EXT. */
	return m_copy_internal(m, off, len, wait, false);
}

struct mbuf *
m_dup(struct mbuf *m, int off, int len, int wait)
{
	/* Deep copy. */
	return m_copy_internal(m, off, len, wait, true);
}
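
/*
 * The difference matters for writability: after m_copym() the copy may
 * share (reference) cluster storage with the original, so writing
 * through either chain is unsafe, while after m_dup() the data is
 * private.  Sketch:
 *
 *	struct mbuf *ro = m_copym(m, 0, M_COPYALL, M_DONTWAIT); // shared
 *	struct mbuf *rw = m_dup(m, 0, M_COPYALL, M_DONTWAIT);   // private
 */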

static inline int
m_copylen(int len, int copylen)
{
	return (len == M_COPYALL) ? copylen : uimin(len, copylen);
}

static struct mbuf *
m_copy_internal(struct mbuf *m, int off0, int len, int wait, bool deep)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || (len != M_COPYALL && len < 0))
		panic("%s: off %d, len %d", __func__, off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == NULL)
			panic("%s: m == NULL, off %d", __func__, off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}

	np = &top;
	top = NULL;
	while (len == M_COPYALL || len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("%s: m == NULL, len %d [!COPYALL]",
				    __func__, len);
			break;
		}

		n = m_get(wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		MCLAIM(n, m->m_owner);

		if (copyhdr) {
			m_copy_pkthdr(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = m_copylen(len, m->m_len - off);

		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * We don't care if MCLGET fails. n->m_len is
				 * recomputed and handles that.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = m_copylen(len, n->m_len);
				n->m_len = uimin(n->m_len, m->m_len - off);
				memcpy(mtod(n, void *), mtod(m, char *) + off,
				    (unsigned)n->m_len);
			}
		} else {
			memcpy(mtod(n, void *), mtod(m, char *) + off,
			    (unsigned)n->m_len);
		}

		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;

		KASSERT(off <= m->m_len);

		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}

	return top;

nospace:
	m_freem(top);
	return NULL;
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case 'm_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	if (__predict_false((m->m_flags & M_PKTHDR) == 0)) {
		panic("%s: no header (m = %p)", __func__, m);
	}

	n = m_get(how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	MCLAIM(n, m->m_owner);
	m_copy_pkthdr(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		o = m_get(how, m->m_type);
		if (!o)
			goto nospace;

		MCLAIM(o, m->m_owner);
		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;

nospace:
	m_freem(top);
	return NULL;
}

void
m_copydata(struct mbuf *m, int off, int len, void *cp)
{
	unsigned int count;
	struct mbuf *m0 = m;
	int len0 = len;
	int off0 = off;
	void *cp0 = cp;

	KASSERT(len != M_COPYALL);
	if (off < 0 || len < 0)
		panic("m_copydata: off %d, len %d", off, len);
	while (off > 0) {
		if (m == NULL)
			panic("m_copydata(%p,%d,%d,%p): m=NULL, off=%d (%d)",
			    m0, len0, off0, cp0, off, off0 - off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata(%p,%d,%d,%p): "
			    "m=NULL, off=%d (%d), len=%d (%d)",
			    m0, len0, off0, cp0,
			    off, off0 - off, len, len0 - len);
		count = uimin(m->m_len - off, len);
		memcpy(cp, mtod(m, char *) + off, count);
		len -= count;
		cp = (char *)cp + count;
		off = 0;
		m = m->m_next;
	}
}
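
/*
 * Typical use (sketch): pull a fixed-size header out of the chain into
 * an aligned local copy, no matter how the chain happens to be split;
 * "struct hdr" is a stand-in for a real protocol header type:
 *
 *	struct hdr h;
 *	if (m->m_pkthdr.len >= sizeof(h))
 *		m_copydata(m, 0, sizeof(h), &h);
 */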

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), so the data
 * portion of n can end up in an mbuf of a different mbuf type.
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{

	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
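
/*
 * Because m_cat() leaves m_pkthdr alone, a caller concatenating packet
 * data must fix up the header length itself (sketch):
 *
 *	int nlen = m_length(n);	// length of n before it is consumed
 *	m_cat(m, n);
 *	m->m_pkthdr.len += nlen;
 */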

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}

		count -= len;
		if (count < 0)
			count = 0;

		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		if (m) {
			while (m->m_next)
				(m = m->m_next)->m_len = 0;
		}
	}
}
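
/*
 * Sketch: strip a 14-byte link-layer header from the front and a
 * 4-byte trailer (e.g. an FCS) from the back of a packet:
 *
 *	m_adj(m, 14);	// positive length: trim from the head
 *	m_adj(m, -4);	// negative length: trim from the tail
 */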

/*
 * m_ensure_contig: rearrange an mbuf chain so that the given length of
 * bytes is contiguous and in the data area of an mbuf (therefore, mtod()
 * would work for a structure of the given length).
 *
 * => On success, returns true and the resulting mbuf chain; false otherwise.
 * => The mbuf chain may change, but is always preserved valid.
 */
bool
m_ensure_contig(struct mbuf **m0, int len)
{
	struct mbuf *n = *m0, *m;
	size_t count, space;

	KASSERT(len != M_COPYALL);
	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len) {
			return true;
		}
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN) {
			return false;
		}
		m = m_get(M_DONTWAIT, n->m_type);
		if (m == NULL) {
			return false;
		}
		MCLAIM(m, n->m_owner);
		if (n->m_flags & M_PKTHDR) {
			m_move_pkthdr(m, n);
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = MIN(MIN(MAX(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);

	m->m_next = n;
	*m0 = m;

	return len <= 0;
}

/*
 * m_pullup: same as m_ensure_contig(), but destroys mbuf chain on error.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m = n;

	KASSERT(len != M_COPYALL);
	if (!m_ensure_contig(&m, len)) {
		KASSERT(m != NULL);
		m_freem(m);
		m = NULL;
	}
	return m;
}
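
/*
 * Classic m_pullup() sketch from protocol input paths: make the first
 * sizeof(struct hdr) bytes contiguous before casting via mtod();
 * "struct hdr" stands in for the protocol's header type:
 *
 *	if (m->m_len < sizeof(struct hdr) &&
 *	    (m = m_pullup(m, sizeof(struct hdr))) == NULL)
 *		return;	// the chain was already freed by m_pullup()
 *	h = mtod(m, struct hdr *);
 */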

/*
 * Ensure that [off, off + len) is contiguous on the mbuf chain "m".
 * The packet chain before "off" is kept untouched.
 * If offp == NULL, the target will start at <retval, 0> on resulting chain.
 * If offp != NULL, the target will start at <retval, *offp> on resulting chain.
 *
 * On error return (NULL return value), original "m" will be freed.
 *
 * XXX M_TRAILINGSPACE/M_LEADINGSPACE on shared cluster (sharedcluster)
 */
struct mbuf *
m_pulldown(struct mbuf *m, int off, int len, int *offp)
{
	struct mbuf *n, *o;
	int hlen, tlen, olen;
	int sharedcluster;

	/* Check invalid arguments. */
	if (m == NULL)
		panic("%s: m == NULL", __func__);
	if (len > MCLBYTES) {
		m_freem(m);
		return NULL;
	}

	n = m;
	while (n != NULL && off > 0) {
		if (n->m_len > off)
			break;
		off -= n->m_len;
		n = n->m_next;
	}
	/* Be sure to point at a non-empty mbuf. */
	while (n != NULL && n->m_len == 0)
		n = n->m_next;
	if (!n) {
		m_freem(m);
		return NULL;	/* mbuf chain too short */
	}

	sharedcluster = M_READONLY(n);

	/*
	 * The target data is on <n, off>. If we got enough data on the mbuf
	 * "n", we're done.
	 */
#ifdef __NO_STRICT_ALIGNMENT
	if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster)
#else
	if ((off == 0 || offp) && len <= n->m_len - off && !sharedcluster &&
	    ALIGNED_POINTER((mtod(n, char *) + off), uint32_t))
#endif
		goto ok;

	/*
	 * When (len <= n->m_len - off) and (off != 0), it is a special case.
	 * Len bytes from <n, off> sit in a single mbuf, but the caller does
	 * not like the starting position (off).
	 *
	 * Chop the current mbuf into two pieces, set off to 0.
	 */
	if (len <= n->m_len - off) {
		struct mbuf *mlast;

		o = m_dup(n, off, n->m_len - off, M_DONTWAIT);
		if (o == NULL) {
			m_freem(m);
			return NULL;	/* ENOBUFS */
		}
		KASSERT(o->m_len >= len);
		for (mlast = o; mlast->m_next != NULL; mlast = mlast->m_next)
			;
		n->m_len = off;
		mlast->m_next = n->m_next;
		n->m_next = o;
		n = o;
		off = 0;
		goto ok;
	}

	/*
	 * We need to take hlen from <n, off> and tlen from <n->m_next, 0>,
	 * and construct contiguous mbuf with m_len == len.
	 *
	 * Note that hlen + tlen == len, and tlen > 0.
	 */
	hlen = n->m_len - off;
	tlen = len - hlen;

	/*
	 * Ensure that we have enough trailing data on mbuf chain. If not,
	 * we can do nothing about the chain.
	 */
	olen = 0;
	for (o = n->m_next; o != NULL; o = o->m_next)
		olen += o->m_len;
	if (hlen + olen < len) {
		m_freem(m);
		return NULL;	/* mbuf chain too short */
	}

	/*
	 * Easy cases first. We need to use m_copydata() to get data from
	 * <n->m_next, 0>.
	 */
	if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen &&
	    !sharedcluster) {
		m_copydata(n->m_next, 0, tlen, mtod(n, char *) + n->m_len);
		n->m_len += tlen;
		m_adj(n->m_next, tlen);
		goto ok;
	}
	if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen &&
#ifndef __NO_STRICT_ALIGNMENT
	    ALIGNED_POINTER((n->m_next->m_data - hlen), uint32_t) &&
#endif
	    !sharedcluster && n->m_next->m_len >= tlen) {
		n->m_next->m_data -= hlen;
		n->m_next->m_len += hlen;
		memcpy(mtod(n->m_next, void *), mtod(n, char *) + off, hlen);
		n->m_len -= hlen;
		n = n->m_next;
		off = 0;
		goto ok;
	}

	/*
	 * Now we need to do it the hard way. Don't copy as there's no room
	 * on either end.
	 */
	o = m_get(M_DONTWAIT, m->m_type);
	if (o && len > MLEN) {
		MCLGET(o, M_DONTWAIT);
		if ((o->m_flags & M_EXT) == 0) {
			m_free(o);
			o = NULL;
		}
	}
	if (!o) {
		m_freem(m);
		return NULL;	/* ENOBUFS */
	}
	/* get hlen from <n, off> into <o, 0> */
	o->m_len = hlen;
	memcpy(mtod(o, void *), mtod(n, char *) + off, hlen);
	n->m_len -= hlen;
	/* get tlen from <n->m_next, 0> into <o, hlen> */
	m_copydata(n->m_next, 0, tlen, mtod(o, char *) + o->m_len);
	o->m_len += tlen;
	m_adj(n->m_next, tlen);
	o->m_next = n->m_next;
	n->m_next = o;
	n = o;
	off = 0;

ok:
	if (offp)
		*offp = off;
	return n;
}
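
/*
 * m_pulldown() sketch: unlike m_pullup(), it can make a region in the
 * middle of the chain contiguous without copying what precedes it.
 * "hlen" is an assumed offset of an inner header and "struct hdr" a
 * stand-in for its type:
 *
 *	int off;
 *	struct mbuf *n = m_pulldown(m, hlen, sizeof(struct hdr), &off);
 *	if (n == NULL)
 *		return;	// m was freed on error
 *	h = (struct hdr *)(mtod(n, char *) + off);
 */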

/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	KASSERT(len != M_COPYALL);
	if (len > ((int)MHLEN - dstoff))
		goto bad;
	m = m_get(M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	if (n->m_flags & M_PKTHDR) {
		m_move_pkthdr(m, n);
	}
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = uimin(uimin(uimax(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return m;
bad:
	m_freem(n);
	return NULL;
}

struct mbuf *
m_split(struct mbuf *m0, int len, int wait)
{
	return m_split_internal(m0, len, wait, true);
}

static struct mbuf *
m_split_internal(struct mbuf *m0, int len0, int wait, bool copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	KASSERT(len0 != M_COPYALL);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return NULL;

	remain = m->m_len - len;
	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return NULL;

		MCLAIM(n, m0->m_owner);
		m_copy_rcvif(n, m0);
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;

		if (m->m_flags & M_EXT)
			goto extpacket;

		if (remain > MHLEN) {
			/* m can't be the lead packet */
			m_align(n, 0);
			n->m_len = 0;
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void)m_free(n);
				m0->m_pkthdr.len = len_save;
				return NULL;
			}
			return n;
		} else {
			m_align(n, remain);
		}
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return n;
	} else {
		n = m_get(wait, m->m_type);
		if (n == NULL)
			return NULL;
		MCLAIM(n, m->m_owner);
		m_align(n, remain);
	}

extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, void *), mtod(m, char *) + len, remain);
	}

	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return n;
}
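
/*
 * Sketch: split a packet after "hdrlen" bytes (an assumed value); m
 * keeps the first part, with m_pkthdr.len adjusted, and the returned
 * chain holds the remainder:
 *
 *	struct mbuf *tail = m_split(m, hdrlen, M_DONTWAIT);
 *	if (tail == NULL)
 *		return ENOBUFS;	// m is left unchanged on failure
 */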

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp)
{
	struct mbuf *m;
	struct mbuf *top = NULL, **mp = &top;
	char *cp, *epkt;
	int len;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(uint16_t);
		totlen -= 2 * sizeof(uint16_t);
	}

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return NULL;
	m_set_rcvif(m, ifp);
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			m = m_get(M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return NULL;
			}
			m->m_len = MLEN;
		}

		len = uimin(totlen, epkt - cp);

		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return NULL;
			}
			m->m_len = len = uimin(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == NULL &&
				    len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}

		memcpy(mtod(m, void *), cp, (size_t)len);

		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}

	return top;
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif

	if (m0 == NULL)
		return;

#if defined(DEBUG)
	error =
#endif
	m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_EXTEND,
	    M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif
}
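
/*
 * Sketch: overwrite a 2-byte field at offset "off" in the packet, for
 * example a recomputed checksum; "value" and "off" are assumed inputs:
 *
 *	uint16_t sum = htons(value);
 *	m_copyback(m, off, sizeof(sum), &sum);
 */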

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KASSERT(len != M_COPYALL);
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback_internal(&m0, off, len, cp, CB_COPYBACK|CB_COW,
	    how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}

int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	int origlen = m_length(*mp);
#endif

	error = m_copyback_internal(mp, off, len, NULL, CB_PRESERVE|CB_COW,
	    how);
	if (error)
		return error;

#if defined(DEBUG)
	int reslen = 0;
	for (struct mbuf *n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsistent");
#endif

	return 0;
}
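
/*
 * Sketch: before modifying bytes in place (e.g. a NAT rewriting an
 * address), make the region writable; note the chain head may change,
 * which is why a struct mbuf ** is passed.  "drop" is a hypothetical
 * error label:
 *
 *	if (m_makewritable(&m, off, sizeof(struct in_addr), M_DONTWAIT))
 *		goto drop;	// ENOBUFS
 */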

static int
m_copyback_internal(struct mbuf **mp0, int off, int len, const void *vp,
    int flags, int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & CB_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & CB_COPYBACK) == 0 || cp != NULL);

	if (len == M_COPYALL)
		len = m_length(*mp0) - off;

	/*
	 * we don't bother to update "totlen" in the case of CB_COW,
	 * assuming that CB_EXTEND and CB_COW are exclusive.
	 */

	KASSERT((~flags & (CB_EXTEND|CB_COW)) != 0);

	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			int tspace;
extend:
			if ((flags & CB_EXTEND) == 0)
				goto out;

			/*
			 * try to make some space at the end of "m".
			 */

			mlen = m->m_len;
			if (off + len >= MINCLSIZE &&
			    (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
				MCLGET(m, how);
			}
			tspace = M_TRAILINGSPACE(m);
			if (tspace > 0) {
				tspace = uimin(tspace, off + len);
				KASSERT(tspace > 0);
				memset(mtod(m, char *) + m->m_len, 0,
				    uimin(off, tspace));
				m->m_len += tspace;
				off += mlen;
				totlen -= mlen;
				continue;
			}

			/*
			 * need to allocate an mbuf.
			 */

			if (off + len >= MINCLSIZE) {
				n = m_getcl(how, m->m_type, 0);
			} else {
				n = m_get(how, m->m_type);
			}
			if (n == NULL) {
				goto out;
			}
			n->m_len = uimin(M_TRAILINGSPACE(n), off + len);
			memset(mtod(n, char *), 0, uimin(n->m_len, off));
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			/*
			 * This mbuf is read-only. Allocate a new writable
			 * mbuf and try again.
			 */
			char *datap;
			int eatlen;

			KASSERT((flags & CB_COW) != 0);

			/*
			 * if we're going to write into the middle of
			 * a mbuf, split it first.
			 */
			if (off > 0) {
				n = m_split_internal(m, off, how, false);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf. copy packet header if needed.
			 */
			n = m_get(how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				m_move_pkthdr(n, m);
				n->m_len = MHLEN;
			} else {
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * Free the region which has been overwritten,
			 * copying data from the old mbufs if requested.
			 */
			if (flags & CB_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = uimin(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = uimin(mlen, len);
		if (flags & CB_COPYBACK) {
			memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			goto extend;
		}
		mp = &m->m_next;
		m = m->m_next;
	}

out:
	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
		KASSERT((flags & CB_EXTEND) != 0);
		m->m_pkthdr.len = totlen;
	}

	return 0;

enobufs:
	return ENOBUFS;
}

/*
 * Compress the mbuf chain. Return the new mbuf chain on success, NULL on
 * failure. The first mbuf is preserved, and on success the pointer returned
 * is the same as the one passed.
 */
struct mbuf *
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0, *mn, *n;
	int sz;

	KASSERT((m->m_flags & M_PKTHDR) != 0);

	if (m->m_next == NULL)
		return m;

	/* Defrag to single mbuf if at all possible */
	if ((m->m_flags & M_EXT) == 0) {
		if (m->m_pkthdr.len <= MHLEN) {
			if (M_TRAILINGSPACE(m) < (m->m_pkthdr.len - m->m_len)) {
				KASSERT(M_LEADINGSPACE(m) >=
				    (m->m_pkthdr.len - m->m_len));
				memmove(m->m_pktdat, m->m_data, m->m_len);
				m->m_data = m->m_pktdat;
			}

			KASSERT(M_TRAILINGSPACE(m) >=
			    (m->m_pkthdr.len - m->m_len));
			if (__predict_false(!m_ensure_contig(&m,
			    m->m_pkthdr.len))) {
				panic("m_ensure_contig(%d) failed",
				    m->m_pkthdr.len);
			}
			return m;
		} else if (m->m_pkthdr.len <= MCLBYTES) {
			void *odata = m->m_data;

			MCLGET(m, how);
			if ((m->m_flags & M_EXT) == 0)
				return NULL;
			memcpy(m->m_data, odata, m->m_len);
			if (m_pulldown(m, m->m_len, m->m_pkthdr.len - m->m_len,
			    NULL) == NULL) {
				panic("m_pulldown(%d, %d) failed",
				    m->m_len, m->m_pkthdr.len - m->m_len);
			}
			return m;
		}
	}

	m0 = m_get(how, MT_DATA);
	if (m0 == NULL)
		return NULL;
	mn = m0;

	sz = m->m_pkthdr.len - m->m_len;
	KASSERT(sz >= 0);

	do {
		if (sz > MLEN) {
			MCLGET(mn, how);
			if ((mn->m_flags & M_EXT) == 0) {
				m_freem(m0);
				return NULL;
			}
		}

		mn->m_len = MIN(sz, MCLBYTES);

		m_copydata(m, m->m_pkthdr.len - sz, mn->m_len,
		    mtod(mn, void *));

		sz -= mn->m_len;

		if (sz > 0) {
			/* need more mbufs */
			n = m_get(how, MT_DATA);
			if (n == NULL) {
				m_freem(m0);
				return NULL;
			}

			mn->m_next = n;
			mn = n;
		}
	} while (sz > 0);

	m_freem(m->m_next);
	m->m_next = m0;

	return m;
}
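
/*
 * Sketch: drivers whose DMA engines accept only a few segments can
 * linearize a long chain before retrying a failed mapping.  "tag",
 * "map" and "drop" are assumed caller context:
 *
 *	error = bus_dmamap_load_mbuf(tag, map, m, BUS_DMA_NOWAIT);
 *	if (error == EFBIG) {
 *		if (m_defrag(m, M_DONTWAIT) == NULL)
 *			goto drop;	// chain intact, still fragmented
 *		error = bus_dmamap_load_mbuf(tag, map, m, BUS_DMA_NOWAIT);
 *	}
 */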

void
m_remove_pkthdr(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);

	m_tag_delete_chain(m);
	m->m_flags &= ~M_PKTHDR;
	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
}

void
m_copy_pkthdr(struct mbuf *to, struct mbuf *from)
{
	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 ||
	    SLIST_FIRST(&to->m_pkthdr.tags) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;

	SLIST_INIT(&to->m_pkthdr.tags);
	m_tag_copy_chain(to, from);
}

void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{
	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 ||
	    SLIST_FIRST(&to->m_pkthdr.tags) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;

	from->m_flags &= ~M_PKTHDR;
}

/*
 * Set the m_data pointer of a newly-allocated mbuf to place an object of the
 * specified size at the end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int buflen, adjust;

	KASSERT(len != M_COPYALL);
	KASSERT(M_LEADINGSPACE(m) == 0);

	buflen = M_BUFSIZE(m);

	KASSERT(len <= buflen);
	adjust = buflen - len;
	m->m_data += adjust &~ (sizeof(long)-1);
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, unsigned int), void *arg)
{
	unsigned int count;
	int rval;

	KASSERT(len != M_COPYALL);
	KASSERT(len >= 0);
	KASSERT(off >= 0);

	while (off > 0) {
		KASSERT(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL);
		count = uimin(m->m_len - off, len);

		rval = (*f)(arg, mtod(m, char *) + off, count);
		if (rval)
			return rval;

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return 0;
}
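
/*
 * Sketch: m_apply() visits the chain segment by segment without
 * flattening it, e.g. for checksumming [off, off + len).  The callback
 * signature matches the "f" parameter above; sum_cb() and the helper
 * cksum_partial() are hypothetical:
 *
 *	static int
 *	sum_cb(void *arg, void *data, unsigned int len)
 *	{
 *		*(uint32_t *)arg += cksum_partial(data, len);
 *		return 0;	// nonzero would abort the walk
 *	}
 *
 *	uint32_t sum = 0;
 *	m_apply(m, off, len, sum_cb, &sum);
 */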

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return m;
		}

		loc -= m->m_len;

		if (m->m_next == NULL) {
			if (loc == 0) {
				/* Point at the end of valid data */
				*off = m->m_len;
				return m;
			}
			return NULL;
		} else {
			m = m->m_next;
		}
	}

	return NULL;
}

/*
 * Release a reference to the mbuf external storage.
 *
 * => free the mbuf m itself as well.
 */
static void
m_ext_free(struct mbuf *m)
{
	const bool embedded = MEXT_ISEMBEDDED(m);
	bool dofree = true;
	u_int refcnt;

	KASSERT((m->m_flags & M_EXT) != 0);
	KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
	KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
	KASSERT((m->m_flags & M_EXT_CLUSTER) ==
	    (m->m_ext_ref->m_flags & M_EXT_CLUSTER));

	if (__predict_false(m->m_type == MT_FREE)) {
		panic("mbuf %p already freed", m);
	}

	if (__predict_true(m->m_ext.ext_refcnt == 1)) {
		refcnt = m->m_ext.ext_refcnt = 0;
	} else {
		refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
	}

	if (refcnt > 0) {
		if (embedded) {
			/*
			 * other mbuf's m_ext_ref still points to us.
			 */
			dofree = false;
		} else {
			m->m_ext_ref = m;
		}
	} else {
		/*
		 * dropping the last reference
		 */
		if (!embedded) {
			m->m_ext.ext_refcnt++;	/* XXX */
			m_ext_free(m->m_ext_ref);
			m->m_ext_ref = m;
		} else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
			pool_cache_put_paddr(mcl_cache,
			    m->m_ext.ext_buf, m->m_ext.ext_paddr);
		} else if (m->m_ext.ext_free) {
			(*m->m_ext.ext_free)(m,
			    m->m_ext.ext_buf, m->m_ext.ext_size,
			    m->m_ext.ext_arg);
			/*
			 * 'm' is already freed by the ext_free callback.
			 */
			dofree = false;
		} else {
			free(m->m_ext.ext_buf, 0);
		}
	}

	if (dofree) {
		m->m_type = MT_FREE;
		m->m_data = NULL;
		pool_cache_put(mb_cache, m);
	}
}

/*
 * Free a single mbuf and associated external storage. Return the
 * successor, if any.
 */
struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	mowner_revoke(m, 1, m->m_flags);
	mbstat_type_add(m->m_type, -1);

	if (m->m_flags & M_PKTHDR)
		m_tag_delete_chain(m);

	n = m->m_next;

	if (m->m_flags & M_EXT) {
		m_ext_free(m);
	} else {
		if (__predict_false(m->m_type == MT_FREE)) {
			panic("mbuf %p already freed", m);
		}
		m->m_type = MT_FREE;
		m->m_data = NULL;
		pool_cache_put(mb_cache, m);
	}

	return n;
}

void
m_freem(struct mbuf *m)
{
	if (m == NULL)
		return;
	do {
		m = m_free(m);
	} while (m);
}

#if defined(DDB)
void
m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
{
	char ch;
	bool opt_c = false;
	bool opt_d = false;
#if NETHER > 0
	bool opt_v = false;
	const struct mbuf *m0 = NULL;
#endif
	int no = 0;
	char buf[512];

	while ((ch = *(modif++)) != '\0') {
		switch (ch) {
		case 'c':
			opt_c = true;
			break;
		case 'd':
			opt_d = true;
			break;
#if NETHER > 0
		case 'v':
			opt_v = true;
			m0 = m;
			break;
#endif
		default:
			break;
		}
	}

nextchain:
	(*pr)("MBUF(%d) %p\n", no, m);
	snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags);
	(*pr)(" data=%p, len=%d, type=%d, flags=%s\n",
	    m->m_data, m->m_len, m->m_type, buf);
	if (opt_d) {
		int i;
		unsigned char *p = m->m_data;

		(*pr)(" data:");

		for (i = 0; i < m->m_len; i++) {
			if (i % 16 == 0)
				(*pr)("\n");
			(*pr)(" %02x", p[i]);
		}

		(*pr)("\n");
	}
	(*pr)(" owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next,
	    m->m_nextpkt);
	(*pr)(" leadingspace=%u, trailingspace=%u, readonly=%u\n",
	    (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m),
	    (int)M_READONLY(m));
	if ((m->m_flags & M_PKTHDR) != 0) {
		snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags);
		(*pr)(" pktlen=%d, rcvif=%p, csum_flags=%s, csum_data=0x%"
		    PRIx32 ", segsz=%u\n",
		    m->m_pkthdr.len, m_get_rcvif_NOMPSAFE(m),
		    buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
	}
	if ((m->m_flags & M_EXT)) {
		(*pr)(" ext_refcnt=%u, ext_buf=%p, ext_size=%zd, "
		    "ext_free=%p, ext_arg=%p\n",
		    m->m_ext.ext_refcnt,
		    m->m_ext.ext_buf, m->m_ext.ext_size,
		    m->m_ext.ext_free, m->m_ext.ext_arg);
	}
	if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
		vaddr_t sva = (vaddr_t)m->m_ext.ext_buf;
		vaddr_t eva = sva + m->m_ext.ext_size;
		int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT;
		int i;

		(*pr)(" pages:");
		for (i = 0; i < n; i++) {
			(*pr)(" %p", m->m_ext.ext_pgs[i]);
		}
		(*pr)("\n");
	}

	if (opt_c) {
		m = m->m_next;
		if (m != NULL) {
			no++;
			goto nextchain;
		}
	}

#if NETHER > 0
	if (opt_v && m0)
		m_examine(m0, AF_ETHER, modif, pr);
#endif
}
#endif /* defined(DDB) */

#if defined(MBUFTRACE)
void
mowner_init_owner(struct mowner *mo, const char *name, const char *descr)
{
	memset(mo, 0, sizeof(*mo));
	strlcpy(mo->mo_name, name, sizeof(mo->mo_name));
	strlcpy(mo->mo_descr, descr, sizeof(mo->mo_descr));
}

void
mowner_attach(struct mowner *mo)
{

	KASSERT(mo->mo_counters == NULL);
	mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));

	/* XXX lock */
	LIST_INSERT_HEAD(&mowners, mo, mo_link);
}

void
mowner_detach(struct mowner *mo)
{

	KASSERT(mo->mo_counters != NULL);

	/* XXX lock */
	LIST_REMOVE(mo, mo_link);

	percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
	mo->mo_counters = NULL;
}

void
mowner_init(struct mbuf *m, int type)
{
	struct mowner_counter *mc;
	struct mowner *mo;
	int s;

	m->m_owner = mo = &unknown_mowners[type];
	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
}

void
mowner_ref(struct mbuf *m, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_EXT_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
}

void
mowner_revoke(struct mbuf *m, bool all, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
	if ((flags & M_EXT_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
	if (all)
		mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	if (all)
		m->m_owner = &revoked_mowner;
}

static void
mowner_claim(struct mbuf *m, struct mowner *mo)
{
	struct mowner_counter *mc;
	int flags = m->m_flags;
	int s;

	s = splvm();
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_EXT_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
	splx(s);
	m->m_owner = mo;
}

void
m_claim(struct mbuf *m, struct mowner *mo)
{

	if (m->m_owner == mo || mo == NULL)
		return;

	mowner_revoke(m, true, m->m_flags);
	mowner_claim(m, mo);
}

void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	for (; m != NULL; m = m->m_next)
		m_claim(m, mo);
}
#endif /* defined(MBUFTRACE) */

#ifdef DIAGNOSTIC
/*
 * Verify that the mbuf chain is not malformed. Used only for diagnostics.
 * Panics on error.
 */
void
m_verify_packet(struct mbuf *m)
{
	struct mbuf *n = m;
	char *low, *high, *dat;
	int totlen = 0, len;

	if (__predict_false((m->m_flags & M_PKTHDR) == 0)) {
		panic("%s: mbuf doesn't have M_PKTHDR", __func__);
	}

	while (n != NULL) {
		if (__predict_false(n->m_type == MT_FREE)) {
			panic("%s: mbuf already freed (n = %p)", __func__, n);
		}
#if 0
		/*
		 * This ought to be a rule of the mbuf API. Unfortunately,
		 * many places don't respect that rule.
		 */
		if (__predict_false((n != m) && (n->m_flags & M_PKTHDR) != 0)) {
			panic("%s: M_PKTHDR set on secondary mbuf", __func__);
		}
#endif
		if (__predict_false(n->m_nextpkt != NULL)) {
			panic("%s: m_nextpkt not null (m_nextpkt = %p)",
			    __func__, n->m_nextpkt);
		}

		dat = n->m_data;
		len = n->m_len;
		if (__predict_false(len < 0)) {
			panic("%s: incorrect length (len = %d)", __func__, len);
		}

		low = M_BUFADDR(n);
		high = low + M_BUFSIZE(n);
		if (__predict_false((dat < low) || (dat + len > high))) {
			panic("%s: m_data not in packet"
			    "(dat = %p, len = %d, low = %p, high = %p)",
			    __func__, dat, len, low, high);
		}

		totlen += len;
		n = n->m_next;
	}

	if (__predict_false(totlen != m->m_pkthdr.len)) {
		panic("%s: inconsistent mbuf length (%d != %d)", __func__,
		    totlen, m->m_pkthdr.len);
	}
}
#endif

struct m_tag *
m_tag_get(int type, int len, int wait)
{
	struct m_tag *t;

	if (len < 0)
		return NULL;
	t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait);
	if (t == NULL)
		return NULL;
	t->m_tag_id = type;
	t->m_tag_len = len;
	return t;
}

void
m_tag_free(struct m_tag *t)
{
	free(t, M_PACKET_TAGS);
}

void
m_tag_prepend(struct mbuf *m, struct m_tag *t)
{
	KASSERT((m->m_flags & M_PKTHDR) != 0);
	SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link);
}

void
m_tag_unlink(struct mbuf *m, struct m_tag *t)
{
	KASSERT((m->m_flags & M_PKTHDR) != 0);
	SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link);
}

void
m_tag_delete(struct mbuf *m, struct m_tag *t)
{
	m_tag_unlink(m, t);
	m_tag_free(t);
}

void
m_tag_delete_chain(struct mbuf *m)
{
	struct m_tag *p, *q;

	KASSERT((m->m_flags & M_PKTHDR) != 0);

	p = SLIST_FIRST(&m->m_pkthdr.tags);
	if (p == NULL)
		return;
	while ((q = SLIST_NEXT(p, m_tag_link)) != NULL)
		m_tag_delete(m, q);
	m_tag_delete(m, p);
}

struct m_tag *
m_tag_find(const struct mbuf *m, int type)
{
	struct m_tag *p;

	KASSERT((m->m_flags & M_PKTHDR) != 0);

	p = SLIST_FIRST(&m->m_pkthdr.tags);
	while (p != NULL) {
		if (p->m_tag_id == type)
			return p;
		p = SLIST_NEXT(p, m_tag_link);
	}
	return NULL;
}
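
/*
 * Sketch of the tag API end to end, with a hypothetical tag type
 * PACKET_TAG_EXAMPLE carrying a uint32_t payload; the payload is
 * stored immediately after the m_tag header:
 *
 *	struct m_tag *t = m_tag_get(PACKET_TAG_EXAMPLE,
 *	    sizeof(uint32_t), M_NOWAIT);
 *	if (t != NULL) {
 *		*(uint32_t *)(t + 1) = cookie;	// cookie: assumed value
 *		m_tag_prepend(m, t);
 *	}
 *	// later, at another layer:
 *	t = m_tag_find(m, PACKET_TAG_EXAMPLE);
 */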

struct m_tag *
m_tag_copy(struct m_tag *t)
{
	struct m_tag *p;

	p = m_tag_get(t->m_tag_id, t->m_tag_len, M_NOWAIT);
	if (p == NULL)
		return NULL;
	memcpy(p + 1, t + 1, t->m_tag_len);
	return p;
}

/*
 * Copy a tag chain from one mbuf to another. The destination mbuf (to)
 * loses any attached tags even if the operation fails. This should not
 * be a problem, as m_tag_copy_chain() is typically called with a
 * newly-allocated destination mbuf.
 */
int
m_tag_copy_chain(struct mbuf *to, struct mbuf *from)
{
	struct m_tag *p, *t, *tprev = NULL;

	KASSERT((from->m_flags & M_PKTHDR) != 0);

	m_tag_delete_chain(to);
	SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) {
		t = m_tag_copy(p);
		if (t == NULL) {
			m_tag_delete_chain(to);
			return 0;
		}
		if (tprev == NULL)
			SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link);
		else
			SLIST_INSERT_AFTER(tprev, t, m_tag_link);
		tprev = t;
	}
	return 1;
}
