1 /* $NetBSD: mbuf.h,v 1.240 2024/05/12 10:34:56 rillig Exp $ */ 2 3 /* 4 * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center and Matt Thomas of 3am Software Foundry. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 62 */ 63 64 #ifndef _SYS_MBUF_H_ 65 #define _SYS_MBUF_H_ 66 67 #ifdef _KERNEL_OPT 68 #include "opt_mbuftrace.h" 69 #endif 70 71 #ifndef M_WAITOK 72 #include <sys/malloc.h> 73 #endif 74 #include <sys/pool.h> 75 #include <sys/queue.h> 76 #if defined(_KERNEL) 77 #include <sys/percpu_types.h> 78 #include <sys/socket.h> /* for AF_UNSPEC */ 79 #include <sys/psref.h> 80 #endif /* defined(_KERNEL) */ 81 82 /* For offsetof() */ 83 #if defined(_KERNEL) || defined(_STANDALONE) 84 #include <sys/systm.h> 85 #else 86 #include <stddef.h> 87 #endif 88 89 #include <uvm/uvm_param.h> /* for MIN_PAGE_SIZE */ 90 91 #include <net/if.h> 92 93 /* 94 * Mbufs are of a single size, MSIZE (machine/param.h), which 95 * includes overhead. An mbuf may add a single "mbuf cluster" of size 96 * MCLBYTES (also in machine/param.h), which has no additional overhead 97 * and is used instead of the internal data area; this is done when 98 * at least MINCLSIZE of data must be stored. 
 */

/* Packet tags structure */
struct m_tag {
	SLIST_ENTRY(m_tag) m_tag_link;	/* List of packet tags */
	uint16_t m_tag_id;		/* Tag ID */
	uint16_t m_tag_len;		/* Length of data */
};

/* mbuf ownership structure (MBUFTRACE accounting) */
struct mowner {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* entry on the list of mbuf owners */
	struct percpu *mo_counters;	/* per-CPU counters */
};

/* Static initializer for a struct mowner with the given name/description. */
#define MOWNER_INIT(x, y)	{ .mo_name = x, .mo_descr = y }

/* Indices into the per-CPU mowner counter array. */
enum mowner_counter_index {
	MOWNER_COUNTER_CLAIMS,		/* # of small mbuf claimed */
	MOWNER_COUNTER_RELEASES,	/* # of small mbuf released */
	MOWNER_COUNTER_CLUSTER_CLAIMS,	/* # of cluster mbuf claimed */
	MOWNER_COUNTER_CLUSTER_RELEASES,/* # of cluster mbuf released */
	MOWNER_COUNTER_EXT_CLAIMS,	/* # of M_EXT mbuf claimed */
	MOWNER_COUNTER_EXT_RELEASES,	/* # of M_EXT mbuf released */

	MOWNER_COUNTER_NCOUNTERS,	/* total number of counters */
};

#if defined(_KERNEL)
struct mowner_counter {
	u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
};
#endif

/* userland-exported version of struct mowner */
struct mowner_user {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* unused padding; for compatibility */
	u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
};

/*
 * Macros for type conversion
 * mtod(m,t) -	convert mbuf pointer to data pointer of correct type
 */
#define mtod(m, t)	((t)((m)->m_data))

/* header at beginning of each mbuf */
struct m_hdr {
	struct mbuf *mh_next;		/* next buffer in chain */
	struct mbuf *mh_nextpkt;	/* next chain in queue/record */
	char *mh_data;			/* location of data */
	struct mowner *mh_owner;	/* mbuf owner */
	int mh_len;			/* amount of data in this mbuf */
	int mh_flags;			/* flags; see below */
	paddr_t mh_paddr;		/* physical address of mbuf */
	short mh_type;			/* type of data in this mbuf */
};

/*
 * record/packet header in first mbuf of chain; valid if M_PKTHDR set
 *
 * A note about csum_data:
 *
 * o For the out-bound direction, the low 16 bits indicates the offset after
 *   the L4 header where the final L4 checksum value is to be stored and the
 *   high 16 bits is the length of the L3 header (the start of the data to
 *   be checksummed).
 *
 * o For the in-bound direction, it is only valid if the M_CSUM_DATA flag is
 *   set.  In this case, an L4 checksum has been calculated by hardware and
 *   is stored in csum_data, but it is up to software to perform final
 *   verification.
 *
 * Note for in-bound TCP/UDP checksums: we expect the csum_data to NOT
 * be bit-wise inverted (the final step in the calculation of an IP
 * checksum) -- this is so we can accumulate the checksum for fragmented
 * packets during reassembly.
 *
 * Size ILP32: 40
 *       LP64: 56
 */
struct pkthdr {
	union {
		void *ctx;		/* for M_GETCTX/M_SETCTX */
		if_index_t index;	/* rcv interface index */
	} _rcvif;
#define rcvif_index	_rcvif.index
	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
	int len;			/* total packet length */
	int csum_flags;			/* checksum flags */
	uint32_t csum_data;		/* checksum data */
	u_int segsz;			/* segment size */
	uint16_t ether_vtag;		/* ethernet 802.1p+q vlan tag */
	uint16_t pkthdr_flags;		/* flags for pkthdr, see below */
#define PKTHDR_FLAG_IPSEC_SKIP_PFIL	0x0001	/* skip pfil_run_hooks() after ipsec decrypt */

	/*
	 * Following three fields are open-coded struct altq_pktattr
	 * to rearrange struct pkthdr fields flexibly.
	 */
	int pattr_af;			/* ALTQ: address family */
	void *pattr_class;		/* ALTQ: sched class set by classifier */
	void *pattr_hdr;		/* ALTQ: saved header position in mbuf */
};

/* Checksumming flags (csum_flags).
 */
#define M_CSUM_TCPv4		0x00000001	/* TCP header/payload */
#define M_CSUM_UDPv4		0x00000002	/* UDP header/payload */
#define M_CSUM_TCP_UDP_BAD	0x00000004	/* TCP/UDP checksum bad */
#define M_CSUM_DATA		0x00000008	/* consult csum_data */
#define M_CSUM_TCPv6		0x00000010	/* IPv6 TCP header/payload */
#define M_CSUM_UDPv6		0x00000020	/* IPv6 UDP header/payload */
#define M_CSUM_IPv4		0x00000040	/* IPv4 header */
#define M_CSUM_IPv4_BAD		0x00000080	/* IPv4 header checksum bad */
#define M_CSUM_TSOv4		0x00000100	/* TCPv4 segmentation offload */
#define M_CSUM_TSOv6		0x00000200	/* TCPv6 segmentation offload */

/* Checksum-assist quirks: keep separate from jump-table bits. */
#define M_CSUM_BLANK		0x40000000	/* csum is missing */
#define M_CSUM_NO_PSEUDOHDR	0x80000000	/* Rx csum_data does not include
						 * the UDP/TCP pseudo-hdr, and
						 * is not yet 1s-complemented.
						 */

/* Bit names for csum_flags, in snprintb(3) format. */
#define M_CSUM_BITS \
    "\20\1TCPv4\2UDPv4\3TCP_UDP_BAD\4DATA\5TCPv6\6UDPv6\7IPv4\10IPv4_BAD" \
    "\11TSOv4\12TSOv6\37BLANK\40NO_PSEUDOHDR"

/*
 * Macros for manipulating csum_data on outgoing packets.  These are
 * used to pass information down from the L4/L3 to the L2.
 *
 *   _IPHL:   Length of the IPv{4/6} header, plus the options; in other
 *	      words the offset of the UDP/TCP header in the packet.
 *   _OFFSET: Offset of the checksum field in the UDP/TCP header.
 */
#define M_CSUM_DATA_IPv4_IPHL(x)	((x) >> 16)
#define M_CSUM_DATA_IPv4_OFFSET(x)	((x) & 0xffff)
#define M_CSUM_DATA_IPv6_IPHL(x)	((x) >> 16)
#define M_CSUM_DATA_IPv6_OFFSET(x)	((x) & 0xffff)
#define M_CSUM_DATA_IPv6_SET(x, v)	(x) = ((x) & 0xffff) | ((v) << 16)

/*
 * Max # of pages we can attach to m_ext.  This is carefully chosen
 * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
 */
#ifdef MIN_PAGE_SIZE
#define M_EXT_MAXPAGES	((65536 / MIN_PAGE_SIZE) + 1)
#endif

/*
 * Description of external storage mapped into mbuf, valid if M_EXT set.
 */
struct _m_ext_storage {
	unsigned int ext_refcnt;	/* reference count */
	char *ext_buf;			/* start of buffer */
	void (*ext_free)		/* free routine if not the usual */
		(struct mbuf *, void *, size_t, void *);
	void *ext_arg;			/* argument for ext_free */
	size_t ext_size;		/* size of buffer, for ext_free */

	union {
		/* M_EXT_CLUSTER: physical address */
		paddr_t extun_paddr;
#ifdef M_EXT_MAXPAGES
		/* M_EXT_PAGES: pages */
		struct vm_page *extun_pgs[M_EXT_MAXPAGES];
#endif
	} ext_un;
#define ext_paddr	ext_un.extun_paddr
#define ext_pgs		ext_un.extun_pgs
};

struct _m_ext {
	struct mbuf *ext_ref;		/* mbuf holding the storage (see the
					 * m_ext accessor macro below) */
	struct _m_ext_storage ext_storage;
};

#define M_PADDR_INVALID		POOL_PADDR_INVALID

/*
 * Definition of "struct mbuf".
 * Don't change this without understanding how MHLEN/MLEN are defined.
 */
#define MBUF_DEFINE(name, mhlen, mlen)					\
	struct name {							\
		struct m_hdr m_hdr;					\
		union {							\
			struct {					\
				struct pkthdr MH_pkthdr;		\
				union {					\
					struct _m_ext MH_ext;		\
					char MH_databuf[(mhlen)];	\
				} MH_dat;				\
			} MH;						\
			char M_databuf[(mlen)];				\
		} M_dat;						\
	}

/* Convenience accessors for the m_hdr and M_dat members. */
#define m_next		m_hdr.mh_next
#define m_len		m_hdr.mh_len
#define m_data		m_hdr.mh_data
#define m_owner		m_hdr.mh_owner
#define m_type		m_hdr.mh_type
#define m_flags		m_hdr.mh_flags
#define m_nextpkt	m_hdr.mh_nextpkt
#define m_paddr		m_hdr.mh_paddr
#define m_pkthdr	M_dat.MH.MH_pkthdr
#define m_ext_storage	M_dat.MH.MH_dat.MH_ext.ext_storage
#define m_ext_ref	M_dat.MH.MH_dat.MH_ext.ext_ref
#define m_ext		m_ext_ref->m_ext_storage
#define m_pktdat	M_dat.MH.MH_dat.MH_databuf
#define m_dat		M_dat.M_databuf

/*
 * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
 * into account inter-structure padding.
 */
MBUF_DEFINE(_mbuf_dummy, 1, 1);

/* normal data len */
#define MLEN	((int)(MSIZE - offsetof(struct _mbuf_dummy, m_dat)))
/* data len w/pkthdr */
#define MHLEN	((int)(MSIZE - offsetof(struct _mbuf_dummy, m_pktdat)))

#define MINCLSIZE	(MHLEN+MLEN+1)	/* smallest amount to put in cluster */

/*
 * The *real* struct mbuf
 */
MBUF_DEFINE(mbuf, MHLEN, MLEN);

/* mbuf flags */
#define M_EXT		0x00000001	/* has associated external storage */
#define M_PKTHDR	0x00000002	/* start of record */
#define M_EOR		0x00000004	/* end of record */
#define M_PROTO1	0x00000008	/* protocol-specific */

/* mbuf pkthdr flags, also in m_flags */
#define M_AUTHIPHDR	0x00000010	/* authenticated (IPsec) */
#define M_DECRYPTED	0x00000020	/* decrypted (IPsec) */
#define M_LOOP		0x00000040	/* received on loopback */
#define M_BCAST		0x00000100	/* send/received as L2 broadcast */
#define M_MCAST		0x00000200	/* send/received as L2 multicast */
#define M_CANFASTFWD	0x00000400	/* packet can be fast-forwarded */
#define M_ANYCAST6	0x00000800	/* received as IPv6 anycast */

#define M_LINK0		0x00001000	/* link layer specific flag */
#define M_LINK1		0x00002000	/* link layer specific flag */
#define M_LINK2		0x00004000	/* link layer specific flag */
#define M_LINK3		0x00008000	/* link layer specific flag */
#define M_LINK4		0x00010000	/* link layer specific flag */
#define M_LINK5		0x00020000	/* link layer specific flag */
#define M_LINK6		0x00040000	/* link layer specific flag */
#define M_LINK7		0x00080000	/* link layer specific flag */

#define M_VLANTAG	0x00100000	/* ether_vtag is valid */

/* additional flags for M_EXT mbufs */
#define M_EXT_FLAGS	0xff000000	/* mask of all M_EXT_* bits */
#define M_EXT_CLUSTER	0x01000000	/* ext is a cluster */
#define M_EXT_PAGES	0x02000000	/* ext_pgs is valid */
#define M_EXT_ROMAP	0x04000000	/* ext mapping is r-o at MMU */
#define M_EXT_RW	0x08000000	/* ext storage is writable */

/* for source-level compatibility */
#define M_NOTIFICATION	M_PROTO1

/* Bit names for m_flags, in snprintb(3) format. */
#define M_FLAGS_BITS \
    "\20\1EXT\2PKTHDR\3EOR\4PROTO1\5AUTHIPHDR\6DECRYPTED\7LOOP\10NONE" \
    "\11BCAST\12MCAST\13CANFASTFWD\14ANYCAST6\15LINK0\16LINK1\17LINK2\20LINK3" \
    "\21LINK4\22LINK5\23LINK6\24LINK7" \
    "\25VLANTAG" \
    "\31EXT_CLUSTER\32EXT_PAGES\33EXT_ROMAP\34EXT_RW"

/* flags copied when copying m_pkthdr */
#define M_COPYFLAGS	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD| \
    M_ANYCAST6|M_LINK0|M_LINK1|M_LINK2|M_AUTHIPHDR|M_DECRYPTED|M_LOOP| \
    M_VLANTAG)

/* flag copied when shallow-copying external storage */
#define M_EXTCOPYFLAGS	(M_EXT|M_EXT_FLAGS)

/* mbuf types */
#define MT_FREE		0	/* should be on free list */
#define MT_DATA		1	/* dynamic (data) allocation */
#define MT_HEADER	2	/* packet header */
#define MT_SONAME	3	/* socket name */
#define MT_SOOPTS	4	/* socket options */
#define MT_FTABLE	5	/* fragment reassembly header */
#define MT_CONTROL	6	/* extra-data protocol message */
#define MT_OOBDATA	7	/* expedited data */

/* Human-readable names for the MT_* values, indexed by type. */
#ifdef MBUFTYPES
const char * const mbuftypes[] = {
	"mbfree",
	"mbdata",
	"mbheader",
	"mbsoname",
	"mbsopts",
	"mbftable",
	"mbcontrol",
	"mboobdata",
};
#else
extern const char * const mbuftypes[];
#endif

/* flags to m_get/MGET */
#define M_DONTWAIT	M_NOWAIT
#define M_WAIT		M_WAITOK

#ifdef MBUFTRACE
/* Mbuf allocation tracing. */
void mowner_init_owner(struct mowner *, const char *, const char *);
void mowner_init(struct mbuf *, int);
void mowner_ref(struct mbuf *, int);
void m_claim(struct mbuf *, struct mowner *);
void mowner_revoke(struct mbuf *, bool, int);
void mowner_attach(struct mowner *);
void mowner_detach(struct mowner *);
void m_claimm(struct mbuf *, struct mowner *);
#else
/* Without MBUFTRACE, ownership tracking compiles away to nothing. */
#define mowner_init_owner(mo, n, d)	__nothing
#define mowner_init(m, type)		__nothing
#define mowner_ref(m, flags)		__nothing
#define mowner_revoke(m, all, flags)	__nothing
#define m_claim(m, mowner)		__nothing
#define mowner_attach(mo)		__nothing
#define mowner_detach(mo)		__nothing
#define m_claimm(m, mo)			__nothing
#endif

#define MCLAIM(m, mo)		m_claim((m), (mo))
#define MOWNER_ATTACH(mo)	mowner_attach(mo)
#define MOWNER_DETACH(mo)	mowner_detach(mo)

/*
 * mbuf allocation/deallocation macros:
 *
 *	MGET(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain internal data.
 *
 *	MGETHDR(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain a packet header
 * and internal data.
 *
 * If 'how' is M_WAIT, these macros (and the corresponding functions)
 * are guaranteed to return successfully.
 */
#define MGET(m, how, type)	m = m_get((how), (type))
#define MGETHDR(m, how, type)	m = m_gethdr((how), (type))

#if defined(_KERNEL)

/* Make (m) the sole reference holder of its own external storage. */
#define MCLINITREFERENCE(m)						\
do {									\
	KASSERT(((m)->m_flags & M_EXT) == 0);				\
	(m)->m_ext_ref = (m);						\
	(m)->m_ext.ext_refcnt = 1;					\
} while (0)

/*
 * Macros for mbuf external storage.
 *
 * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
 * the flag M_EXT is set upon success.
 *
 * MEXTMALLOC allocates external storage and adds it to
 * a normal mbuf; the flag M_EXT is set upon success.
 *
 * MEXTADD adds pre-allocated external storage to
 * a normal mbuf; the flag M_EXT is set upon success.
 */
#define MCLGET(m, how)	m_clget((m), (how))

#define MEXTMALLOC(m, size, how)					\
do {									\
	(m)->m_ext_storage.ext_buf = malloc((size), 0, (how));		\
	if ((m)->m_ext_storage.ext_buf != NULL) {			\
		MCLINITREFERENCE(m);					\
		(m)->m_data = (m)->m_ext.ext_buf;			\
		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) |	\
		    M_EXT|M_EXT_RW;					\
		(m)->m_ext.ext_size = (size);				\
		(m)->m_ext.ext_free = NULL;				\
		(m)->m_ext.ext_arg = NULL;				\
		mowner_ref((m), M_EXT);					\
	}								\
} while (0)

#define MEXTADD(m, buf, size, type, free, arg)				\
do {									\
	MCLINITREFERENCE(m);						\
	(m)->m_data = (m)->m_ext.ext_buf = (char *)(buf);		\
	(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT;	\
	(m)->m_ext.ext_size = (size);					\
	(m)->m_ext.ext_free = (free);					\
	(m)->m_ext.ext_arg = (arg);					\
	mowner_ref((m), M_EXT);						\
} while (0)

/* Start of the data buffer backing (m), whatever its storage kind. */
#define M_BUFADDR(m)							\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :			\
	    ((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat)

/* Total size of the data buffer backing (m). */
#define M_BUFSIZE(m)							\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size :			\
	    ((m)->m_flags & M_PKTHDR) ? MHLEN : MLEN)

/* Reset m_data to the start of the backing buffer. */
#define MRESETDATA(m)	(m)->m_data = M_BUFADDR(m)

/*
 * Compute the offset of the beginning of the data buffer of a non-ext
 * mbuf.
 */
#define M_BUFOFFSET(m)							\
	(((m)->m_flags & M_PKTHDR) ?					\
	    offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))

/*
 * Determine if an mbuf's data area is read-only.  This is true
 * if external storage is read-only mapped, or not marked as R/W,
 * or referenced by more than one mbuf.
 */
#define M_READONLY(m)							\
	(((m)->m_flags & M_EXT) != 0 &&					\
	    (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW ||	\
	    (m)->m_ext.ext_refcnt > 1))

/* True if the first __len bytes of __m cannot be modified in place. */
#define M_UNWRITABLE(__m, __len)					\
	((__m)->m_len < (__len) || M_READONLY((__m)))

/*
 * Determine if an mbuf's data area is read-only at the MMU.
 */
#define M_ROMAP(m)							\
	(((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))

/*
 * Compute the amount of space available before the current start of
 * data in an mbuf.
 */
#define M_LEADINGSPACE(m)						\
	(M_READONLY((m)) ? 0 : ((m)->m_data - M_BUFADDR(m)))

/*
 * Compute the amount of space available
 * after the end of data in an mbuf.
 */
#define _M_TRAILINGSPACE(m)						\
	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
	    ((m)->m_data + (m)->m_len) :				\
	    &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))

#define M_TRAILINGSPACE(m)						\
	(M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))

/*
 * Arrange to prepend space of size plen to mbuf m.
 * If a new mbuf must be allocated, how specifies whether to wait.
 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
 * is freed and m is set to NULL.
 */
#define M_PREPEND(m, plen, how)						\
do {									\
	if (M_LEADINGSPACE(m) >= (plen)) {				\
		(m)->m_data -= (plen);					\
		(m)->m_len += (plen);					\
	} else								\
		(m) = m_prepend((m), (plen), (how));			\
	if ((m) && (m)->m_flags & M_PKTHDR)				\
		(m)->m_pkthdr.len += (plen);				\
} while (0)

/* change mbuf to new type, keeping the per-type statistics in sync */
#define MCHTYPE(m, t)							\
do {									\
	KASSERT((t) != MT_FREE);					\
	mbstat_type_add((m)->m_type, -1);				\
	mbstat_type_add(t, 1);						\
	(m)->m_type = t;						\
} while (0)

#ifdef DIAGNOSTIC
#define M_VERIFY_PACKET(m)	m_verify_packet(m)
#else
#define M_VERIFY_PACKET(m)	__nothing
#endif

/* The "copy all" special length. */
#define M_COPYALL	-1

/*
 * Allow drivers and/or protocols to store private context information.
 * Note: the context aliases the receive-interface index in the _rcvif
 * union of struct pkthdr.
 */
#define M_GETCTX(m, t)		((t)(m)->m_pkthdr._rcvif.ctx)
#define M_SETCTX(m, c)		((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
#define M_CLEARCTX(m)		M_SETCTX((m), NULL)

/*
 * M_REGION_GET ensures that the "len"-sized region of type "typ" starting
 * from "off" within "m" is located in a single mbuf, contiguously.
 *
 * The pointer to the region will be returned to pointer variable "val".
 * On failure, both "val" and "m" are set to NULL.
 */
#define M_REGION_GET(val, typ, m, off, len)				\
do {									\
	struct mbuf *_t;						\
	int _tmp;							\
	if ((m)->m_len >= (off) + (len))				\
		(val) = (typ)(mtod((m), char *) + (off));		\
	else {								\
		_t = m_pulldown((m), (off), (len), &_tmp);		\
		if (_t) {						\
			if (_t->m_len < _tmp + (len))			\
				panic("m_pulldown malfunction");	\
			(val) = (typ)(mtod(_t, char *) + _tmp);		\
		} else {						\
			(val) = (typ)NULL;				\
			(m) = NULL;					\
		}							\
	}								\
} while (0)

#endif /* defined(_KERNEL) */

/*
 * Simple mbuf queueing system
 *
 * this is basically a SIMPLEQ adapted to mbuf use (ie using
 * m_nextpkt instead of field.sqe_next).
 *
 * m_next is ignored, so queueing chains of mbufs is possible
 */
#define MBUFQ_HEAD(name)					\
struct name {							\
	struct mbuf *mq_first;					\
	struct mbuf **mq_last;					\
}

#define MBUFQ_INIT(q)		do {				\
	(q)->mq_first = NULL;					\
	(q)->mq_last = &(q)->mq_first;				\
} while (0)

#define MBUFQ_ENQUEUE(q, m)	do {				\
	(m)->m_nextpkt = NULL;					\
	*(q)->mq_last = (m);					\
	(q)->mq_last = &(m)->m_nextpkt;				\
} while (0)

#define MBUFQ_PREPEND(q, m)	do {				\
	if (((m)->m_nextpkt = (q)->mq_first) == NULL)		\
		(q)->mq_last = &(m)->m_nextpkt;			\
	(q)->mq_first = (m);					\
} while (0)

#define MBUFQ_DEQUEUE(q, m)	do {				\
	if (((m) = (q)->mq_first) != NULL) {			\
		if (((q)->mq_first = (m)->m_nextpkt) == NULL)	\
			(q)->mq_last = &(q)->mq_first;		\
		else						\
			(m)->m_nextpkt = NULL;			\
	}							\
} while (0)

#define MBUFQ_DRAIN(q)		do {				\
	struct mbuf *__m0;					\
	while ((__m0 = (q)->mq_first) != NULL) {		\
		(q)->mq_first = __m0->m_nextpkt;		\
		m_freem(__m0);					\
	}							\
	(q)->mq_last = &(q)->mq_first;				\
} while (0)

#define MBUFQ_FIRST(q)		((q)->mq_first)
#define MBUFQ_NEXT(m)		((m)->m_nextpkt)
#define MBUFQ_LAST(q)		(*(q)->mq_last)

/*
 * Mbuf statistics.
 * For statistics related to mbuf and cluster allocations, see also the
 * pool headers (mb_cache and mcl_cache).
 */
struct mbstat {
	u_long _m_spare;	/* formerly m_mbufs */
	u_long _m_spare1;	/* formerly m_clusters */
	u_long _m_spare2;	/* spare field */
	u_long _m_spare3;	/* formerly m_clfree - free clusters */
	u_long m_drops;		/* times failed to find space */
	u_long m_wait;		/* times waited for space */
	u_long m_drain;		/* times drained protocols for space */
	u_short m_mtypes[256];	/* type specific mbuf allocations */
};

struct mbstat_cpu {
	u_int m_mtypes[256];	/* type specific mbuf allocations */
};

/*
 * Mbuf sysctl variables.
 */
#define MBUF_MSIZE		1	/* int: mbuf base size */
#define MBUF_MCLBYTES		2	/* int: mbuf cluster size */
#define MBUF_NMBCLUSTERS	3	/* int: limit on the # of clusters */
#define MBUF_MBLOWAT		4	/* int: mbuf low water mark */
#define MBUF_MCLLOWAT		5	/* int: mbuf cluster low water mark */
#define MBUF_STATS		6	/* struct: mbstat */
#define MBUF_MOWNERS		7	/* struct: m_owner[] */
#define MBUF_NMBCLUSTERS_LIMIT	8	/* int: limit of nmbclusters */

#ifdef _KERNEL
extern struct mbstat mbstat;
extern int nmbclusters;		/* limit on the # of clusters */
extern int mblowat;		/* mbuf low water mark */
extern int mcllowat;		/* mbuf cluster low water mark */
extern int max_linkhdr;		/* largest link-level header */
extern int max_protohdr;	/* largest protocol header */
extern int max_hdr;		/* largest link+protocol header */
extern int max_datalen;		/* MHLEN - max_hdr */
extern const int msize;		/* mbuf base size */
extern const int mclbytes;	/* mbuf cluster size */
extern pool_cache_t mb_cache;
#ifdef MBUFTRACE
LIST_HEAD(mownerhead, mowner);
extern struct mownerhead mowners;	/* list of registered mbuf owners */
extern struct mowner unknown_mowners[];
extern struct mowner revoked_mowner;
#endif

MALLOC_DECLARE(M_MBUF);
MALLOC_DECLARE(M_SONAME);

struct mbuf *m_copym(struct mbuf *, int, int, int);
struct mbuf *m_copypacket(struct mbuf *, int);
struct mbuf *m_devget(char *, int, int, struct ifnet *);
struct mbuf *m_dup(struct mbuf *, int, int, int);
struct mbuf *m_get(int, int);
struct mbuf *m_gethdr(int, int);
struct mbuf *m_get_n(int, int, size_t, size_t);
struct mbuf *m_gethdr_n(int, int, size_t, size_t);
struct mbuf *m_prepend(struct mbuf *, int, int);
struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
struct mbuf *m_pullup(struct mbuf *, int);
struct mbuf *m_copyup(struct mbuf *, int, int);
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_getptr(struct mbuf *, int, int *);
void m_adj(struct mbuf *, int);
struct mbuf *m_defrag(struct mbuf *, int);
int m_apply(struct mbuf *, int, int,
    int (*)(void *, void *, unsigned int), void *);
void m_cat(struct mbuf *, struct mbuf *);
void m_clget(struct mbuf *, int);
void m_copyback(struct mbuf *, int, int, const void *);
struct mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
int m_makewritable(struct mbuf **, int, int, int);
struct mbuf *m_getcl(int, int, int);
void m_copydata(struct mbuf *, int, int, void *);
void m_verify_packet(struct mbuf *);
struct mbuf *m_free(struct mbuf *);
void m_freem(struct mbuf *);
void mbinit(void);
void m_remove_pkthdr(struct mbuf *);
void m_copy_pkthdr(struct mbuf *, struct mbuf *);
void m_move_pkthdr(struct mbuf *, struct mbuf *);
void m_align(struct mbuf *, int);

bool m_ensure_contig(struct mbuf **, int);
struct mbuf *m_add(struct mbuf *, struct mbuf *);

/* Inline routines.
 */
static __inline u_int m_length(const struct mbuf *) __unused;

/* Statistics */
void mbstat_type_add(int, int);

/* Packet tag routines */
struct m_tag *m_tag_get(int, int, int);
void m_tag_free(struct m_tag *);
void m_tag_prepend(struct mbuf *, struct m_tag *);
void m_tag_unlink(struct mbuf *, struct m_tag *);
void m_tag_delete(struct mbuf *, struct m_tag *);
void m_tag_delete_chain(struct mbuf *);
struct m_tag *m_tag_find(const struct mbuf *, int);
struct m_tag *m_tag_copy(struct m_tag *);
int m_tag_copy_chain(struct mbuf *, struct mbuf *);

/* Packet tag types */
#define PACKET_TAG_NONE			0  /* Nothing */
#define PACKET_TAG_SO			4  /* sending socket pointer */
#define PACKET_TAG_NPF			10 /* packet filter */
#define PACKET_TAG_PF			11 /* packet filter */
#define PACKET_TAG_ALTQ_QID		12 /* ALTQ queue id */
#define PACKET_TAG_IPSEC_OUT_DONE	18
#define PACKET_TAG_IPSEC_NAT_T_PORTS	25 /* two uint16_t */
#define PACKET_TAG_INET6		26 /* IPv6 info */
#define PACKET_TAG_TUNNEL_INFO		28 /* tunnel identification and
					    * protocol callback, for loop
					    * detection/recovery
					    */
#define PACKET_TAG_MPLS			29 /* Indicate it's for MPLS */
#define PACKET_TAG_SRCROUTE		30 /* IPv4 source routing */
#define PACKET_TAG_ETHERNET_SRC		31 /* Ethernet source address */

/*
 * Return the number of bytes in the mbuf chain, m.
 */
static __inline u_int
m_length(const struct mbuf *m)
{
	const struct mbuf *m0;
	u_int pktlen;

	/* A packet header already caches the total length. */
	if ((m->m_flags & M_PKTHDR) != 0)
		return m->m_pkthdr.len;

	/* Otherwise, sum the lengths of all mbufs in the chain. */
	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;
	return pktlen;
}

/* Record ifp as the receive interface of packet m. */
static __inline void
m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m->m_pkthdr.rcvif_index = ifp->if_index;
}

/* Clear the receive interface of packet m. */
static __inline void
m_reset_rcvif(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	/* A caller may expect whole _rcvif union is zeroed */
	/* m->m_pkthdr.rcvif_index = 0; */
	m->m_pkthdr._rcvif.ctx = NULL;
}

/* Copy the receive interface index of packet n to packet m. */
static __inline void
m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
{
	KASSERT(m->m_flags & M_PKTHDR);
	KASSERT(n->m_flags & M_PKTHDR);
	m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
}

/* Convenience wrapper deriving the alignment mask from the header type. */
#define M_GET_ALIGNED_HDR(m, type, linkhdr) \
	m_get_aligned_hdr((m), __alignof(type) - 1, sizeof(type), (linkhdr))

/*
 * Ensure the first hlen bytes of *m are contiguous and aligned to
 * (mask + 1).  On strict-alignment machines a misaligned header is
 * copied up with m_copyup (optionally leaving max_linkhdr bytes of
 * leading space, rounded up to the alignment); otherwise a short
 * first mbuf is extended with m_pullup.  Returns non-zero on failure,
 * in which case *m has been set to NULL.
 */
static __inline int
m_get_aligned_hdr(struct mbuf **m, int mask, size_t hlen, bool linkhdr)
{
#ifndef __NO_STRICT_ALIGNMENT
	if (((uintptr_t)mtod(*m, void *) & mask) != 0)
		*m = m_copyup(*m, hlen,
		    linkhdr ? (max_linkhdr + mask) & ~mask : 0);
	else
#endif
	if (__predict_false((size_t)(*m)->m_len < hlen))
		*m = m_pullup(*m, hlen);

	return *m == NULL;
}

void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
    __printflike(1, 2));

/* from uipc_mbufdebug.c */
void m_examine(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));

/* parsers for m_examine() */
void m_examine_ether(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_pppoe(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ppp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_arp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ip(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_icmp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ip6(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_icmp6(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_tcp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_udp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_hex(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));

/*
 * Get rcvif of a mbuf.
 *
 * The caller must call m_put_rcvif after using rcvif if the returned rcvif
 * isn't NULL.  If the returned rcvif is NULL, the caller doesn't need to call
 * m_put_rcvif (although calling it is safe).
 *
 * The caller must not block or sleep while using rcvif.  The API ensures a
 * returned rcvif isn't freed until m_put_rcvif is called.
 */
static __inline struct ifnet *
m_get_rcvif(const struct mbuf *m, int *s)
{
	struct ifnet *ifp;

	KASSERT(m->m_flags & M_PKTHDR);
	*s = pserialize_read_enter();
	ifp = if_byindex(m->m_pkthdr.rcvif_index);
	/* Leave the read section on failure; caller won't call m_put_rcvif. */
	if (__predict_false(ifp == NULL))
		pserialize_read_exit(*s);

	return ifp;
}

/* Release the pserialize read section taken by m_get_rcvif. */
static __inline void
m_put_rcvif(struct ifnet *ifp, int *s)
{

	if (ifp == NULL)
		return;
	pserialize_read_exit(*s);
}

/*
 * Get rcvif of a mbuf.
 *
 * The caller must call m_put_rcvif_psref after using rcvif.  The API ensures
 * a returned rcvif isn't freed until m_put_rcvif_psref is called.
 */
static __inline struct ifnet *
m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
{
	KASSERT(m->m_flags & M_PKTHDR);
	return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
}

/* Release the psref acquired by m_get_rcvif_psref. */
static __inline void
m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
{

	if (ifp == NULL)
		return;
	if_put(ifp, psref);
}

/*
 * Get rcvif of a mbuf.
 *
 * This is NOT an MP-safe API and shouldn't be used where MP safety is
 * required.
 */
static __inline struct ifnet *
m_get_rcvif_NOMPSAFE(const struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	return if_byindex(m->m_pkthdr.rcvif_index);
}

#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */