Home | History | Annotate | Line # | Download | only in netipsec
ipsec_mbuf.c revision 1.4
      1 /*	$NetBSD: ipsec_mbuf.c,v 1.4 2004/03/01 23:24:10 thorpej Exp $	*/
      2 /*-
      3  * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
      4  * All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  *
     27  * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.4 2004/03/01 23:24:10 thorpej Exp $");
     32 
     33 /*
     34  * IPsec-specific mbuf routines.
     35  */
     36 
     37 #ifdef __FreeBSD__
     38 #include "opt_param.h"
     39 #endif
     40 
     41 #include <sys/param.h>
     42 #include <sys/systm.h>
     43 #include <sys/mbuf.h>
     44 #include <sys/socket.h>
     45 
     46 #include <net/route.h>
     47 #include <netinet/in.h>
     48 
     49 #include <netipsec/ipsec.h>
     50 
     51 #include <netipsec/ipsec_osdep.h>
     52 #include <net/net_osdep.h>
     53 
     54 extern	struct mbuf *m_getptr(struct mbuf *, int, int *);
     55 
     56 /*
     57  * Create a writable copy of the mbuf chain.  While doing this
     58  * we compact the chain with a goal of producing a chain with
     59  * at most two mbufs.  The second mbuf in this chain is likely
     60  * to be a cluster.  The primary purpose of this work is to create
     61  * a writable packet for encryption, compression, etc.  The
     62  * secondary goal is to linearize the data so the data can be
     63  * passed to crypto hardware in the most efficient manner possible.
     64  */
     65 struct mbuf *
     66 m_clone(struct mbuf *m0)
     67 {
     68 	struct mbuf *m, *mprev;
     69 	struct mbuf *n, *mfirst, *mlast;
     70 	int len, off;
     71 
     72 	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));
     73 
     74 	mprev = NULL;
     75 	for (m = m0; m != NULL; m = mprev->m_next) {
     76 		/*
     77 		 * Regular mbufs are ignored unless there's a cluster
     78 		 * in front of it that we can use to coalesce.  We do
     79 		 * the latter mainly so later clusters can be coalesced
     80 		 * also w/o having to handle them specially (i.e. convert
     81 		 * mbuf+cluster -> cluster).  This optimization is heavily
     82 		 * influenced by the assumption that we're running over
     83 		 * Ethernet where MCLBYTES is large enough that the max
     84 		 * packet size will permit lots of coalescing into a
     85 		 * single cluster.  This in turn permits efficient
     86 		 * crypto operations, especially when using hardware.
     87 		 */
     88 		if ((m->m_flags & M_EXT) == 0) {
     89 			if (mprev && (mprev->m_flags & M_EXT) &&
     90 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
     91 				/* XXX: this ignores mbuf types */
     92 				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
     93 				       mtod(m, caddr_t), m->m_len);
     94 				mprev->m_len += m->m_len;
     95 				mprev->m_next = m->m_next;	/* unlink from chain */
     96 				m_free(m);			/* reclaim mbuf */
     97 				newipsecstat.ips_mbcoalesced++;
     98 			} else {
     99 				mprev = m;
    100 			}
    101 			continue;
    102 		}
    103 		/*
    104 		 * Writable mbufs are left alone (for now).  Note
    105 		 * that for 4.x systems it's not possible to identify
    106 		 * whether or not mbufs with external buffers are
    107 		 * writable unless they use clusters.
    108 		 */
    109 		if (M_EXT_WRITABLE(m)) {
    110 			mprev = m;
    111 			continue;
    112 		}
    113 
    114 		/*
    115 		 * Not writable, replace with a copy or coalesce with
    116 		 * the previous mbuf if possible (since we have to copy
    117 		 * it anyway, we try to reduce the number of mbufs and
    118 		 * clusters so that future work is easier).
    119 		 */
    120 		IPSEC_ASSERT(m->m_flags & M_EXT,
    121 			("m_clone: m_flags 0x%x", m->m_flags));
    122 		/* NB: we only coalesce into a cluster or larger */
    123 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
    124 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
    125 			/* XXX: this ignores mbuf types */
    126 			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
    127 			       mtod(m, caddr_t), m->m_len);
    128 			mprev->m_len += m->m_len;
    129 			mprev->m_next = m->m_next;	/* unlink from chain */
    130 			m_free(m);			/* reclaim mbuf */
    131 			newipsecstat.ips_clcoalesced++;
    132 			continue;
    133 		}
    134 
    135 		/*
    136 		 * Allocate new space to hold the copy...
    137 		 */
    138 		/* XXX why can M_PKTHDR be set past the first mbuf? */
    139 		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
    140 			/*
    141 			 * NB: if a packet header is present we must
    142 			 * allocate the mbuf separately from any cluster
    143 			 * because M_MOVE_PKTHDR will smash the data
    144 			 * pointer and drop the M_EXT marker.
    145 			 */
    146 			MGETHDR(n, M_DONTWAIT, m->m_type);
    147 			if (n == NULL) {
    148 				m_freem(m0);
    149 				return (NULL);
    150 			}
    151 			M_MOVE_PKTHDR(n, m);
    152 			MCLGET(n, M_DONTWAIT);
    153 			if ((n->m_flags & M_EXT) == 0) {
    154 				m_free(n);
    155 				m_freem(m0);
    156 				return (NULL);
    157 			}
    158 		} else {
    159 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
    160 			if (n == NULL) {
    161 				m_freem(m0);
    162 				return (NULL);
    163 			}
    164 		}
    165 		/*
    166 		 * ... and copy the data.  We deal with jumbo mbufs
    167 		 * (i.e. m_len > MCLBYTES) by splitting them into
    168 		 * clusters.  We could just malloc a buffer and make
    169 		 * it external but too many device drivers don't know
    170 		 * how to break up the non-contiguous memory when
    171 		 * doing DMA.
    172 		 */
    173 		len = m->m_len;
    174 		off = 0;
    175 		mfirst = n;
    176 		mlast = NULL;
    177 		for (;;) {
    178 			int cc = min(len, MCLBYTES);
    179 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
    180 			n->m_len = cc;
    181 			if (mlast != NULL)
    182 				mlast->m_next = n;
    183 			mlast = n;
    184 			newipsecstat.ips_clcopied++;
    185 
    186 			len -= cc;
    187 			if (len <= 0)
    188 				break;
    189 			off += cc;
    190 
    191 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
    192 			if (n == NULL) {
    193 				m_freem(mfirst);
    194 				m_freem(m0);
    195 				return (NULL);
    196 			}
    197 		}
    198 		n->m_next = m->m_next;
    199 		if (mprev == NULL)
    200 			m0 = mfirst;		/* new head of chain */
    201 		else
    202 			mprev->m_next = mfirst;	/* replace old mbuf */
    203 		m_free(m);			/* release old mbuf */
    204 		mprev = mfirst;
    205 	}
    206 	return (m0);
    207 }
    208 
    209 /*
    210  * Make space for a new header of length hlen at skip bytes
    211  * into the packet.  When doing this we allocate new mbufs only
    212  * when absolutely necessary.  The mbuf where the new header
    213  * is to go is returned together with an offset into the mbuf.
    214  * If NULL is returned then the mbuf chain may have been modified;
    215  * the caller is assumed to always free the chain.
    216  */
    217 struct mbuf *
    218 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
    219 {
    220 	struct mbuf *m;
    221 	unsigned remain;
    222 
    223 	IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
    224 	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
    225 
    226 	for (m = m0; m && skip > m->m_len; m = m->m_next)
    227 		skip -= m->m_len;
    228 	if (m == NULL)
    229 		return (NULL);
    230 	/*
    231 	 * At this point skip is the offset into the mbuf m
    232 	 * where the new header should be placed.  Figure out
    233 	 * if there's space to insert the new header.  If so,
    234 	 * and copying the remainder makese sense then do so.
    235 	 * Otherwise insert a new mbuf in the chain, splitting
    236 	 * the contents of m as needed.
    237 	 */
    238 	remain = m->m_len - skip;		/* data to move */
    239 	if (hlen > M_TRAILINGSPACE(m)) {
    240 		struct mbuf *n;
    241 
    242 		/* XXX code doesn't handle clusters XXX */
    243 		IPSEC_ASSERT(remain < MLEN,
    244 			("m_makespace: remainder too big: %u", remain));
    245 		/*
    246 		 * Not enough space in m, split the contents
    247 		 * of m, inserting new mbufs as required.
    248 		 *
    249 		 * NB: this ignores mbuf types.
    250 		 */
    251 		MGET(n, M_DONTWAIT, MT_DATA);
    252 		if (n == NULL)
    253 			return (NULL);
    254 		n->m_next = m->m_next;		/* splice new mbuf */
    255 		m->m_next = n;
    256 		newipsecstat.ips_mbinserted++;
    257 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
    258 			/*
    259 			 * New header fits in the old mbuf if we copy
    260 			 * the remainder; just do the copy to the new
    261 			 * mbuf and we're good to go.
    262 			 */
    263 			memcpy(mtod(n, caddr_t),
    264 			       mtod(m, caddr_t) + skip, remain);
    265 			n->m_len = remain;
    266 			m->m_len = skip + hlen;
    267 			*off = skip;
    268 		} else {
    269 			/*
    270 			 * No space in the old mbuf for the new header.
    271 			 * Make space in the new mbuf and check the
    272 			 * remainder'd data fits too.  If not then we
    273 			 * must allocate an additional mbuf (yech).
    274 			 */
    275 			n->m_len = 0;
    276 			if (remain + hlen > M_TRAILINGSPACE(n)) {
    277 				struct mbuf *n2;
    278 
    279 				MGET(n2, M_DONTWAIT, MT_DATA);
    280 				/* NB: new mbuf is on chain, let caller free */
    281 				if (n2 == NULL)
    282 					return (NULL);
    283 				n2->m_len = 0;
    284 				memcpy(mtod(n2, caddr_t),
    285 				       mtod(m, caddr_t) + skip, remain);
    286 				n2->m_len = remain;
    287 				/* splice in second mbuf */
    288 				n2->m_next = n->m_next;
    289 				n->m_next = n2;
    290 				newipsecstat.ips_mbinserted++;
    291 			} else {
    292 				memcpy(mtod(n, caddr_t) + hlen,
    293 				       mtod(m, caddr_t) + skip, remain);
    294 				n->m_len += remain;
    295 			}
    296 			m->m_len -= remain;
    297 			n->m_len += hlen;
    298 			m = n;			/* header is at front ... */
    299 			*off = 0;		/* ... of new mbuf */
    300 		}
    301 	} else {
    302 		/*
    303 		 * Copy the remainder to the back of the mbuf
    304 		 * so there's space to write the new header.
    305 		 */
    306 		/* XXX can this be memcpy? does it handle overlap? */
    307 		ovbcopy(mtod(m, caddr_t) + skip,
    308 			mtod(m, caddr_t) + skip + hlen, remain);
    309 		m->m_len += hlen;
    310 		*off = skip;
    311 	}
    312 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
    313 	return m;
    314 }
    315 
    316 /*
    317  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
    318  * length is updated, and a pointer to the first byte of the padding
    319  * (which is guaranteed to be all in one mbuf) is returned.
    320  */
    321 caddr_t
    322 m_pad(struct mbuf *m, int n)
    323 {
    324 	register struct mbuf *m0, *m1;
    325 	register int len, pad;
    326 	caddr_t retval;
    327 
    328 	if (n <= 0) {  /* No stupid arguments. */
    329 		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
    330 		m_freem(m);
    331 		return NULL;
    332 	}
    333 
    334 	len = m->m_pkthdr.len;
    335 	pad = n;
    336 	m0 = m;
    337 
    338 	while (m0->m_len < len) {
    339 IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
    340 		len -= m0->m_len;
    341 		m0 = m0->m_next;
    342 	}
    343 
    344 	if (m0->m_len != len) {
    345 		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
    346 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
    347 
    348 		m_freem(m);
    349 		return NULL;
    350 	}
    351 
    352 	/* Check for zero-length trailing mbufs, and find the last one. */
    353 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
    354 		if (m1->m_next->m_len != 0) {
    355 			DPRINTF(("m_pad: length mismatch (should be %d "
    356 			    "instead of %d)\n",
    357 			    m->m_pkthdr.len,
    358 			    m->m_pkthdr.len + m1->m_next->m_len));
    359 
    360 			m_freem(m);
    361 			return NULL;
    362 		}
    363 
    364 		m0 = m1->m_next;
    365 	}
    366 
    367 	if (pad > M_TRAILINGSPACE(m0)) {
    368 		/* Add an mbuf to the chain. */
    369 		MGET(m1, M_DONTWAIT, MT_DATA);
    370 		if (m1 == 0) {
    371 			m_freem(m0);
    372 			DPRINTF(("m_pad: unable to get extra mbuf\n"));
    373 			return NULL;
    374 		}
    375 
    376 		m0->m_next = m1;
    377 		m0 = m1;
    378 		m0->m_len = 0;
    379 	}
    380 
    381 	retval = m0->m_data + m0->m_len;
    382 	m0->m_len += pad;
    383 	m->m_pkthdr.len += pad;
    384 
    385 	return retval;
    386 }
    387 
    388 /*
    389  * Remove hlen data at offset skip in the packet.  This is used by
    390  * the protocols strip protocol headers and associated data (e.g. IV,
    391  * authenticator) on input.
    392  */
    393 int
    394 m_striphdr(struct mbuf *m, int skip, int hlen)
    395 {
    396 	struct mbuf *m1;
    397 	int roff;
    398 
    399 	/* Find beginning of header */
    400 	m1 = m_getptr(m, skip, &roff);
    401 	if (m1 == NULL)
    402 		return (EINVAL);
    403 
    404 	/* Remove the header and associated data from the mbuf. */
    405 	if (roff == 0) {
    406 		/* The header was at the beginning of the mbuf */
    407 		newipsecstat.ips_input_front++;
    408 		m_adj(m1, hlen);
    409 		if ((m1->m_flags & M_PKTHDR) == 0)
    410 			m->m_pkthdr.len -= hlen;
    411 	} else if (roff + hlen >= m1->m_len) {
    412 		struct mbuf *mo;
    413 
    414 		/*
    415 		 * Part or all of the header is at the end of this mbuf,
    416 		 * so first let's remove the remainder of the header from
    417 		 * the beginning of the remainder of the mbuf chain, if any.
    418 		 */
    419 		newipsecstat.ips_input_end++;
    420 		if (roff + hlen > m1->m_len) {
    421 			/* Adjust the next mbuf by the remainder */
    422 			m_adj(m1->m_next, roff + hlen - m1->m_len);
    423 
    424 			/* The second mbuf is guaranteed not to have a pkthdr... */
    425 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
    426 		}
    427 
    428 		/* Now, let's unlink the mbuf chain for a second...*/
    429 		mo = m1->m_next;
    430 		m1->m_next = NULL;
    431 
    432 		/* ...and trim the end of the first part of the chain...sick */
    433 		m_adj(m1, -(m1->m_len - roff));
    434 		if ((m1->m_flags & M_PKTHDR) == 0)
    435 			m->m_pkthdr.len -= (m1->m_len - roff);
    436 
    437 		/* Finally, let's relink */
    438 		m1->m_next = mo;
    439 	} else {
    440 		/*
    441 		 * The header lies in the "middle" of the mbuf; copy
    442 		 * the remainder of the mbuf down over the header.
    443 		 */
    444 		newipsecstat.ips_input_middle++;
    445 		ovbcopy(mtod(m1, u_char *) + roff + hlen,
    446 		      mtod(m1, u_char *) + roff,
    447 		      m1->m_len - (roff + hlen));
    448 		m1->m_len -= hlen;
    449 		m->m_pkthdr.len -= hlen;
    450 	}
    451 	return (0);
    452 }
    453 
    454 /*
    455  * Diagnostic routine to check mbuf alignment as required by the
    456  * crypto device drivers (that use DMA).
    457  */
    458 void
    459 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
    460 {
    461 	int roff;
    462 	struct mbuf *m = m_getptr(m0, off, &roff);
    463 	caddr_t addr;
    464 
    465 	if (m == NULL)
    466 		return;
    467 	printf("%s (off %u len %u): ", where, off, len);
    468 	addr = mtod(m, caddr_t) + roff;
    469 	do {
    470 		int mlen;
    471 
    472 		if (((uintptr_t) addr) & 3) {
    473 			printf("addr misaligned %p,", addr);
    474 			break;
    475 		}
    476 		mlen = m->m_len;
    477 		if (mlen > len)
    478 			mlen = len;
    479 		len -= mlen;
    480 		if (len && (mlen & 3)) {
    481 			printf("len mismatch %u,", mlen);
    482 			break;
    483 		}
    484 		m = m->m_next;
    485 		addr = m ? mtod(m, caddr_t) : NULL;
    486 	} while (m && len > 0);
    487 	for (m = m0; m; m = m->m_next)
    488 		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
    489 	printf("\n");
    490 }
    491