/*	$NetBSD: ipsec_mbuf.c,v 1.2 2003/08/13 20:13:59 jonathan Exp $	*/
/*	$FreeBSD: src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.1 2003/01/24 05:11:35 sam Exp $	*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.2 2003/08/13 20:13:59 jonathan Exp $");

/*
 * IPsec-specific mbuf routines.
 */

#include "opt_param.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/route.h>
#include <netinet/in.h>

#include <netipsec/ipsec.h>

#include <netipsec/ipsec_osdep.h>
#include <net/net_osdep.h>

extern	struct mbuf *m_getptr(struct mbuf *, int, int *);

/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 */
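/*
 * Example usage (editor's sketch, not part of the original source;
 * the error value is illustrative): a transform that needs to modify
 * packet data in place might do
 *
 *	m = m_clone(m);
 *	if (m == NULL)
 *		return ENOBUFS;
 *
 * and then continue with the (possibly new) chain head in m.  On
 * allocation failure m_clone has already freed the original chain.
 */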
struct mbuf *
m_clone(struct mbuf *m0)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));

	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of them that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				       mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
				newipsecstat.ips_mbcoalesced++;
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).  Note
		 * that for 4.x systems it's not possible to identify
		 * whether or not mbufs with external buffers are
		 * writable unless they use clusters.
		 */
		if (M_EXT_WRITABLE(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		IPSEC_ASSERT(m->m_flags & M_EXT,
			("m_clone: m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			       mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
			newipsecstat.ips_clcoalesced++;
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, M_DONTWAIT);
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;
			newipsecstat.ips_clcopied++;

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		n->m_next = m->m_next;
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
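/*
 * Example usage (editor's sketch, not part of the original source;
 * skip, the 8-byte header length, and the error handling are
 * illustrative): making room for a header skip bytes into the packet,
 * with off receiving the offset of the new header within mi:
 *
 *	struct mbuf *mi;
 *	int off;
 *
 *	mi = m_makespace(m, skip, 8, &off);
 *	if (mi == NULL) {
 *		m_freem(m);
 *		return ENOBUFS;
 *	}
 *	hdr = mtod(mi, caddr_t) + off;
 */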
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	IPSEC_ASSERT(m0 != NULL, ("m_makespace: null mbuf"));
	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));

	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed.  Figure out
	 * if there's space to insert the new header.  If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;		/* data to move */
	if (hlen > M_TRAILINGSPACE(m)) {
		struct mbuf *n;

		/* XXX code doesn't handle clusters XXX */
		IPSEC_ASSERT(remain < MLEN,
			("m_makespace: remainder too big: %u", remain));
		/*
		 * Not enough space in m, split the contents
		 * of m, inserting new mbufs as required.
		 *
		 * NB: this ignores mbuf types.
		 */
		MGET(n, M_DONTWAIT, MT_DATA);
		if (n == NULL)
			return (NULL);
		n->m_next = m->m_next;		/* splice new mbuf */
		m->m_next = n;
		newipsecstat.ips_mbinserted++;
		if (hlen <= M_TRAILINGSPACE(m) + remain) {
			/*
			 * New header fits in the old mbuf if we copy
			 * the remainder; just do the copy to the new
			 * mbuf and we're good to go.
			 */
			memcpy(mtod(n, caddr_t),
			       mtod(m, caddr_t) + skip, remain);
			n->m_len = remain;
			m->m_len = skip + hlen;
			*off = skip;
		} else {
			/*
			 * No space in the old mbuf for the new header.
			 * Make space in the new mbuf and check that the
			 * remaining data fits too.  If not then we
			 * must allocate an additional mbuf (yech).
			 */
			n->m_len = 0;
			if (remain + hlen > M_TRAILINGSPACE(n)) {
				struct mbuf *n2;

				MGET(n2, M_DONTWAIT, MT_DATA);
				/* NB: new mbuf is on chain, let caller free */
				if (n2 == NULL)
					return (NULL);
				n2->m_len = 0;
				memcpy(mtod(n2, caddr_t),
				       mtod(m, caddr_t) + skip, remain);
				n2->m_len = remain;
				/* splice in second mbuf */
				n2->m_next = n->m_next;
				n->m_next = n2;
				newipsecstat.ips_mbinserted++;
			} else {
				memcpy(mtod(n, caddr_t) + hlen,
				       mtod(m, caddr_t) + skip, remain);
				n->m_len += remain;
			}
			m->m_len -= remain;
			n->m_len += hlen;
			m = n;			/* header is at front ... */
			*off = 0;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		/* NB: source and destination overlap, so this must stay ovbcopy/memmove, not memcpy */
		ovbcopy(mtod(m, caddr_t) + skip,
			mtod(m, caddr_t) + skip + hlen, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}

/*
 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
 * length is updated, and a pointer to the first byte of the padding
 * (which is guaranteed to be all in one mbuf) is returned.
 */
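/*
 * Example usage (editor's sketch, not part of the original source;
 * padlen and the padding pattern are illustrative): appending padlen
 * bytes of ESP-style sequential padding:
 *
 *	u_char *pad = (u_char *)m_pad(m, padlen);
 *	if (pad == NULL)
 *		return ENOBUFS;
 *	for (i = 0; i < padlen; i++)
 *		pad[i] = i + 1;
 *
 * On failure m_pad has already freed the chain.
 */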
caddr_t
m_pad(struct mbuf *m, int n)
{
	register struct mbuf *m0, *m1;
	register int len, pad;
	caddr_t retval;

	if (n <= 0) {  /* No stupid arguments. */
		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
		m_freem(m);
		return NULL;
	}

	len = m->m_pkthdr.len;
	pad = n;
	m0 = m;

	while (m0->m_len < len) {
		IPSEC_ASSERT(m0->m_next != NULL,
		    ("m_pad: m0->m_next null, len %u m_len %u",
		     len, m0->m_len));		/* XXX */
		len -= m0->m_len;
		m0 = m0->m_next;
	}

	if (m0->m_len != len) {
		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));

		m_freem(m);
		return NULL;
	}

	/* Check for zero-length trailing mbufs, and find the last one. */
	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
		if (m1->m_next->m_len != 0) {
			DPRINTF(("m_pad: length mismatch (should be %d "
			    "instead of %d)\n",
			    m->m_pkthdr.len,
			    m->m_pkthdr.len + m1->m_next->m_len));

			m_freem(m);
			return NULL;
		}

		m0 = m1->m_next;
	}

	if (pad > M_TRAILINGSPACE(m0)) {
		/* Add an mbuf to the chain. */
		MGET(m1, M_DONTWAIT, MT_DATA);
		if (m1 == NULL) {
			m_freem(m);		/* free the entire chain */
			DPRINTF(("m_pad: unable to get extra mbuf\n"));
			return NULL;
		}

		m0->m_next = m1;
		m0 = m1;
		m0->m_len = 0;
	}

	retval = m0->m_data + m0->m_len;
	m0->m_len += pad;
	m->m_pkthdr.len += pad;

	return retval;
}

/*
 * Remove hlen data at offset skip in the packet.  This is used by
 * the protocols to strip protocol headers and associated data (e.g. IV,
 * authenticator) on input.
 */
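/*
 * Example usage (editor's sketch, not part of the original source;
 * the error handling is illustrative): removing an hlen-byte header
 * plus IV that sits skip bytes into an inbound packet:
 *
 *	if (m_striphdr(m, skip, hlen) != 0) {
 *		m_freem(m);
 *		return EINVAL;
 *	}
 */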
int
m_striphdr(struct mbuf *m, int skip, int hlen)
{
	struct mbuf *m1;
	int roff;

	/* Find beginning of header */
	m1 = m_getptr(m, skip, &roff);
	if (m1 == NULL)
		return (EINVAL);

	/* Remove the header and associated data from the mbuf. */
	if (roff == 0) {
		/* The header was at the beginning of the mbuf */
		newipsecstat.ips_input_front++;
		m_adj(m1, hlen);
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= hlen;
	} else if (roff + hlen >= m1->m_len) {
		struct mbuf *mo;
		int adjlen;

		/*
		 * Part or all of the header is at the end of this mbuf,
		 * so first let's remove the remainder of the header from
		 * the beginning of the remainder of the mbuf chain, if any.
		 */
		newipsecstat.ips_input_end++;
		if (roff + hlen > m1->m_len) {
			/* Adjust the next mbuf by the remainder */
			m_adj(m1->m_next, roff + hlen - m1->m_len);

			/* The second mbuf is guaranteed not to have a pkthdr... */
			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
		}

		/* Now, let's unlink the mbuf chain for a second... */
		mo = m1->m_next;
		m1->m_next = NULL;

		/*
		 * ...and trim the end of the first part of the chain...sick
		 * NB: save the trim length first, since m_adj changes m1->m_len.
		 */
		adjlen = m1->m_len - roff;
		m_adj(m1, -adjlen);
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= adjlen;

		/* Finally, let's relink */
		m1->m_next = mo;
	} else {
		/*
		 * The header lies in the "middle" of the mbuf; copy
		 * the remainder of the mbuf down over the header.
		 */
		newipsecstat.ips_input_middle++;
		ovbcopy(mtod(m1, u_char *) + roff + hlen,
		      mtod(m1, u_char *) + roff,
		      m1->m_len - (roff + hlen));
		m1->m_len -= hlen;
		m->m_pkthdr.len -= hlen;
	}
	return (0);
}

/*
 * Diagnostic routine to check mbuf alignment as required by the
 * crypto device drivers (that use DMA).
 */
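/*
 * Example usage (editor's sketch, not part of the original source;
 * the tag string and lengths are illustrative): checking the region
 * that will be handed to a DMA-capable crypto engine:
 *
 *	m_checkalignment("esp_output", m, skip, hlen + plen);
 */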
void
m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
{
	int roff;
	struct mbuf *m = m_getptr(m0, off, &roff);
	caddr_t addr;

	if (m == NULL)
		return;
	printf("%s (off %u len %u): ", where, off, len);
	addr = mtod(m, caddr_t) + roff;
	do {
		int mlen;

		if (((uintptr_t) addr) & 3) {
			printf("addr misaligned %p,", addr);
			break;
		}
		mlen = m->m_len;
		if (mlen > len)
			mlen = len;
		len -= mlen;
		if (len && (mlen & 3)) {
			printf("len mismatch %u,", mlen);
			break;
		}
		m = m->m_next;
		addr = m ? mtod(m, caddr_t) : NULL;
	} while (m && len > 0);
	for (m = m0; m; m = m->m_next)
		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
	printf("\n");
}