ipsec_mbuf.c revision 1.4 1 /* $NetBSD: ipsec_mbuf.c,v 1.4 2004/03/01 23:24:10 thorpej Exp $ */
2 /*-
3 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.4 2004/03/01 23:24:10 thorpej Exp $");
32
33 /*
34 * IPsec-specific mbuf routines.
35 */
36
37 #ifdef __FreeBSD__
38 #include "opt_param.h"
39 #endif
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45
46 #include <net/route.h>
47 #include <netinet/in.h>
48
49 #include <netipsec/ipsec.h>
50
51 #include <netipsec/ipsec_osdep.h>
52 #include <net/net_osdep.h>
53
54 extern struct mbuf *m_getptr(struct mbuf *, int, int *);
55
56 /*
57 * Create a writable copy of the mbuf chain. While doing this
58 * we compact the chain with a goal of producing a chain with
59 * at most two mbufs. The second mbuf in this chain is likely
60 * to be a cluster. The primary purpose of this work is to create
61 * a writable packet for encryption, compression, etc. The
62 * secondary goal is to linearize the data so the data can be
63 * passed to crypto hardware in the most efficient manner possible.
64 */
65 struct mbuf *
66 m_clone(struct mbuf *m0)
67 {
68 struct mbuf *m, *mprev;
69 struct mbuf *n, *mfirst, *mlast;
70 int len, off;
71
72 IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));
73
74 mprev = NULL;
75 for (m = m0; m != NULL; m = mprev->m_next) {
76 /*
77 * Regular mbufs are ignored unless there's a cluster
78 * in front of it that we can use to coalesce. We do
79 * the latter mainly so later clusters can be coalesced
80 * also w/o having to handle them specially (i.e. convert
81 * mbuf+cluster -> cluster). This optimization is heavily
82 * influenced by the assumption that we're running over
83 * Ethernet where MCLBYTES is large enough that the max
84 * packet size will permit lots of coalescing into a
85 * single cluster. This in turn permits efficient
86 * crypto operations, especially when using hardware.
87 */
88 if ((m->m_flags & M_EXT) == 0) {
89 if (mprev && (mprev->m_flags & M_EXT) &&
90 m->m_len <= M_TRAILINGSPACE(mprev)) {
91 /* XXX: this ignores mbuf types */
92 memcpy(mtod(mprev, caddr_t) + mprev->m_len,
93 mtod(m, caddr_t), m->m_len);
94 mprev->m_len += m->m_len;
95 mprev->m_next = m->m_next; /* unlink from chain */
96 m_free(m); /* reclaim mbuf */
97 newipsecstat.ips_mbcoalesced++;
98 } else {
99 mprev = m;
100 }
101 continue;
102 }
103 /*
104 * Writable mbufs are left alone (for now). Note
105 * that for 4.x systems it's not possible to identify
106 * whether or not mbufs with external buffers are
107 * writable unless they use clusters.
108 */
109 if (M_EXT_WRITABLE(m)) {
110 mprev = m;
111 continue;
112 }
113
114 /*
115 * Not writable, replace with a copy or coalesce with
116 * the previous mbuf if possible (since we have to copy
117 * it anyway, we try to reduce the number of mbufs and
118 * clusters so that future work is easier).
119 */
120 IPSEC_ASSERT(m->m_flags & M_EXT,
121 ("m_clone: m_flags 0x%x", m->m_flags));
122 /* NB: we only coalesce into a cluster or larger */
123 if (mprev != NULL && (mprev->m_flags & M_EXT) &&
124 m->m_len <= M_TRAILINGSPACE(mprev)) {
125 /* XXX: this ignores mbuf types */
126 memcpy(mtod(mprev, caddr_t) + mprev->m_len,
127 mtod(m, caddr_t), m->m_len);
128 mprev->m_len += m->m_len;
129 mprev->m_next = m->m_next; /* unlink from chain */
130 m_free(m); /* reclaim mbuf */
131 newipsecstat.ips_clcoalesced++;
132 continue;
133 }
134
135 /*
136 * Allocate new space to hold the copy...
137 */
138 /* XXX why can M_PKTHDR be set past the first mbuf? */
139 if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
140 /*
141 * NB: if a packet header is present we must
142 * allocate the mbuf separately from any cluster
143 * because M_MOVE_PKTHDR will smash the data
144 * pointer and drop the M_EXT marker.
145 */
146 MGETHDR(n, M_DONTWAIT, m->m_type);
147 if (n == NULL) {
148 m_freem(m0);
149 return (NULL);
150 }
151 M_MOVE_PKTHDR(n, m);
152 MCLGET(n, M_DONTWAIT);
153 if ((n->m_flags & M_EXT) == 0) {
154 m_free(n);
155 m_freem(m0);
156 return (NULL);
157 }
158 } else {
159 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
160 if (n == NULL) {
161 m_freem(m0);
162 return (NULL);
163 }
164 }
165 /*
166 * ... and copy the data. We deal with jumbo mbufs
167 * (i.e. m_len > MCLBYTES) by splitting them into
168 * clusters. We could just malloc a buffer and make
169 * it external but too many device drivers don't know
170 * how to break up the non-contiguous memory when
171 * doing DMA.
172 */
173 len = m->m_len;
174 off = 0;
175 mfirst = n;
176 mlast = NULL;
177 for (;;) {
178 int cc = min(len, MCLBYTES);
179 memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
180 n->m_len = cc;
181 if (mlast != NULL)
182 mlast->m_next = n;
183 mlast = n;
184 newipsecstat.ips_clcopied++;
185
186 len -= cc;
187 if (len <= 0)
188 break;
189 off += cc;
190
191 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
192 if (n == NULL) {
193 m_freem(mfirst);
194 m_freem(m0);
195 return (NULL);
196 }
197 }
198 n->m_next = m->m_next;
199 if (mprev == NULL)
200 m0 = mfirst; /* new head of chain */
201 else
202 mprev->m_next = mfirst; /* replace old mbuf */
203 m_free(m); /* release old mbuf */
204 mprev = mfirst;
205 }
206 return (m0);
207 }
208
209 /*
210 * Make space for a new header of length hlen at skip bytes
211 * into the packet. When doing this we allocate new mbufs only
212 * when absolutely necessary. The mbuf where the new header
213 * is to go is returned together with an offset into the mbuf.
214 * If NULL is returned then the mbuf chain may have been modified;
215 * the caller is assumed to always free the chain.
216 */
217 struct mbuf *
218 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
219 {
220 struct mbuf *m;
221 unsigned remain;
222
223 IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
224 IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
225
226 for (m = m0; m && skip > m->m_len; m = m->m_next)
227 skip -= m->m_len;
228 if (m == NULL)
229 return (NULL);
230 /*
231 * At this point skip is the offset into the mbuf m
232 * where the new header should be placed. Figure out
233 * if there's space to insert the new header. If so,
234 * and copying the remainder makese sense then do so.
235 * Otherwise insert a new mbuf in the chain, splitting
236 * the contents of m as needed.
237 */
238 remain = m->m_len - skip; /* data to move */
239 if (hlen > M_TRAILINGSPACE(m)) {
240 struct mbuf *n;
241
242 /* XXX code doesn't handle clusters XXX */
243 IPSEC_ASSERT(remain < MLEN,
244 ("m_makespace: remainder too big: %u", remain));
245 /*
246 * Not enough space in m, split the contents
247 * of m, inserting new mbufs as required.
248 *
249 * NB: this ignores mbuf types.
250 */
251 MGET(n, M_DONTWAIT, MT_DATA);
252 if (n == NULL)
253 return (NULL);
254 n->m_next = m->m_next; /* splice new mbuf */
255 m->m_next = n;
256 newipsecstat.ips_mbinserted++;
257 if (hlen <= M_TRAILINGSPACE(m) + remain) {
258 /*
259 * New header fits in the old mbuf if we copy
260 * the remainder; just do the copy to the new
261 * mbuf and we're good to go.
262 */
263 memcpy(mtod(n, caddr_t),
264 mtod(m, caddr_t) + skip, remain);
265 n->m_len = remain;
266 m->m_len = skip + hlen;
267 *off = skip;
268 } else {
269 /*
270 * No space in the old mbuf for the new header.
271 * Make space in the new mbuf and check the
272 * remainder'd data fits too. If not then we
273 * must allocate an additional mbuf (yech).
274 */
275 n->m_len = 0;
276 if (remain + hlen > M_TRAILINGSPACE(n)) {
277 struct mbuf *n2;
278
279 MGET(n2, M_DONTWAIT, MT_DATA);
280 /* NB: new mbuf is on chain, let caller free */
281 if (n2 == NULL)
282 return (NULL);
283 n2->m_len = 0;
284 memcpy(mtod(n2, caddr_t),
285 mtod(m, caddr_t) + skip, remain);
286 n2->m_len = remain;
287 /* splice in second mbuf */
288 n2->m_next = n->m_next;
289 n->m_next = n2;
290 newipsecstat.ips_mbinserted++;
291 } else {
292 memcpy(mtod(n, caddr_t) + hlen,
293 mtod(m, caddr_t) + skip, remain);
294 n->m_len += remain;
295 }
296 m->m_len -= remain;
297 n->m_len += hlen;
298 m = n; /* header is at front ... */
299 *off = 0; /* ... of new mbuf */
300 }
301 } else {
302 /*
303 * Copy the remainder to the back of the mbuf
304 * so there's space to write the new header.
305 */
306 /* XXX can this be memcpy? does it handle overlap? */
307 ovbcopy(mtod(m, caddr_t) + skip,
308 mtod(m, caddr_t) + skip + hlen, remain);
309 m->m_len += hlen;
310 *off = skip;
311 }
312 m0->m_pkthdr.len += hlen; /* adjust packet length */
313 return m;
314 }
315
316 /*
317 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
318 * length is updated, and a pointer to the first byte of the padding
319 * (which is guaranteed to be all in one mbuf) is returned.
320 */
321 caddr_t
322 m_pad(struct mbuf *m, int n)
323 {
324 register struct mbuf *m0, *m1;
325 register int len, pad;
326 caddr_t retval;
327
328 if (n <= 0) { /* No stupid arguments. */
329 DPRINTF(("m_pad: pad length invalid (%d)\n", n));
330 m_freem(m);
331 return NULL;
332 }
333
334 len = m->m_pkthdr.len;
335 pad = n;
336 m0 = m;
337
338 while (m0->m_len < len) {
339 IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
340 len -= m0->m_len;
341 m0 = m0->m_next;
342 }
343
344 if (m0->m_len != len) {
345 DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
346 m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
347
348 m_freem(m);
349 return NULL;
350 }
351
352 /* Check for zero-length trailing mbufs, and find the last one. */
353 for (m1 = m0; m1->m_next; m1 = m1->m_next) {
354 if (m1->m_next->m_len != 0) {
355 DPRINTF(("m_pad: length mismatch (should be %d "
356 "instead of %d)\n",
357 m->m_pkthdr.len,
358 m->m_pkthdr.len + m1->m_next->m_len));
359
360 m_freem(m);
361 return NULL;
362 }
363
364 m0 = m1->m_next;
365 }
366
367 if (pad > M_TRAILINGSPACE(m0)) {
368 /* Add an mbuf to the chain. */
369 MGET(m1, M_DONTWAIT, MT_DATA);
370 if (m1 == 0) {
371 m_freem(m0);
372 DPRINTF(("m_pad: unable to get extra mbuf\n"));
373 return NULL;
374 }
375
376 m0->m_next = m1;
377 m0 = m1;
378 m0->m_len = 0;
379 }
380
381 retval = m0->m_data + m0->m_len;
382 m0->m_len += pad;
383 m->m_pkthdr.len += pad;
384
385 return retval;
386 }
387
388 /*
389 * Remove hlen data at offset skip in the packet. This is used by
390 * the protocols strip protocol headers and associated data (e.g. IV,
391 * authenticator) on input.
392 */
393 int
394 m_striphdr(struct mbuf *m, int skip, int hlen)
395 {
396 struct mbuf *m1;
397 int roff;
398
399 /* Find beginning of header */
400 m1 = m_getptr(m, skip, &roff);
401 if (m1 == NULL)
402 return (EINVAL);
403
404 /* Remove the header and associated data from the mbuf. */
405 if (roff == 0) {
406 /* The header was at the beginning of the mbuf */
407 newipsecstat.ips_input_front++;
408 m_adj(m1, hlen);
409 if ((m1->m_flags & M_PKTHDR) == 0)
410 m->m_pkthdr.len -= hlen;
411 } else if (roff + hlen >= m1->m_len) {
412 struct mbuf *mo;
413
414 /*
415 * Part or all of the header is at the end of this mbuf,
416 * so first let's remove the remainder of the header from
417 * the beginning of the remainder of the mbuf chain, if any.
418 */
419 newipsecstat.ips_input_end++;
420 if (roff + hlen > m1->m_len) {
421 /* Adjust the next mbuf by the remainder */
422 m_adj(m1->m_next, roff + hlen - m1->m_len);
423
424 /* The second mbuf is guaranteed not to have a pkthdr... */
425 m->m_pkthdr.len -= (roff + hlen - m1->m_len);
426 }
427
428 /* Now, let's unlink the mbuf chain for a second...*/
429 mo = m1->m_next;
430 m1->m_next = NULL;
431
432 /* ...and trim the end of the first part of the chain...sick */
433 m_adj(m1, -(m1->m_len - roff));
434 if ((m1->m_flags & M_PKTHDR) == 0)
435 m->m_pkthdr.len -= (m1->m_len - roff);
436
437 /* Finally, let's relink */
438 m1->m_next = mo;
439 } else {
440 /*
441 * The header lies in the "middle" of the mbuf; copy
442 * the remainder of the mbuf down over the header.
443 */
444 newipsecstat.ips_input_middle++;
445 ovbcopy(mtod(m1, u_char *) + roff + hlen,
446 mtod(m1, u_char *) + roff,
447 m1->m_len - (roff + hlen));
448 m1->m_len -= hlen;
449 m->m_pkthdr.len -= hlen;
450 }
451 return (0);
452 }
453
454 /*
455 * Diagnostic routine to check mbuf alignment as required by the
456 * crypto device drivers (that use DMA).
457 */
458 void
459 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
460 {
461 int roff;
462 struct mbuf *m = m_getptr(m0, off, &roff);
463 caddr_t addr;
464
465 if (m == NULL)
466 return;
467 printf("%s (off %u len %u): ", where, off, len);
468 addr = mtod(m, caddr_t) + roff;
469 do {
470 int mlen;
471
472 if (((uintptr_t) addr) & 3) {
473 printf("addr misaligned %p,", addr);
474 break;
475 }
476 mlen = m->m_len;
477 if (mlen > len)
478 mlen = len;
479 len -= mlen;
480 if (len && (mlen & 3)) {
481 printf("len mismatch %u,", mlen);
482 break;
483 }
484 m = m->m_next;
485 addr = m ? mtod(m, caddr_t) : NULL;
486 } while (m && len > 0);
487 for (m = m0; m; m = m->m_next)
488 printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
489 printf("\n");
490 }
491