/*	$NetBSD: ipsec_mbuf.c,v 1.3 2003/08/20 22:33:40 jonathan Exp $	*/
/*	$FreeBSD: src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.1 2003/01/24 05:11:35 sam Exp $	*/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.3 2003/08/20 22:33:40 jonathan Exp $");

/*
 * IPsec-specific mbuf routines.
 */

#ifdef __FreeBSD__
#include "opt_param.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/route.h>
#include <netinet/in.h>

#include <netipsec/ipsec.h>

#include <netipsec/ipsec_osdep.h>
#include <net/net_osdep.h>

extern struct mbuf *m_getptr(struct mbuf *, int, int *);

/*
 * Create a writable copy of the mbuf chain. While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs. The second mbuf in this chain is likely
 * to be a cluster. The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc. The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 */
struct mbuf *
m_clone(struct mbuf *m0)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));

	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of them that we can use to coalesce. We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster). This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster. This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				    mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
				newipsecstat.ips_mbcoalesced++;
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now). Note
		 * that for 4.x systems it's not possible to identify
		 * whether or not mbufs with external buffers are
		 * writable unless they use clusters.
		 */
		if (M_EXT_WRITABLE(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		IPSEC_ASSERT(m->m_flags & M_EXT,
		    ("m_clone: m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			    mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
			newipsecstat.ips_clcoalesced++;
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, M_DONTWAIT);
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data. We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters. We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;
			newipsecstat.ips_clcopied++;

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		n->m_next = m->m_next;
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}
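
/*
 * Illustrative sketch (hypothetical caller, not compiled): a transform
 * that wants to modify packet data in place would first replace its
 * chain with the writable copy returned by m_clone. Note that m_clone
 * frees the chain on failure, so the caller must not free it again.
 */
#if 0
static int
example_make_writable(struct mbuf **mp)
{
	struct mbuf *m = m_clone(*mp);

	if (m == NULL)
		return ENOBUFS;		/* chain already freed by m_clone */
	*mp = m;
	return 0;
}
#endif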

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet. When doing this we allocate new mbufs only
 * when absolutely necessary. The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	IPSEC_ASSERT(m0 != NULL, ("m_makespace: null mbuf"));
	IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));

	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed. Figure out
	 * if there's space to insert the new header. If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;		/* data to move */
	if (hlen > M_TRAILINGSPACE(m)) {
		struct mbuf *n;

		/* XXX code doesn't handle clusters XXX */
		IPSEC_ASSERT(remain < MLEN,
		    ("m_makespace: remainder too big: %u", remain));
		/*
		 * Not enough space in m, split the contents
		 * of m, inserting new mbufs as required.
		 *
		 * NB: this ignores mbuf types.
		 */
		MGET(n, M_DONTWAIT, MT_DATA);
		if (n == NULL)
			return (NULL);
		n->m_next = m->m_next;		/* splice new mbuf */
		m->m_next = n;
		newipsecstat.ips_mbinserted++;
		if (hlen <= M_TRAILINGSPACE(m) + remain) {
			/*
			 * New header fits in the old mbuf if we copy
			 * the remainder; just do the copy to the new
			 * mbuf and we're good to go.
			 */
			memcpy(mtod(n, caddr_t),
			    mtod(m, caddr_t) + skip, remain);
			n->m_len = remain;
			m->m_len = skip + hlen;
			*off = skip;
		} else {
			/*
			 * No space in the old mbuf for the new header.
			 * Make space in the new mbuf and check that the
			 * remaining data fits too. If not then we
			 * must allocate an additional mbuf (yech).
			 */
			n->m_len = 0;
			if (remain + hlen > M_TRAILINGSPACE(n)) {
				struct mbuf *n2;

				MGET(n2, M_DONTWAIT, MT_DATA);
				/* NB: new mbuf is on chain, let caller free */
				if (n2 == NULL)
					return (NULL);
				n2->m_len = 0;
				memcpy(mtod(n2, caddr_t),
				    mtod(m, caddr_t) + skip, remain);
				n2->m_len = remain;
				/* splice in second mbuf */
				n2->m_next = n->m_next;
				n->m_next = n2;
				newipsecstat.ips_mbinserted++;
			} else {
				memcpy(mtod(n, caddr_t) + hlen,
				    mtod(m, caddr_t) + skip, remain);
				n->m_len += remain;
			}
			m->m_len -= remain;
			n->m_len += hlen;
			m = n;			/* header is at front ... */
			*off = 0;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		/* XXX can this be memcpy? does it handle overlap? */
		ovbcopy(mtod(m, caddr_t) + skip,
		    mtod(m, caddr_t) + skip + hlen, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}
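
/*
 * Illustrative sketch (hypothetical caller, not compiled): open a gap
 * for an hlen-byte header starting skip bytes into the packet, then
 * copy the header into the returned mbuf at the returned offset.
 */
#if 0
static int
example_insert_header(struct mbuf *m0, int skip, const void *hdr, int hlen)
{
	struct mbuf *m;
	int off;

	m = m_makespace(m0, skip, hlen, &off);
	if (m == NULL)
		return ENOBUFS;		/* caller frees the original chain */
	memcpy(mtod(m, caddr_t) + off, hdr, hlen);
	return 0;
}
#endif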

/*
 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
 * length is updated, and a pointer to the first byte of the padding
 * (which is guaranteed to be all in one mbuf) is returned.
 */
caddr_t
m_pad(struct mbuf *m, int n)
{
	register struct mbuf *m0, *m1;
	register int len, pad;
	caddr_t retval;

	if (n <= 0) {			/* No stupid arguments. */
		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
		m_freem(m);
		return NULL;
	}

	len = m->m_pkthdr.len;
	pad = n;
	m0 = m;

	while (m0->m_len < len) {
		IPSEC_ASSERT(m0->m_next != NULL,
		    ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));	/*XXX*/
		len -= m0->m_len;
		m0 = m0->m_next;
	}

	if (m0->m_len != len) {
		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));

		m_freem(m);
		return NULL;
	}

	/* Check for zero-length trailing mbufs, and find the last one. */
	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
		if (m1->m_next->m_len != 0) {
			DPRINTF(("m_pad: length mismatch (should be %d "
			    "instead of %d)\n",
			    m->m_pkthdr.len,
			    m->m_pkthdr.len + m1->m_next->m_len));

			m_freem(m);
			return NULL;
		}

		m0 = m1->m_next;
	}

	if (pad > M_TRAILINGSPACE(m0)) {
		/* Add an mbuf to the chain. */
		MGET(m1, M_DONTWAIT, MT_DATA);
		if (m1 == NULL) {
			m_freem(m);	/* free the whole chain, not just m0 */
			DPRINTF(("m_pad: unable to get extra mbuf\n"));
			return NULL;
		}

		m0->m_next = m1;
		m0 = m1;
		m0->m_len = 0;
	}

	retval = m0->m_data + m0->m_len;
	m0->m_len += pad;
	m->m_pkthdr.len += pad;

	return retval;
}
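
/*
 * Illustrative sketch (hypothetical caller, not compiled): append an
 * ESP-style trailer of padlen pad bytes followed by the pad length and
 * next header bytes. On failure m_pad has already freed the chain.
 */
#if 0
static int
example_append_trailer(struct mbuf *m, u_int8_t padlen, u_int8_t nxt)
{
	caddr_t tail = m_pad(m, padlen + 2);

	if (tail == NULL)
		return ENOBUFS;
	memset(tail, 0, padlen);	/* pad bytes (zero fill, for example) */
	tail[padlen] = padlen;		/* pad length */
	tail[padlen + 1] = nxt;		/* next header */
	return 0;
}
#endif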

/*
 * Remove hlen data at offset skip in the packet. This is used by
 * the protocols to strip protocol headers and associated data (e.g. IV,
 * authenticator) on input.
 */
int
m_striphdr(struct mbuf *m, int skip, int hlen)
{
	struct mbuf *m1;
	int roff;

	/* Find beginning of header */
	m1 = m_getptr(m, skip, &roff);
	if (m1 == NULL)
		return (EINVAL);

	/* Remove the header and associated data from the mbuf. */
	if (roff == 0) {
		/* The header was at the beginning of the mbuf */
		newipsecstat.ips_input_front++;
		m_adj(m1, hlen);
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= hlen;
	} else if (roff + hlen >= m1->m_len) {
		struct mbuf *mo;

		/*
		 * Part or all of the header is at the end of this mbuf,
		 * so first let's remove the remainder of the header from
		 * the beginning of the remainder of the mbuf chain, if any.
		 */
		newipsecstat.ips_input_end++;
		if (roff + hlen > m1->m_len) {
			/* Adjust the next mbuf by the remainder */
			m_adj(m1->m_next, roff + hlen - m1->m_len);

			/* The second mbuf is guaranteed not to have a pkthdr... */
			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
		}

		/* Now, let's unlink the mbuf chain for a second... */
		mo = m1->m_next;
		m1->m_next = NULL;

		/*
		 * ...and trim the end of the first part of the chain...sick.
		 * Adjust the packet header length before m_adj shrinks
		 * m1->m_len, otherwise the amount trimmed is lost.
		 */
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= (m1->m_len - roff);
		m_adj(m1, -(m1->m_len - roff));

		/* Finally, let's relink */
		m1->m_next = mo;
	} else {
		/*
		 * The header lies in the "middle" of the mbuf; copy
		 * the remainder of the mbuf down over the header.
		 */
		newipsecstat.ips_input_middle++;
		ovbcopy(mtod(m1, u_char *) + roff + hlen,
		    mtod(m1, u_char *) + roff,
		    m1->m_len - (roff + hlen));
		m1->m_len -= hlen;
		m->m_pkthdr.len -= hlen;
	}
	return (0);
}
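
/*
 * Illustrative sketch (hypothetical caller, not compiled): strip an
 * 8-byte header plus an 8-byte IV located skip bytes into the packet,
 * as an input transform might do after processing. The error handling
 * policy here is hypothetical; m_striphdr itself never frees the chain.
 */
#if 0
static int
example_strip(struct mbuf *m, int skip)
{
	int error;

	error = m_striphdr(m, skip, 8 + 8);	/* header + IV */
	if (error)
		m_freem(m);
	return error;
}
#endif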

/*
 * Diagnostic routine to check mbuf alignment as required by the
 * crypto device drivers (that use DMA).
 */
void
m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
{
	int roff;
	struct mbuf *m = m_getptr(m0, off, &roff);
	caddr_t addr;

	if (m == NULL)
		return;
	printf("%s (off %u len %u): ", where, off, len);
	addr = mtod(m, caddr_t) + roff;
	do {
		int mlen;

		if (((uintptr_t) addr) & 3) {
			printf("addr misaligned %p,", addr);
			break;
		}
		mlen = m->m_len;
		if (mlen > len)
			mlen = len;
		len -= mlen;
		if (len && (mlen & 3)) {
			printf("len mismatch %u,", mlen);
			break;
		}
		m = m->m_next;
		addr = m ? mtod(m, caddr_t) : NULL;
	} while (m && len > 0);
	for (m = m0; m; m = m->m_next)
		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
	printf("\n");
}
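
/*
 * Illustrative sketch (hypothetical caller, not compiled): verify that
 * the payload a transform is about to hand to a DMA-capable crypto
 * engine is 32-bit aligned.
 */
#if 0
static void
example_check(const char *tag, struct mbuf *m, int skip)
{
	m_checkalignment(tag, m, skip, m->m_pkthdr.len - skip);
}
#endif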