ipsec_mbuf.c revision 1.2 1 /* $NetBSD: ipsec_mbuf.c,v 1.2 2003/08/13 20:13:59 jonathan Exp $ */
2 /* $FreeBSD: src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.1 2003/01/24 05:11:35 sam Exp $ */
3
4 #include <sys/cdefs.h>
5 __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.2 2003/08/13 20:13:59 jonathan Exp $");
6
7 /*
8 * IPsec-specific mbuf routines.
9 */
10
11 #include "opt_param.h"
12
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/mbuf.h>
16 #include <sys/socket.h>
17
18 #include <net/route.h>
19 #include <netinet/in.h>
20
21 #include <netipsec/ipsec.h>
22
23 #include <netipsec/ipsec_osdep.h>
24 #include <net/net_osdep.h>
25
26 extern struct mbuf *m_getptr(struct mbuf *, int, int *);
27
/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 *
 * Returns the (possibly new) head of the chain on success.  On any
 * allocation failure the entire original chain is freed and NULL is
 * returned, so the caller must not touch m0 after a NULL return.
 */
struct mbuf *
m_clone(struct mbuf *m0)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf"));

	/*
	 * NB: the loop advance reads mprev->m_next.  This is safe because
	 * every path through the first iteration sets mprev non-NULL
	 * (coalescing into mprev cannot happen while mprev == NULL).
	 */
	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of it that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				    mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
				newipsecstat.ips_mbcoalesced++;
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).  Note
		 * that for 4.x systems it's not possible to identify
		 * whether or not mbufs with external buffers are
		 * writable unless they use clusters.
		 */
		if (M_EXT_WRITABLE(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		IPSEC_ASSERT(m->m_flags & M_EXT,
		    ("m_clone: m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			    mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
			newipsecstat.ips_clcoalesced++;
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, M_DONTWAIT);
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			/* Copy at most one cluster's worth per new mbuf. */
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;
			newipsecstat.ips_clcopied++;

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			/* Need another cluster for the remainder. */
			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				/* NB: mfirst is not yet linked into m0 */
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		/* Splice the copy in place of the old mbuf. */
		n->m_next = m->m_next;
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}
180
181 /*
182 * Make space for a new header of length hlen at skip bytes
183 * into the packet. When doing this we allocate new mbufs only
184 * when absolutely necessary. The mbuf where the new header
185 * is to go is returned together with an offset into the mbuf.
186 * If NULL is returned then the mbuf chain may have been modified;
187 * the caller is assumed to always free the chain.
188 */
189 struct mbuf *
190 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
191 {
192 struct mbuf *m;
193 unsigned remain;
194
195 IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
196 IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
197
198 for (m = m0; m && skip > m->m_len; m = m->m_next)
199 skip -= m->m_len;
200 if (m == NULL)
201 return (NULL);
202 /*
203 * At this point skip is the offset into the mbuf m
204 * where the new header should be placed. Figure out
205 * if there's space to insert the new header. If so,
206 * and copying the remainder makese sense then do so.
207 * Otherwise insert a new mbuf in the chain, splitting
208 * the contents of m as needed.
209 */
210 remain = m->m_len - skip; /* data to move */
211 if (hlen > M_TRAILINGSPACE(m)) {
212 struct mbuf *n;
213
214 /* XXX code doesn't handle clusters XXX */
215 IPSEC_ASSERT(remain < MLEN,
216 ("m_makespace: remainder too big: %u", remain));
217 /*
218 * Not enough space in m, split the contents
219 * of m, inserting new mbufs as required.
220 *
221 * NB: this ignores mbuf types.
222 */
223 MGET(n, M_DONTWAIT, MT_DATA);
224 if (n == NULL)
225 return (NULL);
226 n->m_next = m->m_next; /* splice new mbuf */
227 m->m_next = n;
228 newipsecstat.ips_mbinserted++;
229 if (hlen <= M_TRAILINGSPACE(m) + remain) {
230 /*
231 * New header fits in the old mbuf if we copy
232 * the remainder; just do the copy to the new
233 * mbuf and we're good to go.
234 */
235 memcpy(mtod(n, caddr_t),
236 mtod(m, caddr_t) + skip, remain);
237 n->m_len = remain;
238 m->m_len = skip + hlen;
239 *off = skip;
240 } else {
241 /*
242 * No space in the old mbuf for the new header.
243 * Make space in the new mbuf and check the
244 * remainder'd data fits too. If not then we
245 * must allocate an additional mbuf (yech).
246 */
247 n->m_len = 0;
248 if (remain + hlen > M_TRAILINGSPACE(n)) {
249 struct mbuf *n2;
250
251 MGET(n2, M_DONTWAIT, MT_DATA);
252 /* NB: new mbuf is on chain, let caller free */
253 if (n2 == NULL)
254 return (NULL);
255 n2->m_len = 0;
256 memcpy(mtod(n2, caddr_t),
257 mtod(m, caddr_t) + skip, remain);
258 n2->m_len = remain;
259 /* splice in second mbuf */
260 n2->m_next = n->m_next;
261 n->m_next = n2;
262 newipsecstat.ips_mbinserted++;
263 } else {
264 memcpy(mtod(n, caddr_t) + hlen,
265 mtod(m, caddr_t) + skip, remain);
266 n->m_len += remain;
267 }
268 m->m_len -= remain;
269 n->m_len += hlen;
270 m = n; /* header is at front ... */
271 *off = 0; /* ... of new mbuf */
272 }
273 } else {
274 /*
275 * Copy the remainder to the back of the mbuf
276 * so there's space to write the new header.
277 */
278 /* XXX can this be memcpy? does it handle overlap? */
279 ovbcopy(mtod(m, caddr_t) + skip,
280 mtod(m, caddr_t) + skip + hlen, remain);
281 m->m_len += hlen;
282 *off = skip;
283 }
284 m0->m_pkthdr.len += hlen; /* adjust packet length */
285 return m;
286 }
287
288 /*
289 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
290 * length is updated, and a pointer to the first byte of the padding
291 * (which is guaranteed to be all in one mbuf) is returned.
292 */
293 caddr_t
294 m_pad(struct mbuf *m, int n)
295 {
296 register struct mbuf *m0, *m1;
297 register int len, pad;
298 caddr_t retval;
299
300 if (n <= 0) { /* No stupid arguments. */
301 DPRINTF(("m_pad: pad length invalid (%d)\n", n));
302 m_freem(m);
303 return NULL;
304 }
305
306 len = m->m_pkthdr.len;
307 pad = n;
308 m0 = m;
309
310 while (m0->m_len < len) {
311 IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
312 len -= m0->m_len;
313 m0 = m0->m_next;
314 }
315
316 if (m0->m_len != len) {
317 DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
318 m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
319
320 m_freem(m);
321 return NULL;
322 }
323
324 /* Check for zero-length trailing mbufs, and find the last one. */
325 for (m1 = m0; m1->m_next; m1 = m1->m_next) {
326 if (m1->m_next->m_len != 0) {
327 DPRINTF(("m_pad: length mismatch (should be %d "
328 "instead of %d)\n",
329 m->m_pkthdr.len,
330 m->m_pkthdr.len + m1->m_next->m_len));
331
332 m_freem(m);
333 return NULL;
334 }
335
336 m0 = m1->m_next;
337 }
338
339 if (pad > M_TRAILINGSPACE(m0)) {
340 /* Add an mbuf to the chain. */
341 MGET(m1, M_DONTWAIT, MT_DATA);
342 if (m1 == 0) {
343 m_freem(m0);
344 DPRINTF(("m_pad: unable to get extra mbuf\n"));
345 return NULL;
346 }
347
348 m0->m_next = m1;
349 m0 = m1;
350 m0->m_len = 0;
351 }
352
353 retval = m0->m_data + m0->m_len;
354 m0->m_len += pad;
355 m->m_pkthdr.len += pad;
356
357 return retval;
358 }
359
/*
 * Remove hlen data at offset skip in the packet.  This is used by
 * the protocols strip protocol headers and associated data (e.g. IV,
 * authenticator) on input.
 *
 * Returns 0 on success or EINVAL if offset skip lies beyond the end
 * of the chain.  On success m->m_pkthdr.len is reduced by hlen.
 */
int
m_striphdr(struct mbuf *m, int skip, int hlen)
{
	struct mbuf *m1;
	int roff;

	/* Find beginning of header */
	m1 = m_getptr(m, skip, &roff);
	if (m1 == NULL)
		return (EINVAL);

	/* Remove the header and associated data from the mbuf. */
	if (roff == 0) {
		/* The header was at the beginning of the mbuf */
		newipsecstat.ips_input_front++;
		m_adj(m1, hlen);
		/*
		 * m_adj only updates pkthdr.len when the mbuf it is
		 * given has M_PKTHDR; fix up the head's count otherwise.
		 */
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= hlen;
	} else if (roff + hlen >= m1->m_len) {
		struct mbuf *mo;

		/*
		 * Part or all of the header is at the end of this mbuf,
		 * so first let's remove the remainder of the header from
		 * the beginning of the remainder of the mbuf chain, if any.
		 */
		newipsecstat.ips_input_end++;
		if (roff + hlen > m1->m_len) {
			/* Adjust the next mbuf by the remainder */
			m_adj(m1->m_next, roff + hlen - m1->m_len);

			/* The second mbuf is guaranteed not to have a pkthdr... */
			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
		}

		/* Now, let's unlink the mbuf chain for a second...*/
		mo = m1->m_next;
		m1->m_next = NULL;

		/*
		 * ...and trim the end of the first part of the chain...sick
		 * NB: the chain is detached so a negative m_adj cannot walk
		 * past m1 and trim earlier mbufs.
		 */
		m_adj(m1, -(m1->m_len - roff));
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= (m1->m_len - roff);

		/* Finally, let's relink */
		m1->m_next = mo;
	} else {
		/*
		 * The header lies in the "middle" of the mbuf; copy
		 * the remainder of the mbuf down over the header.
		 * ovbcopy is used because the regions overlap.
		 */
		newipsecstat.ips_input_middle++;
		ovbcopy(mtod(m1, u_char *) + roff + hlen,
		    mtod(m1, u_char *) + roff,
		    m1->m_len - (roff + hlen));
		m1->m_len -= hlen;
		m->m_pkthdr.len -= hlen;
	}
	return (0);
}
425
426 /*
427 * Diagnostic routine to check mbuf alignment as required by the
428 * crypto device drivers (that use DMA).
429 */
430 void
431 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
432 {
433 int roff;
434 struct mbuf *m = m_getptr(m0, off, &roff);
435 caddr_t addr;
436
437 if (m == NULL)
438 return;
439 printf("%s (off %u len %u): ", where, off, len);
440 addr = mtod(m, caddr_t) + roff;
441 do {
442 int mlen;
443
444 if (((uintptr_t) addr) & 3) {
445 printf("addr misaligned %p,", addr);
446 break;
447 }
448 mlen = m->m_len;
449 if (mlen > len)
450 mlen = len;
451 len -= mlen;
452 if (len && (mlen & 3)) {
453 printf("len mismatch %u,", mlen);
454 break;
455 }
456 m = m->m_next;
457 addr = m ? mtod(m, caddr_t) : NULL;
458 } while (m && len > 0);
459 for (m = m0; m; m = m->m_next)
460 printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
461 printf("\n");
462 }
463