/*	$NetBSD: uipc_mbuf.c,v 1.30 1998/08/04 04:03:17 perry Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include "opt_uvm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/map.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <net/if.h>

#include <vm/vm.h>

#if defined(UVM)
#include <uvm/uvm_extern.h>
#endif

struct pool mbpool;		/* mbuf pool */
struct pool mclpool;		/* mbuf cluster pool */

struct mbuf *mbutl;
struct mbstat mbstat;
union mcluster *mclfree;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

extern vm_map_t mb_map;

void *mclpool_alloc __P((unsigned long, int, int));
void mclpool_release __P((void *, unsigned long, int));

/*
 * Initialize the mbuf allocator.  Note that this cannot allocate any
 * memory itself; we are called before mb_map has been allocated.
 */
void
mbinit()
{

	/* XXX malloc types! */
	pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", 0, NULL, NULL, 0);
	pool_init(&mclpool, MCLBYTES, 0, 0, 0, "mclpl", 0, mclpool_alloc,
	    mclpool_release, 0);
}

void *
mclpool_alloc(sz, flags, mtype)
	unsigned long sz;
	int flags;
	int mtype;
{

#if defined(UVM)
	return ((void *)uvm_km_alloc_poolpage1(mb_map, uvmexp.mb_object));
#else
	return ((void *)kmem_alloc_poolpage1(mb_map));
#endif
}

void
mclpool_release(v, sz, mtype)
	void *v;
	unsigned long sz;
	int mtype;
{

#if defined(UVM)
	uvm_km_free_poolpage1(mb_map, (vm_offset_t)v);
#else
	kmem_free_poolpage1(mb_map, (vm_offset_t)v);
#endif
}

/*
 * When MGET fails, ask protocols to free space when short of memory,
 * then re-attempt to allocate an mbuf.
 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	struct mbuf *m;

	m_reclaim(i);
	/*
	 * Temporarily redefine m_retry to a NULL mbuf so that the
	 * MGET expansion below cannot recurse back into this routine
	 * if the allocation fails again.
	 */
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}

/*
 * As above; retry an MGETHDR.
 */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	struct mbuf *m;

	m_reclaim(i);
#define m_retryhdr(i, t)	(struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}

void
m_reclaim(how)
	int how;
{
	struct domain *dp;
	struct protosw *pr;
	struct ifnet *ifp;
	int s = splimp();

	/*
	 * Don't call the protocol drain routines if how == M_NOWAIT, which
	 * typically means we're in interrupt context.  Since we can be
	 * called from a network hardware interrupt, we could corrupt the
	 * protocol queues if we tried to drain them at that time.
	 */
	if (how == M_WAIT) {
		for (dp = domains; dp; dp = dp->dom_next)
			for (pr = dp->dom_protosw;
			     pr < dp->dom_protoswNPROTOSW; pr++)
				if (pr->pr_drain)
					(*pr->pr_drain)();
	}
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}
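
/*
 * Illustrative example (not part of the original file): allocate a
 * data mbuf without sleeping; callers must be prepared for failure,
 * since m_get() returns NULL once m_retry() has also come up empty:
 *
 *	struct mbuf *m;
 *
 *	m = m_get(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */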

struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}

struct mbuf *
m_getclr(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (0);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(m)
	struct mbuf *m;
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}
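
/*
 * Illustrative note (not part of the original file): m_free() releases
 * a single mbuf and hands back its successor, which suits loops that
 * consume a chain one mbuf at a time; m_freem() releases the whole
 * chain at once.  Here "consumed()" is a hypothetical predicate:
 *
 *	while (m != NULL && consumed(m))
 *		m = m_free(m);
 */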

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
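
/*
 * Illustrative example (not part of the original file): make room for
 * a 14-byte link-level header in front of an outgoing chain.  The
 * M_PREPEND macro takes the fast path when leading space is available
 * and only falls back to m_prepend() when it is not; on failure the
 * chain has been freed and the pointer is set to NULL:
 *
 *	M_PREPEND(m, 14, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */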

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
int MCFail;

struct mbuf *
m_copym(m, off0, len, wait)
	struct mbuf *m;
	int off0, wait;
	int len;
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym");
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (0);
}
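
/*
 * Illustrative example (not part of the original file): take a copy of
 * an entire packet, e.g. for retransmission, leaving the original chain
 * untouched; cluster data is shared by reference rather than copied:
 *
 *	struct mbuf *copy;
 *
 *	copy = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (copy == NULL)
 *		return (ENOBUFS);
 */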

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return 0;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(m, off, len, cp)
	struct mbuf *m;
	int off;
	int len;
	caddr_t cp;
{
	unsigned count;

	if (off < 0 || len < 0)
		panic("m_copydata");
	while (off > 0) {
		if (m == 0)
			panic("m_copydata");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == 0)
			panic("m_copydata");
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
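
/*
 * Illustrative example (not part of the original file): extract a
 * protocol header that may span several mbufs into an aligned local
 * structure, without modifying the chain:
 *
 *	struct ip iphdr;
 *
 *	m_copydata(m, 0, sizeof(iphdr), (caddr_t)&iphdr);
 */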

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(m, n)
	struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

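/*
 * Trim "req_len" bytes of data from the chain: from the head if
 * req_len is positive, from the tail if it is negative.  The packet
 * header length, if present, is adjusted to match.
 */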
void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(n, len)
	struct mbuf *n;
	int len;
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
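
/*
 * Illustrative example (not part of the original file): the classic
 * pattern for making a header contiguous before casting the data
 * pointer; on failure the chain has already been freed.  The "ip"
 * pointer is assumed declared by the caller:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */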

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	struct mbuf *m0;
	int len0, wait;
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
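
/*
 * Illustrative example (not part of the original file): split a packet
 * at a segment boundary, keeping the first "mss" bytes in m and taking
 * the remainder as a new chain.  Here "mss" is a hypothetical segment
 * size supplied by the caller:
 *
 *	struct mbuf *rest;
 *
 *	rest = m_split(m, mss, M_DONTWAIT);
 *	if (rest == NULL)
 *		return (ENOBUFS);
 */
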
/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((const void *from, void *to, size_t len));
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(u_int16_t);
		totlen -= 2 * sizeof(u_int16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (0);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (size_t)len);
		else
			memcpy(mtod(m, caddr_t), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
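
/*
 * Illustrative example (not part of the original file): a receive
 * interrupt handler building a chain from a frame in board memory.
 * Here "sc", "frame", "pktlen" and "sc_if" are hypothetical driver
 * state; passing a NULL copy routine selects plain memcpy:
 *
 *	m = m_devget(sc->frame, pktlen, 0, &sc->sc_if, NULL);
 *	if (m == NULL)
 *		return;
 */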

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(m0, off, len, cp)
	struct mbuf *m0;
	int off;
	int len;
	caddr_t cp;
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
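
/*
 * Illustrative example (not part of the original file): rewrite a
 * two-byte checksum field in a chain that may not be contiguous at
 * that point.  Here "ckoff" is a hypothetical field offset and "sum"
 * a value already computed by the caller:
 *
 *	m_copyback(m, ckoff, sizeof(sum), (caddr_t)&sum);
 */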