/*	$NetBSD: uipc_mbuf.c,v 1.32 1998/08/28 20:05:48 thorpej Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include "opt_uvm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/map.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <net/if.h>

#include <vm/vm.h>

#if defined(UVM)
#include <uvm/uvm_extern.h>
#endif

struct pool mbpool;		/* mbuf pool */
struct pool mclpool;		/* mbuf cluster pool */

struct mbuf *mbutl;
struct mbstat mbstat;
union mcluster *mclfree;
int max_linkhdr;
int max_protohdr;
int max_hdr;
int max_datalen;

extern vm_map_t mb_map;

void	*mclpool_alloc __P((unsigned long, int, int));
void	mclpool_release __P((void *, unsigned long, int));

/*
 * Initialize the mbuf allocator.  Note that this cannot allocate any
 * memory itself; we are called before mb_map has been allocated.
 */
void
mbinit()
{

	/* XXX malloc types! */
	pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", 0, NULL, NULL, 0);
	pool_init(&mclpool, MCLBYTES, 0, 0, 0, "mclpl", 0, mclpool_alloc,
	    mclpool_release, 0);
}

void *
mclpool_alloc(sz, flags, mtype)
	unsigned long sz;
	int flags;
	int mtype;
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

#if defined(UVM)
	return ((void *)uvm_km_alloc_poolpage1(mb_map, uvmexp.mb_object,
	    waitok));
#else
	return ((void *)kmem_alloc_poolpage1(mb_map, waitok));
#endif
}

void
mclpool_release(v, sz, mtype)
	void *v;
	unsigned long sz;
	int mtype;
{

#if defined(UVM)
	uvm_km_free_poolpage1(mb_map, (vaddr_t)v);
#else
	kmem_free_poolpage1(mb_map, (vaddr_t)v);
#endif
}

/*
 * When MGET fails, ask protocols to free space when short of memory,
 * then re-attempt to allocate an mbuf.
 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	struct mbuf *m;

	m_reclaim(i);
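	/*
	 * On failure, MGET expands to another call to m_retry(); the
	 * temporary redefinition below makes that inner call evaluate
	 * to NULL instead, so we cannot recurse into this function.
	 */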
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}

/*
 * As above; retry an MGETHDR.
 */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	struct mbuf *m;

	m_reclaim(i);
#define m_retryhdr(i, t)	(struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}

void
m_reclaim(how)
	int how;
{
	struct domain *dp;
	struct protosw *pr;
	struct ifnet *ifp;
	int s = splimp();

	/*
	 * Don't call the protocol drain routines if how == M_NOWAIT, which
	 * typically means we're in interrupt context.  Since we can be
	 * called from a network hardware interrupt, we could corrupt the
	 * protocol queues if we tried to drain them at such a time.
	 */
	if (how == M_WAIT) {
		for (dp = domains; dp; dp = dp->dom_next)
			for (pr = dp->dom_protosw;
			     pr < dp->dom_protoswNPROTOSW; pr++)
				if (pr->pr_drain)
					(*pr->pr_drain)();
	}
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}

struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}

struct mbuf *
m_getclr(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (0);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}
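
/*
 * Example: a typical caller allocates a packet header mbuf and handles
 * pool exhaustion under M_DONTWAIT by dropping (or retrying with
 * M_WAIT, which may sleep):
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */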

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(m)
	struct mbuf *m;
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
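
/*
 * Example: the M_PREPEND() macro takes this path only when the first
 * mbuf has no leading space; a caller needing room for a header can
 * also use it directly, e.g.
 *
 *	m = m_prepend(m, hdrlen, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *
 * where hdrlen is the (caller-supplied) header size.
 */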

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
 * the mbuf chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT
 * from caller.
 */
int MCFail;

struct mbuf *
m_copym(m, off0, len, wait)
	struct mbuf *m;
	int off0, wait;
	int len;
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym");
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (0);
}
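
/*
 * Example: TCP transmits copies of the data queued on its send buffer,
 * keeping the original until it is acknowledged, e.g.
 *
 *	n = m_copym(m, off, len, M_DONTWAIT);
 *
 * Note that cluster-backed mbufs are copied by reference
 * (MCLADDREFERENCE), so the copy shares the external storage rather
 * than duplicating it.
 */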

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return 0;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(m, off, len, cp)
	struct mbuf *m;
	int off;
	int len;
	caddr_t cp;
{
	unsigned count;

	if (off < 0 || len < 0)
		panic("m_copydata");
	while (off > 0) {
		if (m == 0)
			panic("m_copydata");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == 0)
			panic("m_copydata");
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
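
/*
 * Example: linearizing a protocol header into a local buffer, no
 * matter how the chain happens to be split:
 *
 *	struct ip ip;
 *
 *	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
 */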

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(m, n)
	struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
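
/*
 * Example: IP reassembly trims each fragment's header and appends the
 * payload to the packet being rebuilt:
 *
 *	m_adj(n, hlen);
 *	m_cat(m, n);
 *
 * Since m_pkthdr is not updated, the caller must fix up the packet
 * header length itself afterwards.
 */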
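
/*
 * Trim "req_len" bytes of data from the chain: from the head if
 * req_len is positive, from the tail if it is negative.  The packet
 * header length, if present, is adjusted to match.
 */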
void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}
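
/*
 * Example: trimming a received Ethernet frame, dropping the link-level
 * header from the front and a 4-byte trailing CRC from the back:
 *
 *	m_adj(m, sizeof(struct ether_header));
 *	m_adj(m, -4);
 */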

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(n, len)
	struct mbuf *n;
	int len;
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
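
/*
 * Example: the classic input-path idiom, making sure the IP header is
 * contiguous before taking a pointer to it:
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */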

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	struct mbuf *m0;
	int len0, wait;
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
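
/*
 * Example: splitting off everything past a fixed-size record header
 * (reclen is a caller-supplied record length):
 *
 *	tail = m_split(m, reclen, M_WAIT);
 *	if (tail == NULL)
 *		return (ENOBUFS);
 *
 * On success "m" holds the first reclen bytes and "tail" the rest.
 */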
/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((const void *from, void *to, size_t len));
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(u_int16_t);
		totlen -= 2 * sizeof(u_int16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (0);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (size_t)len);
		else
			memcpy(mtod(m, caddr_t), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
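
/*
 * Example: a driver copies a received frame out of board memory into
 * a fresh chain (sc->sc_rxbuf here is a hypothetical driver buffer):
 *
 *	m = m_devget(sc->sc_rxbuf, totlen, 0, ifp, NULL);
 *	if (m == NULL) {
 *		ifp->if_ierrors++;
 *		return;
 *	}
 */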

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(m0, off, len, cp)
	struct mbuf *m0;
	int off;
	int len;
	caddr_t cp;
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
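
/*
 * Example: storing a freshly computed checksum back into the chain at
 * a known offset (sumoff and sum are caller-supplied):
 *
 *	m_copyback(m, sumoff, sizeof(u_int16_t), (caddr_t)&sum);
 */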