uipc_mbuf.c revision 1.36 1 /* $NetBSD: uipc_mbuf.c,v 1.36 1999/03/22 22:06:58 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95
36 */
37
38 #include "opt_uvm.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/malloc.h>
44 #include <sys/map.h>
45 #define MBTYPES
46 #include <sys/mbuf.h>
47 #include <sys/kernel.h>
48 #include <sys/syslog.h>
49 #include <sys/domain.h>
50 #include <sys/protosw.h>
51 #include <sys/pool.h>
52 #include <sys/socket.h>
53 #include <net/if.h>
54
55 #include <vm/vm.h>
56
57 #if defined(UVM)
58 #include <uvm/uvm_extern.h>
59 #endif
60
61 struct pool mbpool; /* mbuf pool */
62 struct pool mclpool; /* mbuf cluster pool */
63
64 struct mbstat mbstat;
65 int max_linkhdr;
66 int max_protohdr;
67 int max_hdr;
68 int max_datalen;
69
70 extern vm_map_t mb_map;
71
72 void *mclpool_alloc __P((unsigned long, int, int));
73 void mclpool_release __P((void *, unsigned long, int));
74
75 /*
76 * Initialize the mbuf allcator. Note, this cannot allocate any
77 * memory itself; we are called before mb_map has been allocated.
78 */
79 void
80 mbinit()
81 {
82
83 /* XXX malloc types! */
84 pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", 0, NULL, NULL, 0);
85 pool_init(&mclpool, MCLBYTES, 0, 0, 0, "mclpl", 0, mclpool_alloc,
86 mclpool_release, 0);
87 }
88
89 void *
90 mclpool_alloc(sz, flags, mtype)
91 unsigned long sz;
92 int flags;
93 int mtype;
94 {
95 volatile static struct timeval lastlogged;
96 struct timeval curtime, logdiff;
97 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
98 vaddr_t va;
99 int s;
100
101 #if defined(UVM)
102 va = uvm_km_alloc_poolpage1(mb_map, uvmexp.mb_object, waitok);
103 #else
104 va = kmem_alloc_poolpage1(mb_map, waitok);
105 #endif
106 if (va == 0) {
107 s = splclock();
108 curtime = mono_time;
109 splx(s);
110 timersub(&curtime, &lastlogged, &logdiff);
111 if (logdiff.tv_sec >= 60) {
112 lastlogged = curtime;
113 log(LOG_ERR, "mb_map full\n");
114 }
115 /*
116 * Don't need to reclaim here; MCLGET(), which calls
117 * pool_get(), will reclaim and attempt the allocation
118 * again.
119 */
120 }
121 return ((void *)va);
122 }
123
124 void
125 mclpool_release(v, sz, mtype)
126 void *v;
127 unsigned long sz;
128 int mtype;
129 {
130
131 #if defined(UVM)
132 uvm_km_free_poolpage1(mb_map, (vaddr_t)v);
133 #else
134 kmem_free_poolpage1(mb_map, (vaddr_t)v);
135 #endif
136 }
137
138 /*
139 * When MGET failes, ask protocols to free space when short of memory,
140 * then re-attempt to allocate an mbuf.
141 */
142 struct mbuf *
143 m_retry(i, t)
144 int i, t;
145 {
146 struct mbuf *m;
147
148 m_reclaim(i);
149 #define m_retry(i, t) (struct mbuf *)0
150 MGET(m, i, t);
151 #undef m_retry
152 if (m != NULL)
153 mbstat.m_wait++;
154 else
155 mbstat.m_drops++;
156 return (m);
157 }
158
159 /*
160 * As above; retry an MGETHDR.
161 */
162 struct mbuf *
163 m_retryhdr(i, t)
164 int i, t;
165 {
166 struct mbuf *m;
167
168 m_reclaim(i);
169 #define m_retryhdr(i, t) (struct mbuf *)0
170 MGETHDR(m, i, t);
171 #undef m_retryhdr
172 if (m != NULL)
173 mbstat.m_wait++;
174 else
175 mbstat.m_drops++;
176 return (m);
177 }
178
179 void
180 m_reclaim(how)
181 int how;
182 {
183 struct domain *dp;
184 struct protosw *pr;
185 struct ifnet *ifp;
186 int s = splimp();
187
188 for (dp = domains; dp; dp = dp->dom_next)
189 for (pr = dp->dom_protosw;
190 pr < dp->dom_protoswNPROTOSW; pr++)
191 if (pr->pr_drain)
192 (*pr->pr_drain)();
193 for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
194 if (ifp->if_drain)
195 (*ifp->if_drain)(ifp);
196 splx(s);
197 mbstat.m_drain++;
198 }
199
/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}
214
/*
 * Function form of MGETHDR: allocate an mbuf with a packet header.
 */
struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}
224
225 struct mbuf *
226 m_getclr(nowait, type)
227 int nowait, type;
228 {
229 struct mbuf *m;
230
231 MGET(m, nowait, type);
232 if (m == 0)
233 return (0);
234 memset(mtod(m, caddr_t), 0, MLEN);
235 return (m);
236 }
237
/*
 * Free a single mbuf and return its successor in the chain.
 */
struct mbuf *
m_free(m)
	struct mbuf *m;
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}
247
248 void
249 m_freem(m)
250 struct mbuf *m;
251 {
252 struct mbuf *n;
253
254 if (m == NULL)
255 return;
256 do {
257 MFREE(m, n);
258 m = n;
259 } while (m);
260 }
261
/*
 * Mbuf utility routines.
 */
265
266 /*
267 * Lesser-used path for M_PREPEND:
268 * allocate new mbuf to prepend to chain,
269 * copy junk along.
270 */
271 struct mbuf *
272 m_prepend(m, len, how)
273 struct mbuf *m;
274 int len, how;
275 {
276 struct mbuf *mn;
277
278 MGET(mn, how, m->m_type);
279 if (mn == (struct mbuf *)NULL) {
280 m_freem(m);
281 return ((struct mbuf *)NULL);
282 }
283 if (m->m_flags & M_PKTHDR) {
284 M_COPY_PKTHDR(mn, m);
285 m->m_flags &= ~M_PKTHDR;
286 }
287 mn->m_next = m;
288 m = mn;
289 if (len < MHLEN)
290 MH_ALIGN(m, len);
291 m->m_len = len;
292 return (m);
293 }
294
295 /*
296 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
297 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
298 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
299 */
300 int MCFail;
301
302 struct mbuf *
303 m_copym(m, off0, len, wait)
304 struct mbuf *m;
305 int off0, wait;
306 int len;
307 {
308 struct mbuf *n, **np;
309 int off = off0;
310 struct mbuf *top;
311 int copyhdr = 0;
312
313 if (off < 0 || len < 0)
314 panic("m_copym");
315 if (off == 0 && m->m_flags & M_PKTHDR)
316 copyhdr = 1;
317 while (off > 0) {
318 if (m == 0)
319 panic("m_copym");
320 if (off < m->m_len)
321 break;
322 off -= m->m_len;
323 m = m->m_next;
324 }
325 np = ⊤
326 top = 0;
327 while (len > 0) {
328 if (m == 0) {
329 if (len != M_COPYALL)
330 panic("m_copym");
331 break;
332 }
333 MGET(n, wait, m->m_type);
334 *np = n;
335 if (n == 0)
336 goto nospace;
337 if (copyhdr) {
338 M_COPY_PKTHDR(n, m);
339 if (len == M_COPYALL)
340 n->m_pkthdr.len -= off0;
341 else
342 n->m_pkthdr.len = len;
343 copyhdr = 0;
344 }
345 n->m_len = min(len, m->m_len - off);
346 if (m->m_flags & M_EXT) {
347 n->m_data = m->m_data + off;
348 n->m_ext = m->m_ext;
349 MCLADDREFERENCE(m, n);
350 } else
351 memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off,
352 (unsigned)n->m_len);
353 if (len != M_COPYALL)
354 len -= n->m_len;
355 off = 0;
356 m = m->m_next;
357 np = &n->m_next;
358 }
359 if (top == 0)
360 MCFail++;
361 return (top);
362 nospace:
363 m_freem(top);
364 MCFail++;
365 return (0);
366 }
367
368 /*
369 * Copy an entire packet, including header (which must be present).
370 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
371 */
372 struct mbuf *
373 m_copypacket(m, how)
374 struct mbuf *m;
375 int how;
376 {
377 struct mbuf *top, *n, *o;
378
379 MGET(n, how, m->m_type);
380 top = n;
381 if (!n)
382 goto nospace;
383
384 M_COPY_PKTHDR(n, m);
385 n->m_len = m->m_len;
386 if (m->m_flags & M_EXT) {
387 n->m_data = m->m_data;
388 n->m_ext = m->m_ext;
389 MCLADDREFERENCE(m, n);
390 } else {
391 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
392 }
393
394 m = m->m_next;
395 while (m) {
396 MGET(o, how, m->m_type);
397 if (!o)
398 goto nospace;
399
400 n->m_next = o;
401 n = n->m_next;
402
403 n->m_len = m->m_len;
404 if (m->m_flags & M_EXT) {
405 n->m_data = m->m_data;
406 n->m_ext = m->m_ext;
407 MCLADDREFERENCE(m, n);
408 } else {
409 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
410 }
411
412 m = m->m_next;
413 }
414 return top;
415 nospace:
416 m_freem(top);
417 MCFail++;
418 return 0;
419 }
420
421 /*
422 * Copy data from an mbuf chain starting "off" bytes from the beginning,
423 * continuing for "len" bytes, into the indicated buffer.
424 */
425 void
426 m_copydata(m, off, len, cp)
427 struct mbuf *m;
428 int off;
429 int len;
430 caddr_t cp;
431 {
432 unsigned count;
433
434 if (off < 0 || len < 0)
435 panic("m_copydata");
436 while (off > 0) {
437 if (m == 0)
438 panic("m_copydata");
439 if (off < m->m_len)
440 break;
441 off -= m->m_len;
442 m = m->m_next;
443 }
444 while (len > 0) {
445 if (m == 0)
446 panic("m_copydata");
447 count = min(m->m_len - off, len);
448 memcpy(cp, mtod(m, caddr_t) + off, count);
449 len -= count;
450 cp += count;
451 off = 0;
452 m = m->m_next;
453 }
454 }
455
456 /*
457 * Concatenate mbuf chain n to m.
458 * Both chains must be of the same type (e.g. MT_DATA).
459 * Any m_pkthdr is not updated.
460 */
461 void
462 m_cat(m, n)
463 struct mbuf *m, *n;
464 {
465 while (m->m_next)
466 m = m->m_next;
467 while (n) {
468 if (m->m_flags & M_EXT ||
469 m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
470 /* just join the two chains */
471 m->m_next = n;
472 return;
473 }
474 /* splat the data from one into the other */
475 memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
476 (u_int)n->m_len);
477 m->m_len += n->m_len;
478 n = m_free(n);
479 }
480 }
481
482 void
483 m_adj(mp, req_len)
484 struct mbuf *mp;
485 int req_len;
486 {
487 int len = req_len;
488 struct mbuf *m;
489 int count;
490
491 if ((m = mp) == NULL)
492 return;
493 if (len >= 0) {
494 /*
495 * Trim from head.
496 */
497 while (m != NULL && len > 0) {
498 if (m->m_len <= len) {
499 len -= m->m_len;
500 m->m_len = 0;
501 m = m->m_next;
502 } else {
503 m->m_len -= len;
504 m->m_data += len;
505 len = 0;
506 }
507 }
508 m = mp;
509 if (mp->m_flags & M_PKTHDR)
510 m->m_pkthdr.len -= (req_len - len);
511 } else {
512 /*
513 * Trim from tail. Scan the mbuf chain,
514 * calculating its length and finding the last mbuf.
515 * If the adjustment only affects this mbuf, then just
516 * adjust and return. Otherwise, rescan and truncate
517 * after the remaining size.
518 */
519 len = -len;
520 count = 0;
521 for (;;) {
522 count += m->m_len;
523 if (m->m_next == (struct mbuf *)0)
524 break;
525 m = m->m_next;
526 }
527 if (m->m_len >= len) {
528 m->m_len -= len;
529 if (mp->m_flags & M_PKTHDR)
530 mp->m_pkthdr.len -= len;
531 return;
532 }
533 count -= len;
534 if (count < 0)
535 count = 0;
536 /*
537 * Correct length for chain is "count".
538 * Find the mbuf with last data, adjust its length,
539 * and toss data from remaining mbufs on chain.
540 */
541 m = mp;
542 if (m->m_flags & M_PKTHDR)
543 m->m_pkthdr.len = count;
544 for (; m; m = m->m_next) {
545 if (m->m_len >= count) {
546 m->m_len = count;
547 break;
548 }
549 count -= m->m_len;
550 }
551 while (m->m_next)
552 (m = m->m_next) ->m_len = 0;
553 }
554 }
555
556 /*
557 * Rearange an mbuf chain so that len bytes are contiguous
558 * and in the data area of an mbuf (so that mtod and dtom
559 * will work for a structure of size len). Returns the resulting
560 * mbuf chain on success, frees it and returns null on failure.
561 * If there is room, it will add up to max_protohdr-len extra bytes to the
562 * contiguous region in an attempt to avoid being called next time.
563 */
564 int MPFail;
565
566 struct mbuf *
567 m_pullup(n, len)
568 struct mbuf *n;
569 int len;
570 {
571 struct mbuf *m;
572 int count;
573 int space;
574
575 /*
576 * If first mbuf has no cluster, and has room for len bytes
577 * without shifting current data, pullup into it,
578 * otherwise allocate a new mbuf to prepend to the chain.
579 */
580 if ((n->m_flags & M_EXT) == 0 &&
581 n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
582 if (n->m_len >= len)
583 return (n);
584 m = n;
585 n = n->m_next;
586 len -= m->m_len;
587 } else {
588 if (len > MHLEN)
589 goto bad;
590 MGET(m, M_DONTWAIT, n->m_type);
591 if (m == 0)
592 goto bad;
593 m->m_len = 0;
594 if (n->m_flags & M_PKTHDR) {
595 M_COPY_PKTHDR(m, n);
596 n->m_flags &= ~M_PKTHDR;
597 }
598 }
599 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
600 do {
601 count = min(min(max(len, max_protohdr), space), n->m_len);
602 memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
603 (unsigned)count);
604 len -= count;
605 m->m_len += count;
606 n->m_len -= count;
607 space -= count;
608 if (n->m_len)
609 n->m_data += count;
610 else
611 n = m_free(n);
612 } while (len > 0 && n);
613 if (len > 0) {
614 (void) m_free(m);
615 goto bad;
616 }
617 m->m_next = n;
618 return (m);
619 bad:
620 m_freem(n);
621 MPFail++;
622 return (0);
623 }
624
625 /*
626 * Partition an mbuf chain in two pieces, returning the tail --
627 * all but the first len0 bytes. In case of failure, it returns NULL and
628 * attempts to restore the chain to its original state.
629 */
630 struct mbuf *
631 m_split(m0, len0, wait)
632 struct mbuf *m0;
633 int len0, wait;
634 {
635 struct mbuf *m, *n;
636 unsigned len = len0, remain, len_save;
637
638 for (m = m0; m && len > m->m_len; m = m->m_next)
639 len -= m->m_len;
640 if (m == 0)
641 return (0);
642 remain = m->m_len - len;
643 if (m0->m_flags & M_PKTHDR) {
644 MGETHDR(n, wait, m0->m_type);
645 if (n == 0)
646 return (0);
647 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
648 n->m_pkthdr.len = m0->m_pkthdr.len - len0;
649 len_save = m0->m_pkthdr.len;
650 m0->m_pkthdr.len = len0;
651 if (m->m_flags & M_EXT)
652 goto extpacket;
653 if (remain > MHLEN) {
654 /* m can't be the lead packet */
655 MH_ALIGN(n, 0);
656 n->m_next = m_split(m, len, wait);
657 if (n->m_next == 0) {
658 (void) m_free(n);
659 m0->m_pkthdr.len = len_save;
660 return (0);
661 } else
662 return (n);
663 } else
664 MH_ALIGN(n, remain);
665 } else if (remain == 0) {
666 n = m->m_next;
667 m->m_next = 0;
668 return (n);
669 } else {
670 MGET(n, wait, m->m_type);
671 if (n == 0)
672 return (0);
673 M_ALIGN(n, remain);
674 }
675 extpacket:
676 if (m->m_flags & M_EXT) {
677 n->m_ext = m->m_ext;
678 MCLADDREFERENCE(m, n);
679 n->m_data = m->m_data + len;
680 } else {
681 memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
682 }
683 n->m_len = remain;
684 m->m_len = len;
685 n->m_next = m->m_next;
686 m->m_next = 0;
687 return (n);
688 }
689 /*
690 * Routine to copy from device local memory into mbufs.
691 */
692 struct mbuf *
693 m_devget(buf, totlen, off0, ifp, copy)
694 char *buf;
695 int totlen, off0;
696 struct ifnet *ifp;
697 void (*copy) __P((const void *from, void *to, size_t len));
698 {
699 struct mbuf *m;
700 struct mbuf *top = 0, **mp = ⊤
701 int off = off0, len;
702 char *cp;
703 char *epkt;
704
705 cp = buf;
706 epkt = cp + totlen;
707 if (off) {
708 /*
709 * If 'off' is non-zero, packet is trailer-encapsulated,
710 * so we have to skip the type and length fields.
711 */
712 cp += off + 2 * sizeof(u_int16_t);
713 totlen -= 2 * sizeof(u_int16_t);
714 }
715 MGETHDR(m, M_DONTWAIT, MT_DATA);
716 if (m == 0)
717 return (0);
718 m->m_pkthdr.rcvif = ifp;
719 m->m_pkthdr.len = totlen;
720 m->m_len = MHLEN;
721
722 while (totlen > 0) {
723 if (top) {
724 MGET(m, M_DONTWAIT, MT_DATA);
725 if (m == 0) {
726 m_freem(top);
727 return (0);
728 }
729 m->m_len = MLEN;
730 }
731 len = min(totlen, epkt - cp);
732 if (len >= MINCLSIZE) {
733 MCLGET(m, M_DONTWAIT);
734 if ((m->m_flags & M_EXT) == 0) {
735 m_free(m);
736 m_freem(top);
737 return (0);
738 }
739 m->m_len = len = min(len, MCLBYTES);
740 } else {
741 /*
742 * Place initial small packet/header at end of mbuf.
743 */
744 if (len < m->m_len) {
745 if (top == 0 && len + max_linkhdr <= m->m_len)
746 m->m_data += max_linkhdr;
747 m->m_len = len;
748 } else
749 len = m->m_len;
750 }
751 if (copy)
752 copy(cp, mtod(m, caddr_t), (size_t)len);
753 else
754 memcpy(mtod(m, caddr_t), cp, (size_t)len);
755 cp += len;
756 *mp = m;
757 mp = &m->m_next;
758 totlen -= len;
759 if (cp == epkt)
760 cp = buf;
761 }
762 return (top);
763 }
764
765 /*
766 * Copy data from a buffer back into the indicated mbuf chain,
767 * starting "off" bytes from the beginning, extending the mbuf
768 * chain if necessary.
769 */
770 void
771 m_copyback(m0, off, len, cp)
772 struct mbuf *m0;
773 int off;
774 int len;
775 caddr_t cp;
776 {
777 int mlen;
778 struct mbuf *m = m0, *n;
779 int totlen = 0;
780
781 if (m0 == 0)
782 return;
783 while (off > (mlen = m->m_len)) {
784 off -= mlen;
785 totlen += mlen;
786 if (m->m_next == 0) {
787 n = m_getclr(M_DONTWAIT, m->m_type);
788 if (n == 0)
789 goto out;
790 n->m_len = min(MLEN, len + off);
791 m->m_next = n;
792 }
793 m = m->m_next;
794 }
795 while (len > 0) {
796 mlen = min (m->m_len - off, len);
797 memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
798 cp += mlen;
799 len -= mlen;
800 mlen += off;
801 off = 0;
802 totlen += mlen;
803 if (len == 0)
804 break;
805 if (m->m_next == 0) {
806 n = m_get(M_DONTWAIT, m->m_type);
807 if (n == 0)
808 break;
809 n->m_len = min(MLEN, len);
810 m->m_next = n;
811 }
812 m = m->m_next;
813 }
814 out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
815 m->m_pkthdr.len = totlen;
816 }
817