uipc_mbuf.c revision 1.37 1 /* $NetBSD: uipc_mbuf.c,v 1.37 1999/03/23 02:51:27 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95
36 */
37
38 #include "opt_uvm.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/malloc.h>
44 #include <sys/map.h>
45 #define MBTYPES
46 #include <sys/mbuf.h>
47 #include <sys/kernel.h>
48 #include <sys/syslog.h>
49 #include <sys/domain.h>
50 #include <sys/protosw.h>
51 #include <sys/pool.h>
52 #include <sys/socket.h>
53 #include <net/if.h>
54
55 #include <vm/vm.h>
56
57 #if defined(UVM)
58 #include <uvm/uvm_extern.h>
59 #endif
60
61 struct pool mbpool; /* mbuf pool */
62 struct pool mclpool; /* mbuf cluster pool */
63
64 struct mbstat mbstat;
65 int max_linkhdr;
66 int max_protohdr;
67 int max_hdr;
68 int max_datalen;
69
70 extern vm_map_t mb_map;
71
72 void *mclpool_alloc __P((unsigned long, int, int));
73 void mclpool_release __P((void *, unsigned long, int));
74
75 /*
76 * Initialize the mbuf allcator. Note, this cannot allocate any
77 * memory itself; we are called before mb_map has been allocated.
78 */
79 void
80 mbinit()
81 {
82
83 /* XXX malloc types! */
84 pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", 0, NULL, NULL, 0);
85 pool_init(&mclpool, MCLBYTES, 0, 0, 0, "mclpl", 0, mclpool_alloc,
86 mclpool_release, 0);
87
88 /*
89 * Set the high water mark on the mclpool to the number of
90 * mbuf clusters the kernel is to support.
91 */
92 pool_sethiwat(&mclpool, NMBCLUSTERS);
93 }
94
95 void *
96 mclpool_alloc(sz, flags, mtype)
97 unsigned long sz;
98 int flags;
99 int mtype;
100 {
101 volatile static struct timeval lastlogged;
102 struct timeval curtime, logdiff;
103 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
104 vaddr_t va;
105 int s;
106
107 #if defined(UVM)
108 va = uvm_km_alloc_poolpage1(mb_map, uvmexp.mb_object, waitok);
109 #else
110 va = kmem_alloc_poolpage1(mb_map, waitok);
111 #endif
112 if (va == 0) {
113 s = splclock();
114 curtime = mono_time;
115 splx(s);
116 timersub(&curtime, &lastlogged, &logdiff);
117 if (logdiff.tv_sec >= 60) {
118 lastlogged = curtime;
119 log(LOG_ERR, "mb_map full\n");
120 }
121 /*
122 * Don't need to reclaim here; MCLGET(), which calls
123 * pool_get(), will reclaim and attempt the allocation
124 * again.
125 */
126 }
127 return ((void *)va);
128 }
129
130 void
131 mclpool_release(v, sz, mtype)
132 void *v;
133 unsigned long sz;
134 int mtype;
135 {
136
137 #if defined(UVM)
138 uvm_km_free_poolpage1(mb_map, (vaddr_t)v);
139 #else
140 kmem_free_poolpage1(mb_map, (vaddr_t)v);
141 #endif
142 }
143
144 /*
145 * When MGET failes, ask protocols to free space when short of memory,
146 * then re-attempt to allocate an mbuf.
147 */
148 struct mbuf *
149 m_retry(i, t)
150 int i, t;
151 {
152 struct mbuf *m;
153
154 m_reclaim(i);
155 #define m_retry(i, t) (struct mbuf *)0
156 MGET(m, i, t);
157 #undef m_retry
158 if (m != NULL)
159 mbstat.m_wait++;
160 else
161 mbstat.m_drops++;
162 return (m);
163 }
164
165 /*
166 * As above; retry an MGETHDR.
167 */
168 struct mbuf *
169 m_retryhdr(i, t)
170 int i, t;
171 {
172 struct mbuf *m;
173
174 m_reclaim(i);
175 #define m_retryhdr(i, t) (struct mbuf *)0
176 MGETHDR(m, i, t);
177 #undef m_retryhdr
178 if (m != NULL)
179 mbstat.m_wait++;
180 else
181 mbstat.m_drops++;
182 return (m);
183 }
184
185 void
186 m_reclaim(how)
187 int how;
188 {
189 struct domain *dp;
190 struct protosw *pr;
191 struct ifnet *ifp;
192 int s = splimp();
193
194 for (dp = domains; dp; dp = dp->dom_next)
195 for (pr = dp->dom_protosw;
196 pr < dp->dom_protoswNPROTOSW; pr++)
197 if (pr->pr_drain)
198 (*pr->pr_drain)();
199 for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
200 if (ifp->if_drain)
201 (*ifp->if_drain)(ifp);
202 splx(s);
203 mbstat.m_drain++;
204 }
205
/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */

/*
 * Function form of MGET: returns whatever mbuf pointer the macro
 * produces for the given wait flag and type.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	struct mbuf *mb;

	MGET(mb, nowait, type);
	return (mb);
}
220
/*
 * Function form of MGETHDR: returns whatever mbuf pointer the macro
 * produces for the given wait flag and type.
 */
struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	struct mbuf *mb;

	MGETHDR(mb, nowait, type);
	return (mb);
}
230
231 struct mbuf *
232 m_getclr(nowait, type)
233 int nowait, type;
234 {
235 struct mbuf *m;
236
237 MGET(m, nowait, type);
238 if (m == 0)
239 return (0);
240 memset(mtod(m, caddr_t), 0, MLEN);
241 return (m);
242 }
243
/*
 * Function form of MFREE: releases the single mbuf m and returns the
 * value MFREE stores in its second argument (presumably m's successor
 * in the chain -- see the MFREE definition in <sys/mbuf.h>).
 */
struct mbuf *
m_free(m)
	struct mbuf *m;
{
	struct mbuf *successor;

	MFREE(m, successor);
	return (successor);
}
253
254 void
255 m_freem(m)
256 struct mbuf *m;
257 {
258 struct mbuf *n;
259
260 if (m == NULL)
261 return;
262 do {
263 MFREE(m, n);
264 m = n;
265 } while (m);
266 }
267
268 /*
269 * Mbuffer utility routines.
270 */
271
272 /*
273 * Lesser-used path for M_PREPEND:
274 * allocate new mbuf to prepend to chain,
275 * copy junk along.
276 */
277 struct mbuf *
278 m_prepend(m, len, how)
279 struct mbuf *m;
280 int len, how;
281 {
282 struct mbuf *mn;
283
284 MGET(mn, how, m->m_type);
285 if (mn == (struct mbuf *)NULL) {
286 m_freem(m);
287 return ((struct mbuf *)NULL);
288 }
289 if (m->m_flags & M_PKTHDR) {
290 M_COPY_PKTHDR(mn, m);
291 m->m_flags &= ~M_PKTHDR;
292 }
293 mn->m_next = m;
294 m = mn;
295 if (len < MHLEN)
296 MH_ALIGN(m, len);
297 m->m_len = len;
298 return (m);
299 }
300
301 /*
302 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
303 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
304 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
305 */
306 int MCFail;
307
308 struct mbuf *
309 m_copym(m, off0, len, wait)
310 struct mbuf *m;
311 int off0, wait;
312 int len;
313 {
314 struct mbuf *n, **np;
315 int off = off0;
316 struct mbuf *top;
317 int copyhdr = 0;
318
319 if (off < 0 || len < 0)
320 panic("m_copym");
321 if (off == 0 && m->m_flags & M_PKTHDR)
322 copyhdr = 1;
323 while (off > 0) {
324 if (m == 0)
325 panic("m_copym");
326 if (off < m->m_len)
327 break;
328 off -= m->m_len;
329 m = m->m_next;
330 }
331 np = ⊤
332 top = 0;
333 while (len > 0) {
334 if (m == 0) {
335 if (len != M_COPYALL)
336 panic("m_copym");
337 break;
338 }
339 MGET(n, wait, m->m_type);
340 *np = n;
341 if (n == 0)
342 goto nospace;
343 if (copyhdr) {
344 M_COPY_PKTHDR(n, m);
345 if (len == M_COPYALL)
346 n->m_pkthdr.len -= off0;
347 else
348 n->m_pkthdr.len = len;
349 copyhdr = 0;
350 }
351 n->m_len = min(len, m->m_len - off);
352 if (m->m_flags & M_EXT) {
353 n->m_data = m->m_data + off;
354 n->m_ext = m->m_ext;
355 MCLADDREFERENCE(m, n);
356 } else
357 memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off,
358 (unsigned)n->m_len);
359 if (len != M_COPYALL)
360 len -= n->m_len;
361 off = 0;
362 m = m->m_next;
363 np = &n->m_next;
364 }
365 if (top == 0)
366 MCFail++;
367 return (top);
368 nospace:
369 m_freem(top);
370 MCFail++;
371 return (0);
372 }
373
374 /*
375 * Copy an entire packet, including header (which must be present).
376 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
377 */
378 struct mbuf *
379 m_copypacket(m, how)
380 struct mbuf *m;
381 int how;
382 {
383 struct mbuf *top, *n, *o;
384
385 MGET(n, how, m->m_type);
386 top = n;
387 if (!n)
388 goto nospace;
389
390 M_COPY_PKTHDR(n, m);
391 n->m_len = m->m_len;
392 if (m->m_flags & M_EXT) {
393 n->m_data = m->m_data;
394 n->m_ext = m->m_ext;
395 MCLADDREFERENCE(m, n);
396 } else {
397 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
398 }
399
400 m = m->m_next;
401 while (m) {
402 MGET(o, how, m->m_type);
403 if (!o)
404 goto nospace;
405
406 n->m_next = o;
407 n = n->m_next;
408
409 n->m_len = m->m_len;
410 if (m->m_flags & M_EXT) {
411 n->m_data = m->m_data;
412 n->m_ext = m->m_ext;
413 MCLADDREFERENCE(m, n);
414 } else {
415 memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
416 }
417
418 m = m->m_next;
419 }
420 return top;
421 nospace:
422 m_freem(top);
423 MCFail++;
424 return 0;
425 }
426
427 /*
428 * Copy data from an mbuf chain starting "off" bytes from the beginning,
429 * continuing for "len" bytes, into the indicated buffer.
430 */
431 void
432 m_copydata(m, off, len, cp)
433 struct mbuf *m;
434 int off;
435 int len;
436 caddr_t cp;
437 {
438 unsigned count;
439
440 if (off < 0 || len < 0)
441 panic("m_copydata");
442 while (off > 0) {
443 if (m == 0)
444 panic("m_copydata");
445 if (off < m->m_len)
446 break;
447 off -= m->m_len;
448 m = m->m_next;
449 }
450 while (len > 0) {
451 if (m == 0)
452 panic("m_copydata");
453 count = min(m->m_len - off, len);
454 memcpy(cp, mtod(m, caddr_t) + off, count);
455 len -= count;
456 cp += count;
457 off = 0;
458 m = m->m_next;
459 }
460 }
461
462 /*
463 * Concatenate mbuf chain n to m.
464 * Both chains must be of the same type (e.g. MT_DATA).
465 * Any m_pkthdr is not updated.
466 */
467 void
468 m_cat(m, n)
469 struct mbuf *m, *n;
470 {
471 while (m->m_next)
472 m = m->m_next;
473 while (n) {
474 if (m->m_flags & M_EXT ||
475 m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
476 /* just join the two chains */
477 m->m_next = n;
478 return;
479 }
480 /* splat the data from one into the other */
481 memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
482 (u_int)n->m_len);
483 m->m_len += n->m_len;
484 n = m_free(n);
485 }
486 }
487
488 void
489 m_adj(mp, req_len)
490 struct mbuf *mp;
491 int req_len;
492 {
493 int len = req_len;
494 struct mbuf *m;
495 int count;
496
497 if ((m = mp) == NULL)
498 return;
499 if (len >= 0) {
500 /*
501 * Trim from head.
502 */
503 while (m != NULL && len > 0) {
504 if (m->m_len <= len) {
505 len -= m->m_len;
506 m->m_len = 0;
507 m = m->m_next;
508 } else {
509 m->m_len -= len;
510 m->m_data += len;
511 len = 0;
512 }
513 }
514 m = mp;
515 if (mp->m_flags & M_PKTHDR)
516 m->m_pkthdr.len -= (req_len - len);
517 } else {
518 /*
519 * Trim from tail. Scan the mbuf chain,
520 * calculating its length and finding the last mbuf.
521 * If the adjustment only affects this mbuf, then just
522 * adjust and return. Otherwise, rescan and truncate
523 * after the remaining size.
524 */
525 len = -len;
526 count = 0;
527 for (;;) {
528 count += m->m_len;
529 if (m->m_next == (struct mbuf *)0)
530 break;
531 m = m->m_next;
532 }
533 if (m->m_len >= len) {
534 m->m_len -= len;
535 if (mp->m_flags & M_PKTHDR)
536 mp->m_pkthdr.len -= len;
537 return;
538 }
539 count -= len;
540 if (count < 0)
541 count = 0;
542 /*
543 * Correct length for chain is "count".
544 * Find the mbuf with last data, adjust its length,
545 * and toss data from remaining mbufs on chain.
546 */
547 m = mp;
548 if (m->m_flags & M_PKTHDR)
549 m->m_pkthdr.len = count;
550 for (; m; m = m->m_next) {
551 if (m->m_len >= count) {
552 m->m_len = count;
553 break;
554 }
555 count -= m->m_len;
556 }
557 while (m->m_next)
558 (m = m->m_next) ->m_len = 0;
559 }
560 }
561
562 /*
563 * Rearange an mbuf chain so that len bytes are contiguous
564 * and in the data area of an mbuf (so that mtod and dtom
565 * will work for a structure of size len). Returns the resulting
566 * mbuf chain on success, frees it and returns null on failure.
567 * If there is room, it will add up to max_protohdr-len extra bytes to the
568 * contiguous region in an attempt to avoid being called next time.
569 */
570 int MPFail;
571
572 struct mbuf *
573 m_pullup(n, len)
574 struct mbuf *n;
575 int len;
576 {
577 struct mbuf *m;
578 int count;
579 int space;
580
581 /*
582 * If first mbuf has no cluster, and has room for len bytes
583 * without shifting current data, pullup into it,
584 * otherwise allocate a new mbuf to prepend to the chain.
585 */
586 if ((n->m_flags & M_EXT) == 0 &&
587 n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
588 if (n->m_len >= len)
589 return (n);
590 m = n;
591 n = n->m_next;
592 len -= m->m_len;
593 } else {
594 if (len > MHLEN)
595 goto bad;
596 MGET(m, M_DONTWAIT, n->m_type);
597 if (m == 0)
598 goto bad;
599 m->m_len = 0;
600 if (n->m_flags & M_PKTHDR) {
601 M_COPY_PKTHDR(m, n);
602 n->m_flags &= ~M_PKTHDR;
603 }
604 }
605 space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
606 do {
607 count = min(min(max(len, max_protohdr), space), n->m_len);
608 memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
609 (unsigned)count);
610 len -= count;
611 m->m_len += count;
612 n->m_len -= count;
613 space -= count;
614 if (n->m_len)
615 n->m_data += count;
616 else
617 n = m_free(n);
618 } while (len > 0 && n);
619 if (len > 0) {
620 (void) m_free(m);
621 goto bad;
622 }
623 m->m_next = n;
624 return (m);
625 bad:
626 m_freem(n);
627 MPFail++;
628 return (0);
629 }
630
631 /*
632 * Partition an mbuf chain in two pieces, returning the tail --
633 * all but the first len0 bytes. In case of failure, it returns NULL and
634 * attempts to restore the chain to its original state.
635 */
636 struct mbuf *
637 m_split(m0, len0, wait)
638 struct mbuf *m0;
639 int len0, wait;
640 {
641 struct mbuf *m, *n;
642 unsigned len = len0, remain, len_save;
643
644 for (m = m0; m && len > m->m_len; m = m->m_next)
645 len -= m->m_len;
646 if (m == 0)
647 return (0);
648 remain = m->m_len - len;
649 if (m0->m_flags & M_PKTHDR) {
650 MGETHDR(n, wait, m0->m_type);
651 if (n == 0)
652 return (0);
653 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
654 n->m_pkthdr.len = m0->m_pkthdr.len - len0;
655 len_save = m0->m_pkthdr.len;
656 m0->m_pkthdr.len = len0;
657 if (m->m_flags & M_EXT)
658 goto extpacket;
659 if (remain > MHLEN) {
660 /* m can't be the lead packet */
661 MH_ALIGN(n, 0);
662 n->m_next = m_split(m, len, wait);
663 if (n->m_next == 0) {
664 (void) m_free(n);
665 m0->m_pkthdr.len = len_save;
666 return (0);
667 } else
668 return (n);
669 } else
670 MH_ALIGN(n, remain);
671 } else if (remain == 0) {
672 n = m->m_next;
673 m->m_next = 0;
674 return (n);
675 } else {
676 MGET(n, wait, m->m_type);
677 if (n == 0)
678 return (0);
679 M_ALIGN(n, remain);
680 }
681 extpacket:
682 if (m->m_flags & M_EXT) {
683 n->m_ext = m->m_ext;
684 MCLADDREFERENCE(m, n);
685 n->m_data = m->m_data + len;
686 } else {
687 memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
688 }
689 n->m_len = remain;
690 m->m_len = len;
691 n->m_next = m->m_next;
692 m->m_next = 0;
693 return (n);
694 }
695 /*
696 * Routine to copy from device local memory into mbufs.
697 */
698 struct mbuf *
699 m_devget(buf, totlen, off0, ifp, copy)
700 char *buf;
701 int totlen, off0;
702 struct ifnet *ifp;
703 void (*copy) __P((const void *from, void *to, size_t len));
704 {
705 struct mbuf *m;
706 struct mbuf *top = 0, **mp = ⊤
707 int off = off0, len;
708 char *cp;
709 char *epkt;
710
711 cp = buf;
712 epkt = cp + totlen;
713 if (off) {
714 /*
715 * If 'off' is non-zero, packet is trailer-encapsulated,
716 * so we have to skip the type and length fields.
717 */
718 cp += off + 2 * sizeof(u_int16_t);
719 totlen -= 2 * sizeof(u_int16_t);
720 }
721 MGETHDR(m, M_DONTWAIT, MT_DATA);
722 if (m == 0)
723 return (0);
724 m->m_pkthdr.rcvif = ifp;
725 m->m_pkthdr.len = totlen;
726 m->m_len = MHLEN;
727
728 while (totlen > 0) {
729 if (top) {
730 MGET(m, M_DONTWAIT, MT_DATA);
731 if (m == 0) {
732 m_freem(top);
733 return (0);
734 }
735 m->m_len = MLEN;
736 }
737 len = min(totlen, epkt - cp);
738 if (len >= MINCLSIZE) {
739 MCLGET(m, M_DONTWAIT);
740 if ((m->m_flags & M_EXT) == 0) {
741 m_free(m);
742 m_freem(top);
743 return (0);
744 }
745 m->m_len = len = min(len, MCLBYTES);
746 } else {
747 /*
748 * Place initial small packet/header at end of mbuf.
749 */
750 if (len < m->m_len) {
751 if (top == 0 && len + max_linkhdr <= m->m_len)
752 m->m_data += max_linkhdr;
753 m->m_len = len;
754 } else
755 len = m->m_len;
756 }
757 if (copy)
758 copy(cp, mtod(m, caddr_t), (size_t)len);
759 else
760 memcpy(mtod(m, caddr_t), cp, (size_t)len);
761 cp += len;
762 *mp = m;
763 mp = &m->m_next;
764 totlen -= len;
765 if (cp == epkt)
766 cp = buf;
767 }
768 return (top);
769 }
770
771 /*
772 * Copy data from a buffer back into the indicated mbuf chain,
773 * starting "off" bytes from the beginning, extending the mbuf
774 * chain if necessary.
775 */
776 void
777 m_copyback(m0, off, len, cp)
778 struct mbuf *m0;
779 int off;
780 int len;
781 caddr_t cp;
782 {
783 int mlen;
784 struct mbuf *m = m0, *n;
785 int totlen = 0;
786
787 if (m0 == 0)
788 return;
789 while (off > (mlen = m->m_len)) {
790 off -= mlen;
791 totlen += mlen;
792 if (m->m_next == 0) {
793 n = m_getclr(M_DONTWAIT, m->m_type);
794 if (n == 0)
795 goto out;
796 n->m_len = min(MLEN, len + off);
797 m->m_next = n;
798 }
799 m = m->m_next;
800 }
801 while (len > 0) {
802 mlen = min (m->m_len - off, len);
803 memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
804 cp += mlen;
805 len -= mlen;
806 mlen += off;
807 off = 0;
808 totlen += mlen;
809 if (len == 0)
810 break;
811 if (m->m_next == 0) {
812 n = m_get(M_DONTWAIT, m->m_type);
813 if (n == 0)
814 break;
815 n->m_len = min(MLEN, len);
816 m->m_next = n;
817 }
818 m = m->m_next;
819 }
820 out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
821 m->m_pkthdr.len = totlen;
822 }
823