/*	$NetBSD: uipc_mbuf.c,v 1.39 1999/03/31 01:26:40 thorpej Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/map.h>
#define MBTYPES
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <net/if.h>

#include <vm/vm.h>

#include <uvm/uvm_extern.h>

struct	pool mbpool;		/* mbuf pool */
struct	pool mclpool;		/* mbuf cluster pool */

struct	mbstat mbstat;
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;

extern	vm_map_t mb_map;

void	*mclpool_alloc __P((unsigned long, int, int));
void	mclpool_release __P((void *, unsigned long, int));

/*
 * Initialize the mbuf allocator.  Note, this cannot allocate any
 * memory itself; we are called before mb_map has been allocated.
 */
void
mbinit()
{

	/* XXX malloc types! */
	pool_init(&mbpool, MSIZE, 0, 0, 0, "mbpl", 0, NULL, NULL, 0);
	pool_init(&mclpool, MCLBYTES, 0, 0, 0, "mclpl", 0, mclpool_alloc,
	    mclpool_release, 0);

	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the
	 * limit-reached message at most once a minute.
	 */
	pool_sethardlimit(&mclpool, NMBCLUSTERS,
	    "WARNING: mclpool limit reached; increase NMBCLUSTERS", 60);

	/*
	 * XXX Consider setting a low-water mark here.  That will help
	 * e.g. the pagedaemon on diskless systems as it scrambles to
	 * clean pages in memory-starvation situations.
	 */
}
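
/*
 * Example: once mbinit() has run, mbufs and clusters are drawn from
 * mbpool/mclpool through the standard <sys/mbuf.h> macros.  A typical
 * allocation might look like (illustrative sketch only, not a routine
 * in this file):
 *
 *	struct mbuf *m;
 *
 *	MGET(m, M_DONTWAIT, MT_DATA);
 *	if (m != NULL) {
 *		MCLGET(m, M_DONTWAIT);
 *		if ((m->m_flags & M_EXT) == 0)
 *			m = m_free(m);	(no cluster was available)
 *	}
 */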

void *
mclpool_alloc(sz, flags, mtype)
	unsigned long sz;
	int flags;
	int mtype;
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage1(mb_map, uvmexp.mb_object,
	    waitok));
}

void
mclpool_release(v, sz, mtype)
	void *v;
	unsigned long sz;
	int mtype;
{

	uvm_km_free_poolpage1(mb_map, (vaddr_t)v);
}

/*
 * When MGET fails, ask protocols to free space when short of memory,
 * then re-attempt to allocate an mbuf.
 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	struct mbuf *m;

	m_reclaim(i);
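	/*
	 * The temporary #define below makes the m_retry() reference
	 * inside the MGET() expansion evaluate to a null mbuf pointer,
	 * so a second allocation failure cannot recurse back into here.
	 */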
#define m_retry(i, t)	(struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}

/*
 * As above; retry an MGETHDR.
 */
struct mbuf *
m_retryhdr(i, t)
	int i, t;
{
	struct mbuf *m;

	m_reclaim(i);
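	/*
	 * As in m_retry() above, temporarily #define m_retryhdr away
	 * so the MGETHDR() expansion below cannot recurse.
	 */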
#define m_retryhdr(i, t) (struct mbuf *)0
	MGETHDR(m, i, t);
#undef m_retryhdr
	if (m != NULL)
		mbstat.m_wait++;
	else
		mbstat.m_drops++;
	return (m);
}

void
m_reclaim(how)
	int how;
{
	struct domain *dp;
	struct protosw *pr;
	struct ifnet *ifp;
	int s = splimp();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	splx(s);
	mbstat.m_drain++;
}

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGET(m, nowait, type);
	return (m);
}

struct mbuf *
m_gethdr(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGETHDR(m, nowait, type);
	return (m);
}

struct mbuf *
m_getclr(nowait, type)
	int nowait, type;
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (0);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(m)
	struct mbuf *m;
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}
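
/*
 * Example: the usual pairing of these routines when building and
 * discarding a transient chain (illustrative sketch only):
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	... fill in m->m_data / m->m_len / m->m_pkthdr.len ...
 *	m_freem(m);	(releases every mbuf in the chain)
 */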

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
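
/*
 * Example: callers normally go through the M_PREPEND() macro, which
 * falls back on m_prepend() only when the first mbuf has no leading
 * space; "struct hdr" below is a hypothetical header type
 * (illustrative sketch only):
 *
 *	M_PREPEND(m, sizeof(struct hdr), M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);	(the chain was freed on failure)
 */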

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
int MCFail;

struct mbuf *
m_copym(m, off0, len, wait)
	struct mbuf *m;
	int off0, wait;
	int len;
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym");
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t)+off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		MCFail++;
	return (top);
nospace:
	m_freem(top);
	MCFail++;
	return (0);
}
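
/*
 * Example: duplicating a whole chain without disturbing the original,
 * e.g. for a retransmission queue (illustrative sketch only):
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		... copy failed; the original chain m is untouched ...
 */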

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	MCFail++;
	return 0;
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(m, off, len, cp)
	struct mbuf *m;
	int off;
	int len;
	caddr_t cp;
{
	unsigned count;

	if (off < 0 || len < 0)
		panic("m_copydata");
	while (off > 0) {
		if (m == 0)
			panic("m_copydata");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == 0)
			panic("m_copydata");
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
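
/*
 * Example: pulling a fixed-size header that may span mbufs out into
 * local storage; "struct hdr" is hypothetical (illustrative sketch
 * only):
 *
 *	struct hdr h;
 *
 *	m_copydata(m, 0, sizeof(h), (caddr_t)&h);
 */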

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(m, n)
	struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}
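
/*
 * Example: a non-negative length trims from the front of the chain, a
 * negative one from the back; this assumes the usual Ethernet constants
 * from <net/if_ether.h> (illustrative sketch only):
 *
 *	m_adj(m, sizeof(struct ether_header));	(drop link-level header)
 *	m_adj(m, -ETHER_CRC_LEN);		(drop trailing CRC)
 */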

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
int MPFail;

struct mbuf *
m_pullup(n, len)
	struct mbuf *n;
	int len;
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	MPFail++;
	return (0);
}
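
/*
 * Example: the canonical use is to guarantee that a protocol header is
 * contiguous before casting; "struct hdr" is hypothetical (illustrative
 * sketch only):
 *
 *	if (m->m_len < sizeof(struct hdr) &&
 *	    (m = m_pullup(m, sizeof(struct hdr))) == NULL)
 *		return;		(the chain was already freed)
 *	hp = mtod(m, struct hdr *);
 */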

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	struct mbuf *m0;
	int len0, wait;
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
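
/*
 * Example: splitting a packet at a fragment boundary; "frag_off" is a
 * hypothetical byte offset (illustrative sketch only):
 *
 *	struct mbuf *tail;
 *
 *	tail = m_split(m, frag_off, M_DONTWAIT);
 *	if (tail == NULL)
 *		... split failed; m was restored to its original state ...
 */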

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((const void *from, void *to, size_t len));
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(u_int16_t);
		totlen -= 2 * sizeof(u_int16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (0);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (size_t)len);
		else
			memcpy(mtod(m, caddr_t), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
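
/*
 * Example: a receive interrupt handler copying a frame out of board
 * memory; "sc_rxbuf" and "pktlen" are hypothetical driver state
 * (illustrative sketch only):
 *
 *	m = m_devget(sc_rxbuf, pktlen, 0, ifp, NULL);
 *	if (m == NULL) {
 *		ifp->if_ierrors++;
 *		return;
 *	}
 */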

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(m0, off, len, cp)
	struct mbuf *m0;
	int off;
	int len;
	caddr_t cp;
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}
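
/*
 * Example: overwriting a small field in place, letting m_copyback()
 * grow the chain if it is too short; "FIELD_OFF" is a hypothetical
 * byte offset (illustrative sketch only):
 *
 *	u_int16_t val = 0;
 *
 *	m_copyback(m, FIELD_OFF, sizeof(val), (caddr_t)&val);
 */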