/*	$NetBSD: bpf.c,v 1.204 2017/01/23 10:17:36 ozaki-r Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 * static char rcsid[] =
 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.204 2017/01/23 10:17:36 ozaki-r Exp $");

#if defined(_KERNEL_OPT)
#include "opt_bpf.h"
#include "sl.h"
#include "strip.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/buf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/module.h>
#include <sys/atomic.h>

#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/uio.h>

#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/poll.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>

#include <net/if.h>
#include <net/slip.h>

#include <net/bpf.h>
#include <net/bpfdesc.h>
#include <net/bpfjit.h>

#include <net/if_arc.h>
#include <net/if_ether.h>

#include <netinet/in.h>
#include <netinet/if_inarp.h>

#include <compat/sys/sockio.h>

#ifndef BPF_BUFSIZE
/*
 * 4096 is too small for FDDI frames.  8192 is too small for gigabit Ethernet
 * jumbos (circa 9k), ATM, or Intel gig/10gig Ethernet jumbos (16k).
 */
# define BPF_BUFSIZE 32768
#endif

#define PRINET	26		/* interruptible */

/*
 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
 * XXX the default values should be computed dynamically based
 * on available memory size and available mbuf clusters.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_DFLTBUFSIZE;	/* XXX set dynamically, see above */
bool bpf_jit = false;

struct bpfjit_ops bpfjit_module_ops = {
	.bj_generate_code = NULL,
	.bj_free_code = NULL
};

/*
 * Global BPF statistics returned by net.bpf.stats sysctl.
 */
struct bpf_stat	bpf_gstats;

/*
 * Use a mutex to avoid a race condition between gathering the stats/peers
 * and opening/closing the device.
 */
static kmutex_t bpf_mtx;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 * bpf_list holds the open descriptors.
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_list;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_deliver(struct bpf_if *,
		    void *(*cpfn)(void *, const void *, size_t),
		    void *, u_int, u_int, const bool);
static void	bpf_freed(struct bpf_d *);
static void	bpf_ifname(struct ifnet *, struct ifreq *);
static void	*bpf_mcpy(void *, const void *, size_t);
static int	bpf_movein(struct uio *, int, uint64_t,
		    struct mbuf **, struct sockaddr *);
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *);
static void	bpf_timed_out(void *);
static inline void	bpf_wakeup(struct bpf_d *);
static int	bpf_hdrlen(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
    void *(*)(void *, const void *, size_t), struct timespec *);
static void	reset_d(struct bpf_d *);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);

static int	bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t,
    int);
static int	bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t,
    int);
static int	bpf_ioctl(struct file *, u_long, void *);
static int	bpf_poll(struct file *, int);
static int	bpf_stat(struct file *, struct stat *);
static int	bpf_close(struct file *);
static int	bpf_kqfilter(struct file *, struct knote *);
static void	bpf_softintr(void *);

static const struct fileops bpf_fileops = {
	.fo_read = bpf_read,
	.fo_write = bpf_write,
	.fo_ioctl = bpf_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = bpf_poll,
	.fo_stat = bpf_stat,
	.fo_close = bpf_close,
	.fo_kqfilter = bpf_kqfilter,
	.fo_restart = fnullop_restart,
};

dev_type_open(bpfopen);

const struct cdevsw bpf_cdevsw = {
	.d_open = bpfopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER
};

bpfjit_func_t
bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size)
{

	membar_consumer();
	if (bpfjit_module_ops.bj_generate_code != NULL) {
		return bpfjit_module_ops.bj_generate_code(bc, code, size);
	}
	return NULL;
}

void
bpf_jit_freecode(bpfjit_func_t jcode)
{
	KASSERT(bpfjit_module_ops.bj_free_code != NULL);
	bpfjit_module_ops.bj_free_code(jcode);
}
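
/*
 * Illustrative sketch (an assumption, not code from this file): the
 * bpfjit module is expected to publish its hooks by filling in
 * bpfjit_module_ops and issuing a write barrier, pairing with the
 * membar_consumer() in bpf_jit_generate() above.  Roughly:
 *
 *	bpfjit_module_ops.bj_free_code = bpfjit_free_code;
 *	membar_producer();
 *	bpfjit_module_ops.bj_generate_code = bpfjit_generate_code;
 *
 * bj_generate_code is stored last, so a consumer that observes it
 * non-NULL also observes a valid bj_free_code.
 */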

static int
bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct mbuf *m;
	int error;
	size_t len;
	size_t hlen;
	size_t align;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		align = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		/* 6(dst)+6(src)+2(type) */
		hlen = sizeof(struct ether_header);
		align = 2;
		break;

	case DLT_ARCNET:
		sockp->sa_family = AF_UNSPEC;
		hlen = ARC_HDRLEN;
		align = 5;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_LINK;
		/* XXX 4(FORMAC)+6(dst)+6(src) */
		hlen = 16;
		align = 0;
		break;

	case DLT_ECONET:
		sockp->sa_family = AF_UNSPEC;
		hlen = 6;
		align = 2;
		break;

	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	/*
	 * If there aren't enough bytes for a link level header or the
	 * packet length exceeds the interface mtu, return an error.
	 */
	if (len - hlen > mtu)
		return (EMSGSIZE);

	/*
	 * XXX Avoid complicated buffer chaining ---
	 * bail if it won't fit in a single mbuf.
	 * (Take into account possible alignment bytes)
	 */
	if (len + align > MCLBYTES)
		return (EIO);

	m = m_gethdr(M_WAIT, MT_DATA);
	m_reset_rcvif(m);
	m->m_pkthdr.len = (int)(len - hlen);
	if (len + align > MHLEN) {
		m_clget(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	/* Ensure the data is properly aligned */
	if (align > 0) {
		m->m_data += align;
		m->m_len -= (int)align;
	}

	error = uiomove(mtod(m, void *), len, uio);
	if (error)
		goto bad;
	if (hlen != 0) {
		memcpy(sockp->sa_data, mtod(m, void *), hlen);
		m->m_data += hlen;	/* XXX */
		len -= hlen;
	}
	m->m_len = (int)len;
	*mp = m;
	return (0);

bad:
	m_freem(m);
	return (error);
}
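
/*
 * Hedged example (not part of this file) of the write(2) buffer layout
 * for DLT_EN10MB: the caller supplies the full 14-byte Ethernet header,
 * which bpf_movein() copies into sockp->sa_data and strips from the
 * mbuf, leaving only the payload for the driver.  "bpf_fd", "payload"
 * and "payload_len" below are assumptions of this sketch:
 *
 *	uint8_t frame[ETHER_HDR_LEN + payload_len];
 *	memcpy(frame, dst_mac, ETHER_ADDR_LEN);
 *	memcpy(frame + 6, src_mac, ETHER_ADDR_LEN);
 *	frame[12] = ETHERTYPE_IP >> 8;
 *	frame[13] = ETHERTYPE_IP & 0xff;
 *	memcpy(frame + ETHER_HDR_LEN, payload, payload_len);
 *	(void)write(bpf_fd, frame, sizeof(frame));
 */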

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Must be called at splnet.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	KASSERT(mutex_owned(&bpf_mtx));
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;

	KASSERT(mutex_owned(&bpf_mtx));

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error __diagused;

		d->bd_promisc = 0;
		/*
		 * Take the device out of promiscuous mode.  Since we
		 * were able to enter promiscuous mode, we should be
		 * able to turn it off.  But we can get an error if
		 * the interface was configured down, so just log the
		 * failure instead of panicking.
		 */
		error = ifpromisc(bp->bif_ifp, 0);
#ifdef DIAGNOSTIC
		if (error)
			printf("%s: ifpromisc failed: %d", __func__, error);
#endif
	}
	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == NULL)
			panic("%s: descriptor not in list", __func__);
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == NULL)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = NULL;
	d->bd_bif = NULL;
}

static void
bpf_init(void)
{

	mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);

	LIST_INIT(&bpf_list);

	bpf_gstats.bs_recv = 0;
	bpf_gstats.bs_drop = 0;
	bpf_gstats.bs_capt = 0;

	return;
}

/*
 * bpfilterattach() is called at boot time.  We don't need to do anything
 * here, since any initialization will happen as part of module init code.
 */
/* ARGSUSED */
void
bpfilterattach(int n)
{

}

/*
 * Open the bpf device.  Each open(2) clones a fresh descriptor.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct bpf_d *d;
	struct file *fp;
	int error, fd;

	/* falloc() will fill in the descriptor for us. */
	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;

	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO);
	d->bd_bufsize = bpf_bufsize;
	d->bd_seesent = 1;
	d->bd_feedback = 0;
	d->bd_pid = l->l_proc->p_pid;
#ifdef _LP64
	if (curproc->p_flag & PK_32)
		d->bd_compat32 = 1;
#endif
	getnanotime(&d->bd_btime);
	d->bd_atime = d->bd_mtime = d->bd_btime;
	callout_init(&d->bd_callout, 0);
	selinit(&d->bd_sel);
	d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d);
	d->bd_jitcode = NULL;

	mutex_enter(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
	mutex_exit(&bpf_mtx);

	return fd_clone(fp, fd, flag, &bpf_fileops, d);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static int
bpf_close(struct file *fp)
{
	struct bpf_d *d;
	int s;

	KERNEL_LOCK(1, NULL);
	mutex_enter(&bpf_mtx);

	if ((d = fp->f_bpf) == NULL) {
		mutex_exit(&bpf_mtx);
		KERNEL_UNLOCK_ONE(NULL);
		return 0;
	}

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	d->bd_pid = curproc->p_pid;

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	if (d->bd_bif)
		bpf_detachd(d);
	splx(s);
	bpf_freed(d);
	LIST_REMOVE(d, bd_list);
	fp->f_bpf = NULL;

	mutex_exit(&bpf_mtx);
	KERNEL_UNLOCK_ONE(NULL);

	callout_destroy(&d->bd_callout);
	seldestroy(&d->bd_sel);
	softint_disestablish(d->bd_sih);
	free(d, M_DEVBUF);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
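
/*
 * The descriptor thus cycles three equally sized buffers: packets
 * accumulate in the store buffer (bd_sbuf); a rotation moves a full
 * store buffer into the hold slot (bd_hbuf), where read(2) drains it;
 * the drained buffer then returns to the free slot (bd_fbuf).
 * ROTATE_BUFFERS() may only be used when bd_fbuf is non-NULL, i.e.
 * when no hold buffer is outstanding.
 */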
/*
 * bpf_read - read the next chunk of packets from the buffers
 */
static int
bpf_read(struct file *fp, off_t *offp, struct uio *uio,
    kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_bpf;
	int timed_out;
	int error;
	int s;

	getnanotime(&d->bd_atime);
	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (fp->f_flag & FNONBLOCK) {
			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (EWOULDBLOCK);
			}
			ROTATE_BUFFERS(d);
			break;
		}

		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read, or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		error = tsleep(d, PRINET|PCATCH, "bpf",
		    d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			splx(s);
			KERNEL_UNLOCK_ONE(NULL);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (error != 0)
			goto done;
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	splx(s);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	s = splnet();
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
done:
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static inline void
bpf_wakeup(struct bpf_d *d)
{
	wakeup(d);
	if (d->bd_async)
		softint_schedule(d->bd_sih);
	selnotify(&d->bd_sel, 0, 0);
}

static void
bpf_softintr(void *cookie)
{
	struct bpf_d *d;

	d = cookie;
	if (d->bd_async)
		fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = arg;
	int s;

	s = splnet();
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	splx(s);
}

static int
bpf_write(struct file *fp, off_t *offp, struct uio *uio,
    kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_bpf;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	int error, s;
	static struct sockaddr_storage dst;

	m = NULL;	/* XXX gcc */

	KERNEL_LOCK(1, NULL);

	if (d->bd_bif == NULL) {
		KERNEL_UNLOCK_ONE(NULL);
		return (ENXIO);
	}
	getnanotime(&d->bd_mtime);

	ifp = d->bd_bif->bif_ifp;

	if (uio->uio_resid == 0) {
		KERNEL_UNLOCK_ONE(NULL);
		return (0);
	}

	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m,
	    (struct sockaddr *) &dst);
	if (error) {
		KERNEL_UNLOCK_ONE(NULL);
		return (error);
	}

	if (m->m_pkthdr.len > ifp->if_mtu) {
		KERNEL_UNLOCK_ONE(NULL);
		m_freem(m);
		return (EMSGSIZE);
	}

	if (d->bd_hdrcmplt)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, 0, M_COPYALL, M_NOWAIT);
		if (mc != NULL)
			m_set_rcvif(mc, ifp);
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (1 /*d->bd_direction == BPF_D_INOUT*/)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	s = splsoftnet();
	error = if_output_lock(ifp, ifp, m, (struct sockaddr *) &dst, NULL);

	if (mc != NULL) {
		if (error == 0)
			ifp->_if_input(ifp, mc);
		else
			m_freem(mc);
	}
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Should be called at splnet.
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_ccount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCSFEEDBACK	Set packet feedback mode.
 *  BIOCGFEEDBACK	Get packet feedback mode.
 *  BIOCGSEESENT	Get "see sent packets" mode.
 *  BIOCSSEESENT	Set "see sent packets" mode.
 */
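
/*
 * Hedged userland sketch of the usual setup sequence (illustrative
 * only; the interface name and buffer size are assumptions):
 *
 *	int fd = open("/dev/bpf", O_RDWR);
 *	u_int blen = 32768;
 *	ioctl(fd, BIOCSBLEN, &blen);	(must precede BIOCSETIF)
 *	struct ifreq ifr;
 *	strlcpy(ifr.ifr_name, "wm0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	(attach to the interface)
 *	u_int on = 1;
 *	ioctl(fd, BIOCIMMEDIATE, &on);	(deliver packets as they arrive)
 *	ioctl(fd, BIOCPROMISC, NULL);	(optional promiscuous mode)
 */
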
/* ARGSUSED */
static int
bpf_ioctl(struct file *fp, u_long cmd, void *addr)
{
	struct bpf_d *d = fp->f_bpf;
	int s, error = 0;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;
#ifdef _LP64
	if (curproc->p_flag & PK_32)
		d->bd_compat32 = 1;
	else
		d->bd_compat32 = 0;
#endif

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	splx(s);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			s = splnet();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splnet();
		reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, addr);
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		mutex_enter(&bpf_mtx);
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		mutex_exit(&bpf_mtx);
		break;

	/*
	 * Get interface name.
	 */
#ifdef OBIOCGETIF
	case OBIOCGETIF:
#endif
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, addr);
		break;

	/*
	 * Set interface.
	 */
#ifdef OBIOCSETIF
	case OBIOCSETIF:
#endif
	case BIOCSETIF:
		mutex_enter(&bpf_mtx);
		error = bpf_setif(d, addr);
		mutex_exit(&bpf_mtx);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}

#ifdef BIOCGORTIMEOUT
	/*
	 * Get read timeout.
	 */
	case BIOCGORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
#endif

#ifdef BIOCSORTIMEOUT
	/*
	 * Set read timeout.
	 */
	case BIOCSORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}
#endif

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			bs->bs_capt = d->bd_ccount;
			break;
		}

	case BIOCGSTATSOLD:
		{
			struct bpf_stat_old *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent" packets flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	/*
	 * Set "feed packets from bpf back to input" mode
	 */
	case BIOCSFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	/*
	 * Get "feed packets from bpf back to input" mode
	 */
	case BIOCGFEEDBACK:
		*(u_int *)addr = d->bd_feedback;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/*
		 * No need to do anything special here, as bpf_read()
		 * checks FNONBLOCK to decide whether or not to block
		 * the read.
		 */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case TIOCSPGRP:		/* Process or group to send signals to */
	case FIOSETOWN:
		error = fsetown(&d->bd_pgid, cmd, addr);
		break;

	case TIOCGPGRP:
	case FIOGETOWN:
		error = fgetown(d->bd_pgid, cmd, addr);
		break;
	}
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp)
{
	struct bpf_insn *fcode, *old;
	bpfjit_func_t jcode, oldj;
	size_t flen, size;
	int s;

	jcode = NULL;
	flen = fp->bf_len;

	if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) {
		return EINVAL;
	}

	if (flen) {
		/*
		 * Allocate the buffer, copy the byte-code from
		 * userspace and validate it.
		 */
		size = flen * sizeof(*fp->bf_insns);
		fcode = malloc(size, M_DEVBUF, M_WAITOK);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    !bpf_validate(fcode, (int)flen)) {
			free(fcode, M_DEVBUF);
			return EINVAL;
		}
		membar_consumer();
		if (bpf_jit)
			jcode = bpf_jit_generate(NULL, fcode, flen);
	} else {
		fcode = NULL;
	}

	s = splnet();
	old = d->bd_filter;
	d->bd_filter = fcode;
	oldj = d->bd_jitcode;
	d->bd_jitcode = jcode;
	reset_d(d);
	splx(s);

	if (old) {
		free(old, M_DEVBUF);
	}
	if (oldj) {
		bpf_jit_freecode(oldj);
	}

	return 0;
}
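
/*
 * Hedged example of a program a userland process might hand to the
 * BIOCSETF ioctl above: capture the first 68 bytes of every IPv4
 * packet and reject everything else (offsets assume DLT_EN10MB):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, 68),
 *		BPF_STMT(BPF_RET+BPF_K, 0),
 *	};
 *	struct bpf_program prog = { __arraycount(insns), insns };
 *	ioctl(fd, BIOCSETF, &prog);
 */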

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	char *cp;
	int unit_seen, i, s, error;

	KASSERT(mutex_owned(&bpf_mtx));
	/*
	 * Make sure the provided name has a unit number, and default
	 * it to '0' if not specified.
	 * XXX This is ugly ... do this differently?
	 */
	unit_seen = 0;
	cp = ifr->ifr_name;
	cp[sizeof(ifr->ifr_name) - 1] = '\0';	/* sanity */
	while (*cp++)
		if (*cp >= '0' && *cp <= '9')
			unit_seen = 1;
	if (!unit_seen) {
		/* Make sure to leave room for the '\0'. */
		for (i = 0; i < (IFNAMSIZ - 1); ++i) {
			if ((ifr->ifr_name[i] >= 'a' &&
			     ifr->ifr_name[i] <= 'z') ||
			    (ifr->ifr_name[i] >= 'A' &&
			     ifr->ifr_name[i] <= 'Z'))
				continue;
			ifr->ifr_name[i] = '0';
		}
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == NULL) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		s = splnet();
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		reset_d(d);
		splx(s);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}

/*
 * Copy the interface name to the ifreq.
 */
static void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
}

static int
bpf_stat(struct file *fp, struct stat *st)
{
	struct bpf_d *d = fp->f_bpf;

	(void)memset(st, 0, sizeof(*st));
	KERNEL_LOCK(1, NULL);
	st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
	st->st_atimespec = d->bd_atime;
	st->st_mtimespec = d->bd_mtime;
	st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	st->st_mode = S_IFCHR;
	KERNEL_UNLOCK_ONE(NULL);
	return 0;
}

/*
 * Support for poll() system call
 *
 * Return true iff the specific operation will not block indefinitely - with
 * the assumption that it is safe to positively acknowledge a request for the
 * ability to write to the BPF device.
 * Otherwise, return false but make a note that a selnotify() must be done.
 */
static int
bpf_poll(struct file *fp, int events)
{
	struct bpf_d *d = fp->f_bpf;
	int s = splnet();
	int revents;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	revents = events & (POLLOUT | POLLWRNORM);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		     d->bd_slen != 0)) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &d->bd_sel);
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	splx(s);
	return (revents);
}

static void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splnet();
	SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;
	int rv;

	KERNEL_LOCK(1, NULL);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	rv = (kn->kn_data > 0);
	KERNEL_UNLOCK_ONE(NULL);
	return rv;
}

static const struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

static int
bpf_kqfilter(struct file *fp, struct knote *kn)
{
	struct bpf_d *d = fp->f_bpf;
	struct klist *klist;
	int s;

	KERNEL_LOCK(1, NULL);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.sel_klist;
		kn->kn_fop = &bpfread_filtops;
		break;

	default:
		KERNEL_UNLOCK_ONE(NULL);
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}
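
/*
 * Hedged userland sketch of waiting for readability via kevent(2),
 * which ends up in filt_bpfread() above ("fd" is an open bpf file):
 *
 *	struct kevent ev;
 *	int kq = kqueue();
 *	EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, 0);
 *	(void)kevent(kq, &ev, 1, NULL, 0, NULL);
 *	(void)kevent(kq, NULL, 0, &ev, 1, NULL);
 *
 * On return ev.data carries the byte count computed in filt_bpfread().
 */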

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcpy");
		count = min(m->m_len, len);
		memcpy(dst, mtod(m, const void *), count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
	return dst_arg;
}

/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * pkt       pointer to the packet, either a data buffer or an mbuf chain
 * buflen    buffer length, if pkt is a data buffer
 * cpfn      a function that can copy pkt into the listener's buffer
 * pktlen    length of the packet
 * rcv       true if packet came in
 */
static inline void
bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
    void *pkt, u_int pktlen, u_int buflen, const bool rcv)
{
	uint32_t mem[BPF_MEMWORDS];
	bpf_args_t args = {
		.pkt = (const uint8_t *)pkt,
		.wirelen = pktlen,
		.buflen = buflen,
		.mem = mem,
		.arg = NULL
	};
	bool gottime = false;
	struct timespec ts;

	/*
	 * Note that the IPL does not have to be raised at this point.
	 * The only problem that could arise here would be if two different
	 * interfaces shared any data, which is not the case.
	 */
	for (struct bpf_d *d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		u_int slen;

		if (!d->bd_seesent && !rcv) {
			continue;
		}
		d->bd_rcount++;
		bpf_gstats.bs_recv++;

		if (d->bd_jitcode)
			slen = d->bd_jitcode(NULL, &args);
		else
			slen = bpf_filter_ext(NULL, d->bd_filter, &args);

		if (!slen) {
			continue;
		}
		if (!gottime) {
			gottime = true;
			nanotime(&ts);
		}
		catchpacket(d, pkt, pktlen, slen, cpfn, &ts);
	}
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
static void
_bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{

	bpf_deliver(bp, memcpy, pkt, pktlen, pktlen, true);
}

/*
 * Incoming linkage from device drivers, when the head of the packet is in
 * a buffer, and the tail is in an mbuf chain.
 */
static void
_bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	u_int pktlen;
	struct mbuf mb;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m) + dlen;

	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	(void)memset(&mb, 0, sizeof(mb));
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;

	bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif_index != 0);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
static void
_bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	void *(*cpfn)(void *, const void *, size_t);
	u_int pktlen, buflen;
	void *marg;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m);

	if (pktlen == m->m_len) {
		cpfn = (void *)memcpy;
		marg = mtod(m, void *);
		buflen = pktlen;
	} else {
		cpfn = bpf_mcpy;
		marg = m;
		buflen = 0;
	}

	bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif_index != 0);
}

/*
 * We need to prepend the address family as
 * a four byte field.  Cons up a dummy header
 * to pacify bpf.  This is safe because bpf
 * will only read from the mbuf (i.e., it won't
 * try to free it or keep a pointer to it).
 */
static void
_bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m)
{
	struct mbuf m0;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_len = 4;
	m0.m_data = (char *)&af;

	_bpf_mtap(bp, &m0);
}

/*
 * Put the SLIP pseudo-"link header" in place.
 * Note this M_PREPEND() should never fail,
 * since we know we always have enough space
 * in the input buffer.
 */
static void
_bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
{
	int s;
	u_char *hp;

	M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
	if (*m == NULL)
		return;

	hp = mtod(*m, u_char *);
	hp[SLX_DIR] = SLIPDIR_IN;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, *m);
	splx(s);

	m_adj(*m, SLIP_HDRLEN);
}

/*
 * Put the SLIP pseudo-"link header" in
 * place.  The compressed header is now
 * at the beginning of the mbuf.
 */
static void
_bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
{
	struct mbuf m0;
	u_char *hp;
	int s;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_data = m0.m_dat;
	m0.m_len = SLIP_HDRLEN;

	hp = mtod(&m0, u_char *);

	hp[SLX_DIR] = SLIPDIR_OUT;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, &m0);
	splx(s);
	m_freem(m);
}

static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen = d->bd_bif->bif_hdrlen;
	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
#ifdef _LP64
	if (d->bd_compat32)
		return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen);
	else
#endif
		return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen);
}
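
/*
 * Worked example (assuming DLT_EN10MB, i.e. bif_hdrlen == 14): the
 * value returned is BPF_WORDALIGN(14 + SIZEOF_BPF_HDR) - 14, so the
 * stored header is padded just enough that the network layer header,
 * which sits 14 bytes into the capture, lands on a longword boundary.
 */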

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Call the wakeup functions if it's time to wake up
 * a listener (buffer full).  "cpfn" is the routine called to do the
 * actual data transfer.  memcpy is passed in to copy contiguous chunks,
 * while bpf_mcpy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
{
	char *h;
	int totlen, curlen, caplen;
	int hdrlen = bpf_hdrlen(d);
	int do_wakeup = 0;

	++d->bd_ccount;
	++bpf_gstats.bs_capt;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;
	/*
	 * If we adjusted totlen to fit the bufsize, it could be that
	 * totlen is smaller than hdrlen because of the link layer header.
	 */
	caplen = totlen - hdrlen;
	if (caplen < 0)
		caplen = 0;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
#ifdef _LP64
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			++bpf_gstats.bs_drop;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	h = (char *)d->bd_sbuf + curlen;
#ifdef _LP64
	if (d->bd_compat32) {
		struct bpf_hdr32 *hp32;

		hp32 = (struct bpf_hdr32 *)h;
		hp32->bh_tstamp.tv_sec = ts->tv_sec;
		hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp32->bh_datalen = pktlen;
		hp32->bh_hdrlen = hdrlen;
		hp32->bh_caplen = caplen;
	} else
#endif
	{
		struct bpf_hdr *hp;

		hp = (struct bpf_hdr *)h;
		hp->bh_tstamp.tv_sec = ts->tv_sec;
		hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp->bh_datalen = pktlen;
		hp->bh_hdrlen = hdrlen;
		hp->bh_caplen = caplen;
	}

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(h + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
	 * will cause filt_bpfread() to be called with it adjusted.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}
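
/*
 * Hedged sketch of how a reader walks the records laid down above:
 * each record is a bpf header followed by the capture, and records
 * are word aligned ("buf"/"len" being the result of one read(2),
 * process() a stand-in for the consumer):
 *
 *	char *p = buf;
 *	while (p < buf + len) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		process(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */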

/*
 * Allocate the per-descriptor packet buffers.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (!d->bd_fbuf)
		return (ENOBUFS);
	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (!d->bd_sbuf) {
		free(d->bd_fbuf, M_DEVBUF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and has not yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_DEVBUF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_DEVBUF);
	}
	if (d->bd_filter)
		free(d->bd_filter, M_DEVBUF);

	if (d->bd_jitcode != NULL) {
		bpf_jit_freecode(d->bd_jitcode);
	}
}

/*
 * Attach an interface to bpf.  dlt is the link layer type;
 * hdrlen is the fixed size of the link header for the specified dlt
 * (variable length headers not yet supported).
 */
static void
_bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;
	bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
	if (bp == NULL)
		panic("bpfattach");

	mutex_enter(&bpf_mtx);
	bp->bif_dlist = NULL;
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	bp->bif_hdrlen = hdrlen;
	mutex_exit(&bpf_mtx);
#if 0
	printf("bpf: %s attached\n", ifp->if_xname);
#endif
}

/*
 * Remove an interface from bpf.
 */
static void
_bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, **pbp;
	struct bpf_d *d;
	int s;

	mutex_enter(&bpf_mtx);
	/* Nuke the vnodes for any open instances */
	LIST_FOREACH(d, &bpf_list, bd_list) {
		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
			/*
			 * Detach the descriptor from an interface now.
			 * It will be freed later by the close routine.
			 */
			s = splnet();
			d->bd_promisc = 0;	/* we can't touch device. */
			bpf_detachd(d);
			splx(s);
		}
	}

again:
	for (bp = bpf_iflist, pbp = &bpf_iflist;
	     bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
		if (bp->bif_ifp == ifp) {
			*pbp = bp->bif_next;
			free(bp, M_DEVBUF);
			goto again;
		}
	}
	mutex_exit(&bpf_mtx);
}

/*
 * Change the data link type of an interface.
 */
static void
_bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_driverp == &ifp->if_bpf)
			break;
	}
	if (bp == NULL)
		panic("bpf_change_type");

	bp->bif_dlt = dlt;

	bp->bif_hdrlen = hdrlen;
}

/*
 * Get a list of the data link types available on the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return ENOMEM;
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	bfl->bfl_len = n;
	return error;
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int s, error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	KASSERT(mutex_owned(&bpf_mtx));

	if (d->bd_bif->bif_dlt == dlt)
		return 0;
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return EINVAL;
	s = splnet();
	opromisc = d->bd_promisc;
	bpf_detachd(d);
	bpf_attachd(d, bp);
	reset_d(d);
	if (opromisc) {
		error = ifpromisc(bp->bif_ifp, 1);
		if (error)
			printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
			    bp->bif_ifp->if_xname, error);
		else
			d->bd_promisc = 1;
	}
	splx(s);
	return 0;
}

static int
sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newsize;
	newsize = bpf_maxbufsize;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
		return (EINVAL);

	bpf_maxbufsize = newsize;

	return (0);
}
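
/*
 * The limit is adjustable at run time, e.g. (illustrative value):
 *
 *	sysctl -w net.bpf.maxbufsize=1048576
 *
 * Values outside [BPF_MINBUFSIZE, BPF_MAXBUFSIZE] are rejected above.
 */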

#if defined(MODULAR) || defined(BPFJIT)
static int
sysctl_net_bpf_jit(SYSCTLFN_ARGS)
{
	bool newval;
	int error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	newval = bpf_jit;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	bpf_jit = newval;

	/*
	 * Do a full sync to publish new bpf_jit value and
	 * update bpfjit_module_ops.bj_generate_code variable.
	 */
	membar_sync();

	if (newval && bpfjit_module_ops.bj_generate_code == NULL) {
		printf("JIT compilation is postponed "
		    "until after bpfjit module is loaded\n");
	}

	return 0;
}
#endif

static int
sysctl_net_bpf_peers(SYSCTLFN_ARGS)
{
	int error, elem_count;
	struct bpf_d *dp;
	struct bpf_d_ext dpe;
	size_t len, needed, elem_size, out_size;
	char *sp;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (namelen != 2)
		return (EINVAL);

	/* BPF peers is privileged information. */
	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
	    KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
	if (error)
		return (EPERM);

	len = (oldp != NULL) ? *oldlenp : 0;
	sp = oldp;
	elem_size = name[0];
	elem_count = name[1];
	out_size = MIN(sizeof(dpe), elem_size);
	needed = 0;

	if (elem_size < 1 || elem_count < 0)
		return (EINVAL);

	mutex_enter(&bpf_mtx);
	LIST_FOREACH(dp, &bpf_list, bd_list) {
		if (len >= elem_size && elem_count > 0) {
#define BPF_EXT(field)	dpe.bde_ ## field = dp->bd_ ## field
			BPF_EXT(bufsize);
			BPF_EXT(promisc);
			BPF_EXT(state);
			BPF_EXT(immediate);
			BPF_EXT(hdrcmplt);
			BPF_EXT(seesent);
			BPF_EXT(pid);
			BPF_EXT(rcount);
			BPF_EXT(dcount);
			BPF_EXT(ccount);
#undef BPF_EXT
			if (dp->bd_bif)
				(void)strlcpy(dpe.bde_ifname,
				    dp->bd_bif->bif_ifp->if_xname,
				    IFNAMSIZ - 1);
			else
				dpe.bde_ifname[0] = '\0';

			error = copyout(&dpe, sp, out_size);
			if (error)
				break;
			sp += elem_size;
			len -= elem_size;
		}
		needed += elem_size;
		if (elem_count > 0 && elem_count != INT_MAX)
			elem_count--;
	}
	mutex_exit(&bpf_mtx);

	*oldlenp = needed;

	return (error);
}

static struct sysctllog *bpf_sysctllog;
static void
sysctl_net_bpf_setup(void)
{
	const struct sysctlnode *node;

	node = NULL;
	sysctl_createv(&bpf_sysctllog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "bpf",
	    SYSCTL_DESCR("BPF options"),
	    NULL, 0, NULL, 0,
	    CTL_NET, CTL_CREATE, CTL_EOL);
	if (node != NULL) {
#if defined(MODULAR) || defined(BPFJIT)
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_BOOL, "jit",
		    SYSCTL_DESCR("Toggle Just-In-Time compilation"),
		    sysctl_net_bpf_jit, 0, &bpf_jit, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
#endif
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_INT, "maxbufsize",
		    SYSCTL_DESCR("Maximum size for data capture buffer"),
		    sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "stats",
		    SYSCTL_DESCR("BPF stats"),
		    NULL, 0, &bpf_gstats, sizeof(bpf_gstats),
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "peers",
		    SYSCTL_DESCR("BPF peers"),
		    sysctl_net_bpf_peers, 0, NULL, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}

struct bpf_ops bpf_ops_kernel = {
	.bpf_attach =		_bpfattach,
	.bpf_detach =		_bpfdetach,
	.bpf_change_type =	_bpf_change_type,

	.bpf_tap =		_bpf_tap,
	.bpf_mtap =		_bpf_mtap,
	.bpf_mtap2 =		_bpf_mtap2,
	.bpf_mtap_af =		_bpf_mtap_af,
	.bpf_mtap_sl_in =	_bpf_mtap_sl_in,
	.bpf_mtap_sl_out =	_bpf_mtap_sl_out,
};

MODULE(MODULE_CLASS_DRIVER, bpf, "bpf_filter");

static int
bpf_modcmd(modcmd_t cmd, void *arg)
{
#ifdef _MODULE
	devmajor_t bmajor, cmajor;
#endif
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpf_init();
#ifdef _MODULE
		bmajor = cmajor = NODEVMAJOR;
		error = devsw_attach("bpf", NULL, &bmajor,
		    &bpf_cdevsw, &cmajor);
		if (error)
			break;
#endif

		bpf_ops_handover_enter(&bpf_ops_kernel);
		atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
		bpf_ops_handover_exit();
		sysctl_net_bpf_setup();
		break;

	case MODULE_CMD_FINI:
		/*
		 * While there is no reference counting for bpf callers,
		 * unload could at least in theory be done similarly to
		 * system call disestablishment.  This should even be
		 * a little simpler:
		 *
		 * 1) replace op vector with stubs
		 * 2) post update to all cpus with xc
		 * 3) check that nobody is in bpf anymore
		 *    (it's doubtful we'd want something like l_sysent,
		 *    but we could do something like *signed* percpu
		 *    counters.  if the sum is 0, we're good).
		 * 4) if fail, unroll changes
		 *
		 * NOTE: change won't be atomic to the outside.  some
		 * packets may not be captured even if unload is
		 * not successful.  I think packet capture not working
		 * is a perfectly logical consequence of trying to
		 * disable packet capture.
		 */
		error = EOPNOTSUPP;
		/* insert sysctl teardown */
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}