1 /*	$NetBSD: bpf.c,v 1.207 2017/02/01 08:06:01 ozaki-r Exp $	*/
2
3 /*
4 * Copyright (c) 1990, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from the Stanford/CMU enet packet filter,
8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
10 * Berkeley Laboratory.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95
37 * static char rcsid[] =
38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.207 2017/02/01 08:06:01 ozaki-r Exp $");
43
44 #if defined(_KERNEL_OPT)
45 #include "opt_bpf.h"
46 #include "sl.h"
47 #include "strip.h"
48 #include "opt_net_mpsafe.h"
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/mbuf.h>
54 #include <sys/buf.h>
55 #include <sys/time.h>
56 #include <sys/proc.h>
57 #include <sys/ioctl.h>
58 #include <sys/conf.h>
59 #include <sys/vnode.h>
60 #include <sys/queue.h>
61 #include <sys/stat.h>
62 #include <sys/module.h>
63 #include <sys/atomic.h>
64 #include <sys/cpu.h>
65
66 #include <sys/file.h>
67 #include <sys/filedesc.h>
68 #include <sys/tty.h>
69 #include <sys/uio.h>
70
71 #include <sys/protosw.h>
72 #include <sys/socket.h>
73 #include <sys/errno.h>
74 #include <sys/kernel.h>
75 #include <sys/poll.h>
76 #include <sys/sysctl.h>
77 #include <sys/kauth.h>
78 #include <sys/syslog.h>
79
80 #include <net/if.h>
81 #include <net/slip.h>
82
83 #include <net/bpf.h>
84 #include <net/bpfdesc.h>
85 #include <net/bpfjit.h>
86
87 #include <net/if_arc.h>
88 #include <net/if_ether.h>
89
90 #include <netinet/in.h>
91 #include <netinet/if_inarp.h>
92
93
94 #include <compat/sys/sockio.h>
95
96 #ifndef BPF_BUFSIZE
97 /*
98 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
99 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
100 */
101 # define BPF_BUFSIZE 32768
102 #endif
103
104 #define PRINET 26 /* interruptible */
105
106 /*
107 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
108 * XXX the default values should be computed dynamically based
109 * on available memory size and available mbuf clusters.
110 */
111 static int bpf_bufsize = BPF_BUFSIZE;
112 static int bpf_maxbufsize = BPF_DFLTBUFSIZE; /* XXX set dynamically, see above */
113 static bool bpf_jit = false;
114
115 struct bpfjit_ops bpfjit_module_ops = {
116 .bj_generate_code = NULL,
117 .bj_free_code = NULL
118 };
119
120 /*
121 * Global BPF statistics returned by net.bpf.stats sysctl.
122 */
123 static struct bpf_stat bpf_gstats;
124
125 /*
126 * Use a mutex to avoid a race condition between gathering the stats/peers
127 * and opening/closing the device.
128 */
129 static kmutex_t bpf_mtx;
130
131 /*
132 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
133 * bpf_list holds the descriptors for all open bpf files.
134 */
135 static struct bpf_if *bpf_iflist;
136 static LIST_HEAD(, bpf_d) bpf_list;
137
138 static int bpf_allocbufs(struct bpf_d *);
139 static void bpf_deliver(struct bpf_if *,
140 void *(*cpfn)(void *, const void *, size_t),
141 void *, u_int, u_int, const bool);
142 static void bpf_freed(struct bpf_d *);
143 static void bpf_ifname(struct ifnet *, struct ifreq *);
144 static void *bpf_mcpy(void *, const void *, size_t);
145 static int bpf_movein(struct uio *, int, uint64_t,
146 struct mbuf **, struct sockaddr *);
147 static void bpf_attachd(struct bpf_d *, struct bpf_if *);
148 static void bpf_detachd(struct bpf_d *);
149 static int bpf_setif(struct bpf_d *, struct ifreq *);
150 static int bpf_setf(struct bpf_d *, struct bpf_program *);
151 static void bpf_timed_out(void *);
152 static inline void
153 bpf_wakeup(struct bpf_d *);
154 static int bpf_hdrlen(struct bpf_d *);
155 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
156 void *(*)(void *, const void *, size_t), struct timespec *);
157 static void reset_d(struct bpf_d *);
158 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
159 static int bpf_setdlt(struct bpf_d *, u_int);
160
161 static int bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t,
162 int);
163 static int bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t,
164 int);
165 static int bpf_ioctl(struct file *, u_long, void *);
166 static int bpf_poll(struct file *, int);
167 static int bpf_stat(struct file *, struct stat *);
168 static int bpf_close(struct file *);
169 static int bpf_kqfilter(struct file *, struct knote *);
170 static void bpf_softintr(void *);
171
172 static const struct fileops bpf_fileops = {
173 .fo_read = bpf_read,
174 .fo_write = bpf_write,
175 .fo_ioctl = bpf_ioctl,
176 .fo_fcntl = fnullop_fcntl,
177 .fo_poll = bpf_poll,
178 .fo_stat = bpf_stat,
179 .fo_close = bpf_close,
180 .fo_kqfilter = bpf_kqfilter,
181 .fo_restart = fnullop_restart,
182 };
183
184 dev_type_open(bpfopen);
185
186 const struct cdevsw bpf_cdevsw = {
187 .d_open = bpfopen,
188 .d_close = noclose,
189 .d_read = noread,
190 .d_write = nowrite,
191 .d_ioctl = noioctl,
192 .d_stop = nostop,
193 .d_tty = notty,
194 .d_poll = nopoll,
195 .d_mmap = nommap,
196 .d_kqfilter = nokqfilter,
197 .d_discard = nodiscard,
198 .d_flag = D_OTHER
199 };
200
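/*
 * Hooks into the optional bpfjit module.  bj_generate_code stays NULL
 * until that module loads; until then bpf_jit_generate() returns NULL
 * and callers fall back to the BPF interpreter.
 */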
201 bpfjit_func_t
202 bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size)
203 {
204
205 membar_consumer();
206 if (bpfjit_module_ops.bj_generate_code != NULL) {
207 return bpfjit_module_ops.bj_generate_code(bc, code, size);
208 }
209 return NULL;
210 }
211
212 void
213 bpf_jit_freecode(bpfjit_func_t jcode)
214 {
215 KASSERT(bpfjit_module_ops.bj_free_code != NULL);
216 bpfjit_module_ops.bj_free_code(jcode);
217 }
218
219 static int
220 bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp,
221 struct sockaddr *sockp)
222 {
223 struct mbuf *m;
224 int error;
225 size_t len;
226 size_t hlen;
227 size_t align;
228
229 /*
230 * Build a sockaddr based on the data link layer type.
231 * We do this at this level because the ethernet header
232 * is copied directly into the data field of the sockaddr.
233 * In the case of SLIP, there is no header and the packet
234 * is forwarded as is.
235 * Also, we are careful to leave room at the front of the mbuf
236 * for the link level header.
237 */
238 switch (linktype) {
239
240 case DLT_SLIP:
241 sockp->sa_family = AF_INET;
242 hlen = 0;
243 align = 0;
244 break;
245
246 case DLT_PPP:
247 sockp->sa_family = AF_UNSPEC;
248 hlen = 0;
249 align = 0;
250 break;
251
252 case DLT_EN10MB:
253 sockp->sa_family = AF_UNSPEC;
254 /* XXX Would MAXLINKHDR be better? */
255 /* 6(dst)+6(src)+2(type) */
256 hlen = sizeof(struct ether_header);
257 align = 2;
258 break;
259
260 case DLT_ARCNET:
261 sockp->sa_family = AF_UNSPEC;
262 hlen = ARC_HDRLEN;
263 align = 5;
264 break;
265
266 case DLT_FDDI:
267 sockp->sa_family = AF_LINK;
268 /* XXX 4(FORMAC)+6(dst)+6(src) */
269 hlen = 16;
270 align = 0;
271 break;
272
273 case DLT_ECONET:
274 sockp->sa_family = AF_UNSPEC;
275 hlen = 6;
276 align = 2;
277 break;
278
279 case DLT_NULL:
280 sockp->sa_family = AF_UNSPEC;
281 hlen = 0;
282 align = 0;
283 break;
284
285 default:
286 return (EIO);
287 }
288
289 len = uio->uio_resid;
290 /*
291 * If there aren't enough bytes for a link level header or the
292 * packet length exceeds the interface mtu, return an error.
293 */
294 if (len - hlen > mtu)
295 return (EMSGSIZE);
296
297 /*
298 * XXX Avoid complicated buffer chaining ---
299 * bail if it won't fit in a single mbuf.
300 * (Take into account possible alignment bytes)
301 */
302 if (len + align > MCLBYTES)
303 return (EIO);
304
305 m = m_gethdr(M_WAIT, MT_DATA);
306 m_reset_rcvif(m);
307 m->m_pkthdr.len = (int)(len - hlen);
308 if (len + align > MHLEN) {
309 m_clget(m, M_WAIT);
310 if ((m->m_flags & M_EXT) == 0) {
311 error = ENOBUFS;
312 goto bad;
313 }
314 }
315
316 /* Ensure the data is properly aligned */
317 if (align > 0) {
318 m->m_data += align;
319 m->m_len -= (int)align;
320 }
321
322 error = uiomove(mtod(m, void *), len, uio);
323 if (error)
324 goto bad;
325 if (hlen != 0) {
326 memcpy(sockp->sa_data, mtod(m, void *), hlen);
327 m->m_data += hlen; /* XXX */
328 len -= hlen;
329 }
330 m->m_len = (int)len;
331 *mp = m;
332 return (0);
333
334 bad:
335 m_freem(m);
336 return (error);
337 }
338
339 /*
340 * Attach file to the bpf interface, i.e. make d listen on bp.
341 * Must be called at splnet.
342 */
343 static void
344 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
345 {
346 KASSERT(mutex_owned(&bpf_mtx));
347 /*
348 * Point d at bp, and add d to the interface's list of listeners.
349 * Finally, point the driver's bpf cookie at the interface so
350 * it will divert packets to bpf.
351 */
352 d->bd_bif = bp;
353 d->bd_next = bp->bif_dlist;
354 bp->bif_dlist = d;
355
356 *bp->bif_driverp = bp;
357 }
358
359 /*
360 * Detach a file from its interface.
361 */
362 static void
363 bpf_detachd(struct bpf_d *d)
364 {
365 struct bpf_d **p;
366 struct bpf_if *bp;
367
368 KASSERT(mutex_owned(&bpf_mtx));
369
370 bp = d->bd_bif;
371 /*
372 * Check if this descriptor had requested promiscuous mode.
373 * If so, turn it off.
374 */
375 if (d->bd_promisc) {
376 int error __diagused;
377
378 d->bd_promisc = 0;
379 /*
380 * Take device out of promiscuous mode. Since we were
381 * able to enter promiscuous mode, we should be able
382 * to turn it off. But we can get an error if
383 * the interface was configured down, so just report
384 * the error instead of panicking.
385 */
386 error = ifpromisc(bp->bif_ifp, 0);
387 #ifdef DIAGNOSTIC
388 if (error)
389 printf("%s: ifpromisc failed: %d", __func__, error);
390 #endif
391 }
392 /* Remove d from the interface's descriptor list. */
393 p = &bp->bif_dlist;
394 while (*p != d) {
395 p = &(*p)->bd_next;
396 if (*p == NULL)
397 panic("%s: descriptor not in list", __func__);
398 }
399 *p = (*p)->bd_next;
400 if (bp->bif_dlist == NULL)
401 /*
402 * Let the driver know that there are no more listeners.
403 */
404 *d->bd_bif->bif_driverp = NULL;
405 d->bd_bif = NULL;
406 }
407
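/*
 * One-time initialization of the global lock, the descriptor list and
 * the global statistics; called from module initialization.
 */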
408 static void
409 bpf_init(void)
410 {
411
412 mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);
413
414 LIST_INIT(&bpf_list);
415
416 bpf_gstats.bs_recv = 0;
417 bpf_gstats.bs_drop = 0;
418 bpf_gstats.bs_capt = 0;
419
420 return;
421 }
422
423 /*
424 * bpfilterattach() is called at boot time. We don't need to do anything
425 * here, since any initialization will happen as part of module init code.
426 */
427 /* ARGSUSED */
428 void
429 bpfilterattach(int n)
430 {
431
432 }
433
434 /*
435 * Open the bpf device. Each open clones a new descriptor.
436 */
437 /* ARGSUSED */
438 int
439 bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
440 {
441 struct bpf_d *d;
442 struct file *fp;
443 int error, fd;
444
445 /* falloc() will fill in the descriptor for us. */
446 if ((error = fd_allocfile(&fp, &fd)) != 0)
447 return error;
448
449 d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO);
450 d->bd_bufsize = bpf_bufsize;
451 d->bd_seesent = 1;
452 d->bd_feedback = 0;
453 d->bd_pid = l->l_proc->p_pid;
454 #ifdef _LP64
455 if (curproc->p_flag & PK_32)
456 d->bd_compat32 = 1;
457 #endif
458 getnanotime(&d->bd_btime);
459 d->bd_atime = d->bd_mtime = d->bd_btime;
460 callout_init(&d->bd_callout, 0);
461 selinit(&d->bd_sel);
462 d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d);
463 d->bd_jitcode = NULL;
464
465 mutex_enter(&bpf_mtx);
466 LIST_INSERT_HEAD(&bpf_list, d, bd_list);
467 mutex_exit(&bpf_mtx);
468
469 return fd_clone(fp, fd, flag, &bpf_fileops, d);
470 }
471
472 /*
473 * Close the descriptor by detaching it from its interface,
474 * deallocating its buffers, and marking it free.
475 */
476 /* ARGSUSED */
477 static int
478 bpf_close(struct file *fp)
479 {
480 struct bpf_d *d;
481 int s;
482
483 KERNEL_LOCK(1, NULL);
484 mutex_enter(&bpf_mtx);
485
486 if ((d = fp->f_bpf) == NULL) {
487 mutex_exit(&bpf_mtx);
488 KERNEL_UNLOCK_ONE(NULL);
489 return 0;
490 }
491
492 /*
493 * Refresh the PID associated with this bpf file.
494 */
495 d->bd_pid = curproc->p_pid;
496
497 s = splnet();
498 if (d->bd_state == BPF_WAITING)
499 callout_stop(&d->bd_callout);
500 d->bd_state = BPF_IDLE;
501 if (d->bd_bif)
502 bpf_detachd(d);
503 splx(s);
504 bpf_freed(d);
505 LIST_REMOVE(d, bd_list);
506 fp->f_bpf = NULL;
507
508 mutex_exit(&bpf_mtx);
509 KERNEL_UNLOCK_ONE(NULL);
510
511 callout_destroy(&d->bd_callout);
512 seldestroy(&d->bd_sel);
513 softint_disestablish(d->bd_sih);
514 free(d, M_DEVBUF);
515
516 return (0);
517 }
518
519 /*
520 * Rotate the packet buffers in descriptor d. Move the store buffer
521 * into the hold slot, and the free buffer into the store slot.
522 * Zero the length of the new store buffer.
523 */
524 #define ROTATE_BUFFERS(d) \
525 (d)->bd_hbuf = (d)->bd_sbuf; \
526 (d)->bd_hlen = (d)->bd_slen; \
527 (d)->bd_sbuf = (d)->bd_fbuf; \
528 (d)->bd_slen = 0; \
529 (d)->bd_fbuf = NULL;
530 /*
531 * bpf_read - read the next chunk of packets from the buffers
532 */
533 static int
534 bpf_read(struct file *fp, off_t *offp, struct uio *uio,
535 kauth_cred_t cred, int flags)
536 {
537 struct bpf_d *d = fp->f_bpf;
538 int timed_out;
539 int error;
540 int s;
541
542 getnanotime(&d->bd_atime);
543 /*
544 * Restrict application to use a buffer the same size as
545 * the kernel buffers.
546 */
547 if (uio->uio_resid != d->bd_bufsize)
548 return (EINVAL);
549
550 KERNEL_LOCK(1, NULL);
551 s = splnet();
552 if (d->bd_state == BPF_WAITING)
553 callout_stop(&d->bd_callout);
554 timed_out = (d->bd_state == BPF_TIMED_OUT);
555 d->bd_state = BPF_IDLE;
556 /*
557 * If the hold buffer is empty, then do a timed sleep, which
558 * ends when the timeout expires or when enough packets
559 * have arrived to fill the store buffer.
560 */
561 while (d->bd_hbuf == NULL) {
562 if (fp->f_flag & FNONBLOCK) {
563 if (d->bd_slen == 0) {
564 splx(s);
565 KERNEL_UNLOCK_ONE(NULL);
566 return (EWOULDBLOCK);
567 }
568 ROTATE_BUFFERS(d);
569 break;
570 }
571
572 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
573 /*
574 * One or more packets either arrived since the previous
575 * read or arrived while we were asleep.
576 * Rotate the buffers and return what's here.
577 */
578 ROTATE_BUFFERS(d);
579 break;
580 }
581 error = tsleep(d, PRINET|PCATCH, "bpf",
582 d->bd_rtout);
583 if (error == EINTR || error == ERESTART) {
584 splx(s);
585 KERNEL_UNLOCK_ONE(NULL);
586 return (error);
587 }
588 if (error == EWOULDBLOCK) {
589 /*
590 * On a timeout, return what's in the buffer,
591 * which may be nothing. If there is something
592 * in the store buffer, we can rotate the buffers.
593 */
594 if (d->bd_hbuf)
595 /*
596 * We filled up the buffer in between
597 * getting the timeout and arriving
598 * here, so we don't need to rotate.
599 */
600 break;
601
602 if (d->bd_slen == 0) {
603 splx(s);
604 KERNEL_UNLOCK_ONE(NULL);
605 return (0);
606 }
607 ROTATE_BUFFERS(d);
608 break;
609 }
610 if (error != 0)
611 goto done;
612 }
613 /*
614 * At this point, we know we have something in the hold slot.
615 */
616 splx(s);
617
618 /*
619 * Move data from hold buffer into user space.
620 * We know the entire buffer is transferred since
621 * we checked above that the read buffer is bd_bufsize bytes.
622 */
623 error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
624
625 s = splnet();
626 d->bd_fbuf = d->bd_hbuf;
627 d->bd_hbuf = NULL;
628 d->bd_hlen = 0;
629 done:
630 splx(s);
631 KERNEL_UNLOCK_ONE(NULL);
632 return (error);
633 }
634
635
636 /*
637 * If there are processes sleeping on this descriptor, wake them up.
638 */
639 static inline void
640 bpf_wakeup(struct bpf_d *d)
641 {
642 wakeup(d);
643 if (d->bd_async)
644 softint_schedule(d->bd_sih);
645 selnotify(&d->bd_sel, 0, 0);
646 }
647
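/*
 * Soft interrupt handler scheduled from bpf_wakeup(); delivers SIGIO
 * to the owning process or group when the descriptor is in async mode.
 */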
648 static void
649 bpf_softintr(void *cookie)
650 {
651 struct bpf_d *d;
652
653 d = cookie;
654 if (d->bd_async)
655 fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL);
656 }
657
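/*
 * Callout handler for the read timeout: mark the descriptor as timed
 * out and wake a waiting reader if the store buffer holds any data.
 */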
658 static void
659 bpf_timed_out(void *arg)
660 {
661 struct bpf_d *d = arg;
662 int s;
663
664 s = splnet();
665 if (d->bd_state == BPF_WAITING) {
666 d->bd_state = BPF_TIMED_OUT;
667 if (d->bd_slen != 0)
668 bpf_wakeup(d);
669 }
670 splx(s);
671 }
672
673
674 static int
675 bpf_write(struct file *fp, off_t *offp, struct uio *uio,
676 kauth_cred_t cred, int flags)
677 {
678 struct bpf_d *d = fp->f_bpf;
679 struct ifnet *ifp;
680 struct mbuf *m, *mc;
681 int error, s;
682 static struct sockaddr_storage dst;
683
684 m = NULL; /* XXX gcc */
685
686 KERNEL_LOCK(1, NULL);
687
688 if (d->bd_bif == NULL) {
689 KERNEL_UNLOCK_ONE(NULL);
690 return (ENXIO);
691 }
692 getnanotime(&d->bd_mtime);
693
694 ifp = d->bd_bif->bif_ifp;
695
696 if (uio->uio_resid == 0) {
697 KERNEL_UNLOCK_ONE(NULL);
698 return (0);
699 }
700
701 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m,
702 (struct sockaddr *) &dst);
703 if (error) {
704 KERNEL_UNLOCK_ONE(NULL);
705 return (error);
706 }
707
708 if (m->m_pkthdr.len > ifp->if_mtu) {
709 KERNEL_UNLOCK_ONE(NULL);
710 m_freem(m);
711 return (EMSGSIZE);
712 }
713
714 if (d->bd_hdrcmplt)
715 dst.ss_family = pseudo_AF_HDRCMPLT;
716
717 if (d->bd_feedback) {
718 mc = m_dup(m, 0, M_COPYALL, M_NOWAIT);
719 if (mc != NULL)
720 m_set_rcvif(mc, ifp);
721 /* Set M_PROMISC for outgoing packets to be discarded. */
722 if (1 /*d->bd_direction == BPF_D_INOUT*/)
723 m->m_flags |= M_PROMISC;
724 } else
725 mc = NULL;
726
727 s = splsoftnet();
728 error = if_output_lock(ifp, ifp, m, (struct sockaddr *) &dst, NULL);
729
730 if (mc != NULL) {
731 if (error == 0)
732 ifp->_if_input(ifp, mc);
733 else
734 m_freem(mc);
735 }
736 splx(s);
737 KERNEL_UNLOCK_ONE(NULL);
738 /*
739 * The driver frees the mbuf.
740 */
741 return (error);
742 }
743
744 /*
745 * Reset a descriptor by flushing its packet buffer and clearing the
746 * receive and drop counts. Should be called at splnet.
747 */
748 static void
749 reset_d(struct bpf_d *d)
750 {
751 if (d->bd_hbuf) {
752 /* Free the hold buffer. */
753 d->bd_fbuf = d->bd_hbuf;
754 d->bd_hbuf = NULL;
755 }
756 d->bd_slen = 0;
757 d->bd_hlen = 0;
758 d->bd_rcount = 0;
759 d->bd_dcount = 0;
760 d->bd_ccount = 0;
761 }
762
763 /*
764 * FIONREAD Check for read packet available.
765 * BIOCGBLEN Get buffer len [for read()].
766 * BIOCSETF Set ethernet read filter.
767 * BIOCFLUSH Flush read packet buffer.
768 * BIOCPROMISC Put interface into promiscuous mode.
769 * BIOCGDLT Get link layer type.
770 * BIOCGETIF Get interface name.
771 * BIOCSETIF Set interface.
772 * BIOCSRTIMEOUT Set read timeout.
773 * BIOCGRTIMEOUT Get read timeout.
774 * BIOCGSTATS Get packet stats.
775 * BIOCIMMEDIATE Set immediate mode.
776 * BIOCVERSION Get filter language version.
777 * BIOCGHDRCMPLT Get "header already complete" flag.
778 * BIOCSHDRCMPLT Set "header already complete" flag.
779 * BIOCSFEEDBACK Set packet feedback mode.
780 * BIOCGFEEDBACK Get packet feedback mode.
781 * BIOCGSEESENT Get "see sent packets" mode.
782 * BIOCSSEESENT Set "see sent packets" mode.
783 */
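/*
 * A minimal userland sketch of this interface (illustrative only,
 * error handling omitted; "wm0" is just an example interface name):
 *
 *	int fd = open("/dev/bpf", O_RDONLY);
 *	struct ifreq ifr;
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "wm0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);		attach to an interface
 *	u_int yes = 1, blen;
 *	ioctl(fd, BIOCIMMEDIATE, &yes);		deliver packets as they arrive
 *	ioctl(fd, BIOCGBLEN, &blen);		read() must use exactly this size
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);	one or more bpf_hdr + packet records
 */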
784 /* ARGSUSED */
785 static int
786 bpf_ioctl(struct file *fp, u_long cmd, void *addr)
787 {
788 struct bpf_d *d = fp->f_bpf;
789 int s, error = 0;
790
791 /*
792 * Refresh the PID associated with this bpf file.
793 */
794 KERNEL_LOCK(1, NULL);
795 d->bd_pid = curproc->p_pid;
796 #ifdef _LP64
797 if (curproc->p_flag & PK_32)
798 d->bd_compat32 = 1;
799 else
800 d->bd_compat32 = 0;
801 #endif
802
803 s = splnet();
804 if (d->bd_state == BPF_WAITING)
805 callout_stop(&d->bd_callout);
806 d->bd_state = BPF_IDLE;
807 splx(s);
808
809 switch (cmd) {
810
811 default:
812 error = EINVAL;
813 break;
814
815 /*
816 * Check for read packet available.
817 */
818 case FIONREAD:
819 {
820 int n;
821
822 s = splnet();
823 n = d->bd_slen;
824 if (d->bd_hbuf)
825 n += d->bd_hlen;
826 splx(s);
827
828 *(int *)addr = n;
829 break;
830 }
831
832 /*
833 * Get buffer len [for read()].
834 */
835 case BIOCGBLEN:
836 *(u_int *)addr = d->bd_bufsize;
837 break;
838
839 /*
840 * Set buffer length.
841 */
842 case BIOCSBLEN:
843 if (d->bd_bif != NULL)
844 error = EINVAL;
845 else {
846 u_int size = *(u_int *)addr;
847
848 if (size > bpf_maxbufsize)
849 *(u_int *)addr = size = bpf_maxbufsize;
850 else if (size < BPF_MINBUFSIZE)
851 *(u_int *)addr = size = BPF_MINBUFSIZE;
852 d->bd_bufsize = size;
853 }
854 break;
855
856 /*
857 * Set link layer read filter.
858 */
859 case BIOCSETF:
860 error = bpf_setf(d, addr);
861 break;
862
863 /*
864 * Flush read packet buffer.
865 */
866 case BIOCFLUSH:
867 s = splnet();
868 reset_d(d);
869 splx(s);
870 break;
871
872 /*
873 * Put interface into promiscuous mode.
874 */
875 case BIOCPROMISC:
876 if (d->bd_bif == NULL) {
877 /*
878 * No interface attached yet.
879 */
880 error = EINVAL;
881 break;
882 }
883 s = splnet();
884 if (d->bd_promisc == 0) {
885 error = ifpromisc(d->bd_bif->bif_ifp, 1);
886 if (error == 0)
887 d->bd_promisc = 1;
888 }
889 splx(s);
890 break;
891
892 /*
893 * Get device parameters.
894 */
895 case BIOCGDLT:
896 if (d->bd_bif == NULL)
897 error = EINVAL;
898 else
899 *(u_int *)addr = d->bd_bif->bif_dlt;
900 break;
901
902 /*
903 * Get a list of supported device parameters.
904 */
905 case BIOCGDLTLIST:
906 if (d->bd_bif == NULL)
907 error = EINVAL;
908 else
909 error = bpf_getdltlist(d, addr);
910 break;
911
912 /*
913 * Set device parameters.
914 */
915 case BIOCSDLT:
916 mutex_enter(&bpf_mtx);
917 if (d->bd_bif == NULL)
918 error = EINVAL;
919 else
920 error = bpf_setdlt(d, *(u_int *)addr);
921 mutex_exit(&bpf_mtx);
922 break;
923
924 /*
925 * Get interface name.
926 */
927 #ifdef OBIOCGETIF
928 case OBIOCGETIF:
929 #endif
930 case BIOCGETIF:
931 if (d->bd_bif == NULL)
932 error = EINVAL;
933 else
934 bpf_ifname(d->bd_bif->bif_ifp, addr);
935 break;
936
937 /*
938 * Set interface.
939 */
940 #ifdef OBIOCSETIF
941 case OBIOCSETIF:
942 #endif
943 case BIOCSETIF:
944 mutex_enter(&bpf_mtx);
945 error = bpf_setif(d, addr);
946 mutex_exit(&bpf_mtx);
947 break;
948
949 /*
950 * Set read timeout.
951 */
952 case BIOCSRTIMEOUT:
953 {
954 struct timeval *tv = addr;
955
956 /* Compute number of ticks. */
957 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
958 if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
959 d->bd_rtout = 1;
960 break;
961 }
962
963 #ifdef BIOCGORTIMEOUT
964 /*
965 * Get read timeout.
966 */
967 case BIOCGORTIMEOUT:
968 {
969 struct timeval50 *tv = addr;
970
971 tv->tv_sec = d->bd_rtout / hz;
972 tv->tv_usec = (d->bd_rtout % hz) * tick;
973 break;
974 }
975 #endif
976
977 #ifdef BIOCSORTIMEOUT
978 /*
979 * Set read timeout.
980 */
981 case BIOCSORTIMEOUT:
982 {
983 struct timeval50 *tv = addr;
984
985 /* Compute number of ticks. */
986 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
987 if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
988 d->bd_rtout = 1;
989 break;
990 }
991 #endif
992
993 /*
994 * Get read timeout.
995 */
996 case BIOCGRTIMEOUT:
997 {
998 struct timeval *tv = addr;
999
1000 tv->tv_sec = d->bd_rtout / hz;
1001 tv->tv_usec = (d->bd_rtout % hz) * tick;
1002 break;
1003 }
1004 /*
1005 * Get packet stats.
1006 */
1007 case BIOCGSTATS:
1008 {
1009 struct bpf_stat *bs = addr;
1010
1011 bs->bs_recv = d->bd_rcount;
1012 bs->bs_drop = d->bd_dcount;
1013 bs->bs_capt = d->bd_ccount;
1014 break;
1015 }
1016
1017 case BIOCGSTATSOLD:
1018 {
1019 struct bpf_stat_old *bs = addr;
1020
1021 bs->bs_recv = d->bd_rcount;
1022 bs->bs_drop = d->bd_dcount;
1023 break;
1024 }
1025
1026 /*
1027 * Set immediate mode.
1028 */
1029 case BIOCIMMEDIATE:
1030 d->bd_immediate = *(u_int *)addr;
1031 break;
1032
1033 case BIOCVERSION:
1034 {
1035 struct bpf_version *bv = addr;
1036
1037 bv->bv_major = BPF_MAJOR_VERSION;
1038 bv->bv_minor = BPF_MINOR_VERSION;
1039 break;
1040 }
1041
1042 case BIOCGHDRCMPLT: /* get "header already complete" flag */
1043 *(u_int *)addr = d->bd_hdrcmplt;
1044 break;
1045
1046 case BIOCSHDRCMPLT: /* set "header already complete" flag */
1047 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1048 break;
1049
1050 /*
1051 * Get "see sent packets" flag
1052 */
1053 case BIOCGSEESENT:
1054 *(u_int *)addr = d->bd_seesent;
1055 break;
1056
1057 /*
1058 * Set "see sent" packets flag
1059 */
1060 case BIOCSSEESENT:
1061 d->bd_seesent = *(u_int *)addr;
1062 break;
1063
1064 /*
1065 * Set "feed packets from bpf back to input" mode
1066 */
1067 case BIOCSFEEDBACK:
1068 d->bd_feedback = *(u_int *)addr;
1069 break;
1070
1071 /*
1072 * Get "feed packets from bpf back to input" mode
1073 */
1074 case BIOCGFEEDBACK:
1075 *(u_int *)addr = d->bd_feedback;
1076 break;
1077
1078 case FIONBIO: /* Non-blocking I/O */
1079 /*
1080 * No need to do anything special: bpf_read() checks FNONBLOCK
1081 * on the file to decide whether or not to block
1082 * the read.
1083 */
1084 break;
1085
1086 case FIOASYNC: /* Send signal on receive packets */
1087 d->bd_async = *(int *)addr;
1088 break;
1089
1090 case TIOCSPGRP: /* Process or group to send signals to */
1091 case FIOSETOWN:
1092 error = fsetown(&d->bd_pgid, cmd, addr);
1093 break;
1094
1095 case TIOCGPGRP:
1096 case FIOGETOWN:
1097 error = fgetown(d->bd_pgid, cmd, addr);
1098 break;
1099 }
1100 KERNEL_UNLOCK_ONE(NULL);
1101 return (error);
1102 }
1103
1104 /*
1105 * Set d's packet filter program to fp. If this file already has a filter,
1106 * free it and replace it. Returns EINVAL for bogus requests.
1107 */
1108 static int
1109 bpf_setf(struct bpf_d *d, struct bpf_program *fp)
1110 {
1111 struct bpf_insn *fcode, *old;
1112 bpfjit_func_t jcode, oldj;
1113 size_t flen, size;
1114 int s;
1115
1116 jcode = NULL;
1117 flen = fp->bf_len;
1118
1119 if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) {
1120 return EINVAL;
1121 }
1122
1123 if (flen) {
1124 /*
1125 * Allocate the buffer, copy the byte-code from
1126 * userspace and validate it.
1127 */
1128 size = flen * sizeof(*fp->bf_insns);
1129 fcode = malloc(size, M_DEVBUF, M_WAITOK);
1130 if (copyin(fp->bf_insns, fcode, size) != 0 ||
1131 !bpf_validate(fcode, (int)flen)) {
1132 free(fcode, M_DEVBUF);
1133 return EINVAL;
1134 }
1135 membar_consumer();
1136 if (bpf_jit)
1137 jcode = bpf_jit_generate(NULL, fcode, flen);
1138 } else {
1139 fcode = NULL;
1140 }
1141
1142 s = splnet();
1143 old = d->bd_filter;
1144 d->bd_filter = fcode;
1145 oldj = d->bd_jitcode;
1146 d->bd_jitcode = jcode;
1147 reset_d(d);
1148 splx(s);
1149
1150 if (old) {
1151 free(old, M_DEVBUF);
1152 }
1153 if (oldj) {
1154 bpf_jit_freecode(oldj);
1155 }
1156
1157 return 0;
1158 }
1159
1160 /*
1161 * Detach a file from its current interface (if attached at all) and attach
1162 * to the interface indicated by the name stored in ifr.
1163 * Return an errno or 0.
1164 */
1165 static int
1166 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1167 {
1168 struct bpf_if *bp;
1169 char *cp;
1170 int unit_seen, i, s, error;
1171
1172 KASSERT(mutex_owned(&bpf_mtx));
1173 /*
1174 * Make sure the provided name has a unit number, and default
1175 * it to '0' if not specified.
1176 * XXX This is ugly ... do this differently?
1177 */
1178 unit_seen = 0;
1179 cp = ifr->ifr_name;
1180 cp[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */
1181 while (*cp++)
1182 if (*cp >= '0' && *cp <= '9')
1183 unit_seen = 1;
1184 if (!unit_seen) {
1185 /* Make sure to leave room for the '\0'. */
1186 for (i = 0; i < (IFNAMSIZ - 1); ++i) {
1187 if ((ifr->ifr_name[i] >= 'a' &&
1188 ifr->ifr_name[i] <= 'z') ||
1189 (ifr->ifr_name[i] >= 'A' &&
1190 ifr->ifr_name[i] <= 'Z'))
1191 continue;
1192 ifr->ifr_name[i] = '0';
1193 }
1194 }
1195
1196 /*
1197 * Look through attached interfaces for the named one.
1198 */
1199 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1200 struct ifnet *ifp = bp->bif_ifp;
1201
1202 if (ifp == NULL ||
1203 strcmp(ifp->if_xname, ifr->ifr_name) != 0)
1204 continue;
1205 /* skip additional entry */
1206 if (bp->bif_driverp != &ifp->if_bpf)
1207 continue;
1208 /*
1209 * We found the requested interface.
1210 * Allocate the packet buffers if we need to.
1211 * If we're already attached to requested interface,
1212 * just flush the buffer.
1213 */
1214 if (d->bd_sbuf == NULL) {
1215 error = bpf_allocbufs(d);
1216 if (error != 0)
1217 return (error);
1218 }
1219 s = splnet();
1220 if (bp != d->bd_bif) {
1221 if (d->bd_bif)
1222 /*
1223 * Detach if attached to something else.
1224 */
1225 bpf_detachd(d);
1226
1227 bpf_attachd(d, bp);
1228 }
1229 reset_d(d);
1230 splx(s);
1231 return (0);
1232 }
1233 /* Not found. */
1234 return (ENXIO);
1235 }
1236
1237 /*
1238 * Copy the interface name to the ifreq.
1239 */
1240 static void
1241 bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
1242 {
1243 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
1244 }
1245
1246 static int
1247 bpf_stat(struct file *fp, struct stat *st)
1248 {
1249 struct bpf_d *d = fp->f_bpf;
1250
1251 (void)memset(st, 0, sizeof(*st));
1252 KERNEL_LOCK(1, NULL);
1253 st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
1254 st->st_atimespec = d->bd_atime;
1255 st->st_mtimespec = d->bd_mtime;
1256 st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
1257 st->st_uid = kauth_cred_geteuid(fp->f_cred);
1258 st->st_gid = kauth_cred_getegid(fp->f_cred);
1259 st->st_mode = S_IFCHR;
1260 KERNEL_UNLOCK_ONE(NULL);
1261 return 0;
1262 }
1263
1264 /*
1265 * Support for poll() system call
1266 *
1267 * Return true iff the specific operation will not block indefinitely - with
1268 * the assumption that it is safe to positively acknowledge a request for the
1269 * ability to write to the BPF device.
1270 * Otherwise, return false but make a note that a selnotify() must be done.
1271 */
1272 static int
1273 bpf_poll(struct file *fp, int events)
1274 {
1275 struct bpf_d *d = fp->f_bpf;
1276 int s = splnet();
1277 int revents;
1278
1279 /*
1280 * Refresh the PID associated with this bpf file.
1281 */
1282 KERNEL_LOCK(1, NULL);
1283 d->bd_pid = curproc->p_pid;
1284
1285 revents = events & (POLLOUT | POLLWRNORM);
1286 if (events & (POLLIN | POLLRDNORM)) {
1287 /*
1288 * An imitation of the FIONREAD ioctl code.
1289 */
1290 if (d->bd_hlen != 0 ||
1291 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1292 d->bd_slen != 0)) {
1293 revents |= events & (POLLIN | POLLRDNORM);
1294 } else {
1295 selrecord(curlwp, &d->bd_sel);
1296 /* Start the read timeout if necessary */
1297 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1298 callout_reset(&d->bd_callout, d->bd_rtout,
1299 bpf_timed_out, d);
1300 d->bd_state = BPF_WAITING;
1301 }
1302 }
1303 }
1304
1305 KERNEL_UNLOCK_ONE(NULL);
1306 splx(s);
1307 return (revents);
1308 }
1309
1310 static void
1311 filt_bpfrdetach(struct knote *kn)
1312 {
1313 struct bpf_d *d = kn->kn_hook;
1314 int s;
1315
1316 KERNEL_LOCK(1, NULL);
1317 s = splnet();
1318 SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
1319 splx(s);
1320 KERNEL_UNLOCK_ONE(NULL);
1321 }
1322
1323 static int
1324 filt_bpfread(struct knote *kn, long hint)
1325 {
1326 struct bpf_d *d = kn->kn_hook;
1327 int rv;
1328
1329 KERNEL_LOCK(1, NULL);
1330 kn->kn_data = d->bd_hlen;
1331 if (d->bd_immediate)
1332 kn->kn_data += d->bd_slen;
1333 rv = (kn->kn_data > 0);
1334 KERNEL_UNLOCK_ONE(NULL);
1335 return rv;
1336 }
1337
1338 static const struct filterops bpfread_filtops =
1339 { 1, NULL, filt_bpfrdetach, filt_bpfread };
1340
1341 static int
1342 bpf_kqfilter(struct file *fp, struct knote *kn)
1343 {
1344 struct bpf_d *d = fp->f_bpf;
1345 struct klist *klist;
1346 int s;
1347
1348 KERNEL_LOCK(1, NULL);
1349
1350 switch (kn->kn_filter) {
1351 case EVFILT_READ:
1352 klist = &d->bd_sel.sel_klist;
1353 kn->kn_fop = &bpfread_filtops;
1354 break;
1355
1356 default:
1357 KERNEL_UNLOCK_ONE(NULL);
1358 return (EINVAL);
1359 }
1360
1361 kn->kn_hook = d;
1362
1363 s = splnet();
1364 SLIST_INSERT_HEAD(klist, kn, kn_selnext);
1365 splx(s);
1366 KERNEL_UNLOCK_ONE(NULL);
1367
1368 return (0);
1369 }
1370
1371 /*
1372 * Copy data from an mbuf chain into a buffer. This code is derived
1373 * from m_copydata in sys/uipc_mbuf.c.
1374 */
1375 static void *
1376 bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
1377 {
1378 const struct mbuf *m;
1379 u_int count;
1380 u_char *dst;
1381
1382 m = src_arg;
1383 dst = dst_arg;
1384 while (len > 0) {
1385 if (m == NULL)
1386 panic("bpf_mcpy");
1387 count = min(m->m_len, len);
1388 memcpy(dst, mtod(m, const void *), count);
1389 m = m->m_next;
1390 dst += count;
1391 len -= count;
1392 }
1393 return dst_arg;
1394 }
1395
1396 /*
1397 * Dispatch a packet to all the listeners on interface bp.
1398 *
1399 * pkt pointer to the packet, either a data buffer or an mbuf chain
1400 * buflen buffer length, if pkt is a data buffer
1401 * cpfn a function that can copy pkt into the listener's buffer
1402 * pktlen length of the packet
1403 * rcv true if packet came in
1404 */
1405 static inline void
1406 bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
1407 void *pkt, u_int pktlen, u_int buflen, const bool rcv)
1408 {
1409 uint32_t mem[BPF_MEMWORDS];
1410 bpf_args_t args = {
1411 .pkt = (const uint8_t *)pkt,
1412 .wirelen = pktlen,
1413 .buflen = buflen,
1414 .mem = mem,
1415 .arg = NULL
1416 };
1417 bool gottime = false;
1418 struct timespec ts;
1419
1420 /*
1421 * Note that the IPL does not have to be raised at this point.
1422 * The only problem that could arise here would be if two different
1423 * interfaces shared any data. This is not the case.
1424 */
1425 for (struct bpf_d *d = bp->bif_dlist; d != NULL; d = d->bd_next) {
1426 u_int slen;
1427
1428 if (!d->bd_seesent && !rcv) {
1429 continue;
1430 }
1431 d->bd_rcount++;
1432 bpf_gstats.bs_recv++;
1433
1434 if (d->bd_jitcode)
1435 slen = d->bd_jitcode(NULL, &args);
1436 else
1437 slen = bpf_filter_ext(NULL, d->bd_filter, &args);
1438
1439 if (!slen) {
1440 continue;
1441 }
1442 if (!gottime) {
1443 gottime = true;
1444 nanotime(&ts);
1445 }
1446 catchpacket(d, pkt, pktlen, slen, cpfn, &ts);
1447 }
1448 }
1449
1450 /*
1451 * Incoming linkage from device drivers. Process the packet pkt, of length
1452 * pktlen, which is stored in a contiguous buffer. The packet is parsed
1453 * by each process' filter, and if accepted, stashed into the corresponding
1454 * buffer.
1455 */
1456 static void
1457 _bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1458 {
1459
1460 bpf_deliver(bp, memcpy, pkt, pktlen, pktlen, true);
1461 }
1462
1463 /*
1464 * Incoming linkage from device drivers, when the head of the packet is in
1465 * a buffer, and the tail is in an mbuf chain.
1466 */
1467 static void
1468 _bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
1469 {
1470 u_int pktlen;
1471 struct mbuf mb;
1472
1473 /* Skip outgoing duplicate packets. */
1474 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
1475 m->m_flags &= ~M_PROMISC;
1476 return;
1477 }
1478
1479 pktlen = m_length(m) + dlen;
1480
1481 /*
1482 * Craft on-stack mbuf suitable for passing to bpf_filter.
1483 * Note that we cut corners here; we only setup what's
1484 * absolutely needed--this mbuf should never go anywhere else.
1485 */
1486 (void)memset(&mb, 0, sizeof(mb));
1487 mb.m_next = m;
1488 mb.m_data = data;
1489 mb.m_len = dlen;
1490
1491 bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif_index != 0);
1492 }
1493
1494 /*
1495 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1496 */
1497 static void
1498 _bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1499 {
1500 void *(*cpfn)(void *, const void *, size_t);
1501 u_int pktlen, buflen;
1502 void *marg;
1503
1504 /* Skip outgoing duplicate packets. */
1505 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
1506 m->m_flags &= ~M_PROMISC;
1507 return;
1508 }
1509
1510 pktlen = m_length(m);
1511
1512 if (pktlen == m->m_len) {
1513 cpfn = (void *)memcpy;
1514 marg = mtod(m, void *);
1515 buflen = pktlen;
1516 } else {
1517 cpfn = bpf_mcpy;
1518 marg = m;
1519 buflen = 0;
1520 }
1521
1522 bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif_index != 0);
1523 }
1524
1525 /*
1526 * We need to prepend the address family as
1527 * a four byte field. Cons up a dummy header
1528 * to pacify bpf. This is safe because bpf
1529 * will only read from the mbuf (i.e., it won't
1530 * try to free it or keep a pointer to it).
1531 */
1532 static void
1533 _bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m)
1534 {
1535 struct mbuf m0;
1536
1537 m0.m_flags = 0;
1538 m0.m_next = m;
1539 m0.m_len = 4;
1540 m0.m_data = (char *)⁡
1541
1542 _bpf_mtap(bp, &m0);
1543 }
1544
1545 /*
1546 * Put the SLIP pseudo-"link header" in place.
1547 * Note this M_PREPEND() should never fail,
1548 * since we know we always have enough space
1549 * in the input buffer.
1550 */
1551 static void
1552 _bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
1553 {
1554 int s;
1555 u_char *hp;
1556
1557 M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
1558 if (*m == NULL)
1559 return;
1560
1561 hp = mtod(*m, u_char *);
1562 hp[SLX_DIR] = SLIPDIR_IN;
1563 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);
1564
1565 s = splnet();
1566 _bpf_mtap(bp, *m);
1567 splx(s);
1568
1569 m_adj(*m, SLIP_HDRLEN);
1570 }
1571
1572 /*
1573 * Put the SLIP pseudo-"link header" in
1574 * place. The compressed header is now
1575 * at the beginning of the mbuf.
1576 */
1577 static void
1578 _bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
1579 {
1580 struct mbuf m0;
1581 u_char *hp;
1582 int s;
1583
1584 m0.m_flags = 0;
1585 m0.m_next = m;
1586 m0.m_data = m0.m_dat;
1587 m0.m_len = SLIP_HDRLEN;
1588
1589 hp = mtod(&m0, u_char *);
1590
1591 hp[SLX_DIR] = SLIPDIR_OUT;
1592 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);
1593
1594 s = splnet();
1595 _bpf_mtap(bp, &m0);
1596 splx(s);
1597 m_freem(m);
1598 }
1599
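/*
 * Append a copy of the mbuf to the per-interface queue drained by
 * bpf_mtap_si(); returns the duplicate, or NULL if m_dup() fails.
 */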
1600 static struct mbuf *
1601 bpf_mbuf_enqueue(struct bpf_if *bp, struct mbuf *m)
1602 {
1603 struct mbuf *dup;
1604
1605 dup = m_dup(m, 0, M_COPYALL, M_NOWAIT);
1606 if (dup == NULL)
1607 return NULL;
1608
1609 if (bp->bif_mbuf_tail != NULL) {
1610 bp->bif_mbuf_tail->m_nextpkt = dup;
1611 } else {
1612 bp->bif_mbuf_head = dup;
1613 }
1614 bp->bif_mbuf_tail = dup;
1615 #ifdef BPF_MTAP_SOFTINT_DEBUG
1616 log(LOG_DEBUG, "%s: enqueued mbuf=%p to %s\n",
1617 __func__, dup, bp->bif_ifp->if_xname);
1618 #endif
1619
1620 return dup;
1621 }
1622
1623 static struct mbuf *
1624 bpf_mbuf_dequeue(struct bpf_if *bp)
1625 {
1626 struct mbuf *m;
1627 int s;
1628
1629 s = splnet();
1630 m = bp->bif_mbuf_head;
1631 if (m != NULL) {
1632 bp->bif_mbuf_head = m->m_nextpkt;
1633 m->m_nextpkt = NULL;
1634
1635 if (bp->bif_mbuf_head == NULL)
1636 bp->bif_mbuf_tail = NULL;
1637 #ifdef BPF_MTAP_SOFTINT_DEBUG
1638 log(LOG_DEBUG, "%s: dequeued mbuf=%p from %s\n",
1639 __func__, m, bp->bif_ifp->if_xname);
1640 #endif
1641 }
1642 splx(s);
1643
1644 return m;
1645 }
1646
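/*
 * Softint handler: drain the deferred-mbuf queue and run the normal
 * mtap path on each packet.
 */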
1647 static void
1648 bpf_mtap_si(void *arg)
1649 {
1650 struct bpf_if *bp = arg;
1651 struct mbuf *m;
1652
1653 while ((m = bpf_mbuf_dequeue(bp)) != NULL) {
1654 #ifdef BPF_MTAP_SOFTINT_DEBUG
1655 log(LOG_DEBUG, "%s: tapping mbuf=%p on %s\n",
1656 __func__, m, bp->bif_ifp->if_xname);
1657 #endif
1658 #ifndef NET_MPSAFE
1659 KERNEL_LOCK(1, NULL);
1660 #endif
1661 bpf_ops->bpf_mtap(bp, m);
1662 #ifndef NET_MPSAFE
1663 KERNEL_UNLOCK_ONE(NULL);
1664 #endif
1665 m_freem(m);
1666 }
1667 }
1668
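/*
 * Tap a packet from hardware interrupt context: queue a copy and defer
 * the actual capture to the interface's softint, established by
 * _bpf_mtap_softint_init().
 */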
1669 static void
1670 _bpf_mtap_softint(struct ifnet *ifp, struct mbuf *m)
1671 {
1672 struct bpf_if *bp = ifp->if_bpf;
1673 struct mbuf *dup;
1674
1675 KASSERT(cpu_intr_p());
1676
1677 /* To avoid extra invocations of the softint */
1678 if (bp->bif_dlist == NULL)
1679 return;
1680 KASSERT(bp->bif_si != NULL);
1681
1682 dup = bpf_mbuf_enqueue(bp, m);
1683 if (dup != NULL)
1684 softint_schedule(bp->bif_si);
1685 }
1686
1687 static int
1688 bpf_hdrlen(struct bpf_d *d)
1689 {
1690 int hdrlen = d->bd_bif->bif_hdrlen;
1691 /*
1692 * Compute the length of the bpf header. This is not necessarily
1693 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1694 * that the network layer header begins on a longword boundary (for
1695 * performance reasons and to alleviate alignment restrictions).
1696 */
1697 #ifdef _LP64
1698 if (d->bd_compat32)
1699 return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen);
1700 else
1701 #endif
1702 return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen);
1703 }
1704
1705 /*
1706 * Move the packet data from interface memory (pkt) into the
1707 * store buffer. Call the wakeup functions if it's time to wakeup
1708 * a listener (buffer full), "cpfn" is the routine called to do the
1709 * actual data transfer. memcpy is passed in to copy contiguous chunks,
1710 * while bpf_mcpy is passed in to copy mbuf chains. In the latter case,
1711 * pkt is really an mbuf.
1712 */
1713 static void
1714 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1715 void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
1716 {
1717 char *h;
1718 int totlen, curlen, caplen;
1719 int hdrlen = bpf_hdrlen(d);
1720 int do_wakeup = 0;
1721
1722 ++d->bd_ccount;
1723 ++bpf_gstats.bs_capt;
1724 /*
1725 * Figure out how many bytes to move. If the packet is
1726 * greater or equal to the snapshot length, transfer that
1727 * much. Otherwise, transfer the whole packet (unless
1728 * we hit the buffer size limit).
1729 */
1730 totlen = hdrlen + min(snaplen, pktlen);
1731 if (totlen > d->bd_bufsize)
1732 totlen = d->bd_bufsize;
1733 /*
1734 * If we adjusted totlen to fit the bufsize, it could be that
1735 * totlen is smaller than hdrlen because of the link layer header.
1736 */
1737 caplen = totlen - hdrlen;
1738 if (caplen < 0)
1739 caplen = 0;
1740
1741 /*
1742 * Round up the end of the previous packet to the next longword.
1743 */
1744 #ifdef _LP64
1745 if (d->bd_compat32)
1746 curlen = BPF_WORDALIGN32(d->bd_slen);
1747 else
1748 #endif
1749 curlen = BPF_WORDALIGN(d->bd_slen);
1750 if (curlen + totlen > d->bd_bufsize) {
1751 /*
1752 * This packet will overflow the storage buffer.
1753 * Rotate the buffers if we can, then wakeup any
1754 * pending reads.
1755 */
1756 if (d->bd_fbuf == NULL) {
1757 /*
1758 * We haven't completed the previous read yet,
1759 * so drop the packet.
1760 */
1761 ++d->bd_dcount;
1762 ++bpf_gstats.bs_drop;
1763 return;
1764 }
1765 ROTATE_BUFFERS(d);
1766 do_wakeup = 1;
1767 curlen = 0;
1768 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
1769 /*
1770 * Immediate mode is set, or the read timeout has
1771 * already expired during a select call. A packet
1772 * arrived, so the reader should be woken up.
1773 */
1774 do_wakeup = 1;
1775 }
1776
1777 /*
1778 * Append the bpf header.
1779 */
1780 h = (char *)d->bd_sbuf + curlen;
1781 #ifdef _LP64
1782 if (d->bd_compat32) {
1783 struct bpf_hdr32 *hp32;
1784
1785 hp32 = (struct bpf_hdr32 *)h;
1786 hp32->bh_tstamp.tv_sec = ts->tv_sec;
1787 hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
1788 hp32->bh_datalen = pktlen;
1789 hp32->bh_hdrlen = hdrlen;
1790 hp32->bh_caplen = caplen;
1791 } else
1792 #endif
1793 {
1794 struct bpf_hdr *hp;
1795
1796 hp = (struct bpf_hdr *)h;
1797 hp->bh_tstamp.tv_sec = ts->tv_sec;
1798 hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
1799 hp->bh_datalen = pktlen;
1800 hp->bh_hdrlen = hdrlen;
1801 hp->bh_caplen = caplen;
1802 }
1803
1804 /*
1805 * Copy the packet data into the store buffer and update its length.
1806 */
1807 (*cpfn)(h + hdrlen, pkt, caplen);
1808 d->bd_slen = curlen + totlen;
1809
1810 /*
1811 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
1812 * will cause filt_bpfread() to be called with it adjusted.
1813 */
1814 if (do_wakeup)
1815 bpf_wakeup(d);
1816 }
1817
1818 /*
1819 * Initialize all nonzero fields of a descriptor.
1820 */
1821 static int
1822 bpf_allocbufs(struct bpf_d *d)
1823 {
1824
1825 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
1826 if (!d->bd_fbuf)
1827 return (ENOBUFS);
1828 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
1829 if (!d->bd_sbuf) {
1830 free(d->bd_fbuf, M_DEVBUF);
1831 return (ENOBUFS);
1832 }
1833 d->bd_slen = 0;
1834 d->bd_hlen = 0;
1835 return (0);
1836 }
1837
1838 /*
1839 * Free buffers currently in use by a descriptor.
1840 * Called on close.
1841 */
1842 static void
1843 bpf_freed(struct bpf_d *d)
1844 {
1845 /*
1846 * We don't need to lock out interrupts since this descriptor has
1847 * been detached from its interface and it hasn't yet been marked
1848 * free.
1849 */
1850 if (d->bd_sbuf != NULL) {
1851 free(d->bd_sbuf, M_DEVBUF);
1852 if (d->bd_hbuf != NULL)
1853 free(d->bd_hbuf, M_DEVBUF);
1854 if (d->bd_fbuf != NULL)
1855 free(d->bd_fbuf, M_DEVBUF);
1856 }
1857 if (d->bd_filter)
1858 free(d->bd_filter, M_DEVBUF);
1859
1860 if (d->bd_jitcode != NULL) {
1861 bpf_jit_freecode(d->bd_jitcode);
1862 }
1863 }
1864
1865 /*
1866 * Attach an interface to bpf. dlt is the link layer type;
1867 * hdrlen is the fixed size of the link header for the specified dlt
1868 * (variable length headers not yet supported).
1869 */
1870 static void
1871 _bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1872 {
1873 struct bpf_if *bp;
1874 bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
1875 if (bp == NULL)
1876 panic("bpfattach");
1877
1878 mutex_enter(&bpf_mtx);
1879 bp->bif_dlist = NULL;
1880 bp->bif_driverp = driverp;
1881 bp->bif_ifp = ifp;
1882 bp->bif_dlt = dlt;
1883 bp->bif_si = NULL;
1884
1885 bp->bif_next = bpf_iflist;
1886 bpf_iflist = bp;
1887
1888 *bp->bif_driverp = NULL;
1889
1890 bp->bif_hdrlen = hdrlen;
1891 mutex_exit(&bpf_mtx);
1892 #if 0
1893 printf("bpf: %s attached\n", ifp->if_xname);
1894 #endif
1895 }
1896
1897 static void
1898 _bpf_mtap_softint_init(struct ifnet *ifp)
1899 {
1900 struct bpf_if *bp;
1901
1902 mutex_enter(&bpf_mtx);
1903 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1904 if (bp->bif_ifp != ifp)
1905 continue;
1906
1907 bp->bif_mbuf_head = NULL;
1908 bp->bif_mbuf_tail = NULL;
1909 bp->bif_si = softint_establish(SOFTINT_NET, bpf_mtap_si, bp);
1910 if (bp->bif_si == NULL)
1911 panic("%s: softint_establish() failed", __func__);
1912 break;
1913 }
1914 mutex_exit(&bpf_mtx);
1915
1916 if (bp == NULL)
1917 panic("%s: no bpf_if found for %s", __func__, ifp->if_xname);
1918 }
1919
1920 /*
1921 * Remove an interface from bpf.
1922 */
1923 static void
1924 _bpfdetach(struct ifnet *ifp)
1925 {
1926 struct bpf_if *bp, **pbp;
1927 struct bpf_d *d;
1928 int s;
1929
1930 mutex_enter(&bpf_mtx);
1931 /* Nuke the vnodes for any open instances */
1932 LIST_FOREACH(d, &bpf_list, bd_list) {
1933 if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
1934 /*
1935 * Detach the descriptor from an interface now.
1936 * It will be free'ed later by close routine.
1937 */
1938 s = splnet();
1939 d->bd_promisc = 0; /* we can't touch device. */
1940 bpf_detachd(d);
1941 splx(s);
1942 }
1943 }
1944
1945 again:
1946 for (bp = bpf_iflist, pbp = &bpf_iflist;
1947 bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
1948 if (bp->bif_ifp == ifp) {
1949 *pbp = bp->bif_next;
1950 if (bp->bif_si != NULL) {
1951 s = splnet();
1952 while (bp->bif_mbuf_head != NULL) {
1953 struct mbuf *m = bp->bif_mbuf_head;
1954 bp->bif_mbuf_head = m->m_nextpkt;
1955 m_freem(m);
1956 }
1957 splx(s);
1958 softint_disestablish(bp->bif_si);
1959 }
1960 free(bp, M_DEVBUF);
1961 goto again;
1962 }
1963 }
1964 mutex_exit(&bpf_mtx);
1965 }
1966
1967 /*
1968 * Change the data link type of an interface.
1969 */
1970 static void
1971 _bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1972 {
1973 struct bpf_if *bp;
1974
1975 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1976 if (bp->bif_driverp == &ifp->if_bpf)
1977 break;
1978 }
1979 if (bp == NULL)
1980 panic("bpf_change_type");
1981
1982 bp->bif_dlt = dlt;
1983
1984 bp->bif_hdrlen = hdrlen;
1985 }
1986
1987 /*
1988 * Get the list of available data link types for the interface.
1989 */
1990 static int
1991 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1992 {
1993 int n, error;
1994 struct ifnet *ifp;
1995 struct bpf_if *bp;
1996
1997 ifp = d->bd_bif->bif_ifp;
1998 n = 0;
1999 error = 0;
2000 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
2001 if (bp->bif_ifp != ifp)
2002 continue;
2003 if (bfl->bfl_list != NULL) {
2004 if (n >= bfl->bfl_len)
2005 return ENOMEM;
2006 error = copyout(&bp->bif_dlt,
2007 bfl->bfl_list + n, sizeof(u_int));
2008 }
2009 n++;
2010 }
2011 bfl->bfl_len = n;
2012 return error;
2013 }
2014
2015 /*
2016 * Set the data link type of a BPF instance.
2017 */
2018 static int
2019 bpf_setdlt(struct bpf_d *d, u_int dlt)
2020 {
2021 int s, error, opromisc;
2022 struct ifnet *ifp;
2023 struct bpf_if *bp;
2024
2025 KASSERT(mutex_owned(&bpf_mtx));
2026
2027 if (d->bd_bif->bif_dlt == dlt)
2028 return 0;
2029 ifp = d->bd_bif->bif_ifp;
2030 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
2031 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2032 break;
2033 }
2034 if (bp == NULL)
2035 return EINVAL;
2036 s = splnet();
2037 opromisc = d->bd_promisc;
2038 bpf_detachd(d);
2039 bpf_attachd(d, bp);
2040 reset_d(d);
2041 if (opromisc) {
2042 error = ifpromisc(bp->bif_ifp, 1);
2043 if (error)
2044 printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
2045 bp->bif_ifp->if_xname, error);
2046 else
2047 d->bd_promisc = 1;
2048 }
2049 splx(s);
2050 return 0;
2051 }
2052
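/*
 * Handler for the net.bpf.maxbufsize sysctl; new values must lie
 * within [BPF_MINBUFSIZE, BPF_MAXBUFSIZE].
 */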
2053 static int
2054 sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
2055 {
2056 int newsize, error;
2057 struct sysctlnode node;
2058
2059 node = *rnode;
2060 node.sysctl_data = &newsize;
2061 newsize = bpf_maxbufsize;
2062 error = sysctl_lookup(SYSCTLFN_CALL(&node));
2063 if (error || newp == NULL)
2064 return (error);
2065
2066 if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
2067 return (EINVAL);
2068
2069 bpf_maxbufsize = newsize;
2070
2071 return (0);
2072 }
2073
2074 #if defined(MODULAR) || defined(BPFJIT)
2075 static int
2076 sysctl_net_bpf_jit(SYSCTLFN_ARGS)
2077 {
2078 bool newval;
2079 int error;
2080 struct sysctlnode node;
2081
2082 node = *rnode;
2083 node.sysctl_data = &newval;
2084 newval = bpf_jit;
2085 error = sysctl_lookup(SYSCTLFN_CALL(&node));
2086 if (error != 0 || newp == NULL)
2087 return error;
2088
2089 bpf_jit = newval;
2090
2091 /*
2092 * Do a full sync to publish new bpf_jit value and
2093 * update bpfjit_module_ops.bj_generate_code variable.
2094 */
2095 membar_sync();
2096
2097 if (newval && bpfjit_module_ops.bj_generate_code == NULL) {
2098 printf("JIT compilation is postponed "
2099 "until after bpfjit module is loaded\n");
2100 }
2101
2102 return 0;
2103 }
2104 #endif
2105
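/*
 * Handler for the net.bpf.peers sysctl: copy out one bpf_d_ext record
 * per open descriptor (privileged).
 */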
2106 static int
2107 sysctl_net_bpf_peers(SYSCTLFN_ARGS)
2108 {
2109 int error, elem_count;
2110 struct bpf_d *dp;
2111 struct bpf_d_ext dpe;
2112 size_t len, needed, elem_size, out_size;
2113 char *sp;
2114
2115 if (namelen == 1 && name[0] == CTL_QUERY)
2116 return (sysctl_query(SYSCTLFN_CALL(rnode)));
2117
2118 if (namelen != 2)
2119 return (EINVAL);
2120
2121 /* BPF peers is privileged information. */
2122 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
2123 KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
2124 if (error)
2125 return (EPERM);
2126
2127 len = (oldp != NULL) ? *oldlenp : 0;
2128 sp = oldp;
2129 elem_size = name[0];
2130 elem_count = name[1];
2131 out_size = MIN(sizeof(dpe), elem_size);
2132 needed = 0;
2133
2134 if (elem_size < 1 || elem_count < 0)
2135 return (EINVAL);
2136
2137 mutex_enter(&bpf_mtx);
2138 LIST_FOREACH(dp, &bpf_list, bd_list) {
2139 if (len >= elem_size && elem_count > 0) {
2140 #define BPF_EXT(field) dpe.bde_ ## field = dp->bd_ ## field
2141 BPF_EXT(bufsize);
2142 BPF_EXT(promisc);
2143 BPF_EXT(state);
2144 BPF_EXT(immediate);
2145 BPF_EXT(hdrcmplt);
2146 BPF_EXT(seesent);
2147 BPF_EXT(pid);
2148 BPF_EXT(rcount);
2149 BPF_EXT(dcount);
2150 BPF_EXT(ccount);
2151 #undef BPF_EXT
2152 if (dp->bd_bif)
2153 (void)strlcpy(dpe.bde_ifname,
2154 dp->bd_bif->bif_ifp->if_xname,
2155 IFNAMSIZ - 1);
2156 else
2157 dpe.bde_ifname[0] = '\0';
2158
2159 error = copyout(&dpe, sp, out_size);
2160 if (error)
2161 break;
2162 sp += elem_size;
2163 len -= elem_size;
2164 }
2165 needed += elem_size;
2166 if (elem_count > 0 && elem_count != INT_MAX)
2167 elem_count--;
2168 }
2169 mutex_exit(&bpf_mtx);
2170
2171 *oldlenp = needed;
2172
2173 return (error);
2174 }
2175
2176 static struct sysctllog *bpf_sysctllog;
2177 static void
2178 sysctl_net_bpf_setup(void)
2179 {
2180 const struct sysctlnode *node;
2181
2182 node = NULL;
2183 sysctl_createv(&bpf_sysctllog, 0, NULL, &node,
2184 CTLFLAG_PERMANENT,
2185 CTLTYPE_NODE, "bpf",
2186 SYSCTL_DESCR("BPF options"),
2187 NULL, 0, NULL, 0,
2188 CTL_NET, CTL_CREATE, CTL_EOL);
2189 if (node != NULL) {
2190 #if defined(MODULAR) || defined(BPFJIT)
2191 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
2192 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2193 CTLTYPE_BOOL, "jit",
2194 SYSCTL_DESCR("Toggle Just-In-Time compilation"),
2195 sysctl_net_bpf_jit, 0, &bpf_jit, 0,
2196 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2197 #endif
2198 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
2199 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2200 CTLTYPE_INT, "maxbufsize",
2201 SYSCTL_DESCR("Maximum size for data capture buffer"),
2202 sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
2203 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2204 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
2205 CTLFLAG_PERMANENT,
2206 CTLTYPE_STRUCT, "stats",
2207 SYSCTL_DESCR("BPF stats"),
2208 NULL, 0, &bpf_gstats, sizeof(bpf_gstats),
2209 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2210 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
2211 CTLFLAG_PERMANENT,
2212 CTLTYPE_STRUCT, "peers",
2213 SYSCTL_DESCR("BPF peers"),
2214 sysctl_net_bpf_peers, 0, NULL, 0,
2215 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2216 }
2217
2218 }
2219
2220 struct bpf_ops bpf_ops_kernel = {
2221 .bpf_attach = _bpfattach,
2222 .bpf_detach = _bpfdetach,
2223 .bpf_change_type = _bpf_change_type,
2224
2225 .bpf_tap = _bpf_tap,
2226 .bpf_mtap = _bpf_mtap,
2227 .bpf_mtap2 = _bpf_mtap2,
2228 .bpf_mtap_af = _bpf_mtap_af,
2229 .bpf_mtap_sl_in = _bpf_mtap_sl_in,
2230 .bpf_mtap_sl_out = _bpf_mtap_sl_out,
2231
2232 .bpf_mtap_softint = _bpf_mtap_softint,
2233 .bpf_mtap_softint_init = _bpf_mtap_softint_init,
2234 };
2235
2236 MODULE(MODULE_CLASS_DRIVER, bpf, "bpf_filter");
2237
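/*
 * Module control: on load, attach the character device (when built as
 * a module) and switch bpf_ops to the in-kernel implementation; unload
 * is currently unsupported.
 */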
2238 static int
2239 bpf_modcmd(modcmd_t cmd, void *arg)
2240 {
2241 #ifdef _MODULE
2242 devmajor_t bmajor, cmajor;
2243 #endif
2244 int error = 0;
2245
2246 switch (cmd) {
2247 case MODULE_CMD_INIT:
2248 bpf_init();
2249 #ifdef _MODULE
2250 bmajor = cmajor = NODEVMAJOR;
2251 error = devsw_attach("bpf", NULL, &bmajor,
2252 &bpf_cdevsw, &cmajor);
2253 if (error)
2254 break;
2255 #endif
2256
2257 bpf_ops_handover_enter(&bpf_ops_kernel);
2258 atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
2259 bpf_ops_handover_exit();
2260 sysctl_net_bpf_setup();
2261 break;
2262
2263 case MODULE_CMD_FINI:
2264 /*
2265 * While there is no reference counting for bpf callers,
2266 * unload could at least in theory be done similarly to
2267 * system call disestablishment. This should even be
2268 * a little simpler:
2269 *
2270 * 1) replace op vector with stubs
2271 * 2) post update to all cpus with xc
2272 * 3) check that nobody is in bpf anymore
2273 * (it's doubtful we'd want something like l_sysent,
2274 * but we could do something like *signed* percpu
2275 * counters. if the sum is 0, we're good).
2276 * 4) if fail, unroll changes
2277 *
2278 * NOTE: the change won't be atomic to the outside. Some
2279 * packets may not be captured even if unload is
2280 * not successful. I think packet capture not working
2281 * is a perfectly logical consequence of trying to
2282 * disable packet capture.
2283 */
2284 error = EOPNOTSUPP;
2285 /* insert sysctl teardown */
2286 break;
2287
2288 default:
2289 error = ENOTTY;
2290 break;
2291 }
2292
2293 return error;
2294 }
2295