bpf.c revision 1.184 1 /* $NetBSD: bpf.c,v 1.184 2014/07/10 15:32:09 christos Exp $ */
2
3 /*
4 * Copyright (c) 1990, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from the Stanford/CMU enet packet filter,
8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
10 * Berkeley Laboratory.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95
37 * static char rcsid[] =
38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.184 2014/07/10 15:32:09 christos Exp $");
43
44 #if defined(_KERNEL_OPT)
45 #include "opt_bpf.h"
46 #include "sl.h"
47 #include "strip.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/mbuf.h>
53 #include <sys/buf.h>
54 #include <sys/time.h>
55 #include <sys/proc.h>
56 #include <sys/ioctl.h>
57 #include <sys/conf.h>
58 #include <sys/vnode.h>
59 #include <sys/queue.h>
60 #include <sys/stat.h>
61 #include <sys/module.h>
62 #include <sys/once.h>
63 #include <sys/atomic.h>
64
65 #include <sys/file.h>
66 #include <sys/filedesc.h>
67 #include <sys/tty.h>
68 #include <sys/uio.h>
69
70 #include <sys/protosw.h>
71 #include <sys/socket.h>
72 #include <sys/errno.h>
73 #include <sys/kernel.h>
74 #include <sys/poll.h>
75 #include <sys/sysctl.h>
76 #include <sys/kauth.h>
77
78 #include <net/if.h>
79 #include <net/slip.h>
80
81 #include <net/bpf.h>
82 #include <net/bpfdesc.h>
83 #include <net/bpfjit.h>
84
85 #include <net/if_arc.h>
86 #include <net/if_ether.h>
87
88 #include <netinet/in.h>
89 #include <netinet/if_inarp.h>
90
91
92 #include <compat/sys/sockio.h>
93
94 #ifndef BPF_BUFSIZE
95 /*
96 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
97 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
98 */
99 # define BPF_BUFSIZE 32768
100 #endif
101
102 #define PRINET 26 /* interruptible */
103
104 /*
105 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
106 * XXX the default values should be computed dynamically based
107 * on available memory size and available mbuf clusters.
108 */
109 int bpf_bufsize = BPF_BUFSIZE;
110 int bpf_maxbufsize = BPF_DFLTBUFSIZE; /* XXX set dynamically, see above */
111 bool bpf_jit = false;
112
113 struct bpfjit_ops bpfjit_module_ops = {
114 .bj_generate_code = NULL,
115 .bj_free_code = NULL
116 };
117
118 /*
119 * Global BPF statistics returned by net.bpf.stats sysctl.
120 */
121 struct bpf_stat bpf_gstats;
122
123 /*
124 * Use a mutex to avoid a race condition between gathering the stats/peers
125 * and opening/closing the device.
126 */
127 static kmutex_t bpf_mtx;
128
129 /*
130 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
131 * bpf_dtab holds the descriptors, indexed by minor device #
132 */
133 struct bpf_if *bpf_iflist;
134 LIST_HEAD(, bpf_d) bpf_list;
135
136 static int bpf_allocbufs(struct bpf_d *);
137 static void bpf_deliver(struct bpf_if *,
138 void *(*cpfn)(void *, const void *, size_t),
139 void *, u_int, u_int, const bool);
140 static void bpf_freed(struct bpf_d *);
141 static void bpf_ifname(struct ifnet *, struct ifreq *);
142 static void *bpf_mcpy(void *, const void *, size_t);
143 static int bpf_movein(struct uio *, int, uint64_t,
144 struct mbuf **, struct sockaddr *);
145 static void bpf_attachd(struct bpf_d *, struct bpf_if *);
146 static void bpf_detachd(struct bpf_d *);
147 static int bpf_setif(struct bpf_d *, struct ifreq *);
148 static void bpf_timed_out(void *);
149 static inline void
150 bpf_wakeup(struct bpf_d *);
151 static int bpf_hdrlen(struct bpf_d *);
152 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
153 void *(*)(void *, const void *, size_t), struct timespec *);
154 static void reset_d(struct bpf_d *);
155 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
156 static int bpf_setdlt(struct bpf_d *, u_int);
157
158 static int bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t,
159 int);
160 static int bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t,
161 int);
162 static int bpf_ioctl(struct file *, u_long, void *);
163 static int bpf_poll(struct file *, int);
164 static int bpf_stat(struct file *, struct stat *);
165 static int bpf_close(struct file *);
166 static int bpf_kqfilter(struct file *, struct knote *);
167 static void bpf_softintr(void *);
168
169 static const struct fileops bpf_fileops = {
170 .fo_read = bpf_read,
171 .fo_write = bpf_write,
172 .fo_ioctl = bpf_ioctl,
173 .fo_fcntl = fnullop_fcntl,
174 .fo_poll = bpf_poll,
175 .fo_stat = bpf_stat,
176 .fo_close = bpf_close,
177 .fo_kqfilter = bpf_kqfilter,
178 .fo_restart = fnullop_restart,
179 };
180
181 dev_type_open(bpfopen);
182
183 const struct cdevsw bpf_cdevsw = {
184 .d_open = bpfopen,
185 .d_close = noclose,
186 .d_read = noread,
187 .d_write = nowrite,
188 .d_ioctl = noioctl,
189 .d_stop = nostop,
190 .d_tty = notty,
191 .d_poll = nopoll,
192 .d_mmap = nommap,
193 .d_kqfilter = nokqfilter,
194 .d_flag = D_OTHER
195 };
196
197 bpfjit_func_t
198 bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size)
199 {
200
201 membar_consumer();
202 if (bpfjit_module_ops.bj_generate_code != NULL) {
203 return bpfjit_module_ops.bj_generate_code(bc, code, size);
204 }
205 return NULL;
206 }
207
208 void
209 bpf_jit_freecode(bpfjit_func_t jcode)
210 {
211 KASSERT(bpfjit_module_ops.bj_free_code != NULL);
212 bpfjit_module_ops.bj_free_code(jcode);
213 }
214
215 static int
216 bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp,
217 struct sockaddr *sockp)
218 {
219 struct mbuf *m;
220 int error;
221 size_t len;
222 size_t hlen;
223 size_t align;
224
225 /*
226 * Build a sockaddr based on the data link layer type.
227 * We do this at this level because the ethernet header
228 * is copied directly into the data field of the sockaddr.
229 * In the case of SLIP, there is no header and the packet
230 * is forwarded as is.
231 * Also, we are careful to leave room at the front of the mbuf
232 * for the link level header.
233 */
234 switch (linktype) {
235
236 case DLT_SLIP:
237 sockp->sa_family = AF_INET;
238 hlen = 0;
239 align = 0;
240 break;
241
242 case DLT_PPP:
243 sockp->sa_family = AF_UNSPEC;
244 hlen = 0;
245 align = 0;
246 break;
247
248 case DLT_EN10MB:
249 sockp->sa_family = AF_UNSPEC;
250 /* XXX Would MAXLINKHDR be better? */
251 /* 6(dst)+6(src)+2(type) */
252 hlen = sizeof(struct ether_header);
253 align = 2;
254 break;
255
256 case DLT_ARCNET:
257 sockp->sa_family = AF_UNSPEC;
258 hlen = ARC_HDRLEN;
259 align = 5;
260 break;
261
262 case DLT_FDDI:
263 sockp->sa_family = AF_LINK;
264 /* XXX 4(FORMAC)+6(dst)+6(src) */
265 hlen = 16;
266 align = 0;
267 break;
268
269 case DLT_ECONET:
270 sockp->sa_family = AF_UNSPEC;
271 hlen = 6;
272 align = 2;
273 break;
274
275 case DLT_NULL:
276 sockp->sa_family = AF_UNSPEC;
277 hlen = 0;
278 align = 0;
279 break;
280
281 default:
282 return (EIO);
283 }
284
285 len = uio->uio_resid;
286 /*
287 * If there aren't enough bytes for a link level header or the
288 * packet length exceeds the interface mtu, return an error.
289 */
290 if (len - hlen > mtu)
291 return (EMSGSIZE);
292
293 /*
294 * XXX Avoid complicated buffer chaining ---
295 * bail if it won't fit in a single mbuf.
296 * (Take into account possible alignment bytes)
297 */
298 if (len + align > MCLBYTES)
299 return (EIO);
300
301 m = m_gethdr(M_WAIT, MT_DATA);
302 m->m_pkthdr.rcvif = 0;
303 m->m_pkthdr.len = (int)(len - hlen);
304 if (len + align > MHLEN) {
305 m_clget(m, M_WAIT);
306 if ((m->m_flags & M_EXT) == 0) {
307 error = ENOBUFS;
308 goto bad;
309 }
310 }
311
312 /* Insure the data is properly aligned */
313 if (align > 0) {
314 m->m_data += align;
315 m->m_len -= (int)align;
316 }
317
318 error = uiomove(mtod(m, void *), len, uio);
319 if (error)
320 goto bad;
321 if (hlen != 0) {
322 memcpy(sockp->sa_data, mtod(m, void *), hlen);
323 m->m_data += hlen; /* XXX */
324 len -= hlen;
325 }
326 m->m_len = (int)len;
327 *mp = m;
328 return (0);
329
330 bad:
331 m_freem(m);
332 return (error);
333 }
334
335 /*
336 * Attach file to the bpf interface, i.e. make d listen on bp.
337 * Must be called at splnet.
338 */
339 static void
340 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
341 {
342 /*
343 * Point d at bp, and add d to the interface's list of listeners.
344 * Finally, point the driver's bpf cookie at the interface so
345 * it will divert packets to bpf.
346 */
347 d->bd_bif = bp;
348 d->bd_next = bp->bif_dlist;
349 bp->bif_dlist = d;
350
351 *bp->bif_driverp = bp;
352 }
353
354 /*
355 * Detach a file from its interface.
356 */
357 static void
358 bpf_detachd(struct bpf_d *d)
359 {
360 struct bpf_d **p;
361 struct bpf_if *bp;
362
363 bp = d->bd_bif;
364 /*
365 * Check if this descriptor had requested promiscuous mode.
366 * If so, turn it off.
367 */
368 if (d->bd_promisc) {
369 int error __diagused;
370
371 d->bd_promisc = 0;
372 /*
373 * Take device out of promiscuous mode. Since we were
374 * able to enter promiscuous mode, we should be able
375 * to turn it off. But we can get an error if
376 * the interface was configured down, so only panic
377 * if we don't get an unexpected error.
378 */
379 error = ifpromisc(bp->bif_ifp, 0);
380 #ifdef DIAGNOSTIC
381 if (error)
382 printf("%s: ifpromisc failed: %d", __func__, error);
383 #endif
384 }
385 /* Remove d from the interface's descriptor list. */
386 p = &bp->bif_dlist;
387 while (*p != d) {
388 p = &(*p)->bd_next;
389 if (*p == 0)
390 panic("%s: descriptor not in list", __func__);
391 }
392 *p = (*p)->bd_next;
393 if (bp->bif_dlist == 0)
394 /*
395 * Let the driver know that there are no more listeners.
396 */
397 *d->bd_bif->bif_driverp = 0;
398 d->bd_bif = 0;
399 }
400
401 static int
402 doinit(void)
403 {
404
405 mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);
406
407 LIST_INIT(&bpf_list);
408
409 bpf_gstats.bs_recv = 0;
410 bpf_gstats.bs_drop = 0;
411 bpf_gstats.bs_capt = 0;
412
413 return 0;
414 }
415
416 /*
417 * bpfilterattach() is called at boot time.
418 */
419 /* ARGSUSED */
420 void
421 bpfilterattach(int n)
422 {
423 static ONCE_DECL(control);
424
425 RUN_ONCE(&control, doinit);
426 }
427
428 /*
429 * Open ethernet device. Clones.
430 */
431 /* ARGSUSED */
432 int
433 bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
434 {
435 struct bpf_d *d;
436 struct file *fp;
437 int error, fd;
438
439 /* falloc() will use the descriptor for us. */
440 if ((error = fd_allocfile(&fp, &fd)) != 0)
441 return error;
442
443 d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO);
444 d->bd_bufsize = bpf_bufsize;
445 d->bd_seesent = 1;
446 d->bd_feedback = 0;
447 d->bd_pid = l->l_proc->p_pid;
448 #ifdef _LP64
449 if (curproc->p_flag & PK_32)
450 d->bd_compat32 = 1;
451 #endif
452 getnanotime(&d->bd_btime);
453 d->bd_atime = d->bd_mtime = d->bd_btime;
454 callout_init(&d->bd_callout, 0);
455 selinit(&d->bd_sel);
456 d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d);
457 d->bd_jitcode = NULL;
458
459 mutex_enter(&bpf_mtx);
460 LIST_INSERT_HEAD(&bpf_list, d, bd_list);
461 mutex_exit(&bpf_mtx);
462
463 return fd_clone(fp, fd, flag, &bpf_fileops, d);
464 }
465
466 /*
467 * Close the descriptor by detaching it from its interface,
468 * deallocating its buffers, and marking it free.
469 */
470 /* ARGSUSED */
471 static int
472 bpf_close(struct file *fp)
473 {
474 struct bpf_d *d = fp->f_data;
475 int s;
476
477 KERNEL_LOCK(1, NULL);
478
479 /*
480 * Refresh the PID associated with this bpf file.
481 */
482 d->bd_pid = curproc->p_pid;
483
484 s = splnet();
485 if (d->bd_state == BPF_WAITING)
486 callout_stop(&d->bd_callout);
487 d->bd_state = BPF_IDLE;
488 if (d->bd_bif)
489 bpf_detachd(d);
490 splx(s);
491 bpf_freed(d);
492 mutex_enter(&bpf_mtx);
493 LIST_REMOVE(d, bd_list);
494 mutex_exit(&bpf_mtx);
495 callout_destroy(&d->bd_callout);
496 seldestroy(&d->bd_sel);
497 softint_disestablish(d->bd_sih);
498 free(d, M_DEVBUF);
499 fp->f_data = NULL;
500
501 KERNEL_UNLOCK_ONE(NULL);
502
503 return (0);
504 }
505
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays correct under an unbraced if/else.  Invoke it
 * with a trailing semicolon: ROTATE_BUFFERS(d);
 */
#define ROTATE_BUFFERS(d)						\
	do {								\
		(d)->bd_hbuf = (d)->bd_sbuf;				\
		(d)->bd_hlen = (d)->bd_slen;				\
		(d)->bd_sbuf = (d)->bd_fbuf;				\
		(d)->bd_slen = 0;					\
		(d)->bd_fbuf = 0;					\
	} while (/*CONSTCOND*/0)
517 /*
518 * bpfread - read next chunk of packets from buffers
519 */
520 static int
521 bpf_read(struct file *fp, off_t *offp, struct uio *uio,
522 kauth_cred_t cred, int flags)
523 {
524 struct bpf_d *d = fp->f_data;
525 int timed_out;
526 int error;
527 int s;
528
529 getnanotime(&d->bd_atime);
530 /*
531 * Restrict application to use a buffer the same size as
532 * the kernel buffers.
533 */
534 if (uio->uio_resid != d->bd_bufsize)
535 return (EINVAL);
536
537 KERNEL_LOCK(1, NULL);
538 s = splnet();
539 if (d->bd_state == BPF_WAITING)
540 callout_stop(&d->bd_callout);
541 timed_out = (d->bd_state == BPF_TIMED_OUT);
542 d->bd_state = BPF_IDLE;
543 /*
544 * If the hold buffer is empty, then do a timed sleep, which
545 * ends when the timeout expires or when enough packets
546 * have arrived to fill the store buffer.
547 */
548 while (d->bd_hbuf == 0) {
549 if (fp->f_flag & FNONBLOCK) {
550 if (d->bd_slen == 0) {
551 splx(s);
552 KERNEL_UNLOCK_ONE(NULL);
553 return (EWOULDBLOCK);
554 }
555 ROTATE_BUFFERS(d);
556 break;
557 }
558
559 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
560 /*
561 * A packet(s) either arrived since the previous
562 * read or arrived while we were asleep.
563 * Rotate the buffers and return what's here.
564 */
565 ROTATE_BUFFERS(d);
566 break;
567 }
568 error = tsleep(d, PRINET|PCATCH, "bpf",
569 d->bd_rtout);
570 if (error == EINTR || error == ERESTART) {
571 splx(s);
572 KERNEL_UNLOCK_ONE(NULL);
573 return (error);
574 }
575 if (error == EWOULDBLOCK) {
576 /*
577 * On a timeout, return what's in the buffer,
578 * which may be nothing. If there is something
579 * in the store buffer, we can rotate the buffers.
580 */
581 if (d->bd_hbuf)
582 /*
583 * We filled up the buffer in between
584 * getting the timeout and arriving
585 * here, so we don't need to rotate.
586 */
587 break;
588
589 if (d->bd_slen == 0) {
590 splx(s);
591 KERNEL_UNLOCK_ONE(NULL);
592 return (0);
593 }
594 ROTATE_BUFFERS(d);
595 break;
596 }
597 if (error != 0)
598 goto done;
599 }
600 /*
601 * At this point, we know we have something in the hold slot.
602 */
603 splx(s);
604
605 /*
606 * Move data from hold buffer into user space.
607 * We know the entire buffer is transferred since
608 * we checked above that the read buffer is bpf_bufsize bytes.
609 */
610 error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
611
612 s = splnet();
613 d->bd_fbuf = d->bd_hbuf;
614 d->bd_hbuf = 0;
615 d->bd_hlen = 0;
616 done:
617 splx(s);
618 KERNEL_UNLOCK_ONE(NULL);
619 return (error);
620 }
621
622
623 /*
624 * If there are processes sleeping on this descriptor, wake them up.
625 */
626 static inline void
627 bpf_wakeup(struct bpf_d *d)
628 {
629 wakeup(d);
630 if (d->bd_async)
631 softint_schedule(d->bd_sih);
632 selnotify(&d->bd_sel, 0, 0);
633 }
634
635 static void
636 bpf_softintr(void *cookie)
637 {
638 struct bpf_d *d;
639
640 d = cookie;
641 if (d->bd_async)
642 fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL);
643 }
644
645 static void
646 bpf_timed_out(void *arg)
647 {
648 struct bpf_d *d = arg;
649 int s;
650
651 s = splnet();
652 if (d->bd_state == BPF_WAITING) {
653 d->bd_state = BPF_TIMED_OUT;
654 if (d->bd_slen != 0)
655 bpf_wakeup(d);
656 }
657 splx(s);
658 }
659
660
661 static int
662 bpf_write(struct file *fp, off_t *offp, struct uio *uio,
663 kauth_cred_t cred, int flags)
664 {
665 struct bpf_d *d = fp->f_data;
666 struct ifnet *ifp;
667 struct mbuf *m, *mc;
668 int error, s;
669 static struct sockaddr_storage dst;
670
671 m = NULL; /* XXX gcc */
672
673 KERNEL_LOCK(1, NULL);
674
675 if (d->bd_bif == 0) {
676 KERNEL_UNLOCK_ONE(NULL);
677 return (ENXIO);
678 }
679 getnanotime(&d->bd_mtime);
680
681 ifp = d->bd_bif->bif_ifp;
682
683 if (uio->uio_resid == 0) {
684 KERNEL_UNLOCK_ONE(NULL);
685 return (0);
686 }
687
688 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m,
689 (struct sockaddr *) &dst);
690 if (error) {
691 KERNEL_UNLOCK_ONE(NULL);
692 return (error);
693 }
694
695 if (m->m_pkthdr.len > ifp->if_mtu) {
696 KERNEL_UNLOCK_ONE(NULL);
697 m_freem(m);
698 return (EMSGSIZE);
699 }
700
701 if (d->bd_hdrcmplt)
702 dst.ss_family = pseudo_AF_HDRCMPLT;
703
704 if (d->bd_feedback) {
705 mc = m_dup(m, 0, M_COPYALL, M_NOWAIT);
706 if (mc != NULL)
707 mc->m_pkthdr.rcvif = ifp;
708 /* Set M_PROMISC for outgoing packets to be discarded. */
709 if (1 /*d->bd_direction == BPF_D_INOUT*/)
710 m->m_flags |= M_PROMISC;
711 } else
712 mc = NULL;
713
714 s = splsoftnet();
715 error = (*ifp->if_output)(ifp, m, (struct sockaddr *) &dst, NULL);
716
717 if (mc != NULL) {
718 if (error == 0)
719 (*ifp->if_input)(ifp, mc);
720 m_freem(mc);
721 }
722 splx(s);
723 KERNEL_UNLOCK_ONE(NULL);
724 /*
725 * The driver frees the mbuf.
726 */
727 return (error);
728 }
729
730 /*
731 * Reset a descriptor by flushing its packet buffer and clearing the
732 * receive and drop counts. Should be called at splnet.
733 */
734 static void
735 reset_d(struct bpf_d *d)
736 {
737 if (d->bd_hbuf) {
738 /* Free the hold buffer. */
739 d->bd_fbuf = d->bd_hbuf;
740 d->bd_hbuf = 0;
741 }
742 d->bd_slen = 0;
743 d->bd_hlen = 0;
744 d->bd_rcount = 0;
745 d->bd_dcount = 0;
746 d->bd_ccount = 0;
747 }
748
749 /*
750 * FIONREAD Check for read packet available.
751 * BIOCGBLEN Get buffer len [for read()].
752 * BIOCSETF Set ethernet read filter.
753 * BIOCFLUSH Flush read packet buffer.
754 * BIOCPROMISC Put interface into promiscuous mode.
755 * BIOCGDLT Get link layer type.
756 * BIOCGETIF Get interface name.
757 * BIOCSETIF Set interface.
758 * BIOCSRTIMEOUT Set read timeout.
759 * BIOCGRTIMEOUT Get read timeout.
760 * BIOCGSTATS Get packet stats.
761 * BIOCIMMEDIATE Set immediate mode.
762 * BIOCVERSION Get filter language version.
763 * BIOCGHDRCMPLT Get "header already complete" flag.
764 * BIOCSHDRCMPLT Set "header already complete" flag.
765 * BIOCSFEEDBACK Set packet feedback mode.
766 * BIOCGFEEDBACK Get packet feedback mode.
767 * BIOCGSEESENT Get "see sent packets" mode.
768 * BIOCSSEESENT Set "see sent packets" mode.
769 */
770 /* ARGSUSED */
771 static int
772 bpf_ioctl(struct file *fp, u_long cmd, void *addr)
773 {
774 struct bpf_d *d = fp->f_data;
775 int s, error = 0;
776
777 /*
778 * Refresh the PID associated with this bpf file.
779 */
780 KERNEL_LOCK(1, NULL);
781 d->bd_pid = curproc->p_pid;
782 #ifdef _LP64
783 if (curproc->p_flag & PK_32)
784 d->bd_compat32 = 1;
785 else
786 d->bd_compat32 = 0;
787 #endif
788
789 s = splnet();
790 if (d->bd_state == BPF_WAITING)
791 callout_stop(&d->bd_callout);
792 d->bd_state = BPF_IDLE;
793 splx(s);
794
795 switch (cmd) {
796
797 default:
798 error = EINVAL;
799 break;
800
801 /*
802 * Check for read packet available.
803 */
804 case FIONREAD:
805 {
806 int n;
807
808 s = splnet();
809 n = d->bd_slen;
810 if (d->bd_hbuf)
811 n += d->bd_hlen;
812 splx(s);
813
814 *(int *)addr = n;
815 break;
816 }
817
818 /*
819 * Get buffer len [for read()].
820 */
821 case BIOCGBLEN:
822 *(u_int *)addr = d->bd_bufsize;
823 break;
824
825 /*
826 * Set buffer length.
827 */
828 case BIOCSBLEN:
829 if (d->bd_bif != 0)
830 error = EINVAL;
831 else {
832 u_int size = *(u_int *)addr;
833
834 if (size > bpf_maxbufsize)
835 *(u_int *)addr = size = bpf_maxbufsize;
836 else if (size < BPF_MINBUFSIZE)
837 *(u_int *)addr = size = BPF_MINBUFSIZE;
838 d->bd_bufsize = size;
839 }
840 break;
841
842 /*
843 * Set link layer read filter.
844 */
845 case BIOCSETF:
846 error = bpf_setf(d, addr);
847 break;
848
849 /*
850 * Flush read packet buffer.
851 */
852 case BIOCFLUSH:
853 s = splnet();
854 reset_d(d);
855 splx(s);
856 break;
857
858 /*
859 * Put interface into promiscuous mode.
860 */
861 case BIOCPROMISC:
862 if (d->bd_bif == 0) {
863 /*
864 * No interface attached yet.
865 */
866 error = EINVAL;
867 break;
868 }
869 s = splnet();
870 if (d->bd_promisc == 0) {
871 error = ifpromisc(d->bd_bif->bif_ifp, 1);
872 if (error == 0)
873 d->bd_promisc = 1;
874 }
875 splx(s);
876 break;
877
878 /*
879 * Get device parameters.
880 */
881 case BIOCGDLT:
882 if (d->bd_bif == 0)
883 error = EINVAL;
884 else
885 *(u_int *)addr = d->bd_bif->bif_dlt;
886 break;
887
888 /*
889 * Get a list of supported device parameters.
890 */
891 case BIOCGDLTLIST:
892 if (d->bd_bif == 0)
893 error = EINVAL;
894 else
895 error = bpf_getdltlist(d, addr);
896 break;
897
898 /*
899 * Set device parameters.
900 */
901 case BIOCSDLT:
902 if (d->bd_bif == 0)
903 error = EINVAL;
904 else
905 error = bpf_setdlt(d, *(u_int *)addr);
906 break;
907
908 /*
909 * Set interface name.
910 */
911 #ifdef OBIOCGETIF
912 case OBIOCGETIF:
913 #endif
914 case BIOCGETIF:
915 if (d->bd_bif == 0)
916 error = EINVAL;
917 else
918 bpf_ifname(d->bd_bif->bif_ifp, addr);
919 break;
920
921 /*
922 * Set interface.
923 */
924 #ifdef OBIOCSETIF
925 case OBIOCSETIF:
926 #endif
927 case BIOCSETIF:
928 error = bpf_setif(d, addr);
929 break;
930
931 /*
932 * Set read timeout.
933 */
934 case BIOCSRTIMEOUT:
935 {
936 struct timeval *tv = addr;
937
938 /* Compute number of ticks. */
939 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
940 if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
941 d->bd_rtout = 1;
942 break;
943 }
944
945 #ifdef BIOCGORTIMEOUT
946 /*
947 * Get read timeout.
948 */
949 case BIOCGORTIMEOUT:
950 {
951 struct timeval50 *tv = addr;
952
953 tv->tv_sec = d->bd_rtout / hz;
954 tv->tv_usec = (d->bd_rtout % hz) * tick;
955 break;
956 }
957 #endif
958
959 #ifdef BIOCSORTIMEOUT
960 /*
961 * Set read timeout.
962 */
963 case BIOCSORTIMEOUT:
964 {
965 struct timeval50 *tv = addr;
966
967 /* Compute number of ticks. */
968 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
969 if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
970 d->bd_rtout = 1;
971 break;
972 }
973 #endif
974
975 /*
976 * Get read timeout.
977 */
978 case BIOCGRTIMEOUT:
979 {
980 struct timeval *tv = addr;
981
982 tv->tv_sec = d->bd_rtout / hz;
983 tv->tv_usec = (d->bd_rtout % hz) * tick;
984 break;
985 }
986 /*
987 * Get packet stats.
988 */
989 case BIOCGSTATS:
990 {
991 struct bpf_stat *bs = addr;
992
993 bs->bs_recv = d->bd_rcount;
994 bs->bs_drop = d->bd_dcount;
995 bs->bs_capt = d->bd_ccount;
996 break;
997 }
998
999 case BIOCGSTATSOLD:
1000 {
1001 struct bpf_stat_old *bs = addr;
1002
1003 bs->bs_recv = d->bd_rcount;
1004 bs->bs_drop = d->bd_dcount;
1005 break;
1006 }
1007
1008 /*
1009 * Set immediate mode.
1010 */
1011 case BIOCIMMEDIATE:
1012 d->bd_immediate = *(u_int *)addr;
1013 break;
1014
1015 case BIOCVERSION:
1016 {
1017 struct bpf_version *bv = addr;
1018
1019 bv->bv_major = BPF_MAJOR_VERSION;
1020 bv->bv_minor = BPF_MINOR_VERSION;
1021 break;
1022 }
1023
1024 case BIOCGHDRCMPLT: /* get "header already complete" flag */
1025 *(u_int *)addr = d->bd_hdrcmplt;
1026 break;
1027
1028 case BIOCSHDRCMPLT: /* set "header already complete" flag */
1029 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1030 break;
1031
1032 /*
1033 * Get "see sent packets" flag
1034 */
1035 case BIOCGSEESENT:
1036 *(u_int *)addr = d->bd_seesent;
1037 break;
1038
1039 /*
1040 * Set "see sent" packets flag
1041 */
1042 case BIOCSSEESENT:
1043 d->bd_seesent = *(u_int *)addr;
1044 break;
1045
1046 /*
1047 * Set "feed packets from bpf back to input" mode
1048 */
1049 case BIOCSFEEDBACK:
1050 d->bd_feedback = *(u_int *)addr;
1051 break;
1052
1053 /*
1054 * Get "feed packets from bpf back to input" mode
1055 */
1056 case BIOCGFEEDBACK:
1057 *(u_int *)addr = d->bd_feedback;
1058 break;
1059
1060 case FIONBIO: /* Non-blocking I/O */
1061 /*
1062 * No need to do anything special as we use IO_NDELAY in
1063 * bpfread() as an indication of whether or not to block
1064 * the read.
1065 */
1066 break;
1067
1068 case FIOASYNC: /* Send signal on receive packets */
1069 d->bd_async = *(int *)addr;
1070 break;
1071
1072 case TIOCSPGRP: /* Process or group to send signals to */
1073 case FIOSETOWN:
1074 error = fsetown(&d->bd_pgid, cmd, addr);
1075 break;
1076
1077 case TIOCGPGRP:
1078 case FIOGETOWN:
1079 error = fgetown(d->bd_pgid, cmd, addr);
1080 break;
1081 }
1082 KERNEL_UNLOCK_ONE(NULL);
1083 return (error);
1084 }
1085
1086 /*
1087 * Set d's packet filter program to fp. If this file already has a filter,
1088 * free it and replace it. Returns EINVAL for bogus requests.
1089 */
1090 int
1091 bpf_setf(struct bpf_d *d, struct bpf_program *fp)
1092 {
1093 struct bpf_insn *fcode, *old;
1094 bpfjit_func_t jcode, oldj;
1095 size_t flen, size;
1096 int s;
1097
1098 jcode = NULL;
1099 flen = fp->bf_len;
1100
1101 if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) {
1102 return EINVAL;
1103 }
1104
1105 if (flen) {
1106 /*
1107 * Allocate the buffer, copy the byte-code from
1108 * userspace and validate it.
1109 */
1110 size = flen * sizeof(*fp->bf_insns);
1111 fcode = malloc(size, M_DEVBUF, M_WAITOK);
1112 if (copyin(fp->bf_insns, fcode, size) != 0 ||
1113 !bpf_validate(fcode, (int)flen)) {
1114 free(fcode, M_DEVBUF);
1115 return EINVAL;
1116 }
1117 membar_consumer();
1118 if (bpf_jit)
1119 jcode = bpf_jit_generate(NULL, fcode, flen);
1120 } else {
1121 fcode = NULL;
1122 }
1123
1124 s = splnet();
1125 old = d->bd_filter;
1126 d->bd_filter = fcode;
1127 oldj = d->bd_jitcode;
1128 d->bd_jitcode = jcode;
1129 reset_d(d);
1130 splx(s);
1131
1132 if (old) {
1133 free(old, M_DEVBUF);
1134 }
1135 if (oldj) {
1136 bpf_jit_freecode(oldj);
1137 }
1138
1139 return 0;
1140 }
1141
1142 /*
1143 * Detach a file from its current interface (if attached at all) and attach
1144 * to the interface indicated by the name stored in ifr.
1145 * Return an errno or 0.
1146 */
1147 static int
1148 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1149 {
1150 struct bpf_if *bp;
1151 char *cp;
1152 int unit_seen, i, s, error;
1153
1154 /*
1155 * Make sure the provided name has a unit number, and default
1156 * it to '0' if not specified.
1157 * XXX This is ugly ... do this differently?
1158 */
1159 unit_seen = 0;
1160 cp = ifr->ifr_name;
1161 cp[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */
1162 while (*cp++)
1163 if (*cp >= '0' && *cp <= '9')
1164 unit_seen = 1;
1165 if (!unit_seen) {
1166 /* Make sure to leave room for the '\0'. */
1167 for (i = 0; i < (IFNAMSIZ - 1); ++i) {
1168 if ((ifr->ifr_name[i] >= 'a' &&
1169 ifr->ifr_name[i] <= 'z') ||
1170 (ifr->ifr_name[i] >= 'A' &&
1171 ifr->ifr_name[i] <= 'Z'))
1172 continue;
1173 ifr->ifr_name[i] = '0';
1174 }
1175 }
1176
1177 /*
1178 * Look through attached interfaces for the named one.
1179 */
1180 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
1181 struct ifnet *ifp = bp->bif_ifp;
1182
1183 if (ifp == 0 ||
1184 strcmp(ifp->if_xname, ifr->ifr_name) != 0)
1185 continue;
1186 /* skip additional entry */
1187 if (bp->bif_driverp != &ifp->if_bpf)
1188 continue;
1189 /*
1190 * We found the requested interface.
1191 * Allocate the packet buffers if we need to.
1192 * If we're already attached to requested interface,
1193 * just flush the buffer.
1194 */
1195 if (d->bd_sbuf == 0) {
1196 error = bpf_allocbufs(d);
1197 if (error != 0)
1198 return (error);
1199 }
1200 s = splnet();
1201 if (bp != d->bd_bif) {
1202 if (d->bd_bif)
1203 /*
1204 * Detach if attached to something else.
1205 */
1206 bpf_detachd(d);
1207
1208 bpf_attachd(d, bp);
1209 }
1210 reset_d(d);
1211 splx(s);
1212 return (0);
1213 }
1214 /* Not found. */
1215 return (ENXIO);
1216 }
1217
1218 /*
1219 * Copy the interface name to the ifreq.
1220 */
1221 static void
1222 bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
1223 {
1224 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
1225 }
1226
1227 static int
1228 bpf_stat(struct file *fp, struct stat *st)
1229 {
1230 struct bpf_d *d = fp->f_data;
1231
1232 (void)memset(st, 0, sizeof(*st));
1233 KERNEL_LOCK(1, NULL);
1234 st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
1235 st->st_atimespec = d->bd_atime;
1236 st->st_mtimespec = d->bd_mtime;
1237 st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
1238 st->st_uid = kauth_cred_geteuid(fp->f_cred);
1239 st->st_gid = kauth_cred_getegid(fp->f_cred);
1240 st->st_mode = S_IFCHR;
1241 KERNEL_UNLOCK_ONE(NULL);
1242 return 0;
1243 }
1244
1245 /*
1246 * Support for poll() system call
1247 *
1248 * Return true iff the specific operation will not block indefinitely - with
1249 * the assumption that it is safe to positively acknowledge a request for the
1250 * ability to write to the BPF device.
1251 * Otherwise, return false but make a note that a selnotify() must be done.
1252 */
1253 static int
1254 bpf_poll(struct file *fp, int events)
1255 {
1256 struct bpf_d *d = fp->f_data;
1257 int s = splnet();
1258 int revents;
1259
1260 /*
1261 * Refresh the PID associated with this bpf file.
1262 */
1263 KERNEL_LOCK(1, NULL);
1264 d->bd_pid = curproc->p_pid;
1265
1266 revents = events & (POLLOUT | POLLWRNORM);
1267 if (events & (POLLIN | POLLRDNORM)) {
1268 /*
1269 * An imitation of the FIONREAD ioctl code.
1270 */
1271 if (d->bd_hlen != 0 ||
1272 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1273 d->bd_slen != 0)) {
1274 revents |= events & (POLLIN | POLLRDNORM);
1275 } else {
1276 selrecord(curlwp, &d->bd_sel);
1277 /* Start the read timeout if necessary */
1278 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1279 callout_reset(&d->bd_callout, d->bd_rtout,
1280 bpf_timed_out, d);
1281 d->bd_state = BPF_WAITING;
1282 }
1283 }
1284 }
1285
1286 KERNEL_UNLOCK_ONE(NULL);
1287 splx(s);
1288 return (revents);
1289 }
1290
1291 static void
1292 filt_bpfrdetach(struct knote *kn)
1293 {
1294 struct bpf_d *d = kn->kn_hook;
1295 int s;
1296
1297 KERNEL_LOCK(1, NULL);
1298 s = splnet();
1299 SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
1300 splx(s);
1301 KERNEL_UNLOCK_ONE(NULL);
1302 }
1303
1304 static int
1305 filt_bpfread(struct knote *kn, long hint)
1306 {
1307 struct bpf_d *d = kn->kn_hook;
1308 int rv;
1309
1310 KERNEL_LOCK(1, NULL);
1311 kn->kn_data = d->bd_hlen;
1312 if (d->bd_immediate)
1313 kn->kn_data += d->bd_slen;
1314 rv = (kn->kn_data > 0);
1315 KERNEL_UNLOCK_ONE(NULL);
1316 return rv;
1317 }
1318
1319 static const struct filterops bpfread_filtops =
1320 { 1, NULL, filt_bpfrdetach, filt_bpfread };
1321
1322 static int
1323 bpf_kqfilter(struct file *fp, struct knote *kn)
1324 {
1325 struct bpf_d *d = fp->f_data;
1326 struct klist *klist;
1327 int s;
1328
1329 KERNEL_LOCK(1, NULL);
1330
1331 switch (kn->kn_filter) {
1332 case EVFILT_READ:
1333 klist = &d->bd_sel.sel_klist;
1334 kn->kn_fop = &bpfread_filtops;
1335 break;
1336
1337 default:
1338 KERNEL_UNLOCK_ONE(NULL);
1339 return (EINVAL);
1340 }
1341
1342 kn->kn_hook = d;
1343
1344 s = splnet();
1345 SLIST_INSERT_HEAD(klist, kn, kn_selnext);
1346 splx(s);
1347 KERNEL_UNLOCK_ONE(NULL);
1348
1349 return (0);
1350 }
1351
1352 /*
1353 * Copy data from an mbuf chain into a buffer. This code is derived
1354 * from m_copydata in sys/uipc_mbuf.c.
1355 */
1356 static void *
1357 bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
1358 {
1359 const struct mbuf *m;
1360 u_int count;
1361 u_char *dst;
1362
1363 m = src_arg;
1364 dst = dst_arg;
1365 while (len > 0) {
1366 if (m == NULL)
1367 panic("bpf_mcpy");
1368 count = min(m->m_len, len);
1369 memcpy(dst, mtod(m, const void *), count);
1370 m = m->m_next;
1371 dst += count;
1372 len -= count;
1373 }
1374 return dst_arg;
1375 }
1376
1377 /*
1378 * Dispatch a packet to all the listeners on interface bp.
1379 *
1380 * pkt pointer to the packet, either a data buffer or an mbuf chain
1381 * buflen buffer length, if pkt is a data buffer
1382 * cpfn a function that can copy pkt into the listener's buffer
1383 * pktlen length of the packet
1384 * rcv true if packet came in
1385 */
1386 static inline void
1387 bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
1388 void *pkt, u_int pktlen, u_int buflen, const bool rcv)
1389 {
1390 struct timespec ts;
1391 bpf_args_t args = {
1392 .pkt = (const uint8_t *)pkt,
1393 .wirelen = pktlen,
1394 .buflen = buflen,
1395 .mem = NULL,
1396 .arg = NULL
1397 };
1398 struct bpf_d *d;
1399
1400 const bpf_ctx_t *bc = NULL;
1401 bool gottime = false;
1402
1403 /*
1404 * Note that the IPL does not have to be raised at this point.
1405 * The only problem that could arise here is that if two different
1406 * interfaces shared any data. This is not the case.
1407 */
1408 for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
1409 u_int slen;
1410
1411 if (!d->bd_seesent && !rcv) {
1412 continue;
1413 }
1414 d->bd_rcount++;
1415 bpf_gstats.bs_recv++;
1416
1417 if (d->bd_jitcode)
1418 slen = d->bd_jitcode(bc, &args);
1419 else
1420 slen = bpf_filter_ext(bc, d->bd_filter, &args);
1421
1422 if (!slen) {
1423 continue;
1424 }
1425 if (!gottime) {
1426 gottime = true;
1427 nanotime(&ts);
1428 }
1429 catchpacket(d, pkt, pktlen, slen, cpfn, &ts);
1430 }
1431 }
1432
1433 /*
1434 * Incoming linkage from device drivers. Process the packet pkt, of length
1435 * pktlen, which is stored in a contiguous buffer. The packet is parsed
1436 * by each process' filter, and if accepted, stashed into the corresponding
1437 * buffer.
1438 */
static void
_bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{

	/*
	 * The packet is one contiguous buffer, so memcpy serves as the
	 * copy routine and the buffer length equals the packet length.
	 * The final argument marks the packet as received.
	 */
	bpf_deliver(bp, memcpy, pkt, pktlen, pktlen, true);
}
1445
1446 /*
1447 * Incoming linkage from device drivers, when the head of the packet is in
1448 * a buffer, and the tail is in an mbuf chain.
1449 */
1450 static void
1451 _bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
1452 {
1453 u_int pktlen;
1454 struct mbuf mb;
1455
1456 /* Skip outgoing duplicate packets. */
1457 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
1458 m->m_flags &= ~M_PROMISC;
1459 return;
1460 }
1461
1462 pktlen = m_length(m) + dlen;
1463
1464 /*
1465 * Craft on-stack mbuf suitable for passing to bpf_filter.
1466 * Note that we cut corners here; we only setup what's
1467 * absolutely needed--this mbuf should never go anywhere else.
1468 */
1469 (void)memset(&mb, 0, sizeof(mb));
1470 mb.m_next = m;
1471 mb.m_data = data;
1472 mb.m_len = dlen;
1473
1474 bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif != NULL);
1475 }
1476
1477 /*
1478 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1479 */
1480 static void
1481 _bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1482 {
1483 void *(*cpfn)(void *, const void *, size_t);
1484 u_int pktlen, buflen;
1485 void *marg;
1486
1487 /* Skip outgoing duplicate packets. */
1488 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
1489 m->m_flags &= ~M_PROMISC;
1490 return;
1491 }
1492
1493 pktlen = m_length(m);
1494
1495 if (pktlen == m->m_len) {
1496 cpfn = (void *)memcpy;
1497 marg = mtod(m, void *);
1498 buflen = pktlen;
1499 } else {
1500 cpfn = bpf_mcpy;
1501 marg = m;
1502 buflen = 0;
1503 }
1504
1505 bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif != NULL);
1506 }
1507
1508 /*
1509 * We need to prepend the address family as
1510 * a four byte field. Cons up a dummy header
1511 * to pacify bpf. This is safe because bpf
1512 * will only read from the mbuf (i.e., it won't
1513 * try to free it or keep a pointer a to it).
1514 */
1515 static void
1516 _bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m)
1517 {
1518 struct mbuf m0;
1519
1520 m0.m_flags = 0;
1521 m0.m_next = m;
1522 m0.m_len = 4;
1523 m0.m_data = (char *)⁡
1524
1525 _bpf_mtap(bp, &m0);
1526 }
1527
1528 /*
1529 * Put the SLIP pseudo-"link header" in place.
1530 * Note this M_PREPEND() should never fail,
1531 * swince we know we always have enough space
1532 * in the input buffer.
1533 */
1534 static void
1535 _bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
1536 {
1537 int s;
1538 u_char *hp;
1539
1540 M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
1541 if (*m == NULL)
1542 return;
1543
1544 hp = mtod(*m, u_char *);
1545 hp[SLX_DIR] = SLIPDIR_IN;
1546 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);
1547
1548 s = splnet();
1549 _bpf_mtap(bp, *m);
1550 splx(s);
1551
1552 m_adj(*m, SLIP_HDRLEN);
1553 }
1554
1555 /*
1556 * Put the SLIP pseudo-"link header" in
1557 * place. The compressed header is now
1558 * at the beginning of the mbuf.
1559 */
1560 static void
1561 _bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
1562 {
1563 struct mbuf m0;
1564 u_char *hp;
1565 int s;
1566
1567 m0.m_flags = 0;
1568 m0.m_next = m;
1569 m0.m_data = m0.m_dat;
1570 m0.m_len = SLIP_HDRLEN;
1571
1572 hp = mtod(&m0, u_char *);
1573
1574 hp[SLX_DIR] = SLIPDIR_OUT;
1575 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);
1576
1577 s = splnet();
1578 _bpf_mtap(bp, &m0);
1579 splx(s);
1580 m_freem(m);
1581 }
1582
1583 static int
1584 bpf_hdrlen(struct bpf_d *d)
1585 {
1586 int hdrlen = d->bd_bif->bif_hdrlen;
1587 /*
1588 * Compute the length of the bpf header. This is not necessarily
1589 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1590 * that the network layer header begins on a longword boundary (for
1591 * performance reasons and to alleviate alignment restrictions).
1592 */
1593 #ifdef _LP64
1594 if (d->bd_compat32)
1595 return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen);
1596 else
1597 #endif
1598 return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen);
1599 }
1600
1601 /*
1602 * Move the packet data from interface memory (pkt) into the
1603 * store buffer. Call the wakeup functions if it's time to wakeup
1604 * a listener (buffer full), "cpfn" is the routine called to do the
1605 * actual data transfer. memcpy is passed in to copy contiguous chunks,
1606 * while bpf_mcpy is passed in to copy mbuf chains. In the latter case,
1607 * pkt is really an mbuf.
1608 */
1609 static void
1610 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1611 void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
1612 {
1613 char *h;
1614 int totlen, curlen, caplen;
1615 int hdrlen = bpf_hdrlen(d);
1616 int do_wakeup = 0;
1617
1618 ++d->bd_ccount;
1619 ++bpf_gstats.bs_capt;
1620 /*
1621 * Figure out how many bytes to move. If the packet is
1622 * greater or equal to the snapshot length, transfer that
1623 * much. Otherwise, transfer the whole packet (unless
1624 * we hit the buffer size limit).
1625 */
1626 totlen = hdrlen + min(snaplen, pktlen);
1627 if (totlen > d->bd_bufsize)
1628 totlen = d->bd_bufsize;
1629 /*
1630 * If we adjusted totlen to fit the bufsize, it could be that
1631 * totlen is smaller than hdrlen because of the link layer header.
1632 */
1633 caplen = totlen - hdrlen;
1634 if (caplen < 0)
1635 caplen = 0;
1636
1637 /*
1638 * Round up the end of the previous packet to the next longword.
1639 */
1640 #ifdef _LP64
1641 if (d->bd_compat32)
1642 curlen = BPF_WORDALIGN32(d->bd_slen);
1643 else
1644 #endif
1645 curlen = BPF_WORDALIGN(d->bd_slen);
1646 if (curlen + totlen > d->bd_bufsize) {
1647 /*
1648 * This packet will overflow the storage buffer.
1649 * Rotate the buffers if we can, then wakeup any
1650 * pending reads.
1651 */
1652 if (d->bd_fbuf == 0) {
1653 /*
1654 * We haven't completed the previous read yet,
1655 * so drop the packet.
1656 */
1657 ++d->bd_dcount;
1658 ++bpf_gstats.bs_drop;
1659 return;
1660 }
1661 ROTATE_BUFFERS(d);
1662 do_wakeup = 1;
1663 curlen = 0;
1664 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
1665 /*
1666 * Immediate mode is set, or the read timeout has
1667 * already expired during a select call. A packet
1668 * arrived, so the reader should be woken up.
1669 */
1670 do_wakeup = 1;
1671 }
1672
1673 /*
1674 * Append the bpf header.
1675 */
1676 h = (char *)d->bd_sbuf + curlen;
1677 #ifdef _LP64
1678 if (d->bd_compat32) {
1679 struct bpf_hdr32 *hp32;
1680
1681 hp32 = (struct bpf_hdr32 *)h;
1682 hp32->bh_tstamp.tv_sec = ts->tv_sec;
1683 hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
1684 hp32->bh_datalen = pktlen;
1685 hp32->bh_hdrlen = hdrlen;
1686 hp32->bh_caplen = caplen;
1687 } else
1688 #endif
1689 {
1690 struct bpf_hdr *hp;
1691
1692 hp = (struct bpf_hdr *)h;
1693 hp->bh_tstamp.tv_sec = ts->tv_sec;
1694 hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
1695 hp->bh_datalen = pktlen;
1696 hp->bh_hdrlen = hdrlen;
1697 hp->bh_caplen = caplen;
1698 }
1699
1700 /*
1701 * Copy the packet data into the store buffer and update its length.
1702 */
1703 (*cpfn)(h + hdrlen, pkt, caplen);
1704 d->bd_slen = curlen + totlen;
1705
1706 /*
1707 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
1708 * will cause filt_bpfread() to be called with it adjusted.
1709 */
1710 if (do_wakeup)
1711 bpf_wakeup(d);
1712 }
1713
1714 /*
1715 * Initialize all nonzero fields of a descriptor.
1716 */
1717 static int
1718 bpf_allocbufs(struct bpf_d *d)
1719 {
1720
1721 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK | M_CANFAIL);
1722 if (!d->bd_fbuf)
1723 return (ENOBUFS);
1724 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK | M_CANFAIL);
1725 if (!d->bd_sbuf) {
1726 free(d->bd_fbuf, M_DEVBUF);
1727 return (ENOBUFS);
1728 }
1729 d->bd_slen = 0;
1730 d->bd_hlen = 0;
1731 return (0);
1732 }
1733
1734 /*
1735 * Free buffers currently in use by a descriptor.
1736 * Called on close.
1737 */
1738 static void
1739 bpf_freed(struct bpf_d *d)
1740 {
1741 /*
1742 * We don't need to lock out interrupts since this descriptor has
1743 * been detached from its interface and it yet hasn't been marked
1744 * free.
1745 */
1746 if (d->bd_sbuf != NULL) {
1747 free(d->bd_sbuf, M_DEVBUF);
1748 if (d->bd_hbuf != NULL)
1749 free(d->bd_hbuf, M_DEVBUF);
1750 if (d->bd_fbuf != NULL)
1751 free(d->bd_fbuf, M_DEVBUF);
1752 }
1753 if (d->bd_filter)
1754 free(d->bd_filter, M_DEVBUF);
1755
1756 if (d->bd_jitcode != NULL) {
1757 bpf_jit_freecode(d->bd_jitcode);
1758 }
1759 }
1760
1761 /*
1762 * Attach an interface to bpf. dlt is the link layer type;
1763 * hdrlen is the fixed size of the link header for the specified dlt
1764 * (variable length headers not yet supported).
1765 */
1766 static void
1767 _bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1768 {
1769 struct bpf_if *bp;
1770 bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
1771 if (bp == 0)
1772 panic("bpfattach");
1773
1774 bp->bif_dlist = 0;
1775 bp->bif_driverp = driverp;
1776 bp->bif_ifp = ifp;
1777 bp->bif_dlt = dlt;
1778
1779 bp->bif_next = bpf_iflist;
1780 bpf_iflist = bp;
1781
1782 *bp->bif_driverp = 0;
1783
1784 bp->bif_hdrlen = hdrlen;
1785 #if 0
1786 printf("bpf: %s attached\n", ifp->if_xname);
1787 #endif
1788 }
1789
1790 /*
1791 * Remove an interface from bpf.
1792 */
1793 static void
1794 _bpfdetach(struct ifnet *ifp)
1795 {
1796 struct bpf_if *bp, **pbp;
1797 struct bpf_d *d;
1798 int s;
1799
1800 /* Nuke the vnodes for any open instances */
1801 LIST_FOREACH(d, &bpf_list, bd_list) {
1802 if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
1803 /*
1804 * Detach the descriptor from an interface now.
1805 * It will be free'ed later by close routine.
1806 */
1807 s = splnet();
1808 d->bd_promisc = 0; /* we can't touch device. */
1809 bpf_detachd(d);
1810 splx(s);
1811 }
1812 }
1813
1814 again:
1815 for (bp = bpf_iflist, pbp = &bpf_iflist;
1816 bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
1817 if (bp->bif_ifp == ifp) {
1818 *pbp = bp->bif_next;
1819 free(bp, M_DEVBUF);
1820 goto again;
1821 }
1822 }
1823 }
1824
1825 /*
1826 * Change the data link type of a interface.
1827 */
1828 static void
1829 _bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1830 {
1831 struct bpf_if *bp;
1832
1833 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1834 if (bp->bif_driverp == &ifp->if_bpf)
1835 break;
1836 }
1837 if (bp == NULL)
1838 panic("bpf_change_type");
1839
1840 bp->bif_dlt = dlt;
1841
1842 bp->bif_hdrlen = hdrlen;
1843 }
1844
1845 /*
1846 * Get a list of available data link type of the interface.
1847 */
1848 static int
1849 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1850 {
1851 int n, error;
1852 struct ifnet *ifp;
1853 struct bpf_if *bp;
1854
1855 ifp = d->bd_bif->bif_ifp;
1856 n = 0;
1857 error = 0;
1858 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1859 if (bp->bif_ifp != ifp)
1860 continue;
1861 if (bfl->bfl_list != NULL) {
1862 if (n >= bfl->bfl_len)
1863 return ENOMEM;
1864 error = copyout(&bp->bif_dlt,
1865 bfl->bfl_list + n, sizeof(u_int));
1866 }
1867 n++;
1868 }
1869 bfl->bfl_len = n;
1870 return error;
1871 }
1872
1873 /*
1874 * Set the data link type of a BPF instance.
1875 */
1876 static int
1877 bpf_setdlt(struct bpf_d *d, u_int dlt)
1878 {
1879 int s, error, opromisc;
1880 struct ifnet *ifp;
1881 struct bpf_if *bp;
1882
1883 if (d->bd_bif->bif_dlt == dlt)
1884 return 0;
1885 ifp = d->bd_bif->bif_ifp;
1886 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1887 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1888 break;
1889 }
1890 if (bp == NULL)
1891 return EINVAL;
1892 s = splnet();
1893 opromisc = d->bd_promisc;
1894 bpf_detachd(d);
1895 bpf_attachd(d, bp);
1896 reset_d(d);
1897 if (opromisc) {
1898 error = ifpromisc(bp->bif_ifp, 1);
1899 if (error)
1900 printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
1901 bp->bif_ifp->if_xname, error);
1902 else
1903 d->bd_promisc = 1;
1904 }
1905 splx(s);
1906 return 0;
1907 }
1908
1909 static int
1910 sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
1911 {
1912 int newsize, error;
1913 struct sysctlnode node;
1914
1915 node = *rnode;
1916 node.sysctl_data = &newsize;
1917 newsize = bpf_maxbufsize;
1918 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1919 if (error || newp == NULL)
1920 return (error);
1921
1922 if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
1923 return (EINVAL);
1924
1925 bpf_maxbufsize = newsize;
1926
1927 return (0);
1928 }
1929
1930 static int
1931 sysctl_net_bpf_jit(SYSCTLFN_ARGS)
1932 {
1933 bool newval;
1934 int error;
1935 struct sysctlnode node;
1936
1937 node = *rnode;
1938 node.sysctl_data = &newval;
1939 newval = bpf_jit;
1940 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1941 if (error != 0 || newp == NULL)
1942 return error;
1943
1944 bpf_jit = newval;
1945
1946 /*
1947 * Do a full sync to publish new bpf_jit value and
1948 * update bpfjit_module_ops.bj_generate_code variable.
1949 */
1950 membar_sync();
1951
1952 if (newval && bpfjit_module_ops.bj_generate_code == NULL) {
1953 printf("WARNING jit activation is postponed "
1954 "until after bpfjit module is loaded\n");
1955 }
1956
1957 return 0;
1958 }
1959
1960 static int
1961 sysctl_net_bpf_peers(SYSCTLFN_ARGS)
1962 {
1963 int error, elem_count;
1964 struct bpf_d *dp;
1965 struct bpf_d_ext dpe;
1966 size_t len, needed, elem_size, out_size;
1967 char *sp;
1968
1969 if (namelen == 1 && name[0] == CTL_QUERY)
1970 return (sysctl_query(SYSCTLFN_CALL(rnode)));
1971
1972 if (namelen != 2)
1973 return (EINVAL);
1974
1975 /* BPF peers is privileged information. */
1976 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
1977 KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
1978 if (error)
1979 return (EPERM);
1980
1981 len = (oldp != NULL) ? *oldlenp : 0;
1982 sp = oldp;
1983 elem_size = name[0];
1984 elem_count = name[1];
1985 out_size = MIN(sizeof(dpe), elem_size);
1986 needed = 0;
1987
1988 if (elem_size < 1 || elem_count < 0)
1989 return (EINVAL);
1990
1991 mutex_enter(&bpf_mtx);
1992 LIST_FOREACH(dp, &bpf_list, bd_list) {
1993 if (len >= elem_size && elem_count > 0) {
1994 #define BPF_EXT(field) dpe.bde_ ## field = dp->bd_ ## field
1995 BPF_EXT(bufsize);
1996 BPF_EXT(promisc);
1997 BPF_EXT(state);
1998 BPF_EXT(immediate);
1999 BPF_EXT(hdrcmplt);
2000 BPF_EXT(seesent);
2001 BPF_EXT(pid);
2002 BPF_EXT(rcount);
2003 BPF_EXT(dcount);
2004 BPF_EXT(ccount);
2005 #undef BPF_EXT
2006 if (dp->bd_bif)
2007 (void)strlcpy(dpe.bde_ifname,
2008 dp->bd_bif->bif_ifp->if_xname,
2009 IFNAMSIZ - 1);
2010 else
2011 dpe.bde_ifname[0] = '\0';
2012
2013 error = copyout(&dpe, sp, out_size);
2014 if (error)
2015 break;
2016 sp += elem_size;
2017 len -= elem_size;
2018 }
2019 needed += elem_size;
2020 if (elem_count > 0 && elem_count != INT_MAX)
2021 elem_count--;
2022 }
2023 mutex_exit(&bpf_mtx);
2024
2025 *oldlenp = needed;
2026
2027 return (error);
2028 }
2029
2030 static struct sysctllog *bpf_sysctllog;
2031 static void
2032 sysctl_net_bpf_setup(void)
2033 {
2034 const struct sysctlnode *node;
2035
2036 node = NULL;
2037 sysctl_createv(&bpf_sysctllog, 0, NULL, &node,
2038 CTLFLAG_PERMANENT,
2039 CTLTYPE_NODE, "bpf",
2040 SYSCTL_DESCR("BPF options"),
2041 NULL, 0, NULL, 0,
2042 CTL_NET, CTL_CREATE, CTL_EOL);
2043 if (node != NULL) {
2044 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
2045 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2046 CTLTYPE_BOOL, "jit",
2047 SYSCTL_DESCR("Toggle Just-In-Time compilation"),
2048 sysctl_net_bpf_jit, 0, &bpf_jit, 0,
2049 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2050 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
2051 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2052 CTLTYPE_INT, "maxbufsize",
2053 SYSCTL_DESCR("Maximum size for data capture buffer"),
2054 sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
2055 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2056 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
2057 CTLFLAG_PERMANENT,
2058 CTLTYPE_STRUCT, "stats",
2059 SYSCTL_DESCR("BPF stats"),
2060 NULL, 0, &bpf_gstats, sizeof(bpf_gstats),
2061 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2062 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
2063 CTLFLAG_PERMANENT,
2064 CTLTYPE_STRUCT, "peers",
2065 SYSCTL_DESCR("BPF peers"),
2066 sysctl_net_bpf_peers, 0, NULL, 0,
2067 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2068 }
2069
2070 }
2071
/*
 * Operations vector that binds the bpf attach, detach, change-type and
 * tap entry points to the implementations in this file.  bpf_modcmd()
 * installs it into bpf_ops when the module is initialized.
 */
struct bpf_ops bpf_ops_kernel = {
	.bpf_attach = _bpfattach,
	.bpf_detach = _bpfdetach,
	.bpf_change_type = _bpf_change_type,

	.bpf_tap = _bpf_tap,
	.bpf_mtap = _bpf_mtap,
	.bpf_mtap2 = _bpf_mtap2,
	.bpf_mtap_af = _bpf_mtap_af,
	.bpf_mtap_sl_in = _bpf_mtap_sl_in,
	.bpf_mtap_sl_out = _bpf_mtap_sl_out,
};
2084
2085 MODULE(MODULE_CLASS_DRIVER, bpf, NULL);
2086
2087 static int
2088 bpf_modcmd(modcmd_t cmd, void *arg)
2089 {
2090 devmajor_t bmajor, cmajor;
2091 int error;
2092
2093 bmajor = cmajor = NODEVMAJOR;
2094
2095 switch (cmd) {
2096 case MODULE_CMD_INIT:
2097 bpfilterattach(0);
2098 error = devsw_attach("bpf", NULL, &bmajor,
2099 &bpf_cdevsw, &cmajor);
2100 if (error == EEXIST)
2101 error = 0; /* maybe built-in ... improve eventually */
2102 if (error)
2103 break;
2104
2105 bpf_ops_handover_enter(&bpf_ops_kernel);
2106 atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
2107 bpf_ops_handover_exit();
2108 sysctl_net_bpf_setup();
2109 break;
2110
2111 case MODULE_CMD_FINI:
2112 /*
2113 * While there is no reference counting for bpf callers,
2114 * unload could at least in theory be done similarly to
2115 * system call disestablishment. This should even be
2116 * a little simpler:
2117 *
2118 * 1) replace op vector with stubs
2119 * 2) post update to all cpus with xc
2120 * 3) check that nobody is in bpf anymore
2121 * (it's doubtful we'd want something like l_sysent,
2122 * but we could do something like *signed* percpu
2123 * counters. if the sum is 0, we're good).
2124 * 4) if fail, unroll changes
2125 *
2126 * NOTE: change won't be atomic to the outside. some
2127 * packets may be not captured even if unload is
2128 * not succesful. I think packet capture not working
2129 * is a perfectly logical consequence of trying to
2130 * disable packet capture.
2131 */
2132 error = EOPNOTSUPP;
2133 /* insert sysctl teardown */
2134 break;
2135
2136 default:
2137 error = ENOTTY;
2138 break;
2139 }
2140
2141 return error;
2142 }
2143