if_ethersubr.c revision 1.319 1 /* $NetBSD: if_ethersubr.c,v 1.319 2022/09/03 02:24:59 thorpej Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)if_ethersubr.c 8.2 (Berkeley) 4/4/96
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: if_ethersubr.c,v 1.319 2022/09/03 02:24:59 thorpej Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_inet.h"
68 #include "opt_atalk.h"
69 #include "opt_mbuftrace.h"
70 #include "opt_mpls.h"
71 #include "opt_gateway.h"
72 #include "opt_pppoe.h"
73 #include "opt_net_mpsafe.h"
74 #endif
75
76 #include "vlan.h"
77 #include "pppoe.h"
78 #include "bridge.h"
79 #include "arp.h"
80 #include "agr.h"
81
82 #include <sys/sysctl.h>
83 #include <sys/mbuf.h>
84 #include <sys/mutex.h>
85 #include <sys/ioctl.h>
86 #include <sys/errno.h>
87 #include <sys/device.h>
88 #include <sys/entropy.h>
89 #include <sys/rndsource.h>
90 #include <sys/cpu.h>
91 #include <sys/kmem.h>
92 #include <sys/hook.h>
93
94 #include <net/if.h>
95 #include <net/netisr.h>
96 #include <net/route.h>
97 #include <net/if_llc.h>
98 #include <net/if_dl.h>
99 #include <net/if_types.h>
100 #include <net/pktqueue.h>
101
102 #include <net/if_media.h>
103 #include <dev/mii/mii.h>
104 #include <dev/mii/miivar.h>
105
106 #if NARP == 0
107 /*
108 * XXX there should really be a way to issue this warning from within config(8)
109 */
110 #error You have included NETATALK or a pseudo-device in your configuration that depends on the presence of ethernet interfaces, but have no such interfaces configured. Check if you really need pseudo-device bridge, pppoe, vlan or options NETATALK.
111 #endif
112
113 #include <net/bpf.h>
114
115 #include <net/if_ether.h>
116 #include <net/if_vlanvar.h>
117
118 #if NPPPOE > 0
119 #include <net/if_pppoe.h>
120 #endif
121
122 #if NAGR > 0
123 #include <net/ether_slowprotocols.h>
124 #include <net/agr/ieee8023ad.h>
125 #include <net/agr/if_agrvar.h>
126 #endif
127
128 #if NBRIDGE > 0
129 #include <net/if_bridgevar.h>
130 #endif
131
132 #include <netinet/in.h>
133 #ifdef INET
134 #include <netinet/in_var.h>
135 #endif
136 #include <netinet/if_inarp.h>
137
138 #ifdef INET6
139 #ifndef INET
140 #include <netinet/in.h>
141 #endif
142 #include <netinet6/in6_var.h>
143 #include <netinet6/nd6.h>
144 #endif
145
146 #include "carp.h"
147 #if NCARP > 0
148 #include <netinet/ip_carp.h>
149 #endif
150
151 #ifdef NETATALK
152 #include <netatalk/at.h>
153 #include <netatalk/at_var.h>
154 #include <netatalk/at_extern.h>
155
156 #define llc_snap_org_code llc_un.type_snap.org_code
157 #define llc_snap_ether_type llc_un.type_snap.ether_type
158
159 extern u_char at_org_code[3];
160 extern u_char aarp_org_code[3];
161 #endif /* NETATALK */
162
163 #ifdef MPLS
164 #include <netmpls/mpls.h>
165 #include <netmpls/mpls_var.h>
166 #endif
167
168 CTASSERT(sizeof(struct ether_addr) == 6);
169 CTASSERT(sizeof(struct ether_header) == 14);
170
171 #ifdef DIAGNOSTIC
172 static struct timeval bigpktppslim_last;
173 static int bigpktppslim = 2; /* XXX */
174 static int bigpktpps_count;
175 static kmutex_t bigpktpps_lock __cacheline_aligned;
176 #endif
177
178 const uint8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
179 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
180 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
181 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
182 #define senderr(e) { error = (e); goto bad;}
183
184 static pktq_rps_hash_func_t ether_pktq_rps_hash_p;
185
186 static int ether_output(struct ifnet *, struct mbuf *,
187 const struct sockaddr *, const struct rtentry *);
188
189 /*
190 * Ethernet output routine.
191 * Encapsulate a packet of type family for the local net.
192 * Assumes that ifp is actually pointer to ethercom structure.
193 */
194 static int
195 ether_output(struct ifnet * const ifp0, struct mbuf * const m0,
196 const struct sockaddr * const dst, const struct rtentry *rt)
197 {
198 uint8_t esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
199 uint16_t etype = 0;
200 int error = 0, hdrcmplt = 0;
201 struct mbuf *m = m0;
202 struct mbuf *mcopy = NULL;
203 struct ether_header *eh;
204 struct ifnet *ifp = ifp0;
205 #ifdef INET
206 struct arphdr *ah;
207 #endif
208 #ifdef NETATALK
209 struct at_ifaddr *aa;
210 #endif
211
212 #ifdef MBUFTRACE
213 m_claimm(m, ifp->if_mowner);
214 #endif
215
216 #if NCARP > 0
217 if (ifp->if_type == IFT_CARP) {
218 struct ifaddr *ifa;
219 int s = pserialize_read_enter();
220
221 /* loop back if this is going to the carp interface */
222 if (dst != NULL && ifp0->if_link_state == LINK_STATE_UP &&
223 (ifa = ifa_ifwithaddr(dst)) != NULL) {
224 if (ifa->ifa_ifp == ifp0) {
225 pserialize_read_exit(s);
226 return looutput(ifp0, m, dst, rt);
227 }
228 }
229 pserialize_read_exit(s);
230
231 ifp = ifp->if_carpdev;
232 /* ac = (struct arpcom *)ifp; */
233
234 if ((ifp0->if_flags & (IFF_UP | IFF_RUNNING)) !=
235 (IFF_UP | IFF_RUNNING))
236 senderr(ENETDOWN);
237 }
238 #endif
239
240 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING))
241 senderr(ENETDOWN);
242
243 switch (dst->sa_family) {
244
245 #ifdef INET
246 case AF_INET:
247 if (m->m_flags & M_BCAST) {
248 memcpy(edst, etherbroadcastaddr, sizeof(edst));
249 } else if (m->m_flags & M_MCAST) {
250 ETHER_MAP_IP_MULTICAST(&satocsin(dst)->sin_addr, edst);
251 } else {
252 error = arpresolve(ifp0, rt, m, dst, edst, sizeof(edst));
253 if (error)
254 return (error == EWOULDBLOCK) ? 0 : error;
255 }
256 /* If broadcasting on a simplex interface, loopback a copy */
257 if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
258 mcopy = m_copypacket(m, M_DONTWAIT);
259 etype = htons(ETHERTYPE_IP);
260 break;
261
262 case AF_ARP:
263 ah = mtod(m, struct arphdr *);
264 if (m->m_flags & M_BCAST) {
265 memcpy(edst, etherbroadcastaddr, sizeof(edst));
266 } else {
267 void *tha = ar_tha(ah);
268
269 if (tha == NULL) {
270 /* fake with ARPHRD_IEEE1394 */
271 m_freem(m);
272 return 0;
273 }
274 memcpy(edst, tha, sizeof(edst));
275 }
276
277 ah->ar_hrd = htons(ARPHRD_ETHER);
278
279 switch (ntohs(ah->ar_op)) {
280 case ARPOP_REVREQUEST:
281 case ARPOP_REVREPLY:
282 etype = htons(ETHERTYPE_REVARP);
283 break;
284
285 case ARPOP_REQUEST:
286 case ARPOP_REPLY:
287 default:
288 etype = htons(ETHERTYPE_ARP);
289 }
290 break;
291 #endif
292
293 #ifdef INET6
294 case AF_INET6:
295 if (m->m_flags & M_BCAST) {
296 memcpy(edst, etherbroadcastaddr, sizeof(edst));
297 } else if (m->m_flags & M_MCAST) {
298 ETHER_MAP_IPV6_MULTICAST(&satocsin6(dst)->sin6_addr,
299 edst);
300 } else {
301 error = nd6_resolve(ifp0, rt, m, dst, edst,
302 sizeof(edst));
303 if (error)
304 return (error == EWOULDBLOCK) ? 0 : error;
305 }
306 etype = htons(ETHERTYPE_IPV6);
307 break;
308 #endif
309
310 #ifdef NETATALK
311 case AF_APPLETALK: {
312 struct ifaddr *ifa;
313 int s;
314
315 KERNEL_LOCK(1, NULL);
316
317 if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst)) {
318 KERNEL_UNLOCK_ONE(NULL);
319 return 0;
320 }
321
322 /*
323 * ifaddr is the first thing in at_ifaddr
324 */
325 s = pserialize_read_enter();
326 ifa = at_ifawithnet((const struct sockaddr_at *)dst, ifp);
327 if (ifa == NULL) {
328 pserialize_read_exit(s);
329 KERNEL_UNLOCK_ONE(NULL);
330 senderr(EADDRNOTAVAIL);
331 }
332 aa = (struct at_ifaddr *)ifa;
333
334 /*
335 * In the phase 2 case, we need to prepend an mbuf for the
336 * llc header.
337 */
338 if (aa->aa_flags & AFA_PHASE2) {
339 struct llc llc;
340
341 M_PREPEND(m, sizeof(struct llc), M_DONTWAIT);
342 if (m == NULL) {
343 pserialize_read_exit(s);
344 KERNEL_UNLOCK_ONE(NULL);
345 senderr(ENOBUFS);
346 }
347
348 llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
349 llc.llc_control = LLC_UI;
350 memcpy(llc.llc_snap_org_code, at_org_code,
351 sizeof(llc.llc_snap_org_code));
352 llc.llc_snap_ether_type = htons(ETHERTYPE_ATALK);
353 memcpy(mtod(m, void *), &llc, sizeof(struct llc));
354 } else {
355 etype = htons(ETHERTYPE_ATALK);
356 }
357 pserialize_read_exit(s);
358 KERNEL_UNLOCK_ONE(NULL);
359 break;
360 }
361 #endif /* NETATALK */
362
363 case pseudo_AF_HDRCMPLT:
364 hdrcmplt = 1;
365 memcpy(esrc,
366 ((const struct ether_header *)dst->sa_data)->ether_shost,
367 sizeof(esrc));
368 /* FALLTHROUGH */
369
370 case AF_UNSPEC:
371 memcpy(edst,
372 ((const struct ether_header *)dst->sa_data)->ether_dhost,
373 sizeof(edst));
374 /* AF_UNSPEC doesn't swap the byte order of the ether_type. */
375 etype = ((const struct ether_header *)dst->sa_data)->ether_type;
376 break;
377
378 default:
379 printf("%s: can't handle af%d\n", ifp->if_xname,
380 dst->sa_family);
381 senderr(EAFNOSUPPORT);
382 }
383
384 #ifdef MPLS
385 {
386 struct m_tag *mtag;
387 mtag = m_tag_find(m, PACKET_TAG_MPLS);
388 if (mtag != NULL) {
389 /* Having the tag itself indicates it's MPLS */
390 etype = htons(ETHERTYPE_MPLS);
391 m_tag_delete(m, mtag);
392 }
393 }
394 #endif
395
396 if (mcopy)
397 (void)looutput(ifp, mcopy, dst, rt);
398
399 KASSERT((m->m_flags & M_PKTHDR) != 0);
400
401 /*
402 * If no ether type is set, this must be a 802.2 formatted packet.
403 */
404 if (etype == 0)
405 etype = htons(m->m_pkthdr.len);
406
407 /*
408 * Add local net header. If no space in first mbuf, allocate another.
409 */
410 M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
411 if (m == NULL)
412 senderr(ENOBUFS);
413
414 eh = mtod(m, struct ether_header *);
415 /* Note: etype is already in network byte order. */
416 memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
417 memcpy(eh->ether_dhost, edst, sizeof(edst));
418 if (hdrcmplt) {
419 memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
420 } else {
421 memcpy(eh->ether_shost, CLLADDR(ifp->if_sadl),
422 sizeof(eh->ether_shost));
423 }
424
425 #if NCARP > 0
426 if (ifp0 != ifp && ifp0->if_type == IFT_CARP) {
427 memcpy(eh->ether_shost, CLLADDR(ifp0->if_sadl),
428 sizeof(eh->ether_shost));
429 }
430 #endif
431
432 if ((error = pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_OUT)) != 0)
433 return error;
434 if (m == NULL)
435 return 0;
436
437 #if NBRIDGE > 0
438 /*
439 * Bridges require special output handling.
440 */
441 if (ifp->if_bridge)
442 return bridge_output(ifp, m, NULL, NULL);
443 #endif
444
445 #if NCARP > 0
446 if (ifp != ifp0)
447 if_statadd(ifp0, if_obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
448 #endif
449
450 #ifdef ALTQ
451 KERNEL_LOCK(1, NULL);
452 /*
453 * If ALTQ is enabled on the parent interface, do
454 * classification; the queueing discipline might not
455 * require classification, but might require the
456 * address family/header pointer in the pktattr.
457 */
458 if (ALTQ_IS_ENABLED(&ifp->if_snd))
459 altq_etherclassify(&ifp->if_snd, m);
460 KERNEL_UNLOCK_ONE(NULL);
461 #endif
462 return ifq_enqueue(ifp, m);
463
464 bad:
465 if_statinc(ifp, if_oerrors);
466 if (m)
467 m_freem(m);
468 return error;
469 }
470
471 #ifdef ALTQ
472 /*
473 * This routine is a slight hack to allow a packet to be classified
474 * if the Ethernet headers are present. It will go away when ALTQ's
475 * classification engine understands link headers.
476 *
477 * XXX: We may need to do m_pullups here. First to ensure struct ether_header
478 * is indeed contiguous, then to read the LLC and so on.
479 */
480 void
481 altq_etherclassify(struct ifaltq *ifq, struct mbuf *m)
482 {
483 struct ether_header *eh;
484 struct mbuf *mtop = m;
485 uint16_t ether_type;
486 int hlen, af, hdrsize;
487 void *hdr;
488
489 KASSERT((mtop->m_flags & M_PKTHDR) != 0);
490
491 hlen = ETHER_HDR_LEN;
492 eh = mtod(m, struct ether_header *);
493
494 ether_type = htons(eh->ether_type);
495
496 if (ether_type < ETHERMTU) {
497 /* LLC/SNAP */
498 struct llc *llc = (struct llc *)(eh + 1);
499 hlen += 8;
500
501 if (m->m_len < hlen ||
502 llc->llc_dsap != LLC_SNAP_LSAP ||
503 llc->llc_ssap != LLC_SNAP_LSAP ||
504 llc->llc_control != LLC_UI) {
505 /* Not SNAP. */
506 goto bad;
507 }
508
509 ether_type = htons(llc->llc_un.type_snap.ether_type);
510 }
511
512 switch (ether_type) {
513 case ETHERTYPE_IP:
514 af = AF_INET;
515 hdrsize = 20; /* sizeof(struct ip) */
516 break;
517
518 case ETHERTYPE_IPV6:
519 af = AF_INET6;
520 hdrsize = 40; /* sizeof(struct ip6_hdr) */
521 break;
522
523 default:
524 af = AF_UNSPEC;
525 hdrsize = 0;
526 break;
527 }
528
529 while (m->m_len <= hlen) {
530 hlen -= m->m_len;
531 m = m->m_next;
532 if (m == NULL)
533 goto bad;
534 }
535
536 if (m->m_len < (hlen + hdrsize)) {
537 /*
538 * protocol header not in a single mbuf.
539 * We can't cope with this situation right
540 * now (but it shouldn't ever happen, really, anyhow).
541 */
542 #ifdef DEBUG
543 printf("altq_etherclassify: headers span multiple mbufs: "
544 "%d < %d\n", m->m_len, (hlen + hdrsize));
545 #endif
546 goto bad;
547 }
548
549 m->m_data += hlen;
550 m->m_len -= hlen;
551
552 hdr = mtod(m, void *);
553
554 if (ALTQ_NEEDS_CLASSIFY(ifq)) {
555 mtop->m_pkthdr.pattr_class =
556 (*ifq->altq_classify)(ifq->altq_clfier, m, af);
557 }
558 mtop->m_pkthdr.pattr_af = af;
559 mtop->m_pkthdr.pattr_hdr = hdr;
560
561 m->m_data -= hlen;
562 m->m_len += hlen;
563
564 return;
565
566 bad:
567 mtop->m_pkthdr.pattr_class = NULL;
568 mtop->m_pkthdr.pattr_hdr = NULL;
569 mtop->m_pkthdr.pattr_af = AF_UNSPEC;
570 }
571 #endif /* ALTQ */
572
573 #if defined (LLC) || defined (NETATALK)
574 static void
575 ether_input_llc(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh)
576 {
577 pktqueue_t *pktq = NULL;
578 struct llc *l;
579
580 if (m->m_len < sizeof(*eh) + sizeof(struct llc))
581 goto error;
582
583 l = (struct llc *)(eh+1);
584 switch (l->llc_dsap) {
585 #ifdef NETATALK
586 case LLC_SNAP_LSAP:
587 switch (l->llc_control) {
588 case LLC_UI:
589 if (l->llc_ssap != LLC_SNAP_LSAP)
590 goto error;
591
592 if (memcmp(&(l->llc_snap_org_code)[0],
593 at_org_code, sizeof(at_org_code)) == 0 &&
594 ntohs(l->llc_snap_ether_type) ==
595 ETHERTYPE_ATALK) {
596 pktq = at_pktq2;
597 m_adj(m, sizeof(struct ether_header)
598 + sizeof(struct llc));
599 break;
600 }
601
602 if (memcmp(&(l->llc_snap_org_code)[0],
603 aarp_org_code,
604 sizeof(aarp_org_code)) == 0 &&
605 ntohs(l->llc_snap_ether_type) ==
606 ETHERTYPE_AARP) {
607 m_adj(m, sizeof(struct ether_header)
608 + sizeof(struct llc));
609 aarpinput(ifp, m); /* XXX queue? */
610 return;
611 }
612
613 default:
614 goto error;
615 }
616 break;
617 #endif
618 default:
619 goto noproto;
620 }
621
622 KASSERT(pktq != NULL);
623 if (__predict_false(!pktq_enqueue(pktq, m, 0))) {
624 m_freem(m);
625 }
626 return;
627
628 noproto:
629 m_freem(m);
630 if_statinc(ifp, if_noproto);
631 return;
632 error:
633 m_freem(m);
634 if_statinc(ifp, if_ierrors);
635 return;
636 }
637 #endif /* defined (LLC) || defined (NETATALK) */
638
639 /*
640 * Process a received Ethernet packet;
641 * the packet is in the mbuf chain m with
642 * the ether header.
643 */
644 void
645 ether_input(struct ifnet *ifp, struct mbuf *m)
646 {
647 #if NVLAN > 0 || defined(MBUFTRACE)
648 struct ethercom *ec = (struct ethercom *) ifp;
649 #endif
650 pktqueue_t *pktq = NULL;
651 uint16_t etype;
652 struct ether_header *eh;
653 size_t ehlen;
654 static int earlypkts;
655
656 /* No RPS for not-IP. */
657 pktq_rps_hash_func_t rps_hash = NULL;
658
659 KASSERT(!cpu_intr_p());
660 KASSERT((m->m_flags & M_PKTHDR) != 0);
661
662 if ((ifp->if_flags & IFF_UP) == 0)
663 goto drop;
664
665 #ifdef MBUFTRACE
666 m_claimm(m, &ec->ec_rx_mowner);
667 #endif
668
669 if (__predict_false(m->m_len < sizeof(*eh))) {
670 if ((m = m_pullup(m, sizeof(*eh))) == NULL) {
671 if_statinc(ifp, if_ierrors);
672 return;
673 }
674 }
675
676 eh = mtod(m, struct ether_header *);
677 etype = ntohs(eh->ether_type);
678 ehlen = sizeof(*eh);
679
680 if (__predict_false(earlypkts < 100 ||
681 entropy_epoch() == (unsigned)-1)) {
682 rnd_add_data(NULL, eh, ehlen, 0);
683 earlypkts++;
684 }
685
686 /*
687 * Determine if the packet is within its size limits. For MPLS the
688 * header length is variable, so we skip the check.
689 */
690 if (etype != ETHERTYPE_MPLS && m->m_pkthdr.len >
691 ETHER_MAX_FRAME(ifp, etype, m->m_flags & M_HASFCS)) {
692 #ifdef DIAGNOSTIC
693 mutex_enter(&bigpktpps_lock);
694 if (ppsratecheck(&bigpktppslim_last, &bigpktpps_count,
695 bigpktppslim)) {
696 printf("%s: discarding oversize frame (len=%d)\n",
697 ifp->if_xname, m->m_pkthdr.len);
698 }
699 mutex_exit(&bigpktpps_lock);
700 #endif
701 goto error;
702 }
703
704 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
705 /*
706 * If this is not a simplex interface, drop the packet
707 * if it came from us.
708 */
709 if ((ifp->if_flags & IFF_SIMPLEX) == 0 &&
710 memcmp(CLLADDR(ifp->if_sadl), eh->ether_shost,
711 ETHER_ADDR_LEN) == 0) {
712 goto drop;
713 }
714
715 if (memcmp(etherbroadcastaddr,
716 eh->ether_dhost, ETHER_ADDR_LEN) == 0)
717 m->m_flags |= M_BCAST;
718 else
719 m->m_flags |= M_MCAST;
720 if_statinc(ifp, if_imcasts);
721 }
722
723 /* If the CRC is still on the packet, trim it off. */
724 if (m->m_flags & M_HASFCS) {
725 m_adj(m, -ETHER_CRC_LEN);
726 m->m_flags &= ~M_HASFCS;
727 }
728
729 if_statadd(ifp, if_ibytes, m->m_pkthdr.len);
730
731 if (!vlan_has_tag(m) && etype == ETHERTYPE_VLAN) {
732 m = ether_strip_vlantag(m);
733 if (m == NULL) {
734 if_statinc(ifp, if_ierrors);
735 return;
736 }
737
738 eh = mtod(m, struct ether_header *);
739 etype = ntohs(eh->ether_type);
740 ehlen = sizeof(*eh);
741 }
742
743 if ((m->m_flags & (M_BCAST | M_MCAST | M_PROMISC)) == 0 &&
744 (ifp->if_flags & IFF_PROMISC) != 0 &&
745 memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
746 ETHER_ADDR_LEN) != 0) {
747 m->m_flags |= M_PROMISC;
748 }
749
750 if ((m->m_flags & M_PROMISC) == 0) {
751 if (pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_IN) != 0)
752 return;
753 if (m == NULL)
754 return;
755
756 eh = mtod(m, struct ether_header *);
757 etype = ntohs(eh->ether_type);
758 }
759
760 /*
761 * Processing a logical interfaces that are able
762 * to configure vlan(4).
763 */
764 #if NAGR > 0
765 if (ifp->if_lagg != NULL &&
766 __predict_true(etype != ETHERTYPE_SLOWPROTOCOLS)) {
767 m->m_flags &= ~M_PROMISC;
768 agr_input(ifp, m);
769 return;
770 }
771 #endif
772
773 /*
774 * VLAN processing.
775 *
776 * VLAN provides service delimiting so the frames are
777 * processed before other handlings. If a VLAN interface
778 * does not exist to take those frames, they're returned
779 * to ether_input().
780 */
781
782 if (vlan_has_tag(m)) {
783 if (EVL_VLANOFTAG(vlan_get_tag(m)) == 0) {
784 if (etype == ETHERTYPE_VLAN ||
785 etype == ETHERTYPE_QINQ)
786 goto drop;
787
788 /* XXX we should actually use the prio value? */
789 m->m_flags &= ~M_VLANTAG;
790 } else {
791 #if NVLAN > 0
792 if (ec->ec_nvlans > 0) {
793 m = vlan_input(ifp, m);
794
795 /* vlan_input() called ether_input() recursively */
796 if (m == NULL)
797 return;
798 }
799 #endif
800 /* drop VLAN frames not for this port. */
801 goto noproto;
802 }
803 }
804
805 #if NCARP > 0
806 if (__predict_false(ifp->if_carp && ifp->if_type != IFT_CARP)) {
807 /*
808 * Clear M_PROMISC, in case the packet comes from a
809 * vlan.
810 */
811 m->m_flags &= ~M_PROMISC;
812 if (carp_input(m, (uint8_t *)&eh->ether_shost,
813 (uint8_t *)&eh->ether_dhost, eh->ether_type) == 0)
814 return;
815 }
816 #endif
817
818 /*
819 * Handle protocols that expect to have the Ethernet header
820 * (and possibly FCS) intact.
821 */
822 switch (etype) {
823 #if NPPPOE > 0
824 case ETHERTYPE_PPPOEDISC:
825 pppoedisc_input(ifp, m);
826 return;
827
828 case ETHERTYPE_PPPOE:
829 pppoe_input(ifp, m);
830 return;
831 #endif
832
833 case ETHERTYPE_SLOWPROTOCOLS: {
834 uint8_t subtype;
835
836 if (m->m_pkthdr.len < sizeof(*eh) + sizeof(subtype))
837 goto error;
838
839 m_copydata(m, sizeof(*eh), sizeof(subtype), &subtype);
840 switch (subtype) {
841 #if NAGR > 0
842 case SLOWPROTOCOLS_SUBTYPE_LACP:
843 if (ifp->if_lagg != NULL) {
844 ieee8023ad_lacp_input(ifp, m);
845 return;
846 }
847 break;
848
849 case SLOWPROTOCOLS_SUBTYPE_MARKER:
850 if (ifp->if_lagg != NULL) {
851 ieee8023ad_marker_input(ifp, m);
852 return;
853 }
854 break;
855 #endif
856
857 default:
858 if (subtype == 0 || subtype > 10) {
859 /* illegal value */
860 goto error;
861 }
862 /* unknown subtype */
863 break;
864 }
865 }
866 /* FALLTHROUGH */
867 default:
868 if (m->m_flags & M_PROMISC)
869 goto drop;
870 }
871
872 /* If the CRC is still on the packet, trim it off. */
873 if (m->m_flags & M_HASFCS) {
874 m_adj(m, -ETHER_CRC_LEN);
875 m->m_flags &= ~M_HASFCS;
876 }
877
878 /* etype represents the size of the payload in this case */
879 if (etype <= ETHERMTU + sizeof(struct ether_header)) {
880 KASSERT(ehlen == sizeof(*eh));
881 #if defined (LLC) || defined (NETATALK)
882 ether_input_llc(ifp, m, eh);
883 return;
884 #else
885 /* ethertype of 0-1500 is regarded as noproto */
886 goto noproto;
887 #endif
888 }
889
890 /* Strip off the Ethernet header. */
891 m_adj(m, ehlen);
892
893 switch (etype) {
894 #ifdef INET
895 case ETHERTYPE_IP:
896 #ifdef GATEWAY
897 if (ipflow_fastforward(m))
898 return;
899 #endif
900 pktq = ip_pktq;
901 rps_hash = atomic_load_relaxed(ðer_pktq_rps_hash_p);
902 break;
903
904 case ETHERTYPE_ARP:
905 pktq = arp_pktq;
906 break;
907
908 case ETHERTYPE_REVARP:
909 revarpinput(m); /* XXX queue? */
910 return;
911 #endif
912
913 #ifdef INET6
914 case ETHERTYPE_IPV6:
915 if (__predict_false(!in6_present))
916 goto noproto;
917 #ifdef GATEWAY
918 if (ip6flow_fastforward(&m))
919 return;
920 #endif
921 pktq = ip6_pktq;
922 rps_hash = atomic_load_relaxed(ðer_pktq_rps_hash_p);
923 break;
924 #endif
925
926 #ifdef NETATALK
927 case ETHERTYPE_ATALK:
928 pktq = at_pktq1;
929 break;
930
931 case ETHERTYPE_AARP:
932 aarpinput(ifp, m); /* XXX queue? */
933 return;
934 #endif
935
936 #ifdef MPLS
937 case ETHERTYPE_MPLS:
938 pktq = mpls_pktq;
939 break;
940 #endif
941
942 default:
943 goto noproto;
944 }
945
946 KASSERT(pktq != NULL);
947 const uint32_t h = rps_hash ? pktq_rps_hash(&rps_hash, m) : 0;
948 if (__predict_false(!pktq_enqueue(pktq, m, h))) {
949 m_freem(m);
950 }
951 return;
952
953 drop:
954 m_freem(m);
955 if_statinc(ifp, if_iqdrops);
956 return;
957 noproto:
958 m_freem(m);
959 if_statinc(ifp, if_noproto);
960 return;
961 error:
962 m_freem(m);
963 if_statinc(ifp, if_ierrors);
964 return;
965 }
966
967 static void
968 ether_bpf_mtap(struct bpf_if *bp, struct mbuf *m, u_int direction)
969 {
970 struct ether_vlan_header evl;
971 struct m_hdr mh, md;
972
973 KASSERT(bp != NULL);
974
975 if (!vlan_has_tag(m)) {
976 bpf_mtap3(bp, m, direction);
977 return;
978 }
979
980 memcpy(&evl, mtod(m, char *), ETHER_HDR_LEN);
981 evl.evl_proto = evl.evl_encap_proto;
982 evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
983 evl.evl_tag = htons(vlan_get_tag(m));
984
985 md.mh_flags = 0;
986 md.mh_data = m->m_data + ETHER_HDR_LEN;
987 md.mh_len = m->m_len - ETHER_HDR_LEN;
988 md.mh_next = m->m_next;
989
990 mh.mh_flags = 0;
991 mh.mh_data = (char *)&evl;
992 mh.mh_len = sizeof(evl);
993 mh.mh_next = (struct mbuf *)&md;
994
995 bpf_mtap3(bp, (struct mbuf *)&mh, direction);
996 }
997
998 /*
999 * Convert Ethernet address to printable (loggable) representation.
1000 */
1001 char *
1002 ether_sprintf(const u_char *ap)
1003 {
1004 static char etherbuf[3 * ETHER_ADDR_LEN];
1005 return ether_snprintf(etherbuf, sizeof(etherbuf), ap);
1006 }
1007
1008 char *
1009 ether_snprintf(char *buf, size_t len, const u_char *ap)
1010 {
1011 char *cp = buf;
1012 size_t i;
1013
1014 for (i = 0; i < len / 3; i++) {
1015 *cp++ = hexdigits[*ap >> 4];
1016 *cp++ = hexdigits[*ap++ & 0xf];
1017 *cp++ = ':';
1018 }
1019 *--cp = '\0';
1020 return buf;
1021 }
1022
1023 /*
1024 * Perform common duties while attaching to interface list
1025 */
1026 void
1027 ether_ifattach(struct ifnet *ifp, const uint8_t *lla)
1028 {
1029 struct ethercom *ec = (struct ethercom *)ifp;
1030 char xnamebuf[HOOKNAMSIZ];
1031
1032 ifp->if_type = IFT_ETHER;
1033 ifp->if_hdrlen = ETHER_HDR_LEN;
1034 ifp->if_dlt = DLT_EN10MB;
1035 ifp->if_mtu = ETHERMTU;
1036 ifp->if_output = ether_output;
1037 ifp->_if_input = ether_input;
1038 ifp->if_bpf_mtap = ether_bpf_mtap;
1039 if (ifp->if_baudrate == 0)
1040 ifp->if_baudrate = IF_Mbps(10); /* just a default */
1041
1042 if (lla != NULL)
1043 if_set_sadl(ifp, lla, ETHER_ADDR_LEN, !ETHER_IS_LOCAL(lla));
1044
1045 LIST_INIT(&ec->ec_multiaddrs);
1046 SIMPLEQ_INIT(&ec->ec_vids);
1047 ec->ec_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
1048 ec->ec_flags = 0;
1049 ifp->if_broadcastaddr = etherbroadcastaddr;
1050 bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1051 snprintf(xnamebuf, sizeof(xnamebuf),
1052 "%s-ether_ifdetachhooks", ifp->if_xname);
1053 ec->ec_ifdetach_hooks = simplehook_create(IPL_NET, xnamebuf);
1054 #ifdef MBUFTRACE
1055 mowner_init_owner(&ec->ec_tx_mowner, ifp->if_xname, "tx");
1056 mowner_init_owner(&ec->ec_rx_mowner, ifp->if_xname, "rx");
1057 MOWNER_ATTACH(&ec->ec_tx_mowner);
1058 MOWNER_ATTACH(&ec->ec_rx_mowner);
1059 ifp->if_mowner = &ec->ec_tx_mowner;
1060 #endif
1061 }
1062
1063 void
1064 ether_ifdetach(struct ifnet *ifp)
1065 {
1066 struct ethercom *ec = (void *) ifp;
1067 struct ether_multi *enm;
1068
1069 IFNET_ASSERT_UNLOCKED(ifp);
1070 /*
1071 * Prevent further calls to ioctl (for example turning off
1072 * promiscuous mode from the bridge code), which eventually can
1073 * call if_init() which can cause panics because the interface
1074 * is in the process of being detached. Return device not configured
1075 * instead.
1076 */
1077 ifp->if_ioctl = __FPTRCAST(int (*)(struct ifnet *, u_long, void *),
1078 enxio);
1079
1080 simplehook_dohooks(ec->ec_ifdetach_hooks);
1081 KASSERT(!simplehook_has_hooks(ec->ec_ifdetach_hooks));
1082 simplehook_destroy(ec->ec_ifdetach_hooks);
1083
1084 bpf_detach(ifp);
1085
1086 ETHER_LOCK(ec);
1087 KASSERT(ec->ec_nvlans == 0);
1088 while ((enm = LIST_FIRST(&ec->ec_multiaddrs)) != NULL) {
1089 LIST_REMOVE(enm, enm_list);
1090 kmem_free(enm, sizeof(*enm));
1091 ec->ec_multicnt--;
1092 }
1093 ETHER_UNLOCK(ec);
1094
1095 mutex_obj_free(ec->ec_lock);
1096 ec->ec_lock = NULL;
1097
1098 ifp->if_mowner = NULL;
1099 MOWNER_DETACH(&ec->ec_rx_mowner);
1100 MOWNER_DETACH(&ec->ec_tx_mowner);
1101 }
1102
1103 void *
1104 ether_ifdetachhook_establish(struct ifnet *ifp,
1105 void (*fn)(void *), void *arg)
1106 {
1107 struct ethercom *ec;
1108 khook_t *hk;
1109
1110 if (ifp->if_type != IFT_ETHER)
1111 return NULL;
1112
1113 ec = (struct ethercom *)ifp;
1114 hk = simplehook_establish(ec->ec_ifdetach_hooks,
1115 fn, arg);
1116
1117 return (void *)hk;
1118 }
1119
1120 void
1121 ether_ifdetachhook_disestablish(struct ifnet *ifp,
1122 void *vhook, kmutex_t *lock)
1123 {
1124 struct ethercom *ec;
1125
1126 if (vhook == NULL)
1127 return;
1128
1129 ec = (struct ethercom *)ifp;
1130 simplehook_disestablish(ec->ec_ifdetach_hooks, vhook, lock);
1131 }
1132
1133 #if 0
1134 /*
1135 * This is for reference. We have a table-driven version
1136 * of the little-endian crc32 generator, which is faster
1137 * than the double-loop.
1138 */
1139 uint32_t
1140 ether_crc32_le(const uint8_t *buf, size_t len)
1141 {
1142 uint32_t c, crc, carry;
1143 size_t i, j;
1144
1145 crc = 0xffffffffU; /* initial value */
1146
1147 for (i = 0; i < len; i++) {
1148 c = buf[i];
1149 for (j = 0; j < 8; j++) {
1150 carry = ((crc & 0x01) ? 1 : 0) ^ (c & 0x01);
1151 crc >>= 1;
1152 c >>= 1;
1153 if (carry)
1154 crc = (crc ^ ETHER_CRC_POLY_LE);
1155 }
1156 }
1157
1158 return (crc);
1159 }
1160 #else
1161 uint32_t
1162 ether_crc32_le(const uint8_t *buf, size_t len)
1163 {
1164 static const uint32_t crctab[] = {
1165 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
1166 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
1167 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
1168 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
1169 };
1170 uint32_t crc;
1171 size_t i;
1172
1173 crc = 0xffffffffU; /* initial value */
1174
1175 for (i = 0; i < len; i++) {
1176 crc ^= buf[i];
1177 crc = (crc >> 4) ^ crctab[crc & 0xf];
1178 crc = (crc >> 4) ^ crctab[crc & 0xf];
1179 }
1180
1181 return (crc);
1182 }
1183 #endif
1184
1185 uint32_t
1186 ether_crc32_be(const uint8_t *buf, size_t len)
1187 {
1188 uint32_t c, crc, carry;
1189 size_t i, j;
1190
1191 crc = 0xffffffffU; /* initial value */
1192
1193 for (i = 0; i < len; i++) {
1194 c = buf[i];
1195 for (j = 0; j < 8; j++) {
1196 carry = ((crc & 0x80000000U) ? 1 : 0) ^ (c & 0x01);
1197 crc <<= 1;
1198 c >>= 1;
1199 if (carry)
1200 crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
1201 }
1202 }
1203
1204 return (crc);
1205 }
1206
1207 #ifdef INET
1208 const uint8_t ether_ipmulticast_min[ETHER_ADDR_LEN] =
1209 { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
1210 const uint8_t ether_ipmulticast_max[ETHER_ADDR_LEN] =
1211 { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff };
1212 #endif
1213 #ifdef INET6
1214 const uint8_t ether_ip6multicast_min[ETHER_ADDR_LEN] =
1215 { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 };
1216 const uint8_t ether_ip6multicast_max[ETHER_ADDR_LEN] =
1217 { 0x33, 0x33, 0xff, 0xff, 0xff, 0xff };
1218 #endif
1219
1220 /*
1221 * ether_aton implementation, not using a static buffer.
1222 */
1223 int
1224 ether_aton_r(u_char *dest, size_t len, const char *str)
1225 {
1226 const u_char *cp = (const void *)str;
1227 u_char *ep;
1228
1229 #define atox(c) (((c) <= '9') ? ((c) - '0') : ((toupper(c) - 'A') + 10))
1230
1231 if (len < ETHER_ADDR_LEN)
1232 return ENOSPC;
1233
1234 ep = dest + ETHER_ADDR_LEN;
1235
1236 while (*cp) {
1237 if (!isxdigit(*cp))
1238 return EINVAL;
1239
1240 *dest = atox(*cp);
1241 cp++;
1242 if (isxdigit(*cp)) {
1243 *dest = (*dest << 4) | atox(*cp);
1244 cp++;
1245 }
1246 dest++;
1247
1248 if (dest == ep)
1249 return (*cp == '\0') ? 0 : ENAMETOOLONG;
1250
1251 switch (*cp) {
1252 case ':':
1253 case '-':
1254 case '.':
1255 cp++;
1256 break;
1257 }
1258 }
1259 return ENOBUFS;
1260 }
1261
1262 /*
1263 * Convert a sockaddr into an Ethernet address or range of Ethernet
1264 * addresses.
1265 */
1266 int
1267 ether_multiaddr(const struct sockaddr *sa, uint8_t addrlo[ETHER_ADDR_LEN],
1268 uint8_t addrhi[ETHER_ADDR_LEN])
1269 {
1270 #ifdef INET
1271 const struct sockaddr_in *sin;
1272 #endif
1273 #ifdef INET6
1274 const struct sockaddr_in6 *sin6;
1275 #endif
1276
1277 switch (sa->sa_family) {
1278
1279 case AF_UNSPEC:
1280 memcpy(addrlo, sa->sa_data, ETHER_ADDR_LEN);
1281 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1282 break;
1283
1284 #ifdef INET
1285 case AF_INET:
1286 sin = satocsin(sa);
1287 if (sin->sin_addr.s_addr == INADDR_ANY) {
1288 /*
1289 * An IP address of INADDR_ANY means listen to
1290 * or stop listening to all of the Ethernet
1291 * multicast addresses used for IP.
1292 * (This is for the sake of IP multicast routers.)
1293 */
1294 memcpy(addrlo, ether_ipmulticast_min, ETHER_ADDR_LEN);
1295 memcpy(addrhi, ether_ipmulticast_max, ETHER_ADDR_LEN);
1296 } else {
1297 ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo);
1298 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1299 }
1300 break;
1301 #endif
1302 #ifdef INET6
1303 case AF_INET6:
1304 sin6 = satocsin6(sa);
1305 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1306 /*
1307 * An IP6 address of 0 means listen to or stop
1308 * listening to all of the Ethernet multicast
1309 * address used for IP6.
1310 * (This is used for multicast routers.)
1311 */
1312 memcpy(addrlo, ether_ip6multicast_min, ETHER_ADDR_LEN);
1313 memcpy(addrhi, ether_ip6multicast_max, ETHER_ADDR_LEN);
1314 } else {
1315 ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, addrlo);
1316 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1317 }
1318 break;
1319 #endif
1320
1321 default:
1322 return EAFNOSUPPORT;
1323 }
1324 return 0;
1325 }
1326
1327 /*
1328 * Add an Ethernet multicast address or range of addresses to the list for a
1329 * given interface.
1330 */
1331 int
1332 ether_addmulti(const struct sockaddr *sa, struct ethercom *ec)
1333 {
1334 struct ether_multi *enm, *_enm;
1335 u_char addrlo[ETHER_ADDR_LEN];
1336 u_char addrhi[ETHER_ADDR_LEN];
1337 int error = 0;
1338
1339 /* Allocate out of lock */
1340 enm = kmem_alloc(sizeof(*enm), KM_SLEEP);
1341
1342 ETHER_LOCK(ec);
1343 error = ether_multiaddr(sa, addrlo, addrhi);
1344 if (error != 0)
1345 goto out;
1346
1347 /*
1348 * Verify that we have valid Ethernet multicast addresses.
1349 */
1350 if (!ETHER_IS_MULTICAST(addrlo) || !ETHER_IS_MULTICAST(addrhi)) {
1351 error = EINVAL;
1352 goto out;
1353 }
1354
1355 /*
1356 * See if the address range is already in the list.
1357 */
1358 _enm = ether_lookup_multi(addrlo, addrhi, ec);
1359 if (_enm != NULL) {
1360 /*
1361 * Found it; just increment the reference count.
1362 */
1363 ++_enm->enm_refcount;
1364 error = 0;
1365 goto out;
1366 }
1367
1368 /*
1369 * Link a new multicast record into the interface's multicast list.
1370 */
1371 memcpy(enm->enm_addrlo, addrlo, ETHER_ADDR_LEN);
1372 memcpy(enm->enm_addrhi, addrhi, ETHER_ADDR_LEN);
1373 enm->enm_refcount = 1;
1374 LIST_INSERT_HEAD(&ec->ec_multiaddrs, enm, enm_list);
1375 ec->ec_multicnt++;
1376
1377 /*
1378 * Return ENETRESET to inform the driver that the list has changed
1379 * and its reception filter should be adjusted accordingly.
1380 */
1381 error = ENETRESET;
1382 enm = NULL;
1383
1384 out:
1385 ETHER_UNLOCK(ec);
1386 if (enm != NULL)
1387 kmem_free(enm, sizeof(*enm));
1388 return error;
1389 }
1390
1391 /*
1392 * Delete a multicast address record.
1393 */
1394 int
1395 ether_delmulti(const struct sockaddr *sa, struct ethercom *ec)
1396 {
1397 struct ether_multi *enm;
1398 u_char addrlo[ETHER_ADDR_LEN];
1399 u_char addrhi[ETHER_ADDR_LEN];
1400 int error;
1401
1402 ETHER_LOCK(ec);
1403 error = ether_multiaddr(sa, addrlo, addrhi);
1404 if (error != 0)
1405 goto error;
1406
1407 /*
1408 * Look up the address in our list.
1409 */
1410 enm = ether_lookup_multi(addrlo, addrhi, ec);
1411 if (enm == NULL) {
1412 error = ENXIO;
1413 goto error;
1414 }
1415 if (--enm->enm_refcount != 0) {
1416 /*
1417 * Still some claims to this record.
1418 */
1419 error = 0;
1420 goto error;
1421 }
1422
1423 /*
1424 * No remaining claims to this record; unlink and free it.
1425 */
1426 LIST_REMOVE(enm, enm_list);
1427 ec->ec_multicnt--;
1428 ETHER_UNLOCK(ec);
1429 kmem_free(enm, sizeof(*enm));
1430
1431 /*
1432 * Return ENETRESET to inform the driver that the list has changed
1433 * and its reception filter should be adjusted accordingly.
1434 */
1435 return ENETRESET;
1436
1437 error:
1438 ETHER_UNLOCK(ec);
1439 return error;
1440 }
1441
1442 void
1443 ether_set_ifflags_cb(struct ethercom *ec, ether_cb_t cb)
1444 {
1445 ec->ec_ifflags_cb = cb;
1446 }
1447
1448 void
1449 ether_set_vlan_cb(struct ethercom *ec, ether_vlancb_t cb)
1450 {
1451
1452 ec->ec_vlan_cb = cb;
1453 }
1454
1455 static int
1456 ether_ioctl_reinit(struct ethercom *ec)
1457 {
1458 struct ifnet *ifp = &ec->ec_if;
1459 int error;
1460
1461 KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
1462
1463 switch (ifp->if_flags & (IFF_UP | IFF_RUNNING)) {
1464 case IFF_RUNNING:
1465 /*
1466 * If interface is marked down and it is running,
1467 * then stop and disable it.
1468 */
1469 if_stop(ifp, 1);
1470 break;
1471 case IFF_UP:
1472 /*
1473 * If interface is marked up and it is stopped, then
1474 * start it.
1475 */
1476 return if_init(ifp);
1477 case IFF_UP | IFF_RUNNING:
1478 error = 0;
1479 if (ec->ec_ifflags_cb != NULL) {
1480 error = (*ec->ec_ifflags_cb)(ec);
1481 if (error == ENETRESET) {
1482 /*
1483 * Reset the interface to pick up
1484 * changes in any other flags that
1485 * affect the hardware state.
1486 */
1487 return if_init(ifp);
1488 }
1489 } else
1490 error = if_init(ifp);
1491 return error;
1492 case 0:
1493 break;
1494 }
1495
1496 return 0;
1497 }
1498
1499 /*
1500 * Common ioctls for Ethernet interfaces. Note, we must be
1501 * called at splnet().
1502 */
1503 int
1504 ether_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1505 {
1506 struct ethercom *ec = (void *)ifp;
1507 struct eccapreq *eccr;
1508 struct ifreq *ifr = (struct ifreq *)data;
1509 struct if_laddrreq *iflr = data;
1510 const struct sockaddr_dl *sdl;
1511 static const uint8_t zero[ETHER_ADDR_LEN];
1512 int error;
1513
1514 switch (cmd) {
1515 case SIOCINITIFADDR:
1516 {
1517 struct ifaddr *ifa = (struct ifaddr *)data;
1518 if (ifa->ifa_addr->sa_family != AF_LINK
1519 && (ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
1520 (IFF_UP | IFF_RUNNING)) {
1521 ifp->if_flags |= IFF_UP;
1522 if ((error = if_init(ifp)) != 0)
1523 return error;
1524 }
1525 #ifdef INET
1526 if (ifa->ifa_addr->sa_family == AF_INET)
1527 arp_ifinit(ifp, ifa);
1528 #endif
1529 return 0;
1530 }
1531
1532 case SIOCSIFMTU:
1533 {
1534 int maxmtu;
1535
1536 if (ec->ec_capabilities & ETHERCAP_JUMBO_MTU)
1537 maxmtu = ETHERMTU_JUMBO;
1538 else
1539 maxmtu = ETHERMTU;
1540
1541 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > maxmtu)
1542 return EINVAL;
1543 else if ((error = ifioctl_common(ifp, cmd, data)) != ENETRESET)
1544 return error;
1545 else if (ifp->if_flags & IFF_UP) {
1546 /* Make sure the device notices the MTU change. */
1547 return if_init(ifp);
1548 } else
1549 return 0;
1550 }
1551
1552 case SIOCSIFFLAGS:
1553 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
1554 return error;
1555 return ether_ioctl_reinit(ec);
1556 case SIOCGIFFLAGS:
1557 error = ifioctl_common(ifp, cmd, data);
1558 if (error == 0) {
1559 /* Set IFF_ALLMULTI for backcompat */
1560 ifr->ifr_flags |= (ec->ec_flags & ETHER_F_ALLMULTI) ?
1561 IFF_ALLMULTI : 0;
1562 }
1563 return error;
1564 case SIOCGETHERCAP:
1565 eccr = (struct eccapreq *)data;
1566 eccr->eccr_capabilities = ec->ec_capabilities;
1567 eccr->eccr_capenable = ec->ec_capenable;
1568 return 0;
1569 case SIOCSETHERCAP:
1570 eccr = (struct eccapreq *)data;
1571 if ((eccr->eccr_capenable & ~ec->ec_capabilities) != 0)
1572 return EINVAL;
1573 if (eccr->eccr_capenable == ec->ec_capenable)
1574 return 0;
1575 #if 0 /* notyet */
1576 ec->ec_capenable = (ec->ec_capenable & ETHERCAP_CANTCHANGE)
1577 | (eccr->eccr_capenable & ~ETHERCAP_CANTCHANGE);
1578 #else
1579 ec->ec_capenable = eccr->eccr_capenable;
1580 #endif
1581 return ether_ioctl_reinit(ec);
1582 case SIOCADDMULTI:
1583 return ether_addmulti(ifreq_getaddr(cmd, ifr), ec);
1584 case SIOCDELMULTI:
1585 return ether_delmulti(ifreq_getaddr(cmd, ifr), ec);
1586 case SIOCSIFMEDIA:
1587 case SIOCGIFMEDIA:
1588 if (ec->ec_mii != NULL)
1589 return ifmedia_ioctl(ifp, ifr, &ec->ec_mii->mii_media,
1590 cmd);
1591 else if (ec->ec_ifmedia != NULL)
1592 return ifmedia_ioctl(ifp, ifr, ec->ec_ifmedia, cmd);
1593 else
1594 return ENOTTY;
1595 break;
1596 case SIOCALIFADDR:
1597 sdl = satocsdl(sstocsa(&iflr->addr));
1598 if (sdl->sdl_family != AF_LINK)
1599 ;
1600 else if (ETHER_IS_MULTICAST(CLLADDR(sdl)))
1601 return EINVAL;
1602 else if (memcmp(zero, CLLADDR(sdl), sizeof(zero)) == 0)
1603 return EINVAL;
1604 /*FALLTHROUGH*/
1605 default:
1606 return ifioctl_common(ifp, cmd, data);
1607 }
1608 return 0;
1609 }
1610
1611 /*
1612 * Enable/disable passing VLAN packets if the parent interface supports it.
1613 * Return:
1614 * 0: Ok
1615 * -1: Parent interface does not support vlans
1616 * >0: Error
1617 */
1618 int
1619 ether_enable_vlan_mtu(struct ifnet *ifp)
1620 {
1621 int error;
1622 struct ethercom *ec = (void *)ifp;
1623
1624 /* Parent does not support VLAN's */
1625 if ((ec->ec_capabilities & ETHERCAP_VLAN_MTU) == 0)
1626 return -1;
1627
1628 /*
1629 * Parent supports the VLAN_MTU capability,
1630 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames;
1631 * enable it.
1632 */
1633 ec->ec_capenable |= ETHERCAP_VLAN_MTU;
1634
1635 /* Interface is down, defer for later */
1636 if ((ifp->if_flags & IFF_UP) == 0)
1637 return 0;
1638
1639 if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
1640 return 0;
1641
1642 ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
1643 return error;
1644 }
1645
1646 int
1647 ether_disable_vlan_mtu(struct ifnet *ifp)
1648 {
1649 int error;
1650 struct ethercom *ec = (void *)ifp;
1651
1652 /* We still have VLAN's, defer for later */
1653 if (ec->ec_nvlans != 0)
1654 return 0;
1655
1656 /* Parent does not support VLAB's, nothing to do. */
1657 if ((ec->ec_capenable & ETHERCAP_VLAN_MTU) == 0)
1658 return -1;
1659
1660 /*
1661 * Disable Tx/Rx of VLAN-sized frames.
1662 */
1663 ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
1664
1665 /* Interface is down, defer for later */
1666 if ((ifp->if_flags & IFF_UP) == 0)
1667 return 0;
1668
1669 if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
1670 return 0;
1671
1672 ec->ec_capenable |= ETHERCAP_VLAN_MTU;
1673 return error;
1674 }
1675
1676 /*
1677 * Add and delete VLAN TAG
1678 */
1679 int
1680 ether_add_vlantag(struct ifnet *ifp, uint16_t vtag, bool *vlanmtu_status)
1681 {
1682 struct ethercom *ec = (void *)ifp;
1683 struct vlanid_list *vidp;
1684 bool vlanmtu_enabled;
1685 uint16_t vid = EVL_VLANOFTAG(vtag);
1686 int error;
1687
1688 vlanmtu_enabled = false;
1689
1690 /* Add a vid to the list */
1691 vidp = kmem_alloc(sizeof(*vidp), KM_SLEEP);
1692 vidp->vid = vid;
1693
1694 ETHER_LOCK(ec);
1695 ec->ec_nvlans++;
1696 SIMPLEQ_INSERT_TAIL(&ec->ec_vids, vidp, vid_list);
1697 ETHER_UNLOCK(ec);
1698
1699 if (ec->ec_nvlans == 1) {
1700 IFNET_LOCK(ifp);
1701 error = ether_enable_vlan_mtu(ifp);
1702 IFNET_UNLOCK(ifp);
1703
1704 if (error == 0) {
1705 vlanmtu_enabled = true;
1706 } else if (error != -1) {
1707 goto fail;
1708 }
1709 }
1710
1711 if (ec->ec_vlan_cb != NULL) {
1712 error = (*ec->ec_vlan_cb)(ec, vid, true);
1713 if (error != 0)
1714 goto fail;
1715 }
1716
1717 if (vlanmtu_status != NULL)
1718 *vlanmtu_status = vlanmtu_enabled;
1719
1720 return 0;
1721 fail:
1722 ETHER_LOCK(ec);
1723 ec->ec_nvlans--;
1724 SIMPLEQ_REMOVE(&ec->ec_vids, vidp, vlanid_list, vid_list);
1725 ETHER_UNLOCK(ec);
1726
1727 if (vlanmtu_enabled) {
1728 IFNET_LOCK(ifp);
1729 (void)ether_disable_vlan_mtu(ifp);
1730 IFNET_UNLOCK(ifp);
1731 }
1732
1733 kmem_free(vidp, sizeof(*vidp));
1734
1735 return error;
1736 }
1737
1738 int
1739 ether_del_vlantag(struct ifnet *ifp, uint16_t vtag)
1740 {
1741 struct ethercom *ec = (void *)ifp;
1742 struct vlanid_list *vidp;
1743 uint16_t vid = EVL_VLANOFTAG(vtag);
1744
1745 ETHER_LOCK(ec);
1746 SIMPLEQ_FOREACH(vidp, &ec->ec_vids, vid_list) {
1747 if (vidp->vid == vid) {
1748 SIMPLEQ_REMOVE(&ec->ec_vids, vidp,
1749 vlanid_list, vid_list);
1750 ec->ec_nvlans--;
1751 break;
1752 }
1753 }
1754 ETHER_UNLOCK(ec);
1755
1756 if (vidp == NULL)
1757 return ENOENT;
1758
1759 if (ec->ec_vlan_cb != NULL) {
1760 (void)(*ec->ec_vlan_cb)(ec, vidp->vid, false);
1761 }
1762
1763 if (ec->ec_nvlans == 0) {
1764 IFNET_LOCK(ifp);
1765 (void)ether_disable_vlan_mtu(ifp);
1766 IFNET_UNLOCK(ifp);
1767 }
1768
1769 kmem_free(vidp, sizeof(*vidp));
1770
1771 return 0;
1772 }
1773
1774 int
1775 ether_inject_vlantag(struct mbuf **mp, uint16_t etype, uint16_t tag)
1776 {
1777 static const size_t min_data_len =
1778 ETHER_MIN_LEN - ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1779 /* Used to pad ethernet frames with < ETHER_MIN_LEN bytes */
1780 static const char vlan_zero_pad_buff[ETHER_MIN_LEN] = { 0 };
1781
1782 struct ether_vlan_header *evl;
1783 struct mbuf *m = *mp;
1784 int error;
1785
1786 error = 0;
1787
1788 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1789 if (m == NULL) {
1790 error = ENOBUFS;
1791 goto out;
1792 }
1793
1794 if (m->m_len < sizeof(*evl)) {
1795 m = m_pullup(m, sizeof(*evl));
1796 if (m == NULL) {
1797 error = ENOBUFS;
1798 goto out;
1799 }
1800 }
1801
1802 /*
1803 * Transform the Ethernet header into an
1804 * Ethernet header with 802.1Q encapsulation.
1805 */
1806 memmove(mtod(m, void *),
1807 mtod(m, char *) + ETHER_VLAN_ENCAP_LEN,
1808 sizeof(struct ether_header));
1809 evl = mtod(m, struct ether_vlan_header *);
1810 evl->evl_proto = evl->evl_encap_proto;
1811 evl->evl_encap_proto = htons(etype);
1812 evl->evl_tag = htons(tag);
1813
1814 /*
1815 * To cater for VLAN-aware layer 2 ethernet
1816 * switches which may need to strip the tag
1817 * before forwarding the packet, make sure
1818 * the packet+tag is at least 68 bytes long.
1819 * This is necessary because our parent will
1820 * only pad to 64 bytes (ETHER_MIN_LEN) and
1821 * some switches will not pad by themselves
1822 * after deleting a tag.
1823 */
1824 if (m->m_pkthdr.len < min_data_len) {
1825 m_copyback(m, m->m_pkthdr.len,
1826 min_data_len - m->m_pkthdr.len,
1827 vlan_zero_pad_buff);
1828 }
1829
1830 m->m_flags &= ~M_VLANTAG;
1831
1832 out:
1833 *mp = m;
1834 return error;
1835 }
1836
1837 struct mbuf *
1838 ether_strip_vlantag(struct mbuf *m)
1839 {
1840 struct ether_vlan_header *evl;
1841
1842 if (m->m_len < sizeof(*evl) &&
1843 (m = m_pullup(m, sizeof(*evl))) == NULL) {
1844 return NULL;
1845 }
1846
1847 if (m_makewritable(&m, 0, sizeof(*evl), M_DONTWAIT)) {
1848 m_freem(m);
1849 return NULL;
1850 }
1851
1852 evl = mtod(m, struct ether_vlan_header *);
1853 KASSERT(ntohs(evl->evl_encap_proto) == ETHERTYPE_VLAN);
1854
1855 vlan_set_tag(m, ntohs(evl->evl_tag));
1856
1857 /*
1858 * Restore the original ethertype. We'll remove
1859 * the encapsulation after we've found the vlan
1860 * interface corresponding to the tag.
1861 */
1862 evl->evl_encap_proto = evl->evl_proto;
1863
1864 /*
1865 * Remove the encapsulation header and append tag.
1866 * The original header has already been fixed up above.
1867 */
1868 vlan_set_tag(m, ntohs(evl->evl_tag));
1869 memmove((char *)evl + ETHER_VLAN_ENCAP_LEN, evl,
1870 offsetof(struct ether_vlan_header, evl_encap_proto));
1871 m_adj(m, ETHER_VLAN_ENCAP_LEN);
1872
1873 return m;
1874 }
1875
1876 static int
1877 ether_multicast_sysctl(SYSCTLFN_ARGS)
1878 {
1879 struct ether_multi *enm;
1880 struct ifnet *ifp;
1881 struct ethercom *ec;
1882 int error = 0;
1883 size_t written;
1884 struct psref psref;
1885 int bound;
1886 unsigned int multicnt;
1887 struct ether_multi_sysctl *addrs;
1888 int i;
1889
1890 if (namelen != 1)
1891 return EINVAL;
1892
1893 bound = curlwp_bind();
1894 ifp = if_get_byindex(name[0], &psref);
1895 if (ifp == NULL) {
1896 error = ENODEV;
1897 goto out;
1898 }
1899 if (ifp->if_type != IFT_ETHER) {
1900 if_put(ifp, &psref);
1901 *oldlenp = 0;
1902 goto out;
1903 }
1904 ec = (struct ethercom *)ifp;
1905
1906 if (oldp == NULL) {
1907 if_put(ifp, &psref);
1908 *oldlenp = ec->ec_multicnt * sizeof(*addrs);
1909 goto out;
1910 }
1911
1912 /*
1913 * ec->ec_lock is a spin mutex so we cannot call sysctl_copyout, which
1914 * is sleepable, while holding it. Copy data to a local buffer first
1915 * with the lock taken and then call sysctl_copyout without holding it.
1916 */
1917 retry:
1918 multicnt = ec->ec_multicnt;
1919
1920 if (multicnt == 0) {
1921 if_put(ifp, &psref);
1922 *oldlenp = 0;
1923 goto out;
1924 }
1925
1926 addrs = kmem_zalloc(sizeof(*addrs) * multicnt, KM_SLEEP);
1927
1928 ETHER_LOCK(ec);
1929 if (multicnt != ec->ec_multicnt) {
1930 /* The number of multicast addresses has changed */
1931 ETHER_UNLOCK(ec);
1932 kmem_free(addrs, sizeof(*addrs) * multicnt);
1933 goto retry;
1934 }
1935
1936 i = 0;
1937 LIST_FOREACH(enm, &ec->ec_multiaddrs, enm_list) {
1938 struct ether_multi_sysctl *addr = &addrs[i];
1939 addr->enm_refcount = enm->enm_refcount;
1940 memcpy(addr->enm_addrlo, enm->enm_addrlo, ETHER_ADDR_LEN);
1941 memcpy(addr->enm_addrhi, enm->enm_addrhi, ETHER_ADDR_LEN);
1942 i++;
1943 }
1944 ETHER_UNLOCK(ec);
1945
1946 error = 0;
1947 written = 0;
1948 for (i = 0; i < multicnt; i++) {
1949 struct ether_multi_sysctl *addr = &addrs[i];
1950
1951 if (written + sizeof(*addr) > *oldlenp)
1952 break;
1953 error = sysctl_copyout(l, addr, oldp, sizeof(*addr));
1954 if (error)
1955 break;
1956 written += sizeof(*addr);
1957 oldp = (char *)oldp + sizeof(*addr);
1958 }
1959 kmem_free(addrs, sizeof(*addrs) * multicnt);
1960
1961 if_put(ifp, &psref);
1962
1963 *oldlenp = written;
1964 out:
1965 curlwp_bindx(bound);
1966 return error;
1967 }
1968
1969 static void
1970 ether_sysctl_setup(struct sysctllog **clog)
1971 {
1972 const struct sysctlnode *rnode = NULL;
1973
1974 sysctl_createv(clog, 0, NULL, &rnode,
1975 CTLFLAG_PERMANENT,
1976 CTLTYPE_NODE, "ether",
1977 SYSCTL_DESCR("Ethernet-specific information"),
1978 NULL, 0, NULL, 0,
1979 CTL_NET, CTL_CREATE, CTL_EOL);
1980
1981 sysctl_createv(clog, 0, &rnode, NULL,
1982 CTLFLAG_PERMANENT,
1983 CTLTYPE_NODE, "multicast",
1984 SYSCTL_DESCR("multicast addresses"),
1985 ether_multicast_sysctl, 0, NULL, 0,
1986 CTL_CREATE, CTL_EOL);
1987
1988 sysctl_createv(clog, 0, &rnode, NULL,
1989 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1990 CTLTYPE_STRING, "rps_hash",
1991 SYSCTL_DESCR("Interface rps hash function control"),
1992 sysctl_pktq_rps_hash_handler, 0, (void *)ðer_pktq_rps_hash_p,
1993 PKTQ_RPS_HASH_NAME_LEN,
1994 CTL_CREATE, CTL_EOL);
1995 }
1996
1997 void
1998 etherinit(void)
1999 {
2000
2001 #ifdef DIAGNOSTIC
2002 mutex_init(&bigpktpps_lock, MUTEX_DEFAULT, IPL_NET);
2003 #endif
2004 ether_pktq_rps_hash_p = pktq_rps_hash_default;
2005 ether_sysctl_setup(NULL);
2006 }
2007