if_ethersubr.c revision 1.317 1 /* $NetBSD: if_ethersubr.c,v 1.317 2022/09/03 01:35:03 thorpej Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)if_ethersubr.c 8.2 (Berkeley) 4/4/96
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: if_ethersubr.c,v 1.317 2022/09/03 01:35:03 thorpej Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_inet.h"
68 #include "opt_atalk.h"
69 #include "opt_mbuftrace.h"
70 #include "opt_mpls.h"
71 #include "opt_gateway.h"
72 #include "opt_pppoe.h"
73 #include "opt_net_mpsafe.h"
74 #endif
75
76 #include "vlan.h"
77 #include "pppoe.h"
78 #include "bridge.h"
79 #include "arp.h"
80 #include "agr.h"
81
82 #include <sys/sysctl.h>
83 #include <sys/mbuf.h>
84 #include <sys/mutex.h>
85 #include <sys/ioctl.h>
86 #include <sys/errno.h>
87 #include <sys/device.h>
88 #include <sys/entropy.h>
89 #include <sys/rndsource.h>
90 #include <sys/cpu.h>
91 #include <sys/kmem.h>
92 #include <sys/hook.h>
93
94 #include <net/if.h>
95 #include <net/netisr.h>
96 #include <net/route.h>
97 #include <net/if_llc.h>
98 #include <net/if_dl.h>
99 #include <net/if_types.h>
100 #include <net/pktqueue.h>
101
102 #include <net/if_media.h>
103 #include <dev/mii/mii.h>
104 #include <dev/mii/miivar.h>
105
106 #if NARP == 0
107 /*
108 * XXX there should really be a way to issue this warning from within config(8)
109 */
110 #error You have included NETATALK or a pseudo-device in your configuration that depends on the presence of ethernet interfaces, but have no such interfaces configured. Check if you really need pseudo-device bridge, pppoe, vlan or options NETATALK.
111 #endif
112
113 #include <net/bpf.h>
114
115 #include <net/if_ether.h>
116 #include <net/if_vlanvar.h>
117
118 #if NPPPOE > 0
119 #include <net/if_pppoe.h>
120 #endif
121
122 #if NAGR > 0
123 #include <net/ether_slowprotocols.h>
124 #include <net/agr/ieee8023ad.h>
125 #include <net/agr/if_agrvar.h>
126 #endif
127
128 #if NBRIDGE > 0
129 #include <net/if_bridgevar.h>
130 #endif
131
132 #include <netinet/in.h>
133 #ifdef INET
134 #include <netinet/in_var.h>
135 #endif
136 #include <netinet/if_inarp.h>
137
138 #ifdef INET6
139 #ifndef INET
140 #include <netinet/in.h>
141 #endif
142 #include <netinet6/in6_var.h>
143 #include <netinet6/nd6.h>
144 #endif
145
146 #include "carp.h"
147 #if NCARP > 0
148 #include <netinet/ip_carp.h>
149 #endif
150
151 #ifdef NETATALK
152 #include <netatalk/at.h>
153 #include <netatalk/at_var.h>
154 #include <netatalk/at_extern.h>
155
156 #define llc_snap_org_code llc_un.type_snap.org_code
157 #define llc_snap_ether_type llc_un.type_snap.ether_type
158
159 extern u_char at_org_code[3];
160 extern u_char aarp_org_code[3];
161 #endif /* NETATALK */
162
163 #ifdef MPLS
164 #include <netmpls/mpls.h>
165 #include <netmpls/mpls_var.h>
166 #endif
167
168 CTASSERT(sizeof(struct ether_addr) == 6);
169 CTASSERT(sizeof(struct ether_header) == 14);
170
171 #ifdef DIAGNOSTIC
172 static struct timeval bigpktppslim_last;
173 static int bigpktppslim = 2; /* XXX */
174 static int bigpktpps_count;
175 static kmutex_t bigpktpps_lock __cacheline_aligned;
176 #endif
177
178 const uint8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
179 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
180 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
181 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
182 #define senderr(e) { error = (e); goto bad;}
183
184 static pktq_rps_hash_func_t ether_pktq_rps_hash_p;
185
186 static int ether_output(struct ifnet *, struct mbuf *,
187 const struct sockaddr *, const struct rtentry *);
188
189 /*
190 * Ethernet output routine.
191 * Encapsulate a packet of type family for the local net.
192 * Assumes that ifp is actually pointer to ethercom structure.
193 */
194 static int
195 ether_output(struct ifnet * const ifp0, struct mbuf * const m0,
196 const struct sockaddr * const dst, const struct rtentry *rt)
197 {
198 uint8_t esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
199 uint16_t etype = 0;
200 int error = 0, hdrcmplt = 0;
201 struct mbuf *m = m0;
202 struct mbuf *mcopy = NULL;
203 struct ether_header *eh;
204 struct ifnet *ifp = ifp0;
205 #ifdef INET
206 struct arphdr *ah;
207 #endif
208 #ifdef NETATALK
209 struct at_ifaddr *aa;
210 #endif
211
212 #ifdef MBUFTRACE
213 m_claimm(m, ifp->if_mowner);
214 #endif
215
216 #if NCARP > 0
217 if (ifp->if_type == IFT_CARP) {
218 struct ifaddr *ifa;
219 int s = pserialize_read_enter();
220
221 /* loop back if this is going to the carp interface */
222 if (dst != NULL && ifp0->if_link_state == LINK_STATE_UP &&
223 (ifa = ifa_ifwithaddr(dst)) != NULL) {
224 if (ifa->ifa_ifp == ifp0) {
225 pserialize_read_exit(s);
226 return looutput(ifp0, m, dst, rt);
227 }
228 }
229 pserialize_read_exit(s);
230
231 ifp = ifp->if_carpdev;
232 /* ac = (struct arpcom *)ifp; */
233
234 if ((ifp0->if_flags & (IFF_UP | IFF_RUNNING)) !=
235 (IFF_UP | IFF_RUNNING))
236 senderr(ENETDOWN);
237 }
238 #endif
239
240 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING))
241 senderr(ENETDOWN);
242
243 switch (dst->sa_family) {
244
245 #ifdef INET
246 case AF_INET:
247 if (m->m_flags & M_BCAST) {
248 memcpy(edst, etherbroadcastaddr, sizeof(edst));
249 } else if (m->m_flags & M_MCAST) {
250 ETHER_MAP_IP_MULTICAST(&satocsin(dst)->sin_addr, edst);
251 } else {
252 error = arpresolve(ifp0, rt, m, dst, edst, sizeof(edst));
253 if (error)
254 return (error == EWOULDBLOCK) ? 0 : error;
255 }
256 /* If broadcasting on a simplex interface, loopback a copy */
257 if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
258 mcopy = m_copypacket(m, M_DONTWAIT);
259 etype = htons(ETHERTYPE_IP);
260 break;
261
262 case AF_ARP:
263 ah = mtod(m, struct arphdr *);
264 if (m->m_flags & M_BCAST) {
265 memcpy(edst, etherbroadcastaddr, sizeof(edst));
266 } else {
267 void *tha = ar_tha(ah);
268
269 if (tha == NULL) {
270 /* fake with ARPHRD_IEEE1394 */
271 m_freem(m);
272 return 0;
273 }
274 memcpy(edst, tha, sizeof(edst));
275 }
276
277 ah->ar_hrd = htons(ARPHRD_ETHER);
278
279 switch (ntohs(ah->ar_op)) {
280 case ARPOP_REVREQUEST:
281 case ARPOP_REVREPLY:
282 etype = htons(ETHERTYPE_REVARP);
283 break;
284
285 case ARPOP_REQUEST:
286 case ARPOP_REPLY:
287 default:
288 etype = htons(ETHERTYPE_ARP);
289 }
290 break;
291 #endif
292
293 #ifdef INET6
294 case AF_INET6:
295 if (m->m_flags & M_BCAST) {
296 memcpy(edst, etherbroadcastaddr, sizeof(edst));
297 } else if (m->m_flags & M_MCAST) {
298 ETHER_MAP_IPV6_MULTICAST(&satocsin6(dst)->sin6_addr,
299 edst);
300 } else {
301 error = nd6_resolve(ifp0, rt, m, dst, edst,
302 sizeof(edst));
303 if (error)
304 return (error == EWOULDBLOCK) ? 0 : error;
305 }
306 etype = htons(ETHERTYPE_IPV6);
307 break;
308 #endif
309
310 #ifdef NETATALK
311 case AF_APPLETALK: {
312 struct ifaddr *ifa;
313 int s;
314
315 KERNEL_LOCK(1, NULL);
316
317 if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst)) {
318 KERNEL_UNLOCK_ONE(NULL);
319 return 0;
320 }
321
322 /*
323 * ifaddr is the first thing in at_ifaddr
324 */
325 s = pserialize_read_enter();
326 ifa = at_ifawithnet((const struct sockaddr_at *)dst, ifp);
327 if (ifa == NULL) {
328 pserialize_read_exit(s);
329 KERNEL_UNLOCK_ONE(NULL);
330 senderr(EADDRNOTAVAIL);
331 }
332 aa = (struct at_ifaddr *)ifa;
333
334 /*
335 * In the phase 2 case, we need to prepend an mbuf for the
336 * llc header.
337 */
338 if (aa->aa_flags & AFA_PHASE2) {
339 struct llc llc;
340
341 M_PREPEND(m, sizeof(struct llc), M_DONTWAIT);
342 if (m == NULL) {
343 pserialize_read_exit(s);
344 KERNEL_UNLOCK_ONE(NULL);
345 senderr(ENOBUFS);
346 }
347
348 llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
349 llc.llc_control = LLC_UI;
350 memcpy(llc.llc_snap_org_code, at_org_code,
351 sizeof(llc.llc_snap_org_code));
352 llc.llc_snap_ether_type = htons(ETHERTYPE_ATALK);
353 memcpy(mtod(m, void *), &llc, sizeof(struct llc));
354 } else {
355 etype = htons(ETHERTYPE_ATALK);
356 }
357 pserialize_read_exit(s);
358 KERNEL_UNLOCK_ONE(NULL);
359 break;
360 }
361 #endif /* NETATALK */
362
363 case pseudo_AF_HDRCMPLT:
364 hdrcmplt = 1;
365 memcpy(esrc,
366 ((const struct ether_header *)dst->sa_data)->ether_shost,
367 sizeof(esrc));
368 /* FALLTHROUGH */
369
370 case AF_UNSPEC:
371 memcpy(edst,
372 ((const struct ether_header *)dst->sa_data)->ether_dhost,
373 sizeof(edst));
374 /* AF_UNSPEC doesn't swap the byte order of the ether_type. */
375 etype = ((const struct ether_header *)dst->sa_data)->ether_type;
376 break;
377
378 default:
379 printf("%s: can't handle af%d\n", ifp->if_xname,
380 dst->sa_family);
381 senderr(EAFNOSUPPORT);
382 }
383
384 #ifdef MPLS
385 {
386 struct m_tag *mtag;
387 mtag = m_tag_find(m, PACKET_TAG_MPLS);
388 if (mtag != NULL) {
389 /* Having the tag itself indicates it's MPLS */
390 etype = htons(ETHERTYPE_MPLS);
391 m_tag_delete(m, mtag);
392 }
393 }
394 #endif
395
396 if (mcopy)
397 (void)looutput(ifp, mcopy, dst, rt);
398
399 KASSERT((m->m_flags & M_PKTHDR) != 0);
400
401 /*
402 * If no ether type is set, this must be a 802.2 formatted packet.
403 */
404 if (etype == 0)
405 etype = htons(m->m_pkthdr.len);
406
407 /*
408 * Add local net header. If no space in first mbuf, allocate another.
409 */
410 M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
411 if (m == NULL)
412 senderr(ENOBUFS);
413
414 eh = mtod(m, struct ether_header *);
415 /* Note: etype is already in network byte order. */
416 memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
417 memcpy(eh->ether_dhost, edst, sizeof(edst));
418 if (hdrcmplt) {
419 memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
420 } else {
421 memcpy(eh->ether_shost, CLLADDR(ifp->if_sadl),
422 sizeof(eh->ether_shost));
423 }
424
425 #if NCARP > 0
426 if (ifp0 != ifp && ifp0->if_type == IFT_CARP) {
427 memcpy(eh->ether_shost, CLLADDR(ifp0->if_sadl),
428 sizeof(eh->ether_shost));
429 }
430 #endif
431
432 if ((error = pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_OUT)) != 0)
433 return error;
434 if (m == NULL)
435 return 0;
436
437 #if NBRIDGE > 0
438 /*
439 * Bridges require special output handling.
440 */
441 if (ifp->if_bridge)
442 return bridge_output(ifp, m, NULL, NULL);
443 #endif
444
445 #if NCARP > 0
446 if (ifp != ifp0)
447 if_statadd(ifp0, if_obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
448 #endif
449
450 #ifdef ALTQ
451 KERNEL_LOCK(1, NULL);
452 /*
453 * If ALTQ is enabled on the parent interface, do
454 * classification; the queueing discipline might not
455 * require classification, but might require the
456 * address family/header pointer in the pktattr.
457 */
458 if (ALTQ_IS_ENABLED(&ifp->if_snd))
459 altq_etherclassify(&ifp->if_snd, m);
460 KERNEL_UNLOCK_ONE(NULL);
461 #endif
462 return ifq_enqueue(ifp, m);
463
464 bad:
465 if_statinc(ifp, if_oerrors);
466 if (m)
467 m_freem(m);
468 return error;
469 }
470
471 #ifdef ALTQ
472 /*
473 * This routine is a slight hack to allow a packet to be classified
474 * if the Ethernet headers are present. It will go away when ALTQ's
475 * classification engine understands link headers.
476 *
477 * XXX: We may need to do m_pullups here. First to ensure struct ether_header
478 * is indeed contiguous, then to read the LLC and so on.
479 */
480 void
481 altq_etherclassify(struct ifaltq *ifq, struct mbuf *m)
482 {
483 struct ether_header *eh;
484 struct mbuf *mtop = m;
485 uint16_t ether_type;
486 int hlen, af, hdrsize;
487 void *hdr;
488
489 KASSERT((mtop->m_flags & M_PKTHDR) != 0);
490
491 hlen = ETHER_HDR_LEN;
492 eh = mtod(m, struct ether_header *);
493
494 ether_type = htons(eh->ether_type);
495
496 if (ether_type < ETHERMTU) {
497 /* LLC/SNAP */
498 struct llc *llc = (struct llc *)(eh + 1);
499 hlen += 8;
500
501 if (m->m_len < hlen ||
502 llc->llc_dsap != LLC_SNAP_LSAP ||
503 llc->llc_ssap != LLC_SNAP_LSAP ||
504 llc->llc_control != LLC_UI) {
505 /* Not SNAP. */
506 goto bad;
507 }
508
509 ether_type = htons(llc->llc_un.type_snap.ether_type);
510 }
511
512 switch (ether_type) {
513 case ETHERTYPE_IP:
514 af = AF_INET;
515 hdrsize = 20; /* sizeof(struct ip) */
516 break;
517
518 case ETHERTYPE_IPV6:
519 af = AF_INET6;
520 hdrsize = 40; /* sizeof(struct ip6_hdr) */
521 break;
522
523 default:
524 af = AF_UNSPEC;
525 hdrsize = 0;
526 break;
527 }
528
529 while (m->m_len <= hlen) {
530 hlen -= m->m_len;
531 m = m->m_next;
532 if (m == NULL)
533 goto bad;
534 }
535
536 if (m->m_len < (hlen + hdrsize)) {
537 /*
538 * protocol header not in a single mbuf.
539 * We can't cope with this situation right
540 * now (but it shouldn't ever happen, really, anyhow).
541 */
542 #ifdef DEBUG
543 printf("altq_etherclassify: headers span multiple mbufs: "
544 "%d < %d\n", m->m_len, (hlen + hdrsize));
545 #endif
546 goto bad;
547 }
548
549 m->m_data += hlen;
550 m->m_len -= hlen;
551
552 hdr = mtod(m, void *);
553
554 if (ALTQ_NEEDS_CLASSIFY(ifq)) {
555 mtop->m_pkthdr.pattr_class =
556 (*ifq->altq_classify)(ifq->altq_clfier, m, af);
557 }
558 mtop->m_pkthdr.pattr_af = af;
559 mtop->m_pkthdr.pattr_hdr = hdr;
560
561 m->m_data -= hlen;
562 m->m_len += hlen;
563
564 return;
565
566 bad:
567 mtop->m_pkthdr.pattr_class = NULL;
568 mtop->m_pkthdr.pattr_hdr = NULL;
569 mtop->m_pkthdr.pattr_af = AF_UNSPEC;
570 }
571 #endif /* ALTQ */
572
573 #if defined (LLC) || defined (NETATALK)
574 static void
575 ether_input_llc(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh)
576 {
577 struct ifqueue *inq = NULL;
578 int isr = 0;
579 struct llc *l;
580
581 if (m->m_len < sizeof(*eh) + sizeof(struct llc))
582 goto error;
583
584 l = (struct llc *)(eh+1);
585 switch (l->llc_dsap) {
586 #ifdef NETATALK
587 case LLC_SNAP_LSAP:
588 switch (l->llc_control) {
589 case LLC_UI:
590 if (l->llc_ssap != LLC_SNAP_LSAP)
591 goto error;
592
593 if (memcmp(&(l->llc_snap_org_code)[0],
594 at_org_code, sizeof(at_org_code)) == 0 &&
595 ntohs(l->llc_snap_ether_type) ==
596 ETHERTYPE_ATALK) {
597 inq = &atintrq2;
598 m_adj(m, sizeof(struct ether_header)
599 + sizeof(struct llc));
600 isr = NETISR_ATALK;
601 break;
602 }
603
604 if (memcmp(&(l->llc_snap_org_code)[0],
605 aarp_org_code,
606 sizeof(aarp_org_code)) == 0 &&
607 ntohs(l->llc_snap_ether_type) ==
608 ETHERTYPE_AARP) {
609 m_adj(m, sizeof(struct ether_header)
610 + sizeof(struct llc));
611 aarpinput(ifp, m); /* XXX queue? */
612 return;
613 }
614
615 default:
616 goto error;
617 }
618 break;
619 #endif
620 default:
621 goto noproto;
622 }
623
624 KASSERT(inq != NULL);
625 IFQ_ENQUEUE_ISR(inq, m, isr);
626 return;
627
628 noproto:
629 m_freem(m);
630 if_statinc(ifp, if_noproto);
631 return;
632 error:
633 m_freem(m);
634 if_statinc(ifp, if_ierrors);
635 return;
636 }
637 #endif /* defined (LLC) || defined (NETATALK) */
638
639 /*
640 * Process a received Ethernet packet;
641 * the packet is in the mbuf chain m with
642 * the ether header.
643 */
644 void
645 ether_input(struct ifnet *ifp, struct mbuf *m)
646 {
647 #if NVLAN > 0 || defined(MBUFTRACE)
648 struct ethercom *ec = (struct ethercom *) ifp;
649 #endif
650 pktqueue_t *pktq = NULL;
651 struct ifqueue *inq = NULL;
652 uint16_t etype;
653 struct ether_header *eh;
654 size_t ehlen;
655 static int earlypkts;
656 int isr = 0;
657
658 /* No RPS for not-IP. */
659 pktq_rps_hash_func_t rps_hash = NULL;
660
661 KASSERT(!cpu_intr_p());
662 KASSERT((m->m_flags & M_PKTHDR) != 0);
663
664 if ((ifp->if_flags & IFF_UP) == 0)
665 goto drop;
666
667 #ifdef MBUFTRACE
668 m_claimm(m, &ec->ec_rx_mowner);
669 #endif
670
671 if (__predict_false(m->m_len < sizeof(*eh))) {
672 if ((m = m_pullup(m, sizeof(*eh))) == NULL) {
673 if_statinc(ifp, if_ierrors);
674 return;
675 }
676 }
677
678 eh = mtod(m, struct ether_header *);
679 etype = ntohs(eh->ether_type);
680 ehlen = sizeof(*eh);
681
682 if (__predict_false(earlypkts < 100 ||
683 entropy_epoch() == (unsigned)-1)) {
684 rnd_add_data(NULL, eh, ehlen, 0);
685 earlypkts++;
686 }
687
688 /*
689 * Determine if the packet is within its size limits. For MPLS the
690 * header length is variable, so we skip the check.
691 */
692 if (etype != ETHERTYPE_MPLS && m->m_pkthdr.len >
693 ETHER_MAX_FRAME(ifp, etype, m->m_flags & M_HASFCS)) {
694 #ifdef DIAGNOSTIC
695 mutex_enter(&bigpktpps_lock);
696 if (ppsratecheck(&bigpktppslim_last, &bigpktpps_count,
697 bigpktppslim)) {
698 printf("%s: discarding oversize frame (len=%d)\n",
699 ifp->if_xname, m->m_pkthdr.len);
700 }
701 mutex_exit(&bigpktpps_lock);
702 #endif
703 goto error;
704 }
705
706 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
707 /*
708 * If this is not a simplex interface, drop the packet
709 * if it came from us.
710 */
711 if ((ifp->if_flags & IFF_SIMPLEX) == 0 &&
712 memcmp(CLLADDR(ifp->if_sadl), eh->ether_shost,
713 ETHER_ADDR_LEN) == 0) {
714 goto drop;
715 }
716
717 if (memcmp(etherbroadcastaddr,
718 eh->ether_dhost, ETHER_ADDR_LEN) == 0)
719 m->m_flags |= M_BCAST;
720 else
721 m->m_flags |= M_MCAST;
722 if_statinc(ifp, if_imcasts);
723 }
724
725 /* If the CRC is still on the packet, trim it off. */
726 if (m->m_flags & M_HASFCS) {
727 m_adj(m, -ETHER_CRC_LEN);
728 m->m_flags &= ~M_HASFCS;
729 }
730
731 if_statadd(ifp, if_ibytes, m->m_pkthdr.len);
732
733 if (!vlan_has_tag(m) && etype == ETHERTYPE_VLAN) {
734 m = ether_strip_vlantag(m);
735 if (m == NULL) {
736 if_statinc(ifp, if_ierrors);
737 return;
738 }
739
740 eh = mtod(m, struct ether_header *);
741 etype = ntohs(eh->ether_type);
742 ehlen = sizeof(*eh);
743 }
744
745 if ((m->m_flags & (M_BCAST | M_MCAST | M_PROMISC)) == 0 &&
746 (ifp->if_flags & IFF_PROMISC) != 0 &&
747 memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
748 ETHER_ADDR_LEN) != 0) {
749 m->m_flags |= M_PROMISC;
750 }
751
752 if ((m->m_flags & M_PROMISC) == 0) {
753 if (pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_IN) != 0)
754 return;
755 if (m == NULL)
756 return;
757
758 eh = mtod(m, struct ether_header *);
759 etype = ntohs(eh->ether_type);
760 }
761
762 /*
763 * Processing a logical interfaces that are able
764 * to configure vlan(4).
765 */
766 #if NAGR > 0
767 if (ifp->if_lagg != NULL &&
768 __predict_true(etype != ETHERTYPE_SLOWPROTOCOLS)) {
769 m->m_flags &= ~M_PROMISC;
770 agr_input(ifp, m);
771 return;
772 }
773 #endif
774
775 /*
776 * VLAN processing.
777 *
778 * VLAN provides service delimiting so the frames are
779 * processed before other handlings. If a VLAN interface
780 * does not exist to take those frames, they're returned
781 * to ether_input().
782 */
783
784 if (vlan_has_tag(m)) {
785 if (EVL_VLANOFTAG(vlan_get_tag(m)) == 0) {
786 if (etype == ETHERTYPE_VLAN ||
787 etype == ETHERTYPE_QINQ)
788 goto drop;
789
790 /* XXX we should actually use the prio value? */
791 m->m_flags &= ~M_VLANTAG;
792 } else {
793 #if NVLAN > 0
794 if (ec->ec_nvlans > 0) {
795 m = vlan_input(ifp, m);
796
797 /* vlan_input() called ether_input() recursively */
798 if (m == NULL)
799 return;
800 }
801 #endif
802 /* drop VLAN frames not for this port. */
803 goto noproto;
804 }
805 }
806
807 #if NCARP > 0
808 if (__predict_false(ifp->if_carp && ifp->if_type != IFT_CARP)) {
809 /*
810 * Clear M_PROMISC, in case the packet comes from a
811 * vlan.
812 */
813 m->m_flags &= ~M_PROMISC;
814 if (carp_input(m, (uint8_t *)&eh->ether_shost,
815 (uint8_t *)&eh->ether_dhost, eh->ether_type) == 0)
816 return;
817 }
818 #endif
819
820 /*
821 * Handle protocols that expect to have the Ethernet header
822 * (and possibly FCS) intact.
823 */
824 switch (etype) {
825 #if NPPPOE > 0
826 case ETHERTYPE_PPPOEDISC:
827 pppoedisc_input(ifp, m);
828 return;
829
830 case ETHERTYPE_PPPOE:
831 pppoe_input(ifp, m);
832 return;
833 #endif
834
835 case ETHERTYPE_SLOWPROTOCOLS: {
836 uint8_t subtype;
837
838 if (m->m_pkthdr.len < sizeof(*eh) + sizeof(subtype))
839 goto error;
840
841 m_copydata(m, sizeof(*eh), sizeof(subtype), &subtype);
842 switch (subtype) {
843 #if NAGR > 0
844 case SLOWPROTOCOLS_SUBTYPE_LACP:
845 if (ifp->if_lagg != NULL) {
846 ieee8023ad_lacp_input(ifp, m);
847 return;
848 }
849 break;
850
851 case SLOWPROTOCOLS_SUBTYPE_MARKER:
852 if (ifp->if_lagg != NULL) {
853 ieee8023ad_marker_input(ifp, m);
854 return;
855 }
856 break;
857 #endif
858
859 default:
860 if (subtype == 0 || subtype > 10) {
861 /* illegal value */
862 goto error;
863 }
864 /* unknown subtype */
865 break;
866 }
867 }
868 /* FALLTHROUGH */
869 default:
870 if (m->m_flags & M_PROMISC)
871 goto drop;
872 }
873
874 /* If the CRC is still on the packet, trim it off. */
875 if (m->m_flags & M_HASFCS) {
876 m_adj(m, -ETHER_CRC_LEN);
877 m->m_flags &= ~M_HASFCS;
878 }
879
880 /* etype represents the size of the payload in this case */
881 if (etype <= ETHERMTU + sizeof(struct ether_header)) {
882 KASSERT(ehlen == sizeof(*eh));
883 #if defined (LLC) || defined (NETATALK)
884 ether_input_llc(ifp, m, eh);
885 return;
886 #else
887 /* ethertype of 0-1500 is regarded as noproto */
888 goto noproto;
889 #endif
890 }
891
892 /* Strip off the Ethernet header. */
893 m_adj(m, ehlen);
894
895 switch (etype) {
896 #ifdef INET
897 case ETHERTYPE_IP:
898 #ifdef GATEWAY
899 if (ipflow_fastforward(m))
900 return;
901 #endif
902 pktq = ip_pktq;
903 rps_hash = atomic_load_relaxed(ðer_pktq_rps_hash_p);
904 break;
905
906 case ETHERTYPE_ARP:
907 pktq = arp_pktq;
908 break;
909
910 case ETHERTYPE_REVARP:
911 revarpinput(m); /* XXX queue? */
912 return;
913 #endif
914
915 #ifdef INET6
916 case ETHERTYPE_IPV6:
917 if (__predict_false(!in6_present))
918 goto noproto;
919 #ifdef GATEWAY
920 if (ip6flow_fastforward(&m))
921 return;
922 #endif
923 pktq = ip6_pktq;
924 rps_hash = atomic_load_relaxed(ðer_pktq_rps_hash_p);
925 break;
926 #endif
927
928 #ifdef NETATALK
929 case ETHERTYPE_ATALK:
930 isr = NETISR_ATALK;
931 inq = &atintrq1;
932 break;
933
934 case ETHERTYPE_AARP:
935 aarpinput(ifp, m); /* XXX queue? */
936 return;
937 #endif
938
939 #ifdef MPLS
940 case ETHERTYPE_MPLS:
941 isr = NETISR_MPLS;
942 inq = &mplsintrq;
943 break;
944 #endif
945
946 default:
947 goto noproto;
948 }
949
950 if (__predict_true(pktq)) {
951 const uint32_t h = rps_hash ? pktq_rps_hash(&rps_hash, m) : 0;
952 if (__predict_false(!pktq_enqueue(pktq, m, h))) {
953 m_freem(m);
954 }
955 return;
956 }
957
958 if (__predict_false(!inq)) {
959 /* Should not happen. */
960 goto error;
961 }
962
963 IFQ_ENQUEUE_ISR(inq, m, isr);
964 return;
965
966 drop:
967 m_freem(m);
968 if_statinc(ifp, if_iqdrops);
969 return;
970 noproto:
971 m_freem(m);
972 if_statinc(ifp, if_noproto);
973 return;
974 error:
975 m_freem(m);
976 if_statinc(ifp, if_ierrors);
977 return;
978 }
979
980 static void
981 ether_bpf_mtap(struct bpf_if *bp, struct mbuf *m, u_int direction)
982 {
983 struct ether_vlan_header evl;
984 struct m_hdr mh, md;
985
986 KASSERT(bp != NULL);
987
988 if (!vlan_has_tag(m)) {
989 bpf_mtap3(bp, m, direction);
990 return;
991 }
992
993 memcpy(&evl, mtod(m, char *), ETHER_HDR_LEN);
994 evl.evl_proto = evl.evl_encap_proto;
995 evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
996 evl.evl_tag = htons(vlan_get_tag(m));
997
998 md.mh_flags = 0;
999 md.mh_data = m->m_data + ETHER_HDR_LEN;
1000 md.mh_len = m->m_len - ETHER_HDR_LEN;
1001 md.mh_next = m->m_next;
1002
1003 mh.mh_flags = 0;
1004 mh.mh_data = (char *)&evl;
1005 mh.mh_len = sizeof(evl);
1006 mh.mh_next = (struct mbuf *)&md;
1007
1008 bpf_mtap3(bp, (struct mbuf *)&mh, direction);
1009 }
1010
1011 /*
1012 * Convert Ethernet address to printable (loggable) representation.
1013 */
1014 char *
1015 ether_sprintf(const u_char *ap)
1016 {
1017 static char etherbuf[3 * ETHER_ADDR_LEN];
1018 return ether_snprintf(etherbuf, sizeof(etherbuf), ap);
1019 }
1020
1021 char *
1022 ether_snprintf(char *buf, size_t len, const u_char *ap)
1023 {
1024 char *cp = buf;
1025 size_t i;
1026
1027 for (i = 0; i < len / 3; i++) {
1028 *cp++ = hexdigits[*ap >> 4];
1029 *cp++ = hexdigits[*ap++ & 0xf];
1030 *cp++ = ':';
1031 }
1032 *--cp = '\0';
1033 return buf;
1034 }
1035
1036 /*
1037 * Perform common duties while attaching to interface list
1038 */
1039 void
1040 ether_ifattach(struct ifnet *ifp, const uint8_t *lla)
1041 {
1042 struct ethercom *ec = (struct ethercom *)ifp;
1043 char xnamebuf[HOOKNAMSIZ];
1044
1045 ifp->if_type = IFT_ETHER;
1046 ifp->if_hdrlen = ETHER_HDR_LEN;
1047 ifp->if_dlt = DLT_EN10MB;
1048 ifp->if_mtu = ETHERMTU;
1049 ifp->if_output = ether_output;
1050 ifp->_if_input = ether_input;
1051 ifp->if_bpf_mtap = ether_bpf_mtap;
1052 if (ifp->if_baudrate == 0)
1053 ifp->if_baudrate = IF_Mbps(10); /* just a default */
1054
1055 if (lla != NULL)
1056 if_set_sadl(ifp, lla, ETHER_ADDR_LEN, !ETHER_IS_LOCAL(lla));
1057
1058 LIST_INIT(&ec->ec_multiaddrs);
1059 SIMPLEQ_INIT(&ec->ec_vids);
1060 ec->ec_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
1061 ec->ec_flags = 0;
1062 ifp->if_broadcastaddr = etherbroadcastaddr;
1063 bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1064 snprintf(xnamebuf, sizeof(xnamebuf),
1065 "%s-ether_ifdetachhooks", ifp->if_xname);
1066 ec->ec_ifdetach_hooks = simplehook_create(IPL_NET, xnamebuf);
1067 #ifdef MBUFTRACE
1068 mowner_init_owner(&ec->ec_tx_mowner, ifp->if_xname, "tx");
1069 mowner_init_owner(&ec->ec_rx_mowner, ifp->if_xname, "rx");
1070 MOWNER_ATTACH(&ec->ec_tx_mowner);
1071 MOWNER_ATTACH(&ec->ec_rx_mowner);
1072 ifp->if_mowner = &ec->ec_tx_mowner;
1073 #endif
1074 }
1075
1076 void
1077 ether_ifdetach(struct ifnet *ifp)
1078 {
1079 struct ethercom *ec = (void *) ifp;
1080 struct ether_multi *enm;
1081
1082 IFNET_ASSERT_UNLOCKED(ifp);
1083 /*
1084 * Prevent further calls to ioctl (for example turning off
1085 * promiscuous mode from the bridge code), which eventually can
1086 * call if_init() which can cause panics because the interface
1087 * is in the process of being detached. Return device not configured
1088 * instead.
1089 */
1090 ifp->if_ioctl = __FPTRCAST(int (*)(struct ifnet *, u_long, void *),
1091 enxio);
1092
1093 simplehook_dohooks(ec->ec_ifdetach_hooks);
1094 KASSERT(!simplehook_has_hooks(ec->ec_ifdetach_hooks));
1095 simplehook_destroy(ec->ec_ifdetach_hooks);
1096
1097 bpf_detach(ifp);
1098
1099 ETHER_LOCK(ec);
1100 KASSERT(ec->ec_nvlans == 0);
1101 while ((enm = LIST_FIRST(&ec->ec_multiaddrs)) != NULL) {
1102 LIST_REMOVE(enm, enm_list);
1103 kmem_free(enm, sizeof(*enm));
1104 ec->ec_multicnt--;
1105 }
1106 ETHER_UNLOCK(ec);
1107
1108 mutex_obj_free(ec->ec_lock);
1109 ec->ec_lock = NULL;
1110
1111 ifp->if_mowner = NULL;
1112 MOWNER_DETACH(&ec->ec_rx_mowner);
1113 MOWNER_DETACH(&ec->ec_tx_mowner);
1114 }
1115
1116 void *
1117 ether_ifdetachhook_establish(struct ifnet *ifp,
1118 void (*fn)(void *), void *arg)
1119 {
1120 struct ethercom *ec;
1121 khook_t *hk;
1122
1123 if (ifp->if_type != IFT_ETHER)
1124 return NULL;
1125
1126 ec = (struct ethercom *)ifp;
1127 hk = simplehook_establish(ec->ec_ifdetach_hooks,
1128 fn, arg);
1129
1130 return (void *)hk;
1131 }
1132
1133 void
1134 ether_ifdetachhook_disestablish(struct ifnet *ifp,
1135 void *vhook, kmutex_t *lock)
1136 {
1137 struct ethercom *ec;
1138
1139 if (vhook == NULL)
1140 return;
1141
1142 ec = (struct ethercom *)ifp;
1143 simplehook_disestablish(ec->ec_ifdetach_hooks, vhook, lock);
1144 }
1145
1146 #if 0
1147 /*
1148 * This is for reference. We have a table-driven version
1149 * of the little-endian crc32 generator, which is faster
1150 * than the double-loop.
1151 */
1152 uint32_t
1153 ether_crc32_le(const uint8_t *buf, size_t len)
1154 {
1155 uint32_t c, crc, carry;
1156 size_t i, j;
1157
1158 crc = 0xffffffffU; /* initial value */
1159
1160 for (i = 0; i < len; i++) {
1161 c = buf[i];
1162 for (j = 0; j < 8; j++) {
1163 carry = ((crc & 0x01) ? 1 : 0) ^ (c & 0x01);
1164 crc >>= 1;
1165 c >>= 1;
1166 if (carry)
1167 crc = (crc ^ ETHER_CRC_POLY_LE);
1168 }
1169 }
1170
1171 return (crc);
1172 }
1173 #else
1174 uint32_t
1175 ether_crc32_le(const uint8_t *buf, size_t len)
1176 {
1177 static const uint32_t crctab[] = {
1178 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
1179 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
1180 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
1181 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
1182 };
1183 uint32_t crc;
1184 size_t i;
1185
1186 crc = 0xffffffffU; /* initial value */
1187
1188 for (i = 0; i < len; i++) {
1189 crc ^= buf[i];
1190 crc = (crc >> 4) ^ crctab[crc & 0xf];
1191 crc = (crc >> 4) ^ crctab[crc & 0xf];
1192 }
1193
1194 return (crc);
1195 }
1196 #endif
1197
1198 uint32_t
1199 ether_crc32_be(const uint8_t *buf, size_t len)
1200 {
1201 uint32_t c, crc, carry;
1202 size_t i, j;
1203
1204 crc = 0xffffffffU; /* initial value */
1205
1206 for (i = 0; i < len; i++) {
1207 c = buf[i];
1208 for (j = 0; j < 8; j++) {
1209 carry = ((crc & 0x80000000U) ? 1 : 0) ^ (c & 0x01);
1210 crc <<= 1;
1211 c >>= 1;
1212 if (carry)
1213 crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
1214 }
1215 }
1216
1217 return (crc);
1218 }
1219
1220 #ifdef INET
1221 const uint8_t ether_ipmulticast_min[ETHER_ADDR_LEN] =
1222 { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
1223 const uint8_t ether_ipmulticast_max[ETHER_ADDR_LEN] =
1224 { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff };
1225 #endif
1226 #ifdef INET6
1227 const uint8_t ether_ip6multicast_min[ETHER_ADDR_LEN] =
1228 { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 };
1229 const uint8_t ether_ip6multicast_max[ETHER_ADDR_LEN] =
1230 { 0x33, 0x33, 0xff, 0xff, 0xff, 0xff };
1231 #endif
1232
1233 /*
1234 * ether_aton implementation, not using a static buffer.
1235 */
1236 int
1237 ether_aton_r(u_char *dest, size_t len, const char *str)
1238 {
1239 const u_char *cp = (const void *)str;
1240 u_char *ep;
1241
1242 #define atox(c) (((c) <= '9') ? ((c) - '0') : ((toupper(c) - 'A') + 10))
1243
1244 if (len < ETHER_ADDR_LEN)
1245 return ENOSPC;
1246
1247 ep = dest + ETHER_ADDR_LEN;
1248
1249 while (*cp) {
1250 if (!isxdigit(*cp))
1251 return EINVAL;
1252
1253 *dest = atox(*cp);
1254 cp++;
1255 if (isxdigit(*cp)) {
1256 *dest = (*dest << 4) | atox(*cp);
1257 cp++;
1258 }
1259 dest++;
1260
1261 if (dest == ep)
1262 return (*cp == '\0') ? 0 : ENAMETOOLONG;
1263
1264 switch (*cp) {
1265 case ':':
1266 case '-':
1267 case '.':
1268 cp++;
1269 break;
1270 }
1271 }
1272 return ENOBUFS;
1273 }
1274
1275 /*
1276 * Convert a sockaddr into an Ethernet address or range of Ethernet
1277 * addresses.
1278 */
1279 int
1280 ether_multiaddr(const struct sockaddr *sa, uint8_t addrlo[ETHER_ADDR_LEN],
1281 uint8_t addrhi[ETHER_ADDR_LEN])
1282 {
1283 #ifdef INET
1284 const struct sockaddr_in *sin;
1285 #endif
1286 #ifdef INET6
1287 const struct sockaddr_in6 *sin6;
1288 #endif
1289
1290 switch (sa->sa_family) {
1291
1292 case AF_UNSPEC:
1293 memcpy(addrlo, sa->sa_data, ETHER_ADDR_LEN);
1294 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1295 break;
1296
1297 #ifdef INET
1298 case AF_INET:
1299 sin = satocsin(sa);
1300 if (sin->sin_addr.s_addr == INADDR_ANY) {
1301 /*
1302 * An IP address of INADDR_ANY means listen to
1303 * or stop listening to all of the Ethernet
1304 * multicast addresses used for IP.
1305 * (This is for the sake of IP multicast routers.)
1306 */
1307 memcpy(addrlo, ether_ipmulticast_min, ETHER_ADDR_LEN);
1308 memcpy(addrhi, ether_ipmulticast_max, ETHER_ADDR_LEN);
1309 } else {
1310 ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo);
1311 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1312 }
1313 break;
1314 #endif
1315 #ifdef INET6
1316 case AF_INET6:
1317 sin6 = satocsin6(sa);
1318 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1319 /*
1320 * An IP6 address of 0 means listen to or stop
1321 * listening to all of the Ethernet multicast
1322 * address used for IP6.
1323 * (This is used for multicast routers.)
1324 */
1325 memcpy(addrlo, ether_ip6multicast_min, ETHER_ADDR_LEN);
1326 memcpy(addrhi, ether_ip6multicast_max, ETHER_ADDR_LEN);
1327 } else {
1328 ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, addrlo);
1329 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1330 }
1331 break;
1332 #endif
1333
1334 default:
1335 return EAFNOSUPPORT;
1336 }
1337 return 0;
1338 }
1339
1340 /*
1341 * Add an Ethernet multicast address or range of addresses to the list for a
1342 * given interface.
1343 */
1344 int
1345 ether_addmulti(const struct sockaddr *sa, struct ethercom *ec)
1346 {
1347 struct ether_multi *enm, *_enm;
1348 u_char addrlo[ETHER_ADDR_LEN];
1349 u_char addrhi[ETHER_ADDR_LEN];
1350 int error = 0;
1351
1352 /* Allocate out of lock */
1353 enm = kmem_alloc(sizeof(*enm), KM_SLEEP);
1354
1355 ETHER_LOCK(ec);
1356 error = ether_multiaddr(sa, addrlo, addrhi);
1357 if (error != 0)
1358 goto out;
1359
1360 /*
1361 * Verify that we have valid Ethernet multicast addresses.
1362 */
1363 if (!ETHER_IS_MULTICAST(addrlo) || !ETHER_IS_MULTICAST(addrhi)) {
1364 error = EINVAL;
1365 goto out;
1366 }
1367
1368 /*
1369 * See if the address range is already in the list.
1370 */
1371 _enm = ether_lookup_multi(addrlo, addrhi, ec);
1372 if (_enm != NULL) {
1373 /*
1374 * Found it; just increment the reference count.
1375 */
1376 ++_enm->enm_refcount;
1377 error = 0;
1378 goto out;
1379 }
1380
1381 /*
1382 * Link a new multicast record into the interface's multicast list.
1383 */
1384 memcpy(enm->enm_addrlo, addrlo, ETHER_ADDR_LEN);
1385 memcpy(enm->enm_addrhi, addrhi, ETHER_ADDR_LEN);
1386 enm->enm_refcount = 1;
1387 LIST_INSERT_HEAD(&ec->ec_multiaddrs, enm, enm_list);
1388 ec->ec_multicnt++;
1389
1390 /*
1391 * Return ENETRESET to inform the driver that the list has changed
1392 * and its reception filter should be adjusted accordingly.
1393 */
1394 error = ENETRESET;
1395 enm = NULL;
1396
1397 out:
1398 ETHER_UNLOCK(ec);
1399 if (enm != NULL)
1400 kmem_free(enm, sizeof(*enm));
1401 return error;
1402 }
1403
1404 /*
1405 * Delete a multicast address record.
1406 */
1407 int
1408 ether_delmulti(const struct sockaddr *sa, struct ethercom *ec)
1409 {
1410 struct ether_multi *enm;
1411 u_char addrlo[ETHER_ADDR_LEN];
1412 u_char addrhi[ETHER_ADDR_LEN];
1413 int error;
1414
1415 ETHER_LOCK(ec);
1416 error = ether_multiaddr(sa, addrlo, addrhi);
1417 if (error != 0)
1418 goto error;
1419
1420 /*
1421 * Look up the address in our list.
1422 */
1423 enm = ether_lookup_multi(addrlo, addrhi, ec);
1424 if (enm == NULL) {
1425 error = ENXIO;
1426 goto error;
1427 }
1428 if (--enm->enm_refcount != 0) {
1429 /*
1430 * Still some claims to this record.
1431 */
1432 error = 0;
1433 goto error;
1434 }
1435
1436 /*
1437 * No remaining claims to this record; unlink and free it.
1438 */
1439 LIST_REMOVE(enm, enm_list);
1440 ec->ec_multicnt--;
1441 ETHER_UNLOCK(ec);
1442 kmem_free(enm, sizeof(*enm));
1443
1444 /*
1445 * Return ENETRESET to inform the driver that the list has changed
1446 * and its reception filter should be adjusted accordingly.
1447 */
1448 return ENETRESET;
1449
1450 error:
1451 ETHER_UNLOCK(ec);
1452 return error;
1453 }
1454
1455 void
1456 ether_set_ifflags_cb(struct ethercom *ec, ether_cb_t cb)
1457 {
1458 ec->ec_ifflags_cb = cb;
1459 }
1460
1461 void
1462 ether_set_vlan_cb(struct ethercom *ec, ether_vlancb_t cb)
1463 {
1464
1465 ec->ec_vlan_cb = cb;
1466 }
1467
1468 static int
1469 ether_ioctl_reinit(struct ethercom *ec)
1470 {
1471 struct ifnet *ifp = &ec->ec_if;
1472 int error;
1473
1474 KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
1475
1476 switch (ifp->if_flags & (IFF_UP | IFF_RUNNING)) {
1477 case IFF_RUNNING:
1478 /*
1479 * If interface is marked down and it is running,
1480 * then stop and disable it.
1481 */
1482 if_stop(ifp, 1);
1483 break;
1484 case IFF_UP:
1485 /*
1486 * If interface is marked up and it is stopped, then
1487 * start it.
1488 */
1489 return if_init(ifp);
1490 case IFF_UP | IFF_RUNNING:
1491 error = 0;
1492 if (ec->ec_ifflags_cb != NULL) {
1493 error = (*ec->ec_ifflags_cb)(ec);
1494 if (error == ENETRESET) {
1495 /*
1496 * Reset the interface to pick up
1497 * changes in any other flags that
1498 * affect the hardware state.
1499 */
1500 return if_init(ifp);
1501 }
1502 } else
1503 error = if_init(ifp);
1504 return error;
1505 case 0:
1506 break;
1507 }
1508
1509 return 0;
1510 }
1511
1512 /*
1513 * Common ioctls for Ethernet interfaces. Note, we must be
1514 * called at splnet().
1515 */
1516 int
1517 ether_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1518 {
1519 struct ethercom *ec = (void *)ifp;
1520 struct eccapreq *eccr;
1521 struct ifreq *ifr = (struct ifreq *)data;
1522 struct if_laddrreq *iflr = data;
1523 const struct sockaddr_dl *sdl;
1524 static const uint8_t zero[ETHER_ADDR_LEN];
1525 int error;
1526
1527 switch (cmd) {
1528 case SIOCINITIFADDR:
1529 {
1530 struct ifaddr *ifa = (struct ifaddr *)data;
1531 if (ifa->ifa_addr->sa_family != AF_LINK
1532 && (ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
1533 (IFF_UP | IFF_RUNNING)) {
1534 ifp->if_flags |= IFF_UP;
1535 if ((error = if_init(ifp)) != 0)
1536 return error;
1537 }
1538 #ifdef INET
1539 if (ifa->ifa_addr->sa_family == AF_INET)
1540 arp_ifinit(ifp, ifa);
1541 #endif
1542 return 0;
1543 }
1544
1545 case SIOCSIFMTU:
1546 {
1547 int maxmtu;
1548
1549 if (ec->ec_capabilities & ETHERCAP_JUMBO_MTU)
1550 maxmtu = ETHERMTU_JUMBO;
1551 else
1552 maxmtu = ETHERMTU;
1553
1554 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > maxmtu)
1555 return EINVAL;
1556 else if ((error = ifioctl_common(ifp, cmd, data)) != ENETRESET)
1557 return error;
1558 else if (ifp->if_flags & IFF_UP) {
1559 /* Make sure the device notices the MTU change. */
1560 return if_init(ifp);
1561 } else
1562 return 0;
1563 }
1564
1565 case SIOCSIFFLAGS:
1566 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
1567 return error;
1568 return ether_ioctl_reinit(ec);
1569 case SIOCGIFFLAGS:
1570 error = ifioctl_common(ifp, cmd, data);
1571 if (error == 0) {
1572 /* Set IFF_ALLMULTI for backcompat */
1573 ifr->ifr_flags |= (ec->ec_flags & ETHER_F_ALLMULTI) ?
1574 IFF_ALLMULTI : 0;
1575 }
1576 return error;
1577 case SIOCGETHERCAP:
1578 eccr = (struct eccapreq *)data;
1579 eccr->eccr_capabilities = ec->ec_capabilities;
1580 eccr->eccr_capenable = ec->ec_capenable;
1581 return 0;
1582 case SIOCSETHERCAP:
1583 eccr = (struct eccapreq *)data;
1584 if ((eccr->eccr_capenable & ~ec->ec_capabilities) != 0)
1585 return EINVAL;
1586 if (eccr->eccr_capenable == ec->ec_capenable)
1587 return 0;
1588 #if 0 /* notyet */
1589 ec->ec_capenable = (ec->ec_capenable & ETHERCAP_CANTCHANGE)
1590 | (eccr->eccr_capenable & ~ETHERCAP_CANTCHANGE);
1591 #else
1592 ec->ec_capenable = eccr->eccr_capenable;
1593 #endif
1594 return ether_ioctl_reinit(ec);
1595 case SIOCADDMULTI:
1596 return ether_addmulti(ifreq_getaddr(cmd, ifr), ec);
1597 case SIOCDELMULTI:
1598 return ether_delmulti(ifreq_getaddr(cmd, ifr), ec);
1599 case SIOCSIFMEDIA:
1600 case SIOCGIFMEDIA:
1601 if (ec->ec_mii != NULL)
1602 return ifmedia_ioctl(ifp, ifr, &ec->ec_mii->mii_media,
1603 cmd);
1604 else if (ec->ec_ifmedia != NULL)
1605 return ifmedia_ioctl(ifp, ifr, ec->ec_ifmedia, cmd);
1606 else
1607 return ENOTTY;
1608 break;
1609 case SIOCALIFADDR:
1610 sdl = satocsdl(sstocsa(&iflr->addr));
1611 if (sdl->sdl_family != AF_LINK)
1612 ;
1613 else if (ETHER_IS_MULTICAST(CLLADDR(sdl)))
1614 return EINVAL;
1615 else if (memcmp(zero, CLLADDR(sdl), sizeof(zero)) == 0)
1616 return EINVAL;
1617 /*FALLTHROUGH*/
1618 default:
1619 return ifioctl_common(ifp, cmd, data);
1620 }
1621 return 0;
1622 }
1623
1624 /*
1625 * Enable/disable passing VLAN packets if the parent interface supports it.
1626 * Return:
1627 * 0: Ok
1628 * -1: Parent interface does not support vlans
1629 * >0: Error
1630 */
1631 int
1632 ether_enable_vlan_mtu(struct ifnet *ifp)
1633 {
1634 int error;
1635 struct ethercom *ec = (void *)ifp;
1636
1637 /* Parent does not support VLAN's */
1638 if ((ec->ec_capabilities & ETHERCAP_VLAN_MTU) == 0)
1639 return -1;
1640
1641 /*
1642 * Parent supports the VLAN_MTU capability,
1643 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames;
1644 * enable it.
1645 */
1646 ec->ec_capenable |= ETHERCAP_VLAN_MTU;
1647
1648 /* Interface is down, defer for later */
1649 if ((ifp->if_flags & IFF_UP) == 0)
1650 return 0;
1651
1652 if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
1653 return 0;
1654
1655 ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
1656 return error;
1657 }
1658
1659 int
1660 ether_disable_vlan_mtu(struct ifnet *ifp)
1661 {
1662 int error;
1663 struct ethercom *ec = (void *)ifp;
1664
1665 /* We still have VLAN's, defer for later */
1666 if (ec->ec_nvlans != 0)
1667 return 0;
1668
1669 /* Parent does not support VLAB's, nothing to do. */
1670 if ((ec->ec_capenable & ETHERCAP_VLAN_MTU) == 0)
1671 return -1;
1672
1673 /*
1674 * Disable Tx/Rx of VLAN-sized frames.
1675 */
1676 ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
1677
1678 /* Interface is down, defer for later */
1679 if ((ifp->if_flags & IFF_UP) == 0)
1680 return 0;
1681
1682 if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
1683 return 0;
1684
1685 ec->ec_capenable |= ETHERCAP_VLAN_MTU;
1686 return error;
1687 }
1688
1689 /*
1690 * Add and delete VLAN TAG
1691 */
1692 int
1693 ether_add_vlantag(struct ifnet *ifp, uint16_t vtag, bool *vlanmtu_status)
1694 {
1695 struct ethercom *ec = (void *)ifp;
1696 struct vlanid_list *vidp;
1697 bool vlanmtu_enabled;
1698 uint16_t vid = EVL_VLANOFTAG(vtag);
1699 int error;
1700
1701 vlanmtu_enabled = false;
1702
1703 /* Add a vid to the list */
1704 vidp = kmem_alloc(sizeof(*vidp), KM_SLEEP);
1705 vidp->vid = vid;
1706
1707 ETHER_LOCK(ec);
1708 ec->ec_nvlans++;
1709 SIMPLEQ_INSERT_TAIL(&ec->ec_vids, vidp, vid_list);
1710 ETHER_UNLOCK(ec);
1711
1712 if (ec->ec_nvlans == 1) {
1713 IFNET_LOCK(ifp);
1714 error = ether_enable_vlan_mtu(ifp);
1715 IFNET_UNLOCK(ifp);
1716
1717 if (error == 0) {
1718 vlanmtu_enabled = true;
1719 } else if (error != -1) {
1720 goto fail;
1721 }
1722 }
1723
1724 if (ec->ec_vlan_cb != NULL) {
1725 error = (*ec->ec_vlan_cb)(ec, vid, true);
1726 if (error != 0)
1727 goto fail;
1728 }
1729
1730 if (vlanmtu_status != NULL)
1731 *vlanmtu_status = vlanmtu_enabled;
1732
1733 return 0;
1734 fail:
1735 ETHER_LOCK(ec);
1736 ec->ec_nvlans--;
1737 SIMPLEQ_REMOVE(&ec->ec_vids, vidp, vlanid_list, vid_list);
1738 ETHER_UNLOCK(ec);
1739
1740 if (vlanmtu_enabled) {
1741 IFNET_LOCK(ifp);
1742 (void)ether_disable_vlan_mtu(ifp);
1743 IFNET_UNLOCK(ifp);
1744 }
1745
1746 kmem_free(vidp, sizeof(*vidp));
1747
1748 return error;
1749 }
1750
1751 int
1752 ether_del_vlantag(struct ifnet *ifp, uint16_t vtag)
1753 {
1754 struct ethercom *ec = (void *)ifp;
1755 struct vlanid_list *vidp;
1756 uint16_t vid = EVL_VLANOFTAG(vtag);
1757
1758 ETHER_LOCK(ec);
1759 SIMPLEQ_FOREACH(vidp, &ec->ec_vids, vid_list) {
1760 if (vidp->vid == vid) {
1761 SIMPLEQ_REMOVE(&ec->ec_vids, vidp,
1762 vlanid_list, vid_list);
1763 ec->ec_nvlans--;
1764 break;
1765 }
1766 }
1767 ETHER_UNLOCK(ec);
1768
1769 if (vidp == NULL)
1770 return ENOENT;
1771
1772 if (ec->ec_vlan_cb != NULL) {
1773 (void)(*ec->ec_vlan_cb)(ec, vidp->vid, false);
1774 }
1775
1776 if (ec->ec_nvlans == 0) {
1777 IFNET_LOCK(ifp);
1778 (void)ether_disable_vlan_mtu(ifp);
1779 IFNET_UNLOCK(ifp);
1780 }
1781
1782 kmem_free(vidp, sizeof(*vidp));
1783
1784 return 0;
1785 }
1786
1787 int
1788 ether_inject_vlantag(struct mbuf **mp, uint16_t etype, uint16_t tag)
1789 {
1790 static const size_t min_data_len =
1791 ETHER_MIN_LEN - ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1792 /* Used to pad ethernet frames with < ETHER_MIN_LEN bytes */
1793 static const char vlan_zero_pad_buff[ETHER_MIN_LEN] = { 0 };
1794
1795 struct ether_vlan_header *evl;
1796 struct mbuf *m = *mp;
1797 int error;
1798
1799 error = 0;
1800
1801 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1802 if (m == NULL) {
1803 error = ENOBUFS;
1804 goto out;
1805 }
1806
1807 if (m->m_len < sizeof(*evl)) {
1808 m = m_pullup(m, sizeof(*evl));
1809 if (m == NULL) {
1810 error = ENOBUFS;
1811 goto out;
1812 }
1813 }
1814
1815 /*
1816 * Transform the Ethernet header into an
1817 * Ethernet header with 802.1Q encapsulation.
1818 */
1819 memmove(mtod(m, void *),
1820 mtod(m, char *) + ETHER_VLAN_ENCAP_LEN,
1821 sizeof(struct ether_header));
1822 evl = mtod(m, struct ether_vlan_header *);
1823 evl->evl_proto = evl->evl_encap_proto;
1824 evl->evl_encap_proto = htons(etype);
1825 evl->evl_tag = htons(tag);
1826
1827 /*
1828 * To cater for VLAN-aware layer 2 ethernet
1829 * switches which may need to strip the tag
1830 * before forwarding the packet, make sure
1831 * the packet+tag is at least 68 bytes long.
1832 * This is necessary because our parent will
1833 * only pad to 64 bytes (ETHER_MIN_LEN) and
1834 * some switches will not pad by themselves
1835 * after deleting a tag.
1836 */
1837 if (m->m_pkthdr.len < min_data_len) {
1838 m_copyback(m, m->m_pkthdr.len,
1839 min_data_len - m->m_pkthdr.len,
1840 vlan_zero_pad_buff);
1841 }
1842
1843 m->m_flags &= ~M_VLANTAG;
1844
1845 out:
1846 *mp = m;
1847 return error;
1848 }
1849
1850 struct mbuf *
1851 ether_strip_vlantag(struct mbuf *m)
1852 {
1853 struct ether_vlan_header *evl;
1854
1855 if (m->m_len < sizeof(*evl) &&
1856 (m = m_pullup(m, sizeof(*evl))) == NULL) {
1857 return NULL;
1858 }
1859
1860 if (m_makewritable(&m, 0, sizeof(*evl), M_DONTWAIT)) {
1861 m_freem(m);
1862 return NULL;
1863 }
1864
1865 evl = mtod(m, struct ether_vlan_header *);
1866 KASSERT(ntohs(evl->evl_encap_proto) == ETHERTYPE_VLAN);
1867
1868 vlan_set_tag(m, ntohs(evl->evl_tag));
1869
1870 /*
1871 * Restore the original ethertype. We'll remove
1872 * the encapsulation after we've found the vlan
1873 * interface corresponding to the tag.
1874 */
1875 evl->evl_encap_proto = evl->evl_proto;
1876
1877 /*
1878 * Remove the encapsulation header and append tag.
1879 * The original header has already been fixed up above.
1880 */
1881 vlan_set_tag(m, ntohs(evl->evl_tag));
1882 memmove((char *)evl + ETHER_VLAN_ENCAP_LEN, evl,
1883 offsetof(struct ether_vlan_header, evl_encap_proto));
1884 m_adj(m, ETHER_VLAN_ENCAP_LEN);
1885
1886 return m;
1887 }
1888
1889 static int
1890 ether_multicast_sysctl(SYSCTLFN_ARGS)
1891 {
1892 struct ether_multi *enm;
1893 struct ifnet *ifp;
1894 struct ethercom *ec;
1895 int error = 0;
1896 size_t written;
1897 struct psref psref;
1898 int bound;
1899 unsigned int multicnt;
1900 struct ether_multi_sysctl *addrs;
1901 int i;
1902
1903 if (namelen != 1)
1904 return EINVAL;
1905
1906 bound = curlwp_bind();
1907 ifp = if_get_byindex(name[0], &psref);
1908 if (ifp == NULL) {
1909 error = ENODEV;
1910 goto out;
1911 }
1912 if (ifp->if_type != IFT_ETHER) {
1913 if_put(ifp, &psref);
1914 *oldlenp = 0;
1915 goto out;
1916 }
1917 ec = (struct ethercom *)ifp;
1918
1919 if (oldp == NULL) {
1920 if_put(ifp, &psref);
1921 *oldlenp = ec->ec_multicnt * sizeof(*addrs);
1922 goto out;
1923 }
1924
1925 /*
1926 * ec->ec_lock is a spin mutex so we cannot call sysctl_copyout, which
1927 * is sleepable, while holding it. Copy data to a local buffer first
1928 * with the lock taken and then call sysctl_copyout without holding it.
1929 */
1930 retry:
1931 multicnt = ec->ec_multicnt;
1932
1933 if (multicnt == 0) {
1934 if_put(ifp, &psref);
1935 *oldlenp = 0;
1936 goto out;
1937 }
1938
1939 addrs = kmem_zalloc(sizeof(*addrs) * multicnt, KM_SLEEP);
1940
1941 ETHER_LOCK(ec);
1942 if (multicnt != ec->ec_multicnt) {
1943 /* The number of multicast addresses has changed */
1944 ETHER_UNLOCK(ec);
1945 kmem_free(addrs, sizeof(*addrs) * multicnt);
1946 goto retry;
1947 }
1948
1949 i = 0;
1950 LIST_FOREACH(enm, &ec->ec_multiaddrs, enm_list) {
1951 struct ether_multi_sysctl *addr = &addrs[i];
1952 addr->enm_refcount = enm->enm_refcount;
1953 memcpy(addr->enm_addrlo, enm->enm_addrlo, ETHER_ADDR_LEN);
1954 memcpy(addr->enm_addrhi, enm->enm_addrhi, ETHER_ADDR_LEN);
1955 i++;
1956 }
1957 ETHER_UNLOCK(ec);
1958
1959 error = 0;
1960 written = 0;
1961 for (i = 0; i < multicnt; i++) {
1962 struct ether_multi_sysctl *addr = &addrs[i];
1963
1964 if (written + sizeof(*addr) > *oldlenp)
1965 break;
1966 error = sysctl_copyout(l, addr, oldp, sizeof(*addr));
1967 if (error)
1968 break;
1969 written += sizeof(*addr);
1970 oldp = (char *)oldp + sizeof(*addr);
1971 }
1972 kmem_free(addrs, sizeof(*addrs) * multicnt);
1973
1974 if_put(ifp, &psref);
1975
1976 *oldlenp = written;
1977 out:
1978 curlwp_bindx(bound);
1979 return error;
1980 }
1981
1982 static void
1983 ether_sysctl_setup(struct sysctllog **clog)
1984 {
1985 const struct sysctlnode *rnode = NULL;
1986
1987 sysctl_createv(clog, 0, NULL, &rnode,
1988 CTLFLAG_PERMANENT,
1989 CTLTYPE_NODE, "ether",
1990 SYSCTL_DESCR("Ethernet-specific information"),
1991 NULL, 0, NULL, 0,
1992 CTL_NET, CTL_CREATE, CTL_EOL);
1993
1994 sysctl_createv(clog, 0, &rnode, NULL,
1995 CTLFLAG_PERMANENT,
1996 CTLTYPE_NODE, "multicast",
1997 SYSCTL_DESCR("multicast addresses"),
1998 ether_multicast_sysctl, 0, NULL, 0,
1999 CTL_CREATE, CTL_EOL);
2000
2001 sysctl_createv(clog, 0, &rnode, NULL,
2002 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
2003 CTLTYPE_STRING, "rps_hash",
2004 SYSCTL_DESCR("Interface rps hash function control"),
2005 sysctl_pktq_rps_hash_handler, 0, (void *)ðer_pktq_rps_hash_p,
2006 PKTQ_RPS_HASH_NAME_LEN,
2007 CTL_CREATE, CTL_EOL);
2008 }
2009
2010 void
2011 etherinit(void)
2012 {
2013
2014 #ifdef DIAGNOSTIC
2015 mutex_init(&bigpktpps_lock, MUTEX_DEFAULT, IPL_NET);
2016 #endif
2017 ether_pktq_rps_hash_p = pktq_rps_hash_default;
2018 ether_sysctl_setup(NULL);
2019 }
2020