if_ethersubr.c revision 1.316 1 /* $NetBSD: if_ethersubr.c,v 1.316 2022/09/03 00:31:02 thorpej Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)if_ethersubr.c 8.2 (Berkeley) 4/4/96
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: if_ethersubr.c,v 1.316 2022/09/03 00:31:02 thorpej Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_inet.h"
68 #include "opt_atalk.h"
69 #include "opt_mbuftrace.h"
70 #include "opt_mpls.h"
71 #include "opt_gateway.h"
72 #include "opt_pppoe.h"
73 #include "opt_net_mpsafe.h"
74 #endif
75
76 #include "vlan.h"
77 #include "pppoe.h"
78 #include "bridge.h"
79 #include "arp.h"
80 #include "agr.h"
81
82 #include <sys/sysctl.h>
83 #include <sys/mbuf.h>
84 #include <sys/mutex.h>
85 #include <sys/ioctl.h>
86 #include <sys/errno.h>
87 #include <sys/device.h>
88 #include <sys/entropy.h>
89 #include <sys/rndsource.h>
90 #include <sys/cpu.h>
91 #include <sys/kmem.h>
92 #include <sys/hook.h>
93
94 #include <net/if.h>
95 #include <net/netisr.h>
96 #include <net/route.h>
97 #include <net/if_llc.h>
98 #include <net/if_dl.h>
99 #include <net/if_types.h>
100 #include <net/pktqueue.h>
101
102 #include <net/if_media.h>
103 #include <dev/mii/mii.h>
104 #include <dev/mii/miivar.h>
105
106 #if NARP == 0
107 /*
108 * XXX there should really be a way to issue this warning from within config(8)
109 */
110 #error You have included NETATALK or a pseudo-device in your configuration that depends on the presence of ethernet interfaces, but have no such interfaces configured. Check if you really need pseudo-device bridge, pppoe, vlan or options NETATALK.
111 #endif
112
113 #include <net/bpf.h>
114
115 #include <net/if_ether.h>
116 #include <net/if_vlanvar.h>
117
118 #if NPPPOE > 0
119 #include <net/if_pppoe.h>
120 #endif
121
122 #if NAGR > 0
123 #include <net/ether_slowprotocols.h>
124 #include <net/agr/ieee8023ad.h>
125 #include <net/agr/if_agrvar.h>
126 #endif
127
128 #if NBRIDGE > 0
129 #include <net/if_bridgevar.h>
130 #endif
131
132 #include <netinet/in.h>
133 #ifdef INET
134 #include <netinet/in_var.h>
135 #endif
136 #include <netinet/if_inarp.h>
137
138 #ifdef INET6
139 #ifndef INET
140 #include <netinet/in.h>
141 #endif
142 #include <netinet6/in6_var.h>
143 #include <netinet6/nd6.h>
144 #endif
145
146 #include "carp.h"
147 #if NCARP > 0
148 #include <netinet/ip_carp.h>
149 #endif
150
151 #ifdef NETATALK
152 #include <netatalk/at.h>
153 #include <netatalk/at_var.h>
154 #include <netatalk/at_extern.h>
155
156 #define llc_snap_org_code llc_un.type_snap.org_code
157 #define llc_snap_ether_type llc_un.type_snap.ether_type
158
159 extern u_char at_org_code[3];
160 extern u_char aarp_org_code[3];
161 #endif /* NETATALK */
162
163 #ifdef MPLS
164 #include <netmpls/mpls.h>
165 #include <netmpls/mpls_var.h>
166 #endif
167
168 CTASSERT(sizeof(struct ether_addr) == 6);
169 CTASSERT(sizeof(struct ether_header) == 14);
170
171 #ifdef DIAGNOSTIC
172 static struct timeval bigpktppslim_last;
173 static int bigpktppslim = 2; /* XXX */
174 static int bigpktpps_count;
175 static kmutex_t bigpktpps_lock __cacheline_aligned;
176 #endif
177
178 const uint8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
179 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
180 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
181 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
182 #define senderr(e) { error = (e); goto bad;}
183
184 static pktq_rps_hash_func_t ether_pktq_rps_hash_p;
185
186 static int ether_output(struct ifnet *, struct mbuf *,
187 const struct sockaddr *, const struct rtentry *);
188
189 /*
190 * Ethernet output routine.
191 * Encapsulate a packet of type family for the local net.
192 * Assumes that ifp is actually pointer to ethercom structure.
193 */
194 static int
195 ether_output(struct ifnet * const ifp0, struct mbuf * const m0,
196 const struct sockaddr * const dst, const struct rtentry *rt)
197 {
198 uint8_t esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
199 uint16_t etype = 0;
200 int error = 0, hdrcmplt = 0;
201 struct mbuf *m = m0;
202 struct mbuf *mcopy = NULL;
203 struct ether_header *eh;
204 struct ifnet *ifp = ifp0;
205 #ifdef INET
206 struct arphdr *ah;
207 #endif
208 #ifdef NETATALK
209 struct at_ifaddr *aa;
210 #endif
211
212 #ifdef MBUFTRACE
213 m_claimm(m, ifp->if_mowner);
214 #endif
215
216 #if NCARP > 0
217 if (ifp->if_type == IFT_CARP) {
218 struct ifaddr *ifa;
219 int s = pserialize_read_enter();
220
221 /* loop back if this is going to the carp interface */
222 if (dst != NULL && ifp0->if_link_state == LINK_STATE_UP &&
223 (ifa = ifa_ifwithaddr(dst)) != NULL) {
224 if (ifa->ifa_ifp == ifp0) {
225 pserialize_read_exit(s);
226 return looutput(ifp0, m, dst, rt);
227 }
228 }
229 pserialize_read_exit(s);
230
231 ifp = ifp->if_carpdev;
232 /* ac = (struct arpcom *)ifp; */
233
234 if ((ifp0->if_flags & (IFF_UP | IFF_RUNNING)) !=
235 (IFF_UP | IFF_RUNNING))
236 senderr(ENETDOWN);
237 }
238 #endif
239
240 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING))
241 senderr(ENETDOWN);
242
243 switch (dst->sa_family) {
244
245 #ifdef INET
246 case AF_INET:
247 if (m->m_flags & M_BCAST) {
248 memcpy(edst, etherbroadcastaddr, sizeof(edst));
249 } else if (m->m_flags & M_MCAST) {
250 ETHER_MAP_IP_MULTICAST(&satocsin(dst)->sin_addr, edst);
251 } else {
252 error = arpresolve(ifp0, rt, m, dst, edst, sizeof(edst));
253 if (error)
254 return (error == EWOULDBLOCK) ? 0 : error;
255 }
256 /* If broadcasting on a simplex interface, loopback a copy */
257 if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
258 mcopy = m_copypacket(m, M_DONTWAIT);
259 etype = htons(ETHERTYPE_IP);
260 break;
261
262 case AF_ARP:
263 ah = mtod(m, struct arphdr *);
264 if (m->m_flags & M_BCAST) {
265 memcpy(edst, etherbroadcastaddr, sizeof(edst));
266 } else {
267 void *tha = ar_tha(ah);
268
269 if (tha == NULL) {
270 /* fake with ARPHRD_IEEE1394 */
271 m_freem(m);
272 return 0;
273 }
274 memcpy(edst, tha, sizeof(edst));
275 }
276
277 ah->ar_hrd = htons(ARPHRD_ETHER);
278
279 switch (ntohs(ah->ar_op)) {
280 case ARPOP_REVREQUEST:
281 case ARPOP_REVREPLY:
282 etype = htons(ETHERTYPE_REVARP);
283 break;
284
285 case ARPOP_REQUEST:
286 case ARPOP_REPLY:
287 default:
288 etype = htons(ETHERTYPE_ARP);
289 }
290 break;
291 #endif
292
293 #ifdef INET6
294 case AF_INET6:
295 if (m->m_flags & M_BCAST) {
296 memcpy(edst, etherbroadcastaddr, sizeof(edst));
297 } else if (m->m_flags & M_MCAST) {
298 ETHER_MAP_IPV6_MULTICAST(&satocsin6(dst)->sin6_addr,
299 edst);
300 } else {
301 error = nd6_resolve(ifp0, rt, m, dst, edst,
302 sizeof(edst));
303 if (error)
304 return (error == EWOULDBLOCK) ? 0 : error;
305 }
306 etype = htons(ETHERTYPE_IPV6);
307 break;
308 #endif
309
310 #ifdef NETATALK
311 case AF_APPLETALK: {
312 struct ifaddr *ifa;
313 int s;
314
315 KERNEL_LOCK(1, NULL);
316
317 if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst)) {
318 KERNEL_UNLOCK_ONE(NULL);
319 return 0;
320 }
321
322 /*
323 * ifaddr is the first thing in at_ifaddr
324 */
325 s = pserialize_read_enter();
326 ifa = at_ifawithnet((const struct sockaddr_at *)dst, ifp);
327 if (ifa == NULL) {
328 pserialize_read_exit(s);
329 KERNEL_UNLOCK_ONE(NULL);
330 senderr(EADDRNOTAVAIL);
331 }
332 aa = (struct at_ifaddr *)ifa;
333
334 /*
335 * In the phase 2 case, we need to prepend an mbuf for the
336 * llc header.
337 */
338 if (aa->aa_flags & AFA_PHASE2) {
339 struct llc llc;
340
341 M_PREPEND(m, sizeof(struct llc), M_DONTWAIT);
342 if (m == NULL) {
343 pserialize_read_exit(s);
344 KERNEL_UNLOCK_ONE(NULL);
345 senderr(ENOBUFS);
346 }
347
348 llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
349 llc.llc_control = LLC_UI;
350 memcpy(llc.llc_snap_org_code, at_org_code,
351 sizeof(llc.llc_snap_org_code));
352 llc.llc_snap_ether_type = htons(ETHERTYPE_ATALK);
353 memcpy(mtod(m, void *), &llc, sizeof(struct llc));
354 } else {
355 etype = htons(ETHERTYPE_ATALK);
356 }
357 pserialize_read_exit(s);
358 KERNEL_UNLOCK_ONE(NULL);
359 break;
360 }
361 #endif /* NETATALK */
362
363 case pseudo_AF_HDRCMPLT:
364 hdrcmplt = 1;
365 memcpy(esrc,
366 ((const struct ether_header *)dst->sa_data)->ether_shost,
367 sizeof(esrc));
368 /* FALLTHROUGH */
369
370 case AF_UNSPEC:
371 memcpy(edst,
372 ((const struct ether_header *)dst->sa_data)->ether_dhost,
373 sizeof(edst));
374 /* AF_UNSPEC doesn't swap the byte order of the ether_type. */
375 etype = ((const struct ether_header *)dst->sa_data)->ether_type;
376 break;
377
378 default:
379 printf("%s: can't handle af%d\n", ifp->if_xname,
380 dst->sa_family);
381 senderr(EAFNOSUPPORT);
382 }
383
384 #ifdef MPLS
385 {
386 struct m_tag *mtag;
387 mtag = m_tag_find(m, PACKET_TAG_MPLS);
388 if (mtag != NULL) {
389 /* Having the tag itself indicates it's MPLS */
390 etype = htons(ETHERTYPE_MPLS);
391 m_tag_delete(m, mtag);
392 }
393 }
394 #endif
395
396 if (mcopy)
397 (void)looutput(ifp, mcopy, dst, rt);
398
399 KASSERT((m->m_flags & M_PKTHDR) != 0);
400
401 /*
402 * If no ether type is set, this must be a 802.2 formatted packet.
403 */
404 if (etype == 0)
405 etype = htons(m->m_pkthdr.len);
406
407 /*
408 * Add local net header. If no space in first mbuf, allocate another.
409 */
410 M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
411 if (m == NULL)
412 senderr(ENOBUFS);
413
414 eh = mtod(m, struct ether_header *);
415 /* Note: etype is already in network byte order. */
416 memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
417 memcpy(eh->ether_dhost, edst, sizeof(edst));
418 if (hdrcmplt) {
419 memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
420 } else {
421 memcpy(eh->ether_shost, CLLADDR(ifp->if_sadl),
422 sizeof(eh->ether_shost));
423 }
424
425 #if NCARP > 0
426 if (ifp0 != ifp && ifp0->if_type == IFT_CARP) {
427 memcpy(eh->ether_shost, CLLADDR(ifp0->if_sadl),
428 sizeof(eh->ether_shost));
429 }
430 #endif
431
432 if ((error = pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_OUT)) != 0)
433 return error;
434 if (m == NULL)
435 return 0;
436
437 #if NBRIDGE > 0
438 /*
439 * Bridges require special output handling.
440 */
441 if (ifp->if_bridge)
442 return bridge_output(ifp, m, NULL, NULL);
443 #endif
444
445 #if NCARP > 0
446 if (ifp != ifp0)
447 if_statadd(ifp0, if_obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
448 #endif
449
450 #ifdef ALTQ
451 KERNEL_LOCK(1, NULL);
452 /*
453 * If ALTQ is enabled on the parent interface, do
454 * classification; the queueing discipline might not
455 * require classification, but might require the
456 * address family/header pointer in the pktattr.
457 */
458 if (ALTQ_IS_ENABLED(&ifp->if_snd))
459 altq_etherclassify(&ifp->if_snd, m);
460 KERNEL_UNLOCK_ONE(NULL);
461 #endif
462 return ifq_enqueue(ifp, m);
463
464 bad:
465 if_statinc(ifp, if_oerrors);
466 if (m)
467 m_freem(m);
468 return error;
469 }
470
471 #ifdef ALTQ
472 /*
473 * This routine is a slight hack to allow a packet to be classified
474 * if the Ethernet headers are present. It will go away when ALTQ's
475 * classification engine understands link headers.
476 *
477 * XXX: We may need to do m_pullups here. First to ensure struct ether_header
478 * is indeed contiguous, then to read the LLC and so on.
479 */
480 void
481 altq_etherclassify(struct ifaltq *ifq, struct mbuf *m)
482 {
483 struct ether_header *eh;
484 struct mbuf *mtop = m;
485 uint16_t ether_type;
486 int hlen, af, hdrsize;
487 void *hdr;
488
489 KASSERT((mtop->m_flags & M_PKTHDR) != 0);
490
491 hlen = ETHER_HDR_LEN;
492 eh = mtod(m, struct ether_header *);
493
494 ether_type = htons(eh->ether_type);
495
496 if (ether_type < ETHERMTU) {
497 /* LLC/SNAP */
498 struct llc *llc = (struct llc *)(eh + 1);
499 hlen += 8;
500
501 if (m->m_len < hlen ||
502 llc->llc_dsap != LLC_SNAP_LSAP ||
503 llc->llc_ssap != LLC_SNAP_LSAP ||
504 llc->llc_control != LLC_UI) {
505 /* Not SNAP. */
506 goto bad;
507 }
508
509 ether_type = htons(llc->llc_un.type_snap.ether_type);
510 }
511
512 switch (ether_type) {
513 case ETHERTYPE_IP:
514 af = AF_INET;
515 hdrsize = 20; /* sizeof(struct ip) */
516 break;
517
518 case ETHERTYPE_IPV6:
519 af = AF_INET6;
520 hdrsize = 40; /* sizeof(struct ip6_hdr) */
521 break;
522
523 default:
524 af = AF_UNSPEC;
525 hdrsize = 0;
526 break;
527 }
528
529 while (m->m_len <= hlen) {
530 hlen -= m->m_len;
531 m = m->m_next;
532 if (m == NULL)
533 goto bad;
534 }
535
536 if (m->m_len < (hlen + hdrsize)) {
537 /*
538 * protocol header not in a single mbuf.
539 * We can't cope with this situation right
540 * now (but it shouldn't ever happen, really, anyhow).
541 */
542 #ifdef DEBUG
543 printf("altq_etherclassify: headers span multiple mbufs: "
544 "%d < %d\n", m->m_len, (hlen + hdrsize));
545 #endif
546 goto bad;
547 }
548
549 m->m_data += hlen;
550 m->m_len -= hlen;
551
552 hdr = mtod(m, void *);
553
554 if (ALTQ_NEEDS_CLASSIFY(ifq)) {
555 mtop->m_pkthdr.pattr_class =
556 (*ifq->altq_classify)(ifq->altq_clfier, m, af);
557 }
558 mtop->m_pkthdr.pattr_af = af;
559 mtop->m_pkthdr.pattr_hdr = hdr;
560
561 m->m_data -= hlen;
562 m->m_len += hlen;
563
564 return;
565
566 bad:
567 mtop->m_pkthdr.pattr_class = NULL;
568 mtop->m_pkthdr.pattr_hdr = NULL;
569 mtop->m_pkthdr.pattr_af = AF_UNSPEC;
570 }
571 #endif /* ALTQ */
572
573 #if defined (LLC) || defined (NETATALK)
574 static void
575 ether_input_llc(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh)
576 {
577 struct ifqueue *inq = NULL;
578 int isr = 0;
579 struct llc *l;
580
581 if (m->m_len < sizeof(*eh) + sizeof(struct llc))
582 goto error;
583
584 l = (struct llc *)(eh+1);
585 switch (l->llc_dsap) {
586 #ifdef NETATALK
587 case LLC_SNAP_LSAP:
588 switch (l->llc_control) {
589 case LLC_UI:
590 if (l->llc_ssap != LLC_SNAP_LSAP)
591 goto error;
592
593 if (memcmp(&(l->llc_snap_org_code)[0],
594 at_org_code, sizeof(at_org_code)) == 0 &&
595 ntohs(l->llc_snap_ether_type) ==
596 ETHERTYPE_ATALK) {
597 inq = &atintrq2;
598 m_adj(m, sizeof(struct ether_header)
599 + sizeof(struct llc));
600 isr = NETISR_ATALK;
601 break;
602 }
603
604 if (memcmp(&(l->llc_snap_org_code)[0],
605 aarp_org_code,
606 sizeof(aarp_org_code)) == 0 &&
607 ntohs(l->llc_snap_ether_type) ==
608 ETHERTYPE_AARP) {
609 m_adj(m, sizeof(struct ether_header)
610 + sizeof(struct llc));
611 aarpinput(ifp, m); /* XXX queue? */
612 return;
613 }
614
615 default:
616 goto error;
617 }
618 break;
619 #endif
620 default:
621 goto noproto;
622 }
623
624 KASSERT(inq != NULL);
625 IFQ_ENQUEUE_ISR(inq, m, isr);
626 return;
627
628 noproto:
629 m_freem(m);
630 if_statinc(ifp, if_noproto);
631 return;
632 error:
633 m_freem(m);
634 if_statinc(ifp, if_ierrors);
635 return;
636 }
637 #endif /* defined (LLC) || defined (NETATALK) */
638
639 /*
640 * Process a received Ethernet packet;
641 * the packet is in the mbuf chain m with
642 * the ether header.
643 */
644 void
645 ether_input(struct ifnet *ifp, struct mbuf *m)
646 {
647 #if NVLAN > 0 || defined(MBUFTRACE)
648 struct ethercom *ec = (struct ethercom *) ifp;
649 #endif
650 pktqueue_t *pktq = NULL;
651 struct ifqueue *inq = NULL;
652 uint16_t etype;
653 struct ether_header *eh;
654 size_t ehlen;
655 static int earlypkts;
656 int isr = 0;
657
658 /* No RPS for not-IP. */
659 pktq_rps_hash_func_t rps_hash = NULL;
660
661 KASSERT(!cpu_intr_p());
662 KASSERT((m->m_flags & M_PKTHDR) != 0);
663
664 if ((ifp->if_flags & IFF_UP) == 0)
665 goto drop;
666
667 #ifdef MBUFTRACE
668 m_claimm(m, &ec->ec_rx_mowner);
669 #endif
670
671 if (__predict_false(m->m_len < sizeof(*eh))) {
672 if ((m = m_pullup(m, sizeof(*eh))) == NULL) {
673 if_statinc(ifp, if_ierrors);
674 return;
675 }
676 }
677
678 eh = mtod(m, struct ether_header *);
679 etype = ntohs(eh->ether_type);
680 ehlen = sizeof(*eh);
681
682 if (__predict_false(earlypkts < 100 ||
683 entropy_epoch() == (unsigned)-1)) {
684 rnd_add_data(NULL, eh, ehlen, 0);
685 earlypkts++;
686 }
687
688 /*
689 * Determine if the packet is within its size limits. For MPLS the
690 * header length is variable, so we skip the check.
691 */
692 if (etype != ETHERTYPE_MPLS && m->m_pkthdr.len >
693 ETHER_MAX_FRAME(ifp, etype, m->m_flags & M_HASFCS)) {
694 #ifdef DIAGNOSTIC
695 mutex_enter(&bigpktpps_lock);
696 if (ppsratecheck(&bigpktppslim_last, &bigpktpps_count,
697 bigpktppslim)) {
698 printf("%s: discarding oversize frame (len=%d)\n",
699 ifp->if_xname, m->m_pkthdr.len);
700 }
701 mutex_exit(&bigpktpps_lock);
702 #endif
703 goto error;
704 }
705
706 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
707 /*
708 * If this is not a simplex interface, drop the packet
709 * if it came from us.
710 */
711 if ((ifp->if_flags & IFF_SIMPLEX) == 0 &&
712 memcmp(CLLADDR(ifp->if_sadl), eh->ether_shost,
713 ETHER_ADDR_LEN) == 0) {
714 goto drop;
715 }
716
717 if (memcmp(etherbroadcastaddr,
718 eh->ether_dhost, ETHER_ADDR_LEN) == 0)
719 m->m_flags |= M_BCAST;
720 else
721 m->m_flags |= M_MCAST;
722 if_statinc(ifp, if_imcasts);
723 }
724
725 /* If the CRC is still on the packet, trim it off. */
726 if (m->m_flags & M_HASFCS) {
727 m_adj(m, -ETHER_CRC_LEN);
728 m->m_flags &= ~M_HASFCS;
729 }
730
731 if_statadd(ifp, if_ibytes, m->m_pkthdr.len);
732
733 if (!vlan_has_tag(m) && etype == ETHERTYPE_VLAN) {
734 m = ether_strip_vlantag(m);
735 if (m == NULL) {
736 if_statinc(ifp, if_ierrors);
737 return;
738 }
739
740 eh = mtod(m, struct ether_header *);
741 etype = ntohs(eh->ether_type);
742 ehlen = sizeof(*eh);
743 }
744
745 if ((m->m_flags & (M_BCAST | M_MCAST | M_PROMISC)) == 0 &&
746 (ifp->if_flags & IFF_PROMISC) != 0 &&
747 memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
748 ETHER_ADDR_LEN) != 0) {
749 m->m_flags |= M_PROMISC;
750 }
751
752 if ((m->m_flags & M_PROMISC) == 0) {
753 if (pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_IN) != 0)
754 return;
755 if (m == NULL)
756 return;
757
758 eh = mtod(m, struct ether_header *);
759 etype = ntohs(eh->ether_type);
760 }
761
762 /*
763 * Processing a logical interfaces that are able
764 * to configure vlan(4).
765 */
766 #if NAGR > 0
767 if (ifp->if_lagg != NULL &&
768 __predict_true(etype != ETHERTYPE_SLOWPROTOCOLS)) {
769 m->m_flags &= ~M_PROMISC;
770 agr_input(ifp, m);
771 return;
772 }
773 #endif
774
775 /*
776 * VLAN processing.
777 *
778 * VLAN provides service delimiting so the frames are
779 * processed before other handlings. If a VLAN interface
780 * does not exist to take those frames, they're returned
781 * to ether_input().
782 */
783
784 if (vlan_has_tag(m)) {
785 if (EVL_VLANOFTAG(vlan_get_tag(m)) == 0) {
786 if (etype == ETHERTYPE_VLAN ||
787 etype == ETHERTYPE_QINQ)
788 goto drop;
789
790 /* XXX we should actually use the prio value? */
791 m->m_flags &= ~M_VLANTAG;
792 } else {
793 #if NVLAN > 0
794 if (ec->ec_nvlans > 0) {
795 m = vlan_input(ifp, m);
796
797 /* vlan_input() called ether_input() recursively */
798 if (m == NULL)
799 return;
800 }
801 #endif
802 /* drop VLAN frames not for this port. */
803 goto noproto;
804 }
805 }
806
807 #if NCARP > 0
808 if (__predict_false(ifp->if_carp && ifp->if_type != IFT_CARP)) {
809 /*
810 * Clear M_PROMISC, in case the packet comes from a
811 * vlan.
812 */
813 m->m_flags &= ~M_PROMISC;
814 if (carp_input(m, (uint8_t *)&eh->ether_shost,
815 (uint8_t *)&eh->ether_dhost, eh->ether_type) == 0)
816 return;
817 }
818 #endif
819
820 /*
821 * Handle protocols that expect to have the Ethernet header
822 * (and possibly FCS) intact.
823 */
824 switch (etype) {
825 #if NPPPOE > 0
826 case ETHERTYPE_PPPOEDISC:
827 pppoedisc_input(ifp, m);
828 return;
829
830 case ETHERTYPE_PPPOE:
831 pppoe_input(ifp, m);
832 return;
833 #endif
834
835 case ETHERTYPE_SLOWPROTOCOLS: {
836 uint8_t subtype;
837
838 if (m->m_pkthdr.len < sizeof(*eh) + sizeof(subtype))
839 goto error;
840
841 m_copydata(m, sizeof(*eh), sizeof(subtype), &subtype);
842 switch (subtype) {
843 #if NAGR > 0
844 case SLOWPROTOCOLS_SUBTYPE_LACP:
845 if (ifp->if_lagg != NULL) {
846 ieee8023ad_lacp_input(ifp, m);
847 return;
848 }
849 break;
850
851 case SLOWPROTOCOLS_SUBTYPE_MARKER:
852 if (ifp->if_lagg != NULL) {
853 ieee8023ad_marker_input(ifp, m);
854 return;
855 }
856 break;
857 #endif
858
859 default:
860 if (subtype == 0 || subtype > 10) {
861 /* illegal value */
862 goto error;
863 }
864 /* unknown subtype */
865 break;
866 }
867 }
868 /* FALLTHROUGH */
869 default:
870 if (m->m_flags & M_PROMISC)
871 goto drop;
872 }
873
874 /* If the CRC is still on the packet, trim it off. */
875 if (m->m_flags & M_HASFCS) {
876 m_adj(m, -ETHER_CRC_LEN);
877 m->m_flags &= ~M_HASFCS;
878 }
879
880 /* etype represents the size of the payload in this case */
881 if (etype <= ETHERMTU + sizeof(struct ether_header)) {
882 KASSERT(ehlen == sizeof(*eh));
883 #if defined (LLC) || defined (NETATALK)
884 ether_input_llc(ifp, m, eh);
885 return;
886 #else
887 /* ethertype of 0-1500 is regarded as noproto */
888 goto noproto;
889 #endif
890 }
891
892 /* Strip off the Ethernet header. */
893 m_adj(m, ehlen);
894
895 switch (etype) {
896 #ifdef INET
897 case ETHERTYPE_IP:
898 #ifdef GATEWAY
899 if (ipflow_fastforward(m))
900 return;
901 #endif
902 pktq = ip_pktq;
903 rps_hash = atomic_load_relaxed(ðer_pktq_rps_hash_p);
904 break;
905
906 case ETHERTYPE_ARP:
907 isr = NETISR_ARP;
908 inq = &arpintrq;
909 break;
910
911 case ETHERTYPE_REVARP:
912 revarpinput(m); /* XXX queue? */
913 return;
914 #endif
915
916 #ifdef INET6
917 case ETHERTYPE_IPV6:
918 if (__predict_false(!in6_present))
919 goto noproto;
920 #ifdef GATEWAY
921 if (ip6flow_fastforward(&m))
922 return;
923 #endif
924 pktq = ip6_pktq;
925 rps_hash = atomic_load_relaxed(ðer_pktq_rps_hash_p);
926 break;
927 #endif
928
929 #ifdef NETATALK
930 case ETHERTYPE_ATALK:
931 isr = NETISR_ATALK;
932 inq = &atintrq1;
933 break;
934
935 case ETHERTYPE_AARP:
936 aarpinput(ifp, m); /* XXX queue? */
937 return;
938 #endif
939
940 #ifdef MPLS
941 case ETHERTYPE_MPLS:
942 isr = NETISR_MPLS;
943 inq = &mplsintrq;
944 break;
945 #endif
946
947 default:
948 goto noproto;
949 }
950
951 if (__predict_true(pktq)) {
952 const uint32_t h = rps_hash ? pktq_rps_hash(&rps_hash, m) : 0;
953 if (__predict_false(!pktq_enqueue(pktq, m, h))) {
954 m_freem(m);
955 }
956 return;
957 }
958
959 if (__predict_false(!inq)) {
960 /* Should not happen. */
961 goto error;
962 }
963
964 IFQ_ENQUEUE_ISR(inq, m, isr);
965 return;
966
967 drop:
968 m_freem(m);
969 if_statinc(ifp, if_iqdrops);
970 return;
971 noproto:
972 m_freem(m);
973 if_statinc(ifp, if_noproto);
974 return;
975 error:
976 m_freem(m);
977 if_statinc(ifp, if_ierrors);
978 return;
979 }
980
981 static void
982 ether_bpf_mtap(struct bpf_if *bp, struct mbuf *m, u_int direction)
983 {
984 struct ether_vlan_header evl;
985 struct m_hdr mh, md;
986
987 KASSERT(bp != NULL);
988
989 if (!vlan_has_tag(m)) {
990 bpf_mtap3(bp, m, direction);
991 return;
992 }
993
994 memcpy(&evl, mtod(m, char *), ETHER_HDR_LEN);
995 evl.evl_proto = evl.evl_encap_proto;
996 evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
997 evl.evl_tag = htons(vlan_get_tag(m));
998
999 md.mh_flags = 0;
1000 md.mh_data = m->m_data + ETHER_HDR_LEN;
1001 md.mh_len = m->m_len - ETHER_HDR_LEN;
1002 md.mh_next = m->m_next;
1003
1004 mh.mh_flags = 0;
1005 mh.mh_data = (char *)&evl;
1006 mh.mh_len = sizeof(evl);
1007 mh.mh_next = (struct mbuf *)&md;
1008
1009 bpf_mtap3(bp, (struct mbuf *)&mh, direction);
1010 }
1011
1012 /*
1013 * Convert Ethernet address to printable (loggable) representation.
1014 */
1015 char *
1016 ether_sprintf(const u_char *ap)
1017 {
1018 static char etherbuf[3 * ETHER_ADDR_LEN];
1019 return ether_snprintf(etherbuf, sizeof(etherbuf), ap);
1020 }
1021
1022 char *
1023 ether_snprintf(char *buf, size_t len, const u_char *ap)
1024 {
1025 char *cp = buf;
1026 size_t i;
1027
1028 for (i = 0; i < len / 3; i++) {
1029 *cp++ = hexdigits[*ap >> 4];
1030 *cp++ = hexdigits[*ap++ & 0xf];
1031 *cp++ = ':';
1032 }
1033 *--cp = '\0';
1034 return buf;
1035 }
1036
1037 /*
1038 * Perform common duties while attaching to interface list
1039 */
1040 void
1041 ether_ifattach(struct ifnet *ifp, const uint8_t *lla)
1042 {
1043 struct ethercom *ec = (struct ethercom *)ifp;
1044 char xnamebuf[HOOKNAMSIZ];
1045
1046 ifp->if_type = IFT_ETHER;
1047 ifp->if_hdrlen = ETHER_HDR_LEN;
1048 ifp->if_dlt = DLT_EN10MB;
1049 ifp->if_mtu = ETHERMTU;
1050 ifp->if_output = ether_output;
1051 ifp->_if_input = ether_input;
1052 ifp->if_bpf_mtap = ether_bpf_mtap;
1053 if (ifp->if_baudrate == 0)
1054 ifp->if_baudrate = IF_Mbps(10); /* just a default */
1055
1056 if (lla != NULL)
1057 if_set_sadl(ifp, lla, ETHER_ADDR_LEN, !ETHER_IS_LOCAL(lla));
1058
1059 LIST_INIT(&ec->ec_multiaddrs);
1060 SIMPLEQ_INIT(&ec->ec_vids);
1061 ec->ec_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
1062 ec->ec_flags = 0;
1063 ifp->if_broadcastaddr = etherbroadcastaddr;
1064 bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1065 snprintf(xnamebuf, sizeof(xnamebuf),
1066 "%s-ether_ifdetachhooks", ifp->if_xname);
1067 ec->ec_ifdetach_hooks = simplehook_create(IPL_NET, xnamebuf);
1068 #ifdef MBUFTRACE
1069 mowner_init_owner(&ec->ec_tx_mowner, ifp->if_xname, "tx");
1070 mowner_init_owner(&ec->ec_rx_mowner, ifp->if_xname, "rx");
1071 MOWNER_ATTACH(&ec->ec_tx_mowner);
1072 MOWNER_ATTACH(&ec->ec_rx_mowner);
1073 ifp->if_mowner = &ec->ec_tx_mowner;
1074 #endif
1075 }
1076
1077 void
1078 ether_ifdetach(struct ifnet *ifp)
1079 {
1080 struct ethercom *ec = (void *) ifp;
1081 struct ether_multi *enm;
1082
1083 IFNET_ASSERT_UNLOCKED(ifp);
1084 /*
1085 * Prevent further calls to ioctl (for example turning off
1086 * promiscuous mode from the bridge code), which eventually can
1087 * call if_init() which can cause panics because the interface
1088 * is in the process of being detached. Return device not configured
1089 * instead.
1090 */
1091 ifp->if_ioctl = __FPTRCAST(int (*)(struct ifnet *, u_long, void *),
1092 enxio);
1093
1094 simplehook_dohooks(ec->ec_ifdetach_hooks);
1095 KASSERT(!simplehook_has_hooks(ec->ec_ifdetach_hooks));
1096 simplehook_destroy(ec->ec_ifdetach_hooks);
1097
1098 bpf_detach(ifp);
1099
1100 ETHER_LOCK(ec);
1101 KASSERT(ec->ec_nvlans == 0);
1102 while ((enm = LIST_FIRST(&ec->ec_multiaddrs)) != NULL) {
1103 LIST_REMOVE(enm, enm_list);
1104 kmem_free(enm, sizeof(*enm));
1105 ec->ec_multicnt--;
1106 }
1107 ETHER_UNLOCK(ec);
1108
1109 mutex_obj_free(ec->ec_lock);
1110 ec->ec_lock = NULL;
1111
1112 ifp->if_mowner = NULL;
1113 MOWNER_DETACH(&ec->ec_rx_mowner);
1114 MOWNER_DETACH(&ec->ec_tx_mowner);
1115 }
1116
1117 void *
1118 ether_ifdetachhook_establish(struct ifnet *ifp,
1119 void (*fn)(void *), void *arg)
1120 {
1121 struct ethercom *ec;
1122 khook_t *hk;
1123
1124 if (ifp->if_type != IFT_ETHER)
1125 return NULL;
1126
1127 ec = (struct ethercom *)ifp;
1128 hk = simplehook_establish(ec->ec_ifdetach_hooks,
1129 fn, arg);
1130
1131 return (void *)hk;
1132 }
1133
1134 void
1135 ether_ifdetachhook_disestablish(struct ifnet *ifp,
1136 void *vhook, kmutex_t *lock)
1137 {
1138 struct ethercom *ec;
1139
1140 if (vhook == NULL)
1141 return;
1142
1143 ec = (struct ethercom *)ifp;
1144 simplehook_disestablish(ec->ec_ifdetach_hooks, vhook, lock);
1145 }
1146
1147 #if 0
1148 /*
1149 * This is for reference. We have a table-driven version
1150 * of the little-endian crc32 generator, which is faster
1151 * than the double-loop.
1152 */
1153 uint32_t
1154 ether_crc32_le(const uint8_t *buf, size_t len)
1155 {
1156 uint32_t c, crc, carry;
1157 size_t i, j;
1158
1159 crc = 0xffffffffU; /* initial value */
1160
1161 for (i = 0; i < len; i++) {
1162 c = buf[i];
1163 for (j = 0; j < 8; j++) {
1164 carry = ((crc & 0x01) ? 1 : 0) ^ (c & 0x01);
1165 crc >>= 1;
1166 c >>= 1;
1167 if (carry)
1168 crc = (crc ^ ETHER_CRC_POLY_LE);
1169 }
1170 }
1171
1172 return (crc);
1173 }
1174 #else
1175 uint32_t
1176 ether_crc32_le(const uint8_t *buf, size_t len)
1177 {
1178 static const uint32_t crctab[] = {
1179 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
1180 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
1181 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
1182 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
1183 };
1184 uint32_t crc;
1185 size_t i;
1186
1187 crc = 0xffffffffU; /* initial value */
1188
1189 for (i = 0; i < len; i++) {
1190 crc ^= buf[i];
1191 crc = (crc >> 4) ^ crctab[crc & 0xf];
1192 crc = (crc >> 4) ^ crctab[crc & 0xf];
1193 }
1194
1195 return (crc);
1196 }
1197 #endif
1198
1199 uint32_t
1200 ether_crc32_be(const uint8_t *buf, size_t len)
1201 {
1202 uint32_t c, crc, carry;
1203 size_t i, j;
1204
1205 crc = 0xffffffffU; /* initial value */
1206
1207 for (i = 0; i < len; i++) {
1208 c = buf[i];
1209 for (j = 0; j < 8; j++) {
1210 carry = ((crc & 0x80000000U) ? 1 : 0) ^ (c & 0x01);
1211 crc <<= 1;
1212 c >>= 1;
1213 if (carry)
1214 crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
1215 }
1216 }
1217
1218 return (crc);
1219 }
1220
1221 #ifdef INET
1222 const uint8_t ether_ipmulticast_min[ETHER_ADDR_LEN] =
1223 { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
1224 const uint8_t ether_ipmulticast_max[ETHER_ADDR_LEN] =
1225 { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff };
1226 #endif
1227 #ifdef INET6
1228 const uint8_t ether_ip6multicast_min[ETHER_ADDR_LEN] =
1229 { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 };
1230 const uint8_t ether_ip6multicast_max[ETHER_ADDR_LEN] =
1231 { 0x33, 0x33, 0xff, 0xff, 0xff, 0xff };
1232 #endif
1233
1234 /*
1235 * ether_aton implementation, not using a static buffer.
1236 */
1237 int
1238 ether_aton_r(u_char *dest, size_t len, const char *str)
1239 {
1240 const u_char *cp = (const void *)str;
1241 u_char *ep;
1242
1243 #define atox(c) (((c) <= '9') ? ((c) - '0') : ((toupper(c) - 'A') + 10))
1244
1245 if (len < ETHER_ADDR_LEN)
1246 return ENOSPC;
1247
1248 ep = dest + ETHER_ADDR_LEN;
1249
1250 while (*cp) {
1251 if (!isxdigit(*cp))
1252 return EINVAL;
1253
1254 *dest = atox(*cp);
1255 cp++;
1256 if (isxdigit(*cp)) {
1257 *dest = (*dest << 4) | atox(*cp);
1258 cp++;
1259 }
1260 dest++;
1261
1262 if (dest == ep)
1263 return (*cp == '\0') ? 0 : ENAMETOOLONG;
1264
1265 switch (*cp) {
1266 case ':':
1267 case '-':
1268 case '.':
1269 cp++;
1270 break;
1271 }
1272 }
1273 return ENOBUFS;
1274 }
1275
1276 /*
1277 * Convert a sockaddr into an Ethernet address or range of Ethernet
1278 * addresses.
1279 */
1280 int
1281 ether_multiaddr(const struct sockaddr *sa, uint8_t addrlo[ETHER_ADDR_LEN],
1282 uint8_t addrhi[ETHER_ADDR_LEN])
1283 {
1284 #ifdef INET
1285 const struct sockaddr_in *sin;
1286 #endif
1287 #ifdef INET6
1288 const struct sockaddr_in6 *sin6;
1289 #endif
1290
1291 switch (sa->sa_family) {
1292
1293 case AF_UNSPEC:
1294 memcpy(addrlo, sa->sa_data, ETHER_ADDR_LEN);
1295 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1296 break;
1297
1298 #ifdef INET
1299 case AF_INET:
1300 sin = satocsin(sa);
1301 if (sin->sin_addr.s_addr == INADDR_ANY) {
1302 /*
1303 * An IP address of INADDR_ANY means listen to
1304 * or stop listening to all of the Ethernet
1305 * multicast addresses used for IP.
1306 * (This is for the sake of IP multicast routers.)
1307 */
1308 memcpy(addrlo, ether_ipmulticast_min, ETHER_ADDR_LEN);
1309 memcpy(addrhi, ether_ipmulticast_max, ETHER_ADDR_LEN);
1310 } else {
1311 ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo);
1312 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1313 }
1314 break;
1315 #endif
1316 #ifdef INET6
1317 case AF_INET6:
1318 sin6 = satocsin6(sa);
1319 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1320 /*
1321 * An IP6 address of 0 means listen to or stop
1322 * listening to all of the Ethernet multicast
1323 * address used for IP6.
1324 * (This is used for multicast routers.)
1325 */
1326 memcpy(addrlo, ether_ip6multicast_min, ETHER_ADDR_LEN);
1327 memcpy(addrhi, ether_ip6multicast_max, ETHER_ADDR_LEN);
1328 } else {
1329 ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, addrlo);
1330 memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1331 }
1332 break;
1333 #endif
1334
1335 default:
1336 return EAFNOSUPPORT;
1337 }
1338 return 0;
1339 }
1340
1341 /*
1342 * Add an Ethernet multicast address or range of addresses to the list for a
1343 * given interface.
1344 */
1345 int
1346 ether_addmulti(const struct sockaddr *sa, struct ethercom *ec)
1347 {
1348 struct ether_multi *enm, *_enm;
1349 u_char addrlo[ETHER_ADDR_LEN];
1350 u_char addrhi[ETHER_ADDR_LEN];
1351 int error = 0;
1352
1353 /* Allocate out of lock */
1354 enm = kmem_alloc(sizeof(*enm), KM_SLEEP);
1355
1356 ETHER_LOCK(ec);
1357 error = ether_multiaddr(sa, addrlo, addrhi);
1358 if (error != 0)
1359 goto out;
1360
1361 /*
1362 * Verify that we have valid Ethernet multicast addresses.
1363 */
1364 if (!ETHER_IS_MULTICAST(addrlo) || !ETHER_IS_MULTICAST(addrhi)) {
1365 error = EINVAL;
1366 goto out;
1367 }
1368
1369 /*
1370 * See if the address range is already in the list.
1371 */
1372 _enm = ether_lookup_multi(addrlo, addrhi, ec);
1373 if (_enm != NULL) {
1374 /*
1375 * Found it; just increment the reference count.
1376 */
1377 ++_enm->enm_refcount;
1378 error = 0;
1379 goto out;
1380 }
1381
1382 /*
1383 * Link a new multicast record into the interface's multicast list.
1384 */
1385 memcpy(enm->enm_addrlo, addrlo, ETHER_ADDR_LEN);
1386 memcpy(enm->enm_addrhi, addrhi, ETHER_ADDR_LEN);
1387 enm->enm_refcount = 1;
1388 LIST_INSERT_HEAD(&ec->ec_multiaddrs, enm, enm_list);
1389 ec->ec_multicnt++;
1390
1391 /*
1392 * Return ENETRESET to inform the driver that the list has changed
1393 * and its reception filter should be adjusted accordingly.
1394 */
1395 error = ENETRESET;
1396 enm = NULL;
1397
1398 out:
1399 ETHER_UNLOCK(ec);
1400 if (enm != NULL)
1401 kmem_free(enm, sizeof(*enm));
1402 return error;
1403 }
1404
1405 /*
1406 * Delete a multicast address record.
1407 */
1408 int
1409 ether_delmulti(const struct sockaddr *sa, struct ethercom *ec)
1410 {
1411 struct ether_multi *enm;
1412 u_char addrlo[ETHER_ADDR_LEN];
1413 u_char addrhi[ETHER_ADDR_LEN];
1414 int error;
1415
1416 ETHER_LOCK(ec);
1417 error = ether_multiaddr(sa, addrlo, addrhi);
1418 if (error != 0)
1419 goto error;
1420
1421 /*
1422 * Look up the address in our list.
1423 */
1424 enm = ether_lookup_multi(addrlo, addrhi, ec);
1425 if (enm == NULL) {
1426 error = ENXIO;
1427 goto error;
1428 }
1429 if (--enm->enm_refcount != 0) {
1430 /*
1431 * Still some claims to this record.
1432 */
1433 error = 0;
1434 goto error;
1435 }
1436
1437 /*
1438 * No remaining claims to this record; unlink and free it.
1439 */
1440 LIST_REMOVE(enm, enm_list);
1441 ec->ec_multicnt--;
1442 ETHER_UNLOCK(ec);
1443 kmem_free(enm, sizeof(*enm));
1444
1445 /*
1446 * Return ENETRESET to inform the driver that the list has changed
1447 * and its reception filter should be adjusted accordingly.
1448 */
1449 return ENETRESET;
1450
1451 error:
1452 ETHER_UNLOCK(ec);
1453 return error;
1454 }
1455
1456 void
1457 ether_set_ifflags_cb(struct ethercom *ec, ether_cb_t cb)
1458 {
1459 ec->ec_ifflags_cb = cb;
1460 }
1461
1462 void
1463 ether_set_vlan_cb(struct ethercom *ec, ether_vlancb_t cb)
1464 {
1465
1466 ec->ec_vlan_cb = cb;
1467 }
1468
1469 static int
1470 ether_ioctl_reinit(struct ethercom *ec)
1471 {
1472 struct ifnet *ifp = &ec->ec_if;
1473 int error;
1474
1475 KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
1476
1477 switch (ifp->if_flags & (IFF_UP | IFF_RUNNING)) {
1478 case IFF_RUNNING:
1479 /*
1480 * If interface is marked down and it is running,
1481 * then stop and disable it.
1482 */
1483 if_stop(ifp, 1);
1484 break;
1485 case IFF_UP:
1486 /*
1487 * If interface is marked up and it is stopped, then
1488 * start it.
1489 */
1490 return if_init(ifp);
1491 case IFF_UP | IFF_RUNNING:
1492 error = 0;
1493 if (ec->ec_ifflags_cb != NULL) {
1494 error = (*ec->ec_ifflags_cb)(ec);
1495 if (error == ENETRESET) {
1496 /*
1497 * Reset the interface to pick up
1498 * changes in any other flags that
1499 * affect the hardware state.
1500 */
1501 return if_init(ifp);
1502 }
1503 } else
1504 error = if_init(ifp);
1505 return error;
1506 case 0:
1507 break;
1508 }
1509
1510 return 0;
1511 }
1512
1513 /*
1514 * Common ioctls for Ethernet interfaces. Note, we must be
1515 * called at splnet().
1516 */
1517 int
1518 ether_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1519 {
1520 struct ethercom *ec = (void *)ifp;
1521 struct eccapreq *eccr;
1522 struct ifreq *ifr = (struct ifreq *)data;
1523 struct if_laddrreq *iflr = data;
1524 const struct sockaddr_dl *sdl;
1525 static const uint8_t zero[ETHER_ADDR_LEN];
1526 int error;
1527
1528 switch (cmd) {
1529 case SIOCINITIFADDR:
1530 {
1531 struct ifaddr *ifa = (struct ifaddr *)data;
1532 if (ifa->ifa_addr->sa_family != AF_LINK
1533 && (ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
1534 (IFF_UP | IFF_RUNNING)) {
1535 ifp->if_flags |= IFF_UP;
1536 if ((error = if_init(ifp)) != 0)
1537 return error;
1538 }
1539 #ifdef INET
1540 if (ifa->ifa_addr->sa_family == AF_INET)
1541 arp_ifinit(ifp, ifa);
1542 #endif
1543 return 0;
1544 }
1545
1546 case SIOCSIFMTU:
1547 {
1548 int maxmtu;
1549
1550 if (ec->ec_capabilities & ETHERCAP_JUMBO_MTU)
1551 maxmtu = ETHERMTU_JUMBO;
1552 else
1553 maxmtu = ETHERMTU;
1554
1555 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > maxmtu)
1556 return EINVAL;
1557 else if ((error = ifioctl_common(ifp, cmd, data)) != ENETRESET)
1558 return error;
1559 else if (ifp->if_flags & IFF_UP) {
1560 /* Make sure the device notices the MTU change. */
1561 return if_init(ifp);
1562 } else
1563 return 0;
1564 }
1565
1566 case SIOCSIFFLAGS:
1567 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
1568 return error;
1569 return ether_ioctl_reinit(ec);
1570 case SIOCGIFFLAGS:
1571 error = ifioctl_common(ifp, cmd, data);
1572 if (error == 0) {
1573 /* Set IFF_ALLMULTI for backcompat */
1574 ifr->ifr_flags |= (ec->ec_flags & ETHER_F_ALLMULTI) ?
1575 IFF_ALLMULTI : 0;
1576 }
1577 return error;
1578 case SIOCGETHERCAP:
1579 eccr = (struct eccapreq *)data;
1580 eccr->eccr_capabilities = ec->ec_capabilities;
1581 eccr->eccr_capenable = ec->ec_capenable;
1582 return 0;
1583 case SIOCSETHERCAP:
1584 eccr = (struct eccapreq *)data;
1585 if ((eccr->eccr_capenable & ~ec->ec_capabilities) != 0)
1586 return EINVAL;
1587 if (eccr->eccr_capenable == ec->ec_capenable)
1588 return 0;
1589 #if 0 /* notyet */
1590 ec->ec_capenable = (ec->ec_capenable & ETHERCAP_CANTCHANGE)
1591 | (eccr->eccr_capenable & ~ETHERCAP_CANTCHANGE);
1592 #else
1593 ec->ec_capenable = eccr->eccr_capenable;
1594 #endif
1595 return ether_ioctl_reinit(ec);
1596 case SIOCADDMULTI:
1597 return ether_addmulti(ifreq_getaddr(cmd, ifr), ec);
1598 case SIOCDELMULTI:
1599 return ether_delmulti(ifreq_getaddr(cmd, ifr), ec);
1600 case SIOCSIFMEDIA:
1601 case SIOCGIFMEDIA:
1602 if (ec->ec_mii != NULL)
1603 return ifmedia_ioctl(ifp, ifr, &ec->ec_mii->mii_media,
1604 cmd);
1605 else if (ec->ec_ifmedia != NULL)
1606 return ifmedia_ioctl(ifp, ifr, ec->ec_ifmedia, cmd);
1607 else
1608 return ENOTTY;
1609 break;
1610 case SIOCALIFADDR:
1611 sdl = satocsdl(sstocsa(&iflr->addr));
1612 if (sdl->sdl_family != AF_LINK)
1613 ;
1614 else if (ETHER_IS_MULTICAST(CLLADDR(sdl)))
1615 return EINVAL;
1616 else if (memcmp(zero, CLLADDR(sdl), sizeof(zero)) == 0)
1617 return EINVAL;
1618 /*FALLTHROUGH*/
1619 default:
1620 return ifioctl_common(ifp, cmd, data);
1621 }
1622 return 0;
1623 }
1624
1625 /*
1626 * Enable/disable passing VLAN packets if the parent interface supports it.
1627 * Return:
1628 * 0: Ok
1629 * -1: Parent interface does not support vlans
1630 * >0: Error
1631 */
1632 int
1633 ether_enable_vlan_mtu(struct ifnet *ifp)
1634 {
1635 int error;
1636 struct ethercom *ec = (void *)ifp;
1637
1638 /* Parent does not support VLAN's */
1639 if ((ec->ec_capabilities & ETHERCAP_VLAN_MTU) == 0)
1640 return -1;
1641
1642 /*
1643 * Parent supports the VLAN_MTU capability,
1644 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames;
1645 * enable it.
1646 */
1647 ec->ec_capenable |= ETHERCAP_VLAN_MTU;
1648
1649 /* Interface is down, defer for later */
1650 if ((ifp->if_flags & IFF_UP) == 0)
1651 return 0;
1652
1653 if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
1654 return 0;
1655
1656 ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
1657 return error;
1658 }
1659
1660 int
1661 ether_disable_vlan_mtu(struct ifnet *ifp)
1662 {
1663 int error;
1664 struct ethercom *ec = (void *)ifp;
1665
1666 /* We still have VLAN's, defer for later */
1667 if (ec->ec_nvlans != 0)
1668 return 0;
1669
1670 /* Parent does not support VLAB's, nothing to do. */
1671 if ((ec->ec_capenable & ETHERCAP_VLAN_MTU) == 0)
1672 return -1;
1673
1674 /*
1675 * Disable Tx/Rx of VLAN-sized frames.
1676 */
1677 ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
1678
1679 /* Interface is down, defer for later */
1680 if ((ifp->if_flags & IFF_UP) == 0)
1681 return 0;
1682
1683 if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
1684 return 0;
1685
1686 ec->ec_capenable |= ETHERCAP_VLAN_MTU;
1687 return error;
1688 }
1689
1690 /*
1691 * Add and delete VLAN TAG
1692 */
1693 int
1694 ether_add_vlantag(struct ifnet *ifp, uint16_t vtag, bool *vlanmtu_status)
1695 {
1696 struct ethercom *ec = (void *)ifp;
1697 struct vlanid_list *vidp;
1698 bool vlanmtu_enabled;
1699 uint16_t vid = EVL_VLANOFTAG(vtag);
1700 int error;
1701
1702 vlanmtu_enabled = false;
1703
1704 /* Add a vid to the list */
1705 vidp = kmem_alloc(sizeof(*vidp), KM_SLEEP);
1706 vidp->vid = vid;
1707
1708 ETHER_LOCK(ec);
1709 ec->ec_nvlans++;
1710 SIMPLEQ_INSERT_TAIL(&ec->ec_vids, vidp, vid_list);
1711 ETHER_UNLOCK(ec);
1712
1713 if (ec->ec_nvlans == 1) {
1714 IFNET_LOCK(ifp);
1715 error = ether_enable_vlan_mtu(ifp);
1716 IFNET_UNLOCK(ifp);
1717
1718 if (error == 0) {
1719 vlanmtu_enabled = true;
1720 } else if (error != -1) {
1721 goto fail;
1722 }
1723 }
1724
1725 if (ec->ec_vlan_cb != NULL) {
1726 error = (*ec->ec_vlan_cb)(ec, vid, true);
1727 if (error != 0)
1728 goto fail;
1729 }
1730
1731 if (vlanmtu_status != NULL)
1732 *vlanmtu_status = vlanmtu_enabled;
1733
1734 return 0;
1735 fail:
1736 ETHER_LOCK(ec);
1737 ec->ec_nvlans--;
1738 SIMPLEQ_REMOVE(&ec->ec_vids, vidp, vlanid_list, vid_list);
1739 ETHER_UNLOCK(ec);
1740
1741 if (vlanmtu_enabled) {
1742 IFNET_LOCK(ifp);
1743 (void)ether_disable_vlan_mtu(ifp);
1744 IFNET_UNLOCK(ifp);
1745 }
1746
1747 kmem_free(vidp, sizeof(*vidp));
1748
1749 return error;
1750 }
1751
1752 int
1753 ether_del_vlantag(struct ifnet *ifp, uint16_t vtag)
1754 {
1755 struct ethercom *ec = (void *)ifp;
1756 struct vlanid_list *vidp;
1757 uint16_t vid = EVL_VLANOFTAG(vtag);
1758
1759 ETHER_LOCK(ec);
1760 SIMPLEQ_FOREACH(vidp, &ec->ec_vids, vid_list) {
1761 if (vidp->vid == vid) {
1762 SIMPLEQ_REMOVE(&ec->ec_vids, vidp,
1763 vlanid_list, vid_list);
1764 ec->ec_nvlans--;
1765 break;
1766 }
1767 }
1768 ETHER_UNLOCK(ec);
1769
1770 if (vidp == NULL)
1771 return ENOENT;
1772
1773 if (ec->ec_vlan_cb != NULL) {
1774 (void)(*ec->ec_vlan_cb)(ec, vidp->vid, false);
1775 }
1776
1777 if (ec->ec_nvlans == 0) {
1778 IFNET_LOCK(ifp);
1779 (void)ether_disable_vlan_mtu(ifp);
1780 IFNET_UNLOCK(ifp);
1781 }
1782
1783 kmem_free(vidp, sizeof(*vidp));
1784
1785 return 0;
1786 }
1787
1788 int
1789 ether_inject_vlantag(struct mbuf **mp, uint16_t etype, uint16_t tag)
1790 {
1791 static const size_t min_data_len =
1792 ETHER_MIN_LEN - ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1793 /* Used to pad ethernet frames with < ETHER_MIN_LEN bytes */
1794 static const char vlan_zero_pad_buff[ETHER_MIN_LEN] = { 0 };
1795
1796 struct ether_vlan_header *evl;
1797 struct mbuf *m = *mp;
1798 int error;
1799
1800 error = 0;
1801
1802 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1803 if (m == NULL) {
1804 error = ENOBUFS;
1805 goto out;
1806 }
1807
1808 if (m->m_len < sizeof(*evl)) {
1809 m = m_pullup(m, sizeof(*evl));
1810 if (m == NULL) {
1811 error = ENOBUFS;
1812 goto out;
1813 }
1814 }
1815
1816 /*
1817 * Transform the Ethernet header into an
1818 * Ethernet header with 802.1Q encapsulation.
1819 */
1820 memmove(mtod(m, void *),
1821 mtod(m, char *) + ETHER_VLAN_ENCAP_LEN,
1822 sizeof(struct ether_header));
1823 evl = mtod(m, struct ether_vlan_header *);
1824 evl->evl_proto = evl->evl_encap_proto;
1825 evl->evl_encap_proto = htons(etype);
1826 evl->evl_tag = htons(tag);
1827
1828 /*
1829 * To cater for VLAN-aware layer 2 ethernet
1830 * switches which may need to strip the tag
1831 * before forwarding the packet, make sure
1832 * the packet+tag is at least 68 bytes long.
1833 * This is necessary because our parent will
1834 * only pad to 64 bytes (ETHER_MIN_LEN) and
1835 * some switches will not pad by themselves
1836 * after deleting a tag.
1837 */
1838 if (m->m_pkthdr.len < min_data_len) {
1839 m_copyback(m, m->m_pkthdr.len,
1840 min_data_len - m->m_pkthdr.len,
1841 vlan_zero_pad_buff);
1842 }
1843
1844 m->m_flags &= ~M_VLANTAG;
1845
1846 out:
1847 *mp = m;
1848 return error;
1849 }
1850
1851 struct mbuf *
1852 ether_strip_vlantag(struct mbuf *m)
1853 {
1854 struct ether_vlan_header *evl;
1855
1856 if (m->m_len < sizeof(*evl) &&
1857 (m = m_pullup(m, sizeof(*evl))) == NULL) {
1858 return NULL;
1859 }
1860
1861 if (m_makewritable(&m, 0, sizeof(*evl), M_DONTWAIT)) {
1862 m_freem(m);
1863 return NULL;
1864 }
1865
1866 evl = mtod(m, struct ether_vlan_header *);
1867 KASSERT(ntohs(evl->evl_encap_proto) == ETHERTYPE_VLAN);
1868
1869 vlan_set_tag(m, ntohs(evl->evl_tag));
1870
1871 /*
1872 * Restore the original ethertype. We'll remove
1873 * the encapsulation after we've found the vlan
1874 * interface corresponding to the tag.
1875 */
1876 evl->evl_encap_proto = evl->evl_proto;
1877
1878 /*
1879 * Remove the encapsulation header and append tag.
1880 * The original header has already been fixed up above.
1881 */
1882 vlan_set_tag(m, ntohs(evl->evl_tag));
1883 memmove((char *)evl + ETHER_VLAN_ENCAP_LEN, evl,
1884 offsetof(struct ether_vlan_header, evl_encap_proto));
1885 m_adj(m, ETHER_VLAN_ENCAP_LEN);
1886
1887 return m;
1888 }
1889
1890 static int
1891 ether_multicast_sysctl(SYSCTLFN_ARGS)
1892 {
1893 struct ether_multi *enm;
1894 struct ifnet *ifp;
1895 struct ethercom *ec;
1896 int error = 0;
1897 size_t written;
1898 struct psref psref;
1899 int bound;
1900 unsigned int multicnt;
1901 struct ether_multi_sysctl *addrs;
1902 int i;
1903
1904 if (namelen != 1)
1905 return EINVAL;
1906
1907 bound = curlwp_bind();
1908 ifp = if_get_byindex(name[0], &psref);
1909 if (ifp == NULL) {
1910 error = ENODEV;
1911 goto out;
1912 }
1913 if (ifp->if_type != IFT_ETHER) {
1914 if_put(ifp, &psref);
1915 *oldlenp = 0;
1916 goto out;
1917 }
1918 ec = (struct ethercom *)ifp;
1919
1920 if (oldp == NULL) {
1921 if_put(ifp, &psref);
1922 *oldlenp = ec->ec_multicnt * sizeof(*addrs);
1923 goto out;
1924 }
1925
1926 /*
1927 * ec->ec_lock is a spin mutex so we cannot call sysctl_copyout, which
1928 * is sleepable, while holding it. Copy data to a local buffer first
1929 * with the lock taken and then call sysctl_copyout without holding it.
1930 */
1931 retry:
1932 multicnt = ec->ec_multicnt;
1933
1934 if (multicnt == 0) {
1935 if_put(ifp, &psref);
1936 *oldlenp = 0;
1937 goto out;
1938 }
1939
1940 addrs = kmem_zalloc(sizeof(*addrs) * multicnt, KM_SLEEP);
1941
1942 ETHER_LOCK(ec);
1943 if (multicnt != ec->ec_multicnt) {
1944 /* The number of multicast addresses has changed */
1945 ETHER_UNLOCK(ec);
1946 kmem_free(addrs, sizeof(*addrs) * multicnt);
1947 goto retry;
1948 }
1949
1950 i = 0;
1951 LIST_FOREACH(enm, &ec->ec_multiaddrs, enm_list) {
1952 struct ether_multi_sysctl *addr = &addrs[i];
1953 addr->enm_refcount = enm->enm_refcount;
1954 memcpy(addr->enm_addrlo, enm->enm_addrlo, ETHER_ADDR_LEN);
1955 memcpy(addr->enm_addrhi, enm->enm_addrhi, ETHER_ADDR_LEN);
1956 i++;
1957 }
1958 ETHER_UNLOCK(ec);
1959
1960 error = 0;
1961 written = 0;
1962 for (i = 0; i < multicnt; i++) {
1963 struct ether_multi_sysctl *addr = &addrs[i];
1964
1965 if (written + sizeof(*addr) > *oldlenp)
1966 break;
1967 error = sysctl_copyout(l, addr, oldp, sizeof(*addr));
1968 if (error)
1969 break;
1970 written += sizeof(*addr);
1971 oldp = (char *)oldp + sizeof(*addr);
1972 }
1973 kmem_free(addrs, sizeof(*addrs) * multicnt);
1974
1975 if_put(ifp, &psref);
1976
1977 *oldlenp = written;
1978 out:
1979 curlwp_bindx(bound);
1980 return error;
1981 }
1982
1983 static void
1984 ether_sysctl_setup(struct sysctllog **clog)
1985 {
1986 const struct sysctlnode *rnode = NULL;
1987
1988 sysctl_createv(clog, 0, NULL, &rnode,
1989 CTLFLAG_PERMANENT,
1990 CTLTYPE_NODE, "ether",
1991 SYSCTL_DESCR("Ethernet-specific information"),
1992 NULL, 0, NULL, 0,
1993 CTL_NET, CTL_CREATE, CTL_EOL);
1994
1995 sysctl_createv(clog, 0, &rnode, NULL,
1996 CTLFLAG_PERMANENT,
1997 CTLTYPE_NODE, "multicast",
1998 SYSCTL_DESCR("multicast addresses"),
1999 ether_multicast_sysctl, 0, NULL, 0,
2000 CTL_CREATE, CTL_EOL);
2001
2002 sysctl_createv(clog, 0, &rnode, NULL,
2003 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
2004 CTLTYPE_STRING, "rps_hash",
2005 SYSCTL_DESCR("Interface rps hash function control"),
2006 sysctl_pktq_rps_hash_handler, 0, (void *)ðer_pktq_rps_hash_p,
2007 PKTQ_RPS_HASH_NAME_LEN,
2008 CTL_CREATE, CTL_EOL);
2009 }
2010
2011 void
2012 etherinit(void)
2013 {
2014
2015 #ifdef DIAGNOSTIC
2016 mutex_init(&bigpktpps_lock, MUTEX_DEFAULT, IPL_NET);
2017 #endif
2018 ether_pktq_rps_hash_p = pktq_rps_hash_default;
2019 ether_sysctl_setup(NULL);
2020 }
2021