ip_carp.c revision 1.60 1 /* $NetBSD: ip_carp.c,v 1.60 2015/02/26 09:54:46 roy Exp $ */
2 /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */
3
4 /*
5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
6 * Copyright (c) 2003 Ryan McBride. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include "opt_inet.h"
31 #include "opt_mbuftrace.h"
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.60 2015/02/26 09:54:46 roy Exp $");
35
36 /*
37 * TODO:
38 * - iface reconfigure
39 * - support for hardware checksum calculations;
40 *
41 */
42
43 #include <sys/param.h>
44 #include <sys/proc.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/callout.h>
49 #include <sys/ioctl.h>
50 #include <sys/errno.h>
51 #include <sys/device.h>
52 #include <sys/time.h>
53 #include <sys/kernel.h>
54 #include <sys/kauth.h>
55 #include <sys/sysctl.h>
56 #include <sys/ucred.h>
57 #include <sys/syslog.h>
58 #include <sys/acct.h>
59 #include <sys/cprng.h>
60
61 #include <sys/cpu.h>
62
63 #include <net/if.h>
64 #include <net/pfil.h>
65 #include <net/if_types.h>
66 #include <net/if_ether.h>
67 #include <net/route.h>
68 #include <net/netisr.h>
69 #include <net/net_stats.h>
70 #include <netinet/if_inarp.h>
71
72 #if NFDDI > 0
73 #include <net/if_fddi.h>
74 #endif
75 #if NTOKEN > 0
76 #include <net/if_token.h>
77 #endif
78
79 #ifdef INET
80 #include <netinet/in.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/in_var.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip_var.h>
85
86 #include <net/if_dl.h>
87 #endif
88
89 #ifdef INET6
90 #include <netinet/icmp6.h>
91 #include <netinet/ip6.h>
92 #include <netinet6/ip6_var.h>
93 #include <netinet6/nd6.h>
94 #include <netinet6/scope6_var.h>
95 #endif
96
97 #include <net/bpf.h>
98
99 #include <sys/sha1.h>
100
101 #include <netinet/ip_carp.h>
102
103 struct carp_mc_entry {
104 LIST_ENTRY(carp_mc_entry) mc_entries;
105 union {
106 struct ether_multi *mcu_enm;
107 } mc_u;
108 struct sockaddr_storage mc_addr;
109 };
110 #define mc_enm mc_u.mcu_enm
111
112 struct carp_softc {
113 struct ethercom sc_ac;
114 #define sc_if sc_ac.ec_if
115 #define sc_carpdev sc_ac.ec_if.if_carpdev
116 int ah_cookie;
117 int lh_cookie;
118 struct ip_moptions sc_imo;
119 #ifdef INET6
120 struct ip6_moptions sc_im6o;
121 #endif /* INET6 */
122 TAILQ_ENTRY(carp_softc) sc_list;
123
124 enum { INIT = 0, BACKUP, MASTER } sc_state;
125
126 int sc_suppress;
127 int sc_bow_out;
128
129 int sc_sendad_errors;
130 #define CARP_SENDAD_MAX_ERRORS 3
131 int sc_sendad_success;
132 #define CARP_SENDAD_MIN_SUCCESS 3
133
134 int sc_vhid;
135 int sc_advskew;
136 int sc_naddrs;
137 int sc_naddrs6;
138 int sc_advbase; /* seconds */
139 int sc_init_counter;
140 u_int64_t sc_counter;
141
142 /* authentication */
143 #define CARP_HMAC_PAD 64
144 unsigned char sc_key[CARP_KEY_LEN];
145 unsigned char sc_pad[CARP_HMAC_PAD];
146 SHA1_CTX sc_sha1;
147 u_int32_t sc_hashkey[2];
148
149 struct callout sc_ad_tmo; /* advertisement timeout */
150 struct callout sc_md_tmo; /* master down timeout */
151 struct callout sc_md6_tmo; /* master down timeout */
152
153 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
154 };
155
156 int carp_suppress_preempt = 0;
157 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */
158
159 static percpu_t *carpstat_percpu;
160
161 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x)
162
163 #ifdef MBUFTRACE
164 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
165 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
166 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
167 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
168 #endif
169
170 struct carp_if {
171 TAILQ_HEAD(, carp_softc) vhif_vrs;
172 int vhif_nvrs;
173
174 struct ifnet *vhif_ifp;
175 };
176
/*
 * CARP_LOG(sc, (fmt, args...)): emit one LOG_INFO line when logging is
 * enabled through carp_opts[CARPCTL_LOG].
 *
 * The line is prefixed with the interface name of `sc', or with "carp: "
 * when `sc' is NULL.  The second argument is a complete, parenthesised
 * addlog() argument list.  `sc' is expanded more than once, so it must
 * not have side effects.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement; a bare `if' here would capture a following `else' or break
 * an unbraced if/else arm.
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if (sc)						\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (/*CONSTCOND*/0)
187
188 static void carp_hmac_prepare(struct carp_softc *);
189 static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
190 unsigned char *);
191 static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
192 unsigned char *);
193 static void carp_setroute(struct carp_softc *, int);
194 static void carp_proto_input_c(struct mbuf *, struct carp_header *,
195 sa_family_t);
196 void carpattach(int);
197 static void carpdetach(struct carp_softc *);
198 static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
199 struct carp_header *);
200 static void carp_send_ad_all(void);
201 static void carp_send_ad(void *);
202 static void carp_send_arp(struct carp_softc *);
203 static void carp_master_down(void *);
204 static int carp_ioctl(struct ifnet *, u_long, void *);
205 static void carp_start(struct ifnet *);
206 static void carp_setrun(struct carp_softc *, sa_family_t);
207 static void carp_set_state(struct carp_softc *, int);
208 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
209 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
210
211 static void carp_multicast_cleanup(struct carp_softc *);
212 static int carp_set_ifp(struct carp_softc *, struct ifnet *);
213 static void carp_set_enaddr(struct carp_softc *);
214 #if 0
215 static void carp_addr_updated(void *);
216 #endif
217 static u_int32_t carp_hash(struct carp_softc *, u_char *);
218 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
219 static int carp_join_multicast(struct carp_softc *);
220 #ifdef INET6
221 static void carp_send_na(struct carp_softc *);
222 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
223 static int carp_join_multicast6(struct carp_softc *);
224 #endif
225 static int carp_clone_create(struct if_clone *, int);
226 static int carp_clone_destroy(struct ifnet *);
227 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *);
228 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *);
229 static void carp_ether_purgemulti(struct carp_softc *);
230
231 static void sysctl_net_inet_carp_setup(struct sysctllog **);
232
233 struct if_clone carp_cloner =
234 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
235
236 static __inline u_int16_t
237 carp_cksum(struct mbuf *m, int len)
238 {
239 return (in_cksum(m, len));
240 }
241
242 static void
243 carp_hmac_prepare(struct carp_softc *sc)
244 {
245 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
246 u_int8_t vhid = sc->sc_vhid & 0xff;
247 SHA1_CTX sha1ctx;
248 u_int32_t kmd[5];
249 struct ifaddr *ifa;
250 int i, found;
251 struct in_addr last, cur, in;
252 #ifdef INET6
253 struct in6_addr last6, cur6, in6;
254 #endif /* INET6 */
255
256 /* compute ipad from key */
257 memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
258 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
259 for (i = 0; i < sizeof(sc->sc_pad); i++)
260 sc->sc_pad[i] ^= 0x36;
261
262 /* precompute first part of inner hash */
263 SHA1Init(&sc->sc_sha1);
264 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
265 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
266 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
267
268 /* generate a key for the arpbalance hash, before the vhid is hashed */
269 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
270 SHA1Final((unsigned char *)kmd, &sha1ctx);
271 sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
272 sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
273
274 /* the rest of the precomputation */
275 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
276
277 /* Hash the addresses from smallest to largest, not interface order */
278 #ifdef INET
279 cur.s_addr = 0;
280 do {
281 found = 0;
282 last = cur;
283 cur.s_addr = 0xffffffff;
284 IFADDR_FOREACH(ifa, &sc->sc_if) {
285 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
286 if (ifa->ifa_addr->sa_family == AF_INET &&
287 ntohl(in.s_addr) > ntohl(last.s_addr) &&
288 ntohl(in.s_addr) < ntohl(cur.s_addr)) {
289 cur.s_addr = in.s_addr;
290 found++;
291 }
292 }
293 if (found)
294 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
295 } while (found);
296 #endif /* INET */
297
298 #ifdef INET6
299 memset(&cur6, 0x00, sizeof(cur6));
300 do {
301 found = 0;
302 last6 = cur6;
303 memset(&cur6, 0xff, sizeof(cur6));
304 IFADDR_FOREACH(ifa, &sc->sc_if) {
305 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
306 if (IN6_IS_ADDR_LINKLOCAL(&in6))
307 in6.s6_addr16[1] = 0;
308 if (ifa->ifa_addr->sa_family == AF_INET6 &&
309 memcmp(&in6, &last6, sizeof(in6)) > 0 &&
310 memcmp(&in6, &cur6, sizeof(in6)) < 0) {
311 cur6 = in6;
312 found++;
313 }
314 }
315 if (found)
316 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
317 } while (found);
318 #endif /* INET6 */
319
320 /* convert ipad to opad */
321 for (i = 0; i < sizeof(sc->sc_pad); i++)
322 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
323 }
324
325 static void
326 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
327 unsigned char md[20])
328 {
329 SHA1_CTX sha1ctx;
330
331 /* fetch first half of inner hash */
332 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
333
334 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
335 SHA1Final(md, &sha1ctx);
336
337 /* outer hash */
338 SHA1Init(&sha1ctx);
339 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
340 SHA1Update(&sha1ctx, md, 20);
341 SHA1Final(md, &sha1ctx);
342 }
343
344 static int
345 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
346 unsigned char md[20])
347 {
348 unsigned char md2[20];
349
350 carp_hmac_generate(sc, counter, md2);
351
352 return (memcmp(md, md2, sizeof(md2)));
353 }
354
355 static void
356 carp_setroute(struct carp_softc *sc, int cmd)
357 {
358 struct ifaddr *ifa;
359 int s;
360
361 KERNEL_LOCK(1, NULL);
362 s = splsoftnet();
363 IFADDR_FOREACH(ifa, &sc->sc_if) {
364 switch (ifa->ifa_addr->sa_family) {
365 case AF_INET: {
366 int count = 0;
367 struct rtentry *rt;
368 int hr_otherif, nr_ourif;
369
370 /*
371 * Avoid screwing with the routes if there are other
372 * carp interfaces which are master and have the same
373 * address.
374 */
375 if (sc->sc_carpdev != NULL &&
376 sc->sc_carpdev->if_carp != NULL) {
377 count = carp_addrcount(
378 (struct carp_if *)sc->sc_carpdev->if_carp,
379 ifatoia(ifa), CARP_COUNT_MASTER);
380 if ((cmd == RTM_ADD && count != 1) ||
381 (cmd == RTM_DELETE && count != 0))
382 continue;
383 }
384
385 /* Remove the existing host route, if any */
386 rtrequest(RTM_DELETE, ifa->ifa_addr,
387 ifa->ifa_addr, ifa->ifa_netmask,
388 RTF_HOST, NULL);
389
390 rt = NULL;
391 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
392 ifa->ifa_netmask, RTF_HOST, &rt);
393 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
394 rt->rt_flags & (RTF_CLONING|RTF_CLONED));
395 if (rt != NULL) {
396 rtfree(rt);
397 rt = NULL;
398 }
399
400 /* Check for a network route on our interface */
401
402 rt = NULL;
403 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
404 ifa->ifa_netmask, 0, &rt);
405 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
406
407 switch (cmd) {
408 case RTM_ADD:
409 if (hr_otherif) {
410 ifa->ifa_rtrequest = NULL;
411 ifa->ifa_flags &= ~RTF_CLONING;
412
413 rtrequest(RTM_ADD, ifa->ifa_addr,
414 ifa->ifa_addr, ifa->ifa_netmask,
415 RTF_UP | RTF_HOST, NULL);
416 }
417 if (!hr_otherif || nr_ourif || !rt) {
418 if (nr_ourif && !(rt->rt_flags &
419 RTF_CLONING))
420 rtrequest(RTM_DELETE,
421 ifa->ifa_addr,
422 ifa->ifa_addr,
423 ifa->ifa_netmask, 0, NULL);
424
425 ifa->ifa_rtrequest = arp_rtrequest;
426 ifa->ifa_flags |= RTF_CLONING;
427
428 if (rtrequest(RTM_ADD, ifa->ifa_addr,
429 ifa->ifa_addr, ifa->ifa_netmask, 0,
430 NULL) == 0)
431 ifa->ifa_flags |= IFA_ROUTE;
432 }
433 break;
434 case RTM_DELETE:
435 break;
436 default:
437 break;
438 }
439 if (rt != NULL) {
440 rtfree(rt);
441 rt = NULL;
442 }
443 break;
444 }
445
446 #ifdef INET6
447 case AF_INET6:
448 if (cmd == RTM_ADD)
449 in6_ifaddlocal(ifa);
450 else
451 in6_ifremlocal(ifa);
452 break;
453 #endif /* INET6 */
454 default:
455 break;
456 }
457 }
458 splx(s);
459 KERNEL_UNLOCK_ONE(NULL);
460 }
461
462 /*
463 * process input packet.
464 * we have rearranged checks order compared to the rfc,
465 * but it seems more efficient this way or not possible otherwise.
466 */
467 void
468 carp_proto_input(struct mbuf *m, ...)
469 {
470 struct ip *ip = mtod(m, struct ip *);
471 struct carp_softc *sc = NULL;
472 struct carp_header *ch;
473 int iplen, len;
474 va_list ap;
475
476 va_start(ap, m);
477 va_end(ap);
478
479 CARP_STATINC(CARP_STAT_IPACKETS);
480 MCLAIM(m, &carp_proto_mowner_rx);
481
482 if (!carp_opts[CARPCTL_ALLOW]) {
483 m_freem(m);
484 return;
485 }
486
487 /* check if received on a valid carp interface */
488 if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
489 CARP_STATINC(CARP_STAT_BADIF);
490 CARP_LOG(sc, ("packet received on non-carp interface: %s",
491 m->m_pkthdr.rcvif->if_xname));
492 m_freem(m);
493 return;
494 }
495
496 /* verify that the IP TTL is 255. */
497 if (ip->ip_ttl != CARP_DFLTTL) {
498 CARP_STATINC(CARP_STAT_BADTTL);
499 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
500 CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
501 m_freem(m);
502 return;
503 }
504
505 /*
506 * verify that the received packet length is
507 * equal to the CARP header
508 */
509 iplen = ip->ip_hl << 2;
510 len = iplen + sizeof(*ch);
511 if (len > m->m_pkthdr.len) {
512 CARP_STATINC(CARP_STAT_BADLEN);
513 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
514 m->m_pkthdr.rcvif->if_xname));
515 m_freem(m);
516 return;
517 }
518
519 if ((m = m_pullup(m, len)) == NULL) {
520 CARP_STATINC(CARP_STAT_HDROPS);
521 return;
522 }
523 ip = mtod(m, struct ip *);
524 ch = (struct carp_header *)((char *)ip + iplen);
525 /* verify the CARP checksum */
526 m->m_data += iplen;
527 if (carp_cksum(m, len - iplen)) {
528 CARP_STATINC(CARP_STAT_BADSUM);
529 CARP_LOG(sc, ("checksum failed on %s",
530 m->m_pkthdr.rcvif->if_xname));
531 m_freem(m);
532 return;
533 }
534 m->m_data -= iplen;
535
536 carp_proto_input_c(m, ch, AF_INET);
537 }
538
539 #ifdef INET6
540 int
541 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
542 {
543 struct mbuf *m = *mp;
544 struct carp_softc *sc = NULL;
545 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
546 struct carp_header *ch;
547 u_int len;
548
549 CARP_STATINC(CARP_STAT_IPACKETS6);
550 MCLAIM(m, &carp_proto6_mowner_rx);
551
552 if (!carp_opts[CARPCTL_ALLOW]) {
553 m_freem(m);
554 return (IPPROTO_DONE);
555 }
556
557 /* check if received on a valid carp interface */
558 if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
559 CARP_STATINC(CARP_STAT_BADIF);
560 CARP_LOG(sc, ("packet received on non-carp interface: %s",
561 m->m_pkthdr.rcvif->if_xname));
562 m_freem(m);
563 return (IPPROTO_DONE);
564 }
565
566 /* verify that the IP TTL is 255 */
567 if (ip6->ip6_hlim != CARP_DFLTTL) {
568 CARP_STATINC(CARP_STAT_BADTTL);
569 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
570 CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
571 m_freem(m);
572 return (IPPROTO_DONE);
573 }
574
575 /* verify that we have a complete carp packet */
576 len = m->m_len;
577 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
578 if (ch == NULL) {
579 CARP_STATINC(CARP_STAT_BADLEN);
580 CARP_LOG(sc, ("packet size %u too small", len));
581 return (IPPROTO_DONE);
582 }
583
584
585 /* verify the CARP checksum */
586 m->m_data += *offp;
587 if (carp_cksum(m, sizeof(*ch))) {
588 CARP_STATINC(CARP_STAT_BADSUM);
589 CARP_LOG(sc, ("checksum failed, on %s",
590 m->m_pkthdr.rcvif->if_xname));
591 m_freem(m);
592 return (IPPROTO_DONE);
593 }
594 m->m_data -= *offp;
595
596 carp_proto_input_c(m, ch, AF_INET6);
597 return (IPPROTO_DONE);
598 }
599 #endif /* INET6 */
600
601 static void
602 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
603 {
604 struct carp_softc *sc;
605 u_int64_t tmp_counter;
606 struct timeval sc_tv, ch_tv;
607
608 TAILQ_FOREACH(sc, &((struct carp_if *)
609 m->m_pkthdr.rcvif->if_carpdev->if_carp)->vhif_vrs, sc_list)
610 if (sc->sc_vhid == ch->carp_vhid)
611 break;
612
613 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
614 (IFF_UP|IFF_RUNNING)) {
615 CARP_STATINC(CARP_STAT_BADVHID);
616 m_freem(m);
617 return;
618 }
619
620 /*
621 * Check if our own advertisement was duplicated
622 * from a non simplex interface.
623 * XXX If there is no address on our physical interface
624 * there is no way to distinguish our ads from the ones
625 * another carp host might have sent us.
626 */
627 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
628 struct sockaddr sa;
629 struct ifaddr *ifa;
630
631 memset(&sa, 0, sizeof(sa));
632 sa.sa_family = af;
633 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
634
635 if (ifa && af == AF_INET) {
636 struct ip *ip = mtod(m, struct ip *);
637 if (ip->ip_src.s_addr ==
638 ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
639 m_freem(m);
640 return;
641 }
642 }
643 #ifdef INET6
644 if (ifa && af == AF_INET6) {
645 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
646 struct in6_addr in6_src, in6_found;
647
648 in6_src = ip6->ip6_src;
649 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
650 if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
651 in6_src.s6_addr16[1] = 0;
652 if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
653 in6_found.s6_addr16[1] = 0;
654 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
655 m_freem(m);
656 return;
657 }
658 }
659 #endif /* INET6 */
660 }
661
662 nanotime(&sc->sc_if.if_lastchange);
663 sc->sc_if.if_ipackets++;
664 sc->sc_if.if_ibytes += m->m_pkthdr.len;
665
666 /* verify the CARP version. */
667 if (ch->carp_version != CARP_VERSION) {
668 CARP_STATINC(CARP_STAT_BADVER);
669 sc->sc_if.if_ierrors++;
670 CARP_LOG(sc, ("invalid version %d != %d",
671 ch->carp_version, CARP_VERSION));
672 m_freem(m);
673 return;
674 }
675
676 /* verify the hash */
677 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
678 CARP_STATINC(CARP_STAT_BADAUTH);
679 sc->sc_if.if_ierrors++;
680 CARP_LOG(sc, ("incorrect hash"));
681 m_freem(m);
682 return;
683 }
684
685 tmp_counter = ntohl(ch->carp_counter[0]);
686 tmp_counter = tmp_counter<<32;
687 tmp_counter += ntohl(ch->carp_counter[1]);
688
689 /* XXX Replay protection goes here */
690
691 sc->sc_init_counter = 0;
692 sc->sc_counter = tmp_counter;
693
694
695 sc_tv.tv_sec = sc->sc_advbase;
696 if (carp_suppress_preempt && sc->sc_advskew < 240)
697 sc_tv.tv_usec = 240 * 1000000 / 256;
698 else
699 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
700 ch_tv.tv_sec = ch->carp_advbase;
701 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
702
703 switch (sc->sc_state) {
704 case INIT:
705 break;
706 case MASTER:
707 /*
708 * If we receive an advertisement from a backup who's going to
709 * be more frequent than us, go into BACKUP state.
710 */
711 if (timercmp(&sc_tv, &ch_tv, >) ||
712 timercmp(&sc_tv, &ch_tv, ==)) {
713 callout_stop(&sc->sc_ad_tmo);
714 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
715 carp_set_state(sc, BACKUP);
716 carp_setrun(sc, 0);
717 carp_setroute(sc, RTM_DELETE);
718 }
719 break;
720 case BACKUP:
721 /*
722 * If we're pre-empting masters who advertise slower than us,
723 * and this one claims to be slower, treat him as down.
724 */
725 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
726 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
727 carp_master_down(sc);
728 break;
729 }
730
731 /*
732 * If the master is going to advertise at such a low frequency
733 * that he's guaranteed to time out, we'd might as well just
734 * treat him as timed out now.
735 */
736 sc_tv.tv_sec = sc->sc_advbase * 3;
737 if (timercmp(&sc_tv, &ch_tv, <)) {
738 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
739 carp_master_down(sc);
740 break;
741 }
742
743 /*
744 * Otherwise, we reset the counter and wait for the next
745 * advertisement.
746 */
747 carp_setrun(sc, af);
748 break;
749 }
750
751 m_freem(m);
752 return;
753 }
754
755 /*
756 * Interface side of the CARP implementation.
757 */
758
759 /* ARGSUSED */
760 void
761 carpattach(int n)
762 {
763 if_clone_attach(&carp_cloner);
764
765 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
766 }
767
768 static int
769 carp_clone_create(struct if_clone *ifc, int unit)
770 {
771 extern int ifqmaxlen;
772 struct carp_softc *sc;
773 struct ifnet *ifp;
774
775 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
776 if (!sc)
777 return (ENOMEM);
778
779 sc->sc_suppress = 0;
780 sc->sc_advbase = CARP_DFLTINTV;
781 sc->sc_vhid = -1; /* required setting */
782 sc->sc_advskew = 0;
783 sc->sc_init_counter = 1;
784 sc->sc_naddrs = sc->sc_naddrs6 = 0;
785 #ifdef INET6
786 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
787 #endif /* INET6 */
788
789 callout_init(&sc->sc_ad_tmo, 0);
790 callout_init(&sc->sc_md_tmo, 0);
791 callout_init(&sc->sc_md6_tmo, 0);
792
793 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
794 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
795 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
796
797 LIST_INIT(&sc->carp_mc_listhead);
798 ifp = &sc->sc_if;
799 ifp->if_softc = sc;
800 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
801 unit);
802 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
803 ifp->if_ioctl = carp_ioctl;
804 ifp->if_start = carp_start;
805 ifp->if_output = carp_output;
806 ifp->if_type = IFT_CARP;
807 ifp->if_addrlen = ETHER_ADDR_LEN;
808 ifp->if_hdrlen = ETHER_HDR_LEN;
809 ifp->if_mtu = ETHERMTU;
810 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
811 IFQ_SET_READY(&ifp->if_snd);
812 if_attach(ifp);
813
814 if_alloc_sadl(ifp);
815 ifp->if_broadcastaddr = etherbroadcastaddr;
816 carp_set_enaddr(sc);
817 LIST_INIT(&sc->sc_ac.ec_multiaddrs);
818 bpf_attach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
819 #ifdef MBUFTRACE
820 strlcpy(sc->sc_ac.ec_tx_mowner.mo_name, ifp->if_xname,
821 sizeof(sc->sc_ac.ec_tx_mowner.mo_name));
822 strlcpy(sc->sc_ac.ec_tx_mowner.mo_descr, "tx",
823 sizeof(sc->sc_ac.ec_tx_mowner.mo_descr));
824 strlcpy(sc->sc_ac.ec_rx_mowner.mo_name, ifp->if_xname,
825 sizeof(sc->sc_ac.ec_rx_mowner.mo_name));
826 strlcpy(sc->sc_ac.ec_rx_mowner.mo_descr, "rx",
827 sizeof(sc->sc_ac.ec_rx_mowner.mo_descr));
828 MOWNER_ATTACH(&sc->sc_ac.ec_tx_mowner);
829 MOWNER_ATTACH(&sc->sc_ac.ec_rx_mowner);
830 ifp->if_mowner = &sc->sc_ac.ec_tx_mowner;
831 #endif
832 return (0);
833 }
834
835 static int
836 carp_clone_destroy(struct ifnet *ifp)
837 {
838 struct carp_softc *sc = ifp->if_softc;
839
840 carpdetach(ifp->if_softc);
841 ether_ifdetach(ifp);
842 if_detach(ifp);
843 callout_destroy(&sc->sc_ad_tmo);
844 callout_destroy(&sc->sc_md_tmo);
845 callout_destroy(&sc->sc_md6_tmo);
846 free(ifp->if_softc, M_DEVBUF);
847
848 return (0);
849 }
850
851 static void
852 carpdetach(struct carp_softc *sc)
853 {
854 struct carp_if *cif;
855 int s;
856
857 callout_stop(&sc->sc_ad_tmo);
858 callout_stop(&sc->sc_md_tmo);
859 callout_stop(&sc->sc_md6_tmo);
860
861 if (sc->sc_suppress)
862 carp_suppress_preempt--;
863 sc->sc_suppress = 0;
864
865 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
866 carp_suppress_preempt--;
867 sc->sc_sendad_errors = 0;
868
869 carp_set_state(sc, INIT);
870 sc->sc_if.if_flags &= ~IFF_UP;
871 carp_setrun(sc, 0);
872 carp_multicast_cleanup(sc);
873
874 KERNEL_LOCK(1, NULL);
875 s = splnet();
876 if (sc->sc_carpdev != NULL) {
877 /* XXX linkstatehook removal */
878 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
879 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
880 if (!--cif->vhif_nvrs) {
881 ifpromisc(sc->sc_carpdev, 0);
882 sc->sc_carpdev->if_carp = NULL;
883 free(cif, M_IFADDR);
884 }
885 }
886 sc->sc_carpdev = NULL;
887 splx(s);
888 KERNEL_UNLOCK_ONE(NULL);
889 }
890
891 /* Detach an interface from the carp. */
892 void
893 carp_ifdetach(struct ifnet *ifp)
894 {
895 struct carp_softc *sc, *nextsc;
896 struct carp_if *cif = (struct carp_if *)ifp->if_carp;
897
898 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
899 nextsc = TAILQ_NEXT(sc, sc_list);
900 carpdetach(sc);
901 }
902 }
903
904 static int
905 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
906 struct carp_header *ch)
907 {
908 if (sc->sc_init_counter) {
909 /* this could also be seconds since unix epoch */
910 sc->sc_counter = cprng_fast64();
911 } else
912 sc->sc_counter++;
913
914 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
915 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
916
917 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
918
919 return (0);
920 }
921
922 static void
923 carp_send_ad_all(void)
924 {
925 struct ifnet *ifp;
926 struct carp_if *cif;
927 struct carp_softc *vh;
928
929 IFNET_FOREACH(ifp) {
930 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
931 continue;
932
933 cif = (struct carp_if *)ifp->if_carp;
934 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
935 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
936 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
937 carp_send_ad(vh);
938 }
939 }
940 }
941
942
943 static void
944 carp_send_ad(void *v)
945 {
946 struct carp_header ch;
947 struct timeval tv;
948 struct carp_softc *sc = v;
949 struct carp_header *ch_ptr;
950 struct mbuf *m;
951 int error, len, advbase, advskew, s;
952 struct ifaddr *ifa;
953 struct sockaddr sa;
954
955 KERNEL_LOCK(1, NULL);
956 s = splsoftnet();
957
958 advbase = advskew = 0; /* Sssssh compiler */
959 if (sc->sc_carpdev == NULL) {
960 sc->sc_if.if_oerrors++;
961 goto retry_later;
962 }
963
964 /* bow out if we've gone to backup (the carp interface is going down) */
965 if (sc->sc_bow_out) {
966 sc->sc_bow_out = 0;
967 advbase = 255;
968 advskew = 255;
969 } else {
970 advbase = sc->sc_advbase;
971 if (!carp_suppress_preempt || sc->sc_advskew > 240)
972 advskew = sc->sc_advskew;
973 else
974 advskew = 240;
975 tv.tv_sec = advbase;
976 tv.tv_usec = advskew * 1000000 / 256;
977 }
978
979 ch.carp_version = CARP_VERSION;
980 ch.carp_type = CARP_ADVERTISEMENT;
981 ch.carp_vhid = sc->sc_vhid;
982 ch.carp_advbase = advbase;
983 ch.carp_advskew = advskew;
984 ch.carp_authlen = 7; /* XXX DEFINE */
985 ch.carp_pad1 = 0; /* must be zero */
986 ch.carp_cksum = 0;
987
988
989 #ifdef INET
990 if (sc->sc_naddrs) {
991 struct ip *ip;
992
993 MGETHDR(m, M_DONTWAIT, MT_HEADER);
994 if (m == NULL) {
995 sc->sc_if.if_oerrors++;
996 CARP_STATINC(CARP_STAT_ONOMEM);
997 /* XXX maybe less ? */
998 goto retry_later;
999 }
1000 MCLAIM(m, &carp_proto_mowner_tx);
1001 len = sizeof(*ip) + sizeof(ch);
1002 m->m_pkthdr.len = len;
1003 m->m_pkthdr.rcvif = NULL;
1004 m->m_len = len;
1005 MH_ALIGN(m, m->m_len);
1006 m->m_flags |= M_MCAST;
1007 ip = mtod(m, struct ip *);
1008 ip->ip_v = IPVERSION;
1009 ip->ip_hl = sizeof(*ip) >> 2;
1010 ip->ip_tos = IPTOS_LOWDELAY;
1011 ip->ip_len = htons(len);
1012 ip->ip_id = 0; /* no need for id, we don't support fragments */
1013 ip->ip_off = htons(IP_DF);
1014 ip->ip_ttl = CARP_DFLTTL;
1015 ip->ip_p = IPPROTO_CARP;
1016 ip->ip_sum = 0;
1017
1018 memset(&sa, 0, sizeof(sa));
1019 sa.sa_family = AF_INET;
1020 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1021 if (ifa == NULL)
1022 ip->ip_src.s_addr = 0;
1023 else
1024 ip->ip_src.s_addr =
1025 ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1026 ip->ip_dst.s_addr = INADDR_CARP_GROUP;
1027
1028 ch_ptr = (struct carp_header *)(&ip[1]);
1029 memcpy(ch_ptr, &ch, sizeof(ch));
1030 if (carp_prepare_ad(m, sc, ch_ptr))
1031 goto retry_later;
1032
1033 m->m_data += sizeof(*ip);
1034 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1035 m->m_data -= sizeof(*ip);
1036
1037 nanotime(&sc->sc_if.if_lastchange);
1038 sc->sc_if.if_opackets++;
1039 sc->sc_if.if_obytes += len;
1040 CARP_STATINC(CARP_STAT_OPACKETS);
1041
1042 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1043 NULL);
1044 if (error) {
1045 if (error == ENOBUFS)
1046 CARP_STATINC(CARP_STAT_ONOMEM);
1047 else
1048 CARP_LOG(sc, ("ip_output failed: %d", error));
1049 sc->sc_if.if_oerrors++;
1050 if (sc->sc_sendad_errors < INT_MAX)
1051 sc->sc_sendad_errors++;
1052 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1053 carp_suppress_preempt++;
1054 if (carp_suppress_preempt == 1)
1055 carp_send_ad_all();
1056 }
1057 sc->sc_sendad_success = 0;
1058 } else {
1059 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1060 if (++sc->sc_sendad_success >=
1061 CARP_SENDAD_MIN_SUCCESS) {
1062 carp_suppress_preempt--;
1063 sc->sc_sendad_errors = 0;
1064 }
1065 } else
1066 sc->sc_sendad_errors = 0;
1067 }
1068 }
1069 #endif /* INET */
1070 #ifdef INET6
1071 if (sc->sc_naddrs6) {
1072 struct ip6_hdr *ip6;
1073
1074 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1075 if (m == NULL) {
1076 sc->sc_if.if_oerrors++;
1077 CARP_STATINC(CARP_STAT_ONOMEM);
1078 /* XXX maybe less ? */
1079 goto retry_later;
1080 }
1081 MCLAIM(m, &carp_proto6_mowner_tx);
1082 len = sizeof(*ip6) + sizeof(ch);
1083 m->m_pkthdr.len = len;
1084 m->m_pkthdr.rcvif = NULL;
1085 m->m_len = len;
1086 MH_ALIGN(m, m->m_len);
1087 m->m_flags |= M_MCAST;
1088 ip6 = mtod(m, struct ip6_hdr *);
1089 memset(ip6, 0, sizeof(*ip6));
1090 ip6->ip6_vfc |= IPV6_VERSION;
1091 ip6->ip6_hlim = CARP_DFLTTL;
1092 ip6->ip6_nxt = IPPROTO_CARP;
1093
1094 /* set the source address */
1095 memset(&sa, 0, sizeof(sa));
1096 sa.sa_family = AF_INET6;
1097 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1098 if (ifa == NULL) /* This should never happen with IPv6 */
1099 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
1100 else
1101 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1102 &ip6->ip6_src, sizeof(struct in6_addr));
1103 /* set the multicast destination */
1104
1105 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1106 ip6->ip6_dst.s6_addr8[15] = 0x12;
1107 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1108 sc->sc_if.if_oerrors++;
1109 m_freem(m);
1110 CARP_LOG(sc, ("in6_setscope failed"));
1111 goto retry_later;
1112 }
1113
1114 ch_ptr = (struct carp_header *)(&ip6[1]);
1115 memcpy(ch_ptr, &ch, sizeof(ch));
1116 if (carp_prepare_ad(m, sc, ch_ptr))
1117 goto retry_later;
1118
1119 m->m_data += sizeof(*ip6);
1120 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1121 m->m_data -= sizeof(*ip6);
1122
1123 nanotime(&sc->sc_if.if_lastchange);
1124 sc->sc_if.if_opackets++;
1125 sc->sc_if.if_obytes += len;
1126 CARP_STATINC(CARP_STAT_OPACKETS6);
1127
1128 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1129 if (error) {
1130 if (error == ENOBUFS)
1131 CARP_STATINC(CARP_STAT_ONOMEM);
1132 else
1133 CARP_LOG(sc, ("ip6_output failed: %d", error));
1134 sc->sc_if.if_oerrors++;
1135 if (sc->sc_sendad_errors < INT_MAX)
1136 sc->sc_sendad_errors++;
1137 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1138 carp_suppress_preempt++;
1139 if (carp_suppress_preempt == 1)
1140 carp_send_ad_all();
1141 }
1142 sc->sc_sendad_success = 0;
1143 } else {
1144 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1145 if (++sc->sc_sendad_success >=
1146 CARP_SENDAD_MIN_SUCCESS) {
1147 carp_suppress_preempt--;
1148 sc->sc_sendad_errors = 0;
1149 }
1150 } else
1151 sc->sc_sendad_errors = 0;
1152 }
1153 }
1154 #endif /* INET6 */
1155
1156 retry_later:
1157 splx(s);
1158 KERNEL_UNLOCK_ONE(NULL);
1159 if (advbase != 255 || advskew != 255)
1160 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1161 }
1162
1163 /*
1164 * Broadcast a gratuitous ARP request containing
1165 * the virtual router MAC address for each IP address
1166 * associated with the virtual router.
1167 */
1168 static void
1169 carp_send_arp(struct carp_softc *sc)
1170 {
1171 struct ifaddr *ifa;
1172 struct in_addr *in;
1173 int s;
1174
1175 KERNEL_LOCK(1, NULL);
1176 s = splsoftnet();
1177 IFADDR_FOREACH(ifa, &sc->sc_if) {
1178
1179 if (ifa->ifa_addr->sa_family != AF_INET)
1180 continue;
1181
1182 in = &ifatoia(ifa)->ia_addr.sin_addr;
1183 arprequest(sc->sc_carpdev, in, in, CLLADDR(sc->sc_if.if_sadl));
1184 }
1185 splx(s);
1186 KERNEL_UNLOCK_ONE(NULL);
1187 }
1188
1189 #ifdef INET6
1190 static void
1191 carp_send_na(struct carp_softc *sc)
1192 {
1193 struct ifaddr *ifa;
1194 struct in6_addr *in6;
1195 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1196 int s;
1197
1198 KERNEL_LOCK(1, NULL);
1199 s = splsoftnet();
1200
1201 IFADDR_FOREACH(ifa, &sc->sc_if) {
1202
1203 if (ifa->ifa_addr->sa_family != AF_INET6)
1204 continue;
1205
1206 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1207 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1208 ND_NA_FLAG_OVERRIDE, 1, NULL);
1209 }
1210 splx(s);
1211 KERNEL_UNLOCK_ONE(NULL);
1212 }
1213 #endif /* INET6 */
1214
/*
 * Three-word mixing step, based on bridge_hash() in if_bridge.c.
 *
 * a, b and c must be modifiable lvalues; all three are updated in place.
 * Each argument is evaluated many times, so only pass plain variables.
 */
#define	mix(a, b, c)							\
do {									\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);			\
} while (0)
1230
1231 static u_int32_t
1232 carp_hash(struct carp_softc *sc, u_char *src)
1233 {
1234 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1235
1236 c += sc->sc_key[3] << 24;
1237 c += sc->sc_key[2] << 16;
1238 c += sc->sc_key[1] << 8;
1239 c += sc->sc_key[0];
1240 b += src[5] << 8;
1241 b += src[4];
1242 a += src[3] << 24;
1243 a += src[2] << 16;
1244 a += src[1] << 8;
1245 a += src[0];
1246
1247 mix(a, b, c);
1248 return (c);
1249 }
1250
1251 static int
1252 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1253 {
1254 struct carp_softc *vh;
1255 struct ifaddr *ifa;
1256 int count = 0;
1257
1258 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1259 if ((type == CARP_COUNT_RUNNING &&
1260 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1261 (IFF_UP|IFF_RUNNING)) ||
1262 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1263 IFADDR_FOREACH(ifa, &vh->sc_if) {
1264 if (ifa->ifa_addr->sa_family == AF_INET &&
1265 ia->ia_addr.sin_addr.s_addr ==
1266 ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1267 count++;
1268 }
1269 }
1270 }
1271 return (count);
1272 }
1273
1274 int
1275 carp_iamatch(struct in_ifaddr *ia, u_char *src,
1276 u_int32_t *count, u_int32_t index)
1277 {
1278 struct carp_softc *sc = ia->ia_ifp->if_softc;
1279
1280 if (carp_opts[CARPCTL_ARPBALANCE]) {
1281 /*
1282 * We use the source ip to decide which virtual host should
1283 * handle the request. If we're master of that virtual host,
1284 * then we respond, otherwise, just drop the arp packet on
1285 * the floor.
1286 */
1287
1288 /* Count the elegible carp interfaces with this address */
1289 if (*count == 0)
1290 *count = carp_addrcount(
1291 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
1292 ia, CARP_COUNT_RUNNING);
1293
1294 /* This should never happen, but... */
1295 if (*count == 0)
1296 return (0);
1297
1298 if (carp_hash(sc, src) % *count == index - 1 &&
1299 sc->sc_state == MASTER) {
1300 return (1);
1301 }
1302 } else {
1303 if (sc->sc_state == MASTER)
1304 return (1);
1305 }
1306
1307 return (0);
1308 }
1309
1310 #ifdef INET6
1311 struct ifaddr *
1312 carp_iamatch6(void *v, struct in6_addr *taddr)
1313 {
1314 struct carp_if *cif = v;
1315 struct carp_softc *vh;
1316 struct ifaddr *ifa;
1317
1318 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1319 IFADDR_FOREACH(ifa, &vh->sc_if) {
1320 if (IN6_ARE_ADDR_EQUAL(taddr,
1321 &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1322 ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1323 (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER)
1324 return (ifa);
1325 }
1326 }
1327
1328 return (NULL);
1329 }
1330 #endif /* INET6 */
1331
1332 struct ifnet *
1333 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
1334 {
1335 struct carp_if *cif = (struct carp_if *)v;
1336 struct carp_softc *vh;
1337 u_int8_t *ena;
1338
1339 if (src)
1340 ena = (u_int8_t *)&eh->ether_shost;
1341 else
1342 ena = (u_int8_t *)&eh->ether_dhost;
1343
1344 switch (iftype) {
1345 case IFT_ETHER:
1346 case IFT_FDDI:
1347 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1348 return (NULL);
1349 break;
1350 case IFT_ISO88025:
1351 if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
1352 return (NULL);
1353 break;
1354 default:
1355 return (NULL);
1356 break;
1357 }
1358
1359 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1360 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1361 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
1362 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
1363 ETHER_ADDR_LEN)) {
1364 return (&vh->sc_if);
1365 }
1366
1367 return (NULL);
1368 }
1369
1370 int
1371 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1372 {
1373 struct ether_header eh;
1374 struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp;
1375 struct ifnet *ifp;
1376
1377 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
1378 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
1379 eh.ether_type = etype;
1380
1381 if (m->m_flags & (M_BCAST|M_MCAST)) {
1382 struct carp_softc *vh;
1383 struct mbuf *m0;
1384
1385 /*
1386 * XXX Should really check the list of multicast addresses
1387 * for each CARP interface _before_ copying.
1388 */
1389 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1390 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1391 if (m0 == NULL)
1392 continue;
1393 m0->m_pkthdr.rcvif = &vh->sc_if;
1394 ether_input(&vh->sc_if, m0);
1395 }
1396 return (1);
1397 }
1398
1399 ifp = carp_ourether(cif, &eh, m->m_pkthdr.rcvif->if_type, 0);
1400 if (ifp == NULL) {
1401 return (1);
1402 }
1403
1404 m->m_pkthdr.rcvif = ifp;
1405
1406 bpf_mtap(ifp, m);
1407 ifp->if_ipackets++;
1408 ether_input(ifp, m);
1409 return (0);
1410 }
1411
1412 static void
1413 carp_master_down(void *v)
1414 {
1415 struct carp_softc *sc = v;
1416
1417 switch (sc->sc_state) {
1418 case INIT:
1419 printf("%s: master_down event in INIT state\n",
1420 sc->sc_if.if_xname);
1421 break;
1422 case MASTER:
1423 break;
1424 case BACKUP:
1425 CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
1426 carp_set_state(sc, MASTER);
1427 carp_send_ad(sc);
1428 carp_send_arp(sc);
1429 #ifdef INET6
1430 carp_send_na(sc);
1431 #endif /* INET6 */
1432 carp_setrun(sc, 0);
1433 carp_setroute(sc, RTM_ADD);
1434 break;
1435 }
1436 }
1437
1438 /*
1439 * When in backup state, af indicates whether to reset the master down timer
1440 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1441 */
1442 static void
1443 carp_setrun(struct carp_softc *sc, sa_family_t af)
1444 {
1445 struct timeval tv;
1446
1447 if (sc->sc_carpdev == NULL) {
1448 sc->sc_if.if_flags &= ~IFF_RUNNING;
1449 carp_set_state(sc, INIT);
1450 return;
1451 }
1452
1453 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
1454 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1455 sc->sc_if.if_flags |= IFF_RUNNING;
1456 } else {
1457 sc->sc_if.if_flags &= ~IFF_RUNNING;
1458 carp_setroute(sc, RTM_DELETE);
1459 return;
1460 }
1461
1462 switch (sc->sc_state) {
1463 case INIT:
1464 carp_set_state(sc, BACKUP);
1465 carp_setroute(sc, RTM_DELETE);
1466 carp_setrun(sc, 0);
1467 break;
1468 case BACKUP:
1469 callout_stop(&sc->sc_ad_tmo);
1470 tv.tv_sec = 3 * sc->sc_advbase;
1471 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1472 switch (af) {
1473 #ifdef INET
1474 case AF_INET:
1475 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1476 break;
1477 #endif /* INET */
1478 #ifdef INET6
1479 case AF_INET6:
1480 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1481 break;
1482 #endif /* INET6 */
1483 default:
1484 if (sc->sc_naddrs)
1485 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1486 if (sc->sc_naddrs6)
1487 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1488 break;
1489 }
1490 break;
1491 case MASTER:
1492 tv.tv_sec = sc->sc_advbase;
1493 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1494 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1495 break;
1496 }
1497 }
1498
1499 static void
1500 carp_multicast_cleanup(struct carp_softc *sc)
1501 {
1502 struct ip_moptions *imo = &sc->sc_imo;
1503 #ifdef INET6
1504 struct ip6_moptions *im6o = &sc->sc_im6o;
1505 #endif
1506 u_int16_t n = imo->imo_num_memberships;
1507
1508 /* Clean up our own multicast memberships */
1509 while (n-- > 0) {
1510 if (imo->imo_membership[n] != NULL) {
1511 in_delmulti(imo->imo_membership[n]);
1512 imo->imo_membership[n] = NULL;
1513 }
1514 }
1515 imo->imo_num_memberships = 0;
1516 imo->imo_multicast_ifp = NULL;
1517
1518 #ifdef INET6
1519 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1520 struct in6_multi_mship *imm =
1521 LIST_FIRST(&im6o->im6o_memberships);
1522
1523 LIST_REMOVE(imm, i6mm_chain);
1524 in6_leavegroup(imm);
1525 }
1526 im6o->im6o_multicast_ifp = NULL;
1527 #endif
1528
1529 /* And any other multicast memberships */
1530 carp_ether_purgemulti(sc);
1531 }
1532
1533 static int
1534 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1535 {
1536 struct carp_if *cif, *ncif = NULL;
1537 struct carp_softc *vr, *after = NULL;
1538 int myself = 0, error = 0;
1539 int s;
1540
1541 if (ifp == sc->sc_carpdev)
1542 return (0);
1543
1544 if (ifp != NULL) {
1545 if ((ifp->if_flags & IFF_MULTICAST) == 0)
1546 return (EADDRNOTAVAIL);
1547
1548 if (ifp->if_type == IFT_CARP)
1549 return (EINVAL);
1550
1551 if (ifp->if_carp == NULL) {
1552 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
1553 if (ncif == NULL)
1554 return (ENOBUFS);
1555 if ((error = ifpromisc(ifp, 1))) {
1556 free(ncif, M_IFADDR);
1557 return (error);
1558 }
1559
1560 ncif->vhif_ifp = ifp;
1561 TAILQ_INIT(&ncif->vhif_vrs);
1562 } else {
1563 cif = (struct carp_if *)ifp->if_carp;
1564 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1565 if (vr != sc && vr->sc_vhid == sc->sc_vhid)
1566 return (EINVAL);
1567 }
1568
1569 /* detach from old interface */
1570 if (sc->sc_carpdev != NULL)
1571 carpdetach(sc);
1572
1573 /* join multicast groups */
1574 if (sc->sc_naddrs < 0 &&
1575 (error = carp_join_multicast(sc)) != 0) {
1576 if (ncif != NULL)
1577 free(ncif, M_IFADDR);
1578 return (error);
1579 }
1580
1581 #ifdef INET6
1582 if (sc->sc_naddrs6 < 0 &&
1583 (error = carp_join_multicast6(sc)) != 0) {
1584 if (ncif != NULL)
1585 free(ncif, M_IFADDR);
1586 carp_multicast_cleanup(sc);
1587 return (error);
1588 }
1589 #endif
1590
1591 /* attach carp interface to physical interface */
1592 if (ncif != NULL)
1593 ifp->if_carp = (void *)ncif;
1594 sc->sc_carpdev = ifp;
1595 sc->sc_if.if_capabilities = ifp->if_capabilities &
1596 (IFCAP_TSOv4 | IFCAP_TSOv6 |
1597 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
1598 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
1599 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
1600 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
1601 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);
1602
1603 cif = (struct carp_if *)ifp->if_carp;
1604 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1605 if (vr == sc)
1606 myself = 1;
1607 if (vr->sc_vhid < sc->sc_vhid)
1608 after = vr;
1609 }
1610
1611 if (!myself) {
1612 /* We're trying to keep things in order */
1613 if (after == NULL) {
1614 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1615 } else {
1616 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1617 sc, sc_list);
1618 }
1619 cif->vhif_nvrs++;
1620 }
1621 if (sc->sc_naddrs || sc->sc_naddrs6)
1622 sc->sc_if.if_flags |= IFF_UP;
1623 carp_set_enaddr(sc);
1624 KERNEL_LOCK(1, NULL);
1625 s = splnet();
1626 /* XXX linkstatehooks establish */
1627 carp_carpdev_state(ifp);
1628 splx(s);
1629 KERNEL_UNLOCK_ONE(NULL);
1630 } else {
1631 carpdetach(sc);
1632 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1633 }
1634 return (0);
1635 }
1636
1637 static void
1638 carp_set_enaddr(struct carp_softc *sc)
1639 {
1640 uint8_t enaddr[ETHER_ADDR_LEN];
1641 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
1642 enaddr[0] = 3;
1643 enaddr[1] = 0;
1644 enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
1645 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
1646 enaddr[4] = 0;
1647 enaddr[5] = 0;
1648 } else {
1649 enaddr[0] = 0;
1650 enaddr[1] = 0;
1651 enaddr[2] = 0x5e;
1652 enaddr[3] = 0;
1653 enaddr[4] = 1;
1654 enaddr[5] = sc->sc_vhid;
1655 }
1656 if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
1657 }
1658
1659 #if 0
1660 static void
1661 carp_addr_updated(void *v)
1662 {
1663 struct carp_softc *sc = (struct carp_softc *) v;
1664 struct ifaddr *ifa;
1665 int new_naddrs = 0, new_naddrs6 = 0;
1666
1667 IFADDR_FOREACH(ifa, &sc->sc_if) {
1668 if (ifa->ifa_addr->sa_family == AF_INET)
1669 new_naddrs++;
1670 else if (ifa->ifa_addr->sa_family == AF_INET6)
1671 new_naddrs6++;
1672 }
1673
1674 /* Handle a callback after SIOCDIFADDR */
1675 if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
1676 struct in_addr mc_addr;
1677
1678 sc->sc_naddrs = new_naddrs;
1679 sc->sc_naddrs6 = new_naddrs6;
1680
1681 /* Re-establish multicast membership removed by in_control */
1682 mc_addr.s_addr = INADDR_CARP_GROUP;
1683 if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
1684 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1685
1686 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1687 carp_join_multicast(sc);
1688 }
1689
1690 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1691 sc->sc_if.if_flags &= ~IFF_UP;
1692 carp_set_state(sc, INIT);
1693 } else
1694 carp_hmac_prepare(sc);
1695 }
1696
1697 carp_setrun(sc, 0);
1698 }
1699 #endif
1700
1701 static int
1702 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1703 {
1704 struct ifnet *ifp = sc->sc_carpdev;
1705 struct in_ifaddr *ia, *ia_if;
1706 int error = 0;
1707
1708 if (sin->sin_addr.s_addr == 0) {
1709 if (!(sc->sc_if.if_flags & IFF_UP))
1710 carp_set_state(sc, INIT);
1711 if (sc->sc_naddrs)
1712 sc->sc_if.if_flags |= IFF_UP;
1713 carp_setrun(sc, 0);
1714 return (0);
1715 }
1716
1717 /* we have to do this by hand to ensure we don't match on ourselves */
1718 ia_if = NULL;
1719 for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
1720 ia = TAILQ_NEXT(ia, ia_list)) {
1721
1722 /* and, yeah, we need a multicast-capable iface too */
1723 if (ia->ia_ifp != &sc->sc_if &&
1724 ia->ia_ifp->if_type != IFT_CARP &&
1725 (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1726 (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
1727 ia->ia_subnet) {
1728 if (!ia_if)
1729 ia_if = ia;
1730 }
1731 }
1732
1733 if (ia_if) {
1734 ia = ia_if;
1735 if (ifp) {
1736 if (ifp != ia->ia_ifp)
1737 return (EADDRNOTAVAIL);
1738 } else {
1739 ifp = ia->ia_ifp;
1740 }
1741 }
1742
1743 if ((error = carp_set_ifp(sc, ifp)))
1744 return (error);
1745
1746 if (sc->sc_carpdev == NULL)
1747 return (EADDRNOTAVAIL);
1748
1749 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1750 return (error);
1751
1752 sc->sc_naddrs++;
1753 if (sc->sc_carpdev != NULL)
1754 sc->sc_if.if_flags |= IFF_UP;
1755
1756 carp_set_state(sc, INIT);
1757 carp_setrun(sc, 0);
1758
1759 /*
1760 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
1761 * to correct any inappropriate routes that it inserted.
1762 */
1763 if (sc->ah_cookie == 0) {
1764 /* XXX link address hook */
1765 }
1766
1767 return (0);
1768 }
1769
1770 static int
1771 carp_join_multicast(struct carp_softc *sc)
1772 {
1773 struct ip_moptions *imo = &sc->sc_imo, tmpimo;
1774 struct in_addr addr;
1775
1776 memset(&tmpimo, 0, sizeof(tmpimo));
1777 addr.s_addr = INADDR_CARP_GROUP;
1778 if ((tmpimo.imo_membership[0] =
1779 in_addmulti(&addr, &sc->sc_if)) == NULL) {
1780 return (ENOBUFS);
1781 }
1782
1783 imo->imo_membership[0] = tmpimo.imo_membership[0];
1784 imo->imo_num_memberships = 1;
1785 imo->imo_multicast_ifp = &sc->sc_if;
1786 imo->imo_multicast_ttl = CARP_DFLTTL;
1787 imo->imo_multicast_loop = 0;
1788 return (0);
1789 }
1790
1791
1792 #ifdef INET6
1793 static int
1794 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1795 {
1796 struct ifnet *ifp = sc->sc_carpdev;
1797 struct in6_ifaddr *ia, *ia_if;
1798 int error = 0;
1799
1800 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1801 if (!(sc->sc_if.if_flags & IFF_UP))
1802 carp_set_state(sc, INIT);
1803 if (sc->sc_naddrs6)
1804 sc->sc_if.if_flags |= IFF_UP;
1805 carp_setrun(sc, 0);
1806 return (0);
1807 }
1808
1809 /* we have to do this by hand to ensure we don't match on ourselves */
1810 ia_if = NULL;
1811 for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
1812 int i;
1813
1814 for (i = 0; i < 4; i++) {
1815 if ((sin6->sin6_addr.s6_addr32[i] &
1816 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1817 (ia->ia_addr.sin6_addr.s6_addr32[i] &
1818 ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1819 break;
1820 }
1821 /* and, yeah, we need a multicast-capable iface too */
1822 if (ia->ia_ifp != &sc->sc_if &&
1823 ia->ia_ifp->if_type != IFT_CARP &&
1824 (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1825 (i == 4)) {
1826 if (!ia_if)
1827 ia_if = ia;
1828 }
1829 }
1830
1831 if (ia_if) {
1832 ia = ia_if;
1833 if (sc->sc_carpdev) {
1834 if (sc->sc_carpdev != ia->ia_ifp)
1835 return (EADDRNOTAVAIL);
1836 } else {
1837 ifp = ia->ia_ifp;
1838 }
1839 }
1840
1841 if ((error = carp_set_ifp(sc, ifp)))
1842 return (error);
1843
1844 if (sc->sc_carpdev == NULL)
1845 return (EADDRNOTAVAIL);
1846
1847 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1848 return (error);
1849
1850 sc->sc_naddrs6++;
1851 if (sc->sc_carpdev != NULL)
1852 sc->sc_if.if_flags |= IFF_UP;
1853 carp_set_state(sc, INIT);
1854 carp_setrun(sc, 0);
1855
1856 return (0);
1857 }
1858
1859 static int
1860 carp_join_multicast6(struct carp_softc *sc)
1861 {
1862 struct in6_multi_mship *imm, *imm2;
1863 struct ip6_moptions *im6o = &sc->sc_im6o;
1864 struct sockaddr_in6 addr6;
1865 int error;
1866
1867 /* Join IPv6 CARP multicast group */
1868 memset(&addr6, 0, sizeof(addr6));
1869 addr6.sin6_family = AF_INET6;
1870 addr6.sin6_len = sizeof(addr6);
1871 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1872 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1873 addr6.sin6_addr.s6_addr8[15] = 0x12;
1874 if ((imm = in6_joingroup(&sc->sc_if,
1875 &addr6.sin6_addr, &error, 0)) == NULL) {
1876 return (error);
1877 }
1878 /* join solicited multicast address */
1879 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1880 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1881 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1882 addr6.sin6_addr.s6_addr32[1] = 0;
1883 addr6.sin6_addr.s6_addr32[2] = htonl(1);
1884 addr6.sin6_addr.s6_addr32[3] = 0;
1885 addr6.sin6_addr.s6_addr8[12] = 0xff;
1886 if ((imm2 = in6_joingroup(&sc->sc_if,
1887 &addr6.sin6_addr, &error, 0)) == NULL) {
1888 in6_leavegroup(imm);
1889 return (error);
1890 }
1891
1892 /* apply v6 multicast membership */
1893 im6o->im6o_multicast_ifp = &sc->sc_if;
1894 if (imm)
1895 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1896 i6mm_chain);
1897 if (imm2)
1898 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1899 i6mm_chain);
1900
1901 return (0);
1902 }
1903
1904 #endif /* INET6 */
1905
1906 static int
1907 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1908 {
1909 struct lwp *l = curlwp; /* XXX */
1910 struct carp_softc *sc = ifp->if_softc, *vr;
1911 struct carpreq carpr;
1912 struct ifaddr *ifa;
1913 struct ifreq *ifr;
1914 struct ifnet *cdev = NULL;
1915 int error = 0;
1916
1917 ifa = (struct ifaddr *)data;
1918 ifr = (struct ifreq *)data;
1919
1920 switch (cmd) {
1921 case SIOCINITIFADDR:
1922 switch (ifa->ifa_addr->sa_family) {
1923 #ifdef INET
1924 case AF_INET:
1925 sc->sc_if.if_flags |= IFF_UP;
1926 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
1927 sizeof(struct sockaddr));
1928 error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1929 break;
1930 #endif /* INET */
1931 #ifdef INET6
1932 case AF_INET6:
1933 sc->sc_if.if_flags|= IFF_UP;
1934 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1935 break;
1936 #endif /* INET6 */
1937 default:
1938 error = EAFNOSUPPORT;
1939 break;
1940 }
1941 break;
1942
1943 case SIOCSIFFLAGS:
1944 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
1945 break;
1946 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
1947 callout_stop(&sc->sc_ad_tmo);
1948 callout_stop(&sc->sc_md_tmo);
1949 callout_stop(&sc->sc_md6_tmo);
1950 if (sc->sc_state == MASTER) {
1951 /* we need the interface up to bow out */
1952 sc->sc_if.if_flags |= IFF_UP;
1953 sc->sc_bow_out = 1;
1954 carp_send_ad(sc);
1955 }
1956 sc->sc_if.if_flags &= ~IFF_UP;
1957 carp_set_state(sc, INIT);
1958 carp_setrun(sc, 0);
1959 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
1960 sc->sc_if.if_flags |= IFF_UP;
1961 carp_setrun(sc, 0);
1962 }
1963 break;
1964
1965 case SIOCSVH:
1966 if (l == NULL)
1967 break;
1968 if ((error = kauth_authorize_network(l->l_cred,
1969 KAUTH_NETWORK_INTERFACE,
1970 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
1971 NULL)) != 0)
1972 break;
1973 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1974 break;
1975 error = 1;
1976 if (carpr.carpr_carpdev[0] != '\0' &&
1977 (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
1978 return (EINVAL);
1979 if ((error = carp_set_ifp(sc, cdev)))
1980 return (error);
1981 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
1982 switch (carpr.carpr_state) {
1983 case BACKUP:
1984 callout_stop(&sc->sc_ad_tmo);
1985 carp_set_state(sc, BACKUP);
1986 carp_setrun(sc, 0);
1987 carp_setroute(sc, RTM_DELETE);
1988 break;
1989 case MASTER:
1990 carp_master_down(sc);
1991 break;
1992 default:
1993 break;
1994 }
1995 }
1996 if (carpr.carpr_vhid > 0) {
1997 if (carpr.carpr_vhid > 255) {
1998 error = EINVAL;
1999 break;
2000 }
2001 if (sc->sc_carpdev) {
2002 struct carp_if *cif;
2003 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2004 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
2005 if (vr != sc &&
2006 vr->sc_vhid == carpr.carpr_vhid)
2007 return (EINVAL);
2008 }
2009 sc->sc_vhid = carpr.carpr_vhid;
2010 carp_set_enaddr(sc);
2011 carp_set_state(sc, INIT);
2012 error--;
2013 }
2014 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
2015 if (carpr.carpr_advskew > 254) {
2016 error = EINVAL;
2017 break;
2018 }
2019 if (carpr.carpr_advbase > 255) {
2020 error = EINVAL;
2021 break;
2022 }
2023 sc->sc_advbase = carpr.carpr_advbase;
2024 sc->sc_advskew = carpr.carpr_advskew;
2025 error--;
2026 }
2027 memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
2028 if (error > 0)
2029 error = EINVAL;
2030 else {
2031 error = 0;
2032 carp_setrun(sc, 0);
2033 }
2034 break;
2035
2036 case SIOCGVH:
2037 memset(&carpr, 0, sizeof(carpr));
2038 if (sc->sc_carpdev != NULL)
2039 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2040 IFNAMSIZ);
2041 carpr.carpr_state = sc->sc_state;
2042 carpr.carpr_vhid = sc->sc_vhid;
2043 carpr.carpr_advbase = sc->sc_advbase;
2044 carpr.carpr_advskew = sc->sc_advskew;
2045
2046 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
2047 KAUTH_NETWORK_INTERFACE,
2048 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2049 NULL)) == 0)
2050 memcpy(carpr.carpr_key, sc->sc_key,
2051 sizeof(carpr.carpr_key));
2052 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2053 break;
2054
2055 case SIOCADDMULTI:
2056 error = carp_ether_addmulti(sc, ifr);
2057 break;
2058
2059 case SIOCDELMULTI:
2060 error = carp_ether_delmulti(sc, ifr);
2061 break;
2062
2063 case SIOCSIFCAP:
2064 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
2065 error = 0;
2066 break;
2067
2068 default:
2069 error = ether_ioctl(ifp, cmd, data);
2070 }
2071
2072 carp_hmac_prepare(sc);
2073 return (error);
2074 }
2075
2076
/*
 * Start output on carp interface.  This function should never be called;
 * it transmits nothing and, in DEBUG kernels only, prints a note naming
 * the interface.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
2087
2088 int
2089 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
2090 struct rtentry *rt)
2091 {
2092 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2093 KASSERT(KERNEL_LOCKED_P());
2094
2095 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
2096 return (sc->sc_carpdev->if_output(ifp, m, sa, rt));
2097 } else {
2098 m_freem(m);
2099 return (ENETUNREACH);
2100 }
2101 }
2102
2103 static void
2104 carp_set_state(struct carp_softc *sc, int state)
2105 {
2106 static const char *carp_states[] = { CARP_STATES };
2107 if (sc->sc_state == state)
2108 return;
2109
2110 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
2111
2112 sc->sc_state = state;
2113 switch (state) {
2114 case BACKUP:
2115 sc->sc_if.if_link_state = LINK_STATE_DOWN;
2116 break;
2117 case MASTER:
2118 sc->sc_if.if_link_state = LINK_STATE_UP;
2119 break;
2120 default:
2121 sc->sc_if.if_link_state = LINK_STATE_UNKNOWN;
2122 break;
2123 }
2124 rt_ifmsg(&sc->sc_if);
2125 }
2126
2127 void
2128 carp_carpdev_state(void *v)
2129 {
2130 struct carp_if *cif;
2131 struct carp_softc *sc;
2132 struct ifnet *ifp = v;
2133
2134 if (ifp->if_type == IFT_CARP)
2135 return;
2136
2137 cif = (struct carp_if *)ifp->if_carp;
2138
2139 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2140 int suppressed = sc->sc_suppress;
2141
2142 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2143 !(sc->sc_carpdev->if_flags & IFF_UP)) {
2144 sc->sc_if.if_flags &= ~IFF_RUNNING;
2145 callout_stop(&sc->sc_ad_tmo);
2146 callout_stop(&sc->sc_md_tmo);
2147 callout_stop(&sc->sc_md6_tmo);
2148 carp_set_state(sc, INIT);
2149 sc->sc_suppress = 1;
2150 carp_setrun(sc, 0);
2151 if (!suppressed) {
2152 carp_suppress_preempt++;
2153 if (carp_suppress_preempt == 1)
2154 carp_send_ad_all();
2155 }
2156 } else {
2157 carp_set_state(sc, INIT);
2158 sc->sc_suppress = 0;
2159 carp_setrun(sc, 0);
2160 if (suppressed)
2161 carp_suppress_preempt--;
2162 }
2163 }
2164 }
2165
2166 static int
2167 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2168 {
2169 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
2170 struct ifnet *ifp;
2171 struct carp_mc_entry *mc;
2172 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2173 int error;
2174
2175 ifp = sc->sc_carpdev;
2176 if (ifp == NULL)
2177 return (EINVAL);
2178
2179 error = ether_addmulti(sa, &sc->sc_ac);
2180 if (error != ENETRESET)
2181 return (error);
2182
2183 /*
2184 * This is new multicast address. We have to tell parent
2185 * about it. Also, remember this multicast address so that
2186 * we can delete them on unconfigure.
2187 */
2188 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2189 if (mc == NULL) {
2190 error = ENOMEM;
2191 goto alloc_failed;
2192 }
2193
2194 /*
2195 * As ether_addmulti() returns ENETRESET, following two
2196 * statement shouldn't fail.
2197 */
2198 (void)ether_multiaddr(sa, addrlo, addrhi);
2199 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2200 memcpy(&mc->mc_addr, sa, sa->sa_len);
2201 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2202
2203 error = if_mcast_op(ifp, SIOCADDMULTI, sa);
2204 if (error != 0)
2205 goto ioctl_failed;
2206
2207 return (error);
2208
2209 ioctl_failed:
2210 LIST_REMOVE(mc, mc_entries);
2211 free(mc, M_DEVBUF);
2212 alloc_failed:
2213 (void)ether_delmulti(sa, &sc->sc_ac);
2214
2215 return (error);
2216 }
2217
2218 static int
2219 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2220 {
2221 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
2222 struct ifnet *ifp;
2223 struct ether_multi *enm;
2224 struct carp_mc_entry *mc;
2225 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2226 int error;
2227
2228 ifp = sc->sc_carpdev;
2229 if (ifp == NULL)
2230 return (EINVAL);
2231
2232 /*
2233 * Find a key to lookup carp_mc_entry. We have to do this
2234 * before calling ether_delmulti for obvious reason.
2235 */
2236 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
2237 return (error);
2238 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2239 if (enm == NULL)
2240 return (EINVAL);
2241
2242 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2243 if (mc->mc_enm == enm)
2244 break;
2245
2246 /* We won't delete entries we didn't add */
2247 if (mc == NULL)
2248 return (EINVAL);
2249
2250 error = ether_delmulti(sa, &sc->sc_ac);
2251 if (error != ENETRESET)
2252 return (error);
2253
2254 /* We no longer use this multicast address. Tell parent so. */
2255 error = if_mcast_op(ifp, SIOCDELMULTI, sa);
2256 if (error == 0) {
2257 /* And forget about this address. */
2258 LIST_REMOVE(mc, mc_entries);
2259 free(mc, M_DEVBUF);
2260 } else
2261 (void)ether_addmulti(sa, &sc->sc_ac);
2262 return (error);
2263 }
2264
2265 /*
2266 * Delete any multicast address we have asked to add from parent
2267 * interface. Called when the carp is being unconfigured.
2268 */
2269 static void
2270 carp_ether_purgemulti(struct carp_softc *sc)
2271 {
2272 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */
2273 struct carp_mc_entry *mc;
2274
2275 if (ifp == NULL)
2276 return;
2277
2278 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2279 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
2280 LIST_REMOVE(mc, mc_entries);
2281 free(mc, M_DEVBUF);
2282 }
2283 }
2284
2285 static int
2286 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
2287 {
2288
2289 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
2290 }
2291
2292 void
2293 carp_init(void)
2294 {
2295
2296 sysctl_net_inet_carp_setup(NULL);
2297 #ifdef MBUFTRACE
2298 MOWNER_ATTACH(&carp_proto_mowner_rx);
2299 MOWNER_ATTACH(&carp_proto_mowner_tx);
2300 MOWNER_ATTACH(&carp_proto6_mowner_rx);
2301 MOWNER_ATTACH(&carp_proto6_mowner_tx);
2302 #endif
2303 }
2304
2305 static void
2306 sysctl_net_inet_carp_setup(struct sysctllog **clog)
2307 {
2308
2309 sysctl_createv(clog, 0, NULL, NULL,
2310 CTLFLAG_PERMANENT,
2311 CTLTYPE_NODE, "inet", NULL,
2312 NULL, 0, NULL, 0,
2313 CTL_NET, PF_INET, CTL_EOL);
2314 sysctl_createv(clog, 0, NULL, NULL,
2315 CTLFLAG_PERMANENT,
2316 CTLTYPE_NODE, "carp",
2317 SYSCTL_DESCR("CARP related settings"),
2318 NULL, 0, NULL, 0,
2319 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
2320
2321 sysctl_createv(clog, 0, NULL, NULL,
2322 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2323 CTLTYPE_INT, "preempt",
2324 SYSCTL_DESCR("Enable CARP Preempt"),
2325 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
2326 CTL_NET, PF_INET, IPPROTO_CARP,
2327 CTL_CREATE, CTL_EOL);
2328 sysctl_createv(clog, 0, NULL, NULL,
2329 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2330 CTLTYPE_INT, "arpbalance",
2331 SYSCTL_DESCR("Enable ARP balancing"),
2332 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
2333 CTL_NET, PF_INET, IPPROTO_CARP,
2334 CTL_CREATE, CTL_EOL);
2335 sysctl_createv(clog, 0, NULL, NULL,
2336 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2337 CTLTYPE_INT, "allow",
2338 SYSCTL_DESCR("Enable CARP"),
2339 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
2340 CTL_NET, PF_INET, IPPROTO_CARP,
2341 CTL_CREATE, CTL_EOL);
2342 sysctl_createv(clog, 0, NULL, NULL,
2343 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2344 CTLTYPE_INT, "log",
2345 SYSCTL_DESCR("CARP logging"),
2346 NULL, 0, &carp_opts[CARPCTL_LOG], 0,
2347 CTL_NET, PF_INET, IPPROTO_CARP,
2348 CTL_CREATE, CTL_EOL);
2349 sysctl_createv(clog, 0, NULL, NULL,
2350 CTLFLAG_PERMANENT,
2351 CTLTYPE_STRUCT, "stats",
2352 SYSCTL_DESCR("CARP statistics"),
2353 sysctl_net_inet_carp_stats, 0, NULL, 0,
2354 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
2355 CTL_EOL);
2356 }
2357