ip_carp.c revision 1.84 1 /* $NetBSD: ip_carp.c,v 1.84 2017/02/02 02:52:10 ozaki-r Exp $ */
2 /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */
3
4 /*
5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
6 * Copyright (c) 2003 Ryan McBride. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #ifdef _KERNEL_OPT
31 #include "opt_inet.h"
32 #include "opt_mbuftrace.h"
33 #endif
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.84 2017/02/02 02:52:10 ozaki-r Exp $");
37
38 /*
39 * TODO:
40 * - iface reconfigure
41 * - support for hardware checksum calculations;
42 *
43 */
44
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/mbuf.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/callout.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/device.h>
54 #include <sys/time.h>
55 #include <sys/kernel.h>
56 #include <sys/kauth.h>
57 #include <sys/sysctl.h>
58 #include <sys/ucred.h>
59 #include <sys/syslog.h>
60 #include <sys/acct.h>
61 #include <sys/cprng.h>
62
63 #include <sys/cpu.h>
64
65 #include <net/if.h>
66 #include <net/pfil.h>
67 #include <net/if_types.h>
68 #include <net/if_ether.h>
69 #include <net/route.h>
70 #include <net/netisr.h>
71 #include <net/net_stats.h>
72 #include <netinet/if_inarp.h>
73 #include <netinet/wqinput.h>
74
75 #if NFDDI > 0
76 #include <net/if_fddi.h>
77 #endif
78 #if NTOKEN > 0
79 #include <net/if_token.h>
80 #endif
81
82 #ifdef INET
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/in_var.h>
86 #include <netinet/ip.h>
87 #include <netinet/ip_var.h>
88
89 #include <net/if_dl.h>
90 #endif
91
92 #ifdef INET6
93 #include <netinet/icmp6.h>
94 #include <netinet/ip6.h>
95 #include <netinet6/ip6_var.h>
96 #include <netinet6/nd6.h>
97 #include <netinet6/scope6_var.h>
98 #include <netinet6/in6_var.h>
99 #endif
100
101 #include <net/bpf.h>
102
103 #include <sys/sha1.h>
104
105 #include <netinet/ip_carp.h>
106
107 #include "ioconf.h"
108
109 struct carp_mc_entry {
110 LIST_ENTRY(carp_mc_entry) mc_entries;
111 union {
112 struct ether_multi *mcu_enm;
113 } mc_u;
114 struct sockaddr_storage mc_addr;
115 };
116 #define mc_enm mc_u.mcu_enm
117
118 struct carp_softc {
119 struct ethercom sc_ac;
120 #define sc_if sc_ac.ec_if
121 #define sc_carpdev sc_ac.ec_if.if_carpdev
122 int ah_cookie;
123 int lh_cookie;
124 struct ip_moptions sc_imo;
125 #ifdef INET6
126 struct ip6_moptions sc_im6o;
127 #endif /* INET6 */
128 TAILQ_ENTRY(carp_softc) sc_list;
129
130 enum { INIT = 0, BACKUP, MASTER } sc_state;
131
132 int sc_suppress;
133 int sc_bow_out;
134
135 int sc_sendad_errors;
136 #define CARP_SENDAD_MAX_ERRORS 3
137 int sc_sendad_success;
138 #define CARP_SENDAD_MIN_SUCCESS 3
139
140 int sc_vhid;
141 int sc_advskew;
142 int sc_naddrs;
143 int sc_naddrs6;
144 int sc_advbase; /* seconds */
145 int sc_init_counter;
146 u_int64_t sc_counter;
147
148 /* authentication */
149 #define CARP_HMAC_PAD 64
150 unsigned char sc_key[CARP_KEY_LEN];
151 unsigned char sc_pad[CARP_HMAC_PAD];
152 SHA1_CTX sc_sha1;
153 u_int32_t sc_hashkey[2];
154
155 struct callout sc_ad_tmo; /* advertisement timeout */
156 struct callout sc_md_tmo; /* master down timeout */
157 struct callout sc_md6_tmo; /* master down timeout */
158
159 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
160 };
161
162 int carp_suppress_preempt = 0;
163 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */
164
165 static percpu_t *carpstat_percpu;
166
167 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x)
168
169 #ifdef MBUFTRACE
170 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
171 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
172 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
173 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
174 #endif
175
176 struct carp_if {
177 TAILQ_HEAD(, carp_softc) vhif_vrs;
178 int vhif_nvrs;
179
180 struct ifnet *vhif_ifp;
181 };
182
/*
 * CARP_LOG(sc, s): if carp_opts[CARPCTL_LOG] is set, log a line at
 * LOG_INFO.  The line is prefixed with the interface name of `sc', or
 * with "carp: " when `sc' is NULL.  `s' is a parenthesised printf-style
 * argument list handed to addlog().
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and can be used safely in an unbraced if/else.
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if (sc)						\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
193
194 static void carp_hmac_prepare(struct carp_softc *);
195 static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
196 unsigned char *);
197 static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
198 unsigned char *);
199 static void carp_setroute(struct carp_softc *, int);
200 static void carp_proto_input_c(struct mbuf *, struct carp_header *,
201 sa_family_t);
202 static void carpdetach(struct carp_softc *);
203 static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
204 struct carp_header *);
205 static void carp_send_ad_all(void);
206 static void carp_send_ad(void *);
207 static void carp_send_arp(struct carp_softc *);
208 static void carp_master_down(void *);
209 static int carp_ioctl(struct ifnet *, u_long, void *);
210 static void carp_start(struct ifnet *);
211 static void carp_setrun(struct carp_softc *, sa_family_t);
212 static void carp_set_state(struct carp_softc *, int);
213 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
214 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
215
216 static void carp_multicast_cleanup(struct carp_softc *);
217 static int carp_set_ifp(struct carp_softc *, struct ifnet *);
218 static void carp_set_enaddr(struct carp_softc *);
219 #if 0
220 static void carp_addr_updated(void *);
221 #endif
222 static u_int32_t carp_hash(struct carp_softc *, u_char *);
223 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
224 static int carp_join_multicast(struct carp_softc *);
225 #ifdef INET6
226 static void carp_send_na(struct carp_softc *);
227 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
228 static int carp_join_multicast6(struct carp_softc *);
229 #endif
230 static int carp_clone_create(struct if_clone *, int);
231 static int carp_clone_destroy(struct ifnet *);
232 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *);
233 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *);
234 static void carp_ether_purgemulti(struct carp_softc *);
235
236 static void sysctl_net_inet_carp_setup(struct sysctllog **);
237
238 /* workqueue-based pr_input */
239 static struct wqinput *carp_wqinput;
240 static void _carp_proto_input(struct mbuf *, int, int);
241 #ifdef INET6
242 static struct wqinput *carp6_wqinput;
243 static void _carp6_proto_input(struct mbuf *, int, int);
244 #endif
245
246 struct if_clone carp_cloner =
247 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
248
249 static __inline u_int16_t
250 carp_cksum(struct mbuf *m, int len)
251 {
252 return (in_cksum(m, len));
253 }
254
255 static void
256 carp_hmac_prepare(struct carp_softc *sc)
257 {
258 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
259 u_int8_t vhid = sc->sc_vhid & 0xff;
260 SHA1_CTX sha1ctx;
261 u_int32_t kmd[5];
262 struct ifaddr *ifa;
263 int i, found;
264 struct in_addr last, cur, in;
265 #ifdef INET6
266 struct in6_addr last6, cur6, in6;
267 #endif /* INET6 */
268
269 /* compute ipad from key */
270 memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
271 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
272 for (i = 0; i < sizeof(sc->sc_pad); i++)
273 sc->sc_pad[i] ^= 0x36;
274
275 /* precompute first part of inner hash */
276 SHA1Init(&sc->sc_sha1);
277 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
278 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
279 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
280
281 /* generate a key for the arpbalance hash, before the vhid is hashed */
282 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
283 SHA1Final((unsigned char *)kmd, &sha1ctx);
284 sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
285 sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
286
287 /* the rest of the precomputation */
288 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
289
290 /* Hash the addresses from smallest to largest, not interface order */
291 #ifdef INET
292 cur.s_addr = 0;
293 do {
294 found = 0;
295 last = cur;
296 cur.s_addr = 0xffffffff;
297 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
298 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
299 if (ifa->ifa_addr->sa_family == AF_INET &&
300 ntohl(in.s_addr) > ntohl(last.s_addr) &&
301 ntohl(in.s_addr) < ntohl(cur.s_addr)) {
302 cur.s_addr = in.s_addr;
303 found++;
304 }
305 }
306 if (found)
307 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
308 } while (found);
309 #endif /* INET */
310
311 #ifdef INET6
312 memset(&cur6, 0x00, sizeof(cur6));
313 do {
314 found = 0;
315 last6 = cur6;
316 memset(&cur6, 0xff, sizeof(cur6));
317 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
318 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
319 if (IN6_IS_ADDR_LINKLOCAL(&in6))
320 in6.s6_addr16[1] = 0;
321 if (ifa->ifa_addr->sa_family == AF_INET6 &&
322 memcmp(&in6, &last6, sizeof(in6)) > 0 &&
323 memcmp(&in6, &cur6, sizeof(in6)) < 0) {
324 cur6 = in6;
325 found++;
326 }
327 }
328 if (found)
329 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
330 } while (found);
331 #endif /* INET6 */
332
333 /* convert ipad to opad */
334 for (i = 0; i < sizeof(sc->sc_pad); i++)
335 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
336 }
337
338 static void
339 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
340 unsigned char md[20])
341 {
342 SHA1_CTX sha1ctx;
343
344 /* fetch first half of inner hash */
345 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
346
347 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
348 SHA1Final(md, &sha1ctx);
349
350 /* outer hash */
351 SHA1Init(&sha1ctx);
352 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
353 SHA1Update(&sha1ctx, md, 20);
354 SHA1Final(md, &sha1ctx);
355 }
356
357 static int
358 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
359 unsigned char md[20])
360 {
361 unsigned char md2[20];
362
363 carp_hmac_generate(sc, counter, md2);
364
365 return (memcmp(md, md2, sizeof(md2)));
366 }
367
368 static void
369 carp_setroute(struct carp_softc *sc, int cmd)
370 {
371 struct ifaddr *ifa;
372 int s;
373
374 KERNEL_LOCK(1, NULL);
375 s = splsoftnet();
376 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
377 switch (ifa->ifa_addr->sa_family) {
378 case AF_INET: {
379 int count = 0;
380 struct rtentry *rt;
381 int hr_otherif, nr_ourif;
382
383 /*
384 * Avoid screwing with the routes if there are other
385 * carp interfaces which are master and have the same
386 * address.
387 */
388 if (sc->sc_carpdev != NULL &&
389 sc->sc_carpdev->if_carp != NULL) {
390 count = carp_addrcount(
391 (struct carp_if *)sc->sc_carpdev->if_carp,
392 ifatoia(ifa), CARP_COUNT_MASTER);
393 if ((cmd == RTM_ADD && count != 1) ||
394 (cmd == RTM_DELETE && count != 0))
395 continue;
396 }
397
398 /* Remove the existing host route, if any */
399 rtrequest(RTM_DELETE, ifa->ifa_addr,
400 ifa->ifa_addr, ifa->ifa_netmask,
401 RTF_HOST, NULL);
402
403 rt = NULL;
404 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
405 ifa->ifa_netmask, RTF_HOST, &rt);
406 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
407 (rt->rt_flags & RTF_CONNECTED));
408 if (rt != NULL) {
409 rt_unref(rt);
410 rt = NULL;
411 }
412
413 /* Check for a network route on our interface */
414
415 rt = NULL;
416 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
417 ifa->ifa_netmask, 0, &rt);
418 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
419
420 switch (cmd) {
421 case RTM_ADD:
422 if (hr_otherif) {
423 ifa->ifa_rtrequest = NULL;
424 ifa->ifa_flags &= ~RTF_CONNECTED;
425
426 rtrequest(RTM_ADD, ifa->ifa_addr,
427 ifa->ifa_addr, ifa->ifa_netmask,
428 RTF_UP | RTF_HOST, NULL);
429 }
430 if (!hr_otherif || nr_ourif || !rt) {
431 if (nr_ourif &&
432 (rt->rt_flags & RTF_CONNECTED) == 0)
433 rtrequest(RTM_DELETE,
434 ifa->ifa_addr,
435 ifa->ifa_addr,
436 ifa->ifa_netmask, 0, NULL);
437
438 ifa->ifa_rtrequest = arp_rtrequest;
439 ifa->ifa_flags |= RTF_CONNECTED;
440
441 if (rtrequest(RTM_ADD, ifa->ifa_addr,
442 ifa->ifa_addr, ifa->ifa_netmask, 0,
443 NULL) == 0)
444 ifa->ifa_flags |= IFA_ROUTE;
445 }
446 break;
447 case RTM_DELETE:
448 break;
449 default:
450 break;
451 }
452 if (rt != NULL) {
453 rt_unref(rt);
454 rt = NULL;
455 }
456 break;
457 }
458
459 #ifdef INET6
460 case AF_INET6:
461 if (cmd == RTM_ADD)
462 in6_ifaddlocal(ifa);
463 else
464 in6_ifremlocal(ifa);
465 break;
466 #endif /* INET6 */
467 default:
468 break;
469 }
470 }
471 splx(s);
472 KERNEL_UNLOCK_ONE(NULL);
473 }
474
475 /*
476 * process input packet.
477 * we have rearranged checks order compared to the rfc,
478 * but it seems more efficient this way or not possible otherwise.
479 */
480 static void
481 _carp_proto_input(struct mbuf *m, int hlen, int proto)
482 {
483 struct ip *ip = mtod(m, struct ip *);
484 struct carp_softc *sc = NULL;
485 struct carp_header *ch;
486 int iplen, len;
487 struct ifnet *rcvif;
488
489 CARP_STATINC(CARP_STAT_IPACKETS);
490 MCLAIM(m, &carp_proto_mowner_rx);
491
492 if (!carp_opts[CARPCTL_ALLOW]) {
493 m_freem(m);
494 return;
495 }
496
497 rcvif = m_get_rcvif_NOMPSAFE(m);
498 /* check if received on a valid carp interface */
499 if (rcvif->if_type != IFT_CARP) {
500 CARP_STATINC(CARP_STAT_BADIF);
501 CARP_LOG(sc, ("packet received on non-carp interface: %s",
502 rcvif->if_xname));
503 m_freem(m);
504 return;
505 }
506
507 /* verify that the IP TTL is 255. */
508 if (ip->ip_ttl != CARP_DFLTTL) {
509 CARP_STATINC(CARP_STAT_BADTTL);
510 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
511 CARP_DFLTTL, rcvif->if_xname));
512 m_freem(m);
513 return;
514 }
515
516 /*
517 * verify that the received packet length is
518 * equal to the CARP header
519 */
520 iplen = ip->ip_hl << 2;
521 len = iplen + sizeof(*ch);
522 if (len > m->m_pkthdr.len) {
523 CARP_STATINC(CARP_STAT_BADLEN);
524 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
525 rcvif->if_xname));
526 m_freem(m);
527 return;
528 }
529
530 if ((m = m_pullup(m, len)) == NULL) {
531 CARP_STATINC(CARP_STAT_HDROPS);
532 return;
533 }
534 ip = mtod(m, struct ip *);
535 ch = (struct carp_header *)((char *)ip + iplen);
536 /* verify the CARP checksum */
537 m->m_data += iplen;
538 if (carp_cksum(m, len - iplen)) {
539 CARP_STATINC(CARP_STAT_BADSUM);
540 CARP_LOG(sc, ("checksum failed on %s",
541 rcvif->if_xname));
542 m_freem(m);
543 return;
544 }
545 m->m_data -= iplen;
546
547 carp_proto_input_c(m, ch, AF_INET);
548 }
549
550 void
551 carp_proto_input(struct mbuf *m, ...)
552 {
553
554 wqinput_input(carp_wqinput, m, 0, 0);
555 }
556
557 #ifdef INET6
558 static void
559 _carp6_proto_input(struct mbuf *m, int off, int proto)
560 {
561 struct carp_softc *sc = NULL;
562 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
563 struct carp_header *ch;
564 u_int len;
565 struct ifnet *rcvif;
566
567 CARP_STATINC(CARP_STAT_IPACKETS6);
568 MCLAIM(m, &carp_proto6_mowner_rx);
569
570 if (!carp_opts[CARPCTL_ALLOW]) {
571 m_freem(m);
572 return;
573 }
574
575 rcvif = m_get_rcvif_NOMPSAFE(m);
576
577 /* check if received on a valid carp interface */
578 if (rcvif->if_type != IFT_CARP) {
579 CARP_STATINC(CARP_STAT_BADIF);
580 CARP_LOG(sc, ("packet received on non-carp interface: %s",
581 rcvif->if_xname));
582 m_freem(m);
583 return;
584 }
585
586 /* verify that the IP TTL is 255 */
587 if (ip6->ip6_hlim != CARP_DFLTTL) {
588 CARP_STATINC(CARP_STAT_BADTTL);
589 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
590 CARP_DFLTTL, rcvif->if_xname));
591 m_freem(m);
592 return;
593 }
594
595 /* verify that we have a complete carp packet */
596 len = m->m_len;
597 IP6_EXTHDR_GET(ch, struct carp_header *, m, off, sizeof(*ch));
598 if (ch == NULL) {
599 CARP_STATINC(CARP_STAT_BADLEN);
600 CARP_LOG(sc, ("packet size %u too small", len));
601 return;
602 }
603
604
605 /* verify the CARP checksum */
606 m->m_data += off;
607 if (carp_cksum(m, sizeof(*ch))) {
608 CARP_STATINC(CARP_STAT_BADSUM);
609 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
610 m_freem(m);
611 return;
612 }
613 m->m_data -= off;
614
615 carp_proto_input_c(m, ch, AF_INET6);
616 return;
617 }
618
619 int
620 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
621 {
622
623 wqinput_input(carp6_wqinput, *mp, *offp, proto);
624
625 return IPPROTO_DONE;
626 }
627 #endif /* INET6 */
628
629 static void
630 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
631 {
632 struct carp_softc *sc;
633 u_int64_t tmp_counter;
634 struct timeval sc_tv, ch_tv;
635
636 TAILQ_FOREACH(sc, &((struct carp_if *)
637 m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list)
638 if (sc->sc_vhid == ch->carp_vhid)
639 break;
640
641 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
642 (IFF_UP|IFF_RUNNING)) {
643 CARP_STATINC(CARP_STAT_BADVHID);
644 m_freem(m);
645 return;
646 }
647
648 /*
649 * Check if our own advertisement was duplicated
650 * from a non simplex interface.
651 * XXX If there is no address on our physical interface
652 * there is no way to distinguish our ads from the ones
653 * another carp host might have sent us.
654 */
655 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
656 struct sockaddr sa;
657 struct ifaddr *ifa;
658 int s;
659
660 memset(&sa, 0, sizeof(sa));
661 sa.sa_family = af;
662 s = pserialize_read_enter();
663 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
664
665 if (ifa && af == AF_INET) {
666 struct ip *ip = mtod(m, struct ip *);
667 if (ip->ip_src.s_addr ==
668 ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
669 pserialize_read_exit(s);
670 m_freem(m);
671 return;
672 }
673 }
674 #ifdef INET6
675 if (ifa && af == AF_INET6) {
676 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
677 struct in6_addr in6_src, in6_found;
678
679 in6_src = ip6->ip6_src;
680 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
681 if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
682 in6_src.s6_addr16[1] = 0;
683 if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
684 in6_found.s6_addr16[1] = 0;
685 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
686 pserialize_read_exit(s);
687 m_freem(m);
688 return;
689 }
690 }
691 #endif /* INET6 */
692 pserialize_read_exit(s);
693 }
694
695 nanotime(&sc->sc_if.if_lastchange);
696 sc->sc_if.if_ipackets++;
697 sc->sc_if.if_ibytes += m->m_pkthdr.len;
698
699 /* verify the CARP version. */
700 if (ch->carp_version != CARP_VERSION) {
701 CARP_STATINC(CARP_STAT_BADVER);
702 sc->sc_if.if_ierrors++;
703 CARP_LOG(sc, ("invalid version %d != %d",
704 ch->carp_version, CARP_VERSION));
705 m_freem(m);
706 return;
707 }
708
709 /* verify the hash */
710 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
711 struct ip *ip;
712 struct ip6_hdr *ip6;
713 char ip6buf[INET6_ADDRSTRLEN];
714 char ipbuf[INET_ADDRSTRLEN];
715
716 CARP_STATINC(CARP_STAT_BADAUTH);
717 sc->sc_if.if_ierrors++;
718
719 switch(af) {
720 case AF_INET:
721 ip = mtod(m, struct ip *);
722 CARP_LOG(sc, ("incorrect hash from %s",
723 in_fmtaddr(ipbuf, ip->ip_src)));
724 break;
725
726 case AF_INET6:
727 ip6 = mtod(m, struct ip6_hdr *);
728 CARP_LOG(sc, ("incorrect hash from %s",
729 IN6_PRINT(ip6buf, &ip6->ip6_src)));
730 break;
731
732 default: CARP_LOG(sc, ("incorrect hash"));
733 break;
734 }
735 m_freem(m);
736 return;
737 }
738
739 tmp_counter = ntohl(ch->carp_counter[0]);
740 tmp_counter = tmp_counter<<32;
741 tmp_counter += ntohl(ch->carp_counter[1]);
742
743 /* XXX Replay protection goes here */
744
745 sc->sc_init_counter = 0;
746 sc->sc_counter = tmp_counter;
747
748
749 sc_tv.tv_sec = sc->sc_advbase;
750 if (carp_suppress_preempt && sc->sc_advskew < 240)
751 sc_tv.tv_usec = 240 * 1000000 / 256;
752 else
753 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
754 ch_tv.tv_sec = ch->carp_advbase;
755 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
756
757 switch (sc->sc_state) {
758 case INIT:
759 break;
760 case MASTER:
761 /*
762 * If we receive an advertisement from a backup who's going to
763 * be more frequent than us, go into BACKUP state.
764 */
765 if (timercmp(&sc_tv, &ch_tv, >) ||
766 timercmp(&sc_tv, &ch_tv, ==)) {
767 callout_stop(&sc->sc_ad_tmo);
768 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
769 carp_set_state(sc, BACKUP);
770 carp_setrun(sc, 0);
771 carp_setroute(sc, RTM_DELETE);
772 }
773 break;
774 case BACKUP:
775 /*
776 * If we're pre-empting masters who advertise slower than us,
777 * and this one claims to be slower, treat him as down.
778 */
779 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
780 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
781 carp_master_down(sc);
782 break;
783 }
784
785 /*
786 * If the master is going to advertise at such a low frequency
787 * that he's guaranteed to time out, we'd might as well just
788 * treat him as timed out now.
789 */
790 sc_tv.tv_sec = sc->sc_advbase * 3;
791 if (timercmp(&sc_tv, &ch_tv, <)) {
792 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
793 carp_master_down(sc);
794 break;
795 }
796
797 /*
798 * Otherwise, we reset the counter and wait for the next
799 * advertisement.
800 */
801 carp_setrun(sc, af);
802 break;
803 }
804
805 m_freem(m);
806 return;
807 }
808
809 /*
810 * Interface side of the CARP implementation.
811 */
812
813 /* ARGSUSED */
814 void
815 carpattach(int n)
816 {
817 if_clone_attach(&carp_cloner);
818
819 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
820 }
821
822 static int
823 carp_clone_create(struct if_clone *ifc, int unit)
824 {
825 extern int ifqmaxlen;
826 struct carp_softc *sc;
827 struct ifnet *ifp;
828
829 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
830 if (!sc)
831 return (ENOMEM);
832
833 sc->sc_suppress = 0;
834 sc->sc_advbase = CARP_DFLTINTV;
835 sc->sc_vhid = -1; /* required setting */
836 sc->sc_advskew = 0;
837 sc->sc_init_counter = 1;
838 sc->sc_naddrs = sc->sc_naddrs6 = 0;
839 #ifdef INET6
840 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
841 #endif /* INET6 */
842
843 callout_init(&sc->sc_ad_tmo, 0);
844 callout_init(&sc->sc_md_tmo, 0);
845 callout_init(&sc->sc_md6_tmo, 0);
846
847 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
848 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
849 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
850
851 LIST_INIT(&sc->carp_mc_listhead);
852 ifp = &sc->sc_if;
853 ifp->if_softc = sc;
854 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
855 unit);
856 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
857 ifp->if_ioctl = carp_ioctl;
858 ifp->if_start = carp_start;
859 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
860 IFQ_SET_READY(&ifp->if_snd);
861 if_initialize(ifp);
862 ether_ifattach(ifp, NULL);
863 carp_set_enaddr(sc);
864 /* Overwrite ethernet defaults */
865 ifp->if_type = IFT_CARP;
866 ifp->if_output = carp_output;
867 ifp->if_extflags &= ~IFEF_OUTPUT_MPSAFE;
868 if_register(ifp);
869
870 return (0);
871 }
872
873 static int
874 carp_clone_destroy(struct ifnet *ifp)
875 {
876 struct carp_softc *sc = ifp->if_softc;
877
878 carpdetach(ifp->if_softc);
879 ether_ifdetach(ifp);
880 if_detach(ifp);
881 callout_destroy(&sc->sc_ad_tmo);
882 callout_destroy(&sc->sc_md_tmo);
883 callout_destroy(&sc->sc_md6_tmo);
884 free(ifp->if_softc, M_DEVBUF);
885
886 return (0);
887 }
888
889 static void
890 carpdetach(struct carp_softc *sc)
891 {
892 struct carp_if *cif;
893 int s;
894
895 callout_stop(&sc->sc_ad_tmo);
896 callout_stop(&sc->sc_md_tmo);
897 callout_stop(&sc->sc_md6_tmo);
898
899 if (sc->sc_suppress)
900 carp_suppress_preempt--;
901 sc->sc_suppress = 0;
902
903 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
904 carp_suppress_preempt--;
905 sc->sc_sendad_errors = 0;
906
907 carp_set_state(sc, INIT);
908 sc->sc_if.if_flags &= ~IFF_UP;
909 carp_setrun(sc, 0);
910 carp_multicast_cleanup(sc);
911
912 KERNEL_LOCK(1, NULL);
913 s = splnet();
914 if (sc->sc_carpdev != NULL) {
915 /* XXX linkstatehook removal */
916 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
917 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
918 if (!--cif->vhif_nvrs) {
919 ifpromisc(sc->sc_carpdev, 0);
920 sc->sc_carpdev->if_carp = NULL;
921 free(cif, M_IFADDR);
922 }
923 }
924 sc->sc_carpdev = NULL;
925 splx(s);
926 KERNEL_UNLOCK_ONE(NULL);
927 }
928
929 /* Detach an interface from the carp. */
930 void
931 carp_ifdetach(struct ifnet *ifp)
932 {
933 struct carp_softc *sc, *nextsc;
934 struct carp_if *cif = (struct carp_if *)ifp->if_carp;
935
936 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
937 nextsc = TAILQ_NEXT(sc, sc_list);
938 carpdetach(sc);
939 }
940 }
941
942 static int
943 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
944 struct carp_header *ch)
945 {
946 if (sc->sc_init_counter) {
947 /* this could also be seconds since unix epoch */
948 sc->sc_counter = cprng_fast64();
949 } else
950 sc->sc_counter++;
951
952 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
953 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
954
955 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
956
957 return (0);
958 }
959
960 static void
961 carp_send_ad_all(void)
962 {
963 struct ifnet *ifp;
964 struct carp_if *cif;
965 struct carp_softc *vh;
966 int s;
967 int bound = curlwp_bind();
968
969 s = pserialize_read_enter();
970 IFNET_READER_FOREACH(ifp) {
971 struct psref psref;
972 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
973 continue;
974
975 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
976 pserialize_read_exit(s);
977
978 cif = (struct carp_if *)ifp->if_carp;
979 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
980 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
981 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
982 carp_send_ad(vh);
983 }
984
985 s = pserialize_read_enter();
986 psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
987 }
988 pserialize_read_exit(s);
989 curlwp_bindx(bound);
990 }
991
992
993 static void
994 carp_send_ad(void *v)
995 {
996 struct carp_header ch;
997 struct timeval tv;
998 struct carp_softc *sc = v;
999 struct carp_header *ch_ptr;
1000 struct mbuf *m;
1001 int error, len, advbase, advskew, s;
1002 struct sockaddr sa;
1003
1004 KERNEL_LOCK(1, NULL);
1005 s = splsoftnet();
1006
1007 advbase = advskew = 0; /* Sssssh compiler */
1008 if (sc->sc_carpdev == NULL) {
1009 sc->sc_if.if_oerrors++;
1010 goto retry_later;
1011 }
1012
1013 /* bow out if we've gone to backup (the carp interface is going down) */
1014 if (sc->sc_bow_out) {
1015 sc->sc_bow_out = 0;
1016 advbase = 255;
1017 advskew = 255;
1018 } else {
1019 advbase = sc->sc_advbase;
1020 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1021 advskew = sc->sc_advskew;
1022 else
1023 advskew = 240;
1024 tv.tv_sec = advbase;
1025 tv.tv_usec = advskew * 1000000 / 256;
1026 }
1027
1028 ch.carp_version = CARP_VERSION;
1029 ch.carp_type = CARP_ADVERTISEMENT;
1030 ch.carp_vhid = sc->sc_vhid;
1031 ch.carp_advbase = advbase;
1032 ch.carp_advskew = advskew;
1033 ch.carp_authlen = 7; /* XXX DEFINE */
1034 ch.carp_pad1 = 0; /* must be zero */
1035 ch.carp_cksum = 0;
1036
1037
1038 #ifdef INET
1039 if (sc->sc_naddrs) {
1040 struct ip *ip;
1041 struct ifaddr *ifa;
1042 int _s;
1043
1044 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1045 if (m == NULL) {
1046 sc->sc_if.if_oerrors++;
1047 CARP_STATINC(CARP_STAT_ONOMEM);
1048 /* XXX maybe less ? */
1049 goto retry_later;
1050 }
1051 MCLAIM(m, &carp_proto_mowner_tx);
1052 len = sizeof(*ip) + sizeof(ch);
1053 m->m_pkthdr.len = len;
1054 m_reset_rcvif(m);
1055 m->m_len = len;
1056 MH_ALIGN(m, m->m_len);
1057 m->m_flags |= M_MCAST;
1058 ip = mtod(m, struct ip *);
1059 ip->ip_v = IPVERSION;
1060 ip->ip_hl = sizeof(*ip) >> 2;
1061 ip->ip_tos = IPTOS_LOWDELAY;
1062 ip->ip_len = htons(len);
1063 ip->ip_id = 0; /* no need for id, we don't support fragments */
1064 ip->ip_off = htons(IP_DF);
1065 ip->ip_ttl = CARP_DFLTTL;
1066 ip->ip_p = IPPROTO_CARP;
1067 ip->ip_sum = 0;
1068
1069 memset(&sa, 0, sizeof(sa));
1070 sa.sa_family = AF_INET;
1071 _s = pserialize_read_enter();
1072 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1073 if (ifa == NULL)
1074 ip->ip_src.s_addr = 0;
1075 else
1076 ip->ip_src.s_addr =
1077 ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1078 pserialize_read_exit(_s);
1079 ip->ip_dst.s_addr = INADDR_CARP_GROUP;
1080
1081 ch_ptr = (struct carp_header *)(&ip[1]);
1082 memcpy(ch_ptr, &ch, sizeof(ch));
1083 if (carp_prepare_ad(m, sc, ch_ptr))
1084 goto retry_later;
1085
1086 m->m_data += sizeof(*ip);
1087 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1088 m->m_data -= sizeof(*ip);
1089
1090 nanotime(&sc->sc_if.if_lastchange);
1091 sc->sc_if.if_opackets++;
1092 sc->sc_if.if_obytes += len;
1093 CARP_STATINC(CARP_STAT_OPACKETS);
1094
1095 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1096 NULL);
1097 if (error) {
1098 if (error == ENOBUFS)
1099 CARP_STATINC(CARP_STAT_ONOMEM);
1100 else
1101 CARP_LOG(sc, ("ip_output failed: %d", error));
1102 sc->sc_if.if_oerrors++;
1103 if (sc->sc_sendad_errors < INT_MAX)
1104 sc->sc_sendad_errors++;
1105 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1106 carp_suppress_preempt++;
1107 if (carp_suppress_preempt == 1)
1108 carp_send_ad_all();
1109 }
1110 sc->sc_sendad_success = 0;
1111 } else {
1112 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1113 if (++sc->sc_sendad_success >=
1114 CARP_SENDAD_MIN_SUCCESS) {
1115 carp_suppress_preempt--;
1116 sc->sc_sendad_errors = 0;
1117 }
1118 } else
1119 sc->sc_sendad_errors = 0;
1120 }
1121 }
1122 #endif /* INET */
1123 #ifdef INET6_notyet
1124 if (sc->sc_naddrs6) {
1125 struct ip6_hdr *ip6;
1126 struct ifaddr *ifa;
1127 int _s;
1128
1129 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1130 if (m == NULL) {
1131 sc->sc_if.if_oerrors++;
1132 CARP_STATINC(CARP_STAT_ONOMEM);
1133 /* XXX maybe less ? */
1134 goto retry_later;
1135 }
1136 MCLAIM(m, &carp_proto6_mowner_tx);
1137 len = sizeof(*ip6) + sizeof(ch);
1138 m->m_pkthdr.len = len;
1139 m_reset_rcvif(m);
1140 m->m_len = len;
1141 MH_ALIGN(m, m->m_len);
1142 m->m_flags |= M_MCAST;
1143 ip6 = mtod(m, struct ip6_hdr *);
1144 memset(ip6, 0, sizeof(*ip6));
1145 ip6->ip6_vfc |= IPV6_VERSION;
1146 ip6->ip6_hlim = CARP_DFLTTL;
1147 ip6->ip6_nxt = IPPROTO_CARP;
1148
1149 /* set the source address */
1150 memset(&sa, 0, sizeof(sa));
1151 sa.sa_family = AF_INET6;
1152 _s = pserialize_read_enter();
1153 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1154 if (ifa == NULL) /* This should never happen with IPv6 */
1155 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
1156 else
1157 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1158 &ip6->ip6_src, sizeof(struct in6_addr));
1159 pserialize_read_exit(_s);
1160 /* set the multicast destination */
1161
1162 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1163 ip6->ip6_dst.s6_addr8[15] = 0x12;
1164 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1165 sc->sc_if.if_oerrors++;
1166 m_freem(m);
1167 CARP_LOG(sc, ("in6_setscope failed"));
1168 goto retry_later;
1169 }
1170
1171 ch_ptr = (struct carp_header *)(&ip6[1]);
1172 memcpy(ch_ptr, &ch, sizeof(ch));
1173 if (carp_prepare_ad(m, sc, ch_ptr))
1174 goto retry_later;
1175
1176 m->m_data += sizeof(*ip6);
1177 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1178 m->m_data -= sizeof(*ip6);
1179
1180 nanotime(&sc->sc_if.if_lastchange);
1181 sc->sc_if.if_opackets++;
1182 sc->sc_if.if_obytes += len;
1183 CARP_STATINC(CARP_STAT_OPACKETS6);
1184
1185 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1186 if (error) {
1187 if (error == ENOBUFS)
1188 CARP_STATINC(CARP_STAT_ONOMEM);
1189 else
1190 CARP_LOG(sc, ("ip6_output failed: %d", error));
1191 sc->sc_if.if_oerrors++;
1192 if (sc->sc_sendad_errors < INT_MAX)
1193 sc->sc_sendad_errors++;
1194 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1195 carp_suppress_preempt++;
1196 if (carp_suppress_preempt == 1)
1197 carp_send_ad_all();
1198 }
1199 sc->sc_sendad_success = 0;
1200 } else {
1201 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1202 if (++sc->sc_sendad_success >=
1203 CARP_SENDAD_MIN_SUCCESS) {
1204 carp_suppress_preempt--;
1205 sc->sc_sendad_errors = 0;
1206 }
1207 } else
1208 sc->sc_sendad_errors = 0;
1209 }
1210 }
1211 #endif /* INET6 */
1212
1213 retry_later:
1214 splx(s);
1215 KERNEL_UNLOCK_ONE(NULL);
1216 if (advbase != 255 || advskew != 255)
1217 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1218 }
1219
1220 /*
1221 * Broadcast a gratuitous ARP request containing
1222 * the virtual router MAC address for each IP address
1223 * associated with the virtual router.
1224 */
1225 static void
1226 carp_send_arp(struct carp_softc *sc)
1227 {
1228 struct ifaddr *ifa;
1229 int s;
1230
1231 KERNEL_LOCK(1, NULL);
1232 s = splsoftnet();
1233 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1234
1235 if (ifa->ifa_addr->sa_family != AF_INET)
1236 continue;
1237
1238 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl));
1239 }
1240 splx(s);
1241 KERNEL_UNLOCK_ONE(NULL);
1242 }
1243
#ifdef INET6
/*
 * Send an unsolicited neighbor advertisement for every IPv6 address of
 * the virtual router.
 *
 * For each AF_INET6 entry on the carp interface, nd6_na_output() is called
 * on the parent device (sc_carpdev) with the link-local all-nodes address
 * as destination and ND_NA_FLAG_OVERRIDE set.  The walk is done with the
 * kernel lock held and at splsoftnet.
 */
static void
carp_send_na(struct carp_softc *sc)
{
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	struct ifaddr *addr;
	int spl;

	KERNEL_LOCK(1, NULL);
	spl = splsoftnet();

	IFADDR_READER_FOREACH(addr, &sc->sc_if) {
		/* Only IPv6 addresses are advertised through ND. */
		if (addr->ifa_addr->sa_family == AF_INET6) {
			nd6_na_output(sc->sc_carpdev, &mcast,
			    &ifatoia6(addr)->ia_addr.sin6_addr,
			    ND_NA_FLAG_OVERRIDE, 1, NULL);
		}
	}

	splx(spl);
	KERNEL_UNLOCK_ONE(NULL);
}
#endif /* INET6 */
1269
/*
 * Based on bridge_hash() in if_bridge.c.
 *
 * mix(a, b, c) scrambles three integer lvalues in place with nine rounds
 * of subtract / xor-with-shift.  Each argument is read and written several
 * times, so it must be a plain lvalue without side effects; carp_hash()
 * passes u_int32_t locals.
 */
#define	mix(a, b, c)							\
do {									\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);			\
} while (0)
1285
1286 static u_int32_t
1287 carp_hash(struct carp_softc *sc, u_char *src)
1288 {
1289 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1290
1291 c += sc->sc_key[3] << 24;
1292 c += sc->sc_key[2] << 16;
1293 c += sc->sc_key[1] << 8;
1294 c += sc->sc_key[0];
1295 b += src[5] << 8;
1296 b += src[4];
1297 a += src[3] << 24;
1298 a += src[2] << 16;
1299 a += src[1] << 8;
1300 a += src[0];
1301
1302 mix(a, b, c);
1303 return (c);
1304 }
1305
1306 static int
1307 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1308 {
1309 struct carp_softc *vh;
1310 struct ifaddr *ifa;
1311 int count = 0;
1312
1313 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1314 if ((type == CARP_COUNT_RUNNING &&
1315 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1316 (IFF_UP|IFF_RUNNING)) ||
1317 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1318 IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1319 if (ifa->ifa_addr->sa_family == AF_INET &&
1320 ia->ia_addr.sin_addr.s_addr ==
1321 ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1322 count++;
1323 }
1324 }
1325 }
1326 return (count);
1327 }
1328
1329 int
1330 carp_iamatch(struct in_ifaddr *ia, u_char *src,
1331 u_int32_t *count, u_int32_t index)
1332 {
1333 struct carp_softc *sc = ia->ia_ifp->if_softc;
1334
1335 if (carp_opts[CARPCTL_ARPBALANCE]) {
1336 /*
1337 * We use the source ip to decide which virtual host should
1338 * handle the request. If we're master of that virtual host,
1339 * then we respond, otherwise, just drop the arp packet on
1340 * the floor.
1341 */
1342
1343 /* Count the elegible carp interfaces with this address */
1344 if (*count == 0)
1345 *count = carp_addrcount(
1346 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
1347 ia, CARP_COUNT_RUNNING);
1348
1349 /* This should never happen, but... */
1350 if (*count == 0)
1351 return (0);
1352
1353 if (carp_hash(sc, src) % *count == index - 1 &&
1354 sc->sc_state == MASTER) {
1355 return (1);
1356 }
1357 } else {
1358 if (sc->sc_state == MASTER)
1359 return (1);
1360 }
1361
1362 return (0);
1363 }
1364
#ifdef INET6
/*
 * Find the address entry that should answer a neighbor solicitation for
 * taddr.
 *
 * v:     the struct carp_if hanging off the parent interface.
 * taddr: solicited IPv6 target address.
 *
 * Returns the matching ifaddr of a virtual host that is IFF_UP,
 * IFF_RUNNING and in MASTER state, or NULL when there is none.
 */
struct ifaddr *
carp_iamatch6(void *v, struct in6_addr *taddr)
{
	struct carp_if *cif = v;
	struct carp_softc *vh;
	struct ifaddr *ifa;

	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
		/* The state test does not depend on the address; do it once. */
		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
		    (IFF_UP|IFF_RUNNING) || vh->sc_state != MASTER)
			continue;

		IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
			/*
			 * The interface address list also holds non-IPv6
			 * entries; only AF_INET6 ones may be viewed through
			 * ifatoia6().
			 */
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			if (IN6_ARE_ADDR_EQUAL(taddr,
			    &ifatoia6(ifa)->ia_addr.sin6_addr))
				return (ifa);
		}
	}

	return (NULL);
}
#endif /* INET6 */
1386
1387 struct ifnet *
1388 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
1389 {
1390 struct carp_if *cif = (struct carp_if *)v;
1391 struct carp_softc *vh;
1392 u_int8_t *ena;
1393
1394 if (src)
1395 ena = (u_int8_t *)&eh->ether_shost;
1396 else
1397 ena = (u_int8_t *)&eh->ether_dhost;
1398
1399 switch (iftype) {
1400 case IFT_ETHER:
1401 case IFT_FDDI:
1402 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1403 return (NULL);
1404 break;
1405 case IFT_ISO88025:
1406 if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
1407 return (NULL);
1408 break;
1409 default:
1410 return (NULL);
1411 break;
1412 }
1413
1414 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1415 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1416 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
1417 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
1418 ETHER_ADDR_LEN)) {
1419 return (&vh->sc_if);
1420 }
1421
1422 return (NULL);
1423 }
1424
1425 int
1426 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1427 {
1428 struct ether_header eh;
1429 struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp;
1430 struct ifnet *ifp;
1431
1432 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
1433 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
1434 eh.ether_type = etype;
1435
1436 if (m->m_flags & (M_BCAST|M_MCAST)) {
1437 struct carp_softc *vh;
1438 struct mbuf *m0;
1439
1440 /*
1441 * XXX Should really check the list of multicast addresses
1442 * for each CARP interface _before_ copying.
1443 */
1444 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1445 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1446 if (m0 == NULL)
1447 continue;
1448 m_set_rcvif(m0, &vh->sc_if);
1449 ether_input(&vh->sc_if, m0);
1450 }
1451 return (1);
1452 }
1453
1454 ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0);
1455 if (ifp == NULL) {
1456 return (1);
1457 }
1458
1459 m_set_rcvif(m, ifp);
1460
1461 bpf_mtap(ifp, m);
1462 ifp->if_ipackets++;
1463 ether_input(ifp, m);
1464 return (0);
1465 }
1466
1467 static void
1468 carp_master_down(void *v)
1469 {
1470 struct carp_softc *sc = v;
1471
1472 switch (sc->sc_state) {
1473 case INIT:
1474 printf("%s: master_down event in INIT state\n",
1475 sc->sc_if.if_xname);
1476 break;
1477 case MASTER:
1478 break;
1479 case BACKUP:
1480 CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
1481 carp_set_state(sc, MASTER);
1482 carp_send_ad(sc);
1483 carp_send_arp(sc);
1484 #ifdef INET6
1485 carp_send_na(sc);
1486 #endif /* INET6 */
1487 carp_setrun(sc, 0);
1488 carp_setroute(sc, RTM_ADD);
1489 break;
1490 }
1491 }
1492
1493 /*
1494 * When in backup state, af indicates whether to reset the master down timer
1495 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1496 */
1497 static void
1498 carp_setrun(struct carp_softc *sc, sa_family_t af)
1499 {
1500 struct timeval tv;
1501
1502 if (sc->sc_carpdev == NULL) {
1503 sc->sc_if.if_flags &= ~IFF_RUNNING;
1504 carp_set_state(sc, INIT);
1505 return;
1506 }
1507
1508 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
1509 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1510 sc->sc_if.if_flags |= IFF_RUNNING;
1511 } else {
1512 sc->sc_if.if_flags &= ~IFF_RUNNING;
1513 carp_setroute(sc, RTM_DELETE);
1514 return;
1515 }
1516
1517 switch (sc->sc_state) {
1518 case INIT:
1519 carp_set_state(sc, BACKUP);
1520 carp_setroute(sc, RTM_DELETE);
1521 carp_setrun(sc, 0);
1522 break;
1523 case BACKUP:
1524 callout_stop(&sc->sc_ad_tmo);
1525 tv.tv_sec = 3 * sc->sc_advbase;
1526 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1527 switch (af) {
1528 #ifdef INET
1529 case AF_INET:
1530 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1531 break;
1532 #endif /* INET */
1533 #ifdef INET6_notyet
1534 case AF_INET6:
1535 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1536 break;
1537 #endif /* INET6 */
1538 default:
1539 if (sc->sc_naddrs)
1540 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1541 #ifdef INET6_notyet
1542 if (sc->sc_naddrs6)
1543 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1544 #endif /* INET6 */
1545 break;
1546 }
1547 break;
1548 case MASTER:
1549 tv.tv_sec = sc->sc_advbase;
1550 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1551 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1552 break;
1553 }
1554 }
1555
1556 static void
1557 carp_multicast_cleanup(struct carp_softc *sc)
1558 {
1559 struct ip_moptions *imo = &sc->sc_imo;
1560 #ifdef INET6
1561 struct ip6_moptions *im6o = &sc->sc_im6o;
1562 #endif
1563 u_int16_t n = imo->imo_num_memberships;
1564
1565 /* Clean up our own multicast memberships */
1566 while (n-- > 0) {
1567 if (imo->imo_membership[n] != NULL) {
1568 in_delmulti(imo->imo_membership[n]);
1569 imo->imo_membership[n] = NULL;
1570 }
1571 }
1572 imo->imo_num_memberships = 0;
1573 imo->imo_multicast_if_index = 0;
1574
1575 #ifdef INET6
1576 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1577 struct in6_multi_mship *imm =
1578 LIST_FIRST(&im6o->im6o_memberships);
1579
1580 LIST_REMOVE(imm, i6mm_chain);
1581 in6_leavegroup(imm);
1582 }
1583 im6o->im6o_multicast_if_index = 0;
1584 #endif
1585
1586 /* And any other multicast memberships */
1587 carp_ether_purgemulti(sc);
1588 }
1589
1590 static int
1591 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1592 {
1593 struct carp_if *cif, *ncif = NULL;
1594 struct carp_softc *vr, *after = NULL;
1595 int myself = 0, error = 0;
1596 int s;
1597
1598 if (ifp == sc->sc_carpdev)
1599 return (0);
1600
1601 if (ifp != NULL) {
1602 if ((ifp->if_flags & IFF_MULTICAST) == 0)
1603 return (EADDRNOTAVAIL);
1604
1605 if (ifp->if_type == IFT_CARP)
1606 return (EINVAL);
1607
1608 if (ifp->if_carp == NULL) {
1609 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
1610 if (ncif == NULL)
1611 return (ENOBUFS);
1612 if ((error = ifpromisc(ifp, 1))) {
1613 free(ncif, M_IFADDR);
1614 return (error);
1615 }
1616
1617 ncif->vhif_ifp = ifp;
1618 TAILQ_INIT(&ncif->vhif_vrs);
1619 } else {
1620 cif = (struct carp_if *)ifp->if_carp;
1621 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1622 if (vr != sc && vr->sc_vhid == sc->sc_vhid)
1623 return (EINVAL);
1624 }
1625
1626 /* detach from old interface */
1627 if (sc->sc_carpdev != NULL)
1628 carpdetach(sc);
1629
1630 /* join multicast groups */
1631 if (sc->sc_naddrs < 0 &&
1632 (error = carp_join_multicast(sc)) != 0) {
1633 if (ncif != NULL)
1634 free(ncif, M_IFADDR);
1635 return (error);
1636 }
1637
1638 #ifdef INET6
1639 if (sc->sc_naddrs6 < 0 &&
1640 (error = carp_join_multicast6(sc)) != 0) {
1641 if (ncif != NULL)
1642 free(ncif, M_IFADDR);
1643 carp_multicast_cleanup(sc);
1644 return (error);
1645 }
1646 #endif
1647
1648 /* attach carp interface to physical interface */
1649 if (ncif != NULL)
1650 ifp->if_carp = (void *)ncif;
1651 sc->sc_carpdev = ifp;
1652 sc->sc_if.if_capabilities = ifp->if_capabilities &
1653 (IFCAP_TSOv4 | IFCAP_TSOv6 |
1654 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
1655 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
1656 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
1657 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
1658 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);
1659
1660 cif = (struct carp_if *)ifp->if_carp;
1661 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1662 if (vr == sc)
1663 myself = 1;
1664 if (vr->sc_vhid < sc->sc_vhid)
1665 after = vr;
1666 }
1667
1668 if (!myself) {
1669 /* We're trying to keep things in order */
1670 if (after == NULL) {
1671 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1672 } else {
1673 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1674 sc, sc_list);
1675 }
1676 cif->vhif_nvrs++;
1677 }
1678 if (sc->sc_naddrs || sc->sc_naddrs6)
1679 sc->sc_if.if_flags |= IFF_UP;
1680 carp_set_enaddr(sc);
1681 KERNEL_LOCK(1, NULL);
1682 s = splnet();
1683 /* XXX linkstatehooks establish */
1684 carp_carpdev_state(ifp);
1685 splx(s);
1686 KERNEL_UNLOCK_ONE(NULL);
1687 } else {
1688 carpdetach(sc);
1689 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1690 }
1691 return (0);
1692 }
1693
1694 static void
1695 carp_set_enaddr(struct carp_softc *sc)
1696 {
1697 uint8_t enaddr[ETHER_ADDR_LEN];
1698 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
1699 enaddr[0] = 3;
1700 enaddr[1] = 0;
1701 enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
1702 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
1703 enaddr[4] = 0;
1704 enaddr[5] = 0;
1705 } else {
1706 enaddr[0] = 0;
1707 enaddr[1] = 0;
1708 enaddr[2] = 0x5e;
1709 enaddr[3] = 0;
1710 enaddr[4] = 1;
1711 enaddr[5] = sc->sc_vhid;
1712 }
1713 if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
1714 }
1715
#if 0
/*
 * (Compiled out.)  Address-list change callback; v is the carp softc.
 *
 * Recounts the IPv4 and IPv6 addresses on the carp interface.  When
 * either count dropped, the stored counts are refreshed, the IPv4 CARP
 * group membership is re-established if it is gone, and either the HMAC
 * is recomputed or -- with no addresses left -- the interface is marked
 * down and put in INIT state.  carp_setrun() is always called last.
 */
static void
carp_addr_updated(void *v)
{
	struct carp_softc *sc = v;
	struct ifaddr *cur;
	int n4 = 0, n6 = 0;

	IFADDR_READER_FOREACH(cur, &sc->sc_if) {
		switch (cur->ifa_addr->sa_family) {
		case AF_INET:
			n4++;
			break;
		case AF_INET6:
			n6++;
			break;
		default:
			break;
		}
	}

	/* Handle a callback after SIOCDIFADDR */
	if (n4 < sc->sc_naddrs || n6 < sc->sc_naddrs6) {
		struct in_addr group;

		sc->sc_naddrs = n4;
		sc->sc_naddrs6 = n6;

		/* Re-establish multicast membership removed by in_control */
		group.s_addr = INADDR_CARP_GROUP;
		if (!in_multi_group(group, &sc->sc_if, 0)) {
			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));

			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
				carp_join_multicast(sc);
		}

		if (sc->sc_naddrs != 0 || sc->sc_naddrs6 != 0) {
			carp_hmac_prepare(sc);
		} else {
			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state(sc, INIT);
		}
	}

	carp_setrun(sc, 0);
}
#endif
1757
1758 static int
1759 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1760 {
1761 struct ifnet *ifp = sc->sc_carpdev;
1762 struct in_ifaddr *ia, *ia_if;
1763 int error = 0;
1764 int s;
1765
1766 if (sin->sin_addr.s_addr == 0) {
1767 if (!(sc->sc_if.if_flags & IFF_UP))
1768 carp_set_state(sc, INIT);
1769 if (sc->sc_naddrs)
1770 sc->sc_if.if_flags |= IFF_UP;
1771 carp_setrun(sc, 0);
1772 return (0);
1773 }
1774
1775 /* we have to do this by hand to ensure we don't match on ourselves */
1776 ia_if = NULL;
1777 s = pserialize_read_enter();
1778 IN_ADDRLIST_READER_FOREACH(ia) {
1779 /* and, yeah, we need a multicast-capable iface too */
1780 if (ia->ia_ifp != &sc->sc_if &&
1781 ia->ia_ifp->if_type != IFT_CARP &&
1782 (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1783 (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
1784 ia->ia_subnet) {
1785 if (!ia_if)
1786 ia_if = ia;
1787 }
1788 }
1789
1790 if (ia_if) {
1791 ia = ia_if;
1792 if (ifp) {
1793 if (ifp != ia->ia_ifp)
1794 return (EADDRNOTAVAIL);
1795 } else {
1796 /* FIXME NOMPSAFE */
1797 ifp = ia->ia_ifp;
1798 }
1799 }
1800 pserialize_read_exit(s);
1801
1802 if ((error = carp_set_ifp(sc, ifp)))
1803 return (error);
1804
1805 if (sc->sc_carpdev == NULL)
1806 return (EADDRNOTAVAIL);
1807
1808 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1809 return (error);
1810
1811 sc->sc_naddrs++;
1812 if (sc->sc_carpdev != NULL)
1813 sc->sc_if.if_flags |= IFF_UP;
1814
1815 carp_set_state(sc, INIT);
1816 carp_setrun(sc, 0);
1817
1818 /*
1819 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
1820 * to correct any inappropriate routes that it inserted.
1821 */
1822 if (sc->ah_cookie == 0) {
1823 /* XXX link address hook */
1824 }
1825
1826 return (0);
1827 }
1828
1829 static int
1830 carp_join_multicast(struct carp_softc *sc)
1831 {
1832 struct ip_moptions *imo = &sc->sc_imo, tmpimo;
1833 struct in_addr addr;
1834
1835 memset(&tmpimo, 0, sizeof(tmpimo));
1836 addr.s_addr = INADDR_CARP_GROUP;
1837 if ((tmpimo.imo_membership[0] =
1838 in_addmulti(&addr, &sc->sc_if)) == NULL) {
1839 return (ENOBUFS);
1840 }
1841
1842 imo->imo_membership[0] = tmpimo.imo_membership[0];
1843 imo->imo_num_memberships = 1;
1844 imo->imo_multicast_if_index = sc->sc_if.if_index;
1845 imo->imo_multicast_ttl = CARP_DFLTTL;
1846 imo->imo_multicast_loop = 0;
1847 return (0);
1848 }
1849
1850
1851 #ifdef INET6
1852 static int
1853 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1854 {
1855 struct ifnet *ifp = sc->sc_carpdev;
1856 struct in6_ifaddr *ia, *ia_if;
1857 int error = 0;
1858 int s;
1859
1860 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1861 if (!(sc->sc_if.if_flags & IFF_UP))
1862 carp_set_state(sc, INIT);
1863 if (sc->sc_naddrs6)
1864 sc->sc_if.if_flags |= IFF_UP;
1865 carp_setrun(sc, 0);
1866 return (0);
1867 }
1868
1869 /* we have to do this by hand to ensure we don't match on ourselves */
1870 ia_if = NULL;
1871 s = pserialize_read_enter();
1872 IN6_ADDRLIST_READER_FOREACH(ia) {
1873 int i;
1874
1875 for (i = 0; i < 4; i++) {
1876 if ((sin6->sin6_addr.s6_addr32[i] &
1877 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1878 (ia->ia_addr.sin6_addr.s6_addr32[i] &
1879 ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1880 break;
1881 }
1882 /* and, yeah, we need a multicast-capable iface too */
1883 if (ia->ia_ifp != &sc->sc_if &&
1884 ia->ia_ifp->if_type != IFT_CARP &&
1885 (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1886 (i == 4)) {
1887 if (!ia_if)
1888 ia_if = ia;
1889 }
1890 }
1891 pserialize_read_exit(s);
1892
1893 if (ia_if) {
1894 ia = ia_if;
1895 if (sc->sc_carpdev) {
1896 if (sc->sc_carpdev != ia->ia_ifp)
1897 return (EADDRNOTAVAIL);
1898 } else {
1899 ifp = ia->ia_ifp;
1900 }
1901 }
1902
1903 if ((error = carp_set_ifp(sc, ifp)))
1904 return (error);
1905
1906 if (sc->sc_carpdev == NULL)
1907 return (EADDRNOTAVAIL);
1908
1909 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1910 return (error);
1911
1912 sc->sc_naddrs6++;
1913 if (sc->sc_carpdev != NULL)
1914 sc->sc_if.if_flags |= IFF_UP;
1915 carp_set_state(sc, INIT);
1916 carp_setrun(sc, 0);
1917
1918 return (0);
1919 }
1920
1921 static int
1922 carp_join_multicast6(struct carp_softc *sc)
1923 {
1924 struct in6_multi_mship *imm, *imm2;
1925 struct ip6_moptions *im6o = &sc->sc_im6o;
1926 struct sockaddr_in6 addr6;
1927 int error;
1928
1929 /* Join IPv6 CARP multicast group */
1930 memset(&addr6, 0, sizeof(addr6));
1931 addr6.sin6_family = AF_INET6;
1932 addr6.sin6_len = sizeof(addr6);
1933 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1934 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1935 addr6.sin6_addr.s6_addr8[15] = 0x12;
1936 if ((imm = in6_joingroup(&sc->sc_if,
1937 &addr6.sin6_addr, &error, 0)) == NULL) {
1938 return (error);
1939 }
1940 /* join solicited multicast address */
1941 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1942 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1943 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1944 addr6.sin6_addr.s6_addr32[1] = 0;
1945 addr6.sin6_addr.s6_addr32[2] = htonl(1);
1946 addr6.sin6_addr.s6_addr32[3] = 0;
1947 addr6.sin6_addr.s6_addr8[12] = 0xff;
1948 if ((imm2 = in6_joingroup(&sc->sc_if,
1949 &addr6.sin6_addr, &error, 0)) == NULL) {
1950 in6_leavegroup(imm);
1951 return (error);
1952 }
1953
1954 /* apply v6 multicast membership */
1955 im6o->im6o_multicast_if_index = sc->sc_if.if_index;
1956 if (imm)
1957 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1958 i6mm_chain);
1959 if (imm2)
1960 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1961 i6mm_chain);
1962
1963 return (0);
1964 }
1965
1966 #endif /* INET6 */
1967
1968 static int
1969 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1970 {
1971 struct lwp *l = curlwp; /* XXX */
1972 struct carp_softc *sc = ifp->if_softc, *vr;
1973 struct carpreq carpr;
1974 struct ifaddr *ifa;
1975 struct ifreq *ifr;
1976 struct ifnet *cdev = NULL;
1977 int error = 0;
1978
1979 ifa = (struct ifaddr *)data;
1980 ifr = (struct ifreq *)data;
1981
1982 switch (cmd) {
1983 case SIOCINITIFADDR:
1984 switch (ifa->ifa_addr->sa_family) {
1985 #ifdef INET
1986 case AF_INET:
1987 sc->sc_if.if_flags |= IFF_UP;
1988 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
1989 sizeof(struct sockaddr));
1990 error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1991 break;
1992 #endif /* INET */
1993 #ifdef INET6
1994 case AF_INET6:
1995 sc->sc_if.if_flags|= IFF_UP;
1996 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1997 break;
1998 #endif /* INET6 */
1999 default:
2000 error = EAFNOSUPPORT;
2001 break;
2002 }
2003 break;
2004
2005 case SIOCSIFFLAGS:
2006 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
2007 break;
2008 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2009 callout_stop(&sc->sc_ad_tmo);
2010 callout_stop(&sc->sc_md_tmo);
2011 callout_stop(&sc->sc_md6_tmo);
2012 if (sc->sc_state == MASTER) {
2013 /* we need the interface up to bow out */
2014 sc->sc_if.if_flags |= IFF_UP;
2015 sc->sc_bow_out = 1;
2016 carp_send_ad(sc);
2017 }
2018 sc->sc_if.if_flags &= ~IFF_UP;
2019 carp_set_state(sc, INIT);
2020 carp_setrun(sc, 0);
2021 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
2022 sc->sc_if.if_flags |= IFF_UP;
2023 carp_setrun(sc, 0);
2024 }
2025 break;
2026
2027 case SIOCSVH:
2028 if (l == NULL)
2029 break;
2030 if ((error = kauth_authorize_network(l->l_cred,
2031 KAUTH_NETWORK_INTERFACE,
2032 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2033 NULL)) != 0)
2034 break;
2035 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2036 break;
2037 error = 1;
2038 if (carpr.carpr_carpdev[0] != '\0' &&
2039 (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
2040 return (EINVAL);
2041 if ((error = carp_set_ifp(sc, cdev)))
2042 return (error);
2043 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
2044 switch (carpr.carpr_state) {
2045 case BACKUP:
2046 callout_stop(&sc->sc_ad_tmo);
2047 carp_set_state(sc, BACKUP);
2048 carp_setrun(sc, 0);
2049 carp_setroute(sc, RTM_DELETE);
2050 break;
2051 case MASTER:
2052 carp_master_down(sc);
2053 break;
2054 default:
2055 break;
2056 }
2057 }
2058 if (carpr.carpr_vhid > 0) {
2059 if (carpr.carpr_vhid > 255) {
2060 error = EINVAL;
2061 break;
2062 }
2063 if (sc->sc_carpdev) {
2064 struct carp_if *cif;
2065 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2066 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
2067 if (vr != sc &&
2068 vr->sc_vhid == carpr.carpr_vhid)
2069 return (EINVAL);
2070 }
2071 sc->sc_vhid = carpr.carpr_vhid;
2072 carp_set_enaddr(sc);
2073 carp_set_state(sc, INIT);
2074 error--;
2075 }
2076 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
2077 if (carpr.carpr_advskew > 254) {
2078 error = EINVAL;
2079 break;
2080 }
2081 if (carpr.carpr_advbase > 255) {
2082 error = EINVAL;
2083 break;
2084 }
2085 sc->sc_advbase = carpr.carpr_advbase;
2086 sc->sc_advskew = carpr.carpr_advskew;
2087 error--;
2088 }
2089 memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
2090 if (error > 0)
2091 error = EINVAL;
2092 else {
2093 error = 0;
2094 carp_setrun(sc, 0);
2095 }
2096 break;
2097
2098 case SIOCGVH:
2099 memset(&carpr, 0, sizeof(carpr));
2100 if (sc->sc_carpdev != NULL)
2101 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2102 IFNAMSIZ);
2103 carpr.carpr_state = sc->sc_state;
2104 carpr.carpr_vhid = sc->sc_vhid;
2105 carpr.carpr_advbase = sc->sc_advbase;
2106 carpr.carpr_advskew = sc->sc_advskew;
2107
2108 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
2109 KAUTH_NETWORK_INTERFACE,
2110 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2111 NULL)) == 0)
2112 memcpy(carpr.carpr_key, sc->sc_key,
2113 sizeof(carpr.carpr_key));
2114 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2115 break;
2116
2117 case SIOCADDMULTI:
2118 error = carp_ether_addmulti(sc, ifr);
2119 break;
2120
2121 case SIOCDELMULTI:
2122 error = carp_ether_delmulti(sc, ifr);
2123 break;
2124
2125 case SIOCSIFCAP:
2126 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
2127 error = 0;
2128 break;
2129
2130 default:
2131 error = ether_ioctl(ifp, cmd, data);
2132 }
2133
2134 carp_hmac_prepare(sc);
2135 return (error);
2136 }
2137
2138
/*
 * if_start handler of the carp interface.  This function should never be
 * called; it does nothing except, in DEBUG kernels, print a note naming
 * the interface.
 */
static void
carp_start(struct ifnet *ifp)
{

#if defined(DEBUG)
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
2149
2150 int
2151 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
2152 const struct rtentry *rt)
2153 {
2154 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2155 KASSERT(KERNEL_LOCKED_P());
2156
2157 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
2158 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt);
2159 } else {
2160 m_freem(m);
2161 return (ENETUNREACH);
2162 }
2163 }
2164
2165 static void
2166 carp_set_state(struct carp_softc *sc, int state)
2167 {
2168 static const char *carp_states[] = { CARP_STATES };
2169 if (sc->sc_state == state)
2170 return;
2171
2172 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
2173
2174 sc->sc_state = state;
2175 switch (state) {
2176 case BACKUP:
2177 sc->sc_if.if_link_state = LINK_STATE_DOWN;
2178 break;
2179 case MASTER:
2180 sc->sc_if.if_link_state = LINK_STATE_UP;
2181 break;
2182 default:
2183 sc->sc_if.if_link_state = LINK_STATE_UNKNOWN;
2184 break;
2185 }
2186 rt_ifmsg(&sc->sc_if);
2187 }
2188
2189 void
2190 carp_carpdev_state(void *v)
2191 {
2192 struct carp_if *cif;
2193 struct carp_softc *sc;
2194 struct ifnet *ifp = v;
2195
2196 if (ifp->if_type == IFT_CARP)
2197 return;
2198
2199 cif = (struct carp_if *)ifp->if_carp;
2200
2201 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2202 int suppressed = sc->sc_suppress;
2203
2204 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2205 !(sc->sc_carpdev->if_flags & IFF_UP)) {
2206 sc->sc_if.if_flags &= ~IFF_RUNNING;
2207 callout_stop(&sc->sc_ad_tmo);
2208 callout_stop(&sc->sc_md_tmo);
2209 callout_stop(&sc->sc_md6_tmo);
2210 carp_set_state(sc, INIT);
2211 sc->sc_suppress = 1;
2212 carp_setrun(sc, 0);
2213 if (!suppressed) {
2214 carp_suppress_preempt++;
2215 if (carp_suppress_preempt == 1)
2216 carp_send_ad_all();
2217 }
2218 } else {
2219 carp_set_state(sc, INIT);
2220 sc->sc_suppress = 0;
2221 carp_setrun(sc, 0);
2222 if (suppressed)
2223 carp_suppress_preempt--;
2224 }
2225 }
2226 }
2227
2228 static int
2229 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2230 {
2231 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
2232 struct ifnet *ifp;
2233 struct carp_mc_entry *mc;
2234 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2235 int error;
2236
2237 ifp = sc->sc_carpdev;
2238 if (ifp == NULL)
2239 return (EINVAL);
2240
2241 error = ether_addmulti(sa, &sc->sc_ac);
2242 if (error != ENETRESET)
2243 return (error);
2244
2245 /*
2246 * This is new multicast address. We have to tell parent
2247 * about it. Also, remember this multicast address so that
2248 * we can delete them on unconfigure.
2249 */
2250 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2251 if (mc == NULL) {
2252 error = ENOMEM;
2253 goto alloc_failed;
2254 }
2255
2256 /*
2257 * As ether_addmulti() returns ENETRESET, following two
2258 * statement shouldn't fail.
2259 */
2260 (void)ether_multiaddr(sa, addrlo, addrhi);
2261 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2262 memcpy(&mc->mc_addr, sa, sa->sa_len);
2263 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2264
2265 error = if_mcast_op(ifp, SIOCADDMULTI, sa);
2266 if (error != 0)
2267 goto ioctl_failed;
2268
2269 return (error);
2270
2271 ioctl_failed:
2272 LIST_REMOVE(mc, mc_entries);
2273 free(mc, M_DEVBUF);
2274 alloc_failed:
2275 (void)ether_delmulti(sa, &sc->sc_ac);
2276
2277 return (error);
2278 }
2279
2280 static int
2281 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2282 {
2283 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
2284 struct ifnet *ifp;
2285 struct ether_multi *enm;
2286 struct carp_mc_entry *mc;
2287 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2288 int error;
2289
2290 ifp = sc->sc_carpdev;
2291 if (ifp == NULL)
2292 return (EINVAL);
2293
2294 /*
2295 * Find a key to lookup carp_mc_entry. We have to do this
2296 * before calling ether_delmulti for obvious reason.
2297 */
2298 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
2299 return (error);
2300 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2301 if (enm == NULL)
2302 return (EINVAL);
2303
2304 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2305 if (mc->mc_enm == enm)
2306 break;
2307
2308 /* We won't delete entries we didn't add */
2309 if (mc == NULL)
2310 return (EINVAL);
2311
2312 error = ether_delmulti(sa, &sc->sc_ac);
2313 if (error != ENETRESET)
2314 return (error);
2315
2316 /* We no longer use this multicast address. Tell parent so. */
2317 error = if_mcast_op(ifp, SIOCDELMULTI, sa);
2318 if (error == 0) {
2319 /* And forget about this address. */
2320 LIST_REMOVE(mc, mc_entries);
2321 free(mc, M_DEVBUF);
2322 } else
2323 (void)ether_addmulti(sa, &sc->sc_ac);
2324 return (error);
2325 }
2326
2327 /*
2328 * Delete any multicast address we have asked to add from parent
2329 * interface. Called when the carp is being unconfigured.
2330 */
2331 static void
2332 carp_ether_purgemulti(struct carp_softc *sc)
2333 {
2334 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */
2335 struct carp_mc_entry *mc;
2336
2337 if (ifp == NULL)
2338 return;
2339
2340 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2341 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
2342 LIST_REMOVE(mc, mc_entries);
2343 free(mc, M_DEVBUF);
2344 }
2345 }
2346
2347 static int
2348 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
2349 {
2350
2351 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
2352 }
2353
2354 void
2355 carp_init(void)
2356 {
2357
2358 sysctl_net_inet_carp_setup(NULL);
2359 #ifdef MBUFTRACE
2360 MOWNER_ATTACH(&carp_proto_mowner_rx);
2361 MOWNER_ATTACH(&carp_proto_mowner_tx);
2362 MOWNER_ATTACH(&carp_proto6_mowner_rx);
2363 MOWNER_ATTACH(&carp_proto6_mowner_tx);
2364 #endif
2365
2366 carp_wqinput = wqinput_create("carp", _carp_proto_input);
2367 #ifdef INET6
2368 carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
2369 #endif
2370 }
2371
2372 static void
2373 sysctl_net_inet_carp_setup(struct sysctllog **clog)
2374 {
2375
2376 sysctl_createv(clog, 0, NULL, NULL,
2377 CTLFLAG_PERMANENT,
2378 CTLTYPE_NODE, "inet", NULL,
2379 NULL, 0, NULL, 0,
2380 CTL_NET, PF_INET, CTL_EOL);
2381 sysctl_createv(clog, 0, NULL, NULL,
2382 CTLFLAG_PERMANENT,
2383 CTLTYPE_NODE, "carp",
2384 SYSCTL_DESCR("CARP related settings"),
2385 NULL, 0, NULL, 0,
2386 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
2387
2388 sysctl_createv(clog, 0, NULL, NULL,
2389 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2390 CTLTYPE_INT, "preempt",
2391 SYSCTL_DESCR("Enable CARP Preempt"),
2392 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
2393 CTL_NET, PF_INET, IPPROTO_CARP,
2394 CTL_CREATE, CTL_EOL);
2395 sysctl_createv(clog, 0, NULL, NULL,
2396 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2397 CTLTYPE_INT, "arpbalance",
2398 SYSCTL_DESCR("Enable ARP balancing"),
2399 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
2400 CTL_NET, PF_INET, IPPROTO_CARP,
2401 CTL_CREATE, CTL_EOL);
2402 sysctl_createv(clog, 0, NULL, NULL,
2403 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2404 CTLTYPE_INT, "allow",
2405 SYSCTL_DESCR("Enable CARP"),
2406 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
2407 CTL_NET, PF_INET, IPPROTO_CARP,
2408 CTL_CREATE, CTL_EOL);
2409 sysctl_createv(clog, 0, NULL, NULL,
2410 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2411 CTLTYPE_INT, "log",
2412 SYSCTL_DESCR("CARP logging"),
2413 NULL, 0, &carp_opts[CARPCTL_LOG], 0,
2414 CTL_NET, PF_INET, IPPROTO_CARP,
2415 CTL_CREATE, CTL_EOL);
2416 sysctl_createv(clog, 0, NULL, NULL,
2417 CTLFLAG_PERMANENT,
2418 CTLTYPE_STRUCT, "stats",
2419 SYSCTL_DESCR("CARP statistics"),
2420 sysctl_net_inet_carp_stats, 0, NULL, 0,
2421 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
2422 CTL_EOL);
2423 }
2424