ip_carp.c revision 1.89 1 /* $NetBSD: ip_carp.c,v 1.89 2017/05/12 17:53:54 ryo Exp $ */
2 /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */
3
4 /*
5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
6 * Copyright (c) 2003 Ryan McBride. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #ifdef _KERNEL_OPT
31 #include "opt_inet.h"
32 #include "opt_mbuftrace.h"
33 #endif
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.89 2017/05/12 17:53:54 ryo Exp $");
37
38 /*
39 * TODO:
40 * - iface reconfigure
41 * - support for hardware checksum calculations;
42 *
43 */
44
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/mbuf.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/callout.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/device.h>
54 #include <sys/time.h>
55 #include <sys/kernel.h>
56 #include <sys/kauth.h>
57 #include <sys/sysctl.h>
58 #include <sys/ucred.h>
59 #include <sys/syslog.h>
60 #include <sys/acct.h>
61 #include <sys/cprng.h>
62
63 #include <sys/cpu.h>
64
65 #include <net/if.h>
66 #include <net/pfil.h>
67 #include <net/if_types.h>
68 #include <net/if_ether.h>
69 #include <net/route.h>
70 #include <net/netisr.h>
71 #include <net/net_stats.h>
72 #include <netinet/if_inarp.h>
73 #include <netinet/wqinput.h>
74
75 #if NFDDI > 0
76 #include <net/if_fddi.h>
77 #endif
78 #if NTOKEN > 0
79 #include <net/if_token.h>
80 #endif
81
82 #ifdef INET
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/in_var.h>
86 #include <netinet/ip.h>
87 #include <netinet/ip_var.h>
88
89 #include <net/if_dl.h>
90 #endif
91
92 #ifdef INET6
93 #include <netinet/icmp6.h>
94 #include <netinet/ip6.h>
95 #include <netinet6/ip6_var.h>
96 #include <netinet6/nd6.h>
97 #include <netinet6/scope6_var.h>
98 #include <netinet6/in6_var.h>
99 #endif
100
101 #include <net/bpf.h>
102
103 #include <sys/sha1.h>
104
105 #include <netinet/ip_carp.h>
106
107 #include "ioconf.h"
108
109 struct carp_mc_entry {
110 LIST_ENTRY(carp_mc_entry) mc_entries;
111 union {
112 struct ether_multi *mcu_enm;
113 } mc_u;
114 struct sockaddr_storage mc_addr;
115 };
116 #define mc_enm mc_u.mcu_enm
117
118 struct carp_softc {
119 struct ethercom sc_ac;
120 #define sc_if sc_ac.ec_if
121 #define sc_carpdev sc_ac.ec_if.if_carpdev
122 int ah_cookie;
123 int lh_cookie;
124 struct ip_moptions sc_imo;
125 #ifdef INET6
126 struct ip6_moptions sc_im6o;
127 #endif /* INET6 */
128 TAILQ_ENTRY(carp_softc) sc_list;
129
130 enum { INIT = 0, BACKUP, MASTER } sc_state;
131
132 int sc_suppress;
133 int sc_bow_out;
134
135 int sc_sendad_errors;
136 #define CARP_SENDAD_MAX_ERRORS 3
137 int sc_sendad_success;
138 #define CARP_SENDAD_MIN_SUCCESS 3
139
140 int sc_vhid;
141 int sc_advskew;
142 int sc_naddrs;
143 int sc_naddrs6;
144 int sc_advbase; /* seconds */
145 int sc_init_counter;
146 u_int64_t sc_counter;
147
148 /* authentication */
149 #define CARP_HMAC_PAD 64
150 unsigned char sc_key[CARP_KEY_LEN];
151 unsigned char sc_pad[CARP_HMAC_PAD];
152 SHA1_CTX sc_sha1;
153 u_int32_t sc_hashkey[2];
154
155 struct callout sc_ad_tmo; /* advertisement timeout */
156 struct callout sc_md_tmo; /* master down timeout */
157 struct callout sc_md6_tmo; /* master down timeout */
158
159 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
160 };
161
162 int carp_suppress_preempt = 0;
163 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */
164
165 static percpu_t *carpstat_percpu;
166
167 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x)
168
169 #ifdef MBUFTRACE
170 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
171 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
172 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
173 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
174 #endif
175
176 struct carp_if {
177 TAILQ_HEAD(, carp_softc) vhif_vrs;
178 int vhif_nvrs;
179
180 struct ifnet *vhif_ifp;
181 };
182
/*
 * CARP_LOG(sc, (fmt, ...)):
 *	If carp_opts[CARPCTL_LOG] is non-zero, emit one LOG_INFO message
 *	made of a prefix (the interface name of sc, or "carp: " when sc is
 *	NULL), the formatted text and a newline.
 *
 *	The second argument must be a complete, parenthesised addlog()
 *	argument list.
 *
 *	The body is wrapped in do { } while (0) so that the macro expands to
 *	exactly one statement and can be used safely in an unbraced
 *	if/else; callers terminate it with a semicolon.
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if ((sc) != NULL)				\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (/*CONSTCOND*/0)
193
194 static void carp_hmac_prepare(struct carp_softc *);
195 static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
196 unsigned char *);
197 static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
198 unsigned char *);
199 static void carp_setroute(struct carp_softc *, int);
200 static void carp_proto_input_c(struct mbuf *, struct carp_header *,
201 sa_family_t);
202 static void carpdetach(struct carp_softc *);
203 static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
204 struct carp_header *);
205 static void carp_send_ad_all(void);
206 static void carp_send_ad(void *);
207 static void carp_send_arp(struct carp_softc *);
208 static void carp_master_down(void *);
209 static int carp_ioctl(struct ifnet *, u_long, void *);
210 static void carp_start(struct ifnet *);
211 static void carp_setrun(struct carp_softc *, sa_family_t);
212 static void carp_set_state(struct carp_softc *, int);
213 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
214 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
215
216 static void carp_multicast_cleanup(struct carp_softc *);
217 static int carp_set_ifp(struct carp_softc *, struct ifnet *);
218 static void carp_set_enaddr(struct carp_softc *);
219 #if 0
220 static void carp_addr_updated(void *);
221 #endif
222 static u_int32_t carp_hash(struct carp_softc *, u_char *);
223 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
224 static int carp_join_multicast(struct carp_softc *);
225 #ifdef INET6
226 static void carp_send_na(struct carp_softc *);
227 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
228 static int carp_join_multicast6(struct carp_softc *);
229 #endif
230 static int carp_clone_create(struct if_clone *, int);
231 static int carp_clone_destroy(struct ifnet *);
232 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *);
233 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *);
234 static void carp_ether_purgemulti(struct carp_softc *);
235
236 static void sysctl_net_inet_carp_setup(struct sysctllog **);
237
238 /* workqueue-based pr_input */
239 static struct wqinput *carp_wqinput;
240 static void _carp_proto_input(struct mbuf *, int, int);
241 #ifdef INET6
242 static struct wqinput *carp6_wqinput;
243 static void _carp6_proto_input(struct mbuf *, int, int);
244 #endif
245
246 struct if_clone carp_cloner =
247 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
248
249 static __inline u_int16_t
250 carp_cksum(struct mbuf *m, int len)
251 {
252 return (in_cksum(m, len));
253 }
254
255 static __inline u_int16_t
256 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len)
257 {
258 return (in6_cksum(m, IPPROTO_CARP, off, len));
259 }
260
261 static void
262 carp_hmac_prepare(struct carp_softc *sc)
263 {
264 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
265 u_int8_t vhid = sc->sc_vhid & 0xff;
266 SHA1_CTX sha1ctx;
267 u_int32_t kmd[5];
268 struct ifaddr *ifa;
269 int i, found;
270 struct in_addr last, cur, in;
271 #ifdef INET6
272 struct in6_addr last6, cur6, in6;
273 #endif /* INET6 */
274
275 /* compute ipad from key */
276 memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
277 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
278 for (i = 0; i < sizeof(sc->sc_pad); i++)
279 sc->sc_pad[i] ^= 0x36;
280
281 /* precompute first part of inner hash */
282 SHA1Init(&sc->sc_sha1);
283 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
284 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
285 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
286
287 /* generate a key for the arpbalance hash, before the vhid is hashed */
288 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
289 SHA1Final((unsigned char *)kmd, &sha1ctx);
290 sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
291 sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
292
293 /* the rest of the precomputation */
294 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
295
296 /* Hash the addresses from smallest to largest, not interface order */
297 #ifdef INET
298 cur.s_addr = 0;
299 do {
300 found = 0;
301 last = cur;
302 cur.s_addr = 0xffffffff;
303 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
304 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
305 if (ifa->ifa_addr->sa_family == AF_INET &&
306 ntohl(in.s_addr) > ntohl(last.s_addr) &&
307 ntohl(in.s_addr) < ntohl(cur.s_addr)) {
308 cur.s_addr = in.s_addr;
309 found++;
310 }
311 }
312 if (found)
313 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
314 } while (found);
315 #endif /* INET */
316
317 #ifdef INET6
318 memset(&cur6, 0x00, sizeof(cur6));
319 do {
320 found = 0;
321 last6 = cur6;
322 memset(&cur6, 0xff, sizeof(cur6));
323 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
324 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
325 if (IN6_IS_ADDR_LINKLOCAL(&in6))
326 in6.s6_addr16[1] = 0;
327 if (ifa->ifa_addr->sa_family == AF_INET6 &&
328 memcmp(&in6, &last6, sizeof(in6)) > 0 &&
329 memcmp(&in6, &cur6, sizeof(in6)) < 0) {
330 cur6 = in6;
331 found++;
332 }
333 }
334 if (found)
335 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
336 } while (found);
337 #endif /* INET6 */
338
339 /* convert ipad to opad */
340 for (i = 0; i < sizeof(sc->sc_pad); i++)
341 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
342 }
343
344 static void
345 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
346 unsigned char md[20])
347 {
348 SHA1_CTX sha1ctx;
349
350 /* fetch first half of inner hash */
351 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
352
353 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
354 SHA1Final(md, &sha1ctx);
355
356 /* outer hash */
357 SHA1Init(&sha1ctx);
358 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
359 SHA1Update(&sha1ctx, md, 20);
360 SHA1Final(md, &sha1ctx);
361 }
362
363 static int
364 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
365 unsigned char md[20])
366 {
367 unsigned char md2[20];
368
369 carp_hmac_generate(sc, counter, md2);
370
371 return (memcmp(md, md2, sizeof(md2)));
372 }
373
374 static void
375 carp_setroute(struct carp_softc *sc, int cmd)
376 {
377 struct ifaddr *ifa;
378 int s;
379
380 KERNEL_LOCK(1, NULL);
381 s = splsoftnet();
382 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
383 switch (ifa->ifa_addr->sa_family) {
384 case AF_INET: {
385 int count = 0;
386 struct rtentry *rt;
387 int hr_otherif, nr_ourif;
388
389 /*
390 * Avoid screwing with the routes if there are other
391 * carp interfaces which are master and have the same
392 * address.
393 */
394 if (sc->sc_carpdev != NULL &&
395 sc->sc_carpdev->if_carp != NULL) {
396 count = carp_addrcount(
397 (struct carp_if *)sc->sc_carpdev->if_carp,
398 ifatoia(ifa), CARP_COUNT_MASTER);
399 if ((cmd == RTM_ADD && count != 1) ||
400 (cmd == RTM_DELETE && count != 0))
401 continue;
402 }
403
404 /* Remove the existing host route, if any */
405 rtrequest(RTM_DELETE, ifa->ifa_addr,
406 ifa->ifa_addr, ifa->ifa_netmask,
407 RTF_HOST, NULL);
408
409 rt = NULL;
410 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
411 ifa->ifa_netmask, RTF_HOST, &rt);
412 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
413 (rt->rt_flags & RTF_CONNECTED));
414 if (rt != NULL) {
415 rt_unref(rt);
416 rt = NULL;
417 }
418
419 /* Check for a network route on our interface */
420
421 rt = NULL;
422 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
423 ifa->ifa_netmask, 0, &rt);
424 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
425
426 switch (cmd) {
427 case RTM_ADD:
428 if (hr_otherif) {
429 ifa->ifa_rtrequest = NULL;
430 ifa->ifa_flags &= ~RTF_CONNECTED;
431
432 rtrequest(RTM_ADD, ifa->ifa_addr,
433 ifa->ifa_addr, ifa->ifa_netmask,
434 RTF_UP | RTF_HOST, NULL);
435 }
436 if (!hr_otherif || nr_ourif || !rt) {
437 if (nr_ourif &&
438 (rt->rt_flags & RTF_CONNECTED) == 0)
439 rtrequest(RTM_DELETE,
440 ifa->ifa_addr,
441 ifa->ifa_addr,
442 ifa->ifa_netmask, 0, NULL);
443
444 ifa->ifa_rtrequest = arp_rtrequest;
445 ifa->ifa_flags |= RTF_CONNECTED;
446
447 if (rtrequest(RTM_ADD, ifa->ifa_addr,
448 ifa->ifa_addr, ifa->ifa_netmask, 0,
449 NULL) == 0)
450 ifa->ifa_flags |= IFA_ROUTE;
451 }
452 break;
453 case RTM_DELETE:
454 break;
455 default:
456 break;
457 }
458 if (rt != NULL) {
459 rt_unref(rt);
460 rt = NULL;
461 }
462 break;
463 }
464
465 #ifdef INET6
466 case AF_INET6:
467 if (cmd == RTM_ADD)
468 in6_ifaddlocal(ifa);
469 else
470 in6_ifremlocal(ifa);
471 break;
472 #endif /* INET6 */
473 default:
474 break;
475 }
476 }
477 splx(s);
478 KERNEL_UNLOCK_ONE(NULL);
479 }
480
481 /*
482 * process input packet.
483 * we have rearranged checks order compared to the rfc,
484 * but it seems more efficient this way or not possible otherwise.
485 */
486 static void
487 _carp_proto_input(struct mbuf *m, int hlen, int proto)
488 {
489 struct ip *ip = mtod(m, struct ip *);
490 struct carp_softc *sc = NULL;
491 struct carp_header *ch;
492 int iplen, len;
493 struct ifnet *rcvif;
494
495 CARP_STATINC(CARP_STAT_IPACKETS);
496 MCLAIM(m, &carp_proto_mowner_rx);
497
498 if (!carp_opts[CARPCTL_ALLOW]) {
499 m_freem(m);
500 return;
501 }
502
503 rcvif = m_get_rcvif_NOMPSAFE(m);
504 /* check if received on a valid carp interface */
505 if (rcvif->if_type != IFT_CARP) {
506 CARP_STATINC(CARP_STAT_BADIF);
507 CARP_LOG(sc, ("packet received on non-carp interface: %s",
508 rcvif->if_xname));
509 m_freem(m);
510 return;
511 }
512
513 /* verify that the IP TTL is 255. */
514 if (ip->ip_ttl != CARP_DFLTTL) {
515 CARP_STATINC(CARP_STAT_BADTTL);
516 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
517 CARP_DFLTTL, rcvif->if_xname));
518 m_freem(m);
519 return;
520 }
521
522 /*
523 * verify that the received packet length is
524 * equal to the CARP header
525 */
526 iplen = ip->ip_hl << 2;
527 len = iplen + sizeof(*ch);
528 if (len > m->m_pkthdr.len) {
529 CARP_STATINC(CARP_STAT_BADLEN);
530 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
531 rcvif->if_xname));
532 m_freem(m);
533 return;
534 }
535
536 if ((m = m_pullup(m, len)) == NULL) {
537 CARP_STATINC(CARP_STAT_HDROPS);
538 return;
539 }
540 ip = mtod(m, struct ip *);
541 ch = (struct carp_header *)((char *)ip + iplen);
542 /* verify the CARP checksum */
543 m->m_data += iplen;
544 if (carp_cksum(m, len - iplen)) {
545 CARP_STATINC(CARP_STAT_BADSUM);
546 CARP_LOG(sc, ("checksum failed on %s",
547 rcvif->if_xname));
548 m_freem(m);
549 return;
550 }
551 m->m_data -= iplen;
552
553 carp_proto_input_c(m, ch, AF_INET);
554 }
555
556 void
557 carp_proto_input(struct mbuf *m, ...)
558 {
559
560 wqinput_input(carp_wqinput, m, 0, 0);
561 }
562
563 #ifdef INET6
564 static void
565 _carp6_proto_input(struct mbuf *m, int off, int proto)
566 {
567 struct carp_softc *sc = NULL;
568 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
569 struct carp_header *ch;
570 u_int len;
571 struct ifnet *rcvif;
572
573 CARP_STATINC(CARP_STAT_IPACKETS6);
574 MCLAIM(m, &carp_proto6_mowner_rx);
575
576 if (!carp_opts[CARPCTL_ALLOW]) {
577 m_freem(m);
578 return;
579 }
580
581 rcvif = m_get_rcvif_NOMPSAFE(m);
582
583 /* check if received on a valid carp interface */
584 if (rcvif->if_type != IFT_CARP) {
585 CARP_STATINC(CARP_STAT_BADIF);
586 CARP_LOG(sc, ("packet received on non-carp interface: %s",
587 rcvif->if_xname));
588 m_freem(m);
589 return;
590 }
591
592 /* verify that the IP TTL is 255 */
593 if (ip6->ip6_hlim != CARP_DFLTTL) {
594 CARP_STATINC(CARP_STAT_BADTTL);
595 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
596 CARP_DFLTTL, rcvif->if_xname));
597 m_freem(m);
598 return;
599 }
600
601 /* verify that we have a complete carp packet */
602 len = m->m_len;
603 IP6_EXTHDR_GET(ch, struct carp_header *, m, off, sizeof(*ch));
604 if (ch == NULL) {
605 CARP_STATINC(CARP_STAT_BADLEN);
606 CARP_LOG(sc, ("packet size %u too small", len));
607 return;
608 }
609
610 /* verify the CARP checksum */
611 if (carp6_cksum(m, off, sizeof(*ch))) {
612 CARP_STATINC(CARP_STAT_BADSUM);
613 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
614 m_freem(m);
615 return;
616 }
617
618 carp_proto_input_c(m, ch, AF_INET6);
619 return;
620 }
621
622 int
623 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
624 {
625
626 wqinput_input(carp6_wqinput, *mp, *offp, proto);
627
628 return IPPROTO_DONE;
629 }
630 #endif /* INET6 */
631
632 static void
633 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
634 {
635 struct carp_softc *sc;
636 u_int64_t tmp_counter;
637 struct timeval sc_tv, ch_tv;
638
639 TAILQ_FOREACH(sc, &((struct carp_if *)
640 m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list)
641 if (sc->sc_vhid == ch->carp_vhid)
642 break;
643
644 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
645 (IFF_UP|IFF_RUNNING)) {
646 CARP_STATINC(CARP_STAT_BADVHID);
647 m_freem(m);
648 return;
649 }
650
651 /*
652 * Check if our own advertisement was duplicated
653 * from a non simplex interface.
654 * XXX If there is no address on our physical interface
655 * there is no way to distinguish our ads from the ones
656 * another carp host might have sent us.
657 */
658 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
659 struct sockaddr sa;
660 struct ifaddr *ifa;
661 int s;
662
663 memset(&sa, 0, sizeof(sa));
664 sa.sa_family = af;
665 s = pserialize_read_enter();
666 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
667
668 if (ifa && af == AF_INET) {
669 struct ip *ip = mtod(m, struct ip *);
670 if (ip->ip_src.s_addr ==
671 ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
672 pserialize_read_exit(s);
673 m_freem(m);
674 return;
675 }
676 }
677 #ifdef INET6
678 if (ifa && af == AF_INET6) {
679 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
680 struct in6_addr in6_src, in6_found;
681
682 in6_src = ip6->ip6_src;
683 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
684 if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
685 in6_src.s6_addr16[1] = 0;
686 if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
687 in6_found.s6_addr16[1] = 0;
688 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
689 pserialize_read_exit(s);
690 m_freem(m);
691 return;
692 }
693 }
694 #endif /* INET6 */
695 pserialize_read_exit(s);
696 }
697
698 nanotime(&sc->sc_if.if_lastchange);
699 sc->sc_if.if_ipackets++;
700 sc->sc_if.if_ibytes += m->m_pkthdr.len;
701
702 /* verify the CARP version. */
703 if (ch->carp_version != CARP_VERSION) {
704 CARP_STATINC(CARP_STAT_BADVER);
705 sc->sc_if.if_ierrors++;
706 CARP_LOG(sc, ("invalid version %d != %d",
707 ch->carp_version, CARP_VERSION));
708 m_freem(m);
709 return;
710 }
711
712 /* verify the hash */
713 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
714 struct ip *ip;
715 char ipbuf[INET_ADDRSTRLEN];
716 #ifdef INET6
717 struct ip6_hdr *ip6;
718 char ip6buf[INET6_ADDRSTRLEN];
719 #endif
720
721 CARP_STATINC(CARP_STAT_BADAUTH);
722 sc->sc_if.if_ierrors++;
723
724 switch(af) {
725 case AF_INET:
726 ip = mtod(m, struct ip *);
727 CARP_LOG(sc, ("incorrect hash from %s",
728 IN_PRINT(ipbuf, &ip->ip_src)));
729 break;
730
731 #ifdef INET6
732 case AF_INET6:
733 ip6 = mtod(m, struct ip6_hdr *);
734 CARP_LOG(sc, ("incorrect hash from %s",
735 IN6_PRINT(ip6buf, &ip6->ip6_src)));
736 break;
737 #endif
738
739 default: CARP_LOG(sc, ("incorrect hash"));
740 break;
741 }
742 m_freem(m);
743 return;
744 }
745
746 tmp_counter = ntohl(ch->carp_counter[0]);
747 tmp_counter = tmp_counter<<32;
748 tmp_counter += ntohl(ch->carp_counter[1]);
749
750 /* XXX Replay protection goes here */
751
752 sc->sc_init_counter = 0;
753 sc->sc_counter = tmp_counter;
754
755
756 sc_tv.tv_sec = sc->sc_advbase;
757 if (carp_suppress_preempt && sc->sc_advskew < 240)
758 sc_tv.tv_usec = 240 * 1000000 / 256;
759 else
760 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
761 ch_tv.tv_sec = ch->carp_advbase;
762 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
763
764 switch (sc->sc_state) {
765 case INIT:
766 break;
767 case MASTER:
768 /*
769 * If we receive an advertisement from a backup who's going to
770 * be more frequent than us, go into BACKUP state.
771 */
772 if (timercmp(&sc_tv, &ch_tv, >) ||
773 timercmp(&sc_tv, &ch_tv, ==)) {
774 callout_stop(&sc->sc_ad_tmo);
775 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
776 carp_set_state(sc, BACKUP);
777 carp_setrun(sc, 0);
778 carp_setroute(sc, RTM_DELETE);
779 }
780 break;
781 case BACKUP:
782 /*
783 * If we're pre-empting masters who advertise slower than us,
784 * and this one claims to be slower, treat him as down.
785 */
786 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
787 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
788 carp_master_down(sc);
789 break;
790 }
791
792 /*
793 * If the master is going to advertise at such a low frequency
794 * that he's guaranteed to time out, we'd might as well just
795 * treat him as timed out now.
796 */
797 sc_tv.tv_sec = sc->sc_advbase * 3;
798 if (timercmp(&sc_tv, &ch_tv, <)) {
799 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
800 carp_master_down(sc);
801 break;
802 }
803
804 /*
805 * Otherwise, we reset the counter and wait for the next
806 * advertisement.
807 */
808 carp_setrun(sc, af);
809 break;
810 }
811
812 m_freem(m);
813 return;
814 }
815
816 /*
817 * Interface side of the CARP implementation.
818 */
819
820 /* ARGSUSED */
821 void
822 carpattach(int n)
823 {
824 if_clone_attach(&carp_cloner);
825
826 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
827 }
828
829 static int
830 carp_clone_create(struct if_clone *ifc, int unit)
831 {
832 extern int ifqmaxlen;
833 struct carp_softc *sc;
834 struct ifnet *ifp;
835
836 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
837 if (!sc)
838 return (ENOMEM);
839
840 sc->sc_suppress = 0;
841 sc->sc_advbase = CARP_DFLTINTV;
842 sc->sc_vhid = -1; /* required setting */
843 sc->sc_advskew = 0;
844 sc->sc_init_counter = 1;
845 sc->sc_naddrs = sc->sc_naddrs6 = 0;
846 #ifdef INET6
847 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
848 #endif /* INET6 */
849
850 callout_init(&sc->sc_ad_tmo, 0);
851 callout_init(&sc->sc_md_tmo, 0);
852 callout_init(&sc->sc_md6_tmo, 0);
853
854 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
855 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
856 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
857
858 LIST_INIT(&sc->carp_mc_listhead);
859 ifp = &sc->sc_if;
860 ifp->if_softc = sc;
861 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
862 unit);
863 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
864 ifp->if_ioctl = carp_ioctl;
865 ifp->if_start = carp_start;
866 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
867 IFQ_SET_READY(&ifp->if_snd);
868 if_initialize(ifp);
869 ether_ifattach(ifp, NULL);
870 carp_set_enaddr(sc);
871 /* Overwrite ethernet defaults */
872 ifp->if_type = IFT_CARP;
873 ifp->if_output = carp_output;
874 ifp->if_extflags &= ~IFEF_OUTPUT_MPSAFE;
875 if_register(ifp);
876
877 return (0);
878 }
879
880 static int
881 carp_clone_destroy(struct ifnet *ifp)
882 {
883 struct carp_softc *sc = ifp->if_softc;
884
885 carpdetach(ifp->if_softc);
886 ether_ifdetach(ifp);
887 if_detach(ifp);
888 callout_destroy(&sc->sc_ad_tmo);
889 callout_destroy(&sc->sc_md_tmo);
890 callout_destroy(&sc->sc_md6_tmo);
891 free(ifp->if_softc, M_DEVBUF);
892
893 return (0);
894 }
895
896 static void
897 carpdetach(struct carp_softc *sc)
898 {
899 struct carp_if *cif;
900 int s;
901
902 callout_stop(&sc->sc_ad_tmo);
903 callout_stop(&sc->sc_md_tmo);
904 callout_stop(&sc->sc_md6_tmo);
905
906 if (sc->sc_suppress)
907 carp_suppress_preempt--;
908 sc->sc_suppress = 0;
909
910 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
911 carp_suppress_preempt--;
912 sc->sc_sendad_errors = 0;
913
914 carp_set_state(sc, INIT);
915 sc->sc_if.if_flags &= ~IFF_UP;
916 carp_setrun(sc, 0);
917 carp_multicast_cleanup(sc);
918
919 KERNEL_LOCK(1, NULL);
920 s = splnet();
921 if (sc->sc_carpdev != NULL) {
922 /* XXX linkstatehook removal */
923 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
924 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
925 if (!--cif->vhif_nvrs) {
926 ifpromisc(sc->sc_carpdev, 0);
927 sc->sc_carpdev->if_carp = NULL;
928 free(cif, M_IFADDR);
929 }
930 }
931 sc->sc_carpdev = NULL;
932 splx(s);
933 KERNEL_UNLOCK_ONE(NULL);
934 }
935
936 /* Detach an interface from the carp. */
937 void
938 carp_ifdetach(struct ifnet *ifp)
939 {
940 struct carp_softc *sc, *nextsc;
941 struct carp_if *cif = (struct carp_if *)ifp->if_carp;
942
943 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
944 nextsc = TAILQ_NEXT(sc, sc_list);
945 carpdetach(sc);
946 }
947 }
948
949 static int
950 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
951 struct carp_header *ch)
952 {
953 if (sc->sc_init_counter) {
954 /* this could also be seconds since unix epoch */
955 sc->sc_counter = cprng_fast64();
956 } else
957 sc->sc_counter++;
958
959 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
960 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
961
962 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
963
964 return (0);
965 }
966
967 static void
968 carp_send_ad_all(void)
969 {
970 struct ifnet *ifp;
971 struct carp_if *cif;
972 struct carp_softc *vh;
973 int s;
974 int bound = curlwp_bind();
975
976 s = pserialize_read_enter();
977 IFNET_READER_FOREACH(ifp) {
978 struct psref psref;
979 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
980 continue;
981
982 if_acquire(ifp, &psref);
983 pserialize_read_exit(s);
984
985 cif = (struct carp_if *)ifp->if_carp;
986 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
987 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
988 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
989 carp_send_ad(vh);
990 }
991
992 s = pserialize_read_enter();
993 if_release(ifp, &psref);
994 }
995 pserialize_read_exit(s);
996 curlwp_bindx(bound);
997 }
998
999
1000 static void
1001 carp_send_ad(void *v)
1002 {
1003 struct carp_header ch;
1004 struct timeval tv;
1005 struct carp_softc *sc = v;
1006 struct carp_header *ch_ptr;
1007 struct mbuf *m;
1008 int error, len, advbase, advskew, s;
1009 struct sockaddr sa;
1010
1011 KERNEL_LOCK(1, NULL);
1012 s = splsoftnet();
1013
1014 advbase = advskew = 0; /* Sssssh compiler */
1015 if (sc->sc_carpdev == NULL) {
1016 sc->sc_if.if_oerrors++;
1017 goto retry_later;
1018 }
1019
1020 /* bow out if we've gone to backup (the carp interface is going down) */
1021 if (sc->sc_bow_out) {
1022 sc->sc_bow_out = 0;
1023 advbase = 255;
1024 advskew = 255;
1025 } else {
1026 advbase = sc->sc_advbase;
1027 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1028 advskew = sc->sc_advskew;
1029 else
1030 advskew = 240;
1031 tv.tv_sec = advbase;
1032 tv.tv_usec = advskew * 1000000 / 256;
1033 }
1034
1035 ch.carp_version = CARP_VERSION;
1036 ch.carp_type = CARP_ADVERTISEMENT;
1037 ch.carp_vhid = sc->sc_vhid;
1038 ch.carp_advbase = advbase;
1039 ch.carp_advskew = advskew;
1040 ch.carp_authlen = 7; /* XXX DEFINE */
1041 ch.carp_pad1 = 0; /* must be zero */
1042 ch.carp_cksum = 0;
1043
1044
1045 #ifdef INET
1046 if (sc->sc_naddrs) {
1047 struct ip *ip;
1048 struct ifaddr *ifa;
1049 int _s;
1050
1051 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1052 if (m == NULL) {
1053 sc->sc_if.if_oerrors++;
1054 CARP_STATINC(CARP_STAT_ONOMEM);
1055 /* XXX maybe less ? */
1056 goto retry_later;
1057 }
1058 MCLAIM(m, &carp_proto_mowner_tx);
1059 len = sizeof(*ip) + sizeof(ch);
1060 m->m_pkthdr.len = len;
1061 m_reset_rcvif(m);
1062 m->m_len = len;
1063 MH_ALIGN(m, m->m_len);
1064 m->m_flags |= M_MCAST;
1065 ip = mtod(m, struct ip *);
1066 ip->ip_v = IPVERSION;
1067 ip->ip_hl = sizeof(*ip) >> 2;
1068 ip->ip_tos = IPTOS_LOWDELAY;
1069 ip->ip_len = htons(len);
1070 ip->ip_id = 0; /* no need for id, we don't support fragments */
1071 ip->ip_off = htons(IP_DF);
1072 ip->ip_ttl = CARP_DFLTTL;
1073 ip->ip_p = IPPROTO_CARP;
1074 ip->ip_sum = 0;
1075
1076 memset(&sa, 0, sizeof(sa));
1077 sa.sa_family = AF_INET;
1078 _s = pserialize_read_enter();
1079 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1080 if (ifa == NULL)
1081 ip->ip_src.s_addr = 0;
1082 else
1083 ip->ip_src.s_addr =
1084 ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1085 pserialize_read_exit(_s);
1086 ip->ip_dst.s_addr = INADDR_CARP_GROUP;
1087
1088 ch_ptr = (struct carp_header *)(&ip[1]);
1089 memcpy(ch_ptr, &ch, sizeof(ch));
1090 if (carp_prepare_ad(m, sc, ch_ptr))
1091 goto retry_later;
1092
1093 m->m_data += sizeof(*ip);
1094 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1095 m->m_data -= sizeof(*ip);
1096
1097 nanotime(&sc->sc_if.if_lastchange);
1098 sc->sc_if.if_opackets++;
1099 sc->sc_if.if_obytes += len;
1100 CARP_STATINC(CARP_STAT_OPACKETS);
1101
1102 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1103 NULL);
1104 if (error) {
1105 if (error == ENOBUFS)
1106 CARP_STATINC(CARP_STAT_ONOMEM);
1107 else
1108 CARP_LOG(sc, ("ip_output failed: %d", error));
1109 sc->sc_if.if_oerrors++;
1110 if (sc->sc_sendad_errors < INT_MAX)
1111 sc->sc_sendad_errors++;
1112 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1113 carp_suppress_preempt++;
1114 if (carp_suppress_preempt == 1)
1115 carp_send_ad_all();
1116 }
1117 sc->sc_sendad_success = 0;
1118 } else {
1119 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1120 if (++sc->sc_sendad_success >=
1121 CARP_SENDAD_MIN_SUCCESS) {
1122 carp_suppress_preempt--;
1123 sc->sc_sendad_errors = 0;
1124 }
1125 } else
1126 sc->sc_sendad_errors = 0;
1127 }
1128 }
1129 #endif /* INET */
1130 #ifdef INET6
1131 if (sc->sc_naddrs6) {
1132 struct ip6_hdr *ip6;
1133 struct ifaddr *ifa;
1134 int _s;
1135
1136 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1137 if (m == NULL) {
1138 sc->sc_if.if_oerrors++;
1139 CARP_STATINC(CARP_STAT_ONOMEM);
1140 /* XXX maybe less ? */
1141 goto retry_later;
1142 }
1143 MCLAIM(m, &carp_proto6_mowner_tx);
1144 len = sizeof(*ip6) + sizeof(ch);
1145 m->m_pkthdr.len = len;
1146 m_reset_rcvif(m);
1147 m->m_len = len;
1148 MH_ALIGN(m, m->m_len);
1149 m->m_flags |= M_MCAST;
1150 ip6 = mtod(m, struct ip6_hdr *);
1151 memset(ip6, 0, sizeof(*ip6));
1152 ip6->ip6_vfc |= IPV6_VERSION;
1153 ip6->ip6_hlim = CARP_DFLTTL;
1154 ip6->ip6_nxt = IPPROTO_CARP;
1155
1156 /* set the source address */
1157 memset(&sa, 0, sizeof(sa));
1158 sa.sa_family = AF_INET6;
1159 _s = pserialize_read_enter();
1160 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1161 if (ifa == NULL) /* This should never happen with IPv6 */
1162 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
1163 else
1164 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1165 &ip6->ip6_src, sizeof(struct in6_addr));
1166 pserialize_read_exit(_s);
1167 /* set the multicast destination */
1168
1169 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1170 ip6->ip6_dst.s6_addr8[15] = 0x12;
1171 if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) {
1172 sc->sc_if.if_oerrors++;
1173 m_freem(m);
1174 CARP_LOG(sc, ("in6_setscope failed"));
1175 goto retry_later;
1176 }
1177
1178 ch_ptr = (struct carp_header *)(&ip6[1]);
1179 memcpy(ch_ptr, &ch, sizeof(ch));
1180 if (carp_prepare_ad(m, sc, ch_ptr))
1181 goto retry_later;
1182
1183 ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6),
1184 len - sizeof(*ip6));
1185
1186 nanotime(&sc->sc_if.if_lastchange);
1187 sc->sc_if.if_opackets++;
1188 sc->sc_if.if_obytes += len;
1189 CARP_STATINC(CARP_STAT_OPACKETS6);
1190
1191 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1192 if (error) {
1193 if (error == ENOBUFS)
1194 CARP_STATINC(CARP_STAT_ONOMEM);
1195 else
1196 CARP_LOG(sc, ("ip6_output failed: %d", error));
1197 sc->sc_if.if_oerrors++;
1198 if (sc->sc_sendad_errors < INT_MAX)
1199 sc->sc_sendad_errors++;
1200 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1201 carp_suppress_preempt++;
1202 if (carp_suppress_preempt == 1)
1203 carp_send_ad_all();
1204 }
1205 sc->sc_sendad_success = 0;
1206 } else {
1207 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1208 if (++sc->sc_sendad_success >=
1209 CARP_SENDAD_MIN_SUCCESS) {
1210 carp_suppress_preempt--;
1211 sc->sc_sendad_errors = 0;
1212 }
1213 } else
1214 sc->sc_sendad_errors = 0;
1215 }
1216 }
1217 #endif /* INET6 */
1218
1219 retry_later:
1220 splx(s);
1221 KERNEL_UNLOCK_ONE(NULL);
1222 if (advbase != 255 || advskew != 255)
1223 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1224 }
1225
1226 /*
1227 * Broadcast a gratuitous ARP request containing
1228 * the virtual router MAC address for each IP address
1229 * associated with the virtual router.
1230 */
1231 static void
1232 carp_send_arp(struct carp_softc *sc)
1233 {
1234 struct ifaddr *ifa;
1235 int s;
1236
1237 KERNEL_LOCK(1, NULL);
1238 s = splsoftnet();
1239 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1240
1241 if (ifa->ifa_addr->sa_family != AF_INET)
1242 continue;
1243
1244 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl));
1245 }
1246 splx(s);
1247 KERNEL_UNLOCK_ONE(NULL);
1248 }
1249
#ifdef INET6
/*
 * IPv6 counterpart of carp_send_arp(): for every IPv6 address configured
 * on the carp interface, send a neighbor advertisement with the OVERRIDE
 * flag out of the parent device (sc_carpdev), addressed to the
 * link-local all-nodes multicast group.
 *
 * The walk is done with the kernel lock held and at splsoftnet.
 */
static void
carp_send_na(struct carp_softc *sc)
{
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	struct ifaddr *addr;
	int spl;

	KERNEL_LOCK(1, NULL);
	spl = splsoftnet();

	IFADDR_READER_FOREACH(addr, &sc->sc_if) {
		/* Only IPv6 addresses are advertised. */
		if (addr->ifa_addr->sa_family == AF_INET6) {
			nd6_na_output(sc->sc_carpdev, &mcast,
			    &ifatoia6(addr)->ia_addr.sin6_addr,
			    ND_NA_FLAG_OVERRIDE, 1, NULL);
		}
	}
	splx(spl);
	KERNEL_UNLOCK_ONE(NULL);
}
#endif /* INET6 */
1275
/*
 * Based on bridge_hash() in if_bridge.c
 *
 * Scrambles the three words a, b and c in place through nine rounds of
 * subtract / xor-with-shift.  All three arguments must be modifiable
 * lvalues; each one is evaluated many times, so they must not have side
 * effects.
 */
#define mix(a,b,c)							\
do {									\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);			\
} while (0)
1291
1292 static u_int32_t
1293 carp_hash(struct carp_softc *sc, u_char *src)
1294 {
1295 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1296
1297 c += sc->sc_key[3] << 24;
1298 c += sc->sc_key[2] << 16;
1299 c += sc->sc_key[1] << 8;
1300 c += sc->sc_key[0];
1301 b += src[5] << 8;
1302 b += src[4];
1303 a += src[3] << 24;
1304 a += src[2] << 16;
1305 a += src[1] << 8;
1306 a += src[0];
1307
1308 mix(a, b, c);
1309 return (c);
1310 }
1311
1312 static int
1313 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1314 {
1315 struct carp_softc *vh;
1316 struct ifaddr *ifa;
1317 int count = 0;
1318
1319 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1320 if ((type == CARP_COUNT_RUNNING &&
1321 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1322 (IFF_UP|IFF_RUNNING)) ||
1323 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1324 IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1325 if (ifa->ifa_addr->sa_family == AF_INET &&
1326 ia->ia_addr.sin_addr.s_addr ==
1327 ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1328 count++;
1329 }
1330 }
1331 }
1332 return (count);
1333 }
1334
1335 int
1336 carp_iamatch(struct in_ifaddr *ia, u_char *src,
1337 u_int32_t *count, u_int32_t index)
1338 {
1339 struct carp_softc *sc = ia->ia_ifp->if_softc;
1340
1341 if (carp_opts[CARPCTL_ARPBALANCE]) {
1342 /*
1343 * We use the source ip to decide which virtual host should
1344 * handle the request. If we're master of that virtual host,
1345 * then we respond, otherwise, just drop the arp packet on
1346 * the floor.
1347 */
1348
1349 /* Count the elegible carp interfaces with this address */
1350 if (*count == 0)
1351 *count = carp_addrcount(
1352 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
1353 ia, CARP_COUNT_RUNNING);
1354
1355 /* This should never happen, but... */
1356 if (*count == 0)
1357 return (0);
1358
1359 if (carp_hash(sc, src) % *count == index - 1 &&
1360 sc->sc_state == MASTER) {
1361 return (1);
1362 }
1363 } else {
1364 if (sc->sc_state == MASTER)
1365 return (1);
1366 }
1367
1368 return (0);
1369 }
1370
#ifdef INET6
/*
 * Find the ifaddr that makes us the owner of IPv6 target address
 * "taddr".
 *
 * v:     the parent interface's carp_if (list of attached virtual hosts).
 * taddr: address being looked up.
 *
 * Returns the matching ifaddr of the first virtual host that is up,
 * running and in MASTER state and has "taddr" configured, or NULL when
 * there is none.
 */
struct ifaddr *
carp_iamatch6(void *v, struct in6_addr *taddr)
{
	struct carp_if *cif = v;
	struct carp_softc *vh;
	struct ifaddr *ifa;

	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
		/* Only an active master may answer for its addresses. */
		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
		    (IFF_UP|IFF_RUNNING) || vh->sc_state != MASTER)
			continue;

		IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
			/*
			 * The address list also holds non-IPv6 entries;
			 * ifatoia6() is only meaningful for AF_INET6
			 * ones, so skip everything else instead of
			 * reinterpreting it as a struct in6_ifaddr.
			 */
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;

			if (IN6_ARE_ADDR_EQUAL(taddr,
			    &ifatoia6(ifa)->ia_addr.sin6_addr))
				return (ifa);
		}
	}

	return (NULL);
}
#endif /* INET6 */
1392
1393 struct ifnet *
1394 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
1395 {
1396 struct carp_if *cif = (struct carp_if *)v;
1397 struct carp_softc *vh;
1398 u_int8_t *ena;
1399
1400 if (src)
1401 ena = (u_int8_t *)&eh->ether_shost;
1402 else
1403 ena = (u_int8_t *)&eh->ether_dhost;
1404
1405 switch (iftype) {
1406 case IFT_ETHER:
1407 case IFT_FDDI:
1408 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1409 return (NULL);
1410 break;
1411 case IFT_ISO88025:
1412 if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
1413 return (NULL);
1414 break;
1415 default:
1416 return (NULL);
1417 break;
1418 }
1419
1420 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1421 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1422 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
1423 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
1424 ETHER_ADDR_LEN)) {
1425 return (&vh->sc_if);
1426 }
1427
1428 return (NULL);
1429 }
1430
1431 int
1432 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1433 {
1434 struct ether_header eh;
1435 struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp;
1436 struct ifnet *ifp;
1437
1438 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
1439 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
1440 eh.ether_type = etype;
1441
1442 if (m->m_flags & (M_BCAST|M_MCAST)) {
1443 struct carp_softc *vh;
1444 struct mbuf *m0;
1445
1446 /*
1447 * XXX Should really check the list of multicast addresses
1448 * for each CARP interface _before_ copying.
1449 */
1450 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1451 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1452 if (m0 == NULL)
1453 continue;
1454 m_set_rcvif(m0, &vh->sc_if);
1455 ether_input(&vh->sc_if, m0);
1456 }
1457 return (1);
1458 }
1459
1460 ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0);
1461 if (ifp == NULL) {
1462 return (1);
1463 }
1464
1465 m_set_rcvif(m, ifp);
1466
1467 bpf_mtap(ifp, m);
1468 ifp->if_ipackets++;
1469 ether_input(ifp, m);
1470 return (0);
1471 }
1472
1473 static void
1474 carp_master_down(void *v)
1475 {
1476 struct carp_softc *sc = v;
1477
1478 switch (sc->sc_state) {
1479 case INIT:
1480 printf("%s: master_down event in INIT state\n",
1481 sc->sc_if.if_xname);
1482 break;
1483 case MASTER:
1484 break;
1485 case BACKUP:
1486 CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
1487 carp_set_state(sc, MASTER);
1488 carp_send_ad(sc);
1489 carp_send_arp(sc);
1490 #ifdef INET6
1491 carp_send_na(sc);
1492 #endif /* INET6 */
1493 carp_setrun(sc, 0);
1494 carp_setroute(sc, RTM_ADD);
1495 break;
1496 }
1497 }
1498
1499 /*
1500 * When in backup state, af indicates whether to reset the master down timer
1501 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1502 */
1503 static void
1504 carp_setrun(struct carp_softc *sc, sa_family_t af)
1505 {
1506 struct timeval tv;
1507
1508 if (sc->sc_carpdev == NULL) {
1509 sc->sc_if.if_flags &= ~IFF_RUNNING;
1510 carp_set_state(sc, INIT);
1511 return;
1512 }
1513
1514 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
1515 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1516 sc->sc_if.if_flags |= IFF_RUNNING;
1517 } else {
1518 sc->sc_if.if_flags &= ~IFF_RUNNING;
1519 carp_setroute(sc, RTM_DELETE);
1520 return;
1521 }
1522
1523 switch (sc->sc_state) {
1524 case INIT:
1525 carp_set_state(sc, BACKUP);
1526 carp_setroute(sc, RTM_DELETE);
1527 carp_setrun(sc, 0);
1528 break;
1529 case BACKUP:
1530 callout_stop(&sc->sc_ad_tmo);
1531 tv.tv_sec = 3 * sc->sc_advbase;
1532 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1533 switch (af) {
1534 #ifdef INET
1535 case AF_INET:
1536 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1537 break;
1538 #endif /* INET */
1539 #ifdef INET6
1540 case AF_INET6:
1541 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1542 break;
1543 #endif /* INET6 */
1544 default:
1545 if (sc->sc_naddrs)
1546 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1547 #ifdef INET6
1548 if (sc->sc_naddrs6)
1549 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1550 #endif /* INET6 */
1551 break;
1552 }
1553 break;
1554 case MASTER:
1555 tv.tv_sec = sc->sc_advbase;
1556 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1557 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1558 break;
1559 }
1560 }
1561
1562 static void
1563 carp_multicast_cleanup(struct carp_softc *sc)
1564 {
1565 struct ip_moptions *imo = &sc->sc_imo;
1566 #ifdef INET6
1567 struct ip6_moptions *im6o = &sc->sc_im6o;
1568 #endif
1569 u_int16_t n = imo->imo_num_memberships;
1570
1571 /* Clean up our own multicast memberships */
1572 while (n-- > 0) {
1573 if (imo->imo_membership[n] != NULL) {
1574 in_delmulti(imo->imo_membership[n]);
1575 imo->imo_membership[n] = NULL;
1576 }
1577 }
1578 imo->imo_num_memberships = 0;
1579 imo->imo_multicast_if_index = 0;
1580
1581 #ifdef INET6
1582 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1583 struct in6_multi_mship *imm =
1584 LIST_FIRST(&im6o->im6o_memberships);
1585
1586 LIST_REMOVE(imm, i6mm_chain);
1587 in6_leavegroup(imm);
1588 }
1589 im6o->im6o_multicast_if_index = 0;
1590 #endif
1591
1592 /* And any other multicast memberships */
1593 carp_ether_purgemulti(sc);
1594 }
1595
1596 static int
1597 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1598 {
1599 struct carp_if *cif, *ncif = NULL;
1600 struct carp_softc *vr, *after = NULL;
1601 int myself = 0, error = 0;
1602 int s;
1603
1604 if (ifp == sc->sc_carpdev)
1605 return (0);
1606
1607 if (ifp != NULL) {
1608 if ((ifp->if_flags & IFF_MULTICAST) == 0)
1609 return (EADDRNOTAVAIL);
1610
1611 if (ifp->if_type == IFT_CARP)
1612 return (EINVAL);
1613
1614 if (ifp->if_carp == NULL) {
1615 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
1616 if (ncif == NULL)
1617 return (ENOBUFS);
1618 if ((error = ifpromisc(ifp, 1))) {
1619 free(ncif, M_IFADDR);
1620 return (error);
1621 }
1622
1623 ncif->vhif_ifp = ifp;
1624 TAILQ_INIT(&ncif->vhif_vrs);
1625 } else {
1626 cif = (struct carp_if *)ifp->if_carp;
1627 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1628 if (vr != sc && vr->sc_vhid == sc->sc_vhid)
1629 return (EINVAL);
1630 }
1631
1632 /* detach from old interface */
1633 if (sc->sc_carpdev != NULL)
1634 carpdetach(sc);
1635
1636 /* join multicast groups */
1637 if (sc->sc_naddrs < 0 &&
1638 (error = carp_join_multicast(sc)) != 0) {
1639 if (ncif != NULL)
1640 free(ncif, M_IFADDR);
1641 return (error);
1642 }
1643
1644 #ifdef INET6
1645 if (sc->sc_naddrs6 < 0 &&
1646 (error = carp_join_multicast6(sc)) != 0) {
1647 if (ncif != NULL)
1648 free(ncif, M_IFADDR);
1649 carp_multicast_cleanup(sc);
1650 return (error);
1651 }
1652 #endif
1653
1654 /* attach carp interface to physical interface */
1655 if (ncif != NULL)
1656 ifp->if_carp = (void *)ncif;
1657 sc->sc_carpdev = ifp;
1658 sc->sc_if.if_capabilities = ifp->if_capabilities &
1659 (IFCAP_TSOv4 | IFCAP_TSOv6 |
1660 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
1661 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
1662 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
1663 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
1664 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);
1665
1666 cif = (struct carp_if *)ifp->if_carp;
1667 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1668 if (vr == sc)
1669 myself = 1;
1670 if (vr->sc_vhid < sc->sc_vhid)
1671 after = vr;
1672 }
1673
1674 if (!myself) {
1675 /* We're trying to keep things in order */
1676 if (after == NULL) {
1677 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1678 } else {
1679 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1680 sc, sc_list);
1681 }
1682 cif->vhif_nvrs++;
1683 }
1684 if (sc->sc_naddrs || sc->sc_naddrs6)
1685 sc->sc_if.if_flags |= IFF_UP;
1686 carp_set_enaddr(sc);
1687 KERNEL_LOCK(1, NULL);
1688 s = splnet();
1689 /* XXX linkstatehooks establish */
1690 carp_carpdev_state(ifp);
1691 splx(s);
1692 KERNEL_UNLOCK_ONE(NULL);
1693 } else {
1694 carpdetach(sc);
1695 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1696 }
1697 return (0);
1698 }
1699
1700 static void
1701 carp_set_enaddr(struct carp_softc *sc)
1702 {
1703 uint8_t enaddr[ETHER_ADDR_LEN];
1704 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
1705 enaddr[0] = 3;
1706 enaddr[1] = 0;
1707 enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
1708 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
1709 enaddr[4] = 0;
1710 enaddr[5] = 0;
1711 } else {
1712 enaddr[0] = 0;
1713 enaddr[1] = 0;
1714 enaddr[2] = 0x5e;
1715 enaddr[3] = 0;
1716 enaddr[4] = 1;
1717 enaddr[5] = sc->sc_vhid;
1718 }
1719 if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
1720 }
1721
#if 0
/*
 * Address-change callback, currently compiled out.
 *
 * Recounts the IPv4/IPv6 addresses on the carp interface.  When either
 * count went down (an address was deleted) the stored counts are
 * refreshed, the IPv4 carp multicast membership is re-established if it
 * got lost, and the interface is either taken down (no addresses left)
 * or has its HMAC state recomputed.  The run state is re-evaluated in
 * every case.
 */
static void
carp_addr_updated(void *v)
{
	struct carp_softc *sc = v;
	struct ifaddr *ifa;
	int n4 = 0, n6 = 0;

	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
		switch (ifa->ifa_addr->sa_family) {
		case AF_INET:
			n4++;
			break;
		case AF_INET6:
			n6++;
			break;
		default:
			break;
		}
	}

	/* Handle a callback after SIOCDIFADDR */
	if (n4 < sc->sc_naddrs || n6 < sc->sc_naddrs6) {
		struct in_addr mc_addr;

		sc->sc_naddrs = n4;
		sc->sc_naddrs6 = n6;

		/* Re-establish multicast membership removed by in_control */
		mc_addr.s_addr = INADDR_CARP_GROUP;
		if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));

			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
				carp_join_multicast(sc);
		}

		if (sc->sc_naddrs != 0 || sc->sc_naddrs6 != 0) {
			carp_hmac_prepare(sc);
		} else {
			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state(sc, INIT);
		}
	}

	carp_setrun(sc, 0);
}
#endif
1763
1764 static int
1765 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1766 {
1767 struct ifnet *ifp = sc->sc_carpdev;
1768 struct in_ifaddr *ia, *ia_if;
1769 int error = 0;
1770 int s;
1771
1772 if (sin->sin_addr.s_addr == 0) {
1773 if (!(sc->sc_if.if_flags & IFF_UP))
1774 carp_set_state(sc, INIT);
1775 if (sc->sc_naddrs)
1776 sc->sc_if.if_flags |= IFF_UP;
1777 carp_setrun(sc, 0);
1778 return (0);
1779 }
1780
1781 /* we have to do this by hand to ensure we don't match on ourselves */
1782 ia_if = NULL;
1783 s = pserialize_read_enter();
1784 IN_ADDRLIST_READER_FOREACH(ia) {
1785 /* and, yeah, we need a multicast-capable iface too */
1786 if (ia->ia_ifp != &sc->sc_if &&
1787 ia->ia_ifp->if_type != IFT_CARP &&
1788 (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1789 (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
1790 ia->ia_subnet) {
1791 if (!ia_if)
1792 ia_if = ia;
1793 }
1794 }
1795
1796 if (ia_if) {
1797 ia = ia_if;
1798 if (ifp) {
1799 if (ifp != ia->ia_ifp)
1800 return (EADDRNOTAVAIL);
1801 } else {
1802 /* FIXME NOMPSAFE */
1803 ifp = ia->ia_ifp;
1804 }
1805 }
1806 pserialize_read_exit(s);
1807
1808 if ((error = carp_set_ifp(sc, ifp)))
1809 return (error);
1810
1811 if (sc->sc_carpdev == NULL)
1812 return (EADDRNOTAVAIL);
1813
1814 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1815 return (error);
1816
1817 sc->sc_naddrs++;
1818 if (sc->sc_carpdev != NULL)
1819 sc->sc_if.if_flags |= IFF_UP;
1820
1821 carp_set_state(sc, INIT);
1822 carp_setrun(sc, 0);
1823
1824 /*
1825 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
1826 * to correct any inappropriate routes that it inserted.
1827 */
1828 if (sc->ah_cookie == 0) {
1829 /* XXX link address hook */
1830 }
1831
1832 return (0);
1833 }
1834
1835 static int
1836 carp_join_multicast(struct carp_softc *sc)
1837 {
1838 struct ip_moptions *imo = &sc->sc_imo, tmpimo;
1839 struct in_addr addr;
1840
1841 memset(&tmpimo, 0, sizeof(tmpimo));
1842 addr.s_addr = INADDR_CARP_GROUP;
1843 if ((tmpimo.imo_membership[0] =
1844 in_addmulti(&addr, &sc->sc_if)) == NULL) {
1845 return (ENOBUFS);
1846 }
1847
1848 imo->imo_membership[0] = tmpimo.imo_membership[0];
1849 imo->imo_num_memberships = 1;
1850 imo->imo_multicast_if_index = sc->sc_if.if_index;
1851 imo->imo_multicast_ttl = CARP_DFLTTL;
1852 imo->imo_multicast_loop = 0;
1853 return (0);
1854 }
1855
1856
1857 #ifdef INET6
1858 static int
1859 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1860 {
1861 struct ifnet *ifp = sc->sc_carpdev;
1862 struct in6_ifaddr *ia, *ia_if;
1863 int error = 0;
1864 int s;
1865
1866 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1867 if (!(sc->sc_if.if_flags & IFF_UP))
1868 carp_set_state(sc, INIT);
1869 if (sc->sc_naddrs6)
1870 sc->sc_if.if_flags |= IFF_UP;
1871 carp_setrun(sc, 0);
1872 return (0);
1873 }
1874
1875 /* we have to do this by hand to ensure we don't match on ourselves */
1876 ia_if = NULL;
1877 s = pserialize_read_enter();
1878 IN6_ADDRLIST_READER_FOREACH(ia) {
1879 int i;
1880
1881 for (i = 0; i < 4; i++) {
1882 if ((sin6->sin6_addr.s6_addr32[i] &
1883 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1884 (ia->ia_addr.sin6_addr.s6_addr32[i] &
1885 ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1886 break;
1887 }
1888 /* and, yeah, we need a multicast-capable iface too */
1889 if (ia->ia_ifp != &sc->sc_if &&
1890 ia->ia_ifp->if_type != IFT_CARP &&
1891 (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1892 (i == 4)) {
1893 if (!ia_if)
1894 ia_if = ia;
1895 }
1896 }
1897 pserialize_read_exit(s);
1898
1899 if (ia_if) {
1900 ia = ia_if;
1901 if (sc->sc_carpdev) {
1902 if (sc->sc_carpdev != ia->ia_ifp)
1903 return (EADDRNOTAVAIL);
1904 } else {
1905 ifp = ia->ia_ifp;
1906 }
1907 }
1908
1909 if ((error = carp_set_ifp(sc, ifp)))
1910 return (error);
1911
1912 if (sc->sc_carpdev == NULL)
1913 return (EADDRNOTAVAIL);
1914
1915 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1916 return (error);
1917
1918 sc->sc_naddrs6++;
1919 if (sc->sc_carpdev != NULL)
1920 sc->sc_if.if_flags |= IFF_UP;
1921 carp_set_state(sc, INIT);
1922 carp_setrun(sc, 0);
1923
1924 return (0);
1925 }
1926
1927 static int
1928 carp_join_multicast6(struct carp_softc *sc)
1929 {
1930 struct in6_multi_mship *imm, *imm2;
1931 struct ip6_moptions *im6o = &sc->sc_im6o;
1932 struct sockaddr_in6 addr6;
1933 int error;
1934
1935 /* Join IPv6 CARP multicast group */
1936 memset(&addr6, 0, sizeof(addr6));
1937 addr6.sin6_family = AF_INET6;
1938 addr6.sin6_len = sizeof(addr6);
1939 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1940 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1941 addr6.sin6_addr.s6_addr8[15] = 0x12;
1942 if ((imm = in6_joingroup(&sc->sc_if,
1943 &addr6.sin6_addr, &error, 0)) == NULL) {
1944 return (error);
1945 }
1946 /* join solicited multicast address */
1947 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1948 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1949 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1950 addr6.sin6_addr.s6_addr32[1] = 0;
1951 addr6.sin6_addr.s6_addr32[2] = htonl(1);
1952 addr6.sin6_addr.s6_addr32[3] = 0;
1953 addr6.sin6_addr.s6_addr8[12] = 0xff;
1954 if ((imm2 = in6_joingroup(&sc->sc_if,
1955 &addr6.sin6_addr, &error, 0)) == NULL) {
1956 in6_leavegroup(imm);
1957 return (error);
1958 }
1959
1960 /* apply v6 multicast membership */
1961 im6o->im6o_multicast_if_index = sc->sc_if.if_index;
1962 if (imm)
1963 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1964 i6mm_chain);
1965 if (imm2)
1966 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1967 i6mm_chain);
1968
1969 return (0);
1970 }
1971
1972 #endif /* INET6 */
1973
1974 static int
1975 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1976 {
1977 struct lwp *l = curlwp; /* XXX */
1978 struct carp_softc *sc = ifp->if_softc, *vr;
1979 struct carpreq carpr;
1980 struct ifaddr *ifa;
1981 struct ifreq *ifr;
1982 struct ifnet *cdev = NULL;
1983 int error = 0;
1984
1985 ifa = (struct ifaddr *)data;
1986 ifr = (struct ifreq *)data;
1987
1988 switch (cmd) {
1989 case SIOCINITIFADDR:
1990 switch (ifa->ifa_addr->sa_family) {
1991 #ifdef INET
1992 case AF_INET:
1993 sc->sc_if.if_flags |= IFF_UP;
1994 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
1995 sizeof(struct sockaddr));
1996 error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1997 break;
1998 #endif /* INET */
1999 #ifdef INET6
2000 case AF_INET6:
2001 sc->sc_if.if_flags|= IFF_UP;
2002 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
2003 break;
2004 #endif /* INET6 */
2005 default:
2006 error = EAFNOSUPPORT;
2007 break;
2008 }
2009 break;
2010
2011 case SIOCSIFFLAGS:
2012 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
2013 break;
2014 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2015 callout_stop(&sc->sc_ad_tmo);
2016 callout_stop(&sc->sc_md_tmo);
2017 callout_stop(&sc->sc_md6_tmo);
2018 if (sc->sc_state == MASTER) {
2019 /* we need the interface up to bow out */
2020 sc->sc_if.if_flags |= IFF_UP;
2021 sc->sc_bow_out = 1;
2022 carp_send_ad(sc);
2023 }
2024 sc->sc_if.if_flags &= ~IFF_UP;
2025 carp_set_state(sc, INIT);
2026 carp_setrun(sc, 0);
2027 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
2028 sc->sc_if.if_flags |= IFF_UP;
2029 carp_setrun(sc, 0);
2030 }
2031 break;
2032
2033 case SIOCSVH:
2034 if (l == NULL)
2035 break;
2036 if ((error = kauth_authorize_network(l->l_cred,
2037 KAUTH_NETWORK_INTERFACE,
2038 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2039 NULL)) != 0)
2040 break;
2041 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2042 break;
2043 error = 1;
2044 if (carpr.carpr_carpdev[0] != '\0' &&
2045 (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
2046 return (EINVAL);
2047 if ((error = carp_set_ifp(sc, cdev)))
2048 return (error);
2049 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
2050 switch (carpr.carpr_state) {
2051 case BACKUP:
2052 callout_stop(&sc->sc_ad_tmo);
2053 carp_set_state(sc, BACKUP);
2054 carp_setrun(sc, 0);
2055 carp_setroute(sc, RTM_DELETE);
2056 break;
2057 case MASTER:
2058 carp_master_down(sc);
2059 break;
2060 default:
2061 break;
2062 }
2063 }
2064 if (carpr.carpr_vhid > 0) {
2065 if (carpr.carpr_vhid > 255) {
2066 error = EINVAL;
2067 break;
2068 }
2069 if (sc->sc_carpdev) {
2070 struct carp_if *cif;
2071 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2072 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
2073 if (vr != sc &&
2074 vr->sc_vhid == carpr.carpr_vhid)
2075 return (EINVAL);
2076 }
2077 sc->sc_vhid = carpr.carpr_vhid;
2078 carp_set_enaddr(sc);
2079 carp_set_state(sc, INIT);
2080 error--;
2081 }
2082 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
2083 if (carpr.carpr_advskew > 254) {
2084 error = EINVAL;
2085 break;
2086 }
2087 if (carpr.carpr_advbase > 255) {
2088 error = EINVAL;
2089 break;
2090 }
2091 sc->sc_advbase = carpr.carpr_advbase;
2092 sc->sc_advskew = carpr.carpr_advskew;
2093 error--;
2094 }
2095 memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
2096 if (error > 0)
2097 error = EINVAL;
2098 else {
2099 error = 0;
2100 carp_setrun(sc, 0);
2101 }
2102 break;
2103
2104 case SIOCGVH:
2105 memset(&carpr, 0, sizeof(carpr));
2106 if (sc->sc_carpdev != NULL)
2107 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2108 IFNAMSIZ);
2109 carpr.carpr_state = sc->sc_state;
2110 carpr.carpr_vhid = sc->sc_vhid;
2111 carpr.carpr_advbase = sc->sc_advbase;
2112 carpr.carpr_advskew = sc->sc_advskew;
2113
2114 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
2115 KAUTH_NETWORK_INTERFACE,
2116 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2117 NULL)) == 0)
2118 memcpy(carpr.carpr_key, sc->sc_key,
2119 sizeof(carpr.carpr_key));
2120 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2121 break;
2122
2123 case SIOCADDMULTI:
2124 error = carp_ether_addmulti(sc, ifr);
2125 break;
2126
2127 case SIOCDELMULTI:
2128 error = carp_ether_delmulti(sc, ifr);
2129 break;
2130
2131 case SIOCSIFCAP:
2132 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
2133 error = 0;
2134 break;
2135
2136 default:
2137 error = ether_ioctl(ifp, cmd, data);
2138 }
2139
2140 carp_hmac_prepare(sc);
2141 return (error);
2142 }
2143
2144
/*
 * Start routine of the carp interface.  It is not expected to be
 * called; it does nothing except report the call on DEBUG kernels.
 */
static void
carp_start(struct ifnet *ifp)
{

#ifdef DEBUG
	printf("%s: start called\n", ifp->if_xname);
#endif
}
2155
2156 int
2157 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
2158 const struct rtentry *rt)
2159 {
2160 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2161 KASSERT(KERNEL_LOCKED_P());
2162
2163 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
2164 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt);
2165 } else {
2166 m_freem(m);
2167 return (ENETUNREACH);
2168 }
2169 }
2170
2171 static void
2172 carp_set_state(struct carp_softc *sc, int state)
2173 {
2174 static const char *carp_states[] = { CARP_STATES };
2175 int link_state;
2176
2177 if (sc->sc_state == state)
2178 return;
2179
2180 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
2181
2182 sc->sc_state = state;
2183 switch (state) {
2184 case BACKUP:
2185 link_state = LINK_STATE_DOWN;
2186 break;
2187 case MASTER:
2188 link_state = LINK_STATE_UP;
2189 break;
2190 default:
2191 link_state = LINK_STATE_UNKNOWN;
2192 break;
2193 }
2194 if_link_state_change(&sc->sc_if, link_state);
2195 }
2196
2197 void
2198 carp_carpdev_state(void *v)
2199 {
2200 struct carp_if *cif;
2201 struct carp_softc *sc;
2202 struct ifnet *ifp = v;
2203
2204 if (ifp->if_type == IFT_CARP)
2205 return;
2206
2207 cif = (struct carp_if *)ifp->if_carp;
2208
2209 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2210 int suppressed = sc->sc_suppress;
2211
2212 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2213 !(sc->sc_carpdev->if_flags & IFF_UP)) {
2214 sc->sc_if.if_flags &= ~IFF_RUNNING;
2215 callout_stop(&sc->sc_ad_tmo);
2216 callout_stop(&sc->sc_md_tmo);
2217 callout_stop(&sc->sc_md6_tmo);
2218 carp_set_state(sc, INIT);
2219 sc->sc_suppress = 1;
2220 carp_setrun(sc, 0);
2221 if (!suppressed) {
2222 carp_suppress_preempt++;
2223 if (carp_suppress_preempt == 1)
2224 carp_send_ad_all();
2225 }
2226 } else {
2227 carp_set_state(sc, INIT);
2228 sc->sc_suppress = 0;
2229 carp_setrun(sc, 0);
2230 if (suppressed)
2231 carp_suppress_preempt--;
2232 }
2233 }
2234 }
2235
2236 static int
2237 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2238 {
2239 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
2240 struct ifnet *ifp;
2241 struct carp_mc_entry *mc;
2242 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2243 int error;
2244
2245 ifp = sc->sc_carpdev;
2246 if (ifp == NULL)
2247 return (EINVAL);
2248
2249 error = ether_addmulti(sa, &sc->sc_ac);
2250 if (error != ENETRESET)
2251 return (error);
2252
2253 /*
2254 * This is new multicast address. We have to tell parent
2255 * about it. Also, remember this multicast address so that
2256 * we can delete them on unconfigure.
2257 */
2258 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2259 if (mc == NULL) {
2260 error = ENOMEM;
2261 goto alloc_failed;
2262 }
2263
2264 /*
2265 * As ether_addmulti() returns ENETRESET, following two
2266 * statement shouldn't fail.
2267 */
2268 (void)ether_multiaddr(sa, addrlo, addrhi);
2269 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2270 memcpy(&mc->mc_addr, sa, sa->sa_len);
2271 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2272
2273 error = if_mcast_op(ifp, SIOCADDMULTI, sa);
2274 if (error != 0)
2275 goto ioctl_failed;
2276
2277 return (error);
2278
2279 ioctl_failed:
2280 LIST_REMOVE(mc, mc_entries);
2281 free(mc, M_DEVBUF);
2282 alloc_failed:
2283 (void)ether_delmulti(sa, &sc->sc_ac);
2284
2285 return (error);
2286 }
2287
2288 static int
2289 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2290 {
2291 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
2292 struct ifnet *ifp;
2293 struct ether_multi *enm;
2294 struct carp_mc_entry *mc;
2295 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2296 int error;
2297
2298 ifp = sc->sc_carpdev;
2299 if (ifp == NULL)
2300 return (EINVAL);
2301
2302 /*
2303 * Find a key to lookup carp_mc_entry. We have to do this
2304 * before calling ether_delmulti for obvious reason.
2305 */
2306 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
2307 return (error);
2308 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2309 if (enm == NULL)
2310 return (EINVAL);
2311
2312 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2313 if (mc->mc_enm == enm)
2314 break;
2315
2316 /* We won't delete entries we didn't add */
2317 if (mc == NULL)
2318 return (EINVAL);
2319
2320 error = ether_delmulti(sa, &sc->sc_ac);
2321 if (error != ENETRESET)
2322 return (error);
2323
2324 /* We no longer use this multicast address. Tell parent so. */
2325 error = if_mcast_op(ifp, SIOCDELMULTI, sa);
2326 if (error == 0) {
2327 /* And forget about this address. */
2328 LIST_REMOVE(mc, mc_entries);
2329 free(mc, M_DEVBUF);
2330 } else
2331 (void)ether_addmulti(sa, &sc->sc_ac);
2332 return (error);
2333 }
2334
2335 /*
2336 * Delete any multicast address we have asked to add from parent
2337 * interface. Called when the carp is being unconfigured.
2338 */
2339 static void
2340 carp_ether_purgemulti(struct carp_softc *sc)
2341 {
2342 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */
2343 struct carp_mc_entry *mc;
2344
2345 if (ifp == NULL)
2346 return;
2347
2348 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2349 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
2350 LIST_REMOVE(mc, mc_entries);
2351 free(mc, M_DEVBUF);
2352 }
2353 }
2354
2355 static int
2356 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
2357 {
2358
2359 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
2360 }
2361
2362 void
2363 carp_init(void)
2364 {
2365
2366 sysctl_net_inet_carp_setup(NULL);
2367 #ifdef MBUFTRACE
2368 MOWNER_ATTACH(&carp_proto_mowner_rx);
2369 MOWNER_ATTACH(&carp_proto_mowner_tx);
2370 MOWNER_ATTACH(&carp_proto6_mowner_rx);
2371 MOWNER_ATTACH(&carp_proto6_mowner_tx);
2372 #endif
2373
2374 carp_wqinput = wqinput_create("carp", _carp_proto_input);
2375 #ifdef INET6
2376 carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
2377 #endif
2378 }
2379
2380 static void
2381 sysctl_net_inet_carp_setup(struct sysctllog **clog)
2382 {
2383
2384 sysctl_createv(clog, 0, NULL, NULL,
2385 CTLFLAG_PERMANENT,
2386 CTLTYPE_NODE, "inet", NULL,
2387 NULL, 0, NULL, 0,
2388 CTL_NET, PF_INET, CTL_EOL);
2389 sysctl_createv(clog, 0, NULL, NULL,
2390 CTLFLAG_PERMANENT,
2391 CTLTYPE_NODE, "carp",
2392 SYSCTL_DESCR("CARP related settings"),
2393 NULL, 0, NULL, 0,
2394 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
2395
2396 sysctl_createv(clog, 0, NULL, NULL,
2397 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2398 CTLTYPE_INT, "preempt",
2399 SYSCTL_DESCR("Enable CARP Preempt"),
2400 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
2401 CTL_NET, PF_INET, IPPROTO_CARP,
2402 CTL_CREATE, CTL_EOL);
2403 sysctl_createv(clog, 0, NULL, NULL,
2404 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2405 CTLTYPE_INT, "arpbalance",
2406 SYSCTL_DESCR("Enable ARP balancing"),
2407 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
2408 CTL_NET, PF_INET, IPPROTO_CARP,
2409 CTL_CREATE, CTL_EOL);
2410 sysctl_createv(clog, 0, NULL, NULL,
2411 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2412 CTLTYPE_INT, "allow",
2413 SYSCTL_DESCR("Enable CARP"),
2414 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
2415 CTL_NET, PF_INET, IPPROTO_CARP,
2416 CTL_CREATE, CTL_EOL);
2417 sysctl_createv(clog, 0, NULL, NULL,
2418 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2419 CTLTYPE_INT, "log",
2420 SYSCTL_DESCR("CARP logging"),
2421 NULL, 0, &carp_opts[CARPCTL_LOG], 0,
2422 CTL_NET, PF_INET, IPPROTO_CARP,
2423 CTL_CREATE, CTL_EOL);
2424 sysctl_createv(clog, 0, NULL, NULL,
2425 CTLFLAG_PERMANENT,
2426 CTLTYPE_STRUCT, "stats",
2427 SYSCTL_DESCR("CARP statistics"),
2428 sysctl_net_inet_carp_stats, 0, NULL, 0,
2429 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
2430 CTL_EOL);
2431 }
2432