ip_carp.c revision 1.86 1 /* $NetBSD: ip_carp.c,v 1.86 2017/03/14 09:03:08 ozaki-r Exp $ */
2 /* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */
3
4 /*
5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
6 * Copyright (c) 2003 Ryan McBride. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #ifdef _KERNEL_OPT
31 #include "opt_inet.h"
32 #include "opt_mbuftrace.h"
33 #endif
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.86 2017/03/14 09:03:08 ozaki-r Exp $");
37
38 /*
39 * TODO:
40 * - iface reconfigure
41 * - support for hardware checksum calculations;
42 *
43 */
44
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/mbuf.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/callout.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/device.h>
54 #include <sys/time.h>
55 #include <sys/kernel.h>
56 #include <sys/kauth.h>
57 #include <sys/sysctl.h>
58 #include <sys/ucred.h>
59 #include <sys/syslog.h>
60 #include <sys/acct.h>
61 #include <sys/cprng.h>
62
63 #include <sys/cpu.h>
64
65 #include <net/if.h>
66 #include <net/pfil.h>
67 #include <net/if_types.h>
68 #include <net/if_ether.h>
69 #include <net/route.h>
70 #include <net/netisr.h>
71 #include <net/net_stats.h>
72 #include <netinet/if_inarp.h>
73 #include <netinet/wqinput.h>
74
75 #if NFDDI > 0
76 #include <net/if_fddi.h>
77 #endif
78 #if NTOKEN > 0
79 #include <net/if_token.h>
80 #endif
81
82 #ifdef INET
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/in_var.h>
86 #include <netinet/ip.h>
87 #include <netinet/ip_var.h>
88
89 #include <net/if_dl.h>
90 #endif
91
92 #ifdef INET6
93 #include <netinet/icmp6.h>
94 #include <netinet/ip6.h>
95 #include <netinet6/ip6_var.h>
96 #include <netinet6/nd6.h>
97 #include <netinet6/scope6_var.h>
98 #include <netinet6/in6_var.h>
99 #endif
100
101 #include <net/bpf.h>
102
103 #include <sys/sha1.h>
104
105 #include <netinet/ip_carp.h>
106
107 #include "ioconf.h"
108
109 struct carp_mc_entry {
110 LIST_ENTRY(carp_mc_entry) mc_entries;
111 union {
112 struct ether_multi *mcu_enm;
113 } mc_u;
114 struct sockaddr_storage mc_addr;
115 };
116 #define mc_enm mc_u.mcu_enm
117
118 struct carp_softc {
119 struct ethercom sc_ac;
120 #define sc_if sc_ac.ec_if
121 #define sc_carpdev sc_ac.ec_if.if_carpdev
122 int ah_cookie;
123 int lh_cookie;
124 struct ip_moptions sc_imo;
125 #ifdef INET6
126 struct ip6_moptions sc_im6o;
127 #endif /* INET6 */
128 TAILQ_ENTRY(carp_softc) sc_list;
129
130 enum { INIT = 0, BACKUP, MASTER } sc_state;
131
132 int sc_suppress;
133 int sc_bow_out;
134
135 int sc_sendad_errors;
136 #define CARP_SENDAD_MAX_ERRORS 3
137 int sc_sendad_success;
138 #define CARP_SENDAD_MIN_SUCCESS 3
139
140 int sc_vhid;
141 int sc_advskew;
142 int sc_naddrs;
143 int sc_naddrs6;
144 int sc_advbase; /* seconds */
145 int sc_init_counter;
146 u_int64_t sc_counter;
147
148 /* authentication */
149 #define CARP_HMAC_PAD 64
150 unsigned char sc_key[CARP_KEY_LEN];
151 unsigned char sc_pad[CARP_HMAC_PAD];
152 SHA1_CTX sc_sha1;
153 u_int32_t sc_hashkey[2];
154
155 struct callout sc_ad_tmo; /* advertisement timeout */
156 struct callout sc_md_tmo; /* master down timeout */
157 struct callout sc_md6_tmo; /* master down timeout */
158
159 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
160 };
161
162 int carp_suppress_preempt = 0;
163 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */
164
165 static percpu_t *carpstat_percpu;
166
167 #define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x)
168
169 #ifdef MBUFTRACE
170 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
171 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
172 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
173 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
174 #endif
175
176 struct carp_if {
177 TAILQ_HEAD(, carp_softc) vhif_vrs;
178 int vhif_nvrs;
179
180 struct ifnet *vhif_ifp;
181 };
182
/*
 * Emit a log line when carp_opts[CARPCTL_LOG] is set.
 *
 *   sc - softc whose interface name prefixes the message; when it
 *        evaluates false the generic prefix "carp: " is used instead.
 *   s  - parenthesized printf-style argument list handed to addlog(),
 *        e.g. CARP_LOG(sc, ("bad ttl %d", ttl));
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement: it can be the body of an unbraced if that is
 * followed by an else, and its inner if can never capture that else.
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if (sc)						\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (/*CONSTCOND*/0)
193
194 static void carp_hmac_prepare(struct carp_softc *);
195 static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
196 unsigned char *);
197 static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
198 unsigned char *);
199 static void carp_setroute(struct carp_softc *, int);
200 static void carp_proto_input_c(struct mbuf *, struct carp_header *,
201 sa_family_t);
202 static void carpdetach(struct carp_softc *);
203 static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
204 struct carp_header *);
205 static void carp_send_ad_all(void);
206 static void carp_send_ad(void *);
207 static void carp_send_arp(struct carp_softc *);
208 static void carp_master_down(void *);
209 static int carp_ioctl(struct ifnet *, u_long, void *);
210 static void carp_start(struct ifnet *);
211 static void carp_setrun(struct carp_softc *, sa_family_t);
212 static void carp_set_state(struct carp_softc *, int);
213 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
214 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
215
216 static void carp_multicast_cleanup(struct carp_softc *);
217 static int carp_set_ifp(struct carp_softc *, struct ifnet *);
218 static void carp_set_enaddr(struct carp_softc *);
219 #if 0
220 static void carp_addr_updated(void *);
221 #endif
222 static u_int32_t carp_hash(struct carp_softc *, u_char *);
223 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
224 static int carp_join_multicast(struct carp_softc *);
225 #ifdef INET6
226 static void carp_send_na(struct carp_softc *);
227 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
228 static int carp_join_multicast6(struct carp_softc *);
229 #endif
230 static int carp_clone_create(struct if_clone *, int);
231 static int carp_clone_destroy(struct ifnet *);
232 static int carp_ether_addmulti(struct carp_softc *, struct ifreq *);
233 static int carp_ether_delmulti(struct carp_softc *, struct ifreq *);
234 static void carp_ether_purgemulti(struct carp_softc *);
235
236 static void sysctl_net_inet_carp_setup(struct sysctllog **);
237
238 /* workqueue-based pr_input */
239 static struct wqinput *carp_wqinput;
240 static void _carp_proto_input(struct mbuf *, int, int);
241 #ifdef INET6
242 static struct wqinput *carp6_wqinput;
243 static void _carp6_proto_input(struct mbuf *, int, int);
244 #endif
245
246 struct if_clone carp_cloner =
247 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
248
249 static __inline u_int16_t
250 carp_cksum(struct mbuf *m, int len)
251 {
252 return (in_cksum(m, len));
253 }
254
255 static __inline u_int16_t
256 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len)
257 {
258 return (in6_cksum(m, IPPROTO_CARP, off, len));
259 }
260
261 static void
262 carp_hmac_prepare(struct carp_softc *sc)
263 {
264 u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
265 u_int8_t vhid = sc->sc_vhid & 0xff;
266 SHA1_CTX sha1ctx;
267 u_int32_t kmd[5];
268 struct ifaddr *ifa;
269 int i, found;
270 struct in_addr last, cur, in;
271 #ifdef INET6
272 struct in6_addr last6, cur6, in6;
273 #endif /* INET6 */
274
275 /* compute ipad from key */
276 memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
277 memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
278 for (i = 0; i < sizeof(sc->sc_pad); i++)
279 sc->sc_pad[i] ^= 0x36;
280
281 /* precompute first part of inner hash */
282 SHA1Init(&sc->sc_sha1);
283 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
284 SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
285 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
286
287 /* generate a key for the arpbalance hash, before the vhid is hashed */
288 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
289 SHA1Final((unsigned char *)kmd, &sha1ctx);
290 sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
291 sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
292
293 /* the rest of the precomputation */
294 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
295
296 /* Hash the addresses from smallest to largest, not interface order */
297 #ifdef INET
298 cur.s_addr = 0;
299 do {
300 found = 0;
301 last = cur;
302 cur.s_addr = 0xffffffff;
303 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
304 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
305 if (ifa->ifa_addr->sa_family == AF_INET &&
306 ntohl(in.s_addr) > ntohl(last.s_addr) &&
307 ntohl(in.s_addr) < ntohl(cur.s_addr)) {
308 cur.s_addr = in.s_addr;
309 found++;
310 }
311 }
312 if (found)
313 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
314 } while (found);
315 #endif /* INET */
316
317 #ifdef INET6
318 memset(&cur6, 0x00, sizeof(cur6));
319 do {
320 found = 0;
321 last6 = cur6;
322 memset(&cur6, 0xff, sizeof(cur6));
323 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
324 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
325 if (IN6_IS_ADDR_LINKLOCAL(&in6))
326 in6.s6_addr16[1] = 0;
327 if (ifa->ifa_addr->sa_family == AF_INET6 &&
328 memcmp(&in6, &last6, sizeof(in6)) > 0 &&
329 memcmp(&in6, &cur6, sizeof(in6)) < 0) {
330 cur6 = in6;
331 found++;
332 }
333 }
334 if (found)
335 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
336 } while (found);
337 #endif /* INET6 */
338
339 /* convert ipad to opad */
340 for (i = 0; i < sizeof(sc->sc_pad); i++)
341 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
342 }
343
344 static void
345 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
346 unsigned char md[20])
347 {
348 SHA1_CTX sha1ctx;
349
350 /* fetch first half of inner hash */
351 memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
352
353 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
354 SHA1Final(md, &sha1ctx);
355
356 /* outer hash */
357 SHA1Init(&sha1ctx);
358 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
359 SHA1Update(&sha1ctx, md, 20);
360 SHA1Final(md, &sha1ctx);
361 }
362
363 static int
364 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
365 unsigned char md[20])
366 {
367 unsigned char md2[20];
368
369 carp_hmac_generate(sc, counter, md2);
370
371 return (memcmp(md, md2, sizeof(md2)));
372 }
373
374 static void
375 carp_setroute(struct carp_softc *sc, int cmd)
376 {
377 struct ifaddr *ifa;
378 int s;
379
380 KERNEL_LOCK(1, NULL);
381 s = splsoftnet();
382 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
383 switch (ifa->ifa_addr->sa_family) {
384 case AF_INET: {
385 int count = 0;
386 struct rtentry *rt;
387 int hr_otherif, nr_ourif;
388
389 /*
390 * Avoid screwing with the routes if there are other
391 * carp interfaces which are master and have the same
392 * address.
393 */
394 if (sc->sc_carpdev != NULL &&
395 sc->sc_carpdev->if_carp != NULL) {
396 count = carp_addrcount(
397 (struct carp_if *)sc->sc_carpdev->if_carp,
398 ifatoia(ifa), CARP_COUNT_MASTER);
399 if ((cmd == RTM_ADD && count != 1) ||
400 (cmd == RTM_DELETE && count != 0))
401 continue;
402 }
403
404 /* Remove the existing host route, if any */
405 rtrequest(RTM_DELETE, ifa->ifa_addr,
406 ifa->ifa_addr, ifa->ifa_netmask,
407 RTF_HOST, NULL);
408
409 rt = NULL;
410 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
411 ifa->ifa_netmask, RTF_HOST, &rt);
412 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
413 (rt->rt_flags & RTF_CONNECTED));
414 if (rt != NULL) {
415 rt_unref(rt);
416 rt = NULL;
417 }
418
419 /* Check for a network route on our interface */
420
421 rt = NULL;
422 (void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
423 ifa->ifa_netmask, 0, &rt);
424 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
425
426 switch (cmd) {
427 case RTM_ADD:
428 if (hr_otherif) {
429 ifa->ifa_rtrequest = NULL;
430 ifa->ifa_flags &= ~RTF_CONNECTED;
431
432 rtrequest(RTM_ADD, ifa->ifa_addr,
433 ifa->ifa_addr, ifa->ifa_netmask,
434 RTF_UP | RTF_HOST, NULL);
435 }
436 if (!hr_otherif || nr_ourif || !rt) {
437 if (nr_ourif &&
438 (rt->rt_flags & RTF_CONNECTED) == 0)
439 rtrequest(RTM_DELETE,
440 ifa->ifa_addr,
441 ifa->ifa_addr,
442 ifa->ifa_netmask, 0, NULL);
443
444 ifa->ifa_rtrequest = arp_rtrequest;
445 ifa->ifa_flags |= RTF_CONNECTED;
446
447 if (rtrequest(RTM_ADD, ifa->ifa_addr,
448 ifa->ifa_addr, ifa->ifa_netmask, 0,
449 NULL) == 0)
450 ifa->ifa_flags |= IFA_ROUTE;
451 }
452 break;
453 case RTM_DELETE:
454 break;
455 default:
456 break;
457 }
458 if (rt != NULL) {
459 rt_unref(rt);
460 rt = NULL;
461 }
462 break;
463 }
464
465 #ifdef INET6
466 case AF_INET6:
467 if (cmd == RTM_ADD)
468 in6_ifaddlocal(ifa);
469 else
470 in6_ifremlocal(ifa);
471 break;
472 #endif /* INET6 */
473 default:
474 break;
475 }
476 }
477 splx(s);
478 KERNEL_UNLOCK_ONE(NULL);
479 }
480
481 /*
482 * process input packet.
483 * we have rearranged checks order compared to the rfc,
484 * but it seems more efficient this way or not possible otherwise.
485 */
486 static void
487 _carp_proto_input(struct mbuf *m, int hlen, int proto)
488 {
489 struct ip *ip = mtod(m, struct ip *);
490 struct carp_softc *sc = NULL;
491 struct carp_header *ch;
492 int iplen, len;
493 struct ifnet *rcvif;
494
495 CARP_STATINC(CARP_STAT_IPACKETS);
496 MCLAIM(m, &carp_proto_mowner_rx);
497
498 if (!carp_opts[CARPCTL_ALLOW]) {
499 m_freem(m);
500 return;
501 }
502
503 rcvif = m_get_rcvif_NOMPSAFE(m);
504 /* check if received on a valid carp interface */
505 if (rcvif->if_type != IFT_CARP) {
506 CARP_STATINC(CARP_STAT_BADIF);
507 CARP_LOG(sc, ("packet received on non-carp interface: %s",
508 rcvif->if_xname));
509 m_freem(m);
510 return;
511 }
512
513 /* verify that the IP TTL is 255. */
514 if (ip->ip_ttl != CARP_DFLTTL) {
515 CARP_STATINC(CARP_STAT_BADTTL);
516 CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
517 CARP_DFLTTL, rcvif->if_xname));
518 m_freem(m);
519 return;
520 }
521
522 /*
523 * verify that the received packet length is
524 * equal to the CARP header
525 */
526 iplen = ip->ip_hl << 2;
527 len = iplen + sizeof(*ch);
528 if (len > m->m_pkthdr.len) {
529 CARP_STATINC(CARP_STAT_BADLEN);
530 CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
531 rcvif->if_xname));
532 m_freem(m);
533 return;
534 }
535
536 if ((m = m_pullup(m, len)) == NULL) {
537 CARP_STATINC(CARP_STAT_HDROPS);
538 return;
539 }
540 ip = mtod(m, struct ip *);
541 ch = (struct carp_header *)((char *)ip + iplen);
542 /* verify the CARP checksum */
543 m->m_data += iplen;
544 if (carp_cksum(m, len - iplen)) {
545 CARP_STATINC(CARP_STAT_BADSUM);
546 CARP_LOG(sc, ("checksum failed on %s",
547 rcvif->if_xname));
548 m_freem(m);
549 return;
550 }
551 m->m_data -= iplen;
552
553 carp_proto_input_c(m, ch, AF_INET);
554 }
555
556 void
557 carp_proto_input(struct mbuf *m, ...)
558 {
559
560 wqinput_input(carp_wqinput, m, 0, 0);
561 }
562
563 #ifdef INET6
564 static void
565 _carp6_proto_input(struct mbuf *m, int off, int proto)
566 {
567 struct carp_softc *sc = NULL;
568 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
569 struct carp_header *ch;
570 u_int len;
571 struct ifnet *rcvif;
572
573 CARP_STATINC(CARP_STAT_IPACKETS6);
574 MCLAIM(m, &carp_proto6_mowner_rx);
575
576 if (!carp_opts[CARPCTL_ALLOW]) {
577 m_freem(m);
578 return;
579 }
580
581 rcvif = m_get_rcvif_NOMPSAFE(m);
582
583 /* check if received on a valid carp interface */
584 if (rcvif->if_type != IFT_CARP) {
585 CARP_STATINC(CARP_STAT_BADIF);
586 CARP_LOG(sc, ("packet received on non-carp interface: %s",
587 rcvif->if_xname));
588 m_freem(m);
589 return;
590 }
591
592 /* verify that the IP TTL is 255 */
593 if (ip6->ip6_hlim != CARP_DFLTTL) {
594 CARP_STATINC(CARP_STAT_BADTTL);
595 CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
596 CARP_DFLTTL, rcvif->if_xname));
597 m_freem(m);
598 return;
599 }
600
601 /* verify that we have a complete carp packet */
602 len = m->m_len;
603 IP6_EXTHDR_GET(ch, struct carp_header *, m, off, sizeof(*ch));
604 if (ch == NULL) {
605 CARP_STATINC(CARP_STAT_BADLEN);
606 CARP_LOG(sc, ("packet size %u too small", len));
607 return;
608 }
609
610 /* verify the CARP checksum */
611 if (carp6_cksum(m, off, sizeof(*ch))) {
612 CARP_STATINC(CARP_STAT_BADSUM);
613 CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
614 m_freem(m);
615 return;
616 }
617
618 carp_proto_input_c(m, ch, AF_INET6);
619 return;
620 }
621
622 int
623 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
624 {
625
626 wqinput_input(carp6_wqinput, *mp, *offp, proto);
627
628 return IPPROTO_DONE;
629 }
630 #endif /* INET6 */
631
632 static void
633 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
634 {
635 struct carp_softc *sc;
636 u_int64_t tmp_counter;
637 struct timeval sc_tv, ch_tv;
638
639 TAILQ_FOREACH(sc, &((struct carp_if *)
640 m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list)
641 if (sc->sc_vhid == ch->carp_vhid)
642 break;
643
644 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
645 (IFF_UP|IFF_RUNNING)) {
646 CARP_STATINC(CARP_STAT_BADVHID);
647 m_freem(m);
648 return;
649 }
650
651 /*
652 * Check if our own advertisement was duplicated
653 * from a non simplex interface.
654 * XXX If there is no address on our physical interface
655 * there is no way to distinguish our ads from the ones
656 * another carp host might have sent us.
657 */
658 if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
659 struct sockaddr sa;
660 struct ifaddr *ifa;
661 int s;
662
663 memset(&sa, 0, sizeof(sa));
664 sa.sa_family = af;
665 s = pserialize_read_enter();
666 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
667
668 if (ifa && af == AF_INET) {
669 struct ip *ip = mtod(m, struct ip *);
670 if (ip->ip_src.s_addr ==
671 ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
672 pserialize_read_exit(s);
673 m_freem(m);
674 return;
675 }
676 }
677 #ifdef INET6
678 if (ifa && af == AF_INET6) {
679 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
680 struct in6_addr in6_src, in6_found;
681
682 in6_src = ip6->ip6_src;
683 in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
684 if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
685 in6_src.s6_addr16[1] = 0;
686 if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
687 in6_found.s6_addr16[1] = 0;
688 if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
689 pserialize_read_exit(s);
690 m_freem(m);
691 return;
692 }
693 }
694 #endif /* INET6 */
695 pserialize_read_exit(s);
696 }
697
698 nanotime(&sc->sc_if.if_lastchange);
699 sc->sc_if.if_ipackets++;
700 sc->sc_if.if_ibytes += m->m_pkthdr.len;
701
702 /* verify the CARP version. */
703 if (ch->carp_version != CARP_VERSION) {
704 CARP_STATINC(CARP_STAT_BADVER);
705 sc->sc_if.if_ierrors++;
706 CARP_LOG(sc, ("invalid version %d != %d",
707 ch->carp_version, CARP_VERSION));
708 m_freem(m);
709 return;
710 }
711
712 /* verify the hash */
713 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
714 struct ip *ip;
715 struct ip6_hdr *ip6;
716 char ip6buf[INET6_ADDRSTRLEN];
717 char ipbuf[INET_ADDRSTRLEN];
718
719 CARP_STATINC(CARP_STAT_BADAUTH);
720 sc->sc_if.if_ierrors++;
721
722 switch(af) {
723 case AF_INET:
724 ip = mtod(m, struct ip *);
725 CARP_LOG(sc, ("incorrect hash from %s",
726 in_fmtaddr(ipbuf, ip->ip_src)));
727 break;
728
729 case AF_INET6:
730 ip6 = mtod(m, struct ip6_hdr *);
731 CARP_LOG(sc, ("incorrect hash from %s",
732 IN6_PRINT(ip6buf, &ip6->ip6_src)));
733 break;
734
735 default: CARP_LOG(sc, ("incorrect hash"));
736 break;
737 }
738 m_freem(m);
739 return;
740 }
741
742 tmp_counter = ntohl(ch->carp_counter[0]);
743 tmp_counter = tmp_counter<<32;
744 tmp_counter += ntohl(ch->carp_counter[1]);
745
746 /* XXX Replay protection goes here */
747
748 sc->sc_init_counter = 0;
749 sc->sc_counter = tmp_counter;
750
751
752 sc_tv.tv_sec = sc->sc_advbase;
753 if (carp_suppress_preempt && sc->sc_advskew < 240)
754 sc_tv.tv_usec = 240 * 1000000 / 256;
755 else
756 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
757 ch_tv.tv_sec = ch->carp_advbase;
758 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
759
760 switch (sc->sc_state) {
761 case INIT:
762 break;
763 case MASTER:
764 /*
765 * If we receive an advertisement from a backup who's going to
766 * be more frequent than us, go into BACKUP state.
767 */
768 if (timercmp(&sc_tv, &ch_tv, >) ||
769 timercmp(&sc_tv, &ch_tv, ==)) {
770 callout_stop(&sc->sc_ad_tmo);
771 CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
772 carp_set_state(sc, BACKUP);
773 carp_setrun(sc, 0);
774 carp_setroute(sc, RTM_DELETE);
775 }
776 break;
777 case BACKUP:
778 /*
779 * If we're pre-empting masters who advertise slower than us,
780 * and this one claims to be slower, treat him as down.
781 */
782 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
783 CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
784 carp_master_down(sc);
785 break;
786 }
787
788 /*
789 * If the master is going to advertise at such a low frequency
790 * that he's guaranteed to time out, we'd might as well just
791 * treat him as timed out now.
792 */
793 sc_tv.tv_sec = sc->sc_advbase * 3;
794 if (timercmp(&sc_tv, &ch_tv, <)) {
795 CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
796 carp_master_down(sc);
797 break;
798 }
799
800 /*
801 * Otherwise, we reset the counter and wait for the next
802 * advertisement.
803 */
804 carp_setrun(sc, af);
805 break;
806 }
807
808 m_freem(m);
809 return;
810 }
811
812 /*
813 * Interface side of the CARP implementation.
814 */
815
816 /* ARGSUSED */
817 void
818 carpattach(int n)
819 {
820 if_clone_attach(&carp_cloner);
821
822 carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
823 }
824
825 static int
826 carp_clone_create(struct if_clone *ifc, int unit)
827 {
828 extern int ifqmaxlen;
829 struct carp_softc *sc;
830 struct ifnet *ifp;
831
832 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
833 if (!sc)
834 return (ENOMEM);
835
836 sc->sc_suppress = 0;
837 sc->sc_advbase = CARP_DFLTINTV;
838 sc->sc_vhid = -1; /* required setting */
839 sc->sc_advskew = 0;
840 sc->sc_init_counter = 1;
841 sc->sc_naddrs = sc->sc_naddrs6 = 0;
842 #ifdef INET6
843 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
844 #endif /* INET6 */
845
846 callout_init(&sc->sc_ad_tmo, 0);
847 callout_init(&sc->sc_md_tmo, 0);
848 callout_init(&sc->sc_md6_tmo, 0);
849
850 callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
851 callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
852 callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
853
854 LIST_INIT(&sc->carp_mc_listhead);
855 ifp = &sc->sc_if;
856 ifp->if_softc = sc;
857 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
858 unit);
859 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
860 ifp->if_ioctl = carp_ioctl;
861 ifp->if_start = carp_start;
862 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
863 IFQ_SET_READY(&ifp->if_snd);
864 if_initialize(ifp);
865 ether_ifattach(ifp, NULL);
866 carp_set_enaddr(sc);
867 /* Overwrite ethernet defaults */
868 ifp->if_type = IFT_CARP;
869 ifp->if_output = carp_output;
870 ifp->if_extflags &= ~IFEF_OUTPUT_MPSAFE;
871 if_register(ifp);
872
873 return (0);
874 }
875
876 static int
877 carp_clone_destroy(struct ifnet *ifp)
878 {
879 struct carp_softc *sc = ifp->if_softc;
880
881 carpdetach(ifp->if_softc);
882 ether_ifdetach(ifp);
883 if_detach(ifp);
884 callout_destroy(&sc->sc_ad_tmo);
885 callout_destroy(&sc->sc_md_tmo);
886 callout_destroy(&sc->sc_md6_tmo);
887 free(ifp->if_softc, M_DEVBUF);
888
889 return (0);
890 }
891
892 static void
893 carpdetach(struct carp_softc *sc)
894 {
895 struct carp_if *cif;
896 int s;
897
898 callout_stop(&sc->sc_ad_tmo);
899 callout_stop(&sc->sc_md_tmo);
900 callout_stop(&sc->sc_md6_tmo);
901
902 if (sc->sc_suppress)
903 carp_suppress_preempt--;
904 sc->sc_suppress = 0;
905
906 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
907 carp_suppress_preempt--;
908 sc->sc_sendad_errors = 0;
909
910 carp_set_state(sc, INIT);
911 sc->sc_if.if_flags &= ~IFF_UP;
912 carp_setrun(sc, 0);
913 carp_multicast_cleanup(sc);
914
915 KERNEL_LOCK(1, NULL);
916 s = splnet();
917 if (sc->sc_carpdev != NULL) {
918 /* XXX linkstatehook removal */
919 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
920 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
921 if (!--cif->vhif_nvrs) {
922 ifpromisc(sc->sc_carpdev, 0);
923 sc->sc_carpdev->if_carp = NULL;
924 free(cif, M_IFADDR);
925 }
926 }
927 sc->sc_carpdev = NULL;
928 splx(s);
929 KERNEL_UNLOCK_ONE(NULL);
930 }
931
932 /* Detach an interface from the carp. */
933 void
934 carp_ifdetach(struct ifnet *ifp)
935 {
936 struct carp_softc *sc, *nextsc;
937 struct carp_if *cif = (struct carp_if *)ifp->if_carp;
938
939 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
940 nextsc = TAILQ_NEXT(sc, sc_list);
941 carpdetach(sc);
942 }
943 }
944
945 static int
946 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
947 struct carp_header *ch)
948 {
949 if (sc->sc_init_counter) {
950 /* this could also be seconds since unix epoch */
951 sc->sc_counter = cprng_fast64();
952 } else
953 sc->sc_counter++;
954
955 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
956 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
957
958 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
959
960 return (0);
961 }
962
963 static void
964 carp_send_ad_all(void)
965 {
966 struct ifnet *ifp;
967 struct carp_if *cif;
968 struct carp_softc *vh;
969 int s;
970 int bound = curlwp_bind();
971
972 s = pserialize_read_enter();
973 IFNET_READER_FOREACH(ifp) {
974 struct psref psref;
975 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
976 continue;
977
978 if_acquire(ifp, &psref);
979 pserialize_read_exit(s);
980
981 cif = (struct carp_if *)ifp->if_carp;
982 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
983 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
984 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
985 carp_send_ad(vh);
986 }
987
988 s = pserialize_read_enter();
989 if_release(ifp, &psref);
990 }
991 pserialize_read_exit(s);
992 curlwp_bindx(bound);
993 }
994
995
996 static void
997 carp_send_ad(void *v)
998 {
999 struct carp_header ch;
1000 struct timeval tv;
1001 struct carp_softc *sc = v;
1002 struct carp_header *ch_ptr;
1003 struct mbuf *m;
1004 int error, len, advbase, advskew, s;
1005 struct sockaddr sa;
1006
1007 KERNEL_LOCK(1, NULL);
1008 s = splsoftnet();
1009
1010 advbase = advskew = 0; /* Sssssh compiler */
1011 if (sc->sc_carpdev == NULL) {
1012 sc->sc_if.if_oerrors++;
1013 goto retry_later;
1014 }
1015
1016 /* bow out if we've gone to backup (the carp interface is going down) */
1017 if (sc->sc_bow_out) {
1018 sc->sc_bow_out = 0;
1019 advbase = 255;
1020 advskew = 255;
1021 } else {
1022 advbase = sc->sc_advbase;
1023 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1024 advskew = sc->sc_advskew;
1025 else
1026 advskew = 240;
1027 tv.tv_sec = advbase;
1028 tv.tv_usec = advskew * 1000000 / 256;
1029 }
1030
1031 ch.carp_version = CARP_VERSION;
1032 ch.carp_type = CARP_ADVERTISEMENT;
1033 ch.carp_vhid = sc->sc_vhid;
1034 ch.carp_advbase = advbase;
1035 ch.carp_advskew = advskew;
1036 ch.carp_authlen = 7; /* XXX DEFINE */
1037 ch.carp_pad1 = 0; /* must be zero */
1038 ch.carp_cksum = 0;
1039
1040
1041 #ifdef INET
1042 if (sc->sc_naddrs) {
1043 struct ip *ip;
1044 struct ifaddr *ifa;
1045 int _s;
1046
1047 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1048 if (m == NULL) {
1049 sc->sc_if.if_oerrors++;
1050 CARP_STATINC(CARP_STAT_ONOMEM);
1051 /* XXX maybe less ? */
1052 goto retry_later;
1053 }
1054 MCLAIM(m, &carp_proto_mowner_tx);
1055 len = sizeof(*ip) + sizeof(ch);
1056 m->m_pkthdr.len = len;
1057 m_reset_rcvif(m);
1058 m->m_len = len;
1059 MH_ALIGN(m, m->m_len);
1060 m->m_flags |= M_MCAST;
1061 ip = mtod(m, struct ip *);
1062 ip->ip_v = IPVERSION;
1063 ip->ip_hl = sizeof(*ip) >> 2;
1064 ip->ip_tos = IPTOS_LOWDELAY;
1065 ip->ip_len = htons(len);
1066 ip->ip_id = 0; /* no need for id, we don't support fragments */
1067 ip->ip_off = htons(IP_DF);
1068 ip->ip_ttl = CARP_DFLTTL;
1069 ip->ip_p = IPPROTO_CARP;
1070 ip->ip_sum = 0;
1071
1072 memset(&sa, 0, sizeof(sa));
1073 sa.sa_family = AF_INET;
1074 _s = pserialize_read_enter();
1075 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1076 if (ifa == NULL)
1077 ip->ip_src.s_addr = 0;
1078 else
1079 ip->ip_src.s_addr =
1080 ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1081 pserialize_read_exit(_s);
1082 ip->ip_dst.s_addr = INADDR_CARP_GROUP;
1083
1084 ch_ptr = (struct carp_header *)(&ip[1]);
1085 memcpy(ch_ptr, &ch, sizeof(ch));
1086 if (carp_prepare_ad(m, sc, ch_ptr))
1087 goto retry_later;
1088
1089 m->m_data += sizeof(*ip);
1090 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1091 m->m_data -= sizeof(*ip);
1092
1093 nanotime(&sc->sc_if.if_lastchange);
1094 sc->sc_if.if_opackets++;
1095 sc->sc_if.if_obytes += len;
1096 CARP_STATINC(CARP_STAT_OPACKETS);
1097
1098 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1099 NULL);
1100 if (error) {
1101 if (error == ENOBUFS)
1102 CARP_STATINC(CARP_STAT_ONOMEM);
1103 else
1104 CARP_LOG(sc, ("ip_output failed: %d", error));
1105 sc->sc_if.if_oerrors++;
1106 if (sc->sc_sendad_errors < INT_MAX)
1107 sc->sc_sendad_errors++;
1108 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1109 carp_suppress_preempt++;
1110 if (carp_suppress_preempt == 1)
1111 carp_send_ad_all();
1112 }
1113 sc->sc_sendad_success = 0;
1114 } else {
1115 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1116 if (++sc->sc_sendad_success >=
1117 CARP_SENDAD_MIN_SUCCESS) {
1118 carp_suppress_preempt--;
1119 sc->sc_sendad_errors = 0;
1120 }
1121 } else
1122 sc->sc_sendad_errors = 0;
1123 }
1124 }
1125 #endif /* INET */
1126 #ifdef INET6
1127 if (sc->sc_naddrs6) {
1128 struct ip6_hdr *ip6;
1129 struct ifaddr *ifa;
1130 int _s;
1131
1132 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1133 if (m == NULL) {
1134 sc->sc_if.if_oerrors++;
1135 CARP_STATINC(CARP_STAT_ONOMEM);
1136 /* XXX maybe less ? */
1137 goto retry_later;
1138 }
1139 MCLAIM(m, &carp_proto6_mowner_tx);
1140 len = sizeof(*ip6) + sizeof(ch);
1141 m->m_pkthdr.len = len;
1142 m_reset_rcvif(m);
1143 m->m_len = len;
1144 MH_ALIGN(m, m->m_len);
1145 m->m_flags |= M_MCAST;
1146 ip6 = mtod(m, struct ip6_hdr *);
1147 memset(ip6, 0, sizeof(*ip6));
1148 ip6->ip6_vfc |= IPV6_VERSION;
1149 ip6->ip6_hlim = CARP_DFLTTL;
1150 ip6->ip6_nxt = IPPROTO_CARP;
1151
1152 /* set the source address */
1153 memset(&sa, 0, sizeof(sa));
1154 sa.sa_family = AF_INET6;
1155 _s = pserialize_read_enter();
1156 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1157 if (ifa == NULL) /* This should never happen with IPv6 */
1158 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
1159 else
1160 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1161 &ip6->ip6_src, sizeof(struct in6_addr));
1162 pserialize_read_exit(_s);
1163 /* set the multicast destination */
1164
1165 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1166 ip6->ip6_dst.s6_addr8[15] = 0x12;
1167 if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) {
1168 sc->sc_if.if_oerrors++;
1169 m_freem(m);
1170 CARP_LOG(sc, ("in6_setscope failed"));
1171 goto retry_later;
1172 }
1173
1174 ch_ptr = (struct carp_header *)(&ip6[1]);
1175 memcpy(ch_ptr, &ch, sizeof(ch));
1176 if (carp_prepare_ad(m, sc, ch_ptr))
1177 goto retry_later;
1178
1179 ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6),
1180 len - sizeof(*ip6));
1181
1182 nanotime(&sc->sc_if.if_lastchange);
1183 sc->sc_if.if_opackets++;
1184 sc->sc_if.if_obytes += len;
1185 CARP_STATINC(CARP_STAT_OPACKETS6);
1186
1187 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1188 if (error) {
1189 if (error == ENOBUFS)
1190 CARP_STATINC(CARP_STAT_ONOMEM);
1191 else
1192 CARP_LOG(sc, ("ip6_output failed: %d", error));
1193 sc->sc_if.if_oerrors++;
1194 if (sc->sc_sendad_errors < INT_MAX)
1195 sc->sc_sendad_errors++;
1196 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1197 carp_suppress_preempt++;
1198 if (carp_suppress_preempt == 1)
1199 carp_send_ad_all();
1200 }
1201 sc->sc_sendad_success = 0;
1202 } else {
1203 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1204 if (++sc->sc_sendad_success >=
1205 CARP_SENDAD_MIN_SUCCESS) {
1206 carp_suppress_preempt--;
1207 sc->sc_sendad_errors = 0;
1208 }
1209 } else
1210 sc->sc_sendad_errors = 0;
1211 }
1212 }
1213 #endif /* INET6 */
1214
1215 retry_later:
1216 splx(s);
1217 KERNEL_UNLOCK_ONE(NULL);
1218 if (advbase != 255 || advskew != 255)
1219 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1220 }
1221
1222 /*
1223 * Broadcast a gratuitous ARP request containing
1224 * the virtual router MAC address for each IP address
1225 * associated with the virtual router.
1226 */
1227 static void
1228 carp_send_arp(struct carp_softc *sc)
1229 {
1230 struct ifaddr *ifa;
1231 int s;
1232
1233 KERNEL_LOCK(1, NULL);
1234 s = splsoftnet();
1235 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1236
1237 if (ifa->ifa_addr->sa_family != AF_INET)
1238 continue;
1239
1240 arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl));
1241 }
1242 splx(s);
1243 KERNEL_UNLOCK_ONE(NULL);
1244 }
1245
1246 #ifdef INET6
1247 static void
1248 carp_send_na(struct carp_softc *sc)
1249 {
1250 struct ifaddr *ifa;
1251 struct in6_addr *in6;
1252 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1253 int s;
1254
1255 KERNEL_LOCK(1, NULL);
1256 s = splsoftnet();
1257
1258 IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1259
1260 if (ifa->ifa_addr->sa_family != AF_INET6)
1261 continue;
1262
1263 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1264 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1265 ND_NA_FLAG_OVERRIDE, 1, NULL);
1266 }
1267 splx(s);
1268 KERNEL_UNLOCK_ONE(NULL);
1269 }
1270 #endif /* INET6 */
1271
/*
 * Three-word mixing step, based on bridge_hash() in if_bridge.c.
 *
 * All three arguments must be modifiable lvalues of an unsigned 32-bit
 * type; each one is read and written several times.
 */
#define	mix(a, b, c)							\
do {									\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);			\
} while (0)
1287
1288 static u_int32_t
1289 carp_hash(struct carp_softc *sc, u_char *src)
1290 {
1291 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1292
1293 c += sc->sc_key[3] << 24;
1294 c += sc->sc_key[2] << 16;
1295 c += sc->sc_key[1] << 8;
1296 c += sc->sc_key[0];
1297 b += src[5] << 8;
1298 b += src[4];
1299 a += src[3] << 24;
1300 a += src[2] << 16;
1301 a += src[1] << 8;
1302 a += src[0];
1303
1304 mix(a, b, c);
1305 return (c);
1306 }
1307
1308 static int
1309 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1310 {
1311 struct carp_softc *vh;
1312 struct ifaddr *ifa;
1313 int count = 0;
1314
1315 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1316 if ((type == CARP_COUNT_RUNNING &&
1317 (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1318 (IFF_UP|IFF_RUNNING)) ||
1319 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1320 IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1321 if (ifa->ifa_addr->sa_family == AF_INET &&
1322 ia->ia_addr.sin_addr.s_addr ==
1323 ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1324 count++;
1325 }
1326 }
1327 }
1328 return (count);
1329 }
1330
1331 int
1332 carp_iamatch(struct in_ifaddr *ia, u_char *src,
1333 u_int32_t *count, u_int32_t index)
1334 {
1335 struct carp_softc *sc = ia->ia_ifp->if_softc;
1336
1337 if (carp_opts[CARPCTL_ARPBALANCE]) {
1338 /*
1339 * We use the source ip to decide which virtual host should
1340 * handle the request. If we're master of that virtual host,
1341 * then we respond, otherwise, just drop the arp packet on
1342 * the floor.
1343 */
1344
1345 /* Count the elegible carp interfaces with this address */
1346 if (*count == 0)
1347 *count = carp_addrcount(
1348 (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
1349 ia, CARP_COUNT_RUNNING);
1350
1351 /* This should never happen, but... */
1352 if (*count == 0)
1353 return (0);
1354
1355 if (carp_hash(sc, src) % *count == index - 1 &&
1356 sc->sc_state == MASTER) {
1357 return (1);
1358 }
1359 } else {
1360 if (sc->sc_state == MASTER)
1361 return (1);
1362 }
1363
1364 return (0);
1365 }
1366
1367 #ifdef INET6
1368 struct ifaddr *
1369 carp_iamatch6(void *v, struct in6_addr *taddr)
1370 {
1371 struct carp_if *cif = v;
1372 struct carp_softc *vh;
1373 struct ifaddr *ifa;
1374
1375 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1376 IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1377 if (IN6_ARE_ADDR_EQUAL(taddr,
1378 &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1379 ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1380 (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER)
1381 return (ifa);
1382 }
1383 }
1384
1385 return (NULL);
1386 }
1387 #endif /* INET6 */
1388
1389 struct ifnet *
1390 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
1391 {
1392 struct carp_if *cif = (struct carp_if *)v;
1393 struct carp_softc *vh;
1394 u_int8_t *ena;
1395
1396 if (src)
1397 ena = (u_int8_t *)&eh->ether_shost;
1398 else
1399 ena = (u_int8_t *)&eh->ether_dhost;
1400
1401 switch (iftype) {
1402 case IFT_ETHER:
1403 case IFT_FDDI:
1404 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1405 return (NULL);
1406 break;
1407 case IFT_ISO88025:
1408 if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
1409 return (NULL);
1410 break;
1411 default:
1412 return (NULL);
1413 break;
1414 }
1415
1416 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1417 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1418 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
1419 !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
1420 ETHER_ADDR_LEN)) {
1421 return (&vh->sc_if);
1422 }
1423
1424 return (NULL);
1425 }
1426
1427 int
1428 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1429 {
1430 struct ether_header eh;
1431 struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp;
1432 struct ifnet *ifp;
1433
1434 memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
1435 memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
1436 eh.ether_type = etype;
1437
1438 if (m->m_flags & (M_BCAST|M_MCAST)) {
1439 struct carp_softc *vh;
1440 struct mbuf *m0;
1441
1442 /*
1443 * XXX Should really check the list of multicast addresses
1444 * for each CARP interface _before_ copying.
1445 */
1446 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1447 m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1448 if (m0 == NULL)
1449 continue;
1450 m_set_rcvif(m0, &vh->sc_if);
1451 ether_input(&vh->sc_if, m0);
1452 }
1453 return (1);
1454 }
1455
1456 ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0);
1457 if (ifp == NULL) {
1458 return (1);
1459 }
1460
1461 m_set_rcvif(m, ifp);
1462
1463 bpf_mtap(ifp, m);
1464 ifp->if_ipackets++;
1465 ether_input(ifp, m);
1466 return (0);
1467 }
1468
1469 static void
1470 carp_master_down(void *v)
1471 {
1472 struct carp_softc *sc = v;
1473
1474 switch (sc->sc_state) {
1475 case INIT:
1476 printf("%s: master_down event in INIT state\n",
1477 sc->sc_if.if_xname);
1478 break;
1479 case MASTER:
1480 break;
1481 case BACKUP:
1482 CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
1483 carp_set_state(sc, MASTER);
1484 carp_send_ad(sc);
1485 carp_send_arp(sc);
1486 #ifdef INET6
1487 carp_send_na(sc);
1488 #endif /* INET6 */
1489 carp_setrun(sc, 0);
1490 carp_setroute(sc, RTM_ADD);
1491 break;
1492 }
1493 }
1494
1495 /*
1496 * When in backup state, af indicates whether to reset the master down timer
1497 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1498 */
1499 static void
1500 carp_setrun(struct carp_softc *sc, sa_family_t af)
1501 {
1502 struct timeval tv;
1503
1504 if (sc->sc_carpdev == NULL) {
1505 sc->sc_if.if_flags &= ~IFF_RUNNING;
1506 carp_set_state(sc, INIT);
1507 return;
1508 }
1509
1510 if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
1511 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1512 sc->sc_if.if_flags |= IFF_RUNNING;
1513 } else {
1514 sc->sc_if.if_flags &= ~IFF_RUNNING;
1515 carp_setroute(sc, RTM_DELETE);
1516 return;
1517 }
1518
1519 switch (sc->sc_state) {
1520 case INIT:
1521 carp_set_state(sc, BACKUP);
1522 carp_setroute(sc, RTM_DELETE);
1523 carp_setrun(sc, 0);
1524 break;
1525 case BACKUP:
1526 callout_stop(&sc->sc_ad_tmo);
1527 tv.tv_sec = 3 * sc->sc_advbase;
1528 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1529 switch (af) {
1530 #ifdef INET
1531 case AF_INET:
1532 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1533 break;
1534 #endif /* INET */
1535 #ifdef INET6
1536 case AF_INET6:
1537 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1538 break;
1539 #endif /* INET6 */
1540 default:
1541 if (sc->sc_naddrs)
1542 callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1543 #ifdef INET6
1544 if (sc->sc_naddrs6)
1545 callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1546 #endif /* INET6 */
1547 break;
1548 }
1549 break;
1550 case MASTER:
1551 tv.tv_sec = sc->sc_advbase;
1552 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1553 callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1554 break;
1555 }
1556 }
1557
1558 static void
1559 carp_multicast_cleanup(struct carp_softc *sc)
1560 {
1561 struct ip_moptions *imo = &sc->sc_imo;
1562 #ifdef INET6
1563 struct ip6_moptions *im6o = &sc->sc_im6o;
1564 #endif
1565 u_int16_t n = imo->imo_num_memberships;
1566
1567 /* Clean up our own multicast memberships */
1568 while (n-- > 0) {
1569 if (imo->imo_membership[n] != NULL) {
1570 in_delmulti(imo->imo_membership[n]);
1571 imo->imo_membership[n] = NULL;
1572 }
1573 }
1574 imo->imo_num_memberships = 0;
1575 imo->imo_multicast_if_index = 0;
1576
1577 #ifdef INET6
1578 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1579 struct in6_multi_mship *imm =
1580 LIST_FIRST(&im6o->im6o_memberships);
1581
1582 LIST_REMOVE(imm, i6mm_chain);
1583 in6_leavegroup(imm);
1584 }
1585 im6o->im6o_multicast_if_index = 0;
1586 #endif
1587
1588 /* And any other multicast memberships */
1589 carp_ether_purgemulti(sc);
1590 }
1591
1592 static int
1593 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1594 {
1595 struct carp_if *cif, *ncif = NULL;
1596 struct carp_softc *vr, *after = NULL;
1597 int myself = 0, error = 0;
1598 int s;
1599
1600 if (ifp == sc->sc_carpdev)
1601 return (0);
1602
1603 if (ifp != NULL) {
1604 if ((ifp->if_flags & IFF_MULTICAST) == 0)
1605 return (EADDRNOTAVAIL);
1606
1607 if (ifp->if_type == IFT_CARP)
1608 return (EINVAL);
1609
1610 if (ifp->if_carp == NULL) {
1611 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
1612 if (ncif == NULL)
1613 return (ENOBUFS);
1614 if ((error = ifpromisc(ifp, 1))) {
1615 free(ncif, M_IFADDR);
1616 return (error);
1617 }
1618
1619 ncif->vhif_ifp = ifp;
1620 TAILQ_INIT(&ncif->vhif_vrs);
1621 } else {
1622 cif = (struct carp_if *)ifp->if_carp;
1623 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1624 if (vr != sc && vr->sc_vhid == sc->sc_vhid)
1625 return (EINVAL);
1626 }
1627
1628 /* detach from old interface */
1629 if (sc->sc_carpdev != NULL)
1630 carpdetach(sc);
1631
1632 /* join multicast groups */
1633 if (sc->sc_naddrs < 0 &&
1634 (error = carp_join_multicast(sc)) != 0) {
1635 if (ncif != NULL)
1636 free(ncif, M_IFADDR);
1637 return (error);
1638 }
1639
1640 #ifdef INET6
1641 if (sc->sc_naddrs6 < 0 &&
1642 (error = carp_join_multicast6(sc)) != 0) {
1643 if (ncif != NULL)
1644 free(ncif, M_IFADDR);
1645 carp_multicast_cleanup(sc);
1646 return (error);
1647 }
1648 #endif
1649
1650 /* attach carp interface to physical interface */
1651 if (ncif != NULL)
1652 ifp->if_carp = (void *)ncif;
1653 sc->sc_carpdev = ifp;
1654 sc->sc_if.if_capabilities = ifp->if_capabilities &
1655 (IFCAP_TSOv4 | IFCAP_TSOv6 |
1656 IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
1657 IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
1658 IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
1659 IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
1660 IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);
1661
1662 cif = (struct carp_if *)ifp->if_carp;
1663 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1664 if (vr == sc)
1665 myself = 1;
1666 if (vr->sc_vhid < sc->sc_vhid)
1667 after = vr;
1668 }
1669
1670 if (!myself) {
1671 /* We're trying to keep things in order */
1672 if (after == NULL) {
1673 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1674 } else {
1675 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1676 sc, sc_list);
1677 }
1678 cif->vhif_nvrs++;
1679 }
1680 if (sc->sc_naddrs || sc->sc_naddrs6)
1681 sc->sc_if.if_flags |= IFF_UP;
1682 carp_set_enaddr(sc);
1683 KERNEL_LOCK(1, NULL);
1684 s = splnet();
1685 /* XXX linkstatehooks establish */
1686 carp_carpdev_state(ifp);
1687 splx(s);
1688 KERNEL_UNLOCK_ONE(NULL);
1689 } else {
1690 carpdetach(sc);
1691 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1692 }
1693 return (0);
1694 }
1695
1696 static void
1697 carp_set_enaddr(struct carp_softc *sc)
1698 {
1699 uint8_t enaddr[ETHER_ADDR_LEN];
1700 if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
1701 enaddr[0] = 3;
1702 enaddr[1] = 0;
1703 enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
1704 enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
1705 enaddr[4] = 0;
1706 enaddr[5] = 0;
1707 } else {
1708 enaddr[0] = 0;
1709 enaddr[1] = 0;
1710 enaddr[2] = 0x5e;
1711 enaddr[3] = 0;
1712 enaddr[4] = 1;
1713 enaddr[5] = sc->sc_vhid;
1714 }
1715 if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
1716 }
1717
#if 0
/*
 * Address-change callback (currently compiled out); v is the
 * carp_softc.
 *
 * Recounts the IPv4 and IPv6 addresses on the carp interface.  If
 * either count dropped, the stored counts are updated, the IPv4 CARP
 * group membership is re-established when it was lost, and the
 * interface is either taken down (no addresses left) or has its HMAC
 * state recomputed.  The run state is refreshed in every case.
 */
static void
carp_addr_updated(void *v)
{
	struct carp_softc *sc = (struct carp_softc *) v;
	struct ifaddr *ifa;
	int count4 = 0, count6 = 0;

	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
		switch (ifa->ifa_addr->sa_family) {
		case AF_INET:
			count4++;
			break;
		case AF_INET6:
			count6++;
			break;
		default:
			break;
		}
	}

	/* Handle a callback after SIOCDIFADDR */
	if (count4 < sc->sc_naddrs || count6 < sc->sc_naddrs6) {
		struct in_addr mc_addr;

		sc->sc_naddrs = count4;
		sc->sc_naddrs6 = count6;

		/* Re-establish multicast membership removed by in_control */
		mc_addr.s_addr = INADDR_CARP_GROUP;
		if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));

			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
				carp_join_multicast(sc);
		}

		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state(sc, INIT);
		} else {
			carp_hmac_prepare(sc);
		}
	}

	carp_setrun(sc, 0);
}
#endif
1759
1760 static int
1761 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1762 {
1763 struct ifnet *ifp = sc->sc_carpdev;
1764 struct in_ifaddr *ia, *ia_if;
1765 int error = 0;
1766 int s;
1767
1768 if (sin->sin_addr.s_addr == 0) {
1769 if (!(sc->sc_if.if_flags & IFF_UP))
1770 carp_set_state(sc, INIT);
1771 if (sc->sc_naddrs)
1772 sc->sc_if.if_flags |= IFF_UP;
1773 carp_setrun(sc, 0);
1774 return (0);
1775 }
1776
1777 /* we have to do this by hand to ensure we don't match on ourselves */
1778 ia_if = NULL;
1779 s = pserialize_read_enter();
1780 IN_ADDRLIST_READER_FOREACH(ia) {
1781 /* and, yeah, we need a multicast-capable iface too */
1782 if (ia->ia_ifp != &sc->sc_if &&
1783 ia->ia_ifp->if_type != IFT_CARP &&
1784 (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1785 (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
1786 ia->ia_subnet) {
1787 if (!ia_if)
1788 ia_if = ia;
1789 }
1790 }
1791
1792 if (ia_if) {
1793 ia = ia_if;
1794 if (ifp) {
1795 if (ifp != ia->ia_ifp)
1796 return (EADDRNOTAVAIL);
1797 } else {
1798 /* FIXME NOMPSAFE */
1799 ifp = ia->ia_ifp;
1800 }
1801 }
1802 pserialize_read_exit(s);
1803
1804 if ((error = carp_set_ifp(sc, ifp)))
1805 return (error);
1806
1807 if (sc->sc_carpdev == NULL)
1808 return (EADDRNOTAVAIL);
1809
1810 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1811 return (error);
1812
1813 sc->sc_naddrs++;
1814 if (sc->sc_carpdev != NULL)
1815 sc->sc_if.if_flags |= IFF_UP;
1816
1817 carp_set_state(sc, INIT);
1818 carp_setrun(sc, 0);
1819
1820 /*
1821 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
1822 * to correct any inappropriate routes that it inserted.
1823 */
1824 if (sc->ah_cookie == 0) {
1825 /* XXX link address hook */
1826 }
1827
1828 return (0);
1829 }
1830
1831 static int
1832 carp_join_multicast(struct carp_softc *sc)
1833 {
1834 struct ip_moptions *imo = &sc->sc_imo, tmpimo;
1835 struct in_addr addr;
1836
1837 memset(&tmpimo, 0, sizeof(tmpimo));
1838 addr.s_addr = INADDR_CARP_GROUP;
1839 if ((tmpimo.imo_membership[0] =
1840 in_addmulti(&addr, &sc->sc_if)) == NULL) {
1841 return (ENOBUFS);
1842 }
1843
1844 imo->imo_membership[0] = tmpimo.imo_membership[0];
1845 imo->imo_num_memberships = 1;
1846 imo->imo_multicast_if_index = sc->sc_if.if_index;
1847 imo->imo_multicast_ttl = CARP_DFLTTL;
1848 imo->imo_multicast_loop = 0;
1849 return (0);
1850 }
1851
1852
1853 #ifdef INET6
1854 static int
1855 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1856 {
1857 struct ifnet *ifp = sc->sc_carpdev;
1858 struct in6_ifaddr *ia, *ia_if;
1859 int error = 0;
1860 int s;
1861
1862 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1863 if (!(sc->sc_if.if_flags & IFF_UP))
1864 carp_set_state(sc, INIT);
1865 if (sc->sc_naddrs6)
1866 sc->sc_if.if_flags |= IFF_UP;
1867 carp_setrun(sc, 0);
1868 return (0);
1869 }
1870
1871 /* we have to do this by hand to ensure we don't match on ourselves */
1872 ia_if = NULL;
1873 s = pserialize_read_enter();
1874 IN6_ADDRLIST_READER_FOREACH(ia) {
1875 int i;
1876
1877 for (i = 0; i < 4; i++) {
1878 if ((sin6->sin6_addr.s6_addr32[i] &
1879 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1880 (ia->ia_addr.sin6_addr.s6_addr32[i] &
1881 ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1882 break;
1883 }
1884 /* and, yeah, we need a multicast-capable iface too */
1885 if (ia->ia_ifp != &sc->sc_if &&
1886 ia->ia_ifp->if_type != IFT_CARP &&
1887 (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1888 (i == 4)) {
1889 if (!ia_if)
1890 ia_if = ia;
1891 }
1892 }
1893 pserialize_read_exit(s);
1894
1895 if (ia_if) {
1896 ia = ia_if;
1897 if (sc->sc_carpdev) {
1898 if (sc->sc_carpdev != ia->ia_ifp)
1899 return (EADDRNOTAVAIL);
1900 } else {
1901 ifp = ia->ia_ifp;
1902 }
1903 }
1904
1905 if ((error = carp_set_ifp(sc, ifp)))
1906 return (error);
1907
1908 if (sc->sc_carpdev == NULL)
1909 return (EADDRNOTAVAIL);
1910
1911 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1912 return (error);
1913
1914 sc->sc_naddrs6++;
1915 if (sc->sc_carpdev != NULL)
1916 sc->sc_if.if_flags |= IFF_UP;
1917 carp_set_state(sc, INIT);
1918 carp_setrun(sc, 0);
1919
1920 return (0);
1921 }
1922
1923 static int
1924 carp_join_multicast6(struct carp_softc *sc)
1925 {
1926 struct in6_multi_mship *imm, *imm2;
1927 struct ip6_moptions *im6o = &sc->sc_im6o;
1928 struct sockaddr_in6 addr6;
1929 int error;
1930
1931 /* Join IPv6 CARP multicast group */
1932 memset(&addr6, 0, sizeof(addr6));
1933 addr6.sin6_family = AF_INET6;
1934 addr6.sin6_len = sizeof(addr6);
1935 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1936 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1937 addr6.sin6_addr.s6_addr8[15] = 0x12;
1938 if ((imm = in6_joingroup(&sc->sc_if,
1939 &addr6.sin6_addr, &error, 0)) == NULL) {
1940 return (error);
1941 }
1942 /* join solicited multicast address */
1943 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1944 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1945 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1946 addr6.sin6_addr.s6_addr32[1] = 0;
1947 addr6.sin6_addr.s6_addr32[2] = htonl(1);
1948 addr6.sin6_addr.s6_addr32[3] = 0;
1949 addr6.sin6_addr.s6_addr8[12] = 0xff;
1950 if ((imm2 = in6_joingroup(&sc->sc_if,
1951 &addr6.sin6_addr, &error, 0)) == NULL) {
1952 in6_leavegroup(imm);
1953 return (error);
1954 }
1955
1956 /* apply v6 multicast membership */
1957 im6o->im6o_multicast_if_index = sc->sc_if.if_index;
1958 if (imm)
1959 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1960 i6mm_chain);
1961 if (imm2)
1962 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1963 i6mm_chain);
1964
1965 return (0);
1966 }
1967
1968 #endif /* INET6 */
1969
1970 static int
1971 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1972 {
1973 struct lwp *l = curlwp; /* XXX */
1974 struct carp_softc *sc = ifp->if_softc, *vr;
1975 struct carpreq carpr;
1976 struct ifaddr *ifa;
1977 struct ifreq *ifr;
1978 struct ifnet *cdev = NULL;
1979 int error = 0;
1980
1981 ifa = (struct ifaddr *)data;
1982 ifr = (struct ifreq *)data;
1983
1984 switch (cmd) {
1985 case SIOCINITIFADDR:
1986 switch (ifa->ifa_addr->sa_family) {
1987 #ifdef INET
1988 case AF_INET:
1989 sc->sc_if.if_flags |= IFF_UP;
1990 memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
1991 sizeof(struct sockaddr));
1992 error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1993 break;
1994 #endif /* INET */
1995 #ifdef INET6
1996 case AF_INET6:
1997 sc->sc_if.if_flags|= IFF_UP;
1998 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1999 break;
2000 #endif /* INET6 */
2001 default:
2002 error = EAFNOSUPPORT;
2003 break;
2004 }
2005 break;
2006
2007 case SIOCSIFFLAGS:
2008 if ((error = ifioctl_common(ifp, cmd, data)) != 0)
2009 break;
2010 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2011 callout_stop(&sc->sc_ad_tmo);
2012 callout_stop(&sc->sc_md_tmo);
2013 callout_stop(&sc->sc_md6_tmo);
2014 if (sc->sc_state == MASTER) {
2015 /* we need the interface up to bow out */
2016 sc->sc_if.if_flags |= IFF_UP;
2017 sc->sc_bow_out = 1;
2018 carp_send_ad(sc);
2019 }
2020 sc->sc_if.if_flags &= ~IFF_UP;
2021 carp_set_state(sc, INIT);
2022 carp_setrun(sc, 0);
2023 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
2024 sc->sc_if.if_flags |= IFF_UP;
2025 carp_setrun(sc, 0);
2026 }
2027 break;
2028
2029 case SIOCSVH:
2030 if (l == NULL)
2031 break;
2032 if ((error = kauth_authorize_network(l->l_cred,
2033 KAUTH_NETWORK_INTERFACE,
2034 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2035 NULL)) != 0)
2036 break;
2037 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2038 break;
2039 error = 1;
2040 if (carpr.carpr_carpdev[0] != '\0' &&
2041 (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
2042 return (EINVAL);
2043 if ((error = carp_set_ifp(sc, cdev)))
2044 return (error);
2045 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
2046 switch (carpr.carpr_state) {
2047 case BACKUP:
2048 callout_stop(&sc->sc_ad_tmo);
2049 carp_set_state(sc, BACKUP);
2050 carp_setrun(sc, 0);
2051 carp_setroute(sc, RTM_DELETE);
2052 break;
2053 case MASTER:
2054 carp_master_down(sc);
2055 break;
2056 default:
2057 break;
2058 }
2059 }
2060 if (carpr.carpr_vhid > 0) {
2061 if (carpr.carpr_vhid > 255) {
2062 error = EINVAL;
2063 break;
2064 }
2065 if (sc->sc_carpdev) {
2066 struct carp_if *cif;
2067 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2068 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
2069 if (vr != sc &&
2070 vr->sc_vhid == carpr.carpr_vhid)
2071 return (EINVAL);
2072 }
2073 sc->sc_vhid = carpr.carpr_vhid;
2074 carp_set_enaddr(sc);
2075 carp_set_state(sc, INIT);
2076 error--;
2077 }
2078 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
2079 if (carpr.carpr_advskew > 254) {
2080 error = EINVAL;
2081 break;
2082 }
2083 if (carpr.carpr_advbase > 255) {
2084 error = EINVAL;
2085 break;
2086 }
2087 sc->sc_advbase = carpr.carpr_advbase;
2088 sc->sc_advskew = carpr.carpr_advskew;
2089 error--;
2090 }
2091 memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
2092 if (error > 0)
2093 error = EINVAL;
2094 else {
2095 error = 0;
2096 carp_setrun(sc, 0);
2097 }
2098 break;
2099
2100 case SIOCGVH:
2101 memset(&carpr, 0, sizeof(carpr));
2102 if (sc->sc_carpdev != NULL)
2103 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2104 IFNAMSIZ);
2105 carpr.carpr_state = sc->sc_state;
2106 carpr.carpr_vhid = sc->sc_vhid;
2107 carpr.carpr_advbase = sc->sc_advbase;
2108 carpr.carpr_advskew = sc->sc_advskew;
2109
2110 if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
2111 KAUTH_NETWORK_INTERFACE,
2112 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2113 NULL)) == 0)
2114 memcpy(carpr.carpr_key, sc->sc_key,
2115 sizeof(carpr.carpr_key));
2116 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2117 break;
2118
2119 case SIOCADDMULTI:
2120 error = carp_ether_addmulti(sc, ifr);
2121 break;
2122
2123 case SIOCDELMULTI:
2124 error = carp_ether_delmulti(sc, ifr);
2125 break;
2126
2127 case SIOCSIFCAP:
2128 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
2129 error = 0;
2130 break;
2131
2132 default:
2133 error = ether_ioctl(ifp, cmd, data);
2134 }
2135
2136 carp_hmac_prepare(sc);
2137 return (error);
2138 }
2139
2140
/*
 * if_start routine of the carp interface.  It is not expected to be
 * called; it does nothing except report the call on DEBUG kernels.
 */
static void
carp_start(struct ifnet *ifp)
{
#ifdef DEBUG
	printf("%s: start called\n", ifp->if_xname);
#endif
}
2151
2152 int
2153 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
2154 const struct rtentry *rt)
2155 {
2156 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2157 KASSERT(KERNEL_LOCKED_P());
2158
2159 if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
2160 return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt);
2161 } else {
2162 m_freem(m);
2163 return (ENETUNREACH);
2164 }
2165 }
2166
2167 static void
2168 carp_set_state(struct carp_softc *sc, int state)
2169 {
2170 static const char *carp_states[] = { CARP_STATES };
2171 if (sc->sc_state == state)
2172 return;
2173
2174 CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
2175
2176 sc->sc_state = state;
2177 switch (state) {
2178 case BACKUP:
2179 sc->sc_if.if_link_state = LINK_STATE_DOWN;
2180 break;
2181 case MASTER:
2182 sc->sc_if.if_link_state = LINK_STATE_UP;
2183 break;
2184 default:
2185 sc->sc_if.if_link_state = LINK_STATE_UNKNOWN;
2186 break;
2187 }
2188 rt_ifmsg(&sc->sc_if);
2189 }
2190
2191 void
2192 carp_carpdev_state(void *v)
2193 {
2194 struct carp_if *cif;
2195 struct carp_softc *sc;
2196 struct ifnet *ifp = v;
2197
2198 if (ifp->if_type == IFT_CARP)
2199 return;
2200
2201 cif = (struct carp_if *)ifp->if_carp;
2202
2203 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2204 int suppressed = sc->sc_suppress;
2205
2206 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2207 !(sc->sc_carpdev->if_flags & IFF_UP)) {
2208 sc->sc_if.if_flags &= ~IFF_RUNNING;
2209 callout_stop(&sc->sc_ad_tmo);
2210 callout_stop(&sc->sc_md_tmo);
2211 callout_stop(&sc->sc_md6_tmo);
2212 carp_set_state(sc, INIT);
2213 sc->sc_suppress = 1;
2214 carp_setrun(sc, 0);
2215 if (!suppressed) {
2216 carp_suppress_preempt++;
2217 if (carp_suppress_preempt == 1)
2218 carp_send_ad_all();
2219 }
2220 } else {
2221 carp_set_state(sc, INIT);
2222 sc->sc_suppress = 0;
2223 carp_setrun(sc, 0);
2224 if (suppressed)
2225 carp_suppress_preempt--;
2226 }
2227 }
2228 }
2229
2230 static int
2231 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2232 {
2233 const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
2234 struct ifnet *ifp;
2235 struct carp_mc_entry *mc;
2236 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2237 int error;
2238
2239 ifp = sc->sc_carpdev;
2240 if (ifp == NULL)
2241 return (EINVAL);
2242
2243 error = ether_addmulti(sa, &sc->sc_ac);
2244 if (error != ENETRESET)
2245 return (error);
2246
2247 /*
2248 * This is new multicast address. We have to tell parent
2249 * about it. Also, remember this multicast address so that
2250 * we can delete them on unconfigure.
2251 */
2252 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2253 if (mc == NULL) {
2254 error = ENOMEM;
2255 goto alloc_failed;
2256 }
2257
2258 /*
2259 * As ether_addmulti() returns ENETRESET, following two
2260 * statement shouldn't fail.
2261 */
2262 (void)ether_multiaddr(sa, addrlo, addrhi);
2263 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2264 memcpy(&mc->mc_addr, sa, sa->sa_len);
2265 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2266
2267 error = if_mcast_op(ifp, SIOCADDMULTI, sa);
2268 if (error != 0)
2269 goto ioctl_failed;
2270
2271 return (error);
2272
2273 ioctl_failed:
2274 LIST_REMOVE(mc, mc_entries);
2275 free(mc, M_DEVBUF);
2276 alloc_failed:
2277 (void)ether_delmulti(sa, &sc->sc_ac);
2278
2279 return (error);
2280 }
2281
2282 static int
2283 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2284 {
2285 const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
2286 struct ifnet *ifp;
2287 struct ether_multi *enm;
2288 struct carp_mc_entry *mc;
2289 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2290 int error;
2291
2292 ifp = sc->sc_carpdev;
2293 if (ifp == NULL)
2294 return (EINVAL);
2295
2296 /*
2297 * Find a key to lookup carp_mc_entry. We have to do this
2298 * before calling ether_delmulti for obvious reason.
2299 */
2300 if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
2301 return (error);
2302 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2303 if (enm == NULL)
2304 return (EINVAL);
2305
2306 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2307 if (mc->mc_enm == enm)
2308 break;
2309
2310 /* We won't delete entries we didn't add */
2311 if (mc == NULL)
2312 return (EINVAL);
2313
2314 error = ether_delmulti(sa, &sc->sc_ac);
2315 if (error != ENETRESET)
2316 return (error);
2317
2318 /* We no longer use this multicast address. Tell parent so. */
2319 error = if_mcast_op(ifp, SIOCDELMULTI, sa);
2320 if (error == 0) {
2321 /* And forget about this address. */
2322 LIST_REMOVE(mc, mc_entries);
2323 free(mc, M_DEVBUF);
2324 } else
2325 (void)ether_addmulti(sa, &sc->sc_ac);
2326 return (error);
2327 }
2328
2329 /*
2330 * Delete any multicast address we have asked to add from parent
2331 * interface. Called when the carp is being unconfigured.
2332 */
2333 static void
2334 carp_ether_purgemulti(struct carp_softc *sc)
2335 {
2336 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */
2337 struct carp_mc_entry *mc;
2338
2339 if (ifp == NULL)
2340 return;
2341
2342 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2343 (void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
2344 LIST_REMOVE(mc, mc_entries);
2345 free(mc, M_DEVBUF);
2346 }
2347 }
2348
2349 static int
2350 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
2351 {
2352
2353 return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
2354 }
2355
2356 void
2357 carp_init(void)
2358 {
2359
2360 sysctl_net_inet_carp_setup(NULL);
2361 #ifdef MBUFTRACE
2362 MOWNER_ATTACH(&carp_proto_mowner_rx);
2363 MOWNER_ATTACH(&carp_proto_mowner_tx);
2364 MOWNER_ATTACH(&carp_proto6_mowner_rx);
2365 MOWNER_ATTACH(&carp_proto6_mowner_tx);
2366 #endif
2367
2368 carp_wqinput = wqinput_create("carp", _carp_proto_input);
2369 #ifdef INET6
2370 carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
2371 #endif
2372 }
2373
2374 static void
2375 sysctl_net_inet_carp_setup(struct sysctllog **clog)
2376 {
2377
2378 sysctl_createv(clog, 0, NULL, NULL,
2379 CTLFLAG_PERMANENT,
2380 CTLTYPE_NODE, "inet", NULL,
2381 NULL, 0, NULL, 0,
2382 CTL_NET, PF_INET, CTL_EOL);
2383 sysctl_createv(clog, 0, NULL, NULL,
2384 CTLFLAG_PERMANENT,
2385 CTLTYPE_NODE, "carp",
2386 SYSCTL_DESCR("CARP related settings"),
2387 NULL, 0, NULL, 0,
2388 CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
2389
2390 sysctl_createv(clog, 0, NULL, NULL,
2391 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2392 CTLTYPE_INT, "preempt",
2393 SYSCTL_DESCR("Enable CARP Preempt"),
2394 NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
2395 CTL_NET, PF_INET, IPPROTO_CARP,
2396 CTL_CREATE, CTL_EOL);
2397 sysctl_createv(clog, 0, NULL, NULL,
2398 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2399 CTLTYPE_INT, "arpbalance",
2400 SYSCTL_DESCR("Enable ARP balancing"),
2401 NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
2402 CTL_NET, PF_INET, IPPROTO_CARP,
2403 CTL_CREATE, CTL_EOL);
2404 sysctl_createv(clog, 0, NULL, NULL,
2405 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2406 CTLTYPE_INT, "allow",
2407 SYSCTL_DESCR("Enable CARP"),
2408 NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
2409 CTL_NET, PF_INET, IPPROTO_CARP,
2410 CTL_CREATE, CTL_EOL);
2411 sysctl_createv(clog, 0, NULL, NULL,
2412 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2413 CTLTYPE_INT, "log",
2414 SYSCTL_DESCR("CARP logging"),
2415 NULL, 0, &carp_opts[CARPCTL_LOG], 0,
2416 CTL_NET, PF_INET, IPPROTO_CARP,
2417 CTL_CREATE, CTL_EOL);
2418 sysctl_createv(clog, 0, NULL, NULL,
2419 CTLFLAG_PERMANENT,
2420 CTLTYPE_STRUCT, "stats",
2421 SYSCTL_DESCR("CARP statistics"),
2422 sysctl_net_inet_carp_stats, 0, NULL, 0,
2423 CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
2424 CTL_EOL);
2425 }
2426