Home | History | Annotate | Line # | Download | only in netinet6
mld6.c revision 1.72
      1 /*	$NetBSD: mld6.c,v 1.72 2016/07/08 04:33:30 ozaki-r Exp $	*/
      2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
      3 
      4 /*
      5  * Copyright (C) 1998 WIDE Project.
      6  * All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. Neither the name of the project nor the names of its contributors
     17  *    may be used to endorse or promote products derived from this software
     18  *    without specific prior written permission.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30  * SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * Copyright (c) 1992, 1993
     35  *	The Regents of the University of California.  All rights reserved.
     36  *
     37  * This code is derived from software contributed to Berkeley by
     38  * Stephen Deering of Stanford University.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. Neither the name of the University nor the names of its contributors
     49  *    may be used to endorse or promote products derived from this software
     50  *    without specific prior written permission.
     51  *
     52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     62  * SUCH DAMAGE.
     63  *
     64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
     65  */
     66 
     67 /*
     68  * Copyright (c) 1988 Stephen Deering.
     69  *
     70  * This code is derived from software contributed to Berkeley by
     71  * Stephen Deering of Stanford University.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
    102  */
    103 
    104 #include <sys/cdefs.h>
    105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.72 2016/07/08 04:33:30 ozaki-r Exp $");
    106 
    107 #ifdef _KERNEL_OPT
    108 #include "opt_inet.h"
    109 #endif
    110 
    111 #include <sys/param.h>
    112 #include <sys/systm.h>
    113 #include <sys/mbuf.h>
    114 #include <sys/socket.h>
    115 #include <sys/socketvar.h>
    116 #include <sys/protosw.h>
    117 #include <sys/syslog.h>
    118 #include <sys/sysctl.h>
    119 #include <sys/kernel.h>
    120 #include <sys/callout.h>
    121 #include <sys/cprng.h>
    122 
    123 #include <net/if.h>
    124 
    125 #include <netinet/in.h>
    126 #include <netinet/in_var.h>
    127 #include <netinet6/in6_var.h>
    128 #include <netinet/ip6.h>
    129 #include <netinet6/ip6_var.h>
    130 #include <netinet6/scope6_var.h>
    131 #include <netinet/icmp6.h>
    132 #include <netinet6/icmp6_private.h>
    133 #include <netinet6/mld6_var.h>
    134 
    135 #include <net/net_osdep.h>
    136 
    137 
/*
 * This structure is used to keep track of in6_multi chains which belong to
 * deleted interface addresses.
 *
 * in6_mk is the global list of these records.  in6_createmkludge() adds
 * one record per interface, in6_savemkludge() parks multicast entries on
 * it when the last address of the interface goes away, and
 * in6_restoremkludge() re-attaches them to a newly configured address.
 */
static LIST_HEAD(, multi6_kludge) in6_mk = LIST_HEAD_INITIALIZER(in6_mk);

struct multi6_kludge {
	LIST_ENTRY(multi6_kludge) mk_entry;	/* linkage on in6_mk */
	struct ifnet *mk_ifp;		/* interface this record is keyed by */
	struct in6_multihead mk_head;	/* parked in6_multi entries */
};
    149 
    150 
/*
 * Protocol constants
 */

/*
 * Time between repetitions of a node's initial report of interest in a
 * multicast address (in seconds).  mld_start_listening() uses it, scaled
 * by hz, as the exclusive upper bound of the random delay before the
 * repeated report.
 */
#define MLD_UNSOLICITED_REPORT_INTERVAL	10

/*
 * Packet options handed to ip6_output() for every MLD message; filled in
 * once by mld_init() (hop-by-hop header carrying the router alert option).
 */
static struct ip6_pktopts ip6_opts;

/* Group state transitions. */
static void mld_start_listening(struct in6_multi *);
static void mld_stop_listening(struct in6_multi *);

/* Packet construction and transmission. */
static struct mld_hdr * mld_allocbuf(struct mbuf **, int, struct in6_multi *,
	int);
static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
/* Per-group report timer helpers. */
static void mld_starttimer(struct in6_multi *);
static void mld_stoptimer(struct in6_multi *);
static u_long mld_timerresid(struct in6_multi *);
    172 
    173 void
    174 mld_init(void)
    175 {
    176 	static u_int8_t hbh_buf[8];
    177 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
    178 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
    179 
    180 	/* ip6h_nxt will be fill in later */
    181 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
    182 
    183 	/* XXX: grotty hard coding... */
    184 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
    185 	hbh_buf[3] = 0;
    186 	hbh_buf[4] = IP6OPT_RTALERT;
    187 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
    188 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
    189 
    190 	ip6_opts.ip6po_hbh = hbh;
    191 	/* We will specify the hoplimit by a multicast option. */
    192 	ip6_opts.ip6po_hlim = -1;
    193 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
    194 }
    195 
    196 static void
    197 mld_starttimer(struct in6_multi *in6m)
    198 {
    199 	struct timeval now;
    200 
    201 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
    202 
    203 	microtime(&now);
    204 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
    205 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
    206 	    (in6m->in6m_timer % hz) * (1000000 / hz);
    207 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
    208 		in6m->in6m_timer_expire.tv_sec++;
    209 		in6m->in6m_timer_expire.tv_usec -= 1000000;
    210 	}
    211 
    212 	/* start or restart the timer */
    213 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
    214 }
    215 
    216 static void
    217 mld_stoptimer(struct in6_multi *in6m)
    218 {
    219 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
    220 		return;
    221 
    222 	callout_stop(&in6m->in6m_timer_ch);
    223 
    224 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
    225 }
    226 
    227 static void
    228 mld_timeo(void *arg)
    229 {
    230 	struct in6_multi *in6m = arg;
    231 
    232 	mutex_enter(softnet_lock);
    233 	KERNEL_LOCK(1, NULL);
    234 
    235 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
    236 		goto out;
    237 
    238 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
    239 
    240 	switch (in6m->in6m_state) {
    241 	case MLD_REPORTPENDING:
    242 		mld_start_listening(in6m);
    243 		break;
    244 	default:
    245 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
    246 		break;
    247 	}
    248 
    249 out:
    250 	KERNEL_UNLOCK_ONE(NULL);
    251 	mutex_exit(softnet_lock);
    252 }
    253 
    254 static u_long
    255 mld_timerresid(struct in6_multi *in6m)
    256 {
    257 	struct timeval now, diff;
    258 
    259 	microtime(&now);
    260 
    261 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
    262 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
    263 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
    264 		return (0);
    265 	}
    266 	diff = in6m->in6m_timer_expire;
    267 	diff.tv_sec -= now.tv_sec;
    268 	diff.tv_usec -= now.tv_usec;
    269 	if (diff.tv_usec < 0) {
    270 		diff.tv_sec--;
    271 		diff.tv_usec += 1000000;
    272 	}
    273 
    274 	/* return the remaining time in milliseconds */
    275 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
    276 }
    277 
    278 static void
    279 mld_start_listening(struct in6_multi *in6m)
    280 {
    281 	struct in6_addr all_in6;
    282 
    283 	/*
    284 	 * RFC2710 page 10:
    285 	 * The node never sends a Report or Done for the link-scope all-nodes
    286 	 * address.
    287 	 * MLD messages are never sent for multicast addresses whose scope is 0
    288 	 * (reserved) or 1 (node-local).
    289 	 */
    290 	all_in6 = in6addr_linklocal_allnodes;
    291 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
    292 		/* XXX: this should not happen! */
    293 		in6m->in6m_timer = 0;
    294 		in6m->in6m_state = MLD_OTHERLISTENER;
    295 	}
    296 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
    297 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
    298 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
    299 		in6m->in6m_state = MLD_OTHERLISTENER;
    300 	} else {
    301 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
    302 		in6m->in6m_timer = cprng_fast32() %
    303 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
    304 		in6m->in6m_state = MLD_IREPORTEDLAST;
    305 
    306 		mld_starttimer(in6m);
    307 	}
    308 }
    309 
    310 static void
    311 mld_stop_listening(struct in6_multi *in6m)
    312 {
    313 	struct in6_addr allnode, allrouter;
    314 
    315 	allnode = in6addr_linklocal_allnodes;
    316 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
    317 		/* XXX: this should not happen! */
    318 		return;
    319 	}
    320 	allrouter = in6addr_linklocal_allrouters;
    321 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
    322 		/* XXX impossible */
    323 		return;
    324 	}
    325 
    326 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
    327 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
    328 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
    329 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
    330 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
    331 	}
    332 }
    333 
    334 void
    335 mld_input(struct mbuf *m, int off)
    336 {
    337 	struct ip6_hdr *ip6;
    338 	struct mld_hdr *mldh;
    339 	struct ifnet *ifp;
    340 	struct in6_multi *in6m = NULL;
    341 	struct in6_addr mld_addr, all_in6;
    342 	struct in6_ifaddr *ia;
    343 	u_long timer = 0;	/* timer value in the MLD query header */
    344 	int s;
    345 
    346 	ifp = m_get_rcvif(m, &s);
    347 	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
    348 	if (mldh == NULL) {
    349 		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
    350 		goto out_nodrop;
    351 	}
    352 
    353 	/* source address validation */
    354 	ip6 = mtod(m, struct ip6_hdr *);/* in case mpullup */
    355 	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
    356 		/*
    357 		 * RFC3590 allows the IPv6 unspecified address as the source
    358 		 * address of MLD report and done messages.  However, as this
    359 		 * same document says, this special rule is for snooping
    360 		 * switches and the RFC requires routers to discard MLD packets
    361 		 * with the unspecified source address.  The RFC only talks
    362 		 * about hosts receiving an MLD query or report in Security
    363 		 * Considerations, but this is probably the correct intention.
    364 		 * RFC3590 does not talk about other cases than link-local and
    365 		 * the unspecified source addresses, but we believe the same
    366 		 * rule should be applied.
    367 		 * As a result, we only allow link-local addresses as the
    368 		 * source address; otherwise, simply discard the packet.
    369 		 */
    370 #if 0
    371 		/*
    372 		 * XXX: do not log in an input path to avoid log flooding,
    373 		 * though RFC3590 says "SHOULD log" if the source of a query
    374 		 * is the unspecified address.
    375 		 */
    376 		log(LOG_INFO,
    377 		    "mld_input: src %s is not link-local (grp=%s)\n",
    378 		    ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&mldh->mld_addr));
    379 #endif
    380 		goto out;
    381 	}
    382 
    383 	/*
    384 	 * make a copy for local work (in6_setscope() may modify the 1st arg)
    385 	 */
    386 	mld_addr = mldh->mld_addr;
    387 	if (in6_setscope(&mld_addr, ifp, NULL)) {
    388 		/* XXX: this should not happen! */
    389 		goto out;
    390 	}
    391 
    392 	/*
    393 	 * In the MLD specification, there are 3 states and a flag.
    394 	 *
    395 	 * In Non-Listener state, we simply don't have a membership record.
    396 	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
    397 	 * In Idle Listener state, our timer is not running
    398 	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
    399 	 *
    400 	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
    401 	 * we have heard a report from another member, or MLD_IREPORTEDLAST
    402 	 * if we sent the last report.
    403 	 */
    404 	switch (mldh->mld_type) {
    405 	case MLD_LISTENER_QUERY:
    406 		if (ifp->if_flags & IFF_LOOPBACK)
    407 			break;
    408 
    409 		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
    410 		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
    411 			break;	/* print error or log stat? */
    412 
    413 		all_in6 = in6addr_linklocal_allnodes;
    414 		if (in6_setscope(&all_in6, ifp, NULL)) {
    415 			/* XXX: this should not happen! */
    416 			break;
    417 		}
    418 
    419 		/*
    420 		 * - Start the timers in all of our membership records
    421 		 *   that the query applies to for the interface on
    422 		 *   which the query arrived excl. those that belong
    423 		 *   to the "all-nodes" group (ff02::1).
    424 		 * - Restart any timer that is already running but has
    425 		 *   a value longer than the requested timeout.
    426 		 * - Use the value specified in the query message as
    427 		 *   the maximum timeout.
    428 		 */
    429 		timer = ntohs(mldh->mld_maxdelay);
    430 
    431 		ia = in6_get_ia_from_ifp(ifp);
    432 		if (ia == NULL)
    433 			break;
    434 
    435 		LIST_FOREACH(in6m, &ia->ia6_multiaddrs, in6m_entry) {
    436 			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
    437 			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
    438 			    IPV6_ADDR_SCOPE_LINKLOCAL)
    439 				continue;
    440 
    441 			if (in6m->in6m_state == MLD_REPORTPENDING)
    442 				continue; /* we are not yet ready */
    443 
    444 			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
    445 			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
    446 				continue;
    447 
    448 			if (timer == 0) {
    449 				/* send a report immediately */
    450 				mld_stoptimer(in6m);
    451 				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
    452 				in6m->in6m_state = MLD_IREPORTEDLAST;
    453 			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
    454 			    mld_timerresid(in6m) > timer) {
    455 				in6m->in6m_timer =
    456 				   1 + (cprng_fast32() % timer) * hz / 1000;
    457 				mld_starttimer(in6m);
    458 			}
    459 		}
    460 		break;
    461 
    462 	case MLD_LISTENER_REPORT:
    463 		/*
    464 		 * For fast leave to work, we have to know that we are the
    465 		 * last person to send a report for this group.  Reports
    466 		 * can potentially get looped back if we are a multicast
    467 		 * router, so discard reports sourced by me.
    468 		 * Note that it is impossible to check IFF_LOOPBACK flag of
    469 		 * ifp for this purpose, since ip6_mloopback pass the physical
    470 		 * interface to looutput.
    471 		 */
    472 		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
    473 			break;
    474 
    475 		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
    476 			break;
    477 
    478 		/*
    479 		 * If we belong to the group being reported, stop
    480 		 * our timer for that group.
    481 		 */
    482 		IN6_LOOKUP_MULTI(mld_addr, ifp, in6m);
    483 		if (in6m) {
    484 			mld_stoptimer(in6m); /* transit to idle state */
    485 			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
    486 		}
    487 		break;
    488 	default:		/* this is impossible */
    489 #if 0
    490 		/*
    491 		 * this case should be impossible because of filtering in
    492 		 * icmp6_input().  But we explicitly disabled this part
    493 		 * just in case.
    494 		 */
    495 		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
    496 #endif
    497 		break;
    498 	}
    499 
    500 out:
    501 	m_freem(m);
    502 out_nodrop:
    503 	m_put_rcvif(ifp, &s);
    504 }
    505 
    506 static void
    507 mld_sendpkt(struct in6_multi *in6m, int type,
    508 	const struct in6_addr *dst)
    509 {
    510 	struct mbuf *mh;
    511 	struct mld_hdr *mldh;
    512 	struct ip6_hdr *ip6 = NULL;
    513 	struct ip6_moptions im6o;
    514 	struct in6_ifaddr *ia = NULL;
    515 	struct ifnet *ifp = in6m->in6m_ifp;
    516 	int ignflags;
    517 
    518 	/*
    519 	 * At first, find a link local address on the outgoing interface
    520 	 * to use as the source address of the MLD packet.
    521 	 * We do not reject tentative addresses for MLD report to deal with
    522 	 * the case where we first join a link-local address.
    523 	 */
    524 	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
    525 	if ((ia = in6ifa_ifpforlinklocal(ifp, ignflags)) == NULL)
    526 		return;
    527 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE))
    528 		ia = NULL;
    529 
    530 	/* Allocate two mbufs to store IPv6 header and MLD header */
    531 	mldh = mld_allocbuf(&mh, sizeof(struct mld_hdr), in6m, type);
    532 	if (mldh == NULL)
    533 		return;
    534 
    535 	/* fill src/dst here */
    536  	ip6 = mtod(mh, struct ip6_hdr *);
    537  	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
    538  	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
    539 
    540 	mldh->mld_addr = in6m->in6m_addr;
    541 	in6_clearscope(&mldh->mld_addr); /* XXX */
    542 	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
    543 	    sizeof(struct mld_hdr));
    544 
    545 	/* construct multicast option */
    546 	memset(&im6o, 0, sizeof(im6o));
    547 	im6o.im6o_multicast_if_index = if_get_index(ifp);
    548 	im6o.im6o_multicast_hlim = 1;
    549 
    550 	/*
    551 	 * Request loopback of the report if we are acting as a multicast
    552 	 * router, so that the process-level routing daemon can hear it.
    553 	 */
    554 	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
    555 
    556 	/* increment output statictics */
    557 	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
    558 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
    559 	switch (type) {
    560 	case MLD_LISTENER_QUERY:
    561 		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
    562 		break;
    563 	case MLD_LISTENER_REPORT:
    564 		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
    565 		break;
    566 	case MLD_LISTENER_DONE:
    567 		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
    568 		break;
    569 	}
    570 
    571 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
    572 	    &im6o, NULL, NULL);
    573 }
    574 
    575 static struct mld_hdr *
    576 mld_allocbuf(struct mbuf **mh, int len, struct in6_multi *in6m,
    577     int type)
    578 {
    579 	struct mbuf *md;
    580 	struct mld_hdr *mldh;
    581 	struct ip6_hdr *ip6;
    582 
    583 	/*
    584 	 * Allocate mbufs to store ip6 header and MLD header.
    585 	 * We allocate 2 mbufs and make chain in advance because
    586 	 * it is more convenient when inserting the hop-by-hop option later.
    587 	 */
    588 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
    589 	if (*mh == NULL)
    590 		return NULL;
    591 	MGET(md, M_DONTWAIT, MT_DATA);
    592 	if (md == NULL) {
    593 		m_free(*mh);
    594 		*mh = NULL;
    595 		return NULL;
    596 	}
    597 	(*mh)->m_next = md;
    598 	md->m_next = NULL;
    599 
    600 	m_reset_rcvif((*mh));
    601 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + len;
    602 	(*mh)->m_len = sizeof(struct ip6_hdr);
    603 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
    604 
    605 	/* fill in the ip6 header */
    606 	ip6 = mtod(*mh, struct ip6_hdr *);
    607 	memset(ip6, 0, sizeof(*ip6));
    608 	ip6->ip6_flow = 0;
    609 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
    610 	ip6->ip6_vfc |= IPV6_VERSION;
    611 	/* ip6_plen will be set later */
    612 	ip6->ip6_nxt = IPPROTO_ICMPV6;
    613 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
    614 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
    615 
    616 	/* fill in the MLD header as much as possible */
    617 	md->m_len = len;
    618 	mldh = mtod(md, struct mld_hdr *);
    619 	memset(mldh, 0, len);
    620 	mldh->mld_type = type;
    621 	return mldh;
    622 }
    623 
    624 /*
    625  * Add an address to the list of IP6 multicast addresses for a given interface.
    626  */
    627 struct	in6_multi *
    628 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
    629 	int *errorp, int timer)
    630 {
    631 	struct	in6_ifaddr *ia;
    632 	struct	sockaddr_in6 sin6;
    633 	struct	in6_multi *in6m;
    634 	int	s = splsoftnet();
    635 
    636 	*errorp = 0;
    637 
    638 	/*
    639 	 * See if address already in list.
    640 	 */
    641 	IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
    642 	if (in6m != NULL) {
    643 		/*
    644 		 * Found it; just increment the refrence count.
    645 		 */
    646 		in6m->in6m_refcount++;
    647 	} else {
    648 		/*
    649 		 * New address; allocate a new multicast record
    650 		 * and link it into the interface's multicast list.
    651 		 */
    652 		in6m = (struct in6_multi *)
    653 			malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
    654 		if (in6m == NULL) {
    655 			splx(s);
    656 			*errorp = ENOBUFS;
    657 			return (NULL);
    658 		}
    659 
    660 		in6m->in6m_addr = *maddr6;
    661 		in6m->in6m_ifp = ifp;
    662 		in6m->in6m_refcount = 1;
    663 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
    664 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
    665 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
    666 
    667 		ia = in6_get_ia_from_ifp(ifp);
    668 		if (ia == NULL) {
    669 			callout_destroy(&in6m->in6m_timer_ch);
    670 			free(in6m, M_IPMADDR);
    671 			splx(s);
    672 			*errorp = EADDRNOTAVAIL; /* appropriate? */
    673 			return (NULL);
    674 		}
    675 		in6m->in6m_ia = ia;
    676 		ifaref(&ia->ia_ifa); /* gain a reference */
    677 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
    678 
    679 		/*
    680 		 * Ask the network driver to update its multicast reception
    681 		 * filter appropriately for the new address.
    682 		 */
    683 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
    684 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
    685 		if (*errorp) {
    686 			callout_destroy(&in6m->in6m_timer_ch);
    687 			LIST_REMOVE(in6m, in6m_entry);
    688 			free(in6m, M_IPMADDR);
    689 			ifafree(&ia->ia_ifa);
    690 			splx(s);
    691 			return (NULL);
    692 		}
    693 
    694 		in6m->in6m_timer = timer;
    695 		if (in6m->in6m_timer > 0) {
    696 			in6m->in6m_state = MLD_REPORTPENDING;
    697 			mld_starttimer(in6m);
    698 
    699 			splx(s);
    700 			return (in6m);
    701 		}
    702 
    703 		/*
    704 		 * Let MLD6 know that we have joined a new IP6 multicast
    705 		 * group.
    706 		 */
    707 		mld_start_listening(in6m);
    708 	}
    709 	splx(s);
    710 	return (in6m);
    711 }
    712 
/*
 * Delete a multicast address record.
 *
 * Drops one reference on in6m.  The pending report timer is stopped
 * regardless of whether other references remain.  When the last
 * reference goes away the group is left (a Done message may be sent),
 * the record is unlinked, the driver's filter is updated and the record
 * is freed.
 */
void
in6_delmulti(struct in6_multi *in6m)
{
	struct	sockaddr_in6 sin6;
	struct	in6_ifaddr *ia;
	int	s = splsoftnet();

	mld_stoptimer(in6m);

	if (--in6m->in6m_refcount == 0) {
		/*
		 * No remaining claims to this record; let MLD6 know
		 * that we are leaving the multicast group.
		 */
		mld_stop_listening(in6m);

		/*
		 * Unlink from list.
		 */
		LIST_REMOVE(in6m, in6m_entry);
		if (in6m->in6m_ia != NULL) {
			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
			in6m->in6m_ia = NULL;
		}

		/*
		 * Delete all references of this multicasting group from
		 * the membership arrays
		 */
		IN6_ADDRLIST_READER_FOREACH(ia) {
			struct in6_multi_mship *imm;
			LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
				/* clear the pointer so it is not left dangling */
				if (imm->i6mm_maddr == in6m)
					imm->i6mm_maddr = NULL;
			}
		}

		/*
		 * Notify the network driver to update its multicast
		 * reception filter.
		 */
		sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
		if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));

		/*
		 * Tell mld_timeo we're halting the timer: it returns early
		 * when it sees IN6M_TIMER_UNDEF.  The callout is halted and
		 * destroyed before the record is freed.
		 */
		in6m->in6m_timer = IN6M_TIMER_UNDEF;
		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
		callout_destroy(&in6m->in6m_timer_ch);

		free(in6m, M_IPMADDR);
	}
	splx(s);
}
    769 
    770 
    771 struct in6_multi_mship *
    772 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
    773 	int *errorp, int timer)
    774 {
    775 	struct in6_multi_mship *imm;
    776 
    777 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
    778 	if (imm == NULL) {
    779 		*errorp = ENOBUFS;
    780 		return NULL;
    781 	}
    782 
    783 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
    784 	if (!imm->i6mm_maddr) {
    785 		/* *errorp is already set */
    786 		free(imm, M_IPMADDR);
    787 		return NULL;
    788 	}
    789 	return imm;
    790 }
    791 
    792 int
    793 in6_leavegroup(struct in6_multi_mship *imm)
    794 {
    795 
    796 	if (imm->i6mm_maddr) {
    797 		in6_delmulti(imm->i6mm_maddr);
    798 	}
    799 	free(imm, M_IPMADDR);
    800 	return 0;
    801 }
    802 
    803 
    804 /*
    805  * Multicast address kludge:
    806  * If there were any multicast addresses attached to this interface address,
    807  * either move them to another address on this interface, or save them until
    808  * such time as this interface is reconfigured for IPv6.
    809  */
    810 void
    811 in6_savemkludge(struct in6_ifaddr *oia)
    812 {
    813 	struct in6_ifaddr *ia;
    814 	struct in6_multi *in6m;
    815 
    816 	ia = in6_get_ia_from_ifp(oia->ia_ifp);
    817 	if (ia) {	/* there is another address */
    818 		KASSERT(ia != oia);
    819 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
    820 			LIST_REMOVE(in6m, in6m_entry);
    821 			ifaref(&ia->ia_ifa);
    822 			ifafree(&in6m->in6m_ia->ia_ifa);
    823 			in6m->in6m_ia = ia;
    824 			LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
    825 		}
    826 	} else {	/* last address on this if deleted, save */
    827 		struct multi6_kludge *mk;
    828 
    829 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
    830 			if (mk->mk_ifp == oia->ia_ifp)
    831 				break;
    832 		}
    833 		if (mk == NULL) /* this should not happen! */
    834 			panic("in6_savemkludge: no kludge space");
    835 
    836 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
    837 			LIST_REMOVE(in6m, in6m_entry);
    838 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
    839 			in6m->in6m_ia = NULL;
    840 			LIST_INSERT_HEAD(&mk->mk_head, in6m, in6m_entry);
    841 		}
    842 	}
    843 }
    844 
    845 /*
    846  * Continuation of multicast address hack:
    847  * If there was a multicast group list previously saved for this interface,
    848  * then we re-attach it to the first address configured on the i/f.
    849  */
    850 void
    851 in6_restoremkludge(struct in6_ifaddr *ia, struct ifnet *ifp)
    852 {
    853 	struct multi6_kludge *mk;
    854 	struct in6_multi *in6m;
    855 
    856 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
    857 		if (mk->mk_ifp == ifp)
    858 			break;
    859 	}
    860 	if (mk == NULL)
    861 		return;
    862 	while ((in6m = LIST_FIRST(&mk->mk_head)) != NULL) {
    863 		LIST_REMOVE(in6m, in6m_entry);
    864 		in6m->in6m_ia = ia;
    865 		ifaref(&ia->ia_ifa);
    866 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
    867 	}
    868 }
    869 
    870 /*
    871  * Allocate space for the kludge at interface initialization time.
    872  * Formerly, we dynamically allocated the space in in6_savemkludge() with
    873  * malloc(M_WAITOK).  However, it was wrong since the function could be called
    874  * under an interrupt context (software timer on address lifetime expiration).
    875  * Also, we cannot just give up allocating the strucutre, since the group
    876  * membership structure is very complex and we need to keep it anyway.
    877  * Of course, this function MUST NOT be called under an interrupt context.
    878  * Specifically, it is expected to be called only from in6_ifattach(), though
    879  * it is a global function.
    880  */
    881 void
    882 in6_createmkludge(struct ifnet *ifp)
    883 {
    884 	struct multi6_kludge *mk;
    885 
    886 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
    887 		/* If we've already had one, do not allocate. */
    888 		if (mk->mk_ifp == ifp)
    889 			return;
    890 	}
    891 
    892 	mk = malloc(sizeof(*mk), M_IPMADDR, M_ZERO|M_WAITOK);
    893 
    894 	LIST_INIT(&mk->mk_head);
    895 	mk->mk_ifp = ifp;
    896 	LIST_INSERT_HEAD(&in6_mk, mk, mk_entry);
    897 }
    898 
    899 void
    900 in6_purgemkludge(struct ifnet *ifp)
    901 {
    902 	struct multi6_kludge *mk;
    903 	struct in6_multi *in6m, *next;
    904 
    905 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
    906 		if (mk->mk_ifp == ifp)
    907 			break;
    908 	}
    909 	if (mk == NULL)
    910 		return;
    911 
    912 	/* leave from all multicast groups joined */
    913 	for (in6m = LIST_FIRST(&mk->mk_head); in6m != NULL; in6m = next) {
    914 		next = LIST_NEXT(in6m, in6m_entry);
    915 		in6_delmulti(in6m);
    916 	}
    917 	LIST_REMOVE(mk, mk_entry);
    918 	free(mk, M_IPMADDR);
    919 }
    920 
    921 static int
    922 in6_mkludge_sysctl(SYSCTLFN_ARGS)
    923 {
    924 	struct multi6_kludge *mk;
    925 	struct in6_multi *in6m;
    926 	int error;
    927 	uint32_t tmp;
    928 	size_t written;
    929 
    930 	if (namelen != 1)
    931 		return EINVAL;
    932 
    933 	if (oldp == NULL) {
    934 		*oldlenp = 0;
    935 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
    936 			if (mk->mk_ifp->if_index == name[0])
    937 				continue;
    938 			LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
    939 				*oldlenp += sizeof(struct in6_addr) +
    940 				    sizeof(uint32_t);
    941 			}
    942 		}
    943 		return 0;
    944 	}
    945 
    946 	error = 0;
    947 	written = 0;
    948 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
    949 		if (mk->mk_ifp->if_index == name[0])
    950 			continue;
    951 		LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
    952 			if (written + sizeof(struct in6_addr) +
    953 			    sizeof(uint32_t) > *oldlenp)
    954 				goto done;
    955 			error = sysctl_copyout(l, &in6m->in6m_addr,
    956 			    oldp, sizeof(struct in6_addr));
    957 			if (error)
    958 				goto done;
    959 			oldp = (char *)oldp + sizeof(struct in6_addr);
    960 			written += sizeof(struct in6_addr);
    961 			tmp = in6m->in6m_refcount;
    962 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
    963 			if (error)
    964 				goto done;
    965 			oldp = (char *)oldp + sizeof(tmp);
    966 			written += sizeof(tmp);
    967 		}
    968 	}
    969 
    970 done:
    971 	*oldlenp = written;
    972 	return error;
    973 }
    974 
    975 static int
    976 in6_multicast_sysctl(SYSCTLFN_ARGS)
    977 {
    978 	struct ifnet *ifp;
    979 	struct ifaddr *ifa;
    980 	struct in6_ifaddr *ifa6;
    981 	struct in6_multi *in6m;
    982 	uint32_t tmp;
    983 	int error;
    984 	size_t written;
    985 	struct psref psref;
    986 	int bound;
    987 
    988 	if (namelen != 1)
    989 		return EINVAL;
    990 
    991 	bound = curlwp_bind();
    992 	ifp = if_get_byindex(name[0], &psref);
    993 	if (ifp == NULL) {
    994 		curlwp_bindx(bound);
    995 		return ENODEV;
    996 	}
    997 
    998 	if (oldp == NULL) {
    999 		*oldlenp = 0;
   1000 		IFADDR_READER_FOREACH(ifa, ifp) {
   1001 			if (ifa->ifa_addr->sa_family != AF_INET6)
   1002 				continue;
   1003 			ifa6 = (struct in6_ifaddr *)ifa;
   1004 			LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
   1005 				*oldlenp += 2 * sizeof(struct in6_addr) +
   1006 				    sizeof(uint32_t);
   1007 			}
   1008 		}
   1009 		if_put(ifp, &psref);
   1010 		curlwp_bindx(bound);
   1011 		return 0;
   1012 	}
   1013 
   1014 	error = 0;
   1015 	written = 0;
   1016 	IFADDR_READER_FOREACH(ifa, ifp) {
   1017 		if (ifa->ifa_addr->sa_family != AF_INET6)
   1018 			continue;
   1019 		ifa6 = (struct in6_ifaddr *)ifa;
   1020 		LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
   1021 			if (written + 2 * sizeof(struct in6_addr) +
   1022 			    sizeof(uint32_t) > *oldlenp)
   1023 				goto done;
   1024 			error = sysctl_copyout(l, &ifa6->ia_addr.sin6_addr,
   1025 			    oldp, sizeof(struct in6_addr));
   1026 			if (error)
   1027 				goto done;
   1028 			oldp = (char *)oldp + sizeof(struct in6_addr);
   1029 			written += sizeof(struct in6_addr);
   1030 			error = sysctl_copyout(l, &in6m->in6m_addr,
   1031 			    oldp, sizeof(struct in6_addr));
   1032 			if (error)
   1033 				goto done;
   1034 			oldp = (char *)oldp + sizeof(struct in6_addr);
   1035 			written += sizeof(struct in6_addr);
   1036 			tmp = in6m->in6m_refcount;
   1037 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
   1038 			if (error)
   1039 				goto done;
   1040 			oldp = (char *)oldp + sizeof(tmp);
   1041 			written += sizeof(tmp);
   1042 		}
   1043 	}
   1044 done:
   1045 	if_put(ifp, &psref);
   1046 	curlwp_bindx(bound);
   1047 	*oldlenp = written;
   1048 	return error;
   1049 }
   1050 
   1051 SYSCTL_SETUP(sysctl_in6_mklude_setup, "sysctl net.inet6.multicast_kludge subtree setup")
   1052 {
   1053 
   1054 	sysctl_createv(clog, 0, NULL, NULL,
   1055 		       CTLFLAG_PERMANENT,
   1056 		       CTLTYPE_NODE, "inet6", NULL,
   1057 		       NULL, 0, NULL, 0,
   1058 		       CTL_NET, PF_INET6, CTL_EOL);
   1059 
   1060 	sysctl_createv(clog, 0, NULL, NULL,
   1061 		       CTLFLAG_PERMANENT,
   1062 		       CTLTYPE_NODE, "multicast",
   1063 		       SYSCTL_DESCR("Multicast information"),
   1064 		       in6_multicast_sysctl, 0, NULL, 0,
   1065 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
   1066 
   1067 	sysctl_createv(clog, 0, NULL, NULL,
   1068 		       CTLFLAG_PERMANENT,
   1069 		       CTLTYPE_NODE, "multicast_kludge",
   1070 		       SYSCTL_DESCR("multicast kludge information"),
   1071 		       in6_mkludge_sysctl, 0, NULL, 0,
   1072 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
   1073 }
   1074