Home | History | Annotate | Line # | Download | only in netinet6
mld6.c revision 1.89.2.2
      1 /*	$NetBSD: mld6.c,v 1.89.2.2 2018/06/07 17:48:31 martin Exp $	*/
      2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
      3 
      4 /*
      5  * Copyright (C) 1998 WIDE Project.
      6  * All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. Neither the name of the project nor the names of its contributors
     17  *    may be used to endorse or promote products derived from this software
     18  *    without specific prior written permission.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30  * SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * Copyright (c) 1992, 1993
     35  *	The Regents of the University of California.  All rights reserved.
     36  *
     37  * This code is derived from software contributed to Berkeley by
     38  * Stephen Deering of Stanford University.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. Neither the name of the University nor the names of its contributors
     49  *    may be used to endorse or promote products derived from this software
     50  *    without specific prior written permission.
     51  *
     52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     62  * SUCH DAMAGE.
     63  *
     64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
     65  */
     66 
     67 /*
     68  * Copyright (c) 1988 Stephen Deering.
     69  *
     70  * This code is derived from software contributed to Berkeley by
     71  * Stephen Deering of Stanford University.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
    102  */
    103 
    104 #include <sys/cdefs.h>
    105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.89.2.2 2018/06/07 17:48:31 martin Exp $");
    106 
    107 #ifdef _KERNEL_OPT
    108 #include "opt_inet.h"
    109 #include "opt_net_mpsafe.h"
    110 #endif
    111 
    112 #include <sys/param.h>
    113 #include <sys/systm.h>
    114 #include <sys/mbuf.h>
    115 #include <sys/socket.h>
    116 #include <sys/socketvar.h>
    117 #include <sys/syslog.h>
    118 #include <sys/sysctl.h>
    119 #include <sys/kernel.h>
    120 #include <sys/callout.h>
    121 #include <sys/cprng.h>
    122 #include <sys/rwlock.h>
    123 
    124 #include <net/if.h>
    125 
    126 #include <netinet/in.h>
    127 #include <netinet/in_var.h>
    128 #include <netinet6/in6_var.h>
    129 #include <netinet/ip6.h>
    130 #include <netinet6/ip6_var.h>
    131 #include <netinet6/scope6_var.h>
    132 #include <netinet/icmp6.h>
    133 #include <netinet6/icmp6_private.h>
    134 #include <netinet6/mld6_var.h>
    135 
    136 #include <net/net_osdep.h>
    137 
    138 
/*
 * Reader/writer lock taken by the routines in this file around accesses
 * to in6_multi records and the per-interface multicast address lists.
 */
static krwlock_t	in6_multilock __cacheline_aligned;

/*
 * Protocol constants
 */

/*
 * time between repetitions of a node's initial report of interest in a
 * multicast address (in seconds)
 */
#define MLD_UNSOLICITED_REPORT_INTERVAL	10

/* Packet options handed to ip6_output(); filled in once by mld_init(). */
static struct ip6_pktopts ip6_opts;

/* MLD state transitions on join / leave. */
static void mld_start_listening(struct in6_multi *);
static void mld_stop_listening(struct in6_multi *);

/* Packet construction / transmission and per-record timer handling. */
static struct mld_hdr * mld_allocbuf(struct mbuf **, int, struct in6_multi *,
	int);
static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
static void mld_starttimer(struct in6_multi *);
static void mld_stoptimer(struct in6_multi *);
static u_long mld_timerresid(struct in6_multi *);

/* Reference counting of in6_multi records. */
static void in6m_ref(struct in6_multi *);
static void in6m_unref(struct in6_multi *);
static void in6m_destroy(struct in6_multi *);
    166 
    167 void
    168 mld_init(void)
    169 {
    170 	static u_int8_t hbh_buf[8];
    171 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
    172 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
    173 
    174 	/* ip6h_nxt will be fill in later */
    175 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
    176 
    177 	/* XXX: grotty hard coding... */
    178 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
    179 	hbh_buf[3] = 0;
    180 	hbh_buf[4] = IP6OPT_RTALERT;
    181 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
    182 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
    183 
    184 	ip6_opts.ip6po_hbh = hbh;
    185 	/* We will specify the hoplimit by a multicast option. */
    186 	ip6_opts.ip6po_hlim = -1;
    187 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
    188 
    189 	rw_init(&in6_multilock);
    190 }
    191 
    192 static void
    193 mld_starttimer(struct in6_multi *in6m)
    194 {
    195 	struct timeval now;
    196 
    197 	KASSERT(rw_write_held(&in6_multilock));
    198 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
    199 
    200 	microtime(&now);
    201 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
    202 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
    203 	    (in6m->in6m_timer % hz) * (1000000 / hz);
    204 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
    205 		in6m->in6m_timer_expire.tv_sec++;
    206 		in6m->in6m_timer_expire.tv_usec -= 1000000;
    207 	}
    208 
    209 	/* start or restart the timer */
    210 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
    211 }
    212 
    213 /*
    214  * mld_stoptimer releases in6_multilock when calling callout_halt.
    215  * The caller must ensure in6m won't be freed while releasing the lock.
    216  */
    217 static void
    218 mld_stoptimer(struct in6_multi *in6m)
    219 {
    220 
    221 	KASSERT(rw_write_held(&in6_multilock));
    222 
    223 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
    224 		return;
    225 
    226 	rw_exit(&in6_multilock);
    227 
    228 	callout_halt(&in6m->in6m_timer_ch, NULL);
    229 
    230 	rw_enter(&in6_multilock, RW_WRITER);
    231 
    232 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
    233 }
    234 
    235 static void
    236 mld_timeo(void *arg)
    237 {
    238 	struct in6_multi *in6m = arg;
    239 
    240 	KASSERT(in6m->in6m_refcount > 0);
    241 
    242 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
    243 	rw_enter(&in6_multilock, RW_WRITER);
    244 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
    245 		goto out;
    246 
    247 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
    248 
    249 	switch (in6m->in6m_state) {
    250 	case MLD_REPORTPENDING:
    251 		mld_start_listening(in6m);
    252 		break;
    253 	default:
    254 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
    255 		break;
    256 	}
    257 
    258 out:
    259 	rw_exit(&in6_multilock);
    260 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
    261 }
    262 
    263 static u_long
    264 mld_timerresid(struct in6_multi *in6m)
    265 {
    266 	struct timeval now, diff;
    267 
    268 	microtime(&now);
    269 
    270 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
    271 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
    272 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
    273 		return (0);
    274 	}
    275 	diff = in6m->in6m_timer_expire;
    276 	diff.tv_sec -= now.tv_sec;
    277 	diff.tv_usec -= now.tv_usec;
    278 	if (diff.tv_usec < 0) {
    279 		diff.tv_sec--;
    280 		diff.tv_usec += 1000000;
    281 	}
    282 
    283 	/* return the remaining time in milliseconds */
    284 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
    285 }
    286 
    287 static void
    288 mld_start_listening(struct in6_multi *in6m)
    289 {
    290 	struct in6_addr all_in6;
    291 
    292 	KASSERT(rw_write_held(&in6_multilock));
    293 
    294 	/*
    295 	 * RFC2710 page 10:
    296 	 * The node never sends a Report or Done for the link-scope all-nodes
    297 	 * address.
    298 	 * MLD messages are never sent for multicast addresses whose scope is 0
    299 	 * (reserved) or 1 (node-local).
    300 	 */
    301 	all_in6 = in6addr_linklocal_allnodes;
    302 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
    303 		/* XXX: this should not happen! */
    304 		in6m->in6m_timer = 0;
    305 		in6m->in6m_state = MLD_OTHERLISTENER;
    306 	}
    307 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
    308 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
    309 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
    310 		in6m->in6m_state = MLD_OTHERLISTENER;
    311 	} else {
    312 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
    313 		in6m->in6m_timer = cprng_fast32() %
    314 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
    315 		in6m->in6m_state = MLD_IREPORTEDLAST;
    316 
    317 		mld_starttimer(in6m);
    318 	}
    319 }
    320 
    321 static void
    322 mld_stop_listening(struct in6_multi *in6m)
    323 {
    324 	struct in6_addr allnode, allrouter;
    325 
    326 	KASSERT(rw_lock_held(&in6_multilock));
    327 
    328 	allnode = in6addr_linklocal_allnodes;
    329 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
    330 		/* XXX: this should not happen! */
    331 		return;
    332 	}
    333 	allrouter = in6addr_linklocal_allrouters;
    334 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
    335 		/* XXX impossible */
    336 		return;
    337 	}
    338 
    339 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
    340 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
    341 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
    342 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
    343 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
    344 	}
    345 }
    346 
/*
 * Process a received MLD message.
 *
 * m is the received packet and off the offset of the MLD header within
 * it.  Queries (re)start or fire the report timers of matching
 * membership records on the receiving interface; reports from other
 * nodes stop our timer for that group.  The mbuf is freed before
 * returning on every path except the one that jumps to out_nodrop.
 */
void
mld_input(struct mbuf *m, int off)
{
	struct ip6_hdr *ip6;
	struct mld_hdr *mldh;
	struct ifnet *ifp;
	struct in6_multi *in6m = NULL;
	struct in6_addr mld_addr, all_in6;
	u_long timer = 0;	/* timer value in the MLD query header */
	struct psref psref;

	/* Hold a psref on the receiving interface until we are done. */
	ifp = m_get_rcvif_psref(m, &psref);
	if (__predict_false(ifp == NULL))
		goto out;
	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
	if (mldh == NULL) {
		/*
		 * NOTE(review): m is not freed on this path; presumably
		 * IP6_EXTHDR_GET has already disposed of it -- confirm.
		 */
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		goto out_nodrop;
	}

	/* source address validation */
	ip6 = mtod(m, struct ip6_hdr *);/* in case mpullup */
	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
		/*
		 * RFC3590 allows the IPv6 unspecified address as the source
		 * address of MLD report and done messages.  However, as this
		 * same document says, this special rule is for snooping
		 * switches and the RFC requires routers to discard MLD packets
		 * with the unspecified source address.  The RFC only talks
		 * about hosts receiving an MLD query or report in Security
		 * Considerations, but this is probably the correct intention.
		 * RFC3590 does not talk about other cases than link-local and
		 * the unspecified source addresses, but we believe the same
		 * rule should be applied.
		 * As a result, we only allow link-local addresses as the
		 * source address; otherwise, simply discard the packet.
		 */
#if 0
		/*
		 * XXX: do not log in an input path to avoid log flooding,
		 * though RFC3590 says "SHOULD log" if the source of a query
		 * is the unspecified address.
		 */
		char ip6bufs[INET6_ADDRSTRLEN];
		char ip6bufm[INET6_ADDRSTRLEN];
		log(LOG_INFO,
		    "mld_input: src %s is not link-local (grp=%s)\n",
		    IN6_PRINT(ip6bufs,&ip6->ip6_src),
		    IN6_PRINT(ip6bufm, &mldh->mld_addr));
#endif
		goto out;
	}

	/*
	 * make a copy for local work (in6_setscope() may modify the 1st arg)
	 */
	mld_addr = mldh->mld_addr;
	if (in6_setscope(&mld_addr, ifp, NULL)) {
		/* XXX: this should not happen! */
		goto out;
	}

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
	 * In Idle Listener state, our timer is not running
	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
	 *
	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
	 * we have heard a report from another member, or MLD_IREPORTEDLAST
	 * if we sent the last report.
	 */
	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY: {
		struct in6_multi *next;

		/* Queries arriving on a loopback interface are ignored. */
		if (ifp->if_flags & IFF_LOOPBACK)
			break;

		/* The queried address must be unspecified (general) or multicast. */
		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
			break;	/* print error or log stat? */

		all_in6 = in6addr_linklocal_allnodes;
		if (in6_setscope(&all_in6, ifp, NULL)) {
			/* XXX: this should not happen! */
			break;
		}

		/*
		 * - Start the timers in all of our membership records
		 *   that the query applies to for the interface on
		 *   which the query arrived excl. those that belong
		 *   to the "all-nodes" group (ff02::1).
		 * - Restart any timer that is already running but has
		 *   a value longer than the requested timeout.
		 * - Use the value specified in the query message as
		 *   the maximum timeout.
		 */
		timer = ntohs(mldh->mld_maxdelay);

		rw_enter(&in6_multilock, RW_WRITER);
		/*
		 * mld_stoptimer and mld_sendpkt release in6_multilock
		 * temporarily, so we have to prevent in6m from being freed
		 * while releasing the lock by having an extra reference to it.
		 *
		 * Also in6_purge_multi might remove items from the list of the
		 * ifp while releasing the lock. Fortunately in6_purge_multi is
		 * never executed as long as we have a psref of the ifp.
		 */
		LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
			/* Skip groups for which reports are never sent. */
			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
			    IPV6_ADDR_SCOPE_LINKLOCAL)
				continue;

			if (in6m->in6m_state == MLD_REPORTPENDING)
				continue; /* we are not yet ready */

			/* A group-specific query only applies to that group. */
			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
				continue;

			if (timer == 0) {
				in6m_ref(in6m);

				/* send a report immediately */
				mld_stoptimer(in6m);
				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
				in6m->in6m_state = MLD_IREPORTEDLAST;

				in6m_unref(in6m); /* May free in6m */
			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
			    mld_timerresid(in6m) > timer) {
				/*
				 * timer and mld_timerresid() are both in
				 * milliseconds; pick a random delay below
				 * timer and convert it to ticks (at least 1).
				 */
				in6m->in6m_timer =
				   1 + (cprng_fast32() % timer) * hz / 1000;
				mld_starttimer(in6m);
			}
		}
		rw_exit(&in6_multilock);
		break;
	    }

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group.  Reports
		 * can potentially get looped back if we are a multicast
		 * router, so discard reports sourced by me.
		 * Note that it is impossible to check IFF_LOOPBACK flag of
		 * ifp for this purpose, since ip6_mloopback pass the physical
		 * interface to looutput.
		 */
		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
			break;

		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
			break;

		/*
		 * If we belong to the group being reported, stop
		 * our timer for that group.
		 */
		rw_enter(&in6_multilock, RW_WRITER);
		in6m = in6_lookup_multi(&mld_addr, ifp);
		if (in6m) {
			/* extra reference: mld_stoptimer may drop the lock */
			in6m_ref(in6m);
			mld_stoptimer(in6m); /* transit to idle state */
			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
			in6m_unref(in6m);
			in6m = NULL; /* in6m might be freed */
		}
		rw_exit(&in6_multilock);
		break;
	default:		/* this is impossible */
#if 0
		/*
		 * this case should be impossible because of filtering in
		 * icmp6_input().  But we explicitly disabled this part
		 * just in case.
		 */
		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
#endif
		break;
	}

out:
	m_freem(m);
out_nodrop:
	/* Drop the interface reference taken at the top. */
	m_put_rcvif_psref(ifp, &psref);
}
    541 
/*
 * Build and transmit one MLD message of the given type for in6m.
 *
 * dst is the IPv6 destination; when NULL the group address itself is
 * used.  The source is a link-local address of the outgoing interface,
 * or the unspecified address when that address is still tentative.
 * Nothing is sent if no link-local address exists or if mbuf allocation
 * fails.
 *
 * XXX mld_sendpkt must be called with in6_multilock write-held.  It
 * releases in6_multilock around the call to ip6_output (to avoid
 * locking against myself in ip6_output) and re-acquires it before
 * returning, so callers must not assume that state protected by the
 * lock is unchanged across this call.
 */
static void
mld_sendpkt(struct in6_multi *in6m, int type,
	const struct in6_addr *dst)
{
	struct mbuf *mh;
	struct mld_hdr *mldh;
	struct ip6_hdr *ip6 = NULL;
	struct ip6_moptions im6o;
	struct in6_ifaddr *ia = NULL;
	struct ifnet *ifp = in6m->in6m_ifp;
	int ignflags;
	struct psref psref;
	int bound;

	KASSERT(rw_write_held(&in6_multilock));

	/*
	 * At first, find a link local address on the outgoing interface
	 * to use as the source address of the MLD packet.
	 * We do not reject tentative addresses for MLD report to deal with
	 * the case where we first join a link-local address.
	 */
	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
	bound = curlwp_bind();
	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
	if (ia == NULL) {
		curlwp_bindx(bound);
		return;
	}
	/*
	 * A tentative address is not used as source: drop it here so that
	 * the packet goes out with the unspecified address instead.
	 */
	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
		ia6_release(ia, &psref);
		ia = NULL;
	}

	/* Allocate two mbufs to store IPv6 header and MLD header */
	mldh = mld_allocbuf(&mh, sizeof(struct mld_hdr), in6m, type);
	if (mldh == NULL) {
		/* NOTE(review): ia may be NULL here; assumes ia6_release() tolerates NULL -- confirm. */
		ia6_release(ia, &psref);
		curlwp_bindx(bound);
		return;
	}

	/* fill src/dst here */
 	ip6 = mtod(mh, struct ip6_hdr *);
 	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
 	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
	ia6_release(ia, &psref);
	curlwp_bindx(bound);

	/* From here on ia is only tested against NULL, never dereferenced. */
	mldh->mld_addr = in6m->in6m_addr;
	in6_clearscope(&mldh->mld_addr); /* XXX */
	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
	    sizeof(struct mld_hdr));

	/* construct multicast option */
	memset(&im6o, 0, sizeof(im6o));
	im6o.im6o_multicast_if_index = if_get_index(ifp);
	im6o.im6o_multicast_hlim = 1;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 */
	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);

	/* increment output statistics */
	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
	icmp6_ifstat_inc(ifp, ifs6_out_msg);
	switch (type) {
	case MLD_LISTENER_QUERY:
		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
		break;
	case MLD_LISTENER_REPORT:
		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
		break;
	case MLD_LISTENER_DONE:
		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
		break;
	}

	/* XXX we cannot call ip6_output with holding in6_multilock */
	rw_exit(&in6_multilock);

	/* IPV6_UNSPECSRC is passed when no usable source address was kept. */
	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
	    &im6o, NULL, NULL);

	rw_enter(&in6_multilock, RW_WRITER);
}
    635 
    636 static struct mld_hdr *
    637 mld_allocbuf(struct mbuf **mh, int len, struct in6_multi *in6m,
    638     int type)
    639 {
    640 	struct mbuf *md;
    641 	struct mld_hdr *mldh;
    642 	struct ip6_hdr *ip6;
    643 
    644 	/*
    645 	 * Allocate mbufs to store ip6 header and MLD header.
    646 	 * We allocate 2 mbufs and make chain in advance because
    647 	 * it is more convenient when inserting the hop-by-hop option later.
    648 	 */
    649 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
    650 	if (*mh == NULL)
    651 		return NULL;
    652 	MGET(md, M_DONTWAIT, MT_DATA);
    653 	if (md == NULL) {
    654 		m_free(*mh);
    655 		*mh = NULL;
    656 		return NULL;
    657 	}
    658 	(*mh)->m_next = md;
    659 	md->m_next = NULL;
    660 
    661 	m_reset_rcvif((*mh));
    662 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + len;
    663 	(*mh)->m_len = sizeof(struct ip6_hdr);
    664 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
    665 
    666 	/* fill in the ip6 header */
    667 	ip6 = mtod(*mh, struct ip6_hdr *);
    668 	memset(ip6, 0, sizeof(*ip6));
    669 	ip6->ip6_flow = 0;
    670 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
    671 	ip6->ip6_vfc |= IPV6_VERSION;
    672 	/* ip6_plen will be set later */
    673 	ip6->ip6_nxt = IPPROTO_ICMPV6;
    674 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
    675 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
    676 
    677 	/* fill in the MLD header as much as possible */
    678 	md->m_len = len;
    679 	mldh = mtod(md, struct mld_hdr *);
    680 	memset(mldh, 0, len);
    681 	mldh->mld_type = type;
    682 	return mldh;
    683 }
    684 
    685 static void
    686 in6m_ref(struct in6_multi *in6m)
    687 {
    688 
    689 	KASSERT(rw_write_held(&in6_multilock));
    690 	in6m->in6m_refcount++;
    691 }
    692 
    693 static void
    694 in6m_unref(struct in6_multi *in6m)
    695 {
    696 
    697 	KASSERT(rw_write_held(&in6_multilock));
    698 	if (--in6m->in6m_refcount == 0)
    699 		in6m_destroy(in6m);
    700 }
    701 
    702 /*
    703  * Add an address to the list of IP6 multicast addresses for a given interface.
    704  */
    705 struct	in6_multi *
    706 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
    707 	int *errorp, int timer)
    708 {
    709 	struct	sockaddr_in6 sin6;
    710 	struct	in6_multi *in6m;
    711 
    712 	*errorp = 0;
    713 
    714 	rw_enter(&in6_multilock, RW_WRITER);
    715 	/*
    716 	 * See if address already in list.
    717 	 */
    718 	in6m = in6_lookup_multi(maddr6, ifp);
    719 	if (in6m != NULL) {
    720 		/*
    721 		 * Found it; just increment the refrence count.
    722 		 */
    723 		in6m->in6m_refcount++;
    724 	} else {
    725 		/*
    726 		 * New address; allocate a new multicast record
    727 		 * and link it into the interface's multicast list.
    728 		 */
    729 		in6m = (struct in6_multi *)
    730 			malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
    731 		if (in6m == NULL) {
    732 			*errorp = ENOBUFS;
    733 			goto out;
    734 		}
    735 
    736 		in6m->in6m_addr = *maddr6;
    737 		in6m->in6m_ifp = ifp;
    738 		in6m->in6m_refcount = 1;
    739 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
    740 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
    741 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
    742 
    743 		LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
    744 
    745 		/*
    746 		 * Ask the network driver to update its multicast reception
    747 		 * filter appropriately for the new address.
    748 		 */
    749 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
    750 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
    751 		if (*errorp) {
    752 			callout_destroy(&in6m->in6m_timer_ch);
    753 			LIST_REMOVE(in6m, in6m_entry);
    754 			free(in6m, M_IPMADDR);
    755 			in6m = NULL;
    756 			goto out;
    757 		}
    758 
    759 		in6m->in6m_timer = timer;
    760 		if (in6m->in6m_timer > 0) {
    761 			in6m->in6m_state = MLD_REPORTPENDING;
    762 			mld_starttimer(in6m);
    763 			goto out;
    764 		}
    765 
    766 		/*
    767 		 * Let MLD6 know that we have joined a new IP6 multicast
    768 		 * group.
    769 		 */
    770 		mld_start_listening(in6m);
    771 	}
    772 out:
    773 	rw_exit(&in6_multilock);
    774 	return in6m;
    775 }
    776 
/*
 * Tear down and free a record whose reference count has dropped to 0.
 *
 * in6_multilock must be write-held on entry and is write-held again on
 * return, but it is released while the callout is halted and destroyed
 * and the record is freed (and mld_stop_listening may release it
 * temporarily as well, via mld_sendpkt).  in6m is invalid on return.
 */
static void
in6m_destroy(struct in6_multi *in6m)
{
	struct sockaddr_in6 sin6;

	KASSERT(rw_write_held(&in6_multilock));
	KASSERT(in6m->in6m_refcount == 0);

	/*
	 * Unlink from list if it's listed.  This must be done before
	 * mld_stop_listening because it releases in6_multilock and that allows
	 * someone to look up the removing in6m from the list and add a
	 * reference to the entry unexpectedly.
	 */
	/*
	 * NOTE(review): the lookup matches by address and interface, not by
	 * pointer; this assumes at most one record per (address, ifp) can be
	 * on the list -- confirm.
	 */
	if (in6_lookup_multi(&in6m->in6m_addr, in6m->in6m_ifp) != NULL)
		LIST_REMOVE(in6m, in6m_entry);

	/*
	 * No remaining claims to this record; let MLD6 know
	 * that we are leaving the multicast group.
	 */
	mld_stop_listening(in6m);

	/*
	 * Delete all references of this multicasting group from
	 * the membership arrays
	 */
	in6_purge_mcast_references(in6m);

	/*
	 * Notify the network driver to update its multicast
	 * reception filter.
	 */
	sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
	if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));

	/* Tell mld_timeo we're halting the timer */
	in6m->in6m_timer = IN6M_TIMER_UNDEF;

	/* The callout is halted without the lock held, as in mld_stoptimer. */
	rw_exit(&in6_multilock);
	callout_halt(&in6m->in6m_timer_ch, NULL);
	callout_destroy(&in6m->in6m_timer_ch);

	free(in6m, M_IPMADDR);
	rw_enter(&in6_multilock, RW_WRITER);
}
    823 
    824 /*
    825  * Delete a multicast address record.
    826  */
    827 void
    828 in6_delmulti_locked(struct in6_multi *in6m)
    829 {
    830 
    831 	KASSERT(rw_write_held(&in6_multilock));
    832 	KASSERT(in6m->in6m_refcount > 0);
    833 
    834 	/*
    835 	 * The caller should have a reference to in6m. So we don't need to care
    836 	 * of releasing the lock in mld_stoptimer.
    837 	 */
    838 	mld_stoptimer(in6m);
    839 	if (--in6m->in6m_refcount == 0)
    840 		in6m_destroy(in6m);
    841 }
    842 
/*
 * Delete a multicast address record; takes in6_multilock as writer
 * around in6_delmulti_locked().
 */
void
in6_delmulti(struct in6_multi *in6m)
{

	rw_enter(&in6_multilock, RW_WRITER);
	in6_delmulti_locked(in6m);
	rw_exit(&in6_multilock);
}
    851 
    852 /*
    853  * Look up the in6_multi record for a given IP6 multicast address
    854  * on a given interface. If no matching record is found, "in6m"
    855  * returns NULL.
    856  */
    857 struct in6_multi *
    858 in6_lookup_multi(const struct in6_addr *addr, const struct ifnet *ifp)
    859 {
    860 	struct in6_multi *in6m;
    861 
    862 	KASSERT(rw_lock_held(&in6_multilock));
    863 
    864 	LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
    865 		if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
    866 			break;
    867 	}
    868 	return in6m;
    869 }
    870 
    871 void
    872 in6_lookup_and_delete_multi(const struct in6_addr *addr,
    873     const struct ifnet *ifp)
    874 {
    875 	struct in6_multi *in6m;
    876 
    877 	rw_enter(&in6_multilock, RW_WRITER);
    878 	in6m = in6_lookup_multi(addr, ifp);
    879 	if (in6m != NULL)
    880 		in6_delmulti_locked(in6m);
    881 	rw_exit(&in6_multilock);
    882 }
    883 
    884 bool
    885 in6_multi_group(const struct in6_addr *addr, const struct ifnet *ifp)
    886 {
    887 	bool ingroup;
    888 
    889 	rw_enter(&in6_multilock, RW_READER);
    890 	ingroup = in6_lookup_multi(addr, ifp) != NULL;
    891 	rw_exit(&in6_multilock);
    892 
    893 	return ingroup;
    894 }
    895 
    896 /*
    897  * Purge in6_multi records associated to the interface.
    898  */
    899 void
    900 in6_purge_multi(struct ifnet *ifp)
    901 {
    902 	struct in6_multi *in6m, *next;
    903 
    904 	rw_enter(&in6_multilock, RW_WRITER);
    905 	LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
    906 		LIST_REMOVE(in6m, in6m_entry);
    907 		/*
    908 		 * Normally multicast addresses are already purged at this
    909 		 * point. Remaining references aren't accessible via ifp,
    910 		 * so what we can do here is to prevent ifp from being
    911 		 * accessed via in6m by removing it from the list of ifp.
    912 		 */
    913 		mld_stoptimer(in6m);
    914 	}
    915 	rw_exit(&in6_multilock);
    916 }
    917 
    918 void
    919 in6_multi_lock(int op)
    920 {
    921 
    922 	rw_enter(&in6_multilock, op);
    923 }
    924 
    925 void
    926 in6_multi_unlock(void)
    927 {
    928 
    929 	rw_exit(&in6_multilock);
    930 }
    931 
    932 bool
    933 in6_multi_locked(int op)
    934 {
    935 
    936 	switch (op) {
    937 	case RW_READER:
    938 		return rw_read_held(&in6_multilock);
    939 	case RW_WRITER:
    940 		return rw_write_held(&in6_multilock);
    941 	default:
    942 		return rw_lock_held(&in6_multilock);
    943 	}
    944 }
    945 
    946 struct in6_multi_mship *
    947 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
    948 	int *errorp, int timer)
    949 {
    950 	struct in6_multi_mship *imm;
    951 
    952 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
    953 	if (imm == NULL) {
    954 		*errorp = ENOBUFS;
    955 		return NULL;
    956 	}
    957 
    958 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
    959 	if (!imm->i6mm_maddr) {
    960 		/* *errorp is already set */
    961 		free(imm, M_IPMADDR);
    962 		return NULL;
    963 	}
    964 	return imm;
    965 }
    966 
    967 int
    968 in6_leavegroup(struct in6_multi_mship *imm)
    969 {
    970 	struct in6_multi *in6m;
    971 
    972 	rw_enter(&in6_multilock, RW_WRITER);
    973 	in6m = imm->i6mm_maddr;
    974 	imm->i6mm_maddr = NULL;
    975 	if (in6m != NULL) {
    976 		in6_delmulti_locked(in6m);
    977 	}
    978 	rw_exit(&in6_multilock);
    979 	free(imm, M_IPMADDR);
    980 	return 0;
    981 }
    982 
    983 /*
    984  * DEPRECATED: keep it just to avoid breaking old sysctl users.
    985  */
    986 static int
    987 in6_mkludge_sysctl(SYSCTLFN_ARGS)
    988 {
    989 
    990 	if (namelen != 1)
    991 		return EINVAL;
    992 	*oldlenp = 0;
    993 	return 0;
    994 }
    995 
    996 static int
    997 in6_multicast_sysctl(SYSCTLFN_ARGS)
    998 {
    999 	struct ifnet *ifp;
   1000 	struct ifaddr *ifa;
   1001 	struct in6_ifaddr *ia6;
   1002 	struct in6_multi *in6m;
   1003 	uint32_t tmp;
   1004 	int error;
   1005 	size_t written;
   1006 	struct psref psref, psref_ia;
   1007 	int bound, s;
   1008 
   1009 	if (namelen != 1)
   1010 		return EINVAL;
   1011 
   1012 	rw_enter(&in6_multilock, RW_READER);
   1013 
   1014 	bound = curlwp_bind();
   1015 	ifp = if_get_byindex(name[0], &psref);
   1016 	if (ifp == NULL) {
   1017 		curlwp_bindx(bound);
   1018 		rw_exit(&in6_multilock);
   1019 		return ENODEV;
   1020 	}
   1021 
   1022 	if (oldp == NULL) {
   1023 		*oldlenp = 0;
   1024 		s = pserialize_read_enter();
   1025 		IFADDR_READER_FOREACH(ifa, ifp) {
   1026 			LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
   1027 				*oldlenp += 2 * sizeof(struct in6_addr) +
   1028 				    sizeof(uint32_t);
   1029 			}
   1030 		}
   1031 		pserialize_read_exit(s);
   1032 		if_put(ifp, &psref);
   1033 		curlwp_bindx(bound);
   1034 		rw_exit(&in6_multilock);
   1035 		return 0;
   1036 	}
   1037 
   1038 	error = 0;
   1039 	written = 0;
   1040 	s = pserialize_read_enter();
   1041 	IFADDR_READER_FOREACH(ifa, ifp) {
   1042 		if (ifa->ifa_addr->sa_family != AF_INET6)
   1043 			continue;
   1044 
   1045 		ifa_acquire(ifa, &psref_ia);
   1046 		pserialize_read_exit(s);
   1047 
   1048 		ia6 = ifatoia6(ifa);
   1049 		LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
   1050 			if (written + 2 * sizeof(struct in6_addr) +
   1051 			    sizeof(uint32_t) > *oldlenp)
   1052 				goto done;
   1053 			/*
   1054 			 * XXX return the first IPv6 address to keep backward
   1055 			 * compatibility, however now multicast addresses
   1056 			 * don't belong to any IPv6 addresses so it should be
   1057 			 * unnecessary.
   1058 			 */
   1059 			error = sysctl_copyout(l, &ia6->ia_addr.sin6_addr,
   1060 			    oldp, sizeof(struct in6_addr));
   1061 			if (error)
   1062 				goto done;
   1063 			oldp = (char *)oldp + sizeof(struct in6_addr);
   1064 			written += sizeof(struct in6_addr);
   1065 			error = sysctl_copyout(l, &in6m->in6m_addr,
   1066 			    oldp, sizeof(struct in6_addr));
   1067 			if (error)
   1068 				goto done;
   1069 			oldp = (char *)oldp + sizeof(struct in6_addr);
   1070 			written += sizeof(struct in6_addr);
   1071 			tmp = in6m->in6m_refcount;
   1072 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
   1073 			if (error)
   1074 				goto done;
   1075 			oldp = (char *)oldp + sizeof(tmp);
   1076 			written += sizeof(tmp);
   1077 		}
   1078 
   1079 		s = pserialize_read_enter();
   1080 
   1081 		break;
   1082 	}
   1083 	pserialize_read_exit(s);
   1084 done:
   1085 	ifa_release(ifa, &psref_ia);
   1086 	if_put(ifp, &psref);
   1087 	curlwp_bindx(bound);
   1088 	rw_exit(&in6_multilock);
   1089 	*oldlenp = written;
   1090 	return error;
   1091 }
   1092 
   1093 void
   1094 in6_sysctl_multicast_setup(struct sysctllog **clog)
   1095 {
   1096 
   1097 	sysctl_createv(clog, 0, NULL, NULL,
   1098 		       CTLFLAG_PERMANENT,
   1099 		       CTLTYPE_NODE, "inet6", NULL,
   1100 		       NULL, 0, NULL, 0,
   1101 		       CTL_NET, PF_INET6, CTL_EOL);
   1102 
   1103 	sysctl_createv(clog, 0, NULL, NULL,
   1104 		       CTLFLAG_PERMANENT,
   1105 		       CTLTYPE_NODE, "multicast",
   1106 		       SYSCTL_DESCR("Multicast information"),
   1107 		       in6_multicast_sysctl, 0, NULL, 0,
   1108 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
   1109 
   1110 	sysctl_createv(clog, 0, NULL, NULL,
   1111 		       CTLFLAG_PERMANENT,
   1112 		       CTLTYPE_NODE, "multicast_kludge",
   1113 		       SYSCTL_DESCR("multicast kludge information"),
   1114 		       in6_mkludge_sysctl, 0, NULL, 0,
   1115 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
   1116 }
   1117