mld6.c revision 1.92 1 /* $NetBSD: mld6.c,v 1.92 2018/05/01 07:21:39 maxv Exp $ */
2 /* $KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1992, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * Stephen Deering of Stanford University.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)igmp.c 8.1 (Berkeley) 7/19/93
65 */
66
67 /*
68 * Copyright (c) 1988 Stephen Deering.
69 *
70 * This code is derived from software contributed to Berkeley by
71 * Stephen Deering of Stanford University.
72 *
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
75 * are met:
76 * 1. Redistributions of source code must retain the above copyright
77 * notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 * notice, this list of conditions and the following disclaimer in the
80 * documentation and/or other materials provided with the distribution.
81 * 3. All advertising materials mentioning features or use of this software
82 * must display the following acknowledgement:
83 * This product includes software developed by the University of
84 * California, Berkeley and its contributors.
85 * 4. Neither the name of the University nor the names of its contributors
86 * may be used to endorse or promote products derived from this software
87 * without specific prior written permission.
88 *
89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99 * SUCH DAMAGE.
100 *
101 * @(#)igmp.c 8.1 (Berkeley) 7/19/93
102 */
103
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.92 2018/05/01 07:21:39 maxv Exp $");
106
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119 #include <sys/kernel.h>
120 #include <sys/callout.h>
121 #include <sys/cprng.h>
122 #include <sys/rwlock.h>
123
124 #include <net/if.h>
125
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet6/in6_var.h>
129 #include <netinet/ip6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet6/scope6_var.h>
132 #include <netinet/icmp6.h>
133 #include <netinet6/icmp6_private.h>
134 #include <netinet6/mld6_var.h>
135
136 static krwlock_t in6_multilock __cacheline_aligned;
137
138 /*
139 * Protocol constants
140 */
141
142 /*
143 * time between repetitions of a node's initial report of interest in a
144 * multicast address(in seconds)
145 */
146 #define MLD_UNSOLICITED_REPORT_INTERVAL 10
147
148 static struct ip6_pktopts ip6_opts;
149
150 static void mld_start_listening(struct in6_multi *);
151 static void mld_stop_listening(struct in6_multi *);
152
153 static struct mld_hdr *mld_allocbuf(struct mbuf **, struct in6_multi *, int);
154 static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
155 static void mld_starttimer(struct in6_multi *);
156 static void mld_stoptimer(struct in6_multi *);
157 static u_long mld_timerresid(struct in6_multi *);
158
159 static void in6m_ref(struct in6_multi *);
160 static void in6m_unref(struct in6_multi *);
161 static void in6m_destroy(struct in6_multi *);
162
163 void
164 mld_init(void)
165 {
166 static u_int8_t hbh_buf[8];
167 struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
168 u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
169
170 /* ip6h_nxt will be fill in later */
171 hbh->ip6h_len = 0; /* (8 >> 3) - 1 */
172
173 /* XXX: grotty hard coding... */
174 hbh_buf[2] = IP6OPT_PADN; /* 2 byte padding */
175 hbh_buf[3] = 0;
176 hbh_buf[4] = IP6OPT_RTALERT;
177 hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
178 memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
179
180 ip6_opts.ip6po_hbh = hbh;
181 /* We will specify the hoplimit by a multicast option. */
182 ip6_opts.ip6po_hlim = -1;
183 ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
184
185 rw_init(&in6_multilock);
186 }
187
188 static void
189 mld_starttimer(struct in6_multi *in6m)
190 {
191 struct timeval now;
192
193 KASSERT(rw_write_held(&in6_multilock));
194 KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
195
196 microtime(&now);
197 in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
198 in6m->in6m_timer_expire.tv_usec = now.tv_usec +
199 (in6m->in6m_timer % hz) * (1000000 / hz);
200 if (in6m->in6m_timer_expire.tv_usec > 1000000) {
201 in6m->in6m_timer_expire.tv_sec++;
202 in6m->in6m_timer_expire.tv_usec -= 1000000;
203 }
204
205 /* start or restart the timer */
206 callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
207 }
208
209 /*
210 * mld_stoptimer releases in6_multilock when calling callout_halt.
211 * The caller must ensure in6m won't be freed while releasing the lock.
212 */
213 static void
214 mld_stoptimer(struct in6_multi *in6m)
215 {
216
217 KASSERT(rw_write_held(&in6_multilock));
218
219 if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
220 return;
221
222 rw_exit(&in6_multilock);
223
224 if (mutex_owned(softnet_lock))
225 callout_halt(&in6m->in6m_timer_ch, softnet_lock);
226 else
227 callout_halt(&in6m->in6m_timer_ch, NULL);
228
229 rw_enter(&in6_multilock, RW_WRITER);
230
231 in6m->in6m_timer = IN6M_TIMER_UNDEF;
232 }
233
234 static void
235 mld_timeo(void *arg)
236 {
237 struct in6_multi *in6m = arg;
238
239 KASSERT(in6m->in6m_refcount > 0);
240
241 SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
242 rw_enter(&in6_multilock, RW_WRITER);
243 if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
244 goto out;
245
246 in6m->in6m_timer = IN6M_TIMER_UNDEF;
247
248 switch (in6m->in6m_state) {
249 case MLD_REPORTPENDING:
250 mld_start_listening(in6m);
251 break;
252 default:
253 mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
254 break;
255 }
256
257 out:
258 rw_exit(&in6_multilock);
259 SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
260 }
261
262 static u_long
263 mld_timerresid(struct in6_multi *in6m)
264 {
265 struct timeval now, diff;
266
267 microtime(&now);
268
269 if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
270 (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
271 now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
272 return (0);
273 }
274 diff = in6m->in6m_timer_expire;
275 diff.tv_sec -= now.tv_sec;
276 diff.tv_usec -= now.tv_usec;
277 if (diff.tv_usec < 0) {
278 diff.tv_sec--;
279 diff.tv_usec += 1000000;
280 }
281
282 /* return the remaining time in milliseconds */
283 return diff.tv_sec * 1000 + diff.tv_usec / 1000;
284 }
285
286 static void
287 mld_start_listening(struct in6_multi *in6m)
288 {
289 struct in6_addr all_in6;
290
291 KASSERT(rw_write_held(&in6_multilock));
292
293 /*
294 * RFC2710 page 10:
295 * The node never sends a Report or Done for the link-scope all-nodes
296 * address.
297 * MLD messages are never sent for multicast addresses whose scope is 0
298 * (reserved) or 1 (node-local).
299 */
300 all_in6 = in6addr_linklocal_allnodes;
301 if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
302 /* XXX: this should not happen! */
303 in6m->in6m_timer = 0;
304 in6m->in6m_state = MLD_OTHERLISTENER;
305 }
306 if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
307 IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
308 in6m->in6m_timer = IN6M_TIMER_UNDEF;
309 in6m->in6m_state = MLD_OTHERLISTENER;
310 } else {
311 mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
312 in6m->in6m_timer = cprng_fast32() %
313 (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
314 in6m->in6m_state = MLD_IREPORTEDLAST;
315
316 mld_starttimer(in6m);
317 }
318 }
319
320 static void
321 mld_stop_listening(struct in6_multi *in6m)
322 {
323 struct in6_addr allnode, allrouter;
324
325 KASSERT(rw_lock_held(&in6_multilock));
326
327 allnode = in6addr_linklocal_allnodes;
328 if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
329 /* XXX: this should not happen! */
330 return;
331 }
332 allrouter = in6addr_linklocal_allrouters;
333 if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
334 /* XXX impossible */
335 return;
336 }
337
338 if (in6m->in6m_state == MLD_IREPORTEDLAST &&
339 (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
340 IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
341 IPV6_ADDR_SCOPE_INTFACELOCAL) {
342 mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
343 }
344 }
345
346 void
347 mld_input(struct mbuf *m, int off)
348 {
349 struct ip6_hdr *ip6;
350 struct mld_hdr *mldh;
351 struct ifnet *ifp;
352 struct in6_multi *in6m = NULL;
353 struct in6_addr mld_addr, all_in6;
354 u_long timer = 0; /* timer value in the MLD query header */
355 struct psref psref;
356
357 ifp = m_get_rcvif_psref(m, &psref);
358 if (__predict_false(ifp == NULL))
359 goto out;
360 IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
361 if (mldh == NULL) {
362 ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
363 goto out_nodrop;
364 }
365
366 ip6 = mtod(m, struct ip6_hdr *);
367
368 /* source address validation */
369 if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
370 /*
371 * RFC3590 allows the IPv6 unspecified address as the source
372 * address of MLD report and done messages. However, as this
373 * same document says, this special rule is for snooping
374 * switches and the RFC requires routers to discard MLD packets
375 * with the unspecified source address. The RFC only talks
376 * about hosts receiving an MLD query or report in Security
377 * Considerations, but this is probably the correct intention.
378 * RFC3590 does not talk about other cases than link-local and
379 * the unspecified source addresses, but we believe the same
380 * rule should be applied.
381 * As a result, we only allow link-local addresses as the
382 * source address; otherwise, simply discard the packet.
383 */
384 #if 0
385 /*
386 * XXX: do not log in an input path to avoid log flooding,
387 * though RFC3590 says "SHOULD log" if the source of a query
388 * is the unspecified address.
389 */
390 char ip6bufs[INET6_ADDRSTRLEN];
391 char ip6bufm[INET6_ADDRSTRLEN];
392 log(LOG_INFO,
393 "mld_input: src %s is not link-local (grp=%s)\n",
394 IN6_PRINT(ip6bufs,&ip6->ip6_src),
395 IN6_PRINT(ip6bufm, &mldh->mld_addr));
396 #endif
397 goto out;
398 }
399
400 /*
401 * make a copy for local work (in6_setscope() may modify the 1st arg)
402 */
403 mld_addr = mldh->mld_addr;
404 if (in6_setscope(&mld_addr, ifp, NULL)) {
405 /* XXX: this should not happen! */
406 goto out;
407 }
408
409 /*
410 * In the MLD specification, there are 3 states and a flag.
411 *
412 * In Non-Listener state, we simply don't have a membership record.
413 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
414 * In Idle Listener state, our timer is not running
415 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
416 *
417 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
418 * we have heard a report from another member, or MLD_IREPORTEDLAST
419 * if we sent the last report.
420 */
421 switch (mldh->mld_type) {
422 case MLD_LISTENER_QUERY: {
423 struct in6_multi *next;
424
425 if (ifp->if_flags & IFF_LOOPBACK)
426 break;
427
428 if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
429 !IN6_IS_ADDR_MULTICAST(&mld_addr))
430 break; /* print error or log stat? */
431
432 all_in6 = in6addr_linklocal_allnodes;
433 if (in6_setscope(&all_in6, ifp, NULL)) {
434 /* XXX: this should not happen! */
435 break;
436 }
437
438 /*
439 * - Start the timers in all of our membership records
440 * that the query applies to for the interface on
441 * which the query arrived excl. those that belong
442 * to the "all-nodes" group (ff02::1).
443 * - Restart any timer that is already running but has
444 * a value longer than the requested timeout.
445 * - Use the value specified in the query message as
446 * the maximum timeout.
447 */
448 timer = ntohs(mldh->mld_maxdelay);
449
450 rw_enter(&in6_multilock, RW_WRITER);
451 /*
452 * mld_stoptimer and mld_sendpkt release in6_multilock
453 * temporarily, so we have to prevent in6m from being freed
454 * while releasing the lock by having an extra reference to it.
455 *
456 * Also in6_purge_multi might remove items from the list of the
457 * ifp while releasing the lock. Fortunately in6_purge_multi is
458 * never executed as long as we have a psref of the ifp.
459 */
460 LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
461 if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
462 IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
463 IPV6_ADDR_SCOPE_LINKLOCAL)
464 continue;
465
466 if (in6m->in6m_state == MLD_REPORTPENDING)
467 continue; /* we are not yet ready */
468
469 if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
470 !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
471 continue;
472
473 if (timer == 0) {
474 in6m_ref(in6m);
475
476 /* send a report immediately */
477 mld_stoptimer(in6m);
478 mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
479 in6m->in6m_state = MLD_IREPORTEDLAST;
480
481 in6m_unref(in6m); /* May free in6m */
482 } else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
483 mld_timerresid(in6m) > timer) {
484 in6m->in6m_timer =
485 1 + (cprng_fast32() % timer) * hz / 1000;
486 mld_starttimer(in6m);
487 }
488 }
489 rw_exit(&in6_multilock);
490 break;
491 }
492
493 case MLD_LISTENER_REPORT:
494 /*
495 * For fast leave to work, we have to know that we are the
496 * last person to send a report for this group. Reports
497 * can potentially get looped back if we are a multicast
498 * router, so discard reports sourced by me.
499 * Note that it is impossible to check IFF_LOOPBACK flag of
500 * ifp for this purpose, since ip6_mloopback pass the physical
501 * interface to looutput.
502 */
503 if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
504 break;
505
506 if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
507 break;
508
509 /*
510 * If we belong to the group being reported, stop
511 * our timer for that group.
512 */
513 rw_enter(&in6_multilock, RW_WRITER);
514 in6m = in6_lookup_multi(&mld_addr, ifp);
515 if (in6m) {
516 in6m_ref(in6m);
517 mld_stoptimer(in6m); /* transit to idle state */
518 in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
519 in6m_unref(in6m);
520 in6m = NULL; /* in6m might be freed */
521 }
522 rw_exit(&in6_multilock);
523 break;
524 default: /* this is impossible */
525 #if 0
526 /*
527 * this case should be impossible because of filtering in
528 * icmp6_input(). But we explicitly disabled this part
529 * just in case.
530 */
531 log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
532 #endif
533 break;
534 }
535
536 out:
537 m_freem(m);
538 out_nodrop:
539 m_put_rcvif_psref(ifp, &psref);
540 }
541
542 /*
543 * XXX mld_sendpkt must be called with in6_multilock held and
544 * will release in6_multilock before calling ip6_output and
545 * returning to avoid locking against myself in ip6_output.
546 */
547 static void
548 mld_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
549 {
550 struct mbuf *mh;
551 struct mld_hdr *mldh;
552 struct ip6_hdr *ip6 = NULL;
553 struct ip6_moptions im6o;
554 struct in6_ifaddr *ia = NULL;
555 struct ifnet *ifp = in6m->in6m_ifp;
556 int ignflags;
557 struct psref psref;
558 int bound;
559
560 KASSERT(rw_write_held(&in6_multilock));
561
562 /*
563 * At first, find a link local address on the outgoing interface
564 * to use as the source address of the MLD packet.
565 * We do not reject tentative addresses for MLD report to deal with
566 * the case where we first join a link-local address.
567 */
568 ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
569 bound = curlwp_bind();
570 ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
571 if (ia == NULL) {
572 curlwp_bindx(bound);
573 return;
574 }
575 if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
576 ia6_release(ia, &psref);
577 ia = NULL;
578 }
579
580 /* Allocate two mbufs to store IPv6 header and MLD header */
581 mldh = mld_allocbuf(&mh, in6m, type);
582 if (mldh == NULL) {
583 ia6_release(ia, &psref);
584 curlwp_bindx(bound);
585 return;
586 }
587
588 /* fill src/dst here */
589 ip6 = mtod(mh, struct ip6_hdr *);
590 ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
591 ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
592 ia6_release(ia, &psref);
593 curlwp_bindx(bound);
594
595 mldh->mld_addr = in6m->in6m_addr;
596 in6_clearscope(&mldh->mld_addr); /* XXX */
597 mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
598 sizeof(struct mld_hdr));
599
600 /* construct multicast option */
601 memset(&im6o, 0, sizeof(im6o));
602 im6o.im6o_multicast_if_index = if_get_index(ifp);
603 im6o.im6o_multicast_hlim = 1;
604
605 /*
606 * Request loopback of the report if we are acting as a multicast
607 * router, so that the process-level routing daemon can hear it.
608 */
609 im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
610
611 /* increment output statistics */
612 ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
613 icmp6_ifstat_inc(ifp, ifs6_out_msg);
614 switch (type) {
615 case MLD_LISTENER_QUERY:
616 icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
617 break;
618 case MLD_LISTENER_REPORT:
619 icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
620 break;
621 case MLD_LISTENER_DONE:
622 icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
623 break;
624 }
625
626 /* XXX we cannot call ip6_output with holding in6_multilock */
627 rw_exit(&in6_multilock);
628
629 ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
630 &im6o, NULL, NULL);
631
632 rw_enter(&in6_multilock, RW_WRITER);
633 }
634
635 static struct mld_hdr *
636 mld_allocbuf(struct mbuf **mh, struct in6_multi *in6m, int type)
637 {
638 struct mbuf *md;
639 struct mld_hdr *mldh;
640 struct ip6_hdr *ip6;
641
642 /*
643 * Allocate mbufs to store ip6 header and MLD header.
644 * We allocate 2 mbufs and make chain in advance because
645 * it is more convenient when inserting the hop-by-hop option later.
646 */
647 MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
648 if (*mh == NULL)
649 return NULL;
650 MGET(md, M_DONTWAIT, MT_DATA);
651 if (md == NULL) {
652 m_free(*mh);
653 *mh = NULL;
654 return NULL;
655 }
656 (*mh)->m_next = md;
657 md->m_next = NULL;
658
659 m_reset_rcvif((*mh));
660 (*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
661 (*mh)->m_len = sizeof(struct ip6_hdr);
662 MH_ALIGN(*mh, sizeof(struct ip6_hdr));
663
664 /* fill in the ip6 header */
665 ip6 = mtod(*mh, struct ip6_hdr *);
666 memset(ip6, 0, sizeof(*ip6));
667 ip6->ip6_flow = 0;
668 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
669 ip6->ip6_vfc |= IPV6_VERSION;
670 /* ip6_plen will be set later */
671 ip6->ip6_nxt = IPPROTO_ICMPV6;
672 /* ip6_hlim will be set by im6o.im6o_multicast_hlim */
673 /* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
674
675 /* fill in the MLD header as much as possible */
676 md->m_len = sizeof(struct mld_hdr);
677 mldh = mtod(md, struct mld_hdr *);
678 memset(mldh, 0, sizeof(struct mld_hdr));
679 mldh->mld_type = type;
680 return mldh;
681 }
682
683 static void
684 in6m_ref(struct in6_multi *in6m)
685 {
686
687 KASSERT(rw_write_held(&in6_multilock));
688 in6m->in6m_refcount++;
689 }
690
691 static void
692 in6m_unref(struct in6_multi *in6m)
693 {
694
695 KASSERT(rw_write_held(&in6_multilock));
696 if (--in6m->in6m_refcount == 0)
697 in6m_destroy(in6m);
698 }
699
700 /*
701 * Add an address to the list of IP6 multicast addresses for a given interface.
702 */
703 struct in6_multi *
704 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp, int *errorp,
705 int timer)
706 {
707 struct sockaddr_in6 sin6;
708 struct in6_multi *in6m;
709
710 *errorp = 0;
711
712 rw_enter(&in6_multilock, RW_WRITER);
713 /*
714 * See if address already in list.
715 */
716 in6m = in6_lookup_multi(maddr6, ifp);
717 if (in6m != NULL) {
718 /*
719 * Found it; just increment the reference count.
720 */
721 in6m->in6m_refcount++;
722 } else {
723 /*
724 * New address; allocate a new multicast record
725 * and link it into the interface's multicast list.
726 */
727 in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
728 if (in6m == NULL) {
729 *errorp = ENOBUFS;
730 goto out;
731 }
732
733 in6m->in6m_addr = *maddr6;
734 in6m->in6m_ifp = ifp;
735 in6m->in6m_refcount = 1;
736 in6m->in6m_timer = IN6M_TIMER_UNDEF;
737 callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
738 callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
739
740 LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
741
742 /*
743 * Ask the network driver to update its multicast reception
744 * filter appropriately for the new address.
745 */
746 sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
747 *errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
748 if (*errorp) {
749 callout_destroy(&in6m->in6m_timer_ch);
750 LIST_REMOVE(in6m, in6m_entry);
751 free(in6m, M_IPMADDR);
752 in6m = NULL;
753 goto out;
754 }
755
756 in6m->in6m_timer = timer;
757 if (in6m->in6m_timer > 0) {
758 in6m->in6m_state = MLD_REPORTPENDING;
759 mld_starttimer(in6m);
760 goto out;
761 }
762
763 /*
764 * Let MLD6 know that we have joined a new IP6 multicast
765 * group.
766 */
767 mld_start_listening(in6m);
768 }
769 out:
770 rw_exit(&in6_multilock);
771 return in6m;
772 }
773
774 static void
775 in6m_destroy(struct in6_multi *in6m)
776 {
777 struct sockaddr_in6 sin6;
778
779 KASSERT(rw_write_held(&in6_multilock));
780 KASSERT(in6m->in6m_refcount == 0);
781
782 /*
783 * No remaining claims to this record; let MLD6 know
784 * that we are leaving the multicast group.
785 */
786 mld_stop_listening(in6m);
787
788 /*
789 * Unlink from list.
790 */
791 LIST_REMOVE(in6m, in6m_entry);
792
793 /*
794 * Delete all references of this multicasting group from
795 * the membership arrays
796 */
797 in6_purge_mcast_references(in6m);
798
799 /*
800 * Notify the network driver to update its multicast
801 * reception filter.
802 */
803 sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
804 if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
805
806 /* Tell mld_timeo we're halting the timer */
807 in6m->in6m_timer = IN6M_TIMER_UNDEF;
808 if (mutex_owned(softnet_lock))
809 callout_halt(&in6m->in6m_timer_ch, softnet_lock);
810 else
811 callout_halt(&in6m->in6m_timer_ch, NULL);
812 callout_destroy(&in6m->in6m_timer_ch);
813
814 free(in6m, M_IPMADDR);
815 }
816
817 /*
818 * Delete a multicast address record.
819 */
820 void
821 in6_delmulti(struct in6_multi *in6m)
822 {
823
824 KASSERT(in6m->in6m_refcount > 0);
825
826 rw_enter(&in6_multilock, RW_WRITER);
827 /*
828 * The caller should have a reference to in6m. So we don't need to care
829 * of releasing the lock in mld_stoptimer.
830 */
831 mld_stoptimer(in6m);
832 if (--in6m->in6m_refcount == 0)
833 in6m_destroy(in6m);
834 rw_exit(&in6_multilock);
835 }
836
837 /*
838 * Look up the in6_multi record for a given IP6 multicast address
839 * on a given interface. If no matching record is found, "in6m"
840 * returns NULL.
841 */
842 struct in6_multi *
843 in6_lookup_multi(const struct in6_addr *addr, const struct ifnet *ifp)
844 {
845 struct in6_multi *in6m;
846
847 KASSERT(rw_lock_held(&in6_multilock));
848
849 LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
850 if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
851 break;
852 }
853 return in6m;
854 }
855
856 bool
857 in6_multi_group(const struct in6_addr *addr, const struct ifnet *ifp)
858 {
859 bool ingroup;
860
861 rw_enter(&in6_multilock, RW_READER);
862 ingroup = in6_lookup_multi(addr, ifp) != NULL;
863 rw_exit(&in6_multilock);
864
865 return ingroup;
866 }
867
868 /*
869 * Purge in6_multi records associated to the interface.
870 */
871 void
872 in6_purge_multi(struct ifnet *ifp)
873 {
874 struct in6_multi *in6m, *next;
875
876 rw_enter(&in6_multilock, RW_WRITER);
877 LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
878 /*
879 * Normally multicast addresses are already purged at this
880 * point. Remaining references aren't accessible via ifp,
881 * so what we can do here is to prevent ifp from being
882 * accessed via in6m by removing it from the list of ifp.
883 */
884 mld_stoptimer(in6m);
885 LIST_REMOVE(in6m, in6m_entry);
886 }
887 rw_exit(&in6_multilock);
888 }
889
890 void
891 in6_multi_lock(int op)
892 {
893
894 rw_enter(&in6_multilock, op);
895 }
896
897 void
898 in6_multi_unlock(void)
899 {
900
901 rw_exit(&in6_multilock);
902 }
903
904 bool
905 in6_multi_locked(int op)
906 {
907
908 switch (op) {
909 case RW_READER:
910 return rw_read_held(&in6_multilock);
911 case RW_WRITER:
912 return rw_write_held(&in6_multilock);
913 default:
914 return rw_lock_held(&in6_multilock);
915 }
916 }
917
918 struct in6_multi_mship *
919 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr, int *errorp, int timer)
920 {
921 struct in6_multi_mship *imm;
922
923 imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
924 if (imm == NULL) {
925 *errorp = ENOBUFS;
926 return NULL;
927 }
928
929 imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
930 if (!imm->i6mm_maddr) {
931 /* *errorp is already set */
932 free(imm, M_IPMADDR);
933 return NULL;
934 }
935 return imm;
936 }
937
938 int
939 in6_leavegroup(struct in6_multi_mship *imm)
940 {
941 struct in6_multi *in6m;
942
943 rw_enter(&in6_multilock, RW_READER);
944 in6m = imm->i6mm_maddr;
945 rw_exit(&in6_multilock);
946 if (in6m != NULL) {
947 in6_delmulti(in6m);
948 }
949 free(imm, M_IPMADDR);
950 return 0;
951 }
952
953 /*
954 * DEPRECATED: keep it just to avoid breaking old sysctl users.
955 */
956 static int
957 in6_mkludge_sysctl(SYSCTLFN_ARGS)
958 {
959
960 if (namelen != 1)
961 return EINVAL;
962 *oldlenp = 0;
963 return 0;
964 }
965
966 static int
967 in6_multicast_sysctl(SYSCTLFN_ARGS)
968 {
969 struct ifnet *ifp;
970 struct ifaddr *ifa;
971 struct in6_ifaddr *ia6;
972 struct in6_multi *in6m;
973 uint32_t tmp;
974 int error;
975 size_t written;
976 struct psref psref, psref_ia;
977 int bound, s;
978
979 if (namelen != 1)
980 return EINVAL;
981
982 rw_enter(&in6_multilock, RW_READER);
983
984 bound = curlwp_bind();
985 ifp = if_get_byindex(name[0], &psref);
986 if (ifp == NULL) {
987 curlwp_bindx(bound);
988 rw_exit(&in6_multilock);
989 return ENODEV;
990 }
991
992 if (oldp == NULL) {
993 *oldlenp = 0;
994 s = pserialize_read_enter();
995 IFADDR_READER_FOREACH(ifa, ifp) {
996 LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
997 *oldlenp += 2 * sizeof(struct in6_addr) +
998 sizeof(uint32_t);
999 }
1000 }
1001 pserialize_read_exit(s);
1002 if_put(ifp, &psref);
1003 curlwp_bindx(bound);
1004 rw_exit(&in6_multilock);
1005 return 0;
1006 }
1007
1008 error = 0;
1009 written = 0;
1010 s = pserialize_read_enter();
1011 IFADDR_READER_FOREACH(ifa, ifp) {
1012 if (ifa->ifa_addr->sa_family != AF_INET6)
1013 continue;
1014
1015 ifa_acquire(ifa, &psref_ia);
1016 pserialize_read_exit(s);
1017
1018 ia6 = ifatoia6(ifa);
1019 LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
1020 if (written + 2 * sizeof(struct in6_addr) +
1021 sizeof(uint32_t) > *oldlenp)
1022 goto done;
1023 /*
1024 * XXX return the first IPv6 address to keep backward
1025 * compatibility, however now multicast addresses
1026 * don't belong to any IPv6 addresses so it should be
1027 * unnecessary.
1028 */
1029 error = sysctl_copyout(l, &ia6->ia_addr.sin6_addr,
1030 oldp, sizeof(struct in6_addr));
1031 if (error)
1032 goto done;
1033 oldp = (char *)oldp + sizeof(struct in6_addr);
1034 written += sizeof(struct in6_addr);
1035 error = sysctl_copyout(l, &in6m->in6m_addr,
1036 oldp, sizeof(struct in6_addr));
1037 if (error)
1038 goto done;
1039 oldp = (char *)oldp + sizeof(struct in6_addr);
1040 written += sizeof(struct in6_addr);
1041 tmp = in6m->in6m_refcount;
1042 error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1043 if (error)
1044 goto done;
1045 oldp = (char *)oldp + sizeof(tmp);
1046 written += sizeof(tmp);
1047 }
1048
1049 s = pserialize_read_enter();
1050
1051 break;
1052 }
1053 pserialize_read_exit(s);
1054 done:
1055 ifa_release(ifa, &psref_ia);
1056 if_put(ifp, &psref);
1057 curlwp_bindx(bound);
1058 rw_exit(&in6_multilock);
1059 *oldlenp = written;
1060 return error;
1061 }
1062
1063 void
1064 in6_sysctl_multicast_setup(struct sysctllog **clog)
1065 {
1066
1067 sysctl_createv(clog, 0, NULL, NULL,
1068 CTLFLAG_PERMANENT,
1069 CTLTYPE_NODE, "inet6", NULL,
1070 NULL, 0, NULL, 0,
1071 CTL_NET, PF_INET6, CTL_EOL);
1072
1073 sysctl_createv(clog, 0, NULL, NULL,
1074 CTLFLAG_PERMANENT,
1075 CTLTYPE_NODE, "multicast",
1076 SYSCTL_DESCR("Multicast information"),
1077 in6_multicast_sysctl, 0, NULL, 0,
1078 CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1079
1080 sysctl_createv(clog, 0, NULL, NULL,
1081 CTLFLAG_PERMANENT,
1082 CTLTYPE_NODE, "multicast_kludge",
1083 SYSCTL_DESCR("multicast kludge information"),
1084 in6_mkludge_sysctl, 0, NULL, 0,
1085 CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1086 }
1087