1 1.166 ozaki /* $NetBSD: ip_mroute.c,v 1.166 2025/06/11 02:44:13 ozaki-r Exp $ */ 2 1.61 itojun 3 1.61 itojun /* 4 1.61 itojun * Copyright (c) 1992, 1993 5 1.61 itojun * The Regents of the University of California. All rights reserved. 6 1.61 itojun * 7 1.61 itojun * This code is derived from software contributed to Berkeley by 8 1.61 itojun * Stephen Deering of Stanford University. 9 1.61 itojun * 10 1.61 itojun * Redistribution and use in source and binary forms, with or without 11 1.61 itojun * modification, are permitted provided that the following conditions 12 1.61 itojun * are met: 13 1.61 itojun * 1. Redistributions of source code must retain the above copyright 14 1.61 itojun * notice, this list of conditions and the following disclaimer. 15 1.61 itojun * 2. Redistributions in binary form must reproduce the above copyright 16 1.61 itojun * notice, this list of conditions and the following disclaimer in the 17 1.61 itojun * documentation and/or other materials provided with the distribution. 18 1.76 agc * 3. Neither the name of the University nor the names of its contributors 19 1.76 agc * may be used to endorse or promote products derived from this software 20 1.76 agc * without specific prior written permission. 21 1.76 agc * 22 1.76 agc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 1.76 agc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 1.76 agc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 1.76 agc * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 1.76 agc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 1.76 agc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 1.76 agc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 1.76 agc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 1.76 agc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 1.76 agc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 1.76 agc * SUCH DAMAGE. 33 1.76 agc * 34 1.76 agc * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 35 1.76 agc */ 36 1.76 agc 37 1.76 agc /* 38 1.76 agc * Copyright (c) 1989 Stephen Deering 39 1.76 agc * 40 1.76 agc * This code is derived from software contributed to Berkeley by 41 1.76 agc * Stephen Deering of Stanford University. 42 1.76 agc * 43 1.76 agc * Redistribution and use in source and binary forms, with or without 44 1.76 agc * modification, are permitted provided that the following conditions 45 1.76 agc * are met: 46 1.76 agc * 1. Redistributions of source code must retain the above copyright 47 1.76 agc * notice, this list of conditions and the following disclaimer. 48 1.76 agc * 2. Redistributions in binary form must reproduce the above copyright 49 1.76 agc * notice, this list of conditions and the following disclaimer in the 50 1.76 agc * documentation and/or other materials provided with the distribution. 51 1.61 itojun * 3. All advertising materials mentioning features or use of this software 52 1.61 itojun * must display the following acknowledgement: 53 1.61 itojun * This product includes software developed by the University of 54 1.61 itojun * California, Berkeley and its contributors. 55 1.61 itojun * 4. 
Neither the name of the University nor the names of its contributors 56 1.61 itojun * may be used to endorse or promote products derived from this software 57 1.61 itojun * without specific prior written permission. 58 1.61 itojun * 59 1.61 itojun * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 1.61 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 1.61 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 1.61 itojun * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 1.61 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 1.61 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 1.61 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 1.61 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 1.61 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 1.61 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 1.61 itojun * SUCH DAMAGE. 70 1.61 itojun * 71 1.61 itojun * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 72 1.61 itojun */ 73 1.13 cgd 74 1.1 hpeyerl /* 75 1.15 mycroft * IP multicast forwarding procedures 76 1.1 hpeyerl * 77 1.1 hpeyerl * Written by David Waitzman, BBN Labs, August 1988. 78 1.1 hpeyerl * Modified by Steve Deering, Stanford, February 1989. 79 1.15 mycroft * Modified by Mark J. Steiglitz, Stanford, May, 1991 80 1.15 mycroft * Modified by Van Jacobson, LBL, January 1993 81 1.15 mycroft * Modified by Ajit Thyagarajan, PARC, August 1993 82 1.15 mycroft * Modified by Bill Fenner, PARC, April 1994 83 1.15 mycroft * Modified by Charles M. Hannum, NetBSD, May 1995. 
84 1.86 manu * Modified by Ahmed Helmy, SGI, June 1996 85 1.86 manu * Modified by George Edmond Eddy (Rusty), ISI, February 1998 86 1.86 manu * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 87 1.86 manu * Modified by Hitoshi Asaeda, WIDE, August 2000 88 1.86 manu * Modified by Pavlin Radoslavov, ICSI, October 2002 89 1.1 hpeyerl * 90 1.15 mycroft * MROUTING Revision: 1.2 91 1.86 manu * and PIM-SMv2 and PIM-DM support, advanced API support, 92 1.86 manu * bandwidth metering and signaling 93 1.1 hpeyerl */ 94 1.58 lukem 95 1.58 lukem #include <sys/cdefs.h> 96 1.166 ozaki __KERNEL_RCSID(0, "$NetBSD: ip_mroute.c,v 1.166 2025/06/11 02:44:13 ozaki-r Exp $"); 97 1.44 thorpej 98 1.132 pooka #ifdef _KERNEL_OPT 99 1.82 jonathan #include "opt_inet.h" 100 1.44 thorpej #include "opt_ipsec.h" 101 1.86 manu #include "opt_pim.h" 102 1.132 pooka #endif 103 1.86 manu 104 1.86 manu #ifdef PIM 105 1.86 manu #define _PIM_VT 1 106 1.86 manu #endif 107 1.1 hpeyerl 108 1.1 hpeyerl #include <sys/param.h> 109 1.15 mycroft #include <sys/systm.h> 110 1.47 thorpej #include <sys/callout.h> 111 1.1 hpeyerl #include <sys/mbuf.h> 112 1.1 hpeyerl #include <sys/socket.h> 113 1.1 hpeyerl #include <sys/socketvar.h> 114 1.15 mycroft #include <sys/errno.h> 115 1.1 hpeyerl #include <sys/time.h> 116 1.15 mycroft #include <sys/kernel.h> 117 1.125 martin #include <sys/kmem.h> 118 1.15 mycroft #include <sys/ioctl.h> 119 1.15 mycroft #include <sys/syslog.h> 120 1.86 manu 121 1.1 hpeyerl #include <net/if.h> 122 1.1 hpeyerl #include <net/raw_cb.h> 123 1.86 manu 124 1.1 hpeyerl #include <netinet/in.h> 125 1.15 mycroft #include <netinet/in_var.h> 126 1.1 hpeyerl #include <netinet/in_systm.h> 127 1.162 martin #include <netinet/in_offload.h> 128 1.1 hpeyerl #include <netinet/ip.h> 129 1.15 mycroft #include <netinet/ip_var.h> 130 1.1 hpeyerl #include <netinet/in_pcb.h> 131 1.15 mycroft #include <netinet/udp.h> 132 1.1 hpeyerl #include <netinet/igmp.h> 133 1.1 hpeyerl #include 
<netinet/igmp_var.h> 134 1.1 hpeyerl #include <netinet/ip_mroute.h> 135 1.86 manu #ifdef PIM 136 1.86 manu #include <netinet/pim.h> 137 1.86 manu #include <netinet/pim_var.h> 138 1.86 manu #endif 139 1.54 itojun #include <netinet/ip_encap.h> 140 1.64 fair 141 1.127 christos #ifdef IPSEC 142 1.77 jonathan #include <netipsec/ipsec.h> 143 1.77 jonathan #include <netipsec/key.h> 144 1.77 jonathan #endif 145 1.77 jonathan 146 1.15 mycroft #define IP_MULTICASTOPTS 0 147 1.86 manu #define M_PULLUP(m, len) \ 148 1.86 manu do { \ 149 1.15 mycroft if ((m) && ((m)->m_flags & M_EXT || (m)->m_len < (len))) \ 150 1.86 manu (m) = m_pullup((m), (len)); \ 151 1.63 perry } while (/*CONSTCOND*/ 0) 152 1.1 hpeyerl 153 1.1 hpeyerl /* 154 1.1 hpeyerl * Globals. All but ip_mrouter and ip_mrtproto could be static, 155 1.1 hpeyerl * except for netstat or debugging purposes. * ip_mrouter is the socket that enabled multicast routing via MRT_INIT; * it is NULL while multicast routing is disabled. 156 1.1 hpeyerl */ 157 1.86 manu struct socket *ip_mrouter = NULL; 158 1.15 mycroft int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 159 1.15 mycroft 160 1.86 manu #define MFCHASH(a, g) \ 161 1.86 manu ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ 162 1.29 mycroft ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash) 163 1.15 mycroft LIST_HEAD(mfchashhdr, mfc) *mfchashtbl; 164 1.15 mycroft u_long mfchash; 165 1.15 mycroft 166 1.15 mycroft u_char nexpire[MFCTBLSIZ]; 167 1.15 mycroft struct vif viftable[MAXVIFS]; 168 1.15 mycroft struct mrtstat mrtstat; 169 1.150 maxv u_int mrtdebug = 0; /* debug level; nonzero enables LOG_DEBUG logging */ 170 1.15 mycroft #define DEBUG_MFC 0x02 171 1.15 mycroft #define DEBUG_FORWARD 0x04 172 1.15 mycroft #define DEBUG_EXPIRE 0x08 173 1.15 mycroft #define DEBUG_XMIT 0x10 174 1.86 manu #define DEBUG_PIM 0x20 175 1.86 manu 176 1.86 manu #define VIFI_INVALID ((vifi_t) -1) 177 1.86 manu 178 1.150 maxv u_int tbfdebug = 0; /* tbf debug level */ 179 1.15 mycroft 180 1.54 itojun /* vif attachment using sys/netinet/ip_encap.c */ 181 1.148 knakahar static void vif_input(struct mbuf *, int, int, void *); 
182 1.94 martin static int vif_encapcheck(struct mbuf *, int, int, void *); 183 1.84 matt 184 1.137 knakahar static const struct encapsw vif_encapsw = { 185 1.137 knakahar .encapsw4 = { 186 1.137 knakahar .pr_input = vif_input, 187 1.137 knakahar .pr_ctlinput = NULL, 188 1.137 knakahar } 189 1.54 itojun }; 190 1.54 itojun 191 1.15 mycroft #define EXPIRE_TIMEOUT (hz / 4) /* period of the expire_upcalls callout: 4x / second */ 192 1.15 mycroft #define UPCALL_EXPIRE 6 /* number of timeouts */ 193 1.15 mycroft 194 1.15 mycroft /* 195 1.15 mycroft * Define the token bucket filter structures 196 1.15 mycroft */ 197 1.15 mycroft 198 1.31 mycroft #define TBF_REPROCESS (hz / 100) /* 100x / second */ 199 1.15 mycroft 200 1.88 perry static int get_sg_cnt(struct sioc_sg_req *); 201 1.88 perry static int get_vif_cnt(struct sioc_vif_req *); 202 1.115 plunky static int ip_mrouter_init(struct socket *, int); 203 1.115 plunky static int set_assert(int); 204 1.115 plunky static int add_vif(struct vifctl *); 205 1.115 plunky static int del_vif(vifi_t *); 206 1.88 perry static void update_mfc_params(struct mfc *, struct mfcctl2 *); 207 1.88 perry static void init_mfc_params(struct mfc *, struct mfcctl2 *); 208 1.88 perry static void expire_mfc(struct mfc *); 209 1.115 plunky static int add_mfc(struct sockopt *); 210 1.25 christos #ifdef UPCALL_TIMING 211 1.88 perry static void collate(struct timeval *); 212 1.25 christos #endif 213 1.115 plunky static int del_mfc(struct sockopt *); 214 1.115 plunky static int set_api_config(struct sockopt *); /* choose API capabilities */ 215 1.88 perry static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); 216 1.88 perry static void expire_upcalls(void *); 217 1.88 perry static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *); 218 1.88 perry static void phyint_send(struct ip *, struct vif *, struct mbuf *); 219 1.88 perry static void encap_send(struct ip *, struct vif *, struct mbuf *); 220 1.88 perry static void tbf_control(struct vif *, struct mbuf *, 
struct ip *, u_int32_t); 221 1.88 perry static void tbf_queue(struct vif *, struct mbuf *); 222 1.88 perry static void tbf_process_q(struct vif *); 223 1.88 perry static void tbf_reprocess_q(void *); 224 1.88 perry static int tbf_dq_sel(struct vif *, struct ip *); 225 1.88 perry static void tbf_send_packet(struct vif *, struct mbuf *); 226 1.88 perry static void tbf_update_tokens(struct vif *); 227 1.88 perry static int priority(struct vif *, struct ip *); 228 1.164 kardel static int ip_mforward_real(struct mbuf *, struct ifnet *); 229 1.164 kardel 230 1.1 hpeyerl 231 1.1 hpeyerl /* 232 1.86 manu * Bandwidth monitoring: prototypes for the bw_meter / bw_upcall helpers 233 1.86 manu */ 234 1.88 perry static void free_bw_list(struct bw_meter *); 235 1.115 plunky static int add_bw_upcall(struct bw_upcall *); 236 1.115 plunky static int del_bw_upcall(struct bw_upcall *); 237 1.88 perry static void bw_meter_receive_packet(struct bw_meter *, int , struct timeval *); 238 1.88 perry static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *); 239 1.88 perry static void bw_upcalls_send(void); 240 1.88 perry static void schedule_bw_meter(struct bw_meter *, struct timeval *); 241 1.88 perry static void unschedule_bw_meter(struct bw_meter *); 242 1.88 perry static void bw_meter_process(void); 243 1.88 perry static void expire_bw_upcalls_send(void *); 244 1.88 perry static void expire_bw_meter_process(void *); 245 1.86 manu 246 1.86 manu #ifdef PIM 247 1.88 perry static int pim_register_send(struct ip *, struct vif *, 248 1.150 maxv struct mbuf *, struct mfc *); 249 1.88 perry static int pim_register_send_rp(struct ip *, struct vif *, 250 1.150 maxv struct mbuf *, struct mfc *); 251 1.88 perry static int pim_register_send_upcall(struct ip *, struct vif *, 252 1.150 maxv struct mbuf *, struct mfc *); 253 1.88 perry static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *); 254 1.86 manu #endif 255 1.86 manu 256 1.17 mycroft #define ENCAP_TTL 64 257 1.150 maxv #define ENCAP_PROTO IPPROTO_IPIP 258 1.12 
brezak 259 1.12 brezak /* prototype IP header for encapsulated packets (protocol ENCAP_PROTO, TTL ENCAP_TTL) */ 260 1.153 maxv static const struct ip multicast_encap_iphdr = { 261 1.98 christos .ip_hl = sizeof(struct ip) >> 2, 262 1.98 christos .ip_v = IPVERSION, 263 1.98 christos .ip_len = sizeof(struct ip), 264 1.98 christos .ip_ttl = ENCAP_TTL, 265 1.98 christos .ip_p = ENCAP_PROTO, 266 1.12 brezak }; 267 1.12 brezak 268 1.12 brezak /* 269 1.86 manu * Bandwidth meter variables and constants 270 1.86 manu */ 271 1.86 manu 272 1.86 manu /* 273 1.86 manu * Pending timeouts are stored in a hash table, the key being the 274 1.86 manu * expiration time. Periodically, the entries are analysed and processed. 275 1.86 manu */ 276 1.86 manu #define BW_METER_BUCKETS 1024 277 1.86 manu static struct bw_meter *bw_meter_timers[BW_METER_BUCKETS]; 278 1.86 manu struct callout bw_meter_ch; 279 1.86 manu #define BW_METER_PERIOD (hz) /* period of the bw_meter_ch callout (bw meter processing) */ 280 1.86 manu 281 1.86 manu /* 282 1.86 manu * Pending upcalls are stored in a vector which is flushed when 283 1.86 manu * full, or periodically 284 1.86 manu */ 285 1.86 manu static struct bw_upcall bw_upcalls[BW_UPCALLS_MAX]; 286 1.86 manu static u_int bw_upcalls_n; /* # of pending upcalls */ 287 1.86 manu struct callout bw_upcalls_ch; 288 1.86 manu #define BW_UPCALLS_PERIOD (hz) /* period of the bw_upcalls_ch callout (flush of bw upcalls) */ 289 1.86 manu 290 1.86 manu #ifdef PIM 291 1.86 manu struct pimstat pimstat; 292 1.86 manu 293 1.86 manu /* 294 1.86 manu * Note: the PIM Register encapsulation adds the following in front of a 295 1.86 manu * data packet: 296 1.86 manu * 297 1.86 manu * struct pim_encap_hdr { 298 1.150 maxv * struct ip ip; 299 1.150 maxv * struct pim_encap_pimhdr pim; 300 1.86 manu * } 301 1.86 manu */ 302 1.86 manu 303 1.86 manu struct pim_encap_pimhdr { 304 1.86 manu struct pim pim; 305 1.86 manu uint32_t flags; 306 1.86 manu }; 307 1.86 manu 308 1.86 manu static struct ip pim_encap_iphdr = { 309 1.98 christos .ip_v = IPVERSION, 310 1.98 christos 
.ip_hl = sizeof(struct ip) >> 2, 311 1.98 christos .ip_len = sizeof(struct ip), 312 1.98 christos .ip_ttl = ENCAP_TTL, 313 1.98 christos .ip_p = IPPROTO_PIM, 314 1.86 manu }; 315 1.86 manu 316 1.86 manu static struct pim_encap_pimhdr pim_encap_pimhdr = { 317 1.86 manu { 318 1.86 manu PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM version and message type */ 319 1.86 manu 0, /* reserved */ 320 1.86 manu 0, /* checksum */ 321 1.86 manu }, 322 1.86 manu 0 /* flags */ 323 1.86 manu }; 324 1.86 manu 325 1.86 manu static struct ifnet multicast_register_if; 326 1.86 manu static vifi_t reg_vif_num = VIFI_INVALID; 327 1.86 manu #endif /* PIM */ 328 1.86 manu 329 1.86 manu 330 1.86 manu /* 331 1.1 hpeyerl * Private variables. 332 1.1 hpeyerl */ 333 1.15 mycroft static vifi_t numvifs = 0; 334 1.12 brezak 335 1.47 thorpej static struct callout expire_upcalls_ch; 336 1.47 thorpej 337 1.12 brezak /* 338 1.15 mycroft * Whether or not special PIM assert processing is enabled. 339 1.15 mycroft */ 340 1.15 mycroft static int pim_assert; 341 1.15 mycroft /* 342 1.15 mycroft * Rate limit for assert notification messages, in usec 343 1.12 brezak */ 344 1.15 mycroft #define ASSERT_MSG_TIME 3000000 345 1.12 brezak 346 1.15 mycroft /* 347 1.86 manu * Kernel multicast routing API capabilities and setup. 348 1.86 manu * If more API capabilities are added to the kernel, they should be 349 1.86 manu * recorded in `mrt_api_support'. 350 1.86 manu */ 351 1.86 manu static const u_int32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | 352 1.86 manu MRT_MFC_FLAGS_BORDER_VIF | 353 1.86 manu MRT_MFC_RP | 354 1.86 manu MRT_MFC_BW_UPCALL); 355 1.86 manu static u_int32_t mrt_api_config = 0; 356 1.86 manu 357 1.86 manu /* 358 1.15 mycroft * Find a route for a given origin IP address and Multicast group address 359 1.15 mycroft * Type of service parameter to be added in the future!!! 
360 1.86 manu * Statistics are updated by the caller if needed 361 1.86 manu * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses) 362 1.15 mycroft */ 363 1.86 manu static struct mfc * 364 1.86 manu mfc_find(struct in_addr *o, struct in_addr *g) 365 1.86 manu { 366 1.86 manu struct mfc *rt; 367 1.86 manu 368 1.86 manu LIST_FOREACH(rt, &mfchashtbl[MFCHASH(*o, *g)], mfc_hash) { 369 1.86 manu if (in_hosteq(rt->mfc_origin, *o) && 370 1.86 manu in_hosteq(rt->mfc_mcastgrp, *g) && 371 1.86 manu (rt->mfc_stall == NULL)) 372 1.86 manu break; 373 1.86 manu } 374 1.15 mycroft 375 1.150 maxv return rt; 376 1.86 manu } 377 1.12 brezak 378 1.12 brezak /* 379 1.15 mycroft * Macros to compute elapsed time efficiently 380 1.15 mycroft * Borrowed from Van Jacobson's scheduling code 381 1.12 brezak */ 382 1.86 manu #define TV_DELTA(a, b, delta) do { \ 383 1.86 manu int xxs; \ 384 1.86 manu delta = (a).tv_usec - (b).tv_usec; \ 385 1.86 manu xxs = (a).tv_sec - (b).tv_sec; \ 386 1.86 manu switch (xxs) { \ 387 1.86 manu case 2: \ 388 1.86 manu delta += 1000000; \ 389 1.86 manu /* fall through */ \ 390 1.86 manu case 1: \ 391 1.86 manu delta += 1000000; \ 392 1.86 manu /* fall through */ \ 393 1.86 manu case 0: \ 394 1.86 manu break; \ 395 1.86 manu default: \ 396 1.86 manu delta += (1000000 * xxs); \ 397 1.86 manu break; \ 398 1.86 manu } \ 399 1.69 itojun } while (/*CONSTCOND*/ 0) 400 1.15 mycroft 401 1.15 mycroft #ifdef UPCALL_TIMING 402 1.15 mycroft u_int32_t upcall_data[51]; 403 1.15 mycroft #endif /* UPCALL_TIMING */ 404 1.15 mycroft 405 1.12 brezak /* 406 1.15 mycroft * Handle MRT setsockopt commands to modify the multicast routing tables. 
407 1.12 brezak */ 408 1.15 mycroft int 409 1.115 plunky ip_mrouter_set(struct socket *so, struct sockopt *sopt) 410 1.15 mycroft { 411 1.15 mycroft int error; 412 1.115 plunky int optval; 413 1.115 plunky struct vifctl vifc; 414 1.115 plunky vifi_t vifi; 415 1.115 plunky struct bw_upcall bwuc; 416 1.15 mycroft 417 1.115 plunky if (sopt->sopt_name != MRT_INIT && so != ip_mrouter) 418 1.28 mycroft error = ENOPROTOOPT; 419 1.115 plunky else { 420 1.115 plunky switch (sopt->sopt_name) { 421 1.15 mycroft case MRT_INIT: 422 1.115 plunky error = sockopt_getint(sopt, &optval); 423 1.115 plunky if (error) 424 1.115 plunky break; 425 1.115 plunky 426 1.115 plunky error = ip_mrouter_init(so, optval); 427 1.15 mycroft break; 428 1.15 mycroft case MRT_DONE: 429 1.15 mycroft error = ip_mrouter_done(); 430 1.15 mycroft break; 431 1.15 mycroft case MRT_ADD_VIF: 432 1.115 plunky error = sockopt_get(sopt, &vifc, sizeof(vifc)); 433 1.115 plunky if (error) 434 1.115 plunky break; 435 1.115 plunky error = add_vif(&vifc); 436 1.15 mycroft break; 437 1.15 mycroft case MRT_DEL_VIF: 438 1.115 plunky error = sockopt_get(sopt, &vifi, sizeof(vifi)); 439 1.115 plunky if (error) 440 1.115 plunky break; 441 1.115 plunky error = del_vif(&vifi); 442 1.15 mycroft break; 443 1.15 mycroft case MRT_ADD_MFC: 444 1.115 plunky error = add_mfc(sopt); 445 1.15 mycroft break; 446 1.15 mycroft case MRT_DEL_MFC: 447 1.115 plunky error = del_mfc(sopt); 448 1.15 mycroft break; 449 1.15 mycroft case MRT_ASSERT: 450 1.115 plunky error = sockopt_getint(sopt, &optval); 451 1.115 plunky if (error) 452 1.115 plunky break; 453 1.115 plunky error = set_assert(optval); 454 1.15 mycroft break; 455 1.86 manu case MRT_API_CONFIG: 456 1.115 plunky error = set_api_config(sopt); 457 1.86 manu break; 458 1.86 manu case MRT_ADD_BW_UPCALL: 459 1.115 plunky error = sockopt_get(sopt, &bwuc, sizeof(bwuc)); 460 1.115 plunky if (error) 461 1.115 plunky break; 462 1.115 plunky error = add_bw_upcall(&bwuc); 463 1.86 manu break; 464 
1.86 manu case MRT_DEL_BW_UPCALL: 465 1.115 plunky error = sockopt_get(sopt, &bwuc, sizeof(bwuc)); 466 1.115 plunky if (error) 467 1.115 plunky break; 468 1.115 plunky error = del_bw_upcall(&bwuc); 469 1.86 manu break; 470 1.15 mycroft default: 471 1.28 mycroft error = ENOPROTOOPT; 472 1.15 mycroft break; 473 1.15 mycroft } 474 1.115 plunky } 475 1.150 maxv return error; 476 1.12 brezak } 477 1.12 brezak 478 1.15 mycroft /* 479 1.15 mycroft * Handle MRT getsockopt commands 480 1.15 mycroft */ 481 1.15 mycroft int 482 1.115 plunky ip_mrouter_get(struct socket *so, struct sockopt *sopt) 483 1.12 brezak { 484 1.15 mycroft int error; 485 1.12 brezak 486 1.15 mycroft if (so != ip_mrouter) 487 1.28 mycroft error = ENOPROTOOPT; 488 1.15 mycroft else { 489 1.115 plunky switch (sopt->sopt_name) { 490 1.15 mycroft case MRT_VERSION: 491 1.115 plunky error = sockopt_setint(sopt, 0x0305); /* XXX !!!! */ 492 1.15 mycroft break; 493 1.15 mycroft case MRT_ASSERT: 494 1.115 plunky error = sockopt_setint(sopt, pim_assert); 495 1.15 mycroft break; 496 1.86 manu case MRT_API_SUPPORT: 497 1.115 plunky error = sockopt_set(sopt, &mrt_api_support, 498 1.115 plunky sizeof(mrt_api_support)); 499 1.86 manu break; 500 1.86 manu case MRT_API_CONFIG: 501 1.115 plunky error = sockopt_set(sopt, &mrt_api_config, 502 1.115 plunky sizeof(mrt_api_config)); 503 1.86 manu break; 504 1.15 mycroft default: 505 1.28 mycroft error = ENOPROTOOPT; 506 1.15 mycroft break; 507 1.15 mycroft } 508 1.12 brezak } 509 1.150 maxv return error; 510 1.12 brezak } 511 1.12 brezak 512 1.1 hpeyerl /* 513 1.15 mycroft * Handle ioctl commands to obtain information from the cache 514 1.1 hpeyerl */ 515 1.1 hpeyerl int 516 1.101 christos mrt_ioctl(struct socket *so, u_long cmd, void *data) 517 1.1 hpeyerl { 518 1.15 mycroft int error; 519 1.1 hpeyerl 520 1.28 mycroft if (so != ip_mrouter) 521 1.15 mycroft error = EINVAL; 522 1.28 mycroft else 523 1.28 mycroft switch (cmd) { 524 1.28 mycroft case SIOCGETVIFCNT: 525 1.28 
mycroft error = get_vif_cnt((struct sioc_vif_req *)data); 526 1.28 mycroft break; 527 1.28 mycroft case SIOCGETSGCNT: 528 1.28 mycroft error = get_sg_cnt((struct sioc_sg_req *)data); 529 1.28 mycroft break; 530 1.28 mycroft default: 531 1.28 mycroft error = EINVAL; 532 1.28 mycroft break; 533 1.28 mycroft } 534 1.1 hpeyerl 535 1.150 maxv return error; 536 1.15 mycroft } 537 1.1 hpeyerl 538 1.15 mycroft /* 539 1.15 mycroft * returns the packet, byte, rpf-failure count for the source group provided 540 1.15 mycroft */ 541 1.15 mycroft static int 542 1.89 perry get_sg_cnt(struct sioc_sg_req *req) 543 1.15 mycroft { 544 1.86 manu int s; 545 1.48 augustss struct mfc *rt; 546 1.1 hpeyerl 547 1.24 mycroft s = splsoftnet(); 548 1.86 manu rt = mfc_find(&req->src, &req->grp); 549 1.86 manu if (rt == NULL) { 550 1.86 manu splx(s); 551 1.86 manu req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 552 1.150 maxv return EADDRNOTAVAIL; 553 1.86 manu } 554 1.86 manu req->pktcnt = rt->mfc_pkt_cnt; 555 1.86 manu req->bytecnt = rt->mfc_byte_cnt; 556 1.86 manu req->wrong_if = rt->mfc_wrong_if; 557 1.15 mycroft splx(s); 558 1.1 hpeyerl 559 1.150 maxv return 0; 560 1.15 mycroft } 561 1.1 hpeyerl 562 1.15 mycroft /* 563 1.15 mycroft * returns the input and output packet and byte counts on the vif provided 564 1.15 mycroft */ 565 1.15 mycroft static int 566 1.89 perry get_vif_cnt(struct sioc_vif_req *req) 567 1.15 mycroft { 568 1.48 augustss vifi_t vifi = req->vifi; 569 1.1 hpeyerl 570 1.15 mycroft if (vifi >= numvifs) 571 1.150 maxv return EINVAL; 572 1.1 hpeyerl 573 1.15 mycroft req->icount = viftable[vifi].v_pkt_in; 574 1.15 mycroft req->ocount = viftable[vifi].v_pkt_out; 575 1.15 mycroft req->ibytes = viftable[vifi].v_bytes_in; 576 1.15 mycroft req->obytes = viftable[vifi].v_bytes_out; 577 1.1 hpeyerl 578 1.150 maxv return 0; 579 1.1 hpeyerl } 580 1.1 hpeyerl 581 1.1 hpeyerl /* 582 1.1 hpeyerl * Enable multicast routing 583 1.1 hpeyerl */ 584 1.1 hpeyerl static int 585 1.115 
plunky ip_mrouter_init(struct socket *so, int v) 586 1.1 hpeyerl { 587 1.15 mycroft if (mrtdebug) 588 1.15 mycroft log(LOG_DEBUG, 589 1.30 mycroft "ip_mrouter_init: so_type = %d, pr_protocol = %d\n", 590 1.15 mycroft so->so_type, so->so_proto->pr_protocol); 591 1.15 mycroft 592 1.1 hpeyerl if (so->so_type != SOCK_RAW || 593 1.1 hpeyerl so->so_proto->pr_protocol != IPPROTO_IGMP) 594 1.150 maxv return EOPNOTSUPP; 595 1.1 hpeyerl 596 1.115 plunky if (v != 1) 597 1.150 maxv return EINVAL; 598 1.15 mycroft 599 1.86 manu if (ip_mrouter != NULL) 600 1.150 maxv return EADDRINUSE; 601 1.1 hpeyerl 602 1.1 hpeyerl ip_mrouter = so; 603 1.1 hpeyerl 604 1.112 ad mfchashtbl = hashinit(MFCTBLSIZ, HASH_LIST, true, &mfchash); 605 1.118 cegger memset((void *)nexpire, 0, sizeof(nexpire)); 606 1.15 mycroft 607 1.15 mycroft pim_assert = 0; 608 1.15 mycroft 609 1.104 ad callout_init(&expire_upcalls_ch, 0); 610 1.47 thorpej callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, 611 1.86 manu expire_upcalls, NULL); 612 1.86 manu 613 1.104 ad callout_init(&bw_upcalls_ch, 0); 614 1.86 manu callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD, 615 1.86 manu expire_bw_upcalls_send, NULL); 616 1.86 manu 617 1.104 ad callout_init(&bw_meter_ch, 0); 618 1.86 manu callout_reset(&bw_meter_ch, BW_METER_PERIOD, 619 1.86 manu expire_bw_meter_process, NULL); 620 1.15 mycroft 621 1.15 mycroft if (mrtdebug) 622 1.30 mycroft log(LOG_DEBUG, "ip_mrouter_init\n"); 623 1.15 mycroft 624 1.150 maxv return 0; 625 1.1 hpeyerl } 626 1.1 hpeyerl 627 1.1 hpeyerl /* 628 1.1 hpeyerl * Disable multicast routing 629 1.1 hpeyerl */ 630 1.1 hpeyerl int 631 1.89 perry ip_mrouter_done(void) 632 1.1 hpeyerl { 633 1.15 mycroft vifi_t vifi; 634 1.48 augustss struct vif *vifp; 635 1.15 mycroft int i; 636 1.15 mycroft int s; 637 1.60 itojun 638 1.24 mycroft s = splsoftnet(); 639 1.1 hpeyerl 640 1.17 mycroft /* Clear out all the vifs currently in use. 
*/ 641 1.1 hpeyerl for (vifi = 0; vifi < numvifs; vifi++) { 642 1.15 mycroft vifp = &viftable[vifi]; 643 1.29 mycroft if (!in_nullhost(vifp->v_lcl_addr)) 644 1.17 mycroft reset_vif(vifp); 645 1.1 hpeyerl } 646 1.17 mycroft 647 1.1 hpeyerl numvifs = 0; 648 1.15 mycroft pim_assert = 0; 649 1.86 manu mrt_api_config = 0; 650 1.60 itojun 651 1.47 thorpej callout_stop(&expire_upcalls_ch); 652 1.86 manu callout_stop(&bw_upcalls_ch); 653 1.86 manu callout_stop(&bw_meter_ch); 654 1.60 itojun 655 1.15 mycroft /* 656 1.15 mycroft * Free all multicast forwarding cache entries. 657 1.15 mycroft */ 658 1.15 mycroft for (i = 0; i < MFCTBLSIZ; i++) { 659 1.48 augustss struct mfc *rt, *nrt; 660 1.1 hpeyerl 661 1.57 matt for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) { 662 1.57 matt nrt = LIST_NEXT(rt, mfc_hash); 663 1.60 itojun 664 1.15 mycroft expire_mfc(rt); 665 1.15 mycroft } 666 1.15 mycroft } 667 1.40 mycroft 668 1.118 cegger memset((void *)nexpire, 0, sizeof(nexpire)); 669 1.116 rmind hashdone(mfchashtbl, HASH_LIST, mfchash); 670 1.86 manu mfchashtbl = NULL; 671 1.86 manu 672 1.86 manu bw_upcalls_n = 0; 673 1.118 cegger memset(bw_meter_timers, 0, sizeof(bw_meter_timers)); 674 1.60 itojun 675 1.17 mycroft /* Reset de-encapsulation cache. 
*/ 676 1.60 itojun 677 1.86 manu ip_mrouter = NULL; 678 1.60 itojun 679 1.15 mycroft splx(s); 680 1.60 itojun 681 1.15 mycroft if (mrtdebug) 682 1.30 mycroft log(LOG_DEBUG, "ip_mrouter_done\n"); 683 1.60 itojun 684 1.151 maxv return 0; 685 1.72 itojun } 686 1.72 itojun 687 1.72 itojun void 688 1.89 perry ip_mrouter_detach(struct ifnet *ifp) 689 1.72 itojun { 690 1.72 itojun int vifi, i; 691 1.72 itojun struct vif *vifp; 692 1.75 itojun struct mfc *rt; 693 1.75 itojun struct rtdetq *rte; 694 1.72 itojun 695 1.86 manu /* XXX not sure about side effect to userland routing daemon */ 696 1.72 itojun for (vifi = 0; vifi < numvifs; vifi++) { 697 1.72 itojun vifp = &viftable[vifi]; 698 1.72 itojun if (vifp->v_ifp == ifp) 699 1.72 itojun reset_vif(vifp); 700 1.72 itojun } 701 1.72 itojun for (i = 0; i < MFCTBLSIZ; i++) { 702 1.75 itojun if (nexpire[i] == 0) 703 1.75 itojun continue; 704 1.75 itojun LIST_FOREACH(rt, &mfchashtbl[i], mfc_hash) { 705 1.75 itojun for (rte = rt->mfc_stall; rte; rte = rte->next) { 706 1.75 itojun if (rte->ifp == ifp) 707 1.75 itojun rte->ifp = NULL; 708 1.72 itojun } 709 1.72 itojun } 710 1.72 itojun } 711 1.15 mycroft } 712 1.15 mycroft 713 1.15 mycroft /* 714 1.15 mycroft * Set PIM assert processing global 715 1.15 mycroft */ 716 1.15 mycroft static int 717 1.115 plunky set_assert(int i) 718 1.15 mycroft { 719 1.115 plunky pim_assert = !!i; 720 1.151 maxv return 0; 721 1.1 hpeyerl } 722 1.1 hpeyerl 723 1.86 manu /* 724 1.86 manu * Configure API capabilities 725 1.86 manu */ 726 1.86 manu static int 727 1.115 plunky set_api_config(struct sockopt *sopt) 728 1.86 manu { 729 1.115 plunky u_int32_t apival; 730 1.115 plunky int i, error; 731 1.86 manu 732 1.86 manu /* 733 1.86 manu * We can set the API capabilities only if it is the first operation 734 1.86 manu * after MRT_INIT. 
I.e.: 735 1.86 manu * - there are no vifs installed 736 1.86 manu * - pim_assert is not enabled 737 1.86 manu * - the MFC table is empty 738 1.86 manu */ 739 1.115 plunky error = sockopt_get(sopt, &apival, sizeof(apival)); 740 1.115 plunky if (error) 741 1.151 maxv return error; 742 1.115 plunky if (numvifs > 0) 743 1.151 maxv return EPERM; 744 1.115 plunky if (pim_assert) 745 1.151 maxv return EPERM; 746 1.86 manu for (i = 0; i < MFCTBLSIZ; i++) { 747 1.115 plunky if (LIST_FIRST(&mfchashtbl[i]) != NULL) 748 1.151 maxv return EPERM; 749 1.86 manu } 750 1.86 manu 751 1.115 plunky mrt_api_config = apival & mrt_api_support; 752 1.151 maxv return 0; 753 1.86 manu } 754 1.86 manu 755 1.1 hpeyerl /* 756 1.1 hpeyerl * Add a vif to the vif table 757 1.1 hpeyerl */ 758 1.1 hpeyerl static int 759 1.115 plunky add_vif(struct vifctl *vifcp) 760 1.15 mycroft { 761 1.48 augustss struct vif *vifp; 762 1.15 mycroft struct ifnet *ifp; 763 1.15 mycroft int error, s; 764 1.105 dyoung struct sockaddr_in sin; 765 1.60 itojun 766 1.1 hpeyerl if (vifcp->vifc_vifi >= MAXVIFS) 767 1.151 maxv return EINVAL; 768 1.86 manu if (in_nullhost(vifcp->vifc_lcl_addr)) 769 1.151 maxv return EADDRNOTAVAIL; 770 1.15 mycroft 771 1.15 mycroft vifp = &viftable[vifcp->vifc_vifi]; 772 1.29 mycroft if (!in_nullhost(vifp->v_lcl_addr)) 773 1.151 maxv return EADDRINUSE; 774 1.60 itojun 775 1.15 mycroft /* Find the interface with an address in AF_INET family. */ 776 1.86 manu #ifdef PIM 777 1.86 manu if (vifcp->vifc_flags & VIFF_REGISTER) { 778 1.86 manu /* 779 1.86 manu * XXX: Because VIFF_REGISTER does not really need a valid 780 1.86 manu * local interface (e.g. it could be 127.0.0.2), we don't 781 1.86 manu * check its address. 
782 1.86 manu */ 783 1.153 maxv ifp = NULL; 784 1.86 manu } else 785 1.86 manu #endif 786 1.86 manu { 787 1.144 ozaki struct ifaddr *ifa; 788 1.144 ozaki 789 1.105 dyoung sockaddr_in_init(&sin, &vifcp->vifc_lcl_addr, 0); 790 1.144 ozaki s = pserialize_read_enter(); 791 1.86 manu ifa = ifa_ifwithaddr(sintosa(&sin)); 792 1.144 ozaki if (ifa == NULL) { 793 1.144 ozaki pserialize_read_exit(s); 794 1.144 ozaki return EADDRNOTAVAIL; 795 1.144 ozaki } 796 1.86 manu ifp = ifa->ifa_ifp; 797 1.144 ozaki /* FIXME NOMPSAFE */ 798 1.144 ozaki pserialize_read_exit(s); 799 1.86 manu } 800 1.60 itojun 801 1.12 brezak if (vifcp->vifc_flags & VIFF_TUNNEL) { 802 1.17 mycroft if (vifcp->vifc_flags & VIFF_SRCRT) { 803 1.86 manu log(LOG_ERR, "source routed tunnels not supported\n"); 804 1.151 maxv return EOPNOTSUPP; 805 1.12 brezak } 806 1.17 mycroft 807 1.54 itojun /* attach this vif to decapsulator dispatch table */ 808 1.95 gdt /* 809 1.95 gdt * XXX Use addresses in registration so that matching 810 1.95 gdt * can be done with radix tree in decapsulator. But, 811 1.95 gdt * we need to check inner header for multicast, so 812 1.95 gdt * this requires both radix tree lookup and then a 813 1.95 gdt * function to check, and this is not supported yet. 814 1.95 gdt */ 815 1.143 knakahar error = encap_lock_enter(); 816 1.143 knakahar if (error) 817 1.143 knakahar return error; 818 1.54 itojun vifp->v_encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4, 819 1.137 knakahar vif_encapcheck, &vif_encapsw, vifp); 820 1.142 knakahar encap_lock_exit(); 821 1.54 itojun if (!vifp->v_encap_cookie) 822 1.151 maxv return EINVAL; 823 1.54 itojun 824 1.17 mycroft /* Create a fake encapsulation interface. */ 825 1.117 cegger ifp = malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK|M_ZERO); 826 1.86 manu snprintf(ifp->if_xname, sizeof(ifp->if_xname), 827 1.86 manu "mdecap%d", vifcp->vifc_vifi); 828 1.17 mycroft 829 1.17 mycroft /* Prepare cached route entry. 
*/ 830 1.118 cegger memset(&vifp->v_route, 0, sizeof(vifp->v_route)); 831 1.86 manu #ifdef PIM 832 1.86 manu } else if (vifcp->vifc_flags & VIFF_REGISTER) { 833 1.86 manu ifp = &multicast_register_if; 834 1.86 manu if (mrtdebug) 835 1.86 manu log(LOG_DEBUG, "Adding a register vif, ifp: %p\n", 836 1.86 manu (void *)ifp); 837 1.86 manu if (reg_vif_num == VIFI_INVALID) { 838 1.118 cegger memset(ifp, 0, sizeof(*ifp)); 839 1.86 manu snprintf(ifp->if_xname, sizeof(ifp->if_xname), 840 1.86 manu "register_vif"); 841 1.86 manu ifp->if_flags = IFF_LOOPBACK; 842 1.118 cegger memset(&vifp->v_route, 0, sizeof(vifp->v_route)); 843 1.86 manu reg_vif_num = vifcp->vifc_vifi; 844 1.86 manu } 845 1.86 manu #endif 846 1.12 brezak } else { 847 1.15 mycroft /* Make sure the interface supports multicast. */ 848 1.12 brezak if ((ifp->if_flags & IFF_MULTICAST) == 0) 849 1.151 maxv return EOPNOTSUPP; 850 1.45 thorpej 851 1.15 mycroft /* Enable promiscuous reception of all IP multicasts. */ 852 1.105 dyoung sockaddr_in_init(&sin, &zeroin_addr, 0); 853 1.121 dyoung error = if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)); 854 1.12 brezak if (error) 855 1.151 maxv return error; 856 1.1 hpeyerl } 857 1.45 thorpej 858 1.24 mycroft s = splsoftnet(); 859 1.31 mycroft 860 1.15 mycroft /* Define parameters for the tbf structure. 
*/ 861 1.86 manu vifp->tbf_q = NULL; 862 1.31 mycroft vifp->tbf_t = &vifp->tbf_q; 863 1.31 mycroft microtime(&vifp->tbf_last_pkt_t); 864 1.31 mycroft vifp->tbf_n_tok = 0; 865 1.31 mycroft vifp->tbf_q_len = 0; 866 1.31 mycroft vifp->tbf_max_q_len = MAXQSIZE; 867 1.60 itojun 868 1.1 hpeyerl vifp->v_flags = vifcp->vifc_flags; 869 1.1 hpeyerl vifp->v_threshold = vifcp->vifc_threshold; 870 1.31 mycroft /* scaling up here allows division by 1024 in critical code */ 871 1.31 mycroft vifp->v_rate_limit = vifcp->vifc_rate_limit * 1024 / 1000; 872 1.1 hpeyerl vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 873 1.15 mycroft vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 874 1.12 brezak vifp->v_ifp = ifp; 875 1.15 mycroft /* Initialize per vif pkt counters. */ 876 1.15 mycroft vifp->v_pkt_in = 0; 877 1.15 mycroft vifp->v_pkt_out = 0; 878 1.15 mycroft vifp->v_bytes_in = 0; 879 1.15 mycroft vifp->v_bytes_out = 0; 880 1.47 thorpej 881 1.104 ad callout_init(&vifp->v_repq_ch, 0); 882 1.47 thorpej 883 1.12 brezak splx(s); 884 1.60 itojun 885 1.15 mycroft /* Adjust numvifs up if the vifi is higher than numvifs. */ 886 1.1 hpeyerl if (numvifs <= vifcp->vifc_vifi) 887 1.1 hpeyerl numvifs = vifcp->vifc_vifi + 1; 888 1.60 itojun 889 1.15 mycroft if (mrtdebug) 890 1.30 mycroft log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n", 891 1.60 itojun vifcp->vifc_vifi, 892 1.15 mycroft ntohl(vifcp->vifc_lcl_addr.s_addr), 893 1.15 mycroft (vifcp->vifc_flags & VIFF_TUNNEL) ? 
"rmtaddr" : "mask", 894 1.15 mycroft ntohl(vifcp->vifc_rmt_addr.s_addr), 895 1.15 mycroft vifcp->vifc_threshold, 896 1.60 itojun vifcp->vifc_rate_limit); 897 1.60 itojun 898 1.151 maxv return 0; 899 1.1 hpeyerl } 900 1.1 hpeyerl 901 1.17 mycroft void 902 1.89 perry reset_vif(struct vif *vifp) 903 1.17 mycroft { 904 1.48 augustss struct mbuf *m, *n; 905 1.17 mycroft struct ifnet *ifp; 906 1.105 dyoung struct sockaddr_in sin; 907 1.17 mycroft 908 1.47 thorpej callout_stop(&vifp->v_repq_ch); 909 1.47 thorpej 910 1.54 itojun /* detach this vif from decapsulator dispatch table */ 911 1.142 knakahar encap_lock_enter(); 912 1.54 itojun encap_detach(vifp->v_encap_cookie); 913 1.142 knakahar encap_lock_exit(); 914 1.54 itojun vifp->v_encap_cookie = NULL; 915 1.54 itojun 916 1.86 manu /* 917 1.86 manu * Free packets queued at the interface 918 1.86 manu */ 919 1.86 manu for (m = vifp->tbf_q; m != NULL; m = n) { 920 1.31 mycroft n = m->m_nextpkt; 921 1.31 mycroft m_freem(m); 922 1.31 mycroft } 923 1.31 mycroft 924 1.95 gdt if (vifp->v_flags & VIFF_TUNNEL) 925 1.17 mycroft free(vifp->v_ifp, M_MRTABLE); 926 1.95 gdt else if (vifp->v_flags & VIFF_REGISTER) { 927 1.86 manu #ifdef PIM 928 1.87 manu reg_vif_num = VIFI_INVALID; 929 1.86 manu #endif 930 1.17 mycroft } else { 931 1.105 dyoung sockaddr_in_init(&sin, &zeroin_addr, 0); 932 1.17 mycroft ifp = vifp->v_ifp; 933 1.121 dyoung if_mcast_op(ifp, SIOCDELMULTI, sintosa(&sin)); 934 1.17 mycroft } 935 1.118 cegger memset((void *)vifp, 0, sizeof(*vifp)); 936 1.17 mycroft } 937 1.17 mycroft 938 1.1 hpeyerl /* 939 1.1 hpeyerl * Delete a vif from the vif table 940 1.1 hpeyerl */ 941 1.1 hpeyerl static int 942 1.115 plunky del_vif(vifi_t *vifip) 943 1.1 hpeyerl { 944 1.48 augustss struct vif *vifp; 945 1.48 augustss vifi_t vifi; 946 1.15 mycroft int s; 947 1.60 itojun 948 1.1 hpeyerl if (*vifip >= numvifs) 949 1.151 maxv return EINVAL; 950 1.15 mycroft 951 1.15 mycroft vifp = &viftable[*vifip]; 952 1.29 mycroft if 
(in_nullhost(vifp->v_lcl_addr)) 953 1.151 maxv return EADDRNOTAVAIL; 954 1.60 itojun 955 1.24 mycroft s = splsoftnet(); 956 1.60 itojun 957 1.17 mycroft reset_vif(vifp); 958 1.60 itojun 959 1.1 hpeyerl /* Adjust numvifs down */ 960 1.15 mycroft for (vifi = numvifs; vifi > 0; vifi--) 961 1.86 manu if (!in_nullhost(viftable[vifi - 1].v_lcl_addr)) 962 1.1 hpeyerl break; 963 1.15 mycroft numvifs = vifi; 964 1.60 itojun 965 1.1 hpeyerl splx(s); 966 1.60 itojun 967 1.15 mycroft if (mrtdebug) 968 1.30 mycroft log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs); 969 1.60 itojun 970 1.151 maxv return 0; 971 1.1 hpeyerl } 972 1.1 hpeyerl 973 1.86 manu /* 974 1.86 manu * update an mfc entry without resetting counters and S,G addresses. 975 1.86 manu */ 976 1.15 mycroft static void 977 1.86 manu update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 978 1.1 hpeyerl { 979 1.86 manu int i; 980 1.1 hpeyerl 981 1.15 mycroft rt->mfc_parent = mfccp->mfcc_parent; 982 1.86 manu for (i = 0; i < numvifs; i++) { 983 1.86 manu rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 984 1.86 manu rt->mfc_flags[i] = mfccp->mfcc_flags[i] & mrt_api_config & 985 1.86 manu MRT_MFC_FLAGS_ALL; 986 1.86 manu } 987 1.86 manu /* set the RP address */ 988 1.86 manu if (mrt_api_config & MRT_MFC_RP) 989 1.86 manu rt->mfc_rp = mfccp->mfcc_rp; 990 1.86 manu else 991 1.86 manu rt->mfc_rp = zeroin_addr; 992 1.86 manu } 993 1.86 manu 994 1.86 manu /* 995 1.86 manu * fully initialize an mfc entry from the parameter. 
996 1.86 manu */ 997 1.86 manu static void 998 1.86 manu init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 999 1.86 manu { 1000 1.86 manu rt->mfc_origin = mfccp->mfcc_origin; 1001 1.86 manu rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 1002 1.86 manu 1003 1.86 manu update_mfc_params(rt, mfccp); 1004 1.86 manu 1005 1.86 manu /* initialize pkt counters per src-grp */ 1006 1.86 manu rt->mfc_pkt_cnt = 0; 1007 1.86 manu rt->mfc_byte_cnt = 0; 1008 1.86 manu rt->mfc_wrong_if = 0; 1009 1.86 manu timerclear(&rt->mfc_last_assert); 1010 1.15 mycroft } 1011 1.1 hpeyerl 1012 1.15 mycroft static void 1013 1.89 perry expire_mfc(struct mfc *rt) 1014 1.15 mycroft { 1015 1.15 mycroft struct rtdetq *rte, *nrte; 1016 1.1 hpeyerl 1017 1.86 manu free_bw_list(rt->mfc_bw_meter); 1018 1.86 manu 1019 1.86 manu for (rte = rt->mfc_stall; rte != NULL; rte = nrte) { 1020 1.15 mycroft nrte = rte->next; 1021 1.15 mycroft m_freem(rte->m); 1022 1.15 mycroft free(rte, M_MRTABLE); 1023 1.1 hpeyerl } 1024 1.1 hpeyerl 1025 1.15 mycroft LIST_REMOVE(rt, mfc_hash); 1026 1.15 mycroft free(rt, M_MRTABLE); 1027 1.1 hpeyerl } 1028 1.1 hpeyerl 1029 1.1 hpeyerl /* 1030 1.15 mycroft * Add an mfc entry 1031 1.1 hpeyerl */ 1032 1.1 hpeyerl static int 1033 1.115 plunky add_mfc(struct sockopt *sopt) 1034 1.1 hpeyerl { 1035 1.86 manu struct mfcctl2 mfcctl2; 1036 1.86 manu struct mfcctl2 *mfccp; 1037 1.25 christos struct mfc *rt; 1038 1.25 christos u_int32_t hash = 0; 1039 1.15 mycroft struct rtdetq *rte, *nrte; 1040 1.48 augustss u_short nstl; 1041 1.15 mycroft int s; 1042 1.115 plunky int error; 1043 1.1 hpeyerl 1044 1.86 manu /* 1045 1.86 manu * select data size depending on API version. 
1046 1.86 manu */ 1047 1.86 manu mfccp = &mfcctl2; 1048 1.115 plunky memset(&mfcctl2, 0, sizeof(mfcctl2)); 1049 1.115 plunky 1050 1.115 plunky if (mrt_api_config & MRT_API_FLAGS_ALL) 1051 1.115 plunky error = sockopt_get(sopt, mfccp, sizeof(struct mfcctl2)); 1052 1.115 plunky else 1053 1.115 plunky error = sockopt_get(sopt, mfccp, sizeof(struct mfcctl)); 1054 1.115 plunky 1055 1.115 plunky if (error) 1056 1.151 maxv return error; 1057 1.1 hpeyerl 1058 1.24 mycroft s = splsoftnet(); 1059 1.86 manu rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); 1060 1.1 hpeyerl 1061 1.15 mycroft /* If an entry already exists, just update the fields */ 1062 1.15 mycroft if (rt) { 1063 1.15 mycroft if (mrtdebug & DEBUG_MFC) 1064 1.70 itojun log(LOG_DEBUG, "add_mfc update o %x g %x p %x\n", 1065 1.15 mycroft ntohl(mfccp->mfcc_origin.s_addr), 1066 1.15 mycroft ntohl(mfccp->mfcc_mcastgrp.s_addr), 1067 1.15 mycroft mfccp->mfcc_parent); 1068 1.1 hpeyerl 1069 1.86 manu update_mfc_params(rt, mfccp); 1070 1.1 hpeyerl 1071 1.15 mycroft splx(s); 1072 1.151 maxv return 0; 1073 1.15 mycroft } 1074 1.1 hpeyerl 1075 1.60 itojun /* 1076 1.15 mycroft * Find the entry for which the upcall was made and update 1077 1.15 mycroft */ 1078 1.15 mycroft nstl = 0; 1079 1.29 mycroft hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); 1080 1.57 matt LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { 1081 1.29 mycroft if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && 1082 1.29 mycroft in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && 1083 1.86 manu rt->mfc_stall != NULL) { 1084 1.15 mycroft if (nstl++) 1085 1.30 mycroft log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n", 1086 1.15 mycroft "multiple kernel entries", 1087 1.15 mycroft ntohl(mfccp->mfcc_origin.s_addr), 1088 1.15 mycroft ntohl(mfccp->mfcc_mcastgrp.s_addr), 1089 1.15 mycroft mfccp->mfcc_parent, rt->mfc_stall); 1090 1.15 mycroft 1091 1.15 mycroft if (mrtdebug & DEBUG_MFC) 1092 1.70 itojun log(LOG_DEBUG, "add_mfc o %x g %x p %x dbg 
%p\n", 1093 1.15 mycroft ntohl(mfccp->mfcc_origin.s_addr), 1094 1.15 mycroft ntohl(mfccp->mfcc_mcastgrp.s_addr), 1095 1.15 mycroft mfccp->mfcc_parent, rt->mfc_stall); 1096 1.15 mycroft 1097 1.86 manu rte = rt->mfc_stall; 1098 1.86 manu init_mfc_params(rt, mfccp); 1099 1.86 manu rt->mfc_stall = NULL; 1100 1.15 mycroft 1101 1.86 manu rt->mfc_expire = 0; /* Don't clean this guy up */ 1102 1.86 manu nexpire[hash]--; 1103 1.35 mycroft 1104 1.15 mycroft /* free packets Qed at the end of this entry */ 1105 1.86 manu for (; rte != NULL; rte = nrte) { 1106 1.15 mycroft nrte = rte->next; 1107 1.75 itojun if (rte->ifp) { 1108 1.75 itojun ip_mdq(rte->m, rte->ifp, rt); 1109 1.75 itojun } 1110 1.15 mycroft m_freem(rte->m); 1111 1.15 mycroft #ifdef UPCALL_TIMING 1112 1.15 mycroft collate(&rte->t); 1113 1.15 mycroft #endif /* UPCALL_TIMING */ 1114 1.15 mycroft free(rte, M_MRTABLE); 1115 1.15 mycroft } 1116 1.15 mycroft } 1117 1.15 mycroft } 1118 1.1 hpeyerl 1119 1.86 manu /* 1120 1.86 manu * It is possible that an entry is being inserted without an upcall 1121 1.86 manu */ 1122 1.15 mycroft if (nstl == 0) { 1123 1.15 mycroft /* 1124 1.15 mycroft * No mfc; make a new one 1125 1.15 mycroft */ 1126 1.15 mycroft if (mrtdebug & DEBUG_MFC) 1127 1.70 itojun log(LOG_DEBUG, "add_mfc no upcall o %x g %x p %x\n", 1128 1.15 mycroft ntohl(mfccp->mfcc_origin.s_addr), 1129 1.15 mycroft ntohl(mfccp->mfcc_mcastgrp.s_addr), 1130 1.15 mycroft mfccp->mfcc_parent); 1131 1.60 itojun 1132 1.86 manu LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { 1133 1.86 manu if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && 1134 1.86 manu in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) { 1135 1.86 manu init_mfc_params(rt, mfccp); 1136 1.86 manu if (rt->mfc_expire) 1137 1.86 manu nexpire[hash]--; 1138 1.86 manu rt->mfc_expire = 0; 1139 1.86 manu break; /* XXX */ 1140 1.86 manu } 1141 1.1 hpeyerl } 1142 1.86 manu if (rt == NULL) { /* no upcall, so make a new entry */ 1143 1.153 maxv rt = malloc(sizeof(*rt), 
M_MRTABLE, M_NOWAIT); 1144 1.86 manu if (rt == NULL) { 1145 1.86 manu splx(s); 1146 1.151 maxv return ENOBUFS; 1147 1.86 manu } 1148 1.15 mycroft 1149 1.86 manu init_mfc_params(rt, mfccp); 1150 1.86 manu rt->mfc_expire = 0; 1151 1.86 manu rt->mfc_stall = NULL; 1152 1.86 manu rt->mfc_bw_meter = NULL; 1153 1.60 itojun 1154 1.86 manu /* insert new entry at head of hash chain */ 1155 1.86 manu LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); 1156 1.86 manu } 1157 1.15 mycroft } 1158 1.15 mycroft 1159 1.1 hpeyerl splx(s); 1160 1.151 maxv return 0; 1161 1.1 hpeyerl } 1162 1.1 hpeyerl 1163 1.15 mycroft #ifdef UPCALL_TIMING 1164 1.15 mycroft /* 1165 1.60 itojun * collect delay statistics on the upcalls 1166 1.15 mycroft */ 1167 1.86 manu static void 1168 1.89 perry collate(struct timeval *t) 1169 1.15 mycroft { 1170 1.67 itojun u_int32_t d; 1171 1.67 itojun struct timeval tp; 1172 1.67 itojun u_int32_t delta; 1173 1.60 itojun 1174 1.67 itojun microtime(&tp); 1175 1.60 itojun 1176 1.67 itojun if (timercmp(t, &tp, <)) { 1177 1.67 itojun TV_DELTA(tp, *t, delta); 1178 1.60 itojun 1179 1.67 itojun d = delta >> 10; 1180 1.67 itojun if (d > 50) 1181 1.67 itojun d = 50; 1182 1.60 itojun 1183 1.67 itojun ++upcall_data[d]; 1184 1.67 itojun } 1185 1.15 mycroft } 1186 1.15 mycroft #endif /* UPCALL_TIMING */ 1187 1.15 mycroft 1188 1.1 hpeyerl /* 1189 1.15 mycroft * Delete an mfc entry 1190 1.1 hpeyerl */ 1191 1.1 hpeyerl static int 1192 1.115 plunky del_mfc(struct sockopt *sopt) 1193 1.1 hpeyerl { 1194 1.86 manu struct mfcctl2 mfcctl2; 1195 1.86 manu struct mfcctl2 *mfccp; 1196 1.15 mycroft struct mfc *rt; 1197 1.1 hpeyerl int s; 1198 1.115 plunky int error; 1199 1.1 hpeyerl 1200 1.86 manu /* 1201 1.86 manu * XXX: for deleting MFC entries the information in entries 1202 1.86 manu * of size "struct mfcctl" is sufficient. 
1203 1.86 manu */ 1204 1.86 manu 1205 1.115 plunky mfccp = &mfcctl2; 1206 1.115 plunky memset(&mfcctl2, 0, sizeof(mfcctl2)); 1207 1.15 mycroft 1208 1.115 plunky error = sockopt_get(sopt, mfccp, sizeof(struct mfcctl)); 1209 1.115 plunky if (error) { 1210 1.115 plunky /* Try with the size of mfcctl2. */ 1211 1.115 plunky error = sockopt_get(sopt, mfccp, sizeof(struct mfcctl2)); 1212 1.115 plunky if (error) 1213 1.151 maxv return error; 1214 1.115 plunky } 1215 1.15 mycroft 1216 1.15 mycroft if (mrtdebug & DEBUG_MFC) 1217 1.30 mycroft log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x\n", 1218 1.29 mycroft ntohl(mfccp->mfcc_origin.s_addr), 1219 1.29 mycroft ntohl(mfccp->mfcc_mcastgrp.s_addr)); 1220 1.1 hpeyerl 1221 1.24 mycroft s = splsoftnet(); 1222 1.1 hpeyerl 1223 1.86 manu rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); 1224 1.86 manu if (rt == NULL) { 1225 1.1 hpeyerl splx(s); 1226 1.151 maxv return EADDRNOTAVAIL; 1227 1.1 hpeyerl } 1228 1.1 hpeyerl 1229 1.86 manu /* 1230 1.86 manu * free the bw_meter entries 1231 1.86 manu */ 1232 1.86 manu free_bw_list(rt->mfc_bw_meter); 1233 1.86 manu rt->mfc_bw_meter = NULL; 1234 1.86 manu 1235 1.15 mycroft LIST_REMOVE(rt, mfc_hash); 1236 1.15 mycroft free(rt, M_MRTABLE); 1237 1.1 hpeyerl 1238 1.1 hpeyerl splx(s); 1239 1.151 maxv return 0; 1240 1.1 hpeyerl } 1241 1.1 hpeyerl 1242 1.1 hpeyerl static int 1243 1.89 perry socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) 1244 1.67 itojun { 1245 1.67 itojun if (s) { 1246 1.120 plunky if (sbappendaddr(&s->so_rcv, sintosa(src), mm, NULL) != 0) { 1247 1.67 itojun sorwakeup(s); 1248 1.151 maxv return 0; 1249 1.67 itojun } 1250 1.155 roy soroverflow(s); 1251 1.67 itojun } 1252 1.67 itojun m_freem(mm); 1253 1.151 maxv return -1; 1254 1.1 hpeyerl } 1255 1.1 hpeyerl 1256 1.1 hpeyerl /* 1257 1.1 hpeyerl * IP multicast forwarding function. 
This function assumes that the packet 1258 1.1 hpeyerl * pointed to by "ip" has arrived on (or is about to be sent to) the interface 1259 1.1 hpeyerl * pointed to by "ifp", and the packet is to be relayed to other networks 1260 1.1 hpeyerl * that have members of the packet's destination IP multicast group. 1261 1.1 hpeyerl * 1262 1.15 mycroft * The packet is returned unscathed to the caller, unless it is 1263 1.15 mycroft * erroneous, in which case a non-zero return value tells the caller to 1264 1.1 hpeyerl * discard it. 1265 1.1 hpeyerl */ 1266 1.1 hpeyerl 1267 1.15 mycroft #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 1268 1.15 mycroft #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1269 1.1 hpeyerl 1270 1.1 hpeyerl int 1271 1.89 perry ip_mforward(struct mbuf *m, struct ifnet *ifp) 1272 1.1 hpeyerl { 1273 1.164 kardel int rc; 1274 1.164 kardel /* 1275 1.164 kardel * save csum_flags to uphold the 1276 1.164 kardel * "unscathed" guarantee. 1277 1.164 kardel * ip_output() relies on that and 1278 1.164 kardel * without it we send out 1279 1.164 kardel * multicast packets with an invalid 1280 1.164 kardel * checksum 1281 1.164 kardel * 1282 1.164 kardel * see PR kern/55779 1283 1.164 kardel */ 1284 1.164 kardel int csum_flags = m->m_pkthdr.csum_flags; 1285 1.164 kardel 1286 1.164 kardel /* 1287 1.164 kardel * Temporarily clear any in-bound checksum flags for this packet. 
1288 1.164 kardel */ 1289 1.164 kardel m->m_pkthdr.csum_flags = 0; 1290 1.164 kardel 1291 1.164 kardel rc = ip_mforward_real(m, ifp); 1292 1.164 kardel 1293 1.164 kardel m->m_pkthdr.csum_flags = csum_flags; 1294 1.164 kardel 1295 1.164 kardel return rc; 1296 1.164 kardel } 1297 1.164 kardel 1298 1.164 kardel static int 1299 1.164 kardel ip_mforward_real(struct mbuf *m, struct ifnet *ifp) 1300 1.164 kardel { 1301 1.67 itojun struct ip *ip = mtod(m, struct ip *); 1302 1.67 itojun struct mfc *rt; 1303 1.67 itojun static int srctun = 0; 1304 1.67 itojun struct mbuf *mm; 1305 1.105 dyoung struct sockaddr_in sin; 1306 1.67 itojun int s; 1307 1.67 itojun vifi_t vifi; 1308 1.15 mycroft 1309 1.67 itojun if (mrtdebug & DEBUG_FORWARD) 1310 1.71 itojun log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n", 1311 1.71 itojun ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); 1312 1.1 hpeyerl 1313 1.156 maxv /* 1314 1.156 maxv * XXX XXX: Why do we check [1] against IPOPT_LSRR? Because we 1315 1.156 maxv * expect [0] to be IPOPT_NOP, maybe? In all cases that doesn't 1316 1.156 maxv * make a lot of sense, a forged packet can just put two IPOPT_NOPs 1317 1.156 maxv * followed by one IPOPT_LSRR, and bypass the check. 1318 1.156 maxv */ 1319 1.67 itojun if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 1320 1.67 itojun ((u_char *)(ip + 1))[1] != IPOPT_LSRR) { 1321 1.67 itojun /* 1322 1.67 itojun * Packet arrived via a physical interface or 1323 1.86 manu * an encapsulated tunnel or a register_vif. 1324 1.67 itojun */ 1325 1.67 itojun } else { 1326 1.67 itojun /* 1327 1.67 itojun * Packet arrived through a source-route tunnel. 1328 1.67 itojun * Source-route tunnels are no longer supported. 
1329 1.67 itojun */ 1330 1.67 itojun if ((srctun++ % 1000) == 0) 1331 1.67 itojun log(LOG_ERR, 1332 1.67 itojun "ip_mforward: received source-routed packet from %x\n", 1333 1.67 itojun ntohl(ip->ip_src.s_addr)); 1334 1.153 maxv return EOPNOTSUPP; 1335 1.67 itojun } 1336 1.15 mycroft 1337 1.113 taca /* 1338 1.67 itojun * Don't forward a packet with time-to-live of zero or one, 1339 1.67 itojun * or a packet destined to a local-only group. 1340 1.67 itojun */ 1341 1.67 itojun if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ip->ip_dst.s_addr)) 1342 1.151 maxv return 0; 1343 1.15 mycroft 1344 1.67 itojun /* 1345 1.67 itojun * Determine forwarding vifs from the forwarding cache table 1346 1.67 itojun */ 1347 1.67 itojun s = splsoftnet(); 1348 1.86 manu ++mrtstat.mrts_mfc_lookups; 1349 1.86 manu rt = mfc_find(&ip->ip_src, &ip->ip_dst); 1350 1.1 hpeyerl 1351 1.67 itojun /* Entry exists, so forward if necessary */ 1352 1.86 manu if (rt != NULL) { 1353 1.67 itojun splx(s); 1354 1.151 maxv return ip_mdq(m, ifp, rt); 1355 1.67 itojun } else { 1356 1.67 itojun /* 1357 1.153 maxv * If we don't have a route for packet's origin, make a copy 1358 1.153 maxv * of the packet and send message to routing daemon. 
1359 1.67 itojun */ 1360 1.15 mycroft 1361 1.67 itojun struct mbuf *mb0; 1362 1.67 itojun struct rtdetq *rte; 1363 1.67 itojun u_int32_t hash; 1364 1.153 maxv const int hlen = ip->ip_hl << 2; 1365 1.15 mycroft #ifdef UPCALL_TIMING 1366 1.67 itojun struct timeval tp; 1367 1.67 itojun microtime(&tp); 1368 1.153 maxv #endif 1369 1.15 mycroft 1370 1.86 manu ++mrtstat.mrts_mfc_misses; 1371 1.86 manu 1372 1.67 itojun mrtstat.mrts_no_route++; 1373 1.67 itojun if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1374 1.67 itojun log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n", 1375 1.67 itojun ntohl(ip->ip_src.s_addr), 1376 1.67 itojun ntohl(ip->ip_dst.s_addr)); 1377 1.67 itojun 1378 1.67 itojun /* 1379 1.67 itojun * Allocate mbufs early so that we don't do extra work if we are 1380 1.67 itojun * just going to fail anyway. Make sure to pullup the header so 1381 1.67 itojun * that other people can't step on it. 1382 1.67 itojun */ 1383 1.153 maxv rte = malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT); 1384 1.86 manu if (rte == NULL) { 1385 1.67 itojun splx(s); 1386 1.151 maxv return ENOBUFS; 1387 1.67 itojun } 1388 1.108 dyoung mb0 = m_copypacket(m, M_DONTWAIT); 1389 1.67 itojun M_PULLUP(mb0, hlen); 1390 1.86 manu if (mb0 == NULL) { 1391 1.67 itojun free(rte, M_MRTABLE); 1392 1.67 itojun splx(s); 1393 1.151 maxv return ENOBUFS; 1394 1.67 itojun } 1395 1.67 itojun 1396 1.86 manu /* is there an upcall waiting for this flow? */ 1397 1.67 itojun hash = MFCHASH(ip->ip_src, ip->ip_dst); 1398 1.67 itojun LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { 1399 1.67 itojun if (in_hosteq(ip->ip_src, rt->mfc_origin) && 1400 1.67 itojun in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && 1401 1.86 manu rt->mfc_stall != NULL) 1402 1.67 itojun break; 1403 1.67 itojun } 1404 1.67 itojun 1405 1.86 manu if (rt == NULL) { 1406 1.67 itojun int i; 1407 1.67 itojun struct igmpmsg *im; 1408 1.67 itojun 1409 1.86 manu /* 1410 1.86 manu * Locate the vifi for the incoming interface for 1411 1.86 manu * this packet. 
1412 1.86 manu * If none found, drop packet. 1413 1.86 manu */ 1414 1.86 manu for (vifi = 0; vifi < numvifs && 1415 1.86 manu viftable[vifi].v_ifp != ifp; vifi++) 1416 1.86 manu ; 1417 1.86 manu if (vifi >= numvifs) /* vif not found, drop packet */ 1418 1.86 manu goto non_fatal; 1419 1.86 manu 1420 1.67 itojun /* no upcall, so make a new entry */ 1421 1.153 maxv rt = malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1422 1.86 manu if (rt == NULL) 1423 1.86 manu goto fail; 1424 1.86 manu 1425 1.67 itojun /* 1426 1.67 itojun * Make a copy of the header to send to the user level 1427 1.67 itojun * process 1428 1.67 itojun */ 1429 1.107 dyoung mm = m_copym(m, 0, hlen, M_DONTWAIT); 1430 1.67 itojun M_PULLUP(mm, hlen); 1431 1.86 manu if (mm == NULL) 1432 1.86 manu goto fail1; 1433 1.60 itojun 1434 1.67 itojun /* 1435 1.67 itojun * Send message to routing daemon to install 1436 1.67 itojun * a route into the kernel table 1437 1.67 itojun */ 1438 1.15 mycroft 1439 1.67 itojun im = mtod(mm, struct igmpmsg *); 1440 1.67 itojun im->im_msgtype = IGMPMSG_NOCACHE; 1441 1.67 itojun im->im_mbz = 0; 1442 1.86 manu im->im_vif = vifi; 1443 1.67 itojun 1444 1.67 itojun mrtstat.mrts_upcalls++; 1445 1.67 itojun 1446 1.105 dyoung sockaddr_in_init(&sin, &ip->ip_src, 0); 1447 1.67 itojun if (socket_send(ip_mrouter, mm, &sin) < 0) { 1448 1.67 itojun log(LOG_WARNING, 1449 1.67 itojun "ip_mforward: ip_mrouter socket queue full\n"); 1450 1.67 itojun ++mrtstat.mrts_upq_sockfull; 1451 1.86 manu fail1: 1452 1.86 manu free(rt, M_MRTABLE); 1453 1.86 manu fail: 1454 1.67 itojun free(rte, M_MRTABLE); 1455 1.67 itojun m_freem(mb0); 1456 1.67 itojun splx(s); 1457 1.151 maxv return ENOBUFS; 1458 1.67 itojun } 1459 1.15 mycroft 1460 1.67 itojun /* insert new entry at head of hash chain */ 1461 1.67 itojun rt->mfc_origin = ip->ip_src; 1462 1.67 itojun rt->mfc_mcastgrp = ip->ip_dst; 1463 1.67 itojun rt->mfc_pkt_cnt = 0; 1464 1.67 itojun rt->mfc_byte_cnt = 0; 1465 1.67 itojun rt->mfc_wrong_if = 0; 1466 1.67 itojun 
rt->mfc_expire = UPCALL_EXPIRE; 1467 1.67 itojun nexpire[hash]++; 1468 1.86 manu for (i = 0; i < numvifs; i++) { 1469 1.67 itojun rt->mfc_ttls[i] = 0; 1470 1.86 manu rt->mfc_flags[i] = 0; 1471 1.86 manu } 1472 1.67 itojun rt->mfc_parent = -1; 1473 1.67 itojun 1474 1.86 manu /* clear the RP address */ 1475 1.86 manu rt->mfc_rp = zeroin_addr; 1476 1.86 manu 1477 1.86 manu rt->mfc_bw_meter = NULL; 1478 1.86 manu 1479 1.67 itojun /* link into table */ 1480 1.67 itojun LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); 1481 1.67 itojun /* Add this entry to the end of the queue */ 1482 1.67 itojun rt->mfc_stall = rte; 1483 1.67 itojun } else { 1484 1.67 itojun /* determine if q has overflowed */ 1485 1.67 itojun struct rtdetq **p; 1486 1.67 itojun int npkts = 0; 1487 1.67 itojun 1488 1.86 manu /* 1489 1.86 manu * XXX ouch! we need to append to the list, but we 1490 1.86 manu * only have a pointer to the front, so we have to 1491 1.86 manu * scan the entire list every time. 1492 1.86 manu */ 1493 1.86 manu for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) 1494 1.67 itojun if (++npkts > MAX_UPQ) { 1495 1.67 itojun mrtstat.mrts_upq_ovflw++; 1496 1.86 manu non_fatal: 1497 1.67 itojun free(rte, M_MRTABLE); 1498 1.67 itojun m_freem(mb0); 1499 1.67 itojun splx(s); 1500 1.151 maxv return 0; 1501 1.67 itojun } 1502 1.15 mycroft 1503 1.67 itojun /* Add this entry to the end of the queue */ 1504 1.67 itojun *p = rte; 1505 1.67 itojun } 1506 1.15 mycroft 1507 1.86 manu rte->next = NULL; 1508 1.67 itojun rte->m = mb0; 1509 1.67 itojun rte->ifp = ifp; 1510 1.15 mycroft #ifdef UPCALL_TIMING 1511 1.67 itojun rte->t = tp; 1512 1.153 maxv #endif 1513 1.15 mycroft 1514 1.67 itojun splx(s); 1515 1.15 mycroft 1516 1.151 maxv return 0; 1517 1.67 itojun } 1518 1.1 hpeyerl } 1519 1.1 hpeyerl 1520 1.25 christos /*ARGSUSED*/ 1521 1.1 hpeyerl static void 1522 1.100 christos expire_upcalls(void *v) 1523 1.1 hpeyerl { 1524 1.15 mycroft int i; 1525 1.15 mycroft 1526 1.146 ozaki /* XXX NOMPSAFE 
still need softnet_lock */ 1527 1.146 ozaki mutex_enter(softnet_lock); 1528 1.146 ozaki KERNEL_LOCK(1, NULL); 1529 1.15 mycroft 1530 1.15 mycroft for (i = 0; i < MFCTBLSIZ; i++) { 1531 1.48 augustss struct mfc *rt, *nrt; 1532 1.15 mycroft 1533 1.15 mycroft if (nexpire[i] == 0) 1534 1.15 mycroft continue; 1535 1.15 mycroft 1536 1.57 matt for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) { 1537 1.57 matt nrt = LIST_NEXT(rt, mfc_hash); 1538 1.1 hpeyerl 1539 1.67 itojun if (rt->mfc_expire == 0 || --rt->mfc_expire > 0) 1540 1.15 mycroft continue; 1541 1.15 mycroft nexpire[i]--; 1542 1.15 mycroft 1543 1.86 manu /* 1544 1.86 manu * free the bw_meter entries 1545 1.86 manu */ 1546 1.86 manu while (rt->mfc_bw_meter != NULL) { 1547 1.86 manu struct bw_meter *x = rt->mfc_bw_meter; 1548 1.86 manu 1549 1.86 manu rt->mfc_bw_meter = x->bm_mfc_next; 1550 1.147 para kmem_intr_free(x, sizeof(*x)); 1551 1.86 manu } 1552 1.86 manu 1553 1.15 mycroft ++mrtstat.mrts_cache_cleanups; 1554 1.15 mycroft if (mrtdebug & DEBUG_EXPIRE) 1555 1.15 mycroft log(LOG_DEBUG, 1556 1.30 mycroft "expire_upcalls: expiring (%x %x)\n", 1557 1.15 mycroft ntohl(rt->mfc_origin.s_addr), 1558 1.15 mycroft ntohl(rt->mfc_mcastgrp.s_addr)); 1559 1.1 hpeyerl 1560 1.15 mycroft expire_mfc(rt); 1561 1.15 mycroft } 1562 1.15 mycroft } 1563 1.1 hpeyerl 1564 1.47 thorpej callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, 1565 1.47 thorpej expire_upcalls, NULL); 1566 1.146 ozaki 1567 1.146 ozaki KERNEL_UNLOCK_ONE(NULL); 1568 1.146 ozaki mutex_exit(softnet_lock); 1569 1.1 hpeyerl } 1570 1.1 hpeyerl 1571 1.15 mycroft /* 1572 1.153 maxv * Macro to send packet on vif. 
1573 1.153 maxv */ 1574 1.153 maxv #define MC_SEND(ip, vifp, m) do { \ 1575 1.153 maxv if ((vifp)->v_flags & VIFF_TUNNEL) \ 1576 1.153 maxv encap_send((ip), (vifp), (m)); \ 1577 1.153 maxv else \ 1578 1.153 maxv phyint_send((ip), (vifp), (m)); \ 1579 1.153 maxv } while (/*CONSTCOND*/ 0) 1580 1.153 maxv 1581 1.153 maxv /* 1582 1.15 mycroft * Packet forwarding routine once entry in the cache is made 1583 1.15 mycroft */ 1584 1.15 mycroft static int 1585 1.89 perry ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt) 1586 1.1 hpeyerl { 1587 1.153 maxv struct ip *ip = mtod(m, struct ip *); 1588 1.67 itojun vifi_t vifi; 1589 1.67 itojun struct vif *vifp; 1590 1.105 dyoung struct sockaddr_in sin; 1591 1.153 maxv const int plen = ntohs(ip->ip_len) - (ip->ip_hl << 2); 1592 1.1 hpeyerl 1593 1.67 itojun /* 1594 1.67 itojun * Don't forward if it didn't arrive from the parent vif for its origin. 1595 1.67 itojun */ 1596 1.67 itojun vifi = rt->mfc_parent; 1597 1.67 itojun if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1598 1.67 itojun /* came in the wrong interface */ 1599 1.67 itojun if (mrtdebug & DEBUG_FORWARD) 1600 1.67 itojun log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", 1601 1.70 itojun ifp, vifi, 1602 1.70 itojun vifi >= numvifs ? 0 : viftable[vifi].v_ifp); 1603 1.67 itojun ++mrtstat.mrts_wrong_if; 1604 1.67 itojun ++rt->mfc_wrong_if; 1605 1.153 maxv 1606 1.67 itojun /* 1607 1.86 manu * If we are doing PIM assert processing, send a message 1608 1.86 manu * to the routing daemon. 1609 1.86 manu * 1610 1.86 manu * XXX: A PIM-SM router needs the WRONGVIF detection so it 1611 1.86 manu * can complete the SPT switch, regardless of the type 1612 1.86 manu * of the iif (broadcast media, GRE tunnel, etc). 
1613 1.67 itojun */ 1614 1.86 manu if (pim_assert && (vifi < numvifs) && viftable[vifi].v_ifp) { 1615 1.67 itojun struct timeval now; 1616 1.67 itojun u_int32_t delta; 1617 1.67 itojun 1618 1.86 manu #ifdef PIM 1619 1.86 manu if (ifp == &multicast_register_if) 1620 1.86 manu pimstat.pims_rcv_registers_wrongiif++; 1621 1.86 manu #endif 1622 1.86 manu 1623 1.86 manu /* Get vifi for the incoming packet */ 1624 1.86 manu for (vifi = 0; 1625 1.86 manu vifi < numvifs && viftable[vifi].v_ifp != ifp; 1626 1.86 manu vifi++) 1627 1.86 manu ; 1628 1.86 manu if (vifi >= numvifs) { 1629 1.86 manu /* The iif is not found: ignore the packet. */ 1630 1.151 maxv return 0; 1631 1.86 manu } 1632 1.86 manu 1633 1.86 manu if (rt->mfc_flags[vifi] & 1634 1.86 manu MRT_MFC_FLAGS_DISABLE_WRONGVIF) { 1635 1.86 manu /* WRONGVIF disabled: ignore the packet */ 1636 1.151 maxv return 0; 1637 1.86 manu } 1638 1.86 manu 1639 1.67 itojun microtime(&now); 1640 1.67 itojun 1641 1.67 itojun TV_DELTA(rt->mfc_last_assert, now, delta); 1642 1.67 itojun 1643 1.67 itojun if (delta > ASSERT_MSG_TIME) { 1644 1.86 manu struct igmpmsg *im; 1645 1.153 maxv const int hlen = ip->ip_hl << 2; 1646 1.107 dyoung struct mbuf *mm = 1647 1.107 dyoung m_copym(m, 0, hlen, M_DONTWAIT); 1648 1.86 manu 1649 1.67 itojun M_PULLUP(mm, hlen); 1650 1.86 manu if (mm == NULL) 1651 1.151 maxv return ENOBUFS; 1652 1.67 itojun 1653 1.67 itojun rt->mfc_last_assert = now; 1654 1.67 itojun 1655 1.67 itojun im = mtod(mm, struct igmpmsg *); 1656 1.67 itojun im->im_msgtype = IGMPMSG_WRONGVIF; 1657 1.67 itojun im->im_mbz = 0; 1658 1.67 itojun im->im_vif = vifi; 1659 1.15 mycroft 1660 1.86 manu mrtstat.mrts_upcalls++; 1661 1.86 manu 1662 1.106 dyoung sockaddr_in_init(&sin, &im->im_src, 0); 1663 1.86 manu if (socket_send(ip_mrouter, mm, &sin) < 0) { 1664 1.86 manu log(LOG_WARNING, 1665 1.86 manu "ip_mforward: ip_mrouter socket queue full\n"); 1666 1.86 manu ++mrtstat.mrts_upq_sockfull; 1667 1.151 maxv return ENOBUFS; 1668 1.86 manu } 1669 
1.67 itojun } 1670 1.67 itojun } 1671 1.151 maxv return 0; 1672 1.15 mycroft } 1673 1.15 mycroft 1674 1.67 itojun /* If I sourced this packet, it counts as output, else it was input. */ 1675 1.67 itojun if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) { 1676 1.67 itojun viftable[vifi].v_pkt_out++; 1677 1.67 itojun viftable[vifi].v_bytes_out += plen; 1678 1.67 itojun } else { 1679 1.67 itojun viftable[vifi].v_pkt_in++; 1680 1.67 itojun viftable[vifi].v_bytes_in += plen; 1681 1.1 hpeyerl } 1682 1.67 itojun rt->mfc_pkt_cnt++; 1683 1.67 itojun rt->mfc_byte_cnt += plen; 1684 1.67 itojun 1685 1.67 itojun /* 1686 1.67 itojun * For each vif, decide if a copy of the packet should be forwarded. 1687 1.67 itojun * Forward if: 1688 1.153 maxv * - the ttl exceeds the vif's threshold 1689 1.153 maxv * - there are group members downstream on interface 1690 1.67 itojun */ 1691 1.153 maxv for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) { 1692 1.67 itojun if ((rt->mfc_ttls[vifi] > 0) && 1693 1.86 manu (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1694 1.67 itojun vifp->v_pkt_out++; 1695 1.67 itojun vifp->v_bytes_out += plen; 1696 1.86 manu #ifdef PIM 1697 1.86 manu if (vifp->v_flags & VIFF_REGISTER) 1698 1.86 manu pim_register_send(ip, vifp, m, rt); 1699 1.86 manu else 1700 1.86 manu #endif 1701 1.67 itojun MC_SEND(ip, vifp, m); 1702 1.67 itojun } 1703 1.153 maxv } 1704 1.1 hpeyerl 1705 1.86 manu /* 1706 1.86 manu * Perform upcall-related bw measuring. 
1707 1.86 manu */ 1708 1.86 manu if (rt->mfc_bw_meter != NULL) { 1709 1.86 manu struct bw_meter *x; 1710 1.86 manu struct timeval now; 1711 1.86 manu 1712 1.86 manu microtime(&now); 1713 1.86 manu for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) 1714 1.86 manu bw_meter_receive_packet(x, plen, &now); 1715 1.86 manu } 1716 1.86 manu 1717 1.151 maxv return 0; 1718 1.15 mycroft } 1719 1.15 mycroft 1720 1.15 mycroft /* phyint_send: copy packet m with m_copypacket(), apply M_PULLUP() to the copy for the IP header length, and hand the copy to tbf_send_packet() when vifp->v_rate_limit is <= 0 or to tbf_control() otherwise. Returns without sending if no copy could be obtained. */ static void 1721 1.89 perry phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1722 1.15 mycroft { 1723 1.48 augustss struct mbuf *mb_copy; 1724 1.153 maxv const int hlen = ip->ip_hl << 2; 1725 1.15 mycroft 1726 1.15 mycroft /* 1727 1.15 mycroft * Make a new reference to the packet; make sure that 1728 1.15 mycroft * the IP header is actually copied, not just referenced, 1729 1.15 mycroft * so that ip_output() only scribbles on the copy. 1730 1.15 mycroft */ 1731 1.108 dyoung mb_copy = m_copypacket(m, M_DONTWAIT); 1732 1.15 mycroft M_PULLUP(mb_copy, hlen); 1733 1.86 manu if (mb_copy == NULL) 1734 1.1 hpeyerl return; 1735 1.1 hpeyerl 1736 1.15 mycroft if (vifp->v_rate_limit <= 0) 1737 1.15 mycroft tbf_send_packet(vifp, mb_copy); 1738 1.15 mycroft else 1739 1.62 itojun tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), 1740 1.62 itojun ntohs(ip->ip_len)); 1741 1.12 brezak } 1742 1.12 brezak 1743 1.12 brezak /* encap_send: build a new packet made of a fresh header mbuf holding a copy of multicast_encap_iphdr followed by a copy of m. The outer header gets its id, total length and the vif local/remote addresses; the inner header has its TTL decremented and its checksum recomputed. The result goes to tbf_send_packet() when vifp->v_rate_limit is <= 0 and to tbf_control() otherwise. Any allocation failure drops the packet without reporting an error. */ static void 1744 1.89 perry encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1745 1.48 augustss { 1746 1.48 augustss struct mbuf *mb_copy; 1747 1.48 augustss struct ip *ip_copy; 1748 1.62 itojun int i, len = ntohs(ip->ip_len) + sizeof(multicast_encap_iphdr); 1749 1.12 brezak 1750 1.86 manu /* Take care of delayed checksums */ 1751 1.86 manu if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 1752 1.161 maxv in_undefer_cksum_tcpudp(m); 1753 1.86 manu m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 1754 1.86 manu } 1755 1.86 manu 1756 1.12 brezak /* 1757 1.131 snj * copy the old packet & pullup its IP header
into the 1758 1.12 brezak * new mbuf so we can modify it. Try to fill the new 1759 1.12 brezak * mbuf since if we don't the ethernet driver will. 1760 1.12 brezak */ 1761 1.15 mycroft MGETHDR(mb_copy, M_DONTWAIT, MT_DATA); 1762 1.86 manu if (mb_copy == NULL) 1763 1.12 brezak return; 1764 1.15 mycroft mb_copy->m_data += max_linkhdr; 1765 1.15 mycroft mb_copy->m_pkthdr.len = len; 1766 1.12 brezak mb_copy->m_len = sizeof(multicast_encap_iphdr); 1767 1.60 itojun 1768 1.108 dyoung if ((mb_copy->m_next = m_copypacket(m, M_DONTWAIT)) == NULL) { 1769 1.12 brezak m_freem(mb_copy); 1770 1.12 brezak return; 1771 1.12 brezak } 1772 1.15 mycroft i = MHLEN - max_linkhdr; 1773 1.12 brezak if (i > len) 1774 1.12 brezak i = len; 1775 1.12 brezak mb_copy = m_pullup(mb_copy, i); 1776 1.86 manu if (mb_copy == NULL) 1777 1.12 brezak return; 1778 1.60 itojun 1779 1.12 brezak /* 1780 1.12 brezak * fill in the encapsulating IP header. 1781 1.12 brezak */ 1782 1.12 brezak ip_copy = mtod(mb_copy, struct ip *); 1783 1.12 brezak *ip_copy = multicast_encap_iphdr; 1784 1.110 matt if (len < IP_MINFRAGSIZE) 1785 1.110 matt ip_copy->ip_id = 0; 1786 1.110 matt else 1787 1.166 ozaki ip_copy->ip_id = ip_newid(); 1788 1.62 itojun ip_copy->ip_len = htons(len); 1789 1.12 brezak ip_copy->ip_src = vifp->v_lcl_addr; 1790 1.12 brezak ip_copy->ip_dst = vifp->v_rmt_addr; 1791 1.60 itojun 1792 1.12 brezak /* 1793 1.12 brezak * turn the encapsulated IP header back into a valid one. 
1794 1.12 brezak */ 1795 1.101 christos ip = (struct ip *)((char *)ip_copy + sizeof(multicast_encap_iphdr)); 1796 1.12 brezak --ip->ip_ttl; 1797 1.12 brezak ip->ip_sum = 0; 1798 1.12 brezak mb_copy->m_data += sizeof(multicast_encap_iphdr); 1799 1.12 brezak ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1800 1.12 brezak mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1801 1.60 itojun 1802 1.15 mycroft if (vifp->v_rate_limit <= 0) 1803 1.15 mycroft tbf_send_packet(vifp, mb_copy); 1804 1.15 mycroft else 1805 1.62 itojun tbf_control(vifp, mb_copy, ip, ntohs(ip_copy->ip_len)); 1806 1.12 brezak } 1807 1.12 brezak 1808 1.12 brezak /* 1809 1.54 itojun * De-encapsulate a packet and feed it back through ip input. 1810 1.12 brezak */ 1811 1.54 itojun /* eparg is the struct vif the packet belongs to; it is asserted to be non-NULL. The mbuf is always consumed: it is either queued on ip_pktq or freed. */ static void 1812 1.148 knakahar vif_input(struct mbuf *m, int off, int proto, void *eparg) 1813 1.25 christos { 1814 1.148 knakahar struct vif *vifp = eparg; 1815 1.148 knakahar 1816 1.148 knakahar KASSERT(vifp != NULL); 1817 1.25 christos 1818 1.148 knakahar /* Count and drop anything that is not ENCAP_PROTO. */ if (proto != ENCAP_PROTO) { 1819 1.54 itojun m_freem(m); 1820 1.54 itojun mrtstat.mrts_bad_tunnel++; 1821 1.54 itojun return; 1822 1.12 brezak } 1823 1.22 mycroft 1824 1.54 itojun /* Adjust the mbuf by off (presumably removing the outer header - confirm), record the vif interface as the receive interface, then queue the packet on ip_pktq; it is freed if the enqueue fails. */ m_adj(m, off); 1825 1.140 ozaki m_set_rcvif(m, vifp->v_ifp); 1826 1.130 rmind 1827 1.130 rmind if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) { 1828 1.12 brezak m_freem(m); 1829 1.12 brezak } 1830 1.54 itojun } 1831 1.54 itojun 1832 1.54 itojun /* 1833 1.95 gdt * Check if the packet should be received on the vif denoted by arg. 1834 1.95 gdt * (The encap selection code will call this once per vif since each is 1835 1.95 gdt * registered separately.)
1836 1.54 itojun */ 1837 1.54 itojun /* Returns 0 to reject the packet for this vif, or 32 + 32 + 5 to claim it. off is the offset of the inner IP header within m. */ static int 1838 1.94 martin vif_encapcheck(struct mbuf *m, int off, int proto, void *arg) 1839 1.54 itojun { 1840 1.54 itojun struct vif *vifp; 1841 1.54 itojun struct ip ip; 1842 1.54 itojun 1843 1.54 itojun #ifdef DIAGNOSTIC 1844 1.54 itojun if (!arg || proto != IPPROTO_IPV4) 1845 1.54 itojun panic("unexpected arg in vif_encapcheck"); 1846 1.54 itojun #endif 1847 1.54 itojun 1848 1.54 itojun /* 1849 1.165 andvar * Accept the packet only if the inner header is multicast 1850 1.95 gdt * and the outer header matches a tunnel-mode vif. Order 1851 1.95 gdt * checks in the hope that common non-matching packets will be 1852 1.95 gdt * rejected quickly. Assume that unicast IPv4 traffic in a 1853 1.95 gdt * parallel tunnel (e.g. gif(4)) is unlikely. 1854 1.54 itojun */ 1855 1.54 itojun 1856 1.95 gdt /* Obtain the outer IP header and the vif pointer. */ 1857 1.159 maxv /* NOTE(review): this copy is made before any length check on m; presumably the caller guarantees at least sizeof(ip) bytes are present - confirm. */ m_copydata(m, 0, sizeof(ip), (void *)&ip); 1858 1.95 gdt vifp = (struct vif *)arg; 1859 1.95 gdt 1860 1.95 gdt /* 1861 1.95 gdt * The outer source must match the vif's remote peer address. 1862 1.95 gdt * For a multicast router with several tunnels, this is the 1863 1.95 gdt * only check that will fail on packets in other tunnels, 1864 1.150 maxv * assuming the local address is the same. 1865 1.95 gdt */ 1866 1.95 gdt if (!in_hosteq(vifp->v_rmt_addr, ip.ip_src)) 1867 1.95 gdt return 0; 1868 1.95 gdt 1869 1.95 gdt /* The outer destination must match the vif's local address. */ 1870 1.95 gdt if (!in_hosteq(vifp->v_lcl_addr, ip.ip_dst)) 1871 1.95 gdt return 0; 1872 1.95 gdt 1873 1.95 gdt /* The vif must be of tunnel type. */ 1874 1.95 gdt if ((vifp->v_flags & VIFF_TUNNEL) == 0) 1875 1.95 gdt return 0; 1876 1.95 gdt 1877 1.95 gdt /* Check that the inner destination is multicast.
*/ 1878 1.159 maxv if (off + sizeof(ip) > m->m_pkthdr.len) 1879 1.159 maxv return 0; 1880 1.159 maxv m_copydata(m, off, sizeof(ip), (void *)&ip); 1881 1.54 itojun if (!IN_MULTICAST(ip.ip_dst.s_addr)) 1882 1.54 itojun return 0; 1883 1.54 itojun 1884 1.95 gdt /* 1885 1.95 gdt * We have checked that both the outer src and dst addresses 1886 1.95 gdt * match the vif, and that the inner destination is multicast 1887 1.95 gdt * (224/5). By claiming more than 64, we intend to 1888 1.95 gdt * preferentially take packets that also match a parallel 1889 1.95 gdt * gif(4). 1890 1.95 gdt */ 1891 1.95 gdt return 32 + 32 + 5; 1892 1.1 hpeyerl } 1893 1.15 mycroft 1894 1.15 mycroft /* 1895 1.15 mycroft * Token bucket filter module 1896 1.15 mycroft */ 1897 1.15 mycroft /* tbf_control: rate-limit packet m of length len on vifp. A packet longer than MAX_BKT_SIZE is counted in mrts_pkt2large and freed. Otherwise the token count is refreshed and the packet is sent, queued, or dropped as described in the comment below. ip is only passed on to tbf_dq_sel() to choose a queued packet to discard when the queue is full. The mbuf is always consumed. */ static void 1898 1.89 perry tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_int32_t len) 1899 1.15 mycroft { 1900 1.15 mycroft 1901 1.31 mycroft if (len > MAX_BKT_SIZE) { 1902 1.31 mycroft /* drop if packet is too large */ 1903 1.31 mycroft mrtstat.mrts_pkt2large++; 1904 1.31 mycroft m_freem(m); 1905 1.31 mycroft return; 1906 1.31 mycroft } 1907 1.31 mycroft 1908 1.21 mycroft tbf_update_tokens(vifp); 1909 1.15 mycroft 1910 1.21 mycroft /* 1911 1.21 mycroft * If there are enough tokens, and the queue is empty, send this packet 1912 1.21 mycroft * out immediately. Otherwise, try to insert it on this vif's queue.
1913 1.21 mycroft */ 1914 1.31 mycroft if (vifp->tbf_q_len == 0) { 1915 1.31 mycroft if (len <= vifp->tbf_n_tok) { 1916 1.31 mycroft vifp->tbf_n_tok -= len; 1917 1.21 mycroft tbf_send_packet(vifp, m); 1918 1.21 mycroft } else { 1919 1.21 mycroft /* queue packet and timeout till later */ 1920 1.31 mycroft tbf_queue(vifp, m); 1921 1.47 thorpej callout_reset(&vifp->v_repq_ch, TBF_REPROCESS, 1922 1.47 thorpej tbf_reprocess_q, vifp); 1923 1.21 mycroft } 1924 1.15 mycroft } else { 1925 1.31 mycroft if (vifp->tbf_q_len >= vifp->tbf_max_q_len && 1926 1.21 mycroft !tbf_dq_sel(vifp, ip)) { 1927 1.86 manu /* queue full, and couldn't make room */ 1928 1.21 mycroft mrtstat.mrts_q_overflow++; 1929 1.21 mycroft m_freem(m); 1930 1.21 mycroft } else { 1931 1.21 mycroft /* queue length low enough, or made room */ 1932 1.31 mycroft tbf_queue(vifp, m); 1933 1.21 mycroft tbf_process_q(vifp); 1934 1.21 mycroft } 1935 1.15 mycroft } 1936 1.15 mycroft } 1937 1.15 mycroft 1938 1.60 itojun /* 1939 1.15 mycroft * adds a packet to the queue at the interface 1940 1.15 mycroft */ 1941 1.15 mycroft static void 1942 1.89 perry tbf_queue(struct vif *vifp, struct mbuf *m) 1943 1.15 mycroft { 1944 1.48 augustss int s = splsoftnet(); 1945 1.15 mycroft 1946 1.31 mycroft /* insert at tail */ 1947 1.31 mycroft /* tbf_t addresses the pointer slot that terminates the queue: store m there, then make the m_nextpkt field of m the new terminating slot. */ *vifp->tbf_t = m; 1948 1.31 mycroft vifp->tbf_t = &m->m_nextpkt; 1949 1.31 mycroft vifp->tbf_q_len++; 1950 1.15 mycroft 1951 1.31 mycroft splx(s); 1952 1.15 mycroft } 1953 1.15 mycroft 1954 1.60 itojun /* 1955 1.15 mycroft * processes the queue at the interface 1956 1.15 mycroft */ 1957 1.15 mycroft /* Packets are sent in queue order, each one consuming ip_len tokens; processing stops at the first packet whose ip_len exceeds the tokens currently available. */ static void 1958 1.89 perry tbf_process_q(struct vif *vifp) 1959 1.15 mycroft { 1960 1.48 augustss struct mbuf *m; 1961 1.48 augustss int len; 1962 1.48 augustss int s = splsoftnet(); 1963 1.15 mycroft 1964 1.31 mycroft /* 1965 1.31 mycroft * Loop through the queue at the interface and send as many packets 1966 1.31 mycroft * as possible.
1967 1.31 mycroft */ 1968 1.86 manu for (m = vifp->tbf_q; m != NULL; m = vifp->tbf_q) { 1969 1.62 itojun len = ntohs(mtod(m, struct ip *)->ip_len); 1970 1.31 mycroft 1971 1.31 mycroft /* determine if the packet can be sent */ 1972 1.31 mycroft if (len <= vifp->tbf_n_tok) { 1973 1.31 mycroft /* if so, 1974 1.31 mycroft * reduce no of tokens, dequeue the packet, 1975 1.31 mycroft * send the packet. 1976 1.31 mycroft */ 1977 1.86 manu /* When the queue becomes empty the tail pointer is reset to the head slot. */ if ((vifp->tbf_q = m->m_nextpkt) == NULL) 1978 1.31 mycroft vifp->tbf_t = &vifp->tbf_q; 1979 1.31 mycroft --vifp->tbf_q_len; 1980 1.15 mycroft 1981 1.86 manu m->m_nextpkt = NULL; 1982 1.31 mycroft vifp->tbf_n_tok -= len; 1983 1.31 mycroft tbf_send_packet(vifp, m); 1984 1.31 mycroft } else 1985 1.31 mycroft break; 1986 1.31 mycroft } 1987 1.31 mycroft splx(s); 1988 1.15 mycroft } 1989 1.15 mycroft 1990 1.15 mycroft /* tbf_reprocess_q: callout handler registered through callout_reset() on v_repq_ch; arg is the struct vif. Does nothing when ip_mrouter is NULL. Otherwise it refreshes the token count, sends what the tokens allow from the queue, and re-arms the callout after TBF_REPROCESS while packets remain queued. */ static void 1991 1.89 perry tbf_reprocess_q(void *arg) 1992 1.15 mycroft { 1993 1.48 augustss struct vif *vifp = arg; 1994 1.15 mycroft 1995 1.86 manu if (ip_mrouter == NULL) 1996 1.20 mycroft return; 1997 1.15 mycroft 1998 1.20 mycroft tbf_update_tokens(vifp); 1999 1.20 mycroft tbf_process_q(vifp); 2000 1.15 mycroft 2001 1.31 mycroft if (vifp->tbf_q_len != 0) 2002 1.47 thorpej callout_reset(&vifp->v_repq_ch, TBF_REPROCESS, 2003 1.47 thorpej tbf_reprocess_q, vifp); 2004 1.15 mycroft } 2005 1.15 mycroft 2006 1.15 mycroft /* function that will selectively discard a member of the queue 2007 1.31 mycroft * based on the precedence value and the priority 2008 1.15 mycroft */ 2009 1.15 mycroft /* Frees the first queued packet whose priority() is lower than that of ip and returns 1; returns 0 when no queued packet has a lower priority. */ static int 2010 1.89 perry tbf_dq_sel(struct vif *vifp, struct ip *ip) 2011 1.15 mycroft { 2012 1.48 augustss u_int p; 2013 1.48 augustss struct mbuf **mp, *m; 2014 1.48 augustss int s = splsoftnet(); 2015 1.31 mycroft 2016 1.31 mycroft p = priority(vifp, ip); 2017 1.31 mycroft 2018 1.31 mycroft for (mp = &vifp->tbf_q, m = *mp; 2019 1.86 manu m != NULL; 2020 1.31 mycroft mp = &m->m_nextpkt, m = *mp) { 2021 1.31 mycroft if (p > priority(vifp,
mtod(m, struct ip *))) { 2022 1.86 manu /* Unlink m; if it was the last packet, the tail pointer falls back to the slot that pointed at it. */ if ((*mp = m->m_nextpkt) == NULL) 2023 1.31 mycroft vifp->tbf_t = mp; 2024 1.31 mycroft --vifp->tbf_q_len; 2025 1.31 mycroft 2026 1.31 mycroft m_freem(m); 2027 1.31 mycroft mrtstat.mrts_drop_sel++; 2028 1.31 mycroft splx(s); 2029 1.151 maxv return 1; 2030 1.31 mycroft } 2031 1.15 mycroft } 2032 1.31 mycroft splx(s); 2033 1.151 maxv return 0; 2034 1.15 mycroft } 2035 1.15 mycroft 2036 1.15 mycroft /* tbf_send_packet: transmit m on vifp through ip_output() at splsoftnet. Tunnel vifs (VIFF_TUNNEL) pass &vifp->v_route and ignore the result. Other vifs pass multicast options that select the vif interface by index, use a TTL one less than the one in the packet, and enable loopback; the ip_output() result is only logged, and only when DEBUG_XMIT is set in mrtdebug. */ static void 2037 1.89 perry tbf_send_packet(struct vif *vifp, struct mbuf *m) 2038 1.15 mycroft { 2039 1.31 mycroft int error; 2040 1.31 mycroft int s = splsoftnet(); 2041 1.31 mycroft 2042 1.31 mycroft if (vifp->v_flags & VIFF_TUNNEL) { 2043 1.31 mycroft /* If tunnel options */ 2044 1.120 plunky ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL); 2045 1.31 mycroft } else { 2046 1.31 mycroft /* if physical interface option, extract the options and then send */ 2047 1.31 mycroft struct ip_moptions imo; 2048 1.15 mycroft 2049 1.141 ozaki imo.imo_multicast_if_index = if_get_index(vifp->v_ifp); 2050 1.31 mycroft imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 2051 1.31 mycroft imo.imo_multicast_loop = 1; 2052 1.15 mycroft 2053 1.102 dyoung error = ip_output(m, NULL, NULL, IP_FORWARDING|IP_MULTICASTOPTS, 2054 1.102 dyoung &imo, NULL); 2055 1.31 mycroft 2056 1.31 mycroft if (mrtdebug & DEBUG_XMIT) 2057 1.42 nathanw log(LOG_DEBUG, "phyint_send on vif %ld err %d\n", 2058 1.67 itojun (long)(vifp - viftable), error); 2059 1.31 mycroft } 2060 1.31 mycroft splx(s); 2061 1.15 mycroft } 2062 1.15 mycroft 2063 1.15 mycroft /* determine the current time and then 2064 1.15 mycroft * the elapsed time (between the last time and time now) 2065 1.15 mycroft * in milliseconds & update the no.
of tokens in the bucket 2066 1.15 mycroft */ 2067 1.15 mycroft static void 2068 1.89 perry tbf_update_tokens(struct vif *vifp) 2069 1.15 mycroft { 2070 1.31 mycroft struct timeval tp; 2071 1.48 augustss u_int32_t tm; 2072 1.48 augustss int s = splsoftnet(); 2073 1.15 mycroft 2074 1.31 mycroft microtime(&tp); 2075 1.15 mycroft 2076 1.31 mycroft TV_DELTA(tp, vifp->tbf_last_pkt_t, tm); 2077 1.15 mycroft 2078 1.31 mycroft /* 2079 1.31 mycroft * This formula is actually 2080 1.31 mycroft * "time in seconds" * "bytes/second". 2081 1.31 mycroft * 2082 1.31 mycroft * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) 2083 1.31 mycroft * 2084 1.31 mycroft * The (1000/1024) was introduced in add_vif to optimize 2085 1.31 mycroft * this divide into a shift. 2086 1.31 mycroft */ 2087 1.31 mycroft /* NOTE(review): the formula above divides tm by 1000000, so tm is presumably in microseconds. tm is a u_int32_t; if v_rate_limit is also a 32-bit type, the product tm * v_rate_limit could wrap before the divide for long idle gaps or large limits - confirm the field type and the range TV_DELTA can produce. The result is capped at MAX_BKT_SIZE below. */ vifp->tbf_n_tok += tm * vifp->v_rate_limit / 8192; 2088 1.31 mycroft vifp->tbf_last_pkt_t = tp; 2089 1.15 mycroft 2090 1.31 mycroft if (vifp->tbf_n_tok > MAX_BKT_SIZE) 2091 1.31 mycroft vifp->tbf_n_tok = MAX_BKT_SIZE; 2092 1.15 mycroft 2093 1.31 mycroft splx(s); 2094 1.15 mycroft } 2095 1.15 mycroft 2096 1.15 mycroft /* priority: classify a packet for the selective discard done by tbf_dq_sel(), where a larger value wins. Returns 50 by default; for UDP packets the destination port range selects 70, 60 or 55. vifp is not used. */ static int 2097 1.100 christos priority(struct vif *vifp, struct ip *ip) 2098 1.15 mycroft { 2099 1.86 manu int prio = 50; /* the lowest priority -- default case */ 2100 1.15 mycroft 2101 1.67 itojun /* temporary hack; may add general packet classifier some day */ 2102 1.60 itojun 2103 1.67 itojun /* 2104 1.157 maxv * XXX XXX: We're reading the UDP header, but we didn't ensure 2105 1.157 maxv * it was present in the packet.
2106 1.157 maxv */ 2107 1.157 maxv 2108 1.157 maxv /* 2109 1.67 itojun * The UDP port space is divided up into four priority ranges: 2110 1.67 itojun * [0, 16384) : unclassified - lowest priority 2111 1.67 itojun * [16384, 32768) : audio - highest priority 2112 1.67 itojun * [32768, 49152) : whiteboard - medium priority 2113 1.67 itojun * [49152, 65536) : video - low priority 2114 1.67 itojun */ 2115 1.67 itojun if (ip->ip_p == IPPROTO_UDP) { 2116 1.67 itojun struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 2117 1.15 mycroft 2118 1.67 itojun /* The top two bits of the destination port pick one of the four ranges above; a value of 0 keeps the default of 50. */ switch (ntohs(udp->uh_dport) & 0xc000) { 2119 1.67 itojun case 0x4000: 2120 1.67 itojun prio = 70; 2121 1.67 itojun break; 2122 1.67 itojun case 0x8000: 2123 1.67 itojun prio = 60; 2124 1.67 itojun break; 2125 1.67 itojun case 0xc000: 2126 1.67 itojun prio = 55; 2127 1.67 itojun break; 2128 1.67 itojun } 2129 1.15 mycroft 2130 1.67 itojun if (tbfdebug > 1) 2131 1.67 itojun log(LOG_DEBUG, "port %x prio %d\n", 2132 1.67 itojun ntohs(udp->uh_dport), prio); 2133 1.86 manu } 2134 1.15 mycroft 2135 1.151 maxv return prio; 2136 1.15 mycroft } 2137 1.15 mycroft 2138 1.15 mycroft /* 2139 1.86 manu * Code for bandwidth monitors 2140 1.86 manu */ 2141 1.86 manu 2142 1.86 manu /* 2143 1.86 manu * Define common interface for timeval-related methods 2144 1.86 manu */ 2145 1.86 manu #define BW_TIMEVALCMP(tvp, uvp, cmp) timercmp((tvp), (uvp), cmp) 2146 1.86 manu #define BW_TIMEVALDECR(vvp, uvp) timersub((vvp), (uvp), (vvp)) 2147 1.86 manu #define BW_TIMEVALADD(vvp, uvp) timeradd((vvp), (uvp), (vvp)) 2148 1.86 manu /* BW_TIMEVALDECR and BW_TIMEVALADD store their result back into the first argument. */ 2149 1.86 manu /* compute_bw_meter_flags: translate the BW_UPCALL_* unit and comparison bits found in req->bu_flags into the corresponding BW_METER_* bits; all other bits are ignored. */ static uint32_t 2150 1.86 manu compute_bw_meter_flags(struct bw_upcall *req) 2151 1.86 manu { 2152 1.150 maxv uint32_t flags = 0; 2153 1.86 manu 2154 1.150 maxv if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) 2155 1.150 maxv flags |= BW_METER_UNIT_PACKETS; 2156 1.150 maxv if (req->bu_flags & BW_UPCALL_UNIT_BYTES) 2157 1.150 maxv flags |= BW_METER_UNIT_BYTES; 2158 1.150 maxv if (req->bu_flags &
BW_UPCALL_GEQ) 2159 1.150 maxv flags |= BW_METER_GEQ; 2160 1.150 maxv if (req->bu_flags & BW_UPCALL_LEQ) 2161 1.150 maxv flags |= BW_METER_LEQ; 2162 1.90 perry 2163 1.150 maxv return flags; 2164 1.86 manu } 2165 1.90 perry 2166 1.86 manu /* 2167 1.86 manu * Add a bw_meter entry 2168 1.86 manu */ 2169 1.86 manu /* Returns EOPNOTSUPP when MRT_MFC_BW_UPCALL is not set in mrt_api_config, EINVAL for an invalid flag combination or a threshold interval below the minimum, EADDRNOTAVAIL when mfc_find() finds no entry for the source/group pair, ENOBUFS when the allocation fails, and 0 on success or when an identical meter is already installed. */ static int 2170 1.115 plunky add_bw_upcall(struct bw_upcall *req) 2171 1.86 manu { 2172 1.150 maxv int s; 2173 1.150 maxv struct mfc *mfc; 2174 1.150 maxv struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, 2175 1.86 manu BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; 2176 1.150 maxv struct timeval now; 2177 1.150 maxv struct bw_meter *x; 2178 1.150 maxv uint32_t flags; 2179 1.150 maxv 2180 1.150 maxv if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) 2181 1.150 maxv return EOPNOTSUPP; 2182 1.150 maxv 2183 1.150 maxv /* Test if the flags are valid */ 2184 1.150 maxv if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) 2185 1.150 maxv return EINVAL; 2186 1.150 maxv if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) 2187 1.150 maxv return EINVAL; 2188 1.150 maxv /* GEQ and LEQ are mutually exclusive. */ if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 2189 1.86 manu == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 2190 1.150 maxv return EINVAL; 2191 1.86 manu 2192 1.150 maxv /* Test if the threshold time interval is valid */ 2193 1.150 maxv if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) 2194 1.150 maxv return EINVAL; 2195 1.86 manu 2196 1.150 maxv flags = compute_bw_meter_flags(req); 2197 1.86 manu 2198 1.150 maxv /* 2199 1.150 maxv * Find if we have already same bw_meter entry 2200 1.150 maxv */ 2201 1.150 maxv s = splsoftnet(); 2202 1.150 maxv mfc = mfc_find(&req->bu_src, &req->bu_dst); 2203 1.150 maxv if (mfc == NULL) { 2204 1.150 maxv splx(s); 2205 1.150 maxv return EADDRNOTAVAIL; 2206 1.150 maxv } 2207 1.150 maxv for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { 2208 1.150 maxv if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 2209 1.150 maxv &req->bu_threshold.b_time,
==)) && 2210 1.150 maxv (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 2211 1.150 maxv (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 2212 1.150 maxv (x->bm_flags & BW_METER_USER_FLAGS) == flags) { 2213 1.150 maxv splx(s); 2214 1.150 maxv return 0; /* XXX Already installed */ 2215 1.150 maxv } 2216 1.150 maxv } 2217 1.150 maxv 2218 1.150 maxv /* Allocate the new bw_meter entry */ 2219 1.150 maxv x = kmem_intr_alloc(sizeof(*x), KM_NOSLEEP); 2220 1.150 maxv if (x == NULL) { 2221 1.150 maxv splx(s); 2222 1.150 maxv return ENOBUFS; 2223 1.86 manu } 2224 1.86 manu 2225 1.150 maxv /* Set the new bw_meter entry */ 2226 1.150 maxv x->bm_threshold.b_time = req->bu_threshold.b_time; 2227 1.150 maxv microtime(&now); 2228 1.150 maxv x->bm_start_time = now; 2229 1.150 maxv x->bm_threshold.b_packets = req->bu_threshold.b_packets; 2230 1.150 maxv x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; 2231 1.150 maxv x->bm_measured.b_packets = 0; 2232 1.150 maxv x->bm_measured.b_bytes = 0; 2233 1.150 maxv x->bm_flags = flags; 2234 1.150 maxv x->bm_time_next = NULL; 2235 1.150 maxv /* BW_METER_BUCKETS is outside the valid bucket range; unschedule_bw_meter() treats it as not scheduled. */ x->bm_time_hash = BW_METER_BUCKETS; 2236 1.150 maxv 2237 1.150 maxv /* Add the new bw_meter entry to the front of entries for this MFC */ 2238 1.150 maxv x->bm_mfc = mfc; 2239 1.150 maxv x->bm_mfc_next = mfc->mfc_bw_meter; 2240 1.150 maxv mfc->mfc_bw_meter = x; 2241 1.150 maxv schedule_bw_meter(x, &now); 2242 1.86 manu splx(s); 2243 1.86 manu 2244 1.150 maxv return 0; 2245 1.86 manu } 2246 1.86 manu 2247 1.86 manu /* free_bw_list: walk a chain linked through bm_mfc_next, unscheduling and freeing every bw_meter on it. A NULL list is a no-op. */ static void 2248 1.86 manu free_bw_list(struct bw_meter *list) 2249 1.86 manu { 2250 1.150 maxv while (list != NULL) { 2251 1.150 maxv struct bw_meter *x = list; 2252 1.86 manu 2253 1.150 maxv /* Advance before x is freed. */ list = list->bm_mfc_next; 2254 1.150 maxv unschedule_bw_meter(x); 2255 1.150 maxv kmem_intr_free(x, sizeof(*x)); 2256 1.150 maxv } 2257 1.86 manu } 2258 1.86 manu 2259 1.86 manu /* 2260 1.86 manu * Delete one or multiple bw_meter entries 2261 1.86 manu */ 2262 1.86 manu static int
2263 1.115 plunky del_bw_upcall(struct bw_upcall *req) 2264 1.86 manu { 2265 1.150 maxv /* With BW_UPCALL_DELETE_ALL set in req->bu_flags, every meter of the matching MFC entry is freed. Otherwise only the first meter whose threshold and user flags equal the request is removed. Returns EOPNOTSUPP when MRT_MFC_BW_UPCALL is not configured, EADDRNOTAVAIL when no MFC entry matches, EINVAL when no meter matches, and 0 on success. */ int s; 2266 1.150 maxv struct mfc *mfc; 2267 1.150 maxv struct bw_meter *x; 2268 1.90 perry 2269 1.150 maxv if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) 2270 1.150 maxv return EOPNOTSUPP; 2271 1.86 manu 2272 1.150 maxv s = splsoftnet(); 2273 1.150 maxv /* Find the corresponding MFC entry */ 2274 1.150 maxv mfc = mfc_find(&req->bu_src, &req->bu_dst); 2275 1.150 maxv if (mfc == NULL) { 2276 1.150 maxv splx(s); 2277 1.150 maxv return EADDRNOTAVAIL; 2278 1.150 maxv } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { 2279 1.150 maxv /* 2280 1.150 maxv * Delete all bw_meter entries for this mfc 2281 1.150 maxv */ 2282 1.150 maxv struct bw_meter *list; 2283 1.86 manu 2284 1.150 maxv list = mfc->mfc_bw_meter; 2285 1.150 maxv mfc->mfc_bw_meter = NULL; 2286 1.150 maxv free_bw_list(list); 2287 1.150 maxv splx(s); 2288 1.150 maxv return 0; 2289 1.150 maxv } else { /* Delete a single bw_meter entry */ 2290 1.150 maxv struct bw_meter *prev; 2291 1.150 maxv uint32_t flags = 0; 2292 1.150 maxv 2293 1.150 maxv flags = compute_bw_meter_flags(req); 2294 1.150 maxv 2295 1.150 maxv /* Find the bw_meter entry to delete */ 2296 1.150 maxv for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; 2297 1.150 maxv prev = x, x = x->bm_mfc_next) { 2298 1.150 maxv if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 2299 1.150 maxv &req->bu_threshold.b_time, ==)) && 2300 1.150 maxv (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 2301 1.150 maxv (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 2302 1.150 maxv (x->bm_flags & BW_METER_USER_FLAGS) == flags) 2303 1.150 maxv break; 2304 1.150 maxv } 2305 1.150 maxv if (x != NULL) { /* Delete entry from the list for this MFC */ 2306 1.150 maxv if (prev != NULL) 2307 1.150 maxv prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ 2308 1.150 maxv else 2309 1.150 maxv x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ 2310 1.86 manu 2311
1.150 maxv unschedule_bw_meter(x); 2312 1.150 maxv splx(s); 2313 1.150 maxv /* Free the bw_meter entry */ 2314 1.150 maxv kmem_intr_free(x, sizeof(*x)); 2315 1.150 maxv return 0; 2316 1.150 maxv } else { 2317 1.150 maxv splx(s); 2318 1.150 maxv return EINVAL; 2319 1.150 maxv } 2320 1.86 manu } 2321 1.150 maxv /* NOTREACHED */ 2322 1.86 manu } 2323 1.86 manu 2324 1.86 manu /* 2325 1.86 manu * Perform bandwidth measurement processing that may result in an upcall 2326 1.86 manu */ 2327 1.86 manu /* x: meter to update; plen: amount added to the measured byte count; nowp: current time. Exactly one of the BW_METER_GEQ and BW_METER_LEQ branches runs; a meter with neither flag is left untouched. */ static void 2328 1.86 manu bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) 2329 1.86 manu { 2330 1.150 maxv struct timeval delta; 2331 1.86 manu 2332 1.150 maxv /* delta = time elapsed since the start of the current measuring interval. */ delta = *nowp; 2333 1.150 maxv BW_TIMEVALDECR(&delta, &x->bm_start_time); 2334 1.86 manu 2335 1.150 maxv if (x->bm_flags & BW_METER_GEQ) { 2336 1.150 maxv /* 2337 1.150 maxv * Processing for ">=" type of bw_meter entry 2338 1.150 maxv */ 2339 1.150 maxv if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 2340 1.150 maxv /* Reset the bw_meter entry */ 2341 1.150 maxv x->bm_start_time = *nowp; 2342 1.150 maxv x->bm_measured.b_packets = 0; 2343 1.150 maxv x->bm_measured.b_bytes = 0; 2344 1.150 maxv x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2345 1.150 maxv } 2346 1.86 manu 2347 1.150 maxv /* Record that a packet is received */ 2348 1.150 maxv x->bm_measured.b_packets++; 2349 1.150 maxv x->bm_measured.b_bytes += plen; 2350 1.86 manu 2351 1.150 maxv /* 2352 1.150 maxv * Test if we should deliver an upcall 2353 1.150 maxv */ 2354 1.150 maxv if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { 2355 1.150 maxv if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2356 1.150 maxv (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || 2357 1.150 maxv ((x->bm_flags & BW_METER_UNIT_BYTES) && 2358 1.150 maxv (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { 2359 1.150 maxv /* Prepare an upcall for delivery */ 2360 1.150 maxv bw_meter_prepare_upcall(x, nowp); 2361 1.150 maxv x->bm_flags |=
BW_METER_UPCALL_DELIVERED; 2362 1.150 maxv } 2363 1.150 maxv } 2364 1.150 maxv } else if (x->bm_flags & BW_METER_LEQ) { 2365 1.150 maxv /* 2366 1.150 maxv * Processing for "<=" type of bw_meter entry 2367 1.150 maxv */ 2368 1.150 maxv if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 2369 1.150 maxv /* 2370 1.150 maxv * We are behind time with the multicast forwarding table 2371 1.150 maxv * scanning for "<=" type of bw_meter entries, so test now 2372 1.150 maxv * if we should deliver an upcall. 2373 1.150 maxv */ 2374 1.150 maxv if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2375 1.150 maxv (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2376 1.150 maxv ((x->bm_flags & BW_METER_UNIT_BYTES) && 2377 1.150 maxv (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2378 1.150 maxv /* Prepare an upcall for delivery */ 2379 1.150 maxv bw_meter_prepare_upcall(x, nowp); 2380 1.150 maxv } 2381 1.150 maxv /* Reschedule the bw_meter entry */ 2382 1.150 maxv unschedule_bw_meter(x); 2383 1.150 maxv schedule_bw_meter(x, nowp); 2384 1.150 maxv } 2385 1.90 perry 2386 1.150 maxv /* Record that a packet is received */ 2387 1.150 maxv x->bm_measured.b_packets++; 2388 1.150 maxv x->bm_measured.b_bytes += plen; 2389 1.86 manu 2390 1.150 maxv /* 2391 1.150 maxv * Test if we should restart the measuring interval 2392 1.150 maxv */ 2393 1.150 maxv if ((x->bm_flags & BW_METER_UNIT_PACKETS && 2394 1.150 maxv x->bm_measured.b_packets <= x->bm_threshold.b_packets) || 2395 1.150 maxv (x->bm_flags & BW_METER_UNIT_BYTES && 2396 1.150 maxv x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { 2397 1.150 maxv /* Don't restart the measuring interval */ 2398 1.150 maxv } else { 2399 1.150 maxv /* Do restart the measuring interval */ 2400 1.150 maxv /* 2401 1.150 maxv * XXX: note that we don't unschedule and schedule, because this 2402 1.150 maxv * might be too much overhead per packet. 
Instead, when we process 2403 1.150 maxv * all entries for a given timer hash bin, we check whether it is 2404 1.150 maxv * really a timeout. If not, we reschedule at that time. 2405 1.150 maxv */ 2406 1.150 maxv x->bm_start_time = *nowp; 2407 1.150 maxv x->bm_measured.b_packets = 0; 2408 1.150 maxv x->bm_measured.b_bytes = 0; 2409 1.150 maxv x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2410 1.150 maxv } 2411 1.86 manu } 2412 1.86 manu } 2413 1.86 manu 2414 1.86 manu /* 2415 1.86 manu * Prepare a bandwidth-related upcall 2416 1.86 manu */ 2417 1.86 manu /* Appends one entry describing meter x to the bw_upcalls[] array. Nothing is sent to the socket here unless the array already holds BW_UPCALLS_MAX entries, in which case bw_upcalls_send() is called first. */ static void 2418 1.86 manu bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) 2419 1.86 manu { 2420 1.150 maxv struct timeval delta; 2421 1.150 maxv struct bw_upcall *u; 2422 1.86 manu 2423 1.150 maxv /* 2424 1.150 maxv * Compute the measured time interval 2425 1.150 maxv */ 2426 1.150 maxv delta = *nowp; 2427 1.150 maxv BW_TIMEVALDECR(&delta, &x->bm_start_time); 2428 1.86 manu 2429 1.150 maxv /* 2430 1.150 maxv * If there are too many pending upcalls, deliver them now 2431 1.150 maxv */ 2432 1.150 maxv if (bw_upcalls_n >= BW_UPCALLS_MAX) 2433 1.150 maxv bw_upcalls_send(); 2434 1.150 maxv 2435 1.150 maxv /* 2436 1.150 maxv * Set the bw_upcall entry 2437 1.150 maxv */ 2438 1.150 maxv /* bw_upcalls_send() resets bw_upcalls_n to 0 when it has entries to send, so after the flush above this index is presumably within the array - confirm that bw_upcalls[] holds BW_UPCALLS_MAX entries. */ u = &bw_upcalls[bw_upcalls_n++]; 2439 1.150 maxv u->bu_src = x->bm_mfc->mfc_origin; 2440 1.150 maxv u->bu_dst = x->bm_mfc->mfc_mcastgrp; 2441 1.150 maxv u->bu_threshold.b_time = x->bm_threshold.b_time; 2442 1.150 maxv u->bu_threshold.b_packets = x->bm_threshold.b_packets; 2443 1.150 maxv u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; 2444 1.150 maxv u->bu_measured.b_time = delta; 2445 1.150 maxv u->bu_measured.b_packets = x->bm_measured.b_packets; 2446 1.150 maxv u->bu_measured.b_bytes = x->bm_measured.b_bytes; 2447 1.150 maxv u->bu_flags = 0; 2448 1.150 maxv if (x->bm_flags & BW_METER_UNIT_PACKETS) 2449 1.150 maxv u->bu_flags |= BW_UPCALL_UNIT_PACKETS; 2450 1.150 maxv if (x->bm_flags & BW_METER_UNIT_BYTES) 2451 1.150
maxv u->bu_flags |= BW_UPCALL_UNIT_BYTES; 2452 1.150 maxv if (x->bm_flags & BW_METER_GEQ) 2453 1.150 maxv u->bu_flags |= BW_UPCALL_GEQ; 2454 1.150 maxv if (x->bm_flags & BW_METER_LEQ) 2455 1.150 maxv u->bu_flags |= BW_UPCALL_LEQ; 2456 1.86 manu } 2457 1.86 manu 2458 1.86 manu /* 2459 1.86 manu * Send the pending bandwidth-related upcalls 2460 1.86 manu */ 2461 1.86 manu /* The message built here is a struct igmpmsg of type IGMPMSG_BW_UPCALL followed by the bw_upcalls_n pending bw_upcalls[] entries, handed to socket_send() on ip_mrouter. */ static void 2462 1.86 manu bw_upcalls_send(void) 2463 1.86 manu { 2464 1.152 maxv struct mbuf *m; 2465 1.152 maxv int len = bw_upcalls_n * sizeof(bw_upcalls[0]); 2466 1.152 maxv struct sockaddr_in k_igmpsrc = { 2467 1.152 maxv .sin_len = sizeof(k_igmpsrc), 2468 1.152 maxv .sin_family = AF_INET, 2469 1.152 maxv }; 2470 1.152 maxv static struct igmpmsg igmpmsg = { 2471 1.152 maxv 0, /* unused1 */ 2472 1.152 maxv 0, /* unused2 */ 2473 1.152 maxv IGMPMSG_BW_UPCALL,/* im_msgtype */ 2474 1.152 maxv 0, /* im_mbz */ 2475 1.152 maxv 0, /* im_vif */ 2476 1.152 maxv 0, /* unused3 */ 2477 1.152 maxv { 0 }, /* im_src */ 2478 1.152 maxv { 0 } /* im_dst */ 2479 1.152 maxv }; 2480 1.152 maxv 2481 1.152 maxv if (bw_upcalls_n == 0) 2482 1.152 maxv return; /* No pending upcalls */ 2483 1.152 maxv 2484 1.152 maxv /* len was computed above from the old count. The pending entries are treated as consumed from here on, so they are lost if the mbuf allocation below fails. */ bw_upcalls_n = 0; 2485 1.152 maxv 2486 1.152 maxv /* 2487 1.152 maxv * Allocate a new mbuf, initialize it with the header and 2488 1.152 maxv * the payload for the pending calls. 2489 1.152 maxv */ 2490 1.152 maxv MGETHDR(m, M_DONTWAIT, MT_HEADER); 2491 1.152 maxv if (m == NULL) { 2492 1.152 maxv log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); 2493 1.152 maxv return; 2494 1.152 maxv } 2495 1.86 manu 2496 1.152 maxv m->m_len = m->m_pkthdr.len = 0; 2497 1.152 maxv /* NOTE(review): the outcome of the two m_copyback() calls is not checked; confirm whether m_copyback() can fail to extend the chain and leave a short message. */ m_copyback(m, 0, sizeof(struct igmpmsg), (void *)&igmpmsg); 2498 1.152 maxv m_copyback(m, sizeof(struct igmpmsg), len, (void *)&bw_upcalls[0]); 2499 1.86 manu 2500 1.152 maxv /* 2501 1.152 maxv * Send the upcalls 2502 1.152 maxv * XXX do we need to set the address in k_igmpsrc ?
2503 1.152 maxv */ 2504 1.152 maxv mrtstat.mrts_upcalls++; 2505 1.152 maxv if (socket_send(ip_mrouter, m, &k_igmpsrc) < 0) { 2506 1.152 maxv log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); 2507 1.152 maxv ++mrtstat.mrts_upq_sockfull; 2508 1.152 maxv } 2509 1.86 manu } 2510 1.86 manu 2511 1.86 manu /* 2512 1.86 manu * Compute the timeout hash value for the bw_meter entries 2513 1.86 manu */ 2514 1.86 manu #define BW_METER_TIMEHASH(bw_meter, hash) \ 2515 1.86 manu do { \ 2516 1.86 manu struct timeval next_timeval = (bw_meter)->bm_start_time; \ 2517 1.152 maxv BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \ 2518 1.86 manu (hash) = next_timeval.tv_sec; \ 2519 1.86 manu if (next_timeval.tv_usec) \ 2520 1.152 maxv (hash)++; /* XXX: make sure we don't timeout early */ \ 2521 1.86 manu (hash) %= BW_METER_BUCKETS; \ 2522 1.86 manu } while (/*CONSTCOND*/ 0) 2523 1.86 manu /* BW_METER_TIMEHASH: hash receives the expiry time of the meter (bm_start_time plus the threshold interval) in whole seconds, rounded up when there is a fractional part, reduced modulo BW_METER_BUCKETS. NOTE(review): tv_sec is stored into the caller-supplied hash variable before the modulo; schedule_bw_meter() passes an int, so if tv_sec does not fit in that type the remainder could be negative and index outside the bucket array - confirm the width of tv_sec on the target. */ 2524 1.86 manu /* 2525 1.86 manu * Schedule a timer to process periodically bw_meter entry of type "<=" 2526 1.86 manu * by linking the entry in the proper hash bucket.
2527 1.86 manu */ 2528 1.86 manu static void 2529 1.86 manu schedule_bw_meter(struct bw_meter *x, struct timeval *nowp) 2530 1.86 manu { 2531 1.152 maxv int time_hash; 2532 1.86 manu 2533 1.152 maxv if (!(x->bm_flags & BW_METER_LEQ)) 2534 1.152 maxv return; /* XXX: we schedule timers only for "<=" entries */ 2535 1.86 manu 2536 1.152 maxv /* 2537 1.152 maxv * Reset the bw_meter entry 2538 1.152 maxv */ 2539 1.152 maxv x->bm_start_time = *nowp; 2540 1.152 maxv x->bm_measured.b_packets = 0; 2541 1.152 maxv x->bm_measured.b_bytes = 0; 2542 1.152 maxv x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2543 1.86 manu 2544 1.152 maxv /* 2545 1.152 maxv * Compute the timeout hash value and insert the entry 2546 1.152 maxv */ 2547 1.152 maxv BW_METER_TIMEHASH(x, time_hash); 2548 1.152 maxv x->bm_time_next = bw_meter_timers[time_hash]; 2549 1.152 maxv bw_meter_timers[time_hash] = x; 2550 1.152 maxv x->bm_time_hash = time_hash; 2551 1.86 manu } 2552 1.86 manu 2553 1.86 manu /* 2554 1.86 manu * Unschedule the periodic timer that processes bw_meter entry of type "<=" 2555 1.86 manu * by removing the entry from the proper hash bucket. 
2556 1.86 manu */ 2557 1.86 manu static void 2558 1.86 manu unschedule_bw_meter(struct bw_meter *x) 2559 1.86 manu { 2560 1.152 maxv int time_hash; 2561 1.152 maxv struct bw_meter *prev, *tmp; 2562 1.86 manu 2563 1.152 maxv if (!(x->bm_flags & BW_METER_LEQ)) 2564 1.152 maxv return; /* XXX: we schedule timers only for "<=" entries */ 2565 1.86 manu 2566 1.152 maxv /* 2567 1.152 maxv * Compute the timeout hash value and delete the entry 2568 1.152 maxv */ 2569 1.152 maxv time_hash = x->bm_time_hash; 2570 1.152 maxv if (time_hash >= BW_METER_BUCKETS) 2571 1.152 maxv return; /* Entry was not scheduled */ 2572 1.86 manu 2573 1.152 maxv for (prev = NULL, tmp = bw_meter_timers[time_hash]; 2574 1.86 manu tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) 2575 1.152 maxv if (tmp == x) 2576 1.152 maxv break; 2577 1.86 manu 2578 1.152 maxv if (tmp == NULL) 2579 1.152 maxv panic("unschedule_bw_meter: bw_meter entry not found"); 2580 1.86 manu 2581 1.152 maxv if (prev != NULL) 2582 1.152 maxv prev->bm_time_next = x->bm_time_next; 2583 1.152 maxv else 2584 1.152 maxv bw_meter_timers[time_hash] = x->bm_time_next; 2585 1.86 manu 2586 1.152 maxv x->bm_time_next = NULL; 2587 1.152 maxv x->bm_time_hash = BW_METER_BUCKETS; 2588 1.86 manu } 2589 1.86 manu 2590 1.86 manu /* 2591 1.86 manu * Process all "<=" type of bw_meter that should be processed now, 2592 1.86 manu * and for each entry prepare an upcall if necessary. Each processed 2593 1.86 manu * entry is rescheduled again for the (periodic) processing. 2594 1.86 manu * 2595 1.86 manu * This is run periodically (once per second normally). On each round, 2596 1.86 manu * all the potentially matching entries are in the hash slot that we are 2597 1.86 manu * looking at. 
2598 1.86 manu */ 2599 1.86 manu static void 2600 1.89 perry bw_meter_process(void) 2601 1.86 manu { 2602 1.154 maxv int s; 2603 1.154 maxv static uint32_t last_tv_sec; /* last time we processed this */ 2604 1.154 maxv 2605 1.154 maxv uint32_t loops; 2606 1.154 maxv int i; 2607 1.154 maxv struct timeval now, process_endtime; 2608 1.154 maxv 2609 1.154 maxv microtime(&now); 2610 1.154 maxv if (last_tv_sec == now.tv_sec) 2611 1.154 maxv return; /* nothing to do */ 2612 1.154 maxv 2613 1.154 maxv loops = now.tv_sec - last_tv_sec; 2614 1.154 maxv last_tv_sec = now.tv_sec; 2615 1.154 maxv if (loops > BW_METER_BUCKETS) 2616 1.154 maxv loops = BW_METER_BUCKETS; 2617 1.154 maxv 2618 1.154 maxv s = splsoftnet(); 2619 1.154 maxv /* 2620 1.154 maxv * Process all bins of bw_meter entries from the one after the last 2621 1.154 maxv * processed to the current one. On entry, i points to the last bucket 2622 1.154 maxv * visited, so we need to increment i at the beginning of the loop. 2623 1.154 maxv */ 2624 1.154 maxv for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { 2625 1.154 maxv struct bw_meter *x, *tmp_list; 2626 1.154 maxv 2627 1.154 maxv if (++i >= BW_METER_BUCKETS) 2628 1.154 maxv i = 0; 2629 1.154 maxv 2630 1.154 maxv /* Disconnect the list of bw_meter entries from the bin */ 2631 1.154 maxv tmp_list = bw_meter_timers[i]; 2632 1.154 maxv bw_meter_timers[i] = NULL; 2633 1.154 maxv 2634 1.154 maxv /* Process the list of bw_meter entries */ 2635 1.154 maxv while (tmp_list != NULL) { 2636 1.154 maxv x = tmp_list; 2637 1.154 maxv tmp_list = tmp_list->bm_time_next; 2638 1.154 maxv 2639 1.154 maxv /* Test if the time interval is over */ 2640 1.154 maxv process_endtime = x->bm_start_time; 2641 1.154 maxv BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); 2642 1.154 maxv if (BW_TIMEVALCMP(&process_endtime, &now, >)) { 2643 1.154 maxv /* Not yet: reschedule, but don't reset */ 2644 1.154 maxv int time_hash; 2645 1.154 maxv 2646 1.154 maxv 
BW_METER_TIMEHASH(x, time_hash); 2647 1.154 maxv if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { 2648 1.154 maxv /* 2649 1.154 maxv * XXX: somehow the bin processing is a bit ahead of time. 2650 1.154 maxv * Put the entry in the next bin. 2651 1.154 maxv */ 2652 1.154 maxv if (++time_hash >= BW_METER_BUCKETS) 2653 1.154 maxv time_hash = 0; 2654 1.154 maxv } 2655 1.154 maxv x->bm_time_next = bw_meter_timers[time_hash]; 2656 1.154 maxv bw_meter_timers[time_hash] = x; 2657 1.154 maxv x->bm_time_hash = time_hash; 2658 1.154 maxv 2659 1.154 maxv continue; 2660 1.154 maxv } 2661 1.86 manu 2662 1.154 maxv /* 2663 1.154 maxv * Test if we should deliver an upcall 2664 1.154 maxv */ 2665 1.154 maxv if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2666 1.154 maxv (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2667 1.154 maxv ((x->bm_flags & BW_METER_UNIT_BYTES) && 2668 1.154 maxv (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2669 1.154 maxv /* Prepare an upcall for delivery */ 2670 1.154 maxv bw_meter_prepare_upcall(x, &now); 2671 1.154 maxv } 2672 1.154 maxv 2673 1.154 maxv /* 2674 1.154 maxv * Reschedule for next processing 2675 1.154 maxv */ 2676 1.154 maxv schedule_bw_meter(x, &now); 2677 1.154 maxv } 2678 1.86 manu } 2679 1.86 manu 2680 1.154 maxv /* Send all upcalls that are pending delivery */ 2681 1.154 maxv bw_upcalls_send(); 2682 1.86 manu 2683 1.154 maxv splx(s); 2684 1.86 manu } 2685 1.86 manu 2686 1.86 manu /* 2687 1.86 manu * A periodic function for sending all upcalls that are pending delivery 2688 1.86 manu */ 2689 1.86 manu static void 2690 1.100 christos expire_bw_upcalls_send(void *unused) 2691 1.86 manu { 2692 1.154 maxv int s; 2693 1.86 manu 2694 1.154 maxv s = splsoftnet(); 2695 1.154 maxv bw_upcalls_send(); 2696 1.154 maxv splx(s); 2697 1.86 manu 2698 1.154 maxv callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD, 2699 1.154 maxv expire_bw_upcalls_send, NULL); 2700 1.86 manu } 2701 1.86 manu 2702 1.86 manu /* 2703 1.86 manu 
* A periodic function for periodic scanning of the multicast forwarding 2704 1.86 manu * table for processing all "<=" bw_meter entries. 2705 1.86 manu */ 2706 1.86 manu static void 2707 1.100 christos expire_bw_meter_process(void *unused) 2708 1.86 manu { 2709 1.154 maxv if (mrt_api_config & MRT_MFC_BW_UPCALL) 2710 1.154 maxv bw_meter_process(); 2711 1.86 manu 2712 1.154 maxv callout_reset(&bw_meter_ch, BW_METER_PERIOD, 2713 1.154 maxv expire_bw_meter_process, NULL); 2714 1.86 manu } 2715 1.86 manu 2716 1.86 manu /* 2717 1.86 manu * End of bandwidth monitoring code 2718 1.86 manu */ 2719 1.86 manu 2720 1.86 manu #ifdef PIM 2721 1.86 manu /* 2722 1.86 manu * Send the packet up to the user daemon, or eventually do kernel encapsulation 2723 1.86 manu */ 2724 1.86 manu static int 2725 1.154 maxv pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m, 2726 1.154 maxv struct mfc *rt) 2727 1.86 manu { 2728 1.154 maxv struct mbuf *mb_copy, *mm; 2729 1.86 manu 2730 1.154 maxv if (mrtdebug & DEBUG_PIM) 2731 1.154 maxv log(LOG_DEBUG, "pim_register_send: \n"); 2732 1.86 manu 2733 1.154 maxv mb_copy = pim_register_prepare(ip, m); 2734 1.154 maxv if (mb_copy == NULL) 2735 1.154 maxv return ENOBUFS; 2736 1.154 maxv 2737 1.154 maxv /* 2738 1.154 maxv * Send all the fragments. Note that the mbuf for each fragment 2739 1.154 maxv * is freed by the sending machinery. 
2740 1.154 maxv */ 2741 1.154 maxv for (mm = mb_copy; mm; mm = mb_copy) { 2742 1.154 maxv mb_copy = mm->m_nextpkt; 2743 1.154 maxv mm->m_nextpkt = NULL; 2744 1.154 maxv mm = m_pullup(mm, sizeof(struct ip)); 2745 1.154 maxv if (mm != NULL) { 2746 1.154 maxv ip = mtod(mm, struct ip *); 2747 1.154 maxv if ((mrt_api_config & MRT_MFC_RP) && 2748 1.154 maxv !in_nullhost(rt->mfc_rp)) { 2749 1.154 maxv pim_register_send_rp(ip, vifp, mm, rt); 2750 1.154 maxv } else { 2751 1.154 maxv pim_register_send_upcall(ip, vifp, mm, rt); 2752 1.154 maxv } 2753 1.154 maxv } 2754 1.86 manu } 2755 1.86 manu 2756 1.154 maxv return 0; 2757 1.86 manu } 2758 1.86 manu 2759 1.86 manu /* 2760 1.86 manu * Return a copy of the data packet that is ready for PIM Register 2761 1.86 manu * encapsulation. 2762 1.86 manu * XXX: Note that in the returned copy the IP header is a valid one. 2763 1.86 manu */ 2764 1.86 manu static struct mbuf * 2765 1.86 manu pim_register_prepare(struct ip *ip, struct mbuf *m) 2766 1.86 manu { 2767 1.154 maxv struct mbuf *mb_copy = NULL; 2768 1.154 maxv int mtu; 2769 1.154 maxv 2770 1.154 maxv /* Take care of delayed checksums */ 2771 1.154 maxv if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 2772 1.161 maxv in_undefer_cksum_tcpudp(m); 2773 1.154 maxv m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 2774 1.154 maxv } 2775 1.86 manu 2776 1.154 maxv /* 2777 1.154 maxv * Copy the old packet & pullup its IP header into the 2778 1.154 maxv * new mbuf so we can modify it. 
2779 1.154 maxv */ 2780 1.154 maxv mb_copy = m_copypacket(m, M_DONTWAIT); 2781 1.154 maxv if (mb_copy == NULL) 2782 1.154 maxv return NULL; 2783 1.154 maxv mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); 2784 1.154 maxv if (mb_copy == NULL) 2785 1.154 maxv return NULL; 2786 1.86 manu 2787 1.154 maxv /* take care of the TTL */ 2788 1.154 maxv ip = mtod(mb_copy, struct ip *); 2789 1.154 maxv --ip->ip_ttl; 2790 1.86 manu 2791 1.154 maxv /* Compute the MTU after the PIM Register encapsulation */ 2792 1.154 maxv mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); 2793 1.154 maxv 2794 1.154 maxv if (ntohs(ip->ip_len) <= mtu) { 2795 1.154 maxv /* Turn the IP header into a valid one */ 2796 1.154 maxv ip->ip_sum = 0; 2797 1.154 maxv ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 2798 1.154 maxv } else { 2799 1.154 maxv /* Fragment the packet */ 2800 1.154 maxv if (ip_fragment(mb_copy, NULL, mtu) != 0) { 2801 1.154 maxv /* XXX: mb_copy was freed by ip_fragment() */ 2802 1.154 maxv return NULL; 2803 1.154 maxv } 2804 1.86 manu } 2805 1.154 maxv return mb_copy; 2806 1.86 manu } 2807 1.86 manu 2808 1.86 manu /* 2809 1.86 manu * Send an upcall with the data packet to the user-level process. 
2810 1.86 manu */ 2811 1.86 manu static int 2812 1.86 manu pim_register_send_upcall(struct ip *ip, struct vif *vifp, 2813 1.100 christos struct mbuf *mb_copy, struct mfc *rt) 2814 1.86 manu { 2815 1.154 maxv struct mbuf *mb_first; 2816 1.154 maxv int len = ntohs(ip->ip_len); 2817 1.154 maxv struct igmpmsg *im; 2818 1.154 maxv struct sockaddr_in k_igmpsrc = { 2819 1.154 maxv .sin_len = sizeof(k_igmpsrc), 2820 1.154 maxv .sin_family = AF_INET, 2821 1.154 maxv }; 2822 1.86 manu 2823 1.154 maxv /* 2824 1.154 maxv * Add a new mbuf with an upcall header 2825 1.154 maxv */ 2826 1.154 maxv MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 2827 1.154 maxv if (mb_first == NULL) { 2828 1.154 maxv m_freem(mb_copy); 2829 1.154 maxv return ENOBUFS; 2830 1.154 maxv } 2831 1.154 maxv mb_first->m_data += max_linkhdr; 2832 1.154 maxv mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); 2833 1.154 maxv mb_first->m_len = sizeof(struct igmpmsg); 2834 1.154 maxv mb_first->m_next = mb_copy; 2835 1.154 maxv 2836 1.154 maxv /* Send message to routing daemon */ 2837 1.154 maxv im = mtod(mb_first, struct igmpmsg *); 2838 1.154 maxv im->im_msgtype = IGMPMSG_WHOLEPKT; 2839 1.154 maxv im->im_mbz = 0; 2840 1.154 maxv im->im_vif = vifp - viftable; 2841 1.154 maxv im->im_src = ip->ip_src; 2842 1.154 maxv im->im_dst = ip->ip_dst; 2843 1.86 manu 2844 1.154 maxv k_igmpsrc.sin_addr = ip->ip_src; 2845 1.154 maxv 2846 1.154 maxv mrtstat.mrts_upcalls++; 2847 1.86 manu 2848 1.154 maxv if (socket_send(ip_mrouter, mb_first, &k_igmpsrc) < 0) { 2849 1.154 maxv if (mrtdebug & DEBUG_PIM) 2850 1.154 maxv log(LOG_WARNING, 2851 1.154 maxv "mcast: pim_register_send_upcall: ip_mrouter socket queue full\n"); 2852 1.154 maxv ++mrtstat.mrts_upq_sockfull; 2853 1.154 maxv return ENOBUFS; 2854 1.154 maxv } 2855 1.154 maxv 2856 1.154 maxv /* Keep statistics */ 2857 1.154 maxv pimstat.pims_snd_registers_msgs++; 2858 1.154 maxv pimstat.pims_snd_registers_bytes += len; 2859 1.86 manu 2860 1.154 maxv return 0; 2861 1.86 manu } 2862 
1.86 manu 2863 1.86 manu /* 2864 1.86 manu * Encapsulate the data packet in PIM Register message and send it to the RP. 2865 1.86 manu */ 2866 1.86 manu static int 2867 1.86 manu pim_register_send_rp(struct ip *ip, struct vif *vifp, 2868 1.154 maxv struct mbuf *mb_copy, struct mfc *rt) 2869 1.86 manu { 2870 1.154 maxv struct mbuf *mb_first; 2871 1.154 maxv struct ip *ip_outer; 2872 1.154 maxv struct pim_encap_pimhdr *pimhdr; 2873 1.154 maxv int len = ntohs(ip->ip_len); 2874 1.154 maxv vifi_t vifi = rt->mfc_parent; 2875 1.154 maxv 2876 1.154 maxv if ((vifi >= numvifs) || in_nullhost(viftable[vifi].v_lcl_addr)) { 2877 1.154 maxv m_freem(mb_copy); 2878 1.154 maxv return EADDRNOTAVAIL; /* The iif vif is invalid */ 2879 1.154 maxv } 2880 1.86 manu 2881 1.154 maxv /* 2882 1.154 maxv * Add a new mbuf with the encapsulating header 2883 1.154 maxv */ 2884 1.154 maxv MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 2885 1.154 maxv if (mb_first == NULL) { 2886 1.154 maxv m_freem(mb_copy); 2887 1.154 maxv return ENOBUFS; 2888 1.154 maxv } 2889 1.154 maxv mb_first->m_data += max_linkhdr; 2890 1.154 maxv mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 2891 1.154 maxv mb_first->m_next = mb_copy; 2892 1.154 maxv 2893 1.154 maxv mb_first->m_pkthdr.len = len + mb_first->m_len; 2894 1.154 maxv 2895 1.154 maxv /* 2896 1.154 maxv * Fill in the encapsulating IP and PIM header 2897 1.154 maxv */ 2898 1.154 maxv ip_outer = mtod(mb_first, struct ip *); 2899 1.154 maxv *ip_outer = pim_encap_iphdr; 2900 1.154 maxv if (mb_first->m_pkthdr.len < IP_MINFRAGSIZE) 2901 1.154 maxv ip_outer->ip_id = 0; 2902 1.154 maxv else 2903 1.166 ozaki ip_outer->ip_id = ip_newid(); 2904 1.154 maxv ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) + 2905 1.154 maxv sizeof(pim_encap_pimhdr)); 2906 1.154 maxv ip_outer->ip_src = viftable[vifi].v_lcl_addr; 2907 1.154 maxv ip_outer->ip_dst = rt->mfc_rp; 2908 1.154 maxv /* 2909 1.154 maxv * Copy the inner header TOS to the outer header, and take 
care of the 2910 1.154 maxv * IP_DF bit. 2911 1.154 maxv */ 2912 1.154 maxv ip_outer->ip_tos = ip->ip_tos; 2913 1.154 maxv if (ntohs(ip->ip_off) & IP_DF) 2914 1.154 maxv ip_outer->ip_off |= htons(IP_DF); 2915 1.154 maxv pimhdr = (struct pim_encap_pimhdr *)((char *)ip_outer 2916 1.154 maxv + sizeof(pim_encap_iphdr)); 2917 1.154 maxv *pimhdr = pim_encap_pimhdr; 2918 1.154 maxv /* If the iif crosses a border, set the Border-bit */ 2919 1.154 maxv if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & mrt_api_config) 2920 1.154 maxv pimhdr->flags |= htonl(PIM_BORDER_REGISTER); 2921 1.154 maxv 2922 1.154 maxv mb_first->m_data += sizeof(pim_encap_iphdr); 2923 1.154 maxv pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); 2924 1.154 maxv mb_first->m_data -= sizeof(pim_encap_iphdr); 2925 1.154 maxv 2926 1.154 maxv if (vifp->v_rate_limit == 0) 2927 1.154 maxv tbf_send_packet(vifp, mb_first); 2928 1.154 maxv else 2929 1.154 maxv tbf_control(vifp, mb_first, ip, ntohs(ip_outer->ip_len)); 2930 1.154 maxv 2931 1.154 maxv /* Keep statistics */ 2932 1.154 maxv pimstat.pims_snd_registers_msgs++; 2933 1.154 maxv pimstat.pims_snd_registers_bytes += len; 2934 1.154 maxv 2935 1.154 maxv return 0; 2936 1.86 manu } 2937 1.86 manu 2938 1.86 manu /* 2939 1.86 manu * PIM-SMv2 and PIM-DM messages processing. 2940 1.86 manu * Receives and verifies the PIM control messages, and passes them 2941 1.86 manu * up to the listening socket, using rip_input(). 2942 1.86 manu * The only message with special processing is the PIM_REGISTER message 2943 1.86 manu * (used by PIM-SM): the PIM header is stripped off, and the inner packet 2944 1.86 manu * is passed to if_simloop(). 
*
 * m holds the received packet with m_data at the outer IP header, off is
 * used as the length of that header, and proto is passed on unchanged to
 * rip_input().
 *
 * On the error paths below m is freed with m_freem(); otherwise it ends
 * up in rip_input().  For a (non-NULL) Register the original chain is
 * given to looutput() and only a copy of the leading headers goes to
 * rip_input().
 */
void
pim_input(struct mbuf *m, int off, int proto)
{
	struct ip *ip = mtod(m, struct ip *);
	struct pim *pim;
	int minlen;
	int datalen;
	int ip_tos;
	int iphlen;

	iphlen = off;
	/* Length of the PIM message, i.e. everything after the outer header */
	datalen = ntohs(ip->ip_len) - iphlen;

	/* Keep statistics */
	pimstat.pims_rcv_total_msgs++;
	pimstat.pims_rcv_total_bytes += datalen;

	/*
	 * Validate lengths
	 */
	if (datalen < PIM_MINLEN) {
		pimstat.pims_rcv_tooshort++;
		log(LOG_ERR, "pim_input: packet size too small %d from %lx\n",
		    datalen, (u_long)ip->ip_src.s_addr);
		m_freem(m);
		return;
	}

	/*
	 * If the packet is at least as big as a REGISTER, go ahead
	 * and grab the PIM REGISTER header size, to avoid another
	 * possible m_pullup() later.
	 *
	 * PIM_MINLEN       == pimhdr + u_int32_t == 4 + 4 = 8
	 * PIM_REG_MINLEN   == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28
	 */
	minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN);

	/*
	 * Get the IP and PIM headers in contiguous memory, and
	 * possibly the PIM REGISTER header.
	 *
	 * NOTE(review): m is not freed here when m_pullup() returns NULL;
	 * presumably m_pullup() has already freed the chain -- confirm.
	 */
	if ((m->m_flags & M_EXT || m->m_len < minlen) &&
	    (m = m_pullup(m, minlen)) == NULL) {
		log(LOG_ERR, "pim_input: m_pullup failure\n");
		return;
	}
	/* m may be a different mbuf now: reload the header pointer */
	ip = mtod(m, struct ip *);
	/* Remember the outer TOS; it is copied into the inner header below */
	ip_tos = ip->ip_tos;

	/* adjust mbuf to point to the PIM header */
	m->m_data += iphlen;
	m->m_len -= iphlen;
	pim = mtod(m, struct pim *);

	/*
	 * Validate checksum. If PIM REGISTER, exclude the data packet.
	 *
	 * XXX: some older PIMv2 implementations don't make this distinction,
	 * so for compatibility reason perform the checksum over part of the
	 * message, and if error, then over the whole message.
	 */
	if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) {
		/* do nothing, checksum okay */
	} else if (in_cksum(m, datalen)) {
		pimstat.pims_rcv_badsum++;
		if (mrtdebug & DEBUG_PIM)
			log(LOG_DEBUG, "pim_input: invalid checksum\n");
		m_freem(m);
		return;
	}

	/* PIM version check */
	if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) {
		pimstat.pims_rcv_badversion++;
		log(LOG_ERR, "pim_input: incorrect version %d, expecting %d\n",
		    PIM_VT_V(pim->pim_vt), PIM_VERSION);
		m_freem(m);
		return;
	}

	/* restore mbuf back to the outer IP */
	m->m_data -= iphlen;
	m->m_len += iphlen;

	if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) {
		/*
		 * Since this is a REGISTER, we'll make a copy of the register
		 * headers ip + pim + u_int32 + encap_ip, to be passed up to the
		 * routing daemon.
		 */
		int s;
		/* Passed to looutput() below; the address is left as zero */
		struct sockaddr_in dst = {
			.sin_len = sizeof(dst),
			.sin_family = AF_INET,
		};
		struct mbuf *mcp;
		struct ip *encap_ip;
		u_int32_t *reghdr;
		struct ifnet *vifp;

		/* Look up the interface of the register vif at splsoftnet */
		s = splsoftnet();
		if ((reg_vif_num >= numvifs) || (reg_vif_num == VIFI_INVALID)) {
			splx(s);
			if (mrtdebug & DEBUG_PIM)
				log(LOG_DEBUG,
				    "pim_input: register vif not set: %d\n", reg_vif_num);
			m_freem(m);
			return;
		}
		/* XXX need refcnt? */
		vifp = viftable[reg_vif_num].v_ifp;
		splx(s);

		/*
		 * Validate length
		 *
		 * Only when datalen >= PIM_REG_MINLEN did minlen above cover
		 * the register header and the inner IP header, so this check
		 * has to pass before reghdr and encap_ip are dereferenced.
		 */
		if (datalen < PIM_REG_MINLEN) {
			pimstat.pims_rcv_tooshort++;
			pimstat.pims_rcv_badregisters++;
			log(LOG_ERR,
			    "pim_input: register packet size too small %d from %lx\n",
			    datalen, (u_long)ip->ip_src.s_addr);
			m_freem(m);
			return;
		}

		/* The register header follows the PIM header, then the inner IP */
		reghdr = (u_int32_t *)(pim + 1);
		encap_ip = (struct ip *)(reghdr + 1);

		if (mrtdebug & DEBUG_PIM) {
			log(LOG_DEBUG,
			    "pim_input[register], encap_ip: %lx -> %lx, encap_ip len %d\n",
			    (u_long)ntohl(encap_ip->ip_src.s_addr),
			    (u_long)ntohl(encap_ip->ip_dst.s_addr),
			    ntohs(encap_ip->ip_len));
		}

		/* verify the version number of the inner packet */
		if (encap_ip->ip_v != IPVERSION) {
			pimstat.pims_rcv_badregisters++;
			if (mrtdebug & DEBUG_PIM) {
				log(LOG_DEBUG, "pim_input: invalid IP version (%d) "
				    "of the inner packet\n", encap_ip->ip_v);
			}
			m_freem(m);
			return;
		}

		/* verify the inner packet doesn't have options */
		if (encap_ip->ip_hl != (sizeof(struct ip) >> 2)) {
			pimstat.pims_rcv_badregisters++;
			m_freem(m);
			return;
		}

		/* verify the inner packet is destined to a mcast group */
		if (!IN_MULTICAST(encap_ip->ip_dst.s_addr)) {
			pimstat.pims_rcv_badregisters++;
			if (mrtdebug & DEBUG_PIM)
				log(LOG_DEBUG,
				    "pim_input: inner packet of register is not "
				    "multicast %lx\n",
				    (u_long)ntohl(encap_ip->ip_dst.s_addr));
			m_freem(m);
			return;
		}

		/* If a NULL_REGISTER, pass it to the daemon */
		if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
			goto pim_input_to_daemon;

		/*
		 * Copy the TOS from the outer IP header to the inner IP header.
		 */
		if (encap_ip->ip_tos != ip_tos) {
			/* Outer TOS -> inner TOS */
			encap_ip->ip_tos = ip_tos;
			/* Recompute the inner header checksum. Sigh... */

			/* adjust mbuf to point to the inner IP header */
			m->m_data += (iphlen + PIM_MINLEN);
			m->m_len -= (iphlen + PIM_MINLEN);

			encap_ip->ip_sum = 0;
			encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2);

			/* restore mbuf to point back to the outer IP header */
			m->m_data -= (iphlen + PIM_MINLEN);
			m->m_len += (iphlen + PIM_MINLEN);
		}

		/*
		 * Decapsulate the inner IP packet and loopback to forward it
		 * as a normal multicast packet. Also, make a copy of the
		 *     outer_iphdr + pimhdr + reghdr + encap_iphdr
		 * to pass to the daemon later, so it can take the appropriate
		 * actions (e.g., send back PIM_REGISTER_STOP).
		 * XXX: here m->m_data points to the outer IP header.
		 */
		mcp = m_copym(m, 0, iphlen + PIM_REG_MINLEN, M_DONTWAIT);
		if (mcp == NULL) {
			log(LOG_ERR,
			    "pim_input: pim register: could not copy register head\n");
			m_freem(m);
			return;
		}

		/* Keep statistics */
		/* XXX: registers_bytes include only the encap. mcast pkt */
		pimstat.pims_rcv_registers_msgs++;
		pimstat.pims_rcv_registers_bytes += ntohs(encap_ip->ip_len);

		/*
		 * forward the inner ip packet; point m_data at the inner ip.
		 */
		m_adj(m, iphlen + PIM_MINLEN);

		if (mrtdebug & DEBUG_PIM) {
			log(LOG_DEBUG,
			    "pim_input: forwarding decapsulated register: "
			    "src %lx, dst %lx, vif %d\n",
			    (u_long)ntohl(encap_ip->ip_src.s_addr),
			    (u_long)ntohl(encap_ip->ip_dst.s_addr),
			    reg_vif_num);
		}
		/* NB: vifp was collected above; can it change on us? */
		looutput(vifp, m, (struct sockaddr *)&dst, NULL);

		/* prepare the register head to send to the mrouting daemon */
		m = mcp;
	}

pim_input_to_daemon:
	/*
	 * Pass the PIM message up to the daemon; if it is a Register message,
	 * pass the 'head' only up to the daemon. This includes the
	 * outer IP header, PIM header, PIM-Register header and the
	 * inner IP header.
	 * XXX: the outer IP header pkt size of a Register is not adjusted to
	 * reflect the fact that the inner multicast data is truncated.
	 */
	/*
	 * Currently, pim_input() is always called holding softnet_lock
	 * by ipintr()(!NET_MPSAFE) or PR_INPUT_WRAP()(NET_MPSAFE).
	 */
	KASSERT(mutex_owned(softnet_lock));
	rip_input(m, iphlen, proto);

	return;
}
#endif /* PIM */