Home | History | Annotate | Line # | Download | only in netinet
in_pcb.c revision 1.201
      1 /*	$NetBSD: in_pcb.c,v 1.201 2022/11/04 09:05:04 ozaki-r Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998, 2011 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Coyote Point Systems, Inc.
     38  * This code is derived from software contributed to The NetBSD Foundation
     39  * by Public Access Networks Corporation ("Panix").  It was developed under
     40  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     41  *
     42  * Redistribution and use in source and binary forms, with or without
     43  * modification, are permitted provided that the following conditions
     44  * are met:
     45  * 1. Redistributions of source code must retain the above copyright
     46  *    notice, this list of conditions and the following disclaimer.
     47  * 2. Redistributions in binary form must reproduce the above copyright
     48  *    notice, this list of conditions and the following disclaimer in the
     49  *    documentation and/or other materials provided with the distribution.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     52  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     53  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     54  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     55  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     56  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     57  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     58  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     59  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     60  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     61  * POSSIBILITY OF SUCH DAMAGE.
     62  */
     63 
     64 /*
     65  * Copyright (c) 1982, 1986, 1991, 1993, 1995
     66  *	The Regents of the University of California.  All rights reserved.
     67  *
     68  * Redistribution and use in source and binary forms, with or without
     69  * modification, are permitted provided that the following conditions
     70  * are met:
     71  * 1. Redistributions of source code must retain the above copyright
     72  *    notice, this list of conditions and the following disclaimer.
     73  * 2. Redistributions in binary form must reproduce the above copyright
     74  *    notice, this list of conditions and the following disclaimer in the
     75  *    documentation and/or other materials provided with the distribution.
     76  * 3. Neither the name of the University nor the names of its contributors
     77  *    may be used to endorse or promote products derived from this software
     78  *    without specific prior written permission.
     79  *
     80  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     81  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     82  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     83  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     84  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     85  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     86  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     87  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     88  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     89  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     90  * SUCH DAMAGE.
     91  *
     92  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
     93  */
     94 
     95 #include <sys/cdefs.h>
     96 __KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.201 2022/11/04 09:05:04 ozaki-r Exp $");
     97 
     98 #ifdef _KERNEL_OPT
     99 #include "opt_inet.h"
    100 #include "opt_ipsec.h"
    101 #endif
    102 
    103 #include <sys/param.h>
    104 #include <sys/systm.h>
    105 #include <sys/mbuf.h>
    106 #include <sys/socket.h>
    107 #include <sys/socketvar.h>
    108 #include <sys/ioctl.h>
    109 #include <sys/errno.h>
    110 #include <sys/time.h>
    111 #include <sys/once.h>
    112 #include <sys/pool.h>
    113 #include <sys/proc.h>
    114 #include <sys/kauth.h>
    115 #include <sys/uidinfo.h>
    116 #include <sys/domain.h>
    117 
    118 #include <net/if.h>
    119 #include <net/route.h>
    120 
    121 #include <netinet/in.h>
    122 #include <netinet/in_systm.h>
    123 #include <netinet/ip.h>
    124 #include <netinet/in_pcb.h>
    125 #include <netinet/in_var.h>
    126 #include <netinet/ip_var.h>
    127 #include <netinet/portalgo.h>
    128 
    129 #ifdef INET6
    130 #include <netinet/ip6.h>
    131 #include <netinet6/ip6_var.h>
    132 #include <netinet6/in6_pcb.h>
    133 #endif
    134 
    135 #ifdef IPSEC
    136 #include <netipsec/ipsec.h>
    137 #include <netipsec/key.h>
    138 #endif /* IPSEC */
    139 
    140 #include <netinet/tcp_vtw.h>
    141 
/*
 * IPv4 address with no initializer, hence all-zero (static storage).
 * inpcb_disconnect() assigns it to a PCB's foreign address to clear it.
 */
struct	in_addr zeroin_addr;
    143 
/*
 * Hash-bucket selectors.  Each macro yields the address of one bucket in
 * the corresponding per-table hash array; the index is the host-order sum
 * of the key fields ANDed with the table's stored mask.  Addresses and
 * ports are passed in network byte order.
 *
 * The whole expansion is parenthesized so that the result can be used
 * with postfix operators (e.g. MACRO(...)->lh_first) without the
 * operator binding to the array element instead of the bucket address.
 */
#define	INPCBHASH_PORT(table, lport) \
	(&(table)->inpt_porthashtbl[ntohs(lport) & (table)->inpt_porthash])
#define	INPCBHASH_BIND(table, laddr, lport) \
	(&(table)->inpt_bindhashtbl[ \
	    ((ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_bindhash])
#define	INPCBHASH_CONNECT(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_connecthashtbl[ \
	    ((ntohl((faddr).s_addr) + ntohs(fport)) + \
	     (ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_connecthash])
    153 
/*
 * Port-range bounds, initialised from the IPPORT_* constants.
 * inpcb_init() seeds each table's inpt_lastport from anonportmax.
 */
int	anonportmin = IPPORT_ANONMIN;
int	anonportmax = IPPORT_ANONMAX;
int	lowportmin  = IPPORT_RESERVEDMIN;
int	lowportmax  = IPPORT_RESERVEDMAX;

/*
 * Pool caches for PCB allocation, created by inpcb_poolinit().
 * inpcb_create() allocates from them and inpcb_destroy() returns to them.
 */
static pool_cache_t	in4pcb_pool_cache;
#ifdef INET6
static pool_cache_t	in6pcb_pool_cache;
#endif
    163 
    164 static int
    165 inpcb_poolinit(void)
    166 {
    167 
    168 	in4pcb_pool_cache = pool_cache_init(sizeof(struct in4pcb), coherency_unit,
    169 	    0, 0, "in4pcbpl", NULL, IPL_NET, NULL, NULL, NULL);
    170 #ifdef INET6
    171 	in6pcb_pool_cache = pool_cache_init(sizeof(struct in6pcb), coherency_unit,
    172 	    0, 0, "in6pcbpl", NULL, IPL_NET, NULL, NULL, NULL);
    173 #endif
    174 	return 0;
    175 }
    176 
    177 void
    178 inpcb_init(struct inpcbtable *table, int bindhashsize, int connecthashsize)
    179 {
    180 	static ONCE_DECL(control);
    181 
    182 	TAILQ_INIT(&table->inpt_queue);
    183 	table->inpt_porthashtbl = hashinit(bindhashsize, HASH_LIST, true,
    184 	    &table->inpt_porthash);
    185 	table->inpt_bindhashtbl = hashinit(bindhashsize, HASH_LIST, true,
    186 	    &table->inpt_bindhash);
    187 	table->inpt_connecthashtbl = hashinit(connecthashsize, HASH_LIST, true,
    188 	    &table->inpt_connecthash);
    189 	table->inpt_lastlow = IPPORT_RESERVEDMAX;
    190 	table->inpt_lastport = (in_port_t)anonportmax;
    191 
    192 	RUN_ONCE(&control, inpcb_poolinit);
    193 }
    194 
    195 int
    196 inpcb_create(struct socket *so, void *v)
    197 {
    198 	struct inpcbtable *table = v;
    199 	struct inpcb *inp;
    200 	int s;
    201 
    202 #ifdef INET6
    203 	KASSERT(soaf(so) == AF_INET || soaf(so) == AF_INET6);
    204 
    205 	if (soaf(so) == AF_INET)
    206 		inp = pool_cache_get(in4pcb_pool_cache, PR_NOWAIT);
    207 	else
    208 		inp = pool_cache_get(in6pcb_pool_cache, PR_NOWAIT);
    209 #else
    210 	KASSERT(soaf(so) == AF_INET);
    211 	inp = pool_cache_get(in4pcb_pool_cache, PR_NOWAIT);
    212 #endif
    213 	if (inp == NULL)
    214 		return ENOBUFS;
    215 	if (soaf(so) == AF_INET)
    216 		memset(inp, 0, sizeof(struct in4pcb));
    217 #ifdef INET6
    218 	else
    219 		memset(inp, 0, sizeof(struct in6pcb));
    220 #endif
    221 	inp->inp_af = soaf(so);
    222 	inp->inp_table = table;
    223 	inp->inp_socket = so;
    224 	inp->inp_portalgo = PORTALGO_DEFAULT;
    225 	inp->inp_bindportonsend = false;
    226 
    227 	if (inp->inp_af == AF_INET) {
    228 		in4p_errormtu(inp) = -1;
    229 		in4p_prefsrcip(inp).s_addr = INADDR_ANY;
    230 	}
    231 #ifdef INET6
    232 	else {
    233 		in6p_hops6(inp) = -1;	/* use kernel default */
    234 		if (ip6_v6only)
    235 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
    236 	}
    237 #endif
    238 #if defined(IPSEC)
    239 	if (ipsec_enabled) {
    240 		int error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
    241 		if (error != 0) {
    242 #ifdef INET6
    243 			if (inp->inp_af == AF_INET)
    244 				pool_cache_put(in4pcb_pool_cache, inp);
    245 			else
    246 				pool_cache_put(in6pcb_pool_cache, inp);
    247 #else
    248 			KASSERT(inp->inp_af == AF_INET);
    249 			pool_cache_put(in4pcb_pool_cache, inp);
    250 #endif
    251 			return error;
    252 		}
    253 		inp->inp_sp->sp_inp = inp;
    254 	}
    255 #endif
    256 	so->so_pcb = inp;
    257 	s = splsoftnet();
    258 	TAILQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
    259 	LIST_INSERT_HEAD(INPCBHASH_PORT(table, inp->inp_lport), inp,
    260 	    inp_lhash);
    261 	inpcb_set_state(inp, INP_ATTACHED);
    262 	splx(s);
    263 	return 0;
    264 }
    265 
    266 static int
    267 inpcb_set_port(struct sockaddr_in *sin, struct inpcb *inp, kauth_cred_t cred)
    268 {
    269 	struct inpcbtable *table = inp->inp_table;
    270 	struct socket *so = inp->inp_socket;
    271 	in_port_t *lastport;
    272 	in_port_t lport = 0;
    273 	enum kauth_network_req req;
    274 	int error;
    275 
    276 	if (inp->inp_flags & INP_LOWPORT) {
    277 #ifndef IPNOPRIVPORTS
    278 		req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
    279 #else
    280 		req = KAUTH_REQ_NETWORK_BIND_PORT;
    281 #endif
    282 
    283 		lastport = &table->inpt_lastlow;
    284 	} else {
    285 		req = KAUTH_REQ_NETWORK_BIND_PORT;
    286 
    287 		lastport = &table->inpt_lastport;
    288 	}
    289 
    290 	/* XXX-kauth: KAUTH_REQ_NETWORK_BIND_AUTOASSIGN_{,PRIV}PORT */
    291 	error = kauth_authorize_network(cred, KAUTH_NETWORK_BIND, req, so, sin,
    292 	    NULL);
    293 	if (error)
    294 		return EACCES;
    295 
    296        /*
    297         * Use RFC6056 randomized port selection
    298         */
    299 	error = portalgo_randport(&lport, inp, cred);
    300 	if (error)
    301 		return error;
    302 
    303 	inp->inp_flags |= INP_ANONPORT;
    304 	*lastport = lport;
    305 	lport = htons(lport);
    306 	inp->inp_lport = lport;
    307 	inpcb_set_state(inp, INP_BOUND);
    308 
    309 	return 0;
    310 }
    311 
    312 int
    313 inpcb_bindableaddr(const struct inpcb *inp, struct sockaddr_in *sin,
    314     kauth_cred_t cred)
    315 {
    316 	int error = EADDRNOTAVAIL;
    317 	struct ifaddr *ifa = NULL;
    318 	int s;
    319 
    320 	if (sin->sin_family != AF_INET)
    321 		return EAFNOSUPPORT;
    322 
    323 	s = pserialize_read_enter();
    324 	if (IN_MULTICAST(sin->sin_addr.s_addr)) {
    325 		/* Always succeed; port reuse handled in inpcb_bind_port(). */
    326 	} else if (!in_nullhost(sin->sin_addr)) {
    327 		struct in_ifaddr *ia;
    328 
    329 		ia = in_get_ia(sin->sin_addr);
    330 		/* check for broadcast addresses */
    331 		if (ia == NULL) {
    332 			ifa = ifa_ifwithaddr(sintosa(sin));
    333 			if (ifa != NULL)
    334 				ia = ifatoia(ifa);
    335 			else if ((inp->inp_flags & INP_BINDANY) != 0) {
    336 				error = 0;
    337 				goto error;
    338 			}
    339 		}
    340 		if (ia == NULL)
    341 			goto error;
    342 		if (ia->ia4_flags & IN_IFF_DUPLICATED)
    343 			goto error;
    344 	}
    345 	error = 0;
    346  error:
    347 	pserialize_read_exit(s);
    348 	return error;
    349 }
    350 
    351 static int
    352 inpcb_bind_addr(struct inpcb *inp, struct sockaddr_in *sin, kauth_cred_t cred)
    353 {
    354 	int error;
    355 
    356 	error = inpcb_bindableaddr(inp, sin, cred);
    357 	if (error == 0)
    358 		in4p_laddr(inp) = sin->sin_addr;
    359 	return error;
    360 }
    361 
    362 static int
    363 inpcb_bind_port(struct inpcb *inp, struct sockaddr_in *sin, kauth_cred_t cred)
    364 {
    365 	struct inpcbtable *table = inp->inp_table;
    366 	struct socket *so = inp->inp_socket;
    367 	int reuseport = (so->so_options & SO_REUSEPORT);
    368 	int wild = 0, error;
    369 
    370 	if (IN_MULTICAST(sin->sin_addr.s_addr)) {
    371 		/*
    372 		 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
    373 		 * allow complete duplication of binding if
    374 		 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
    375 		 * and a multicast address is bound on both
    376 		 * new and duplicated sockets.
    377 		 */
    378 		if (so->so_options & (SO_REUSEADDR | SO_REUSEPORT))
    379 			reuseport = SO_REUSEADDR|SO_REUSEPORT;
    380 	}
    381 
    382 	if (sin->sin_port == 0) {
    383 		error = inpcb_set_port(sin, inp, cred);
    384 		if (error)
    385 			return error;
    386 	} else {
    387 		struct inpcb *t;
    388 		vestigial_inpcb_t vestige;
    389 #ifdef INET6
    390 		struct inpcb *t6;
    391 		struct in6_addr mapped;
    392 #endif
    393 		enum kauth_network_req req;
    394 
    395 		if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
    396 			wild = 1;
    397 
    398 #ifndef IPNOPRIVPORTS
    399 		if (ntohs(sin->sin_port) < IPPORT_RESERVED)
    400 			req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
    401 		else
    402 #endif /* !IPNOPRIVPORTS */
    403 			req = KAUTH_REQ_NETWORK_BIND_PORT;
    404 
    405 		error = kauth_authorize_network(cred, KAUTH_NETWORK_BIND, req,
    406 		    so, sin, NULL);
    407 		if (error)
    408 			return EACCES;
    409 
    410 #ifdef INET6
    411 		in6_in_2_v4mapin6(&sin->sin_addr, &mapped);
    412 		t6 = in6pcb_lookup_local(table, &mapped, sin->sin_port, wild, &vestige);
    413 		if (t6 && (reuseport & t6->inp_socket->so_options) == 0)
    414 			return EADDRINUSE;
    415 		if (!t6 && vestige.valid) {
    416 		    if (!!reuseport != !!vestige.reuse_port) {
    417 			return EADDRINUSE;
    418 		    }
    419 		}
    420 #endif
    421 
    422 		/* XXX-kauth */
    423 		if (so->so_uidinfo->ui_uid && !IN_MULTICAST(sin->sin_addr.s_addr)) {
    424 			t = inpcb_lookup_local(table, sin->sin_addr, sin->sin_port, 1, &vestige);
    425 			/*
    426 			 * XXX:	investigate ramifications of loosening this
    427 			 *	restriction so that as long as both ports have
    428 			 *	SO_REUSEPORT allow the bind
    429 			 */
    430 			if (t &&
    431 			    (!in_nullhost(sin->sin_addr) ||
    432 			     !in_nullhost(in4p_laddr(t)) ||
    433 			     (t->inp_socket->so_options & SO_REUSEPORT) == 0)
    434 			    && (so->so_uidinfo->ui_uid != t->inp_socket->so_uidinfo->ui_uid)) {
    435 				return EADDRINUSE;
    436 			}
    437 			if (!t && vestige.valid) {
    438 				if ((!in_nullhost(sin->sin_addr)
    439 				     || !in_nullhost(vestige.laddr.v4)
    440 				     || !vestige.reuse_port)
    441 				    && so->so_uidinfo->ui_uid != vestige.uid) {
    442 					return EADDRINUSE;
    443 				}
    444 			}
    445 		}
    446 		t = inpcb_lookup_local(table, sin->sin_addr, sin->sin_port, wild, &vestige);
    447 		if (t && (reuseport & t->inp_socket->so_options) == 0)
    448 			return EADDRINUSE;
    449 		if (!t
    450 		    && vestige.valid
    451 		    && !(reuseport && vestige.reuse_port))
    452 			return EADDRINUSE;
    453 
    454 		inp->inp_lport = sin->sin_port;
    455 		inpcb_set_state(inp, INP_BOUND);
    456 	}
    457 
    458 	LIST_REMOVE(inp, inp_lhash);
    459 	LIST_INSERT_HEAD(INPCBHASH_PORT(table, inp->inp_lport), inp,
    460 	    inp_lhash);
    461 
    462 	return 0;
    463 }
    464 
    465 int
    466 inpcb_bind(void *v, struct sockaddr_in *sin, struct lwp *l)
    467 {
    468 	struct inpcb *inp = v;
    469 	struct sockaddr_in lsin;
    470 	int error;
    471 
    472 	if (inp->inp_af != AF_INET)
    473 		return EINVAL;
    474 
    475 	if (inp->inp_lport || !in_nullhost(in4p_laddr(inp)))
    476 		return EINVAL;
    477 
    478 	if (NULL != sin) {
    479 		if (sin->sin_len != sizeof(*sin))
    480 			return EINVAL;
    481 	} else {
    482 		lsin = *((const struct sockaddr_in *)
    483 		    inp->inp_socket->so_proto->pr_domain->dom_sa_any);
    484 		sin = &lsin;
    485 	}
    486 
    487 	/* Bind address. */
    488 	error = inpcb_bind_addr(inp, sin, l->l_cred);
    489 	if (error)
    490 		return error;
    491 
    492 	/* Bind port. */
    493 	error = inpcb_bind_port(inp, sin, l->l_cred);
    494 	if (error) {
    495 		in4p_laddr(inp).s_addr = INADDR_ANY;
    496 
    497 		return error;
    498 	}
    499 
    500 	return 0;
    501 }
    502 
    503 /*
    504  * Connect from a socket to a specified address.
    505  * Both address and port must be specified in argument sin.
    506  * If don't have a local address for this socket yet,
    507  * then pick one.
    508  */
    509 int
    510 inpcb_connect(void *v, struct sockaddr_in *sin, struct lwp *l)
    511 {
    512 	struct inpcb *inp = v;
    513 	vestigial_inpcb_t vestige;
    514 	int error;
    515 	struct in_addr laddr;
    516 
    517 	if (inp->inp_af != AF_INET)
    518 		return EINVAL;
    519 
    520 	if (sin->sin_len != sizeof (*sin))
    521 		return EINVAL;
    522 	if (sin->sin_family != AF_INET)
    523 		return EAFNOSUPPORT;
    524 	if (sin->sin_port == 0)
    525 		return EADDRNOTAVAIL;
    526 
    527 	if (IN_MULTICAST(sin->sin_addr.s_addr) &&
    528 	    inp->inp_socket->so_type == SOCK_STREAM)
    529 		return EADDRNOTAVAIL;
    530 
    531 	if (!IN_ADDRLIST_READER_EMPTY()) {
    532 		/*
    533 		 * If the destination address is INADDR_ANY,
    534 		 * use any local address (likely loopback).
    535 		 * If the supplied address is INADDR_BROADCAST,
    536 		 * use the broadcast address of an interface
    537 		 * which supports broadcast. (loopback does not)
    538 		 */
    539 
    540 		if (in_nullhost(sin->sin_addr)) {
    541 			/* XXX racy */
    542 			sin->sin_addr =
    543 			    IN_ADDRLIST_READER_FIRST()->ia_addr.sin_addr;
    544 		} else if (sin->sin_addr.s_addr == INADDR_BROADCAST) {
    545 			struct in_ifaddr *ia;
    546 			int s = pserialize_read_enter();
    547 			IN_ADDRLIST_READER_FOREACH(ia) {
    548 				if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
    549 					sin->sin_addr =
    550 					    ia->ia_broadaddr.sin_addr;
    551 					break;
    552 				}
    553 			}
    554 			pserialize_read_exit(s);
    555 		}
    556 	}
    557 	/*
    558 	 * If we haven't bound which network number to use as ours,
    559 	 * we will use the number of the outgoing interface.
    560 	 * This depends on having done a routing lookup, which
    561 	 * we will probably have to do anyway, so we might
    562 	 * as well do it now.  On the other hand if we are
    563 	 * sending to multiple destinations we may have already
    564 	 * done the lookup, so see if we can use the route
    565 	 * from before.  In any case, we only
    566 	 * chose a port number once, even if sending to multiple
    567 	 * destinations.
    568 	 */
    569 	if (in_nullhost(in4p_laddr(inp))) {
    570 		int xerror;
    571 		struct in_ifaddr *ia, *_ia;
    572 		int s;
    573 		struct psref psref;
    574 		int bound;
    575 
    576 		bound = curlwp_bind();
    577 		ia = in_selectsrc(sin, &inp->inp_route,
    578 		    inp->inp_socket->so_options, inp->inp_moptions, &xerror,
    579 		    &psref);
    580 		if (ia == NULL) {
    581 			curlwp_bindx(bound);
    582 			if (xerror == 0)
    583 				xerror = EADDRNOTAVAIL;
    584 			return xerror;
    585 		}
    586 		s = pserialize_read_enter();
    587 		_ia = in_get_ia(IA_SIN(ia)->sin_addr);
    588 		if (_ia == NULL && (inp->inp_flags & INP_BINDANY) == 0) {
    589 			pserialize_read_exit(s);
    590 			ia4_release(ia, &psref);
    591 			curlwp_bindx(bound);
    592 			return EADDRNOTAVAIL;
    593 		}
    594 		pserialize_read_exit(s);
    595 		laddr = IA_SIN(ia)->sin_addr;
    596 		ia4_release(ia, &psref);
    597 		curlwp_bindx(bound);
    598 	} else
    599 		laddr = in4p_laddr(inp);
    600 	if (inpcb_lookup(inp->inp_table, sin->sin_addr, sin->sin_port,
    601 	                         laddr, inp->inp_lport, &vestige) != NULL ||
    602 	    vestige.valid) {
    603 		return EADDRINUSE;
    604 	}
    605 	if (in_nullhost(in4p_laddr(inp))) {
    606 		if (inp->inp_lport == 0) {
    607 			error = inpcb_bind(inp, NULL, l);
    608 			/*
    609 			 * This used to ignore the return value
    610 			 * completely, but we need to check for
    611 			 * ephemeral port shortage.
    612 			 * And attempts to request low ports if not root.
    613 			 */
    614 			if (error != 0)
    615 				return error;
    616 		}
    617 		in4p_laddr(inp) = laddr;
    618 	}
    619 	in4p_faddr(inp) = sin->sin_addr;
    620 	inp->inp_fport = sin->sin_port;
    621 
    622 	/* Late bind, if needed */
    623 	if (inp->inp_bindportonsend) {
    624 		struct sockaddr_in lsin = *((const struct sockaddr_in *)
    625 		    inp->inp_socket->so_proto->pr_domain->dom_sa_any);
    626 		lsin.sin_addr = in4p_laddr(inp);
    627 		lsin.sin_port = 0;
    628 
    629 		if ((error = inpcb_bind_port(inp, &lsin, l->l_cred)) != 0)
    630 			return error;
    631 	}
    632 
    633 	inpcb_set_state(inp, INP_CONNECTED);
    634 #if defined(IPSEC)
    635 	if (ipsec_enabled && inp->inp_socket->so_type == SOCK_STREAM)
    636 		ipsec_pcbconn(inp->inp_sp);
    637 #endif
    638 	return 0;
    639 }
    640 
    641 void
    642 inpcb_disconnect(void *v)
    643 {
    644 	struct inpcb *inp = v;
    645 
    646 	if (inp->inp_af != AF_INET)
    647 		return;
    648 
    649 	in4p_faddr(inp) = zeroin_addr;
    650 	inp->inp_fport = 0;
    651 	inpcb_set_state(inp, INP_BOUND);
    652 #if defined(IPSEC)
    653 	if (ipsec_enabled)
    654 		ipsec_pcbdisconn(inp->inp_sp);
    655 #endif
    656 	if (inp->inp_socket->so_state & SS_NOFDREF)
    657 		inpcb_destroy(inp);
    658 }
    659 
    660 void
    661 inpcb_destroy(void *v)
    662 {
    663 	struct inpcb *inp = v;
    664 	struct socket *so = inp->inp_socket;
    665 	int s;
    666 
    667 	KASSERT(inp->inp_af == AF_INET || inp->inp_af == AF_INET6);
    668 
    669 #if defined(IPSEC)
    670 	if (ipsec_enabled)
    671 		ipsec_delete_pcbpolicy(inp);
    672 #endif
    673 	so->so_pcb = NULL;
    674 
    675 	s = splsoftnet();
    676 	inpcb_set_state(inp, INP_ATTACHED);
    677 	LIST_REMOVE(inp, inp_lhash);
    678 	TAILQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
    679 	splx(s);
    680 
    681 	if (inp->inp_options) {
    682 		m_free(inp->inp_options);
    683 	}
    684 	rtcache_free(&inp->inp_route);
    685 	ip_freemoptions(inp->inp_moptions);
    686 #ifdef INET6
    687 	if (inp->inp_af == AF_INET6) {
    688 		if (in6p_outputopts(inp) != NULL) {
    689 			ip6_clearpktopts(in6p_outputopts(inp), -1);
    690 			free(in6p_outputopts(inp), M_IP6OPT);
    691 		}
    692 		ip6_freemoptions(in6p_moptions(inp));
    693 	}
    694 #endif
    695 	sofree(so);			/* drops the socket's lock */
    696 
    697 #ifdef INET6
    698 	if (inp->inp_af == AF_INET)
    699 		pool_cache_put(in4pcb_pool_cache, inp);
    700 	else
    701 		pool_cache_put(in6pcb_pool_cache, inp);
    702 #else
    703 	KASSERT(inp->inp_af == AF_INET);
    704 	pool_cache_put(in4pcb_pool_cache, inp);
    705 #endif
    706 	mutex_enter(softnet_lock);	/* reacquire the softnet_lock */
    707 }
    708 
    709 void
    710 inpcb_fetch_sockaddr(struct inpcb *inp, struct sockaddr_in *sin)
    711 {
    712 
    713 	if (inp->inp_af != AF_INET)
    714 		return;
    715 
    716 	sockaddr_in_init(sin, &in4p_laddr(inp), inp->inp_lport);
    717 }
    718 
    719 void
    720 inpcb_fetch_peeraddr(struct inpcb *inp, struct sockaddr_in *sin)
    721 {
    722 
    723 	if (inp->inp_af != AF_INET)
    724 		return;
    725 
    726 	sockaddr_in_init(sin, &in4p_faddr(inp), inp->inp_fport);
    727 }
    728 
    729 /*
    730  * Pass some notification to all connections of a protocol
    731  * associated with address dst.  The local address and/or port numbers
    732  * may be specified to limit the search.  The "usual action" will be
    733  * taken, depending on the ctlinput cmd.  The caller must filter any
    734  * cmds that are uninteresting (e.g., no error in the map).
    735  * Call the protocol specific routine (if any) to report
    736  * any errors for each matching socket.
    737  *
    738  * Must be called at splsoftnet.
    739  */
    740 int
    741 inpcb_notify(struct inpcbtable *table, struct in_addr faddr, u_int fport_arg,
    742     struct in_addr laddr, u_int lport_arg, int errno,
    743     void (*notify)(struct inpcb *, int))
    744 {
    745 	struct inpcbhead *head;
    746 	struct inpcb *inp;
    747 	in_port_t fport = fport_arg, lport = lport_arg;
    748 	int nmatch;
    749 
    750 	if (in_nullhost(faddr) || notify == NULL)
    751 		return 0;
    752 
    753 	nmatch = 0;
    754 	head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
    755 	LIST_FOREACH(inp, head, inp_hash) {
    756 		if (inp->inp_af != AF_INET)
    757 			continue;
    758 
    759 		if (in_hosteq(in4p_faddr(inp), faddr) &&
    760 		    inp->inp_fport == fport &&
    761 		    inp->inp_lport == lport &&
    762 		    in_hosteq(in4p_laddr(inp), laddr)) {
    763 			(*notify)(inp, errno);
    764 			nmatch++;
    765 		}
    766 	}
    767 	return nmatch;
    768 }
    769 
    770 void
    771 inpcb_notifyall(struct inpcbtable *table, struct in_addr faddr, int errno,
    772     void (*notify)(struct inpcb *, int))
    773 {
    774 	struct inpcb *inp;
    775 
    776 	if (in_nullhost(faddr) || notify == NULL)
    777 		return;
    778 
    779 	TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
    780 		if (inp->inp_af != AF_INET)
    781 			continue;
    782 		if (in_hosteq(in4p_faddr(inp), faddr))
    783 			(*notify)(inp, errno);
    784 	}
    785 }
    786 
    787 void
    788 in_purgeifmcast(struct ip_moptions *imo, struct ifnet *ifp)
    789 {
    790 	int i, gap;
    791 
    792 	/* The owner of imo should be protected by solock */
    793 	KASSERT(ifp != NULL);
    794 
    795 	if (imo == NULL)
    796 		return;
    797 
    798 	/*
    799 	 * Unselect the outgoing interface if it is being
    800 	 * detached.
    801 	 */
    802 	if (imo->imo_multicast_if_index == ifp->if_index)
    803 		imo->imo_multicast_if_index = 0;
    804 
    805 	/*
    806 	 * Drop multicast group membership if we joined
    807 	 * through the interface being detached.
    808 	 */
    809 	for (i = 0, gap = 0; i < imo->imo_num_memberships; i++) {
    810 		if (imo->imo_membership[i]->inm_ifp == ifp) {
    811 			in_delmulti(imo->imo_membership[i]);
    812 			gap++;
    813 		} else if (gap != 0)
    814 			imo->imo_membership[i - gap] = imo->imo_membership[i];
    815 	}
    816 	imo->imo_num_memberships -= gap;
    817 }
    818 
    819 void
    820 inpcb_purgeif0(struct inpcbtable *table, struct ifnet *ifp)
    821 {
    822 	struct inpcb *inp;
    823 
    824 	TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
    825 		bool need_unlock = false;
    826 
    827 		if (inp->inp_af != AF_INET)
    828 			continue;
    829 
    830 		/* The caller holds either one of inps' lock */
    831 		if (!inp_locked(inp)) {
    832 			inp_lock(inp);
    833 			need_unlock = true;
    834 		}
    835 
    836 		in_purgeifmcast(inp->inp_moptions, ifp);
    837 
    838 		if (need_unlock)
    839 			inp_unlock(inp);
    840 	}
    841 }
    842 
    843 void
    844 inpcb_purgeif(struct inpcbtable *table, struct ifnet *ifp)
    845 {
    846 	struct rtentry *rt;
    847 	struct inpcb *inp;
    848 
    849 	TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
    850 		if (inp->inp_af != AF_INET)
    851 			continue;
    852 		if ((rt = rtcache_validate(&inp->inp_route)) != NULL &&
    853 		    rt->rt_ifp == ifp) {
    854 			rtcache_unref(rt, &inp->inp_route);
    855 			inpcb_rtchange(inp, 0);
    856 		} else
    857 			rtcache_unref(rt, &inp->inp_route);
    858 	}
    859 }
    860 
    861 /*
    862  * Check for alternatives when higher level complains
    863  * about service problems.  For now, invalidate cached
    864  * routing information.  If the route was created dynamically
    865  * (by a redirect), time to try a default gateway again.
    866  */
    867 void
    868 inpcb_losing(struct inpcb *inp)
    869 {
    870 	struct rtentry *rt;
    871 	struct rt_addrinfo info;
    872 
    873 	if (inp->inp_af != AF_INET)
    874 		return;
    875 
    876 	if ((rt = rtcache_validate(&inp->inp_route)) == NULL)
    877 		return;
    878 
    879 	memset(&info, 0, sizeof(info));
    880 	info.rti_info[RTAX_DST] = rtcache_getdst(&inp->inp_route);
    881 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
    882 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
    883 	rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
    884 	if (rt->rt_flags & RTF_DYNAMIC) {
    885 		int error;
    886 		struct rtentry *nrt;
    887 
    888 		error = rtrequest(RTM_DELETE, rt_getkey(rt),
    889 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &nrt);
    890 		rtcache_unref(rt, &inp->inp_route);
    891 		if (error == 0) {
    892 			rt_newmsg_dynamic(RTM_DELETE, nrt);
    893 			rt_free(nrt);
    894 		}
    895 	} else
    896 		rtcache_unref(rt, &inp->inp_route);
    897 	/*
    898 	 * A new route can be allocated
    899 	 * the next time output is attempted.
    900 	 */
    901 	rtcache_free(&inp->inp_route);
    902 }
    903 
    904 /*
    905  * After a routing change, flush old routing.  A new route can be
    906  * allocated the next time output is attempted.
    907  */
    908 void
    909 inpcb_rtchange(struct inpcb *inp, int errno)
    910 {
    911 
    912 	if (inp->inp_af != AF_INET)
    913 		return;
    914 
    915 	rtcache_free(&inp->inp_route);
    916 
    917 	/* XXX SHOULD NOTIFY HIGHER-LEVEL PROTOCOLS */
    918 }
    919 
    920 struct inpcb *
    921 inpcb_lookup_local(struct inpcbtable *table, struct in_addr laddr,
    922 		  u_int lport_arg, int lookup_wildcard, vestigial_inpcb_t *vp)
    923 {
    924 	struct inpcbhead *head;
    925 	struct inpcb *inp;
    926 	struct inpcb *match = NULL;
    927 	int matchwild = 3;
    928 	int wildcard;
    929 	in_port_t lport = lport_arg;
    930 
    931 	if (vp)
    932 		vp->valid = 0;
    933 
    934 	head = INPCBHASH_PORT(table, lport);
    935 	LIST_FOREACH(inp, head, inp_lhash) {
    936 		if (inp->inp_af != AF_INET)
    937 			continue;
    938 		if (inp->inp_lport != lport)
    939 			continue;
    940 		/*
    941 		 * check if inp's faddr and laddr match with ours.
    942 		 * our faddr is considered null.
    943 		 * count the number of wildcard matches. (0 - 2)
    944 		 *
    945 		 *	null	null	match
    946 		 *	A	null	wildcard match
    947 		 *	null	B	wildcard match
    948 		 *	A	B	non match
    949 		 *	A	A	match
    950 		 */
    951 		wildcard = 0;
    952 		if (!in_nullhost(in4p_faddr(inp)))
    953 			wildcard++;
    954 		if (in_nullhost(in4p_laddr(inp))) {
    955 			if (!in_nullhost(laddr))
    956 				wildcard++;
    957 		} else {
    958 			if (in_nullhost(laddr))
    959 				wildcard++;
    960 			else {
    961 				if (!in_hosteq(in4p_laddr(inp), laddr))
    962 					continue;
    963 			}
    964 		}
    965 		if (wildcard && !lookup_wildcard)
    966 			continue;
    967 		/*
    968 		 * prefer an address with less wildcards.
    969 		 */
    970 		if (wildcard < matchwild) {
    971 			match = inp;
    972 			matchwild = wildcard;
    973 			if (matchwild == 0)
    974 				break;
    975 		}
    976 	}
    977 	if (match && matchwild == 0)
    978 		return match;
    979 
    980 	if (vp && table->vestige) {
    981 		void	*state = (*table->vestige->init_ports4)(laddr, lport_arg, lookup_wildcard);
    982 		vestigial_inpcb_t better;
    983 		bool has_better = false;
    984 
    985 		while (table->vestige
    986 		       && (*table->vestige->next_port4)(state, vp)) {
    987 
    988 			if (vp->lport != lport)
    989 				continue;
    990 			wildcard = 0;
    991 			if (!in_nullhost(vp->faddr.v4))
    992 				wildcard++;
    993 			if (in_nullhost(vp->laddr.v4)) {
    994 				if (!in_nullhost(laddr))
    995 					wildcard++;
    996 			} else {
    997 				if (in_nullhost(laddr))
    998 					wildcard++;
    999 				else {
   1000 					if (!in_hosteq(vp->laddr.v4, laddr))
   1001 						continue;
   1002 				}
   1003 			}
   1004 			if (wildcard && !lookup_wildcard)
   1005 				continue;
   1006 			if (wildcard < matchwild) {
   1007 				better = *vp;
   1008 				has_better = true;
   1009 
   1010 				matchwild = wildcard;
   1011 				if (matchwild == 0)
   1012 					break;
   1013 			}
   1014 		}
   1015 
   1016 		if (has_better) {
   1017 			*vp = better;
   1018 			return 0;
   1019 		}
   1020 	}
   1021 
   1022 	return match;
   1023 }
   1024 
#ifdef DIAGNOSTIC
/*
 * When non-zero, inpcb_lookup() and inpcb_lookup_bound() print the
 * addresses and ports of each lookup that misses.
 */
int	inpcb_notifymiss = 0;
#endif
   1028 
   1029 struct inpcb *
   1030 inpcb_lookup(struct inpcbtable *table,
   1031     struct in_addr faddr, u_int fport_arg,
   1032     struct in_addr laddr, u_int lport_arg,
   1033     vestigial_inpcb_t *vp)
   1034 {
   1035 	struct inpcbhead *head;
   1036 	struct inpcb *inp;
   1037 	in_port_t fport = fport_arg, lport = lport_arg;
   1038 
   1039 	if (vp)
   1040 		vp->valid = 0;
   1041 
   1042 	head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
   1043 	LIST_FOREACH(inp, head, inp_hash) {
   1044 		if (inp->inp_af != AF_INET)
   1045 			continue;
   1046 
   1047 		if (in_hosteq(in4p_faddr(inp), faddr) &&
   1048 		    inp->inp_fport == fport &&
   1049 		    inp->inp_lport == lport &&
   1050 		    in_hosteq(in4p_laddr(inp), laddr))
   1051 			goto out;
   1052 	}
   1053 	if (vp && table->vestige) {
   1054 		if ((*table->vestige->lookup4)(faddr, fport_arg,
   1055 					       laddr, lport_arg, vp))
   1056 			return 0;
   1057 	}
   1058 
   1059 #ifdef DIAGNOSTIC
   1060 	if (inpcb_notifymiss) {
   1061 		printf("inpcb_lookup: faddr=%08x fport=%d laddr=%08x lport=%d\n",
   1062 		    ntohl(faddr.s_addr), ntohs(fport),
   1063 		    ntohl(laddr.s_addr), ntohs(lport));
   1064 	}
   1065 #endif
   1066 	return 0;
   1067 
   1068 out:
   1069 	/* Move this PCB to the head of hash chain. */
   1070 	if (inp != LIST_FIRST(head)) {
   1071 		LIST_REMOVE(inp, inp_hash);
   1072 		LIST_INSERT_HEAD(head, inp, inp_hash);
   1073 	}
   1074 	return inp;
   1075 }
   1076 
   1077 struct inpcb *
   1078 inpcb_lookup_bound(struct inpcbtable *table,
   1079     struct in_addr laddr, u_int lport_arg)
   1080 {
   1081 	struct inpcbhead *head;
   1082 	struct inpcb *inp;
   1083 	in_port_t lport = lport_arg;
   1084 
   1085 	head = INPCBHASH_BIND(table, laddr, lport);
   1086 	LIST_FOREACH(inp, head, inp_hash) {
   1087 		if (inp->inp_af != AF_INET)
   1088 			continue;
   1089 
   1090 		if (inp->inp_lport == lport &&
   1091 		    in_hosteq(in4p_laddr(inp), laddr))
   1092 			goto out;
   1093 	}
   1094 	head = INPCBHASH_BIND(table, zeroin_addr, lport);
   1095 	LIST_FOREACH(inp, head, inp_hash) {
   1096 		if (inp->inp_af != AF_INET)
   1097 			continue;
   1098 
   1099 		if (inp->inp_lport == lport &&
   1100 		    in_hosteq(in4p_laddr(inp), zeroin_addr))
   1101 			goto out;
   1102 	}
   1103 #ifdef DIAGNOSTIC
   1104 	if (inpcb_notifymiss) {
   1105 		printf("inpcb_lookup_bound: laddr=%08x lport=%d\n",
   1106 		    ntohl(laddr.s_addr), ntohs(lport));
   1107 	}
   1108 #endif
   1109 	return 0;
   1110 
   1111 out:
   1112 	/* Move this PCB to the head of hash chain. */
   1113 	if (inp != LIST_FIRST(head)) {
   1114 		LIST_REMOVE(inp, inp_hash);
   1115 		LIST_INSERT_HEAD(head, inp, inp_hash);
   1116 	}
   1117 	return inp;
   1118 }
   1119 
   1120 void
   1121 inpcb_set_state(struct inpcb *inp, int state)
   1122 {
   1123 
   1124 #ifdef INET6
   1125 	if (inp->inp_af == AF_INET6) {
   1126 		in6pcb_set_state(inp, state);
   1127 		return;
   1128 	}
   1129 #else
   1130 	if (inp->inp_af != AF_INET)
   1131 		return;
   1132 #endif
   1133 
   1134 	if (inp->inp_state > INP_ATTACHED)
   1135 		LIST_REMOVE(inp, inp_hash);
   1136 
   1137 	switch (state) {
   1138 	case INP_BOUND:
   1139 		LIST_INSERT_HEAD(INPCBHASH_BIND(inp->inp_table,
   1140 		    in4p_laddr(inp), inp->inp_lport), inp,
   1141 		    inp_hash);
   1142 		break;
   1143 	case INP_CONNECTED:
   1144 		LIST_INSERT_HEAD(INPCBHASH_CONNECT(inp->inp_table,
   1145 		    in4p_faddr(inp), inp->inp_fport,
   1146 		    in4p_laddr(inp), inp->inp_lport), inp,
   1147 		    inp_hash);
   1148 		break;
   1149 	}
   1150 
   1151 	inp->inp_state = state;
   1152 }
   1153 
   1154 struct rtentry *
   1155 inpcb_rtentry(struct inpcb *inp)
   1156 {
   1157 	struct route *ro;
   1158 	union {
   1159 		struct sockaddr		dst;
   1160 		struct sockaddr_in	dst4;
   1161 	} u;
   1162 
   1163 #ifdef INET6
   1164 	if (inp->inp_af == AF_INET6)
   1165 		return in6pcb_rtentry(inp);
   1166 #endif
   1167 	if (inp->inp_af != AF_INET)
   1168 		return NULL;
   1169 
   1170 	ro = &inp->inp_route;
   1171 
   1172 	sockaddr_in_init(&u.dst4, &in4p_faddr(inp), 0);
   1173 	return rtcache_lookup(ro, &u.dst);
   1174 }
   1175 
   1176 void
   1177 inpcb_rtentry_unref(struct rtentry *rt, struct inpcb *inp)
   1178 {
   1179 
   1180 	rtcache_unref(rt, &inp->inp_route);
   1181 }
   1182