Home | History | Annotate | Line # | Download | only in netmgr
socket.c revision 1.2
      1 /*	$NetBSD: socket.c,v 1.2 2025/01/26 16:25:43 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * SPDX-License-Identifier: MPL-2.0
      7  *
      8  * This Source Code Form is subject to the terms of the Mozilla Public
      9  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  *
     12  * See the COPYRIGHT file distributed with this work for additional
     13  * information regarding copyright ownership.
     14  */
     15 
     16 #include <isc/errno.h>
     17 #include <isc/uv.h>
     18 
     19 #include "netmgr-int.h"
     20 
/*
 * Helpers for int-valued socket options. The option value is passed as an
 * anonymous int (a compound literal), so no named temporary is needed at
 * the call site. Each macro expands to the setsockopt() call itself and
 * therefore evaluates to its return value.
 *
 * setsockopt_on()  passes the value 1.
 * setsockopt_off() passes the value 0.
 */
#define setsockopt_flag(socket, level, name, flag) \
	setsockopt(socket, level, name, &(int){ flag }, sizeof(int))

#define setsockopt_on(socket, level, name) \
	setsockopt_flag(socket, level, name, 1)

#define setsockopt_off(socket, level, name) \
	setsockopt_flag(socket, level, name, 0)
     26 
     27 static isc_result_t
     28 socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
     29 	/*
     30 	 * Set the IP_FREEBIND (or equivalent option) on the uv_handle.
     31 	 */
     32 #ifdef IP_FREEBIND
     33 	UNUSED(sa_family);
     34 	if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
     35 		return ISC_R_FAILURE;
     36 	}
     37 	return ISC_R_SUCCESS;
     38 #elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
     39 	if (sa_family == AF_INET) {
     40 #if defined(IP_BINDANY)
     41 		if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
     42 			return ISC_R_FAILURE;
     43 		}
     44 		return ISC_R_SUCCESS;
     45 #endif
     46 	} else if (sa_family == AF_INET6) {
     47 #if defined(IPV6_BINDANY)
     48 		if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
     49 			return ISC_R_FAILURE;
     50 		}
     51 		return ISC_R_SUCCESS;
     52 #endif
     53 	}
     54 	return ISC_R_NOTIMPLEMENTED;
     55 #elif defined(SO_BINDANY)
     56 	UNUSED(sa_family);
     57 	if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
     58 		return ISC_R_FAILURE;
     59 	}
     60 	return ISC_R_SUCCESS;
     61 #else
     62 	UNUSED(fd);
     63 	UNUSED(sa_family);
     64 	return ISC_R_NOTIMPLEMENTED;
     65 #endif
     66 }
     67 
     68 int
     69 isc__nm_udp_freebind(uv_udp_t *handle, const struct sockaddr *addr,
     70 		     unsigned int flags) {
     71 	int r;
     72 	uv_os_sock_t fd = -1;
     73 
     74 	r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
     75 	if (r < 0) {
     76 		return r;
     77 	}
     78 
     79 	r = uv_udp_bind(handle, addr, flags);
     80 	if (r == UV_EADDRNOTAVAIL &&
     81 	    socket_freebind(fd, addr->sa_family) == ISC_R_SUCCESS)
     82 	{
     83 		/*
     84 		 * Retry binding with IP_FREEBIND (or equivalent option) if the
     85 		 * address is not available. This helps with IPv6 tentative
     86 		 * addresses which are reported by the route socket, although
     87 		 * named is not yet able to properly bind to them.
     88 		 */
     89 		r = uv_udp_bind(handle, addr, flags);
     90 	}
     91 
     92 	return r;
     93 }
     94 
     95 static int
     96 tcp_bind_now(uv_tcp_t *handle, const struct sockaddr *addr,
     97 	     unsigned int flags) {
     98 	int r;
     99 	struct sockaddr_storage sname;
    100 	int snamelen = sizeof(sname);
    101 
    102 	r = uv_tcp_bind(handle, addr, flags);
    103 	if (r < 0) {
    104 		return r;
    105 	}
    106 
    107 	/*
    108 	 * uv_tcp_bind() uses a delayed error, initially returning
    109 	 * success even if bind() fails. By calling uv_tcp_getsockname()
    110 	 * here we can find out whether the bind() call was successful.
    111 	 */
    112 	r = uv_tcp_getsockname(handle, (struct sockaddr *)&sname, &snamelen);
    113 	if (r < 0) {
    114 		return r;
    115 	}
    116 
    117 	return 0;
    118 }
    119 
    120 int
    121 isc__nm_tcp_freebind(uv_tcp_t *handle, const struct sockaddr *addr,
    122 		     unsigned int flags) {
    123 	int r;
    124 	uv_os_sock_t fd = -1;
    125 
    126 	r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
    127 	if (r < 0) {
    128 		return r;
    129 	}
    130 
    131 	r = tcp_bind_now(handle, addr, flags);
    132 	if (r == UV_EADDRNOTAVAIL &&
    133 	    socket_freebind(fd, addr->sa_family) == ISC_R_SUCCESS)
    134 	{
    135 		/*
    136 		 * Retry binding with IP_FREEBIND (or equivalent option) if the
    137 		 * address is not available. This helps with IPv6 tentative
    138 		 * addresses which are reported by the route socket, although
    139 		 * named is not yet able to properly bind to them.
    140 		 */
    141 		r = tcp_bind_now(handle, addr, flags);
    142 	}
    143 
    144 	return r;
    145 }
    146 
    147 isc_result_t
    148 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) {
    149 	int sock = socket(domain, type, protocol);
    150 	if (sock < 0) {
    151 		return isc_errno_toresult(errno);
    152 	}
    153 
    154 	*sockp = (uv_os_sock_t)sock;
    155 	return ISC_R_SUCCESS;
    156 }
    157 
    158 void
    159 isc__nm_closesocket(uv_os_sock_t sock) {
    160 	close(sock);
    161 }
    162 
    163 isc_result_t
    164 isc__nm_socket_reuse(uv_os_sock_t fd, int val) {
    165 	/*
    166 	 * Generally, the SO_REUSEADDR socket option allows reuse of
    167 	 * local addresses.
    168 	 *
    169 	 * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
    170 	 * additional refinements for programs that use multicast.
    171 	 *
    172 	 * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
    173 	 * rather than steal it from the current listener, so we don't use it
    174 	 * here, but rather in isc__nm_socket_reuse_lb().
    175 	 */
    176 
    177 #if defined(SO_REUSEPORT) && !defined(__linux__)
    178 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) == -1) {
    179 		return ISC_R_FAILURE;
    180 	}
    181 	return ISC_R_SUCCESS;
    182 #elif defined(SO_REUSEADDR)
    183 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) == -1) {
    184 		return ISC_R_FAILURE;
    185 	}
    186 	return ISC_R_SUCCESS;
    187 #else
    188 	UNUSED(fd);
    189 	return ISC_R_NOTIMPLEMENTED;
    190 #endif
    191 }
    192 
    193 isc_result_t
    194 isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
    195 	/*
    196 	 * On FreeBSD 12+, SO_REUSEPORT_LB socket option allows sockets to be
    197 	 * bound to an identical socket address. For UDP sockets, the use of
    198 	 * this option can provide better distribution of incoming datagrams to
    199 	 * multiple processes (or threads) as compared to the traditional
    200 	 * technique of having multiple processes compete to receive datagrams
    201 	 * on the same socket.
    202 	 *
    203 	 * On Linux, the same thing is achieved simply with SO_REUSEPORT.
    204 	 */
    205 #if defined(SO_REUSEPORT_LB)
    206 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
    207 		return ISC_R_FAILURE;
    208 	} else {
    209 		return ISC_R_SUCCESS;
    210 	}
    211 #elif defined(SO_REUSEPORT) && defined(__linux__)
    212 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
    213 		return ISC_R_FAILURE;
    214 	} else {
    215 		return ISC_R_SUCCESS;
    216 	}
    217 #else
    218 	UNUSED(fd);
    219 	return ISC_R_NOTIMPLEMENTED;
    220 #endif
    221 }
    222 
    223 isc_result_t
    224 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family) {
    225 	/*
    226 	 * Disable the Path MTU Discovery on IP packets
    227 	 */
    228 	if (sa_family == AF_INET6) {
    229 #if defined(IPV6_DONTFRAG)
    230 		if (setsockopt_off(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
    231 			return ISC_R_FAILURE;
    232 		} else {
    233 			return ISC_R_SUCCESS;
    234 		}
    235 #elif defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
    236 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
    237 			       &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
    238 		{
    239 			return ISC_R_FAILURE;
    240 		} else {
    241 			return ISC_R_SUCCESS;
    242 		}
    243 #else
    244 		UNUSED(fd);
    245 #endif
    246 	} else if (sa_family == AF_INET) {
    247 #if defined(IP_DONTFRAG)
    248 		if (setsockopt_off(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
    249 			return ISC_R_FAILURE;
    250 		} else {
    251 			return ISC_R_SUCCESS;
    252 		}
    253 #elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
    254 		if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
    255 			       &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
    256 		{
    257 			return ISC_R_FAILURE;
    258 		} else {
    259 			return ISC_R_SUCCESS;
    260 		}
    261 #else
    262 		UNUSED(fd);
    263 #endif
    264 	} else {
    265 		return ISC_R_FAMILYNOSUPPORT;
    266 	}
    267 
    268 	return ISC_R_NOTIMPLEMENTED;
    269 }
    270 
    271 isc_result_t
    272 isc__nm_socket_v6only(uv_os_sock_t fd, sa_family_t sa_family) {
    273 	/*
    274 	 * Enable the IPv6-only option on IPv6 sockets
    275 	 */
    276 	if (sa_family == AF_INET6) {
    277 #if defined(IPV6_V6ONLY)
    278 		if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_V6ONLY) == -1) {
    279 			return ISC_R_FAILURE;
    280 		} else {
    281 			return ISC_R_SUCCESS;
    282 		}
    283 #else
    284 		UNUSED(fd);
    285 #endif
    286 	}
    287 	return ISC_R_NOTIMPLEMENTED;
    288 }
    289 
    290 isc_result_t
    291 isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
    292 #if defined(TIMEOUT_OPTNAME)
    293 	TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV;
    294 
    295 	if (timeout == 0) {
    296 		timeout = 1;
    297 	}
    298 
    299 	if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout,
    300 		       sizeof(timeout)) == -1)
    301 	{
    302 		return ISC_R_FAILURE;
    303 	}
    304 
    305 	return ISC_R_SUCCESS;
    306 #else
    307 	UNUSED(fd);
    308 	UNUSED(timeout_ms);
    309 
    310 	return ISC_R_SUCCESS;
    311 #endif
    312 }
    313 
    314 isc_result_t
    315 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd, bool value) {
    316 #ifdef TCP_NODELAY
    317 	int ret;
    318 
    319 	if (value) {
    320 		ret = setsockopt_on(fd, IPPROTO_TCP, TCP_NODELAY);
    321 	} else {
    322 		ret = setsockopt_off(fd, IPPROTO_TCP, TCP_NODELAY);
    323 	}
    324 
    325 	if (ret == -1) {
    326 		return ISC_R_FAILURE;
    327 	} else {
    328 		return ISC_R_SUCCESS;
    329 	}
    330 #else
    331 	UNUSED(fd);
    332 	return ISC_R_SUCCESS;
    333 #endif
    334 }
    335 
    336 isc_result_t
    337 isc__nm_socket_tcp_maxseg(uv_os_sock_t fd, int size) {
    338 #ifdef TCP_MAXSEG
    339 	if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, (void *)&size,
    340 		       sizeof(size)))
    341 	{
    342 		return ISC_R_FAILURE;
    343 	} else {
    344 		return ISC_R_SUCCESS;
    345 	}
    346 #else
    347 	UNUSED(fd);
    348 	UNUSED(size);
    349 	return ISC_R_SUCCESS;
    350 #endif
    351 }
    352 
    353 isc_result_t
    354 isc__nm_socket_min_mtu(uv_os_sock_t fd, sa_family_t sa_family) {
    355 	if (sa_family != AF_INET6) {
    356 		return ISC_R_SUCCESS;
    357 	}
    358 #ifdef IPV6_USE_MIN_MTU
    359 	if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU) == -1) {
    360 		return ISC_R_FAILURE;
    361 	}
    362 #elif defined(IPV6_MTU)
    363 	if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU, &(int){ 1280 },
    364 		       sizeof(int)) == -1)
    365 	{
    366 		return ISC_R_FAILURE;
    367 	}
    368 #else
    369 	UNUSED(fd);
    370 #endif
    371 
    372 	return ISC_R_SUCCESS;
    373 }
    374