Home | History | Annotate | Line # | Download | only in netmgr
socket.c revision 1.2
      1  1.1  christos /*	$NetBSD: socket.c,v 1.2 2025/01/26 16:25:43 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  1.1  christos  *
      6  1.1  christos  * SPDX-License-Identifier: MPL-2.0
      7  1.1  christos  *
      8  1.1  christos  * This Source Code Form is subject to the terms of the Mozilla Public
      9  1.1  christos  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  1.1  christos  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  1.1  christos  *
     12  1.1  christos  * See the COPYRIGHT file distributed with this work for additional
     13  1.1  christos  * information regarding copyright ownership.
     14  1.1  christos  */
     15  1.1  christos 
     16  1.1  christos #include <isc/errno.h>
     17  1.1  christos #include <isc/uv.h>
     18  1.1  christos 
     19  1.1  christos #include "netmgr-int.h"
     20  1.1  christos 
/*
 * Helpers for int-valued socket options: setsockopt_int() passes 'value'
 * through an anonymous int, setsockopt_on()/setsockopt_off() pass 1 and 0
 * respectively.  All of them evaluate to the return value of setsockopt().
 */
#define setsockopt_int(fd, level, name, value) \
	setsockopt(fd, level, name, &(int){ value }, sizeof(int))

#define setsockopt_on(fd, level, name) setsockopt_int(fd, level, name, 1)

#define setsockopt_off(fd, level, name) setsockopt_int(fd, level, name, 0)
     26  1.1  christos 
     27  1.1  christos static isc_result_t
     28  1.1  christos socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
     29  1.1  christos 	/*
     30  1.1  christos 	 * Set the IP_FREEBIND (or equivalent option) on the uv_handle.
     31  1.1  christos 	 */
     32  1.1  christos #ifdef IP_FREEBIND
     33  1.1  christos 	UNUSED(sa_family);
     34  1.1  christos 	if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
     35  1.1  christos 		return ISC_R_FAILURE;
     36  1.1  christos 	}
     37  1.1  christos 	return ISC_R_SUCCESS;
     38  1.1  christos #elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
     39  1.1  christos 	if (sa_family == AF_INET) {
     40  1.1  christos #if defined(IP_BINDANY)
     41  1.1  christos 		if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
     42  1.1  christos 			return ISC_R_FAILURE;
     43  1.1  christos 		}
     44  1.1  christos 		return ISC_R_SUCCESS;
     45  1.1  christos #endif
     46  1.1  christos 	} else if (sa_family == AF_INET6) {
     47  1.1  christos #if defined(IPV6_BINDANY)
     48  1.1  christos 		if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
     49  1.1  christos 			return ISC_R_FAILURE;
     50  1.1  christos 		}
     51  1.1  christos 		return ISC_R_SUCCESS;
     52  1.1  christos #endif
     53  1.1  christos 	}
     54  1.1  christos 	return ISC_R_NOTIMPLEMENTED;
     55  1.1  christos #elif defined(SO_BINDANY)
     56  1.1  christos 	UNUSED(sa_family);
     57  1.1  christos 	if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
     58  1.1  christos 		return ISC_R_FAILURE;
     59  1.1  christos 	}
     60  1.1  christos 	return ISC_R_SUCCESS;
     61  1.1  christos #else
     62  1.1  christos 	UNUSED(fd);
     63  1.1  christos 	UNUSED(sa_family);
     64  1.1  christos 	return ISC_R_NOTIMPLEMENTED;
     65  1.1  christos #endif
     66  1.1  christos }
     67  1.1  christos 
     68  1.1  christos int
     69  1.1  christos isc__nm_udp_freebind(uv_udp_t *handle, const struct sockaddr *addr,
     70  1.1  christos 		     unsigned int flags) {
     71  1.1  christos 	int r;
     72  1.1  christos 	uv_os_sock_t fd = -1;
     73  1.1  christos 
     74  1.1  christos 	r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
     75  1.1  christos 	if (r < 0) {
     76  1.1  christos 		return r;
     77  1.1  christos 	}
     78  1.1  christos 
     79  1.1  christos 	r = uv_udp_bind(handle, addr, flags);
     80  1.1  christos 	if (r == UV_EADDRNOTAVAIL &&
     81  1.1  christos 	    socket_freebind(fd, addr->sa_family) == ISC_R_SUCCESS)
     82  1.1  christos 	{
     83  1.1  christos 		/*
     84  1.1  christos 		 * Retry binding with IP_FREEBIND (or equivalent option) if the
     85  1.1  christos 		 * address is not available. This helps with IPv6 tentative
     86  1.1  christos 		 * addresses which are reported by the route socket, although
     87  1.1  christos 		 * named is not yet able to properly bind to them.
     88  1.1  christos 		 */
     89  1.1  christos 		r = uv_udp_bind(handle, addr, flags);
     90  1.1  christos 	}
     91  1.1  christos 
     92  1.1  christos 	return r;
     93  1.1  christos }
     94  1.1  christos 
     95  1.1  christos static int
     96  1.1  christos tcp_bind_now(uv_tcp_t *handle, const struct sockaddr *addr,
     97  1.1  christos 	     unsigned int flags) {
     98  1.1  christos 	int r;
     99  1.1  christos 	struct sockaddr_storage sname;
    100  1.1  christos 	int snamelen = sizeof(sname);
    101  1.1  christos 
    102  1.1  christos 	r = uv_tcp_bind(handle, addr, flags);
    103  1.1  christos 	if (r < 0) {
    104  1.1  christos 		return r;
    105  1.1  christos 	}
    106  1.1  christos 
    107  1.1  christos 	/*
    108  1.1  christos 	 * uv_tcp_bind() uses a delayed error, initially returning
    109  1.1  christos 	 * success even if bind() fails. By calling uv_tcp_getsockname()
    110  1.1  christos 	 * here we can find out whether the bind() call was successful.
    111  1.1  christos 	 */
    112  1.1  christos 	r = uv_tcp_getsockname(handle, (struct sockaddr *)&sname, &snamelen);
    113  1.1  christos 	if (r < 0) {
    114  1.1  christos 		return r;
    115  1.1  christos 	}
    116  1.1  christos 
    117  1.1  christos 	return 0;
    118  1.1  christos }
    119  1.1  christos 
    120  1.1  christos int
    121  1.1  christos isc__nm_tcp_freebind(uv_tcp_t *handle, const struct sockaddr *addr,
    122  1.1  christos 		     unsigned int flags) {
    123  1.1  christos 	int r;
    124  1.1  christos 	uv_os_sock_t fd = -1;
    125  1.1  christos 
    126  1.1  christos 	r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
    127  1.1  christos 	if (r < 0) {
    128  1.1  christos 		return r;
    129  1.1  christos 	}
    130  1.1  christos 
    131  1.1  christos 	r = tcp_bind_now(handle, addr, flags);
    132  1.1  christos 	if (r == UV_EADDRNOTAVAIL &&
    133  1.1  christos 	    socket_freebind(fd, addr->sa_family) == ISC_R_SUCCESS)
    134  1.1  christos 	{
    135  1.1  christos 		/*
    136  1.1  christos 		 * Retry binding with IP_FREEBIND (or equivalent option) if the
    137  1.1  christos 		 * address is not available. This helps with IPv6 tentative
    138  1.1  christos 		 * addresses which are reported by the route socket, although
    139  1.1  christos 		 * named is not yet able to properly bind to them.
    140  1.1  christos 		 */
    141  1.1  christos 		r = tcp_bind_now(handle, addr, flags);
    142  1.1  christos 	}
    143  1.1  christos 
    144  1.1  christos 	return r;
    145  1.1  christos }
    146  1.1  christos 
    147  1.1  christos isc_result_t
    148  1.1  christos isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) {
    149  1.1  christos 	int sock = socket(domain, type, protocol);
    150  1.1  christos 	if (sock < 0) {
    151  1.1  christos 		return isc_errno_toresult(errno);
    152  1.1  christos 	}
    153  1.1  christos 
    154  1.1  christos 	*sockp = (uv_os_sock_t)sock;
    155  1.1  christos 	return ISC_R_SUCCESS;
    156  1.1  christos }
    157  1.1  christos 
    158  1.1  christos void
    159  1.1  christos isc__nm_closesocket(uv_os_sock_t sock) {
    160  1.1  christos 	close(sock);
    161  1.1  christos }
    162  1.1  christos 
    163  1.1  christos isc_result_t
    164  1.1  christos isc__nm_socket_reuse(uv_os_sock_t fd, int val) {
    165  1.1  christos 	/*
    166  1.1  christos 	 * Generally, the SO_REUSEADDR socket option allows reuse of
    167  1.1  christos 	 * local addresses.
    168  1.1  christos 	 *
    169  1.1  christos 	 * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
    170  1.1  christos 	 * additional refinements for programs that use multicast.
    171  1.1  christos 	 *
    172  1.1  christos 	 * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
    173  1.1  christos 	 * rather than steal it from the current listener, so we don't use it
    174  1.1  christos 	 * here, but rather in isc__nm_socket_reuse_lb().
    175  1.1  christos 	 */
    176  1.1  christos 
    177  1.1  christos #if defined(SO_REUSEPORT) && !defined(__linux__)
    178  1.1  christos 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) == -1) {
    179  1.1  christos 		return ISC_R_FAILURE;
    180  1.1  christos 	}
    181  1.1  christos 	return ISC_R_SUCCESS;
    182  1.1  christos #elif defined(SO_REUSEADDR)
    183  1.1  christos 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) == -1) {
    184  1.1  christos 		return ISC_R_FAILURE;
    185  1.1  christos 	}
    186  1.1  christos 	return ISC_R_SUCCESS;
    187  1.1  christos #else
    188  1.1  christos 	UNUSED(fd);
    189  1.1  christos 	return ISC_R_NOTIMPLEMENTED;
    190  1.1  christos #endif
    191  1.1  christos }
    192  1.1  christos 
    193  1.1  christos isc_result_t
    194  1.1  christos isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
    195  1.1  christos 	/*
    196  1.1  christos 	 * On FreeBSD 12+, SO_REUSEPORT_LB socket option allows sockets to be
    197  1.1  christos 	 * bound to an identical socket address. For UDP sockets, the use of
    198  1.1  christos 	 * this option can provide better distribution of incoming datagrams to
    199  1.1  christos 	 * multiple processes (or threads) as compared to the traditional
    200  1.1  christos 	 * technique of having multiple processes compete to receive datagrams
    201  1.1  christos 	 * on the same socket.
    202  1.1  christos 	 *
    203  1.1  christos 	 * On Linux, the same thing is achieved simply with SO_REUSEPORT.
    204  1.1  christos 	 */
    205  1.1  christos #if defined(SO_REUSEPORT_LB)
    206  1.1  christos 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
    207  1.1  christos 		return ISC_R_FAILURE;
    208  1.1  christos 	} else {
    209  1.1  christos 		return ISC_R_SUCCESS;
    210  1.1  christos 	}
    211  1.1  christos #elif defined(SO_REUSEPORT) && defined(__linux__)
    212  1.1  christos 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
    213  1.1  christos 		return ISC_R_FAILURE;
    214  1.1  christos 	} else {
    215  1.1  christos 		return ISC_R_SUCCESS;
    216  1.1  christos 	}
    217  1.1  christos #else
    218  1.1  christos 	UNUSED(fd);
    219  1.1  christos 	return ISC_R_NOTIMPLEMENTED;
    220  1.1  christos #endif
    221  1.1  christos }
    222  1.1  christos 
    223  1.1  christos isc_result_t
    224  1.1  christos isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family) {
    225  1.1  christos 	/*
    226  1.1  christos 	 * Disable the Path MTU Discovery on IP packets
    227  1.1  christos 	 */
    228  1.1  christos 	if (sa_family == AF_INET6) {
    229  1.1  christos #if defined(IPV6_DONTFRAG)
    230  1.1  christos 		if (setsockopt_off(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
    231  1.1  christos 			return ISC_R_FAILURE;
    232  1.1  christos 		} else {
    233  1.1  christos 			return ISC_R_SUCCESS;
    234  1.1  christos 		}
    235  1.1  christos #elif defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
    236  1.1  christos 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
    237  1.1  christos 			       &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
    238  1.1  christos 		{
    239  1.1  christos 			return ISC_R_FAILURE;
    240  1.1  christos 		} else {
    241  1.1  christos 			return ISC_R_SUCCESS;
    242  1.1  christos 		}
    243  1.1  christos #else
    244  1.1  christos 		UNUSED(fd);
    245  1.1  christos #endif
    246  1.1  christos 	} else if (sa_family == AF_INET) {
    247  1.1  christos #if defined(IP_DONTFRAG)
    248  1.1  christos 		if (setsockopt_off(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
    249  1.1  christos 			return ISC_R_FAILURE;
    250  1.1  christos 		} else {
    251  1.1  christos 			return ISC_R_SUCCESS;
    252  1.1  christos 		}
    253  1.1  christos #elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
    254  1.1  christos 		if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
    255  1.1  christos 			       &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
    256  1.1  christos 		{
    257  1.1  christos 			return ISC_R_FAILURE;
    258  1.1  christos 		} else {
    259  1.1  christos 			return ISC_R_SUCCESS;
    260  1.1  christos 		}
    261  1.1  christos #else
    262  1.1  christos 		UNUSED(fd);
    263  1.1  christos #endif
    264  1.1  christos 	} else {
    265  1.1  christos 		return ISC_R_FAMILYNOSUPPORT;
    266  1.1  christos 	}
    267  1.1  christos 
    268  1.1  christos 	return ISC_R_NOTIMPLEMENTED;
    269  1.1  christos }
    270  1.1  christos 
    271  1.1  christos isc_result_t
    272  1.1  christos isc__nm_socket_v6only(uv_os_sock_t fd, sa_family_t sa_family) {
    273  1.1  christos 	/*
    274  1.1  christos 	 * Enable the IPv6-only option on IPv6 sockets
    275  1.1  christos 	 */
    276  1.1  christos 	if (sa_family == AF_INET6) {
    277  1.1  christos #if defined(IPV6_V6ONLY)
    278  1.1  christos 		if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_V6ONLY) == -1) {
    279  1.1  christos 			return ISC_R_FAILURE;
    280  1.1  christos 		} else {
    281  1.1  christos 			return ISC_R_SUCCESS;
    282  1.1  christos 		}
    283  1.1  christos #else
    284  1.1  christos 		UNUSED(fd);
    285  1.1  christos #endif
    286  1.1  christos 	}
    287  1.1  christos 	return ISC_R_NOTIMPLEMENTED;
    288  1.1  christos }
    289  1.1  christos 
    290  1.1  christos isc_result_t
    291  1.1  christos isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
    292  1.1  christos #if defined(TIMEOUT_OPTNAME)
    293  1.1  christos 	TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV;
    294  1.1  christos 
    295  1.1  christos 	if (timeout == 0) {
    296  1.1  christos 		timeout = 1;
    297  1.1  christos 	}
    298  1.1  christos 
    299  1.1  christos 	if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout,
    300  1.1  christos 		       sizeof(timeout)) == -1)
    301  1.1  christos 	{
    302  1.1  christos 		return ISC_R_FAILURE;
    303  1.1  christos 	}
    304  1.1  christos 
    305  1.1  christos 	return ISC_R_SUCCESS;
    306  1.1  christos #else
    307  1.1  christos 	UNUSED(fd);
    308  1.1  christos 	UNUSED(timeout_ms);
    309  1.1  christos 
    310  1.1  christos 	return ISC_R_SUCCESS;
    311  1.1  christos #endif
    312  1.1  christos }
    313  1.1  christos 
    314  1.1  christos isc_result_t
    315  1.1  christos isc__nm_socket_tcp_nodelay(uv_os_sock_t fd, bool value) {
    316  1.1  christos #ifdef TCP_NODELAY
    317  1.1  christos 	int ret;
    318  1.1  christos 
    319  1.1  christos 	if (value) {
    320  1.1  christos 		ret = setsockopt_on(fd, IPPROTO_TCP, TCP_NODELAY);
    321  1.1  christos 	} else {
    322  1.1  christos 		ret = setsockopt_off(fd, IPPROTO_TCP, TCP_NODELAY);
    323  1.1  christos 	}
    324  1.1  christos 
    325  1.1  christos 	if (ret == -1) {
    326  1.1  christos 		return ISC_R_FAILURE;
    327  1.1  christos 	} else {
    328  1.1  christos 		return ISC_R_SUCCESS;
    329  1.1  christos 	}
    330  1.1  christos #else
    331  1.1  christos 	UNUSED(fd);
    332  1.1  christos 	return ISC_R_SUCCESS;
    333  1.1  christos #endif
    334  1.1  christos }
    335  1.1  christos 
    336  1.1  christos isc_result_t
    337  1.1  christos isc__nm_socket_tcp_maxseg(uv_os_sock_t fd, int size) {
    338  1.1  christos #ifdef TCP_MAXSEG
    339  1.1  christos 	if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, (void *)&size,
    340  1.1  christos 		       sizeof(size)))
    341  1.1  christos 	{
    342  1.1  christos 		return ISC_R_FAILURE;
    343  1.1  christos 	} else {
    344  1.1  christos 		return ISC_R_SUCCESS;
    345  1.1  christos 	}
    346  1.1  christos #else
    347  1.1  christos 	UNUSED(fd);
    348  1.1  christos 	UNUSED(size);
    349  1.1  christos 	return ISC_R_SUCCESS;
    350  1.1  christos #endif
    351  1.1  christos }
    352  1.1  christos 
    353  1.1  christos isc_result_t
    354  1.1  christos isc__nm_socket_min_mtu(uv_os_sock_t fd, sa_family_t sa_family) {
    355  1.1  christos 	if (sa_family != AF_INET6) {
    356  1.1  christos 		return ISC_R_SUCCESS;
    357  1.1  christos 	}
    358  1.1  christos #ifdef IPV6_USE_MIN_MTU
    359  1.1  christos 	if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU) == -1) {
    360  1.1  christos 		return ISC_R_FAILURE;
    361  1.1  christos 	}
    362  1.1  christos #elif defined(IPV6_MTU)
    363  1.1  christos 	if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU, &(int){ 1280 },
    364  1.1  christos 		       sizeof(int)) == -1)
    365  1.1  christos 	{
    366  1.1  christos 		return ISC_R_FAILURE;
    367  1.1  christos 	}
    368  1.1  christos #else
    369  1.1  christos 	UNUSED(fd);
    370  1.1  christos #endif
    371  1.1  christos 
    372  1.1  christos 	return ISC_R_SUCCESS;
    373  1.1  christos }
    374