socket.c revision 1.2 1 /* $NetBSD: socket.c,v 1.2 2025/01/26 16:25:43 christos Exp $ */
2
3 /*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16 #include <isc/errno.h>
17 #include <isc/uv.h>
18
19 #include "netmgr-int.h"
20
/*
 * Set the int-valued socket option 'optname' at 'level' on 'fd' to 1.
 * Expands to the setsockopt() call, so it evaluates to that call's
 * return value.
 */
#define setsockopt_on(fd, level, optname) \
	(setsockopt((fd), (level), (optname), &(int){ 1 }, sizeof(int)))
23
/*
 * Set the int-valued socket option 'optname' at 'level' on 'fd' to 0.
 * Expands to the setsockopt() call, so it evaluates to that call's
 * return value.
 */
#define setsockopt_off(fd, level, optname) \
	(setsockopt((fd), (level), (optname), &(int){ 0 }, sizeof(int)))
26
27 static isc_result_t
28 socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
29 /*
30 * Set the IP_FREEBIND (or equivalent option) on the uv_handle.
31 */
32 #ifdef IP_FREEBIND
33 UNUSED(sa_family);
34 if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
35 return ISC_R_FAILURE;
36 }
37 return ISC_R_SUCCESS;
38 #elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
39 if (sa_family == AF_INET) {
40 #if defined(IP_BINDANY)
41 if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
42 return ISC_R_FAILURE;
43 }
44 return ISC_R_SUCCESS;
45 #endif
46 } else if (sa_family == AF_INET6) {
47 #if defined(IPV6_BINDANY)
48 if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
49 return ISC_R_FAILURE;
50 }
51 return ISC_R_SUCCESS;
52 #endif
53 }
54 return ISC_R_NOTIMPLEMENTED;
55 #elif defined(SO_BINDANY)
56 UNUSED(sa_family);
57 if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
58 return ISC_R_FAILURE;
59 }
60 return ISC_R_SUCCESS;
61 #else
62 UNUSED(fd);
63 UNUSED(sa_family);
64 return ISC_R_NOTIMPLEMENTED;
65 #endif
66 }
67
68 int
69 isc__nm_udp_freebind(uv_udp_t *handle, const struct sockaddr *addr,
70 unsigned int flags) {
71 int r;
72 uv_os_sock_t fd = -1;
73
74 r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
75 if (r < 0) {
76 return r;
77 }
78
79 r = uv_udp_bind(handle, addr, flags);
80 if (r == UV_EADDRNOTAVAIL &&
81 socket_freebind(fd, addr->sa_family) == ISC_R_SUCCESS)
82 {
83 /*
84 * Retry binding with IP_FREEBIND (or equivalent option) if the
85 * address is not available. This helps with IPv6 tentative
86 * addresses which are reported by the route socket, although
87 * named is not yet able to properly bind to them.
88 */
89 r = uv_udp_bind(handle, addr, flags);
90 }
91
92 return r;
93 }
94
95 static int
96 tcp_bind_now(uv_tcp_t *handle, const struct sockaddr *addr,
97 unsigned int flags) {
98 int r;
99 struct sockaddr_storage sname;
100 int snamelen = sizeof(sname);
101
102 r = uv_tcp_bind(handle, addr, flags);
103 if (r < 0) {
104 return r;
105 }
106
107 /*
108 * uv_tcp_bind() uses a delayed error, initially returning
109 * success even if bind() fails. By calling uv_tcp_getsockname()
110 * here we can find out whether the bind() call was successful.
111 */
112 r = uv_tcp_getsockname(handle, (struct sockaddr *)&sname, &snamelen);
113 if (r < 0) {
114 return r;
115 }
116
117 return 0;
118 }
119
120 int
121 isc__nm_tcp_freebind(uv_tcp_t *handle, const struct sockaddr *addr,
122 unsigned int flags) {
123 int r;
124 uv_os_sock_t fd = -1;
125
126 r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
127 if (r < 0) {
128 return r;
129 }
130
131 r = tcp_bind_now(handle, addr, flags);
132 if (r == UV_EADDRNOTAVAIL &&
133 socket_freebind(fd, addr->sa_family) == ISC_R_SUCCESS)
134 {
135 /*
136 * Retry binding with IP_FREEBIND (or equivalent option) if the
137 * address is not available. This helps with IPv6 tentative
138 * addresses which are reported by the route socket, although
139 * named is not yet able to properly bind to them.
140 */
141 r = tcp_bind_now(handle, addr, flags);
142 }
143
144 return r;
145 }
146
147 isc_result_t
148 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) {
149 int sock = socket(domain, type, protocol);
150 if (sock < 0) {
151 return isc_errno_toresult(errno);
152 }
153
154 *sockp = (uv_os_sock_t)sock;
155 return ISC_R_SUCCESS;
156 }
157
158 void
159 isc__nm_closesocket(uv_os_sock_t sock) {
160 close(sock);
161 }
162
163 isc_result_t
164 isc__nm_socket_reuse(uv_os_sock_t fd, int val) {
165 /*
166 * Generally, the SO_REUSEADDR socket option allows reuse of
167 * local addresses.
168 *
169 * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
170 * additional refinements for programs that use multicast.
171 *
172 * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
173 * rather than steal it from the current listener, so we don't use it
174 * here, but rather in isc__nm_socket_reuse_lb().
175 */
176
177 #if defined(SO_REUSEPORT) && !defined(__linux__)
178 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) == -1) {
179 return ISC_R_FAILURE;
180 }
181 return ISC_R_SUCCESS;
182 #elif defined(SO_REUSEADDR)
183 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) == -1) {
184 return ISC_R_FAILURE;
185 }
186 return ISC_R_SUCCESS;
187 #else
188 UNUSED(fd);
189 return ISC_R_NOTIMPLEMENTED;
190 #endif
191 }
192
193 isc_result_t
194 isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
195 /*
196 * On FreeBSD 12+, SO_REUSEPORT_LB socket option allows sockets to be
197 * bound to an identical socket address. For UDP sockets, the use of
198 * this option can provide better distribution of incoming datagrams to
199 * multiple processes (or threads) as compared to the traditional
200 * technique of having multiple processes compete to receive datagrams
201 * on the same socket.
202 *
203 * On Linux, the same thing is achieved simply with SO_REUSEPORT.
204 */
205 #if defined(SO_REUSEPORT_LB)
206 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
207 return ISC_R_FAILURE;
208 } else {
209 return ISC_R_SUCCESS;
210 }
211 #elif defined(SO_REUSEPORT) && defined(__linux__)
212 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
213 return ISC_R_FAILURE;
214 } else {
215 return ISC_R_SUCCESS;
216 }
217 #else
218 UNUSED(fd);
219 return ISC_R_NOTIMPLEMENTED;
220 #endif
221 }
222
223 isc_result_t
224 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family) {
225 /*
226 * Disable the Path MTU Discovery on IP packets
227 */
228 if (sa_family == AF_INET6) {
229 #if defined(IPV6_DONTFRAG)
230 if (setsockopt_off(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
231 return ISC_R_FAILURE;
232 } else {
233 return ISC_R_SUCCESS;
234 }
235 #elif defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
236 if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
237 &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
238 {
239 return ISC_R_FAILURE;
240 } else {
241 return ISC_R_SUCCESS;
242 }
243 #else
244 UNUSED(fd);
245 #endif
246 } else if (sa_family == AF_INET) {
247 #if defined(IP_DONTFRAG)
248 if (setsockopt_off(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
249 return ISC_R_FAILURE;
250 } else {
251 return ISC_R_SUCCESS;
252 }
253 #elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
254 if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
255 &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
256 {
257 return ISC_R_FAILURE;
258 } else {
259 return ISC_R_SUCCESS;
260 }
261 #else
262 UNUSED(fd);
263 #endif
264 } else {
265 return ISC_R_FAMILYNOSUPPORT;
266 }
267
268 return ISC_R_NOTIMPLEMENTED;
269 }
270
271 isc_result_t
272 isc__nm_socket_v6only(uv_os_sock_t fd, sa_family_t sa_family) {
273 /*
274 * Enable the IPv6-only option on IPv6 sockets
275 */
276 if (sa_family == AF_INET6) {
277 #if defined(IPV6_V6ONLY)
278 if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_V6ONLY) == -1) {
279 return ISC_R_FAILURE;
280 } else {
281 return ISC_R_SUCCESS;
282 }
283 #else
284 UNUSED(fd);
285 #endif
286 }
287 return ISC_R_NOTIMPLEMENTED;
288 }
289
290 isc_result_t
291 isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
292 #if defined(TIMEOUT_OPTNAME)
293 TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV;
294
295 if (timeout == 0) {
296 timeout = 1;
297 }
298
299 if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout,
300 sizeof(timeout)) == -1)
301 {
302 return ISC_R_FAILURE;
303 }
304
305 return ISC_R_SUCCESS;
306 #else
307 UNUSED(fd);
308 UNUSED(timeout_ms);
309
310 return ISC_R_SUCCESS;
311 #endif
312 }
313
314 isc_result_t
315 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd, bool value) {
316 #ifdef TCP_NODELAY
317 int ret;
318
319 if (value) {
320 ret = setsockopt_on(fd, IPPROTO_TCP, TCP_NODELAY);
321 } else {
322 ret = setsockopt_off(fd, IPPROTO_TCP, TCP_NODELAY);
323 }
324
325 if (ret == -1) {
326 return ISC_R_FAILURE;
327 } else {
328 return ISC_R_SUCCESS;
329 }
330 #else
331 UNUSED(fd);
332 return ISC_R_SUCCESS;
333 #endif
334 }
335
336 isc_result_t
337 isc__nm_socket_tcp_maxseg(uv_os_sock_t fd, int size) {
338 #ifdef TCP_MAXSEG
339 if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, (void *)&size,
340 sizeof(size)))
341 {
342 return ISC_R_FAILURE;
343 } else {
344 return ISC_R_SUCCESS;
345 }
346 #else
347 UNUSED(fd);
348 UNUSED(size);
349 return ISC_R_SUCCESS;
350 #endif
351 }
352
353 isc_result_t
354 isc__nm_socket_min_mtu(uv_os_sock_t fd, sa_family_t sa_family) {
355 if (sa_family != AF_INET6) {
356 return ISC_R_SUCCESS;
357 }
358 #ifdef IPV6_USE_MIN_MTU
359 if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU) == -1) {
360 return ISC_R_FAILURE;
361 }
362 #elif defined(IPV6_MTU)
363 if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU, &(int){ 1280 },
364 sizeof(int)) == -1)
365 {
366 return ISC_R_FAILURE;
367 }
368 #else
369 UNUSED(fd);
370 #endif
371
372 return ISC_R_SUCCESS;
373 }
374