1 /* 2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file has functions to get queries from clients. 
40 */ 41 #include "config.h" 42 #ifdef HAVE_SYS_TYPES_H 43 # include <sys/types.h> 44 #endif 45 #include <sys/time.h> 46 #include <limits.h> 47 #ifdef USE_TCP_FASTOPEN 48 #include <netinet/tcp.h> 49 #endif 50 #include <ctype.h> 51 #include "services/listen_dnsport.h" 52 #include "services/outside_network.h" 53 #include "util/netevent.h" 54 #include "util/log.h" 55 #include "util/config_file.h" 56 #include "util/net_help.h" 57 #include "sldns/sbuffer.h" 58 #include "sldns/parseutil.h" 59 #include "sldns/wire2str.h" 60 #include "services/mesh.h" 61 #include "util/fptr_wlist.h" 62 #include "util/locks.h" 63 #include "util/timeval_func.h" 64 65 #ifdef HAVE_NETDB_H 66 #include <netdb.h> 67 #endif 68 #include <fcntl.h> 69 70 #ifdef HAVE_SYS_UN_H 71 #include <sys/un.h> 72 #endif 73 74 #ifdef HAVE_SYSTEMD 75 #include <systemd/sd-daemon.h> 76 #endif 77 78 #ifdef HAVE_IFADDRS_H 79 #include <ifaddrs.h> 80 #endif 81 #ifdef HAVE_NET_IF_H 82 #include <net/if.h> 83 #endif 84 85 #ifdef HAVE_TIME_H 86 #include <time.h> 87 #endif 88 #include <sys/time.h> 89 90 #ifdef HAVE_NGTCP2 91 #include <ngtcp2/ngtcp2.h> 92 #include <ngtcp2/ngtcp2_crypto.h> 93 #ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_OSSL_H 94 #include <ngtcp2/ngtcp2_crypto_ossl.h> 95 #elif defined(HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H) 96 #include <ngtcp2/ngtcp2_crypto_quictls.h> 97 #elif defined(HAVE_NGTCP2_NGTCP2_CRYPTO_OPENSSL_H) 98 #include <ngtcp2/ngtcp2_crypto_openssl.h> 99 #define MAKE_QUIC_METHOD 1 100 #endif 101 #endif 102 103 #ifdef HAVE_OPENSSL_SSL_H 104 #include <openssl/ssl.h> 105 #endif 106 107 #ifdef HAVE_LINUX_NET_TSTAMP_H 108 #include <linux/net_tstamp.h> 109 #endif 110 111 /** number of queued TCP connections for listen() */ 112 #define TCP_BACKLOG 256 113 114 #ifndef THREADS_DISABLED 115 /** lock on the counter of stream buffer memory */ 116 static lock_basic_type stream_wait_count_lock; 117 /** lock on the counter of HTTP2 query buffer memory */ 118 static lock_basic_type http2_query_buffer_count_lock; 119 /** lock on 
the counter of HTTP2 response buffer memory */ 120 static lock_basic_type http2_response_buffer_count_lock; 121 #endif 122 /** size (in bytes) of stream wait buffers */ 123 static size_t stream_wait_count = 0; 124 /** is the lock initialised for stream wait buffers */ 125 static int stream_wait_lock_inited = 0; 126 /** size (in bytes) of HTTP2 query buffers */ 127 static size_t http2_query_buffer_count = 0; 128 /** is the lock initialised for HTTP2 query buffers */ 129 static int http2_query_buffer_lock_inited = 0; 130 /** size (in bytes) of HTTP2 response buffers */ 131 static size_t http2_response_buffer_count = 0; 132 /** is the lock initialised for HTTP2 response buffers */ 133 static int http2_response_buffer_lock_inited = 0; 134 135 /** 136 * Debug print of the getaddrinfo returned address. 137 * @param addr: the address returned. 138 * @param additional: additional text that describes the type of socket, 139 * or NULL for no text. 140 */ 141 static void 142 verbose_print_addr(struct addrinfo *addr, const char* additional) 143 { 144 if(verbosity >= VERB_ALGO) { 145 char buf[100]; 146 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr; 147 #ifdef INET6 148 if(addr->ai_family == AF_INET6) 149 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)-> 150 sin6_addr; 151 #endif /* INET6 */ 152 if(inet_ntop(addr->ai_family, sinaddr, buf, 153 (socklen_t)sizeof(buf)) == 0) { 154 (void)strlcpy(buf, "(null)", sizeof(buf)); 155 } 156 buf[sizeof(buf)-1] = 0; 157 verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s", 158 addr->ai_socktype==SOCK_DGRAM?"udp": 159 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", 160 addr->ai_family==AF_INET?"4": 161 addr->ai_family==AF_INET6?"6": 162 "_otherfam", buf, 163 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port), 164 (additional?" 
":""), (additional?additional:"")); 165 } 166 } 167 168 void 169 verbose_print_unbound_socket(struct unbound_socket* ub_sock) 170 { 171 if(verbosity >= VERB_ALGO) { 172 char buf[256]; 173 log_info("listing of unbound_socket structure:"); 174 addr_to_str((void*)ub_sock->addr, ub_sock->addrlen, buf, 175 sizeof(buf)); 176 log_info("%s s is: %d, fam is: %s, acl: %s", buf, ub_sock->s, 177 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6", 178 ub_sock->acl?"yes":"no"); 179 } 180 } 181 182 #ifdef HAVE_SYSTEMD 183 static int 184 systemd_get_activated(int family, int socktype, int listen, 185 struct sockaddr *addr, socklen_t addrlen, 186 const char *path) 187 { 188 int i = 0; 189 int r = 0; 190 int s = -1; 191 const char* listen_pid, *listen_fds; 192 193 /* We should use "listen" option only for stream protocols. For UDP it should be -1 */ 194 195 if((r = sd_booted()) < 1) { 196 if(r == 0) 197 log_warn("systemd is not running"); 198 else 199 log_err("systemd sd_booted(): %s", strerror(-r)); 200 return -1; 201 } 202 203 listen_pid = getenv("LISTEN_PID"); 204 listen_fds = getenv("LISTEN_FDS"); 205 206 if (!listen_pid) { 207 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID"); 208 return -1; 209 } 210 211 if (!listen_fds) { 212 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS"); 213 return -1; 214 } 215 216 if((r = sd_listen_fds(0)) < 1) { 217 if(r == 0) 218 log_warn("systemd: did not return socket, check unit configuration"); 219 else 220 log_err("systemd sd_listen_fds(): %s", strerror(-r)); 221 return -1; 222 } 223 224 for(i = 0; i < r; i++) { 225 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) { 226 s = SD_LISTEN_FDS_START + i; 227 break; 228 } 229 } 230 if (s == -1) { 231 if (addr) 232 log_err_addr("systemd sd_listen_fds()", 233 "no such socket", 234 (struct sockaddr_storage *)addr, addrlen); 235 else 236 log_err("systemd sd_listen_fds(): %s", path); 237 } 238 return s; 239 } 240 #endif 241 242 int 243 create_udp_sock(int 
family, int socktype, struct sockaddr* addr, 244 socklen_t addrlen, int v6only, int* inuse, int* noproto, 245 int rcv, int snd, int listen, int* reuseport, int transparent, 246 int freebind, int use_systemd, int dscp) 247 { 248 int s; 249 char* err; 250 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY) 251 int on=1; 252 #endif 253 #ifdef IPV6_MTU 254 int mtu = IPV6_MIN_MTU; 255 #endif 256 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF) 257 (void)rcv; 258 #endif 259 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF) 260 (void)snd; 261 #endif 262 #ifndef IPV6_V6ONLY 263 (void)v6only; 264 #endif 265 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY) 266 (void)transparent; 267 #endif 268 #if !defined(IP_FREEBIND) 269 (void)freebind; 270 #endif 271 #ifdef HAVE_SYSTEMD 272 int got_fd_from_systemd = 0; 273 274 if (!use_systemd 275 || (use_systemd 276 && (s = systemd_get_activated(family, socktype, -1, addr, 277 addrlen, NULL)) == -1)) { 278 #else 279 (void)use_systemd; 280 #endif 281 if((s = socket(family, socktype, 0)) == -1) { 282 *inuse = 0; 283 #ifndef USE_WINSOCK 284 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 285 *noproto = 1; 286 return -1; 287 } 288 #else 289 if(WSAGetLastError() == WSAEAFNOSUPPORT || 290 WSAGetLastError() == WSAEPROTONOSUPPORT) { 291 *noproto = 1; 292 return -1; 293 } 294 #endif 295 log_err("can't create socket: %s", sock_strerror(errno)); 296 *noproto = 0; 297 return -1; 298 } 299 #ifdef HAVE_SYSTEMD 300 } else { 301 got_fd_from_systemd = 1; 302 } 303 #endif 304 if(listen) { 305 #ifdef SO_REUSEADDR 306 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 307 (socklen_t)sizeof(on)) < 0) { 308 log_err("setsockopt(.. SO_REUSEADDR ..) 
failed: %s", 309 sock_strerror(errno)); 310 #ifndef USE_WINSOCK 311 if(errno != ENOSYS) { 312 close(s); 313 *noproto = 0; 314 *inuse = 0; 315 return -1; 316 } 317 #else 318 closesocket(s); 319 *noproto = 0; 320 *inuse = 0; 321 return -1; 322 #endif 323 } 324 #endif /* SO_REUSEADDR */ 325 #ifdef SO_REUSEPORT 326 # ifdef SO_REUSEPORT_LB 327 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance 328 * like SO_REUSEPORT on Linux. This is what the users want 329 * with the config option in unbound.conf; if we actually 330 * need local address and port reuse they'll also need to 331 * have SO_REUSEPORT set for them, assume it was _LB they want. 332 */ 333 if (reuseport && *reuseport && 334 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on, 335 (socklen_t)sizeof(on)) < 0) { 336 #ifdef ENOPROTOOPT 337 if(errno != ENOPROTOOPT || verbosity >= 3) 338 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s", 339 strerror(errno)); 340 #endif 341 /* this option is not essential, we can continue */ 342 *reuseport = 0; 343 } 344 # else /* no SO_REUSEPORT_LB */ 345 346 /* try to set SO_REUSEPORT so that incoming 347 * queries are distributed evenly among the receiving threads. 348 * Each thread must have its own socket bound to the same port, 349 * with SO_REUSEPORT set on each socket. 350 */ 351 if (reuseport && *reuseport && 352 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 353 (socklen_t)sizeof(on)) < 0) { 354 #ifdef ENOPROTOOPT 355 if(errno != ENOPROTOOPT || verbosity >= 3) 356 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 357 strerror(errno)); 358 #endif 359 /* this option is not essential, we can continue */ 360 *reuseport = 0; 361 } 362 # endif /* SO_REUSEPORT_LB */ 363 #else 364 (void)reuseport; 365 #endif /* defined(SO_REUSEPORT) */ 366 #ifdef IP_TRANSPARENT 367 if (transparent && 368 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 369 (socklen_t)sizeof(on)) < 0) { 370 log_warn("setsockopt(.. IP_TRANSPARENT ..) 
failed: %s", 371 strerror(errno)); 372 } 373 #elif defined(IP_BINDANY) 374 if (transparent && 375 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 376 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 377 (void*)&on, (socklen_t)sizeof(on)) < 0) { 378 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 379 (family==AF_INET6?"V6":""), strerror(errno)); 380 } 381 #elif defined(SO_BINDANY) 382 if (transparent && 383 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, 384 (socklen_t)sizeof(on)) < 0) { 385 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s", 386 strerror(errno)); 387 } 388 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 389 } 390 #ifdef IP_FREEBIND 391 if(freebind && 392 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 393 (socklen_t)sizeof(on)) < 0) { 394 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s", 395 strerror(errno)); 396 } 397 #endif /* IP_FREEBIND */ 398 if(rcv) { 399 #ifdef SO_RCVBUF 400 int got; 401 socklen_t slen = (socklen_t)sizeof(got); 402 # ifdef SO_RCVBUFFORCE 403 /* Linux specific: try to use root permission to override 404 * system limits on rcvbuf. The limit is stored in 405 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */ 406 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 407 (socklen_t)sizeof(rcv)) < 0) { 408 if(errno != EPERM) { 409 log_err("setsockopt(..., SO_RCVBUFFORCE, " 410 "...) failed: %s", sock_strerror(errno)); 411 sock_close(s); 412 *noproto = 0; 413 *inuse = 0; 414 return -1; 415 } 416 # endif /* SO_RCVBUFFORCE */ 417 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 418 (socklen_t)sizeof(rcv)) < 0) { 419 log_err("setsockopt(..., SO_RCVBUF, " 420 "...) failed: %s", sock_strerror(errno)); 421 sock_close(s); 422 *noproto = 0; 423 *inuse = 0; 424 return -1; 425 } 426 /* check if we got the right thing or if system 427 * reduced to some system max. 
Warn if so */ 428 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 429 &slen) >= 0 && got < rcv/2) { 430 log_warn("so-rcvbuf %u was not granted. " 431 "Got %u. To fix: start with " 432 "root permissions(linux) or sysctl " 433 "bigger net.core.rmem_max(linux) or " 434 "kern.ipc.maxsockbuf(bsd) values.", 435 (unsigned)rcv, (unsigned)got); 436 } 437 # ifdef SO_RCVBUFFORCE 438 } 439 # endif 440 #endif /* SO_RCVBUF */ 441 } 442 /* first do RCVBUF as the receive buffer is more important */ 443 if(snd) { 444 #ifdef SO_SNDBUF 445 int got; 446 socklen_t slen = (socklen_t)sizeof(got); 447 # ifdef SO_SNDBUFFORCE 448 /* Linux specific: try to use root permission to override 449 * system limits on sndbuf. The limit is stored in 450 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */ 451 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 452 (socklen_t)sizeof(snd)) < 0) { 453 if(errno != EPERM && errno != ENOBUFS) { 454 log_err("setsockopt(..., SO_SNDBUFFORCE, " 455 "...) failed: %s", sock_strerror(errno)); 456 sock_close(s); 457 *noproto = 0; 458 *inuse = 0; 459 return -1; 460 } 461 if(errno != EPERM) { 462 verbose(VERB_ALGO, "setsockopt(..., SO_SNDBUFFORCE, " 463 "...) was not granted: %s", sock_strerror(errno)); 464 } 465 # endif /* SO_SNDBUFFORCE */ 466 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 467 (socklen_t)sizeof(snd)) < 0) { 468 if(errno != ENOSYS && errno != ENOBUFS) { 469 log_err("setsockopt(..., SO_SNDBUF, " 470 "...) failed: %s", sock_strerror(errno)); 471 sock_close(s); 472 *noproto = 0; 473 *inuse = 0; 474 return -1; 475 } 476 log_warn("setsockopt(..., SO_SNDBUF, " 477 "...) was not granted: %s", sock_strerror(errno)); 478 } 479 /* check if we got the right thing or if system 480 * reduced to some system max. Warn if so */ 481 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 482 &slen) >= 0 && got < snd/2) { 483 log_warn("so-sndbuf %u was not granted. " 484 "Got %u. 
To fix: start with " 485 "root permissions(linux) or sysctl " 486 "bigger net.core.wmem_max(linux) or " 487 "kern.ipc.maxsockbuf(bsd) values. or " 488 "set so-sndbuf: 0 (use system value).", 489 (unsigned)snd, (unsigned)got); 490 } 491 # ifdef SO_SNDBUFFORCE 492 } 493 # endif 494 #endif /* SO_SNDBUF */ 495 } 496 err = set_ip_dscp(s, family, dscp); 497 if(err != NULL) 498 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err); 499 if(family == AF_INET6) { 500 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 501 int omit6_set = 0; 502 int action; 503 # endif 504 # if defined(IPV6_V6ONLY) 505 if(v6only 506 # ifdef HAVE_SYSTEMD 507 /* Systemd wants to control if the socket is v6 only 508 * or both, with BindIPv6Only=default, ipv6-only or 509 * both in systemd.socket, so it is not set here. */ 510 && !got_fd_from_systemd 511 # endif 512 ) { 513 int val=(v6only==2)?0:1; 514 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 515 (void*)&val, (socklen_t)sizeof(val)) < 0) { 516 log_err("setsockopt(..., IPV6_V6ONLY" 517 ", ...) failed: %s", sock_strerror(errno)); 518 sock_close(s); 519 *noproto = 0; 520 *inuse = 0; 521 return -1; 522 } 523 } 524 # endif 525 # if defined(IPV6_USE_MIN_MTU) 526 /* 527 * There is no fragmentation of IPv6 datagrams 528 * during forwarding in the network. Therefore 529 * we do not send UDP datagrams larger than 530 * the minimum IPv6 MTU of 1280 octets. The 531 * EDNS0 message length can be larger if the 532 * network stack supports IPV6_USE_MIN_MTU. 533 */ 534 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 535 (void*)&on, (socklen_t)sizeof(on)) < 0) { 536 log_err("setsockopt(..., IPV6_USE_MIN_MTU, " 537 "...) failed: %s", sock_strerror(errno)); 538 sock_close(s); 539 *noproto = 0; 540 *inuse = 0; 541 return -1; 542 } 543 # elif defined(IPV6_MTU) 544 # ifndef USE_WINSOCK 545 /* 546 * On Linux, to send no larger than 1280, the PMTUD is 547 * disabled by default for datagrams anyway, so we set 548 * the MTU to use. 
549 */ 550 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU, 551 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 552 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 553 sock_strerror(errno)); 554 sock_close(s); 555 *noproto = 0; 556 *inuse = 0; 557 return -1; 558 } 559 # elif defined(IPV6_USER_MTU) 560 /* As later versions of the mingw crosscompiler define 561 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU 562 * instead which is writable; IPV6_MTU is readonly there. */ 563 if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU, 564 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 565 if (WSAGetLastError() != WSAENOPROTOOPT) { 566 log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s", 567 wsa_strerror(WSAGetLastError())); 568 sock_close(s); 569 *noproto = 0; 570 *inuse = 0; 571 return -1; 572 } 573 } 574 # endif /* USE_WINSOCK */ 575 # endif /* IPv6 MTU */ 576 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 577 # if defined(IP_PMTUDISC_OMIT) 578 action = IP_PMTUDISC_OMIT; 579 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 580 &action, (socklen_t)sizeof(action)) < 0) { 581 582 if (errno != EINVAL) { 583 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 584 strerror(errno)); 585 sock_close(s); 586 *noproto = 0; 587 *inuse = 0; 588 return -1; 589 } 590 } 591 else 592 { 593 omit6_set = 1; 594 } 595 # endif 596 if (omit6_set == 0) { 597 action = IP_PMTUDISC_DONT; 598 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 599 &action, (socklen_t)sizeof(action)) < 0) { 600 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) 
failed: %s", 601 strerror(errno)); 602 sock_close(s); 603 *noproto = 0; 604 *inuse = 0; 605 return -1; 606 } 607 } 608 # endif /* IPV6_MTU_DISCOVER */ 609 } else if(family == AF_INET) { 610 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 611 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that 612 * PMTU information is not accepted, but fragmentation is allowed 613 * if and only if the packet size exceeds the outgoing interface MTU 614 * (and also uses the interface mtu to determine the size of the packets). 615 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks. 616 * FreeBSD already has same semantics without setting the option. */ 617 int omit_set = 0; 618 int action; 619 # if defined(IP_PMTUDISC_OMIT) 620 action = IP_PMTUDISC_OMIT; 621 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 622 &action, (socklen_t)sizeof(action)) < 0) { 623 624 if (errno != EINVAL) { 625 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 626 strerror(errno)); 627 sock_close(s); 628 *noproto = 0; 629 *inuse = 0; 630 return -1; 631 } 632 } 633 else 634 { 635 omit_set = 1; 636 } 637 # endif 638 if (omit_set == 0) { 639 action = IP_PMTUDISC_DONT; 640 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 641 &action, (socklen_t)sizeof(action)) < 0) { 642 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 643 strerror(errno)); 644 sock_close(s); 645 *noproto = 0; 646 *inuse = 0; 647 return -1; 648 } 649 } 650 # elif defined(IP_DONTFRAG) && !defined(__APPLE__) 651 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers, 652 * but does not work on that version, so we exclude it */ 653 /* a nonzero value disables fragmentation, according to 654 * docs.oracle.com for ip(4). */ 655 int off = 1; 656 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 657 &off, (socklen_t)sizeof(off)) < 0) { 658 log_err("setsockopt(..., IP_DONTFRAG, ...) 
failed: %s", 659 strerror(errno)); 660 sock_close(s); 661 *noproto = 0; 662 *inuse = 0; 663 return -1; 664 } 665 # endif /* IPv4 MTU */ 666 } 667 if( 668 #ifdef HAVE_SYSTEMD 669 !got_fd_from_systemd && 670 #endif 671 bind(s, (struct sockaddr*)addr, addrlen) != 0) { 672 *noproto = 0; 673 *inuse = 0; 674 #ifndef USE_WINSOCK 675 #ifdef EADDRINUSE 676 *inuse = (errno == EADDRINUSE); 677 /* detect freebsd jail with no ipv6 permission */ 678 if(family==AF_INET6 && errno==EINVAL) 679 *noproto = 1; 680 else if(errno != EADDRINUSE && 681 !(errno == EACCES && verbosity < 4 && !listen) 682 #ifdef EADDRNOTAVAIL 683 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen) 684 #endif 685 ) { 686 log_err_addr("can't bind socket", strerror(errno), 687 (struct sockaddr_storage*)addr, addrlen); 688 } 689 #endif /* EADDRINUSE */ 690 #else /* USE_WINSOCK */ 691 if(WSAGetLastError() != WSAEADDRINUSE && 692 WSAGetLastError() != WSAEADDRNOTAVAIL && 693 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) { 694 log_err_addr("can't bind socket", 695 wsa_strerror(WSAGetLastError()), 696 (struct sockaddr_storage*)addr, addrlen); 697 } 698 #endif /* USE_WINSOCK */ 699 sock_close(s); 700 return -1; 701 } 702 if(!fd_set_nonblock(s)) { 703 *noproto = 0; 704 *inuse = 0; 705 sock_close(s); 706 return -1; 707 } 708 return s; 709 } 710 711 int 712 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, 713 int* reuseport, int transparent, int mss, int nodelay, int freebind, 714 int use_systemd, int dscp, const char* additional) 715 { 716 int s = -1; 717 char* err; 718 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) \ 719 || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) \ 720 || defined(IP_BINDANY) || defined(IP_FREEBIND) \ 721 || defined(SO_BINDANY) || defined(TCP_NODELAY) 722 int on = 1; 723 #endif 724 #ifdef HAVE_SYSTEMD 725 int got_fd_from_systemd = 0; 726 #endif 727 #ifdef USE_TCP_FASTOPEN 728 int qlen; 729 #endif 730 #if !defined(IP_TRANSPARENT) && 
!defined(IP_BINDANY) && !defined(SO_BINDANY) 731 (void)transparent; 732 #endif 733 #if !defined(IP_FREEBIND) 734 (void)freebind; 735 #endif 736 verbose_print_addr(addr, additional); 737 *noproto = 0; 738 #ifdef HAVE_SYSTEMD 739 if (!use_systemd || 740 (use_systemd 741 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1, 742 addr->ai_addr, addr->ai_addrlen, 743 NULL)) == -1)) { 744 #else 745 (void)use_systemd; 746 #endif 747 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 748 #ifndef USE_WINSOCK 749 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 750 *noproto = 1; 751 return -1; 752 } 753 #else 754 if(WSAGetLastError() == WSAEAFNOSUPPORT || 755 WSAGetLastError() == WSAEPROTONOSUPPORT) { 756 *noproto = 1; 757 return -1; 758 } 759 #endif 760 log_err("can't create socket: %s", sock_strerror(errno)); 761 return -1; 762 } 763 if(nodelay) { 764 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY) 765 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on, 766 (socklen_t)sizeof(on)) < 0) { 767 #ifndef USE_WINSOCK 768 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s", 769 strerror(errno)); 770 #else 771 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s", 772 wsa_strerror(WSAGetLastError())); 773 #endif 774 } 775 #else 776 log_warn(" setsockopt(TCP_NODELAY) unsupported"); 777 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */ 778 } 779 if (mss > 0) { 780 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 781 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss, 782 (socklen_t)sizeof(mss)) < 0) { 783 log_err(" setsockopt(.. TCP_MAXSEG ..) 
failed: %s", 784 sock_strerror(errno)); 785 } else { 786 verbose(VERB_ALGO, 787 " tcp socket mss set to %d", mss); 788 } 789 #else 790 log_warn(" setsockopt(TCP_MAXSEG) unsupported"); 791 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */ 792 } 793 #ifdef HAVE_SYSTEMD 794 } else { 795 got_fd_from_systemd = 1; 796 } 797 #endif 798 #ifdef SO_REUSEADDR 799 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 800 (socklen_t)sizeof(on)) < 0) { 801 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", 802 sock_strerror(errno)); 803 sock_close(s); 804 return -1; 805 } 806 #endif /* SO_REUSEADDR */ 807 #ifdef IP_FREEBIND 808 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 809 (socklen_t)sizeof(on)) < 0) { 810 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s", 811 strerror(errno)); 812 } 813 #endif /* IP_FREEBIND */ 814 #ifdef SO_REUSEPORT 815 /* try to set SO_REUSEPORT so that incoming 816 * connections are distributed evenly among the receiving threads. 817 * Each thread must have its own socket bound to the same port, 818 * with SO_REUSEPORT set on each socket. 819 */ 820 if (reuseport && *reuseport && 821 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 822 (socklen_t)sizeof(on)) < 0) { 823 #ifdef ENOPROTOOPT 824 if(errno != ENOPROTOOPT || verbosity >= 3) 825 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 826 strerror(errno)); 827 #endif 828 /* this option is not essential, we can continue */ 829 *reuseport = 0; 830 } 831 #else 832 (void)reuseport; 833 #endif /* defined(SO_REUSEPORT) */ 834 #if defined(IPV6_V6ONLY) 835 if(addr->ai_family == AF_INET6 && v6only 836 # ifdef HAVE_SYSTEMD 837 /* Systemd wants to control if the socket is v6 only 838 * or both, with BindIPv6Only=default, ipv6-only or 839 * both in systemd.socket, so it is not set here. 
*/ 840 && !got_fd_from_systemd 841 # endif 842 ) { 843 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 844 (void*)&on, (socklen_t)sizeof(on)) < 0) { 845 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 846 sock_strerror(errno)); 847 sock_close(s); 848 return -1; 849 } 850 } 851 #else 852 (void)v6only; 853 #endif /* IPV6_V6ONLY */ 854 #ifdef IP_TRANSPARENT 855 if (transparent && 856 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 857 (socklen_t)sizeof(on)) < 0) { 858 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s", 859 strerror(errno)); 860 } 861 #elif defined(IP_BINDANY) 862 if (transparent && 863 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 864 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 865 (void*)&on, (socklen_t)sizeof(on)) < 0) { 866 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 867 (addr->ai_family==AF_INET6?"V6":""), strerror(errno)); 868 } 869 #elif defined(SO_BINDANY) 870 if (transparent && 871 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t) 872 sizeof(on)) < 0) { 873 log_warn("setsockopt(.. SO_BINDANY ..) 
failed: %s", 874 strerror(errno)); 875 } 876 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 877 err = set_ip_dscp(s, addr->ai_family, dscp); 878 if(err != NULL) 879 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err); 880 if( 881 #ifdef HAVE_SYSTEMD 882 !got_fd_from_systemd && 883 #endif 884 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) { 885 #ifndef USE_WINSOCK 886 /* detect freebsd jail with no ipv6 permission */ 887 if(addr->ai_family==AF_INET6 && errno==EINVAL) 888 *noproto = 1; 889 else { 890 log_err_addr("can't bind socket", strerror(errno), 891 (struct sockaddr_storage*)addr->ai_addr, 892 addr->ai_addrlen); 893 } 894 #else 895 log_err_addr("can't bind socket", 896 wsa_strerror(WSAGetLastError()), 897 (struct sockaddr_storage*)addr->ai_addr, 898 addr->ai_addrlen); 899 #endif 900 sock_close(s); 901 return -1; 902 } 903 if(!fd_set_nonblock(s)) { 904 sock_close(s); 905 return -1; 906 } 907 if(listen(s, TCP_BACKLOG) == -1) { 908 log_err("can't listen: %s", sock_strerror(errno)); 909 sock_close(s); 910 return -1; 911 } 912 #ifdef USE_TCP_FASTOPEN 913 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense 914 against IP spoofing attacks as suggested in RFC7413 */ 915 #ifdef __APPLE__ 916 /* OS X implementation only supports qlen of 1 via this call. Actual 917 value is configured by the net.inet.tcp.fastopen_backlog kernel param. 
*/ 918 qlen = 1; 919 #else 920 /* 5 is recommended on linux */ 921 qlen = 5; 922 #endif 923 if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, 924 sizeof(qlen))) == -1 ) { 925 #ifdef ENOPROTOOPT 926 /* squelch ENOPROTOOPT: freebsd server mode with kernel support 927 disabled, except when verbosity enabled for debugging */ 928 if(errno != ENOPROTOOPT || verbosity >= 3) { 929 #endif 930 if(errno == EPERM) { 931 log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno)); 932 } else { 933 log_err("Setting TCP Fast Open as server failed: %s", strerror(errno)); 934 } 935 #ifdef ENOPROTOOPT 936 } 937 #endif 938 } 939 #endif 940 return s; 941 } 942 943 char* 944 set_ip_dscp(int socket, int addrfamily, int dscp) 945 { 946 int ds; 947 948 if(dscp == 0) 949 return NULL; 950 ds = dscp << 2; 951 switch(addrfamily) { 952 case AF_INET6: 953 #ifdef IPV6_TCLASS 954 if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, 955 sizeof(ds)) < 0) 956 return sock_strerror(errno); 957 break; 958 #else 959 return "IPV6_TCLASS not defined on this system"; 960 #endif 961 default: 962 if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0) 963 return sock_strerror(errno); 964 break; 965 } 966 return NULL; 967 } 968 969 int 970 create_local_accept_sock(const char *path, int* noproto, int use_systemd) 971 { 972 #ifdef HAVE_SYSTEMD 973 int ret; 974 975 if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1) 976 return ret; 977 else { 978 #endif 979 #ifdef HAVE_SYS_UN_H 980 int s; 981 struct sockaddr_un usock; 982 #ifndef HAVE_SYSTEMD 983 (void)use_systemd; 984 #endif 985 986 verbose(VERB_ALGO, "creating unix socket %s", path); 987 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN 988 /* this member exists on BSDs, not Linux */ 989 usock.sun_len = (unsigned)sizeof(usock); 990 #endif 991 
usock.sun_family = AF_LOCAL; 992 /* length is 92-108, 104 on FreeBSD */ 993 (void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path)); 994 995 if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) { 996 log_err("Cannot create local socket %s (%s)", 997 path, strerror(errno)); 998 return -1; 999 } 1000 1001 if (unlink(path) && errno != ENOENT) { 1002 /* The socket already exists and cannot be removed */ 1003 log_err("Cannot remove old local socket %s (%s)", 1004 path, strerror(errno)); 1005 goto err; 1006 } 1007 1008 if (bind(s, (struct sockaddr *)&usock, 1009 (socklen_t)sizeof(struct sockaddr_un)) == -1) { 1010 log_err("Cannot bind local socket %s (%s)", 1011 path, strerror(errno)); 1012 goto err; 1013 } 1014 1015 if (!fd_set_nonblock(s)) { 1016 log_err("Cannot set non-blocking mode"); 1017 goto err; 1018 } 1019 1020 if (listen(s, TCP_BACKLOG) == -1) { 1021 log_err("can't listen: %s", strerror(errno)); 1022 goto err; 1023 } 1024 1025 (void)noproto; /*unused*/ 1026 return s; 1027 1028 err: 1029 sock_close(s); 1030 return -1; 1031 1032 #ifdef HAVE_SYSTEMD 1033 } 1034 #endif 1035 #else 1036 (void)use_systemd; 1037 (void)path; 1038 log_err("Local sockets are not supported"); 1039 *noproto = 1; 1040 return -1; 1041 #endif 1042 } 1043 1044 1045 /** 1046 * Create socket from getaddrinfo results 1047 */ 1048 static int 1049 make_sock(int stype, const char* ifname, int port, 1050 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, 1051 int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, 1052 int use_systemd, int dscp, struct unbound_socket* ub_sock, 1053 const char* additional) 1054 { 1055 struct addrinfo *res = NULL; 1056 int r, s, inuse, noproto; 1057 char portbuf[32]; 1058 snprintf(portbuf, sizeof(portbuf), "%d", port); 1059 hints->ai_socktype = stype; 1060 *noip6 = 0; 1061 if((r=getaddrinfo(ifname, portbuf, hints, &res)) != 0 || !res) { 1062 #ifdef USE_WINSOCK 1063 if(r == EAI_NONAME && hints->ai_family == AF_INET6){ 1064 *noip6 = 
/**
 * Make socket and first see if ifname contains port override info.
 * An interface written as "ifspec@port" is split at the first '@'; the part
 * before it is used as the interface and the number after it replaces the
 * port argument. Without '@' the arguments are passed to make_sock as is.
 * All other parameters are handed to make_sock unchanged.
 * @param ifname: interface specification, must not be NULL.
 * @param port: port to use when ifname has no "@port" part.
 * @param noip6: set to 0 when the interface specification is rejected
 *	here, otherwise set by make_sock.
 * @return the socket, or -1 on failure (including an interface part of 128
 *	characters or more, or an override port outside 1..65535).
 */
static int
make_sock_port(int stype, const char* ifname, int port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	const char* at = strchr(ifname, '@');
	if(at) {
		/* override port with ifspec@port */
		long newport;
		char newif[128];
		size_t iflen = (size_t)(at-ifname);
		if(iflen >= sizeof(newif)) {
			log_err("ifname too long: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		/* strtol instead of atoi: a number that does not fit is
		 * clamped to LONG_MIN/LONG_MAX and fails the range check,
		 * where atoi has undefined behaviour */
		newport = strtol(at+1, NULL, 10);
		if(newport <= 0 || newport > 65535) {
			log_err("invalid portnumber in interface: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		(void)strlcpy(newif, ifname, sizeof(newif));
		newif[iflen] = 0;
		return make_sock(stype, newif, (int)newport, hints, v6only,
			noip6, rcv, snd, reuseport, transparent, tcp_mss,
			nodelay, freebind, use_systemd, dscp, ub_sock,
			additional);
	}
	return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
		reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
		dscp, ub_sock, additional);
}
strchr(ifname, '@'); 1130 if(s) { 1131 /* override port with ifspec@port */ 1132 int port; 1133 char newif[128]; 1134 if((size_t)(s-ifname) >= sizeof(newif)) { 1135 log_err("ifname too long: %s", ifname); 1136 *noip6 = 0; 1137 return -1; 1138 } 1139 port = atoi(s+1); 1140 if(port < 0 || 0 == port || port > 65535) { 1141 log_err("invalid portnumber in interface: %s", ifname); 1142 *noip6 = 0; 1143 return -1; 1144 } 1145 (void)strlcpy(newif, ifname, sizeof(newif)); 1146 newif[s-ifname] = 0; 1147 return make_sock(stype, newif, port, hints, v6only, noip6, rcv, 1148 snd, reuseport, transparent, tcp_mss, nodelay, freebind, 1149 use_systemd, dscp, ub_sock, additional); 1150 } 1151 return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, 1152 reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, 1153 dscp, ub_sock, additional); 1154 } 1155 1156 /** 1157 * Add port to open ports list. 1158 * @param list: list head. changed. 1159 * @param s: fd. 1160 * @param ftype: if fd is UDP. 1161 * @param pp2_enabled: if PROXYv2 is enabled for this port. 1162 * @param ub_sock: socket with address. 1163 * @return false on failure. list in unchanged then. 1164 */ 1165 static int 1166 port_insert(struct listen_port** list, int s, enum listen_type ftype, 1167 int pp2_enabled, struct unbound_socket* ub_sock) 1168 { 1169 struct listen_port* item = (struct listen_port*)malloc( 1170 sizeof(struct listen_port)); 1171 if(!item) 1172 return 0; 1173 item->next = *list; 1174 item->fd = s; 1175 item->ftype = ftype; 1176 item->pp2_enabled = pp2_enabled; 1177 item->socket = ub_sock; 1178 *list = item; 1179 return 1; 1180 } 1181 1182 /** set fd to receive software timestamps */ 1183 static int 1184 set_recvtimestamp(int s) 1185 { 1186 #ifdef HAVE_LINUX_NET_TSTAMP_H 1187 int opt = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; 1188 if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&opt, (socklen_t)sizeof(opt)) < 0) { 1189 log_err("setsockopt(..., SO_TIMESTAMPNS, ...) 
/**
 * set fd to receive source address packet info
 * The socket option that is used is picked at compile time from the
 * options the platform headers define.
 * @param s: the socket fd.
 * @param family: AF_INET6 or AF_INET. For any other value no option is set
 *	and true is returned.
 * @return false, with an error logged, if setsockopt fails or if no usable
 *	option is compiled in for the family. True otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
	/* 'on' is only declared when at least one of the options exists,
	 * otherwise s is marked as unused */
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	(void)s;
#endif
	if(family == AF_INET6) {
		/* IPV6_RECVPKTINFO is tried first, IPV6_PKTINFO is the
		 * alternative when it is not defined */
#  ifdef IPV6_RECVPKTINFO
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#  elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#  else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#  endif /* defined IPV6_RECVPKTINFO */

	} else if(family == AF_INET) {
		/* IP_PKTINFO is tried first; IP_RECVDSTADDR is the alternative
		 * and is only used when IP_SENDSRCADDR is defined as well */
#  ifdef IP_PKTINFO
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#  elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#  else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#  endif /* IP_PKTINFO */

	}
	return 1;
}
failed: %s", 1245 strerror(errno)); 1246 return 0; 1247 } 1248 # elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR) 1249 if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR, 1250 (void*)&on, (socklen_t)sizeof(on)) < 0) { 1251 log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s", 1252 strerror(errno)); 1253 return 0; 1254 } 1255 # else 1256 log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable " 1257 "interface-automatic or do-ip4 in config"); 1258 return 0; 1259 # endif /* IP_PKTINFO */ 1260 1261 } 1262 return 1; 1263 } 1264 1265 /** 1266 * Helper for ports_open. Creates one interface (or NULL for default). 1267 * @param ifname: The interface ip address. 1268 * @param do_auto: use automatic interface detection. 1269 * If enabled, then ifname must be the wildcard name. 1270 * @param do_udp: if udp should be used. 1271 * @param do_tcp: if tcp should be used. 1272 * @param hints: for getaddrinfo. family and flags have to be set by caller. 1273 * @param port: Port number to use. 1274 * @param list: list of open ports, appended to, changed to point to list head. 1275 * @param rcv: receive buffer size for UDP 1276 * @param snd: send buffer size for UDP 1277 * @param ssl_port: ssl service port number 1278 * @param tls_additional_port: list of additional ssl service port numbers. 1279 * @param https_port: DoH service port number 1280 * @param proxy_protocol_port: list of PROXYv2 port numbers. 1281 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true. 1282 * set to false on exit if reuseport failed due to no kernel support. 1283 * @param transparent: set IP_TRANSPARENT socket option. 1284 * @param tcp_mss: maximum segment size of tcp socket. default if zero. 1285 * @param freebind: set IP_FREEBIND socket option. 1286 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection 1287 * @param use_systemd: if true, fetch sockets from systemd. 1288 * @param dnscrypt_port: dnscrypt service port number 1289 * @param dscp: DSCP to use. 
1290 * @param quic_port: dns over quic port number. 1291 * @param http_notls_downstream: if no tls is used for https downstream. 1292 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to 1293 * wait to discard if UDP packets have waited for long in the socket 1294 * buffer. 1295 * @return: returns false on error. 1296 */ 1297 static int 1298 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 1299 struct addrinfo *hints, int port, struct listen_port** list, 1300 size_t rcv, size_t snd, int ssl_port, 1301 struct config_strlist* tls_additional_port, int https_port, 1302 struct config_strlist* proxy_protocol_port, 1303 int* reuseport, int transparent, int tcp_mss, int freebind, 1304 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp, 1305 int quic_port, int http_notls_downstream, int sock_queue_timeout) 1306 { 1307 int s, noip6=0; 1308 int is_ssl = if_is_ssl(ifname, port, ssl_port, tls_additional_port); 1309 int is_https = if_is_https(ifname, port, https_port); 1310 int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port); 1311 int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port); 1312 int is_doq = if_is_quic(ifname, port, quic_port); 1313 /* Always set TCP_NODELAY on TLS connection as it speeds up the TLS 1314 * handshake. DoH had already such option so we respect it. 1315 * Otherwise the server waits before sending more handshake data for 1316 * the client ACK (Nagle's algorithm), which is delayed because the 1317 * client waits for more data before ACKing (delayed ACK). 
*/ 1318 int nodelay = is_https?http2_nodelay:is_ssl; 1319 struct unbound_socket* ub_sock; 1320 const char* add = NULL; 1321 1322 if(!do_udp && !do_tcp) 1323 return 0; 1324 1325 if(is_pp2) { 1326 if(is_dnscrypt) { 1327 fatal_exit("PROXYv2 and DNSCrypt combination not " 1328 "supported!"); 1329 } else if(is_https) { 1330 fatal_exit("PROXYv2 and DoH combination not " 1331 "supported!"); 1332 } else if(is_doq) { 1333 fatal_exit("PROXYv2 and DoQ combination not " 1334 "supported!"); 1335 } 1336 } 1337 1338 /* Check if both UDP and TCP ports should be open. 1339 * In the case of encrypted channels, probably an unencrypted channel 1340 * at the same port is not desired. */ 1341 if((is_ssl || is_https) && !is_doq) do_udp = do_auto = 0; 1342 if((is_doq) && !(is_https || is_ssl)) do_tcp = 0; 1343 1344 if(do_auto) { 1345 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1346 if(!ub_sock) 1347 return 0; 1348 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 1349 &noip6, rcv, snd, reuseport, transparent, 1350 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock, 1351 (is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) { 1352 free(ub_sock->addr); 1353 free(ub_sock); 1354 if(noip6) { 1355 log_warn("IPv6 protocol not available"); 1356 return 1; 1357 } 1358 return 0; 1359 } 1360 /* getting source addr packet info is highly non-portable */ 1361 if(!set_recvpktinfo(s, hints->ai_family)) { 1362 sock_close(s); 1363 free(ub_sock->addr); 1364 free(ub_sock); 1365 return 0; 1366 } 1367 if (sock_queue_timeout && !set_recvtimestamp(s)) { 1368 log_warn("socket timestamping is not available"); 1369 } 1370 if(!port_insert(list, s, is_dnscrypt 1371 ?listen_type_udpancil_dnscrypt:listen_type_udpancil, 1372 is_pp2, ub_sock)) { 1373 sock_close(s); 1374 free(ub_sock->addr); 1375 free(ub_sock); 1376 return 0; 1377 } 1378 } else if(do_udp) { 1379 enum listen_type udp_port_type; 1380 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1381 if(!ub_sock) 1382 return 0; 1383 if(is_dnscrypt) 
{ 1384 udp_port_type = listen_type_udp_dnscrypt; 1385 add = "dnscrypt"; 1386 } else if(is_doq) { 1387 udp_port_type = listen_type_doq; 1388 add = "doq"; 1389 if(if_listens_on(ifname, port, 53, NULL)) { 1390 log_err("DNS over QUIC is strictly not " 1391 "allowed on port 53 as per RFC 9250. " 1392 "Port 53 is for DNS datagrams. Error " 1393 "for interface '%s'.", ifname); 1394 free(ub_sock->addr); 1395 free(ub_sock); 1396 return 0; 1397 } 1398 } else { 1399 udp_port_type = listen_type_udp; 1400 add = NULL; 1401 } 1402 /* regular udp socket */ 1403 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 1404 &noip6, rcv, snd, reuseport, transparent, 1405 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock, 1406 add)) == -1) { 1407 free(ub_sock->addr); 1408 free(ub_sock); 1409 if(noip6) { 1410 log_warn("IPv6 protocol not available"); 1411 return 1; 1412 } 1413 return 0; 1414 } 1415 if(udp_port_type == listen_type_doq) { 1416 if(!set_recvpktinfo(s, hints->ai_family)) { 1417 sock_close(s); 1418 free(ub_sock->addr); 1419 free(ub_sock); 1420 return 0; 1421 } 1422 } 1423 if(udp_port_type == listen_type_udp && sock_queue_timeout) 1424 udp_port_type = listen_type_udpancil; 1425 if (sock_queue_timeout) { 1426 if(!set_recvtimestamp(s)) { 1427 log_warn("socket timestamping is not available"); 1428 } else { 1429 if(udp_port_type == listen_type_udp) 1430 udp_port_type = listen_type_udpancil; 1431 } 1432 } 1433 if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) { 1434 sock_close(s); 1435 free(ub_sock->addr); 1436 free(ub_sock); 1437 return 0; 1438 } 1439 } 1440 if(do_tcp) { 1441 enum listen_type port_type; 1442 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1443 if(!ub_sock) 1444 return 0; 1445 if(is_ssl) { 1446 port_type = listen_type_ssl; 1447 add = "tls"; 1448 } else if(is_https) { 1449 port_type = listen_type_http; 1450 add = "https"; 1451 if(http_notls_downstream) 1452 add = "http"; 1453 } else if(is_dnscrypt) { 1454 port_type = listen_type_tcp_dnscrypt; 1455 
add = "dnscrypt"; 1456 } else { 1457 port_type = listen_type_tcp; 1458 add = NULL; 1459 } 1460 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 1461 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay, 1462 freebind, use_systemd, dscp, ub_sock, add)) == -1) { 1463 free(ub_sock->addr); 1464 free(ub_sock); 1465 if(noip6) { 1466 /*log_warn("IPv6 protocol not available");*/ 1467 return 1; 1468 } 1469 return 0; 1470 } 1471 if(is_ssl) 1472 verbose(VERB_ALGO, "setup TCP for SSL service"); 1473 if(!port_insert(list, s, port_type, is_pp2, ub_sock)) { 1474 sock_close(s); 1475 free(ub_sock->addr); 1476 free(ub_sock); 1477 return 0; 1478 } 1479 } 1480 return 1; 1481 } 1482 1483 /** 1484 * Add items to commpoint list in front. 1485 * @param c: commpoint to add. 1486 * @param front: listen struct. 1487 * @return: false on failure. 1488 */ 1489 static int 1490 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front) 1491 { 1492 struct listen_list* item = (struct listen_list*)malloc( 1493 sizeof(struct listen_list)); 1494 if(!item) 1495 return 0; 1496 item->com = c; 1497 item->next = front->cps; 1498 front->cps = item; 1499 return 1; 1500 } 1501 1502 void listen_setup_locks(void) 1503 { 1504 if(!stream_wait_lock_inited) { 1505 lock_basic_init(&stream_wait_count_lock); 1506 stream_wait_lock_inited = 1; 1507 } 1508 if(!http2_query_buffer_lock_inited) { 1509 lock_basic_init(&http2_query_buffer_count_lock); 1510 http2_query_buffer_lock_inited = 1; 1511 } 1512 if(!http2_response_buffer_lock_inited) { 1513 lock_basic_init(&http2_response_buffer_count_lock); 1514 http2_response_buffer_lock_inited = 1; 1515 } 1516 } 1517 1518 void listen_desetup_locks(void) 1519 { 1520 if(stream_wait_lock_inited) { 1521 stream_wait_lock_inited = 0; 1522 lock_basic_destroy(&stream_wait_count_lock); 1523 } 1524 if(http2_query_buffer_lock_inited) { 1525 http2_query_buffer_lock_inited = 0; 1526 lock_basic_destroy(&http2_query_buffer_count_lock); 1527 } 1528 
/**
 * Create the listening structure: one commpoint for every entry in the
 * list of open ports, all sharing one udp buffer of bufsize.
 * Which commpoint constructor is used, and which TLS context is attached
 * to the commpoint, follows from the ftype of the port.
 * On any failure everything created here is deleted again and NULL is
 * returned. The ports list itself is only read.
 * @return the new structure, or NULL on allocation failure, when a
 *	commpoint cannot be created, or when the ports list is empty.
 */
struct listen_dnsport*
listen_create(struct comm_base* base, struct listen_port* ports,
	size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
	int harden_large_queries, uint32_t http_max_streams,
	char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
	void* dot_sslctx, void* doh_sslctx, void* quic_sslctx,
	struct dt_env* dtenv,
	struct doq_table* doq_table,
	struct ub_randstate* rnd,struct config_file* cfg,
	comm_point_callback_type* cb, void *cb_arg)
{
	/* NOTE(review): malloc leaves the struct uninitialised; only cps,
	 * udp_buff and (with USE_DNSCRYPT) dnscrypt_udp_buff are set below.
	 * Confirm that no other member is read before it is assigned. */
	struct listen_dnsport* front = (struct listen_dnsport*)
		malloc(sizeof(struct listen_dnsport));
	if(!front)
		return NULL;
	front->cps = NULL;
	front->udp_buff = sldns_buffer_new(bufsize);
#ifdef USE_DNSCRYPT
	front->dnscrypt_udp_buff = NULL;
#endif
	if(!front->udp_buff) {
		free(front);
		return NULL;
	}

	/* create comm points as needed */
	while(ports) {
		/* stays NULL if no branch below creates a commpoint for
		 * this ftype, that is treated as an error after the chain */
		struct comm_point* cp = NULL;
		if(ports->ftype == listen_type_udp ||
		   ports->ftype == listen_type_udp_dnscrypt) {
			cp = comm_point_create_udp(base, ports->fd,
				front->udp_buff, ports->pp2_enabled, cb,
				cb_arg, ports->socket);
		} else if(ports->ftype == listen_type_doq && doq_table) {
			/* without a doq_table a doq port matches no branch */
#ifndef HAVE_NGTCP2
			log_warn("Unbound is not compiled with "
				"ngtcp2. This is required to use DNS "
				"over QUIC.");
#endif
			cp = comm_point_create_doq(base, ports->fd,
				front->udp_buff, cb, cb_arg, ports->socket,
				doq_table, rnd, quic_sslctx, cfg);
		} else if(ports->ftype == listen_type_tcp ||
				ports->ftype == listen_type_tcp_dnscrypt) {
			/* plain tcp: no http streams and no http endpoint */
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries, 0, NULL,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, ports->pp2_enabled, cb, cb_arg,
				ports->socket);
		} else if(ports->ftype == listen_type_ssl ||
			ports->ftype == listen_type_http) {
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries,
				http_max_streams, http_endpoint,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, ports->pp2_enabled, cb, cb_arg,
				ports->socket);
			/* the warnings below do not stop the setup */
			if(ports->ftype == listen_type_http) {
				if(!doh_sslctx && !http_notls) {
					log_warn("HTTPS port configured, but "
						"no TLS tls-service-key or "
						"tls-service-pem set");
				}
#ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
				if(!http_notls) {
					log_warn("Unbound is not compiled "
						"with an OpenSSL version "
						"supporting ALPN "
						"(OpenSSL >= 1.0.2). This "
						"is required to use "
						"DNS-over-HTTPS");
				}
#endif
#ifndef HAVE_NGHTTP2_NGHTTP2_H
				log_warn("Unbound is not compiled with "
					"nghttp2. This is required to use "
					"DNS-over-HTTPS.");
#endif
			}
		} else if(ports->ftype == listen_type_udpancil ||
				  ports->ftype == listen_type_udpancil_dnscrypt) {
			/* without platform support cp stays NULL and the
			 * creation fails below */
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
			cp = comm_point_create_udp_ancil(base, ports->fd,
				front->udp_buff, ports->pp2_enabled, cb,
				cb_arg, ports->socket);
#else
			log_warn("This system does not support UDP ancillary data.");
#endif
		}
		if(!cp) {
			log_err("can't create commpoint");
			listen_delete(front);
			return NULL;
		}
		/* pick the TLS context: none for the plain and dnscrypt
		 * types and for http without tls, otherwise the context of
		 * the protocol; the final else covers listen_type_ssl */
		if((http_notls && ports->ftype == listen_type_http) ||
			(ports->ftype == listen_type_tcp) ||
			(ports->ftype == listen_type_udp) ||
			(ports->ftype == listen_type_udpancil) ||
			(ports->ftype == listen_type_tcp_dnscrypt) ||
			(ports->ftype == listen_type_udp_dnscrypt) ||
			(ports->ftype == listen_type_udpancil_dnscrypt)) {
			cp->ssl = NULL;
		} else if(ports->ftype == listen_type_doq) {
			cp->ssl = quic_sslctx;
		} else if(ports->ftype == listen_type_http) {
			cp->ssl = doh_sslctx;
		} else {
			cp->ssl = dot_sslctx;
		}
		cp->dtenv = dtenv;
		cp->do_not_close = 1;
#ifdef USE_DNSCRYPT
		if (ports->ftype == listen_type_udp_dnscrypt ||
			ports->ftype == listen_type_tcp_dnscrypt ||
			ports->ftype == listen_type_udpancil_dnscrypt) {
			cp->dnscrypt = 1;
			cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
			if(!cp->dnscrypt_buffer) {
				log_err("can't alloc dnscrypt_buffer");
				comm_point_delete(cp);
				listen_delete(front);
				return NULL;
			}
			/* front keeps a pointer to the buffer of the dnscrypt
			 * commpoint that was created last.
			 * NOTE(review): with several dnscrypt ports this is
			 * overwritten every time; confirm who frees the
			 * buffers of the earlier commpoints. */
			front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
		}
#endif
		if(!listen_cp_insert(cp, front)) {
			log_err("malloc failed");
			comm_point_delete(cp);
			listen_delete(front);
			return NULL;
		}
		ports = ports->next;
	}
	/* an empty ports list leaves nothing to listen on */
	if(!front->cps) {
		log_err("Could not open sockets to accept queries.");
		listen_delete(front);
		return NULL;
	}

	return front;
}
return NULL; 1674 } 1675 1676 return front; 1677 } 1678 1679 void 1680 listen_list_delete(struct listen_list* list) 1681 { 1682 struct listen_list *p = list, *pn; 1683 while(p) { 1684 pn = p->next; 1685 comm_point_delete(p->com); 1686 free(p); 1687 p = pn; 1688 } 1689 } 1690 1691 void 1692 listen_delete(struct listen_dnsport* front) 1693 { 1694 if(!front) 1695 return; 1696 listen_list_delete(front->cps); 1697 #ifdef USE_DNSCRYPT 1698 if(front->dnscrypt_udp_buff && 1699 front->udp_buff != front->dnscrypt_udp_buff) { 1700 sldns_buffer_free(front->dnscrypt_udp_buff); 1701 } 1702 #endif 1703 sldns_buffer_free(front->udp_buff); 1704 free(front); 1705 } 1706 1707 #ifdef HAVE_GETIFADDRS 1708 static int 1709 resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size) 1710 { 1711 struct ifaddrs *ifa; 1712 void *tmpbuf; 1713 int last_ip_addresses_size = *ip_addresses_size; 1714 1715 for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) { 1716 sa_family_t family; 1717 const char* atsign; 1718 #ifdef INET6 /* | address ip | % | ifa name | @ | port | nul */ 1719 char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1]; 1720 #else 1721 char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1]; 1722 #endif 1723 1724 if((atsign=strrchr(search_ifa, '@')) != NULL) { 1725 if(strlen(ifa->ifa_name) != (size_t)(atsign-search_ifa) 1726 || strncmp(ifa->ifa_name, search_ifa, 1727 atsign-search_ifa) != 0) 1728 continue; 1729 } else { 1730 if(strcmp(ifa->ifa_name, search_ifa) != 0) 1731 continue; 1732 atsign = ""; 1733 } 1734 1735 if(ifa->ifa_addr == NULL) 1736 continue; 1737 1738 family = ifa->ifa_addr->sa_family; 1739 if(family == AF_INET) { 1740 char a4[INET_ADDRSTRLEN + 1]; 1741 struct sockaddr_in *in4 = (struct sockaddr_in *) 1742 ifa->ifa_addr; 1743 if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) { 1744 log_err("inet_ntop failed"); 1745 return 0; 1746 } 1747 snprintf(addr_buf, sizeof(addr_buf), "%s%s", 1748 a4, atsign); 1749 } 1750 #ifdef 
INET6 1751 else if(family == AF_INET6) { 1752 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) 1753 ifa->ifa_addr; 1754 char a6[INET6_ADDRSTRLEN + 1]; 1755 char if_index_name[IF_NAMESIZE + 1]; 1756 if_index_name[0] = 0; 1757 if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) { 1758 log_err("inet_ntop failed"); 1759 return 0; 1760 } 1761 (void)if_indextoname(in6->sin6_scope_id, 1762 (char *)if_index_name); 1763 if (strlen(if_index_name) != 0) { 1764 snprintf(addr_buf, sizeof(addr_buf), 1765 "%s%%%s%s", a6, if_index_name, atsign); 1766 } else { 1767 snprintf(addr_buf, sizeof(addr_buf), "%s%s", 1768 a6, atsign); 1769 } 1770 } 1771 #endif 1772 else { 1773 continue; 1774 } 1775 verbose(4, "interface %s has address %s", search_ifa, addr_buf); 1776 1777 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1)); 1778 if(!tmpbuf) { 1779 log_err("realloc failed: out of memory"); 1780 return 0; 1781 } else { 1782 *ip_addresses = tmpbuf; 1783 } 1784 (*ip_addresses)[*ip_addresses_size] = strdup(addr_buf); 1785 if(!(*ip_addresses)[*ip_addresses_size]) { 1786 log_err("strdup failed: out of memory"); 1787 return 0; 1788 } 1789 (*ip_addresses_size)++; 1790 } 1791 1792 if (*ip_addresses_size == last_ip_addresses_size) { 1793 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1)); 1794 if(!tmpbuf) { 1795 log_err("realloc failed: out of memory"); 1796 return 0; 1797 } else { 1798 *ip_addresses = tmpbuf; 1799 } 1800 (*ip_addresses)[*ip_addresses_size] = strdup(search_ifa); 1801 if(!(*ip_addresses)[*ip_addresses_size]) { 1802 log_err("strdup failed: out of memory"); 1803 return 0; 1804 } 1805 (*ip_addresses_size)++; 1806 } 1807 return 1; 1808 } 1809 #endif /* HAVE_GETIFADDRS */ 1810 1811 int resolve_interface_names(char** ifs, int num_ifs, 1812 struct config_strlist* list, char*** resif, int* num_resif) 1813 { 1814 #ifdef HAVE_GETIFADDRS 1815 struct ifaddrs *addrs = NULL; 1816 if(num_ifs == 0 && list == NULL) { 1817 *resif = NULL; 1818 
*num_resif = 0; 1819 return 1; 1820 } 1821 if(getifaddrs(&addrs) == -1) { 1822 log_err("failed to list interfaces: getifaddrs: %s", 1823 strerror(errno)); 1824 freeifaddrs(addrs); 1825 return 0; 1826 } 1827 if(ifs) { 1828 int i; 1829 for(i=0; i<num_ifs; i++) { 1830 if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) { 1831 freeifaddrs(addrs); 1832 config_del_strarray(*resif, *num_resif); 1833 *resif = NULL; 1834 *num_resif = 0; 1835 return 0; 1836 } 1837 } 1838 } 1839 if(list) { 1840 struct config_strlist* p; 1841 for(p = list; p; p = p->next) { 1842 if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) { 1843 freeifaddrs(addrs); 1844 config_del_strarray(*resif, *num_resif); 1845 *resif = NULL; 1846 *num_resif = 0; 1847 return 0; 1848 } 1849 } 1850 } 1851 freeifaddrs(addrs); 1852 return 1; 1853 #else 1854 struct config_strlist* p; 1855 if(num_ifs == 0 && list == NULL) { 1856 *resif = NULL; 1857 *num_resif = 0; 1858 return 1; 1859 } 1860 *num_resif = num_ifs; 1861 for(p = list; p; p = p->next) { 1862 (*num_resif)++; 1863 } 1864 *resif = calloc(*num_resif, sizeof(**resif)); 1865 if(!*resif) { 1866 log_err("out of memory"); 1867 return 0; 1868 } 1869 if(ifs) { 1870 int i; 1871 for(i=0; i<num_ifs; i++) { 1872 (*resif)[i] = strdup(ifs[i]); 1873 if(!((*resif)[i])) { 1874 log_err("out of memory"); 1875 config_del_strarray(*resif, *num_resif); 1876 *resif = NULL; 1877 *num_resif = 0; 1878 return 0; 1879 } 1880 } 1881 } 1882 if(list) { 1883 int idx = num_ifs; 1884 for(p = list; p; p = p->next) { 1885 (*resif)[idx] = strdup(p->str); 1886 if(!((*resif)[idx])) { 1887 log_err("out of memory"); 1888 config_del_strarray(*resif, *num_resif); 1889 *resif = NULL; 1890 *num_resif = 0; 1891 return 0; 1892 } 1893 idx++; 1894 } 1895 } 1896 return 1; 1897 #endif /* HAVE_GETIFADDRS */ 1898 } 1899 1900 struct listen_port* 1901 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, 1902 int* reuseport) 1903 { 1904 struct listen_port* list = NULL; 1905 struct addrinfo 
hints; 1906 int i, do_ip4, do_ip6; 1907 int do_tcp, do_auto; 1908 do_ip4 = cfg->do_ip4; 1909 do_ip6 = cfg->do_ip6; 1910 do_tcp = cfg->do_tcp; 1911 do_auto = cfg->if_automatic && cfg->do_udp; 1912 if(cfg->incoming_num_tcp == 0) 1913 do_tcp = 0; 1914 1915 /* getaddrinfo */ 1916 memset(&hints, 0, sizeof(hints)); 1917 hints.ai_flags = AI_PASSIVE; 1918 /* no name lookups on our listening ports */ 1919 if(num_ifs > 0) 1920 hints.ai_flags |= AI_NUMERICHOST; 1921 hints.ai_family = AF_UNSPEC; 1922 #ifndef INET6 1923 do_ip6 = 0; 1924 #endif 1925 if(!do_ip4 && !do_ip6) { 1926 return NULL; 1927 } 1928 /* create ip4 and ip6 ports so that return addresses are nice. */ 1929 if(do_auto || num_ifs == 0) { 1930 if(do_auto && cfg->if_automatic_ports && 1931 cfg->if_automatic_ports[0]!=0) { 1932 char* now = cfg->if_automatic_ports; 1933 while(now && *now) { 1934 char* after; 1935 int extraport; 1936 while(isspace((unsigned char)*now)) 1937 now++; 1938 if(!*now) 1939 break; 1940 after = now; 1941 extraport = (int)strtol(now, &after, 10); 1942 if(extraport < 0 || extraport > 65535) { 1943 log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); 1944 listening_ports_free(list); 1945 return NULL; 1946 } 1947 if(extraport == 0 && now == after) { 1948 log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); 1949 listening_ports_free(list); 1950 return NULL; 1951 } 1952 now = after; 1953 if(do_ip6) { 1954 hints.ai_family = AF_INET6; 1955 if(!ports_create_if("::0", 1956 do_auto, cfg->do_udp, do_tcp, 1957 &hints, extraport, &list, 1958 cfg->so_rcvbuf, cfg->so_sndbuf, 1959 cfg->ssl_port, cfg->tls_additional_port, 1960 cfg->https_port, 1961 cfg->proxy_protocol_port, 1962 reuseport, cfg->ip_transparent, 1963 cfg->tcp_mss, cfg->ip_freebind, 1964 cfg->http_nodelay, cfg->use_systemd, 1965 cfg->dnscrypt_port, cfg->ip_dscp, 
1966 cfg->quic_port, cfg->http_notls_downstream, 1967 cfg->sock_queue_timeout)) { 1968 listening_ports_free(list); 1969 return NULL; 1970 } 1971 } 1972 if(do_ip4) { 1973 hints.ai_family = AF_INET; 1974 if(!ports_create_if("0.0.0.0", 1975 do_auto, cfg->do_udp, do_tcp, 1976 &hints, extraport, &list, 1977 cfg->so_rcvbuf, cfg->so_sndbuf, 1978 cfg->ssl_port, cfg->tls_additional_port, 1979 cfg->https_port, 1980 cfg->proxy_protocol_port, 1981 reuseport, cfg->ip_transparent, 1982 cfg->tcp_mss, cfg->ip_freebind, 1983 cfg->http_nodelay, cfg->use_systemd, 1984 cfg->dnscrypt_port, cfg->ip_dscp, 1985 cfg->quic_port, cfg->http_notls_downstream, 1986 cfg->sock_queue_timeout)) { 1987 listening_ports_free(list); 1988 return NULL; 1989 } 1990 } 1991 } 1992 return list; 1993 } 1994 if(do_ip6) { 1995 hints.ai_family = AF_INET6; 1996 if(!ports_create_if(do_auto?"::0":"::1", 1997 do_auto, cfg->do_udp, do_tcp, 1998 &hints, cfg->port, &list, 1999 cfg->so_rcvbuf, cfg->so_sndbuf, 2000 cfg->ssl_port, cfg->tls_additional_port, 2001 cfg->https_port, cfg->proxy_protocol_port, 2002 reuseport, cfg->ip_transparent, 2003 cfg->tcp_mss, cfg->ip_freebind, 2004 cfg->http_nodelay, cfg->use_systemd, 2005 cfg->dnscrypt_port, cfg->ip_dscp, 2006 cfg->quic_port, cfg->http_notls_downstream, 2007 cfg->sock_queue_timeout)) { 2008 listening_ports_free(list); 2009 return NULL; 2010 } 2011 } 2012 if(do_ip4) { 2013 hints.ai_family = AF_INET; 2014 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 2015 do_auto, cfg->do_udp, do_tcp, 2016 &hints, cfg->port, &list, 2017 cfg->so_rcvbuf, cfg->so_sndbuf, 2018 cfg->ssl_port, cfg->tls_additional_port, 2019 cfg->https_port, cfg->proxy_protocol_port, 2020 reuseport, cfg->ip_transparent, 2021 cfg->tcp_mss, cfg->ip_freebind, 2022 cfg->http_nodelay, cfg->use_systemd, 2023 cfg->dnscrypt_port, cfg->ip_dscp, 2024 cfg->quic_port, cfg->http_notls_downstream, 2025 cfg->sock_queue_timeout)) { 2026 listening_ports_free(list); 2027 return NULL; 2028 } 2029 } 2030 } else for(i = 0; 
i<num_ifs; i++) { 2031 if(str_is_ip6(ifs[i])) { 2032 if(!do_ip6) 2033 continue; 2034 hints.ai_family = AF_INET6; 2035 if(!ports_create_if(ifs[i], 0, cfg->do_udp, 2036 do_tcp, &hints, cfg->port, &list, 2037 cfg->so_rcvbuf, cfg->so_sndbuf, 2038 cfg->ssl_port, cfg->tls_additional_port, 2039 cfg->https_port, cfg->proxy_protocol_port, 2040 reuseport, cfg->ip_transparent, 2041 cfg->tcp_mss, cfg->ip_freebind, 2042 cfg->http_nodelay, cfg->use_systemd, 2043 cfg->dnscrypt_port, cfg->ip_dscp, 2044 cfg->quic_port, cfg->http_notls_downstream, 2045 cfg->sock_queue_timeout)) { 2046 listening_ports_free(list); 2047 return NULL; 2048 } 2049 } else { 2050 if(!do_ip4) 2051 continue; 2052 hints.ai_family = AF_INET; 2053 if(!ports_create_if(ifs[i], 0, cfg->do_udp, 2054 do_tcp, &hints, cfg->port, &list, 2055 cfg->so_rcvbuf, cfg->so_sndbuf, 2056 cfg->ssl_port, cfg->tls_additional_port, 2057 cfg->https_port, cfg->proxy_protocol_port, 2058 reuseport, cfg->ip_transparent, 2059 cfg->tcp_mss, cfg->ip_freebind, 2060 cfg->http_nodelay, cfg->use_systemd, 2061 cfg->dnscrypt_port, cfg->ip_dscp, 2062 cfg->quic_port, cfg->http_notls_downstream, 2063 cfg->sock_queue_timeout)) { 2064 listening_ports_free(list); 2065 return NULL; 2066 } 2067 } 2068 } 2069 2070 return list; 2071 } 2072 2073 void listening_ports_free(struct listen_port* list) 2074 { 2075 struct listen_port* nx; 2076 while(list) { 2077 nx = list->next; 2078 if(list->fd != -1) { 2079 sock_close(list->fd); 2080 } 2081 /* rc_ports don't have ub_socket */ 2082 if(list->socket) { 2083 free(list->socket->addr); 2084 free(list->socket); 2085 } 2086 free(list); 2087 list = nx; 2088 } 2089 } 2090 2091 size_t listen_get_mem(struct listen_dnsport* listen) 2092 { 2093 struct listen_list* p; 2094 size_t s = sizeof(*listen) + sizeof(*listen->base) + 2095 sizeof(*listen->udp_buff) + 2096 sldns_buffer_capacity(listen->udp_buff); 2097 #ifdef USE_DNSCRYPT 2098 s += sizeof(*listen->dnscrypt_udp_buff); 2099 if(listen->udp_buff != listen->dnscrypt_udp_buff){ 
2100 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff); 2101 } 2102 #endif 2103 for(p = listen->cps; p; p = p->next) { 2104 s += sizeof(*p); 2105 s += comm_point_get_mem(p->com); 2106 } 2107 return s; 2108 } 2109 2110 void listen_stop_accept(struct listen_dnsport* listen) 2111 { 2112 /* do not stop the ones that have no tcp_free list 2113 * (they have already stopped listening) */ 2114 struct listen_list* p; 2115 for(p=listen->cps; p; p=p->next) { 2116 if(p->com->type == comm_tcp_accept && 2117 p->com->tcp_free != NULL) { 2118 comm_point_stop_listening(p->com); 2119 } 2120 } 2121 } 2122 2123 void listen_start_accept(struct listen_dnsport* listen) 2124 { 2125 /* do not start the ones that have no tcp_free list, it is no 2126 * use to listen to them because they have no free tcp handlers */ 2127 struct listen_list* p; 2128 for(p=listen->cps; p; p=p->next) { 2129 if(p->com->type == comm_tcp_accept && 2130 p->com->tcp_free != NULL) { 2131 comm_point_start_listening(p->com, -1, -1); 2132 } 2133 } 2134 } 2135 2136 struct tcp_req_info* 2137 tcp_req_info_create(struct sldns_buffer* spoolbuf) 2138 { 2139 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req)); 2140 if(!req) { 2141 log_err("malloc failure for new stream outoforder processing structure"); 2142 return NULL; 2143 } 2144 memset(req, 0, sizeof(*req)); 2145 req->spool_buffer = spoolbuf; 2146 return req; 2147 } 2148 2149 void 2150 tcp_req_info_delete(struct tcp_req_info* req) 2151 { 2152 if(!req) return; 2153 tcp_req_info_clear(req); 2154 /* cp is pointer back to commpoint that owns this struct and 2155 * called delete on us */ 2156 /* spool_buffer is shared udp buffer, not deleted here */ 2157 free(req); 2158 } 2159 2160 void tcp_req_info_clear(struct tcp_req_info* req) 2161 { 2162 struct tcp_req_open_item* open, *nopen; 2163 struct tcp_req_done_item* item, *nitem; 2164 if(!req) return; 2165 2166 /* free outstanding request mesh reply entries */ 2167 open = req->open_req_list; 2168 while(open) { 
2169 nopen = open->next; 2170 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp); 2171 free(open); 2172 open = nopen; 2173 } 2174 req->open_req_list = NULL; 2175 req->num_open_req = 0; 2176 2177 /* free pending writable result packets */ 2178 item = req->done_req_list; 2179 while(item) { 2180 nitem = item->next; 2181 lock_basic_lock(&stream_wait_count_lock); 2182 stream_wait_count -= (sizeof(struct tcp_req_done_item) 2183 +item->len); 2184 lock_basic_unlock(&stream_wait_count_lock); 2185 free(item->buf); 2186 free(item); 2187 item = nitem; 2188 } 2189 req->done_req_list = NULL; 2190 req->num_done_req = 0; 2191 req->read_is_closed = 0; 2192 } 2193 2194 void 2195 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m) 2196 { 2197 struct tcp_req_open_item* open, *prev = NULL; 2198 if(!req || !m) return; 2199 open = req->open_req_list; 2200 while(open) { 2201 if(open->mesh_state == m) { 2202 struct tcp_req_open_item* next; 2203 if(prev) prev->next = open->next; 2204 else req->open_req_list = open->next; 2205 /* caller has to manage the mesh state reply entry */ 2206 next = open->next; 2207 free(open); 2208 req->num_open_req --; 2209 2210 /* prev = prev; */ 2211 open = next; 2212 continue; 2213 } 2214 prev = open; 2215 open = open->next; 2216 } 2217 } 2218 2219 /** setup listening for read or write */ 2220 static void 2221 tcp_req_info_setup_listen(struct tcp_req_info* req) 2222 { 2223 int wr = 0; 2224 int rd = 0; 2225 2226 if(req->cp->tcp_byte_count != 0) { 2227 /* cannot change, halfway through */ 2228 return; 2229 } 2230 2231 if(!req->cp->tcp_is_reading) 2232 wr = 1; 2233 if(!req->read_is_closed) 2234 rd = 1; 2235 2236 if(wr) { 2237 req->cp->tcp_is_reading = 0; 2238 comm_point_stop_listening(req->cp); 2239 comm_point_start_listening(req->cp, -1, 2240 adjusted_tcp_timeout(req->cp)); 2241 } else if(rd) { 2242 req->cp->tcp_is_reading = 1; 2243 comm_point_stop_listening(req->cp); 2244 comm_point_start_listening(req->cp, -1, 2245 
adjusted_tcp_timeout(req->cp)); 2246 /* and also read it (from SSL stack buffers), so 2247 * no event read event is expected since the remainder of 2248 * the TLS frame is sitting in the buffers. */ 2249 req->read_again = 1; 2250 } else { 2251 comm_point_stop_listening(req->cp); 2252 comm_point_start_listening(req->cp, -1, 2253 adjusted_tcp_timeout(req->cp)); 2254 comm_point_listen_for_rw(req->cp, 0, 0); 2255 } 2256 } 2257 2258 /** remove first item from list of pending results */ 2259 static struct tcp_req_done_item* 2260 tcp_req_info_pop_done(struct tcp_req_info* req) 2261 { 2262 struct tcp_req_done_item* item; 2263 log_assert(req->num_done_req > 0 && req->done_req_list); 2264 item = req->done_req_list; 2265 lock_basic_lock(&stream_wait_count_lock); 2266 stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len); 2267 lock_basic_unlock(&stream_wait_count_lock); 2268 req->done_req_list = req->done_req_list->next; 2269 req->num_done_req --; 2270 return item; 2271 } 2272 2273 /** Send given buffer and setup to write */ 2274 static void 2275 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf, 2276 size_t len) 2277 { 2278 sldns_buffer_clear(req->cp->buffer); 2279 sldns_buffer_write(req->cp->buffer, buf, len); 2280 sldns_buffer_flip(req->cp->buffer); 2281 2282 req->cp->tcp_is_reading = 0; /* we are now writing */ 2283 } 2284 2285 /** pick up the next result and start writing it to the channel */ 2286 static void 2287 tcp_req_pickup_next_result(struct tcp_req_info* req) 2288 { 2289 if(req->num_done_req > 0) { 2290 /* unlist the done item from the list of pending results */ 2291 struct tcp_req_done_item* item = tcp_req_info_pop_done(req); 2292 tcp_req_info_start_write_buf(req, item->buf, item->len); 2293 free(item->buf); 2294 free(item); 2295 } 2296 } 2297 2298 /** the read channel has closed */ 2299 int 2300 tcp_req_info_handle_read_close(struct tcp_req_info* req) 2301 { 2302 verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd); 
2303 /* RFC 7766 6.2.4 says to drop pending replies when client closes. */ 2304 return 0; /* drop connection */ 2305 } 2306 2307 void 2308 tcp_req_info_handle_writedone(struct tcp_req_info* req) 2309 { 2310 /* back to reading state, we finished this write event */ 2311 sldns_buffer_clear(req->cp->buffer); 2312 if(req->num_done_req == 0 && req->read_is_closed) { 2313 /* no more to write and nothing to read, close it */ 2314 comm_point_drop_reply(&req->cp->repinfo); 2315 return; 2316 } 2317 req->cp->tcp_is_reading = 1; 2318 /* see if another result needs writing */ 2319 tcp_req_pickup_next_result(req); 2320 2321 /* see if there is more to write, if not stop_listening for writing */ 2322 /* see if new requests are allowed, if so, start_listening 2323 * for reading */ 2324 tcp_req_info_setup_listen(req); 2325 } 2326 2327 void 2328 tcp_req_info_handle_readdone(struct tcp_req_info* req) 2329 { 2330 struct comm_point* c = req->cp; 2331 2332 /* we want to read up several requests, unless there are 2333 * pending answers */ 2334 2335 req->is_drop = 0; 2336 req->is_reply = 0; 2337 req->in_worker_handle = 1; 2338 sldns_buffer_set_limit(req->spool_buffer, 0); 2339 /* handle the current request */ 2340 /* this calls the worker handle request routine that could give 2341 * a cache response, or localdata response, or drop the reply, 2342 * or schedule a mesh entry for later */ 2343 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2344 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 2345 req->in_worker_handle = 0; 2346 /* there is an answer, put it up. It is already in the 2347 * c->buffer, just send it. */ 2348 /* since we were just reading a query, the channel is 2349 * clear to write to */ 2350 send_it: 2351 c->tcp_is_reading = 0; 2352 comm_point_stop_listening(c); 2353 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 2354 return; 2355 } 2356 req->in_worker_handle = 0; 2357 /* it should be waiting in the mesh for recursion. 
2358 * If mesh failed to add a new entry and called commpoint_drop_reply. 2359 * Then the mesh state has been cleared. */ 2360 if(req->is_drop) { 2361 /* the reply has been dropped, stream has been closed. */ 2362 return; 2363 } 2364 /* If mesh failed(mallocfail) and called commpoint_send_reply with 2365 * something like servfail then we pick up that reply below. */ 2366 if(req->is_reply) { 2367 goto send_it; 2368 } 2369 2370 sldns_buffer_clear(c->buffer); 2371 /* if pending answers, pick up an answer and start sending it */ 2372 tcp_req_pickup_next_result(req); 2373 2374 /* if answers pending, start sending answers */ 2375 /* read more requests if we can have more requests */ 2376 tcp_req_info_setup_listen(req); 2377 } 2378 2379 int 2380 tcp_req_info_add_meshstate(struct tcp_req_info* req, 2381 struct mesh_area* mesh, struct mesh_state* m) 2382 { 2383 struct tcp_req_open_item* item; 2384 log_assert(req && mesh && m); 2385 item = (struct tcp_req_open_item*)malloc(sizeof(*item)); 2386 if(!item) return 0; 2387 item->next = req->open_req_list; 2388 item->mesh = mesh; 2389 item->mesh_state = m; 2390 req->open_req_list = item; 2391 req->num_open_req++; 2392 return 1; 2393 } 2394 2395 /** Add a result to the result list. At the end. 
*/ 2396 static int 2397 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len) 2398 { 2399 struct tcp_req_done_item* last = NULL; 2400 struct tcp_req_done_item* item; 2401 size_t space; 2402 2403 /* see if we have space */ 2404 space = sizeof(struct tcp_req_done_item) + len; 2405 lock_basic_lock(&stream_wait_count_lock); 2406 if(stream_wait_count + space > stream_wait_max) { 2407 lock_basic_unlock(&stream_wait_count_lock); 2408 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size"); 2409 return 0; 2410 } 2411 stream_wait_count += space; 2412 lock_basic_unlock(&stream_wait_count_lock); 2413 2414 /* find last element */ 2415 last = req->done_req_list; 2416 while(last && last->next) 2417 last = last->next; 2418 2419 /* create new element */ 2420 item = (struct tcp_req_done_item*)malloc(sizeof(*item)); 2421 if(!item) { 2422 log_err("malloc failure, for stream result list"); 2423 return 0; 2424 } 2425 item->next = NULL; 2426 item->len = len; 2427 item->buf = memdup(buf, len); 2428 if(!item->buf) { 2429 free(item); 2430 log_err("malloc failure, adding reply to stream result list"); 2431 return 0; 2432 } 2433 2434 /* link in */ 2435 if(last) last->next = item; 2436 else req->done_req_list = item; 2437 req->num_done_req++; 2438 return 1; 2439 } 2440 2441 void 2442 tcp_req_info_send_reply(struct tcp_req_info* req) 2443 { 2444 if(req->in_worker_handle) { 2445 /* reply from mesh is in the spool_buffer */ 2446 /* copy now, so that the spool buffer is free for other tasks 2447 * before the callback is done */ 2448 sldns_buffer_clear(req->cp->buffer); 2449 sldns_buffer_write(req->cp->buffer, 2450 sldns_buffer_begin(req->spool_buffer), 2451 sldns_buffer_limit(req->spool_buffer)); 2452 sldns_buffer_flip(req->cp->buffer); 2453 req->is_reply = 1; 2454 return; 2455 } 2456 /* now that the query has been handled, that mesh_reply entry 2457 * should be removed, from the tcp_req_info list, 2458 * the mesh state cleanup removes then with 
region_cleanup and 2459 * replies_sent true. */ 2460 /* see if we can send it straight away (we are not doing 2461 * anything else). If so, copy to buffer and start */ 2462 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) { 2463 /* buffer is free, and was ready to read new query into, 2464 * but we are now going to use it to send this answer */ 2465 tcp_req_info_start_write_buf(req, 2466 sldns_buffer_begin(req->spool_buffer), 2467 sldns_buffer_limit(req->spool_buffer)); 2468 /* switch to listen to write events */ 2469 comm_point_stop_listening(req->cp); 2470 comm_point_start_listening(req->cp, -1, 2471 adjusted_tcp_timeout(req->cp)); 2472 return; 2473 } 2474 /* queue up the answer behind the others already pending */ 2475 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer), 2476 sldns_buffer_limit(req->spool_buffer))) { 2477 /* drop the connection, we are out of resources */ 2478 comm_point_drop_reply(&req->cp->repinfo); 2479 } 2480 } 2481 2482 size_t tcp_req_info_get_stream_buffer_size(void) 2483 { 2484 size_t s; 2485 if(!stream_wait_lock_inited) 2486 return stream_wait_count; 2487 lock_basic_lock(&stream_wait_count_lock); 2488 s = stream_wait_count; 2489 lock_basic_unlock(&stream_wait_count_lock); 2490 return s; 2491 } 2492 2493 size_t http2_get_query_buffer_size(void) 2494 { 2495 size_t s; 2496 if(!http2_query_buffer_lock_inited) 2497 return http2_query_buffer_count; 2498 lock_basic_lock(&http2_query_buffer_count_lock); 2499 s = http2_query_buffer_count; 2500 lock_basic_unlock(&http2_query_buffer_count_lock); 2501 return s; 2502 } 2503 2504 size_t http2_get_response_buffer_size(void) 2505 { 2506 size_t s; 2507 if(!http2_response_buffer_lock_inited) 2508 return http2_response_buffer_count; 2509 lock_basic_lock(&http2_response_buffer_count_lock); 2510 s = http2_response_buffer_count; 2511 lock_basic_unlock(&http2_response_buffer_count_lock); 2512 return s; 2513 } 2514 2515 #ifdef HAVE_NGHTTP2 2516 /** nghttp2 callback. 
Used to copy response from rbuffer to nghttp2 session */ 2517 static ssize_t http2_submit_response_read_callback( 2518 nghttp2_session* ATTR_UNUSED(session), 2519 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2520 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2521 { 2522 struct http2_stream* h2_stream; 2523 struct http2_session* h2_session = source->ptr; 2524 size_t copylen = length; 2525 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2526 h2_session->session, stream_id))) { 2527 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2528 "stream"); 2529 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2530 } 2531 if(!h2_stream->rbuffer || 2532 sldns_buffer_remaining(h2_stream->rbuffer) == 0) { 2533 verbose(VERB_QUERY, "http2: cannot submit buffer. No data " 2534 "available in rbuffer"); 2535 /* rbuffer will be free'd in frame close cb */ 2536 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2537 } 2538 2539 if(copylen > sldns_buffer_remaining(h2_stream->rbuffer)) 2540 copylen = sldns_buffer_remaining(h2_stream->rbuffer); 2541 if(copylen > SSIZE_MAX) 2542 copylen = SSIZE_MAX; /* will probably never happen */ 2543 2544 memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen); 2545 sldns_buffer_skip(h2_stream->rbuffer, copylen); 2546 2547 if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) { 2548 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2549 lock_basic_lock(&http2_response_buffer_count_lock); 2550 http2_response_buffer_count -= 2551 sldns_buffer_capacity(h2_stream->rbuffer); 2552 lock_basic_unlock(&http2_response_buffer_count_lock); 2553 sldns_buffer_free(h2_stream->rbuffer); 2554 h2_stream->rbuffer = NULL; 2555 } 2556 2557 return copylen; 2558 } 2559 2560 /** 2561 * Send RST_STREAM frame for stream. 
2562 * @param h2_session: http2 session to submit frame to 2563 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM 2564 * @return 0 on error, 1 otherwise 2565 */ 2566 static int http2_submit_rst_stream(struct http2_session* h2_session, 2567 struct http2_stream* h2_stream) 2568 { 2569 int ret = nghttp2_submit_rst_stream(h2_session->session, 2570 NGHTTP2_FLAG_NONE, h2_stream->stream_id, 2571 NGHTTP2_INTERNAL_ERROR); 2572 if(ret) { 2573 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, " 2574 "error: %s", nghttp2_strerror(ret)); 2575 return 0; 2576 } 2577 return 1; 2578 } 2579 2580 /** 2581 * DNS response ready to be submitted to nghttp2, to be prepared for sending 2582 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer 2583 * might be used before this will be sent out. 2584 * @param h2_session: http2 session, containing c->buffer which contains answer 2585 * @return 0 on error, 1 otherwise 2586 */ 2587 int http2_submit_dns_response(struct http2_session* h2_session) 2588 { 2589 int ret; 2590 nghttp2_data_provider data_prd; 2591 char status[4]; 2592 nghttp2_nv headers[3]; 2593 struct http2_stream* h2_stream = h2_session->c->h2_stream; 2594 size_t rlen; 2595 char rlen_str[32]; 2596 2597 if(h2_stream->rbuffer) { 2598 log_err("http2 submit response error: rbuffer already " 2599 "exists"); 2600 return 0; 2601 } 2602 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) { 2603 log_err("http2 submit response error: c->buffer not complete"); 2604 return 0; 2605 } 2606 2607 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2608 verbose(VERB_QUERY, "http2: submit response error: " 2609 "invalid status"); 2610 return 0; 2611 } 2612 2613 rlen = sldns_buffer_remaining(h2_session->c->buffer); 2614 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen); 2615 2616 lock_basic_lock(&http2_response_buffer_count_lock); 2617 if(http2_response_buffer_count + rlen > http2_response_buffer_max) { 2618 
lock_basic_unlock(&http2_response_buffer_count_lock); 2619 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2620 "in https-response-buffer-size"); 2621 return http2_submit_rst_stream(h2_session, h2_stream); 2622 } 2623 http2_response_buffer_count += rlen; 2624 lock_basic_unlock(&http2_response_buffer_count_lock); 2625 2626 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) { 2627 lock_basic_lock(&http2_response_buffer_count_lock); 2628 http2_response_buffer_count -= rlen; 2629 lock_basic_unlock(&http2_response_buffer_count_lock); 2630 log_err("http2 submit response error: malloc failure"); 2631 return 0; 2632 } 2633 2634 headers[0].name = (uint8_t*)":status"; 2635 headers[0].namelen = 7; 2636 headers[0].value = (uint8_t*)status; 2637 headers[0].valuelen = 3; 2638 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2639 2640 headers[1].name = (uint8_t*)"content-type"; 2641 headers[1].namelen = 12; 2642 headers[1].value = (uint8_t*)"application/dns-message"; 2643 headers[1].valuelen = 23; 2644 headers[1].flags = NGHTTP2_NV_FLAG_NONE; 2645 2646 headers[2].name = (uint8_t*)"content-length"; 2647 headers[2].namelen = 14; 2648 headers[2].value = (uint8_t*)rlen_str; 2649 headers[2].valuelen = strlen(rlen_str); 2650 headers[2].flags = NGHTTP2_NV_FLAG_NONE; 2651 2652 sldns_buffer_write(h2_stream->rbuffer, 2653 sldns_buffer_current(h2_session->c->buffer), 2654 sldns_buffer_remaining(h2_session->c->buffer)); 2655 sldns_buffer_flip(h2_stream->rbuffer); 2656 2657 data_prd.source.ptr = h2_session; 2658 data_prd.read_callback = http2_submit_response_read_callback; 2659 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2660 headers, 3, &data_prd); 2661 if(ret) { 2662 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2663 "error: %s", nghttp2_strerror(ret)); 2664 return 0; 2665 } 2666 return 1; 2667 } 2668 #else 2669 int http2_submit_dns_response(void* ATTR_UNUSED(v)) 2670 { 2671 return 0; 2672 } 2673 #endif 2674 2675 #ifdef HAVE_NGHTTP2 2676 /** HTTP 
status to descriptive string */ 2677 static char* http_status_to_str(enum http_status s) 2678 { 2679 switch(s) { 2680 case HTTP_STATUS_OK: 2681 return "OK"; 2682 case HTTP_STATUS_BAD_REQUEST: 2683 return "Bad Request"; 2684 case HTTP_STATUS_NOT_FOUND: 2685 return "Not Found"; 2686 case HTTP_STATUS_PAYLOAD_TOO_LARGE: 2687 return "Payload Too Large"; 2688 case HTTP_STATUS_URI_TOO_LONG: 2689 return "URI Too Long"; 2690 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE: 2691 return "Unsupported Media Type"; 2692 case HTTP_STATUS_NOT_IMPLEMENTED: 2693 return "Not Implemented"; 2694 } 2695 return "Status Unknown"; 2696 } 2697 2698 /** nghttp2 callback. Used to copy error message to nghttp2 session */ 2699 static ssize_t http2_submit_error_read_callback( 2700 nghttp2_session* ATTR_UNUSED(session), 2701 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2702 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2703 { 2704 struct http2_stream* h2_stream; 2705 struct http2_session* h2_session = source->ptr; 2706 char* msg; 2707 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2708 h2_session->session, stream_id))) { 2709 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2710 "stream"); 2711 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2712 } 2713 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2714 msg = http_status_to_str(h2_stream->status); 2715 if(length < strlen(msg)) 2716 return 0; /* not worth trying over multiple frames */ 2717 memcpy(buf, msg, strlen(msg)); 2718 return strlen(msg); 2719 2720 } 2721 2722 /** 2723 * HTTP error response ready to be submitted to nghttp2, to be prepared for 2724 * sending out. Message body will contain descriptive string for HTTP status. 
2725 * @param h2_session: http2 session to submit to 2726 * @param h2_stream: http2 stream containing HTTP status to use for error 2727 * @return 0 on error, 1 otherwise 2728 */ 2729 static int http2_submit_error(struct http2_session* h2_session, 2730 struct http2_stream* h2_stream) 2731 { 2732 int ret; 2733 char status[4]; 2734 nghttp2_data_provider data_prd; 2735 nghttp2_nv headers[1]; /* will be copied by nghttp */ 2736 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2737 verbose(VERB_QUERY, "http2: submit error failed, " 2738 "invalid status"); 2739 return 0; 2740 } 2741 headers[0].name = (uint8_t*)":status"; 2742 headers[0].namelen = 7; 2743 headers[0].value = (uint8_t*)status; 2744 headers[0].valuelen = 3; 2745 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2746 2747 data_prd.source.ptr = h2_session; 2748 data_prd.read_callback = http2_submit_error_read_callback; 2749 2750 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2751 headers, 1, &data_prd); 2752 if(ret) { 2753 verbose(VERB_QUERY, "http2: submit error failed, " 2754 "error: %s", nghttp2_strerror(ret)); 2755 return 0; 2756 } 2757 return 1; 2758 } 2759 2760 /** 2761 * Start query handling. Query is stored in the stream, and will be free'd here. 2762 * @param h2_session: http2 session, containing comm point 2763 * @param h2_stream: stream containing buffered query 2764 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no 2765 * reply available (yet). 
2766 */ 2767 static int http2_query_read_done(struct http2_session* h2_session, 2768 struct http2_stream* h2_stream) 2769 { 2770 log_assert(h2_stream->qbuffer); 2771 2772 if(h2_session->c->h2_stream) { 2773 verbose(VERB_ALGO, "http2_query_read_done failure: shared " 2774 "buffer already assigned to stream"); 2775 return -1; 2776 } 2777 2778 /* the c->buffer might be used by mesh_send_reply and no be cleard 2779 * need to be cleared before use */ 2780 sldns_buffer_clear(h2_session->c->buffer); 2781 if(sldns_buffer_remaining(h2_session->c->buffer) < 2782 sldns_buffer_remaining(h2_stream->qbuffer)) { 2783 /* qbuffer will be free'd in frame close cb */ 2784 sldns_buffer_clear(h2_session->c->buffer); 2785 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit " 2786 "qbuffer in c->buffer"); 2787 return -1; 2788 } 2789 2790 sldns_buffer_write(h2_session->c->buffer, 2791 sldns_buffer_current(h2_stream->qbuffer), 2792 sldns_buffer_remaining(h2_stream->qbuffer)); 2793 2794 lock_basic_lock(&http2_query_buffer_count_lock); 2795 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer); 2796 lock_basic_unlock(&http2_query_buffer_count_lock); 2797 sldns_buffer_free(h2_stream->qbuffer); 2798 h2_stream->qbuffer = NULL; 2799 2800 sldns_buffer_flip(h2_session->c->buffer); 2801 h2_session->c->h2_stream = h2_stream; 2802 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback)); 2803 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg, 2804 NETEVENT_NOERROR, &h2_session->c->repinfo)) { 2805 return 1; /* answer in c->buffer */ 2806 } 2807 sldns_buffer_clear(h2_session->c->buffer); 2808 h2_session->c->h2_stream = NULL; 2809 return 0; /* mesh state added, or dropped */ 2810 } 2811 2812 /** nghttp2 callback. Used to check if the received frame indicates the end of a 2813 * stream. Gather collected request data and start query handling. 
*/ 2814 static int http2_req_frame_recv_cb(nghttp2_session* session, 2815 const nghttp2_frame* frame, void* cb_arg) 2816 { 2817 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2818 struct http2_stream* h2_stream; 2819 int query_read_done; 2820 2821 if((frame->hd.type != NGHTTP2_DATA && 2822 frame->hd.type != NGHTTP2_HEADERS) || 2823 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) { 2824 return 0; 2825 } 2826 2827 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2828 session, frame->hd.stream_id))) 2829 return 0; 2830 2831 if(h2_stream->invalid_endpoint) { 2832 h2_stream->status = HTTP_STATUS_NOT_FOUND; 2833 goto submit_http_error; 2834 } 2835 2836 if(h2_stream->invalid_content_type) { 2837 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE; 2838 goto submit_http_error; 2839 } 2840 2841 if(h2_stream->http_method != HTTP_METHOD_GET && 2842 h2_stream->http_method != HTTP_METHOD_POST) { 2843 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED; 2844 goto submit_http_error; 2845 } 2846 2847 if(h2_stream->query_too_large) { 2848 if(h2_stream->http_method == HTTP_METHOD_POST) 2849 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE; 2850 else 2851 h2_stream->status = HTTP_STATUS_URI_TOO_LONG; 2852 goto submit_http_error; 2853 } 2854 2855 if(!h2_stream->qbuffer) { 2856 h2_stream->status = HTTP_STATUS_BAD_REQUEST; 2857 goto submit_http_error; 2858 } 2859 2860 if(h2_stream->status) { 2861 submit_http_error: 2862 verbose(VERB_QUERY, "http2 request invalid, returning :status=" 2863 "%d", h2_stream->status); 2864 if(!http2_submit_error(h2_session, h2_stream)) { 2865 return NGHTTP2_ERR_CALLBACK_FAILURE; 2866 } 2867 return 0; 2868 } 2869 h2_stream->status = HTTP_STATUS_OK; 2870 2871 sldns_buffer_flip(h2_stream->qbuffer); 2872 h2_session->postpone_drop = 1; 2873 query_read_done = http2_query_read_done(h2_session, h2_stream); 2874 h2_session->postpone_drop = 0; 2875 if(query_read_done < 0) 2876 return NGHTTP2_ERR_CALLBACK_FAILURE; 2877 else if(!query_read_done) { 
2878 if(h2_session->is_drop) { 2879 /* connection needs to be closed. Return failure to make 2880 * sure no other action are taken anymore on comm point. 2881 * failure will result in reclaiming (and closing) 2882 * of comm point. */ 2883 verbose(VERB_QUERY, "http2 query dropped in worker cb"); 2884 return NGHTTP2_ERR_CALLBACK_FAILURE; 2885 } 2886 /* nothing to submit right now, query added to mesh. */ 2887 return 0; 2888 } 2889 if(!http2_submit_dns_response(h2_session)) { 2890 sldns_buffer_clear(h2_session->c->buffer); 2891 h2_session->c->h2_stream = NULL; 2892 return NGHTTP2_ERR_CALLBACK_FAILURE; 2893 } 2894 verbose(VERB_QUERY, "http2 query submitted to session"); 2895 sldns_buffer_clear(h2_session->c->buffer); 2896 h2_session->c->h2_stream = NULL; 2897 return 0; 2898 } 2899 2900 /** nghttp2 callback. Used to detect start of new streams. */ 2901 static int http2_req_begin_headers_cb(nghttp2_session* session, 2902 const nghttp2_frame* frame, void* cb_arg) 2903 { 2904 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2905 struct http2_stream* h2_stream; 2906 int ret; 2907 if(frame->hd.type != NGHTTP2_HEADERS || 2908 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 2909 /* only interested in request headers */ 2910 return 0; 2911 } 2912 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) { 2913 log_err("malloc failure while creating http2 stream"); 2914 return NGHTTP2_ERR_CALLBACK_FAILURE; 2915 } 2916 http2_session_add_stream(h2_session, h2_stream); 2917 ret = nghttp2_session_set_stream_user_data(session, 2918 frame->hd.stream_id, h2_stream); 2919 if(ret) { 2920 /* stream does not exist */ 2921 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2922 "error: %s", nghttp2_strerror(ret)); 2923 return NGHTTP2_ERR_CALLBACK_FAILURE; 2924 } 2925 2926 return 0; 2927 } 2928 2929 /** 2930 * base64url decode, store in qbuffer 2931 * @param h2_session: http2 session 2932 * @param h2_stream: http2 stream 2933 * @param start: start of the base64 
string 2934 * @param length: length of the base64 string 2935 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer, 2936 * buffer will be NULL is unparseble. 2937 */ 2938 static int http2_buffer_uri_query(struct http2_session* h2_session, 2939 struct http2_stream* h2_stream, const uint8_t* start, size_t length) 2940 { 2941 size_t expectb64len; 2942 int b64len; 2943 if(h2_stream->http_method == HTTP_METHOD_POST) 2944 return 1; 2945 if(length == 0) 2946 return 1; 2947 if(h2_stream->qbuffer) { 2948 verbose(VERB_ALGO, "http2_req_header fail, " 2949 "qbuffer already set"); 2950 return 0; 2951 } 2952 2953 /* calculate size, might be a bit bigger than the real 2954 * decoded buffer size */ 2955 expectb64len = sldns_b64_pton_calculate_size(length); 2956 log_assert(expectb64len > 0); 2957 if(expectb64len > 2958 h2_session->c->http2_stream_max_qbuffer_size) { 2959 h2_stream->query_too_large = 1; 2960 return 1; 2961 } 2962 2963 lock_basic_lock(&http2_query_buffer_count_lock); 2964 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) { 2965 lock_basic_unlock(&http2_query_buffer_count_lock); 2966 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2967 "in http2-query-buffer-size"); 2968 return http2_submit_rst_stream(h2_session, h2_stream); 2969 } 2970 http2_query_buffer_count += expectb64len; 2971 lock_basic_unlock(&http2_query_buffer_count_lock); 2972 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) { 2973 lock_basic_lock(&http2_query_buffer_count_lock); 2974 http2_query_buffer_count -= expectb64len; 2975 lock_basic_unlock(&http2_query_buffer_count_lock); 2976 log_err("http2_req_header fail, qbuffer " 2977 "malloc failure"); 2978 return 0; 2979 } 2980 2981 if(sldns_b64_contains_nonurl((char const*)start, length)) { 2982 char buf[65536+4]; 2983 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding"); 2984 /* copy to the scratch buffer temporarily to terminate the 2985 * string with a zero */ 2986 if(length+1 > 
sizeof(buf)) { 2987 /* too long */ 2988 lock_basic_lock(&http2_query_buffer_count_lock); 2989 http2_query_buffer_count -= expectb64len; 2990 lock_basic_unlock(&http2_query_buffer_count_lock); 2991 sldns_buffer_free(h2_stream->qbuffer); 2992 h2_stream->qbuffer = NULL; 2993 return 1; 2994 } 2995 memmove(buf, start, length); 2996 buf[length] = 0; 2997 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current( 2998 h2_stream->qbuffer), expectb64len)) || b64len < 0) { 2999 lock_basic_lock(&http2_query_buffer_count_lock); 3000 http2_query_buffer_count -= expectb64len; 3001 lock_basic_unlock(&http2_query_buffer_count_lock); 3002 sldns_buffer_free(h2_stream->qbuffer); 3003 h2_stream->qbuffer = NULL; 3004 return 1; 3005 } 3006 } else { 3007 if(!(b64len = sldns_b64url_pton( 3008 (char const *)start, length, 3009 sldns_buffer_current(h2_stream->qbuffer), 3010 expectb64len)) || b64len < 0) { 3011 lock_basic_lock(&http2_query_buffer_count_lock); 3012 http2_query_buffer_count -= expectb64len; 3013 lock_basic_unlock(&http2_query_buffer_count_lock); 3014 sldns_buffer_free(h2_stream->qbuffer); 3015 h2_stream->qbuffer = NULL; 3016 /* return without error, method can be an 3017 * unknown POST */ 3018 return 1; 3019 } 3020 } 3021 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len); 3022 return 1; 3023 } 3024 3025 /** nghttp2 callback. Used to parse headers from HEADER frames. 
*/ 3026 static int http2_req_header_cb(nghttp2_session* session, 3027 const nghttp2_frame* frame, const uint8_t* name, size_t namelen, 3028 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags), 3029 void* cb_arg) 3030 { 3031 struct http2_stream* h2_stream = NULL; 3032 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3033 /* nghttp2 deals with CONTINUATION frames and provides them as part of 3034 * the HEADER */ 3035 if(frame->hd.type != NGHTTP2_HEADERS || 3036 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 3037 /* only interested in request headers */ 3038 return 0; 3039 } 3040 if(!(h2_stream = nghttp2_session_get_stream_user_data(session, 3041 frame->hd.stream_id))) 3042 return 0; 3043 3044 /* earlier checks already indicate we can stop handling this query */ 3045 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED || 3046 h2_stream->invalid_content_type || 3047 h2_stream->invalid_endpoint) 3048 return 0; 3049 3050 3051 /* nghttp2 performs some sanity checks in the headers, including: 3052 * name and value are guaranteed to be null terminated 3053 * name is guaranteed to be lowercase 3054 * content-length value is guaranteed to contain digits 3055 */ 3056 3057 if(!h2_stream->http_method && namelen == 7 && 3058 memcmp(":method", name, namelen) == 0) { 3059 /* Case insensitive check on :method value to be on the safe 3060 * side. I failed to find text about case sensitivity in specs. 
3061 */ 3062 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0) 3063 h2_stream->http_method = HTTP_METHOD_GET; 3064 else if(valuelen == 4 && 3065 strcasecmp("POST", (const char*)value) == 0) { 3066 h2_stream->http_method = HTTP_METHOD_POST; 3067 if(h2_stream->qbuffer) { 3068 /* POST method uses query from DATA frames */ 3069 lock_basic_lock(&http2_query_buffer_count_lock); 3070 http2_query_buffer_count -= 3071 sldns_buffer_capacity(h2_stream->qbuffer); 3072 lock_basic_unlock(&http2_query_buffer_count_lock); 3073 sldns_buffer_free(h2_stream->qbuffer); 3074 h2_stream->qbuffer = NULL; 3075 } 3076 } else 3077 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED; 3078 return 0; 3079 } 3080 if(namelen == 5 && memcmp(":path", name, namelen) == 0) { 3081 /* :path may contain DNS query, depending on method. Method might 3082 * not be known yet here, so check after finishing receiving 3083 * stream. */ 3084 #define HTTP_QUERY_PARAM "?dns=" 3085 size_t el = strlen(h2_session->c->http_endpoint); 3086 size_t qpl = strlen(HTTP_QUERY_PARAM); 3087 3088 if(valuelen < el || memcmp(h2_session->c->http_endpoint, 3089 value, el) != 0) { 3090 h2_stream->invalid_endpoint = 1; 3091 return 0; 3092 } 3093 /* larger than endpoint only allowed if it is for the query 3094 * parameter */ 3095 if(valuelen <= el+qpl || 3096 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) { 3097 if(valuelen != el) 3098 h2_stream->invalid_endpoint = 1; 3099 return 0; 3100 } 3101 3102 if(!http2_buffer_uri_query(h2_session, h2_stream, 3103 value+(el+qpl), valuelen-(el+qpl))) { 3104 return NGHTTP2_ERR_CALLBACK_FAILURE; 3105 } 3106 return 0; 3107 } 3108 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST, 3109 * and not needed when using GET. Don't enforce. 3110 * If set only allow lowercase "application/dns-message". 3111 * 3112 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST 3113 * be able to handle "application/dns-message". 
Since that is the only 3114 * content-type supported we can ignore the accept header. 3115 */ 3116 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) { 3117 if(valuelen != 23 || memcmp("application/dns-message", value, 3118 valuelen) != 0) { 3119 h2_stream->invalid_content_type = 1; 3120 } 3121 } 3122 3123 /* Only interested in content-lentg for POST (on not yet known) method. 3124 */ 3125 if((!h2_stream->http_method || 3126 h2_stream->http_method == HTTP_METHOD_POST) && 3127 !h2_stream->content_length && namelen == 14 && 3128 memcmp("content-length", name, namelen) == 0) { 3129 if(valuelen > 5) { 3130 h2_stream->query_too_large = 1; 3131 return 0; 3132 } 3133 /* guaranteed to only contain digits and be null terminated */ 3134 h2_stream->content_length = atoi((const char*)value); 3135 if(h2_stream->content_length > 3136 h2_session->c->http2_stream_max_qbuffer_size) { 3137 h2_stream->query_too_large = 1; 3138 return 0; 3139 } 3140 } 3141 return 0; 3142 } 3143 3144 /** nghttp2 callback. Used to get data from DATA frames, which can contain 3145 * queries in POST requests. */ 3146 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session), 3147 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data, 3148 size_t len, void* cb_arg) 3149 { 3150 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3151 struct http2_stream* h2_stream; 3152 size_t qlen = 0; 3153 3154 if(!(h2_stream = nghttp2_session_get_stream_user_data( 3155 h2_session->session, stream_id))) { 3156 return 0; 3157 } 3158 3159 if(h2_stream->query_too_large) 3160 return 0; 3161 3162 if(!h2_stream->qbuffer) { 3163 if(h2_stream->content_length) { 3164 if(h2_stream->content_length < len) 3165 /* getting more data in DATA frame than 3166 * advertised in content-length header. 
*/ 3167 return NGHTTP2_ERR_CALLBACK_FAILURE; 3168 qlen = h2_stream->content_length; 3169 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) { 3170 /* setting this to msg-buffer-size can result in a lot 3171 * of memory consumption. Most queries should fit in a 3172 * single DATA frame, and most POST queries will 3173 * contain content-length which does not impose this 3174 * limit. */ 3175 qlen = len; 3176 } 3177 } 3178 if(!h2_stream->qbuffer && qlen) { 3179 lock_basic_lock(&http2_query_buffer_count_lock); 3180 if(http2_query_buffer_count + qlen > http2_query_buffer_max) { 3181 lock_basic_unlock(&http2_query_buffer_count_lock); 3182 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 3183 "in http2-query-buffer-size"); 3184 return http2_submit_rst_stream(h2_session, h2_stream); 3185 } 3186 http2_query_buffer_count += qlen; 3187 lock_basic_unlock(&http2_query_buffer_count_lock); 3188 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) { 3189 lock_basic_lock(&http2_query_buffer_count_lock); 3190 http2_query_buffer_count -= qlen; 3191 lock_basic_unlock(&http2_query_buffer_count_lock); 3192 } 3193 } 3194 3195 if(!h2_stream->qbuffer || 3196 sldns_buffer_remaining(h2_stream->qbuffer) < len) { 3197 verbose(VERB_ALGO, "http2 data_chunk_recv failed. Not enough " 3198 "buffer space for POST query. 
Can happen on multi " 3199 "frame requests without content-length header"); 3200 h2_stream->query_too_large = 1; 3201 return 0; 3202 } 3203 3204 sldns_buffer_write(h2_stream->qbuffer, data, len); 3205 3206 return 0; 3207 } 3208 3209 void http2_req_stream_clear(struct http2_stream* h2_stream) 3210 { 3211 if(h2_stream->qbuffer) { 3212 lock_basic_lock(&http2_query_buffer_count_lock); 3213 http2_query_buffer_count -= 3214 sldns_buffer_capacity(h2_stream->qbuffer); 3215 lock_basic_unlock(&http2_query_buffer_count_lock); 3216 sldns_buffer_free(h2_stream->qbuffer); 3217 h2_stream->qbuffer = NULL; 3218 } 3219 if(h2_stream->rbuffer) { 3220 lock_basic_lock(&http2_response_buffer_count_lock); 3221 http2_response_buffer_count -= 3222 sldns_buffer_capacity(h2_stream->rbuffer); 3223 lock_basic_unlock(&http2_response_buffer_count_lock); 3224 sldns_buffer_free(h2_stream->rbuffer); 3225 h2_stream->rbuffer = NULL; 3226 } 3227 } 3228 3229 nghttp2_session_callbacks* http2_req_callbacks_create(void) 3230 { 3231 nghttp2_session_callbacks *callbacks; 3232 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) { 3233 log_err("failed to initialize nghttp2 callback"); 3234 return NULL; 3235 } 3236 /* reception of header block started, used to create h2_stream */ 3237 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks, 3238 http2_req_begin_headers_cb); 3239 /* complete frame received, used to get data from stream if frame 3240 * has end stream flag, and start processing query */ 3241 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks, 3242 http2_req_frame_recv_cb); 3243 /* get request info from headers */ 3244 nghttp2_session_callbacks_set_on_header_callback(callbacks, 3245 http2_req_header_cb); 3246 /* get data from DATA frames, containing POST query */ 3247 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks, 3248 http2_req_data_chunk_recv_cb); 3249 3250 /* generic HTTP2 callbacks */ 3251 
nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb); 3252 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb); 3253 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks, 3254 http2_stream_close_cb); 3255 3256 return callbacks; 3257 } 3258 #endif /* HAVE_NGHTTP2 */ 3259 3260 #ifdef HAVE_NGTCP2 3261 struct doq_table* 3262 doq_table_create(struct config_file* cfg, struct ub_randstate* rnd) 3263 { 3264 struct doq_table* table; 3265 3266 if (!cfg->quic_port) 3267 return NULL; 3268 table = calloc(1, sizeof(*table)); 3269 if(!table) 3270 return NULL; 3271 #ifdef USE_NGTCP2_CRYPTO_OSSL 3272 /* Initialize the ossl crypto, it is harmless to call twice, 3273 * and this is before use of doq connections. */ 3274 if(ngtcp2_crypto_ossl_init() != 0) { 3275 log_err("ngtcp2_crypto_ossl_init failed"); 3276 free(table); 3277 return NULL; 3278 } 3279 #elif defined(HAVE_NGTCP2_CRYPTO_QUICTLS_INIT) 3280 if(ngtcp2_crypto_quictls_init() != 0) { 3281 log_err("ngtcp2_crypto_quictls_init failed"); 3282 free(table); 3283 return NULL; 3284 } 3285 #endif 3286 table->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)* 3287 NGTCP2_MILLISECONDS; 3288 table->sv_scidlen = 16; 3289 table->static_secret_len = 16; 3290 table->static_secret = malloc(table->static_secret_len); 3291 if(!table->static_secret) { 3292 free(table); 3293 return NULL; 3294 } 3295 doq_fill_rand(rnd, table->static_secret, table->static_secret_len); 3296 table->conn_tree = rbtree_create(doq_conn_cmp); 3297 if(!table->conn_tree) { 3298 free(table->static_secret); 3299 free(table); 3300 return NULL; 3301 } 3302 table->conid_tree = rbtree_create(doq_conid_cmp); 3303 if(!table->conid_tree) { 3304 free(table->static_secret); 3305 free(table->conn_tree); 3306 free(table); 3307 return NULL; 3308 } 3309 table->timer_tree = rbtree_create(doq_timer_cmp); 3310 if(!table->timer_tree) { 3311 free(table->static_secret); 3312 free(table->conn_tree); 3313 free(table->conid_tree); 3314 free(table); 3315 
return NULL; 3316 } 3317 lock_rw_init(&table->lock); 3318 lock_rw_init(&table->conid_lock); 3319 lock_basic_init(&table->size_lock); 3320 lock_protect(&table->lock, &table->static_secret, 3321 sizeof(table->static_secret)); 3322 lock_protect(&table->lock, &table->static_secret_len, 3323 sizeof(table->static_secret_len)); 3324 lock_protect(&table->lock, table->static_secret, 3325 table->static_secret_len); 3326 lock_protect(&table->lock, &table->sv_scidlen, 3327 sizeof(table->sv_scidlen)); 3328 lock_protect(&table->lock, &table->idle_timeout, 3329 sizeof(table->idle_timeout)); 3330 lock_protect(&table->lock, &table->conn_tree, sizeof(table->conn_tree)); 3331 lock_protect(&table->lock, table->conn_tree, sizeof(*table->conn_tree)); 3332 lock_protect(&table->conid_lock, table->conid_tree, 3333 sizeof(*table->conid_tree)); 3334 lock_protect(&table->lock, table->timer_tree, 3335 sizeof(*table->timer_tree)); 3336 lock_protect(&table->size_lock, &table->current_size, 3337 sizeof(table->current_size)); 3338 return table; 3339 } 3340 3341 /** delete elements from the connection tree */ 3342 static void 3343 conn_tree_del(rbnode_type* node, void* arg) 3344 { 3345 struct doq_table* table = (struct doq_table*)arg; 3346 struct doq_conn* conn; 3347 if(!node || !table) 3348 return; 3349 conn = (struct doq_conn*)node->key; 3350 if(conn->timer.timer_in_list) { 3351 /* Remove timer from list first, because finding the rbnode 3352 * element of the setlist of same timeouts needs tree lookup. 3353 * Edit the tree structure after that lookup. 
*/ 3354 doq_timer_list_remove(conn->table, &conn->timer); 3355 } 3356 if(conn->timer.timer_in_tree) 3357 doq_timer_tree_remove(conn->table, &conn->timer); 3358 doq_table_quic_size_subtract(table, sizeof(*conn)+conn->key.dcidlen); 3359 doq_conn_delete(conn, table); 3360 } 3361 3362 /** delete elements from the connection id tree */ 3363 static void 3364 conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg)) 3365 { 3366 if(!node) 3367 return; 3368 doq_conid_delete((struct doq_conid*)node->key); 3369 } 3370 3371 void 3372 doq_table_delete(struct doq_table* table) 3373 { 3374 if(!table) 3375 return; 3376 lock_rw_destroy(&table->lock); 3377 free(table->static_secret); 3378 if(table->conn_tree) { 3379 traverse_postorder(table->conn_tree, conn_tree_del, table); 3380 free(table->conn_tree); 3381 } 3382 lock_rw_destroy(&table->conid_lock); 3383 if(table->conid_tree) { 3384 /* The tree should be empty, because the doq_conn_delete calls 3385 * above should have also removed their conid elements. */ 3386 traverse_postorder(table->conid_tree, conid_tree_del, NULL); 3387 free(table->conid_tree); 3388 } 3389 lock_basic_destroy(&table->size_lock); 3390 if(table->timer_tree) { 3391 /* The tree should be empty, because the conn_tree_del calls 3392 * above should also have removed them. Also the doq_timer 3393 * is part of the doq_conn struct, so is already freed. 
*/ 3394 free(table->timer_tree); 3395 } 3396 table->write_list_first = NULL; 3397 table->write_list_last = NULL; 3398 free(table); 3399 } 3400 3401 struct doq_timer* 3402 doq_timer_find_time(struct doq_table* table, struct timeval* tv) 3403 { 3404 struct doq_timer key; 3405 struct rbnode_type* node; 3406 log_assert(table != NULL); 3407 memset(&key, 0, sizeof(key)); 3408 key.time.tv_sec = tv->tv_sec; 3409 key.time.tv_usec = tv->tv_usec; 3410 node = rbtree_search(table->timer_tree, &key); 3411 if(node) 3412 return (struct doq_timer*)node->key; 3413 return NULL; 3414 } 3415 3416 void 3417 doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer) 3418 { 3419 if(!timer->timer_in_tree) 3420 return; 3421 rbtree_delete(table->timer_tree, timer); 3422 timer->timer_in_tree = 0; 3423 /* This item could have more timers in the same set. */ 3424 if(timer->setlist_first) { 3425 struct doq_timer* rb_timer = timer->setlist_first; 3426 /* del first element from setlist */ 3427 if(rb_timer->setlist_next) 3428 rb_timer->setlist_next->setlist_prev = NULL; 3429 else 3430 timer->setlist_last = NULL; 3431 timer->setlist_first = rb_timer->setlist_next; 3432 rb_timer->setlist_prev = NULL; 3433 rb_timer->setlist_next = NULL; 3434 rb_timer->timer_in_list = 0; 3435 /* insert it into the tree as new rb element */ 3436 memset(&rb_timer->node, 0, sizeof(rb_timer->node)); 3437 rb_timer->node.key = rb_timer; 3438 rbtree_insert(table->timer_tree, &rb_timer->node); 3439 rb_timer->timer_in_tree = 1; 3440 /* the setlist, if any remainder, moves to the rb element */ 3441 rb_timer->setlist_first = timer->setlist_first; 3442 rb_timer->setlist_last = timer->setlist_last; 3443 timer->setlist_first = NULL; 3444 timer->setlist_last = NULL; 3445 rb_timer->worker_doq_socket = timer->worker_doq_socket; 3446 } 3447 timer->worker_doq_socket = NULL; 3448 } 3449 3450 void 3451 doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer) 3452 { 3453 struct doq_timer* rb_timer; 3454 
if(!timer->timer_in_list) 3455 return; 3456 /* The item in the rbtree has the list start and end. */ 3457 rb_timer = doq_timer_find_time(table, &timer->time); 3458 if(rb_timer) { 3459 if(timer->setlist_prev) 3460 timer->setlist_prev->setlist_next = timer->setlist_next; 3461 else 3462 rb_timer->setlist_first = timer->setlist_next; 3463 if(timer->setlist_next) 3464 timer->setlist_next->setlist_prev = timer->setlist_prev; 3465 else 3466 rb_timer->setlist_last = timer->setlist_prev; 3467 timer->setlist_prev = NULL; 3468 timer->setlist_next = NULL; 3469 } 3470 timer->timer_in_list = 0; 3471 } 3472 3473 /** doq append timer to setlist */ 3474 static void 3475 doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer) 3476 { 3477 log_assert(timer->timer_in_list == 0); 3478 timer->timer_in_list = 1; 3479 timer->setlist_next = NULL; 3480 timer->setlist_prev = rb_timer->setlist_last; 3481 if(rb_timer->setlist_last) 3482 rb_timer->setlist_last->setlist_next = timer; 3483 else 3484 rb_timer->setlist_first = timer; 3485 rb_timer->setlist_last = timer; 3486 } 3487 3488 void 3489 doq_timer_unset(struct doq_table* table, struct doq_timer* timer) 3490 { 3491 if(timer->timer_in_list) { 3492 /* Remove timer from list first, because finding the rbnode 3493 * element of the setlist of same timeouts needs tree lookup. 3494 * Edit the tree structure after that lookup. 
*/ 3495 doq_timer_list_remove(table, timer); 3496 } 3497 if(timer->timer_in_tree) 3498 doq_timer_tree_remove(table, timer); 3499 timer->worker_doq_socket = NULL; 3500 } 3501 3502 void doq_timer_set(struct doq_table* table, struct doq_timer* timer, 3503 struct doq_server_socket* worker_doq_socket, struct timeval* tv) 3504 { 3505 struct doq_timer* rb_timer; 3506 if(verbosity >= VERB_ALGO && timer->conn) { 3507 char a[256]; 3508 struct timeval rel; 3509 addr_to_str((void*)&timer->conn->key.paddr.addr, 3510 timer->conn->key.paddr.addrlen, a, sizeof(a)); 3511 timeval_subtract(&rel, tv, worker_doq_socket->now_tv); 3512 verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d", 3513 a, (int)tv->tv_sec, (int)tv->tv_usec, 3514 (int)rel.tv_sec, (int)rel.tv_usec); 3515 } 3516 if(timer->timer_in_tree || timer->timer_in_list) { 3517 if(timer->time.tv_sec == tv->tv_sec && 3518 timer->time.tv_usec == tv->tv_usec) 3519 return; /* already set on that time */ 3520 doq_timer_unset(table, timer); 3521 } 3522 timer->time.tv_sec = tv->tv_sec; 3523 timer->time.tv_usec = tv->tv_usec; 3524 rb_timer = doq_timer_find_time(table, tv); 3525 if(rb_timer) { 3526 /* There is a timeout already with this value. Timer is 3527 * added to the setlist. */ 3528 doq_timer_list_append(rb_timer, timer); 3529 } else { 3530 /* There is no timeout with this value. Make timer a new 3531 * tree element. 
*/ 3532 memset(&timer->node, 0, sizeof(timer->node)); 3533 timer->node.key = timer; 3534 rbtree_insert(table->timer_tree, &timer->node); 3535 timer->timer_in_tree = 1; 3536 timer->setlist_first = NULL; 3537 timer->setlist_last = NULL; 3538 timer->worker_doq_socket = worker_doq_socket; 3539 } 3540 } 3541 3542 struct doq_conn* 3543 doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr, 3544 const uint8_t* dcid, size_t dcidlen, uint32_t version) 3545 { 3546 struct doq_conn* conn = calloc(1, sizeof(*conn)); 3547 if(!conn) 3548 return NULL; 3549 conn->node.key = conn; 3550 conn->doq_socket = c->doq_socket; 3551 conn->table = c->doq_socket->table; 3552 memmove(&conn->key.paddr.addr, &paddr->addr, paddr->addrlen); 3553 conn->key.paddr.addrlen = paddr->addrlen; 3554 memmove(&conn->key.paddr.localaddr, &paddr->localaddr, 3555 paddr->localaddrlen); 3556 conn->key.paddr.localaddrlen = paddr->localaddrlen; 3557 conn->key.paddr.ifindex = paddr->ifindex; 3558 conn->key.dcid = memdup((void*)dcid, dcidlen); 3559 if(!conn->key.dcid) { 3560 free(conn); 3561 return NULL; 3562 } 3563 conn->key.dcidlen = dcidlen; 3564 conn->version = version; 3565 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 3566 ngtcp2_ccerr_default(&conn->ccerr); 3567 #else 3568 ngtcp2_connection_close_error_default(&conn->last_error); 3569 #endif 3570 rbtree_init(&conn->stream_tree, &doq_stream_cmp); 3571 conn->timer.conn = conn; 3572 lock_basic_init(&conn->lock); 3573 lock_protect(&conn->lock, &conn->key, sizeof(conn->key)); 3574 lock_protect(&conn->lock, &conn->doq_socket, sizeof(conn->doq_socket)); 3575 lock_protect(&conn->lock, &conn->table, sizeof(conn->table)); 3576 lock_protect(&conn->lock, &conn->is_deleted, sizeof(conn->is_deleted)); 3577 lock_protect(&conn->lock, &conn->version, sizeof(conn->version)); 3578 lock_protect(&conn->lock, &conn->conn, sizeof(conn->conn)); 3579 lock_protect(&conn->lock, &conn->conid_list, sizeof(conn->conid_list)); 3580 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 3581 
lock_protect(&conn->lock, &conn->ccerr, sizeof(conn->ccerr)); 3582 #else 3583 lock_protect(&conn->lock, &conn->last_error, sizeof(conn->last_error)); 3584 #endif 3585 lock_protect(&conn->lock, &conn->tls_alert, sizeof(conn->tls_alert)); 3586 lock_protect(&conn->lock, &conn->ssl, sizeof(conn->ssl)); 3587 lock_protect(&conn->lock, &conn->close_pkt, sizeof(conn->close_pkt)); 3588 lock_protect(&conn->lock, &conn->close_pkt_len, sizeof(conn->close_pkt_len)); 3589 lock_protect(&conn->lock, &conn->close_ecn, sizeof(conn->close_ecn)); 3590 lock_protect(&conn->lock, &conn->stream_tree, sizeof(conn->stream_tree)); 3591 lock_protect(&conn->lock, &conn->stream_write_first, sizeof(conn->stream_write_first)); 3592 lock_protect(&conn->lock, &conn->stream_write_last, sizeof(conn->stream_write_last)); 3593 lock_protect(&conn->lock, &conn->write_interest, sizeof(conn->write_interest)); 3594 lock_protect(&conn->lock, &conn->on_write_list, sizeof(conn->on_write_list)); 3595 lock_protect(&conn->lock, &conn->write_prev, sizeof(conn->write_prev)); 3596 lock_protect(&conn->lock, &conn->write_next, sizeof(conn->write_next)); 3597 return conn; 3598 } 3599 3600 /** delete stream tree node */ 3601 static void 3602 stream_tree_del(rbnode_type* node, void* arg) 3603 { 3604 struct doq_table* table = (struct doq_table*)arg; 3605 struct doq_stream* stream; 3606 if(!node) 3607 return; 3608 stream = (struct doq_stream*)node; 3609 if(stream->in) 3610 doq_table_quic_size_subtract(table, stream->inlen); 3611 if(stream->out) 3612 doq_table_quic_size_subtract(table, stream->outlen); 3613 doq_table_quic_size_subtract(table, sizeof(*stream)); 3614 doq_stream_delete(stream); 3615 } 3616 3617 void 3618 doq_conn_delete(struct doq_conn* conn, struct doq_table* table) 3619 { 3620 if(!conn) 3621 return; 3622 lock_basic_destroy(&conn->lock); 3623 lock_rw_wrlock(&conn->table->conid_lock); 3624 doq_conn_clear_conids(conn); 3625 lock_rw_unlock(&conn->table->conid_lock); 3626 /* Remove the app data from ngtcp2 before 
SSL_free of conn->ssl, 3627 * because the ngtcp2 conn is deleted. */ 3628 SSL_set_app_data(conn->ssl, NULL); 3629 if(conn->stream_tree.count != 0) { 3630 traverse_postorder(&conn->stream_tree, stream_tree_del, table); 3631 } 3632 free(conn->key.dcid); 3633 SSL_free(conn->ssl); 3634 #ifdef USE_NGTCP2_CRYPTO_OSSL 3635 ngtcp2_crypto_ossl_ctx_del(conn->ossl_ctx); 3636 #endif 3637 ngtcp2_conn_del(conn->conn); 3638 free(conn->close_pkt); 3639 free(conn); 3640 } 3641 3642 int 3643 doq_conn_cmp(const void* key1, const void* key2) 3644 { 3645 struct doq_conn* c = (struct doq_conn*)key1; 3646 struct doq_conn* d = (struct doq_conn*)key2; 3647 int r; 3648 /* Compared in the order destination address, then 3649 * local address, ifindex and then dcid. 3650 * So that for a search for findlessorequal for the destination 3651 * address will find connections to that address, with different 3652 * dcids. 3653 * Also a printout in sorted order prints the connections by IP 3654 * address of destination, and then a number of them depending on the 3655 * dcids. 
*/ 3656 if(c->key.paddr.addrlen != d->key.paddr.addrlen) { 3657 if(c->key.paddr.addrlen < d->key.paddr.addrlen) 3658 return -1; 3659 return 1; 3660 } 3661 if((r=memcmp(&c->key.paddr.addr, &d->key.paddr.addr, 3662 c->key.paddr.addrlen))!=0) 3663 return r; 3664 if(c->key.paddr.localaddrlen != d->key.paddr.localaddrlen) { 3665 if(c->key.paddr.localaddrlen < d->key.paddr.localaddrlen) 3666 return -1; 3667 return 1; 3668 } 3669 if((r=memcmp(&c->key.paddr.localaddr, &d->key.paddr.localaddr, 3670 c->key.paddr.localaddrlen))!=0) 3671 return r; 3672 if(c->key.paddr.ifindex != d->key.paddr.ifindex) { 3673 if(c->key.paddr.ifindex < d->key.paddr.ifindex) 3674 return -1; 3675 return 1; 3676 } 3677 if(c->key.dcidlen != d->key.dcidlen) { 3678 if(c->key.dcidlen < d->key.dcidlen) 3679 return -1; 3680 return 1; 3681 } 3682 if((r=memcmp(c->key.dcid, d->key.dcid, c->key.dcidlen))!=0) 3683 return r; 3684 return 0; 3685 } 3686 3687 int doq_conid_cmp(const void* key1, const void* key2) 3688 { 3689 struct doq_conid* c = (struct doq_conid*)key1; 3690 struct doq_conid* d = (struct doq_conid*)key2; 3691 if(c->cidlen != d->cidlen) { 3692 if(c->cidlen < d->cidlen) 3693 return -1; 3694 return 1; 3695 } 3696 return memcmp(c->cid, d->cid, c->cidlen); 3697 } 3698 3699 int doq_timer_cmp(const void* key1, const void* key2) 3700 { 3701 struct doq_timer* e = (struct doq_timer*)key1; 3702 struct doq_timer* f = (struct doq_timer*)key2; 3703 if(e->time.tv_sec < f->time.tv_sec) 3704 return -1; 3705 if(e->time.tv_sec > f->time.tv_sec) 3706 return 1; 3707 if(e->time.tv_usec < f->time.tv_usec) 3708 return -1; 3709 if(e->time.tv_usec > f->time.tv_usec) 3710 return 1; 3711 return 0; 3712 } 3713 3714 int doq_stream_cmp(const void* key1, const void* key2) 3715 { 3716 struct doq_stream* c = (struct doq_stream*)key1; 3717 struct doq_stream* d = (struct doq_stream*)key2; 3718 if(c->stream_id != d->stream_id) { 3719 if(c->stream_id < d->stream_id) 3720 return -1; 3721 return 1; 3722 } 3723 return 0; 3724 } 3725 3726 
/** doq store a local address in repinfo */ 3727 static void 3728 doq_repinfo_store_localaddr(struct comm_reply* repinfo, 3729 struct doq_addr_storage* localaddr, socklen_t localaddrlen) 3730 { 3731 /* use the pktinfo that we have for ancillary udp data otherwise, 3732 * this saves space for a sockaddr */ 3733 memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo)); 3734 if(addr_is_ip6((void*)localaddr, localaddrlen)) { 3735 #ifdef IPV6_PKTINFO 3736 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; 3737 memmove(&repinfo->pktinfo.v6info.ipi6_addr, 3738 &sa6->sin6_addr, sizeof(struct in6_addr)); 3739 repinfo->doq_srcport = sa6->sin6_port; 3740 #endif 3741 repinfo->srctype = 6; 3742 } else { 3743 #ifdef IP_PKTINFO 3744 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3745 memmove(&repinfo->pktinfo.v4info.ipi_addr, 3746 &sa->sin_addr, sizeof(struct in_addr)); 3747 repinfo->doq_srcport = sa->sin_port; 3748 #elif defined(IP_RECVDSTADDR) 3749 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3750 memmove(&repinfo->pktinfo.v4addr, &sa->sin_addr, 3751 sizeof(struct in_addr)); 3752 repinfo->doq_srcport = sa->sin_port; 3753 #endif 3754 repinfo->srctype = 4; 3755 } 3756 } 3757 3758 /** doq retrieve localaddr from repinfo */ 3759 static void 3760 doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo, 3761 struct doq_addr_storage* localaddr, socklen_t* localaddrlen) 3762 { 3763 if(repinfo->srctype == 6) { 3764 #ifdef IPV6_PKTINFO 3765 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; 3766 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in6); 3767 memset(sa6, 0, *localaddrlen); 3768 sa6->sin6_family = AF_INET6; 3769 memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr, 3770 sizeof(struct in6_addr)); 3771 sa6->sin6_port = repinfo->doq_srcport; 3772 #endif 3773 } else { 3774 #ifdef IP_PKTINFO 3775 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3776 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); 3777 memset(sa, 0, 
*localaddrlen); 3778 sa->sin_family = AF_INET; 3779 memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr, 3780 sizeof(struct in_addr)); 3781 sa->sin_port = repinfo->doq_srcport; 3782 #elif defined(IP_RECVDSTADDR) 3783 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3784 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); 3785 memset(sa, 0, *localaddrlen); 3786 sa->sin_family = AF_INET; 3787 memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr, 3788 sizeof(struct in_addr)); 3789 sa->sin_port = repinfo->doq_srcport; 3790 #endif 3791 } 3792 } 3793 3794 /** doq write a connection key into repinfo, false if it does not fit */ 3795 static int 3796 doq_conn_key_store_repinfo(struct doq_conn_key* key, 3797 struct comm_reply* repinfo) 3798 { 3799 repinfo->is_proxied = 0; 3800 repinfo->doq_ifindex = key->paddr.ifindex; 3801 repinfo->remote_addrlen = key->paddr.addrlen; 3802 memmove(&repinfo->remote_addr, &key->paddr.addr, 3803 repinfo->remote_addrlen); 3804 repinfo->client_addrlen = key->paddr.addrlen; 3805 memmove(&repinfo->client_addr, &key->paddr.addr, 3806 repinfo->client_addrlen); 3807 doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr, 3808 key->paddr.localaddrlen); 3809 if(key->dcidlen > sizeof(repinfo->doq_dcid)) 3810 return 0; 3811 repinfo->doq_dcidlen = key->dcidlen; 3812 memmove(repinfo->doq_dcid, key->dcid, key->dcidlen); 3813 return 1; 3814 } 3815 3816 void 3817 doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo) 3818 { 3819 key->paddr.ifindex = repinfo->doq_ifindex; 3820 key->paddr.addrlen = repinfo->remote_addrlen; 3821 memmove(&key->paddr.addr, &repinfo->remote_addr, 3822 repinfo->remote_addrlen); 3823 doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr, 3824 &key->paddr.localaddrlen); 3825 key->dcidlen = repinfo->doq_dcidlen; 3826 key->dcid = repinfo->doq_dcid; 3827 } 3828 3829 /** doq add a stream to the connection */ 3830 static void 3831 doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* 
stream) 3832 { 3833 (void)rbtree_insert(&conn->stream_tree, &stream->node); 3834 } 3835 3836 /** doq delete a stream from the connection */ 3837 static void 3838 doq_conn_del_stream(struct doq_conn* conn, struct doq_stream* stream) 3839 { 3840 (void)rbtree_delete(&conn->stream_tree, &stream->node); 3841 } 3842 3843 /** doq create new stream */ 3844 static struct doq_stream* 3845 doq_stream_create(int64_t stream_id) 3846 { 3847 struct doq_stream* stream = calloc(1, sizeof(*stream)); 3848 if(!stream) 3849 return NULL; 3850 stream->node.key = stream; 3851 stream->stream_id = stream_id; 3852 return stream; 3853 } 3854 3855 void doq_stream_delete(struct doq_stream* stream) 3856 { 3857 if(!stream) 3858 return; 3859 free(stream->in); 3860 free(stream->out); 3861 free(stream); 3862 } 3863 3864 struct doq_stream* 3865 doq_stream_find(struct doq_conn* conn, int64_t stream_id) 3866 { 3867 rbnode_type* node; 3868 struct doq_stream key; 3869 key.node.key = &key; 3870 key.stream_id = stream_id; 3871 node = rbtree_search(&conn->stream_tree, &key); 3872 if(node) 3873 return (struct doq_stream*)node->key; 3874 return NULL; 3875 } 3876 3877 /** doq put stream on the conn write list */ 3878 static void 3879 doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream) 3880 { 3881 if(stream->on_write_list) 3882 return; 3883 stream->write_prev = conn->stream_write_last; 3884 if(conn->stream_write_last) 3885 conn->stream_write_last->write_next = stream; 3886 else 3887 conn->stream_write_first = stream; 3888 conn->stream_write_last = stream; 3889 stream->write_next = NULL; 3890 stream->on_write_list = 1; 3891 } 3892 3893 /** doq remove stream from the conn write list */ 3894 static void 3895 doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream) 3896 { 3897 if(!stream->on_write_list) 3898 return; 3899 if(stream->write_next) 3900 stream->write_next->write_prev = stream->write_prev; 3901 else conn->stream_write_last = stream->write_prev; 3902 
if(stream->write_prev) 3903 stream->write_prev->write_next = stream->write_next; 3904 else conn->stream_write_first = stream->write_next; 3905 stream->write_prev = NULL; 3906 stream->write_next = NULL; 3907 stream->on_write_list = 0; 3908 } 3909 3910 /** doq stream remove in buffer */ 3911 static void 3912 doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table) 3913 { 3914 if(stream->in) { 3915 doq_table_quic_size_subtract(table, stream->inlen); 3916 free(stream->in); 3917 stream->in = NULL; 3918 stream->inlen = 0; 3919 } 3920 } 3921 3922 /** doq stream remove out buffer */ 3923 static void 3924 doq_stream_remove_out_buffer(struct doq_stream* stream, 3925 struct doq_table* table) 3926 { 3927 if(stream->out) { 3928 doq_table_quic_size_subtract(table, stream->outlen); 3929 free(stream->out); 3930 stream->out = NULL; 3931 stream->outlen = 0; 3932 } 3933 } 3934 3935 int 3936 doq_stream_close(struct doq_conn* conn, struct doq_stream* stream, 3937 int send_shutdown) 3938 { 3939 int ret; 3940 if(stream->is_closed) 3941 return 1; 3942 stream->is_closed = 1; 3943 doq_stream_off_write_list(conn, stream); 3944 if(send_shutdown) { 3945 verbose(VERB_ALGO, "doq: shutdown stream_id %d with app_error_code %d", 3946 (int)stream->stream_id, (int)DOQ_APP_ERROR_CODE); 3947 ret = ngtcp2_conn_shutdown_stream(conn->conn, 3948 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 3949 0, 3950 #endif 3951 stream->stream_id, DOQ_APP_ERROR_CODE); 3952 if(ret != 0) { 3953 log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s", 3954 (int)stream->stream_id, ngtcp2_strerror(ret)); 3955 return 0; 3956 } 3957 doq_conn_write_enable(conn); 3958 } 3959 verbose(VERB_ALGO, "doq: conn extend max streams bidi by 1"); 3960 ngtcp2_conn_extend_max_streams_bidi(conn->conn, 1); 3961 doq_conn_write_enable(conn); 3962 doq_stream_remove_in_buffer(stream, conn->doq_socket->table); 3963 doq_stream_remove_out_buffer(stream, conn->doq_socket->table); 3964 
doq_table_quic_size_subtract(conn->doq_socket->table, sizeof(*stream)); 3965 doq_conn_del_stream(conn, stream); 3966 doq_stream_delete(stream); 3967 return 1; 3968 } 3969 3970 /** doq stream pick up answer data from buffer */ 3971 static int 3972 doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf) 3973 { 3974 stream->is_answer_available = 1; 3975 if(stream->out) { 3976 free(stream->out); 3977 stream->out = NULL; 3978 stream->outlen = 0; 3979 } 3980 stream->nwrite = 0; 3981 stream->outlen = sldns_buffer_limit(buf); 3982 /* For quic the output bytes have to stay allocated and available, 3983 * for potential resends, until the remote end has acknowledged them. 3984 * This includes the tcplen start uint16_t, in outlen_wire. */ 3985 stream->outlen_wire = htons(stream->outlen); 3986 stream->out = memdup(sldns_buffer_begin(buf), sldns_buffer_limit(buf)); 3987 if(!stream->out) { 3988 log_err("doq could not send answer: out of memory"); 3989 return 0; 3990 } 3991 return 1; 3992 } 3993 3994 int 3995 doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream, 3996 struct sldns_buffer* buf) 3997 { 3998 if(verbosity >= VERB_ALGO) { 3999 char* s = sldns_wire2str_pkt(sldns_buffer_begin(buf), 4000 sldns_buffer_limit(buf)); 4001 verbose(VERB_ALGO, "doq stream %d response\n%s", 4002 (int)stream->stream_id, (s?s:"null")); 4003 free(s); 4004 } 4005 if(stream->out) 4006 doq_table_quic_size_subtract(conn->doq_socket->table, 4007 stream->outlen); 4008 if(!doq_stream_pickup_answer(stream, buf)) 4009 return 0; 4010 doq_table_quic_size_add(conn->doq_socket->table, stream->outlen); 4011 doq_stream_on_write_list(conn, stream); 4012 doq_conn_write_enable(conn); 4013 return 1; 4014 } 4015 4016 /** doq stream data length has completed, allocations can be done. False on 4017 * allocation failure. 
*/ 4018 static int 4019 doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table) 4020 { 4021 if(stream->inlen > 1024*1024) { 4022 log_err("doq stream in length too large %d", 4023 (int)stream->inlen); 4024 return 0; 4025 } 4026 stream->in = calloc(1, stream->inlen); 4027 if(!stream->in) { 4028 log_err("doq could not read stream, calloc failed: " 4029 "out of memory"); 4030 return 0; 4031 } 4032 doq_table_quic_size_add(table, stream->inlen); 4033 return 1; 4034 } 4035 4036 /** doq stream data is complete, the input data has been received. */ 4037 static int 4038 doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream) 4039 { 4040 struct comm_point* c; 4041 if(verbosity >= VERB_ALGO) { 4042 char* s = sldns_wire2str_pkt(stream->in, stream->inlen); 4043 char a[128]; 4044 addr_to_str((void*)&conn->key.paddr.addr, 4045 conn->key.paddr.addrlen, a, sizeof(a)); 4046 verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s", 4047 a, (int)stream->stream_id, (s?s:"null")); 4048 free(s); 4049 } 4050 stream->is_query_complete = 1; 4051 c = conn->doq_socket->cp; 4052 if(!stream->in) { 4053 verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer"); 4054 return 0; 4055 } 4056 if(stream->inlen > sldns_buffer_capacity(c->buffer)) { 4057 verbose(VERB_ALGO, "doq_stream_data_complete: query too long"); 4058 return 0; 4059 } 4060 sldns_buffer_clear(c->buffer); 4061 sldns_buffer_write(c->buffer, stream->in, stream->inlen); 4062 sldns_buffer_flip(c->buffer); 4063 c->repinfo.c = c; 4064 if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) { 4065 verbose(VERB_ALGO, "doq_stream_data_complete: connection " 4066 "DCID too long"); 4067 return 0; 4068 } 4069 c->repinfo.doq_streamid = stream->stream_id; 4070 conn->doq_socket->current_conn = conn; 4071 fptr_ok(fptr_whitelist_comm_point(c->callback)); 4072 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) { 4073 conn->doq_socket->current_conn = NULL; 4074 if(!doq_stream_send_reply(conn, 
stream, c->buffer)) { 4075 verbose(VERB_ALGO, "doq: failed to send_reply"); 4076 return 0; 4077 } 4078 return 1; 4079 } 4080 conn->doq_socket->current_conn = NULL; 4081 return 1; 4082 } 4083 4084 /** doq receive data for a stream, more bytes of the incoming data */ 4085 static int 4086 doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data, 4087 size_t datalen, int* recv_done, struct doq_table* table) 4088 { 4089 int got_data = 0; 4090 /* read the tcplength uint16_t at the start */ 4091 if(stream->nread < 2) { 4092 uint16_t tcplen = 0; 4093 size_t todolen = 2 - stream->nread; 4094 4095 if(stream->nread > 0) { 4096 /* put in the already read byte if there is one */ 4097 tcplen = stream->inlen; 4098 } 4099 if(datalen < todolen) 4100 todolen = datalen; 4101 memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen); 4102 stream->nread += todolen; 4103 data += todolen; 4104 datalen -= todolen; 4105 if(stream->nread == 2) { 4106 /* the initial length value is completed */ 4107 stream->inlen = ntohs(tcplen); 4108 if(!doq_stream_datalen_complete(stream, table)) 4109 return 0; 4110 } else { 4111 /* store for later */ 4112 stream->inlen = tcplen; 4113 return 1; 4114 } 4115 } 4116 /* if there are more data bytes */ 4117 if(datalen > 0) { 4118 size_t to_write = datalen; 4119 if(stream->nread-2 > stream->inlen) { 4120 verbose(VERB_ALGO, "doq stream buffer too small"); 4121 return 0; 4122 } 4123 if(datalen > stream->inlen - (stream->nread-2)) 4124 to_write = stream->inlen - (stream->nread-2); 4125 if(to_write > 0) { 4126 if(!stream->in) { 4127 verbose(VERB_ALGO, "doq: stream has " 4128 "no buffer"); 4129 return 0; 4130 } 4131 memmove(stream->in+(stream->nread-2), data, to_write); 4132 stream->nread += to_write; 4133 data += to_write; 4134 datalen -= to_write; 4135 got_data = 1; 4136 } 4137 } 4138 /* Are there extra bytes received after the end? If so, log them. 
*/ 4139 if(datalen > 0) { 4140 if(verbosity >= VERB_ALGO) 4141 log_hex("doq stream has extra bytes received after end", 4142 (void*)data, datalen); 4143 } 4144 /* Is the input data complete? */ 4145 if(got_data && stream->nread >= stream->inlen+2) { 4146 if(!stream->in) { 4147 verbose(VERB_ALGO, "doq: completed stream has " 4148 "no buffer"); 4149 return 0; 4150 } 4151 *recv_done = 1; 4152 } 4153 return 1; 4154 } 4155 4156 /** doq receive FIN for a stream. No more bytes are going to arrive. */ 4157 static int 4158 doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int 4159 recv_done) 4160 { 4161 if(!stream->is_query_complete && !recv_done) { 4162 verbose(VERB_ALGO, "doq: stream recv FIN, but is " 4163 "not complete, have %d of %d bytes", 4164 ((int)stream->nread)-2, (int)stream->inlen); 4165 if(!doq_stream_close(conn, stream, 1)) 4166 return 0; 4167 } 4168 return 1; 4169 } 4170 4171 void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len) 4172 { 4173 size_t i; 4174 for(i=0; i<len; i++) 4175 buf[i] = ub_random(rnd)&0xff; 4176 } 4177 4178 /** generate new connection id, checks for duplicates. 4179 * caller must hold lock on conid tree. */ 4180 static int 4181 doq_conn_generate_new_conid(struct doq_conn* conn, uint8_t* data, 4182 size_t datalen) 4183 { 4184 int max_try = 100; 4185 int i; 4186 for(i=0; i<max_try; i++) { 4187 doq_fill_rand(conn->doq_socket->rnd, data, datalen); 4188 if(!doq_conid_find(conn->table, data, datalen)) { 4189 /* Found an unused connection id. 
*/ 4190 return 1; 4191 } 4192 } 4193 verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not " 4194 "generate random unused connection id value in %d attempts.", 4195 max_try); 4196 return 0; 4197 } 4198 4199 /** ngtcp2 rand callback function */ 4200 static void 4201 doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx) 4202 { 4203 struct ub_randstate* rnd = (struct ub_randstate*) 4204 rand_ctx->native_handle; 4205 doq_fill_rand(rnd, dest, destlen); 4206 } 4207 4208 /** ngtcp2 get_new_connection_id callback function */ 4209 static int 4210 doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid, 4211 uint8_t* token, size_t cidlen, void* user_data) 4212 { 4213 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4214 /* Lock the conid tree, so we can check for duplicates while 4215 * generating the id, and then insert it, whilst keeping the tree 4216 * locked against other modifications, guaranteeing uniqueness. */ 4217 lock_rw_wrlock(&doq_conn->table->conid_lock); 4218 if(!doq_conn_generate_new_conid(doq_conn, cid->data, cidlen)) { 4219 lock_rw_unlock(&doq_conn->table->conid_lock); 4220 return NGTCP2_ERR_CALLBACK_FAILURE; 4221 } 4222 cid->datalen = cidlen; 4223 if(ngtcp2_crypto_generate_stateless_reset_token(token, 4224 doq_conn->doq_socket->static_secret, 4225 doq_conn->doq_socket->static_secret_len, cid) != 0) { 4226 lock_rw_unlock(&doq_conn->table->conid_lock); 4227 return NGTCP2_ERR_CALLBACK_FAILURE; 4228 } 4229 if(!doq_conn_associate_conid(doq_conn, cid->data, cid->datalen)) { 4230 lock_rw_unlock(&doq_conn->table->conid_lock); 4231 return NGTCP2_ERR_CALLBACK_FAILURE; 4232 } 4233 lock_rw_unlock(&doq_conn->table->conid_lock); 4234 return 0; 4235 } 4236 4237 /** ngtcp2 remove_connection_id callback function */ 4238 static int 4239 doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), 4240 const ngtcp2_cid* cid, void* user_data) 4241 { 4242 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4243 
lock_rw_wrlock(&doq_conn->table->conid_lock); 4244 doq_conn_dissociate_conid(doq_conn, cid->data, cid->datalen); 4245 lock_rw_unlock(&doq_conn->table->conid_lock); 4246 return 0; 4247 } 4248 4249 /** doq submit a new token */ 4250 static int 4251 doq_submit_new_token(struct doq_conn* conn) 4252 { 4253 uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN]; 4254 ngtcp2_ssize tokenlen; 4255 int ret; 4256 const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn); 4257 ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); 4258 4259 tokenlen = ngtcp2_crypto_generate_regular_token(token, 4260 conn->doq_socket->static_secret, 4261 conn->doq_socket->static_secret_len, path->remote.addr, 4262 path->remote.addrlen, ts); 4263 if(tokenlen < 0) { 4264 log_err("doq ngtcp2_crypto_generate_regular_token failed"); 4265 return 1; 4266 } 4267 4268 verbose(VERB_ALGO, "doq submit new token"); 4269 ret = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen); 4270 if(ret != 0) { 4271 log_err("doq ngtcp2_conn_submit_new_token failed: %s", 4272 ngtcp2_strerror(ret)); 4273 return 0; 4274 } 4275 return 1; 4276 } 4277 4278 /** ngtcp2 handshake_completed callback function */ 4279 static int 4280 doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data) 4281 { 4282 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4283 verbose(VERB_ALGO, "doq handshake_completed callback"); 4284 verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d", 4285 (int)ngtcp2_conn_get_max_data_left(doq_conn->conn)); 4286 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 4287 verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d", 4288 (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn)); 4289 #endif 4290 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d", 4291 (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn)); 4292 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d", 4293 (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn)); 4294 verbose(VERB_ALGO, 
"negotiated cipher name is %s", 4295 SSL_get_cipher_name(doq_conn->ssl)); 4296 if(verbosity > VERB_ALGO) { 4297 const unsigned char* alpn = NULL; 4298 unsigned int alpnlen = 0; 4299 char alpnstr[128]; 4300 SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen); 4301 if(alpnlen > sizeof(alpnstr)-1) 4302 alpnlen = sizeof(alpnstr)-1; 4303 memmove(alpnstr, alpn, alpnlen); 4304 alpnstr[alpnlen]=0; 4305 verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr); 4306 } 4307 4308 if(!doq_submit_new_token(doq_conn)) 4309 return -1; 4310 return 0; 4311 } 4312 4313 /** ngtcp2 stream_open callback function */ 4314 static int 4315 doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, 4316 void* user_data) 4317 { 4318 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4319 struct doq_stream* stream; 4320 verbose(VERB_ALGO, "doq new stream %x", (int)stream_id); 4321 if(doq_stream_find(doq_conn, stream_id)) { 4322 verbose(VERB_ALGO, "doq: stream with this id already exists"); 4323 return 0; 4324 } 4325 if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */ 4326 !doq_table_quic_size_available(doq_conn->doq_socket->table, 4327 doq_conn->doq_socket->cfg, sizeof(*stream) 4328 + 100 /* estimated query in */ 4329 + 512 /* estimated response out */ 4330 )) { 4331 int rv; 4332 verbose(VERB_ALGO, "doq: no mem for new stream"); 4333 rv = ngtcp2_conn_shutdown_stream(doq_conn->conn, 4334 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 4335 0, 4336 #endif 4337 stream_id, NGTCP2_CONNECTION_REFUSED); 4338 if(rv != 0) { 4339 log_err("ngtcp2_conn_shutdown_stream failed: %s", 4340 ngtcp2_strerror(rv)); 4341 return NGTCP2_ERR_CALLBACK_FAILURE; 4342 } 4343 return 0; 4344 } 4345 stream = doq_stream_create(stream_id); 4346 if(!stream) { 4347 log_err("doq: could not doq_stream_create: out of memory"); 4348 return NGTCP2_ERR_CALLBACK_FAILURE; 4349 } 4350 doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream)); 4351 doq_conn_add_stream(doq_conn, 
stream); 4352 return 0; 4353 } 4354 4355 /** ngtcp2 recv_stream_data callback function */ 4356 static int 4357 doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, 4358 int64_t stream_id, uint64_t offset, const uint8_t* data, 4359 size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data)) 4360 { 4361 int recv_done = 0; 4362 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4363 struct doq_stream* stream; 4364 verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d " 4365 "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen, 4366 ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""), 4367 #ifdef NGTCP2_STREAM_DATA_FLAG_0RTT 4368 ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"") 4369 #else 4370 ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"") 4371 #endif 4372 ); 4373 stream = doq_stream_find(doq_conn, stream_id); 4374 if(!stream) { 4375 verbose(VERB_ALGO, "doq: received stream data for " 4376 "unknown stream %d", (int)stream_id); 4377 return 0; 4378 } 4379 if(stream->is_closed) { 4380 verbose(VERB_ALGO, "doq: stream is closed, ignore recv data"); 4381 return 0; 4382 } 4383 if(datalen != 0) { 4384 if(!doq_stream_recv_data(stream, data, datalen, &recv_done, 4385 doq_conn->doq_socket->table)) 4386 return NGTCP2_ERR_CALLBACK_FAILURE; 4387 } 4388 if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) { 4389 if(!doq_stream_recv_fin(doq_conn, stream, recv_done)) 4390 return NGTCP2_ERR_CALLBACK_FAILURE; 4391 } 4392 ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id, 4393 datalen); 4394 ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen); 4395 if(recv_done) { 4396 if(!doq_stream_data_complete(doq_conn, stream)) 4397 return NGTCP2_ERR_CALLBACK_FAILURE; 4398 } 4399 return 0; 4400 } 4401 4402 /** ngtcp2 stream_close callback function */ 4403 static int 4404 doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, 4405 int64_t stream_id, uint64_t app_error_code, void* user_data, 4406 void* 
ATTR_UNUSED(stream_user_data)) 4407 { 4408 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4409 struct doq_stream* stream; 4410 if((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0) 4411 verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d", 4412 (int)stream_id, 4413 (((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)? 4414 "APP_ERROR_CODE_SET ":""), 4415 (int)app_error_code); 4416 else 4417 verbose(VERB_ALGO, "doq stream close for stream id %d", 4418 (int)stream_id); 4419 4420 stream = doq_stream_find(doq_conn, stream_id); 4421 if(!stream) { 4422 verbose(VERB_ALGO, "doq: stream close for " 4423 "unknown stream %d", (int)stream_id); 4424 return 0; 4425 } 4426 if(!doq_stream_close(doq_conn, stream, 0)) 4427 return NGTCP2_ERR_CALLBACK_FAILURE; 4428 return 0; 4429 } 4430 4431 /** ngtcp2 stream_reset callback function */ 4432 static int 4433 doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, 4434 uint64_t final_size, uint64_t app_error_code, void* user_data, 4435 void* ATTR_UNUSED(stream_user_data)) 4436 { 4437 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4438 struct doq_stream* stream; 4439 verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d " 4440 "app_error_code %d", (int)stream_id, (int)final_size, 4441 (int)app_error_code); 4442 4443 stream = doq_stream_find(doq_conn, stream_id); 4444 if(!stream) { 4445 verbose(VERB_ALGO, "doq: stream reset for " 4446 "unknown stream %d", (int)stream_id); 4447 return 0; 4448 } 4449 if(!doq_stream_close(doq_conn, stream, 0)) 4450 return NGTCP2_ERR_CALLBACK_FAILURE; 4451 return 0; 4452 } 4453 4454 /** ngtcp2 acked_stream_data_offset callback function */ 4455 static int 4456 doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn), 4457 int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data, 4458 void* ATTR_UNUSED(stream_user_data)) 4459 { 4460 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4461 struct doq_stream* 
stream; 4462 verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d " 4463 "datalen %d", (int)stream_id, (int)offset, (int)datalen); 4464 4465 stream = doq_stream_find(doq_conn, stream_id); 4466 if(!stream) { 4467 verbose(VERB_ALGO, "doq: stream acked data for " 4468 "unknown stream %d", (int)stream_id); 4469 return 0; 4470 } 4471 /* Acked the data from [offset .. offset+datalen). */ 4472 if(stream->is_closed) 4473 return 0; 4474 if(offset+datalen >= stream->outlen) { 4475 doq_stream_remove_in_buffer(stream, 4476 doq_conn->doq_socket->table); 4477 doq_stream_remove_out_buffer(stream, 4478 doq_conn->doq_socket->table); 4479 } 4480 return 0; 4481 } 4482 4483 /** ngtc2p log_printf callback function */ 4484 static void 4485 doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...) 4486 { 4487 char buf[1024]; 4488 va_list ap; 4489 va_start(ap, fmt); 4490 vsnprintf(buf, sizeof(buf), fmt, ap); 4491 verbose(VERB_ALGO, "libngtcp2: %s", buf); 4492 va_end(ap); 4493 } 4494 4495 #ifdef MAKE_QUIC_METHOD 4496 /** the doq application tx key callback, false on failure */ 4497 static int 4498 doq_application_tx_key_cb(struct doq_conn* conn) 4499 { 4500 verbose(VERB_ALGO, "doq application tx key cb"); 4501 /* The server does not want to open streams to the client, 4502 * the client instead initiates by opening bidi streams. 
*/ 4503 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d", 4504 (int)ngtcp2_conn_get_max_data_left(conn->conn)); 4505 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 4506 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d", 4507 (int)ngtcp2_conn_get_max_local_streams_uni(conn->conn)); 4508 #endif 4509 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d", 4510 (int)ngtcp2_conn_get_streams_uni_left(conn->conn)); 4511 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d", 4512 (int)ngtcp2_conn_get_streams_bidi_left(conn->conn)); 4513 return 1; 4514 } 4515 4516 /** quic_method set_encryption_secrets function */ 4517 static int 4518 doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, 4519 const uint8_t *read_secret, const uint8_t *write_secret, 4520 size_t secret_len) 4521 { 4522 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4523 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL 4524 ngtcp2_encryption_level 4525 #else 4526 ngtcp2_crypto_level 4527 #endif 4528 level = 4529 #ifdef USE_NGTCP2_CRYPTO_OSSL 4530 ngtcp2_crypto_ossl_from_ossl_encryption_level(ossl_level); 4531 #elif defined(HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL) 4532 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); 4533 #else 4534 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); 4535 #endif 4536 4537 if(read_secret) { 4538 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level); 4539 if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn, 4540 NULL, NULL, NULL, level, read_secret, secret_len) 4541 != 0) { 4542 log_err("ngtcp2_crypto_derive_and_install_rx_key " 4543 "failed"); 4544 return 0; 4545 } 4546 } 4547 4548 if(write_secret) { 4549 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level); 4550 if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn, 4551 NULL, 
NULL, NULL, level, write_secret, secret_len) 4552 != 0) { 4553 log_err("ngtcp2_crypto_derive_and_install_tx_key " 4554 "failed"); 4555 return 0; 4556 } 4557 if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) { 4558 if(!doq_application_tx_key_cb(doq_conn)) 4559 return 0; 4560 } 4561 } 4562 return 1; 4563 } 4564 4565 /** quic_method add_handshake_data function */ 4566 static int 4567 doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, 4568 const uint8_t *data, size_t len) 4569 { 4570 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4571 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL 4572 ngtcp2_encryption_level 4573 #else 4574 ngtcp2_crypto_level 4575 #endif 4576 level = 4577 #ifdef USE_NGTCP2_CRYPTO_OSSL 4578 ngtcp2_crypto_ossl_from_ossl_encryption_level(ossl_level); 4579 #elif defined(HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL) 4580 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); 4581 #else 4582 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); 4583 #endif 4584 int rv; 4585 4586 verbose(VERB_ALGO, "doq_add_handshake_data: " 4587 "ngtcp2_con_submit_crypto_data level %d", (int)level); 4588 rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len); 4589 if(rv != 0) { 4590 log_err("ngtcp2_conn_submit_crypto_data failed: %s", 4591 ngtcp2_strerror(rv)); 4592 ngtcp2_conn_set_tls_error(doq_conn->conn, rv); 4593 return 0; 4594 } 4595 return 1; 4596 } 4597 4598 /** quic_method flush_flight function */ 4599 static int 4600 doq_flush_flight(SSL* ATTR_UNUSED(ssl)) 4601 { 4602 return 1; 4603 } 4604 4605 /** quic_method send_alert function */ 4606 static int 4607 doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level), 4608 uint8_t alert) 4609 { 4610 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4611 doq_conn->tls_alert = alert; 4612 return 1; 4613 } 4614 #endif /* MAKE_QUIC_METHOD */ 4615 4616 /** ALPN select callback for the doq SSL context */ 4617 static int 4618 
doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out, 4619 unsigned char* outlen, const unsigned char* in, unsigned int inlen, 4620 void* ATTR_UNUSED(arg)) 4621 { 4622 /* select "doq" */ 4623 int ret = SSL_select_next_proto((void*)out, outlen, 4624 (const unsigned char*)"\x03""doq", 4, in, inlen); 4625 if(ret == OPENSSL_NPN_NEGOTIATED) 4626 return SSL_TLSEXT_ERR_OK; 4627 verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does " 4628 "not have 'doq'"); 4629 return SSL_TLSEXT_ERR_ALERT_FATAL; 4630 } 4631 4632 void* quic_sslctx_create(char* key, char* pem, char* verifypem) 4633 { 4634 #ifdef HAVE_NGTCP2 4635 char* sid_ctx = "unbound server"; 4636 #ifdef MAKE_QUIC_METHOD 4637 SSL_QUIC_METHOD* quic_method; 4638 #endif 4639 SSL_CTX* ctx = SSL_CTX_new(TLS_server_method()); 4640 if(!ctx) { 4641 log_crypto_err("Could not SSL_CTX_new"); 4642 return NULL; 4643 } 4644 if(!key || key[0] == 0) { 4645 log_err("doq: error, no tls-service-key file specified"); 4646 SSL_CTX_free(ctx); 4647 return NULL; 4648 } 4649 if(!pem || pem[0] == 0) { 4650 log_err("doq: error, no tls-service-pem file specified"); 4651 SSL_CTX_free(ctx); 4652 return NULL; 4653 } 4654 SSL_CTX_set_options(ctx, 4655 (SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) | 4656 SSL_OP_SINGLE_ECDH_USE | 4657 SSL_OP_CIPHER_SERVER_PREFERENCE | 4658 SSL_OP_NO_ANTI_REPLAY); 4659 SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS); 4660 SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION); 4661 SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION); 4662 #ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB 4663 SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL); 4664 #endif 4665 SSL_CTX_set_default_verify_paths(ctx); 4666 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) { 4667 log_err("doq: error for cert file: %s", pem); 4668 log_crypto_err("doq: error in " 4669 "SSL_CTX_use_certificate_chain_file"); 4670 SSL_CTX_free(ctx); 4671 return NULL; 4672 } 4673 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) { 4674 
log_err("doq: error for private key file: %s", key); 4675 log_crypto_err("doq: error in SSL_CTX_use_PrivateKey_file"); 4676 SSL_CTX_free(ctx); 4677 return NULL; 4678 } 4679 if(!SSL_CTX_check_private_key(ctx)) { 4680 log_err("doq: error for key file: %s", key); 4681 log_crypto_err("doq: error in SSL_CTX_check_private_key"); 4682 SSL_CTX_free(ctx); 4683 return NULL; 4684 } 4685 SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx)); 4686 if(verifypem && verifypem[0]) { 4687 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) { 4688 log_err("doq: error for verify pem file: %s", 4689 verifypem); 4690 log_crypto_err("doq: error in " 4691 "SSL_CTX_load_verify_locations"); 4692 SSL_CTX_free(ctx); 4693 return NULL; 4694 } 4695 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file( 4696 verifypem)); 4697 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER| 4698 SSL_VERIFY_CLIENT_ONCE| 4699 SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); 4700 } 4701 4702 SSL_CTX_set_max_early_data(ctx, 0xffffffff); 4703 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4704 if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) { 4705 log_err("ngtcp2_crypto_quictls_configure_server_context failed"); 4706 SSL_CTX_free(ctx); 4707 return NULL; 4708 } 4709 #elif defined(MAKE_QUIC_METHOD) 4710 /* The quic_method needs to remain valid during the SSL_CTX 4711 * lifetime, so we allocate it. It is freed with the 4712 * doq_server_socket. 
*/ 4713 quic_method = calloc(1, sizeof(SSL_QUIC_METHOD)); 4714 if(!quic_method) { 4715 log_err("calloc failed: out of memory"); 4716 SSL_CTX_free(ctx); 4717 return NULL; 4718 } 4719 doq_socket->quic_method = quic_method; 4720 quic_method->set_encryption_secrets = doq_set_encryption_secrets; 4721 quic_method->add_handshake_data = doq_add_handshake_data; 4722 quic_method->flush_flight = doq_flush_flight; 4723 quic_method->send_alert = doq_send_alert; 4724 SSL_CTX_set_quic_method(ctx, doq_socket->quic_method); 4725 #endif 4726 return ctx; 4727 #else /* HAVE_NGTCP2 */ 4728 (void)key; (void)pem; (void)verifypem; 4729 return NULL; 4730 #endif /* HAVE_NGTCP2 */ 4731 } 4732 4733 /** Get the ngtcp2_conn from ssl userdata of type ngtcp2_conn_ref */ 4734 static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref) 4735 { 4736 struct doq_conn* conn = (struct doq_conn*)conn_ref->user_data; 4737 return conn->conn; 4738 } 4739 4740 /** create new SSL session for server connection */ 4741 static SSL* 4742 doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn) 4743 { 4744 #ifdef USE_NGTCP2_CRYPTO_OSSL 4745 int ret; 4746 #endif 4747 SSL* ssl = SSL_new(ctx); 4748 if(!ssl) { 4749 log_crypto_err("doq: SSL_new failed"); 4750 return NULL; 4751 } 4752 #ifdef USE_NGTCP2_CRYPTO_OSSL 4753 if((ret=ngtcp2_crypto_ossl_ctx_new(&conn->ossl_ctx, NULL)) != 0) { 4754 log_err("doq: ngtcp2_crypto_ossl_ctx_new failed: %s", 4755 ngtcp2_strerror(ret)); 4756 SSL_free(ssl); 4757 return NULL; 4758 } 4759 ngtcp2_crypto_ossl_ctx_set_ssl(conn->ossl_ctx, ssl); 4760 if(ngtcp2_crypto_ossl_configure_server_session(ssl) != 0) { 4761 log_err("doq: ngtcp2_crypto_ossl_configure_server_session failed"); 4762 SSL_free(ssl); 4763 return NULL; 4764 } 4765 #endif 4766 #if defined(USE_NGTCP2_CRYPTO_OSSL) || defined(HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT) 4767 conn->conn_ref.get_conn = &doq_conn_ref_get_conn; 4768 conn->conn_ref.user_data = conn; 4769 SSL_set_app_data(ssl, &conn->conn_ref); 
4770 #else 4771 SSL_set_app_data(ssl, conn); 4772 #endif 4773 SSL_set_accept_state(ssl); 4774 #ifdef USE_NGTCP2_CRYPTO_OSSL 4775 SSL_set_quic_tls_early_data_enabled(ssl, 1); 4776 #else 4777 SSL_set_quic_early_data_enabled(ssl, 1); 4778 #endif 4779 return ssl; 4780 } 4781 4782 int 4783 doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen, 4784 uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen) 4785 { 4786 int rv; 4787 struct ngtcp2_cid dcid, sv_scid, scid_cid; 4788 struct ngtcp2_path path; 4789 struct ngtcp2_callbacks callbacks; 4790 struct ngtcp2_settings settings; 4791 struct ngtcp2_transport_params params; 4792 memset(&dcid, 0, sizeof(dcid)); 4793 memset(&sv_scid, 0, sizeof(sv_scid)); 4794 memset(&scid_cid, 0, sizeof(scid_cid)); 4795 memset(&path, 0, sizeof(path)); 4796 memset(&callbacks, 0, sizeof(callbacks)); 4797 memset(&settings, 0, sizeof(settings)); 4798 memset(¶ms, 0, sizeof(params)); 4799 4800 ngtcp2_cid_init(&scid_cid, scid, scidlen); 4801 ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen); 4802 4803 path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr; 4804 path.remote.addrlen = conn->key.paddr.addrlen; 4805 path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr; 4806 path.local.addrlen = conn->key.paddr.localaddrlen; 4807 4808 callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb; 4809 callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb; 4810 callbacks.encrypt = ngtcp2_crypto_encrypt_cb; 4811 callbacks.decrypt = ngtcp2_crypto_decrypt_cb; 4812 callbacks.hp_mask = ngtcp2_crypto_hp_mask; 4813 callbacks.update_key = ngtcp2_crypto_update_key_cb; 4814 callbacks.delete_crypto_aead_ctx = 4815 ngtcp2_crypto_delete_crypto_aead_ctx_cb; 4816 callbacks.delete_crypto_cipher_ctx = 4817 ngtcp2_crypto_delete_crypto_cipher_ctx_cb; 4818 callbacks.get_path_challenge_data = 4819 ngtcp2_crypto_get_path_challenge_data_cb; 4820 callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb; 
4821 callbacks.rand = doq_rand_cb; 4822 callbacks.get_new_connection_id = doq_get_new_connection_id_cb; 4823 callbacks.remove_connection_id = doq_remove_connection_id_cb; 4824 callbacks.handshake_completed = doq_handshake_completed_cb; 4825 callbacks.stream_open = doq_stream_open_cb; 4826 callbacks.stream_close = doq_stream_close_cb; 4827 callbacks.stream_reset = doq_stream_reset_cb; 4828 callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb; 4829 callbacks.recv_stream_data = doq_recv_stream_data_cb; 4830 4831 ngtcp2_settings_default(&settings); 4832 if(verbosity >= VERB_ALGO) { 4833 settings.log_printf = doq_log_printf_cb; 4834 } 4835 settings.rand_ctx.native_handle = conn->doq_socket->rnd; 4836 settings.initial_ts = doq_get_timestamp_nanosec(); 4837 settings.max_stream_window = 6*1024*1024; 4838 settings.max_window = 6*1024*1024; 4839 #ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN 4840 settings.token = (void*)token; 4841 settings.tokenlen = tokenlen; 4842 #else 4843 settings.token.base = (void*)token; 4844 settings.token.len = tokenlen; 4845 #endif 4846 4847 ngtcp2_transport_params_default(¶ms); 4848 params.max_idle_timeout = conn->doq_socket->idle_timeout; 4849 params.active_connection_id_limit = 7; 4850 params.initial_max_stream_data_bidi_local = 256*1024; 4851 params.initial_max_stream_data_bidi_remote = 256*1024; 4852 params.initial_max_data = 1024*1024; 4853 /* DoQ uses bidi streams, so we allow 0 uni streams. */ 4854 params.initial_max_streams_uni = 0; 4855 /* Initial max on number of bidi streams the remote end can open. 4856 * That is the number of queries it can make, at first. 
*/ 4857 params.initial_max_streams_bidi = 10; 4858 if(ocid) { 4859 ngtcp2_cid_init(¶ms.original_dcid, ocid, ocidlen); 4860 ngtcp2_cid_init(¶ms.retry_scid, conn->key.dcid, 4861 conn->key.dcidlen); 4862 params.retry_scid_present = 1; 4863 } else { 4864 ngtcp2_cid_init(¶ms.original_dcid, conn->key.dcid, 4865 conn->key.dcidlen); 4866 } 4867 #ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT 4868 params.original_dcid_present = 1; 4869 #endif 4870 doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token, 4871 sizeof(params.stateless_reset_token)); 4872 sv_scid.datalen = conn->doq_socket->sv_scidlen; 4873 lock_rw_wrlock(&conn->table->conid_lock); 4874 if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) { 4875 lock_rw_unlock(&conn->table->conid_lock); 4876 return 0; 4877 } 4878 4879 rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path, 4880 conn->version, &callbacks, &settings, ¶ms, NULL, conn); 4881 if(rv != 0) { 4882 lock_rw_unlock(&conn->table->conid_lock); 4883 log_err("ngtcp2_conn_server_new failed: %s", 4884 ngtcp2_strerror(rv)); 4885 return 0; 4886 } 4887 if(!doq_conn_setup_conids(conn)) { 4888 lock_rw_unlock(&conn->table->conid_lock); 4889 log_err("doq_conn_setup_conids failed: out of memory"); 4890 return 0; 4891 } 4892 lock_rw_unlock(&conn->table->conid_lock); 4893 conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx, 4894 conn); 4895 if(!conn->ssl) { 4896 log_err("doq_ssl_server_setup failed"); 4897 return 0; 4898 } 4899 #ifdef USE_NGTCP2_CRYPTO_OSSL 4900 ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ossl_ctx); 4901 #else 4902 ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl); 4903 #endif 4904 doq_conn_write_enable(conn); 4905 return 1; 4906 } 4907 4908 struct doq_conid* 4909 doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen) 4910 { 4911 struct rbnode_type* node; 4912 struct doq_conid key; 4913 key.node.key = &key; 4914 key.cid = (void*)data; 4915 
key.cidlen = datalen; 4916 log_assert(table != NULL); 4917 node = rbtree_search(table->conid_tree, &key); 4918 if(node) 4919 return (struct doq_conid*)node->key; 4920 return NULL; 4921 } 4922 4923 /** insert conid in the conid list */ 4924 static void 4925 doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid) 4926 { 4927 conid->prev = NULL; 4928 conid->next = conn->conid_list; 4929 if(conn->conid_list) 4930 conn->conid_list->prev = conid; 4931 conn->conid_list = conid; 4932 } 4933 4934 /** remove conid from the conid list */ 4935 static void 4936 doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid) 4937 { 4938 if(conid->prev) 4939 conid->prev->next = conid->next; 4940 else conn->conid_list = conid->next; 4941 if(conid->next) 4942 conid->next->prev = conid->prev; 4943 } 4944 4945 /** create a doq_conid */ 4946 static struct doq_conid* 4947 doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key) 4948 { 4949 struct doq_conid* conid; 4950 conid = calloc(1, sizeof(*conid)); 4951 if(!conid) 4952 return NULL; 4953 conid->cid = memdup(data, datalen); 4954 if(!conid->cid) { 4955 free(conid); 4956 return NULL; 4957 } 4958 conid->cidlen = datalen; 4959 conid->node.key = conid; 4960 conid->key = *key; 4961 conid->key.dcid = memdup(key->dcid, key->dcidlen); 4962 if(!conid->key.dcid) { 4963 free(conid->cid); 4964 free(conid); 4965 return NULL; 4966 } 4967 return conid; 4968 } 4969 4970 void 4971 doq_conid_delete(struct doq_conid* conid) 4972 { 4973 if(!conid) 4974 return; 4975 free(conid->key.dcid); 4976 free(conid->cid); 4977 free(conid); 4978 } 4979 4980 /** return true if the conid is for the conn. 
*/ 4981 static int 4982 conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid) 4983 { 4984 if(conid->key.dcidlen == conn->key.dcidlen && 4985 memcmp(conid->key.dcid, conn->key.dcid, conid->key.dcidlen)==0 4986 && conid->key.paddr.addrlen == conn->key.paddr.addrlen && 4987 memcmp(&conid->key.paddr.addr, &conn->key.paddr.addr, 4988 conid->key.paddr.addrlen) == 0 && 4989 conid->key.paddr.localaddrlen == conn->key.paddr.localaddrlen && 4990 memcmp(&conid->key.paddr.localaddr, &conn->key.paddr.localaddr, 4991 conid->key.paddr.localaddrlen) == 0 && 4992 conid->key.paddr.ifindex == conn->key.paddr.ifindex) 4993 return 1; 4994 return 0; 4995 } 4996 4997 int 4998 doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen) 4999 { 5000 struct doq_conid* conid; 5001 conid = doq_conid_find(conn->table, data, datalen); 5002 if(conid && !conid_is_for_conn(conn, conid)) { 5003 verbose(VERB_ALGO, "doq connection id already exists for " 5004 "another doq_conn. Ignoring second connection id."); 5005 /* Already exists to another conn, ignore it. 5006 * This works, in that the conid is listed in the doq_conn 5007 * conid_list element, and removed from there. So our conid 5008 * tree and list are fine, when created and removed. 5009 * The tree now does not have the lookup element pointing 5010 * to this connection. 
*/ 5011 return 1; 5012 } 5013 if(conid) 5014 return 1; /* already inserted */ 5015 conid = doq_conid_create(data, datalen, &conn->key); 5016 if(!conid) 5017 return 0; 5018 doq_conid_list_insert(conn, conid); 5019 (void)rbtree_insert(conn->table->conid_tree, &conid->node); 5020 return 1; 5021 } 5022 5023 void 5024 doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data, 5025 size_t datalen) 5026 { 5027 struct doq_conid* conid; 5028 conid = doq_conid_find(conn->table, data, datalen); 5029 if(conid && !conid_is_for_conn(conn, conid)) 5030 return; 5031 if(conid) { 5032 (void)rbtree_delete(conn->table->conid_tree, 5033 conid->node.key); 5034 doq_conid_list_remove(conn, conid); 5035 doq_conid_delete(conid); 5036 } 5037 } 5038 5039 /** associate the scid array and also the dcid. 5040 * caller must hold the locks on conn and doq_table.conid_lock. */ 5041 static int 5042 doq_conn_setup_id_array_and_dcid(struct doq_conn* conn, 5043 struct ngtcp2_cid* scids, size_t num_scid) 5044 { 5045 size_t i; 5046 for(i=0; i<num_scid; i++) { 5047 if(!doq_conn_associate_conid(conn, scids[i].data, 5048 scids[i].datalen)) 5049 return 0; 5050 } 5051 if(!doq_conn_associate_conid(conn, conn->key.dcid, conn->key.dcidlen)) 5052 return 0; 5053 return 1; 5054 } 5055 5056 int 5057 doq_conn_setup_conids(struct doq_conn* conn) 5058 { 5059 size_t num_scid = 5060 #ifndef HAVE_NGTCP2_CONN_GET_NUM_SCID 5061 ngtcp2_conn_get_scid(conn->conn, NULL); 5062 #else 5063 ngtcp2_conn_get_num_scid(conn->conn); 5064 #endif 5065 if(num_scid <= 4) { 5066 struct ngtcp2_cid ids[4]; 5067 /* Usually there are not that many scids when just accepted, 5068 * like only 2. 
*/ 5069 ngtcp2_conn_get_scid(conn->conn, ids); 5070 return doq_conn_setup_id_array_and_dcid(conn, ids, num_scid); 5071 } else { 5072 struct ngtcp2_cid *scids = calloc(num_scid, 5073 sizeof(struct ngtcp2_cid)); 5074 if(!scids) 5075 return 0; 5076 ngtcp2_conn_get_scid(conn->conn, scids); 5077 if(!doq_conn_setup_id_array_and_dcid(conn, scids, num_scid)) { 5078 free(scids); 5079 return 0; 5080 } 5081 free(scids); 5082 } 5083 return 1; 5084 } 5085 5086 void 5087 doq_conn_clear_conids(struct doq_conn* conn) 5088 { 5089 struct doq_conid* p, *next; 5090 if(!conn) 5091 return; 5092 p = conn->conid_list; 5093 while(p) { 5094 next = p->next; 5095 (void)rbtree_delete(conn->table->conid_tree, p->node.key); 5096 doq_conid_delete(p); 5097 p = next; 5098 } 5099 conn->conid_list = NULL; 5100 } 5101 5102 ngtcp2_tstamp doq_get_timestamp_nanosec(void) 5103 { 5104 #ifdef CLOCK_REALTIME 5105 struct timespec tp; 5106 memset(&tp, 0, sizeof(tp)); 5107 /* Get a nanosecond time, that can be compared with the event base. */ 5108 if(clock_gettime(CLOCK_REALTIME, &tp) == -1) { 5109 log_err("clock_gettime failed: %s", strerror(errno)); 5110 } 5111 return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) + 5112 ((uint64_t)tp.tv_nsec); 5113 #else 5114 struct timeval tv; 5115 if(gettimeofday(&tv, NULL) < 0) { 5116 log_err("gettimeofday failed: %s", strerror(errno)); 5117 } 5118 return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) + 5119 ((uint64_t)tv.tv_usec)*((uint64_t)1000); 5120 #endif /* CLOCK_REALTIME */ 5121 } 5122 5123 /** doq start the closing period for the connection. 
*/ 5124 static int 5125 doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn) 5126 { 5127 struct ngtcp2_path_storage ps; 5128 struct ngtcp2_pkt_info pi; 5129 ngtcp2_ssize ret; 5130 if(!conn) 5131 return 1; 5132 if( 5133 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 5134 ngtcp2_conn_in_closing_period(conn->conn) 5135 #else 5136 ngtcp2_conn_is_in_closing_period(conn->conn) 5137 #endif 5138 ) 5139 return 1; 5140 if( 5141 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 5142 ngtcp2_conn_in_draining_period(conn->conn) 5143 #else 5144 ngtcp2_conn_is_in_draining_period(conn->conn) 5145 #endif 5146 ) { 5147 doq_conn_write_disable(conn); 5148 return 1; 5149 } 5150 ngtcp2_path_storage_zero(&ps); 5151 sldns_buffer_clear(c->doq_socket->pkt_buf); 5152 /* the call to ngtcp2_conn_write_connection_close causes the 5153 * conn to be closed. It is now in the closing period. */ 5154 ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path, 5155 &pi, sldns_buffer_begin(c->doq_socket->pkt_buf), 5156 sldns_buffer_remaining(c->doq_socket->pkt_buf), 5157 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5158 &conn->ccerr 5159 #else 5160 &conn->last_error 5161 #endif 5162 , doq_get_timestamp_nanosec()); 5163 if(ret < 0) { 5164 log_err("doq ngtcp2_conn_write_connection_close failed: %s", 5165 ngtcp2_strerror(ret)); 5166 return 0; 5167 } 5168 if(ret == 0) { 5169 return 0; 5170 } 5171 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 5172 sldns_buffer_flip(c->doq_socket->pkt_buf); 5173 5174 /* The close packet is allocated, because it may have to be repeated. 5175 * When incoming packets have this connection dcid. 
*/ 5176 conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf), 5177 sldns_buffer_limit(c->doq_socket->pkt_buf)); 5178 if(!conn->close_pkt) { 5179 log_err("doq: could not allocate close packet: out of memory"); 5180 return 0; 5181 } 5182 conn->close_pkt_len = sldns_buffer_limit(c->doq_socket->pkt_buf); 5183 conn->close_ecn = pi.ecn; 5184 return 1; 5185 } 5186 5187 /** doq send the close packet for the connection, perhaps again. */ 5188 int 5189 doq_conn_send_close(struct comm_point* c, struct doq_conn* conn) 5190 { 5191 if(!conn) 5192 return 0; 5193 if(!conn->close_pkt) 5194 return 0; 5195 if(conn->close_pkt_len > sldns_buffer_capacity(c->doq_socket->pkt_buf)) 5196 return 0; 5197 sldns_buffer_clear(c->doq_socket->pkt_buf); 5198 sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt, conn->close_pkt_len); 5199 sldns_buffer_flip(c->doq_socket->pkt_buf); 5200 verbose(VERB_ALGO, "doq send connection close"); 5201 doq_send_pkt(c, &conn->key.paddr, conn->close_ecn); 5202 doq_conn_write_disable(conn); 5203 return 1; 5204 } 5205 5206 /** doq close the connection on error. If it returns a failure, it 5207 * does not wait to send a close, and the connection can be dropped. 
*/ 5208 static int 5209 doq_conn_close_error(struct comm_point* c, struct doq_conn* conn) 5210 { 5211 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5212 if(conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE) 5213 return 0; 5214 #else 5215 if(conn->last_error.type == 5216 NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE) 5217 return 0; 5218 #endif 5219 if(!doq_conn_start_closing_period(c, conn)) 5220 return 0; 5221 if( 5222 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 5223 ngtcp2_conn_in_draining_period(conn->conn) 5224 #else 5225 ngtcp2_conn_is_in_draining_period(conn->conn) 5226 #endif 5227 ) { 5228 doq_conn_write_disable(conn); 5229 return 1; 5230 } 5231 doq_conn_write_enable(conn); 5232 if(!doq_conn_send_close(c, conn)) 5233 return 0; 5234 return 1; 5235 } 5236 5237 int 5238 doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr, 5239 struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry, 5240 int* err_drop) 5241 { 5242 int ret; 5243 ngtcp2_tstamp ts; 5244 struct ngtcp2_path path; 5245 memset(&path, 0, sizeof(path)); 5246 path.remote.addr = (struct sockaddr*)&paddr->addr; 5247 path.remote.addrlen = paddr->addrlen; 5248 path.local.addr = (struct sockaddr*)&paddr->localaddr; 5249 path.local.addrlen = paddr->localaddrlen; 5250 ts = doq_get_timestamp_nanosec(); 5251 5252 ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi, 5253 sldns_buffer_begin(c->doq_socket->pkt_buf), 5254 sldns_buffer_limit(c->doq_socket->pkt_buf), ts); 5255 if(ret != 0) { 5256 if(err_retry) 5257 *err_retry = 0; 5258 if(err_drop) 5259 *err_drop = 0; 5260 if(ret == NGTCP2_ERR_DRAINING) { 5261 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", 5262 ngtcp2_strerror(ret)); 5263 doq_conn_write_disable(conn); 5264 return 0; 5265 } else if(ret == NGTCP2_ERR_DROP_CONN) { 5266 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", 5267 ngtcp2_strerror(ret)); 5268 if(err_drop) 5269 *err_drop = 1; 5270 return 0; 5271 } else if(ret == NGTCP2_ERR_RETRY) { 5272 verbose(VERB_ALGO, 
"ngtcp2_conn_read_pkt returned %s", 5273 ngtcp2_strerror(ret)); 5274 if(err_retry) 5275 *err_retry = 1; 5276 if(err_drop) 5277 *err_drop = 1; 5278 return 0; 5279 } else if(ret == NGTCP2_ERR_CRYPTO) { 5280 if( 5281 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5282 !conn->ccerr.error_code 5283 #else 5284 !conn->last_error.error_code 5285 #endif 5286 ) { 5287 /* in picotls the tls alert may need to be 5288 * copied, but this is with openssl. And there 5289 * is conn->tls_alert. */ 5290 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5291 ngtcp2_ccerr_set_tls_alert(&conn->ccerr, 5292 conn->tls_alert, NULL, 0); 5293 #else 5294 ngtcp2_connection_close_error_set_transport_error_tls_alert( 5295 &conn->last_error, conn->tls_alert, 5296 NULL, 0); 5297 #endif 5298 } 5299 } else { 5300 if( 5301 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5302 !conn->ccerr.error_code 5303 #else 5304 !conn->last_error.error_code 5305 #endif 5306 ) { 5307 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5308 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, 5309 NULL, 0); 5310 #else 5311 ngtcp2_connection_close_error_set_transport_error_liberr( 5312 &conn->last_error, ret, NULL, 0); 5313 #endif 5314 } 5315 } 5316 log_err("ngtcp2_conn_read_pkt failed: %s", 5317 ngtcp2_strerror(ret)); 5318 if(!doq_conn_close_error(c, conn)) { 5319 if(err_drop) 5320 *err_drop = 1; 5321 } 5322 return 0; 5323 } 5324 doq_conn_write_enable(conn); 5325 return 1; 5326 } 5327 5328 /** doq stream write is done */ 5329 static void 5330 doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream) 5331 { 5332 /* Cannot deallocate, the buffer may be needed for resends. 
*/ 5333 doq_stream_off_write_list(conn, stream); 5334 } 5335 5336 int 5337 doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn, 5338 int* err_drop) 5339 { 5340 struct doq_stream* stream = conn->stream_write_first; 5341 ngtcp2_path_storage ps; 5342 ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); 5343 size_t num_packets = 0, max_packets = 65535; 5344 ngtcp2_path_storage_zero(&ps); 5345 5346 for(;;) { 5347 int64_t stream_id; 5348 uint32_t flags = 0; 5349 ngtcp2_pkt_info pi; 5350 ngtcp2_vec datav[2]; 5351 size_t datav_count = 0; 5352 ngtcp2_ssize ret, ndatalen = 0; 5353 int fin; 5354 5355 if(stream) { 5356 /* data to send */ 5357 verbose(VERB_ALGO, "doq: doq_conn write stream %d", 5358 (int)stream->stream_id); 5359 stream_id = stream->stream_id; 5360 fin = 1; 5361 if(stream->nwrite < 2) { 5362 datav[0].base = ((uint8_t*)&stream-> 5363 outlen_wire) + stream->nwrite; 5364 datav[0].len = 2 - stream->nwrite; 5365 datav[1].base = stream->out; 5366 datav[1].len = stream->outlen; 5367 datav_count = 2; 5368 } else { 5369 datav[0].base = stream->out + 5370 (stream->nwrite-2); 5371 datav[0].len = stream->outlen - 5372 (stream->nwrite-2); 5373 datav_count = 1; 5374 } 5375 } else { 5376 /* no data to send */ 5377 verbose(VERB_ALGO, "doq: doq_conn write stream -1"); 5378 stream_id = -1; 5379 fin = 0; 5380 datav[0].base = NULL; 5381 datav[0].len = 0; 5382 datav_count = 1; 5383 } 5384 5385 /* if more streams, set it to write more */ 5386 if(stream && stream->write_next) 5387 flags |= NGTCP2_WRITE_STREAM_FLAG_MORE; 5388 if(fin) 5389 flags |= NGTCP2_WRITE_STREAM_FLAG_FIN; 5390 5391 sldns_buffer_clear(c->doq_socket->pkt_buf); 5392 ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi, 5393 sldns_buffer_begin(c->doq_socket->pkt_buf), 5394 sldns_buffer_remaining(c->doq_socket->pkt_buf), 5395 &ndatalen, flags, stream_id, datav, datav_count, ts); 5396 if(ret < 0) { 5397 if(ret == NGTCP2_ERR_WRITE_MORE) { 5398 verbose(VERB_ALGO, "doq: write more, ndatalen %d", 
(int)ndatalen); 5399 if(stream) { 5400 if(ndatalen >= 0) 5401 stream->nwrite += ndatalen; 5402 if(stream->nwrite >= stream->outlen+2) 5403 doq_stream_write_is_done( 5404 conn, stream); 5405 stream = stream->write_next; 5406 } 5407 continue; 5408 } else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) { 5409 verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED"); 5410 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5411 ngtcp2_ccerr_set_application_error( 5412 &conn->ccerr, -1, NULL, 0); 5413 #else 5414 ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); 5415 #endif 5416 if(err_drop) 5417 *err_drop = 0; 5418 if(!doq_conn_close_error(c, conn)) { 5419 if(err_drop) 5420 *err_drop = 1; 5421 } 5422 return 0; 5423 } else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) { 5424 verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR"); 5425 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5426 ngtcp2_ccerr_set_application_error( 5427 &conn->ccerr, -1, NULL, 0); 5428 #else 5429 ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); 5430 #endif 5431 if(err_drop) 5432 *err_drop = 0; 5433 if(!doq_conn_close_error(c, conn)) { 5434 if(err_drop) 5435 *err_drop = 1; 5436 } 5437 return 0; 5438 } 5439 5440 log_err("doq: ngtcp2_conn_writev_stream failed: %s", 5441 ngtcp2_strerror(ret)); 5442 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5443 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0); 5444 #else 5445 ngtcp2_connection_close_error_set_transport_error_liberr( 5446 &conn->last_error, ret, NULL, 0); 5447 #endif 5448 if(err_drop) 5449 *err_drop = 0; 5450 if(!doq_conn_close_error(c, conn)) { 5451 if(err_drop) 5452 *err_drop = 1; 5453 } 5454 return 0; 5455 } 5456 verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d", 5457 (int)ret, (int)ndatalen); 5458 5459 if(ndatalen >= 0 && stream) { 5460 stream->nwrite += ndatalen; 5461 if(stream->nwrite >= stream->outlen+2) 5462 doq_stream_write_is_done(conn, 
stream); 5463 } 5464 if(ret == 0) { 5465 /* congestion limited */ 5466 doq_conn_write_disable(conn); 5467 ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); 5468 return 1; 5469 } 5470 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 5471 sldns_buffer_flip(c->doq_socket->pkt_buf); 5472 doq_send_pkt(c, &conn->key.paddr, pi.ecn); 5473 5474 if(c->doq_socket->have_blocked_pkt) 5475 break; 5476 if(++num_packets == max_packets) 5477 break; 5478 if(stream) 5479 stream = stream->write_next; 5480 } 5481 ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); 5482 return 1; 5483 } 5484 5485 void 5486 doq_conn_write_enable(struct doq_conn* conn) 5487 { 5488 conn->write_interest = 1; 5489 } 5490 5491 void 5492 doq_conn_write_disable(struct doq_conn* conn) 5493 { 5494 conn->write_interest = 0; 5495 } 5496 5497 /** doq append the connection to the write list */ 5498 static void 5499 doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn) 5500 { 5501 if(conn->on_write_list) 5502 return; 5503 conn->write_prev = table->write_list_last; 5504 if(table->write_list_last) 5505 table->write_list_last->write_next = conn; 5506 else table->write_list_first = conn; 5507 conn->write_next = NULL; 5508 table->write_list_last = conn; 5509 conn->on_write_list = 1; 5510 } 5511 5512 void 5513 doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn) 5514 { 5515 if(!conn->on_write_list) 5516 return; 5517 if(conn->write_next) 5518 conn->write_next->write_prev = conn->write_prev; 5519 else table->write_list_last = conn->write_prev; 5520 if(conn->write_prev) 5521 conn->write_prev->write_next = conn->write_next; 5522 else table->write_list_first = conn->write_next; 5523 conn->write_prev = NULL; 5524 conn->write_next = NULL; 5525 conn->on_write_list = 0; 5526 } 5527 5528 void 5529 doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn) 5530 { 5531 if(conn->write_interest && conn->on_write_list) 5532 return; 5533 if(!conn->write_interest && 
!conn->on_write_list) 5534 return; 5535 if(conn->write_interest) 5536 doq_conn_write_list_append(table, conn); 5537 else doq_conn_write_list_remove(table, conn); 5538 } 5539 5540 struct doq_conn* 5541 doq_table_pop_first(struct doq_table* table) 5542 { 5543 struct doq_conn* conn = table->write_list_first; 5544 if(!conn) 5545 return NULL; 5546 lock_basic_lock(&conn->lock); 5547 table->write_list_first = conn->write_next; 5548 if(conn->write_next) 5549 conn->write_next->write_prev = NULL; 5550 else table->write_list_last = NULL; 5551 conn->write_next = NULL; 5552 conn->write_prev = NULL; 5553 conn->on_write_list = 0; 5554 return conn; 5555 } 5556 5557 int 5558 doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv) 5559 { 5560 ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(conn->conn); 5561 ngtcp2_tstamp now = doq_get_timestamp_nanosec(); 5562 ngtcp2_tstamp t; 5563 5564 if(expiry <= now) { 5565 /* The timer has already expired, add with zero timeout. 5566 * This should call the callback straight away. Calling it 5567 * from the event callbacks is cleaner than calling it here, 5568 * because then it is always called with the same locks and 5569 * so on. This routine only has the conn.lock. */ 5570 t = now; 5571 } else { 5572 t = expiry; 5573 } 5574 5575 /* convert to timeval */ 5576 memset(tv, 0, sizeof(*tv)); 5577 tv->tv_sec = t / NGTCP2_SECONDS; 5578 tv->tv_usec = (t / NGTCP2_MICROSECONDS)%1000000; 5579 5580 /* If we already have a timer, is it the right value? 
*/ 5581 if(conn->timer.timer_in_tree || conn->timer.timer_in_list) { 5582 if(conn->timer.time.tv_sec == tv->tv_sec && 5583 conn->timer.time.tv_usec == tv->tv_usec) 5584 return 0; 5585 } 5586 return 1; 5587 } 5588 5589 /* doq print connection log */ 5590 static void 5591 doq_conn_log_line(struct doq_conn* conn, char* s) 5592 { 5593 char remotestr[256], localstr[256]; 5594 addr_to_str((void*)&conn->key.paddr.addr, conn->key.paddr.addrlen, 5595 remotestr, sizeof(remotestr)); 5596 addr_to_str((void*)&conn->key.paddr.localaddr, 5597 conn->key.paddr.localaddrlen, localstr, sizeof(localstr)); 5598 log_info("doq conn %s %s %s", remotestr, localstr, s); 5599 } 5600 5601 int 5602 doq_conn_handle_timeout(struct doq_conn* conn) 5603 { 5604 ngtcp2_tstamp now = doq_get_timestamp_nanosec(); 5605 int rv; 5606 5607 if(verbosity >= VERB_ALGO) 5608 doq_conn_log_line(conn, "timeout"); 5609 5610 rv = ngtcp2_conn_handle_expiry(conn->conn, now); 5611 if(rv != 0) { 5612 verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s", 5613 ngtcp2_strerror(rv)); 5614 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5615 ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0); 5616 #else 5617 ngtcp2_connection_close_error_set_transport_error_liberr( 5618 &conn->last_error, rv, NULL, 0); 5619 #endif 5620 if(!doq_conn_close_error(conn->doq_socket->cp, conn)) { 5621 /* failed, return for deletion */ 5622 return 0; 5623 } 5624 return 1; 5625 } 5626 doq_conn_write_enable(conn); 5627 if(!doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)) { 5628 /* failed, return for deletion. 
*/ 5629 return 0; 5630 } 5631 return 1; 5632 } 5633 5634 void 5635 doq_table_quic_size_add(struct doq_table* table, size_t add) 5636 { 5637 lock_basic_lock(&table->size_lock); 5638 table->current_size += add; 5639 lock_basic_unlock(&table->size_lock); 5640 } 5641 5642 void 5643 doq_table_quic_size_subtract(struct doq_table* table, size_t subtract) 5644 { 5645 lock_basic_lock(&table->size_lock); 5646 if(table->current_size < subtract) 5647 table->current_size = 0; 5648 else table->current_size -= subtract; 5649 lock_basic_unlock(&table->size_lock); 5650 } 5651 5652 int 5653 doq_table_quic_size_available(struct doq_table* table, 5654 struct config_file* cfg, size_t mem) 5655 { 5656 size_t cur; 5657 if (!table) 5658 return 0; 5659 lock_basic_lock(&table->size_lock); 5660 cur = table->current_size; 5661 lock_basic_unlock(&table->size_lock); 5662 5663 if(cur + mem > cfg->quic_size) 5664 return 0; 5665 return 1; 5666 } 5667 5668 size_t doq_table_quic_size_get(struct doq_table* table) 5669 { 5670 size_t sz; 5671 if(!table) 5672 return 0; 5673 lock_basic_lock(&table->size_lock); 5674 sz = table->current_size; 5675 lock_basic_unlock(&table->size_lock); 5676 return sz; 5677 } 5678 #endif /* HAVE_NGTCP2 */ 5679