Home | History | Annotate | Line # | Download | only in netmgr
      1 /*	$NetBSD: tcp.c,v 1.13 2025/05/21 14:48:05 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * SPDX-License-Identifier: MPL-2.0
      7  *
      8  * This Source Code Form is subject to the terms of the Mozilla Public
      9  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  *
     12  * See the COPYRIGHT file distributed with this work for additional
     13  * information regarding copyright ownership.
     14  */
     15 
     16 #include <libgen.h>
     17 #include <unistd.h>
     18 
     19 #include <isc/async.h>
     20 #include <isc/atomic.h>
     21 #include <isc/barrier.h>
     22 #include <isc/buffer.h>
     23 #include <isc/condition.h>
     24 #include <isc/errno.h>
     25 #include <isc/log.h>
     26 #include <isc/magic.h>
     27 #include <isc/mem.h>
     28 #include <isc/netmgr.h>
     29 #include <isc/quota.h>
     30 #include <isc/random.h>
     31 #include <isc/refcount.h>
     32 #include <isc/region.h>
     33 #include <isc/result.h>
     34 #include <isc/sockaddr.h>
     35 #include <isc/stdtime.h>
     36 #include <isc/thread.h>
     37 #include <isc/util.h>
     38 #include <isc/uv.h>
     39 
     40 #include "../loop_p.h"
     41 #include "netmgr-int.h"
     42 
     43 static atomic_uint_fast32_t last_tcpquota_log = 0;
     44 
     45 static bool
     46 can_log_tcp_quota(void) {
     47 	isc_stdtime_t last;
     48 	isc_stdtime_t now = isc_stdtime_now();
     49 	last = atomic_exchange_relaxed(&last_tcpquota_log, now);
     50 	if (now != last) {
     51 		return true;
     52 	}
     53 
     54 	return false;
     55 }
     56 
     57 static isc_result_t
     58 tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
     59 
     60 static isc_result_t
     61 tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
     62 static void
     63 tcp_connect_cb(uv_connect_t *uvreq, int status);
     64 static void
     65 tcp_stop_cb(uv_handle_t *handle);
     66 
     67 static void
     68 tcp_connection_cb(uv_stream_t *server, int status);
     69 
     70 static void
     71 tcp_close_cb(uv_handle_t *uvhandle);
     72 
     73 static isc_result_t
     74 accept_connection(isc_nmsocket_t *ssock);
     75 
     76 static void
     77 quota_accept_cb(void *arg);
     78 
/*
 * Prepare the libuv TCP handle and read timer for an outgoing
 * connection on 'sock' and start the connect described by 'req'.
 * Must run on the socket's own loop thread (REQUIRE below).
 *
 * Returns ISC_R_SUCCESS once uv_tcp_connect() has been queued;
 * completion or failure is then reported via tcp_connect_cb().
 * On an early error the uverr-mapped result is returned and the
 * caller is responsible for cleanup (see isc_nm_tcpconnect()).
 */
static isc_result_t
tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
	isc__networker_t *worker = NULL;
	isc_result_t result = ISC_R_UNSET;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_UVREQ(req));

	REQUIRE(sock->tid == isc_tid());

	worker = sock->worker;

	sock->connecting = true;

	/* 2 minute timeout */
	result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	r = uv_tcp_init(&worker->loop->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);

	r = uv_timer_init(&worker->loop->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	/* Wrap the pre-created, pre-configured fd in the uv handle */
	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r != 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock, STATID_OPENFAIL);
		return isc_uverr2result(r);
	}
	isc__nm_incstats(sock, STATID_OPEN);

	/* Bind to the requested local address, if one was supplied */
	if (req->local.length != 0) {
		r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
		if (r != 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			return isc_uverr2result(r);
		}
	}

	isc__nm_set_network_buffers(sock->worker->netmgr,
				    &sock->uv_handle.handle);

	uv_handle_set_data(&req->uv_req.handle, req);
	r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
			   &req->peer.type.sa, tcp_connect_cb);
	if (r != 0) {
		isc__nm_incstats(sock, STATID_CONNECTFAIL);
		return isc_uverr2result(r);
	}

	/*
	 * While connecting, the read timer's data points at the connect
	 * request rather than the socket; tcp_connect_cb() restores it
	 * to the socket on successful completion.
	 */
	uv_handle_set_data((uv_handle_t *)&sock->read_timer,
			   &req->uv_req.connect);
	isc__nmsocket_timer_start(sock);

	return ISC_R_SUCCESS;
}
    139 
/*
 * uv_tcp_connect() completion callback.  Dispatches on the uv status
 * and socket state: reports timeout, netmgr shutdown and cancellation
 * to the user's connect callback; retries spurious transient
 * EADDRINUSE (observed on FreeBSD); on success records the peer
 * address and invokes the connect callback with ISC_R_SUCCESS.
 */
static void
tcp_connect_cb(uv_connect_t *uvreq, int status) {
	isc_result_t result = ISC_R_UNSET;
	isc__nm_uvreq_t *req = NULL;
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
	struct sockaddr_storage ss;
	isc__networker_t *worker = NULL;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_tid());

	worker = sock->worker;

	req = uv_handle_get_data((uv_handle_t *)uvreq);

	REQUIRE(VALID_UVREQ(req));
	REQUIRE(VALID_NMHANDLE(req->handle));

	INSIST(sock->connecting);

	if (sock->timedout || status == UV_ETIMEDOUT) {
		/* Connection timed-out */
		result = ISC_R_TIMEDOUT;
		goto error;
	} else if (isc__nm_closing(worker)) {
		/* Network manager shutting down */
		result = ISC_R_SHUTTINGDOWN;
		goto error;
	} else if (isc__nmsocket_closing(sock)) {
		/* Connection canceled */
		result = ISC_R_CANCELED;
		goto error;
	} else if (status == UV_EADDRINUSE) {
		/*
		 * On FreeBSD the TCP connect() call sometimes results in a
		 * spurious transient EADDRINUSE. Try a few more times before
		 * giving up.
		 */
		if (--req->connect_tries > 0) {
			r = uv_tcp_connect(&req->uv_req.connect,
					   &sock->uv_handle.tcp,
					   &req->peer.type.sa, tcp_connect_cb);
			if (r != 0) {
				result = isc_uverr2result(r);
				goto error;
			}
			/* Retry queued; this callback will fire again */
			return;
		}
		result = isc_uverr2result(status);
		goto error;
	} else if (status != 0) {
		result = isc_uverr2result(status);
		goto error;
	}

	/*
	 * Connected: stop the connect timeout timer and restore its
	 * data pointer to the socket (tcp_connect_direct() pointed it
	 * at the connect request).
	 */
	isc__nmsocket_timer_stop(sock);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	isc__nm_incstats(sock, STATID_CONNECT);
	r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc_uverr2result(r);
		goto error;
	}

	sock->connecting = false;
	sock->connected = true;

	result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);

	return;
error:
	isc__nm_failed_connect_cb(sock, req, result, false);
}
    219 
/*
 * Create a client TCP socket and initiate a connection from 'local'
 * to 'peer'.  'connect_cb' is always invoked exactly once with the
 * outcome: synchronously with ISC_R_SHUTTINGDOWN or a socket-creation
 * error, or later from tcp_connect_cb() / the failure path below.
 * 'timeout' is stored as the socket's connect timeout.
 */
void
isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
		  isc_nm_cb_t connect_cb, void *connect_cbarg,
		  unsigned int timeout) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	isc__nm_uvreq_t *req = NULL;
	sa_family_t sa_family;
	isc__networker_t *worker = NULL;
	uv_os_sock_t fd = -1;

	REQUIRE(VALID_NM(mgr));
	REQUIRE(local != NULL);
	REQUIRE(peer != NULL);

	/* Use the current thread's worker */
	worker = &mgr->workers[isc_tid()];

	if (isc__nm_closing(worker)) {
		connect_cb(NULL, ISC_R_SHUTTINGDOWN, connect_cbarg);
		return;
	}

	sa_family = peer->type.sa.sa_family;

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &fd);
	if (result != ISC_R_SUCCESS) {
		connect_cb(NULL, result, connect_cbarg);
		return;
	}

	sock = isc_mempool_get(worker->nmsocket_pool);
	isc__nmsocket_init(sock, worker, isc_nm_tcpsocket, local, NULL);

	sock->connect_timeout = timeout;
	sock->fd = fd;
	sock->client = true;

	/* The request carries the callback and addresses to the uv layer */
	req = isc__nm_uvreq_get(sock);
	req->cb.connect = connect_cb;
	req->cbarg = connect_cbarg;
	req->peer = *peer;
	req->local = *local;
	req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);

	/* Best-effort socket tuning; failures are ignored */
	(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
	(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);

	sock->active = true;

	result = tcp_connect_direct(sock, req);
	if (result != ISC_R_SUCCESS) {
		/* Report the failure through the connect callback */
		sock->active = false;
		isc__nm_tcp_close(sock);
		isc__nm_connectcb(sock, req, result, true);
	}

	/*
	 * The sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&sock);
}
    281 
    282 static uv_os_sock_t
    283 isc__nm_tcp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
    284 	isc_result_t result;
    285 	uv_os_sock_t sock;
    286 
    287 	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock);
    288 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
    289 
    290 	(void)isc__nm_socket_v6only(sock, sa_family);
    291 
    292 	/* FIXME: set mss */
    293 
    294 	result = isc__nm_socket_reuse(sock, 1);
    295 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
    296 
    297 	if (mgr->load_balance_sockets) {
    298 		result = isc__nm_socket_reuse_lb(sock);
    299 		RUNTIME_CHECK(result == ISC_R_SUCCESS);
    300 	}
    301 
    302 	return sock;
    303 }
    304 
/*
 * Per-worker listen job: wrap the child's fd in a uv handle, bind (or
 * inherit the parent's bind flags), and start listening.  The final
 * result is stored in sock->result; non-zero tids then wait on the
 * parent's listen_barrier so isc_nm_listentcp() can collect results.
 */
static void
start_tcp_child_job(void *arg) {
	isc_nmsocket_t *sock = arg;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_NMSOCK(sock->parent));
	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(sock->tid == isc_tid());

	sa_family_t sa_family = sock->iface.type.sa.sa_family;
	int r, flags = 0;
	isc_result_t result = ISC_R_UNSET;
	isc_loop_t *loop = sock->worker->loop;
	struct sockaddr_storage ss;

	/* Best-effort socket tuning; failures are ignored */
	(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
	(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);

	r = uv_tcp_init(&loop->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);
	/* This keeps the socket alive after everything else is gone */
	isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });

	r = uv_timer_init(&loop->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r < 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock, STATID_OPENFAIL);
		goto done;
	}
	isc__nm_incstats(sock, STATID_OPEN);

	if (sa_family == AF_INET6) {
		flags = UV_TCP_IPV6ONLY;
	}

	if (sock->worker->netmgr->load_balance_sockets) {
		/* Each worker has its own lb socket; bind it here */
		r = isc__nm_tcp_freebind(&sock->uv_handle.tcp,
					 &sock->iface.type.sa, flags);
		if (r < 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			goto done;
		}
	} else if (sock->tid == 0) {
		/*
		 * Without lb sockets, only tid 0 binds; the resulting
		 * uv flags are published to the parent for the others.
		 */
		r = isc__nm_tcp_freebind(&sock->uv_handle.tcp,
					 &sock->iface.type.sa, flags);
		if (r < 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			goto done;
		}
		sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags;
	} else {
		/* The socket is already bound, just copy the flags */
		sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags;
	}

	isc__nm_set_network_buffers(sock->worker->netmgr,
				    &sock->uv_handle.handle);

	/*
	 * The callback will run in the same thread uv_listen() was called
	 * from, so a race with tcp_connection_cb() isn't possible.
	 */
	r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
		      tcp_connection_cb);
	if (r != 0) {
		isc__nmsocket_log(sock, ISC_LOG_ERROR, "uv_listen failed: %s",
				  isc_result_totext(isc_uverr2result(r)));
		isc__nm_incstats(sock, STATID_BINDFAIL);
		goto done;
	}

	if (sock->tid == 0) {
		/*
		 * Publish the actual bound address (e.g. an ephemeral
		 * port) back to the parent listener's iface.
		 */
		r = uv_tcp_getsockname(&sock->uv_handle.tcp,
				       (struct sockaddr *)&ss,
				       &(int){ sizeof(ss) });
		if (r != 0) {
			goto done;
		}

		result = isc_sockaddr_fromsockaddr(&sock->parent->iface,
						   (struct sockaddr *)&ss);
		if (result != ISC_R_SUCCESS) {
			goto done_result;
		}
	}

done:
	/* Map the last uv return (0 on success) to an isc result */
	result = isc_uverr2result(r);

done_result:
	if (result != ISC_R_SUCCESS) {
		sock->pquota = NULL;
	}

	sock->result = result;

	REQUIRE(!loop->paused);

	/* tid 0 runs synchronously and waits in isc_nm_listentcp() */
	if (sock->tid != 0) {
		isc_barrier_wait(&sock->parent->listen_barrier);
	}
}
    412 
    413 static void
    414 start_tcp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
    415 		uv_os_sock_t fd, int tid) {
    416 	isc_nmsocket_t *csock = &sock->children[tid];
    417 	isc__networker_t *worker = &mgr->workers[tid];
    418 
    419 	isc__nmsocket_init(csock, worker, isc_nm_tcpsocket, iface, sock);
    420 	csock->accept_cb = sock->accept_cb;
    421 	csock->accept_cbarg = sock->accept_cbarg;
    422 	csock->backlog = sock->backlog;
    423 
    424 	/*
    425 	 * Quota isn't attached, just assigned.
    426 	 */
    427 	csock->pquota = sock->pquota;
    428 
    429 	if (mgr->load_balance_sockets) {
    430 		UNUSED(fd);
    431 		csock->fd = isc__nm_tcp_lb_socket(mgr,
    432 						  iface->type.sa.sa_family);
    433 	} else {
    434 		csock->fd = dup(fd);
    435 	}
    436 	REQUIRE(csock->fd >= 0);
    437 
    438 	if (tid == 0) {
    439 		start_tcp_child_job(csock);
    440 	} else {
    441 		isc_async_run(worker->loop, start_tcp_child_job, csock);
    442 	}
    443 }
    444 
/*
 * Start listening for TCP connections on 'iface' with one child
 * socket per worker ('workers' of them, or all loops for
 * ISC_NM_LISTEN_ALL / 0).  Must be called from thread 0.
 *
 * On success *sockp holds the parent listener socket and
 * ISC_R_SUCCESS is returned; if any child fails to listen, all
 * children are stopped and the first failing result is returned.
 */
isc_result_t
isc_nm_listentcp(isc_nm_t *mgr, uint32_t workers, isc_sockaddr_t *iface,
		 isc_nm_accept_cb_t accept_cb, void *accept_cbarg, int backlog,
		 isc_quota_t *quota, isc_nmsocket_t **sockp) {
	isc_nmsocket_t *sock = NULL;
	uv_os_sock_t fd = -1;
	isc_result_t result = ISC_R_UNSET;
	isc__networker_t *worker = NULL;

	REQUIRE(VALID_NM(mgr));
	REQUIRE(isc_tid() == 0);

	if (workers == 0) {
		workers = mgr->nloops;
	}
	REQUIRE(workers <= mgr->nloops);

	worker = &mgr->workers[0];
	sock = isc_mempool_get(worker->nmsocket_pool);
	isc__nmsocket_init(sock, worker, isc_nm_tcplistener, iface, NULL);

	sock->nchildren = (workers == ISC_NM_LISTEN_ALL) ? (uint32_t)mgr->nloops
							 : workers;
	sock->children = isc_mem_cget(worker->mctx, sock->nchildren,
				      sizeof(sock->children[0]));

	isc__nmsocket_barrier_init(sock);

	sock->accept_cb = accept_cb;
	sock->accept_cbarg = accept_cbarg;
	sock->backlog = backlog;
	sock->pquota = quota;

	/* Without lb sockets, a single fd is created and dup()ed per child */
	if (!mgr->load_balance_sockets) {
		fd = isc__nm_tcp_lb_socket(mgr, iface->type.sa.sa_family);
	}

	/* Child 0 runs synchronously; it binds and publishes the uv flags */
	start_tcp_child(mgr, iface, sock, fd, 0);
	result = sock->children[0].result;
	INSIST(result != ISC_R_UNSET);

	for (size_t i = 1; i < sock->nchildren; i++) {
		start_tcp_child(mgr, iface, sock, fd, i);
	}

	/* Wait until every async child job has stored its result */
	isc_barrier_wait(&sock->listen_barrier);

	/* The template fd is no longer needed after the dups */
	if (!mgr->load_balance_sockets) {
		isc__nm_closesocket(fd);
	}

	/*
	 * If any of the child sockets have failed then isc_nm_listentcp
	 * fails.
	 */
	for (size_t i = 1; i < sock->nchildren; i++) {
		if (result == ISC_R_SUCCESS &&
		    sock->children[i].result != ISC_R_SUCCESS)
		{
			result = sock->children[i].result;
		}
	}

	if (result != ISC_R_SUCCESS) {
		sock->active = false;
		isc__nm_tcp_stoplistening(sock);
		isc_nmsocket_close(&sock);

		return result;
	}

	sock->active = true;

	*sockp = sock;
	return ISC_R_SUCCESS;
}
    521 
/*
 * uv_listen() callback: a new connection is waiting on 'server'.
 * Creates a child socket, acquires the TCP clients quota (possibly
 * deferring the accept via quota_accept_cb) and accepts the
 * connection.  All outcomes are funneled through the rate-limited
 * accept log at 'done'.
 */
static void
tcp_connection_cb(uv_stream_t *server, int status) {
	isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
	isc_result_t result;

	REQUIRE(ssock->accept_cb != NULL);

	if (status != 0) {
		result = isc_uverr2result(status);
		goto done;
	}

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_tid());

	if (isc__nmsocket_closing(ssock)) {
		result = ISC_R_CANCELED;
		goto done;
	}

	/* Prepare the child socket */
	isc_nmsocket_t *csock = isc_mempool_get(ssock->worker->nmsocket_pool);
	isc__nmsocket_init(csock, ssock->worker, isc_nm_tcpsocket,
			   &ssock->iface, NULL);
	isc__nmsocket_attach(ssock, &csock->server);

	if (csock->server->pquota != NULL) {
		/*
		 * Try to get the quota now; if it is exhausted, the
		 * accept happens later from quota_accept_cb() when a
		 * slot frees up.
		 */
		result = isc_quota_acquire_cb(csock->server->pquota,
					      &csock->quotacb, quota_accept_cb,
					      csock);
		if (result == ISC_R_QUOTA) {
			/* Remember when we queued, to expire stale accepts */
			csock->quota_accept_ts = isc_time_monotonic();
			isc__nm_incstats(ssock, STATID_ACCEPTFAIL);
			goto done;
		}
	}

	result = accept_connection(csock);
done:
	isc__nm_accept_connection_log(ssock, result, can_log_tcp_quota());
}
    563 
    564 static void
    565 stop_tcp_child_job(void *arg) {
    566 	isc_nmsocket_t *sock = arg;
    567 
    568 	REQUIRE(VALID_NMSOCK(sock));
    569 	REQUIRE(sock->tid == isc_tid());
    570 	REQUIRE(sock->parent != NULL);
    571 	REQUIRE(sock->type == isc_nm_tcpsocket);
    572 	REQUIRE(!sock->closing);
    573 
    574 	sock->active = false;
    575 	sock->closing = true;
    576 
    577 	/*
    578 	 * The order of the close operation is important here, the uv_close()
    579 	 * gets scheduled in the reverse order, so we need to close the timer
    580 	 * last, so its gone by the time we destroy the socket
    581 	 */
    582 
    583 	/* 2. close the listening socket */
    584 	isc__nmsocket_clearcb(sock);
    585 	isc__nm_stop_reading(sock);
    586 	uv_close(&sock->uv_handle.handle, tcp_stop_cb);
    587 
    588 	/* 1. close the read timer */
    589 	isc__nmsocket_timer_stop(sock);
    590 	uv_close(&sock->read_timer, NULL);
    591 
    592 	REQUIRE(!sock->worker->loop->paused);
    593 	isc_barrier_wait(&sock->parent->stop_barrier);
    594 }
    595 
    596 static void
    597 stop_tcp_child(isc_nmsocket_t *sock) {
    598 	REQUIRE(VALID_NMSOCK(sock));
    599 
    600 	if (sock->tid == 0) {
    601 		stop_tcp_child_job(sock);
    602 	} else {
    603 		isc_async_run(sock->worker->loop, stop_tcp_child_job, sock);
    604 	}
    605 }
    606 
/*
 * Stop a TCP listener: mark the parent inactive and tear down every
 * child socket.  Must be called on thread 0.  The other threads'
 * children are stopped first (asynchronously); stopping our own child
 * last makes us wait in its stop_barrier rendezvous before the parent
 * is marked closed and queued for destruction.
 */
void
isc__nm_tcp_stoplistening(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcplistener);
	REQUIRE(sock->tid == isc_tid());
	REQUIRE(sock->tid == 0);
	REQUIRE(!sock->closing);

	sock->closing = true;

	/* Mark the parent socket inactive */
	sock->active = false;

	/* Stop all the other threads' children */
	for (size_t i = 1; i < sock->nchildren; i++) {
		stop_tcp_child(&sock->children[i]);
	}

	/* Stop the child for the main thread */
	stop_tcp_child(&sock->children[0]);

	/* Stop the parent */
	sock->closed = true;

	isc__nmsocket_prep_destroy(sock);
}
    633 
    634 static void
    635 tcp_stop_cb(uv_handle_t *handle) {
    636 	isc_nmsocket_t *sock = uv_handle_get_data(handle);
    637 	uv_handle_set_data(handle, NULL);
    638 
    639 	REQUIRE(VALID_NMSOCK(sock));
    640 	REQUIRE(sock->tid == isc_tid());
    641 	REQUIRE(sock->closing);
    642 	REQUIRE(sock->type == isc_nm_tcpsocket);
    643 	REQUIRE(!sock->closed);
    644 
    645 	sock->closed = true;
    646 
    647 	isc__nm_incstats(sock, STATID_CLOSE);
    648 
    649 	isc__nmsocket_detach(&sock);
    650 }
    651 
/*
 * Common failure path for TCP reads: stop the timer and reading,
 * deliver 'result' to the pending read callback (if any) exactly
 * once — the callbacks are cleared before dispatch — and queue the
 * socket for destruction.  'async' selects deferred vs. immediate
 * callback delivery.
 */
void
isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result,
			   bool async) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(result != ISC_R_SUCCESS);

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);
	sock->reading = false;

	if (sock->recv_cb != NULL) {
		isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
		/* Clear first so the error is reported only once */
		isc__nmsocket_clearcb(sock);
		isc__nm_readcb(sock, req, result, async);
	}

	isc__nmsocket_prep_destroy(sock);
}
    670 
/*
 * Start reading on a connected TCP socket; 'cb' is invoked (with
 * 'cbarg') for incoming data.  Initializes the read timeout from the
 * netmgr keepalive/idle settings on first use, skips re-arming uv
 * reads while the connection is throttled, and starts the read timer
 * unless the caller manages it manually.  Failures are routed through
 * isc__nm_tcp_failed_read_cb().
 */
void
isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
	isc_nmsocket_t *sock;
	isc_nm_t *netmgr;
	isc_result_t result;

	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));

	sock = handle->sock;
	netmgr = sock->worker->netmgr;

	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(sock->statichandle == handle);

	sock->recv_cb = cb;
	sock->recv_cbarg = cbarg;

	/* Initialize the timer */
	if (sock->read_timeout == 0) {
		sock->read_timeout =
			sock->keepalive
				? atomic_load_relaxed(&netmgr->keepalive)
				: atomic_load_relaxed(&netmgr->idle);
	}

	if (isc__nmsocket_closing(sock)) {
		result = ISC_R_CANCELED;
		goto failure;
	}

	/* While throttled, uv reading stays stopped (see read_cb) */
	if (!sock->reading_throttled) {
		result = isc__nm_start_reading(sock);
		if (result != ISC_R_SUCCESS) {
			goto failure;
		}
	}

	sock->reading = true;

	if (!sock->manual_read_timer) {
		isc__nmsocket_timer_start(sock);
	}

	return;
failure:
	isc__nm_tcp_failed_read_cb(sock, result, true);
}
    719 
    720 void
    721 isc__nm_tcp_read_stop(isc_nmhandle_t *handle) {
    722 	REQUIRE(VALID_NMHANDLE(handle));
    723 	REQUIRE(VALID_NMSOCK(handle->sock));
    724 
    725 	isc_nmsocket_t *sock = handle->sock;
    726 
    727 	if (!sock->manual_read_timer) {
    728 		isc__nmsocket_timer_stop(sock);
    729 	}
    730 	isc__nm_stop_reading(sock);
    731 	sock->reading = false;
    732 
    733 	return;
    734 }
    735 
/*
 * libuv read callback for TCP sockets.  Delivers the received bytes
 * to the socket's recv callback synchronously, refreshes the server
 * read timeout, and throttles reading when the peer stops draining
 * our send queue.  The uv buffer is freed at 'free:' except when a
 * failed read handed us libuv's null buffer.
 */
void
isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
	isc__nm_uvreq_t *req = NULL;
	isc_nm_t *netmgr = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_tid());
	REQUIRE(buf != NULL);

	netmgr = sock->worker->netmgr;

	if (isc__nmsocket_closing(sock)) {
		isc__nm_tcp_failed_read_cb(sock, ISC_R_CANCELED, false);
		goto free;
	}

	if (nread < 0) {
		/* EOF is a normal close, not a receive failure */
		if (nread != UV_EOF) {
			isc__nm_incstats(sock, STATID_RECVFAIL);
		}

		isc__nm_tcp_failed_read_cb(sock, isc_uverr2result(nread),
					   false);

		goto free;
	}

	req = isc__nm_get_read_req(sock, NULL);

	/*
	 * The callback will be called synchronously because the
	 * result is ISC_R_SUCCESS, so we don't need to retain
	 * the buffer
	 */
	req->uvbuf.base = buf->base;
	req->uvbuf.len = nread;

	/* Servers refresh the idle/keepalive timeout on each read */
	if (!sock->client) {
		sock->read_timeout =
			sock->keepalive
				? atomic_load_relaxed(&netmgr->keepalive)
				: atomic_load_relaxed(&netmgr->idle);
	}

	isc__nm_readcb(sock, req, ISC_R_SUCCESS, false);

	if (!sock->client && sock->reading) {
		/*
		 * Stop reading if we have accumulated enough bytes in the send
		 * queue; this means that the TCP client is not reading back the
		 * data we sending to it, and there's no reason to continue
		 * processing more incoming DNS messages, if the client is not
		 * reading back the responses.
		 */
		size_t write_queue_size =
			uv_stream_get_write_queue_size(&sock->uv_handle.stream);

		if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) {
			isc__nmsocket_log(
				sock, ISC_LOG_DEBUG(3),
				"throttling TCP connection, the other side is "
				"not reading the data (%zu)",
				write_queue_size);
			sock->reading_throttled = true;
			isc__nm_stop_reading(sock);
		}
	} else if (uv_is_active(&sock->uv_handle.handle) &&
		   !sock->manual_read_timer)
	{
		/* The readcb could have paused the reading */
		/* The timer will be updated */
		isc__nmsocket_timer_restart(sock);
	}

free:
	if (nread < 0) {
		/*
		 * The buffer may be a null buffer on error.
		 */
		if (buf->base == NULL && buf->len == 0) {
			return;
		}
	}

	isc__nm_free_uvbuf(sock, buf);
}
    823 
    824 /*
    825  * This is called after we get a quota_accept_cb() callback.
    826  */
    827 static void
    828 tcpaccept_cb(void *arg) {
    829 	isc_nmsocket_t *csock = arg;
    830 	isc_nmsocket_t *ssock = csock->server;
    831 
    832 	REQUIRE(VALID_NMSOCK(csock));
    833 	REQUIRE(csock->tid == isc_tid());
    834 
    835 	isc_result_t result = accept_connection(csock);
    836 	isc__nm_accept_connection_log(ssock, result, can_log_tcp_quota());
    837 	isc__nmsocket_detach(&csock);
    838 }
    839 
/*
 * Quota-acquired callback: a TCP clients quota slot became available
 * for a connection whose accept was deferred by tcp_connection_cb().
 * Accepts immediately when already on the child's thread; otherwise
 * takes a reference and reschedules onto the child's loop.
 */
static void
quota_accept_cb(void *arg) {
	isc_nmsocket_t *csock = arg;
	isc_nmsocket_t *ssock = csock->server;

	REQUIRE(VALID_NMSOCK(csock));

	/*
	 * This needs to be asynchronous, because the quota might have been
	 * released by a different child socket.
	 */
	if (csock->tid == isc_tid()) {
		isc_result_t result = accept_connection(csock);
		isc__nm_accept_connection_log(ssock, result,
					      can_log_tcp_quota());
	} else {
		/* Keep csock alive across the thread hop; see tcpaccept_cb */
		isc__nmsocket_attach(csock, &(isc_nmsocket_t *){ NULL });
		isc_async_run(csock->worker->loop, tcpaccept_cb, csock);
	}
}
    860 
/*
 * Accept a pending connection into the prepared child socket 'csock':
 * copy the listener's callbacks, uv_accept() the stream, resolve peer
 * and local addresses, and hand a new handle to the accept callback.
 * If the accept was quota-delayed, the initial read timeout is reduced
 * by the time spent queued (or the connection is dropped as expired).
 *
 * On success ownership of csock moves to the handle.  On failure the
 * socket is queued for destruction and the error is returned; either
 * way this function consumes the caller's csock reference.
 */
static isc_result_t
accept_connection(isc_nmsocket_t *csock) {
	int r;
	isc_result_t result;
	struct sockaddr_storage ss;
	isc_sockaddr_t local;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(csock));
	REQUIRE(VALID_NMSOCK(csock->server));
	REQUIRE(csock->tid == isc_tid());

	csock->accepting = true;
	/* Inherit the listener's callbacks and initial timeout */
	csock->accept_cb = csock->server->accept_cb;
	csock->accept_cbarg = csock->server->accept_cbarg;
	csock->recv_cb = csock->server->recv_cb;
	csock->recv_cbarg = csock->server->recv_cbarg;
	csock->read_timeout = atomic_load_relaxed(&csock->worker->netmgr->init);

	r = uv_tcp_init(&csock->worker->loop->loop, &csock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&csock->uv_handle.handle, csock);

	r = uv_timer_init(&csock->worker->loop->loop, &csock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock);

	if (csock->server->pquota != NULL) {
		isc__nm_incstats(csock, STATID_CLIENTS);
	}

	/*
	 * We need to initialize the tcp and timer before failing because
	 * isc__nm_tcp_close() can't handle uninitalized TCP nmsocket.
	 */
	if (isc__nmsocket_closing(csock)) {
		result = ISC_R_CANCELED;
		goto failure;
	}

	r = uv_accept(&csock->server->uv_handle.stream,
		      &csock->uv_handle.stream);
	if (r != 0) {
		result = isc_uverr2result(r);
		goto failure;
	}

	/* Check if the connection is not expired */
	if (csock->quota_accept_ts != 0) {
		/* The timestamp is given in nanoseconds */
		const uint64_t time_elapsed_ms =
			(isc_time_monotonic() - csock->quota_accept_ts) /
			NS_PER_MS;

		if (time_elapsed_ms >= csock->read_timeout) {
			/*
			 * At this point we have received a connection from a
			 * queue of accepted connections (via uv_accept()), but
			 * it has expired. We cannot do anything better than
			 * drop it on the floor at this point.
			 */
			result = ISC_R_TIMEDOUT;
			goto failure;
		} else {
			/* Adjust the initial read timeout accordingly */
			csock->read_timeout -= time_elapsed_ms;
		}
	}

	r = uv_tcp_getpeername(&csock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&csock->peer,
					   (struct sockaddr *)&ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	r = uv_tcp_getsockname(&csock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&local, (struct sockaddr *)&ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	handle = isc__nmhandle_get(csock, NULL, &local);

	result = csock->accept_cb(handle, ISC_R_SUCCESS, csock->accept_cbarg);
	if (result != ISC_R_SUCCESS) {
		isc_nmhandle_detach(&handle);
		goto failure;
	}

	csock->accepting = false;

	isc__nm_incstats(csock, STATID_ACCEPT);

	/*
	 * The acceptcb needs to attach to the handle if it wants to keep the
	 * connection alive
	 */
	isc_nmhandle_detach(&handle);

	/*
	 * sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&csock);

	return ISC_R_SUCCESS;

failure:
	csock->active = false;
	csock->accepting = false;

	if (result != ISC_R_NOTCONNECTED) {
		/* IGNORE: The client disconnected before we could accept */
		isc__nmsocket_log(csock, ISC_LOG_ERROR,
				  "Accepting TCP connection failed: %s",
				  isc_result_totext(result));
	}

	isc__nmsocket_prep_destroy(csock);

	isc__nmsocket_detach(&csock);

	return result;
}
    997 
    998 static void
    999 tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, isc_nm_cb_t cb,
   1000 	 void *cbarg, const bool dnsmsg) {
   1001 	REQUIRE(VALID_NMHANDLE(handle));
   1002 	REQUIRE(VALID_NMSOCK(handle->sock));
   1003 
   1004 	isc_nmsocket_t *sock = handle->sock;
   1005 	isc_result_t result;
   1006 	isc__nm_uvreq_t *uvreq = NULL;
   1007 	isc_nm_t *netmgr = sock->worker->netmgr;
   1008 
   1009 	REQUIRE(sock->type == isc_nm_tcpsocket);
   1010 	REQUIRE(sock->tid == isc_tid());
   1011 
   1012 	uvreq = isc__nm_uvreq_get(sock);
   1013 	if (dnsmsg) {
   1014 		*(uint16_t *)uvreq->tcplen = htons(region->length);
   1015 	}
   1016 	uvreq->uvbuf.base = (char *)region->base;
   1017 	uvreq->uvbuf.len = region->length;
   1018 
   1019 	isc_nmhandle_attach(handle, &uvreq->handle);
   1020 
   1021 	uvreq->cb.send = cb;
   1022 	uvreq->cbarg = cbarg;
   1023 
   1024 	if (sock->write_timeout == 0) {
   1025 		sock->write_timeout =
   1026 			sock->keepalive
   1027 				? atomic_load_relaxed(&netmgr->keepalive)
   1028 				: atomic_load_relaxed(&netmgr->idle);
   1029 	}
   1030 
   1031 	result = tcp_send_direct(sock, uvreq);
   1032 	if (result != ISC_R_SUCCESS) {
   1033 		isc__nm_incstats(sock, STATID_SENDFAIL);
   1034 		isc__nm_failed_send_cb(sock, uvreq, result, true);
   1035 	}
   1036 
   1037 	return;
   1038 }
   1039 
/*
 * Public entry point: send a raw (unframed) region over a TCP handle.
 * 'cb' is invoked with 'cbarg' when the write completes or fails.
 */
void
isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
		 isc_nm_cb_t cb, void *cbarg) {
	tcp_send(handle, region, cb, cbarg, false);
}
   1045 
/*
 * Public entry point: send a region as a DNS-over-TCP message, i.e.
 * prefixed with a two-byte network-order length (see tcp_send()).
 */
void
isc__nm_tcp_senddns(isc_nmhandle_t *handle, const isc_region_t *region,
		    isc_nm_cb_t cb, void *cbarg) {
	tcp_send(handle, region, cb, cbarg, true);
}
   1051 
   1052 static void
   1053 tcp_maybe_restart_reading(isc_nmsocket_t *sock) {
   1054 	if (!sock->client && sock->reading &&
   1055 	    !uv_is_active(&sock->uv_handle.handle))
   1056 	{
   1057 		/*
   1058 		 * Restart reading if we have less data in the send queue than
   1059 		 * the send buffer size, this means that the TCP client has
   1060 		 * started reading some data again.  Starting reading when we go
   1061 		 * under the limit instead of waiting for all data has been
   1062 		 * flushed allows faster recovery (in case there was a
   1063 		 * congestion and now there isn't).
   1064 		 */
   1065 		size_t write_queue_size =
   1066 			uv_stream_get_write_queue_size(&sock->uv_handle.stream);
   1067 		if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) {
   1068 			isc__nmsocket_log(
   1069 				sock, ISC_LOG_DEBUG(3),
   1070 				"resuming TCP connection, the other side  "
   1071 				"is reading the data again (%zu)",
   1072 				write_queue_size);
   1073 			isc__nm_start_reading(sock);
   1074 			sock->reading_throttled = false;
   1075 		}
   1076 	}
   1077 }
   1078 
/*
 * uv_write() completion callback: stop and release the write timer,
 * then report the result.  On failure on a server-side socket that is
 * mid-read, reading is resumed so the failure propagates and the
 * connection is reset.
 */
static void
tcp_send_cb(uv_write_t *req, int status) {
	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_UVREQ(uvreq));
	REQUIRE(VALID_NMSOCK(uvreq->sock));

	sock = uvreq->sock;

	/* The write finished (or failed); the write timeout no longer
	 * applies to this request. */
	isc_nm_timer_stop(uvreq->timer);
	isc_nm_timer_detach(&uvreq->timer);

	if (status < 0) {
		isc__nm_incstats(sock, STATID_SENDFAIL);
		isc__nm_failed_send_cb(sock, uvreq, isc_uverr2result(status),
				       false);
		if (!sock->client && sock->reading) {
			/*
			 * As we are resuming reading, it is not throttled
			 * anymore (technically).
			 */
			sock->reading_throttled = false;
			isc__nm_start_reading(sock);
			isc__nmsocket_reset(sock);
		}
		return;
	}

	isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
	tcp_maybe_restart_reading(sock);
}
   1111 
/*
 * Try to write 'req' synchronously with uv_try_write(); on a short or
 * would-block write, fall back to an asynchronous uv_write() of the
 * remaining bytes, throttling further reads from the peer while the
 * send queue drains.  Returns ISC_R_SUCCESS when the data was written
 * or queued, ISC_R_CANCELED when the socket is closing, or a
 * translated uv error otherwise.
 */
static isc_result_t
tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_UVREQ(req));
	REQUIRE(sock->tid == isc_tid());
	REQUIRE(sock->type == isc_nm_tcpsocket);

	int r;
	uv_buf_t bufs[2] = { { 0 }, { 0 } }; /* ugly, but required for old GCC
						versions */
	size_t nbufs = 1;

	if (isc__nmsocket_closing(sock)) {
		return ISC_R_CANCELED;
	}

	/* Check if we are not trying to send a DNS message */
	/* NOTE(review): a zero prefix marks a non-DNS write — tcp_send()
	 * stores a nonzero htons(length) only for DNS messages; presumably
	 * the request is zero-initialized on allocation — confirm. */
	if (*(uint16_t *)req->tcplen == 0) {
		bufs[0].base = req->uvbuf.base;
		bufs[0].len = req->uvbuf.len;

		r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs);

		if (r == (int)(bufs[0].len)) {
			/* Wrote everything */
			isc__nm_sendcb(sock, req, ISC_R_SUCCESS, true);
			tcp_maybe_restart_reading(sock);
			return ISC_R_SUCCESS;
		} else if (r > 0) {
			/* Partial write: advance past the 'r' bytes already
			 * sent and queue the remainder below. */
			bufs[0].base += (size_t)r;
			bufs[0].len -= (size_t)r;
		} else if (!(r == UV_ENOSYS || r == UV_EAGAIN)) {
			/* Hard error; ENOSYS/EAGAIN fall through to
			 * uv_write() below. */
			return isc_uverr2result(r);
		}
	} else {
		/* DNS message: 2-byte length prefix followed by payload. */
		nbufs = 2;
		bufs[0].base = req->tcplen;
		bufs[0].len = 2;
		bufs[1].base = req->uvbuf.base;
		bufs[1].len = req->uvbuf.len;

		r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs);

		if (r == (int)(bufs[0].len + bufs[1].len)) {
			/* Wrote everything */
			isc__nm_sendcb(sock, req, ISC_R_SUCCESS, true);
			tcp_maybe_restart_reading(sock);
			return ISC_R_SUCCESS;
		} else if (r == 1) {
			/* Partial write of DNSMSG length */
			bufs[0].base = req->tcplen + 1;
			bufs[0].len = 1;
		} else if (r > 0) {
			/* Partial write of DNSMSG */
			/* The full 2-byte prefix went out; queue the payload
			 * from offset r - 2. */
			nbufs = 1;
			bufs[0].base = req->uvbuf.base + (r - 2);
			bufs[0].len = req->uvbuf.len - (r - 2);
		} else if (!(r == UV_ENOSYS || r == UV_EAGAIN)) {
			return isc_uverr2result(r);
		}
	}

	/* The synchronous path could not finish: stop reading from the
	 * peer until the queued write drains (see
	 * tcp_maybe_restart_reading()). */
	if (!sock->client && sock->reading) {
		sock->reading_throttled = true;
		isc__nm_stop_reading(sock);
	}
	isc__nmsocket_log(sock, ISC_LOG_DEBUG(3),
			  "%sthe other side is not "
			  "reading the data, switching to uv_write()",
			  !sock->client && sock->reading
				  ? "throttling TCP connection, "
				  : "");

	r = uv_write(&req->uv_req.write, &sock->uv_handle.stream, bufs, nbufs,
		     tcp_send_cb);
	if (r < 0) {
		return isc_uverr2result(r);
	}

	/* The async write is bounded by the socket's write timeout; the
	 * timer is stopped in tcp_send_cb(). */
	isc_nm_timer_create(req->handle, isc__nmsocket_writetimeout_cb, req,
			    &req->timer);
	if (sock->write_timeout > 0) {
		isc_nm_timer_start(req->timer, sock->write_timeout);
	}

	return ISC_R_SUCCESS;
}
   1199 
   1200 static void
   1201 tcp_close_sock(isc_nmsocket_t *sock) {
   1202 	REQUIRE(VALID_NMSOCK(sock));
   1203 	REQUIRE(sock->tid == isc_tid());
   1204 	REQUIRE(sock->closing);
   1205 	REQUIRE(!sock->closed);
   1206 
   1207 	sock->closed = true;
   1208 	sock->connected = false;
   1209 
   1210 	isc__nm_incstats(sock, STATID_CLOSE);
   1211 
   1212 	if (sock->server != NULL) {
   1213 		if (sock->server->pquota != NULL) {
   1214 			isc__nm_decstats(sock, STATID_CLIENTS);
   1215 			isc_quota_release(sock->server->pquota);
   1216 		}
   1217 		isc__nmsocket_detach(&sock->server);
   1218 	}
   1219 
   1220 	isc__nmsocket_prep_destroy(sock);
   1221 }
   1222 
/*
 * uv_close() completion callback: recover the socket from the uv
 * handle's data pointer, clear it, and finish the teardown.
 */
static void
tcp_close_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);
	uv_handle_set_data(handle, NULL);

	tcp_close_sock(sock);
}
   1230 
/*
 * Close an inactive, non-listening TCP socket on its own loop: stop
 * reading, close the uv handle and the read timer, and destroy the
 * socket from the close callback.
 */
void
isc__nm_tcp_close(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(!isc__nmsocket_active(sock));
	REQUIRE(sock->tid == isc_tid());
	REQUIRE(sock->parent == NULL);
	REQUIRE(!sock->closing);

	sock->closing = true;

	/*
	 * The order of the close operation is important here, the uv_close()
	 * gets scheduled in the reverse order, so we need to close the timer
	 * last, so its gone by the time we destroy the socket
	 */

	if (!uv_is_closing(&sock->uv_handle.handle)) {
		/* Normal order of operation */

		/* 2. close the socket + destroy the socket in callback */
		isc__nmsocket_clearcb(sock);
		isc__nm_stop_reading(sock);
		sock->reading = false;
		uv_close(&sock->uv_handle.handle, tcp_close_cb);

		/* 1. close the timer */
		isc__nmsocket_timer_stop(sock);
		uv_close((uv_handle_t *)&sock->read_timer, NULL);
	} else {
		/* The socket was already closed elsewhere */

		/* 1. close the timer + destroy the socket in callback */
		/* The timer handle carries the socket pointer so that
		 * tcp_close_cb() can find it. */
		isc__nmsocket_timer_stop(sock);
		uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
		uv_close((uv_handle_t *)&sock->read_timer, tcp_close_cb);
	}
}
   1269 
/*
 * uv_close() callback used when shutting down a socket that was still
 * connecting: schedule destruction and drop the reference taken in
 * isc__nm_tcp_shutdown().
 */
static void
tcp_close_connect_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	REQUIRE(VALID_NMSOCK(sock));

	REQUIRE(sock->tid == isc_tid());

	isc__nmsocket_prep_destroy(sock);
	isc__nmsocket_detach(&sock);
}
   1281 
/*
 * Shut down a TCP socket during netmgr teardown.  Deactivates the
 * socket and then handles, in order: an in-flight connect (close the
 * uv handle, destroy from the callback), an attached handle (fail the
 * read), a plain non-listening socket (destroy it), and finally a
 * listening child whose parent lives on the same loop (destroy the
 * parent).
 */
void
isc__nm_tcp_shutdown(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_tid());
	REQUIRE(sock->type == isc_nm_tcpsocket);

	/*
	 * If the socket is active, mark it inactive and
	 * continue. If it isn't active, stop now.
	 */
	if (!sock->active) {
		return;
	}
	sock->active = false;

	INSIST(!sock->accepting);

	if (sock->connecting) {
		/* Keep the socket alive until tcp_close_connect_cb() runs;
		 * the extra reference is dropped there. */
		isc_nmsocket_t *tsock = NULL;
		isc__nmsocket_attach(sock, &tsock);
		uv_close(&sock->uv_handle.handle, tcp_close_connect_cb);
		return;
	}

	/* There's a handle attached to the socket (from accept or connect) */
	if (sock->statichandle) {
		isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false);
		return;
	}

	/* Destroy the non-listening socket */
	if (sock->parent == NULL) {
		isc__nmsocket_prep_destroy(sock);
		return;
	}

	/* Destroy the listening socket if on the same loop */
	if (sock->tid == sock->parent->tid) {
		isc__nmsocket_prep_destroy(sock->parent);
	}
}
   1323 
   1324 void
   1325 isc__nmhandle_tcp_set_manual_timer(isc_nmhandle_t *handle, const bool manual) {
   1326 	isc_nmsocket_t *sock;
   1327 
   1328 	REQUIRE(VALID_NMHANDLE(handle));
   1329 	sock = handle->sock;
   1330 	REQUIRE(VALID_NMSOCK(sock));
   1331 	REQUIRE(sock->type == isc_nm_tcpsocket);
   1332 	REQUIRE(sock->tid == isc_tid());
   1333 	REQUIRE(!uv_is_active(&sock->uv_handle.handle));
   1334 
   1335 	sock->manual_read_timer = manual;
   1336 }
   1337