Home | History | Annotate | Line # | Download | only in netmgr
      1 /*	$NetBSD: tcp.c,v 1.14 2026/04/08 00:16:16 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * SPDX-License-Identifier: MPL-2.0
      7  *
      8  * This Source Code Form is subject to the terms of the Mozilla Public
      9  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  *
     12  * See the COPYRIGHT file distributed with this work for additional
     13  * information regarding copyright ownership.
     14  */
     15 
     16 #include <libgen.h>
     17 #include <string.h>
     18 #include <unistd.h>
     19 
     20 #include <isc/async.h>
     21 #include <isc/atomic.h>
     22 #include <isc/barrier.h>
     23 #include <isc/buffer.h>
     24 #include <isc/condition.h>
     25 #include <isc/errno.h>
     26 #include <isc/log.h>
     27 #include <isc/magic.h>
     28 #include <isc/mem.h>
     29 #include <isc/netmgr.h>
     30 #include <isc/quota.h>
     31 #include <isc/random.h>
     32 #include <isc/refcount.h>
     33 #include <isc/region.h>
     34 #include <isc/result.h>
     35 #include <isc/sockaddr.h>
     36 #include <isc/stdtime.h>
     37 #include <isc/thread.h>
     38 #include <isc/util.h>
     39 #include <isc/uv.h>
     40 
     41 #include "../loop_p.h"
     42 #include "netmgr-int.h"
     43 
     44 static atomic_uint_fast32_t last_tcpquota_log = 0;
     45 
     46 static bool
     47 can_log_tcp_quota(void) {
     48 	isc_stdtime_t last;
     49 	isc_stdtime_t now = isc_stdtime_now();
     50 	last = atomic_exchange_relaxed(&last_tcpquota_log, now);
     51 	if (now != last) {
     52 		return true;
     53 	}
     54 
     55 	return false;
     56 }
     57 
     58 static isc_result_t
     59 tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
     60 
     61 static isc_result_t
     62 tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
     63 static void
     64 tcp_connect_cb(uv_connect_t *uvreq, int status);
     65 static void
     66 tcp_stop_cb(uv_handle_t *handle);
     67 
     68 static void
     69 tcp_connection_cb(uv_stream_t *server, int status);
     70 
     71 static void
     72 tcp_close_cb(uv_handle_t *uvhandle);
     73 
     74 static isc_result_t
     75 accept_connection(isc_nmsocket_t *ssock);
     76 
     77 static void
     78 quota_accept_cb(void *arg);
     79 
/*
 * Start an outgoing TCP connection on 'sock' using the addresses stored in
 * 'req'.
 *
 * Initializes the libuv TCP handle and the read timer, hands the already
 * created descriptor sock->fd to libuv, binds to req->local when it is
 * set, and queues uv_tcp_connect() with tcp_connect_cb() as the completion
 * callback.
 *
 * Must be called on the thread that owns 'sock'.
 *
 * Returns ISC_R_SUCCESS when the connect request was queued, otherwise the
 * isc_result_t translation of the libuv error that stopped the setup.
 */
static isc_result_t
tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
	isc__networker_t *worker = NULL;
	isc_result_t result = ISC_R_UNSET;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_UVREQ(req));

	REQUIRE(sock->tid == isc_tid());

	worker = sock->worker;

	sock->connecting = true;

	/* 2 minute timeout */
	result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	/* Both handles carry the socket as their user data. */
	r = uv_tcp_init(&worker->loop->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);

	r = uv_timer_init(&worker->loop->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r != 0) {
		/* libuv did not take the descriptor, so close it here. */
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock, STATID_OPENFAIL);
		return isc_uverr2result(r);
	}
	isc__nm_incstats(sock, STATID_OPEN);

	/* Bind only when the caller supplied a local address. */
	if (req->local.length != 0) {
		r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
		if (r != 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			return isc_uverr2result(r);
		}
	}

	isc__nm_set_network_buffers(sock->worker->netmgr,
				    &sock->uv_handle.handle);

	/* tcp_connect_cb() recovers 'req' from the connect request's data. */
	uv_handle_set_data(&req->uv_req.handle, req);
	r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
			   &req->peer.type.sa, tcp_connect_cb);
	if (r != 0) {
		isc__nm_incstats(sock, STATID_CONNECTFAIL);
		return isc_uverr2result(r);
	}

	/*
	 * While the connection is in progress the read timer carries the
	 * connect request instead of the socket; tcp_connect_cb() points it
	 * back at the socket once the connection succeeds.
	 */
	uv_handle_set_data((uv_handle_t *)&sock->read_timer,
			   &req->uv_req.connect);
	isc__nmsocket_timer_start(sock);

	return ISC_R_SUCCESS;
}
    140 
    141 static void
    142 tcp_connect_cb(uv_connect_t *uvreq, int status) {
    143 	isc_result_t result = ISC_R_UNSET;
    144 	isc__nm_uvreq_t *req = NULL;
    145 	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
    146 	struct sockaddr_storage ss;
    147 	isc__networker_t *worker = NULL;
    148 	int r;
    149 
    150 	REQUIRE(VALID_NMSOCK(sock));
    151 	REQUIRE(sock->tid == isc_tid());
    152 
    153 	worker = sock->worker;
    154 
    155 	req = uv_handle_get_data((uv_handle_t *)uvreq);
    156 
    157 	REQUIRE(VALID_UVREQ(req));
    158 	REQUIRE(VALID_NMHANDLE(req->handle));
    159 
    160 	INSIST(sock->connecting);
    161 
    162 	if (sock->timedout || status == UV_ETIMEDOUT) {
    163 		/* Connection timed-out */
    164 		result = ISC_R_TIMEDOUT;
    165 		goto error;
    166 	} else if (isc__nm_closing(worker)) {
    167 		/* Network manager shutting down */
    168 		result = ISC_R_SHUTTINGDOWN;
    169 		goto error;
    170 	} else if (isc__nmsocket_closing(sock)) {
    171 		/* Connection canceled */
    172 		result = ISC_R_CANCELED;
    173 		goto error;
    174 	} else if (status == UV_EADDRINUSE) {
    175 		/*
    176 		 * On FreeBSD the TCP connect() call sometimes results in a
    177 		 * spurious transient EADDRINUSE. Try a few more times before
    178 		 * giving up.
    179 		 */
    180 		if (--req->connect_tries > 0) {
    181 			r = uv_tcp_connect(&req->uv_req.connect,
    182 					   &sock->uv_handle.tcp,
    183 					   &req->peer.type.sa, tcp_connect_cb);
    184 			if (r != 0) {
    185 				result = isc_uverr2result(r);
    186 				goto error;
    187 			}
    188 			return;
    189 		}
    190 		result = isc_uverr2result(status);
    191 		goto error;
    192 	} else if (status != 0) {
    193 		result = isc_uverr2result(status);
    194 		goto error;
    195 	}
    196 
    197 	isc__nmsocket_timer_stop(sock);
    198 	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
    199 
    200 	isc__nm_incstats(sock, STATID_CONNECT);
    201 	r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
    202 			       &(int){ sizeof(ss) });
    203 	if (r != 0) {
    204 		result = isc_uverr2result(r);
    205 		goto error;
    206 	}
    207 
    208 	sock->connecting = false;
    209 	sock->connected = true;
    210 
    211 	result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
    212 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
    213 
    214 	isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);
    215 
    216 	return;
    217 error:
    218 	isc__nm_failed_connect_cb(sock, req, result, false);
    219 }
    220 
    221 void
    222 isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
    223 		  isc_nm_cb_t connect_cb, void *connect_cbarg,
    224 		  unsigned int timeout) {
    225 	isc_result_t result = ISC_R_SUCCESS;
    226 	isc_nmsocket_t *sock = NULL;
    227 	isc__nm_uvreq_t *req = NULL;
    228 	sa_family_t sa_family;
    229 	isc__networker_t *worker = NULL;
    230 	uv_os_sock_t fd = -1;
    231 	in_port_t port_low, port_high;
    232 
    233 	REQUIRE(VALID_NM(mgr));
    234 	REQUIRE(local != NULL);
    235 	REQUIRE(peer != NULL);
    236 
    237 	worker = &mgr->workers[isc_tid()];
    238 
    239 	if (isc__nm_closing(worker)) {
    240 		connect_cb(NULL, ISC_R_SHUTTINGDOWN, connect_cbarg);
    241 		return;
    242 	}
    243 
    244 	sa_family = peer->type.sa.sa_family;
    245 
    246 	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &fd);
    247 	if (result != ISC_R_SUCCESS) {
    248 		connect_cb(NULL, result, connect_cbarg);
    249 		return;
    250 	}
    251 
    252 	sock = isc_mempool_get(worker->nmsocket_pool);
    253 	isc__nmsocket_init(sock, worker, isc_nm_tcpsocket, local, NULL);
    254 
    255 	sock->connect_timeout = timeout;
    256 	sock->fd = fd;
    257 	sock->client = true;
    258 
    259 	req = isc__nm_uvreq_get(sock);
    260 	req->cb.connect = connect_cb;
    261 	req->cbarg = connect_cbarg;
    262 	req->peer = *peer;
    263 	req->local = *local;
    264 	req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);
    265 
    266 	(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
    267 	(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
    268 	port_low = (sa_family == AF_INET) ? mgr->port_low4 : mgr->port_low6;
    269 	port_high = (sa_family == AF_INET) ? mgr->port_high4 : mgr->port_high6;
    270 	result = isc__nm_socket_max_port_range(sock->fd, sa_family, port_low,
    271 					       port_high);
    272 	if (result != ISC_R_SUCCESS) {
    273 		isc__nmsocket_log(sock, ISC_LOG_DEBUG(99),
    274 				  "setting up IP_BIND_ADDRESS_NO_PORT or "
    275 				  "IP_LOCAL_PORT_RANGE failed: %s\n",
    276 				  result == ISC_R_RANGE
    277 					  ? isc_result_totext(result)
    278 					  : strerror(errno));
    279 	}
    280 
    281 	sock->active = true;
    282 
    283 	result = tcp_connect_direct(sock, req);
    284 	if (result != ISC_R_SUCCESS) {
    285 		sock->active = false;
    286 		isc__nm_tcp_close(sock);
    287 		isc__nm_connectcb(sock, req, result, true);
    288 	}
    289 
    290 	/*
    291 	 * The sock is now attached to the handle.
    292 	 */
    293 	isc__nmsocket_detach(&sock);
    294 }
    295 
    296 static uv_os_sock_t
    297 isc__nm_tcp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
    298 	isc_result_t result;
    299 	uv_os_sock_t sock;
    300 
    301 	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock);
    302 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
    303 
    304 	(void)isc__nm_socket_v6only(sock, sa_family);
    305 
    306 	/* FIXME: set mss */
    307 
    308 	result = isc__nm_socket_reuse(sock, 1);
    309 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
    310 
    311 	if (mgr->load_balance_sockets) {
    312 		result = isc__nm_socket_reuse_lb(sock);
    313 		RUNTIME_CHECK(result == ISC_R_SUCCESS);
    314 	}
    315 
    316 	return sock;
    317 }
    318 
/*
 * Per-thread part of isc_nm_listentcp(): bring the listening child socket
 * 'arg' up on the thread that owns it.
 *
 * Initializes the libuv TCP handle and the read timer, hands sock->fd to
 * libuv, binds it (or inherits the handle flags from the parent), and
 * starts listening with tcp_connection_cb() as the connection callback.
 * The outcome is stored in sock->result.  Children with a non-zero tid
 * then wait on the parent's listen barrier.
 */
static void
start_tcp_child_job(void *arg) {
	isc_nmsocket_t *sock = arg;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_NMSOCK(sock->parent));
	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(sock->tid == isc_tid());

	sa_family_t sa_family = sock->iface.type.sa.sa_family;
	int r, flags = 0;
	isc_result_t result = ISC_R_UNSET;
	isc_loop_t *loop = sock->worker->loop;
	struct sockaddr_storage ss;

	/* The results of these two socket options are deliberately ignored. */
	(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
	(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);

	r = uv_tcp_init(&loop->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);
	/* This keeps the socket alive after everything else is gone */
	isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });

	r = uv_timer_init(&loop->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r < 0) {
		/* libuv did not take the descriptor, so close it here. */
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock, STATID_OPENFAIL);
		goto done;
	}
	isc__nm_incstats(sock, STATID_OPEN);

	if (sa_family == AF_INET6) {
		flags = UV_TCP_IPV6ONLY;
	}

	/*
	 * With load-balanced sockets every child binds its own descriptor.
	 * Otherwise only the child on thread 0 binds and publishes the
	 * resulting handle flags through the parent; the other children
	 * copy those flags instead of binding again.
	 */
	if (sock->worker->netmgr->load_balance_sockets) {
		r = isc__nm_tcp_freebind(&sock->uv_handle.tcp,
					 &sock->iface.type.sa, flags);
		if (r < 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			goto done;
		}
	} else if (sock->tid == 0) {
		r = isc__nm_tcp_freebind(&sock->uv_handle.tcp,
					 &sock->iface.type.sa, flags);
		if (r < 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			goto done;
		}
		sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags;
	} else {
		/* The socket is already bound, just copy the flags */
		sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags;
	}

	isc__nm_set_network_buffers(sock->worker->netmgr,
				    &sock->uv_handle.handle);

	/*
	 * The callback will run in the same thread uv_listen() was called
	 * from, so a race with tcp_connection_cb() isn't possible.
	 */
	r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
		      tcp_connection_cb);
	if (r != 0) {
		isc__nmsocket_log(sock, ISC_LOG_ERROR, "uv_listen failed: %s",
				  isc_result_totext(isc_uverr2result(r)));
		isc__nm_incstats(sock, STATID_BINDFAIL);
		goto done;
	}

	/*
	 * The child on thread 0 stores the address the socket is actually
	 * bound to in the parent's iface.
	 */
	if (sock->tid == 0) {
		r = uv_tcp_getsockname(&sock->uv_handle.tcp,
				       (struct sockaddr *)&ss,
				       &(int){ sizeof(ss) });
		if (r != 0) {
			goto done;
		}

		result = isc_sockaddr_fromsockaddr(&sock->parent->iface,
						   (struct sockaddr *)&ss);
		if (result != ISC_R_SUCCESS) {
			/* 'result' is already set; skip the conversion. */
			goto done_result;
		}
	}

done:
	/* 'r' holds the status of the last libuv call made above. */
	result = isc_uverr2result(r);

done_result:
	/* A child that failed to start listening drops its quota pointer. */
	if (result != ISC_R_SUCCESS) {
		sock->pquota = NULL;
	}

	sock->result = result;

	REQUIRE(!loop->paused);

	/*
	 * The child on thread 0 does not wait here; isc_nm_listentcp()
	 * waits on the same barrier itself.
	 */
	if (sock->tid != 0) {
		isc_barrier_wait(&sock->parent->listen_barrier);
	}
}
    426 
    427 static void
    428 start_tcp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
    429 		uv_os_sock_t fd, int tid) {
    430 	isc_nmsocket_t *csock = &sock->children[tid];
    431 	isc__networker_t *worker = &mgr->workers[tid];
    432 
    433 	isc__nmsocket_init(csock, worker, isc_nm_tcpsocket, iface, sock);
    434 	csock->accept_cb = sock->accept_cb;
    435 	csock->accept_cbarg = sock->accept_cbarg;
    436 	csock->backlog = sock->backlog;
    437 
    438 	/*
    439 	 * Quota isn't attached, just assigned.
    440 	 */
    441 	csock->pquota = sock->pquota;
    442 
    443 	if (mgr->load_balance_sockets) {
    444 		UNUSED(fd);
    445 		csock->fd = isc__nm_tcp_lb_socket(mgr,
    446 						  iface->type.sa.sa_family);
    447 	} else {
    448 		csock->fd = dup(fd);
    449 	}
    450 	REQUIRE(csock->fd >= 0);
    451 
    452 	if (tid == 0) {
    453 		start_tcp_child_job(csock);
    454 	} else {
    455 		isc_async_run(worker->loop, start_tcp_child_job, csock);
    456 	}
    457 }
    458 
    459 isc_result_t
    460 isc_nm_listentcp(isc_nm_t *mgr, uint32_t workers, isc_sockaddr_t *iface,
    461 		 isc_nm_accept_cb_t accept_cb, void *accept_cbarg, int backlog,
    462 		 isc_quota_t *quota, isc_nmsocket_t **sockp) {
    463 	isc_nmsocket_t *sock = NULL;
    464 	uv_os_sock_t fd = -1;
    465 	isc_result_t result = ISC_R_UNSET;
    466 	isc__networker_t *worker = NULL;
    467 
    468 	REQUIRE(VALID_NM(mgr));
    469 	REQUIRE(isc_tid() == 0);
    470 
    471 	if (workers == 0) {
    472 		workers = mgr->nloops;
    473 	}
    474 	REQUIRE(workers <= mgr->nloops);
    475 
    476 	worker = &mgr->workers[0];
    477 	sock = isc_mempool_get(worker->nmsocket_pool);
    478 	isc__nmsocket_init(sock, worker, isc_nm_tcplistener, iface, NULL);
    479 
    480 	sock->nchildren = (workers == ISC_NM_LISTEN_ALL) ? (uint32_t)mgr->nloops
    481 							 : workers;
    482 	sock->children = isc_mem_cget(worker->mctx, sock->nchildren,
    483 				      sizeof(sock->children[0]));
    484 
    485 	isc__nmsocket_barrier_init(sock);
    486 
    487 	sock->accept_cb = accept_cb;
    488 	sock->accept_cbarg = accept_cbarg;
    489 	sock->backlog = backlog;
    490 	sock->pquota = quota;
    491 
    492 	if (!mgr->load_balance_sockets) {
    493 		fd = isc__nm_tcp_lb_socket(mgr, iface->type.sa.sa_family);
    494 	}
    495 
    496 	start_tcp_child(mgr, iface, sock, fd, 0);
    497 	result = sock->children[0].result;
    498 	INSIST(result != ISC_R_UNSET);
    499 
    500 	for (size_t i = 1; i < sock->nchildren; i++) {
    501 		start_tcp_child(mgr, iface, sock, fd, i);
    502 	}
    503 
    504 	isc_barrier_wait(&sock->listen_barrier);
    505 
    506 	if (!mgr->load_balance_sockets) {
    507 		isc__nm_closesocket(fd);
    508 	}
    509 
    510 	/*
    511 	 * If any of the child sockets have failed then isc_nm_listentcp
    512 	 * fails.
    513 	 */
    514 	for (size_t i = 1; i < sock->nchildren; i++) {
    515 		if (result == ISC_R_SUCCESS &&
    516 		    sock->children[i].result != ISC_R_SUCCESS)
    517 		{
    518 			result = sock->children[i].result;
    519 		}
    520 	}
    521 
    522 	if (result != ISC_R_SUCCESS) {
    523 		sock->active = false;
    524 		isc__nm_tcp_stoplistening(sock);
    525 		isc_nmsocket_close(&sock);
    526 
    527 		return result;
    528 	}
    529 
    530 	sock->active = true;
    531 
    532 	*sockp = sock;
    533 	return ISC_R_SUCCESS;
    534 }
    535 
    536 static void
    537 tcp_connection_cb(uv_stream_t *server, int status) {
    538 	isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
    539 	isc_result_t result;
    540 
    541 	REQUIRE(ssock->accept_cb != NULL);
    542 
    543 	if (status != 0) {
    544 		result = isc_uverr2result(status);
    545 		goto done;
    546 	}
    547 
    548 	REQUIRE(VALID_NMSOCK(ssock));
    549 	REQUIRE(ssock->tid == isc_tid());
    550 
    551 	if (isc__nmsocket_closing(ssock)) {
    552 		result = ISC_R_CANCELED;
    553 		goto done;
    554 	}
    555 
    556 	/* Prepare the child socket */
    557 	isc_nmsocket_t *csock = isc_mempool_get(ssock->worker->nmsocket_pool);
    558 	isc__nmsocket_init(csock, ssock->worker, isc_nm_tcpsocket,
    559 			   &ssock->iface, NULL);
    560 	isc__nmsocket_attach(ssock, &csock->server);
    561 
    562 	if (csock->server->pquota != NULL) {
    563 		result = isc_quota_acquire_cb(csock->server->pquota,
    564 					      &csock->quotacb, quota_accept_cb,
    565 					      csock);
    566 		if (result == ISC_R_QUOTA) {
    567 			csock->quota_accept_ts = isc_time_monotonic();
    568 			isc__nm_incstats(ssock, STATID_ACCEPTFAIL);
    569 			goto done;
    570 		}
    571 	}
    572 
    573 	result = accept_connection(csock);
    574 done:
    575 	isc__nm_accept_connection_log(ssock, result, can_log_tcp_quota());
    576 }
    577 
/*
 * Per-thread part of isc__nm_tcp_stoplistening(): shut down the listening
 * child socket 'arg' on the thread that owns it, then wait on the parent's
 * stop barrier.
 */
static void
stop_tcp_child_job(void *arg) {
	isc_nmsocket_t *sock = arg;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_tid());
	REQUIRE(sock->parent != NULL);
	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(!sock->closing);

	sock->active = false;
	sock->closing = true;

	/*
	 * The order of the close operation is important here, the uv_close()
	 * gets scheduled in the reverse order, so we need to close the timer
	 * last, so it's gone by the time we destroy the socket
	 */

	/* 2. close the listening socket */
	isc__nmsocket_clearcb(sock);
	isc__nm_stop_reading(sock);
	uv_close(&sock->uv_handle.handle, tcp_stop_cb);

	/* 1. close the read timer */
	isc__nmsocket_timer_stop(sock);
	uv_close(&sock->read_timer, NULL);

	REQUIRE(!sock->worker->loop->paused);
	isc_barrier_wait(&sock->parent->stop_barrier);
}
    609 
    610 static void
    611 stop_tcp_child(isc_nmsocket_t *sock) {
    612 	REQUIRE(VALID_NMSOCK(sock));
    613 
    614 	if (sock->tid == 0) {
    615 		stop_tcp_child_job(sock);
    616 	} else {
    617 		isc_async_run(sock->worker->loop, stop_tcp_child_job, sock);
    618 	}
    619 }
    620 
    621 void
    622 isc__nm_tcp_stoplistening(isc_nmsocket_t *sock) {
    623 	REQUIRE(VALID_NMSOCK(sock));
    624 	REQUIRE(sock->type == isc_nm_tcplistener);
    625 	REQUIRE(sock->tid == isc_tid());
    626 	REQUIRE(sock->tid == 0);
    627 	REQUIRE(!sock->closing);
    628 
    629 	sock->closing = true;
    630 
    631 	/* Mark the parent socket inactive */
    632 	sock->active = false;
    633 
    634 	/* Stop all the other threads' children */
    635 	for (size_t i = 1; i < sock->nchildren; i++) {
    636 		stop_tcp_child(&sock->children[i]);
    637 	}
    638 
    639 	/* Stop the child for the main thread */
    640 	stop_tcp_child(&sock->children[0]);
    641 
    642 	/* Stop the parent */
    643 	sock->closed = true;
    644 
    645 	isc__nmsocket_prep_destroy(sock);
    646 }
    647 
/*
 * libuv close callback for a listening child socket's TCP handle (passed
 * to uv_close() by stop_tcp_child_job()).
 *
 * Marks the socket closed, updates the close statistics and drops a
 * reference to the socket.
 */
static void
tcp_stop_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);
	/* The handle no longer points back at the socket. */
	uv_handle_set_data(handle, NULL);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_tid());
	REQUIRE(sock->closing);
	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(!sock->closed);

	sock->closed = true;

	isc__nm_incstats(sock, STATID_CLOSE);

	isc__nmsocket_detach(&sock);
}
    665 
    666 void
    667 isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result,
    668 			   bool async) {
    669 	REQUIRE(VALID_NMSOCK(sock));
    670 	REQUIRE(result != ISC_R_SUCCESS);
    671 
    672 	isc__nmsocket_timer_stop(sock);
    673 	isc__nm_stop_reading(sock);
    674 	sock->reading = false;
    675 
    676 	if (sock->recv_cb != NULL) {
    677 		isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
    678 		isc__nmsocket_clearcb(sock);
    679 		isc__nm_readcb(sock, req, result, async);
    680 	}
    681 
    682 	isc__nmsocket_prep_destroy(sock);
    683 }
    684 
    685 void
    686 isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
    687 	isc_nmsocket_t *sock;
    688 	isc_nm_t *netmgr;
    689 	isc_result_t result;
    690 
    691 	REQUIRE(VALID_NMHANDLE(handle));
    692 	REQUIRE(VALID_NMSOCK(handle->sock));
    693 
    694 	sock = handle->sock;
    695 	netmgr = sock->worker->netmgr;
    696 
    697 	REQUIRE(sock->type == isc_nm_tcpsocket);
    698 	REQUIRE(sock->statichandle == handle);
    699 
    700 	sock->recv_cb = cb;
    701 	sock->recv_cbarg = cbarg;
    702 
    703 	/* Initialize the timer */
    704 	if (sock->read_timeout == 0) {
    705 		sock->read_timeout =
    706 			sock->keepalive
    707 				? atomic_load_relaxed(&netmgr->keepalive)
    708 				: atomic_load_relaxed(&netmgr->idle);
    709 	}
    710 
    711 	if (isc__nmsocket_closing(sock)) {
    712 		result = ISC_R_CANCELED;
    713 		goto failure;
    714 	}
    715 
    716 	if (!sock->reading_throttled) {
    717 		result = isc__nm_start_reading(sock);
    718 		if (result != ISC_R_SUCCESS) {
    719 			goto failure;
    720 		}
    721 	}
    722 
    723 	sock->reading = true;
    724 
    725 	if (!sock->manual_read_timer) {
    726 		isc__nmsocket_timer_start(sock);
    727 	}
    728 
    729 	return;
    730 failure:
    731 	isc__nm_tcp_failed_read_cb(sock, result, true);
    732 }
    733 
    734 void
    735 isc__nm_tcp_read_stop(isc_nmhandle_t *handle) {
    736 	REQUIRE(VALID_NMHANDLE(handle));
    737 	REQUIRE(VALID_NMSOCK(handle->sock));
    738 
    739 	isc_nmsocket_t *sock = handle->sock;
    740 
    741 	if (!sock->manual_read_timer) {
    742 		isc__nmsocket_timer_stop(sock);
    743 	}
    744 	isc__nm_stop_reading(sock);
    745 	sock->reading = false;
    746 
    747 	return;
    748 }
    749 
/*
 * libuv read callback for TCP sockets.
 *
 * 'nread' is the number of bytes read into 'buf', or a negative libuv
 * error code.  Errors and reads on a closing socket are routed to
 * isc__nm_tcp_failed_read_cb(); otherwise the data is handed to the
 * socket's read callback.  For non-client sockets that are still reading,
 * reading is throttled once the write queue grows too large.  The buffer
 * is released before returning, except for a null buffer on error.
 */
void
isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
	isc__nm_uvreq_t *req = NULL;
	isc_nm_t *netmgr = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_tid());
	REQUIRE(buf != NULL);

	netmgr = sock->worker->netmgr;

	if (isc__nmsocket_closing(sock)) {
		isc__nm_tcp_failed_read_cb(sock, ISC_R_CANCELED, false);
		goto free;
	}

	if (nread < 0) {
		/* End of stream is not counted as a receive failure. */
		if (nread != UV_EOF) {
			isc__nm_incstats(sock, STATID_RECVFAIL);
		}

		isc__nm_tcp_failed_read_cb(sock, isc_uverr2result(nread),
					   false);

		goto free;
	}

	req = isc__nm_get_read_req(sock, NULL);

	/*
	 * The callback will be called synchronously because the
	 * result is ISC_R_SUCCESS, so we don't need to retain
	 * the buffer
	 */
	req->uvbuf.base = buf->base;
	req->uvbuf.len = nread;

	/*
	 * For non-client sockets, reset the read timeout to the keepalive
	 * or idle value after each successful read.
	 */
	if (!sock->client) {
		sock->read_timeout =
			sock->keepalive
				? atomic_load_relaxed(&netmgr->keepalive)
				: atomic_load_relaxed(&netmgr->idle);
	}

	isc__nm_readcb(sock, req, ISC_R_SUCCESS, false);

	if (!sock->client && sock->reading) {
		/*
		 * Stop reading if we have accumulated enough bytes in the send
		 * queue; this means that the TCP client is not reading back the
		 * data we are sending to it, and there's no reason to continue
		 * processing more incoming DNS messages, if the client is not
		 * reading back the responses.
		 */
		size_t write_queue_size =
			uv_stream_get_write_queue_size(&sock->uv_handle.stream);

		if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) {
			isc__nmsocket_log(
				sock, ISC_LOG_DEBUG(3),
				"throttling TCP connection, the other side is "
				"not reading the data (%zu)",
				write_queue_size);
			sock->reading_throttled = true;
			isc__nm_stop_reading(sock);
		}
	} else if (uv_is_active(&sock->uv_handle.handle) &&
		   !sock->manual_read_timer)
	{
		/* The readcb could have paused the reading */
		/* The timer will be updated */
		isc__nmsocket_timer_restart(sock);
	}

free:
	if (nread < 0) {
		/*
		 * The buffer may be a null buffer on error.
		 */
		if (buf->base == NULL && buf->len == 0) {
			return;
		}
	}

	isc__nm_free_uvbuf(sock, buf);
}
    837 
    838 /*
    839  * This is called after we get a quota_accept_cb() callback.
    840  */
    841 static void
    842 tcpaccept_cb(void *arg) {
    843 	isc_nmsocket_t *csock = arg;
    844 	isc_nmsocket_t *ssock = csock->server;
    845 
    846 	REQUIRE(VALID_NMSOCK(csock));
    847 	REQUIRE(csock->tid == isc_tid());
    848 
    849 	isc_result_t result = accept_connection(csock);
    850 	isc__nm_accept_connection_log(ssock, result, can_log_tcp_quota());
    851 	isc__nmsocket_detach(&csock);
    852 }
    853 
    854 static void
    855 quota_accept_cb(void *arg) {
    856 	isc_nmsocket_t *csock = arg;
    857 	isc_nmsocket_t *ssock = csock->server;
    858 
    859 	REQUIRE(VALID_NMSOCK(csock));
    860 
    861 	/*
    862 	 * This needs to be asynchronous, because the quota might have been
    863 	 * released by a different child socket.
    864 	 */
    865 	if (csock->tid == isc_tid()) {
    866 		isc_result_t result = accept_connection(csock);
    867 		isc__nm_accept_connection_log(ssock, result,
    868 					      can_log_tcp_quota());
    869 	} else {
    870 		isc__nmsocket_attach(csock, &(isc_nmsocket_t *){ NULL });
    871 		isc_async_run(csock->worker->loop, tcpaccept_cb, csock);
    872 	}
    873 }
    874 
/*
 * Accept a pending connection from the listener csock->server into
 * 'csock'.
 *
 * Copies the callbacks from the listener, initializes the libuv handle
 * and read timer, accepts the connection, drops it if it waited for quota
 * longer than the initial read timeout, records the peer address, and
 * calls the accept callback with a new handle.
 *
 * In every case the reference to 'csock' held by this function is
 * released before returning.  Returns ISC_R_SUCCESS on success; on failure
 * the socket is prepared for destruction and the error is returned (and
 * logged, unless it is ISC_R_NOTCONNECTED).
 */
static isc_result_t
accept_connection(isc_nmsocket_t *csock) {
	int r;
	isc_result_t result;
	struct sockaddr_storage ss;
	isc_sockaddr_t local;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(csock));
	REQUIRE(VALID_NMSOCK(csock->server));
	REQUIRE(csock->tid == isc_tid());

	csock->accepting = true;
	csock->accept_cb = csock->server->accept_cb;
	csock->accept_cbarg = csock->server->accept_cbarg;
	csock->recv_cb = csock->server->recv_cb;
	csock->recv_cbarg = csock->server->recv_cbarg;
	/* Start with the network manager's 'init' timeout value. */
	csock->read_timeout = atomic_load_relaxed(&csock->worker->netmgr->init);

	r = uv_tcp_init(&csock->worker->loop->loop, &csock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&csock->uv_handle.handle, csock);

	r = uv_timer_init(&csock->worker->loop->loop, &csock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock);

	if (csock->server->pquota != NULL) {
		isc__nm_incstats(csock, STATID_CLIENTS);
	}

	/*
	 * We need to initialize the tcp and timer before failing because
	 * isc__nm_tcp_close() can't handle uninitialized TCP nmsocket.
	 */
	if (isc__nmsocket_closing(csock)) {
		result = ISC_R_CANCELED;
		goto failure;
	}

	r = uv_accept(&csock->server->uv_handle.stream,
		      &csock->uv_handle.stream);
	if (r != 0) {
		result = isc_uverr2result(r);
		goto failure;
	}

	/* Check if the connection is not expired */
	if (csock->quota_accept_ts != 0) {
		/* The timestamp is given in nanoseconds */
		const uint64_t time_elapsed_ms =
			(isc_time_monotonic() - csock->quota_accept_ts) /
			NS_PER_MS;

		if (time_elapsed_ms >= csock->read_timeout) {
			/*
			 * At this point we have received a connection from a
			 * queue of accepted connections (via uv_accept()), but
			 * it has expired. We cannot do anything better than
			 * drop it on the floor at this point.
			 */
			result = ISC_R_TIMEDOUT;
			goto failure;
		} else {
			/* Adjust the initial read timeout accordingly */
			csock->read_timeout -= time_elapsed_ms;
		}
	}

	/* Record the peer address of the accepted connection. */
	r = uv_tcp_getpeername(&csock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&csock->peer,
					   (struct sockaddr *)&ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	/* The local address is passed to the new handle. */
	r = uv_tcp_getsockname(&csock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&local, (struct sockaddr *)&ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	handle = isc__nmhandle_get(csock, NULL, &local);

	result = csock->accept_cb(handle, ISC_R_SUCCESS, csock->accept_cbarg);
	if (result != ISC_R_SUCCESS) {
		/* The accept callback rejected the connection. */
		isc_nmhandle_detach(&handle);
		goto failure;
	}

	csock->accepting = false;

	isc__nm_incstats(csock, STATID_ACCEPT);

	/*
	 * The acceptcb needs to attach to the handle if it wants to keep the
	 * connection alive
	 */
	isc_nmhandle_detach(&handle);

	/*
	 * sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&csock);

	return ISC_R_SUCCESS;

failure:
	csock->active = false;
	csock->accepting = false;

	if (result != ISC_R_NOTCONNECTED) {
		/* IGNORE: The client disconnected before we could accept */
		isc__nmsocket_log(csock, ISC_LOG_ERROR,
				  "Accepting TCP connection failed: %s",
				  isc_result_totext(result));
	}

	isc__nmsocket_prep_destroy(csock);

	isc__nmsocket_detach(&csock);

	return result;
}
   1011 
   1012 static void
   1013 tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, isc_nm_cb_t cb,
   1014 	 void *cbarg, const bool dnsmsg) {
   1015 	REQUIRE(VALID_NMHANDLE(handle));
   1016 	REQUIRE(VALID_NMSOCK(handle->sock));
   1017 
   1018 	isc_nmsocket_t *sock = handle->sock;
   1019 	isc_result_t result;
   1020 	isc__nm_uvreq_t *uvreq = NULL;
   1021 	isc_nm_t *netmgr = sock->worker->netmgr;
   1022 
   1023 	REQUIRE(sock->type == isc_nm_tcpsocket);
   1024 	REQUIRE(sock->tid == isc_tid());
   1025 
   1026 	uvreq = isc__nm_uvreq_get(sock);
   1027 	if (dnsmsg) {
   1028 		*(uint16_t *)uvreq->tcplen = htons(region->length);
   1029 	}
   1030 	uvreq->uvbuf.base = (char *)region->base;
   1031 	uvreq->uvbuf.len = region->length;
   1032 
   1033 	isc_nmhandle_attach(handle, &uvreq->handle);
   1034 
   1035 	uvreq->cb.send = cb;
   1036 	uvreq->cbarg = cbarg;
   1037 
   1038 	if (sock->write_timeout == 0) {
   1039 		sock->write_timeout =
   1040 			sock->keepalive
   1041 				? atomic_load_relaxed(&netmgr->keepalive)
   1042 				: atomic_load_relaxed(&netmgr->idle);
   1043 	}
   1044 
   1045 	result = tcp_send_direct(sock, uvreq);
   1046 	if (result != ISC_R_SUCCESS) {
   1047 		isc__nm_incstats(sock, STATID_SENDFAIL);
   1048 		isc__nm_failed_send_cb(sock, uvreq, result, true);
   1049 	}
   1050 
   1051 	return;
   1052 }
   1053 
   1054 void
   1055 isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
   1056 		 isc_nm_cb_t cb, void *cbarg) {
   1057 	tcp_send(handle, region, cb, cbarg, false);
   1058 }
   1059 
   1060 void
   1061 isc__nm_tcp_senddns(isc_nmhandle_t *handle, const isc_region_t *region,
   1062 		    isc_nm_cb_t cb, void *cbarg) {
   1063 	tcp_send(handle, region, cb, cbarg, true);
   1064 }
   1065 
   1066 static void
   1067 tcp_maybe_restart_reading(isc_nmsocket_t *sock) {
   1068 	if (!sock->client && sock->reading &&
   1069 	    !uv_is_active(&sock->uv_handle.handle))
   1070 	{
   1071 		/*
   1072 		 * Restart reading if we have less data in the send queue than
   1073 		 * the send buffer size, this means that the TCP client has
   1074 		 * started reading some data again.  Starting reading when we go
   1075 		 * under the limit instead of waiting for all data has been
   1076 		 * flushed allows faster recovery (in case there was a
   1077 		 * congestion and now there isn't).
   1078 		 */
   1079 		size_t write_queue_size =
   1080 			uv_stream_get_write_queue_size(&sock->uv_handle.stream);
   1081 		if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) {
   1082 			isc__nmsocket_log(
   1083 				sock, ISC_LOG_DEBUG(3),
   1084 				"resuming TCP connection, the other side  "
   1085 				"is reading the data again (%zu)",
   1086 				write_queue_size);
   1087 			isc__nm_start_reading(sock);
   1088 			sock->reading_throttled = false;
   1089 		}
   1090 	}
   1091 }
   1092 
   1093 static void
   1094 tcp_send_cb(uv_write_t *req, int status) {
   1095 	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
   1096 	isc_nmsocket_t *sock = NULL;
   1097 
   1098 	REQUIRE(VALID_UVREQ(uvreq));
   1099 	REQUIRE(VALID_NMSOCK(uvreq->sock));
   1100 
   1101 	sock = uvreq->sock;
   1102 
   1103 	isc_nm_timer_stop(uvreq->timer);
   1104 	isc_nm_timer_detach(&uvreq->timer);
   1105 
   1106 	if (status < 0) {
   1107 		isc__nm_incstats(sock, STATID_SENDFAIL);
   1108 		isc__nm_failed_send_cb(sock, uvreq, isc_uverr2result(status),
   1109 				       false);
   1110 		if (!sock->client && sock->reading) {
   1111 			/*
   1112 			 * As we are resuming reading, it is not throttled
   1113 			 * anymore (technically).
   1114 			 */
   1115 			sock->reading_throttled = false;
   1116 			isc__nm_start_reading(sock);
   1117 			isc__nmsocket_reset(sock);
   1118 		}
   1119 		return;
   1120 	}
   1121 
   1122 	isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
   1123 	tcp_maybe_restart_reading(sock);
   1124 }
   1125 
   1126 static isc_result_t
   1127 tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
   1128 	REQUIRE(VALID_NMSOCK(sock));
   1129 	REQUIRE(VALID_UVREQ(req));
   1130 	REQUIRE(sock->tid == isc_tid());
   1131 	REQUIRE(sock->type == isc_nm_tcpsocket);
   1132 
   1133 	int r;
   1134 	uv_buf_t bufs[2] = { { 0 }, { 0 } }; /* ugly, but required for old GCC
   1135 						versions */
   1136 	size_t nbufs = 1;
   1137 
   1138 	if (isc__nmsocket_closing(sock)) {
   1139 		return ISC_R_CANCELED;
   1140 	}
   1141 
   1142 	/* Check if we are not trying to send a DNS message */
   1143 	if (*(uint16_t *)req->tcplen == 0) {
   1144 		bufs[0].base = req->uvbuf.base;
   1145 		bufs[0].len = req->uvbuf.len;
   1146 
   1147 		r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs);
   1148 
   1149 		if (r == (int)(bufs[0].len)) {
   1150 			/* Wrote everything */
   1151 			isc__nm_sendcb(sock, req, ISC_R_SUCCESS, true);
   1152 			tcp_maybe_restart_reading(sock);
   1153 			return ISC_R_SUCCESS;
   1154 		} else if (r > 0) {
   1155 			bufs[0].base += (size_t)r;
   1156 			bufs[0].len -= (size_t)r;
   1157 		} else if (!(r == UV_ENOSYS || r == UV_EAGAIN)) {
   1158 			return isc_uverr2result(r);
   1159 		}
   1160 	} else {
   1161 		nbufs = 2;
   1162 		bufs[0].base = req->tcplen;
   1163 		bufs[0].len = 2;
   1164 		bufs[1].base = req->uvbuf.base;
   1165 		bufs[1].len = req->uvbuf.len;
   1166 
   1167 		r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs);
   1168 
   1169 		if (r == (int)(bufs[0].len + bufs[1].len)) {
   1170 			/* Wrote everything */
   1171 			isc__nm_sendcb(sock, req, ISC_R_SUCCESS, true);
   1172 			tcp_maybe_restart_reading(sock);
   1173 			return ISC_R_SUCCESS;
   1174 		} else if (r == 1) {
   1175 			/* Partial write of DNSMSG length */
   1176 			bufs[0].base = req->tcplen + 1;
   1177 			bufs[0].len = 1;
   1178 		} else if (r > 0) {
   1179 			/* Partial write of DNSMSG */
   1180 			nbufs = 1;
   1181 			bufs[0].base = req->uvbuf.base + (r - 2);
   1182 			bufs[0].len = req->uvbuf.len - (r - 2);
   1183 		} else if (!(r == UV_ENOSYS || r == UV_EAGAIN)) {
   1184 			return isc_uverr2result(r);
   1185 		}
   1186 	}
   1187 
   1188 	if (!sock->client && sock->reading) {
   1189 		sock->reading_throttled = true;
   1190 		isc__nm_stop_reading(sock);
   1191 	}
   1192 	isc__nmsocket_log(sock, ISC_LOG_DEBUG(3),
   1193 			  "%sthe other side is not "
   1194 			  "reading the data, switching to uv_write()",
   1195 			  !sock->client && sock->reading
   1196 				  ? "throttling TCP connection, "
   1197 				  : "");
   1198 
   1199 	r = uv_write(&req->uv_req.write, &sock->uv_handle.stream, bufs, nbufs,
   1200 		     tcp_send_cb);
   1201 	if (r < 0) {
   1202 		return isc_uverr2result(r);
   1203 	}
   1204 
   1205 	isc_nm_timer_create(req->handle, isc__nmsocket_writetimeout_cb, req,
   1206 			    &req->timer);
   1207 	if (sock->write_timeout > 0) {
   1208 		isc_nm_timer_start(req->timer, sock->write_timeout);
   1209 	}
   1210 
   1211 	return ISC_R_SUCCESS;
   1212 }
   1213 
   1214 static void
   1215 tcp_close_sock(isc_nmsocket_t *sock) {
   1216 	REQUIRE(VALID_NMSOCK(sock));
   1217 	REQUIRE(sock->tid == isc_tid());
   1218 	REQUIRE(sock->closing);
   1219 	REQUIRE(!sock->closed);
   1220 
   1221 	sock->closed = true;
   1222 	sock->connected = false;
   1223 
   1224 	isc__nm_incstats(sock, STATID_CLOSE);
   1225 
   1226 	if (sock->server != NULL) {
   1227 		if (sock->server->pquota != NULL) {
   1228 			isc__nm_decstats(sock, STATID_CLIENTS);
   1229 			isc_quota_release(sock->server->pquota);
   1230 		}
   1231 		isc__nmsocket_detach(&sock->server);
   1232 	}
   1233 
   1234 	isc__nmsocket_prep_destroy(sock);
   1235 }
   1236 
   1237 static void
   1238 tcp_close_cb(uv_handle_t *handle) {
   1239 	isc_nmsocket_t *sock = uv_handle_get_data(handle);
   1240 	uv_handle_set_data(handle, NULL);
   1241 
   1242 	tcp_close_sock(sock);
   1243 }
   1244 
   1245 void
   1246 isc__nm_tcp_close(isc_nmsocket_t *sock) {
   1247 	REQUIRE(VALID_NMSOCK(sock));
   1248 	REQUIRE(sock->type == isc_nm_tcpsocket);
   1249 	REQUIRE(!isc__nmsocket_active(sock));
   1250 	REQUIRE(sock->tid == isc_tid());
   1251 	REQUIRE(sock->parent == NULL);
   1252 	REQUIRE(!sock->closing);
   1253 
   1254 	sock->closing = true;
   1255 
   1256 	/*
   1257 	 * The order of the close operation is important here, the uv_close()
   1258 	 * gets scheduled in the reverse order, so we need to close the timer
   1259 	 * last, so its gone by the time we destroy the socket
   1260 	 */
   1261 
   1262 	if (!uv_is_closing(&sock->uv_handle.handle)) {
   1263 		/* Normal order of operation */
   1264 
   1265 		/* 2. close the socket + destroy the socket in callback */
   1266 		isc__nmsocket_clearcb(sock);
   1267 		isc__nm_stop_reading(sock);
   1268 		sock->reading = false;
   1269 		uv_close(&sock->uv_handle.handle, tcp_close_cb);
   1270 
   1271 		/* 1. close the timer */
   1272 		isc__nmsocket_timer_stop(sock);
   1273 		uv_close((uv_handle_t *)&sock->read_timer, NULL);
   1274 	} else {
   1275 		/* The socket was already closed elsewhere */
   1276 
   1277 		/* 1. close the timer + destroy the socket in callback */
   1278 		isc__nmsocket_timer_stop(sock);
   1279 		uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
   1280 		uv_close((uv_handle_t *)&sock->read_timer, tcp_close_cb);
   1281 	}
   1282 }
   1283 
   1284 static void
   1285 tcp_close_connect_cb(uv_handle_t *handle) {
   1286 	isc_nmsocket_t *sock = uv_handle_get_data(handle);
   1287 
   1288 	REQUIRE(VALID_NMSOCK(sock));
   1289 
   1290 	REQUIRE(sock->tid == isc_tid());
   1291 
   1292 	isc__nmsocket_prep_destroy(sock);
   1293 	isc__nmsocket_detach(&sock);
   1294 }
   1295 
   1296 void
   1297 isc__nm_tcp_shutdown(isc_nmsocket_t *sock) {
   1298 	REQUIRE(VALID_NMSOCK(sock));
   1299 	REQUIRE(sock->tid == isc_tid());
   1300 	REQUIRE(sock->type == isc_nm_tcpsocket);
   1301 
   1302 	/*
   1303 	 * If the socket is active, mark it inactive and
   1304 	 * continue. If it isn't active, stop now.
   1305 	 */
   1306 	if (!sock->active) {
   1307 		return;
   1308 	}
   1309 	sock->active = false;
   1310 
   1311 	INSIST(!sock->accepting);
   1312 
   1313 	if (sock->connecting) {
   1314 		isc_nmsocket_t *tsock = NULL;
   1315 		isc__nmsocket_attach(sock, &tsock);
   1316 		uv_close(&sock->uv_handle.handle, tcp_close_connect_cb);
   1317 		return;
   1318 	}
   1319 
   1320 	/* There's a handle attached to the socket (from accept or connect) */
   1321 	if (sock->statichandle) {
   1322 		isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false);
   1323 		return;
   1324 	}
   1325 
   1326 	/* Destroy the non-listening socket */
   1327 	if (sock->parent == NULL) {
   1328 		isc__nmsocket_prep_destroy(sock);
   1329 		return;
   1330 	}
   1331 
   1332 	/* Destroy the listening socket if on the same loop */
   1333 	if (sock->tid == sock->parent->tid) {
   1334 		isc__nmsocket_prep_destroy(sock->parent);
   1335 	}
   1336 }
   1337 
   1338 void
   1339 isc__nmhandle_tcp_set_manual_timer(isc_nmhandle_t *handle, const bool manual) {
   1340 	isc_nmsocket_t *sock;
   1341 
   1342 	REQUIRE(VALID_NMHANDLE(handle));
   1343 	sock = handle->sock;
   1344 	REQUIRE(VALID_NMSOCK(sock));
   1345 	REQUIRE(sock->type == isc_nm_tcpsocket);
   1346 	REQUIRE(sock->tid == isc_tid());
   1347 	REQUIRE(!uv_is_active(&sock->uv_handle.handle));
   1348 
   1349 	sock->manual_read_timer = manual;
   1350 }
   1351