Home | History | Annotate | Line # | Download | only in dist
xfrd-tcp.c revision 1.1.1.5.2.1
      1 /*
      2  * xfrd-tcp.c - XFR (transfer) Daemon TCP system source file. Manages tcp conn.
      3  *
      4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
      5  *
      6  * See LICENSE for the license.
      7  *
      8  */
      9 
     10 #include "config.h"
     11 #include <assert.h>
     12 #include <errno.h>
     13 #include <fcntl.h>
     14 #include <unistd.h>
     15 #include <stdlib.h>
     16 #include <sys/uio.h>
     17 #include "nsd.h"
     18 #include "xfrd-tcp.h"
     19 #include "buffer.h"
     20 #include "packet.h"
     21 #include "dname.h"
     22 #include "options.h"
     23 #include "namedb.h"
     24 #include "xfrd.h"
     25 #include "xfrd-disk.h"
     26 #include "util.h"
     27 #ifdef HAVE_TLS_1_3
     28 #include <openssl/ssl.h>
     29 #include <openssl/err.h>
     30 #endif
     31 
     32 #ifdef HAVE_TLS_1_3
     33 void log_crypto_err(const char* str); /* in server.c */
     34 
     35 static SSL_CTX*
     36 create_ssl_context()
     37 {
     38 	SSL_CTX *ctx;
     39 	unsigned char protos[] = { 3, 'd', 'o', 't' };
     40 	ctx = SSL_CTX_new(TLS_client_method());
     41 	if (!ctx) {
     42 		log_msg(LOG_ERR, "xfrd tls: Unable to create SSL ctxt");
     43 	}
     44 	else if (SSL_CTX_set_default_verify_paths(ctx) != 1) {
     45 		SSL_CTX_free(ctx);
     46 		log_msg(LOG_ERR, "xfrd tls: Unable to set default SSL verify paths");
     47 		return NULL;
     48 	}
     49 	/* Only trust 1.3 as per the specification */
     50 	else if (!SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION)) {
     51 		SSL_CTX_free(ctx);
     52 		log_msg(LOG_ERR, "xfrd tls: Unable to set minimum TLS version 1.3");
     53 		return NULL;
     54 	}
     55 
     56 	if (SSL_CTX_set_alpn_protos(ctx, protos, sizeof(protos)) != 0) {
     57 		SSL_CTX_free(ctx);
     58 		log_msg(LOG_ERR, "xfrd tls: Unable to set ALPN protocols");
     59 		return NULL;
     60 	}
     61 	return ctx;
     62 }
     63 
     64 static int
     65 tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
     66 {
     67 	int err = X509_STORE_CTX_get_error(ctx);
     68 	int depth = X509_STORE_CTX_get_error_depth(ctx);
     69 
     70 	// report the specific cert error here - will need custom verify code if
     71 	// SPKI pins are supported
     72 	if (!preverify_ok)
     73 		log_msg(LOG_ERR, "xfrd tls: TLS verify failed - (%d) depth: %d error: %s",
     74 				err,
     75 				depth,
     76 				X509_verify_cert_error_string(err));
     77 	return preverify_ok;
     78 }
     79 
     80 static int
     81 setup_ssl(struct xfrd_tcp_pipeline* tp, struct xfrd_tcp_set* tcp_set,
     82 		  const char* auth_domain_name)
     83 {
     84 	if (!tcp_set->ssl_ctx) {
     85 		log_msg(LOG_ERR, "xfrd tls: No TLS CTX, cannot set up XFR-over-TLS");
     86 		return 0;
     87 	}
     88 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: setting up TLS for tls_auth domain name %s",
     89 						 auth_domain_name));
     90 	tp->ssl = SSL_new((SSL_CTX*)tcp_set->ssl_ctx);
     91 	if(!tp->ssl) {
     92 		log_msg(LOG_ERR, "xfrd tls: Unable to create TLS object");
     93 		return 0;
     94 	}
     95 	SSL_set_connect_state(tp->ssl);
     96 	(void)SSL_set_mode(tp->ssl, SSL_MODE_AUTO_RETRY);
     97 	if(!SSL_set_fd(tp->ssl, tp->tcp_w->fd)) {
     98 		log_msg(LOG_ERR, "xfrd tls: Unable to set TLS fd");
     99 		SSL_free(tp->ssl);
    100 		tp->ssl = NULL;
    101 		return 0;
    102 	}
    103 
    104 	SSL_set_verify(tp->ssl, SSL_VERIFY_PEER, tls_verify_callback);
    105 	if(!SSL_set1_host(tp->ssl, auth_domain_name)) {
    106 		log_msg(LOG_ERR, "xfrd tls: TLS setting of hostname %s failed",
    107 		auth_domain_name);
    108 		SSL_free(tp->ssl);
    109 		tp->ssl = NULL;
    110 		return 0;
    111 	}
    112 	return 1;
    113 }
    114 
    115 static int
    116 ssl_handshake(struct xfrd_tcp_pipeline* tp)
    117 {
    118 	int ret;
    119 
    120 	ERR_clear_error();
    121 	ret = SSL_do_handshake(tp->ssl);
    122 	if(ret == 1) {
    123 		DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: TLS handshake successful"));
    124 		tp->handshake_done = 1;
    125 		return 1;
    126 	}
    127 	tp->handshake_want = SSL_get_error(tp->ssl, ret);
    128 	if(tp->handshake_want == SSL_ERROR_WANT_READ
    129 	|| tp->handshake_want == SSL_ERROR_WANT_WRITE)
    130 		return 1;
    131 
    132 	return 0;
    133 }
    134 
/*
 * Passphrase callback for OpenSSL private key loading.
 *
 * buf:    destination buffer for the passphrase.
 * size:   capacity of buf in bytes.
 * rwflag: unused.
 * u:      user data; expected to be the NUL-terminated passphrase string.
 *
 * Copies the passphrase into buf (truncating it to fit) and returns the
 * number of characters now in buf. Returns -1 when there is nothing to
 * copy from or no room to copy into, so that a missing passphrase is
 * reported as an error instead of dereferencing NULL or passing a
 * negative size on as a huge unsigned length.
 */
int password_cb(char *buf, int size, int ATTR_UNUSED(rwflag), void *u)
{
	if(buf == NULL || u == NULL || size <= 0)
		return -1;
	strlcpy(buf, (const char*)u, (size_t)size);
	return (int)strlen(buf);
}
    140 
    141 #endif
    142 
    143 /* sort tcppipe, first on IP address, for an IPaddresss, sort on num_unused */
    144 static int
    145 xfrd_pipe_cmp(const void* a, const void* b)
    146 {
    147 	const struct xfrd_tcp_pipeline* x = (struct xfrd_tcp_pipeline*)a;
    148 	const struct xfrd_tcp_pipeline* y = (struct xfrd_tcp_pipeline*)b;
    149 	int r;
    150 	if(x == y)
    151 		return 0;
    152 	if(y->key.ip_len != x->key.ip_len)
    153 		/* subtraction works because nonnegative and small numbers */
    154 		return (int)y->key.ip_len - (int)x->key.ip_len;
    155 	r = memcmp(&x->key.ip, &y->key.ip, x->key.ip_len);
    156 	if(r != 0)
    157 		return r;
    158 	/* sort that num_unused is sorted ascending, */
    159 	if(x->key.num_unused != y->key.num_unused) {
    160 		return (x->key.num_unused < y->key.num_unused) ? -1 : 1;
    161 	}
    162 	/* different pipelines are different still, even with same numunused*/
    163 	return (uintptr_t)x < (uintptr_t)y ? -1 : 1;
    164 }
    165 
    166 struct xfrd_tcp_set* xfrd_tcp_set_create(struct region* region, const char *tls_cert_bundle, int tcp_max, int tcp_pipeline)
    167 {
    168 	int i;
    169 	struct xfrd_tcp_set* tcp_set = region_alloc(region,
    170 		sizeof(struct xfrd_tcp_set));
    171 	memset(tcp_set, 0, sizeof(struct xfrd_tcp_set));
    172 	tcp_set->tcp_state = NULL;
    173 	tcp_set->tcp_max = tcp_max;
    174 	tcp_set->tcp_pipeline = tcp_pipeline;
    175 	tcp_set->tcp_count = 0;
    176 	tcp_set->tcp_waiting_first = 0;
    177 	tcp_set->tcp_waiting_last = 0;
    178 #ifdef HAVE_TLS_1_3
    179 	/* Set up SSL context */
    180 	tcp_set->ssl_ctx = create_ssl_context();
    181 	if (tcp_set->ssl_ctx == NULL)
    182 		log_msg(LOG_ERR, "xfrd: XFR-over-TLS not available");
    183 
    184 	else if (tls_cert_bundle && tls_cert_bundle[0] && SSL_CTX_load_verify_locations(
    185 				tcp_set->ssl_ctx, tls_cert_bundle, NULL) != 1) {
    186 		log_msg(LOG_ERR, "xfrd tls: Unable to set the certificate bundle file %s",
    187 				tls_cert_bundle);
    188 	}
    189 #else
    190 	(void)tls_cert_bundle;
    191 	log_msg(LOG_INFO, "xfrd: No TLS 1.3 support - XFR-over-TLS not available");
    192 #endif
    193 	tcp_set->tcp_state = region_alloc(region,
    194 		sizeof(*tcp_set->tcp_state)*tcp_set->tcp_max);
    195 	for(i=0; i<tcp_set->tcp_max; i++)
    196 		tcp_set->tcp_state[i] = xfrd_tcp_pipeline_create(region,
    197 			tcp_pipeline);
    198 	tcp_set->pipetree = rbtree_create(region, &xfrd_pipe_cmp);
    199 	return tcp_set;
    200 }
    201 
    202 static int pipeline_id_compare(const void* x, const void* y)
    203 {
    204 	struct xfrd_tcp_pipeline_id* a = (struct xfrd_tcp_pipeline_id*)x;
    205 	struct xfrd_tcp_pipeline_id* b = (struct xfrd_tcp_pipeline_id*)y;
    206 	if(a->id < b->id)
    207 		return -1;
    208 	if(a->id > b->id)
    209 		return 1;
    210 	return 0;
    211 }
    212 
/*
 * Fill array with num distinct values drawn from 0..max-1.
 *
 * array: receives num values.
 * num:   number of values wanted; when it is exactly 65536 the array is
 *        simply filled with 0..65535 in order and max is not looked at.
 * max:   exclusive upper bound of the values, at most 65536 (asserted).
 *
 * NOTE(review): the sampling loop presumes num <= max - confirm callers.
 */
void pick_id_values(uint16_t* array, int num, int max)
{
	/* one byte per candidate value, nonzero once the value was handed out */
	uint8_t taken[65536];
	int upper;
	int filled = 0;

	if(num == 65536) {
		/* every 16 bit value is wanted, no sampling needed */
		int v;
		for(v = 0; v < num; v++)
			array[v] = (uint16_t)v;
		return;
	}
	assert(max <= 65536);
	memset(taken, 0, sizeof(taken[0])*max);

	/* Robert Floyd sampling: for every upper in max-num..max-1 draw a
	 * value from 0..upper, and take upper itself when the drawn value
	 * was used before */
	for(upper = max-num; upper < max; upper++) {
		int pick;
		/* random_generate(n) yields 0..n-1; it is not called for n <= 1 */
		if(upper+1 <= 1) {
			pick = 0;
		} else {
			pick = random_generate(upper+1);
		}
		if(taken[pick])
			pick = upper;
		array[filled++] = pick;
		taken[pick] = 1;
	}
}
    245 
    246 static void
    247 clear_pipeline_entry(struct xfrd_tcp_pipeline* tp, rbnode_type* node)
    248 {
    249 	struct xfrd_tcp_pipeline_id *n;
    250 	if(node == NULL || node == RBTREE_NULL)
    251 		return;
    252 	clear_pipeline_entry(tp, node->left);
    253 	node->left = NULL;
    254 	clear_pipeline_entry(tp, node->right);
    255 	node->right = NULL;
    256 	/* move the node into the free list */
    257 	n = (struct xfrd_tcp_pipeline_id*)node;
    258 	n->next_free = tp->pipe_id_free_list;
    259 	tp->pipe_id_free_list = n;
    260 }
    261 
    262 static void
    263 xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline* tp)
    264 {
    265 	/* move entries into free list */
    266 	clear_pipeline_entry(tp, tp->zone_per_id->root);
    267 	/* clear the tree */
    268 	tp->zone_per_id->count = 0;
    269 	tp->zone_per_id->root = RBTREE_NULL;
    270 }
    271 
    272 static void
    273 xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline* tp)
    274 {
    275 	tp->key.node.key = tp;
    276 	tp->key.num_unused = tp->pipe_num;
    277 	tp->key.num_skip = 0;
    278 	tp->tcp_send_first = NULL;
    279 	tp->tcp_send_last = NULL;
    280 	xfrd_tcp_pipeline_cleanup(tp);
    281 	pick_id_values(tp->unused, tp->pipe_num, 65536);
    282 }
    283 
    284 struct xfrd_tcp_pipeline*
    285 xfrd_tcp_pipeline_create(region_type* region, int tcp_pipeline)
    286 {
    287 	int i;
    288 	struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)
    289 		region_alloc_zero(region, sizeof(*tp));
    290 	if(tcp_pipeline < 0)
    291 		tcp_pipeline = 0;
    292 	if(tcp_pipeline > 65536)
    293 		tcp_pipeline = 65536; /* max 16 bit ID numbers */
    294 	tp->pipe_num = tcp_pipeline;
    295 	tp->key.num_unused = tp->pipe_num;
    296 	tp->zone_per_id = rbtree_create(region, &pipeline_id_compare);
    297 	tp->pipe_id_free_list = NULL;
    298 	for(i=0; i<tp->pipe_num; i++) {
    299 		struct xfrd_tcp_pipeline_id* n = (struct xfrd_tcp_pipeline_id*)
    300 			region_alloc_zero(region, sizeof(*n));
    301 		n->next_free = tp->pipe_id_free_list;
    302 		tp->pipe_id_free_list = n;
    303 	}
    304 	tp->unused = (uint16_t*)region_alloc_zero(region,
    305 		sizeof(tp->unused[0])*tp->pipe_num);
    306 	tp->tcp_r = xfrd_tcp_create(region, QIOBUFSZ);
    307 	tp->tcp_w = xfrd_tcp_create(region, 512);
    308 	xfrd_tcp_pipeline_init(tp);
    309 	return tp;
    310 }
    311 
    312 static struct xfrd_zone*
    313 xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
    314 {
    315 	struct xfrd_tcp_pipeline_id key;
    316 	rbnode_type* n;
    317 	memset(&key, 0, sizeof(key));
    318 	key.node.key = &key;
    319 	key.id = id;
    320 	n = rbtree_search(tp->zone_per_id, &key);
    321 	if(n && n != RBTREE_NULL) {
    322 		return ((struct xfrd_tcp_pipeline_id*)n)->zone;
    323 	}
    324 	return NULL;
    325 }
    326 
    327 static void
    328 xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline* tp, uint16_t id,
    329 	struct xfrd_zone* zone)
    330 {
    331 	struct xfrd_tcp_pipeline_id* n;
    332 	/* because there are tp->pipe_num preallocated entries, and we have
    333 	 * only tp->pipe_num id values, the list cannot be empty now. */
    334 	assert(tp->pipe_id_free_list != NULL);
    335 	/* pick up next free xfrd_tcp_pipeline_id node */
    336 	n = tp->pipe_id_free_list;
    337 	tp->pipe_id_free_list = n->next_free;
    338 	n->next_free = NULL;
    339 	memset(&n->node, 0, sizeof(n->node));
    340 	n->node.key = n;
    341 	n->id = id;
    342 	n->zone = zone;
    343 	rbtree_insert(tp->zone_per_id, &n->node);
    344 }
    345 
    346 static void
    347 xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
    348 {
    349 	struct xfrd_tcp_pipeline_id key;
    350 	rbnode_type* node;
    351 	memset(&key, 0, sizeof(key));
    352 	key.node.key = &key;
    353 	key.id = id;
    354 	node = rbtree_delete(tp->zone_per_id, &key);
    355 	if(node && node != RBTREE_NULL) {
    356 		struct xfrd_tcp_pipeline_id* n =
    357 			(struct xfrd_tcp_pipeline_id*)node;
    358 		n->next_free = tp->pipe_id_free_list;
    359 		tp->pipe_id_free_list = n;
    360 	}
    361 }
    362 
    363 static void
    364 xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
    365 {
    366 	struct xfrd_tcp_pipeline_id key;
    367 	rbnode_type* n;
    368 	memset(&key, 0, sizeof(key));
    369 	key.node.key = &key;
    370 	key.id = id;
    371 	n = rbtree_search(tp->zone_per_id, &key);
    372 	if(n && n != RBTREE_NULL) {
    373 		struct xfrd_tcp_pipeline_id* zid = (struct xfrd_tcp_pipeline_id*)n;
    374 		zid->zone = TCP_NULL_SKIP;
    375 	}
    376 }
    377 
    378 void
    379 xfrd_setup_packet(buffer_type* packet,
    380 	uint16_t type, uint16_t klass, const dname_type* dname, uint16_t qid)
    381 {
    382 	/* Set up the header */
    383 	buffer_clear(packet);
    384 	ID_SET(packet, qid);
    385 	FLAGS_SET(packet, 0);
    386 	OPCODE_SET(packet, OPCODE_QUERY);
    387 	QDCOUNT_SET(packet, 1);
    388 	ANCOUNT_SET(packet, 0);
    389 	NSCOUNT_SET(packet, 0);
    390 	ARCOUNT_SET(packet, 0);
    391 	buffer_skip(packet, QHEADERSZ);
    392 
    393 	/* The question record. */
    394 	buffer_write(packet, dname_name(dname), dname->name_size);
    395 	buffer_write_u16(packet, type);
    396 	buffer_write_u16(packet, klass);
    397 }
    398 
    399 static socklen_t
    400 #ifdef INET6
    401 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
    402 	struct sockaddr_storage *sck)
    403 #else
    404 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
    405 	struct sockaddr_in *sck, const char* fromto)
    406 #endif /* INET6 */
    407 {
    408 	/* setup address structure */
    409 #ifdef INET6
    410 	memset(sck, 0, sizeof(struct sockaddr_storage));
    411 #else
    412 	memset(sck, 0, sizeof(struct sockaddr_in));
    413 #endif
    414 	if(acl->is_ipv6) {
    415 #ifdef INET6
    416 		struct sockaddr_in6* sa = (struct sockaddr_in6*)sck;
    417 		sa->sin6_family = AF_INET6;
    418 		sa->sin6_port = htons(port);
    419 		sa->sin6_addr = acl->addr.addr6;
    420 		return sizeof(struct sockaddr_in6);
    421 #else
    422 		log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \
    423 INET6.", fromto, acl->ip_address_spec);
    424 		return 0;
    425 #endif
    426 	} else {
    427 		struct sockaddr_in* sa = (struct sockaddr_in*)sck;
    428 		sa->sin_family = AF_INET;
    429 		sa->sin_port = htons(port);
    430 		sa->sin_addr = acl->addr.addr;
    431 		return sizeof(struct sockaddr_in);
    432 	}
    433 }
    434 
    435 socklen_t
    436 #ifdef INET6
    437 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_storage *to)
    438 #else
    439 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_in *to)
    440 #endif /* INET6 */
    441 {
    442 #ifdef HAVE_TLS_1_3
    443 	unsigned int port = acl->port?acl->port:(acl->tls_auth_options?
    444 						(unsigned)atoi(TLS_PORT):(unsigned)atoi(TCP_PORT));
    445 #else
    446 	unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT);
    447 #endif
    448 #ifdef INET6
    449 	return xfrd_acl_sockaddr(acl, port, to);
    450 #else
    451 	return xfrd_acl_sockaddr(acl, port, to, "to");
    452 #endif /* INET6 */
    453 }
    454 
    455 socklen_t
    456 #ifdef INET6
    457 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_storage *frm)
    458 #else
    459 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_in *frm)
    460 #endif /* INET6 */
    461 {
    462 	unsigned int port = acl->port?acl->port:0;
    463 #ifdef INET6
    464 	return xfrd_acl_sockaddr(acl, port, frm);
    465 #else
    466 	return xfrd_acl_sockaddr(acl, port, frm, "from");
    467 #endif /* INET6 */
    468 }
    469 
    470 void
    471 xfrd_write_soa_buffer(struct buffer* packet,
    472 	const dname_type* apex, struct xfrd_soa* soa)
    473 {
    474 	size_t rdlength_pos;
    475 	uint16_t rdlength;
    476 	buffer_write(packet, dname_name(apex), apex->name_size);
    477 
    478 	/* already in network order */
    479 	buffer_write(packet, &soa->type, sizeof(soa->type));
    480 	buffer_write(packet, &soa->klass, sizeof(soa->klass));
    481 	buffer_write(packet, &soa->ttl, sizeof(soa->ttl));
    482 	rdlength_pos = buffer_position(packet);
    483 	buffer_skip(packet, sizeof(rdlength));
    484 
    485 	/* uncompressed dnames */
    486 	buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]);
    487 	buffer_write(packet, soa->email+1, soa->email[0]);
    488 
    489 	buffer_write(packet, &soa->serial, sizeof(uint32_t));
    490 	buffer_write(packet, &soa->refresh, sizeof(uint32_t));
    491 	buffer_write(packet, &soa->retry, sizeof(uint32_t));
    492 	buffer_write(packet, &soa->expire, sizeof(uint32_t));
    493 	buffer_write(packet, &soa->minimum, sizeof(uint32_t));
    494 
    495 	/* write length of RR */
    496 	rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength);
    497 	buffer_write_u16_at(packet, rdlength_pos, rdlength);
    498 }
    499 
    500 struct xfrd_tcp*
    501 xfrd_tcp_create(region_type* region, size_t bufsize)
    502 {
    503 	struct xfrd_tcp* tcp_state = (struct xfrd_tcp*)region_alloc(
    504 		region, sizeof(struct xfrd_tcp));
    505 	memset(tcp_state, 0, sizeof(struct xfrd_tcp));
    506 	tcp_state->packet = buffer_create(region, bufsize);
    507 	tcp_state->fd = -1;
    508 
    509 	return tcp_state;
    510 }
    511 
    512 static struct xfrd_tcp_pipeline*
    513 pipeline_find(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
    514 {
    515 	rbnode_type* sme = NULL;
    516 	struct xfrd_tcp_pipeline* r;
    517 	/* smaller buf than a full pipeline with 64kb ID array, only need
    518 	 * the front part with the key info, this front part contains the
    519 	 * members that the compare function uses. */
    520 	struct xfrd_tcp_pipeline_key k, *key=&k;
    521 	key->node.key = key;
    522 	key->ip_len = xfrd_acl_sockaddr_to(zone->master, &key->ip);
    523 	key->num_unused = set->tcp_pipeline;
    524 	/* lookup existing tcp transfer to the master with highest unused */
    525 	if(rbtree_find_less_equal(set->pipetree, key, &sme)) {
    526 		/* exact match, strange, fully unused tcp cannot be open */
    527 		assert(0);
    528 	}
    529 	if(!sme)
    530 		return NULL;
    531 	r = (struct xfrd_tcp_pipeline*)sme->key;
    532 	/* <= key pointed at, is the master correct ? */
    533 	if(r->key.ip_len != key->ip_len)
    534 		return NULL;
    535 	if(memcmp(&r->key.ip, &key->ip, key->ip_len) != 0)
    536 		return NULL;
    537 	/* correct master, is there a slot free for this transfer? */
    538 	if(r->key.num_unused == 0)
    539 		return NULL;
    540 	return r;
    541 }
    542 
    543 /* remove zone from tcp waiting list */
    544 static void
    545 tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
    546 {
    547 	assert(zone->tcp_waiting);
    548 	set->tcp_waiting_first = zone->tcp_waiting_next;
    549 	if(zone->tcp_waiting_next)
    550 		zone->tcp_waiting_next->tcp_waiting_prev = NULL;
    551 	else	set->tcp_waiting_last = 0;
    552 	zone->tcp_waiting_next = 0;
    553 	zone->tcp_waiting = 0;
    554 }
    555 
    556 /* remove zone from tcp pipe write-wait list */
    557 static void
    558 tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
    559 {
    560 	if(zone->in_tcp_send) {
    561 		if(zone->tcp_send_prev)
    562 			zone->tcp_send_prev->tcp_send_next=zone->tcp_send_next;
    563 		else	tp->tcp_send_first=zone->tcp_send_next;
    564 		if(zone->tcp_send_next)
    565 			zone->tcp_send_next->tcp_send_prev=zone->tcp_send_prev;
    566 		else	tp->tcp_send_last=zone->tcp_send_prev;
    567 		zone->in_tcp_send = 0;
    568 	}
    569 }
    570 
    571 /* remove first from write-wait list */
    572 static void
    573 tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
    574 {
    575 	tp->tcp_send_first = zone->tcp_send_next;
    576 	if(tp->tcp_send_first)
    577 		tp->tcp_send_first->tcp_send_prev = NULL;
    578 	else	tp->tcp_send_last = NULL;
    579 	zone->in_tcp_send = 0;
    580 }
    581 
    582 /* remove zone from tcp pipe ID map */
    583 static void
    584 tcp_pipe_id_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone,
    585 	int alsotree)
    586 {
    587 	assert(tp->key.num_unused < tp->pipe_num && tp->key.num_unused >= 0);
    588 	if(alsotree)
    589 		xfrd_tcp_pipeline_remove_id(tp, zone->query_id);
    590 	tp->unused[tp->key.num_unused] = zone->query_id;
    591 	/* must remove and re-add for sort order in tree */
    592 	(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
    593 	tp->key.num_unused++;
    594 	(void)rbtree_insert(xfrd->tcp_set->pipetree, &tp->key.node);
    595 }
    596 
    597 /* stop the tcp pipe (and all its zones need to retry) */
    598 static void
    599 xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline* tp)
    600 {
    601 	struct xfrd_tcp_pipeline_id* zid;
    602 	int conn = -1;
    603 	assert(tp->key.num_unused < tp->pipe_num); /* at least one 'in-use' */
    604 	assert(tp->pipe_num - tp->key.num_unused > tp->key.num_skip); /* at least one 'nonskip' */
    605 	/* need to retry for all the zones connected to it */
    606 	/* these could use different lists and go to a different nextmaster*/
    607 	RBTREE_FOR(zid, struct xfrd_tcp_pipeline_id*, tp->zone_per_id) {
    608 		xfrd_zone_type* zone = zid->zone;
    609 		if(zone && zone != TCP_NULL_SKIP) {
    610 			assert(zone->query_id == zid->id);
    611 			conn = zone->tcp_conn;
    612 			zone->tcp_conn = -1;
    613 			zone->tcp_waiting = 0;
    614 			tcp_pipe_sendlist_remove(tp, zone);
    615 			tcp_pipe_id_remove(tp, zone, 0);
    616 			xfrd_set_refresh_now(zone);
    617 		}
    618 	}
    619 	xfrd_tcp_pipeline_cleanup(tp);
    620 	assert(conn != -1);
    621 	/* now release the entire tcp pipe */
    622 	xfrd_tcp_pipe_release(xfrd->tcp_set, tp, conn);
    623 }
    624 
    625 static void
    626 tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline* tp)
    627 {
    628 	int fd = tp->handler.ev_fd;
    629 	struct timeval tv;
    630 	tv.tv_sec = xfrd->tcp_set->tcp_timeout;
    631 	tv.tv_usec = 0;
    632 	if(tp->handler_added)
    633 		event_del(&tp->handler);
    634 	memset(&tp->handler, 0, sizeof(tp->handler));
    635 	event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
    636 #ifdef HAVE_TLS_1_3
    637 		( tp->ssl
    638 		? ( tp->handshake_done ?  ( tp->tcp_send_first ? EV_WRITE : 0 )
    639 		  : tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0 )
    640 		: tp->tcp_send_first ? EV_WRITE : 0 ),
    641 #else
    642 		( tp->tcp_send_first ? EV_WRITE : 0 ),
    643 #endif
    644 		xfrd_handle_tcp_pipe, tp);
    645 	if(event_base_set(xfrd->event_base, &tp->handler) != 0)
    646 		log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
    647 	if(event_add(&tp->handler, &tv) != 0)
    648 		log_msg(LOG_ERR, "xfrd tcp: event_add failed");
    649 	tp->handler_added = 1;
    650 }
    651 
    652 /* handle event from fd of tcp pipe */
    653 void
    654 xfrd_handle_tcp_pipe(int ATTR_UNUSED(fd), short event, void* arg)
    655 {
    656 	struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)arg;
    657 	if((event & EV_WRITE)) {
    658 		tcp_pipe_reset_timeout(tp);
    659 		if(tp->tcp_send_first) {
    660 			DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp write, zone %s",
    661 				tp->tcp_send_first->apex_str));
    662 			xfrd_tcp_write(tp, tp->tcp_send_first);
    663 		}
    664 	}
    665 	if((event & EV_READ) && tp->handler_added) {
    666 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp read"));
    667 		tcp_pipe_reset_timeout(tp);
    668 		xfrd_tcp_read(tp);
    669 	}
    670 	if((event & EV_TIMEOUT) && tp->handler_added) {
    671 		/* tcp connection timed out */
    672 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp timeout"));
    673 		xfrd_tcp_pipe_stop(tp);
    674 	}
    675 }
    676 
    677 /* add a zone to the pipeline, it starts to want to write its query */
    678 static void
    679 pipeline_setup_new_zone(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
    680 	xfrd_zone_type* zone)
    681 {
    682 	/* assign the ID */
    683 	int idx;
    684 	assert(tp->key.num_unused > 0);
    685 	/* we pick a random ID, even though it is TCP anyway */
    686 	idx = random_generate(tp->key.num_unused);
    687 	zone->query_id = tp->unused[idx];
    688 	tp->unused[idx] = tp->unused[tp->key.num_unused-1];
    689 	xfrd_tcp_pipeline_insert_id(tp, zone->query_id, zone);
    690 	/* decrement unused counter, and fixup tree */
    691 	(void)rbtree_delete(set->pipetree, &tp->key.node);
    692 	tp->key.num_unused--;
    693 	(void)rbtree_insert(set->pipetree, &tp->key.node);
    694 
    695 	/* add to sendlist, at end */
    696 	zone->tcp_send_next = NULL;
    697 	zone->tcp_send_prev = tp->tcp_send_last;
    698 	zone->in_tcp_send = 1;
    699 	if(tp->tcp_send_last)
    700 		tp->tcp_send_last->tcp_send_next = zone;
    701 	else	tp->tcp_send_first = zone;
    702 	tp->tcp_send_last = zone;
    703 
    704 	/* is it first in line? */
    705 	if(tp->tcp_send_first == zone) {
    706 		xfrd_tcp_setup_write_packet(tp, zone);
    707 		/* add write to event handler */
    708 		tcp_pipe_reset_timeout(tp);
    709 	}
    710 }
    711 
    712 void
    713 xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
    714 {
    715 	struct xfrd_tcp_pipeline* tp;
    716 	assert(zone->tcp_conn == -1);
    717 	assert(zone->tcp_waiting == 0);
    718 
    719 	if(set->tcp_count < set->tcp_max) {
    720 		int i;
    721 		assert(!set->tcp_waiting_first);
    722 		set->tcp_count ++;
    723 		/* find a free tcp_buffer */
    724 		for(i=0; i<set->tcp_max; i++) {
    725 			if(set->tcp_state[i]->tcp_r->fd == -1) {
    726 				zone->tcp_conn = i;
    727 				break;
    728 			}
    729 		}
    730 		/** What if there is no free tcp_buffer? return; */
    731 		if (zone->tcp_conn < 0) {
    732 			return;
    733 		}
    734 
    735 		tp = set->tcp_state[zone->tcp_conn];
    736 		zone->tcp_waiting = 0;
    737 
    738 		/* stop udp use (if any) */
    739 		if(zone->zone_handler.ev_fd != -1)
    740 			xfrd_udp_release(zone);
    741 
    742 		if(!xfrd_tcp_open(set, tp, zone)) {
    743 			zone->tcp_conn = -1;
    744 			set->tcp_count --;
    745 			xfrd_set_refresh_now(zone);
    746 			return;
    747 		}
    748 		/* ip and ip_len set by tcp_open */
    749 		xfrd_tcp_pipeline_init(tp);
    750 
    751 		/* insert into tree */
    752 		(void)rbtree_insert(set->pipetree, &tp->key.node);
    753 		xfrd_deactivate_zone(zone);
    754 		xfrd_unset_timer(zone);
    755 		pipeline_setup_new_zone(set, tp, zone);
    756 		return;
    757 	}
    758 	/* check for a pipeline to the same master with unused ID */
    759 	if((tp = pipeline_find(set, zone))!= NULL) {
    760 		int i;
    761 		if(zone->zone_handler.ev_fd != -1)
    762 			xfrd_udp_release(zone);
    763 		for(i=0; i<set->tcp_max; i++) {
    764 			if(set->tcp_state[i] == tp)
    765 				zone->tcp_conn = i;
    766 		}
    767 		xfrd_deactivate_zone(zone);
    768 		xfrd_unset_timer(zone);
    769 		pipeline_setup_new_zone(set, tp, zone);
    770 		return;
    771 	}
    772 
    773 	/* wait, at end of line */
    774 	DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp "
    775 		"connections (%d) reached.", set->tcp_max));
    776 	zone->tcp_waiting_next = 0;
    777 	zone->tcp_waiting_prev = set->tcp_waiting_last;
    778 	zone->tcp_waiting = 1;
    779 	if(!set->tcp_waiting_last) {
    780 		set->tcp_waiting_first = zone;
    781 		set->tcp_waiting_last = zone;
    782 	} else {
    783 		set->tcp_waiting_last->tcp_waiting_next = zone;
    784 		set->tcp_waiting_last = zone;
    785 	}
    786 	xfrd_deactivate_zone(zone);
    787 	xfrd_unset_timer(zone);
    788 }
    789 
    790 int
    791 xfrd_tcp_open(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
    792 	xfrd_zone_type* zone)
    793 {
    794 	int fd, family, conn;
    795 	struct timeval tv;
    796 	assert(zone->tcp_conn != -1);
    797 
    798 	/* if there is no next master, fallback to use the first one */
    799 	/* but there really should be a master set */
    800 	if(!zone->master) {
    801 		zone->master = zone->zone_options->pattern->request_xfr;
    802 		zone->master_num = 0;
    803 	}
    804 
    805 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s",
    806 		zone->apex_str, zone->master->ip_address_spec));
    807 	tp->tcp_r->is_reading = 1;
    808 	tp->tcp_r->total_bytes = 0;
    809 	tp->tcp_r->msglen = 0;
    810 	buffer_clear(tp->tcp_r->packet);
    811 	tp->tcp_w->is_reading = 0;
    812 	tp->tcp_w->total_bytes = 0;
    813 	tp->tcp_w->msglen = 0;
    814 	tp->connection_established = 0;
    815 
    816 	if(zone->master->is_ipv6) {
    817 #ifdef INET6
    818 		family = PF_INET6;
    819 #else
    820 		xfrd_set_refresh_now(zone);
    821 		return 0;
    822 #endif
    823 	} else {
    824 		family = PF_INET;
    825 	}
    826 	fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
    827 	if(fd == -1) {
    828 		/* squelch 'Address family not supported by protocol' at low
    829 		 * verbosity levels */
    830 		if(errno != EAFNOSUPPORT || verbosity > 2)
    831 		    log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s",
    832 			zone->master->ip_address_spec, strerror(errno));
    833 		xfrd_set_refresh_now(zone);
    834 		return 0;
    835 	}
    836 	if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
    837 		log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno));
    838 		close(fd);
    839 		xfrd_set_refresh_now(zone);
    840 		return 0;
    841 	}
    842 
    843 	if(xfrd->nsd->outgoing_tcp_mss > 0) {
    844 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
    845 		if(setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
    846 			(void*)&xfrd->nsd->outgoing_tcp_mss,
    847 			sizeof(xfrd->nsd->outgoing_tcp_mss)) < 0) {
    848 			log_msg(LOG_ERR, "xfrd: setsockopt(TCP_MAXSEG)"
    849 					"failed: %s", strerror(errno));
    850 		}
    851 #else
    852 		log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
    853 #endif
    854 	}
    855 
    856 	tp->key.ip_len = xfrd_acl_sockaddr_to(zone->master, &tp->key.ip);
    857 
    858 	/* bind it */
    859 	if (!xfrd_bind_local_interface(fd, zone->zone_options->pattern->
    860 		outgoing_interface, zone->master, 1)) {
    861 		close(fd);
    862 		xfrd_set_refresh_now(zone);
    863 		return 0;
    864         }
    865 
    866 	conn = connect(fd, (struct sockaddr*)&tp->key.ip, tp->key.ip_len);
    867 	if (conn == -1 && errno != EINPROGRESS) {
    868 		log_msg(LOG_ERR, "xfrd: connect %s failed: %s",
    869 			zone->master->ip_address_spec, strerror(errno));
    870 		close(fd);
    871 		xfrd_set_refresh_now(zone);
    872 		return 0;
    873 	}
    874 	tp->tcp_r->fd = fd;
    875 	tp->tcp_w->fd = fd;
    876 
    877 	/* Check if an tls_auth name is configured which means we should try to
    878 	   establish an SSL connection */
    879 	if (zone->master->tls_auth_options &&
    880 		zone->master->tls_auth_options->auth_domain_name) {
    881 #ifdef HAVE_TLS_1_3
    882 		if (!setup_ssl(tp, set, zone->master->tls_auth_options->auth_domain_name)) {
    883 			log_msg(LOG_ERR, "xfrd: Cannot setup TLS on pipeline for %s to %s",
    884 					zone->apex_str, zone->master->ip_address_spec);
    885 			close(fd);
    886 			xfrd_set_refresh_now(zone);
    887 			return 0;
    888 		}
    889 
    890 		/* Load client certificate (if provided) */
    891 		if (zone->master->tls_auth_options->client_cert &&
    892 		    zone->master->tls_auth_options->client_key) {
    893 			if (SSL_CTX_use_certificate_chain_file(set->ssl_ctx,
    894 			                                       zone->master->tls_auth_options->client_cert) != 1) {
    895 				log_msg(LOG_ERR, "xfrd tls: Unable to load client certificate from file %s", zone->master->tls_auth_options->client_cert);
    896 			}
    897 
    898 			if (zone->master->tls_auth_options->client_key_pw) {
    899 				SSL_CTX_set_default_passwd_cb(set->ssl_ctx, password_cb);
    900 				SSL_CTX_set_default_passwd_cb_userdata(set->ssl_ctx, zone->master->tls_auth_options->client_key_pw);
    901 			}
    902 
    903 			if (SSL_CTX_use_PrivateKey_file(set->ssl_ctx, zone->master->tls_auth_options->client_key, SSL_FILETYPE_PEM) != 1) {
    904 				log_msg(LOG_ERR, "xfrd tls: Unable to load private key from file %s", zone->master->tls_auth_options->client_key);
    905 			}
    906 		}
    907 
    908 		tp->handshake_done = 0;
    909 		if(!ssl_handshake(tp)) {
    910 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
    911 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
    912 					"for %s to %s: %s", zone->apex_str,
    913 					zone->master->ip_address_spec,
    914 					strerror(errno));
    915 
    916 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
    917 				char errmsg[1024];
    918 				snprintf(errmsg, sizeof(errmsg), "xfrd: "
    919 					"TLS handshake failed for %s to %s",
    920 					zone->apex_str,
    921 					zone->master->ip_address_spec);
    922 				log_crypto_err(errmsg);
    923 			} else {
    924 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
    925 					"for %s to %s with %d", zone->apex_str,
    926 					zone->master->ip_address_spec,
    927 					tp->handshake_want);
    928 			}
    929 			close(fd);
    930 			xfrd_set_refresh_now(zone);
    931 			return 0;
    932 		}
    933 #else
    934 		log_msg(LOG_ERR, "xfrd: TLS 1.3 is not available, XFR-over-TLS is "
    935 						 "not supported for %s to %s",
    936 						  zone->apex_str, zone->master->ip_address_spec);
    937 		close(fd);
    938 		xfrd_set_refresh_now(zone);
    939 		return 0;
    940 #endif
    941 	}
    942 
    943 	/* set the tcp pipe event */
    944 	if(tp->handler_added)
    945 		event_del(&tp->handler);
    946 	memset(&tp->handler, 0, sizeof(tp->handler));
    947 	event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
    948 #ifdef HAVE_TLS_1_3
    949 		( !tp->ssl
    950 		|| tp->handshake_done
    951 		|| tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0),
    952 #else
    953 		EV_WRITE,
    954 #endif
    955 	        xfrd_handle_tcp_pipe, tp);
    956 	if(event_base_set(xfrd->event_base, &tp->handler) != 0)
    957 		log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
    958 	tv.tv_sec = set->tcp_timeout;
    959 	tv.tv_usec = 0;
    960 	if(event_add(&tp->handler, &tv) != 0)
    961 		log_msg(LOG_ERR, "xfrd tcp: event_add failed");
    962 	tp->handler_added = 1;
    963 	return 1;
    964 }
    965 
    966 void
    967 xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
    968 {
    969 	struct xfrd_tcp* tcp = tp->tcp_w;
    970 	assert(zone->tcp_conn != -1);
    971 	assert(zone->tcp_waiting == 0);
    972 	/* start AXFR or IXFR for the zone */
    973 	if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only ||
    974 		zone->master->ixfr_disabled ||
    975 		/* if zone expired, after the first round, do not ask for
    976 		 * IXFR any more, but full AXFR (of any serial number) */
    977 		(zone->state == xfrd_zone_expired && zone->round_num != 0)) {
    978 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer "
    979 						"(AXFR) for %s to %s",
    980 			zone->apex_str, zone->master->ip_address_spec));
    981 
    982 		xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex,
    983 			zone->query_id);
    984 		xfrd_prepare_zone_xfr(zone, TYPE_AXFR);
    985 	} else {
    986 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone "
    987 						"transfer (IXFR) for %s to %s",
    988 			zone->apex_str, zone->master->ip_address_spec));
    989 
    990 		xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex,
    991 			zone->query_id);
    992 		xfrd_prepare_zone_xfr(zone, TYPE_IXFR);
    993 		NSCOUNT_SET(tcp->packet, 1);
    994 		xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk);
    995 	}
    996 	if(zone->master->key_options && zone->master->key_options->tsig_key) {
    997 		xfrd_tsig_sign_request(
    998 			tcp->packet, &zone->latest_xfr->tsig, zone->master);
    999 	}
   1000 	buffer_flip(tcp->packet);
   1001 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id));
   1002 	tcp->msglen = buffer_limit(tcp->packet);
   1003 	tcp->total_bytes = 0;
   1004 }
   1005 
   1006 static void
   1007 tcp_conn_ready_for_reading(struct xfrd_tcp* tcp)
   1008 {
   1009 	tcp->total_bytes = 0;
   1010 	tcp->msglen = 0;
   1011 	buffer_clear(tcp->packet);
   1012 }
   1013 
#ifdef HAVE_TLS_1_3
/*
 * Decide what to do after SSL_write() returned a value <= 0.
 *
 * Returns 0 if the write only has to be retried on a later event
 * (SSL_ERROR_WANT_WRITE/SSL_ERROR_WANT_READ, or -1 with EAGAIN/EINTR),
 * and -1 if the connection must be given up.  In the -1 case errno is
 * left as it was right after SSL_write(), because the caller reports it.
 */
static int
conn_write_ssl_failed(SSL* ssl, int sent)
{
	int saved_errno = errno;
	int err = SSL_get_error(ssl, sent);

	if(err == SSL_ERROR_WANT_WRITE || err == SSL_ERROR_WANT_READ) {
		/* write would block, try later */
		return 0;
	}
	if(sent == -1 && (saved_errno == EAGAIN || saved_errno == EINTR)) {
		/* write would block, try later */
		return 0;
	}
	log_msg(LOG_ERR, "xfrd: generic write problem with tls");
	errno = saved_errno;
	return -1;
}

/*
 * Write the pending message of tcp over the TLS session ssl.
 *
 * The message is framed as a two byte length in network byte order
 * followed by the packet contents; tcp->total_bytes counts the bytes of
 * both that have been written so far.
 *
 * Returns 1 when the whole message has been written, 0 when the write has
 * to be continued later, and -1 on failure.
 */
static int
conn_write_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	int request_length;
	int sent;

	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		uint16_t sendlen = htons(tcp->msglen);
		/* send (the rest of) the length prefix */
		request_length = (int)(sizeof(tcp->msglen) - tcp->total_bytes);
		ERR_clear_error();
		sent = SSL_write(ssl, (const char*)&sendlen + tcp->total_bytes,
			request_length);
		if(sent <= 0)
			return conn_write_ssl_failed(ssl, sent);

		/* only length bytes were offered to SSL_write, so nothing of
		 * the packet itself can have been consumed here */
		tcp->total_bytes += sent;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* incomplete write, resume later */
			return 0;
		}
		assert(tcp->total_bytes >= sizeof(tcp->msglen));
	}

	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));

	request_length = (int)buffer_remaining(tcp->packet);
	ERR_clear_error();
	sent = SSL_write(ssl, buffer_current(tcp->packet), request_length);
	if(sent <= 0)
		return conn_write_ssl_failed(ssl, sent);

	buffer_skip(tcp->packet, sent);
	tcp->total_bytes += sent;

	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
		/* more to write when socket becomes writable again */
		return 0;
	}

	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
	return 1;
}
#endif
   1086 
   1087 int conn_write(struct xfrd_tcp* tcp)
   1088 {
   1089 	ssize_t sent;
   1090 
   1091 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
   1092 		uint16_t sendlen = htons(tcp->msglen);
   1093 #ifdef HAVE_WRITEV
   1094 		struct iovec iov[2];
   1095 		iov[0].iov_base = (uint8_t*)&sendlen + tcp->total_bytes;
   1096 		iov[0].iov_len = sizeof(sendlen) - tcp->total_bytes;
   1097 		iov[1].iov_base = buffer_begin(tcp->packet);
   1098 		iov[1].iov_len = buffer_limit(tcp->packet);
   1099 		sent = writev(tcp->fd, iov, 2);
   1100 #else /* HAVE_WRITEV */
   1101 		sent = write(tcp->fd,
   1102 			(const char*)&sendlen + tcp->total_bytes,
   1103 			sizeof(tcp->msglen) - tcp->total_bytes);
   1104 #endif /* HAVE_WRITEV */
   1105 
   1106 		if(sent == -1) {
   1107 			if(errno == EAGAIN || errno == EINTR) {
   1108 				/* write would block, try later */
   1109 				return 0;
   1110 			} else {
   1111 				return -1;
   1112 			}
   1113 		}
   1114 
   1115 		tcp->total_bytes += sent;
   1116 		if(sent > (ssize_t)sizeof(tcp->msglen))
   1117 			buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
   1118 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
   1119 			/* incomplete write, resume later */
   1120 			return 0;
   1121 		}
   1122 #ifdef HAVE_WRITEV
   1123 		if(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen)) {
   1124 			/* packet done */
   1125 			return 1;
   1126 		}
   1127 #endif
   1128 		assert(tcp->total_bytes >= sizeof(tcp->msglen));
   1129 	}
   1130 
   1131 	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
   1132 
   1133 	sent = write(tcp->fd,
   1134 		buffer_current(tcp->packet),
   1135 		buffer_remaining(tcp->packet));
   1136 	if(sent == -1) {
   1137 		if(errno == EAGAIN || errno == EINTR) {
   1138 			/* write would block, try later */
   1139 			return 0;
   1140 		} else {
   1141 			return -1;
   1142 		}
   1143 	}
   1144 
   1145 	buffer_skip(tcp->packet, sent);
   1146 	tcp->total_bytes += sent;
   1147 
   1148 	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
   1149 		/* more to write when socket becomes writable again */
   1150 		return 0;
   1151 	}
   1152 
   1153 	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
   1154 	return 1;
   1155 }
   1156 
   1157 void
   1158 xfrd_tcp_write(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
   1159 {
   1160 	int ret;
   1161 	struct xfrd_tcp* tcp = tp->tcp_w;
   1162 	assert(zone->tcp_conn != -1);
   1163 	assert(zone == tp->tcp_send_first);
   1164 	/* see if for non-established connection, there is a connect error */
   1165 	if(!tp->connection_established) {
   1166 		/* check for pending error from nonblocking connect */
   1167 		/* from Stevens, unix network programming, vol1, 3rd ed, p450 */
   1168 		int error = 0;
   1169 		socklen_t len = sizeof(error);
   1170 		if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){
   1171 			error = errno; /* on solaris errno is error */
   1172 		}
   1173 		if(error == EINPROGRESS || error == EWOULDBLOCK)
   1174 			return; /* try again later */
   1175 		if(error != 0) {
   1176 			log_msg(LOG_ERR, "%s: Could not tcp connect to %s: %s",
   1177 				zone->apex_str, zone->master->ip_address_spec,
   1178 				strerror(error));
   1179 			xfrd_tcp_pipe_stop(tp);
   1180 			return;
   1181 		}
   1182 	}
   1183 #ifdef HAVE_TLS_1_3
   1184 	if (tp->ssl) {
   1185 		if(tp->handshake_done) {
   1186 			ret = conn_write_ssl(tcp, tp->ssl);
   1187 
   1188 		} else if(ssl_handshake(tp)) {
   1189 			tcp_pipe_reset_timeout(tp); /* reschedule */
   1190 			return;
   1191 
   1192 		} else {
   1193 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
   1194 				log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
   1195 					strerror(errno));
   1196 
   1197 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
   1198 				log_crypto_err("xfrd: TLS handshake failed");
   1199 			} else {
   1200 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
   1201 					"with value: %d", tp->handshake_want);
   1202 			}
   1203 			xfrd_tcp_pipe_stop(tp);
   1204 			return;
   1205 		}
   1206 	} else
   1207 #endif
   1208 		ret = conn_write(tcp);
   1209 	if(ret == -1) {
   1210 		log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
   1211 		xfrd_tcp_pipe_stop(tp);
   1212 		return;
   1213 	}
   1214 	if(tcp->total_bytes != 0 && !tp->connection_established)
   1215 		tp->connection_established = 1;
   1216 	if(ret == 0) {
   1217 		return; /* write again later */
   1218 	}
   1219 	/* done writing this message */
   1220 
   1221 	/* remove first zone from sendlist */
   1222 	tcp_pipe_sendlist_popfirst(tp, zone);
   1223 
   1224 	/* see if other zone wants to write; init; let it write (now) */
   1225 	/* and use a loop, because 64k stack calls is a too much */
   1226 	while(tp->tcp_send_first) {
   1227 		/* setup to write for this zone */
   1228 		xfrd_tcp_setup_write_packet(tp, tp->tcp_send_first);
   1229 		/* attempt to write for this zone (if success, continue loop)*/
   1230 #ifdef HAVE_TLS_1_3
   1231 		if (tp->ssl)
   1232 			ret = conn_write_ssl(tcp, tp->ssl);
   1233 		else
   1234 #endif
   1235 			ret = conn_write(tcp);
   1236 		if(ret == -1) {
   1237 			log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
   1238 			xfrd_tcp_pipe_stop(tp);
   1239 			return;
   1240 		}
   1241 		if(ret == 0)
   1242 			return; /* write again later */
   1243 		tcp_pipe_sendlist_popfirst(tp, tp->tcp_send_first);
   1244 	}
   1245 
   1246 	/* if sendlist empty, remove WRITE from event */
   1247 
   1248 	/* listen to READ, and not WRITE events */
   1249 	assert(tp->tcp_send_first == NULL);
   1250 	tcp_pipe_reset_timeout(tp);
   1251 }
   1252 
#ifdef HAVE_TLS_1_3
/*
 * Decide what to do after SSL_read() returned a value <= 0.
 *
 * what is the prefix for the message that reports a read error.
 *
 * Returns 0 if the read only has to be retried on a later event and -1 if
 * the connection must be given up.  For -1, errno is 0 when the peer
 * closed the connection and the error of the failed read otherwise; the
 * caller uses this to pick its log message.
 */
static int
conn_read_ssl_failed(SSL* ssl, ssize_t received, const char* what)
{
	int saved_errno = errno;
	int err = SSL_get_error(ssl, (int)received);

	if(err == SSL_ERROR_WANT_READ) {
		/* no complete TLS record available yet, try later */
		return 0;
	}
	if(err == SSL_ERROR_ZERO_RETURN) {
		/* EOF */
		errno = 0;
		return -1;
	}
	if(err == SSL_ERROR_SYSCALL)
		log_msg(LOG_ERR, "ssl_read returned error SSL_ERROR_SYSCALL with received %zd: %s", received, strerror(saved_errno));
	else
		log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);

	if(received == 0) {
		/* EOF */
		errno = 0;
		return -1;
	}
	if(saved_errno == EAGAIN || saved_errno == EINTR) {
		/* read would block, try later */
		return 0;
	}
#ifdef ECONNRESET
	if (verbosity >= 2 || saved_errno != ECONNRESET)
#endif /* ECONNRESET */
		log_msg(LOG_ERR, "%s%s", what, strerror(saved_errno));
	errno = saved_errno;
	return -1;
}

/*
 * Read (part of) one length-prefixed message from the TLS session ssl
 * into tcp->packet.
 *
 * The two byte length is read into tcp->msglen first and converted to
 * host byte order once complete; after that the packet buffer is filled
 * up to that length.
 *
 * Returns 1 when a complete message is in the buffer (the buffer is not
 * flipped yet), 0 when more data is needed, and -1 on EOF, on a read
 * error, or when the announced length does not fit the buffer.
 */
static int
conn_read_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	ssize_t received;
	/* receive leading packet length bytes */
	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		ERR_clear_error();
		received = SSL_read(ssl,
			(char*) &tcp->msglen + tcp->total_bytes,
			sizeof(tcp->msglen) - tcp->total_bytes);
		if(received <= 0)
			return conn_read_ssl_failed(ssl, received,
				"tls read sz: ");
		tcp->total_bytes += received;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* not complete yet, try later */
			return 0;
		}

		assert(tcp->total_bytes == sizeof(tcp->msglen));
		tcp->msglen = ntohs(tcp->msglen);

		if(tcp->msglen == 0) {
			/* empty message, nothing more to read for it */
			buffer_set_limit(tcp->packet, tcp->msglen);
			return 1;
		}
		if(tcp->msglen > buffer_capacity(tcp->packet)) {
			/* reading on would desynchronise the framing, so
			 * report failure to make the caller drop the pipe */
			log_msg(LOG_ERR, "buffer too small, dropping connection");
			errno = 0;
			return -1;
		}
		buffer_set_limit(tcp->packet, tcp->msglen);
	}

	assert(buffer_remaining(tcp->packet) > 0);
	ERR_clear_error();

	received = SSL_read(ssl, buffer_current(tcp->packet),
		buffer_remaining(tcp->packet));
	if(received <= 0)
		return conn_read_ssl_failed(ssl, received, "tcp read ");

	tcp->total_bytes += received;
	buffer_skip(tcp->packet, received);

	if(buffer_remaining(tcp->packet) > 0) {
		/* not complete yet, wait for more */
		return 0;
	}

	/* completed */
	assert(buffer_position(tcp->packet) == tcp->msglen);
	return 1;
}
#endif
   1359 
   1360 int
   1361 conn_read(struct xfrd_tcp* tcp)
   1362 {
   1363 	ssize_t received;
   1364 	/* receive leading packet length bytes */
   1365 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
   1366 		received = read(tcp->fd,
   1367 			(char*) &tcp->msglen + tcp->total_bytes,
   1368 			sizeof(tcp->msglen) - tcp->total_bytes);
   1369 		if(received == -1) {
   1370 			if(errno == EAGAIN || errno == EINTR) {
   1371 				/* read would block, try later */
   1372 				return 0;
   1373 			} else {
   1374 #ifdef ECONNRESET
   1375 				if (verbosity >= 2 || errno != ECONNRESET)
   1376 #endif /* ECONNRESET */
   1377 				log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno));
   1378 				return -1;
   1379 			}
   1380 		} else if(received == 0) {
   1381 			/* EOF */
   1382 			return -1;
   1383 		}
   1384 		tcp->total_bytes += received;
   1385 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
   1386 			/* not complete yet, try later */
   1387 			return 0;
   1388 		}
   1389 
   1390 		assert(tcp->total_bytes == sizeof(tcp->msglen));
   1391 		tcp->msglen = ntohs(tcp->msglen);
   1392 
   1393 		if(tcp->msglen == 0) {
   1394 			buffer_set_limit(tcp->packet, tcp->msglen);
   1395 			return 1;
   1396 		}
   1397 		if(tcp->msglen > buffer_capacity(tcp->packet)) {
   1398 			log_msg(LOG_ERR, "buffer too small, dropping connection");
   1399 			return 0;
   1400 		}
   1401 		buffer_set_limit(tcp->packet, tcp->msglen);
   1402 	}
   1403 
   1404 	assert(buffer_remaining(tcp->packet) > 0);
   1405 
   1406 	received = read(tcp->fd, buffer_current(tcp->packet),
   1407 		buffer_remaining(tcp->packet));
   1408 	if(received == -1) {
   1409 		if(errno == EAGAIN || errno == EINTR) {
   1410 			/* read would block, try later */
   1411 			return 0;
   1412 		} else {
   1413 #ifdef ECONNRESET
   1414 			if (verbosity >= 2 || errno != ECONNRESET)
   1415 #endif /* ECONNRESET */
   1416 			log_msg(LOG_ERR, "tcp read %s", strerror(errno));
   1417 			return -1;
   1418 		}
   1419 	} else if(received == 0) {
   1420 		/* EOF */
   1421 		return -1;
   1422 	}
   1423 
   1424 	tcp->total_bytes += received;
   1425 	buffer_skip(tcp->packet, received);
   1426 
   1427 	if(buffer_remaining(tcp->packet) > 0) {
   1428 		/* not complete yet, wait for more */
   1429 		return 0;
   1430 	}
   1431 
   1432 	/* completed */
   1433 	assert(buffer_position(tcp->packet) == tcp->msglen);
   1434 	return 1;
   1435 }
   1436 
   1437 void
   1438 xfrd_tcp_read(struct xfrd_tcp_pipeline* tp)
   1439 {
   1440 	xfrd_zone_type* zone;
   1441 	struct xfrd_tcp* tcp = tp->tcp_r;
   1442 	int ret;
   1443 	enum xfrd_packet_result pkt_result;
   1444 #ifdef HAVE_TLS_1_3
   1445 	if(tp->ssl) {
   1446 		if(tp->handshake_done) {
   1447 			ret = conn_read_ssl(tcp, tp->ssl);
   1448 
   1449 		} else if(ssl_handshake(tp)) {
   1450 			tcp_pipe_reset_timeout(tp); /* reschedule */
   1451 			return;
   1452 
   1453 		} else {
   1454 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
   1455 				log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
   1456 					strerror(errno));
   1457 
   1458 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
   1459 				log_crypto_err("xfrd: TLS handshake failed");
   1460 			} else {
   1461 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
   1462 					"with value: %d", tp->handshake_want);
   1463 			}
   1464 			xfrd_tcp_pipe_stop(tp);
   1465 			return;
   1466 		}
   1467 	} else
   1468 #endif
   1469 		ret = conn_read(tcp);
   1470 	if(ret == -1) {
   1471 		if(errno != 0)
   1472 			log_msg(LOG_ERR, "xfrd: failed reading tcp %s", strerror(errno));
   1473 		else
   1474 			log_msg(LOG_ERR, "xfrd: failed reading tcp: closed");
   1475 		xfrd_tcp_pipe_stop(tp);
   1476 		return;
   1477 	}
   1478 	if(ret == 0)
   1479 		return;
   1480 	/* completed msg */
   1481 	buffer_flip(tcp->packet);
   1482 	/* see which ID number it is, if skip, handle skip, NULL: warn */
   1483 	if(tcp->msglen < QHEADERSZ) {
   1484 		/* too short for DNS header, skip it */
   1485 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
   1486 			"xfrd: tcp skip response that is too short"));
   1487 		tcp_conn_ready_for_reading(tcp);
   1488 		return;
   1489 	}
   1490 	zone = xfrd_tcp_pipeline_lookup_id(tp, ID(tcp->packet));
   1491 	if(!zone || zone == TCP_NULL_SKIP) {
   1492 		/* no zone for this id? skip it */
   1493 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
   1494 			"xfrd: tcp skip response with %s ID",
   1495 			zone?"set-to-skip":"unknown"));
   1496 		tcp_conn_ready_for_reading(tcp);
   1497 		return;
   1498 	}
   1499 	assert(zone->tcp_conn != -1);
   1500 
   1501 	/* handle message for zone */
   1502 	pkt_result = xfrd_handle_received_xfr_packet(zone, tcp->packet);
   1503 	/* setup for reading the next packet on this connection */
   1504 	tcp_conn_ready_for_reading(tcp);
   1505 	switch(pkt_result) {
   1506 		case xfrd_packet_more:
   1507 			/* wait for next packet */
   1508 			break;
   1509 		case xfrd_packet_newlease:
   1510 			/* set to skip if more packets with this ID */
   1511 			xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
   1512 			tp->key.num_skip++;
   1513 			/* fall through to remove zone from tp */
   1514 			/* fallthrough */
   1515 		case xfrd_packet_transfer:
   1516 			if(zone->zone_options->pattern->multi_master_check) {
   1517 				xfrd_tcp_release(xfrd->tcp_set, zone);
   1518 				xfrd_make_request(zone);
   1519 				break;
   1520 			}
   1521 			xfrd_tcp_release(xfrd->tcp_set, zone);
   1522 			assert(zone->round_num == -1);
   1523 			break;
   1524 		case xfrd_packet_notimpl:
   1525 			xfrd_disable_ixfr(zone);
   1526 			xfrd_tcp_release(xfrd->tcp_set, zone);
   1527 			/* query next server */
   1528 			xfrd_make_request(zone);
   1529 			break;
   1530 		case xfrd_packet_bad:
   1531 		case xfrd_packet_tcp:
   1532 		default:
   1533 			/* set to skip if more packets with this ID */
   1534 			xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
   1535 			tp->key.num_skip++;
   1536 			xfrd_tcp_release(xfrd->tcp_set, zone);
   1537 			/* query next server */
   1538 			xfrd_make_request(zone);
   1539 			break;
   1540 	}
   1541 }
   1542 
   1543 void
   1544 xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
   1545 {
   1546 	int conn = zone->tcp_conn;
   1547 	struct xfrd_tcp_pipeline* tp = set->tcp_state[conn];
   1548 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s",
   1549 		zone->apex_str, zone->master->ip_address_spec));
   1550 	assert(zone->tcp_conn != -1);
   1551 	assert(zone->tcp_waiting == 0);
   1552 	zone->tcp_conn = -1;
   1553 	zone->tcp_waiting = 0;
   1554 
   1555 	/* remove from tcp_send list */
   1556 	tcp_pipe_sendlist_remove(tp, zone);
   1557 	/* remove it from the ID list */
   1558 	if(xfrd_tcp_pipeline_lookup_id(tp, zone->query_id) != TCP_NULL_SKIP)
   1559 		tcp_pipe_id_remove(tp, zone, 1);
   1560 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: released tcp pipe now %d unused",
   1561 		tp->key.num_unused));
   1562 	/* if pipe was full, but no more, then see if waiting element is
   1563 	 * for the same master, and can fill the unused ID */
   1564 	if(tp->key.num_unused == 1 && set->tcp_waiting_first) {
   1565 #ifdef INET6
   1566 		struct sockaddr_storage to;
   1567 #else
   1568 		struct sockaddr_in to;
   1569 #endif
   1570 		socklen_t to_len = xfrd_acl_sockaddr_to(
   1571 			set->tcp_waiting_first->master, &to);
   1572 		if(to_len == tp->key.ip_len && memcmp(&to, &tp->key.ip, to_len) == 0) {
   1573 			/* use this connection for the waiting zone */
   1574 			zone = set->tcp_waiting_first;
   1575 			assert(zone->tcp_conn == -1);
   1576 			zone->tcp_conn = conn;
   1577 			tcp_zone_waiting_list_popfirst(set, zone);
   1578 			if(zone->zone_handler.ev_fd != -1)
   1579 				xfrd_udp_release(zone);
   1580 			xfrd_unset_timer(zone);
   1581 			pipeline_setup_new_zone(set, tp, zone);
   1582 			return;
   1583 		}
   1584 		/* waiting zone did not go to same server */
   1585 	}
   1586 
   1587 	/* if all unused, or only skipped leftover, close the pipeline */
   1588 	if(tp->key.num_unused >= tp->pipe_num || tp->key.num_skip >= tp->pipe_num - tp->key.num_unused)
   1589 		xfrd_tcp_pipe_release(set, tp, conn);
   1590 }
   1591 
   1592 void
   1593 xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
   1594 	int conn)
   1595 {
   1596 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipe released"));
   1597 	/* one handler per tcp pipe */
   1598 	if(tp->handler_added)
   1599 		event_del(&tp->handler);
   1600 	tp->handler_added = 0;
   1601 
   1602 #ifdef HAVE_TLS_1_3
   1603 	/* close SSL */
   1604 	if (tp->ssl) {
   1605 		DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: Shutting down TLS"));
   1606 		SSL_shutdown(tp->ssl);
   1607 		SSL_free(tp->ssl);
   1608 		tp->ssl = NULL;
   1609 	}
   1610 #endif
   1611 
   1612 	/* fd in tcp_r and tcp_w is the same, close once */
   1613 	if(tp->tcp_r->fd != -1)
   1614 		close(tp->tcp_r->fd);
   1615 	tp->tcp_r->fd = -1;
   1616 	tp->tcp_w->fd = -1;
   1617 
   1618 	/* remove from pipetree */
   1619 	(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
   1620 
   1621 	/* a waiting zone can use the free tcp slot (to another server) */
   1622 	/* if that zone fails to set-up or connect, we try to start the next
   1623 	 * waiting zone in the list */
   1624 	while(set->tcp_count == set->tcp_max && set->tcp_waiting_first) {
   1625 		/* pop first waiting process */
   1626 		xfrd_zone_type* zone = set->tcp_waiting_first;
   1627 		/* start it */
   1628 		assert(zone->tcp_conn == -1);
   1629 		zone->tcp_conn = conn;
   1630 		tcp_zone_waiting_list_popfirst(set, zone);
   1631 
   1632 		/* stop udp (if any) */
   1633 		if(zone->zone_handler.ev_fd != -1)
   1634 			xfrd_udp_release(zone);
   1635 		if(!xfrd_tcp_open(set, tp, zone)) {
   1636 			zone->tcp_conn = -1;
   1637 			xfrd_set_refresh_now(zone);
   1638 			/* try to start the next zone (if any) */
   1639 			continue;
   1640 		}
   1641 		/* re-init this tcppipe */
   1642 		/* ip and ip_len set by tcp_open */
   1643 		xfrd_tcp_pipeline_init(tp);
   1644 
   1645 		/* insert into tree */
   1646 		(void)rbtree_insert(set->pipetree, &tp->key.node);
   1647 		/* setup write */
   1648 		xfrd_unset_timer(zone);
   1649 		pipeline_setup_new_zone(set, tp, zone);
   1650 		/* started a task, no need for cleanups, so return */
   1651 		return;
   1652 	}
   1653 	/* no task to start, cleanup */
   1654 	assert(!set->tcp_waiting_first);
   1655 	set->tcp_count --;
   1656 	assert(set->tcp_count >= 0);
   1657 }
   1658 
   1659