Home | History | Annotate | Line # | Download | only in dist
xfrd-tcp.c revision 1.1.1.7
      1 /*
      2  * xfrd-tcp.c - XFR (transfer) Daemon TCP system source file. Manages tcp conn.
      3  *
      4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
      5  *
      6  * See LICENSE for the license.
      7  *
      8  */
      9 
     10 #include "config.h"
     11 #include <assert.h>
     12 #include <errno.h>
     13 #include <fcntl.h>
     14 #include <unistd.h>
     15 #include <stdlib.h>
     16 #include <sys/uio.h>
     17 #include "nsd.h"
     18 #include "xfrd-tcp.h"
     19 #include "buffer.h"
     20 #include "packet.h"
     21 #include "dname.h"
     22 #include "options.h"
     23 #include "namedb.h"
     24 #include "xfrd.h"
     25 #include "xfrd-disk.h"
     26 #include "util.h"
     27 #ifdef HAVE_SSL
     28 #include <openssl/ssl.h>
     29 #include <openssl/err.h>
     30 #include <openssl/x509.h>
     31 #include <openssl/evp.h>
     32 #endif
     33 
     34 #ifdef HAVE_SSL
     35 void log_crypto_err(const char* str); /* in server.c */
     36 
     37 /* Extract certificate information for logging */
     38 void
     39 get_cert_info(SSL* ssl, region_type* region, char** cert_serial,
     40               char** key_id, char** cert_algorithm, char** tls_version)
     41 {
     42     X509* cert = NULL;
     43     ASN1_INTEGER* serial = NULL;
     44     EVP_PKEY* pkey = NULL;
     45     unsigned char key_fingerprint[EVP_MAX_MD_SIZE];
     46     unsigned int key_fingerprint_len = 0;
     47     const EVP_MD* md = EVP_sha256();
     48     const char* pkey_name = NULL;
     49     const char* version_name = NULL;
     50     int i;
     51     char temp_buffer[1024]; /* Temporary buffer for serial number */
     52 
     53     *cert_serial = NULL;
     54     *key_id = NULL;
     55     *cert_algorithm = NULL;
     56     *tls_version = NULL;
     57 
     58 #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
     59     cert = SSL_get1_peer_certificate(ssl);
     60 #else
     61     cert = SSL_get_peer_certificate(ssl);
     62 #endif
     63 
     64     if (!cert) {
     65         return;
     66     }
     67 
     68     /* Get certificate serial number */
     69     serial = X509_get_serialNumber(cert);
     70     if (serial) {
     71         BIGNUM* bn = ASN1_INTEGER_to_BN(serial, NULL);
     72         if (bn) {
     73             char* hex_serial = BN_bn2hex(bn);
     74             if (hex_serial) {
     75                 snprintf(temp_buffer, sizeof(temp_buffer), "%s", hex_serial);
     76                 *cert_serial = region_strdup(region, temp_buffer);
     77                 OPENSSL_free(hex_serial);
     78             }
     79             BN_free(bn);
     80         }
     81     }
     82 
     83     /* Get public key identifier (SHA-256 fingerprint) */
     84     if (X509_pubkey_digest(cert, md, key_fingerprint, &key_fingerprint_len) == 1 && key_fingerprint_len >= 8) {
     85         size_t id_len = 8; /* Use first 8 bytes as key identifier */
     86         char key_id_buffer[17]; /* 8 bytes * 2 hex chars + null terminator */
     87         for (i = 0; i < (int)id_len; i++) {
     88             snprintf(key_id_buffer + (i * 2), sizeof(key_id_buffer) - (i * 2), "%02x", key_fingerprint[i]);
     89         }
     90         *key_id = region_strdup(region, key_id_buffer);
     91     }
     92 
     93     /* Get certificate algorithm using OpenSSL's native functions */
     94     pkey = X509_get_pubkey(cert);
     95     if (pkey) {
     96 #ifdef HAVE_EVP_PKEY_GET0_TYPE_NAME
     97         pkey_name = EVP_PKEY_get0_type_name(pkey);
     98 #else
     99         pkey_name = OBJ_nid2sn(EVP_PKEY_type(EVP_PKEY_id(pkey)));
    100 #endif
    101         if (pkey_name) {
    102             *cert_algorithm = region_strdup(region, pkey_name);
    103         } else {
    104             int pkey_type = EVP_PKEY_id(pkey);
    105             char algo_buffer[32];
    106             snprintf(algo_buffer, sizeof(algo_buffer), "Unknown(%d)", pkey_type);
    107             *cert_algorithm = region_strdup(region, algo_buffer);
    108         }
    109         EVP_PKEY_free(pkey);
    110     }
    111 
    112     /* Get TLS version using OpenSSL's native function */
    113     version_name = SSL_get_version(ssl);
    114     if (version_name) {
    115         *tls_version = region_strdup(region, version_name);
    116     } else {
    117         int version = SSL_version(ssl);
    118         char version_buffer[16];
    119         snprintf(version_buffer, sizeof(version_buffer), "Unknown(%d)", version);
    120         *tls_version = region_strdup(region, version_buffer);
    121     }
    122 
    123     X509_free(cert);
    124 }
    125 
    126 static SSL_CTX*
    127 create_ssl_context()
    128 {
    129 	SSL_CTX *ctx;
    130 	unsigned char protos[] = { 3, 'd', 'o', 't' };
    131 	ctx = SSL_CTX_new(TLS_client_method());
    132 	if (!ctx) {
    133 		log_msg(LOG_ERR, "xfrd tls: Unable to create SSL ctxt");
    134 	}
    135 	else if (SSL_CTX_set_default_verify_paths(ctx) != 1) {
    136 		SSL_CTX_free(ctx);
    137 		log_msg(LOG_ERR, "xfrd tls: Unable to set default SSL verify paths");
    138 		return NULL;
    139 	}
    140 	/* Only trust 1.3 as per the specification */
    141 	else if (!SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION)) {
    142 		SSL_CTX_free(ctx);
    143 		log_msg(LOG_ERR, "xfrd tls: Unable to set minimum TLS version 1.3");
    144 		return NULL;
    145 	}
    146 
    147 	if (SSL_CTX_set_alpn_protos(ctx, protos, sizeof(protos)) != 0) {
    148 		SSL_CTX_free(ctx);
    149 		log_msg(LOG_ERR, "xfrd tls: Unable to set ALPN protocols");
    150 		return NULL;
    151 	}
    152 	return ctx;
    153 }
    154 
    155 static int
    156 tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
    157 {
    158 	int err = X509_STORE_CTX_get_error(ctx);
    159 	int depth = X509_STORE_CTX_get_error_depth(ctx);
    160 
    161 	// report the specific cert error here - will need custom verify code if
    162 	// SPKI pins are supported
    163 	if (!preverify_ok)
    164 		log_msg(LOG_ERR, "xfrd tls: TLS verify failed - (%d) depth: %d error: %s",
    165 				err,
    166 				depth,
    167 				X509_verify_cert_error_string(err));
    168 	return preverify_ok;
    169 }
    170 
    171 static int
    172 setup_ssl(struct xfrd_tcp_pipeline* tp, struct xfrd_tcp_set* tcp_set,
    173 		  const char* auth_domain_name)
    174 {
    175 	if (!tcp_set->ssl_ctx) {
    176 		log_msg(LOG_ERR, "xfrd tls: No TLS CTX, cannot set up XFR-over-TLS");
    177 		return 0;
    178 	}
    179 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: setting up TLS for tls_auth domain name %s",
    180 						 auth_domain_name));
    181 	tp->ssl = SSL_new((SSL_CTX*)tcp_set->ssl_ctx);
    182 	if(!tp->ssl) {
    183 		log_msg(LOG_ERR, "xfrd tls: Unable to create TLS object");
    184 		return 0;
    185 	}
    186 	SSL_set_connect_state(tp->ssl);
    187 	(void)SSL_set_mode(tp->ssl, SSL_MODE_AUTO_RETRY);
    188 	if(!SSL_set_fd(tp->ssl, tp->tcp_w->fd)) {
    189 		log_msg(LOG_ERR, "xfrd tls: Unable to set TLS fd");
    190 		SSL_free(tp->ssl);
    191 		tp->ssl = NULL;
    192 		return 0;
    193 	}
    194 
    195 	SSL_set_verify(tp->ssl, SSL_VERIFY_PEER, tls_verify_callback);
    196 	if(!SSL_set1_host(tp->ssl, auth_domain_name)) {
    197 		log_msg(LOG_ERR, "xfrd tls: TLS setting of hostname %s failed",
    198 		auth_domain_name);
    199 		SSL_free(tp->ssl);
    200 		tp->ssl = NULL;
    201 		return 0;
    202 	}
    203 	return 1;
    204 }
    205 
    206 static int
    207 ssl_handshake(struct xfrd_tcp_pipeline* tp)
    208 {
    209 	int ret;
    210 
    211 	ERR_clear_error();
    212 	ret = SSL_do_handshake(tp->ssl);
    213 	if(ret == 1) {
    214 		DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: TLS handshake successful"));
    215 		tp->handshake_done = 1;
    216 		return 1;
    217 	}
    218 	tp->handshake_want = SSL_get_error(tp->ssl, ret);
    219 	if(tp->handshake_want == SSL_ERROR_WANT_READ
    220 	|| tp->handshake_want == SSL_ERROR_WANT_WRITE)
    221 		return 1;
    222 
    223 	return 0;
    224 }
    225 
/*
 * Password callback: copies the NUL-terminated password passed as user
 * data into the caller's buffer.
 *
 * buf:  destination buffer.
 * size: capacity of buf in bytes.
 * u:    user data, the password string; may be NULL.
 *
 * Returns the length of the (possibly truncated) password stored in buf,
 * or 0 when there is no usable buffer or no password.
 */
int password_cb(char *buf, int size, int ATTR_UNUSED(rwflag), void *u)
{
	/* nothing can be stored, and buf must not be read afterwards */
	if(buf == NULL || size <= 0)
		return 0;
	/* no password supplied: hand back an empty string */
	if(u == NULL) {
		buf[0] = '\0';
		return 0;
	}
	strlcpy(buf, (const char*)u, (size_t)size);
	return (int)strlen(buf);
}
    231 
    232 #endif
    233 
    234 /* sort tcppipe, first on IP address, for an IPaddresss, sort on num_unused */
    235 static int
    236 xfrd_pipe_cmp(const void* a, const void* b)
    237 {
    238 	const struct xfrd_tcp_pipeline* x = (struct xfrd_tcp_pipeline*)a;
    239 	const struct xfrd_tcp_pipeline* y = (struct xfrd_tcp_pipeline*)b;
    240 	int r;
    241 	if(x == y)
    242 		return 0;
    243 	if(y->key.ip_len != x->key.ip_len)
    244 		/* subtraction works because nonnegative and small numbers */
    245 		return (int)y->key.ip_len - (int)x->key.ip_len;
    246 	r = memcmp(&x->key.ip, &y->key.ip, x->key.ip_len);
    247 	if(r != 0)
    248 		return r;
    249 	/* sort that num_unused is sorted ascending, */
    250 	if(x->key.num_unused != y->key.num_unused) {
    251 		return (x->key.num_unused < y->key.num_unused) ? -1 : 1;
    252 	}
    253 	/* different pipelines are different still, even with same numunused*/
    254 	return (uintptr_t)x < (uintptr_t)y ? -1 : 1;
    255 }
    256 
    257 struct xfrd_tcp_set* xfrd_tcp_set_create(struct region* region, const char *tls_cert_bundle, int tcp_max, int tcp_pipeline)
    258 {
    259 	int i;
    260 	struct xfrd_tcp_set* tcp_set = region_alloc(region,
    261 		sizeof(struct xfrd_tcp_set));
    262 	memset(tcp_set, 0, sizeof(struct xfrd_tcp_set));
    263 	tcp_set->tcp_state = NULL;
    264 	tcp_set->tcp_max = tcp_max;
    265 	tcp_set->tcp_pipeline = tcp_pipeline;
    266 	tcp_set->tcp_count = 0;
    267 	tcp_set->tcp_waiting_first = 0;
    268 	tcp_set->tcp_waiting_last = 0;
    269 #ifdef HAVE_TLS_1_3
    270 	/* Set up SSL context */
    271 	tcp_set->ssl_ctx = create_ssl_context();
    272 	if (tcp_set->ssl_ctx == NULL)
    273 		log_msg(LOG_ERR, "xfrd: XFR-over-TLS not available");
    274 
    275 	else if (tls_cert_bundle && tls_cert_bundle[0] && SSL_CTX_load_verify_locations(
    276 				tcp_set->ssl_ctx, tls_cert_bundle, NULL) != 1) {
    277 		log_msg(LOG_ERR, "xfrd tls: Unable to set the certificate bundle file %s",
    278 				tls_cert_bundle);
    279 	}
    280 #else
    281 	(void)tls_cert_bundle;
    282 	log_msg(LOG_INFO, "xfrd: No TLS 1.3 support - XFR-over-TLS not available");
    283 #endif
    284 	tcp_set->tcp_state = region_alloc(region,
    285 		sizeof(*tcp_set->tcp_state)*tcp_set->tcp_max);
    286 	for(i=0; i<tcp_set->tcp_max; i++)
    287 		tcp_set->tcp_state[i] = xfrd_tcp_pipeline_create(region,
    288 			tcp_pipeline);
    289 	tcp_set->pipetree = rbtree_create(region, &xfrd_pipe_cmp);
    290 	return tcp_set;
    291 }
    292 
    293 static int pipeline_id_compare(const void* x, const void* y)
    294 {
    295 	struct xfrd_tcp_pipeline_id* a = (struct xfrd_tcp_pipeline_id*)x;
    296 	struct xfrd_tcp_pipeline_id* b = (struct xfrd_tcp_pipeline_id*)y;
    297 	if(a->id < b->id)
    298 		return -1;
    299 	if(a->id > b->id)
    300 		return 1;
    301 	return 0;
    302 }
    303 
/*
 * Fill array with num distinct values below max.
 *
 * array: output, must have room for num entries.
 * num:   number of values to pick; when it is 65536 the array is filled
 *        with 0..65535 in order, without randomisation.
 * max:   exclusive upper bound of the values, at most 65536.
 */
void pick_id_values(uint16_t* array, int num, int max)
{
	uint8_t taken[65536];
	int filled = 0;
	int upper;

	if(num == 65536) {
		/* all of them, loop and insert */
		int value;
		for(value = 0; value < num; value++)
			array[value] = (uint16_t)value;
		return;
	}
	assert(max <= 65536);

	/* This uses the Robert Floyd sampling algorithm; the taken map
	 * records which values were handed out already. */
	memset(taken, 0, sizeof(taken[0])*max);
	for(upper = max-num; upper < max; upper++) {
		int candidate;
		int chosen;
		/* random generate creates from 0..arg-1 */
		if(upper+1 > 1)
			candidate = random_generate(upper+1);
		else	candidate = 0;
		/* on a collision the current upper value is used instead */
		if(taken[candidate])
			chosen = upper;
		else	chosen = candidate;
		array[filled++] = chosen;
		taken[chosen] = 1;
	}
}
    336 
    337 static void
    338 clear_pipeline_entry(struct xfrd_tcp_pipeline* tp, rbnode_type* node)
    339 {
    340 	struct xfrd_tcp_pipeline_id *n;
    341 	if(node == NULL || node == RBTREE_NULL)
    342 		return;
    343 	clear_pipeline_entry(tp, node->left);
    344 	node->left = NULL;
    345 	clear_pipeline_entry(tp, node->right);
    346 	node->right = NULL;
    347 	/* move the node into the free list */
    348 	n = (struct xfrd_tcp_pipeline_id*)node;
    349 	n->next_free = tp->pipe_id_free_list;
    350 	tp->pipe_id_free_list = n;
    351 }
    352 
    353 static void
    354 xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline* tp)
    355 {
    356 	/* move entries into free list */
    357 	clear_pipeline_entry(tp, tp->zone_per_id->root);
    358 	/* clear the tree */
    359 	tp->zone_per_id->count = 0;
    360 	tp->zone_per_id->root = RBTREE_NULL;
    361 }
    362 
    363 static void
    364 xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline* tp)
    365 {
    366 	tp->key.node.key = tp;
    367 	tp->key.num_unused = tp->pipe_num;
    368 	tp->key.num_skip = 0;
    369 	tp->tcp_send_first = NULL;
    370 	tp->tcp_send_last = NULL;
    371 	xfrd_tcp_pipeline_cleanup(tp);
    372 	pick_id_values(tp->unused, tp->pipe_num, 65536);
    373 }
    374 
    375 struct xfrd_tcp_pipeline*
    376 xfrd_tcp_pipeline_create(region_type* region, int tcp_pipeline)
    377 {
    378 	int i;
    379 	struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)
    380 		region_alloc_zero(region, sizeof(*tp));
    381 	if(tcp_pipeline < 0)
    382 		tcp_pipeline = 0;
    383 	if(tcp_pipeline > 65536)
    384 		tcp_pipeline = 65536; /* max 16 bit ID numbers */
    385 	tp->pipe_num = tcp_pipeline;
    386 	tp->key.num_unused = tp->pipe_num;
    387 	tp->zone_per_id = rbtree_create(region, &pipeline_id_compare);
    388 	tp->pipe_id_free_list = NULL;
    389 	for(i=0; i<tp->pipe_num; i++) {
    390 		struct xfrd_tcp_pipeline_id* n = (struct xfrd_tcp_pipeline_id*)
    391 			region_alloc_zero(region, sizeof(*n));
    392 		n->next_free = tp->pipe_id_free_list;
    393 		tp->pipe_id_free_list = n;
    394 	}
    395 	tp->unused = (uint16_t*)region_alloc_zero(region,
    396 		sizeof(tp->unused[0])*tp->pipe_num);
    397 	tp->tcp_r = xfrd_tcp_create(region, QIOBUFSZ);
    398 	tp->tcp_w = xfrd_tcp_create(region, QIOBUFSZ);
    399 	xfrd_tcp_pipeline_init(tp);
    400 	return tp;
    401 }
    402 
    403 static struct xfrd_zone*
    404 xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
    405 {
    406 	struct xfrd_tcp_pipeline_id key;
    407 	rbnode_type* n;
    408 	memset(&key, 0, sizeof(key));
    409 	key.node.key = &key;
    410 	key.id = id;
    411 	n = rbtree_search(tp->zone_per_id, &key);
    412 	if(n && n != RBTREE_NULL) {
    413 		return ((struct xfrd_tcp_pipeline_id*)n)->zone;
    414 	}
    415 	return NULL;
    416 }
    417 
    418 static void
    419 xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline* tp, uint16_t id,
    420 	struct xfrd_zone* zone)
    421 {
    422 	struct xfrd_tcp_pipeline_id* n;
    423 	/* because there are tp->pipe_num preallocated entries, and we have
    424 	 * only tp->pipe_num id values, the list cannot be empty now. */
    425 	assert(tp->pipe_id_free_list != NULL);
    426 	/* pick up next free xfrd_tcp_pipeline_id node */
    427 	n = tp->pipe_id_free_list;
    428 	tp->pipe_id_free_list = n->next_free;
    429 	n->next_free = NULL;
    430 	memset(&n->node, 0, sizeof(n->node));
    431 	n->node.key = n;
    432 	n->id = id;
    433 	n->zone = zone;
    434 	rbtree_insert(tp->zone_per_id, &n->node);
    435 }
    436 
    437 static void
    438 xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
    439 {
    440 	struct xfrd_tcp_pipeline_id key;
    441 	rbnode_type* node;
    442 	memset(&key, 0, sizeof(key));
    443 	key.node.key = &key;
    444 	key.id = id;
    445 	node = rbtree_delete(tp->zone_per_id, &key);
    446 	if(node && node != RBTREE_NULL) {
    447 		struct xfrd_tcp_pipeline_id* n =
    448 			(struct xfrd_tcp_pipeline_id*)node;
    449 		n->next_free = tp->pipe_id_free_list;
    450 		tp->pipe_id_free_list = n;
    451 	}
    452 }
    453 
    454 static void
    455 xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
    456 {
    457 	struct xfrd_tcp_pipeline_id key;
    458 	rbnode_type* n;
    459 	memset(&key, 0, sizeof(key));
    460 	key.node.key = &key;
    461 	key.id = id;
    462 	n = rbtree_search(tp->zone_per_id, &key);
    463 	if(n && n != RBTREE_NULL) {
    464 		struct xfrd_tcp_pipeline_id* zid = (struct xfrd_tcp_pipeline_id*)n;
    465 		zid->zone = TCP_NULL_SKIP;
    466 	}
    467 }
    468 
    469 void
    470 xfrd_setup_packet(buffer_type* packet,
    471 	uint16_t type, uint16_t klass, const dname_type* dname, uint16_t qid,
    472 	int* apex_compress)
    473 {
    474 	/* Set up the header */
    475 	buffer_clear(packet);
    476 	ID_SET(packet, qid);
    477 	FLAGS_SET(packet, 0);
    478 	OPCODE_SET(packet, OPCODE_QUERY);
    479 	QDCOUNT_SET(packet, 1);
    480 	ANCOUNT_SET(packet, 0);
    481 	NSCOUNT_SET(packet, 0);
    482 	ARCOUNT_SET(packet, 0);
    483 	buffer_skip(packet, QHEADERSZ);
    484 
    485 	/* The question record. */
    486 	if(apex_compress)
    487 		*apex_compress = buffer_position(packet);
    488 	buffer_write(packet, dname_name(dname), dname->name_size);
    489 	buffer_write_u16(packet, type);
    490 	buffer_write_u16(packet, klass);
    491 }
    492 
    493 static socklen_t
    494 #ifdef INET6
    495 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
    496 	struct sockaddr_storage *sck)
    497 #else
    498 xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
    499 	struct sockaddr_in *sck, const char* fromto)
    500 #endif /* INET6 */
    501 {
    502 	/* setup address structure */
    503 #ifdef INET6
    504 	memset(sck, 0, sizeof(struct sockaddr_storage));
    505 #else
    506 	memset(sck, 0, sizeof(struct sockaddr_in));
    507 #endif
    508 	if(acl->is_ipv6) {
    509 #ifdef INET6
    510 		struct sockaddr_in6* sa = (struct sockaddr_in6*)sck;
    511 		sa->sin6_family = AF_INET6;
    512 		sa->sin6_port = htons(port);
    513 		sa->sin6_addr = acl->addr.addr6;
    514 		return sizeof(struct sockaddr_in6);
    515 #else
    516 		log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \
    517 INET6.", fromto, acl->ip_address_spec);
    518 		return 0;
    519 #endif
    520 	} else {
    521 		struct sockaddr_in* sa = (struct sockaddr_in*)sck;
    522 		sa->sin_family = AF_INET;
    523 		sa->sin_port = htons(port);
    524 		sa->sin_addr = acl->addr.addr;
    525 		return sizeof(struct sockaddr_in);
    526 	}
    527 }
    528 
    529 socklen_t
    530 #ifdef INET6
    531 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_storage *to)
    532 #else
    533 xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_in *to)
    534 #endif /* INET6 */
    535 {
    536 #ifdef HAVE_TLS_1_3
    537 	unsigned int port = acl->port?acl->port:(acl->tls_auth_options?
    538 						(unsigned)atoi(TLS_PORT):(unsigned)atoi(TCP_PORT));
    539 #else
    540 	unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT);
    541 #endif
    542 #ifdef INET6
    543 	return xfrd_acl_sockaddr(acl, port, to);
    544 #else
    545 	return xfrd_acl_sockaddr(acl, port, to, "to");
    546 #endif /* INET6 */
    547 }
    548 
    549 socklen_t
    550 #ifdef INET6
    551 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_storage *frm)
    552 #else
    553 xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_in *frm)
    554 #endif /* INET6 */
    555 {
    556 	unsigned int port = acl->port?acl->port:0;
    557 #ifdef INET6
    558 	return xfrd_acl_sockaddr(acl, port, frm);
    559 #else
    560 	return xfrd_acl_sockaddr(acl, port, frm, "from");
    561 #endif /* INET6 */
    562 }
    563 
    564 void
    565 xfrd_write_soa_buffer(struct buffer* packet,
    566 	const dname_type* apex, struct xfrd_soa* soa, int apex_compress)
    567 {
    568 	size_t rdlength_pos;
    569 	uint16_t rdlength;
    570 	if(apex_compress > 0 && apex_compress < (int)buffer_limit(packet) &&
    571 		apex->name_size > 1)
    572 		buffer_write_u16(packet, 0xc000 | apex_compress);
    573 	else	buffer_write(packet, dname_name(apex), apex->name_size);
    574 
    575 	/* already in network order */
    576 	buffer_write(packet, &soa->type, sizeof(soa->type));
    577 	buffer_write(packet, &soa->klass, sizeof(soa->klass));
    578 	buffer_write(packet, &soa->ttl, sizeof(soa->ttl));
    579 	rdlength_pos = buffer_position(packet);
    580 	buffer_skip(packet, sizeof(rdlength));
    581 
    582 	/* compress dnames to apex if possible */
    583 	if(apex_compress > 0 && apex_compress < (int)buffer_limit(packet) &&
    584 		apex->name_size > 1 && is_dname_subdomain_of_case(
    585 		soa->prim_ns+1, soa->prim_ns[0], dname_name(apex),
    586 		apex->name_size)) {
    587 		if(soa->prim_ns[0] > apex->name_size)
    588 			buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]-
    589 				apex->name_size);
    590 		buffer_write_u16(packet, 0xc000 | apex_compress);
    591 	} else {
    592 		buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]);
    593 	}
    594 	if(apex_compress > 0 && apex_compress < (int)buffer_limit(packet) &&
    595 		apex->name_size > 1 && is_dname_subdomain_of_case(soa->email+1,
    596 		soa->email[0], dname_name(apex), apex->name_size)) {
    597 		if(soa->email[0] > apex->name_size)
    598 			buffer_write(packet, soa->email+1, soa->email[0]-
    599 				apex->name_size);
    600 		buffer_write_u16(packet, 0xc000 | apex_compress);
    601 	} else {
    602 		buffer_write(packet, soa->email+1, soa->email[0]);
    603 	}
    604 
    605 	buffer_write(packet, &soa->serial, sizeof(uint32_t));
    606 	buffer_write(packet, &soa->refresh, sizeof(uint32_t));
    607 	buffer_write(packet, &soa->retry, sizeof(uint32_t));
    608 	buffer_write(packet, &soa->expire, sizeof(uint32_t));
    609 	buffer_write(packet, &soa->minimum, sizeof(uint32_t));
    610 
    611 	/* write length of RR */
    612 	rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength);
    613 	buffer_write_u16_at(packet, rdlength_pos, rdlength);
    614 }
    615 
    616 struct xfrd_tcp*
    617 xfrd_tcp_create(region_type* region, size_t bufsize)
    618 {
    619 	struct xfrd_tcp* tcp_state = (struct xfrd_tcp*)region_alloc(
    620 		region, sizeof(struct xfrd_tcp));
    621 	memset(tcp_state, 0, sizeof(struct xfrd_tcp));
    622 	tcp_state->packet = buffer_create(region, bufsize);
    623 	tcp_state->fd = -1;
    624 
    625 	return tcp_state;
    626 }
    627 
    628 static struct xfrd_tcp_pipeline*
    629 pipeline_find(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
    630 {
    631 	rbnode_type* sme = NULL;
    632 	struct xfrd_tcp_pipeline* r;
    633 	/* smaller buf than a full pipeline with 64kb ID array, only need
    634 	 * the front part with the key info, this front part contains the
    635 	 * members that the compare function uses. */
    636 	struct xfrd_tcp_pipeline_key k, *key=&k;
    637 	key->node.key = key;
    638 	key->ip_len = xfrd_acl_sockaddr_to(zone->master, &key->ip);
    639 	key->num_unused = set->tcp_pipeline;
    640 	/* lookup existing tcp transfer to the master with highest unused */
    641 	if(rbtree_find_less_equal(set->pipetree, key, &sme)) {
    642 		/* exact match, strange, fully unused tcp cannot be open */
    643 		assert(0);
    644 	}
    645 	if(!sme)
    646 		return NULL;
    647 	r = (struct xfrd_tcp_pipeline*)sme->key;
    648 	/* <= key pointed at, is the master correct ? */
    649 	if(r->key.ip_len != key->ip_len)
    650 		return NULL;
    651 	if(memcmp(&r->key.ip, &key->ip, key->ip_len) != 0)
    652 		return NULL;
    653 	/* correct master, is there a slot free for this transfer? */
    654 	if(r->key.num_unused == 0)
    655 		return NULL;
    656 	return r;
    657 }
    658 
    659 /* remove zone from tcp waiting list */
    660 static void
    661 tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
    662 {
    663 	assert(zone->tcp_waiting);
    664 	set->tcp_waiting_first = zone->tcp_waiting_next;
    665 	if(zone->tcp_waiting_next)
    666 		zone->tcp_waiting_next->tcp_waiting_prev = NULL;
    667 	else	set->tcp_waiting_last = 0;
    668 	zone->tcp_waiting_next = 0;
    669 	zone->tcp_waiting = 0;
    670 }
    671 
    672 /* remove zone from tcp pipe write-wait list */
    673 static void
    674 tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
    675 {
    676 	if(zone->in_tcp_send) {
    677 		if(zone->tcp_send_prev)
    678 			zone->tcp_send_prev->tcp_send_next=zone->tcp_send_next;
    679 		else	tp->tcp_send_first=zone->tcp_send_next;
    680 		if(zone->tcp_send_next)
    681 			zone->tcp_send_next->tcp_send_prev=zone->tcp_send_prev;
    682 		else	tp->tcp_send_last=zone->tcp_send_prev;
    683 		zone->in_tcp_send = 0;
    684 	}
    685 }
    686 
    687 /* remove first from write-wait list */
    688 static void
    689 tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
    690 {
    691 	tp->tcp_send_first = zone->tcp_send_next;
    692 	if(tp->tcp_send_first)
    693 		tp->tcp_send_first->tcp_send_prev = NULL;
    694 	else	tp->tcp_send_last = NULL;
    695 	zone->in_tcp_send = 0;
    696 }
    697 
    698 /* remove zone from tcp pipe ID map */
    699 static void
    700 tcp_pipe_id_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone,
    701 	int alsotree)
    702 {
    703 	assert(tp->key.num_unused < tp->pipe_num && tp->key.num_unused >= 0);
    704 	if(alsotree)
    705 		xfrd_tcp_pipeline_remove_id(tp, zone->query_id);
    706 	tp->unused[tp->key.num_unused] = zone->query_id;
    707 	/* must remove and re-add for sort order in tree */
    708 	(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
    709 	tp->key.num_unused++;
    710 	(void)rbtree_insert(xfrd->tcp_set->pipetree, &tp->key.node);
    711 }
    712 
    713 /* stop the tcp pipe (and all its zones need to retry) */
    714 static void
    715 xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline* tp)
    716 {
    717 	struct xfrd_tcp_pipeline_id* zid;
    718 	int conn = -1;
    719 	assert(tp->key.num_unused < tp->pipe_num); /* at least one 'in-use' */
    720 	assert(tp->pipe_num - tp->key.num_unused > tp->key.num_skip); /* at least one 'nonskip' */
    721 	/* need to retry for all the zones connected to it */
    722 	/* these could use different lists and go to a different nextmaster*/
    723 	RBTREE_FOR(zid, struct xfrd_tcp_pipeline_id*, tp->zone_per_id) {
    724 		xfrd_zone_type* zone = zid->zone;
    725 		if(zone && zone != TCP_NULL_SKIP) {
    726 			assert(zone->query_id == zid->id);
    727 			conn = zone->tcp_conn;
    728 			zone->tcp_conn = -1;
    729 			zone->tcp_waiting = 0;
    730 			tcp_pipe_sendlist_remove(tp, zone);
    731 			tcp_pipe_id_remove(tp, zone, 0);
    732 			xfrd_set_refresh_now(zone);
    733 		}
    734 	}
    735 	xfrd_tcp_pipeline_cleanup(tp);
    736 	assert(conn != -1);
    737 	/* now release the entire tcp pipe */
    738 	xfrd_tcp_pipe_release(xfrd->tcp_set, tp, conn);
    739 }
    740 
    741 static void
    742 tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline* tp)
    743 {
    744 	int fd = tp->handler.ev_fd;
    745 	struct timeval tv;
    746 	tv.tv_sec = xfrd->tcp_set->tcp_timeout;
    747 	tv.tv_usec = 0;
    748 	if(tp->handler_added)
    749 		event_del(&tp->handler);
    750 	memset(&tp->handler, 0, sizeof(tp->handler));
    751 	event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
    752 #ifdef HAVE_TLS_1_3
    753 		( tp->ssl
    754 		? ( tp->handshake_done ?  ( tp->tcp_send_first ? EV_WRITE : 0 )
    755 		  : tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0 )
    756 		: tp->tcp_send_first ? EV_WRITE : 0 ),
    757 #else
    758 		( tp->tcp_send_first ? EV_WRITE : 0 ),
    759 #endif
    760 		xfrd_handle_tcp_pipe, tp);
    761 	if(event_base_set(xfrd->event_base, &tp->handler) != 0)
    762 		log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
    763 	if(event_add(&tp->handler, &tv) != 0)
    764 		log_msg(LOG_ERR, "xfrd tcp: event_add failed");
    765 	tp->handler_added = 1;
    766 }
    767 
    768 /* handle event from fd of tcp pipe */
    769 void
    770 xfrd_handle_tcp_pipe(int ATTR_UNUSED(fd), short event, void* arg)
    771 {
    772 	struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)arg;
    773 	if((event & EV_WRITE)) {
    774 		tcp_pipe_reset_timeout(tp);
    775 		if(tp->tcp_send_first) {
    776 			DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp write, zone %s",
    777 				tp->tcp_send_first->apex_str));
    778 			xfrd_tcp_write(tp, tp->tcp_send_first);
    779 		}
    780 	}
    781 	if((event & EV_READ) && tp->handler_added) {
    782 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp read"));
    783 		tcp_pipe_reset_timeout(tp);
    784 		xfrd_tcp_read(tp);
    785 	}
    786 	if((event & EV_TIMEOUT) && tp->handler_added) {
    787 		/* tcp connection timed out */
    788 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp timeout"));
    789 		xfrd_tcp_pipe_stop(tp);
    790 	}
    791 }
    792 
    793 /* add a zone to the pipeline, it starts to want to write its query */
    794 static void
    795 pipeline_setup_new_zone(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
    796 	xfrd_zone_type* zone)
    797 {
    798 	/* assign the ID */
    799 	int idx;
    800 	assert(tp->key.num_unused > 0);
    801 	/* we pick a random ID, even though it is TCP anyway */
    802 	idx = random_generate(tp->key.num_unused);
    803 	zone->query_id = tp->unused[idx];
    804 	tp->unused[idx] = tp->unused[tp->key.num_unused-1];
    805 	xfrd_tcp_pipeline_insert_id(tp, zone->query_id, zone);
    806 	/* decrement unused counter, and fixup tree */
    807 	(void)rbtree_delete(set->pipetree, &tp->key.node);
    808 	tp->key.num_unused--;
    809 	(void)rbtree_insert(set->pipetree, &tp->key.node);
    810 
    811 	/* add to sendlist, at end */
    812 	zone->tcp_send_next = NULL;
    813 	zone->tcp_send_prev = tp->tcp_send_last;
    814 	zone->in_tcp_send = 1;
    815 	if(tp->tcp_send_last)
    816 		tp->tcp_send_last->tcp_send_next = zone;
    817 	else	tp->tcp_send_first = zone;
    818 	tp->tcp_send_last = zone;
    819 
    820 	/* is it first in line? */
    821 	if(tp->tcp_send_first == zone) {
    822 		xfrd_tcp_setup_write_packet(tp, zone);
    823 		/* add write to event handler */
    824 		tcp_pipe_reset_timeout(tp);
    825 	}
    826 }
    827 
    828 void
    829 xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
    830 {
    831 	struct xfrd_tcp_pipeline* tp;
    832 	assert(zone->tcp_conn == -1);
    833 	assert(zone->tcp_waiting == 0);
    834 
    835 	if(set->tcp_count < set->tcp_max) {
    836 		int i;
    837 		assert(!set->tcp_waiting_first);
    838 		set->tcp_count ++;
    839 		/* find a free tcp_buffer */
    840 		for(i=0; i<set->tcp_max; i++) {
    841 			if(set->tcp_state[i]->tcp_r->fd == -1) {
    842 				zone->tcp_conn = i;
    843 				break;
    844 			}
    845 		}
    846 		/** What if there is no free tcp_buffer? return; */
    847 		if (zone->tcp_conn < 0) {
    848 			return;
    849 		}
    850 
    851 		tp = set->tcp_state[zone->tcp_conn];
    852 		zone->tcp_waiting = 0;
    853 
    854 		/* stop udp use (if any) */
    855 		if(zone->zone_handler.ev_fd != -1)
    856 			xfrd_udp_release(zone);
    857 
    858 		if(!xfrd_tcp_open(set, tp, zone)) {
    859 			zone->tcp_conn = -1;
    860 			set->tcp_count --;
    861 			xfrd_set_refresh_now(zone);
    862 			return;
    863 		}
    864 		/* ip and ip_len set by tcp_open */
    865 		xfrd_tcp_pipeline_init(tp);
    866 
    867 		/* insert into tree */
    868 		(void)rbtree_insert(set->pipetree, &tp->key.node);
    869 		xfrd_deactivate_zone(zone);
    870 		xfrd_unset_timer(zone);
    871 		pipeline_setup_new_zone(set, tp, zone);
    872 		return;
    873 	}
    874 	/* check for a pipeline to the same master with unused ID */
    875 	if((tp = pipeline_find(set, zone))!= NULL) {
    876 		int i;
    877 		if(zone->zone_handler.ev_fd != -1)
    878 			xfrd_udp_release(zone);
    879 		for(i=0; i<set->tcp_max; i++) {
    880 			if(set->tcp_state[i] == tp)
    881 				zone->tcp_conn = i;
    882 		}
    883 		xfrd_deactivate_zone(zone);
    884 		xfrd_unset_timer(zone);
    885 		pipeline_setup_new_zone(set, tp, zone);
    886 		return;
    887 	}
    888 
    889 	/* wait, at end of line */
    890 	DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp "
    891 		"connections (%d) reached.", set->tcp_max));
    892 	zone->tcp_waiting_next = 0;
    893 	zone->tcp_waiting_prev = set->tcp_waiting_last;
    894 	zone->tcp_waiting = 1;
    895 	if(!set->tcp_waiting_last) {
    896 		set->tcp_waiting_first = zone;
    897 		set->tcp_waiting_last = zone;
    898 	} else {
    899 		set->tcp_waiting_last->tcp_waiting_next = zone;
    900 		set->tcp_waiting_last = zone;
    901 	}
    902 	xfrd_deactivate_zone(zone);
    903 	xfrd_unset_timer(zone);
    904 }
    905 
    906 int
    907 xfrd_tcp_open(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
    908 	xfrd_zone_type* zone)
    909 {
    910 	int fd, family, conn;
    911 	struct timeval tv;
    912 	assert(zone->tcp_conn != -1);
    913 
    914 	/* if there is no next master, fallback to use the first one */
    915 	/* but there really should be a master set */
    916 	if(!zone->master) {
    917 		zone->master = zone->zone_options->pattern->request_xfr;
    918 		zone->master_num = 0;
    919 	}
    920 
    921 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s",
    922 		zone->apex_str, zone->master->ip_address_spec));
    923 	tp->tcp_r->is_reading = 1;
    924 	tp->tcp_r->total_bytes = 0;
    925 	tp->tcp_r->msglen = 0;
    926 	buffer_clear(tp->tcp_r->packet);
    927 	tp->tcp_w->is_reading = 0;
    928 	tp->tcp_w->total_bytes = 0;
    929 	tp->tcp_w->msglen = 0;
    930 	tp->connection_established = 0;
    931 
    932 	if(zone->master->is_ipv6) {
    933 #ifdef INET6
    934 		family = PF_INET6;
    935 #else
    936 		xfrd_set_refresh_now(zone);
    937 		return 0;
    938 #endif
    939 	} else {
    940 		family = PF_INET;
    941 	}
    942 	fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
    943 	if(fd == -1) {
    944 		/* squelch 'Address family not supported by protocol' at low
    945 		 * verbosity levels */
    946 		if(errno != EAFNOSUPPORT || verbosity > 2)
    947 		    log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s",
    948 			zone->master->ip_address_spec, strerror(errno));
    949 		xfrd_set_refresh_now(zone);
    950 		return 0;
    951 	}
    952 	if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
    953 		log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno));
    954 		close(fd);
    955 		xfrd_set_refresh_now(zone);
    956 		return 0;
    957 	}
    958 
    959 	if(xfrd->nsd->outgoing_tcp_mss > 0) {
    960 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
    961 		if(setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
    962 			(void*)&xfrd->nsd->outgoing_tcp_mss,
    963 			sizeof(xfrd->nsd->outgoing_tcp_mss)) < 0) {
    964 			log_msg(LOG_ERR, "xfrd: setsockopt(TCP_MAXSEG)"
    965 					"failed: %s", strerror(errno));
    966 		}
    967 #else
    968 		log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
    969 #endif
    970 	}
    971 
    972 	tp->key.ip_len = xfrd_acl_sockaddr_to(zone->master, &tp->key.ip);
    973 
    974 	/* bind it */
    975 	if (!xfrd_bind_local_interface(fd, zone->zone_options->pattern->
    976 		outgoing_interface, zone->master, 1)) {
    977 		close(fd);
    978 		xfrd_set_refresh_now(zone);
    979 		return 0;
    980         }
    981 
    982 	conn = connect(fd, (struct sockaddr*)&tp->key.ip, tp->key.ip_len);
    983 	if (conn == -1 && errno != EINPROGRESS) {
    984 		log_msg(LOG_ERR, "xfrd: connect %s failed: %s",
    985 			zone->master->ip_address_spec, strerror(errno));
    986 		close(fd);
    987 		xfrd_set_refresh_now(zone);
    988 		return 0;
    989 	}
    990 	tp->tcp_r->fd = fd;
    991 	tp->tcp_w->fd = fd;
    992 
    993 	/* Check if an tls_auth name is configured which means we should try to
    994 	   establish an SSL connection */
    995 	if (zone->master->tls_auth_options &&
    996 		zone->master->tls_auth_options->auth_domain_name) {
    997 #ifdef HAVE_TLS_1_3
    998 		/* Load client certificate (if provided) */
    999 		if (zone->master->tls_auth_options->client_cert &&
   1000 		    zone->master->tls_auth_options->client_key) {
   1001 			if (SSL_CTX_use_certificate_chain_file(set->ssl_ctx,
   1002 			                                       zone->master->tls_auth_options->client_cert) != 1) {
   1003 				log_msg(LOG_ERR, "xfrd tls: Unable to load client certificate from file %s", zone->master->tls_auth_options->client_cert);
   1004 			}
   1005 
   1006 			if (zone->master->tls_auth_options->client_key_pw) {
   1007 				SSL_CTX_set_default_passwd_cb(set->ssl_ctx, password_cb);
   1008 				SSL_CTX_set_default_passwd_cb_userdata(set->ssl_ctx, zone->master->tls_auth_options->client_key_pw);
   1009 			}
   1010 
   1011 			if (SSL_CTX_use_PrivateKey_file(set->ssl_ctx, zone->master->tls_auth_options->client_key, SSL_FILETYPE_PEM) != 1) {
   1012 				log_msg(LOG_ERR, "xfrd tls: Unable to load private key from file %s", zone->master->tls_auth_options->client_key);
   1013 			}
   1014 
   1015 			if (!SSL_CTX_check_private_key(set->ssl_ctx)) {
   1016 				log_msg(LOG_ERR, "xfrd tls: Client private key from file %s does not match the certificate from file %s",
   1017 				                 zone->master->tls_auth_options->client_key,
   1018 				                 zone->master->tls_auth_options->client_cert);
   1019 			}
   1020 		/* If client certificate/private key loading has failed,
   1021 		   client will not try to authenticate to the server but the connection
   1022 		   will procceed and will be up to the server to allow or deny the
   1023 		   unauthenticated connection. A server that does not enforce authentication
   1024 		   (or a badly configured server?) might allow the transfer.
   1025 		   XXX: Maybe we should close the connection now to make it obvious that
   1026 		   there is something wrong from our side. Alternatively make it obvious
   1027 		   to the operator that we're not being authenticated to the server.
   1028 		*/
   1029 		}
   1030 
   1031 		if (!setup_ssl(tp, set, zone->master->tls_auth_options->auth_domain_name)) {
   1032 			log_msg(LOG_ERR, "xfrd: Cannot setup TLS on pipeline for %s to %s",
   1033 					zone->apex_str, zone->master->ip_address_spec);
   1034 			close(fd);
   1035 			xfrd_set_refresh_now(zone);
   1036 			return 0;
   1037 		}
   1038 
   1039 		tp->handshake_done = 0;
   1040 		if(!ssl_handshake(tp)) {
   1041 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
   1042 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
   1043 					"for %s to %s: %s", zone->apex_str,
   1044 					zone->master->ip_address_spec,
   1045 					strerror(errno));
   1046 
   1047 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
   1048 				char errmsg[1024];
   1049 				snprintf(errmsg, sizeof(errmsg), "xfrd: "
   1050 					"TLS handshake failed for %s to %s",
   1051 					zone->apex_str,
   1052 					zone->master->ip_address_spec);
   1053 				log_crypto_err(errmsg);
   1054 			} else {
   1055 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
   1056 					"for %s to %s with %d", zone->apex_str,
   1057 					zone->master->ip_address_spec,
   1058 					tp->handshake_want);
   1059 			}
   1060 			close(fd);
   1061 			xfrd_set_refresh_now(zone);
   1062 			return 0;
   1063 		}
   1064 #else
   1065 		log_msg(LOG_ERR, "xfrd: TLS 1.3 is not available, XFR-over-TLS is "
   1066 						 "not supported for %s to %s",
   1067 						  zone->apex_str, zone->master->ip_address_spec);
   1068 		close(fd);
   1069 		xfrd_set_refresh_now(zone);
   1070 		return 0;
   1071 #endif
   1072 	}
   1073 
   1074 	/* set the tcp pipe event */
   1075 	if(tp->handler_added)
   1076 		event_del(&tp->handler);
   1077 	memset(&tp->handler, 0, sizeof(tp->handler));
   1078 	event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
   1079 #ifdef HAVE_TLS_1_3
   1080 		( !tp->ssl
   1081 		|| tp->handshake_done
   1082 		|| tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0),
   1083 #else
   1084 		EV_WRITE,
   1085 #endif
   1086 	        xfrd_handle_tcp_pipe, tp);
   1087 	if(event_base_set(xfrd->event_base, &tp->handler) != 0)
   1088 		log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
   1089 	tv.tv_sec = set->tcp_timeout;
   1090 	tv.tv_usec = 0;
   1091 	if(event_add(&tp->handler, &tv) != 0)
   1092 		log_msg(LOG_ERR, "xfrd tcp: event_add failed");
   1093 	tp->handler_added = 1;
   1094 	return 1;
   1095 }
   1096 
   1097 void
   1098 xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
   1099 {
   1100 	struct xfrd_tcp* tcp = tp->tcp_w;
   1101 	assert(zone->tcp_conn != -1);
   1102 	assert(zone->tcp_waiting == 0);
   1103 	/* start AXFR or IXFR for the zone */
   1104 	if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only ||
   1105 		zone->master->ixfr_disabled ||
   1106 		/* if zone expired, after the first round, do not ask for
   1107 		 * IXFR any more, but full AXFR (of any serial number) */
   1108 		(zone->state == xfrd_zone_expired && zone->round_num != 0)) {
   1109 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer "
   1110 						"(AXFR) for %s to %s",
   1111 			zone->apex_str, zone->master->ip_address_spec));
   1112 		VERBOSITY(3, (LOG_INFO, "request full zone transfer "
   1113 						"(AXFR) for %s to %s",
   1114 			zone->apex_str, zone->master->ip_address_spec));
   1115 
   1116 		xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex,
   1117 			zone->query_id, NULL);
   1118 		xfrd_prepare_zone_xfr(zone, TYPE_AXFR);
   1119 	} else {
   1120 		int apex_compress = 0;
   1121 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone "
   1122 						"transfer (IXFR) for %s to %s",
   1123 			zone->apex_str, zone->master->ip_address_spec));
   1124 		VERBOSITY(3, (LOG_INFO, "request incremental zone "
   1125 						"transfer (IXFR) for %s to %s",
   1126 			zone->apex_str, zone->master->ip_address_spec));
   1127 
   1128 		xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex,
   1129 			zone->query_id, &apex_compress);
   1130 		xfrd_prepare_zone_xfr(zone, TYPE_IXFR);
   1131 		NSCOUNT_SET(tcp->packet, 1);
   1132 		xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk,
   1133 			apex_compress);
   1134 	}
   1135 	if(zone->master->key_options && zone->master->key_options->tsig_key) {
   1136 		xfrd_tsig_sign_request(
   1137 			tcp->packet, &zone->latest_xfr->tsig, zone->master);
   1138 	}
   1139 	buffer_flip(tcp->packet);
   1140 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id));
   1141 	tcp->msglen = buffer_limit(tcp->packet);
   1142 	tcp->total_bytes = 0;
   1143 }
   1144 
   1145 static void
   1146 tcp_conn_ready_for_reading(struct xfrd_tcp* tcp)
   1147 {
   1148 	tcp->total_bytes = 0;
   1149 	tcp->msglen = 0;
   1150 	buffer_clear(tcp->packet);
   1151 }
   1152 
#ifdef HAVE_TLS_1_3
/*
 * Decide what to do after SSL_write() returned ret <= 0.
 *
 * Returns 0 if the write has to be retried later (the TLS layer wants to
 * read or write first, or the underlying call was interrupted or would
 * block), -1 if the connection cannot be used any more.
 */
static int
conn_write_ssl_failed(SSL* ssl, int ret)
{
	switch(SSL_get_error(ssl, ret)) {
	case SSL_ERROR_WANT_READ:
	case SSL_ERROR_WANT_WRITE:
		/* no progress possible right now, this is not an error */
		return 0;
	case SSL_ERROR_SYSCALL:
		if(ret == -1 && (errno == EAGAIN || errno == EINTR)) {
			/* write would block, try later */
			return 0;
		}
		/* fallthrough */
	default:
		log_msg(LOG_ERR, "xfrd: generic write problem with tls");
		return -1;
	}
}

/*
 * Write the pending message of tcp over the TLS connection ssl: first the
 * two byte length prefix (network byte order), then the packet buffer
 * from its current position.
 *
 * Returns 1 when the whole message has been written, 0 when more has to
 * be written later, and -1 on error.
 */
static int
conn_write_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	int request_length;
	int sent;

	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		uint16_t sendlen = htons(tcp->msglen);
		/* send (the rest of) the length prefix */
		request_length = sizeof(tcp->msglen) - tcp->total_bytes;
		ERR_clear_error();
		sent = SSL_write(ssl, (const char*)&sendlen + tcp->total_bytes,
						 request_length);
		if(sent <= 0) {
			/* the outcome is reported by the TLS library, errno
			 * alone does not tell whether to retry */
			return conn_write_ssl_failed(ssl, sent);
		}

		tcp->total_bytes += sent;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* incomplete write, resume later */
			return 0;
		}
		assert(tcp->total_bytes >= sizeof(tcp->msglen));
	}

	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));

	request_length = buffer_remaining(tcp->packet);
	ERR_clear_error();
	sent = SSL_write(ssl, buffer_current(tcp->packet), request_length);
	if(sent <= 0) {
		return conn_write_ssl_failed(ssl, sent);
	}

	buffer_skip(tcp->packet, sent);
	tcp->total_bytes += sent;

	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
		/* more to write when socket becomes writable again */
		return 0;
	}

	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
	return 1;
}
#endif
   1225 
   1226 int conn_write(struct xfrd_tcp* tcp)
   1227 {
   1228 	ssize_t sent;
   1229 
   1230 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
   1231 		uint16_t sendlen = htons(tcp->msglen);
   1232 #ifdef HAVE_WRITEV
   1233 		struct iovec iov[2];
   1234 		iov[0].iov_base = (uint8_t*)&sendlen + tcp->total_bytes;
   1235 		iov[0].iov_len = sizeof(sendlen) - tcp->total_bytes;
   1236 		iov[1].iov_base = buffer_begin(tcp->packet);
   1237 		iov[1].iov_len = buffer_limit(tcp->packet);
   1238 		sent = writev(tcp->fd, iov, 2);
   1239 #else /* HAVE_WRITEV */
   1240 		sent = write(tcp->fd,
   1241 			(const char*)&sendlen + tcp->total_bytes,
   1242 			sizeof(tcp->msglen) - tcp->total_bytes);
   1243 #endif /* HAVE_WRITEV */
   1244 
   1245 		if(sent == -1) {
   1246 			if(errno == EAGAIN || errno == EINTR) {
   1247 				/* write would block, try later */
   1248 				return 0;
   1249 			} else {
   1250 				return -1;
   1251 			}
   1252 		}
   1253 
   1254 		tcp->total_bytes += sent;
   1255 		if(sent > (ssize_t)sizeof(tcp->msglen))
   1256 			buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
   1257 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
   1258 			/* incomplete write, resume later */
   1259 			return 0;
   1260 		}
   1261 #ifdef HAVE_WRITEV
   1262 		if(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen)) {
   1263 			/* packet done */
   1264 			return 1;
   1265 		}
   1266 #endif
   1267 		assert(tcp->total_bytes >= sizeof(tcp->msglen));
   1268 	}
   1269 
   1270 	assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
   1271 
   1272 	sent = write(tcp->fd,
   1273 		buffer_current(tcp->packet),
   1274 		buffer_remaining(tcp->packet));
   1275 	if(sent == -1) {
   1276 		if(errno == EAGAIN || errno == EINTR) {
   1277 			/* write would block, try later */
   1278 			return 0;
   1279 		} else {
   1280 			return -1;
   1281 		}
   1282 	}
   1283 
   1284 	buffer_skip(tcp->packet, sent);
   1285 	tcp->total_bytes += sent;
   1286 
   1287 	if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
   1288 		/* more to write when socket becomes writable again */
   1289 		return 0;
   1290 	}
   1291 
   1292 	assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
   1293 	return 1;
   1294 }
   1295 
   1296 void
   1297 xfrd_tcp_write(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
   1298 {
   1299 	int ret;
   1300 	struct xfrd_tcp* tcp = tp->tcp_w;
   1301 	assert(zone->tcp_conn != -1);
   1302 	assert(zone == tp->tcp_send_first);
   1303 	/* see if for non-established connection, there is a connect error */
   1304 	if(!tp->connection_established) {
   1305 		/* check for pending error from nonblocking connect */
   1306 		/* from Stevens, unix network programming, vol1, 3rd ed, p450 */
   1307 		int error = 0;
   1308 		socklen_t len = sizeof(error);
   1309 		if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){
   1310 			error = errno; /* on solaris errno is error */
   1311 		}
   1312 		if(error == EINPROGRESS || error == EWOULDBLOCK)
   1313 			return; /* try again later */
   1314 		if(error != 0) {
   1315 			log_msg(LOG_ERR, "%s: Could not tcp connect to %s: %s",
   1316 				zone->apex_str, zone->master->ip_address_spec,
   1317 				strerror(error));
   1318 			xfrd_tcp_pipe_stop(tp);
   1319 			return;
   1320 		}
   1321 	}
   1322 #ifdef HAVE_TLS_1_3
   1323 	if (tp->ssl) {
   1324 		if(tp->handshake_done) {
   1325 			ret = conn_write_ssl(tcp, tp->ssl);
   1326 
   1327 		} else if(ssl_handshake(tp)) {
   1328 			tcp_pipe_reset_timeout(tp); /* reschedule */
   1329 			return;
   1330 
   1331 		} else {
   1332 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
   1333 				log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
   1334 					strerror(errno));
   1335 
   1336 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
   1337 				log_crypto_err("xfrd: TLS handshake failed");
   1338 			} else {
   1339 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
   1340 					"with value: %d", tp->handshake_want);
   1341 			}
   1342 			xfrd_tcp_pipe_stop(tp);
   1343 			return;
   1344 		}
   1345 	} else
   1346 #endif
   1347 		ret = conn_write(tcp);
   1348 	if(ret == -1) {
   1349 		log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
   1350 		xfrd_tcp_pipe_stop(tp);
   1351 		return;
   1352 	}
   1353 	if(tcp->total_bytes != 0 && !tp->connection_established)
   1354 		tp->connection_established = 1;
   1355 	if(ret == 0) {
   1356 		return; /* write again later */
   1357 	}
   1358 	/* done writing this message */
   1359 
   1360 	/* remove first zone from sendlist */
   1361 	tcp_pipe_sendlist_popfirst(tp, zone);
   1362 
   1363 	/* see if other zone wants to write; init; let it write (now) */
   1364 	/* and use a loop, because 64k stack calls is a too much */
   1365 	while(tp->tcp_send_first) {
   1366 		/* setup to write for this zone */
   1367 		xfrd_tcp_setup_write_packet(tp, tp->tcp_send_first);
   1368 		/* attempt to write for this zone (if success, continue loop)*/
   1369 #ifdef HAVE_TLS_1_3
   1370 		if (tp->ssl)
   1371 			ret = conn_write_ssl(tcp, tp->ssl);
   1372 		else
   1373 #endif
   1374 			ret = conn_write(tcp);
   1375 		if(ret == -1) {
   1376 			log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
   1377 			xfrd_tcp_pipe_stop(tp);
   1378 			return;
   1379 		}
   1380 		if(ret == 0)
   1381 			return; /* write again later */
   1382 		tcp_pipe_sendlist_popfirst(tp, tp->tcp_send_first);
   1383 	}
   1384 
   1385 	/* if sendlist empty, remove WRITE from event */
   1386 
   1387 	/* listen to READ, and not WRITE events */
   1388 	assert(tp->tcp_send_first == NULL);
   1389 	tcp_pipe_reset_timeout(tp);
   1390 }
   1391 
#ifdef HAVE_TLS_1_3
/*
 * Decide what to do after SSL_read() returned received <= 0.
 *
 * Returns 0 if the read has to be retried later (the TLS layer needs more
 * I/O first, or the underlying call was interrupted or would block), and
 * -1 if the connection is closed or broken. Real errors are logged here.
 */
static int
conn_read_ssl_failed(SSL* ssl, ssize_t received)
{
	int err = SSL_get_error(ssl, (int)received);

	switch(err) {
	case SSL_ERROR_WANT_READ:
	case SSL_ERROR_WANT_WRITE:
		/* no complete TLS record available yet, not an error */
		return 0;
	case SSL_ERROR_ZERO_RETURN:
		/* EOF */
		return -1;
	case SSL_ERROR_SYSCALL:
		if(received == -1 && (errno == EAGAIN || errno == EINTR)) {
			/* read would block, try later */
			return 0;
		}
		log_msg(LOG_ERR, "ssl_read returned error SSL_ERROR_SYSCALL with received %zd: %s", received, strerror(errno));
		return -1;
	default:
		log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);
		return -1;
	}
}

/*
 * Read one length-prefixed message from the TLS connection ssl into
 * tcp->packet. May be called repeatedly; tcp->total_bytes and the packet
 * position keep track of the progress.
 *
 * Returns 1 when a complete message is in the buffer (position at its
 * end), 0 when more data is needed, and -1 on EOF, on a read error, or
 * when the announced length does not fit the buffer.
 */
static int
conn_read_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
	ssize_t received;
	/* receive leading packet length bytes */
	if(tcp->total_bytes < sizeof(tcp->msglen)) {
		ERR_clear_error();
		received = SSL_read(ssl,
						(char*) &tcp->msglen + tcp->total_bytes,
						sizeof(tcp->msglen) - tcp->total_bytes);
		if(received <= 0) {
			return conn_read_ssl_failed(ssl, received);
		}
		tcp->total_bytes += received;
		if(tcp->total_bytes < sizeof(tcp->msglen)) {
			/* not complete yet, try later */
			return 0;
		}

		assert(tcp->total_bytes == sizeof(tcp->msglen));
		/* the prefix was read in place, convert it to host order */
		tcp->msglen = ntohs(tcp->msglen);

		if(tcp->msglen == 0) {
			/* empty message, complete as it is */
			buffer_set_limit(tcp->packet, tcp->msglen);
			return 1;
		}
		if(tcp->msglen > buffer_capacity(tcp->packet)) {
			/* Report failure so that the caller really drops the
			 * connection. Returning 0 here would leave the
			 * prefix consumed with no limit set, and the stream
			 * would be misread from then on. */
			log_msg(LOG_ERR, "buffer too small, dropping connection");
			return -1;
		}
		buffer_set_limit(tcp->packet, tcp->msglen);
	}

	assert(buffer_remaining(tcp->packet) > 0);
	ERR_clear_error();

	received = SSL_read(ssl, buffer_current(tcp->packet),
					buffer_remaining(tcp->packet));
	if(received <= 0) {
		return conn_read_ssl_failed(ssl, received);
	}

	tcp->total_bytes += received;
	buffer_skip(tcp->packet, received);

	if(buffer_remaining(tcp->packet) > 0) {
		/* not complete yet, wait for more */
		return 0;
	}

	/* completed */
	assert(buffer_position(tcp->packet) == tcp->msglen);
	return 1;
}
#endif
   1498 
   1499 int
   1500 conn_read(struct xfrd_tcp* tcp)
   1501 {
   1502 	ssize_t received;
   1503 	/* receive leading packet length bytes */
   1504 	if(tcp->total_bytes < sizeof(tcp->msglen)) {
   1505 		received = read(tcp->fd,
   1506 			(char*) &tcp->msglen + tcp->total_bytes,
   1507 			sizeof(tcp->msglen) - tcp->total_bytes);
   1508 		if(received == -1) {
   1509 			if(errno == EAGAIN || errno == EINTR) {
   1510 				/* read would block, try later */
   1511 				return 0;
   1512 			} else {
   1513 #ifdef ECONNRESET
   1514 				if (verbosity >= 2 || errno != ECONNRESET)
   1515 #endif /* ECONNRESET */
   1516 				log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno));
   1517 				return -1;
   1518 			}
   1519 		} else if(received == 0) {
   1520 			/* EOF */
   1521 			return -1;
   1522 		}
   1523 		tcp->total_bytes += received;
   1524 		if(tcp->total_bytes < sizeof(tcp->msglen)) {
   1525 			/* not complete yet, try later */
   1526 			return 0;
   1527 		}
   1528 
   1529 		assert(tcp->total_bytes == sizeof(tcp->msglen));
   1530 		tcp->msglen = ntohs(tcp->msglen);
   1531 
   1532 		if(tcp->msglen == 0) {
   1533 			buffer_set_limit(tcp->packet, tcp->msglen);
   1534 			return 1;
   1535 		}
   1536 		if(tcp->msglen > buffer_capacity(tcp->packet)) {
   1537 			log_msg(LOG_ERR, "buffer too small, dropping connection");
   1538 			return 0;
   1539 		}
   1540 		buffer_set_limit(tcp->packet, tcp->msglen);
   1541 	}
   1542 
   1543 	assert(buffer_remaining(tcp->packet) > 0);
   1544 
   1545 	received = read(tcp->fd, buffer_current(tcp->packet),
   1546 		buffer_remaining(tcp->packet));
   1547 	if(received == -1) {
   1548 		if(errno == EAGAIN || errno == EINTR) {
   1549 			/* read would block, try later */
   1550 			return 0;
   1551 		} else {
   1552 #ifdef ECONNRESET
   1553 			if (verbosity >= 2 || errno != ECONNRESET)
   1554 #endif /* ECONNRESET */
   1555 			log_msg(LOG_ERR, "tcp read %s", strerror(errno));
   1556 			return -1;
   1557 		}
   1558 	} else if(received == 0) {
   1559 		/* EOF */
   1560 		return -1;
   1561 	}
   1562 
   1563 	tcp->total_bytes += received;
   1564 	buffer_skip(tcp->packet, received);
   1565 
   1566 	if(buffer_remaining(tcp->packet) > 0) {
   1567 		/* not complete yet, wait for more */
   1568 		return 0;
   1569 	}
   1570 
   1571 	/* completed */
   1572 	assert(buffer_position(tcp->packet) == tcp->msglen);
   1573 	return 1;
   1574 }
   1575 
   1576 void
   1577 xfrd_tcp_read(struct xfrd_tcp_pipeline* tp)
   1578 {
   1579 	xfrd_zone_type* zone;
   1580 	struct xfrd_tcp* tcp = tp->tcp_r;
   1581 	int ret;
   1582 	enum xfrd_packet_result pkt_result;
   1583 #ifdef HAVE_TLS_1_3
   1584 	if(tp->ssl) {
   1585 		if(tp->handshake_done) {
   1586 			ret = conn_read_ssl(tcp, tp->ssl);
   1587 
   1588 		} else if(ssl_handshake(tp)) {
   1589 			tcp_pipe_reset_timeout(tp); /* reschedule */
   1590 			return;
   1591 
   1592 		} else {
   1593 			if(tp->handshake_want == SSL_ERROR_SYSCALL) {
   1594 				log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
   1595 					strerror(errno));
   1596 
   1597 			} else if(tp->handshake_want == SSL_ERROR_SSL) {
   1598 				log_crypto_err("xfrd: TLS handshake failed");
   1599 			} else {
   1600 				log_msg(LOG_ERR, "xfrd: TLS handshake failed "
   1601 					"with value: %d", tp->handshake_want);
   1602 			}
   1603 			xfrd_tcp_pipe_stop(tp);
   1604 			return;
   1605 		}
   1606 	} else
   1607 #endif
   1608 		ret = conn_read(tcp);
   1609 	if(ret == -1) {
   1610 		if(errno != 0)
   1611 			log_msg(LOG_ERR, "xfrd: failed reading tcp %s", strerror(errno));
   1612 		else
   1613 			log_msg(LOG_ERR, "xfrd: failed reading tcp: closed");
   1614 		xfrd_tcp_pipe_stop(tp);
   1615 		return;
   1616 	}
   1617 	if(ret == 0)
   1618 		return;
   1619 	/* completed msg */
   1620 	buffer_flip(tcp->packet);
   1621 	/* see which ID number it is, if skip, handle skip, NULL: warn */
   1622 	if(tcp->msglen < QHEADERSZ) {
   1623 		/* too short for DNS header, skip it */
   1624 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
   1625 			"xfrd: tcp skip response that is too short"));
   1626 		tcp_conn_ready_for_reading(tcp);
   1627 		return;
   1628 	}
   1629 	zone = xfrd_tcp_pipeline_lookup_id(tp, ID(tcp->packet));
   1630 	if(!zone || zone == TCP_NULL_SKIP) {
   1631 		/* no zone for this id? skip it */
   1632 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
   1633 			"xfrd: tcp skip response with %s ID",
   1634 			zone?"set-to-skip":"unknown"));
   1635 		tcp_conn_ready_for_reading(tcp);
   1636 		return;
   1637 	}
   1638 	assert(zone->tcp_conn != -1);
   1639 
   1640 	/* handle message for zone */
   1641 	pkt_result = xfrd_handle_received_xfr_packet(zone, tcp->packet);
   1642 	/* setup for reading the next packet on this connection */
   1643 	tcp_conn_ready_for_reading(tcp);
   1644 	switch(pkt_result) {
   1645 		case xfrd_packet_more:
   1646 			/* wait for next packet */
   1647 			break;
   1648 		case xfrd_packet_newlease:
   1649 			/* set to skip if more packets with this ID */
   1650 			xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
   1651 			tp->key.num_skip++;
   1652 			/* fall through to remove zone from tp */
   1653 			/* fallthrough */
   1654 		case xfrd_packet_transfer:
   1655 			if(zone->zone_options->pattern->multi_primary_check) {
   1656 				xfrd_tcp_release(xfrd->tcp_set, zone);
   1657 				xfrd_make_request(zone);
   1658 				break;
   1659 			}
   1660 			xfrd_tcp_release(xfrd->tcp_set, zone);
   1661 			assert(zone->round_num == -1);
   1662 			break;
   1663 		case xfrd_packet_notimpl:
   1664 			xfrd_disable_ixfr(zone);
   1665 			xfrd_tcp_release(xfrd->tcp_set, zone);
   1666 			/* query next server */
   1667 			xfrd_make_request(zone);
   1668 			break;
   1669 		case xfrd_packet_bad:
   1670 		case xfrd_packet_tcp:
   1671 		default:
   1672 			/* set to skip if more packets with this ID */
   1673 			xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
   1674 			tp->key.num_skip++;
   1675 			xfrd_tcp_release(xfrd->tcp_set, zone);
   1676 			/* query next server */
   1677 			xfrd_make_request(zone);
   1678 			break;
   1679 	}
   1680 }
   1681 
   1682 void
   1683 xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
   1684 {
   1685 	int conn = zone->tcp_conn;
   1686 	struct xfrd_tcp_pipeline* tp = set->tcp_state[conn];
   1687 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s",
   1688 		zone->apex_str, zone->master->ip_address_spec));
   1689 	assert(zone->tcp_conn != -1);
   1690 	assert(zone->tcp_waiting == 0);
   1691 	zone->tcp_conn = -1;
   1692 	zone->tcp_waiting = 0;
   1693 
   1694 	/* remove from tcp_send list */
   1695 	tcp_pipe_sendlist_remove(tp, zone);
   1696 	/* remove it from the ID list */
   1697 	if(xfrd_tcp_pipeline_lookup_id(tp, zone->query_id) != TCP_NULL_SKIP)
   1698 		tcp_pipe_id_remove(tp, zone, 1);
   1699 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: released tcp pipe now %d unused",
   1700 		tp->key.num_unused));
   1701 	/* if pipe was full, but no more, then see if waiting element is
   1702 	 * for the same master, and can fill the unused ID */
   1703 	if(tp->key.num_unused == 1 && set->tcp_waiting_first) {
   1704 #ifdef INET6
   1705 		struct sockaddr_storage to;
   1706 #else
   1707 		struct sockaddr_in to;
   1708 #endif
   1709 		socklen_t to_len = xfrd_acl_sockaddr_to(
   1710 			set->tcp_waiting_first->master, &to);
   1711 		if(to_len == tp->key.ip_len && memcmp(&to, &tp->key.ip, to_len) == 0) {
   1712 			/* use this connection for the waiting zone */
   1713 			zone = set->tcp_waiting_first;
   1714 			assert(zone->tcp_conn == -1);
   1715 			zone->tcp_conn = conn;
   1716 			tcp_zone_waiting_list_popfirst(set, zone);
   1717 			if(zone->zone_handler.ev_fd != -1)
   1718 				xfrd_udp_release(zone);
   1719 			xfrd_unset_timer(zone);
   1720 			pipeline_setup_new_zone(set, tp, zone);
   1721 			return;
   1722 		}
   1723 		/* waiting zone did not go to same server */
   1724 	}
   1725 
   1726 	/* if all unused, or only skipped leftover, close the pipeline */
   1727 	if(tp->key.num_unused >= tp->pipe_num || tp->key.num_skip >= tp->pipe_num - tp->key.num_unused)
   1728 		xfrd_tcp_pipe_release(set, tp, conn);
   1729 }
   1730 
   1731 void
   1732 xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
   1733 	int conn)
   1734 {
   1735 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipe released"));
   1736 	/* one handler per tcp pipe */
   1737 	if(tp->handler_added)
   1738 		event_del(&tp->handler);
   1739 	tp->handler_added = 0;
   1740 
   1741 #ifdef HAVE_TLS_1_3
   1742 	/* close SSL */
   1743 	if (tp->ssl) {
   1744 		DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: Shutting down TLS"));
   1745 		SSL_shutdown(tp->ssl);
   1746 		SSL_free(tp->ssl);
   1747 		tp->ssl = NULL;
   1748 	}
   1749 #endif
   1750 
   1751 	/* fd in tcp_r and tcp_w is the same, close once */
   1752 	if(tp->tcp_r->fd != -1)
   1753 		close(tp->tcp_r->fd);
   1754 	tp->tcp_r->fd = -1;
   1755 	tp->tcp_w->fd = -1;
   1756 
   1757 	/* remove from pipetree */
   1758 	(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
   1759 
   1760 	/* a waiting zone can use the free tcp slot (to another server) */
   1761 	/* if that zone fails to set-up or connect, we try to start the next
   1762 	 * waiting zone in the list */
   1763 	while(set->tcp_count == set->tcp_max && set->tcp_waiting_first) {
   1764 		/* pop first waiting process */
   1765 		xfrd_zone_type* zone = set->tcp_waiting_first;
   1766 		/* start it */
   1767 		assert(zone->tcp_conn == -1);
   1768 		zone->tcp_conn = conn;
   1769 		tcp_zone_waiting_list_popfirst(set, zone);
   1770 
   1771 		/* stop udp (if any) */
   1772 		if(zone->zone_handler.ev_fd != -1)
   1773 			xfrd_udp_release(zone);
   1774 		if(!xfrd_tcp_open(set, tp, zone)) {
   1775 			zone->tcp_conn = -1;
   1776 			xfrd_set_refresh_now(zone);
   1777 			/* try to start the next zone (if any) */
   1778 			continue;
   1779 		}
   1780 		/* re-init this tcppipe */
   1781 		/* ip and ip_len set by tcp_open */
   1782 		xfrd_tcp_pipeline_init(tp);
   1783 
   1784 		/* insert into tree */
   1785 		(void)rbtree_insert(set->pipetree, &tp->key.node);
   1786 		/* setup write */
   1787 		xfrd_unset_timer(zone);
   1788 		pipeline_setup_new_zone(set, tp, zone);
   1789 		/* started a task, no need for cleanups, so return */
   1790 		return;
   1791 	}
   1792 	/* no task to start, cleanup */
   1793 	assert(!set->tcp_waiting_first);
   1794 	set->tcp_count --;
   1795 	assert(set->tcp_count >= 0);
   1796 }
   1797 
   1798