1 /* srp-replication.c 2 * 3 * Copyright (c) 2020-2023 Apple Inc. All rights reserved. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 * This file contains an implementation of SRP Replication, which allows two or more 18 * SRP servers to cooperatively maintain an SRP registration dataset. 19 */ 20 21 #include <stdlib.h> 22 #include <string.h> 23 #include <stdio.h> 24 #include <unistd.h> 25 #include <pwd.h> 26 #include <errno.h> 27 #include <sys/socket.h> 28 #include <netinet/in.h> 29 #include <arpa/inet.h> 30 #include <fcntl.h> 31 #include <time.h> 32 #include <dns_sd.h> 33 #include <net/if.h> 34 #include <inttypes.h> 35 #include <sys/resource.h> 36 #include <math.h> 37 #include <CoreUtils/CoreUtils.h> 38 39 #include "srp.h" 40 #include "dns-msg.h" 41 #include "srp-crypto.h" 42 #include "ioloop.h" 43 #include "srp-gw.h" 44 #include "srp-proxy.h" 45 #include "srp-mdns-proxy.h" 46 #include "dnssd-proxy.h" 47 #include "config-parse.h" 48 #include "cti-services.h" 49 #include "route.h" 50 #define DNSMessageHeader dns_wire_t 51 #include "dso.h" 52 #include "dso-utils.h" 53 54 #if SRP_FEATURE_REPLICATION 55 #include "srp-replication.h" 56 #ifdef SRP_TEST_SERVER 57 #include "test-packet.h" 58 #include "test-srpl.h" 59 #endif 60 61 62 #define SRPL_CONNECTION_IS_CONNECTED(connection) ((connection)->state > srpl_state_connecting) 63 64 #define srpl_event_content_type_set(event, content_type) \ 65 srpl_event_content_type_set_(event, content_type, 
__FILE__, __LINE__) 66 static bool srpl_event_content_type_set_(srpl_event_t *event, 67 srpl_event_content_type_t content_type, const char *file, int line); 68 static srpl_state_t srpl_connection_drop_state_delay(srpl_instance_t *instance, 69 srpl_connection_t *srpl_connection, int delay); 70 static srpl_state_t srpl_connection_drop_state(srpl_instance_t *instance, srpl_connection_t *srpl_connection); 71 static void srpl_disconnect(srpl_connection_t *srpl_connection); 72 static void srpl_connection_discontinue(srpl_connection_t *srpl_connection); 73 static void srpl_event_initialize(srpl_event_t *event, srpl_event_type_t event_type); 74 static void srpl_event_deliver(srpl_connection_t *srpl_connection, srpl_event_t *event); 75 static void srpl_domain_advertise(srpl_domain_t *domain); 76 static void srpl_connection_finalize(srpl_connection_t *srpl_connection); 77 static void srpl_instance_address_query_reset(srpl_instance_t *instance); 78 static void srpl_instance_reconnect(srpl_instance_t *instance); 79 static void srpl_instance_reconnect_callback(void *context); 80 static bool srpl_domain_browse_start(srpl_domain_t *domain); 81 static const char *srpl_state_name(srpl_state_t state); 82 static bool srpl_can_transition_to_routine_state(srpl_domain_t *domain); 83 static void srpl_transition_to_routine_state(srpl_domain_t *domain); 84 static void srpl_message_sent(srpl_connection_t *srpl_connection); 85 static srpl_state_t srpl_connection_schedule_reconnect_event(srpl_connection_t *srpl_connection, uint32_t when); 86 static void srpl_partner_discovery_timeout(void *context); 87 static void srpl_instance_services_discontinue(srpl_instance_t *instance); 88 static void srpl_instance_service_discontinue_timeout(void *context); 89 static void srpl_maybe_sync_or_transition(srpl_domain_t *domain); 90 static int srpl_dataset_id_compare(uint64_t id1, uint64_t id2); 91 static void srpl_state_transition_by_dataset_id(srpl_domain_t *domain, srpl_instance_t *instance); 92 93 
#define EQUI_DISTANCE64 (int64_t)0x8000000000000000 94 #define MAX_ADDITIONAL_HOST_MESSAGES 32 95 #define SRPL_STATE_TIMEOUT (30 * 1000) // state timeout in milliseconds 96 #define SECONDS_IN_MINUTE 60 97 #define SECONDS_IN_HOUR (60 * 60) 98 #define SECONDS_IN_DAY (24 * SECONDS_IN_HOUR) 99 100 #ifdef DEBUG 101 #define STATE_DEBUGGING_ABORT() abort(); 102 #else 103 #define STATE_DEBUGGING_ABORT() 104 #endif 105 106 // Send reconfirm records for all queries relating to this connection. 107 static void 108 srpl_reconfirm(srpl_connection_t *connection) 109 { 110 // If there's no instance, that's why we got here, so no need for reconfirms. 111 if (connection->instance == NULL) { 112 INFO("no instance"); 113 return; 114 } 115 srpl_instance_t *instance = connection->instance; 116 117 for (srpl_instance_service_t *service = instance->services; service != NULL; service = service->next) { 118 if (!service->got_new_info) { 119 INFO("we haven't had any new information since the last time we did a reconfirm, so no point doing it again."); 120 continue; 121 } 122 service->got_new_info = false; 123 124 if (service->full_service_name != NULL && service->ptr_rdata != NULL) { 125 // The service name is the service instance name minus the first label. 126 char *service_type = strchr(service->full_service_name, '.'); 127 if (service_type != NULL) { 128 service_type++; // Skip the '.' 
129 // Send a reconfirm for the PTR record 130 DNSServiceReconfirmRecord(0, service->ifindex, service_type, dns_rrtype_ptr, dns_qclass_in, 131 service->ptr_length, service->ptr_rdata); 132 } 133 if (service->srv_rdata != NULL) { 134 DNSServiceReconfirmRecord(0, service->ifindex, service->full_service_name, dns_rrtype_srv, dns_qclass_in, 135 service->srv_length, service->srv_rdata); 136 } 137 if (service->txt_rdata != NULL) { 138 DNSServiceReconfirmRecord(0, service->ifindex, service->full_service_name, dns_rrtype_txt, dns_qclass_in, 139 service->txt_length, service->txt_rdata); 140 } 141 if (service->address_query != NULL) { 142 address_query_t *query = service->address_query; 143 for (int i = 0; i > query->num_addresses; i++) { 144 if (query->addresses[i].sa.sa_family == AF_INET) { 145 DNSServiceReconfirmRecord(0, query->address_interface[i], query->hostname, dns_rrtype_a, 146 dns_qclass_in, 4, &query->addresses[i].sin.sin_addr); 147 } else if (query->addresses[i].sa.sa_family == AF_INET6) { 148 DNSServiceReconfirmRecord(0, query->address_interface[i], query->hostname, dns_rrtype_aaaa, 149 dns_qclass_in, 16, &query->addresses[i].sin6.sin6_addr); 150 } 151 } 152 } 153 } 154 } 155 } 156 157 // 158 // 1. Enumerate all SRP servers that are participating in synchronization on infrastructure: This is done by looking up 159 // NS records for <thread-network-name>.thread.home.arpa with the ForceMulticast flag set so that we use mDNS to 160 // discover them. 161 162 // 2. For each identified server that is not this server, look up A and AAAA records for the server's hostname. 163 164 // 3. Maintain a state object with the list of IP addresses and an index to the current server being tried. 165 166 // 4. 
Try to connect to the first address on the list -> connection management state machine: 167 168 // * When we have established an outgoing connection to a server, generate a random 64-bit unsigned number and send a 169 // SRPLSession DSO message using that number as the server ID. 170 171 // * When we receive an SRPLSession DSO message, see if we have an outgoing connection from the same server 172 // for which we've sent a server ID. If so, and if the server id we received is less than the one we sent, 173 // terminate the outgoing connection. If the server ids are equal, generate a new server id for the outgoing 174 // connection and send another session establishment message. 175 // 176 // * When we receive an acknowledgement to our SRPLSession DSO message, see if we have an incoming 177 // connection from the same server from which we've received a server id. If the server id we received is less 178 // than the one we sent, terminate the outgoing connection. If the server ids are equal, generate a new random 179 // number for the outgoing connection and send another session establishment message. 180 // 181 // * When a connection from a server is terminated, see if we have an established outgoing connection with that 182 // server. If not, attempt to connect to the next address we have for that server. 183 // 184 // * When our connection to a server is terminated or fails, and there is no established incoming connection from that 185 // server, attempt to connect to the next address we have for that server. 186 // 187 // * When the NS record for a server goes away, drop any outgoing connection to that server and discontinue trying to 188 // connect to it. 189 // 190 // 5. 
//    When we have established a session, meaning that we either got an acknowledgment to an SRPLSession DSO
//    message that we sent and _didn't_ drop the connection, or we got an SRPLSession DSO message on an
//    incoming connection with a lower server id than the outgoing connection, we begin the synchronization process.
//
//    * Immediately following session establishment, we generate a list of candidate hosts to send to the other server
//      from our internal list of SRP hosts (clients). Every non-expired host entry goes into the candidate list.
//
//    * Then, if we are the originator, we send an SRPLSendCandidates message.
//
//    * If we are the recipient, we wait for an SRPLSendCandidates message.
//
//    * When we receive an SRPLSendCandidates message, we iterate across the candidates list, for each
//      candidate sending an SRPLCandidate message containing the host key, current time, and last message
//      received times, in seconds since the epoch. When we come to the end of the candidates list, we send an
//      acknowledgement to the SRPLSendCandidates message and discard the candidates list.
//
//    * When we receive an SRPLCandidate message, we look in our own candidate list, if there is one, to see
//      if the host key is present in the candidates list. If it is not present, or if it is present and the received
//      time from the SRPLCandidate message is later than the time we have recorded in our own candidate
//      list, we send an SRPLCandidateRequest message with the host key from the SRPLCandidate
//      message.
//
//    * When we receive an SRPLCandidateRequest message, we send an SRPLHost message which
//      encapsulates the SRP update for the host and includes the timestamp when we received the SRP update from that
//      host, which may have changed since we sent the SRPLCandidate message.
215 // 216 // * When we receive an SRPLHost message, we look in our list of hosts (SRP clients) for a matching 217 // host. If no such host exists, or if it exists and the timestamp is less than the timestamp in the 218 // SRPLHost message, we process the SRP update from the SRPLHost message and mark the host as 219 // "not received locally." In other words, this message was not received directly from an SRP client, but rather 220 // indirectly through our SRP replication partner. Note that this message cannot be assumed to be syntactically 221 // correct and must be treated like any other data received from the network. If we are sent an invalid message, 222 // this is an indication that our partner is broken in some way, since it should have validated the message before 223 // accepting it. 224 // 225 // * Whenever the SRP engine applies an SRP update from a host, it also delivers that update to each replication 226 // server state engine. 227 // 228 // * That replication server state engine first checks to see if it is connected to its partner; if not, no action 229 // is taken. 230 // 231 // * It then checks to see if there is a candidate list. 232 // * If so, it checks to see if the host implicated in the update is already on the candidate list. 233 // * If so, it updates that candidate's update time. 234 // * If not, it adds the host to the end of the candidate list. 235 // * If not, it sends an SRPLCandidate message to the other replication server, with the host 236 // key and new timestamp. 237 // 238 // 6. When there is more than one SRP server participating in replication, only one server should advertise using 239 // mDNS. All other servers should only advertise using DNS and DNS Push (SRP scalability feature). The SRP server 240 // with the lowest numbered server ID is the one that acts as an advertising proxy for SRP. In practice this means 241 // that if we have the lowest server ID of all the SRP servers we are connected to, we advertise mDNS. 
If two servers 242 // on the same link can't connect to each other, they probably can't see each others' multicasts, so this is the 243 // right outcome. 244 245 static bool 246 ip_addresses_equal(const addr_t *a, const addr_t *b) 247 { 248 return (a->sa.sa_family == b->sa.sa_family && 249 ((a->sa.sa_family == AF_INET && !memcmp(&a->sin.sin_addr, &b->sin.sin_addr, 4)) || 250 (a->sa.sa_family == AF_INET6 && !memcmp(&a->sin6.sin6_addr, &b->sin6.sin6_addr, 16)))); 251 } 252 253 #define ADDR_NAME_LOGGER(log_type, address, preamble, conjunction, number, fullname, interfaceIndex) \ 254 if ((address)->sa.sa_family == AF_INET6) { \ 255 SEGMENTED_IPv6_ADDR_GEN_SRP(&(address)->sin6.sin6_addr, rdata_buf); \ 256 log_type(PUB_S_SRP PRI_SEGMENTED_IPv6_ADDR_SRP PUB_S_SRP PRI_S_SRP PUB_S_SRP "%d", preamble, \ 257 SEGMENTED_IPv6_ADDR_PARAM_SRP(&(address)->sin6.sin6_addr, rdata_buf), \ 258 conjunction, fullname, number, interfaceIndex); \ 259 } else { \ 260 IPv4_ADDR_GEN_SRP(&(address)->sin.sin_addr, rdata_buf); \ 261 log_type(PUB_S_SRP PRI_IPv4_ADDR_SRP PUB_S_SRP PRI_S_SRP PUB_S_SRP "%d", preamble, \ 262 IPv4_ADDR_PARAM_SRP(&(address)->sin.sin_addr, rdata_buf), \ 263 conjunction, fullname, number, interfaceIndex); \ 264 } 265 266 static void 267 address_query_callback(DNSServiceRef UNUSED sdRef, DNSServiceFlags flags, uint32_t interfaceIndex, 268 DNSServiceErrorType errorCode, const char *fullname, uint16_t rrtype, uint16_t rrclass, 269 uint16_t rdlen, const void *rdata, uint32_t UNUSED ttl, void *context) 270 { 271 address_query_t *address = context; 272 addr_t addr; 273 int i, j; 274 275 if (errorCode != kDNSServiceErr_NoError) { 276 ERROR("address resolution for " PRI_S_SRP " failed with %d", fullname, errorCode); 277 address->change_callback(address->context, NULL, false, flags & kDNSServiceFlagsMoreComing, errorCode); 278 return; 279 } 280 if (rrclass != dns_qclass_in || ((rrtype != dns_rrtype_a || rdlen != 4) && 281 (rrtype != dns_rrtype_aaaa || rdlen != 16))) { 282 
ERROR("Invalid response record type (%d) or class (%d) provided for " PRI_S_SRP, rrtype, rrclass, fullname); 283 return; 284 } 285 286 memset(&addr, 0, sizeof(addr)); 287 if (rrtype == dns_rrtype_a) { 288 #ifndef NOT_HAVE_SA_LEN 289 addr.sa.sa_len = sizeof(struct sockaddr_in); 290 #endif 291 addr.sa.sa_family = AF_INET; 292 memcpy(&addr.sin.sin_addr, rdata, rdlen); 293 if (IN_LINKLOCAL(addr.sin.sin_addr.s_addr)) { 294 ADDR_NAME_LOGGER(INFO, &addr, "Skipping link-local address ", " received for instance ", " index ", 295 fullname, interfaceIndex); 296 return; 297 } 298 } else { 299 #ifndef NOT_HAVE_SA_LEN 300 addr.sa.sa_len = sizeof(struct sockaddr_in6); 301 #endif 302 addr.sa.sa_family = AF_INET6; 303 memcpy(&addr.sin6.sin6_addr, rdata, rdlen); 304 if (IN6_IS_ADDR_LINKLOCAL(&addr.sin6.sin6_addr)) { 305 ADDR_NAME_LOGGER(INFO, &addr, "Skipping link-local address ", " received for instance ", " index ", 306 fullname, interfaceIndex); 307 return; 308 } 309 } 310 311 for (i = 0, j = 0; i < address->num_addresses; i++) { 312 // Already in the list? 313 if (address->address_interface[i] == interfaceIndex && !memcmp(&address->addresses[i], &addr, sizeof(addr))) { 314 if (flags & kDNSServiceFlagsAdd) { 315 ADDR_NAME_LOGGER(INFO, &addr, "Duplicate address ", " received for instance ", " index ", 316 fullname, interfaceIndex); 317 return; 318 } else { 319 ADDR_NAME_LOGGER(INFO, &addr, "Removing address ", " from instance ", " index ", 320 fullname, interfaceIndex); 321 322 // If we're removing an address, we keep going through the array copying down. 323 if (address->cur_address >= i) { 324 address->cur_address--; 325 } 326 } 327 } else { 328 // Copy down. 
329 if (i != j) { 330 address->addresses[j] = address->addresses[i]; 331 address->address_interface[j] = address->address_interface[i]; 332 } 333 j++; 334 } 335 } 336 if (flags & kDNSServiceFlagsAdd) { 337 if (i == ADDRESS_QUERY_MAX_ADDRESSES) { 338 ADDR_NAME_LOGGER(ERROR, &addr, "No room for address ", " received for ", " index ", 339 fullname, interfaceIndex); 340 return; 341 } 342 343 ADDR_NAME_LOGGER(INFO, &addr, "Adding address ", " to ", " index ", fullname, interfaceIndex); 344 345 address->addresses[i] = addr; 346 address->address_interface[i] = interfaceIndex; 347 address->num_addresses++; 348 address->change_callback(address->context, &address->addresses[i], true, flags & kDNSServiceFlagsMoreComing, kDNSServiceErr_NoError); 349 } else { 350 if (i == j) { 351 ADDR_NAME_LOGGER(ERROR, &addr, "Remove for unknown address ", " received for ", " index ", 352 fullname, interfaceIndex); 353 return; 354 } else { 355 address->num_addresses--; 356 address->change_callback(address->context, &addr, false, flags & kDNSServiceFlagsMoreComing, kDNSServiceErr_NoError); 357 } 358 } 359 } 360 361 static void 362 address_query_finalize(void *context) 363 { 364 address_query_t *address = context; 365 free(address->hostname); 366 free(address); 367 } 368 369 static void 370 address_query_cancel(address_query_t *address) 371 { 372 if (address->a_query != NULL) { 373 ioloop_dnssd_txn_cancel(address->a_query); 374 ioloop_dnssd_txn_release(address->a_query); 375 address->a_query = NULL; 376 } 377 if (address->aaaa_query != NULL) { 378 ioloop_dnssd_txn_cancel(address->aaaa_query); 379 ioloop_dnssd_txn_release(address->aaaa_query); 380 address->aaaa_query = NULL; 381 } 382 383 // Have whatever holds a reference to the address query let go of it. 
384 if (address->cancel_callback != NULL && address->context != NULL) { 385 address->cancel_callback(address->context); 386 address->context = NULL; 387 address->cancel_callback = NULL; 388 } 389 } 390 391 static void 392 address_query_txn_fail(void *context, int err) 393 { 394 address_query_t *address = context; 395 ERROR("address query " PRI_S_SRP " i/o failure: %d", address->hostname, err); 396 address_query_cancel(address); 397 } 398 399 static void 400 address_query_context_release(void *context) 401 { 402 address_query_t *address = context; 403 RELEASE_HERE(address, address_query); 404 } 405 406 static address_query_t * 407 address_query_create(const char *hostname, void *context, address_change_callback_t change_callback, 408 address_query_cancel_callback_t cancel_callback) 409 { 410 address_query_t *address = calloc(1, sizeof(*address)); 411 DNSServiceRef sdref; 412 dnssd_txn_t **txn; 413 414 require_action_quiet(address != NULL, exit_no_free, ERROR("No memory for address query.")); 415 RETAIN_HERE(address, address_query); // We return a retained object, or free it. 416 address->hostname = strdup(hostname); 417 require_action_quiet(address->hostname != NULL, exit, ERROR("No memory for address query hostname.")); 418 419 for (int i = 0; i < 2; i++) { 420 int ret = DNSServiceQueryRecord(&sdref, kDNSServiceFlagsForceMulticast | kDNSServiceFlagsLongLivedQuery, 421 kDNSServiceInterfaceIndexAny, hostname, (i 422 ? kDNSServiceType_A 423 : kDNSServiceType_AAAA), 424 kDNSServiceClass_IN, address_query_callback, address); 425 require_action_quiet(ret == kDNSServiceErr_NoError, exit, 426 ERROR("Unable to resolve instance hostname " PRI_S_SRP " addresses: %d", 427 hostname, ret)); 428 429 txn = i ? 
&address->a_query : &address->aaaa_query; 430 *txn = ioloop_dnssd_txn_add(sdref, address, address_query_context_release, address_query_txn_fail); 431 require_action_quiet(*txn != NULL, exit, 432 ERROR("Unable to set up ioloop transaction for " PRI_S_SRP " query on " THREAD_BROWSING_DOMAIN, 433 hostname); 434 DNSServiceRefDeallocate(sdref)); 435 RETAIN_HERE(address, address_query); // For the QueryRecord context 436 } 437 address->change_callback = change_callback; 438 address->cancel_callback = cancel_callback; 439 address->context = context; 440 address->cur_address = -1; 441 return address; 442 443 exit: 444 if (address->a_query != NULL) { 445 ioloop_dnssd_txn_cancel(address->a_query); 446 ioloop_dnssd_txn_release(address->a_query); 447 address->a_query = NULL; 448 } 449 if (address->aaaa_query != NULL) { // Un-possible right now, but better safe than sorry in case of future change 450 ioloop_dnssd_txn_cancel(address->aaaa_query); 451 ioloop_dnssd_txn_release(address->aaaa_query); 452 address->aaaa_query = NULL; 453 } 454 RELEASE_HERE(address, address_query); 455 address = NULL; 456 457 exit_no_free: 458 return address; 459 } 460 461 static void 462 srpl_domain_finalize(srpl_domain_t *domain) 463 { 464 srpl_instance_t *instance, *next; 465 srpl_instance_service_t *service, *next_service; 466 467 free(domain->name); 468 if (domain->query != NULL) { 469 ioloop_dnssd_txn_cancel(domain->query); 470 ioloop_dnssd_txn_release(domain->query); 471 } 472 473 for (instance = domain->instances; instance != NULL; instance = next) { 474 next = instance->next; 475 srpl_instance_services_discontinue(instance); 476 } 477 for (service = domain->unresolved_services; service != NULL; service = next_service) { 478 next_service = service->next; 479 srpl_instance_service_discontinue_timeout(service); 480 } 481 if (domain->query != NULL) { 482 ioloop_dnssd_txn_cancel(domain->query); 483 ioloop_dnssd_txn_release(domain->query); 484 domain->query = NULL; 485 } 486 if 
(domain->srpl_advertise_txn != NULL) { 487 ioloop_dnssd_txn_cancel(domain->srpl_advertise_txn); 488 ioloop_dnssd_txn_release(domain->srpl_advertise_txn); 489 domain->srpl_advertise_txn = NULL; 490 } 491 if (domain->srpl_register_wakeup != NULL) { 492 ioloop_cancel_wake_event(domain->srpl_register_wakeup); 493 ioloop_wakeup_release(domain->srpl_register_wakeup); 494 domain->srpl_register_wakeup = NULL; 495 } 496 if (domain->partner_discovery_timeout != NULL) { 497 ioloop_cancel_wake_event(domain->partner_discovery_timeout); 498 ioloop_wakeup_release(domain->partner_discovery_timeout); 499 domain->partner_discovery_timeout = NULL; 500 } 501 502 free(domain); 503 } 504 505 static void srpl_instance_finalize(srpl_instance_t *instance); 506 507 static void 508 srpl_instance_service_finalize(srpl_instance_service_t *service) 509 { 510 if (service->domain != NULL) { 511 RELEASE_HERE(service->domain, srpl_domain); 512 } 513 if (service->txt_txn != NULL) { 514 ioloop_dnssd_txn_cancel(service->txt_txn); 515 ioloop_dnssd_txn_release(service->txt_txn); 516 service->txt_txn = NULL; 517 } 518 if (service->srv_txn != NULL) { 519 ioloop_dnssd_txn_cancel(service->srv_txn); 520 ioloop_dnssd_txn_release(service->srv_txn); 521 service->srv_txn = NULL; 522 } 523 free(service->full_service_name); 524 free(service->host_name); 525 free(service->txt_rdata); 526 free(service->srv_rdata); 527 free(service->ptr_rdata); 528 if (service->address_query != NULL) { 529 address_query_cancel(service->address_query); 530 RELEASE_HERE(service->address_query, address_query); 531 service->address_query = NULL; 532 } 533 if (service->discontinue_timeout != NULL) { 534 ioloop_cancel_wake_event(service->discontinue_timeout); 535 ioloop_wakeup_release(service->discontinue_timeout); 536 service->discontinue_timeout = NULL; 537 } 538 if (service->resolve_wakeup != NULL) { 539 ioloop_cancel_wake_event(service->resolve_wakeup); 540 ioloop_wakeup_release(service->resolve_wakeup); 541 service->resolve_wakeup = 
NULL; 542 } 543 if (service->instance != NULL) { 544 RELEASE_HERE(service->instance, srpl_instance); 545 service->instance = NULL; 546 } 547 free(service); 548 } 549 550 static void 551 srpl_instance_finalize(srpl_instance_t *instance) 552 { 553 if (instance->domain != NULL) { 554 RELEASE_HERE(instance->domain, srpl_domain); 555 instance->domain = NULL; 556 } 557 free(instance->instance_name); 558 if (instance->connection != NULL) { 559 srpl_connection_discontinue(instance->connection); 560 RELEASE_HERE(instance->connection, srpl_connection); 561 instance->connection = NULL; 562 } 563 if (instance->reconnect_timeout != NULL) { 564 ioloop_cancel_wake_event(instance->reconnect_timeout); 565 ioloop_wakeup_release(instance->reconnect_timeout); 566 instance->reconnect_timeout = NULL; 567 } 568 569 srpl_instance_service_t *service = instance->services, *next; 570 while (service != NULL) { 571 next = service->next; 572 RELEASE_HERE(service, srpl_instance_service); 573 service = next; 574 } 575 instance->services = NULL; 576 free(instance); 577 } 578 579 #define srpl_connection_message_set(srpl_connection, message) \ 580 srpl_connection_message_set_(srpl_connection, message, __FILE__, __LINE__) 581 static void 582 srpl_connection_message_set_(srpl_connection_t *srpl_connection, message_t *message, const char *file, int line) 583 { 584 if (srpl_connection->message != NULL) { 585 ioloop_message_release_(srpl_connection->message, file, line); 586 srpl_connection->message = NULL; 587 } 588 if (message != NULL) { 589 srpl_connection->message = message; 590 ioloop_message_retain_(srpl_connection->message, file, line); 591 } 592 } 593 594 static message_t * 595 srpl_connection_message_get(srpl_connection_t *srpl_connection) 596 { 597 return srpl_connection->message; 598 } 599 600 #define srpl_candidate_free(candidate) srpl_candidate_free_(candidate, __FILE__, __LINE__) 601 static void 602 srpl_candidate_free_(srpl_candidate_t *candidate, const char *file, int line) 603 { 604 if 
(candidate != NULL) { 605 if (candidate->name != NULL) { 606 dns_name_free(candidate->name); 607 candidate->name = NULL; 608 } 609 if (candidate->message != NULL) { 610 ioloop_message_release_(candidate->message, file, line); 611 candidate->message = NULL; 612 } 613 if (candidate->host != NULL) { 614 srp_adv_host_release_(candidate->host, file, line); 615 candidate->host = NULL; 616 } 617 free(candidate); 618 } 619 } 620 621 static void 622 srpl_connection_candidates_free(srpl_connection_t *srpl_connection) 623 { 624 if (srpl_connection->candidates == NULL) { 625 goto out; 626 } 627 for (int i = 0; i < srpl_connection->num_candidates; i++) { 628 if (srpl_connection->candidates[i] != NULL) { 629 srp_adv_host_release(srpl_connection->candidates[i]); 630 } 631 } 632 free(srpl_connection->candidates); 633 srpl_connection->candidates = NULL; 634 out: 635 srpl_connection->num_candidates = srpl_connection->current_candidate = 0; 636 return; 637 } 638 639 static void 640 srpl_srp_client_update_queue_free(srpl_connection_t *srpl_connection) 641 { 642 srpl_srp_client_queue_entry_t **cp = &srpl_connection->client_update_queue; 643 while (*cp) { 644 srpl_srp_client_queue_entry_t *entry = *cp; 645 srp_adv_host_release(entry->host); 646 *cp = entry->next; 647 free(entry); 648 } 649 } 650 651 static void 652 srpl_connection_candidate_set(srpl_connection_t *srpl_connection, srpl_candidate_t *candidate) 653 { 654 if (srpl_connection->candidate != NULL) { 655 srpl_candidate_free(srpl_connection->candidate); 656 } 657 srpl_connection->candidate = candidate; 658 } 659 660 static void 661 srpl_host_update_parts_free(srpl_host_update_t *update) 662 { 663 if (update->messages != NULL) { 664 for (int i = 0; i < update->num_messages; i++) { 665 ioloop_message_release(update->messages[i]); 666 } 667 free(update->messages); 668 update->messages = NULL; 669 update->num_messages = update->max_messages = update->messages_processed = 0; 670 } 671 if (update->hostname != NULL) { 672 
dns_name_free(update->hostname); 673 update->hostname = NULL; 674 } 675 } 676 677 // Free up any temporarily retained or allocated objects on the connection (i.e., not the name). 678 static void 679 srpl_connection_reset(srpl_connection_t *srpl_connection) 680 { 681 srpl_connection->candidates_not_generated = true; 682 srpl_connection->database_synchronized = false; 683 srpl_host_update_parts_free(&srpl_connection->stashed_host); 684 srpl_connection_message_set(srpl_connection, NULL); 685 if (srpl_connection->candidate != NULL) { 686 srpl_candidate_free(srpl_connection->candidate); 687 srpl_connection->candidate = NULL; 688 } 689 690 // Cancel keepalive timers 691 if (srpl_connection->keepalive_send_wakeup) { 692 ioloop_cancel_wake_event(srpl_connection->keepalive_send_wakeup); 693 } 694 if (srpl_connection->keepalive_receive_wakeup) { 695 ioloop_cancel_wake_event(srpl_connection->keepalive_receive_wakeup); 696 } 697 698 srpl_connection_candidates_free(srpl_connection); 699 srpl_srp_client_update_queue_free(srpl_connection); 700 } 701 702 static void 703 srpl_connection_finalize(srpl_connection_t *srpl_connection) 704 { 705 if (srpl_connection->instance) { 706 RELEASE_HERE(srpl_connection->instance, srpl_instance); 707 srpl_connection->instance = NULL; 708 } 709 if (srpl_connection->connection != NULL) { 710 ioloop_comm_release(srpl_connection->connection); 711 srpl_connection->connection = NULL; 712 gettimeofday(&srpl_connection->connection_null_time, NULL); 713 srpl_connection->connection_null_reason = "finalize"; // obvsly should never see this! 
714 } 715 if (srpl_connection->reconnect_wakeup != NULL) { 716 ioloop_cancel_wake_event(srpl_connection->reconnect_wakeup); 717 ioloop_wakeup_release(srpl_connection->reconnect_wakeup); 718 srpl_connection->reconnect_wakeup = NULL; 719 } 720 if (srpl_connection->state_timeout != NULL) { 721 ioloop_cancel_wake_event(srpl_connection->state_timeout); 722 ioloop_wakeup_release(srpl_connection->state_timeout); 723 srpl_connection->state_timeout = NULL; 724 } 725 if (srpl_connection->keepalive_send_wakeup != NULL) { 726 ioloop_cancel_wake_event(srpl_connection->keepalive_send_wakeup); 727 ioloop_wakeup_release(srpl_connection->keepalive_send_wakeup); 728 srpl_connection->keepalive_send_wakeup = NULL; 729 } 730 if (srpl_connection->keepalive_receive_wakeup != NULL) { 731 ioloop_cancel_wake_event(srpl_connection->keepalive_receive_wakeup); 732 ioloop_wakeup_release(srpl_connection->keepalive_receive_wakeup); 733 srpl_connection->keepalive_receive_wakeup = NULL; 734 } 735 srpl_host_update_parts_free(&srpl_connection->stashed_host); 736 free(srpl_connection->name); 737 srpl_connection_reset(srpl_connection); 738 free(srpl_connection); 739 } 740 741 void 742 srpl_connection_release_(srpl_connection_t *srpl_connection, const char *file, int line) 743 { 744 RELEASE(srpl_connection, srpl_connection); 745 } 746 747 void 748 srpl_connection_retain_(srpl_connection_t *srpl_connection, const char *file, int line) 749 { 750 RETAIN(srpl_connection, srpl_connection); 751 } 752 753 srpl_connection_t * 754 srpl_connection_create(srpl_instance_t *instance, bool outgoing) 755 { 756 srpl_connection_t *srpl_connection = calloc(1, sizeof (*srpl_connection)), *ret = NULL; 757 if (srpl_connection == NULL) { 758 goto out; 759 } 760 RETAIN_HERE(srpl_connection, srpl_connection); 761 #define POINTER_TO_HEX_MAX_STRLEN 19 // 0x<...> 762 size_t srpl_connection_name_length = strlen(instance->instance_name) + 2 + POINTER_TO_HEX_MAX_STRLEN + 3; 763 srpl_connection->name = 
malloc(srpl_connection_name_length); 764 if (srpl_connection->name == NULL) { 765 goto out; 766 } 767 srpl_connection->keepalive_send_wakeup = ioloop_wakeup_create(); 768 if (srpl_connection->keepalive_send_wakeup == NULL) { 769 goto out; 770 } 771 srpl_connection->keepalive_receive_wakeup = ioloop_wakeup_create(); 772 if (srpl_connection->keepalive_receive_wakeup == NULL) { 773 goto out; 774 } 775 srpl_connection->keepalive_interval = DEFAULT_KEEPALIVE_WAKEUP_EXPIRY / 2; 776 snprintf(srpl_connection->name, srpl_connection_name_length, "%s%s (%p)", outgoing ? ">" : "<", instance->instance_name, srpl_connection); 777 srpl_connection->is_server = !outgoing; 778 srpl_connection->instance = instance; 779 RETAIN_HERE(instance, srpl_instance); 780 #ifdef SRP_TEST_SERVER 781 srpl_connection->state = srpl_state_test_event_intercept; 782 #endif 783 ret = srpl_connection; 784 srpl_connection = NULL; 785 out: 786 if (srpl_connection != NULL) { 787 RELEASE_HERE(srpl_connection, srpl_connection); 788 } 789 return ret; 790 } 791 792 static void 793 srpl_connection_context_release(void *context) 794 { 795 srpl_connection_t *srpl_connection = context; 796 797 RELEASE_HERE(srpl_connection, srpl_connection); 798 } 799 800 static void 801 srpl_instance_service_context_release(void *context) 802 { 803 srpl_instance_service_t *service = context; 804 805 RELEASE_HERE(service, srpl_instance_service); 806 } 807 808 static void 809 srpl_instance_context_release(void *context) 810 { 811 srpl_instance_t *instance = context; 812 813 RELEASE_HERE(instance, srpl_instance); 814 } 815 816 static void 817 srpl_instance_discontinue_timeout(void *context) 818 { 819 srpl_instance_t **sp = NULL, *instance = context; 820 srpl_domain_t *domain = instance->domain; 821 822 INFO("discontinuing instance " PRI_S_SRP " with partner id %" PRIx64, instance->instance_name, instance->partner_id); 823 for (sp = &domain->instances; *sp; sp = &(*sp)->next) { 824 if (*sp == instance) { 825 *sp = instance->next; 826 
break; 827 } 828 } 829 830 srpl_connection_t *srpl_connection = instance->connection; 831 if (srpl_connection != NULL) { 832 RELEASE_HERE(srpl_connection->instance, srpl_instance); 833 srpl_connection->instance = NULL; 834 srpl_connection_discontinue(srpl_connection); 835 // The instance no longer has a reference to the srpl_connection object. 836 RELEASE_HERE(srpl_connection, srpl_connection); 837 instance->connection = NULL; 838 } 839 RELEASE_HERE(instance, srpl_instance); 840 841 // Check to see if we are eligible to move into the routine state if we haven't done so. 842 // If the partner we failed to sync with goes away, we could enter the routine state if 843 // we have succcessfully sync-ed with all other partners discovered in startup. 844 if (domain->srpl_opstate != SRPL_OPSTATE_ROUTINE) { 845 srpl_maybe_sync_or_transition(domain); 846 } 847 } 848 849 static void 850 srpl_instance_service_discontinue_timeout(void *context) 851 { 852 srpl_instance_service_t **hp = NULL, *service = context; 853 srpl_domain_t *domain = service->domain; 854 srpl_instance_t *instance = service->instance; 855 856 // Retain for duration of function, since otherwise we might finalize it below. 857 858 // This retain shouldn't be necessary if we are actually being called by the timeout, because the timeout holds a 859 // reference to service that can't be released below. However, this function can be called directly, outside of a 860 // timeout, and in that case we do need to retain service for the function. 861 862 RETAIN_HERE(service, srpl_instance_service); 863 864 // Remove the service from either the unresolved_services list or resolved instance list 865 if (instance == NULL) { 866 hp = &domain->unresolved_services; 867 } else { 868 RETAIN_HERE(instance, srpl_instance); // Retain instance for life of function in case we decrement its refcnt below. 
869 hp = &instance->services; 870 } 871 for (; *hp; hp = &(*hp)->next) { 872 if (*hp == service) { 873 *hp = service->next; 874 RELEASE_HERE(service, srpl_instance_service); // Release service list's reference to instance_service. 875 break; 876 } 877 } 878 879 if (service->discontinue_timeout != NULL) { 880 ioloop_cancel_wake_event(service->discontinue_timeout); 881 ioloop_wakeup_release(service->discontinue_timeout); 882 service->discontinue_timeout = NULL; 883 } 884 if (service->resolve_wakeup != NULL) { 885 ioloop_cancel_wake_event(service->resolve_wakeup); 886 ioloop_wakeup_release(service->resolve_wakeup); 887 service->resolve_wakeup = NULL; 888 } 889 if (service->address_query != NULL) { 890 address_query_cancel(service->address_query); 891 RELEASE_HERE(service->address_query, address_query); 892 service->address_query = NULL; 893 } 894 if (service->txt_txn != NULL) { 895 ioloop_dnssd_txn_cancel(service->txt_txn); 896 ioloop_dnssd_txn_release(service->txt_txn); 897 service->txt_txn = NULL; 898 service->resolve_started = false; 899 } 900 if (service->srv_txn != NULL) { 901 ioloop_dnssd_txn_cancel(service->srv_txn); 902 ioloop_dnssd_txn_release(service->srv_txn); 903 service->srv_txn = NULL; 904 service->resolve_started = false; 905 } 906 if (service->instance != NULL) { 907 RELEASE_HERE(service->instance, srpl_instance); 908 service->instance = NULL; 909 } 910 if (instance != NULL) { 911 if (instance->services == NULL) { 912 srpl_instance_discontinue_timeout(instance); 913 } 914 RELEASE_HERE(instance, srpl_instance); // Release this function's reference to instance 915 } 916 RELEASE_HERE(service, srpl_instance_service); // Release this functions reference to instance_service. 917 } 918 919 static void 920 srpl_instance_services_discontinue(srpl_instance_t *instance) 921 { 922 srpl_instance_service_t *service; 923 for (service = instance->services; service != NULL; ) { 924 // The service is retained on the list, but... 
925 srpl_instance_service_t *next = service->next; 926 // This is going to release it... 927 srpl_instance_service_discontinue_timeout(service); 928 // So next is still valid here, but service isn't. 929 service = next; 930 } 931 } 932 933 static void 934 srpl_instance_service_discontinue(srpl_instance_service_t *service) 935 { 936 // Already discontinuing. 937 if (service->discontinuing) { 938 INFO("Replication service " PRI_S_SRP " went away, already discontinuing", service->full_service_name); 939 return; 940 } 941 if (service->num_copies > 0) { 942 INFO("Replication service " PRI_S_SRP " went away, %d still left", service->host_name, service->num_copies); 943 return; 944 } 945 INFO("Replication service " PRI_S_SRP " went away, none left, discontinuing", service->full_service_name); 946 service->discontinuing = true; 947 948 // DNSServiceResolve doesn't give us the kDNSServiceFlagAdd flag--apparently it's assumed that we know the 949 // service was removed because we get a remove on the browse. So we need to restart the resolve if the 950 // instance comes back, rather than continuing to use the old resolve transaction. 951 if (service->txt_txn != NULL) { 952 ioloop_dnssd_txn_cancel(service->txt_txn); 953 ioloop_dnssd_txn_release(service->txt_txn); 954 service->txt_txn = NULL; 955 } 956 if (service->srv_txn != NULL) { 957 ioloop_dnssd_txn_cancel(service->srv_txn); 958 ioloop_dnssd_txn_release(service->srv_txn); 959 service->srv_txn = NULL; 960 } 961 service->resolve_started = false; 962 963 // if all the services are discontinuing, we mark the instance to be discontinuing as well. 964 // discontinuing instance will be exluded when we check if the server has sync-ed on all the 965 // instances in order to move to the routine state and when we pick the winning dataset id 966 // from the discovered instances (i.e., discontinuing instance no longer qualifies for 967 // dataset_id election). 
968 srpl_instance_t *instance = service->instance; 969 if (instance != NULL) { 970 srpl_instance_service_t *sp; 971 for(sp = instance->services; sp != NULL; sp = sp->next) { 972 if (!sp->discontinuing) { 973 break; 974 } 975 } 976 if (sp == NULL) { 977 instance->discontinuing = true; 978 } 979 } 980 // It's not uncommon for a name to drop and then come back immediately. Wait 30s before 981 // discontinuing the instance host. 982 if (service->discontinue_timeout == NULL) { 983 service->discontinue_timeout = ioloop_wakeup_create(); 984 // Oh well. 985 if (service->discontinue_timeout == NULL) { 986 srpl_instance_service_discontinue_timeout(service); 987 return; 988 } 989 } 990 991 RETAIN_HERE(service, srpl_instance_service); 992 ioloop_add_wake_event(service->discontinue_timeout, service, srpl_instance_service_discontinue_timeout, 993 srpl_instance_service_context_release, 30 * 1000); 994 } 995 996 997 static void 998 srpl_instance_discontinue(srpl_instance_t *instance) 999 { 1000 srpl_instance_service_t *service, *next; 1001 instance->discontinuing = true; 1002 for (service = instance->services; service != NULL; service = next) { 1003 next = service->next; 1004 service->num_copies = 0; 1005 srpl_instance_service_discontinue(service); 1006 } 1007 } 1008 1009 void 1010 srpl_shutdown(srp_server_t *server_state) 1011 { 1012 srpl_instance_t *instance, *next; 1013 srpl_instance_service_t *service, *next_service; 1014 1015 if (server_state->current_thread_domain_name == NULL) { 1016 INFO("no current domain"); 1017 return; 1018 } 1019 for (srpl_domain_t **dp = &server_state->srpl_domains; *dp != NULL; ) { 1020 srpl_domain_t *domain = *dp; 1021 if (!strcmp(domain->name, server_state->current_thread_domain_name)) { 1022 for (instance = domain->instances; instance != NULL; instance = next) { 1023 next = instance->next; 1024 srpl_instance_services_discontinue(instance); 1025 } 1026 for (service = domain->unresolved_services; service != NULL; service = next_service) { 1027 
next_service = service->next; 1028 srpl_instance_service_discontinue_timeout(service); 1029 } 1030 if (domain->query != NULL) { 1031 ioloop_dnssd_txn_cancel(domain->query); 1032 ioloop_dnssd_txn_release(domain->query); 1033 domain->query = NULL; 1034 } 1035 if (domain->srpl_advertise_txn != NULL) { 1036 ioloop_dnssd_txn_cancel(domain->srpl_advertise_txn); 1037 ioloop_dnssd_txn_release(domain->srpl_advertise_txn); 1038 domain->srpl_advertise_txn = NULL; 1039 } 1040 if (domain->partner_discovery_timeout != NULL) { 1041 ioloop_cancel_wake_event(domain->partner_discovery_timeout); 1042 ioloop_wakeup_release(domain->partner_discovery_timeout); 1043 domain->partner_discovery_timeout = NULL; 1044 } 1045 *dp = domain->next; 1046 RELEASE_HERE(domain, srpl_domain); 1047 free(server_state->current_thread_domain_name); 1048 server_state->current_thread_domain_name = NULL; 1049 } else { 1050 dp = &(*dp)->next; 1051 } 1052 } 1053 } 1054 1055 void 1056 srpl_disable(srp_server_t *server_state) 1057 { 1058 srpl_shutdown(server_state); 1059 server_state->srp_replication_enabled = false; 1060 } 1061 1062 void 1063 srpl_drop_srpl_connection(srp_server_t *NONNULL server_state) 1064 { 1065 for (srpl_domain_t *domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 1066 for (srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) { 1067 if (instance->connection != NULL && instance->connection->state > srpl_state_disconnect_wait) { 1068 srpl_connection_discontinue(instance->connection); 1069 } 1070 } 1071 } 1072 } 1073 1074 void 1075 srpl_undrop_srpl_connection(srp_server_t *NONNULL server_state) 1076 { 1077 for (srpl_domain_t *domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 1078 for (srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) { 1079 srpl_instance_reconnect(instance); 1080 } 1081 } 1082 } 1083 1084 // Stop service advertisement in the given domain. 
1085 static void 1086 srpl_stop_domain_advertisement(srpl_domain_t *NONNULL domain) 1087 { 1088 INFO("dropping advertisement for domain " PUB_S_SRP, domain->name); 1089 if (domain->srpl_advertise_txn != NULL) { 1090 ioloop_dnssd_txn_cancel(domain->srpl_advertise_txn); 1091 ioloop_dnssd_txn_release(domain->srpl_advertise_txn); 1092 domain->srpl_advertise_txn = NULL; 1093 } 1094 } 1095 1096 // Stop service advertisement in all the domains 1097 void 1098 srpl_drop_srpl_advertisement(srp_server_t *NONNULL server_state) 1099 { 1100 srpl_domain_t *domain; 1101 for (domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 1102 srpl_stop_domain_advertisement(domain); 1103 } 1104 } 1105 1106 void 1107 srpl_undrop_srpl_advertisement(srp_server_t *NONNULL server_state) 1108 { 1109 srpl_domain_t *domain; 1110 for (domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 1111 srpl_domain_advertise(domain); 1112 } 1113 } 1114 1115 1116 // Copy from into to, and then NULL out the host pointer in from, which is not refcounted, so that we don't get a 1117 // double free later. Add a reference to the message, since it is refcounted. 
1118 static void 1119 srpl_host_update_steal_parts(srpl_host_update_t *to, srpl_host_update_t *from) 1120 { 1121 srpl_host_update_parts_free(to); 1122 *to = *from; 1123 from->hostname = NULL; 1124 from->messages = NULL; 1125 from->num_messages = from->max_messages = from->messages_processed = 0; 1126 } 1127 1128 static bool 1129 srpl_event_content_type_set_(srpl_event_t *event, srpl_event_content_type_t content_type, const char *file, int line) 1130 { 1131 switch(event->content_type) { 1132 case srpl_event_content_type_none: 1133 case srpl_event_content_type_address: 1134 case srpl_event_content_type_session: 1135 case srpl_event_content_type_candidate_disposition: 1136 case srpl_event_content_type_rcode: 1137 case srpl_event_content_type_client_result: // pointers owned by caller 1138 case srpl_event_content_type_advertise_finished_result: 1139 break; 1140 1141 case srpl_event_content_type_candidate: 1142 if (event->content.candidate != NULL) { 1143 srpl_candidate_free_(event->content.candidate, file, line); 1144 event->content.candidate = NULL; 1145 } 1146 break; 1147 case srpl_event_content_type_host_update: 1148 srpl_host_update_parts_free(&event->content.host_update); 1149 break; 1150 } 1151 memset(&event->content, 0, sizeof(event->content)); 1152 if (content_type == srpl_event_content_type_candidate) { 1153 event->content.candidate = calloc(1, sizeof(srpl_candidate_t)); 1154 if (event->content.candidate == NULL) { 1155 return false; 1156 } 1157 } 1158 event->content_type = content_type; 1159 return true; 1160 } 1161 1162 static void 1163 srpl_disconnected_callback(comm_t *comm, void *context, int UNUSED error) 1164 { 1165 srpl_connection_t *srpl_connection = context; 1166 srpl_domain_t *domain; 1167 1168 // No matter what state we are in, if we are disconnected, we can't continue with the existing connection. 1169 // Either we need to make a new connection, or go idle. 
1170 1171 srpl_instance_t *instance = srpl_connection->instance; 1172 1173 // The connection would still be holding a reference; hold a reference to the connection to avoid it being released 1174 // prematurely. 1175 RETAIN_HERE(srpl_connection, srpl_connection); 1176 1177 // Get rid of the comm_t connection object if it's still around 1178 if (srpl_connection->connection != NULL && srpl_connection->connection == comm) { 1179 comm_t *connection = srpl_connection->connection; 1180 srpl_connection->connection = NULL; 1181 gettimeofday(&srpl_connection->connection_null_time, NULL); 1182 srpl_connection->connection_null_reason = "disconnected_callback"; 1183 ioloop_comm_release(connection); 1184 1185 if (srpl_connection->dso != NULL) { 1186 dso_state_cancel(srpl_connection->dso); 1187 srpl_connection->dso = NULL; 1188 } 1189 } 1190 1191 // If there's no instance, this connection just needs to go away (and presumably has). 1192 if (instance == NULL) { 1193 INFO("the instance is NULL."); 1194 goto out; 1195 } 1196 1197 // Because instance is still holding a reference to srpl_connection, it's safe to keep using srpl_connection. 1198 1199 // Clear old data from connection. 1200 srpl_connection_reset(srpl_connection); 1201 1202 domain = instance->domain; 1203 if (domain == NULL) { 1204 // If domain is NULL, instance has been discontinued. 1205 INFO(PRI_S_SRP "instance was discontinued, not reconnecting.", instance->instance_name); 1206 } else { 1207 // If we are in the startup state, we should reinitiate the connection to the peer. 1208 // Otherwise, we should reconnect only if our partner id is greater than the peer's. 1209 // If there's no partner id on the instance, the instance should be a temporary one 1210 // and that means we haven't discovered the peer yet, so we can just drop the connection 1211 // and wait to discover it or for it to reconnect. 
1212 if (domain->srpl_opstate == SRPL_OPSTATE_STARTUP || 1213 (instance->have_partner_id && domain->server_state != NULL && 1214 domain->partner_id > instance->partner_id)) 1215 { 1216 // cancel reconnect_timeout if there's one scheduled. 1217 if (instance->reconnect_timeout != NULL) { 1218 ioloop_cancel_wake_event(instance->reconnect_timeout); 1219 } 1220 INFO(PRI_S_SRP ": disconnect received, reconnecting.", srpl_connection->name); 1221 srpl_connection_next_state(srpl_connection, srpl_state_next_address_get); 1222 goto out; 1223 } 1224 } 1225 1226 // If the connection is in the disconnect_wait state, deliver an event. 1227 if (srpl_connection->state == srpl_state_disconnect_wait) { 1228 srpl_event_t event; 1229 srpl_event_initialize(&event, srpl_event_disconnected); 1230 srpl_event_deliver(srpl_connection, &event); 1231 goto out; 1232 } 1233 1234 // If it's not our job to reconnect, we no longer need this connection. Release the reference 1235 // held by the instance (which'd cause the connection to be finalized). 1236 srpl_connection_next_state(srpl_connection, srpl_state_idle); 1237 if (instance->connection == srpl_connection) { 1238 RELEASE_HERE(srpl_connection, srpl_connection); 1239 instance->connection = NULL; 1240 } 1241 1242 out: 1243 RELEASE_HERE(srpl_connection, srpl_connection); 1244 } 1245 1246 static bool 1247 srpl_dso_message_setup(dso_state_t *dso, dso_message_t *state, dns_towire_state_t *towire, uint8_t *buffer, 1248 size_t buffer_size, message_t *message, bool unidirectional, bool response, int rcode, 1249 uint16_t xid, srpl_connection_t *srpl_connection) 1250 { 1251 uint16_t send_xid = 0; 1252 1253 if (srpl_connection->connection == NULL) { 1254 struct tm tm; 1255 localtime_r(&srpl_connection->connection_null_time.tv_sec, &tm); 1256 char tmoff = tm.tm_gmtoff > 0 ? '+' : '-'; 1257 long tzoff = tm.tm_gmtoff > 0 ? 
tm.tm_gmtoff : -tm.tm_gmtoff; 1258 FAULT("sending a message on a nonexistent connection: " PUB_S_SRP " (%04d-%02d-%02d %02d:%02d:%02d.%06d%c%02ld%02ld)!", 1259 srpl_connection->connection_null_reason == NULL 1260 ? "no connection ever set" : srpl_connection->connection_null_reason, 1261 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, 1262 srpl_connection->connection_null_time.tv_usec, tmoff, tzoff / 3600, (tzoff / 60) % 60); 1263 return false; 1264 } 1265 1266 if (buffer_size < DNS_HEADER_SIZE) { 1267 ERROR("internal: invalid buffer size %zd", buffer_size); 1268 return false; 1269 } 1270 1271 if (response) { 1272 if (message != NULL) { 1273 send_xid = message->wire.id; 1274 } else { 1275 send_xid = xid; 1276 } 1277 } 1278 dso_make_message(state, buffer, buffer_size, dso, unidirectional, response, 1279 send_xid, rcode, srpl_connection); 1280 memset(towire, 0, sizeof(*towire)); 1281 towire->p = &buffer[DNS_HEADER_SIZE]; 1282 towire->lim = towire->p + (buffer_size - DNS_HEADER_SIZE); 1283 towire->message = (dns_wire_t *)buffer; 1284 return true; 1285 } 1286 1287 static srpl_domain_t * 1288 srpl_connection_domain(srpl_connection_t *srpl_connection) 1289 { 1290 if (srpl_connection->instance == NULL) { 1291 INFO("connection " PRI_S_SRP " (%p) has no instance.", srpl_connection->name, srpl_connection); 1292 return NULL; 1293 } 1294 return srpl_connection->instance->domain; 1295 } 1296 1297 static bool 1298 srpl_keepalive_send(srpl_connection_t *srpl_connection, bool response, uint16_t xid) 1299 { 1300 uint8_t dsobuf[SRPL_KEEPALIVE_MESSAGE_LENGTH]; 1301 dns_towire_state_t towire; 1302 dso_message_t message; 1303 struct iovec iov; 1304 1305 if (!srpl_dso_message_setup(srpl_connection->dso, &message, &towire, dsobuf, sizeof(dsobuf), 1306 NULL, srpl_connection->dso->is_server, response, 1307 dns_rcode_noerror, xid, srpl_connection)) { 1308 return false; 1309 } 1310 dns_u16_to_wire(&towire, kDSOType_Keepalive); 1311 dns_rdlength_begin(&towire); 
1312 dns_u32_to_wire(&towire, DEFAULT_KEEPALIVE_WAKEUP_EXPIRY / 2); // Idle timeout (we are never idle) 1313 dns_u32_to_wire(&towire, DEFAULT_KEEPALIVE_WAKEUP_EXPIRY / 2); // Keepalive timeout 1314 dns_rdlength_end(&towire); 1315 if (towire.error) { 1316 ERROR("ran out of message space at " PUB_S_SRP ", :%d", __FILE__, towire.line); 1317 return false; 1318 } 1319 memset(&iov, 0, sizeof(iov)); 1320 iov.iov_len = towire.p - dsobuf; 1321 iov.iov_base = dsobuf; 1322 if (!ioloop_send_message(srpl_connection->connection, srpl_connection_message_get(srpl_connection), &iov, 1)) { 1323 INFO("send failed"); 1324 srpl_disconnect(srpl_connection); 1325 return false; 1326 } 1327 1328 INFO("sent %zd byte " PUB_S_SRP " Keepalive, xid %02x%02x (was %04x), to " PRI_S_SRP, iov.iov_len, 1329 (response ? "response" : (srpl_connection->is_server 1330 ? "unidirectional" 1331 : "query")), dsobuf[0], dsobuf[1], xid, srpl_connection->name); 1332 srpl_message_sent(srpl_connection); 1333 return true; 1334 } 1335 1336 // If we ever get a wakeup, it means that a wakeup send interval has passed since the last time we sent any message on 1337 // this connection, so we should send a keepalive message. 1338 static void 1339 srpl_connection_keepalive_send_wakeup(void *context) 1340 { 1341 srpl_connection_t *srpl_connection = context; 1342 1343 // In case we lost our connection but still have keepalive timers going, now's a good time to 1344 // cancel them. 
1345 if (srpl_connection->connection == NULL) { 1346 // Cancel keepalive timers 1347 if (srpl_connection->keepalive_send_wakeup) { 1348 ioloop_cancel_wake_event(srpl_connection->keepalive_send_wakeup); 1349 } 1350 if (srpl_connection->keepalive_receive_wakeup) { 1351 ioloop_cancel_wake_event(srpl_connection->keepalive_receive_wakeup); 1352 } 1353 return; 1354 } 1355 srpl_keepalive_send(srpl_connection, false, 0); 1356 srpl_message_sent(srpl_connection); 1357 } 1358 1359 static void 1360 srpl_message_sent(srpl_connection_t *srpl_connection) 1361 { 1362 if (!srpl_connection->is_server) { 1363 srpl_connection->last_message_sent = srp_time(); 1364 ioloop_add_wake_event(srpl_connection->keepalive_send_wakeup, 1365 srpl_connection, srpl_connection_keepalive_send_wakeup, srpl_connection_context_release, 1366 srpl_connection->keepalive_interval); 1367 RETAIN_HERE(srpl_connection, srpl_connection); // for the callback 1368 } 1369 } 1370 1371 static bool 1372 srpl_session_message_send(srpl_connection_t *srpl_connection, bool response) 1373 { 1374 uint8_t dsobuf[SRPL_SESSION_MESSAGE_LENGTH]; 1375 dns_towire_state_t towire; 1376 dso_message_t message; 1377 struct iovec iov; 1378 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 1379 if (domain == NULL) { 1380 return false; 1381 } 1382 #ifdef TEST_DSO_MESSAGE_SETUP_CONNECTION_NULL_FAULT 1383 comm_t *connection = srpl_connection->connection; 1384 const char *old_null_reason = srpl_connection->connection_null_reason; 1385 struct timeval old_null_time = srpl_connection->connection_null_time; 1386 srpl_connection->connection = NULL; 1387 srpl_connection->connection_null_reason = "unit test"; 1388 gettimeofday(&srpl_connection->connection_null_time, NULL); 1389 bool ret = srpl_dso_message_setup(srpl_connection->dso, &message, &towire, dsobuf, sizeof(dsobuf), 1390 srpl_connection_message_get(srpl_connection), false, response, 0, 0, srpl_connection); 1391 if (ret != false) { 1392 FAULT("failed to detect null 
connection!"); 1393 } 1394 srpl_connection->connection = connection; 1395 srpl_connection->connection_null_reason = old_null_reason; 1396 srpl_connection->connection_null_time = old_null_time; 1397 #endif 1398 1399 if (!srpl_dso_message_setup(srpl_connection->dso, &message, &towire, dsobuf, sizeof(dsobuf), 1400 srpl_connection_message_get(srpl_connection), false, response, 0, 0, srpl_connection)) { 1401 return false; 1402 } 1403 dns_u16_to_wire(&towire, kDSOType_SRPLSession); 1404 dns_rdlength_begin(&towire); 1405 dns_u64_to_wire(&towire, domain->partner_id); 1406 dns_rdlength_end(&towire); 1407 1408 // version TLV 1409 dns_u16_to_wire(&towire, kDSOType_SRPLVersion); 1410 dns_rdlength_begin(&towire); 1411 dns_u16_to_wire(&towire, SRPL_CURRENT_VERSION); 1412 dns_rdlength_end(&towire); 1413 1414 // domain name tlv 1415 dns_u16_to_wire(&towire, kDSOType_SRPLDomainName); 1416 dns_rdlength_begin(&towire); 1417 INFO("include domain " PRI_S_SRP, domain->name); 1418 dns_full_name_to_wire(NULL, &towire, domain->name); 1419 dns_rdlength_end(&towire); 1420 if (domain->srpl_opstate == SRPL_OPSTATE_STARTUP) { 1421 // new partner TLV 1422 dns_u16_to_wire(&towire, kDSOType_SRPLNewPartner); 1423 dns_rdlength_begin(&towire); 1424 dns_rdlength_end(&towire); 1425 } 1426 1427 if (towire.error) { 1428 ERROR("ran out of message space at " PUB_S_SRP ", :%d", __FILE__, towire.line); 1429 return false; 1430 } 1431 memset(&iov, 0, sizeof(iov)); 1432 iov.iov_len = towire.p - dsobuf; 1433 iov.iov_base = dsobuf; 1434 if (!ioloop_send_message(srpl_connection->connection, srpl_connection_message_get(srpl_connection), &iov, 1)) { 1435 INFO("send failed"); 1436 srpl_disconnect(srpl_connection); 1437 return false; 1438 } 1439 1440 INFO(PRI_S_SRP " sent SRPLSession " PUB_S_SRP ", id %" PRIx64, srpl_connection->name, 1441 response ? 
"response" : "message", domain->partner_id); 1442 srpl_message_sent(srpl_connection); 1443 return true; 1444 } 1445 1446 static bool 1447 srpl_send_candidates_message_send(srpl_connection_t *srpl_connection, bool response) 1448 { 1449 uint8_t dsobuf[SRPL_SEND_CANDIDATES_LENGTH]; 1450 dns_towire_state_t towire; 1451 dso_message_t message; 1452 struct iovec iov; 1453 1454 if (!srpl_dso_message_setup(srpl_connection->dso, &message, &towire, dsobuf, sizeof(dsobuf), 1455 srpl_connection_message_get(srpl_connection), false, response, 0, 0, srpl_connection)) { 1456 return false; 1457 } 1458 dns_u16_to_wire(&towire, kDSOType_SRPLSendCandidates); 1459 dns_rdlength_begin(&towire); 1460 dns_rdlength_end(&towire); 1461 if (towire.error) { 1462 ERROR("ran out of message space at " PUB_S_SRP ", :%d", __FILE__, towire.line); 1463 return false; 1464 } 1465 memset(&iov, 0, sizeof(iov)); 1466 iov.iov_len = towire.p - dsobuf; 1467 iov.iov_base = dsobuf; 1468 if (!ioloop_send_message(srpl_connection->connection, srpl_connection_message_get(srpl_connection), &iov, 1)) { 1469 INFO("send failed"); 1470 srpl_disconnect(srpl_connection); 1471 return false; 1472 } 1473 1474 INFO(PRI_S_SRP " sent SRPLSendCandidates " PUB_S_SRP, srpl_connection->name, response ? "response" : "query"); 1475 srpl_message_sent(srpl_connection); 1476 return true; 1477 } 1478 1479 static bool 1480 srpl_candidate_message_send(srpl_connection_t *srpl_connection, adv_host_t *host) 1481 { 1482 uint8_t dsobuf[SRPL_CANDIDATE_MESSAGE_LENGTH]; 1483 dns_towire_state_t towire; 1484 dso_message_t message; 1485 struct iovec iov; 1486 time_t update_time = host->update_time; 1487 1488 if (!srpl_dso_message_setup(srpl_connection->dso, &message, &towire, 1489 dsobuf, sizeof(dsobuf), NULL, false, false, 0, 0, srpl_connection)) { 1490 return false; 1491 } 1492 1493 // For testing, make the update time really wrong so that signature validation fails. 
This will not actually 1494 // cause a failure unless the SRP requestor sends a time range, so really only useful for testing with the 1495 // mDNSResponder srp-client, not with e.g. a Thread client. 1496 if (host->server_state != NULL && host->server_state->break_srpl_time) { 1497 INFO("breaking time: %lu -> %lu", (unsigned long)update_time, (unsigned long)(update_time - 1800)); 1498 update_time -= 1800; 1499 } 1500 dns_u16_to_wire(&towire, kDSOType_SRPLCandidate); 1501 dns_rdlength_begin(&towire); 1502 dns_rdlength_end(&towire); 1503 dns_u16_to_wire(&towire, kDSOType_SRPLHostname); 1504 dns_rdlength_begin(&towire); 1505 dns_full_name_to_wire(NULL, &towire, host->name); 1506 dns_rdlength_end(&towire); 1507 dns_u16_to_wire(&towire, kDSOType_SRPLTimeOffset); 1508 dns_rdlength_begin(&towire); 1509 dns_u32_to_wire(&towire, (uint32_t)(srp_time() - update_time)); 1510 dns_rdlength_end(&towire); 1511 dns_u16_to_wire(&towire, kDSOType_SRPLKeyID); 1512 dns_rdlength_begin(&towire); 1513 dns_u32_to_wire(&towire, host->key_id); 1514 dns_rdlength_end(&towire); 1515 if (towire.error) { 1516 ERROR("ran out of message space at " PUB_S_SRP ", :%d", __FILE__, towire.line); 1517 return false; 1518 } 1519 memset(&iov, 0, sizeof(iov)); 1520 iov.iov_len = towire.p - dsobuf; 1521 iov.iov_base = dsobuf; 1522 if (!ioloop_send_message(srpl_connection->connection, srpl_connection_message_get(srpl_connection), &iov, 1)) { 1523 INFO("send failed"); 1524 srpl_disconnect(srpl_connection); 1525 return false; 1526 } 1527 1528 INFO(PRI_S_SRP " sent SRPLCandidate message on connection.", srpl_connection->name); 1529 srpl_message_sent(srpl_connection); 1530 return true; 1531 } 1532 1533 static bool 1534 srpl_candidate_response_send(srpl_connection_t *srpl_connection, dso_message_types_t response_type) 1535 { 1536 uint8_t dsobuf[SRPL_CANDIDATE_RESPONSE_LENGTH]; 1537 dns_towire_state_t towire; 1538 dso_message_t message; 1539 struct iovec iov; 1540 1541 if 
(!srpl_dso_message_setup(srpl_connection->dso, &message, &towire, dsobuf, sizeof(dsobuf), 1542 srpl_connection_message_get(srpl_connection), false, true, 0, 0, srpl_connection)) { 1543 return false; 1544 } 1545 dns_u16_to_wire(&towire, kDSOType_SRPLCandidate); 1546 dns_rdlength_begin(&towire); 1547 dns_rdlength_end(&towire); 1548 dns_u16_to_wire(&towire, response_type); 1549 dns_rdlength_begin(&towire); 1550 dns_rdlength_end(&towire); 1551 if (towire.error) { 1552 ERROR("ran out of message space at " PUB_S_SRP ", :%d", __FILE__, towire.line); 1553 return false; 1554 } 1555 memset(&iov, 0, sizeof(iov)); 1556 iov.iov_len = towire.p - dsobuf; 1557 iov.iov_base = dsobuf; 1558 if (!ioloop_send_message(srpl_connection->connection, srpl_connection_message_get(srpl_connection), &iov, 1)) { 1559 INFO("send failed"); 1560 srpl_disconnect(srpl_connection); 1561 return false; 1562 } 1563 1564 INFO(PRI_S_SRP " sent SRPLCandidate response on connection.", srpl_connection->name); 1565 srpl_message_sent(srpl_connection); 1566 return true; 1567 } 1568 1569 // Qsort comparison function for message receipt times. 
1570 static int 1571 srpl_message_compare(const void *v1, const void *v2) 1572 { 1573 const message_t *m1 = *(message_t**)v1; 1574 const message_t *m2 = *(message_t**)v2; 1575 if (m1->received_time - m2->received_time < 0) { 1576 return -1; 1577 } else if(m1->received_time - m2->received_time > 0) { 1578 return 1; 1579 } else { 1580 return 0; 1581 } 1582 } 1583 1584 static bool 1585 srpl_host_message_send(srpl_connection_t *srpl_connection, adv_host_t *host) 1586 { 1587 uint8_t *dsobuf = NULL; 1588 size_t dsobuf_length = SRPL_HOST_MESSAGE_LENGTH; 1589 dns_towire_state_t towire; 1590 dso_message_t message; 1591 struct iovec *iov = NULL; 1592 int num_messages; // Number of SRP updates we need to send 1593 int iovec_count = 1, iov_cur = 0; 1594 message_t **messages = NULL; 1595 bool rv = false; 1596 1597 if (host->message == NULL) { 1598 FAULT("no host message to send for " PRI_S_SRP " on " PRI_S_SRP ".", host->name, srpl_connection->name); 1599 goto out; 1600 } 1601 iovec_count++; 1602 num_messages = 1; 1603 1604 time_t srpl_now = srp_time(); 1605 1606 if (SRPL_SUPPORTS(srpl_connection, SRPL_VARIATION_MULTI_HOST_MESSAGE)) { 1607 int num_instances = host->instances == NULL ? 
0 : host->instances->num; 1608 for (int i = 0; i < num_instances; i++) { 1609 adv_instance_t *instance = host->instances->vec[i]; 1610 if (instance != NULL) { 1611 if (instance->message != NULL && instance->message != host->message && instance->message != NULL) { 1612 num_messages++; 1613 } 1614 } 1615 } 1616 messages = calloc(num_messages, sizeof (*messages)); 1617 if (messages == NULL) { 1618 INFO("no memory for message vector"); 1619 goto out; 1620 } 1621 messages[0] = host->message; 1622 num_messages = 1; 1623 for (int i = 0; i < num_instances; i++) { 1624 adv_instance_t *instance = host->instances->vec[i]; 1625 if (instance != NULL) { 1626 if (instance->message != NULL && instance->message != host->message && instance->message != NULL) { 1627 messages[num_messages] = instance->message; 1628 num_messages++; 1629 } 1630 } 1631 } 1632 qsort(messages, num_messages, sizeof(*messages), srpl_message_compare); 1633 // eliminate the duplicate messages in the sorted array. 1634 int nondup_pos = 0; 1635 INFO("messages[0] = %p, received_time = %ld", messages[0], srpl_now - messages[0]->received_time); 1636 for (int i = 1; i < num_messages; i++) { 1637 if (messages[i] != messages[nondup_pos]) { 1638 nondup_pos++; 1639 messages[nondup_pos] = messages[i]; 1640 INFO("messages[%d] = messages[%d] (%p), received_time = %ld", nondup_pos, i, messages[i], 1641 srpl_now - messages[nondup_pos]->received_time); 1642 } 1643 } 1644 num_messages = nondup_pos + 1; 1645 // update iovec_count and dsobuf_length based on the number of messages. 1646 // nondup_pos now is the position of the last unique message and it also 1647 // is the number of extra host messages we have got in this SRPLHost message. 1648 iovec_count += 2 * nondup_pos; 1649 // Account for additional HostMessage TLV. 
1650 dsobuf_length += (DSO_TLV_HEADER_SIZE + sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint32_t)) * nondup_pos; 1651 } 1652 iov = calloc(iovec_count, sizeof(*iov)); 1653 if (iov == NULL) { 1654 ERROR("no memory for iovec."); 1655 goto out; 1656 } 1657 dsobuf = malloc(dsobuf_length); 1658 if (dsobuf == NULL) { 1659 ERROR("no memory for dso buffer"); 1660 goto out; 1661 } 1662 1663 if (!srpl_dso_message_setup(srpl_connection->dso, &message, &towire, 1664 dsobuf, dsobuf_length, NULL, false, false, 0, 0, srpl_connection)) { 1665 goto out; 1666 } 1667 1668 // For testing, make the update time really wrong so that signature validation fails. This will not actually 1669 // cause a failure unless the SRP requestor sends a time range, so really only useful for testing with the 1670 // mDNSResponder srp-client, not with e.g. a Thread client. 1671 if (host->server_state != NULL && host->server_state->break_srpl_time) { 1672 INFO("breaking time: %lu -> %lu", (unsigned long)srpl_now, (unsigned long)(srpl_now + 1800)); 1673 srpl_now += 1800; 1674 } 1675 dns_u16_to_wire(&towire, kDSOType_SRPLHost); 1676 dns_rdlength_begin(&towire); 1677 dns_rdlength_end(&towire); 1678 dns_u16_to_wire(&towire, kDSOType_SRPLHostname); 1679 dns_rdlength_begin(&towire); 1680 dns_full_name_to_wire(NULL, &towire, host->name); 1681 dns_rdlength_end(&towire); 1682 // v0 of the protocol only includes one host message option, and timeoffset is sent 1683 // as its own secondary TLV. 
1684 if (!SRPL_SUPPORTS(srpl_connection, SRPL_VARIATION_MULTI_HOST_MESSAGE)) { 1685 dns_u16_to_wire(&towire, kDSOType_SRPLTimeOffset); 1686 dns_rdlength_begin(&towire); 1687 dns_u32_to_wire(&towire, (uint32_t)(srpl_now - host->message->received_time)); 1688 dns_rdlength_end(&towire); 1689 } 1690 dns_u16_to_wire(&towire, kDSOType_SRPLServerStableID); 1691 dns_rdlength_begin(&towire); 1692 dns_u64_to_wire(&towire, host->server_stable_id); 1693 dns_rdlength_end(&towire); 1694 if (!SRPL_SUPPORTS(srpl_connection, SRPL_VARIATION_MULTI_HOST_MESSAGE)) { 1695 dns_u16_to_wire(&towire, kDSOType_SRPLHostMessage); 1696 dns_u16_to_wire(&towire, host->message->length); 1697 iov[iov_cur].iov_len = towire.p - dsobuf; 1698 iov[iov_cur].iov_base = dsobuf; 1699 iov_cur++; 1700 iov[iov_cur].iov_len = host->message->length; 1701 iov[iov_cur].iov_base = &host->message->wire; 1702 iov_cur++; 1703 } else { 1704 uint8_t *start = dsobuf; 1705 for (int i = 0; i < num_messages; i++) { 1706 dns_u16_to_wire(&towire, kDSOType_SRPLHostMessage); 1707 dns_u16_to_wire(&towire, 12 + messages[i]->length); 1708 dns_u32_to_wire(&towire, messages[i]->lease); 1709 dns_u32_to_wire(&towire, messages[i]->key_lease); 1710 dns_u32_to_wire(&towire, (uint32_t)(srpl_now - messages[i]->received_time)); 1711 iov[iov_cur].iov_len = towire.p - start; 1712 iov[iov_cur].iov_base = start; 1713 iov_cur++; 1714 iov[iov_cur].iov_len = messages[i]->length; 1715 iov[iov_cur].iov_base = &messages[i]->wire; 1716 iov_cur++; 1717 start = towire.p; 1718 } 1719 } 1720 1721 if (towire.error) { 1722 ERROR("ran out of message space at " PUB_S_SRP ", :%d", __FILE__, towire.line); 1723 goto out; 1724 } 1725 1726 if (!ioloop_send_message(srpl_connection->connection, srpl_connection_message_get(srpl_connection), iov, iov_cur)) { 1727 INFO("send failed"); 1728 srpl_disconnect(srpl_connection); 1729 return false; 1730 } 1731 1732 INFO(PRI_S_SRP " sent SRPLHost message %02x%02x " PRI_S_SRP " stable ID %" PRIx64 ", Host Message Count %d", 
1733 srpl_connection->name, message.buf[0], message.buf[1], host->name, host->server_stable_id, num_messages); 1734 rv = true; 1735 srpl_message_sent(srpl_connection); 1736 out: 1737 if (messages != NULL) { 1738 free(messages); 1739 } 1740 if (iov != NULL) { 1741 free(iov); 1742 } 1743 if (dsobuf != NULL) { 1744 free(dsobuf); 1745 } 1746 return rv; 1747 } 1748 1749 1750 static bool 1751 srpl_host_response_send(srpl_connection_t *srpl_connection, int rcode) 1752 { 1753 uint8_t dsobuf[SRPL_HOST_RESPONSE_LENGTH]; 1754 dns_towire_state_t towire; 1755 dso_message_t message; 1756 struct iovec iov; 1757 1758 if (!srpl_dso_message_setup(srpl_connection->dso, &message, &towire, dsobuf, sizeof(dsobuf), 1759 srpl_connection_message_get(srpl_connection), false, true, rcode, 0, srpl_connection)) { 1760 return false; 1761 } 1762 dns_u16_to_wire(&towire, kDSOType_SRPLHost); 1763 dns_rdlength_begin(&towire); 1764 dns_rdlength_end(&towire); 1765 if (towire.error) { 1766 ERROR("ran out of message space at " PUB_S_SRP ", :%d", __FILE__, towire.line); 1767 return false; 1768 } 1769 memset(&iov, 0, sizeof(iov)); 1770 iov.iov_len = towire.p - dsobuf; 1771 iov.iov_base = dsobuf; 1772 if (!ioloop_send_message(srpl_connection->connection, srpl_connection_message_get(srpl_connection), &iov, 1)) { 1773 INFO("send failed"); 1774 srpl_disconnect(srpl_connection); 1775 return false; 1776 } 1777 INFO(PRI_S_SRP " sent SRPLHost response %02x%02x rcode %d on connection.", 1778 srpl_connection->name, message.buf[0], message.buf[1], rcode); 1779 srpl_message_sent(srpl_connection); 1780 return true; 1781 } 1782 1783 static bool 1784 srpl_retry_delay_send(srpl_connection_t *srpl_connection, uint32_t delay) 1785 { 1786 uint8_t dsobuf[SRPL_RETRY_DELAY_LENGTH]; 1787 dns_towire_state_t towire; 1788 dso_message_t message; 1789 struct iovec iov; 1790 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 1791 if (domain == NULL) { 1792 ERROR("domain is NULL."); 1793 return false; 1794 } 1795 1796 
// If this isn't a server, there's no benefit to sending retry delay. 1797 if (!srpl_connection->is_server) { 1798 return true; 1799 } 1800 1801 if (!srpl_dso_message_setup(srpl_connection->dso, &message, &towire, dsobuf, sizeof(dsobuf), 1802 srpl_connection_message_get(srpl_connection), false, true, dns_rcode_noerror, 0, 1803 srpl_connection)) 1804 { 1805 return false; 1806 } 1807 dns_u16_to_wire(&towire, kDSOType_RetryDelay); 1808 dns_rdlength_begin(&towire); 1809 dns_u32_to_wire(&towire, delay); // One hour. 1810 dns_rdlength_end(&towire); 1811 if (towire.error) { 1812 ERROR("ran out of message space at " PUB_S_SRP ", :%d", __FILE__, towire.line); 1813 return false; 1814 } 1815 memset(&iov, 0, sizeof(iov)); 1816 iov.iov_len = towire.p - dsobuf; 1817 iov.iov_base = dsobuf; 1818 if (!ioloop_send_message(srpl_connection->connection, srpl_connection_message_get(srpl_connection), &iov, 1)) { 1819 INFO("send failed"); 1820 srpl_disconnect(srpl_connection); 1821 return false; 1822 } 1823 1824 INFO(PRI_S_SRP " sent Retry Delay, id %" PRIx64, srpl_connection->name, domain->partner_id); 1825 srpl_message_sent(srpl_connection); 1826 return true; 1827 } 1828 static bool 1829 srpl_find_dso_additionals(srpl_connection_t *srpl_connection, dso_state_t *dso, const dso_message_types_t *additionals, 1830 bool *required, bool *multiple, const char **names, int *indices, int num, 1831 int min_additls, int max_additls, const char *message_name, void *context, 1832 bool (*iterator)(int index, const uint8_t *buf, unsigned *offp, uint16_t len, void *context)) 1833 { 1834 int ret = true; 1835 int count = 0; 1836 1837 for (int j = 0; j < num; j++) { 1838 indices[j] = -1; 1839 } 1840 for (unsigned i = 0; i < dso->num_additls; i++) { 1841 bool found = false; 1842 for (int j = 0; j < num; j++) { 1843 if (dso->additl[i].opcode == additionals[j]) { 1844 if (indices[j] != -1 && (multiple == NULL || multiple[j] == false)) { 1845 ERROR(PRI_S_SRP ": duplicate " PUB_S_SRP " for " PUB_S_SRP ".", 
1846 srpl_connection->name, names[j], message_name); 1847 ret = false; 1848 continue; 1849 } 1850 indices[j] = i; 1851 unsigned offp = 0; 1852 if (!iterator(j, dso->additl[i].payload, &offp, dso->additl[i].length, context) || 1853 offp != dso->additl[i].length) 1854 { 1855 ERROR(PRI_S_SRP ": invalid " PUB_S_SRP " for " PUB_S_SRP ".", 1856 srpl_connection->name, names[j], message_name); 1857 found = true; // So we don't complain later. 1858 count++; 1859 ret = false; 1860 } else { 1861 found = true; 1862 count++; 1863 } 1864 } 1865 } 1866 if (!found) { 1867 ERROR(PRI_S_SRP ": unexpected opcode %x for " PUB_S_SRP ".", 1868 srpl_connection->name, dso->additl[i].opcode, message_name); 1869 } 1870 } 1871 for (int j = 0; j < num; j++) { 1872 if (required[j] && indices[j] == -1) { 1873 ERROR(PRI_S_SRP ": missing " PUB_S_SRP " for " PUB_S_SRP ".", 1874 srpl_connection->name, names[j], message_name); 1875 ret = false; 1876 } 1877 } 1878 if (count < min_additls) { 1879 ERROR(PRI_S_SRP ": not enough additional TLVs (%d < %d) for " PUB_S_SRP ".", 1880 srpl_connection->name, count, min_additls, message_name); 1881 ret = false; 1882 } else if (count > max_additls) { 1883 ERROR(PRI_S_SRP ": too many additional TLVs (%d > %d) for " PUB_S_SRP ".", 1884 srpl_connection->name, count, max_additls, message_name); 1885 ret = false; 1886 } 1887 return ret; 1888 } 1889 1890 static void 1891 srpl_connection_discontinue(srpl_connection_t *srpl_connection) 1892 { 1893 srpl_connection->candidates_not_generated = true; 1894 // Cancel any outstanding reconnect wakeup event, so that we don't accidentally restart the connection we decided to 1895 // discontinue. 1896 if (srpl_connection->reconnect_wakeup != NULL) { 1897 ioloop_cancel_wake_event(srpl_connection->reconnect_wakeup); 1898 // We have to get rid of the wakeup here because it's holding a reference to the connection, which we may want to 1899 // have go away. 
1900 ioloop_wakeup_release(srpl_connection->reconnect_wakeup); 1901 srpl_connection->reconnect_wakeup = NULL; 1902 } 1903 // Cancel any outstanding state timeout event. 1904 if (srpl_connection->state_timeout != NULL) { 1905 ioloop_cancel_wake_event(srpl_connection->state_timeout); 1906 ioloop_wakeup_release(srpl_connection->state_timeout); 1907 srpl_connection->state_timeout = NULL; 1908 } 1909 srpl_connection_reset(srpl_connection); 1910 srpl_connection_next_state(srpl_connection, srpl_state_disconnect); 1911 } 1912 1913 static bool 1914 srpl_session_message_parse_in(int index, const uint8_t *buffer, unsigned *offp, uint16_t length, void *context) 1915 { 1916 srpl_session_t *session = context; 1917 1918 switch(index) { 1919 case 0: 1920 session->new_partner = true; 1921 return true; 1922 case 1: 1923 return dns_name_parse(&session->domain_name, buffer, length, offp, length); 1924 case 2: 1925 return dns_u16_parse(buffer, length, offp, &session->remote_version); 1926 } 1927 return false; 1928 } 1929 1930 static bool 1931 srpl_session_message_parse(srpl_connection_t *srpl_connection, 1932 srpl_event_t *event, dso_state_t *dso, const char *message_name) 1933 { 1934 const char *names[3] = { "New Partner", "Domain Name", "Protocol Version" }; 1935 dso_message_types_t additionals[3] = { kDSOType_SRPLNewPartner, kDSOType_SRPLDomainName, kDSOType_SRPLVersion }; 1936 bool required[3] = { false, false, false }; 1937 bool multiple[3] = { false, false, false }; 1938 int indices[3]; 1939 1940 if (dso->primary.length != 8) { 1941 ERROR(PRI_S_SRP ": invalid DSO Primary length %d for " PUB_S_SRP ".", 1942 srpl_connection->name, dso->primary.length, message_name); 1943 return false; 1944 } 1945 1946 unsigned offp = 0; 1947 srpl_event_content_type_set(event, srpl_event_content_type_session); 1948 if (!dns_u64_parse(dso->primary.payload, 8, &offp, &event->content.session.partner_id)) { 1949 // This should be un-possible. 
1950 ERROR(PRI_S_SRP ": invalid DSO Primary server id in " PRI_S_SRP ".", 1951 srpl_connection->name, message_name); 1952 return false; 1953 } 1954 1955 event->content.session.new_partner = false; 1956 if (!srpl_find_dso_additionals(srpl_connection, dso, additionals, required, multiple, names, indices, 3, 0, 3, 1957 "SRPLSession message", &(event->content.session), srpl_session_message_parse_in)) { 1958 return false; 1959 } 1960 1961 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 1962 if (domain == NULL) { 1963 ERROR("connection has no domain."); 1964 return false; 1965 } 1966 // If this is an unidentified connection that is associated with a temporary instance and 1967 // a temporary domain, we need to retrieve the domain name from the session message and 1968 // find the real domain for this connection. 1969 // A connection can not be identified due to either 1970 // the sending partner is still in the startup state and has not advertised yet; or 1971 // the sending partner is in the routine state and has advertised the domain, but 1972 // the receiving partner has not discovered it yet. 1973 DNS_NAME_GEN_SRP(event->content.session.domain_name, dname_buf); 1974 if (domain->name == NULL) { 1975 srp_server_t *server_state = domain->server_state; 1976 if (server_state == NULL) { 1977 ERROR("server state is NULL."); 1978 return false; 1979 } 1980 if (event->content.session.domain_name == NULL) { 1981 ERROR(PUB_S_SRP " does not include domain name", message_name); 1982 return false; 1983 } 1984 srpl_domain_t **dp, *match_domain = NULL; 1985 // Find the domain. 
1986 for (dp = &server_state->srpl_domains; *dp; dp = &(*dp)->next) { 1987 match_domain = *dp; 1988 if (!strcasecmp(match_domain->name, dname_buf)) { 1989 break; 1990 } 1991 } 1992 if (match_domain == NULL) { 1993 ERROR("domain name in " PUB_S_SRP " does not match any domain", message_name); 1994 return false; 1995 } 1996 RELEASE_HERE(srpl_connection->instance->domain, srpl_domain); 1997 srpl_connection->instance->domain = match_domain; 1998 RETAIN_HERE(match_domain, srpl_domain); 1999 } 2000 2001 2002 if (event->content.session.remote_version >= SRPL_VERSION_MULTI_HOST_MESSAGE) { 2003 srpl_connection->variation_mask |= SRPL_VARIATION_MULTI_HOST_MESSAGE; 2004 } 2005 2006 INFO(PRI_S_SRP " received " PUB_S_SRP ", id %" PRIx64 ", startup " PUB_S_SRP 2007 ", domain " PRI_S_SRP ", version %d", srpl_connection->name, message_name, 2008 event->content.session.partner_id, event->content.session.new_partner? "yes" : "no", 2009 dname_buf, event->content.session.remote_version); 2010 return true; 2011 } 2012 2013 static void 2014 srpl_session_message(srpl_connection_t *srpl_connection, message_t *message, dso_state_t *dso) 2015 { 2016 srpl_event_t event; 2017 srpl_event_initialize(&event, srpl_event_session_message_received); 2018 2019 srpl_connection_message_set(srpl_connection, message); 2020 if (!srpl_session_message_parse(srpl_connection, &event, dso, "SRPLSession message")) { 2021 dns_name_free(event.content.session.domain_name); 2022 srpl_disconnect(srpl_connection); 2023 return; 2024 } 2025 srpl_event_deliver(srpl_connection, &event); 2026 dns_name_free(event.content.session.domain_name); 2027 } 2028 2029 static void 2030 srpl_session_response(srpl_connection_t *srpl_connection, dso_state_t *dso) 2031 { 2032 srpl_event_t event; 2033 srpl_event_initialize(&event, srpl_event_session_response_received); 2034 if (!srpl_session_message_parse(srpl_connection, &event, dso, "SRPLSession response")) { 2035 srpl_disconnect(srpl_connection); 2036 return; 2037 } 2038 2039 
srpl_event_deliver(srpl_connection, &event); 2040 dns_name_free(event.content.session.domain_name); 2041 } 2042 2043 static bool 2044 srpl_send_candidates_message_parse(srpl_connection_t *srpl_connection, dso_state_t *dso, const char *message_name) 2045 { 2046 if (dso->primary.length != 0) { 2047 ERROR(PRI_S_SRP ": invalid DSO Primary length %d for " PUB_S_SRP ".", 2048 srpl_connection->name, dso->primary.length, message_name); 2049 srpl_disconnect(srpl_connection); 2050 return false; 2051 } 2052 return true; 2053 } 2054 2055 static void 2056 srpl_send_candidates_message(srpl_connection_t *srpl_connection, message_t *message, dso_state_t *dso) 2057 { 2058 srpl_event_t event; 2059 srpl_event_initialize(&event, srpl_event_send_candidates_message_received); 2060 2061 srpl_connection_message_set(srpl_connection, message); 2062 if (srpl_send_candidates_message_parse(srpl_connection, dso, "SRPLSendCandidates message")) { 2063 INFO(PRI_S_SRP " received SRPLSendCandidates query", srpl_connection->name); 2064 2065 srpl_event_deliver(srpl_connection, &event); 2066 return; 2067 } 2068 srpl_disconnect(srpl_connection); 2069 } 2070 2071 static void 2072 srpl_send_candidates_response(srpl_connection_t *srpl_connection, dso_state_t *dso) 2073 { 2074 srpl_event_t event; 2075 srpl_event_initialize(&event, srpl_event_send_candidates_response_received); 2076 2077 if (srpl_send_candidates_message_parse(srpl_connection, dso, "SRPLSendCandidates message")) { 2078 INFO(PRI_S_SRP " received SRPLSendCandidates response", srpl_connection->name); 2079 srpl_event_deliver(srpl_connection, &event); 2080 return; 2081 } 2082 } 2083 2084 static bool 2085 srpl_candidate_message_parse_in(int index, const uint8_t *buffer, unsigned *offp, uint16_t length, void *context) 2086 { 2087 srpl_candidate_t *candidate = context; 2088 2089 switch(index) { 2090 case 0: 2091 return dns_name_parse(&candidate->name, buffer, length, offp, length); 2092 case 1: 2093 return dns_u32_parse(buffer, length, offp, 
&candidate->update_offset); 2094 case 2: 2095 return dns_u32_parse(buffer, length, offp, &candidate->key_id); 2096 } 2097 return false; 2098 } 2099 2100 static void 2101 srpl_candidate_message(srpl_connection_t *srpl_connection, message_t *message, dso_state_t *dso) 2102 { 2103 const char *names[3] = { "Candidate Name", "Time Offset", "Key ID" }; 2104 dso_message_types_t additionals[3] = { kDSOType_SRPLHostname, kDSOType_SRPLTimeOffset, kDSOType_SRPLKeyID }; 2105 bool required[3] = { true, true, true }; 2106 int indices[3]; 2107 2108 srpl_event_t event; 2109 srpl_event_initialize(&event, srpl_event_candidate_received); 2110 srpl_connection_message_set(srpl_connection, message); 2111 if (!srpl_event_content_type_set(&event, srpl_event_content_type_candidate) || 2112 !srpl_find_dso_additionals(srpl_connection, dso, additionals, 2113 required, NULL, names, indices, 3, 3, 3, "SRPLCandidate message", 2114 event.content.candidate, srpl_candidate_message_parse_in)) { 2115 goto fail; 2116 } 2117 2118 event.content.candidate->update_time = srp_time() - event.content.candidate->update_offset; 2119 srpl_event_deliver(srpl_connection, &event); 2120 srpl_event_content_type_set(&event, srpl_event_content_type_none); 2121 return; 2122 2123 fail: 2124 srpl_disconnect(srpl_connection); 2125 } 2126 2127 static bool 2128 srpl_candidate_response_parse_in(int index, 2129 const uint8_t *UNUSED buffer, unsigned *offp, uint16_t length, void *context) 2130 { 2131 srpl_candidate_disposition_t *candidate_disposition = context; 2132 2133 if (length != 0) { 2134 return false; 2135 } 2136 2137 switch(index) { 2138 case 0: 2139 *candidate_disposition = srpl_candidate_yes; 2140 break; 2141 case 1: 2142 *candidate_disposition = srpl_candidate_no; 2143 break; 2144 case 2: 2145 *candidate_disposition = srpl_candidate_conflict; 2146 break; 2147 } 2148 *offp = 0; 2149 return true; 2150 } 2151 2152 static void 2153 srpl_candidate_response(srpl_connection_t *srpl_connection, dso_state_t *dso) 2154 { 
2155 const char *names[3] = { "Candidate Yes", "Candidate No", "Conflict" }; 2156 dso_message_types_t additionals[3] = { kDSOType_SRPLCandidateYes, kDSOType_SRPLCandidateNo, kDSOType_SRPLConflict }; 2157 bool required[3] = { false, false, false }; 2158 int indices[3]; 2159 srpl_event_t event; 2160 2161 srpl_event_initialize(&event, srpl_event_candidate_response_received); 2162 srpl_event_content_type_set(&event, srpl_event_content_type_candidate_disposition); 2163 if (!srpl_find_dso_additionals(srpl_connection, dso, additionals, 2164 required, NULL, names, indices, 3, 1, 1, "SRPLCandidate reply", 2165 &event.content.disposition, srpl_candidate_response_parse_in)) { 2166 goto fail; 2167 } 2168 srpl_event_deliver(srpl_connection, &event); 2169 return; 2170 2171 fail: 2172 srpl_disconnect(srpl_connection); 2173 } 2174 2175 static bool 2176 srpl_host_message_parse_in(int index, const uint8_t *buffer, unsigned *offp, uint16_t length, void *context) 2177 { 2178 srpl_host_update_t *update = context; 2179 2180 switch(index) { 2181 case 0: // Host Name 2182 if (update->hostname == NULL) { 2183 unsigned offp_orig = *offp; 2184 bool ret = dns_name_parse(&update->hostname, buffer, length, offp, length); 2185 update->num_bytes = *offp - offp_orig; 2186 update->orig_buffer = (intptr_t)buffer; 2187 return ret; 2188 } else { 2189 if ((intptr_t)buffer == update->orig_buffer) { 2190 (*offp) += update->num_bytes; 2191 } 2192 return true; 2193 } 2194 case 1: // Host Message 2195 if (update->messages != NULL) { 2196 const uint8_t *message_buffer; 2197 size_t message_length; 2198 if (update->rcode) { 2199 message_buffer = buffer + 12; // lease, key-lease, time offset 2200 message_length = length - 12; 2201 } else { 2202 message_buffer = buffer; 2203 message_length = length; 2204 } 2205 message_t *message = ioloop_message_create(message_length); 2206 if (message == NULL) { 2207 return false; 2208 } 2209 if (update->rcode) { 2210 uint32_t time_offset = 0; 2211 if (!(dns_u32_parse(buffer, 
length, offp, &message->lease) && 2212 dns_u32_parse(buffer, length, offp, &message->key_lease) && 2213 dns_u32_parse(buffer, length, offp, &time_offset))) 2214 { 2215 INFO("failed to parse lease, key_lease or time_offset"); 2216 return false; 2217 } 2218 message->received_time = srp_time() - time_offset; 2219 } 2220 memcpy(&message->wire, message_buffer, message_length); 2221 2222 // We are parsing across the same message, so we can't exceed max_messages here. 2223 update->messages[update->num_messages++] = message; 2224 } else { 2225 update->max_messages++; 2226 } 2227 *offp = length; 2228 return true; 2229 case 2: // Server Stable ID 2230 return dns_u64_parse(buffer, length, offp, &update->server_stable_id); 2231 case 3: // Time Offset 2232 return dns_u32_parse(buffer, length, offp, &update->update_offset); 2233 } 2234 return false; 2235 } 2236 2237 static void 2238 srpl_host_message(srpl_connection_t *srpl_connection, message_t *message, dso_state_t *dso) 2239 { 2240 srpl_event_t event; 2241 memset(&event, 0, sizeof(event)); 2242 srpl_connection_message_set(srpl_connection, message); 2243 if (dso->primary.length != 0) { 2244 ERROR(PRI_S_SRP ": invalid DSO Primary length %d for SRPLHost message.", 2245 srpl_connection->name, dso->primary.length); 2246 goto fail; 2247 } else { 2248 const char *names[4] = { "Host Name", "Host Message", "Server Stable ID", "Time Offset" }; 2249 dso_message_types_t additionals[4] = { kDSOType_SRPLHostname, kDSOType_SRPLHostMessage, 2250 kDSOType_SRPLServerStableID, kDSOType_SRPLTimeOffset }; 2251 bool required[4] = { true, true, false, true }; 2252 bool multiple[4] = { false, true, false, false }; 2253 int indices[4]; 2254 int num_additls = 4; 2255 2256 // Parse host message 2257 srpl_event_initialize(&event, srpl_event_host_message_received); 2258 srpl_event_content_type_set(&event, srpl_event_content_type_host_update); 2259 if (SRPL_SUPPORTS(srpl_connection, SRPL_VARIATION_MULTI_HOST_MESSAGE)) { 2260 num_additls--; 2261 
event.content.host_update.rcode = 1; // temporarily use rcode for flag 2262 } 2263 2264 if (!srpl_find_dso_additionals(srpl_connection, dso, additionals, required, multiple, names, indices, 2265 num_additls, num_additls - 1, num_additls + MAX_ADDITIONAL_HOST_MESSAGES, 2266 "SRPLHost message", &event.content.host_update, srpl_host_message_parse_in)) { 2267 goto fail; 2268 } 2269 // update->max_messages can't be zero here, or we would have gotten a false return from 2270 // srpl_find_dso_additionals and not gotten here. 2271 event.content.host_update.messages = calloc(event.content.host_update.max_messages, 2272 sizeof (*event.content.host_update.messages)); 2273 if (event.content.host_update.messages == NULL) { 2274 goto fail; 2275 } 2276 // Now that we know how many messages, we can copy them out. 2277 if (!srpl_find_dso_additionals(srpl_connection, dso, additionals, required, multiple, names, indices, 2278 num_additls, num_additls - 1, num_additls + MAX_ADDITIONAL_HOST_MESSAGES, 2279 "SRPLHost message", &event.content.host_update, srpl_host_message_parse_in)) { 2280 goto fail; 2281 } 2282 DNS_NAME_GEN_SRP(event.content.host_update.hostname, hostname_buf); 2283 if (!SRPL_SUPPORTS(srpl_connection, SRPL_VARIATION_MULTI_HOST_MESSAGE)) { 2284 time_t update_time = srp_time() - event.content.host_update.update_offset; 2285 event.content.host_update.messages[0]->received_time = update_time; 2286 INFO(PRI_S_SRP " received SRPLHost message %x for " PRI_DNS_NAME_SRP " server stable ID %" PRIx64 2287 " update offset = %d", srpl_connection->name, ntohs(message->wire.id), 2288 DNS_NAME_PARAM_SRP(event.content.host_update.hostname, hostname_buf), 2289 event.content.host_update.server_stable_id, event.content.host_update.update_offset); 2290 } else { 2291 // Make sure times are sequential. 
2292 time_t last_received_time = event.content.host_update.messages[0]->received_time; 2293 INFO(PRI_S_SRP " received SRPLHost message %x for " PRI_DNS_NAME_SRP " server stable ID %" PRIx64 2294 " message 0 received time = %ld", srpl_connection->name, ntohs(message->wire.id), 2295 DNS_NAME_PARAM_SRP(event.content.host_update.hostname, hostname_buf), 2296 event.content.host_update.server_stable_id, srp_time() - last_received_time); 2297 for (int i = 1; i < event.content.host_update.num_messages; i++) { 2298 time_t cur_received_time = event.content.host_update.messages[i]->received_time; 2299 if (cur_received_time - last_received_time <= 0) { 2300 INFO(PRI_S_SRP 2301 " received invalid SRPLHost message %x with message %d time %lld <= message %d time %lld", 2302 srpl_connection->name, ntohs(event.content.host_update.messages[i]->wire.id), 2303 i, (long long)cur_received_time, i - 1, (long long)last_received_time); 2304 goto fail_no_message; 2305 } 2306 INFO("message %d received time = %ld", i, cur_received_time); 2307 last_received_time = cur_received_time; 2308 } 2309 } 2310 event.content.host_update.rcode = 0; 2311 srpl_event_deliver(srpl_connection, &event); 2312 srpl_event_content_type_set(&event, srpl_event_content_type_none); 2313 } 2314 return; 2315 fail: 2316 INFO(PRI_S_SRP " received invalid SRPLHost message %x", srpl_connection->name, ntohs(message->wire.id)); 2317 fail_no_message: 2318 if (event.content_type == srpl_event_content_type_host_update) { 2319 srpl_event_content_type_set(&event, srpl_event_content_type_none); 2320 } 2321 srpl_disconnect(srpl_connection); 2322 return; 2323 } 2324 2325 static void 2326 srpl_host_response(srpl_connection_t *srpl_connection, message_t *message, dso_state_t *dso) 2327 { 2328 if (dso->primary.length != 0) { 2329 ERROR(PRI_S_SRP ": invalid DSO Primary length %d for SRPLHost response.", 2330 srpl_connection->name, dso->primary.length); 2331 srpl_disconnect(srpl_connection); 2332 return; 2333 } else { 2334 srpl_event_t 
event; 2335 INFO(PRI_S_SRP " received SRPLHost response %x", srpl_connection->name, ntohs(message->wire.id)); 2336 srpl_event_initialize(&event, srpl_event_host_response_received); 2337 srpl_event_content_type_set(&event, srpl_event_content_type_rcode); 2338 event.content.rcode = dns_rcode_get(&message->wire); 2339 srpl_event_deliver(srpl_connection, &event); 2340 srpl_event_content_type_set(&event, srpl_event_content_type_none); 2341 return; 2342 } 2343 } 2344 2345 static void 2346 srpl_keepalive_receive(srpl_connection_t *srpl_connection, int keepalive_interval, uint16_t xid) 2347 { 2348 if (srpl_connection->is_server) { 2349 int num_standby = 0; 2350 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 2351 if (domain != NULL && domain->server_state != NULL) { 2352 for (srpl_instance_t *unmatched = domain->server_state->unmatched_instances; unmatched != NULL; unmatched = unmatched->next) { 2353 srpl_connection_t *unidentified = unmatched->connection; 2354 if (unidentified != NULL && unidentified->state > srpl_state_session_evaluate) { 2355 num_standby++; 2356 } 2357 } 2358 } 2359 int new_interval = num_standby * DEFAULT_KEEPALIVE_WAKEUP_EXPIRY / 2; 2360 if (new_interval > 0 && srpl_connection->keepalive_interval != new_interval) { 2361 srpl_connection->keepalive_interval = new_interval; 2362 INFO("suggest keepalive %d for connection " PRI_S_SRP, new_interval, srpl_connection->name); 2363 } 2364 srpl_keepalive_send(srpl_connection, true, xid); 2365 } else { 2366 if (srpl_connection->state <= srpl_state_sync_wait) { 2367 INFO("keepalive for connection " PRI_S_SRP " - old %d, new %d.", srpl_connection->name, 2368 srpl_connection->keepalive_interval, keepalive_interval); 2369 srpl_connection->keepalive_interval = keepalive_interval; 2370 } 2371 } 2372 } 2373 2374 static void 2375 srpl_dso_retry_delay(srpl_connection_t *srpl_connection, int reconnect_delay) 2376 { 2377 if (srpl_connection->instance == NULL) { 2378 // If there's no instance, we're already 
disconnecting. 2379 INFO(PRI_S_SRP ": no instance", srpl_connection->name); 2380 return; 2381 } 2382 srpl_instance_t *instance = srpl_connection->instance; 2383 RETAIN_HERE(srpl_connection, srpl_connection); // In case there's only one reference left. 2384 if (instance->unmatched) { 2385 INFO(PRI_S_SRP ": not sending retry delay for %d seconds because unidentified", srpl_connection->name, reconnect_delay); 2386 if (instance->connection == srpl_connection) { 2387 RELEASE_HERE(instance->connection, srpl_connection); 2388 instance->connection = NULL; 2389 } 2390 } else { 2391 INFO(PRI_S_SRP ": sending retry delay for %d seconds", srpl_connection->name, reconnect_delay); 2392 2393 // Set things up to reconnect later. 2394 srpl_connection_drop_state_delay(instance, srpl_connection, reconnect_delay); 2395 } 2396 2397 // Drop the connection 2398 srpl_connection_discontinue(srpl_connection); 2399 RELEASE_HERE(srpl_connection, srpl_connection); // For the function. 2400 } 2401 2402 static void 2403 srpl_dso_message(srpl_connection_t *srpl_connection, message_t *message, dso_state_t *dso, bool response) 2404 { 2405 const char *name = "<null>"; 2406 if (srpl_connection != NULL) { 2407 if (srpl_connection->instance != NULL) { 2408 name = srpl_connection->instance->instance_name; 2409 } else { 2410 name = srpl_connection->name; 2411 } 2412 } 2413 2414 switch(dso->primary.opcode) { 2415 case kDSOType_SRPLSession: 2416 if (response) { 2417 srpl_session_response(srpl_connection, dso); 2418 } else { 2419 srpl_session_message(srpl_connection, message, dso); 2420 } 2421 break; 2422 2423 case kDSOType_SRPLSendCandidates: 2424 if (response) { 2425 srpl_send_candidates_response(srpl_connection, dso); 2426 } else { 2427 srpl_send_candidates_message(srpl_connection, message, dso); 2428 } 2429 break; 2430 2431 case kDSOType_SRPLCandidate: 2432 if (response) { 2433 srpl_candidate_response(srpl_connection, dso); 2434 } else { 2435 srpl_candidate_message(srpl_connection, message, dso); 2436 } 
2437 break; 2438 2439 case kDSOType_SRPLHost: 2440 if (response) { 2441 srpl_host_response(srpl_connection, message, dso); 2442 } else { 2443 srpl_host_message(srpl_connection, message, dso); 2444 } 2445 break; 2446 2447 case kDSOType_Keepalive: 2448 if (response) { 2449 INFO(PRI_S_SRP ": keepalive response, xid %04x", name, message->wire.id); 2450 } else if (message->wire.id) { 2451 INFO(PRI_S_SRP ": keepalive query, xid %04x", name, message->wire.id); 2452 } else { 2453 INFO(PRI_S_SRP ": keepalive unidirectional, xid %04x (should be zero)", name, message->wire.id); 2454 } 2455 break; 2456 2457 default: 2458 INFO(PRI_S_SRP ": unexpected primary TLV %d", name, dso->primary.opcode); 2459 dso_simple_response(srpl_connection->connection, NULL, &message->wire, dns_rcode_dsotypeni); 2460 break; 2461 } 2462 2463 } 2464 2465 // We should never get here. If we do, it means that we haven't gotten a keepalive in the required interval. 2466 static void 2467 srpl_keepalive_receive_wakeup(void *context) 2468 { 2469 srpl_connection_t *srpl_connection = context; 2470 2471 INFO(PUB_S_SRP ": nothing heard from partner across keepalive interval--disconnecting", srpl_connection->name); 2472 srpl_connection_discontinue(srpl_connection); // Drop the connection, don't send a retry_delay. 
2473 } 2474 2475 static void 2476 srpl_instance_dso_event_callback(void *context, void *event_context, dso_state_t *dso, dso_event_type_t eventType) 2477 { 2478 message_t *message; 2479 dso_query_receive_context_t *response_context; 2480 dso_disconnect_context_t *disconnect_context; 2481 dso_keepalive_context_t *keepalive_context; 2482 srpl_connection_t *srpl_connection = context; 2483 const char *name = "<null>"; 2484 if (srpl_connection != NULL) { 2485 if (srpl_connection->instance != NULL) { 2486 name = srpl_connection->instance->instance_name; 2487 } else { 2488 name = srpl_connection->name; 2489 } 2490 } 2491 2492 switch(eventType) 2493 { 2494 case kDSOEventType_DNSMessage: 2495 // We shouldn't get here because we already handled any DNS messages 2496 message = event_context; 2497 INFO(PRI_S_SRP ": DNS Message (opcode=%d) received from " PRI_S_SRP, 2498 name, dns_opcode_get(&message->wire), dso->remote_name); 2499 break; 2500 case kDSOEventType_DNSResponse: 2501 // We shouldn't get here because we already handled any DNS messages 2502 message = event_context; 2503 INFO(PRI_S_SRP ": DNS Response (opcode=%d) received from " PRI_S_SRP, 2504 name, dns_opcode_get(&message->wire), dso->remote_name); 2505 break; 2506 case kDSOEventType_DSOMessage: 2507 INFO(PRI_S_SRP ": DSO Message (Primary TLV=%d) received from " PRI_S_SRP, 2508 name, dso->primary.opcode, dso->remote_name); 2509 srpl_connection->last_message_received = srp_time(); 2510 ioloop_add_wake_event(srpl_connection->keepalive_receive_wakeup, 2511 srpl_connection, srpl_keepalive_receive_wakeup, srpl_connection_context_release, 2512 srpl_connection->keepalive_interval * 2); 2513 RETAIN_HERE(srpl_connection, srpl_connection); // for the callback 2514 message = event_context; 2515 srpl_dso_message(srpl_connection, message, dso, false); 2516 break; 2517 case kDSOEventType_DSOResponse: 2518 INFO(PRI_S_SRP ": DSO Response (Primary TLV=%d) received from " PRI_S_SRP, 2519 name, dso->primary.opcode, dso->remote_name); 
2520 srpl_connection->last_message_received = srp_time(); 2521 ioloop_add_wake_event(srpl_connection->keepalive_receive_wakeup, 2522 srpl_connection, srpl_keepalive_receive_wakeup, srpl_connection_context_release, 2523 srpl_connection->keepalive_interval * 2); 2524 RETAIN_HERE(srpl_connection, srpl_connection); // for the callback 2525 response_context = event_context; 2526 message = response_context->message_context; 2527 srpl_dso_message(srpl_connection, message, dso, true); 2528 break; 2529 2530 case kDSOEventType_Finalize: 2531 INFO("Finalize"); 2532 break; 2533 2534 case kDSOEventType_Connected: 2535 INFO("Connected to " PRI_S_SRP, dso->remote_name); 2536 break; 2537 2538 case kDSOEventType_ConnectFailed: 2539 INFO("Connection to " PRI_S_SRP " failed", dso->remote_name); 2540 break; 2541 2542 case kDSOEventType_Disconnected: 2543 INFO("Connection to " PRI_S_SRP " disconnected", dso->remote_name); 2544 break; 2545 case kDSOEventType_ShouldReconnect: 2546 INFO("Connection to " PRI_S_SRP " should reconnect (not for a server)", dso->remote_name); 2547 break; 2548 case kDSOEventType_Inactive: 2549 INFO(PRI_S_SRP "Inactivity timer went off, closing connection.", name); 2550 break; 2551 case kDSOEventType_Keepalive: 2552 INFO("should send a keepalive now."); 2553 break; 2554 case kDSOEventType_KeepaliveRcvd: 2555 keepalive_context = event_context; 2556 keepalive_context->send_response = false; 2557 INFO(PRI_S_SRP ": keepalive received, xid %04x.", name, keepalive_context->xid); 2558 srpl_keepalive_receive(srpl_connection, keepalive_context->keepalive_interval, keepalive_context->xid); 2559 srpl_connection->last_message_received = srp_time(); 2560 ioloop_add_wake_event(srpl_connection->keepalive_receive_wakeup, 2561 srpl_connection, srpl_keepalive_receive_wakeup, srpl_connection_context_release, 2562 srpl_connection->keepalive_interval * 2); 2563 RETAIN_HERE(srpl_connection, srpl_connection); // for the callback 2564 break; 2565 case kDSOEventType_RetryDelay: 2566 
disconnect_context = event_context; 2567 INFO(PRI_S_SRP ": retry delay received, %d seconds", name, disconnect_context->reconnect_delay); 2568 srpl_dso_retry_delay(srpl_connection, disconnect_context->reconnect_delay); 2569 break; 2570 } 2571 } 2572 2573 static void 2574 srpl_datagram_callback(comm_t *comm, message_t *message, void *context) 2575 { 2576 srpl_connection_t *srpl_connection = context; 2577 srpl_instance_t *instance = srpl_connection->instance; 2578 if (instance == NULL) { 2579 INFO("datagram on connection " PRI_S_SRP " with no instance.", comm->name); 2580 return; 2581 } 2582 2583 // If this is a DSO message, see if we have a session yet. 2584 switch(dns_opcode_get(&message->wire)) { 2585 case dns_opcode_dso: 2586 if (srpl_connection->dso == NULL) { 2587 INFO("dso message received with no DSO object on instance " PRI_S_SRP, instance->instance_name); 2588 srpl_disconnect(srpl_connection); 2589 return; 2590 } 2591 dso_message_received(srpl_connection->dso, (uint8_t *)&message->wire, message->length, message); 2592 return; 2593 break; 2594 } 2595 INFO("datagram on connection " PRI_S_SRP " not handled, type = %d.", 2596 comm->name, dns_opcode_get(&message->wire)); 2597 } 2598 2599 static void 2600 srpl_connection_dso_cleanup(void *UNUSED context) 2601 { 2602 dso_cleanup(false); 2603 } 2604 2605 // Call this to break the current srpl_connection without sending the state machine into idle. 
2606 static void 2607 srpl_trigger_disconnect(srpl_connection_t *srpl_connection) 2608 { 2609 // Trigger a disconnect 2610 if (srpl_connection->dso != NULL) { 2611 dso_state_cancel(srpl_connection->dso); 2612 srpl_connection->dso = NULL; 2613 } else { 2614 ioloop_comm_cancel(srpl_connection->connection); 2615 ioloop_comm_release(srpl_connection->connection); 2616 gettimeofday(&srpl_connection->connection_null_time, NULL); 2617 srpl_connection->connection_null_reason = "trigger_disconnect"; 2618 srpl_connection->connection = NULL; 2619 } 2620 } 2621 2622 static bool 2623 srpl_connection_dso_life_cycle_callback(dso_life_cycle_t cycle, void *const context, dso_state_t *const dso) 2624 { 2625 if (cycle == dso_life_cycle_cancel) { 2626 srpl_connection_t *srpl_connection = context; 2627 INFO(PRI_S_SRP ": %p %p", srpl_connection->name, srpl_connection, dso); 2628 if (srpl_connection->connection != NULL) { 2629 ioloop_comm_cancel(srpl_connection->connection); 2630 srpl_connection->connection->dso = NULL; 2631 ioloop_comm_release(srpl_connection->connection); 2632 gettimeofday(&srpl_connection->connection_null_time, NULL); 2633 srpl_connection->connection_null_reason = "dso_life_cycle_callback"; 2634 srpl_connection->connection = NULL; 2635 } 2636 srpl_connection_reset(srpl_connection); 2637 srpl_connection->dso = NULL; 2638 RELEASE_HERE(srpl_connection, srpl_connection); 2639 ioloop_run_async(srpl_connection_dso_cleanup, NULL); 2640 return true; 2641 } 2642 return false; 2643 } 2644 2645 static void 2646 srpl_associate_incoming_with_instance(comm_t *connection, message_t *message, 2647 dso_state_t *dso, srpl_instance_t *instance) 2648 { 2649 srpl_connection_t *old_connection = NULL; 2650 2651 srpl_connection_t *srpl_connection = srpl_connection_create(instance, false); 2652 if (srpl_connection == NULL) { 2653 ioloop_comm_cancel(connection); 2654 return; 2655 } 2656 2657 2658 srpl_connection->connection = connection; 2659 ioloop_comm_retain(srpl_connection->connection); 
2660 2661 srpl_connection->dso = dso; 2662 srpl_connection->instance = instance; 2663 srpl_connection->connected_address = connection->address; 2664 srpl_connection->state = srpl_state_session_message_wait; 2665 2666 dso_set_event_context(dso, srpl_connection); 2667 RETAIN_HERE(srpl_connection, srpl_connection); // dso holds reference. 2668 dso_set_event_callback(dso, srpl_instance_dso_event_callback); 2669 dso_set_life_cycle_callback(dso, srpl_connection_dso_life_cycle_callback); 2670 2671 connection->datagram_callback = srpl_datagram_callback; 2672 connection->disconnected = srpl_disconnected_callback; 2673 ioloop_comm_context_set(connection, srpl_connection, srpl_connection_context_release); 2674 RETAIN_HERE(srpl_connection, srpl_connection); // the connection has a reference. 2675 2676 srpl_connection_next_state(srpl_connection, srpl_state_session_message_wait); 2677 srpl_instance_dso_event_callback(srpl_connection, message, dso, kDSOEventType_DSOMessage); 2678 2679 // We drop it after we set it up because that lets us send a retry_delay to the peer. 2680 if (instance->domain == NULL || instance->domain->srpl_opstate != SRPL_OPSTATE_ROUTINE) { 2681 INFO(PRI_S_SRP ": dropping peer reconnect because we aren't in routine state.", instance->instance_name); 2682 RELEASE_HERE(instance, srpl_instance); 2683 srpl_connection->instance = NULL; 2684 srpl_connection_discontinue(srpl_connection); 2685 RELEASE_HERE(srpl_connection, srpl_connection); 2686 return; 2687 } 2688 2689 // If we already have a connection with the remote partner, we replace it with the new connection. 
2690 if (instance->connection != NULL) { 2691 INFO(PRI_S_SRP ": we already have a connection (%p).", instance->instance_name, old_connection); 2692 old_connection = instance->connection; 2693 RELEASE_HERE(old_connection->instance, srpl_instance); 2694 old_connection->instance = NULL; 2695 srpl_connection_discontinue(old_connection); 2696 RELEASE_HERE(old_connection, srpl_connection); 2697 } 2698 instance->connection = srpl_connection; // Retained via create/copy rule. 2699 } 2700 2701 static void 2702 srpl_add_unidentified_server(comm_t *connection, message_t *message, dso_state_t *dso, srp_server_t *server_state) 2703 { 2704 srpl_instance_t **inp, *unmatched_instance = NULL; 2705 2706 const char *instance_name; 2707 char nbuf[kDNSServiceMaxDomainName]; 2708 // Take ip address as the instance name 2709 if (connection->address.sa.sa_family == AF_INET6) { 2710 instance_name = inet_ntop(AF_INET6, &connection->address.sin6.sin6_addr, nbuf, sizeof nbuf); 2711 } else { 2712 instance_name = inet_ntop(AF_INET, &connection->address.sin.sin_addr, nbuf, sizeof nbuf); 2713 } 2714 2715 // Check if an unmatched instance has been created for the same address 2716 for (inp = &server_state->unmatched_instances; *inp != NULL; inp = &(*inp)->next) { 2717 if (!strcmp((*inp)->instance_name, instance_name)) { 2718 INFO("we already have an unmatched instance " PRI_S_SRP, instance_name); 2719 unmatched_instance = *inp; 2720 break; 2721 } 2722 } 2723 if (unmatched_instance == NULL) { 2724 INFO("create a temporary instance " PRI_S_SRP, instance_name); 2725 unmatched_instance = calloc(1, sizeof(*unmatched_instance)); 2726 if (unmatched_instance == NULL) { 2727 ERROR("no memory for unmatched instance"); 2728 return; 2729 } 2730 RETAIN_HERE(unmatched_instance, srpl_instance); // The unmatched instance list will hold this reference. 
2731 // Create a dummy domain because domain can not be decided at this point 2732 srpl_domain_t *domain = calloc(1, sizeof(*domain)); 2733 if (domain == NULL) { 2734 ERROR("no memory for domain structure"); 2735 RELEASE_HERE(unmatched_instance, srpl_instance); 2736 return; 2737 } 2738 RETAIN_HERE(domain, srpl_domain); 2739 unmatched_instance->domain = domain; 2740 domain->server_state = server_state; 2741 unmatched_instance->unmatched = true; 2742 2743 unmatched_instance->instance_name = strdup(instance_name); 2744 if (unmatched_instance->instance_name == NULL) { 2745 ERROR("no memory for unmatched instance" PRI_S_SRP, instance_name); 2746 RELEASE_HERE(unmatched_instance, srpl_instance); 2747 return; 2748 } 2749 // Find the end of the list and append the newly created instance. 2750 for (inp = &server_state->unmatched_instances; *inp != NULL; inp = &(*inp)->next) { 2751 } 2752 *inp = unmatched_instance; 2753 } 2754 srpl_associate_incoming_with_instance(connection, message, dso, unmatched_instance); 2755 } 2756 2757 static void 2758 srpl_match_unidentified_with_instance(srpl_connection_t *connection, 2759 srpl_instance_t *instance) 2760 { 2761 srpl_instance_t *cur = connection->instance; 2762 2763 // Take the connection from its instance. 
2764 RETAIN_HERE(connection, srpl_connection); // for the function 2765 RELEASE_HERE(cur->connection, srpl_connection); 2766 cur->connection = NULL; 2767 RELEASE_HERE(connection->instance, srpl_instance); // Get rid of the instance's reference to the connection 2768 connection->instance = NULL; 2769 2770 // Remove the currently associated instance from the unmatched_instances 2771 srpl_instance_t **sp = NULL; 2772 2773 INFO("matched temporary instance " PRI_S_SRP " to instance " PRI_S_SRP " with partner_id %" PRIx64, 2774 cur->instance_name, instance->instance_name, instance->partner_id); 2775 instance->version_mismatch = cur->version_mismatch; 2776 #define POINTER_TO_HEX_MAX_STRLEN 19 // 0x<...> 2777 size_t srpl_connection_name_length = strlen(instance->instance_name) + 2 + POINTER_TO_HEX_MAX_STRLEN + 3; 2778 char *new_name = malloc(srpl_connection_name_length); 2779 if (new_name != NULL) { 2780 free(connection->name); 2781 connection->name = new_name; 2782 // unidentified connection is by definition an incoming connection. 2783 snprintf(new_name, srpl_connection_name_length, "<%s (%p)", instance->instance_name, connection); 2784 } 2785 srp_server_t *server_state = cur->domain->server_state; 2786 for (sp = &server_state->unmatched_instances; *sp; sp = &(*sp)->next) { 2787 if (*sp == cur) { 2788 *sp = cur->next; 2789 break; 2790 } 2791 } 2792 2793 // Release the unmatched instance list's reference to the unmatched instance. We already released the srpl_connection's 2794 // reference, so this should dispose of the unmatched instance. 
2795 RELEASE_HERE(cur, srpl_instance); 2796 2797 if (instance->domain == NULL || instance->domain->srpl_opstate != SRPL_OPSTATE_ROUTINE) { 2798 INFO(PRI_S_SRP "dropping peer reconnect because we aren't in routine state.", instance->domain->name); 2799 srpl_disconnect(connection); 2800 goto out; 2801 } 2802 2803 if (connection->state == srpl_state_idle || 2804 connection->state == srpl_state_disconnect || 2805 connection->state == srpl_state_disconnect_wait) 2806 { 2807 INFO("connection " PRI_S_SRP " is in " PUB_S_SRP, connection->name, srpl_state_name(connection->state)); 2808 goto out; 2809 } 2810 2811 if (connection->database_synchronized) { 2812 srpl_state_transition_by_dataset_id(instance->domain, instance); 2813 } 2814 2815 // Release any older connection we might have. 2816 if (instance->connection) { 2817 srpl_connection_t *old_connection = instance->connection; 2818 if (old_connection->instance != NULL) { 2819 RELEASE_HERE(old_connection->instance, srpl_instance); 2820 old_connection->instance = NULL; 2821 } 2822 srpl_disconnect(instance->connection); 2823 RELEASE_HERE(instance->connection, srpl_connection); // Instance still holds reference. 2824 instance->connection = NULL; 2825 INFO("release connection on instance " PRI_S_SRP " with partner_id %" PRIx64, instance->instance_name, instance->partner_id); 2826 } 2827 instance->connection = connection; 2828 RETAIN_HERE(instance->connection, srpl_connection); 2829 connection->instance = instance; 2830 RETAIN_HERE(connection->instance, srpl_instance); // Retain on the connection 2831 out: 2832 RELEASE_HERE(connection, srpl_connection); // done with using it for this function. 
2833 } 2834 2835 void 2836 srpl_dso_server_message(comm_t *connection, message_t *message, dso_state_t *dso, srp_server_t *server_state) 2837 { 2838 srpl_domain_t *domain; 2839 srpl_instance_t *instance; 2840 srpl_instance_service_t *service; 2841 address_query_t *address; 2842 int i; 2843 2844 // Figure out from which instance this connection originated 2845 for (domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 2846 for (instance = domain->instances; instance != NULL; instance = instance->next) { 2847 for (service = instance->services; service != NULL; service = service->next) { 2848 address = service->address_query; 2849 if (address == NULL) { 2850 continue; 2851 } 2852 for (i = 0; i < address->num_addresses; i++) { 2853 if (ip_addresses_equal(&connection->address, &address->addresses[i])) { 2854 INFO("SRP Replication connection received from " PRI_S_SRP " on " PRI_S_SRP, 2855 address->hostname, connection->name); 2856 srpl_associate_incoming_with_instance(connection, message, dso, instance); 2857 return; 2858 } 2859 } 2860 } 2861 } 2862 } 2863 2864 INFO("incoming SRP Replication server connection from unrecognized server " PRI_S_SRP, connection->name); 2865 srpl_add_unidentified_server(connection, message, dso, server_state); 2866 } 2867 2868 static void 2869 srpl_connected(comm_t *connection, void *context) 2870 { 2871 srpl_connection_t *srpl_connection = context; 2872 2873 INFO(PRI_S_SRP " connected", connection->name); 2874 connection->dso = dso_state_create(false, 2, connection->name, srpl_instance_dso_event_callback, 2875 srpl_connection, srpl_connection_dso_life_cycle_callback, connection); 2876 if (connection->dso == NULL) { 2877 ERROR(PRI_S_SRP " can't create dso state object.", srpl_connection->name); 2878 srpl_disconnect(srpl_connection); 2879 return; 2880 } 2881 RETAIN_HERE(srpl_connection, srpl_connection); // dso holds reference to connection 2882 srpl_connection->dso = connection->dso; 2883 2884 // Generate an event 
indicating that we've been connected 2885 srpl_event_t event; 2886 srpl_event_initialize(&event, srpl_event_connected); 2887 srpl_event_deliver(srpl_connection, &event); 2888 } 2889 2890 static bool 2891 srpl_connection_connect(srpl_connection_t *srpl_connection) 2892 { 2893 if (srpl_connection->instance == NULL) { 2894 ERROR(PRI_S_SRP ": no instance to connect to", srpl_connection->name); 2895 return false; 2896 } 2897 srpl_connection->connection = ioloop_connection_create(&srpl_connection->connected_address, 2898 // tls, stream, stable, opportunistic 2899 true, true, true, true, 2900 srpl_datagram_callback, srpl_connected, 2901 srpl_disconnected_callback, srpl_connection_context_release, 2902 srpl_connection); 2903 if (srpl_connection->connection == NULL) { 2904 ADDR_NAME_LOGGER(ERROR, &srpl_connection->connected_address, "can't create connection to address ", 2905 " for srpl connection ", " port ", srpl_connection->name, 2906 srpl_connection->connected_address.sa.sa_family == AF_INET ? 2907 srpl_connection->connected_address.sin.sin_port: srpl_connection->connected_address.sin6.sin6_port); 2908 return false; 2909 } 2910 srpl_connection->is_server = false; 2911 ADDR_NAME_LOGGER(INFO, &srpl_connection->connected_address, "connecting to address ", " for instance ", " port ", 2912 srpl_connection->name, srpl_connection->connected_address.sa.sa_family == AF_INET ? 
2913 srpl_connection->connected_address.sin.sin_port: srpl_connection->connected_address.sin6.sin6_port); 2914 RETAIN_HERE(srpl_connection, srpl_connection); // For the connection's reference 2915 return true; 2916 } 2917 2918 static void 2919 srpl_instance_is_me(srpl_instance_t *instance, srpl_instance_service_t *service, const char *ifname, const addr_t *address, bool pid_match) 2920 { 2921 instance->is_me = true; 2922 if (ifname != NULL) { 2923 INFO(PUB_S_SRP "/" PUB_S_SRP ": name server for service " PRI_S_SRP " is me.", service->host_name, ifname, service->full_service_name); 2924 } else if (address != NULL) { 2925 ADDR_NAME_LOGGER(INFO, address, "", " service ", " is me. ", service->host_name, 0); 2926 } else if (pid_match) { 2927 INFO(PUB_S_SRP ": partner id %" PRIx64 "match.", instance->instance_name, instance->partner_id); 2928 } else { 2929 ERROR("null ifname and address, partner id doesn't match!"); 2930 return; 2931 } 2932 2933 // When we create the instance, we start an outgoing connection; when we discover that this is a connection 2934 // to me, we can discontinue that outgoing connection. 2935 if (instance->connection && !instance->connection->is_server) { 2936 srpl_connection_discontinue(instance->connection); 2937 } 2938 } 2939 2940 static bool 2941 srpl_my_address_check(srp_server_t *server_state, const addr_t *address) 2942 { 2943 static interface_address_state_t *ifaddrs = NULL; 2944 interface_address_state_t *ifa; 2945 static time_t last_fetch = 0; 2946 // Update the interface address list every sixty seconds, but only if we're asked to check an address. 2947 const time_t now = srp_time(); 2948 if (last_fetch == 0 || now - last_fetch > 60) { 2949 last_fetch = now; 2950 ioloop_map_interface_addresses_here(server_state, &ifaddrs, NULL, NULL, NULL); 2951 } 2952 // See if there's a match. 
2953 for (ifa = ifaddrs; ifa; ifa = ifa->next) { 2954 if (ip_addresses_equal(address, &ifa->addr)) { 2955 return true; 2956 } 2957 } 2958 return false; 2959 } 2960 2961 static void 2962 srpl_instance_address_callback(void *context, addr_t *address, bool added, bool more, int err) 2963 { 2964 srpl_instance_service_t *service = context; 2965 srpl_instance_t *instance = service->instance; 2966 if (err != kDNSServiceErr_NoError) { 2967 ERROR("service instance address resolution for " PRI_S_SRP " failed with %d", service->host_name, err); 2968 if (service->address_query) { 2969 address_query_cancel(service->address_query); 2970 RELEASE_HERE(service->address_query, address_query); 2971 service->address_query = NULL; 2972 } 2973 return; 2974 } 2975 2976 if (added) { 2977 bool matched_unidentified = false; 2978 srp_server_t *server_state = service->domain->server_state; 2979 srpl_instance_t **up = &server_state->unmatched_instances; 2980 while (*up != NULL) { 2981 srpl_instance_t *unmatched_instance = *up; 2982 srpl_connection_t *unidentified = unmatched_instance->connection; 2983 if (unidentified == NULL) { 2984 up = &(*up)->next; 2985 continue; 2986 } 2987 if (unidentified->dso == NULL) { 2988 FAULT("unidentified instance " PRI_S_SRP " (%p) has outgoing connection (%p)!", 2989 unmatched_instance->instance_name, unmatched_instance, unidentified); 2990 srpl_connection_discontinue(unidentified); 2991 RELEASE_HERE(unmatched_instance->connection, srpl_connection); 2992 unmatched_instance->connection = NULL; 2993 up = &(*up)->next; 2994 continue; 2995 } 2996 if (ip_addresses_equal(address, &unidentified->connected_address)) { 2997 INFO("Unidentified connection " PRI_S_SRP " matches new address for instance " PRI_S_SRP, 2998 unidentified->dso->remote_name, instance->instance_name); 2999 srpl_match_unidentified_with_instance(unidentified, instance); 3000 matched_unidentified = true; 3001 break; 3002 } else { 3003 if (unidentified->connected_address.sa.sa_family == AF_INET6) { 
3004 SEGMENTED_IPv6_ADDR_GEN_SRP(&unidentified->connected_address.sin6.sin6_addr, rdata_buf); 3005 INFO("Unidentified connection address is: " PRI_SEGMENTED_IPv6_ADDR_SRP, 3006 SEGMENTED_IPv6_ADDR_PARAM_SRP(&unidentified->connected_address.sin6.sin6_addr, rdata_buf)); 3007 } else { 3008 IPv4_ADDR_GEN_SRP(&unidentified->connected_address.sin.sin_addr, rdata_buf); 3009 INFO("Unidentified connection address is: " PRI_IPv4_ADDR_SRP, 3010 IPv4_ADDR_PARAM_SRP(&unidentified->connected_address.sin.sin_addr, rdata_buf)); 3011 } 3012 if (address->sa.sa_family == AF_INET6) { 3013 SEGMENTED_IPv6_ADDR_GEN_SRP(&address->sin6.sin6_addr, rdata_buf); 3014 INFO("New address is: " PRI_SEGMENTED_IPv6_ADDR_SRP, 3015 SEGMENTED_IPv6_ADDR_PARAM_SRP(&address->sin6.sin6_addr, rdata_buf)); 3016 } else { 3017 IPv4_ADDR_GEN_SRP(&address->sin.sin_addr, rdata_buf); 3018 INFO("New address is: " PRI_IPv4_ADDR_SRP, 3019 IPv4_ADDR_PARAM_SRP(&address->sin.sin_addr, rdata_buf)); 3020 } 3021 INFO("Unidentified connection addr %p does not match new address for instance addr %p", 3022 unidentified, instance); 3023 INFO("Unidentified connection " PRI_S_SRP " does not match new address for instance " PRI_S_SRP, 3024 unidentified->dso->remote_name, instance->instance_name); 3025 up = &(*up)->next; 3026 } 3027 } 3028 3029 if (srpl_my_address_check(server_state, address)) { 3030 srpl_instance_is_me(instance, service, NULL, address, false); 3031 } else { 3032 instance->added_address = true; 3033 if (matched_unidentified) { 3034 instance->matched_unidentified = true; 3035 } 3036 } 3037 } else { 3038 srpl_event_t event; 3039 srpl_event_initialize(&event, srpl_event_address_remove); 3040 3041 // Generate an event indicating that an address has been removed. 
3042 if (!instance->is_me) { 3043 if (instance->connection != NULL) { 3044 srpl_event_deliver(instance->connection, &event); 3045 } 3046 } 3047 } 3048 3049 INFO(PRI_S_SRP " on instance " PRI_S_SRP ", matched_unidentified " PRI_S_SRP ", added_address " PRI_S_SRP, 3050 more ? "more coming" : "done", instance->instance_name, instance->matched_unidentified ? "yes" : "no", 3051 instance->added_address ? "yes" : "no"); 3052 if (!more) { 3053 if (instance->added_address && !instance->matched_unidentified) { 3054 // Generate an event indicating that we have a new address. 3055 if (instance->connection != NULL && !instance->connection->is_server) { 3056 srpl_event_t event; 3057 srpl_event_initialize(&event, srpl_event_address_add); 3058 srpl_event_deliver(instance->connection, &event); 3059 } 3060 } 3061 // reset added_address and matched_unidentified 3062 instance->added_address = false; 3063 instance->matched_unidentified = false; 3064 } 3065 } 3066 3067 static void 3068 srpl_abandon_nonpreferred_dataset(srpl_domain_t *NONNULL domain) 3069 { 3070 srpl_instance_t *instance, *next; 3071 for (instance = domain->instances; instance != NULL; instance = next) { 3072 next = instance->next; 3073 if (instance->have_dataset_id) { 3074 if (srpl_dataset_id_compare(domain->dataset_id, instance->dataset_id) > 0) { 3075 instance->sync_to_join = false; 3076 if (instance->connection != NULL) { 3077 INFO("abandon dataset with instance " PRI_S_SRP " of partner id %" PRIx64, 3078 instance->instance_name, instance->partner_id); 3079 srpl_connection_reset(instance->connection); 3080 srpl_connection_next_state(instance->connection, srpl_state_disconnected); 3081 } 3082 } 3083 } 3084 } 3085 } 3086 3087 static void srpl_transition_to_startup_state(srpl_domain_t *domain); 3088 3089 static int 3090 srpl_dataset_id_compare(uint64_t id1, uint64_t id2) 3091 { 3092 int64_t distance = id1 - id2; 3093 if (distance == 0) { 3094 return 0; 3095 } else if (distance > 0) { 3096 return 1; 3097 } else if 
(distance == EQUI_DISTANCE64 && (int64_t)id1 > (int64_t)id2) { 3098 // the number 2^(N1) (where N is 64) is equidistant in both directions in sequence number terms. 3099 // they are both considered to be "less than" each other. This is true for any serial number with 3100 // distance of 0x8000000000000000 between them. To break the tie, higher signed number wins. 3101 return 1; 3102 } else { 3103 return -1; 3104 } 3105 } 3106 3107 static bool 3108 srpl_instances_max_dataset_id(srpl_domain_t *domain, uint64_t *dataset_id) 3109 { 3110 srpl_instance_t *instance = NULL; 3111 bool have_max = false; 3112 uint64_t max = 0; 3113 3114 for (instance = domain->instances; instance != NULL; instance = instance->next) { 3115 if (instance->sync_fail || instance->discontinuing || instance->version_mismatch) { 3116 continue; 3117 } 3118 if (!have_max) { 3119 max = instance->dataset_id; 3120 have_max = true; 3121 } else { 3122 if (srpl_dataset_id_compare(instance->dataset_id, max) > 0) { 3123 max = instance->dataset_id; 3124 } 3125 } 3126 } 3127 if (!have_max) { 3128 INFO("no available dataset id."); 3129 } else { 3130 *dataset_id = max; 3131 } 3132 return have_max; 3133 } 3134 3135 static void 3136 srpl_instance_add(const char *hostname, const char *service_name, 3137 const char *ifname, srpl_domain_t *domain, srpl_instance_service_t *service, 3138 bool have_partner_id, uint64_t advertised_partner_id, 3139 bool have_dataset_id, uint64_t advertised_dataset_id, 3140 bool have_priority, uint32_t advertised_priority) 3141 { 3142 srpl_instance_t **sp, *instance; 3143 srpl_instance_service_t **hp; 3144 3145 // Find the service on the instance list for this domain. 
3146 for (instance = domain->instances; instance != NULL; instance = instance->next) { 3147 for (hp = &instance->services; *hp != NULL; hp = &(*hp)->next) { 3148 if (service == *hp) { 3149 INFO("service " PRI_S_SRP " is found with instance " PRI_S_SRP, service_name, instance->instance_name); 3150 break; 3151 } 3152 } 3153 if (*hp != NULL) { 3154 break; 3155 } 3156 } 3157 3158 if (instance == NULL) { 3159 INFO("service " PRI_S_SRP " for " PRI_S_SRP "/" PUB_S_SRP " " PUB_S_SRP 3160 "id %" PRIx64 " " PUB_S_SRP "did %" PRIx64 " not on list", 3161 service_name != NULL ? service_name : "<NULL>", hostname, ifname, 3162 have_partner_id ? "" : "!", advertised_partner_id, 3163 have_dataset_id ? "" : "!", advertised_dataset_id); 3164 // Look for the instance with the same partner id 3165 for (sp = &domain->instances; *sp != NULL; sp = &(*sp)->next) { 3166 instance = *sp; 3167 if (instance->have_partner_id && instance->partner_id == advertised_partner_id) { 3168 INFO("instance " PRI_S_SRP " has matched partner_id %" PRIx64, instance->instance_name, instance->partner_id); 3169 break; 3170 } 3171 } 3172 3173 if (*sp == NULL) { 3174 // We don't have the instance to the remote partner yet, create one 3175 instance = calloc(1, sizeof(*instance)); 3176 if (instance == NULL) { 3177 ERROR("no memory to create instance for service " PRI_S_SRP, service_name); 3178 RELEASE_HERE(service, srpl_instance_service); 3179 return; 3180 } 3181 // Retain for the instance list on the domain 3182 RETAIN_HERE(instance, srpl_instance); 3183 instance->domain = domain; 3184 RETAIN_HERE(instance->domain, srpl_domain); 3185 instance->services = service; 3186 *sp = instance; 3187 INFO("create a new instance for service " PRI_S_SRP, service_name); 3188 } else { 3189 for (hp = &instance->services; *hp != NULL; hp = &(*hp)->next) 3190 ; 3191 *hp = service; 3192 INFO("instance " PRI_S_SRP " exists; just link the service " PRI_S_SRP, instance->instance_name, service_name); 3193 } 3194 // Retain service for the 
instance service list 3195 RETAIN_HERE(service, srpl_instance_service); 3196 // Retain instance for service 3197 RETAIN_HERE(instance, srpl_instance); 3198 service->instance = instance; 3199 } 3200 // take the host name of the remote partner as the instance name 3201 char *pch = strchr(service_name, '.'); 3202 if (instance->instance_name == NULL || strncmp(instance->instance_name, service_name, pch - service_name)) { 3203 char partner_name[kDNSServiceMaxDomainName]; 3204 memcpy(partner_name, service_name, pch - service_name); 3205 partner_name[pch - service_name] = '\0'; 3206 char *new_partner_name = strdup(partner_name); 3207 if (new_partner_name == NULL) { 3208 ERROR("no memory for instance name."); 3209 return; 3210 } else { 3211 INFO("instance name changed from " PRI_S_SRP " to " PRI_S_SRP, instance->instance_name ? 3212 instance->instance_name : "NULL", new_partner_name); 3213 free(instance->instance_name); 3214 instance->instance_name = new_partner_name; 3215 } 3216 } 3217 bool some_id_updated = false; 3218 if (have_dataset_id && (!instance->have_dataset_id || instance->dataset_id != advertised_dataset_id)) { 3219 some_id_updated = true; 3220 instance->have_dataset_id = true; 3221 instance->dataset_id = advertised_dataset_id; 3222 INFO("update instance " PRI_S_SRP " dataset_id %" PRIx64 " from service " PRI_S_SRP, 3223 instance->instance_name, instance->dataset_id, service_name); 3224 } 3225 3226 // If this add changed the partner ID, we may want to re-attempt a connect. 3227 if (have_partner_id && (!instance->have_partner_id || instance->partner_id != advertised_partner_id)) { 3228 some_id_updated = true; 3229 instance->have_partner_id = true; 3230 instance->partner_id = advertised_partner_id; 3231 INFO("instance " PRI_S_SRP " update partner_id to %" PRIx64, instance->instance_name, advertised_partner_id); 3232 } 3233 3234 // If this add changed the priority, we may want to re-attempt a connect. 
3235 if (have_priority && (!instance->have_priority || instance->priority != advertised_priority)) { 3236 some_id_updated = true; 3237 instance->have_priority = true; 3238 instance->priority = advertised_priority; 3239 INFO("instance " PRI_S_SRP " update priority to %" PRIx32, instance->instance_name, advertised_priority); 3240 } 3241 // To join the replication, sync with remote partners that are discovered during the 3242 // discovery window. 3243 if (domain->partner_discovery_pending) { 3244 instance->discovered_in_window = true; 3245 } 3246 3247 // If the hostname changed, we need to restart the address query. 3248 if (service->host_name == NULL || strcmp(service->host_name, hostname)) { 3249 if (service->address_query != NULL) { 3250 address_query_cancel(service->address_query); 3251 RELEASE_HERE(service->address_query, address_query); 3252 service->address_query = NULL; 3253 } 3254 3255 if (service->host_name != NULL) { 3256 INFO("name server name change from " PRI_S_SRP " to " PRI_S_SRP " for " PRI_S_SRP "/" PUB_S_SRP " in domain " PRI_S_SRP, 3257 service->host_name, hostname, service_name == NULL ? "<NULL>" : service_name, ifname, domain->name); 3258 } else { 3259 INFO("new name server " PRI_S_SRP " for " PRI_S_SRP "/" PUB_S_SRP " in domain " PRI_S_SRP, 3260 hostname, service_name == NULL ? "<NULL>" : service_name, ifname, domain->name); 3261 } 3262 3263 char *new_name = strdup(hostname); 3264 if (new_name == NULL) { 3265 // This should never happen, and if it does there's actually no clean way to recover from it. This approach 3266 // will result in no crash, and since we don't start an address query in this case, we will just wind up in 3267 // a quiescent state for this replication peer until something changes. 3268 ERROR("no memory for service name."); 3269 return; 3270 } else { 3271 free(service->host_name); 3272 service->host_name = new_name; 3273 } 3274 // The instance may be connected. It's possible its IP address hasn't changed. 
If it has changed, we should 3275 // get a disconnect due to a connection timeout or (if something else got the same address, a reset) if for 3276 // no other reason, and then we'll try to reconnect, so this should be harmless. 3277 } 3278 3279 // The address query can be NULL either because we only just created the instance, or because the instance name changed (e.g. 3280 // as the result of a hostname conflict). 3281 if (service->address_query == NULL) { 3282 service->address_query = address_query_create(service->host_name, service, 3283 srpl_instance_address_callback, 3284 srpl_instance_service_context_release); 3285 if (service->address_query == NULL) { 3286 INFO("unable to create address query"); 3287 } else { 3288 RETAIN_HERE(service, srpl_instance_service); // retain for the address query. 3289 } 3290 } 3291 3292 if (instance->have_partner_id && 3293 domain->partner_id == instance->partner_id) 3294 { 3295 srpl_instance_is_me(instance, service, NULL, NULL, true); 3296 } 3297 3298 // If there's no existing connection, the partner initiates an outgoing connection if 3299 // it is in the startup state or its partner id is greater than the remote partner id. 
3300 if (!instance->is_me && ((instance->connection == NULL || some_id_updated) && 3301 (domain->srpl_opstate == SRPL_OPSTATE_STARTUP || 3302 domain->partner_id > advertised_partner_id))) 3303 { 3304 char msg_buf[256]; 3305 if (domain->srpl_opstate == SRPL_OPSTATE_STARTUP) { 3306 snprintf(msg_buf, sizeof(msg_buf), "I am in startup state"); 3307 } else { 3308 snprintf(msg_buf, sizeof(msg_buf), "local partner_id %" PRIx64 3309 " greater than remote partner_id %" PRIx64, domain->partner_id, 3310 advertised_partner_id); 3311 } 3312 INFO("making outgoing connection on instance " PRI_S_SRP " (partner_id: %" PRIx64 ") since " PUB_S_SRP, 3313 instance->instance_name, instance->partner_id, msg_buf); 3314 if (instance->connection != NULL && instance->connection->connection != NULL) { 3315 srpl_trigger_disconnect(instance->connection); 3316 srpl_connection_next_state(instance->connection, srpl_state_idle); 3317 } else { 3318 srpl_instance_reconnect(instance); 3319 } 3320 } else { 3321 INFO(PRI_S_SRP ": not making outgoing connection: " PUB_S_SRP "is_me, connection = %p, " PRI_S_SRP 3322 "some_id_updated, local partner_id %" PRIx64 ", remote partner_id %" PRIx64, 3323 instance->instance_name, instance->is_me ? "" : "!", instance->connection, some_id_updated ? "" : "!", 3324 domain->partner_id, advertised_partner_id); 3325 // it's not our job to connect, but since there's some id change, we'd disconnect 3326 // the current connection and trigger the peer to reconnect. 
3327 if (some_id_updated && instance->connection != NULL && 3328 instance->connection->connection != NULL) 3329 { 3330 INFO("some id updated, disconnect the current connection."); 3331 srpl_trigger_disconnect(instance->connection); 3332 srpl_connection_next_state(instance->connection, srpl_state_idle); 3333 } 3334 } 3335 } 3336 3337 static void 3338 srpl_resolve_callback(srpl_instance_service_t *service) 3339 { 3340 char ifname[IFNAMSIZ]; 3341 srpl_domain_t *domain = service->domain; 3342 const char *domain_name; 3343 uint8_t domain_len; 3344 const char *partner_id_string; 3345 const char *dataset_id_string; 3346 const char *xpanid_string; 3347 const char *priority_string; 3348 uint8_t partner_id_string_len; 3349 uint8_t dataset_id_string_len; 3350 uint8_t xpanid_string_len; 3351 uint8_t priority_string_len; 3352 char partner_id_buf[INT64_HEX_STRING_MAX]; 3353 char dataset_id_buf[INT64_HEX_STRING_MAX]; 3354 char xpanid_buf[INT64_HEX_STRING_MAX]; 3355 char priority_buf[INT64_HEX_STRING_MAX]; 3356 uint64_t advertised_partner_id = 0; 3357 bool have_partner_id = false; 3358 uint64_t advertised_dataset_id = 0; 3359 bool have_dataset_id = false; 3360 bool have_priority = false; 3361 uint16_t advertised_priority = 0; 3362 srpl_instance_service_t **sp; 3363 srp_server_t *server_state = domain->server_state; 3364 3365 // These are just used to do the "satisfied" check--we can tell that we have these records from the rdata pointers. 3366 service->have_txt_record = service->have_srv_record = false; 3367 3368 // In case we later determine that the data we got is stale, this flag indicates that it's okay to try to 3369 // reconfirm it. 
3370 service->got_new_info = true; 3371 3372 if (service->txt_rdata == NULL) { 3373 INFO(PRI_S_SRP ": service update with no TXT record--skipping", service->full_service_name); 3374 return; 3375 } 3376 if (service->srv_rdata == NULL) { 3377 INFO(PRI_S_SRP ": service update with no SRV record--skipping", service->full_service_name); 3378 return; 3379 } 3380 3381 domain_name = TXTRecordGetValuePtr(service->txt_length, service->txt_rdata, "dn", &domain_len); 3382 if (domain_name == NULL) { 3383 INFO("resolve for " PRI_S_SRP " succeeded, but there is no domain name.", service->full_service_name); 3384 return; 3385 } 3386 3387 if (domain_len != strlen(domain->name) || memcmp(domain_name, domain->name, domain_len)) { 3388 const char *domain_print; 3389 char *domain_terminated = malloc(domain_len + 1); 3390 if (domain_terminated == NULL) { 3391 domain_print = "<no memory for domain name>"; 3392 } else { 3393 memcpy(domain_terminated, domain_name, domain_len); 3394 domain_terminated[domain_len] = 0; 3395 domain_print = domain_terminated; 3396 } 3397 INFO("domain (" PRI_S_SRP ") for " PRI_S_SRP " doesn't match expected domain " PRI_S_SRP, 3398 domain_print, service->full_service_name, domain->name); 3399 free(domain_terminated); 3400 return; 3401 } 3402 3403 if (server_state->current_thread_domain_name == NULL) { 3404 FAULT("current_thread_domain_name is NULL."); 3405 return; 3406 } 3407 3408 if (strcmp(domain->name, server_state->current_thread_domain_name)) { 3409 INFO("discovered srpl instance is not for current thread domain, so not setting up replication."); 3410 return; 3411 } 3412 3413 INFO("server " PRI_S_SRP " for " PRI_S_SRP, service->full_service_name, domain->name); 3414 3415 // Make sure it's for our mesh. 
3416 snprintf(xpanid_buf, sizeof(xpanid_buf), "%" PRIx64, server_state->xpanid); 3417 xpanid_string = TXTRecordGetValuePtr(service->txt_length, service->txt_rdata, "xpanid", &xpanid_string_len); 3418 if (xpanid_string == NULL || 3419 (xpanid_string_len != strlen(xpanid_buf) || memcmp(xpanid_buf, xpanid_string, xpanid_string_len))) 3420 { 3421 char other_xpanid_buf[INT64_HEX_STRING_MAX]; 3422 if (xpanid_string_len >= sizeof(other_xpanid_buf)) { 3423 xpanid_string_len = sizeof(other_xpanid_buf) - 1; 3424 } 3425 if (xpanid_string == NULL) { 3426 const char none[] = "(none)"; 3427 memcpy(other_xpanid_buf, none, sizeof(none)); 3428 } else { 3429 memcpy(other_xpanid_buf, xpanid_string, xpanid_string_len); 3430 other_xpanid_buf[xpanid_string_len] = 0; 3431 } 3432 INFO("discovered srpl instance is not for xpanid " PRI_S_SRP ", not " PRI_S_SRP 3433 " so not setting up replication.", xpanid_buf, other_xpanid_buf); 3434 return; 3435 } 3436 3437 partner_id_string = TXTRecordGetValuePtr(service->txt_length, service->txt_rdata, "pid", &partner_id_string_len); 3438 if (partner_id_string != NULL && partner_id_string_len < INT64_HEX_STRING_MAX) { 3439 char *endptr, *nulptr; 3440 unsigned long long num; 3441 memcpy(partner_id_buf, partner_id_string, partner_id_string_len); 3442 nulptr = &partner_id_buf[partner_id_string_len]; 3443 *nulptr = '\0'; 3444 num = strtoull(partner_id_buf, &endptr, 16); 3445 // On current architectures, unsigned long long and uint64_t are the same size, but we should have a check here 3446 // just in case, because the standard doesn't guarantee that this will be true. 3447 // If endptr == nulptr, that means we converted the entire buffer and didn't run into a NUL in the middle of it 3448 // somewhere. 
3449 if (num < UINT64_MAX && endptr == nulptr) { 3450 advertised_partner_id = num; 3451 have_partner_id = true; 3452 } 3453 } 3454 3455 dataset_id_string = TXTRecordGetValuePtr(service->txt_length, service->txt_rdata, "did", &dataset_id_string_len); 3456 if (dataset_id_string != NULL && dataset_id_string_len < INT64_HEX_STRING_MAX) { 3457 char *endptr, *nulptr; 3458 unsigned long long num; 3459 memcpy(dataset_id_buf, dataset_id_string, dataset_id_string_len); 3460 nulptr = &dataset_id_buf[dataset_id_string_len]; 3461 *nulptr = '\0'; 3462 num = strtoull(dataset_id_buf, &endptr, 16); 3463 if (num < UINT64_MAX && endptr == nulptr) { 3464 advertised_dataset_id = num; 3465 have_dataset_id = true; 3466 } 3467 } 3468 3469 priority_string = TXTRecordGetValuePtr(service->txt_length, service->txt_rdata, "priority", &priority_string_len); 3470 if (priority_string != NULL && priority_string_len < INT64_HEX_STRING_MAX) { 3471 char *endptr, *nulptr; 3472 unsigned long long num; 3473 memcpy(priority_buf, priority_string, priority_string_len); 3474 nulptr = &priority_buf[priority_string_len]; 3475 *nulptr = '\0'; 3476 num = strtoull(priority_buf, &endptr, 16); 3477 if (num < UINT64_MAX && endptr == nulptr) { 3478 advertised_priority = num; 3479 have_priority = true; 3480 } 3481 } 3482 3483 dns_rr_t srv_record; 3484 unsigned offset = 0; 3485 memset(&srv_record, 0, sizeof(srv_record)); 3486 srv_record.type = dns_rrtype_srv; 3487 if (!dns_rdata_parse_data(&srv_record, service->srv_rdata, &offset, offset + service->srv_length, service->srv_length, 0)) { 3488 ERROR(PRI_S_SRP ": unable to parse srv record", service->full_service_name); 3489 return; 3490 } 3491 3492 service->outgoing_port = srv_record.data.srv.port; 3493 3494 if (if_indextoname(service->ifindex, ifname) == NULL) { 3495 snprintf(ifname, sizeof(ifname), "%d", service->ifindex); 3496 } 3497 3498 char namebuf[DNS_MAX_NAME_SIZE_ESCAPED + 1]; 3499 dns_name_print(srv_record.data.srv.name, namebuf, sizeof(namebuf)); 3500 
dns_name_free(srv_record.data.srv.name); 3501 3502 srpl_instance_add(namebuf, service->full_service_name, ifname, service->domain, service, have_partner_id, 3503 advertised_partner_id, have_dataset_id, advertised_dataset_id, have_priority, 3504 advertised_priority); 3505 3506 // After the service is associated with a resolved instance, we should take it off the unresolved 3507 // list if the service is still on it. If the service fails to assocaited with an instance because 3508 // for example, the resolve shows a service that does not include required data, we should still 3509 // keep the service on the unresolved list. Later on when we get an expected resolve, the service 3510 // can be moved to the associated list. This guarantees that a service at a time has to be on either 3511 // unresolved or associated list. 3512 for (sp = &domain->unresolved_services; *sp; sp = &(*sp)->next) { 3513 if (*sp == service) { 3514 *sp = service->next; 3515 service->next = NULL; 3516 RELEASE_HERE(service, srpl_instance_service); 3517 break; 3518 } 3519 } 3520 } 3521 3522 static void 3523 srpl_instance_service_newdata_timeout(void *context) 3524 { 3525 srpl_instance_service_t *service = context; 3526 srpl_resolve_callback(service); 3527 } 3528 3529 static void 3530 srpl_instance_service_satisfied_check(srpl_instance_service_t *service) 3531 { 3532 if (service->have_srv_record && service->have_txt_record) { 3533 if (service->resolve_wakeup != NULL) { 3534 ioloop_cancel_wake_event(service->resolve_wakeup); 3535 } 3536 srpl_resolve_callback(service); 3537 return; 3538 } 3539 INFO(PRI_S_SRP ": not satisfied, waiting.", service->full_service_name); 3540 if (service->resolve_wakeup == NULL) { 3541 service->resolve_wakeup = ioloop_wakeup_create(); 3542 if (service->resolve_wakeup == NULL) { 3543 ERROR(PRI_S_SRP ": unable to allocate resolve wakeup", service->full_service_name); 3544 return; 3545 } 3546 } 3547 ioloop_add_wake_event(service->resolve_wakeup, service, 
srpl_instance_service_newdata_timeout, 3548 srpl_instance_service_context_release, 1000); // max one second 3549 RETAIN_HERE(service, srpl_instance_service); // for the wakeup 3550 } 3551 3552 static void 3553 srpl_service_txt_callback(DNSServiceRef UNUSED sdRef, DNSServiceFlags UNUSED flags, uint32_t UNUSED interfaceIndex, 3554 DNSServiceErrorType errorCode, const char *fullname, uint16_t UNUSED rrtype, uint16_t UNUSED rrclass, 3555 uint16_t rdlen, const void *rdata, uint32_t UNUSED ttl, void *context) 3556 { 3557 srpl_instance_service_t *service = context; 3558 if (errorCode != kDNSServiceErr_NoError) { 3559 ERROR("txt resolve for " PRI_S_SRP " failed with %d", fullname, errorCode); 3560 if (service->txt_txn != NULL) { 3561 ioloop_dnssd_txn_cancel(service->txt_txn); 3562 ioloop_dnssd_txn_release(service->txt_txn); 3563 service->txt_txn = NULL; 3564 } 3565 return; 3566 } 3567 3568 free(service->txt_rdata); 3569 if (!(flags & kDNSServiceFlagsAdd)) { 3570 INFO("TXT record for " PRI_S_SRP " went away", service->full_service_name); 3571 service->txt_rdata = NULL; 3572 service->txt_length = 0; 3573 service->have_txt_record = false; 3574 return; 3575 } 3576 service->txt_rdata = malloc(rdlen); 3577 if (service->txt_rdata == NULL) { 3578 ERROR("unable to save txt rdata for " PRI_S_SRP, service->full_service_name); 3579 return; 3580 } 3581 memcpy(service->txt_rdata, rdata, rdlen); 3582 service->txt_length = rdlen; 3583 service->have_txt_record = true; 3584 srpl_instance_service_satisfied_check(service); 3585 } 3586 3587 static void 3588 srpl_service_srv_callback(DNSServiceRef UNUSED sdRef, DNSServiceFlags flags, uint32_t UNUSED interfaceIndex, 3589 DNSServiceErrorType errorCode, const char *UNUSED fullname, uint16_t UNUSED rrtype, uint16_t UNUSED rrclass, 3590 uint16_t rdlen, const void *rdata, uint32_t UNUSED ttl, void *context) 3591 { 3592 srpl_instance_service_t *service = context; 3593 if (errorCode != kDNSServiceErr_NoError) { 3594 ERROR("srv resolve for " PRI_S_SRP " 
failed with %d", fullname, errorCode); 3595 if (service->srv_txn != NULL) { 3596 ioloop_dnssd_txn_cancel(service->srv_txn); 3597 ioloop_dnssd_txn_release(service->srv_txn); 3598 service->srv_txn = NULL; 3599 } 3600 return; 3601 } 3602 3603 free(service->srv_rdata); 3604 if (!(flags & kDNSServiceFlagsAdd)) { 3605 INFO("SRV record for " PRI_S_SRP " went away", service->full_service_name); 3606 service->srv_rdata = NULL; 3607 service->srv_length = 0; 3608 service->have_srv_record = false; 3609 return; 3610 } 3611 service->srv_rdata = malloc(rdlen); 3612 if (service->srv_rdata == NULL) { 3613 ERROR("unable to save srv rdata for " PRI_S_SRP, service->full_service_name); 3614 return; 3615 } 3616 memcpy(service->srv_rdata, rdata, rdlen); 3617 service->srv_length = rdlen; 3618 service->have_srv_record = true; 3619 srpl_instance_service_satisfied_check(service); 3620 } 3621 3622 static void 3623 srpl_browse_restart(void *context) 3624 { 3625 srpl_domain_t *domain = context; 3626 ERROR("restarting browse on domain " PRI_S_SRP, domain->name); 3627 srpl_domain_browse_start(domain); 3628 } 3629 3630 static bool 3631 srpl_service_instance_query_start(srpl_instance_service_t *service, dnssd_txn_t **txn, const char *rrtype_name, 3632 uint16_t rrtype, uint16_t qclass, DNSServiceQueryRecordReply callback) 3633 { 3634 DNSServiceRef sdref; 3635 3636 int err = DNSServiceQueryRecord(&sdref, kDNSServiceFlagsLongLivedQuery, kDNSServiceInterfaceIndexAny, 3637 service->full_service_name, rrtype, qclass, callback, service); 3638 if (err != kDNSServiceErr_NoError) { 3639 ERROR("unable to resolve " PUB_S_SRP " record for " PRI_S_SRP ": code %d", 3640 rrtype_name, service->full_service_name, err); 3641 return false; 3642 } 3643 *txn = ioloop_dnssd_txn_add(sdref, service, srpl_instance_service_context_release, NULL); 3644 if (*txn == NULL) { 3645 ERROR("unable to allocate dnssd_txn_t for " PUB_S_SRP " record for " PRI_S_SRP, 3646 rrtype_name, service->full_service_name); 3647 
DNSServiceRefDeallocate(sdref); 3648 return false; 3649 } 3650 // Retain for the dnssd_txn. 3651 RETAIN_HERE(service, srpl_instance_service); 3652 return true; 3653 } 3654 3655 static void 3656 srpl_domain_context_release(void *context) 3657 { 3658 srpl_domain_t *domain = context; 3659 RELEASE_HERE(domain, srpl_domain); 3660 } 3661 3662 static void 3663 srpl_browse_callback(DNSServiceRef UNUSED sdRef, DNSServiceFlags flags, uint32_t interfaceIndex, 3664 DNSServiceErrorType errorCode, const char *serviceName, const char *regtype, 3665 const char *replyDomain, void *context) 3666 { 3667 srpl_domain_t *domain = context; 3668 if (errorCode != kDNSServiceErr_NoError) { 3669 ERROR("browse on domain " PRI_S_SRP " failed with %d", domain->name, errorCode); 3670 if (domain->query != NULL) { 3671 ioloop_dnssd_txn_cancel(domain->query); 3672 ioloop_dnssd_txn_release(domain->query); 3673 domain->query = NULL; 3674 } 3675 3676 // Get rid of all instances on the domain, because we aren't going to get remove events for them. 3677 // If we start a new browse and get add events while the connections are still up, this will 3678 // have no effect. 
3679 srpl_instance_t *next_instance; 3680 for (srpl_instance_t *instance = domain->instances; instance; instance = next_instance) { 3681 INFO("_srpl-tls._tcp instance " PRI_S_SRP " went away.", instance->instance_name); 3682 next_instance = instance->next; 3683 srpl_instance_discontinue(instance); 3684 } 3685 3686 srpl_instance_service_t *service, *next; 3687 for (service = domain->unresolved_services; service != NULL; service = next) { 3688 INFO("discontinue unresolved service " PRI_S_SRP, service->full_service_name); 3689 next = service->next; 3690 service->num_copies = 0; 3691 srpl_instance_service_discontinue(service); 3692 } 3693 3694 if (domain->server_state->srpl_browse_wakeup == NULL) { 3695 domain->server_state->srpl_browse_wakeup = ioloop_wakeup_create(); 3696 } 3697 if (domain->server_state->srpl_browse_wakeup != NULL) { 3698 ioloop_add_wake_event(domain->server_state->srpl_browse_wakeup, 3699 domain, srpl_browse_restart, srpl_domain_context_release, 1000); 3700 RETAIN_HERE(domain, srpl_domain); 3701 } 3702 return; 3703 } 3704 3705 char full_service_name[kDNSServiceMaxDomainName]; 3706 DNSServiceConstructFullName(full_service_name, serviceName, regtype, replyDomain); 3707 3708 srpl_instance_t *instance; 3709 srpl_instance_service_t *service; 3710 // See if we already have a service record going; First search in unresolved_services list which 3711 // contains the services that haven't been resolved yet. 3712 for (service = domain->unresolved_services; service; service = service->next) { 3713 if (!strcmp(service->full_service_name, full_service_name)) { 3714 break; 3715 } 3716 } 3717 // If the service is not found in unresolved_services list, search in instance list which contains services 3718 // that have been resolved. 
3719 if (service == NULL) { 3720 for (instance = domain->instances; instance; instance = instance->next) { 3721 for (service = instance->services; service; service = service->next) { 3722 if (!strcmp(service->full_service_name, full_service_name)) { 3723 break; 3724 } 3725 } 3726 if (service != NULL) { 3727 break; 3728 } 3729 } 3730 } 3731 if (flags & kDNSServiceFlagsAdd) { 3732 if (service != NULL) { 3733 // it's possible that a service goes away and starts discontinuing, and before the timeout, 3734 // the service comes back again. In this case, since the service is still on the list, it 3735 // appears as a duplicate add. But we should cancel the discontinue timer. 3736 if (service->discontinue_timeout != NULL) { 3737 if (service->discontinuing) { 3738 INFO("discontinue on service " PRI_S_SRP " canceled.", service->full_service_name); 3739 ioloop_cancel_wake_event(service->discontinue_timeout); 3740 service->discontinuing = false; 3741 if (service->instance != NULL) { 3742 service->instance->discontinuing = false; 3743 } 3744 } 3745 } 3746 3747 if (service->resolve_started) { 3748 INFO(PRI_S_SRP ": resolve_started true, incrementing num_copies to %d", 3749 full_service_name, service->num_copies + 1); 3750 service->num_copies++; 3751 INFO("duplicate add for service " PRI_S_SRP, full_service_name); 3752 return; 3753 } 3754 // In this case the service went away and came back, so service->resolve_started is false, but the 3755 // instance still exists. 
3756 INFO(PRI_S_SRP ": resolve_started false, incrementing num_copies to %d", 3757 full_service_name, service->num_copies + 1); 3758 service->num_copies++; 3759 INFO("service " PRI_S_SRP " went away but came back.", full_service_name); 3760 } else { 3761 service = calloc(1, sizeof(*service)); 3762 if (service == NULL) { 3763 ERROR("no memory for service " PRI_S_SRP, full_service_name); 3764 return; 3765 } 3766 // Retain for unresolved_services list 3767 RETAIN_HERE(service, srpl_instance_service); 3768 service->domain = domain; 3769 RETAIN_HERE(service->domain, srpl_domain); 3770 3771 service->full_service_name = strdup(full_service_name); 3772 if (service->full_service_name == NULL) { 3773 ERROR("no memory for service name " PRI_S_SRP, full_service_name); 3774 RELEASE_HERE(service, srpl_instance_service); 3775 return; 3776 } 3777 INFO(PRI_S_SRP ": new service, setting num_copies to 1", full_service_name); 3778 service->num_copies = 1; 3779 service->ifindex = interfaceIndex; 3780 // add to the unresolved service list 3781 srpl_instance_service_t **sp; 3782 for (sp = &domain->unresolved_services; *sp != NULL; sp = &(*sp)->next) {;} 3783 *sp = service; 3784 3785 dns_towire_state_t towire; 3786 uint8_t name_buffer[kDNSServiceMaxDomainName]; 3787 memset(&towire, 0, sizeof(towire)); 3788 towire.p = name_buffer; 3789 towire.lim = towire.p + kDNSServiceMaxDomainName; 3790 dns_full_name_to_wire(NULL, &towire, full_service_name); 3791 3792 free(service->ptr_rdata); 3793 service->ptr_length = towire.p - name_buffer; 3794 service->ptr_rdata = malloc(service->ptr_length); 3795 if (service->ptr_rdata == NULL) { 3796 ERROR("unable to save PTR rdata for " PRI_S_SRP, full_service_name); 3797 return; 3798 } 3799 memcpy(service->ptr_rdata, name_buffer, service->ptr_length); 3800 } 3801 3802 if (!srpl_service_instance_query_start(service, &service->txt_txn, "TXT", dns_rrtype_txt, dns_qclass_in, 3803 srpl_service_txt_callback) || 3804 !srpl_service_instance_query_start(service, 
&service->srv_txn, "SRV", dns_rrtype_srv, dns_qclass_in, 3805 srpl_service_srv_callback)) 3806 { 3807 return; 3808 } 3809 INFO("resolving " PRI_S_SRP, full_service_name); 3810 service->resolve_started = true; 3811 } else { 3812 if (service != NULL) { 3813 INFO(PRI_S_SRP ": decrementing num_copies to %d", full_service_name, service->num_copies - 1); 3814 service->num_copies--; 3815 if (service->num_copies < 0) { 3816 FAULT("num_copies went negative"); 3817 service->num_copies = 0; 3818 } 3819 if (service->num_copies == 0) { 3820 INFO("discontinuing service " PRI_S_SRP, full_service_name); 3821 srpl_instance_service_discontinue(service); 3822 return; 3823 } 3824 } 3825 } 3826 } 3827 3828 static void 3829 srpl_dnssd_txn_fail(void *context, int err) 3830 { 3831 srpl_domain_t *domain = context; 3832 ERROR("service browse " PRI_S_SRP " i/o failure: %d", domain->name, err); 3833 } 3834 3835 static bool 3836 srpl_domain_browse_start(srpl_domain_t *domain) 3837 { 3838 int ret; 3839 DNSServiceRef sdref; 3840 3841 INFO("starting browse on _srpl-tls._tcp"); 3842 // Look for an NS record for the specified domain using mDNS, not DNS. 
3843 ret = DNSServiceBrowse(&sdref, kDNSServiceFlagsLongLivedQuery, 3844 kDNSServiceInterfaceIndexAny, "_srpl-tls._tcp", NULL, srpl_browse_callback, domain); 3845 if (ret != kDNSServiceErr_NoError) { 3846 ERROR("Unable to query for NS records for " PRI_S_SRP, domain->name); 3847 return false; 3848 } 3849 domain->query = ioloop_dnssd_txn_add(sdref, domain, srpl_domain_context_release, srpl_dnssd_txn_fail); 3850 if (domain->query == NULL) { 3851 ERROR("Unable to set up ioloop transaction for NS query on " PRI_S_SRP, domain->name); 3852 DNSServiceRefDeallocate(sdref); 3853 return false; 3854 } 3855 RETAIN_HERE(domain, srpl_domain); // For the browse 3856 return true; 3857 } 3858 3859 srpl_domain_t * 3860 srpl_domain_create_or_copy(srp_server_t *server_state, const char *domain_name) 3861 { 3862 srpl_domain_t **dp, *domain; 3863 3864 // Find the domain, if it's already there. 3865 for (dp = &server_state->srpl_domains; *dp; dp = &(*dp)->next) { 3866 domain = *dp; 3867 if (!strcasecmp(domain->name, domain_name)) { 3868 break; 3869 } 3870 } 3871 3872 // If not there, make it. 
3873 if (*dp == NULL) { 3874 domain = calloc(1, sizeof(*domain)); 3875 if (domain == NULL || (domain->name = strdup(domain_name)) == NULL) { 3876 ERROR("Unable to allocate replication structure for domain " PRI_S_SRP, domain_name); 3877 free(domain); 3878 return NULL; 3879 } 3880 *dp = domain; 3881 // Hold a reference for the domain list 3882 RETAIN_HERE(domain, srpl_domain); 3883 INFO("New service replication browsing domain: " PRI_S_SRP, domain->name); 3884 3885 domain->srpl_opstate = SRPL_OPSTATE_STARTUP; 3886 domain->server_state = server_state; 3887 domain->partner_id = srp_random64(); 3888 INFO("generate partner id %" PRIx64 " for domain " PRI_S_SRP, domain->partner_id, domain->name); 3889 } else { 3890 ERROR("Unexpected duplicate replication domain: " PRI_S_SRP, domain_name); 3891 return NULL; 3892 } 3893 return domain; 3894 } 3895 3896 static void 3897 srpl_domain_add(srp_server_t *server_state, const char *domain_name) 3898 { 3899 srpl_domain_t *domain = srpl_domain_create_or_copy(server_state, domain_name); 3900 if (domain == NULL) { 3901 return; 3902 } 3903 3904 domain->partner_discovery_timeout = ioloop_wakeup_create(); 3905 if (domain->partner_discovery_timeout) { 3906 ioloop_add_wake_event(domain->partner_discovery_timeout, domain, 3907 srpl_partner_discovery_timeout, srpl_domain_context_release, 3908 MIN_PARTNER_DISCOVERY_INTERVAL + srp_random16() % PARTNER_DISCOVERY_INTERVAL_RANGE); 3909 RETAIN_HERE(domain, srpl_domain); 3910 } else { 3911 ERROR("unable to add wakeup event for partner discovery for domain " PRI_S_SRP, domain->name); 3912 return; 3913 } 3914 domain->partner_discovery_pending = true; 3915 3916 // Start a browse on the domain. 
3917 if (!srpl_domain_browse_start(domain)) { 3918 return; 3919 } 3920 } 3921 3922 static void 3923 srpl_domain_rename(const char *current_name, const char *new_name) 3924 { 3925 ERROR("replication domain " PRI_S_SRP " renamed to " PRI_S_SRP ", not currently handled.", current_name, new_name); 3926 } 3927 3928 // Note that when this is implemented, it has the potential to return new thread domain names more than once, so 3929 // in principle we need to change the name of the domain we are advertising. 3930 static cti_status_t 3931 cti_get_thread_network_name(void *context, cti_string_property_reply_t NONNULL callback, 3932 run_context_t NULLABLE UNUSED client_queue) 3933 { 3934 callback(context, "openthread", kCTIStatus_NoError); 3935 return kCTIStatus_NoError; 3936 } 3937 3938 // 3939 // Event apply functions, print functions, and state actions, generally in order 3940 // 3941 3942 static bool 3943 event_is_message(srpl_event_t *event) 3944 { 3945 switch(event->event_type) { 3946 case srpl_event_invalid: 3947 case srpl_event_address_add: 3948 case srpl_event_address_remove: 3949 case srpl_event_server_disconnect: 3950 case srpl_event_reconnect_timer_expiry: 3951 case srpl_event_disconnected: 3952 case srpl_event_connected: 3953 case srpl_event_advertise_finished: 3954 case srpl_event_srp_client_update_finished: 3955 case srpl_event_do_sync: 3956 return false; 3957 3958 case srpl_event_session_response_received: 3959 case srpl_event_send_candidates_response_received: 3960 case srpl_event_candidate_received: 3961 case srpl_event_host_message_received: 3962 case srpl_event_candidate_response_received: 3963 case srpl_event_host_response_received: 3964 case srpl_event_session_message_received: 3965 case srpl_event_send_candidates_message_received: 3966 return true; 3967 } 3968 return false; 3969 } 3970 3971 // States that require an instance (most states). 
We also validate the chain up to the server state, because 3972 // it's possible for that to go away and yet still for one last event to arrive, at least in principle. 3973 #define REQUIRE_SRPL_INSTANCE(srpl_connection) \ 3974 do { \ 3975 if ((srpl_connection)->instance == NULL || (srpl_connection)->instance->domain == NULL || \ 3976 (srpl_connection)->instance->domain->server_state == NULL) { \ 3977 ERROR(PRI_S_SRP ": no instance in state " PUB_S_SRP, srpl_connection->name, \ 3978 srpl_connection->state_name); \ 3979 return srpl_state_invalid; \ 3980 } \ 3981 } while(false) 3982 3983 // For states that never receive events. 3984 #define REQUIRE_SRPL_EVENT_NULL(srpl_connection, event) \ 3985 do { \ 3986 if ((event) != NULL) { \ 3987 ERROR(PRI_S_SRP ": received unexpected " PUB_S_SRP " event in state " PUB_S_SRP, \ 3988 srpl_connection->name, event->name, srpl_connection->state_name); \ 3989 return srpl_state_invalid; \ 3990 } \ 3991 } while (false) 3992 3993 // Announce that we have entered a state that takes no events 3994 #define STATE_ANNOUNCE_NO_EVENTS(srpl_connection) \ 3995 do { \ 3996 INFO(PRI_S_SRP ": entering state " PUB_S_SRP, srpl_connection->name, srpl_connection->state_name); \ 3997 } while (false) 3998 3999 // Announce that we have entered a state that takes no events 4000 #define STATE_ANNOUNCE_NO_EVENTS_NAME(connection, fqdn) \ 4001 do { \ 4002 char hostname[kDNSServiceMaxDomainName]; \ 4003 dns_name_print(fqdn, hostname, sizeof(hostname)); \ 4004 INFO(PRI_S_SRP ": entering state " PUB_S_SRP " with host " PRI_S_SRP, \ 4005 connection->name, connection->state_name, hostname); \ 4006 } while (false) 4007 4008 // Announce that we have entered a state that takes no events 4009 #define STATE_ANNOUNCE(srpl_connection, event) \ 4010 do { \ 4011 if (event != NULL) { \ 4012 INFO(PRI_S_SRP ": event " PUB_S_SRP " received in state " PUB_S_SRP, \ 4013 srpl_connection->name, event->name, srpl_connection->state_name); \ 4014 } else { \ 4015 INFO(PRI_S_SRP ": 
entering state " PUB_S_SRP, \ 4016 srpl_connection->name, srpl_connection->state_name); \ 4017 } \ 4018 } while (false) 4019 4020 #define UNEXPECTED_EVENT_MAIN(srpl_connection, event, bad) \ 4021 do { \ 4022 if (event_is_message(event)) { \ 4023 INFO(PRI_S_SRP ": invalid event " PUB_S_SRP " in state " PUB_S_SRP, \ 4024 (srpl_connection)->name, (event)->name, srpl_connection->state_name); \ 4025 return bad; \ 4026 } \ 4027 INFO(PRI_S_SRP ": unexpected event " PUB_S_SRP " in state " PUB_S_SRP, \ 4028 (srpl_connection)->name, (event)->name, \ 4029 srpl_connection->state_name); \ 4030 return srpl_state_invalid; \ 4031 } while (false) 4032 4033 // UNEXPECTED_EVENT flags the response as bad on a protocol level, triggering a retry delay 4034 // UNEXPECTED_EVENT_NO_ERROR doesn't. 4035 #define UNEXPECTED_EVENT(srpl_connection, event) UNEXPECTED_EVENT_MAIN(srpl_connection, event, srpl_state_invalid) 4036 #define UNEXPECTED_EVENT_NO_ERROR(srpl_connection, event) \ 4037 UNEXPECTED_EVENT_MAIN(srpl_connection, event, srpl_connection_drop_state(srpl_connection->instance, srpl_connection)) 4038 4039 static void 4040 srpl_instance_reconnect(srpl_instance_t *instance) 4041 { 4042 srpl_event_t event; 4043 4044 // If we have a new connection, no need to reconnect. 4045 if (instance->connection != NULL && instance->connection->is_server && 4046 SRPL_CONNECTION_IS_CONNECTED(instance->connection)) 4047 { 4048 INFO(PRI_S_SRP ": we have a valid connection.", instance->instance_name); 4049 return; 4050 } 4051 // We shouldn't have an outgoing connection. 4052 if (instance->connection != NULL && !instance->connection->is_server && 4053 SRPL_CONNECTION_IS_CONNECTED(instance->connection)) 4054 { 4055 FAULT(PRI_S_SRP ": got to srpl_instance_reconnect with a connected (" PUB_S_SRP ") outgoing connection.", 4056 instance->instance_name, srpl_state_name(instance->connection->state)); 4057 return; 4058 } 4059 4060 // Start from the beginning of the address list. 
4061 srpl_instance_address_query_reset(instance); 4062 4063 // If we don't have an srpl_connection at this point, make one. 4064 if (instance->connection == NULL) { 4065 INFO(PRI_S_SRP ": instance has no connection.", instance->instance_name); 4066 instance->connection = srpl_connection_create(instance, true); 4067 if (instance->connection == NULL) { 4068 ERROR(PRI_S_SRP ": unable to create srpl_connection", instance->instance_name); 4069 return; 4070 } 4071 srpl_connection_next_state(instance->connection, srpl_state_idle); 4072 } 4073 4074 // Trigger a reconnect if appropriate 4075 if (!instance->is_me && instance->domain != NULL && 4076 (instance->domain->srpl_opstate == SRPL_OPSTATE_STARTUP || instance->domain->partner_id > instance->partner_id)) 4077 { 4078 // We might be in some disconnected state other than idle, so first move to idle if that's the case. 4079 if (instance->connection->state != srpl_state_idle) { 4080 srpl_connection_next_state(instance->connection, srpl_state_idle); 4081 } 4082 srpl_event_initialize(&event, srpl_event_reconnect_timer_expiry); 4083 srpl_event_deliver(instance->connection, &event); 4084 } else { 4085 ERROR(PRI_S_SRP ": reconnect requested but not appropriate: is_me = " PUB_S_SRP 4086 ", domain = %p, opstate = %d, ddid %" PRIx64 ", idid %" PRIx64, 4087 instance->instance_name, instance->is_me ? "true" : "false", instance->domain, 4088 instance->domain == NULL ? -1 : instance->domain->srpl_opstate, 4089 instance->domain == NULL ? 0 : instance->domain->partner_id, instance->partner_id); 4090 } 4091 } 4092 4093 static void 4094 srpl_instance_reconnect_callback(void *context) 4095 { 4096 srpl_instance_reconnect(context); 4097 } 4098 4099 static srpl_state_t 4100 srpl_connection_drop_state_delay(srpl_instance_t *instance, srpl_connection_t *srpl_connection, int delay) 4101 { 4102 // Schedule a reconnect. 
4103 if (instance->reconnect_timeout == NULL) { 4104 instance->reconnect_timeout = ioloop_wakeup_create(); 4105 } 4106 if (instance->reconnect_timeout == NULL) { 4107 FAULT(PRI_S_SRP "disconnecting, but can't reconnect!", srpl_connection->name); 4108 } else { 4109 RETAIN_HERE(instance, srpl_instance); // for the timeout 4110 ioloop_add_wake_event(instance->reconnect_timeout, instance, 4111 srpl_instance_reconnect_callback, srpl_instance_context_release, delay * MSEC_PER_SEC); 4112 } 4113 4114 if (srpl_connection == instance->connection && srpl_connection->is_server) { 4115 srpl_connection->retry_delay = delay; 4116 return srpl_state_retry_delay_send; 4117 } else { 4118 return srpl_state_disconnect; 4119 } 4120 } 4121 4122 static srpl_state_t 4123 srpl_connection_drop_state(srpl_instance_t *instance, srpl_connection_t *srpl_connection) 4124 { 4125 if (instance == NULL) { 4126 return srpl_state_disconnect; 4127 } else if (instance->unmatched) { 4128 if (instance->connection == srpl_connection) { 4129 RELEASE_HERE(instance->connection, srpl_connection); 4130 instance->connection = NULL; 4131 } 4132 return srpl_state_disconnect; 4133 } else { 4134 return srpl_connection_drop_state_delay(instance, srpl_connection, 300); 4135 } 4136 } 4137 4138 // Call when there's a protocol error, so that we don't start reconnecting over and over. 
4139 static void 4140 srpl_disconnect(srpl_connection_t *srpl_connection) 4141 { 4142 const int delay = 300; // five minutes 4143 srpl_instance_t *instance = srpl_connection->instance; 4144 if (instance != NULL && srpl_connection->connection != NULL) { 4145 srpl_state_t state = srpl_connection_drop_state_delay(instance, srpl_connection, delay); 4146 if (state == srpl_state_retry_delay_send) { 4147 srpl_retry_delay_send(srpl_connection, delay); 4148 } 4149 } 4150 srpl_connection_discontinue(srpl_connection); 4151 } 4152 4153 static void 4154 srpl_connection_state_timeout(void *context) 4155 { 4156 srpl_connection_t *srpl_connection = context; 4157 srpl_instance_t *instance = srpl_connection->instance; 4158 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 4159 4160 // Connection might have been discontinued before we came back. 4161 if (instance == NULL) { 4162 return; 4163 } 4164 4165 // If the srpl connection has been in the current state for timeout and not received any 4166 // event to get out of the current state, we assume the peer is gone or unavailable and 4167 // can exclude this instance when we make a decision to enter the routine state. 
4168 instance->sync_fail = true; 4169 INFO("connection for instance " PRI_S_SRP " timed out in state " PUB_S_SRP, 4170 instance->instance_name, srpl_connection->state_name); 4171 if (instance != NULL && instance->sync_to_join && 4172 domain != NULL && domain->srpl_opstate != SRPL_OPSTATE_ROUTINE) 4173 { 4174 instance->sync_to_join = false; 4175 srpl_maybe_sync_or_transition(domain); 4176 } 4177 } 4178 4179 static void 4180 srpl_connection_schedule_state_timeout(srpl_connection_t *srpl_connection, uint32_t when) 4181 { 4182 // Create a state timer on the srpl_connection_t 4183 if (srpl_connection->state_timeout == NULL) { 4184 srpl_connection->state_timeout = ioloop_wakeup_create(); 4185 if (srpl_connection->state_timeout == NULL) { 4186 ERROR("no memory for state_timeout for service instance " PRI_S_SRP, srpl_connection->name); 4187 return; 4188 } 4189 } else { 4190 ioloop_cancel_wake_event(srpl_connection->state_timeout); 4191 } 4192 ioloop_add_wake_event(srpl_connection->state_timeout, srpl_connection, srpl_connection_state_timeout, 4193 srpl_connection_context_release, when); 4194 RETAIN_HERE(srpl_connection, srpl_connection); // the timer has a reference. 4195 return; 4196 } 4197 4198 // We arrive at the disconnected state when there is no connection to make, or no need to make a connection. 4199 // This state takes no action, but waits for events. If we get an add event and we don't have a viable incoming 4200 // connection, we go to the next_address_get event. 
4201 static srpl_state_t 4202 srpl_disconnected_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4203 { 4204 STATE_ANNOUNCE(srpl_connection, event); 4205 4206 if (event == NULL) { 4207 srpl_connection_schedule_state_timeout(srpl_connection, SRPL_STATE_TIMEOUT); 4208 return srpl_state_invalid; 4209 } else if (event->event_type == srpl_event_address_add) { 4210 ioloop_cancel_wake_event(srpl_connection->state_timeout); 4211 return srpl_state_next_address_get; 4212 } else { 4213 UNEXPECTED_EVENT(srpl_connection, event); 4214 } 4215 } 4216 4217 static void 4218 srpl_instance_address_query_reset(srpl_instance_t *instance) 4219 { 4220 for (srpl_instance_service_t *service = instance->services; service != NULL; service = service->next) { 4221 address_query_t *address_query = service->address_query; 4222 if (address_query != NULL && address_query->num_addresses > 0) { 4223 address_query->cur_address = -1; 4224 } 4225 } 4226 } 4227 4228 // This state takes the action of looking for an address to try. 
This can have three outcomes: 4229 // 4230 // * No addresses available: go to the disconnected state 4231 // * End of address list: go to the reconnect_wait state 4232 // * Address found: got to the connect state 4233 4234 static srpl_state_t 4235 srpl_next_address_get_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4236 { 4237 address_query_t *address_query = NULL; 4238 srpl_instance_t *instance; 4239 srpl_instance_service_t *service; 4240 bool no_address = true; 4241 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 4242 REQUIRE_SRPL_INSTANCE(srpl_connection); 4243 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 4244 4245 instance = srpl_connection->instance; 4246 // Get the next address 4247 // Return an event, one of "next address", "end of address list" or "no addresses" 4248 for (service = instance->services; service != NULL; service = service->next) { 4249 address_query = service->address_query; 4250 if (address_query == NULL || address_query->num_addresses == 0) { 4251 continue; 4252 } else { 4253 no_address = false; 4254 if (address_query->cur_address == address_query->num_addresses || 4255 ++address_query->cur_address == address_query->num_addresses) 4256 { 4257 continue; 4258 } else { 4259 memcpy(&srpl_connection->connected_address, 4260 &address_query->addresses[address_query->cur_address], sizeof(addr_t)); 4261 if (srpl_connection->connected_address.sa.sa_family == AF_INET) { 4262 srpl_connection->connected_address.sin.sin_port = htons(service->outgoing_port); 4263 } else { 4264 srpl_connection->connected_address.sin6.sin6_port = htons(service->outgoing_port); 4265 } 4266 return srpl_state_connect; 4267 } 4268 } 4269 } 4270 4271 if (no_address) { 4272 return srpl_state_disconnected; 4273 } else { 4274 srpl_instance_address_query_reset(instance); 4275 return srpl_state_reconnect_wait; 4276 } 4277 } 4278 4279 // This state takes the action of connecting to the connection's current address, which is expected to have 4280 // been set. 
This can have two outcomes: 4281 // 4282 // * The connect attempt fails immediately: go to the next_address_get state 4283 // * The connection attempt is in progress: go to the connecting state 4284 static srpl_state_t 4285 srpl_connect_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4286 { 4287 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 4288 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 4289 REQUIRE_SRPL_INSTANCE(srpl_connection); 4290 4291 // Connect to the address from the event. 4292 if (!srpl_connection_connect(srpl_connection)) { 4293 return srpl_state_next_address_get; 4294 } else { 4295 return srpl_state_connecting; 4296 } 4297 } 4298 4299 // We reach this state when we are disconnected and don't need to reconnect because we have an active server 4300 // connection. If we get a server disconnect here, then we go to the next_address_get state. 4301 static srpl_state_t 4302 srpl_idle_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4303 { 4304 REQUIRE_SRPL_INSTANCE(srpl_connection); 4305 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 4306 if (event == NULL) { 4307 srpl_connection_schedule_state_timeout(srpl_connection, SRPL_STATE_TIMEOUT); 4308 return srpl_state_invalid; 4309 } else if (event->event_type == srpl_event_server_disconnect || 4310 event->event_type == srpl_event_reconnect_timer_expiry) 4311 { 4312 ioloop_cancel_wake_event(srpl_connection->state_timeout); 4313 INFO(PRI_S_SRP ": event " PUB_S_SRP " received in state " PUB_S_SRP, 4314 srpl_connection->name, event->name, srpl_connection->state_name); 4315 return srpl_state_next_address_get; 4316 } else { 4317 // We don't log unhandled events in the idle state because it creates a lot of noise. 
4318 return srpl_state_invalid; 4319 } 4320 } 4321 4322 static void 4323 srpl_maybe_propose_new_dataset_id(srpl_domain_t *domain) 4324 { 4325 if (domain->have_dataset_id) { 4326 for(srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) 4327 { 4328 // as long as there's one instance of the proposed dataset id that has not failed 4329 // to sync, we are going to wait 4330 if (!instance->sync_fail && 4331 instance->dataset_id == domain->dataset_id) 4332 { 4333 INFO("instance " PRI_S_SRP " has matched dataset_id %" PRIx64, 4334 instance->instance_name, instance->dataset_id); 4335 return; 4336 } 4337 } 4338 } 4339 domain->have_dataset_id = srpl_instances_max_dataset_id(domain, &domain->dataset_id); 4340 INFO(PRI_S_SRP "propose a new dataset id %" PRIx64, domain->have_dataset_id? "": "fail to ", domain->dataset_id); 4341 } 4342 4343 // We've received a timeout event on the reconnect timer. Generate a reconnect_timeout event and send it to the 4344 // connection. 4345 static void 4346 srpl_connection_reconnect_timeout(void *context) 4347 { 4348 srpl_connection_t *srpl_connection = context; 4349 srpl_instance_t *instance = srpl_connection->instance; 4350 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 4351 4352 INFO("reconnect timeout on " PRI_S_SRP, srpl_connection->name); 4353 srpl_event_t event; 4354 srpl_event_initialize(&event, srpl_event_reconnect_timer_expiry); 4355 srpl_event_deliver(srpl_connection, &event); 4356 // If we have tried to connect to all the addresses but failed, we assume the peer is 4357 // gone. We no longer need to synchronize with this peer and if this was an obstacle 4358 // to enter the routine state, we should recheck again. 
4359 instance->sync_fail = true; 4360 INFO("fail to sync with instance " PRI_S_SRP, instance->instance_name); 4361 if (instance != NULL && instance->sync_to_join && 4362 domain != NULL && domain->srpl_opstate != SRPL_OPSTATE_ROUTINE) 4363 { 4364 instance->sync_to_join = false; 4365 srpl_maybe_sync_or_transition(domain); 4366 } 4367 } 4368 4369 static srpl_state_t 4370 srpl_connection_schedule_reconnect_event(srpl_connection_t *srpl_connection, uint32_t when) 4371 { 4372 // Create a reconnect timer on the srpl_connection_t 4373 if (srpl_connection->reconnect_wakeup == NULL) { 4374 srpl_connection->reconnect_wakeup = ioloop_wakeup_create(); 4375 if (srpl_connection->reconnect_wakeup == NULL) { 4376 ERROR("no memory for reconnect_wakeup for service instance " PRI_S_SRP, srpl_connection->name); 4377 return srpl_state_invalid; 4378 } 4379 } else { 4380 ioloop_cancel_wake_event(srpl_connection->reconnect_wakeup); 4381 } 4382 ioloop_add_wake_event(srpl_connection->reconnect_wakeup, srpl_connection, srpl_connection_reconnect_timeout, 4383 srpl_connection_context_release, when); 4384 RETAIN_HERE(srpl_connection, srpl_connection); // the timer has a reference. 4385 return srpl_state_invalid; 4386 } 4387 4388 // We reach the set_reconnect_timer state when we have tried to connect to all the known addresses. Once we have set a 4389 // timer, we wait for events. If we get a reconnect_timeout event, we go to the next_address_get state. If we get an 4390 // add_adress event, we cancel the retransmit timer and go to the next_address_get state. 
4391 static srpl_state_t 4392 srpl_reconnect_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4393 { 4394 REQUIRE_SRPL_INSTANCE(srpl_connection); 4395 STATE_ANNOUNCE(srpl_connection, event); 4396 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 4397 4398 if (event == NULL) { 4399 return srpl_connection_schedule_reconnect_event(srpl_connection, 60 * 1000); 4400 } 4401 if (event->event_type == srpl_event_reconnect_timer_expiry) { 4402 // if it's not our job to reconnect, we move into idle. 4403 if (domain->srpl_opstate != SRPL_OPSTATE_STARTUP && 4404 domain->partner_id < srpl_connection->instance->partner_id) 4405 { 4406 return srpl_state_idle; 4407 } 4408 return srpl_state_next_address_get; 4409 } else if (event->event_type == srpl_event_address_add) { 4410 ioloop_cancel_wake_event(srpl_connection->reconnect_wakeup); 4411 // if it's not our job to reconnect, we move into idle. 4412 if (domain->srpl_opstate != SRPL_OPSTATE_STARTUP && 4413 domain->partner_id < srpl_connection->instance->partner_id) 4414 { 4415 return srpl_state_idle; 4416 } 4417 return srpl_state_next_address_get; 4418 } 4419 UNEXPECTED_EVENT(srpl_connection, event); 4420 } 4421 4422 // We get to this state when the remote end has sent something bogus; in this case we send a retry_delay message to 4423 // tell the client not to reconnect for a while. 4424 static srpl_state_t 4425 srpl_retry_delay_send_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4426 { 4427 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 4428 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 4429 4430 srpl_retry_delay_send(srpl_connection, srpl_connection->retry_delay); 4431 return srpl_state_disconnect; 4432 } 4433 4434 // We go to the disconnect state when the connection needs to be dropped either because we lost the session ID 4435 // coin toss or something's gone wrong. 
In either case, we do not attempt to reconnect--we either go to the idle state 4436 // or the disconnect_wait state, depending on whether or not the connection has already been closed. 4437 static srpl_state_t 4438 srpl_disconnect_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4439 { 4440 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 4441 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 4442 4443 // Any ongoing state needs to be discarded. 4444 srpl_connection_reset(srpl_connection); 4445 4446 // Disconnect the srpl_connection_t 4447 if (srpl_connection->connection == NULL) { 4448 return srpl_state_idle; 4449 } 4450 srpl_trigger_disconnect(srpl_connection); 4451 return srpl_state_disconnect_wait; 4452 } 4453 4454 // We enter disconnect_wait when we are waiting for a disconnect event after cancelling a connection. 4455 // There is no action for this event. The only event we are interested in is the disconnect event. 4456 static srpl_state_t 4457 srpl_disconnect_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4458 { 4459 STATE_ANNOUNCE(srpl_connection, event); 4460 if (event == NULL) { 4461 return srpl_state_invalid; 4462 } else if (event->event_type == srpl_event_disconnected) { 4463 return srpl_state_idle; 4464 } else { 4465 UNEXPECTED_EVENT_NO_ERROR(srpl_connection, event); 4466 } 4467 return srpl_state_invalid; 4468 } 4469 4470 // We enter the connecting state when we've attempted a connection to some address. 4471 // This state has no action. If a connected event is received, we move to the connected state. 4472 // If a disconnected event is received, we move to the next_address_get state. 
4473 static srpl_state_t 4474 srpl_connecting_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4475 { 4476 STATE_ANNOUNCE(srpl_connection, event); 4477 if (event == NULL) { 4478 srpl_connection_schedule_state_timeout(srpl_connection, SRPL_STATE_TIMEOUT); 4479 return srpl_state_invalid; 4480 } else if (event->event_type == srpl_event_disconnected) { 4481 ioloop_cancel_wake_event(srpl_connection->state_timeout); 4482 // We tried to connect and the connection failed. This may mean that the information we see in the _srpl-tls.tcp 4483 // advertisement is wrong, or that the address records are wrong. Reconfirm the records. 4484 srpl_reconfirm(srpl_connection); 4485 return srpl_state_next_address_get; 4486 } else if (event->event_type == srpl_event_connected) { 4487 ioloop_cancel_wake_event(srpl_connection->state_timeout); 4488 return srpl_state_session_send; 4489 } else { 4490 UNEXPECTED_EVENT_NO_ERROR(srpl_connection, event); 4491 } 4492 return srpl_state_invalid; 4493 } 4494 4495 static void 4496 srpl_sync_wait_check(void *context) 4497 { 4498 srpl_connection_t *srpl_connection = context; 4499 4500 if (srpl_connection->instance == NULL) { 4501 FAULT("srpl_connection->instance shouldn't ever be NULL here, but it is."); 4502 return; 4503 } 4504 if (srpl_connection->instance->domain == NULL) { 4505 FAULT("srpl_connection->instance->domain shouldn't ever be NULL here, but it is."); 4506 return; 4507 } 4508 if (!srpl_connection->instance->domain->partner_discovery_pending) { 4509 srpl_maybe_sync_or_transition(srpl_connection->instance->domain); 4510 } 4511 } 4512 4513 static srpl_state_t 4514 srpl_sync_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4515 { 4516 STATE_ANNOUNCE(srpl_connection, event); 4517 if (event == NULL) { 4518 // This will trigger a do_sync event if we should synchronize at this point. 
4519 ioloop_run_async(srpl_sync_wait_check, srpl_connection); 4520 return srpl_state_invalid; 4521 } else if (event->event_type == srpl_event_do_sync) { 4522 // When starting to sync, we reset the keepalive_interval so that we can detect 4523 // the problem sooner during synchronization. 4524 srpl_connection->keepalive_interval = DEFAULT_KEEPALIVE_WAKEUP_EXPIRY / 2; 4525 return srpl_state_send_candidates_send; 4526 } else { 4527 UNEXPECTED_EVENT_NO_ERROR(srpl_connection, event); 4528 } 4529 return srpl_state_invalid; 4530 } 4531 4532 // This state sends a SRPL session message and then goes to session_response_wait, unless the send failed, in which 4533 // case it goes to the disconnect state. 4534 static srpl_state_t 4535 srpl_session_send_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4536 { 4537 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 4538 REQUIRE_SRPL_INSTANCE(srpl_connection); 4539 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 4540 4541 // Send a session message 4542 // Now we say hello. 4543 if (!srpl_session_message_send(srpl_connection, false)) { 4544 return srpl_state_disconnect; 4545 } 4546 return srpl_state_session_response_wait; 4547 } 4548 4549 // This state waits for a session response with the remote partner ID and whether 4550 // the remote partner is in startup state. 4551 // When the response arrives, it goes to the send_candidates_send state. 4552 static srpl_state_t 4553 srpl_session_response_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4554 { 4555 STATE_ANNOUNCE(srpl_connection, event); 4556 if (event == NULL) { 4557 return srpl_state_invalid; 4558 } else if (event->event_type == srpl_event_session_response_received) { 4559 // we are connecting and have sent the peer a session message. if the received session 4560 // response shows that the peer has a lower version number, we don't have to sync with 4561 // this peer to join the replication. 
we also note version_mismatch on the corresponding 4562 // instance and move to the idle state. when we move to the routine state later, we'll 4563 // pick a dataset id so that the sequence number of our anycast service would win over 4564 // the low-version peers. 4565 if (event->content.session.remote_version < SRPL_CURRENT_VERSION) { // version mismatch 4566 INFO("instance " PRI_S_SRP " has a lower version number", srpl_connection->instance->instance_name); 4567 srpl_connection->instance->version_mismatch = true; 4568 srpl_connection->instance->sync_to_join = false; 4569 return srpl_state_idle; 4570 } 4571 4572 srpl_connection->remote_partner_id = event->content.session.partner_id; 4573 srpl_connection->new_partner = event->content.session.new_partner; 4574 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 4575 // if we are already in the routine state, we can directly move forward 4576 // with sync; otherwise we put the srpl connection in the sync_wait state 4577 // where we check the number of active srp servers to decide if we should 4578 // continue sync at this point. 4579 if (domain->srpl_opstate == SRPL_OPSTATE_ROUTINE) { 4580 return srpl_state_send_candidates_send; 4581 } 4582 return srpl_state_sync_wait; 4583 } else { 4584 UNEXPECTED_EVENT(srpl_connection, event); 4585 } 4586 return srpl_state_invalid; 4587 } 4588 4589 // When evaluating the incoming session, we've decided to continue (called by srpl_session_evaluate_action). 
4590 static srpl_state_t 4591 srpl_evaluate_incoming_continue(srpl_connection_t *srpl_connection) 4592 { 4593 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 4594 4595 if (srpl_connection->new_partner) { 4596 INFO(PRI_S_SRP " connecting partner is in startup state", srpl_connection->name); 4597 } else { 4598 INFO(PRI_S_SRP ": my partner id %" PRIx64 " < connecting partner id %" PRIx64, 4599 srpl_connection->name, domain->partner_id, srpl_connection->remote_partner_id); 4600 } 4601 if (srpl_connection->is_server) { 4602 return srpl_state_session_response_send; 4603 } else { 4604 return srpl_state_send_candidates_send; 4605 } 4606 } 4607 4608 // When evaluating the incoming ID, we've decided to disconnect (called by srpl_session_evaluate_action). 4609 static srpl_state_t 4610 srpl_evaluate_incoming_disconnect(srpl_connection_t *srpl_connection, bool bad) 4611 { 4612 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 4613 4614 if (domain->srpl_opstate != SRPL_OPSTATE_ROUTINE) { 4615 INFO(PRI_S_SRP ": not in the routine state yet in domain " PRI_S_SRP, 4616 srpl_connection->name, domain->name); 4617 } else { 4618 INFO(PRI_S_SRP ": my partner id %" PRIx64 " > connectiong partner id %" PRIx64, 4619 srpl_connection->name, domain->partner_id, srpl_connection->remote_partner_id); 4620 } 4621 if (srpl_connection->instance->is_me) { 4622 return srpl_evaluate_incoming_continue(srpl_connection); 4623 } else { 4624 if (bad) { 4625 // bad is set if the server send back the same ID we sent, which means it's misbehaving. 4626 return srpl_connection_drop_state(srpl_connection->instance, srpl_connection); 4627 } 4628 return srpl_state_disconnect; 4629 } 4630 } 4631 4632 // This state's action is to evaluate if the partner should accept the connection. 
4633 // The receiving partner accepts the connection if the connecting partner is in the 4634 // "startup" state (flaged as new partner), or the receiving partner's ID is smaller 4635 // than the connecting partner's ID. Otherwise, the receiving partner disconnects. 4636 static srpl_state_t 4637 srpl_session_evaluate_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4638 { 4639 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 4640 REQUIRE_SRPL_INSTANCE(srpl_connection); 4641 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 4642 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 4643 4644 // The receiving partner must be in routine state to accept the connection. 4645 // Recceiving connection in startup state should not happen, but add a guard 4646 // here anyway to protect against such situation. 4647 if (domain->srpl_opstate == SRPL_OPSTATE_ROUTINE && (srpl_connection->new_partner || 4648 domain->partner_id < srpl_connection->remote_partner_id)) 4649 { 4650 return srpl_evaluate_incoming_continue(srpl_connection); 4651 } else { 4652 return srpl_evaluate_incoming_disconnect(srpl_connection, false); 4653 } 4654 } 4655 4656 // This state's action is to send the "send candidates" message, and then go to the send_candidates_wait state. 
4657 static srpl_state_t 4658 srpl_send_candidates_send_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4659 { 4660 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 4661 REQUIRE_SRPL_INSTANCE(srpl_connection); 4662 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 4663 4664 // Send "send candidates" message 4665 // Return no event 4666 srpl_send_candidates_message_send(srpl_connection, false); 4667 return srpl_state_send_candidates_wait; 4668 } 4669 4670 static bool 4671 srpl_can_transition_to_routine_state(srpl_domain_t *domain) 4672 { 4673 if (domain == NULL) { 4674 INFO("returning false because there's no domain"); 4675 return false; 4676 } 4677 4678 // We only transition to routine state after discovery is completed, as 4679 // we need to sync with all the partners discovered during the discovery 4680 // window to join the replication 4681 if (domain->partner_discovery_pending) { 4682 INFO("returning false because partner discovery is still pending"); 4683 return false; 4684 } 4685 4686 if (domain->srpl_opstate == SRPL_OPSTATE_ROUTINE) { 4687 INFO("returning false because we are already in routine state"); 4688 return false; 4689 } 4690 4691 if (domain->have_dataset_id) { 4692 for (srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) { 4693 // We skip checking the instance if the instance 4694 // 1. is to myself (this could happen if I restarts and receives a stale advertisement from myself). 4695 // 2. is not discovered during the discovery_timeout, or 4696 // 3. the dataset id is not what we are looking for, or 4697 // 4. is discontinuing, or 4698 // 5. 
has lower version 4699 if (instance->is_me || !instance->sync_to_join || 4700 instance->dataset_id != domain->dataset_id || 4701 instance->discontinuing || instance->version_mismatch) 4702 { 4703 INFO("instance " PUB_S_SRP ": is_me (" PUB_S_SRP ") or sync_to_join (" PUB_S_SRP 4704 ") or discontinuing (" PUB_S_SRP ") or version_mismatch (" PUB_S_SRP ")", 4705 instance->instance_name, instance->is_me ? "true" : "false", instance->sync_to_join ? "true" : "false", 4706 instance->discontinuing ? "true" : "false", instance->version_mismatch ? "true" : "false"); 4707 continue; 4708 } 4709 // Instance is a valid partner that we should sync with to possibly move to the routine state 4710 if (instance->connection == NULL || 4711 !instance->connection->database_synchronized) 4712 { 4713 INFO("synchronization on " PRI_S_SRP " with partner_id %" PRIx64 " is not ready (%p " PUB_S_SRP ").", 4714 instance->instance_name, instance->partner_id, instance->connection, 4715 (instance->connection == NULL ? "null" : 4716 instance->connection->database_synchronized ? "true" : "false")); 4717 return false; 4718 } 4719 } 4720 } 4721 INFO("ready"); 4722 return true; 4723 } 4724 4725 static void 4726 srpl_store_dataset_id(srpl_domain_t *domain) 4727 { 4728 uint64_t dataset_id = domain->dataset_id; 4729 uint8_t msb; 4730 OSStatus err; 4731 4732 // read out the stored msb of the dataset id. increment the msb and generate the dataset id. 
4733 const CFStringRef app_id = CFSTR("com.apple.srp-mdns-proxy.preferences"); 4734 const CFStringRef key = CFStringCreateWithFormat(kCFAllocatorDefault, NULL, CFSTR("dataset-id-msb-%s"), domain->name); 4735 4736 if (key) { 4737 msb = (dataset_id & 0xFF00000000000000) >> 56; 4738 err = CFPrefs_SetInt64(app_id, key, msb); 4739 4740 if (err) { 4741 ERROR("Unable to store the msb of the dataset id in preferences."); 4742 } 4743 INFO("store msb %d.", msb); 4744 CFRelease(key); 4745 } else { 4746 ERROR("unable to create key for domain " PRI_S_SRP, domain->name); 4747 } 4748 } 4749 4750 4751 // SRPL partners MUST persist the highest (most significant byte or MSB) of the dataset ID. 4752 // When generating a new dataset ID, the partner MUST increment the MSB of last used dataset 4753 // ID to use as MSB of new dataset ID and populate the lower 56 bits randomly. If there is no 4754 // previously saved ID, then the partner randomly generates the entire 64-bit ID. 4755 static uint64_t 4756 srpl_generate_store_dataset_id(srpl_domain_t *domain) 4757 { 4758 uint64_t dataset_id; 4759 uint8_t msb; 4760 OSStatus err; 4761 4762 // read out the stored msb of the dataset id. increment the msb and generate the dataset id. 
4763 const CFStringRef app_id = CFSTR("com.apple.srp-mdns-proxy.preferences"); 4764 const CFStringRef key = CFStringCreateWithFormat(kCFAllocatorDefault, NULL, CFSTR("dataset-id-msb-%s"), domain->name); 4765 4766 if (key) { 4767 msb = (uint8_t)CFPrefs_GetInt64(app_id, key, &err); 4768 if (err) { 4769 INFO("fail to fetch msb, generate random dataset id."); 4770 dataset_id = srp_random64(); 4771 } else { 4772 dataset_id = (((uint64_t)msb+1) << 56) | (srp_random64() & LOWER56_BIT_MASK); 4773 } 4774 // store the most significant byte (msb) of the generated dataset id 4775 msb = (dataset_id & 0xFF00000000000000) >> 56; 4776 err = CFPrefs_SetInt64(app_id, key, msb); 4777 4778 if (err) { 4779 ERROR("Unable to store the msb of the dataset id in preferences."); 4780 } 4781 CFRelease(key); 4782 } else { 4783 ERROR("unable to create key for domain " PRI_S_SRP, domain->name); 4784 dataset_id = srp_random64(); 4785 } 4786 4787 return dataset_id; 4788 } 4789 4790 static void 4791 srpl_transition_to_routine_state(srpl_domain_t *domain) 4792 { 4793 domain->srpl_opstate = SRPL_OPSTATE_ROUTINE; 4794 INFO("transitions to routine state in domain " PRI_S_SRP, domain->name); 4795 // If the partner does not discover any other partners advertising 4796 // the same domain in the "startup" state, it generates a new dataset 4797 // ID when entering the "routine operation" state. 4798 // When generating a new dataset ID, the partner MUST increment the MSB of 4799 // last used dataset ID to use as MSB of new dataset ID and populate the 4800 // lower 56 bits randomly using a random number generator. If there is no 4801 // previously saved ID, then the partner randomly generates the entire 64-bit ID. 
4802 if (!domain->have_dataset_id) { 4803 domain->dataset_id = srpl_generate_store_dataset_id(domain); 4804 domain->have_dataset_id = true; 4805 domain->dataset_id_committed = true; 4806 INFO("generate new dataset id %" PRIx64 " for domain " PRI_S_SRP, 4807 domain->dataset_id, domain->name); 4808 } else { 4809 // if there are instances that have lower version number but the MSB of their dataset_id 4810 // is larger than ours, we'll increment the MSB of the instance dataset id and use it 4811 // as our dataset id. this is to guarantee that the current version has a higher sequence 4812 // number so that its service will be preferred. 4813 for (srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) { 4814 if (instance->version_mismatch && 4815 srpl_dataset_id_compare(instance->dataset_id & 0xFF00000000000000, 4816 domain->dataset_id & 0xFF00000000000000) >= 0) 4817 { 4818 INFO("increment the MSB of the dataset_id %" PRIx64 " of instance " PRI_S_SRP, 4819 instance->dataset_id, instance->instance_name); 4820 domain->dataset_id = instance->dataset_id + 0x0100000000000000; 4821 } 4822 } 4823 } 4824 #if STUB_ROUTER 4825 srp_server_t *server_state = domain->server_state; 4826 // Advertise the SRPL service in the "routine" state. 
4827 srpl_domain_advertise(domain); 4828 if (!strcmp(domain->name, server_state->current_thread_domain_name)) { 4829 route_state_t *route_state = server_state->route_state; 4830 if (route_state != NULL) { 4831 route_state->thread_sequence_number = (domain->dataset_id & 0xFF00000000000000) >> 56; 4832 INFO("thread sequence number 0x%02x", route_state->thread_sequence_number); 4833 route_state->partition_can_advertise_anycast_service = true; 4834 partition_maybe_advertise_anycast_service(route_state); 4835 } 4836 } 4837 #endif 4838 } 4839 4840 static bool 4841 srpl_keep_current_dataset_id(srpl_domain_t *domain, srpl_instance_t *instance) 4842 { 4843 bool keep = true; 4844 bool stored = domain->dataset_id_committed; 4845 if (instance->have_dataset_id) { 4846 uint64_t new = instance->dataset_id; 4847 int compare = srpl_dataset_id_compare(new, domain->dataset_id); 4848 if (compare == 0) { 4849 domain->dataset_id_committed = true; 4850 keep = true; 4851 INFO("keep and commit dataset_id %" PRIx64, domain->dataset_id); 4852 } else if (compare > 0) { 4853 INFO("abandon dataset_id %" PRIx64 " and commit preferred %" PRIx64, domain->dataset_id, new); 4854 domain->dataset_id = new; 4855 domain->dataset_id_committed = true; 4856 keep = false; 4857 } else { 4858 INFO("non-preferred dataset id %" PRIx64, instance->dataset_id); 4859 } 4860 } 4861 // we store the msb of dataset id if we haven't done so for current 4862 // committed dataset id or the committed dataset id has changed. 
4863 if ((!stored || !keep) && domain->dataset_id_committed) { 4864 srpl_store_dataset_id(domain); 4865 } 4866 return keep; 4867 } 4868 4869 static void 4870 srpl_state_transition_by_dataset_id(srpl_domain_t *domain, srpl_instance_t *instance) 4871 { 4872 if (!srpl_keep_current_dataset_id(domain, instance)) { 4873 // DNS-SD SRP Replication Spec: if at any time (regardless of "startup" or "routine 4874 // operation" state) an SRPL partner discovers that it is synchronizing with a 4875 // non-preferred dataset ID, it MUST abandon that dataset, re-enter the "startup" 4876 // state, and attempt to synchronize with the (newly discovered) preferred dataset id. 4877 INFO("more preferred dataset id %" PRIx64 ", reenter startup state", domain->dataset_id); 4878 srpl_abandon_nonpreferred_dataset(domain); 4879 srpl_transition_to_startup_state(domain); 4880 } else { 4881 srpl_maybe_sync_or_transition(domain); 4882 } 4883 } 4884 4885 // Used by srpl_send_candidates_wait_action and srpl_host_wait_action 4886 static srpl_state_t 4887 srpl_send_candidates_wait_event_process(srpl_connection_t *srpl_connection, srpl_event_t *event) 4888 { 4889 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 4890 if (event->event_type == srpl_event_send_candidates_response_received) { 4891 if (srpl_connection->is_server) { 4892 srpl_connection->database_synchronized = true; 4893 srpl_instance_t *instance = srpl_connection->instance; 4894 instance->sync_fail = false; 4895 srpl_state_transition_by_dataset_id(domain, instance); 4896 return srpl_state_ready; 4897 } else { 4898 return srpl_state_send_candidates_message_wait; 4899 } 4900 } else if (event->event_type == srpl_event_candidate_received) { 4901 srpl_connection_candidate_set(srpl_connection, event->content.candidate); 4902 event->content.candidate = NULL; // steal! 
4903 return srpl_state_candidate_check; 4904 } else { 4905 UNEXPECTED_EVENT(srpl_connection, event); 4906 } 4907 } 4908 4909 // We reach this state after having sent a "send candidates" message, so we can in principle get either a 4910 // "candidate" message or a "send candidates" response here, leading either to send_candidates check or one 4911 // of two states depending on whether this connection is an incoming or outgoing connection. Outgoing 4912 // connections send the "send candidates" message first, so when they get a "send candidates" reply, they 4913 // need to wait for a "send candidates" message from the remote. Incoming connections send the "send candidates" 4914 // message last, so when they get the "send candidates" reply, the database sync is done and it's time to 4915 // just deal with ongoing updates. In this case we go to the check_for_srp_client_updates state, which 4916 // looks to see if any updates came in from SRP clients while we were syncing the databases. 4917 static srpl_state_t 4918 srpl_send_candidates_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4919 { 4920 REQUIRE_SRPL_INSTANCE(srpl_connection); 4921 STATE_ANNOUNCE(srpl_connection, event); 4922 4923 if (event == NULL) { 4924 return srpl_state_invalid; // Wait for events. 
4925 } 4926 return srpl_send_candidates_wait_event_process(srpl_connection, event); 4927 } 4928 4929 static srpl_candidate_disposition_t 4930 srpl_candidate_host_check(srpl_connection_t *srpl_connection, adv_host_t *host) 4931 { 4932 // Evaluate candidate 4933 // Return "host candidate wanted" or "host candidate not wanted" event 4934 if (host == NULL) { 4935 INFO("host is NULL, answer is yes."); 4936 return srpl_candidate_yes; 4937 } else { 4938 if (host->removed) { 4939 INFO("host is removed, answer is yes."); 4940 return srpl_candidate_yes; 4941 } else if (host->key_id != srpl_connection->candidate->key_id) { 4942 INFO("host key conflict (%x vs %x), answer is conflict.", host->key_id, srpl_connection->candidate->key_id); 4943 return srpl_candidate_conflict; 4944 } else { 4945 // We allow for a bit of jitter. Bear in mind that candidates only happen on startup, so 4946 // even if a previous run of the SRP server on this device was responsible for registering 4947 // the candidate, we don't have it, so we still need it. 4948 if (host->update_time - srpl_connection->candidate->update_time > SRPL_UPDATE_JITTER_WINDOW) { 4949 INFO("host update time %" PRId64 " candidate update time %" PRId64 ", answer is no.", 4950 (int64_t)host->update_time, (int64_t)srpl_connection->candidate->update_time); 4951 return srpl_candidate_no; 4952 } else { 4953 INFO("host update time %" PRId64 " candidate update time %" PRId64 ", answer is yes.", 4954 (int64_t)host->update_time, (int64_t)srpl_connection->candidate->update_time); 4955 return srpl_candidate_yes; 4956 } 4957 } 4958 } 4959 } 4960 4961 // We enter this state after we've received a "candidate" message, and check to see if we want the host the candidate 4962 // represents. We then send an appropriate response. 
4963 static srpl_state_t 4964 srpl_candidate_check_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4965 { 4966 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 4967 REQUIRE_SRPL_INSTANCE(srpl_connection); 4968 STATE_ANNOUNCE_NO_EVENTS_NAME(srpl_connection, srpl_connection->candidate->name); 4969 4970 adv_host_t *host = srp_adv_host_copy(srpl_connection->instance->domain->server_state, 4971 srpl_connection->candidate->name); 4972 srpl_candidate_disposition_t disposition = srpl_candidate_host_check(srpl_connection, host); 4973 if (host != NULL) { 4974 srp_adv_host_release(host); 4975 } 4976 switch(disposition) { 4977 case srpl_candidate_yes: 4978 srpl_candidate_response_send(srpl_connection, kDSOType_SRPLCandidateYes); 4979 return srpl_state_candidate_host_wait; 4980 case srpl_candidate_no: 4981 srpl_candidate_response_send(srpl_connection, kDSOType_SRPLCandidateNo); 4982 return srpl_state_send_candidates_wait; 4983 case srpl_candidate_conflict: 4984 srpl_candidate_response_send(srpl_connection, kDSOType_SRPLConflict); 4985 return srpl_state_send_candidates_wait; 4986 } 4987 return srpl_state_invalid; 4988 } 4989 4990 // In candidate_host_send_wait, we take no action and wait for events. We're hoping for a "host" message, leading to 4991 // candidate_host_prepare. We could also receive a "candidate" message, leading to candidate_received, or a "send 4992 // candidates" reply, leading to candidate_reply_received. 4993 4994 static srpl_state_t 4995 srpl_candidate_host_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 4996 { 4997 REQUIRE_SRPL_INSTANCE(srpl_connection); 4998 STATE_ANNOUNCE(srpl_connection, event); 4999 5000 if (event == NULL) { 5001 return srpl_state_invalid; // Wait for events. 5002 } else if (event->event_type == srpl_event_host_message_received) { 5003 // Copy the update information, retain what's refcounted, and free what's not on the event. 
5004 srpl_host_update_steal_parts(&srpl_connection->stashed_host, &event->content.host_update); 5005 return srpl_state_candidate_host_prepare; 5006 } else { 5007 return srpl_send_candidates_wait_event_process(srpl_connection, event); 5008 } 5009 } 5010 5011 // Here we want to see if we can do an immediate update; if so, we go to candidate_host_re_evaluate; otherwise 5012 // we go to candidate_host_contention_wait 5013 static srpl_state_t 5014 srpl_candidate_host_prepare_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5015 { 5016 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5017 REQUIRE_SRPL_INSTANCE(srpl_connection); 5018 STATE_ANNOUNCE_NO_EVENTS_NAME(srpl_connection, srpl_connection->candidate->name); 5019 5020 // Apply the host from the event to the current host list 5021 // Return no event 5022 adv_host_t *host = srp_adv_host_copy(srpl_connection->instance->domain->server_state, 5023 srpl_connection->candidate->name); 5024 if (host == NULL) { 5025 // If we don't have this host, we can apply the update immediately. 5026 return srpl_state_candidate_host_apply; 5027 } 5028 if (host->srpl_connection != NULL || host->update != NULL) { 5029 // We are processing an update from a different srpl server or a client. 
5030 INFO(PRI_S_SRP ": host->srpl_connection = %p host->update=%p--going into contention", 5031 srpl_connection->name, host->srpl_connection, host->update); 5032 srp_adv_host_release(host); 5033 return srpl_state_candidate_host_contention_wait; 5034 } else { 5035 srpl_connection->candidate->host = host; 5036 return srpl_state_candidate_host_re_evaluate; 5037 } 5038 } 5039 5040 static adv_host_t * 5041 srpl_client_update_matches(dns_name_t *hostname, srpl_event_t *event) 5042 { 5043 adv_host_t *host = event->content.client_result.host; 5044 if (event->content.client_result.rcode == dns_rcode_noerror && dns_names_equal_text(hostname, host->name)) { 5045 INFO("returning host " PRI_S_SRP, host->name); 5046 return host; 5047 } 5048 char name[kDNSServiceMaxDomainName]; 5049 dns_name_print(hostname, name, sizeof(name)); 5050 INFO("returning NULL: rcode = " PUB_S_SRP " hostname = " PRI_S_SRP " host->name = " PRI_S_SRP, 5051 dns_rcode_name(event->content.client_result.rcode), name, host->name); 5052 return NULL; 5053 } 5054 5055 // and wait for a srp_client_update_finished event for the host, which 5056 // will trigger us to move to candidate_host_re_evaluate. 5057 static srpl_state_t 5058 srpl_candidate_host_contention_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5059 { 5060 REQUIRE_SRPL_INSTANCE(srpl_connection); 5061 STATE_ANNOUNCE(srpl_connection, event); 5062 5063 if (event == NULL) { 5064 return srpl_state_invalid; // Wait for events. 
5065 } else if (event->event_type == srpl_event_srp_client_update_finished) { 5066 adv_host_t *host = srpl_client_update_matches(srpl_connection->candidate->name, event); 5067 if (host != NULL) { 5068 srpl_connection->candidate->host = host; 5069 srp_adv_host_retain(srpl_connection->candidate->host); 5070 return srpl_state_candidate_host_re_evaluate; 5071 } 5072 return srpl_state_invalid; // Keep waiting 5073 } else if (event->event_type == srpl_event_advertise_finished) { 5074 // See if this is an event on the host we were waiting for. 5075 if (event->content.advertise_finished.hostname != NULL && 5076 dns_names_equal_text(srpl_connection->candidate->name, event->content.advertise_finished.hostname)) 5077 { 5078 return srpl_state_candidate_host_re_evaluate; 5079 } 5080 return srpl_state_invalid; 5081 } else { 5082 UNEXPECTED_EVENT(srpl_connection, event); 5083 } 5084 } 5085 5086 // At this point we've either waited for the host to no longer be in contention, or else it wasn't in contention. 5087 // There was a time gap between when we sent the candidate response and when the host message arrived, so an update 5088 // may have arrived locally for that SRP client. We therefore re-evaluate at this point. 
5089 static srpl_state_t 5090 srpl_candidate_host_re_evaluate_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5091 { 5092 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5093 REQUIRE_SRPL_INSTANCE(srpl_connection); 5094 STATE_ANNOUNCE_NO_EVENTS_NAME(srpl_connection, srpl_connection->candidate->name); 5095 5096 adv_host_t *host = srpl_connection->candidate->host; 5097 // The host we retained may have become invalid; if so, discard it 5098 if (host != NULL && !srp_adv_host_valid(host)) { 5099 srp_adv_host_release(srpl_connection->candidate->host); 5100 srpl_connection->candidate->host = host = NULL; 5101 } 5102 // If it was invalidated, or if we got here directly, look up the host by name 5103 if (host == NULL) { 5104 host = srp_adv_host_copy(srpl_connection->instance->domain->server_state, 5105 srpl_connection->candidate->name); 5106 srpl_connection->candidate->host = host; 5107 } 5108 // It's possible that the host is gone; in this case we definitely want the update. 5109 if (host == NULL) { 5110 return srpl_state_candidate_host_apply; 5111 } 5112 5113 // At this point we know that the host we were looking for is valid. Now check to see if we still want to apply it. 5114 srpl_state_t ret = srpl_state_invalid; 5115 srpl_candidate_disposition_t disposition = srpl_candidate_host_check(srpl_connection, host); 5116 switch(disposition) { 5117 case srpl_candidate_yes: 5118 ret = srpl_state_candidate_host_apply; 5119 break; 5120 case srpl_candidate_no: 5121 // This happens if we got a candidate and wanted it, but then got an SRP update on that candidate while waiting 5122 // for events. In this case, there's no real problem, and the successful update should trigger an update to be 5123 // sent to the remote. 
5124 srpl_host_response_send(srpl_connection, dns_rcode_noerror); 5125 INFO("candidate_no: freeing parts"); 5126 srpl_host_update_parts_free(&srpl_connection->stashed_host); 5127 ret = srpl_state_send_candidates_wait; 5128 break; 5129 case srpl_candidate_conflict: 5130 srpl_host_response_send(srpl_connection, dns_rcode_yxdomain); 5131 INFO("candidate_conflict: freeing parts"); 5132 srpl_host_update_parts_free(&srpl_connection->stashed_host); 5133 ret = srpl_state_send_candidates_wait; 5134 break; 5135 } 5136 return ret; 5137 } 5138 5139 static bool 5140 srpl_connection_host_apply(srpl_connection_t *srpl_connection) 5141 { 5142 DNS_NAME_GEN_SRP(srpl_connection->stashed_host.hostname, name_buf); 5143 INFO("applying update from " PRI_S_SRP " for host " PRI_DNS_NAME_SRP ", %d messages", 5144 srpl_connection->name, DNS_NAME_PARAM_SRP(srpl_connection->stashed_host.hostname, name_buf), 5145 srpl_connection->stashed_host.num_messages); 5146 if (!srp_parse_host_messages_evaluate(srpl_connection->instance->domain->server_state, srpl_connection, 5147 srpl_connection->stashed_host.messages, 5148 srpl_connection->stashed_host.num_messages)) 5149 { 5150 srpl_host_response_send(srpl_connection, dns_rcode_formerr); 5151 return false; 5152 } 5153 return true; 5154 } 5155 5156 // At this point we know there is no contention on the host, and we want to update it, so start the update by passing the 5157 // host message to dns_evaluate. 5158 static srpl_state_t 5159 srpl_candidate_host_apply_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5160 { 5161 REQUIRE_SRPL_INSTANCE(srpl_connection); 5162 STATE_ANNOUNCE(srpl_connection, event); 5163 5164 // Apply the host from the event to the current host list 5165 // Return no event 5166 // Note that we set host->srpl_connection _after_ we call dns_evaluate. This ensures that any "advertise_finished" 5167 // calls that are done during the call to dns_evaluate do not deliver an event here. 
5168 if (event == NULL) { 5169 if (!srpl_connection_host_apply(srpl_connection)) { 5170 return srpl_state_send_candidates_wait; 5171 } 5172 return srpl_state_candidate_host_apply_wait; 5173 } else if (event->event_type == srpl_event_advertise_finished) { 5174 // This shouldn't be possible anymore, but I'm putting a FAULT in here in case I'm mistaken. 5175 FAULT(PRI_S_SRP ": advertise_finished event!", srpl_connection->name); 5176 return srpl_state_invalid; 5177 } else { 5178 UNEXPECTED_EVENT(srpl_connection, event); 5179 } 5180 } 5181 5182 // Called by the SRP server when an advertise has finished for an update recevied on a connection. 5183 static void 5184 srpl_deferred_advertise_finished_event_deliver(void *context) 5185 { 5186 srpl_event_t *event = context; 5187 srp_server_t *server_state = event->content.advertise_finished.server_state; 5188 for (srpl_domain_t *domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 5189 for (srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) { 5190 if (instance->connection != NULL) { 5191 srpl_event_deliver(instance->connection, event); 5192 } 5193 } 5194 } 5195 for (srpl_instance_t *instance = server_state->unmatched_instances; instance != NULL; instance = instance->next) { 5196 if (instance->connection != NULL) { 5197 srpl_event_deliver(instance->connection, event); 5198 } 5199 } 5200 5201 free(event->content.advertise_finished.hostname); 5202 free(event); 5203 } 5204 5205 // Send an advertise_finished event for the specified hostname to all connections. Because this is called from 5206 // advertise_finished, we do not want any state machine to advance immediately, so we defer delivery of this 5207 // event until the next time we return to the main event loop. 
5208 void 5209 srpl_advertise_finished_event_send(char *hostname, int rcode, srp_server_t *server_state) 5210 { 5211 srpl_event_t *event = calloc(1, sizeof(*event)); 5212 if (event == NULL) { 5213 ERROR("No memory to defer advertise_finished event for " PUB_S_SRP, hostname); 5214 return; 5215 } 5216 5217 srpl_event_initialize(event, srpl_event_advertise_finished); 5218 event->content.advertise_finished.rcode = rcode; 5219 event->content.advertise_finished.hostname = strdup(hostname); 5220 event->content.advertise_finished.server_state = server_state; 5221 if (event->content.advertise_finished.hostname == NULL) { 5222 INFO(PRI_S_SRP ": no memory for hostname", hostname); 5223 free(event); 5224 return; 5225 } 5226 ioloop_run_async(srpl_deferred_advertise_finished_event_deliver, event); 5227 } 5228 5229 5230 // We enter this state to wait for the application of a host update to complete. 5231 // We exit the state for the send_candidates_wait state when we receive an advertise_finished event. 5232 // Additionally when we receive an advertise_finished event we send a "host" response with the rcode 5233 // returned in the advertise_finished event. 5234 static srpl_state_t 5235 srpl_candidate_host_apply_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5236 { 5237 REQUIRE_SRPL_INSTANCE(srpl_connection); 5238 STATE_ANNOUNCE(srpl_connection, event); 5239 5240 if (event == NULL) { 5241 return srpl_state_invalid; // Wait for events. 5242 } else if (event->event_type == srpl_event_advertise_finished) { 5243 srpl_host_response_send(srpl_connection, event->content.advertise_finished.rcode); 5244 INFO("freeing parts"); 5245 srpl_host_update_parts_free(&srpl_connection->stashed_host); 5246 return srpl_state_send_candidates_wait; 5247 } else { 5248 UNEXPECTED_EVENT(srpl_connection, event); 5249 } 5250 } 5251 5252 // This marks the end of states that occur as a result of sending a "send candidates" message. 
5253 // This marks the beginning of states that occur as a result of receiving a send_candidates message. 5254 5255 // We have received a "send candidates" message; the action is to create a candidates list. 5256 static srpl_state_t 5257 srpl_send_candidates_received_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5258 { 5259 int num_candidates; 5260 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5261 REQUIRE_SRPL_INSTANCE(srpl_connection); 5262 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5263 5264 // Make sure we don't have a candidate list. 5265 if (srpl_connection->candidates != NULL) { 5266 srpl_connection_candidates_free(srpl_connection); 5267 srpl_connection->candidates = NULL; 5268 // Just in case we exit due to a failure... 5269 srpl_connection->num_candidates = 0; 5270 srpl_connection->current_candidate = -1; 5271 } 5272 // Generate a list of candidates from the current host list. 5273 // Return no event 5274 srp_server_t *server_state = srpl_connection->instance->domain->server_state; 5275 num_candidates = srp_current_valid_host_count(server_state); 5276 if (num_candidates > 0) { 5277 adv_host_t **candidates = calloc(num_candidates, sizeof(*candidates)); 5278 int copied_candidates; 5279 if (candidates == NULL) { 5280 ERROR("unable to allocate candidates list."); 5281 return srpl_connection_drop_state(srpl_connection->instance, srpl_connection); 5282 } 5283 copied_candidates = srp_hosts_to_array(server_state, candidates, num_candidates); 5284 if (copied_candidates > num_candidates) { 5285 FAULT("copied_candidates %d > num_candidates %d", 5286 copied_candidates, num_candidates); 5287 return srpl_connection_drop_state(srpl_connection->instance, srpl_connection); 5288 } 5289 if (num_candidates != copied_candidates) { 5290 INFO("srp_hosts_to_array returned the wrong number of hosts: copied_candidates %d > num_candidates %d", 5291 copied_candidates, num_candidates); 5292 num_candidates = copied_candidates; 5293 } 5294 
srpl_connection->candidates = candidates; 5295 } 5296 srpl_connection->candidates_not_generated = false; 5297 srpl_connection->num_candidates = num_candidates; 5298 srpl_connection->current_candidate = -1; 5299 return srpl_state_send_candidates_remaining_check; 5300 } 5301 5302 // See if there are candidates remaining; if not, send "send candidates" response. 5303 static srpl_state_t 5304 srpl_candidates_remaining_check_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5305 { 5306 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5307 REQUIRE_SRPL_INSTANCE(srpl_connection); 5308 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5309 5310 // Get the next candidate out of the candidate list 5311 // Return "no candidates left" or "next candidate" 5312 if (srpl_connection->current_candidate + 1 < srpl_connection->num_candidates) { 5313 srpl_connection->current_candidate++; 5314 return srpl_state_next_candidate_send; 5315 } else { 5316 return srpl_state_send_candidates_response_send; 5317 } 5318 } 5319 5320 // Send the next candidate. 5321 static srpl_state_t 5322 srpl_next_candidate_send_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5323 { 5324 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5325 REQUIRE_SRPL_INSTANCE(srpl_connection); 5326 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5327 5328 srpl_candidate_message_send(srpl_connection, srpl_connection->candidates[srpl_connection->current_candidate]); 5329 return srpl_state_next_candidate_send_wait; 5330 } 5331 5332 // Wait for a "candidate" response. 5333 static srpl_state_t 5334 srpl_next_candidate_send_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5335 { 5336 REQUIRE_SRPL_INSTANCE(srpl_connection); 5337 STATE_ANNOUNCE(srpl_connection, event); 5338 5339 if (event == NULL) { 5340 return srpl_state_invalid; // Wait for events. 
5341 } else if (event->event_type == srpl_event_candidate_response_received) { 5342 switch (event->content.disposition) { 5343 case srpl_candidate_yes: 5344 return srpl_state_candidate_host_send; 5345 case srpl_candidate_no: 5346 case srpl_candidate_conflict: 5347 return srpl_state_send_candidates_remaining_check; 5348 } 5349 return srpl_state_invalid; 5350 } else { 5351 UNEXPECTED_EVENT(srpl_connection, event); 5352 } 5353 } 5354 5355 // Send the host for the candidate. 5356 static srpl_state_t 5357 srpl_candidate_host_send_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5358 { 5359 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5360 REQUIRE_SRPL_INSTANCE(srpl_connection); 5361 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5362 5363 // It's possible that the host that we put on the candidates list has become invalid. If so, just go back and send 5364 // the next candidate (or finish). 5365 adv_host_t *host = srpl_connection->candidates[srpl_connection->current_candidate]; 5366 if (!srp_adv_host_valid(host) || host->message == NULL) { 5367 return srpl_state_send_candidates_remaining_check; 5368 } 5369 if (!srpl_host_message_send(srpl_connection, host)) { 5370 srpl_disconnect(srpl_connection); 5371 return srpl_state_invalid; 5372 } 5373 return srpl_state_candidate_host_response_wait; 5374 } 5375 5376 // Wait for a "host" response. 5377 static srpl_state_t 5378 srpl_candidate_host_response_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5379 { 5380 REQUIRE_SRPL_INSTANCE(srpl_connection); 5381 STATE_ANNOUNCE(srpl_connection, event); 5382 5383 if (event == NULL) { 5384 return srpl_state_invalid; // Wait for events. 5385 } else if (event->event_type == srpl_event_host_response_received) { 5386 // The only failure case we care about is a conflict, and we don't have a way to handle that, so just 5387 // continue without checking the status. 
5388 return srpl_state_send_candidates_remaining_check; 5389 } else { 5390 UNEXPECTED_EVENT(srpl_connection, event); 5391 } 5392 } 5393 5394 // At this point we're done sending candidates, so we send a "send candidates" response. 5395 static srpl_state_t 5396 srpl_send_candidates_response_send_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5397 { 5398 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5399 REQUIRE_SRPL_INSTANCE(srpl_connection); 5400 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5401 5402 srpl_send_candidates_message_send(srpl_connection, true); 5403 // When the server has sent its candidate response, it's immediately ready to send a "send candidate" message 5404 // When the client has sent its candidate response, the database synchronization is done on the client. 5405 if (srpl_connection->is_server) { 5406 return srpl_state_send_candidates_send; 5407 } else { 5408 srpl_domain_t *domain = srpl_connection_domain(srpl_connection); 5409 srpl_connection->database_synchronized = true; 5410 srpl_instance_t *instance = srpl_connection->instance; 5411 instance->sync_fail = false; 5412 srpl_state_transition_by_dataset_id(domain, instance); 5413 return srpl_state_ready; 5414 } 5415 } 5416 5417 // The ready state is where we land when there's no remaining work to do. We wait for events, and when we get one, 5418 // we handle it, ultimately returning to this state. 5419 static srpl_state_t 5420 srpl_ready_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5421 { 5422 REQUIRE_SRPL_INSTANCE(srpl_connection); 5423 STATE_ANNOUNCE(srpl_connection, event); 5424 5425 if (event == NULL) { 5426 // Whenever we newly land in this state, see if there is an unsent client update at the head of the 5427 // queue, and if so, send it. 
5428 if (srpl_connection->client_update_queue != NULL && !srpl_connection->client_update_queue->sent) { 5429 adv_host_t *host = srpl_connection->client_update_queue->host; 5430 if (host == NULL || host->name == NULL) { 5431 INFO(PRI_S_SRP ": we have an update to send for bogus host %p.", srpl_connection->name, host); 5432 } else { 5433 INFO(PRI_S_SRP ": we have an update to send for host " PRI_S_SRP, srpl_connection->name, 5434 srpl_connection->client_update_queue->host->name); 5435 } 5436 return srpl_state_srp_client_update_send; 5437 } else { 5438 if (srpl_connection->client_update_queue != NULL) { 5439 adv_host_t *host = srpl_connection->client_update_queue->host; 5440 if (host == NULL || host->name == NULL) { 5441 INFO(PRI_S_SRP ": there is anupdate that's marked sent for bogus host %p.", 5442 srpl_connection->name, host); 5443 } else { 5444 INFO(PRI_S_SRP ": there is an update on the queue that's marked sent for host " PRI_S_SRP, 5445 srpl_connection->name, host->name); 5446 } 5447 } else { 5448 INFO(PRI_S_SRP ": the client update queue is empty.", srpl_connection->name); 5449 } 5450 } 5451 return srpl_state_invalid; 5452 } else if (event->event_type == srpl_event_host_message_received) { 5453 if (srpl_connection->stashed_host.messages != NULL) { 5454 FAULT(PRI_S_SRP ": stashed host present but host message received", srpl_connection->name); 5455 return srpl_connection_drop_state(srpl_connection->instance, srpl_connection); 5456 } 5457 // Copy the update information, retain what's refcounted, and NULL out what's not, on the event. 
5458 srpl_host_update_steal_parts(&srpl_connection->stashed_host, &event->content.host_update); 5459 return srpl_state_stashed_host_check; 5460 } else if (event->event_type == srpl_event_host_response_received) { 5461 return srpl_state_srp_client_ack_evaluate; 5462 } else if (event->event_type == srpl_event_advertise_finished) { 5463 if (srpl_connection->stashed_host.hostname != NULL && 5464 event->content.advertise_finished.hostname != NULL && 5465 dns_names_equal_text(srpl_connection->stashed_host.hostname, event->content.advertise_finished.hostname)) 5466 { 5467 srpl_connection->stashed_host.rcode = event->content.advertise_finished.rcode; 5468 return srpl_state_stashed_host_finished; 5469 } 5470 return srpl_state_invalid; 5471 } else if (event->event_type == srpl_event_srp_client_update_finished) { 5472 // When we receive a client update in ready state, we just need to re-run the state's action. 5473 return srpl_state_ready; 5474 } else { 5475 UNEXPECTED_EVENT(srpl_connection, event); 5476 } 5477 } 5478 5479 // We get here when there is at least one client update queued up to send 5480 static srpl_state_t 5481 srpl_srp_client_update_send_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5482 { 5483 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5484 REQUIRE_SRPL_INSTANCE(srpl_connection); 5485 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5486 5487 srpl_srp_client_queue_entry_t *update = srpl_connection->client_update_queue; 5488 if (update != NULL) { 5489 // If the host has a message, send it. Note that this host may well be removed, but if it had been removed 5490 // through a lease expiry we wouldn't have got here, because the host object would have been removed from 5491 // the list. So if it has a message attached to it, that means that either it's been removed explicitly by 5492 // the client, which we need to propagate, or else it is still valid, and so we need to propagate the most 5493 // recent update we got. 
5494 if (update->host->message != NULL) { 5495 srpl_host_message_send(srpl_connection, update->host); 5496 update->sent = true; 5497 } else { 5498 ERROR(PRI_S_SRP ": no host message to send for host " PRI_S_SRP ".", 5499 srpl_connection->name, update->host->name); 5500 5501 // We're not going to send this update, so take it off the queue. 5502 srpl_connection->client_update_queue = update->next; 5503 srp_adv_host_release(update->host); 5504 free(update); 5505 } 5506 } 5507 return srpl_state_ready; 5508 } 5509 5510 // We go here when we get a "host" response; all we do is remove the host from the top of the queue. 5511 static srpl_state_t 5512 srpl_srp_client_ack_evaluate_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5513 { 5514 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5515 REQUIRE_SRPL_INSTANCE(srpl_connection); 5516 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5517 5518 if (srpl_connection->client_update_queue == NULL) { 5519 FAULT(PRI_S_SRP ": update queue empty in ready, but host_response_received event received.", 5520 srpl_connection->name); 5521 return srpl_connection_drop_state(srpl_connection->instance, srpl_connection); 5522 } 5523 if (!srpl_connection->client_update_queue->sent) { 5524 FAULT(PRI_S_SRP ": top of update queue not sent, but host_response_received event received.", 5525 srpl_connection->name); 5526 return srpl_connection_drop_state(srpl_connection->instance, srpl_connection); 5527 } 5528 srpl_srp_client_queue_entry_t *finished_update = srpl_connection->client_update_queue; 5529 srpl_connection->client_update_queue = finished_update->next; 5530 if (finished_update->host != NULL) { 5531 srp_adv_host_release(finished_update->host); 5532 } 5533 free(finished_update); 5534 return srpl_state_ready; 5535 } 5536 5537 // We go here when we get a "host" message 5538 static srpl_state_t 5539 srpl_stashed_host_check_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5540 { 5541 REQUIRE_SRPL_EVENT_NULL(srpl_connection, 
event); 5542 REQUIRE_SRPL_INSTANCE(srpl_connection); 5543 STATE_ANNOUNCE_NO_EVENTS_NAME(srpl_connection, srpl_connection->stashed_host.hostname); 5544 5545 adv_host_t *host = srp_adv_host_copy(srpl_connection->instance->domain->server_state, 5546 srpl_connection->stashed_host.hostname); 5547 // No contention... 5548 if (host == NULL) { 5549 INFO("applying host because it doesn't exist locally."); 5550 return srpl_state_stashed_host_apply; 5551 } else if (host->update == NULL && host->srpl_connection == NULL) { 5552 INFO("applying host because there's no contention."); 5553 srp_adv_host_release(host); 5554 return srpl_state_stashed_host_apply; 5555 } else { 5556 INFO("not applying host because there is contention. host->update %p host->srpl_connection: %p", 5557 host->update, host->srpl_connection); 5558 } 5559 srp_adv_host_release(host); 5560 return srpl_state_ready; // Wait for something to happen 5561 } 5562 5563 // We go here when we have a stashed host to apply. 5564 static srpl_state_t 5565 srpl_stashed_host_apply_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5566 { 5567 REQUIRE_SRPL_INSTANCE(srpl_connection); 5568 STATE_ANNOUNCE(srpl_connection, event); 5569 5570 if (event == NULL) { 5571 if (!srpl_connection_host_apply(srpl_connection)) { 5572 srpl_connection->stashed_host.rcode = dns_rcode_servfail; 5573 return srpl_state_stashed_host_finished; 5574 } 5575 return srpl_state_ready; // Wait for something to happen 5576 } else if (event->event_type == srpl_event_advertise_finished) { 5577 // This shouldn't be possible anymore, but I'm putting a FAULT in here in case I'm mistaken. 5578 FAULT(PRI_S_SRP ": advertise_finished event!", srpl_connection->name); 5579 return srpl_state_invalid; 5580 } else { 5581 UNEXPECTED_EVENT(srpl_connection, event); 5582 } 5583 } 5584 5585 // We go here when a host update advertise finishes. 
5586 static srpl_state_t 5587 srpl_stashed_host_finished_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5588 { 5589 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5590 REQUIRE_SRPL_INSTANCE(srpl_connection); 5591 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5592 5593 if (srpl_connection->stashed_host.hostname == NULL) { 5594 FAULT(PRI_S_SRP ": stashed host not present, but advertise_finished event received.", srpl_connection->name); 5595 return srpl_state_ready; 5596 } 5597 if (srpl_connection->stashed_host.messages == NULL) { 5598 FAULT(PRI_S_SRP ": stashed host present, no messages.", srpl_connection->name); 5599 return srpl_state_ready; 5600 } 5601 srpl_host_response_send(srpl_connection, srpl_connection->stashed_host.rcode); 5602 INFO("freeing parts"); 5603 srpl_host_update_parts_free(&srpl_connection->stashed_host); 5604 return srpl_state_ready; 5605 } 5606 5607 // We land here immediately after a server connection is received. 5608 static srpl_state_t 5609 srpl_session_message_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5610 { 5611 REQUIRE_SRPL_INSTANCE(srpl_connection); 5612 STATE_ANNOUNCE(srpl_connection, event); 5613 5614 if (event == NULL) { 5615 return srpl_state_invalid; // Wait for events. 5616 } else if (event->event_type == srpl_event_session_message_received) { 5617 // we check the remote_version. if it's smaller than our current version, we note 5618 // version_mismatch on the instance and move to the idle state. Since no response 5619 // is sent out, the remote peer will stay in the session_response_wait. 
5620 if (event->content.session.remote_version < SRPL_CURRENT_VERSION) { // version mismatch 5621 INFO("instance " PRI_S_SRP " has a lower version number", srpl_connection->instance->instance_name); 5622 srpl_connection->instance->version_mismatch = true; 5623 return srpl_state_idle; 5624 } 5625 srpl_connection->remote_partner_id = event->content.session.partner_id; 5626 srpl_connection->new_partner = event->content.session.new_partner; 5627 return srpl_state_session_evaluate; 5628 } else { 5629 UNEXPECTED_EVENT(srpl_connection, event); 5630 } 5631 } 5632 5633 // Send a session response 5634 static srpl_state_t 5635 srpl_session_response_send(srpl_connection_t *UNUSED srpl_connection, srpl_event_t *event) 5636 { 5637 REQUIRE_SRPL_EVENT_NULL(srpl_connection, event); 5638 REQUIRE_SRPL_INSTANCE(srpl_connection); 5639 STATE_ANNOUNCE_NO_EVENTS(srpl_connection); 5640 5641 if (!srpl_session_message_send(srpl_connection, true)) { 5642 return srpl_state_disconnect; 5643 } 5644 return srpl_state_send_candidates_message_wait; 5645 } 5646 5647 // We land here immediately after a server connection is received. 5648 static srpl_state_t 5649 srpl_send_candidates_message_wait_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5650 { 5651 REQUIRE_SRPL_INSTANCE(srpl_connection); 5652 STATE_ANNOUNCE(srpl_connection, event); 5653 5654 if (event == NULL) { 5655 return srpl_state_invalid; // Wait for events. 5656 } else if (event->event_type == srpl_event_send_candidates_message_received) { 5657 return srpl_state_send_candidates_received; 5658 } else { 5659 UNEXPECTED_EVENT(srpl_connection, event); 5660 } 5661 } 5662 5663 #ifdef SRP_TEST_SERVER 5664 // When testing, we may want an srpl_connection_t that just calls back to the test system when an event 5665 // is delivered and otherwise does nothing. 
5666 static srpl_state_t 5667 srpl_test_event_intercept_action(srpl_connection_t *srpl_connection, srpl_event_t *event) 5668 { 5669 STATE_ANNOUNCE(srpl_connection, event); 5670 5671 return test_packet_srpl_intercept(srpl_connection, event); 5672 } 5673 #endif 5674 5675 // Check to see if host is on the list of remaining candidates to send. If so, no need to do anything--it'll go out soon. 5676 static bool 5677 srpl_reschedule_candidate(srpl_connection_t *srpl_connection, adv_host_t *host) 5678 { 5679 // We don't need to queue new updates if we haven't yet generated a candidates list. 5680 if (srpl_connection->candidates_not_generated) { 5681 INFO("returning true because we haven't generated candidates."); 5682 return true; 5683 } 5684 if (srpl_connection->candidates == NULL) { 5685 INFO("returning false because we have no candidates."); 5686 return false; 5687 } 5688 for (int i = srpl_connection->current_candidate + 1; i < srpl_connection->num_candidates; i++) { 5689 if (srpl_connection->candidates[i] == host) { 5690 INFO("returning true because the host is on the candidate list."); 5691 return true; 5692 } 5693 } 5694 INFO("returning false because the host is not on the candidate list."); 5695 return false; 5696 } 5697 5698 static void 5699 srpl_queue_srp_client_update(srpl_connection_t *srpl_connection, adv_host_t *host) 5700 { 5701 srpl_srp_client_queue_entry_t *new_entry, **qp; 5702 // Find the end of the queue 5703 for (qp = &srpl_connection->client_update_queue; *qp; qp = &(*qp)->next) { 5704 srpl_srp_client_queue_entry_t *entry = *qp; 5705 // No need to re-queue if we're already on the queue 5706 if (!entry->sent && entry->host == host) { 5707 INFO("host " PRI_S_SRP " is already on the update queue for connection " PRI_S_SRP, 5708 host->name, srpl_connection->name); 5709 return; 5710 } 5711 } 5712 new_entry = calloc(1, sizeof(*new_entry)); 5713 if (new_entry == NULL) { 5714 ERROR(PRI_S_SRP ": no memory to queue SRP client update.", srpl_connection->name); 
5715 return; 5716 } 5717 INFO("adding host " PRI_S_SRP " to the update queue for connection " PRI_S_SRP, host->name, srpl_connection->name); 5718 new_entry->host = host; 5719 srp_adv_host_retain(new_entry->host); 5720 *qp = new_entry; 5721 } 5722 5723 // Client update events are interesting in two cases. First, we might have received a host update for a 5724 // host that was in contention when the update was received; in this case, we want to now apply the update, 5725 // assuming that the contention is no longer present (it's possible that there are multiple sources of 5726 // contention). 5727 // 5728 // The second case is where a client update succeeded; in this case we want to send that update to all of 5729 // the remotes. 5730 // 5731 // We do not receive this event when an update that was triggered by an SRP Replication update; in that 5732 // case we get an "apply finished" event instead of a "client update finished" event. 5733 static void 5734 srpl_srp_client_update_send_event_to_connection(srpl_connection_t *srpl_connection, srpl_event_t *event) 5735 { 5736 if (event->content.client_result.rcode == dns_rcode_noerror) { 5737 adv_host_t *host = event->content.client_result.host; 5738 if (!srpl_reschedule_candidate(srpl_connection, host)) { 5739 srpl_queue_srp_client_update(srpl_connection, host); 5740 } 5741 } 5742 srpl_event_deliver(srpl_connection, event); 5743 } 5744 5745 static void 5746 srpl_deferred_srp_client_update_finished_event_deliver(void *context) 5747 { 5748 srpl_event_t *event = context; 5749 srp_server_t *server_state = event->content.client_result.host->server_state; 5750 if (server_state == NULL) { 5751 FAULT("server state is NULL."); // this can't currently happen, because we just finished updating the host. 
5752 goto out; 5753 } 5754 for (srpl_domain_t *domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 5755 for (srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) { 5756 if (instance->connection != NULL) { 5757 srpl_srp_client_update_send_event_to_connection(instance->connection, event); 5758 } 5759 } 5760 } 5761 for (srpl_instance_t *instance = server_state->unmatched_instances; instance != NULL; instance = instance->next) { 5762 if (instance->connection != NULL) { 5763 srpl_srp_client_update_send_event_to_connection(instance->connection, event); 5764 } 5765 } 5766 out: 5767 srp_adv_host_release(event->content.client_result.host); 5768 free(event); 5769 } 5770 5771 // When an SRP client update finished, we need to deliver an event to all connections indicating that this has 5772 // occurred. This event must be delivered from the main run loop, to avoid starting an update before advertise_finish 5773 // has completed its work. 
5774 void 5775 srpl_srp_client_update_finished_event_send(adv_host_t *host, int rcode) 5776 { 5777 srpl_event_t *event; 5778 event = malloc(sizeof(*event)); 5779 if (event == NULL) { 5780 FAULT(PRI_S_SRP ": unable to allocate memory to defer event", host->name); 5781 return; 5782 } 5783 srpl_event_initialize(event, srpl_event_srp_client_update_finished); 5784 srpl_event_content_type_set(event, srpl_event_content_type_client_result); 5785 event->content.client_result.host = host; 5786 srp_adv_host_retain(event->content.client_result.host); 5787 event->content.client_result.rcode = rcode; 5788 ioloop_run_async(srpl_deferred_srp_client_update_finished_event_deliver, event); 5789 } 5790 5791 typedef struct { 5792 srpl_state_t state; 5793 char *name; 5794 srpl_action_t action; 5795 } srpl_connection_state_t; 5796 5797 #define STATE_NAME_DECL(name) srpl_state_##name, #name 5798 static srpl_connection_state_t srpl_connection_states[] = { 5799 { STATE_NAME_DECL(invalid), NULL }, 5800 { STATE_NAME_DECL(disconnected), srpl_disconnected_action }, 5801 { STATE_NAME_DECL(next_address_get), srpl_next_address_get_action }, 5802 { STATE_NAME_DECL(connect), srpl_connect_action }, 5803 { STATE_NAME_DECL(idle), srpl_idle_action }, 5804 { STATE_NAME_DECL(reconnect_wait), srpl_reconnect_wait_action }, 5805 { STATE_NAME_DECL(retry_delay_send), srpl_retry_delay_send_action }, 5806 { STATE_NAME_DECL(disconnect), srpl_disconnect_action }, 5807 { STATE_NAME_DECL(disconnect_wait), srpl_disconnect_wait_action }, 5808 // If a disconnected state is added here, please fix SRPL_CONNECTION_IS_CONNECTED above. 5809 // connecting is counted as a connected state because we have a connection, even if it is not 5810 // actually connected, and we'll get a disconnect if it fails, so we aren't stuck. 
5811 { STATE_NAME_DECL(connecting), srpl_connecting_action }, 5812 { STATE_NAME_DECL(session_send), srpl_session_send_action }, 5813 { STATE_NAME_DECL(session_response_wait), srpl_session_response_wait_action }, 5814 { STATE_NAME_DECL(session_evaluate), srpl_session_evaluate_action }, 5815 { STATE_NAME_DECL(sync_wait), srpl_sync_wait_action }, 5816 // Here we are the endpoint that has send the "send candidates message" and we are cycling through the candidates 5817 // we receive until we get a "send candidates" reply. 5818 5819 { STATE_NAME_DECL(send_candidates_send), srpl_send_candidates_send_action }, 5820 { STATE_NAME_DECL(send_candidates_wait), srpl_send_candidates_wait_action }, 5821 5822 // Got a "candidate" message, need to check it and send the right reply. 5823 { STATE_NAME_DECL(candidate_check), srpl_candidate_check_action }, 5824 5825 // At this point we've send a candidate reply, so we're waiting for a host message. It's possible that the host 5826 // went away in the interim, in which case we will get a "candidate" message or a "send candidate" reply. 5827 5828 { STATE_NAME_DECL(candidate_host_wait), srpl_candidate_host_wait_action }, 5829 { STATE_NAME_DECL(candidate_host_prepare), srpl_candidate_host_prepare_action }, 5830 { STATE_NAME_DECL(candidate_host_contention_wait), srpl_candidate_host_contention_wait_action }, 5831 { STATE_NAME_DECL(candidate_host_re_evaluate), srpl_candidate_host_re_evaluate_action }, 5832 5833 // Here we've gotten the host message (the SRP message), and need to apply it and send a response 5834 { STATE_NAME_DECL(candidate_host_apply), srpl_candidate_host_apply_action }, 5835 { STATE_NAME_DECL(candidate_host_apply_wait), srpl_candidate_host_apply_wait_action }, 5836 5837 // We've received a "send candidates" message. Make a list of candidates to send, and then start sending them. 
5838 { STATE_NAME_DECL(send_candidates_received), srpl_send_candidates_received_action }, 5839 // See if there are any candidates left to send; if not, go to send_candidates_response_send 5840 { STATE_NAME_DECL(send_candidates_remaining_check), srpl_candidates_remaining_check_action }, 5841 // Send a "candidate" message for the next candidate 5842 { STATE_NAME_DECL(next_candidate_send), srpl_next_candidate_send_action }, 5843 // Wait for a response to the "candidate" message 5844 { STATE_NAME_DECL(next_candidate_send_wait), srpl_next_candidate_send_wait_action }, 5845 // The candidate requested, so send its host info 5846 { STATE_NAME_DECL(candidate_host_send), srpl_candidate_host_send_action }, 5847 // We're waiting for the remote to acknowledge the host update 5848 { STATE_NAME_DECL(candidate_host_response_wait), srpl_candidate_host_response_wait_action }, 5849 5850 // When we've run out of candidates to send, we send the candidates response. 5851 { STATE_NAME_DECL(send_candidates_response_send), srpl_send_candidates_response_send_action }, 5852 5853 // This is the quiescent state for servers and clients after session establishment database sync. 
5854 // Waiting for updates received locally, or updates sent by remote 5855 { STATE_NAME_DECL(ready), srpl_ready_action }, 5856 // An update was received locally 5857 { STATE_NAME_DECL(srp_client_update_send), srpl_srp_client_update_send_action }, 5858 // We've gotten an ack 5859 { STATE_NAME_DECL(srp_client_ack_evaluate), srpl_srp_client_ack_evaluate_action }, 5860 // See if we have an update from the remote that we stashed because it arrived while we were sending one 5861 { STATE_NAME_DECL(stashed_host_check), srpl_stashed_host_check_action }, 5862 // Apply a stashed update (which may have been stashed in the ready state or the client_update_ack_wait state 5863 { STATE_NAME_DECL(stashed_host_apply), srpl_stashed_host_apply_action }, 5864 // A stashed update finished; check the results 5865 { STATE_NAME_DECL(stashed_host_finished), srpl_stashed_host_finished_action }, 5866 5867 // Initial startup state for server 5868 { STATE_NAME_DECL(session_message_wait), srpl_session_message_wait_action }, 5869 // Send a response once we've figured out that we're going to continue 5870 { STATE_NAME_DECL(session_response_send), srpl_session_response_send }, 5871 // Wait for a "send candidates" message. 
5872 { STATE_NAME_DECL(send_candidates_message_wait), srpl_send_candidates_message_wait_action }, 5873 5874 #ifdef SRP_TEST_SERVER 5875 { STATE_NAME_DECL(test_event_intercept), srpl_test_event_intercept_action }, 5876 #endif 5877 }; 5878 #define SRPL_NUM_CONNECTION_STATES (sizeof(srpl_connection_states) / sizeof(srpl_connection_state_t)) 5879 5880 static srpl_connection_state_t * 5881 srpl_state_get(srpl_state_t state) 5882 { 5883 static bool once = false; 5884 if (!once) { 5885 for (unsigned i = 0; i < SRPL_NUM_CONNECTION_STATES; i++) { 5886 if (srpl_connection_states[i].state != (srpl_state_t)i) { 5887 ERROR("srpl connection state %d doesn't match " PUB_S_SRP, i, srpl_connection_states[i].name); 5888 STATE_DEBUGGING_ABORT(); 5889 return NULL; 5890 } 5891 } 5892 once = true; 5893 } 5894 if (state < 0 || state >= SRPL_NUM_CONNECTION_STATES) { 5895 STATE_DEBUGGING_ABORT(); 5896 return NULL; 5897 } 5898 return &srpl_connection_states[state]; 5899 } 5900 5901 void 5902 srpl_connection_next_state(srpl_connection_t *srpl_connection, srpl_state_t state) 5903 { 5904 srpl_state_t next_state = state; 5905 5906 do { 5907 srpl_connection_state_t *new_state = srpl_state_get(next_state); 5908 5909 if (new_state == NULL) { 5910 ERROR(PRI_S_SRP " next state is invalid: %d", srpl_connection->name, next_state); 5911 STATE_DEBUGGING_ABORT(); 5912 return; 5913 } 5914 srpl_connection->state = next_state; 5915 srpl_connection->state_name = new_state->name; 5916 srpl_connection->state_start_time = srp_time(); 5917 srpl_action_t action = new_state->action; 5918 if (action != NULL) { 5919 next_state = action(srpl_connection, NULL); 5920 } 5921 } while (next_state != srpl_state_invalid); 5922 } 5923 5924 // 5925 // Event functions 5926 // 5927 5928 typedef struct { 5929 srpl_event_type_t event_type; 5930 char *name; 5931 } srpl_event_configuration_t; 5932 5933 #define EVENT_NAME_DECL(name) { srpl_event_##name, #name } 5934 5935 srpl_event_configuration_t srpl_event_configurations[] = { 
5936 EVENT_NAME_DECL(invalid), 5937 EVENT_NAME_DECL(address_add), 5938 EVENT_NAME_DECL(address_remove), 5939 EVENT_NAME_DECL(server_disconnect), 5940 EVENT_NAME_DECL(reconnect_timer_expiry), 5941 EVENT_NAME_DECL(disconnected), 5942 EVENT_NAME_DECL(connected), 5943 EVENT_NAME_DECL(session_response_received), 5944 EVENT_NAME_DECL(send_candidates_response_received), 5945 EVENT_NAME_DECL(candidate_received), 5946 EVENT_NAME_DECL(host_message_received), 5947 EVENT_NAME_DECL(srp_client_update_finished), 5948 EVENT_NAME_DECL(advertise_finished), 5949 EVENT_NAME_DECL(candidate_response_received), 5950 EVENT_NAME_DECL(host_response_received), 5951 EVENT_NAME_DECL(session_message_received), 5952 EVENT_NAME_DECL(send_candidates_message_received), 5953 EVENT_NAME_DECL(do_sync), 5954 }; 5955 #define SRPL_NUM_EVENT_TYPES (sizeof(srpl_event_configurations) / sizeof(srpl_event_configuration_t)) 5956 5957 static srpl_event_configuration_t * 5958 srpl_event_configuration_get(srpl_event_type_t event) 5959 { 5960 static bool once = false; 5961 if (!once) { 5962 for (unsigned i = 0; i < SRPL_NUM_EVENT_TYPES; i++) { 5963 if (srpl_event_configurations[i].event_type != (srpl_event_type_t)i) { 5964 ERROR("srpl connection event %d doesn't match " PUB_S_SRP, i, srpl_event_configurations[i].name); 5965 STATE_DEBUGGING_ABORT(); 5966 return NULL; 5967 } 5968 } 5969 once = true; 5970 } 5971 if (event < 0 || event >= SRPL_NUM_EVENT_TYPES) { 5972 STATE_DEBUGGING_ABORT(); 5973 return NULL; 5974 } 5975 return &srpl_event_configurations[event]; 5976 } 5977 5978 static const char * 5979 srpl_state_name(srpl_state_t state) 5980 { 5981 for (unsigned i = 0; i < SRPL_NUM_CONNECTION_STATES; i++) { 5982 if (srpl_connection_states[i].state == state) { 5983 return srpl_connection_states[i].name; 5984 } 5985 } 5986 return "unknown state"; 5987 } 5988 5989 void 5990 srpl_dump_connection_states(srp_server_t *server_state) 5991 { 5992 srpl_domain_t *domain; 5993 srpl_instance_t *instance; 5994 srpl_connection_t 
*connection; 5995 uint32_t days, hours, minutes, seconds; 5996 5997 for (domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 5998 INFO("srpl connections in domain " PRI_S_SRP ":", domain->name); 5999 for (instance = domain->instances; instance != NULL; instance = instance->next) { 6000 connection = instance->connection; 6001 if (connection != NULL) { 6002 seconds = (uint32_t)(srp_time() - connection->state_start_time); 6003 days = seconds / SECONDS_IN_DAY; 6004 seconds -= days * SECONDS_IN_DAY; 6005 hours = seconds / SECONDS_IN_HOUR; 6006 seconds -= hours * SECONDS_IN_HOUR; 6007 minutes = seconds / SECONDS_IN_MINUTE; 6008 seconds -= minutes * SECONDS_IN_MINUTE; 6009 INFO(PUB_S_SRP "connected to srpl instance " PRI_S_SRP " - connection " PRI_S_SRP 6010 ", in state " PUB_S_SRP " for %" PRIu32 " days %" PRIu32 " hours %" PRIu32 " minutes %" PRIu32 " seconds", 6011 connection->state > srpl_state_connecting ? "" : "not yet ", 6012 instance->instance_name, connection->name, srpl_state_name(connection->state), 6013 days, hours, minutes, seconds); 6014 } else { 6015 INFO("no connection to srpl instance " PRI_S_SRP PUB_S_SRP, instance->instance_name, 6016 instance->is_me ? ", is_me" : ""); 6017 } 6018 } 6019 } 6020 6021 for (instance = server_state->unmatched_instances; instance != NULL; instance = instance->next) { 6022 connection = instance->connection; 6023 if (connection != NULL) { 6024 seconds = (uint32_t)(srp_time() - connection->state_start_time); 6025 days = seconds / SECONDS_IN_DAY; 6026 seconds -= days * SECONDS_IN_DAY; 6027 hours = seconds / SECONDS_IN_HOUR; 6028 seconds -= hours * SECONDS_IN_HOUR; 6029 minutes = seconds / SECONDS_IN_MINUTE; 6030 seconds -= minutes * SECONDS_IN_MINUTE; 6031 INFO(PUB_S_SRP "connected to unidentified instance " PRI_S_SRP " - connection " PRI_S_SRP 6032 ", in state " PUB_S_SRP " for %" PRIu32 " days %" PRIu32 " hours %" PRIu32 " minutes %" PRIu32 " seconds", 6033 connection->state > srpl_state_connecting ? 
"" : "not yet ", 6034 instance->instance_name, connection->name, srpl_state_name(connection->state), 6035 days, hours, minutes, seconds); 6036 } 6037 } 6038 } 6039 6040 void 6041 srpl_change_server_priority(srp_server_t *server_state, uint32_t new) 6042 { 6043 if (server_state != NULL && new != server_state->priority) { 6044 server_state->priority = new; 6045 for (srpl_domain_t *domain = server_state->srpl_domains; domain != NULL; domain = domain->next) { 6046 // priority changed. re-advertise. 6047 srpl_domain_advertise(domain); 6048 } 6049 } 6050 } 6051 6052 static void 6053 srpl_event_initialize(srpl_event_t *event, srpl_event_type_t event_type) 6054 { 6055 memset(event, 0, sizeof(*event)); 6056 srpl_event_configuration_t *event_config = srpl_event_configuration_get(event_type); 6057 if (event_config == NULL) { 6058 ERROR("invalid event type %d", event_type); 6059 STATE_DEBUGGING_ABORT(); 6060 return; 6061 } 6062 event->event_type = event_type; 6063 event->name = event_config->name; 6064 } 6065 6066 static void 6067 srpl_event_deliver(srpl_connection_t *srpl_connection, srpl_event_t *event) 6068 { 6069 srpl_connection_state_t *state = srpl_state_get(srpl_connection->state); 6070 if (state == NULL) { 6071 ERROR(PRI_S_SRP ": event " PUB_S_SRP " received in invalid state %d", 6072 srpl_connection->name, event->name, srpl_connection->state); 6073 STATE_DEBUGGING_ABORT(); 6074 return; 6075 } 6076 if (state->action == NULL) { 6077 FAULT(PRI_S_SRP": event " PUB_S_SRP " received in state " PUB_S_SRP " with NULL action", 6078 srpl_connection->name, event->name, state->name); 6079 return; 6080 } 6081 srpl_state_t next_state = state->action(srpl_connection, event); 6082 if (next_state != srpl_state_invalid) { 6083 srpl_connection_next_state(srpl_connection, next_state); 6084 } 6085 } 6086 6087 static void 6088 srpl_re_register(void *context) 6089 { 6090 INFO("re-registering SRPL service"); 6091 srpl_domain_advertise(context); 6092 } 6093 6094 static void 6095 
srpl_register_completion(DNSServiceRef UNUSED sdref, DNSServiceFlags UNUSED flags, DNSServiceErrorType error_code, 6096 const char *name, const char *regtype, const char *domain, void *context) 6097 { 6098 srpl_domain_t *srpl_domain = context; 6099 6100 if (error_code != kDNSServiceErr_NoError) { 6101 ERROR("unable to advertise _srpl-tls._tcp service: %d", error_code); 6102 if (srpl_domain->srpl_register_wakeup == NULL) { 6103 srpl_domain->srpl_register_wakeup = ioloop_wakeup_create(); 6104 } 6105 if (srpl_domain->srpl_register_wakeup != NULL) { 6106 // Try registering again in one second. 6107 ioloop_add_wake_event(srpl_domain->srpl_register_wakeup, srpl_domain, srpl_re_register, srpl_domain_context_release, 1000); 6108 RETAIN_HERE(srpl_domain, srpl_domain); 6109 } 6110 return; 6111 } 6112 INFO("registered SRP Replication instance name " PRI_S_SRP "." PUB_S_SRP "." PRI_S_SRP, name, regtype, domain); 6113 } 6114 6115 static void 6116 srpl_domain_advertise(srpl_domain_t *domain) 6117 { 6118 DNSServiceRef sdref = NULL; 6119 TXTRecordRef txt_record; 6120 char partner_id_buf[INT64_HEX_STRING_MAX]; 6121 char dataset_id_buf[INT64_HEX_STRING_MAX]; 6122 char xpanid_buf[INT64_HEX_STRING_MAX]; 6123 char priority_buf[INT64_HEX_STRING_MAX]; 6124 srp_server_t *server_state = domain->server_state; 6125 6126 if (domain->srpl_opstate != SRPL_OPSTATE_ROUTINE) { 6127 INFO(PUB_S_SRP ": not in routine state", domain->name); 6128 return; 6129 } 6130 6131 TXTRecordCreate(&txt_record, 0, NULL); 6132 6133 int err = TXTRecordSetValue(&txt_record, "dn", strlen(server_state->current_thread_domain_name), domain->name); 6134 if (err != kDNSServiceErr_NoError) { 6135 ERROR("unable to set domain in TXT record for _srpl-tls._tcp to " PRI_S_SRP, domain->name); 6136 goto exit; 6137 } 6138 6139 snprintf(partner_id_buf, sizeof(partner_id_buf), "%" PRIx64, domain->partner_id); 6140 err = TXTRecordSetValue(&txt_record, "pid", strlen(partner_id_buf), partner_id_buf); 6141 if (err != 
kDNSServiceErr_NoError) { 6142 ERROR("unable to set partner-id in TXT record for _srpl-tls._tcp to " PUB_S_SRP, partner_id_buf); 6143 goto exit; 6144 } 6145 6146 snprintf(dataset_id_buf, sizeof(dataset_id_buf), "%" PRIx64, domain->dataset_id); 6147 err = TXTRecordSetValue(&txt_record, "did", strlen(dataset_id_buf), dataset_id_buf); 6148 if (err != kDNSServiceErr_NoError) { 6149 ERROR("unable to set dataset-id in TXT record for _srpl-tls._tcp to " PUB_S_SRP, dataset_id_buf); 6150 goto exit; 6151 } 6152 6153 snprintf(xpanid_buf, sizeof(xpanid_buf), "%" PRIx64, domain->server_state->xpanid); 6154 err = TXTRecordSetValue(&txt_record, "xpanid", strlen(xpanid_buf), xpanid_buf); 6155 if (err != kDNSServiceErr_NoError) { 6156 ERROR("unable to set xpanid in TXT record for _srpl-tls._tcp to " PUB_S_SRP, dataset_id_buf); 6157 goto exit; 6158 } 6159 6160 snprintf(priority_buf, sizeof(priority_buf), "%" PRIx32, domain->server_state->priority); 6161 err = TXTRecordSetValue(&txt_record, "priority", strlen(priority_buf), priority_buf); 6162 if (err != kDNSServiceErr_NoError) { 6163 ERROR("unable to set priority in TXT record for _srpl-tls._tcp to " PUB_S_SRP, priority_buf); 6164 goto exit; 6165 } 6166 6167 // If there is already a registration, get rid of it 6168 if (domain->srpl_advertise_txn != NULL) { 6169 ioloop_dnssd_txn_cancel(domain->srpl_advertise_txn); 6170 ioloop_dnssd_txn_release(domain->srpl_advertise_txn); 6171 domain->srpl_advertise_txn = NULL; 6172 } 6173 6174 err = DNSServiceRegister(&sdref, kDNSServiceFlagsUnique, 6175 kDNSServiceInterfaceIndexAny, NULL, "_srpl-tls._tcp", NULL, 6176 NULL, htons(853), TXTRecordGetLength(&txt_record), TXTRecordGetBytesPtr(&txt_record), 6177 srpl_register_completion, domain); 6178 if (err != kDNSServiceErr_NoError) { 6179 ERROR("unable to advertise _srpl-tls._tcp service"); 6180 goto exit; 6181 } 6182 domain->srpl_advertise_txn = ioloop_dnssd_txn_add(sdref, NULL, NULL, NULL); 6183 if (domain->srpl_advertise_txn == NULL) { 6184 
ERROR("unable to set up a dnssd_txn_t for _srpl-tls._tcp advertisement."); 6185 goto exit; 6186 } 6187 sdref = NULL; // srpl_advertise_txn holds the reference. 6188 INFO(PUB_S_SRP ": successfully advertised", domain->name); 6189 6190 exit: 6191 if (sdref != NULL) { 6192 DNSServiceRefDeallocate(sdref); 6193 } 6194 TXTRecordDeallocate(&txt_record); 6195 return; 6196 } 6197 6198 static void 6199 srpl_thread_network_name_callback(void *NULLABLE context, const char *NULLABLE thread_network_name, cti_status_t status) 6200 { 6201 size_t thread_domain_size; 6202 char domain_buf[kDNSServiceMaxDomainName]; 6203 char *new_thread_domain_name; 6204 srp_server_t *server_state = context; 6205 6206 if (thread_network_name == NULL || status != kCTIStatus_NoError) { 6207 ERROR("unable to get thread network name."); 6208 return; 6209 } 6210 thread_domain_size = snprintf(domain_buf, sizeof(domain_buf), 6211 "%s.%s", thread_network_name, SRP_THREAD_DOMAIN); 6212 if (thread_domain_size < 0 || thread_domain_size >= sizeof (domain_buf) || 6213 (new_thread_domain_name = strdup(domain_buf)) == NULL) 6214 { 6215 ERROR("no memory for new thread network name: " PRI_S_SRP, thread_network_name); 6216 return; 6217 } 6218 6219 if (server_state->current_thread_domain_name != NULL) { 6220 srpl_domain_rename(server_state->current_thread_domain_name, new_thread_domain_name); 6221 } 6222 srpl_domain_add(server_state, new_thread_domain_name); 6223 free(server_state->current_thread_domain_name); 6224 server_state->current_thread_domain_name = new_thread_domain_name; 6225 } 6226 6227 // If the partner does not discover any other SRPL partners to synchronize with, 6228 // or it has synchronized with all the partners discovered so far, it transitions 6229 // out of the "startup" state to the "routine operation" state. 
6230 static void 6231 srpl_partner_discovery_timeout(void *context) 6232 { 6233 srpl_domain_t *domain = context; 6234 6235 INFO("partner discovery timeout."); 6236 6237 domain->partner_discovery_pending = false; 6238 if (domain->partner_discovery_timeout != NULL) { 6239 ioloop_cancel_wake_event(domain->partner_discovery_timeout); 6240 ioloop_wakeup_release(domain->partner_discovery_timeout); 6241 domain->partner_discovery_timeout = NULL; 6242 } 6243 srpl_maybe_sync_or_transition(domain); 6244 } 6245 6246 // count how many partners are advertising the proposed dataset id 6247 static int 6248 srpl_active_winning_partners(srpl_domain_t *domain, int *rank) 6249 { 6250 int num_winners = 0; 6251 int my_rank = 0; 6252 if (domain->have_dataset_id) { 6253 uint64_t proposed_dataset_id = domain->dataset_id; 6254 uint32_t my_priority = domain->server_state->priority; 6255 // winning partners are those with higher priority, or same priority but 6256 // lower partner id, and advertising the proposed dataset id. 
6257 for (srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) { 6258 if (instance->connection != NULL && 6259 instance->connection->state > srpl_state_session_evaluate && 6260 instance->dataset_id == proposed_dataset_id && 6261 instance->priority >= my_priority) 6262 { 6263 num_winners++; 6264 if (instance->priority > my_priority || (instance->priority == my_priority && 6265 instance->partner_id < domain->partner_id)) 6266 { 6267 my_rank++; 6268 } 6269 } 6270 } 6271 } 6272 *rank = my_rank; 6273 return num_winners; 6274 } 6275 6276 static void 6277 srpl_sync_with_instance(srpl_instance_t *instance) 6278 { 6279 INFO("sync with " PRI_S_SRP " with dataset_id %" PRIx64, instance->instance_name, instance->dataset_id); 6280 if (instance->discovered_in_window) { 6281 instance->sync_to_join = true; 6282 } 6283 instance->sync_fail = false; 6284 srpl_event_t event; 6285 srpl_event_initialize(&event, srpl_event_do_sync); 6286 srpl_event_deliver(instance->connection, &event); 6287 } 6288 6289 // We check how many active srp servers we discovered. If less than 5, we first 6290 // check if we are ready to enter the routine state. If there are still srp servers 6291 // that we haven't started wo sync with, we do so. 6292 static void 6293 srpl_maybe_sync_or_transition(srpl_domain_t *domain) 6294 { 6295 int num_winners = 0; 6296 int rank = 0; 6297 // if we haven't committed a dataset id, here we check if we 6298 // should propose a new one. we propose a new dataset id if 6299 // sync with the instances of the proposed dataset id all fail, or 6300 // there's no partner advertising the proposed dataset id. 
6301 if (!domain->dataset_id_committed) { 6302 srpl_maybe_propose_new_dataset_id(domain); 6303 } 6304 num_winners = srpl_active_winning_partners(domain, &rank); 6305 6306 if (num_winners < MAX_ANYCAST_NUM) { 6307 INFO("%d other srp servers are advertising.", num_winners); 6308 for (srpl_instance_t *instance = domain->instances; instance != NULL; instance = instance->next) { 6309 // we sync with the instances with the same dataset id 6310 if (instance->connection != NULL && 6311 instance->connection->state == srpl_state_sync_wait && 6312 srpl_dataset_id_compare(instance->dataset_id, domain->dataset_id) >= 0) 6313 { 6314 srpl_sync_with_instance(instance); 6315 } 6316 } 6317 if (domain->srpl_opstate != SRPL_OPSTATE_ROUTINE && 6318 srpl_can_transition_to_routine_state(domain)) 6319 { 6320 srpl_transition_to_routine_state(domain); 6321 } 6322 } else { 6323 // MAX_ANYCAST_NUM or more better qualified servers are advertising. if 6324 // we are in the routine state and advertising, we should withdraw and 6325 // reenter the startup state. 6326 INFO("%d other srp servers are advertising, rank = %d.", num_winners, rank); 6327 if (domain->srpl_opstate == SRPL_OPSTATE_ROUTINE && 6328 rank >= MAX_ANYCAST_NUM) 6329 { 6330 INFO("transition to startup state."); 6331 srpl_transition_to_startup_state(domain); 6332 } 6333 } 6334 } 6335 6336 static void 6337 srpl_transition_to_startup_state(srpl_domain_t *domain) 6338 { 6339 srp_server_t *server_state = domain->server_state; 6340 6341 // stop advertising the domain. 6342 srpl_stop_domain_advertisement(domain); 6343 // move to "startup" state. 
6344 domain->srpl_opstate = SRPL_OPSTATE_STARTUP; 6345 6346 if (server_state == NULL) { 6347 ERROR("server state is NULL."); 6348 return; 6349 } 6350 6351 #if STUB_ROUTER 6352 route_state_t *route_state = NULL; 6353 route_state = server_state->route_state; 6354 if (route_state != NULL && !strcmp(domain->name, server_state->current_thread_domain_name)) { 6355 route_state->partition_can_advertise_anycast_service = false; 6356 partition_stop_advertising_anycast_service(route_state, route_state->thread_sequence_number); 6357 } 6358 #endif 6359 if (domain->partner_discovery_timeout == NULL) { 6360 domain->partner_discovery_timeout = ioloop_wakeup_create(); 6361 } 6362 if (domain->partner_discovery_timeout) { 6363 ioloop_add_wake_event(domain->partner_discovery_timeout, domain, 6364 srpl_partner_discovery_timeout, srpl_domain_context_release, 6365 MIN_PARTNER_DISCOVERY_INTERVAL + 6366 srp_random16() % PARTNER_DISCOVERY_INTERVAL_RANGE); 6367 RETAIN_HERE(domain, srpl_domain); 6368 } else { 6369 ERROR("unable to add wakeup event for partner discovery."); 6370 return; 6371 } 6372 domain->partner_discovery_pending = true; 6373 } 6374 6375 void 6376 srpl_startup(srp_server_t *server_state) 6377 { 6378 cti_get_thread_network_name(server_state, srpl_thread_network_name_callback, NULL); 6379 } 6380 #endif // SRP_FEATURE_REPLICATION 6381 6382 // Local Variables: 6383 // mode: C 6384 // tab-width: 4 6385 // c-file-style: "bsd" 6386 // c-basic-offset: 4 6387 // fill-column: 120 6388 // indent-tabs-mode: nil 6389 // End: 6390