/*	$NetBSD: journal.c,v 1.15 2026/01/29 18:37:49 christos Exp $	*/
2
3 /*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16 #include <errno.h>
17 #include <inttypes.h>
18 #include <stdbool.h>
19 #include <stdlib.h>
20 #include <unistd.h>
21
22 #include <isc/dir.h>
23 #include <isc/file.h>
24 #include <isc/mem.h>
25 #include <isc/overflow.h>
26 #include <isc/result.h>
27 #include <isc/serial.h>
28 #include <isc/stdio.h>
29 #include <isc/string.h>
30 #include <isc/util.h>
31
32 #include <dns/compress.h>
33 #include <dns/db.h>
34 #include <dns/dbiterator.h>
35 #include <dns/diff.h>
36 #include <dns/fixedname.h>
37 #include <dns/journal.h>
38 #include <dns/log.h>
39 #include <dns/rdataset.h>
40 #include <dns/rdatasetiter.h>
41 #include <dns/soa.h>
42
43 /*! \file
44 * \brief Journaling.
45 *
46 * A journal file consists of
47 *
48 * \li A fixed-size header of type journal_rawheader_t.
49 *
50 * \li The index. This is an unordered array of index entries
51 * of type journal_rawpos_t giving the locations
52 * of some arbitrary subset of the journal's addressable
53 * transactions. The index entries are used as hints to
54 * speed up the process of locating a transaction with a given
55 * serial number. Unused index entries have an "offset"
56 * field of zero. The size of the index can vary between
57 * journal files, but does not change during the lifetime
58 * of a file. The size can be zero.
59 *
60 * \li The journal data. This consists of one or more transactions.
61 * Each transaction begins with a transaction header of type
62 * journal_rawxhdr_t. The transaction header is followed by a
63 * sequence of RRs, similar in structure to an IXFR difference
64 * sequence (RFC1995). That is, the pre-transaction SOA,
65 * zero or more other deleted RRs, the post-transaction SOA,
66 * and zero or more other added RRs. Unlike in IXFR, each RR
67 * is prefixed with a 32-bit length.
68 *
69 * The journal data part grows as new transactions are
70 * appended to the file. Only those transactions
71 * whose serial number is current-(2^31-1) to current
72 * are considered "addressable" and may be pointed
73 * to from the header or index. They may be preceded
74 * by old transactions that are no longer addressable,
75 * and they may be followed by transactions that were
76 * appended to the journal but never committed by updating
77 * the "end" position in the header. The latter will
78 * be overwritten when new transactions are added.
79 */
80
81 /**************************************************************************/
82 /*
83 * Miscellaneous utilities.
84 */
85
/*
 * Leading arguments (log context, category, module) passed to the
 * isc_log_write() calls in this file.
 */
#define JOURNAL_COMMON_LOGARGS \
	dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL

/* JOURNAL_COMMON_LOGARGS followed by debug log level 'n'. */
#define JOURNAL_DEBUG_LOGARGS(n) JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

/*
 * Bit in the 'flags' byte of the on-disk header; it is mirrored by the
 * 'serialset' member of the in-core header.
 */
#define JOURNAL_SERIALSET 0x01U

/* Forward declaration. */
static isc_result_t
index_to_disk(dns_journal_t *);
95
/*
 * Assemble a 32-bit unsigned integer from the four bytes starting at
 * 'p', most significant byte first (big-endian).
 */
static uint32_t
decode_uint32(unsigned char *p) {
	uint32_t val = 0;

	for (int i = 0; i < 4; i++) {
		val = (val << 8) | (uint32_t)p[i];
	}
	return val;
}
101
/*
 * Store 'val' into the four bytes starting at 'p', most significant
 * byte first (big-endian).
 */
static void
encode_uint32(uint32_t val, unsigned char *p) {
	/* Fill from the least significant byte backwards. */
	for (int i = 3; i >= 0; i--) {
		p[i] = (uint8_t)(val & 0xffU);
		val >>= 8;
	}
}
109
110 isc_result_t
111 dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx,
112 dns_diffop_t op, dns_difftuple_t **tp) {
113 isc_result_t result;
114 dns_dbnode_t *node;
115 dns_rdataset_t rdataset;
116 dns_rdata_t rdata = DNS_RDATA_INIT;
117 dns_fixedname_t fixed;
118 dns_name_t *zonename;
119
120 zonename = dns_fixedname_initname(&fixed);
121 dns_name_copy(dns_db_origin(db), zonename);
122
123 node = NULL;
124 result = dns_db_findnode(db, zonename, false, &node);
125 if (result != ISC_R_SUCCESS) {
126 goto nonode;
127 }
128
129 dns_rdataset_init(&rdataset);
130 result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0,
131 (isc_stdtime_t)0, &rdataset, NULL);
132 if (result != ISC_R_SUCCESS) {
133 goto freenode;
134 }
135
136 result = dns_rdataset_first(&rdataset);
137 if (result != ISC_R_SUCCESS) {
138 goto freenode;
139 }
140
141 dns_rdataset_current(&rdataset, &rdata);
142 dns_rdataset_getownercase(&rdataset, zonename);
143
144 result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl, &rdata,
145 tp);
146
147 dns_rdataset_disassociate(&rdataset);
148 dns_db_detachnode(db, &node);
149 return result;
150
151 freenode:
152 dns_db_detachnode(db, &node);
153 nonode:
154 UNEXPECTED_ERROR("missing SOA");
155 return result;
156 }
157
158 /* Journaling */
159
/*%
 * On-disk representation of a "pointer" to a journal entry.
 * These are used in the journal header to locate the beginning
 * and end of the journal, and in the journal index to locate
 * other transactions.
 *
 * Both members are 32-bit values in big-endian byte order; they are
 * converted with decode_uint32()/encode_uint32().
 */
typedef struct {
	unsigned char serial[4]; /*%< SOA serial before update. */
	/*
	 * XXXRTH  Should offset be 8 bytes?
	 * XXXDCL ... probably, since off_t is 8 bytes on many OSs.
	 * XXXAG  ... but we will not be able to seek >2G anyway on many
	 *            platforms as long as we are using fseek() rather
	 *            than lseek().
	 */
	unsigned char offset[4]; /*%< Offset from beginning of file. */
} journal_rawpos_t;
177
/*%
 * The header is of a fixed size, with some spare room for future
 * extensions.
 */
#define JOURNAL_HEADER_SIZE 64 /* Bytes. */

/*%
 * Transaction header layout versions.  Version 1 corresponds to
 * journal_rawxhdr_ver1_t (no record count); version 2 corresponds to
 * journal_rawxhdr_t.
 */
typedef enum {
	XHDR_VERSION1 = 1,
	XHDR_VERSION2 = 2,
} xhdr_version_t;
188
/*%
 * The on-disk representation of the journal header.
 * All numbers are stored in big-endian order.
 *
 * The union with 'pad' makes the type exactly JOURNAL_HEADER_SIZE
 * bytes as long as the 'h' members fit within it.
 */
typedef union {
	struct {
		/*% File format version ID. */
		unsigned char format[16];
		/*% Position of the first addressable transaction */
		journal_rawpos_t begin;
		/*% Position of the next (yet nonexistent) transaction. */
		journal_rawpos_t end;
		/*% Number of index entries following the header. */
		unsigned char index_size[4];
		/*% Source serial number. */
		unsigned char sourceserial[4];
		/*% Flag bits; JOURNAL_SERIALSET is tested/set here. */
		unsigned char flags;
	} h;
	/* Pad the header to a fixed size. */
	unsigned char pad[JOURNAL_HEADER_SIZE];
} journal_rawheader_t;
210
/*%
 * The on-disk representation of the transaction header, version 2.
 * There is one of these at the beginning of each transaction.
 * Each member is a 32-bit big-endian value.
 */
typedef struct {
	unsigned char size[4];	  /*%< In bytes, excluding header. */
	unsigned char count[4];	  /*%< Number of records in transaction */
	unsigned char serial0[4]; /*%< SOA serial before update. */
	unsigned char serial1[4]; /*%< SOA serial after update. */
} journal_rawxhdr_t;
221
/*%
 * Old-style raw transaction header, version 1, used for backward
 * compatibility mode.  It has the same members as the version 2
 * header except that there is no record count.
 */
typedef struct {
	unsigned char size[4];	  /*%< Transaction size. */
	unsigned char serial0[4]; /*%< SOA serial before update. */
	unsigned char serial1[4]; /*%< SOA serial after update. */
} journal_rawxhdr_ver1_t;
231
/*%
 * The on-disk representation of the RR header.
 * There is one of these at the beginning of each RR.
 * The size is a 32-bit big-endian value.
 */
typedef struct {
	unsigned char size[4]; /*%< In bytes, excluding header. */
} journal_rawrrhdr_t;
239
/*%
 * The in-core representation of a journal position (the decoded form
 * of journal_rawpos_t).
 */
typedef struct {
	uint32_t serial; /*%< SOA serial at this position. */
	off_t offset;	 /*%< File offset; zero marks an unused entry. */
} journal_pos_t;

/* A position is valid (in use) when its offset is nonzero. */
#define POS_VALID(pos)	    ((pos).offset != 0)
/* Mark a position as unused by zeroing both members. */
#define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0)
250
/*%
 * The in-core representation of the journal header (the decoded form
 * of journal_rawheader_t).
 */
typedef struct {
	unsigned char format[16]; /*%< File format version ID. */
	journal_pos_t begin;	  /*%< First addressable transaction. */
	journal_pos_t end;	  /*%< Next (yet nonexistent) transaction. */
	uint32_t index_size;	  /*%< Number of index entries. */
	uint32_t sourceserial;	  /*%< Source serial number. */
	bool serialset;		  /*%< JOURNAL_SERIALSET flag is set. */
} journal_header_t;
259
/*%
 * The in-core representation of the transaction header.
 * 'count' is set to zero when a version 1 header (which has no count
 * field) is read.
 */
typedef struct {
	uint32_t size;	  /*%< In bytes, excluding header. */
	uint32_t count;	  /*%< Number of records in transaction. */
	uint32_t serial0; /*%< SOA serial before update. */
	uint32_t serial1; /*%< SOA serial after update. */
} journal_xhdr_t;
269
/*%
 * The in-core representation of the RR header.
 */
typedef struct {
	uint32_t size; /*%< In bytes, excluding header. */
} journal_rrhdr_t;
276
/*%
 * Initial contents to store in the header of a newly created
 * journal file.
 *
 * The header starts with the magic string ";BIND LOG V9.2\n"
 * to identify the file as a BIND 9 journal file.  An ASCII
 * identification string is used rather than a binary magic
 * number to be consistent with BIND 8 (BIND 8 journal files
 * are ASCII text files).
 *
 * 'journal_header_ver1' carries the older ";BIND LOG V9\n" magic
 * string; it is written when a journal is created in downgrade mode
 * and is used to recognize old-format files when opening.
 */

static journal_header_t journal_header_ver1 = {
	";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0, 0, 0
};
static journal_header_t initial_journal_header = {
	";BIND LOG V9.2\n", { 0, 0 }, { 0, 0 }, 0, 0, 0
};
294
/* A journal is empty when its begin and end offsets coincide. */
#define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)

/*%
 * State of an open journal.  journal_open() starts in INVALID and
 * moves to READ or WRITE depending on whether the journal was opened
 * writable.
 * NOTE(review): the code that enters TRANSACTION and INLINE is not in
 * this part of the file -- confirm their exact meaning there.
 */
typedef enum {
	JOURNAL_STATE_INVALID,
	JOURNAL_STATE_READ,
	JOURNAL_STATE_WRITE,
	JOURNAL_STATE_TRANSACTION,
	JOURNAL_STATE_INLINE
} journal_state_t;
304
/*%
 * An open journal file: the file handle and current offset, the
 * decoded header and index, and the per-transaction write state and
 * read-iterator state.
 */
struct dns_journal {
	unsigned int magic; /*%< JOUR */
	isc_mem_t *mctx;    /*%< Memory context */
	journal_state_t state; /*%< See journal_state_t */
	xhdr_version_t xhdr_version; /*%< Expected transaction header version */
	bool header_ver1;	     /*%< Transaction header compatibility
				      *   mode is allowed */
	bool recovered;		     /*%< A recoverable error was found
				      *   while reading the journal */
	char *filename;		     /*%< Journal file name */
	FILE *fp;		     /*%< File handle */
	off_t offset;		     /*%< Current file offset */
	journal_xhdr_t curxhdr;	     /*%< Current transaction header */
	journal_header_t header;     /*%< In-core journal header */
	unsigned char *rawindex;     /*%< In-core buffer for journal index
				      *   in on-disk format */
	journal_pos_t *index;	     /*%< In-core journal index */

	/*% Current transaction state (when writing). */
	struct {
		unsigned int n_soa;   /*%< Number of SOAs seen */
		unsigned int n_rr;    /*%< Number of RRs to write */
		journal_pos_t pos[2]; /*%< Begin/end position */
	} x;

	/*% Iteration state (when reading). */
	struct {
		/* These define the part of the journal we iterate over. */
		journal_pos_t bpos; /*%< Position before first, */
		journal_pos_t cpos; /*%< before current, */
		journal_pos_t epos; /*%< and after last transaction */
		/* The rest is iterator state. */
		uint32_t current_serial; /*%< Current SOA serial */
		isc_buffer_t source;	 /*%< Data from disk */
		isc_buffer_t target;	 /*%< Data from _fromwire check */
		dns_decompress_t dctx;	 /*%< Dummy decompression ctx */
		dns_name_t name;	 /*%< Current domain name */
		dns_rdata_t rdata;	 /*%< Current rdata */
		uint32_t ttl;		 /*%< Current TTL */
		unsigned int xsize;	 /*%< Size of transaction data */
		unsigned int xpos;	 /*%< Current position in it */
		isc_result_t result;	 /*%< Result of last call */
	} it;
};

/* Magic number stored in dns_journal.magic and the matching validity check. */
#define DNS_JOURNAL_MAGIC    ISC_MAGIC('J', 'O', 'U', 'R')
#define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
352
353 static void
354 journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) {
355 cooked->serial = decode_uint32(raw->serial);
356 cooked->offset = decode_uint32(raw->offset);
357 }
358
359 static void
360 journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) {
361 encode_uint32(cooked->serial, raw->serial);
362 encode_uint32(cooked->offset, raw->offset);
363 }
364
365 static void
366 journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) {
367 INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
368
369 memmove(cooked->format, raw->h.format, sizeof(cooked->format));
370 journal_pos_decode(&raw->h.begin, &cooked->begin);
371 journal_pos_decode(&raw->h.end, &cooked->end);
372 cooked->index_size = decode_uint32(raw->h.index_size);
373 cooked->sourceserial = decode_uint32(raw->h.sourceserial);
374 cooked->serialset = ((raw->h.flags & JOURNAL_SERIALSET) != 0);
375 }
376
377 static void
378 journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) {
379 unsigned char flags = 0;
380
381 INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
382
383 memset(raw->pad, 0, sizeof(raw->pad));
384 memmove(raw->h.format, cooked->format, sizeof(raw->h.format));
385 journal_pos_encode(&raw->h.begin, &cooked->begin);
386 journal_pos_encode(&raw->h.end, &cooked->end);
387 encode_uint32(cooked->index_size, raw->h.index_size);
388 encode_uint32(cooked->sourceserial, raw->h.sourceserial);
389 if (cooked->serialset) {
390 flags |= JOURNAL_SERIALSET;
391 }
392 raw->h.flags = flags;
393 }
394
395 /*
396 * Journal file I/O subroutines, with error checking and reporting.
397 */
398 static isc_result_t
399 journal_seek(dns_journal_t *j, uint32_t offset) {
400 isc_result_t result;
401
402 result = isc_stdio_seek(j->fp, (off_t)offset, SEEK_SET);
403 if (result != ISC_R_SUCCESS) {
404 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
405 "%s: seek: %s", j->filename,
406 isc_result_totext(result));
407 return ISC_R_UNEXPECTED;
408 }
409 j->offset = offset;
410 return ISC_R_SUCCESS;
411 }
412
413 static isc_result_t
414 journal_read(dns_journal_t *j, void *mem, size_t nbytes) {
415 isc_result_t result;
416
417 result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL);
418 if (result != ISC_R_SUCCESS) {
419 if (result == ISC_R_EOF) {
420 return ISC_R_NOMORE;
421 }
422 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
423 "%s: read: %s", j->filename,
424 isc_result_totext(result));
425 return ISC_R_UNEXPECTED;
426 }
427 j->offset += (off_t)nbytes;
428 return ISC_R_SUCCESS;
429 }
430
431 static isc_result_t
432 journal_write(dns_journal_t *j, void *mem, size_t nbytes) {
433 isc_result_t result;
434
435 result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL);
436 if (result != ISC_R_SUCCESS) {
437 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
438 "%s: write: %s", j->filename,
439 isc_result_totext(result));
440 return ISC_R_UNEXPECTED;
441 }
442 j->offset += (off_t)nbytes;
443 return ISC_R_SUCCESS;
444 }
445
446 static isc_result_t
447 journal_fsync(dns_journal_t *j) {
448 isc_result_t result;
449
450 result = isc_stdio_flush(j->fp);
451 if (result != ISC_R_SUCCESS) {
452 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
453 "%s: flush: %s", j->filename,
454 isc_result_totext(result));
455 return ISC_R_UNEXPECTED;
456 }
457 result = isc_stdio_sync(j->fp);
458 if (result != ISC_R_SUCCESS) {
459 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
460 "%s: fsync: %s", j->filename,
461 isc_result_totext(result));
462 return ISC_R_UNEXPECTED;
463 }
464 return ISC_R_SUCCESS;
465 }
466
467 /*
468 * Read/write a transaction header at the current file position.
469 */
470 static isc_result_t
471 journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) {
472 isc_result_t result;
473
474 j->it.cpos.offset = j->offset;
475
476 switch (j->xhdr_version) {
477 case XHDR_VERSION1: {
478 journal_rawxhdr_ver1_t raw;
479 result = journal_read(j, &raw, sizeof(raw));
480 if (result != ISC_R_SUCCESS) {
481 return result;
482 }
483 xhdr->size = decode_uint32(raw.size);
484 xhdr->count = 0;
485 xhdr->serial0 = decode_uint32(raw.serial0);
486 xhdr->serial1 = decode_uint32(raw.serial1);
487 j->curxhdr = *xhdr;
488 return ISC_R_SUCCESS;
489 }
490
491 case XHDR_VERSION2: {
492 journal_rawxhdr_t raw;
493 result = journal_read(j, &raw, sizeof(raw));
494 if (result != ISC_R_SUCCESS) {
495 return result;
496 }
497 xhdr->size = decode_uint32(raw.size);
498 xhdr->count = decode_uint32(raw.count);
499 xhdr->serial0 = decode_uint32(raw.serial0);
500 xhdr->serial1 = decode_uint32(raw.serial1);
501 j->curxhdr = *xhdr;
502 return ISC_R_SUCCESS;
503 }
504
505 default:
506 return ISC_R_NOTIMPLEMENTED;
507 }
508 }
509
510 static isc_result_t
511 journal_write_xhdr(dns_journal_t *j, uint32_t size, uint32_t count,
512 uint32_t serial0, uint32_t serial1) {
513 if (j->header_ver1) {
514 journal_rawxhdr_ver1_t raw;
515 encode_uint32(size, raw.size);
516 encode_uint32(serial0, raw.serial0);
517 encode_uint32(serial1, raw.serial1);
518 return journal_write(j, &raw, sizeof(raw));
519 } else {
520 journal_rawxhdr_t raw;
521 encode_uint32(size, raw.size);
522 encode_uint32(count, raw.count);
523 encode_uint32(serial0, raw.serial0);
524 encode_uint32(serial1, raw.serial1);
525 return journal_write(j, &raw, sizeof(raw));
526 }
527 }
528
529 /*
530 * Read an RR header at the current file position.
531 */
532
533 static isc_result_t
534 journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) {
535 journal_rawrrhdr_t raw;
536 isc_result_t result;
537
538 result = journal_read(j, &raw, sizeof(raw));
539 if (result != ISC_R_SUCCESS) {
540 return result;
541 }
542 rrhdr->size = decode_uint32(raw.size);
543 return ISC_R_SUCCESS;
544 }
545
546 static isc_result_t
547 journal_file_create(isc_mem_t *mctx, bool downgrade, const char *filename) {
548 FILE *fp = NULL;
549 isc_result_t result;
550 journal_header_t header;
551 journal_rawheader_t rawheader;
552 int index_size = 56; /* XXX configurable */
553 int size;
554 void *mem = NULL; /* Memory for temporary index image. */
555
556 INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE);
557
558 result = isc_stdio_open(filename, "wb", &fp);
559 if (result != ISC_R_SUCCESS) {
560 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
561 "%s: create: %s", filename,
562 isc_result_totext(result));
563 return ISC_R_UNEXPECTED;
564 }
565
566 if (downgrade) {
567 header = journal_header_ver1;
568 } else {
569 header = initial_journal_header;
570 }
571 header.index_size = index_size;
572 journal_header_encode(&header, &rawheader);
573
574 size = sizeof(journal_rawheader_t) +
575 ISC_CHECKED_MUL(index_size, sizeof(journal_rawpos_t));
576
577 mem = isc_mem_cget(mctx, 1, size);
578 memmove(mem, &rawheader, sizeof(rawheader));
579
580 result = isc_stdio_write(mem, 1, (size_t)size, fp, NULL);
581 if (result != ISC_R_SUCCESS) {
582 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
583 "%s: write: %s", filename,
584 isc_result_totext(result));
585 (void)isc_stdio_close(fp);
586 (void)isc_file_remove(filename);
587 isc_mem_put(mctx, mem, size);
588 return ISC_R_UNEXPECTED;
589 }
590 isc_mem_put(mctx, mem, size);
591
592 result = isc_stdio_close(fp);
593 if (result != ISC_R_SUCCESS) {
594 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
595 "%s: close: %s", filename,
596 isc_result_totext(result));
597 (void)isc_file_remove(filename);
598 return ISC_R_UNEXPECTED;
599 }
600
601 return ISC_R_SUCCESS;
602 }
603
/*
 * Open the journal file 'filename' and return a new journal object in
 * '*journalp'.
 *
 * The file is opened read/write when 'writable' is set, read-only
 * otherwise.  If the file does not exist and 'create' is set, it is
 * first created with journal_file_create() ('downgrade' selects the
 * old-format header) and then reopened read/write.
 *
 * The header is read and its format string checked against the two
 * known magic strings; the index, if any, is read and decoded.  On
 * success the journal state is JOURNAL_STATE_WRITE or
 * JOURNAL_STATE_READ and the recorded file offset is invalid (-1), so
 * callers must seek before reading or writing.
 *
 * On failure everything allocated here is released and '*journalp' is
 * not modified.
 * NOTE(review): the CHECK() macro is defined outside this part of the
 * file; it is presumed to store its argument in 'result' and jump to
 * 'cleanup' on failure -- confirm.
 */
static isc_result_t
journal_open(isc_mem_t *mctx, const char *filename, bool writable, bool create,
	     bool downgrade, dns_journal_t **journalp) {
	FILE *fp = NULL;
	isc_result_t result;
	journal_rawheader_t rawheader;
	dns_journal_t *j;

	REQUIRE(journalp != NULL && *journalp == NULL);

	/*
	 * All members not named in the initializer start out zero/NULL,
	 * which the cleanup code relies on.
	 */
	j = isc_mem_get(mctx, sizeof(*j));
	*j = (dns_journal_t){ .state = JOURNAL_STATE_INVALID,
			      .filename = isc_mem_strdup(mctx, filename),
			      .xhdr_version = XHDR_VERSION2 };
	isc_mem_attach(mctx, &j->mctx);

	result = isc_stdio_open(j->filename, writable ? "rb+" : "rb", &fp);
	if (result == ISC_R_FILENOTFOUND) {
		if (create) {
			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(1),
				      "journal file %s does not exist, "
				      "creating it",
				      j->filename);
			CHECK(journal_file_create(mctx, downgrade, filename));
			/*
			 * Retry.
			 */
			result = isc_stdio_open(j->filename, "rb+", &fp);
		} else {
			CHECK(ISC_R_NOTFOUND);
		}
	}
	if (result != ISC_R_SUCCESS) {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: open: %s", j->filename,
			      isc_result_totext(result));
		CHECK(ISC_R_UNEXPECTED);
	}

	j->fp = fp;

	/*
	 * Set magic early so that seek/read can succeed.
	 */
	j->magic = DNS_JOURNAL_MAGIC;

	CHECK(journal_seek(j, 0));
	CHECK(journal_read(j, &rawheader, sizeof(rawheader)));

	if (memcmp(rawheader.h.format, journal_header_ver1.format,
		   sizeof(journal_header_ver1.format)) == 0)
	{
		/*
		 * The file header says it's the old format, but it
		 * still might have the new xhdr format because we
		 * forgot to change the format string when we introduced
		 * the new xhdr.  When we first try to read it, we assume
		 * it uses the new xhdr format. If that fails, we'll be
		 * called a second time with compat set to true, in which
		 * case we can lower xhdr_version to 1 if we find a
		 * corrupt transaction.
		 *
		 * NOTE(review): this function has no 'compat' parameter;
		 * the sentence above looks stale -- confirm.
		 */
		j->header_ver1 = true;
	} else if (memcmp(rawheader.h.format, initial_journal_header.format,
			  sizeof(initial_journal_header.format)) == 0)
	{
		/*
		 * File header says this is format version 2; all
		 * transactions have to match.
		 */
		j->header_ver1 = false;
	} else {
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
			      "%s: journal format not recognized", j->filename);
		CHECK(ISC_R_UNEXPECTED);
	}
	journal_header_decode(&rawheader, &j->header);

	/*
	 * If there is an index, read the raw index into a dynamically
	 * allocated buffer and then convert it into a cooked index.
	 */
	if (j->header.index_size != 0) {
		unsigned int i;
		unsigned int rawbytes;
		unsigned char *p;

		rawbytes = ISC_CHECKED_MUL(j->header.index_size,
					   sizeof(journal_rawpos_t));
		j->rawindex = isc_mem_get(mctx, rawbytes);

		CHECK(journal_read(j, j->rawindex, rawbytes));

		j->index = isc_mem_cget(mctx, j->header.index_size,
					sizeof(journal_pos_t));

		/* Each raw entry is a 4-byte serial then a 4-byte offset. */
		p = j->rawindex;
		for (i = 0; i < j->header.index_size; i++) {
			j->index[i].serial = decode_uint32(p);
			p += 4;
			j->index[i].offset = decode_uint32(p);
			p += 4;
		}
		INSIST(p == j->rawindex + rawbytes);
	}
	j->offset = -1; /* Invalid, must seek explicitly. */

	/*
	 * Initialize the iterator.
	 */
	dns_name_init(&j->it.name, NULL);
	dns_rdata_init(&j->it.rdata);

	/*
	 * Set up empty initial buffers for unchecked and checked
	 * wire format RR data.  They will be reallocated
	 * later.
	 */
	isc_buffer_init(&j->it.source, NULL, 0);
	isc_buffer_init(&j->it.target, NULL, 0);
	j->it.dctx = DNS_DECOMPRESS_NEVER;

	j->state = writable ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ;

	*journalp = j;
	return ISC_R_SUCCESS;

cleanup:
	/*
	 * Undo everything done so far; members that were never set are
	 * still NULL and are skipped.
	 */
	j->magic = 0;
	if (j->rawindex != NULL) {
		isc_mem_cput(j->mctx, j->rawindex, j->header.index_size,
			     sizeof(journal_rawpos_t));
	}
	if (j->index != NULL) {
		isc_mem_cput(j->mctx, j->index, j->header.index_size,
			     sizeof(journal_pos_t));
	}
	isc_mem_free(j->mctx, j->filename);
	if (j->fp != NULL) {
		(void)isc_stdio_close(j->fp);
	}
	isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
	return result;
}
748
749 isc_result_t
750 dns_journal_open(isc_mem_t *mctx, const char *filename, unsigned int mode,
751 dns_journal_t **journalp) {
752 isc_result_t result;
753 size_t namelen;
754 char backup[1024];
755 bool writable, create;
756
757 create = ((mode & DNS_JOURNAL_CREATE) != 0);
758 writable = ((mode & (DNS_JOURNAL_WRITE | DNS_JOURNAL_CREATE)) != 0);
759
760 result = journal_open(mctx, filename, writable, create, false,
761 journalp);
762 if (result == ISC_R_NOTFOUND) {
763 namelen = strlen(filename);
764 if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0)
765 {
766 namelen -= 4;
767 }
768
769 result = snprintf(backup, sizeof(backup), "%.*s.jbk",
770 (int)namelen, filename);
771 if (result >= sizeof(backup)) {
772 return ISC_R_NOSPACE;
773 }
774 result = journal_open(mctx, backup, writable, writable, false,
775 journalp);
776 }
777 return result;
778 }
779
780 /*
781 * A comparison function defining the sorting order for
782 * entries in the IXFR-style journal file.
783 *
784 * The IXFR format requires that deletions are sorted before
785 * additions, and within either one, SOA records are sorted
786 * before others.
787 *
788 * Also sort the non-SOA records by type as a courtesy to the
789 * server receiving the IXFR - it may help reduce the amount of
790 * rdataset merging it has to do.
791 */
792 static int
793 ixfr_order(const void *av, const void *bv) {
794 dns_difftuple_t const *const *ap = av;
795 dns_difftuple_t const *const *bp = bv;
796 dns_difftuple_t const *a = *ap;
797 dns_difftuple_t const *b = *bp;
798 int r;
799 int bop = 0, aop = 0;
800
801 switch (a->op) {
802 case DNS_DIFFOP_DEL:
803 case DNS_DIFFOP_DELRESIGN:
804 aop = 1;
805 break;
806 case DNS_DIFFOP_ADD:
807 case DNS_DIFFOP_ADDRESIGN:
808 aop = 0;
809 break;
810 default:
811 UNREACHABLE();
812 }
813
814 switch (b->op) {
815 case DNS_DIFFOP_DEL:
816 case DNS_DIFFOP_DELRESIGN:
817 bop = 1;
818 break;
819 case DNS_DIFFOP_ADD:
820 case DNS_DIFFOP_ADDRESIGN:
821 bop = 0;
822 break;
823 default:
824 UNREACHABLE();
825 }
826
827 r = bop - aop;
828 if (r != 0) {
829 return r;
830 }
831
832 r = (b->rdata.type == dns_rdatatype_soa) -
833 (a->rdata.type == dns_rdatatype_soa);
834 if (r != 0) {
835 return r;
836 }
837
838 r = (a->rdata.type - b->rdata.type);
839 return r;
840 }
841
/*
 * Re-interpret a transaction header that was just read at 'offset'
 * when it does not look consistent with the expected starting serial
 * 'serial'.  Called for journals whose file header carries the old
 * format string, since such files may contain either transaction
 * header layout.
 *
 * The version 1 layout is <size, serial0, serial1> and the version 2
 * layout is <size, count, serial0, serial1>, so a header read with the
 * wrong layout has its fields shifted by one word.  That shift is what
 * the tests below detect.
 *
 * May change j->xhdr_version, re-read '*xhdr', move the file position
 * and set j->recovered.  Returns ISC_R_SUCCESS, or whatever a
 * CHECK()ed seek/read leaves in 'result' on failure.
 */
static isc_result_t
maybe_fixup_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr, uint32_t serial,
		 off_t offset) {
	isc_result_t result = ISC_R_SUCCESS;

	/*
	 * Handle mixture of version 1 and version 2
	 * transaction headers in a version 1 journal.
	 */
	if (xhdr->serial0 != serial ||
	    isc_serial_le(xhdr->serial1, xhdr->serial0))
	{
		/*
		 * Read as version 1 but the expected serial shows up one
		 * word late (in 'serial1'): the header is really version 2.
		 */
		if (j->xhdr_version == XHDR_VERSION1 && xhdr->serial1 == serial)
		{
			isc_log_write(
				JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
				"%s: XHDR_VERSION1 -> XHDR_VERSION2 at %u",
				j->filename, serial);
			j->xhdr_version = XHDR_VERSION2;
			CHECK(journal_seek(j, offset));
			CHECK(journal_read_xhdr(j, xhdr));
			j->recovered = true;
		/*
		 * Read as version 2 but the expected serial shows up one
		 * word early (in 'count'): the header is really version 1.
		 */
		} else if (j->xhdr_version == XHDR_VERSION2 &&
			   xhdr->count == serial)
		{
			isc_log_write(
				JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
				"%s: XHDR_VERSION2 -> XHDR_VERSION1 at %u",
				j->filename, serial);
			j->xhdr_version = XHDR_VERSION1;
			CHECK(journal_seek(j, offset));
			CHECK(journal_read_xhdr(j, xhdr));
			j->recovered = true;
		}
	}

	/*
	 * Handle <size, serial0, serial1, 0> transaction header.
	 */
	if (j->xhdr_version == XHDR_VERSION1) {
		uint32_t value;

		/*
		 * Peek at the word following the 12-byte version 1 header.
		 * It is only compared with zero, so byte order is
		 * irrelevant.
		 */
		CHECK(journal_read(j, &value, sizeof(value)));
		if (value != 0L) {
			/* Not part of the header: go back to just after it. */
			CHECK(journal_seek(j, offset + 12));
		} else {
			/*
			 * The zero word is consumed as part of this header,
			 * and the version 2 layout is expected from here on.
			 */
			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
				      "%s: XHDR_VERSION1 count zero at %u",
				      j->filename, serial);
			j->xhdr_version = XHDR_VERSION2;
			j->recovered = true;
		}
	} else if (j->xhdr_version == XHDR_VERSION2 && xhdr->count == serial &&
		   xhdr->serial1 == 0U &&
		   isc_serial_gt(xhdr->serial0, xhdr->count))
	{
		/*
		 * The same four-word header, but already read with the
		 * version 2 layout: shift the fields into place.
		 */
		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
			      "%s: XHDR_VERSION2 count zero at %u", j->filename,
			      serial);
		xhdr->serial1 = xhdr->serial0;
		xhdr->serial0 = xhdr->count;
		xhdr->count = 0;
		j->recovered = true;
	}

cleanup:
	return result;
}
910
911 /*
912 * Advance '*pos' to the next journal transaction.
913 *
914 * Requires:
915 * *pos refers to a valid journal transaction.
916 *
917 * Ensures:
918 * When ISC_R_SUCCESS is returned,
919 * *pos refers to the next journal transaction.
920 *
921 * Returns one of:
922 *
923 * ISC_R_SUCCESS
924 * ISC_R_NOMORE *pos pointed at the last transaction
925 * Other results due to file errors are possible.
926 */
927 static isc_result_t
928 journal_next(dns_journal_t *j, journal_pos_t *pos) {
929 isc_result_t result;
930 journal_xhdr_t xhdr;
931 size_t hdrsize;
932
933 REQUIRE(DNS_JOURNAL_VALID(j));
934
935 result = journal_seek(j, pos->offset);
936 if (result != ISC_R_SUCCESS) {
937 return result;
938 }
939
940 if (pos->serial == j->header.end.serial) {
941 return ISC_R_NOMORE;
942 }
943
944 /*
945 * Read the header of the current transaction.
946 * This will return ISC_R_NOMORE if we are at EOF.
947 */
948 result = journal_read_xhdr(j, &xhdr);
949 if (result != ISC_R_SUCCESS) {
950 return result;
951 }
952
953 if (j->header_ver1) {
954 CHECK(maybe_fixup_xhdr(j, &xhdr, pos->serial, pos->offset));
955 }
956
957 /*
958 * Check serial number consistency.
959 */
960 if (xhdr.serial0 != pos->serial ||
961 isc_serial_le(xhdr.serial1, xhdr.serial0))
962 {
963 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
964 "%s: journal file corrupt: "
965 "expected serial %u, got %u",
966 j->filename, pos->serial, xhdr.serial0);
967 return ISC_R_UNEXPECTED;
968 }
969
970 /*
971 * Check for offset wraparound.
972 */
973 hdrsize = (j->xhdr_version == XHDR_VERSION2)
974 ? sizeof(journal_rawxhdr_t)
975 : sizeof(journal_rawxhdr_ver1_t);
976
977 if ((off_t)(pos->offset + hdrsize + xhdr.size) < pos->offset) {
978 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
979 "%s: offset too large", j->filename);
980 return ISC_R_UNEXPECTED;
981 }
982
983 pos->offset += hdrsize + xhdr.size;
984 pos->serial = xhdr.serial1;
985 return ISC_R_SUCCESS;
986
987 cleanup:
988 return result;
989 }
990
991 /*
992 * If the index of the journal 'j' contains an entry "better"
993 * than '*best_guess', replace '*best_guess' with it.
994 *
995 * "Better" means having a serial number closer to 'serial'
996 * but not greater than 'serial'.
997 */
998 static void
999 index_find(dns_journal_t *j, uint32_t serial, journal_pos_t *best_guess) {
1000 unsigned int i;
1001 if (j->index == NULL) {
1002 return;
1003 }
1004 for (i = 0; i < j->header.index_size; i++) {
1005 if (POS_VALID(j->index[i]) &&
1006 DNS_SERIAL_GE(serial, j->index[i].serial) &&
1007 DNS_SERIAL_GT(j->index[i].serial, best_guess->serial))
1008 {
1009 *best_guess = j->index[i];
1010 }
1011 }
1012 }
1013
1014 /*
1015 * Add a new index entry. If there is no room, make room by removing
1016 * the odd-numbered entries and compacting the others into the first
1017 * half of the index. This decimates old index entries exponentially
1018 * over time, so that the index always contains a much larger fraction
1019 * of recent serial numbers than of old ones. This is deliberate -
1020 * most index searches are for outgoing IXFR, and IXFR tends to request
1021 * recent versions more often than old ones.
1022 */
1023 static void
1024 index_add(dns_journal_t *j, journal_pos_t *pos) {
1025 unsigned int i;
1026
1027 if (j->index == NULL) {
1028 return;
1029 }
1030
1031 /*
1032 * Search for a vacant position.
1033 */
1034 for (i = 0; i < j->header.index_size; i++) {
1035 if (!POS_VALID(j->index[i])) {
1036 break;
1037 }
1038 }
1039 if (i == j->header.index_size) {
1040 unsigned int k = 0;
1041 /*
1042 * Found no vacant position. Make some room.
1043 */
1044 for (i = 0; i < j->header.index_size; i += 2) {
1045 j->index[k++] = j->index[i];
1046 }
1047 i = k; /* 'i' identifies the first vacant position. */
1048 while (k < j->header.index_size) {
1049 POS_INVALIDATE(j->index[k]);
1050 k++;
1051 }
1052 }
1053 INSIST(i < j->header.index_size);
1054 INSIST(!POS_VALID(j->index[i]));
1055
1056 /*
1057 * Store the new index entry.
1058 */
1059 j->index[i] = *pos;
1060 }
1061
1062 /*
1063 * Invalidate any existing index entries that could become
1064 * ambiguous when a new transaction with number 'serial' is added.
1065 */
1066 static void
1067 index_invalidate(dns_journal_t *j, uint32_t serial) {
1068 unsigned int i;
1069 if (j->index == NULL) {
1070 return;
1071 }
1072 for (i = 0; i < j->header.index_size; i++) {
1073 if (!DNS_SERIAL_GT(serial, j->index[i].serial)) {
1074 POS_INVALIDATE(j->index[i]);
1075 }
1076 }
1077 }
1078
1079 /*
1080 * Try to find a transaction with initial serial number 'serial'
1081 * in the journal 'j'.
1082 *
1083 * If found, store its position at '*pos' and return ISC_R_SUCCESS.
1084 *
1085 * If 'serial' is current (= the ending serial number of the
1086 * last transaction in the journal), set '*pos' to
1087 * the position immediately following the last transaction and
1088 * return ISC_R_SUCCESS.
1089 *
1090 * If 'serial' is within the range of addressable serial numbers
1091 * covered by the journal but that particular serial number is missing
1092 * (from the journal, not just from the index), return ISC_R_NOTFOUND.
1093 *
1094 * If 'serial' is outside the range of addressable serial numbers
1095 * covered by the journal, return ISC_R_RANGE.
1096 *
1097 */
1098 static isc_result_t
1099 journal_find(dns_journal_t *j, uint32_t serial, journal_pos_t *pos) {
1100 isc_result_t result;
1101 journal_pos_t current_pos;
1102
1103 REQUIRE(DNS_JOURNAL_VALID(j));
1104
1105 if (DNS_SERIAL_GT(j->header.begin.serial, serial)) {
1106 return ISC_R_RANGE;
1107 }
1108 if (DNS_SERIAL_GT(serial, j->header.end.serial)) {
1109 return ISC_R_RANGE;
1110 }
1111 if (serial == j->header.end.serial) {
1112 *pos = j->header.end;
1113 return ISC_R_SUCCESS;
1114 }
1115
1116 current_pos = j->header.begin;
1117 index_find(j, serial, ¤t_pos);
1118
1119 while (current_pos.serial != serial) {
1120 if (DNS_SERIAL_GT(current_pos.serial, serial)) {
1121 return ISC_R_NOTFOUND;
1122 }
1123 result = journal_next(j, ¤t_pos);
1124 if (result != ISC_R_SUCCESS) {
1125 return result;
1126 }
1127 }
1128 *pos = current_pos;
1129 return ISC_R_SUCCESS;
1130 }
1131
1132 isc_result_t
1133 dns_journal_begin_transaction(dns_journal_t *j) {
1134 uint32_t offset;
1135 isc_result_t result;
1136
1137 REQUIRE(DNS_JOURNAL_VALID(j));
1138 REQUIRE(j->state == JOURNAL_STATE_WRITE ||
1139 j->state == JOURNAL_STATE_INLINE);
1140
1141 /*
1142 * Find the file offset where the new transaction should
1143 * be written, and seek there.
1144 */
1145 if (JOURNAL_EMPTY(&j->header)) {
1146 offset = sizeof(journal_rawheader_t) +
1147 ISC_CHECKED_MUL(j->header.index_size,
1148 sizeof(journal_rawpos_t));
1149 } else {
1150 offset = j->header.end.offset;
1151 }
1152 j->x.pos[0].offset = offset;
1153 j->x.pos[1].offset = offset; /* Initial value, will be incremented. */
1154 j->x.n_soa = 0;
1155
1156 CHECK(journal_seek(j, offset));
1157
1158 /*
1159 * Write a dummy transaction header of all zeroes to reserve
1160 * space. It will be filled in when the transaction is
1161 * finished.
1162 */
1163 CHECK(journal_write_xhdr(j, 0, 0, 0, 0));
1164 j->x.pos[1].offset = j->offset;
1165
1166 j->state = JOURNAL_STATE_TRANSACTION;
1167 result = ISC_R_SUCCESS;
1168 cleanup:
1169 return result;
1170 }
1171
1172 isc_result_t
1173 dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) {
1174 dns_difftuple_t *t;
1175 isc_buffer_t buffer;
1176 void *mem = NULL;
1177 uint64_t size = 0;
1178 uint32_t rrcount = 0;
1179 isc_result_t result;
1180 isc_region_t used;
1181
1182 REQUIRE(DNS_DIFF_VALID(diff));
1183 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);
1184
1185 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal");
1186 (void)dns_diff_print(diff, NULL);
1187
1188 /*
1189 * Pass 1: determine the buffer size needed, and
1190 * keep track of SOA serial numbers.
1191 */
1192 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
1193 t = ISC_LIST_NEXT(t, link))
1194 {
1195 if (t->rdata.type == dns_rdatatype_soa) {
1196 if (j->x.n_soa < 2) {
1197 j->x.pos[j->x.n_soa].serial =
1198 dns_soa_getserial(&t->rdata);
1199 }
1200 j->x.n_soa++;
1201 }
1202 size += sizeof(journal_rawrrhdr_t);
1203 size += t->name.length; /* XXX should have access macro? */
1204 size += 10;
1205 size += t->rdata.length;
1206 }
1207
1208 if (size >= DNS_JOURNAL_SIZE_MAX) {
1209 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1210 "dns_journal_writediff: %s: journal entry "
1211 "too big to be stored: %" PRIu64 " bytes",
1212 j->filename, size);
1213 return ISC_R_NOSPACE;
1214 }
1215
1216 mem = isc_mem_get(j->mctx, size);
1217
1218 isc_buffer_init(&buffer, mem, size);
1219
1220 /*
1221 * Pass 2. Write RRs to buffer.
1222 */
1223 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
1224 t = ISC_LIST_NEXT(t, link))
1225 {
1226 /*
1227 * Write the RR header.
1228 */
1229 isc_buffer_putuint32(&buffer,
1230 t->name.length + 10 + t->rdata.length);
1231 /*
1232 * Write the owner name, RR header, and RR data.
1233 */
1234 isc_buffer_putmem(&buffer, t->name.ndata, t->name.length);
1235 isc_buffer_putuint16(&buffer, t->rdata.type);
1236 isc_buffer_putuint16(&buffer, t->rdata.rdclass);
1237 isc_buffer_putuint32(&buffer, t->ttl);
1238 isc_buffer_putuint16(&buffer, (uint16_t)t->rdata.length);
1239 INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length);
1240 isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length);
1241
1242 rrcount++;
1243 }
1244
1245 isc_buffer_usedregion(&buffer, &used);
1246 INSIST(used.length == size);
1247
1248 j->x.pos[1].offset += used.length;
1249 j->x.n_rr = rrcount;
1250
1251 /*
1252 * Write the buffer contents to the journal file.
1253 */
1254 CHECK(journal_write(j, used.base, used.length));
1255
1256 result = ISC_R_SUCCESS;
1257
1258 cleanup:
1259 if (mem != NULL) {
1260 isc_mem_put(j->mctx, mem, size);
1261 }
1262 return result;
1263 }
1264
1265 isc_result_t
1266 dns_journal_commit(dns_journal_t *j) {
1267 isc_result_t result;
1268 journal_rawheader_t rawheader;
1269 uint64_t total;
1270
1271 REQUIRE(DNS_JOURNAL_VALID(j));
1272 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION ||
1273 j->state == JOURNAL_STATE_INLINE);
1274
1275 /*
1276 * Just write out a updated header.
1277 */
1278 if (j->state == JOURNAL_STATE_INLINE) {
1279 CHECK(journal_fsync(j));
1280 journal_header_encode(&j->header, &rawheader);
1281 CHECK(journal_seek(j, 0));
1282 CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
1283 CHECK(journal_fsync(j));
1284 j->state = JOURNAL_STATE_WRITE;
1285 return ISC_R_SUCCESS;
1286 }
1287
1288 /*
1289 * Perform some basic consistency checks.
1290 */
1291 if (j->x.n_soa != 2) {
1292 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1293 "%s: malformed transaction: %d SOAs", j->filename,
1294 j->x.n_soa);
1295 return ISC_R_UNEXPECTED;
1296 }
1297 if (!DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial)) {
1298 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1299 "%s: malformed transaction: serial number "
1300 "did not increase",
1301 j->filename);
1302 return ISC_R_UNEXPECTED;
1303 }
1304 if (!JOURNAL_EMPTY(&j->header)) {
1305 if (j->x.pos[0].serial != j->header.end.serial) {
1306 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1307 "malformed transaction: "
1308 "%s last serial %u != "
1309 "transaction first serial %u",
1310 j->filename, j->header.end.serial,
1311 j->x.pos[0].serial);
1312 return ISC_R_UNEXPECTED;
1313 }
1314 }
1315
1316 /*
1317 * We currently don't support huge journal entries.
1318 */
1319 total = j->x.pos[1].offset - j->x.pos[0].offset;
1320 if (total >= DNS_JOURNAL_SIZE_MAX) {
1321 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1322 "transaction too big to be stored in journal: "
1323 "%" PRIu64 "b (max is %" PRIu64 "b)",
1324 total, (uint64_t)DNS_JOURNAL_SIZE_MAX);
1325 return ISC_R_UNEXPECTED;
1326 }
1327
1328 /*
1329 * Some old journal entries may become non-addressable
1330 * when we increment the current serial number. Purge them
1331 * by stepping header.begin forward to the first addressable
1332 * transaction. Also purge them from the index.
1333 */
1334 if (!JOURNAL_EMPTY(&j->header)) {
1335 while (!DNS_SERIAL_GT(j->x.pos[1].serial,
1336 j->header.begin.serial))
1337 {
1338 CHECK(journal_next(j, &j->header.begin));
1339 }
1340 index_invalidate(j, j->x.pos[1].serial);
1341 }
1342 #ifdef notyet
1343 if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) {
1344 force_dump(...);
1345 }
1346 #endif /* ifdef notyet */
1347
1348 /*
1349 * Commit the transaction data to stable storage.
1350 */
1351 CHECK(journal_fsync(j));
1352
1353 if (j->state == JOURNAL_STATE_TRANSACTION) {
1354 off_t offset;
1355 offset = (j->x.pos[1].offset - j->x.pos[0].offset) -
1356 (j->header_ver1 ? sizeof(journal_rawxhdr_ver1_t)
1357 : sizeof(journal_rawxhdr_t));
1358 /*
1359 * Update the transaction header.
1360 */
1361 CHECK(journal_seek(j, j->x.pos[0].offset));
1362 CHECK(journal_write_xhdr(j, offset, j->x.n_rr,
1363 j->x.pos[0].serial,
1364 j->x.pos[1].serial));
1365 }
1366
1367 /*
1368 * Update the journal header.
1369 */
1370 if (JOURNAL_EMPTY(&j->header)) {
1371 j->header.begin = j->x.pos[0];
1372 }
1373 j->header.end = j->x.pos[1];
1374 journal_header_encode(&j->header, &rawheader);
1375 CHECK(journal_seek(j, 0));
1376 CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
1377
1378 /*
1379 * Update the index.
1380 */
1381 index_add(j, &j->x.pos[0]);
1382
1383 /*
1384 * Convert the index into on-disk format and write
1385 * it to disk.
1386 */
1387 CHECK(index_to_disk(j));
1388
1389 /*
1390 * Commit the header to stable storage.
1391 */
1392 CHECK(journal_fsync(j));
1393
1394 /*
1395 * We no longer have a transaction open.
1396 */
1397 j->state = JOURNAL_STATE_WRITE;
1398
1399 result = ISC_R_SUCCESS;
1400
1401 cleanup:
1402 return result;
1403 }
1404
1405 isc_result_t
1406 dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) {
1407 isc_result_t result;
1408
1409 CHECK(dns_diff_sort(diff, ixfr_order));
1410 CHECK(dns_journal_begin_transaction(j));
1411 CHECK(dns_journal_writediff(j, diff));
1412 CHECK(dns_journal_commit(j));
1413 result = ISC_R_SUCCESS;
1414 cleanup:
1415 return result;
1416 }
1417
1418 void
1419 dns_journal_destroy(dns_journal_t **journalp) {
1420 dns_journal_t *j = NULL;
1421
1422 REQUIRE(journalp != NULL);
1423 REQUIRE(DNS_JOURNAL_VALID(*journalp));
1424
1425 j = *journalp;
1426 *journalp = NULL;
1427
1428 j->it.result = ISC_R_FAILURE;
1429 dns_name_invalidate(&j->it.name);
1430 if (j->rawindex != NULL) {
1431 isc_mem_cput(j->mctx, j->rawindex, j->header.index_size,
1432 sizeof(journal_rawpos_t));
1433 }
1434 if (j->index != NULL) {
1435 isc_mem_cput(j->mctx, j->index, j->header.index_size,
1436 sizeof(journal_pos_t));
1437 }
1438 if (j->it.target.base != NULL) {
1439 isc_mem_put(j->mctx, j->it.target.base, j->it.target.length);
1440 }
1441 if (j->it.source.base != NULL) {
1442 isc_mem_put(j->mctx, j->it.source.base, j->it.source.length);
1443 }
1444 if (j->filename != NULL) {
1445 isc_mem_free(j->mctx, j->filename);
1446 }
1447 if (j->fp != NULL) {
1448 (void)isc_stdio_close(j->fp);
1449 }
1450 j->magic = 0;
1451 isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
1452 }
1453
1454 /*
1455 * Roll the open journal 'j' into the database 'db'.
1456 * A new database version will be created.
1457 */
1458
1459 /* XXX Share code with incoming IXFR? */
1460
1461 isc_result_t
1462 dns_journal_rollforward(dns_journal_t *j, dns_db_t *db, unsigned int options) {
1463 isc_buffer_t source; /* Transaction data from disk */
1464 isc_buffer_t target; /* Ditto after _fromwire check */
1465 uint32_t db_serial; /* Database SOA serial */
1466 uint32_t end_serial; /* Last journal SOA serial */
1467 isc_result_t result;
1468 dns_dbversion_t *ver = NULL;
1469 journal_pos_t pos;
1470 dns_diff_t diff;
1471 unsigned int n_soa = 0;
1472 unsigned int n_put = 0;
1473 dns_diffop_t op;
1474
1475 REQUIRE(DNS_JOURNAL_VALID(j));
1476 REQUIRE(DNS_DB_VALID(db));
1477
1478 dns_diff_init(j->mctx, &diff);
1479
1480 /*
1481 * Set up empty initial buffers for unchecked and checked
1482 * wire format transaction data. They will be reallocated
1483 * later.
1484 */
1485 isc_buffer_init(&source, NULL, 0);
1486 isc_buffer_init(&target, NULL, 0);
1487
1488 /*
1489 * Create the new database version.
1490 */
1491 CHECK(dns_db_newversion(db, &ver));
1492
1493 /*
1494 * Get the current database SOA serial number.
1495 */
1496 CHECK(dns_db_getsoaserial(db, ver, &db_serial));
1497
1498 /*
1499 * Locate a journal entry for the current database serial.
1500 */
1501 CHECK(journal_find(j, db_serial, &pos));
1502
1503 end_serial = dns_journal_last_serial(j);
1504
1505 /*
1506 * If we're reading a version 1 file, scan all the transactions
1507 * to see if the journal needs rewriting: if any outdated
1508 * transaction headers are found, j->recovered will be set.
1509 */
1510 if (j->header_ver1) {
1511 uint32_t start_serial = dns_journal_first_serial(j);
1512
1513 CHECK(dns_journal_iter_init(j, start_serial, db_serial, NULL));
1514 for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
1515 result = dns_journal_next_rr(j))
1516 {
1517 continue;
1518 }
1519 }
1520
1521 if (db_serial == end_serial) {
1522 CHECK(DNS_R_UPTODATE);
1523 }
1524
1525 CHECK(dns_journal_iter_init(j, db_serial, end_serial, NULL));
1526 for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
1527 result = dns_journal_next_rr(j))
1528 {
1529 dns_name_t *name = NULL;
1530 dns_rdata_t *rdata = NULL;
1531 dns_difftuple_t *tuple = NULL;
1532 uint32_t ttl;
1533
1534 dns_journal_current_rr(j, &name, &ttl, &rdata);
1535
1536 if (rdata->type == dns_rdatatype_soa) {
1537 n_soa++;
1538 if (n_soa == 2) {
1539 db_serial = j->it.current_serial;
1540 }
1541 }
1542
1543 if (n_soa == 3) {
1544 n_soa = 1;
1545 }
1546 if (n_soa == 0) {
1547 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1548 "%s: journal file corrupt: missing "
1549 "initial SOA",
1550 j->filename);
1551 CHECK(ISC_R_UNEXPECTED);
1552 }
1553 if ((options & DNS_JOURNALOPT_RESIGN) != 0) {
1554 op = (n_soa == 1) ? DNS_DIFFOP_DELRESIGN
1555 : DNS_DIFFOP_ADDRESIGN;
1556 } else {
1557 op = (n_soa == 1) ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD;
1558 }
1559
1560 CHECK(dns_difftuple_create(diff.mctx, op, name, ttl, rdata,
1561 &tuple));
1562 dns_diff_append(&diff, &tuple);
1563
1564 if (++n_put > 100) {
1565 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1566 "%s: applying diff to database (%u)",
1567 j->filename, db_serial);
1568 (void)dns_diff_print(&diff, NULL);
1569 CHECK(dns_diff_apply(&diff, db, ver));
1570 dns_diff_clear(&diff);
1571 n_put = 0;
1572 }
1573 }
1574 if (result == ISC_R_NOMORE) {
1575 result = ISC_R_SUCCESS;
1576 }
1577 CHECK(result);
1578
1579 if (n_put != 0) {
1580 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1581 "%s: applying final diff to database (%u)",
1582 j->filename, db_serial);
1583 (void)dns_diff_print(&diff, NULL);
1584 CHECK(dns_diff_apply(&diff, db, ver));
1585 dns_diff_clear(&diff);
1586 }
1587
1588 cleanup:
1589 if (ver != NULL) {
1590 dns_db_closeversion(db, &ver,
1591 result == ISC_R_SUCCESS ? true : false);
1592 }
1593
1594 if (source.base != NULL) {
1595 isc_mem_put(j->mctx, source.base, source.length);
1596 }
1597 if (target.base != NULL) {
1598 isc_mem_put(j->mctx, target.base, target.length);
1599 }
1600
1601 dns_diff_clear(&diff);
1602
1603 INSIST(ver == NULL);
1604
1605 return result;
1606 }
1607
1608 isc_result_t
1609 dns_journal_print(isc_mem_t *mctx, uint32_t flags, const char *filename,
1610 FILE *file) {
1611 dns_journal_t *j = NULL;
1612 isc_buffer_t source; /* Transaction data from disk */
1613 isc_buffer_t target; /* Ditto after _fromwire check */
1614 uint32_t start_serial; /* Database SOA serial */
1615 uint32_t end_serial; /* Last journal SOA serial */
1616 isc_result_t result;
1617 dns_diff_t diff;
1618 unsigned int n_soa = 0;
1619 unsigned int n_put = 0;
1620 bool printxhdr = ((flags & DNS_JOURNAL_PRINTXHDR) != 0);
1621
1622 REQUIRE(filename != NULL);
1623
1624 result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
1625 if (result == ISC_R_NOTFOUND) {
1626 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file");
1627 return DNS_R_NOJOURNAL;
1628 } else if (result != ISC_R_SUCCESS) {
1629 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1630 "journal open failure: %s: %s",
1631 isc_result_totext(result), filename);
1632 return result;
1633 }
1634
1635 if (printxhdr) {
1636 fprintf(file, "Journal format = %sHeader version = %d\n",
1637 j->header.format + 1, j->header_ver1 ? 1 : 2);
1638 fprintf(file, "Start serial = %u\n", j->header.begin.serial);
1639 fprintf(file, "End serial = %u\n", j->header.end.serial);
1640 fprintf(file, "Index (size = %u):\n", j->header.index_size);
1641 for (uint32_t i = 0; i < j->header.index_size; i++) {
1642 if (j->index[i].offset == 0) {
1643 fputc('\n', file);
1644 break;
1645 }
1646 fprintf(file, "%lld", (long long)j->index[i].offset);
1647 fputc((i + 1) % 8 == 0 ? '\n' : ' ', file);
1648 }
1649 }
1650 if (j->header.serialset) {
1651 fprintf(file, "Source serial = %u\n", j->header.sourceserial);
1652 }
1653 dns_diff_init(j->mctx, &diff);
1654
1655 /*
1656 * Set up empty initial buffers for unchecked and checked
1657 * wire format transaction data. They will be reallocated
1658 * later.
1659 */
1660 isc_buffer_init(&source, NULL, 0);
1661 isc_buffer_init(&target, NULL, 0);
1662
1663 start_serial = dns_journal_first_serial(j);
1664 end_serial = dns_journal_last_serial(j);
1665
1666 CHECK(dns_journal_iter_init(j, start_serial, end_serial, NULL));
1667
1668 for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
1669 result = dns_journal_next_rr(j))
1670 {
1671 dns_name_t *name = NULL;
1672 dns_rdata_t *rdata = NULL;
1673 dns_difftuple_t *tuple = NULL;
1674 static uint32_t i = 0;
1675 bool print = false;
1676 uint32_t ttl;
1677
1678 dns_journal_current_rr(j, &name, &ttl, &rdata);
1679
1680 if (rdata->type == dns_rdatatype_soa) {
1681 n_soa++;
1682 if (n_soa == 3) {
1683 n_soa = 1;
1684 }
1685 if (n_soa == 1) {
1686 print = printxhdr;
1687 }
1688 }
1689 if (n_soa == 0) {
1690 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1691 "%s: journal file corrupt: missing "
1692 "initial SOA",
1693 j->filename);
1694 CHECK(ISC_R_UNEXPECTED);
1695 }
1696
1697 if (print) {
1698 fprintf(file,
1699 "Transaction: version %d offset %lld size %u "
1700 "rrcount %u start %u end %u\n",
1701 j->xhdr_version, (long long)j->it.cpos.offset,
1702 j->curxhdr.size, j->curxhdr.count,
1703 j->curxhdr.serial0, j->curxhdr.serial1);
1704 if (j->it.cpos.offset > j->index[i].offset) {
1705 fprintf(file,
1706 "ERROR: Offset mismatch, "
1707 "expected %lld\n",
1708 (long long)j->index[i].offset);
1709 } else if (j->it.cpos.offset == j->index[i].offset) {
1710 i++;
1711 }
1712 }
1713 CHECK(dns_difftuple_create(
1714 diff.mctx, n_soa == 1 ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD,
1715 name, ttl, rdata, &tuple));
1716 dns_diff_append(&diff, &tuple);
1717
1718 if (++n_put > 100 || printxhdr) {
1719 result = dns_diff_print(&diff, file);
1720 dns_diff_clear(&diff);
1721 n_put = 0;
1722 if (result != ISC_R_SUCCESS) {
1723 break;
1724 }
1725 }
1726 }
1727 if (result == ISC_R_NOMORE) {
1728 result = ISC_R_SUCCESS;
1729 }
1730 CHECK(result);
1731
1732 if (n_put != 0) {
1733 result = dns_diff_print(&diff, file);
1734 dns_diff_clear(&diff);
1735 }
1736 goto done;
1737
1738 cleanup:
1739 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1740 "%s: cannot print: journal file corrupt", j->filename);
1741
1742 done:
1743 if (source.base != NULL) {
1744 isc_mem_put(j->mctx, source.base, source.length);
1745 }
1746 if (target.base != NULL) {
1747 isc_mem_put(j->mctx, target.base, target.length);
1748 }
1749
1750 dns_diff_clear(&diff);
1751 dns_journal_destroy(&j);
1752
1753 return result;
1754 }
1755
1756 /**************************************************************************/
1757 /*
1758 * Miscellaneous accessors.
1759 */
1760 bool
1761 dns_journal_empty(dns_journal_t *j) {
1762 return JOURNAL_EMPTY(&j->header);
1763 }
1764
1765 bool
1766 dns_journal_recovered(dns_journal_t *j) {
1767 return j->recovered;
1768 }
1769
1770 uint32_t
1771 dns_journal_first_serial(dns_journal_t *j) {
1772 return j->header.begin.serial;
1773 }
1774
1775 uint32_t
1776 dns_journal_last_serial(dns_journal_t *j) {
1777 return j->header.end.serial;
1778 }
1779
1780 void
1781 dns_journal_set_sourceserial(dns_journal_t *j, uint32_t sourceserial) {
1782 REQUIRE(j->state == JOURNAL_STATE_WRITE ||
1783 j->state == JOURNAL_STATE_INLINE ||
1784 j->state == JOURNAL_STATE_TRANSACTION);
1785
1786 j->header.sourceserial = sourceserial;
1787 j->header.serialset = true;
1788 if (j->state == JOURNAL_STATE_WRITE) {
1789 j->state = JOURNAL_STATE_INLINE;
1790 }
1791 }
1792
1793 bool
1794 dns_journal_get_sourceserial(dns_journal_t *j, uint32_t *sourceserial) {
1795 REQUIRE(sourceserial != NULL);
1796
1797 if (!j->header.serialset) {
1798 return false;
1799 }
1800 *sourceserial = j->header.sourceserial;
1801 return true;
1802 }
1803
1804 /**************************************************************************/
1805 /*
1806 * Iteration support.
1807 *
1808 * When serving an outgoing IXFR, we transmit a part of the journal starting
1809 * at the serial number in the IXFR request and ending at the serial
1810 * number that is current when the IXFR request arrives. The ending
1811 * serial number is not necessarily at the end of the journal:
1812 * the journal may grow while the IXFR is in progress, but we stop
1813 * when we reach the serial number that was current when the IXFR started.
1814 */
1815
1816 static isc_result_t
1817 read_one_rr(dns_journal_t *j);
1818
1819 /*
1820 * Make sure the buffer 'b' is has at least 'size' bytes
1821 * allocated, and clear it.
1822 *
1823 * Requires:
1824 * Either b->base is NULL, or it points to b->length bytes of memory
1825 * previously allocated by isc_mem_get().
1826 */
1827
1828 static isc_result_t
1829 size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned int size) {
1830 if (b->length < size) {
1831 void *mem = isc_mem_get(mctx, size);
1832 if (mem == NULL) {
1833 return ISC_R_NOMEMORY;
1834 }
1835 if (b->base != NULL) {
1836 isc_mem_put(mctx, b->base, b->length);
1837 }
1838 b->base = mem;
1839 b->length = size;
1840 }
1841 isc_buffer_clear(b);
1842 return ISC_R_SUCCESS;
1843 }
1844
1845 isc_result_t
1846 dns_journal_iter_init(dns_journal_t *j, uint32_t begin_serial,
1847 uint32_t end_serial, size_t *xfrsizep) {
1848 isc_result_t result;
1849
1850 CHECK(journal_find(j, begin_serial, &j->it.bpos));
1851 INSIST(j->it.bpos.serial == begin_serial);
1852
1853 CHECK(journal_find(j, end_serial, &j->it.epos));
1854 INSIST(j->it.epos.serial == end_serial);
1855
1856 if (xfrsizep != NULL) {
1857 journal_pos_t pos = j->it.bpos;
1858 journal_xhdr_t xhdr;
1859 uint64_t size = 0;
1860 uint32_t count = 0;
1861
1862 /*
1863 * We already know the beginning and ending serial
1864 * numbers are in the journal. Scan through them,
1865 * adding up sizes and RR counts so we can calculate
1866 * the IXFR size.
1867 */
1868 do {
1869 CHECK(journal_seek(j, pos.offset));
1870 CHECK(journal_read_xhdr(j, &xhdr));
1871
1872 if (j->header_ver1) {
1873 CHECK(maybe_fixup_xhdr(j, &xhdr, pos.serial,
1874 pos.offset));
1875 }
1876
1877 /*
1878 * Check that xhdr is consistent.
1879 */
1880 if (xhdr.serial0 != pos.serial ||
1881 isc_serial_le(xhdr.serial1, xhdr.serial0))
1882 {
1883 CHECK(ISC_R_UNEXPECTED);
1884 }
1885
1886 size += xhdr.size;
1887 count += xhdr.count;
1888
1889 result = journal_next(j, &pos);
1890 if (result == ISC_R_NOMORE) {
1891 result = ISC_R_SUCCESS;
1892 }
1893 CHECK(result);
1894 } while (pos.serial != end_serial);
1895
1896 /*
1897 * For each RR, subtract the length of the RR header,
1898 * as this would not be present in IXFR messages.
1899 * (We don't need to worry about the transaction header
1900 * because that was already excluded from xdr.size.)
1901 */
1902 *xfrsizep = size - (ISC_CHECKED_MUL(
1903 count, sizeof(journal_rawrrhdr_t)));
1904 }
1905
1906 result = ISC_R_SUCCESS;
1907 cleanup:
1908 j->it.result = result;
1909 return j->it.result;
1910 }
1911
1912 isc_result_t
1913 dns_journal_first_rr(dns_journal_t *j) {
1914 isc_result_t result;
1915
1916 /*
1917 * Seek to the beginning of the first transaction we are
1918 * interested in.
1919 */
1920 CHECK(journal_seek(j, j->it.bpos.offset));
1921 j->it.current_serial = j->it.bpos.serial;
1922
1923 j->it.xsize = 0; /* We have no transaction data yet... */
1924 j->it.xpos = 0; /* ...and haven't used any of it. */
1925
1926 return read_one_rr(j);
1927
1928 cleanup:
1929 return result;
1930 }
1931
1932 static isc_result_t
1933 read_one_rr(dns_journal_t *j) {
1934 isc_result_t result;
1935 dns_rdatatype_t rdtype;
1936 dns_rdataclass_t rdclass;
1937 unsigned int rdlen;
1938 uint32_t ttl;
1939 journal_xhdr_t xhdr;
1940 journal_rrhdr_t rrhdr;
1941 dns_journal_t save = *j;
1942
1943 if (j->offset > j->it.epos.offset) {
1944 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1945 "%s: journal corrupt: possible integer overflow",
1946 j->filename);
1947 return ISC_R_UNEXPECTED;
1948 }
1949 if (j->offset == j->it.epos.offset) {
1950 return ISC_R_NOMORE;
1951 }
1952 if (j->it.xpos == j->it.xsize) {
1953 /*
1954 * We are at a transaction boundary.
1955 * Read another transaction header.
1956 */
1957 CHECK(journal_read_xhdr(j, &xhdr));
1958 if (xhdr.size == 0) {
1959 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1960 "%s: journal corrupt: empty transaction",
1961 j->filename);
1962 CHECK(ISC_R_UNEXPECTED);
1963 }
1964
1965 if (j->header_ver1) {
1966 CHECK(maybe_fixup_xhdr(j, &xhdr, j->it.current_serial,
1967 save.offset));
1968 }
1969
1970 if (xhdr.serial0 != j->it.current_serial ||
1971 isc_serial_le(xhdr.serial1, xhdr.serial0))
1972 {
1973 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1974 "%s: journal file corrupt: "
1975 "expected serial %u, got %u",
1976 j->filename, j->it.current_serial,
1977 xhdr.serial0);
1978 CHECK(ISC_R_UNEXPECTED);
1979 }
1980
1981 j->it.xsize = xhdr.size;
1982 j->it.xpos = 0;
1983 }
1984 /*
1985 * Read an RR.
1986 */
1987 CHECK(journal_read_rrhdr(j, &rrhdr));
1988 /*
1989 * Perform a sanity check on the journal RR size.
1990 * The smallest possible RR has a 1-byte owner name
1991 * and a 10-byte header. The largest possible
1992 * RR has 65535 bytes of data, a header, and a maximum-
1993 * size owner name, well below 70 k total.
1994 */
1995 if (rrhdr.size < 1 + 10 || rrhdr.size > 70000) {
1996 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1997 "%s: journal corrupt: impossible RR size "
1998 "(%d bytes)",
1999 j->filename, rrhdr.size);
2000 CHECK(ISC_R_UNEXPECTED);
2001 }
2002
2003 CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size));
2004 CHECK(journal_read(j, j->it.source.base, rrhdr.size));
2005 isc_buffer_add(&j->it.source, rrhdr.size);
2006
2007 /*
2008 * The target buffer is made the same size
2009 * as the source buffer, with the assumption that when
2010 * no compression in present, the output of dns_*_fromwire()
2011 * is no larger than the input.
2012 */
2013 CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size));
2014
2015 /*
2016 * Parse the owner name. We don't know where it
2017 * ends yet, so we make the entire "remaining"
2018 * part of the buffer "active".
2019 */
2020 isc_buffer_setactive(&j->it.source,
2021 j->it.source.used - j->it.source.current);
2022 CHECK(dns_name_fromwire(&j->it.name, &j->it.source, j->it.dctx,
2023 &j->it.target));
2024
2025 /*
2026 * Check that the RR header is there, and parse it.
2027 */
2028 if (isc_buffer_remaininglength(&j->it.source) < 10) {
2029 CHECK(DNS_R_FORMERR);
2030 }
2031
2032 rdtype = isc_buffer_getuint16(&j->it.source);
2033 rdclass = isc_buffer_getuint16(&j->it.source);
2034 ttl = isc_buffer_getuint32(&j->it.source);
2035 rdlen = isc_buffer_getuint16(&j->it.source);
2036
2037 if (rdlen > DNS_RDATA_MAXLENGTH) {
2038 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
2039 "%s: journal corrupt: impossible rdlen "
2040 "(%u bytes)",
2041 j->filename, rdlen);
2042 CHECK(ISC_R_FAILURE);
2043 }
2044
2045 /*
2046 * Parse the rdata.
2047 */
2048 if (isc_buffer_remaininglength(&j->it.source) != rdlen) {
2049 CHECK(DNS_R_FORMERR);
2050 }
2051 isc_buffer_setactive(&j->it.source, rdlen);
2052 dns_rdata_reset(&j->it.rdata);
2053 CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass, rdtype, &j->it.source,
2054 j->it.dctx, &j->it.target));
2055 j->it.ttl = ttl;
2056
2057 j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size;
2058 if (rdtype == dns_rdatatype_soa) {
2059 /* XXX could do additional consistency checks here */
2060 j->it.current_serial = dns_soa_getserial(&j->it.rdata);
2061 }
2062
2063 result = ISC_R_SUCCESS;
2064
2065 cleanup:
2066 j->it.result = result;
2067 return result;
2068 }
2069
2070 isc_result_t
2071 dns_journal_next_rr(dns_journal_t *j) {
2072 j->it.result = read_one_rr(j);
2073 return j->it.result;
2074 }
2075
2076 void
2077 dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, uint32_t *ttl,
2078 dns_rdata_t **rdata) {
2079 REQUIRE(j->it.result == ISC_R_SUCCESS);
2080 *name = &j->it.name;
2081 *ttl = j->it.ttl;
2082 *rdata = &j->it.rdata;
2083 }
2084
2085 /**************************************************************************/
2086 /*
2087 * Generating diffs from databases
2088 */
2089
2090 /*
2091 * Construct a diff containing all the RRs at the current name of the
2092 * database iterator 'dbit' in database 'db', version 'ver'.
2093 * Set '*name' to the current name, and append the diff to 'diff'.
2094 * All new tuples will have the operation 'op'.
2095 *
2096 * Requires: 'name' must have buffer large enough to hold the name.
2097 * Typically, a dns_fixedname_t would be used.
2098 */
2099 static isc_result_t
2100 get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now,
2101 dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op,
2102 dns_diff_t *diff) {
2103 isc_result_t result;
2104 dns_dbnode_t *node = NULL;
2105 dns_rdatasetiter_t *rdsiter = NULL;
2106 dns_difftuple_t *tuple = NULL;
2107
2108 result = dns_dbiterator_current(dbit, &node, name);
2109 if (result != ISC_R_SUCCESS) {
2110 return result;
2111 }
2112
2113 result = dns_db_allrdatasets(db, node, ver, 0, now, &rdsiter);
2114 if (result != ISC_R_SUCCESS) {
2115 goto cleanup_node;
2116 }
2117
2118 for (result = dns_rdatasetiter_first(rdsiter); result == ISC_R_SUCCESS;
2119 result = dns_rdatasetiter_next(rdsiter))
2120 {
2121 dns_rdataset_t rdataset;
2122
2123 dns_rdataset_init(&rdataset);
2124 dns_rdatasetiter_current(rdsiter, &rdataset);
2125
2126 for (result = dns_rdataset_first(&rdataset);
2127 result == ISC_R_SUCCESS;
2128 result = dns_rdataset_next(&rdataset))
2129 {
2130 dns_rdata_t rdata = DNS_RDATA_INIT;
2131 dns_rdataset_current(&rdataset, &rdata);
2132 result = dns_difftuple_create(diff->mctx, op, name,
2133 rdataset.ttl, &rdata,
2134 &tuple);
2135 if (result != ISC_R_SUCCESS) {
2136 dns_rdataset_disassociate(&rdataset);
2137 goto cleanup_iterator;
2138 }
2139 dns_diff_append(diff, &tuple);
2140 }
2141 dns_rdataset_disassociate(&rdataset);
2142 if (result != ISC_R_NOMORE) {
2143 goto cleanup_iterator;
2144 }
2145 }
2146 if (result != ISC_R_NOMORE) {
2147 goto cleanup_iterator;
2148 }
2149
2150 result = ISC_R_SUCCESS;
2151
2152 cleanup_iterator:
2153 dns_rdatasetiter_destroy(&rdsiter);
2154
2155 cleanup_node:
2156 dns_db_detachnode(db, &node);
2157
2158 return result;
2159 }
2160
2161 /*
2162 * Comparison function for use by dns_diff_subtract when sorting
2163 * the diffs to be subtracted. The sort keys are the rdata type
2164 * and the rdata itself. The owner name is ignored, because
2165 * it is known to be the same for all tuples.
2166 */
2167 static int
2168 rdata_order(const void *av, const void *bv) {
2169 dns_difftuple_t const *const *ap = av;
2170 dns_difftuple_t const *const *bp = bv;
2171 dns_difftuple_t const *a = *ap;
2172 dns_difftuple_t const *b = *bp;
2173 int r;
2174 r = (b->rdata.type - a->rdata.type);
2175 if (r != 0) {
2176 return r;
2177 }
2178 r = dns_rdata_compare(&a->rdata, &b->rdata);
2179 return r;
2180 }
2181
2182 static isc_result_t
2183 dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) {
2184 isc_result_t result;
2185 dns_difftuple_t *p[2];
2186 int i, t;
2187 bool append;
2188 dns_difftuplelist_t add, del;
2189
2190 CHECK(dns_diff_sort(&diff[0], rdata_order));
2191 CHECK(dns_diff_sort(&diff[1], rdata_order));
2192 ISC_LIST_INIT(add);
2193 ISC_LIST_INIT(del);
2194
2195 for (;;) {
2196 p[0] = ISC_LIST_HEAD(diff[0].tuples);
2197 p[1] = ISC_LIST_HEAD(diff[1].tuples);
2198 if (p[0] == NULL && p[1] == NULL) {
2199 break;
2200 }
2201
2202 for (i = 0; i < 2; i++) {
2203 if (p[!i] == NULL) {
2204 dns_difftuplelist_t *l = (i == 0) ? &add : &del;
2205 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
2206 ISC_LIST_APPEND(*l, p[i], link);
2207 goto next;
2208 }
2209 }
2210 t = rdata_order(&p[0], &p[1]);
2211 if (t < 0) {
2212 ISC_LIST_UNLINK(diff[0].tuples, p[0], link);
2213 ISC_LIST_APPEND(add, p[0], link);
2214 goto next;
2215 }
2216 if (t > 0) {
2217 ISC_LIST_UNLINK(diff[1].tuples, p[1], link);
2218 ISC_LIST_APPEND(del, p[1], link);
2219 goto next;
2220 }
2221 INSIST(t == 0);
2222 /*
2223 * Identical RRs in both databases; skip them both
2224 * if the ttl differs.
2225 */
2226 append = (p[0]->ttl != p[1]->ttl);
2227 for (i = 0; i < 2; i++) {
2228 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
2229 if (append) {
2230 dns_difftuplelist_t *l = (i == 0) ? &add : &del;
2231 ISC_LIST_APPEND(*l, p[i], link);
2232 } else {
2233 dns_difftuple_free(&p[i]);
2234 }
2235 }
2236 next:;
2237 }
2238 ISC_LIST_APPENDLIST(r->tuples, del, link);
2239 ISC_LIST_APPENDLIST(r->tuples, add, link);
2240 result = ISC_R_SUCCESS;
2241 cleanup:
2242 return result;
2243 }
2244
2245 static isc_result_t
2246 diff_namespace(dns_db_t *dba, dns_dbversion_t *dbvera, dns_db_t *dbb,
2247 dns_dbversion_t *dbverb, unsigned int options,
2248 dns_diff_t *resultdiff) {
2249 dns_db_t *db[2];
2250 dns_dbversion_t *ver[2];
2251 dns_dbiterator_t *dbit[2] = { NULL, NULL };
2252 bool have[2] = { false, false };
2253 dns_fixedname_t fixname[2];
2254 isc_result_t result, itresult[2];
2255 dns_diff_t diff[2];
2256 int i, t;
2257
2258 db[0] = dba, db[1] = dbb;
2259 ver[0] = dbvera, ver[1] = dbverb;
2260
2261 dns_diff_init(resultdiff->mctx, &diff[0]);
2262 dns_diff_init(resultdiff->mctx, &diff[1]);
2263
2264 dns_fixedname_init(&fixname[0]);
2265 dns_fixedname_init(&fixname[1]);
2266
2267 result = dns_db_createiterator(db[0], options, &dbit[0]);
2268 if (result != ISC_R_SUCCESS) {
2269 return result;
2270 }
2271 result = dns_db_createiterator(db[1], options, &dbit[1]);
2272 if (result != ISC_R_SUCCESS) {
2273 goto cleanup_iterator;
2274 }
2275
2276 itresult[0] = dns_dbiterator_first(dbit[0]);
2277 itresult[1] = dns_dbiterator_first(dbit[1]);
2278
2279 for (;;) {
2280 for (i = 0; i < 2; i++) {
2281 if (!have[i] && itresult[i] == ISC_R_SUCCESS) {
2282 CHECK(get_name_diff(
2283 db[i], ver[i], 0, dbit[i],
2284 dns_fixedname_name(&fixname[i]),
2285 i == 0 ? DNS_DIFFOP_ADD
2286 : DNS_DIFFOP_DEL,
2287 &diff[i]));
2288 itresult[i] = dns_dbiterator_next(dbit[i]);
2289 have[i] = true;
2290 }
2291 }
2292
2293 if (!have[0] && !have[1]) {
2294 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2295 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2296 break;
2297 }
2298
2299 for (i = 0; i < 2; i++) {
2300 if (!have[!i]) {
2301 ISC_LIST_APPENDLIST(resultdiff->tuples,
2302 diff[i].tuples, link);
2303 INSIST(ISC_LIST_EMPTY(diff[i].tuples));
2304 have[i] = false;
2305 goto next;
2306 }
2307 }
2308
2309 t = dns_name_compare(dns_fixedname_name(&fixname[0]),
2310 dns_fixedname_name(&fixname[1]));
2311 if (t < 0) {
2312 ISC_LIST_APPENDLIST(resultdiff->tuples, diff[0].tuples,
2313 link);
2314 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2315 have[0] = false;
2316 continue;
2317 }
2318 if (t > 0) {
2319 ISC_LIST_APPENDLIST(resultdiff->tuples, diff[1].tuples,
2320 link);
2321 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2322 have[1] = false;
2323 continue;
2324 }
2325 INSIST(t == 0);
2326 CHECK(dns_diff_subtract(diff, resultdiff));
2327 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2328 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2329 have[0] = have[1] = false;
2330 next:;
2331 }
2332 if (itresult[0] != ISC_R_NOMORE) {
2333 CHECK(itresult[0]);
2334 }
2335 if (itresult[1] != ISC_R_NOMORE) {
2336 CHECK(itresult[1]);
2337 }
2338
2339 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2340 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2341
2342 cleanup:
2343 dns_dbiterator_destroy(&dbit[1]);
2344
2345 cleanup_iterator:
2346 dns_dbiterator_destroy(&dbit[0]);
2347 dns_diff_clear(&diff[0]);
2348 dns_diff_clear(&diff[1]);
2349 return result;
2350 }
2351
2352 /*
2353 * Compare the databases 'dba' and 'dbb' and generate a journal
2354 * entry containing the changes to make 'dba' from 'dbb' (note
2355 * the order). This journal entry will consist of a single,
2356 * possibly very large transaction.
2357 */
2358 isc_result_t
2359 dns_db_diff(isc_mem_t *mctx, dns_db_t *dba, dns_dbversion_t *dbvera,
2360 dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) {
2361 isc_result_t result;
2362 dns_diff_t diff;
2363
2364 dns_diff_init(mctx, &diff);
2365
2366 result = dns_db_diffx(&diff, dba, dbvera, dbb, dbverb, filename);
2367
2368 dns_diff_clear(&diff);
2369
2370 return result;
2371 }
2372
2373 isc_result_t
2374 dns_db_diffx(dns_diff_t *diff, dns_db_t *dba, dns_dbversion_t *dbvera,
2375 dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) {
2376 isc_result_t result;
2377 dns_journal_t *journal = NULL;
2378
2379 if (filename != NULL) {
2380 result = dns_journal_open(diff->mctx, filename,
2381 DNS_JOURNAL_CREATE, &journal);
2382 if (result != ISC_R_SUCCESS) {
2383 return result;
2384 }
2385 }
2386
2387 CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NONSEC3, diff));
2388 CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NSEC3ONLY, diff));
2389
2390 if (journal != NULL) {
2391 if (ISC_LIST_EMPTY(diff->tuples)) {
2392 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes");
2393 } else {
2394 CHECK(dns_journal_write_transaction(journal, diff));
2395 }
2396 }
2397
2398 cleanup:
2399 if (journal != NULL) {
2400 dns_journal_destroy(&journal);
2401 }
2402 return result;
2403 }
2404
2405 static uint32_t
2406 rrcount(unsigned char *buf, unsigned int size) {
2407 isc_buffer_t b;
2408 uint32_t rrsize, count = 0;
2409
2410 isc_buffer_init(&b, buf, size);
2411 isc_buffer_add(&b, size);
2412 while (isc_buffer_remaininglength(&b) > 0) {
2413 rrsize = isc_buffer_getuint32(&b);
2414 INSIST(isc_buffer_remaininglength(&b) >= rrsize);
2415 isc_buffer_forward(&b, rrsize);
2416 count++;
2417 }
2418
2419 return count;
2420 }
2421
2422 static bool
2423 check_delta(unsigned char *buf, size_t size) {
2424 isc_buffer_t b;
2425 uint32_t rrsize;
2426
2427 isc_buffer_init(&b, buf, size);
2428 isc_buffer_add(&b, size);
2429 while (isc_buffer_remaininglength(&b) > 0) {
2430 if (isc_buffer_remaininglength(&b) < 4) {
2431 return false;
2432 }
2433 rrsize = isc_buffer_getuint32(&b);
2434 /* "." + type + class + ttl + rdlen => 11U */
2435 if (rrsize < 11U || isc_buffer_remaininglength(&b) < rrsize) {
2436 return false;
2437 }
2438 isc_buffer_forward(&b, rrsize);
2439 }
2440
2441 return true;
2442 }
2443
2444 isc_result_t
2445 dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial,
2446 uint32_t flags, uint32_t target_size) {
2447 unsigned int i;
2448 journal_pos_t best_guess;
2449 journal_pos_t current_pos;
2450 dns_journal_t *j1 = NULL;
2451 dns_journal_t *j2 = NULL;
2452 journal_rawheader_t rawheader;
2453 unsigned int len;
2454 size_t namelen;
2455 unsigned char *buf = NULL;
2456 unsigned int size = 0;
2457 isc_result_t result;
2458 unsigned int indexend;
2459 char newname[PATH_MAX];
2460 char backup[PATH_MAX];
2461 bool is_backup = false;
2462 bool rewrite = false;
2463 bool downgrade = false;
2464
2465 REQUIRE(filename != NULL);
2466
2467 namelen = strlen(filename);
2468 if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0) {
2469 namelen -= 4;
2470 }
2471
2472 result = snprintf(newname, sizeof(newname), "%.*s.jnw", (int)namelen,
2473 filename);
2474 RUNTIME_CHECK(result < sizeof(newname));
2475
2476 result = snprintf(backup, sizeof(backup), "%.*s.jbk", (int)namelen,
2477 filename);
2478 RUNTIME_CHECK(result < sizeof(backup));
2479
2480 result = journal_open(mctx, filename, false, false, false, &j1);
2481 if (result == ISC_R_NOTFOUND) {
2482 is_backup = true;
2483 result = journal_open(mctx, backup, false, false, false, &j1);
2484 }
2485 if (result != ISC_R_SUCCESS) {
2486 return result;
2487 }
2488
2489 /*
2490 * Always perform a re-write when processing a version 1 journal.
2491 */
2492 rewrite = j1->header_ver1;
2493
2494 /*
2495 * Check whether we need to rewrite the whole journal
2496 * file (for example, to upversion it).
2497 */
2498 if ((flags & DNS_JOURNAL_COMPACTALL) != 0) {
2499 if ((flags & DNS_JOURNAL_VERSION1) != 0) {
2500 downgrade = true;
2501 }
2502 rewrite = true;
2503 serial = dns_journal_first_serial(j1);
2504 } else if (JOURNAL_EMPTY(&j1->header)) {
2505 dns_journal_destroy(&j1);
2506 return ISC_R_SUCCESS;
2507 }
2508
2509 if (DNS_SERIAL_GT(j1->header.begin.serial, serial) ||
2510 DNS_SERIAL_GT(serial, j1->header.end.serial))
2511 {
2512 dns_journal_destroy(&j1);
2513 return ISC_R_RANGE;
2514 }
2515
2516 /*
2517 * Cope with very small target sizes.
2518 */
2519 indexend = sizeof(journal_rawheader_t) +
2520 ISC_CHECKED_MUL(j1->header.index_size,
2521 sizeof(journal_rawpos_t));
2522 if (target_size < DNS_JOURNAL_SIZE_MIN) {
2523 target_size = DNS_JOURNAL_SIZE_MIN;
2524 }
2525 if (target_size < indexend * 2) {
2526 target_size = target_size / 2 + indexend;
2527 }
2528
2529 /*
2530 * See if there is any work to do.
2531 */
2532 if (!rewrite && (uint32_t)j1->header.end.offset < target_size) {
2533 dns_journal_destroy(&j1);
2534 return ISC_R_SUCCESS;
2535 }
2536
2537 CHECK(journal_open(mctx, newname, true, true, downgrade, &j2));
2538 CHECK(journal_seek(j2, indexend));
2539
2540 /*
2541 * Remove overhead so space test below can succeed.
2542 */
2543 if (target_size >= indexend) {
2544 target_size -= indexend;
2545 }
2546
2547 /*
2548 * Find if we can create enough free space.
2549 */
2550 best_guess = j1->header.begin;
2551 for (i = 0; i < j1->header.index_size; i++) {
2552 if (POS_VALID(j1->index[i]) &&
2553 DNS_SERIAL_GE(serial, j1->index[i].serial) &&
2554 ((uint32_t)(j1->header.end.offset - j1->index[i].offset) >=
2555 target_size / 2) &&
2556 j1->index[i].offset > best_guess.offset)
2557 {
2558 best_guess = j1->index[i];
2559 }
2560 }
2561
2562 current_pos = best_guess;
2563 while (current_pos.serial != serial) {
2564 CHECK(journal_next(j1, ¤t_pos));
2565 if (current_pos.serial == j1->header.end.serial) {
2566 break;
2567 }
2568
2569 if (DNS_SERIAL_GE(serial, current_pos.serial) &&
2570 ((uint32_t)(j1->header.end.offset - current_pos.offset) >=
2571 (target_size / 2)) &&
2572 current_pos.offset > best_guess.offset)
2573 {
2574 best_guess = current_pos;
2575 } else {
2576 break;
2577 }
2578 }
2579
2580 INSIST(best_guess.serial != j1->header.end.serial);
2581 if (best_guess.serial != serial) {
2582 CHECK(journal_next(j1, &best_guess));
2583 serial = best_guess.serial;
2584 }
2585
2586 /*
2587 * We should now be roughly half target_size provided
2588 * we did not reach 'serial'. If not we will just copy
2589 * all uncommitted deltas regardless of the size.
2590 */
2591 len = j1->header.end.offset - best_guess.offset;
2592 if (len != 0) {
2593 CHECK(journal_seek(j1, best_guess.offset));
2594
2595 /* Prepare new header */
2596 j2->header.begin.serial = best_guess.serial;
2597 j2->header.begin.offset = indexend;
2598 j2->header.sourceserial = j1->header.sourceserial;
2599 j2->header.serialset = j1->header.serialset;
2600 j2->header.end.serial = j1->header.end.serial;
2601
2602 /*
2603 * Only use this method if we're rewriting the
2604 * journal to fix outdated transaction headers;
2605 * otherwise we'll copy the whole journal without
2606 * parsing individual deltas below.
2607 */
2608 while (rewrite && len > 0) {
2609 journal_xhdr_t xhdr;
2610 off_t offset = j1->offset;
2611 uint32_t count;
2612
2613 result = journal_read_xhdr(j1, &xhdr);
2614 if (rewrite && result == ISC_R_NOMORE) {
2615 break;
2616 }
2617 CHECK(result);
2618
2619 size = xhdr.size;
2620 if (size > len) {
2621 isc_log_write(JOURNAL_COMMON_LOGARGS,
2622 ISC_LOG_ERROR,
2623 "%s: journal file corrupt, "
2624 "transaction too large",
2625 j1->filename);
2626 CHECK(ISC_R_FAILURE);
2627 }
2628 buf = isc_mem_get(mctx, size);
2629 result = journal_read(j1, buf, size);
2630
2631 /*
2632 * If we're repairing an outdated journal, the
2633 * xhdr format may be wrong.
2634 */
2635 if (rewrite && (result != ISC_R_SUCCESS ||
2636 !check_delta(buf, size)))
2637 {
2638 if (j1->xhdr_version == XHDR_VERSION2) {
2639 /* XHDR_VERSION2 -> XHDR_VERSION1 */
2640 j1->xhdr_version = XHDR_VERSION1;
2641 CHECK(journal_seek(j1, offset));
2642 CHECK(journal_read_xhdr(j1, &xhdr));
2643 } else if (j1->xhdr_version == XHDR_VERSION1) {
2644 /* XHDR_VERSION1 -> XHDR_VERSION2 */
2645 j1->xhdr_version = XHDR_VERSION2;
2646 CHECK(journal_seek(j1, offset));
2647 CHECK(journal_read_xhdr(j1, &xhdr));
2648 }
2649
2650 /* Check again */
2651 isc_mem_put(mctx, buf, size);
2652 size = xhdr.size;
2653 if (size > len) {
2654 isc_log_write(
2655 JOURNAL_COMMON_LOGARGS,
2656 ISC_LOG_ERROR,
2657 "%s: journal file corrupt, "
2658 "transaction too large",
2659 j1->filename);
2660 CHECK(ISC_R_FAILURE);
2661 }
2662 buf = isc_mem_get(mctx, size);
2663 CHECK(journal_read(j1, buf, size));
2664
2665 if (!check_delta(buf, size)) {
2666 CHECK(ISC_R_UNEXPECTED);
2667 }
2668 } else {
2669 CHECK(result);
2670 }
2671
2672 /*
2673 * Recover from incorrectly written transaction header.
2674 * The incorrect header was written as size, serial0,
2675 * serial1, and 0. XHDR_VERSION2 is expecting size,
2676 * count, serial0, and serial1.
2677 */
2678 if (j1->xhdr_version == XHDR_VERSION2 &&
2679 xhdr.count == serial && xhdr.serial1 == 0U &&
2680 isc_serial_gt(xhdr.serial0, xhdr.count))
2681 {
2682 xhdr.serial1 = xhdr.serial0;
2683 xhdr.serial0 = xhdr.count;
2684 xhdr.count = 0;
2685 }
2686
2687 /*
2688 * Check that xhdr is consistent.
2689 */
2690 if (xhdr.serial0 != serial ||
2691 isc_serial_le(xhdr.serial1, xhdr.serial0))
2692 {
2693 CHECK(ISC_R_UNEXPECTED);
2694 }
2695
2696 /*
2697 * Extract record count from the transaction. This
2698 * is needed when converting from XHDR_VERSION1 to
2699 * XHDR_VERSION2, and when recovering from an
2700 * incorrectly written XHDR_VERSION2.
2701 */
2702 count = rrcount(buf, size);
2703 CHECK(journal_write_xhdr(j2, xhdr.size, count,
2704 xhdr.serial0, xhdr.serial1));
2705 CHECK(journal_write(j2, buf, size));
2706
2707 j2->header.end.offset = j2->offset;
2708
2709 serial = xhdr.serial1;
2710
2711 len = j1->header.end.offset - j1->offset;
2712 isc_mem_put(mctx, buf, size);
2713 }
2714
2715 /*
2716 * If we're not rewriting transaction headers, we can use
2717 * this faster method instead.
2718 */
2719 if (!rewrite) {
2720 size = ISC_MIN(64 * 1024, len);
2721 buf = isc_mem_get(mctx, size);
2722 for (i = 0; i < len; i += size) {
2723 unsigned int blob = ISC_MIN(size, len - i);
2724 CHECK(journal_read(j1, buf, blob));
2725 CHECK(journal_write(j2, buf, blob));
2726 }
2727
2728 j2->header.end.offset = indexend + len;
2729 }
2730
2731 CHECK(journal_fsync(j2));
2732
2733 /*
2734 * Update the journal header.
2735 */
2736 journal_header_encode(&j2->header, &rawheader);
2737 CHECK(journal_seek(j2, 0));
2738 CHECK(journal_write(j2, &rawheader, sizeof(rawheader)));
2739 CHECK(journal_fsync(j2));
2740
2741 /*
2742 * Build new index.
2743 */
2744 current_pos = j2->header.begin;
2745 while (current_pos.serial != j2->header.end.serial) {
2746 index_add(j2, ¤t_pos);
2747 CHECK(journal_next(j2, ¤t_pos));
2748 }
2749
2750 /*
2751 * Write index.
2752 */
2753 CHECK(index_to_disk(j2));
2754 CHECK(journal_fsync(j2));
2755
2756 indexend = j2->header.end.offset;
2757 POST(indexend);
2758 }
2759
2760 /*
2761 * Close both journals before trying to rename files.
2762 */
2763 dns_journal_destroy(&j1);
2764 dns_journal_destroy(&j2);
2765
2766 /*
2767 * With a UFS file system this should just succeed and be atomic.
2768 * Any IXFR outs will just continue and the old journal will be
2769 * removed on final close.
2770 *
2771 * With MSDOS / NTFS we need to do a two stage rename, triggered
2772 * by EEXIST. (If any IXFR's are running in other threads, however,
2773 * this will fail, and the journal will not be compacted. But
2774 * if so, hopefully they'll be finished by the next time we
2775 * compact.)
2776 */
2777 if (rename(newname, filename) == -1) {
2778 if (errno == EEXIST && !is_backup) {
2779 result = isc_file_remove(backup);
2780 if (result != ISC_R_SUCCESS &&
2781 result != ISC_R_FILENOTFOUND)
2782 {
2783 CHECK(result);
2784 }
2785 if (rename(filename, backup) == -1) {
2786 goto maperrno;
2787 }
2788 if (rename(newname, filename) == -1) {
2789 goto maperrno;
2790 }
2791 (void)isc_file_remove(backup);
2792 } else {
2793 maperrno:
2794 CHECK(ISC_R_FAILURE);
2795 }
2796 }
2797
2798 result = ISC_R_SUCCESS;
2799
2800 cleanup:
2801 (void)isc_file_remove(newname);
2802 if (buf != NULL) {
2803 isc_mem_put(mctx, buf, size);
2804 }
2805 if (j1 != NULL) {
2806 dns_journal_destroy(&j1);
2807 }
2808 if (j2 != NULL) {
2809 dns_journal_destroy(&j2);
2810 }
2811 return result;
2812 }
2813
2814 static isc_result_t
2815 index_to_disk(dns_journal_t *j) {
2816 isc_result_t result = ISC_R_SUCCESS;
2817
2818 if (j->header.index_size != 0) {
2819 unsigned int i;
2820 unsigned char *p;
2821 unsigned int rawbytes;
2822
2823 rawbytes = ISC_CHECKED_MUL(j->header.index_size,
2824 sizeof(journal_rawpos_t));
2825
2826 p = j->rawindex;
2827 for (i = 0; i < j->header.index_size; i++) {
2828 encode_uint32(j->index[i].serial, p);
2829 p += 4;
2830 encode_uint32(j->index[i].offset, p);
2831 p += 4;
2832 }
2833 INSIST(p == j->rawindex + rawbytes);
2834
2835 CHECK(journal_seek(j, sizeof(journal_rawheader_t)));
2836 CHECK(journal_write(j, j->rawindex, rawbytes));
2837 }
2838 cleanup:
2839 return result;
2840 }
2841