journal.c revision 1.1.1.3 1 /* $NetBSD: journal.c,v 1.1.1.3 2019/11/24 19:57:57 christos Exp $ */
2
3 /*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * This Source Code Form is subject to the terms of the Mozilla Public
7 * License, v. 2.0. If a copy of the MPL was not distributed with this
8 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 *
10 * See the COPYRIGHT file distributed with this work for additional
11 * information regarding copyright ownership.
12 */
13
14 #include <config.h>
15
16 #include <inttypes.h>
17 #include <stdbool.h>
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <errno.h>
21
22 #include <isc/file.h>
23 #include <isc/mem.h>
24 #include <isc/print.h>
25 #include <isc/stdio.h>
26 #include <isc/string.h>
27 #include <isc/util.h>
28
29 #include <dns/compress.h>
30 #include <dns/db.h>
31 #include <dns/dbiterator.h>
32 #include <dns/diff.h>
33 #include <dns/fixedname.h>
34 #include <dns/journal.h>
35 #include <dns/log.h>
36 #include <dns/rdataset.h>
37 #include <dns/rdatasetiter.h>
38 #include <dns/result.h>
39 #include <dns/soa.h>
40
41 /*! \file
42 * \brief Journaling.
43 *
44 * A journal file consists of
45 *
46 * \li A fixed-size header of type journal_rawheader_t.
47 *
48 * \li The index. This is an unordered array of index entries
49 * of type journal_rawpos_t giving the locations
50 * of some arbitrary subset of the journal's addressable
51 * transactions. The index entries are used as hints to
52 * speed up the process of locating a transaction with a given
53 * serial number. Unused index entries have an "offset"
54 * field of zero. The size of the index can vary between
55 * journal files, but does not change during the lifetime
56 * of a file. The size can be zero.
57 *
58 * \li The journal data. This consists of one or more transactions.
59 * Each transaction begins with a transaction header of type
60 * journal_rawxhdr_t. The transaction header is followed by a
61 * sequence of RRs, similar in structure to an IXFR difference
62 * sequence (RFC1995). That is, the pre-transaction SOA,
63 * zero or more other deleted RRs, the post-transaction SOA,
64 * and zero or more other added RRs. Unlike in IXFR, each RR
65 * is prefixed with a 32-bit length.
66 *
67 * The journal data part grows as new transactions are
68 * appended to the file. Only those transactions
69 * whose serial number is current-(2^31-1) to current
70 * are considered "addressable" and may be pointed
71 * to from the header or index. They may be preceded
72 * by old transactions that are no longer addressable,
73 * and they may be followed by transactions that were
74 * appended to the journal but never committed by updating
75 * the "end" position in the header. The latter will
76 * be overwritten when new transactions are added.
77 */
78
79 /**************************************************************************/
80 /*
81 * Miscellaneous utilities.
82 */
83
/*%
 * Leading arguments shared by the isc_log_write() calls in this file:
 * the log context, the category and the module.
 */
#define JOURNAL_COMMON_LOGARGS \
	dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL

/*%
 * JOURNAL_COMMON_LOGARGS followed by a debug log level of 'n'.
 */
#define JOURNAL_DEBUG_LOGARGS(n) \
	JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

/*%
 * Store 'code' in the caller's local variable 'result' and jump to the
 * caller's 'failure' label unless it is ISC_R_SUCCESS.  The enclosing
 * function must declare both.
 *
 * It would be nonsensical (or at least obtuse) to use FAIL() with an
 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
 */
#define FAIL(code) \
	do { result = (code); \
		if (result != ISC_R_SUCCESS) goto failure; \
	} while (0)

/*%
 * Evaluate 'op' once, store its value in the caller's local variable
 * 'result', and jump to the caller's 'failure' label if it is not
 * ISC_R_SUCCESS.
 */
#define CHECK(op) \
	do { result = (op); \
		if (result != ISC_R_SUCCESS) goto failure; \
	} while (0)

/*%
 * Bit in the on-disk header's 'flags' byte that mirrors the in-core
 * 'serialset' boolean (see journal_header_decode/journal_header_encode).
 */
#define JOURNAL_SERIALSET 0x01U

/* Forward declaration; the definition is not in this part of the file. */
static isc_result_t index_to_disk(dns_journal_t *);
108
/*%
 * Decode the four bytes at 'p' as a big-endian unsigned 32-bit integer.
 *
 * Each byte is widened to uint32_t before it is shifted.  Without the
 * cast the 'unsigned char' operand is promoted to (signed) int, and
 * shifting a byte value of 0x80 or more left by 24 bits overflows int,
 * which is undefined behaviour.
 */
static inline uint32_t
decode_uint32(const unsigned char *p) {
	return (((uint32_t)p[0] << 24) |
		((uint32_t)p[1] << 16) |
		((uint32_t)p[2] << 8) |
		((uint32_t)p[3] << 0));
}
116
/*%
 * Store 'val' at 'p' as four bytes in big-endian order
 * (most significant byte first).
 */
static inline void
encode_uint32(uint32_t val, unsigned char *p) {
	unsigned int idx;
	int shift = 24;

	for (idx = 0; idx < 4; idx++) {
		p[idx] = (uint8_t)(val >> shift);
		shift -= 8;
	}
}
124
125 isc_result_t
126 dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx,
127 dns_diffop_t op, dns_difftuple_t **tp)
128 {
129 isc_result_t result;
130 dns_dbnode_t *node;
131 dns_rdataset_t rdataset;
132 dns_rdata_t rdata = DNS_RDATA_INIT;
133 dns_fixedname_t fixed;
134 dns_name_t *zonename;
135
136 zonename = dns_fixedname_initname(&fixed);
137 dns_name_copynf(dns_db_origin(db), zonename);
138
139 node = NULL;
140 result = dns_db_findnode(db, zonename, false, &node);
141 if (result != ISC_R_SUCCESS)
142 goto nonode;
143
144 dns_rdataset_init(&rdataset);
145 result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0,
146 (isc_stdtime_t)0, &rdataset, NULL);
147 if (result != ISC_R_SUCCESS)
148 goto freenode;
149
150 result = dns_rdataset_first(&rdataset);
151 if (result != ISC_R_SUCCESS)
152 goto freenode;
153
154 dns_rdataset_current(&rdataset, &rdata);
155 dns_rdataset_getownercase(&rdataset, zonename);
156
157 result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl,
158 &rdata, tp);
159
160 dns_rdataset_disassociate(&rdataset);
161 dns_db_detachnode(db, &node);
162 return (result);
163
164 freenode:
165 dns_db_detachnode(db, &node);
166 nonode:
167 UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA");
168 return (result);
169 }
170
171 /* Journaling */
172
/*%
 * On-disk representation of a "pointer" to a journal entry.
 * These are used in the journal header to locate the beginning
 * and end of the journal, and in the journal index to locate
 * other transactions.
 *
 * Both fields hold a 32-bit integer as four raw bytes; they are
 * converted with decode_uint32()/encode_uint32(), i.e. big-endian.
 */
typedef struct {
	unsigned char serial[4]; /*%< SOA serial before update. */
	/*
	 * XXXRTH Should offset be 8 bytes?
	 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
	 * XXXAG ... but we will not be able to seek >2G anyway on many
	 * platforms as long as we are using fseek() rather
	 * than lseek().
	 */
	unsigned char offset[4]; /*%< Offset from beginning of file. */
} journal_rawpos_t;
190
191
/*%
 * The header is of a fixed size, with some spare room for future
 * extensions.
 */
#define JOURNAL_HEADER_SIZE 64 /* Bytes. */

/*%
 * The on-disk representation of the journal header.
 * All numbers are stored in big-endian order.
 *
 * The union with 'pad' makes the type at least JOURNAL_HEADER_SIZE
 * bytes; journal_file_create() asserts that its size is exactly
 * JOURNAL_HEADER_SIZE.
 */
typedef union {
	struct {
		/*% File format version ID. */
		unsigned char format[16];
		/*% Position of the first addressable transaction */
		journal_rawpos_t begin;
		/*% Position of the next (yet nonexistent) transaction. */
		journal_rawpos_t end;
		/*% Number of index entries following the header. */
		unsigned char index_size[4];
		/*% Source serial number. */
		unsigned char sourceserial[4];
		/*% Flag bits; JOURNAL_SERIALSET is the only one used here. */
		unsigned char flags;
	} h;
	/* Pad the header to a fixed size. */
	unsigned char pad[JOURNAL_HEADER_SIZE];
} journal_rawheader_t;
219
/*%
 * The on-disk representation of the transaction header.
 * There is one of these at the beginning of each transaction.
 * Each field is a 32-bit integer stored as four raw bytes and is
 * converted with decode_uint32()/encode_uint32().
 */
typedef struct {
	unsigned char size[4]; /*%< In bytes, excluding header. */
	unsigned char serial0[4]; /*%< SOA serial before update. */
	unsigned char serial1[4]; /*%< SOA serial after update. */
} journal_rawxhdr_t;

/*%
 * The on-disk representation of the RR header.
 * There is one of these at the beginning of each RR.
 */
typedef struct {
	unsigned char size[4]; /*%< In bytes, excluding header. */
} journal_rawrrhdr_t;
237
/*%
 * The in-core representation of a journal position: the decoded
 * counterpart of journal_rawpos_t.
 */
typedef struct {
	uint32_t serial; /*%< SOA serial at this position. */
	isc_offset_t offset; /*%< Offset from beginning of file. */
} journal_pos_t;

/*% A position is treated as unused/invalid when its offset is zero. */
#define POS_VALID(pos) ((pos).offset != 0)
/*% Mark a position as unused by zeroing both of its fields. */
#define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0)

/*%
 * The in-core representation of the journal header: the decoded
 * counterpart of journal_rawheader_t.
 */
typedef struct {
	unsigned char format[16]; /*%< File format version ID. */
	journal_pos_t begin; /*%< First addressable transaction. */
	journal_pos_t end; /*%< Next (yet nonexistent) transaction. */
	uint32_t index_size; /*%< Number of index entries. */
	uint32_t sourceserial; /*%< Source serial number. */
	bool serialset; /*%< JOURNAL_SERIALSET flag bit. */
} journal_header_t;
257
/*%
 * The in-core representation of the transaction header
 * (decoded journal_rawxhdr_t).
 */

typedef struct {
	uint32_t size; /*%< In bytes, excluding header. */
	uint32_t serial0; /*%< SOA serial before update. */
	uint32_t serial1; /*%< SOA serial after update. */
} journal_xhdr_t;

/*%
 * The in-core representation of the RR header
 * (decoded journal_rawrrhdr_t).
 */
typedef struct {
	uint32_t size; /*%< In bytes, excluding header. */
} journal_rrhdr_t;
274
275
276 /*%
277 * Initial contents to store in the header of a newly created
278 * journal file.
279 *
280 * The header starts with the magic string ";BIND LOG V9\n"
281 * to identify the file as a BIND 9 journal file. An ASCII
282 * identification string is used rather than a binary magic
283 * number to be consistent with BIND 8 (BIND 8 journal files
284 * are ASCII text files).
285 */
286
287 static journal_header_t
288 initial_journal_header = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0, 0, 0 };
289
290 #define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)
291
/*%
 * State of a dns_journal_t object.
 */
typedef enum {
	/*% Set by journal_open() while the object is being built. */
	JOURNAL_STATE_INVALID,
	/*% Set by journal_open() for a journal opened non-writable. */
	JOURNAL_STATE_READ,
	/*% Set by journal_open() for a journal opened writable. */
	JOURNAL_STATE_WRITE,
	/*% Set by dns_journal_begin_transaction() once it succeeds. */
	JOURNAL_STATE_TRANSACTION,
	/*%
	 * Accepted by dns_journal_begin_transaction() and
	 * dns_journal_commit(); where it is set is not visible here.
	 */
	JOURNAL_STATE_INLINE
} journal_state_t;
299
/*%
 * In-core state of an open journal file.  Objects are created by
 * journal_open().
 */
struct dns_journal {
	unsigned int magic; /*%< JOUR */
	isc_mem_t *mctx; /*%< Memory context */
	journal_state_t state;
	char *filename; /*%< Journal file name */
	FILE * fp; /*%< File handle */
	isc_offset_t offset; /*%< Current file offset */
	journal_header_t header; /*%< In-core journal header */
	unsigned char *rawindex; /*%< In-core buffer for journal index in on-disk format */
	journal_pos_t *index; /*%< In-core journal index */

	/*% Current transaction state (when writing). */
	struct {
		unsigned int n_soa; /*%< Number of SOAs seen */
		journal_pos_t pos[2]; /*%< Begin/end position */
	} x;

	/*% Iteration state (when reading). */
	struct {
		/* These define the part of the journal we iterate over. */
		journal_pos_t bpos; /*%< Position before first, */
		journal_pos_t epos; /*%< and after last transaction */
		/* The rest is iterator state. */
		uint32_t current_serial; /*%< Current SOA serial */
		isc_buffer_t source; /*%< Data from disk */
		isc_buffer_t target; /*%< Data from _fromwire check */
		dns_decompress_t dctx; /*%< Dummy decompression ctx */
		dns_name_t name; /*%< Current domain name */
		dns_rdata_t rdata; /*%< Current rdata */
		uint32_t ttl; /*%< Current TTL */
		unsigned int xsize; /*%< Size of transaction data */
		unsigned int xpos; /*%< Current position in it */
		isc_result_t result; /*%< Result of last call */
	} it;
};

/*% Magic number stored in dns_journal.magic, and the matching validity test. */
#define DNS_JOURNAL_MAGIC ISC_MAGIC('J', 'O', 'U', 'R')
#define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
338
339 static void
340 journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) {
341 cooked->serial = decode_uint32(raw->serial);
342 cooked->offset = decode_uint32(raw->offset);
343 }
344
345 static void
346 journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) {
347 encode_uint32(cooked->serial, raw->serial);
348 encode_uint32(cooked->offset, raw->offset);
349 }
350
351 static void
352 journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) {
353 INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
354 memmove(cooked->format, raw->h.format, sizeof(cooked->format));
355 journal_pos_decode(&raw->h.begin, &cooked->begin);
356 journal_pos_decode(&raw->h.end, &cooked->end);
357 cooked->index_size = decode_uint32(raw->h.index_size);
358 cooked->sourceserial = decode_uint32(raw->h.sourceserial);
359 cooked->serialset = ((raw->h.flags & JOURNAL_SERIALSET) != 0);
360 }
361
362 static void
363 journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) {
364 unsigned char flags = 0;
365
366 INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
367 memset(raw->pad, 0, sizeof(raw->pad));
368 memmove(raw->h.format, cooked->format, sizeof(raw->h.format));
369 journal_pos_encode(&raw->h.begin, &cooked->begin);
370 journal_pos_encode(&raw->h.end, &cooked->end);
371 encode_uint32(cooked->index_size, raw->h.index_size);
372 encode_uint32(cooked->sourceserial, raw->h.sourceserial);
373 if (cooked->serialset) {
374 flags |= JOURNAL_SERIALSET;
375 }
376 raw->h.flags = flags;
377 }
378
379 /*
380 * Journal file I/O subroutines, with error checking and reporting.
381 */
382 static isc_result_t
383 journal_seek(dns_journal_t *j, uint32_t offset) {
384 isc_result_t result;
385
386 result = isc_stdio_seek(j->fp, (off_t)offset, SEEK_SET);
387 if (result != ISC_R_SUCCESS) {
388 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
389 "%s: seek: %s", j->filename,
390 isc_result_totext(result));
391 return (ISC_R_UNEXPECTED);
392 }
393 j->offset = offset;
394 return (ISC_R_SUCCESS);
395 }
396
397 static isc_result_t
398 journal_read(dns_journal_t *j, void *mem, size_t nbytes) {
399 isc_result_t result;
400
401 result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL);
402 if (result != ISC_R_SUCCESS) {
403 if (result == ISC_R_EOF)
404 return (ISC_R_NOMORE);
405 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
406 "%s: read: %s",
407 j->filename, isc_result_totext(result));
408 return (ISC_R_UNEXPECTED);
409 }
410 j->offset += (isc_offset_t)nbytes;
411 return (ISC_R_SUCCESS);
412 }
413
414 static isc_result_t
415 journal_write(dns_journal_t *j, void *mem, size_t nbytes) {
416 isc_result_t result;
417
418 result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL);
419 if (result != ISC_R_SUCCESS) {
420 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
421 "%s: write: %s",
422 j->filename, isc_result_totext(result));
423 return (ISC_R_UNEXPECTED);
424 }
425 j->offset += (isc_offset_t)nbytes;
426 return (ISC_R_SUCCESS);
427 }
428
429 static isc_result_t
430 journal_fsync(dns_journal_t *j) {
431 isc_result_t result;
432 result = isc_stdio_flush(j->fp);
433 if (result != ISC_R_SUCCESS) {
434 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
435 "%s: flush: %s",
436 j->filename, isc_result_totext(result));
437 return (ISC_R_UNEXPECTED);
438 }
439 result = isc_stdio_sync(j->fp);
440 if (result != ISC_R_SUCCESS) {
441 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
442 "%s: fsync: %s",
443 j->filename, isc_result_totext(result));
444 return (ISC_R_UNEXPECTED);
445 }
446 return (ISC_R_SUCCESS);
447 }
448
449 /*
450 * Read/write a transaction header at the current file position.
451 */
452
453 static isc_result_t
454 journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) {
455 journal_rawxhdr_t raw;
456 isc_result_t result;
457 result = journal_read(j, &raw, sizeof(raw));
458 if (result != ISC_R_SUCCESS)
459 return (result);
460 xhdr->size = decode_uint32(raw.size);
461 xhdr->serial0 = decode_uint32(raw.serial0);
462 xhdr->serial1 = decode_uint32(raw.serial1);
463 return (ISC_R_SUCCESS);
464 }
465
466 static isc_result_t
467 journal_write_xhdr(dns_journal_t *j, uint32_t size,
468 uint32_t serial0, uint32_t serial1)
469 {
470 journal_rawxhdr_t raw;
471 encode_uint32(size, raw.size);
472 encode_uint32(serial0, raw.serial0);
473 encode_uint32(serial1, raw.serial1);
474 return (journal_write(j, &raw, sizeof(raw)));
475 }
476
477
478 /*
479 * Read an RR header at the current file position.
480 */
481
482 static isc_result_t
483 journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) {
484 journal_rawrrhdr_t raw;
485 isc_result_t result;
486 result = journal_read(j, &raw, sizeof(raw));
487 if (result != ISC_R_SUCCESS)
488 return (result);
489 rrhdr->size = decode_uint32(raw.size);
490 return (ISC_R_SUCCESS);
491 }
492
493 static isc_result_t
494 journal_file_create(isc_mem_t *mctx, const char *filename) {
495 FILE *fp = NULL;
496 isc_result_t result;
497 journal_header_t header;
498 journal_rawheader_t rawheader;
499 int index_size = 56; /* XXX configurable */
500 int size;
501 void *mem; /* Memory for temporary index image. */
502
503 INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE);
504
505 result = isc_stdio_open(filename, "wb", &fp);
506 if (result != ISC_R_SUCCESS) {
507 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
508 "%s: create: %s",
509 filename, isc_result_totext(result));
510 return (ISC_R_UNEXPECTED);
511 }
512
513 header = initial_journal_header;
514 header.index_size = index_size;
515 journal_header_encode(&header, &rawheader);
516
517 size = sizeof(journal_rawheader_t) +
518 index_size * sizeof(journal_rawpos_t);
519
520 mem = isc_mem_get(mctx, size);
521 if (mem == NULL) {
522 (void)isc_stdio_close(fp);
523 (void)isc_file_remove(filename);
524 return (ISC_R_NOMEMORY);
525 }
526 memset(mem, 0, size);
527 memmove(mem, &rawheader, sizeof(rawheader));
528
529 result = isc_stdio_write(mem, 1, (size_t) size, fp, NULL);
530 if (result != ISC_R_SUCCESS) {
531 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
532 "%s: write: %s",
533 filename, isc_result_totext(result));
534 (void)isc_stdio_close(fp);
535 (void)isc_file_remove(filename);
536 isc_mem_put(mctx, mem, size);
537 return (ISC_R_UNEXPECTED);
538 }
539 isc_mem_put(mctx, mem, size);
540
541 result = isc_stdio_close(fp);
542 if (result != ISC_R_SUCCESS) {
543 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
544 "%s: close: %s",
545 filename, isc_result_totext(result));
546 (void)isc_file_remove(filename);
547 return (ISC_R_UNEXPECTED);
548 }
549
550 return (ISC_R_SUCCESS);
551 }
552
553 static isc_result_t
554 journal_open(isc_mem_t *mctx, const char *filename, bool writable,
555 bool create, dns_journal_t **journalp)
556 {
557 FILE *fp = NULL;
558 isc_result_t result;
559 journal_rawheader_t rawheader;
560 dns_journal_t *j;
561
562 INSIST(journalp != NULL && *journalp == NULL);
563 j = isc_mem_get(mctx, sizeof(*j));
564 if (j == NULL)
565 return (ISC_R_NOMEMORY);
566
567 j->mctx = NULL;
568 isc_mem_attach(mctx, &j->mctx);
569 j->state = JOURNAL_STATE_INVALID;
570 j->fp = NULL;
571 j->filename = isc_mem_strdup(mctx, filename);
572 j->index = NULL;
573 j->rawindex = NULL;
574
575 if (j->filename == NULL)
576 FAIL(ISC_R_NOMEMORY);
577
578 result = isc_stdio_open(j->filename, writable ? "rb+" : "rb", &fp);
579
580 if (result == ISC_R_FILENOTFOUND) {
581 if (create) {
582 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(1),
583 "journal file %s does not exist, "
584 "creating it", j->filename);
585 CHECK(journal_file_create(mctx, filename));
586 /*
587 * Retry.
588 */
589 result = isc_stdio_open(j->filename, "rb+", &fp);
590 } else {
591 FAIL(ISC_R_NOTFOUND);
592 }
593 }
594 if (result != ISC_R_SUCCESS) {
595 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
596 "%s: open: %s",
597 j->filename, isc_result_totext(result));
598 FAIL(ISC_R_UNEXPECTED);
599 }
600
601 j->fp = fp;
602
603 /*
604 * Set magic early so that seek/read can succeed.
605 */
606 j->magic = DNS_JOURNAL_MAGIC;
607
608 CHECK(journal_seek(j, 0));
609 CHECK(journal_read(j, &rawheader, sizeof(rawheader)));
610
611 if (memcmp(rawheader.h.format, initial_journal_header.format,
612 sizeof(initial_journal_header.format)) != 0) {
613 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
614 "%s: journal format not recognized",
615 j->filename);
616 FAIL(ISC_R_UNEXPECTED);
617 }
618 journal_header_decode(&rawheader, &j->header);
619
620 /*
621 * If there is an index, read the raw index into a dynamically
622 * allocated buffer and then convert it into a cooked index.
623 */
624 if (j->header.index_size != 0) {
625 unsigned int i;
626 unsigned int rawbytes;
627 unsigned char *p;
628
629 rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
630 j->rawindex = isc_mem_get(mctx, rawbytes);
631 if (j->rawindex == NULL)
632 FAIL(ISC_R_NOMEMORY);
633
634 CHECK(journal_read(j, j->rawindex, rawbytes));
635
636 j->index = isc_mem_get(mctx, j->header.index_size *
637 sizeof(journal_pos_t));
638 if (j->index == NULL)
639 FAIL(ISC_R_NOMEMORY);
640
641 p = j->rawindex;
642 for (i = 0; i < j->header.index_size; i++) {
643 j->index[i].serial = decode_uint32(p);
644 p += 4;
645 j->index[i].offset = decode_uint32(p);
646 p += 4;
647 }
648 INSIST(p == j->rawindex + rawbytes);
649 }
650 j->offset = -1; /* Invalid, must seek explicitly. */
651
652 /*
653 * Initialize the iterator.
654 */
655 dns_name_init(&j->it.name, NULL);
656 dns_rdata_init(&j->it.rdata);
657
658 /*
659 * Set up empty initial buffers for unchecked and checked
660 * wire format RR data. They will be reallocated
661 * later.
662 */
663 isc_buffer_init(&j->it.source, NULL, 0);
664 isc_buffer_init(&j->it.target, NULL, 0);
665 dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE);
666
667 j->state =
668 writable ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ;
669
670 *journalp = j;
671 return (ISC_R_SUCCESS);
672
673 failure:
674 j->magic = 0;
675 if (j->rawindex != NULL)
676 isc_mem_put(j->mctx, j->rawindex, j->header.index_size *
677 sizeof(journal_rawpos_t));
678 if (j->index != NULL)
679 isc_mem_put(j->mctx, j->index, j->header.index_size *
680 sizeof(journal_pos_t));
681 if (j->filename != NULL)
682 isc_mem_free(j->mctx, j->filename);
683 if (j->fp != NULL)
684 (void)isc_stdio_close(j->fp);
685 isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
686 return (result);
687 }
688
689 isc_result_t
690 dns_journal_open(isc_mem_t *mctx, const char *filename, unsigned int mode,
691 dns_journal_t **journalp)
692 {
693 isc_result_t result;
694 size_t namelen;
695 char backup[1024];
696 bool writable, create;
697
698 create = ((mode & DNS_JOURNAL_CREATE) != 0);
699 writable = ((mode & (DNS_JOURNAL_WRITE|DNS_JOURNAL_CREATE)) != 0);
700
701 result = journal_open(mctx, filename, writable, create, journalp);
702 if (result == ISC_R_NOTFOUND) {
703 namelen = strlen(filename);
704 if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0)
705 namelen -= 4;
706
707 result = snprintf(backup, sizeof(backup), "%.*s.jbk",
708 (int)namelen, filename);
709 if (result >= sizeof(backup)) {
710 return ISC_R_NOSPACE;
711 }
712 result = journal_open(mctx, backup, writable, writable,
713 journalp);
714 }
715 return (result);
716 }
717
718 /*
719 * A comparison function defining the sorting order for
720 * entries in the IXFR-style journal file.
721 *
722 * The IXFR format requires that deletions are sorted before
723 * additions, and within either one, SOA records are sorted
724 * before others.
725 *
726 * Also sort the non-SOA records by type as a courtesy to the
727 * server receiving the IXFR - it may help reduce the amount of
728 * rdataset merging it has to do.
729 */
730 static int
731 ixfr_order(const void *av, const void *bv) {
732 dns_difftuple_t const * const *ap = av;
733 dns_difftuple_t const * const *bp = bv;
734 dns_difftuple_t const *a = *ap;
735 dns_difftuple_t const *b = *bp;
736 int r;
737 int bop = 0, aop = 0;
738
739 switch (a->op) {
740 case DNS_DIFFOP_DEL:
741 case DNS_DIFFOP_DELRESIGN:
742 aop = 1;
743 break;
744 case DNS_DIFFOP_ADD:
745 case DNS_DIFFOP_ADDRESIGN:
746 aop = 0;
747 break;
748 default:
749 INSIST(0);
750 ISC_UNREACHABLE();
751 }
752
753 switch (b->op) {
754 case DNS_DIFFOP_DEL:
755 case DNS_DIFFOP_DELRESIGN:
756 bop = 1;
757 break;
758 case DNS_DIFFOP_ADD:
759 case DNS_DIFFOP_ADDRESIGN:
760 bop = 0;
761 break;
762 default:
763 INSIST(0);
764 ISC_UNREACHABLE();
765 }
766
767 r = bop - aop;
768 if (r != 0)
769 return (r);
770
771 r = (b->rdata.type == dns_rdatatype_soa) -
772 (a->rdata.type == dns_rdatatype_soa);
773 if (r != 0)
774 return (r);
775
776 r = (a->rdata.type - b->rdata.type);
777 return (r);
778 }
779
780 /*
781 * Advance '*pos' to the next journal transaction.
782 *
783 * Requires:
784 * *pos refers to a valid journal transaction.
785 *
786 * Ensures:
787 * When ISC_R_SUCCESS is returned,
788 * *pos refers to the next journal transaction.
789 *
790 * Returns one of:
791 *
792 * ISC_R_SUCCESS
793 * ISC_R_NOMORE *pos pointed at the last transaction
794 * Other results due to file errors are possible.
795 */
796 static isc_result_t
797 journal_next(dns_journal_t *j, journal_pos_t *pos) {
798 isc_result_t result;
799 journal_xhdr_t xhdr;
800 REQUIRE(DNS_JOURNAL_VALID(j));
801
802 result = journal_seek(j, pos->offset);
803 if (result != ISC_R_SUCCESS)
804 return (result);
805
806 if (pos->serial == j->header.end.serial)
807 return (ISC_R_NOMORE);
808 /*
809 * Read the header of the current transaction.
810 * This will return ISC_R_NOMORE if we are at EOF.
811 */
812 result = journal_read_xhdr(j, &xhdr);
813 if (result != ISC_R_SUCCESS)
814 return (result);
815
816 /*
817 * Check serial number consistency.
818 */
819 if (xhdr.serial0 != pos->serial) {
820 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
821 "%s: journal file corrupt: "
822 "expected serial %u, got %u",
823 j->filename, pos->serial, xhdr.serial0);
824 return (ISC_R_UNEXPECTED);
825 }
826
827 /*
828 * Check for offset wraparound.
829 */
830 if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + xhdr.size)
831 < pos->offset) {
832 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
833 "%s: offset too large", j->filename);
834 return (ISC_R_UNEXPECTED);
835 }
836
837 pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size;
838 pos->serial = xhdr.serial1;
839 return (ISC_R_SUCCESS);
840 }
841
842 /*
843 * If the index of the journal 'j' contains an entry "better"
844 * than '*best_guess', replace '*best_guess' with it.
845 *
846 * "Better" means having a serial number closer to 'serial'
847 * but not greater than 'serial'.
848 */
849 static void
850 index_find(dns_journal_t *j, uint32_t serial, journal_pos_t *best_guess) {
851 unsigned int i;
852 if (j->index == NULL)
853 return;
854 for (i = 0; i < j->header.index_size; i++) {
855 if (POS_VALID(j->index[i]) &&
856 DNS_SERIAL_GE(serial, j->index[i].serial) &&
857 DNS_SERIAL_GT(j->index[i].serial, best_guess->serial))
858 *best_guess = j->index[i];
859 }
860 }
861
862 /*
863 * Add a new index entry. If there is no room, make room by removing
864 * the odd-numbered entries and compacting the others into the first
865 * half of the index. This decimates old index entries exponentially
866 * over time, so that the index always contains a much larger fraction
867 * of recent serial numbers than of old ones. This is deliberate -
868 * most index searches are for outgoing IXFR, and IXFR tends to request
869 * recent versions more often than old ones.
870 */
871 static void
872 index_add(dns_journal_t *j, journal_pos_t *pos) {
873 unsigned int i;
874 if (j->index == NULL)
875 return;
876 /*
877 * Search for a vacant position.
878 */
879 for (i = 0; i < j->header.index_size; i++) {
880 if (! POS_VALID(j->index[i]))
881 break;
882 }
883 if (i == j->header.index_size) {
884 unsigned int k = 0;
885 /*
886 * Found no vacant position. Make some room.
887 */
888 for (i = 0; i < j->header.index_size; i += 2) {
889 j->index[k++] = j->index[i];
890 }
891 i = k; /* 'i' identifies the first vacant position. */
892 while (k < j->header.index_size) {
893 POS_INVALIDATE(j->index[k]);
894 k++;
895 }
896 }
897 INSIST(i < j->header.index_size);
898 INSIST(! POS_VALID(j->index[i]));
899
900 /*
901 * Store the new index entry.
902 */
903 j->index[i] = *pos;
904 }
905
906 /*
907 * Invalidate any existing index entries that could become
908 * ambiguous when a new transaction with number 'serial' is added.
909 */
910 static void
911 index_invalidate(dns_journal_t *j, uint32_t serial) {
912 unsigned int i;
913 if (j->index == NULL)
914 return;
915 for (i = 0; i < j->header.index_size; i++) {
916 if (! DNS_SERIAL_GT(serial, j->index[i].serial))
917 POS_INVALIDATE(j->index[i]);
918 }
919 }
920
921 /*
922 * Try to find a transaction with initial serial number 'serial'
923 * in the journal 'j'.
924 *
925 * If found, store its position at '*pos' and return ISC_R_SUCCESS.
926 *
927 * If 'serial' is current (= the ending serial number of the
928 * last transaction in the journal), set '*pos' to
929 * the position immediately following the last transaction and
930 * return ISC_R_SUCCESS.
931 *
932 * If 'serial' is within the range of addressable serial numbers
933 * covered by the journal but that particular serial number is missing
934 * (from the journal, not just from the index), return ISC_R_NOTFOUND.
935 *
936 * If 'serial' is outside the range of addressable serial numbers
937 * covered by the journal, return ISC_R_RANGE.
938 *
939 */
940 static isc_result_t
941 journal_find(dns_journal_t *j, uint32_t serial, journal_pos_t *pos) {
942 isc_result_t result;
943 journal_pos_t current_pos;
944 REQUIRE(DNS_JOURNAL_VALID(j));
945
946 if (DNS_SERIAL_GT(j->header.begin.serial, serial))
947 return (ISC_R_RANGE);
948 if (DNS_SERIAL_GT(serial, j->header.end.serial))
949 return (ISC_R_RANGE);
950 if (serial == j->header.end.serial) {
951 *pos = j->header.end;
952 return (ISC_R_SUCCESS);
953 }
954
955 current_pos = j->header.begin;
956 index_find(j, serial, ¤t_pos);
957
958 while (current_pos.serial != serial) {
959 if (DNS_SERIAL_GT(current_pos.serial, serial))
960 return (ISC_R_NOTFOUND);
961 result = journal_next(j, ¤t_pos);
962 if (result != ISC_R_SUCCESS)
963 return (result);
964 }
965 *pos = current_pos;
966 return (ISC_R_SUCCESS);
967 }
968
969 isc_result_t
970 dns_journal_begin_transaction(dns_journal_t *j) {
971 uint32_t offset;
972 isc_result_t result;
973 journal_rawxhdr_t hdr;
974
975 REQUIRE(DNS_JOURNAL_VALID(j));
976 REQUIRE(j->state == JOURNAL_STATE_WRITE ||
977 j->state == JOURNAL_STATE_INLINE);
978
979 /*
980 * Find the file offset where the new transaction should
981 * be written, and seek there.
982 */
983 if (JOURNAL_EMPTY(&j->header)) {
984 offset = sizeof(journal_rawheader_t) +
985 j->header.index_size * sizeof(journal_rawpos_t);
986 } else {
987 offset = j->header.end.offset;
988 }
989 j->x.pos[0].offset = offset;
990 j->x.pos[1].offset = offset; /* Initial value, will be incremented. */
991 j->x.n_soa = 0;
992
993 CHECK(journal_seek(j, offset));
994
995 /*
996 * Write a dummy transaction header of all zeroes to reserve
997 * space. It will be filled in when the transaction is
998 * finished.
999 */
1000 memset(&hdr, 0, sizeof(hdr));
1001 CHECK(journal_write(j, &hdr, sizeof(hdr)));
1002 j->x.pos[1].offset = j->offset;
1003
1004 j->state = JOURNAL_STATE_TRANSACTION;
1005 result = ISC_R_SUCCESS;
1006 failure:
1007 return (result);
1008 }
1009
1010 isc_result_t
1011 dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) {
1012 dns_difftuple_t *t;
1013 isc_buffer_t buffer;
1014 void *mem = NULL;
1015 uint64_t size;
1016 isc_result_t result;
1017 isc_region_t used;
1018
1019 REQUIRE(DNS_DIFF_VALID(diff));
1020 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);
1021
1022 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal");
1023 (void)dns_diff_print(diff, NULL);
1024
1025 /*
1026 * Pass 1: determine the buffer size needed, and
1027 * keep track of SOA serial numbers.
1028 */
1029 size = 0;
1030 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
1031 t = ISC_LIST_NEXT(t, link))
1032 {
1033 if (t->rdata.type == dns_rdatatype_soa) {
1034 if (j->x.n_soa < 2)
1035 j->x.pos[j->x.n_soa].serial =
1036 dns_soa_getserial(&t->rdata);
1037 j->x.n_soa++;
1038 }
1039 size += sizeof(journal_rawrrhdr_t);
1040 size += t->name.length; /* XXX should have access macro? */
1041 size += 10;
1042 size += t->rdata.length;
1043 }
1044
1045 if (size >= DNS_JOURNAL_SIZE_MAX) {
1046 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1047 "dns_journal_writediff: %s: journal entry "
1048 "too big to be stored: %" PRIu64 " bytes", j->filename,
1049 size);
1050 return (ISC_R_NOSPACE);
1051 }
1052
1053 mem = isc_mem_get(j->mctx, size);
1054 if (mem == NULL)
1055 return (ISC_R_NOMEMORY);
1056
1057 isc_buffer_init(&buffer, mem, size);
1058
1059 /*
1060 * Pass 2. Write RRs to buffer.
1061 */
1062 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
1063 t = ISC_LIST_NEXT(t, link))
1064 {
1065 /*
1066 * Write the RR header.
1067 */
1068 isc_buffer_putuint32(&buffer, t->name.length + 10 +
1069 t->rdata.length);
1070 /*
1071 * Write the owner name, RR header, and RR data.
1072 */
1073 isc_buffer_putmem(&buffer, t->name.ndata, t->name.length);
1074 isc_buffer_putuint16(&buffer, t->rdata.type);
1075 isc_buffer_putuint16(&buffer, t->rdata.rdclass);
1076 isc_buffer_putuint32(&buffer, t->ttl);
1077 INSIST(t->rdata.length < 65536);
1078 isc_buffer_putuint16(&buffer, (uint16_t)t->rdata.length);
1079 INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length);
1080 isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length);
1081 }
1082
1083 isc_buffer_usedregion(&buffer, &used);
1084 INSIST(used.length == size);
1085
1086 j->x.pos[1].offset += used.length;
1087
1088 /*
1089 * Write the buffer contents to the journal file.
1090 */
1091 CHECK(journal_write(j, used.base, used.length));
1092
1093 result = ISC_R_SUCCESS;
1094
1095 failure:
1096 if (mem != NULL)
1097 isc_mem_put(j->mctx, mem, size);
1098 return (result);
1099
1100 }
1101
1102 isc_result_t
1103 dns_journal_commit(dns_journal_t *j) {
1104 isc_result_t result;
1105 journal_rawheader_t rawheader;
1106 uint64_t total;
1107
1108 REQUIRE(DNS_JOURNAL_VALID(j));
1109 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION ||
1110 j->state == JOURNAL_STATE_INLINE);
1111
1112 /*
1113 * Just write out a updated header.
1114 */
1115 if (j->state == JOURNAL_STATE_INLINE) {
1116 CHECK(journal_fsync(j));
1117 journal_header_encode(&j->header, &rawheader);
1118 CHECK(journal_seek(j, 0));
1119 CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
1120 CHECK(journal_fsync(j));
1121 j->state = JOURNAL_STATE_WRITE;
1122 return (ISC_R_SUCCESS);
1123 }
1124
1125 /*
1126 * Perform some basic consistency checks.
1127 */
1128 if (j->x.n_soa != 2) {
1129 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1130 "%s: malformed transaction: %d SOAs",
1131 j->filename, j->x.n_soa);
1132 return (ISC_R_UNEXPECTED);
1133 }
1134 if (! DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial)) {
1135 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1136 "%s: malformed transaction: serial number "
1137 "did not increase", j->filename);
1138 return (ISC_R_UNEXPECTED);
1139 }
1140 if (! JOURNAL_EMPTY(&j->header)) {
1141 if (j->x.pos[0].serial != j->header.end.serial) {
1142 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1143 "malformed transaction: "
1144 "%s last serial %u != "
1145 "transaction first serial %u",
1146 j->filename,
1147 j->header.end.serial,
1148 j->x.pos[0].serial);
1149 return (ISC_R_UNEXPECTED);
1150 }
1151 }
1152
1153 /*
1154 * We currently don't support huge journal entries.
1155 */
1156 total = j->x.pos[1].offset - j->x.pos[0].offset;
1157 if (total >= DNS_JOURNAL_SIZE_MAX) {
1158 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1159 "transaction too big to be stored in journal: "
1160 "%" PRIu64 "b (max is %" PRIu64 "b)", total,
1161 (uint64_t)DNS_JOURNAL_SIZE_MAX);
1162 return (ISC_R_UNEXPECTED);
1163 }
1164
1165 /*
1166 * Some old journal entries may become non-addressable
1167 * when we increment the current serial number. Purge them
1168 * by stepping header.begin forward to the first addressable
1169 * transaction. Also purge them from the index.
1170 */
1171 if (! JOURNAL_EMPTY(&j->header)) {
1172 while (! DNS_SERIAL_GT(j->x.pos[1].serial,
1173 j->header.begin.serial)) {
1174 CHECK(journal_next(j, &j->header.begin));
1175 }
1176 index_invalidate(j, j->x.pos[1].serial);
1177 }
1178 #ifdef notyet
1179 if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) {
1180 force_dump(...);
1181 }
1182 #endif
1183
1184 /*
1185 * Commit the transaction data to stable storage.
1186 */
1187 CHECK(journal_fsync(j));
1188
1189 if (j->state == JOURNAL_STATE_TRANSACTION) {
1190 isc_offset_t offset;
1191 offset = (j->x.pos[1].offset - j->x.pos[0].offset) -
1192 sizeof(journal_rawxhdr_t);
1193 /*
1194 * Update the transaction header.
1195 */
1196 CHECK(journal_seek(j, j->x.pos[0].offset));
1197 CHECK(journal_write_xhdr(j, offset, j->x.pos[0].serial,
1198 j->x.pos[1].serial));
1199 }
1200
1201 /*
1202 * Update the journal header.
1203 */
1204 if (JOURNAL_EMPTY(&j->header))
1205 j->header.begin = j->x.pos[0];
1206 j->header.end = j->x.pos[1];
1207 journal_header_encode(&j->header, &rawheader);
1208 CHECK(journal_seek(j, 0));
1209 CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
1210
1211 /*
1212 * Update the index.
1213 */
1214 index_add(j, &j->x.pos[0]);
1215
1216 /*
1217 * Convert the index into on-disk format and write
1218 * it to disk.
1219 */
1220 CHECK(index_to_disk(j));
1221
1222 /*
1223 * Commit the header to stable storage.
1224 */
1225 CHECK(journal_fsync(j));
1226
1227 /*
1228 * We no longer have a transaction open.
1229 */
1230 j->state = JOURNAL_STATE_WRITE;
1231
1232 result = ISC_R_SUCCESS;
1233
1234 failure:
1235 return (result);
1236 }
1237
1238 isc_result_t
1239 dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) {
1240 isc_result_t result;
1241 CHECK(dns_diff_sort(diff, ixfr_order));
1242 CHECK(dns_journal_begin_transaction(j));
1243 CHECK(dns_journal_writediff(j, diff));
1244 CHECK(dns_journal_commit(j));
1245 result = ISC_R_SUCCESS;
1246 failure:
1247 return (result);
1248 }
1249
1250 void
1251 dns_journal_destroy(dns_journal_t **journalp) {
1252 dns_journal_t *j = *journalp;
1253 REQUIRE(DNS_JOURNAL_VALID(j));
1254
1255 j->it.result = ISC_R_FAILURE;
1256 dns_name_invalidate(&j->it.name);
1257 dns_decompress_invalidate(&j->it.dctx);
1258 if (j->rawindex != NULL)
1259 isc_mem_put(j->mctx, j->rawindex, j->header.index_size *
1260 sizeof(journal_rawpos_t));
1261 if (j->index != NULL)
1262 isc_mem_put(j->mctx, j->index, j->header.index_size *
1263 sizeof(journal_pos_t));
1264 if (j->it.target.base != NULL)
1265 isc_mem_put(j->mctx, j->it.target.base, j->it.target.length);
1266 if (j->it.source.base != NULL)
1267 isc_mem_put(j->mctx, j->it.source.base, j->it.source.length);
1268 if (j->filename != NULL)
1269 isc_mem_free(j->mctx, j->filename);
1270 if (j->fp != NULL)
1271 (void)isc_stdio_close(j->fp);
1272 j->magic = 0;
1273 isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
1274 *journalp = NULL;
1275 }
1276
1277 /*
1278 * Roll the open journal 'j' into the database 'db'.
1279 * A new database version will be created.
1280 */
1281
1282 /* XXX Share code with incoming IXFR? */
1283
1284 static isc_result_t
1285 roll_forward(dns_journal_t *j, dns_db_t *db, unsigned int options) {
1286 isc_buffer_t source; /* Transaction data from disk */
1287 isc_buffer_t target; /* Ditto after _fromwire check */
1288 uint32_t db_serial; /* Database SOA serial */
1289 uint32_t end_serial; /* Last journal SOA serial */
1290 isc_result_t result;
1291 dns_dbversion_t *ver = NULL;
1292 journal_pos_t pos;
1293 dns_diff_t diff;
1294 unsigned int n_soa = 0;
1295 unsigned int n_put = 0;
1296 dns_diffop_t op;
1297
1298 REQUIRE(DNS_JOURNAL_VALID(j));
1299 REQUIRE(DNS_DB_VALID(db));
1300
1301 dns_diff_init(j->mctx, &diff);
1302
1303 /*
1304 * Set up empty initial buffers for unchecked and checked
1305 * wire format transaction data. They will be reallocated
1306 * later.
1307 */
1308 isc_buffer_init(&source, NULL, 0);
1309 isc_buffer_init(&target, NULL, 0);
1310
1311 /*
1312 * Create the new database version.
1313 */
1314 CHECK(dns_db_newversion(db, &ver));
1315
1316 /*
1317 * Get the current database SOA serial number.
1318 */
1319 CHECK(dns_db_getsoaserial(db, ver, &db_serial));
1320
1321 /*
1322 * Locate a journal entry for the current database serial.
1323 */
1324 CHECK(journal_find(j, db_serial, &pos));
1325 /*
1326 * XXX do more drastic things, like marking zone stale,
1327 * if this fails?
1328 */
1329 /*
1330 * XXXRTH The zone code should probably mark the zone as bad and
1331 * scream loudly into the log if this is a dynamic update
1332 * log reply that failed.
1333 */
1334
1335 end_serial = dns_journal_last_serial(j);
1336 if (db_serial == end_serial)
1337 CHECK(DNS_R_UPTODATE);
1338
1339 CHECK(dns_journal_iter_init(j, db_serial, end_serial));
1340
1341 for (result = dns_journal_first_rr(j);
1342 result == ISC_R_SUCCESS;
1343 result = dns_journal_next_rr(j))
1344 {
1345 dns_name_t *name;
1346 uint32_t ttl;
1347 dns_rdata_t *rdata;
1348 dns_difftuple_t *tuple = NULL;
1349
1350 name = NULL;
1351 rdata = NULL;
1352 dns_journal_current_rr(j, &name, &ttl, &rdata);
1353
1354 if (rdata->type == dns_rdatatype_soa) {
1355 n_soa++;
1356 if (n_soa == 2)
1357 db_serial = j->it.current_serial;
1358 }
1359
1360 if (n_soa == 3)
1361 n_soa = 1;
1362 if (n_soa == 0) {
1363 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1364 "%s: journal file corrupt: missing "
1365 "initial SOA", j->filename);
1366 FAIL(ISC_R_UNEXPECTED);
1367 }
1368 if ((options & DNS_JOURNALOPT_RESIGN) != 0)
1369 op = (n_soa == 1) ? DNS_DIFFOP_DELRESIGN :
1370 DNS_DIFFOP_ADDRESIGN;
1371 else
1372 op = (n_soa == 1) ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD;
1373
1374 CHECK(dns_difftuple_create(diff.mctx, op, name, ttl, rdata,
1375 &tuple));
1376 dns_diff_append(&diff, &tuple);
1377
1378 if (++n_put > 100) {
1379 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1380 "%s: applying diff to database (%u)",
1381 j->filename, db_serial);
1382 (void)dns_diff_print(&diff, NULL);
1383 CHECK(dns_diff_apply(&diff, db, ver));
1384 dns_diff_clear(&diff);
1385 n_put = 0;
1386 }
1387 }
1388 if (result == ISC_R_NOMORE)
1389 result = ISC_R_SUCCESS;
1390 CHECK(result);
1391
1392 if (n_put != 0) {
1393 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1394 "%s: applying final diff to database (%u)",
1395 j->filename, db_serial);
1396 (void)dns_diff_print(&diff, NULL);
1397 CHECK(dns_diff_apply(&diff, db, ver));
1398 dns_diff_clear(&diff);
1399 }
1400
1401 failure:
1402 if (ver != NULL)
1403 dns_db_closeversion(db, &ver, result == ISC_R_SUCCESS ?
1404 true : false);
1405
1406 if (source.base != NULL)
1407 isc_mem_put(j->mctx, source.base, source.length);
1408 if (target.base != NULL)
1409 isc_mem_put(j->mctx, target.base, target.length);
1410
1411 dns_diff_clear(&diff);
1412
1413 INSIST(ver == NULL);
1414
1415 return (result);
1416 }
1417
1418 isc_result_t
1419 dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, unsigned int options,
1420 const char *filename)
1421 {
1422 dns_journal_t *j;
1423 isc_result_t result;
1424
1425 REQUIRE(DNS_DB_VALID(db));
1426 REQUIRE(filename != NULL);
1427
1428 j = NULL;
1429 result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
1430 if (result == ISC_R_NOTFOUND) {
1431 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1432 "no journal file, but that's OK");
1433 return (DNS_R_NOJOURNAL);
1434 }
1435 if (result != ISC_R_SUCCESS)
1436 return (result);
1437 if (JOURNAL_EMPTY(&j->header))
1438 result = DNS_R_UPTODATE;
1439 else
1440 result = roll_forward(j, db, options);
1441
1442 dns_journal_destroy(&j);
1443
1444 return (result);
1445 }
1446
1447 isc_result_t
1448 dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) {
1449 dns_journal_t *j;
1450 isc_buffer_t source; /* Transaction data from disk */
1451 isc_buffer_t target; /* Ditto after _fromwire check */
1452 uint32_t start_serial; /* Database SOA serial */
1453 uint32_t end_serial; /* Last journal SOA serial */
1454 isc_result_t result;
1455 dns_diff_t diff;
1456 unsigned int n_soa = 0;
1457 unsigned int n_put = 0;
1458
1459 REQUIRE(filename != NULL);
1460
1461 j = NULL;
1462 result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
1463 if (result == ISC_R_NOTFOUND) {
1464 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file");
1465 return (DNS_R_NOJOURNAL);
1466 }
1467
1468 if (result != ISC_R_SUCCESS) {
1469 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1470 "journal open failure: %s: %s",
1471 isc_result_totext(result), filename);
1472 return (result);
1473 }
1474
1475 if (j->header.serialset)
1476 fprintf(file, "Source serial = %u\n", j->header.sourceserial);
1477 dns_diff_init(j->mctx, &diff);
1478
1479 /*
1480 * Set up empty initial buffers for unchecked and checked
1481 * wire format transaction data. They will be reallocated
1482 * later.
1483 */
1484 isc_buffer_init(&source, NULL, 0);
1485 isc_buffer_init(&target, NULL, 0);
1486
1487 start_serial = dns_journal_first_serial(j);
1488 end_serial = dns_journal_last_serial(j);
1489
1490 CHECK(dns_journal_iter_init(j, start_serial, end_serial));
1491
1492 for (result = dns_journal_first_rr(j);
1493 result == ISC_R_SUCCESS;
1494 result = dns_journal_next_rr(j))
1495 {
1496 dns_name_t *name;
1497 uint32_t ttl;
1498 dns_rdata_t *rdata;
1499 dns_difftuple_t *tuple = NULL;
1500
1501 name = NULL;
1502 rdata = NULL;
1503 dns_journal_current_rr(j, &name, &ttl, &rdata);
1504
1505 if (rdata->type == dns_rdatatype_soa)
1506 n_soa++;
1507
1508 if (n_soa == 3)
1509 n_soa = 1;
1510 if (n_soa == 0) {
1511 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1512 "%s: journal file corrupt: missing "
1513 "initial SOA", j->filename);
1514 FAIL(ISC_R_UNEXPECTED);
1515 }
1516 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ?
1517 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD,
1518 name, ttl, rdata, &tuple));
1519 dns_diff_append(&diff, &tuple);
1520
1521 if (++n_put > 100) {
1522 result = dns_diff_print(&diff, file);
1523 dns_diff_clear(&diff);
1524 n_put = 0;
1525 if (result != ISC_R_SUCCESS)
1526 break;
1527 }
1528 }
1529 if (result == ISC_R_NOMORE)
1530 result = ISC_R_SUCCESS;
1531 CHECK(result);
1532
1533 if (n_put != 0) {
1534 result = dns_diff_print(&diff, file);
1535 dns_diff_clear(&diff);
1536 }
1537 goto cleanup;
1538
1539 failure:
1540 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1541 "%s: cannot print: journal file corrupt", j->filename);
1542
1543 cleanup:
1544 if (source.base != NULL)
1545 isc_mem_put(j->mctx, source.base, source.length);
1546 if (target.base != NULL)
1547 isc_mem_put(j->mctx, target.base, target.length);
1548
1549 dns_diff_clear(&diff);
1550 dns_journal_destroy(&j);
1551
1552 return (result);
1553 }
1554
1555 /**************************************************************************/
1556 /*
1557 * Miscellaneous accessors.
1558 */
1559 bool
1560 dns_journal_empty(dns_journal_t *j) {
1561 return (JOURNAL_EMPTY(&j->header));
1562 }
1563
1564 uint32_t
1565 dns_journal_first_serial(dns_journal_t *j) {
1566 return (j->header.begin.serial);
1567 }
1568
1569 uint32_t
1570 dns_journal_last_serial(dns_journal_t *j) {
1571 return (j->header.end.serial);
1572 }
1573
1574 void
1575 dns_journal_set_sourceserial(dns_journal_t *j, uint32_t sourceserial) {
1576
1577 REQUIRE(j->state == JOURNAL_STATE_WRITE ||
1578 j->state == JOURNAL_STATE_INLINE ||
1579 j->state == JOURNAL_STATE_TRANSACTION);
1580
1581 j->header.sourceserial = sourceserial;
1582 j->header.serialset = true;
1583 if (j->state == JOURNAL_STATE_WRITE)
1584 j->state = JOURNAL_STATE_INLINE;
1585 }
1586
1587 bool
1588 dns_journal_get_sourceserial(dns_journal_t *j, uint32_t *sourceserial) {
1589 REQUIRE(sourceserial != NULL);
1590
1591 if (!j->header.serialset)
1592 return (false);
1593 *sourceserial = j->header.sourceserial;
1594 return (true);
1595 }
1596
1597 /**************************************************************************/
1598 /*
1599 * Iteration support.
1600 *
1601 * When serving an outgoing IXFR, we transmit a part of the journal starting
1602 * at the serial number in the IXFR request and ending at the serial
1603 * number that is current when the IXFR request arrives. The ending
1604 * serial number is not necessarily at the end of the journal:
1605 * the journal may grow while the IXFR is in progress, but we stop
1606 * when we reach the serial number that was current when the IXFR started.
1607 */
1608
1609 static isc_result_t read_one_rr(dns_journal_t *j);
1610
1611 /*
1612 * Make sure the buffer 'b' is has at least 'size' bytes
1613 * allocated, and clear it.
1614 *
1615 * Requires:
1616 * Either b->base is NULL, or it points to b->length bytes of memory
1617 * previously allocated by isc_mem_get().
1618 */
1619
1620 static isc_result_t
1621 size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) {
1622 if (b->length < size) {
1623 void *mem = isc_mem_get(mctx, size);
1624 if (mem == NULL)
1625 return (ISC_R_NOMEMORY);
1626 if (b->base != NULL)
1627 isc_mem_put(mctx, b->base, b->length);
1628 b->base = mem;
1629 b->length = size;
1630 }
1631 isc_buffer_clear(b);
1632 return (ISC_R_SUCCESS);
1633 }
1634
1635 isc_result_t
1636 dns_journal_iter_init(dns_journal_t *j,
1637 uint32_t begin_serial, uint32_t end_serial)
1638 {
1639 isc_result_t result;
1640
1641 CHECK(journal_find(j, begin_serial, &j->it.bpos));
1642 INSIST(j->it.bpos.serial == begin_serial);
1643
1644 CHECK(journal_find(j, end_serial, &j->it.epos));
1645 INSIST(j->it.epos.serial == end_serial);
1646
1647 result = ISC_R_SUCCESS;
1648 failure:
1649 j->it.result = result;
1650 return (j->it.result);
1651 }
1652
1653
1654 isc_result_t
1655 dns_journal_first_rr(dns_journal_t *j) {
1656 isc_result_t result;
1657
1658 /*
1659 * Seek to the beginning of the first transaction we are
1660 * interested in.
1661 */
1662 CHECK(journal_seek(j, j->it.bpos.offset));
1663 j->it.current_serial = j->it.bpos.serial;
1664
1665 j->it.xsize = 0; /* We have no transaction data yet... */
1666 j->it.xpos = 0; /* ...and haven't used any of it. */
1667
1668 return (read_one_rr(j));
1669
1670 failure:
1671 return (result);
1672 }
1673
1674 static isc_result_t
1675 read_one_rr(dns_journal_t *j) {
1676 isc_result_t result;
1677
1678 dns_rdatatype_t rdtype;
1679 dns_rdataclass_t rdclass;
1680 unsigned int rdlen;
1681 uint32_t ttl;
1682 journal_xhdr_t xhdr;
1683 journal_rrhdr_t rrhdr;
1684
1685 if (j->offset > j->it.epos.offset) {
1686 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1687 "%s: journal corrupt: possible integer overflow",
1688 j->filename);
1689 return (ISC_R_UNEXPECTED);
1690 }
1691 if (j->offset == j->it.epos.offset)
1692 return (ISC_R_NOMORE);
1693 if (j->it.xpos == j->it.xsize) {
1694 /*
1695 * We are at a transaction boundary.
1696 * Read another transaction header.
1697 */
1698 CHECK(journal_read_xhdr(j, &xhdr));
1699 if (xhdr.size == 0) {
1700 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1701 "%s: journal corrupt: empty transaction",
1702 j->filename);
1703 FAIL(ISC_R_UNEXPECTED);
1704 }
1705 if (xhdr.serial0 != j->it.current_serial) {
1706 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1707 "%s: journal file corrupt: "
1708 "expected serial %u, got %u",
1709 j->filename,
1710 j->it.current_serial, xhdr.serial0);
1711 FAIL(ISC_R_UNEXPECTED);
1712 }
1713 j->it.xsize = xhdr.size;
1714 j->it.xpos = 0;
1715 }
1716 /*
1717 * Read an RR.
1718 */
1719 CHECK(journal_read_rrhdr(j, &rrhdr));
1720 /*
1721 * Perform a sanity check on the journal RR size.
1722 * The smallest possible RR has a 1-byte owner name
1723 * and a 10-byte header. The largest possible
1724 * RR has 65535 bytes of data, a header, and a maximum-
1725 * size owner name, well below 70 k total.
1726 */
1727 if (rrhdr.size < 1+10 || rrhdr.size > 70000) {
1728 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1729 "%s: journal corrupt: impossible RR size "
1730 "(%d bytes)", j->filename, rrhdr.size);
1731 FAIL(ISC_R_UNEXPECTED);
1732 }
1733
1734 CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size));
1735 CHECK(journal_read(j, j->it.source.base, rrhdr.size));
1736 isc_buffer_add(&j->it.source, rrhdr.size);
1737
1738 /*
1739 * The target buffer is made the same size
1740 * as the source buffer, with the assumption that when
1741 * no compression in present, the output of dns_*_fromwire()
1742 * is no larger than the input.
1743 */
1744 CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size));
1745
1746 /*
1747 * Parse the owner name. We don't know where it
1748 * ends yet, so we make the entire "remaining"
1749 * part of the buffer "active".
1750 */
1751 isc_buffer_setactive(&j->it.source,
1752 j->it.source.used - j->it.source.current);
1753 CHECK(dns_name_fromwire(&j->it.name, &j->it.source,
1754 &j->it.dctx, 0, &j->it.target));
1755
1756 /*
1757 * Check that the RR header is there, and parse it.
1758 */
1759 if (isc_buffer_remaininglength(&j->it.source) < 10)
1760 FAIL(DNS_R_FORMERR);
1761
1762 rdtype = isc_buffer_getuint16(&j->it.source);
1763 rdclass = isc_buffer_getuint16(&j->it.source);
1764 ttl = isc_buffer_getuint32(&j->it.source);
1765 rdlen = isc_buffer_getuint16(&j->it.source);
1766
1767 /*
1768 * Parse the rdata.
1769 */
1770 if (isc_buffer_remaininglength(&j->it.source) != rdlen)
1771 FAIL(DNS_R_FORMERR);
1772 isc_buffer_setactive(&j->it.source, rdlen);
1773 dns_rdata_reset(&j->it.rdata);
1774 CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass,
1775 rdtype, &j->it.source, &j->it.dctx,
1776 0, &j->it.target));
1777 j->it.ttl = ttl;
1778
1779 j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size;
1780 if (rdtype == dns_rdatatype_soa) {
1781 /* XXX could do additional consistency checks here */
1782 j->it.current_serial = dns_soa_getserial(&j->it.rdata);
1783 }
1784
1785 result = ISC_R_SUCCESS;
1786
1787 failure:
1788 j->it.result = result;
1789 return (result);
1790 }
1791
1792 isc_result_t
1793 dns_journal_next_rr(dns_journal_t *j) {
1794 j->it.result = read_one_rr(j);
1795 return (j->it.result);
1796 }
1797
1798 void
1799 dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, uint32_t *ttl,
1800 dns_rdata_t **rdata)
1801 {
1802 REQUIRE(j->it.result == ISC_R_SUCCESS);
1803 *name = &j->it.name;
1804 *ttl = j->it.ttl;
1805 *rdata = &j->it.rdata;
1806 }
1807
1808 /**************************************************************************/
1809 /*
1810 * Generating diffs from databases
1811 */
1812
1813 /*
1814 * Construct a diff containing all the RRs at the current name of the
1815 * database iterator 'dbit' in database 'db', version 'ver'.
1816 * Set '*name' to the current name, and append the diff to 'diff'.
1817 * All new tuples will have the operation 'op'.
1818 *
1819 * Requires: 'name' must have buffer large enough to hold the name.
1820 * Typically, a dns_fixedname_t would be used.
1821 */
1822 static isc_result_t
1823 get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now,
1824 dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op,
1825 dns_diff_t *diff)
1826 {
1827 isc_result_t result;
1828 dns_dbnode_t *node = NULL;
1829 dns_rdatasetiter_t *rdsiter = NULL;
1830 dns_difftuple_t *tuple = NULL;
1831
1832 result = dns_dbiterator_current(dbit, &node, name);
1833 if (result != ISC_R_SUCCESS)
1834 return (result);
1835
1836 result = dns_db_allrdatasets(db, node, ver, now, &rdsiter);
1837 if (result != ISC_R_SUCCESS)
1838 goto cleanup_node;
1839
1840 for (result = dns_rdatasetiter_first(rdsiter);
1841 result == ISC_R_SUCCESS;
1842 result = dns_rdatasetiter_next(rdsiter))
1843 {
1844 dns_rdataset_t rdataset;
1845
1846 dns_rdataset_init(&rdataset);
1847 dns_rdatasetiter_current(rdsiter, &rdataset);
1848
1849 for (result = dns_rdataset_first(&rdataset);
1850 result == ISC_R_SUCCESS;
1851 result = dns_rdataset_next(&rdataset))
1852 {
1853 dns_rdata_t rdata = DNS_RDATA_INIT;
1854 dns_rdataset_current(&rdataset, &rdata);
1855 result = dns_difftuple_create(diff->mctx, op, name,
1856 rdataset.ttl, &rdata,
1857 &tuple);
1858 if (result != ISC_R_SUCCESS) {
1859 dns_rdataset_disassociate(&rdataset);
1860 goto cleanup_iterator;
1861 }
1862 dns_diff_append(diff, &tuple);
1863 }
1864 dns_rdataset_disassociate(&rdataset);
1865 if (result != ISC_R_NOMORE)
1866 goto cleanup_iterator;
1867 }
1868 if (result != ISC_R_NOMORE)
1869 goto cleanup_iterator;
1870
1871 result = ISC_R_SUCCESS;
1872
1873 cleanup_iterator:
1874 dns_rdatasetiter_destroy(&rdsiter);
1875
1876 cleanup_node:
1877 dns_db_detachnode(db, &node);
1878
1879 return (result);
1880 }
1881
1882 /*
1883 * Comparison function for use by dns_diff_subtract when sorting
1884 * the diffs to be subtracted. The sort keys are the rdata type
1885 * and the rdata itself. The owner name is ignored, because
1886 * it is known to be the same for all tuples.
1887 */
1888 static int
1889 rdata_order(const void *av, const void *bv) {
1890 dns_difftuple_t const * const *ap = av;
1891 dns_difftuple_t const * const *bp = bv;
1892 dns_difftuple_t const *a = *ap;
1893 dns_difftuple_t const *b = *bp;
1894 int r;
1895 r = (b->rdata.type - a->rdata.type);
1896 if (r != 0)
1897 return (r);
1898 r = dns_rdata_compare(&a->rdata, &b->rdata);
1899 return (r);
1900 }
1901
1902 static isc_result_t
1903 dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) {
1904 isc_result_t result;
1905 dns_difftuple_t *p[2];
1906 int i, t;
1907 bool append;
1908
1909 CHECK(dns_diff_sort(&diff[0], rdata_order));
1910 CHECK(dns_diff_sort(&diff[1], rdata_order));
1911
1912 for (;;) {
1913 p[0] = ISC_LIST_HEAD(diff[0].tuples);
1914 p[1] = ISC_LIST_HEAD(diff[1].tuples);
1915 if (p[0] == NULL && p[1] == NULL)
1916 break;
1917
1918 for (i = 0; i < 2; i++)
1919 if (p[!i] == NULL) {
1920 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
1921 ISC_LIST_APPEND(r->tuples, p[i], link);
1922 goto next;
1923 }
1924 t = rdata_order(&p[0], &p[1]);
1925 if (t < 0) {
1926 ISC_LIST_UNLINK(diff[0].tuples, p[0], link);
1927 ISC_LIST_APPEND(r->tuples, p[0], link);
1928 goto next;
1929 }
1930 if (t > 0) {
1931 ISC_LIST_UNLINK(diff[1].tuples, p[1], link);
1932 ISC_LIST_APPEND(r->tuples, p[1], link);
1933 goto next;
1934 }
1935 INSIST(t == 0);
1936 /*
1937 * Identical RRs in both databases; skip them both
1938 * if the ttl differs.
1939 */
1940 append = (p[0]->ttl != p[1]->ttl);
1941 for (i = 0; i < 2; i++) {
1942 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
1943 if (append) {
1944 ISC_LIST_APPEND(r->tuples, p[i], link);
1945 } else {
1946 dns_difftuple_free(&p[i]);
1947 }
1948 }
1949 next: ;
1950 }
1951 result = ISC_R_SUCCESS;
1952 failure:
1953 return (result);
1954 }
1955
1956 static isc_result_t
1957 diff_namespace(dns_db_t *dba, dns_dbversion_t *dbvera,
1958 dns_db_t *dbb, dns_dbversion_t *dbverb,
1959 unsigned int options, dns_diff_t *resultdiff)
1960 {
1961 dns_db_t *db[2];
1962 dns_dbversion_t *ver[2];
1963 dns_dbiterator_t *dbit[2] = { NULL, NULL };
1964 bool have[2] = { false, false };
1965 dns_fixedname_t fixname[2];
1966 isc_result_t result, itresult[2];
1967 dns_diff_t diff[2];
1968 int i, t;
1969
1970 db[0] = dba, db[1] = dbb;
1971 ver[0] = dbvera, ver[1] = dbverb;
1972
1973 dns_diff_init(resultdiff->mctx, &diff[0]);
1974 dns_diff_init(resultdiff->mctx, &diff[1]);
1975
1976 dns_fixedname_init(&fixname[0]);
1977 dns_fixedname_init(&fixname[1]);
1978
1979 result = dns_db_createiterator(db[0], options, &dbit[0]);
1980 if (result != ISC_R_SUCCESS)
1981 return (result);
1982 result = dns_db_createiterator(db[1], options, &dbit[1]);
1983 if (result != ISC_R_SUCCESS)
1984 goto cleanup_iterator;
1985
1986 itresult[0] = dns_dbiterator_first(dbit[0]);
1987 itresult[1] = dns_dbiterator_first(dbit[1]);
1988
1989 for (;;) {
1990 for (i = 0; i < 2; i++) {
1991 if (! have[i] && itresult[i] == ISC_R_SUCCESS) {
1992 CHECK(get_name_diff(db[i], ver[i], 0, dbit[i],
1993 dns_fixedname_name(&fixname[i]),
1994 i == 0 ?
1995 DNS_DIFFOP_ADD :
1996 DNS_DIFFOP_DEL,
1997 &diff[i]));
1998 itresult[i] = dns_dbiterator_next(dbit[i]);
1999 have[i] = true;
2000 }
2001 }
2002
2003 if (! have[0] && ! have[1]) {
2004 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2005 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2006 break;
2007 }
2008
2009 for (i = 0; i < 2; i++) {
2010 if (! have[!i]) {
2011 ISC_LIST_APPENDLIST(resultdiff->tuples,
2012 diff[i].tuples, link);
2013 INSIST(ISC_LIST_EMPTY(diff[i].tuples));
2014 have[i] = false;
2015 goto next;
2016 }
2017 }
2018
2019 t = dns_name_compare(dns_fixedname_name(&fixname[0]),
2020 dns_fixedname_name(&fixname[1]));
2021 if (t < 0) {
2022 ISC_LIST_APPENDLIST(resultdiff->tuples,
2023 diff[0].tuples, link);
2024 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2025 have[0] = false;
2026 continue;
2027 }
2028 if (t > 0) {
2029 ISC_LIST_APPENDLIST(resultdiff->tuples,
2030 diff[1].tuples, link);
2031 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2032 have[1] = false;
2033 continue;
2034 }
2035 INSIST(t == 0);
2036 CHECK(dns_diff_subtract(diff, resultdiff));
2037 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2038 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2039 have[0] = have[1] = false;
2040 next: ;
2041 }
2042 if (itresult[0] != ISC_R_NOMORE)
2043 FAIL(itresult[0]);
2044 if (itresult[1] != ISC_R_NOMORE)
2045 FAIL(itresult[1]);
2046
2047 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
2048 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
2049
2050 failure:
2051 dns_dbiterator_destroy(&dbit[1]);
2052
2053 cleanup_iterator:
2054 dns_dbiterator_destroy(&dbit[0]);
2055 dns_diff_clear(&diff[0]);
2056 dns_diff_clear(&diff[1]);
2057 return (result);
2058 }
2059
2060 /*
2061 * Compare the databases 'dba' and 'dbb' and generate a journal
2062 * entry containing the changes to make 'dba' from 'dbb' (note
2063 * the order). This journal entry will consist of a single,
2064 * possibly very large transaction.
2065 */
2066 isc_result_t
2067 dns_db_diff(isc_mem_t *mctx, dns_db_t *dba, dns_dbversion_t *dbvera,
2068 dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename)
2069 {
2070 isc_result_t result;
2071 dns_diff_t diff;
2072
2073 dns_diff_init(mctx, &diff);
2074
2075 result = dns_db_diffx(&diff, dba, dbvera, dbb, dbverb, filename);
2076
2077 dns_diff_clear(&diff);
2078
2079 return (result);
2080 }
2081
2082 isc_result_t
2083 dns_db_diffx(dns_diff_t *diff, dns_db_t *dba, dns_dbversion_t *dbvera,
2084 dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename)
2085 {
2086 isc_result_t result;
2087 dns_journal_t *journal = NULL;
2088
2089 if (filename != NULL) {
2090 result = dns_journal_open(diff->mctx, filename,
2091 DNS_JOURNAL_CREATE, &journal);
2092 if (result != ISC_R_SUCCESS)
2093 return (result);
2094 }
2095
2096 CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NONSEC3, diff));
2097 CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NSEC3ONLY, diff));
2098
2099 if (journal != NULL) {
2100 if (ISC_LIST_EMPTY(diff->tuples))
2101 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes");
2102 else
2103 CHECK(dns_journal_write_transaction(journal, diff));
2104 }
2105
2106 failure:
2107 if (journal != NULL)
2108 dns_journal_destroy(&journal);
2109 return (result);
2110 }
2111
2112 isc_result_t
2113 dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial,
2114 uint32_t target_size)
2115 {
2116 unsigned int i;
2117 journal_pos_t best_guess;
2118 journal_pos_t current_pos;
2119 dns_journal_t *j1 = NULL;
2120 dns_journal_t *j2 = NULL;
2121 journal_rawheader_t rawheader;
2122 unsigned int copy_length;
2123 size_t namelen;
2124 char *buf = NULL;
2125 unsigned int size = 0;
2126 isc_result_t result;
2127 unsigned int indexend;
2128 char newname[PATH_MAX];
2129 char backup[PATH_MAX];
2130 bool is_backup = false;
2131
2132 REQUIRE(filename != NULL);
2133
2134 namelen = strlen(filename);
2135 if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0) {
2136 namelen -= 4;
2137 }
2138
2139 result = snprintf(newname, sizeof(newname), "%.*s.jnw",
2140 (int)namelen, filename);
2141 RUNTIME_CHECK(result < sizeof(newname));
2142
2143 result = snprintf(backup, sizeof(backup), "%.*s.jbk",
2144 (int)namelen, filename);
2145 RUNTIME_CHECK(result < sizeof(backup));
2146
2147 result = journal_open(mctx, filename, false, false, &j1);
2148 if (result == ISC_R_NOTFOUND) {
2149 is_backup = true;
2150 result = journal_open(mctx, backup, false, false, &j1);
2151 }
2152 if (result != ISC_R_SUCCESS)
2153 return (result);
2154
2155 if (JOURNAL_EMPTY(&j1->header)) {
2156 dns_journal_destroy(&j1);
2157 return (ISC_R_SUCCESS);
2158 }
2159
2160 if (DNS_SERIAL_GT(j1->header.begin.serial, serial) ||
2161 DNS_SERIAL_GT(serial, j1->header.end.serial)) {
2162 dns_journal_destroy(&j1);
2163 return (ISC_R_RANGE);
2164 }
2165
2166 /*
2167 * Cope with very small target sizes.
2168 */
2169 indexend = sizeof(journal_rawheader_t) +
2170 j1->header.index_size * sizeof(journal_rawpos_t);
2171 if (target_size < DNS_JOURNAL_SIZE_MIN)
2172 target_size = DNS_JOURNAL_SIZE_MIN;
2173 if (target_size < indexend * 2)
2174 target_size = target_size/2 + indexend;
2175
2176 /*
2177 * See if there is any work to do.
2178 */
2179 if ((uint32_t) j1->header.end.offset < target_size) {
2180 dns_journal_destroy(&j1);
2181 return (ISC_R_SUCCESS);
2182 }
2183
2184 CHECK(journal_open(mctx, newname, true, true, &j2));
2185
2186 /*
2187 * Remove overhead so space test below can succeed.
2188 */
2189 if (target_size >= indexend)
2190 target_size -= indexend;
2191
2192 /*
2193 * Find if we can create enough free space.
2194 */
2195 best_guess = j1->header.begin;
2196 for (i = 0; i < j1->header.index_size; i++) {
2197 if (POS_VALID(j1->index[i]) &&
2198 DNS_SERIAL_GE(serial, j1->index[i].serial) &&
2199 ((uint32_t)(j1->header.end.offset - j1->index[i].offset)
2200 >= target_size / 2) &&
2201 j1->index[i].offset > best_guess.offset)
2202 best_guess = j1->index[i];
2203 }
2204
2205 current_pos = best_guess;
2206 while (current_pos.serial != serial) {
2207 CHECK(journal_next(j1, ¤t_pos));
2208 if (current_pos.serial == j1->header.end.serial)
2209 break;
2210
2211 if (DNS_SERIAL_GE(serial, current_pos.serial) &&
2212 ((uint32_t)(j1->header.end.offset - current_pos.offset)
2213 >= (target_size / 2)) &&
2214 current_pos.offset > best_guess.offset)
2215 best_guess = current_pos;
2216 else
2217 break;
2218 }
2219
2220 INSIST(best_guess.serial != j1->header.end.serial);
2221 if (best_guess.serial != serial)
2222 CHECK(journal_next(j1, &best_guess));
2223
2224 /*
2225 * We should now be roughly half target_size provided
2226 * we did not reach 'serial'. If not we will just copy
2227 * all uncommitted deltas regardless of the size.
2228 */
2229 copy_length = j1->header.end.offset - best_guess.offset;
2230
2231 if (copy_length != 0) {
2232 /*
2233 * Copy best_guess to end into space just freed.
2234 */
2235 size = 64*1024;
2236 if (copy_length < size)
2237 size = copy_length;
2238 buf = isc_mem_get(mctx, size);
2239 if (buf == NULL) {
2240 result = ISC_R_NOMEMORY;
2241 goto failure;
2242 }
2243
2244 CHECK(journal_seek(j1, best_guess.offset));
2245 CHECK(journal_seek(j2, indexend));
2246 for (i = 0; i < copy_length; i += size) {
2247 unsigned int len = (copy_length - i) > size ? size :
2248 (copy_length - i);
2249 CHECK(journal_read(j1, buf, len));
2250 CHECK(journal_write(j2, buf, len));
2251 }
2252
2253 CHECK(journal_fsync(j2));
2254
2255 /*
2256 * Compute new header.
2257 */
2258 j2->header.begin.serial = best_guess.serial;
2259 j2->header.begin.offset = indexend;
2260 j2->header.end.serial = j1->header.end.serial;
2261 j2->header.end.offset = indexend + copy_length;
2262 j2->header.sourceserial = j1->header.sourceserial;
2263 j2->header.serialset = j1->header.serialset;
2264
2265 /*
2266 * Update the journal header.
2267 */
2268 journal_header_encode(&j2->header, &rawheader);
2269 CHECK(journal_seek(j2, 0));
2270 CHECK(journal_write(j2, &rawheader, sizeof(rawheader)));
2271 CHECK(journal_fsync(j2));
2272
2273 /*
2274 * Build new index.
2275 */
2276 current_pos = j2->header.begin;
2277 while (current_pos.serial != j2->header.end.serial) {
2278 index_add(j2, ¤t_pos);
2279 CHECK(journal_next(j2, ¤t_pos));
2280 }
2281
2282 /*
2283 * Write index.
2284 */
2285 CHECK(index_to_disk(j2));
2286 CHECK(journal_fsync(j2));
2287
2288 indexend = j2->header.end.offset;
2289 POST(indexend);
2290 }
2291
2292 /*
2293 * Close both journals before trying to rename files (this is
2294 * necessary on WIN32).
2295 */
2296 dns_journal_destroy(&j1);
2297 dns_journal_destroy(&j2);
2298
2299 /*
2300 * With a UFS file system this should just succeed and be atomic.
2301 * Any IXFR outs will just continue and the old journal will be
2302 * removed on final close.
2303 *
2304 * With MSDOS / NTFS we need to do a two stage rename, triggered
2305 * by EEXIST. (If any IXFR's are running in other threads, however,
2306 * this will fail, and the journal will not be compacted. But
2307 * if so, hopefully they'll be finished by the next time we
2308 * compact.)
2309 */
2310 if (rename(newname, filename) == -1) {
2311 if (errno == EEXIST && !is_backup) {
2312 result = isc_file_remove(backup);
2313 if (result != ISC_R_SUCCESS &&
2314 result != ISC_R_FILENOTFOUND)
2315 goto failure;
2316 if (rename(filename, backup) == -1)
2317 goto maperrno;
2318 if (rename(newname, filename) == -1)
2319 goto maperrno;
2320 (void)isc_file_remove(backup);
2321 } else {
2322 maperrno:
2323 result = ISC_R_FAILURE;
2324 goto failure;
2325 }
2326 }
2327
2328 result = ISC_R_SUCCESS;
2329
2330 failure:
2331 (void)isc_file_remove(newname);
2332 if (buf != NULL)
2333 isc_mem_put(mctx, buf, size);
2334 if (j1 != NULL)
2335 dns_journal_destroy(&j1);
2336 if (j2 != NULL)
2337 dns_journal_destroy(&j2);
2338 return (result);
2339 }
2340
2341 static isc_result_t
2342 index_to_disk(dns_journal_t *j) {
2343 isc_result_t result = ISC_R_SUCCESS;
2344
2345 if (j->header.index_size != 0) {
2346 unsigned int i;
2347 unsigned char *p;
2348 unsigned int rawbytes;
2349
2350 rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
2351
2352 p = j->rawindex;
2353 for (i = 0; i < j->header.index_size; i++) {
2354 encode_uint32(j->index[i].serial, p);
2355 p += 4;
2356 encode_uint32(j->index[i].offset, p);
2357 p += 4;
2358 }
2359 INSIST(p == j->rawindex + rawbytes);
2360
2361 CHECK(journal_seek(j, sizeof(journal_rawheader_t)));
2362 CHECK(journal_write(j, j->rawindex, rawbytes));
2363 }
2364 failure:
2365 return (result);
2366 }
2367