Home | History | Annotate | Line # | Download | only in dns
journal.c revision 1.6
      1  1.4  christos /*	$NetBSD: journal.c,v 1.6 2021/02/19 16:42:16 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  1.1  christos  *
      6  1.1  christos  * This Source Code Form is subject to the terms of the Mozilla Public
      7  1.1  christos  * License, v. 2.0. If a copy of the MPL was not distributed with this
      8  1.6  christos  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
      9  1.1  christos  *
     10  1.1  christos  * See the COPYRIGHT file distributed with this work for additional
     11  1.1  christos  * information regarding copyright ownership.
     12  1.1  christos  */
     13  1.1  christos 
     14  1.5  christos #include <errno.h>
     15  1.3  christos #include <inttypes.h>
     16  1.3  christos #include <stdbool.h>
     17  1.1  christos #include <stdlib.h>
     18  1.1  christos #include <unistd.h>
     19  1.1  christos 
     20  1.1  christos #include <isc/file.h>
     21  1.1  christos #include <isc/mem.h>
     22  1.1  christos #include <isc/print.h>
     23  1.1  christos #include <isc/stdio.h>
     24  1.1  christos #include <isc/string.h>
     25  1.1  christos #include <isc/util.h>
     26  1.1  christos 
     27  1.1  christos #include <dns/compress.h>
     28  1.1  christos #include <dns/db.h>
     29  1.1  christos #include <dns/dbiterator.h>
     30  1.1  christos #include <dns/diff.h>
     31  1.1  christos #include <dns/fixedname.h>
     32  1.1  christos #include <dns/journal.h>
     33  1.1  christos #include <dns/log.h>
     34  1.1  christos #include <dns/rdataset.h>
     35  1.1  christos #include <dns/rdatasetiter.h>
     36  1.1  christos #include <dns/result.h>
     37  1.1  christos #include <dns/soa.h>
     38  1.1  christos 
     39  1.1  christos /*! \file
     40  1.1  christos  * \brief Journaling.
     41  1.1  christos  *
     42  1.1  christos  * A journal file consists of
     43  1.1  christos  *
     44  1.1  christos  *   \li A fixed-size header of type journal_rawheader_t.
     45  1.1  christos  *
     46  1.1  christos  *   \li The index.  This is an unordered array of index entries
     47  1.1  christos  *     of type journal_rawpos_t giving the locations
     48  1.1  christos  *     of some arbitrary subset of the journal's addressable
     49  1.1  christos  *     transactions.  The index entries are used as hints to
     50  1.1  christos  *     speed up the process of locating a transaction with a given
     51  1.1  christos  *     serial number.  Unused index entries have an "offset"
     52  1.1  christos  *     field of zero.  The size of the index can vary between
     53  1.1  christos  *     journal files, but does not change during the lifetime
     54  1.1  christos  *     of a file.  The size can be zero.
     55  1.1  christos  *
     56  1.1  christos  *   \li The journal data.  This consists of one or more transactions.
     57  1.1  christos  *     Each transaction begins with a transaction header of type
     58  1.1  christos  *     journal_rawxhdr_t.  The transaction header is followed by a
     59  1.1  christos  *     sequence of RRs, similar in structure to an IXFR difference
     60  1.1  christos  *     sequence (RFC1995).  That is, the pre-transaction SOA,
     61  1.1  christos  *     zero or more other deleted RRs, the post-transaction SOA,
     62  1.1  christos  *     and zero or more other added RRs.  Unlike in IXFR, each RR
     63  1.1  christos  *     is prefixed with a 32-bit length.
     64  1.1  christos  *
     65  1.1  christos  *     The journal data part grows as new transactions are
     66  1.1  christos  *     appended to the file.  Only those transactions
     67  1.1  christos  *     whose serial number is current-(2^31-1) to current
     68  1.1  christos  *     are considered "addressable" and may be pointed
     69  1.1  christos  *     to from the header or index.  They may be preceded
     70  1.1  christos  *     by old transactions that are no longer addressable,
     71  1.1  christos  *     and they may be followed by transactions that were
     72  1.1  christos  *     appended to the journal but never committed by updating
     73  1.1  christos  *     the "end" position in the header.  The latter will
     74  1.1  christos  *     be overwritten when new transactions are added.
     75  1.1  christos  */
     76  1.1  christos 
     77  1.1  christos /**************************************************************************/
     78  1.1  christos /*
     79  1.1  christos  * Miscellaneous utilities.
     80  1.1  christos  */
     81  1.1  christos 
     82  1.1  christos #define JOURNAL_COMMON_LOGARGS \
     83  1.1  christos 	dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL
     84  1.1  christos 
     85  1.5  christos #define JOURNAL_DEBUG_LOGARGS(n) JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)
     86  1.1  christos 
     87  1.1  christos /*%
     88  1.1  christos  * It would be non-sensical (or at least obtuse) to use FAIL() with an
     89  1.1  christos  * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
     90  1.1  christos  * from complaining about "end-of-loop code not reached".
     91  1.1  christos  */
     92  1.5  christos #define FAIL(code)                           \
     93  1.5  christos 	do {                                 \
     94  1.5  christos 		result = (code);             \
     95  1.5  christos 		if (result != ISC_R_SUCCESS) \
     96  1.5  christos 			goto failure;        \
     97  1.2  christos 	} while (/*CONSTCOND*/0)
     98  1.1  christos 
                       /*%
                        * Evaluate 'op', store its value in the variable 'result', and jump
                        * to the label 'failure' unless that value is ISC_R_SUCCESS.  Both
                        * 'result' and 'failure' must exist in the function using the macro.
                        * The expansion is the same as that of FAIL().
                        */
     99  1.5  christos #define CHECK(op)                            \
    100  1.5  christos 	do {                                 \
    101  1.5  christos 		result = (op);               \
    102  1.5  christos 		if (result != ISC_R_SUCCESS) \
    103  1.5  christos 			goto failure;        \
    104  1.2  christos 	} while (/*CONSTCOND*/0)
    105  1.1  christos 
    106  1.5  christos #define JOURNAL_SERIALSET 0x01U
    107  1.1  christos 
    108  1.5  christos static isc_result_t
    109  1.5  christos index_to_disk(dns_journal_t *);
    110  1.1  christos 
/*%
 * Decode the 32-bit big-endian unsigned integer stored in the four
 * bytes starting at 'p'.
 *
 * Each byte is widened to uint32_t before it is shifted.  Without the
 * cast the operand is promoted to (signed) int, and 'p[0] << 24' is
 * undefined behaviour whenever p[0] >= 0x80.
 */
static inline uint32_t
decode_uint32(unsigned char *p) {
	return (((uint32_t)p[0] << 24) + ((uint32_t)p[1] << 16) +
		((uint32_t)p[2] << 8) + ((uint32_t)p[3] << 0));
}
    115  1.1  christos 
/*%
 * Store 'val' into the four bytes starting at 'p' in big-endian order
 * (most significant byte first).
 */
static inline void
encode_uint32(uint32_t val, unsigned char *p) {
	unsigned int shift = 32;
	unsigned int i;

	for (i = 0; i < 4; i++) {
		shift -= 8;
		p[i] = (uint8_t)(val >> shift);
	}
}
    123  1.1  christos 
    124  1.1  christos isc_result_t
    125  1.1  christos dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx,
    126  1.5  christos 		      dns_diffop_t op, dns_difftuple_t **tp) {
    127  1.1  christos 	isc_result_t result;
    128  1.1  christos 	dns_dbnode_t *node;
    129  1.1  christos 	dns_rdataset_t rdataset;
    130  1.1  christos 	dns_rdata_t rdata = DNS_RDATA_INIT;
    131  1.1  christos 	dns_fixedname_t fixed;
    132  1.1  christos 	dns_name_t *zonename;
    133  1.1  christos 
    134  1.1  christos 	zonename = dns_fixedname_initname(&fixed);
    135  1.4  christos 	dns_name_copynf(dns_db_origin(db), zonename);
    136  1.1  christos 
    137  1.1  christos 	node = NULL;
    138  1.3  christos 	result = dns_db_findnode(db, zonename, false, &node);
    139  1.5  christos 	if (result != ISC_R_SUCCESS) {
    140  1.1  christos 		goto nonode;
    141  1.5  christos 	}
    142  1.1  christos 
    143  1.1  christos 	dns_rdataset_init(&rdataset);
    144  1.1  christos 	result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0,
    145  1.1  christos 				     (isc_stdtime_t)0, &rdataset, NULL);
    146  1.5  christos 	if (result != ISC_R_SUCCESS) {
    147  1.1  christos 		goto freenode;
    148  1.5  christos 	}
    149  1.1  christos 
    150  1.1  christos 	result = dns_rdataset_first(&rdataset);
    151  1.5  christos 	if (result != ISC_R_SUCCESS) {
    152  1.1  christos 		goto freenode;
    153  1.5  christos 	}
    154  1.1  christos 
    155  1.1  christos 	dns_rdataset_current(&rdataset, &rdata);
    156  1.1  christos 	dns_rdataset_getownercase(&rdataset, zonename);
    157  1.1  christos 
    158  1.5  christos 	result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl, &rdata,
    159  1.5  christos 				      tp);
    160  1.1  christos 
    161  1.1  christos 	dns_rdataset_disassociate(&rdataset);
    162  1.1  christos 	dns_db_detachnode(db, &node);
    163  1.1  christos 	return (result);
    164  1.1  christos 
    165  1.5  christos freenode:
    166  1.1  christos 	dns_db_detachnode(db, &node);
    167  1.5  christos nonode:
    168  1.1  christos 	UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA");
    169  1.1  christos 	return (result);
    170  1.1  christos }
    171  1.1  christos 
    172  1.1  christos /* Journaling */
    173  1.1  christos 
    174  1.1  christos /*%
    175  1.1  christos  * On-disk representation of a "pointer" to a journal entry.
    176  1.1  christos  * These are used in the journal header to locate the beginning
    177  1.1  christos  * and end of the journal, and in the journal index to locate
    178  1.1  christos  * other transactions.
    179  1.1  christos  */
    180  1.1  christos typedef struct {
    181  1.5  christos 	unsigned char serial[4]; /*%< SOA serial before update. */
    182  1.1  christos 	/*
    183  1.1  christos 	 * XXXRTH  Should offset be 8 bytes?
    184  1.1  christos 	 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
    185  1.1  christos 	 * XXXAG  ... but we will not be able to seek >2G anyway on many
    186  1.1  christos 	 *            platforms as long as we are using fseek() rather
    187  1.1  christos 	 *            than lseek().
    188  1.1  christos 	 */
    189  1.5  christos 	unsigned char offset[4]; /*%< Offset from beginning of file. */
    190  1.1  christos } journal_rawpos_t;
    191  1.1  christos 
    192  1.1  christos /*%
    193  1.1  christos  * The header is of a fixed size, with some spare room for future
    194  1.1  christos  * extensions.
    195  1.1  christos  */
    196  1.1  christos #define JOURNAL_HEADER_SIZE 64 /* Bytes. */
    197  1.1  christos 
    198  1.1  christos /*%
    199  1.1  christos  * The on-disk representation of the journal header.
    200  1.1  christos  * All numbers are stored in big-endian order.
    201  1.1  christos  */
    202  1.1  christos typedef union {
    203  1.1  christos 	struct {
    204  1.1  christos 		/*% File format version ID. */
    205  1.5  christos 		unsigned char format[16];
    206  1.1  christos 		/*% Position of the first addressable transaction */
    207  1.5  christos 		journal_rawpos_t begin;
    208  1.1  christos 		/*% Position of the next (yet nonexistent) transaction. */
    209  1.5  christos 		journal_rawpos_t end;
    210  1.1  christos 		/*% Number of index entries following the header. */
    211  1.5  christos 		unsigned char index_size[4];
    212  1.1  christos 		/*% Source serial number. */
    213  1.5  christos 		unsigned char sourceserial[4];
    214  1.5  christos 		unsigned char flags; /*%< Flag bits, e.g. JOURNAL_SERIALSET. */
    215  1.1  christos 	} h;
    216  1.1  christos 	/* Pad the header to a fixed size. */
    217  1.1  christos 	unsigned char pad[JOURNAL_HEADER_SIZE];
    218  1.1  christos } journal_rawheader_t;
    219  1.1  christos 
    220  1.1  christos /*%
    221  1.1  christos  * The on-disk representation of the transaction header.
    222  1.1  christos  * There is one of these at the beginning of each transaction.
    223  1.1  christos  */
    224  1.1  christos typedef struct {
    225  1.5  christos 	unsigned char size[4];	  /*%< In bytes, excluding header. */
    226  1.6  christos 	unsigned char count[4];	  /*%< Number of records in transaction */
    227  1.5  christos 	unsigned char serial0[4]; /*%< SOA serial before update. */
    228  1.5  christos 	unsigned char serial1[4]; /*%< SOA serial after update. */
    229  1.1  christos } journal_rawxhdr_t;
    230  1.1  christos 
    231  1.1  christos /*%
    232  1.1  christos  * The on-disk representation of the RR header.
    233  1.1  christos  * There is one of these at the beginning of each RR.
    234  1.1  christos  */
    235  1.1  christos typedef struct {
    236  1.5  christos 	unsigned char size[4]; /*%< In bytes, excluding header. */
    237  1.1  christos } journal_rawrrhdr_t;
    238  1.1  christos 
    239  1.1  christos /*%
    240  1.1  christos  * The in-core representation of a journal position (see journal_rawpos_t).
    241  1.1  christos  */
    242  1.1  christos typedef struct {
    243  1.5  christos 	uint32_t serial;
    244  1.5  christos 	isc_offset_t offset;
    245  1.1  christos } journal_pos_t;
    246  1.1  christos 
    247  1.5  christos #define POS_VALID(pos)	    ((pos).offset != 0) /*%< True if the offset is nonzero. */
    248  1.5  christos #define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0) /*%< Zero offset and serial. */
    249  1.1  christos 
    250  1.1  christos typedef struct { /*%< In-core (decoded) form of journal_rawheader_t. */
    251  1.5  christos 	unsigned char format[16]; /*%< File format version ID. */
    252  1.5  christos 	journal_pos_t begin; /*%< Position of the first addressable transaction. */
    253  1.5  christos 	journal_pos_t end; /*%< Position of the next (yet nonexistent) transaction. */
    254  1.5  christos 	uint32_t index_size; /*%< Number of index entries following the header. */
    255  1.5  christos 	uint32_t sourceserial; /*%< Source serial number. */
    256  1.5  christos 	bool serialset; /*%< Decoded JOURNAL_SERIALSET flag bit. */
    257  1.1  christos } journal_header_t;
    258  1.1  christos 
    259  1.1  christos /*%
    260  1.1  christos  * The in-core representation of the transaction header
    261  1.1  christos  * (decoded from journal_rawxhdr_t by journal_read_xhdr()).
    262  1.1  christos typedef struct {
    263  1.5  christos  */ typedef struct {
    268  1.1  christos 
    269  1.1  christos /*%
    270  1.1  christos  * The in-core representation of the RR header.
    271  1.1  christos  */
    272  1.1  christos typedef struct {
    273  1.5  christos 	uint32_t size; /*%< In bytes, excluding header. */
    274  1.1  christos } journal_rrhdr_t;
    275  1.1  christos 
    276  1.1  christos /*%
    277  1.1  christos  * Initial contents to store in the header of a newly created
    278  1.1  christos  * journal file.
    279  1.1  christos  *
    280  1.1  christos  * The header starts with the magic string ";BIND LOG V9\n"
    281  1.1  christos  * to identify the file as a BIND 9 journal file.  An ASCII
    282  1.1  christos  * identification string is used rather than a binary magic
    283  1.1  christos  * number to be consistent with BIND 8 (BIND 8 journal files
    284  1.1  christos  * are ASCII text files).
    285  1.1  christos  */
    286  1.1  christos 
    287  1.5  christos static journal_header_t initial_journal_header = {
    288  1.5  christos 	";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0, 0, 0
    289  1.5  christos };
    290  1.1  christos 
    291  1.1  christos #define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)
    292  1.1  christos 
    293  1.1  christos typedef enum { /*%< Values of the 'state' member of struct dns_journal. */
    294  1.1  christos 	JOURNAL_STATE_INVALID, /*%< Initial value assigned by journal_open(). */
    295  1.1  christos 	JOURNAL_STATE_READ,
    296  1.1  christos 	JOURNAL_STATE_WRITE,
    297  1.1  christos 	JOURNAL_STATE_TRANSACTION,
    298  1.1  christos 	JOURNAL_STATE_INLINE
    299  1.1  christos } journal_state_t;
    300  1.1  christos 
    301  1.1  christos struct dns_journal { /*%< In-core state of a journal file handle. */
    302  1.5  christos 	unsigned int magic; /*%< JOUR */
    303  1.5  christos 	isc_mem_t *mctx;    /*%< Memory context */
    304  1.5  christos 	journal_state_t state; /*%< See journal_state_t */
    305  1.5  christos 	char *filename;		 /*%< Journal file name */
    306  1.5  christos 	FILE *fp;		 /*%< File handle */
    307  1.5  christos 	isc_offset_t offset;	 /*%< Current file offset */
    308  1.5  christos 	journal_header_t header; /*%< In-core journal header */
    309  1.5  christos 	unsigned char *rawindex; /*%< In-core buffer for journal index
    310  1.6  christos 				  * in on-disk format */
    311  1.5  christos 	journal_pos_t *index;	 /*%< In-core journal index */
    312  1.1  christos 
    313  1.1  christos 	/*% Current transaction state (when writing). */
    314  1.1  christos 	struct {
    315  1.5  christos 		unsigned int n_soa;   /*%< Number of SOAs seen */
    316  1.6  christos 		unsigned int n_rr;    /*%< Number of RRs to write */
    317  1.5  christos 		journal_pos_t pos[2]; /*%< Begin/end position */
    318  1.1  christos 	} x;
    319  1.1  christos 
    320  1.1  christos 	/*% Iteration state (when reading). */
    321  1.1  christos 	struct {
    322  1.1  christos 		/* These define the part of the journal we iterate over. */
    323  1.5  christos 		journal_pos_t bpos; /*%< Position before first, */
    324  1.5  christos 		journal_pos_t epos; /*%< and after last transaction */
    325  1.1  christos 		/* The rest is iterator state. */
    326  1.5  christos 		uint32_t current_serial; /*%< Current SOA serial
    327  1.5  christos 					  * */
    328  1.5  christos 		isc_buffer_t source;	 /*%< Data from disk */
    329  1.6  christos 		isc_buffer_t target;	 /*%< Data from _fromwire check */
    330  1.5  christos 		dns_decompress_t dctx;	 /*%< Dummy decompression ctx */
    331  1.5  christos 		dns_name_t name;	 /*%< Current domain name */
    332  1.5  christos 		dns_rdata_t rdata;	 /*%< Current rdata */
    333  1.5  christos 		uint32_t ttl;		 /*%< Current TTL */
    334  1.5  christos 		unsigned int xsize;	 /*%< Size of transaction data */
    335  1.5  christos 		unsigned int xpos;	 /*%< Current position in it */
    336  1.5  christos 		isc_result_t result;	 /*%< Result of last call */
    337  1.1  christos 	} it;
    338  1.1  christos };
    339  1.1  christos 
    340  1.5  christos #define DNS_JOURNAL_MAGIC    ISC_MAGIC('J', 'O', 'U', 'R')
    341  1.5  christos #define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
    342  1.1  christos 
    343  1.1  christos static void
    344  1.1  christos journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) {
    345  1.1  christos 	cooked->serial = decode_uint32(raw->serial);
    346  1.1  christos 	cooked->offset = decode_uint32(raw->offset);
    347  1.1  christos }
    348  1.1  christos 
    349  1.1  christos static void
    350  1.1  christos journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) {
    351  1.1  christos 	encode_uint32(cooked->serial, raw->serial);
    352  1.1  christos 	encode_uint32(cooked->offset, raw->offset);
    353  1.1  christos }
    354  1.1  christos 
    355  1.1  christos static void
    356  1.1  christos journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) {
    357  1.1  christos 	INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
    358  1.1  christos 	memmove(cooked->format, raw->h.format, sizeof(cooked->format));
    359  1.1  christos 	journal_pos_decode(&raw->h.begin, &cooked->begin);
    360  1.1  christos 	journal_pos_decode(&raw->h.end, &cooked->end);
    361  1.1  christos 	cooked->index_size = decode_uint32(raw->h.index_size);
    362  1.1  christos 	cooked->sourceserial = decode_uint32(raw->h.sourceserial);
    363  1.3  christos 	cooked->serialset = ((raw->h.flags & JOURNAL_SERIALSET) != 0);
    364  1.1  christos }
    365  1.1  christos 
    366  1.1  christos static void
    367  1.1  christos journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) {
    368  1.1  christos 	unsigned char flags = 0;
    369  1.1  christos 
    370  1.1  christos 	INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
    371  1.1  christos 	memset(raw->pad, 0, sizeof(raw->pad));
    372  1.1  christos 	memmove(raw->h.format, cooked->format, sizeof(raw->h.format));
    373  1.1  christos 	journal_pos_encode(&raw->h.begin, &cooked->begin);
    374  1.1  christos 	journal_pos_encode(&raw->h.end, &cooked->end);
    375  1.1  christos 	encode_uint32(cooked->index_size, raw->h.index_size);
    376  1.1  christos 	encode_uint32(cooked->sourceserial, raw->h.sourceserial);
    377  1.3  christos 	if (cooked->serialset) {
    378  1.1  christos 		flags |= JOURNAL_SERIALSET;
    379  1.3  christos 	}
    380  1.1  christos 	raw->h.flags = flags;
    381  1.1  christos }
    382  1.1  christos 
    383  1.1  christos /*
    384  1.1  christos  * Journal file I/O subroutines, with error checking and reporting.
    385  1.1  christos  */
    386  1.1  christos static isc_result_t
    387  1.3  christos journal_seek(dns_journal_t *j, uint32_t offset) {
    388  1.1  christos 	isc_result_t result;
    389  1.1  christos 
    390  1.1  christos 	result = isc_stdio_seek(j->fp, (off_t)offset, SEEK_SET);
    391  1.1  christos 	if (result != ISC_R_SUCCESS) {
    392  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    393  1.1  christos 			      "%s: seek: %s", j->filename,
    394  1.1  christos 			      isc_result_totext(result));
    395  1.1  christos 		return (ISC_R_UNEXPECTED);
    396  1.1  christos 	}
    397  1.1  christos 	j->offset = offset;
    398  1.1  christos 	return (ISC_R_SUCCESS);
    399  1.1  christos }
    400  1.1  christos 
    401  1.1  christos static isc_result_t
    402  1.1  christos journal_read(dns_journal_t *j, void *mem, size_t nbytes) {
    403  1.1  christos 	isc_result_t result;
    404  1.1  christos 
    405  1.1  christos 	result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL);
    406  1.1  christos 	if (result != ISC_R_SUCCESS) {
    407  1.5  christos 		if (result == ISC_R_EOF) {
    408  1.1  christos 			return (ISC_R_NOMORE);
    409  1.5  christos 		}
    410  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    411  1.5  christos 			      "%s: read: %s", j->filename,
    412  1.5  christos 			      isc_result_totext(result));
    413  1.1  christos 		return (ISC_R_UNEXPECTED);
    414  1.1  christos 	}
    415  1.1  christos 	j->offset += (isc_offset_t)nbytes;
    416  1.1  christos 	return (ISC_R_SUCCESS);
    417  1.1  christos }
    418  1.1  christos 
    419  1.1  christos static isc_result_t
    420  1.1  christos journal_write(dns_journal_t *j, void *mem, size_t nbytes) {
    421  1.1  christos 	isc_result_t result;
    422  1.1  christos 
    423  1.1  christos 	result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL);
    424  1.1  christos 	if (result != ISC_R_SUCCESS) {
    425  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    426  1.5  christos 			      "%s: write: %s", j->filename,
    427  1.5  christos 			      isc_result_totext(result));
    428  1.1  christos 		return (ISC_R_UNEXPECTED);
    429  1.1  christos 	}
    430  1.1  christos 	j->offset += (isc_offset_t)nbytes;
    431  1.1  christos 	return (ISC_R_SUCCESS);
    432  1.1  christos }
    433  1.1  christos 
    434  1.1  christos static isc_result_t
    435  1.1  christos journal_fsync(dns_journal_t *j) {
    436  1.1  christos 	isc_result_t result;
    437  1.1  christos 	result = isc_stdio_flush(j->fp);
    438  1.1  christos 	if (result != ISC_R_SUCCESS) {
    439  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    440  1.5  christos 			      "%s: flush: %s", j->filename,
    441  1.5  christos 			      isc_result_totext(result));
    442  1.1  christos 		return (ISC_R_UNEXPECTED);
    443  1.1  christos 	}
    444  1.1  christos 	result = isc_stdio_sync(j->fp);
    445  1.1  christos 	if (result != ISC_R_SUCCESS) {
    446  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    447  1.5  christos 			      "%s: fsync: %s", j->filename,
    448  1.5  christos 			      isc_result_totext(result));
    449  1.1  christos 		return (ISC_R_UNEXPECTED);
    450  1.1  christos 	}
    451  1.1  christos 	return (ISC_R_SUCCESS);
    452  1.1  christos }
    453  1.1  christos 
    454  1.1  christos /*
    455  1.1  christos  * Read/write a transaction header at the current file position.
    456  1.1  christos  */
    457  1.1  christos 
    458  1.1  christos static isc_result_t
    459  1.1  christos journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) {
    460  1.1  christos 	journal_rawxhdr_t raw;
    461  1.1  christos 	isc_result_t result;
    462  1.1  christos 	result = journal_read(j, &raw, sizeof(raw));
    463  1.5  christos 	if (result != ISC_R_SUCCESS) {
    464  1.1  christos 		return (result);
    465  1.5  christos 	}
    466  1.1  christos 	xhdr->size = decode_uint32(raw.size);
    467  1.6  christos 	xhdr->count = decode_uint32(raw.count);
    468  1.1  christos 	xhdr->serial0 = decode_uint32(raw.serial0);
    469  1.1  christos 	xhdr->serial1 = decode_uint32(raw.serial1);
    470  1.1  christos 	return (ISC_R_SUCCESS);
    471  1.1  christos }
    472  1.1  christos 
    473  1.1  christos static isc_result_t
    474  1.6  christos journal_write_xhdr(dns_journal_t *j, uint32_t size, uint32_t count,
    475  1.6  christos 		   uint32_t serial0, uint32_t serial1) {
    476  1.1  christos 	journal_rawxhdr_t raw;
    477  1.1  christos 	encode_uint32(size, raw.size);
    478  1.6  christos 	encode_uint32(count, raw.count);
    479  1.1  christos 	encode_uint32(serial0, raw.serial0);
    480  1.1  christos 	encode_uint32(serial1, raw.serial1);
    481  1.1  christos 	return (journal_write(j, &raw, sizeof(raw)));
    482  1.1  christos }
    483  1.1  christos 
    484  1.1  christos /*
    485  1.1  christos  * Read an RR header at the current file position.
    486  1.1  christos  */
    487  1.1  christos 
    488  1.1  christos static isc_result_t
    489  1.1  christos journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) {
    490  1.1  christos 	journal_rawrrhdr_t raw;
    491  1.1  christos 	isc_result_t result;
    492  1.1  christos 	result = journal_read(j, &raw, sizeof(raw));
    493  1.5  christos 	if (result != ISC_R_SUCCESS) {
    494  1.1  christos 		return (result);
    495  1.5  christos 	}
    496  1.1  christos 	rrhdr->size = decode_uint32(raw.size);
    497  1.1  christos 	return (ISC_R_SUCCESS);
    498  1.1  christos }
    499  1.1  christos 
    500  1.1  christos static isc_result_t
    501  1.1  christos journal_file_create(isc_mem_t *mctx, const char *filename) {
    502  1.1  christos 	FILE *fp = NULL;
    503  1.1  christos 	isc_result_t result;
    504  1.1  christos 	journal_header_t header;
    505  1.1  christos 	journal_rawheader_t rawheader;
    506  1.1  christos 	int index_size = 56; /* XXX configurable */
    507  1.1  christos 	int size;
    508  1.1  christos 	void *mem; /* Memory for temporary index image. */
    509  1.1  christos 
    510  1.1  christos 	INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE);
    511  1.1  christos 
    512  1.1  christos 	result = isc_stdio_open(filename, "wb", &fp);
    513  1.1  christos 	if (result != ISC_R_SUCCESS) {
    514  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    515  1.5  christos 			      "%s: create: %s", filename,
    516  1.5  christos 			      isc_result_totext(result));
    517  1.1  christos 		return (ISC_R_UNEXPECTED);
    518  1.1  christos 	}
    519  1.1  christos 
    520  1.1  christos 	header = initial_journal_header;
    521  1.1  christos 	header.index_size = index_size;
    522  1.1  christos 	journal_header_encode(&header, &rawheader);
    523  1.1  christos 
    524  1.1  christos 	size = sizeof(journal_rawheader_t) +
    525  1.5  christos 	       index_size * sizeof(journal_rawpos_t);
    526  1.1  christos 
    527  1.1  christos 	mem = isc_mem_get(mctx, size);
    528  1.1  christos 	memset(mem, 0, size);
    529  1.1  christos 	memmove(mem, &rawheader, sizeof(rawheader));
    530  1.1  christos 
    531  1.5  christos 	result = isc_stdio_write(mem, 1, (size_t)size, fp, NULL);
    532  1.1  christos 	if (result != ISC_R_SUCCESS) {
    533  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    534  1.5  christos 			      "%s: write: %s", filename,
    535  1.5  christos 			      isc_result_totext(result));
    536  1.1  christos 		(void)isc_stdio_close(fp);
    537  1.1  christos 		(void)isc_file_remove(filename);
    538  1.1  christos 		isc_mem_put(mctx, mem, size);
    539  1.1  christos 		return (ISC_R_UNEXPECTED);
    540  1.1  christos 	}
    541  1.1  christos 	isc_mem_put(mctx, mem, size);
    542  1.1  christos 
    543  1.1  christos 	result = isc_stdio_close(fp);
    544  1.1  christos 	if (result != ISC_R_SUCCESS) {
    545  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    546  1.5  christos 			      "%s: close: %s", filename,
    547  1.5  christos 			      isc_result_totext(result));
    548  1.1  christos 		(void)isc_file_remove(filename);
    549  1.1  christos 		return (ISC_R_UNEXPECTED);
    550  1.1  christos 	}
    551  1.1  christos 
    552  1.1  christos 	return (ISC_R_SUCCESS);
    553  1.1  christos }
    554  1.1  christos 
    555  1.1  christos static isc_result_t
    556  1.5  christos journal_open(isc_mem_t *mctx, const char *filename, bool writable, bool create,
    557  1.5  christos 	     dns_journal_t **journalp) {
    558  1.1  christos 	FILE *fp = NULL;
    559  1.1  christos 	isc_result_t result;
    560  1.1  christos 	journal_rawheader_t rawheader;
    561  1.1  christos 	dns_journal_t *j;
    562  1.1  christos 
    563  1.1  christos 	INSIST(journalp != NULL && *journalp == NULL);
    564  1.1  christos 	j = isc_mem_get(mctx, sizeof(*j));
    565  1.1  christos 
    566  1.1  christos 	j->mctx = NULL;
    567  1.1  christos 	isc_mem_attach(mctx, &j->mctx);
    568  1.1  christos 	j->state = JOURNAL_STATE_INVALID;
    569  1.1  christos 	j->fp = NULL;
    570  1.1  christos 	j->filename = isc_mem_strdup(mctx, filename);
    571  1.1  christos 	j->index = NULL;
    572  1.1  christos 	j->rawindex = NULL;
    573  1.1  christos 
    574  1.5  christos 	if (j->filename == NULL) {
    575  1.1  christos 		FAIL(ISC_R_NOMEMORY);
    576  1.5  christos 	}
    577  1.1  christos 
    578  1.1  christos 	result = isc_stdio_open(j->filename, writable ? "rb+" : "rb", &fp);
    579  1.1  christos 
    580  1.1  christos 	if (result == ISC_R_FILENOTFOUND) {
    581  1.1  christos 		if (create) {
    582  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(1),
    583  1.1  christos 				      "journal file %s does not exist, "
    584  1.5  christos 				      "creating it",
    585  1.5  christos 				      j->filename);
    586  1.1  christos 			CHECK(journal_file_create(mctx, filename));
    587  1.1  christos 			/*
    588  1.1  christos 			 * Retry.
    589  1.1  christos 			 */
    590  1.1  christos 			result = isc_stdio_open(j->filename, "rb+", &fp);
    591  1.1  christos 		} else {
    592  1.1  christos 			FAIL(ISC_R_NOTFOUND);
    593  1.1  christos 		}
    594  1.1  christos 	}
    595  1.1  christos 	if (result != ISC_R_SUCCESS) {
    596  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    597  1.5  christos 			      "%s: open: %s", j->filename,
    598  1.5  christos 			      isc_result_totext(result));
    599  1.1  christos 		FAIL(ISC_R_UNEXPECTED);
    600  1.1  christos 	}
    601  1.1  christos 
    602  1.1  christos 	j->fp = fp;
    603  1.1  christos 
    604  1.1  christos 	/*
    605  1.1  christos 	 * Set magic early so that seek/read can succeed.
    606  1.1  christos 	 */
    607  1.1  christos 	j->magic = DNS_JOURNAL_MAGIC;
    608  1.1  christos 
    609  1.1  christos 	CHECK(journal_seek(j, 0));
    610  1.1  christos 	CHECK(journal_read(j, &rawheader, sizeof(rawheader)));
    611  1.1  christos 
    612  1.1  christos 	if (memcmp(rawheader.h.format, initial_journal_header.format,
    613  1.5  christos 		   sizeof(initial_journal_header.format)) != 0)
    614  1.5  christos 	{
    615  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    616  1.5  christos 			      "%s: journal format not recognized", j->filename);
    617  1.1  christos 		FAIL(ISC_R_UNEXPECTED);
    618  1.1  christos 	}
    619  1.1  christos 	journal_header_decode(&rawheader, &j->header);
    620  1.1  christos 
    621  1.1  christos 	/*
    622  1.1  christos 	 * If there is an index, read the raw index into a dynamically
    623  1.1  christos 	 * allocated buffer and then convert it into a cooked index.
    624  1.1  christos 	 */
    625  1.1  christos 	if (j->header.index_size != 0) {
    626  1.1  christos 		unsigned int i;
    627  1.1  christos 		unsigned int rawbytes;
    628  1.1  christos 		unsigned char *p;
    629  1.1  christos 
    630  1.1  christos 		rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
    631  1.1  christos 		j->rawindex = isc_mem_get(mctx, rawbytes);
    632  1.1  christos 
    633  1.1  christos 		CHECK(journal_read(j, j->rawindex, rawbytes));
    634  1.1  christos 
    635  1.1  christos 		j->index = isc_mem_get(mctx, j->header.index_size *
    636  1.5  christos 						     sizeof(journal_pos_t));
    637  1.1  christos 
    638  1.1  christos 		p = j->rawindex;
    639  1.1  christos 		for (i = 0; i < j->header.index_size; i++) {
    640  1.1  christos 			j->index[i].serial = decode_uint32(p);
    641  1.1  christos 			p += 4;
    642  1.1  christos 			j->index[i].offset = decode_uint32(p);
    643  1.1  christos 			p += 4;
    644  1.1  christos 		}
    645  1.1  christos 		INSIST(p == j->rawindex + rawbytes);
    646  1.1  christos 	}
    647  1.1  christos 	j->offset = -1; /* Invalid, must seek explicitly. */
    648  1.1  christos 
    649  1.1  christos 	/*
    650  1.1  christos 	 * Initialize the iterator.
    651  1.1  christos 	 */
    652  1.1  christos 	dns_name_init(&j->it.name, NULL);
    653  1.1  christos 	dns_rdata_init(&j->it.rdata);
    654  1.1  christos 
    655  1.1  christos 	/*
    656  1.1  christos 	 * Set up empty initial buffers for unchecked and checked
    657  1.1  christos 	 * wire format RR data.  They will be reallocated
    658  1.1  christos 	 * later.
    659  1.1  christos 	 */
    660  1.1  christos 	isc_buffer_init(&j->it.source, NULL, 0);
    661  1.1  christos 	isc_buffer_init(&j->it.target, NULL, 0);
    662  1.1  christos 	dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE);
    663  1.1  christos 
    664  1.5  christos 	j->state = writable ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ;
    665  1.1  christos 
    666  1.1  christos 	*journalp = j;
    667  1.1  christos 	return (ISC_R_SUCCESS);
    668  1.1  christos 
    669  1.5  christos failure:
    670  1.1  christos 	j->magic = 0;
    671  1.5  christos 	if (j->rawindex != NULL) {
    672  1.5  christos 		isc_mem_put(j->mctx, j->rawindex,
    673  1.5  christos 			    j->header.index_size * sizeof(journal_rawpos_t));
    674  1.5  christos 	}
    675  1.5  christos 	if (j->index != NULL) {
    676  1.5  christos 		isc_mem_put(j->mctx, j->index,
    677  1.5  christos 			    j->header.index_size * sizeof(journal_pos_t));
    678  1.5  christos 	}
    679  1.5  christos 	if (j->filename != NULL) {
    680  1.1  christos 		isc_mem_free(j->mctx, j->filename);
    681  1.5  christos 	}
    682  1.5  christos 	if (j->fp != NULL) {
    683  1.1  christos 		(void)isc_stdio_close(j->fp);
    684  1.5  christos 	}
    685  1.1  christos 	isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
    686  1.1  christos 	return (result);
    687  1.1  christos }
    688  1.1  christos 
    689  1.1  christos isc_result_t
    690  1.1  christos dns_journal_open(isc_mem_t *mctx, const char *filename, unsigned int mode,
    691  1.5  christos 		 dns_journal_t **journalp) {
    692  1.1  christos 	isc_result_t result;
    693  1.1  christos 	size_t namelen;
    694  1.1  christos 	char backup[1024];
    695  1.3  christos 	bool writable, create;
    696  1.1  christos 
    697  1.3  christos 	create = ((mode & DNS_JOURNAL_CREATE) != 0);
    698  1.5  christos 	writable = ((mode & (DNS_JOURNAL_WRITE | DNS_JOURNAL_CREATE)) != 0);
    699  1.1  christos 
    700  1.1  christos 	result = journal_open(mctx, filename, writable, create, journalp);
    701  1.1  christos 	if (result == ISC_R_NOTFOUND) {
    702  1.1  christos 		namelen = strlen(filename);
    703  1.1  christos 		if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0)
    704  1.5  christos 		{
    705  1.1  christos 			namelen -= 4;
    706  1.5  christos 		}
    707  1.1  christos 
    708  1.3  christos 		result = snprintf(backup, sizeof(backup), "%.*s.jbk",
    709  1.3  christos 				  (int)namelen, filename);
    710  1.3  christos 		if (result >= sizeof(backup)) {
    711  1.5  christos 			return (ISC_R_NOSPACE);
    712  1.3  christos 		}
    713  1.1  christos 		result = journal_open(mctx, backup, writable, writable,
    714  1.1  christos 				      journalp);
    715  1.1  christos 	}
    716  1.1  christos 	return (result);
    717  1.1  christos }
    718  1.1  christos 
    719  1.1  christos /*
    720  1.1  christos  * A comparison function defining the sorting order for
    721  1.1  christos  * entries in the IXFR-style journal file.
    722  1.1  christos  *
    723  1.1  christos  * The IXFR format requires that deletions are sorted before
    724  1.1  christos  * additions, and within either one, SOA records are sorted
    725  1.1  christos  * before others.
    726  1.1  christos  *
    727  1.1  christos  * Also sort the non-SOA records by type as a courtesy to the
    728  1.1  christos  * server receiving the IXFR - it may help reduce the amount of
    729  1.1  christos  * rdataset merging it has to do.
    730  1.1  christos  */
    731  1.1  christos static int
    732  1.1  christos ixfr_order(const void *av, const void *bv) {
    733  1.5  christos 	dns_difftuple_t const *const *ap = av;
    734  1.5  christos 	dns_difftuple_t const *const *bp = bv;
    735  1.1  christos 	dns_difftuple_t const *a = *ap;
    736  1.1  christos 	dns_difftuple_t const *b = *bp;
    737  1.1  christos 	int r;
    738  1.1  christos 	int bop = 0, aop = 0;
    739  1.1  christos 
    740  1.1  christos 	switch (a->op) {
    741  1.1  christos 	case DNS_DIFFOP_DEL:
    742  1.1  christos 	case DNS_DIFFOP_DELRESIGN:
    743  1.1  christos 		aop = 1;
    744  1.1  christos 		break;
    745  1.1  christos 	case DNS_DIFFOP_ADD:
    746  1.1  christos 	case DNS_DIFFOP_ADDRESIGN:
    747  1.1  christos 		aop = 0;
    748  1.1  christos 		break;
    749  1.1  christos 	default:
    750  1.1  christos 		INSIST(0);
    751  1.3  christos 		ISC_UNREACHABLE();
    752  1.1  christos 	}
    753  1.1  christos 
    754  1.1  christos 	switch (b->op) {
    755  1.1  christos 	case DNS_DIFFOP_DEL:
    756  1.1  christos 	case DNS_DIFFOP_DELRESIGN:
    757  1.1  christos 		bop = 1;
    758  1.1  christos 		break;
    759  1.1  christos 	case DNS_DIFFOP_ADD:
    760  1.1  christos 	case DNS_DIFFOP_ADDRESIGN:
    761  1.1  christos 		bop = 0;
    762  1.1  christos 		break;
    763  1.1  christos 	default:
    764  1.1  christos 		INSIST(0);
    765  1.3  christos 		ISC_UNREACHABLE();
    766  1.1  christos 	}
    767  1.1  christos 
    768  1.1  christos 	r = bop - aop;
    769  1.5  christos 	if (r != 0) {
    770  1.1  christos 		return (r);
    771  1.5  christos 	}
    772  1.1  christos 
    773  1.1  christos 	r = (b->rdata.type == dns_rdatatype_soa) -
    774  1.5  christos 	    (a->rdata.type == dns_rdatatype_soa);
    775  1.5  christos 	if (r != 0) {
    776  1.1  christos 		return (r);
    777  1.5  christos 	}
    778  1.1  christos 
    779  1.1  christos 	r = (a->rdata.type - b->rdata.type);
    780  1.1  christos 	return (r);
    781  1.1  christos }
    782  1.1  christos 
    783  1.1  christos /*
    784  1.1  christos  * Advance '*pos' to the next journal transaction.
    785  1.1  christos  *
    786  1.1  christos  * Requires:
    787  1.1  christos  *	*pos refers to a valid journal transaction.
    788  1.1  christos  *
    789  1.1  christos  * Ensures:
    790  1.1  christos  *	When ISC_R_SUCCESS is returned,
    791  1.1  christos  *	*pos refers to the next journal transaction.
    792  1.1  christos  *
    793  1.1  christos  * Returns one of:
    794  1.1  christos  *
    795  1.1  christos  *    ISC_R_SUCCESS
    796  1.1  christos  *    ISC_R_NOMORE 	*pos pointed at the last transaction
    797  1.1  christos  *    Other results due to file errors are possible.
    798  1.1  christos  */
    799  1.1  christos static isc_result_t
    800  1.1  christos journal_next(dns_journal_t *j, journal_pos_t *pos) {
    801  1.1  christos 	isc_result_t result;
    802  1.1  christos 	journal_xhdr_t xhdr;
    803  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
    804  1.1  christos 
    805  1.1  christos 	result = journal_seek(j, pos->offset);
    806  1.5  christos 	if (result != ISC_R_SUCCESS) {
    807  1.1  christos 		return (result);
    808  1.5  christos 	}
    809  1.1  christos 
    810  1.5  christos 	if (pos->serial == j->header.end.serial) {
    811  1.1  christos 		return (ISC_R_NOMORE);
    812  1.5  christos 	}
    813  1.1  christos 	/*
    814  1.1  christos 	 * Read the header of the current transaction.
    815  1.1  christos 	 * This will return ISC_R_NOMORE if we are at EOF.
    816  1.1  christos 	 */
    817  1.1  christos 	result = journal_read_xhdr(j, &xhdr);
    818  1.5  christos 	if (result != ISC_R_SUCCESS) {
    819  1.1  christos 		return (result);
    820  1.5  christos 	}
    821  1.1  christos 
    822  1.1  christos 	/*
    823  1.1  christos 	 * Check serial number consistency.
    824  1.1  christos 	 */
    825  1.1  christos 	if (xhdr.serial0 != pos->serial) {
    826  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    827  1.1  christos 			      "%s: journal file corrupt: "
    828  1.1  christos 			      "expected serial %u, got %u",
    829  1.1  christos 			      j->filename, pos->serial, xhdr.serial0);
    830  1.1  christos 		return (ISC_R_UNEXPECTED);
    831  1.1  christos 	}
    832  1.1  christos 
    833  1.1  christos 	/*
    834  1.1  christos 	 * Check for offset wraparound.
    835  1.1  christos 	 */
    836  1.5  christos 	if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) +
    837  1.5  christos 			   xhdr.size) < pos->offset)
    838  1.5  christos 	{
    839  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    840  1.1  christos 			      "%s: offset too large", j->filename);
    841  1.1  christos 		return (ISC_R_UNEXPECTED);
    842  1.1  christos 	}
    843  1.1  christos 
    844  1.1  christos 	pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size;
    845  1.1  christos 	pos->serial = xhdr.serial1;
    846  1.1  christos 	return (ISC_R_SUCCESS);
    847  1.1  christos }
    848  1.1  christos 
    849  1.1  christos /*
    850  1.1  christos  * If the index of the journal 'j' contains an entry "better"
    851  1.1  christos  * than '*best_guess', replace '*best_guess' with it.
    852  1.1  christos  *
    853  1.1  christos  * "Better" means having a serial number closer to 'serial'
    854  1.1  christos  * but not greater than 'serial'.
    855  1.1  christos  */
    856  1.1  christos static void
    857  1.3  christos index_find(dns_journal_t *j, uint32_t serial, journal_pos_t *best_guess) {
    858  1.1  christos 	unsigned int i;
    859  1.5  christos 	if (j->index == NULL) {
    860  1.1  christos 		return;
    861  1.5  christos 	}
    862  1.1  christos 	for (i = 0; i < j->header.index_size; i++) {
    863  1.1  christos 		if (POS_VALID(j->index[i]) &&
    864  1.1  christos 		    DNS_SERIAL_GE(serial, j->index[i].serial) &&
    865  1.1  christos 		    DNS_SERIAL_GT(j->index[i].serial, best_guess->serial))
    866  1.5  christos 		{
    867  1.1  christos 			*best_guess = j->index[i];
    868  1.5  christos 		}
    869  1.1  christos 	}
    870  1.1  christos }
    871  1.1  christos 
    872  1.1  christos /*
    873  1.1  christos  * Add a new index entry.  If there is no room, make room by removing
    874  1.1  christos  * the odd-numbered entries and compacting the others into the first
    875  1.1  christos  * half of the index.  This decimates old index entries exponentially
    876  1.1  christos  * over time, so that the index always contains a much larger fraction
    877  1.1  christos  * of recent serial numbers than of old ones.  This is deliberate -
    878  1.1  christos  * most index searches are for outgoing IXFR, and IXFR tends to request
    879  1.1  christos  * recent versions more often than old ones.
    880  1.1  christos  */
    881  1.1  christos static void
    882  1.1  christos index_add(dns_journal_t *j, journal_pos_t *pos) {
    883  1.1  christos 	unsigned int i;
    884  1.5  christos 	if (j->index == NULL) {
    885  1.1  christos 		return;
    886  1.5  christos 	}
    887  1.1  christos 	/*
    888  1.1  christos 	 * Search for a vacant position.
    889  1.1  christos 	 */
    890  1.1  christos 	for (i = 0; i < j->header.index_size; i++) {
    891  1.5  christos 		if (!POS_VALID(j->index[i])) {
    892  1.1  christos 			break;
    893  1.5  christos 		}
    894  1.1  christos 	}
    895  1.1  christos 	if (i == j->header.index_size) {
    896  1.1  christos 		unsigned int k = 0;
    897  1.1  christos 		/*
    898  1.1  christos 		 * Found no vacant position.  Make some room.
    899  1.1  christos 		 */
    900  1.1  christos 		for (i = 0; i < j->header.index_size; i += 2) {
    901  1.1  christos 			j->index[k++] = j->index[i];
    902  1.1  christos 		}
    903  1.1  christos 		i = k; /* 'i' identifies the first vacant position. */
    904  1.1  christos 		while (k < j->header.index_size) {
    905  1.1  christos 			POS_INVALIDATE(j->index[k]);
    906  1.1  christos 			k++;
    907  1.1  christos 		}
    908  1.1  christos 	}
    909  1.1  christos 	INSIST(i < j->header.index_size);
    910  1.5  christos 	INSIST(!POS_VALID(j->index[i]));
    911  1.1  christos 
    912  1.1  christos 	/*
    913  1.1  christos 	 * Store the new index entry.
    914  1.1  christos 	 */
    915  1.1  christos 	j->index[i] = *pos;
    916  1.1  christos }
    917  1.1  christos 
    918  1.1  christos /*
    919  1.1  christos  * Invalidate any existing index entries that could become
    920  1.1  christos  * ambiguous when a new transaction with number 'serial' is added.
    921  1.1  christos  */
    922  1.1  christos static void
    923  1.3  christos index_invalidate(dns_journal_t *j, uint32_t serial) {
    924  1.1  christos 	unsigned int i;
    925  1.5  christos 	if (j->index == NULL) {
    926  1.1  christos 		return;
    927  1.5  christos 	}
    928  1.1  christos 	for (i = 0; i < j->header.index_size; i++) {
    929  1.5  christos 		if (!DNS_SERIAL_GT(serial, j->index[i].serial)) {
    930  1.1  christos 			POS_INVALIDATE(j->index[i]);
    931  1.5  christos 		}
    932  1.1  christos 	}
    933  1.1  christos }
    934  1.1  christos 
    935  1.1  christos /*
    936  1.1  christos  * Try to find a transaction with initial serial number 'serial'
    937  1.1  christos  * in the journal 'j'.
    938  1.1  christos  *
    939  1.1  christos  * If found, store its position at '*pos' and return ISC_R_SUCCESS.
    940  1.1  christos  *
    941  1.1  christos  * If 'serial' is current (= the ending serial number of the
    942  1.1  christos  * last transaction in the journal), set '*pos' to
    943  1.1  christos  * the position immediately following the last transaction and
    944  1.1  christos  * return ISC_R_SUCCESS.
    945  1.1  christos  *
    946  1.1  christos  * If 'serial' is within the range of addressable serial numbers
    947  1.1  christos  * covered by the journal but that particular serial number is missing
    948  1.1  christos  * (from the journal, not just from the index), return ISC_R_NOTFOUND.
    949  1.1  christos  *
    950  1.1  christos  * If 'serial' is outside the range of addressable serial numbers
    951  1.1  christos  * covered by the journal, return ISC_R_RANGE.
    952  1.1  christos  *
    953  1.1  christos  */
    954  1.1  christos static isc_result_t
    955  1.3  christos journal_find(dns_journal_t *j, uint32_t serial, journal_pos_t *pos) {
    956  1.1  christos 	isc_result_t result;
    957  1.1  christos 	journal_pos_t current_pos;
    958  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
    959  1.1  christos 
    960  1.5  christos 	if (DNS_SERIAL_GT(j->header.begin.serial, serial)) {
    961  1.1  christos 		return (ISC_R_RANGE);
    962  1.5  christos 	}
    963  1.5  christos 	if (DNS_SERIAL_GT(serial, j->header.end.serial)) {
    964  1.1  christos 		return (ISC_R_RANGE);
    965  1.5  christos 	}
    966  1.1  christos 	if (serial == j->header.end.serial) {
    967  1.1  christos 		*pos = j->header.end;
    968  1.1  christos 		return (ISC_R_SUCCESS);
    969  1.1  christos 	}
    970  1.1  christos 
    971  1.1  christos 	current_pos = j->header.begin;
    972  1.1  christos 	index_find(j, serial, &current_pos);
    973  1.1  christos 
    974  1.1  christos 	while (current_pos.serial != serial) {
    975  1.5  christos 		if (DNS_SERIAL_GT(current_pos.serial, serial)) {
    976  1.1  christos 			return (ISC_R_NOTFOUND);
    977  1.5  christos 		}
    978  1.1  christos 		result = journal_next(j, &current_pos);
    979  1.5  christos 		if (result != ISC_R_SUCCESS) {
    980  1.1  christos 			return (result);
    981  1.5  christos 		}
    982  1.1  christos 	}
    983  1.1  christos 	*pos = current_pos;
    984  1.1  christos 	return (ISC_R_SUCCESS);
    985  1.1  christos }
    986  1.1  christos 
    987  1.1  christos isc_result_t
    988  1.1  christos dns_journal_begin_transaction(dns_journal_t *j) {
    989  1.3  christos 	uint32_t offset;
    990  1.1  christos 	isc_result_t result;
    991  1.1  christos 	journal_rawxhdr_t hdr;
    992  1.1  christos 
    993  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
    994  1.1  christos 	REQUIRE(j->state == JOURNAL_STATE_WRITE ||
    995  1.1  christos 		j->state == JOURNAL_STATE_INLINE);
    996  1.1  christos 
    997  1.1  christos 	/*
    998  1.1  christos 	 * Find the file offset where the new transaction should
    999  1.1  christos 	 * be written, and seek there.
   1000  1.1  christos 	 */
   1001  1.1  christos 	if (JOURNAL_EMPTY(&j->header)) {
   1002  1.1  christos 		offset = sizeof(journal_rawheader_t) +
   1003  1.5  christos 			 j->header.index_size * sizeof(journal_rawpos_t);
   1004  1.1  christos 	} else {
   1005  1.1  christos 		offset = j->header.end.offset;
   1006  1.1  christos 	}
   1007  1.1  christos 	j->x.pos[0].offset = offset;
   1008  1.1  christos 	j->x.pos[1].offset = offset; /* Initial value, will be incremented. */
   1009  1.1  christos 	j->x.n_soa = 0;
   1010  1.1  christos 
   1011  1.1  christos 	CHECK(journal_seek(j, offset));
   1012  1.1  christos 
   1013  1.1  christos 	/*
   1014  1.1  christos 	 * Write a dummy transaction header of all zeroes to reserve
   1015  1.1  christos 	 * space.  It will be filled in when the transaction is
   1016  1.1  christos 	 * finished.
   1017  1.1  christos 	 */
   1018  1.1  christos 	memset(&hdr, 0, sizeof(hdr));
   1019  1.1  christos 	CHECK(journal_write(j, &hdr, sizeof(hdr)));
   1020  1.1  christos 	j->x.pos[1].offset = j->offset;
   1021  1.1  christos 
   1022  1.1  christos 	j->state = JOURNAL_STATE_TRANSACTION;
   1023  1.1  christos 	result = ISC_R_SUCCESS;
   1024  1.5  christos failure:
   1025  1.1  christos 	return (result);
   1026  1.1  christos }
   1027  1.1  christos 
   1028  1.1  christos isc_result_t
   1029  1.1  christos dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) {
   1030  1.1  christos 	dns_difftuple_t *t;
   1031  1.1  christos 	isc_buffer_t buffer;
   1032  1.1  christos 	void *mem = NULL;
   1033  1.6  christos 	uint64_t size = 0;
   1034  1.6  christos 	uint32_t rrcount = 0;
   1035  1.1  christos 	isc_result_t result;
   1036  1.1  christos 	isc_region_t used;
   1037  1.1  christos 
   1038  1.1  christos 	REQUIRE(DNS_DIFF_VALID(diff));
   1039  1.1  christos 	REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);
   1040  1.1  christos 
   1041  1.1  christos 	isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal");
   1042  1.1  christos 	(void)dns_diff_print(diff, NULL);
   1043  1.1  christos 
   1044  1.1  christos 	/*
   1045  1.1  christos 	 * Pass 1: determine the buffer size needed, and
   1046  1.1  christos 	 * keep track of SOA serial numbers.
   1047  1.1  christos 	 */
   1048  1.1  christos 	for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
   1049  1.5  christos 	     t = ISC_LIST_NEXT(t, link)) {
   1050  1.1  christos 		if (t->rdata.type == dns_rdatatype_soa) {
   1051  1.5  christos 			if (j->x.n_soa < 2) {
   1052  1.1  christos 				j->x.pos[j->x.n_soa].serial =
   1053  1.1  christos 					dns_soa_getserial(&t->rdata);
   1054  1.5  christos 			}
   1055  1.1  christos 			j->x.n_soa++;
   1056  1.1  christos 		}
   1057  1.1  christos 		size += sizeof(journal_rawrrhdr_t);
   1058  1.1  christos 		size += t->name.length; /* XXX should have access macro? */
   1059  1.1  christos 		size += 10;
   1060  1.1  christos 		size += t->rdata.length;
   1061  1.1  christos 	}
   1062  1.1  christos 
   1063  1.1  christos 	if (size >= DNS_JOURNAL_SIZE_MAX) {
   1064  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1065  1.1  christos 			      "dns_journal_writediff: %s: journal entry "
   1066  1.5  christos 			      "too big to be stored: %" PRIu64 " bytes",
   1067  1.5  christos 			      j->filename, size);
   1068  1.1  christos 		return (ISC_R_NOSPACE);
   1069  1.1  christos 	}
   1070  1.1  christos 
   1071  1.1  christos 	mem = isc_mem_get(j->mctx, size);
   1072  1.1  christos 
   1073  1.1  christos 	isc_buffer_init(&buffer, mem, size);
   1074  1.1  christos 
   1075  1.1  christos 	/*
   1076  1.1  christos 	 * Pass 2.  Write RRs to buffer.
   1077  1.1  christos 	 */
   1078  1.1  christos 	for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
   1079  1.5  christos 	     t = ISC_LIST_NEXT(t, link)) {
   1080  1.1  christos 		/*
   1081  1.1  christos 		 * Write the RR header.
   1082  1.1  christos 		 */
   1083  1.5  christos 		isc_buffer_putuint32(&buffer,
   1084  1.5  christos 				     t->name.length + 10 + t->rdata.length);
   1085  1.1  christos 		/*
   1086  1.1  christos 		 * Write the owner name, RR header, and RR data.
   1087  1.1  christos 		 */
   1088  1.1  christos 		isc_buffer_putmem(&buffer, t->name.ndata, t->name.length);
   1089  1.1  christos 		isc_buffer_putuint16(&buffer, t->rdata.type);
   1090  1.1  christos 		isc_buffer_putuint16(&buffer, t->rdata.rdclass);
   1091  1.1  christos 		isc_buffer_putuint32(&buffer, t->ttl);
   1092  1.1  christos 		INSIST(t->rdata.length < 65536);
   1093  1.3  christos 		isc_buffer_putuint16(&buffer, (uint16_t)t->rdata.length);
   1094  1.1  christos 		INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length);
   1095  1.1  christos 		isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length);
   1096  1.6  christos 
   1097  1.6  christos 		rrcount++;
   1098  1.1  christos 	}
   1099  1.1  christos 
   1100  1.1  christos 	isc_buffer_usedregion(&buffer, &used);
   1101  1.1  christos 	INSIST(used.length == size);
   1102  1.1  christos 
   1103  1.1  christos 	j->x.pos[1].offset += used.length;
   1104  1.6  christos 	j->x.n_rr = rrcount;
   1105  1.1  christos 
   1106  1.1  christos 	/*
   1107  1.1  christos 	 * Write the buffer contents to the journal file.
   1108  1.1  christos 	 */
   1109  1.1  christos 	CHECK(journal_write(j, used.base, used.length));
   1110  1.1  christos 
   1111  1.1  christos 	result = ISC_R_SUCCESS;
   1112  1.1  christos 
   1113  1.5  christos failure:
   1114  1.5  christos 	if (mem != NULL) {
   1115  1.1  christos 		isc_mem_put(j->mctx, mem, size);
   1116  1.5  christos 	}
   1117  1.1  christos 	return (result);
   1118  1.1  christos }
   1119  1.1  christos 
   1120  1.1  christos isc_result_t
   1121  1.1  christos dns_journal_commit(dns_journal_t *j) {
   1122  1.1  christos 	isc_result_t result;
   1123  1.1  christos 	journal_rawheader_t rawheader;
   1124  1.3  christos 	uint64_t total;
   1125  1.1  christos 
   1126  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
   1127  1.1  christos 	REQUIRE(j->state == JOURNAL_STATE_TRANSACTION ||
   1128  1.1  christos 		j->state == JOURNAL_STATE_INLINE);
   1129  1.1  christos 
   1130  1.1  christos 	/*
   1131  1.1  christos 	 * Just write out a updated header.
   1132  1.1  christos 	 */
   1133  1.1  christos 	if (j->state == JOURNAL_STATE_INLINE) {
   1134  1.1  christos 		CHECK(journal_fsync(j));
   1135  1.1  christos 		journal_header_encode(&j->header, &rawheader);
   1136  1.1  christos 		CHECK(journal_seek(j, 0));
   1137  1.1  christos 		CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
   1138  1.1  christos 		CHECK(journal_fsync(j));
   1139  1.1  christos 		j->state = JOURNAL_STATE_WRITE;
   1140  1.1  christos 		return (ISC_R_SUCCESS);
   1141  1.1  christos 	}
   1142  1.1  christos 
   1143  1.1  christos 	/*
   1144  1.1  christos 	 * Perform some basic consistency checks.
   1145  1.1  christos 	 */
   1146  1.1  christos 	if (j->x.n_soa != 2) {
   1147  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1148  1.5  christos 			      "%s: malformed transaction: %d SOAs", j->filename,
   1149  1.5  christos 			      j->x.n_soa);
   1150  1.1  christos 		return (ISC_R_UNEXPECTED);
   1151  1.1  christos 	}
   1152  1.5  christos 	if (!DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial)) {
   1153  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1154  1.1  christos 			      "%s: malformed transaction: serial number "
   1155  1.5  christos 			      "did not increase",
   1156  1.5  christos 			      j->filename);
   1157  1.1  christos 		return (ISC_R_UNEXPECTED);
   1158  1.1  christos 	}
   1159  1.5  christos 	if (!JOURNAL_EMPTY(&j->header)) {
   1160  1.1  christos 		if (j->x.pos[0].serial != j->header.end.serial) {
   1161  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1162  1.5  christos 				      "malformed transaction: "
   1163  1.5  christos 				      "%s last serial %u != "
   1164  1.5  christos 				      "transaction first serial %u",
   1165  1.5  christos 				      j->filename, j->header.end.serial,
   1166  1.5  christos 				      j->x.pos[0].serial);
   1167  1.1  christos 			return (ISC_R_UNEXPECTED);
   1168  1.1  christos 		}
   1169  1.1  christos 	}
   1170  1.1  christos 
   1171  1.1  christos 	/*
   1172  1.1  christos 	 * We currently don't support huge journal entries.
   1173  1.1  christos 	 */
   1174  1.1  christos 	total = j->x.pos[1].offset - j->x.pos[0].offset;
   1175  1.1  christos 	if (total >= DNS_JOURNAL_SIZE_MAX) {
   1176  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1177  1.5  christos 			      "transaction too big to be stored in journal: "
   1178  1.5  christos 			      "%" PRIu64 "b (max is %" PRIu64 "b)",
   1179  1.5  christos 			      total, (uint64_t)DNS_JOURNAL_SIZE_MAX);
   1180  1.1  christos 		return (ISC_R_UNEXPECTED);
   1181  1.1  christos 	}
   1182  1.1  christos 
   1183  1.1  christos 	/*
   1184  1.1  christos 	 * Some old journal entries may become non-addressable
   1185  1.1  christos 	 * when we increment the current serial number.  Purge them
   1186  1.1  christos 	 * by stepping header.begin forward to the first addressable
   1187  1.1  christos 	 * transaction.  Also purge them from the index.
   1188  1.1  christos 	 */
   1189  1.5  christos 	if (!JOURNAL_EMPTY(&j->header)) {
   1190  1.5  christos 		while (!DNS_SERIAL_GT(j->x.pos[1].serial,
   1191  1.5  christos 				      j->header.begin.serial)) {
   1192  1.1  christos 			CHECK(journal_next(j, &j->header.begin));
   1193  1.1  christos 		}
   1194  1.1  christos 		index_invalidate(j, j->x.pos[1].serial);
   1195  1.1  christos 	}
   1196  1.1  christos #ifdef notyet
   1197  1.1  christos 	if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) {
   1198  1.1  christos 		force_dump(...);
   1199  1.1  christos 	}
   1200  1.5  christos #endif /* ifdef notyet */
   1201  1.1  christos 
   1202  1.1  christos 	/*
   1203  1.1  christos 	 * Commit the transaction data to stable storage.
   1204  1.1  christos 	 */
   1205  1.1  christos 	CHECK(journal_fsync(j));
   1206  1.1  christos 
   1207  1.1  christos 	if (j->state == JOURNAL_STATE_TRANSACTION) {
   1208  1.1  christos 		isc_offset_t offset;
   1209  1.1  christos 		offset = (j->x.pos[1].offset - j->x.pos[0].offset) -
   1210  1.5  christos 			 sizeof(journal_rawxhdr_t);
   1211  1.1  christos 		/*
   1212  1.1  christos 		 * Update the transaction header.
   1213  1.1  christos 		 */
   1214  1.1  christos 		CHECK(journal_seek(j, j->x.pos[0].offset));
   1215  1.6  christos 		CHECK(journal_write_xhdr(j, offset, j->x.n_rr,
   1216  1.6  christos 					 j->x.pos[0].serial,
   1217  1.1  christos 					 j->x.pos[1].serial));
   1218  1.1  christos 	}
   1219  1.1  christos 
   1220  1.1  christos 	/*
   1221  1.1  christos 	 * Update the journal header.
   1222  1.1  christos 	 */
   1223  1.5  christos 	if (JOURNAL_EMPTY(&j->header)) {
   1224  1.1  christos 		j->header.begin = j->x.pos[0];
   1225  1.5  christos 	}
   1226  1.1  christos 	j->header.end = j->x.pos[1];
   1227  1.1  christos 	journal_header_encode(&j->header, &rawheader);
   1228  1.1  christos 	CHECK(journal_seek(j, 0));
   1229  1.1  christos 	CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
   1230  1.1  christos 
   1231  1.1  christos 	/*
   1232  1.1  christos 	 * Update the index.
   1233  1.1  christos 	 */
   1234  1.1  christos 	index_add(j, &j->x.pos[0]);
   1235  1.1  christos 
   1236  1.1  christos 	/*
   1237  1.1  christos 	 * Convert the index into on-disk format and write
   1238  1.1  christos 	 * it to disk.
   1239  1.1  christos 	 */
   1240  1.1  christos 	CHECK(index_to_disk(j));
   1241  1.1  christos 
   1242  1.1  christos 	/*
   1243  1.1  christos 	 * Commit the header to stable storage.
   1244  1.1  christos 	 */
   1245  1.1  christos 	CHECK(journal_fsync(j));
   1246  1.1  christos 
   1247  1.1  christos 	/*
   1248  1.1  christos 	 * We no longer have a transaction open.
   1249  1.1  christos 	 */
   1250  1.1  christos 	j->state = JOURNAL_STATE_WRITE;
   1251  1.1  christos 
   1252  1.1  christos 	result = ISC_R_SUCCESS;
   1253  1.1  christos 
   1254  1.5  christos failure:
   1255  1.1  christos 	return (result);
   1256  1.1  christos }
   1257  1.1  christos 
   1258  1.1  christos isc_result_t
   1259  1.1  christos dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) {
   1260  1.1  christos 	isc_result_t result;
   1261  1.1  christos 	CHECK(dns_diff_sort(diff, ixfr_order));
   1262  1.1  christos 	CHECK(dns_journal_begin_transaction(j));
   1263  1.1  christos 	CHECK(dns_journal_writediff(j, diff));
   1264  1.1  christos 	CHECK(dns_journal_commit(j));
   1265  1.1  christos 	result = ISC_R_SUCCESS;
   1266  1.5  christos failure:
   1267  1.1  christos 	return (result);
   1268  1.1  christos }
   1269  1.1  christos 
   1270  1.1  christos void
   1271  1.1  christos dns_journal_destroy(dns_journal_t **journalp) {
   1272  1.1  christos 	dns_journal_t *j = *journalp;
   1273  1.5  christos 	*journalp = NULL;
   1274  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
   1275  1.1  christos 
   1276  1.1  christos 	j->it.result = ISC_R_FAILURE;
   1277  1.1  christos 	dns_name_invalidate(&j->it.name);
   1278  1.1  christos 	dns_decompress_invalidate(&j->it.dctx);
   1279  1.5  christos 	if (j->rawindex != NULL) {
   1280  1.5  christos 		isc_mem_put(j->mctx, j->rawindex,
   1281  1.5  christos 			    j->header.index_size * sizeof(journal_rawpos_t));
   1282  1.5  christos 	}
   1283  1.5  christos 	if (j->index != NULL) {
   1284  1.5  christos 		isc_mem_put(j->mctx, j->index,
   1285  1.5  christos 			    j->header.index_size * sizeof(journal_pos_t));
   1286  1.5  christos 	}
   1287  1.5  christos 	if (j->it.target.base != NULL) {
   1288  1.1  christos 		isc_mem_put(j->mctx, j->it.target.base, j->it.target.length);
   1289  1.5  christos 	}
   1290  1.5  christos 	if (j->it.source.base != NULL) {
   1291  1.1  christos 		isc_mem_put(j->mctx, j->it.source.base, j->it.source.length);
   1292  1.5  christos 	}
   1293  1.5  christos 	if (j->filename != NULL) {
   1294  1.1  christos 		isc_mem_free(j->mctx, j->filename);
   1295  1.5  christos 	}
   1296  1.5  christos 	if (j->fp != NULL) {
   1297  1.1  christos 		(void)isc_stdio_close(j->fp);
   1298  1.5  christos 	}
   1299  1.1  christos 	j->magic = 0;
   1300  1.1  christos 	isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
   1301  1.1  christos }
   1302  1.1  christos 
   1303  1.1  christos /*
   1304  1.1  christos  * Roll the open journal 'j' into the database 'db'.
   1305  1.1  christos  * A new database version will be created.
   1306  1.1  christos  */
   1307  1.1  christos 
   1308  1.1  christos /* XXX Share code with incoming IXFR? */
   1309  1.1  christos 
   1310  1.1  christos static isc_result_t
   1311  1.1  christos roll_forward(dns_journal_t *j, dns_db_t *db, unsigned int options) {
   1312  1.5  christos 	isc_buffer_t source; /* Transaction data from disk */
   1313  1.5  christos 	isc_buffer_t target; /* Ditto after _fromwire check */
   1314  1.5  christos 	uint32_t db_serial;  /* Database SOA serial */
   1315  1.5  christos 	uint32_t end_serial; /* Last journal SOA serial */
   1316  1.1  christos 	isc_result_t result;
   1317  1.1  christos 	dns_dbversion_t *ver = NULL;
   1318  1.1  christos 	journal_pos_t pos;
   1319  1.1  christos 	dns_diff_t diff;
   1320  1.1  christos 	unsigned int n_soa = 0;
   1321  1.1  christos 	unsigned int n_put = 0;
   1322  1.1  christos 	dns_diffop_t op;
   1323  1.1  christos 
   1324  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
   1325  1.1  christos 	REQUIRE(DNS_DB_VALID(db));
   1326  1.1  christos 
   1327  1.1  christos 	dns_diff_init(j->mctx, &diff);
   1328  1.1  christos 
   1329  1.1  christos 	/*
   1330  1.1  christos 	 * Set up empty initial buffers for unchecked and checked
   1331  1.1  christos 	 * wire format transaction data.  They will be reallocated
   1332  1.1  christos 	 * later.
   1333  1.1  christos 	 */
   1334  1.1  christos 	isc_buffer_init(&source, NULL, 0);
   1335  1.1  christos 	isc_buffer_init(&target, NULL, 0);
   1336  1.1  christos 
   1337  1.1  christos 	/*
   1338  1.1  christos 	 * Create the new database version.
   1339  1.1  christos 	 */
   1340  1.1  christos 	CHECK(dns_db_newversion(db, &ver));
   1341  1.1  christos 
   1342  1.1  christos 	/*
   1343  1.1  christos 	 * Get the current database SOA serial number.
   1344  1.1  christos 	 */
   1345  1.1  christos 	CHECK(dns_db_getsoaserial(db, ver, &db_serial));
   1346  1.1  christos 
   1347  1.1  christos 	/*
   1348  1.1  christos 	 * Locate a journal entry for the current database serial.
   1349  1.1  christos 	 */
   1350  1.1  christos 	CHECK(journal_find(j, db_serial, &pos));
   1351  1.1  christos 	/*
   1352  1.1  christos 	 * XXX do more drastic things, like marking zone stale,
   1353  1.1  christos 	 * if this fails?
   1354  1.1  christos 	 */
   1355  1.1  christos 	/*
   1356  1.1  christos 	 * XXXRTH  The zone code should probably mark the zone as bad and
   1357  1.1  christos 	 *         scream loudly into the log if this is a dynamic update
   1358  1.1  christos 	 *	   log reply that failed.
   1359  1.1  christos 	 */
   1360  1.1  christos 
   1361  1.1  christos 	end_serial = dns_journal_last_serial(j);
   1362  1.5  christos 	if (db_serial == end_serial) {
   1363  1.1  christos 		CHECK(DNS_R_UPTODATE);
   1364  1.5  christos 	}
   1365  1.1  christos 
   1366  1.6  christos 	CHECK(dns_journal_iter_init(j, db_serial, end_serial, NULL));
   1367  1.1  christos 
   1368  1.5  christos 	for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
   1369  1.1  christos 	     result = dns_journal_next_rr(j))
   1370  1.1  christos 	{
   1371  1.1  christos 		dns_name_t *name;
   1372  1.3  christos 		uint32_t ttl;
   1373  1.1  christos 		dns_rdata_t *rdata;
   1374  1.1  christos 		dns_difftuple_t *tuple = NULL;
   1375  1.1  christos 
   1376  1.1  christos 		name = NULL;
   1377  1.1  christos 		rdata = NULL;
   1378  1.1  christos 		dns_journal_current_rr(j, &name, &ttl, &rdata);
   1379  1.1  christos 
   1380  1.1  christos 		if (rdata->type == dns_rdatatype_soa) {
   1381  1.1  christos 			n_soa++;
   1382  1.5  christos 			if (n_soa == 2) {
   1383  1.1  christos 				db_serial = j->it.current_serial;
   1384  1.5  christos 			}
   1385  1.1  christos 		}
   1386  1.1  christos 
   1387  1.5  christos 		if (n_soa == 3) {
   1388  1.1  christos 			n_soa = 1;
   1389  1.5  christos 		}
   1390  1.1  christos 		if (n_soa == 0) {
   1391  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1392  1.5  christos 				      "%s: journal file corrupt: missing "
   1393  1.5  christos 				      "initial SOA",
   1394  1.5  christos 				      j->filename);
   1395  1.1  christos 			FAIL(ISC_R_UNEXPECTED);
   1396  1.1  christos 		}
   1397  1.5  christos 		if ((options & DNS_JOURNALOPT_RESIGN) != 0) {
   1398  1.5  christos 			op = (n_soa == 1) ? DNS_DIFFOP_DELRESIGN
   1399  1.5  christos 					  : DNS_DIFFOP_ADDRESIGN;
   1400  1.5  christos 		} else {
   1401  1.1  christos 			op = (n_soa == 1) ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD;
   1402  1.5  christos 		}
   1403  1.1  christos 
   1404  1.1  christos 		CHECK(dns_difftuple_create(diff.mctx, op, name, ttl, rdata,
   1405  1.1  christos 					   &tuple));
   1406  1.1  christos 		dns_diff_append(&diff, &tuple);
   1407  1.1  christos 
   1408  1.5  christos 		if (++n_put > 100) {
   1409  1.1  christos 			isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
   1410  1.1  christos 				      "%s: applying diff to database (%u)",
   1411  1.1  christos 				      j->filename, db_serial);
   1412  1.1  christos 			(void)dns_diff_print(&diff, NULL);
   1413  1.1  christos 			CHECK(dns_diff_apply(&diff, db, ver));
   1414  1.1  christos 			dns_diff_clear(&diff);
   1415  1.1  christos 			n_put = 0;
   1416  1.1  christos 		}
   1417  1.1  christos 	}
   1418  1.5  christos 	if (result == ISC_R_NOMORE) {
   1419  1.1  christos 		result = ISC_R_SUCCESS;
   1420  1.5  christos 	}
   1421  1.1  christos 	CHECK(result);
   1422  1.1  christos 
   1423  1.1  christos 	if (n_put != 0) {
   1424  1.1  christos 		isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
   1425  1.1  christos 			      "%s: applying final diff to database (%u)",
   1426  1.1  christos 			      j->filename, db_serial);
   1427  1.1  christos 		(void)dns_diff_print(&diff, NULL);
   1428  1.1  christos 		CHECK(dns_diff_apply(&diff, db, ver));
   1429  1.1  christos 		dns_diff_clear(&diff);
   1430  1.1  christos 	}
   1431  1.1  christos 
   1432  1.5  christos failure:
   1433  1.5  christos 	if (ver != NULL) {
   1434  1.5  christos 		dns_db_closeversion(db, &ver,
   1435  1.5  christos 				    result == ISC_R_SUCCESS ? true : false);
   1436  1.5  christos 	}
   1437  1.1  christos 
   1438  1.5  christos 	if (source.base != NULL) {
   1439  1.1  christos 		isc_mem_put(j->mctx, source.base, source.length);
   1440  1.5  christos 	}
   1441  1.5  christos 	if (target.base != NULL) {
   1442  1.1  christos 		isc_mem_put(j->mctx, target.base, target.length);
   1443  1.5  christos 	}
   1444  1.1  christos 
   1445  1.1  christos 	dns_diff_clear(&diff);
   1446  1.1  christos 
   1447  1.1  christos 	INSIST(ver == NULL);
   1448  1.1  christos 
   1449  1.1  christos 	return (result);
   1450  1.1  christos }
   1451  1.1  christos 
   1452  1.1  christos isc_result_t
   1453  1.1  christos dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, unsigned int options,
   1454  1.5  christos 			const char *filename) {
   1455  1.1  christos 	dns_journal_t *j;
   1456  1.1  christos 	isc_result_t result;
   1457  1.1  christos 
   1458  1.1  christos 	REQUIRE(DNS_DB_VALID(db));
   1459  1.1  christos 	REQUIRE(filename != NULL);
   1460  1.1  christos 
   1461  1.1  christos 	j = NULL;
   1462  1.1  christos 	result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
   1463  1.1  christos 	if (result == ISC_R_NOTFOUND) {
   1464  1.5  christos 		isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file, but "
   1465  1.5  christos 							"that's OK");
   1466  1.1  christos 		return (DNS_R_NOJOURNAL);
   1467  1.1  christos 	}
   1468  1.5  christos 	if (result != ISC_R_SUCCESS) {
   1469  1.1  christos 		return (result);
   1470  1.5  christos 	}
   1471  1.5  christos 	if (JOURNAL_EMPTY(&j->header)) {
   1472  1.1  christos 		result = DNS_R_UPTODATE;
   1473  1.5  christos 	} else {
   1474  1.1  christos 		result = roll_forward(j, db, options);
   1475  1.5  christos 	}
   1476  1.1  christos 
   1477  1.1  christos 	dns_journal_destroy(&j);
   1478  1.1  christos 
   1479  1.1  christos 	return (result);
   1480  1.1  christos }
   1481  1.1  christos 
   1482  1.1  christos isc_result_t
   1483  1.1  christos dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) {
   1484  1.1  christos 	dns_journal_t *j;
   1485  1.5  christos 	isc_buffer_t source;   /* Transaction data from disk */
   1486  1.5  christos 	isc_buffer_t target;   /* Ditto after _fromwire check */
   1487  1.5  christos 	uint32_t start_serial; /* Database SOA serial */
   1488  1.5  christos 	uint32_t end_serial;   /* Last journal SOA serial */
   1489  1.1  christos 	isc_result_t result;
   1490  1.1  christos 	dns_diff_t diff;
   1491  1.1  christos 	unsigned int n_soa = 0;
   1492  1.1  christos 	unsigned int n_put = 0;
   1493  1.1  christos 
   1494  1.1  christos 	REQUIRE(filename != NULL);
   1495  1.1  christos 
   1496  1.1  christos 	j = NULL;
   1497  1.1  christos 	result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
   1498  1.1  christos 	if (result == ISC_R_NOTFOUND) {
   1499  1.1  christos 		isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file");
   1500  1.1  christos 		return (DNS_R_NOJOURNAL);
   1501  1.1  christos 	}
   1502  1.1  christos 
   1503  1.1  christos 	if (result != ISC_R_SUCCESS) {
   1504  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1505  1.1  christos 			      "journal open failure: %s: %s",
   1506  1.1  christos 			      isc_result_totext(result), filename);
   1507  1.1  christos 		return (result);
   1508  1.1  christos 	}
   1509  1.1  christos 
   1510  1.5  christos 	if (j->header.serialset) {
   1511  1.1  christos 		fprintf(file, "Source serial = %u\n", j->header.sourceserial);
   1512  1.5  christos 	}
   1513  1.1  christos 	dns_diff_init(j->mctx, &diff);
   1514  1.1  christos 
   1515  1.1  christos 	/*
   1516  1.1  christos 	 * Set up empty initial buffers for unchecked and checked
   1517  1.1  christos 	 * wire format transaction data.  They will be reallocated
   1518  1.1  christos 	 * later.
   1519  1.1  christos 	 */
   1520  1.1  christos 	isc_buffer_init(&source, NULL, 0);
   1521  1.1  christos 	isc_buffer_init(&target, NULL, 0);
   1522  1.1  christos 
   1523  1.1  christos 	start_serial = dns_journal_first_serial(j);
   1524  1.1  christos 	end_serial = dns_journal_last_serial(j);
   1525  1.1  christos 
   1526  1.6  christos 	CHECK(dns_journal_iter_init(j, start_serial, end_serial, NULL));
   1527  1.1  christos 
   1528  1.5  christos 	for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
   1529  1.1  christos 	     result = dns_journal_next_rr(j))
   1530  1.1  christos 	{
   1531  1.1  christos 		dns_name_t *name;
   1532  1.3  christos 		uint32_t ttl;
   1533  1.1  christos 		dns_rdata_t *rdata;
   1534  1.1  christos 		dns_difftuple_t *tuple = NULL;
   1535  1.1  christos 
   1536  1.1  christos 		name = NULL;
   1537  1.1  christos 		rdata = NULL;
   1538  1.1  christos 		dns_journal_current_rr(j, &name, &ttl, &rdata);
   1539  1.1  christos 
   1540  1.5  christos 		if (rdata->type == dns_rdatatype_soa) {
   1541  1.1  christos 			n_soa++;
   1542  1.5  christos 		}
   1543  1.1  christos 
   1544  1.5  christos 		if (n_soa == 3) {
   1545  1.1  christos 			n_soa = 1;
   1546  1.5  christos 		}
   1547  1.1  christos 		if (n_soa == 0) {
   1548  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1549  1.1  christos 				      "%s: journal file corrupt: missing "
   1550  1.5  christos 				      "initial SOA",
   1551  1.5  christos 				      j->filename);
   1552  1.1  christos 			FAIL(ISC_R_UNEXPECTED);
   1553  1.1  christos 		}
   1554  1.5  christos 		CHECK(dns_difftuple_create(
   1555  1.5  christos 			diff.mctx, n_soa == 1 ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD,
   1556  1.5  christos 			name, ttl, rdata, &tuple));
   1557  1.1  christos 		dns_diff_append(&diff, &tuple);
   1558  1.1  christos 
   1559  1.5  christos 		if (++n_put > 100) {
   1560  1.1  christos 			result = dns_diff_print(&diff, file);
   1561  1.1  christos 			dns_diff_clear(&diff);
   1562  1.1  christos 			n_put = 0;
   1563  1.5  christos 			if (result != ISC_R_SUCCESS) {
   1564  1.1  christos 				break;
   1565  1.5  christos 			}
   1566  1.1  christos 		}
   1567  1.1  christos 	}
   1568  1.5  christos 	if (result == ISC_R_NOMORE) {
   1569  1.1  christos 		result = ISC_R_SUCCESS;
   1570  1.5  christos 	}
   1571  1.1  christos 	CHECK(result);
   1572  1.1  christos 
   1573  1.1  christos 	if (n_put != 0) {
   1574  1.1  christos 		result = dns_diff_print(&diff, file);
   1575  1.1  christos 		dns_diff_clear(&diff);
   1576  1.1  christos 	}
   1577  1.1  christos 	goto cleanup;
   1578  1.1  christos 
   1579  1.5  christos failure:
   1580  1.1  christos 	isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1581  1.1  christos 		      "%s: cannot print: journal file corrupt", j->filename);
   1582  1.1  christos 
   1583  1.5  christos cleanup:
   1584  1.5  christos 	if (source.base != NULL) {
   1585  1.1  christos 		isc_mem_put(j->mctx, source.base, source.length);
   1586  1.5  christos 	}
   1587  1.5  christos 	if (target.base != NULL) {
   1588  1.1  christos 		isc_mem_put(j->mctx, target.base, target.length);
   1589  1.5  christos 	}
   1590  1.1  christos 
   1591  1.1  christos 	dns_diff_clear(&diff);
   1592  1.1  christos 	dns_journal_destroy(&j);
   1593  1.1  christos 
   1594  1.1  christos 	return (result);
   1595  1.1  christos }
   1596  1.1  christos 
   1597  1.1  christos /**************************************************************************/
   1598  1.1  christos /*
   1599  1.1  christos  * Miscellaneous accessors.
   1600  1.1  christos  */
   1601  1.3  christos bool
   1602  1.1  christos dns_journal_empty(dns_journal_t *j) {
   1603  1.1  christos 	return (JOURNAL_EMPTY(&j->header));
   1604  1.1  christos }
   1605  1.1  christos 
   1606  1.3  christos uint32_t
   1607  1.1  christos dns_journal_first_serial(dns_journal_t *j) {
   1608  1.1  christos 	return (j->header.begin.serial);
   1609  1.1  christos }
   1610  1.1  christos 
   1611  1.3  christos uint32_t
   1612  1.1  christos dns_journal_last_serial(dns_journal_t *j) {
   1613  1.1  christos 	return (j->header.end.serial);
   1614  1.1  christos }
   1615  1.1  christos 
   1616  1.1  christos void
   1617  1.3  christos dns_journal_set_sourceserial(dns_journal_t *j, uint32_t sourceserial) {
   1618  1.1  christos 	REQUIRE(j->state == JOURNAL_STATE_WRITE ||
   1619  1.1  christos 		j->state == JOURNAL_STATE_INLINE ||
   1620  1.1  christos 		j->state == JOURNAL_STATE_TRANSACTION);
   1621  1.1  christos 
   1622  1.1  christos 	j->header.sourceserial = sourceserial;
   1623  1.3  christos 	j->header.serialset = true;
   1624  1.5  christos 	if (j->state == JOURNAL_STATE_WRITE) {
   1625  1.1  christos 		j->state = JOURNAL_STATE_INLINE;
   1626  1.5  christos 	}
   1627  1.1  christos }
   1628  1.1  christos 
   1629  1.3  christos bool
   1630  1.3  christos dns_journal_get_sourceserial(dns_journal_t *j, uint32_t *sourceserial) {
   1631  1.1  christos 	REQUIRE(sourceserial != NULL);
   1632  1.1  christos 
   1633  1.5  christos 	if (!j->header.serialset) {
   1634  1.3  christos 		return (false);
   1635  1.5  christos 	}
   1636  1.1  christos 	*sourceserial = j->header.sourceserial;
   1637  1.3  christos 	return (true);
   1638  1.1  christos }
   1639  1.1  christos 
   1640  1.1  christos /**************************************************************************/
   1641  1.1  christos /*
   1642  1.1  christos  * Iteration support.
   1643  1.1  christos  *
   1644  1.1  christos  * When serving an outgoing IXFR, we transmit a part of the journal starting
   1645  1.1  christos  * at the serial number in the IXFR request and ending at the serial
   1646  1.1  christos  * number that is current when the IXFR request arrives.  The ending
   1647  1.1  christos  * serial number is not necessarily at the end of the journal:
   1648  1.1  christos  * the journal may grow while the IXFR is in progress, but we stop
   1649  1.1  christos  * when we reach the serial number that was current when the IXFR started.
   1650  1.1  christos  */
   1651  1.1  christos 
   1652  1.5  christos static isc_result_t
   1653  1.5  christos read_one_rr(dns_journal_t *j);
   1654  1.1  christos 
   1655  1.1  christos /*
   1656  1.1  christos  * Make sure the buffer 'b' is has at least 'size' bytes
   1657  1.1  christos  * allocated, and clear it.
   1658  1.1  christos  *
   1659  1.1  christos  * Requires:
   1660  1.1  christos  *	Either b->base is NULL, or it points to b->length bytes of memory
   1661  1.1  christos  *	previously allocated by isc_mem_get().
   1662  1.1  christos  */
   1663  1.1  christos 
   1664  1.1  christos static isc_result_t
   1665  1.1  christos size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) {
   1666  1.1  christos 	if (b->length < size) {
   1667  1.1  christos 		void *mem = isc_mem_get(mctx, size);
   1668  1.5  christos 		if (mem == NULL) {
   1669  1.1  christos 			return (ISC_R_NOMEMORY);
   1670  1.5  christos 		}
   1671  1.5  christos 		if (b->base != NULL) {
   1672  1.1  christos 			isc_mem_put(mctx, b->base, b->length);
   1673  1.5  christos 		}
   1674  1.1  christos 		b->base = mem;
   1675  1.1  christos 		b->length = size;
   1676  1.1  christos 	}
   1677  1.1  christos 	isc_buffer_clear(b);
   1678  1.1  christos 	return (ISC_R_SUCCESS);
   1679  1.1  christos }
   1680  1.1  christos 
   1681  1.1  christos isc_result_t
   1682  1.5  christos dns_journal_iter_init(dns_journal_t *j, uint32_t begin_serial,
   1683  1.6  christos 		      uint32_t end_serial, size_t *xfrsizep) {
   1684  1.1  christos 	isc_result_t result;
   1685  1.1  christos 
   1686  1.1  christos 	CHECK(journal_find(j, begin_serial, &j->it.bpos));
   1687  1.1  christos 	INSIST(j->it.bpos.serial == begin_serial);
   1688  1.1  christos 
   1689  1.1  christos 	CHECK(journal_find(j, end_serial, &j->it.epos));
   1690  1.1  christos 	INSIST(j->it.epos.serial == end_serial);
   1691  1.1  christos 
   1692  1.6  christos 	if (xfrsizep != NULL) {
   1693  1.6  christos 		journal_pos_t pos = j->it.bpos;
   1694  1.6  christos 		journal_xhdr_t xhdr;
   1695  1.6  christos 		uint64_t size = 0;
   1696  1.6  christos 		uint32_t count = 0;
   1697  1.6  christos 
   1698  1.6  christos 		/*
   1699  1.6  christos 		 * We already know the beginning and ending serial
   1700  1.6  christos 		 * numbers are in the journal. Scan through them,
   1701  1.6  christos 		 * adding up sizes and RR counts so we can calculate
   1702  1.6  christos 		 * the IXFR size.
   1703  1.6  christos 		 */
   1704  1.6  christos 		CHECK(journal_seek(j, pos.offset));
   1705  1.6  christos 		do {
   1706  1.6  christos 			CHECK(journal_read_xhdr(j, &xhdr));
   1707  1.6  christos 
   1708  1.6  christos 			size += xhdr.size;
   1709  1.6  christos 			count += xhdr.count;
   1710  1.6  christos 
   1711  1.6  christos 			result = journal_next(j, &pos);
   1712  1.6  christos 			if (result == ISC_R_NOMORE) {
   1713  1.6  christos 				result = ISC_R_SUCCESS;
   1714  1.6  christos 			}
   1715  1.6  christos 			CHECK(result);
   1716  1.6  christos 		} while (pos.serial != end_serial);
   1717  1.6  christos 
   1718  1.6  christos 		/*
   1719  1.6  christos 		 * For each RR, subtract the length of the RR header,
   1720  1.6  christos 		 * as this would not be present in IXFR messages.
   1721  1.6  christos 		 * (We don't need to worry about the transaction header
   1722  1.6  christos 		 * because that was already excluded from xdr.size.)
   1723  1.6  christos 		 */
   1724  1.6  christos 		*xfrsizep = size - (count * sizeof(journal_rawrrhdr_t));
   1725  1.6  christos 	}
   1726  1.6  christos 
   1727  1.1  christos 	result = ISC_R_SUCCESS;
   1728  1.5  christos failure:
   1729  1.1  christos 	j->it.result = result;
   1730  1.1  christos 	return (j->it.result);
   1731  1.1  christos }
   1732  1.1  christos 
   1733  1.1  christos isc_result_t
   1734  1.1  christos dns_journal_first_rr(dns_journal_t *j) {
   1735  1.1  christos 	isc_result_t result;
   1736  1.1  christos 
   1737  1.1  christos 	/*
   1738  1.1  christos 	 * Seek to the beginning of the first transaction we are
   1739  1.1  christos 	 * interested in.
   1740  1.1  christos 	 */
   1741  1.1  christos 	CHECK(journal_seek(j, j->it.bpos.offset));
   1742  1.1  christos 	j->it.current_serial = j->it.bpos.serial;
   1743  1.1  christos 
   1744  1.5  christos 	j->it.xsize = 0; /* We have no transaction data yet... */
   1745  1.5  christos 	j->it.xpos = 0;	 /* ...and haven't used any of it. */
   1746  1.1  christos 
   1747  1.1  christos 	return (read_one_rr(j));
   1748  1.1  christos 
   1749  1.5  christos failure:
   1750  1.1  christos 	return (result);
   1751  1.1  christos }
   1752  1.1  christos 
   1753  1.1  christos static isc_result_t
   1754  1.1  christos read_one_rr(dns_journal_t *j) {
   1755  1.1  christos 	isc_result_t result;
   1756  1.1  christos 
   1757  1.1  christos 	dns_rdatatype_t rdtype;
   1758  1.1  christos 	dns_rdataclass_t rdclass;
   1759  1.1  christos 	unsigned int rdlen;
   1760  1.3  christos 	uint32_t ttl;
   1761  1.1  christos 	journal_xhdr_t xhdr;
   1762  1.1  christos 	journal_rrhdr_t rrhdr;
   1763  1.1  christos 
   1764  1.1  christos 	if (j->offset > j->it.epos.offset) {
   1765  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1766  1.1  christos 			      "%s: journal corrupt: possible integer overflow",
   1767  1.1  christos 			      j->filename);
   1768  1.1  christos 		return (ISC_R_UNEXPECTED);
   1769  1.1  christos 	}
   1770  1.5  christos 	if (j->offset == j->it.epos.offset) {
   1771  1.1  christos 		return (ISC_R_NOMORE);
   1772  1.5  christos 	}
   1773  1.1  christos 	if (j->it.xpos == j->it.xsize) {
   1774  1.1  christos 		/*
   1775  1.1  christos 		 * We are at a transaction boundary.
   1776  1.1  christos 		 * Read another transaction header.
   1777  1.1  christos 		 */
   1778  1.1  christos 		CHECK(journal_read_xhdr(j, &xhdr));
   1779  1.1  christos 		if (xhdr.size == 0) {
   1780  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1781  1.1  christos 				      "%s: journal corrupt: empty transaction",
   1782  1.1  christos 				      j->filename);
   1783  1.1  christos 			FAIL(ISC_R_UNEXPECTED);
   1784  1.1  christos 		}
   1785  1.1  christos 		if (xhdr.serial0 != j->it.current_serial) {
   1786  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1787  1.5  christos 				      "%s: journal file corrupt: "
   1788  1.5  christos 				      "expected serial %u, got %u",
   1789  1.5  christos 				      j->filename, j->it.current_serial,
   1790  1.5  christos 				      xhdr.serial0);
   1791  1.1  christos 			FAIL(ISC_R_UNEXPECTED);
   1792  1.1  christos 		}
   1793  1.1  christos 		j->it.xsize = xhdr.size;
   1794  1.1  christos 		j->it.xpos = 0;
   1795  1.1  christos 	}
   1796  1.1  christos 	/*
   1797  1.1  christos 	 * Read an RR.
   1798  1.1  christos 	 */
   1799  1.1  christos 	CHECK(journal_read_rrhdr(j, &rrhdr));
   1800  1.1  christos 	/*
   1801  1.1  christos 	 * Perform a sanity check on the journal RR size.
   1802  1.1  christos 	 * The smallest possible RR has a 1-byte owner name
   1803  1.1  christos 	 * and a 10-byte header.  The largest possible
   1804  1.1  christos 	 * RR has 65535 bytes of data, a header, and a maximum-
   1805  1.1  christos 	 * size owner name, well below 70 k total.
   1806  1.1  christos 	 */
   1807  1.5  christos 	if (rrhdr.size < 1 + 10 || rrhdr.size > 70000) {
   1808  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1809  1.5  christos 			      "%s: journal corrupt: impossible RR size "
   1810  1.5  christos 			      "(%d bytes)",
   1811  1.5  christos 			      j->filename, rrhdr.size);
   1812  1.1  christos 		FAIL(ISC_R_UNEXPECTED);
   1813  1.1  christos 	}
   1814  1.1  christos 
   1815  1.1  christos 	CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size));
   1816  1.1  christos 	CHECK(journal_read(j, j->it.source.base, rrhdr.size));
   1817  1.1  christos 	isc_buffer_add(&j->it.source, rrhdr.size);
   1818  1.1  christos 
   1819  1.1  christos 	/*
   1820  1.1  christos 	 * The target buffer is made the same size
   1821  1.1  christos 	 * as the source buffer, with the assumption that when
   1822  1.1  christos 	 * no compression in present, the output of dns_*_fromwire()
   1823  1.1  christos 	 * is no larger than the input.
   1824  1.1  christos 	 */
   1825  1.1  christos 	CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size));
   1826  1.1  christos 
   1827  1.1  christos 	/*
   1828  1.1  christos 	 * Parse the owner name.  We don't know where it
   1829  1.1  christos 	 * ends yet, so we make the entire "remaining"
   1830  1.1  christos 	 * part of the buffer "active".
   1831  1.1  christos 	 */
   1832  1.1  christos 	isc_buffer_setactive(&j->it.source,
   1833  1.1  christos 			     j->it.source.used - j->it.source.current);
   1834  1.5  christos 	CHECK(dns_name_fromwire(&j->it.name, &j->it.source, &j->it.dctx, 0,
   1835  1.5  christos 				&j->it.target));
   1836  1.1  christos 
   1837  1.1  christos 	/*
   1838  1.1  christos 	 * Check that the RR header is there, and parse it.
   1839  1.1  christos 	 */
   1840  1.5  christos 	if (isc_buffer_remaininglength(&j->it.source) < 10) {
   1841  1.1  christos 		FAIL(DNS_R_FORMERR);
   1842  1.5  christos 	}
   1843  1.1  christos 
   1844  1.1  christos 	rdtype = isc_buffer_getuint16(&j->it.source);
   1845  1.1  christos 	rdclass = isc_buffer_getuint16(&j->it.source);
   1846  1.1  christos 	ttl = isc_buffer_getuint32(&j->it.source);
   1847  1.1  christos 	rdlen = isc_buffer_getuint16(&j->it.source);
   1848  1.1  christos 
   1849  1.1  christos 	/*
   1850  1.1  christos 	 * Parse the rdata.
   1851  1.1  christos 	 */
   1852  1.5  christos 	if (isc_buffer_remaininglength(&j->it.source) != rdlen) {
   1853  1.1  christos 		FAIL(DNS_R_FORMERR);
   1854  1.5  christos 	}
   1855  1.1  christos 	isc_buffer_setactive(&j->it.source, rdlen);
   1856  1.1  christos 	dns_rdata_reset(&j->it.rdata);
   1857  1.5  christos 	CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass, rdtype, &j->it.source,
   1858  1.5  christos 				 &j->it.dctx, 0, &j->it.target));
   1859  1.1  christos 	j->it.ttl = ttl;
   1860  1.1  christos 
   1861  1.1  christos 	j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size;
   1862  1.1  christos 	if (rdtype == dns_rdatatype_soa) {
   1863  1.1  christos 		/* XXX could do additional consistency checks here */
   1864  1.1  christos 		j->it.current_serial = dns_soa_getserial(&j->it.rdata);
   1865  1.1  christos 	}
   1866  1.1  christos 
   1867  1.1  christos 	result = ISC_R_SUCCESS;
   1868  1.1  christos 
   1869  1.5  christos failure:
   1870  1.1  christos 	j->it.result = result;
   1871  1.1  christos 	return (result);
   1872  1.1  christos }
   1873  1.1  christos 
   1874  1.1  christos isc_result_t
   1875  1.1  christos dns_journal_next_rr(dns_journal_t *j) {
   1876  1.1  christos 	j->it.result = read_one_rr(j);
   1877  1.1  christos 	return (j->it.result);
   1878  1.1  christos }
   1879  1.1  christos 
   1880  1.1  christos void
   1881  1.3  christos dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, uint32_t *ttl,
   1882  1.5  christos 		       dns_rdata_t **rdata) {
   1883  1.1  christos 	REQUIRE(j->it.result == ISC_R_SUCCESS);
   1884  1.1  christos 	*name = &j->it.name;
   1885  1.1  christos 	*ttl = j->it.ttl;
   1886  1.1  christos 	*rdata = &j->it.rdata;
   1887  1.1  christos }
   1888  1.1  christos 
   1889  1.1  christos /**************************************************************************/
   1890  1.1  christos /*
   1891  1.1  christos  * Generating diffs from databases
   1892  1.1  christos  */
   1893  1.1  christos 
   1894  1.1  christos /*
   1895  1.1  christos  * Construct a diff containing all the RRs at the current name of the
   1896  1.1  christos  * database iterator 'dbit' in database 'db', version 'ver'.
   1897  1.1  christos  * Set '*name' to the current name, and append the diff to 'diff'.
   1898  1.1  christos  * All new tuples will have the operation 'op'.
   1899  1.1  christos  *
   1900  1.1  christos  * Requires: 'name' must have buffer large enough to hold the name.
   1901  1.1  christos  * Typically, a dns_fixedname_t would be used.
   1902  1.1  christos  */
   1903  1.1  christos static isc_result_t
   1904  1.1  christos get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now,
   1905  1.1  christos 	      dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op,
   1906  1.5  christos 	      dns_diff_t *diff) {
   1907  1.1  christos 	isc_result_t result;
   1908  1.1  christos 	dns_dbnode_t *node = NULL;
   1909  1.1  christos 	dns_rdatasetiter_t *rdsiter = NULL;
   1910  1.1  christos 	dns_difftuple_t *tuple = NULL;
   1911  1.1  christos 
   1912  1.1  christos 	result = dns_dbiterator_current(dbit, &node, name);
   1913  1.5  christos 	if (result != ISC_R_SUCCESS) {
   1914  1.1  christos 		return (result);
   1915  1.5  christos 	}
   1916  1.1  christos 
   1917  1.1  christos 	result = dns_db_allrdatasets(db, node, ver, now, &rdsiter);
   1918  1.5  christos 	if (result != ISC_R_SUCCESS) {
   1919  1.1  christos 		goto cleanup_node;
   1920  1.5  christos 	}
   1921  1.1  christos 
   1922  1.5  christos 	for (result = dns_rdatasetiter_first(rdsiter); result == ISC_R_SUCCESS;
   1923  1.1  christos 	     result = dns_rdatasetiter_next(rdsiter))
   1924  1.1  christos 	{
   1925  1.1  christos 		dns_rdataset_t rdataset;
   1926  1.1  christos 
   1927  1.1  christos 		dns_rdataset_init(&rdataset);
   1928  1.1  christos 		dns_rdatasetiter_current(rdsiter, &rdataset);
   1929  1.1  christos 
   1930  1.1  christos 		for (result = dns_rdataset_first(&rdataset);
   1931  1.1  christos 		     result == ISC_R_SUCCESS;
   1932  1.1  christos 		     result = dns_rdataset_next(&rdataset))
   1933  1.1  christos 		{
   1934  1.1  christos 			dns_rdata_t rdata = DNS_RDATA_INIT;
   1935  1.1  christos 			dns_rdataset_current(&rdataset, &rdata);
   1936  1.1  christos 			result = dns_difftuple_create(diff->mctx, op, name,
   1937  1.1  christos 						      rdataset.ttl, &rdata,
   1938  1.1  christos 						      &tuple);
   1939  1.1  christos 			if (result != ISC_R_SUCCESS) {
   1940  1.1  christos 				dns_rdataset_disassociate(&rdataset);
   1941  1.1  christos 				goto cleanup_iterator;
   1942  1.1  christos 			}
   1943  1.1  christos 			dns_diff_append(diff, &tuple);
   1944  1.1  christos 		}
   1945  1.1  christos 		dns_rdataset_disassociate(&rdataset);
   1946  1.5  christos 		if (result != ISC_R_NOMORE) {
   1947  1.1  christos 			goto cleanup_iterator;
   1948  1.5  christos 		}
   1949  1.1  christos 	}
   1950  1.5  christos 	if (result != ISC_R_NOMORE) {
   1951  1.1  christos 		goto cleanup_iterator;
   1952  1.5  christos 	}
   1953  1.1  christos 
   1954  1.1  christos 	result = ISC_R_SUCCESS;
   1955  1.1  christos 
   1956  1.5  christos cleanup_iterator:
   1957  1.1  christos 	dns_rdatasetiter_destroy(&rdsiter);
   1958  1.1  christos 
   1959  1.5  christos cleanup_node:
   1960  1.1  christos 	dns_db_detachnode(db, &node);
   1961  1.1  christos 
   1962  1.1  christos 	return (result);
   1963  1.1  christos }
   1964  1.1  christos 
   1965  1.1  christos /*
   1966  1.1  christos  * Comparison function for use by dns_diff_subtract when sorting
   1967  1.1  christos  * the diffs to be subtracted.  The sort keys are the rdata type
   1968  1.1  christos  * and the rdata itself.  The owner name is ignored, because
   1969  1.1  christos  * it is known to be the same for all tuples.
   1970  1.1  christos  */
   1971  1.1  christos static int
   1972  1.1  christos rdata_order(const void *av, const void *bv) {
   1973  1.5  christos 	dns_difftuple_t const *const *ap = av;
   1974  1.5  christos 	dns_difftuple_t const *const *bp = bv;
   1975  1.1  christos 	dns_difftuple_t const *a = *ap;
   1976  1.1  christos 	dns_difftuple_t const *b = *bp;
   1977  1.1  christos 	int r;
   1978  1.1  christos 	r = (b->rdata.type - a->rdata.type);
   1979  1.5  christos 	if (r != 0) {
   1980  1.1  christos 		return (r);
   1981  1.5  christos 	}
   1982  1.1  christos 	r = dns_rdata_compare(&a->rdata, &b->rdata);
   1983  1.1  christos 	return (r);
   1984  1.1  christos }
   1985  1.1  christos 
   1986  1.1  christos static isc_result_t
   1987  1.1  christos dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) {
   1988  1.1  christos 	isc_result_t result;
   1989  1.1  christos 	dns_difftuple_t *p[2];
   1990  1.1  christos 	int i, t;
   1991  1.3  christos 	bool append;
   1992  1.1  christos 
   1993  1.1  christos 	CHECK(dns_diff_sort(&diff[0], rdata_order));
   1994  1.1  christos 	CHECK(dns_diff_sort(&diff[1], rdata_order));
   1995  1.1  christos 
   1996  1.1  christos 	for (;;) {
   1997  1.1  christos 		p[0] = ISC_LIST_HEAD(diff[0].tuples);
   1998  1.1  christos 		p[1] = ISC_LIST_HEAD(diff[1].tuples);
   1999  1.5  christos 		if (p[0] == NULL && p[1] == NULL) {
   2000  1.1  christos 			break;
   2001  1.5  christos 		}
   2002  1.1  christos 
   2003  1.5  christos 		for (i = 0; i < 2; i++) {
   2004  1.1  christos 			if (p[!i] == NULL) {
   2005  1.5  christos 				{
   2006  1.5  christos 					ISC_LIST_UNLINK(diff[i].tuples, p[i],
   2007  1.5  christos 							link);
   2008  1.5  christos 					ISC_LIST_APPEND(r->tuples, p[i], link);
   2009  1.5  christos 					goto next;
   2010  1.5  christos 				}
   2011  1.1  christos 			}
   2012  1.5  christos 		}
   2013  1.1  christos 		t = rdata_order(&p[0], &p[1]);
   2014  1.1  christos 		if (t < 0) {
   2015  1.1  christos 			ISC_LIST_UNLINK(diff[0].tuples, p[0], link);
   2016  1.1  christos 			ISC_LIST_APPEND(r->tuples, p[0], link);
   2017  1.1  christos 			goto next;
   2018  1.1  christos 		}
   2019  1.1  christos 		if (t > 0) {
   2020  1.1  christos 			ISC_LIST_UNLINK(diff[1].tuples, p[1], link);
   2021  1.1  christos 			ISC_LIST_APPEND(r->tuples, p[1], link);
   2022  1.1  christos 			goto next;
   2023  1.1  christos 		}
   2024  1.1  christos 		INSIST(t == 0);
   2025  1.1  christos 		/*
   2026  1.1  christos 		 * Identical RRs in both databases; skip them both
   2027  1.1  christos 		 * if the ttl differs.
   2028  1.1  christos 		 */
   2029  1.3  christos 		append = (p[0]->ttl != p[1]->ttl);
   2030  1.1  christos 		for (i = 0; i < 2; i++) {
   2031  1.1  christos 			ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
   2032  1.1  christos 			if (append) {
   2033  1.1  christos 				ISC_LIST_APPEND(r->tuples, p[i], link);
   2034  1.1  christos 			} else {
   2035  1.1  christos 				dns_difftuple_free(&p[i]);
   2036  1.1  christos 			}
   2037  1.1  christos 		}
   2038  1.5  christos 	next:;
   2039  1.1  christos 	}
   2040  1.1  christos 	result = ISC_R_SUCCESS;
   2041  1.5  christos failure:
   2042  1.1  christos 	return (result);
   2043  1.1  christos }
   2044  1.1  christos 
   2045  1.1  christos static isc_result_t
   2046  1.5  christos diff_namespace(dns_db_t *dba, dns_dbversion_t *dbvera, dns_db_t *dbb,
   2047  1.5  christos 	       dns_dbversion_t *dbverb, unsigned int options,
   2048  1.5  christos 	       dns_diff_t *resultdiff) {
   2049  1.1  christos 	dns_db_t *db[2];
   2050  1.1  christos 	dns_dbversion_t *ver[2];
   2051  1.1  christos 	dns_dbiterator_t *dbit[2] = { NULL, NULL };
   2052  1.3  christos 	bool have[2] = { false, false };
   2053  1.1  christos 	dns_fixedname_t fixname[2];
   2054  1.1  christos 	isc_result_t result, itresult[2];
   2055  1.1  christos 	dns_diff_t diff[2];
   2056  1.1  christos 	int i, t;
   2057  1.1  christos 
   2058  1.1  christos 	db[0] = dba, db[1] = dbb;
   2059  1.1  christos 	ver[0] = dbvera, ver[1] = dbverb;
   2060  1.1  christos 
   2061  1.1  christos 	dns_diff_init(resultdiff->mctx, &diff[0]);
   2062  1.1  christos 	dns_diff_init(resultdiff->mctx, &diff[1]);
   2063  1.1  christos 
   2064  1.1  christos 	dns_fixedname_init(&fixname[0]);
   2065  1.1  christos 	dns_fixedname_init(&fixname[1]);
   2066  1.1  christos 
   2067  1.1  christos 	result = dns_db_createiterator(db[0], options, &dbit[0]);
   2068  1.5  christos 	if (result != ISC_R_SUCCESS) {
   2069  1.1  christos 		return (result);
   2070  1.5  christos 	}
   2071  1.1  christos 	result = dns_db_createiterator(db[1], options, &dbit[1]);
   2072  1.5  christos 	if (result != ISC_R_SUCCESS) {
   2073  1.1  christos 		goto cleanup_iterator;
   2074  1.5  christos 	}
   2075  1.1  christos 
   2076  1.1  christos 	itresult[0] = dns_dbiterator_first(dbit[0]);
   2077  1.1  christos 	itresult[1] = dns_dbiterator_first(dbit[1]);
   2078  1.1  christos 
   2079  1.1  christos 	for (;;) {
   2080  1.1  christos 		for (i = 0; i < 2; i++) {
   2081  1.5  christos 			if (!have[i] && itresult[i] == ISC_R_SUCCESS) {
   2082  1.5  christos 				CHECK(get_name_diff(
   2083  1.5  christos 					db[i], ver[i], 0, dbit[i],
   2084  1.5  christos 					dns_fixedname_name(&fixname[i]),
   2085  1.5  christos 					i == 0 ? DNS_DIFFOP_ADD
   2086  1.5  christos 					       : DNS_DIFFOP_DEL,
   2087  1.5  christos 					&diff[i]));
   2088  1.1  christos 				itresult[i] = dns_dbiterator_next(dbit[i]);
   2089  1.3  christos 				have[i] = true;
   2090  1.1  christos 			}
   2091  1.1  christos 		}
   2092  1.1  christos 
   2093  1.5  christos 		if (!have[0] && !have[1]) {
   2094  1.1  christos 			INSIST(ISC_LIST_EMPTY(diff[0].tuples));
   2095  1.1  christos 			INSIST(ISC_LIST_EMPTY(diff[1].tuples));
   2096  1.1  christos 			break;
   2097  1.1  christos 		}
   2098  1.1  christos 
   2099  1.1  christos 		for (i = 0; i < 2; i++) {
   2100  1.5  christos 			if (!have[!i]) {
   2101  1.1  christos 				ISC_LIST_APPENDLIST(resultdiff->tuples,
   2102  1.1  christos 						    diff[i].tuples, link);
   2103  1.1  christos 				INSIST(ISC_LIST_EMPTY(diff[i].tuples));
   2104  1.3  christos 				have[i] = false;
   2105  1.1  christos 				goto next;
   2106  1.1  christos 			}
   2107  1.1  christos 		}
   2108  1.1  christos 
   2109  1.1  christos 		t = dns_name_compare(dns_fixedname_name(&fixname[0]),
   2110  1.1  christos 				     dns_fixedname_name(&fixname[1]));
   2111  1.1  christos 		if (t < 0) {
   2112  1.5  christos 			ISC_LIST_APPENDLIST(resultdiff->tuples, diff[0].tuples,
   2113  1.5  christos 					    link);
   2114  1.1  christos 			INSIST(ISC_LIST_EMPTY(diff[0].tuples));
   2115  1.3  christos 			have[0] = false;
   2116  1.1  christos 			continue;
   2117  1.1  christos 		}
   2118  1.1  christos 		if (t > 0) {
   2119  1.5  christos 			ISC_LIST_APPENDLIST(resultdiff->tuples, diff[1].tuples,
   2120  1.5  christos 					    link);
   2121  1.1  christos 			INSIST(ISC_LIST_EMPTY(diff[1].tuples));
   2122  1.3  christos 			have[1] = false;
   2123  1.1  christos 			continue;
   2124  1.1  christos 		}
   2125  1.1  christos 		INSIST(t == 0);
   2126  1.1  christos 		CHECK(dns_diff_subtract(diff, resultdiff));
   2127  1.1  christos 		INSIST(ISC_LIST_EMPTY(diff[0].tuples));
   2128  1.1  christos 		INSIST(ISC_LIST_EMPTY(diff[1].tuples));
   2129  1.3  christos 		have[0] = have[1] = false;
   2130  1.5  christos 	next:;
   2131  1.1  christos 	}
   2132  1.5  christos 	if (itresult[0] != ISC_R_NOMORE) {
   2133  1.1  christos 		FAIL(itresult[0]);
   2134  1.5  christos 	}
   2135  1.5  christos 	if (itresult[1] != ISC_R_NOMORE) {
   2136  1.1  christos 		FAIL(itresult[1]);
   2137  1.5  christos 	}
   2138  1.1  christos 
   2139  1.1  christos 	INSIST(ISC_LIST_EMPTY(diff[0].tuples));
   2140  1.1  christos 	INSIST(ISC_LIST_EMPTY(diff[1].tuples));
   2141  1.1  christos 
   2142  1.5  christos failure:
   2143  1.1  christos 	dns_dbiterator_destroy(&dbit[1]);
   2144  1.1  christos 
   2145  1.5  christos cleanup_iterator:
   2146  1.1  christos 	dns_dbiterator_destroy(&dbit[0]);
   2147  1.1  christos 	dns_diff_clear(&diff[0]);
   2148  1.1  christos 	dns_diff_clear(&diff[1]);
   2149  1.1  christos 	return (result);
   2150  1.1  christos }
   2151  1.1  christos 
   2152  1.1  christos /*
   2153  1.1  christos  * Compare the databases 'dba' and 'dbb' and generate a journal
   2154  1.1  christos  * entry containing the changes to make 'dba' from 'dbb' (note
   2155  1.1  christos  * the order).  This journal entry will consist of a single,
   2156  1.1  christos  * possibly very large transaction.
   2157  1.1  christos  */
   2158  1.1  christos isc_result_t
   2159  1.1  christos dns_db_diff(isc_mem_t *mctx, dns_db_t *dba, dns_dbversion_t *dbvera,
   2160  1.5  christos 	    dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) {
   2161  1.1  christos 	isc_result_t result;
   2162  1.1  christos 	dns_diff_t diff;
   2163  1.1  christos 
   2164  1.1  christos 	dns_diff_init(mctx, &diff);
   2165  1.1  christos 
   2166  1.1  christos 	result = dns_db_diffx(&diff, dba, dbvera, dbb, dbverb, filename);
   2167  1.1  christos 
   2168  1.1  christos 	dns_diff_clear(&diff);
   2169  1.1  christos 
   2170  1.1  christos 	return (result);
   2171  1.1  christos }
   2172  1.1  christos 
   2173  1.1  christos isc_result_t
   2174  1.1  christos dns_db_diffx(dns_diff_t *diff, dns_db_t *dba, dns_dbversion_t *dbvera,
   2175  1.5  christos 	     dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) {
   2176  1.1  christos 	isc_result_t result;
   2177  1.1  christos 	dns_journal_t *journal = NULL;
   2178  1.1  christos 
   2179  1.1  christos 	if (filename != NULL) {
   2180  1.1  christos 		result = dns_journal_open(diff->mctx, filename,
   2181  1.1  christos 					  DNS_JOURNAL_CREATE, &journal);
   2182  1.5  christos 		if (result != ISC_R_SUCCESS) {
   2183  1.1  christos 			return (result);
   2184  1.5  christos 		}
   2185  1.1  christos 	}
   2186  1.1  christos 
   2187  1.1  christos 	CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NONSEC3, diff));
   2188  1.1  christos 	CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NSEC3ONLY, diff));
   2189  1.1  christos 
   2190  1.1  christos 	if (journal != NULL) {
   2191  1.5  christos 		if (ISC_LIST_EMPTY(diff->tuples)) {
   2192  1.1  christos 			isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes");
   2193  1.5  christos 		} else {
   2194  1.1  christos 			CHECK(dns_journal_write_transaction(journal, diff));
   2195  1.5  christos 		}
   2196  1.1  christos 	}
   2197  1.1  christos 
   2198  1.5  christos failure:
   2199  1.5  christos 	if (journal != NULL) {
   2200  1.1  christos 		dns_journal_destroy(&journal);
   2201  1.5  christos 	}
   2202  1.1  christos 	return (result);
   2203  1.1  christos }
   2204  1.1  christos 
   2205  1.1  christos isc_result_t
   2206  1.3  christos dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial,
   2207  1.5  christos 		    uint32_t target_size) {
   2208  1.1  christos 	unsigned int i;
   2209  1.1  christos 	journal_pos_t best_guess;
   2210  1.1  christos 	journal_pos_t current_pos;
   2211  1.1  christos 	dns_journal_t *j1 = NULL;
   2212  1.1  christos 	dns_journal_t *j2 = NULL;
   2213  1.1  christos 	journal_rawheader_t rawheader;
   2214  1.1  christos 	unsigned int copy_length;
   2215  1.1  christos 	size_t namelen;
   2216  1.1  christos 	char *buf = NULL;
   2217  1.1  christos 	unsigned int size = 0;
   2218  1.1  christos 	isc_result_t result;
   2219  1.1  christos 	unsigned int indexend;
   2220  1.3  christos 	char newname[PATH_MAX];
   2221  1.3  christos 	char backup[PATH_MAX];
   2222  1.3  christos 	bool is_backup = false;
   2223  1.1  christos 
   2224  1.1  christos 	REQUIRE(filename != NULL);
   2225  1.1  christos 
   2226  1.1  christos 	namelen = strlen(filename);
   2227  1.3  christos 	if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0) {
   2228  1.1  christos 		namelen -= 4;
   2229  1.3  christos 	}
   2230  1.1  christos 
   2231  1.5  christos 	result = snprintf(newname, sizeof(newname), "%.*s.jnw", (int)namelen,
   2232  1.5  christos 			  filename);
   2233  1.3  christos 	RUNTIME_CHECK(result < sizeof(newname));
   2234  1.3  christos 
   2235  1.5  christos 	result = snprintf(backup, sizeof(backup), "%.*s.jbk", (int)namelen,
   2236  1.5  christos 			  filename);
   2237  1.3  christos 	RUNTIME_CHECK(result < sizeof(backup));
   2238  1.1  christos 
   2239  1.3  christos 	result = journal_open(mctx, filename, false, false, &j1);
   2240  1.1  christos 	if (result == ISC_R_NOTFOUND) {
   2241  1.3  christos 		is_backup = true;
   2242  1.3  christos 		result = journal_open(mctx, backup, false, false, &j1);
   2243  1.1  christos 	}
   2244  1.5  christos 	if (result != ISC_R_SUCCESS) {
   2245  1.1  christos 		return (result);
   2246  1.5  christos 	}
   2247  1.1  christos 
   2248  1.1  christos 	if (JOURNAL_EMPTY(&j1->header)) {
   2249  1.1  christos 		dns_journal_destroy(&j1);
   2250  1.1  christos 		return (ISC_R_SUCCESS);
   2251  1.1  christos 	}
   2252  1.1  christos 
   2253  1.1  christos 	if (DNS_SERIAL_GT(j1->header.begin.serial, serial) ||
   2254  1.5  christos 	    DNS_SERIAL_GT(serial, j1->header.end.serial))
   2255  1.5  christos 	{
   2256  1.1  christos 		dns_journal_destroy(&j1);
   2257  1.1  christos 		return (ISC_R_RANGE);
   2258  1.1  christos 	}
   2259  1.1  christos 
   2260  1.1  christos 	/*
   2261  1.1  christos 	 * Cope with very small target sizes.
   2262  1.1  christos 	 */
   2263  1.1  christos 	indexend = sizeof(journal_rawheader_t) +
   2264  1.1  christos 		   j1->header.index_size * sizeof(journal_rawpos_t);
   2265  1.5  christos 	if (target_size < DNS_JOURNAL_SIZE_MIN) {
   2266  1.1  christos 		target_size = DNS_JOURNAL_SIZE_MIN;
   2267  1.5  christos 	}
   2268  1.5  christos 	if (target_size < indexend * 2) {
   2269  1.5  christos 		target_size = target_size / 2 + indexend;
   2270  1.5  christos 	}
   2271  1.1  christos 
   2272  1.1  christos 	/*
   2273  1.1  christos 	 * See if there is any work to do.
   2274  1.1  christos 	 */
   2275  1.5  christos 	if ((uint32_t)j1->header.end.offset < target_size) {
   2276  1.1  christos 		dns_journal_destroy(&j1);
   2277  1.1  christos 		return (ISC_R_SUCCESS);
   2278  1.1  christos 	}
   2279  1.1  christos 
   2280  1.3  christos 	CHECK(journal_open(mctx, newname, true, true, &j2));
   2281  1.1  christos 
   2282  1.1  christos 	/*
   2283  1.1  christos 	 * Remove overhead so space test below can succeed.
   2284  1.1  christos 	 */
   2285  1.5  christos 	if (target_size >= indexend) {
   2286  1.1  christos 		target_size -= indexend;
   2287  1.5  christos 	}
   2288  1.1  christos 
   2289  1.1  christos 	/*
   2290  1.1  christos 	 * Find if we can create enough free space.
   2291  1.1  christos 	 */
   2292  1.1  christos 	best_guess = j1->header.begin;
   2293  1.1  christos 	for (i = 0; i < j1->header.index_size; i++) {
   2294  1.1  christos 		if (POS_VALID(j1->index[i]) &&
   2295  1.1  christos 		    DNS_SERIAL_GE(serial, j1->index[i].serial) &&
   2296  1.5  christos 		    ((uint32_t)(j1->header.end.offset - j1->index[i].offset) >=
   2297  1.5  christos 		     target_size / 2) &&
   2298  1.1  christos 		    j1->index[i].offset > best_guess.offset)
   2299  1.5  christos 		{
   2300  1.1  christos 			best_guess = j1->index[i];
   2301  1.5  christos 		}
   2302  1.1  christos 	}
   2303  1.1  christos 
   2304  1.1  christos 	current_pos = best_guess;
   2305  1.1  christos 	while (current_pos.serial != serial) {
   2306  1.1  christos 		CHECK(journal_next(j1, &current_pos));
   2307  1.5  christos 		if (current_pos.serial == j1->header.end.serial) {
   2308  1.1  christos 			break;
   2309  1.5  christos 		}
   2310  1.1  christos 
   2311  1.1  christos 		if (DNS_SERIAL_GE(serial, current_pos.serial) &&
   2312  1.5  christos 		    ((uint32_t)(j1->header.end.offset - current_pos.offset) >=
   2313  1.5  christos 		     (target_size / 2)) &&
   2314  1.1  christos 		    current_pos.offset > best_guess.offset)
   2315  1.5  christos 		{
   2316  1.1  christos 			best_guess = current_pos;
   2317  1.5  christos 		} else {
   2318  1.1  christos 			break;
   2319  1.5  christos 		}
   2320  1.1  christos 	}
   2321  1.1  christos 
   2322  1.1  christos 	INSIST(best_guess.serial != j1->header.end.serial);
   2323  1.5  christos 	if (best_guess.serial != serial) {
   2324  1.1  christos 		CHECK(journal_next(j1, &best_guess));
   2325  1.5  christos 	}
   2326  1.1  christos 
   2327  1.1  christos 	/*
   2328  1.1  christos 	 * We should now be roughly half target_size provided
   2329  1.1  christos 	 * we did not reach 'serial'.  If not we will just copy
   2330  1.1  christos 	 * all uncommitted deltas regardless of the size.
   2331  1.1  christos 	 */
   2332  1.1  christos 	copy_length = j1->header.end.offset - best_guess.offset;
   2333  1.1  christos 
   2334  1.1  christos 	if (copy_length != 0) {
   2335  1.1  christos 		/*
   2336  1.1  christos 		 * Copy best_guess to end into space just freed.
   2337  1.1  christos 		 */
   2338  1.5  christos 		size = 64 * 1024;
   2339  1.5  christos 		if (copy_length < size) {
   2340  1.1  christos 			size = copy_length;
   2341  1.5  christos 		}
   2342  1.1  christos 		buf = isc_mem_get(mctx, size);
   2343  1.1  christos 
   2344  1.1  christos 		CHECK(journal_seek(j1, best_guess.offset));
   2345  1.1  christos 		CHECK(journal_seek(j2, indexend));
   2346  1.1  christos 		for (i = 0; i < copy_length; i += size) {
   2347  1.5  christos 			unsigned int len = (copy_length - i) > size
   2348  1.5  christos 						   ? size
   2349  1.5  christos 						   : (copy_length - i);
   2350  1.1  christos 			CHECK(journal_read(j1, buf, len));
   2351  1.1  christos 			CHECK(journal_write(j2, buf, len));
   2352  1.1  christos 		}
   2353  1.1  christos 
   2354  1.1  christos 		CHECK(journal_fsync(j2));
   2355  1.1  christos 
   2356  1.1  christos 		/*
   2357  1.1  christos 		 * Compute new header.
   2358  1.1  christos 		 */
   2359  1.1  christos 		j2->header.begin.serial = best_guess.serial;
   2360  1.1  christos 		j2->header.begin.offset = indexend;
   2361  1.1  christos 		j2->header.end.serial = j1->header.end.serial;
   2362  1.1  christos 		j2->header.end.offset = indexend + copy_length;
   2363  1.1  christos 		j2->header.sourceserial = j1->header.sourceserial;
   2364  1.1  christos 		j2->header.serialset = j1->header.serialset;
   2365  1.1  christos 
   2366  1.1  christos 		/*
   2367  1.1  christos 		 * Update the journal header.
   2368  1.1  christos 		 */
   2369  1.1  christos 		journal_header_encode(&j2->header, &rawheader);
   2370  1.1  christos 		CHECK(journal_seek(j2, 0));
   2371  1.1  christos 		CHECK(journal_write(j2, &rawheader, sizeof(rawheader)));
   2372  1.1  christos 		CHECK(journal_fsync(j2));
   2373  1.1  christos 
   2374  1.1  christos 		/*
   2375  1.1  christos 		 * Build new index.
   2376  1.1  christos 		 */
   2377  1.1  christos 		current_pos = j2->header.begin;
   2378  1.1  christos 		while (current_pos.serial != j2->header.end.serial) {
   2379  1.1  christos 			index_add(j2, &current_pos);
   2380  1.1  christos 			CHECK(journal_next(j2, &current_pos));
   2381  1.1  christos 		}
   2382  1.1  christos 
   2383  1.1  christos 		/*
   2384  1.1  christos 		 * Write index.
   2385  1.1  christos 		 */
   2386  1.1  christos 		CHECK(index_to_disk(j2));
   2387  1.1  christos 		CHECK(journal_fsync(j2));
   2388  1.1  christos 
   2389  1.1  christos 		indexend = j2->header.end.offset;
   2390  1.1  christos 		POST(indexend);
   2391  1.1  christos 	}
   2392  1.1  christos 
   2393  1.1  christos 	/*
   2394  1.1  christos 	 * Close both journals before trying to rename files (this is
   2395  1.1  christos 	 * necessary on WIN32).
   2396  1.1  christos 	 */
   2397  1.1  christos 	dns_journal_destroy(&j1);
   2398  1.1  christos 	dns_journal_destroy(&j2);
   2399  1.1  christos 
   2400  1.1  christos 	/*
   2401  1.1  christos 	 * With a UFS file system this should just succeed and be atomic.
   2402  1.1  christos 	 * Any IXFR outs will just continue and the old journal will be
   2403  1.1  christos 	 * removed on final close.
   2404  1.1  christos 	 *
   2405  1.1  christos 	 * With MSDOS / NTFS we need to do a two stage rename, triggered
   2406  1.1  christos 	 * by EEXIST.  (If any IXFR's are running in other threads, however,
   2407  1.1  christos 	 * this will fail, and the journal will not be compacted.  But
   2408  1.1  christos 	 * if so, hopefully they'll be finished by the next time we
   2409  1.1  christos 	 * compact.)
   2410  1.1  christos 	 */
   2411  1.1  christos 	if (rename(newname, filename) == -1) {
   2412  1.1  christos 		if (errno == EEXIST && !is_backup) {
   2413  1.1  christos 			result = isc_file_remove(backup);
   2414  1.1  christos 			if (result != ISC_R_SUCCESS &&
   2415  1.5  christos 			    result != ISC_R_FILENOTFOUND) {
   2416  1.1  christos 				goto failure;
   2417  1.5  christos 			}
   2418  1.5  christos 			if (rename(filename, backup) == -1) {
   2419  1.1  christos 				goto maperrno;
   2420  1.5  christos 			}
   2421  1.5  christos 			if (rename(newname, filename) == -1) {
   2422  1.1  christos 				goto maperrno;
   2423  1.5  christos 			}
   2424  1.1  christos 			(void)isc_file_remove(backup);
   2425  1.1  christos 		} else {
   2426  1.5  christos 		maperrno:
   2427  1.1  christos 			result = ISC_R_FAILURE;
   2428  1.1  christos 			goto failure;
   2429  1.1  christos 		}
   2430  1.1  christos 	}
   2431  1.1  christos 
   2432  1.1  christos 	result = ISC_R_SUCCESS;
   2433  1.1  christos 
   2434  1.5  christos failure:
   2435  1.1  christos 	(void)isc_file_remove(newname);
   2436  1.5  christos 	if (buf != NULL) {
   2437  1.1  christos 		isc_mem_put(mctx, buf, size);
   2438  1.5  christos 	}
   2439  1.5  christos 	if (j1 != NULL) {
   2440  1.1  christos 		dns_journal_destroy(&j1);
   2441  1.5  christos 	}
   2442  1.5  christos 	if (j2 != NULL) {
   2443  1.1  christos 		dns_journal_destroy(&j2);
   2444  1.5  christos 	}
   2445  1.1  christos 	return (result);
   2446  1.1  christos }
   2447  1.1  christos 
   2448  1.1  christos static isc_result_t
   2449  1.1  christos index_to_disk(dns_journal_t *j) {
   2450  1.1  christos 	isc_result_t result = ISC_R_SUCCESS;
   2451  1.1  christos 
   2452  1.1  christos 	if (j->header.index_size != 0) {
   2453  1.1  christos 		unsigned int i;
   2454  1.1  christos 		unsigned char *p;
   2455  1.1  christos 		unsigned int rawbytes;
   2456  1.1  christos 
   2457  1.1  christos 		rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
   2458  1.1  christos 
   2459  1.1  christos 		p = j->rawindex;
   2460  1.1  christos 		for (i = 0; i < j->header.index_size; i++) {
   2461  1.1  christos 			encode_uint32(j->index[i].serial, p);
   2462  1.1  christos 			p += 4;
   2463  1.1  christos 			encode_uint32(j->index[i].offset, p);
   2464  1.1  christos 			p += 4;
   2465  1.1  christos 		}
   2466  1.1  christos 		INSIST(p == j->rawindex + rawbytes);
   2467  1.1  christos 
   2468  1.1  christos 		CHECK(journal_seek(j, sizeof(journal_rawheader_t)));
   2469  1.1  christos 		CHECK(journal_write(j, j->rawindex, rawbytes));
   2470  1.1  christos 	}
   2471  1.1  christos failure:
   2472  1.1  christos 	return (result);
   2473  1.1  christos }
   2474