Home | History | Annotate | Line # | Download | only in dns
      1  1.1  christos /*	$NetBSD: journal.c,v 1.1 2024/02/18 20:57:32 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  1.1  christos  *
      6  1.1  christos  * SPDX-License-Identifier: MPL-2.0
      7  1.1  christos  *
      8  1.1  christos  * This Source Code Form is subject to the terms of the Mozilla Public
      9  1.1  christos  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  1.1  christos  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  1.1  christos  *
     12  1.1  christos  * See the COPYRIGHT file distributed with this work for additional
     13  1.1  christos  * information regarding copyright ownership.
     14  1.1  christos  */
     15  1.1  christos 
     16  1.1  christos #include <errno.h>
     17  1.1  christos #include <inttypes.h>
     18  1.1  christos #include <stdbool.h>
     19  1.1  christos #include <stdlib.h>
     20  1.1  christos #include <unistd.h>
     21  1.1  christos 
     22  1.1  christos #include <isc/file.h>
     23  1.1  christos #include <isc/mem.h>
     24  1.1  christos #include <isc/print.h>
     25  1.1  christos #include <isc/serial.h>
     26  1.1  christos #include <isc/stdio.h>
     27  1.1  christos #include <isc/string.h>
     28  1.1  christos #include <isc/util.h>
     29  1.1  christos 
     30  1.1  christos #include <dns/compress.h>
     31  1.1  christos #include <dns/db.h>
     32  1.1  christos #include <dns/dbiterator.h>
     33  1.1  christos #include <dns/diff.h>
     34  1.1  christos #include <dns/fixedname.h>
     35  1.1  christos #include <dns/journal.h>
     36  1.1  christos #include <dns/log.h>
     37  1.1  christos #include <dns/rdataset.h>
     38  1.1  christos #include <dns/rdatasetiter.h>
     39  1.1  christos #include <dns/result.h>
     40  1.1  christos #include <dns/soa.h>
     41  1.1  christos 
/*! \file
 * \brief Journaling.
 *
 * A journal file consists of
 *
 *   \li A fixed-size header of type journal_rawheader_t.
 *
 *   \li The index.  This is an unordered array of index entries
 *     of type journal_rawpos_t giving the locations
 *     of some arbitrary subset of the journal's addressable
 *     transactions.  The index entries are used as hints to
 *     speed up the process of locating a transaction with a given
 *     serial number.  Unused index entries have an "offset"
 *     field of zero.  The size of the index can vary between
 *     journal files, but does not change during the lifetime
 *     of a file.  The size can be zero.
 *
 *   \li The journal data.  This consists of one or more transactions.
 *     Each transaction begins with a transaction header of type
 *     journal_rawxhdr_t.  The transaction header is followed by a
 *     sequence of RRs, similar in structure to an IXFR difference
 *     sequence (RFC1995).  That is, the pre-transaction SOA,
 *     zero or more other deleted RRs, the post-transaction SOA,
 *     and zero or more other added RRs.  Unlike in IXFR, each RR
 *     is prefixed with a 32-bit length.
 *
 *     The journal data part grows as new transactions are
 *     appended to the file.  Only those transactions
 *     whose serial number is current-(2^31-1) to current
 *     are considered "addressable" and may be pointed
 *     to from the header or index.  They may be preceded
 *     by old transactions that are no longer addressable,
 *     and they may be followed by transactions that were
 *     appended to the journal but never committed by updating
 *     the "end" position in the header.  The latter will
 *     be overwritten when new transactions are added.
 */
     79  1.1  christos 
     80  1.1  christos /**************************************************************************/
     81  1.1  christos /*
     82  1.1  christos  * Miscellaneous utilities.
     83  1.1  christos  */
     84  1.1  christos 
/*%
 * Leading arguments for the isc_log_write() calls in this file:
 * the dns_lctx log context, the DNS_LOGCATEGORY_GENERAL category and
 * the DNS_LOGMODULE_JOURNAL module.
 */
#define JOURNAL_COMMON_LOGARGS \
	dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL

/*% JOURNAL_COMMON_LOGARGS followed by the debug log level ISC_LOG_DEBUG(n). */
#define JOURNAL_DEBUG_LOGARGS(n) JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

/*%
 * Store 'code' in the caller's local 'result' and jump to the caller's
 * 'failure' label unless it is ISC_R_SUCCESS.
 *
 * It would be nonsensical (or at least obtuse) to use FAIL() with an
 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
 */
#define FAIL(code)                           \
	do {                                 \
		result = (code);             \
		if (result != ISC_R_SUCCESS) \
			goto failure;        \
	} while (0)

/*%
 * Evaluate 'op', store its value in the caller's local 'result' and
 * jump to the caller's 'failure' label unless it is ISC_R_SUCCESS.
 */
#define CHECK(op)                            \
	do {                                 \
		result = (op);               \
		if (result != ISC_R_SUCCESS) \
			goto failure;        \
	} while (0)

/*%
 * Bit in the on-disk header 'flags' byte that corresponds to the
 * 'serialset' member of the in-core journal header.
 */
#define JOURNAL_SERIALSET 0x01U
    110  1.1  christos 
    111  1.1  christos static isc_result_t
    112  1.1  christos index_to_disk(dns_journal_t *);
    113  1.1  christos 
/*%
 * Return the 32-bit unsigned integer stored in big-endian byte order
 * in the four bytes starting at 'p'.  The bytes are only read.
 */
static uint32_t
decode_uint32(const unsigned char *p) {
	return (((uint32_t)p[0] << 24) + ((uint32_t)p[1] << 16) +
		((uint32_t)p[2] << 8) + ((uint32_t)p[3] << 0));
}
    119  1.1  christos 
/*%
 * Store 'val' in big-endian byte order into the four bytes starting
 * at 'p' (most significant byte first).
 */
static void
encode_uint32(uint32_t val, unsigned char *p) {
	p[0] = (uint8_t)(val >> 24);
	p[1] = (uint8_t)(val >> 16);
	p[2] = (uint8_t)(val >> 8);
	p[3] = (uint8_t)(val >> 0);
}
    127  1.1  christos 
    128  1.1  christos isc_result_t
    129  1.1  christos dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx,
    130  1.1  christos 		      dns_diffop_t op, dns_difftuple_t **tp) {
    131  1.1  christos 	isc_result_t result;
    132  1.1  christos 	dns_dbnode_t *node;
    133  1.1  christos 	dns_rdataset_t rdataset;
    134  1.1  christos 	dns_rdata_t rdata = DNS_RDATA_INIT;
    135  1.1  christos 	dns_fixedname_t fixed;
    136  1.1  christos 	dns_name_t *zonename;
    137  1.1  christos 
    138  1.1  christos 	zonename = dns_fixedname_initname(&fixed);
    139  1.1  christos 	dns_name_copynf(dns_db_origin(db), zonename);
    140  1.1  christos 
    141  1.1  christos 	node = NULL;
    142  1.1  christos 	result = dns_db_findnode(db, zonename, false, &node);
    143  1.1  christos 	if (result != ISC_R_SUCCESS) {
    144  1.1  christos 		goto nonode;
    145  1.1  christos 	}
    146  1.1  christos 
    147  1.1  christos 	dns_rdataset_init(&rdataset);
    148  1.1  christos 	result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0,
    149  1.1  christos 				     (isc_stdtime_t)0, &rdataset, NULL);
    150  1.1  christos 	if (result != ISC_R_SUCCESS) {
    151  1.1  christos 		goto freenode;
    152  1.1  christos 	}
    153  1.1  christos 
    154  1.1  christos 	result = dns_rdataset_first(&rdataset);
    155  1.1  christos 	if (result != ISC_R_SUCCESS) {
    156  1.1  christos 		goto freenode;
    157  1.1  christos 	}
    158  1.1  christos 
    159  1.1  christos 	dns_rdataset_current(&rdataset, &rdata);
    160  1.1  christos 	dns_rdataset_getownercase(&rdataset, zonename);
    161  1.1  christos 
    162  1.1  christos 	result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl, &rdata,
    163  1.1  christos 				      tp);
    164  1.1  christos 
    165  1.1  christos 	dns_rdataset_disassociate(&rdataset);
    166  1.1  christos 	dns_db_detachnode(db, &node);
    167  1.1  christos 	return (result);
    168  1.1  christos 
    169  1.1  christos freenode:
    170  1.1  christos 	dns_db_detachnode(db, &node);
    171  1.1  christos nonode:
    172  1.1  christos 	UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA");
    173  1.1  christos 	return (result);
    174  1.1  christos }
    175  1.1  christos 
    176  1.1  christos /* Journaling */
    177  1.1  christos 
/*%
 * On-disk representation of a "pointer" to a journal entry.
 * These are used in the journal header to locate the beginning
 * and end of the journal, and in the journal index to locate
 * other transactions.
 */
typedef struct {
	unsigned char serial[4]; /*%< SOA serial before update. */
	/*
	 * XXXRTH  Should offset be 8 bytes?
	 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
	 * XXXAG  ... but we will not be able to seek >2G anyway on many
	 *            platforms as long as we are using fseek() rather
	 *            than lseek().
	 */
	unsigned char offset[4]; /*%< Offset from beginning of file. */
} journal_rawpos_t;
    195  1.1  christos 
/*%
 * The header is of a fixed size, with some spare room for future
 * extensions.
 */
#define JOURNAL_HEADER_SIZE 64 /* Bytes. */

/*%
 * Transaction header format versions.  Version 1 is the old-style
 * header without a record count; version 2 adds the count field.
 */
typedef enum {
	XHDR_VERSION1 = 1,
	XHDR_VERSION2 = 2,
} xhdr_version_t;
    206  1.1  christos 
    207  1.1  christos /*%
    208  1.1  christos  * The on-disk representation of the journal header.
    209  1.1  christos  * All numbers are stored in big-endian order.
    210  1.1  christos  */
    211  1.1  christos typedef union {
    212  1.1  christos 	struct {
    213  1.1  christos 		/*% File format version ID. */
    214  1.1  christos 		unsigned char format[16];
    215  1.1  christos 		/*% Position of the first addressable transaction */
    216  1.1  christos 		journal_rawpos_t begin;
    217  1.1  christos 		/*% Position of the next (yet nonexistent) transaction. */
    218  1.1  christos 		journal_rawpos_t end;
    219  1.1  christos 		/*% Number of index entries following the header. */
    220  1.1  christos 		unsigned char index_size[4];
    221  1.1  christos 		/*% Source serial number. */
    222  1.1  christos 		unsigned char sourceserial[4];
    223  1.1  christos 		unsigned char flags;
    224  1.1  christos 	} h;
    225  1.1  christos 	/* Pad the header to a fixed size. */
    226  1.1  christos 	unsigned char pad[JOURNAL_HEADER_SIZE];
    227  1.1  christos } journal_rawheader_t;
    228  1.1  christos 
/*%
 * The on-disk representation of the transaction header, version 2.
 * There is one of these at the beginning of each transaction.
 * Each field is a 32-bit number stored as four raw bytes.
 */
typedef struct {
	unsigned char size[4];	  /*%< In bytes, excluding header. */
	unsigned char count[4];	  /*%< Number of records in transaction */
	unsigned char serial0[4]; /*%< SOA serial before update. */
	unsigned char serial1[4]; /*%< SOA serial after update. */
} journal_rawxhdr_t;
    239  1.1  christos 
/*%
 * Old-style raw transaction header, version 1, used for backward
 * compatibility mode.  It has no record count field.
 */
typedef struct {
	unsigned char size[4];	  /*%< Transaction size field. */
	unsigned char serial0[4]; /*%< First serial field. */
	unsigned char serial1[4]; /*%< Second serial field. */
} journal_rawxhdr_ver1_t;
    249  1.1  christos 
/*%
 * The on-disk representation of the RR header.
 * There is one of these at the beginning of each RR.
 */
typedef struct {
	unsigned char size[4]; /*%< In bytes, excluding header. */
} journal_rawrrhdr_t;
    257  1.1  christos 
    258  1.1  christos /*%
    259  1.1  christos  * The in-core representation of the journal header.
    260  1.1  christos  */
    261  1.1  christos typedef struct {
    262  1.1  christos 	uint32_t serial;
    263  1.1  christos 	isc_offset_t offset;
    264  1.1  christos } journal_pos_t;
    265  1.1  christos 
    266  1.1  christos #define POS_VALID(pos)	    ((pos).offset != 0)
    267  1.1  christos #define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0)
    268  1.1  christos 
    269  1.1  christos typedef struct {
    270  1.1  christos 	unsigned char format[16];
    271  1.1  christos 	journal_pos_t begin;
    272  1.1  christos 	journal_pos_t end;
    273  1.1  christos 	uint32_t index_size;
    274  1.1  christos 	uint32_t sourceserial;
    275  1.1  christos 	bool serialset;
    276  1.1  christos } journal_header_t;
    277  1.1  christos 
/*%
 * The in-core representation of the transaction header.
 */
typedef struct {
	uint32_t size;	  /*%< Decoded 'size' field. */
	uint32_t count;	  /*%< Decoded 'count' field (0 for version 1). */
	uint32_t serial0; /*%< Decoded 'serial0' field. */
	uint32_t serial1; /*%< Decoded 'serial1' field. */
} journal_xhdr_t;
    287  1.1  christos 
/*%
 * The in-core representation of the RR header.
 */
typedef struct {
	uint32_t size; /*%< Decoded 'size' field. */
} journal_rrhdr_t;
    294  1.1  christos 
    295  1.1  christos /*%
    296  1.1  christos  * Initial contents to store in the header of a newly created
    297  1.1  christos  * journal file.
    298  1.1  christos  *
    299  1.1  christos  * The header starts with the magic string ";BIND LOG V9.2\n"
    300  1.1  christos  * to identify the file as a BIND 9 journal file.  An ASCII
    301  1.1  christos  * identification string is used rather than a binary magic
    302  1.1  christos  * number to be consistent with BIND 8 (BIND 8 journal files
    303  1.1  christos  * are ASCII text files).
    304  1.1  christos  */
    305  1.1  christos 
    306  1.1  christos static journal_header_t journal_header_ver1 = {
    307  1.1  christos 	";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0, 0, 0
    308  1.1  christos };
    309  1.1  christos static journal_header_t initial_journal_header = {
    310  1.1  christos 	";BIND LOG V9.2\n", { 0, 0 }, { 0, 0 }, 0, 0, 0
    311  1.1  christos };
    312  1.1  christos 
    313  1.1  christos #define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)
    314  1.1  christos 
/*%
 * State of an open journal, stored in the 'state' member of
 * struct dns_journal.
 */
typedef enum {
	JOURNAL_STATE_INVALID,
	JOURNAL_STATE_READ,
	JOURNAL_STATE_WRITE,
	JOURNAL_STATE_TRANSACTION,
	JOURNAL_STATE_INLINE
} journal_state_t;
    322  1.1  christos 
    323  1.1  christos struct dns_journal {
    324  1.1  christos 	unsigned int magic; /*%< JOUR */
    325  1.1  christos 	isc_mem_t *mctx;    /*%< Memory context */
    326  1.1  christos 	journal_state_t state;
    327  1.1  christos 	xhdr_version_t xhdr_version; /*%< Expected transaction header version */
    328  1.1  christos 	bool header_ver1;	     /*%< Transaction header compatibility
    329  1.1  christos 				      *   mode is allowed */
    330  1.1  christos 	bool recovered;		     /*%< A recoverable error was found
    331  1.1  christos 				      *   while reading the journal */
    332  1.1  christos 	char *filename;		     /*%< Journal file name */
    333  1.1  christos 	FILE *fp;		     /*%< File handle */
    334  1.1  christos 	isc_offset_t offset;	     /*%< Current file offset */
    335  1.1  christos 	journal_xhdr_t curxhdr;	     /*%< Current transaction header */
    336  1.1  christos 	journal_header_t header;     /*%< In-core journal header */
    337  1.1  christos 	unsigned char *rawindex;     /*%< In-core buffer for journal index
    338  1.1  christos 				      * in on-disk format */
    339  1.1  christos 	journal_pos_t *index;	     /*%< In-core journal index */
    340  1.1  christos 
    341  1.1  christos 	/*% Current transaction state (when writing). */
    342  1.1  christos 	struct {
    343  1.1  christos 		unsigned int n_soa;   /*%< Number of SOAs seen */
    344  1.1  christos 		unsigned int n_rr;    /*%< Number of RRs to write */
    345  1.1  christos 		journal_pos_t pos[2]; /*%< Begin/end position */
    346  1.1  christos 	} x;
    347  1.1  christos 
    348  1.1  christos 	/*% Iteration state (when reading). */
    349  1.1  christos 	struct {
    350  1.1  christos 		/* These define the part of the journal we iterate over. */
    351  1.1  christos 		journal_pos_t bpos; /*%< Position before first, */
    352  1.1  christos 		journal_pos_t cpos; /*%< before current, */
    353  1.1  christos 		journal_pos_t epos; /*%< and after last transaction */
    354  1.1  christos 		/* The rest is iterator state. */
    355  1.1  christos 		uint32_t current_serial; /*%< Current SOA serial */
    356  1.1  christos 		isc_buffer_t source;	 /*%< Data from disk */
    357  1.1  christos 		isc_buffer_t target;	 /*%< Data from _fromwire check */
    358  1.1  christos 		dns_decompress_t dctx;	 /*%< Dummy decompression ctx */
    359  1.1  christos 		dns_name_t name;	 /*%< Current domain name */
    360  1.1  christos 		dns_rdata_t rdata;	 /*%< Current rdata */
    361  1.1  christos 		uint32_t ttl;		 /*%< Current TTL */
    362  1.1  christos 		unsigned int xsize;	 /*%< Size of transaction data */
    363  1.1  christos 		unsigned int xpos;	 /*%< Current position in it */
    364  1.1  christos 		isc_result_t result;	 /*%< Result of last call */
    365  1.1  christos 	} it;
    366  1.1  christos };
    367  1.1  christos 
    368  1.1  christos #define DNS_JOURNAL_MAGIC    ISC_MAGIC('J', 'O', 'U', 'R')
    369  1.1  christos #define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
    370  1.1  christos 
    371  1.1  christos static void
    372  1.1  christos journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) {
    373  1.1  christos 	cooked->serial = decode_uint32(raw->serial);
    374  1.1  christos 	cooked->offset = decode_uint32(raw->offset);
    375  1.1  christos }
    376  1.1  christos 
    377  1.1  christos static void
    378  1.1  christos journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) {
    379  1.1  christos 	encode_uint32(cooked->serial, raw->serial);
    380  1.1  christos 	encode_uint32(cooked->offset, raw->offset);
    381  1.1  christos }
    382  1.1  christos 
    383  1.1  christos static void
    384  1.1  christos journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) {
    385  1.1  christos 	INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
    386  1.1  christos 
    387  1.1  christos 	memmove(cooked->format, raw->h.format, sizeof(cooked->format));
    388  1.1  christos 	journal_pos_decode(&raw->h.begin, &cooked->begin);
    389  1.1  christos 	journal_pos_decode(&raw->h.end, &cooked->end);
    390  1.1  christos 	cooked->index_size = decode_uint32(raw->h.index_size);
    391  1.1  christos 	cooked->sourceserial = decode_uint32(raw->h.sourceserial);
    392  1.1  christos 	cooked->serialset = ((raw->h.flags & JOURNAL_SERIALSET) != 0);
    393  1.1  christos }
    394  1.1  christos 
    395  1.1  christos static void
    396  1.1  christos journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) {
    397  1.1  christos 	unsigned char flags = 0;
    398  1.1  christos 
    399  1.1  christos 	INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
    400  1.1  christos 
    401  1.1  christos 	memset(raw->pad, 0, sizeof(raw->pad));
    402  1.1  christos 	memmove(raw->h.format, cooked->format, sizeof(raw->h.format));
    403  1.1  christos 	journal_pos_encode(&raw->h.begin, &cooked->begin);
    404  1.1  christos 	journal_pos_encode(&raw->h.end, &cooked->end);
    405  1.1  christos 	encode_uint32(cooked->index_size, raw->h.index_size);
    406  1.1  christos 	encode_uint32(cooked->sourceserial, raw->h.sourceserial);
    407  1.1  christos 	if (cooked->serialset) {
    408  1.1  christos 		flags |= JOURNAL_SERIALSET;
    409  1.1  christos 	}
    410  1.1  christos 	raw->h.flags = flags;
    411  1.1  christos }
    412  1.1  christos 
    413  1.1  christos /*
    414  1.1  christos  * Journal file I/O subroutines, with error checking and reporting.
    415  1.1  christos  */
    416  1.1  christos static isc_result_t
    417  1.1  christos journal_seek(dns_journal_t *j, uint32_t offset) {
    418  1.1  christos 	isc_result_t result;
    419  1.1  christos 
    420  1.1  christos 	result = isc_stdio_seek(j->fp, (off_t)offset, SEEK_SET);
    421  1.1  christos 	if (result != ISC_R_SUCCESS) {
    422  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    423  1.1  christos 			      "%s: seek: %s", j->filename,
    424  1.1  christos 			      isc_result_totext(result));
    425  1.1  christos 		return (ISC_R_UNEXPECTED);
    426  1.1  christos 	}
    427  1.1  christos 	j->offset = offset;
    428  1.1  christos 	return (ISC_R_SUCCESS);
    429  1.1  christos }
    430  1.1  christos 
    431  1.1  christos static isc_result_t
    432  1.1  christos journal_read(dns_journal_t *j, void *mem, size_t nbytes) {
    433  1.1  christos 	isc_result_t result;
    434  1.1  christos 
    435  1.1  christos 	result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL);
    436  1.1  christos 	if (result != ISC_R_SUCCESS) {
    437  1.1  christos 		if (result == ISC_R_EOF) {
    438  1.1  christos 			return (ISC_R_NOMORE);
    439  1.1  christos 		}
    440  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    441  1.1  christos 			      "%s: read: %s", j->filename,
    442  1.1  christos 			      isc_result_totext(result));
    443  1.1  christos 		return (ISC_R_UNEXPECTED);
    444  1.1  christos 	}
    445  1.1  christos 	j->offset += (isc_offset_t)nbytes;
    446  1.1  christos 	return (ISC_R_SUCCESS);
    447  1.1  christos }
    448  1.1  christos 
    449  1.1  christos static isc_result_t
    450  1.1  christos journal_write(dns_journal_t *j, void *mem, size_t nbytes) {
    451  1.1  christos 	isc_result_t result;
    452  1.1  christos 
    453  1.1  christos 	result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL);
    454  1.1  christos 	if (result != ISC_R_SUCCESS) {
    455  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    456  1.1  christos 			      "%s: write: %s", j->filename,
    457  1.1  christos 			      isc_result_totext(result));
    458  1.1  christos 		return (ISC_R_UNEXPECTED);
    459  1.1  christos 	}
    460  1.1  christos 	j->offset += (isc_offset_t)nbytes;
    461  1.1  christos 	return (ISC_R_SUCCESS);
    462  1.1  christos }
    463  1.1  christos 
    464  1.1  christos static isc_result_t
    465  1.1  christos journal_fsync(dns_journal_t *j) {
    466  1.1  christos 	isc_result_t result;
    467  1.1  christos 
    468  1.1  christos 	result = isc_stdio_flush(j->fp);
    469  1.1  christos 	if (result != ISC_R_SUCCESS) {
    470  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    471  1.1  christos 			      "%s: flush: %s", j->filename,
    472  1.1  christos 			      isc_result_totext(result));
    473  1.1  christos 		return (ISC_R_UNEXPECTED);
    474  1.1  christos 	}
    475  1.1  christos 	result = isc_stdio_sync(j->fp);
    476  1.1  christos 	if (result != ISC_R_SUCCESS) {
    477  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    478  1.1  christos 			      "%s: fsync: %s", j->filename,
    479  1.1  christos 			      isc_result_totext(result));
    480  1.1  christos 		return (ISC_R_UNEXPECTED);
    481  1.1  christos 	}
    482  1.1  christos 	return (ISC_R_SUCCESS);
    483  1.1  christos }
    484  1.1  christos 
    485  1.1  christos /*
    486  1.1  christos  * Read/write a transaction header at the current file position.
    487  1.1  christos  */
    488  1.1  christos static isc_result_t
    489  1.1  christos journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) {
    490  1.1  christos 	isc_result_t result;
    491  1.1  christos 
    492  1.1  christos 	j->it.cpos.offset = j->offset;
    493  1.1  christos 
    494  1.1  christos 	switch (j->xhdr_version) {
    495  1.1  christos 	case XHDR_VERSION1: {
    496  1.1  christos 		journal_rawxhdr_ver1_t raw;
    497  1.1  christos 		result = journal_read(j, &raw, sizeof(raw));
    498  1.1  christos 		if (result != ISC_R_SUCCESS) {
    499  1.1  christos 			return (result);
    500  1.1  christos 		}
    501  1.1  christos 		xhdr->size = decode_uint32(raw.size);
    502  1.1  christos 		xhdr->count = 0;
    503  1.1  christos 		xhdr->serial0 = decode_uint32(raw.serial0);
    504  1.1  christos 		xhdr->serial1 = decode_uint32(raw.serial1);
    505  1.1  christos 		j->curxhdr = *xhdr;
    506  1.1  christos 		return (ISC_R_SUCCESS);
    507  1.1  christos 	}
    508  1.1  christos 
    509  1.1  christos 	case XHDR_VERSION2: {
    510  1.1  christos 		journal_rawxhdr_t raw;
    511  1.1  christos 		result = journal_read(j, &raw, sizeof(raw));
    512  1.1  christos 		if (result != ISC_R_SUCCESS) {
    513  1.1  christos 			return (result);
    514  1.1  christos 		}
    515  1.1  christos 		xhdr->size = decode_uint32(raw.size);
    516  1.1  christos 		xhdr->count = decode_uint32(raw.count);
    517  1.1  christos 		xhdr->serial0 = decode_uint32(raw.serial0);
    518  1.1  christos 		xhdr->serial1 = decode_uint32(raw.serial1);
    519  1.1  christos 		j->curxhdr = *xhdr;
    520  1.1  christos 		return (ISC_R_SUCCESS);
    521  1.1  christos 	}
    522  1.1  christos 
    523  1.1  christos 	default:
    524  1.1  christos 		return (ISC_R_NOTIMPLEMENTED);
    525  1.1  christos 	}
    526  1.1  christos }
    527  1.1  christos 
    528  1.1  christos static isc_result_t
    529  1.1  christos journal_write_xhdr(dns_journal_t *j, uint32_t size, uint32_t count,
    530  1.1  christos 		   uint32_t serial0, uint32_t serial1) {
    531  1.1  christos 	if (j->header_ver1) {
    532  1.1  christos 		journal_rawxhdr_ver1_t raw;
    533  1.1  christos 		encode_uint32(size, raw.size);
    534  1.1  christos 		encode_uint32(serial0, raw.serial0);
    535  1.1  christos 		encode_uint32(serial1, raw.serial1);
    536  1.1  christos 		return (journal_write(j, &raw, sizeof(raw)));
    537  1.1  christos 	} else {
    538  1.1  christos 		journal_rawxhdr_t raw;
    539  1.1  christos 		encode_uint32(size, raw.size);
    540  1.1  christos 		encode_uint32(count, raw.count);
    541  1.1  christos 		encode_uint32(serial0, raw.serial0);
    542  1.1  christos 		encode_uint32(serial1, raw.serial1);
    543  1.1  christos 		return (journal_write(j, &raw, sizeof(raw)));
    544  1.1  christos 	}
    545  1.1  christos }
    546  1.1  christos 
    547  1.1  christos /*
    548  1.1  christos  * Read an RR header at the current file position.
    549  1.1  christos  */
    550  1.1  christos 
    551  1.1  christos static isc_result_t
    552  1.1  christos journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) {
    553  1.1  christos 	journal_rawrrhdr_t raw;
    554  1.1  christos 	isc_result_t result;
    555  1.1  christos 
    556  1.1  christos 	result = journal_read(j, &raw, sizeof(raw));
    557  1.1  christos 	if (result != ISC_R_SUCCESS) {
    558  1.1  christos 		return (result);
    559  1.1  christos 	}
    560  1.1  christos 	rrhdr->size = decode_uint32(raw.size);
    561  1.1  christos 	return (ISC_R_SUCCESS);
    562  1.1  christos }
    563  1.1  christos 
    564  1.1  christos static isc_result_t
    565  1.1  christos journal_file_create(isc_mem_t *mctx, bool downgrade, const char *filename) {
    566  1.1  christos 	FILE *fp = NULL;
    567  1.1  christos 	isc_result_t result;
    568  1.1  christos 	journal_header_t header;
    569  1.1  christos 	journal_rawheader_t rawheader;
    570  1.1  christos 	int index_size = 56; /* XXX configurable */
    571  1.1  christos 	int size;
    572  1.1  christos 	void *mem = NULL; /* Memory for temporary index image. */
    573  1.1  christos 
    574  1.1  christos 	INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE);
    575  1.1  christos 
    576  1.1  christos 	result = isc_stdio_open(filename, "wb", &fp);
    577  1.1  christos 	if (result != ISC_R_SUCCESS) {
    578  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    579  1.1  christos 			      "%s: create: %s", filename,
    580  1.1  christos 			      isc_result_totext(result));
    581  1.1  christos 		return (ISC_R_UNEXPECTED);
    582  1.1  christos 	}
    583  1.1  christos 
    584  1.1  christos 	if (downgrade) {
    585  1.1  christos 		header = journal_header_ver1;
    586  1.1  christos 	} else {
    587  1.1  christos 		header = initial_journal_header;
    588  1.1  christos 	}
    589  1.1  christos 	header.index_size = index_size;
    590  1.1  christos 	journal_header_encode(&header, &rawheader);
    591  1.1  christos 
    592  1.1  christos 	size = sizeof(journal_rawheader_t) +
    593  1.1  christos 	       index_size * sizeof(journal_rawpos_t);
    594  1.1  christos 
    595  1.1  christos 	mem = isc_mem_get(mctx, size);
    596  1.1  christos 	memset(mem, 0, size);
    597  1.1  christos 	memmove(mem, &rawheader, sizeof(rawheader));
    598  1.1  christos 
    599  1.1  christos 	result = isc_stdio_write(mem, 1, (size_t)size, fp, NULL);
    600  1.1  christos 	if (result != ISC_R_SUCCESS) {
    601  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    602  1.1  christos 			      "%s: write: %s", filename,
    603  1.1  christos 			      isc_result_totext(result));
    604  1.1  christos 		(void)isc_stdio_close(fp);
    605  1.1  christos 		(void)isc_file_remove(filename);
    606  1.1  christos 		isc_mem_put(mctx, mem, size);
    607  1.1  christos 		return (ISC_R_UNEXPECTED);
    608  1.1  christos 	}
    609  1.1  christos 	isc_mem_put(mctx, mem, size);
    610  1.1  christos 
    611  1.1  christos 	result = isc_stdio_close(fp);
    612  1.1  christos 	if (result != ISC_R_SUCCESS) {
    613  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    614  1.1  christos 			      "%s: close: %s", filename,
    615  1.1  christos 			      isc_result_totext(result));
    616  1.1  christos 		(void)isc_file_remove(filename);
    617  1.1  christos 		return (ISC_R_UNEXPECTED);
    618  1.1  christos 	}
    619  1.1  christos 
    620  1.1  christos 	return (ISC_R_SUCCESS);
    621  1.1  christos }
    622  1.1  christos 
    623  1.1  christos static isc_result_t
    624  1.1  christos journal_open(isc_mem_t *mctx, const char *filename, bool writable, bool create,
    625  1.1  christos 	     bool downgrade, dns_journal_t **journalp) {
    626  1.1  christos 	FILE *fp = NULL;
    627  1.1  christos 	isc_result_t result;
    628  1.1  christos 	journal_rawheader_t rawheader;
    629  1.1  christos 	dns_journal_t *j;
    630  1.1  christos 
    631  1.1  christos 	REQUIRE(journalp != NULL && *journalp == NULL);
    632  1.1  christos 
    633  1.1  christos 	j = isc_mem_get(mctx, sizeof(*j));
    634  1.1  christos 	*j = (dns_journal_t){ .state = JOURNAL_STATE_INVALID,
    635  1.1  christos 			      .filename = isc_mem_strdup(mctx, filename),
    636  1.1  christos 			      .xhdr_version = XHDR_VERSION2 };
    637  1.1  christos 	isc_mem_attach(mctx, &j->mctx);
    638  1.1  christos 
    639  1.1  christos 	result = isc_stdio_open(j->filename, writable ? "rb+" : "rb", &fp);
    640  1.1  christos 	if (result == ISC_R_FILENOTFOUND) {
    641  1.1  christos 		if (create) {
    642  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(1),
    643  1.1  christos 				      "journal file %s does not exist, "
    644  1.1  christos 				      "creating it",
    645  1.1  christos 				      j->filename);
    646  1.1  christos 			CHECK(journal_file_create(mctx, downgrade, filename));
    647  1.1  christos 			/*
    648  1.1  christos 			 * Retry.
    649  1.1  christos 			 */
    650  1.1  christos 			result = isc_stdio_open(j->filename, "rb+", &fp);
    651  1.1  christos 		} else {
    652  1.1  christos 			FAIL(ISC_R_NOTFOUND);
    653  1.1  christos 		}
    654  1.1  christos 	}
    655  1.1  christos 	if (result != ISC_R_SUCCESS) {
    656  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    657  1.1  christos 			      "%s: open: %s", j->filename,
    658  1.1  christos 			      isc_result_totext(result));
    659  1.1  christos 		FAIL(ISC_R_UNEXPECTED);
    660  1.1  christos 	}
    661  1.1  christos 
    662  1.1  christos 	j->fp = fp;
    663  1.1  christos 
    664  1.1  christos 	/*
    665  1.1  christos 	 * Set magic early so that seek/read can succeed.
    666  1.1  christos 	 */
    667  1.1  christos 	j->magic = DNS_JOURNAL_MAGIC;
    668  1.1  christos 
    669  1.1  christos 	CHECK(journal_seek(j, 0));
    670  1.1  christos 	CHECK(journal_read(j, &rawheader, sizeof(rawheader)));
    671  1.1  christos 
    672  1.1  christos 	if (memcmp(rawheader.h.format, journal_header_ver1.format,
    673  1.1  christos 		   sizeof(journal_header_ver1.format)) == 0)
    674  1.1  christos 	{
    675  1.1  christos 		/*
    676  1.1  christos 		 * The file header says it's the old format, but it
    677  1.1  christos 		 * still might have the new xhdr format because we
    678  1.1  christos 		 * forgot to change the format string when we introduced
    679  1.1  christos 		 * the new xhdr.  When we first try to read it, we assume
    680  1.1  christos 		 * it uses the new xhdr format. If that fails, we'll be
    681  1.1  christos 		 * called a second time with compat set to true, in which
    682  1.1  christos 		 * case we can lower xhdr_version to 1 if we find a
    683  1.1  christos 		 * corrupt transaction.
    684  1.1  christos 		 */
    685  1.1  christos 		j->header_ver1 = true;
    686  1.1  christos 	} else if (memcmp(rawheader.h.format, initial_journal_header.format,
    687  1.1  christos 			  sizeof(initial_journal_header.format)) == 0)
    688  1.1  christos 	{
    689  1.1  christos 		/*
    690  1.1  christos 		 * File header says this is format version 2; all
    691  1.1  christos 		 * transactions have to match.
    692  1.1  christos 		 */
    693  1.1  christos 		j->header_ver1 = false;
    694  1.1  christos 	} else {
    695  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    696  1.1  christos 			      "%s: journal format not recognized", j->filename);
    697  1.1  christos 		FAIL(ISC_R_UNEXPECTED);
    698  1.1  christos 	}
    699  1.1  christos 	journal_header_decode(&rawheader, &j->header);
    700  1.1  christos 
    701  1.1  christos 	/*
    702  1.1  christos 	 * If there is an index, read the raw index into a dynamically
    703  1.1  christos 	 * allocated buffer and then convert it into a cooked index.
    704  1.1  christos 	 */
    705  1.1  christos 	if (j->header.index_size != 0) {
    706  1.1  christos 		unsigned int i;
    707  1.1  christos 		unsigned int rawbytes;
    708  1.1  christos 		unsigned char *p;
    709  1.1  christos 
    710  1.1  christos 		rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
    711  1.1  christos 		j->rawindex = isc_mem_get(mctx, rawbytes);
    712  1.1  christos 
    713  1.1  christos 		CHECK(journal_read(j, j->rawindex, rawbytes));
    714  1.1  christos 
    715  1.1  christos 		j->index = isc_mem_get(mctx, j->header.index_size *
    716  1.1  christos 						     sizeof(journal_pos_t));
    717  1.1  christos 
    718  1.1  christos 		p = j->rawindex;
    719  1.1  christos 		for (i = 0; i < j->header.index_size; i++) {
    720  1.1  christos 			j->index[i].serial = decode_uint32(p);
    721  1.1  christos 			p += 4;
    722  1.1  christos 			j->index[i].offset = decode_uint32(p);
    723  1.1  christos 			p += 4;
    724  1.1  christos 		}
    725  1.1  christos 		INSIST(p == j->rawindex + rawbytes);
    726  1.1  christos 	}
    727  1.1  christos 	j->offset = -1; /* Invalid, must seek explicitly. */
    728  1.1  christos 
    729  1.1  christos 	/*
    730  1.1  christos 	 * Initialize the iterator.
    731  1.1  christos 	 */
    732  1.1  christos 	dns_name_init(&j->it.name, NULL);
    733  1.1  christos 	dns_rdata_init(&j->it.rdata);
    734  1.1  christos 
    735  1.1  christos 	/*
    736  1.1  christos 	 * Set up empty initial buffers for unchecked and checked
    737  1.1  christos 	 * wire format RR data.  They will be reallocated
    738  1.1  christos 	 * later.
    739  1.1  christos 	 */
    740  1.1  christos 	isc_buffer_init(&j->it.source, NULL, 0);
    741  1.1  christos 	isc_buffer_init(&j->it.target, NULL, 0);
    742  1.1  christos 	dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE);
    743  1.1  christos 
    744  1.1  christos 	j->state = writable ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ;
    745  1.1  christos 
    746  1.1  christos 	*journalp = j;
    747  1.1  christos 	return (ISC_R_SUCCESS);
    748  1.1  christos 
    749  1.1  christos failure:
    750  1.1  christos 	j->magic = 0;
    751  1.1  christos 	if (j->rawindex != NULL) {
    752  1.1  christos 		isc_mem_put(j->mctx, j->rawindex,
    753  1.1  christos 			    j->header.index_size * sizeof(journal_rawpos_t));
    754  1.1  christos 	}
    755  1.1  christos 	if (j->index != NULL) {
    756  1.1  christos 		isc_mem_put(j->mctx, j->index,
    757  1.1  christos 			    j->header.index_size * sizeof(journal_pos_t));
    758  1.1  christos 	}
    759  1.1  christos 	isc_mem_free(j->mctx, j->filename);
    760  1.1  christos 	if (j->fp != NULL) {
    761  1.1  christos 		(void)isc_stdio_close(j->fp);
    762  1.1  christos 	}
    763  1.1  christos 	isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
    764  1.1  christos 	return (result);
    765  1.1  christos }
    766  1.1  christos 
    767  1.1  christos isc_result_t
    768  1.1  christos dns_journal_open(isc_mem_t *mctx, const char *filename, unsigned int mode,
    769  1.1  christos 		 dns_journal_t **journalp) {
    770  1.1  christos 	isc_result_t result;
    771  1.1  christos 	size_t namelen;
    772  1.1  christos 	char backup[1024];
    773  1.1  christos 	bool writable, create;
    774  1.1  christos 
    775  1.1  christos 	create = ((mode & DNS_JOURNAL_CREATE) != 0);
    776  1.1  christos 	writable = ((mode & (DNS_JOURNAL_WRITE | DNS_JOURNAL_CREATE)) != 0);
    777  1.1  christos 
    778  1.1  christos 	result = journal_open(mctx, filename, writable, create, false,
    779  1.1  christos 			      journalp);
    780  1.1  christos 	if (result == ISC_R_NOTFOUND) {
    781  1.1  christos 		namelen = strlen(filename);
    782  1.1  christos 		if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0)
    783  1.1  christos 		{
    784  1.1  christos 			namelen -= 4;
    785  1.1  christos 		}
    786  1.1  christos 
    787  1.1  christos 		result = snprintf(backup, sizeof(backup), "%.*s.jbk",
    788  1.1  christos 				  (int)namelen, filename);
    789  1.1  christos 		if (result >= sizeof(backup)) {
    790  1.1  christos 			return (ISC_R_NOSPACE);
    791  1.1  christos 		}
    792  1.1  christos 		result = journal_open(mctx, backup, writable, writable, false,
    793  1.1  christos 				      journalp);
    794  1.1  christos 	}
    795  1.1  christos 	return (result);
    796  1.1  christos }
    797  1.1  christos 
    798  1.1  christos /*
    799  1.1  christos  * A comparison function defining the sorting order for
    800  1.1  christos  * entries in the IXFR-style journal file.
    801  1.1  christos  *
    802  1.1  christos  * The IXFR format requires that deletions are sorted before
    803  1.1  christos  * additions, and within either one, SOA records are sorted
    804  1.1  christos  * before others.
    805  1.1  christos  *
    806  1.1  christos  * Also sort the non-SOA records by type as a courtesy to the
    807  1.1  christos  * server receiving the IXFR - it may help reduce the amount of
    808  1.1  christos  * rdataset merging it has to do.
    809  1.1  christos  */
    810  1.1  christos static int
    811  1.1  christos ixfr_order(const void *av, const void *bv) {
    812  1.1  christos 	dns_difftuple_t const *const *ap = av;
    813  1.1  christos 	dns_difftuple_t const *const *bp = bv;
    814  1.1  christos 	dns_difftuple_t const *a = *ap;
    815  1.1  christos 	dns_difftuple_t const *b = *bp;
    816  1.1  christos 	int r;
    817  1.1  christos 	int bop = 0, aop = 0;
    818  1.1  christos 
    819  1.1  christos 	switch (a->op) {
    820  1.1  christos 	case DNS_DIFFOP_DEL:
    821  1.1  christos 	case DNS_DIFFOP_DELRESIGN:
    822  1.1  christos 		aop = 1;
    823  1.1  christos 		break;
    824  1.1  christos 	case DNS_DIFFOP_ADD:
    825  1.1  christos 	case DNS_DIFFOP_ADDRESIGN:
    826  1.1  christos 		aop = 0;
    827  1.1  christos 		break;
    828  1.1  christos 	default:
    829  1.1  christos 		UNREACHABLE();
    830  1.1  christos 	}
    831  1.1  christos 
    832  1.1  christos 	switch (b->op) {
    833  1.1  christos 	case DNS_DIFFOP_DEL:
    834  1.1  christos 	case DNS_DIFFOP_DELRESIGN:
    835  1.1  christos 		bop = 1;
    836  1.1  christos 		break;
    837  1.1  christos 	case DNS_DIFFOP_ADD:
    838  1.1  christos 	case DNS_DIFFOP_ADDRESIGN:
    839  1.1  christos 		bop = 0;
    840  1.1  christos 		break;
    841  1.1  christos 	default:
    842  1.1  christos 		UNREACHABLE();
    843  1.1  christos 	}
    844  1.1  christos 
    845  1.1  christos 	r = bop - aop;
    846  1.1  christos 	if (r != 0) {
    847  1.1  christos 		return (r);
    848  1.1  christos 	}
    849  1.1  christos 
    850  1.1  christos 	r = (b->rdata.type == dns_rdatatype_soa) -
    851  1.1  christos 	    (a->rdata.type == dns_rdatatype_soa);
    852  1.1  christos 	if (r != 0) {
    853  1.1  christos 		return (r);
    854  1.1  christos 	}
    855  1.1  christos 
    856  1.1  christos 	r = (a->rdata.type - b->rdata.type);
    857  1.1  christos 	return (r);
    858  1.1  christos }
    859  1.1  christos 
    860  1.1  christos static isc_result_t
    861  1.1  christos maybe_fixup_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr, uint32_t serial,
    862  1.1  christos 		 isc_offset_t offset) {
    863  1.1  christos 	isc_result_t result = ISC_R_SUCCESS;
    864  1.1  christos 
    865  1.1  christos 	/*
    866  1.1  christos 	 * Handle mixture of version 1 and version 2
    867  1.1  christos 	 * transaction headers in a version 1 journal.
    868  1.1  christos 	 */
    869  1.1  christos 	if ((xhdr->serial0 != serial ||
    870  1.1  christos 	     isc_serial_le(xhdr->serial1, xhdr->serial0)))
    871  1.1  christos 	{
    872  1.1  christos 		if (j->xhdr_version == XHDR_VERSION1 && xhdr->serial1 == serial)
    873  1.1  christos 		{
    874  1.1  christos 			isc_log_write(
    875  1.1  christos 				JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
    876  1.1  christos 				"%s: XHDR_VERSION1 -> XHDR_VERSION2 at %u",
    877  1.1  christos 				j->filename, serial);
    878  1.1  christos 			j->xhdr_version = XHDR_VERSION2;
    879  1.1  christos 			CHECK(journal_seek(j, offset));
    880  1.1  christos 			CHECK(journal_read_xhdr(j, xhdr));
    881  1.1  christos 			j->recovered = true;
    882  1.1  christos 		} else if (j->xhdr_version == XHDR_VERSION2 &&
    883  1.1  christos 			   xhdr->count == serial)
    884  1.1  christos 		{
    885  1.1  christos 			isc_log_write(
    886  1.1  christos 				JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
    887  1.1  christos 				"%s: XHDR_VERSION2 -> XHDR_VERSION1 at %u",
    888  1.1  christos 				j->filename, serial);
    889  1.1  christos 			j->xhdr_version = XHDR_VERSION1;
    890  1.1  christos 			CHECK(journal_seek(j, offset));
    891  1.1  christos 			CHECK(journal_read_xhdr(j, xhdr));
    892  1.1  christos 			j->recovered = true;
    893  1.1  christos 		}
    894  1.1  christos 	}
    895  1.1  christos 
    896  1.1  christos 	/*
    897  1.1  christos 	 * Handle <size, serial0, serial1, 0> transaction header.
    898  1.1  christos 	 */
    899  1.1  christos 	if (j->xhdr_version == XHDR_VERSION1) {
    900  1.1  christos 		uint32_t value;
    901  1.1  christos 
    902  1.1  christos 		CHECK(journal_read(j, &value, sizeof(value)));
    903  1.1  christos 		if (value != 0L) {
    904  1.1  christos 			CHECK(journal_seek(j, offset + 12));
    905  1.1  christos 		} else {
    906  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
    907  1.1  christos 				      "%s: XHDR_VERSION1 count zero at %u",
    908  1.1  christos 				      j->filename, serial);
    909  1.1  christos 			j->xhdr_version = XHDR_VERSION2;
    910  1.1  christos 			j->recovered = true;
    911  1.1  christos 		}
    912  1.1  christos 	} else if (j->xhdr_version == XHDR_VERSION2 && xhdr->count == serial &&
    913  1.1  christos 		   xhdr->serial1 == 0U &&
    914  1.1  christos 		   isc_serial_gt(xhdr->serial0, xhdr->count))
    915  1.1  christos 	{
    916  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3),
    917  1.1  christos 			      "%s: XHDR_VERSION2 count zero at %u", j->filename,
    918  1.1  christos 			      serial);
    919  1.1  christos 		xhdr->serial1 = xhdr->serial0;
    920  1.1  christos 		xhdr->serial0 = xhdr->count;
    921  1.1  christos 		xhdr->count = 0;
    922  1.1  christos 		j->recovered = true;
    923  1.1  christos 	}
    924  1.1  christos 
    925  1.1  christos failure:
    926  1.1  christos 	return (result);
    927  1.1  christos }
    928  1.1  christos 
    929  1.1  christos /*
    930  1.1  christos  * Advance '*pos' to the next journal transaction.
    931  1.1  christos  *
    932  1.1  christos  * Requires:
    933  1.1  christos  *	*pos refers to a valid journal transaction.
    934  1.1  christos  *
    935  1.1  christos  * Ensures:
    936  1.1  christos  *	When ISC_R_SUCCESS is returned,
    937  1.1  christos  *	*pos refers to the next journal transaction.
    938  1.1  christos  *
    939  1.1  christos  * Returns one of:
    940  1.1  christos  *
    941  1.1  christos  *    ISC_R_SUCCESS
    942  1.1  christos  *    ISC_R_NOMORE 	*pos pointed at the last transaction
    943  1.1  christos  *    Other results due to file errors are possible.
    944  1.1  christos  */
    945  1.1  christos static isc_result_t
    946  1.1  christos journal_next(dns_journal_t *j, journal_pos_t *pos) {
    947  1.1  christos 	isc_result_t result;
    948  1.1  christos 	journal_xhdr_t xhdr;
    949  1.1  christos 	size_t hdrsize;
    950  1.1  christos 
    951  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
    952  1.1  christos 
    953  1.1  christos 	result = journal_seek(j, pos->offset);
    954  1.1  christos 	if (result != ISC_R_SUCCESS) {
    955  1.1  christos 		return (result);
    956  1.1  christos 	}
    957  1.1  christos 
    958  1.1  christos 	if (pos->serial == j->header.end.serial) {
    959  1.1  christos 		return (ISC_R_NOMORE);
    960  1.1  christos 	}
    961  1.1  christos 
    962  1.1  christos 	/*
    963  1.1  christos 	 * Read the header of the current transaction.
    964  1.1  christos 	 * This will return ISC_R_NOMORE if we are at EOF.
    965  1.1  christos 	 */
    966  1.1  christos 	result = journal_read_xhdr(j, &xhdr);
    967  1.1  christos 	if (result != ISC_R_SUCCESS) {
    968  1.1  christos 		return (result);
    969  1.1  christos 	}
    970  1.1  christos 
    971  1.1  christos 	if (j->header_ver1) {
    972  1.1  christos 		CHECK(maybe_fixup_xhdr(j, &xhdr, pos->serial, pos->offset));
    973  1.1  christos 	}
    974  1.1  christos 
    975  1.1  christos 	/*
    976  1.1  christos 	 * Check serial number consistency.
    977  1.1  christos 	 */
    978  1.1  christos 	if (xhdr.serial0 != pos->serial ||
    979  1.1  christos 	    isc_serial_le(xhdr.serial1, xhdr.serial0))
    980  1.1  christos 	{
    981  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    982  1.1  christos 			      "%s: journal file corrupt: "
    983  1.1  christos 			      "expected serial %u, got %u",
    984  1.1  christos 			      j->filename, pos->serial, xhdr.serial0);
    985  1.1  christos 		return (ISC_R_UNEXPECTED);
    986  1.1  christos 	}
    987  1.1  christos 
    988  1.1  christos 	/*
    989  1.1  christos 	 * Check for offset wraparound.
    990  1.1  christos 	 */
    991  1.1  christos 	hdrsize = (j->xhdr_version == XHDR_VERSION2)
    992  1.1  christos 			  ? sizeof(journal_rawxhdr_t)
    993  1.1  christos 			  : sizeof(journal_rawxhdr_ver1_t);
    994  1.1  christos 
    995  1.1  christos 	if ((isc_offset_t)(pos->offset + hdrsize + xhdr.size) < pos->offset) {
    996  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
    997  1.1  christos 			      "%s: offset too large", j->filename);
    998  1.1  christos 		return (ISC_R_UNEXPECTED);
    999  1.1  christos 	}
   1000  1.1  christos 
   1001  1.1  christos 	pos->offset += hdrsize + xhdr.size;
   1002  1.1  christos 	pos->serial = xhdr.serial1;
   1003  1.1  christos 	return (ISC_R_SUCCESS);
   1004  1.1  christos 
   1005  1.1  christos failure:
   1006  1.1  christos 	return (result);
   1007  1.1  christos }
   1008  1.1  christos 
   1009  1.1  christos /*
   1010  1.1  christos  * If the index of the journal 'j' contains an entry "better"
   1011  1.1  christos  * than '*best_guess', replace '*best_guess' with it.
   1012  1.1  christos  *
   1013  1.1  christos  * "Better" means having a serial number closer to 'serial'
   1014  1.1  christos  * but not greater than 'serial'.
   1015  1.1  christos  */
   1016  1.1  christos static void
   1017  1.1  christos index_find(dns_journal_t *j, uint32_t serial, journal_pos_t *best_guess) {
   1018  1.1  christos 	unsigned int i;
   1019  1.1  christos 	if (j->index == NULL) {
   1020  1.1  christos 		return;
   1021  1.1  christos 	}
   1022  1.1  christos 	for (i = 0; i < j->header.index_size; i++) {
   1023  1.1  christos 		if (POS_VALID(j->index[i]) &&
   1024  1.1  christos 		    DNS_SERIAL_GE(serial, j->index[i].serial) &&
   1025  1.1  christos 		    DNS_SERIAL_GT(j->index[i].serial, best_guess->serial))
   1026  1.1  christos 		{
   1027  1.1  christos 			*best_guess = j->index[i];
   1028  1.1  christos 		}
   1029  1.1  christos 	}
   1030  1.1  christos }
   1031  1.1  christos 
   1032  1.1  christos /*
   1033  1.1  christos  * Add a new index entry.  If there is no room, make room by removing
   1034  1.1  christos  * the odd-numbered entries and compacting the others into the first
   1035  1.1  christos  * half of the index.  This decimates old index entries exponentially
   1036  1.1  christos  * over time, so that the index always contains a much larger fraction
   1037  1.1  christos  * of recent serial numbers than of old ones.  This is deliberate -
   1038  1.1  christos  * most index searches are for outgoing IXFR, and IXFR tends to request
   1039  1.1  christos  * recent versions more often than old ones.
   1040  1.1  christos  */
   1041  1.1  christos static void
   1042  1.1  christos index_add(dns_journal_t *j, journal_pos_t *pos) {
   1043  1.1  christos 	unsigned int i;
   1044  1.1  christos 
   1045  1.1  christos 	if (j->index == NULL) {
   1046  1.1  christos 		return;
   1047  1.1  christos 	}
   1048  1.1  christos 
   1049  1.1  christos 	/*
   1050  1.1  christos 	 * Search for a vacant position.
   1051  1.1  christos 	 */
   1052  1.1  christos 	for (i = 0; i < j->header.index_size; i++) {
   1053  1.1  christos 		if (!POS_VALID(j->index[i])) {
   1054  1.1  christos 			break;
   1055  1.1  christos 		}
   1056  1.1  christos 	}
   1057  1.1  christos 	if (i == j->header.index_size) {
   1058  1.1  christos 		unsigned int k = 0;
   1059  1.1  christos 		/*
   1060  1.1  christos 		 * Found no vacant position.  Make some room.
   1061  1.1  christos 		 */
   1062  1.1  christos 		for (i = 0; i < j->header.index_size; i += 2) {
   1063  1.1  christos 			j->index[k++] = j->index[i];
   1064  1.1  christos 		}
   1065  1.1  christos 		i = k; /* 'i' identifies the first vacant position. */
   1066  1.1  christos 		while (k < j->header.index_size) {
   1067  1.1  christos 			POS_INVALIDATE(j->index[k]);
   1068  1.1  christos 			k++;
   1069  1.1  christos 		}
   1070  1.1  christos 	}
   1071  1.1  christos 	INSIST(i < j->header.index_size);
   1072  1.1  christos 	INSIST(!POS_VALID(j->index[i]));
   1073  1.1  christos 
   1074  1.1  christos 	/*
   1075  1.1  christos 	 * Store the new index entry.
   1076  1.1  christos 	 */
   1077  1.1  christos 	j->index[i] = *pos;
   1078  1.1  christos }
   1079  1.1  christos 
   1080  1.1  christos /*
   1081  1.1  christos  * Invalidate any existing index entries that could become
   1082  1.1  christos  * ambiguous when a new transaction with number 'serial' is added.
   1083  1.1  christos  */
   1084  1.1  christos static void
   1085  1.1  christos index_invalidate(dns_journal_t *j, uint32_t serial) {
   1086  1.1  christos 	unsigned int i;
   1087  1.1  christos 	if (j->index == NULL) {
   1088  1.1  christos 		return;
   1089  1.1  christos 	}
   1090  1.1  christos 	for (i = 0; i < j->header.index_size; i++) {
   1091  1.1  christos 		if (!DNS_SERIAL_GT(serial, j->index[i].serial)) {
   1092  1.1  christos 			POS_INVALIDATE(j->index[i]);
   1093  1.1  christos 		}
   1094  1.1  christos 	}
   1095  1.1  christos }
   1096  1.1  christos 
   1097  1.1  christos /*
   1098  1.1  christos  * Try to find a transaction with initial serial number 'serial'
   1099  1.1  christos  * in the journal 'j'.
   1100  1.1  christos  *
   1101  1.1  christos  * If found, store its position at '*pos' and return ISC_R_SUCCESS.
   1102  1.1  christos  *
   1103  1.1  christos  * If 'serial' is current (= the ending serial number of the
   1104  1.1  christos  * last transaction in the journal), set '*pos' to
   1105  1.1  christos  * the position immediately following the last transaction and
   1106  1.1  christos  * return ISC_R_SUCCESS.
   1107  1.1  christos  *
   1108  1.1  christos  * If 'serial' is within the range of addressable serial numbers
   1109  1.1  christos  * covered by the journal but that particular serial number is missing
   1110  1.1  christos  * (from the journal, not just from the index), return ISC_R_NOTFOUND.
   1111  1.1  christos  *
   1112  1.1  christos  * If 'serial' is outside the range of addressable serial numbers
   1113  1.1  christos  * covered by the journal, return ISC_R_RANGE.
   1114  1.1  christos  *
   1115  1.1  christos  */
   1116  1.1  christos static isc_result_t
   1117  1.1  christos journal_find(dns_journal_t *j, uint32_t serial, journal_pos_t *pos) {
   1118  1.1  christos 	isc_result_t result;
   1119  1.1  christos 	journal_pos_t current_pos;
   1120  1.1  christos 
   1121  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
   1122  1.1  christos 
   1123  1.1  christos 	if (DNS_SERIAL_GT(j->header.begin.serial, serial)) {
   1124  1.1  christos 		return (ISC_R_RANGE);
   1125  1.1  christos 	}
   1126  1.1  christos 	if (DNS_SERIAL_GT(serial, j->header.end.serial)) {
   1127  1.1  christos 		return (ISC_R_RANGE);
   1128  1.1  christos 	}
   1129  1.1  christos 	if (serial == j->header.end.serial) {
   1130  1.1  christos 		*pos = j->header.end;
   1131  1.1  christos 		return (ISC_R_SUCCESS);
   1132  1.1  christos 	}
   1133  1.1  christos 
   1134  1.1  christos 	current_pos = j->header.begin;
   1135  1.1  christos 	index_find(j, serial, &current_pos);
   1136  1.1  christos 
   1137  1.1  christos 	while (current_pos.serial != serial) {
   1138  1.1  christos 		if (DNS_SERIAL_GT(current_pos.serial, serial)) {
   1139  1.1  christos 			return (ISC_R_NOTFOUND);
   1140  1.1  christos 		}
   1141  1.1  christos 		result = journal_next(j, &current_pos);
   1142  1.1  christos 		if (result != ISC_R_SUCCESS) {
   1143  1.1  christos 			return (result);
   1144  1.1  christos 		}
   1145  1.1  christos 	}
   1146  1.1  christos 	*pos = current_pos;
   1147  1.1  christos 	return (ISC_R_SUCCESS);
   1148  1.1  christos }
   1149  1.1  christos 
   1150  1.1  christos isc_result_t
   1151  1.1  christos dns_journal_begin_transaction(dns_journal_t *j) {
   1152  1.1  christos 	uint32_t offset;
   1153  1.1  christos 	isc_result_t result;
   1154  1.1  christos 
   1155  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
   1156  1.1  christos 	REQUIRE(j->state == JOURNAL_STATE_WRITE ||
   1157  1.1  christos 		j->state == JOURNAL_STATE_INLINE);
   1158  1.1  christos 
   1159  1.1  christos 	/*
   1160  1.1  christos 	 * Find the file offset where the new transaction should
   1161  1.1  christos 	 * be written, and seek there.
   1162  1.1  christos 	 */
   1163  1.1  christos 	if (JOURNAL_EMPTY(&j->header)) {
   1164  1.1  christos 		offset = sizeof(journal_rawheader_t) +
   1165  1.1  christos 			 j->header.index_size * sizeof(journal_rawpos_t);
   1166  1.1  christos 	} else {
   1167  1.1  christos 		offset = j->header.end.offset;
   1168  1.1  christos 	}
   1169  1.1  christos 	j->x.pos[0].offset = offset;
   1170  1.1  christos 	j->x.pos[1].offset = offset; /* Initial value, will be incremented. */
   1171  1.1  christos 	j->x.n_soa = 0;
   1172  1.1  christos 
   1173  1.1  christos 	CHECK(journal_seek(j, offset));
   1174  1.1  christos 
   1175  1.1  christos 	/*
   1176  1.1  christos 	 * Write a dummy transaction header of all zeroes to reserve
   1177  1.1  christos 	 * space.  It will be filled in when the transaction is
   1178  1.1  christos 	 * finished.
   1179  1.1  christos 	 */
   1180  1.1  christos 	CHECK(journal_write_xhdr(j, 0, 0, 0, 0));
   1181  1.1  christos 	j->x.pos[1].offset = j->offset;
   1182  1.1  christos 
   1183  1.1  christos 	j->state = JOURNAL_STATE_TRANSACTION;
   1184  1.1  christos 	result = ISC_R_SUCCESS;
   1185  1.1  christos failure:
   1186  1.1  christos 	return (result);
   1187  1.1  christos }
   1188  1.1  christos 
   1189  1.1  christos isc_result_t
   1190  1.1  christos dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) {
   1191  1.1  christos 	dns_difftuple_t *t;
   1192  1.1  christos 	isc_buffer_t buffer;
   1193  1.1  christos 	void *mem = NULL;
   1194  1.1  christos 	uint64_t size = 0;
   1195  1.1  christos 	uint32_t rrcount = 0;
   1196  1.1  christos 	isc_result_t result;
   1197  1.1  christos 	isc_region_t used;
   1198  1.1  christos 
   1199  1.1  christos 	REQUIRE(DNS_DIFF_VALID(diff));
   1200  1.1  christos 	REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);
   1201  1.1  christos 
   1202  1.1  christos 	isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal");
   1203  1.1  christos 	(void)dns_diff_print(diff, NULL);
   1204  1.1  christos 
   1205  1.1  christos 	/*
   1206  1.1  christos 	 * Pass 1: determine the buffer size needed, and
   1207  1.1  christos 	 * keep track of SOA serial numbers.
   1208  1.1  christos 	 */
   1209  1.1  christos 	for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
   1210  1.1  christos 	     t = ISC_LIST_NEXT(t, link))
   1211  1.1  christos 	{
   1212  1.1  christos 		if (t->rdata.type == dns_rdatatype_soa) {
   1213  1.1  christos 			if (j->x.n_soa < 2) {
   1214  1.1  christos 				j->x.pos[j->x.n_soa].serial =
   1215  1.1  christos 					dns_soa_getserial(&t->rdata);
   1216  1.1  christos 			}
   1217  1.1  christos 			j->x.n_soa++;
   1218  1.1  christos 		}
   1219  1.1  christos 		size += sizeof(journal_rawrrhdr_t);
   1220  1.1  christos 		size += t->name.length; /* XXX should have access macro? */
   1221  1.1  christos 		size += 10;
   1222  1.1  christos 		size += t->rdata.length;
   1223  1.1  christos 	}
   1224  1.1  christos 
   1225  1.1  christos 	if (size >= DNS_JOURNAL_SIZE_MAX) {
   1226  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1227  1.1  christos 			      "dns_journal_writediff: %s: journal entry "
   1228  1.1  christos 			      "too big to be stored: %" PRIu64 " bytes",
   1229  1.1  christos 			      j->filename, size);
   1230  1.1  christos 		return (ISC_R_NOSPACE);
   1231  1.1  christos 	}
   1232  1.1  christos 
   1233  1.1  christos 	mem = isc_mem_get(j->mctx, size);
   1234  1.1  christos 
   1235  1.1  christos 	isc_buffer_init(&buffer, mem, size);
   1236  1.1  christos 
   1237  1.1  christos 	/*
   1238  1.1  christos 	 * Pass 2.  Write RRs to buffer.
   1239  1.1  christos 	 */
   1240  1.1  christos 	for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
   1241  1.1  christos 	     t = ISC_LIST_NEXT(t, link))
   1242  1.1  christos 	{
   1243  1.1  christos 		/*
   1244  1.1  christos 		 * Write the RR header.
   1245  1.1  christos 		 */
   1246  1.1  christos 		isc_buffer_putuint32(&buffer,
   1247  1.1  christos 				     t->name.length + 10 + t->rdata.length);
   1248  1.1  christos 		/*
   1249  1.1  christos 		 * Write the owner name, RR header, and RR data.
   1250  1.1  christos 		 */
   1251  1.1  christos 		isc_buffer_putmem(&buffer, t->name.ndata, t->name.length);
   1252  1.1  christos 		isc_buffer_putuint16(&buffer, t->rdata.type);
   1253  1.1  christos 		isc_buffer_putuint16(&buffer, t->rdata.rdclass);
   1254  1.1  christos 		isc_buffer_putuint32(&buffer, t->ttl);
   1255  1.1  christos 		INSIST(t->rdata.length < 65536);
   1256  1.1  christos 		isc_buffer_putuint16(&buffer, (uint16_t)t->rdata.length);
   1257  1.1  christos 		INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length);
   1258  1.1  christos 		isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length);
   1259  1.1  christos 
   1260  1.1  christos 		rrcount++;
   1261  1.1  christos 	}
   1262  1.1  christos 
   1263  1.1  christos 	isc_buffer_usedregion(&buffer, &used);
   1264  1.1  christos 	INSIST(used.length == size);
   1265  1.1  christos 
   1266  1.1  christos 	j->x.pos[1].offset += used.length;
   1267  1.1  christos 	j->x.n_rr = rrcount;
   1268  1.1  christos 
   1269  1.1  christos 	/*
   1270  1.1  christos 	 * Write the buffer contents to the journal file.
   1271  1.1  christos 	 */
   1272  1.1  christos 	CHECK(journal_write(j, used.base, used.length));
   1273  1.1  christos 
   1274  1.1  christos 	result = ISC_R_SUCCESS;
   1275  1.1  christos 
   1276  1.1  christos failure:
   1277  1.1  christos 	if (mem != NULL) {
   1278  1.1  christos 		isc_mem_put(j->mctx, mem, size);
   1279  1.1  christos 	}
   1280  1.1  christos 	return (result);
   1281  1.1  christos }
   1282  1.1  christos 
   1283  1.1  christos isc_result_t
   1284  1.1  christos dns_journal_commit(dns_journal_t *j) {
   1285  1.1  christos 	isc_result_t result;
   1286  1.1  christos 	journal_rawheader_t rawheader;
   1287  1.1  christos 	uint64_t total;
   1288  1.1  christos 
   1289  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
   1290  1.1  christos 	REQUIRE(j->state == JOURNAL_STATE_TRANSACTION ||
   1291  1.1  christos 		j->state == JOURNAL_STATE_INLINE);
   1292  1.1  christos 
   1293  1.1  christos 	/*
   1294  1.1  christos 	 * Just write out a updated header.
   1295  1.1  christos 	 */
   1296  1.1  christos 	if (j->state == JOURNAL_STATE_INLINE) {
   1297  1.1  christos 		CHECK(journal_fsync(j));
   1298  1.1  christos 		journal_header_encode(&j->header, &rawheader);
   1299  1.1  christos 		CHECK(journal_seek(j, 0));
   1300  1.1  christos 		CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
   1301  1.1  christos 		CHECK(journal_fsync(j));
   1302  1.1  christos 		j->state = JOURNAL_STATE_WRITE;
   1303  1.1  christos 		return (ISC_R_SUCCESS);
   1304  1.1  christos 	}
   1305  1.1  christos 
   1306  1.1  christos 	/*
   1307  1.1  christos 	 * Perform some basic consistency checks.
   1308  1.1  christos 	 */
   1309  1.1  christos 	if (j->x.n_soa != 2) {
   1310  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1311  1.1  christos 			      "%s: malformed transaction: %d SOAs", j->filename,
   1312  1.1  christos 			      j->x.n_soa);
   1313  1.1  christos 		return (ISC_R_UNEXPECTED);
   1314  1.1  christos 	}
   1315  1.1  christos 	if (!DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial)) {
   1316  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1317  1.1  christos 			      "%s: malformed transaction: serial number "
   1318  1.1  christos 			      "did not increase",
   1319  1.1  christos 			      j->filename);
   1320  1.1  christos 		return (ISC_R_UNEXPECTED);
   1321  1.1  christos 	}
   1322  1.1  christos 	if (!JOURNAL_EMPTY(&j->header)) {
   1323  1.1  christos 		if (j->x.pos[0].serial != j->header.end.serial) {
   1324  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1325  1.1  christos 				      "malformed transaction: "
   1326  1.1  christos 				      "%s last serial %u != "
   1327  1.1  christos 				      "transaction first serial %u",
   1328  1.1  christos 				      j->filename, j->header.end.serial,
   1329  1.1  christos 				      j->x.pos[0].serial);
   1330  1.1  christos 			return (ISC_R_UNEXPECTED);
   1331  1.1  christos 		}
   1332  1.1  christos 	}
   1333  1.1  christos 
   1334  1.1  christos 	/*
   1335  1.1  christos 	 * We currently don't support huge journal entries.
   1336  1.1  christos 	 */
   1337  1.1  christos 	total = j->x.pos[1].offset - j->x.pos[0].offset;
   1338  1.1  christos 	if (total >= DNS_JOURNAL_SIZE_MAX) {
   1339  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1340  1.1  christos 			      "transaction too big to be stored in journal: "
   1341  1.1  christos 			      "%" PRIu64 "b (max is %" PRIu64 "b)",
   1342  1.1  christos 			      total, (uint64_t)DNS_JOURNAL_SIZE_MAX);
   1343  1.1  christos 		return (ISC_R_UNEXPECTED);
   1344  1.1  christos 	}
   1345  1.1  christos 
   1346  1.1  christos 	/*
   1347  1.1  christos 	 * Some old journal entries may become non-addressable
   1348  1.1  christos 	 * when we increment the current serial number.  Purge them
   1349  1.1  christos 	 * by stepping header.begin forward to the first addressable
   1350  1.1  christos 	 * transaction.  Also purge them from the index.
   1351  1.1  christos 	 */
   1352  1.1  christos 	if (!JOURNAL_EMPTY(&j->header)) {
   1353  1.1  christos 		while (!DNS_SERIAL_GT(j->x.pos[1].serial,
   1354  1.1  christos 				      j->header.begin.serial))
   1355  1.1  christos 		{
   1356  1.1  christos 			CHECK(journal_next(j, &j->header.begin));
   1357  1.1  christos 		}
   1358  1.1  christos 		index_invalidate(j, j->x.pos[1].serial);
   1359  1.1  christos 	}
   1360  1.1  christos #ifdef notyet
   1361  1.1  christos 	if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) {
   1362  1.1  christos 		force_dump(...);
   1363  1.1  christos 	}
   1364  1.1  christos #endif /* ifdef notyet */
   1365  1.1  christos 
   1366  1.1  christos 	/*
   1367  1.1  christos 	 * Commit the transaction data to stable storage.
   1368  1.1  christos 	 */
   1369  1.1  christos 	CHECK(journal_fsync(j));
   1370  1.1  christos 
   1371  1.1  christos 	if (j->state == JOURNAL_STATE_TRANSACTION) {
   1372  1.1  christos 		isc_offset_t offset;
   1373  1.1  christos 		offset = (j->x.pos[1].offset - j->x.pos[0].offset) -
   1374  1.1  christos 			 (j->header_ver1 ? sizeof(journal_rawxhdr_ver1_t)
   1375  1.1  christos 					 : sizeof(journal_rawxhdr_t));
   1376  1.1  christos 		/*
   1377  1.1  christos 		 * Update the transaction header.
   1378  1.1  christos 		 */
   1379  1.1  christos 		CHECK(journal_seek(j, j->x.pos[0].offset));
   1380  1.1  christos 		CHECK(journal_write_xhdr(j, offset, j->x.n_rr,
   1381  1.1  christos 					 j->x.pos[0].serial,
   1382  1.1  christos 					 j->x.pos[1].serial));
   1383  1.1  christos 	}
   1384  1.1  christos 
   1385  1.1  christos 	/*
   1386  1.1  christos 	 * Update the journal header.
   1387  1.1  christos 	 */
   1388  1.1  christos 	if (JOURNAL_EMPTY(&j->header)) {
   1389  1.1  christos 		j->header.begin = j->x.pos[0];
   1390  1.1  christos 	}
   1391  1.1  christos 	j->header.end = j->x.pos[1];
   1392  1.1  christos 	journal_header_encode(&j->header, &rawheader);
   1393  1.1  christos 	CHECK(journal_seek(j, 0));
   1394  1.1  christos 	CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
   1395  1.1  christos 
   1396  1.1  christos 	/*
   1397  1.1  christos 	 * Update the index.
   1398  1.1  christos 	 */
   1399  1.1  christos 	index_add(j, &j->x.pos[0]);
   1400  1.1  christos 
   1401  1.1  christos 	/*
   1402  1.1  christos 	 * Convert the index into on-disk format and write
   1403  1.1  christos 	 * it to disk.
   1404  1.1  christos 	 */
   1405  1.1  christos 	CHECK(index_to_disk(j));
   1406  1.1  christos 
   1407  1.1  christos 	/*
   1408  1.1  christos 	 * Commit the header to stable storage.
   1409  1.1  christos 	 */
   1410  1.1  christos 	CHECK(journal_fsync(j));
   1411  1.1  christos 
   1412  1.1  christos 	/*
   1413  1.1  christos 	 * We no longer have a transaction open.
   1414  1.1  christos 	 */
   1415  1.1  christos 	j->state = JOURNAL_STATE_WRITE;
   1416  1.1  christos 
   1417  1.1  christos 	result = ISC_R_SUCCESS;
   1418  1.1  christos 
   1419  1.1  christos failure:
   1420  1.1  christos 	return (result);
   1421  1.1  christos }
   1422  1.1  christos 
   1423  1.1  christos isc_result_t
   1424  1.1  christos dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) {
   1425  1.1  christos 	isc_result_t result;
   1426  1.1  christos 
   1427  1.1  christos 	CHECK(dns_diff_sort(diff, ixfr_order));
   1428  1.1  christos 	CHECK(dns_journal_begin_transaction(j));
   1429  1.1  christos 	CHECK(dns_journal_writediff(j, diff));
   1430  1.1  christos 	CHECK(dns_journal_commit(j));
   1431  1.1  christos 	result = ISC_R_SUCCESS;
   1432  1.1  christos failure:
   1433  1.1  christos 	return (result);
   1434  1.1  christos }
   1435  1.1  christos 
   1436  1.1  christos void
   1437  1.1  christos dns_journal_destroy(dns_journal_t **journalp) {
   1438  1.1  christos 	dns_journal_t *j = NULL;
   1439  1.1  christos 
   1440  1.1  christos 	REQUIRE(journalp != NULL);
   1441  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(*journalp));
   1442  1.1  christos 
   1443  1.1  christos 	j = *journalp;
   1444  1.1  christos 	*journalp = NULL;
   1445  1.1  christos 
   1446  1.1  christos 	j->it.result = ISC_R_FAILURE;
   1447  1.1  christos 	dns_name_invalidate(&j->it.name);
   1448  1.1  christos 	dns_decompress_invalidate(&j->it.dctx);
   1449  1.1  christos 	if (j->rawindex != NULL) {
   1450  1.1  christos 		isc_mem_put(j->mctx, j->rawindex,
   1451  1.1  christos 			    j->header.index_size * sizeof(journal_rawpos_t));
   1452  1.1  christos 	}
   1453  1.1  christos 	if (j->index != NULL) {
   1454  1.1  christos 		isc_mem_put(j->mctx, j->index,
   1455  1.1  christos 			    j->header.index_size * sizeof(journal_pos_t));
   1456  1.1  christos 	}
   1457  1.1  christos 	if (j->it.target.base != NULL) {
   1458  1.1  christos 		isc_mem_put(j->mctx, j->it.target.base, j->it.target.length);
   1459  1.1  christos 	}
   1460  1.1  christos 	if (j->it.source.base != NULL) {
   1461  1.1  christos 		isc_mem_put(j->mctx, j->it.source.base, j->it.source.length);
   1462  1.1  christos 	}
   1463  1.1  christos 	if (j->filename != NULL) {
   1464  1.1  christos 		isc_mem_free(j->mctx, j->filename);
   1465  1.1  christos 	}
   1466  1.1  christos 	if (j->fp != NULL) {
   1467  1.1  christos 		(void)isc_stdio_close(j->fp);
   1468  1.1  christos 	}
   1469  1.1  christos 	j->magic = 0;
   1470  1.1  christos 	isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
   1471  1.1  christos }
   1472  1.1  christos 
   1473  1.1  christos /*
   1474  1.1  christos  * Roll the open journal 'j' into the database 'db'.
   1475  1.1  christos  * A new database version will be created.
   1476  1.1  christos  */
   1477  1.1  christos 
   1478  1.1  christos /* XXX Share code with incoming IXFR? */
   1479  1.1  christos 
   1480  1.1  christos isc_result_t
   1481  1.1  christos dns_journal_rollforward(dns_journal_t *j, dns_db_t *db, unsigned int options) {
   1482  1.1  christos 	isc_buffer_t source; /* Transaction data from disk */
   1483  1.1  christos 	isc_buffer_t target; /* Ditto after _fromwire check */
   1484  1.1  christos 	uint32_t db_serial;  /* Database SOA serial */
   1485  1.1  christos 	uint32_t end_serial; /* Last journal SOA serial */
   1486  1.1  christos 	isc_result_t result;
   1487  1.1  christos 	dns_dbversion_t *ver = NULL;
   1488  1.1  christos 	journal_pos_t pos;
   1489  1.1  christos 	dns_diff_t diff;
   1490  1.1  christos 	unsigned int n_soa = 0;
   1491  1.1  christos 	unsigned int n_put = 0;
   1492  1.1  christos 	dns_diffop_t op;
   1493  1.1  christos 
   1494  1.1  christos 	REQUIRE(DNS_JOURNAL_VALID(j));
   1495  1.1  christos 	REQUIRE(DNS_DB_VALID(db));
   1496  1.1  christos 
   1497  1.1  christos 	dns_diff_init(j->mctx, &diff);
   1498  1.1  christos 
   1499  1.1  christos 	/*
   1500  1.1  christos 	 * Set up empty initial buffers for unchecked and checked
   1501  1.1  christos 	 * wire format transaction data.  They will be reallocated
   1502  1.1  christos 	 * later.
   1503  1.1  christos 	 */
   1504  1.1  christos 	isc_buffer_init(&source, NULL, 0);
   1505  1.1  christos 	isc_buffer_init(&target, NULL, 0);
   1506  1.1  christos 
   1507  1.1  christos 	/*
   1508  1.1  christos 	 * Create the new database version.
   1509  1.1  christos 	 */
   1510  1.1  christos 	CHECK(dns_db_newversion(db, &ver));
   1511  1.1  christos 
   1512  1.1  christos 	/*
   1513  1.1  christos 	 * Get the current database SOA serial number.
   1514  1.1  christos 	 */
   1515  1.1  christos 	CHECK(dns_db_getsoaserial(db, ver, &db_serial));
   1516  1.1  christos 
   1517  1.1  christos 	/*
   1518  1.1  christos 	 * Locate a journal entry for the current database serial.
   1519  1.1  christos 	 */
   1520  1.1  christos 	CHECK(journal_find(j, db_serial, &pos));
   1521  1.1  christos 
   1522  1.1  christos 	end_serial = dns_journal_last_serial(j);
   1523  1.1  christos 
   1524  1.1  christos 	/*
   1525  1.1  christos 	 * If we're reading a version 1 file, scan all the transactions
   1526  1.1  christos 	 * to see if the journal needs rewriting: if any outdated
   1527  1.1  christos 	 * transaction headers are found, j->recovered will be set.
   1528  1.1  christos 	 */
   1529  1.1  christos 	if (j->header_ver1) {
   1530  1.1  christos 		uint32_t start_serial = dns_journal_first_serial(j);
   1531  1.1  christos 
   1532  1.1  christos 		CHECK(dns_journal_iter_init(j, start_serial, db_serial, NULL));
   1533  1.1  christos 		for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
   1534  1.1  christos 		     result = dns_journal_next_rr(j))
   1535  1.1  christos 		{
   1536  1.1  christos 			continue;
   1537  1.1  christos 		}
   1538  1.1  christos 	}
   1539  1.1  christos 
   1540  1.1  christos 	if (db_serial == end_serial) {
   1541  1.1  christos 		CHECK(DNS_R_UPTODATE);
   1542  1.1  christos 	}
   1543  1.1  christos 
   1544  1.1  christos 	CHECK(dns_journal_iter_init(j, db_serial, end_serial, NULL));
   1545  1.1  christos 	for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
   1546  1.1  christos 	     result = dns_journal_next_rr(j))
   1547  1.1  christos 	{
   1548  1.1  christos 		dns_name_t *name = NULL;
   1549  1.1  christos 		dns_rdata_t *rdata = NULL;
   1550  1.1  christos 		dns_difftuple_t *tuple = NULL;
   1551  1.1  christos 		uint32_t ttl;
   1552  1.1  christos 
   1553  1.1  christos 		dns_journal_current_rr(j, &name, &ttl, &rdata);
   1554  1.1  christos 
   1555  1.1  christos 		if (rdata->type == dns_rdatatype_soa) {
   1556  1.1  christos 			n_soa++;
   1557  1.1  christos 			if (n_soa == 2) {
   1558  1.1  christos 				db_serial = j->it.current_serial;
   1559  1.1  christos 			}
   1560  1.1  christos 		}
   1561  1.1  christos 
   1562  1.1  christos 		if (n_soa == 3) {
   1563  1.1  christos 			n_soa = 1;
   1564  1.1  christos 		}
   1565  1.1  christos 		if (n_soa == 0) {
   1566  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1567  1.1  christos 				      "%s: journal file corrupt: missing "
   1568  1.1  christos 				      "initial SOA",
   1569  1.1  christos 				      j->filename);
   1570  1.1  christos 			FAIL(ISC_R_UNEXPECTED);
   1571  1.1  christos 		}
   1572  1.1  christos 		if ((options & DNS_JOURNALOPT_RESIGN) != 0) {
   1573  1.1  christos 			op = (n_soa == 1) ? DNS_DIFFOP_DELRESIGN
   1574  1.1  christos 					  : DNS_DIFFOP_ADDRESIGN;
   1575  1.1  christos 		} else {
   1576  1.1  christos 			op = (n_soa == 1) ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD;
   1577  1.1  christos 		}
   1578  1.1  christos 
   1579  1.1  christos 		CHECK(dns_difftuple_create(diff.mctx, op, name, ttl, rdata,
   1580  1.1  christos 					   &tuple));
   1581  1.1  christos 		dns_diff_append(&diff, &tuple);
   1582  1.1  christos 
   1583  1.1  christos 		if (++n_put > 100) {
   1584  1.1  christos 			isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
   1585  1.1  christos 				      "%s: applying diff to database (%u)",
   1586  1.1  christos 				      j->filename, db_serial);
   1587  1.1  christos 			(void)dns_diff_print(&diff, NULL);
   1588  1.1  christos 			CHECK(dns_diff_apply(&diff, db, ver));
   1589  1.1  christos 			dns_diff_clear(&diff);
   1590  1.1  christos 			n_put = 0;
   1591  1.1  christos 		}
   1592  1.1  christos 	}
   1593  1.1  christos 	if (result == ISC_R_NOMORE) {
   1594  1.1  christos 		result = ISC_R_SUCCESS;
   1595  1.1  christos 	}
   1596  1.1  christos 	CHECK(result);
   1597  1.1  christos 
   1598  1.1  christos 	if (n_put != 0) {
   1599  1.1  christos 		isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
   1600  1.1  christos 			      "%s: applying final diff to database (%u)",
   1601  1.1  christos 			      j->filename, db_serial);
   1602  1.1  christos 		(void)dns_diff_print(&diff, NULL);
   1603  1.1  christos 		CHECK(dns_diff_apply(&diff, db, ver));
   1604  1.1  christos 		dns_diff_clear(&diff);
   1605  1.1  christos 	}
   1606  1.1  christos 
   1607  1.1  christos failure:
   1608  1.1  christos 	if (ver != NULL) {
   1609  1.1  christos 		dns_db_closeversion(db, &ver,
   1610  1.1  christos 				    result == ISC_R_SUCCESS ? true : false);
   1611  1.1  christos 	}
   1612  1.1  christos 
   1613  1.1  christos 	if (source.base != NULL) {
   1614  1.1  christos 		isc_mem_put(j->mctx, source.base, source.length);
   1615  1.1  christos 	}
   1616  1.1  christos 	if (target.base != NULL) {
   1617  1.1  christos 		isc_mem_put(j->mctx, target.base, target.length);
   1618  1.1  christos 	}
   1619  1.1  christos 
   1620  1.1  christos 	dns_diff_clear(&diff);
   1621  1.1  christos 
   1622  1.1  christos 	INSIST(ver == NULL);
   1623  1.1  christos 
   1624  1.1  christos 	return (result);
   1625  1.1  christos }
   1626  1.1  christos 
   1627  1.1  christos isc_result_t
   1628  1.1  christos dns_journal_print(isc_mem_t *mctx, uint32_t flags, const char *filename,
   1629  1.1  christos 		  FILE *file) {
   1630  1.1  christos 	dns_journal_t *j = NULL;
   1631  1.1  christos 	isc_buffer_t source;   /* Transaction data from disk */
   1632  1.1  christos 	isc_buffer_t target;   /* Ditto after _fromwire check */
   1633  1.1  christos 	uint32_t start_serial; /* Database SOA serial */
   1634  1.1  christos 	uint32_t end_serial;   /* Last journal SOA serial */
   1635  1.1  christos 	isc_result_t result;
   1636  1.1  christos 	dns_diff_t diff;
   1637  1.1  christos 	unsigned int n_soa = 0;
   1638  1.1  christos 	unsigned int n_put = 0;
   1639  1.1  christos 	bool printxhdr = ((flags & DNS_JOURNAL_PRINTXHDR) != 0);
   1640  1.1  christos 
   1641  1.1  christos 	REQUIRE(filename != NULL);
   1642  1.1  christos 
   1643  1.1  christos 	result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
   1644  1.1  christos 	if (result == ISC_R_NOTFOUND) {
   1645  1.1  christos 		isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file");
   1646  1.1  christos 		return (DNS_R_NOJOURNAL);
   1647  1.1  christos 	} else if (result != ISC_R_SUCCESS) {
   1648  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1649  1.1  christos 			      "journal open failure: %s: %s",
   1650  1.1  christos 			      isc_result_totext(result), filename);
   1651  1.1  christos 		return (result);
   1652  1.1  christos 	}
   1653  1.1  christos 
   1654  1.1  christos 	if (printxhdr) {
   1655  1.1  christos 		fprintf(file, "Journal format = %sHeader version = %d\n",
   1656  1.1  christos 			j->header.format + 1, j->header_ver1 ? 1 : 2);
   1657  1.1  christos 		fprintf(file, "Start serial = %u\n", j->header.begin.serial);
   1658  1.1  christos 		fprintf(file, "End serial = %u\n", j->header.end.serial);
   1659  1.1  christos 		fprintf(file, "Index (size = %u):\n", j->header.index_size);
   1660  1.1  christos 		for (uint32_t i = 0; i < j->header.index_size; i++) {
   1661  1.1  christos 			if (j->index[i].offset == 0) {
   1662  1.1  christos 				fputc('\n', file);
   1663  1.1  christos 				break;
   1664  1.1  christos 			}
   1665  1.1  christos 			fprintf(file, "%lld", (long long)j->index[i].offset);
   1666  1.1  christos 			fputc((i + 1) % 8 == 0 ? '\n' : ' ', file);
   1667  1.1  christos 		}
   1668  1.1  christos 	}
   1669  1.1  christos 	if (j->header.serialset) {
   1670  1.1  christos 		fprintf(file, "Source serial = %u\n", j->header.sourceserial);
   1671  1.1  christos 	}
   1672  1.1  christos 	dns_diff_init(j->mctx, &diff);
   1673  1.1  christos 
   1674  1.1  christos 	/*
   1675  1.1  christos 	 * Set up empty initial buffers for unchecked and checked
   1676  1.1  christos 	 * wire format transaction data.  They will be reallocated
   1677  1.1  christos 	 * later.
   1678  1.1  christos 	 */
   1679  1.1  christos 	isc_buffer_init(&source, NULL, 0);
   1680  1.1  christos 	isc_buffer_init(&target, NULL, 0);
   1681  1.1  christos 
   1682  1.1  christos 	start_serial = dns_journal_first_serial(j);
   1683  1.1  christos 	end_serial = dns_journal_last_serial(j);
   1684  1.1  christos 
   1685  1.1  christos 	CHECK(dns_journal_iter_init(j, start_serial, end_serial, NULL));
   1686  1.1  christos 
   1687  1.1  christos 	for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS;
   1688  1.1  christos 	     result = dns_journal_next_rr(j))
   1689  1.1  christos 	{
   1690  1.1  christos 		dns_name_t *name = NULL;
   1691  1.1  christos 		dns_rdata_t *rdata = NULL;
   1692  1.1  christos 		dns_difftuple_t *tuple = NULL;
   1693  1.1  christos 		static uint32_t i = 0;
   1694  1.1  christos 		bool print = false;
   1695  1.1  christos 		uint32_t ttl;
   1696  1.1  christos 
   1697  1.1  christos 		dns_journal_current_rr(j, &name, &ttl, &rdata);
   1698  1.1  christos 
   1699  1.1  christos 		if (rdata->type == dns_rdatatype_soa) {
   1700  1.1  christos 			n_soa++;
   1701  1.1  christos 			if (n_soa == 3) {
   1702  1.1  christos 				n_soa = 1;
   1703  1.1  christos 			}
   1704  1.1  christos 			if (n_soa == 1) {
   1705  1.1  christos 				print = printxhdr;
   1706  1.1  christos 			}
   1707  1.1  christos 		}
   1708  1.1  christos 		if (n_soa == 0) {
   1709  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1710  1.1  christos 				      "%s: journal file corrupt: missing "
   1711  1.1  christos 				      "initial SOA",
   1712  1.1  christos 				      j->filename);
   1713  1.1  christos 			FAIL(ISC_R_UNEXPECTED);
   1714  1.1  christos 		}
   1715  1.1  christos 
   1716  1.1  christos 		if (print) {
   1717  1.1  christos 			fprintf(file,
   1718  1.1  christos 				"Transaction: version %d offset %lld size %u "
   1719  1.1  christos 				"rrcount %u start %u end %u\n",
   1720  1.1  christos 				j->xhdr_version, (long long)j->it.cpos.offset,
   1721  1.1  christos 				j->curxhdr.size, j->curxhdr.count,
   1722  1.1  christos 				j->curxhdr.serial0, j->curxhdr.serial1);
   1723  1.1  christos 			if (j->it.cpos.offset > j->index[i].offset) {
   1724  1.1  christos 				fprintf(file,
   1725  1.1  christos 					"ERROR: Offset mismatch, "
   1726  1.1  christos 					"expected %lld\n",
   1727  1.1  christos 					(long long)j->index[i].offset);
   1728  1.1  christos 			} else if (j->it.cpos.offset == j->index[i].offset) {
   1729  1.1  christos 				i++;
   1730  1.1  christos 			}
   1731  1.1  christos 		}
   1732  1.1  christos 		CHECK(dns_difftuple_create(
   1733  1.1  christos 			diff.mctx, n_soa == 1 ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD,
   1734  1.1  christos 			name, ttl, rdata, &tuple));
   1735  1.1  christos 		dns_diff_append(&diff, &tuple);
   1736  1.1  christos 
   1737  1.1  christos 		if (++n_put > 100 || printxhdr) {
   1738  1.1  christos 			result = dns_diff_print(&diff, file);
   1739  1.1  christos 			dns_diff_clear(&diff);
   1740  1.1  christos 			n_put = 0;
   1741  1.1  christos 			if (result != ISC_R_SUCCESS) {
   1742  1.1  christos 				break;
   1743  1.1  christos 			}
   1744  1.1  christos 		}
   1745  1.1  christos 	}
   1746  1.1  christos 	if (result == ISC_R_NOMORE) {
   1747  1.1  christos 		result = ISC_R_SUCCESS;
   1748  1.1  christos 	}
   1749  1.1  christos 	CHECK(result);
   1750  1.1  christos 
   1751  1.1  christos 	if (n_put != 0) {
   1752  1.1  christos 		result = dns_diff_print(&diff, file);
   1753  1.1  christos 		dns_diff_clear(&diff);
   1754  1.1  christos 	}
   1755  1.1  christos 	goto cleanup;
   1756  1.1  christos 
   1757  1.1  christos failure:
   1758  1.1  christos 	isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1759  1.1  christos 		      "%s: cannot print: journal file corrupt", j->filename);
   1760  1.1  christos 
   1761  1.1  christos cleanup:
   1762  1.1  christos 	if (source.base != NULL) {
   1763  1.1  christos 		isc_mem_put(j->mctx, source.base, source.length);
   1764  1.1  christos 	}
   1765  1.1  christos 	if (target.base != NULL) {
   1766  1.1  christos 		isc_mem_put(j->mctx, target.base, target.length);
   1767  1.1  christos 	}
   1768  1.1  christos 
   1769  1.1  christos 	dns_diff_clear(&diff);
   1770  1.1  christos 	dns_journal_destroy(&j);
   1771  1.1  christos 
   1772  1.1  christos 	return (result);
   1773  1.1  christos }
   1774  1.1  christos 
   1775  1.1  christos /**************************************************************************/
   1776  1.1  christos /*
   1777  1.1  christos  * Miscellaneous accessors.
   1778  1.1  christos  */
   1779  1.1  christos bool
   1780  1.1  christos dns_journal_empty(dns_journal_t *j) {
   1781  1.1  christos 	return (JOURNAL_EMPTY(&j->header));
   1782  1.1  christos }
   1783  1.1  christos 
   1784  1.1  christos bool
   1785  1.1  christos dns_journal_recovered(dns_journal_t *j) {
   1786  1.1  christos 	return (j->recovered);
   1787  1.1  christos }
   1788  1.1  christos 
   1789  1.1  christos uint32_t
   1790  1.1  christos dns_journal_first_serial(dns_journal_t *j) {
   1791  1.1  christos 	return (j->header.begin.serial);
   1792  1.1  christos }
   1793  1.1  christos 
   1794  1.1  christos uint32_t
   1795  1.1  christos dns_journal_last_serial(dns_journal_t *j) {
   1796  1.1  christos 	return (j->header.end.serial);
   1797  1.1  christos }
   1798  1.1  christos 
   1799  1.1  christos void
   1800  1.1  christos dns_journal_set_sourceserial(dns_journal_t *j, uint32_t sourceserial) {
   1801  1.1  christos 	REQUIRE(j->state == JOURNAL_STATE_WRITE ||
   1802  1.1  christos 		j->state == JOURNAL_STATE_INLINE ||
   1803  1.1  christos 		j->state == JOURNAL_STATE_TRANSACTION);
   1804  1.1  christos 
   1805  1.1  christos 	j->header.sourceserial = sourceserial;
   1806  1.1  christos 	j->header.serialset = true;
   1807  1.1  christos 	if (j->state == JOURNAL_STATE_WRITE) {
   1808  1.1  christos 		j->state = JOURNAL_STATE_INLINE;
   1809  1.1  christos 	}
   1810  1.1  christos }
   1811  1.1  christos 
   1812  1.1  christos bool
   1813  1.1  christos dns_journal_get_sourceserial(dns_journal_t *j, uint32_t *sourceserial) {
   1814  1.1  christos 	REQUIRE(sourceserial != NULL);
   1815  1.1  christos 
   1816  1.1  christos 	if (!j->header.serialset) {
   1817  1.1  christos 		return (false);
   1818  1.1  christos 	}
   1819  1.1  christos 	*sourceserial = j->header.sourceserial;
   1820  1.1  christos 	return (true);
   1821  1.1  christos }
   1822  1.1  christos 
   1823  1.1  christos /**************************************************************************/
   1824  1.1  christos /*
   1825  1.1  christos  * Iteration support.
   1826  1.1  christos  *
   1827  1.1  christos  * When serving an outgoing IXFR, we transmit a part of the journal starting
   1828  1.1  christos  * at the serial number in the IXFR request and ending at the serial
   1829  1.1  christos  * number that is current when the IXFR request arrives.  The ending
   1830  1.1  christos  * serial number is not necessarily at the end of the journal:
   1831  1.1  christos  * the journal may grow while the IXFR is in progress, but we stop
   1832  1.1  christos  * when we reach the serial number that was current when the IXFR started.
   1833  1.1  christos  */
   1834  1.1  christos 
   1835  1.1  christos static isc_result_t
   1836  1.1  christos read_one_rr(dns_journal_t *j);
   1837  1.1  christos 
   1838  1.1  christos /*
   1839  1.1  christos  * Make sure the buffer 'b' has at least 'size' bytes
   1840  1.1  christos  * allocated, and clear it.
   1841  1.1  christos  *
   1842  1.1  christos  * Requires:
   1843  1.1  christos  *	Either b->base is NULL, or it points to b->length bytes of memory
   1844  1.1  christos  *	previously allocated by isc_mem_get().
   1845  1.1  christos  */
   1846  1.1  christos 
   1847  1.1  christos static isc_result_t
   1848  1.1  christos size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) {
   1849  1.1  christos 	if (b->length < size) {
   1850  1.1  christos 		void *mem = isc_mem_get(mctx, size);
   1851  1.1  christos 		if (mem == NULL) {
   1852  1.1  christos 			return (ISC_R_NOMEMORY);
   1853  1.1  christos 		}
   1854  1.1  christos 		if (b->base != NULL) {
   1855  1.1  christos 			isc_mem_put(mctx, b->base, b->length);
   1856  1.1  christos 		}
   1857  1.1  christos 		b->base = mem;
   1858  1.1  christos 		b->length = size;
   1859  1.1  christos 	}
   1860  1.1  christos 	isc_buffer_clear(b);
   1861  1.1  christos 	return (ISC_R_SUCCESS);
   1862  1.1  christos }
   1863  1.1  christos 
   1864  1.1  christos isc_result_t
   1865  1.1  christos dns_journal_iter_init(dns_journal_t *j, uint32_t begin_serial,
   1866  1.1  christos 		      uint32_t end_serial, size_t *xfrsizep) {
   1867  1.1  christos 	isc_result_t result;
   1868  1.1  christos 
   1869  1.1  christos 	CHECK(journal_find(j, begin_serial, &j->it.bpos));
   1870  1.1  christos 	INSIST(j->it.bpos.serial == begin_serial);
   1871  1.1  christos 
   1872  1.1  christos 	CHECK(journal_find(j, end_serial, &j->it.epos));
   1873  1.1  christos 	INSIST(j->it.epos.serial == end_serial);
   1874  1.1  christos 
   1875  1.1  christos 	if (xfrsizep != NULL) {
   1876  1.1  christos 		journal_pos_t pos = j->it.bpos;
   1877  1.1  christos 		journal_xhdr_t xhdr;
   1878  1.1  christos 		uint64_t size = 0;
   1879  1.1  christos 		uint32_t count = 0;
   1880  1.1  christos 
   1881  1.1  christos 		/*
   1882  1.1  christos 		 * We already know the beginning and ending serial
   1883  1.1  christos 		 * numbers are in the journal. Scan through them,
   1884  1.1  christos 		 * adding up sizes and RR counts so we can calculate
   1885  1.1  christos 		 * the IXFR size.
   1886  1.1  christos 		 */
   1887  1.1  christos 		do {
   1888  1.1  christos 			CHECK(journal_seek(j, pos.offset));
   1889  1.1  christos 			CHECK(journal_read_xhdr(j, &xhdr));
   1890  1.1  christos 
   1891  1.1  christos 			if (j->header_ver1) {
   1892  1.1  christos 				CHECK(maybe_fixup_xhdr(j, &xhdr, pos.serial,
   1893  1.1  christos 						       pos.offset));
   1894  1.1  christos 			}
   1895  1.1  christos 
   1896  1.1  christos 			/*
   1897  1.1  christos 			 * Check that xhdr is consistent.
   1898  1.1  christos 			 */
   1899  1.1  christos 			if (xhdr.serial0 != pos.serial ||
   1900  1.1  christos 			    isc_serial_le(xhdr.serial1, xhdr.serial0))
   1901  1.1  christos 			{
   1902  1.1  christos 				CHECK(ISC_R_UNEXPECTED);
   1903  1.1  christos 			}
   1904  1.1  christos 
   1905  1.1  christos 			size += xhdr.size;
   1906  1.1  christos 			count += xhdr.count;
   1907  1.1  christos 
   1908  1.1  christos 			result = journal_next(j, &pos);
   1909  1.1  christos 			if (result == ISC_R_NOMORE) {
   1910  1.1  christos 				result = ISC_R_SUCCESS;
   1911  1.1  christos 			}
   1912  1.1  christos 			CHECK(result);
   1913  1.1  christos 		} while (pos.serial != end_serial);
   1914  1.1  christos 
   1915  1.1  christos 		/*
   1916  1.1  christos 		 * For each RR, subtract the length of the RR header,
   1917  1.1  christos 		 * as this would not be present in IXFR messages.
   1918  1.1  christos 		 * (We don't need to worry about the transaction header
   1919  1.1  christos 		 * because that was already excluded from xdr.size.)
   1920  1.1  christos 		 */
   1921  1.1  christos 		*xfrsizep = size - (count * sizeof(journal_rawrrhdr_t));
   1922  1.1  christos 	}
   1923  1.1  christos 
   1924  1.1  christos 	result = ISC_R_SUCCESS;
   1925  1.1  christos failure:
   1926  1.1  christos 	j->it.result = result;
   1927  1.1  christos 	return (j->it.result);
   1928  1.1  christos }
   1929  1.1  christos 
   1930  1.1  christos isc_result_t
   1931  1.1  christos dns_journal_first_rr(dns_journal_t *j) {
   1932  1.1  christos 	isc_result_t result;
   1933  1.1  christos 
   1934  1.1  christos 	/*
   1935  1.1  christos 	 * Seek to the beginning of the first transaction we are
   1936  1.1  christos 	 * interested in.
   1937  1.1  christos 	 */
   1938  1.1  christos 	CHECK(journal_seek(j, j->it.bpos.offset));
   1939  1.1  christos 	j->it.current_serial = j->it.bpos.serial;
   1940  1.1  christos 
   1941  1.1  christos 	j->it.xsize = 0; /* We have no transaction data yet... */
   1942  1.1  christos 	j->it.xpos = 0;	 /* ...and haven't used any of it. */
   1943  1.1  christos 
   1944  1.1  christos 	return (read_one_rr(j));
   1945  1.1  christos 
   1946  1.1  christos failure:
   1947  1.1  christos 	return (result);
   1948  1.1  christos }
   1949  1.1  christos 
   1950  1.1  christos static isc_result_t
   1951  1.1  christos read_one_rr(dns_journal_t *j) {
   1952  1.1  christos 	isc_result_t result;
   1953  1.1  christos 	dns_rdatatype_t rdtype;
   1954  1.1  christos 	dns_rdataclass_t rdclass;
   1955  1.1  christos 	unsigned int rdlen;
   1956  1.1  christos 	uint32_t ttl;
   1957  1.1  christos 	journal_xhdr_t xhdr;
   1958  1.1  christos 	journal_rrhdr_t rrhdr;
   1959  1.1  christos 	dns_journal_t save = *j;
   1960  1.1  christos 
   1961  1.1  christos 	if (j->offset > j->it.epos.offset) {
   1962  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1963  1.1  christos 			      "%s: journal corrupt: possible integer overflow",
   1964  1.1  christos 			      j->filename);
   1965  1.1  christos 		return (ISC_R_UNEXPECTED);
   1966  1.1  christos 	}
   1967  1.1  christos 	if (j->offset == j->it.epos.offset) {
   1968  1.1  christos 		return (ISC_R_NOMORE);
   1969  1.1  christos 	}
   1970  1.1  christos 	if (j->it.xpos == j->it.xsize) {
   1971  1.1  christos 		/*
   1972  1.1  christos 		 * We are at a transaction boundary.
   1973  1.1  christos 		 * Read another transaction header.
   1974  1.1  christos 		 */
   1975  1.1  christos 		CHECK(journal_read_xhdr(j, &xhdr));
   1976  1.1  christos 		if (xhdr.size == 0) {
   1977  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1978  1.1  christos 				      "%s: journal corrupt: empty transaction",
   1979  1.1  christos 				      j->filename);
   1980  1.1  christos 			FAIL(ISC_R_UNEXPECTED);
   1981  1.1  christos 		}
   1982  1.1  christos 
   1983  1.1  christos 		if (j->header_ver1) {
   1984  1.1  christos 			CHECK(maybe_fixup_xhdr(j, &xhdr, j->it.current_serial,
   1985  1.1  christos 					       save.offset));
   1986  1.1  christos 		}
   1987  1.1  christos 
   1988  1.1  christos 		if (xhdr.serial0 != j->it.current_serial ||
   1989  1.1  christos 		    isc_serial_le(xhdr.serial1, xhdr.serial0))
   1990  1.1  christos 		{
   1991  1.1  christos 			isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   1992  1.1  christos 				      "%s: journal file corrupt: "
   1993  1.1  christos 				      "expected serial %u, got %u",
   1994  1.1  christos 				      j->filename, j->it.current_serial,
   1995  1.1  christos 				      xhdr.serial0);
   1996  1.1  christos 			FAIL(ISC_R_UNEXPECTED);
   1997  1.1  christos 		}
   1998  1.1  christos 
   1999  1.1  christos 		j->it.xsize = xhdr.size;
   2000  1.1  christos 		j->it.xpos = 0;
   2001  1.1  christos 	}
   2002  1.1  christos 	/*
   2003  1.1  christos 	 * Read an RR.
   2004  1.1  christos 	 */
   2005  1.1  christos 	CHECK(journal_read_rrhdr(j, &rrhdr));
   2006  1.1  christos 	/*
   2007  1.1  christos 	 * Perform a sanity check on the journal RR size.
   2008  1.1  christos 	 * The smallest possible RR has a 1-byte owner name
   2009  1.1  christos 	 * and a 10-byte header.  The largest possible
   2010  1.1  christos 	 * RR has 65535 bytes of data, a header, and a maximum-
   2011  1.1  christos 	 * size owner name, well below 70 k total.
   2012  1.1  christos 	 */
   2013  1.1  christos 	if (rrhdr.size < 1 + 10 || rrhdr.size > 70000) {
   2014  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   2015  1.1  christos 			      "%s: journal corrupt: impossible RR size "
   2016  1.1  christos 			      "(%d bytes)",
   2017  1.1  christos 			      j->filename, rrhdr.size);
   2018  1.1  christos 		FAIL(ISC_R_UNEXPECTED);
   2019  1.1  christos 	}
   2020  1.1  christos 
   2021  1.1  christos 	CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size));
   2022  1.1  christos 	CHECK(journal_read(j, j->it.source.base, rrhdr.size));
   2023  1.1  christos 	isc_buffer_add(&j->it.source, rrhdr.size);
   2024  1.1  christos 
   2025  1.1  christos 	/*
   2026  1.1  christos 	 * The target buffer is made the same size
   2027  1.1  christos 	 * as the source buffer, with the assumption that when
   2028  1.1  christos 	 * no compression in present, the output of dns_*_fromwire()
   2029  1.1  christos 	 * is no larger than the input.
   2030  1.1  christos 	 */
   2031  1.1  christos 	CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size));
   2032  1.1  christos 
   2033  1.1  christos 	/*
   2034  1.1  christos 	 * Parse the owner name.  We don't know where it
   2035  1.1  christos 	 * ends yet, so we make the entire "remaining"
   2036  1.1  christos 	 * part of the buffer "active".
   2037  1.1  christos 	 */
   2038  1.1  christos 	isc_buffer_setactive(&j->it.source,
   2039  1.1  christos 			     j->it.source.used - j->it.source.current);
   2040  1.1  christos 	CHECK(dns_name_fromwire(&j->it.name, &j->it.source, &j->it.dctx, 0,
   2041  1.1  christos 				&j->it.target));
   2042  1.1  christos 
   2043  1.1  christos 	/*
   2044  1.1  christos 	 * Check that the RR header is there, and parse it.
   2045  1.1  christos 	 */
   2046  1.1  christos 	if (isc_buffer_remaininglength(&j->it.source) < 10) {
   2047  1.1  christos 		FAIL(DNS_R_FORMERR);
   2048  1.1  christos 	}
   2049  1.1  christos 
   2050  1.1  christos 	rdtype = isc_buffer_getuint16(&j->it.source);
   2051  1.1  christos 	rdclass = isc_buffer_getuint16(&j->it.source);
   2052  1.1  christos 	ttl = isc_buffer_getuint32(&j->it.source);
   2053  1.1  christos 	rdlen = isc_buffer_getuint16(&j->it.source);
   2054  1.1  christos 
   2055  1.1  christos 	if (rdlen > DNS_RDATA_MAXLENGTH) {
   2056  1.1  christos 		isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
   2057  1.1  christos 			      "%s: journal corrupt: impossible rdlen "
   2058  1.1  christos 			      "(%u bytes)",
   2059  1.1  christos 			      j->filename, rdlen);
   2060  1.1  christos 		FAIL(ISC_R_FAILURE);
   2061  1.1  christos 	}
   2062  1.1  christos 
   2063  1.1  christos 	/*
   2064  1.1  christos 	 * Parse the rdata.
   2065  1.1  christos 	 */
   2066  1.1  christos 	if (isc_buffer_remaininglength(&j->it.source) != rdlen) {
   2067  1.1  christos 		FAIL(DNS_R_FORMERR);
   2068  1.1  christos 	}
   2069  1.1  christos 	isc_buffer_setactive(&j->it.source, rdlen);
   2070  1.1  christos 	dns_rdata_reset(&j->it.rdata);
   2071  1.1  christos 	CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass, rdtype, &j->it.source,
   2072  1.1  christos 				 &j->it.dctx, 0, &j->it.target));
   2073  1.1  christos 	j->it.ttl = ttl;
   2074  1.1  christos 
   2075  1.1  christos 	j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size;
   2076  1.1  christos 	if (rdtype == dns_rdatatype_soa) {
   2077  1.1  christos 		/* XXX could do additional consistency checks here */
   2078  1.1  christos 		j->it.current_serial = dns_soa_getserial(&j->it.rdata);
   2079  1.1  christos 	}
   2080  1.1  christos 
   2081  1.1  christos 	result = ISC_R_SUCCESS;
   2082  1.1  christos 
   2083  1.1  christos failure:
   2084  1.1  christos 	j->it.result = result;
   2085  1.1  christos 	return (result);
   2086  1.1  christos }
   2087  1.1  christos 
   2088  1.1  christos isc_result_t
   2089  1.1  christos dns_journal_next_rr(dns_journal_t *j) {
   2090  1.1  christos 	j->it.result = read_one_rr(j);
   2091  1.1  christos 	return (j->it.result);
   2092  1.1  christos }
   2093  1.1  christos 
   2094  1.1  christos void
   2095  1.1  christos dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, uint32_t *ttl,
   2096  1.1  christos 		       dns_rdata_t **rdata) {
   2097  1.1  christos 	REQUIRE(j->it.result == ISC_R_SUCCESS);
   2098  1.1  christos 	*name = &j->it.name;
   2099  1.1  christos 	*ttl = j->it.ttl;
   2100  1.1  christos 	*rdata = &j->it.rdata;
   2101  1.1  christos }
   2102  1.1  christos 
   2103  1.1  christos /**************************************************************************/
   2104  1.1  christos /*
   2105  1.1  christos  * Generating diffs from databases
   2106  1.1  christos  */
   2107  1.1  christos 
   2108  1.1  christos /*
   2109  1.1  christos  * Construct a diff containing all the RRs at the current name of the
   2110  1.1  christos  * database iterator 'dbit' in database 'db', version 'ver'.
   2111  1.1  christos  * Set '*name' to the current name, and append the diff to 'diff'.
   2112  1.1  christos  * All new tuples will have the operation 'op'.
   2113  1.1  christos  *
   2114  1.1  christos  * Requires: 'name' must have buffer large enough to hold the name.
   2115  1.1  christos  * Typically, a dns_fixedname_t would be used.
   2116  1.1  christos  */
   2117  1.1  christos static isc_result_t
   2118  1.1  christos get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now,
   2119  1.1  christos 	      dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op,
   2120  1.1  christos 	      dns_diff_t *diff) {
   2121  1.1  christos 	isc_result_t result;
   2122  1.1  christos 	dns_dbnode_t *node = NULL;
   2123  1.1  christos 	dns_rdatasetiter_t *rdsiter = NULL;
   2124  1.1  christos 	dns_difftuple_t *tuple = NULL;
   2125  1.1  christos 
   2126  1.1  christos 	result = dns_dbiterator_current(dbit, &node, name);
   2127  1.1  christos 	if (result != ISC_R_SUCCESS) {
   2128  1.1  christos 		return (result);
   2129  1.1  christos 	}
   2130  1.1  christos 
   2131  1.1  christos 	result = dns_db_allrdatasets(db, node, ver, 0, now, &rdsiter);
   2132  1.1  christos 	if (result != ISC_R_SUCCESS) {
   2133  1.1  christos 		goto cleanup_node;
   2134  1.1  christos 	}
   2135  1.1  christos 
   2136  1.1  christos 	for (result = dns_rdatasetiter_first(rdsiter); result == ISC_R_SUCCESS;
   2137  1.1  christos 	     result = dns_rdatasetiter_next(rdsiter))
   2138  1.1  christos 	{
   2139  1.1  christos 		dns_rdataset_t rdataset;
   2140  1.1  christos 
   2141  1.1  christos 		dns_rdataset_init(&rdataset);
   2142  1.1  christos 		dns_rdatasetiter_current(rdsiter, &rdataset);
   2143  1.1  christos 
   2144  1.1  christos 		for (result = dns_rdataset_first(&rdataset);
   2145  1.1  christos 		     result == ISC_R_SUCCESS;
   2146  1.1  christos 		     result = dns_rdataset_next(&rdataset))
   2147  1.1  christos 		{
   2148  1.1  christos 			dns_rdata_t rdata = DNS_RDATA_INIT;
   2149  1.1  christos 			dns_rdataset_current(&rdataset, &rdata);
   2150  1.1  christos 			result = dns_difftuple_create(diff->mctx, op, name,
   2151  1.1  christos 						      rdataset.ttl, &rdata,
   2152  1.1  christos 						      &tuple);
   2153  1.1  christos 			if (result != ISC_R_SUCCESS) {
   2154  1.1  christos 				dns_rdataset_disassociate(&rdataset);
   2155  1.1  christos 				goto cleanup_iterator;
   2156  1.1  christos 			}
   2157  1.1  christos 			dns_diff_append(diff, &tuple);
   2158  1.1  christos 		}
   2159  1.1  christos 		dns_rdataset_disassociate(&rdataset);
   2160  1.1  christos 		if (result != ISC_R_NOMORE) {
   2161  1.1  christos 			goto cleanup_iterator;
   2162  1.1  christos 		}
   2163  1.1  christos 	}
   2164  1.1  christos 	if (result != ISC_R_NOMORE) {
   2165  1.1  christos 		goto cleanup_iterator;
   2166  1.1  christos 	}
   2167  1.1  christos 
   2168  1.1  christos 	result = ISC_R_SUCCESS;
   2169  1.1  christos 
   2170  1.1  christos cleanup_iterator:
   2171  1.1  christos 	dns_rdatasetiter_destroy(&rdsiter);
   2172  1.1  christos 
   2173  1.1  christos cleanup_node:
   2174  1.1  christos 	dns_db_detachnode(db, &node);
   2175  1.1  christos 
   2176  1.1  christos 	return (result);
   2177  1.1  christos }
   2178  1.1  christos 
   2179  1.1  christos /*
   2180  1.1  christos  * Comparison function for use by dns_diff_subtract when sorting
   2181  1.1  christos  * the diffs to be subtracted.  The sort keys are the rdata type
   2182  1.1  christos  * and the rdata itself.  The owner name is ignored, because
   2183  1.1  christos  * it is known to be the same for all tuples.
   2184  1.1  christos  */
   2185  1.1  christos static int
   2186  1.1  christos rdata_order(const void *av, const void *bv) {
   2187  1.1  christos 	dns_difftuple_t const *const *ap = av;
   2188  1.1  christos 	dns_difftuple_t const *const *bp = bv;
   2189  1.1  christos 	dns_difftuple_t const *a = *ap;
   2190  1.1  christos 	dns_difftuple_t const *b = *bp;
   2191  1.1  christos 	int r;
   2192  1.1  christos 	r = (b->rdata.type - a->rdata.type);
   2193  1.1  christos 	if (r != 0) {
   2194  1.1  christos 		return (r);
   2195  1.1  christos 	}
   2196  1.1  christos 	r = dns_rdata_compare(&a->rdata, &b->rdata);
   2197  1.1  christos 	return (r);
   2198  1.1  christos }
   2199  1.1  christos 
   2200  1.1  christos static isc_result_t
   2201  1.1  christos dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) {
   2202  1.1  christos 	isc_result_t result;
   2203  1.1  christos 	dns_difftuple_t *p[2];
   2204  1.1  christos 	int i, t;
   2205  1.1  christos 	bool append;
   2206  1.1  christos 	dns_difftuplelist_t add, del;
   2207  1.1  christos 
   2208  1.1  christos 	CHECK(dns_diff_sort(&diff[0], rdata_order));
   2209  1.1  christos 	CHECK(dns_diff_sort(&diff[1], rdata_order));
   2210  1.1  christos 	ISC_LIST_INIT(add);
   2211  1.1  christos 	ISC_LIST_INIT(del);
   2212  1.1  christos 
   2213  1.1  christos 	for (;;) {
   2214  1.1  christos 		p[0] = ISC_LIST_HEAD(diff[0].tuples);
   2215  1.1  christos 		p[1] = ISC_LIST_HEAD(diff[1].tuples);
   2216  1.1  christos 		if (p[0] == NULL && p[1] == NULL) {
   2217  1.1  christos 			break;
   2218  1.1  christos 		}
   2219  1.1  christos 
   2220  1.1  christos 		for (i = 0; i < 2; i++) {
   2221  1.1  christos 			if (p[!i] == NULL) {
   2222  1.1  christos 				dns_difftuplelist_t *l = (i == 0) ? &add : &del;
   2223  1.1  christos 				ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
   2224  1.1  christos 				ISC_LIST_APPEND(*l, p[i], link);
   2225  1.1  christos 				goto next;
   2226  1.1  christos 			}
   2227  1.1  christos 		}
   2228  1.1  christos 		t = rdata_order(&p[0], &p[1]);
   2229  1.1  christos 		if (t < 0) {
   2230  1.1  christos 			ISC_LIST_UNLINK(diff[0].tuples, p[0], link);
   2231  1.1  christos 			ISC_LIST_APPEND(add, p[0], link);
   2232  1.1  christos 			goto next;
   2233  1.1  christos 		}
   2234  1.1  christos 		if (t > 0) {
   2235  1.1  christos 			ISC_LIST_UNLINK(diff[1].tuples, p[1], link);
   2236  1.1  christos 			ISC_LIST_APPEND(del, p[1], link);
   2237  1.1  christos 			goto next;
   2238  1.1  christos 		}
   2239  1.1  christos 		INSIST(t == 0);
   2240  1.1  christos 		/*
   2241  1.1  christos 		 * Identical RRs in both databases; skip them both
   2242  1.1  christos 		 * if the ttl differs.
   2243  1.1  christos 		 */
   2244  1.1  christos 		append = (p[0]->ttl != p[1]->ttl);
   2245  1.1  christos 		for (i = 0; i < 2; i++) {
   2246  1.1  christos 			ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
   2247  1.1  christos 			if (append) {
   2248  1.1  christos 				dns_difftuplelist_t *l = (i == 0) ? &add : &del;
   2249  1.1  christos 				ISC_LIST_APPEND(*l, p[i], link);
   2250  1.1  christos 			} else {
   2251  1.1  christos 				dns_difftuple_free(&p[i]);
   2252  1.1  christos 			}
   2253  1.1  christos 		}
   2254  1.1  christos 	next:;
   2255  1.1  christos 	}
   2256  1.1  christos 	ISC_LIST_APPENDLIST(r->tuples, del, link);
   2257  1.1  christos 	ISC_LIST_APPENDLIST(r->tuples, add, link);
   2258  1.1  christos 	result = ISC_R_SUCCESS;
   2259  1.1  christos failure:
   2260  1.1  christos 	return (result);
   2261  1.1  christos }
   2262  1.1  christos 
   2263  1.1  christos static isc_result_t
   2264  1.1  christos diff_namespace(dns_db_t *dba, dns_dbversion_t *dbvera, dns_db_t *dbb,
   2265  1.1  christos 	       dns_dbversion_t *dbverb, unsigned int options,
   2266  1.1  christos 	       dns_diff_t *resultdiff) {
   2267  1.1  christos 	dns_db_t *db[2];
   2268  1.1  christos 	dns_dbversion_t *ver[2];
   2269  1.1  christos 	dns_dbiterator_t *dbit[2] = { NULL, NULL };
   2270  1.1  christos 	bool have[2] = { false, false };
   2271  1.1  christos 	dns_fixedname_t fixname[2];
   2272  1.1  christos 	isc_result_t result, itresult[2];
   2273  1.1  christos 	dns_diff_t diff[2];
   2274  1.1  christos 	int i, t;
   2275  1.1  christos 
   2276  1.1  christos 	db[0] = dba, db[1] = dbb;
   2277  1.1  christos 	ver[0] = dbvera, ver[1] = dbverb;
   2278  1.1  christos 
   2279  1.1  christos 	dns_diff_init(resultdiff->mctx, &diff[0]);
   2280  1.1  christos 	dns_diff_init(resultdiff->mctx, &diff[1]);
   2281  1.1  christos 
   2282  1.1  christos 	dns_fixedname_init(&fixname[0]);
   2283  1.1  christos 	dns_fixedname_init(&fixname[1]);
   2284  1.1  christos 
   2285  1.1  christos 	result = dns_db_createiterator(db[0], options, &dbit[0]);
   2286  1.1  christos 	if (result != ISC_R_SUCCESS) {
   2287  1.1  christos 		return (result);
   2288  1.1  christos 	}
   2289  1.1  christos 	result = dns_db_createiterator(db[1], options, &dbit[1]);
   2290  1.1  christos 	if (result != ISC_R_SUCCESS) {
   2291  1.1  christos 		goto cleanup_iterator;
   2292  1.1  christos 	}
   2293  1.1  christos 
   2294  1.1  christos 	itresult[0] = dns_dbiterator_first(dbit[0]);
   2295  1.1  christos 	itresult[1] = dns_dbiterator_first(dbit[1]);
   2296  1.1  christos 
   2297  1.1  christos 	for (;;) {
   2298  1.1  christos 		for (i = 0; i < 2; i++) {
   2299  1.1  christos 			if (!have[i] && itresult[i] == ISC_R_SUCCESS) {
   2300  1.1  christos 				CHECK(get_name_diff(
   2301  1.1  christos 					db[i], ver[i], 0, dbit[i],
   2302  1.1  christos 					dns_fixedname_name(&fixname[i]),
   2303  1.1  christos 					i == 0 ? DNS_DIFFOP_ADD
   2304  1.1  christos 					       : DNS_DIFFOP_DEL,
   2305  1.1  christos 					&diff[i]));
   2306  1.1  christos 				itresult[i] = dns_dbiterator_next(dbit[i]);
   2307  1.1  christos 				have[i] = true;
   2308  1.1  christos 			}
   2309  1.1  christos 		}
   2310  1.1  christos 
   2311  1.1  christos 		if (!have[0] && !have[1]) {
   2312  1.1  christos 			INSIST(ISC_LIST_EMPTY(diff[0].tuples));
   2313  1.1  christos 			INSIST(ISC_LIST_EMPTY(diff[1].tuples));
   2314  1.1  christos 			break;
   2315  1.1  christos 		}
   2316  1.1  christos 
   2317  1.1  christos 		for (i = 0; i < 2; i++) {
   2318  1.1  christos 			if (!have[!i]) {
   2319  1.1  christos 				ISC_LIST_APPENDLIST(resultdiff->tuples,
   2320  1.1  christos 						    diff[i].tuples, link);
   2321  1.1  christos 				INSIST(ISC_LIST_EMPTY(diff[i].tuples));
   2322  1.1  christos 				have[i] = false;
   2323  1.1  christos 				goto next;
   2324  1.1  christos 			}
   2325  1.1  christos 		}
   2326  1.1  christos 
   2327  1.1  christos 		t = dns_name_compare(dns_fixedname_name(&fixname[0]),
   2328  1.1  christos 				     dns_fixedname_name(&fixname[1]));
   2329  1.1  christos 		if (t < 0) {
   2330  1.1  christos 			ISC_LIST_APPENDLIST(resultdiff->tuples, diff[0].tuples,
   2331  1.1  christos 					    link);
   2332  1.1  christos 			INSIST(ISC_LIST_EMPTY(diff[0].tuples));
   2333  1.1  christos 			have[0] = false;
   2334  1.1  christos 			continue;
   2335  1.1  christos 		}
   2336  1.1  christos 		if (t > 0) {
   2337  1.1  christos 			ISC_LIST_APPENDLIST(resultdiff->tuples, diff[1].tuples,
   2338  1.1  christos 					    link);
   2339  1.1  christos 			INSIST(ISC_LIST_EMPTY(diff[1].tuples));
   2340  1.1  christos 			have[1] = false;
   2341  1.1  christos 			continue;
   2342  1.1  christos 		}
   2343  1.1  christos 		INSIST(t == 0);
   2344  1.1  christos 		CHECK(dns_diff_subtract(diff, resultdiff));
   2345  1.1  christos 		INSIST(ISC_LIST_EMPTY(diff[0].tuples));
   2346  1.1  christos 		INSIST(ISC_LIST_EMPTY(diff[1].tuples));
   2347  1.1  christos 		have[0] = have[1] = false;
   2348  1.1  christos 	next:;
   2349  1.1  christos 	}
   2350  1.1  christos 	if (itresult[0] != ISC_R_NOMORE) {
   2351  1.1  christos 		FAIL(itresult[0]);
   2352  1.1  christos 	}
   2353  1.1  christos 	if (itresult[1] != ISC_R_NOMORE) {
   2354  1.1  christos 		FAIL(itresult[1]);
   2355  1.1  christos 	}
   2356  1.1  christos 
   2357  1.1  christos 	INSIST(ISC_LIST_EMPTY(diff[0].tuples));
   2358  1.1  christos 	INSIST(ISC_LIST_EMPTY(diff[1].tuples));
   2359  1.1  christos 
   2360  1.1  christos failure:
   2361  1.1  christos 	dns_dbiterator_destroy(&dbit[1]);
   2362  1.1  christos 
   2363  1.1  christos cleanup_iterator:
   2364  1.1  christos 	dns_dbiterator_destroy(&dbit[0]);
   2365  1.1  christos 	dns_diff_clear(&diff[0]);
   2366  1.1  christos 	dns_diff_clear(&diff[1]);
   2367  1.1  christos 	return (result);
   2368  1.1  christos }
   2369  1.1  christos 
   2370  1.1  christos /*
   2371  1.1  christos  * Compare the databases 'dba' and 'dbb' and generate a journal
   2372  1.1  christos  * entry containing the changes to make 'dba' from 'dbb' (note
   2373  1.1  christos  * the order).  This journal entry will consist of a single,
   2374  1.1  christos  * possibly very large transaction.
   2375  1.1  christos  */
   2376  1.1  christos isc_result_t
   2377  1.1  christos dns_db_diff(isc_mem_t *mctx, dns_db_t *dba, dns_dbversion_t *dbvera,
   2378  1.1  christos 	    dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) {
   2379  1.1  christos 	isc_result_t result;
   2380  1.1  christos 	dns_diff_t diff;
   2381  1.1  christos 
   2382  1.1  christos 	dns_diff_init(mctx, &diff);
   2383  1.1  christos 
   2384  1.1  christos 	result = dns_db_diffx(&diff, dba, dbvera, dbb, dbverb, filename);
   2385  1.1  christos 
   2386  1.1  christos 	dns_diff_clear(&diff);
   2387  1.1  christos 
   2388  1.1  christos 	return (result);
   2389  1.1  christos }
   2390  1.1  christos 
   2391  1.1  christos isc_result_t
   2392  1.1  christos dns_db_diffx(dns_diff_t *diff, dns_db_t *dba, dns_dbversion_t *dbvera,
   2393  1.1  christos 	     dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename) {
   2394  1.1  christos 	isc_result_t result;
   2395  1.1  christos 	dns_journal_t *journal = NULL;
   2396  1.1  christos 
   2397  1.1  christos 	if (filename != NULL) {
   2398  1.1  christos 		result = dns_journal_open(diff->mctx, filename,
   2399  1.1  christos 					  DNS_JOURNAL_CREATE, &journal);
   2400  1.1  christos 		if (result != ISC_R_SUCCESS) {
   2401  1.1  christos 			return (result);
   2402  1.1  christos 		}
   2403  1.1  christos 	}
   2404  1.1  christos 
   2405  1.1  christos 	CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NONSEC3, diff));
   2406  1.1  christos 	CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NSEC3ONLY, diff));
   2407  1.1  christos 
   2408  1.1  christos 	if (journal != NULL) {
   2409  1.1  christos 		if (ISC_LIST_EMPTY(diff->tuples)) {
   2410  1.1  christos 			isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes");
   2411  1.1  christos 		} else {
   2412  1.1  christos 			CHECK(dns_journal_write_transaction(journal, diff));
   2413  1.1  christos 		}
   2414  1.1  christos 	}
   2415  1.1  christos 
   2416  1.1  christos failure:
   2417  1.1  christos 	if (journal != NULL) {
   2418  1.1  christos 		dns_journal_destroy(&journal);
   2419  1.1  christos 	}
   2420  1.1  christos 	return (result);
   2421  1.1  christos }
   2422  1.1  christos 
   2423  1.1  christos static uint32_t
   2424  1.1  christos rrcount(unsigned char *buf, unsigned int size) {
   2425  1.1  christos 	isc_buffer_t b;
   2426  1.1  christos 	uint32_t rrsize, count = 0;
   2427  1.1  christos 
   2428  1.1  christos 	isc_buffer_init(&b, buf, size);
   2429  1.1  christos 	isc_buffer_add(&b, size);
   2430  1.1  christos 	while (isc_buffer_remaininglength(&b) > 0) {
   2431  1.1  christos 		rrsize = isc_buffer_getuint32(&b);
   2432  1.1  christos 		INSIST(isc_buffer_remaininglength(&b) >= rrsize);
   2433  1.1  christos 		isc_buffer_forward(&b, rrsize);
   2434  1.1  christos 		count++;
   2435  1.1  christos 	}
   2436  1.1  christos 
   2437  1.1  christos 	return (count);
   2438  1.1  christos }
   2439  1.1  christos 
   2440  1.1  christos static bool
   2441  1.1  christos check_delta(unsigned char *buf, size_t size) {
   2442  1.1  christos 	isc_buffer_t b;
   2443  1.1  christos 	uint32_t rrsize;
   2444  1.1  christos 
   2445  1.1  christos 	isc_buffer_init(&b, buf, size);
   2446  1.1  christos 	isc_buffer_add(&b, size);
   2447  1.1  christos 	while (isc_buffer_remaininglength(&b) > 0) {
   2448  1.1  christos 		if (isc_buffer_remaininglength(&b) < 4) {
   2449  1.1  christos 			return (false);
   2450  1.1  christos 		}
   2451  1.1  christos 		rrsize = isc_buffer_getuint32(&b);
   2452  1.1  christos 		/* "." + type + class + ttl + rdlen => 11U */
   2453  1.1  christos 		if (rrsize < 11U || isc_buffer_remaininglength(&b) < rrsize) {
   2454  1.1  christos 			return (false);
   2455  1.1  christos 		}
   2456  1.1  christos 		isc_buffer_forward(&b, rrsize);
   2457  1.1  christos 	}
   2458  1.1  christos 
   2459  1.1  christos 	return (true);
   2460  1.1  christos }
   2461  1.1  christos 
   2462  1.1  christos isc_result_t
   2463  1.1  christos dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial,
   2464  1.1  christos 		    uint32_t flags, uint32_t target_size) {
   2465  1.1  christos 	unsigned int i;
   2466  1.1  christos 	journal_pos_t best_guess;
   2467  1.1  christos 	journal_pos_t current_pos;
   2468  1.1  christos 	dns_journal_t *j1 = NULL;
   2469  1.1  christos 	dns_journal_t *j2 = NULL;
   2470  1.1  christos 	journal_rawheader_t rawheader;
   2471  1.1  christos 	unsigned int len;
   2472  1.1  christos 	size_t namelen;
   2473  1.1  christos 	unsigned char *buf = NULL;
   2474  1.1  christos 	unsigned int size = 0;
   2475  1.1  christos 	isc_result_t result;
   2476  1.1  christos 	unsigned int indexend;
   2477  1.1  christos 	char newname[PATH_MAX];
   2478  1.1  christos 	char backup[PATH_MAX];
   2479  1.1  christos 	bool is_backup = false;
   2480  1.1  christos 	bool rewrite = false;
   2481  1.1  christos 	bool downgrade = false;
   2482  1.1  christos 
   2483  1.1  christos 	REQUIRE(filename != NULL);
   2484  1.1  christos 
   2485  1.1  christos 	namelen = strlen(filename);
   2486  1.1  christos 	if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0) {
   2487  1.1  christos 		namelen -= 4;
   2488  1.1  christos 	}
   2489  1.1  christos 
   2490  1.1  christos 	result = snprintf(newname, sizeof(newname), "%.*s.jnw", (int)namelen,
   2491  1.1  christos 			  filename);
   2492  1.1  christos 	RUNTIME_CHECK(result < sizeof(newname));
   2493  1.1  christos 
   2494  1.1  christos 	result = snprintf(backup, sizeof(backup), "%.*s.jbk", (int)namelen,
   2495  1.1  christos 			  filename);
   2496  1.1  christos 	RUNTIME_CHECK(result < sizeof(backup));
   2497  1.1  christos 
   2498  1.1  christos 	result = journal_open(mctx, filename, false, false, false, &j1);
   2499  1.1  christos 	if (result == ISC_R_NOTFOUND) {
   2500  1.1  christos 		is_backup = true;
   2501  1.1  christos 		result = journal_open(mctx, backup, false, false, false, &j1);
   2502  1.1  christos 	}
   2503  1.1  christos 	if (result != ISC_R_SUCCESS) {
   2504  1.1  christos 		return (result);
   2505  1.1  christos 	}
   2506  1.1  christos 
   2507  1.1  christos 	/*
   2508  1.1  christos 	 * Always perform a re-write when processing a version 1 journal.
   2509  1.1  christos 	 */
   2510  1.1  christos 	rewrite = j1->header_ver1;
   2511  1.1  christos 
   2512  1.1  christos 	/*
   2513  1.1  christos 	 * Check whether we need to rewrite the whole journal
   2514  1.1  christos 	 * file (for example, to upversion it).
   2515  1.1  christos 	 */
   2516  1.1  christos 	if ((flags & DNS_JOURNAL_COMPACTALL) != 0) {
   2517  1.1  christos 		if ((flags & DNS_JOURNAL_VERSION1) != 0) {
   2518  1.1  christos 			downgrade = true;
   2519  1.1  christos 		}
   2520  1.1  christos 		rewrite = true;
   2521  1.1  christos 		serial = dns_journal_first_serial(j1);
   2522  1.1  christos 	} else if (JOURNAL_EMPTY(&j1->header)) {
   2523  1.1  christos 		dns_journal_destroy(&j1);
   2524  1.1  christos 		return (ISC_R_SUCCESS);
   2525  1.1  christos 	}
   2526  1.1  christos 
   2527  1.1  christos 	if (DNS_SERIAL_GT(j1->header.begin.serial, serial) ||
   2528  1.1  christos 	    DNS_SERIAL_GT(serial, j1->header.end.serial))
   2529  1.1  christos 	{
   2530  1.1  christos 		dns_journal_destroy(&j1);
   2531  1.1  christos 		return (ISC_R_RANGE);
   2532  1.1  christos 	}
   2533  1.1  christos 
   2534  1.1  christos 	/*
   2535  1.1  christos 	 * Cope with very small target sizes.
   2536  1.1  christos 	 */
   2537  1.1  christos 	indexend = sizeof(journal_rawheader_t) +
   2538  1.1  christos 		   j1->header.index_size * sizeof(journal_rawpos_t);
   2539  1.1  christos 	if (target_size < DNS_JOURNAL_SIZE_MIN) {
   2540  1.1  christos 		target_size = DNS_JOURNAL_SIZE_MIN;
   2541  1.1  christos 	}
   2542  1.1  christos 	if (target_size < indexend * 2) {
   2543  1.1  christos 		target_size = target_size / 2 + indexend;
   2544  1.1  christos 	}
   2545  1.1  christos 
   2546  1.1  christos 	/*
   2547  1.1  christos 	 * See if there is any work to do.
   2548  1.1  christos 	 */
   2549  1.1  christos 	if (!rewrite && (uint32_t)j1->header.end.offset < target_size) {
   2550  1.1  christos 		dns_journal_destroy(&j1);
   2551  1.1  christos 		return (ISC_R_SUCCESS);
   2552  1.1  christos 	}
   2553  1.1  christos 
   2554  1.1  christos 	CHECK(journal_open(mctx, newname, true, true, downgrade, &j2));
   2555  1.1  christos 	CHECK(journal_seek(j2, indexend));
   2556  1.1  christos 
   2557  1.1  christos 	/*
   2558  1.1  christos 	 * Remove overhead so space test below can succeed.
   2559  1.1  christos 	 */
   2560  1.1  christos 	if (target_size >= indexend) {
   2561  1.1  christos 		target_size -= indexend;
   2562  1.1  christos 	}
   2563  1.1  christos 
   2564  1.1  christos 	/*
   2565  1.1  christos 	 * Find if we can create enough free space.
   2566  1.1  christos 	 */
   2567  1.1  christos 	best_guess = j1->header.begin;
   2568  1.1  christos 	for (i = 0; i < j1->header.index_size; i++) {
   2569  1.1  christos 		if (POS_VALID(j1->index[i]) &&
   2570  1.1  christos 		    DNS_SERIAL_GE(serial, j1->index[i].serial) &&
   2571  1.1  christos 		    ((uint32_t)(j1->header.end.offset - j1->index[i].offset) >=
   2572  1.1  christos 		     target_size / 2) &&
   2573  1.1  christos 		    j1->index[i].offset > best_guess.offset)
   2574  1.1  christos 		{
   2575  1.1  christos 			best_guess = j1->index[i];
   2576  1.1  christos 		}
   2577  1.1  christos 	}
   2578  1.1  christos 
   2579  1.1  christos 	current_pos = best_guess;
   2580  1.1  christos 	while (current_pos.serial != serial) {
   2581  1.1  christos 		CHECK(journal_next(j1, &current_pos));
   2582  1.1  christos 		if (current_pos.serial == j1->header.end.serial) {
   2583  1.1  christos 			break;
   2584  1.1  christos 		}
   2585  1.1  christos 
   2586  1.1  christos 		if (DNS_SERIAL_GE(serial, current_pos.serial) &&
   2587  1.1  christos 		    ((uint32_t)(j1->header.end.offset - current_pos.offset) >=
   2588  1.1  christos 		     (target_size / 2)) &&
   2589  1.1  christos 		    current_pos.offset > best_guess.offset)
   2590  1.1  christos 		{
   2591  1.1  christos 			best_guess = current_pos;
   2592  1.1  christos 		} else {
   2593  1.1  christos 			break;
   2594  1.1  christos 		}
   2595  1.1  christos 	}
   2596  1.1  christos 
   2597  1.1  christos 	INSIST(best_guess.serial != j1->header.end.serial);
   2598  1.1  christos 	if (best_guess.serial != serial) {
   2599  1.1  christos 		CHECK(journal_next(j1, &best_guess));
   2600  1.1  christos 		serial = best_guess.serial;
   2601  1.1  christos 	}
   2602  1.1  christos 
   2603  1.1  christos 	/*
   2604  1.1  christos 	 * We should now be roughly half target_size provided
   2605  1.1  christos 	 * we did not reach 'serial'.  If not we will just copy
   2606  1.1  christos 	 * all uncommitted deltas regardless of the size.
   2607  1.1  christos 	 */
   2608  1.1  christos 	len = j1->header.end.offset - best_guess.offset;
   2609  1.1  christos 	if (len != 0) {
   2610  1.1  christos 		CHECK(journal_seek(j1, best_guess.offset));
   2611  1.1  christos 
   2612  1.1  christos 		/* Prepare new header */
   2613  1.1  christos 		j2->header.begin.serial = best_guess.serial;
   2614  1.1  christos 		j2->header.begin.offset = indexend;
   2615  1.1  christos 		j2->header.sourceserial = j1->header.sourceserial;
   2616  1.1  christos 		j2->header.serialset = j1->header.serialset;
   2617  1.1  christos 		j2->header.end.serial = j1->header.end.serial;
   2618  1.1  christos 
   2619  1.1  christos 		/*
   2620  1.1  christos 		 * Only use this method if we're rewriting the
   2621  1.1  christos 		 * journal to fix outdated transaction headers;
   2622  1.1  christos 		 * otherwise we'll copy the whole journal without
   2623  1.1  christos 		 * parsing individual deltas below.
   2624  1.1  christos 		 */
   2625  1.1  christos 		while (rewrite && len > 0) {
   2626  1.1  christos 			journal_xhdr_t xhdr;
   2627  1.1  christos 			isc_offset_t offset = j1->offset;
   2628  1.1  christos 			uint32_t count;
   2629  1.1  christos 
   2630  1.1  christos 			result = journal_read_xhdr(j1, &xhdr);
   2631  1.1  christos 			if (rewrite && result == ISC_R_NOMORE) {
   2632  1.1  christos 				break;
   2633  1.1  christos 			}
   2634  1.1  christos 			CHECK(result);
   2635  1.1  christos 
   2636  1.1  christos 			size = xhdr.size;
   2637  1.1  christos 			if (size > len) {
   2638  1.1  christos 				isc_log_write(JOURNAL_COMMON_LOGARGS,
   2639  1.1  christos 					      ISC_LOG_ERROR,
   2640  1.1  christos 					      "%s: journal file corrupt, "
   2641  1.1  christos 					      "transaction too large",
   2642  1.1  christos 					      j1->filename);
   2643  1.1  christos 				CHECK(ISC_R_FAILURE);
   2644  1.1  christos 			}
   2645  1.1  christos 			buf = isc_mem_get(mctx, size);
   2646  1.1  christos 			result = journal_read(j1, buf, size);
   2647  1.1  christos 
   2648  1.1  christos 			/*
   2649  1.1  christos 			 * If we're repairing an outdated journal, the
   2650  1.1  christos 			 * xhdr format may be wrong.
   2651  1.1  christos 			 */
   2652  1.1  christos 			if (rewrite && (result != ISC_R_SUCCESS ||
   2653  1.1  christos 					!check_delta(buf, size)))
   2654  1.1  christos 			{
   2655  1.1  christos 				if (j1->xhdr_version == XHDR_VERSION2) {
   2656  1.1  christos 					/* XHDR_VERSION2 -> XHDR_VERSION1 */
   2657  1.1  christos 					j1->xhdr_version = XHDR_VERSION1;
   2658  1.1  christos 					CHECK(journal_seek(j1, offset));
   2659  1.1  christos 					CHECK(journal_read_xhdr(j1, &xhdr));
   2660  1.1  christos 				} else if (j1->xhdr_version == XHDR_VERSION1) {
   2661  1.1  christos 					/* XHDR_VERSION1 -> XHDR_VERSION2 */
   2662  1.1  christos 					j1->xhdr_version = XHDR_VERSION2;
   2663  1.1  christos 					CHECK(journal_seek(j1, offset));
   2664  1.1  christos 					CHECK(journal_read_xhdr(j1, &xhdr));
   2665  1.1  christos 				}
   2666  1.1  christos 
   2667  1.1  christos 				/* Check again */
   2668  1.1  christos 				isc_mem_put(mctx, buf, size);
   2669  1.1  christos 				size = xhdr.size;
   2670  1.1  christos 				if (size > len) {
   2671  1.1  christos 					isc_log_write(
   2672  1.1  christos 						JOURNAL_COMMON_LOGARGS,
   2673  1.1  christos 						ISC_LOG_ERROR,
   2674  1.1  christos 						"%s: journal file corrupt, "
   2675  1.1  christos 						"transaction too large",
   2676  1.1  christos 						j1->filename);
   2677  1.1  christos 					CHECK(ISC_R_FAILURE);
   2678  1.1  christos 				}
   2679  1.1  christos 				buf = isc_mem_get(mctx, size);
   2680  1.1  christos 				CHECK(journal_read(j1, buf, size));
   2681  1.1  christos 
   2682  1.1  christos 				if (!check_delta(buf, size)) {
   2683  1.1  christos 					CHECK(ISC_R_UNEXPECTED);
   2684  1.1  christos 				}
   2685  1.1  christos 			} else {
   2686  1.1  christos 				CHECK(result);
   2687  1.1  christos 			}
   2688  1.1  christos 
   2689  1.1  christos 			/*
   2690  1.1  christos 			 * Recover from incorrectly written transaction header.
   2691  1.1  christos 			 * The incorrect header was written as size, serial0,
   2692  1.1  christos 			 * serial1, and 0.  XHDR_VERSION2 is expecting size,
   2693  1.1  christos 			 * count, serial0, and serial1.
   2694  1.1  christos 			 */
   2695  1.1  christos 			if (j1->xhdr_version == XHDR_VERSION2 &&
   2696  1.1  christos 			    xhdr.count == serial && xhdr.serial1 == 0U &&
   2697  1.1  christos 			    isc_serial_gt(xhdr.serial0, xhdr.count))
   2698  1.1  christos 			{
   2699  1.1  christos 				xhdr.serial1 = xhdr.serial0;
   2700  1.1  christos 				xhdr.serial0 = xhdr.count;
   2701  1.1  christos 				xhdr.count = 0;
   2702  1.1  christos 			}
   2703  1.1  christos 
   2704  1.1  christos 			/*
   2705  1.1  christos 			 * Check that xhdr is consistent.
   2706  1.1  christos 			 */
   2707  1.1  christos 			if (xhdr.serial0 != serial ||
   2708  1.1  christos 			    isc_serial_le(xhdr.serial1, xhdr.serial0))
   2709  1.1  christos 			{
   2710  1.1  christos 				CHECK(ISC_R_UNEXPECTED);
   2711  1.1  christos 			}
   2712  1.1  christos 
   2713  1.1  christos 			/*
   2714  1.1  christos 			 * Extract record count from the transaction.  This
   2715  1.1  christos 			 * is needed when converting from XHDR_VERSION1 to
   2716  1.1  christos 			 * XHDR_VERSION2, and when recovering from an
   2717  1.1  christos 			 * incorrectly written XHDR_VERSION2.
   2718  1.1  christos 			 */
   2719  1.1  christos 			count = rrcount(buf, size);
   2720  1.1  christos 			CHECK(journal_write_xhdr(j2, xhdr.size, count,
   2721  1.1  christos 						 xhdr.serial0, xhdr.serial1));
   2722  1.1  christos 			CHECK(journal_write(j2, buf, size));
   2723  1.1  christos 
   2724  1.1  christos 			j2->header.end.offset = j2->offset;
   2725  1.1  christos 
   2726  1.1  christos 			serial = xhdr.serial1;
   2727  1.1  christos 
   2728  1.1  christos 			len = j1->header.end.offset - j1->offset;
   2729  1.1  christos 			isc_mem_put(mctx, buf, size);
   2730  1.1  christos 		}
   2731  1.1  christos 
   2732  1.1  christos 		/*
   2733  1.1  christos 		 * If we're not rewriting transaction headers, we can use
   2734  1.1  christos 		 * this faster method instead.
   2735  1.1  christos 		 */
   2736  1.1  christos 		if (!rewrite) {
   2737  1.1  christos 			size = ISC_MIN(64 * 1024, len);
   2738  1.1  christos 			buf = isc_mem_get(mctx, size);
   2739  1.1  christos 			for (i = 0; i < len; i += size) {
   2740  1.1  christos 				unsigned int blob = ISC_MIN(size, len - i);
   2741  1.1  christos 				CHECK(journal_read(j1, buf, blob));
   2742  1.1  christos 				CHECK(journal_write(j2, buf, blob));
   2743  1.1  christos 			}
   2744  1.1  christos 
   2745  1.1  christos 			j2->header.end.offset = indexend + len;
   2746  1.1  christos 		}
   2747  1.1  christos 
   2748  1.1  christos 		CHECK(journal_fsync(j2));
   2749  1.1  christos 
   2750  1.1  christos 		/*
   2751  1.1  christos 		 * Update the journal header.
   2752  1.1  christos 		 */
   2753  1.1  christos 		journal_header_encode(&j2->header, &rawheader);
   2754  1.1  christos 		CHECK(journal_seek(j2, 0));
   2755  1.1  christos 		CHECK(journal_write(j2, &rawheader, sizeof(rawheader)));
   2756  1.1  christos 		CHECK(journal_fsync(j2));
   2757  1.1  christos 
   2758  1.1  christos 		/*
   2759  1.1  christos 		 * Build new index.
   2760  1.1  christos 		 */
   2761  1.1  christos 		current_pos = j2->header.begin;
   2762  1.1  christos 		while (current_pos.serial != j2->header.end.serial) {
   2763  1.1  christos 			index_add(j2, &current_pos);
   2764  1.1  christos 			CHECK(journal_next(j2, &current_pos));
   2765  1.1  christos 		}
   2766  1.1  christos 
   2767  1.1  christos 		/*
   2768  1.1  christos 		 * Write index.
   2769  1.1  christos 		 */
   2770  1.1  christos 		CHECK(index_to_disk(j2));
   2771  1.1  christos 		CHECK(journal_fsync(j2));
   2772  1.1  christos 
   2773  1.1  christos 		indexend = j2->header.end.offset;
   2774  1.1  christos 		POST(indexend);
   2775  1.1  christos 	}
   2776  1.1  christos 
   2777  1.1  christos 	/*
   2778  1.1  christos 	 * Close both journals before trying to rename files (this is
   2779  1.1  christos 	 * necessary on WIN32).
   2780  1.1  christos 	 */
   2781  1.1  christos 	dns_journal_destroy(&j1);
   2782  1.1  christos 	dns_journal_destroy(&j2);
   2783  1.1  christos 
   2784  1.1  christos 	/*
   2785  1.1  christos 	 * With a UFS file system this should just succeed and be atomic.
   2786  1.1  christos 	 * Any IXFR outs will just continue and the old journal will be
   2787  1.1  christos 	 * removed on final close.
   2788  1.1  christos 	 *
   2789  1.1  christos 	 * With MSDOS / NTFS we need to do a two stage rename, triggered
   2790  1.1  christos 	 * by EEXIST.  (If any IXFR's are running in other threads, however,
   2791  1.1  christos 	 * this will fail, and the journal will not be compacted.  But
   2792  1.1  christos 	 * if so, hopefully they'll be finished by the next time we
   2793  1.1  christos 	 * compact.)
   2794  1.1  christos 	 */
   2795  1.1  christos 	if (rename(newname, filename) == -1) {
   2796  1.1  christos 		if (errno == EEXIST && !is_backup) {
   2797  1.1  christos 			result = isc_file_remove(backup);
   2798  1.1  christos 			if (result != ISC_R_SUCCESS &&
   2799  1.1  christos 			    result != ISC_R_FILENOTFOUND)
   2800  1.1  christos 			{
   2801  1.1  christos 				goto failure;
   2802  1.1  christos 			}
   2803  1.1  christos 			if (rename(filename, backup) == -1) {
   2804  1.1  christos 				goto maperrno;
   2805  1.1  christos 			}
   2806  1.1  christos 			if (rename(newname, filename) == -1) {
   2807  1.1  christos 				goto maperrno;
   2808  1.1  christos 			}
   2809  1.1  christos 			(void)isc_file_remove(backup);
   2810  1.1  christos 		} else {
   2811  1.1  christos 		maperrno:
   2812  1.1  christos 			result = ISC_R_FAILURE;
   2813  1.1  christos 			goto failure;
   2814  1.1  christos 		}
   2815  1.1  christos 	}
   2816  1.1  christos 
   2817  1.1  christos 	result = ISC_R_SUCCESS;
   2818  1.1  christos 
   2819  1.1  christos failure:
   2820  1.1  christos 	(void)isc_file_remove(newname);
   2821  1.1  christos 	if (buf != NULL) {
   2822  1.1  christos 		isc_mem_put(mctx, buf, size);
   2823  1.1  christos 	}
   2824  1.1  christos 	if (j1 != NULL) {
   2825  1.1  christos 		dns_journal_destroy(&j1);
   2826  1.1  christos 	}
   2827  1.1  christos 	if (j2 != NULL) {
   2828  1.1  christos 		dns_journal_destroy(&j2);
   2829  1.1  christos 	}
   2830  1.1  christos 	return (result);
   2831  1.1  christos }
   2832  1.1  christos 
   2833  1.1  christos static isc_result_t
   2834  1.1  christos index_to_disk(dns_journal_t *j) {
   2835  1.1  christos 	isc_result_t result = ISC_R_SUCCESS;
   2836  1.1  christos 
   2837  1.1  christos 	if (j->header.index_size != 0) {
   2838  1.1  christos 		unsigned int i;
   2839  1.1  christos 		unsigned char *p;
   2840  1.1  christos 		unsigned int rawbytes;
   2841  1.1  christos 
   2842  1.1  christos 		rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
   2843  1.1  christos 
   2844  1.1  christos 		p = j->rawindex;
   2845  1.1  christos 		for (i = 0; i < j->header.index_size; i++) {
   2846  1.1  christos 			encode_uint32(j->index[i].serial, p);
   2847  1.1  christos 			p += 4;
   2848  1.1  christos 			encode_uint32(j->index[i].offset, p);
   2849  1.1  christos 			p += 4;
   2850  1.1  christos 		}
   2851  1.1  christos 		INSIST(p == j->rawindex + rawbytes);
   2852  1.1  christos 
   2853  1.1  christos 		CHECK(journal_seek(j, sizeof(journal_rawheader_t)));
   2854  1.1  christos 		CHECK(journal_write(j, j->rawindex, rawbytes));
   2855  1.1  christos 	}
   2856  1.1  christos failure:
   2857  1.1  christos 	return (result);
   2858  1.1  christos }
   2859