Home | History | Annotate | Line # | Download | only in vndcompress
offtab.c revision 1.10
      1  1.10     joerg /*	$NetBSD: offtab.c,v 1.10 2014/01/23 14:17:05 joerg Exp $	*/
      2   1.1  riastrad 
      3   1.1  riastrad /*-
      4   1.1  riastrad  * Copyright (c) 2014 The NetBSD Foundation, Inc.
      5   1.1  riastrad  * All rights reserved.
      6   1.1  riastrad  *
      7   1.1  riastrad  * This code is derived from software contributed to The NetBSD Foundation
      8   1.1  riastrad  * by Taylor R. Campbell.
      9   1.1  riastrad  *
     10   1.1  riastrad  * Redistribution and use in source and binary forms, with or without
     11   1.1  riastrad  * modification, are permitted provided that the following conditions
     12   1.1  riastrad  * are met:
     13   1.1  riastrad  * 1. Redistributions of source code must retain the above copyright
     14   1.1  riastrad  *    notice, this list of conditions and the following disclaimer.
     15   1.1  riastrad  * 2. Redistributions in binary form must reproduce the above copyright
     16   1.1  riastrad  *    notice, this list of conditions and the following disclaimer in the
     17   1.1  riastrad  *    documentation and/or other materials provided with the distribution.
     18   1.1  riastrad  *
     19   1.1  riastrad  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20   1.1  riastrad  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21   1.1  riastrad  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22   1.1  riastrad  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23   1.1  riastrad  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24   1.1  riastrad  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25   1.1  riastrad  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26   1.1  riastrad  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27   1.1  riastrad  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28   1.1  riastrad  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29   1.1  riastrad  * POSSIBILITY OF SUCH DAMAGE.
     30   1.1  riastrad  */
     31   1.1  riastrad 
     32   1.1  riastrad #include <sys/cdefs.h>
     33  1.10     joerg __RCSID("$NetBSD: offtab.c,v 1.10 2014/01/23 14:17:05 joerg Exp $");
     34   1.1  riastrad 
     35   1.1  riastrad #include <sys/types.h>
     36   1.1  riastrad #include <sys/endian.h>
     37   1.1  riastrad 
     38   1.1  riastrad #include <assert.h>
     39   1.1  riastrad #include <err.h>
     40   1.1  riastrad #include <errno.h>
     41   1.1  riastrad #include <inttypes.h>
     42   1.1  riastrad #include <limits.h>
     43   1.1  riastrad #include <stdbool.h>
     44   1.1  riastrad #include <stdlib.h>
     45   1.1  riastrad #include <unistd.h>
     46   1.1  riastrad 
     47   1.1  riastrad #include "common.h"
     48   1.1  riastrad #include "utils.h"
     49   1.1  riastrad 
     50   1.1  riastrad #include "offtab.h"
     51   1.1  riastrad 
     52  1.10     joerg static void __printflike(1,2) __dead
     53   1.1  riastrad offtab_bug(const char *fmt, ...)
     54   1.1  riastrad {
     55   1.1  riastrad 
     56   1.1  riastrad 	errx(1, "bug in offtab, please report");
     57   1.1  riastrad }
     58   1.1  riastrad 
     59  1.10     joerg static void __printflike(1,2) __dead
     60   1.1  riastrad offtab_bugx(const char *fmt, ...)
     61   1.1  riastrad {
     62   1.1  riastrad 
     63   1.1  riastrad 	errx(1, "bug in offtab, please report");
     64   1.1  riastrad }
     65   1.2  riastrad 
     66   1.2  riastrad static uint32_t
     67   1.8  riastrad offtab_compute_window_size(struct offtab *offtab, uint32_t start)
     68   1.2  riastrad {
     69   1.2  riastrad 
     70   1.8  riastrad 	assert(start < offtab->ot_n_offsets);
     71   1.8  riastrad 	return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start));
     72   1.2  riastrad }
     73   1.2  riastrad 
     74   1.2  riastrad static uint32_t
     75   1.2  riastrad offtab_current_window_size(struct offtab *offtab)
     76   1.2  riastrad {
     77   1.2  riastrad 
     78   1.8  riastrad 	return offtab_compute_window_size(offtab, offtab->ot_window_start);
     79   1.2  riastrad }
     80   1.2  riastrad 
     81   1.2  riastrad static uint32_t
     82   1.2  riastrad offtab_current_window_end(struct offtab *offtab)
     83   1.2  riastrad {
     84   1.2  riastrad 
     85   1.2  riastrad 	assert(offtab->ot_window_start < offtab->ot_n_offsets);
     86   1.2  riastrad 	assert(offtab_current_window_size(offtab) <=
     87   1.2  riastrad 	    (offtab->ot_n_offsets - offtab->ot_window_start));
     88   1.2  riastrad 	return (offtab->ot_window_start + offtab_current_window_size(offtab));
     89   1.2  riastrad }
     90   1.2  riastrad 
     91   1.2  riastrad #define	OFFTAB_READ_SEEK	0x01
     92   1.2  riastrad #define	OFFTAB_READ_NOSEEK	0x00
     93   1.2  riastrad 
     94   1.2  riastrad static bool
     95   1.2  riastrad offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
     96   1.2  riastrad {
     97   1.2  riastrad 
     98   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
     99   1.2  riastrad 
    100   1.2  riastrad 	const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
    101   1.2  riastrad 	const uint32_t window_size = offtab_compute_window_size(offtab,
    102   1.8  riastrad 	    window_start);
    103   1.2  riastrad 
    104   1.2  riastrad 	__CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
    105   1.2  riastrad 	__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
    106   1.2  riastrad 	assert(window_start < offtab->ot_n_offsets);
    107   1.2  riastrad 	assert(offtab->ot_fdpos <=
    108   1.3  riastrad 	    (OFF_MAX - (off_t)(window_start * sizeof(uint64_t))));
    109   1.2  riastrad 	assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
    110   1.2  riastrad 	    (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
    111   1.2  riastrad 	    ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
    112   1.2  riastrad 	const size_t n_req = (window_size * sizeof(uint64_t));
    113   1.2  riastrad 	const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
    114   1.2  riastrad 	    ? pread_block(offtab->ot_fd, offtab->ot_window, n_req,
    115   1.2  riastrad 		(offtab->ot_fdpos + (window_start * sizeof(uint64_t))))
    116   1.2  riastrad 	    : read_block(offtab->ot_fd, offtab->ot_window, n_req));
    117   1.2  riastrad 	if (n_read == -1) {
    118   1.2  riastrad 		(*offtab->ot_report)("read offset table at %"PRIuMAX,
    119   1.2  riastrad 		    (uintmax_t)(offtab->ot_fdpos +
    120   1.2  riastrad 			(window_start * sizeof(uint64_t))));
    121   1.2  riastrad 		return false;
    122   1.2  riastrad 	}
    123   1.2  riastrad 	assert(n_read >= 0);
    124   1.2  riastrad 	if ((size_t)n_read != (window_size * sizeof(uint64_t))) {
    125   1.2  riastrad 		(*offtab->ot_reportx)("partial read of offset table"
    126   1.2  riastrad 		    " at %"PRIuMAX": %zu != %zu",
    127   1.2  riastrad 		    (uintmax_t)(offtab->ot_fdpos +
    128   1.2  riastrad 			(window_start * sizeof(uint64_t))),
    129   1.2  riastrad 		    (size_t)n_read,
    130   1.2  riastrad 		    (size_t)(window_size * sizeof(uint64_t)));
    131   1.2  riastrad 		return false;
    132   1.2  riastrad 	}
    133   1.2  riastrad 	offtab->ot_window_start = window_start;
    134   1.2  riastrad 
    135   1.2  riastrad 	return true;
    136   1.2  riastrad }
    137   1.2  riastrad 
    138   1.2  riastrad static bool
    139   1.2  riastrad offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
    140   1.2  riastrad {
    141   1.2  riastrad 
    142   1.2  riastrad 	/* Don't bother if blkno is already in the window.  */
    143   1.2  riastrad 	if ((offtab->ot_window_start <= blkno) &&
    144   1.2  riastrad 	    (blkno < offtab_current_window_end(offtab)))
    145   1.2  riastrad 		return true;
    146   1.2  riastrad 
    147   1.2  riastrad 	if (!offtab_read_window(offtab, blkno, read_flags))
    148   1.2  riastrad 		return false;
    149   1.2  riastrad 
    150   1.2  riastrad 	return true;
    151   1.2  riastrad }
    152   1.2  riastrad 
    153   1.2  riastrad static void
    154   1.5  riastrad offtab_write_window(struct offtab *offtab)
    155   1.2  riastrad {
    156   1.2  riastrad 
    157   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
    158   1.2  riastrad 
    159   1.2  riastrad 	const uint32_t window_size = offtab_current_window_size(offtab);
    160   1.2  riastrad 	__CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
    161   1.2  riastrad 	__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
    162   1.2  riastrad 	assert(offtab->ot_window_start < offtab->ot_n_offsets);
    163   1.2  riastrad 	assert(offtab->ot_fdpos <=
    164   1.3  riastrad 	    (OFF_MAX - (off_t)(offtab->ot_window_start * sizeof(uint64_t))));
    165   1.2  riastrad 	const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
    166   1.2  riastrad 	    (window_size * sizeof(uint64_t)),
    167   1.2  riastrad 	    (offtab->ot_fdpos +
    168   1.2  riastrad 		(offtab->ot_window_start * sizeof(uint64_t))));
    169   1.2  riastrad 	if (n_written == -1)
    170   1.2  riastrad 		err_ss(1, "write initial offset table");
    171   1.2  riastrad 	assert(n_written >= 0);
    172   1.2  riastrad 	if ((size_t)n_written != (window_size * sizeof(uint64_t)))
    173   1.2  riastrad 		errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
    174   1.2  riastrad 		    (size_t)n_written,
    175   1.2  riastrad 		    (size_t)(window_size * sizeof(uint64_t)));
    176   1.2  riastrad }
    177   1.5  riastrad 
    178   1.5  riastrad static void
    179   1.5  riastrad offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
    180   1.5  riastrad {
    181   1.5  riastrad 
    182   1.5  riastrad 	/* Don't bother if [start, end) does not cover our window.  */
    183   1.5  riastrad 	if (end <= offtab->ot_window_start)
    184   1.5  riastrad 		return;
    185   1.5  riastrad 	if (offtab_current_window_end(offtab) < start)
    186   1.5  riastrad 		return;
    187   1.5  riastrad 
    188   1.5  riastrad 	offtab_write_window(offtab);
    189   1.5  riastrad }
    190   1.1  riastrad 
    191   1.1  riastrad /*
    193   1.1  riastrad  * Initialize an offtab to support the specified number of offsets read
    194   1.1  riastrad  * to or written from fd at byte position fdpos.
    195   1.1  riastrad  */
    196   1.2  riastrad void
    197   1.2  riastrad offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
    198   1.1  riastrad     int fd, off_t fdpos)
    199   1.1  riastrad {
    200   1.1  riastrad 
    201   1.1  riastrad 	assert(offtab != NULL);
    202   1.1  riastrad 	assert(0 < n_offsets);
    203   1.1  riastrad 	assert(0 <= fd);
    204   1.1  riastrad 	assert(0 <= fdpos);
    205   1.1  riastrad 
    206   1.2  riastrad 	offtab->ot_n_offsets = n_offsets;
    207   1.2  riastrad 	if ((window_size == 0) || (n_offsets < window_size))
    208   1.2  riastrad 		offtab->ot_window_size = n_offsets;
    209   1.2  riastrad 	else
    210   1.2  riastrad 		offtab->ot_window_size = window_size;
    211   1.2  riastrad 	assert(offtab->ot_window_size <= offtab->ot_n_offsets);
    212   1.2  riastrad 	offtab->ot_window_start = (uint32_t)-1;
    213   1.2  riastrad 	__CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
    214   1.2  riastrad 	offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
    215   1.1  riastrad 	if (offtab->ot_window == NULL)
    216   1.1  riastrad 		err(1, "malloc offset table");
    217   1.1  riastrad 	offtab->ot_blkno = (uint32_t)-1;
    218   1.1  riastrad 	offtab->ot_fd = fd;
    219   1.1  riastrad 	offtab->ot_fdpos = fdpos;
    220   1.1  riastrad 	offtab->ot_report = &offtab_bug;
    221   1.1  riastrad 	offtab->ot_reportx = &offtab_bugx;
    222   1.1  riastrad 	offtab->ot_mode = OFFTAB_MODE_NONE;
    223   1.1  riastrad }
    224   1.1  riastrad 
    225   1.1  riastrad /*
    226   1.1  riastrad  * Destroy an offtab.
    227   1.1  riastrad  */
    228   1.1  riastrad void
    229   1.1  riastrad offtab_destroy(struct offtab *offtab)
    230   1.1  riastrad {
    231   1.2  riastrad 
    232   1.1  riastrad 	free(offtab->ot_window);
    233   1.1  riastrad }
    234   1.1  riastrad 
    235   1.1  riastrad /*
    236   1.1  riastrad  * For an offtab that has been used to read data from disk, convert it
    237   1.2  riastrad  * to an offtab that can be used to write subsequent data to disk.
    238   1.1  riastrad  * blkno is the last valid blkno read from disk.
    239   1.2  riastrad  */
    240   1.2  riastrad bool
    241   1.1  riastrad offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
    242   1.1  riastrad {
    243   1.1  riastrad 
    244   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
    245   1.2  riastrad 	assert(0 < blkno);
    246   1.2  riastrad 
    247   1.2  riastrad 	if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
    248   1.2  riastrad 		return false;
    249   1.1  riastrad 
    250   1.2  riastrad 	offtab->ot_mode = OFFTAB_MODE_WRITE;
    251   1.2  riastrad 	offtab->ot_blkno = blkno;
    252   1.2  riastrad 
    253   1.1  riastrad 	return true;
    254   1.1  riastrad }
    255   1.1  riastrad 
    256   1.1  riastrad /*
    258   1.1  riastrad  * Reset an offtab for reading an offset table from the beginning.
    259   1.1  riastrad  * Initializes in-memory state and may read data from offtab->ot_fd,
    260   1.1  riastrad  * which must currently be at byte position offtab->ot_fdpos.  Failure
    261   1.1  riastrad  * will be reported by the report/reportx routines, which are called
    262   1.2  riastrad  * like warn/warnx.  May fail; returns true on success, false on
    263   1.2  riastrad  * failure.
    264   1.2  riastrad  *
    265   1.2  riastrad  * This almost has copypasta of offtab_prepare_get, but this uses read,
    266   1.1  riastrad  * rather than pread, so that it will work on nonseekable input if the
    267   1.1  riastrad  * window is the whole offset table.
    268   1.1  riastrad  */
    269   1.1  riastrad bool
    270   1.1  riastrad offtab_reset_read(struct offtab *offtab,
    271   1.1  riastrad     void (*report)(const char *, ...) __printflike(1,2),
    272   1.1  riastrad     void (*reportx)(const char *, ...) __printflike(1,2))
    273   1.1  riastrad {
    274   1.1  riastrad 
    275   1.1  riastrad 	assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
    276   1.1  riastrad 	    ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
    277   1.1  riastrad 
    278   1.1  riastrad 	offtab->ot_report = report;
    279   1.2  riastrad 	offtab->ot_reportx = reportx;
    280   1.1  riastrad 	offtab->ot_mode = OFFTAB_MODE_READ;
    281   1.2  riastrad 	offtab->ot_blkno = (uint32_t)-1;
    282   1.1  riastrad 
    283   1.1  riastrad 	if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
    284   1.4  riastrad 		return false;
    285   1.4  riastrad 
    286   1.4  riastrad 	if (offtab->ot_window_size < offtab->ot_n_offsets) {
    287   1.4  riastrad 		__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
    288   1.4  riastrad 		assert(offtab->ot_fdpos <= (OFF_MAX -
    289   1.4  riastrad 			(off_t)(offtab->ot_n_offsets * sizeof(uint64_t))));
    290   1.4  riastrad 		const off_t first_offset = (offtab->ot_fdpos +
    291   1.4  riastrad 		    (offtab->ot_n_offsets * sizeof(uint64_t)));
    292   1.4  riastrad 		if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
    293   1.4  riastrad 			(*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
    294   1.4  riastrad 			    first_offset);
    295   1.4  riastrad 			return false;
    296   1.4  riastrad 		}
    297   1.1  riastrad 	}
    298   1.1  riastrad 
    299   1.1  riastrad 	return true;
    300   1.1  riastrad }
    301   1.1  riastrad 
    302   1.1  riastrad /*
    303   1.1  riastrad  * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
    304   1.1  riastrad  * preparation for a call to offtab_get.  May fail; returns true on
    305   1.1  riastrad  * success, false on failure.
    306   1.1  riastrad  */
    307   1.1  riastrad bool
    308   1.1  riastrad offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
    309   1.1  riastrad {
    310   1.1  riastrad 
    311   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
    312   1.2  riastrad 	assert(blkno < offtab->ot_n_offsets);
    313   1.2  riastrad 
    314   1.2  riastrad 	if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
    315   1.2  riastrad 		return false;
    316   1.2  riastrad 
    317   1.2  riastrad 	assert(offtab->ot_window_start <= blkno);
    318   1.1  riastrad 	assert(blkno < offtab_current_window_end(offtab));
    319   1.1  riastrad 
    320   1.1  riastrad 	offtab->ot_blkno = blkno;
    321   1.1  riastrad 	return true;
    322   1.1  riastrad }
    323   1.1  riastrad 
    324   1.1  riastrad /*
    325   1.1  riastrad  * Return the offset for blkno.  Caller must have called
    326   1.1  riastrad  * offtab_prepare_get beforehand.
    327   1.1  riastrad  */
    328   1.1  riastrad uint64_t
    329   1.1  riastrad offtab_get(struct offtab *offtab, uint32_t blkno)
    330   1.1  riastrad {
    331   1.1  riastrad 
    332   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
    333   1.2  riastrad 	assert(blkno == offtab->ot_blkno);
    334   1.2  riastrad 	assert(offtab->ot_window_start <= blkno);
    335   1.2  riastrad 	assert(blkno < offtab_current_window_end(offtab));
    336   1.1  riastrad 
    337   1.1  riastrad 	return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
    338   1.1  riastrad }
    339   1.1  riastrad 
    340   1.1  riastrad /*
    342   1.1  riastrad  * Reset offtab for writing a fresh offset table.  Initializes
    343   1.1  riastrad  * in-memory state and writes an empty offset table to offtab->ot_fd,
    344   1.1  riastrad  * which must currently be at byte position offtab->ot_fdpos.  May
    345   1.1  riastrad  * fail; returns on success, aborts with err(3) on failure.
    346   1.1  riastrad  */
    347   1.1  riastrad void
    348   1.1  riastrad offtab_reset_write(struct offtab *offtab)
    349   1.1  riastrad {
    350   1.1  riastrad 	uint32_t i;
    351   1.1  riastrad 
    352   1.2  riastrad 	assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
    353   1.1  riastrad 
    354   1.1  riastrad 	offtab->ot_mode = OFFTAB_MODE_WRITE;
    355   1.1  riastrad 	offtab->ot_blkno = (uint32_t)-1;
    356   1.1  riastrad 
    357   1.1  riastrad 	/*
    358   1.1  riastrad 	 * Initialize the offset table to all ones (except for the
    359   1.1  riastrad 	 * fixed first offset) so that we can easily detect where we
    360   1.1  riastrad 	 * were interrupted if we want to restart.
    361   1.2  riastrad 	 */
    362   1.2  riastrad 	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
    363   1.2  riastrad 	assert(offtab->ot_n_offsets > 0);
    364   1.2  riastrad 
    365   1.2  riastrad 	for (i = 0; i < offtab->ot_window_size; i++)
    366   1.2  riastrad 		offtab->ot_window[i] = ~(uint64_t)0;
    367   1.2  riastrad 
    368   1.2  riastrad 	const uint32_t n_windows =
    369   1.2  riastrad 	    howmany(offtab->ot_n_offsets, offtab->ot_window_size);
    370   1.5  riastrad 	for (i = 1; i < n_windows; i++) {
    371   1.2  riastrad 		/* Change the start but reuse the all-ones buffer.  */
    372   1.2  riastrad 		offtab->ot_window_start = (i * offtab->ot_window_size);
    373   1.2  riastrad 		offtab_write_window(offtab);
    374   1.2  riastrad 	}
    375   1.2  riastrad 
    376   1.3  riastrad 	offtab->ot_window_start = 0;
    377   1.3  riastrad 	__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
    378   1.1  riastrad 	assert(offtab->ot_fdpos <=
    379   1.3  riastrad 	    (OFF_MAX - (off_t)(offtab->ot_n_offsets * sizeof(uint64_t))));
    380   1.2  riastrad 	const uint64_t first_offset = (offtab->ot_fdpos +
    381   1.5  riastrad 	    (offtab->ot_n_offsets * sizeof(uint64_t)));
    382   1.1  riastrad 	assert(first_offset <= OFF_MAX);
    383   1.2  riastrad 	offtab->ot_window[0] = htobe64(first_offset);
    384   1.2  riastrad 	offtab_write_window(offtab);
    385   1.1  riastrad 
    386   1.1  riastrad 	if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
    387   1.1  riastrad 		err(1, "lseek to first offset failed");
    388   1.1  riastrad }
    389   1.1  riastrad 
    390   1.1  riastrad /*
    391   1.1  riastrad  * Guarantee that the disk reflects block offsets [0, n_offsets).  If
    392   1.1  riastrad  * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
    393   1.1  riastrad  * offset table.  May fail; returns on success, aborts with err(3) on
    394   1.1  riastrad  * failure.  Fsync failure is considered success but is reported with a
    395   1.2  riastrad  * warning.
    396   1.2  riastrad  *
    397   1.2  riastrad  * This routine does not write state in memory, and does not read state
    398   1.1  riastrad  * that is not signal-safe.  The only state read is offtab->ot_window,
    399   1.1  riastrad  * offtab->ot_window_start, and quantities that are static for the
    400   1.1  riastrad  * signal-interruptable existence of the offset table.
    401   1.1  riastrad  */
    402   1.1  riastrad void
    403   1.1  riastrad offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
    404   1.1  riastrad {
    405   1.1  riastrad 
    406   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
    407   1.2  riastrad 	assert(n_offsets <= offtab->ot_n_offsets);
    408   1.2  riastrad 
    409   1.2  riastrad 	/*
    410   1.2  riastrad 	 * Write the window unless we just did that and were
    411   1.5  riastrad 	 * interrupted before we could move the window.
    412   1.1  riastrad 	 */
    413   1.1  riastrad 	if (offtab->ot_window != NULL)
    414   1.2  riastrad 		offtab_maybe_write_window(offtab, 0, n_offsets);
    415   1.2  riastrad 
    416   1.3  riastrad 	if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
    417   1.1  riastrad 		__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
    418   1.1  riastrad 		assert(offtab->ot_fdpos
    419   1.2  riastrad 		    <= (OFF_MAX - (off_t)(n_offsets * sizeof(uint64_t))));
    420   1.1  riastrad 		if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
    421   1.1  riastrad 			offtab->ot_fdpos,
    422   1.1  riastrad 			(offtab->ot_fdpos + (n_offsets * sizeof(uint64_t))))
    423   1.1  riastrad 		    == -1)
    424   1.1  riastrad 			warn_ss("fsync of offset table failed");
    425   1.1  riastrad 	}
    426   1.1  riastrad }
    427   1.1  riastrad 
    428   1.1  riastrad /*
    429   1.1  riastrad  * Do any I/O or bookkeeping necessary to set an offset for blkno.  May
    430   1.1  riastrad  * fail; returns on success, aborts with err(3) on failure.
    431   1.1  riastrad  */
    432   1.2  riastrad void
    433   1.1  riastrad offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
    434   1.1  riastrad {
    435   1.1  riastrad 	uint32_t i;
    436   1.2  riastrad 
    437   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
    438   1.2  riastrad 	assert(blkno < offtab->ot_n_offsets);
    439   1.2  riastrad 
    440   1.2  riastrad 	/*
    441   1.2  riastrad 	 * Assume, for convenience, that we write blocks in order.
    442   1.2  riastrad 	 * Thus we need not do another read -- we can just clear the
    443   1.2  riastrad 	 * window.
    444   1.2  riastrad 	 */
    445   1.2  riastrad 	assert((offtab->ot_blkno == (uint32_t)-1) ||
    446   1.2  riastrad 	    ((offtab->ot_blkno + 1) == blkno));
    447   1.2  riastrad 
    448   1.2  riastrad 	/* If it's already in our window, we're good to go.  */
    449   1.2  riastrad 	if ((offtab->ot_window_start <= blkno) &&
    450   1.2  riastrad 	    (blkno < offtab_current_window_end(offtab)))
    451   1.5  riastrad 		goto win;
    452   1.2  riastrad 
    453   1.2  riastrad 	/* Otherwise, write out the current window and choose a new one.  */
    454   1.2  riastrad 	offtab_write_window(offtab);
    455   1.2  riastrad 
    456   1.2  riastrad 	assert(offtab->ot_window_size <= blkno);
    457   1.2  riastrad 	assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
    458   1.2  riastrad 	assert((offtab->ot_window_start + offtab->ot_window_size) ==
    459   1.2  riastrad 	    rounddown(blkno, offtab->ot_window_size));
    460   1.2  riastrad 
    461   1.2  riastrad     {
    462   1.2  riastrad 	uint64_t *window;
    463   1.2  riastrad 	sigset_t sigmask;
    464   1.7  riastrad 
    465   1.2  riastrad 	/*
    466   1.2  riastrad 	 * Mark the window as being updated so nobody tries to write it
    467   1.2  riastrad 	 * (since we just wrote it) while we fill it with ones.
    468   1.2  riastrad 	 */
    469   1.2  riastrad 	block_signals(&sigmask);
    470   1.2  riastrad 	window = offtab->ot_window;
    471   1.2  riastrad 	offtab->ot_window = NULL;
    472   1.2  riastrad 	restore_sigmask(&sigmask);
    473   1.2  riastrad 
    474   1.2  riastrad 	/* Fill the window with ones.  */
    475   1.2  riastrad 	for (i = 0; i < offtab_current_window_size(offtab); i++)
    476   1.2  riastrad 		window[i] = ~(uint64_t)0;
    477   1.2  riastrad 
    478   1.2  riastrad 	/* Restore the window as ready again.  */
    479   1.2  riastrad 	block_signals(&sigmask);
    480   1.2  riastrad 	offtab->ot_window = window;
    481   1.2  riastrad 	offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
    482   1.2  riastrad 	restore_sigmask(&sigmask);
    483   1.2  riastrad     }
    484   1.2  riastrad 
    485   1.1  riastrad win:	assert(offtab->ot_window_start <= blkno);
    486   1.1  riastrad 	assert(blkno < offtab_current_window_end(offtab));
    487   1.1  riastrad 
    488   1.1  riastrad 	offtab->ot_blkno = blkno;
    489   1.1  riastrad }
    490   1.1  riastrad 
    491   1.1  riastrad /*
    492   1.1  riastrad  * Actually set the offset for blkno.
    493   1.1  riastrad  */
    494   1.1  riastrad void
    495   1.1  riastrad offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
    496   1.1  riastrad {
    497   1.2  riastrad 
    498   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
    499   1.2  riastrad 	assert(blkno == offtab->ot_blkno);
    500   1.2  riastrad 	assert(offtab->ot_window_start <= blkno);
    501   1.1  riastrad 	assert(blkno < offtab_current_window_end(offtab));
    502                 
    503                 	offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
    504                 }
    505