Home | History | Annotate | Line # | Download | only in vndcompress
offtab.c revision 1.12
      1  1.12  riastrad /*	$NetBSD: offtab.c,v 1.12 2014/01/25 16:26:17 riastradh Exp $	*/
      2   1.1  riastrad 
      3   1.1  riastrad /*-
      4   1.1  riastrad  * Copyright (c) 2014 The NetBSD Foundation, Inc.
      5   1.1  riastrad  * All rights reserved.
      6   1.1  riastrad  *
      7   1.1  riastrad  * This code is derived from software contributed to The NetBSD Foundation
      8   1.1  riastrad  * by Taylor R. Campbell.
      9   1.1  riastrad  *
     10   1.1  riastrad  * Redistribution and use in source and binary forms, with or without
     11   1.1  riastrad  * modification, are permitted provided that the following conditions
     12   1.1  riastrad  * are met:
     13   1.1  riastrad  * 1. Redistributions of source code must retain the above copyright
     14   1.1  riastrad  *    notice, this list of conditions and the following disclaimer.
     15   1.1  riastrad  * 2. Redistributions in binary form must reproduce the above copyright
     16   1.1  riastrad  *    notice, this list of conditions and the following disclaimer in the
     17   1.1  riastrad  *    documentation and/or other materials provided with the distribution.
     18   1.1  riastrad  *
     19   1.1  riastrad  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20   1.1  riastrad  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21   1.1  riastrad  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22   1.1  riastrad  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23   1.1  riastrad  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24   1.1  riastrad  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25   1.1  riastrad  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26   1.1  riastrad  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27   1.1  riastrad  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28   1.1  riastrad  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29   1.1  riastrad  * POSSIBILITY OF SUCH DAMAGE.
     30   1.1  riastrad  */
     31   1.1  riastrad 
     32   1.1  riastrad #include <sys/cdefs.h>
     33  1.12  riastrad __RCSID("$NetBSD: offtab.c,v 1.12 2014/01/25 16:26:17 riastradh Exp $");
     34   1.1  riastrad 
     35   1.1  riastrad #include <sys/types.h>
     36   1.1  riastrad #include <sys/endian.h>
     37   1.1  riastrad 
     38   1.1  riastrad #include <assert.h>
     39   1.1  riastrad #include <err.h>
     40   1.1  riastrad #include <errno.h>
     41   1.1  riastrad #include <inttypes.h>
     42   1.1  riastrad #include <limits.h>
     43   1.1  riastrad #include <stdbool.h>
     44   1.1  riastrad #include <stdlib.h>
     45   1.1  riastrad #include <unistd.h>
     46   1.1  riastrad 
     47   1.1  riastrad #include "common.h"
     48   1.1  riastrad #include "utils.h"
     49   1.1  riastrad 
     50   1.1  riastrad #include "offtab.h"
     51   1.1  riastrad 
     52  1.10     joerg static void __printflike(1,2) __dead
     53   1.1  riastrad offtab_bug(const char *fmt, ...)
     54   1.1  riastrad {
     55   1.1  riastrad 
     56   1.1  riastrad 	errx(1, "bug in offtab, please report");
     57   1.1  riastrad }
     58   1.1  riastrad 
     59  1.10     joerg static void __printflike(1,2) __dead
     60   1.1  riastrad offtab_bugx(const char *fmt, ...)
     61   1.1  riastrad {
     62   1.1  riastrad 
     63   1.1  riastrad 	errx(1, "bug in offtab, please report");
     64   1.1  riastrad }
     65   1.2  riastrad 
     66   1.2  riastrad static uint32_t
     67   1.8  riastrad offtab_compute_window_size(struct offtab *offtab, uint32_t start)
     68   1.2  riastrad {
     69   1.2  riastrad 
     70   1.8  riastrad 	assert(start < offtab->ot_n_offsets);
     71   1.8  riastrad 	return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start));
     72   1.2  riastrad }
     73   1.2  riastrad 
     74   1.2  riastrad static uint32_t
     75   1.2  riastrad offtab_current_window_size(struct offtab *offtab)
     76   1.2  riastrad {
     77   1.2  riastrad 
     78   1.8  riastrad 	return offtab_compute_window_size(offtab, offtab->ot_window_start);
     79   1.2  riastrad }
     80   1.2  riastrad 
     81   1.2  riastrad static uint32_t
     82   1.2  riastrad offtab_current_window_end(struct offtab *offtab)
     83   1.2  riastrad {
     84   1.2  riastrad 
     85   1.2  riastrad 	assert(offtab->ot_window_start < offtab->ot_n_offsets);
     86   1.2  riastrad 	assert(offtab_current_window_size(offtab) <=
     87   1.2  riastrad 	    (offtab->ot_n_offsets - offtab->ot_window_start));
     88   1.2  riastrad 	return (offtab->ot_window_start + offtab_current_window_size(offtab));
     89   1.2  riastrad }
     90   1.2  riastrad 
     91  1.12  riastrad static void
     92  1.12  riastrad offtab_compute_window_position(struct offtab *offtab, uint32_t window_start,
     93  1.12  riastrad     size_t *bytes, off_t *pos)
     94  1.12  riastrad {
     95  1.12  riastrad 	const uint32_t window_size = offtab_compute_window_size(offtab,
     96  1.12  riastrad 	    window_start);
     97  1.12  riastrad 
     98  1.12  riastrad 	__CTASSERT(MAX_WINDOW_SIZE <= (OFF_MAX / sizeof(uint64_t)));
     99  1.12  riastrad 	*bytes = (window_size * sizeof(uint64_t));
    100  1.12  riastrad 
    101  1.12  riastrad 	assert(window_start <= offtab->ot_n_offsets);
    102  1.12  riastrad 	__CTASSERT(MAX_N_OFFSETS <= (SIZE_MAX / sizeof(uint64_t)));
    103  1.12  riastrad 	const off_t window_offset = ((off_t)window_start *
    104  1.12  riastrad 	    (off_t)sizeof(uint64_t));
    105  1.12  riastrad 
    106  1.12  riastrad 	/* XXX This assertion is not justified.  */
    107  1.12  riastrad 	assert(offtab->ot_fdpos <= (OFF_MAX - window_offset));
    108  1.12  riastrad 	*pos = (offtab->ot_fdpos + window_offset);
    109  1.12  riastrad }
    110  1.12  riastrad 
    111   1.2  riastrad #define	OFFTAB_READ_SEEK	0x01
    112   1.2  riastrad #define	OFFTAB_READ_NOSEEK	0x00
    113   1.2  riastrad 
    114   1.2  riastrad static bool
    115   1.2  riastrad offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
    116   1.2  riastrad {
    117  1.12  riastrad 	const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
    118  1.12  riastrad 	size_t window_bytes;
    119  1.12  riastrad 	off_t window_pos;
    120   1.2  riastrad 
    121   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
    122   1.2  riastrad 	assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
    123   1.2  riastrad 	    (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
    124   1.2  riastrad 	    ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
    125  1.12  riastrad 
    126  1.12  riastrad 	offtab_compute_window_position(offtab, window_start,
    127  1.12  riastrad 	    &window_bytes, &window_pos);
    128   1.2  riastrad 	const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
    129  1.12  riastrad 	    ? pread_block(offtab->ot_fd, offtab->ot_window, window_bytes,
    130  1.12  riastrad 		window_pos)
    131  1.12  riastrad 	    : read_block(offtab->ot_fd, offtab->ot_window, window_bytes));
    132   1.2  riastrad 	if (n_read == -1) {
    133   1.2  riastrad 		(*offtab->ot_report)("read offset table at %"PRIuMAX,
    134  1.11  riastrad 		    (uintmax_t)window_pos);
    135   1.2  riastrad 		return false;
    136   1.2  riastrad 	}
    137   1.2  riastrad 	assert(n_read >= 0);
    138  1.12  riastrad 	if ((size_t)n_read != window_bytes) {
    139   1.2  riastrad 		(*offtab->ot_reportx)("partial read of offset table"
    140   1.2  riastrad 		    " at %"PRIuMAX": %zu != %zu",
    141  1.12  riastrad 		    (uintmax_t)window_pos, (size_t)n_read, window_bytes);
    142   1.2  riastrad 		return false;
    143   1.2  riastrad 	}
    144  1.12  riastrad 
    145   1.2  riastrad 	offtab->ot_window_start = window_start;
    146   1.2  riastrad 
    147   1.2  riastrad 	return true;
    148   1.2  riastrad }
    149   1.2  riastrad 
    150   1.2  riastrad static bool
    151   1.2  riastrad offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
    152   1.2  riastrad {
    153   1.2  riastrad 
    154   1.2  riastrad 	/* Don't bother if blkno is already in the window.  */
    155   1.2  riastrad 	if ((offtab->ot_window_start <= blkno) &&
    156   1.2  riastrad 	    (blkno < offtab_current_window_end(offtab)))
    157   1.2  riastrad 		return true;
    158   1.2  riastrad 
    159   1.2  riastrad 	if (!offtab_read_window(offtab, blkno, read_flags))
    160   1.2  riastrad 		return false;
    161   1.2  riastrad 
    162   1.2  riastrad 	return true;
    163   1.2  riastrad }
    164   1.2  riastrad 
    165   1.2  riastrad static void
    166   1.5  riastrad offtab_write_window(struct offtab *offtab)
    167   1.2  riastrad {
    168  1.12  riastrad 	size_t window_bytes;
    169  1.12  riastrad 	off_t window_pos;
    170   1.2  riastrad 
    171   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
    172   1.2  riastrad 
    173  1.12  riastrad 	offtab_compute_window_position(offtab, offtab->ot_window_start,
    174  1.12  riastrad 	    &window_bytes, &window_pos);
    175   1.2  riastrad 	const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
    176  1.12  riastrad 	    window_bytes, window_pos);
    177   1.2  riastrad 	if (n_written == -1)
    178   1.2  riastrad 		err_ss(1, "write initial offset table");
    179   1.2  riastrad 	assert(n_written >= 0);
    180  1.12  riastrad 	if ((size_t)n_written != window_bytes)
    181   1.2  riastrad 		errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
    182   1.2  riastrad 		    (size_t)n_written,
    183  1.12  riastrad 		    window_bytes);
    184   1.2  riastrad }
    185   1.5  riastrad 
    186   1.5  riastrad static void
    187   1.5  riastrad offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
    188   1.5  riastrad {
    189   1.5  riastrad 
    190   1.5  riastrad 	/* Don't bother if [start, end) does not cover our window.  */
    191   1.5  riastrad 	if (end <= offtab->ot_window_start)
    192   1.5  riastrad 		return;
    193   1.5  riastrad 	if (offtab_current_window_end(offtab) < start)
    194   1.5  riastrad 		return;
    195   1.5  riastrad 
    196   1.5  riastrad 	offtab_write_window(offtab);
    197   1.5  riastrad }
    198   1.1  riastrad 
    199   1.1  riastrad /*
    201   1.1  riastrad  * Initialize an offtab to support the specified number of offsets read
    202   1.1  riastrad  * to or written from fd at byte position fdpos.
    203   1.1  riastrad  */
    204   1.2  riastrad void
    205   1.2  riastrad offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
    206   1.1  riastrad     int fd, off_t fdpos)
    207   1.1  riastrad {
    208   1.1  riastrad 
    209   1.1  riastrad 	assert(offtab != NULL);
    210   1.1  riastrad 	assert(0 < n_offsets);
    211   1.1  riastrad 	assert(0 <= fd);
    212   1.1  riastrad 	assert(0 <= fdpos);
    213   1.1  riastrad 
    214   1.2  riastrad 	offtab->ot_n_offsets = n_offsets;
    215   1.2  riastrad 	if ((window_size == 0) || (n_offsets < window_size))
    216   1.2  riastrad 		offtab->ot_window_size = n_offsets;
    217   1.2  riastrad 	else
    218   1.2  riastrad 		offtab->ot_window_size = window_size;
    219   1.2  riastrad 	assert(offtab->ot_window_size <= offtab->ot_n_offsets);
    220   1.2  riastrad 	offtab->ot_window_start = (uint32_t)-1;
    221   1.2  riastrad 	__CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
    222   1.2  riastrad 	offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
    223   1.1  riastrad 	if (offtab->ot_window == NULL)
    224   1.1  riastrad 		err(1, "malloc offset table");
    225   1.1  riastrad 	offtab->ot_blkno = (uint32_t)-1;
    226   1.1  riastrad 	offtab->ot_fd = fd;
    227   1.1  riastrad 	offtab->ot_fdpos = fdpos;
    228   1.1  riastrad 	offtab->ot_report = &offtab_bug;
    229   1.1  riastrad 	offtab->ot_reportx = &offtab_bugx;
    230   1.1  riastrad 	offtab->ot_mode = OFFTAB_MODE_NONE;
    231   1.1  riastrad }
    232   1.1  riastrad 
    233   1.1  riastrad /*
    234   1.1  riastrad  * Destroy an offtab.
    235   1.1  riastrad  */
    236   1.1  riastrad void
    237   1.1  riastrad offtab_destroy(struct offtab *offtab)
    238   1.1  riastrad {
    239   1.2  riastrad 
    240   1.1  riastrad 	free(offtab->ot_window);
    241   1.1  riastrad }
    242   1.1  riastrad 
    243   1.1  riastrad /*
    244   1.1  riastrad  * For an offtab that has been used to read data from disk, convert it
    245   1.2  riastrad  * to an offtab that can be used to write subsequent data to disk.
    246   1.1  riastrad  * blkno is the last valid blkno read from disk.
    247   1.2  riastrad  */
    248   1.2  riastrad bool
    249   1.1  riastrad offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
    250   1.1  riastrad {
    251   1.1  riastrad 
    252   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
    253   1.2  riastrad 	assert(0 < blkno);
    254   1.2  riastrad 
    255   1.2  riastrad 	if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
    256   1.2  riastrad 		return false;
    257   1.1  riastrad 
    258   1.2  riastrad 	offtab->ot_mode = OFFTAB_MODE_WRITE;
    259   1.2  riastrad 	offtab->ot_blkno = blkno;
    260   1.2  riastrad 
    261   1.1  riastrad 	return true;
    262   1.1  riastrad }
    263   1.1  riastrad 
    264   1.1  riastrad /*
    266   1.1  riastrad  * Reset an offtab for reading an offset table from the beginning.
    267   1.1  riastrad  * Initializes in-memory state and may read data from offtab->ot_fd,
    268   1.1  riastrad  * which must currently be at byte position offtab->ot_fdpos.  Failure
    269   1.1  riastrad  * will be reported by the report/reportx routines, which are called
    270   1.2  riastrad  * like warn/warnx.  May fail; returns true on success, false on
    271   1.2  riastrad  * failure.
    272   1.2  riastrad  *
    273   1.2  riastrad  * This almost has copypasta of offtab_prepare_get, but this uses read,
    274   1.1  riastrad  * rather than pread, so that it will work on nonseekable input if the
    275   1.1  riastrad  * window is the whole offset table.
    276   1.1  riastrad  */
    277   1.1  riastrad bool
    278   1.1  riastrad offtab_reset_read(struct offtab *offtab,
    279   1.1  riastrad     void (*report)(const char *, ...) __printflike(1,2),
    280   1.1  riastrad     void (*reportx)(const char *, ...) __printflike(1,2))
    281   1.1  riastrad {
    282   1.1  riastrad 
    283   1.1  riastrad 	assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
    284   1.1  riastrad 	    ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
    285   1.1  riastrad 
    286   1.1  riastrad 	offtab->ot_report = report;
    287   1.2  riastrad 	offtab->ot_reportx = reportx;
    288   1.1  riastrad 	offtab->ot_mode = OFFTAB_MODE_READ;
    289   1.2  riastrad 	offtab->ot_blkno = (uint32_t)-1;
    290   1.1  riastrad 
    291   1.1  riastrad 	if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
    292   1.4  riastrad 		return false;
    293   1.4  riastrad 
    294  1.11  riastrad 	if (offtab->ot_window_size < offtab->ot_n_offsets) {
    295  1.11  riastrad 		__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
    296  1.11  riastrad 		const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
    297  1.11  riastrad 		    (off_t)sizeof(uint64_t));
    298   1.4  riastrad 		assert(offtab->ot_fdpos <= (OFF_MAX - offtab_bytes));
    299   1.4  riastrad 		const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
    300   1.4  riastrad 		if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
    301   1.4  riastrad 			(*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
    302   1.4  riastrad 			    first_offset);
    303   1.4  riastrad 			return false;
    304   1.4  riastrad 		}
    305   1.1  riastrad 	}
    306   1.1  riastrad 
    307   1.1  riastrad 	return true;
    308   1.1  riastrad }
    309   1.1  riastrad 
    310   1.1  riastrad /*
    311   1.1  riastrad  * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
    312   1.1  riastrad  * preparation for a call to offtab_get.  May fail; returns true on
    313   1.1  riastrad  * success, false on failure.
    314   1.1  riastrad  */
    315   1.1  riastrad bool
    316   1.1  riastrad offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
    317   1.1  riastrad {
    318   1.1  riastrad 
    319   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
    320   1.2  riastrad 	assert(blkno < offtab->ot_n_offsets);
    321   1.2  riastrad 
    322   1.2  riastrad 	if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
    323   1.2  riastrad 		return false;
    324   1.2  riastrad 
    325   1.2  riastrad 	assert(offtab->ot_window_start <= blkno);
    326   1.1  riastrad 	assert(blkno < offtab_current_window_end(offtab));
    327   1.1  riastrad 
    328   1.1  riastrad 	offtab->ot_blkno = blkno;
    329   1.1  riastrad 	return true;
    330   1.1  riastrad }
    331   1.1  riastrad 
    332   1.1  riastrad /*
    333   1.1  riastrad  * Return the offset for blkno.  Caller must have called
    334   1.1  riastrad  * offtab_prepare_get beforehand.
    335   1.1  riastrad  */
    336   1.1  riastrad uint64_t
    337   1.1  riastrad offtab_get(struct offtab *offtab, uint32_t blkno)
    338   1.1  riastrad {
    339   1.1  riastrad 
    340   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
    341   1.2  riastrad 	assert(blkno == offtab->ot_blkno);
    342   1.2  riastrad 	assert(offtab->ot_window_start <= blkno);
    343   1.2  riastrad 	assert(blkno < offtab_current_window_end(offtab));
    344   1.1  riastrad 
    345   1.1  riastrad 	return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
    346   1.1  riastrad }
    347   1.1  riastrad 
    348   1.1  riastrad /*
    350   1.1  riastrad  * Reset offtab for writing a fresh offset table.  Initializes
    351   1.1  riastrad  * in-memory state and writes an empty offset table to offtab->ot_fd,
    352   1.1  riastrad  * which must currently be at byte position offtab->ot_fdpos.  May
    353   1.1  riastrad  * fail; returns on success, aborts with err(3) on failure.
    354   1.1  riastrad  */
    355   1.1  riastrad void
    356   1.1  riastrad offtab_reset_write(struct offtab *offtab)
    357   1.1  riastrad {
    358   1.1  riastrad 	uint32_t i;
    359   1.1  riastrad 
    360   1.2  riastrad 	assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
    361   1.1  riastrad 
    362   1.1  riastrad 	offtab->ot_mode = OFFTAB_MODE_WRITE;
    363   1.1  riastrad 	offtab->ot_blkno = (uint32_t)-1;
    364   1.1  riastrad 
    365   1.1  riastrad 	/*
    366   1.1  riastrad 	 * Initialize the offset table to all ones (except for the
    367   1.1  riastrad 	 * fixed first offset) so that we can easily detect where we
    368   1.1  riastrad 	 * were interrupted if we want to restart.
    369   1.2  riastrad 	 */
    370   1.2  riastrad 	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
    371   1.2  riastrad 	assert(offtab->ot_n_offsets > 0);
    372   1.2  riastrad 
    373   1.2  riastrad 	for (i = 0; i < offtab->ot_window_size; i++)
    374   1.2  riastrad 		offtab->ot_window[i] = ~(uint64_t)0;
    375   1.2  riastrad 
    376   1.2  riastrad 	const uint32_t n_windows =
    377   1.2  riastrad 	    howmany(offtab->ot_n_offsets, offtab->ot_window_size);
    378   1.5  riastrad 	for (i = 1; i < n_windows; i++) {
    379   1.2  riastrad 		/* Change the start but reuse the all-ones buffer.  */
    380   1.2  riastrad 		offtab->ot_window_start = (i * offtab->ot_window_size);
    381   1.2  riastrad 		offtab_write_window(offtab);
    382  1.11  riastrad 	}
    383  1.11  riastrad 
    384  1.11  riastrad 	offtab->ot_window_start = 0;
    385  1.11  riastrad 	__CTASSERT(MAX_N_OFFSETS <=
    386   1.2  riastrad 	    (MIN(OFF_MAX, UINT64_MAX) / sizeof(uint64_t)));
    387  1.11  riastrad 	const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
    388  1.11  riastrad 	    sizeof(uint64_t));
    389  1.11  riastrad 	assert(offtab->ot_fdpos <=
    390  1.11  riastrad 	    ((off_t)MIN(OFF_MAX, UINT64_MAX) - offtab_bytes));
    391   1.5  riastrad 	const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
    392   1.1  riastrad 	assert(first_offset <= (off_t)MIN(OFF_MAX, UINT64_MAX));
    393   1.2  riastrad 	offtab->ot_window[0] = htobe64((uint64_t)first_offset);
    394   1.2  riastrad 	offtab_write_window(offtab);
    395   1.1  riastrad 
    396   1.1  riastrad 	if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
    397   1.1  riastrad 		err(1, "lseek to first offset failed");
    398   1.1  riastrad }
    399   1.1  riastrad 
    400   1.1  riastrad /*
    401   1.1  riastrad  * Guarantee that the disk reflects block offsets [0, n_offsets).  If
    402   1.1  riastrad  * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
    403   1.1  riastrad  * offset table.  May fail; returns on success, aborts with err(3) on
    404   1.1  riastrad  * failure.  Fsync failure is considered success but is reported with a
    405   1.2  riastrad  * warning.
    406   1.2  riastrad  *
    407   1.2  riastrad  * This routine does not write state in memory, and does not read state
    408   1.1  riastrad  * that is not signal-safe.  The only state read is offtab->ot_window,
    409   1.1  riastrad  * offtab->ot_window_start, and quantities that are static for the
    410   1.1  riastrad  * signal-interruptable existence of the offset table.
    411   1.1  riastrad  */
    412   1.1  riastrad void
    413   1.1  riastrad offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
    414   1.1  riastrad {
    415   1.1  riastrad 
    416   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
    417   1.2  riastrad 	assert(n_offsets <= offtab->ot_n_offsets);
    418   1.2  riastrad 
    419   1.2  riastrad 	/*
    420   1.2  riastrad 	 * Write the window unless we just did that and were
    421   1.5  riastrad 	 * interrupted before we could move the window.
    422   1.1  riastrad 	 */
    423   1.1  riastrad 	if (offtab->ot_window != NULL)
    424   1.2  riastrad 		offtab_maybe_write_window(offtab, 0, n_offsets);
    425  1.11  riastrad 
    426  1.11  riastrad 	if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
    427  1.11  riastrad 		__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
    428   1.1  riastrad 		const off_t sync_bytes = ((off_t)n_offsets *
    429  1.11  riastrad 		    (off_t)sizeof(uint64_t));
    430   1.1  riastrad 		assert(offtab->ot_fdpos <= (OFF_MAX - sync_bytes));
    431   1.1  riastrad 		if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
    432   1.1  riastrad 			offtab->ot_fdpos, (offtab->ot_fdpos + sync_bytes))
    433   1.1  riastrad 		    == -1)
    434   1.1  riastrad 			warn_ss("fsync of offset table failed");
    435   1.1  riastrad 	}
    436   1.1  riastrad }
    437   1.1  riastrad 
    438   1.1  riastrad /*
    439   1.1  riastrad  * Do any I/O or bookkeeping necessary to set an offset for blkno.  May
    440   1.1  riastrad  * fail; returns on success, aborts with err(3) on failure.
    441   1.1  riastrad  */
    442   1.2  riastrad void
    443   1.1  riastrad offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
    444   1.1  riastrad {
    445   1.1  riastrad 	uint32_t i;
    446   1.2  riastrad 
    447   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
    448   1.2  riastrad 	assert(blkno < offtab->ot_n_offsets);
    449   1.2  riastrad 
    450   1.2  riastrad 	/*
    451   1.2  riastrad 	 * Assume, for convenience, that we write blocks in order.
    452   1.2  riastrad 	 * Thus we need not do another read -- we can just clear the
    453   1.2  riastrad 	 * window.
    454   1.2  riastrad 	 */
    455   1.2  riastrad 	assert((offtab->ot_blkno == (uint32_t)-1) ||
    456   1.2  riastrad 	    ((offtab->ot_blkno + 1) == blkno));
    457   1.2  riastrad 
    458   1.2  riastrad 	/* If it's already in our window, we're good to go.  */
    459   1.2  riastrad 	if ((offtab->ot_window_start <= blkno) &&
    460   1.2  riastrad 	    (blkno < offtab_current_window_end(offtab)))
    461   1.5  riastrad 		goto win;
    462   1.2  riastrad 
    463   1.2  riastrad 	/* Otherwise, write out the current window and choose a new one.  */
    464   1.2  riastrad 	offtab_write_window(offtab);
    465   1.2  riastrad 
    466   1.2  riastrad 	assert(offtab->ot_window_size <= blkno);
    467   1.2  riastrad 	assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
    468   1.2  riastrad 	assert((offtab->ot_window_start + offtab->ot_window_size) ==
    469   1.2  riastrad 	    rounddown(blkno, offtab->ot_window_size));
    470   1.2  riastrad 
    471   1.2  riastrad     {
    472   1.2  riastrad 	uint64_t *window;
    473   1.2  riastrad 	sigset_t sigmask;
    474   1.7  riastrad 
    475   1.2  riastrad 	/*
    476   1.2  riastrad 	 * Mark the window as being updated so nobody tries to write it
    477   1.2  riastrad 	 * (since we just wrote it) while we fill it with ones.
    478   1.2  riastrad 	 */
    479   1.2  riastrad 	block_signals(&sigmask);
    480   1.2  riastrad 	window = offtab->ot_window;
    481   1.2  riastrad 	offtab->ot_window = NULL;
    482   1.2  riastrad 	restore_sigmask(&sigmask);
    483   1.2  riastrad 
    484   1.2  riastrad 	/* Fill the window with ones.  */
    485   1.2  riastrad 	for (i = 0; i < offtab_current_window_size(offtab); i++)
    486   1.2  riastrad 		window[i] = ~(uint64_t)0;
    487   1.2  riastrad 
    488   1.2  riastrad 	/* Restore the window as ready again.  */
    489   1.2  riastrad 	block_signals(&sigmask);
    490   1.2  riastrad 	offtab->ot_window = window;
    491   1.2  riastrad 	offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
    492   1.2  riastrad 	restore_sigmask(&sigmask);
    493   1.2  riastrad     }
    494   1.2  riastrad 
    495   1.1  riastrad win:	assert(offtab->ot_window_start <= blkno);
    496   1.1  riastrad 	assert(blkno < offtab_current_window_end(offtab));
    497   1.1  riastrad 
    498   1.1  riastrad 	offtab->ot_blkno = blkno;
    499   1.1  riastrad }
    500   1.1  riastrad 
    501   1.1  riastrad /*
    502   1.1  riastrad  * Actually set the offset for blkno.
    503   1.1  riastrad  */
    504   1.1  riastrad void
    505   1.1  riastrad offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
    506   1.1  riastrad {
    507   1.2  riastrad 
    508   1.2  riastrad 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
    509   1.2  riastrad 	assert(blkno == offtab->ot_blkno);
    510   1.2  riastrad 	assert(offtab->ot_window_start <= blkno);
    511   1.1  riastrad 	assert(blkno < offtab_current_window_end(offtab));
    512                 
    513                 	offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
    514                 }
    515