1 1.15 riastrad /* $NetBSD: offtab.c,v 1.15 2017/07/29 21:04:07 riastradh Exp $ */ 2 1.1 riastrad 3 1.1 riastrad /*- 4 1.1 riastrad * Copyright (c) 2014 The NetBSD Foundation, Inc. 5 1.1 riastrad * All rights reserved. 6 1.1 riastrad * 7 1.1 riastrad * This code is derived from software contributed to The NetBSD Foundation 8 1.1 riastrad * by Taylor R. Campbell. 9 1.1 riastrad * 10 1.1 riastrad * Redistribution and use in source and binary forms, with or without 11 1.1 riastrad * modification, are permitted provided that the following conditions 12 1.1 riastrad * are met: 13 1.1 riastrad * 1. Redistributions of source code must retain the above copyright 14 1.1 riastrad * notice, this list of conditions and the following disclaimer. 15 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 riastrad * notice, this list of conditions and the following disclaimer in the 17 1.1 riastrad * documentation and/or other materials provided with the distribution. 18 1.1 riastrad * 19 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE. 30 1.1 riastrad */ 31 1.1 riastrad 32 1.1 riastrad #include <sys/cdefs.h> 33 1.15 riastrad __RCSID("$NetBSD: offtab.c,v 1.15 2017/07/29 21:04:07 riastradh Exp $"); 34 1.1 riastrad 35 1.1 riastrad #include <sys/types.h> 36 1.1 riastrad #include <sys/endian.h> 37 1.1 riastrad 38 1.1 riastrad #include <assert.h> 39 1.1 riastrad #include <err.h> 40 1.1 riastrad #include <errno.h> 41 1.1 riastrad #include <inttypes.h> 42 1.1 riastrad #include <limits.h> 43 1.1 riastrad #include <stdbool.h> 44 1.1 riastrad #include <stdlib.h> 45 1.1 riastrad #include <unistd.h> 46 1.1 riastrad 47 1.1 riastrad #include "common.h" 48 1.1 riastrad #include "utils.h" 49 1.1 riastrad 50 1.1 riastrad #include "offtab.h" 51 1.1 riastrad 52 1.10 joerg static void __printflike(1,2) __dead 53 1.1 riastrad offtab_bug(const char *fmt, ...) 54 1.1 riastrad { 55 1.1 riastrad 56 1.1 riastrad errx(1, "bug in offtab, please report"); 57 1.1 riastrad } 58 1.1 riastrad 59 1.10 joerg static void __printflike(1,2) __dead 60 1.1 riastrad offtab_bugx(const char *fmt, ...) 61 1.1 riastrad { 62 1.1 riastrad 63 1.1 riastrad errx(1, "bug in offtab, please report"); 64 1.1 riastrad } 65 1.2 riastrad 66 1.2 riastrad static uint32_t 67 1.8 riastrad offtab_compute_window_size(struct offtab *offtab, uint32_t start) 68 1.2 riastrad { 69 1.2 riastrad 70 1.8 riastrad assert(start < offtab->ot_n_offsets); 71 1.8 riastrad return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start)); 72 1.2 riastrad } 73 1.2 riastrad 74 1.2 riastrad static uint32_t 75 1.2 riastrad offtab_current_window_size(struct offtab *offtab) 76 1.2 riastrad { 77 1.2 riastrad 78 1.8 riastrad return offtab_compute_window_size(offtab, offtab->ot_window_start); 79 1.2 riastrad } 80 1.2 riastrad 81 1.2 riastrad static uint32_t 82 1.2 riastrad offtab_current_window_end(struct offtab *offtab) 83 1.2 riastrad { 84 1.2 riastrad 85 1.2 riastrad assert(offtab->ot_window_start < offtab->ot_n_offsets); 86 1.2 riastrad assert(offtab_current_window_size(offtab) <= 87 1.2 riastrad (offtab->ot_n_offsets - offtab->ot_window_start)); 88 1.2 riastrad return (offtab->ot_window_start + offtab_current_window_size(offtab)); 89 1.2 riastrad } 90 1.2 riastrad 91 1.12 riastrad static void 92 1.12 riastrad offtab_compute_window_position(struct offtab *offtab, uint32_t window_start, 93 1.12 riastrad size_t *bytes, off_t *pos) 94 1.12 riastrad { 95 1.12 riastrad const uint32_t window_size = offtab_compute_window_size(offtab, 96 1.12 riastrad window_start); 97 1.12 riastrad 98 1.15 riastrad __CTASSERT(MUL_OK(size_t, MAX_WINDOW_SIZE, sizeof(uint64_t))); 99 1.12 riastrad *bytes = (window_size * sizeof(uint64_t)); 100 1.12 riastrad 101 1.12 riastrad assert(window_start <= offtab->ot_n_offsets); 102 1.15 riastrad __CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t))); 103 1.12 riastrad const off_t window_offset = ((off_t)window_start * 104 1.12 riastrad (off_t)sizeof(uint64_t)); 105 1.12 riastrad 106 1.14 riastrad assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS); 107 1.15 riastrad __CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS, 108 1.15 riastrad (off_t)MAX_N_OFFSETS*sizeof(uint64_t))); 109 1.15 riastrad assert(ADD_OK(off_t, offtab->ot_fdpos, window_offset)); 110 1.12 riastrad *pos = (offtab->ot_fdpos + window_offset); 111 1.12 riastrad } 112 1.12 riastrad 113 1.2 riastrad #define OFFTAB_READ_SEEK 0x01 114 1.2 riastrad #define OFFTAB_READ_NOSEEK 0x00 115 1.2 riastrad 116 1.2 riastrad static bool 117 1.2 riastrad offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags) 118 1.2 riastrad { 119 1.12 riastrad const uint32_t window_start = rounddown(blkno, offtab->ot_window_size); 120 1.12 riastrad size_t window_bytes; 121 1.12 riastrad off_t window_pos; 122 1.2 riastrad 123 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ); 124 1.2 riastrad assert(ISSET(read_flags, OFFTAB_READ_SEEK) || 125 1.2 riastrad (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) || 126 1.2 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE))); 127 1.12 riastrad 128 1.12 riastrad offtab_compute_window_position(offtab, window_start, 129 1.12 riastrad &window_bytes, &window_pos); 130 1.2 riastrad const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK) 131 1.12 riastrad ? pread_block(offtab->ot_fd, offtab->ot_window, window_bytes, 132 1.12 riastrad window_pos) 133 1.12 riastrad : read_block(offtab->ot_fd, offtab->ot_window, window_bytes)); 134 1.2 riastrad if (n_read == -1) { 135 1.2 riastrad (*offtab->ot_report)("read offset table at %"PRIuMAX, 136 1.11 riastrad (uintmax_t)window_pos); 137 1.2 riastrad return false; 138 1.2 riastrad } 139 1.2 riastrad assert(n_read >= 0); 140 1.12 riastrad if ((size_t)n_read != window_bytes) { 141 1.2 riastrad (*offtab->ot_reportx)("partial read of offset table" 142 1.2 riastrad " at %"PRIuMAX": %zu != %zu", 143 1.12 riastrad (uintmax_t)window_pos, (size_t)n_read, window_bytes); 144 1.2 riastrad return false; 145 1.2 riastrad } 146 1.12 riastrad 147 1.2 riastrad offtab->ot_window_start = window_start; 148 1.2 riastrad 149 1.2 riastrad return true; 150 1.2 riastrad } 151 1.2 riastrad 152 1.2 riastrad static bool 153 1.2 riastrad offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags) 154 1.2 riastrad { 155 1.2 riastrad 156 1.2 riastrad /* Don't bother if blkno is already in the window. */ 157 1.2 riastrad if ((offtab->ot_window_start <= blkno) && 158 1.2 riastrad (blkno < offtab_current_window_end(offtab))) 159 1.2 riastrad return true; 160 1.2 riastrad 161 1.2 riastrad if (!offtab_read_window(offtab, blkno, read_flags)) 162 1.2 riastrad return false; 163 1.2 riastrad 164 1.2 riastrad return true; 165 1.2 riastrad } 166 1.2 riastrad 167 1.2 riastrad static void 168 1.5 riastrad offtab_write_window(struct offtab *offtab) 169 1.2 riastrad { 170 1.12 riastrad size_t window_bytes; 171 1.12 riastrad off_t window_pos; 172 1.2 riastrad 173 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE); 174 1.2 riastrad 175 1.12 riastrad offtab_compute_window_position(offtab, offtab->ot_window_start, 176 1.12 riastrad &window_bytes, &window_pos); 177 1.2 riastrad const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window, 178 1.12 riastrad window_bytes, window_pos); 179 1.2 riastrad if (n_written == -1) 180 1.2 riastrad err_ss(1, "write initial offset table"); 181 1.2 riastrad assert(n_written >= 0); 182 1.12 riastrad if ((size_t)n_written != window_bytes) 183 1.2 riastrad errx_ss(1, "partial write of initial offset bytes: %zu <= %zu", 184 1.2 riastrad (size_t)n_written, 185 1.12 riastrad window_bytes); 186 1.2 riastrad } 187 1.5 riastrad 188 1.5 riastrad static void 189 1.5 riastrad offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end) 190 1.5 riastrad { 191 1.5 riastrad 192 1.5 riastrad /* Don't bother if [start, end) does not cover our window. */ 193 1.5 riastrad if (end <= offtab->ot_window_start) 194 1.5 riastrad return; 195 1.5 riastrad if (offtab_current_window_end(offtab) < start) 196 1.5 riastrad return; 197 1.5 riastrad 198 1.5 riastrad offtab_write_window(offtab); 199 1.5 riastrad } 200 1.1 riastrad 201 1.1 riastrad /* 203 1.1 riastrad * Initialize an offtab to support the specified number of offsets read 204 1.1 riastrad * to or written from fd at byte position fdpos. 205 1.1 riastrad */ 206 1.2 riastrad void 207 1.2 riastrad offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size, 208 1.1 riastrad int fd, off_t fdpos) 209 1.1 riastrad { 210 1.1 riastrad 211 1.1 riastrad assert(offtab != NULL); 212 1.1 riastrad assert(0 < n_offsets); 213 1.1 riastrad assert(0 <= fd); 214 1.14 riastrad assert(0 <= fdpos); 215 1.1 riastrad assert(fdpos <= OFFTAB_MAX_FDPOS); 216 1.1 riastrad 217 1.2 riastrad offtab->ot_n_offsets = n_offsets; 218 1.2 riastrad if ((window_size == 0) || (n_offsets < window_size)) 219 1.2 riastrad offtab->ot_window_size = n_offsets; 220 1.2 riastrad else 221 1.2 riastrad offtab->ot_window_size = window_size; 222 1.2 riastrad assert(offtab->ot_window_size <= offtab->ot_n_offsets); 223 1.15 riastrad offtab->ot_window_start = (uint32_t)-1; 224 1.2 riastrad __CTASSERT(MUL_OK(size_t, MAX_WINDOW_SIZE, sizeof(uint64_t))); 225 1.2 riastrad offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t)); 226 1.1 riastrad if (offtab->ot_window == NULL) 227 1.1 riastrad err(1, "malloc offset table"); 228 1.1 riastrad offtab->ot_blkno = (uint32_t)-1; 229 1.1 riastrad offtab->ot_fd = fd; 230 1.1 riastrad offtab->ot_fdpos = fdpos; 231 1.1 riastrad offtab->ot_report = &offtab_bug; 232 1.1 riastrad offtab->ot_reportx = &offtab_bugx; 233 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_NONE; 234 1.1 riastrad } 235 1.1 riastrad 236 1.1 riastrad /* 237 1.1 riastrad * Destroy an offtab. 238 1.1 riastrad */ 239 1.1 riastrad void 240 1.1 riastrad offtab_destroy(struct offtab *offtab) 241 1.1 riastrad { 242 1.2 riastrad 243 1.1 riastrad free(offtab->ot_window); 244 1.1 riastrad } 245 1.1 riastrad 246 1.1 riastrad /* 247 1.1 riastrad * For an offtab that has been used to read data from disk, convert it 248 1.2 riastrad * to an offtab that can be used to write subsequent data to disk. 249 1.1 riastrad * blkno is the last valid blkno read from disk. 250 1.2 riastrad */ 251 1.2 riastrad bool 252 1.1 riastrad offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno) 253 1.1 riastrad { 254 1.1 riastrad 255 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ); 256 1.2 riastrad assert(0 < blkno); 257 1.2 riastrad 258 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK)) 259 1.2 riastrad return false; 260 1.1 riastrad 261 1.2 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE; 262 1.2 riastrad offtab->ot_blkno = blkno; 263 1.2 riastrad 264 1.1 riastrad return true; 265 1.1 riastrad } 266 1.1 riastrad 267 1.1 riastrad /* 269 1.1 riastrad * Reset an offtab for reading an offset table from the beginning. 270 1.1 riastrad * Initializes in-memory state and may read data from offtab->ot_fd, 271 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. Failure 272 1.1 riastrad * will be reported by the report/reportx routines, which are called 273 1.2 riastrad * like warn/warnx. May fail; returns true on success, false on 274 1.2 riastrad * failure. 275 1.2 riastrad * 276 1.2 riastrad * This almost has copypasta of offtab_prepare_get, but this uses read, 277 1.1 riastrad * rather than pread, so that it will work on nonseekable input if the 278 1.1 riastrad * window is the whole offset table. 279 1.1 riastrad */ 280 1.1 riastrad bool 281 1.1 riastrad offtab_reset_read(struct offtab *offtab, 282 1.1 riastrad void (*report)(const char *, ...) __printflike(1,2), 283 1.1 riastrad void (*reportx)(const char *, ...) __printflike(1,2)) 284 1.1 riastrad { 285 1.1 riastrad 286 1.1 riastrad assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) || 287 1.1 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE))); 288 1.1 riastrad 289 1.1 riastrad offtab->ot_report = report; 290 1.2 riastrad offtab->ot_reportx = reportx; 291 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_READ; 292 1.2 riastrad offtab->ot_blkno = (uint32_t)-1; 293 1.1 riastrad 294 1.1 riastrad if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK)) 295 1.4 riastrad return false; 296 1.15 riastrad 297 1.11 riastrad if (offtab->ot_window_size < offtab->ot_n_offsets) { 298 1.11 riastrad __CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t))); 299 1.14 riastrad const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets * 300 1.15 riastrad (off_t)sizeof(uint64_t)); 301 1.15 riastrad assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS); 302 1.15 riastrad __CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS, 303 1.11 riastrad (off_t)MAX_N_OFFSETS*sizeof(uint64_t))); 304 1.4 riastrad assert(ADD_OK(off_t, offtab->ot_fdpos, offtab_bytes)); 305 1.4 riastrad const off_t first_offset = (offtab->ot_fdpos + offtab_bytes); 306 1.4 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) { 307 1.4 riastrad (*offtab->ot_report)("lseek to first offset 0x%"PRIx64, 308 1.4 riastrad first_offset); 309 1.4 riastrad return false; 310 1.4 riastrad } 311 1.1 riastrad } 312 1.1 riastrad 313 1.1 riastrad return true; 314 1.1 riastrad } 315 1.1 riastrad 316 1.1 riastrad /* 317 1.1 riastrad * Do any I/O or bookkeeping necessary to fetch the offset for blkno in 318 1.1 riastrad * preparation for a call to offtab_get. May fail; returns true on 319 1.1 riastrad * success, false on failure. 320 1.1 riastrad */ 321 1.1 riastrad bool 322 1.1 riastrad offtab_prepare_get(struct offtab *offtab, uint32_t blkno) 323 1.1 riastrad { 324 1.1 riastrad 325 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ); 326 1.2 riastrad assert(blkno < offtab->ot_n_offsets); 327 1.2 riastrad 328 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK)) 329 1.2 riastrad return false; 330 1.2 riastrad 331 1.2 riastrad assert(offtab->ot_window_start <= blkno); 332 1.1 riastrad assert(blkno < offtab_current_window_end(offtab)); 333 1.1 riastrad 334 1.1 riastrad offtab->ot_blkno = blkno; 335 1.1 riastrad return true; 336 1.1 riastrad } 337 1.1 riastrad 338 1.1 riastrad /* 339 1.1 riastrad * Return the offset for blkno. Caller must have called 340 1.1 riastrad * offtab_prepare_get beforehand. 341 1.1 riastrad */ 342 1.1 riastrad uint64_t 343 1.1 riastrad offtab_get(struct offtab *offtab, uint32_t blkno) 344 1.1 riastrad { 345 1.1 riastrad 346 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ); 347 1.2 riastrad assert(blkno == offtab->ot_blkno); 348 1.2 riastrad assert(offtab->ot_window_start <= blkno); 349 1.2 riastrad assert(blkno < offtab_current_window_end(offtab)); 350 1.1 riastrad 351 1.1 riastrad return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]); 352 1.1 riastrad } 353 1.1 riastrad 354 1.1 riastrad /* 356 1.1 riastrad * Reset offtab for writing a fresh offset table. Initializes 357 1.1 riastrad * in-memory state and writes an empty offset table to offtab->ot_fd, 358 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. May 359 1.1 riastrad * fail; returns on success, aborts with err(3) on failure. 360 1.1 riastrad */ 361 1.1 riastrad void 362 1.1 riastrad offtab_reset_write(struct offtab *offtab) 363 1.1 riastrad { 364 1.1 riastrad uint32_t i; 365 1.1 riastrad 366 1.2 riastrad assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos); 367 1.1 riastrad 368 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE; 369 1.1 riastrad offtab->ot_blkno = (uint32_t)-1; 370 1.1 riastrad 371 1.1 riastrad /* 372 1.1 riastrad * Initialize the offset table to all ones (except for the 373 1.1 riastrad * fixed first offset) so that we can easily detect where we 374 1.1 riastrad * were interrupted if we want to restart. 375 1.2 riastrad */ 376 1.14 riastrad __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX); 377 1.2 riastrad assert(offtab->ot_n_offsets > 0); 378 1.2 riastrad 379 1.2 riastrad /* Initialize window of all ones. */ 380 1.14 riastrad for (i = 0; i < offtab->ot_window_size; i++) 381 1.2 riastrad offtab->ot_window[i] = ~(uint64_t)0; 382 1.2 riastrad 383 1.2 riastrad /* Write the window to every position in the table. */ 384 1.2 riastrad const uint32_t n_windows = 385 1.2 riastrad howmany(offtab->ot_n_offsets, offtab->ot_window_size); 386 1.5 riastrad for (i = 1; i < n_windows; i++) { 387 1.2 riastrad /* Change the start but reuse the all-ones buffer. */ 388 1.2 riastrad offtab->ot_window_start = (i * offtab->ot_window_size); 389 1.14 riastrad offtab_write_window(offtab); 390 1.15 riastrad } 391 1.11 riastrad 392 1.11 riastrad /* Compute the number of bytes in the offset table. */ 393 1.14 riastrad __CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t))); 394 1.14 riastrad const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets * 395 1.14 riastrad sizeof(uint64_t)); 396 1.15 riastrad 397 1.15 riastrad /* Compute the offset of the first block. */ 398 1.15 riastrad assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS); 399 1.11 riastrad __CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS, 400 1.14 riastrad MAX_N_OFFSETS*sizeof(uint64_t))); 401 1.14 riastrad assert(ADD_OK(off_t, offtab->ot_fdpos, offtab_bytes)); 402 1.15 riastrad const off_t first_offset = (offtab->ot_fdpos + offtab_bytes); 403 1.15 riastrad 404 1.15 riastrad /* Assert that it fits in 64 bits. */ 405 1.14 riastrad __CTASSERT(MUL_OK(uint64_t, MAX_N_OFFSETS, sizeof(uint64_t))); 406 1.14 riastrad __CTASSERT(ADD_OK(uint64_t, OFFTAB_MAX_FDPOS, 407 1.14 riastrad (uint64_t)MAX_N_OFFSETS*sizeof(uint64_t))); 408 1.11 riastrad 409 1.5 riastrad /* Write out the first window with the first offset. */ 410 1.1 riastrad offtab->ot_window_start = 0; 411 1.2 riastrad offtab->ot_window[0] = htobe64((uint64_t)first_offset); 412 1.2 riastrad offtab_write_window(offtab); 413 1.1 riastrad 414 1.1 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) 415 1.1 riastrad err(1, "lseek to first offset failed"); 416 1.1 riastrad } 417 1.1 riastrad 418 1.1 riastrad /* 419 1.1 riastrad * Guarantee that the disk reflects block offsets [0, n_offsets). If 420 1.1 riastrad * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire 421 1.1 riastrad * offset table. May fail; returns on success, aborts with err(3) on 422 1.1 riastrad * failure. Fsync failure is considered success but is reported with a 423 1.2 riastrad * warning. 424 1.2 riastrad * 425 1.2 riastrad * This routine does not write state in memory, and does not read state 426 1.1 riastrad * that is not signal-safe. The only state read is offtab->ot_window, 427 1.1 riastrad * offtab->ot_window_start, and quantities that are static for the 428 1.1 riastrad * signal-interruptable existence of the offset table. 429 1.1 riastrad */ 430 1.1 riastrad void 431 1.1 riastrad offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags) 432 1.1 riastrad { 433 1.1 riastrad 434 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE); 435 1.2 riastrad assert(n_offsets <= offtab->ot_n_offsets); 436 1.2 riastrad 437 1.2 riastrad /* 438 1.2 riastrad * Write the window unless we just did that and were 439 1.5 riastrad * interrupted before we could move the window. 440 1.1 riastrad */ 441 1.1 riastrad if (offtab->ot_window != NULL) 442 1.15 riastrad offtab_maybe_write_window(offtab, 0, n_offsets); 443 1.11 riastrad 444 1.11 riastrad if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) { 445 1.15 riastrad __CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t))); 446 1.15 riastrad const off_t sync_bytes = ((off_t)n_offsets * 447 1.15 riastrad (off_t)sizeof(uint64_t)); 448 1.1 riastrad __CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS, 449 1.11 riastrad MAX_N_OFFSETS*sizeof(uint64_t))); 450 1.1 riastrad assert(ADD_OK(off_t, offtab->ot_fdpos, sync_bytes)); 451 1.1 riastrad if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC), 452 1.1 riastrad offtab->ot_fdpos, (offtab->ot_fdpos + sync_bytes)) 453 1.1 riastrad == -1) 454 1.1 riastrad warn_ss("fsync of offset table failed"); 455 1.1 riastrad } 456 1.1 riastrad } 457 1.1 riastrad 458 1.1 riastrad /* 459 1.1 riastrad * Do any I/O or bookkeeping necessary to set an offset for blkno. May 460 1.1 riastrad * fail; returns on success, aborts with err(3) on failure. 461 1.1 riastrad */ 462 1.2 riastrad void 463 1.1 riastrad offtab_prepare_put(struct offtab *offtab, uint32_t blkno) 464 1.1 riastrad { 465 1.1 riastrad uint32_t i; 466 1.2 riastrad 467 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE); 468 1.2 riastrad assert(blkno < offtab->ot_n_offsets); 469 1.2 riastrad 470 1.2 riastrad /* 471 1.2 riastrad * Assume, for convenience, that we write blocks in order. 472 1.2 riastrad * Thus we need not do another read -- we can just clear the 473 1.2 riastrad * window. 474 1.2 riastrad */ 475 1.2 riastrad assert((offtab->ot_blkno == (uint32_t)-1) || 476 1.2 riastrad ((offtab->ot_blkno + 1) == blkno)); 477 1.2 riastrad 478 1.2 riastrad /* If it's already in our window, we're good to go. */ 479 1.2 riastrad if ((offtab->ot_window_start <= blkno) && 480 1.2 riastrad (blkno < offtab_current_window_end(offtab))) 481 1.5 riastrad goto win; 482 1.2 riastrad 483 1.2 riastrad /* Otherwise, write out the current window and choose a new one. */ 484 1.2 riastrad offtab_write_window(offtab); 485 1.2 riastrad 486 1.2 riastrad assert(offtab->ot_window_size <= blkno); 487 1.2 riastrad assert(offtab->ot_window_start == (blkno - offtab->ot_window_size)); 488 1.2 riastrad assert((offtab->ot_window_start + offtab->ot_window_size) == 489 1.2 riastrad rounddown(blkno, offtab->ot_window_size)); 490 1.2 riastrad 491 1.2 riastrad { 492 1.2 riastrad uint64_t *window; 493 1.2 riastrad sigset_t sigmask; 494 1.7 riastrad 495 1.2 riastrad /* 496 1.2 riastrad * Mark the window as being updated so nobody tries to write it 497 1.2 riastrad * (since we just wrote it) while we fill it with ones. 498 1.2 riastrad */ 499 1.2 riastrad block_signals(&sigmask); 500 1.2 riastrad window = offtab->ot_window; 501 1.2 riastrad offtab->ot_window = NULL; 502 1.2 riastrad restore_sigmask(&sigmask); 503 1.2 riastrad 504 1.2 riastrad /* Fill the window with ones. */ 505 1.2 riastrad for (i = 0; i < offtab_current_window_size(offtab); i++) 506 1.2 riastrad window[i] = ~(uint64_t)0; 507 1.2 riastrad 508 1.2 riastrad /* Restore the window as ready again. */ 509 1.2 riastrad block_signals(&sigmask); 510 1.2 riastrad offtab->ot_window = window; 511 1.2 riastrad offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size); 512 1.2 riastrad restore_sigmask(&sigmask); 513 1.2 riastrad } 514 1.2 riastrad 515 1.1 riastrad win: assert(offtab->ot_window_start <= blkno); 516 1.1 riastrad assert(blkno < offtab_current_window_end(offtab)); 517 1.1 riastrad 518 1.1 riastrad offtab->ot_blkno = blkno; 519 1.1 riastrad } 520 1.1 riastrad 521 1.1 riastrad /* 522 1.1 riastrad * Actually set the offset for blkno. 523 1.1 riastrad */ 524 1.1 riastrad void 525 1.1 riastrad offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset) 526 1.1 riastrad { 527 1.2 riastrad 528 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE); 529 1.2 riastrad assert(blkno == offtab->ot_blkno); 530 1.2 riastrad assert(offtab->ot_window_start <= blkno); 531 1.1 riastrad assert(blkno < offtab_current_window_end(offtab)); 532 533 offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset); 534 } 535