offtab.c revision 1.15 1 1.15 riastrad /* $NetBSD: offtab.c,v 1.15 2017/07/29 21:04:07 riastradh Exp $ */
2 1.1 riastrad
3 1.1 riastrad /*-
4 1.1 riastrad * Copyright (c) 2014 The NetBSD Foundation, Inc.
5 1.1 riastrad * All rights reserved.
6 1.1 riastrad *
7 1.1 riastrad * This code is derived from software contributed to The NetBSD Foundation
8 1.1 riastrad * by Taylor R. Campbell.
9 1.1 riastrad *
10 1.1 riastrad * Redistribution and use in source and binary forms, with or without
11 1.1 riastrad * modification, are permitted provided that the following conditions
12 1.1 riastrad * are met:
13 1.1 riastrad * 1. Redistributions of source code must retain the above copyright
14 1.1 riastrad * notice, this list of conditions and the following disclaimer.
15 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 riastrad * notice, this list of conditions and the following disclaimer in the
17 1.1 riastrad * documentation and/or other materials provided with the distribution.
18 1.1 riastrad *
19 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE.
30 1.1 riastrad */
31 1.1 riastrad
32 1.1 riastrad #include <sys/cdefs.h>
33 1.15 riastrad __RCSID("$NetBSD: offtab.c,v 1.15 2017/07/29 21:04:07 riastradh Exp $");
34 1.1 riastrad
35 1.1 riastrad #include <sys/types.h>
36 1.1 riastrad #include <sys/endian.h>
37 1.1 riastrad
38 1.1 riastrad #include <assert.h>
39 1.1 riastrad #include <err.h>
40 1.1 riastrad #include <errno.h>
41 1.1 riastrad #include <inttypes.h>
42 1.1 riastrad #include <limits.h>
43 1.1 riastrad #include <stdbool.h>
44 1.1 riastrad #include <stdlib.h>
45 1.1 riastrad #include <unistd.h>
46 1.1 riastrad
47 1.1 riastrad #include "common.h"
48 1.1 riastrad #include "utils.h"
49 1.1 riastrad
50 1.1 riastrad #include "offtab.h"
51 1.1 riastrad
52 1.10 joerg static void __printflike(1,2) __dead
53 1.1 riastrad offtab_bug(const char *fmt, ...)
54 1.1 riastrad {
55 1.1 riastrad
56 1.1 riastrad errx(1, "bug in offtab, please report");
57 1.1 riastrad }
58 1.1 riastrad
59 1.10 joerg static void __printflike(1,2) __dead
60 1.1 riastrad offtab_bugx(const char *fmt, ...)
61 1.1 riastrad {
62 1.1 riastrad
63 1.1 riastrad errx(1, "bug in offtab, please report");
64 1.1 riastrad }
65 1.2 riastrad
66 1.2 riastrad static uint32_t
67 1.8 riastrad offtab_compute_window_size(struct offtab *offtab, uint32_t start)
68 1.2 riastrad {
69 1.2 riastrad
70 1.8 riastrad assert(start < offtab->ot_n_offsets);
71 1.8 riastrad return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start));
72 1.2 riastrad }
73 1.2 riastrad
74 1.2 riastrad static uint32_t
75 1.2 riastrad offtab_current_window_size(struct offtab *offtab)
76 1.2 riastrad {
77 1.2 riastrad
78 1.8 riastrad return offtab_compute_window_size(offtab, offtab->ot_window_start);
79 1.2 riastrad }
80 1.2 riastrad
81 1.2 riastrad static uint32_t
82 1.2 riastrad offtab_current_window_end(struct offtab *offtab)
83 1.2 riastrad {
84 1.2 riastrad
85 1.2 riastrad assert(offtab->ot_window_start < offtab->ot_n_offsets);
86 1.2 riastrad assert(offtab_current_window_size(offtab) <=
87 1.2 riastrad (offtab->ot_n_offsets - offtab->ot_window_start));
88 1.2 riastrad return (offtab->ot_window_start + offtab_current_window_size(offtab));
89 1.2 riastrad }
90 1.2 riastrad
91 1.12 riastrad static void
92 1.12 riastrad offtab_compute_window_position(struct offtab *offtab, uint32_t window_start,
93 1.12 riastrad size_t *bytes, off_t *pos)
94 1.12 riastrad {
95 1.12 riastrad const uint32_t window_size = offtab_compute_window_size(offtab,
96 1.12 riastrad window_start);
97 1.12 riastrad
98 1.15 riastrad __CTASSERT(MUL_OK(size_t, MAX_WINDOW_SIZE, sizeof(uint64_t)));
99 1.12 riastrad *bytes = (window_size * sizeof(uint64_t));
100 1.12 riastrad
101 1.12 riastrad assert(window_start <= offtab->ot_n_offsets);
102 1.15 riastrad __CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t)));
103 1.12 riastrad const off_t window_offset = ((off_t)window_start *
104 1.12 riastrad (off_t)sizeof(uint64_t));
105 1.12 riastrad
106 1.14 riastrad assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS);
107 1.15 riastrad __CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS,
108 1.15 riastrad (off_t)MAX_N_OFFSETS*sizeof(uint64_t)));
109 1.15 riastrad assert(ADD_OK(off_t, offtab->ot_fdpos, window_offset));
110 1.12 riastrad *pos = (offtab->ot_fdpos + window_offset);
111 1.12 riastrad }
112 1.12 riastrad
113 1.2 riastrad #define OFFTAB_READ_SEEK 0x01
114 1.2 riastrad #define OFFTAB_READ_NOSEEK 0x00
115 1.2 riastrad
116 1.2 riastrad static bool
117 1.2 riastrad offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
118 1.2 riastrad {
119 1.12 riastrad const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
120 1.12 riastrad size_t window_bytes;
121 1.12 riastrad off_t window_pos;
122 1.2 riastrad
123 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
124 1.2 riastrad assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
125 1.2 riastrad (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
126 1.2 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
127 1.12 riastrad
128 1.12 riastrad offtab_compute_window_position(offtab, window_start,
129 1.12 riastrad &window_bytes, &window_pos);
130 1.2 riastrad const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
131 1.12 riastrad ? pread_block(offtab->ot_fd, offtab->ot_window, window_bytes,
132 1.12 riastrad window_pos)
133 1.12 riastrad : read_block(offtab->ot_fd, offtab->ot_window, window_bytes));
134 1.2 riastrad if (n_read == -1) {
135 1.2 riastrad (*offtab->ot_report)("read offset table at %"PRIuMAX,
136 1.11 riastrad (uintmax_t)window_pos);
137 1.2 riastrad return false;
138 1.2 riastrad }
139 1.2 riastrad assert(n_read >= 0);
140 1.12 riastrad if ((size_t)n_read != window_bytes) {
141 1.2 riastrad (*offtab->ot_reportx)("partial read of offset table"
142 1.2 riastrad " at %"PRIuMAX": %zu != %zu",
143 1.12 riastrad (uintmax_t)window_pos, (size_t)n_read, window_bytes);
144 1.2 riastrad return false;
145 1.2 riastrad }
146 1.12 riastrad
147 1.2 riastrad offtab->ot_window_start = window_start;
148 1.2 riastrad
149 1.2 riastrad return true;
150 1.2 riastrad }
151 1.2 riastrad
152 1.2 riastrad static bool
153 1.2 riastrad offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
154 1.2 riastrad {
155 1.2 riastrad
156 1.2 riastrad /* Don't bother if blkno is already in the window. */
157 1.2 riastrad if ((offtab->ot_window_start <= blkno) &&
158 1.2 riastrad (blkno < offtab_current_window_end(offtab)))
159 1.2 riastrad return true;
160 1.2 riastrad
161 1.2 riastrad if (!offtab_read_window(offtab, blkno, read_flags))
162 1.2 riastrad return false;
163 1.2 riastrad
164 1.2 riastrad return true;
165 1.2 riastrad }
166 1.2 riastrad
167 1.2 riastrad static void
168 1.5 riastrad offtab_write_window(struct offtab *offtab)
169 1.2 riastrad {
170 1.12 riastrad size_t window_bytes;
171 1.12 riastrad off_t window_pos;
172 1.2 riastrad
173 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
174 1.2 riastrad
175 1.12 riastrad offtab_compute_window_position(offtab, offtab->ot_window_start,
176 1.12 riastrad &window_bytes, &window_pos);
177 1.2 riastrad const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
178 1.12 riastrad window_bytes, window_pos);
179 1.2 riastrad if (n_written == -1)
180 1.2 riastrad err_ss(1, "write initial offset table");
181 1.2 riastrad assert(n_written >= 0);
182 1.12 riastrad if ((size_t)n_written != window_bytes)
183 1.2 riastrad errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
184 1.2 riastrad (size_t)n_written,
185 1.12 riastrad window_bytes);
186 1.2 riastrad }
187 1.5 riastrad
188 1.5 riastrad static void
189 1.5 riastrad offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
190 1.5 riastrad {
191 1.5 riastrad
192 1.5 riastrad /* Don't bother if [start, end) does not cover our window. */
193 1.5 riastrad if (end <= offtab->ot_window_start)
194 1.5 riastrad return;
195 1.5 riastrad if (offtab_current_window_end(offtab) < start)
196 1.5 riastrad return;
197 1.5 riastrad
198 1.5 riastrad offtab_write_window(offtab);
199 1.5 riastrad }
200 1.1 riastrad
201 1.1 riastrad /*
203 1.1 riastrad * Initialize an offtab to support the specified number of offsets read
204 1.1 riastrad * to or written from fd at byte position fdpos.
205 1.1 riastrad */
206 1.2 riastrad void
207 1.2 riastrad offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
208 1.1 riastrad int fd, off_t fdpos)
209 1.1 riastrad {
210 1.1 riastrad
211 1.1 riastrad assert(offtab != NULL);
212 1.1 riastrad assert(0 < n_offsets);
213 1.1 riastrad assert(0 <= fd);
214 1.14 riastrad assert(0 <= fdpos);
215 1.1 riastrad assert(fdpos <= OFFTAB_MAX_FDPOS);
216 1.1 riastrad
217 1.2 riastrad offtab->ot_n_offsets = n_offsets;
218 1.2 riastrad if ((window_size == 0) || (n_offsets < window_size))
219 1.2 riastrad offtab->ot_window_size = n_offsets;
220 1.2 riastrad else
221 1.2 riastrad offtab->ot_window_size = window_size;
222 1.2 riastrad assert(offtab->ot_window_size <= offtab->ot_n_offsets);
223 1.15 riastrad offtab->ot_window_start = (uint32_t)-1;
224 1.2 riastrad __CTASSERT(MUL_OK(size_t, MAX_WINDOW_SIZE, sizeof(uint64_t)));
225 1.2 riastrad offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
226 1.1 riastrad if (offtab->ot_window == NULL)
227 1.1 riastrad err(1, "malloc offset table");
228 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
229 1.1 riastrad offtab->ot_fd = fd;
230 1.1 riastrad offtab->ot_fdpos = fdpos;
231 1.1 riastrad offtab->ot_report = &offtab_bug;
232 1.1 riastrad offtab->ot_reportx = &offtab_bugx;
233 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_NONE;
234 1.1 riastrad }
235 1.1 riastrad
236 1.1 riastrad /*
237 1.1 riastrad * Destroy an offtab.
238 1.1 riastrad */
239 1.1 riastrad void
240 1.1 riastrad offtab_destroy(struct offtab *offtab)
241 1.1 riastrad {
242 1.2 riastrad
243 1.1 riastrad free(offtab->ot_window);
244 1.1 riastrad }
245 1.1 riastrad
246 1.1 riastrad /*
247 1.1 riastrad * For an offtab that has been used to read data from disk, convert it
248 1.2 riastrad * to an offtab that can be used to write subsequent data to disk.
249 1.1 riastrad * blkno is the last valid blkno read from disk.
250 1.2 riastrad */
251 1.2 riastrad bool
252 1.1 riastrad offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
253 1.1 riastrad {
254 1.1 riastrad
255 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
256 1.2 riastrad assert(0 < blkno);
257 1.2 riastrad
258 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
259 1.2 riastrad return false;
260 1.1 riastrad
261 1.2 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
262 1.2 riastrad offtab->ot_blkno = blkno;
263 1.2 riastrad
264 1.1 riastrad return true;
265 1.1 riastrad }
266 1.1 riastrad
267 1.1 riastrad /*
269 1.1 riastrad * Reset an offtab for reading an offset table from the beginning.
270 1.1 riastrad * Initializes in-memory state and may read data from offtab->ot_fd,
271 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. Failure
272 1.1 riastrad * will be reported by the report/reportx routines, which are called
273 1.2 riastrad * like warn/warnx. May fail; returns true on success, false on
274 1.2 riastrad * failure.
275 1.2 riastrad *
276 1.2 riastrad * This almost has copypasta of offtab_prepare_get, but this uses read,
277 1.1 riastrad * rather than pread, so that it will work on nonseekable input if the
278 1.1 riastrad * window is the whole offset table.
279 1.1 riastrad */
280 1.1 riastrad bool
281 1.1 riastrad offtab_reset_read(struct offtab *offtab,
282 1.1 riastrad void (*report)(const char *, ...) __printflike(1,2),
283 1.1 riastrad void (*reportx)(const char *, ...) __printflike(1,2))
284 1.1 riastrad {
285 1.1 riastrad
286 1.1 riastrad assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
287 1.1 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
288 1.1 riastrad
289 1.1 riastrad offtab->ot_report = report;
290 1.2 riastrad offtab->ot_reportx = reportx;
291 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_READ;
292 1.2 riastrad offtab->ot_blkno = (uint32_t)-1;
293 1.1 riastrad
294 1.1 riastrad if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
295 1.4 riastrad return false;
296 1.15 riastrad
297 1.11 riastrad if (offtab->ot_window_size < offtab->ot_n_offsets) {
298 1.11 riastrad __CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t)));
299 1.14 riastrad const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
300 1.15 riastrad (off_t)sizeof(uint64_t));
301 1.15 riastrad assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS);
302 1.15 riastrad __CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS,
303 1.11 riastrad (off_t)MAX_N_OFFSETS*sizeof(uint64_t)));
304 1.4 riastrad assert(ADD_OK(off_t, offtab->ot_fdpos, offtab_bytes));
305 1.4 riastrad const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
306 1.4 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
307 1.4 riastrad (*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
308 1.4 riastrad first_offset);
309 1.4 riastrad return false;
310 1.4 riastrad }
311 1.1 riastrad }
312 1.1 riastrad
313 1.1 riastrad return true;
314 1.1 riastrad }
315 1.1 riastrad
316 1.1 riastrad /*
317 1.1 riastrad * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
318 1.1 riastrad * preparation for a call to offtab_get. May fail; returns true on
319 1.1 riastrad * success, false on failure.
320 1.1 riastrad */
321 1.1 riastrad bool
322 1.1 riastrad offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
323 1.1 riastrad {
324 1.1 riastrad
325 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
326 1.2 riastrad assert(blkno < offtab->ot_n_offsets);
327 1.2 riastrad
328 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
329 1.2 riastrad return false;
330 1.2 riastrad
331 1.2 riastrad assert(offtab->ot_window_start <= blkno);
332 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
333 1.1 riastrad
334 1.1 riastrad offtab->ot_blkno = blkno;
335 1.1 riastrad return true;
336 1.1 riastrad }
337 1.1 riastrad
338 1.1 riastrad /*
339 1.1 riastrad * Return the offset for blkno. Caller must have called
340 1.1 riastrad * offtab_prepare_get beforehand.
341 1.1 riastrad */
342 1.1 riastrad uint64_t
343 1.1 riastrad offtab_get(struct offtab *offtab, uint32_t blkno)
344 1.1 riastrad {
345 1.1 riastrad
346 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
347 1.2 riastrad assert(blkno == offtab->ot_blkno);
348 1.2 riastrad assert(offtab->ot_window_start <= blkno);
349 1.2 riastrad assert(blkno < offtab_current_window_end(offtab));
350 1.1 riastrad
351 1.1 riastrad return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
352 1.1 riastrad }
353 1.1 riastrad
354 1.1 riastrad /*
356 1.1 riastrad * Reset offtab for writing a fresh offset table. Initializes
357 1.1 riastrad * in-memory state and writes an empty offset table to offtab->ot_fd,
358 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. May
359 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
360 1.1 riastrad */
361 1.1 riastrad void
362 1.1 riastrad offtab_reset_write(struct offtab *offtab)
363 1.1 riastrad {
364 1.1 riastrad uint32_t i;
365 1.1 riastrad
366 1.2 riastrad assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
367 1.1 riastrad
368 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
369 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
370 1.1 riastrad
371 1.1 riastrad /*
372 1.1 riastrad * Initialize the offset table to all ones (except for the
373 1.1 riastrad * fixed first offset) so that we can easily detect where we
374 1.1 riastrad * were interrupted if we want to restart.
375 1.2 riastrad */
376 1.14 riastrad __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
377 1.2 riastrad assert(offtab->ot_n_offsets > 0);
378 1.2 riastrad
379 1.2 riastrad /* Initialize window of all ones. */
380 1.14 riastrad for (i = 0; i < offtab->ot_window_size; i++)
381 1.2 riastrad offtab->ot_window[i] = ~(uint64_t)0;
382 1.2 riastrad
383 1.2 riastrad /* Write the window to every position in the table. */
384 1.2 riastrad const uint32_t n_windows =
385 1.2 riastrad howmany(offtab->ot_n_offsets, offtab->ot_window_size);
386 1.5 riastrad for (i = 1; i < n_windows; i++) {
387 1.2 riastrad /* Change the start but reuse the all-ones buffer. */
388 1.2 riastrad offtab->ot_window_start = (i * offtab->ot_window_size);
389 1.14 riastrad offtab_write_window(offtab);
390 1.15 riastrad }
391 1.11 riastrad
392 1.11 riastrad /* Compute the number of bytes in the offset table. */
393 1.14 riastrad __CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t)));
394 1.14 riastrad const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
395 1.14 riastrad sizeof(uint64_t));
396 1.15 riastrad
397 1.15 riastrad /* Compute the offset of the first block. */
398 1.15 riastrad assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS);
399 1.11 riastrad __CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS,
400 1.14 riastrad MAX_N_OFFSETS*sizeof(uint64_t)));
401 1.14 riastrad assert(ADD_OK(off_t, offtab->ot_fdpos, offtab_bytes));
402 1.15 riastrad const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
403 1.15 riastrad
404 1.15 riastrad /* Assert that it fits in 64 bits. */
405 1.14 riastrad __CTASSERT(MUL_OK(uint64_t, MAX_N_OFFSETS, sizeof(uint64_t)));
406 1.14 riastrad __CTASSERT(ADD_OK(uint64_t, OFFTAB_MAX_FDPOS,
407 1.14 riastrad (uint64_t)MAX_N_OFFSETS*sizeof(uint64_t)));
408 1.11 riastrad
409 1.5 riastrad /* Write out the first window with the first offset. */
410 1.1 riastrad offtab->ot_window_start = 0;
411 1.2 riastrad offtab->ot_window[0] = htobe64((uint64_t)first_offset);
412 1.2 riastrad offtab_write_window(offtab);
413 1.1 riastrad
414 1.1 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
415 1.1 riastrad err(1, "lseek to first offset failed");
416 1.1 riastrad }
417 1.1 riastrad
418 1.1 riastrad /*
419 1.1 riastrad * Guarantee that the disk reflects block offsets [0, n_offsets). If
420 1.1 riastrad * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
421 1.1 riastrad * offset table. May fail; returns on success, aborts with err(3) on
422 1.1 riastrad * failure. Fsync failure is considered success but is reported with a
423 1.2 riastrad * warning.
424 1.2 riastrad *
425 1.2 riastrad * This routine does not write state in memory, and does not read state
426 1.1 riastrad * that is not signal-safe. The only state read is offtab->ot_window,
427 1.1 riastrad * offtab->ot_window_start, and quantities that are static for the
428 1.1 riastrad * signal-interruptable existence of the offset table.
429 1.1 riastrad */
430 1.1 riastrad void
431 1.1 riastrad offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
432 1.1 riastrad {
433 1.1 riastrad
434 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
435 1.2 riastrad assert(n_offsets <= offtab->ot_n_offsets);
436 1.2 riastrad
437 1.2 riastrad /*
438 1.2 riastrad * Write the window unless we just did that and were
439 1.5 riastrad * interrupted before we could move the window.
440 1.1 riastrad */
441 1.1 riastrad if (offtab->ot_window != NULL)
442 1.15 riastrad offtab_maybe_write_window(offtab, 0, n_offsets);
443 1.11 riastrad
444 1.11 riastrad if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
445 1.15 riastrad __CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t)));
446 1.15 riastrad const off_t sync_bytes = ((off_t)n_offsets *
447 1.15 riastrad (off_t)sizeof(uint64_t));
448 1.1 riastrad __CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS,
449 1.11 riastrad MAX_N_OFFSETS*sizeof(uint64_t)));
450 1.1 riastrad assert(ADD_OK(off_t, offtab->ot_fdpos, sync_bytes));
451 1.1 riastrad if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
452 1.1 riastrad offtab->ot_fdpos, (offtab->ot_fdpos + sync_bytes))
453 1.1 riastrad == -1)
454 1.1 riastrad warn_ss("fsync of offset table failed");
455 1.1 riastrad }
456 1.1 riastrad }
457 1.1 riastrad
458 1.1 riastrad /*
459 1.1 riastrad * Do any I/O or bookkeeping necessary to set an offset for blkno. May
460 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
461 1.1 riastrad */
462 1.2 riastrad void
463 1.1 riastrad offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
464 1.1 riastrad {
465 1.1 riastrad uint32_t i;
466 1.2 riastrad
467 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
468 1.2 riastrad assert(blkno < offtab->ot_n_offsets);
469 1.2 riastrad
470 1.2 riastrad /*
471 1.2 riastrad * Assume, for convenience, that we write blocks in order.
472 1.2 riastrad * Thus we need not do another read -- we can just clear the
473 1.2 riastrad * window.
474 1.2 riastrad */
475 1.2 riastrad assert((offtab->ot_blkno == (uint32_t)-1) ||
476 1.2 riastrad ((offtab->ot_blkno + 1) == blkno));
477 1.2 riastrad
478 1.2 riastrad /* If it's already in our window, we're good to go. */
479 1.2 riastrad if ((offtab->ot_window_start <= blkno) &&
480 1.2 riastrad (blkno < offtab_current_window_end(offtab)))
481 1.5 riastrad goto win;
482 1.2 riastrad
483 1.2 riastrad /* Otherwise, write out the current window and choose a new one. */
484 1.2 riastrad offtab_write_window(offtab);
485 1.2 riastrad
486 1.2 riastrad assert(offtab->ot_window_size <= blkno);
487 1.2 riastrad assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
488 1.2 riastrad assert((offtab->ot_window_start + offtab->ot_window_size) ==
489 1.2 riastrad rounddown(blkno, offtab->ot_window_size));
490 1.2 riastrad
491 1.2 riastrad {
492 1.2 riastrad uint64_t *window;
493 1.2 riastrad sigset_t sigmask;
494 1.7 riastrad
495 1.2 riastrad /*
496 1.2 riastrad * Mark the window as being updated so nobody tries to write it
497 1.2 riastrad * (since we just wrote it) while we fill it with ones.
498 1.2 riastrad */
499 1.2 riastrad block_signals(&sigmask);
500 1.2 riastrad window = offtab->ot_window;
501 1.2 riastrad offtab->ot_window = NULL;
502 1.2 riastrad restore_sigmask(&sigmask);
503 1.2 riastrad
504 1.2 riastrad /* Fill the window with ones. */
505 1.2 riastrad for (i = 0; i < offtab_current_window_size(offtab); i++)
506 1.2 riastrad window[i] = ~(uint64_t)0;
507 1.2 riastrad
508 1.2 riastrad /* Restore the window as ready again. */
509 1.2 riastrad block_signals(&sigmask);
510 1.2 riastrad offtab->ot_window = window;
511 1.2 riastrad offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
512 1.2 riastrad restore_sigmask(&sigmask);
513 1.2 riastrad }
514 1.2 riastrad
515 1.1 riastrad win: assert(offtab->ot_window_start <= blkno);
516 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
517 1.1 riastrad
518 1.1 riastrad offtab->ot_blkno = blkno;
519 1.1 riastrad }
520 1.1 riastrad
521 1.1 riastrad /*
522 1.1 riastrad * Actually set the offset for blkno.
523 1.1 riastrad */
524 1.1 riastrad void
525 1.1 riastrad offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
526 1.1 riastrad {
527 1.2 riastrad
528 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
529 1.2 riastrad assert(blkno == offtab->ot_blkno);
530 1.2 riastrad assert(offtab->ot_window_start <= blkno);
531 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
532
533 offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
534 }
535