offtab.c revision 1.7 1 1.1 riastrad /* $NetBSD: offtab.c,v 1.7 2014/01/22 06:17:07 riastradh Exp $ */
2 1.1 riastrad
3 1.1 riastrad /*-
4 1.1 riastrad * Copyright (c) 2014 The NetBSD Foundation, Inc.
5 1.1 riastrad * All rights reserved.
6 1.1 riastrad *
7 1.1 riastrad * This code is derived from software contributed to The NetBSD Foundation
8 1.1 riastrad * by Taylor R. Campbell.
9 1.1 riastrad *
10 1.1 riastrad * Redistribution and use in source and binary forms, with or without
11 1.1 riastrad * modification, are permitted provided that the following conditions
12 1.1 riastrad * are met:
13 1.1 riastrad * 1. Redistributions of source code must retain the above copyright
14 1.1 riastrad * notice, this list of conditions and the following disclaimer.
15 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 riastrad * notice, this list of conditions and the following disclaimer in the
17 1.1 riastrad * documentation and/or other materials provided with the distribution.
18 1.1 riastrad *
19 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE.
30 1.1 riastrad */
31 1.1 riastrad
32 1.1 riastrad #include <sys/cdefs.h>
33 1.1 riastrad __RCSID("$NetBSD");
34 1.1 riastrad
35 1.1 riastrad #include <sys/types.h>
36 1.1 riastrad #include <sys/endian.h>
37 1.1 riastrad
38 1.1 riastrad #include <assert.h>
39 1.1 riastrad #include <err.h>
40 1.1 riastrad #include <errno.h>
41 1.1 riastrad #include <inttypes.h>
42 1.1 riastrad #include <limits.h>
43 1.1 riastrad #include <stdbool.h>
44 1.1 riastrad #include <stdlib.h>
45 1.1 riastrad #include <unistd.h>
46 1.1 riastrad
47 1.1 riastrad #include "common.h"
48 1.1 riastrad #include "utils.h"
49 1.1 riastrad
50 1.1 riastrad #include "offtab.h"
51 1.1 riastrad
52 1.1 riastrad static void __printflike(1,2)
53 1.1 riastrad offtab_bug(const char *fmt, ...)
54 1.1 riastrad {
55 1.1 riastrad
56 1.1 riastrad errx(1, "bug in offtab, please report");
57 1.1 riastrad }
58 1.1 riastrad
59 1.1 riastrad static void __printflike(1,2)
60 1.1 riastrad offtab_bugx(const char *fmt, ...)
61 1.1 riastrad {
62 1.1 riastrad
63 1.1 riastrad errx(1, "bug in offtab, please report");
64 1.1 riastrad }
65 1.2 riastrad
66 1.2 riastrad static uint32_t
67 1.2 riastrad offtab_compute_window_size(struct offtab *offtab, uint32_t start,
68 1.2 riastrad uint32_t end)
69 1.2 riastrad {
70 1.2 riastrad
71 1.2 riastrad if (end == 0)
72 1.2 riastrad end = offtab->ot_n_offsets;
73 1.2 riastrad
74 1.2 riastrad assert(end <= offtab->ot_n_offsets);
75 1.2 riastrad assert(start < end);
76 1.2 riastrad return MIN(offtab->ot_window_size, (end - start));
77 1.2 riastrad }
78 1.2 riastrad
79 1.2 riastrad static uint32_t
80 1.2 riastrad offtab_current_window_size(struct offtab *offtab)
81 1.2 riastrad {
82 1.2 riastrad
83 1.2 riastrad return offtab_compute_window_size(offtab, offtab->ot_window_start, 0);
84 1.2 riastrad }
85 1.2 riastrad
86 1.2 riastrad static uint32_t
87 1.2 riastrad offtab_current_window_end(struct offtab *offtab)
88 1.2 riastrad {
89 1.2 riastrad
90 1.2 riastrad assert(offtab->ot_window_start < offtab->ot_n_offsets);
91 1.2 riastrad assert(offtab_current_window_size(offtab) <=
92 1.2 riastrad (offtab->ot_n_offsets - offtab->ot_window_start));
93 1.2 riastrad return (offtab->ot_window_start + offtab_current_window_size(offtab));
94 1.2 riastrad }
95 1.2 riastrad
96 1.2 riastrad #define OFFTAB_READ_SEEK 0x01
97 1.2 riastrad #define OFFTAB_READ_NOSEEK 0x00
98 1.2 riastrad
99 1.2 riastrad static bool
100 1.2 riastrad offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
101 1.2 riastrad {
102 1.2 riastrad
103 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
104 1.2 riastrad
105 1.2 riastrad const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
106 1.2 riastrad const uint32_t window_size = offtab_compute_window_size(offtab,
107 1.2 riastrad window_start, 0);
108 1.2 riastrad
109 1.2 riastrad __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
110 1.2 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
111 1.2 riastrad assert(window_start < offtab->ot_n_offsets);
112 1.2 riastrad assert(offtab->ot_fdpos <=
113 1.3 riastrad (OFF_MAX - (off_t)(window_start * sizeof(uint64_t))));
114 1.2 riastrad assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
115 1.2 riastrad (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
116 1.2 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
117 1.2 riastrad const size_t n_req = (window_size * sizeof(uint64_t));
118 1.2 riastrad const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
119 1.2 riastrad ? pread_block(offtab->ot_fd, offtab->ot_window, n_req,
120 1.2 riastrad (offtab->ot_fdpos + (window_start * sizeof(uint64_t))))
121 1.2 riastrad : read_block(offtab->ot_fd, offtab->ot_window, n_req));
122 1.2 riastrad if (n_read == -1) {
123 1.2 riastrad (*offtab->ot_report)("read offset table at %"PRIuMAX,
124 1.2 riastrad (uintmax_t)(offtab->ot_fdpos +
125 1.2 riastrad (window_start * sizeof(uint64_t))));
126 1.2 riastrad return false;
127 1.2 riastrad }
128 1.2 riastrad assert(n_read >= 0);
129 1.2 riastrad if ((size_t)n_read != (window_size * sizeof(uint64_t))) {
130 1.2 riastrad (*offtab->ot_reportx)("partial read of offset table"
131 1.2 riastrad " at %"PRIuMAX": %zu != %zu",
132 1.2 riastrad (uintmax_t)(offtab->ot_fdpos +
133 1.2 riastrad (window_start * sizeof(uint64_t))),
134 1.2 riastrad (size_t)n_read,
135 1.2 riastrad (size_t)(window_size * sizeof(uint64_t)));
136 1.2 riastrad return false;
137 1.2 riastrad }
138 1.2 riastrad offtab->ot_window_start = window_start;
139 1.2 riastrad
140 1.2 riastrad return true;
141 1.2 riastrad }
142 1.2 riastrad
143 1.2 riastrad static bool
144 1.2 riastrad offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
145 1.2 riastrad {
146 1.2 riastrad
147 1.2 riastrad /* Don't bother if blkno is already in the window. */
148 1.2 riastrad if ((offtab->ot_window_start <= blkno) &&
149 1.2 riastrad (blkno < offtab_current_window_end(offtab)))
150 1.2 riastrad return true;
151 1.2 riastrad
152 1.2 riastrad if (!offtab_read_window(offtab, blkno, read_flags))
153 1.2 riastrad return false;
154 1.2 riastrad
155 1.2 riastrad return true;
156 1.2 riastrad }
157 1.2 riastrad
158 1.2 riastrad static void
159 1.5 riastrad offtab_write_window(struct offtab *offtab)
160 1.2 riastrad {
161 1.2 riastrad
162 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
163 1.2 riastrad
164 1.2 riastrad const uint32_t window_size = offtab_current_window_size(offtab);
165 1.2 riastrad __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
166 1.2 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
167 1.2 riastrad assert(offtab->ot_window_start < offtab->ot_n_offsets);
168 1.2 riastrad assert(offtab->ot_fdpos <=
169 1.3 riastrad (OFF_MAX - (off_t)(offtab->ot_window_start * sizeof(uint64_t))));
170 1.2 riastrad const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
171 1.2 riastrad (window_size * sizeof(uint64_t)),
172 1.2 riastrad (offtab->ot_fdpos +
173 1.2 riastrad (offtab->ot_window_start * sizeof(uint64_t))));
174 1.2 riastrad if (n_written == -1)
175 1.2 riastrad err_ss(1, "write initial offset table");
176 1.2 riastrad assert(n_written >= 0);
177 1.2 riastrad if ((size_t)n_written != (window_size * sizeof(uint64_t)))
178 1.2 riastrad errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
179 1.2 riastrad (size_t)n_written,
180 1.2 riastrad (size_t)(window_size * sizeof(uint64_t)));
181 1.2 riastrad }
182 1.5 riastrad
183 1.5 riastrad static void
184 1.5 riastrad offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
185 1.5 riastrad {
186 1.5 riastrad
187 1.5 riastrad /* Don't bother if [start, end) does not cover our window. */
188 1.5 riastrad if (end <= offtab->ot_window_start)
189 1.5 riastrad return;
190 1.5 riastrad if (offtab_current_window_end(offtab) < start)
191 1.5 riastrad return;
192 1.5 riastrad
193 1.5 riastrad offtab_write_window(offtab);
194 1.5 riastrad }
195 1.1 riastrad
196 1.1 riastrad /*
198 1.1 riastrad * Initialize an offtab to support the specified number of offsets read
199 1.1 riastrad * to or written from fd at byte position fdpos.
200 1.1 riastrad */
201 1.2 riastrad void
202 1.2 riastrad offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
203 1.1 riastrad int fd, off_t fdpos)
204 1.1 riastrad {
205 1.1 riastrad
206 1.1 riastrad assert(offtab != NULL);
207 1.1 riastrad assert(0 < n_offsets);
208 1.1 riastrad assert(0 <= fd);
209 1.1 riastrad assert(0 <= fdpos);
210 1.1 riastrad
211 1.2 riastrad offtab->ot_n_offsets = n_offsets;
212 1.2 riastrad if ((window_size == 0) || (n_offsets < window_size))
213 1.2 riastrad offtab->ot_window_size = n_offsets;
214 1.2 riastrad else
215 1.2 riastrad offtab->ot_window_size = window_size;
216 1.2 riastrad assert(offtab->ot_window_size <= offtab->ot_n_offsets);
217 1.2 riastrad offtab->ot_window_start = (uint32_t)-1;
218 1.2 riastrad __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
219 1.2 riastrad offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
220 1.1 riastrad if (offtab->ot_window == NULL)
221 1.1 riastrad err(1, "malloc offset table");
222 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
223 1.1 riastrad offtab->ot_fd = fd;
224 1.1 riastrad offtab->ot_fdpos = fdpos;
225 1.1 riastrad offtab->ot_report = &offtab_bug;
226 1.1 riastrad offtab->ot_reportx = &offtab_bugx;
227 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_NONE;
228 1.1 riastrad }
229 1.1 riastrad
230 1.1 riastrad /*
231 1.1 riastrad * Destroy an offtab.
232 1.1 riastrad */
233 1.1 riastrad void
234 1.1 riastrad offtab_destroy(struct offtab *offtab)
235 1.1 riastrad {
236 1.2 riastrad
237 1.1 riastrad free(offtab->ot_window);
238 1.1 riastrad }
239 1.1 riastrad
240 1.1 riastrad /*
241 1.1 riastrad * For an offtab that has been used to read data from disk, convert it
242 1.2 riastrad * to an offtab that can be used to write subsequent data to disk.
243 1.1 riastrad * blkno is the last valid blkno read from disk.
244 1.2 riastrad */
245 1.2 riastrad bool
246 1.1 riastrad offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
247 1.1 riastrad {
248 1.1 riastrad
249 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
250 1.2 riastrad assert(0 < blkno);
251 1.2 riastrad
252 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
253 1.2 riastrad return false;
254 1.1 riastrad
255 1.2 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
256 1.2 riastrad offtab->ot_blkno = blkno;
257 1.2 riastrad
258 1.1 riastrad return true;
259 1.1 riastrad }
260 1.1 riastrad
261 1.1 riastrad /*
263 1.1 riastrad * Reset an offtab for reading an offset table from the beginning.
264 1.1 riastrad * Initializes in-memory state and may read data from offtab->ot_fd,
265 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. Failure
266 1.1 riastrad * will be reported by the report/reportx routines, which are called
267 1.2 riastrad * like warn/warnx. May fail; returns true on success, false on
268 1.2 riastrad * failure.
269 1.2 riastrad *
270 1.2 riastrad * This almost has copypasta of offtab_prepare_get, but this uses read,
271 1.1 riastrad * rather than pread, so that it will work on nonseekable input if the
272 1.1 riastrad * window is the whole offset table.
273 1.1 riastrad */
274 1.1 riastrad bool
275 1.1 riastrad offtab_reset_read(struct offtab *offtab,
276 1.1 riastrad void (*report)(const char *, ...) __printflike(1,2),
277 1.1 riastrad void (*reportx)(const char *, ...) __printflike(1,2))
278 1.1 riastrad {
279 1.1 riastrad
280 1.1 riastrad assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
281 1.1 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
282 1.1 riastrad
283 1.1 riastrad offtab->ot_report = report;
284 1.2 riastrad offtab->ot_reportx = reportx;
285 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_READ;
286 1.2 riastrad offtab->ot_blkno = (uint32_t)-1;
287 1.1 riastrad
288 1.1 riastrad if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
289 1.4 riastrad return false;
290 1.4 riastrad
291 1.4 riastrad if (offtab->ot_window_size < offtab->ot_n_offsets) {
292 1.4 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
293 1.4 riastrad assert(offtab->ot_fdpos <= (OFF_MAX -
294 1.4 riastrad (off_t)(offtab->ot_n_offsets * sizeof(uint64_t))));
295 1.4 riastrad const off_t first_offset = (offtab->ot_fdpos +
296 1.4 riastrad (offtab->ot_n_offsets * sizeof(uint64_t)));
297 1.4 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
298 1.4 riastrad (*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
299 1.4 riastrad first_offset);
300 1.4 riastrad return false;
301 1.4 riastrad }
302 1.1 riastrad }
303 1.1 riastrad
304 1.1 riastrad return true;
305 1.1 riastrad }
306 1.1 riastrad
307 1.1 riastrad /*
308 1.1 riastrad * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
309 1.1 riastrad * preparation for a call to offtab_get. May fail; returns true on
310 1.1 riastrad * success, false on failure.
311 1.1 riastrad */
312 1.1 riastrad bool
313 1.1 riastrad offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
314 1.1 riastrad {
315 1.1 riastrad
316 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
317 1.2 riastrad assert(blkno < offtab->ot_n_offsets);
318 1.2 riastrad
319 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
320 1.2 riastrad return false;
321 1.2 riastrad
322 1.2 riastrad assert(offtab->ot_window_start <= blkno);
323 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
324 1.1 riastrad
325 1.1 riastrad offtab->ot_blkno = blkno;
326 1.1 riastrad return true;
327 1.1 riastrad }
328 1.1 riastrad
329 1.1 riastrad /*
330 1.1 riastrad * Return the offset for blkno. Caller must have called
331 1.1 riastrad * offtab_prepare_get beforehand.
332 1.1 riastrad */
333 1.1 riastrad uint64_t
334 1.1 riastrad offtab_get(struct offtab *offtab, uint32_t blkno)
335 1.1 riastrad {
336 1.1 riastrad
337 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
338 1.2 riastrad assert(blkno == offtab->ot_blkno);
339 1.2 riastrad assert(offtab->ot_window_start <= blkno);
340 1.2 riastrad assert(blkno < offtab_current_window_end(offtab));
341 1.1 riastrad
342 1.1 riastrad return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
343 1.1 riastrad }
344 1.1 riastrad
345 1.1 riastrad /*
347 1.1 riastrad * Reset offtab for writing a fresh offset table. Initializes
348 1.1 riastrad * in-memory state and writes an empty offset table to offtab->ot_fd,
349 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. May
350 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
351 1.1 riastrad */
352 1.1 riastrad void
353 1.1 riastrad offtab_reset_write(struct offtab *offtab)
354 1.1 riastrad {
355 1.1 riastrad uint32_t i;
356 1.1 riastrad
357 1.2 riastrad assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
358 1.1 riastrad
359 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
360 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
361 1.1 riastrad
362 1.1 riastrad /*
363 1.1 riastrad * Initialize the offset table to all ones (except for the
364 1.1 riastrad * fixed first offset) so that we can easily detect where we
365 1.1 riastrad * were interrupted if we want to restart.
366 1.2 riastrad */
367 1.2 riastrad __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
368 1.2 riastrad assert(offtab->ot_n_offsets > 0);
369 1.2 riastrad
370 1.2 riastrad for (i = 0; i < offtab->ot_window_size; i++)
371 1.2 riastrad offtab->ot_window[i] = ~(uint64_t)0;
372 1.2 riastrad
373 1.2 riastrad const uint32_t n_windows =
374 1.2 riastrad howmany(offtab->ot_n_offsets, offtab->ot_window_size);
375 1.5 riastrad for (i = 1; i < n_windows; i++) {
376 1.2 riastrad /* Change the start but reuse the all-ones buffer. */
377 1.2 riastrad offtab->ot_window_start = (i * offtab->ot_window_size);
378 1.2 riastrad offtab_write_window(offtab);
379 1.2 riastrad }
380 1.2 riastrad
381 1.3 riastrad offtab->ot_window_start = 0;
382 1.3 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
383 1.1 riastrad assert(offtab->ot_fdpos <=
384 1.3 riastrad (OFF_MAX - (off_t)(offtab->ot_n_offsets * sizeof(uint64_t))));
385 1.2 riastrad const uint64_t first_offset = (offtab->ot_fdpos +
386 1.5 riastrad (offtab->ot_n_offsets * sizeof(uint64_t)));
387 1.1 riastrad assert(first_offset <= OFF_MAX);
388 1.2 riastrad offtab->ot_window[0] = htobe64(first_offset);
389 1.2 riastrad offtab_write_window(offtab);
390 1.1 riastrad
391 1.1 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
392 1.1 riastrad err(1, "lseek to first offset failed");
393 1.1 riastrad }
394 1.1 riastrad
395 1.1 riastrad /*
396 1.1 riastrad * Guarantee that the disk reflects block offsets [0, n_offsets). If
397 1.1 riastrad * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
398 1.1 riastrad * offset table. May fail; returns on success, aborts with err(3) on
399 1.1 riastrad * failure. Fsync failure is considered success but is reported with a
400 1.2 riastrad * warning.
401 1.2 riastrad *
402 1.2 riastrad * This routine does not write state in memory, and does not read state
403 1.1 riastrad * that is not signal-safe. The only state read is offtab->ot_window,
404 1.1 riastrad * offtab->ot_window_start, and quantities that are static for the
405 1.1 riastrad * signal-interruptable existence of the offset table.
406 1.1 riastrad */
407 1.1 riastrad void
408 1.1 riastrad offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
409 1.1 riastrad {
410 1.1 riastrad
411 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
412 1.2 riastrad assert(n_offsets <= offtab->ot_n_offsets);
413 1.2 riastrad
414 1.2 riastrad /*
415 1.2 riastrad * Write the window unless we just did that and were
416 1.5 riastrad * interrupted before we could move the window.
417 1.1 riastrad */
418 1.1 riastrad if (offtab->ot_window != NULL)
419 1.2 riastrad offtab_maybe_write_window(offtab, 0, n_offsets);
420 1.2 riastrad
421 1.3 riastrad if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
422 1.1 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
423 1.1 riastrad assert(offtab->ot_fdpos
424 1.2 riastrad <= (OFF_MAX - (off_t)(n_offsets * sizeof(uint64_t))));
425 1.1 riastrad if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
426 1.1 riastrad offtab->ot_fdpos,
427 1.1 riastrad (offtab->ot_fdpos + (n_offsets * sizeof(uint64_t))))
428 1.1 riastrad == -1)
429 1.1 riastrad warn_ss("fsync of offset table failed");
430 1.1 riastrad }
431 1.1 riastrad }
432 1.1 riastrad
433 1.1 riastrad /*
434 1.1 riastrad * Do any I/O or bookkeeping necessary to set an offset for blkno. May
435 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
436 1.1 riastrad */
437 1.2 riastrad void
438 1.1 riastrad offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
439 1.1 riastrad {
440 1.1 riastrad uint32_t i;
441 1.2 riastrad
442 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
443 1.2 riastrad assert(blkno < offtab->ot_n_offsets);
444 1.2 riastrad
445 1.2 riastrad /*
446 1.2 riastrad * Assume, for convenience, that we write blocks in order.
447 1.2 riastrad * Thus we need not do another read -- we can just clear the
448 1.2 riastrad * window.
449 1.2 riastrad */
450 1.2 riastrad assert((offtab->ot_blkno == (uint32_t)-1) ||
451 1.2 riastrad ((offtab->ot_blkno + 1) == blkno));
452 1.2 riastrad
453 1.2 riastrad /* If it's already in our window, we're good to go. */
454 1.2 riastrad if ((offtab->ot_window_start <= blkno) &&
455 1.2 riastrad (blkno < offtab_current_window_end(offtab)))
456 1.5 riastrad goto win;
457 1.2 riastrad
458 1.2 riastrad /* Otherwise, write out the current window and choose a new one. */
459 1.2 riastrad offtab_write_window(offtab);
460 1.2 riastrad
461 1.2 riastrad assert(offtab->ot_window_size <= blkno);
462 1.2 riastrad assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
463 1.2 riastrad assert((offtab->ot_window_start + offtab->ot_window_size) ==
464 1.2 riastrad rounddown(blkno, offtab->ot_window_size));
465 1.2 riastrad
466 1.2 riastrad {
467 1.2 riastrad uint64_t *window;
468 1.2 riastrad sigset_t sigmask;
469 1.7 riastrad
470 1.2 riastrad /*
471 1.2 riastrad * Mark the window as being updated so nobody tries to write it
472 1.2 riastrad * (since we just wrote it) while we fill it with ones.
473 1.2 riastrad */
474 1.2 riastrad block_signals(&sigmask);
475 1.2 riastrad window = offtab->ot_window;
476 1.2 riastrad offtab->ot_window = NULL;
477 1.2 riastrad restore_sigmask(&sigmask);
478 1.2 riastrad
479 1.2 riastrad /* Fill the window with ones. */
480 1.2 riastrad for (i = 0; i < offtab_current_window_size(offtab); i++)
481 1.2 riastrad window[i] = ~(uint64_t)0;
482 1.2 riastrad
483 1.2 riastrad /* Restore the window as ready again. */
484 1.2 riastrad block_signals(&sigmask);
485 1.2 riastrad offtab->ot_window = window;
486 1.2 riastrad offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
487 1.2 riastrad restore_sigmask(&sigmask);
488 1.2 riastrad }
489 1.2 riastrad
490 1.1 riastrad win: assert(offtab->ot_window_start <= blkno);
491 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
492 1.1 riastrad
493 1.1 riastrad offtab->ot_blkno = blkno;
494 1.1 riastrad }
495 1.1 riastrad
496 1.1 riastrad /*
497 1.1 riastrad * Actually set the offset for blkno.
498 1.1 riastrad */
499 1.1 riastrad void
500 1.1 riastrad offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
501 1.1 riastrad {
502 1.2 riastrad
503 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
504 1.2 riastrad assert(blkno == offtab->ot_blkno);
505 1.2 riastrad assert(offtab->ot_window_start <= blkno);
506 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
507
508 offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
509 }
510