/*	$NetBSD: offtab.c,v 1.10 2014/01/23 14:17:05 joerg Exp $	*/

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
31 1.1 riastrad
32 1.1 riastrad #include <sys/cdefs.h>
33 1.10 joerg __RCSID("$NetBSD: offtab.c,v 1.10 2014/01/23 14:17:05 joerg Exp $");
34 1.1 riastrad
35 1.1 riastrad #include <sys/types.h>
36 1.1 riastrad #include <sys/endian.h>
37 1.1 riastrad
38 1.1 riastrad #include <assert.h>
39 1.1 riastrad #include <err.h>
40 1.1 riastrad #include <errno.h>
41 1.1 riastrad #include <inttypes.h>
42 1.1 riastrad #include <limits.h>
43 1.1 riastrad #include <stdbool.h>
44 1.1 riastrad #include <stdlib.h>
45 1.1 riastrad #include <unistd.h>
46 1.1 riastrad
47 1.1 riastrad #include "common.h"
48 1.1 riastrad #include "utils.h"
49 1.1 riastrad
50 1.1 riastrad #include "offtab.h"
51 1.1 riastrad
52 1.10 joerg static void __printflike(1,2) __dead
53 1.1 riastrad offtab_bug(const char *fmt, ...)
54 1.1 riastrad {
55 1.1 riastrad
56 1.1 riastrad errx(1, "bug in offtab, please report");
57 1.1 riastrad }
58 1.1 riastrad
59 1.10 joerg static void __printflike(1,2) __dead
60 1.1 riastrad offtab_bugx(const char *fmt, ...)
61 1.1 riastrad {
62 1.1 riastrad
63 1.1 riastrad errx(1, "bug in offtab, please report");
64 1.1 riastrad }
65 1.2 riastrad
66 1.2 riastrad static uint32_t
67 1.8 riastrad offtab_compute_window_size(struct offtab *offtab, uint32_t start)
68 1.2 riastrad {
69 1.2 riastrad
70 1.8 riastrad assert(start < offtab->ot_n_offsets);
71 1.8 riastrad return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start));
72 1.2 riastrad }
73 1.2 riastrad
74 1.2 riastrad static uint32_t
75 1.2 riastrad offtab_current_window_size(struct offtab *offtab)
76 1.2 riastrad {
77 1.2 riastrad
78 1.8 riastrad return offtab_compute_window_size(offtab, offtab->ot_window_start);
79 1.2 riastrad }
80 1.2 riastrad
81 1.2 riastrad static uint32_t
82 1.2 riastrad offtab_current_window_end(struct offtab *offtab)
83 1.2 riastrad {
84 1.2 riastrad
85 1.2 riastrad assert(offtab->ot_window_start < offtab->ot_n_offsets);
86 1.2 riastrad assert(offtab_current_window_size(offtab) <=
87 1.2 riastrad (offtab->ot_n_offsets - offtab->ot_window_start));
88 1.2 riastrad return (offtab->ot_window_start + offtab_current_window_size(offtab));
89 1.2 riastrad }
90 1.2 riastrad
91 1.2 riastrad #define OFFTAB_READ_SEEK 0x01
92 1.2 riastrad #define OFFTAB_READ_NOSEEK 0x00
93 1.2 riastrad
94 1.2 riastrad static bool
95 1.2 riastrad offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
96 1.2 riastrad {
97 1.2 riastrad
98 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
99 1.2 riastrad
100 1.2 riastrad const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
101 1.2 riastrad const uint32_t window_size = offtab_compute_window_size(offtab,
102 1.8 riastrad window_start);
103 1.2 riastrad
104 1.2 riastrad __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
105 1.2 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
106 1.2 riastrad assert(window_start < offtab->ot_n_offsets);
107 1.2 riastrad assert(offtab->ot_fdpos <=
108 1.3 riastrad (OFF_MAX - (off_t)(window_start * sizeof(uint64_t))));
109 1.2 riastrad assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
110 1.2 riastrad (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
111 1.2 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
112 1.2 riastrad const size_t n_req = (window_size * sizeof(uint64_t));
113 1.2 riastrad const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
114 1.2 riastrad ? pread_block(offtab->ot_fd, offtab->ot_window, n_req,
115 1.2 riastrad (offtab->ot_fdpos + (window_start * sizeof(uint64_t))))
116 1.2 riastrad : read_block(offtab->ot_fd, offtab->ot_window, n_req));
117 1.2 riastrad if (n_read == -1) {
118 1.2 riastrad (*offtab->ot_report)("read offset table at %"PRIuMAX,
119 1.2 riastrad (uintmax_t)(offtab->ot_fdpos +
120 1.2 riastrad (window_start * sizeof(uint64_t))));
121 1.2 riastrad return false;
122 1.2 riastrad }
123 1.2 riastrad assert(n_read >= 0);
124 1.2 riastrad if ((size_t)n_read != (window_size * sizeof(uint64_t))) {
125 1.2 riastrad (*offtab->ot_reportx)("partial read of offset table"
126 1.2 riastrad " at %"PRIuMAX": %zu != %zu",
127 1.2 riastrad (uintmax_t)(offtab->ot_fdpos +
128 1.2 riastrad (window_start * sizeof(uint64_t))),
129 1.2 riastrad (size_t)n_read,
130 1.2 riastrad (size_t)(window_size * sizeof(uint64_t)));
131 1.2 riastrad return false;
132 1.2 riastrad }
133 1.2 riastrad offtab->ot_window_start = window_start;
134 1.2 riastrad
135 1.2 riastrad return true;
136 1.2 riastrad }
137 1.2 riastrad
138 1.2 riastrad static bool
139 1.2 riastrad offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
140 1.2 riastrad {
141 1.2 riastrad
142 1.2 riastrad /* Don't bother if blkno is already in the window. */
143 1.2 riastrad if ((offtab->ot_window_start <= blkno) &&
144 1.2 riastrad (blkno < offtab_current_window_end(offtab)))
145 1.2 riastrad return true;
146 1.2 riastrad
147 1.2 riastrad if (!offtab_read_window(offtab, blkno, read_flags))
148 1.2 riastrad return false;
149 1.2 riastrad
150 1.2 riastrad return true;
151 1.2 riastrad }
152 1.2 riastrad
153 1.2 riastrad static void
154 1.5 riastrad offtab_write_window(struct offtab *offtab)
155 1.2 riastrad {
156 1.2 riastrad
157 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
158 1.2 riastrad
159 1.2 riastrad const uint32_t window_size = offtab_current_window_size(offtab);
160 1.2 riastrad __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
161 1.2 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
162 1.2 riastrad assert(offtab->ot_window_start < offtab->ot_n_offsets);
163 1.2 riastrad assert(offtab->ot_fdpos <=
164 1.3 riastrad (OFF_MAX - (off_t)(offtab->ot_window_start * sizeof(uint64_t))));
165 1.2 riastrad const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
166 1.2 riastrad (window_size * sizeof(uint64_t)),
167 1.2 riastrad (offtab->ot_fdpos +
168 1.2 riastrad (offtab->ot_window_start * sizeof(uint64_t))));
169 1.2 riastrad if (n_written == -1)
170 1.2 riastrad err_ss(1, "write initial offset table");
171 1.2 riastrad assert(n_written >= 0);
172 1.2 riastrad if ((size_t)n_written != (window_size * sizeof(uint64_t)))
173 1.2 riastrad errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
174 1.2 riastrad (size_t)n_written,
175 1.2 riastrad (size_t)(window_size * sizeof(uint64_t)));
176 1.2 riastrad }
177 1.5 riastrad
178 1.5 riastrad static void
179 1.5 riastrad offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
180 1.5 riastrad {
181 1.5 riastrad
182 1.5 riastrad /* Don't bother if [start, end) does not cover our window. */
183 1.5 riastrad if (end <= offtab->ot_window_start)
184 1.5 riastrad return;
185 1.5 riastrad if (offtab_current_window_end(offtab) < start)
186 1.5 riastrad return;
187 1.5 riastrad
188 1.5 riastrad offtab_write_window(offtab);
189 1.5 riastrad }
190 1.1 riastrad
191 1.1 riastrad /*
193 1.1 riastrad * Initialize an offtab to support the specified number of offsets read
194 1.1 riastrad * to or written from fd at byte position fdpos.
195 1.1 riastrad */
196 1.2 riastrad void
197 1.2 riastrad offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
198 1.1 riastrad int fd, off_t fdpos)
199 1.1 riastrad {
200 1.1 riastrad
201 1.1 riastrad assert(offtab != NULL);
202 1.1 riastrad assert(0 < n_offsets);
203 1.1 riastrad assert(0 <= fd);
204 1.1 riastrad assert(0 <= fdpos);
205 1.1 riastrad
206 1.2 riastrad offtab->ot_n_offsets = n_offsets;
207 1.2 riastrad if ((window_size == 0) || (n_offsets < window_size))
208 1.2 riastrad offtab->ot_window_size = n_offsets;
209 1.2 riastrad else
210 1.2 riastrad offtab->ot_window_size = window_size;
211 1.2 riastrad assert(offtab->ot_window_size <= offtab->ot_n_offsets);
212 1.2 riastrad offtab->ot_window_start = (uint32_t)-1;
213 1.2 riastrad __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
214 1.2 riastrad offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
215 1.1 riastrad if (offtab->ot_window == NULL)
216 1.1 riastrad err(1, "malloc offset table");
217 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
218 1.1 riastrad offtab->ot_fd = fd;
219 1.1 riastrad offtab->ot_fdpos = fdpos;
220 1.1 riastrad offtab->ot_report = &offtab_bug;
221 1.1 riastrad offtab->ot_reportx = &offtab_bugx;
222 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_NONE;
223 1.1 riastrad }
224 1.1 riastrad
225 1.1 riastrad /*
226 1.1 riastrad * Destroy an offtab.
227 1.1 riastrad */
228 1.1 riastrad void
229 1.1 riastrad offtab_destroy(struct offtab *offtab)
230 1.1 riastrad {
231 1.2 riastrad
232 1.1 riastrad free(offtab->ot_window);
233 1.1 riastrad }
234 1.1 riastrad
235 1.1 riastrad /*
236 1.1 riastrad * For an offtab that has been used to read data from disk, convert it
237 1.2 riastrad * to an offtab that can be used to write subsequent data to disk.
238 1.1 riastrad * blkno is the last valid blkno read from disk.
239 1.2 riastrad */
240 1.2 riastrad bool
241 1.1 riastrad offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
242 1.1 riastrad {
243 1.1 riastrad
244 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
245 1.2 riastrad assert(0 < blkno);
246 1.2 riastrad
247 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
248 1.2 riastrad return false;
249 1.1 riastrad
250 1.2 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
251 1.2 riastrad offtab->ot_blkno = blkno;
252 1.2 riastrad
253 1.1 riastrad return true;
254 1.1 riastrad }
255 1.1 riastrad
256 1.1 riastrad /*
258 1.1 riastrad * Reset an offtab for reading an offset table from the beginning.
259 1.1 riastrad * Initializes in-memory state and may read data from offtab->ot_fd,
260 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. Failure
261 1.1 riastrad * will be reported by the report/reportx routines, which are called
262 1.2 riastrad * like warn/warnx. May fail; returns true on success, false on
263 1.2 riastrad * failure.
264 1.2 riastrad *
265 1.2 riastrad * This almost has copypasta of offtab_prepare_get, but this uses read,
266 1.1 riastrad * rather than pread, so that it will work on nonseekable input if the
267 1.1 riastrad * window is the whole offset table.
268 1.1 riastrad */
269 1.1 riastrad bool
270 1.1 riastrad offtab_reset_read(struct offtab *offtab,
271 1.1 riastrad void (*report)(const char *, ...) __printflike(1,2),
272 1.1 riastrad void (*reportx)(const char *, ...) __printflike(1,2))
273 1.1 riastrad {
274 1.1 riastrad
275 1.1 riastrad assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
276 1.1 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
277 1.1 riastrad
278 1.1 riastrad offtab->ot_report = report;
279 1.2 riastrad offtab->ot_reportx = reportx;
280 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_READ;
281 1.2 riastrad offtab->ot_blkno = (uint32_t)-1;
282 1.1 riastrad
283 1.1 riastrad if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
284 1.4 riastrad return false;
285 1.4 riastrad
286 1.4 riastrad if (offtab->ot_window_size < offtab->ot_n_offsets) {
287 1.4 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
288 1.4 riastrad assert(offtab->ot_fdpos <= (OFF_MAX -
289 1.4 riastrad (off_t)(offtab->ot_n_offsets * sizeof(uint64_t))));
290 1.4 riastrad const off_t first_offset = (offtab->ot_fdpos +
291 1.4 riastrad (offtab->ot_n_offsets * sizeof(uint64_t)));
292 1.4 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
293 1.4 riastrad (*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
294 1.4 riastrad first_offset);
295 1.4 riastrad return false;
296 1.4 riastrad }
297 1.1 riastrad }
298 1.1 riastrad
299 1.1 riastrad return true;
300 1.1 riastrad }
301 1.1 riastrad
302 1.1 riastrad /*
303 1.1 riastrad * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
304 1.1 riastrad * preparation for a call to offtab_get. May fail; returns true on
305 1.1 riastrad * success, false on failure.
306 1.1 riastrad */
307 1.1 riastrad bool
308 1.1 riastrad offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
309 1.1 riastrad {
310 1.1 riastrad
311 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
312 1.2 riastrad assert(blkno < offtab->ot_n_offsets);
313 1.2 riastrad
314 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
315 1.2 riastrad return false;
316 1.2 riastrad
317 1.2 riastrad assert(offtab->ot_window_start <= blkno);
318 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
319 1.1 riastrad
320 1.1 riastrad offtab->ot_blkno = blkno;
321 1.1 riastrad return true;
322 1.1 riastrad }
323 1.1 riastrad
324 1.1 riastrad /*
325 1.1 riastrad * Return the offset for blkno. Caller must have called
326 1.1 riastrad * offtab_prepare_get beforehand.
327 1.1 riastrad */
328 1.1 riastrad uint64_t
329 1.1 riastrad offtab_get(struct offtab *offtab, uint32_t blkno)
330 1.1 riastrad {
331 1.1 riastrad
332 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
333 1.2 riastrad assert(blkno == offtab->ot_blkno);
334 1.2 riastrad assert(offtab->ot_window_start <= blkno);
335 1.2 riastrad assert(blkno < offtab_current_window_end(offtab));
336 1.1 riastrad
337 1.1 riastrad return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
338 1.1 riastrad }
339 1.1 riastrad
340 1.1 riastrad /*
342 1.1 riastrad * Reset offtab for writing a fresh offset table. Initializes
343 1.1 riastrad * in-memory state and writes an empty offset table to offtab->ot_fd,
344 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. May
345 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
346 1.1 riastrad */
347 1.1 riastrad void
348 1.1 riastrad offtab_reset_write(struct offtab *offtab)
349 1.1 riastrad {
350 1.1 riastrad uint32_t i;
351 1.1 riastrad
352 1.2 riastrad assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
353 1.1 riastrad
354 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
355 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
356 1.1 riastrad
357 1.1 riastrad /*
358 1.1 riastrad * Initialize the offset table to all ones (except for the
359 1.1 riastrad * fixed first offset) so that we can easily detect where we
360 1.1 riastrad * were interrupted if we want to restart.
361 1.2 riastrad */
362 1.2 riastrad __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
363 1.2 riastrad assert(offtab->ot_n_offsets > 0);
364 1.2 riastrad
365 1.2 riastrad for (i = 0; i < offtab->ot_window_size; i++)
366 1.2 riastrad offtab->ot_window[i] = ~(uint64_t)0;
367 1.2 riastrad
368 1.2 riastrad const uint32_t n_windows =
369 1.2 riastrad howmany(offtab->ot_n_offsets, offtab->ot_window_size);
370 1.5 riastrad for (i = 1; i < n_windows; i++) {
371 1.2 riastrad /* Change the start but reuse the all-ones buffer. */
372 1.2 riastrad offtab->ot_window_start = (i * offtab->ot_window_size);
373 1.2 riastrad offtab_write_window(offtab);
374 1.2 riastrad }
375 1.2 riastrad
376 1.3 riastrad offtab->ot_window_start = 0;
377 1.3 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
378 1.1 riastrad assert(offtab->ot_fdpos <=
379 1.3 riastrad (OFF_MAX - (off_t)(offtab->ot_n_offsets * sizeof(uint64_t))));
380 1.2 riastrad const uint64_t first_offset = (offtab->ot_fdpos +
381 1.5 riastrad (offtab->ot_n_offsets * sizeof(uint64_t)));
382 1.1 riastrad assert(first_offset <= OFF_MAX);
383 1.2 riastrad offtab->ot_window[0] = htobe64(first_offset);
384 1.2 riastrad offtab_write_window(offtab);
385 1.1 riastrad
386 1.1 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
387 1.1 riastrad err(1, "lseek to first offset failed");
388 1.1 riastrad }
389 1.1 riastrad
390 1.1 riastrad /*
391 1.1 riastrad * Guarantee that the disk reflects block offsets [0, n_offsets). If
392 1.1 riastrad * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
393 1.1 riastrad * offset table. May fail; returns on success, aborts with err(3) on
394 1.1 riastrad * failure. Fsync failure is considered success but is reported with a
395 1.2 riastrad * warning.
396 1.2 riastrad *
397 1.2 riastrad * This routine does not write state in memory, and does not read state
398 1.1 riastrad * that is not signal-safe. The only state read is offtab->ot_window,
399 1.1 riastrad * offtab->ot_window_start, and quantities that are static for the
400 1.1 riastrad * signal-interruptable existence of the offset table.
401 1.1 riastrad */
402 1.1 riastrad void
403 1.1 riastrad offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
404 1.1 riastrad {
405 1.1 riastrad
406 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
407 1.2 riastrad assert(n_offsets <= offtab->ot_n_offsets);
408 1.2 riastrad
409 1.2 riastrad /*
410 1.2 riastrad * Write the window unless we just did that and were
411 1.5 riastrad * interrupted before we could move the window.
412 1.1 riastrad */
413 1.1 riastrad if (offtab->ot_window != NULL)
414 1.2 riastrad offtab_maybe_write_window(offtab, 0, n_offsets);
415 1.2 riastrad
416 1.3 riastrad if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
417 1.1 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
418 1.1 riastrad assert(offtab->ot_fdpos
419 1.2 riastrad <= (OFF_MAX - (off_t)(n_offsets * sizeof(uint64_t))));
420 1.1 riastrad if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
421 1.1 riastrad offtab->ot_fdpos,
422 1.1 riastrad (offtab->ot_fdpos + (n_offsets * sizeof(uint64_t))))
423 1.1 riastrad == -1)
424 1.1 riastrad warn_ss("fsync of offset table failed");
425 1.1 riastrad }
426 1.1 riastrad }
427 1.1 riastrad
428 1.1 riastrad /*
429 1.1 riastrad * Do any I/O or bookkeeping necessary to set an offset for blkno. May
430 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
431 1.1 riastrad */
432 1.2 riastrad void
433 1.1 riastrad offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
434 1.1 riastrad {
435 1.1 riastrad uint32_t i;
436 1.2 riastrad
437 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
438 1.2 riastrad assert(blkno < offtab->ot_n_offsets);
439 1.2 riastrad
440 1.2 riastrad /*
441 1.2 riastrad * Assume, for convenience, that we write blocks in order.
442 1.2 riastrad * Thus we need not do another read -- we can just clear the
443 1.2 riastrad * window.
444 1.2 riastrad */
445 1.2 riastrad assert((offtab->ot_blkno == (uint32_t)-1) ||
446 1.2 riastrad ((offtab->ot_blkno + 1) == blkno));
447 1.2 riastrad
448 1.2 riastrad /* If it's already in our window, we're good to go. */
449 1.2 riastrad if ((offtab->ot_window_start <= blkno) &&
450 1.2 riastrad (blkno < offtab_current_window_end(offtab)))
451 1.5 riastrad goto win;
452 1.2 riastrad
453 1.2 riastrad /* Otherwise, write out the current window and choose a new one. */
454 1.2 riastrad offtab_write_window(offtab);
455 1.2 riastrad
456 1.2 riastrad assert(offtab->ot_window_size <= blkno);
457 1.2 riastrad assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
458 1.2 riastrad assert((offtab->ot_window_start + offtab->ot_window_size) ==
459 1.2 riastrad rounddown(blkno, offtab->ot_window_size));
460 1.2 riastrad
461 1.2 riastrad {
462 1.2 riastrad uint64_t *window;
463 1.2 riastrad sigset_t sigmask;
464 1.7 riastrad
465 1.2 riastrad /*
466 1.2 riastrad * Mark the window as being updated so nobody tries to write it
467 1.2 riastrad * (since we just wrote it) while we fill it with ones.
468 1.2 riastrad */
469 1.2 riastrad block_signals(&sigmask);
470 1.2 riastrad window = offtab->ot_window;
471 1.2 riastrad offtab->ot_window = NULL;
472 1.2 riastrad restore_sigmask(&sigmask);
473 1.2 riastrad
474 1.2 riastrad /* Fill the window with ones. */
475 1.2 riastrad for (i = 0; i < offtab_current_window_size(offtab); i++)
476 1.2 riastrad window[i] = ~(uint64_t)0;
477 1.2 riastrad
478 1.2 riastrad /* Restore the window as ready again. */
479 1.2 riastrad block_signals(&sigmask);
480 1.2 riastrad offtab->ot_window = window;
481 1.2 riastrad offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
482 1.2 riastrad restore_sigmask(&sigmask);
483 1.2 riastrad }
484 1.2 riastrad
485 1.1 riastrad win: assert(offtab->ot_window_start <= blkno);
486 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
487 1.1 riastrad
488 1.1 riastrad offtab->ot_blkno = blkno;
489 1.1 riastrad }
490 1.1 riastrad
491 1.1 riastrad /*
492 1.1 riastrad * Actually set the offset for blkno.
493 1.1 riastrad */
494 1.1 riastrad void
495 1.1 riastrad offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
496 1.1 riastrad {
497 1.2 riastrad
498 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
499 1.2 riastrad assert(blkno == offtab->ot_blkno);
500 1.2 riastrad assert(offtab->ot_window_start <= blkno);
501 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
502
503 offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
504 }
505