offtab.c revision 1.12 1 1.12 riastrad /* $NetBSD: offtab.c,v 1.12 2014/01/25 16:26:17 riastradh Exp $ */
2 1.1 riastrad
3 1.1 riastrad /*-
4 1.1 riastrad * Copyright (c) 2014 The NetBSD Foundation, Inc.
5 1.1 riastrad * All rights reserved.
6 1.1 riastrad *
7 1.1 riastrad * This code is derived from software contributed to The NetBSD Foundation
8 1.1 riastrad * by Taylor R. Campbell.
9 1.1 riastrad *
10 1.1 riastrad * Redistribution and use in source and binary forms, with or without
11 1.1 riastrad * modification, are permitted provided that the following conditions
12 1.1 riastrad * are met:
13 1.1 riastrad * 1. Redistributions of source code must retain the above copyright
14 1.1 riastrad * notice, this list of conditions and the following disclaimer.
15 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 riastrad * notice, this list of conditions and the following disclaimer in the
17 1.1 riastrad * documentation and/or other materials provided with the distribution.
18 1.1 riastrad *
19 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE.
30 1.1 riastrad */
31 1.1 riastrad
32 1.1 riastrad #include <sys/cdefs.h>
33 1.12 riastrad __RCSID("$NetBSD: offtab.c,v 1.12 2014/01/25 16:26:17 riastradh Exp $");
34 1.1 riastrad
35 1.1 riastrad #include <sys/types.h>
36 1.1 riastrad #include <sys/endian.h>
37 1.1 riastrad
38 1.1 riastrad #include <assert.h>
39 1.1 riastrad #include <err.h>
40 1.1 riastrad #include <errno.h>
41 1.1 riastrad #include <inttypes.h>
42 1.1 riastrad #include <limits.h>
43 1.1 riastrad #include <stdbool.h>
44 1.1 riastrad #include <stdlib.h>
45 1.1 riastrad #include <unistd.h>
46 1.1 riastrad
47 1.1 riastrad #include "common.h"
48 1.1 riastrad #include "utils.h"
49 1.1 riastrad
50 1.1 riastrad #include "offtab.h"
51 1.1 riastrad
52 1.10 joerg static void __printflike(1,2) __dead
53 1.1 riastrad offtab_bug(const char *fmt, ...)
54 1.1 riastrad {
55 1.1 riastrad
56 1.1 riastrad errx(1, "bug in offtab, please report");
57 1.1 riastrad }
58 1.1 riastrad
59 1.10 joerg static void __printflike(1,2) __dead
60 1.1 riastrad offtab_bugx(const char *fmt, ...)
61 1.1 riastrad {
62 1.1 riastrad
63 1.1 riastrad errx(1, "bug in offtab, please report");
64 1.1 riastrad }
65 1.2 riastrad
66 1.2 riastrad static uint32_t
67 1.8 riastrad offtab_compute_window_size(struct offtab *offtab, uint32_t start)
68 1.2 riastrad {
69 1.2 riastrad
70 1.8 riastrad assert(start < offtab->ot_n_offsets);
71 1.8 riastrad return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start));
72 1.2 riastrad }
73 1.2 riastrad
74 1.2 riastrad static uint32_t
75 1.2 riastrad offtab_current_window_size(struct offtab *offtab)
76 1.2 riastrad {
77 1.2 riastrad
78 1.8 riastrad return offtab_compute_window_size(offtab, offtab->ot_window_start);
79 1.2 riastrad }
80 1.2 riastrad
81 1.2 riastrad static uint32_t
82 1.2 riastrad offtab_current_window_end(struct offtab *offtab)
83 1.2 riastrad {
84 1.2 riastrad
85 1.2 riastrad assert(offtab->ot_window_start < offtab->ot_n_offsets);
86 1.2 riastrad assert(offtab_current_window_size(offtab) <=
87 1.2 riastrad (offtab->ot_n_offsets - offtab->ot_window_start));
88 1.2 riastrad return (offtab->ot_window_start + offtab_current_window_size(offtab));
89 1.2 riastrad }
90 1.2 riastrad
91 1.12 riastrad static void
92 1.12 riastrad offtab_compute_window_position(struct offtab *offtab, uint32_t window_start,
93 1.12 riastrad size_t *bytes, off_t *pos)
94 1.12 riastrad {
95 1.12 riastrad const uint32_t window_size = offtab_compute_window_size(offtab,
96 1.12 riastrad window_start);
97 1.12 riastrad
98 1.12 riastrad __CTASSERT(MAX_WINDOW_SIZE <= (OFF_MAX / sizeof(uint64_t)));
99 1.12 riastrad *bytes = (window_size * sizeof(uint64_t));
100 1.12 riastrad
101 1.12 riastrad assert(window_start <= offtab->ot_n_offsets);
102 1.12 riastrad __CTASSERT(MAX_N_OFFSETS <= (SIZE_MAX / sizeof(uint64_t)));
103 1.12 riastrad const off_t window_offset = ((off_t)window_start *
104 1.12 riastrad (off_t)sizeof(uint64_t));
105 1.12 riastrad
106 1.12 riastrad /* XXX This assertion is not justified. */
107 1.12 riastrad assert(offtab->ot_fdpos <= (OFF_MAX - window_offset));
108 1.12 riastrad *pos = (offtab->ot_fdpos + window_offset);
109 1.12 riastrad }
110 1.12 riastrad
111 1.2 riastrad #define OFFTAB_READ_SEEK 0x01
112 1.2 riastrad #define OFFTAB_READ_NOSEEK 0x00
113 1.2 riastrad
114 1.2 riastrad static bool
115 1.2 riastrad offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
116 1.2 riastrad {
117 1.12 riastrad const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
118 1.12 riastrad size_t window_bytes;
119 1.12 riastrad off_t window_pos;
120 1.2 riastrad
121 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
122 1.2 riastrad assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
123 1.2 riastrad (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
124 1.2 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
125 1.12 riastrad
126 1.12 riastrad offtab_compute_window_position(offtab, window_start,
127 1.12 riastrad &window_bytes, &window_pos);
128 1.2 riastrad const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
129 1.12 riastrad ? pread_block(offtab->ot_fd, offtab->ot_window, window_bytes,
130 1.12 riastrad window_pos)
131 1.12 riastrad : read_block(offtab->ot_fd, offtab->ot_window, window_bytes));
132 1.2 riastrad if (n_read == -1) {
133 1.2 riastrad (*offtab->ot_report)("read offset table at %"PRIuMAX,
134 1.11 riastrad (uintmax_t)window_pos);
135 1.2 riastrad return false;
136 1.2 riastrad }
137 1.2 riastrad assert(n_read >= 0);
138 1.12 riastrad if ((size_t)n_read != window_bytes) {
139 1.2 riastrad (*offtab->ot_reportx)("partial read of offset table"
140 1.2 riastrad " at %"PRIuMAX": %zu != %zu",
141 1.12 riastrad (uintmax_t)window_pos, (size_t)n_read, window_bytes);
142 1.2 riastrad return false;
143 1.2 riastrad }
144 1.12 riastrad
145 1.2 riastrad offtab->ot_window_start = window_start;
146 1.2 riastrad
147 1.2 riastrad return true;
148 1.2 riastrad }
149 1.2 riastrad
150 1.2 riastrad static bool
151 1.2 riastrad offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
152 1.2 riastrad {
153 1.2 riastrad
154 1.2 riastrad /* Don't bother if blkno is already in the window. */
155 1.2 riastrad if ((offtab->ot_window_start <= blkno) &&
156 1.2 riastrad (blkno < offtab_current_window_end(offtab)))
157 1.2 riastrad return true;
158 1.2 riastrad
159 1.2 riastrad if (!offtab_read_window(offtab, blkno, read_flags))
160 1.2 riastrad return false;
161 1.2 riastrad
162 1.2 riastrad return true;
163 1.2 riastrad }
164 1.2 riastrad
165 1.2 riastrad static void
166 1.5 riastrad offtab_write_window(struct offtab *offtab)
167 1.2 riastrad {
168 1.12 riastrad size_t window_bytes;
169 1.12 riastrad off_t window_pos;
170 1.2 riastrad
171 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
172 1.2 riastrad
173 1.12 riastrad offtab_compute_window_position(offtab, offtab->ot_window_start,
174 1.12 riastrad &window_bytes, &window_pos);
175 1.2 riastrad const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
176 1.12 riastrad window_bytes, window_pos);
177 1.2 riastrad if (n_written == -1)
178 1.2 riastrad err_ss(1, "write initial offset table");
179 1.2 riastrad assert(n_written >= 0);
180 1.12 riastrad if ((size_t)n_written != window_bytes)
181 1.2 riastrad errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
182 1.2 riastrad (size_t)n_written,
183 1.12 riastrad window_bytes);
184 1.2 riastrad }
185 1.5 riastrad
186 1.5 riastrad static void
187 1.5 riastrad offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
188 1.5 riastrad {
189 1.5 riastrad
190 1.5 riastrad /* Don't bother if [start, end) does not cover our window. */
191 1.5 riastrad if (end <= offtab->ot_window_start)
192 1.5 riastrad return;
193 1.5 riastrad if (offtab_current_window_end(offtab) < start)
194 1.5 riastrad return;
195 1.5 riastrad
196 1.5 riastrad offtab_write_window(offtab);
197 1.5 riastrad }
198 1.1 riastrad
199 1.1 riastrad /*
201 1.1 riastrad * Initialize an offtab to support the specified number of offsets read
202 1.1 riastrad * to or written from fd at byte position fdpos.
203 1.1 riastrad */
204 1.2 riastrad void
205 1.2 riastrad offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
206 1.1 riastrad int fd, off_t fdpos)
207 1.1 riastrad {
208 1.1 riastrad
209 1.1 riastrad assert(offtab != NULL);
210 1.1 riastrad assert(0 < n_offsets);
211 1.1 riastrad assert(0 <= fd);
212 1.1 riastrad assert(0 <= fdpos);
213 1.1 riastrad
214 1.2 riastrad offtab->ot_n_offsets = n_offsets;
215 1.2 riastrad if ((window_size == 0) || (n_offsets < window_size))
216 1.2 riastrad offtab->ot_window_size = n_offsets;
217 1.2 riastrad else
218 1.2 riastrad offtab->ot_window_size = window_size;
219 1.2 riastrad assert(offtab->ot_window_size <= offtab->ot_n_offsets);
220 1.2 riastrad offtab->ot_window_start = (uint32_t)-1;
221 1.2 riastrad __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
222 1.2 riastrad offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
223 1.1 riastrad if (offtab->ot_window == NULL)
224 1.1 riastrad err(1, "malloc offset table");
225 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
226 1.1 riastrad offtab->ot_fd = fd;
227 1.1 riastrad offtab->ot_fdpos = fdpos;
228 1.1 riastrad offtab->ot_report = &offtab_bug;
229 1.1 riastrad offtab->ot_reportx = &offtab_bugx;
230 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_NONE;
231 1.1 riastrad }
232 1.1 riastrad
233 1.1 riastrad /*
234 1.1 riastrad * Destroy an offtab.
235 1.1 riastrad */
236 1.1 riastrad void
237 1.1 riastrad offtab_destroy(struct offtab *offtab)
238 1.1 riastrad {
239 1.2 riastrad
240 1.1 riastrad free(offtab->ot_window);
241 1.1 riastrad }
242 1.1 riastrad
243 1.1 riastrad /*
244 1.1 riastrad * For an offtab that has been used to read data from disk, convert it
245 1.2 riastrad * to an offtab that can be used to write subsequent data to disk.
246 1.1 riastrad * blkno is the last valid blkno read from disk.
247 1.2 riastrad */
248 1.2 riastrad bool
249 1.1 riastrad offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
250 1.1 riastrad {
251 1.1 riastrad
252 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
253 1.2 riastrad assert(0 < blkno);
254 1.2 riastrad
255 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
256 1.2 riastrad return false;
257 1.1 riastrad
258 1.2 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
259 1.2 riastrad offtab->ot_blkno = blkno;
260 1.2 riastrad
261 1.1 riastrad return true;
262 1.1 riastrad }
263 1.1 riastrad
264 1.1 riastrad /*
266 1.1 riastrad * Reset an offtab for reading an offset table from the beginning.
267 1.1 riastrad * Initializes in-memory state and may read data from offtab->ot_fd,
268 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. Failure
269 1.1 riastrad * will be reported by the report/reportx routines, which are called
270 1.2 riastrad * like warn/warnx. May fail; returns true on success, false on
271 1.2 riastrad * failure.
272 1.2 riastrad *
273 1.2 riastrad * This almost has copypasta of offtab_prepare_get, but this uses read,
274 1.1 riastrad * rather than pread, so that it will work on nonseekable input if the
275 1.1 riastrad * window is the whole offset table.
276 1.1 riastrad */
277 1.1 riastrad bool
278 1.1 riastrad offtab_reset_read(struct offtab *offtab,
279 1.1 riastrad void (*report)(const char *, ...) __printflike(1,2),
280 1.1 riastrad void (*reportx)(const char *, ...) __printflike(1,2))
281 1.1 riastrad {
282 1.1 riastrad
283 1.1 riastrad assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
284 1.1 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
285 1.1 riastrad
286 1.1 riastrad offtab->ot_report = report;
287 1.2 riastrad offtab->ot_reportx = reportx;
288 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_READ;
289 1.2 riastrad offtab->ot_blkno = (uint32_t)-1;
290 1.1 riastrad
291 1.1 riastrad if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
292 1.4 riastrad return false;
293 1.4 riastrad
294 1.11 riastrad if (offtab->ot_window_size < offtab->ot_n_offsets) {
295 1.11 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
296 1.11 riastrad const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
297 1.11 riastrad (off_t)sizeof(uint64_t));
298 1.4 riastrad assert(offtab->ot_fdpos <= (OFF_MAX - offtab_bytes));
299 1.4 riastrad const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
300 1.4 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
301 1.4 riastrad (*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
302 1.4 riastrad first_offset);
303 1.4 riastrad return false;
304 1.4 riastrad }
305 1.1 riastrad }
306 1.1 riastrad
307 1.1 riastrad return true;
308 1.1 riastrad }
309 1.1 riastrad
310 1.1 riastrad /*
311 1.1 riastrad * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
312 1.1 riastrad * preparation for a call to offtab_get. May fail; returns true on
313 1.1 riastrad * success, false on failure.
314 1.1 riastrad */
315 1.1 riastrad bool
316 1.1 riastrad offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
317 1.1 riastrad {
318 1.1 riastrad
319 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
320 1.2 riastrad assert(blkno < offtab->ot_n_offsets);
321 1.2 riastrad
322 1.2 riastrad if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
323 1.2 riastrad return false;
324 1.2 riastrad
325 1.2 riastrad assert(offtab->ot_window_start <= blkno);
326 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
327 1.1 riastrad
328 1.1 riastrad offtab->ot_blkno = blkno;
329 1.1 riastrad return true;
330 1.1 riastrad }
331 1.1 riastrad
332 1.1 riastrad /*
333 1.1 riastrad * Return the offset for blkno. Caller must have called
334 1.1 riastrad * offtab_prepare_get beforehand.
335 1.1 riastrad */
336 1.1 riastrad uint64_t
337 1.1 riastrad offtab_get(struct offtab *offtab, uint32_t blkno)
338 1.1 riastrad {
339 1.1 riastrad
340 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
341 1.2 riastrad assert(blkno == offtab->ot_blkno);
342 1.2 riastrad assert(offtab->ot_window_start <= blkno);
343 1.2 riastrad assert(blkno < offtab_current_window_end(offtab));
344 1.1 riastrad
345 1.1 riastrad return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
346 1.1 riastrad }
347 1.1 riastrad
348 1.1 riastrad /*
350 1.1 riastrad * Reset offtab for writing a fresh offset table. Initializes
351 1.1 riastrad * in-memory state and writes an empty offset table to offtab->ot_fd,
352 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. May
353 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
354 1.1 riastrad */
355 1.1 riastrad void
356 1.1 riastrad offtab_reset_write(struct offtab *offtab)
357 1.1 riastrad {
358 1.1 riastrad uint32_t i;
359 1.1 riastrad
360 1.2 riastrad assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
361 1.1 riastrad
362 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
363 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
364 1.1 riastrad
365 1.1 riastrad /*
366 1.1 riastrad * Initialize the offset table to all ones (except for the
367 1.1 riastrad * fixed first offset) so that we can easily detect where we
368 1.1 riastrad * were interrupted if we want to restart.
369 1.2 riastrad */
370 1.2 riastrad __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
371 1.2 riastrad assert(offtab->ot_n_offsets > 0);
372 1.2 riastrad
373 1.2 riastrad for (i = 0; i < offtab->ot_window_size; i++)
374 1.2 riastrad offtab->ot_window[i] = ~(uint64_t)0;
375 1.2 riastrad
376 1.2 riastrad const uint32_t n_windows =
377 1.2 riastrad howmany(offtab->ot_n_offsets, offtab->ot_window_size);
378 1.5 riastrad for (i = 1; i < n_windows; i++) {
379 1.2 riastrad /* Change the start but reuse the all-ones buffer. */
380 1.2 riastrad offtab->ot_window_start = (i * offtab->ot_window_size);
381 1.2 riastrad offtab_write_window(offtab);
382 1.11 riastrad }
383 1.11 riastrad
384 1.11 riastrad offtab->ot_window_start = 0;
385 1.11 riastrad __CTASSERT(MAX_N_OFFSETS <=
386 1.2 riastrad (MIN(OFF_MAX, UINT64_MAX) / sizeof(uint64_t)));
387 1.11 riastrad const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
388 1.11 riastrad sizeof(uint64_t));
389 1.11 riastrad assert(offtab->ot_fdpos <=
390 1.11 riastrad ((off_t)MIN(OFF_MAX, UINT64_MAX) - offtab_bytes));
391 1.5 riastrad const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
392 1.1 riastrad assert(first_offset <= (off_t)MIN(OFF_MAX, UINT64_MAX));
393 1.2 riastrad offtab->ot_window[0] = htobe64((uint64_t)first_offset);
394 1.2 riastrad offtab_write_window(offtab);
395 1.1 riastrad
396 1.1 riastrad if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
397 1.1 riastrad err(1, "lseek to first offset failed");
398 1.1 riastrad }
399 1.1 riastrad
400 1.1 riastrad /*
401 1.1 riastrad * Guarantee that the disk reflects block offsets [0, n_offsets). If
402 1.1 riastrad * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
403 1.1 riastrad * offset table. May fail; returns on success, aborts with err(3) on
404 1.1 riastrad * failure. Fsync failure is considered success but is reported with a
405 1.2 riastrad * warning.
406 1.2 riastrad *
407 1.2 riastrad * This routine does not write state in memory, and does not read state
408 1.1 riastrad * that is not signal-safe. The only state read is offtab->ot_window,
409 1.1 riastrad * offtab->ot_window_start, and quantities that are static for the
410 1.1 riastrad * signal-interruptable existence of the offset table.
411 1.1 riastrad */
412 1.1 riastrad void
413 1.1 riastrad offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
414 1.1 riastrad {
415 1.1 riastrad
416 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
417 1.2 riastrad assert(n_offsets <= offtab->ot_n_offsets);
418 1.2 riastrad
419 1.2 riastrad /*
420 1.2 riastrad * Write the window unless we just did that and were
421 1.5 riastrad * interrupted before we could move the window.
422 1.1 riastrad */
423 1.1 riastrad if (offtab->ot_window != NULL)
424 1.2 riastrad offtab_maybe_write_window(offtab, 0, n_offsets);
425 1.11 riastrad
426 1.11 riastrad if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
427 1.11 riastrad __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
428 1.1 riastrad const off_t sync_bytes = ((off_t)n_offsets *
429 1.11 riastrad (off_t)sizeof(uint64_t));
430 1.1 riastrad assert(offtab->ot_fdpos <= (OFF_MAX - sync_bytes));
431 1.1 riastrad if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
432 1.1 riastrad offtab->ot_fdpos, (offtab->ot_fdpos + sync_bytes))
433 1.1 riastrad == -1)
434 1.1 riastrad warn_ss("fsync of offset table failed");
435 1.1 riastrad }
436 1.1 riastrad }
437 1.1 riastrad
438 1.1 riastrad /*
439 1.1 riastrad * Do any I/O or bookkeeping necessary to set an offset for blkno. May
440 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
441 1.1 riastrad */
442 1.2 riastrad void
443 1.1 riastrad offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
444 1.1 riastrad {
445 1.1 riastrad uint32_t i;
446 1.2 riastrad
447 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
448 1.2 riastrad assert(blkno < offtab->ot_n_offsets);
449 1.2 riastrad
450 1.2 riastrad /*
451 1.2 riastrad * Assume, for convenience, that we write blocks in order.
452 1.2 riastrad * Thus we need not do another read -- we can just clear the
453 1.2 riastrad * window.
454 1.2 riastrad */
455 1.2 riastrad assert((offtab->ot_blkno == (uint32_t)-1) ||
456 1.2 riastrad ((offtab->ot_blkno + 1) == blkno));
457 1.2 riastrad
458 1.2 riastrad /* If it's already in our window, we're good to go. */
459 1.2 riastrad if ((offtab->ot_window_start <= blkno) &&
460 1.2 riastrad (blkno < offtab_current_window_end(offtab)))
461 1.5 riastrad goto win;
462 1.2 riastrad
463 1.2 riastrad /* Otherwise, write out the current window and choose a new one. */
464 1.2 riastrad offtab_write_window(offtab);
465 1.2 riastrad
466 1.2 riastrad assert(offtab->ot_window_size <= blkno);
467 1.2 riastrad assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
468 1.2 riastrad assert((offtab->ot_window_start + offtab->ot_window_size) ==
469 1.2 riastrad rounddown(blkno, offtab->ot_window_size));
470 1.2 riastrad
471 1.2 riastrad {
472 1.2 riastrad uint64_t *window;
473 1.2 riastrad sigset_t sigmask;
474 1.7 riastrad
475 1.2 riastrad /*
476 1.2 riastrad * Mark the window as being updated so nobody tries to write it
477 1.2 riastrad * (since we just wrote it) while we fill it with ones.
478 1.2 riastrad */
479 1.2 riastrad block_signals(&sigmask);
480 1.2 riastrad window = offtab->ot_window;
481 1.2 riastrad offtab->ot_window = NULL;
482 1.2 riastrad restore_sigmask(&sigmask);
483 1.2 riastrad
484 1.2 riastrad /* Fill the window with ones. */
485 1.2 riastrad for (i = 0; i < offtab_current_window_size(offtab); i++)
486 1.2 riastrad window[i] = ~(uint64_t)0;
487 1.2 riastrad
488 1.2 riastrad /* Restore the window as ready again. */
489 1.2 riastrad block_signals(&sigmask);
490 1.2 riastrad offtab->ot_window = window;
491 1.2 riastrad offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
492 1.2 riastrad restore_sigmask(&sigmask);
493 1.2 riastrad }
494 1.2 riastrad
495 1.1 riastrad win: assert(offtab->ot_window_start <= blkno);
496 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
497 1.1 riastrad
498 1.1 riastrad offtab->ot_blkno = blkno;
499 1.1 riastrad }
500 1.1 riastrad
501 1.1 riastrad /*
502 1.1 riastrad * Actually set the offset for blkno.
503 1.1 riastrad */
504 1.1 riastrad void
505 1.1 riastrad offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
506 1.1 riastrad {
507 1.2 riastrad
508 1.2 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
509 1.2 riastrad assert(blkno == offtab->ot_blkno);
510 1.2 riastrad assert(offtab->ot_window_start <= blkno);
511 1.1 riastrad assert(blkno < offtab_current_window_end(offtab));
512
513 offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
514 }
515