offtab.c revision 1.13.4.2 1 1.13.4.2 yamt /* $NetBSD: offtab.c,v 1.13.4.2 2014/05/22 11:42:51 yamt Exp $ */
2 1.13.4.2 yamt
3 1.13.4.2 yamt /*-
4 1.13.4.2 yamt * Copyright (c) 2014 The NetBSD Foundation, Inc.
5 1.13.4.2 yamt * All rights reserved.
6 1.13.4.2 yamt *
7 1.13.4.2 yamt * This code is derived from software contributed to The NetBSD Foundation
8 1.13.4.2 yamt * by Taylor R. Campbell.
9 1.13.4.2 yamt *
10 1.13.4.2 yamt * Redistribution and use in source and binary forms, with or without
11 1.13.4.2 yamt * modification, are permitted provided that the following conditions
12 1.13.4.2 yamt * are met:
13 1.13.4.2 yamt * 1. Redistributions of source code must retain the above copyright
14 1.13.4.2 yamt * notice, this list of conditions and the following disclaimer.
15 1.13.4.2 yamt * 2. Redistributions in binary form must reproduce the above copyright
16 1.13.4.2 yamt * notice, this list of conditions and the following disclaimer in the
17 1.13.4.2 yamt * documentation and/or other materials provided with the distribution.
18 1.13.4.2 yamt *
19 1.13.4.2 yamt * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.13.4.2 yamt * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.13.4.2 yamt * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.13.4.2 yamt * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.13.4.2 yamt * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.13.4.2 yamt * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.13.4.2 yamt * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.13.4.2 yamt * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.13.4.2 yamt * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.13.4.2 yamt * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.13.4.2 yamt * POSSIBILITY OF SUCH DAMAGE.
30 1.13.4.2 yamt */
31 1.13.4.2 yamt
32 1.13.4.2 yamt #include <sys/cdefs.h>
33 1.13.4.2 yamt __RCSID("$NetBSD: offtab.c,v 1.13.4.2 2014/05/22 11:42:51 yamt Exp $");
34 1.13.4.2 yamt
35 1.13.4.2 yamt #include <sys/types.h>
36 1.13.4.2 yamt #include <sys/endian.h>
37 1.13.4.2 yamt
38 1.13.4.2 yamt #include <assert.h>
39 1.13.4.2 yamt #include <err.h>
40 1.13.4.2 yamt #include <errno.h>
41 1.13.4.2 yamt #include <inttypes.h>
42 1.13.4.2 yamt #include <limits.h>
43 1.13.4.2 yamt #include <stdbool.h>
44 1.13.4.2 yamt #include <stdlib.h>
45 1.13.4.2 yamt #include <unistd.h>
46 1.13.4.2 yamt
47 1.13.4.2 yamt #include "common.h"
48 1.13.4.2 yamt #include "utils.h"
49 1.13.4.2 yamt
50 1.13.4.2 yamt #include "offtab.h"
51 1.13.4.2 yamt
52 1.13.4.2 yamt static void __printflike(1,2) __dead
53 1.13.4.2 yamt offtab_bug(const char *fmt, ...)
54 1.13.4.2 yamt {
55 1.13.4.2 yamt
56 1.13.4.2 yamt errx(1, "bug in offtab, please report");
57 1.13.4.2 yamt }
58 1.13.4.2 yamt
59 1.13.4.2 yamt static void __printflike(1,2) __dead
60 1.13.4.2 yamt offtab_bugx(const char *fmt, ...)
61 1.13.4.2 yamt {
62 1.13.4.2 yamt
63 1.13.4.2 yamt errx(1, "bug in offtab, please report");
64 1.13.4.2 yamt }
65 1.13.4.2 yamt
66 1.13.4.2 yamt static uint32_t
67 1.13.4.2 yamt offtab_compute_window_size(struct offtab *offtab, uint32_t start)
68 1.13.4.2 yamt {
69 1.13.4.2 yamt
70 1.13.4.2 yamt assert(start < offtab->ot_n_offsets);
71 1.13.4.2 yamt return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start));
72 1.13.4.2 yamt }
73 1.13.4.2 yamt
74 1.13.4.2 yamt static uint32_t
75 1.13.4.2 yamt offtab_current_window_size(struct offtab *offtab)
76 1.13.4.2 yamt {
77 1.13.4.2 yamt
78 1.13.4.2 yamt return offtab_compute_window_size(offtab, offtab->ot_window_start);
79 1.13.4.2 yamt }
80 1.13.4.2 yamt
81 1.13.4.2 yamt static uint32_t
82 1.13.4.2 yamt offtab_current_window_end(struct offtab *offtab)
83 1.13.4.2 yamt {
84 1.13.4.2 yamt
85 1.13.4.2 yamt assert(offtab->ot_window_start < offtab->ot_n_offsets);
86 1.13.4.2 yamt assert(offtab_current_window_size(offtab) <=
87 1.13.4.2 yamt (offtab->ot_n_offsets - offtab->ot_window_start));
88 1.13.4.2 yamt return (offtab->ot_window_start + offtab_current_window_size(offtab));
89 1.13.4.2 yamt }
90 1.13.4.2 yamt
91 1.13.4.2 yamt static void
92 1.13.4.2 yamt offtab_compute_window_position(struct offtab *offtab, uint32_t window_start,
93 1.13.4.2 yamt size_t *bytes, off_t *pos)
94 1.13.4.2 yamt {
95 1.13.4.2 yamt const uint32_t window_size = offtab_compute_window_size(offtab,
96 1.13.4.2 yamt window_start);
97 1.13.4.2 yamt
98 1.13.4.2 yamt __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
99 1.13.4.2 yamt *bytes = (window_size * sizeof(uint64_t));
100 1.13.4.2 yamt
101 1.13.4.2 yamt assert(window_start <= offtab->ot_n_offsets);
102 1.13.4.2 yamt __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
103 1.13.4.2 yamt const off_t window_offset = ((off_t)window_start *
104 1.13.4.2 yamt (off_t)sizeof(uint64_t));
105 1.13.4.2 yamt
106 1.13.4.2 yamt /* XXX This assertion is not justified. */
107 1.13.4.2 yamt assert(offtab->ot_fdpos <= (OFF_MAX - window_offset));
108 1.13.4.2 yamt *pos = (offtab->ot_fdpos + window_offset);
109 1.13.4.2 yamt }
110 1.13.4.2 yamt
111 1.13.4.2 yamt #define OFFTAB_READ_SEEK 0x01
112 1.13.4.2 yamt #define OFFTAB_READ_NOSEEK 0x00
113 1.13.4.2 yamt
114 1.13.4.2 yamt static bool
115 1.13.4.2 yamt offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
116 1.13.4.2 yamt {
117 1.13.4.2 yamt const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
118 1.13.4.2 yamt size_t window_bytes;
119 1.13.4.2 yamt off_t window_pos;
120 1.13.4.2 yamt
121 1.13.4.2 yamt assert(offtab->ot_mode == OFFTAB_MODE_READ);
122 1.13.4.2 yamt assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
123 1.13.4.2 yamt (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
124 1.13.4.2 yamt ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
125 1.13.4.2 yamt
126 1.13.4.2 yamt offtab_compute_window_position(offtab, window_start,
127 1.13.4.2 yamt &window_bytes, &window_pos);
128 1.13.4.2 yamt const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
129 1.13.4.2 yamt ? pread_block(offtab->ot_fd, offtab->ot_window, window_bytes,
130 1.13.4.2 yamt window_pos)
131 1.13.4.2 yamt : read_block(offtab->ot_fd, offtab->ot_window, window_bytes));
132 1.13.4.2 yamt if (n_read == -1) {
133 1.13.4.2 yamt (*offtab->ot_report)("read offset table at %"PRIuMAX,
134 1.13.4.2 yamt (uintmax_t)window_pos);
135 1.13.4.2 yamt return false;
136 1.13.4.2 yamt }
137 1.13.4.2 yamt assert(n_read >= 0);
138 1.13.4.2 yamt if ((size_t)n_read != window_bytes) {
139 1.13.4.2 yamt (*offtab->ot_reportx)("partial read of offset table"
140 1.13.4.2 yamt " at %"PRIuMAX": %zu != %zu",
141 1.13.4.2 yamt (uintmax_t)window_pos, (size_t)n_read, window_bytes);
142 1.13.4.2 yamt return false;
143 1.13.4.2 yamt }
144 1.13.4.2 yamt
145 1.13.4.2 yamt offtab->ot_window_start = window_start;
146 1.13.4.2 yamt
147 1.13.4.2 yamt return true;
148 1.13.4.2 yamt }
149 1.13.4.2 yamt
150 1.13.4.2 yamt static bool
151 1.13.4.2 yamt offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
152 1.13.4.2 yamt {
153 1.13.4.2 yamt
154 1.13.4.2 yamt /* Don't bother if blkno is already in the window. */
155 1.13.4.2 yamt if ((offtab->ot_window_start <= blkno) &&
156 1.13.4.2 yamt (blkno < offtab_current_window_end(offtab)))
157 1.13.4.2 yamt return true;
158 1.13.4.2 yamt
159 1.13.4.2 yamt if (!offtab_read_window(offtab, blkno, read_flags))
160 1.13.4.2 yamt return false;
161 1.13.4.2 yamt
162 1.13.4.2 yamt return true;
163 1.13.4.2 yamt }
164 1.13.4.2 yamt
165 1.13.4.2 yamt static void
166 1.13.4.2 yamt offtab_write_window(struct offtab *offtab)
167 1.13.4.2 yamt {
168 1.13.4.2 yamt size_t window_bytes;
169 1.13.4.2 yamt off_t window_pos;
170 1.13.4.2 yamt
171 1.13.4.2 yamt assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
172 1.13.4.2 yamt
173 1.13.4.2 yamt offtab_compute_window_position(offtab, offtab->ot_window_start,
174 1.13.4.2 yamt &window_bytes, &window_pos);
175 1.13.4.2 yamt const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
176 1.13.4.2 yamt window_bytes, window_pos);
177 1.13.4.2 yamt if (n_written == -1)
178 1.13.4.2 yamt err_ss(1, "write initial offset table");
179 1.13.4.2 yamt assert(n_written >= 0);
180 1.13.4.2 yamt if ((size_t)n_written != window_bytes)
181 1.13.4.2 yamt errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
182 1.13.4.2 yamt (size_t)n_written,
183 1.13.4.2 yamt window_bytes);
184 1.13.4.2 yamt }
185 1.13.4.2 yamt
186 1.13.4.2 yamt static void
187 1.13.4.2 yamt offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
188 1.13.4.2 yamt {
189 1.13.4.2 yamt
190 1.13.4.2 yamt /* Don't bother if [start, end) does not cover our window. */
191 1.13.4.2 yamt if (end <= offtab->ot_window_start)
192 1.13.4.2 yamt return;
193 1.13.4.2 yamt if (offtab_current_window_end(offtab) < start)
194 1.13.4.2 yamt return;
195 1.13.4.2 yamt
196 1.13.4.2 yamt offtab_write_window(offtab);
197 1.13.4.2 yamt }
198 1.13.4.2 yamt
199 1.13.4.2 yamt /*
201 1.13.4.2 yamt * Initialize an offtab to support the specified number of offsets read
202 1.13.4.2 yamt * to or written from fd at byte position fdpos.
203 1.13.4.2 yamt */
204 1.13.4.2 yamt void
205 1.13.4.2 yamt offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
206 1.13.4.2 yamt int fd, off_t fdpos)
207 1.13.4.2 yamt {
208 1.13.4.2 yamt
209 1.13.4.2 yamt assert(offtab != NULL);
210 1.13.4.2 yamt assert(0 < n_offsets);
211 1.13.4.2 yamt assert(0 <= fd);
212 1.13.4.2 yamt assert(0 <= fdpos);
213 1.13.4.2 yamt
214 1.13.4.2 yamt offtab->ot_n_offsets = n_offsets;
215 1.13.4.2 yamt if ((window_size == 0) || (n_offsets < window_size))
216 1.13.4.2 yamt offtab->ot_window_size = n_offsets;
217 1.13.4.2 yamt else
218 1.13.4.2 yamt offtab->ot_window_size = window_size;
219 1.13.4.2 yamt assert(offtab->ot_window_size <= offtab->ot_n_offsets);
220 1.13.4.2 yamt offtab->ot_window_start = (uint32_t)-1;
221 1.13.4.2 yamt __CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
222 1.13.4.2 yamt offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
223 1.13.4.2 yamt if (offtab->ot_window == NULL)
224 1.13.4.2 yamt err(1, "malloc offset table");
225 1.13.4.2 yamt offtab->ot_blkno = (uint32_t)-1;
226 1.13.4.2 yamt offtab->ot_fd = fd;
227 1.13.4.2 yamt offtab->ot_fdpos = fdpos;
228 1.13.4.2 yamt offtab->ot_report = &offtab_bug;
229 1.13.4.2 yamt offtab->ot_reportx = &offtab_bugx;
230 1.13.4.2 yamt offtab->ot_mode = OFFTAB_MODE_NONE;
231 1.13.4.2 yamt }
232 1.13.4.2 yamt
233 1.13.4.2 yamt /*
234 1.13.4.2 yamt * Destroy an offtab.
235 1.13.4.2 yamt */
236 1.13.4.2 yamt void
237 1.13.4.2 yamt offtab_destroy(struct offtab *offtab)
238 1.13.4.2 yamt {
239 1.13.4.2 yamt
240 1.13.4.2 yamt free(offtab->ot_window);
241 1.13.4.2 yamt }
242 1.13.4.2 yamt
243 1.13.4.2 yamt /*
244 1.13.4.2 yamt * For an offtab that has been used to read data from disk, convert it
245 1.13.4.2 yamt * to an offtab that can be used to write subsequent data to disk.
246 1.13.4.2 yamt * blkno is the last valid blkno read from disk.
247 1.13.4.2 yamt */
248 1.13.4.2 yamt bool
249 1.13.4.2 yamt offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
250 1.13.4.2 yamt {
251 1.13.4.2 yamt
252 1.13.4.2 yamt assert(offtab->ot_mode == OFFTAB_MODE_READ);
253 1.13.4.2 yamt assert(0 < blkno);
254 1.13.4.2 yamt
255 1.13.4.2 yamt if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
256 1.13.4.2 yamt return false;
257 1.13.4.2 yamt
258 1.13.4.2 yamt offtab->ot_mode = OFFTAB_MODE_WRITE;
259 1.13.4.2 yamt offtab->ot_blkno = blkno;
260 1.13.4.2 yamt
261 1.13.4.2 yamt return true;
262 1.13.4.2 yamt }
263 1.13.4.2 yamt
264 1.13.4.2 yamt /*
266 1.13.4.2 yamt * Reset an offtab for reading an offset table from the beginning.
267 1.13.4.2 yamt * Initializes in-memory state and may read data from offtab->ot_fd,
268 1.13.4.2 yamt * which must currently be at byte position offtab->ot_fdpos. Failure
269 1.13.4.2 yamt * will be reported by the report/reportx routines, which are called
270 1.13.4.2 yamt * like warn/warnx. May fail; returns true on success, false on
271 1.13.4.2 yamt * failure.
272 1.13.4.2 yamt *
273 1.13.4.2 yamt * This almost has copypasta of offtab_prepare_get, but this uses read,
274 1.13.4.2 yamt * rather than pread, so that it will work on nonseekable input if the
275 1.13.4.2 yamt * window is the whole offset table.
276 1.13.4.2 yamt */
277 1.13.4.2 yamt bool
278 1.13.4.2 yamt offtab_reset_read(struct offtab *offtab,
279 1.13.4.2 yamt void (*report)(const char *, ...) __printflike(1,2),
280 1.13.4.2 yamt void (*reportx)(const char *, ...) __printflike(1,2))
281 1.13.4.2 yamt {
282 1.13.4.2 yamt
283 1.13.4.2 yamt assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
284 1.13.4.2 yamt ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
285 1.13.4.2 yamt
286 1.13.4.2 yamt offtab->ot_report = report;
287 1.13.4.2 yamt offtab->ot_reportx = reportx;
288 1.13.4.2 yamt offtab->ot_mode = OFFTAB_MODE_READ;
289 1.13.4.2 yamt offtab->ot_blkno = (uint32_t)-1;
290 1.13.4.2 yamt
291 1.13.4.2 yamt if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
292 1.13.4.2 yamt return false;
293 1.13.4.2 yamt
294 1.13.4.2 yamt if (offtab->ot_window_size < offtab->ot_n_offsets) {
295 1.13.4.2 yamt __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
296 1.13.4.2 yamt const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
297 1.13.4.2 yamt (off_t)sizeof(uint64_t));
298 1.13.4.2 yamt assert(offtab->ot_fdpos <= (OFF_MAX - offtab_bytes));
299 1.13.4.2 yamt const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
300 1.13.4.2 yamt if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
301 1.13.4.2 yamt (*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
302 1.13.4.2 yamt first_offset);
303 1.13.4.2 yamt return false;
304 1.13.4.2 yamt }
305 1.13.4.2 yamt }
306 1.13.4.2 yamt
307 1.13.4.2 yamt return true;
308 1.13.4.2 yamt }
309 1.13.4.2 yamt
310 1.13.4.2 yamt /*
311 1.13.4.2 yamt * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
312 1.13.4.2 yamt * preparation for a call to offtab_get. May fail; returns true on
313 1.13.4.2 yamt * success, false on failure.
314 1.13.4.2 yamt */
315 1.13.4.2 yamt bool
316 1.13.4.2 yamt offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
317 1.13.4.2 yamt {
318 1.13.4.2 yamt
319 1.13.4.2 yamt assert(offtab->ot_mode == OFFTAB_MODE_READ);
320 1.13.4.2 yamt assert(blkno < offtab->ot_n_offsets);
321 1.13.4.2 yamt
322 1.13.4.2 yamt if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
323 1.13.4.2 yamt return false;
324 1.13.4.2 yamt
325 1.13.4.2 yamt assert(offtab->ot_window_start <= blkno);
326 1.13.4.2 yamt assert(blkno < offtab_current_window_end(offtab));
327 1.13.4.2 yamt
328 1.13.4.2 yamt offtab->ot_blkno = blkno;
329 1.13.4.2 yamt return true;
330 1.13.4.2 yamt }
331 1.13.4.2 yamt
332 1.13.4.2 yamt /*
333 1.13.4.2 yamt * Return the offset for blkno. Caller must have called
334 1.13.4.2 yamt * offtab_prepare_get beforehand.
335 1.13.4.2 yamt */
336 1.13.4.2 yamt uint64_t
337 1.13.4.2 yamt offtab_get(struct offtab *offtab, uint32_t blkno)
338 1.13.4.2 yamt {
339 1.13.4.2 yamt
340 1.13.4.2 yamt assert(offtab->ot_mode == OFFTAB_MODE_READ);
341 1.13.4.2 yamt assert(blkno == offtab->ot_blkno);
342 1.13.4.2 yamt assert(offtab->ot_window_start <= blkno);
343 1.13.4.2 yamt assert(blkno < offtab_current_window_end(offtab));
344 1.13.4.2 yamt
345 1.13.4.2 yamt return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
346 1.13.4.2 yamt }
347 1.13.4.2 yamt
348 1.13.4.2 yamt /*
350 1.13.4.2 yamt * Reset offtab for writing a fresh offset table. Initializes
351 1.13.4.2 yamt * in-memory state and writes an empty offset table to offtab->ot_fd,
352 1.13.4.2 yamt * which must currently be at byte position offtab->ot_fdpos. May
353 1.13.4.2 yamt * fail; returns on success, aborts with err(3) on failure.
354 1.13.4.2 yamt */
355 1.13.4.2 yamt void
356 1.13.4.2 yamt offtab_reset_write(struct offtab *offtab)
357 1.13.4.2 yamt {
358 1.13.4.2 yamt uint32_t i;
359 1.13.4.2 yamt
360 1.13.4.2 yamt assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
361 1.13.4.2 yamt
362 1.13.4.2 yamt offtab->ot_mode = OFFTAB_MODE_WRITE;
363 1.13.4.2 yamt offtab->ot_blkno = (uint32_t)-1;
364 1.13.4.2 yamt
365 1.13.4.2 yamt /*
366 1.13.4.2 yamt * Initialize the offset table to all ones (except for the
367 1.13.4.2 yamt * fixed first offset) so that we can easily detect where we
368 1.13.4.2 yamt * were interrupted if we want to restart.
369 1.13.4.2 yamt */
370 1.13.4.2 yamt __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
371 1.13.4.2 yamt assert(offtab->ot_n_offsets > 0);
372 1.13.4.2 yamt
373 1.13.4.2 yamt for (i = 0; i < offtab->ot_window_size; i++)
374 1.13.4.2 yamt offtab->ot_window[i] = ~(uint64_t)0;
375 1.13.4.2 yamt
376 1.13.4.2 yamt const uint32_t n_windows =
377 1.13.4.2 yamt howmany(offtab->ot_n_offsets, offtab->ot_window_size);
378 1.13.4.2 yamt for (i = 1; i < n_windows; i++) {
379 1.13.4.2 yamt /* Change the start but reuse the all-ones buffer. */
380 1.13.4.2 yamt offtab->ot_window_start = (i * offtab->ot_window_size);
381 1.13.4.2 yamt offtab_write_window(offtab);
382 1.13.4.2 yamt }
383 1.13.4.2 yamt
384 1.13.4.2 yamt offtab->ot_window_start = 0;
385 1.13.4.2 yamt __CTASSERT(MAX_N_OFFSETS <=
386 1.13.4.2 yamt (MIN(OFF_MAX, UINT64_MAX) / sizeof(uint64_t)));
387 1.13.4.2 yamt const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
388 1.13.4.2 yamt sizeof(uint64_t));
389 1.13.4.2 yamt assert(offtab->ot_fdpos <=
390 1.13.4.2 yamt ((off_t)MIN(OFF_MAX, UINT64_MAX) - offtab_bytes));
391 1.13.4.2 yamt const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
392 1.13.4.2 yamt assert(first_offset <= (off_t)MIN(OFF_MAX, UINT64_MAX));
393 1.13.4.2 yamt offtab->ot_window[0] = htobe64((uint64_t)first_offset);
394 1.13.4.2 yamt offtab_write_window(offtab);
395 1.13.4.2 yamt
396 1.13.4.2 yamt if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
397 1.13.4.2 yamt err(1, "lseek to first offset failed");
398 1.13.4.2 yamt }
399 1.13.4.2 yamt
400 1.13.4.2 yamt /*
401 1.13.4.2 yamt * Guarantee that the disk reflects block offsets [0, n_offsets). If
402 1.13.4.2 yamt * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
403 1.13.4.2 yamt * offset table. May fail; returns on success, aborts with err(3) on
404 1.13.4.2 yamt * failure. Fsync failure is considered success but is reported with a
405 1.13.4.2 yamt * warning.
406 1.13.4.2 yamt *
407 1.13.4.2 yamt * This routine does not write state in memory, and does not read state
408 1.13.4.2 yamt * that is not signal-safe. The only state read is offtab->ot_window,
409 1.13.4.2 yamt * offtab->ot_window_start, and quantities that are static for the
410 1.13.4.2 yamt * signal-interruptable existence of the offset table.
411 1.13.4.2 yamt */
412 1.13.4.2 yamt void
413 1.13.4.2 yamt offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
414 1.13.4.2 yamt {
415 1.13.4.2 yamt
416 1.13.4.2 yamt assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
417 1.13.4.2 yamt assert(n_offsets <= offtab->ot_n_offsets);
418 1.13.4.2 yamt
419 1.13.4.2 yamt /*
420 1.13.4.2 yamt * Write the window unless we just did that and were
421 1.13.4.2 yamt * interrupted before we could move the window.
422 1.13.4.2 yamt */
423 1.13.4.2 yamt if (offtab->ot_window != NULL)
424 1.13.4.2 yamt offtab_maybe_write_window(offtab, 0, n_offsets);
425 1.13.4.2 yamt
426 1.13.4.2 yamt if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
427 1.13.4.2 yamt __CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
428 1.13.4.2 yamt const off_t sync_bytes = ((off_t)n_offsets *
429 1.13.4.2 yamt (off_t)sizeof(uint64_t));
430 1.13.4.2 yamt assert(offtab->ot_fdpos <= (OFF_MAX - sync_bytes));
431 1.13.4.2 yamt if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
432 1.13.4.2 yamt offtab->ot_fdpos, (offtab->ot_fdpos + sync_bytes))
433 1.13.4.2 yamt == -1)
434 1.13.4.2 yamt warn_ss("fsync of offset table failed");
435 1.13.4.2 yamt }
436 1.13.4.2 yamt }
437 1.13.4.2 yamt
438 1.13.4.2 yamt /*
439 1.13.4.2 yamt * Do any I/O or bookkeeping necessary to set an offset for blkno. May
440 1.13.4.2 yamt * fail; returns on success, aborts with err(3) on failure.
441 1.13.4.2 yamt */
442 1.13.4.2 yamt void
443 1.13.4.2 yamt offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
444 1.13.4.2 yamt {
445 1.13.4.2 yamt uint32_t i;
446 1.13.4.2 yamt
447 1.13.4.2 yamt assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
448 1.13.4.2 yamt assert(blkno < offtab->ot_n_offsets);
449 1.13.4.2 yamt
450 1.13.4.2 yamt /*
451 1.13.4.2 yamt * Assume, for convenience, that we write blocks in order.
452 1.13.4.2 yamt * Thus we need not do another read -- we can just clear the
453 1.13.4.2 yamt * window.
454 1.13.4.2 yamt */
455 1.13.4.2 yamt assert((offtab->ot_blkno == (uint32_t)-1) ||
456 1.13.4.2 yamt ((offtab->ot_blkno + 1) == blkno));
457 1.13.4.2 yamt
458 1.13.4.2 yamt /* If it's already in our window, we're good to go. */
459 1.13.4.2 yamt if ((offtab->ot_window_start <= blkno) &&
460 1.13.4.2 yamt (blkno < offtab_current_window_end(offtab)))
461 1.13.4.2 yamt goto win;
462 1.13.4.2 yamt
463 1.13.4.2 yamt /* Otherwise, write out the current window and choose a new one. */
464 1.13.4.2 yamt offtab_write_window(offtab);
465 1.13.4.2 yamt
466 1.13.4.2 yamt assert(offtab->ot_window_size <= blkno);
467 1.13.4.2 yamt assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
468 1.13.4.2 yamt assert((offtab->ot_window_start + offtab->ot_window_size) ==
469 1.13.4.2 yamt rounddown(blkno, offtab->ot_window_size));
470 1.13.4.2 yamt
471 1.13.4.2 yamt {
472 1.13.4.2 yamt uint64_t *window;
473 1.13.4.2 yamt sigset_t sigmask;
474 1.13.4.2 yamt
475 1.13.4.2 yamt /*
476 1.13.4.2 yamt * Mark the window as being updated so nobody tries to write it
477 1.13.4.2 yamt * (since we just wrote it) while we fill it with ones.
478 1.13.4.2 yamt */
479 1.13.4.2 yamt block_signals(&sigmask);
480 1.13.4.2 yamt window = offtab->ot_window;
481 1.13.4.2 yamt offtab->ot_window = NULL;
482 1.13.4.2 yamt restore_sigmask(&sigmask);
483 1.13.4.2 yamt
484 1.13.4.2 yamt /* Fill the window with ones. */
485 1.13.4.2 yamt for (i = 0; i < offtab_current_window_size(offtab); i++)
486 1.13.4.2 yamt window[i] = ~(uint64_t)0;
487 1.13.4.2 yamt
488 1.13.4.2 yamt /* Restore the window as ready again. */
489 1.13.4.2 yamt block_signals(&sigmask);
490 1.13.4.2 yamt offtab->ot_window = window;
491 1.13.4.2 yamt offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
492 1.13.4.2 yamt restore_sigmask(&sigmask);
493 1.13.4.2 yamt }
494 1.13.4.2 yamt
495 1.13.4.2 yamt win: assert(offtab->ot_window_start <= blkno);
496 1.13.4.2 yamt assert(blkno < offtab_current_window_end(offtab));
497 1.13.4.2 yamt
498 1.13.4.2 yamt offtab->ot_blkno = blkno;
499 1.13.4.2 yamt }
500 1.13.4.2 yamt
501 1.13.4.2 yamt /*
502 1.13.4.2 yamt * Actually set the offset for blkno.
503 1.13.4.2 yamt */
504 1.13.4.2 yamt void
505 1.13.4.2 yamt offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
506 1.13.4.2 yamt {
507 1.13.4.2 yamt
508 1.13.4.2 yamt assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
509 1.13.4.2 yamt assert(blkno == offtab->ot_blkno);
510 1.13.4.2 yamt assert(offtab->ot_window_start <= blkno);
511 1.13.4.2 yamt assert(blkno < offtab_current_window_end(offtab));
512
513 offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
514 }
515