offtab.c revision 1.1 1 1.1 riastrad /* $NetBSD: offtab.c,v 1.1 2014/01/22 06:14:46 riastradh Exp $ */
2 1.1 riastrad
3 1.1 riastrad /*-
4 1.1 riastrad * Copyright (c) 2014 The NetBSD Foundation, Inc.
5 1.1 riastrad * All rights reserved.
6 1.1 riastrad *
7 1.1 riastrad * This code is derived from software contributed to The NetBSD Foundation
8 1.1 riastrad * by Taylor R. Campbell.
9 1.1 riastrad *
10 1.1 riastrad * Redistribution and use in source and binary forms, with or without
11 1.1 riastrad * modification, are permitted provided that the following conditions
12 1.1 riastrad * are met:
13 1.1 riastrad * 1. Redistributions of source code must retain the above copyright
14 1.1 riastrad * notice, this list of conditions and the following disclaimer.
15 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 riastrad * notice, this list of conditions and the following disclaimer in the
17 1.1 riastrad * documentation and/or other materials provided with the distribution.
18 1.1 riastrad *
19 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE.
30 1.1 riastrad */
31 1.1 riastrad
32 1.1 riastrad #include <sys/cdefs.h>
33 1.1 riastrad __RCSID("$NetBSD");
34 1.1 riastrad
35 1.1 riastrad #include <sys/types.h>
36 1.1 riastrad #include <sys/endian.h>
37 1.1 riastrad
38 1.1 riastrad #include <assert.h>
39 1.1 riastrad #include <err.h>
40 1.1 riastrad #include <errno.h>
41 1.1 riastrad #include <inttypes.h>
42 1.1 riastrad #include <limits.h>
43 1.1 riastrad #include <stdbool.h>
44 1.1 riastrad #include <stdlib.h>
45 1.1 riastrad #include <unistd.h>
46 1.1 riastrad
47 1.1 riastrad #include "common.h"
48 1.1 riastrad #include "utils.h"
49 1.1 riastrad
50 1.1 riastrad #include "offtab.h"
51 1.1 riastrad
52 1.1 riastrad static void __printflike(1,2)
53 1.1 riastrad offtab_bug(const char *fmt, ...)
54 1.1 riastrad {
55 1.1 riastrad
56 1.1 riastrad errx(1, "bug in offtab, please report");
57 1.1 riastrad }
58 1.1 riastrad
59 1.1 riastrad static void __printflike(1,2)
60 1.1 riastrad offtab_bugx(const char *fmt, ...)
61 1.1 riastrad {
62 1.1 riastrad
63 1.1 riastrad errx(1, "bug in offtab, please report");
64 1.1 riastrad }
65 1.1 riastrad
66 1.1 riastrad /*
68 1.1 riastrad * Initialize an offtab to support the specified number of offsets read
69 1.1 riastrad * to or written from fd at byte position fdpos.
70 1.1 riastrad */
71 1.1 riastrad void
72 1.1 riastrad offtab_init(struct offtab *offtab, uint32_t n_offsets, int fd, off_t fdpos)
73 1.1 riastrad {
74 1.1 riastrad
75 1.1 riastrad assert(offtab != NULL);
76 1.1 riastrad assert(0 < n_offsets);
77 1.1 riastrad assert(0 <= fd);
78 1.1 riastrad assert(0 <= fdpos);
79 1.1 riastrad
80 1.1 riastrad offtab->ot_n_offsets = n_offsets;
81 1.1 riastrad __CTASSERT(MAX_N_OFFSETS <= (SIZE_MAX / sizeof(uint64_t)));
82 1.1 riastrad offtab->ot_offsets = malloc(n_offsets * sizeof(uint64_t));
83 1.1 riastrad if (offtab->ot_offsets == NULL)
84 1.1 riastrad err(1, "malloc offset table");
85 1.1 riastrad offtab->ot_blkno = (uint32_t)-1;
86 1.1 riastrad offtab->ot_fd = fd;
87 1.1 riastrad offtab->ot_fdpos = fdpos;
88 1.1 riastrad offtab->ot_report = &offtab_bug;
89 1.1 riastrad offtab->ot_reportx = &offtab_bugx;
90 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_NONE;
91 1.1 riastrad }
92 1.1 riastrad
93 1.1 riastrad /*
94 1.1 riastrad * Destroy an offtab.
95 1.1 riastrad */
96 1.1 riastrad void
97 1.1 riastrad offtab_destroy(struct offtab *offtab)
98 1.1 riastrad {
99 1.1 riastrad
100 1.1 riastrad free(offtab->ot_offsets);
101 1.1 riastrad }
102 1.1 riastrad
103 1.1 riastrad /*
104 1.1 riastrad * For an offtab that has been used to read data from disk, convert it
105 1.1 riastrad * to an offtab that can be used to write subsequent data to disk.
106 1.1 riastrad */
107 1.1 riastrad void
108 1.1 riastrad offtab_transmogrify_read_to_write(struct offtab *offtab)
109 1.1 riastrad {
110 1.1 riastrad
111 1.1 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
112 1.1 riastrad assert(offtab->ot_offsets[0] == htobe64(offtab->ot_fdpos +
113 1.1 riastrad (offtab->ot_n_offsets * sizeof(uint64_t))));
114 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
115 1.1 riastrad }
116 1.1 riastrad
117 1.1 riastrad /*
119 1.1 riastrad * Reset an offtab for reading an offset table from the beginning.
120 1.1 riastrad * Initializes in-memory state and may read data from offtab->ot_fd,
121 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. Failure
122 1.1 riastrad * will be reported by the report/reportx routines, which are called
123 1.1 riastrad * like warn/warnx. May fail; returns true on success, false on
124 1.1 riastrad * failure.
125 1.1 riastrad */
126 1.1 riastrad bool
127 1.1 riastrad offtab_reset_read(struct offtab *offtab,
128 1.1 riastrad void (*report)(const char *, ...) __printflike(1,2),
129 1.1 riastrad void (*reportx)(const char *, ...) __printflike(1,2))
130 1.1 riastrad {
131 1.1 riastrad
132 1.1 riastrad assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
133 1.1 riastrad ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
134 1.1 riastrad
135 1.1 riastrad offtab->ot_report = report;
136 1.1 riastrad offtab->ot_reportx = reportx;
137 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_READ;
138 1.1 riastrad
139 1.1 riastrad const ssize_t n_read = read_block(offtab->ot_fd, offtab->ot_offsets,
140 1.1 riastrad (offtab->ot_n_offsets * sizeof(uint64_t)));
141 1.1 riastrad if (n_read == -1) {
142 1.1 riastrad (*offtab->ot_report)("read offset table");
143 1.1 riastrad return false;
144 1.1 riastrad }
145 1.1 riastrad assert(n_read >= 0);
146 1.1 riastrad if ((size_t)n_read != (offtab->ot_n_offsets * sizeof(uint64_t))) {
147 1.1 riastrad (*offtab->ot_reportx)("partial read of offset table"
148 1.1 riastrad ": %zu != %zu",
149 1.1 riastrad (size_t)n_read,
150 1.1 riastrad (size_t)(offtab->ot_n_offsets * sizeof(uint64_t)));
151 1.1 riastrad return false;
152 1.1 riastrad }
153 1.1 riastrad
154 1.1 riastrad return true;
155 1.1 riastrad }
156 1.1 riastrad
157 1.1 riastrad /*
158 1.1 riastrad * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
159 1.1 riastrad * preparation for a call to offtab_get. May fail; returns true on
160 1.1 riastrad * success, false on failure.
161 1.1 riastrad */
162 1.1 riastrad bool
163 1.1 riastrad offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
164 1.1 riastrad {
165 1.1 riastrad
166 1.1 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
167 1.1 riastrad assert(blkno < offtab->ot_n_offsets);
168 1.1 riastrad offtab->ot_blkno = blkno;
169 1.1 riastrad return true;
170 1.1 riastrad }
171 1.1 riastrad
172 1.1 riastrad /*
173 1.1 riastrad * Return the offset for blkno. Caller must have called
174 1.1 riastrad * offtab_prepare_get beforehand.
175 1.1 riastrad */
176 1.1 riastrad uint64_t
177 1.1 riastrad offtab_get(struct offtab *offtab, uint32_t blkno)
178 1.1 riastrad {
179 1.1 riastrad
180 1.1 riastrad assert(offtab->ot_mode == OFFTAB_MODE_READ);
181 1.1 riastrad assert(blkno < offtab->ot_n_offsets);
182 1.1 riastrad assert(blkno == offtab->ot_blkno);
183 1.1 riastrad return be64toh(offtab->ot_offsets[blkno]);
184 1.1 riastrad }
185 1.1 riastrad
186 1.1 riastrad /*
188 1.1 riastrad * Reset offtab for writing a fresh offset table. Initializes
189 1.1 riastrad * in-memory state and writes an empty offset table to offtab->ot_fd,
190 1.1 riastrad * which must currently be at byte position offtab->ot_fdpos. May
191 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
192 1.1 riastrad */
193 1.1 riastrad void
194 1.1 riastrad offtab_reset_write(struct offtab *offtab)
195 1.1 riastrad {
196 1.1 riastrad uint32_t i;
197 1.1 riastrad
198 1.1 riastrad assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
199 1.1 riastrad
200 1.1 riastrad offtab->ot_mode = OFFTAB_MODE_WRITE;
201 1.1 riastrad
202 1.1 riastrad /*
203 1.1 riastrad * Initialize the offset table to all ones (except for the
204 1.1 riastrad * fixed first offset) so that we can easily detect where we
205 1.1 riastrad * were interrupted if we want to restart.
206 1.1 riastrad */
207 1.1 riastrad __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
208 1.1 riastrad assert(offtab->ot_n_offsets > 0);
209 1.1 riastrad offtab->ot_offsets[0] = htobe64(offtab->ot_fdpos +
210 1.1 riastrad (offtab->ot_n_offsets * sizeof(uint64_t)));
211 1.1 riastrad for (i = 1; i < offtab->ot_n_offsets; i++)
212 1.1 riastrad offtab->ot_offsets[i] = ~(uint64_t)0;
213 1.1 riastrad
214 1.1 riastrad /* Write the initial (empty) offset table. */
215 1.1 riastrad const ssize_t n_written = write(offtab->ot_fd, offtab->ot_offsets,
216 1.1 riastrad (offtab->ot_n_offsets * sizeof(uint64_t)));
217 1.1 riastrad if (n_written == -1)
218 1.1 riastrad err(1, "write initial offset table");
219 1.1 riastrad assert(n_written >= 0);
220 1.1 riastrad if ((size_t)n_written != (offtab->ot_n_offsets * sizeof(uint64_t)))
221 1.1 riastrad errx(1, "partial write of initial offset bytes: %zu <= %zu",
222 1.1 riastrad (size_t)n_written,
223 1.1 riastrad (size_t)(offtab->ot_n_offsets * sizeof(uint64_t)));
224 1.1 riastrad }
225 1.1 riastrad
226 1.1 riastrad /*
227 1.1 riastrad * Guarantee that the disk reflects block offsets [0, n_offsets). If
228 1.1 riastrad * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
229 1.1 riastrad * offset table. May fail; returns on success, aborts with err(3) on
230 1.1 riastrad * failure. Fsync failure is considered success but is reported with a
231 1.1 riastrad * warning.
232 1.1 riastrad *
233 1.1 riastrad * This routine does not write state in memory, and does not read state
234 1.1 riastrad * that is not signal-safe. The only state read is static for the
235 1.1 riastrad * existence of the offset table.
236 1.1 riastrad */
237 1.1 riastrad void
238 1.1 riastrad offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
239 1.1 riastrad {
240 1.1 riastrad
241 1.1 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
242 1.1 riastrad assert(n_offsets <= offtab->ot_n_offsets);
243 1.1 riastrad
244 1.1 riastrad const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_offsets,
245 1.1 riastrad (n_offsets * sizeof(uint64_t)), offtab->ot_fdpos);
246 1.1 riastrad if (n_written == -1)
247 1.1 riastrad err_ss(1, "write partial offset table");
248 1.1 riastrad assert(n_written >= 0);
249 1.1 riastrad if ((size_t)n_written != (n_offsets * sizeof(uint64_t)))
250 1.1 riastrad errx_ss(1, "partial write of partial offset table: %zu != %zu",
251 1.1 riastrad (size_t)n_written,
252 1.1 riastrad (size_t)(n_offsets * sizeof(uint64_t)));
253 1.1 riastrad
254 1.1 riastrad if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
255 1.1 riastrad if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
256 1.1 riastrad offtab->ot_fdpos,
257 1.1 riastrad (offtab->ot_fdpos + (n_offsets * (sizeof(uint64_t)))))
258 1.1 riastrad == -1)
259 1.1 riastrad warn_ss("fsync of offset table failed");
260 1.1 riastrad }
261 1.1 riastrad }
262 1.1 riastrad
263 1.1 riastrad /*
264 1.1 riastrad * Do any I/O or bookkeeping necessary to set an offset for blkno. May
265 1.1 riastrad * fail; returns on success, aborts with err(3) on failure.
266 1.1 riastrad */
267 1.1 riastrad void
268 1.1 riastrad offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
269 1.1 riastrad {
270 1.1 riastrad
271 1.1 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
272 1.1 riastrad assert(blkno < offtab->ot_n_offsets);
273 1.1 riastrad offtab->ot_blkno = blkno;
274 1.1 riastrad }
275 1.1 riastrad
276 1.1 riastrad /*
277 1.1 riastrad * Actually set the offset for blkno.
278 1.1 riastrad */
279 1.1 riastrad void
280 1.1 riastrad offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
281 1.1 riastrad {
282 1.1 riastrad
283 1.1 riastrad assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
284 1.1 riastrad assert(blkno < offtab->ot_n_offsets);
285 assert(blkno == offtab->ot_blkno);
286 offtab->ot_offsets[blkno] = htobe64(offset);
287 }
288