efs_subr.c revision 1.3.2.3 1 1.3.2.3 ad /* $NetBSD: efs_subr.c,v 1.3.2.3 2007/08/20 21:26:04 ad Exp $ */
2 1.3.2.2 ad
3 1.3.2.2 ad /*
4 1.3.2.2 ad * Copyright (c) 2006 Stephen M. Rumble <rumble (at) ephemeral.org>
5 1.3.2.2 ad *
6 1.3.2.2 ad * Permission to use, copy, modify, and distribute this software for any
7 1.3.2.2 ad * purpose with or without fee is hereby granted, provided that the above
8 1.3.2.2 ad * copyright notice and this permission notice appear in all copies.
9 1.3.2.2 ad *
10 1.3.2.2 ad * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 1.3.2.2 ad * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 1.3.2.2 ad * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 1.3.2.2 ad * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 1.3.2.2 ad * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 1.3.2.2 ad * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 1.3.2.2 ad * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 1.3.2.2 ad */
18 1.3.2.2 ad
19 1.3.2.2 ad #include <sys/cdefs.h>
20 1.3.2.3 ad __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.3.2.3 2007/08/20 21:26:04 ad Exp $");
21 1.3.2.2 ad
22 1.3.2.2 ad #include <sys/param.h>
23 1.3.2.2 ad #include <sys/kauth.h>
24 1.3.2.2 ad #include <sys/lwp.h>
25 1.3.2.2 ad #include <sys/proc.h>
26 1.3.2.2 ad #include <sys/buf.h>
27 1.3.2.2 ad #include <sys/mount.h>
28 1.3.2.2 ad #include <sys/vnode.h>
29 1.3.2.2 ad #include <sys/namei.h>
30 1.3.2.2 ad #include <sys/stat.h>
31 1.3.2.2 ad #include <sys/malloc.h>
32 1.3.2.2 ad
33 1.3.2.2 ad #include <miscfs/genfs/genfs_node.h>
34 1.3.2.2 ad
35 1.3.2.2 ad #include <fs/efs/efs.h>
36 1.3.2.2 ad #include <fs/efs/efs_sb.h>
37 1.3.2.2 ad #include <fs/efs/efs_dir.h>
38 1.3.2.2 ad #include <fs/efs/efs_genfs.h>
39 1.3.2.2 ad #include <fs/efs/efs_mount.h>
40 1.3.2.2 ad #include <fs/efs/efs_extent.h>
41 1.3.2.2 ad #include <fs/efs/efs_dinode.h>
42 1.3.2.2 ad #include <fs/efs/efs_inode.h>
43 1.3.2.2 ad #include <fs/efs/efs_subr.h>
44 1.3.2.2 ad
45 1.3.2.2 ad struct pool efs_inode_pool;
46 1.3.2.2 ad
47 1.3.2.2 ad /*
48 1.3.2.2 ad * Calculate a checksum for the provided superblock in __host byte order__.
49 1.3.2.2 ad *
50 1.3.2.2 ad * At some point SGI changed the checksum algorithm slightly, which can be
51 1.3.2.2 ad * enabled with the 'new' flag.
52 1.3.2.2 ad *
53 1.3.2.2 ad * Presumably this change occured on or before 24 Oct 1988 (around IRIX 3.1),
54 1.3.2.2 ad * so we're pretty unlikely to ever actually see an old checksum. Further, it
55 1.3.2.2 ad * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new
56 1.3.2.2 ad * checksum whereas EFS_MAGIC filesystems could potentially use either
57 1.3.2.2 ad * algorithm.
58 1.3.2.2 ad *
59 1.3.2.2 ad * See comp.sys.sgi <1991Aug9.050838.16876 (at) odin.corp.sgi.com>
60 1.3.2.2 ad */
61 1.3.2.2 ad int32_t
62 1.3.2.2 ad efs_sb_checksum(struct efs_sb *esb, int new)
63 1.3.2.2 ad {
64 1.3.2.2 ad int i;
65 1.3.2.2 ad int32_t cksum;
66 1.3.2.3 ad uint16_t *sbarray = (uint16_t *)esb;
67 1.3.2.2 ad
68 1.3.2.2 ad KASSERT((EFS_SB_CHECKSUM_SIZE % 2) == 0);
69 1.3.2.2 ad
70 1.3.2.2 ad for (i = cksum = 0; i < (EFS_SB_CHECKSUM_SIZE / 2); i++) {
71 1.3.2.2 ad cksum ^= be16toh(sbarray[i]);
72 1.3.2.2 ad cksum = (cksum << 1) | (new && cksum < 0);
73 1.3.2.2 ad }
74 1.3.2.2 ad
75 1.3.2.2 ad return (cksum);
76 1.3.2.2 ad }
77 1.3.2.2 ad
78 1.3.2.2 ad /*
79 1.3.2.2 ad * Determine if the superblock is valid.
80 1.3.2.2 ad *
81 1.3.2.2 ad * Returns 0 if valid, else invalid. If invalid, 'why' is set to an
82 1.3.2.2 ad * explanation.
83 1.3.2.2 ad */
84 1.3.2.2 ad int
85 1.3.2.2 ad efs_sb_validate(struct efs_sb *esb, const char **why)
86 1.3.2.2 ad {
87 1.3.2.2 ad uint32_t ocksum, ncksum;
88 1.3.2.2 ad
89 1.3.2.2 ad *why = NULL;
90 1.3.2.2 ad
91 1.3.2.2 ad if (be32toh(esb->sb_magic) != EFS_SB_MAGIC &&
92 1.3.2.3 ad be32toh(esb->sb_magic) != EFS_SB_NEWMAGIC) {
93 1.3.2.2 ad *why = "sb_magic invalid";
94 1.3.2.2 ad return (1);
95 1.3.2.2 ad }
96 1.3.2.2 ad
97 1.3.2.2 ad ocksum = htobe32(efs_sb_checksum(esb, 0));
98 1.3.2.2 ad ncksum = htobe32(efs_sb_checksum(esb, 1));
99 1.3.2.2 ad if (esb->sb_checksum != ocksum && esb->sb_checksum != ncksum) {
100 1.3.2.2 ad *why = "sb_checksum invalid";
101 1.3.2.2 ad return (1);
102 1.3.2.2 ad }
103 1.3.2.2 ad
104 1.3.2.2 ad if (be32toh(esb->sb_size) > EFS_SIZE_MAX) {
105 1.3.2.2 ad *why = "sb_size > EFS_SIZE_MAX";
106 1.3.2.2 ad return (1);
107 1.3.2.2 ad }
108 1.3.2.2 ad
109 1.3.2.2 ad if (be32toh(esb->sb_firstcg) <= EFS_BB_BITMAP) {
110 1.3.2.2 ad *why = "sb_firstcg <= EFS_BB_BITMAP";
111 1.3.2.2 ad return (1);
112 1.3.2.2 ad }
113 1.3.2.2 ad
114 1.3.2.2 ad /* XXX - add better sb consistency checks here */
115 1.3.2.2 ad if (esb->sb_cgfsize == 0 ||
116 1.3.2.2 ad esb->sb_cgisize == 0 ||
117 1.3.2.2 ad esb->sb_ncg == 0 ||
118 1.3.2.2 ad esb->sb_bmsize == 0) {
119 1.3.2.2 ad *why = "something bad happened";
120 1.3.2.2 ad return (1);
121 1.3.2.2 ad }
122 1.3.2.2 ad
123 1.3.2.2 ad return (0);
124 1.3.2.2 ad }
125 1.3.2.2 ad
126 1.3.2.2 ad /*
127 1.3.2.2 ad * Determine the basic block offset and inode index within that block, given
128 1.3.2.2 ad * the inode 'ino' and filesystem parameters _in host byte order_. The inode
129 1.3.2.2 ad * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE.
130 1.3.2.2 ad */
131 1.3.2.2 ad void
132 1.3.2.2 ad efs_locate_inode(ino_t ino, struct efs_sb *sbp, uint32_t *bboff, int *index)
133 1.3.2.2 ad {
134 1.3.2.2 ad uint32_t cgfsize, firstcg;
135 1.3.2.2 ad uint16_t cgisize;
136 1.3.2.2 ad
137 1.3.2.2 ad cgisize = be16toh(sbp->sb_cgisize);
138 1.3.2.2 ad cgfsize = be32toh(sbp->sb_cgfsize);
139 1.3.2.2 ad firstcg = be32toh(sbp->sb_firstcg),
140 1.3.2.2 ad
141 1.3.2.2 ad *bboff = firstcg + ((ino / (cgisize * EFS_DINODES_PER_BB)) * cgfsize) +
142 1.3.2.2 ad ((ino % (cgisize * EFS_DINODES_PER_BB)) / EFS_DINODES_PER_BB);
143 1.3.2.2 ad *index = ino & (EFS_DINODES_PER_BB - 1);
144 1.3.2.2 ad }
145 1.3.2.2 ad
146 1.3.2.2 ad /*
147 1.3.2.2 ad * Read in an inode from disk.
148 1.3.2.2 ad *
149 1.3.2.2 ad * We actually take in four inodes at a time. Hopefully these will stick
150 1.3.2.2 ad * around in the buffer cache and get used without going to disk.
151 1.3.2.2 ad *
152 1.3.2.2 ad * Returns 0 on success.
153 1.3.2.2 ad */
154 1.3.2.2 ad int
155 1.3.2.2 ad efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l,
156 1.3.2.2 ad struct efs_dinode *di)
157 1.3.2.2 ad {
158 1.3.2.2 ad struct efs_sb *sbp;
159 1.3.2.2 ad struct buf *bp;
160 1.3.2.2 ad int index, err;
161 1.3.2.2 ad uint32_t bboff;
162 1.3.2.2 ad
163 1.3.2.2 ad sbp = &emp->em_sb;
164 1.3.2.2 ad efs_locate_inode(ino, sbp, &bboff, &index);
165 1.3.2.2 ad
166 1.3.2.2 ad err = efs_bread(emp, bboff, l, &bp);
167 1.3.2.2 ad if (err) {
168 1.3.2.2 ad brelse(bp, 0);
169 1.3.2.2 ad return (err);
170 1.3.2.2 ad }
171 1.3.2.2 ad memcpy(di, ((struct efs_dinode *)bp->b_data) + index, sizeof(*di));
172 1.3.2.2 ad brelse(bp, 0);
173 1.3.2.2 ad
174 1.3.2.2 ad return (0);
175 1.3.2.2 ad }
176 1.3.2.2 ad
177 1.3.2.2 ad /*
178 1.3.2.2 ad * Perform a read from our device handling the potential DEV_BSIZE
179 1.3.2.2 ad * messiness (although as of 19.2.2006, all ports appear to use 512) as
180 1.3.2.2 ad * we as EFS block sizing.
181 1.3.2.2 ad *
182 1.3.2.2 ad * bboff: basic block offset
183 1.3.2.2 ad *
184 1.3.2.2 ad * Returns 0 on success.
185 1.3.2.2 ad */
186 1.3.2.2 ad int
187 1.3.2.2 ad efs_bread(struct efs_mount *emp, uint32_t bboff, struct lwp *l, struct buf **bp)
188 1.3.2.2 ad {
189 1.3.2.2 ad KASSERT(bboff < EFS_SIZE_MAX);
190 1.3.2.2 ad
191 1.3.2.2 ad return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE),
192 1.3.2.2 ad EFS_BB_SIZE, (l == NULL) ? NOCRED : l->l_cred, bp));
193 1.3.2.2 ad }
194 1.3.2.2 ad
195 1.3.2.2 ad /*
196 1.3.2.2 ad * Synchronise the in-core, host ordered and typed inode fields with their
197 1.3.2.2 ad * corresponding on-disk, EFS ordered and typed copies.
198 1.3.2.2 ad *
199 1.3.2.2 ad * This is the inverse of efs_dinode_sync_inode(), and should be called when
200 1.3.2.2 ad * an inode is loaded from disk.
201 1.3.2.2 ad */
202 1.3.2.2 ad void
203 1.3.2.2 ad efs_sync_dinode_to_inode(struct efs_inode *ei)
204 1.3.2.2 ad {
205 1.3.2.2 ad
206 1.3.2.2 ad ei->ei_mode = be16toh(ei->ei_di.di_mode); /*same as nbsd*/
207 1.3.2.2 ad ei->ei_nlink = be16toh(ei->ei_di.di_nlink);
208 1.3.2.2 ad ei->ei_uid = be16toh(ei->ei_di.di_uid);
209 1.3.2.2 ad ei->ei_gid = be16toh(ei->ei_di.di_gid);
210 1.3.2.2 ad ei->ei_size = be32toh(ei->ei_di.di_size);
211 1.3.2.2 ad ei->ei_atime = be32toh(ei->ei_di.di_atime);
212 1.3.2.2 ad ei->ei_mtime = be32toh(ei->ei_di.di_mtime);
213 1.3.2.2 ad ei->ei_ctime = be32toh(ei->ei_di.di_ctime);
214 1.3.2.2 ad ei->ei_gen = be32toh(ei->ei_di.di_gen);
215 1.3.2.2 ad ei->ei_numextents = be16toh(ei->ei_di.di_numextents);
216 1.3.2.2 ad ei->ei_version = ei->ei_di.di_version;
217 1.3.2.2 ad }
218 1.3.2.2 ad
/*
 * Counterpart of efs_sync_dinode_to_inode(): would push the in-core, host
 * ordered inode fields back into the on-disk formatted copy before an
 * inode is flushed.
 *
 * Not implemented -- calling it panics unconditionally, as the message
 * below states the code is read-only.
 */
void
efs_sync_inode_to_dinode(struct efs_inode *ei)
{
	panic("readonly -- no need to call me");
}
232 1.3.2.2 ad
#ifdef DIAGNOSTIC
/*
 * Check that the in-core inode's cached host order fields agree with the
 * embedded on-disk copy.
 *
 * Returns the number of fields that differ, i.e. 0 if they all match.
 */
static int
efs_is_inode_synced(struct efs_inode *ei)
{
	const struct efs_dinode *di = &ei->ei_di;
	int mismatches = 0;

	if (ei->ei_mode != be16toh(di->di_mode))
		mismatches++;
	if (ei->ei_nlink != be16toh(di->di_nlink))
		mismatches++;
	if (ei->ei_uid != be16toh(di->di_uid))
		mismatches++;
	if (ei->ei_gid != be16toh(di->di_gid))
		mismatches++;
	if (ei->ei_size != be32toh(di->di_size))
		mismatches++;
	if (ei->ei_atime != be32toh(di->di_atime))
		mismatches++;
	if (ei->ei_mtime != be32toh(di->di_mtime))
		mismatches++;
	if (ei->ei_ctime != be32toh(di->di_ctime))
		mismatches++;
	if (ei->ei_gen != be32toh(di->di_gen))
		mismatches++;
	if (ei->ei_numextents != be16toh(di->di_numextents))
		mismatches++;
	if (ei->ei_version != di->di_version)
		mismatches++;

	return (mismatches);
}
#endif
261 1.3.2.2 ad
262 1.3.2.2 ad /*
263 1.3.2.2 ad * Given an efs_dirblk structure and a componentname to search for, return the
264 1.3.2.2 ad * corresponding inode if it is found.
265 1.3.2.2 ad *
266 1.3.2.2 ad * Returns 0 on success.
267 1.3.2.2 ad */
268 1.3.2.2 ad static int
269 1.3.2.2 ad efs_dirblk_lookup(struct efs_dirblk *dir, struct componentname *cn,
270 1.3.2.2 ad ino_t *inode)
271 1.3.2.2 ad {
272 1.3.2.2 ad struct efs_dirent *de;
273 1.3.2.2 ad int i, slot, offset;
274 1.3.2.2 ad
275 1.3.2.2 ad KASSERT(cn->cn_namelen <= EFS_DIRENT_NAMELEN_MAX);
276 1.3.2.2 ad
277 1.3.2.2 ad slot = offset = 0;
278 1.3.2.2 ad
279 1.3.2.2 ad for (i = 0; i < dir->db_slots; i++) {
280 1.3.2.2 ad offset = EFS_DIRENT_OFF_EXPND(dir->db_space[i]);
281 1.3.2.2 ad
282 1.3.2.2 ad if (offset == EFS_DIRBLK_SLOT_FREE)
283 1.3.2.2 ad continue;
284 1.3.2.2 ad
285 1.3.2.2 ad de = (struct efs_dirent *)((char *)dir + offset);
286 1.3.2.2 ad if (de->de_namelen == cn->cn_namelen &&
287 1.3.2.2 ad (strncmp(cn->cn_nameptr, de->de_name, cn->cn_namelen) == 0)){
288 1.3.2.2 ad slot = i;
289 1.3.2.2 ad break;
290 1.3.2.2 ad }
291 1.3.2.2 ad }
292 1.3.2.2 ad if (i == dir->db_slots)
293 1.3.2.2 ad return (ENOENT);
294 1.3.2.2 ad
295 1.3.2.2 ad KASSERT(slot < offset && offset < EFS_DIRBLK_SPACE_SIZE);
296 1.3.2.2 ad de = (struct efs_dirent *)((char *)dir + offset);
297 1.3.2.2 ad *inode = be32toh(de->de_inumber);
298 1.3.2.2 ad
299 1.3.2.2 ad return (0);
300 1.3.2.2 ad }
301 1.3.2.2 ad
302 1.3.2.2 ad /*
303 1.3.2.2 ad * Given an extent descriptor that represents a directory, look up
304 1.3.2.2 ad * componentname within its efs_dirblk's. If it is found, return the
305 1.3.2.2 ad * corresponding inode in 'ino'.
306 1.3.2.2 ad *
307 1.3.2.2 ad * Returns 0 on success.
308 1.3.2.2 ad */
309 1.3.2.2 ad static int
310 1.3.2.2 ad efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex,
311 1.3.2.2 ad struct componentname *cn, ino_t *ino)
312 1.3.2.2 ad {
313 1.3.2.2 ad struct efs_dirblk *db;
314 1.3.2.2 ad struct buf *bp;
315 1.3.2.2 ad int i, err;
316 1.3.2.2 ad
317 1.3.2.2 ad /*
318 1.3.2.2 ad * Read in each of the dirblks until we find our entry.
319 1.3.2.2 ad * If we don't, return ENOENT.
320 1.3.2.2 ad */
321 1.3.2.2 ad for (i = 0; i < ex->ex_length; i++) {
322 1.3.2.2 ad err = efs_bread(emp, ex->ex_bn + i, NULL, &bp);
323 1.3.2.2 ad if (err) {
324 1.3.2.2 ad printf("efs: warning: invalid extent descriptor\n");
325 1.3.2.2 ad brelse(bp, 0);
326 1.3.2.2 ad return (err);
327 1.3.2.2 ad }
328 1.3.2.2 ad
329 1.3.2.2 ad db = (struct efs_dirblk *)bp->b_data;
330 1.3.2.2 ad if (efs_dirblk_lookup(db, cn, ino) == 0) {
331 1.3.2.2 ad brelse(bp, 0);
332 1.3.2.2 ad return (0);
333 1.3.2.2 ad }
334 1.3.2.2 ad brelse(bp, 0);
335 1.3.2.2 ad }
336 1.3.2.2 ad
337 1.3.2.2 ad return (ENOENT);
338 1.3.2.2 ad }
339 1.3.2.2 ad
340 1.3.2.2 ad /*
341 1.3.2.2 ad * Given the provided in-core inode, look up the pathname requested. If
342 1.3.2.2 ad * we find it, 'ino' reflects its corresponding on-disk inode number.
343 1.3.2.2 ad *
344 1.3.2.2 ad * Returns 0 on success.
345 1.3.2.2 ad */
346 1.3.2.2 ad int
347 1.3.2.2 ad efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei,
348 1.3.2.2 ad struct componentname *cn, ino_t *ino)
349 1.3.2.2 ad {
350 1.3.2.2 ad struct efs_extent ex;
351 1.3.2.2 ad struct efs_extent_iterator exi;
352 1.3.2.2 ad int ret;
353 1.3.2.2 ad
354 1.3.2.2 ad KASSERT(VOP_ISLOCKED(ei->ei_vp));
355 1.3.2.2 ad KASSERT(efs_is_inode_synced(ei) == 0);
356 1.3.2.2 ad KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR);
357 1.3.2.2 ad
358 1.3.2.2 ad efs_extent_iterator_init(&exi, ei, 0);
359 1.3.2.2 ad while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
360 1.3.2.2 ad if (efs_extent_lookup(emp, &ex, cn, ino) == 0) {
361 1.3.2.2 ad return (0);
362 1.3.2.2 ad }
363 1.3.2.2 ad }
364 1.3.2.2 ad
365 1.3.2.2 ad return ((ret == -1) ? ENOENT : ret);
366 1.3.2.2 ad }
367 1.3.2.2 ad
368 1.3.2.2 ad /*
369 1.3.2.2 ad * Convert on-disk extent structure to in-core format.
370 1.3.2.2 ad */
371 1.3.2.2 ad void
372 1.3.2.2 ad efs_dextent_to_extent(struct efs_dextent *dex, struct efs_extent *ex)
373 1.3.2.2 ad {
374 1.3.2.2 ad
375 1.3.2.2 ad KASSERT(dex != NULL && ex != NULL);
376 1.3.2.2 ad
377 1.3.2.2 ad ex->ex_magic = dex->ex_bytes[0];
378 1.3.2.2 ad ex->ex_bn = be32toh(dex->ex_words[0]) & 0x00ffffff;
379 1.3.2.2 ad ex->ex_length = dex->ex_bytes[4];
380 1.3.2.2 ad ex->ex_offset = be32toh(dex->ex_words[1]) & 0x00ffffff;
381 1.3.2.2 ad }
382 1.3.2.2 ad
383 1.3.2.2 ad /*
384 1.3.2.2 ad * Convert in-core extent format to on-disk structure.
385 1.3.2.2 ad */
386 1.3.2.2 ad void
387 1.3.2.2 ad efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex)
388 1.3.2.2 ad {
389 1.3.2.2 ad
390 1.3.2.2 ad KASSERT(ex != NULL && dex != NULL);
391 1.3.2.2 ad KASSERT(ex->ex_magic == EFS_EXTENT_MAGIC);
392 1.3.2.2 ad KASSERT((ex->ex_bn & ~EFS_EXTENT_BN_MASK) == 0);
393 1.3.2.2 ad KASSERT((ex->ex_offset & ~EFS_EXTENT_OFFSET_MASK) == 0);
394 1.3.2.2 ad
395 1.3.2.2 ad dex->ex_words[0] = htobe32(ex->ex_bn);
396 1.3.2.2 ad dex->ex_bytes[0] = ex->ex_magic;
397 1.3.2.2 ad dex->ex_words[1] = htobe32(ex->ex_offset);
398 1.3.2.2 ad dex->ex_bytes[4] = ex->ex_length;
399 1.3.2.2 ad }
400 1.3.2.2 ad
401 1.3.2.2 ad /*
402 1.3.2.2 ad * Initialise an extent iterator.
403 1.3.2.2 ad *
404 1.3.2.2 ad * If start_hint is non-0, attempt to set up the iterator beginning with the
405 1.3.2.2 ad * extent descriptor in which the start_hint'th byte exists. Callers must not
406 1.3.2.2 ad * expect success (this is simply an optimisation), so we reserve the right
407 1.3.2.2 ad * to start from the beginning.
408 1.3.2.2 ad */
409 1.3.2.2 ad void
410 1.3.2.2 ad efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip,
411 1.3.2.2 ad off_t start_hint)
412 1.3.2.2 ad {
413 1.3.2.2 ad struct efs_extent ex, ex2;
414 1.3.2.2 ad struct buf *bp;
415 1.3.2.2 ad struct efs_mount *emp = VFSTOEFS(eip->ei_vp->v_mount);
416 1.3.2.2 ad off_t offset, length, next;
417 1.3.2.2 ad int i, err, numextents, numinextents;
418 1.3.2.2 ad int hi, lo, mid;
419 1.3.2.2 ad int indir;
420 1.3.2.2 ad
421 1.3.2.2 ad exi->exi_eip = eip;
422 1.3.2.2 ad exi->exi_next = 0;
423 1.3.2.2 ad exi->exi_dnext = 0;
424 1.3.2.2 ad exi->exi_innext = 0;
425 1.3.2.2 ad
426 1.3.2.2 ad if (start_hint == 0)
427 1.3.2.2 ad return;
428 1.3.2.2 ad
429 1.3.2.2 ad /* force iterator to end if hint is too big */
430 1.3.2.2 ad if (start_hint >= eip->ei_size) {
431 1.3.2.2 ad exi->exi_next = eip->ei_numextents;
432 1.3.2.2 ad return;
433 1.3.2.2 ad }
434 1.3.2.2 ad
435 1.3.2.2 ad /*
436 1.3.2.2 ad * Use start_hint to jump to the right extent descriptor. We'll
437 1.3.2.2 ad * iterate over the 12 indirect extents because it's cheap, then
438 1.3.2.2 ad * bring the appropriate vector into core and binary search it.
439 1.3.2.2 ad */
440 1.3.2.2 ad
441 1.3.2.2 ad /*
442 1.3.2.2 ad * Handle the small file case separately first...
443 1.3.2.2 ad */
444 1.3.2.2 ad if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
445 1.3.2.2 ad for (i = 0; i < eip->ei_numextents; i++) {
446 1.3.2.2 ad efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex);
447 1.3.2.2 ad
448 1.3.2.2 ad offset = ex.ex_offset * EFS_BB_SIZE;
449 1.3.2.2 ad length = ex.ex_length * EFS_BB_SIZE;
450 1.3.2.2 ad
451 1.3.2.2 ad if (start_hint >= offset &&
452 1.3.2.2 ad start_hint < (offset + length)) {
453 1.3.2.2 ad exi->exi_next = exi->exi_dnext = i;
454 1.3.2.2 ad return;
455 1.3.2.2 ad }
456 1.3.2.2 ad }
457 1.3.2.2 ad
458 1.3.2.2 ad /* shouldn't get here, no? */
459 1.3.2.2 ad EFS_DPRINTF(("efs_extent_iterator_init: bad direct extents\n"));
460 1.3.2.2 ad return;
461 1.3.2.2 ad }
462 1.3.2.2 ad
463 1.3.2.2 ad /*
464 1.3.2.2 ad * Now do the large files with indirect extents...
465 1.3.2.2 ad *
466 1.3.2.2 ad * The first indirect extent's ex_offset field contains the
467 1.3.2.2 ad * number of indirect extents used.
468 1.3.2.2 ad */
469 1.3.2.2 ad efs_dextent_to_extent(&eip->ei_di.di_extents[0], &ex);
470 1.3.2.2 ad
471 1.3.2.2 ad numinextents = ex.ex_offset;
472 1.3.2.2 ad if (numinextents < 1 || numinextents >= EFS_DIRECTEXTENTS) {
473 1.3.2.2 ad EFS_DPRINTF(("efs_extent_iterator_init: bad ex.ex_offset\n"));
474 1.3.2.2 ad return;
475 1.3.2.2 ad }
476 1.3.2.2 ad
477 1.3.2.2 ad next = 0;
478 1.3.2.2 ad indir = -1;
479 1.3.2.2 ad numextents = 0;
480 1.3.2.2 ad for (i = 0; i < numinextents; i++) {
481 1.3.2.2 ad efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex);
482 1.3.2.2 ad
483 1.3.2.2 ad err = efs_bread(emp, ex.ex_bn, NULL, &bp);
484 1.3.2.2 ad if (err) {
485 1.3.2.2 ad brelse(bp, 0);
486 1.3.2.2 ad return;
487 1.3.2.2 ad }
488 1.3.2.2 ad
489 1.3.2.2 ad efs_dextent_to_extent((struct efs_dextent *)bp->b_data, &ex2);
490 1.3.2.2 ad brelse(bp, 0);
491 1.3.2.2 ad
492 1.3.2.2 ad offset = ex2.ex_offset * EFS_BB_SIZE;
493 1.3.2.2 ad
494 1.3.2.2 ad if (offset > start_hint) {
495 1.3.2.2 ad indir = MAX(0, i - 1);
496 1.3.2.2 ad break;
497 1.3.2.2 ad }
498 1.3.2.2 ad
499 1.3.2.2 ad /* number of extents prior to this indirect vector of extents */
500 1.3.2.2 ad next += numextents;
501 1.3.2.2 ad
502 1.3.2.2 ad /* number of extents within this indirect vector of extents */
503 1.3.2.2 ad numextents = ex.ex_length * EFS_EXTENTS_PER_BB;
504 1.3.2.2 ad numextents = MIN(numextents, eip->ei_numextents - next);
505 1.3.2.2 ad }
506 1.3.2.2 ad
507 1.3.2.2 ad /*
508 1.3.2.2 ad * We hit the end, so assume it's in the last extent.
509 1.3.2.2 ad */
510 1.3.2.2 ad if (indir == -1)
511 1.3.2.2 ad indir = numinextents - 1;
512 1.3.2.2 ad
513 1.3.2.2 ad /*
514 1.3.2.2 ad * Binary search to find our desired direct extent.
515 1.3.2.2 ad */
516 1.3.2.2 ad lo = 0;
517 1.3.2.2 ad mid = 0;
518 1.3.2.2 ad hi = numextents - 1;
519 1.3.2.2 ad efs_dextent_to_extent(&eip->ei_di.di_extents[indir], &ex);
520 1.3.2.2 ad while (lo <= hi) {
521 1.3.2.2 ad int bboff;
522 1.3.2.2 ad int index;
523 1.3.2.2 ad
524 1.3.2.2 ad mid = (lo + hi) / 2;
525 1.3.2.2 ad
526 1.3.2.2 ad bboff = mid / EFS_EXTENTS_PER_BB;
527 1.3.2.2 ad index = mid % EFS_EXTENTS_PER_BB;
528 1.3.2.2 ad
529 1.3.2.2 ad err = efs_bread(emp, ex.ex_bn + bboff, NULL, &bp);
530 1.3.2.2 ad if (err) {
531 1.3.2.2 ad brelse(bp, 0);
532 1.3.2.2 ad EFS_DPRINTF(("efs_extent_iterator_init: bsrch read\n"));
533 1.3.2.2 ad return;
534 1.3.2.2 ad }
535 1.3.2.2 ad
536 1.3.2.2 ad efs_dextent_to_extent((struct efs_dextent *)bp->b_data + index,
537 1.3.2.2 ad &ex2);
538 1.3.2.2 ad brelse(bp, 0);
539 1.3.2.2 ad
540 1.3.2.2 ad offset = ex2.ex_offset * EFS_BB_SIZE;
541 1.3.2.2 ad length = ex2.ex_length * EFS_BB_SIZE;
542 1.3.2.2 ad
543 1.3.2.2 ad if (start_hint >= offset && start_hint < (offset + length))
544 1.3.2.2 ad break;
545 1.3.2.2 ad
546 1.3.2.2 ad if (start_hint < offset)
547 1.3.2.2 ad hi = mid - 1;
548 1.3.2.2 ad else
549 1.3.2.2 ad lo = mid + 1;
550 1.3.2.2 ad }
551 1.3.2.2 ad
552 1.3.2.2 ad /*
553 1.3.2.2 ad * This is bad. Either the hint is bogus (which shouldn't
554 1.3.2.2 ad * happen) or the extent list must be screwed up. We
555 1.3.2.2 ad * have to abort.
556 1.3.2.2 ad */
557 1.3.2.2 ad if (lo > hi) {
558 1.3.2.2 ad EFS_DPRINTF(("efs_extent_iterator_init: bsearch "
559 1.3.2.2 ad "failed to find extent\n"));
560 1.3.2.2 ad return;
561 1.3.2.2 ad }
562 1.3.2.2 ad
563 1.3.2.2 ad exi->exi_next = next + mid;
564 1.3.2.2 ad exi->exi_dnext = indir;
565 1.3.2.2 ad exi->exi_innext = mid;
566 1.3.2.2 ad }
567 1.3.2.2 ad
568 1.3.2.2 ad /*
569 1.3.2.2 ad * Return the next EFS extent.
570 1.3.2.2 ad *
571 1.3.2.2 ad * Returns 0 if another extent was iterated, -1 if we've exhausted all
572 1.3.2.2 ad * extents, or an error number. If 'exi' is non-NULL, the next extent is
573 1.3.2.2 ad * written to it (should it exist).
574 1.3.2.2 ad */
575 1.3.2.2 ad int
576 1.3.2.2 ad efs_extent_iterator_next(struct efs_extent_iterator *exi,
577 1.3.2.2 ad struct efs_extent *exp)
578 1.3.2.2 ad {
579 1.3.2.2 ad struct efs_extent ex;
580 1.3.2.2 ad struct efs_dextent *dexp;
581 1.3.2.2 ad struct efs_inode *eip = exi->exi_eip;
582 1.3.2.2 ad struct buf *bp;
583 1.3.2.2 ad int err, bboff, index;
584 1.3.2.2 ad
585 1.3.2.2 ad if (exi->exi_next++ >= eip->ei_numextents)
586 1.3.2.2 ad return (-1);
587 1.3.2.2 ad
588 1.3.2.2 ad /* direct or indirect extents? */
589 1.3.2.2 ad if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
590 1.3.2.2 ad if (exp != NULL) {
591 1.3.2.2 ad dexp = &eip->ei_di.di_extents[exi->exi_dnext++];
592 1.3.2.2 ad efs_dextent_to_extent(dexp, exp);
593 1.3.2.2 ad }
594 1.3.2.2 ad } else {
595 1.3.2.2 ad efs_dextent_to_extent(
596 1.3.2.2 ad &eip->ei_di.di_extents[exi->exi_dnext], &ex);
597 1.3.2.2 ad
598 1.3.2.2 ad bboff = exi->exi_innext / EFS_EXTENTS_PER_BB;
599 1.3.2.2 ad index = exi->exi_innext % EFS_EXTENTS_PER_BB;
600 1.3.2.2 ad
601 1.3.2.2 ad err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount),
602 1.3.2.2 ad ex.ex_bn + bboff, NULL, &bp);
603 1.3.2.2 ad if (err) {
604 1.3.2.2 ad EFS_DPRINTF(("efs_extent_iterator_next: "
605 1.3.2.2 ad "efs_bread failed: %d\n", err));
606 1.3.2.2 ad brelse(bp, 0);
607 1.3.2.2 ad return (err);
608 1.3.2.2 ad }
609 1.3.2.2 ad
610 1.3.2.2 ad if (exp != NULL) {
611 1.3.2.2 ad dexp = (struct efs_dextent *)bp->b_data + index;
612 1.3.2.2 ad efs_dextent_to_extent(dexp, exp);
613 1.3.2.2 ad }
614 1.3.2.2 ad brelse(bp, 0);
615 1.3.2.2 ad
616 1.3.2.2 ad bboff = exi->exi_innext++ / EFS_EXTENTS_PER_BB;
617 1.3.2.2 ad if (bboff >= ex.ex_length) {
618 1.3.2.2 ad exi->exi_innext = 0;
619 1.3.2.2 ad exi->exi_dnext++;
620 1.3.2.2 ad }
621 1.3.2.2 ad }
622 1.3.2.2 ad
623 1.3.2.2 ad return (0);
624 1.3.2.2 ad }
625