/*	$NetBSD: efs_subr.c,v 1.1 2007/06/29 23:30:29 rumble Exp $	*/

/*
 * Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.1 2007/06/29 23:30:29 rumble Exp $");

#include <sys/param.h>
#include <sys/kauth.h>
#include <sys/lwp.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/malloc.h>

#include <miscfs/genfs/genfs_node.h>

#include <fs/efs/efs.h>
#include <fs/efs/efs_sb.h>
#include <fs/efs/efs_dir.h>
#include <fs/efs/efs_genfs.h>
#include <fs/efs/efs_mount.h>
#include <fs/efs/efs_extent.h>
#include <fs/efs/efs_dinode.h>
#include <fs/efs/efs_inode.h>
#include <fs/efs/efs_subr.h>

MALLOC_DECLARE(M_EFSTMP);

struct pool efs_inode_pool;

/*
 * Calculate a checksum for the provided superblock in __host byte order__.
 *
 * At some point SGI changed the checksum algorithm slightly, which can be
 * enabled with the 'new' flag.
 *
 * Presumably this change occurred on or before 24 Oct 1988 (around IRIX 3.1),
 * so we're pretty unlikely to ever actually see an old checksum. Further, it
 * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new
 * checksum, whereas EFS_MAGIC filesystems could potentially use either
 * algorithm.
 *
 * See comp.sys.sgi <1991Aug9.050838.16876@odin.corp.sgi.com>
 */
int32_t
efs_sb_checksum(struct efs_sb *esb, int new)
{
	int i;
	int32_t cksum;
	int16_t *sbarray = (int16_t *)esb;

	KASSERT((EFS_SB_CHECKSUM_SIZE % 2) == 0);

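	/*
	 * Fold the superblock 16 bits at a time. With the 'new' algorithm
	 * the sign bit shifted out at each step is rotated back into bit 0;
	 * the old algorithm simply shifts it away.
	 */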
	for (i = cksum = 0; i < (EFS_SB_CHECKSUM_SIZE / 2); i++) {
		cksum ^= be16toh(sbarray[i]);
		cksum = (cksum << 1) | (new && cksum < 0);
	}

	return (cksum);
}

/*
 * Determine if the superblock is valid.
 *
 * Returns 0 if valid, else invalid. If invalid, 'why' is set to an
 * explanation.
 */
int
efs_sb_validate(struct efs_sb *esb, const char **why)
{
	uint32_t ocksum, ncksum;

	*why = NULL;

	if (be32toh(esb->sb_magic) != EFS_SB_MAGIC &&
	    be32toh(esb->sb_magic) != EFS_SB_NEWMAGIC) {
		*why = "sb_magic invalid";
		return (1);
	}

	ocksum = htobe32(efs_sb_checksum(esb, 0));
	ncksum = htobe32(efs_sb_checksum(esb, 1));
	if (esb->sb_checksum != ocksum && esb->sb_checksum != ncksum) {
		*why = "sb_checksum invalid";
		return (1);
	}

	if (be32toh(esb->sb_size) > EFS_SIZE_MAX) {
		*why = "sb_size > EFS_SIZE_MAX";
		return (1);
	}

	if (be32toh(esb->sb_firstcg) <= EFS_BB_BITMAP) {
		*why = "sb_firstcg <= EFS_BB_BITMAP";
		return (1);
	}

	/* XXX - add better sb consistency checks here */
	if (esb->sb_cgfsize == 0 ||
	    esb->sb_cgisize == 0 ||
	    esb->sb_ncg == 0 ||
	    esb->sb_bmsize == 0) {
		*why = "zero sb_cgfsize, sb_cgisize, sb_ncg, or sb_bmsize";
		return (1);
	}

	return (0);
}

/*
 * Determine the basic block offset and inode index within that block, given
 * the inode 'ino' and filesystem parameters _in host byte order_. The inode
 * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE.
 */
void
efs_locate_inode(ino_t ino, struct efs_sb *sbp, uint32_t *bboff, int *index)
{
	uint32_t cgfsize, firstcg;
	uint16_t cgisize;

	cgisize = be16toh(sbp->sb_cgisize);
	cgfsize = be32toh(sbp->sb_cgfsize);
	firstcg = be32toh(sbp->sb_firstcg);

	*bboff = firstcg + ((ino / (cgisize * EFS_DINODES_PER_BB)) * cgfsize) +
	    ((ino % (cgisize * EFS_DINODES_PER_BB)) / EFS_DINODES_PER_BB);
	*index = ino & (EFS_DINODES_PER_BB - 1);
}
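
#if 0
/*
 * Illustrative sketch, not compiled: a hypothetical helper showing how the
 * values produced by efs_locate_inode() map to a byte address on the
 * device, per the formula in the comment above.
 */
static uint64_t
efs_example_inode_byteaddr(ino_t ino, struct efs_sb *sbp)
{
	uint32_t bboff;
	int index;

	efs_locate_inode(ino, sbp, &bboff, &index);
	return ((uint64_t)bboff * EFS_BB_SIZE +
	    (uint64_t)index * EFS_DINODE_SIZE);
}
#endif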

/*
 * Read in an inode from disk.
 *
 * We actually take in four inodes at a time. Hopefully these will stick
 * around in the buffer cache and get used without going to disk.
 *
 * Returns 0 on success.
 */
int
efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l,
    struct efs_dinode *di)
{
	struct efs_sb *sbp;
	struct buf *bp;
	int index, err;
	uint32_t bboff;

	sbp = &emp->em_sb;
	efs_locate_inode(ino, sbp, &bboff, &index);

	err = efs_bread(emp, bboff, EFS_BY2BB(EFS_DINODE_SIZE), l, &bp);
	if (err) {
		brelse(bp);
		return (err);
	}
	memcpy(di, ((struct efs_dinode *)bp->b_data) + index, sizeof(*di));
	brelse(bp);

	return (0);
}
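
#if 0
/*
 * Illustrative sketch, not compiled: a hypothetical caller would read the
 * on-disk inode into ei_di and then pull the fields into host byte order
 * with efs_sync_dinode_to_inode() below.
 */
static int
efs_example_load_inode(struct efs_mount *emp, ino_t ino, struct lwp *l,
    struct efs_inode *ei)
{
	int err;

	err = efs_read_inode(emp, ino, l, &ei->ei_di);
	if (err)
		return (err);
	efs_sync_dinode_to_inode(ei);
	return (0);
}
#endif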

/*
 * Perform a read from our device, handling the potential DEV_BSIZE
 * messiness (although as of 19.2.2006 all ports appear to use 512) as
 * well as EFS block sizing.
 *
 * bboff: basic block offset
 * nbb: number of basic blocks to be read
 *
 * Returns 0 on success.
 */
int
efs_bread(struct efs_mount *emp, uint32_t bboff, int nbb, struct lwp *l,
    struct buf **bp)
{
	KASSERT(nbb > 0);
	KASSERT(bboff < EFS_SIZE_MAX);

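	/*
	 * bread() takes its block number in DEV_BSIZE units, so scale the
	 * EFS basic block offset accordingly.
	 */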
	return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE),
	    nbb * EFS_BB_SIZE, (l == NULL) ? NOCRED : l->l_cred, bp));
}

/*
 * Synchronise the in-core, host ordered and typed inode fields with their
 * corresponding on-disk, EFS ordered and typed copies.
 *
 * This is the inverse of efs_sync_inode_to_dinode(), and should be called
 * when an inode is loaded from disk.
 */
void
efs_sync_dinode_to_inode(struct efs_inode *ei)
{

	ei->ei_mode = be16toh(ei->ei_di.di_mode);	/* same as NetBSD */
	ei->ei_nlink = be16toh(ei->ei_di.di_nlink);
	ei->ei_uid = be16toh(ei->ei_di.di_uid);
	ei->ei_gid = be16toh(ei->ei_di.di_gid);
	ei->ei_size = be32toh(ei->ei_di.di_size);
	ei->ei_atime = be32toh(ei->ei_di.di_atime);
	ei->ei_mtime = be32toh(ei->ei_di.di_mtime);
	ei->ei_ctime = be32toh(ei->ei_di.di_ctime);
	ei->ei_gen = be32toh(ei->ei_di.di_gen);
	ei->ei_numextents = be16toh(ei->ei_di.di_numextents);
	ei->ei_version = ei->ei_di.di_version;
}

/*
 * Synchronise the on-disk, EFS ordered and typed inode fields with their
 * corresponding in-core, host ordered and typed copies.
 *
 * This is the inverse of efs_sync_dinode_to_inode(), and should be called
 * before an inode is flushed to disk.
 */
void
efs_sync_inode_to_dinode(struct efs_inode *ei)
{

	panic("readonly -- no need to call me");
}

#ifdef DIAGNOSTIC
/*
 * Ensure that the in-core inode's host cached fields match its on-disk copy.
 *
 * Returns 0 if they match.
 */
static int
efs_is_inode_synced(struct efs_inode *ei)
{
	int s;

	s = 0;
	/* XXX -- see above remarks about assumption */
	s += (ei->ei_mode != be16toh(ei->ei_di.di_mode));
	s += (ei->ei_nlink != be16toh(ei->ei_di.di_nlink));
	s += (ei->ei_uid != be16toh(ei->ei_di.di_uid));
	s += (ei->ei_gid != be16toh(ei->ei_di.di_gid));
	s += (ei->ei_size != be32toh(ei->ei_di.di_size));
	s += (ei->ei_atime != be32toh(ei->ei_di.di_atime));
	s += (ei->ei_mtime != be32toh(ei->ei_di.di_mtime));
	s += (ei->ei_ctime != be32toh(ei->ei_di.di_ctime));
	s += (ei->ei_gen != be32toh(ei->ei_di.di_gen));
	s += (ei->ei_numextents != be16toh(ei->ei_di.di_numextents));
	s += (ei->ei_version != ei->ei_di.di_version);

	return (s);
}
#endif

/*
 * Given an efs_dirblk structure and a componentname to search for, return the
 * corresponding inode if it is found.
 *
 * Returns 0 on success.
 */
static int
efs_dirblk_lookup(struct efs_dirblk *dir, struct componentname *cn,
    ino_t *inode)
{
	struct efs_dirent *de;
	int i, slot, offset;

	KASSERT(cn->cn_namelen <= EFS_DIRENT_NAMELEN_MAX);

	slot = offset = 0;

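	/*
	 * Each in-use slot stores a compressed offset into the dirblk at
	 * which the corresponding efs_dirent lives; free slots are marked
	 * EFS_DIRBLK_SLOT_FREE.
	 */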
	for (i = 0; i < dir->db_slots; i++) {
		offset = EFS_DIRENT_OFF_EXPND(dir->db_space[i]);

		if (offset == EFS_DIRBLK_SLOT_FREE)
			continue;

		de = (struct efs_dirent *)((char *)dir + offset);
		if (de->de_namelen == cn->cn_namelen &&
		    (strncmp(cn->cn_nameptr, de->de_name, cn->cn_namelen) == 0)) {
			slot = i;
			break;
		}
	}
	if (i == dir->db_slots)
		return (ENOENT);

	KASSERT(slot < offset && offset < EFS_DIRBLK_SPACE_SIZE);
	de = (struct efs_dirent *)((char *)dir + offset);
	*inode = be32toh(de->de_inumber);

	return (0);
}

/*
 * Given an extent descriptor that represents a directory, look up the
 * componentname within its efs_dirblks. If it is found, return the
 * corresponding inode in 'ino'.
 *
 * Returns 0 on success.
 */
static int
efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex,
    struct componentname *cn, ino_t *ino)
{
	struct efs_dirblk *db;
	struct buf *bp;
	int i, err;

	/*
	 * Read in the entire extent, evaluating all of the dirblks until we
	 * find our entry. If we don't, return ENOENT.
	 */
	err = efs_bread(emp, ex->ex_bn, ex->ex_length, NULL, &bp);
	if (err) {
		printf("efs: warning: invalid extent descriptor\n");
		brelse(bp);
		return (err);
	}

	for (i = 0; i < ex->ex_length; i++) {
		db = ((struct efs_dirblk *)bp->b_data) + i;
		if (efs_dirblk_lookup(db, cn, ino) == 0) {
			brelse(bp);
			return (0);
		}
	}

	brelse(bp);
	return (ENOENT);
}

/*
 * Given the provided in-core inode, look up the component name requested. If
 * we find it, 'ino' reflects its corresponding on-disk inode number.
 *
 * Returns 0 on success.
 */
int
efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei,
    struct componentname *cn, ino_t *ino)
{
	struct efs_extent ex;
	struct efs_extent_iterator exi;
	int ret;

	KASSERT(VOP_ISLOCKED(ei->ei_vp));
	KASSERT(efs_is_inode_synced(ei) == 0);
	KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR);

	efs_extent_iterator_init(&exi, ei);
	while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
		if (efs_extent_lookup(emp, &ex, cn, ino) == 0) {
			efs_extent_iterator_free(&exi);
			return (0);
		}
	}
	efs_extent_iterator_free(&exi);

	return ((ret == -1) ? ENOENT : ret);
}

/*
 * Convert on-disk extent structure to in-core format.
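 *
 * On disk an extent descriptor packs into eight bytes: byte 0 holds the
 * magic, bytes 1-3 the 24-bit block number, byte 4 the length in basic
 * blocks, and bytes 5-7 the 24-bit logical offset.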
 */
void
efs_dextent_to_extent(struct efs_dextent *dex, struct efs_extent *ex)
{

	KASSERT(dex != NULL && ex != NULL);

	ex->ex_magic = dex->ex_bytes[0];
	ex->ex_bn = be32toh(dex->ex_words[0]) & 0x00ffffff;
	ex->ex_length = dex->ex_bytes[4];
	ex->ex_offset = be32toh(dex->ex_words[1]) & 0x00ffffff;
}

/*
 * Convert in-core extent format to on-disk structure.
 */
void
efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex)
{

	KASSERT(ex != NULL && dex != NULL);
	KASSERT(ex->ex_magic == EFS_EXTENT_MAGIC);
	KASSERT((ex->ex_bn & ~EFS_EXTENT_BN_MASK) == 0);
	KASSERT((ex->ex_offset & ~EFS_EXTENT_OFFSET_MASK) == 0);

	dex->ex_words[0] = htobe32(ex->ex_bn);
	dex->ex_bytes[0] = ex->ex_magic;
	dex->ex_words[1] = htobe32(ex->ex_offset);
	dex->ex_bytes[4] = ex->ex_length;
}

/*
 * Initialise an extent iterator.
 */
void
efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip)
{

	exi->exi_eip = eip;
	exi->exi_next = 0;
	exi->exi_dnext = 0;
	exi->exi_innext = 0;
	exi->exi_incache = NULL;
	exi->exi_nincache = 0;
}

/*
 * Return the next EFS extent.
 *
 * Returns 0 if another extent was iterated, -1 if we've exhausted all
 * extents, or an error number. If 'exp' is non-NULL, the next extent is
 * written to it (should it exist).
 */
int
efs_extent_iterator_next(struct efs_extent_iterator *exi,
    struct efs_extent *exp)
{
	struct efs_inode *eip = exi->exi_eip;

	if (exi->exi_next++ >= eip->ei_numextents)
		return (-1);

	/* direct or indirect extents? */
	if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
		if (exp != NULL) {
			efs_dextent_to_extent(
			    &eip->ei_di.di_extents[exi->exi_dnext++], exp);
		}
	} else {
		/*
		 * Cache a full indirect extent worth of extent descriptors.
		 * This is maximally 124KB (248 * 512).
		 */
		if (exi->exi_incache == NULL) {
			struct efs_extent ex;
			struct buf *bp;
			int err;

			efs_dextent_to_extent(
			    &eip->ei_di.di_extents[exi->exi_dnext], &ex);

			err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount),
			    ex.ex_bn, ex.ex_length, NULL, &bp);
			if (err) {
				EFS_DPRINTF(("efs_extent_iterator_next: "
				    "efs_bread failed: %d\n", err));
				brelse(bp);
				return (err);
			}

			exi->exi_incache = malloc(ex.ex_length * EFS_BB_SIZE,
			    M_EFSTMP, M_WAITOK);
			exi->exi_nincache = ex.ex_length * EFS_BB_SIZE /
			    sizeof(struct efs_dextent);
			memcpy(exi->exi_incache, bp->b_data,
			    ex.ex_length * EFS_BB_SIZE);
			brelse(bp);
		}

		if (exp != NULL) {
			efs_dextent_to_extent(
			    &exi->exi_incache[exi->exi_innext++], exp);
		}

		/* if this is the last one, ditch the cache */
		if (exi->exi_innext >= exi->exi_nincache) {
			exi->exi_innext = 0;
			exi->exi_nincache = 0;
			free(exi->exi_incache, M_EFSTMP);
			exi->exi_incache = NULL;
			exi->exi_dnext++;
		}
	}

	return (0);
}
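
#if 0
/*
 * Illustrative sketch, not compiled: the usual iteration pattern over an
 * inode's extents, as in efs_inode_lookup() above. The 'visit' callback
 * is hypothetical.
 */
static int
efs_example_walk_extents(struct efs_inode *eip,
    void (*visit)(const struct efs_extent *))
{
	struct efs_extent_iterator exi;
	struct efs_extent ex;
	int ret;

	efs_extent_iterator_init(&exi, eip);
	while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0)
		visit(&ex);
	efs_extent_iterator_free(&exi);

	/* -1 means the extents were simply exhausted */
	return ((ret == -1) ? 0 : ret);
}
#endif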

/*
 * Clean up the extent iterator.
 */
void
efs_extent_iterator_free(struct efs_extent_iterator *exi)
{

	if (exi->exi_incache != NULL)
		free(exi->exi_incache, M_EFSTMP);
	efs_extent_iterator_init(exi, NULL);
}