efs_subr.c revision 1.7.42.1 1 /* $NetBSD: efs_subr.c,v 1.7.42.1 2013/02/25 00:29:47 tls Exp $ */
2
3 /*
4 * Copyright (c) 2006 Stephen M. Rumble <rumble (at) ephemeral.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/cdefs.h>
20 __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.7.42.1 2013/02/25 00:29:47 tls Exp $");
21
22 #include <sys/param.h>
23 #include <sys/kauth.h>
24 #include <sys/lwp.h>
25 #include <sys/proc.h>
26 #include <sys/buf.h>
27 #include <sys/mount.h>
28 #include <sys/vnode.h>
29 #include <sys/namei.h>
30 #include <sys/stat.h>
31 #include <sys/malloc.h>
32
33 #include <miscfs/genfs/genfs_node.h>
34
35 #include <fs/efs/efs.h>
36 #include <fs/efs/efs_sb.h>
37 #include <fs/efs/efs_dir.h>
38 #include <fs/efs/efs_genfs.h>
39 #include <fs/efs/efs_mount.h>
40 #include <fs/efs/efs_extent.h>
41 #include <fs/efs/efs_dinode.h>
42 #include <fs/efs/efs_inode.h>
43 #include <fs/efs/efs_subr.h>
44
45 struct pool efs_inode_pool;
46
47 /*
48 * Calculate a checksum for the provided superblock in __host byte order__.
49 *
50 * At some point SGI changed the checksum algorithm slightly, which can be
51 * enabled with the 'new' flag.
52 *
53 * Presumably this change occured on or before 24 Oct 1988 (around IRIX 3.1),
54 * so we're pretty unlikely to ever actually see an old checksum. Further, it
55 * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new
56 * checksum whereas EFS_MAGIC filesystems could potentially use either
57 * algorithm.
58 *
59 * See comp.sys.sgi <1991Aug9.050838.16876 (at) odin.corp.sgi.com>
60 */
61 int32_t
62 efs_sb_checksum(struct efs_sb *esb, int new)
63 {
64 int i;
65 int32_t cksum;
66 uint16_t *sbarray = (uint16_t *)esb;
67
68 KASSERT((EFS_SB_CHECKSUM_SIZE % 2) == 0);
69
70 for (i = cksum = 0; i < (EFS_SB_CHECKSUM_SIZE / 2); i++) {
71 cksum ^= be16toh(sbarray[i]);
72 cksum = (cksum << 1) | (new && cksum < 0);
73 }
74
75 return (cksum);
76 }
77
78 /*
79 * Determine if the superblock is valid.
80 *
81 * Returns 0 if valid, else invalid. If invalid, 'why' is set to an
82 * explanation.
83 */
84 int
85 efs_sb_validate(struct efs_sb *esb, const char **why)
86 {
87 uint32_t ocksum, ncksum;
88
89 *why = NULL;
90
91 if (be32toh(esb->sb_magic) != EFS_SB_MAGIC &&
92 be32toh(esb->sb_magic) != EFS_SB_NEWMAGIC) {
93 *why = "sb_magic invalid";
94 return (1);
95 }
96
97 ocksum = htobe32(efs_sb_checksum(esb, 0));
98 ncksum = htobe32(efs_sb_checksum(esb, 1));
99 if (esb->sb_checksum != ocksum && esb->sb_checksum != ncksum) {
100 *why = "sb_checksum invalid";
101 return (1);
102 }
103
104 if (be32toh(esb->sb_size) > EFS_SIZE_MAX) {
105 *why = "sb_size > EFS_SIZE_MAX";
106 return (1);
107 }
108
109 if (be32toh(esb->sb_firstcg) <= EFS_BB_BITMAP) {
110 *why = "sb_firstcg <= EFS_BB_BITMAP";
111 return (1);
112 }
113
114 /* XXX - add better sb consistency checks here */
115 if (esb->sb_cgfsize == 0 ||
116 esb->sb_cgisize == 0 ||
117 esb->sb_ncg == 0 ||
118 esb->sb_bmsize == 0) {
119 *why = "something bad happened";
120 return (1);
121 }
122
123 return (0);
124 }
125
126 /*
127 * Determine the basic block offset and inode index within that block, given
128 * the inode 'ino' and filesystem parameters _in host byte order_. The inode
129 * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE.
130 */
131 void
132 efs_locate_inode(ino_t ino, struct efs_sb *sbp, uint32_t *bboff, int *index)
133 {
134 uint32_t cgfsize, firstcg;
135 uint16_t cgisize;
136
137 cgisize = be16toh(sbp->sb_cgisize);
138 cgfsize = be32toh(sbp->sb_cgfsize);
139 firstcg = be32toh(sbp->sb_firstcg),
140
141 *bboff = firstcg + ((ino / (cgisize * EFS_DINODES_PER_BB)) * cgfsize) +
142 ((ino % (cgisize * EFS_DINODES_PER_BB)) / EFS_DINODES_PER_BB);
143 *index = ino & (EFS_DINODES_PER_BB - 1);
144 }
145
146 /*
147 * Read in an inode from disk.
148 *
149 * We actually take in four inodes at a time. Hopefully these will stick
150 * around in the buffer cache and get used without going to disk.
151 *
152 * Returns 0 on success.
153 */
154 int
155 efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l,
156 struct efs_dinode *di)
157 {
158 struct efs_sb *sbp;
159 struct buf *bp;
160 int index, err;
161 uint32_t bboff;
162
163 sbp = &emp->em_sb;
164 efs_locate_inode(ino, sbp, &bboff, &index);
165
166 err = efs_bread(emp, bboff, l, &bp);
167 if (err) {
168 return (err);
169 }
170 memcpy(di, ((struct efs_dinode *)bp->b_data) + index, sizeof(*di));
171 brelse(bp, 0);
172
173 return (0);
174 }
175
176 /*
177 * Perform a read from our device handling the potential DEV_BSIZE
178 * messiness (although as of 19.2.2006, all ports appear to use 512) as
179 * we as EFS block sizing.
180 *
181 * bboff: basic block offset
182 *
183 * Returns 0 on success.
184 */
185 int
186 efs_bread(struct efs_mount *emp, uint32_t bboff, struct lwp *l, struct buf **bp)
187 {
188 KASSERT(bboff < EFS_SIZE_MAX);
189
190 return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE),
191 EFS_BB_SIZE, (l == NULL) ? NOCRED : l->l_cred, 0, bp));
192 }
193
194 /*
195 * Synchronise the in-core, host ordered and typed inode fields with their
196 * corresponding on-disk, EFS ordered and typed copies.
197 *
198 * This is the inverse of efs_dinode_sync_inode(), and should be called when
199 * an inode is loaded from disk.
200 */
201 void
202 efs_sync_dinode_to_inode(struct efs_inode *ei)
203 {
204
205 ei->ei_mode = be16toh(ei->ei_di.di_mode); /*same as nbsd*/
206 ei->ei_nlink = be16toh(ei->ei_di.di_nlink);
207 ei->ei_uid = be16toh(ei->ei_di.di_uid);
208 ei->ei_gid = be16toh(ei->ei_di.di_gid);
209 ei->ei_size = be32toh(ei->ei_di.di_size);
210 ei->ei_atime = be32toh(ei->ei_di.di_atime);
211 ei->ei_mtime = be32toh(ei->ei_di.di_mtime);
212 ei->ei_ctime = be32toh(ei->ei_di.di_ctime);
213 ei->ei_gen = be32toh(ei->ei_di.di_gen);
214 ei->ei_numextents = be16toh(ei->ei_di.di_numextents);
215 ei->ei_version = ei->ei_di.di_version;
216 }
217
218 /*
219 * Synchronise the on-disk, EFS ordered and typed inode fields with their
220 * corresponding in-core, host ordered and typed copies.
221 *
222 * This is the inverse of efs_inode_sync_dinode(), and should be called before
223 * an inode is flushed to disk.
224 */
225 void
226 efs_sync_inode_to_dinode(struct efs_inode *ei)
227 {
228
229 panic("readonly -- no need to call me");
230 }
231
232 #ifdef DIAGNOSTIC
233 /*
234 * Ensure that the in-core inode's host cached fields match its on-disk copy.
235 *
236 * Returns 0 if they match.
237 */
238 static int
239 efs_is_inode_synced(struct efs_inode *ei)
240 {
241 int s;
242
243 s = 0;
244 /* XXX -- see above remarks about assumption */
245 s += (ei->ei_mode != be16toh(ei->ei_di.di_mode));
246 s += (ei->ei_nlink != be16toh(ei->ei_di.di_nlink));
247 s += (ei->ei_uid != be16toh(ei->ei_di.di_uid));
248 s += (ei->ei_gid != be16toh(ei->ei_di.di_gid));
249 s += (ei->ei_size != be32toh(ei->ei_di.di_size));
250 s += (ei->ei_atime != be32toh(ei->ei_di.di_atime));
251 s += (ei->ei_mtime != be32toh(ei->ei_di.di_mtime));
252 s += (ei->ei_ctime != be32toh(ei->ei_di.di_ctime));
253 s += (ei->ei_gen != be32toh(ei->ei_di.di_gen));
254 s += (ei->ei_numextents != be16toh(ei->ei_di.di_numextents));
255 s += (ei->ei_version != ei->ei_di.di_version);
256
257 return (s);
258 }
259 #endif
260
261 /*
262 * Given an efs_dirblk structure and a componentname to search for, return the
263 * corresponding inode if it is found.
264 *
265 * Returns 0 on success.
266 */
267 static int
268 efs_dirblk_lookup(struct efs_dirblk *dir, struct componentname *cn,
269 ino_t *inode)
270 {
271 struct efs_dirent *de;
272 int i, slot, offset;
273
274 KASSERT(cn->cn_namelen <= EFS_DIRENT_NAMELEN_MAX);
275
276 slot = offset = 0;
277
278 for (i = 0; i < dir->db_slots; i++) {
279 offset = EFS_DIRENT_OFF_EXPND(dir->db_space[i]);
280
281 if (offset == EFS_DIRBLK_SLOT_FREE)
282 continue;
283
284 de = (struct efs_dirent *)((char *)dir + offset);
285 if (de->de_namelen == cn->cn_namelen &&
286 (strncmp(cn->cn_nameptr, de->de_name, cn->cn_namelen) == 0)){
287 slot = i;
288 break;
289 }
290 }
291 if (i == dir->db_slots)
292 return (ENOENT);
293
294 KASSERT(slot < offset && offset < EFS_DIRBLK_SPACE_SIZE);
295 de = (struct efs_dirent *)((char *)dir + offset);
296 *inode = be32toh(de->de_inumber);
297
298 return (0);
299 }
300
301 /*
302 * Given an extent descriptor that represents a directory, look up
303 * componentname within its efs_dirblk's. If it is found, return the
304 * corresponding inode in 'ino'.
305 *
306 * Returns 0 on success.
307 */
308 static int
309 efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex,
310 struct componentname *cn, ino_t *ino)
311 {
312 struct efs_dirblk *db;
313 struct buf *bp;
314 int i, err;
315
316 /*
317 * Read in each of the dirblks until we find our entry.
318 * If we don't, return ENOENT.
319 */
320 for (i = 0; i < ex->ex_length; i++) {
321 err = efs_bread(emp, ex->ex_bn + i, NULL, &bp);
322 if (err) {
323 printf("efs: warning: invalid extent descriptor\n");
324 return (err);
325 }
326
327 db = (struct efs_dirblk *)bp->b_data;
328 if (efs_dirblk_lookup(db, cn, ino) == 0) {
329 brelse(bp, 0);
330 return (0);
331 }
332 brelse(bp, 0);
333 }
334
335 return (ENOENT);
336 }
337
338 /*
339 * Given the provided in-core inode, look up the pathname requested. If
340 * we find it, 'ino' reflects its corresponding on-disk inode number.
341 *
342 * Returns 0 on success.
343 */
344 int
345 efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei,
346 struct componentname *cn, ino_t *ino)
347 {
348 struct efs_extent ex;
349 struct efs_extent_iterator exi;
350 int ret;
351
352 KASSERT(VOP_ISLOCKED(ei->ei_vp));
353 KASSERT(efs_is_inode_synced(ei) == 0);
354 KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR);
355
356 efs_extent_iterator_init(&exi, ei, 0);
357 while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
358 if (efs_extent_lookup(emp, &ex, cn, ino) == 0) {
359 return (0);
360 }
361 }
362
363 return ((ret == -1) ? ENOENT : ret);
364 }
365
366 /*
367 * Convert on-disk extent structure to in-core format.
368 */
369 void
370 efs_dextent_to_extent(struct efs_dextent *dex, struct efs_extent *ex)
371 {
372
373 KASSERT(dex != NULL && ex != NULL);
374
375 ex->ex_magic = dex->ex_bytes[0];
376 ex->ex_bn = be32toh(dex->ex_words[0]) & 0x00ffffff;
377 ex->ex_length = dex->ex_bytes[4];
378 ex->ex_offset = be32toh(dex->ex_words[1]) & 0x00ffffff;
379 }
380
381 /*
382 * Convert in-core extent format to on-disk structure.
383 */
384 void
385 efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex)
386 {
387
388 KASSERT(ex != NULL && dex != NULL);
389 KASSERT(ex->ex_magic == EFS_EXTENT_MAGIC);
390 KASSERT((ex->ex_bn & ~EFS_EXTENT_BN_MASK) == 0);
391 KASSERT((ex->ex_offset & ~EFS_EXTENT_OFFSET_MASK) == 0);
392
393 dex->ex_words[0] = htobe32(ex->ex_bn);
394 dex->ex_bytes[0] = ex->ex_magic;
395 dex->ex_words[1] = htobe32(ex->ex_offset);
396 dex->ex_bytes[4] = ex->ex_length;
397 }
398
399 /*
400 * Initialise an extent iterator.
401 *
402 * If start_hint is non-0, attempt to set up the iterator beginning with the
403 * extent descriptor in which the start_hint'th byte exists. Callers must not
404 * expect success (this is simply an optimisation), so we reserve the right
405 * to start from the beginning.
406 */
407 void
408 efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip,
409 off_t start_hint)
410 {
411 struct efs_extent ex, ex2;
412 struct buf *bp;
413 struct efs_mount *emp = VFSTOEFS(eip->ei_vp->v_mount);
414 off_t offset, length, next;
415 int i, err, numextents, numinextents;
416 int hi, lo, mid;
417 int indir;
418
419 exi->exi_eip = eip;
420 exi->exi_next = 0;
421 exi->exi_dnext = 0;
422 exi->exi_innext = 0;
423
424 if (start_hint == 0)
425 return;
426
427 /* force iterator to end if hint is too big */
428 if (start_hint >= eip->ei_size) {
429 exi->exi_next = eip->ei_numextents;
430 return;
431 }
432
433 /*
434 * Use start_hint to jump to the right extent descriptor. We'll
435 * iterate over the 12 indirect extents because it's cheap, then
436 * bring the appropriate vector into core and binary search it.
437 */
438
439 /*
440 * Handle the small file case separately first...
441 */
442 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
443 for (i = 0; i < eip->ei_numextents; i++) {
444 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex);
445
446 offset = ex.ex_offset * EFS_BB_SIZE;
447 length = ex.ex_length * EFS_BB_SIZE;
448
449 if (start_hint >= offset &&
450 start_hint < (offset + length)) {
451 exi->exi_next = exi->exi_dnext = i;
452 return;
453 }
454 }
455
456 /* shouldn't get here, no? */
457 EFS_DPRINTF(("efs_extent_iterator_init: bad direct extents\n"));
458 return;
459 }
460
461 /*
462 * Now do the large files with indirect extents...
463 *
464 * The first indirect extent's ex_offset field contains the
465 * number of indirect extents used.
466 */
467 efs_dextent_to_extent(&eip->ei_di.di_extents[0], &ex);
468
469 numinextents = ex.ex_offset;
470 if (numinextents < 1 || numinextents >= EFS_DIRECTEXTENTS) {
471 EFS_DPRINTF(("efs_extent_iterator_init: bad ex.ex_offset\n"));
472 return;
473 }
474
475 next = 0;
476 indir = -1;
477 numextents = 0;
478 for (i = 0; i < numinextents; i++) {
479 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex);
480
481 err = efs_bread(emp, ex.ex_bn, NULL, &bp);
482 if (err) {
483 return;
484 }
485
486 efs_dextent_to_extent((struct efs_dextent *)bp->b_data, &ex2);
487 brelse(bp, 0);
488
489 offset = ex2.ex_offset * EFS_BB_SIZE;
490
491 if (offset > start_hint) {
492 indir = MAX(0, i - 1);
493 break;
494 }
495
496 /* number of extents prior to this indirect vector of extents */
497 next += numextents;
498
499 /* number of extents within this indirect vector of extents */
500 numextents = ex.ex_length * EFS_EXTENTS_PER_BB;
501 numextents = MIN(numextents, eip->ei_numextents - next);
502 }
503
504 /*
505 * We hit the end, so assume it's in the last extent.
506 */
507 if (indir == -1)
508 indir = numinextents - 1;
509
510 /*
511 * Binary search to find our desired direct extent.
512 */
513 lo = 0;
514 mid = 0;
515 hi = numextents - 1;
516 efs_dextent_to_extent(&eip->ei_di.di_extents[indir], &ex);
517 while (lo <= hi) {
518 int bboff;
519 int index;
520
521 mid = (lo + hi) / 2;
522
523 bboff = mid / EFS_EXTENTS_PER_BB;
524 index = mid % EFS_EXTENTS_PER_BB;
525
526 err = efs_bread(emp, ex.ex_bn + bboff, NULL, &bp);
527 if (err) {
528 EFS_DPRINTF(("efs_extent_iterator_init: bsrch read\n"));
529 return;
530 }
531
532 efs_dextent_to_extent((struct efs_dextent *)bp->b_data + index,
533 &ex2);
534 brelse(bp, 0);
535
536 offset = ex2.ex_offset * EFS_BB_SIZE;
537 length = ex2.ex_length * EFS_BB_SIZE;
538
539 if (start_hint >= offset && start_hint < (offset + length))
540 break;
541
542 if (start_hint < offset)
543 hi = mid - 1;
544 else
545 lo = mid + 1;
546 }
547
548 /*
549 * This is bad. Either the hint is bogus (which shouldn't
550 * happen) or the extent list must be screwed up. We
551 * have to abort.
552 */
553 if (lo > hi) {
554 EFS_DPRINTF(("efs_extent_iterator_init: bsearch "
555 "failed to find extent\n"));
556 return;
557 }
558
559 exi->exi_next = next + mid;
560 exi->exi_dnext = indir;
561 exi->exi_innext = mid;
562 }
563
564 /*
565 * Return the next EFS extent.
566 *
567 * Returns 0 if another extent was iterated, -1 if we've exhausted all
568 * extents, or an error number. If 'exi' is non-NULL, the next extent is
569 * written to it (should it exist).
570 */
571 int
572 efs_extent_iterator_next(struct efs_extent_iterator *exi,
573 struct efs_extent *exp)
574 {
575 struct efs_extent ex;
576 struct efs_dextent *dexp;
577 struct efs_inode *eip = exi->exi_eip;
578 struct buf *bp;
579 int err, bboff, index;
580
581 if (exi->exi_next++ >= eip->ei_numextents)
582 return (-1);
583
584 /* direct or indirect extents? */
585 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
586 if (exp != NULL) {
587 dexp = &eip->ei_di.di_extents[exi->exi_dnext++];
588 efs_dextent_to_extent(dexp, exp);
589 }
590 } else {
591 efs_dextent_to_extent(
592 &eip->ei_di.di_extents[exi->exi_dnext], &ex);
593
594 bboff = exi->exi_innext / EFS_EXTENTS_PER_BB;
595 index = exi->exi_innext % EFS_EXTENTS_PER_BB;
596
597 err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount),
598 ex.ex_bn + bboff, NULL, &bp);
599 if (err) {
600 EFS_DPRINTF(("efs_extent_iterator_next: "
601 "efs_bread failed: %d\n", err));
602 return (err);
603 }
604
605 if (exp != NULL) {
606 dexp = (struct efs_dextent *)bp->b_data + index;
607 efs_dextent_to_extent(dexp, exp);
608 }
609 brelse(bp, 0);
610
611 bboff = exi->exi_innext++ / EFS_EXTENTS_PER_BB;
612 if (bboff >= ex.ex_length) {
613 exi->exi_innext = 0;
614 exi->exi_dnext++;
615 }
616 }
617
618 return (0);
619 }
620