Home | History | Annotate | Line # | Download | only in efs
efs_subr.c revision 1.1
      1 /*	$NetBSD: efs_subr.c,v 1.1 2007/06/29 23:30:29 rumble Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2006 Stephen M. Rumble <rumble (at) ephemeral.org>
      5  *
      6  * Permission to use, copy, modify, and distribute this software for any
      7  * purpose with or without fee is hereby granted, provided that the above
      8  * copyright notice and this permission notice appear in all copies.
      9  *
     10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     17  */
     18 
     19 #include <sys/cdefs.h>
     20 __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.1 2007/06/29 23:30:29 rumble Exp $");
     21 
     22 #include <sys/param.h>
     23 #include <sys/kauth.h>
     24 #include <sys/lwp.h>
     25 #include <sys/proc.h>
     26 #include <sys/buf.h>
     27 #include <sys/mount.h>
     28 #include <sys/vnode.h>
     29 #include <sys/namei.h>
     30 #include <sys/stat.h>
     31 #include <sys/malloc.h>
     32 
     33 #include <miscfs/genfs/genfs_node.h>
     34 
     35 #include <fs/efs/efs.h>
     36 #include <fs/efs/efs_sb.h>
     37 #include <fs/efs/efs_dir.h>
     38 #include <fs/efs/efs_genfs.h>
     39 #include <fs/efs/efs_mount.h>
     40 #include <fs/efs/efs_extent.h>
     41 #include <fs/efs/efs_dinode.h>
     42 #include <fs/efs/efs_inode.h>
     43 #include <fs/efs/efs_subr.h>
     44 
/*
 * Malloc type used in this file for the extent iterator's temporary cache
 * of indirect extent descriptors.
 */
MALLOC_DECLARE(M_EFSTMP);

/*
 * NOTE(review): presumably the pool backing in-core EFS inodes; it is not
 * referenced anywhere else in this file -- confirm against its users.
 */
struct pool efs_inode_pool;
     48 
     49 /*
     50  * Calculate a checksum for the provided superblock in __host byte order__.
     51  *
     52  * At some point SGI changed the checksum algorithm slightly, which can be
     53  * enabled with the 'new' flag.
     54  *
     55  * Presumably this change occured on or before 24 Oct 1988 (around IRIX 3.1),
     56  * so we're pretty unlikely to ever actually see an old checksum. Further, it
     57  * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new
     58  * checksum whereas EFS_MAGIC filesystems could potentially use either
     59  * algorithm.
     60  *
     61  * See comp.sys.sgi <1991Aug9.050838.16876 (at) odin.corp.sgi.com>
     62  */
     63 int32_t
     64 efs_sb_checksum(struct efs_sb *esb, int new)
     65 {
     66 	int i;
     67 	int32_t cksum;
     68 	int16_t *sbarray = (int16_t *)esb;
     69 
     70 	KASSERT((EFS_SB_CHECKSUM_SIZE % 2) == 0);
     71 
     72 	for (i = cksum = 0; i < (EFS_SB_CHECKSUM_SIZE / 2); i++) {
     73 		cksum ^= be16toh(sbarray[i]);
     74 		cksum  = (cksum << 1) | (new && cksum < 0);
     75 	}
     76 
     77 	return (cksum);
     78 }
     79 
     80 /*
     81  * Determine if the superblock is valid.
     82  *
     83  * Returns 0 if valid, else invalid. If invalid, 'why' is set to an
     84  * explanation.
     85  */
     86 int
     87 efs_sb_validate(struct efs_sb *esb, const char **why)
     88 {
     89 	uint32_t ocksum, ncksum;
     90 
     91 	*why = NULL;
     92 
     93 	if (be32toh(esb->sb_magic) != EFS_SB_MAGIC &&
     94 	    be32toh(esb->sb_magic != EFS_SB_NEWMAGIC)) {
     95 		*why = "sb_magic invalid";
     96 		return (1);
     97 	}
     98 
     99 	ocksum = htobe32(efs_sb_checksum(esb, 0));
    100 	ncksum = htobe32(efs_sb_checksum(esb, 1));
    101 	if (esb->sb_checksum != ocksum && esb->sb_checksum != ncksum) {
    102 		*why = "sb_checksum invalid";
    103 		return (1);
    104 	}
    105 
    106 	if (be32toh(esb->sb_size) > EFS_SIZE_MAX) {
    107 		*why = "sb_size > EFS_SIZE_MAX";
    108 		return (1);
    109 	}
    110 
    111 	if (be32toh(esb->sb_firstcg) <= EFS_BB_BITMAP) {
    112 		*why = "sb_firstcg <= EFS_BB_BITMAP";
    113 		return (1);
    114 	}
    115 
    116 	/* XXX - add better sb consistency checks here */
    117 	if (esb->sb_cgfsize == 0 ||
    118 	    esb->sb_cgisize == 0 ||
    119 	    esb->sb_ncg == 0 ||
    120 	    esb->sb_bmsize == 0) {
    121 		*why = "something bad happened";
    122 		return (1);
    123 	}
    124 
    125 	return (0);
    126 }
    127 
    128 /*
    129  * Determine the basic block offset and inode index within that block, given
    130  * the inode 'ino' and filesystem parameters _in host byte order_. The inode
    131  * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE.
    132  */
    133 void
    134 efs_locate_inode(ino_t ino, struct efs_sb *sbp, uint32_t *bboff, int *index)
    135 {
    136 	uint32_t cgfsize, firstcg;
    137 	uint16_t cgisize;
    138 
    139 	cgisize = be16toh(sbp->sb_cgisize);
    140 	cgfsize = be32toh(sbp->sb_cgfsize);
    141 	firstcg = be32toh(sbp->sb_firstcg),
    142 
    143 	*bboff = firstcg + ((ino / (cgisize * EFS_DINODES_PER_BB)) * cgfsize) +
    144 	    ((ino % (cgisize * EFS_DINODES_PER_BB)) / EFS_DINODES_PER_BB);
    145 	*index = ino & (EFS_DINODES_PER_BB - 1);
    146 }
    147 
    148 /*
    149  * Read in an inode from disk.
    150  *
    151  * We actually take in four inodes at a time. Hopefully these will stick
    152  * around in the buffer cache and get used without going to disk.
    153  *
    154  * Returns 0 on success.
    155  */
    156 int
    157 efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l,
    158     struct efs_dinode *di)
    159 {
    160 	struct efs_sb *sbp;
    161 	struct buf *bp;
    162 	int index, err;
    163 	uint32_t bboff;
    164 
    165 	sbp = &emp->em_sb;
    166 	efs_locate_inode(ino, sbp, &bboff, &index);
    167 
    168 	err = efs_bread(emp, bboff, EFS_BY2BB(EFS_DINODE_SIZE), l, &bp);
    169 	if (err) {
    170 		brelse(bp);
    171 		return (err);
    172 	}
    173 	memcpy(di, ((struct efs_dinode *)bp->b_data) + index, sizeof(*di));
    174 	brelse(bp);
    175 
    176 	return (0);
    177 }
    178 
    179 /*
    180  * Perform a read from our device handling the potential DEV_BSIZE
    181  * messiness (although as of 19.2.2006, all ports appear to use 512) as
    182  * we as EFS block sizing.
    183  *
    184  * bboff: basic block offset
    185  * nbb: number of basic blocks to be read
    186  *
    187  * Returns 0 on success.
    188  */
    189 int
    190 efs_bread(struct efs_mount *emp, uint32_t bboff, int nbb, struct lwp *l,
    191     struct buf **bp)
    192 {
    193 	KASSERT(nbb > 0);
    194 	KASSERT(bboff < EFS_SIZE_MAX);
    195 
    196 	return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE),
    197 	    nbb * EFS_BB_SIZE, (l == NULL) ? NOCRED : l->l_cred, bp));
    198 }
    199 
    200 /*
    201  * Synchronise the in-core, host ordered and typed inode fields with their
    202  * corresponding on-disk, EFS ordered and typed copies.
    203  *
    204  * This is the inverse of efs_dinode_sync_inode(), and should be called when
    205  * an inode is loaded from disk.
    206  */
    207 void
    208 efs_sync_dinode_to_inode(struct efs_inode *ei)
    209 {
    210 
    211 	ei->ei_mode		= be16toh(ei->ei_di.di_mode);	/*same as nbsd*/
    212 	ei->ei_nlink		= be16toh(ei->ei_di.di_nlink);
    213 	ei->ei_uid		= be16toh(ei->ei_di.di_uid);
    214 	ei->ei_gid		= be16toh(ei->ei_di.di_gid);
    215 	ei->ei_size		= be32toh(ei->ei_di.di_size);
    216 	ei->ei_atime		= be32toh(ei->ei_di.di_atime);
    217 	ei->ei_mtime		= be32toh(ei->ei_di.di_mtime);
    218 	ei->ei_ctime		= be32toh(ei->ei_di.di_ctime);
    219 	ei->ei_gen		= be32toh(ei->ei_di.di_gen);
    220 	ei->ei_numextents 	= be16toh(ei->ei_di.di_numextents);
    221 	ei->ei_version		= ei->ei_di.di_version;
    222 }
    223 
/*
 * Update the on-disk, EFS ordered inode fields from their in-core, host
 * ordered copies.
 *
 * This is the inverse of efs_sync_dinode_to_inode(), and would be called
 * before an inode is flushed to disk. It currently does nothing but panic,
 * as its message states the filesystem is read-only.
 */
void
efs_sync_inode_to_dinode(struct efs_inode *ei)
{
	panic("readonly -- no need to call me");
}
    237 
    238 #ifdef DIAGNOSTIC
    239 /*
    240  * Ensure that the in-core inode's host cached fields match its on-disk copy.
    241  *
    242  * Returns 0 if they match.
    243  */
    244 static int
    245 efs_is_inode_synced(struct efs_inode *ei)
    246 {
    247 	int s;
    248 
    249 	s = 0;
    250 	/* XXX -- see above remarks about assumption */
    251 	s += (ei->ei_mode	!= be16toh(ei->ei_di.di_mode));
    252 	s += (ei->ei_nlink	!= be16toh(ei->ei_di.di_nlink));
    253 	s += (ei->ei_uid	!= be16toh(ei->ei_di.di_uid));
    254 	s += (ei->ei_gid	!= be16toh(ei->ei_di.di_gid));
    255 	s += (ei->ei_size	!= be32toh(ei->ei_di.di_size));
    256 	s += (ei->ei_atime	!= be32toh(ei->ei_di.di_atime));
    257 	s += (ei->ei_mtime	!= be32toh(ei->ei_di.di_mtime));
    258 	s += (ei->ei_ctime	!= be32toh(ei->ei_di.di_ctime));
    259 	s += (ei->ei_gen	!= be32toh(ei->ei_di.di_gen));
    260 	s += (ei->ei_numextents	!= be16toh(ei->ei_di.di_numextents));
    261 	s += (ei->ei_version	!= ei->ei_di.di_version);
    262 
    263 	return (s);
    264 }
    265 #endif
    266 
    267 /*
    268  * Given an efs_dirblk structure and a componentname to search for, return the
    269  * corresponding inode if it is found.
    270  *
    271  * Returns 0 on success.
    272  */
    273 static int
    274 efs_dirblk_lookup(struct efs_dirblk *dir, struct componentname *cn,
    275     ino_t *inode)
    276 {
    277 	struct efs_dirent *de;
    278 	int i, slot, offset;
    279 
    280 	KASSERT(cn->cn_namelen <= EFS_DIRENT_NAMELEN_MAX);
    281 
    282 	slot = offset = 0;
    283 
    284 	for (i = 0; i < dir->db_slots; i++) {
    285 		offset = EFS_DIRENT_OFF_EXPND(dir->db_space[i]);
    286 
    287 		if (offset == EFS_DIRBLK_SLOT_FREE)
    288 			continue;
    289 
    290 		de = (struct efs_dirent *)((char *)dir + offset);
    291 		if (de->de_namelen == cn->cn_namelen &&
    292 		   (strncmp(cn->cn_nameptr, de->de_name, cn->cn_namelen) == 0)){
    293 			slot = i;
    294 			break;
    295 		}
    296 	}
    297 	if (i == dir->db_slots)
    298 		return (ENOENT);
    299 
    300 	KASSERT(slot < offset && offset < EFS_DIRBLK_SPACE_SIZE);
    301 	de = (struct efs_dirent *)((char *)dir + offset);
    302 	*inode = be32toh(de->de_inumber);
    303 
    304 	return (0);
    305 }
    306 
    307 /*
    308  * Given an extent descriptor that represents a directory, look up
    309  * componentname within its efs_dirblk's. If it is found, return the
    310  * corresponding inode in 'ino'.
    311  *
    312  * Returns 0 on success.
    313  */
    314 static int
    315 efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex,
    316     struct componentname *cn, ino_t *ino)
    317 {
    318 	struct efs_dirblk *db;
    319 	struct buf *bp;
    320 	int i, err;
    321 
    322 	/*
    323 	 * Read in the entire extent, evaluating all of the dirblks until we
    324 	 * find our entry. If we don't, return ENOENT.
    325 	 */
    326 	err = efs_bread(emp, ex->ex_bn, ex->ex_length, NULL, &bp);
    327 	if (err) {
    328 		printf("efs: warning: invalid extent descriptor\n");
    329 		brelse(bp);
    330 		return (err);
    331 	}
    332 
    333 	for (i = 0; i < ex->ex_length; i++) {
    334 		db = ((struct efs_dirblk *)bp->b_data) + i;
    335 		if (efs_dirblk_lookup(db, cn, ino) == 0) {
    336 			brelse(bp);
    337 			return (0);
    338 		}
    339 	}
    340 
    341 	brelse(bp);
    342 	return (ENOENT);
    343 }
    344 
    345 /*
    346  * Given the provided in-core inode, look up the pathname requested. If
    347  * we find it, 'ino' reflects its corresponding on-disk inode number.
    348  *
    349  * Returns 0 on success.
    350  */
    351 int
    352 efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei,
    353     struct componentname *cn, ino_t *ino)
    354 {
    355 	struct efs_extent ex;
    356 	struct efs_extent_iterator exi;
    357 	int ret;
    358 
    359 	KASSERT(VOP_ISLOCKED(ei->ei_vp));
    360 	KASSERT(efs_is_inode_synced(ei) == 0);
    361 	KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR);
    362 
    363 	efs_extent_iterator_init(&exi, ei);
    364 	while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
    365 		if (efs_extent_lookup(emp, &ex, cn, ino) == 0) {
    366 			efs_extent_iterator_free(&exi);
    367 			return (0);
    368 		}
    369 	}
    370 	efs_extent_iterator_free(&exi);
    371 
    372 	return ((ret == -1) ? ENOENT : ret);
    373 }
    374 
    375 /*
    376  * Convert on-disk extent structure to in-core format.
    377  */
    378 void
    379 efs_dextent_to_extent(struct efs_dextent *dex, struct efs_extent *ex)
    380 {
    381 
    382 	KASSERT(dex != NULL && ex != NULL);
    383 
    384 	ex->ex_magic	= dex->ex_bytes[0];
    385 	ex->ex_bn	= be32toh(dex->ex_words[0]) & 0x00ffffff;
    386 	ex->ex_length	= dex->ex_bytes[4];
    387 	ex->ex_offset	= be32toh(dex->ex_words[1]) & 0x00ffffff;
    388 }
    389 
/*
 * Convert in-core extent format 'ex' to the on-disk structure 'dex'.
 *
 * The in-core magic must be EFS_EXTENT_MAGIC, and the block number and
 * offset must fit within their respective masks (checked via KASSERT).
 *
 * NOTE(review): ex_bytes and ex_words presumably overlay the same storage
 * (the accesses here and in efs_dextent_to_extent() suggest so) -- confirm
 * against the struct efs_dextent definition.
 */
void
efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex)
{

	KASSERT(ex != NULL && dex != NULL);
	KASSERT(ex->ex_magic == EFS_EXTENT_MAGIC);
	KASSERT((ex->ex_bn & ~EFS_EXTENT_BN_MASK) == 0);
	KASSERT((ex->ex_offset & ~EFS_EXTENT_OFFSET_MASK) == 0);

	/*
	 * Order matters: each big-endian word is stored first, then the
	 * single byte (magic at byte 0, length at byte 4) is written
	 * afterwards.
	 */
	dex->ex_words[0] = htobe32(ex->ex_bn);
	dex->ex_bytes[0] = ex->ex_magic;
	dex->ex_words[1] = htobe32(ex->ex_offset);
	dex->ex_bytes[4] = ex->ex_length;
}
    407 
    408 /*
    409  * Initialise an extent iterator.
    410  */
    411 void
    412 efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip)
    413 {
    414 
    415 	exi->exi_eip		= eip;
    416 	exi->exi_next		= 0;
    417 	exi->exi_dnext		= 0;
    418 	exi->exi_innext		= 0;
    419 	exi->exi_incache	= NULL;
    420 	exi->exi_nincache	= 0;
    421 }
    422 
    423 /*
    424  * Return the next EFS extent.
    425  *
    426  * Returns 0 if another extent was iterated, -1 if we've exhausted all
    427  * extents, or an error number. If 'exi' is non-NULL, the next extent is
    428  * written to it (should it exist).
    429  */
    430 int
    431 efs_extent_iterator_next(struct efs_extent_iterator *exi,
    432     struct efs_extent *exp)
    433 {
    434 	struct efs_inode *eip = exi->exi_eip;
    435 
    436 	if (exi->exi_next++ >= eip->ei_numextents)
    437 		return (-1);
    438 
    439 	/* direct or indirect extents? */
    440 	if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
    441 		if (exp != NULL) {
    442 			efs_dextent_to_extent(
    443 			    &eip->ei_di.di_extents[exi->exi_dnext++], exp);
    444 		}
    445 	} else {
    446 		/*
    447 		 * Cache a full indirect extent worth of extent descriptors.
    448 		 * This is maximally 124KB (248 * 512).
    449 		 */
    450 		if (exi->exi_incache == NULL) {
    451 			struct efs_extent ex;
    452 			struct buf *bp;
    453 			int err;
    454 
    455 			efs_dextent_to_extent(
    456 			    &eip->ei_di.di_extents[exi->exi_dnext], &ex);
    457 
    458 			err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount),
    459 			    ex.ex_bn, ex.ex_length, NULL, &bp);
    460 			if (err) {
    461 				EFS_DPRINTF(("efs_extent_iterator_next: "
    462 				    "efs_bread failed: %d\n", err));
    463 				brelse(bp);
    464 				return (err);
    465 			}
    466 
    467 			exi->exi_incache = malloc(ex.ex_length * EFS_BB_SIZE,
    468 			    M_EFSTMP, M_WAITOK);
    469 			exi->exi_nincache = ex.ex_length * EFS_BB_SIZE /
    470 			    sizeof(struct efs_dextent);
    471 			memcpy(exi->exi_incache, bp->b_data,
    472 			    ex.ex_length * EFS_BB_SIZE);
    473 			brelse(bp);
    474 		}
    475 
    476 		if (exp != NULL) {
    477 			efs_dextent_to_extent(
    478 			    &exi->exi_incache[exi->exi_innext++], exp);
    479 		}
    480 
    481 		/* if this is the last one, ditch the cache */
    482 		if (exi->exi_innext >= exi->exi_nincache) {
    483 			exi->exi_innext = 0;
    484 			exi->exi_nincache = 0;
    485 			free(exi->exi_incache, M_EFSTMP);
    486 			exi->exi_incache = NULL;
    487 			exi->exi_dnext++;
    488 		}
    489 	}
    490 
    491 	return (0);
    492 }
    493 
    494 /*
    495  * Clean up the extent iterator.
    496  */
    497 void
    498 efs_extent_iterator_free(struct efs_extent_iterator *exi)
    499 {
    500 
    501 	if (exi->exi_incache != NULL)
    502 		free(exi->exi_incache, M_EFSTMP);
    503 	efs_extent_iterator_init(exi, NULL);
    504 }
    505