Home | History | Annotate | Line # | Download | only in nilfs
nilfs_subr.c revision 1.5
      1 /* $NetBSD: nilfs_subr.c,v 1.5 2010/06/24 12:15:46 reinoud Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2008, 2009 Reinoud Zandijk
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #ifndef lint
     31 __KERNEL_RCSID(0, "$NetBSD: nilfs_subr.c,v 1.5 2010/06/24 12:15:46 reinoud Exp $");
     32 #endif /* not lint */
     33 
     34 #include <sys/param.h>
     35 #include <sys/systm.h>
     36 #include <sys/namei.h>
     37 #include <sys/resourcevar.h>	/* defines plimit structure in proc struct */
     38 #include <sys/kernel.h>
     39 #include <sys/file.h>		/* define FWRITE ... */
     40 #include <sys/stat.h>
     41 #include <sys/buf.h>
     42 #include <sys/proc.h>
     43 #include <sys/mount.h>
     44 #include <sys/vnode.h>
     45 #include <sys/signalvar.h>
     46 #include <sys/malloc.h>
     47 #include <sys/dirent.h>
     48 #include <sys/lockf.h>
     49 #include <sys/kauth.h>
     50 #include <sys/dirhash.h>
     51 
     52 #include <miscfs/genfs/genfs.h>
     53 #include <uvm/uvm_extern.h>
     54 
     55 #include <fs/nilfs/nilfs_mount.h>
     56 #include "nilfs.h"
     57 #include "nilfs_subr.h"
     58 #include "nilfs_bswap.h"
     59 
     60 
/* Fetch the nilfs_node stored in a vnode's v_data pointer. */
#define VTOI(vnode) ((struct nilfs_node *) (vnode)->v_data)
     62 
     63 /* forwards */
     64 static int nilfs_btree_lookup(struct nilfs_node *node, uint64_t lblocknr,
     65 	uint64_t *vblocknr);
     66 
     67 /* basic calculators */
     68 uint64_t nilfs_get_segnum_of_block(struct nilfs_device *nilfsdev,
     69 	uint64_t blocknr)
     70 {
     71 	return blocknr / nilfs_rw32(nilfsdev->super.s_blocks_per_segment);
     72 }
     73 
     74 
     75 void
     76 nilfs_get_segment_range(struct nilfs_device *nilfsdev, uint64_t segnum,
     77         uint64_t *seg_start, uint64_t *seg_end)
     78 {
     79         uint64_t blks_per_seg;
     80 
     81         blks_per_seg = nilfs_rw64(nilfsdev->super.s_blocks_per_segment);
     82         *seg_start = blks_per_seg * segnum;
     83         *seg_end   = *seg_start + blks_per_seg -1;
     84         if (segnum == 0)
     85                 *seg_start = nilfs_rw64(nilfsdev->super.s_first_data_block);
     86 }
     87 
     88 
     89 void nilfs_calc_mdt_consts(struct nilfs_device *nilfsdev,
     90 	struct nilfs_mdt *mdt, int entry_size)
     91 {
     92 	uint32_t blocksize = nilfsdev->blocksize;
     93 
     94 	mdt->entries_per_group = blocksize * 8;	   /* bits in sector */
     95 	mdt->entries_per_block = blocksize / entry_size;
     96 
     97 	mdt->blocks_per_group  =
     98 		(mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
     99 	mdt->groups_per_desc_block =
    100 		blocksize / sizeof(struct nilfs_block_group_desc);
    101 	mdt->blocks_per_desc_block =
    102 		mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
    103 }
    104 
    105 
/*
 * Update a little-endian (reflected) CRC-32 over `len' bytes at `buf',
 * starting from `crc', and return the new value.  No initial or final
 * inversion is applied; that is left to the caller.
 *
 * Derived from NetBSD's src/sys/net/if_ethersubr.c.
 */
uint32_t
crc32_le(uint32_t crc, const uint8_t *buf, size_t len)
{
	/* table for one 4 bit step */
	static const uint32_t nibble_tab[16] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};
	int nibble;

	for (; len != 0; len--) {
		crc ^= *buf++;
		/* each byte is folded in as two nibbles */
		for (nibble = 0; nibble < 2; nibble++)
			crc = nibble_tab[crc & 0xf] ^ (crc >> 4);
	}

	return crc;
}
    126 
    127 
/*
 * Hash an inode number for the node hash table: simply the inode number
 * converted to int.  Callers mask the result down to a bucket index.
 */
static int
nilfs_calchash(uint64_t ino)
{
	int hash = (int) ino;

	return hash;
}
    133 
    134 
    135 /* dev reading */
    136 static int
    137 nilfs_dev_bread(struct nilfs_device *nilfsdev, uint64_t blocknr,
    138 	struct kauth_cred *cred, int flags, struct buf **bpp)
    139 {
    140 	int blk2dev = nilfsdev->blocksize / DEV_BSIZE;
    141 
    142 	return bread(nilfsdev->devvp, blocknr * blk2dev, nilfsdev->blocksize,
    143 		NOCRED, 0, bpp);
    144 }
    145 
    146 
    147 /* read on a node */
    148 int
    149 nilfs_bread(struct nilfs_node *node, uint64_t blocknr,
    150 	struct kauth_cred *cred, int flags, struct buf **bpp)
    151 {
    152 	uint64_t vblocknr;
    153 	int error;
    154 
    155 	error = nilfs_btree_lookup(node, blocknr, &vblocknr);
    156 	if (error)
    157 		return error;
    158 	return bread(node->vnode, vblocknr, node->nilfsdev->blocksize,
    159 		cred, flags, bpp);
    160 }
    161 
    162 
    163 /* segment-log reading */
    164 int
    165 nilfs_get_segment_log(struct nilfs_device *nilfsdev, uint64_t *blocknr,
    166 	uint64_t *offset, struct buf **bpp, int len, void *blob)
    167 {
    168 	int blocksize = nilfsdev->blocksize;
    169 	int error;
    170 
    171 	KASSERT(len <= blocksize);
    172 
    173 	if (*offset + len > blocksize) {
    174 		*blocknr = *blocknr + 1;
    175 		*offset = 0;
    176 	}
    177 	if (*offset == 0) {
    178 		if (*bpp)
    179 			brelse(*bpp, BC_AGE);
    180 		/* read in block */
    181 		error = nilfs_dev_bread(nilfsdev, *blocknr, NOCRED, 0, bpp);
    182 		if (error)
    183 			return error;
    184 	}
    185 	memcpy(blob, ((uint8_t *) (*bpp)->b_data) + *offset, len);
    186 	*offset += len;
    187 
    188 	return 0;
    189 }
    190 
    191 /* -------------------------------------------------------------------------- */
    192 
    193 /* btree operations */
    194 
    195 static int
    196 nilfs_btree_lookup_level(struct nilfs_node *node, uint64_t lblocknr,
    197 		uint64_t btree_vblknr, int level, uint64_t *vblocknr)
    198 {
    199 	struct nilfs_device *nilfsdev = node->nilfsdev;
    200 	struct nilfs_btree_node *btree_hdr;
    201 	struct buf *bp;
    202 	uint64_t btree_blknr;
    203 	uint64_t *dkeys, *dptrs, child_btree_blk;
    204 	uint8_t  *pos;
    205 	int i, error, selected;
    206 
    207 	DPRINTF(TRANSLATE, ("nilfs_btree_lookup_level ino %"PRIu64", "
    208 		"lblocknr %"PRIu64", btree_vblknr %"PRIu64", level %d\n",
    209 		node->ino, lblocknr, btree_vblknr, level));
    210 
    211 	/* translate btree_vblknr */
    212 	error = nilfs_nvtop(node, 1, &btree_vblknr, &btree_blknr);
    213 	if (error)
    214 		return error;
    215 
    216 	/* get our block */
    217 	error = nilfs_dev_bread(nilfsdev, btree_blknr, NOCRED, 0, &bp);
    218 	if (error) {
    219 		brelse(bp, BC_AGE);
    220 		return error;
    221 	}
    222 
    223 	btree_hdr = (struct nilfs_btree_node *) bp->b_data;
    224 	pos =   (uint8_t *) bp->b_data +
    225 		sizeof(struct nilfs_btree_node) +
    226 		NILFS_BTREE_NODE_EXTRA_PAD_SIZE;
    227 	dkeys = (uint64_t *) pos;
    228 	dptrs = dkeys + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfsdev->blocksize);
    229 
    230 	assert((btree_hdr->bn_flags & NILFS_BTREE_NODE_ROOT) == 0);
    231 
    232 	/* select matching child XXX could use binary search */
    233 	selected = 0;
    234 	for (i = 0; i < nilfs_rw16(btree_hdr->bn_nchildren); i++) {
    235 		if (dkeys[i] > lblocknr)
    236 			break;
    237 		selected = i;
    238 	}
    239 
    240 	if (level == 1) {
    241 		/* if found it mapped */
    242 		if (dkeys[selected] == lblocknr)
    243 			*vblocknr = dptrs[selected];
    244 		brelse(bp, BC_AGE);
    245 		return 0;
    246 	}
    247 
    248 	/* lookup in selected child */
    249 	assert(dkeys[selected] <= lblocknr);
    250 	child_btree_blk = dptrs[selected];
    251 	brelse(bp, BC_AGE);
    252 
    253 	return nilfs_btree_lookup_level(node, lblocknr,
    254 			child_btree_blk, btree_hdr->bn_level-1, vblocknr);
    255 }
    256 
    257 
    258 /* internal function */
    259 static int
    260 nilfs_btree_lookup(struct nilfs_node *node, uint64_t lblocknr,
    261 		uint64_t *vblocknr)
    262 {
    263 	struct nilfs_inode  *inode    = &node->inode;
    264 	struct nilfs_btree_node  *btree_hdr;
    265 	uint64_t *dkeys, *dptrs, *dtrans;
    266 	int i, selected;
    267 	int error;
    268 
    269 	DPRINTF(TRANSLATE, ("nilfs_btree_lookup ino %"PRIu64", "
    270 		"lblocknr %"PRIu64"\n", node->ino, lblocknr));
    271 
    272 	btree_hdr  = (struct nilfs_btree_node *) &inode->i_bmap[0];
    273 	dkeys  = &inode->i_bmap[1];
    274 	dptrs  = dkeys + NILFS_BTREE_ROOT_NCHILDREN_MAX;
    275 	dtrans = &inode->i_bmap[1];
    276 
    277 	/* SMALL, direct lookup */
    278 	*vblocknr = 0;
    279 	if ((btree_hdr->bn_flags & NILFS_BMAP_LARGE) == 0) {
    280 		if (lblocknr < NILFS_DIRECT_NBLOCKS) {
    281 			*vblocknr = dtrans[lblocknr];
    282 			return 0;
    283 		}
    284 		/* not mapped XXX could be considered error here */
    285 		return 0;
    286 	}
    287 
    288 	/* LARGE, select matching child; XXX could use binary search */
    289 	dtrans = NULL;
    290 	error = 0;
    291 	selected = 0;
    292 	for (i = 0; i < nilfs_rw16(btree_hdr->bn_nchildren); i++) {
    293 		if (dkeys[i] > lblocknr)
    294 			break;
    295 		selected = i;
    296 	}
    297 
    298 	/* if selected key > lblocknr, its not mapped */
    299 	if (dkeys[selected] > lblocknr)
    300 		return 0;
    301 
    302 	/* overshooting? then not mapped */
    303 	if (selected == nilfs_rw16(btree_hdr->bn_nchildren))
    304 		return 0;
    305 
    306 	/* level should be > 1 or otherwise it should be a direct one */
    307 	assert(btree_hdr->bn_level > 1);
    308 
    309 	/* lookup in selected child */
    310 	assert(dkeys[selected] <= lblocknr);
    311 	error = nilfs_btree_lookup_level(node, lblocknr,
    312 			dptrs[selected], btree_hdr->bn_level-1, vblocknr);
    313 
    314 	return error;
    315 }
    316 
    317 
    318 /* node should be locked on entry to prevent btree changes (unlikely) */
    319 int
    320 nilfs_btree_nlookup(struct nilfs_node *node, uint64_t from, uint64_t blks,
    321 		uint64_t *l2vmap)
    322 {
    323 	uint64_t lblocknr, *vblocknr;
    324 	int i, error;
    325 
    326 	/* TODO / OPTI multiple translations in one go possible */
    327 	error = EINVAL;
    328 	for (i = 0; i < blks; i++) {
    329 		lblocknr  = from + i;
    330 		vblocknr  = l2vmap + i;
    331 		error = nilfs_btree_lookup(node, lblocknr, vblocknr);
    332 
    333 		DPRINTF(TRANSLATE, ("btree_nlookup ino %"PRIu64", "
    334 			"lblocknr %"PRIu64" -> %"PRIu64"\n",
    335 			node->ino, lblocknr, *vblocknr));
    336 		if (error)
    337 			break;
    338 	}
    339 
    340 	return error;
    341 }
    342 
    343 /* --------------------------------------------------------------------- */
    344 
    345 /* vtop operations */
    346 
    347 /* translate index to a file block number and an entry */
    348 static void
    349 nilfs_mdt_trans(struct nilfs_mdt *mdt, uint64_t index,
    350 	uint64_t *blocknr, uint32_t *entry_in_block)
    351 {
    352 	uint64_t blknr;
    353 	uint64_t group, group_offset, blocknr_in_group;
    354 	uint64_t desc_block, desc_offset;
    355 
    356 	/* calculate our offset in the file */
    357 	group             = index / mdt->entries_per_group;
    358 	group_offset      = index % mdt->entries_per_group;
    359 	desc_block        = group / mdt->groups_per_desc_block;
    360 	desc_offset       = group % mdt->groups_per_desc_block;
    361 	blocknr_in_group  = group_offset / mdt->entries_per_block;
    362 
    363 	/* to descgroup offset */
    364 	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
    365 
    366 	/* to group offset */
    367 	blknr += desc_offset * mdt->blocks_per_group;
    368 
    369 	/* to actual file block */
    370 	blknr += 1 + blocknr_in_group;
    371 
    372 	*blocknr        = blknr;
    373 	*entry_in_block = group_offset % mdt->entries_per_block;
    374 }
    375 
    376 
    377 static int
    378 nilfs_vtop(struct nilfs_device *nilfsdev, uint64_t vblocknr, uint64_t *pblocknr)
    379 {
    380 	struct nilfs_dat_entry *entry;
    381 	struct buf *bp;
    382 	uint64_t  ldatblknr;
    383 	uint32_t  entry_in_block;
    384 	int error;
    385 
    386 	nilfs_mdt_trans(&nilfsdev->dat_mdt, vblocknr,
    387 		&ldatblknr, &entry_in_block);
    388 
    389 	error = nilfs_bread(nilfsdev->dat_node, ldatblknr, NOCRED, 0, &bp);
    390 	if (error) {
    391 		printf("vtop: can't read in DAT block %"PRIu64"!\n", ldatblknr);
    392 		brelse(bp, BC_AGE);
    393 		return error;
    394 	}
    395 
    396 	/* get our translation */
    397 	entry = ((struct nilfs_dat_entry *) bp->b_data) + entry_in_block;
    398 #if 0
    399 	printf("\tvblk %4"PRIu64" -> %"PRIu64" for "
    400 		"checkpoint %"PRIu64" to %"PRIu64"\n",
    401 		vblocknr,
    402 		nilfs_rw64(entry->de_blocknr),
    403 		nilfs_rw64(entry->de_start),
    404 		nilfs_rw64(entry->de_end));
    405 #endif
    406 
    407 	*pblocknr = nilfs_rw64(entry->de_blocknr);
    408 	brelse(bp, BC_AGE);
    409 
    410 	return 0;
    411 }
    412 
    413 
    414 int
    415 nilfs_nvtop(struct nilfs_node *node, uint64_t blks, uint64_t *l2vmap,
    416 		uint64_t *v2pmap)
    417 {
    418 	uint64_t vblocknr, *pblocknr;
    419 	int i, error;
    420 
    421 	/* the DAT inode is the only one not mapped virtual */
    422 	if (node->ino == NILFS_DAT_INO) {
    423 		memcpy(v2pmap, l2vmap, blks * sizeof(uint64_t));
    424 		return 0;
    425 	}
    426 
    427 	/* TODO / OPTI more translations in one go */
    428 	error = 0;
    429 	for (i = 0; i < blks; i++) {
    430 		vblocknr  = l2vmap[i];
    431 		pblocknr  = v2pmap + i;
    432 		*pblocknr = 0;
    433 
    434 		/* only translate valid vblocknrs */
    435 		if (vblocknr == 0)
    436 			continue;
    437 		error = nilfs_vtop(node->nilfsdev, vblocknr, pblocknr);
    438 		if (error)
    439 			break;
    440 	}
    441 
    442 	return error;
    443 }
    444 
    445 /* --------------------------------------------------------------------- */
    446 
/*
 * Bookkeeping for one partial segment (pseg) visited while searching for
 * the last super root; see nilfs_search_super_root().
 */
struct nilfs_recover_info {
	uint64_t segnum;	/* segment number that contains pseg */
	uint64_t pseg;		/* block number where the pseg starts */

	struct nilfs_segment_summary segsum;	/* filled by nilfs_load_segsum() */
	struct nilfs_super_root      super_root; /* room for this pseg's super root */
	STAILQ_ENTRY(nilfs_recover_info) next;	/* link in the list of psegs */
};
    455 
    456 
    457 /*
    458  * Helper functions of nilfs_mount() that actually mounts the disc.
    459  */
    460 static int
    461 nilfs_load_segsum(struct nilfs_device *nilfsdev,
    462 	struct nilfs_recover_info *ri)
    463 {
    464 	struct buf *bp;
    465 	uint64_t blocknr, offset;
    466 	uint32_t segsum_struct_size;
    467 	uint32_t magic;
    468 	int error;
    469 
    470 	segsum_struct_size = sizeof(struct nilfs_segment_summary);
    471 
    472 	/* read in segsum structure */
    473 	bp      = NULL;
    474 	blocknr = ri->pseg;
    475 	offset  = 0;
    476 	error = nilfs_get_segment_log(nilfsdev,
    477 			&blocknr, &offset, &bp,
    478 			segsum_struct_size, (void *) &ri->segsum);
    479 	if (error)
    480 		goto out;
    481 
    482 	/* sanity checks */
    483 	magic = nilfs_rw32(ri->segsum.ss_magic);
    484 	if (magic != NILFS_SEGSUM_MAGIC) {
    485 		DPRINTF(VOLUMES, ("nilfs: bad magic in pseg %"PRIu64"\n",
    486 			ri->pseg));
    487 		error = EINVAL;
    488 		goto out;
    489 	}
    490 
    491 	/* TODO check segment summary checksum */
    492 	/* TODO check data checksum */
    493 
    494 out:
    495 	if (bp)
    496 		brelse(bp, BC_AGE);
    497 
    498 	return error;
    499 }
    500 
    501 
    502 static int
    503 nilfs_load_super_root(struct nilfs_device *nilfsdev,
    504 	struct nilfs_recover_info *ri)
    505 {
    506 	struct nilfs_segment_summary *segsum = &ri->segsum;
    507 	struct nilfs_super_root *super_root;
    508 	struct buf *bp;
    509 	uint64_t blocknr, offset;
    510 	uint32_t segsum_size, size;
    511 	uint32_t nsumblk, nfileblk;
    512 	uint32_t super_root_crc, comp_crc;
    513 	int off, error;
    514 
    515 	/* process segment summary */
    516 	segsum_size = nilfs_rw32(segsum->ss_sumbytes);
    517 	nsumblk     = (segsum_size - 1) / nilfsdev->blocksize + 1;
    518 	nfileblk    = nilfs_rw32(segsum->ss_nblocks) - nsumblk;
    519 
    520 	/* check if there is a superroot */
    521 	if ((nilfs_rw16(segsum->ss_flags) & NILFS_SS_SR) == 0) {
    522 		DPRINTF(VOLUMES, ("nilfs: no super root in pseg %"PRIu64"\n",
    523 			ri->pseg));
    524 		return ENOENT;
    525 	}
    526 
    527 	/* get our super root, located at the end of the pseg */
    528 	blocknr = ri->pseg + nsumblk + nfileblk - 1;
    529 	offset = 0;
    530 	size = sizeof(struct nilfs_super_root);
    531 	bp = NULL;
    532 	error = nilfs_get_segment_log(nilfsdev,
    533 			&blocknr, &offset, &bp,
    534 			size, (void *) &nilfsdev->super_root);
    535 	if (bp)
    536 		brelse(bp, BC_AGE);
    537 	if (error) {
    538 		printf("read in of superroot failed\n");
    539 		return EIO;
    540 	}
    541 
    542 	/* check super root crc */
    543 	super_root = &nilfsdev->super_root;
    544 	super_root_crc = nilfs_rw32(super_root->sr_sum);
    545 	off = sizeof(super_root->sr_sum);
    546 	comp_crc = crc32_le(nilfs_rw32(nilfsdev->super.s_crc_seed),
    547 		(uint8_t *) super_root + off,
    548 		NILFS_SR_BYTES - off);
    549 	if (super_root_crc != comp_crc) {
    550 		DPRINTF(VOLUMES, ("    invalid superroot, likely from old format\n"));
    551 		return EINVAL;
    552 	}
    553 
    554 	DPRINTF(VOLUMES, ("    got valid superroot\n"));
    555 
    556 	return 0;
    557 }
    558 
    559 /*
    560  * Search for the last super root recorded.
    561  */
    562 void
    563 nilfs_search_super_root(struct nilfs_device *nilfsdev)
    564 {
    565 	struct nilfs_super_block *super;
    566 	struct nilfs_segment_summary *segsum;
    567 	struct nilfs_recover_info *ri, *ori, *i_ri;
    568 	STAILQ_HEAD(,nilfs_recover_info) ri_list;
    569 	uint64_t seg_start, seg_end, cno;
    570 	uint32_t segsum_size;
    571 	uint32_t nsumblk, nfileblk;
    572 	int error;
    573 
    574 	STAILQ_INIT(&ri_list);
    575 
    576 	/* search for last super root */
    577 	ri = malloc(sizeof(struct nilfs_recover_info), M_NILFSTEMP, M_WAITOK);
    578 	memset(ri, 0, sizeof(struct nilfs_recover_info));
    579 
    580 	/* if enabled, start from the specified position */
    581 	if (0) {
    582 		/* start from set start */
    583 		nilfsdev->super.s_last_pseg = nilfsdev->super.s_first_data_block;
    584 		nilfsdev->super.s_last_cno  = nilfs_rw64(1);
    585 	}
    586 
    587 	ri->pseg   = nilfs_rw64(nilfsdev->super.s_last_pseg); /* blknr */
    588 	ri->segnum = nilfs_get_segnum_of_block(nilfsdev, ri->pseg);
    589 
    590 	error = 0;
    591 	cno = nilfs_rw64(nilfsdev->super.s_last_cno);
    592 	DPRINTF(VOLUMES, ("nilfs: seach_super_root start in pseg %"PRIu64"\n",
    593 			ri->pseg));
    594 	for (;;) {
    595 		DPRINTF(VOLUMES, (" at pseg %"PRIu64"\n", ri->pseg));
    596 		error = nilfs_load_segsum(nilfsdev, ri);
    597 		if (error)
    598 			break;
    599 
    600 		segsum = &ri->segsum;
    601 
    602 		/* try to load super root */
    603 		if (nilfs_rw16(segsum->ss_flags) & NILFS_SS_SR) {
    604 			DPRINTF(VOLUMES, (" try super root\n"));
    605 			error = nilfs_load_super_root(nilfsdev, ri);
    606 			if (error)
    607 				break;	/* confused */
    608 			/* wipe current list of ri */
    609 			while (!STAILQ_EMPTY(&ri_list)) {
    610 				i_ri = STAILQ_FIRST(&ri_list);
    611 				STAILQ_REMOVE_HEAD(&ri_list, next);
    612 				free(i_ri, M_NILFSTEMP);
    613 			}
    614 			super = &nilfsdev->super;
    615 
    616 			super->s_last_pseg = nilfs_rw64(ri->pseg);
    617 			super->s_last_cno  = cno++;
    618 			super->s_last_seq  = segsum->ss_seq;
    619 			super->s_state     = nilfs_rw16(NILFS_VALID_FS);
    620 		} else {
    621 			STAILQ_INSERT_TAIL(&ri_list, ri, next);
    622 			ori = ri;
    623 			ri = malloc(sizeof(struct nilfs_recover_info),
    624 				M_NILFSTEMP, M_WAITOK);
    625 			memset(ri, 0, sizeof(struct nilfs_recover_info));
    626 			ri->segnum = ori->segnum;
    627 			ri->pseg   = ori->pseg;
    628 			/* segsum keeps pointing to the `old' ri */
    629 		}
    630 
    631 		/* continue to the next pseg */
    632 		segsum_size = nilfs_rw32(segsum->ss_sumbytes);
    633 		nsumblk     = (segsum_size - 1) / nilfsdev->blocksize + 1;
    634 		nfileblk    = nilfs_rw32(segsum->ss_nblocks) - nsumblk;
    635 
    636 		/* calculate next partial segment location */
    637 		ri->pseg += nsumblk + nfileblk;
    638 
    639 		/* did we reach the end of the segment? if so, go to the next */
    640 		nilfs_get_segment_range(nilfsdev, ri->segnum, &seg_start, &seg_end);
    641 		if (ri->pseg >= seg_end)
    642 			ri->pseg = nilfs_rw64(segsum->ss_next);
    643 		ri->segnum = nilfs_get_segnum_of_block(nilfsdev, ri->pseg);
    644 	}
    645 
    646 	/*
    647 	 * XXX No roll-forward yet of the remaining partial segments.
    648 	 */
    649 
    650 	/* wipe current list of ri */
    651 	while (!STAILQ_EMPTY(&ri_list)) {
    652 		i_ri = STAILQ_FIRST(&ri_list);
    653 		STAILQ_REMOVE_HEAD(&ri_list, next);
    654 		printf("nilfs: ignoring pseg at %"PRIu64"\n", i_ri->pseg);
    655 		free(i_ri, M_NILFSTEMP);
    656 	}
    657 	free(ri, M_NILFSTEMP);
    658 }
    659 
    660 /* --------------------------------------------------------------------- */
    661 
    662 /*
    663  * Genfs interfacing
    664  *
    665  * static const struct genfs_ops nilfs_genfsops = {
    666  * 	.gop_size = genfs_size,
    667  * 		size of transfers
    668  * 	.gop_alloc = nilfs_gop_alloc,
    669  * 		allocate len bytes at offset
    670  * 	.gop_write = genfs_gop_write,
    671  * 		putpages interface code
    672  * 	.gop_markupdate = nilfs_gop_markupdate,
    673  * 		set update/modify flags etc.
    674  * }
    675  */
    676 
/*
 * Callback from genfs to allocate len bytes at offset off; only called when
 * filling up gaps in the allocation.
 *
 * Not implemented: apart from the debug output nothing is done and all
 * arguments are otherwise ignored.  Always returns 0.
 */
static int
nilfs_gop_alloc(struct vnode *vp, off_t off,
    off_t len, int flags, kauth_cred_t cred)
{
	DPRINTF(NOTIMPL, ("nilfs_gop_alloc not implemented\n"));
	DPRINTF(ALLOC, ("nilfs_gop_alloc called for %"PRIu64" bytes\n", len));

	return 0;
}
    690 
    691 
    692 /*
    693  * callback from genfs to update our flags
    694  */
    695 static void
    696 nilfs_gop_markupdate(struct vnode *vp, int flags)
    697 {
    698 	struct nilfs_node *nilfs_node = VTOI(vp);
    699 	u_long mask = 0;
    700 
    701 	if ((flags & GOP_UPDATE_ACCESSED) != 0) {
    702 		mask = IN_ACCESS;
    703 	}
    704 	if ((flags & GOP_UPDATE_MODIFIED) != 0) {
    705 		if (vp->v_type == VREG) {
    706 			mask |= IN_CHANGE | IN_UPDATE;
    707 		} else {
    708 			mask |= IN_MODIFY;
    709 		}
    710 	}
    711 	if (mask) {
    712 		nilfs_node->i_flags |= mask;
    713 	}
    714 }
    715 
    716 
/* genfs operations vector handed to genfs_node_init() for every node */
static const struct genfs_ops nilfs_genfsops = {
	.gop_size = genfs_size,			/* size of transfers */
	.gop_alloc = nilfs_gop_alloc,		/* allocate len bytes at offset */
	.gop_write = genfs_gop_write_rwmap,	/* putpages interface code */
	.gop_markupdate = nilfs_gop_markupdate,	/* set update/modify flags */
};
    723 
    724 /* --------------------------------------------------------------------- */
    725 
    726 static void
    727 nilfs_register_node(struct nilfs_node *node)
    728 {
    729 	struct nilfs_mount *ump;
    730 	struct nilfs_node *chk;
    731 	uint32_t hashline;
    732 
    733 	ump = node->ump;
    734 	mutex_enter(&ump->ihash_lock);
    735 
    736 	/* add to our hash table */
    737 	hashline = nilfs_calchash(node->ino) & NILFS_INODE_HASHMASK;
    738 #ifdef DEBUG
    739 	LIST_FOREACH(chk, &ump->nilfs_nodes[hashline], hashchain) {
    740 		assert(chk);
    741 		if (chk->ino == node->ino)
    742 			panic("Double node entered\n");
    743 	}
    744 #else
    745 	chk = NULL;
    746 #endif
    747 	LIST_INSERT_HEAD(&ump->nilfs_nodes[hashline], node, hashchain);
    748 
    749 	mutex_exit(&ump->ihash_lock);
    750 }
    751 
    752 
    753 static void
    754 nilfs_deregister_node(struct nilfs_node *node)
    755 {
    756 	struct nilfs_mount *ump;
    757 
    758 	ump = node->ump;
    759 	mutex_enter(&ump->ihash_lock);
    760 
    761 	/* remove from hash list */
    762 	LIST_REMOVE(node, hashchain);
    763 
    764 	mutex_exit(&ump->ihash_lock);
    765 }
    766 
    767 
    768 static struct nilfs_node *
    769 nilfs_hash_lookup(struct nilfs_mount *ump, ino_t ino)
    770 {
    771 	struct nilfs_node *node;
    772 	struct vnode *vp;
    773 	uint32_t hashline;
    774 
    775 loop:
    776 	mutex_enter(&ump->ihash_lock);
    777 
    778 	/* search our hash table */
    779 	hashline = nilfs_calchash(ino) & NILFS_INODE_HASHMASK;
    780 	LIST_FOREACH(node, &ump->nilfs_nodes[hashline], hashchain) {
    781 		assert(node);
    782 		if (node->ino == ino) {
    783 			vp = node->vnode;
    784 			assert(vp);
    785 			mutex_enter(&vp->v_interlock);
    786 			mutex_exit(&ump->ihash_lock);
    787 			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
    788 				goto loop;
    789 			return node;
    790 		}
    791 	}
    792 	mutex_exit(&ump->ihash_lock);
    793 
    794 	return NULL;
    795 }
    796 
    797 
    798 /* node action implementators */
    799 extern int (**nilfs_vnodeop_p)(void *);
    800 
    801 int
    802 nilfs_get_node_raw(struct nilfs_device *nilfsdev, struct nilfs_mount *ump,
    803 	uint64_t ino, struct nilfs_inode *inode, struct nilfs_node **nodep)
    804 {
    805 	struct nilfs_node *node;
    806 	struct vnode *nvp;
    807 	struct mount *mp;
    808 	int (**vnodeops)(void *);
    809 	int error;
    810 
    811 	*nodep = NULL;
    812 	vnodeops = nilfs_vnodeop_p;
    813 
    814 	/* associate with mountpoint if present*/
    815 	mp = ump? ump->vfs_mountp : NULL;
    816 	error = getnewvnode(VT_NILFS, mp, vnodeops, &nvp);
    817 	if (error)
    818 		return error;
    819 
    820 	/* lock node */
    821 	error = vn_lock(nvp, LK_EXCLUSIVE | LK_RETRY);
    822 	if (error) {
    823 		nvp->v_data = NULL;
    824 		ungetnewvnode(nvp);
    825 		return error;
    826 	}
    827 
    828 	node = pool_get(&nilfs_node_pool, PR_WAITOK);
    829 	memset(node, 0, sizeof(struct nilfs_node));
    830 
    831 	/* crosslink */
    832 	node->vnode    = nvp;
    833 	node->ump      = ump;
    834 	node->nilfsdev = nilfsdev;
    835 	nvp->v_data    = node;
    836 
    837 	/* initiase nilfs node */
    838 	node->ino   = ino;
    839 	node->inode = *inode;
    840 	node->lockf = NULL;
    841 
    842 	/* needed? */
    843 	mutex_init(&node->node_mutex, MUTEX_DEFAULT, IPL_NONE);
    844 	cv_init(&node->node_lock, "nilfs_nlk");
    845 
    846 	/* initialise genfs */
    847 	genfs_node_init(nvp, &nilfs_genfsops);
    848 
    849 	/* check if we're fetching the root */
    850 	if (ino == NILFS_ROOT_INO)
    851 		nvp->v_vflag |= VV_ROOT;
    852 
    853 	/* update vnode's file type XXX is there a function for this? */
    854 	nvp->v_type = VREG;
    855 	if (S_ISDIR(inode->i_mode))
    856 		nvp->v_type = VDIR;
    857 	if (S_ISLNK(inode->i_mode))
    858 		nvp->v_type = VLNK;
    859 #if 0
    860 	if (S_ISCHR(inode->i_mode))
    861 		nvp->v_type = VCHR;
    862 	if (S_ISBLK(inode->i_mode))
    863 		nvp->v_type = VBLK;
    864 #endif
    865 	/* XXX what else? */
    866 
    867 	/* fixup inode size for system nodes */
    868 	if ((ino < NILFS_USER_INO) && (ino != NILFS_ROOT_INO)) {
    869 		DPRINTF(VOLUMES, ("NEED TO GET my size for inode %"PRIu64"\n",
    870 			ino));
    871 		/* for now set it to maximum, -1 is illegal */
    872 		inode->i_size = nilfs_rw64(((uint64_t) -2));
    873 	}
    874 
    875 	uvm_vnp_setsize(nvp, nilfs_rw64(inode->i_size));
    876 
    877 	if (ump)
    878 		nilfs_register_node(node);
    879 
    880 	/* return node */
    881 	*nodep = node;
    882 	return 0;
    883 }
    884 
    885 
    886 int
    887 nilfs_get_node(struct nilfs_mount *ump, uint64_t ino, struct nilfs_node **nodep)
    888 {
    889 	struct nilfs_device *nilfsdev;
    890 	struct nilfs_inode   inode, *entry;
    891 	struct buf *bp;
    892 	uint64_t ivblocknr;
    893 	uint32_t entry_in_block;
    894 	int error;
    895 
    896 	/* lookup node in hash table */
    897 	*nodep = nilfs_hash_lookup(ump, ino);
    898 	if (*nodep)
    899 		return 0;
    900 
    901 	/* lock to disallow simultanious creation of same udf_node */
    902 	mutex_enter(&ump->get_node_lock);
    903 
    904 	/* relookup since it could be created while waiting for the mutex */
    905 	*nodep = nilfs_hash_lookup(ump, ino);
    906 	if (*nodep) {
    907 		mutex_exit(&ump->get_node_lock);
    908 		return 0;
    909 	}
    910 
    911 	/* create new inode; XXX check could be handier */
    912 	if ((ino < NILFS_ATIME_INO) && (ino != NILFS_ROOT_INO)) {
    913 		printf("nilfs_get_node: system ino %"PRIu64" not in mount "
    914 			"point!\n", ino);
    915 		mutex_exit(&ump->get_node_lock);
    916 		return ENOENT;
    917 	}
    918 
    919 	/* lookup inode in the ifile */
    920 	DPRINTF(NODE, ("lookup ino %"PRIu64"\n", ino));
    921 
    922 	/* lookup inode structure in mountpoints ifile */
    923 	nilfsdev = ump->nilfsdev;
    924 	nilfs_mdt_trans(&nilfsdev->ifile_mdt, ino, &ivblocknr, &entry_in_block);
    925 
    926 	error = nilfs_bread(ump->ifile_node, ivblocknr, NOCRED, 0, &bp);
    927 	if (error) {
    928 		mutex_exit(&ump->get_node_lock);
    929 		return ENOENT;
    930 	}
    931 
    932 	/* get inode entry */
    933 	entry =  (struct nilfs_inode *) bp->b_data + entry_in_block;
    934 	inode = *entry;
    935 	brelse(bp, BC_AGE);
    936 
    937 	/* get node */
    938 	error = nilfs_get_node_raw(ump->nilfsdev, ump, ino, &inode, nodep);
    939 	mutex_exit(&ump->get_node_lock);
    940 
    941 	return error;
    942 }
    943 
    944 
    945 void
    946 nilfs_dispose_node(struct nilfs_node **nodep)
    947 {
    948 	struct vnode *vp;
    949 	struct nilfs_node *node;
    950 
    951 	/* protect against rogue values */
    952 	if (!*nodep)
    953 		return;
    954 
    955 	node = *nodep;
    956 	vp = node->vnode;
    957 
    958 	/* remove dirhash if present */
    959 	dirhash_purge(&node->dir_hash);
    960 
    961 	/* remove from our hash lookup table */
    962 	if (node->ump)
    963 		nilfs_deregister_node(node);
    964 
    965 	/* destroy our locks */
    966 	mutex_destroy(&node->node_mutex);
    967 	cv_destroy(&node->node_lock);
    968 
    969 	/* dissociate from our vnode */
    970 	genfs_node_destroy(node->vnode);
    971 	vp->v_data = NULL;
    972 
    973 	/* free our associated memory */
    974 	pool_put(&nilfs_node_pool, node);
    975 
    976 	*nodep = NULL;
    977 }
    978 
    979 
/*
 * Timestamp update hook; currently an empty stub.
 *
 * None of the arguments are read or written.
 */
void
nilfs_itimes(struct nilfs_node *node, struct timespec *acc,
    struct timespec *mod, struct timespec *birth)
{
	/* intentionally empty */
	(void)node;
	(void)acc;
	(void)mod;
	(void)birth;
}
    985 
    986 
/*
 * Node update hook; currently a stub.
 *
 * Always fails with EROFS without looking at any of its arguments.
 */
int
nilfs_update(struct vnode *node, struct timespec *acc,
    struct timespec *mod, struct timespec *birth, int updflags)
{
	(void)node;
	(void)acc;
	(void)mod;
	(void)birth;
	(void)updflags;

	return EROFS;
}
    993 
    994 
    995 int
    996 nilfs_chsize(struct vnode *vp, u_quad_t newsize, kauth_cred_t cred)
    997 {
    998 	return EROFS;
    999 }
   1000 
   1001 
   1002 
/*
 * Grow a node to new_size bytes; currently a stub.
 *
 * Always fails with EROFS and leaves the node untouched.
 */
int
nilfs_grow_node(struct nilfs_node *node, uint64_t new_size)
{
	(void)node;
	(void)new_size;

	return EROFS;
}
   1008 
   1009 
/*
 * Shrink a node to new_size bytes; currently a stub.
 *
 * Always fails with EROFS and leaves the node untouched.
 */
int
nilfs_shrink_node(struct nilfs_node *node, uint64_t new_size)
{
	(void)node;
	(void)new_size;

	return EROFS;
}
   1015 
   1016 
   1017 static int
   1018 dirhash_fill(struct nilfs_node *dir_node)
   1019 {
   1020 	struct vnode *dvp = dir_node->vnode;
   1021 	struct dirhash *dirh;
   1022 	struct nilfs_dir_entry *ndirent;
   1023 	struct dirent dirent;
   1024 	struct buf *bp;
   1025 	uint64_t file_size, diroffset, blkoff;
   1026 	uint64_t blocknr;
   1027 	uint32_t blocksize = dir_node->nilfsdev->blocksize;
   1028 	uint8_t *pos, name_len;
   1029 	int error;
   1030 
   1031 	DPRINTF(CALL, ("dirhash_fill called\n"));
   1032 
   1033 	if (dvp->v_type != VDIR)
   1034 		return ENOTDIR;
   1035 
   1036 	/* make sure we have a dirhash to work on */
   1037 	dirh = dir_node->dir_hash;
   1038 	KASSERT(dirh);
   1039 	KASSERT(dirh->refcnt > 0);
   1040 
   1041 	if (dirh->flags & DIRH_BROKEN)
   1042 		return EIO;
   1043 
   1044 	if (dirh->flags & DIRH_COMPLETE)
   1045 		return 0;
   1046 
   1047 	DPRINTF(DIRHASH, ("Filling directory hash\n"));
   1048 
   1049 	/* make sure we have a clean dirhash to add to */
   1050 	dirhash_purge_entries(dirh);
   1051 
   1052 	/* get directory filesize */
   1053 	file_size = nilfs_rw64(dir_node->inode.i_size);
   1054 
   1055 	/* walk the directory */
   1056 	error = 0;
   1057 	diroffset = 0;
   1058 
   1059 	blocknr = diroffset / blocksize;
   1060 	blkoff  = diroffset % blocksize;
   1061 	error = nilfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
   1062 	if (error) {
   1063 		dirh->flags |= DIRH_BROKEN;
   1064 		dirhash_purge_entries(dirh);
   1065 		return EIO;
   1066 	}
   1067 	while (diroffset < file_size) {
   1068 		DPRINTF(READDIR, ("filldir : offset = %"PRIu64"\n",
   1069 			diroffset));
   1070 		if (blkoff >= blocksize) {
   1071 			blkoff = 0; blocknr++;
   1072 			brelse(bp, BC_AGE);
   1073 			error = nilfs_bread(dir_node, blocknr, NOCRED, 0,
   1074 					&bp);
   1075 			if (error) {
   1076 				dirh->flags |= DIRH_BROKEN;
   1077 				dirhash_purge_entries(dirh);
   1078 				return EIO;
   1079 			}
   1080 		}
   1081 
   1082 		/* read in one dirent */
   1083 		pos = (uint8_t *) bp->b_data + blkoff;
   1084 		ndirent = (struct nilfs_dir_entry *) pos;
   1085 		name_len = ndirent->name_len;
   1086 
   1087 		memset(&dirent, 0, sizeof(struct dirent));
   1088 		dirent.d_fileno = nilfs_rw64(ndirent->inode);
   1089 		dirent.d_type   = ndirent->file_type;	/* 1:1 ? */
   1090 		dirent.d_namlen = name_len;
   1091 		strncpy(dirent.d_name, ndirent->name, name_len);
   1092 		dirent.d_reclen = _DIRENT_SIZE(&dirent);
   1093 		DPRINTF(DIRHASH, ("copying `%*.*s`\n", name_len,
   1094 			name_len, dirent.d_name));
   1095 
   1096 		/* XXX is it deleted? extra free space? */
   1097 		dirhash_enter(dirh, &dirent, diroffset,
   1098 			nilfs_rw16(ndirent->rec_len), 0);
   1099 
   1100 		/* advance */
   1101 		diroffset += nilfs_rw16(ndirent->rec_len);
   1102 		blkoff    += nilfs_rw16(ndirent->rec_len);
   1103 	}
   1104 	brelse(bp, BC_AGE);
   1105 
   1106 	dirh->flags |= DIRH_COMPLETE;
   1107 
   1108 	return 0;
   1109 }
   1110 
   1111 
   1112 int
   1113 nilfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
   1114 		uint64_t *ino, int *found)
   1115 {
   1116 	struct nilfs_node	*dir_node = VTOI(dvp);
   1117 	struct nilfs_dir_entry *ndirent;
   1118 	struct dirhash		*dirh;
   1119 	struct dirhash_entry	*dirh_ep;
   1120 	struct buf *bp;
   1121 	uint64_t diroffset, blkoff;
   1122 	uint64_t blocknr;
   1123 	uint32_t blocksize = dir_node->nilfsdev->blocksize;
   1124 	uint8_t *pos;
   1125 	int hit, error;
   1126 
   1127 	/* set default return */
   1128 	*found = 0;
   1129 
   1130 	/* get our dirhash and make sure its read in */
   1131 	dirhash_get(&dir_node->dir_hash);
   1132 	error = dirhash_fill(dir_node);
   1133 	if (error) {
   1134 		dirhash_put(dir_node->dir_hash);
   1135 		return error;
   1136 	}
   1137 	dirh = dir_node->dir_hash;
   1138 
   1139 	/* allocate temporary space for fid */
   1140 
   1141 	DPRINTF(DIRHASH, ("dirhash_lookup looking for `%*.*s`\n",
   1142 		namelen, namelen, name));
   1143 
   1144 	/* search our dirhash hits */
   1145 	*ino = 0;
   1146 	dirh_ep = NULL;
   1147 	for (;;) {
   1148 		hit = dirhash_lookup(dirh, name, namelen, &dirh_ep);
   1149 		/* if no hit, abort the search */
   1150 		if (!hit)
   1151 			break;
   1152 
   1153 		/* check this hit */
   1154 		diroffset = dirh_ep->offset;
   1155 
   1156 		blocknr = diroffset / blocksize;
   1157 		blkoff  = diroffset % blocksize;
   1158 		error = nilfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
   1159 		if (error)
   1160 			return EIO;
   1161 
   1162 		/* read in one dirent */
   1163 		pos = (uint8_t *) bp->b_data + blkoff;
   1164 		ndirent = (struct nilfs_dir_entry *) pos;
   1165 
   1166 		DPRINTF(DIRHASH, ("dirhash_lookup\tchecking `%*.*s`\n",
   1167 			ndirent->name_len, ndirent->name_len, ndirent->name));
   1168 
   1169 		/* see if its our entry */
   1170 		KASSERT(ndirent->name_len == namelen);
   1171 		if (strncmp(ndirent->name, name, namelen) == 0) {
   1172 			*found = 1;
   1173 			*ino = nilfs_rw64(ndirent->inode);
   1174 			brelse(bp, BC_AGE);
   1175 			break;
   1176 		}
   1177 		brelse(bp, BC_AGE);
   1178 	}
   1179 
   1180 	dirhash_put(dir_node->dir_hash);
   1181 
   1182 	return error;
   1183 }
   1184 
   1185 
/*
 * Remove a node's entry from a directory; currently a stub.
 *
 * Always fails with EROFS without touching any of its arguments.
 */
int
nilfs_dir_detach(struct nilfs_mount *ump, struct nilfs_node *dir_node,
    struct nilfs_node *node, struct componentname *cnp)
{
	(void)ump;
	(void)dir_node;
	(void)node;
	(void)cnp;

	return EROFS;
}
   1191 
   1192 
/*
 * Add an entry for a node to a directory; currently a stub.
 *
 * Always fails with EROFS without touching any of its arguments.
 */
int
nilfs_dir_attach(struct nilfs_mount *ump, struct nilfs_node *dir_node,
    struct nilfs_node *node, struct vattr *vap, struct componentname *cnp)
{
	(void)ump;
	(void)dir_node;
	(void)node;
	(void)vap;
	(void)cnp;

	return EROFS;
}
   1198 
   1199 
/*
 * Create a new node in directory dvp; currently a stub.
 *
 * Always fails with EROFS; *vpp is never written.
 *
 * XXX return vnode?
 */
int
nilfs_create_node(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp)
{
	(void)dvp;
	(void)vpp;
	(void)vap;
	(void)cnp;

	return EROFS;
}
   1206 
   1207 
/*
 * Delete a node; currently an empty stub that leaves the node untouched.
 */
void
nilfs_delete_node(struct nilfs_node *node)
{
	/* intentionally empty */
	(void)node;
}
   1212 
   1213 
   1214