Home | History | Annotate | Line # | Download | only in nilfs
nilfs_subr.c revision 1.1
      1 /* $NetBSD: nilfs_subr.c,v 1.1 2009/07/18 16:31:42 reinoud Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2008, 2009 Reinoud Zandijk
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #ifndef lint
     31 __KERNEL_RCSID(0, "$NetBSD: nilfs_subr.c,v 1.1 2009/07/18 16:31:42 reinoud Exp $");
     32 #endif /* not lint */
     33 
     34 #include <sys/param.h>
     35 #include <sys/systm.h>
     36 #include <sys/namei.h>
     37 #include <sys/resourcevar.h>	/* defines plimit structure in proc struct */
     38 #include <sys/kernel.h>
     39 #include <sys/file.h>		/* define FWRITE ... */
     40 #include <sys/stat.h>
     41 #include <sys/buf.h>
     42 #include <sys/proc.h>
     43 #include <sys/mount.h>
     44 #include <sys/vnode.h>
     45 #include <sys/signalvar.h>
     46 #include <sys/malloc.h>
     47 #include <sys/dirent.h>
     48 #include <sys/lockf.h>
     49 #include <sys/kauth.h>
     50 #include <sys/dirhash.h>
     51 
     52 #include <miscfs/genfs/genfs.h>
     53 #include <uvm/uvm_extern.h>
     54 
     55 #include <fs/nilfs/nilfs_mount.h>
     56 #include "nilfs.h"
     57 #include "nilfs_subr.h"
     58 #include "nilfs_bswap.h"
     59 
     60 
/* Fetch the nilfs_node stored in a vnode's v_data pointer. */
#define VTOI(vnode) ((struct nilfs_node *) (vnode)->v_data)
     62 
     63 /* basic calculators */
     64 uint64_t nilfs_get_segnum_of_block(struct nilfs_device *nilfsdev,
     65 	uint64_t blocknr)
     66 {
     67 	return blocknr / nilfs_rw32(nilfsdev->super.s_blocks_per_segment);
     68 }
     69 
     70 
     71 void
     72 nilfs_get_segment_range(struct nilfs_device *nilfsdev, uint64_t segnum,
     73         uint64_t *seg_start, uint64_t *seg_end)
     74 {
     75         uint64_t blks_per_seg;
     76 
     77         blks_per_seg = nilfs_rw64(nilfsdev->super.s_blocks_per_segment);
     78         *seg_start = blks_per_seg * segnum;
     79         *seg_end   = *seg_start + blks_per_seg -1;
     80         if (segnum == 0)
     81                 *seg_start = nilfs_rw64(nilfsdev->super.s_first_data_block);
     82 }
     83 
     84 
     85 void nilfs_calc_mdt_consts(struct nilfs_device *nilfsdev,
     86 	struct nilfs_mdt *mdt, int entry_size)
     87 {
     88 	uint32_t blocksize = nilfsdev->blocksize;
     89 
     90 	mdt->entries_per_group = blocksize * 8;	   /* bits in sector */
     91 	mdt->entries_per_block = blocksize / entry_size;
     92 
     93 	mdt->blocks_per_group  =
     94 		(mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
     95 	mdt->groups_per_desc_block =
     96 		blocksize / sizeof(struct nilfs_block_group_desc);
     97 	mdt->blocks_per_desc_block =
     98 		mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
     99 }
    100 
    101 
/*
 * CRC-32 (reflected polynomial 0xedb88320) computed one nibble at a time
 * with a 16 entry table; from NetBSD's src/sys/net/if_ethersubr.c.
 *
 * `crc' is the running value to continue from, so a buffer can be fed in
 * pieces.  No initial or final inversion is applied here; the updated
 * running value is returned as is.
 */
uint32_t
crc32_le(uint32_t crc, const uint8_t *buf, size_t len)
{
	static const uint32_t nibble_tab[16] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};

	for (; len > 0; len--, buf++) {
		crc ^= *buf;
		/* fold in the low nibble, then the high nibble */
		crc = nibble_tab[crc & 0xf] ^ (crc >> 4);
		crc = nibble_tab[crc & 0xf] ^ (crc >> 4);
	}

	return crc;
}
    122 
    123 
/*
 * Hash an inode number for the in-core node table: the inode number is
 * simply converted to an int; callers mask the result down to a hash line.
 */
static int
nilfs_calchash(uint64_t ino)
{
	int hash = (int) ino;

	return hash;
}
    129 
    130 
    131 /* dev reading */
    132 static int
    133 nilfs_dev_bread(struct nilfs_device *nilfsdev, uint64_t blocknr,
    134 	struct kauth_cred *cred, int flags, struct buf **bpp)
    135 {
    136 	int blk2dev = nilfsdev->blocksize / DEV_BSIZE;
    137 
    138 	return bread(nilfsdev->devvp, blocknr * blk2dev, nilfsdev->blocksize,
    139 		NOCRED, 0, bpp);
    140 }
    141 
    142 
    143 /* read on a node */
    144 int
    145 nilfs_bread(struct nilfs_node *node, uint64_t blocknr,
    146 	struct kauth_cred *cred, int flags, struct buf **bpp)
    147 {
    148 	return bread(node->vnode, blocknr, node->nilfsdev->blocksize,
    149 		cred, flags, bpp);
    150 }
    151 
    152 
    153 /* segment-log reading */
    154 int
    155 nilfs_get_segment_log(struct nilfs_device *nilfsdev, uint64_t *blocknr,
    156 	uint64_t *offset, struct buf **bpp, int len, void *blob)
    157 {
    158 	int blocksize = nilfsdev->blocksize;
    159 	int error;
    160 
    161 	KASSERT(len <= blocksize);
    162 
    163 	if (*offset + len > blocksize) {
    164 		*blocknr = *blocknr + 1;
    165 		*offset = 0;
    166 	}
    167 	if (*offset == 0) {
    168 		if (*bpp)
    169 			brelse(*bpp, BC_AGE);
    170 		/* read in block */
    171 		error = nilfs_dev_bread(nilfsdev, *blocknr, NOCRED, 0, bpp);
    172 		if (error)
    173 			return error;
    174 	}
    175 	memcpy(blob, ((uint8_t *) (*bpp)->b_data) + *offset, len);
    176 	*offset += len;
    177 
    178 	return 0;
    179 }
    180 
    181 /* -------------------------------------------------------------------------- */
    182 
    183 /* btree operations */
    184 
    185 static int
    186 nilfs_btree_lookup_level(struct nilfs_node *node, uint64_t lblocknr,
    187 		uint64_t btree_vblknr, int level, uint64_t *vblocknr)
    188 {
    189 	struct nilfs_device *nilfsdev = node->nilfsdev;
    190 	struct nilfs_btree_node *btree_hdr;
    191 	struct buf *bp;
    192 	uint64_t btree_blknr;
    193 	uint64_t *dkeys, *dptrs, child_btree_blk;
    194 	uint8_t  *pos;
    195 	int i, error, selected;
    196 
    197 	DPRINTF(TRANSLATE, ("nilfs_btree_lookup_level ino %"PRIu64", "
    198 		"lblocknr %"PRIu64", btree_vblknr %"PRIu64", level %d\n",
    199 		node->ino, lblocknr, btree_vblknr, level));
    200 
    201 	/* translate btree_vblknr */
    202 	error = nilfs_nvtop(node, 1, &btree_vblknr, &btree_blknr);
    203 	if (error)
    204 		return error;
    205 
    206 	/* get our block */
    207 	error = nilfs_dev_bread(nilfsdev, btree_blknr, NOCRED, 0, &bp);
    208 	if (error) {
    209 		brelse(bp, BC_AGE);
    210 		return error;
    211 	}
    212 
    213 	btree_hdr = (struct nilfs_btree_node *) bp->b_data;
    214 	pos =   (uint8_t *) bp->b_data +
    215 		sizeof(struct nilfs_btree_node) +
    216 		NILFS_BTREE_NODE_EXTRA_PAD_SIZE;
    217 	dkeys = (uint64_t *) pos;
    218 	dptrs = dkeys + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfsdev->blocksize);
    219 
    220 	assert((btree_hdr->bn_flags & NILFS_BTREE_NODE_ROOT) == 0);
    221 
    222 	/* select matching child XXX could use binary search */
    223 	selected = 0;
    224 	for (i = 0; i < nilfs_rw16(btree_hdr->bn_nchildren); i++) {
    225 		if (dkeys[i] > lblocknr)
    226 			break;
    227 		selected = i;
    228 	}
    229 
    230 	if (level == 1) {
    231 		/* if found it mapped */
    232 		if (dkeys[selected] == lblocknr)
    233 			*vblocknr = dptrs[selected];
    234 		brelse(bp, BC_AGE);
    235 		return 0;
    236 	}
    237 
    238 	/* lookup in selected child */
    239 	assert(dkeys[selected] <= lblocknr);
    240 	child_btree_blk = dptrs[selected];
    241 	brelse(bp, BC_AGE);
    242 
    243 	return nilfs_btree_lookup_level(node, lblocknr,
    244 			child_btree_blk, btree_hdr->bn_level-1, vblocknr);
    245 }
    246 
    247 
    248 /* internal function */
    249 static int
    250 nilfs_btree_lookup(struct nilfs_node *node, uint64_t lblocknr,
    251 		uint64_t *vblocknr)
    252 {
    253 	struct nilfs_inode  *inode    = &node->inode;
    254 	struct nilfs_btree_node  *btree_hdr;
    255 	uint64_t *dkeys, *dptrs, *dtrans;
    256 	int i, selected;
    257 	int error;
    258 
    259 	DPRINTF(TRANSLATE, ("nilfs_btree_lookup ino %"PRIu64", "
    260 		"lblocknr %"PRIu64"\n", node->ino, lblocknr));
    261 
    262 	btree_hdr  = (struct nilfs_btree_node *) &inode->i_bmap[0];
    263 	dkeys  = &inode->i_bmap[1];
    264 	dptrs  = dkeys + NILFS_BTREE_ROOT_NCHILDREN_MAX;
    265 	dtrans = &inode->i_bmap[1];
    266 
    267 	/* SMALL, direct lookup */
    268 	*vblocknr = 0;
    269 	if ((btree_hdr->bn_flags & NILFS_BMAP_LARGE) == 0) {
    270 		if (lblocknr < NILFS_DIRECT_NBLOCKS) {
    271 			*vblocknr = dtrans[lblocknr];
    272 			return 0;
    273 		}
    274 		/* not mapped XXX could be considered error here */
    275 		return 0;
    276 	}
    277 
    278 	/* LARGE, select matching child; XXX could use binary search */
    279 	dtrans = NULL;
    280 	error = 0;
    281 	selected = 0;
    282 	for (i = 0; i < nilfs_rw16(btree_hdr->bn_nchildren); i++) {
    283 		if (dkeys[i] > lblocknr)
    284 			break;
    285 		selected = i;
    286 	}
    287 	/* overshooting? then not mapped */
    288 	if (selected == nilfs_rw16(btree_hdr->bn_nchildren))
    289 		return 0;
    290 
    291 	/* lookup in selected child */
    292 	assert(dkeys[selected] <= lblocknr);
    293 	error = nilfs_btree_lookup_level(node, lblocknr,
    294 			dptrs[selected], btree_hdr->bn_level-1, vblocknr);
    295 
    296 	return error;
    297 }
    298 
    299 
    300 /* node should be locked on entry to prevent btree changes (unlikely) */
    301 int
    302 nilfs_btree_nlookup(struct nilfs_node *node, uint64_t from, uint64_t blks,
    303 		uint64_t *l2vmap)
    304 {
    305 	uint64_t lblocknr, *vblocknr;
    306 	int i, error;
    307 
    308 	/* TODO / OPTI multiple translations in one go possible */
    309 	error = EINVAL;
    310 	for (i = 0; i < blks; i++) {
    311 		lblocknr  = from + i;
    312 		vblocknr  = l2vmap + i;
    313 		error = nilfs_btree_lookup(node, lblocknr, vblocknr);
    314 
    315 		DPRINTF(TRANSLATE, ("btree_nlookup ino %"PRIu64", "
    316 			"lblocknr %"PRIu64" -> %"PRIu64"\n",
    317 			node->ino, lblocknr, *vblocknr));
    318 		if (error)
    319 			break;
    320 	}
    321 
    322 	return error;
    323 }
    324 
    325 /* --------------------------------------------------------------------- */
    326 
    327 /* vtop operations */
    328 
    329 /* translate index to a file block number and an entry */
    330 static void
    331 nilfs_mdt_trans(struct nilfs_mdt *mdt, uint64_t index,
    332 	uint64_t *blocknr, uint32_t *entry_in_block)
    333 {
    334 	uint64_t blknr;
    335 	uint64_t group, group_offset, blocknr_in_group;
    336 	uint64_t desc_block, desc_offset;
    337 
    338 	/* calculate our offset in the file */
    339 	group             = index / mdt->entries_per_group;
    340 	group_offset      = index % mdt->entries_per_group;
    341 	desc_block        = group / mdt->groups_per_desc_block;
    342 	desc_offset       = group % mdt->groups_per_desc_block;
    343 	blocknr_in_group  = group_offset / mdt->entries_per_block;
    344 
    345 	/* to descgroup offset */
    346 	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
    347 
    348 	/* to group offset */
    349 	blknr += desc_offset * mdt->blocks_per_group;
    350 
    351 	/* to actual file block */
    352 	blknr += 1 + blocknr_in_group;
    353 
    354 	*blocknr        = blknr;
    355 	*entry_in_block = group_offset % mdt->entries_per_block;
    356 }
    357 
    358 
    359 static int
    360 nilfs_vtop(struct nilfs_device *nilfsdev, uint64_t vblocknr, uint64_t *pblocknr)
    361 {
    362 	struct nilfs_dat_entry *entry;
    363 	struct buf *bp;
    364 	uint64_t  ldatblknr;
    365 	uint32_t  entry_in_block;
    366 	int error;
    367 
    368 	nilfs_mdt_trans(&nilfsdev->dat_mdt, vblocknr,
    369 		&ldatblknr, &entry_in_block);
    370 
    371 	error = nilfs_bread(nilfsdev->dat_node, ldatblknr, NOCRED, 0, &bp);
    372 	if (error) {
    373 		printf("vtop: can't read in DAT block %"PRIu64"!\n", ldatblknr);
    374 		brelse(bp, BC_AGE);
    375 		return error;
    376 	}
    377 
    378 	/* get our translation */
    379 	entry = ((struct nilfs_dat_entry *) bp->b_data) + entry_in_block;
    380 #if 0
    381 	printf("\tvblk %4"PRIu64" -> %"PRIu64" for "
    382 		"checkpoint %"PRIu64" to %"PRIu64"\n",
    383 		vblocknr,
    384 		nilfs_rw64(entry->de_blocknr),
    385 		nilfs_rw64(entry->de_start),
    386 		nilfs_rw64(entry->de_end));
    387 #endif
    388 
    389 	*pblocknr = nilfs_rw64(entry->de_blocknr);
    390 	brelse(bp, BC_AGE);
    391 
    392 	return 0;
    393 }
    394 
    395 
    396 int
    397 nilfs_nvtop(struct nilfs_node *node, uint64_t blks, uint64_t *l2vmap,
    398 		uint64_t *v2pmap)
    399 {
    400 	uint64_t vblocknr, *pblocknr;
    401 	int i, error;
    402 
    403 	/* the DAT inode is the only one not mapped virtual */
    404 	if (node->ino == NILFS_DAT_INO) {
    405 		memcpy(v2pmap, l2vmap, blks * sizeof(uint64_t));
    406 		return 0;
    407 	}
    408 
    409 	/* TODO / OPTI more translations in one go */
    410 	error = EINVAL;
    411 	for (i = 0; i < blks; i++) {
    412 		vblocknr  = l2vmap[i];
    413 		pblocknr  = v2pmap + i;
    414 		/* only translate valid vblocknrs */
    415 		if (vblocknr == 0)
    416 			continue;
    417 		error = nilfs_vtop(node->nilfsdev, vblocknr, pblocknr);
    418 		if (error)
    419 			break;
    420 	}
    421 
    422 	return error;
    423 }
    424 
    425 /* --------------------------------------------------------------------- */
    426 
/*
 * Bookkeeping for one partial segment visited while searching for the
 * last super root; see nilfs_search_super_root().
 */
struct nilfs_recover_info {
	uint64_t segnum;	/* number of the segment that holds pseg */
	uint64_t pseg;		/* block number where the partial segment starts */

	/* segment summary, filled in by nilfs_load_segsum() */
	struct nilfs_segment_summary segsum;
	/*
	 * NOTE(review): not written by the code visible here;
	 * nilfs_load_super_root() reads into nilfsdev->super_root instead.
	 */
	struct nilfs_super_root      super_root;
	STAILQ_ENTRY(nilfs_recover_info) next;	/* list linkage */
};
    435 
    436 
    437 /*
    438  * Helper functions of nilfs_mount() that actually mounts the disc.
    439  */
    440 static int
    441 nilfs_load_segsum(struct nilfs_device *nilfsdev,
    442 	struct nilfs_recover_info *ri)
    443 {
    444 	struct buf *bp;
    445 	uint64_t blocknr, offset;
    446 	uint32_t segsum_struct_size;
    447 	uint32_t magic;
    448 	int error;
    449 
    450 	segsum_struct_size = sizeof(struct nilfs_segment_summary);
    451 
    452 	/* read in segsum structure */
    453 	bp      = NULL;
    454 	blocknr = ri->pseg;
    455 	offset  = 0;
    456 	error = nilfs_get_segment_log(nilfsdev,
    457 			&blocknr, &offset, &bp,
    458 			segsum_struct_size, (void *) &ri->segsum);
    459 	if (error)
    460 		goto out;
    461 
    462 	/* sanity checks */
    463 	magic = nilfs_rw32(ri->segsum.ss_magic);
    464 	if (magic != NILFS_SEGSUM_MAGIC) {
    465 		DPRINTF(VOLUMES, ("nilfs: bad magic in pseg %"PRIu64"\n",
    466 			ri->pseg));
    467 		error = EINVAL;
    468 		goto out;
    469 	}
    470 
    471 	/* TODO check segment summary checksum */
    472 	/* TODO check data checksum */
    473 
    474 	/* adjust our walking point if we have an odd size */
    475 	if (segsum_struct_size != nilfs_rw32(ri->segsum.ss_bytes)) {
    476 		printf("nilfs: WARNING encountered segsum_struct size %d in "
    477 			"pseg %"PRIu64"\n",
    478 			nilfs_rw32(ri->segsum.ss_bytes), ri->pseg);
    479 		/* XXX report as an error? */
    480 	}
    481 
    482 out:
    483 	if (bp)
    484 		brelse(bp, BC_AGE);
    485 
    486 	return error;
    487 }
    488 
    489 
    490 static int
    491 nilfs_load_super_root(struct nilfs_device *nilfsdev,
    492 	struct nilfs_recover_info *ri)
    493 {
    494 	struct nilfs_segment_summary *segsum = &ri->segsum;
    495 	struct buf *bp;
    496 	uint64_t blocknr, offset;
    497 	uint32_t segsum_size, size;
    498 	uint32_t nsumblk, nfileblk;
    499 	int error;
    500 
    501 	bp = NULL;
    502 
    503 	/* process segment summary */
    504 	segsum_size = nilfs_rw32(segsum->ss_sumbytes);
    505 	nsumblk     = (segsum_size - 1) / nilfsdev->blocksize + 1;
    506 	nfileblk    = nilfs_rw32(segsum->ss_nblocks) - nsumblk;
    507 
    508 	/* check if there is a superroot */
    509 	if ((nilfs_rw16(segsum->ss_flags) & NILFS_SS_SR) == 0) {
    510 		DPRINTF(VOLUMES, ("nilfs: no super root in pseg %"PRIu64"\n",
    511 			ri->pseg));
    512 		error = ENOENT;
    513 		goto out;
    514 	}
    515 
    516 	/* get our super root, located at the end of the pseg */
    517 	blocknr = ri->pseg + nsumblk + nfileblk - 1;
    518 	offset = 0;
    519 	size = sizeof(struct nilfs_super_root);
    520 	error = nilfs_get_segment_log(nilfsdev,
    521 			&blocknr, &offset, &bp,
    522 			size, (void *) &nilfsdev->super_root);
    523 	if (error) {
    524 		printf("read in of superroot failed\n");
    525 		error = EIO;
    526 	}
    527 	/* else got our super root! */
    528 
    529 out:
    530 	if (bp)
    531 		brelse(bp, BC_AGE);
    532 
    533 	return error;
    534 }
    535 
    536 /*
    537  * Search for the last super root recorded.
    538  */
    539 void
    540 nilfs_search_super_root(struct nilfs_device *nilfsdev)
    541 {
    542 	struct nilfs_super_block *super;
    543 	struct nilfs_segment_summary *segsum;
    544 	struct nilfs_recover_info *ri, *ori, *i_ri;
    545 	STAILQ_HEAD(,nilfs_recover_info) ri_list;
    546 	uint64_t seg_start, seg_end, cno;
    547 	uint32_t segsum_size;
    548 	uint32_t nsumblk, nfileblk;
    549 	int error;
    550 
    551 	STAILQ_INIT(&ri_list);
    552 
    553 	/* search for last super root */
    554 	ri = malloc(sizeof(struct nilfs_recover_info), M_NILFSTEMP, M_WAITOK);
    555 	memset(ri, 0, sizeof(struct nilfs_recover_info));
    556 
    557 	/* if enabled, start from the specified position */
    558 	if (0) {
    559 		/* start from set start */
    560 		nilfsdev->super.s_last_pseg = nilfsdev->super.s_first_data_block;
    561 		nilfsdev->super.s_last_cno  = nilfs_rw64(1);
    562 	}
    563 
    564 	ri->pseg   = nilfs_rw64(nilfsdev->super.s_last_pseg); /* blknr */
    565 	ri->segnum = nilfs_get_segnum_of_block(nilfsdev, ri->pseg);
    566 
    567 	error = 0;
    568 	cno = nilfs_rw64(nilfsdev->super.s_last_cno);
    569 	DPRINTF(VOLUMES, ("nilfs: seach_super_root start in pseg %"PRIu64"\n",
    570 			ri->pseg));
    571 	for (;;) {
    572 		DPRINTF(VOLUMES, (" at pseg %"PRIu64"\n", ri->pseg));
    573 		error = nilfs_load_segsum(nilfsdev, ri);
    574 		if (error)
    575 			break;
    576 
    577 		segsum = &ri->segsum;
    578 
    579 		/* try to load super root */
    580 		if (nilfs_rw16(segsum->ss_flags) & NILFS_SS_SR) {
    581 			DPRINTF(VOLUMES, (" try super root\n"));
    582 			error = nilfs_load_super_root(nilfsdev, ri);
    583 			if (error)
    584 				break;	/* confused */
    585 			/* wipe current list of ri */
    586 			while (!STAILQ_EMPTY(&ri_list)) {
    587 				i_ri = STAILQ_FIRST(&ri_list);
    588 				STAILQ_REMOVE_HEAD(&ri_list, next);
    589 				free(i_ri, M_NILFSTEMP);
    590 			}
    591 			super = &nilfsdev->super;
    592 
    593 			super->s_last_pseg = nilfs_rw64(ri->pseg);
    594 			super->s_last_cno  = cno++;
    595 			super->s_last_seq  = segsum->ss_seq;
    596 			super->s_state     = nilfs_rw16(NILFS_VALID_FS);
    597 		} else {
    598 			STAILQ_INSERT_TAIL(&ri_list, ri, next);
    599 			ori = ri;
    600 			ri = malloc(sizeof(struct nilfs_recover_info),
    601 				M_NILFSTEMP, M_WAITOK);
    602 			memset(ri, 0, sizeof(struct nilfs_recover_info));
    603 			ri->segnum = ori->segnum;
    604 			ri->pseg   = ori->pseg;
    605 			/* segsum keeps pointing to the `old' ri */
    606 		}
    607 
    608 		/* continue to the next pseg */
    609 		segsum_size = nilfs_rw32(segsum->ss_sumbytes);
    610 		nsumblk     = (segsum_size - 1) / nilfsdev->blocksize + 1;
    611 		nfileblk    = nilfs_rw32(segsum->ss_nblocks) - nsumblk;
    612 
    613 		/* calculate next partial segment location */
    614 		ri->pseg += nsumblk + nfileblk;
    615 
    616 		/* did we reach the end of the segment? if so, go to the next */
    617 		nilfs_get_segment_range(nilfsdev, ri->segnum, &seg_start, &seg_end);
    618 		if (ri->pseg >= seg_end)
    619 			ri->pseg = nilfs_rw64(segsum->ss_next);
    620 		ri->segnum = nilfs_get_segnum_of_block(nilfsdev, ri->pseg);
    621 	}
    622 
    623 	/*
    624 	 * XXX No roll-forward yet of the remaining partial segments.
    625 	 */
    626 
    627 	/* wipe current list of ri */
    628 	while (!STAILQ_EMPTY(&ri_list)) {
    629 		i_ri = STAILQ_FIRST(&ri_list);
    630 		STAILQ_REMOVE_HEAD(&ri_list, next);
    631 		printf("nilfs: ignoring pseg at %"PRIu64"\n", i_ri->pseg);
    632 		free(i_ri, M_NILFSTEMP);
    633 	}
    634 	free(ri, M_NILFSTEMP);
    635 }
    636 
    637 /* --------------------------------------------------------------------- */
    638 
    639 /*
    640  * Genfs interfacing
    641  *
    642  * static const struct genfs_ops nilfs_genfsops = {
    643  * 	.gop_size = genfs_size,
    644  * 		size of transfers
    645  * 	.gop_alloc = nilfs_gop_alloc,
    646  * 		allocate len bytes at offset
    647  * 	.gop_write = genfs_gop_write,
    648  * 		putpages interface code
    649  * 	.gop_markupdate = nilfs_gop_markupdate,
    650  * 		set update/modify flags etc.
    651  * }
    652  */
    653 
/*
 * Callback from genfs to allocate len bytes at offset off; only called when
 * filling up gaps in the allocation.
 *
 * Not implemented: apart from the debug messages nothing is done and
 * success (0) is returned.
 */
static int
nilfs_gop_alloc(struct vnode *vp, off_t off,
    off_t len, int flags, kauth_cred_t cred)
{
	DPRINTF(NOTIMPL, ("nilfs_gop_alloc not implemented\n"));
	DPRINTF(ALLOC, ("nilfs_gop_alloc called for %"PRIu64" bytes\n", len));

	/* nothing has been allocated */
	return 0;
}
    667 
    668 
    669 /*
    670  * callback from genfs to update our flags
    671  */
    672 static void
    673 nilfs_gop_markupdate(struct vnode *vp, int flags)
    674 {
    675 	struct nilfs_node *nilfs_node = VTOI(vp);
    676 	u_long mask = 0;
    677 
    678 	if ((flags & GOP_UPDATE_ACCESSED) != 0) {
    679 		mask = IN_ACCESS;
    680 	}
    681 	if ((flags & GOP_UPDATE_MODIFIED) != 0) {
    682 		if (vp->v_type == VREG) {
    683 			mask |= IN_CHANGE | IN_UPDATE;
    684 		} else {
    685 			mask |= IN_MODIFY;
    686 		}
    687 	}
    688 	if (mask) {
    689 		nilfs_node->i_flags |= mask;
    690 	}
    691 }
    692 
    693 
/* genfs operations vector handed to genfs_node_init() for every nilfs node */
static const struct genfs_ops nilfs_genfsops = {
	.gop_size = genfs_size,
	.gop_alloc = nilfs_gop_alloc,		/* stub, allocates nothing */
	.gop_write = genfs_gop_write_rwmap,
	.gop_markupdate = nilfs_gop_markupdate,	/* sets IN_* flags */
};
    700 
    701 /* --------------------------------------------------------------------- */
    702 
    703 static void
    704 nilfs_register_node(struct nilfs_node *node)
    705 {
    706 	struct nilfs_mount *ump;
    707 	struct nilfs_node *chk;
    708 	uint32_t hashline;
    709 
    710 	ump = node->ump;
    711 	mutex_enter(&ump->ihash_lock);
    712 
    713 	/* add to our hash table */
    714 	hashline = nilfs_calchash(node->ino) & NILFS_INODE_HASHMASK;
    715 #ifdef DEBUG
    716 	LIST_FOREACH(chk, &ump->nilfs_nodes[hashline], hashchain) {
    717 		assert(chk);
    718 		if (chk->ino == node->ino)
    719 			panic("Double node entered\n");
    720 	}
    721 #else
    722 	chk = NULL;
    723 #endif
    724 	LIST_INSERT_HEAD(&ump->nilfs_nodes[hashline], node, hashchain);
    725 
    726 	mutex_exit(&ump->ihash_lock);
    727 }
    728 
    729 
    730 static void
    731 nilfs_deregister_node(struct nilfs_node *node)
    732 {
    733 	struct nilfs_mount *ump;
    734 
    735 	ump = node->ump;
    736 	mutex_enter(&ump->ihash_lock);
    737 
    738 	/* remove from hash list */
    739 	LIST_REMOVE(node, hashchain);
    740 
    741 	mutex_exit(&ump->ihash_lock);
    742 }
    743 
    744 
/*
 * Look up the in-core node for inode `ino' in the hash table of `ump'.
 *
 * Returns the node after a successful vget() with LK_EXCLUSIVE on its
 * vnode, or NULL when no node with that inode number is in the table.
 */
static struct nilfs_node *
nilfs_hash_lookup(struct nilfs_mount *ump, ino_t ino)
{
	struct nilfs_node *node;
	struct vnode *vp;
	uint32_t hashline;

loop:
	mutex_enter(&ump->ihash_lock);

	/* search our hash table */
	hashline = nilfs_calchash(ino) & NILFS_INODE_HASHMASK;
	LIST_FOREACH(node, &ump->nilfs_nodes[hashline], hashchain) {
		assert(node);
		if (node->ino == ino) {
			vp = node->vnode;
			assert(vp);
			/*
			 * take the vnode interlock before dropping the hash
			 * lock and hand it to vget() via LK_INTERLOCK
			 */
			mutex_enter(&vp->v_interlock);
			mutex_exit(&ump->ihash_lock);
			/* vget() failed: start the search all over again */
			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
				goto loop;
			return node;
		}
	}
	mutex_exit(&ump->ihash_lock);

	/* not in the table */
	return NULL;
}
    773 
    774 
/* node action implementers: vnode operations vector, defined elsewhere */
extern int (**nilfs_vnodeop_p)(void *);
    777 
    778 int
    779 nilfs_get_node_raw(struct nilfs_device *nilfsdev, struct nilfs_mount *ump,
    780 	uint64_t ino, struct nilfs_inode *inode, struct nilfs_node **nodep)
    781 {
    782 	struct nilfs_node *node;
    783 	struct vnode *nvp;
    784 	struct mount *mp;
    785 	int (**vnodeops)(void *);
    786 	int error;
    787 
    788 	*nodep = NULL;
    789 	vnodeops = nilfs_vnodeop_p;
    790 
    791 	/* associate with mountpoint if present*/
    792 	mp = ump? ump->vfs_mountp : NULL;
    793 	error = getnewvnode(VT_NILFS, mp, vnodeops, &nvp);
    794 	if (error)
    795 		return error;
    796 
    797 	/* lock node */
    798 	error = vn_lock(nvp, LK_EXCLUSIVE | LK_RETRY);
    799 	if (error) {
    800 		nvp->v_data = NULL;
    801 		ungetnewvnode(nvp);
    802 		return error;
    803 	}
    804 
    805 	node = pool_get(&nilfs_node_pool, PR_WAITOK);
    806 	memset(node, 0, sizeof(struct nilfs_node));
    807 
    808 	/* crosslink */
    809 	node->vnode    = nvp;
    810 	node->ump      = ump;
    811 	node->nilfsdev = nilfsdev;
    812 	nvp->v_data    = node;
    813 
    814 	/* initiase nilfs node */
    815 	node->ino   = ino;
    816 	node->inode = *inode;
    817 	node->lockf = NULL;
    818 
    819 	/* needed? */
    820 	mutex_init(&node->node_mutex, MUTEX_DEFAULT, IPL_NONE);
    821 	cv_init(&node->node_lock, "nilfs_nlk");
    822 
    823 	/* initialise genfs */
    824 	genfs_node_init(nvp, &nilfs_genfsops);
    825 
    826 	/* check if we're fetching the root */
    827 	if (ino == NILFS_ROOT_INO)
    828 		nvp->v_vflag |= VV_ROOT;
    829 
    830 	/* update vnode's file type XXX is there a function for this? */
    831 	nvp->v_type = VREG;
    832 	if (S_ISDIR(inode->i_mode))
    833 		nvp->v_type = VDIR;
    834 	if (S_ISLNK(inode->i_mode))
    835 		nvp->v_type = VLNK;
    836 #if 0
    837 	if (S_ISCHR(inode->i_mode))
    838 		nvp->v_type = VCHR;
    839 	if (S_ISBLK(inode->i_mode))
    840 		nvp->v_type = VBLK;
    841 #endif
    842 	/* XXX what else? */
    843 
    844 	/* fixup inode size for system nodes */
    845 	if ((ino < NILFS_USER_INO) && (ino != NILFS_ROOT_INO)) {
    846 		DPRINTF(VOLUMES, ("NEED TO GET my size for inode %"PRIu64"\n",
    847 			ino));
    848 		/* for now set it to maximum, -1 is illegal */
    849 		inode->i_size = nilfs_rw64(((uint64_t) -2));
    850 	}
    851 
    852 	uvm_vnp_setsize(nvp, nilfs_rw64(inode->i_size));
    853 
    854 	if (ump)
    855 		nilfs_register_node(node);
    856 
    857 	/* return node */
    858 	*nodep = node;
    859 	return 0;
    860 }
    861 
    862 
    863 int
    864 nilfs_get_node(struct nilfs_mount *ump, uint64_t ino, struct nilfs_node **nodep)
    865 {
    866 	struct nilfs_device *nilfsdev;
    867 	struct nilfs_inode   inode, *entry;
    868 	struct buf *bp;
    869 	uint64_t ivblocknr;
    870 	uint32_t entry_in_block;
    871 	int error;
    872 
    873 	/* lookup node in hash table */
    874 	*nodep = nilfs_hash_lookup(ump, ino);
    875 	if (*nodep)
    876 		return 0;
    877 
    878 	/* lock to disallow simultanious creation of same udf_node */
    879 	mutex_enter(&ump->get_node_lock);
    880 
    881 	/* relookup since it could be created while waiting for the mutex */
    882 	*nodep = nilfs_hash_lookup(ump, ino);
    883 	if (*nodep) {
    884 		mutex_exit(&ump->get_node_lock);
    885 		return 0;
    886 	}
    887 
    888 	/* create new inode; XXX check could be handier */
    889 	if ((ino < NILFS_ATIME_INO) && (ino != NILFS_ROOT_INO)) {
    890 		printf("nilfs_get_node: system ino %"PRIu64" not in mount "
    891 			"point!\n", ino);
    892 		mutex_exit(&ump->get_node_lock);
    893 		return ENOENT;
    894 	}
    895 
    896 	/* lookup inode in the ifile */
    897 	DPRINTF(NODE, ("lookup ino %"PRIu64"\n", ino));
    898 
    899 	/* lookup inode structure in mountpoints ifile */
    900 	nilfsdev = ump->nilfsdev;
    901 	nilfs_mdt_trans(&nilfsdev->ifile_mdt, ino, &ivblocknr, &entry_in_block);
    902 
    903 	error = nilfs_bread(ump->ifile_node, ivblocknr, NOCRED, 0, &bp);
    904 	if (error) {
    905 		mutex_exit(&ump->get_node_lock);
    906 		return ENOENT;
    907 	}
    908 
    909 	/* get inode entry */
    910 	entry =  (struct nilfs_inode *) bp->b_data + entry_in_block;
    911 	inode = *entry;
    912 	brelse(bp, BC_AGE);
    913 
    914 	/* get node */
    915 	error = nilfs_get_node_raw(ump->nilfsdev, ump, ino, &inode, nodep);
    916 	mutex_exit(&ump->get_node_lock);
    917 
    918 	return error;
    919 }
    920 
    921 
    922 void
    923 nilfs_dispose_node(struct nilfs_node **nodep)
    924 {
    925 	struct vnode *vp;
    926 	struct nilfs_node *node;
    927 
    928 	/* protect against rogue values */
    929 	if (!*nodep)
    930 		return;
    931 
    932 	node = *nodep;
    933 	vp = node->vnode;
    934 
    935 	/* remove dirhash if present */
    936 	dirhash_purge(&node->dir_hash);
    937 
    938 	/* remove from our hash lookup table */
    939 	if (node->ump)
    940 		nilfs_deregister_node(node);
    941 
    942 	/* destroy our locks */
    943 	mutex_destroy(&node->node_mutex);
    944 	cv_destroy(&node->node_lock);
    945 
    946 	/* dissociate from our vnode */
    947 	genfs_node_destroy(node->vnode);
    948 	vp->v_data = NULL;
    949 
    950 	/* free our associated memory */
    951 	pool_put(&nilfs_node_pool, node);
    952 
    953 	*nodep = NULL;
    954 }
    955 
    956 
/* Update the timestamps of a node; currently an empty stub. */
void
nilfs_itimes(struct nilfs_node *node, struct timespec *acc,
	struct timespec *mod, struct timespec *birth)
{
	/* nothing to do yet */
}
    962 
    963 
/*
 * Update a node on disc; not supported, always fails with EROFS without
 * looking at its arguments.
 */
int
nilfs_update(struct vnode *node, struct timespec *acc,
	struct timespec *mod, struct timespec *birth, int updflags)
{
	return EROFS;
}
    970 
    971 
/*
 * Change-size hook for the file behind `vp'.
 *
 * Stub: `newsize' and `cred' are not examined; always returns EROFS
 * (read-only file system).
 */
int
nilfs_chsize(struct vnode *vp, u_quad_t newsize, kauth_cred_t cred)
{
	return EROFS;
}
    977 
    978 
    979 
/*
 * Grow hook for a node.
 *
 * Stub: neither `node' nor `new_size' is examined; always returns EROFS
 * (read-only file system).
 */
int
nilfs_grow_node(struct nilfs_node *node, uint64_t new_size)
{
	return EROFS;
}
    985 
    986 
/*
 * Shrink hook for a node.
 *
 * Stub: neither `node' nor `new_size' is examined; always returns EROFS
 * (read-only file system).
 */
int
nilfs_shrink_node(struct nilfs_node *node, uint64_t new_size)
{
	return EROFS;
}
    992 
    993 
    994 static int
    995 dirhash_fill(struct nilfs_node *dir_node)
    996 {
    997 	struct vnode *dvp = dir_node->vnode;
    998 	struct dirhash *dirh;
    999 	struct nilfs_dir_entry *ndirent;
   1000 	struct dirent dirent;
   1001 	struct buf *bp;
   1002 	uint64_t file_size, diroffset, blkoff;
   1003 	uint64_t blocknr;
   1004 	uint32_t blocksize = dir_node->nilfsdev->blocksize;
   1005 	uint8_t *pos, name_len;
   1006 	int error;
   1007 
   1008 	DPRINTF(CALL, ("dirhash_fill called\n"));
   1009 
   1010 	if (dvp->v_type != VDIR)
   1011 		return ENOTDIR;
   1012 
   1013 	/* make sure we have a dirhash to work on */
   1014 	dirh = dir_node->dir_hash;
   1015 	KASSERT(dirh);
   1016 	KASSERT(dirh->refcnt > 0);
   1017 
   1018 	if (dirh->flags & DIRH_BROKEN)
   1019 		return EIO;
   1020 
   1021 	if (dirh->flags & DIRH_COMPLETE)
   1022 		return 0;
   1023 
   1024 	DPRINTF(DIRHASH, ("Filling directory hash\n"));
   1025 
   1026 	/* make sure we have a clean dirhash to add to */
   1027 	dirhash_purge_entries(dirh);
   1028 
   1029 	/* get directory filesize */
   1030 	file_size = nilfs_rw64(dir_node->inode.i_size);
   1031 
   1032 	/* walk the directory */
   1033 	error = 0;
   1034 	diroffset = 0;
   1035 
   1036 	blocknr = diroffset / blocksize;
   1037 	blkoff  = diroffset % blocksize;
   1038 	error = nilfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
   1039 	if (error) {
   1040 		dirh->flags |= DIRH_BROKEN;
   1041 		dirhash_purge_entries(dirh);
   1042 		return EIO;
   1043 	}
   1044 	while (diroffset < file_size) {
   1045 		DPRINTF(READDIR, ("filldir : offset = %"PRIu64"\n",
   1046 			diroffset));
   1047 		if (blkoff >= blocksize) {
   1048 			blkoff = 0; blocknr++;
   1049 			brelse(bp, BC_AGE);
   1050 			error = nilfs_bread(dir_node, blocknr, NOCRED, 0,
   1051 					&bp);
   1052 			if (error) {
   1053 				dirh->flags |= DIRH_BROKEN;
   1054 				dirhash_purge_entries(dirh);
   1055 				return EIO;
   1056 			}
   1057 		}
   1058 
   1059 		/* read in one dirent */
   1060 		pos = (uint8_t *) bp->b_data + blkoff;
   1061 		ndirent = (struct nilfs_dir_entry *) pos;
   1062 		name_len = ndirent->name_len;
   1063 
   1064 		memset(&dirent, 0, sizeof(struct dirent));
   1065 		dirent.d_fileno = nilfs_rw64(ndirent->inode);
   1066 		dirent.d_type   = ndirent->file_type;	/* 1:1 ? */
   1067 		dirent.d_namlen = name_len;
   1068 		strncpy(dirent.d_name, ndirent->name, name_len);
   1069 		dirent.d_reclen = _DIRENT_SIZE(&dirent);
   1070 		DPRINTF(DIRHASH, ("copying `%*.*s`\n", name_len,
   1071 			name_len, dirent.d_name));
   1072 
   1073 		/* XXX is it deleted? extra free space? */
   1074 		dirhash_enter(dirh, &dirent, diroffset,
   1075 			nilfs_rw16(ndirent->rec_len), 0);
   1076 
   1077 		/* advance */
   1078 		diroffset += nilfs_rw16(ndirent->rec_len);
   1079 		blkoff    += nilfs_rw16(ndirent->rec_len);
   1080 	}
   1081 	brelse(bp, BC_AGE);
   1082 
   1083 	dirh->flags |= DIRH_COMPLETE;
   1084 
   1085 	return 0;
   1086 }
   1087 
   1088 
   1089 int
   1090 nilfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
   1091 		uint64_t *ino, int *found)
   1092 {
   1093 	struct nilfs_node	*dir_node = VTOI(dvp);
   1094 	struct nilfs_dir_entry *ndirent;
   1095 	struct dirhash		*dirh;
   1096 	struct dirhash_entry	*dirh_ep;
   1097 	struct buf *bp;
   1098 	uint64_t diroffset, blkoff;
   1099 	uint64_t blocknr;
   1100 	uint32_t blocksize = dir_node->nilfsdev->blocksize;
   1101 	uint8_t *pos;
   1102 	int hit, error;
   1103 
   1104 	/* set default return */
   1105 	*found = 0;
   1106 
   1107 	/* get our dirhash and make sure its read in */
   1108 	dirhash_get(&dir_node->dir_hash);
   1109 	error = dirhash_fill(dir_node);
   1110 	if (error) {
   1111 		dirhash_put(dir_node->dir_hash);
   1112 		return error;
   1113 	}
   1114 	dirh = dir_node->dir_hash;
   1115 
   1116 	/* allocate temporary space for fid */
   1117 
   1118 	DPRINTF(DIRHASH, ("dirhash_lookup looking for `%*.*s`\n",
   1119 		namelen, namelen, name));
   1120 
   1121 	/* search our dirhash hits */
   1122 	*ino = 0;
   1123 	dirh_ep = NULL;
   1124 	for (;;) {
   1125 		hit = dirhash_lookup(dirh, name, namelen, &dirh_ep);
   1126 		/* if no hit, abort the search */
   1127 		if (!hit)
   1128 			break;
   1129 
   1130 		/* check this hit */
   1131 		diroffset = dirh_ep->offset;
   1132 
   1133 		blocknr = diroffset / blocksize;
   1134 		blkoff  = diroffset % blocksize;
   1135 		error = nilfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
   1136 		if (error)
   1137 			return EIO;
   1138 
   1139 		/* read in one dirent */
   1140 		pos = (uint8_t *) bp->b_data + blkoff;
   1141 		ndirent = (struct nilfs_dir_entry *) pos;
   1142 
   1143 		DPRINTF(DIRHASH, ("dirhash_lookup\tchecking `%*.*s`\n",
   1144 			ndirent->name_len, ndirent->name_len, ndirent->name));
   1145 
   1146 		/* see if its our entry */
   1147 		KASSERT(ndirent->name_len == namelen);
   1148 		if (strncmp(ndirent->name, name, namelen) == 0) {
   1149 			*found = 1;
   1150 			*ino = nilfs_rw64(ndirent->inode);
   1151 			brelse(bp, BC_AGE);
   1152 			break;
   1153 		}
   1154 		brelse(bp, BC_AGE);
   1155 	}
   1156 
   1157 	dirhash_put(dir_node->dir_hash);
   1158 
   1159 	return error;
   1160 }
   1161 
   1162 
/*
 * Hook for removing `node' (named by `cnp') from directory `dir_node'.
 *
 * Stub: no argument is examined; always returns EROFS (read-only file
 * system).
 */
int
nilfs_dir_detach(struct nilfs_mount *ump, struct nilfs_node *dir_node,
	struct nilfs_node *node, struct componentname *cnp)
{
	return EROFS;
}
   1168 
   1169 
/*
 * Hook for adding `node' (named by `cnp', attributes `vap') to directory
 * `dir_node'.
 *
 * Stub: no argument is examined; always returns EROFS (read-only file
 * system).
 */
int
nilfs_dir_attach(struct nilfs_mount *ump, struct nilfs_node *dir_node,
	struct nilfs_node *node, struct vattr *vap,
	struct componentname *cnp)
{
	return EROFS;
}
   1175 
   1176 
/*
 * Hook for creating a new node in directory `dvp'.
 *
 * Stub: no argument is examined and *vpp is left untouched; always returns
 * EROFS (read-only file system).
 */
/* XXX return vnode? */
int
nilfs_create_node(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
	struct componentname *cnp)
{
	return EROFS;
}
   1183 
   1184 
/*
 * Hook for deleting a node.
 *
 * Stub: the body is empty, so `node' is neither read nor modified.
 */
void
nilfs_delete_node(struct nilfs_node *node)
{
}
   1189 
   1190 
   1191