      1 /*	$NetBSD: chfs_readinode.c,v 1.2.6.2 2012/04/17 00:08:54 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2010 Department of Software Engineering,
      5  *		      University of Szeged, Hungary
       6  * Copyright (C) 2010 David Tengeri <dtengeri@inf.u-szeged.hu>
       7  * Copyright (C) 2010 Tamas Toth <ttoth@inf.u-szeged.hu>
       8  * Copyright (C) 2010 Adam Hoka <ahoka@NetBSD.org>
      9  * All rights reserved.
     10  *
     11  * This code is derived from software contributed to The NetBSD Foundation
     12  * by the Department of Software Engineering, University of Szeged, Hungary
     13  *
     14  * Redistribution and use in source and binary forms, with or without
     15  * modification, are permitted provided that the following conditions
     16  * are met:
     17  * 1. Redistributions of source code must retain the above copyright
     18  *    notice, this list of conditions and the following disclaimer.
     19  * 2. Redistributions in binary form must reproduce the above copyright
     20  *    notice, this list of conditions and the following disclaimer in the
     21  *    documentation and/or other materials provided with the distribution.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 /*
     37  * chfs_readinode.c
     38  *
     39  *  Created on: 2010.05.31.
     40  *      Author: dtengeri
     41  */
     42 
     43 #include <sys/buf.h>
     44 
     45 #include "chfs.h"
     46 
     47 /* tmp node operations */
     48 int chfs_check_td_data(struct chfs_mount *,
     49     struct chfs_tmp_dnode *);
     50 int chfs_check_td_node(struct chfs_mount *,
     51     struct chfs_tmp_dnode *);
     52 struct chfs_node_ref *chfs_first_valid_data_ref(struct chfs_node_ref *);
     53 int chfs_add_tmp_dnode_to_tree(struct chfs_mount *,
     54     struct chfs_readinode_info *,
     55     struct chfs_tmp_dnode *);
     56 void chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *,
     57 	struct chfs_tmp_dnode *);
     58 void chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *,
     59 	struct chfs_tmp_dnode *);
     60 static void chfs_kill_td(struct chfs_mount *,
     61     struct chfs_tmp_dnode *);
     62 static void chfs_kill_tdi(struct chfs_mount *,
     63     struct chfs_tmp_dnode_info *);
     64 /* frag node operations */
     65 struct chfs_node_frag *new_fragment(struct chfs_full_dnode *,
     66     uint32_t,
     67     uint32_t);
     68 int no_overlapping_node(struct rb_tree *, struct chfs_node_frag *,
     69     struct chfs_node_frag *, uint32_t);
     70 int chfs_add_frag_to_fragtree(struct chfs_mount *,
     71     struct rb_tree *,
     72     struct chfs_node_frag *);
     73 void chfs_obsolete_node_frag(struct chfs_mount *,
     74     struct chfs_node_frag *);
     75 /* general node operations */
     76 int chfs_get_data_nodes(struct chfs_mount *,
     77     struct chfs_inode *,
     78     struct chfs_readinode_info *);
     79 int chfs_build_fragtree(struct chfs_mount *,
     80     struct chfs_inode *,
     81     struct chfs_readinode_info *);
     82 
     83 
     84 
     85 /*
     86  * --------------------------
     87  * tmp node rbtree operations
     88  * --------------------------
     89  */
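         /*
          * The read path collects the still-valid data nodes of an inode into
          * a temporary red-black tree keyed by the file offset stored in each
          * node.  A minimal sketch of how these operations are wired up (the
          * same calls appear later in this file):
          *
          *	struct chfs_readinode_info rii;
          *	rb_tree_init(&rii.tdi_root, &tmp_node_rbtree_ops);
          *	... rb_tree_find_node(&rii.tdi_root, &ofs) ...
          */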
     90 static signed int
     91 tmp_node_compare_nodes(void *ctx, const void *n1, const void *n2)
     92 {
     93 	const struct chfs_tmp_dnode_info *tdi1 = n1;
     94 	const struct chfs_tmp_dnode_info *tdi2 = n2;
     95 
     96 	return (tdi1->tmpnode->node->ofs - tdi2->tmpnode->node->ofs);
     97 }
     98 
     99 static signed int
    100 tmp_node_compare_key(void *ctx, const void *n, const void *key)
    101 {
    102 	const struct chfs_tmp_dnode_info *tdi = n;
    103 	uint64_t ofs =  *(const uint64_t *)key;
    104 
    105 	return (tdi->tmpnode->node->ofs - ofs);
    106 }
    107 
    108 const rb_tree_ops_t tmp_node_rbtree_ops = {
    109 	.rbto_compare_nodes = tmp_node_compare_nodes,
    110 	.rbto_compare_key = tmp_node_compare_key,
    111 	.rbto_node_offset = offsetof(struct chfs_tmp_dnode_info, rb_node),
    112 	.rbto_context = NULL
    113 };
    114 
    115 
    116 /*
    117  * ---------------------------
    118  * frag node rbtree operations
    119  * ---------------------------
    120  */
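         /*
          * The final fragment tree of an inode (ip->fragtree) uses these
          * operations; fragments are keyed by their file offset.  The read
          * path sets it up roughly as:
          *
          *	rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
          */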
    121 static signed int
    122 frag_compare_nodes(void *ctx, const void *n1, const void *n2)
    123 {
    124 	const struct chfs_node_frag *frag1 = n1;
    125 	const struct chfs_node_frag *frag2 = n2;
    126 
    127 	return (frag1->ofs - frag2->ofs);
    128 }
    129 
    130 static signed int
    131 frag_compare_key(void *ctx, const void *n, const void *key)
    132 {
    133 	const struct chfs_node_frag *frag = n;
    134 	uint64_t ofs = *(const uint64_t *)key;
    135 
    136 	return (frag->ofs - ofs);
    137 }
    138 
    139 const rb_tree_ops_t frag_rbtree_ops = {
    140 	.rbto_compare_nodes = frag_compare_nodes,
    141 	.rbto_compare_key   = frag_compare_key,
    142 	.rbto_node_offset = offsetof(struct chfs_node_frag, rb_node),
    143 	.rbto_context = NULL
    144 };
    145 
    146 
    147 /*
    148  * -------------------
    149  * tmp node operations
    150  * -------------------
    151  */
    152 /*
    153  * Check the data CRC of the node.
    154  *
    155  * Returns: 0 - if everything OK;
    156  * 	    	1 - if CRC is incorrect;
     157  * 	    	2 - if the check could not be completed
     158  *	    	    (allocation failure, read error or short read).
    159  */
    160 int
    161 chfs_check_td_data(struct chfs_mount *chmp,
    162     struct chfs_tmp_dnode *td)
    163 {
    164 	int err;
    165 	size_t retlen, len, totlen;
    166 	uint32_t crc;
    167 	uint64_t ofs;
    168 	char *buf;
    169 	struct chfs_node_ref *nref = td->node->nref;
    170 
    171 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    172 	KASSERT(!mutex_owned(&chmp->chm_lock_sizes));
    173 
    174 	ofs = CHFS_GET_OFS(nref->nref_offset) + sizeof(struct chfs_flash_data_node);
    175 	len = td->node->size;
    176 	if (!len)
    177 		return 0;
    178 
    179 	buf = kmem_alloc(len, KM_SLEEP);
    180 	if (!buf) {
     181 		dbg("allocation error\n");
    182 		return 2;
    183 	}
    184 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, ofs, len, &retlen);
    185 	if (err) {
     186 		dbg("error while reading: %d\n", err);
    187 		err = 2;
    188 		goto out;
    189 	}
    190 
    191 	if (len != retlen) {
    192 		dbg("len:%zu, retlen:%zu\n", len, retlen);
    193 		err = 2;
    194 		goto out;
    195 	}
    196 	crc = crc32(0, (uint8_t *)buf, len);
    197 
    198 	if (crc != td->data_crc) {
    199 		dbg("crc failed, calculated: 0x%x, orig: 0x%x\n", crc, td->data_crc);
    200 		kmem_free(buf, len);
    201 		return 1;
    202 	}
    203 
    204 	nref->nref_offset = CHFS_GET_OFS(nref->nref_offset) | CHFS_NORMAL_NODE_MASK;
    205 	totlen = CHFS_PAD(sizeof(struct chfs_flash_data_node) + len);
    206 
    207 	mutex_enter(&chmp->chm_lock_sizes);
    208 	chfs_change_size_unchecked(chmp, &chmp->chm_blocks[nref->nref_lnr], -totlen);
    209 	chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
    210 	mutex_exit(&chmp->chm_lock_sizes);
    211 	KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);
    212 
    213 	err = 0;
    214 out:
    215 	kmem_free(buf, len);
    216 	return err;
    217 }
    218 
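         /*
          * chfs_check_td_node - checks a temporary node
          * Nodes that have already been checked are left alone; otherwise the
          * data CRC is verified and, if it does not match, the node is marked
          * obsolete.  Returns the result of chfs_check_td_data().
          */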
    219 int
    220 chfs_check_td_node(struct chfs_mount *chmp, struct chfs_tmp_dnode *td)
    221 {
    222 	int ret;
    223 
    224 	if (CHFS_REF_FLAGS(td->node->nref) != CHFS_UNCHECKED_NODE_MASK)
    225 		return 0;
    226 
    227 	ret = chfs_check_td_data(chmp, td);
    228 	if (ret == 1) {
    229 		chfs_mark_node_obsolete(chmp, td->node->nref);
    230 	}
    231 	return ret;
    232 }
    233 
    234 
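         /*
          * chfs_first_valid_data_ref - returns the first non-obsolete node
          * reference in the chain starting at nref, or NULL if there is none.
          */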
    235 struct chfs_node_ref *
    236 chfs_first_valid_data_ref(struct chfs_node_ref *nref)
    237 {
    238 	while (nref) {
    239 		if (!CHFS_REF_OBSOLETE(nref)) {
    240 #ifdef DGB_MSG_GC
    241 			if (nref->nref_lnr == REF_EMPTY_NODE) {
    242 				dbg("FIRST VALID IS EMPTY!\n");
    243 			}
    244 #endif
    245 			return nref;
    246 		}
    247 
    248 		if (nref->nref_next) {
    249 			nref = nref->nref_next;
    250 		} else
    251 			break;
    252 	}
    253 	return NULL;
    254 }
    255 
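         /*
          * chfs_add_tmp_dnode_to_tdi - appends a temporary node to the end of
          * a tmp dnode info's singly linked node list
          */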
    256 void
    257 chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *tdi,
    258 	struct chfs_tmp_dnode *td)
    259 {
    260 	if (!tdi->tmpnode) {
    261 		tdi->tmpnode = td;
    262 	} else {
    263 		struct chfs_tmp_dnode *tmp = tdi->tmpnode;
    264 		while (tmp->next) {
    265 			tmp = tmp->next;
    266 		}
    267 		tmp->next = td;
    268 	}
    269 }
    270 
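         /*
          * chfs_remove_tmp_dnode_from_tdi - unlinks a temporary node from a
          * tmp dnode info's node list
          */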
    271 void
    272 chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *tdi,
    273 	struct chfs_tmp_dnode *td)
    274 {
    275 	if (tdi->tmpnode == td) {
    276 		tdi->tmpnode = tdi->tmpnode->next;
    277 	} else {
    278 		struct chfs_tmp_dnode *tmp = tdi->tmpnode->next;
    279 		while (tmp->next && tmp->next != td) {
    280 			tmp = tmp->next;
    281 		}
    282 		if (tmp->next) {
    283 			tmp->next = td->next;
    284 		}
    285 	}
    286 }
    287 
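         /* chfs_kill_td - marks a temporary node obsolete (if needed) and frees it */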
    288 static void
    289 chfs_kill_td(struct chfs_mount *chmp,
    290     struct chfs_tmp_dnode *td)
    291 {
    292 	/* check if we need to mark as obsolete, to avoid double mark */
    293 	if (!CHFS_REF_OBSOLETE(td->node->nref)) {
    294 		chfs_mark_node_obsolete(chmp, td->node->nref);
    295 	}
    296 
    297 	chfs_free_tmp_dnode(td);
    298 }
    299 
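         /* chfs_kill_tdi - kills every temporary node of tdi, then frees tdi itself */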
    300 static void
    301 chfs_kill_tdi(struct chfs_mount *chmp,
    302     struct chfs_tmp_dnode_info *tdi)
    303 {
    304 	struct chfs_tmp_dnode *next, *tmp = tdi->tmpnode;
    305 
    306 	while (tmp) {
    307 		next = tmp->next;
    308 		chfs_kill_td(chmp, tmp);
    309 		tmp = next;
    310 	}
    311 
    312 	chfs_free_tmp_dnode_info(tdi);
    313 }
    314 
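         /*
          * chfs_add_tmp_dnode_to_tree - adds a temporary node to the per-inode
          * tmp node tree (rii->tdi_root)
          * The new node is compared against every already collected node that
          * overlaps it: older nodes that it completely covers are killed (once
          * the new node's data has been verified), and the new node itself is
          * killed if a newer, valid node already covers it.  The surviving
          * node is then inserted and the 'overlapped' flags of its neighbours
          * are updated.
          */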
    315 int
    316 chfs_add_tmp_dnode_to_tree(struct chfs_mount *chmp,
    317     struct chfs_readinode_info *rii,
    318     struct chfs_tmp_dnode *newtd)
    319 {
    320 	uint64_t end_ofs = newtd->node->ofs + newtd->node->size;
    321 	struct chfs_tmp_dnode_info *this;
    322 	struct rb_node *node, *prev_node;
    323 	struct chfs_tmp_dnode_info *newtdi;
    324 
    325 	node = rb_tree_find_node(&rii->tdi_root, &newtd->node->ofs);
    326 	if (node) {
    327 		this = (struct chfs_tmp_dnode_info *)node;
    328 		while (this->tmpnode->overlapped) {
    329 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
    330 			if (!prev_node) {
    331 				this->tmpnode->overlapped = 0;
    332 				break;
    333 			}
    334 			node = prev_node;
    335 			this = (struct chfs_tmp_dnode_info *)node;
    336 		}
    337 	}
    338 	while (node) {
    339 		this = (struct chfs_tmp_dnode_info *)node;
    340 		if (this->tmpnode->node->ofs > end_ofs)
    341 			break;
    342 
    343 		struct chfs_tmp_dnode *tmp_td = this->tmpnode;
    344 		while (tmp_td) {
    345 			if (tmp_td->version == newtd->version) {
    346 				if (!chfs_check_td_node(chmp, tmp_td)) {
    347 					dbg("calling kill td 0\n");
    348 					chfs_kill_td(chmp, newtd);
    349 					return 0;
    350 				} else {
    351 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    352 					chfs_kill_td(chmp, tmp_td);
    353 					chfs_add_tmp_dnode_to_tdi(this, newtd);
    354 					return 0;
    355 				}
    356 			}
    357 			if (tmp_td->version < newtd->version &&
    358 				tmp_td->node->ofs >= newtd->node->ofs &&
    359 				tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
    360 				/* New node entirely overlaps 'this' */
    361 				if (chfs_check_td_node(chmp, newtd)) {
    362 					dbg("calling kill td 2\n");
    363 					chfs_kill_td(chmp, newtd);
    364 					return 0;
    365 				}
    366 				/* ... and is good. Kill 'this' and any subsequent nodes which are also overlapped */
    367 				while (tmp_td && tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
    368 					struct rb_node *next = rb_tree_iterate(&rii->tdi_root, this, RB_DIR_RIGHT);
    369 					struct chfs_tmp_dnode_info *next_tdi = (struct chfs_tmp_dnode_info *)next;
    370 					struct chfs_tmp_dnode *next_td = NULL;
    371 					if (tmp_td->next) {
    372 						next_td = tmp_td->next;
    373 					} else if (next_tdi) {
    374 						next_td = next_tdi->tmpnode;
    375 					}
    376 					if (tmp_td->version < newtd->version) {
    377 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    378 						chfs_kill_td(chmp, tmp_td);
    379 						if (!this->tmpnode) {
    380 							rb_tree_remove_node(&rii->tdi_root, this);
    381 							chfs_kill_tdi(chmp, this);
    382 							this = next_tdi;
    383 						}
    384 					}
    385 					tmp_td = next_td;
    386 				}
    387 				continue;
    388 			}
    389 			if (tmp_td->version > newtd->version &&
    390 				tmp_td->node->ofs <= newtd->node->ofs &&
    391 				tmp_td->node->ofs + tmp_td->node->size >= end_ofs) {
    392 				/* New node entirely overlapped by 'this' */
    393 				if (!chfs_check_td_node(chmp, tmp_td)) {
    394 					dbg("this version: %llu\n",
    395 						(unsigned long long)tmp_td->version);
    396 					dbg("this ofs: %llu, size: %u\n",
    397 						(unsigned long long)tmp_td->node->ofs,
    398 						tmp_td->node->size);
    399 					dbg("calling kill td 4\n");
    400 					chfs_kill_td(chmp, newtd);
    401 					return 0;
    402 				}
    403 				/* ... but 'this' was bad. Replace it... */
    404 				chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    405 				chfs_kill_td(chmp, tmp_td);
    406 				if (!this->tmpnode) {
    407 					rb_tree_remove_node(&rii->tdi_root, this);
    408 					chfs_kill_tdi(chmp, this);
    409 				}
    410 				dbg("calling kill td 5\n");
    411 				chfs_kill_td(chmp, newtd);
    412 				break;
    413 			}
    414 			tmp_td = tmp_td->next;
    415 		}
    416 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
    417 	}
    418 
    419 	newtdi = chfs_alloc_tmp_dnode_info();
    420 	chfs_add_tmp_dnode_to_tdi(newtdi, newtd);
    421 	/* We neither completely obsoleted nor were completely
    422 	   obsoleted by an earlier node. Insert into the tree */
    423 	struct chfs_tmp_dnode_info *tmp_tdi = rb_tree_insert_node(&rii->tdi_root, newtdi);
    424 	if (tmp_tdi != newtdi) {
    425 		chfs_add_tmp_dnode_to_tdi(tmp_tdi, newtd);
    426 		newtdi->tmpnode = NULL;
    427 		chfs_kill_tdi(chmp, newtdi);
    428 	}
    429 
    430 	/* If there's anything behind that overlaps us, note it */
    431 	node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
    432 	if (node) {
    433 		while (1) {
    434 			this = (struct chfs_tmp_dnode_info *)node;
    435 			if (this->tmpnode->node->ofs + this->tmpnode->node->size > newtd->node->ofs) {
    436 				newtd->overlapped = 1;
    437 			}
    438 			if (!this->tmpnode->overlapped)
    439 				break;
    440 
    441 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
    442 			if (!prev_node) {
    443 				this->tmpnode->overlapped = 0;
    444 				break;
    445 			}
    446 			node = prev_node;
    447 		}
    448 	}
    449 
    450 	/* If the new node overlaps anything ahead, note it */
    451 	node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
    452 	this = (struct chfs_tmp_dnode_info *)node;
    453 	while (this && this->tmpnode->node->ofs < end_ofs) {
    454 		this->tmpnode->overlapped = 1;
    455 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
    456 		this = (struct chfs_tmp_dnode_info *)node;
    457 	}
    458 	return 0;
    459 }
    460 
    461 
    462 /*
    463  * --------------------
    464  * frag node operations
    465  * --------------------
    466  */
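         /*
          * new_fragment - creates a fragment of 'size' bytes at offset 'ofs',
          * backed by the full data node fdn (which may be NULL for a hole)
          * Returns NULL if the fragment could not be allocated.
          */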
    467 struct chfs_node_frag *
    468 new_fragment(struct chfs_full_dnode *fdn, uint32_t ofs, uint32_t size)
    469 {
    470 	struct chfs_node_frag *newfrag;
    471 	newfrag = chfs_alloc_node_frag();
    472 	if (newfrag) {
    473 		newfrag->ofs = ofs;
    474 		newfrag->size = size;
    475 		newfrag->node = fdn;
    476 	} else {
    477 		chfs_err("cannot allocate a chfs_node_frag object\n");
    478 	}
    479 	return newfrag;
    480 }
    481 
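         /*
          * no_overlapping_node - inserts a fragment that does not overlap any
          * existing fragment
          * If there is a gap between the end of the last fragment ('lastend')
          * and the start of the new one, a hole fragment is inserted first to
          * cover it.  Returns 0 or ENOMEM.
          */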
    482 int
    483 no_overlapping_node(struct rb_tree *fragtree,
    484     struct chfs_node_frag *newfrag,
    485     struct chfs_node_frag *this, uint32_t lastend)
    486 {
    487 	if (lastend < newfrag->node->ofs) {
    488 		struct chfs_node_frag *holefrag;
    489 
    490 		holefrag = new_fragment(NULL, lastend, newfrag->node->ofs - lastend);
    491 		if (!holefrag) {
    492 			chfs_free_node_frag(newfrag);
    493 			return ENOMEM;
    494 		}
    495 
    496 		rb_tree_insert_node(fragtree, holefrag);
    497 		this = holefrag;
    498 	}
    499 
    500 	rb_tree_insert_node(fragtree, newfrag);
    501 
    502 	return 0;
    503 }
    504 
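         /*
          * chfs_add_frag_to_fragtree - adds a fragment to an inode's fragment
          * tree, splitting, shrinking or obsoleting any existing fragments
          * that the new one overlaps
          * Returns 0 on success or ENOMEM.
          */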
    505 int
    506 chfs_add_frag_to_fragtree(struct chfs_mount *chmp,
    507     struct rb_tree *fragtree,
    508     struct chfs_node_frag *newfrag)
    509 {
    510 	struct chfs_node_frag *this;
    511 	uint32_t lastend;
    512 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    513 
    514 	this = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &newfrag->ofs);
    515 
    516 	if (this) {
    517 		lastend = this->ofs + this->size;
    518 	} else {
    519 		lastend = 0;
    520 	}
    521 
    522 	if (lastend <= newfrag->ofs) {
    523 		//dbg("no overlapping node\n");
    524 		if (lastend && (lastend - 1) >> PAGE_SHIFT == newfrag->ofs >> PAGE_SHIFT) {
    525 			if (this->node)
    526 				CHFS_MARK_REF_NORMAL(this->node->nref);
    527 			CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    528 		}
    529 		return no_overlapping_node(fragtree, newfrag, this, lastend);
    530 	}
    531 
    532 	if (newfrag->ofs > this->ofs) {
    533 
    534 		CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    535 		if (this->node)
    536 			CHFS_MARK_REF_NORMAL(this->node->nref);
    537 
    538 		if (this->ofs + this->size > newfrag->ofs + newfrag->size) {
    539 			/* newfrag is inside of this */
    540 			//dbg("newfrag is inside of this\n");
    541 			struct chfs_node_frag *newfrag2;
    542 
    543 			newfrag2 = new_fragment(this->node, newfrag->ofs + newfrag->size,
    544 			    this->ofs + this->size - newfrag->ofs - newfrag->size);
    545 			if (!newfrag2)
    546 				return ENOMEM;
    547 			if (this->node)
    548 				this->node->frags++;
    549 
    550 			this->size = newfrag->ofs - this->ofs;
    551 
    552 			rb_tree_insert_node(fragtree, newfrag);
    553 			rb_tree_insert_node(fragtree, newfrag2);
    554 
    555 			return 0;
    556 		}
    557 		/* newfrag is bottom of this */
    558 		//dbg("newfrag is bottom of this\n");
    559 		this->size = newfrag->ofs - this->ofs;
    560 		rb_tree_insert_node(fragtree, newfrag);
    561 	} else {
     562 		/* newfrag starts at the same offset as 'this' */
    563 		//dbg("newfrag start at same point\n");
    564 		//TODO replace instead of remove and insert
    565 		rb_tree_remove_node(fragtree, this);
    566 		rb_tree_insert_node(fragtree, newfrag);
    567 
    568 		if (newfrag->ofs + newfrag->size >= this->ofs+this->size) {
    569 			chfs_obsolete_node_frag(chmp, this);
    570 		} else {
    571 			this->ofs += newfrag->size;
    572 			this->size -= newfrag->size;
    573 
    574 			rb_tree_insert_node(fragtree, this);
    575 			return 0;
    576 		}
    577 	}
    578 	/* OK, now we have newfrag added in the correct place in the tree, but
    579 	   frag_next(newfrag) may be a fragment which is overlapped by it
    580 	*/
    581 	while ((this = frag_next(fragtree, newfrag)) && newfrag->ofs + newfrag->size >= this->ofs + this->size) {
    582 		rb_tree_remove_node(fragtree, this);
    583 		chfs_obsolete_node_frag(chmp, this);
    584 	}
    585 
    586 	if (!this || newfrag->ofs + newfrag->size == this->ofs)
    587 		return 0;
    588 
    589 	this->size = (this->ofs + this->size) - (newfrag->ofs + newfrag->size);
    590 	this->ofs = newfrag->ofs + newfrag->size;
    591 
    592 	if (this->node)
    593 		CHFS_MARK_REF_NORMAL(this->node->nref);
    594 	CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    595 
    596 	return 0;
    597 }
    598 
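         /* chfs_kill_fragtree - removes and frees every fragment of a fragtree */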
    599 void
    600 chfs_kill_fragtree(struct rb_tree *fragtree)
    601 {
    602 	struct chfs_node_frag *this, *next;
    603 	//dbg("start\n");
    604 
    605 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
    606 	while (this) {
    607 		//for (this = (struct chfs_node_frag *)RB_TREE_MIN(&fragtree); this != NULL; this = (struct chfs_node_frag *)rb_tree_iterate(&fragtree, &this->rb_node, RB_DIR_RIGHT)) {
    608 		next = frag_next(fragtree, this);
    609 		rb_tree_remove_node(fragtree, this);
    610 		chfs_free_node_frag(this);
    611 		//dbg("one frag killed\n");
    612 		this = next;
    613 	}
    614 	//dbg("end\n");
    615 }
    616 
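         /*
          * chfs_truncate_fragtree - truncates the fragment tree to 'size'
          * The fragment containing 'size' is shrunk and every fragment beyond
          * it is obsoleted.  Returns the resulting size of the cached file,
          * which may be smaller than requested if the tree ends earlier.
          */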
    617 uint32_t
    618 chfs_truncate_fragtree(struct chfs_mount *chmp,
    619 	struct rb_tree *fragtree, uint32_t size)
    620 {
    621 	struct chfs_node_frag *frag;
    622 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    623 
    624 	dbg("truncate to size: %u\n", size);
    625 
    626 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &size);
    627 
    628 	/* Find the last frag before size and set its new size. */
    629 	if (frag && frag->ofs != size) {
    630 		if (frag->ofs + frag->size > size) {
    631 			frag->size = size - frag->ofs;
    632 		}
    633 		frag = frag_next(fragtree, frag);
    634 	}
    635 
    636 	/* Delete frags after new size. */
    637 	while (frag && frag->ofs >= size) {
    638 		struct chfs_node_frag *next = frag_next(fragtree, frag);
    639 
    640 		rb_tree_remove_node(fragtree, frag);
    641 		chfs_obsolete_node_frag(chmp, frag);
    642 		frag = next;
    643 	}
    644 
    645 	if (size == 0) {
    646 		return 0;
    647 	}
    648 
    649 	frag = frag_last(fragtree);
    650 
    651 	if (!frag) {
    652 		return 0;
    653 	}
    654 
    655 	if (frag->ofs + frag->size < size) {
    656 		return frag->ofs + frag->size;
    657 	}
    658 
     659 	/* FIXME Should we check the position of the last node? (PAGE_CACHE size, etc.) */
    660 	if (frag->node && (frag->ofs & (PAGE_SIZE - 1)) == 0) {
    661 		frag->node->nref->nref_offset = CHFS_GET_OFS(frag->node->nref->nref_offset) | CHFS_PRISTINE_NODE_MASK;
    662 	}
    663 
    664 	return size;
    665 }
    666 
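         /*
          * chfs_obsolete_node_frag - drops a fragment's reference to its data
          * node and frees the fragment
          * When the last fragment referring to a full data node goes away, the
          * node is marked obsolete, unlinked from its vnode cache's data node
          * chain and freed as well.
          */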
    667 void
    668 chfs_obsolete_node_frag(struct chfs_mount *chmp,
    669     struct chfs_node_frag *this)
    670 {
    671 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    672 	if (this->node) {
    673 		this->node->frags--;
    674 		if (!this->node->frags) {
    675 			struct chfs_vnode_cache *vc = chfs_nref_to_vc(this->node->nref);
    676 			chfs_mark_node_obsolete(chmp, this->node->nref);
    677 
    678 			if (vc->dnode == this->node->nref) {
    679 				vc->dnode = this->node->nref->nref_next;
    680 			} else {
    681 				struct chfs_node_ref *tmp = vc->dnode;
    682 				while (tmp->nref_next != (struct chfs_node_ref*) vc
    683 						&& tmp->nref_next != this->node->nref) {
    684 					tmp = tmp->nref_next;
    685 				}
    686 				if (tmp->nref_next == this->node->nref) {
    687 					tmp->nref_next = this->node->nref->nref_next;
    688 				}
    689 				// FIXME should we free here the this->node->nref?
    690 			}
    691 
    692 			chfs_free_full_dnode(this->node);
    693 		} else {
    694 			CHFS_MARK_REF_NORMAL(this->node->nref);
    695 		}
    696 	}
    697 	chfs_free_node_frag(this);
    698 }
    699 
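         /*
          * chfs_add_full_dnode_to_inode - wraps a full data node in a fragment
          * and adds it to the inode's fragment tree
          * If the fragment does not start or end on a page boundary, the
          * affected node references are marked normal rather than pristine.
          * Returns 0 or an error from chfs_add_frag_to_fragtree().
          */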
    700 int
    701 chfs_add_full_dnode_to_inode(struct chfs_mount *chmp,
    702     struct chfs_inode *ip,
    703     struct chfs_full_dnode *fd)
    704 {
    705 	int ret;
    706 	struct chfs_node_frag *newfrag;
    707 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    708 
    709 	if (unlikely(!fd->size))
    710 		return 0;
    711 
    712 	newfrag = new_fragment(fd, fd->ofs, fd->size);
    713 	if (unlikely(!newfrag))
    714 		return ENOMEM;
    715 
    716 	newfrag->node->frags = 1;
    717 
    718 	ret = chfs_add_frag_to_fragtree(chmp, &ip->fragtree, newfrag);
    719 	if (ret)
    720 		return ret;
    721 
    722 	if (newfrag->ofs & (PAGE_SIZE - 1)) {
    723 		struct chfs_node_frag *prev = frag_prev(&ip->fragtree, newfrag);
    724 
    725 		CHFS_MARK_REF_NORMAL(fd->nref);
    726 		if (prev->node)
    727 			CHFS_MARK_REF_NORMAL(prev->node->nref);
    728 	}
    729 
    730 	if ((newfrag->ofs+newfrag->size) & (PAGE_SIZE - 1)) {
    731 		struct chfs_node_frag *next = frag_next(&ip->fragtree, newfrag);
    732 
    733 		if (next) {
    734 			CHFS_MARK_REF_NORMAL(fd->nref);
    735 			if (next->node)
    736 				CHFS_MARK_REF_NORMAL(next->node->nref);
    737 		}
    738 	}
    739 
    740 	return 0;
    741 }
    742 
    743 
    744 /*
    745  * -----------------------
    746  * general node operations
    747  * -----------------------
    748  */
    749 /* get tmp nodes of an inode */
    750 int
    751 chfs_get_data_nodes(struct chfs_mount *chmp,
    752     struct chfs_inode *ip,
    753     struct chfs_readinode_info *rii)
    754 {
    755 	uint32_t crc;
    756 	int err;
    757 	size_t len, retlen;
    758 	struct chfs_node_ref *nref;
    759 	struct chfs_flash_data_node *dnode;
    760 	struct chfs_tmp_dnode *td;
    761 	char* buf;
    762 
    763 	len = sizeof(struct chfs_flash_data_node);
    764 	buf = kmem_alloc(len, KM_SLEEP);
    765 
    770 	nref = chfs_first_valid_data_ref(ip->chvc->dnode);
    771 
    772 	rii->highest_version = ip->chvc->highest_version;
    773 
    774 	while(nref && (struct chfs_vnode_cache *)nref != ip->chvc) {
    775 		err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), len, &retlen);
     776 		if (err || len != retlen) {
         			if (!err)
         				err = EIO;
     777 			goto out;
         		}
    778 		dnode = (struct chfs_flash_data_node*)buf;
    779 
    780 		//check header crc
    781 		crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
    782 		if (crc != le32toh(dnode->hdr_crc)) {
    783 			chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
    784 			goto cont;
    785 		}
    786 		//check header magic bitmask
    787 		if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
    788 			chfs_err("Wrong magic bitmask.\n");
    789 			goto cont;
    790 		}
    791 		//check node crc
    792 		crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
    793 		if (crc != le32toh(dnode->node_crc)) {
    794 			chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
    795 			goto cont;
    796 		}
    797 		td = chfs_alloc_tmp_dnode();
    798 		if (!td) {
    799 			chfs_err("Can't allocate tmp dnode info.\n");
    800 			err = ENOMEM;
    801 			goto out;
    802 		}
     803 		/* We don't check the data CRC here, just add the nodes to the temporary node tree, because
    804 		 * we don't want to check nodes which have been overlapped by a new node
    805 		 * with a higher version number.
    806 		 */
    807 		td->node = chfs_alloc_full_dnode();
    808 		if (!td->node) {
    809 			chfs_err("Can't allocate full dnode info.\n");
    810 			err = ENOMEM;
    811 			goto out_tmp_dnode;
    812 		}
    813 		td->version = le64toh(dnode->version);
    814 		td->node->ofs = le64toh(dnode->offset);
    815 		td->data_crc = le32toh(dnode->data_crc);
    816 		td->node->nref = nref;
    817 		td->node->size = le32toh(dnode->data_length);
    818 		td->overlapped = 0;
    819 
    820 		if (td->version > rii->highest_version) {
    821 			rii->highest_version = td->version;
    822 		}
    823 
    824 		err = chfs_add_tmp_dnode_to_tree(chmp, rii, td);
    825 		if (err)
    826 			goto out_full_dnode;
    827 
    828 cont:
    829 		nref = chfs_first_valid_data_ref(nref->nref_next);
    830 	}
    831 
    832 	ip->chvc->highest_version = rii->highest_version;
         	kmem_free(buf, len);
     833 	return 0;
    834 
    835 /* Exit points */
    836 out_full_dnode:
    837 	chfs_free_full_dnode(td->node);
    838 out_tmp_dnode:
    839 	chfs_free_tmp_dnode(td);
    840 out:
    841 	kmem_free(buf, len);
    843 	return err;
    844 }
    845 
    846 
    847 /* Build final normal fragtree from tdi tree. */
    848 int
    849 chfs_build_fragtree(struct chfs_mount *chmp, struct chfs_inode *ip,
    850     struct chfs_readinode_info *rii)
    851 {
    852 	struct chfs_tmp_dnode_info *pen, *last, *this;
    853 	struct rb_tree ver_tree;    /* version tree */
    854 	uint64_t high_ver = 0;
    855 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    856 
    857 	rb_tree_init(&ver_tree, &tmp_node_rbtree_ops);
    858 
    859 	if (rii->mdata_tn) {
    860 		high_ver = rii->mdata_tn->tmpnode->version;
    861 		rii->latest_ref = rii->mdata_tn->tmpnode->node->nref;
    862 	}
    863 
    864 	pen = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&rii->tdi_root);
    865 
    866 	while((last = pen)) {
    867 		pen = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&rii->tdi_root, last, RB_DIR_LEFT);
    868 
    869 		rb_tree_remove_node(&rii->tdi_root, last);
    870 		rb_tree_insert_node(&ver_tree, last);
    871 
    872 		if (last->tmpnode->overlapped) {
    873 			if (pen)
    874 				continue;
    875 
    876 			last->tmpnode->overlapped = 0;
    877 		}
    878 
    879 		this = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&ver_tree);
    880 
    881 		while (this) {
    882 			struct chfs_tmp_dnode_info *vers_next;
    883 			int ret;
    884 
    885 			vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
    886 			rb_tree_remove_node(&ver_tree, this);
    887 
    888 			struct chfs_tmp_dnode *tmp_td = this->tmpnode;
    889 			while (tmp_td) {
    890 				struct chfs_tmp_dnode *next_td = tmp_td->next;
    891 
    892 				if (chfs_check_td_node(chmp, tmp_td)) {
    893 					if (next_td) {
    894 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    895 					} else {
    896 						break;
    897 					}
    898 				} else {
    899 					if (tmp_td->version > high_ver) {
    900 						high_ver = tmp_td->version;
    901 						dbg("highver: %llu\n", (unsigned long long)high_ver);
    902 						rii->latest_ref = tmp_td->node->nref;
    903 					}
    904 
    905 					ret = chfs_add_full_dnode_to_inode(chmp, ip, tmp_td->node);
    906 					if (ret) {
    907 						while (1) {
    908 							vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
    909 							while (tmp_td) {
    910 								next_td = tmp_td->next;
    911 								if (chfs_check_td_node(chmp, tmp_td) > 1) {
    912 									chfs_mark_node_obsolete(chmp,
    913 										tmp_td->node->nref);
    914 								}
    915 								chfs_free_full_dnode(tmp_td->node);
    916 								chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    917 								chfs_free_tmp_dnode(tmp_td);
    918 								tmp_td = next_td;
    919 							}
    920 							chfs_free_tmp_dnode_info(this);
    921 							this = vers_next;
    922 							if (!this)
    923 								break;
    924 							rb_tree_remove_node(&ver_tree, vers_next);
    925 						}
    926 						return ret;
    927 					}
    928 
    929 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    930 					chfs_free_tmp_dnode(tmp_td);
    931 				}
    932 				tmp_td = next_td;
    933 			}
    934 			chfs_kill_tdi(chmp, this);
    935 			this = vers_next;
    936 		}
    937 	}
    938 
    939 	return 0;
    940 }
    941 
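         /*
          * chfs_read_inode - checks the state of the inode's vnode cache and,
          * once the cache can be moved to the "reading" state, reads the inode
          * with chfs_read_inode_internal()
          * A minimal usage sketch (the caller must hold chm_lock_mountfields,
          * as the KASSERT below requires):
          *
          *	mutex_enter(&chmp->chm_lock_mountfields);
          *	err = chfs_read_inode(chmp, ip);
          *	mutex_exit(&chmp->chm_lock_mountfields);
          */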
    942 int chfs_read_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
    943 {
    944 	struct chfs_vnode_cache *vc = ip->chvc;
    945 
    946 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    947 
    948 retry:
    949 	/* XXX locking */
    950 	//mutex_enter(&chmp->chm_lock_vnocache);
    951 	switch (vc->state) {
    952 	case VNO_STATE_UNCHECKED:
    953 	case VNO_STATE_CHECKEDABSENT:
    954 //		chfs_vnode_cache_set_state(chmp, vc, VNO_STATE_READING);
    955 		vc->state = VNO_STATE_READING;
    956 		break;
    957 	case VNO_STATE_CHECKING:
    958 	case VNO_STATE_GC:
    959 		//sleep_on_spinunlock(&chmp->chm_lock_vnocache);
    960 		//KASSERT(!mutex_owned(&chmp->chm_lock_vnocache));
    961 		goto retry;
    962 		break;
    963 	case VNO_STATE_PRESENT:
    964 	case VNO_STATE_READING:
    965 		chfs_err("Reading inode #%llu in state %d!\n",
    966 			(unsigned long long)vc->vno, vc->state);
    967 		chfs_err("wants to read a nonexistent ino %llu\n",
    968 			(unsigned long long)vc->vno);
    969 		return ENOENT;
    970 	default:
    971 		panic("BUG() Bad vno cache state.");
    972 	}
    973 	//mutex_exit(&chmp->chm_lock_vnocache);
    974 
    975 	return chfs_read_inode_internal(chmp, ip);
    976 }
    977 
    978 /*
    979  * Read inode frags.
    980  * Firstly get tmp nodes,
    981  * secondly build fragtree from those.
    982  */
    983 int
    984 chfs_read_inode_internal(struct chfs_mount *chmp, struct chfs_inode *ip)
    985 {
    986 	int err;
    987 	size_t len, retlen;
    988 	char* buf;
    989 	struct chfs_readinode_info rii;
    990 	struct chfs_flash_vnode *fvnode;
    991 
    992 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    993 
    994 	len = sizeof(*fvnode);
    995 
    996 	memset(&rii, 0, sizeof(rii));
    997 
    998 	rb_tree_init(&rii.tdi_root, &tmp_node_rbtree_ops);
    999 
   1000 	/* build up a temp node frag tree */
   1001 	err = chfs_get_data_nodes(chmp, ip, &rii);
   1002 	if (err) {
   1003 		if (ip->chvc->state == VNO_STATE_READING)
   1004 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
   1005 		/* FIXME Should we kill fragtree or something here? */
   1006 		return err;
   1007 	}
   1008 
   1009 	rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
   1010 	/*
   1011 	 * build fragtree from temp nodes
   1012 	 */
   1013 	err = chfs_build_fragtree(chmp, ip, &rii);
   1014 	if (err) {
   1015 		if (ip->chvc->state == VNO_STATE_READING)
   1016 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
   1017 		/* FIXME Should we kill fragtree or something here? */
   1018 		return err;
   1019 	}
   1020 
   1021 	if (!rii.latest_ref) {
   1022 		return 0;
   1023 	}
   1024 
   1025 	buf = kmem_alloc(len, KM_SLEEP);
   1026 	if (!buf)
   1027 		return ENOMEM;
   1028 
   1029 	/*
   1030 	 * set inode size from chvc->v
   1031 	 */
   1032 	err = chfs_read_leb(chmp, ip->chvc->v->nref_lnr, buf, CHFS_GET_OFS(ip->chvc->v->nref_offset), len, &retlen);
   1033 	if (err || retlen != len) {
   1034 		kmem_free(buf, len);
   1035 		return err?err:EIO;
   1036 	}
   1037 
   1038 	fvnode = (struct chfs_flash_vnode*)buf;
   1039 
   1040 	dbg("set size from v: %u\n", fvnode->dn_size);
   1041 	chfs_set_vnode_size(ITOV(ip), fvnode->dn_size);
   1042 	uint32_t retsize = chfs_truncate_fragtree(chmp, &ip->fragtree, fvnode->dn_size);
   1043 	if (retsize != fvnode->dn_size) {
   1044 		dbg("Truncating failed. It is %u instead of %u\n", retsize, fvnode->dn_size);
   1045 	}
   1046 
   1047 	kmem_free(buf, len);
   1048 
   1049 	if (ip->chvc->state == VNO_STATE_READING) {
   1050 		ip->chvc->state = VNO_STATE_PRESENT;
   1051 	}
   1052 
   1053 	return 0;
   1054 }
   1055 
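         /*
          * chfs_read_data - fills the buffer bp with the content of the data
          * node that backs the page at bp->b_blkno
          * The fragment covering the requested offset is looked up in the
          * inode's fragment tree, the node behind it is read back from flash,
          * its header, node and data CRCs are re-checked, and the payload is
          * copied into bp->b_data.  Holes leave the zero-filled buffer as is.
          */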
   1056 int
   1057 chfs_read_data(struct chfs_mount* chmp, struct vnode *vp,
   1058     struct buf *bp)
   1059 {
   1060 	off_t ofs;
   1061 	struct chfs_node_frag *frag;
   1062 	char * buf;
   1063 	int err = 0;
   1064 	size_t size, retlen;
   1065 	uint32_t crc;
   1066 	struct chfs_inode *ip = VTOI(vp);
   1067 	struct chfs_flash_data_node *dnode;
   1068 	struct chfs_node_ref *nref;
   1069 
   1070 	memset(bp->b_data, 0, bp->b_bcount);
   1071 
   1072 	ofs = bp->b_blkno * PAGE_SIZE;
   1073 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(&ip->fragtree, &ofs);
   1074 
   1075 	if (!frag || frag->ofs > ofs || frag->ofs + frag->size <= ofs) {
   1076 		dbg("not found in frag tree\n");
   1077 		return 0;
   1078 	}
   1079 
   1080 	if (!frag->node) {
   1081 		dbg("no node in frag\n");
   1082 		return 0;
   1083 	}
   1084 
   1085 	nref = frag->node->nref;
   1086 
   1087 	size = sizeof(*dnode) + frag->size;
   1088 
   1089 	buf = kmem_alloc(size, KM_SLEEP);
   1090 
   1091 	dbg("reading from lnr: %u, offset: %u, size: %zu\n", nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset), size);
   1092 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), size, &retlen);
   1093 	if (err) {
   1094 		chfs_err("error after reading: %d\n", err);
   1095 		goto out;
   1096 	}
   1097 	if (retlen != size) {
   1098 		chfs_err("retlen: %zu != size: %zu\n", retlen, size);
   1099 		err = EIO;
   1100 		goto out;
   1101 	}
   1102 
   1103 	dnode = (struct chfs_flash_data_node *)buf;
   1104 	crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
   1105 	if (crc != le32toh(dnode->hdr_crc)) {
   1106 		chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
   1107 		err = EIO;
   1108 		goto out;
   1109 	}
   1110 	//check header magic bitmask
   1111 	if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
   1112 		chfs_err("Wrong magic bitmask.\n");
   1113 		err = EIO;
   1114 		goto out;
   1115 	}
   1116 	//check node crc
   1117 	crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
   1118 	if (crc != le32toh(dnode->node_crc)) {
   1119 		chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
   1120 		err = EIO;
   1121 		goto out;
   1122 	}
   1123 	crc = crc32(0, (uint8_t *)dnode->data, dnode->data_length);
   1124 	if (crc != le32toh(dnode->data_crc)) {
   1125 		chfs_err("Data CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->data_crc));
   1126 		err = EIO;
   1127 		goto out;
   1128 	}
   1129 
   1130 	memcpy(bp->b_data, dnode->data, dnode->data_length);
   1131 	bp->b_resid = 0;
   1132 
   1133 out:
   1134 	kmem_free(buf, size);
   1135 	return err;
   1136 }
   1137