      1 /*	$NetBSD: chfs_readinode.c,v 1.4 2012/08/13 13:12:51 ttoth Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2010 Department of Software Engineering,
      5  *		      University of Szeged, Hungary
      6  * Copyright (C) 2010 David Tengeri <dtengeri (at) inf.u-szeged.hu>
      7  * Copyright (C) 2010 Tamas Toth <ttoth (at) inf.u-szeged.hu>
      8  * Copyright (C) 2010 Adam Hoka <ahoka (at) NetBSD.org>
      9  * All rights reserved.
     10  *
     11  * This code is derived from software contributed to The NetBSD Foundation
     12  * by the Department of Software Engineering, University of Szeged, Hungary
     13  *
     14  * Redistribution and use in source and binary forms, with or without
     15  * modification, are permitted provided that the following conditions
     16  * are met:
     17  * 1. Redistributions of source code must retain the above copyright
     18  *    notice, this list of conditions and the following disclaimer.
     19  * 2. Redistributions in binary form must reproduce the above copyright
     20  *    notice, this list of conditions and the following disclaimer in the
     21  *    documentation and/or other materials provided with the distribution.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 /*
     37  * chfs_readinode.c
     38  *
     39  *  Created on: 2010.05.31.
     40  *      Author: dtengeri
     41  */
     42 
     43 #include <sys/buf.h>
     44 
     45 #include "chfs.h"
     46 
     47 /* tmp node operations */
     48 int chfs_check_td_data(struct chfs_mount *,
     49     struct chfs_tmp_dnode *);
     50 int chfs_check_td_node(struct chfs_mount *,
     51     struct chfs_tmp_dnode *);
     52 struct chfs_node_ref *chfs_first_valid_data_ref(struct chfs_node_ref *);
     53 int chfs_add_tmp_dnode_to_tree(struct chfs_mount *,
     54     struct chfs_readinode_info *,
     55     struct chfs_tmp_dnode *);
     56 void chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *,
     57 	struct chfs_tmp_dnode *);
     58 void chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *,
     59 	struct chfs_tmp_dnode *);
     60 static void chfs_kill_td(struct chfs_mount *,
     61     struct chfs_tmp_dnode *);
     62 static void chfs_kill_tdi(struct chfs_mount *,
     63     struct chfs_tmp_dnode_info *);
     64 /* frag node operations */
     65 struct chfs_node_frag *new_fragment(struct chfs_full_dnode *,
     66     uint32_t,
     67     uint32_t);
     68 int no_overlapping_node(struct rb_tree *, struct chfs_node_frag *,
     69     struct chfs_node_frag *, uint32_t);
     70 int chfs_add_frag_to_fragtree(struct chfs_mount *,
     71     struct rb_tree *,
     72     struct chfs_node_frag *);
     73 void chfs_obsolete_node_frag(struct chfs_mount *,
     74     struct chfs_node_frag *);
     75 /* general node operations */
     76 int chfs_get_data_nodes(struct chfs_mount *,
     77     struct chfs_inode *,
     78     struct chfs_readinode_info *);
     79 int chfs_build_fragtree(struct chfs_mount *,
     80     struct chfs_inode *,
     81     struct chfs_readinode_info *);
     82 
     83 
     84 
     85 /*
     86  * --------------------------
     87  * tmp node rbtree operations
     88  * --------------------------
     89  */
     90 static signed int
     91 tmp_node_compare_nodes(void *ctx, const void *n1, const void *n2)
     92 {
      93 	const struct chfs_tmp_dnode_info *tdi1 = n1;
      94 	const struct chfs_tmp_dnode_info *tdi2 = n2;
      95 	uint64_t ofs1 = tdi1->tmpnode->node->ofs, ofs2 = tdi2->tmpnode->node->ofs;
      96 	return (ofs1 > ofs2) - (ofs1 < ofs2);	/* full 64 bit compare, no truncation */
     97 }
     98 
     99 static signed int
    100 tmp_node_compare_key(void *ctx, const void *n, const void *key)
    101 {
     102 	const struct chfs_tmp_dnode_info *tdi = n;
     103 	uint64_t ofs = *(const uint64_t *)key;
     104 	uint64_t tofs = tdi->tmpnode->node->ofs;
     105 	return (tofs > ofs) - (tofs < ofs);	/* full 64 bit compare, no truncation */
    106 }
    107 
    108 const rb_tree_ops_t tmp_node_rbtree_ops = {
    109 	.rbto_compare_nodes = tmp_node_compare_nodes,
    110 	.rbto_compare_key = tmp_node_compare_key,
    111 	.rbto_node_offset = offsetof(struct chfs_tmp_dnode_info, rb_node),
    112 	.rbto_context = NULL
    113 };
    114 
    115 
    116 /*
    117  * ---------------------------
    118  * frag node rbtree operations
    119  * ---------------------------
    120  */
    121 static signed int
    122 frag_compare_nodes(void *ctx, const void *n1, const void *n2)
    123 {
    124 	const struct chfs_node_frag *frag1 = n1;
    125 	const struct chfs_node_frag *frag2 = n2;
    126 
    127 	return (frag1->ofs - frag2->ofs);
    128 }
    129 
    130 static signed int
    131 frag_compare_key(void *ctx, const void *n, const void *key)
    132 {
    133 	const struct chfs_node_frag *frag = n;
    134 	uint64_t ofs = *(const uint64_t *)key;
    135 
    136 	return (frag->ofs - ofs);
    137 }
    138 
    139 const rb_tree_ops_t frag_rbtree_ops = {
    140 	.rbto_compare_nodes = frag_compare_nodes,
    141 	.rbto_compare_key   = frag_compare_key,
    142 	.rbto_node_offset = offsetof(struct chfs_node_frag, rb_node),
    143 	.rbto_context = NULL
    144 };
    145 
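        /*
         * Illustrative sketch only (not compiled): how the rb_tree_ops
         * defined above are consumed through the rbtree(3) API.  The
         * variables below are placeholders; the calls mirror what
         * chfs_read_inode_internal(), chfs_add_frag_to_fragtree() and
         * chfs_remove_frags_of_node() do further down in this file.
         *
         *	struct rb_tree fragtree;
         *	struct chfs_node_frag *frag;
         *	uint64_t key = 4096;
         *
         *	rb_tree_init(&fragtree, &frag_rbtree_ops);
         *	// last fragment starting at or before the key offset
         *	frag = rb_tree_find_node_leq(&fragtree, &key);
         *	// walk all fragments in offset order
         *	for (frag = RB_TREE_MIN(&fragtree); frag != NULL;
         *	    frag = rb_tree_iterate(&fragtree, frag, RB_DIR_RIGHT))
         *		;
         */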
    146 
    147 /*
    148  * -------------------
    149  * tmp node operations
    150  * -------------------
    151  */
    152 /*
    153  * Check the data CRC of the node.
    154  *
     155  * Returns: 0 - if everything is OK;
     156  * 	    	1 - if the data CRC is incorrect;
     157  * 	    	2 - on any other error (allocation failure,
     158  *	    	    read error or short read).
    159  */
    160 int
    161 chfs_check_td_data(struct chfs_mount *chmp,
    162     struct chfs_tmp_dnode *td)
    163 {
    164 	int err;
    165 	size_t retlen, len, totlen;
    166 	uint32_t crc;
    167 	uint64_t ofs;
    168 	char *buf;
    169 	struct chfs_node_ref *nref = td->node->nref;
    170 
    171 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    172 	KASSERT(!mutex_owned(&chmp->chm_lock_sizes));
    173 
    174 	ofs = CHFS_GET_OFS(nref->nref_offset) + sizeof(struct chfs_flash_data_node);
    175 	len = td->node->size;
    176 	if (!len)
    177 		return 0;
    178 
    179 	buf = kmem_alloc(len, KM_SLEEP);
    180 	if (!buf) {
     181 		dbg("buffer allocation error\n");
    182 		return 2;
    183 	}
    184 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, ofs, len, &retlen);
    185 	if (err) {
     186 		dbg("error while reading: %d\n", err);
    187 		err = 2;
    188 		goto out;
    189 	}
    190 
    191 	if (len != retlen) {
    192 		dbg("len:%zu, retlen:%zu\n", len, retlen);
    193 		err = 2;
    194 		goto out;
    195 	}
    196 	crc = crc32(0, (uint8_t *)buf, len);
    197 
    198 	if (crc != td->data_crc) {
    199 		dbg("crc failed, calculated: 0x%x, orig: 0x%x\n", crc, td->data_crc);
    200 		kmem_free(buf, len);
    201 		return 1;
    202 	}
    203 
    204 	CHFS_MARK_REF_NORMAL(nref);
    205 	totlen = CHFS_PAD(sizeof(struct chfs_flash_data_node) + len);
    206 
    207 	mutex_enter(&chmp->chm_lock_sizes);
    208 	chfs_change_size_unchecked(chmp, &chmp->chm_blocks[nref->nref_lnr], -totlen);
    209 	chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
    210 	mutex_exit(&chmp->chm_lock_sizes);
    211 	KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);
    212 
    213 	err = 0;
    214 out:
    215 	kmem_free(buf, len);
    216 	return err;
    217 }
    218 
    219 int
    220 chfs_check_td_node(struct chfs_mount *chmp, struct chfs_tmp_dnode *td)
    221 {
    222 	int ret;
    223 
    224 	if (CHFS_REF_FLAGS(td->node->nref) != CHFS_UNCHECKED_NODE_MASK)
    225 		return 0;
    226 
    227 	ret = chfs_check_td_data(chmp, td);
    228 	return ret;
    229 }
    230 
    231 
    232 struct chfs_node_ref *
    233 chfs_first_valid_data_ref(struct chfs_node_ref *nref)
    234 {
    235 	while (nref) {
    236 		if (!CHFS_REF_OBSOLETE(nref)) {
     237 #ifdef DBG_MSG_GC
    238 			if (nref->nref_lnr == REF_EMPTY_NODE) {
    239 				dbg("FIRST VALID IS EMPTY!\n");
    240 			}
    241 #endif
    242 			return nref;
    243 		}
    244 
    245 		if (nref->nref_next) {
    246 			nref = nref->nref_next;
    247 		} else
    248 			break;
    249 	}
    250 	return NULL;
    251 }
    252 
    253 void
    254 chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *tdi,
    255 	struct chfs_tmp_dnode *td)
    256 {
    257 	if (!tdi->tmpnode) {
    258 		tdi->tmpnode = td;
    259 	} else {
    260 		struct chfs_tmp_dnode *tmp = tdi->tmpnode;
    261 		while (tmp->next) {
    262 			tmp = tmp->next;
    263 		}
    264 		tmp->next = td;
    265 	}
    266 }
    267 
    268 void
    269 chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *tdi,
    270 	struct chfs_tmp_dnode *td)
    271 {
    272 	if (tdi->tmpnode == td) {
    273 		tdi->tmpnode = tdi->tmpnode->next;
    274 	} else {
    275 		struct chfs_tmp_dnode *tmp = tdi->tmpnode->next;
    276 		while (tmp->next && tmp->next != td) {
    277 			tmp = tmp->next;
    278 		}
    279 		if (tmp->next) {
    280 			tmp->next = td->next;
    281 		}
    282 	}
    283 }
    284 
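        /*
         * Informal sketch of the structure the two helpers above maintain:
         * each chfs_tmp_dnode_info is one rb-tree entry, keyed by the offset
         * of its head tmpnode, and may chain further temporary dnodes behind
         * that head, e.g.
         *
         *	tdi (ofs 4096) -> td (v7) -> td (v5) -> td (v3)
         *
         * chfs_add_tmp_dnode_to_tdi() appends at the end of the chain and
         * chfs_remove_tmp_dnode_from_tdi() unlinks a single element of it.
         */
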
    285 static void
    286 chfs_kill_td(struct chfs_mount *chmp,
    287     struct chfs_tmp_dnode *td)
    288 {
    289 	struct chfs_vnode_cache *vc;
    290 	if (td->node) {
    291 		mutex_enter(&chmp->chm_lock_vnocache);
    292 		vc = chfs_nref_to_vc(td->node->nref);
    293 		chfs_remove_and_obsolete(chmp, vc, td->node->nref, &vc->dnode);
    294 		mutex_exit(&chmp->chm_lock_vnocache);
    295 	}
    296 
    297 	chfs_free_tmp_dnode(td);
    298 }
    299 
    300 static void
    301 chfs_kill_tdi(struct chfs_mount *chmp,
    302     struct chfs_tmp_dnode_info *tdi)
    303 {
    304 	struct chfs_tmp_dnode *next, *tmp = tdi->tmpnode;
    305 
    306 	while (tmp) {
    307 		next = tmp->next;
    308 		chfs_kill_td(chmp, tmp);
    309 		tmp = next;
    310 	}
    311 
    312 	chfs_free_tmp_dnode_info(tdi);
    313 }
    314 
    315 int
    316 chfs_add_tmp_dnode_to_tree(struct chfs_mount *chmp,
    317     struct chfs_readinode_info *rii,
    318     struct chfs_tmp_dnode *newtd)
    319 {
    320 	uint64_t end_ofs = newtd->node->ofs + newtd->node->size;
    321 	struct chfs_tmp_dnode_info *this;
    322 	struct rb_node *node, *prev_node;
    323 	struct chfs_tmp_dnode_info *newtdi;
    324 
    325 	node = rb_tree_find_node(&rii->tdi_root, &newtd->node->ofs);
    326 	if (node) {
    327 		this = (struct chfs_tmp_dnode_info *)node;
    328 		while (this->tmpnode->overlapped) {
    329 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
    330 			if (!prev_node) {
    331 				this->tmpnode->overlapped = 0;
    332 				break;
    333 			}
    334 			node = prev_node;
    335 			this = (struct chfs_tmp_dnode_info *)node;
    336 		}
    337 	}
    338 	while (node) {
    339 		this = (struct chfs_tmp_dnode_info *)node;
    340 		if (this->tmpnode->node->ofs > end_ofs)
    341 			break;
    342 
    343 		struct chfs_tmp_dnode *tmp_td = this->tmpnode;
    344 		while (tmp_td) {
    345 			if (tmp_td->version == newtd->version) {
    346 				if (!chfs_check_td_node(chmp, tmp_td)) {
    347 					dbg("calling kill td 0\n");
    348 					chfs_kill_td(chmp, newtd);
    349 					return 0;
    350 				} else {
    351 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    352 					chfs_kill_td(chmp, tmp_td);
    353 					chfs_add_tmp_dnode_to_tdi(this, newtd);
    354 					return 0;
    355 				}
    356 			}
    357 			if (tmp_td->version < newtd->version &&
    358 				tmp_td->node->ofs >= newtd->node->ofs &&
    359 				tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
    360 				/* New node entirely overlaps 'this' */
    361 				if (chfs_check_td_node(chmp, newtd)) {
    362 					dbg("calling kill td 2\n");
    363 					chfs_kill_td(chmp, newtd);
    364 					return 0;
    365 				}
    366 				/* ... and is good. Kill 'this' and any subsequent nodes which are also overlapped */
    367 				while (tmp_td && tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
    368 					struct rb_node *next = rb_tree_iterate(&rii->tdi_root, this, RB_DIR_RIGHT);
    369 					struct chfs_tmp_dnode_info *next_tdi = (struct chfs_tmp_dnode_info *)next;
    370 					struct chfs_tmp_dnode *next_td = NULL;
    371 					if (tmp_td->next) {
    372 						next_td = tmp_td->next;
    373 					} else if (next_tdi) {
    374 						next_td = next_tdi->tmpnode;
    375 					}
    376 					if (tmp_td->version < newtd->version) {
    377 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    378 						chfs_kill_td(chmp, tmp_td);
    379 						if (!this->tmpnode) {
    380 							rb_tree_remove_node(&rii->tdi_root, this);
    381 							chfs_kill_tdi(chmp, this);
    382 							this = next_tdi;
    383 						}
    384 					}
    385 					tmp_td = next_td;
    386 				}
    387 				continue;
    388 			}
    389 			if (tmp_td->version > newtd->version &&
    390 				tmp_td->node->ofs <= newtd->node->ofs &&
    391 				tmp_td->node->ofs + tmp_td->node->size >= end_ofs) {
    392 				/* New node entirely overlapped by 'this' */
    393 				if (!chfs_check_td_node(chmp, tmp_td)) {
    394 					dbg("this version: %llu\n",
    395 						(unsigned long long)tmp_td->version);
    396 					dbg("this ofs: %llu, size: %u\n",
    397 						(unsigned long long)tmp_td->node->ofs,
    398 						tmp_td->node->size);
    399 					dbg("calling kill td 4\n");
    400 					chfs_kill_td(chmp, newtd);
    401 					return 0;
    402 				}
    403 				/* ... but 'this' was bad. Replace it... */
    404 				chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    405 				chfs_kill_td(chmp, tmp_td);
    406 				if (!this->tmpnode) {
    407 					rb_tree_remove_node(&rii->tdi_root, this);
    408 					chfs_kill_tdi(chmp, this);
    409 				}
    410 				dbg("calling kill td 5\n");
    411 				chfs_kill_td(chmp, newtd);
    412 				break;
    413 			}
    414 			tmp_td = tmp_td->next;
    415 		}
    416 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
    417 	}
    418 
    419 	newtdi = chfs_alloc_tmp_dnode_info();
    420 	chfs_add_tmp_dnode_to_tdi(newtdi, newtd);
     421 	/* We neither completely obsoleted an earlier node nor were
     422 	   completely obsoleted by one. Insert into the tree. */
    423 	struct chfs_tmp_dnode_info *tmp_tdi = rb_tree_insert_node(&rii->tdi_root, newtdi);
    424 	if (tmp_tdi != newtdi) {
    425 		chfs_remove_tmp_dnode_from_tdi(newtdi, newtd);
    426 		chfs_add_tmp_dnode_to_tdi(tmp_tdi, newtd);
    427 		chfs_kill_tdi(chmp, newtdi);
    428 	}
    429 
    430 	/* If there's anything behind that overlaps us, note it */
     431 	node = rb_tree_iterate(&rii->tdi_root, tmp_tdi, RB_DIR_LEFT);
    432 	if (node) {
    433 		while (1) {
    434 			this = (struct chfs_tmp_dnode_info *)node;
    435 			if (this->tmpnode->node->ofs + this->tmpnode->node->size > newtd->node->ofs) {
    436 				newtd->overlapped = 1;
    437 			}
    438 			if (!this->tmpnode->overlapped)
    439 				break;
    440 
    441 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
    442 			if (!prev_node) {
    443 				this->tmpnode->overlapped = 0;
    444 				break;
    445 			}
    446 			node = prev_node;
    447 		}
    448 	}
    449 
    450 	/* If the new node overlaps anything ahead, note it */
     451 	node = rb_tree_iterate(&rii->tdi_root, tmp_tdi, RB_DIR_RIGHT);
    452 	this = (struct chfs_tmp_dnode_info *)node;
    453 	while (this && this->tmpnode->node->ofs < end_ofs) {
    454 		this->tmpnode->overlapped = 1;
    455 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
    456 		this = (struct chfs_tmp_dnode_info *)node;
    457 	}
    458 	return 0;
    459 }
    460 
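        /*
         * Worked example for chfs_add_tmp_dnode_to_tree() above (the numbers
         * are made up): suppose the tree already holds node A = [0, 4096)
         * with version 2 and node B = [1024, 2048) with version 5 arrives.
         * Neither node fully covers the other, so none of the version/overlap
         * branches obsoletes anything; B is inserted as its own tdi and,
         * because A ends after B starts, B is flagged as overlapped.  Had B
         * instead been [0, 4096) with version 5 and a good data CRC, A would
         * have been killed and B would have taken its place.  The overlapped
         * flags are later used by chfs_build_fragtree() to decide which
         * groups of nodes must be processed together rather than one by one.
         */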
    461 
    462 /*
    463  * --------------------
    464  * frag node operations
    465  * --------------------
    466  */
    467 struct chfs_node_frag *
    468 new_fragment(struct chfs_full_dnode *fdn, uint32_t ofs, uint32_t size)
    469 {
    470 	struct chfs_node_frag *newfrag;
    471 	newfrag = chfs_alloc_node_frag();
    472 	if (newfrag) {
    473 		newfrag->ofs = ofs;
    474 		newfrag->size = size;
    475 		newfrag->node = fdn;
    476 		if (newfrag->node) {
    477 			newfrag->node->frags++;
    478 		}
    479 	} else {
    480 		chfs_err("cannot allocate a chfs_node_frag object\n");
    481 	}
    482 	return newfrag;
    483 }
    484 
    485 int
    486 no_overlapping_node(struct rb_tree *fragtree,
    487     struct chfs_node_frag *newfrag,
    488     struct chfs_node_frag *this, uint32_t lastend)
    489 {
    490 	if (lastend < newfrag->node->ofs) {
    491 		struct chfs_node_frag *holefrag;
    492 
    493 		holefrag = new_fragment(NULL, lastend, newfrag->node->ofs - lastend);
    494 		if (!holefrag) {
    495 			chfs_free_node_frag(newfrag);
    496 			return ENOMEM;
    497 		}
    498 
    499 		rb_tree_insert_node(fragtree, holefrag);
    500 		this = holefrag;
    501 	}
    502 
    503 	rb_tree_insert_node(fragtree, newfrag);
    504 
    505 	return 0;
    506 }
    507 
    508 int
    509 chfs_add_frag_to_fragtree(struct chfs_mount *chmp,
    510     struct rb_tree *fragtree,
    511     struct chfs_node_frag *newfrag)
    512 {
    513 	struct chfs_node_frag *this;
     514 	uint32_t lastend;
     515 	uint64_t ofs = newfrag->ofs;	/* the key comparator expects a uint64_t */
     516 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
     517 	this = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &ofs);
    518 
    519 	if (this) {
    520 		lastend = this->ofs + this->size;
    521 	} else {
    522 		lastend = 0;
    523 	}
    524 
    525 	if (lastend <= newfrag->ofs) {
    526 		//dbg("no overlapping node\n");
    527 		if (lastend && (lastend - 1) >> PAGE_SHIFT == newfrag->ofs >> PAGE_SHIFT) {
    528 			if (this->node)
    529 				CHFS_MARK_REF_NORMAL(this->node->nref);
    530 			CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    531 		}
    532 		return no_overlapping_node(fragtree, newfrag, this, lastend);
    533 	}
    534 
    535 	if (newfrag->ofs > this->ofs) {
    536 
    537 		CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    538 		if (this->node)
    539 			CHFS_MARK_REF_NORMAL(this->node->nref);
    540 
    541 		if (this->ofs + this->size > newfrag->ofs + newfrag->size) {
    542 			/* newfrag is inside of this */
    543 			//dbg("newfrag is inside of this\n");
    544 			struct chfs_node_frag *newfrag2;
    545 
    546 			newfrag2 = new_fragment(this->node, newfrag->ofs + newfrag->size,
    547 			    this->ofs + this->size - newfrag->ofs - newfrag->size);
    548 			if (!newfrag2)
    549 				return ENOMEM;
    550 
    551 			this->size = newfrag->ofs - this->ofs;
    552 
    553 			rb_tree_insert_node(fragtree, newfrag);
    554 			rb_tree_insert_node(fragtree, newfrag2);
    555 
    556 			return 0;
    557 		}
     558 		/* newfrag covers the tail of this */
    559 		//dbg("newfrag is bottom of this\n");
    560 		this->size = newfrag->ofs - this->ofs;
    561 		rb_tree_insert_node(fragtree, newfrag);
    562 	} else {
     563 		/* newfrag starts at the same point as this */
    564 		//dbg("newfrag start at same point\n");
    565 		//TODO replace instead of remove and insert
    566 		rb_tree_remove_node(fragtree, this);
    567 		rb_tree_insert_node(fragtree, newfrag);
    568 
    569 		if (newfrag->ofs + newfrag->size >= this->ofs+this->size) {
    570 			chfs_obsolete_node_frag(chmp, this);
    571 		} else {
    572 			this->ofs += newfrag->size;
    573 			this->size -= newfrag->size;
    574 
    575 			rb_tree_insert_node(fragtree, this);
    576 			return 0;
    577 		}
    578 	}
    579 	/* OK, now we have newfrag added in the correct place in the tree, but
    580 	   frag_next(newfrag) may be a fragment which is overlapped by it
    581 	*/
    582 	while ((this = frag_next(fragtree, newfrag)) && newfrag->ofs + newfrag->size >= this->ofs + this->size) {
    583 		rb_tree_remove_node(fragtree, this);
    584 		chfs_obsolete_node_frag(chmp, this);
    585 	}
    586 
    587 	if (!this || newfrag->ofs + newfrag->size == this->ofs)
    588 		return 0;
    589 
    590 	this->size = (this->ofs + this->size) - (newfrag->ofs + newfrag->size);
    591 	this->ofs = newfrag->ofs + newfrag->size;
    592 
    593 	if (this->node)
    594 		CHFS_MARK_REF_NORMAL(this->node->nref);
    595 	CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    596 
    597 	return 0;
    598 }
    599 
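        /*
         * Worked example for chfs_add_frag_to_fragtree() above (sizes are
         * made up): if the tree holds a single fragment covering [0, 100) and
         * a new fragment for [40, 60) arrives, the old fragment is trimmed to
         * [0, 40), the new one is inserted and a second fragment pointing at
         * the old node is created for [60, 100):
         *
         *	before:  [0 ......................... 100)
         *	after:   [0 .... 40)[40 ... 60)[60 ... 100)
         *	             old        new        old
         *
         * If the new fragment had instead started at offset 0, the old one
         * would have been shifted to start where the new one ends, or
         * obsoleted entirely when fully covered.
         */
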
    600 void
    601 chfs_remove_frags_of_node(struct chfs_mount *chmp, struct rb_tree *fragtree,
    602 	struct chfs_node_ref *nref)
    603 {
    604 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    605 	struct chfs_node_frag *this, *next;
    606 
    607 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
    608 	while (this) {
    609 		next = frag_next(fragtree, this);
     610 		if (this->node && this->node->nref == nref) {
    611 			rb_tree_remove_node(fragtree, this);
    612 			chfs_free_node_frag(this);
    613 		}
    614 		this = next;
    615 	}
    616 }
    617 
    618 void
    619 chfs_kill_fragtree(struct chfs_mount *chmp, struct rb_tree *fragtree)
    620 {
    621 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    622 	struct chfs_node_frag *this, *next;
    623 	//dbg("start\n");
    624 
    625 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
    626 	while (this) {
    627 		next = frag_next(fragtree, this);
    628 		rb_tree_remove_node(fragtree, this);
    629 		chfs_obsolete_node_frag(chmp, this);
    630 		//dbg("one frag killed\n");
    631 		this = next;
    632 	}
    633 	//dbg("end\n");
    634 }
    635 
    636 uint32_t
    637 chfs_truncate_fragtree(struct chfs_mount *chmp,
    638 	struct rb_tree *fragtree, uint32_t size)
    639 {
    640 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
     641 	struct chfs_node_frag *frag;
     642 	uint64_t sizekey = size;	/* the key comparator expects a uint64_t */
     643 
     644 	dbg("truncate to size: %u\n", size);
     645 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &sizekey);
    646 
    647 	/* Find the last frag before size and set its new size. */
    648 	if (frag && frag->ofs != size) {
    649 		if (frag->ofs + frag->size > size) {
    650 			frag->size = size - frag->ofs;
    651 		}
    652 		frag = frag_next(fragtree, frag);
    653 	}
    654 
    655 	/* Delete frags after new size. */
    656 	while (frag && frag->ofs >= size) {
    657 		struct chfs_node_frag *next = frag_next(fragtree, frag);
    658 
    659 		rb_tree_remove_node(fragtree, frag);
    660 		chfs_obsolete_node_frag(chmp, frag);
    661 		frag = next;
    662 	}
    663 
    664 	if (size == 0) {
    665 		return 0;
    666 	}
    667 
    668 	frag = frag_last(fragtree);
    669 
    670 	if (!frag) {
    671 		return 0;
    672 	}
    673 
    674 	if (frag->ofs + frag->size < size) {
    675 		return frag->ofs + frag->size;
    676 	}
    677 
     678 	/* FIXME Should we check the position of the last node? (PAGE_CACHE size, etc.) */
    679 	if (frag->node && (frag->ofs & (PAGE_SIZE - 1)) == 0) {
    680 		frag->node->nref->nref_offset =
    681 			CHFS_GET_OFS(frag->node->nref->nref_offset) | CHFS_PRISTINE_NODE_MASK;
    682 	}
    683 
    684 	return size;
    685 }
    686 
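        /*
         * Worked example for chfs_truncate_fragtree() above: with fragments
         * [0, 4096) and [4096, 8192) in the tree, truncating to size 5000
         * trims the second fragment to [4096, 5000) and returns 5000, while
         * "truncating" to 10000 changes nothing and returns 8192, telling
         * the caller that the fragtree ends short of the requested size.
         */
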
    687 void
    688 chfs_obsolete_node_frag(struct chfs_mount *chmp,
    689     struct chfs_node_frag *this)
    690 {
    691 	struct chfs_vnode_cache *vc;
    692 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    693 	if (this->node) {
    694 		KASSERT(this->node->frags != 0);
    695 		this->node->frags--;
    696 		if (this->node->frags == 0) {
    697 			KASSERT(!CHFS_REF_OBSOLETE(this->node->nref));
    698 			mutex_enter(&chmp->chm_lock_vnocache);
    699 			vc = chfs_nref_to_vc(this->node->nref);
    700 			dbg("[MARK] lnr: %u ofs: %u\n", this->node->nref->nref_lnr,
    701 				this->node->nref->nref_offset);
    702 
    703 			chfs_remove_and_obsolete(chmp, vc, this->node->nref, &vc->dnode);
    704 			mutex_exit(&chmp->chm_lock_vnocache);
    705 
    706 			chfs_free_full_dnode(this->node);
    707 		} else {
    708 			CHFS_MARK_REF_NORMAL(this->node->nref);
    709 		}
    710 	}
    711 	chfs_free_node_frag(this);
    712 }
    713 
    714 int
    715 chfs_add_full_dnode_to_inode(struct chfs_mount *chmp,
    716     struct chfs_inode *ip,
    717     struct chfs_full_dnode *fd)
    718 {
    719 	int ret;
    720 	struct chfs_node_frag *newfrag;
    721 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    722 
    723 	if (unlikely(!fd->size))
    724 		return 0;
    725 
    726 	newfrag = new_fragment(fd, fd->ofs, fd->size);
    727 	if (unlikely(!newfrag))
    728 		return ENOMEM;
    729 
    730 	ret = chfs_add_frag_to_fragtree(chmp, &ip->fragtree, newfrag);
    731 	if (ret)
    732 		return ret;
    733 
    734 	if (newfrag->ofs & (PAGE_SIZE - 1)) {
    735 		struct chfs_node_frag *prev = frag_prev(&ip->fragtree, newfrag);
    736 
    737 		CHFS_MARK_REF_NORMAL(fd->nref);
    738 		if (prev->node)
    739 			CHFS_MARK_REF_NORMAL(prev->node->nref);
    740 	}
    741 
    742 	if ((newfrag->ofs+newfrag->size) & (PAGE_SIZE - 1)) {
    743 		struct chfs_node_frag *next = frag_next(&ip->fragtree, newfrag);
    744 
    745 		if (next) {
    746 			CHFS_MARK_REF_NORMAL(fd->nref);
    747 			if (next->node)
    748 				CHFS_MARK_REF_NORMAL(next->node->nref);
    749 		}
    750 	}
    751 
    752 	return 0;
    753 }
    754 
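        /*
         * Sketch only (not part of the build) of the caller pattern expected
         * by chfs_add_full_dnode_to_inode() above.  The variable names are
         * placeholders and the surrounding write code is not shown; only the
         * helpers and fields that appear in this file are real.
         *
         *	struct chfs_full_dnode *fd = chfs_alloc_full_dnode();
         *	fd->ofs = write_offset;		// start of the data just written
         *	fd->size = write_length;
         *	fd->nref = nref;		// nref of the flash data node
         *	fd->frags = 0;			// new_fragment() counts the frags
         *
         *	mutex_enter(&chmp->chm_lock_mountfields);
         *	error = chfs_add_full_dnode_to_inode(chmp, ip, fd);
         *	mutex_exit(&chmp->chm_lock_mountfields);
         */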
    755 
    756 /*
    757  * -----------------------
    758  * general node operations
    759  * -----------------------
    760  */
     761 /* Collect the temporary data nodes of an inode. */
    762 int
    763 chfs_get_data_nodes(struct chfs_mount *chmp,
    764     struct chfs_inode *ip,
    765     struct chfs_readinode_info *rii)
    766 {
    767 	uint32_t crc;
    768 	int err;
    769 	size_t len, retlen;
    770 	struct chfs_node_ref *nref;
    771 	struct chfs_flash_data_node *dnode;
    772 	struct chfs_tmp_dnode *td;
    773 	char* buf;
    774 
    775 	len = sizeof(struct chfs_flash_data_node);
    776 	buf = kmem_alloc(len, KM_SLEEP);
    777 
     778 	/* dnode is used below only as a typed view into buf, so it
     779 	 * needs no separate allocation of its own. */
     780 	dnode = (struct chfs_flash_data_node *)buf;
    781 
    782 	nref = chfs_first_valid_data_ref(ip->chvc->dnode);
    783 
    784 	rii->highest_version = ip->chvc->highest_version;
    785 
    786 	while(nref && (struct chfs_vnode_cache *)nref != ip->chvc) {
    787 		err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), len, &retlen);
    788 		if (err || len != retlen)
    789 			goto out;
    790 		dnode = (struct chfs_flash_data_node*)buf;
    791 
    792 		//check header crc
    793 		crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
    794 		if (crc != le32toh(dnode->hdr_crc)) {
    795 			chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
    796 			goto cont;
    797 		}
    798 		//check header magic bitmask
    799 		if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
    800 			chfs_err("Wrong magic bitmask.\n");
    801 			goto cont;
    802 		}
    803 		//check node crc
    804 		crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
    805 		if (crc != le32toh(dnode->node_crc)) {
    806 			chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
    807 			goto cont;
    808 		}
    809 		td = chfs_alloc_tmp_dnode();
    810 		if (!td) {
    811 			chfs_err("Can't allocate tmp dnode info.\n");
    812 			err = ENOMEM;
    813 			goto out;
    814 		}
     815 		/* We don't check the data CRC here, we just add the node to the
     816 		 * temporary dnode tree, because we don't want to check nodes
     817 		 * which have been overlapped by a new node with a higher version.
     818 		 */
    819 		td->node = chfs_alloc_full_dnode();
    820 		if (!td->node) {
    821 			chfs_err("Can't allocate full dnode info.\n");
    822 			err = ENOMEM;
    823 			goto out_tmp_dnode;
    824 		}
    825 		td->version = le64toh(dnode->version);
    826 		td->node->ofs = le64toh(dnode->offset);
    827 		td->data_crc = le32toh(dnode->data_crc);
    828 		td->node->nref = nref;
    829 		td->node->size = le32toh(dnode->data_length);
    830 		td->node->frags = 1;
    831 		td->overlapped = 0;
    832 
    833 		if (td->version > rii->highest_version) {
    834 			rii->highest_version = td->version;
    835 		}
    836 
    837 		err = chfs_add_tmp_dnode_to_tree(chmp, rii, td);
    838 		if (err)
    839 			goto out_full_dnode;
    840 
    841 cont:
    842 		nref = chfs_first_valid_data_ref(nref->nref_next);
    843 	}
    844 
    845 	ip->chvc->highest_version = rii->highest_version;
     846 	kmem_free(buf, len);
     847 	return 0;
    848 /* Exit points */
    849 out_full_dnode:
    850 	chfs_free_full_dnode(td->node);
    851 out_tmp_dnode:
    852 	chfs_free_tmp_dnode(td);
    853 out:
     854 	kmem_free(buf, len);
    856 	return err;
    857 }
    858 
    859 
     860 /* Build the final fragtree from the temporary dnode info (tdi) tree. */
    861 int
    862 chfs_build_fragtree(struct chfs_mount *chmp, struct chfs_inode *ip,
    863     struct chfs_readinode_info *rii)
    864 {
    865 	struct chfs_tmp_dnode_info *pen, *last, *this;
    866 	struct rb_tree ver_tree;    /* version tree */
    867 	uint64_t high_ver = 0;
    868 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    869 
    870 	rb_tree_init(&ver_tree, &tmp_node_rbtree_ops);
    871 
    872 	if (rii->mdata_tn) {
    873 		high_ver = rii->mdata_tn->tmpnode->version;
    874 		rii->latest_ref = rii->mdata_tn->tmpnode->node->nref;
    875 	}
    876 
    877 	pen = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&rii->tdi_root);
    878 
    879 	while((last = pen)) {
    880 		pen = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&rii->tdi_root, last, RB_DIR_LEFT);
    881 
    882 		rb_tree_remove_node(&rii->tdi_root, last);
    883 		rb_tree_insert_node(&ver_tree, last);
    884 
    885 		if (last->tmpnode->overlapped) {
    886 			if (pen)
    887 				continue;
    888 
    889 			last->tmpnode->overlapped = 0;
    890 		}
    891 
    892 		this = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&ver_tree);
    893 
    894 		while (this) {
    895 			struct chfs_tmp_dnode_info *vers_next;
    896 			int ret;
    897 
    898 			vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
    899 			rb_tree_remove_node(&ver_tree, this);
    900 
    901 			struct chfs_tmp_dnode *tmp_td = this->tmpnode;
    902 			while (tmp_td) {
    903 				struct chfs_tmp_dnode *next_td = tmp_td->next;
    904 
    905 				if (chfs_check_td_node(chmp, tmp_td)) {
    906 					if (next_td) {
    907 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    908 						chfs_kill_td(chmp, tmp_td);
    909 					} else {
    910 						break;
    911 					}
    912 				} else {
    913 					if (tmp_td->version > high_ver) {
    914 						high_ver = tmp_td->version;
    915 						dbg("highver: %llu\n", (unsigned long long)high_ver);
    916 						rii->latest_ref = tmp_td->node->nref;
    917 					}
    918 
    919 					ret = chfs_add_full_dnode_to_inode(chmp, ip, tmp_td->node);
    920 					if (ret) {
    921 						while (1) {
    922 							vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
    923 							while (tmp_td) {
    924 								next_td = tmp_td->next;
    925 
    926 								chfs_free_full_dnode(tmp_td->node);
    927 								chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    928 								chfs_kill_td(chmp, tmp_td);
    929 								tmp_td = next_td;
    930 							}
    931 							chfs_free_tmp_dnode_info(this);
    932 							this = vers_next;
    933 							if (!this)
    934 								break;
    935 							rb_tree_remove_node(&ver_tree, vers_next);
    936 							chfs_kill_tdi(chmp, vers_next);
    937 						}
    938 						return ret;
    939 					}
    940 
    941 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    942 					chfs_free_tmp_dnode(tmp_td);
    943 					// shouldn't obsolete tmp_td here,
    944 					// because tmp_td->node was added to the inode
    945 				}
    946 				tmp_td = next_td;
    947 			}
    948 			chfs_kill_tdi(chmp, this);
    949 			this = vers_next;
    950 		}
    951 	}
    952 
    953 	return 0;
    954 }
    955 
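        /*
         * Read an inode, serializing against the checker and the garbage
         * collector through the vnode cache state (informal summary of the
         * switch below): UNCHECKED and CHECKEDABSENT move to READING and the
         * inode is read; CHECKING and GC mean someone else is working on the
         * node, so the lookup is retried; PRESENT and READING are unexpected
         * here and make the call fail with ENOENT.
         */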
    956 int chfs_read_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
    957 {
    958 	struct chfs_vnode_cache *vc = ip->chvc;
    959 
    960 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    961 
    962 retry:
    963 	mutex_enter(&chmp->chm_lock_vnocache);
    964 	switch (vc->state) {
    965 	case VNO_STATE_UNCHECKED:
    966 	case VNO_STATE_CHECKEDABSENT:
    967 		vc->state = VNO_STATE_READING;
    968 		break;
    969 	case VNO_STATE_CHECKING:
    970 	case VNO_STATE_GC:
    971 		mutex_exit(&chmp->chm_lock_vnocache);
    972 		//sleep_on_spinunlock(&chmp->chm_lock_vnocache);
    973 		//KASSERT(!mutex_owned(&chmp->chm_lock_vnocache));
    974 		goto retry;
    975 		break;
    976 	case VNO_STATE_PRESENT:
    977 	case VNO_STATE_READING:
    978 		chfs_err("Reading inode #%llu in state %d!\n",
    979 			(unsigned long long)vc->vno, vc->state);
    980 		chfs_err("wants to read a nonexistent ino %llu\n",
    981 			(unsigned long long)vc->vno);
    982 		return ENOENT;
    983 	default:
    984 		panic("BUG() Bad vno cache state.");
    985 	}
    986 	mutex_exit(&chmp->chm_lock_vnocache);
    987 
    988 	return chfs_read_inode_internal(chmp, ip);
    989 }
    990 
    991 /*
    992  * Read inode frags.
     993  * First collect the temporary data nodes,
     994  * then build the fragtree from them.
    995  */
    996 int
    997 chfs_read_inode_internal(struct chfs_mount *chmp, struct chfs_inode *ip)
    998 {
    999 	int err;
   1000 	size_t len, retlen;
   1001 	char* buf;
   1002 	struct chfs_readinode_info rii;
   1003 	struct chfs_flash_vnode *fvnode;
   1004 
   1005 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
   1006 
   1007 	len = sizeof(*fvnode);
   1008 
   1009 	memset(&rii, 0, sizeof(rii));
   1010 
   1011 	rb_tree_init(&rii.tdi_root, &tmp_node_rbtree_ops);
   1012 
    1013 	/* build up the temporary dnode tree */
   1014 	err = chfs_get_data_nodes(chmp, ip, &rii);
   1015 	if (err) {
   1016 		if (ip->chvc->state == VNO_STATE_READING)
   1017 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
   1018 		/* FIXME Should we kill fragtree or something here? */
   1019 		return err;
   1020 	}
   1021 
   1022 	rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
   1023 	/*
   1024 	 * build fragtree from temp nodes
   1025 	 */
   1026 	err = chfs_build_fragtree(chmp, ip, &rii);
   1027 	if (err) {
   1028 		if (ip->chvc->state == VNO_STATE_READING)
   1029 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
   1030 		/* FIXME Should we kill fragtree or something here? */
   1031 		return err;
   1032 	}
   1033 
   1034 	if (!rii.latest_ref) {
   1035 		return 0;
   1036 	}
   1037 
   1038 	buf = kmem_alloc(len, KM_SLEEP);
   1039 	if (!buf)
   1040 		return ENOMEM;
   1041 
   1042 	/*
   1043 	 * set inode size from chvc->v
   1044 	 */
   1045 	err = chfs_read_leb(chmp, ip->chvc->v->nref_lnr, buf, CHFS_GET_OFS(ip->chvc->v->nref_offset), len, &retlen);
   1046 	if (err || retlen != len) {
   1047 		kmem_free(buf, len);
   1048 		return err?err:EIO;
   1049 	}
   1050 
   1051 	fvnode = (struct chfs_flash_vnode*)buf;
   1052 
   1053 	dbg("set size from v: %u\n", fvnode->dn_size);
   1054 	chfs_set_vnode_size(ITOV(ip), fvnode->dn_size);
   1055 	uint32_t retsize = chfs_truncate_fragtree(chmp, &ip->fragtree, fvnode->dn_size);
   1056 	if (retsize != fvnode->dn_size) {
   1057 		dbg("Truncating failed. It is %u instead of %u\n", retsize, fvnode->dn_size);
   1058 	}
   1059 
   1060 	kmem_free(buf, len);
   1061 
   1062 	if (ip->chvc->state == VNO_STATE_READING) {
   1063 		ip->chvc->state = VNO_STATE_PRESENT;
   1064 	}
   1065 
   1066 	return 0;
   1067 }
   1068 
   1069 int
   1070 chfs_read_data(struct chfs_mount* chmp, struct vnode *vp,
   1071     struct buf *bp)
   1072 {
   1073 	off_t ofs;
   1074 	struct chfs_node_frag *frag;
   1075 	char * buf;
   1076 	int err = 0;
   1077 	size_t size, retlen;
   1078 	uint32_t crc;
   1079 	struct chfs_inode *ip = VTOI(vp);
   1080 	struct chfs_flash_data_node *dnode;
   1081 	struct chfs_node_ref *nref;
   1082 
   1083 	memset(bp->b_data, 0, bp->b_bcount);
   1084 
   1085 	ofs = bp->b_blkno * PAGE_SIZE;
   1086 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(&ip->fragtree, &ofs);
   1087 
   1088 	if (!frag || frag->ofs > ofs || frag->ofs + frag->size <= ofs) {
   1089 		dbg("not found in frag tree\n");
   1090 		return 0;
   1091 	}
   1092 
   1093 	if (!frag->node) {
   1094 		dbg("no node in frag\n");
   1095 		return 0;
   1096 	}
   1097 
   1098 	nref = frag->node->nref;
   1099 
   1100 	size = sizeof(*dnode) + frag->size;
   1101 
   1102 	buf = kmem_alloc(size, KM_SLEEP);
   1103 
   1104 	dbg("reading from lnr: %u, offset: %u, size: %zu\n", nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset), size);
   1105 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), size, &retlen);
   1106 	if (err) {
   1107 		chfs_err("error after reading: %d\n", err);
   1108 		goto out;
   1109 	}
   1110 	if (retlen != size) {
   1111 		chfs_err("retlen: %zu != size: %zu\n", retlen, size);
   1112 		err = EIO;
   1113 		goto out;
   1114 	}
   1115 
   1116 	dnode = (struct chfs_flash_data_node *)buf;
   1117 	crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
   1118 	if (crc != le32toh(dnode->hdr_crc)) {
   1119 		chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
   1120 		err = EIO;
   1121 		goto out;
   1122 	}
   1123 	//check header magic bitmask
   1124 	if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
   1125 		chfs_err("Wrong magic bitmask.\n");
   1126 		err = EIO;
   1127 		goto out;
   1128 	}
   1129 	//check node crc
   1130 	crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
   1131 	if (crc != le32toh(dnode->node_crc)) {
   1132 		chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
   1133 		err = EIO;
   1134 		goto out;
   1135 	}
    1136 	crc = crc32(0, (uint8_t *)dnode->data, le32toh(dnode->data_length));
   1137 	if (crc != le32toh(dnode->data_crc)) {
   1138 		chfs_err("Data CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->data_crc));
   1139 		err = EIO;
   1140 		goto out;
   1141 	}
   1142 
    1143 	memcpy(bp->b_data, dnode->data, le32toh(dnode->data_length));
   1144 	bp->b_resid = 0;
   1145 
   1146 out:
   1147 	kmem_free(buf, size);
   1148 	return err;
   1149 }
   1150