      1 /*	$NetBSD: chfs_readinode.c,v 1.5 2012/08/22 09:20:13 ttoth Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2010 Department of Software Engineering,
      5  *		      University of Szeged, Hungary
      6  * Copyright (C) 2010 David Tengeri <dtengeri (at) inf.u-szeged.hu>
      7  * Copyright (C) 2010 Tamas Toth <ttoth (at) inf.u-szeged.hu>
      8  * Copyright (C) 2010 Adam Hoka <ahoka (at) NetBSD.org>
      9  * All rights reserved.
     10  *
     11  * This code is derived from software contributed to The NetBSD Foundation
     12  * by the Department of Software Engineering, University of Szeged, Hungary
     13  *
     14  * Redistribution and use in source and binary forms, with or without
     15  * modification, are permitted provided that the following conditions
     16  * are met:
     17  * 1. Redistributions of source code must retain the above copyright
     18  *    notice, this list of conditions and the following disclaimer.
     19  * 2. Redistributions in binary form must reproduce the above copyright
     20  *    notice, this list of conditions and the following disclaimer in the
     21  *    documentation and/or other materials provided with the distribution.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 /*
     37  * chfs_readinode.c
     38  *
     39  *  Created on: 2010.05.31.
     40  *      Author: dtengeri
     41  */
     42 
     43 #include <sys/buf.h>
     44 
     45 #include "chfs.h"
     46 
     47 /* tmp node operations */
     48 int chfs_check_td_data(struct chfs_mount *,
     49     struct chfs_tmp_dnode *);
     50 int chfs_check_td_node(struct chfs_mount *,
     51     struct chfs_tmp_dnode *);
     52 struct chfs_node_ref *chfs_first_valid_data_ref(struct chfs_node_ref *);
     53 int chfs_add_tmp_dnode_to_tree(struct chfs_mount *,
     54     struct chfs_readinode_info *,
     55     struct chfs_tmp_dnode *);
     56 void chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *,
     57 	struct chfs_tmp_dnode *);
     58 void chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *,
     59 	struct chfs_tmp_dnode *);
     60 static void chfs_kill_td(struct chfs_mount *,
     61     struct chfs_tmp_dnode *);
     62 static void chfs_kill_tdi(struct chfs_mount *,
     63     struct chfs_tmp_dnode_info *);
     64 /* frag node operations */
     65 struct chfs_node_frag *new_fragment(struct chfs_full_dnode *,
     66     uint32_t,
     67     uint32_t);
     68 int no_overlapping_node(struct rb_tree *, struct chfs_node_frag *,
     69     struct chfs_node_frag *, uint32_t);
     70 int chfs_add_frag_to_fragtree(struct chfs_mount *,
     71     struct rb_tree *,
     72     struct chfs_node_frag *);
     73 void chfs_obsolete_node_frag(struct chfs_mount *,
     74     struct chfs_node_frag *);
     75 /* general node operations */
     76 int chfs_get_data_nodes(struct chfs_mount *,
     77     struct chfs_inode *,
     78     struct chfs_readinode_info *);
     79 int chfs_build_fragtree(struct chfs_mount *,
     80     struct chfs_inode *,
     81     struct chfs_readinode_info *);
     82 
     83 
     84 
     85 /*
     86  * --------------------------
     87  * tmp node rbtree operations
     88  * --------------------------
     89  */
     90 static signed int
     91 tmp_node_compare_nodes(void *ctx, const void *n1, const void *n2)
     92 {
     93 	const struct chfs_tmp_dnode_info *tdi1 = n1;
     94 	const struct chfs_tmp_dnode_info *tdi2 = n2;
     95 
     96 	return (tdi1->tmpnode->node->ofs - tdi2->tmpnode->node->ofs);
     97 }
     98 
     99 static signed int
    100 tmp_node_compare_key(void *ctx, const void *n, const void *key)
    101 {
    102 	const struct chfs_tmp_dnode_info *tdi = n;
    103 	uint64_t ofs =  *(const uint64_t *)key;
    104 
    105 	return (tdi->tmpnode->node->ofs - ofs);
    106 }
    107 
    108 const rb_tree_ops_t tmp_node_rbtree_ops = {
    109 	.rbto_compare_nodes = tmp_node_compare_nodes,
    110 	.rbto_compare_key = tmp_node_compare_key,
    111 	.rbto_node_offset = offsetof(struct chfs_tmp_dnode_info, rb_node),
    112 	.rbto_context = NULL
    113 };
    114 
    115 
    116 /*
    117  * ---------------------------
    118  * frag node rbtree operations
    119  * ---------------------------
    120  */
    121 static signed int
    122 frag_compare_nodes(void *ctx, const void *n1, const void *n2)
    123 {
    124 	const struct chfs_node_frag *frag1 = n1;
    125 	const struct chfs_node_frag *frag2 = n2;
    126 
    127 	return (frag1->ofs - frag2->ofs);
    128 }
    129 
    130 static signed int
    131 frag_compare_key(void *ctx, const void *n, const void *key)
    132 {
    133 	const struct chfs_node_frag *frag = n;
    134 	uint64_t ofs = *(const uint64_t *)key;
    135 
    136 	return (frag->ofs - ofs);
    137 }
    138 
    139 const rb_tree_ops_t frag_rbtree_ops = {
    140 	.rbto_compare_nodes = frag_compare_nodes,
    141 	.rbto_compare_key   = frag_compare_key,
    142 	.rbto_node_offset = offsetof(struct chfs_node_frag, rb_node),
    143 	.rbto_context = NULL
    144 };
    145 
    146 
    147 /*
    148  * -------------------
    149  * tmp node operations
    150  * -------------------
    151  */
    152 /*
    153  * Check the data CRC of the node.
    154  *
     155  * Returns: 0 - if everything is OK;
     156  *          1 - if the data CRC is incorrect;
     157  *          2 - if the node data could not be read back
     158  *              (allocation or flash read failure).
    159  */
    160 int
    161 chfs_check_td_data(struct chfs_mount *chmp,
    162     struct chfs_tmp_dnode *td)
    163 {
    164 	int err;
    165 	size_t retlen, len, totlen;
    166 	uint32_t crc;
    167 	uint64_t ofs;
    168 	char *buf;
    169 	struct chfs_node_ref *nref = td->node->nref;
    170 
    171 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    172 	KASSERT(!mutex_owned(&chmp->chm_lock_sizes));
    173 
    174 	ofs = CHFS_GET_OFS(nref->nref_offset) + sizeof(struct chfs_flash_data_node);
    175 	len = td->node->size;
    176 	if (!len)
    177 		return 0;
    178 
    179 	buf = kmem_alloc(len, KM_SLEEP);
    180 	if (!buf) {
    181 		dbg("allocating error\n");
    182 		return 2;
    183 	}
    184 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, ofs, len, &retlen);
    185 	if (err) {
    186 		dbg("error wile reading: %d\n", err);
    187 		err = 2;
    188 		goto out;
    189 	}
    190 
    191 	if (len != retlen) {
    192 		dbg("len:%zu, retlen:%zu\n", len, retlen);
    193 		err = 2;
    194 		goto out;
    195 	}
    196 	crc = crc32(0, (uint8_t *)buf, len);
    197 
    198 	if (crc != td->data_crc) {
    199 		dbg("crc failed, calculated: 0x%x, orig: 0x%x\n", crc, td->data_crc);
    200 		kmem_free(buf, len);
    201 		return 1;
    202 	}
    203 
    204 	CHFS_MARK_REF_NORMAL(nref);
    205 	totlen = CHFS_PAD(sizeof(struct chfs_flash_data_node) + len);
    206 
    207 	mutex_enter(&chmp->chm_lock_sizes);
    208 	chfs_change_size_unchecked(chmp, &chmp->chm_blocks[nref->nref_lnr], -totlen);
    209 	chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
    210 	mutex_exit(&chmp->chm_lock_sizes);
    211 	KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);
    212 
    213 	err = 0;
    214 out:
    215 	kmem_free(buf, len);
    216 	return err;
    217 }
    218 
    219 int
    220 chfs_check_td_node(struct chfs_mount *chmp, struct chfs_tmp_dnode *td)
    221 {
    222 	int ret;
    223 
    224 	if (CHFS_REF_FLAGS(td->node->nref) != CHFS_UNCHECKED_NODE_MASK)
    225 		return 0;
    226 
    227 	ret = chfs_check_td_data(chmp, td);
    228 	return ret;
    229 }
    230 
    231 
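/* Return the first non-obsolete data node reference in the list, or NULL. */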
    232 struct chfs_node_ref *
    233 chfs_first_valid_data_ref(struct chfs_node_ref *nref)
    234 {
    235 	while (nref) {
    236 		if (!CHFS_REF_OBSOLETE(nref)) {
    237 #ifdef DGB_MSG_GC
    238 			if (nref->nref_lnr == REF_EMPTY_NODE) {
    239 				dbg("FIRST VALID IS EMPTY!\n");
    240 			}
    241 #endif
    242 			return nref;
    243 		}
    244 
    245 		if (nref->nref_next) {
    246 			nref = nref->nref_next;
    247 		} else
    248 			break;
    249 	}
    250 	return NULL;
    251 }
    252 
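/* Append a tmp dnode to the end of the tdi's tmpnode chain. */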
    253 void
    254 chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *tdi,
    255 	struct chfs_tmp_dnode *td)
    256 {
    257 	if (!tdi->tmpnode) {
    258 		tdi->tmpnode = td;
    259 	} else {
    260 		struct chfs_tmp_dnode *tmp = tdi->tmpnode;
    261 		while (tmp->next) {
    262 			tmp = tmp->next;
    263 		}
    264 		tmp->next = td;
    265 	}
    266 }
    267 
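/* Unlink a tmp dnode from the tdi's tmpnode chain (it is not freed here). */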
    268 void
    269 chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *tdi,
    270 	struct chfs_tmp_dnode *td)
    271 {
    272 	if (tdi->tmpnode == td) {
    273 		tdi->tmpnode = tdi->tmpnode->next;
    274 	} else {
    275 		struct chfs_tmp_dnode *tmp = tdi->tmpnode->next;
    276 		while (tmp->next && tmp->next != td) {
    277 			tmp = tmp->next;
    278 		}
    279 		if (tmp->next) {
    280 			tmp->next = td->next;
    281 		}
    282 	}
    283 }
    284 
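/*
 * Obsolete the on-flash node referenced by the tmp dnode, if any (removing
 * its nref from the vnode cache's data chain), then free the tmp dnode.
 */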
    285 static void
    286 chfs_kill_td(struct chfs_mount *chmp,
    287     struct chfs_tmp_dnode *td)
    288 {
    289 	struct chfs_vnode_cache *vc;
    290 	if (td->node) {
    291 		mutex_enter(&chmp->chm_lock_vnocache);
    292 		vc = chfs_nref_to_vc(td->node->nref);
    293 		chfs_remove_and_obsolete(chmp, vc, td->node->nref, &vc->dnode);
    294 		mutex_exit(&chmp->chm_lock_vnocache);
    295 	}
    296 
    297 	chfs_free_tmp_dnode(td);
    298 }
    299 
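/* Kill every tmp dnode chained to the info node, then free the info node. */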
    300 static void
    301 chfs_kill_tdi(struct chfs_mount *chmp,
    302     struct chfs_tmp_dnode_info *tdi)
    303 {
    304 	struct chfs_tmp_dnode *next, *tmp = tdi->tmpnode;
    305 
    306 	while (tmp) {
    307 		next = tmp->next;
    308 		chfs_kill_td(chmp, tmp);
    309 		tmp = next;
    310 	}
    311 
    312 	chfs_free_tmp_dnode_info(tdi);
    313 }
    314 
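/*
 * Insert a new tmp dnode into the temporary (per-read) rbtree.  Older nodes
 * that the new one completely and validly overlaps are killed; if the new
 * node is itself completely covered by a valid newer node, it is dropped.
 * The overlapped flags of the affected entries are updated as well.
 */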
    315 int
    316 chfs_add_tmp_dnode_to_tree(struct chfs_mount *chmp,
    317     struct chfs_readinode_info *rii,
    318     struct chfs_tmp_dnode *newtd)
    319 {
    320 	uint64_t end_ofs = newtd->node->ofs + newtd->node->size;
    321 	struct chfs_tmp_dnode_info *this;
    322 	struct rb_node *node, *prev_node;
    323 	struct chfs_tmp_dnode_info *newtdi;
    324 
    325 	node = rb_tree_find_node(&rii->tdi_root, &newtd->node->ofs);
    326 	if (node) {
    327 		this = (struct chfs_tmp_dnode_info *)node;
    328 		while (this->tmpnode->overlapped) {
    329 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
    330 			if (!prev_node) {
    331 				this->tmpnode->overlapped = 0;
    332 				break;
    333 			}
    334 			node = prev_node;
    335 			this = (struct chfs_tmp_dnode_info *)node;
    336 		}
    337 	}
    338 	while (node) {
    339 		this = (struct chfs_tmp_dnode_info *)node;
    340 		if (this->tmpnode->node->ofs > end_ofs)
    341 			break;
    342 
    343 		struct chfs_tmp_dnode *tmp_td = this->tmpnode;
    344 		while (tmp_td) {
    345 			if (tmp_td->version == newtd->version) {
    346 				if (!chfs_check_td_node(chmp, tmp_td)) {
    347 					dbg("calling kill td 0\n");
    348 					chfs_kill_td(chmp, newtd);
    349 					return 0;
    350 				} else {
    351 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    352 					chfs_kill_td(chmp, tmp_td);
    353 					chfs_add_tmp_dnode_to_tdi(this, newtd);
    354 					return 0;
    355 				}
    356 			}
    357 			if (tmp_td->version < newtd->version &&
    358 				tmp_td->node->ofs >= newtd->node->ofs &&
    359 				tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
    360 				/* New node entirely overlaps 'this' */
    361 				if (chfs_check_td_node(chmp, newtd)) {
    362 					dbg("calling kill td 2\n");
    363 					chfs_kill_td(chmp, newtd);
    364 					return 0;
    365 				}
    366 				/* ... and is good. Kill 'this' and any subsequent nodes which are also overlapped */
    367 				while (tmp_td && tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
    368 					struct rb_node *next = rb_tree_iterate(&rii->tdi_root, this, RB_DIR_RIGHT);
    369 					struct chfs_tmp_dnode_info *next_tdi = (struct chfs_tmp_dnode_info *)next;
    370 					struct chfs_tmp_dnode *next_td = NULL;
    371 					if (tmp_td->next) {
    372 						next_td = tmp_td->next;
    373 					} else if (next_tdi) {
    374 						next_td = next_tdi->tmpnode;
    375 					}
    376 					if (tmp_td->version < newtd->version) {
    377 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    378 						chfs_kill_td(chmp, tmp_td);
    379 						if (!this->tmpnode) {
    380 							rb_tree_remove_node(&rii->tdi_root, this);
    381 							chfs_kill_tdi(chmp, this);
    382 							this = next_tdi;
    383 						}
    384 					}
    385 					tmp_td = next_td;
    386 				}
    387 				continue;
    388 			}
    389 			if (tmp_td->version > newtd->version &&
    390 				tmp_td->node->ofs <= newtd->node->ofs &&
    391 				tmp_td->node->ofs + tmp_td->node->size >= end_ofs) {
    392 				/* New node entirely overlapped by 'this' */
    393 				if (!chfs_check_td_node(chmp, tmp_td)) {
    394 					dbg("this version: %llu\n",
    395 						(unsigned long long)tmp_td->version);
    396 					dbg("this ofs: %llu, size: %u\n",
    397 						(unsigned long long)tmp_td->node->ofs,
    398 						tmp_td->node->size);
    399 					dbg("calling kill td 4\n");
    400 					chfs_kill_td(chmp, newtd);
    401 					return 0;
    402 				}
    403 				/* ... but 'this' was bad. Replace it... */
    404 				chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    405 				chfs_kill_td(chmp, tmp_td);
    406 				if (!this->tmpnode) {
    407 					rb_tree_remove_node(&rii->tdi_root, this);
    408 					chfs_kill_tdi(chmp, this);
    409 				}
    410 				dbg("calling kill td 5\n");
    411 				chfs_kill_td(chmp, newtd);
    412 				break;
    413 			}
    414 			tmp_td = tmp_td->next;
    415 		}
    416 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
    417 	}
    418 
    419 	newtdi = chfs_alloc_tmp_dnode_info();
    420 	chfs_add_tmp_dnode_to_tdi(newtdi, newtd);
     421 	/* The new node neither completely obsoletes nor is completely
     422 	   obsoleted by an earlier node. Insert it into the tree. */
    423 	struct chfs_tmp_dnode_info *tmp_tdi = rb_tree_insert_node(&rii->tdi_root, newtdi);
    424 	if (tmp_tdi != newtdi) {
    425 		chfs_remove_tmp_dnode_from_tdi(newtdi, newtd);
    426 		chfs_add_tmp_dnode_to_tdi(tmp_tdi, newtd);
    427 		chfs_kill_tdi(chmp, newtdi);
    428 	}
    429 
    430 	/* If there's anything behind that overlaps us, note it */
    431 	node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
    432 	if (node) {
    433 		while (1) {
    434 			this = (struct chfs_tmp_dnode_info *)node;
    435 			if (this->tmpnode->node->ofs + this->tmpnode->node->size > newtd->node->ofs) {
    436 				newtd->overlapped = 1;
    437 			}
    438 			if (!this->tmpnode->overlapped)
    439 				break;
    440 
    441 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
    442 			if (!prev_node) {
    443 				this->tmpnode->overlapped = 0;
    444 				break;
    445 			}
    446 			node = prev_node;
    447 		}
    448 	}
    449 
    450 	/* If the new node overlaps anything ahead, note it */
    451 	node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
    452 	this = (struct chfs_tmp_dnode_info *)node;
    453 	while (this && this->tmpnode->node->ofs < end_ofs) {
    454 		this->tmpnode->overlapped = 1;
    455 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
    456 		this = (struct chfs_tmp_dnode_info *)node;
    457 	}
    458 	return 0;
    459 }
    460 
    461 
    462 /*
    463  * --------------------
    464  * frag node operations
    465  * --------------------
    466  */
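/*
 * Allocate a fragment covering [ofs, ofs + size) and take a reference on the
 * backing full dnode (fdn may be NULL for a hole fragment).
 */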
    467 struct chfs_node_frag *
    468 new_fragment(struct chfs_full_dnode *fdn, uint32_t ofs, uint32_t size)
    469 {
    470 	struct chfs_node_frag *newfrag;
    471 	newfrag = chfs_alloc_node_frag();
    472 	if (newfrag) {
    473 		newfrag->ofs = ofs;
    474 		newfrag->size = size;
    475 		newfrag->node = fdn;
    476 		if (newfrag->node) {
    477 			newfrag->node->frags++;
    478 		}
    479 	} else {
    480 		chfs_err("cannot allocate a chfs_node_frag object\n");
    481 	}
    482 	return newfrag;
    483 }
    484 
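/*
 * Insert newfrag knowing that it does not overlap anything in the tree; if
 * there is a gap before it, a hole fragment is inserted first.
 */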
    485 int
    486 no_overlapping_node(struct rb_tree *fragtree,
    487     struct chfs_node_frag *newfrag,
    488     struct chfs_node_frag *this, uint32_t lastend)
    489 {
    490 	if (lastend < newfrag->node->ofs) {
    491 		struct chfs_node_frag *holefrag;
    492 
    493 		holefrag = new_fragment(NULL, lastend, newfrag->node->ofs - lastend);
    494 		if (!holefrag) {
    495 			chfs_free_node_frag(newfrag);
    496 			return ENOMEM;
    497 		}
    498 
    499 		rb_tree_insert_node(fragtree, holefrag);
    500 		this = holefrag;
    501 	}
    502 
    503 	rb_tree_insert_node(fragtree, newfrag);
    504 
    505 	return 0;
    506 }
    507 
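/*
 * Add a fragment to the fragtree, splitting, shrinking or obsoleting the
 * existing fragments it overlaps.
 */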
    508 int
    509 chfs_add_frag_to_fragtree(struct chfs_mount *chmp,
    510     struct rb_tree *fragtree,
    511     struct chfs_node_frag *newfrag)
    512 {
    513 	struct chfs_node_frag *this;
    514 	uint32_t lastend;
    515 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    516 
    517 	this = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &newfrag->ofs);
    518 
    519 	if (this) {
    520 		lastend = this->ofs + this->size;
    521 	} else {
    522 		lastend = 0;
    523 	}
    524 
    525 	if (lastend <= newfrag->ofs) {
    526 		//dbg("no overlapping node\n");
    527 		if (lastend && (lastend - 1) >> PAGE_SHIFT == newfrag->ofs >> PAGE_SHIFT) {
    528 			if (this->node)
    529 				CHFS_MARK_REF_NORMAL(this->node->nref);
    530 			CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    531 		}
    532 		return no_overlapping_node(fragtree, newfrag, this, lastend);
    533 	}
    534 
    535 	if (newfrag->ofs > this->ofs) {
    536 
    537 		CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    538 		if (this->node)
    539 			CHFS_MARK_REF_NORMAL(this->node->nref);
    540 
    541 		if (this->ofs + this->size > newfrag->ofs + newfrag->size) {
    542 			/* newfrag is inside of this */
    543 			//dbg("newfrag is inside of this\n");
    544 			struct chfs_node_frag *newfrag2;
    545 
    546 			newfrag2 = new_fragment(this->node, newfrag->ofs + newfrag->size,
    547 			    this->ofs + this->size - newfrag->ofs - newfrag->size);
    548 			if (!newfrag2)
    549 				return ENOMEM;
    550 
    551 			this->size = newfrag->ofs - this->ofs;
    552 
    553 			rb_tree_insert_node(fragtree, newfrag);
    554 			rb_tree_insert_node(fragtree, newfrag2);
    555 
    556 			return 0;
    557 		}
    558 		/* newfrag is bottom of this */
    559 		//dbg("newfrag is bottom of this\n");
    560 		this->size = newfrag->ofs - this->ofs;
    561 		rb_tree_insert_node(fragtree, newfrag);
    562 	} else {
     563 		/* newfrag starts at the same point */
    564 		//dbg("newfrag start at same point\n");
    565 		//TODO replace instead of remove and insert
    566 		rb_tree_remove_node(fragtree, this);
    567 		rb_tree_insert_node(fragtree, newfrag);
    568 
    569 		if (newfrag->ofs + newfrag->size >= this->ofs+this->size) {
    570 			chfs_obsolete_node_frag(chmp, this);
    571 		} else {
    572 			this->ofs += newfrag->size;
    573 			this->size -= newfrag->size;
    574 
    575 			rb_tree_insert_node(fragtree, this);
    576 			return 0;
    577 		}
    578 	}
    579 	/* OK, now we have newfrag added in the correct place in the tree, but
    580 	   frag_next(newfrag) may be a fragment which is overlapped by it
    581 	*/
    582 	while ((this = frag_next(fragtree, newfrag)) && newfrag->ofs + newfrag->size >= this->ofs + this->size) {
    583 		rb_tree_remove_node(fragtree, this);
    584 		chfs_obsolete_node_frag(chmp, this);
    585 	}
    586 
    587 	if (!this || newfrag->ofs + newfrag->size == this->ofs)
    588 		return 0;
    589 
    590 	this->size = (this->ofs + this->size) - (newfrag->ofs + newfrag->size);
    591 	this->ofs = newfrag->ofs + newfrag->size;
    592 
    593 	if (this->node)
    594 		CHFS_MARK_REF_NORMAL(this->node->nref);
    595 	CHFS_MARK_REF_NORMAL(newfrag->node->nref);
    596 
    597 	return 0;
    598 }
    599 
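/* Remove every fragment that belongs to the given node reference. */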
    600 void
    601 chfs_remove_frags_of_node(struct chfs_mount *chmp, struct rb_tree *fragtree,
    602 	struct chfs_node_ref *nref)
    603 {
    604 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    605 	struct chfs_node_frag *this, *next;
    606 
    607 	if (nref == NULL) {
    608 		return;
    609 	}
    610 
    611 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
    612 	while (this) {
    613 		next = frag_next(fragtree, this);
    614 		if (this->node->nref == nref) {
    615 			rb_tree_remove_node(fragtree, this);
    616 			chfs_free_node_frag(this);
    617 		}
    618 		this = next;
    619 	}
    620 }
    621 
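/* Tear down a whole fragtree, obsoleting every fragment in it. */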
    622 void
    623 chfs_kill_fragtree(struct chfs_mount *chmp, struct rb_tree *fragtree)
    624 {
    625 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    626 	struct chfs_node_frag *this, *next;
    627 	//dbg("start\n");
    628 
    629 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
    630 	while (this) {
    631 		next = frag_next(fragtree, this);
    632 		rb_tree_remove_node(fragtree, this);
    633 		chfs_obsolete_node_frag(chmp, this);
    634 		//dbg("one frag killed\n");
    635 		this = next;
    636 	}
    637 	//dbg("end\n");
    638 }
    639 
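/*
 * Truncate the fragtree to the given size and return the size the tree
 * actually covers afterwards.
 */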
    640 uint32_t
    641 chfs_truncate_fragtree(struct chfs_mount *chmp,
    642 	struct rb_tree *fragtree, uint32_t size)
    643 {
    644 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    645 	struct chfs_node_frag *frag;
    646 
    647 	dbg("truncate to size: %u\n", size);
    648 
    649 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &size);
    650 
    651 	/* Find the last frag before size and set its new size. */
    652 	if (frag && frag->ofs != size) {
    653 		if (frag->ofs + frag->size > size) {
    654 			frag->size = size - frag->ofs;
    655 		}
    656 		frag = frag_next(fragtree, frag);
    657 	}
    658 
    659 	/* Delete frags after new size. */
    660 	while (frag && frag->ofs >= size) {
    661 		struct chfs_node_frag *next = frag_next(fragtree, frag);
    662 
    663 		rb_tree_remove_node(fragtree, frag);
    664 		chfs_obsolete_node_frag(chmp, frag);
    665 		frag = next;
    666 	}
    667 
    668 	if (size == 0) {
    669 		return 0;
    670 	}
    671 
    672 	frag = frag_last(fragtree);
    673 
    674 	if (!frag) {
    675 		return 0;
    676 	}
    677 
    678 	if (frag->ofs + frag->size < size) {
    679 		return frag->ofs + frag->size;
    680 	}
    681 
     682 	/* FIXME Should we check the position of the last node? (PAGE_CACHE size, etc.) */
    683 	if (frag->node && (frag->ofs & (PAGE_SIZE - 1)) == 0) {
    684 		frag->node->nref->nref_offset =
    685 			CHFS_GET_OFS(frag->node->nref->nref_offset) | CHFS_PRISTINE_NODE_MASK;
    686 	}
    687 
    688 	return size;
    689 }
    690 
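/*
 * Drop a fragment's reference on its node; when the last reference is gone
 * the node is obsoleted and freed, then the fragment itself is freed.
 */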
    691 void
    692 chfs_obsolete_node_frag(struct chfs_mount *chmp,
    693     struct chfs_node_frag *this)
    694 {
    695 	struct chfs_vnode_cache *vc;
    696 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    697 	if (this->node) {
    698 		KASSERT(this->node->frags != 0);
    699 		this->node->frags--;
    700 		if (this->node->frags == 0) {
    701 			KASSERT(!CHFS_REF_OBSOLETE(this->node->nref));
    702 			mutex_enter(&chmp->chm_lock_vnocache);
    703 			vc = chfs_nref_to_vc(this->node->nref);
    704 			dbg("[MARK] lnr: %u ofs: %u\n", this->node->nref->nref_lnr,
    705 				this->node->nref->nref_offset);
    706 
    707 			chfs_remove_and_obsolete(chmp, vc, this->node->nref, &vc->dnode);
    708 			mutex_exit(&chmp->chm_lock_vnocache);
    709 
    710 			chfs_free_full_dnode(this->node);
    711 		} else {
    712 			CHFS_MARK_REF_NORMAL(this->node->nref);
    713 		}
    714 	}
    715 	chfs_free_node_frag(this);
    716 }
    717 
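/* Wrap a full dnode in a new fragment and add it to the inode's fragtree. */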
    718 int
    719 chfs_add_full_dnode_to_inode(struct chfs_mount *chmp,
    720     struct chfs_inode *ip,
    721     struct chfs_full_dnode *fd)
    722 {
    723 	int ret;
    724 	struct chfs_node_frag *newfrag;
    725 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    726 
    727 	if (unlikely(!fd->size))
    728 		return 0;
    729 
    730 	newfrag = new_fragment(fd, fd->ofs, fd->size);
    731 	if (unlikely(!newfrag))
    732 		return ENOMEM;
    733 
    734 	ret = chfs_add_frag_to_fragtree(chmp, &ip->fragtree, newfrag);
    735 	if (ret)
    736 		return ret;
    737 
    738 	if (newfrag->ofs & (PAGE_SIZE - 1)) {
    739 		struct chfs_node_frag *prev = frag_prev(&ip->fragtree, newfrag);
    740 
    741 		CHFS_MARK_REF_NORMAL(fd->nref);
    742 		if (prev->node)
    743 			CHFS_MARK_REF_NORMAL(prev->node->nref);
    744 	}
    745 
    746 	if ((newfrag->ofs+newfrag->size) & (PAGE_SIZE - 1)) {
    747 		struct chfs_node_frag *next = frag_next(&ip->fragtree, newfrag);
    748 
    749 		if (next) {
    750 			CHFS_MARK_REF_NORMAL(fd->nref);
    751 			if (next->node)
    752 				CHFS_MARK_REF_NORMAL(next->node->nref);
    753 		}
    754 	}
    755 
    756 	return 0;
    757 }
    758 
    759 
    760 /*
    761  * -----------------------
    762  * general node operations
    763  * -----------------------
    764  */
     765 /* collect the valid data nodes of an inode into the temporary dnode tree */
    766 int
    767 chfs_get_data_nodes(struct chfs_mount *chmp,
    768     struct chfs_inode *ip,
    769     struct chfs_readinode_info *rii)
    770 {
    771 	uint32_t crc;
    772 	int err;
    773 	size_t len, retlen;
    774 	struct chfs_node_ref *nref;
    775 	struct chfs_flash_data_node *dnode;
    776 	struct chfs_tmp_dnode *td;
    777 	char* buf;
    778 
    779 	len = sizeof(struct chfs_flash_data_node);
    780 	buf = kmem_alloc(len, KM_SLEEP);
    785 
    786 	nref = chfs_first_valid_data_ref(ip->chvc->dnode);
    787 
    788 	rii->highest_version = ip->chvc->highest_version;
    789 
    790 	while(nref && (struct chfs_vnode_cache *)nref != ip->chvc) {
    791 		err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), len, &retlen);
     792 		if (err || len != retlen) {
     793 			err = err ? err : EIO;
			goto out;
		}
    794 		dnode = (struct chfs_flash_data_node*)buf;
    795 
    796 		//check header crc
    797 		crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
    798 		if (crc != le32toh(dnode->hdr_crc)) {
    799 			chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
    800 			goto cont;
    801 		}
    802 		//check header magic bitmask
    803 		if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
    804 			chfs_err("Wrong magic bitmask.\n");
    805 			goto cont;
    806 		}
    807 		//check node crc
    808 		crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
    809 		if (crc != le32toh(dnode->node_crc)) {
    810 			chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
    811 			goto cont;
    812 		}
    813 		td = chfs_alloc_tmp_dnode();
    814 		if (!td) {
    815 			chfs_err("Can't allocate tmp dnode info.\n");
    816 			err = ENOMEM;
    817 			goto out;
    818 		}
     819 		/* We don't check the data CRC here, we just add the node to the
     820 		 * temporary dnode tree, because we don't want to check nodes which
     821 		 * have been overlapped by a newer node with a higher version number.
     822 		 */
    823 		td->node = chfs_alloc_full_dnode();
    824 		if (!td->node) {
    825 			chfs_err("Can't allocate full dnode info.\n");
    826 			err = ENOMEM;
    827 			goto out_tmp_dnode;
    828 		}
    829 		td->version = le64toh(dnode->version);
    830 		td->node->ofs = le64toh(dnode->offset);
    831 		td->data_crc = le32toh(dnode->data_crc);
    832 		td->node->nref = nref;
    833 		td->node->size = le32toh(dnode->data_length);
    834 		td->node->frags = 1;
    835 		td->overlapped = 0;
    836 
    837 		if (td->version > rii->highest_version) {
    838 			rii->highest_version = td->version;
    839 		}
    840 
    841 		err = chfs_add_tmp_dnode_to_tree(chmp, rii, td);
    842 		if (err)
    843 			goto out_full_dnode;
    844 
    845 cont:
    846 		nref = chfs_first_valid_data_ref(nref->nref_next);
    847 	}
    848 
    849 	ip->chvc->highest_version = rii->highest_version;
    850 	return 0;
    851 
    852 /* Exit points */
    853 out_full_dnode:
    854 	chfs_free_full_dnode(td->node);
    855 out_tmp_dnode:
    856 	chfs_free_tmp_dnode(td);
    857 out:
    858 	kmem_free(buf, len);
    859 	kmem_free(dnode, len);
    860 	return err;
    861 }
    862 
    863 
     864 /* Build the final fragtree from the temporary dnode (tdi) tree. */
    865 int
    866 chfs_build_fragtree(struct chfs_mount *chmp, struct chfs_inode *ip,
    867     struct chfs_readinode_info *rii)
    868 {
    869 	struct chfs_tmp_dnode_info *pen, *last, *this;
    870 	struct rb_tree ver_tree;    /* version tree */
    871 	uint64_t high_ver = 0;
    872 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    873 
    874 	rb_tree_init(&ver_tree, &tmp_node_rbtree_ops);
    875 
    876 	if (rii->mdata_tn) {
    877 		high_ver = rii->mdata_tn->tmpnode->version;
    878 		rii->latest_ref = rii->mdata_tn->tmpnode->node->nref;
    879 	}
    880 
    881 	pen = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&rii->tdi_root);
    882 
    883 	while((last = pen)) {
    884 		pen = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&rii->tdi_root, last, RB_DIR_LEFT);
    885 
    886 		rb_tree_remove_node(&rii->tdi_root, last);
    887 		rb_tree_insert_node(&ver_tree, last);
    888 
    889 		if (last->tmpnode->overlapped) {
    890 			if (pen)
    891 				continue;
    892 
    893 			last->tmpnode->overlapped = 0;
    894 		}
    895 
    896 		this = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&ver_tree);
    897 
    898 		while (this) {
    899 			struct chfs_tmp_dnode_info *vers_next;
    900 			int ret;
    901 
    902 			vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
    903 			rb_tree_remove_node(&ver_tree, this);
    904 
    905 			struct chfs_tmp_dnode *tmp_td = this->tmpnode;
    906 			while (tmp_td) {
    907 				struct chfs_tmp_dnode *next_td = tmp_td->next;
    908 
    909 				if (chfs_check_td_node(chmp, tmp_td)) {
    910 					if (next_td) {
    911 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    912 						chfs_kill_td(chmp, tmp_td);
    913 					} else {
    914 						break;
    915 					}
    916 				} else {
    917 					if (tmp_td->version > high_ver) {
    918 						high_ver = tmp_td->version;
    919 						dbg("highver: %llu\n", (unsigned long long)high_ver);
    920 						rii->latest_ref = tmp_td->node->nref;
    921 					}
    922 
    923 					ret = chfs_add_full_dnode_to_inode(chmp, ip, tmp_td->node);
    924 					if (ret) {
    925 						while (1) {
    926 							vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
    927 							while (tmp_td) {
    928 								next_td = tmp_td->next;
    929 
    930 								chfs_free_full_dnode(tmp_td->node);
    931 								chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    932 								chfs_kill_td(chmp, tmp_td);
    933 								tmp_td = next_td;
    934 							}
    935 							chfs_free_tmp_dnode_info(this);
    936 							this = vers_next;
    937 							if (!this)
    938 								break;
    939 							rb_tree_remove_node(&ver_tree, vers_next);
    940 							chfs_kill_tdi(chmp, vers_next);
    941 						}
    942 						return ret;
    943 					}
    944 
    945 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
    946 					chfs_free_tmp_dnode(tmp_td);
    947 					// shouldn't obsolete tmp_td here,
    948 					// because tmp_td->node was added to the inode
    949 				}
    950 				tmp_td = next_td;
    951 			}
    952 			chfs_kill_tdi(chmp, this);
    953 			this = vers_next;
    954 		}
    955 	}
    956 
    957 	return 0;
    958 }
    959 
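/*
 * Read an inode: check and update the state of its vnode cache, then collect
 * its data nodes and build the fragtree.
 */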
     960 int
chfs_read_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
    961 {
    962 	struct chfs_vnode_cache *vc = ip->chvc;
    963 
    964 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
    965 
    966 retry:
    967 	mutex_enter(&chmp->chm_lock_vnocache);
    968 	switch (vc->state) {
    969 	case VNO_STATE_UNCHECKED:
    970 	case VNO_STATE_CHECKEDABSENT:
    971 		vc->state = VNO_STATE_READING;
    972 		break;
    973 	case VNO_STATE_CHECKING:
    974 	case VNO_STATE_GC:
    975 		mutex_exit(&chmp->chm_lock_vnocache);
    976 		//sleep_on_spinunlock(&chmp->chm_lock_vnocache);
    977 		//KASSERT(!mutex_owned(&chmp->chm_lock_vnocache));
    978 		goto retry;
    979 		break;
    980 	case VNO_STATE_PRESENT:
    981 	case VNO_STATE_READING:
    982 		chfs_err("Reading inode #%llu in state %d!\n",
    983 			(unsigned long long)vc->vno, vc->state);
    984 		chfs_err("wants to read a nonexistent ino %llu\n",
    985 			(unsigned long long)vc->vno);
		mutex_exit(&chmp->chm_lock_vnocache);
     986 		return ENOENT;
    987 	default:
    988 		panic("BUG() Bad vno cache state.");
    989 	}
    990 	mutex_exit(&chmp->chm_lock_vnocache);
    991 
    992 	return chfs_read_inode_internal(chmp, ip);
    993 }
    994 
    995 /*
     996  * Read the inode's data fragments:
     997  * first collect the temporary data nodes,
     998  * then build the fragtree from them.
    999  */
   1000 int
   1001 chfs_read_inode_internal(struct chfs_mount *chmp, struct chfs_inode *ip)
   1002 {
   1003 	int err;
   1004 	size_t len, retlen;
   1005 	char* buf;
   1006 	struct chfs_readinode_info rii;
   1007 	struct chfs_flash_vnode *fvnode;
   1008 
   1009 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
   1010 
   1011 	len = sizeof(*fvnode);
   1012 
   1013 	memset(&rii, 0, sizeof(rii));
   1014 
   1015 	rb_tree_init(&rii.tdi_root, &tmp_node_rbtree_ops);
   1016 
    1017 	/* build up the temporary dnode tree */
   1018 	err = chfs_get_data_nodes(chmp, ip, &rii);
   1019 	if (err) {
   1020 		if (ip->chvc->state == VNO_STATE_READING)
   1021 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
   1022 		/* FIXME Should we kill fragtree or something here? */
   1023 		return err;
   1024 	}
   1025 
   1026 	rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
   1027 	/*
   1028 	 * build fragtree from temp nodes
   1029 	 */
   1030 	err = chfs_build_fragtree(chmp, ip, &rii);
   1031 	if (err) {
   1032 		if (ip->chvc->state == VNO_STATE_READING)
   1033 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
   1034 		/* FIXME Should we kill fragtree or something here? */
   1035 		return err;
   1036 	}
   1037 
   1038 	if (!rii.latest_ref) {
   1039 		return 0;
   1040 	}
   1041 
   1042 	buf = kmem_alloc(len, KM_SLEEP);
   1043 	if (!buf)
   1044 		return ENOMEM;
   1045 
   1046 	/*
   1047 	 * set inode size from chvc->v
   1048 	 */
   1049 	err = chfs_read_leb(chmp, ip->chvc->v->nref_lnr, buf, CHFS_GET_OFS(ip->chvc->v->nref_offset), len, &retlen);
   1050 	if (err || retlen != len) {
   1051 		kmem_free(buf, len);
   1052 		return err?err:EIO;
   1053 	}
   1054 
   1055 	fvnode = (struct chfs_flash_vnode*)buf;
   1056 
   1057 	dbg("set size from v: %u\n", fvnode->dn_size);
   1058 	chfs_set_vnode_size(ITOV(ip), fvnode->dn_size);
   1059 	uint32_t retsize = chfs_truncate_fragtree(chmp, &ip->fragtree, fvnode->dn_size);
   1060 	if (retsize != fvnode->dn_size) {
   1061 		dbg("Truncating failed. It is %u instead of %u\n", retsize, fvnode->dn_size);
   1062 	}
   1063 
   1064 	kmem_free(buf, len);
   1065 
   1066 	if (ip->chvc->state == VNO_STATE_READING) {
   1067 		ip->chvc->state = VNO_STATE_PRESENT;
   1068 	}
   1069 
   1070 	return 0;
   1071 }
   1072 
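/*
 * Read one block of file data: find the fragment covering the requested
 * offset, read its data node from flash, verify the CRCs and copy the
 * payload into the buffer.
 */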
   1073 int
   1074 chfs_read_data(struct chfs_mount* chmp, struct vnode *vp,
   1075     struct buf *bp)
   1076 {
   1077 	off_t ofs;
   1078 	struct chfs_node_frag *frag;
   1079 	char * buf;
   1080 	int err = 0;
   1081 	size_t size, retlen;
   1082 	uint32_t crc;
   1083 	struct chfs_inode *ip = VTOI(vp);
   1084 	struct chfs_flash_data_node *dnode;
   1085 	struct chfs_node_ref *nref;
   1086 
   1087 	memset(bp->b_data, 0, bp->b_bcount);
   1088 
   1089 	ofs = bp->b_blkno * PAGE_SIZE;
   1090 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(&ip->fragtree, &ofs);
   1091 
   1092 	if (!frag || frag->ofs > ofs || frag->ofs + frag->size <= ofs) {
   1093 		bp->b_resid = 0;
   1094 		dbg("not found in frag tree\n");
   1095 		return 0;
   1096 	}
   1097 
   1098 	if (!frag->node) {
   1099 		dbg("no node in frag\n");
   1100 		return 0;
   1101 	}
   1102 
   1103 	nref = frag->node->nref;
   1104 
   1105 	size = sizeof(*dnode) + frag->size;
   1106 
   1107 	buf = kmem_alloc(size, KM_SLEEP);
   1108 
   1109 	dbg("reading from lnr: %u, offset: %u, size: %zu\n", nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset), size);
   1110 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), size, &retlen);
   1111 	if (err) {
   1112 		chfs_err("error after reading: %d\n", err);
   1113 		goto out;
   1114 	}
   1115 	if (retlen != size) {
   1116 		chfs_err("retlen: %zu != size: %zu\n", retlen, size);
   1117 		err = EIO;
   1118 		goto out;
   1119 	}
   1120 
   1121 	dnode = (struct chfs_flash_data_node *)buf;
   1122 	crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
   1123 	if (crc != le32toh(dnode->hdr_crc)) {
   1124 		chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
   1125 		err = EIO;
   1126 		goto out;
   1127 	}
   1128 	//check header magic bitmask
   1129 	if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
   1130 		chfs_err("Wrong magic bitmask.\n");
   1131 		err = EIO;
   1132 		goto out;
   1133 	}
   1134 	//check node crc
   1135 	crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
   1136 	if (crc != le32toh(dnode->node_crc)) {
   1137 		chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
   1138 		err = EIO;
   1139 		goto out;
   1140 	}
    1141 	crc = crc32(0, (uint8_t *)dnode->data, le32toh(dnode->data_length));
   1142 	if (crc != le32toh(dnode->data_crc)) {
   1143 		chfs_err("Data CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->data_crc));
   1144 		err = EIO;
   1145 		goto out;
   1146 	}
   1147 
    1148 	memcpy(bp->b_data, dnode->data, le32toh(dnode->data_length));
   1149 	bp->b_resid = 0;
   1150 
   1151 out:
   1152 	kmem_free(buf, size);
   1153 	return err;
   1154 }
   1155