/*	$NetBSD: chfs_gc.c,v 1.12 2021/12/07 22:13:56 andvar Exp $	*/

/*-
 * Copyright (c) 2010 Department of Software Engineering,
 *		      University of Szeged, Hungary
 * Copyright (c) 2010 Tamas Toth <ttoth@inf.u-szeged.hu>
 * Copyright (c) 2010 Adam Hoka <ahoka@NetBSD.org>
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by the Department of Software Engineering, University of Szeged, Hungary
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cprng.h>
#include "chfs.h"

void chfs_gc_release_inode(struct chfs_mount *,
    struct chfs_inode *);
struct chfs_inode *chfs_gc_fetch_inode(struct chfs_mount *,
    ino_t, uint32_t);
int chfs_check(struct chfs_mount *, struct chfs_vnode_cache *);
void chfs_clear_inode(struct chfs_mount *, struct chfs_inode *);


struct chfs_eraseblock *find_gc_block(struct chfs_mount *);
int chfs_gcollect_pristine(struct chfs_mount *,
    struct chfs_eraseblock *,
    struct chfs_vnode_cache *, struct chfs_node_ref *);
int chfs_gcollect_live(struct chfs_mount *,
    struct chfs_eraseblock *, struct chfs_node_ref *,
    struct chfs_inode *);
int chfs_gcollect_vnode(struct chfs_mount *, struct chfs_inode *);
int chfs_gcollect_dirent(struct chfs_mount *,
    struct chfs_eraseblock *, struct chfs_inode *,
    struct chfs_dirent *);
int chfs_gcollect_deletion_dirent(struct chfs_mount *,
    struct chfs_eraseblock *, struct chfs_inode *,
    struct chfs_dirent *);
int chfs_gcollect_dnode(struct chfs_mount *,
    struct chfs_eraseblock *, struct chfs_inode *,
    struct chfs_full_dnode *, uint32_t, uint32_t);
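/*
 * Garbage collector overview.
 *
 * A dedicated kernel thread (chfs_gc_thread) sleeps on gcth_wakeup and
 * runs chfs_gcollect_pass() whenever chfs_gc_thread_should_wake()
 * reports work to do.  A single pass either validates one unchecked
 * vnode cache entry or moves one still-valid node out of the eraseblock
 * selected for collection; fully obsoleted blocks are then handed over
 * to the erase_pending queue.
 */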
/*
 * chfs_gc_trigger - wakes up the GC thread, if it should run
 * Must be called with chm_lock_mountfields held.
 */
void
chfs_gc_trigger(struct chfs_mount *chmp)
{
	struct garbage_collector_thread *gc = &chmp->chm_gc_thread;

	if (gc->gcth_running &&
	    chfs_gc_thread_should_wake(chmp)) {
		cv_signal(&gc->gcth_wakeup);
	}
}


/* chfs_gc_thread - the garbage collector's thread */
void
chfs_gc_thread(void *data)
{
	struct chfs_mount *chmp = data;
	struct garbage_collector_thread *gc = &chmp->chm_gc_thread;

	dbg_gc("[GC THREAD] thread started\n");

	mutex_enter(&chmp->chm_lock_mountfields);
	while (gc->gcth_running) {
		/* We must call chfs_gc_thread_should_wake with
		 * chm_lock_mountfields held, which is a bit awkwardly done
		 * here, but we can't really do it any other way with the
		 * current design...
		 */
		if (chfs_gc_thread_should_wake(chmp)) {
			if (chfs_gcollect_pass(chmp) == ENOSPC) {
				mutex_exit(&chmp->chm_lock_mountfields);
				panic("No space for garbage collection\n");
				/* XXX why break here? i have added a panic
				 * here to see if it gets triggered -ahoka
				 */
				break;
			}
			/* XXX gcollect_pass drops the mutex */
		}

		cv_timedwait_sig(&gc->gcth_wakeup,
		    &chmp->chm_lock_mountfields, mstohz(100));
	}
	mutex_exit(&chmp->chm_lock_mountfields);

	dbg_gc("[GC THREAD] thread stopped\n");
	kthread_exit(0);
}

/* chfs_gc_thread_start - starts the GC thread */
void
chfs_gc_thread_start(struct chfs_mount *chmp)
{
	struct garbage_collector_thread *gc = &chmp->chm_gc_thread;

	cv_init(&gc->gcth_wakeup, "chfsgccv");

	gc->gcth_running = true;
	kthread_create(PRI_NONE, /*KTHREAD_MPSAFE |*/ KTHREAD_MUSTJOIN,
	    NULL, chfs_gc_thread, chmp, &gc->gcth_thread,
	    "chfsgcth");
}

/* chfs_gc_thread_stop - stops the GC thread */
void
chfs_gc_thread_stop(struct chfs_mount *chmp)
{
	struct garbage_collector_thread *gc = &chmp->chm_gc_thread;

	/* check if it is actually running */
	if (gc->gcth_running) {
		gc->gcth_running = false;
	} else {
		return;
	}
	cv_signal(&gc->gcth_wakeup);
	dbg_gc("[GC THREAD] stop signal sent\n");

	kthread_join(gc->gcth_thread);
#ifdef BROKEN_KTH_JOIN
	kpause("chfsthjoin", false, mstohz(1000), NULL);
#endif

	cv_destroy(&gc->gcth_wakeup);
}
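/*
 * Note that chfs_gc_thread does not rely on wakeups alone: its
 * cv_timedwait_sig() call times out every 100 ms, so the conditions
 * checked by chfs_gc_thread_should_wake() are re-evaluated even if a
 * chfs_gc_trigger() signal is missed.
 */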
/*
 * chfs_gc_thread_should_wake - checks if the GC thread should wake up
 * Must be called with chm_lock_mountfields held.
 * Returns 1 if the GC thread should wake up, 0 otherwise.
 */
int
chfs_gc_thread_should_wake(struct chfs_mount *chmp)
{
	int nr_very_dirty = 0;
	struct chfs_eraseblock *cheb;
	uint32_t dirty;

	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));

	/* The erase pending queue is not empty. */
	if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
		dbg_gc("erase_pending\n");
		return 1;
	}

	/* There is something unchecked in the filesystem. */
	if (chmp->chm_unchecked_size) {
		dbg_gc("unchecked\n");
		return 1;
	}

	dirty = chmp->chm_dirty_size - chmp->chm_nr_erasable_blocks *
	    chmp->chm_ebh->eb_size;

	/* The number of free and erasable blocks is critical. */
	if (chmp->chm_nr_free_blocks + chmp->chm_nr_erasable_blocks <
	    chmp->chm_resv_blocks_gctrigger && (dirty > chmp->chm_nospc_dirty)) {
		dbg_gc("free: %d + erasable: %d < resv: %d\n",
		    chmp->chm_nr_free_blocks, chmp->chm_nr_erasable_blocks,
		    chmp->chm_resv_blocks_gctrigger);
		dbg_gc("dirty: %d > nospc_dirty: %d\n",
		    dirty, chmp->chm_nospc_dirty);

		return 1;
	}

	/* There are too many very dirty blocks. */
	TAILQ_FOREACH(cheb, &chmp->chm_very_dirty_queue, queue) {
		nr_very_dirty++;
		if (nr_very_dirty == chmp->chm_vdirty_blocks_gctrigger) {
			dbg_gc("nr_very_dirty\n");
			return 1;
		}
	}

	/* Everything is OK, GC shouldn't run. */
	return 0;
}

/* chfs_gc_release_inode - does nothing yet */
void
chfs_gc_release_inode(struct chfs_mount *chmp,
    struct chfs_inode *ip)
{
	dbg_gc("release inode\n");
}

/* chfs_gc_fetch_inode - assigns the given inode to the GC */
struct chfs_inode *
chfs_gc_fetch_inode(struct chfs_mount *chmp, ino_t vno,
    uint32_t unlinked)
{
	struct vnode *vp = NULL;
	struct chfs_vnode_cache *vc;
	struct chfs_inode *ip;
	dbg_gc("fetch inode %llu\n", (unsigned long long)vno);

	if (unlinked) {
		dbg_gc("unlinked\n");
		vp = chfs_vnode_lookup(chmp, vno);
		if (!vp) {
			mutex_enter(&chmp->chm_lock_vnocache);
			vc = chfs_vnode_cache_get(chmp, vno);
			if (!vc) {
				mutex_exit(&chmp->chm_lock_vnocache);
				return NULL;
			}
			mutex_exit(&chmp->chm_lock_vnocache);
			if (vc->state != VNO_STATE_CHECKEDABSENT) {
				/* XXX why do we need the delay here?! */
				KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
				cv_timedwait_sig(
					&chmp->chm_gc_thread.gcth_wakeup,
					&chmp->chm_lock_mountfields, mstohz(50));
			}
			return NULL;
		}
	} else {
		dbg_gc("vnode lookup\n");
		vp = chfs_vnode_lookup(chmp, vno);
	}
	dbg_gc("vp to ip\n");
	ip = VTOI(vp);
	KASSERT(ip);
	vrele(vp);

	return ip;
}
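/*
 * chfs_check() below builds a throwaway in-core inode: the pool-
 * allocated chfs_inode and the zeroed struct vnode shell exist only so
 * that chfs_read_inode_internal() can read back and validate the nodes
 * of the given vnode cache entry; both are released before returning.
 */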
extern rb_tree_ops_t frag_rbtree_ops;

/* chfs_check - checks an inode with minimal initialization */
int
chfs_check(struct chfs_mount *chmp, struct chfs_vnode_cache *chvc)
{
	KASSERT(mutex_owned(&chmp->chm_lock_vnocache));

	struct chfs_inode *ip;
	struct vnode *vp;
	int ret;

	/* Get a new inode. */
	ip = pool_get(&chfs_inode_pool, PR_WAITOK);
	if (!ip) {
		return ENOMEM;
	}

	vp = kmem_zalloc(sizeof(struct vnode), KM_SLEEP);

	/* Minimal initialization. */
	ip->chvc = chvc;
	ip->vp = vp;

	vp->v_data = ip;

	rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
	TAILQ_INIT(&ip->dents);

	/* Build the node. */
	mutex_exit(&chmp->chm_lock_vnocache);
	ret = chfs_read_inode_internal(chmp, ip);
	mutex_enter(&chmp->chm_lock_vnocache);
	if (!ret) {
		chfs_clear_inode(chmp, ip);
	}

	/* Release the temporary vnode shell and the inode. */
	kmem_free(vp, sizeof(struct vnode));
	pool_put(&chfs_inode_pool, ip);

	return ret;
}

/* chfs_clear_inode - kills a minimal inode */
void
chfs_clear_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
{
	KASSERT(mutex_owned(&chmp->chm_lock_vnocache));

	struct chfs_dirent *fd, *tmpfd;
	struct chfs_vnode_cache *chvc;
	struct chfs_node_ref *nref;

	chvc = ip->chvc;
	/* shouldn't this be: */
	//bool deleted = (chvc && !(chvc->pvno || chvc->nlink));
	int deleted = (chvc && !(chvc->pvno | chvc->nlink));

	/* Set the actual state. */
	if (chvc && chvc->state != VNO_STATE_CHECKING) {
		chvc->state = VNO_STATE_CLEARING;
	}

	/* Remove vnode information. */
	while (deleted && chvc->v != (struct chfs_node_ref *)chvc) {
		nref = chvc->v;
		chfs_remove_and_obsolete(chmp, chvc, nref, &chvc->v);
	}

	/* Destroy data. */
	chfs_kill_fragtree(chmp, &ip->fragtree);

	/* Clear dirents. */
	TAILQ_FOREACH_SAFE(fd, &ip->dents, fds, tmpfd) {
		chfs_free_dirent(fd);
	}

	/* Remove the node from the vnode cache. */
	if (chvc && chvc->state == VNO_STATE_CHECKING) {
		chvc->state = VNO_STATE_CHECKEDABSENT;
		if ((struct chfs_vnode_cache *)chvc->v == chvc &&
		    (struct chfs_vnode_cache *)chvc->dirents == chvc &&
		    (struct chfs_vnode_cache *)chvc->dnode == chvc)
			chfs_vnode_cache_remove(chmp, chvc);
	}
}
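/*
 * The queue to collect from is picked by a weighted random draw over
 * [0, 128): roughly 50/128 of the passes prefer erase_pending, 60/128
 * the very dirty queue and 16/128 the dirty queue, leaving about 2/128
 * for the clean queue; if the preferred queue is empty, the choice
 * falls through to the next non-empty one.
 */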
/* find_gc_block - finds the next block for GC */
struct chfs_eraseblock *
find_gc_block(struct chfs_mount *chmp)
{
	struct chfs_eraseblock *ret;
	struct chfs_eraseblock_queue *nextqueue;

	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));

	/* Get a random number. */
	uint32_t n = cprng_fast32() % 128;

again:
	/* Find an eraseblock queue. */
	if (n < 50 && !TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
		dbg_gc("Picking block from erase_pending_queue to GC next\n");
		nextqueue = &chmp->chm_erase_pending_queue;
	} else if (n < 110 && !TAILQ_EMPTY(&chmp->chm_very_dirty_queue)) {
		dbg_gc("Picking block from very_dirty_queue to GC next\n");
		nextqueue = &chmp->chm_very_dirty_queue;
	} else if (n < 126 && !TAILQ_EMPTY(&chmp->chm_dirty_queue)) {
		dbg_gc("Picking block from dirty_queue to GC next\n");
		nextqueue = &chmp->chm_dirty_queue;
	} else if (!TAILQ_EMPTY(&chmp->chm_clean_queue)) {
		dbg_gc("Picking block from clean_queue to GC next\n");
		nextqueue = &chmp->chm_clean_queue;
	} else if (!TAILQ_EMPTY(&chmp->chm_dirty_queue)) {
		dbg_gc("Picking block from dirty_queue to GC next"
		    " (clean_queue was empty)\n");
		nextqueue = &chmp->chm_dirty_queue;
	} else if (!TAILQ_EMPTY(&chmp->chm_very_dirty_queue)) {
		dbg_gc("Picking block from very_dirty_queue to GC next"
		    " (clean_queue and dirty_queue were empty)\n");
		nextqueue = &chmp->chm_very_dirty_queue;
	} else if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
		dbg_gc("Picking block from erase_pending_queue to GC next"
		    " (clean_queue and {very_,}dirty_queue were empty)\n");
		nextqueue = &chmp->chm_erase_pending_queue;
	} else if (!TAILQ_EMPTY(&chmp->chm_erasable_pending_wbuf_queue)) {
		dbg_gc("Syncing wbuf in order to reuse "
		    "erasable_pending_wbuf_queue blocks\n");
		rw_enter(&chmp->chm_lock_wbuf, RW_WRITER);
		chfs_flush_pending_wbuf(chmp);
		rw_exit(&chmp->chm_lock_wbuf);
		goto again;
	} else {
		dbg_gc("CHFS: no clean, dirty _or_ erasable"
		    " blocks to GC from! Where are they all?\n");
		return NULL;
	}

	/* Get the first block of the queue. */
	ret = TAILQ_FIRST(nextqueue);
	if (chmp->chm_nextblock) {
		dbg_gc("nextblock num: %u - gcblock num: %u\n",
		    chmp->chm_nextblock->lnr, ret->lnr);
		if (ret == chmp->chm_nextblock)
			goto again;
	}
	TAILQ_REMOVE(nextqueue, ret, queue);

	/* Set the GC block. */
	chmp->chm_gcblock = ret;
	/* Set the GC node. */
	ret->gc_node = ret->first_node;

	if (!ret->gc_node) {
		dbg_gc("Oops! ret->gc_node at LEB: %u is NULL\n", ret->lnr);
		panic("CHFS BUG - one LEB's gc_node is NULL\n");
	}

	/* TODO wasted size? */
	return ret;
}

/* chfs_gcollect_pass - the main function of GC */
int
chfs_gcollect_pass(struct chfs_mount *chmp)
{
	struct chfs_vnode_cache *vc;
	struct chfs_eraseblock *eb;
	struct chfs_node_ref *nref;
	uint32_t gcblock_dirty;
	struct chfs_inode *ip;
	ino_t vno, pvno;
	uint32_t nlink;
	int ret = 0;

	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));

	/* Check all vnodes. */
	for (;;) {
		mutex_enter(&chmp->chm_lock_sizes);

		/* Check the unchecked size. */
		dbg_gc("unchecked size == %u\n", chmp->chm_unchecked_size);
		if (!chmp->chm_unchecked_size)
			break;

		/* Compare the vnode number to the maximum. */
		if (chmp->chm_checked_vno > chmp->chm_max_vno) {
			mutex_exit(&chmp->chm_lock_sizes);
			dbg_gc("checked_vno (#%llu) > max_vno (#%llu)\n",
			    (unsigned long long)chmp->chm_checked_vno,
			    (unsigned long long)chmp->chm_max_vno);
			return ENOSPC;
		}

		mutex_exit(&chmp->chm_lock_sizes);

		mutex_enter(&chmp->chm_lock_vnocache);
		dbg_gc("checking vno #%llu\n",
		    (unsigned long long)chmp->chm_checked_vno);
		dbg_gc("get vnode cache\n");

		/* OK, get and check the vnode cache. */
		vc = chfs_vnode_cache_get(chmp, chmp->chm_checked_vno++);

		if (!vc) {
			dbg_gc("!vc\n");
			mutex_exit(&chmp->chm_lock_vnocache);
			continue;
		}

		if ((vc->pvno | vc->nlink) == 0) {
			dbg_gc("(pvno | nlink) == 0\n");
			mutex_exit(&chmp->chm_lock_vnocache);
			continue;
		}

		/* Find out the state of the vnode. */
		dbg_gc("switch\n");
		switch (vc->state) {
		case VNO_STATE_CHECKEDABSENT:
			/* FALLTHROUGH */
		case VNO_STATE_PRESENT:
			mutex_exit(&chmp->chm_lock_vnocache);
			continue;

		case VNO_STATE_GC:
			/* FALLTHROUGH */
		case VNO_STATE_CHECKING:
			mutex_exit(&chmp->chm_lock_vnocache);
			dbg_gc("VNO_STATE GC or CHECKING\n");
			panic("CHFS BUG - vc state gc or checking\n");

		case VNO_STATE_READING:
			chmp->chm_checked_vno--;
			mutex_exit(&chmp->chm_lock_vnocache);
			/* XXX why do we need the delay here?! */
			kpause("chvncrea", true, mstohz(50), NULL);

			return 0;

		default:
			mutex_exit(&chmp->chm_lock_vnocache);
			dbg_gc("default\n");
			panic("CHFS BUG - vc state is other than what we"
			    " checked\n");

		case VNO_STATE_UNCHECKED:
			;
		}

		/* We found an unchecked vnode. */

		vc->state = VNO_STATE_CHECKING;

		/* XXX check if this is too heavy to call under
		 * chm_lock_vnocache
		 */
		ret = chfs_check(chmp, vc);
		vc->state = VNO_STATE_CHECKEDABSENT;

		mutex_exit(&chmp->chm_lock_vnocache);
		return ret;
	}
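	/*
	 * Every vnode is checked at this point (the loop above returns
	 * early otherwise), so the rest of the pass collects exactly one
	 * node of the block selected for GC.  Note that chm_lock_sizes is
	 * still held after the break above.
	 */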
	/* Get the GC block. */
	eb = chmp->chm_gcblock;

	if (!eb) {
		eb = find_gc_block(chmp);
	}

	if (!eb) {
		dbg_gc("!eb\n");
		if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
			mutex_exit(&chmp->chm_lock_sizes);
			return EAGAIN;
		}
		mutex_exit(&chmp->chm_lock_sizes);
		return EIO;
	}

	if (!eb->used_size) {
		dbg_gc("!eb->used_size\n");
		goto eraseit;
	}

	/* Get the GC node. */
	nref = eb->gc_node;
	gcblock_dirty = eb->dirty_size;

	/* Find a node which hasn't been obsoleted yet.
	 * Obsoleted nodes will simply be deleted after the whole block
	 * has been checked.
	 */
	while (CHFS_REF_OBSOLETE(nref)) {
#ifdef DBG_MSG_GC
		if (nref == chmp->chm_blocks[nref->nref_lnr].last_node) {
			dbg_gc("THIS NODE IS THE LAST NODE OF ITS EB\n");
		}
#endif
		nref = node_next(nref);
		if (!nref) {
			eb->gc_node = nref;
			mutex_exit(&chmp->chm_lock_sizes);
			panic("CHFS BUG - nref is NULL\n");
		}
	}

	/* We found a node which isn't obsolete. */
	eb->gc_node = nref;
	KASSERT(nref->nref_lnr == chmp->chm_gcblock->lnr);

	/* Check if the node is in any chain. */
	if (!nref->nref_next) {
		/* This node is not in any chain. Simply collect it, or
		 * mark it obsolete. */
		mutex_exit(&chmp->chm_lock_sizes);
		if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
			chfs_gcollect_pristine(chmp, eb, NULL, nref);
		} else {
			chfs_mark_node_obsolete(chmp, nref);
		}
		goto lock_size;
	}

	mutex_exit(&chmp->chm_lock_sizes);

	mutex_enter(&chmp->chm_lock_vnocache);

	dbg_gc("nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset);
	vc = chfs_nref_to_vc(nref);

	/* Check the state of the node. */
	dbg_gc("switch\n");
	switch (vc->state) {
	case VNO_STATE_CHECKEDABSENT:
		if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
			vc->state = VNO_STATE_GC;
		}
		break;

	case VNO_STATE_PRESENT:
		break;

	case VNO_STATE_UNCHECKED:
		/* FALLTHROUGH */
	case VNO_STATE_CHECKING:
		/* FALLTHROUGH */
	case VNO_STATE_GC:
		mutex_exit(&chmp->chm_lock_vnocache);
		panic("CHFS BUG - vc state unchecked,"
		    " checking or gc (vno #%llu, num #%d)\n",
		    (unsigned long long)vc->vno, vc->state);

	case VNO_STATE_READING:
		/* The node is in use at this time. */
		mutex_exit(&chmp->chm_lock_vnocache);
		kpause("chvncrea", true, mstohz(50), NULL);
		return 0;
	}

	if (vc->state == VNO_STATE_GC) {
		dbg_gc("vc->state == VNO_STATE_GC\n");
		vc->state = VNO_STATE_CHECKEDABSENT;
		mutex_exit(&chmp->chm_lock_vnocache);
		ret = chfs_gcollect_pristine(chmp, eb, NULL, nref);

		//TODO wake_up(&chmp->chm_vnocache_wq);
		if (ret != EBADF)
			goto test_gcnode;
		mutex_enter(&chmp->chm_lock_vnocache);
	}

	/* Collect a living node. */
	vno = vc->vno;
	pvno = vc->pvno;
	nlink = vc->nlink;
	mutex_exit(&chmp->chm_lock_vnocache);

	ip = chfs_gc_fetch_inode(chmp, vno, !(pvno | nlink));

	if (!ip) {
		dbg_gc("!ip\n");
		ret = 0;
		goto lock_size;
	}

	chfs_gcollect_live(chmp, eb, nref, ip);

	chfs_gc_release_inode(chmp, ip);

test_gcnode:
	if (eb->dirty_size == gcblock_dirty &&
	    !CHFS_REF_OBSOLETE(eb->gc_node)) {
		dbg_gc("ERROR collecting node at %u failed.\n",
		    CHFS_GET_OFS(eb->gc_node->nref_offset));

		ret = ENOSPC;
	}

lock_size:
	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
	mutex_enter(&chmp->chm_lock_sizes);
eraseit:
	dbg_gc("eraseit\n");

	if (chmp->chm_gcblock) {
		/* This is only for debugging. */
		dbg_gc("eb used size = %u\n", chmp->chm_gcblock->used_size);
		dbg_gc("eb free size = %u\n", chmp->chm_gcblock->free_size);
		dbg_gc("eb dirty size = %u\n", chmp->chm_gcblock->dirty_size);
		dbg_gc("eb unchecked size = %u\n",
		    chmp->chm_gcblock->unchecked_size);
		dbg_gc("eb wasted size = %u\n", chmp->chm_gcblock->wasted_size);

		KASSERT(chmp->chm_gcblock->used_size + chmp->chm_gcblock->free_size +
		    chmp->chm_gcblock->dirty_size +
		    chmp->chm_gcblock->unchecked_size +
		    chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size);

	}

	/* Check the state of the GC block. */
	if (chmp->chm_gcblock && chmp->chm_gcblock->dirty_size +
	    chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size) {
		dbg_gc("Block at leb #%u completely obsoleted by GC, "
		    "moving it to erase_pending_queue\n", chmp->chm_gcblock->lnr);
		TAILQ_INSERT_TAIL(&chmp->chm_erase_pending_queue,
		    chmp->chm_gcblock, queue);
		chmp->chm_gcblock = NULL;
		chmp->chm_nr_erasable_blocks++;
		if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
			ret = chfs_remap_leb(chmp);
		}
	}

	mutex_exit(&chmp->chm_lock_sizes);
	dbg_gc("return\n");
	return ret;
}
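/*
 * A node is "pristine" if its on-flash image can be reused as-is: after
 * the CRC checks below, chfs_gcollect_pristine() copies the raw node to
 * the current write location instead of re-serializing it from the
 * in-core state.
 */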
/* chfs_gcollect_pristine - collects a pristine node */
int
chfs_gcollect_pristine(struct chfs_mount *chmp, struct chfs_eraseblock *cheb,
    struct chfs_vnode_cache *chvc, struct chfs_node_ref *nref)
{
	struct chfs_node_ref *newnref;
	struct chfs_flash_node_hdr *nhdr;
	struct chfs_flash_vnode *fvnode;
	struct chfs_flash_dirent_node *fdirent;
	struct chfs_flash_data_node *fdata;
	int ret, retries = 0;
	uint32_t ofs, crc;
	size_t totlen = chfs_nref_len(chmp, cheb, nref);
	char *data;
	struct iovec vec;
	size_t retlen;

	dbg_gc("gcollect_pristine\n");

	data = kmem_alloc(totlen, KM_SLEEP);
	ofs = CHFS_GET_OFS(nref->nref_offset);

	/* Read the header. */
	ret = chfs_read_leb(chmp, nref->nref_lnr, data, ofs, totlen, &retlen);
	if (ret) {
		dbg_gc("reading error\n");
		goto err_out;
	}
	if (retlen != totlen) {
		dbg_gc("read size error\n");
		ret = EIO;
		goto err_out;
	}
	nhdr = (struct chfs_flash_node_hdr *)data;

	/* Check the header. */
	if (le16toh(nhdr->magic) != CHFS_FS_MAGIC_BITMASK) {
		dbg_gc("node header magic number error\n");
		ret = EBADF;
		goto err_out;
	}
	crc = crc32(0, (uint8_t *)nhdr, CHFS_NODE_HDR_SIZE - 4);
	if (crc != le32toh(nhdr->hdr_crc)) {
		dbg_gc("node header crc error\n");
		ret = EBADF;
		goto err_out;
	}

	/* Read the remaining parts. */
	switch (le16toh(nhdr->type)) {
	case CHFS_NODETYPE_VNODE:
		/* vnode information node */
		fvnode = (struct chfs_flash_vnode *)data;
		crc = crc32(0, (uint8_t *)fvnode,
		    sizeof(struct chfs_flash_vnode) - 4);
		if (crc != le32toh(fvnode->node_crc)) {
			dbg_gc("vnode crc error\n");
			ret = EBADF;
			goto err_out;
		}
		break;
	case CHFS_NODETYPE_DIRENT:
		/* dirent node */
		fdirent = (struct chfs_flash_dirent_node *)data;
		crc = crc32(0, (uint8_t *)fdirent,
		    sizeof(struct chfs_flash_dirent_node) - 4);
		if (crc != le32toh(fdirent->node_crc)) {
			dbg_gc("dirent crc error\n");
			ret = EBADF;
			goto err_out;
		}
		crc = crc32(0, fdirent->name, fdirent->nsize);
		if (crc != le32toh(fdirent->name_crc)) {
			dbg_gc("dirent name crc error\n");
			ret = EBADF;
			goto err_out;
		}
		break;
	case CHFS_NODETYPE_DATA:
		/* data node */
		fdata = (struct chfs_flash_data_node *)data;
		crc = crc32(0, (uint8_t *)fdata,
		    sizeof(struct chfs_flash_data_node) - 4);
		if (crc != le32toh(fdata->node_crc)) {
			dbg_gc("data node crc error\n");
			ret = EBADF;
			goto err_out;
		}
		break;
	default:
		/* unknown node */
		if (chvc) {
			dbg_gc("unknown node has a vnode cache\n");
			ret = EBADF;
			goto err_out;
		}
	}
	/* The CRCs are OK, write the node to its new place. */
retry:
	ret = chfs_reserve_space_gc(chmp, totlen);
	if (ret)
		goto err_out;

	newnref = chfs_alloc_node_ref(chmp->chm_nextblock);
	if (!newnref) {
		ret = ENOMEM;
		goto err_out;
	}

	ofs = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size;
	newnref->nref_offset = ofs;

	/* write out the whole node */
	vec.iov_base = (void *)data;
	vec.iov_len = totlen;
	mutex_enter(&chmp->chm_lock_sizes);
	ret = chfs_write_wbuf(chmp, &vec, 1, ofs, &retlen);

	if (ret || retlen != totlen) {
		/* error while writing */
		chfs_err("error while writing out to the media\n");
		chfs_err("err: %d | size: %zu | retlen : %zu\n",
		    ret, totlen, retlen);

		chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen);
		if (retries) {
			mutex_exit(&chmp->chm_lock_sizes);
			ret = EIO;
			goto err_out;
		}

		/* try again */
		retries++;
		mutex_exit(&chmp->chm_lock_sizes);
		goto retry;
	}

	/* update the vnode information */
	mutex_exit(&chmp->chm_lock_sizes);
	//TODO should we set free_size?
	mutex_enter(&chmp->chm_lock_vnocache);
	chfs_add_vnode_ref_to_vc(chmp, chvc, newnref);
	mutex_exit(&chmp->chm_lock_vnocache);
	ret = 0;
	/* FALLTHROUGH */
err_out:
	kmem_free(data, totlen);
	return ret;
}
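/*
 * chfs_gcollect_live() dispatches on what the node in hand is: the
 * inode's vnode information node, a data node found in the fragtree
 * (pristine ones are copied verbatim, the rest rewritten via
 * chfs_gcollect_dnode()), or a directory entry, where live and deleted
 * dirents take separate paths.
 */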
/* chfs_gcollect_live - collects a living node */
int
chfs_gcollect_live(struct chfs_mount *chmp,
    struct chfs_eraseblock *cheb, struct chfs_node_ref *nref,
    struct chfs_inode *ip)
{
	struct chfs_node_frag *frag;
	struct chfs_full_dnode *fn = NULL;
	int start = 0, end = 0, nrfrags = 0;
	struct chfs_dirent *fd = NULL;
	int ret = 0;
	bool is_dirent;

	dbg_gc("gcollect_live\n");

	if (chmp->chm_gcblock != cheb) {
		dbg_gc("GC block is no longer gcblock. Restart.\n");
		goto upnout;
	}

	if (CHFS_REF_OBSOLETE(nref)) {
		dbg_gc("node to be GC'd was obsoleted in the meantime.\n");
		goto upnout;
	}

	/* Is it a vnode? */
	if (ip->chvc->v == nref) {
		chfs_gcollect_vnode(chmp, ip);
		goto upnout;
	}

	/* Find the data node. */
	dbg_gc("find full dnode\n");
	for (frag = frag_first(&ip->fragtree);
	    frag; frag = frag_next(&ip->fragtree, frag)) {
		if (frag->node && frag->node->nref == nref) {
			fn = frag->node;
			end = frag->ofs + frag->size;
			if (!nrfrags++)
				start = frag->ofs;
			if (nrfrags == frag->node->frags)
				break;
		}
	}

	/* It's a pristine node or a dnode (or a hole? XXX do we have
	 * hole nodes?) */
	if (fn) {
		if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
			ret = chfs_gcollect_pristine(chmp,
			    cheb, ip->chvc, nref);
			if (!ret) {
				frag->node->nref = ip->chvc->v;
			}
			if (ret != EBADF)
				goto upnout;
		}
		ret = chfs_gcollect_dnode(chmp, cheb, ip, fn, start, end);
		goto upnout;
	}

	/* Is it a dirent? */
	dbg_gc("find full dirent\n");
	is_dirent = false;
	TAILQ_FOREACH(fd, &ip->dents, fds) {
		if (fd->nref == nref) {
			is_dirent = true;
			break;
		}
	}

	if (is_dirent && fd->vno) {
		/* Living dirent. */
		ret = chfs_gcollect_dirent(chmp, cheb, ip, fd);
	} else if (is_dirent) {
		/* Already deleted dirent. */
		ret = chfs_gcollect_deletion_dirent(chmp, cheb, ip, fd);
	} else {
		dbg_gc("Nref at leb #%u offset 0x%08x wasn't in node list"
		    " for ino #%llu\n",
		    nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset),
		    (unsigned long long)ip->ino);
		if (CHFS_REF_OBSOLETE(nref)) {
			dbg_gc("But it's obsolete so we don't mind"
			    " too much.\n");
		}
	}

upnout:
	return ret;
}
/* chfs_gcollect_vnode - collects a vnode information node */
int
chfs_gcollect_vnode(struct chfs_mount *chmp, struct chfs_inode *ip)
{
	int ret;
	dbg_gc("gcollect_vnode\n");

	/* Simply write the new vnode information to the flash
	 * with GC's space allocation. */
	ret = chfs_write_flash_vnode(chmp, ip, ALLOC_GC);

	return ret;
}

/* chfs_gcollect_dirent - collects a dirent */
int
chfs_gcollect_dirent(struct chfs_mount *chmp,
    struct chfs_eraseblock *cheb, struct chfs_inode *parent,
    struct chfs_dirent *fd)
{
	struct vnode *vnode = NULL;
	struct chfs_inode *ip;
	dbg_gc("gcollect_dirent\n");

	/* Find the vnode. */
	vnode = chfs_vnode_lookup(chmp, fd->vno);

	/* XXX maybe KASSERT or panic on this? */
	if (vnode == NULL) {
		return ENOENT;
	}

	ip = VTOI(vnode);
	vrele(vnode);

	/* Remove and obsolete the previous version. */
	mutex_enter(&chmp->chm_lock_vnocache);
	chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref,
	    &parent->chvc->dirents);
	mutex_exit(&chmp->chm_lock_vnocache);

	/* Write the new dirent to the flash. */
	return chfs_write_flash_dirent(chmp,
	    parent, ip, fd, fd->vno, ALLOC_GC);
}
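/*
 * A deletion dirent must stay on flash as long as an older, obsolete
 * dirent with the same name could still be replayed at mount time.
 * The scan below therefore looks for such a survivor among the
 * parent's obsolete dirent nodes: if one is found, the deletion dirent
 * is rewritten to the new location; if not, it can simply be dropped
 * from memory.
 */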
/*
 * chfs_gcollect_deletion_dirent -
 * collects a dirent that was marked as deleted
 */
int
chfs_gcollect_deletion_dirent(struct chfs_mount *chmp,
    struct chfs_eraseblock *cheb, struct chfs_inode *parent,
    struct chfs_dirent *fd)
{
	struct chfs_flash_dirent_node chfdn;
	struct chfs_node_ref *nref;
	size_t retlen, name_len, nref_len;
	uint32_t name_crc;

	int ret;

	dbg_gc("gcollect_deletion_dirent\n");

	/* Check the node. */
	name_len = strlen(fd->name);
	name_crc = crc32(0, fd->name, name_len);

	nref_len = chfs_nref_len(chmp, cheb, fd->nref);

	/* XXX This was a noop: (void)chfs_vnode_lookup(chmp, fd->vno); */

	/* Find it among the parent's dirents. */
	for (nref = parent->chvc->dirents;
	    nref != (void *)parent->chvc;
	    nref = nref->nref_next) {

		if (!CHFS_REF_OBSOLETE(nref))
			continue;

		/* if the node refs have different lengths, skip */
		if (chfs_nref_len(chmp, NULL, nref) != nref_len)
			continue;

		/* skip the deletion dirent itself */
		if (CHFS_GET_OFS(nref->nref_offset) ==
		    CHFS_GET_OFS(fd->nref->nref_offset)) {
			continue;
		}

		/* read it from the flash */
		ret = chfs_read_leb(chmp,
		    nref->nref_lnr, (void *)&chfdn, CHFS_GET_OFS(nref->nref_offset),
		    nref_len, &retlen);

		if (ret) {
			dbg_gc("Read error: %d\n", ret);
			continue;
		}

		if (retlen != nref_len) {
			dbg_gc("Error reading node:"
			    " read: %zu instead of: %zu\n", retlen, nref_len);
			continue;
		}

		/* if the node type doesn't match, skip */
		if (le16toh(chfdn.type) != CHFS_NODETYPE_DIRENT)
			continue;

		/* if the crc doesn't match, skip */
		if (le32toh(chfdn.name_crc) != name_crc)
			continue;

		/* if the name length differs, or this is another deletion
		 * dirent, skip
		 */
		if (chfdn.nsize != name_len || !le64toh(chfdn.vno))
			continue;

		/* check the actual name */
		if (memcmp(chfdn.name, fd->name, name_len))
			continue;

		mutex_enter(&chmp->chm_lock_vnocache);
		chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref,
		    &parent->chvc->dirents);
		mutex_exit(&chmp->chm_lock_vnocache);
		return chfs_write_flash_dirent(chmp,
		    parent, NULL, fd, fd->vno, ALLOC_GC);
	}

	/* Simply remove it from the parent's dirents. */
	TAILQ_REMOVE(&parent->dents, fd, fds);
	chfs_free_dirent(fd);
	return 0;
}
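/*
 * Data nodes are not copied verbatim: the node image is reread, its
 * version is bumped to the inode's next highest_version and the node
 * CRC recomputed before the buffer is written back through the write
 * buffer, so the moved node supersedes the original.
 */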
/* chfs_gcollect_dnode - collects a data node */
int
chfs_gcollect_dnode(struct chfs_mount *chmp,
    struct chfs_eraseblock *orig_cheb, struct chfs_inode *ip,
    struct chfs_full_dnode *fn, uint32_t orig_start, uint32_t orig_end)
{
	struct chfs_node_ref *nref;
	struct chfs_full_dnode *newfn;
	struct chfs_flash_data_node *fdnode;
	int ret = 0, retries = 0;
	uint32_t totlen;
	char *data = NULL;
	struct iovec vec;
	size_t retlen;
	dbg_gc("gcollect_dnode\n");

	//TODO merge frags

	KASSERT(orig_cheb->lnr == fn->nref->nref_lnr);
	totlen = chfs_nref_len(chmp, orig_cheb, fn->nref);
	data = kmem_alloc(totlen, KM_SLEEP);

	/* Read the node from the flash. */
	ret = chfs_read_leb(chmp, fn->nref->nref_lnr, data, fn->nref->nref_offset,
	    totlen, &retlen);
	if (ret || retlen != totlen) {
		/* don't touch the buffer if the read failed */
		chfs_err("error while reading node from the media\n");
		ret = ret ? ret : EIO;
		goto out;
	}

	fdnode = (struct chfs_flash_data_node *)data;
	fdnode->version = htole64(++ip->chvc->highest_version);
	fdnode->node_crc = htole32(crc32(0, (uint8_t *)fdnode,
	    sizeof(*fdnode) - 4));

	vec.iov_base = (void *)data;
	vec.iov_len = totlen;

retry:
	/* Set the next block where we can write. */
	ret = chfs_reserve_space_gc(chmp, totlen);
	if (ret)
		goto out;

	nref = chfs_alloc_node_ref(chmp->chm_nextblock);
	if (!nref) {
		ret = ENOMEM;
		goto out;
	}

	mutex_enter(&chmp->chm_lock_sizes);

	nref->nref_offset = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size;
	KASSERT(nref->nref_offset % 4 == 0);
	chfs_change_size_free(chmp, chmp->chm_nextblock, -totlen);

	/* Write it through the write buffer. */
	ret = chfs_write_wbuf(chmp, &vec, 1, nref->nref_offset, &retlen);
	if (ret || retlen != totlen) {
		/* error during writing */
		chfs_err("error while writing out to the media\n");
		chfs_err("err: %d | size: %u | retlen : %zu\n",
		    ret, totlen, retlen);
		chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen);
		if (retries) {
			ret = EIO;
			mutex_exit(&chmp->chm_lock_sizes);
			goto out;
		}

		/* try again */
		retries++;
		mutex_exit(&chmp->chm_lock_sizes);
		goto retry;
	}

	dbg_gc("new nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset);

	chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
	mutex_exit(&chmp->chm_lock_sizes);
	KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);

	/* Set the fields of the new node. */
	newfn = chfs_alloc_full_dnode();
	newfn->nref = nref;
	newfn->ofs = fn->ofs;
	newfn->size = fn->size;
	newfn->frags = 0;

	mutex_enter(&chmp->chm_lock_vnocache);
	/* Remove every part of the old node. */
	chfs_remove_frags_of_node(chmp, &ip->fragtree, fn->nref);
	chfs_remove_and_obsolete(chmp, ip->chvc, fn->nref, &ip->chvc->dnode);

	/* Add the new nref to the inode. */
	chfs_add_full_dnode_to_inode(chmp, ip, newfn);
	chfs_add_node_to_list(chmp,
	    ip->chvc, newfn->nref, &ip->chvc->dnode);
	mutex_exit(&chmp->chm_lock_vnocache);

out:
	kmem_free(data, totlen);
	return ret;
}