1 /* $NetBSD: vfs_dirhash.c,v 1.17 2025/10/31 10:35:52 reinoud Exp $ */ 2 3 /* 4 * Copyright (c) 2008, 2025 Reinoud Zandijk 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 */ 28 29 #include <sys/cdefs.h> 30 __KERNEL_RCSID(0, "$NetBSD: vfs_dirhash.c,v 1.17 2025/10/31 10:35:52 reinoud Exp $"); 31 32 /* CLEAN UP! */ 33 #include <sys/param.h> 34 #include <sys/types.h> 35 36 #include <sys/buf.h> 37 #include <sys/dirent.h> 38 #include <sys/dirhash.h> 39 #include <sys/hash.h> 40 #include <sys/kernel.h> 41 #include <sys/mutex.h> 42 #include <sys/pool.h> 43 #include <sys/queue.h> 44 #include <sys/sysctl.h> 45 #include <sys/vnode.h> 46 47 #if 1 48 # define DPRINTF(a) __nothing 49 #else 50 # define DPRINTF(a) printf a 51 #endif 52 53 /* 54 * defaults for dirhash cache sizes: 55 * - use up to 1/64th of system memory. 56 * - cap maximum size of the default dirhash cache at 32mb. 57 */ 58 #define DIRHASH_DEFAULT_DIVIDER 64 59 #define MIN_DEFAULT_DIRHASH_MEM (1024*1024) 60 #define MAX_DEFAULT_DIRHASH_MEM (32*1024*1024) 61 #define DIRHASH_PURGE_BLOCK (16*1024) 62 63 /* 64 * The locking protocol of the dirhash structures is fairly simple: 65 * 66 * The global dirhash_queue is protected by the dirhashmutex. This lock is 67 * internal only and is FS/mountpoint/vnode independent. On exit of the 68 * exported functions this mutex is not held. 69 * 70 * The dirhash structure is considered part of the vnode/inode structure and 71 * will thus use the lock that protects that vnode/inode. 72 * 73 * The dirhash entries are considered part of the dirhash structure and thus 74 * are on the same lock. 75 */ 76 77 static struct sysctllog *sysctl_log; 78 static struct pool dirhash_pool; 79 static struct pool dirhash_entry_pool; 80 81 static kmutex_t dirhashmutex; 82 static uint32_t maxdirhashsize = MIN_DEFAULT_DIRHASH_MEM; 83 static uint32_t dirhashsize = 0; 84 static TAILQ_HEAD(_dirhash, dirhash) dirhash_queue; 85 86 87 void 88 dirhash_init(void) 89 { 90 const struct sysctlnode *rnode, *cnode; 91 size_t sz; 92 uint64_t physmem_bytes; 93 uint32_t max_entries; 94 95 /* initialise dirhash queue */ 96 TAILQ_INIT(&dirhash_queue); 97 98 /* init dirhash pools */ 99 sz = sizeof(struct dirhash); 100 pool_init(&dirhash_pool, sz, 0, 0, 0, 101 "dirhpl", NULL, IPL_NONE); 102 103 sz = sizeof(struct dirhash_entry); 104 pool_init(&dirhash_entry_pool, sz, 0, 0, 0, 105 "dirhepl", NULL, IPL_NONE); 106 107 /* calculate the default maximum dirhashsize */ 108 physmem_bytes = ctob((uint64_t) physmem); 109 maxdirhashsize = physmem_bytes / DIRHASH_DEFAULT_DIVIDER; 110 if (maxdirhashsize < MIN_DEFAULT_DIRHASH_MEM) 111 maxdirhashsize = MIN_DEFAULT_DIRHASH_MEM; 112 if (maxdirhashsize > MAX_DEFAULT_DIRHASH_MEM) 113 maxdirhashsize = MAX_DEFAULT_DIRHASH_MEM; 114 115 mutex_init(&dirhashmutex, MUTEX_DEFAULT, IPL_NONE); 116 max_entries = maxdirhashsize / sz; 117 pool_sethiwat(&dirhash_entry_pool, max_entries); 118 dirhashsize = 0; 119 120 /* create sysctl knobs and dials */ 121 sysctl_log = NULL; 122 sysctl_createv(&sysctl_log, 0, NULL, &rnode, 123 CTLFLAG_PERMANENT, 124 CTLTYPE_NODE, "dirhash", NULL, 125 NULL, 0, NULL, 0, 126 CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL); 127 sysctl_createv(&sysctl_log, 0, &rnode, &cnode, 128 CTLFLAG_PERMANENT, 129 CTLTYPE_INT, "memused", 130 SYSCTL_DESCR("current dirhash memory usage"), 131 NULL, 0, &dirhashsize, 0, 132 CTL_CREATE, CTL_EOL); 133 sysctl_createv(&sysctl_log, 0, &rnode, &cnode, 134 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 135 CTLTYPE_INT, "maxmem", 136 SYSCTL_DESCR("maximum dirhash memory usage"), 137 NULL, 0, &maxdirhashsize, 0, 138 CTL_CREATE, CTL_EOL); 139 } 140 141 142 #if 0 143 void 144 dirhash_finish(void) 145 { 146 pool_destroy(&dirhash_pool); 147 pool_destroy(&dirhash_entry_pool); 148 149 mutex_destroy(&dirhashmutex); 150 151 /* sysctl_teardown(&sysctl_log); */ 152 } 153 #endif 154 155 /* 156 * generic dirhash implementation 157 */ 158 159 void 160 dirhash_purge_entries(struct dirhash *dirh) 161 { 162 struct dirhash_entry *dirh_e; 163 uint32_t hashline; 164 165 if (dirh == NULL) 166 return; 167 168 if (dirh->size == 0) 169 return; 170 171 for (hashline = 0; hashline < DIRHASH_HASHSIZE; hashline++) { 172 while ((dirh_e = LIST_FIRST(&dirh->entries[hashline])) 173 != NULL) { 174 LIST_REMOVE(dirh_e, next); 175 pool_put(&dirhash_entry_pool, dirh_e); 176 } 177 } 178 179 while ((dirh_e = LIST_FIRST(&dirh->free_entries)) != NULL) { 180 LIST_REMOVE(dirh_e, next); 181 pool_put(&dirhash_entry_pool, dirh_e); 182 } 183 184 dirh->flags &= ~DIRH_COMPLETE; 185 dirh->flags |= DIRH_PURGED; 186 dirh->num_files = 0; 187 188 dirhashsize -= dirh->size; 189 dirh->size = 0; 190 } 191 192 void 193 dirhash_purge(struct dirhash **dirhp) 194 { 195 struct dirhash *dirh = *dirhp; 196 197 if (dirh == NULL) 198 return; 199 200 /* purge its entries */ 201 dirhash_purge_entries(dirh); 202 203 /* recycle */ 204 mutex_enter(&dirhashmutex); 205 TAILQ_REMOVE(&dirhash_queue, dirh, next); 206 mutex_exit(&dirhashmutex); 207 208 pool_put(&dirhash_pool, dirh); 209 *dirhp = NULL; 210 } 211 212 void 213 dirhash_get(struct dirhash **dirhp) 214 { 215 struct dirhash *dirh; 216 uint32_t hashline; 217 218 /* protect against concurrent VOP_LOOKUP calls */ 219 mutex_enter(&dirhashmutex); 220 221 /* if no dirhash was given, allocate one */ 222 dirh = *dirhp; 223 if (dirh == NULL) { 224 dirh = pool_get(&dirhash_pool, PR_WAITOK | PR_ZERO); 225 for (hashline = 0; hashline < DIRHASH_HASHSIZE; hashline++) { 226 LIST_INIT(&dirh->entries[hashline]); 227 } 228 } 229 230 /* implement LRU on the dirhash queue */ 231 if (*dirhp) { 232 /* remove from queue to be requeued */ 233 TAILQ_REMOVE(&dirhash_queue, dirh, next); 234 } 235 dirh->refcnt++; 236 TAILQ_INSERT_HEAD(&dirhash_queue, dirh, next); 237 mutex_exit(&dirhashmutex); 238 239 *dirhp = dirh; 240 } 241 242 void 243 dirhash_put(struct dirhash *dirh) 244 { 245 246 mutex_enter(&dirhashmutex); 247 dirh->refcnt--; 248 mutex_exit(&dirhashmutex); 249 } 250 251 void 252 dirhash_enter(struct dirhash *dirh, 253 struct dirent *dirent, uint64_t offset, uint32_t entry_size, int new_p) 254 { 255 struct dirhash *p_dirh; 256 struct dirhash_entry *dirh_e; 257 uint32_t hashvalue, hashline; 258 int entrysize; 259 260 /* make sure we have a dirhash to work on */ 261 KASSERT(dirh); 262 KASSERT(dirh->refcnt > 0); 263 264 /* are we trying to re-enter an entry? */ 265 if (!new_p && (dirh->flags & DIRH_COMPLETE)) 266 return; 267 268 /* calculate our hash */ 269 hashvalue = hash32_strn(dirent->d_name, dirent->d_namlen, 270 HASH32_STR_INIT); 271 hashline = hashvalue & DIRHASH_HASHMASK; 272 273 /* lookup and insert entry if not there yet */ 274 LIST_FOREACH(dirh_e, &dirh->entries[hashline], next) { 275 /* check for hash collision */ 276 if (dirh_e->hashvalue != hashvalue) 277 continue; 278 if (dirh_e->offset != offset) 279 continue; 280 /* got it already */ 281 KASSERT(dirh_e->d_namlen == dirent->d_namlen); 282 KASSERT(dirh_e->entry_size == entry_size); 283 return; 284 } 285 286 DPRINTF(("dirhash enter %"PRIu64", %d, %d for `%*.*s`\n", 287 offset, entry_size, dirent->d_namlen, 288 dirent->d_namlen, dirent->d_namlen, dirent->d_name)); 289 290 /* check if entry is in free space list */ 291 LIST_FOREACH(dirh_e, &dirh->free_entries, next) { 292 if (dirh_e->offset == offset) { 293 DPRINTF(("\tremoving free entry\n")); 294 LIST_REMOVE(dirh_e, next); 295 pool_put(&dirhash_entry_pool, dirh_e); 296 break; 297 } 298 } 299 300 /* ensure we are not passing the dirhash limit */ 301 entrysize = sizeof(struct dirhash_entry); 302 if (dirhashsize + entrysize > maxdirhashsize) { 303 /* we walk the dirhash_queue, so need to lock it */ 304 mutex_enter(&dirhashmutex); 305 TAILQ_FOREACH_REVERSE(p_dirh, &dirhash_queue, _dirhash, next) { 306 /* satisfied? */ 307 if (dirhashsize + entrysize < 308 maxdirhashsize - DIRHASH_PURGE_BLOCK) 309 break; 310 if (p_dirh->refcnt) 311 continue; 312 313 /* this can't be ourselves */ 314 KASSERT(p_dirh != dirh); 315 dirhash_purge_entries(p_dirh); 316 } 317 /* if not enough is cleaned, we just give up */ 318 mutex_exit(&dirhashmutex); 319 } 320 321 /* add to the hashline */ 322 dirh_e = pool_get(&dirhash_entry_pool, PR_WAITOK | PR_ZERO); 323 324 dirh_e->hashvalue = hashvalue; 325 dirh_e->offset = offset; 326 dirh_e->d_namlen = dirent->d_namlen; 327 dirh_e->entry_size = entry_size; 328 329 dirh->size += sizeof(struct dirhash_entry); 330 dirh->num_files++; 331 dirhashsize += sizeof(struct dirhash_entry); 332 LIST_INSERT_HEAD(&dirh->entries[hashline], dirh_e, next); 333 } 334 335 void 336 dirhash_enter_freed(struct dirhash *dirh, uint64_t offset, uint32_t entry_size) 337 { 338 struct dirhash_entry *dirh_e; 339 340 /* make sure we have a dirhash to work on */ 341 KASSERT(dirh); 342 KASSERT(dirh->refcnt > 0); 343 344 /* check for double entry of free space */ 345 LIST_FOREACH(dirh_e, &dirh->free_entries, next) { 346 KASSERT(dirh_e->offset != offset); 347 } 348 349 DPRINTF(("dirhash enter FREED %"PRIu64", %d\n", 350 offset, entry_size)); 351 dirh_e = pool_get(&dirhash_entry_pool, PR_WAITOK | PR_ZERO); 352 353 dirh_e->hashvalue = 0; /* not relevant */ 354 dirh_e->offset = offset; 355 dirh_e->d_namlen = 0; /* not relevant */ 356 dirh_e->entry_size = entry_size; 357 358 /* XXX it might be preferable to append them at the tail */ 359 LIST_INSERT_HEAD(&dirh->free_entries, dirh_e, next); 360 dirh->size += sizeof(struct dirhash_entry); 361 dirhashsize += sizeof(struct dirhash_entry); 362 } 363 364 void 365 dirhash_remove(struct dirhash *dirh, struct dirent *dirent, 366 uint64_t offset, uint32_t entry_size) 367 { 368 struct dirhash_entry *dirh_e; 369 uint32_t hashvalue, hashline; 370 371 DPRINTF(("dirhash remove %"PRIu64", %d for `%*.*s`\n", 372 offset, entry_size, 373 dirent->d_namlen, dirent->d_namlen, dirent->d_name)); 374 375 /* make sure we have a dirhash to work on */ 376 KASSERT(dirh); 377 KASSERT(dirh->refcnt > 0); 378 379 /* calculate our hash */ 380 hashvalue = hash32_strn(dirent->d_name, dirent->d_namlen, 381 HASH32_STR_INIT); 382 hashline = hashvalue & DIRHASH_HASHMASK; 383 384 /* lookup entry */ 385 LIST_FOREACH(dirh_e, &dirh->entries[hashline], next) { 386 /* check for hash collision */ 387 if (dirh_e->hashvalue != hashvalue) 388 continue; 389 if (dirh_e->offset != offset) 390 continue; 391 392 /* got it! */ 393 KASSERT(dirh_e->d_namlen == dirent->d_namlen); 394 KASSERT(dirh_e->entry_size == entry_size); 395 LIST_REMOVE(dirh_e, next); 396 dirh->size -= sizeof(struct dirhash_entry); 397 KASSERT(dirh->num_files > 0); 398 dirh->num_files--; 399 dirhashsize -= sizeof(struct dirhash_entry); 400 401 dirhash_enter_freed(dirh, offset, entry_size); 402 return; 403 } 404 405 /* not found! */ 406 panic("dirhash_remove couldn't find entry in hash table\n"); 407 } 408 409 /* 410 * BUGALERT: don't use result longer than needed, never past the node lock. 411 * Call with NULL *result initially and it will return nonzero if again. 412 */ 413 int 414 dirhash_lookup(struct dirhash *dirh, const char *d_name, int d_namlen, 415 struct dirhash_entry **result) 416 { 417 struct dirhash_entry *dirh_e; 418 uint32_t hashvalue, hashline; 419 420 /* make sure we have a dirhash to work on */ 421 KASSERT(dirh); 422 KASSERT(dirh->refcnt > 0); 423 424 /* start where we were */ 425 if (*result) { 426 dirh_e = *result; 427 428 /* retrieve information to avoid recalculation and advance */ 429 hashvalue = dirh_e->hashvalue; 430 dirh_e = LIST_NEXT(*result, next); 431 } else { 432 /* calculate our hash and lookup all entries in hashline */ 433 hashvalue = hash32_strn(d_name, d_namlen, HASH32_STR_INIT); 434 hashline = hashvalue & DIRHASH_HASHMASK; 435 dirh_e = LIST_FIRST(&dirh->entries[hashline]); 436 } 437 438 for (; dirh_e; dirh_e = LIST_NEXT(dirh_e, next)) { 439 /* check for hash collision */ 440 if (dirh_e->hashvalue != hashvalue) 441 continue; 442 if (dirh_e->d_namlen != d_namlen) 443 continue; 444 /* might have an entry in the cache */ 445 *result = dirh_e; 446 return 1; 447 } 448 449 *result = NULL; 450 return 0; 451 } 452 453 /* 454 * BUGALERT: don't use result longer than needed, never past the node lock. 455 * Call with NULL *result initially and it will return nonzero if again. 456 */ 457 int 458 dirhash_lookup_freed(struct dirhash *dirh, uint32_t min_entrysize, 459 struct dirhash_entry **result) 460 { 461 struct dirhash_entry *dirh_e; 462 463 /* make sure we have a dirhash to work on */ 464 KASSERT(dirh); 465 KASSERT(dirh->refcnt > 0); 466 467 /* start where we were */ 468 if (*result) { 469 dirh_e = LIST_NEXT(*result, next); 470 } else { 471 /* lookup all entries that match */ 472 dirh_e = LIST_FIRST(&dirh->free_entries); 473 } 474 475 for (; dirh_e; dirh_e = LIST_NEXT(dirh_e, next)) { 476 /* check for minimum size */ 477 if (dirh_e->entry_size < min_entrysize) 478 continue; 479 /* might be a candidate */ 480 *result = dirh_e; 481 return 1; 482 } 483 484 *result = NULL; 485 return 0; 486 } 487 488 bool 489 dirhash_dir_isempty(struct dirhash *dirh) 490 { 491 #ifdef DEBUG 492 struct dirhash_entry *dirh_e; 493 int hashline, num; 494 495 num = 0; 496 for (hashline = 0; hashline < DIRHASH_HASHSIZE; hashline++) { 497 LIST_FOREACH(dirh_e, &dirh->entries[hashline], next) { 498 num++; 499 } 500 } 501 502 if (dirh->num_files != num) { 503 printf("dirhash_dir_isempy: dirhash_counter failed: " 504 "dirh->num_files = %d, counted %d\n", 505 dirh->num_files, num); 506 assert(dirh->num_files == num); 507 } 508 #endif 509 /* assert the directory hash info is valid */ 510 KASSERT(dirh->flags & DIRH_COMPLETE); 511 512 /* the directory is empty when only '..' lifes in it or is absent */ 513 return (dirh->num_files <= 1); 514 } 515