Home | History | Annotate | Line # | Download | only in kern
      1 /*	$NetBSD: vfs_dirhash.c,v 1.17 2025/10/31 10:35:52 reinoud Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, 2025 Reinoud Zandijk
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: vfs_dirhash.c,v 1.17 2025/10/31 10:35:52 reinoud Exp $");
     31 
     32 /* CLEAN UP! */
     33 #include <sys/param.h>
     34 #include <sys/types.h>
     35 
     36 #include <sys/buf.h>
     37 #include <sys/dirent.h>
     38 #include <sys/dirhash.h>
     39 #include <sys/hash.h>
     40 #include <sys/kernel.h>
     41 #include <sys/mutex.h>
     42 #include <sys/pool.h>
     43 #include <sys/queue.h>
     44 #include <sys/sysctl.h>
     45 #include <sys/vnode.h>
     46 
     47 #if 1
     48 #	define DPRINTF(a) __nothing
     49 #else
     50 #	define DPRINTF(a) printf a
     51 #endif
     52 
     53 /*
     54  * defaults for dirhash cache sizes:
     55  *  - use up to 1/64th of system memory.
     56  *  - cap maximum size of the default dirhash cache at 32mb.
     57  */
     58 #define	DIRHASH_DEFAULT_DIVIDER	64
     59 #define MIN_DEFAULT_DIRHASH_MEM	(1024*1024)
     60 #define MAX_DEFAULT_DIRHASH_MEM (32*1024*1024)
     61 #define DIRHASH_PURGE_BLOCK	(16*1024)
     62 
     63 /*
     64  * The locking protocol of the dirhash structures is fairly simple:
     65  *
     66  * The global dirhash_queue is protected by the dirhashmutex. This lock is
     67  * internal only and is FS/mountpoint/vnode independent. On exit of the
     68  * exported functions this mutex is not held.
     69  *
     70  * The dirhash structure is considered part of the vnode/inode structure and
     71  * will thus use the lock that protects that vnode/inode.
     72  *
     73  * The dirhash entries are considered part of the dirhash structure and thus
     74  * are on the same lock.
     75  */
     76 
     77 static struct sysctllog *sysctl_log;
     78 static struct pool dirhash_pool;
     79 static struct pool dirhash_entry_pool;
     80 
     81 static kmutex_t dirhashmutex;
     82 static uint32_t maxdirhashsize = MIN_DEFAULT_DIRHASH_MEM;
     83 static uint32_t dirhashsize    = 0;
     84 static TAILQ_HEAD(_dirhash, dirhash) dirhash_queue;
     85 
     86 
     87 void
     88 dirhash_init(void)
     89 {
     90 	const struct sysctlnode *rnode, *cnode;
     91 	size_t sz;
     92 	uint64_t physmem_bytes;
     93 	uint32_t max_entries;
     94 
     95 	/* initialise dirhash queue */
     96 	TAILQ_INIT(&dirhash_queue);
     97 
     98 	/* init dirhash pools */
     99 	sz = sizeof(struct dirhash);
    100 	pool_init(&dirhash_pool, sz, 0, 0, 0,
    101 	    "dirhpl", NULL, IPL_NONE);
    102 
    103 	sz = sizeof(struct dirhash_entry);
    104 	pool_init(&dirhash_entry_pool, sz, 0, 0, 0,
    105 	    "dirhepl", NULL, IPL_NONE);
    106 
    107 	/* calculate the default maximum dirhashsize */
    108 	physmem_bytes = ctob((uint64_t) physmem);
    109 	maxdirhashsize = physmem_bytes / DIRHASH_DEFAULT_DIVIDER;
    110 	if (maxdirhashsize < MIN_DEFAULT_DIRHASH_MEM)
    111 		maxdirhashsize = MIN_DEFAULT_DIRHASH_MEM;
    112 	if (maxdirhashsize > MAX_DEFAULT_DIRHASH_MEM)
    113 		maxdirhashsize = MAX_DEFAULT_DIRHASH_MEM;
    114 
    115 	mutex_init(&dirhashmutex, MUTEX_DEFAULT, IPL_NONE);
    116 	max_entries = maxdirhashsize / sz;
    117 	pool_sethiwat(&dirhash_entry_pool, max_entries);
    118 	dirhashsize = 0;
    119 
    120 	/* create sysctl knobs and dials */
    121 	sysctl_log = NULL;
    122 	sysctl_createv(&sysctl_log, 0, NULL, &rnode,
    123 	    CTLFLAG_PERMANENT,
    124 	    CTLTYPE_NODE, "dirhash", NULL,
    125 	    NULL, 0, NULL, 0,
    126 	    CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
    127 	sysctl_createv(&sysctl_log, 0, &rnode, &cnode,
    128 	    CTLFLAG_PERMANENT,
    129 	    CTLTYPE_INT, "memused",
    130 	    SYSCTL_DESCR("current dirhash memory usage"),
    131 	    NULL, 0, &dirhashsize, 0,
    132 	    CTL_CREATE, CTL_EOL);
    133 	sysctl_createv(&sysctl_log, 0, &rnode, &cnode,
    134 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    135 	    CTLTYPE_INT, "maxmem",
    136 	    SYSCTL_DESCR("maximum dirhash memory usage"),
    137 	    NULL, 0, &maxdirhashsize, 0,
    138 	    CTL_CREATE, CTL_EOL);
    139 }
    140 
    141 
    142 #if 0
    143 void
    144 dirhash_finish(void)
    145 {
    146 	pool_destroy(&dirhash_pool);
    147 	pool_destroy(&dirhash_entry_pool);
    148 
    149 	mutex_destroy(&dirhashmutex);
    150 
    151 	/* sysctl_teardown(&sysctl_log); */
    152 }
    153 #endif
    154 
    155 /*
    156  * generic dirhash implementation
    157  */
    158 
    159 void
    160 dirhash_purge_entries(struct dirhash *dirh)
    161 {
    162 	struct dirhash_entry *dirh_e;
    163 	uint32_t hashline;
    164 
    165 	if (dirh == NULL)
    166 		return;
    167 
    168 	if (dirh->size == 0)
    169 		return;
    170 
    171 	for (hashline = 0; hashline < DIRHASH_HASHSIZE; hashline++) {
    172 		while ((dirh_e = LIST_FIRST(&dirh->entries[hashline]))
    173 		    != NULL) {
    174 			LIST_REMOVE(dirh_e, next);
    175 			pool_put(&dirhash_entry_pool, dirh_e);
    176 		}
    177 	}
    178 
    179 	while ((dirh_e = LIST_FIRST(&dirh->free_entries)) != NULL) {
    180 		LIST_REMOVE(dirh_e, next);
    181 		pool_put(&dirhash_entry_pool, dirh_e);
    182 	}
    183 
    184 	dirh->flags &= ~DIRH_COMPLETE;
    185 	dirh->flags |=  DIRH_PURGED;
    186 	dirh->num_files = 0;
    187 
    188 	dirhashsize -= dirh->size;
    189 	dirh->size = 0;
    190 }
    191 
    192 void
    193 dirhash_purge(struct dirhash **dirhp)
    194 {
    195 	struct dirhash *dirh = *dirhp;
    196 
    197 	if (dirh == NULL)
    198 		return;
    199 
    200 	/* purge its entries */
    201 	dirhash_purge_entries(dirh);
    202 
    203 	/* recycle */
    204 	mutex_enter(&dirhashmutex);
    205 	TAILQ_REMOVE(&dirhash_queue, dirh, next);
    206 	mutex_exit(&dirhashmutex);
    207 
    208 	pool_put(&dirhash_pool, dirh);
    209 	*dirhp = NULL;
    210 }
    211 
    212 void
    213 dirhash_get(struct dirhash **dirhp)
    214 {
    215 	struct dirhash *dirh;
    216 	uint32_t hashline;
    217 
    218 	/* protect against concurrent VOP_LOOKUP calls */
    219 	mutex_enter(&dirhashmutex);
    220 
    221 	/* if no dirhash was given, allocate one */
    222 	dirh = *dirhp;
    223 	if (dirh == NULL) {
    224 		dirh = pool_get(&dirhash_pool, PR_WAITOK | PR_ZERO);
    225 		for (hashline = 0; hashline < DIRHASH_HASHSIZE; hashline++) {
    226 			LIST_INIT(&dirh->entries[hashline]);
    227 		}
    228 	}
    229 
    230 	/* implement LRU on the dirhash queue */
    231 	if (*dirhp) {
    232 		/* remove from queue to be requeued */
    233 		TAILQ_REMOVE(&dirhash_queue, dirh, next);
    234 	}
    235 	dirh->refcnt++;
    236 	TAILQ_INSERT_HEAD(&dirhash_queue, dirh, next);
    237 	mutex_exit(&dirhashmutex);
    238 
    239 	*dirhp = dirh;
    240 }
    241 
    242 void
    243 dirhash_put(struct dirhash *dirh)
    244 {
    245 
    246 	mutex_enter(&dirhashmutex);
    247 	dirh->refcnt--;
    248 	mutex_exit(&dirhashmutex);
    249 }
    250 
    251 void
    252 dirhash_enter(struct dirhash *dirh,
    253     struct dirent *dirent, uint64_t offset, uint32_t entry_size, int new_p)
    254 {
    255 	struct dirhash *p_dirh;
    256 	struct dirhash_entry *dirh_e;
    257 	uint32_t hashvalue, hashline;
    258 	int entrysize;
    259 
    260 	/* make sure we have a dirhash to work on */
    261 	KASSERT(dirh);
    262 	KASSERT(dirh->refcnt > 0);
    263 
    264 	/* are we trying to re-enter an entry? */
    265 	if (!new_p && (dirh->flags & DIRH_COMPLETE))
    266 		return;
    267 
    268 	/* calculate our hash */
    269 	hashvalue = hash32_strn(dirent->d_name, dirent->d_namlen,
    270 	    HASH32_STR_INIT);
    271 	hashline  = hashvalue & DIRHASH_HASHMASK;
    272 
    273 	/* lookup and insert entry if not there yet */
    274 	LIST_FOREACH(dirh_e, &dirh->entries[hashline], next) {
    275 		/* check for hash collision */
    276 		if (dirh_e->hashvalue != hashvalue)
    277 			continue;
    278 		if (dirh_e->offset != offset)
    279 			continue;
    280 		/* got it already */
    281 		KASSERT(dirh_e->d_namlen == dirent->d_namlen);
    282 		KASSERT(dirh_e->entry_size == entry_size);
    283 		return;
    284 	}
    285 
    286 	DPRINTF(("dirhash enter %"PRIu64", %d, %d for `%*.*s`\n",
    287 		offset, entry_size, dirent->d_namlen,
    288 		dirent->d_namlen, dirent->d_namlen, dirent->d_name));
    289 
    290 	/* check if entry is in free space list */
    291 	LIST_FOREACH(dirh_e, &dirh->free_entries, next) {
    292 		if (dirh_e->offset == offset) {
    293 			DPRINTF(("\tremoving free entry\n"));
    294 			LIST_REMOVE(dirh_e, next);
    295 			pool_put(&dirhash_entry_pool, dirh_e);
    296 			break;
    297 		}
    298 	}
    299 
    300 	/* ensure we are not passing the dirhash limit */
    301 	entrysize = sizeof(struct dirhash_entry);
    302 	if (dirhashsize + entrysize > maxdirhashsize) {
    303 		/* we walk the dirhash_queue, so need to lock it */
    304 		mutex_enter(&dirhashmutex);
    305 		TAILQ_FOREACH_REVERSE(p_dirh, &dirhash_queue, _dirhash, next) {
    306 			/* satisfied? */
    307 			if (dirhashsize + entrysize <
    308 					maxdirhashsize - DIRHASH_PURGE_BLOCK)
    309 				break;
    310 			if (p_dirh->refcnt)
    311 				continue;
    312 
    313 			/* this can't be ourselves */
    314 			KASSERT(p_dirh != dirh);
    315 			dirhash_purge_entries(p_dirh);
    316 		}
    317 		/* if not enough is cleaned, we just give up */
    318 		mutex_exit(&dirhashmutex);
    319 	}
    320 
    321 	/* add to the hashline */
    322 	dirh_e = pool_get(&dirhash_entry_pool, PR_WAITOK | PR_ZERO);
    323 
    324 	dirh_e->hashvalue = hashvalue;
    325 	dirh_e->offset    = offset;
    326 	dirh_e->d_namlen  = dirent->d_namlen;
    327 	dirh_e->entry_size  = entry_size;
    328 
    329 	dirh->size  += sizeof(struct dirhash_entry);
    330 	dirh->num_files++;
    331 	dirhashsize += sizeof(struct dirhash_entry);
    332 	LIST_INSERT_HEAD(&dirh->entries[hashline], dirh_e, next);
    333 }
    334 
    335 void
    336 dirhash_enter_freed(struct dirhash *dirh, uint64_t offset, uint32_t entry_size)
    337 {
    338 	struct dirhash_entry *dirh_e;
    339 
    340 	/* make sure we have a dirhash to work on */
    341 	KASSERT(dirh);
    342 	KASSERT(dirh->refcnt > 0);
    343 
    344 	/* check for double entry of free space */
    345 	LIST_FOREACH(dirh_e, &dirh->free_entries, next) {
    346 		KASSERT(dirh_e->offset != offset);
    347 	}
    348 
    349 	DPRINTF(("dirhash enter FREED %"PRIu64", %d\n",
    350 		offset, entry_size));
    351 	dirh_e = pool_get(&dirhash_entry_pool, PR_WAITOK | PR_ZERO);
    352 
    353 	dirh_e->hashvalue = 0;		/* not relevant */
    354 	dirh_e->offset    = offset;
    355 	dirh_e->d_namlen  = 0;		/* not relevant */
    356 	dirh_e->entry_size  = entry_size;
    357 
    358 	/* XXX it might be preferable to append them at the tail */
    359 	LIST_INSERT_HEAD(&dirh->free_entries, dirh_e, next);
    360 	dirh->size  += sizeof(struct dirhash_entry);
    361 	dirhashsize += sizeof(struct dirhash_entry);
    362 }
    363 
    364 void
    365 dirhash_remove(struct dirhash *dirh, struct dirent *dirent,
    366     uint64_t offset, uint32_t entry_size)
    367 {
    368 	struct dirhash_entry *dirh_e;
    369 	uint32_t hashvalue, hashline;
    370 
    371 	DPRINTF(("dirhash remove %"PRIu64", %d for `%*.*s`\n",
    372 		offset, entry_size,
    373 		dirent->d_namlen, dirent->d_namlen, dirent->d_name));
    374 
    375 	/* make sure we have a dirhash to work on */
    376 	KASSERT(dirh);
    377 	KASSERT(dirh->refcnt > 0);
    378 
    379 	/* calculate our hash */
    380 	hashvalue = hash32_strn(dirent->d_name, dirent->d_namlen,
    381 	    HASH32_STR_INIT);
    382 	hashline  = hashvalue & DIRHASH_HASHMASK;
    383 
    384 	/* lookup entry */
    385 	LIST_FOREACH(dirh_e, &dirh->entries[hashline], next) {
    386 		/* check for hash collision */
    387 		if (dirh_e->hashvalue != hashvalue)
    388 			continue;
    389 		if (dirh_e->offset != offset)
    390 			continue;
    391 
    392 		/* got it! */
    393 		KASSERT(dirh_e->d_namlen == dirent->d_namlen);
    394 		KASSERT(dirh_e->entry_size == entry_size);
    395 		LIST_REMOVE(dirh_e, next);
    396 		dirh->size -= sizeof(struct dirhash_entry);
    397 		KASSERT(dirh->num_files > 0);
    398 		dirh->num_files--;
    399 		dirhashsize -= sizeof(struct dirhash_entry);
    400 
    401 		dirhash_enter_freed(dirh, offset, entry_size);
    402 		return;
    403 	}
    404 
    405 	/* not found! */
    406 	panic("dirhash_remove couldn't find entry in hash table\n");
    407 }
    408 
    409 /*
    410  * BUGALERT: don't use result longer than needed, never past the node lock.
    411  * Call with NULL *result initially and it will return nonzero if again.
    412  */
    413 int
    414 dirhash_lookup(struct dirhash *dirh, const char *d_name, int d_namlen,
    415     struct dirhash_entry **result)
    416 {
    417 	struct dirhash_entry *dirh_e;
    418 	uint32_t hashvalue, hashline;
    419 
    420 	/* make sure we have a dirhash to work on */
    421 	KASSERT(dirh);
    422 	KASSERT(dirh->refcnt > 0);
    423 
    424 	/* start where we were */
    425 	if (*result) {
    426 		dirh_e = *result;
    427 
    428 		/* retrieve information to avoid recalculation and advance */
    429 		hashvalue = dirh_e->hashvalue;
    430 		dirh_e = LIST_NEXT(*result, next);
    431 	} else {
    432 		/* calculate our hash and lookup all entries in hashline */
    433 		hashvalue = hash32_strn(d_name, d_namlen, HASH32_STR_INIT);
    434 		hashline  = hashvalue & DIRHASH_HASHMASK;
    435 		dirh_e = LIST_FIRST(&dirh->entries[hashline]);
    436 	}
    437 
    438 	for (; dirh_e; dirh_e = LIST_NEXT(dirh_e, next)) {
    439 		/* check for hash collision */
    440 		if (dirh_e->hashvalue != hashvalue)
    441 			continue;
    442 		if (dirh_e->d_namlen != d_namlen)
    443 			continue;
    444 		/* might have an entry in the cache */
    445 		*result = dirh_e;
    446 		return 1;
    447 	}
    448 
    449 	*result = NULL;
    450 	return 0;
    451 }
    452 
    453 /*
    454  * BUGALERT: don't use result longer than needed, never past the node lock.
    455  * Call with NULL *result initially and it will return nonzero if again.
    456  */
    457 int
    458 dirhash_lookup_freed(struct dirhash *dirh, uint32_t min_entrysize,
    459     struct dirhash_entry **result)
    460 {
    461 	struct dirhash_entry *dirh_e;
    462 
    463 	/* make sure we have a dirhash to work on */
    464 	KASSERT(dirh);
    465 	KASSERT(dirh->refcnt > 0);
    466 
    467 	/* start where we were */
    468 	if (*result) {
    469 		dirh_e = LIST_NEXT(*result, next);
    470 	} else {
    471 		/* lookup all entries that match */
    472 		dirh_e = LIST_FIRST(&dirh->free_entries);
    473 	}
    474 
    475 	for (; dirh_e; dirh_e = LIST_NEXT(dirh_e, next)) {
    476 		/* check for minimum size */
    477 		if (dirh_e->entry_size < min_entrysize)
    478 			continue;
    479 		/* might be a candidate */
    480 		*result = dirh_e;
    481 		return 1;
    482 	}
    483 
    484 	*result = NULL;
    485 	return 0;
    486 }
    487 
    488 bool
    489 dirhash_dir_isempty(struct dirhash *dirh)
    490 {
    491 #ifdef DEBUG
    492 	struct dirhash_entry *dirh_e;
    493 	int hashline, num;
    494 
    495 	num = 0;
    496 	for (hashline = 0; hashline < DIRHASH_HASHSIZE; hashline++) {
    497 		LIST_FOREACH(dirh_e, &dirh->entries[hashline], next) {
    498 			num++;
    499 		}
    500 	}
    501 
    502 	if (dirh->num_files != num) {
    503 		printf("dirhash_dir_isempy: dirhash_counter failed: "
    504 		    "dirh->num_files = %d, counted %d\n",
    505 		    dirh->num_files, num);
    506 		assert(dirh->num_files == num);
    507 	}
    508 #endif
    509 	/* assert the directory hash info is valid */
    510 	KASSERT(dirh->flags & DIRH_COMPLETE);
    511 
    512 	/* the directory is empty when only '..' lifes in it or is absent */
    513 	return (dirh->num_files <= 1);
    514 }
    515