/*	$NetBSD: vfs_cache.c,v 1.76 2008/05/05 17:11:17 ad Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.76 2008/05/05 17:11:17 ad Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>

#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (dvp, name), where dvp refers to the directory
 * containing the name.
 *
 * For simplicity (and economy of storage), names longer than
 * a maximum length of NCHNAMLEN are not cached; they occur
 * infrequently in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 * The entry is also dropped when it was not possible to lock
 * the cached vnode, either because vget() failed or the generation
 * number changed while waiting for the lock.
 */
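
/*
 * Illustrative use (a sketch only; the call sites live in namei and in
 * each filesystem's lookup routine, not in this file): try the cache
 * first and fall back to a directory scan, entering the result.
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error >= 0)
 *		return error;		0 is a hit, ENOENT a negative hit
 *	... scan the directory for cnp->cn_nameptr ...
 *	cache_enter(dvp, *vpp, cnp);
 */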

/*
 * Structures associated with name caching.
 */
LIST_HEAD(nchashhead, namecache) *nchashtbl;
u_long	nchash;				/* size of hash table - 1 */
#define	NCHASH(cnp, dvp)	\
	(((cnp)->cn_hash ^ ((uintptr_t)(dvp) >> 3)) & nchash)
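
/*
 * NCHASH mixes the name hash precomputed during lookup with the
 * directory vnode's address (shifted right to discard always-zero
 * alignment bits), so the same name under different directories
 * lands in different chains.
 */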

LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
u_long	ncvhash;			/* size of hash table - 1 */
#define	NCVHASH(vp)	(((uintptr_t)(vp) >> 3) & ncvhash)

long	numcache;		/* number of cache entries allocated */
static u_int	cache_gcpend;	/* number of entries pending GC */
static void	*cache_gcqueue;	/* garbage collection queue */

TAILQ_HEAD(, namecache) nclruhead =	/* LRU chain */
	TAILQ_HEAD_INITIALIZER(nclruhead);
#define	COUNT(x)	nchstats.x++
struct	nchstats nchstats;	/* cache effectiveness statistics */

static pool_cache_t namecache_cache;

MALLOC_DEFINE(M_CACHE, "namecache", "Dynamically allocated cache entries");

int cache_lowat = 95;
int cache_hiwat = 98;
int cache_hottime = 5;		/* number of seconds */
int doingcache = 1;		/* 1 => enable the cache */

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

/* A single lock to serialize modifications. */
static kmutex_t *namecache_lock;

static void	cache_invalidate(struct namecache *);
static inline struct namecache *cache_lookup_entry(
    const struct vnode *, const struct componentname *);
static void	cache_thread(void *);
static void	cache_disassociate(struct namecache *);
static void	cache_reclaim(void);
static int	cache_ctor(void *, void *, int);
static void	cache_dtor(void *, void *);

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
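		/*
		 * Lock-free push of this entry onto the garbage
		 * collection queue: retry the compare-and-swap until
		 * no other thread has replaced the list head between
		 * our read and our update.
		 */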
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".
 */
static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_enter(ci->ci_data.cpu_cachelock);
	}
}

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_exit(ci->ci_data.cpu_cachelock);
	}
}

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const struct componentname *cnp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	ncpp = &nchashtbl[NCHASH(cnp, dvp)];

	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != cnp->cn_namelen ||
		    memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp == dvp) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

/*
 * Look for the name in the cache.  We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp->cn_nameptr pointing to the name of the entry being sought,
 * cnp->cn_namelen giving the length of the name, and cnp->cn_hash
 * containing a hash of the name.  If the lookup succeeds, the vnode
 * is locked, stored in *vpp and a status of zero is returned.  If the
 * lookup determines that the name does not exist (negative caching),
 * a status of ENOENT is returned.  If the lookup fails, or the vnode
 * could not be locked, a status of -1 is returned.
 */
int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct vnode *vp;
	kmutex_t *cpulock;
	int error;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		*vpp = NULL;
		return (-1);
	}

	if (cnp->cn_namelen > NCHNAMLEN) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_long);
		cnp->cn_flags &= ~MAKEENTRY;
		goto fail;
	}
	cpulock = curcpu()->ci_data.cpu_cachelock;
	mutex_enter(cpulock);
	ncp = cache_lookup_entry(dvp, cnp);
	if (ncp == NULL) {
		COUNT(ncs_miss);
		goto fail_wlock;
	}
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		COUNT(ncs_badhits);
		goto remove;
	} else if (ncp->nc_vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		cnp->cn_flags |= ncp->nc_flags;
		if (cnp->cn_nameiop != CREATE ||
		    (cnp->cn_flags & ISLASTCN) == 0) {
			COUNT(ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(cpulock);
			return (ENOENT);
		} else {
			COUNT(ncs_badhits);
			goto remove;
		}
	}

	vp = ncp->nc_vp;
	mutex_enter(&vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(cpulock);
	error = vget(vp, LK_NOWAIT | LK_INTERLOCK);

#ifdef DEBUG
	/*
	 * Since we released ncp->nc_lock,
	 * we can't use this pointer any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * this vnode is being cleaned out.
		 */
		COUNT(ncs_falsehits); /* XXX badhits? */
		goto fail;
	}

	if (vp == dvp) {	/* lookup on "." */
		error = 0;
	} else if (cnp->cn_flags & ISDOTDOT) {
		VOP_UNLOCK(dvp, 0);
		error = vn_lock(vp, LK_EXCLUSIVE);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		error = vn_lock(vp, LK_EXCLUSIVE);
	}

	/*
	 * Check that the lock succeeded.
	 */
	if (error) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_badhits);
		*vpp = NULL;
		return (-1);
	}

	/* Unlocked, but only for stats. */
	COUNT(ncs_goodhits);
	*vpp = vp;
	return (0);

remove:
	/*
	 * Last component and we are renaming or deleting: the cache
	 * entry is invalid, or we otherwise don't want it to exist.
	 */
	cache_invalidate(ncp);
	mutex_exit(&ncp->nc_lock);
fail_wlock:
	mutex_exit(cpulock);
fail:
	*vpp = NULL;
	return (-1);
}

int
cache_lookup_raw(struct vnode *dvp, struct vnode **vpp,
	struct componentname *cnp)
{
	struct namecache *ncp;
	struct vnode *vp;
	kmutex_t *cpulock;
	int error;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		*vpp = NULL;
		return (-1);
	}

	if (cnp->cn_namelen > NCHNAMLEN) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_long);
		cnp->cn_flags &= ~MAKEENTRY;
		goto fail;
	}
	cpulock = curcpu()->ci_data.cpu_cachelock;
	mutex_enter(cpulock);
	ncp = cache_lookup_entry(dvp, cnp);
	if (ncp == NULL) {
		COUNT(ncs_miss);
		goto fail_wlock;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		cnp->cn_flags |= ncp->nc_flags;
		COUNT(ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(cpulock);
		return (ENOENT);
	}
	mutex_enter(&vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(cpulock);
	error = vget(vp, LK_NOWAIT | LK_INTERLOCK);

	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * this vnode is being cleaned out.
		 */
		COUNT(ncs_falsehits); /* XXX badhits? */
		goto fail;
	}

	*vpp = vp;

	return 0;

fail_wlock:
	mutex_exit(cpulock);
fail:
	*vpp = NULL;
	return -1;
}

/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * Fill in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct ncvhashhead *nvcpp;
	char *bp;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) {		/* avoid pesky . entries.. */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(ncs_revhits);

			if (bufp) {
				bp = *bpp;
				bp -= ncp->nc_nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return (ERANGE);
				}
				memcpy(bp, ncp->nc_name, ncp->nc_nlen);
				*bpp = bp;
			}

			/* XXX MP: how do we know dvp won't evaporate? */
			*dvpp = dvp;
			mutex_exit(&ncp->nc_lock);
			mutex_exit(namecache_lock);
			return (0);
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(ncs_revmiss);
	mutex_exit(namecache_lock);
out:
	*dvpp = NULL;
	return (-1);
}
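
/*
 * Illustrative use of cache_revlookup() (a sketch only, assuming a
 * getcwd-style caller; the real consumer lives elsewhere in the VFS
 * code): walk towards the root, prepending one component per step
 * into a buffer that is filled from the end backwards.
 *
 *	char buf[MAXPATHLEN], *bp = buf + sizeof(buf) - 1;
 *	while (vp != rootvnode) {
 *		if (cache_revlookup(vp, &dvp, &bp, buf) != 0)
 *			break;		miss: fall back to a directory scan
 *		*--bp = '/';
 *		vp = dvp;
 *	}
 */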

/*
 * Add an entry to the cache
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;

#ifdef DIAGNOSTIC
	if (cnp->cn_namelen > NCHNAMLEN)
		panic("cache_enter: name too long");
#endif
	if (!doingcache)
		return;

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there's a duplicated entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, cnp);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Lock the new entry and record the vnode. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnp->cn_flags & ISWHITEOUT;
	}
	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	ncp->nc_nlen = cnp->cn_namelen;
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
	ncpp = &nchashtbl[NCHASH(cnp, dvp)];

	/*
	 * Flush updates before making visible in table.  No need for a
	 * memory barrier on the other side: to see modifications the
	 * list must be followed, meaning a dependent pointer load.
	 * The below is LIST_INSERT_HEAD() inlined, with the memory
	 * barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd, and by the
	 * Linux procfs exe node) for directories.
	 */
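	/* The condition below skips entries named "." or "..". */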
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
	int error;

	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	    "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	    "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	    "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	    "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	    "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{

	ci->ci_data.cpu_cachelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Name cache reinitialization, for when the maximum number of vnodes
 * increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, HASH_LIST, oldmask1);
	hashdone(oldhash2, HASH_LIST, oldmask2);
}

/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid
 */
void
cache_purge1(struct vnode *vp, const struct componentname *cnp, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (cnp != NULL) {
		ncp = cache_lookup_entry(vp, cnp);
		if (ncp) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			mutex_exit(&ncp->nc_lock);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, a whole filesystem; called when filesys is umounted to
 * remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}

/*
 * Scan global list invalidating entries until we meet a preset target.
 * Prefer to invalidate entries that have not scored a hit within
 * cache_hottime seconds.  We sort the LRU list only for this routine's
 * benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp->nc_dvp == NULL)
			continue;
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourselves, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
		if (!tryharder && ncp->nc_hittime > recent) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
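	/*
	 * Worked example (numbers assumed for illustration): with
	 * desiredvnodes == 1000 and the default cache_hiwat (98) and
	 * cache_lowat (95), pruning starts once more than 980 entries
	 * are live and stops when we are back down to 950.
	 */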
	items = numcache - cache_gcpend;
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 * => keep number of entries below the high water mark
 * => sort pseudo-LRU list
 * => garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif