/*	$NetBSD: vfs_cache.c,v 1.91 2012/11/05 17:27:39 dholland Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.91 2012/11/05 17:27:39 dholland Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/kmem.h>		/* kmem_zalloc(), used by cache_cpu_init() */
#include <sys/cpu.h>
#include <sys/evcnt.h>

#define NAMECACHE_ENTER_REVERSE
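/*
 * With NAMECACHE_ENTER_REVERSE defined, reverse-cache entries are made
 * for all vnodes rather than for directories only, and the reverse hash
 * table is sized accordingly (see cache_enter() and nchinit() below).
 */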
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by hash value
 * obtained from (dvp, name) where dvp refers to the directory
 * containing name.
 *
 * For simplicity (and economy of storage), names longer than
 * a maximum length of NCHNAMLEN are not cached; they occur
 * infrequently in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 * The entry is also dropped when it was not possible to lock
 * the cached vnode, either because vget() failed or the generation
 * number has changed while waiting for the lock.
 */

/*
 * Per-cpu namecache data.
 */
struct nchcpu {
	kmutex_t	cpu_lock;
	struct nchstats	cpu_stats;
};

/*
 * The type for the hash code.  While the hash function generates a
 * u32, the hash code has historically been passed around as a u_long,
 * and the value is modified by xor'ing a uintptr_t, so it's not
 * entirely clear what the best type is.  For now I'll leave it
 * unchanged as u_long.
 */

typedef u_long nchash_t;

/*
 * Structures associated with name caching.
 */

static kmutex_t *namecache_lock __read_mostly;
static pool_cache_t namecache_cache __read_mostly;
static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;

static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly;
static u_long	nchash __read_mostly;

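/*
 * Hash chain selection for forward lookups: the name hash is mixed
 * with the directory vnode pointer (shifted right to discard the low
 * alignment bits) and masked into the table.
 */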
#define	NCHASH2(hash, dvp)	\
	(((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash)

static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly;
static u_long	ncvhash __read_mostly;

#define	NCVHASH(vp)	(((uintptr_t)(vp) >> 3) & ncvhash)

/* Number of cache entries allocated. */
static long	numcache __cacheline_aligned;

/* Garbage collection queue and number of entries pending in it. */
static void	*cache_gcqueue;
static u_int	cache_gcpend;

/* Cache effectiveness statistics. */
struct nchstats	nchstats __cacheline_aligned;
#define	COUNT(c,x)	(c.x++)

static const int cache_lowat = 95;
static const int cache_hiwat = 98;
static const int cache_hottime = 5;	/* number of seconds */
static int doingcache = 1;		/* 1 => enable the cache */
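/*
 * cache_lowat and cache_hiwat are percentages of desiredvnodes:
 * cache_reclaim() starts pruning once the number of live entries
 * exceeds the high water mark and stops at the low water mark.
 * Entries hit within the last cache_hottime seconds are preferred
 * for retention by cache_prune().
 */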

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

static void cache_invalidate(struct namecache *);
static struct namecache *cache_lookup_entry(
    const struct vnode *, const char *, size_t);
static void cache_thread(void *);
static void cache_disassociate(struct namecache *);
static void cache_reclaim(void);
static int cache_ctor(void *, void *, int);
static void cache_dtor(void *, void *);

/*
 * Compute the hash for an entry.
 *
 * (This is for now a wrapper around namei_hash, whose interface is
 * for the time being slightly inconvenient.)
 */
static nchash_t
cache_hash(const char *name, size_t namelen)
{
	const char *endptr;

	endptr = name + namelen;
	return namei_hash(name, &endptr);
}

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".
 */
static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;
	long *s, *d, *m;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_enter(&cpup->cpu_lock);

		/* Collate statistics. */
		d = (long *)&nchstats;
		s = (long *)&cpup->cpu_stats;
		m = s + sizeof(nchstats) / sizeof(long);
		for (; s < m; s++, d++) {
			*d += *s;
			*s = 0;
		}
	}
}

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_exit(&cpup->cpu_lock);
	}
}

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;
	nchash_t hash;

	KASSERT(dvp != NULL);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != namelen ||
		    memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (__predict_true(ncp->nc_dvp == dvp)) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

/*
 * Look for the name in the cache.  We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with DVP pointing to the directory to search,
 * NAME and NAMELEN giving the name of the entry being sought,
 * NAMEIOP giving the namei operation, and CNFLAGS the flags word
 * from the namei operation.
 *
 * DVP must be locked.
 *
 * There are three possible non-error return states:
 *    1. Nothing was found in the cache.  Nothing is known about
 *       the requested name.
 *    2. A negative entry was found in the cache, meaning that the
 *       requested name definitely does not exist.
 *    3. A positive entry was found in the cache, meaning that the
 *       requested name does exist and that we are providing the
 *       vnode.
 * In these cases the results are:
 *    1. 0 returned; VN is set to NULL.
 *    2. 1 returned; VN is set to NULL.
 *    3. 1 returned; VN is set to the vnode found.
 *
 * The additional result argument ISWHT is set to zero, unless a
 * negative entry is found that was entered as a whiteout, in which
 * case ISWHT is set to one.
 *
 * The ISWHT_RET argument pointer may be null.  In this case an
 * assertion is made that the whiteout flag is not set.  File systems
 * that do not support whiteouts can/should do this.
 *
 * Filesystems that do support whiteouts should add ISWHITEOUT to
 * cnp->cn_flags if ISWHT comes back nonzero.
 *
 * When a vnode is returned, it is locked, as per the vnode lookup
 * locking protocol.
 *
 * There is no way for this function to fail, in the sense of
 * generating an error that requires aborting the namei operation.
 *
 * (Prior to October 2012, this function returned an integer status,
 * and a vnode, and mucked with the flags word in CNP for whiteouts.
 * The integer status was -1 for "nothing found", ENOENT for "a
 * negative entry found", 0 for "a positive entry found", and possibly
 * other errors, and the value of VN might or might not have been set
 * depending on what error occurred.)
 */
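/*
 * Illustrative sketch (hypothetical caller, not lifted from any real
 * filesystem) of how a lookup VOP might consume the results described
 * above:
 *
 *	int found, iswht;
 *	struct vnode *vp;
 *
 *	found = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 *	    cnp->cn_nameiop, cnp->cn_flags, &iswht, &vp);
 *	if (!found)
 *		... nothing known: fall through and scan the directory ...
 *	else if (vp == NULL)
 *		... negative entry: the name definitely does not exist;
 *		    add ISWHITEOUT to cnp->cn_flags if iswht is nonzero ...
 *	else
 *		... positive entry: vp is referenced and locked ...
 */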
int
cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
	     uint32_t nameiop, uint32_t cnflags,
	     int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default result values */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if ((cnflags & MAKEENTRY) == 0) {
		COUNT(cpup->cpu_stats, ncs_badhits);
		/*
		 * Last component and we are renaming or deleting,
		 * the cache entry is invalid, or otherwise don't
		 * want cache entry to exist.
		 */
		cache_invalidate(ncp);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if (ncp->nc_vp == NULL) {
		if (iswht_ret != NULL) {
			/*
			 * Restore the ISWHITEOUT flag saved earlier.
			 */
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		} else {
			KASSERT(ncp->nc_flags == 0);
		}

		if (__predict_true(nameiop != CREATE ||
		    (cnflags & ISLASTCN) == 0)) {
			COUNT(cpup->cpu_stats, ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found neg entry; vn is already null from above */
			return 1;
		} else {
			COUNT(cpup->cpu_stats, ncs_badhits);
			/*
			 * Last component and we are renaming or
			 * deleting, the cache entry is invalid,
			 * or otherwise don't want cache entry to
			 * exist.
			 */
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found nothing */
			return 0;
		}
	}

	vp = ncp->nc_vp;
	if (vtryget(vp)) {
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
	} else {
		mutex_enter(vp->v_interlock);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		error = vget(vp, LK_NOWAIT);
		if (error) {
			KASSERT(error == EBUSY);
			/*
			 * This vnode is being cleaned out.
			 * XXX badhits?
			 */
			COUNT(cpup->cpu_stats, ncs_falsehits);
			/* found nothing */
			return 0;
		}
	}

#ifdef DEBUG
	/*
	 * Since we released ncp->nc_lock, we can't use this pointer
	 * any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	if (vp == dvp) {	/* lookup on "." */
		error = 0;
	} else if (cnflags & ISDOTDOT) {
		VOP_UNLOCK(dvp);
		error = vn_lock(vp, LK_EXCLUSIVE);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		error = vn_lock(vp, LK_EXCLUSIVE);
	}

	/*
	 * Check that the lock succeeded.
	 */
	if (error) {
		/* We don't have the right lock, but this is only for stats. */
		COUNT(cpup->cpu_stats, ncs_badhits);

		vrele(vp);
		/* found nothing */
		return 0;
	}

	/* We don't have the right lock, but this is only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits);

	/* found it */
	*vn_ret = vp;
	return 1;
}

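/*
 * Cut-down variant of the above, for callers that just want the cached
 * vnode: on a hit the vnode is referenced but not locked, no nameiop is
 * taken into account, and entries are never invalidated here.
 */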
int
cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen,
		 uint32_t cnflags,
		 int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default results. */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		/* found nothing */
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		if (iswht_ret != NULL) {
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			/*cnp->cn_flags |= ncp->nc_flags;*/
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		}
		COUNT(cpup->cpu_stats, ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found negative entry; vn is already null from above */
		return 1;
	}
	if (vtryget(vp)) {
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
	} else {
		mutex_enter(vp->v_interlock);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		error = vget(vp, LK_NOWAIT);
		if (error) {
			KASSERT(error == EBUSY);
			/*
			 * This vnode is being cleaned out.
			 * XXX badhits?
			 */
			COUNT(cpup->cpu_stats, ncs_falsehits);
			/* found nothing */
			return 0;
		}
	}

	/* Unlocked, but only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits);	/* XXX can be "badhits" */

	/* found it */
	*vn_ret = vp;
	return 1;
}

/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * If the lookup succeeds the vnode is referenced and stored in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
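/*
 * Illustrative sketch (hypothetical getcwd-style caller) of the buffer
 * convention: the caller works right-to-left within bufp, and the name
 * is written immediately before *bpp:
 *
 *	char buf[MAXPATHLEN];
 *	char *bp = buf + sizeof(buf);
 *	struct vnode *dvp;
 *	int error;
 *
 *	*--bp = '\0';
 *	error = cache_revlookup(vp, &dvp, &bp, buf);
 *
 * On success (0), bp points at the start of the component name and dvp
 * is referenced; the caller would typically prepend a '/' and repeat
 * with dvp.  A return of -1 means a cache miss, and positive values
 * are errnos (ERANGE when the name will not fit before bufp).
 */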
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct ncvhashhead *nvcpp;
	char *bp;
	int error, nlen;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) {		/* avoid pesky . entries.. */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(nchstats, ncs_revhits);
			nlen = ncp->nc_nlen;

			if (bufp) {
				bp = *bpp;
				bp -= nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return (ERANGE);
				}
				memcpy(bp, ncp->nc_name, nlen);
				*bpp = bp;
			}

			if (vtryget(dvp)) {
				mutex_exit(&ncp->nc_lock);
				mutex_exit(namecache_lock);
			} else {
				mutex_enter(dvp->v_interlock);
				mutex_exit(&ncp->nc_lock);
				mutex_exit(namecache_lock);
				error = vget(dvp, LK_NOWAIT);
				if (error) {
					KASSERT(error == EBUSY);
					if (bufp)
						(*bpp) += nlen;
					*dvpp = NULL;
					return -1;
				}
			}
			*dvpp = dvp;
			return (0);
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(nchstats, ncs_revmiss);
	mutex_exit(namecache_lock);
 out:
	*dvpp = NULL;
	return (-1);
}

/*
 * Add an entry to the cache.
 */
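/*
 * Typical use (a sketch, not a requirement imposed by this code): a
 * filesystem's lookup routine calls cache_enter() after resolving a
 * component, passing the vnode it found, or NULL to record a negative
 * entry for a name that does not exist.  Entries are only made when
 * MAKEENTRY is set in cnflags and the name fits in NCHNAMLEN.
 */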
void
cache_enter(struct vnode *dvp, struct vnode *vp,
	    const char *name, size_t namelen, uint32_t cnflags)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;
	nchash_t hash;

	/* First, check whether we can/should add a cache entry. */
	if ((cnflags & MAKEENTRY) == 0 ||
	    __predict_false(namelen > NCHNAMLEN || !doingcache)) {
		return;
	}

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there's a duplicated entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, name, namelen);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Grab the vnode we just found. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnflags & ISWHITEOUT;
	}

	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	KASSERT(namelen <= NCHNAMLEN);
	ncp->nc_nlen = namelen;
	memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen);
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	/*
	 * Flush updates before making visible in table.  No need for a
	 * memory barrier on the other side: to see modifications the
	 * list must be followed, meaning a dependent pointer load.
	 * The below is LIST_INSERT_HEAD() inlined, with the memory
	 * barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd) for directories.
	 * (and in linux procfs exe node)
	 */
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

/*
 * Name cache initialization, from vfs_init() when we are booting.
 */
void
nchinit(void)
{
	int error;

	TAILQ_INIT(&nclruhead);
	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	    "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	    "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	    "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	    "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	    "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{
	struct nchcpu *cpup;
	size_t sz;

	sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit;
	cpup = kmem_zalloc(sz, KM_SLEEP);
	cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit);
	mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE);
	ci->ci_data.cpu_nch = cpup;
}

/*
 * Name cache reinitialization, for when the maximum number of vnodes
 * increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, HASH_LIST, oldmask1);
	hashdone(oldhash2, HASH_LIST, oldmask2);
}

/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid.
 */
void
cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (name != NULL) {
		ncp = cache_lookup_entry(vp, name, namelen);
		if (ncp) {
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, a whole filesystem; called when a filesystem is unmounted
 * to remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}

/*
 * Scan global list invalidating entries until we meet a preset target.
 * Prefer to invalidate entries that have not scored a hit within
 * cache_hottime seconds.  We sort the LRU list only for this routine's
 * benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp->nc_dvp == NULL)
			continue;
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourself, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
		if (!tryharder && (ncp->nc_hittime - recent) > 0) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
	items = numcache - cache_gcpend;
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 * => keep number of entries below the high water mark
 * => sort pseudo-LRU list
 * => garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif