/*	$NetBSD: vfs_cache.c,v 1.102 2014/12/07 22:23:38 dennis Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.102 2014/12/07 22:23:38 dennis Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>
#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by hash value
 * obtained from (dvp, name) where dvp refers to the directory
 * containing name.
 *
 * For simplicity (and economy of storage), names longer than
 * a maximum length of NCHNAMLEN are not cached; they occur
 * infrequently in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 * The entry is also dropped when it was not possible to lock
 * the cached vnode, either because vget() failed or the generation
 * number has changed while waiting for the lock.
 */

/*
 * The locking in this subsystem works as follows:
 *
 * When an entry is added to the cache, via cache_enter(),
 * namecache_lock is taken to exclude other writers.  The new
 * entry is added to the hash list in a way which permits
 * concurrent lookups and invalidations in the cache done on
 * other CPUs to continue in parallel.
 *
 * When a lookup is done in the cache, via cache_lookup() or
 * cache_lookup_raw(), the per-cpu lock below is taken.  This
 * protects calls to cache_lookup_entry() and cache_invalidate()
 * against cache_reclaim() but allows lookups to continue in
 * parallel with cache_enter().
 *
 * cache_revlookup() takes namecache_lock to exclude cache_enter()
 * and cache_reclaim() since the list it operates on is not
 * maintained to allow concurrent reads.
 *
 * When cache_reclaim() is called namecache_lock is held to hold
 * off calls to cache_enter()/cache_revlookup() and each of the
 * per-cpu locks is taken to hold off lookups.  Holding all these
 * locks essentially idles the subsystem, ensuring there are no
 * concurrent references to the cache entries being freed.
 *
 * As a side effect while running cache_reclaim(), once the per-cpu
 * locks are held the per-cpu stats are sampled, added to the
 * subsystem total and zeroed.  Only some of the per-cpu stats are
 * incremented with the per-cpu lock held, however, and attempting
 * to add locking around the remaining counters currently
 * incremented without a lock can cause deadlock, so we don't
 * do that.  XXX Fix this up in a later revision.
 *
 * Per-cpu namecache data is defined next.
 */
struct nchcpu {
	kmutex_t	cpu_lock;
	struct nchstats	cpu_stats;
};
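
/*
 * Illustrative sketch (comment only, not compiled): a typical lookup
 * path touches its own CPU's nchcpu like this, mirroring the code in
 * cache_lookup() below:
 *
 *	struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;
 *
 *	mutex_enter(&cpup->cpu_lock);
 *	COUNT(cpup->cpu_stats, ncs_miss);	(bump a per-CPU counter)
 *	mutex_exit(&cpup->cpu_lock);
 *
 * cache_reclaim() later folds each cpu_stats into the global nchstats
 * while holding every cpu_lock, so in the common case each CPU only
 * touches its own cache line.
 */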

/*
 * The type for the hash code. While the hash function generates a
 * u32, the hash code has historically been passed around as a u_long,
 * and the value is modified by xor'ing a uintptr_t, so it's not
 * entirely clear what the best type is. For now I'll leave it
 * unchanged as u_long.
 */

typedef u_long nchash_t;

/*
 * Structures associated with name caching.
 */

static kmutex_t *namecache_lock __read_mostly;
static pool_cache_t namecache_cache __read_mostly;
static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;

static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly;
static u_long	nchash __read_mostly;

#define	NCHASH2(hash, dvp)	\
	(((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash)

static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly;
static u_long	ncvhash __read_mostly;

#define	NCVHASH(vp)		(((uintptr_t)(vp) >> 3) & ncvhash)

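/*
 * Illustrative note: the forward table is indexed by a hash of the
 * (directory vnode, name) pair, the reverse table by the child vnode
 * alone.  The two indexing expressions, as used later in this file,
 * are:
 *
 *	nchashtbl[NCHASH2(cache_hash(name, namelen), dvp)]
 *	ncvhashtbl[NCVHASH(vp)]
 *
 * The ">> 3" drops the low bits of the vnode address, which carry
 * little information given the allocator's alignment.
 */
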
/* Number of cache entries allocated. */
static long	numcache __cacheline_aligned;

/* Garbage collection queue and number of entries pending in it. */
static void	*cache_gcqueue;
static u_int	cache_gcpend;

/* Cache effectiveness statistics. */
struct nchstats	nchstats __cacheline_aligned;
#define	COUNT(c,x)	(c.x++)

static const int cache_lowat = 95;
static const int cache_hiwat = 98;
static const int cache_hottime = 5;	/* number of seconds */
static int doingcache = 1;		/* 1 => enable the cache */

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

static void cache_invalidate(struct namecache *);
static struct namecache *cache_lookup_entry(
    const struct vnode *, const char *, size_t);
static void cache_thread(void *);
static void cache_disassociate(struct namecache *);
static void cache_reclaim(void);
static int cache_ctor(void *, void *, int);
static void cache_dtor(void *, void *);

/*
 * Compute the hash for an entry.
 *
 * (This is for now a wrapper around namei_hash, whose interface is
 * for the time being slightly inconvenient.)
 */
static nchash_t
cache_hash(const char *name, size_t namelen)
{
	const char *endptr;

	endptr = name + namelen;
	return namei_hash(name, &endptr);
}

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
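		/*
		 * Push the entry onto the lock-free garbage collection
		 * stack: re-read the head and retry whenever another
		 * CPU pushed concurrently and the compare-and-swap
		 * fails.
		 */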
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".  As a side effect, each CPU's stats
 * are folded into the global nchstats and zeroed via the UPDATE()
 * macro below.
 */
#define	UPDATE(f) do { \
	nchstats.f += cpup->cpu_stats.f; \
	cpup->cpu_stats.f = 0; \
} while (/* CONSTCOND */ 0)

static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_enter(&cpup->cpu_lock);
		UPDATE(ncs_goodhits);
		UPDATE(ncs_neghits);
		UPDATE(ncs_badhits);
		UPDATE(ncs_falsehits);
		UPDATE(ncs_miss);
		UPDATE(ncs_long);
		UPDATE(ncs_pass2);
		UPDATE(ncs_2passes);
		UPDATE(ncs_revhits);
		UPDATE(ncs_revmiss);
	}
}

#undef UPDATE

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_exit(&cpup->cpu_lock);
	}
}

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;
	nchash_t hash;

	KASSERT(dvp != NULL);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != namelen ||
		    memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (__predict_true(ncp->nc_dvp == dvp)) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

/*
 * Look for the name in the cache. We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with DVP pointing to the directory to search,
 * and CNP providing the name of the entry being sought: cn_nameptr
 * is the name, cn_namelen is its length, and cn_flags is the flags
 * word from the namei operation.
 *
 * DVP must be locked.
 *
 * There are three possible non-error return states:
 *    1. Nothing was found in the cache. Nothing is known about
 *       the requested name.
 *    2. A negative entry was found in the cache, meaning that the
 *       requested name definitely does not exist.
 *    3. A positive entry was found in the cache, meaning that the
 *       requested name does exist and that we are providing the
 *       vnode.
 * In these cases the results are:
 *    1. 0 returned; VN is set to NULL.
 *    2. 1 returned; VN is set to NULL.
 *    3. 1 returned; VN is set to the vnode found.
 *
 * The additional result argument ISWHT is set to zero, unless a
 * negative entry is found that was entered as a whiteout, in which
 * case ISWHT is set to one.
 *
 * The ISWHT_RET argument pointer may be null. In this case an
 * assertion is made that the whiteout flag is not set. File systems
 * that do not support whiteouts can/should do this.
 *
 * Filesystems that do support whiteouts should add ISWHITEOUT to
 * cnp->cn_flags if ISWHT comes back nonzero.
 *
 * When a vnode is returned, it is referenced but unlocked, as per
 * the vnode locking protocol.
 *
 * There is no way for this function to fail, in the sense of
 * generating an error that requires aborting the namei operation.
 *
 * (Prior to October 2012, this function returned an integer status,
 * and a vnode, and mucked with the flags word in CNP for whiteouts.
 * The integer status was -1 for "nothing found", ENOENT for "a
 * negative entry found", 0 for "a positive entry found", and possibly
 * other errors, and the value of VN might or might not have been set
 * depending on what error occurred.)
 */
int
cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
	     uint32_t nameiop, uint32_t cnflags,
	     int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default result values */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if ((cnflags & MAKEENTRY) == 0) {
		COUNT(cpup->cpu_stats, ncs_badhits);
		/*
		 * Last component and we are renaming or deleting,
		 * the cache entry is invalid, or otherwise don't
		 * want cache entry to exist.
		 */
		cache_invalidate(ncp);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if (ncp->nc_vp == NULL) {
		if (iswht_ret != NULL) {
			/*
			 * Restore the ISWHITEOUT flag saved earlier.
			 */
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		} else {
			KASSERT(ncp->nc_flags == 0);
		}

		if (__predict_true(nameiop != CREATE ||
		    (cnflags & ISLASTCN) == 0)) {
			COUNT(cpup->cpu_stats, ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found negative entry; vn is already null from above */
			return 1;
		} else {
			COUNT(cpup->cpu_stats, ncs_badhits);
			/*
			 * Last component and we are renaming or
			 * deleting, the cache entry is invalid,
			 * or otherwise don't want cache entry to
			 * exist.
			 */
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found nothing */
			return 0;
		}
	}

	vp = ncp->nc_vp;
	mutex_enter(vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(&cpup->cpu_lock);
	error = vget(vp, LK_NOWAIT);
	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 * XXX badhits?
		 */
		COUNT(cpup->cpu_stats, ncs_falsehits);
		/* found nothing */
		return 0;
	}

#ifdef DEBUG
	/*
	 * Since we released ncp->nc_lock, we can't use this
	 * pointer any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	/* We don't have the right lock, but this is only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits);

	/* found it */
	*vn_ret = vp;
	return 1;
}

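/*
 * Illustrative sketch (not compiled): how a file system's lookup VOP
 * might consult the cache before scanning the directory on disk.  The
 * names "xyzfs_lookup" and "xyzfs_scan_dir" are hypothetical; the
 * calls to cache_lookup() and cache_enter() match the interfaces in
 * this file.  A real file system supporting whiteouts would also pass
 * an iswht_ret pointer.
 */
#if 0
static int
xyzfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	int error;

	/* 1 => cache answered, positively (*vpp set) or negatively. */
	if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_nameiop, cnp->cn_flags, NULL, vpp)) {
		return (*vpp == NULL ? ENOENT : 0);
	}

	/* Cache miss: do the real work, then prime the cache. */
	error = xyzfs_scan_dir(dvp, cnp, vpp);	/* hypothetical helper */
	if (error == 0 || error == ENOENT) {
		cache_enter(dvp, error ? NULL : *vpp, cnp->cn_nameptr,
		    cnp->cn_namelen, cnp->cn_flags);
	}
	return error;
}
#endif
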
int
cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen,
		 uint32_t cnflags,
		 int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default results. */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		/* found nothing */
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		if (iswht_ret != NULL) {
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			/*cnp->cn_flags |= ncp->nc_flags;*/
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		}
		COUNT(cpup->cpu_stats, ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found negative entry; vn is already null from above */
		return 1;
	}
	mutex_enter(vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(&cpup->cpu_lock);
	error = vget(vp, LK_NOWAIT);
	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 * XXX badhits?
		 */
		COUNT(cpup->cpu_stats, ncs_falsehits);
		/* found nothing */
		return 0;
	}

	/* Unlocked, but only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits); /* XXX can be "badhits" */

	/* found it */
	*vn_ret = vp;
	return 1;
}

/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * If the lookup succeeds the vnode is referenced and stored in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct nchcpu *cpup;
	struct ncvhashhead *nvcpp;
	char *bp;
	int error, nlen;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];
	cpup = curcpu()->ci_data.cpu_nch;

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) {		/* avoid pesky . entries.. */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(cpup->cpu_stats, ncs_revhits);
			nlen = ncp->nc_nlen;

			if (bufp) {
				bp = *bpp;
				bp -= nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return (ERANGE);
				}
				memcpy(bp, ncp->nc_name, nlen);
				*bpp = bp;
			}

			mutex_enter(dvp->v_interlock);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(namecache_lock);
			error = vget(dvp, LK_NOWAIT);
			if (error) {
				KASSERT(error == EBUSY);
				if (bufp)
					(*bpp) += nlen;
				*dvpp = NULL;
				return -1;
			}
			*dvpp = dvp;
			return (0);
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(cpup->cpu_stats, ncs_revmiss);
	mutex_exit(namecache_lock);
 out:
	*dvpp = NULL;
	return (-1);
}

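/*
 * Illustrative sketch (not compiled): getcwd-style use of the
 * backwards buffer convention.  Each successful call copies a name in
 * just before *bpp and moves bpp back, so walking from a vnode toward
 * the root builds the path right-to-left.  This is a simplification
 * of what the real getcwd code does; the error handling and buffer
 * management here are assumptions, not the actual implementation.
 */
#if 0
	struct vnode *dvp;
	char *bufp = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	char *bpp = bufp + MAXPATHLEN;		/* start at the end */
	int error;

	*(--bpp) = '\0';			/* terminate the path */
	while (vp != rootvnode) {
		/* copies the name in front of *bpp, moves bpp back */
		error = cache_revlookup(vp, &dvp, &bpp, bufp);
		if (error)			/* -1 on miss, ERANGE if full */
			break;			/* fall back to scanning the directory */
		*(--bpp) = '/';			/* separator before the name */
		vrele(vp);
		vp = dvp;			/* continue from the parent */
	}
	/* on success the path now starts at bpp */
#endif
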
/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp,
	    const char *name, size_t namelen, uint32_t cnflags)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;
	nchash_t hash;

	/* First, check whether we can/should add a cache entry. */
	if ((cnflags & MAKEENTRY) == 0 ||
	    __predict_false(namelen > NCHNAMLEN || !doingcache)) {
		return;
	}

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there's a duplicated entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, name, namelen);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Grab the vnode we just found. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnflags & ISWHITEOUT;
	}

	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	KASSERT(namelen <= NCHNAMLEN);
	ncp->nc_nlen = namelen;
	memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen);
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	/*
	 * Flush updates before making visible in table.  No need for a
	 * memory barrier on the other side: to see modifications the
	 * list must be followed, meaning a dependent pointer load.
	 * The below is LIST_INSERT_HEAD() inlined, with the memory
	 * barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd) for directories.
	 * (and in linux procfs exe node)
	 */
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

/*
 * Name cache initialization, from vfs_init() when we are booting.
 */
void
nchinit(void)
{
	int error;

	TAILQ_INIT(&nclruhead);
	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	   "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	   "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	   "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{
	struct nchcpu *cpup;
	size_t sz;

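	/*
	 * Over-allocate and round the pointer up so that each CPU's
	 * nchcpu sits in its own cache line(s); this avoids false
	 * sharing of the per-CPU lock and counters between CPUs.
	 */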
	sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit;
	cpup = kmem_zalloc(sz, KM_SLEEP);
	cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit);
	mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE);
	ci->ci_data.cpu_nch = cpup;
}

/*
 * Name cache reinitialization, for when the maximum number of vnodes
 * increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, HASH_LIST, oldmask1);
	hashdone(oldhash2, HASH_LIST, oldmask2);
}

/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid.
 */
void
cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (name != NULL) {
		ncp = cache_lookup_entry(vp, name, namelen);
		if (ncp) {
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, a whole file system; called when a file system is
 * unmounted to remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}

/*
 * Scan the global list, invalidating entries until we meet a preset
 * target.  Prefer to invalidate entries that have not scored a hit
 * within cache_hottime seconds.  We sort the LRU list only for this
 * routine's benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourself, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
		if (ncp->nc_dvp == NULL)
			continue;
		if (!tryharder && (ncp->nc_hittime - recent) > 0) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
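	/*
	 * Worked example (numbers assumed for illustration): with
	 * desiredvnodes = 100000 and the default cache_hiwat/cache_lowat
	 * of 98/95, pruning starts once more than 98000 live entries
	 * exist and stops when the count drops to 95000.
	 */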
	items = numcache - cache_gcpend;
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 * => keep number of entries below the high water mark
 * => sort pseudo-LRU list
 * => garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif

void
namecache_count_pass2(void)
{
	struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;

	COUNT(cpup->cpu_stats, ncs_pass2);
}

void
namecache_count_2passes(void)
{
	struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;

	COUNT(cpup->cpu_stats, ncs_2passes);
}

static int
cache_stat_sysctl(SYSCTLFN_ARGS)
{
	struct nchstats_sysctl stats;

	if (oldp == NULL) {
		*oldlenp = sizeof(stats);
		return 0;
	}

	if (*oldlenp < sizeof(stats)) {
		*oldlenp = 0;
		return 0;
	}

	memset(&stats, 0, sizeof(stats));

	sysctl_unlock();
	cache_lock_cpus();
	stats.ncs_goodhits = nchstats.ncs_goodhits;
	stats.ncs_neghits = nchstats.ncs_neghits;
	stats.ncs_badhits = nchstats.ncs_badhits;
	stats.ncs_falsehits = nchstats.ncs_falsehits;
	stats.ncs_miss = nchstats.ncs_miss;
	stats.ncs_long = nchstats.ncs_long;
	stats.ncs_pass2 = nchstats.ncs_pass2;
	stats.ncs_2passes = nchstats.ncs_2passes;
	stats.ncs_revhits = nchstats.ncs_revhits;
	stats.ncs_revmiss = nchstats.ncs_revmiss;
	cache_unlock_cpus();
	sysctl_relock();

	*oldlenp = sizeof(stats);
	return sysctl_copyout(l, &stats, oldp, sizeof(stats));
}

SYSCTL_SETUP(sysctl_cache_stat_setup, "vfs.namecache_stats subtree setup")
{
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "namecache_stats",
		       SYSCTL_DESCR("namecache statistics"),
		       cache_stat_sysctl, 0, NULL, 0,
		       CTL_VFS, CTL_CREATE, CTL_EOL);
}
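
/*
 * Illustrative sketch (userland, not compiled here): reading the
 * statistics exported above.  The handler implements the usual
 * two-step sysctl protocol, so querying with a NULL oldp returns the
 * required size.  The field type is not asserted here, hence the
 * cast when printing.
 */
#if 0
	struct nchstats_sysctl stats;
	size_t len = sizeof(stats);

	if (sysctlbyname("vfs.namecache_stats", &stats, &len, NULL, 0) == 0)
		printf("good hits: %llu\n",
		    (unsigned long long)stats.ncs_goodhits);
#endif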