/*	$NetBSD: vfs_vnode.c,v 1.156 2024/12/07 02:27:38 riastradh Exp $	*/

/*-
 * Copyright (c) 1997-2011, 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * The vnode cache subsystem.
 *
 * Life-cycle
 *
 *	Normally, there are two points where new vnodes are created:
 *	VOP_CREATE(9) and VOP_LOOKUP(9).  The life-cycle of a vnode
 *	starts in one of the following ways:
 *
 *	- Allocation, via vcache_get(9) or vcache_new(9).
 *	- Reclamation of an inactive vnode, via vcache_vget(9).
 *
 *	Recycling from a free list, via getnewvnode(9) -> getcleanvnode(9),
 *	was another, traditional way.  Currently, only the draining thread
 *	recycles vnodes.  This behaviour might be revisited.
 *
 *	The life-cycle ends when the last reference is dropped, usually
 *	in VOP_REMOVE(9).  In that case, VOP_INACTIVE(9) is called to inform
 *	the file system that the vnode is inactive.  Via this call, the file
 *	system indicates whether the vnode can be recycled (usually, it
 *	checks its own references, e.g. the link count, or whether the file
 *	was removed).
 *
 *	Depending on that indication, the vnode can be put onto a free list
 *	(cache), or cleaned via vcache_reclaim, which calls VOP_RECLAIM(9)
 *	to disassociate the underlying file system from the vnode, and
 *	finally destroyed.
 *
 * Vnode state
 *
 *	A vnode is always in one of six states:
 *	- MARKER	This is a marker vnode to help list traversal.  It
 *			will never change its state.
 *	- LOADING	Vnode is associating with the underlying file system
 *			and is not yet ready to use.
 *	- LOADED	Vnode has associated with the underlying file system
 *			and is ready to use.
 *	- BLOCKED	Vnode is active but cannot get new references.
 *	- RECLAIMING	Vnode is disassociating from the underlying file
 *			system.
 *	- RECLAIMED	Vnode has disassociated from the underlying file
 *			system and is dead.
 *
 *	Valid state changes are:
 *	LOADING -> LOADED
 *			Vnode has been initialised in vcache_get() or
 *			vcache_new() and is ready to use.
 *	BLOCKED -> RECLAIMING
 *			Vnode starts disassociation from the underlying file
 *			system in vcache_reclaim().
 *	RECLAIMING -> RECLAIMED
 *			Vnode finished disassociation from the underlying file
 *			system in vcache_reclaim().
 *	LOADED -> BLOCKED
 *			Either vcache_rekey*() is changing the vnode key or
 *			vrelel() is about to call VOP_INACTIVE().
 *	BLOCKED -> LOADED
 *			The block condition is over.
 *	LOADING -> RECLAIMED
 *			Either vcache_get() or vcache_new() failed to
 *			associate the underlying file system, or
 *			vcache_rekey*() drops a vnode used as a placeholder.
 *
 *	Of these states LOADING, BLOCKED and RECLAIMING are intermediate,
 *	and it is possible to wait for a state change.
 *
 *	State is protected with v_interlock with one exception:
 *	to change from LOADING both v_interlock and vcache_lock must be held,
 *	so it is possible to check "state == LOADING" without holding
 *	v_interlock.  See vcache_get() for details.
 *
 * Reference counting
 *
 *	A vnode is considered active if its reference count
 *	(vnode_t::v_usecount) is non-zero.  The count is maintained with the
 *	vref(9) and vrele(9) routines, as well as vput(9).  Typical holders
 *	of references are, e.g., open files, current working directories,
 *	and mount points.
 *
 *	v_usecount is adjusted with atomic operations; however, to change
 *	from a non-zero value to zero the interlock must also be held.
 */
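
/*
 * Example (illustrative sketch, not a prescription)
 *
 *	A typical file system consumer gets a referenced vnode through
 *	vcache_get(9) and drops the reference with vput(9) or vrele(9).
 *	The inode-number key below is only an example; each file system
 *	passes whatever key it indexes its vnodes by.
 *
 *		struct vnode *vp;
 *		ino_t ino = ...;
 *		int error;
 *
 *		error = vcache_get(mp, &ino, sizeof(ino), &vp);
 *		if (error != 0)
 *			return error;
 *		vn_lock(vp, LK_SHARED | LK_RETRY);
 *		... operate on the locked, referenced vnode ...
 *		vput(vp);
 *
 *	vput(9) unlocks the vnode and drops the reference in one call;
 *	vrele(9) drops the reference without touching the vnode lock, and
 *	vref(9) adds a reference when the caller already holds one.
 */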

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.156 2024/12/07 02:27:38 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_pax.h"
#endif

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/fstrans.h>
#include <sys/hash.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/pax.h>
#include <sys/sdt.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/threadpool.h>
#include <sys/vnode_impl.h>
#include <sys/wapbl.h>

#include <miscfs/deadfs/deadfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>
#include <uvm/uvm_stat.h>

/* Flags to vrelel. */
#define	VRELEL_ASYNC	0x0001	/* Always defer to vrele thread.
*/ 190 1.104 ad 191 1.104 ad #define LRU_VRELE 0 192 1.104 ad #define LRU_FREE 1 193 1.104 ad #define LRU_HOLD 2 194 1.104 ad #define LRU_COUNT 3 195 1.1 rmind 196 1.16 rmind /* 197 1.63 hannken * There are three lru lists: one holds vnodes waiting for async release, 198 1.104 ad * one is for vnodes which have no buffer/page references and one for those 199 1.104 ad * which do (i.e. v_holdcnt is non-zero). We put the lists into a single, 200 1.104 ad * private cache line as vnodes migrate between them while under the same 201 1.104 ad * lock (vdrain_lock). 202 1.63 hannken */ 203 1.152 hannken 204 1.152 hannken typedef struct { 205 1.152 hannken vnode_impl_t *li_marker; 206 1.152 hannken } lru_iter_t; 207 1.152 hannken 208 1.104 ad u_int numvnodes __cacheline_aligned; 209 1.104 ad static vnodelst_t lru_list[LRU_COUNT] __cacheline_aligned; 210 1.152 hannken static struct threadpool *threadpool; 211 1.152 hannken static struct threadpool_job vdrain_job; 212 1.152 hannken static struct threadpool_job vrele_job; 213 1.63 hannken static kmutex_t vdrain_lock __cacheline_aligned; 214 1.57 hannken SLIST_HEAD(hashhead, vnode_impl); 215 1.69 hannken static kmutex_t vcache_lock __cacheline_aligned; 216 1.104 ad static kcondvar_t vcache_cv; 217 1.69 hannken static u_int vcache_hashsize; 218 1.69 hannken static u_long vcache_hashmask; 219 1.104 ad static struct hashhead *vcache_hashtab; 220 1.69 hannken static pool_cache_t vcache_pool; 221 1.63 hannken static void lru_requeue(vnode_t *, vnodelst_t *); 222 1.63 hannken static vnodelst_t * lru_which(vnode_t *); 223 1.152 hannken static vnode_impl_t * lru_iter_first(int, lru_iter_t *); 224 1.152 hannken static vnode_impl_t * lru_iter_next(lru_iter_t *); 225 1.152 hannken static void lru_iter_release(lru_iter_t *); 226 1.63 hannken static vnode_impl_t * vcache_alloc(void); 227 1.79 hannken static void vcache_dealloc(vnode_impl_t *); 228 1.57 hannken static void vcache_free(vnode_impl_t *); 229 1.36 hannken static void vcache_init(void); 230 1.36 hannken static void vcache_reinit(void); 231 1.54 hannken static void vcache_reclaim(vnode_t *); 232 1.152 hannken static void vrele_deferred(vnode_impl_t *); 233 1.107 ad static void vrelel(vnode_t *, int, int); 234 1.11 christos static void vnpanic(vnode_t *, const char *, ...) 235 1.18 christos __printflike(2, 3); 236 1.152 hannken static bool vdrain_one(u_int); 237 1.152 hannken static void vdrain_task(struct threadpool_job *); 238 1.152 hannken static void vrele_task(struct threadpool_job *); 239 1.1 rmind 240 1.1 rmind /* Routines having to do with the management of the vnode table. */ 241 1.1 rmind 242 1.120 ad /* 243 1.123 ad * The high bit of v_usecount is a gate for vcache_tryvget(). It's set 244 1.123 ad * only when the vnode state is LOADED. 245 1.132 hannken * The next bit of v_usecount is a flag for vrelel(). It's set 246 1.132 hannken * from vcache_vget() and vcache_tryvget() whenever the operation succeeds. 247 1.123 ad */ 248 1.132 hannken #define VUSECOUNT_MASK 0x3fffffff 249 1.123 ad #define VUSECOUNT_GATE 0x80000000 250 1.132 hannken #define VUSECOUNT_VGET 0x40000000 251 1.123 ad 252 1.123 ad /* 253 1.120 ad * Return the current usecount of a vnode. 254 1.120 ad */ 255 1.120 ad inline int 256 1.120 ad vrefcnt(struct vnode *vp) 257 1.120 ad { 258 1.120 ad 259 1.123 ad return atomic_load_relaxed(&vp->v_usecount) & VUSECOUNT_MASK; 260 1.120 ad } 261 1.120 ad 262 1.51 hannken /* Vnode state operations and diagnostics. 
*/ 263 1.51 hannken 264 1.51 hannken #if defined(DIAGNOSTIC) 265 1.51 hannken 266 1.94 hannken #define VSTATE_VALID(state) \ 267 1.94 hannken ((state) != VS_ACTIVE && (state) != VS_MARKER) 268 1.51 hannken #define VSTATE_GET(vp) \ 269 1.51 hannken vstate_assert_get((vp), __func__, __LINE__) 270 1.51 hannken #define VSTATE_CHANGE(vp, from, to) \ 271 1.51 hannken vstate_assert_change((vp), (from), (to), __func__, __LINE__) 272 1.51 hannken #define VSTATE_WAIT_STABLE(vp) \ 273 1.51 hannken vstate_assert_wait_stable((vp), __func__, __LINE__) 274 1.51 hannken 275 1.94 hannken void 276 1.99 joerg _vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line, 277 1.99 joerg bool has_lock) 278 1.51 hannken { 279 1.70 hannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 280 1.120 ad int refcnt = vrefcnt(vp); 281 1.51 hannken 282 1.99 joerg if (!has_lock) { 283 1.148 riastrad enum vnode_state vstate = atomic_load_relaxed(&vip->vi_state); 284 1.148 riastrad 285 1.120 ad if (state == VS_ACTIVE && refcnt > 0 && 286 1.148 riastrad (vstate == VS_LOADED || vstate == VS_BLOCKED)) 287 1.99 joerg return; 288 1.148 riastrad if (vstate == state) 289 1.99 joerg return; 290 1.99 joerg mutex_enter((vp)->v_interlock); 291 1.99 joerg } 292 1.99 joerg 293 1.51 hannken KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 294 1.51 hannken 295 1.120 ad if ((state == VS_ACTIVE && refcnt > 0 && 296 1.99 joerg (vip->vi_state == VS_LOADED || vip->vi_state == VS_BLOCKED)) || 297 1.99 joerg vip->vi_state == state) { 298 1.99 joerg if (!has_lock) 299 1.99 joerg mutex_exit((vp)->v_interlock); 300 1.94 hannken return; 301 1.99 joerg } 302 1.94 hannken vnpanic(vp, "state is %s, usecount %d, expected %s at %s:%d", 303 1.120 ad vstate_name(vip->vi_state), refcnt, 304 1.94 hannken vstate_name(state), func, line); 305 1.51 hannken } 306 1.51 hannken 307 1.57 hannken static enum vnode_state 308 1.51 hannken vstate_assert_get(vnode_t *vp, const char *func, int line) 309 1.51 hannken { 310 1.70 hannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 311 1.51 hannken 312 1.51 hannken KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 313 1.94 hannken if (! VSTATE_VALID(vip->vi_state)) 314 1.51 hannken vnpanic(vp, "state is %s at %s:%d", 315 1.70 hannken vstate_name(vip->vi_state), func, line); 316 1.51 hannken 317 1.70 hannken return vip->vi_state; 318 1.51 hannken } 319 1.51 hannken 320 1.52 hannken static void 321 1.51 hannken vstate_assert_wait_stable(vnode_t *vp, const char *func, int line) 322 1.51 hannken { 323 1.70 hannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 324 1.51 hannken 325 1.51 hannken KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 326 1.94 hannken if (! VSTATE_VALID(vip->vi_state)) 327 1.51 hannken vnpanic(vp, "state is %s at %s:%d", 328 1.70 hannken vstate_name(vip->vi_state), func, line); 329 1.51 hannken 330 1.94 hannken while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED) 331 1.51 hannken cv_wait(&vp->v_cv, vp->v_interlock); 332 1.51 hannken 333 1.94 hannken if (! 
VSTATE_VALID(vip->vi_state)) 334 1.51 hannken vnpanic(vp, "state is %s at %s:%d", 335 1.70 hannken vstate_name(vip->vi_state), func, line); 336 1.51 hannken } 337 1.51 hannken 338 1.52 hannken static void 339 1.57 hannken vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to, 340 1.51 hannken const char *func, int line) 341 1.51 hannken { 342 1.123 ad bool gated = (atomic_load_relaxed(&vp->v_usecount) & VUSECOUNT_GATE); 343 1.70 hannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 344 1.51 hannken 345 1.51 hannken KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 346 1.57 hannken if (from == VS_LOADING) 347 1.69 hannken KASSERTMSG(mutex_owned(&vcache_lock), "at %s:%d", func, line); 348 1.51 hannken 349 1.94 hannken if (! VSTATE_VALID(from)) 350 1.51 hannken vnpanic(vp, "from is %s at %s:%d", 351 1.51 hannken vstate_name(from), func, line); 352 1.94 hannken if (! VSTATE_VALID(to)) 353 1.51 hannken vnpanic(vp, "to is %s at %s:%d", 354 1.51 hannken vstate_name(to), func, line); 355 1.70 hannken if (vip->vi_state != from) 356 1.51 hannken vnpanic(vp, "from is %s, expected %s at %s:%d\n", 357 1.70 hannken vstate_name(vip->vi_state), vstate_name(from), func, line); 358 1.123 ad if ((from == VS_LOADED) != gated) 359 1.123 ad vnpanic(vp, "state is %s, gate %d does not match at %s:%d\n", 360 1.123 ad vstate_name(vip->vi_state), gated, func, line); 361 1.123 ad 362 1.136 riastrad /* Open/close the gate for vcache_tryvget(). */ 363 1.136 riastrad if (to == VS_LOADED) { 364 1.142 riastrad membar_release(); 365 1.123 ad atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE); 366 1.136 riastrad } else { 367 1.123 ad atomic_and_uint(&vp->v_usecount, ~VUSECOUNT_GATE); 368 1.136 riastrad } 369 1.51 hannken 370 1.148 riastrad atomic_store_relaxed(&vip->vi_state, to); 371 1.57 hannken if (from == VS_LOADING) 372 1.69 hannken cv_broadcast(&vcache_cv); 373 1.94 hannken if (to == VS_LOADED || to == VS_RECLAIMED) 374 1.51 hannken cv_broadcast(&vp->v_cv); 375 1.51 hannken } 376 1.51 hannken 377 1.51 hannken #else /* defined(DIAGNOSTIC) */ 378 1.51 hannken 379 1.51 hannken #define VSTATE_GET(vp) \ 380 1.57 hannken (VNODE_TO_VIMPL((vp))->vi_state) 381 1.51 hannken #define VSTATE_CHANGE(vp, from, to) \ 382 1.51 hannken vstate_change((vp), (from), (to)) 383 1.51 hannken #define VSTATE_WAIT_STABLE(vp) \ 384 1.51 hannken vstate_wait_stable((vp)) 385 1.94 hannken void 386 1.100 joerg _vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line, 387 1.100 joerg bool has_lock) 388 1.94 hannken { 389 1.94 hannken 390 1.94 hannken } 391 1.51 hannken 392 1.52 hannken static void 393 1.51 hannken vstate_wait_stable(vnode_t *vp) 394 1.51 hannken { 395 1.70 hannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 396 1.51 hannken 397 1.94 hannken while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED) 398 1.51 hannken cv_wait(&vp->v_cv, vp->v_interlock); 399 1.51 hannken } 400 1.51 hannken 401 1.52 hannken static void 402 1.57 hannken vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to) 403 1.51 hannken { 404 1.70 hannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 405 1.51 hannken 406 1.142 riastrad /* Open/close the gate for vcache_tryvget(). 
*/ 407 1.136 riastrad if (to == VS_LOADED) { 408 1.142 riastrad membar_release(); 409 1.123 ad atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE); 410 1.136 riastrad } else { 411 1.123 ad atomic_and_uint(&vp->v_usecount, ~VUSECOUNT_GATE); 412 1.136 riastrad } 413 1.123 ad 414 1.148 riastrad atomic_store_relaxed(&vip->vi_state, to); 415 1.57 hannken if (from == VS_LOADING) 416 1.69 hannken cv_broadcast(&vcache_cv); 417 1.94 hannken if (to == VS_LOADED || to == VS_RECLAIMED) 418 1.51 hannken cv_broadcast(&vp->v_cv); 419 1.51 hannken } 420 1.51 hannken 421 1.51 hannken #endif /* defined(DIAGNOSTIC) */ 422 1.51 hannken 423 1.1 rmind void 424 1.1 rmind vfs_vnode_sysinit(void) 425 1.1 rmind { 426 1.104 ad int error __diagused, i; 427 1.1 rmind 428 1.44 hannken dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL); 429 1.44 hannken KASSERT(dead_rootmount != NULL); 430 1.103 hannken dead_rootmount->mnt_iflag |= IMNT_MPSAFE; 431 1.31 hannken 432 1.63 hannken mutex_init(&vdrain_lock, MUTEX_DEFAULT, IPL_NONE); 433 1.104 ad for (i = 0; i < LRU_COUNT; i++) { 434 1.104 ad TAILQ_INIT(&lru_list[i]); 435 1.104 ad } 436 1.36 hannken vcache_init(); 437 1.36 hannken 438 1.152 hannken error = threadpool_get(&threadpool, PRI_NONE); 439 1.152 hannken KASSERTMSG((error == 0), "threadpool_get failed: %d", error); 440 1.152 hannken threadpool_job_init(&vdrain_job, vdrain_task, &vdrain_lock, "vdrain"); 441 1.152 hannken threadpool_job_init(&vrele_job, vrele_task, &vdrain_lock, "vrele"); 442 1.1 rmind } 443 1.1 rmind 444 1.1 rmind /* 445 1.48 hannken * Allocate a new marker vnode. 446 1.48 hannken */ 447 1.48 hannken vnode_t * 448 1.48 hannken vnalloc_marker(struct mount *mp) 449 1.48 hannken { 450 1.70 hannken vnode_impl_t *vip; 451 1.50 hannken vnode_t *vp; 452 1.50 hannken 453 1.70 hannken vip = pool_cache_get(vcache_pool, PR_WAITOK); 454 1.70 hannken memset(vip, 0, sizeof(*vip)); 455 1.70 hannken vp = VIMPL_TO_VNODE(vip); 456 1.111 ad uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 1); 457 1.50 hannken vp->v_mount = mp; 458 1.50 hannken vp->v_type = VBAD; 459 1.111 ad vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 460 1.144 thorpej klist_init(&vip->vi_klist.vk_klist); 461 1.144 thorpej vp->v_klist = &vip->vi_klist; 462 1.70 hannken vip->vi_state = VS_MARKER; 463 1.48 hannken 464 1.50 hannken return vp; 465 1.48 hannken } 466 1.48 hannken 467 1.48 hannken /* 468 1.48 hannken * Free a marker vnode. 469 1.48 hannken */ 470 1.48 hannken void 471 1.48 hannken vnfree_marker(vnode_t *vp) 472 1.48 hannken { 473 1.70 hannken vnode_impl_t *vip; 474 1.48 hannken 475 1.70 hannken vip = VNODE_TO_VIMPL(vp); 476 1.70 hannken KASSERT(vip->vi_state == VS_MARKER); 477 1.111 ad mutex_obj_free(vp->v_interlock); 478 1.50 hannken uvm_obj_destroy(&vp->v_uobj, true); 479 1.144 thorpej klist_fini(&vip->vi_klist.vk_klist); 480 1.70 hannken pool_cache_put(vcache_pool, vip); 481 1.48 hannken } 482 1.48 hannken 483 1.48 hannken /* 484 1.48 hannken * Test a vnode for being a marker vnode. 485 1.48 hannken */ 486 1.48 hannken bool 487 1.48 hannken vnis_marker(vnode_t *vp) 488 1.48 hannken { 489 1.48 hannken 490 1.57 hannken return (VNODE_TO_VIMPL(vp)->vi_state == VS_MARKER); 491 1.48 hannken } 492 1.48 hannken 493 1.48 hannken /* 494 1.63 hannken * Return the lru list this node should be on. 
495 1.63 hannken */ 496 1.63 hannken static vnodelst_t * 497 1.63 hannken lru_which(vnode_t *vp) 498 1.63 hannken { 499 1.63 hannken 500 1.63 hannken KASSERT(mutex_owned(vp->v_interlock)); 501 1.63 hannken 502 1.63 hannken if (vp->v_holdcnt > 0) 503 1.104 ad return &lru_list[LRU_HOLD]; 504 1.63 hannken else 505 1.104 ad return &lru_list[LRU_FREE]; 506 1.63 hannken } 507 1.63 hannken 508 1.63 hannken /* 509 1.63 hannken * Put vnode to end of given list. 510 1.63 hannken * Both the current and the new list may be NULL, used on vnode alloc/free. 511 1.63 hannken * Adjust numvnodes and signal vdrain thread if there is work. 512 1.63 hannken */ 513 1.63 hannken static void 514 1.63 hannken lru_requeue(vnode_t *vp, vnodelst_t *listhd) 515 1.63 hannken { 516 1.70 hannken vnode_impl_t *vip; 517 1.104 ad int d; 518 1.104 ad 519 1.104 ad /* 520 1.104 ad * If the vnode is on the correct list, and was put there recently, 521 1.104 ad * then leave it be, thus avoiding huge cache and lock contention. 522 1.104 ad */ 523 1.104 ad vip = VNODE_TO_VIMPL(vp); 524 1.104 ad if (listhd == vip->vi_lrulisthd && 525 1.119 maxv (getticks() - vip->vi_lrulisttm) < hz) { 526 1.155 riastrad return; 527 1.104 ad } 528 1.63 hannken 529 1.63 hannken mutex_enter(&vdrain_lock); 530 1.104 ad d = 0; 531 1.70 hannken if (vip->vi_lrulisthd != NULL) 532 1.70 hannken TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist); 533 1.63 hannken else 534 1.104 ad d++; 535 1.70 hannken vip->vi_lrulisthd = listhd; 536 1.119 maxv vip->vi_lrulisttm = getticks(); 537 1.70 hannken if (vip->vi_lrulisthd != NULL) 538 1.70 hannken TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist); 539 1.63 hannken else 540 1.104 ad d--; 541 1.104 ad if (d != 0) { 542 1.104 ad /* 543 1.104 ad * Looks strange? This is not a bug. Don't store 544 1.104 ad * numvnodes unless there is a change - avoid false 545 1.104 ad * sharing on MP. 546 1.104 ad */ 547 1.104 ad numvnodes += d; 548 1.104 ad } 549 1.152 hannken if (listhd == &lru_list[LRU_VRELE]) 550 1.152 hannken threadpool_schedule_job(threadpool, &vrele_job); 551 1.152 hannken if (d > 0 && numvnodes > desiredvnodes) 552 1.152 hannken threadpool_schedule_job(threadpool, &vdrain_job); 553 1.150 hannken if (d > 0 && numvnodes > desiredvnodes + desiredvnodes / 16) 554 1.151 riastrad kpause("vnfull", false, MAX(1, mstohz(10)), &vdrain_lock); 555 1.63 hannken mutex_exit(&vdrain_lock); 556 1.63 hannken } 557 1.63 hannken 558 1.63 hannken /* 559 1.152 hannken * LRU list iterator. 560 1.152 hannken * Caller holds vdrain_lock. 
561 1.75 hannken */ 562 1.152 hannken static vnode_impl_t * 563 1.152 hannken lru_iter_first(int idx, lru_iter_t *iterp) 564 1.75 hannken { 565 1.152 hannken vnode_impl_t *marker; 566 1.75 hannken 567 1.152 hannken KASSERT(mutex_owned(&vdrain_lock)); 568 1.75 hannken 569 1.152 hannken mutex_exit(&vdrain_lock); 570 1.75 hannken marker = VNODE_TO_VIMPL(vnalloc_marker(NULL)); 571 1.152 hannken mutex_enter(&vdrain_lock); 572 1.152 hannken marker->vi_lrulisthd = &lru_list[idx]; 573 1.152 hannken iterp->li_marker = marker; 574 1.152 hannken 575 1.152 hannken TAILQ_INSERT_HEAD(marker->vi_lrulisthd, marker, vi_lrulist); 576 1.152 hannken 577 1.152 hannken return lru_iter_next(iterp); 578 1.152 hannken } 579 1.152 hannken 580 1.152 hannken static vnode_impl_t * 581 1.152 hannken lru_iter_next(lru_iter_t *iter) 582 1.152 hannken { 583 1.152 hannken vnode_impl_t *vip, *marker; 584 1.152 hannken vnodelst_t *listhd; 585 1.75 hannken 586 1.152 hannken KASSERT(mutex_owned(&vdrain_lock)); 587 1.152 hannken 588 1.152 hannken marker = iter->li_marker; 589 1.152 hannken listhd = marker->vi_lrulisthd; 590 1.75 hannken 591 1.75 hannken while ((vip = TAILQ_NEXT(marker, vi_lrulist))) { 592 1.152 hannken TAILQ_REMOVE(listhd, marker, vi_lrulist); 593 1.152 hannken TAILQ_INSERT_AFTER(listhd, vip, marker, vi_lrulist); 594 1.152 hannken if (!vnis_marker(VIMPL_TO_VNODE(vip))) 595 1.152 hannken break; 596 1.152 hannken } 597 1.75 hannken 598 1.152 hannken return vip; 599 1.152 hannken } 600 1.75 hannken 601 1.152 hannken static void 602 1.152 hannken lru_iter_release(lru_iter_t *iter) 603 1.152 hannken { 604 1.152 hannken vnode_impl_t *marker; 605 1.75 hannken 606 1.152 hannken KASSERT(mutex_owned(&vdrain_lock)); 607 1.122 hannken 608 1.152 hannken marker = iter->li_marker; 609 1.152 hannken TAILQ_REMOVE(marker->vi_lrulisthd, marker, vi_lrulist); 610 1.75 hannken 611 1.75 hannken mutex_exit(&vdrain_lock); 612 1.75 hannken vnfree_marker(VIMPL_TO_VNODE(marker)); 613 1.152 hannken mutex_enter(&vdrain_lock); 614 1.75 hannken } 615 1.75 hannken 616 1.75 hannken /* 617 1.152 hannken * Release deferred vrele vnodes for this mount. 618 1.152 hannken * Called with file system suspended. 619 1.1 rmind */ 620 1.152 hannken void 621 1.152 hannken vrele_flush(struct mount *mp) 622 1.1 rmind { 623 1.152 hannken lru_iter_t iter; 624 1.152 hannken vnode_impl_t *vip; 625 1.1 rmind 626 1.152 hannken KASSERT(fstrans_is_owner(mp)); 627 1.24 hannken 628 1.152 hannken mutex_enter(&vdrain_lock); 629 1.152 hannken for (vip = lru_iter_first(LRU_VRELE, &iter); vip != NULL; 630 1.152 hannken vip = lru_iter_next(&iter)) { 631 1.152 hannken if (VIMPL_TO_VNODE(vip)->v_mount != mp) 632 1.152 hannken continue; 633 1.152 hannken vrele_deferred(vip); 634 1.1 rmind } 635 1.152 hannken lru_iter_release(&iter); 636 1.63 hannken mutex_exit(&vdrain_lock); 637 1.1 rmind } 638 1.1 rmind 639 1.1 rmind /* 640 1.152 hannken * One pass through the LRU lists to keep the number of allocated 641 1.152 hannken * vnodes below target. Returns true if target met. 
642 1.12 hannken */ 643 1.152 hannken static bool 644 1.152 hannken vdrain_one(u_int target) 645 1.12 hannken { 646 1.152 hannken int ix, lists[] = { LRU_FREE, LRU_HOLD }; 647 1.152 hannken lru_iter_t iter; 648 1.152 hannken vnode_impl_t *vip; 649 1.152 hannken vnode_t *vp; 650 1.63 hannken struct mount *mp; 651 1.12 hannken 652 1.63 hannken KASSERT(mutex_owned(&vdrain_lock)); 653 1.12 hannken 654 1.152 hannken for (ix = 0; ix < __arraycount(lists); ix++) { 655 1.152 hannken for (vip = lru_iter_first(lists[ix], &iter); vip != NULL; 656 1.152 hannken vip = lru_iter_next(&iter)) { 657 1.152 hannken if (numvnodes < target) { 658 1.152 hannken lru_iter_release(&iter); 659 1.152 hannken return true; 660 1.152 hannken } 661 1.152 hannken 662 1.152 hannken vp = VIMPL_TO_VNODE(vip); 663 1.152 hannken 664 1.152 hannken /* Probe usecount (unlocked). */ 665 1.152 hannken if (vrefcnt(vp) > 0) 666 1.152 hannken continue; 667 1.152 hannken /* Try v_interlock -- we lock the wrong direction! */ 668 1.152 hannken if (!mutex_tryenter(vp->v_interlock)) 669 1.152 hannken continue; 670 1.152 hannken /* Probe usecount and state. */ 671 1.152 hannken if (vrefcnt(vp) > 0 || VSTATE_GET(vp) != VS_LOADED) { 672 1.152 hannken mutex_exit(vp->v_interlock); 673 1.152 hannken continue; 674 1.152 hannken } 675 1.152 hannken mutex_exit(&vdrain_lock); 676 1.63 hannken 677 1.152 hannken mp = vp->v_mount; 678 1.152 hannken if (fstrans_start_nowait(mp) != 0) { 679 1.152 hannken mutex_exit(vp->v_interlock); 680 1.152 hannken mutex_enter(&vdrain_lock); 681 1.152 hannken continue; 682 1.152 hannken } 683 1.63 hannken 684 1.152 hannken if (vcache_vget(vp) == 0) { 685 1.152 hannken if (!vrecycle(vp)) { 686 1.152 hannken vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 687 1.152 hannken mutex_enter(vp->v_interlock); 688 1.152 hannken vrelel(vp, 0, LK_EXCLUSIVE); 689 1.152 hannken } 690 1.152 hannken } 691 1.152 hannken fstrans_done(mp); 692 1.63 hannken 693 1.152 hannken mutex_enter(&vdrain_lock); 694 1.152 hannken } 695 1.152 hannken lru_iter_release(&iter); 696 1.152 hannken } 697 1.63 hannken 698 1.152 hannken return false; 699 1.12 hannken } 700 1.12 hannken 701 1.12 hannken /* 702 1.152 hannken * threadpool task to keep the number of vnodes below desiredvnodes. 703 1.1 rmind */ 704 1.63 hannken static void 705 1.152 hannken vdrain_task(struct threadpool_job *job) 706 1.1 rmind { 707 1.63 hannken u_int target; 708 1.63 hannken 709 1.152 hannken target = desiredvnodes - desiredvnodes / 16; 710 1.63 hannken 711 1.63 hannken mutex_enter(&vdrain_lock); 712 1.63 hannken 713 1.152 hannken while (!vdrain_one(target)) 714 1.153 hannken kpause("vdrain", false, 1, &vdrain_lock); 715 1.152 hannken 716 1.152 hannken threadpool_job_done(job); 717 1.152 hannken mutex_exit(&vdrain_lock); 718 1.152 hannken } 719 1.152 hannken 720 1.152 hannken /* 721 1.152 hannken * threadpool task to process asynchronous vrele. 
722 1.152 hannken */ 723 1.152 hannken static void 724 1.152 hannken vrele_task(struct threadpool_job *job) 725 1.152 hannken { 726 1.152 hannken int skipped; 727 1.152 hannken lru_iter_t iter; 728 1.152 hannken vnode_impl_t *vip; 729 1.152 hannken struct mount *mp; 730 1.152 hannken 731 1.152 hannken mutex_enter(&vdrain_lock); 732 1.152 hannken while ((vip = lru_iter_first(LRU_VRELE, &iter)) != NULL) { 733 1.152 hannken for (skipped = 0; vip != NULL; vip = lru_iter_next(&iter)) { 734 1.152 hannken mp = VIMPL_TO_VNODE(vip)->v_mount; 735 1.152 hannken if (fstrans_start_nowait(mp) == 0) { 736 1.152 hannken vrele_deferred(vip); 737 1.152 hannken fstrans_done(mp); 738 1.152 hannken } else { 739 1.152 hannken skipped++; 740 1.63 hannken } 741 1.63 hannken } 742 1.1 rmind 743 1.152 hannken lru_iter_release(&iter); 744 1.154 riastrad if (skipped) { 745 1.154 riastrad kpause("vrele", false, MAX(1, mstohz(10)), 746 1.154 riastrad &vdrain_lock); 747 1.154 riastrad } 748 1.1 rmind } 749 1.152 hannken 750 1.152 hannken threadpool_job_done(job); 751 1.152 hannken lru_iter_release(&iter); 752 1.152 hannken mutex_exit(&vdrain_lock); 753 1.1 rmind } 754 1.1 rmind 755 1.1 rmind /* 756 1.112 ad * Try to drop reference on a vnode. Abort if we are releasing the 757 1.112 ad * last reference. Note: this _must_ succeed if not the last reference. 758 1.112 ad */ 759 1.112 ad static bool 760 1.112 ad vtryrele(vnode_t *vp) 761 1.112 ad { 762 1.112 ad u_int use, next; 763 1.112 ad 764 1.142 riastrad membar_release(); 765 1.112 ad for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) { 766 1.123 ad if (__predict_false((use & VUSECOUNT_MASK) == 1)) { 767 1.112 ad return false; 768 1.112 ad } 769 1.123 ad KASSERT((use & VUSECOUNT_MASK) > 1); 770 1.112 ad next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 771 1.112 ad if (__predict_true(next == use)) { 772 1.112 ad return true; 773 1.112 ad } 774 1.112 ad } 775 1.112 ad } 776 1.112 ad 777 1.112 ad /* 778 1.4 rmind * vput: unlock and release the reference. 779 1.1 rmind */ 780 1.1 rmind void 781 1.1 rmind vput(vnode_t *vp) 782 1.1 rmind { 783 1.107 ad int lktype; 784 1.1 rmind 785 1.112 ad /* 786 1.120 ad * Do an unlocked check of the usecount. If it looks like we're not 787 1.112 ad * about to drop the last reference, then unlock the vnode and try 788 1.112 ad * to drop the reference. If it ends up being the last reference 789 1.112 ad * after all, vrelel() can fix it all up. Most of the time this 790 1.112 ad * will all go to plan. 791 1.112 ad */ 792 1.120 ad if (vrefcnt(vp) > 1) { 793 1.112 ad VOP_UNLOCK(vp); 794 1.112 ad if (vtryrele(vp)) { 795 1.112 ad return; 796 1.112 ad } 797 1.112 ad lktype = LK_NONE; 798 1.107 ad } else { 799 1.107 ad lktype = VOP_ISLOCKED(vp); 800 1.107 ad KASSERT(lktype != LK_NONE); 801 1.107 ad } 802 1.107 ad mutex_enter(vp->v_interlock); 803 1.107 ad vrelel(vp, 0, lktype); 804 1.1 rmind } 805 1.1 rmind 806 1.1 rmind /* 807 1.152 hannken * Release a vnode from the deferred list. 808 1.152 hannken */ 809 1.152 hannken static void 810 1.152 hannken vrele_deferred(vnode_impl_t *vip) 811 1.152 hannken { 812 1.152 hannken vnode_t *vp; 813 1.152 hannken 814 1.152 hannken KASSERT(mutex_owned(&vdrain_lock)); 815 1.152 hannken KASSERT(vip->vi_lrulisthd == &lru_list[LRU_VRELE]); 816 1.152 hannken 817 1.152 hannken vp = VIMPL_TO_VNODE(vip); 818 1.152 hannken 819 1.152 hannken /* 820 1.152 hannken * First remove the vnode from the vrele list. 
821 1.152 hannken * Put it on the last lru list, the last vrele() 822 1.152 hannken * will put it back onto the right list before 823 1.152 hannken * its usecount reaches zero. 824 1.152 hannken */ 825 1.152 hannken TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist); 826 1.152 hannken vip->vi_lrulisthd = &lru_list[LRU_HOLD]; 827 1.152 hannken vip->vi_lrulisttm = getticks(); 828 1.152 hannken TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist); 829 1.152 hannken 830 1.152 hannken mutex_exit(&vdrain_lock); 831 1.152 hannken 832 1.152 hannken vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 833 1.152 hannken mutex_enter(vp->v_interlock); 834 1.152 hannken vrelel(vp, 0, LK_EXCLUSIVE); 835 1.152 hannken 836 1.152 hannken mutex_enter(&vdrain_lock); 837 1.152 hannken } 838 1.152 hannken 839 1.152 hannken /* 840 1.1 rmind * Vnode release. If reference count drops to zero, call inactive 841 1.1 rmind * routine and either return to freelist or free to the pool. 842 1.1 rmind */ 843 1.23 hannken static void 844 1.107 ad vrelel(vnode_t *vp, int flags, int lktype) 845 1.1 rmind { 846 1.104 ad const bool async = ((flags & VRELEL_ASYNC) != 0); 847 1.133 hannken bool recycle, defer, objlock_held; 848 1.132 hannken u_int use, next; 849 1.1 rmind int error; 850 1.1 rmind 851 1.133 hannken objlock_held = false; 852 1.133 hannken 853 1.132 hannken retry: 854 1.9 rmind KASSERT(mutex_owned(vp->v_interlock)); 855 1.1 rmind 856 1.1 rmind if (__predict_false(vp->v_op == dead_vnodeop_p && 857 1.57 hannken VSTATE_GET(vp) != VS_RECLAIMED)) { 858 1.11 christos vnpanic(vp, "dead but not clean"); 859 1.1 rmind } 860 1.1 rmind 861 1.1 rmind /* 862 1.132 hannken * If not the last reference, just unlock and drop the reference count. 863 1.132 hannken * 864 1.132 hannken * Otherwise make sure we pass a point in time where we hold the 865 1.132 hannken * last reference with VGET flag unset. 
866 1.1 rmind */ 867 1.132 hannken for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) { 868 1.132 hannken if (__predict_false((use & VUSECOUNT_MASK) > 1)) { 869 1.134 hannken if (objlock_held) { 870 1.134 hannken objlock_held = false; 871 1.134 hannken rw_exit(vp->v_uobj.vmobjlock); 872 1.134 hannken } 873 1.132 hannken if (lktype != LK_NONE) { 874 1.132 hannken mutex_exit(vp->v_interlock); 875 1.132 hannken lktype = LK_NONE; 876 1.132 hannken VOP_UNLOCK(vp); 877 1.132 hannken mutex_enter(vp->v_interlock); 878 1.132 hannken } 879 1.132 hannken if (vtryrele(vp)) { 880 1.132 hannken mutex_exit(vp->v_interlock); 881 1.132 hannken return; 882 1.132 hannken } 883 1.132 hannken next = atomic_load_relaxed(&vp->v_usecount); 884 1.132 hannken continue; 885 1.132 hannken } 886 1.132 hannken KASSERT((use & VUSECOUNT_MASK) == 1); 887 1.132 hannken next = use & ~VUSECOUNT_VGET; 888 1.132 hannken if (next != use) { 889 1.132 hannken next = atomic_cas_uint(&vp->v_usecount, use, next); 890 1.132 hannken } 891 1.132 hannken if (__predict_true(next == use)) { 892 1.132 hannken break; 893 1.107 ad } 894 1.1 rmind } 895 1.142 riastrad membar_acquire(); 896 1.120 ad if (vrefcnt(vp) <= 0 || vp->v_writecount != 0) { 897 1.11 christos vnpanic(vp, "%s: bad ref count", __func__); 898 1.1 rmind } 899 1.1 rmind 900 1.15 hannken #ifdef DIAGNOSTIC 901 1.15 hannken if ((vp->v_type == VBLK || vp->v_type == VCHR) && 902 1.15 hannken vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 903 1.15 hannken vprint("vrelel: missing VOP_CLOSE()", vp); 904 1.15 hannken } 905 1.15 hannken #endif 906 1.15 hannken 907 1.1 rmind /* 908 1.131 hannken * If already clean there is no need to lock, defer or 909 1.131 hannken * deactivate this node. 910 1.131 hannken */ 911 1.131 hannken if (VSTATE_GET(vp) == VS_RECLAIMED) { 912 1.134 hannken if (objlock_held) { 913 1.134 hannken objlock_held = false; 914 1.134 hannken rw_exit(vp->v_uobj.vmobjlock); 915 1.134 hannken } 916 1.131 hannken if (lktype != LK_NONE) { 917 1.131 hannken mutex_exit(vp->v_interlock); 918 1.131 hannken lktype = LK_NONE; 919 1.131 hannken VOP_UNLOCK(vp); 920 1.131 hannken mutex_enter(vp->v_interlock); 921 1.131 hannken } 922 1.131 hannken goto out; 923 1.131 hannken } 924 1.131 hannken 925 1.131 hannken /* 926 1.79 hannken * First try to get the vnode locked for VOP_INACTIVE(). 927 1.152 hannken * Defer vnode release to vrele task if caller requests 928 1.79 hannken * it explicitly, is the pagedaemon or the lock failed. 929 1.1 rmind */ 930 1.107 ad defer = false; 931 1.79 hannken if ((curlwp == uvm.pagedaemon_lwp) || async) { 932 1.79 hannken defer = true; 933 1.107 ad } else if (lktype == LK_SHARED) { 934 1.107 ad /* Excellent chance of getting, if the last ref. */ 935 1.131 hannken error = vn_lock(vp, LK_UPGRADE | LK_RETRY | LK_NOWAIT); 936 1.107 ad if (error != 0) { 937 1.107 ad defer = true; 938 1.107 ad } else { 939 1.107 ad lktype = LK_EXCLUSIVE; 940 1.107 ad } 941 1.107 ad } else if (lktype == LK_NONE) { 942 1.107 ad /* Excellent chance of getting, if the last ref. */ 943 1.131 hannken error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT); 944 1.107 ad if (error != 0) { 945 1.107 ad defer = true; 946 1.107 ad } else { 947 1.107 ad lktype = LK_EXCLUSIVE; 948 1.107 ad } 949 1.79 hannken } 950 1.79 hannken KASSERT(mutex_owned(vp->v_interlock)); 951 1.79 hannken if (defer) { 952 1.1 rmind /* 953 1.152 hannken * Defer reclaim to the vrele task; it's not safe to 954 1.79 hannken * clean it here. We donate it our last reference. 
955 1.1 rmind */ 956 1.107 ad if (lktype != LK_NONE) { 957 1.134 hannken mutex_exit(vp->v_interlock); 958 1.107 ad VOP_UNLOCK(vp); 959 1.134 hannken mutex_enter(vp->v_interlock); 960 1.107 ad } 961 1.104 ad lru_requeue(vp, &lru_list[LRU_VRELE]); 962 1.79 hannken mutex_exit(vp->v_interlock); 963 1.79 hannken return; 964 1.79 hannken } 965 1.107 ad KASSERT(lktype == LK_EXCLUSIVE); 966 1.30 hannken 967 1.135 hannken /* If the node gained another reference, retry. */ 968 1.135 hannken use = atomic_load_relaxed(&vp->v_usecount); 969 1.137 hannken if ((use & VUSECOUNT_VGET) != 0) { 970 1.135 hannken goto retry; 971 1.135 hannken } 972 1.137 hannken KASSERT((use & VUSECOUNT_MASK) == 1); 973 1.135 hannken 974 1.133 hannken if ((vp->v_iflag & (VI_TEXT|VI_EXECMAP|VI_WRMAP)) != 0 || 975 1.133 hannken (vp->v_vflag & VV_MAPPED) != 0) { 976 1.133 hannken /* Take care of space accounting. */ 977 1.133 hannken if (!objlock_held) { 978 1.133 hannken objlock_held = true; 979 1.133 hannken if (!rw_tryenter(vp->v_uobj.vmobjlock, RW_WRITER)) { 980 1.133 hannken mutex_exit(vp->v_interlock); 981 1.133 hannken rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 982 1.133 hannken mutex_enter(vp->v_interlock); 983 1.133 hannken goto retry; 984 1.133 hannken } 985 1.133 hannken } 986 1.133 hannken if ((vp->v_iflag & VI_EXECMAP) != 0) { 987 1.133 hannken cpu_count(CPU_COUNT_EXECPAGES, -vp->v_uobj.uo_npages); 988 1.133 hannken } 989 1.133 hannken vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 990 1.133 hannken vp->v_vflag &= ~VV_MAPPED; 991 1.133 hannken } 992 1.133 hannken if (objlock_held) { 993 1.133 hannken objlock_held = false; 994 1.133 hannken rw_exit(vp->v_uobj.vmobjlock); 995 1.133 hannken } 996 1.133 hannken 997 1.79 hannken /* 998 1.131 hannken * Deactivate the vnode, but preserve our reference across 999 1.131 hannken * the call to VOP_INACTIVE(). 1000 1.131 hannken * 1001 1.131 hannken * If VOP_INACTIVE() indicates that the file has been 1002 1.131 hannken * deleted, then recycle the vnode. 1003 1.131 hannken * 1004 1.131 hannken * Note that VOP_INACTIVE() will not drop the vnode lock. 1005 1.79 hannken */ 1006 1.131 hannken mutex_exit(vp->v_interlock); 1007 1.131 hannken recycle = false; 1008 1.131 hannken VOP_INACTIVE(vp, &recycle); 1009 1.133 hannken if (!recycle) { 1010 1.133 hannken lktype = LK_NONE; 1011 1.133 hannken VOP_UNLOCK(vp); 1012 1.133 hannken } 1013 1.131 hannken mutex_enter(vp->v_interlock); 1014 1.131 hannken 1015 1.132 hannken /* 1016 1.132 hannken * Block new references then check again to see if a 1017 1.132 hannken * new reference was acquired in the meantime. If 1018 1.132 hannken * it was, restore the vnode state and try again. 1019 1.132 hannken */ 1020 1.132 hannken if (recycle) { 1021 1.132 hannken VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); 1022 1.132 hannken use = atomic_load_relaxed(&vp->v_usecount); 1023 1.137 hannken if ((use & VUSECOUNT_VGET) != 0) { 1024 1.132 hannken VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); 1025 1.132 hannken goto retry; 1026 1.131 hannken } 1027 1.137 hannken KASSERT((use & VUSECOUNT_MASK) == 1); 1028 1.132 hannken } 1029 1.1 rmind 1030 1.131 hannken /* 1031 1.133 hannken * Recycle the vnode if the file is now unused (unlinked). 1032 1.131 hannken */ 1033 1.131 hannken if (recycle) { 1034 1.131 hannken VSTATE_ASSERT(vp, VS_BLOCKED); 1035 1.133 hannken KASSERT(lktype == LK_EXCLUSIVE); 1036 1.131 hannken /* vcache_reclaim drops the lock. 
*/ 1037 1.132 hannken lktype = LK_NONE; 1038 1.131 hannken vcache_reclaim(vp); 1039 1.1 rmind } 1040 1.131 hannken KASSERT(vrefcnt(vp) > 0); 1041 1.133 hannken KASSERT(lktype == LK_NONE); 1042 1.1 rmind 1043 1.131 hannken out: 1044 1.132 hannken for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) { 1045 1.132 hannken if (__predict_false((use & VUSECOUNT_VGET) != 0 && 1046 1.132 hannken (use & VUSECOUNT_MASK) == 1)) { 1047 1.132 hannken /* Gained and released another reference, retry. */ 1048 1.132 hannken goto retry; 1049 1.132 hannken } 1050 1.132 hannken next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 1051 1.132 hannken if (__predict_true(next == use)) { 1052 1.132 hannken if (__predict_false((use & VUSECOUNT_MASK) != 1)) { 1053 1.132 hannken /* Gained another reference. */ 1054 1.132 hannken mutex_exit(vp->v_interlock); 1055 1.132 hannken return; 1056 1.132 hannken } 1057 1.132 hannken break; 1058 1.132 hannken } 1059 1.1 rmind } 1060 1.142 riastrad membar_acquire(); 1061 1.1 rmind 1062 1.67 hannken if (VSTATE_GET(vp) == VS_RECLAIMED && vp->v_holdcnt == 0) { 1063 1.1 rmind /* 1064 1.1 rmind * It's clean so destroy it. It isn't referenced 1065 1.1 rmind * anywhere since it has been reclaimed. 1066 1.1 rmind */ 1067 1.57 hannken vcache_free(VNODE_TO_VIMPL(vp)); 1068 1.1 rmind } else { 1069 1.1 rmind /* 1070 1.1 rmind * Otherwise, put it back onto the freelist. It 1071 1.1 rmind * can't be destroyed while still associated with 1072 1.1 rmind * a file system. 1073 1.1 rmind */ 1074 1.63 hannken lru_requeue(vp, lru_which(vp)); 1075 1.9 rmind mutex_exit(vp->v_interlock); 1076 1.1 rmind } 1077 1.1 rmind } 1078 1.1 rmind 1079 1.1 rmind void 1080 1.1 rmind vrele(vnode_t *vp) 1081 1.1 rmind { 1082 1.1 rmind 1083 1.112 ad if (vtryrele(vp)) { 1084 1.112 ad return; 1085 1.112 ad } 1086 1.9 rmind mutex_enter(vp->v_interlock); 1087 1.107 ad vrelel(vp, 0, LK_NONE); 1088 1.1 rmind } 1089 1.1 rmind 1090 1.1 rmind /* 1091 1.1 rmind * Asynchronous vnode release, vnode is released in different context. 1092 1.1 rmind */ 1093 1.1 rmind void 1094 1.1 rmind vrele_async(vnode_t *vp) 1095 1.1 rmind { 1096 1.1 rmind 1097 1.112 ad if (vtryrele(vp)) { 1098 1.112 ad return; 1099 1.112 ad } 1100 1.9 rmind mutex_enter(vp->v_interlock); 1101 1.107 ad vrelel(vp, VRELEL_ASYNC, LK_NONE); 1102 1.1 rmind } 1103 1.1 rmind 1104 1.1 rmind /* 1105 1.1 rmind * Vnode reference, where a reference is already held by some other 1106 1.1 rmind * object (for example, a file structure). 1107 1.112 ad * 1108 1.123 ad * NB: lockless code sequences may rely on this not blocking. 1109 1.1 rmind */ 1110 1.1 rmind void 1111 1.1 rmind vref(vnode_t *vp) 1112 1.1 rmind { 1113 1.1 rmind 1114 1.120 ad KASSERT(vrefcnt(vp) > 0); 1115 1.1 rmind 1116 1.112 ad atomic_inc_uint(&vp->v_usecount); 1117 1.1 rmind } 1118 1.1 rmind 1119 1.1 rmind /* 1120 1.1 rmind * Page or buffer structure gets a reference. 1121 1.1 rmind * Called with v_interlock held. 1122 1.1 rmind */ 1123 1.1 rmind void 1124 1.1 rmind vholdl(vnode_t *vp) 1125 1.1 rmind { 1126 1.1 rmind 1127 1.9 rmind KASSERT(mutex_owned(vp->v_interlock)); 1128 1.1 rmind 1129 1.120 ad if (vp->v_holdcnt++ == 0 && vrefcnt(vp) == 0) 1130 1.63 hannken lru_requeue(vp, lru_which(vp)); 1131 1.1 rmind } 1132 1.1 rmind 1133 1.1 rmind /* 1134 1.112 ad * Page or buffer structure gets a reference. 
1135 1.112 ad */ 1136 1.112 ad void 1137 1.112 ad vhold(vnode_t *vp) 1138 1.112 ad { 1139 1.112 ad 1140 1.112 ad mutex_enter(vp->v_interlock); 1141 1.112 ad vholdl(vp); 1142 1.112 ad mutex_exit(vp->v_interlock); 1143 1.112 ad } 1144 1.112 ad 1145 1.112 ad /* 1146 1.1 rmind * Page or buffer structure frees a reference. 1147 1.1 rmind * Called with v_interlock held. 1148 1.1 rmind */ 1149 1.1 rmind void 1150 1.1 rmind holdrelel(vnode_t *vp) 1151 1.1 rmind { 1152 1.1 rmind 1153 1.9 rmind KASSERT(mutex_owned(vp->v_interlock)); 1154 1.1 rmind 1155 1.1 rmind if (vp->v_holdcnt <= 0) { 1156 1.11 christos vnpanic(vp, "%s: holdcnt vp %p", __func__, vp); 1157 1.1 rmind } 1158 1.1 rmind 1159 1.1 rmind vp->v_holdcnt--; 1160 1.120 ad if (vp->v_holdcnt == 0 && vrefcnt(vp) == 0) 1161 1.63 hannken lru_requeue(vp, lru_which(vp)); 1162 1.1 rmind } 1163 1.1 rmind 1164 1.1 rmind /* 1165 1.112 ad * Page or buffer structure frees a reference. 1166 1.112 ad */ 1167 1.112 ad void 1168 1.112 ad holdrele(vnode_t *vp) 1169 1.112 ad { 1170 1.112 ad 1171 1.112 ad mutex_enter(vp->v_interlock); 1172 1.112 ad holdrelel(vp); 1173 1.112 ad mutex_exit(vp->v_interlock); 1174 1.112 ad } 1175 1.112 ad 1176 1.112 ad /* 1177 1.33 hannken * Recycle an unused vnode if caller holds the last reference. 1178 1.1 rmind */ 1179 1.33 hannken bool 1180 1.33 hannken vrecycle(vnode_t *vp) 1181 1.1 rmind { 1182 1.60 hannken int error __diagused; 1183 1.46 hannken 1184 1.33 hannken mutex_enter(vp->v_interlock); 1185 1.33 hannken 1186 1.123 ad /* If the vnode is already clean we're done. */ 1187 1.60 hannken VSTATE_WAIT_STABLE(vp); 1188 1.94 hannken if (VSTATE_GET(vp) != VS_LOADED) { 1189 1.60 hannken VSTATE_ASSERT(vp, VS_RECLAIMED); 1190 1.107 ad vrelel(vp, 0, LK_NONE); 1191 1.60 hannken return true; 1192 1.60 hannken } 1193 1.60 hannken 1194 1.60 hannken /* Prevent further references until the vnode is locked. */ 1195 1.94 hannken VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); 1196 1.123 ad 1197 1.123 ad /* Make sure we hold the last reference. */ 1198 1.123 ad if (vrefcnt(vp) != 1) { 1199 1.123 ad VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); 1200 1.123 ad mutex_exit(vp->v_interlock); 1201 1.123 ad return false; 1202 1.123 ad } 1203 1.123 ad 1204 1.60 hannken mutex_exit(vp->v_interlock); 1205 1.60 hannken 1206 1.73 hannken /* 1207 1.73 hannken * On a leaf file system this lock will always succeed as we hold 1208 1.73 hannken * the last reference and prevent further references. 1209 1.73 hannken * On layered file systems waiting for the lock would open a can of 1210 1.73 hannken * deadlocks as the lower vnodes may have other active references. 1211 1.73 hannken */ 1212 1.76 hannken error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT); 1213 1.60 hannken 1214 1.60 hannken mutex_enter(vp->v_interlock); 1215 1.73 hannken if (error) { 1216 1.123 ad VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); 1217 1.73 hannken mutex_exit(vp->v_interlock); 1218 1.73 hannken return false; 1219 1.73 hannken } 1220 1.73 hannken 1221 1.120 ad KASSERT(vrefcnt(vp) == 1); 1222 1.54 hannken vcache_reclaim(vp); 1223 1.107 ad vrelel(vp, 0, LK_NONE); 1224 1.60 hannken 1225 1.33 hannken return true; 1226 1.1 rmind } 1227 1.1 rmind 1228 1.1 rmind /* 1229 1.92 hannken * Helper for vrevoke() to propagate suspension from lastmp 1230 1.92 hannken * to thismp. Both args may be NULL. 1231 1.92 hannken * Returns the currently suspended file system or NULL. 
1232 1.92 hannken */ 1233 1.92 hannken static struct mount * 1234 1.92 hannken vrevoke_suspend_next(struct mount *lastmp, struct mount *thismp) 1235 1.92 hannken { 1236 1.92 hannken int error; 1237 1.92 hannken 1238 1.92 hannken if (lastmp == thismp) 1239 1.92 hannken return thismp; 1240 1.92 hannken 1241 1.92 hannken if (lastmp != NULL) 1242 1.92 hannken vfs_resume(lastmp); 1243 1.92 hannken 1244 1.92 hannken if (thismp == NULL) 1245 1.92 hannken return NULL; 1246 1.92 hannken 1247 1.92 hannken do { 1248 1.92 hannken error = vfs_suspend(thismp, 0); 1249 1.92 hannken } while (error == EINTR || error == ERESTART); 1250 1.92 hannken 1251 1.92 hannken if (error == 0) 1252 1.92 hannken return thismp; 1253 1.92 hannken 1254 1.129 hannken KASSERT(error == EOPNOTSUPP || error == ENOENT); 1255 1.92 hannken return NULL; 1256 1.92 hannken } 1257 1.92 hannken 1258 1.92 hannken /* 1259 1.1 rmind * Eliminate all activity associated with the requested vnode 1260 1.1 rmind * and with all vnodes aliased to the requested vnode. 1261 1.1 rmind */ 1262 1.1 rmind void 1263 1.1 rmind vrevoke(vnode_t *vp) 1264 1.1 rmind { 1265 1.88 hannken struct mount *mp; 1266 1.19 hannken vnode_t *vq; 1267 1.1 rmind enum vtype type; 1268 1.1 rmind dev_t dev; 1269 1.1 rmind 1270 1.120 ad KASSERT(vrefcnt(vp) > 0); 1271 1.1 rmind 1272 1.92 hannken mp = vrevoke_suspend_next(NULL, vp->v_mount); 1273 1.88 hannken 1274 1.9 rmind mutex_enter(vp->v_interlock); 1275 1.52 hannken VSTATE_WAIT_STABLE(vp); 1276 1.57 hannken if (VSTATE_GET(vp) == VS_RECLAIMED) { 1277 1.9 rmind mutex_exit(vp->v_interlock); 1278 1.1 rmind } else if (vp->v_type != VBLK && vp->v_type != VCHR) { 1279 1.112 ad atomic_inc_uint(&vp->v_usecount); 1280 1.29 christos mutex_exit(vp->v_interlock); 1281 1.29 christos vgone(vp); 1282 1.1 rmind } else { 1283 1.1 rmind dev = vp->v_rdev; 1284 1.1 rmind type = vp->v_type; 1285 1.9 rmind mutex_exit(vp->v_interlock); 1286 1.1 rmind 1287 1.140 riastrad while (spec_node_lookup_by_dev(type, dev, VDEAD_NOWAIT, &vq) 1288 1.140 riastrad == 0) { 1289 1.92 hannken mp = vrevoke_suspend_next(mp, vq->v_mount); 1290 1.88 hannken vgone(vq); 1291 1.88 hannken } 1292 1.1 rmind } 1293 1.92 hannken vrevoke_suspend_next(mp, NULL); 1294 1.1 rmind } 1295 1.1 rmind 1296 1.1 rmind /* 1297 1.1 rmind * Eliminate all activity associated with a vnode in preparation for 1298 1.1 rmind * reuse. Drops a reference from the vnode. 
1299 1.1 rmind */ 1300 1.1 rmind void 1301 1.1 rmind vgone(vnode_t *vp) 1302 1.1 rmind { 1303 1.107 ad int lktype; 1304 1.1 rmind 1305 1.154 riastrad KASSERT(vp->v_mount == dead_rootmount || 1306 1.154 riastrad fstrans_is_owner(vp->v_mount)); 1307 1.93 hannken 1308 1.76 hannken vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1309 1.107 ad lktype = LK_EXCLUSIVE; 1310 1.9 rmind mutex_enter(vp->v_interlock); 1311 1.76 hannken VSTATE_WAIT_STABLE(vp); 1312 1.146 riastrad if (VSTATE_GET(vp) == VS_LOADED) { 1313 1.123 ad VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); 1314 1.76 hannken vcache_reclaim(vp); 1315 1.107 ad lktype = LK_NONE; 1316 1.107 ad } 1317 1.76 hannken VSTATE_ASSERT(vp, VS_RECLAIMED); 1318 1.107 ad vrelel(vp, 0, lktype); 1319 1.1 rmind } 1320 1.1 rmind 1321 1.36 hannken static inline uint32_t 1322 1.36 hannken vcache_hash(const struct vcache_key *key) 1323 1.36 hannken { 1324 1.36 hannken uint32_t hash = HASH32_BUF_INIT; 1325 1.36 hannken 1326 1.97 hannken KASSERT(key->vk_key_len > 0); 1327 1.97 hannken 1328 1.36 hannken hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash); 1329 1.36 hannken hash = hash32_buf(key->vk_key, key->vk_key_len, hash); 1330 1.36 hannken return hash; 1331 1.36 hannken } 1332 1.36 hannken 1333 1.127 simonb static int 1334 1.127 simonb vcache_stats(struct hashstat_sysctl *hs, bool fill) 1335 1.127 simonb { 1336 1.127 simonb vnode_impl_t *vip; 1337 1.127 simonb uint64_t chain; 1338 1.127 simonb 1339 1.127 simonb strlcpy(hs->hash_name, "vcache", sizeof(hs->hash_name)); 1340 1.127 simonb strlcpy(hs->hash_desc, "vnode cache hash", sizeof(hs->hash_desc)); 1341 1.127 simonb if (!fill) 1342 1.127 simonb return 0; 1343 1.127 simonb 1344 1.127 simonb hs->hash_size = vcache_hashmask + 1; 1345 1.127 simonb 1346 1.127 simonb for (size_t i = 0; i < hs->hash_size; i++) { 1347 1.127 simonb chain = 0; 1348 1.127 simonb mutex_enter(&vcache_lock); 1349 1.127 simonb SLIST_FOREACH(vip, &vcache_hashtab[i], vi_hash) { 1350 1.127 simonb chain++; 1351 1.127 simonb } 1352 1.127 simonb mutex_exit(&vcache_lock); 1353 1.127 simonb if (chain > 0) { 1354 1.127 simonb hs->hash_used++; 1355 1.127 simonb hs->hash_items += chain; 1356 1.127 simonb if (chain > hs->hash_maxchain) 1357 1.127 simonb hs->hash_maxchain = chain; 1358 1.127 simonb } 1359 1.127 simonb preempt_point(); 1360 1.127 simonb } 1361 1.127 simonb 1362 1.127 simonb return 0; 1363 1.127 simonb } 1364 1.127 simonb 1365 1.36 hannken static void 1366 1.36 hannken vcache_init(void) 1367 1.36 hannken { 1368 1.36 hannken 1369 1.112 ad vcache_pool = pool_cache_init(sizeof(vnode_impl_t), coherency_unit, 1370 1.112 ad 0, 0, "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL); 1371 1.69 hannken KASSERT(vcache_pool != NULL); 1372 1.69 hannken mutex_init(&vcache_lock, MUTEX_DEFAULT, IPL_NONE); 1373 1.69 hannken cv_init(&vcache_cv, "vcache"); 1374 1.69 hannken vcache_hashsize = desiredvnodes; 1375 1.69 hannken vcache_hashtab = hashinit(desiredvnodes, HASH_SLIST, true, 1376 1.69 hannken &vcache_hashmask); 1377 1.127 simonb hashstat_register("vcache", vcache_stats); 1378 1.36 hannken } 1379 1.36 hannken 1380 1.36 hannken static void 1381 1.36 hannken vcache_reinit(void) 1382 1.36 hannken { 1383 1.36 hannken int i; 1384 1.36 hannken uint32_t hash; 1385 1.36 hannken u_long oldmask, newmask; 1386 1.36 hannken struct hashhead *oldtab, *newtab; 1387 1.70 hannken vnode_impl_t *vip; 1388 1.36 hannken 1389 1.36 hannken newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask); 1390 1.69 hannken mutex_enter(&vcache_lock); 1391 1.69 hannken oldtab = 
vcache_hashtab; 1392 1.69 hannken oldmask = vcache_hashmask; 1393 1.69 hannken vcache_hashsize = desiredvnodes; 1394 1.69 hannken vcache_hashtab = newtab; 1395 1.69 hannken vcache_hashmask = newmask; 1396 1.36 hannken for (i = 0; i <= oldmask; i++) { 1397 1.70 hannken while ((vip = SLIST_FIRST(&oldtab[i])) != NULL) { 1398 1.70 hannken SLIST_REMOVE(&oldtab[i], vip, vnode_impl, vi_hash); 1399 1.70 hannken hash = vcache_hash(&vip->vi_key); 1400 1.69 hannken SLIST_INSERT_HEAD(&newtab[hash & vcache_hashmask], 1401 1.70 hannken vip, vi_hash); 1402 1.36 hannken } 1403 1.36 hannken } 1404 1.69 hannken mutex_exit(&vcache_lock); 1405 1.36 hannken hashdone(oldtab, HASH_SLIST, oldmask); 1406 1.36 hannken } 1407 1.36 hannken 1408 1.57 hannken static inline vnode_impl_t * 1409 1.36 hannken vcache_hash_lookup(const struct vcache_key *key, uint32_t hash) 1410 1.36 hannken { 1411 1.36 hannken struct hashhead *hashp; 1412 1.70 hannken vnode_impl_t *vip; 1413 1.36 hannken 1414 1.69 hannken KASSERT(mutex_owned(&vcache_lock)); 1415 1.36 hannken 1416 1.69 hannken hashp = &vcache_hashtab[hash & vcache_hashmask]; 1417 1.70 hannken SLIST_FOREACH(vip, hashp, vi_hash) { 1418 1.70 hannken if (key->vk_mount != vip->vi_key.vk_mount) 1419 1.36 hannken continue; 1420 1.70 hannken if (key->vk_key_len != vip->vi_key.vk_key_len) 1421 1.36 hannken continue; 1422 1.70 hannken if (memcmp(key->vk_key, vip->vi_key.vk_key, key->vk_key_len)) 1423 1.36 hannken continue; 1424 1.70 hannken return vip; 1425 1.36 hannken } 1426 1.36 hannken return NULL; 1427 1.36 hannken } 1428 1.36 hannken 1429 1.36 hannken /* 1430 1.50 hannken * Allocate a new, uninitialized vcache node. 1431 1.50 hannken */ 1432 1.57 hannken static vnode_impl_t * 1433 1.50 hannken vcache_alloc(void) 1434 1.50 hannken { 1435 1.70 hannken vnode_impl_t *vip; 1436 1.50 hannken vnode_t *vp; 1437 1.50 hannken 1438 1.70 hannken vip = pool_cache_get(vcache_pool, PR_WAITOK); 1439 1.111 ad vp = VIMPL_TO_VNODE(vip); 1440 1.70 hannken memset(vip, 0, sizeof(*vip)); 1441 1.50 hannken 1442 1.112 ad rw_init(&vip->vi_lock); 1443 1.111 ad vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 1444 1.111 ad 1445 1.111 ad uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 1); 1446 1.144 thorpej klist_init(&vip->vi_klist.vk_klist); 1447 1.144 thorpej vp->v_klist = &vip->vi_klist; 1448 1.50 hannken cv_init(&vp->v_cv, "vnode"); 1449 1.114 ad cache_vnode_init(vp); 1450 1.50 hannken 1451 1.50 hannken vp->v_usecount = 1; 1452 1.50 hannken vp->v_type = VNON; 1453 1.50 hannken vp->v_size = vp->v_writesize = VSIZENOTSET; 1454 1.50 hannken 1455 1.70 hannken vip->vi_state = VS_LOADING; 1456 1.51 hannken 1457 1.104 ad lru_requeue(vp, &lru_list[LRU_FREE]); 1458 1.63 hannken 1459 1.70 hannken return vip; 1460 1.50 hannken } 1461 1.50 hannken 1462 1.50 hannken /* 1463 1.79 hannken * Deallocate a vcache node in state VS_LOADING. 1464 1.79 hannken * 1465 1.79 hannken * vcache_lock held on entry and released on return. 
1466 1.79 hannken */ 1467 1.79 hannken static void 1468 1.79 hannken vcache_dealloc(vnode_impl_t *vip) 1469 1.79 hannken { 1470 1.79 hannken vnode_t *vp; 1471 1.79 hannken 1472 1.79 hannken KASSERT(mutex_owned(&vcache_lock)); 1473 1.79 hannken 1474 1.79 hannken vp = VIMPL_TO_VNODE(vip); 1475 1.102 hannken vfs_ref(dead_rootmount); 1476 1.102 hannken vfs_insmntque(vp, dead_rootmount); 1477 1.79 hannken mutex_enter(vp->v_interlock); 1478 1.79 hannken vp->v_op = dead_vnodeop_p; 1479 1.79 hannken VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED); 1480 1.79 hannken mutex_exit(&vcache_lock); 1481 1.107 ad vrelel(vp, 0, LK_NONE); 1482 1.79 hannken } 1483 1.79 hannken 1484 1.79 hannken /* 1485 1.50 hannken * Free an unused, unreferenced vcache node. 1486 1.67 hannken * v_interlock locked on entry. 1487 1.50 hannken */ 1488 1.50 hannken static void 1489 1.70 hannken vcache_free(vnode_impl_t *vip) 1490 1.50 hannken { 1491 1.50 hannken vnode_t *vp; 1492 1.50 hannken 1493 1.70 hannken vp = VIMPL_TO_VNODE(vip); 1494 1.67 hannken KASSERT(mutex_owned(vp->v_interlock)); 1495 1.50 hannken 1496 1.120 ad KASSERT(vrefcnt(vp) == 0); 1497 1.67 hannken KASSERT(vp->v_holdcnt == 0); 1498 1.67 hannken KASSERT(vp->v_writecount == 0); 1499 1.67 hannken lru_requeue(vp, NULL); 1500 1.67 hannken mutex_exit(vp->v_interlock); 1501 1.67 hannken 1502 1.67 hannken vfs_insmntque(vp, NULL); 1503 1.67 hannken if (vp->v_type == VBLK || vp->v_type == VCHR) 1504 1.67 hannken spec_node_destroy(vp); 1505 1.50 hannken 1506 1.111 ad mutex_obj_free(vp->v_interlock); 1507 1.112 ad rw_destroy(&vip->vi_lock); 1508 1.50 hannken uvm_obj_destroy(&vp->v_uobj, true); 1509 1.145 thorpej KASSERT(vp->v_klist == &vip->vi_klist); 1510 1.144 thorpej klist_fini(&vip->vi_klist.vk_klist); 1511 1.50 hannken cv_destroy(&vp->v_cv); 1512 1.114 ad cache_vnode_fini(vp); 1513 1.70 hannken pool_cache_put(vcache_pool, vip); 1514 1.50 hannken } 1515 1.50 hannken 1516 1.50 hannken /* 1517 1.66 hannken * Try to get an initial reference on this cached vnode. 1518 1.123 ad * Returns zero on success or EBUSY if the vnode state is not LOADED. 1519 1.66 hannken * 1520 1.123 ad * NB: lockless code sequences may rely on this not blocking. 1521 1.66 hannken */ 1522 1.66 hannken int 1523 1.66 hannken vcache_tryvget(vnode_t *vp) 1524 1.66 hannken { 1525 1.123 ad u_int use, next; 1526 1.66 hannken 1527 1.123 ad for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) { 1528 1.123 ad if (__predict_false((use & VUSECOUNT_GATE) == 0)) { 1529 1.156 riastrad return SET_ERROR(EBUSY); 1530 1.123 ad } 1531 1.132 hannken next = atomic_cas_uint(&vp->v_usecount, 1532 1.132 hannken use, (use + 1) | VUSECOUNT_VGET); 1533 1.123 ad if (__predict_true(next == use)) { 1534 1.142 riastrad membar_acquire(); 1535 1.123 ad return 0; 1536 1.123 ad } 1537 1.123 ad } 1538 1.66 hannken } 1539 1.66 hannken 1540 1.66 hannken /* 1541 1.66 hannken * Try to get an initial reference on this cached vnode. 1542 1.66 hannken * Returns zero on success and ENOENT if the vnode has been reclaimed. 1543 1.66 hannken * Will wait for the vnode state to be stable. 1544 1.66 hannken * 1545 1.66 hannken * v_interlock locked on entry and unlocked on exit. 1546 1.66 hannken */ 1547 1.66 hannken int 1548 1.66 hannken vcache_vget(vnode_t *vp) 1549 1.66 hannken { 1550 1.132 hannken int error; 1551 1.66 hannken 1552 1.66 hannken KASSERT(mutex_owned(vp->v_interlock)); 1553 1.66 hannken 1554 1.67 hannken /* Increment hold count to prevent vnode from disappearing. 
*/ 1555 1.67 hannken vp->v_holdcnt++; 1556 1.67 hannken VSTATE_WAIT_STABLE(vp); 1557 1.67 hannken vp->v_holdcnt--; 1558 1.66 hannken 1559 1.67 hannken /* If this was the last reference to a reclaimed vnode free it now. */ 1560 1.67 hannken if (__predict_false(VSTATE_GET(vp) == VS_RECLAIMED)) { 1561 1.120 ad if (vp->v_holdcnt == 0 && vrefcnt(vp) == 0) 1562 1.67 hannken vcache_free(VNODE_TO_VIMPL(vp)); 1563 1.67 hannken else 1564 1.67 hannken mutex_exit(vp->v_interlock); 1565 1.156 riastrad return SET_ERROR(ENOENT); 1566 1.66 hannken } 1567 1.94 hannken VSTATE_ASSERT(vp, VS_LOADED); 1568 1.132 hannken error = vcache_tryvget(vp); 1569 1.132 hannken KASSERT(error == 0); 1570 1.66 hannken mutex_exit(vp->v_interlock); 1571 1.66 hannken 1572 1.66 hannken return 0; 1573 1.66 hannken } 1574 1.66 hannken 1575 1.66 hannken /* 1576 1.36 hannken * Get a vnode / fs node pair by key and return it referenced through vpp. 1577 1.36 hannken */ 1578 1.36 hannken int 1579 1.36 hannken vcache_get(struct mount *mp, const void *key, size_t key_len, 1580 1.36 hannken struct vnode **vpp) 1581 1.36 hannken { 1582 1.36 hannken int error; 1583 1.36 hannken uint32_t hash; 1584 1.36 hannken const void *new_key; 1585 1.36 hannken struct vnode *vp; 1586 1.36 hannken struct vcache_key vcache_key; 1587 1.70 hannken vnode_impl_t *vip, *new_vip; 1588 1.36 hannken 1589 1.36 hannken new_key = NULL; 1590 1.36 hannken *vpp = NULL; 1591 1.36 hannken 1592 1.36 hannken vcache_key.vk_mount = mp; 1593 1.36 hannken vcache_key.vk_key = key; 1594 1.36 hannken vcache_key.vk_key_len = key_len; 1595 1.36 hannken hash = vcache_hash(&vcache_key); 1596 1.36 hannken 1597 1.36 hannken again: 1598 1.69 hannken mutex_enter(&vcache_lock); 1599 1.70 hannken vip = vcache_hash_lookup(&vcache_key, hash); 1600 1.36 hannken 1601 1.36 hannken /* If found, take a reference or retry. */ 1602 1.70 hannken if (__predict_true(vip != NULL)) { 1603 1.52 hannken /* 1604 1.52 hannken * If the vnode is loading we cannot take the v_interlock 1605 1.52 hannken * here as it might change during load (see uvm_obj_setlock()). 1606 1.69 hannken * As changing state from VS_LOADING requires both vcache_lock 1607 1.69 hannken * and v_interlock it is safe to test with vcache_lock held. 1608 1.52 hannken * 1609 1.57 hannken * Wait for vnodes changing state from VS_LOADING and retry. 1610 1.52 hannken */ 1611 1.70 hannken if (__predict_false(vip->vi_state == VS_LOADING)) { 1612 1.69 hannken cv_wait(&vcache_cv, &vcache_lock); 1613 1.69 hannken mutex_exit(&vcache_lock); 1614 1.52 hannken goto again; 1615 1.52 hannken } 1616 1.70 hannken vp = VIMPL_TO_VNODE(vip); 1617 1.36 hannken mutex_enter(vp->v_interlock); 1618 1.69 hannken mutex_exit(&vcache_lock); 1619 1.66 hannken error = vcache_vget(vp); 1620 1.36 hannken if (error == ENOENT) 1621 1.36 hannken goto again; 1622 1.36 hannken if (error == 0) 1623 1.36 hannken *vpp = vp; 1624 1.36 hannken KASSERT((error != 0) == (*vpp == NULL)); 1625 1.36 hannken return error; 1626 1.36 hannken } 1627 1.69 hannken mutex_exit(&vcache_lock); 1628 1.36 hannken 1629 1.36 hannken /* Allocate and initialize a new vcache / vnode pair. 
*/ 1630 1.87 hannken error = vfs_busy(mp); 1631 1.36 hannken if (error) 1632 1.36 hannken return error; 1633 1.70 hannken new_vip = vcache_alloc(); 1634 1.70 hannken new_vip->vi_key = vcache_key; 1635 1.70 hannken vp = VIMPL_TO_VNODE(new_vip); 1636 1.69 hannken mutex_enter(&vcache_lock); 1637 1.70 hannken vip = vcache_hash_lookup(&vcache_key, hash); 1638 1.70 hannken if (vip == NULL) { 1639 1.69 hannken SLIST_INSERT_HEAD(&vcache_hashtab[hash & vcache_hashmask], 1640 1.70 hannken new_vip, vi_hash); 1641 1.70 hannken vip = new_vip; 1642 1.36 hannken } 1643 1.36 hannken 1644 1.36 hannken /* If another thread beat us inserting this node, retry. */ 1645 1.70 hannken if (vip != new_vip) { 1646 1.79 hannken vcache_dealloc(new_vip); 1647 1.87 hannken vfs_unbusy(mp); 1648 1.36 hannken goto again; 1649 1.36 hannken } 1650 1.69 hannken mutex_exit(&vcache_lock); 1651 1.36 hannken 1652 1.57 hannken /* Load the fs node. Exclusive as new_node is VS_LOADING. */ 1653 1.36 hannken error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key); 1654 1.36 hannken if (error) { 1655 1.69 hannken mutex_enter(&vcache_lock); 1656 1.69 hannken SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 1657 1.70 hannken new_vip, vnode_impl, vi_hash); 1658 1.79 hannken vcache_dealloc(new_vip); 1659 1.87 hannken vfs_unbusy(mp); 1660 1.36 hannken KASSERT(*vpp == NULL); 1661 1.36 hannken return error; 1662 1.36 hannken } 1663 1.36 hannken KASSERT(new_key != NULL); 1664 1.36 hannken KASSERT(memcmp(key, new_key, key_len) == 0); 1665 1.36 hannken KASSERT(vp->v_op != NULL); 1666 1.36 hannken vfs_insmntque(vp, mp); 1667 1.36 hannken if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 1668 1.36 hannken vp->v_vflag |= VV_MPSAFE; 1669 1.87 hannken vfs_ref(mp); 1670 1.87 hannken vfs_unbusy(mp); 1671 1.36 hannken 1672 1.36 hannken /* Finished loading, finalize node. */ 1673 1.69 hannken mutex_enter(&vcache_lock); 1674 1.70 hannken new_vip->vi_key.vk_key = new_key; 1675 1.39 hannken mutex_enter(vp->v_interlock); 1676 1.94 hannken VSTATE_CHANGE(vp, VS_LOADING, VS_LOADED); 1677 1.39 hannken mutex_exit(vp->v_interlock); 1678 1.69 hannken mutex_exit(&vcache_lock); 1679 1.36 hannken *vpp = vp; 1680 1.36 hannken return 0; 1681 1.36 hannken } 1682 1.36 hannken 1683 1.36 hannken /* 1684 1.40 hannken * Create a new vnode / fs node pair and return it referenced through vpp. 1685 1.40 hannken */ 1686 1.40 hannken int 1687 1.40 hannken vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap, 1688 1.101 hannken kauth_cred_t cred, void *extra, struct vnode **vpp) 1689 1.40 hannken { 1690 1.40 hannken int error; 1691 1.40 hannken uint32_t hash; 1692 1.70 hannken struct vnode *vp, *ovp; 1693 1.70 hannken vnode_impl_t *vip, *ovip; 1694 1.40 hannken 1695 1.40 hannken *vpp = NULL; 1696 1.40 hannken 1697 1.40 hannken /* Allocate and initialize a new vcache / vnode pair. */ 1698 1.87 hannken error = vfs_busy(mp); 1699 1.40 hannken if (error) 1700 1.40 hannken return error; 1701 1.70 hannken vip = vcache_alloc(); 1702 1.70 hannken vip->vi_key.vk_mount = mp; 1703 1.70 hannken vp = VIMPL_TO_VNODE(vip); 1704 1.40 hannken 1705 1.40 hannken /* Create and load the fs node. 
*/ 1706 1.101 hannken error = VFS_NEWVNODE(mp, dvp, vp, vap, cred, extra, 1707 1.70 hannken &vip->vi_key.vk_key_len, &vip->vi_key.vk_key); 1708 1.40 hannken if (error) { 1709 1.69 hannken mutex_enter(&vcache_lock); 1710 1.79 hannken vcache_dealloc(vip); 1711 1.87 hannken vfs_unbusy(mp); 1712 1.40 hannken KASSERT(*vpp == NULL); 1713 1.40 hannken return error; 1714 1.40 hannken } 1715 1.40 hannken KASSERT(vp->v_op != NULL); 1716 1.97 hannken KASSERT((vip->vi_key.vk_key_len == 0) == (mp == dead_rootmount)); 1717 1.97 hannken if (vip->vi_key.vk_key_len > 0) { 1718 1.97 hannken KASSERT(vip->vi_key.vk_key != NULL); 1719 1.97 hannken hash = vcache_hash(&vip->vi_key); 1720 1.40 hannken 1721 1.97 hannken /* 1722 1.97 hannken * Wait for previous instance to be reclaimed, 1723 1.97 hannken * then insert new node. 1724 1.97 hannken */ 1725 1.97 hannken mutex_enter(&vcache_lock); 1726 1.97 hannken while ((ovip = vcache_hash_lookup(&vip->vi_key, hash))) { 1727 1.97 hannken ovp = VIMPL_TO_VNODE(ovip); 1728 1.97 hannken mutex_enter(ovp->v_interlock); 1729 1.97 hannken mutex_exit(&vcache_lock); 1730 1.97 hannken error = vcache_vget(ovp); 1731 1.97 hannken KASSERT(error == ENOENT); 1732 1.97 hannken mutex_enter(&vcache_lock); 1733 1.97 hannken } 1734 1.97 hannken SLIST_INSERT_HEAD(&vcache_hashtab[hash & vcache_hashmask], 1735 1.97 hannken vip, vi_hash); 1736 1.69 hannken mutex_exit(&vcache_lock); 1737 1.40 hannken } 1738 1.40 hannken vfs_insmntque(vp, mp); 1739 1.40 hannken if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 1740 1.40 hannken vp->v_vflag |= VV_MPSAFE; 1741 1.87 hannken vfs_ref(mp); 1742 1.87 hannken vfs_unbusy(mp); 1743 1.40 hannken 1744 1.40 hannken /* Finished loading, finalize node. */ 1745 1.69 hannken mutex_enter(&vcache_lock); 1746 1.52 hannken mutex_enter(vp->v_interlock); 1747 1.94 hannken VSTATE_CHANGE(vp, VS_LOADING, VS_LOADED); 1748 1.69 hannken mutex_exit(&vcache_lock); 1749 1.40 hannken mutex_exit(vp->v_interlock); 1750 1.40 hannken *vpp = vp; 1751 1.40 hannken return 0; 1752 1.40 hannken } 1753 1.40 hannken 1754 1.40 hannken /* 1755 1.65 hannken * Prepare key change: update old cache nodes key and lock new cache node. 1756 1.37 hannken * Return an error if the new node already exists. 1757 1.37 hannken */ 1758 1.37 hannken int 1759 1.37 hannken vcache_rekey_enter(struct mount *mp, struct vnode *vp, 1760 1.37 hannken const void *old_key, size_t old_key_len, 1761 1.37 hannken const void *new_key, size_t new_key_len) 1762 1.37 hannken { 1763 1.37 hannken uint32_t old_hash, new_hash; 1764 1.37 hannken struct vcache_key old_vcache_key, new_vcache_key; 1765 1.70 hannken vnode_impl_t *vip, *new_vip; 1766 1.37 hannken 1767 1.37 hannken old_vcache_key.vk_mount = mp; 1768 1.37 hannken old_vcache_key.vk_key = old_key; 1769 1.37 hannken old_vcache_key.vk_key_len = old_key_len; 1770 1.37 hannken old_hash = vcache_hash(&old_vcache_key); 1771 1.37 hannken 1772 1.37 hannken new_vcache_key.vk_mount = mp; 1773 1.37 hannken new_vcache_key.vk_key = new_key; 1774 1.37 hannken new_vcache_key.vk_key_len = new_key_len; 1775 1.37 hannken new_hash = vcache_hash(&new_vcache_key); 1776 1.37 hannken 1777 1.70 hannken new_vip = vcache_alloc(); 1778 1.70 hannken new_vip->vi_key = new_vcache_key; 1779 1.37 hannken 1780 1.52 hannken /* Insert locked new node used as placeholder. 
*/ 1781 1.69 hannken mutex_enter(&vcache_lock); 1782 1.70 hannken vip = vcache_hash_lookup(&new_vcache_key, new_hash); 1783 1.70 hannken if (vip != NULL) { 1784 1.79 hannken vcache_dealloc(new_vip); 1785 1.156 riastrad return SET_ERROR(EEXIST); 1786 1.37 hannken } 1787 1.69 hannken SLIST_INSERT_HEAD(&vcache_hashtab[new_hash & vcache_hashmask], 1788 1.70 hannken new_vip, vi_hash); 1789 1.49 hannken 1790 1.65 hannken /* Replace old nodes key with the temporary copy. */ 1791 1.70 hannken vip = vcache_hash_lookup(&old_vcache_key, old_hash); 1792 1.70 hannken KASSERT(vip != NULL); 1793 1.70 hannken KASSERT(VIMPL_TO_VNODE(vip) == vp); 1794 1.70 hannken KASSERT(vip->vi_key.vk_key != old_vcache_key.vk_key); 1795 1.70 hannken vip->vi_key = old_vcache_key; 1796 1.69 hannken mutex_exit(&vcache_lock); 1797 1.37 hannken return 0; 1798 1.37 hannken } 1799 1.37 hannken 1800 1.37 hannken /* 1801 1.65 hannken * Key change complete: update old node and remove placeholder. 1802 1.37 hannken */ 1803 1.37 hannken void 1804 1.37 hannken vcache_rekey_exit(struct mount *mp, struct vnode *vp, 1805 1.37 hannken const void *old_key, size_t old_key_len, 1806 1.37 hannken const void *new_key, size_t new_key_len) 1807 1.37 hannken { 1808 1.37 hannken uint32_t old_hash, new_hash; 1809 1.37 hannken struct vcache_key old_vcache_key, new_vcache_key; 1810 1.70 hannken vnode_impl_t *vip, *new_vip; 1811 1.70 hannken struct vnode *new_vp; 1812 1.37 hannken 1813 1.37 hannken old_vcache_key.vk_mount = mp; 1814 1.37 hannken old_vcache_key.vk_key = old_key; 1815 1.37 hannken old_vcache_key.vk_key_len = old_key_len; 1816 1.37 hannken old_hash = vcache_hash(&old_vcache_key); 1817 1.37 hannken 1818 1.37 hannken new_vcache_key.vk_mount = mp; 1819 1.37 hannken new_vcache_key.vk_key = new_key; 1820 1.37 hannken new_vcache_key.vk_key_len = new_key_len; 1821 1.37 hannken new_hash = vcache_hash(&new_vcache_key); 1822 1.37 hannken 1823 1.69 hannken mutex_enter(&vcache_lock); 1824 1.49 hannken 1825 1.49 hannken /* Lookup old and new node. */ 1826 1.70 hannken vip = vcache_hash_lookup(&old_vcache_key, old_hash); 1827 1.70 hannken KASSERT(vip != NULL); 1828 1.70 hannken KASSERT(VIMPL_TO_VNODE(vip) == vp); 1829 1.70 hannken 1830 1.70 hannken new_vip = vcache_hash_lookup(&new_vcache_key, new_hash); 1831 1.70 hannken KASSERT(new_vip != NULL); 1832 1.70 hannken KASSERT(new_vip->vi_key.vk_key_len == new_key_len); 1833 1.70 hannken new_vp = VIMPL_TO_VNODE(new_vip); 1834 1.70 hannken mutex_enter(new_vp->v_interlock); 1835 1.70 hannken VSTATE_ASSERT(VIMPL_TO_VNODE(new_vip), VS_LOADING); 1836 1.79 hannken mutex_exit(new_vp->v_interlock); 1837 1.49 hannken 1838 1.49 hannken /* Rekey old node and put it onto its new hashlist. */ 1839 1.70 hannken vip->vi_key = new_vcache_key; 1840 1.49 hannken if (old_hash != new_hash) { 1841 1.69 hannken SLIST_REMOVE(&vcache_hashtab[old_hash & vcache_hashmask], 1842 1.70 hannken vip, vnode_impl, vi_hash); 1843 1.69 hannken SLIST_INSERT_HEAD(&vcache_hashtab[new_hash & vcache_hashmask], 1844 1.70 hannken vip, vi_hash); 1845 1.49 hannken } 1846 1.49 hannken 1847 1.49 hannken /* Remove new node used as placeholder. */ 1848 1.69 hannken SLIST_REMOVE(&vcache_hashtab[new_hash & vcache_hashmask], 1849 1.70 hannken new_vip, vnode_impl, vi_hash); 1850 1.79 hannken vcache_dealloc(new_vip); 1851 1.37 hannken } 1852 1.37 hannken 1853 1.37 hannken /* 1854 1.54 hannken * Disassociate the underlying file system from a vnode. 1855 1.54 hannken * 1856 1.54 hannken * Must be called with vnode locked and will return unlocked. 
1857 1.54 hannken * Must be called with the interlock held, and will return with it held. 1858 1.54 hannken */ 1859 1.54 hannken static void 1860 1.54 hannken vcache_reclaim(vnode_t *vp) 1861 1.54 hannken { 1862 1.54 hannken lwp_t *l = curlwp; 1863 1.70 hannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 1864 1.74 hannken struct mount *mp = vp->v_mount; 1865 1.55 hannken uint32_t hash; 1866 1.55 hannken uint8_t temp_buf[64], *temp_key; 1867 1.55 hannken size_t temp_key_len; 1868 1.141 riastrad bool recycle; 1869 1.54 hannken int error; 1870 1.54 hannken 1871 1.139 hannken KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 1872 1.54 hannken KASSERT(mutex_owned(vp->v_interlock)); 1873 1.120 ad KASSERT(vrefcnt(vp) != 0); 1874 1.54 hannken 1875 1.70 hannken temp_key_len = vip->vi_key.vk_key_len; 1876 1.54 hannken /* 1877 1.54 hannken * Prevent the vnode from being recycled or brought into use 1878 1.54 hannken * while we clean it out. 1879 1.54 hannken */ 1880 1.123 ad VSTATE_CHANGE(vp, VS_BLOCKED, VS_RECLAIMING); 1881 1.145 thorpej 1882 1.145 thorpej /* 1883 1.145 thorpej * Send NOTE_REVOKE now, before we call VOP_RECLAIM(), 1884 1.145 thorpej * because VOP_RECLAIM() could cause vp->v_klist to 1885 1.145 thorpej * become invalid. Don't check for interest in NOTE_REVOKE 1886 1.145 thorpej * here; it's always posted because it sets EV_EOF. 1887 1.145 thorpej * 1888 1.145 thorpej * Once it's been posted, reset vp->v_klist to point to 1889 1.145 thorpej * our own local storage, in case we were sharing with 1890 1.145 thorpej * someone else. 1891 1.145 thorpej */ 1892 1.145 thorpej KNOTE(&vp->v_klist->vk_klist, NOTE_REVOKE); 1893 1.145 thorpej vp->v_klist = &vip->vi_klist; 1894 1.111 ad mutex_exit(vp->v_interlock); 1895 1.111 ad 1896 1.111 ad rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 1897 1.111 ad mutex_enter(vp->v_interlock); 1898 1.125 ad if ((vp->v_iflag & VI_EXECMAP) != 0) { 1899 1.105 ad cpu_count(CPU_COUNT_EXECPAGES, -vp->v_uobj.uo_npages); 1900 1.54 hannken } 1901 1.54 hannken vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); 1902 1.116 ad vp->v_iflag |= VI_DEADCHECK; /* for genfs_getpages() */ 1903 1.54 hannken mutex_exit(vp->v_interlock); 1904 1.111 ad rw_exit(vp->v_uobj.vmobjlock); 1905 1.54 hannken 1906 1.114 ad /* 1907 1.114 ad * With vnode state set to reclaiming, purge name cache immediately 1908 1.114 ad * to prevent new handles on vnode, and wait for existing threads 1909 1.114 ad * trying to get a handle to notice VS_RECLAIMED status and abort. 1910 1.114 ad */ 1911 1.114 ad cache_purge(vp); 1912 1.114 ad 1913 1.55 hannken /* Replace the vnode key with a temporary copy. */ 1914 1.70 hannken if (vip->vi_key.vk_key_len > sizeof(temp_buf)) { 1915 1.55 hannken temp_key = kmem_alloc(temp_key_len, KM_SLEEP); 1916 1.55 hannken } else { 1917 1.55 hannken temp_key = temp_buf; 1918 1.55 hannken } 1919 1.97 hannken if (vip->vi_key.vk_key_len > 0) { 1920 1.97 hannken mutex_enter(&vcache_lock); 1921 1.97 hannken memcpy(temp_key, vip->vi_key.vk_key, temp_key_len); 1922 1.97 hannken vip->vi_key.vk_key = temp_key; 1923 1.97 hannken mutex_exit(&vcache_lock); 1924 1.97 hannken } 1925 1.55 hannken 1926 1.96 hannken fstrans_start(mp); 1927 1.74 hannken 1928 1.54 hannken /* 1929 1.54 hannken * Clean out any cached data associated with the vnode. 
1930 1.54 hannken */ 1931 1.54 hannken error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1932 1.54 hannken if (error != 0) { 1933 1.54 hannken if (wapbl_vphaswapbl(vp)) 1934 1.54 hannken WAPBL_DISCARD(wapbl_vptomp(vp)); 1935 1.54 hannken error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1936 1.54 hannken } 1937 1.54 hannken KASSERTMSG((error == 0), "vinvalbuf failed: %d", error); 1938 1.54 hannken KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1939 1.141 riastrad if (vp->v_type == VBLK || vp->v_type == VCHR) { 1940 1.54 hannken spec_node_revoke(vp); 1941 1.54 hannken } 1942 1.54 hannken 1943 1.60 hannken /* 1944 1.60 hannken * Disassociate the underlying file system from the vnode. 1945 1.90 riastrad * VOP_INACTIVE leaves the vnode locked; VOP_RECLAIM unlocks 1946 1.90 riastrad * the vnode, and may destroy the vnode so that VOP_UNLOCK 1947 1.90 riastrad * would no longer function. 1948 1.60 hannken */ 1949 1.60 hannken VOP_INACTIVE(vp, &recycle); 1950 1.139 hannken KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 1951 1.54 hannken if (VOP_RECLAIM(vp)) { 1952 1.54 hannken vnpanic(vp, "%s: cannot reclaim", __func__); 1953 1.54 hannken } 1954 1.54 hannken 1955 1.54 hannken KASSERT(vp->v_data == NULL); 1956 1.113 ad KASSERT((vp->v_iflag & VI_PAGES) == 0); 1957 1.54 hannken 1958 1.54 hannken if (vp->v_type == VREG && vp->v_ractx != NULL) { 1959 1.54 hannken uvm_ra_freectx(vp->v_ractx); 1960 1.54 hannken vp->v_ractx = NULL; 1961 1.54 hannken } 1962 1.54 hannken 1963 1.97 hannken if (vip->vi_key.vk_key_len > 0) { 1964 1.55 hannken /* Remove from vnode cache. */ 1965 1.97 hannken hash = vcache_hash(&vip->vi_key); 1966 1.97 hannken mutex_enter(&vcache_lock); 1967 1.97 hannken KASSERT(vip == vcache_hash_lookup(&vip->vi_key, hash)); 1968 1.97 hannken SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 1969 1.97 hannken vip, vnode_impl, vi_hash); 1970 1.97 hannken mutex_exit(&vcache_lock); 1971 1.97 hannken } 1972 1.55 hannken if (temp_key != temp_buf) 1973 1.55 hannken kmem_free(temp_key, temp_key_len); 1974 1.55 hannken 1975 1.54 hannken /* Done with purge, notify sleepers of the grim news. */ 1976 1.54 hannken mutex_enter(vp->v_interlock); 1977 1.54 hannken vp->v_op = dead_vnodeop_p; 1978 1.57 hannken VSTATE_CHANGE(vp, VS_RECLAIMING, VS_RECLAIMED); 1979 1.54 hannken vp->v_tag = VT_NON; 1980 1.80 hannken mutex_exit(vp->v_interlock); 1981 1.54 hannken 1982 1.80 hannken /* 1983 1.80 hannken * Move to dead mount. Must be after changing the operations 1984 1.80 hannken * vector as vnode operations enter the mount before using the 1985 1.80 hannken * operations vector. See sys/kern/vnode_if.c. 1986 1.80 hannken */ 1987 1.80 hannken vp->v_vflag &= ~VV_ROOT; 1988 1.86 hannken vfs_ref(dead_rootmount); 1989 1.80 hannken vfs_insmntque(vp, dead_rootmount); 1990 1.80 hannken 1991 1.110 ad #ifdef PAX_SEGVGUARD 1992 1.110 ad pax_segvguard_cleanup(vp); 1993 1.110 ad #endif /* PAX_SEGVGUARD */ 1994 1.110 ad 1995 1.80 hannken mutex_enter(vp->v_interlock); 1996 1.74 hannken fstrans_done(mp); 1997 1.54 hannken KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1998 1.54 hannken } 1999 1.54 hannken 2000 1.54 hannken /* 2001 1.98 hannken * Disassociate the underlying file system from an open device vnode 2002 1.98 hannken * and make it anonymous. 2003 1.98 hannken * 2004 1.98 hannken * Vnode unlocked on entry, drops a reference to the vnode. 
2005 1.98 hannken */ 2006 1.98 hannken void 2007 1.98 hannken vcache_make_anon(vnode_t *vp) 2008 1.98 hannken { 2009 1.98 hannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 2010 1.98 hannken uint32_t hash; 2011 1.98 hannken bool recycle; 2012 1.98 hannken 2013 1.98 hannken KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 2014 1.154 riastrad KASSERT(vp->v_mount == dead_rootmount || 2015 1.154 riastrad fstrans_is_owner(vp->v_mount)); 2016 1.98 hannken VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE); 2017 1.98 hannken 2018 1.98 hannken /* Remove from vnode cache. */ 2019 1.98 hannken hash = vcache_hash(&vip->vi_key); 2020 1.98 hannken mutex_enter(&vcache_lock); 2021 1.98 hannken KASSERT(vip == vcache_hash_lookup(&vip->vi_key, hash)); 2022 1.98 hannken SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 2023 1.98 hannken vip, vnode_impl, vi_hash); 2024 1.98 hannken vip->vi_key.vk_mount = dead_rootmount; 2025 1.98 hannken vip->vi_key.vk_key_len = 0; 2026 1.98 hannken vip->vi_key.vk_key = NULL; 2027 1.98 hannken mutex_exit(&vcache_lock); 2028 1.98 hannken 2029 1.98 hannken /* 2030 1.98 hannken * Disassociate the underlying file system from the vnode. 2031 1.98 hannken * VOP_INACTIVE leaves the vnode locked; VOP_RECLAIM unlocks 2032 1.98 hannken * the vnode, and may destroy the vnode so that VOP_UNLOCK 2033 1.98 hannken * would no longer function. 2034 1.98 hannken */ 2035 1.98 hannken if (vn_lock(vp, LK_EXCLUSIVE)) { 2036 1.98 hannken vnpanic(vp, "%s: cannot lock", __func__); 2037 1.98 hannken } 2038 1.98 hannken VOP_INACTIVE(vp, &recycle); 2039 1.139 hannken KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 2040 1.98 hannken if (VOP_RECLAIM(vp)) { 2041 1.98 hannken vnpanic(vp, "%s: cannot reclaim", __func__); 2042 1.98 hannken } 2043 1.98 hannken 2044 1.98 hannken /* Purge name cache. */ 2045 1.98 hannken cache_purge(vp); 2046 1.98 hannken 2047 1.98 hannken /* Done with purge, change operations vector. */ 2048 1.98 hannken mutex_enter(vp->v_interlock); 2049 1.98 hannken vp->v_op = spec_vnodeop_p; 2050 1.139 hannken vp->v_vflag |= VV_MPSAFE; 2051 1.98 hannken mutex_exit(vp->v_interlock); 2052 1.98 hannken 2053 1.98 hannken /* 2054 1.98 hannken * Move to dead mount. Must be after changing the operations 2055 1.98 hannken * vector as vnode operations enter the mount before using the 2056 1.98 hannken * operations vector. See sys/kern/vnode_if.c. 2057 1.98 hannken */ 2058 1.98 hannken vfs_ref(dead_rootmount); 2059 1.98 hannken vfs_insmntque(vp, dead_rootmount); 2060 1.98 hannken 2061 1.98 hannken vrele(vp); 2062 1.98 hannken } 2063 1.98 hannken 2064 1.98 hannken /* 2065 1.1 rmind * Update outstanding I/O count and do wakeup if requested. 2066 1.1 rmind */ 2067 1.1 rmind void 2068 1.1 rmind vwakeup(struct buf *bp) 2069 1.1 rmind { 2070 1.1 rmind vnode_t *vp; 2071 1.1 rmind 2072 1.1 rmind if ((vp = bp->b_vp) == NULL) 2073 1.1 rmind return; 2074 1.1 rmind 2075 1.9 rmind KASSERT(bp->b_objlock == vp->v_interlock); 2076 1.1 rmind KASSERT(mutex_owned(bp->b_objlock)); 2077 1.1 rmind 2078 1.1 rmind if (--vp->v_numoutput < 0) 2079 1.11 christos vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp); 2080 1.1 rmind if (vp->v_numoutput == 0) 2081 1.1 rmind cv_broadcast(&vp->v_cv); 2082 1.1 rmind } 2083 1.1 rmind 2084 1.1 rmind /* 2085 1.35 hannken * Test a vnode for being or becoming dead. Returns one of: 2086 1.35 hannken * EBUSY: vnode is becoming dead, with "flags == VDEAD_NOWAIT" only. 2087 1.35 hannken * ENOENT: vnode is dead. 2088 1.35 hannken * 0: otherwise. 
2089 1.35 hannken * 2090 1.35 hannken * Whenever this function returns a non-zero value all future 2091 1.35 hannken * calls will also return a non-zero value. 2092 1.35 hannken */ 2093 1.35 hannken int 2094 1.35 hannken vdead_check(struct vnode *vp, int flags) 2095 1.35 hannken { 2096 1.35 hannken 2097 1.35 hannken KASSERT(mutex_owned(vp->v_interlock)); 2098 1.35 hannken 2099 1.52 hannken if (! ISSET(flags, VDEAD_NOWAIT)) 2100 1.52 hannken VSTATE_WAIT_STABLE(vp); 2101 1.1 rmind 2102 1.57 hannken if (VSTATE_GET(vp) == VS_RECLAIMING) { 2103 1.52 hannken KASSERT(ISSET(flags, VDEAD_NOWAIT)); 2104 1.156 riastrad return SET_ERROR(EBUSY); 2105 1.57 hannken } else if (VSTATE_GET(vp) == VS_RECLAIMED) { 2106 1.156 riastrad return SET_ERROR(ENOENT); 2107 1.52 hannken } 2108 1.1 rmind 2109 1.52 hannken return 0; 2110 1.1 rmind } 2111 1.1 rmind 2112 1.1 rmind int 2113 1.61 hannken vfs_drainvnodes(void) 2114 1.1 rmind { 2115 1.61 hannken 2116 1.63 hannken mutex_enter(&vdrain_lock); 2117 1.152 hannken 2118 1.152 hannken if (!vdrain_one(desiredvnodes)) { 2119 1.152 hannken mutex_exit(&vdrain_lock); 2120 1.156 riastrad return SET_ERROR(EBUSY); 2121 1.61 hannken } 2122 1.152 hannken 2123 1.63 hannken mutex_exit(&vdrain_lock); 2124 1.12 hannken 2125 1.69 hannken if (vcache_hashsize != desiredvnodes) 2126 1.61 hannken vcache_reinit(); 2127 1.36 hannken 2128 1.1 rmind return 0; 2129 1.1 rmind } 2130 1.1 rmind 2131 1.1 rmind void 2132 1.11 christos vnpanic(vnode_t *vp, const char *fmt, ...) 2133 1.1 rmind { 2134 1.11 christos va_list ap; 2135 1.11 christos 2136 1.1 rmind #ifdef DIAGNOSTIC 2137 1.1 rmind vprint(NULL, vp); 2138 1.1 rmind #endif 2139 1.11 christos va_start(ap, fmt); 2140 1.11 christos vpanic(fmt, ap); 2141 1.11 christos va_end(ap); 2142 1.1 rmind } 2143 1.111 ad 2144 1.111 ad void 2145 1.111 ad vshareilock(vnode_t *tvp, vnode_t *fvp) 2146 1.111 ad { 2147 1.111 ad kmutex_t *oldlock; 2148 1.111 ad 2149 1.111 ad oldlock = tvp->v_interlock; 2150 1.111 ad mutex_obj_hold(fvp->v_interlock); 2151 1.111 ad tvp->v_interlock = fvp->v_interlock; 2152 1.111 ad mutex_obj_free(oldlock); 2153 1.111 ad } 2154 1.144 thorpej 2155 1.144 thorpej void 2156 1.144 thorpej vshareklist(vnode_t *tvp, vnode_t *fvp) 2157 1.144 thorpej { 2158 1.144 thorpej /* 2159 1.144 thorpej * If two vnodes share klist state, they must also share 2160 1.144 thorpej * an interlock. 2161 1.144 thorpej */ 2162 1.144 thorpej KASSERT(tvp->v_interlock == fvp->v_interlock); 2163 1.144 thorpej 2164 1.144 thorpej /* 2165 1.144 thorpej * We make the following assumptions: 2166 1.144 thorpej * 2167 1.144 thorpej * ==> Some other synchronization is happening outside of 2168 1.144 thorpej * our view to make this safe. 2169 1.144 thorpej * 2170 1.144 thorpej * ==> That the "to" vnode will have the necessary references 2171 1.144 thorpej * on the "from" vnode so that the storage for the klist 2172 1.144 thorpej * won't be yanked out from beneath us (the vnode_impl). 2173 1.144 thorpej * 2174 1.144 thorpej * ==> If "from" is also sharing, we then assume that "from" 2175 1.144 thorpej * has the necessary references, and so on. 2176 1.144 thorpej */ 2177 1.144 thorpej tvp->v_klist = fvp->v_klist; 2178 1.144 thorpej } 2179
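
/*
 * Editor's illustrative sketch (not part of the original source and not
 * compiled): a hedged example of how a hypothetical file system might
 * obtain a referenced vnode through the vnode cache implemented above,
 * assuming the inode number serves as the cache key and that the file
 * system's VFS_LOADVNODE() implementation associates the fs node on a
 * cache miss.  The names "examplefs_vget_by_ino" and "examplefs_ino_t"
 * are placeholders for illustration only, not real NetBSD identifiers.
 */
#if 0
static int
examplefs_vget_by_ino(struct mount *mp, examplefs_ino_t ino,
    struct vnode **vpp)
{
	int error;

	/*
	 * vcache_get() looks up the (mount, key) pair in the vnode
	 * cache.  On a miss it allocates a new vnode in state
	 * VS_LOADING, calls VFS_LOADVNODE() to load the fs node, and
	 * then marks the vnode VS_LOADED.  The vnode is returned
	 * referenced but unlocked.
	 */
	error = vcache_get(mp, &ino, sizeof(ino), vpp);
	if (error)
		return error;

	/*
	 * The caller locks the vnode as needed (e.g. vn_lock()) and
	 * eventually drops its reference with vrele().
	 */
	return 0;
}
#endif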