/*	$NetBSD: vfs_vnode.c,v 1.53.2.2 2017/01/07 08:56:49 pgoyette Exp $	*/
2
3 /*-
4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1989, 1993
35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
67 */
68
69 /*
70 * The vnode cache subsystem.
71 *
72 * Life-cycle
73 *
74 * Normally, there are two points where new vnodes are created:
75 * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode
76 * starts in one of the following ways:
77 *
78 * - Allocation, via vcache_get(9) or vcache_new(9).
79 * - Reclamation of inactive vnode, via vcache_vget(9).
80 *
81 * Recycle from a free list, via getnewvnode(9) -> getcleanvnode(9)
82 * was another, traditional way. Currently, only the draining thread
83 * recycles the vnodes. This behaviour might be revisited.
84 *
 *	The life-cycle ends when the last reference is dropped, usually
 *	in VOP_REMOVE(9).  In that case, VOP_INACTIVE(9) is called to inform
 *	the file system that the vnode is inactive.  Via this call, the file
 *	system indicates whether the vnode can be recycled (usually, it
 *	checks its own references, e.g. count of links, whether the file
 *	was removed).
 *
 *	Depending on that indication, the vnode can be put onto a free list
 *	(cache), or cleaned via vclean(9), which calls VOP_RECLAIM(9) to
 *	disassociate the underlying file system from the vnode, and finally
 *	destroyed.
94 *
95 * Vnode state
96 *
97 * Vnode is always in one of six states:
98 * - MARKER This is a marker vnode to help list traversal. It
99 * will never change its state.
 *	- LOADING	Vnode is associating with the underlying file system
 *			and is not yet ready to use.
 *	- ACTIVE	Vnode has an associated underlying file system and is
 *			ready to use.
104 * - BLOCKED Vnode is active but cannot get new references.
105 * - RECLAIMING Vnode is disassociating from the underlying file
106 * system.
107 * - RECLAIMED Vnode has disassociated from underlying file system
108 * and is dead.
109 *
110 * Valid state changes are:
111 * LOADING -> ACTIVE
112 * Vnode has been initialised in vcache_get() or
113 * vcache_new() and is ready to use.
114 * ACTIVE -> RECLAIMING
115 * Vnode starts disassociation from underlying file
116 * system in vclean().
117 * RECLAIMING -> RECLAIMED
118 * Vnode finished disassociation from underlying file
119 * system in vclean().
120 * ACTIVE -> BLOCKED
121 * Either vcache_rekey*() is changing the vnode key or
122 * vrelel() is about to call VOP_INACTIVE().
123 * BLOCKED -> ACTIVE
124 * The block condition is over.
125 * LOADING -> RECLAIMED
126 * Either vcache_get() or vcache_new() failed to
127 * associate the underlying file system or vcache_rekey*()
128 * drops a vnode used as placeholder.
129 *
130 * Of these states LOADING, BLOCKED and RECLAIMING are intermediate
131 * and it is possible to wait for state change.
132 *
133 * State is protected with v_interlock with one exception:
134 * to change from LOADING both v_interlock and vcache_lock must be held
135 * so it is possible to check "state == LOADING" without holding
136 * v_interlock. See vcache_get() for details.
137 *
138 * Reference counting
139 *
 *	A vnode is considered active if its reference count
 *	(vnode_t::v_usecount) is non-zero.  The count is maintained using
 *	the vref(9), vrele(9) and vput(9) routines.  Common points holding
 *	references are e.g. open files, current working directories, mount
 *	points, etc.
144 *
145 * Note on v_usecount and its locking
146 *
 *	At nearly all points where it is known that v_usecount could be
 *	zero, vnode_t::v_interlock will be held.  To change v_usecount away
 *	from zero, the interlock must be held.  To change from a non-zero
 *	value to zero, again the interlock must be held.
151 *
152 * Changing the usecount from a non-zero value to a non-zero value can
153 * safely be done using atomic operations, without the interlock held.
154 *
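 *	As an illustrative sketch (not lifted from any particular caller),
 *	a typical consumer that is handed a referenced but unlocked vnode
 *	locks it, uses it and then drops both the lock and the reference:
 *
 *		vn_lock(vp, LK_SHARED | LK_RETRY);
 *		... operate on the vnode ...
 *		vput(vp);
 *
 *	Code that merely stores an additional pointer to an already
 *	referenced vnode takes its own reference with vref(9) and later
 *	drops it with vrele(9).
 *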
155 */
156
157 #include <sys/cdefs.h>
158 __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.53.2.2 2017/01/07 08:56:49 pgoyette Exp $");
159
160 #include <sys/param.h>
161 #include <sys/kernel.h>
162
163 #include <sys/atomic.h>
164 #include <sys/buf.h>
165 #include <sys/conf.h>
166 #include <sys/device.h>
167 #include <sys/hash.h>
168 #include <sys/kauth.h>
169 #include <sys/kmem.h>
170 #include <sys/kthread.h>
171 #include <sys/module.h>
172 #include <sys/mount.h>
173 #include <sys/namei.h>
174 #include <sys/syscallargs.h>
175 #include <sys/sysctl.h>
176 #include <sys/systm.h>
177 #include <sys/vnode_impl.h>
178 #include <sys/wapbl.h>
179 #include <sys/fstrans.h>
180
181 #include <uvm/uvm.h>
182 #include <uvm/uvm_readahead.h>
183
184 /* Flags to vrelel. */
185 #define VRELEL_ASYNC_RELE 0x0001 /* Always defer to vrele thread. */
186
187 u_int numvnodes __cacheline_aligned;
188
189 /*
190 * There are three lru lists: one holds vnodes waiting for async release,
191 * one is for vnodes which have no buffer/page references and
192 * one for those which do (i.e. v_holdcnt is non-zero).
193 */
194 static vnodelst_t lru_vrele_list __cacheline_aligned;
195 static vnodelst_t lru_free_list __cacheline_aligned;
196 static vnodelst_t lru_hold_list __cacheline_aligned;
197 static kmutex_t vdrain_lock __cacheline_aligned;
198 static kcondvar_t vdrain_cv __cacheline_aligned;
199 static int vdrain_gen;
200 static kcondvar_t vdrain_gen_cv;
201 static bool vdrain_retry;
202 static lwp_t * vdrain_lwp;
203 SLIST_HEAD(hashhead, vnode_impl);
204 static kmutex_t vcache_lock __cacheline_aligned;
205 static kcondvar_t vcache_cv __cacheline_aligned;
206 static u_int vcache_hashsize;
207 static u_long vcache_hashmask;
208 static struct hashhead *vcache_hashtab __cacheline_aligned;
209 static pool_cache_t vcache_pool;
210 static void lru_requeue(vnode_t *, vnodelst_t *);
211 static vnodelst_t * lru_which(vnode_t *);
212 static vnode_impl_t * vcache_alloc(void);
213 static void vcache_free(vnode_impl_t *);
214 static void vcache_init(void);
215 static void vcache_reinit(void);
static void		vclean(vnode_t *);
static void		vcache_reclaim(vnode_t *);
217 static void vrelel(vnode_t *, int);
218 static void vdrain_thread(void *);
219 static void vnpanic(vnode_t *, const char *, ...)
220 __printflike(2, 3);
221
222 /* Routines having to do with the management of the vnode table. */
223 extern struct mount *dead_rootmount;
224 extern int (**dead_vnodeop_p)(void *);
225 extern struct vfsops dead_vfsops;
226
227 /* Vnode state operations and diagnostics. */
228
229 #if defined(DIAGNOSTIC)
230
231 #define VSTATE_GET(vp) \
232 vstate_assert_get((vp), __func__, __LINE__)
233 #define VSTATE_CHANGE(vp, from, to) \
234 vstate_assert_change((vp), (from), (to), __func__, __LINE__)
235 #define VSTATE_WAIT_STABLE(vp) \
236 vstate_assert_wait_stable((vp), __func__, __LINE__)
237 #define VSTATE_ASSERT(vp, state) \
238 vstate_assert((vp), (state), __func__, __LINE__)
239
240 static void
241 vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line)
242 {
243 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
244
245 KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
246
247 if (__predict_true(vip->vi_state == state))
248 return;
249 vnpanic(vp, "state is %s, expected %s at %s:%d",
250 vstate_name(vip->vi_state), vstate_name(state), func, line);
251 }
252
253 static enum vnode_state
254 vstate_assert_get(vnode_t *vp, const char *func, int line)
255 {
256 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
257
258 KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
259 if (vip->vi_state == VS_MARKER)
260 vnpanic(vp, "state is %s at %s:%d",
261 vstate_name(vip->vi_state), func, line);
262
263 return vip->vi_state;
264 }
265
266 static void
267 vstate_assert_wait_stable(vnode_t *vp, const char *func, int line)
268 {
269 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
270
271 KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
272 if (vip->vi_state == VS_MARKER)
273 vnpanic(vp, "state is %s at %s:%d",
274 vstate_name(vip->vi_state), func, line);
275
276 while (vip->vi_state != VS_ACTIVE && vip->vi_state != VS_RECLAIMED)
277 cv_wait(&vp->v_cv, vp->v_interlock);
278
279 if (vip->vi_state == VS_MARKER)
280 vnpanic(vp, "state is %s at %s:%d",
281 vstate_name(vip->vi_state), func, line);
282 }
283
284 static void
285 vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to,
286 const char *func, int line)
287 {
288 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
289
290 KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
291 if (from == VS_LOADING)
292 KASSERTMSG(mutex_owned(&vcache_lock), "at %s:%d", func, line);
293
294 if (from == VS_MARKER)
295 vnpanic(vp, "from is %s at %s:%d",
296 vstate_name(from), func, line);
297 if (to == VS_MARKER)
298 vnpanic(vp, "to is %s at %s:%d",
299 vstate_name(to), func, line);
300 if (vip->vi_state != from)
301 vnpanic(vp, "from is %s, expected %s at %s:%d\n",
302 vstate_name(vip->vi_state), vstate_name(from), func, line);
303 if ((from == VS_BLOCKED || to == VS_BLOCKED) && vp->v_usecount != 1)
304 vnpanic(vp, "%s to %s with usecount %d at %s:%d",
305 vstate_name(from), vstate_name(to), vp->v_usecount,
306 func, line);
307
308 vip->vi_state = to;
309 if (from == VS_LOADING)
310 cv_broadcast(&vcache_cv);
311 if (to == VS_ACTIVE || to == VS_RECLAIMED)
312 cv_broadcast(&vp->v_cv);
313 }
314
315 #else /* defined(DIAGNOSTIC) */
316
317 #define VSTATE_GET(vp) \
318 (VNODE_TO_VIMPL((vp))->vi_state)
319 #define VSTATE_CHANGE(vp, from, to) \
320 vstate_change((vp), (from), (to))
321 #define VSTATE_WAIT_STABLE(vp) \
322 vstate_wait_stable((vp))
323 #define VSTATE_ASSERT(vp, state)
324
325 static void
326 vstate_wait_stable(vnode_t *vp)
327 {
328 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
329
330 while (vip->vi_state != VS_ACTIVE && vip->vi_state != VS_RECLAIMED)
331 cv_wait(&vp->v_cv, vp->v_interlock);
332 }
333
334 static void
335 vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to)
336 {
337 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
338
339 vip->vi_state = to;
340 if (from == VS_LOADING)
341 cv_broadcast(&vcache_cv);
342 if (to == VS_ACTIVE || to == VS_RECLAIMED)
343 cv_broadcast(&vp->v_cv);
344 }
345
346 #endif /* defined(DIAGNOSTIC) */
347
348 void
349 vfs_vnode_sysinit(void)
350 {
351 int error __diagused;
352
353 dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL);
354 KASSERT(dead_rootmount != NULL);
355 dead_rootmount->mnt_iflag = IMNT_MPSAFE;
356
357 mutex_init(&vdrain_lock, MUTEX_DEFAULT, IPL_NONE);
358 TAILQ_INIT(&lru_free_list);
359 TAILQ_INIT(&lru_hold_list);
360 TAILQ_INIT(&lru_vrele_list);
361
362 vcache_init();
363
364 cv_init(&vdrain_cv, "vdrain");
365 cv_init(&vdrain_gen_cv, "vdrainwt");
366 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
367 NULL, &vdrain_lwp, "vdrain");
368 KASSERTMSG((error == 0), "kthread_create(vdrain) failed: %d", error);
369 }
370
371 /*
372 * Allocate a new marker vnode.
373 */
374 vnode_t *
375 vnalloc_marker(struct mount *mp)
376 {
377 vnode_impl_t *vip;
378 vnode_t *vp;
379
380 vip = pool_cache_get(vcache_pool, PR_WAITOK);
381 memset(vip, 0, sizeof(*vip));
382 vp = VIMPL_TO_VNODE(vip);
383 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
384 vp->v_mount = mp;
385 vp->v_type = VBAD;
386 vip->vi_state = VS_MARKER;
387
388 return vp;
389 }
390
391 /*
392 * Free a marker vnode.
393 */
394 void
395 vnfree_marker(vnode_t *vp)
396 {
397 vnode_impl_t *vip;
398
399 vip = VNODE_TO_VIMPL(vp);
400 KASSERT(vip->vi_state == VS_MARKER);
401 uvm_obj_destroy(&vp->v_uobj, true);
402 pool_cache_put(vcache_pool, vip);
403 }
404
405 /*
406 * Test a vnode for being a marker vnode.
407 */
408 bool
409 vnis_marker(vnode_t *vp)
410 {
411
412 return (VNODE_TO_VIMPL(vp)->vi_state == VS_MARKER);
413 }
414
415 /*
416 * Return the lru list this node should be on.
417 */
418 static vnodelst_t *
419 lru_which(vnode_t *vp)
420 {
421
422 KASSERT(mutex_owned(vp->v_interlock));
423
424 if (vp->v_holdcnt > 0)
425 return &lru_hold_list;
426 else
427 return &lru_free_list;
428 }
429
430 /*
431 * Put vnode to end of given list.
432 * Both the current and the new list may be NULL, used on vnode alloc/free.
433 * Adjust numvnodes and signal vdrain thread if there is work.
434 */
435 static void
436 lru_requeue(vnode_t *vp, vnodelst_t *listhd)
437 {
438 vnode_impl_t *vip;
439
440 mutex_enter(&vdrain_lock);
441 vip = VNODE_TO_VIMPL(vp);
442 if (vip->vi_lrulisthd != NULL)
443 TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist);
444 else
445 numvnodes++;
446 vip->vi_lrulisthd = listhd;
447 if (vip->vi_lrulisthd != NULL)
448 TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist);
449 else
450 numvnodes--;
451 if (numvnodes > desiredvnodes || listhd == &lru_vrele_list)
452 cv_broadcast(&vdrain_cv);
453 mutex_exit(&vdrain_lock);
454 }
455
456 /*
457 * Reclaim a cached vnode. Used from vdrain_thread only.
458 */
459 static __inline void
460 vdrain_remove(vnode_t *vp)
461 {
462 struct mount *mp;
463
464 KASSERT(mutex_owned(&vdrain_lock));
465
466 /* Probe usecount (unlocked). */
467 if (vp->v_usecount > 0)
468 return;
469 /* Try v_interlock -- we lock the wrong direction! */
470 if (!mutex_tryenter(vp->v_interlock))
471 return;
472 /* Probe usecount and state. */
473 if (vp->v_usecount > 0 || VSTATE_GET(vp) != VS_ACTIVE) {
474 mutex_exit(vp->v_interlock);
475 return;
476 }
477 mp = vp->v_mount;
478 if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
479 mutex_exit(vp->v_interlock);
480 return;
481 }
482 vdrain_retry = true;
483 mutex_exit(&vdrain_lock);
484
485 if (vcache_vget(vp) == 0) {
486 if (!vrecycle(vp))
487 vrele(vp);
488 }
489 fstrans_done(mp);
490
491 mutex_enter(&vdrain_lock);
492 }
493
494 /*
495 * Release a cached vnode. Used from vdrain_thread only.
496 */
497 static __inline void
498 vdrain_vrele(vnode_t *vp)
499 {
500 vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
501 struct mount *mp;
502
503 KASSERT(mutex_owned(&vdrain_lock));
504
505 mp = vp->v_mount;
506 if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0)
507 return;
508
509 /*
510 * First remove the vnode from the vrele list.
511 * Put it on the last lru list, the last vrele()
512 * will put it back onto the right list before
513 * its v_usecount reaches zero.
514 */
515 KASSERT(vip->vi_lrulisthd == &lru_vrele_list);
516 TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist);
517 vip->vi_lrulisthd = &lru_hold_list;
518 TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist);
519
520 vdrain_retry = true;
521 mutex_exit(&vdrain_lock);
522
523 mutex_enter(vp->v_interlock);
524 vrelel(vp, 0);
525 fstrans_done(mp);
526
527 mutex_enter(&vdrain_lock);
528 }
529
530 /*
531 * Helper thread to keep the number of vnodes below desiredvnodes
532 * and release vnodes from asynchronous vrele.
533 */
534 static void
535 vdrain_thread(void *cookie)
536 {
537 vnodelst_t *listhd[] = {
538 &lru_vrele_list, &lru_free_list, &lru_hold_list
539 };
540 int i;
541 u_int target;
542 vnode_impl_t *vip, *marker;
543
544 marker = VNODE_TO_VIMPL(vnalloc_marker(NULL));
545
546 mutex_enter(&vdrain_lock);
547
548 for (;;) {
549 vdrain_retry = false;
550 target = desiredvnodes - desiredvnodes/10;
551
552 for (i = 0; i < __arraycount(listhd); i++) {
553 TAILQ_INSERT_HEAD(listhd[i], marker, vi_lrulist);
554 while ((vip = TAILQ_NEXT(marker, vi_lrulist))) {
555 TAILQ_REMOVE(listhd[i], marker, vi_lrulist);
556 TAILQ_INSERT_AFTER(listhd[i], vip, marker,
557 vi_lrulist);
558 if (listhd[i] == &lru_vrele_list)
559 vdrain_vrele(VIMPL_TO_VNODE(vip));
560 else if (numvnodes < target)
561 break;
562 else
563 vdrain_remove(VIMPL_TO_VNODE(vip));
564 }
565 TAILQ_REMOVE(listhd[i], marker, vi_lrulist);
566 }
567
568 if (vdrain_retry) {
569 mutex_exit(&vdrain_lock);
570 yield();
571 mutex_enter(&vdrain_lock);
572 } else {
573 vdrain_gen++;
574 cv_broadcast(&vdrain_gen_cv);
575 cv_wait(&vdrain_cv, &vdrain_lock);
576 }
577 }
578 }
579
580 /*
581 * vput: unlock and release the reference.
582 */
583 void
584 vput(vnode_t *vp)
585 {
586
587 VOP_UNLOCK(vp);
588 vrele(vp);
589 }
590
591 /*
592 * Try to drop reference on a vnode. Abort if we are releasing the
593 * last reference. Note: this _must_ succeed if not the last reference.
594 */
595 static inline bool
596 vtryrele(vnode_t *vp)
597 {
598 u_int use, next;
599
600 for (use = vp->v_usecount;; use = next) {
601 if (use == 1) {
602 return false;
603 }
604 KASSERT(use > 1);
605 next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
606 if (__predict_true(next == use)) {
607 return true;
608 }
609 }
610 }
611
612 /*
613 * Vnode release. If reference count drops to zero, call inactive
614 * routine and either return to freelist or free to the pool.
615 */
616 static void
617 vrelel(vnode_t *vp, int flags)
618 {
619 bool recycle, defer;
620 int error;
621
622 KASSERT(mutex_owned(vp->v_interlock));
623
624 if (__predict_false(vp->v_op == dead_vnodeop_p &&
625 VSTATE_GET(vp) != VS_RECLAIMED)) {
626 vnpanic(vp, "dead but not clean");
627 }
628
629 /*
630 * If not the last reference, just drop the reference count
631 * and unlock.
632 */
633 if (vtryrele(vp)) {
634 mutex_exit(vp->v_interlock);
635 return;
636 }
637 if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
638 vnpanic(vp, "%s: bad ref count", __func__);
639 }
640
641 #ifdef DIAGNOSTIC
642 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
643 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
644 vprint("vrelel: missing VOP_CLOSE()", vp);
645 }
646 #endif
647
648 /*
649 * If not clean, deactivate the vnode, but preserve
650 * our reference across the call to VOP_INACTIVE().
651 */
652 if (VSTATE_GET(vp) != VS_RECLAIMED) {
653 recycle = false;
654
655 /*
656 * XXX This ugly block can be largely eliminated if
657 * locking is pushed down into the file systems.
658 *
659 * Defer vnode release to vdrain_thread if caller
660 * requests it explicitly or is the pagedaemon.
661 */
662 if ((curlwp == uvm.pagedaemon_lwp) ||
663 (flags & VRELEL_ASYNC_RELE) != 0) {
664 defer = true;
665 } else if (curlwp == vdrain_lwp) {
666 /*
667 * We have to try harder.
668 */
669 mutex_exit(vp->v_interlock);
670 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
671 KASSERTMSG((error == 0), "vn_lock failed: %d", error);
672 mutex_enter(vp->v_interlock);
673 defer = false;
674 } else {
675 /* If we can't acquire the lock, then defer. */
676 mutex_exit(vp->v_interlock);
677 error = vn_lock(vp,
678 LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
679 defer = (error != 0);
680 mutex_enter(vp->v_interlock);
681 }
682
683 KASSERT(mutex_owned(vp->v_interlock));
684 KASSERT(! (curlwp == vdrain_lwp && defer));
685
686 if (defer) {
687 /*
688 * Defer reclaim to the kthread; it's not safe to
689 * clean it here. We donate it our last reference.
690 */
691 lru_requeue(vp, &lru_vrele_list);
692 mutex_exit(vp->v_interlock);
693 return;
694 }
695
696 /*
697 * If the node got another reference while we
698 * released the interlock, don't try to inactivate it yet.
699 */
700 if (__predict_false(vtryrele(vp))) {
701 VOP_UNLOCK(vp);
702 mutex_exit(vp->v_interlock);
703 return;
704 }
705 VSTATE_CHANGE(vp, VS_ACTIVE, VS_BLOCKED);
706 mutex_exit(vp->v_interlock);
707
708 /*
709 * The vnode must not gain another reference while being
710 * deactivated. If VOP_INACTIVE() indicates that
711 * the described file has been deleted, then recycle
712 * the vnode.
713 *
714 * Note that VOP_INACTIVE() will drop the vnode lock.
715 */
716 VOP_INACTIVE(vp, &recycle);
717 if (recycle) {
			/* vcache_reclaim() below will drop the lock. */
719 if (vn_lock(vp, LK_EXCLUSIVE) != 0)
720 recycle = false;
721 }
722 mutex_enter(vp->v_interlock);
723 VSTATE_CHANGE(vp, VS_BLOCKED, VS_ACTIVE);
724 if (!recycle) {
725 if (vtryrele(vp)) {
726 mutex_exit(vp->v_interlock);
727 return;
728 }
729 }
730
731 /* Take care of space accounting. */
732 if (vp->v_iflag & VI_EXECMAP) {
733 atomic_add_int(&uvmexp.execpages,
734 -vp->v_uobj.uo_npages);
735 atomic_add_int(&uvmexp.filepages,
736 vp->v_uobj.uo_npages);
737 }
738 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
739 vp->v_vflag &= ~VV_MAPPED;
740
741 /*
742 * Recycle the vnode if the file is now unused (unlinked),
743 * otherwise just free it.
744 */
745 if (recycle) {
746 VSTATE_ASSERT(vp, VS_ACTIVE);
747 vcache_reclaim(vp);
748 }
749 KASSERT(vp->v_usecount > 0);
750 }
751
752 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
753 /* Gained another reference while being reclaimed. */
754 mutex_exit(vp->v_interlock);
755 return;
756 }
757
758 if (VSTATE_GET(vp) == VS_RECLAIMED && vp->v_holdcnt == 0) {
759 /*
760 * It's clean so destroy it. It isn't referenced
761 * anywhere since it has been reclaimed.
762 */
763 vcache_free(VNODE_TO_VIMPL(vp));
764 } else {
765 /*
766 * Otherwise, put it back onto the freelist. It
767 * can't be destroyed while still associated with
768 * a file system.
769 */
770 lru_requeue(vp, lru_which(vp));
771 mutex_exit(vp->v_interlock);
772 }
773 }
774
775 void
776 vrele(vnode_t *vp)
777 {
778
779 if (vtryrele(vp)) {
780 return;
781 }
782 mutex_enter(vp->v_interlock);
783 vrelel(vp, 0);
784 }
785
786 /*
787 * Asynchronous vnode release, vnode is released in different context.
788 */
789 void
790 vrele_async(vnode_t *vp)
791 {
792
793 if (vtryrele(vp)) {
794 return;
795 }
796 mutex_enter(vp->v_interlock);
797 vrelel(vp, VRELEL_ASYNC_RELE);
798 }
799
800 /*
801 * Vnode reference, where a reference is already held by some other
802 * object (for example, a file structure).
803 */
804 void
805 vref(vnode_t *vp)
806 {
807
808 KASSERT(vp->v_usecount != 0);
809
810 atomic_inc_uint(&vp->v_usecount);
811 }
812
813 /*
814 * Page or buffer structure gets a reference.
815 * Called with v_interlock held.
816 */
817 void
818 vholdl(vnode_t *vp)
819 {
820
821 KASSERT(mutex_owned(vp->v_interlock));
822
823 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0)
824 lru_requeue(vp, lru_which(vp));
825 }
826
827 /*
828 * Page or buffer structure frees a reference.
829 * Called with v_interlock held.
830 */
831 void
832 holdrelel(vnode_t *vp)
833 {
834
835 KASSERT(mutex_owned(vp->v_interlock));
836
837 if (vp->v_holdcnt <= 0) {
838 vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
839 }
840
841 vp->v_holdcnt--;
842 if (vp->v_holdcnt == 0 && vp->v_usecount == 0)
843 lru_requeue(vp, lru_which(vp));
844 }
845
846 /*
847 * Disassociate the underlying file system from a vnode.
848 *
849 * Must be called with vnode locked and will return unlocked.
850 * Must be called with the interlock held, and will return with it held.
851 */
852 static void
853 vclean(vnode_t *vp)
854 {
855 lwp_t *l = curlwp;
856 bool recycle, active;
857 int error;
858
859 KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
860 VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
861 KASSERT(mutex_owned(vp->v_interlock));
862 KASSERT(vp->v_usecount != 0);
863
864 active = (vp->v_usecount > 1);
865 /*
866 * Prevent the vnode from being recycled or brought into use
867 * while we clean it out.
868 */
	VSTATE_CHANGE(vp, VS_ACTIVE, VS_RECLAIMING);
870 if (vp->v_iflag & VI_EXECMAP) {
871 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
872 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
873 }
874 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
875 mutex_exit(vp->v_interlock);
876
877 /*
878 * Clean out any cached data associated with the vnode.
879 * If purging an active vnode, it must be closed and
880 * deactivated before being reclaimed. Note that the
881 * VOP_INACTIVE will unlock the vnode.
882 */
883 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
884 if (error != 0) {
885 if (wapbl_vphaswapbl(vp))
886 WAPBL_DISCARD(wapbl_vptomp(vp));
887 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
888 }
889 KASSERTMSG((error == 0), "vinvalbuf failed: %d", error);
890 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
891 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
892 spec_node_revoke(vp);
893 }
894 if (active) {
895 VOP_INACTIVE(vp, &recycle);
896 } else {
897 /*
898 * Any other processes trying to obtain this lock must first
		 * wait for VS_RECLAIMED, then call the new lock operation.
900 */
901 VOP_UNLOCK(vp);
902 }
903
904 /* Disassociate the underlying file system from the vnode. */
905 if (VOP_RECLAIM(vp)) {
906 vnpanic(vp, "%s: cannot reclaim", __func__);
907 }
908
909 KASSERT(vp->v_data == NULL);
910 KASSERT(vp->v_uobj.uo_npages == 0);
911
912 if (vp->v_type == VREG && vp->v_ractx != NULL) {
913 uvm_ra_freectx(vp->v_ractx);
914 vp->v_ractx = NULL;
915 }
916
917 /* Purge name cache. */
918 cache_purge(vp);
919
920 /* Move to dead mount. */
921 vp->v_vflag &= ~VV_ROOT;
922 atomic_inc_uint(&dead_rootmount->mnt_refcnt);
923 vfs_insmntque(vp, dead_rootmount);
924
925 /* Done with purge, notify sleepers of the grim news. */
926 mutex_enter(vp->v_interlock);
927 vp->v_op = dead_vnodeop_p;
928 vp->v_vflag |= VV_LOCKSWORK;
	VSTATE_CHANGE(vp, VS_RECLAIMING, VS_RECLAIMED);
930 vp->v_tag = VT_NON;
931 KNOTE(&vp->v_klist, NOTE_REVOKE);
932
933 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
934 }
935
936 /*
937 * Recycle an unused vnode if caller holds the last reference.
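 *
 * For example (mirroring the use in vdrain_remove() above), a caller that
 * has just taken a reference with vcache_vget() can attempt recycling and
 * otherwise simply drop that reference:
 *
 *	if (!vrecycle(vp))
 *		vrele(vp);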
938 */
939 bool
940 vrecycle(vnode_t *vp)
941 {
942 int error __diagused;
943
944 mutex_enter(vp->v_interlock);
945
946 /* Make sure we hold the last reference. */
947 VSTATE_WAIT_STABLE(vp);
948 if (vp->v_usecount != 1) {
949 mutex_exit(vp->v_interlock);
950 return false;
951 }
952
953 /* If the vnode is already clean we're done. */
954 if (VSTATE_GET(vp) != VS_ACTIVE) {
955 VSTATE_ASSERT(vp, VS_RECLAIMED);
956 vrelel(vp, 0);
957 return true;
958 }
959
960 /* Prevent further references until the vnode is locked. */
961 VSTATE_CHANGE(vp, VS_ACTIVE, VS_BLOCKED);
962 mutex_exit(vp->v_interlock);
963
964 error = vn_lock(vp, LK_EXCLUSIVE);
965 KASSERT(error == 0);
966
967 mutex_enter(vp->v_interlock);
968 VSTATE_CHANGE(vp, VS_BLOCKED, VS_ACTIVE);
969
970 KASSERT(vp->v_usecount == 1);
971 vcache_reclaim(vp);
972 vrelel(vp, 0);
973
974 return true;
975 }
976
977 /*
978 * Eliminate all activity associated with the requested vnode
979 * and with all vnodes aliased to the requested vnode.
980 */
981 void
982 vrevoke(vnode_t *vp)
983 {
984 vnode_t *vq;
985 enum vtype type;
986 dev_t dev;
987
988 KASSERT(vp->v_usecount > 0);
989
990 mutex_enter(vp->v_interlock);
991 VSTATE_WAIT_STABLE(vp);
992 if (VSTATE_GET(vp) == VS_RECLAIMED) {
993 mutex_exit(vp->v_interlock);
994 return;
995 } else if (vp->v_type != VBLK && vp->v_type != VCHR) {
996 atomic_inc_uint(&vp->v_usecount);
997 mutex_exit(vp->v_interlock);
998 vgone(vp);
999 return;
1000 } else {
1001 dev = vp->v_rdev;
1002 type = vp->v_type;
1003 mutex_exit(vp->v_interlock);
1004 }
1005
1006 while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
1007 vgone(vq);
1008 }
1009 }
1010
1011 /*
1012 * Eliminate all activity associated with a vnode in preparation for
1013 * reuse. Drops a reference from the vnode.
1014 */
1015 void
1016 vgone(vnode_t *vp)
1017 {
1018
1019 if (vn_lock(vp, LK_EXCLUSIVE) != 0) {
1020 VSTATE_ASSERT(vp, VS_RECLAIMED);
1021 vrele(vp);
1022 }
1023
1024 mutex_enter(vp->v_interlock);
1025 vclean(vp);
1026 vrelel(vp, 0);
1027 }
1028
1029 static inline uint32_t
1030 vcache_hash(const struct vcache_key *key)
1031 {
1032 uint32_t hash = HASH32_BUF_INIT;
1033
1034 hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash);
1035 hash = hash32_buf(key->vk_key, key->vk_key_len, hash);
1036 return hash;
1037 }
1038
1039 static void
1040 vcache_init(void)
1041 {
1042
1043 vcache_pool = pool_cache_init(sizeof(vnode_impl_t), 0, 0, 0,
1044 "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
1045 KASSERT(vcache_pool != NULL);
1046 mutex_init(&vcache_lock, MUTEX_DEFAULT, IPL_NONE);
1047 cv_init(&vcache_cv, "vcache");
1048 vcache_hashsize = desiredvnodes;
1049 vcache_hashtab = hashinit(desiredvnodes, HASH_SLIST, true,
1050 &vcache_hashmask);
1051 }
1052
1053 static void
1054 vcache_reinit(void)
1055 {
1056 int i;
1057 uint32_t hash;
1058 u_long oldmask, newmask;
1059 struct hashhead *oldtab, *newtab;
1060 vnode_impl_t *vip;
1061
1062 newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask);
1063 mutex_enter(&vcache_lock);
1064 oldtab = vcache_hashtab;
1065 oldmask = vcache_hashmask;
1066 vcache_hashsize = desiredvnodes;
1067 vcache_hashtab = newtab;
1068 vcache_hashmask = newmask;
1069 for (i = 0; i <= oldmask; i++) {
1070 while ((vip = SLIST_FIRST(&oldtab[i])) != NULL) {
1071 SLIST_REMOVE(&oldtab[i], vip, vnode_impl, vi_hash);
1072 hash = vcache_hash(&vip->vi_key);
1073 SLIST_INSERT_HEAD(&newtab[hash & vcache_hashmask],
1074 vip, vi_hash);
1075 }
1076 }
1077 mutex_exit(&vcache_lock);
1078 hashdone(oldtab, HASH_SLIST, oldmask);
1079 }
1080
1081 static inline vnode_impl_t *
1082 vcache_hash_lookup(const struct vcache_key *key, uint32_t hash)
1083 {
1084 struct hashhead *hashp;
1085 vnode_impl_t *vip;
1086
1087 KASSERT(mutex_owned(&vcache_lock));
1088
1089 hashp = &vcache_hashtab[hash & vcache_hashmask];
1090 SLIST_FOREACH(vip, hashp, vi_hash) {
1091 if (key->vk_mount != vip->vi_key.vk_mount)
1092 continue;
1093 if (key->vk_key_len != vip->vi_key.vk_key_len)
1094 continue;
1095 if (memcmp(key->vk_key, vip->vi_key.vk_key, key->vk_key_len))
1096 continue;
1097 return vip;
1098 }
1099 return NULL;
1100 }
1101
1102 /*
1103 * Allocate a new, uninitialized vcache node.
1104 */
1105 static vnode_impl_t *
1106 vcache_alloc(void)
1107 {
1108 vnode_impl_t *vip;
1109 vnode_t *vp;
1110
1111 vip = pool_cache_get(vcache_pool, PR_WAITOK);
1112 memset(vip, 0, sizeof(*vip));
1113
1114 /* SLIST_INIT(&vip->vi_hash); */
1115
1116 vp = VIMPL_TO_VNODE(vip);
1117 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
1118 cv_init(&vp->v_cv, "vnode");
1119 /* LIST_INIT(&vp->v_nclist); */
1120 /* LIST_INIT(&vp->v_dnclist); */
1121
1122 rw_init(&vp->v_lock);
1123 vp->v_usecount = 1;
1124 vp->v_type = VNON;
1125 vp->v_size = vp->v_writesize = VSIZENOTSET;
1126
1127 vip->vi_state = VS_LOADING;
1128
1129 lru_requeue(vp, &lru_free_list);
1130
1131 return vip;
1132 }
1133
1134 /*
1135 * Free an unused, unreferenced vcache node.
1136 * v_interlock locked on entry.
1137 */
1138 static void
1139 vcache_free(vnode_impl_t *vip)
1140 {
1141 vnode_t *vp;
1142
1143 vp = VIMPL_TO_VNODE(vip);
1144 KASSERT(mutex_owned(vp->v_interlock));
1145
1146 KASSERT(vp->v_usecount == 0);
1147 KASSERT(vp->v_holdcnt == 0);
1148 KASSERT(vp->v_writecount == 0);
1149 lru_requeue(vp, NULL);
1150 mutex_exit(vp->v_interlock);
1151
1152 vfs_insmntque(vp, NULL);
1153 if (vp->v_type == VBLK || vp->v_type == VCHR)
1154 spec_node_destroy(vp);
1155
1156 rw_destroy(&vp->v_lock);
1157 uvm_obj_destroy(&vp->v_uobj, true);
1158 cv_destroy(&vp->v_cv);
1159 pool_cache_put(vcache_pool, vip);
1160 }
1161
1162 /*
1163 * Try to get an initial reference on this cached vnode.
1164 * Returns zero on success, ENOENT if the vnode has been reclaimed and
1165 * EBUSY if the vnode state is unstable.
1166 *
1167 * v_interlock locked on entry and unlocked on exit.
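 *
 * As a hedged sketch (the caller shown is hypothetical), a lookup path
 * that must not sleep would typically fall back to the blocking
 * vcache_vget() on EBUSY rather than spin:
 *
 *	mutex_enter(vp->v_interlock);
 *	error = vcache_tryvget(vp);	(drops v_interlock)
 *	if (error == EBUSY)
 *		... retry via vcache_vget() or restart the lookup ...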
1168 */
1169 int
1170 vcache_tryvget(vnode_t *vp)
1171 {
1172 int error = 0;
1173
1174 KASSERT(mutex_owned(vp->v_interlock));
1175
1176 if (__predict_false(VSTATE_GET(vp) == VS_RECLAIMED))
1177 error = ENOENT;
1178 else if (__predict_false(VSTATE_GET(vp) != VS_ACTIVE))
1179 error = EBUSY;
1180 else if (vp->v_usecount == 0)
1181 vp->v_usecount = 1;
1182 else
1183 atomic_inc_uint(&vp->v_usecount);
1184
1185 mutex_exit(vp->v_interlock);
1186
1187 return error;
1188 }
1189
1190 /*
1191 * Try to get an initial reference on this cached vnode.
1192 * Returns zero on success and ENOENT if the vnode has been reclaimed.
1193 * Will wait for the vnode state to be stable.
1194 *
1195 * v_interlock locked on entry and unlocked on exit.
1196 */
1197 int
1198 vcache_vget(vnode_t *vp)
1199 {
1200
1201 KASSERT(mutex_owned(vp->v_interlock));
1202
1203 /* Increment hold count to prevent vnode from disappearing. */
1204 vp->v_holdcnt++;
1205 VSTATE_WAIT_STABLE(vp);
1206 vp->v_holdcnt--;
1207
1208 /* If this was the last reference to a reclaimed vnode free it now. */
1209 if (__predict_false(VSTATE_GET(vp) == VS_RECLAIMED)) {
1210 if (vp->v_holdcnt == 0 && vp->v_usecount == 0)
1211 vcache_free(VNODE_TO_VIMPL(vp));
1212 else
1213 mutex_exit(vp->v_interlock);
1214 return ENOENT;
1215 }
1216 VSTATE_ASSERT(vp, VS_ACTIVE);
1217 if (vp->v_usecount == 0)
1218 vp->v_usecount = 1;
1219 else
1220 atomic_inc_uint(&vp->v_usecount);
1221
1222 mutex_exit(vp->v_interlock);
1223
1224 return 0;
1225 }
1226
1227 /*
1228 * Get a vnode / fs node pair by key and return it referenced through vpp.
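 *
 * As a hedged example (the key layout is file system specific and only
 * assumed here), a file system's VFS_VGET() or VOP_LOOKUP() would obtain
 * the vnode for an inode number roughly like:
 *
 *	error = vcache_get(mp, &ino, sizeof(ino), &vp);
 *	if (error)
 *		return error;
 *	(vp is now referenced but unlocked)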
1229 */
1230 int
1231 vcache_get(struct mount *mp, const void *key, size_t key_len,
1232 struct vnode **vpp)
1233 {
1234 int error;
1235 uint32_t hash;
1236 const void *new_key;
1237 struct vnode *vp;
1238 struct vcache_key vcache_key;
1239 vnode_impl_t *vip, *new_vip;
1240
1241 new_key = NULL;
1242 *vpp = NULL;
1243
1244 vcache_key.vk_mount = mp;
1245 vcache_key.vk_key = key;
1246 vcache_key.vk_key_len = key_len;
1247 hash = vcache_hash(&vcache_key);
1248
1249 again:
1250 mutex_enter(&vcache_lock);
1251 vip = vcache_hash_lookup(&vcache_key, hash);
1252
1253 /* If found, take a reference or retry. */
1254 if (__predict_true(vip != NULL)) {
1255 /*
1256 * If the vnode is loading we cannot take the v_interlock
1257 * here as it might change during load (see uvm_obj_setlock()).
1258 * As changing state from VS_LOADING requires both vcache_lock
1259 * and v_interlock it is safe to test with vcache_lock held.
1260 *
1261 * Wait for vnodes changing state from VS_LOADING and retry.
1262 */
1263 if (__predict_false(vip->vi_state == VS_LOADING)) {
1264 cv_wait(&vcache_cv, &vcache_lock);
1265 mutex_exit(&vcache_lock);
1266 goto again;
1267 }
1268 vp = VIMPL_TO_VNODE(vip);
1269 mutex_enter(vp->v_interlock);
1270 mutex_exit(&vcache_lock);
1271 error = vcache_vget(vp);
1272 if (error == ENOENT)
1273 goto again;
1274 if (error == 0)
1275 *vpp = vp;
1276 KASSERT((error != 0) == (*vpp == NULL));
1277 return error;
1278 }
1279 mutex_exit(&vcache_lock);
1280
1281 /* Allocate and initialize a new vcache / vnode pair. */
1282 error = vfs_busy(mp, NULL);
1283 if (error)
1284 return error;
1285 new_vip = vcache_alloc();
1286 new_vip->vi_key = vcache_key;
1287 vp = VIMPL_TO_VNODE(new_vip);
1288 mutex_enter(&vcache_lock);
1289 vip = vcache_hash_lookup(&vcache_key, hash);
1290 if (vip == NULL) {
1291 SLIST_INSERT_HEAD(&vcache_hashtab[hash & vcache_hashmask],
1292 new_vip, vi_hash);
1293 vip = new_vip;
1294 }
1295
1296 /* If another thread beat us inserting this node, retry. */
1297 if (vip != new_vip) {
1298 mutex_enter(vp->v_interlock);
1299 VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED);
1300 mutex_exit(&vcache_lock);
1301 vrelel(vp, 0);
1302 vfs_unbusy(mp, false, NULL);
1303 goto again;
1304 }
1305 mutex_exit(&vcache_lock);
1306
	/* Load the fs node.  Exclusive as new_vip is VS_LOADING. */
1308 error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
1309 if (error) {
1310 mutex_enter(&vcache_lock);
1311 SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask],
1312 new_vip, vnode_impl, vi_hash);
1313 mutex_enter(vp->v_interlock);
1314 VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED);
1315 mutex_exit(&vcache_lock);
1316 vrelel(vp, 0);
1317 vfs_unbusy(mp, false, NULL);
1318 KASSERT(*vpp == NULL);
1319 return error;
1320 }
1321 KASSERT(new_key != NULL);
1322 KASSERT(memcmp(key, new_key, key_len) == 0);
1323 KASSERT(vp->v_op != NULL);
1324 vfs_insmntque(vp, mp);
1325 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
1326 vp->v_vflag |= VV_MPSAFE;
1327 vfs_unbusy(mp, true, NULL);
1328
1329 /* Finished loading, finalize node. */
1330 mutex_enter(&vcache_lock);
1331 new_vip->vi_key.vk_key = new_key;
1332 mutex_enter(vp->v_interlock);
1333 VSTATE_CHANGE(vp, VS_LOADING, VS_ACTIVE);
1334 mutex_exit(vp->v_interlock);
1335 mutex_exit(&vcache_lock);
1336 *vpp = vp;
1337 return 0;
1338 }
1339
1340 /*
1341 * Create a new vnode / fs node pair and return it referenced through vpp.
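 *
 * A minimal sketch of the intended use from a file system's VOP_CREATE()
 * implementation (the names shown are illustrative only):
 *
 *	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, &vp);
 *	if (error)
 *		return error;
 *	(vp is referenced, unlocked and already on the mount's vnode list)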
1342 */
1343 int
1344 vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap,
1345 kauth_cred_t cred, struct vnode **vpp)
1346 {
1347 int error;
1348 uint32_t hash;
1349 struct vnode *vp, *ovp;
1350 vnode_impl_t *vip, *ovip;
1351
1352 *vpp = NULL;
1353
1354 /* Allocate and initialize a new vcache / vnode pair. */
1355 error = vfs_busy(mp, NULL);
1356 if (error)
1357 return error;
1358 vip = vcache_alloc();
1359 vip->vi_key.vk_mount = mp;
1360 vp = VIMPL_TO_VNODE(vip);
1361
1362 /* Create and load the fs node. */
1363 error = VFS_NEWVNODE(mp, dvp, vp, vap, cred,
1364 &vip->vi_key.vk_key_len, &vip->vi_key.vk_key);
1365 if (error) {
1366 mutex_enter(&vcache_lock);
1367 mutex_enter(vp->v_interlock);
1368 VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED);
1369 mutex_exit(&vcache_lock);
1370 vrelel(vp, 0);
1371 vfs_unbusy(mp, false, NULL);
1372 KASSERT(*vpp == NULL);
1373 return error;
1374 }
1375 KASSERT(vip->vi_key.vk_key != NULL);
1376 KASSERT(vp->v_op != NULL);
1377 hash = vcache_hash(&vip->vi_key);
1378
1379 /* Wait for previous instance to be reclaimed, then insert new node. */
1380 mutex_enter(&vcache_lock);
1381 while ((ovip = vcache_hash_lookup(&vip->vi_key, hash))) {
1382 ovp = VIMPL_TO_VNODE(ovip);
1383 mutex_enter(ovp->v_interlock);
1384 mutex_exit(&vcache_lock);
1385 error = vcache_vget(ovp);
1386 KASSERT(error == ENOENT);
1387 mutex_enter(&vcache_lock);
1388 }
1389 SLIST_INSERT_HEAD(&vcache_hashtab[hash & vcache_hashmask],
1390 vip, vi_hash);
1391 mutex_exit(&vcache_lock);
1392 vfs_insmntque(vp, mp);
1393 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
1394 vp->v_vflag |= VV_MPSAFE;
1395 vfs_unbusy(mp, true, NULL);
1396
1397 /* Finished loading, finalize node. */
1398 mutex_enter(&vcache_lock);
1399 mutex_enter(vp->v_interlock);
1400 VSTATE_CHANGE(vp, VS_LOADING, VS_ACTIVE);
1401 mutex_exit(&vcache_lock);
1402 mutex_exit(vp->v_interlock);
1403 *vpp = vp;
1404 return 0;
1405 }
1406
1407 /*
 * Prepare key change: update the old cache node's key and lock the new cache node.
1409 * Return an error if the new node already exists.
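 *
 * The enter/exit pair is intended to bracket the file system's own key
 * update; a hedged sketch of the expected sequence is:
 *
 *	error = vcache_rekey_enter(mp, vp, &old_key, sizeof(old_key),
 *	    &new_key, sizeof(new_key));
 *	if (error)
 *		return error;
 *	(update the fs node so it answers to the new key)
 *	vcache_rekey_exit(mp, vp, &old_key, sizeof(old_key),
 *	    &new_key, sizeof(new_key));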
1410 */
1411 int
1412 vcache_rekey_enter(struct mount *mp, struct vnode *vp,
1413 const void *old_key, size_t old_key_len,
1414 const void *new_key, size_t new_key_len)
1415 {
1416 uint32_t old_hash, new_hash;
1417 struct vcache_key old_vcache_key, new_vcache_key;
1418 vnode_impl_t *vip, *new_vip;
1419 struct vnode *new_vp;
1420
1421 old_vcache_key.vk_mount = mp;
1422 old_vcache_key.vk_key = old_key;
1423 old_vcache_key.vk_key_len = old_key_len;
1424 old_hash = vcache_hash(&old_vcache_key);
1425
1426 new_vcache_key.vk_mount = mp;
1427 new_vcache_key.vk_key = new_key;
1428 new_vcache_key.vk_key_len = new_key_len;
1429 new_hash = vcache_hash(&new_vcache_key);
1430
1431 new_vip = vcache_alloc();
1432 new_vip->vi_key = new_vcache_key;
1433 new_vp = VIMPL_TO_VNODE(new_vip);
1434
1435 /* Insert locked new node used as placeholder. */
1436 mutex_enter(&vcache_lock);
1437 vip = vcache_hash_lookup(&new_vcache_key, new_hash);
1438 if (vip != NULL) {
1439 mutex_enter(new_vp->v_interlock);
1440 VSTATE_CHANGE(new_vp, VS_LOADING, VS_RECLAIMED);
1441 mutex_exit(&vcache_lock);
1442 vrelel(new_vp, 0);
1443 return EEXIST;
1444 }
1445 SLIST_INSERT_HEAD(&vcache_hashtab[new_hash & vcache_hashmask],
1446 new_vip, vi_hash);
1447
	/* Replace the old node's key with the temporary copy. */
1449 vip = vcache_hash_lookup(&old_vcache_key, old_hash);
1450 KASSERT(vip != NULL);
1451 KASSERT(VIMPL_TO_VNODE(vip) == vp);
1452 KASSERT(vip->vi_key.vk_key != old_vcache_key.vk_key);
1453 vip->vi_key = old_vcache_key;
1454 mutex_exit(&vcache_lock);
1455 return 0;
1456 }
1457
1458 /*
1459 * Key change complete: update old node and remove placeholder.
1460 */
1461 void
1462 vcache_rekey_exit(struct mount *mp, struct vnode *vp,
1463 const void *old_key, size_t old_key_len,
1464 const void *new_key, size_t new_key_len)
1465 {
1466 uint32_t old_hash, new_hash;
1467 struct vcache_key old_vcache_key, new_vcache_key;
1468 vnode_impl_t *vip, *new_vip;
1469 struct vnode *new_vp;
1470
1471 old_vcache_key.vk_mount = mp;
1472 old_vcache_key.vk_key = old_key;
1473 old_vcache_key.vk_key_len = old_key_len;
1474 old_hash = vcache_hash(&old_vcache_key);
1475
1476 new_vcache_key.vk_mount = mp;
1477 new_vcache_key.vk_key = new_key;
1478 new_vcache_key.vk_key_len = new_key_len;
1479 new_hash = vcache_hash(&new_vcache_key);
1480
1481 mutex_enter(&vcache_lock);
1482
1483 /* Lookup old and new node. */
1484 vip = vcache_hash_lookup(&old_vcache_key, old_hash);
1485 KASSERT(vip != NULL);
1486 KASSERT(VIMPL_TO_VNODE(vip) == vp);
1487
1488 new_vip = vcache_hash_lookup(&new_vcache_key, new_hash);
1489 KASSERT(new_vip != NULL);
1490 KASSERT(new_vip->vi_key.vk_key_len == new_key_len);
1491 new_vp = VIMPL_TO_VNODE(new_vip);
1492 mutex_enter(new_vp->v_interlock);
1493 VSTATE_ASSERT(VIMPL_TO_VNODE(new_vip), VS_LOADING);
1494
1495 /* Rekey old node and put it onto its new hashlist. */
1496 vip->vi_key = new_vcache_key;
1497 if (old_hash != new_hash) {
1498 SLIST_REMOVE(&vcache_hashtab[old_hash & vcache_hashmask],
1499 vip, vnode_impl, vi_hash);
1500 SLIST_INSERT_HEAD(&vcache_hashtab[new_hash & vcache_hashmask],
1501 vip, vi_hash);
1502 }
1503
1504 /* Remove new node used as placeholder. */
1505 SLIST_REMOVE(&vcache_hashtab[new_hash & vcache_hashmask],
1506 new_vip, vnode_impl, vi_hash);
1507 VSTATE_CHANGE(new_vp, VS_LOADING, VS_RECLAIMED);
1508 mutex_exit(&vcache_lock);
1509 vrelel(new_vp, 0);
1510 }
1511
/*
 * Disassociate the underlying file system from a vnode and remove
 * the vnode / fs node pair from the cache.
 *
 * Must be called with vnode locked and will return unlocked.
 * Must be called with the interlock held, and will return with it held.
 */
static void
vcache_reclaim(vnode_t *vp)
{
	lwp_t *l = curlwp;
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	uint32_t hash;
	uint8_t temp_buf[64], *temp_key;
	size_t temp_key_len;
	bool recycle, active;
	int error;

	KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
	    VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount != 0);

1529 active = (vp->v_usecount > 1);
1530 temp_key_len = vip->vi_key.vk_key_len;
1531 /*
1532 * Prevent the vnode from being recycled or brought into use
1533 * while we clean it out.
1534 */
1535 VSTATE_CHANGE(vp, VS_ACTIVE, VS_RECLAIMING);
1536 if (vp->v_iflag & VI_EXECMAP) {
1537 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
1538 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
1539 }
1540 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
1541 mutex_exit(vp->v_interlock);
1542
1543 /* Replace the vnode key with a temporary copy. */
1544 if (vip->vi_key.vk_key_len > sizeof(temp_buf)) {
1545 temp_key = kmem_alloc(temp_key_len, KM_SLEEP);
1546 } else {
1547 temp_key = temp_buf;
1548 }
1549 mutex_enter(&vcache_lock);
1550 memcpy(temp_key, vip->vi_key.vk_key, temp_key_len);
1551 vip->vi_key.vk_key = temp_key;
1552 mutex_exit(&vcache_lock);
1553
1554 /*
1555 * Clean out any cached data associated with the vnode.
1556 * If purging an active vnode, it must be closed and
1557 * deactivated before being reclaimed.
1558 */
1559 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1560 if (error != 0) {
1561 if (wapbl_vphaswapbl(vp))
1562 WAPBL_DISCARD(wapbl_vptomp(vp));
1563 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1564 }
1565 KASSERTMSG((error == 0), "vinvalbuf failed: %d", error);
1566 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1567 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
1568 spec_node_revoke(vp);
1569 }
1570
1571 /*
1572 * Disassociate the underlying file system from the vnode.
1573 * Note that the VOP_INACTIVE will unlock the vnode.
1574 */
1575 VOP_INACTIVE(vp, &recycle);
1576 if (VOP_RECLAIM(vp)) {
1577 vnpanic(vp, "%s: cannot reclaim", __func__);
1578 }
1579
1580 KASSERT(vp->v_data == NULL);
1581 KASSERT(vp->v_uobj.uo_npages == 0);
1582
1583 if (vp->v_type == VREG && vp->v_ractx != NULL) {
1584 uvm_ra_freectx(vp->v_ractx);
1585 vp->v_ractx = NULL;
1586 }
1587
1588 /* Purge name cache. */
1589 cache_purge(vp);
1590
1591 /* Move to dead mount. */
1592 vp->v_vflag &= ~VV_ROOT;
1593 atomic_inc_uint(&dead_rootmount->mnt_refcnt);
1594 vfs_insmntque(vp, dead_rootmount);
1595
1596 /* Remove from vnode cache. */
1597 hash = vcache_hash(&vip->vi_key);
1598 mutex_enter(&vcache_lock);
1599 KASSERT(vip == vcache_hash_lookup(&vip->vi_key, hash));
1600 SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask],
1601 vip, vnode_impl, vi_hash);
1602 mutex_exit(&vcache_lock);
1603 if (temp_key != temp_buf)
1604 kmem_free(temp_key, temp_key_len);
1605
1606 /* Done with purge, notify sleepers of the grim news. */
1607 mutex_enter(vp->v_interlock);
1608 vp->v_op = dead_vnodeop_p;
1609 vp->v_vflag |= VV_LOCKSWORK;
1610 VSTATE_CHANGE(vp, VS_RECLAIMING, VS_RECLAIMED);
1611 vp->v_tag = VT_NON;
1612 KNOTE(&vp->v_klist, NOTE_REVOKE);
1613
1614 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1615 }
1616
1617 /*
1618 * Update outstanding I/O count and do wakeup if requested.
1619 */
1620 void
1621 vwakeup(struct buf *bp)
1622 {
1623 vnode_t *vp;
1624
1625 if ((vp = bp->b_vp) == NULL)
1626 return;
1627
1628 KASSERT(bp->b_objlock == vp->v_interlock);
1629 KASSERT(mutex_owned(bp->b_objlock));
1630
1631 if (--vp->v_numoutput < 0)
1632 vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
1633 if (vp->v_numoutput == 0)
1634 cv_broadcast(&vp->v_cv);
1635 }
1636
1637 /*
1638 * Test a vnode for being or becoming dead. Returns one of:
1639 * EBUSY: vnode is becoming dead, with "flags == VDEAD_NOWAIT" only.
1640 * ENOENT: vnode is dead.
1641 * 0: otherwise.
1642 *
1643 * Whenever this function returns a non-zero value all future
1644 * calls will also return a non-zero value.
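 *
 * A hedged sketch of the usual calling pattern, with v_interlock held as
 * required:
 *
 *	mutex_enter(vp->v_interlock);
 *	error = vdead_check(vp, VDEAD_NOWAIT);
 *	mutex_exit(vp->v_interlock);
 *	if (error != 0)
 *		(treat the vnode as dead or dying)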
1645 */
1646 int
1647 vdead_check(struct vnode *vp, int flags)
1648 {
1649
1650 KASSERT(mutex_owned(vp->v_interlock));
1651
1652 if (! ISSET(flags, VDEAD_NOWAIT))
1653 VSTATE_WAIT_STABLE(vp);
1654
1655 if (VSTATE_GET(vp) == VS_RECLAIMING) {
1656 KASSERT(ISSET(flags, VDEAD_NOWAIT));
1657 return EBUSY;
1658 } else if (VSTATE_GET(vp) == VS_RECLAIMED) {
1659 return ENOENT;
1660 }
1661
1662 return 0;
1663 }
1664
1665 int
1666 vfs_drainvnodes(void)
1667 {
1668 int i, gen;
1669
1670 mutex_enter(&vdrain_lock);
1671 for (i = 0; i < 2; i++) {
1672 gen = vdrain_gen;
1673 while (gen == vdrain_gen) {
1674 cv_broadcast(&vdrain_cv);
1675 cv_wait(&vdrain_gen_cv, &vdrain_lock);
1676 }
1677 }
1678 mutex_exit(&vdrain_lock);
1679
1680 if (numvnodes >= desiredvnodes)
1681 return EBUSY;
1682
1683 if (vcache_hashsize != desiredvnodes)
1684 vcache_reinit();
1685
1686 return 0;
1687 }
1688
1689 void
1690 vnpanic(vnode_t *vp, const char *fmt, ...)
1691 {
1692 va_list ap;
1693
1694 #ifdef DIAGNOSTIC
1695 vprint(NULL, vp);
1696 #endif
1697 va_start(ap, fmt);
1698 vpanic(fmt, ap);
1699 va_end(ap);
1700 }
1701