/*	$NetBSD: vfs_vnode.c,v 1.47 2016/04/22 15:01:54 riastradh Exp $	*/

/*-
 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * The vnode cache subsystem.
 *
 * Life-cycle
 *
 * Normally, there are two points where new vnodes are created:
 * VOP_CREATE(9) and VOP_LOOKUP(9).  The life-cycle of a vnode
 * starts in one of the following ways:
 *
 * - Allocation, via vcache_get(9) or vcache_new(9).
 * - Reclamation of inactive vnode, via vget(9).
 *
 * Recycling a vnode from a free list, via getnewvnode(9) ->
 * getcleanvnode(9), was traditionally a third way.  Currently, only the
 * draining thread recycles vnodes; this behaviour might be revisited.
 *
 * The life-cycle ends when the last reference is dropped, usually
 * in VOP_REMOVE(9).  In that case, VOP_INACTIVE(9) is called to inform
 * the file system that the vnode is inactive.  Via this call, the file
 * system indicates whether the vnode can be recycled (usually, it checks
 * its own references, e.g. the link count, or whether the file was
 * removed).
 *
 * Depending on that indication, the vnode can be put onto a free list
 * (the cache), or cleaned via vclean(9), which calls VOP_RECLAIM(9) to
 * disassociate the underlying file system from the vnode, and finally
 * destroyed.
 *
 * Reference counting
 *
 * A vnode is considered active if its reference count
 * (vnode_t::v_usecount) is non-zero.  The count is maintained using the
 * vref(9), vrele(9) and vput(9) routines.  Typical holders of references
 * are open files, current working directories and mount points.
 *
 * Note on v_usecount and its locking
 *
 * At nearly all points where it is known that v_usecount could be zero,
 * vnode_t::v_interlock will be held.  To change v_usecount away from
 * zero, the interlock must be held.  To change from a non-zero value to
 * zero, again the interlock must be held.
 *
 * Changing the usecount from a non-zero value to another non-zero value
 * can safely be done using atomic operations, without the interlock held.
 *
 * Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
 * mntvnode_lock is still held.
 *
 * See PR 41374.
 */
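
/*
 * Illustrative sketch (not part of the original code): the typical
 * reference pattern as seen from a file system, using the interfaces
 * described above.  The names "example_mp" and "example_key" are
 * hypothetical.
 *
 *        struct vnode *vp;
 *        int error;
 *
 *        error = vcache_get(example_mp, &example_key,
 *            sizeof(example_key), &vp);       (allocate/load if uncached)
 *        if (error)
 *                return error;
 *        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);  (lock before VOP_*() calls)
 *        ...
 *        vput(vp);                            (unlock and drop the reference)
 *
 * When the last reference goes away, VOP_INACTIVE() runs and the vnode is
 * cached or reclaimed as described above.
 */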

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.47 2016/04/22 15:01:54 riastradh Exp $");

#define _VFS_VNODE_PRIVATE

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/hash.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

/* Flags to vrelel. */
#define VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
#define VRELEL_CHANGING_SET	0x0002	/* VI_CHANGING set by caller. */

struct vcache_key {
        struct mount *vk_mount;
        const void *vk_key;
        size_t vk_key_len;
};
struct vcache_node {
        SLIST_ENTRY(vcache_node) vn_hash;
        struct vnode *vn_vnode;
        struct vcache_key vn_key;
};

u_int numvnodes __cacheline_aligned;

static pool_cache_t vnode_cache __read_mostly;

/*
 * There are two free lists: one for vnodes which have no buffer/page
 * references and one for those which do (i.e. v_holdcnt is non-zero).
 * The vnode recycling mechanism looks at the former list first.
 */
static kmutex_t vnode_free_list_lock __cacheline_aligned;
static vnodelst_t vnode_free_list __cacheline_aligned;
static vnodelst_t vnode_hold_list __cacheline_aligned;
static kcondvar_t vdrain_cv __cacheline_aligned;

static vnodelst_t vrele_list __cacheline_aligned;
static kmutex_t vrele_lock __cacheline_aligned;
static kcondvar_t vrele_cv __cacheline_aligned;
static lwp_t *vrele_lwp __cacheline_aligned;
static int vrele_pending __cacheline_aligned;
static int vrele_gen __cacheline_aligned;

SLIST_HEAD(hashhead, vcache_node);
static struct {
        kmutex_t lock;
        u_long hashmask;
        struct hashhead *hashtab;
        pool_cache_t pool;
} vcache __cacheline_aligned;

static int cleanvnode(void);
static void vcache_init(void);
static void vcache_reinit(void);
static void vclean(vnode_t *);
static void vrelel(vnode_t *, int);
static void vdrain_thread(void *);
static void vrele_thread(void *);
static void vnpanic(vnode_t *, const char *, ...)
    __printflike(2, 3);
static void vwait(vnode_t *, int);

/* Routines having to do with the management of the vnode table. */
extern struct mount *dead_rootmount;
extern int (**dead_vnodeop_p)(void *);
extern struct vfsops dead_vfsops;

void
vfs_vnode_sysinit(void)
{
        int error __diagused;

        vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl",
            NULL, IPL_NONE, NULL, NULL, NULL);
        KASSERT(vnode_cache != NULL);

        dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL);
        KASSERT(dead_rootmount != NULL);
        dead_rootmount->mnt_iflag = IMNT_MPSAFE;

        mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
        TAILQ_INIT(&vnode_free_list);
        TAILQ_INIT(&vnode_hold_list);
        TAILQ_INIT(&vrele_list);

        vcache_init();

        mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
        cv_init(&vdrain_cv, "vdrain");
        cv_init(&vrele_cv, "vrele");
        error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
            NULL, NULL, "vdrain");
        KASSERTMSG((error == 0), "kthread_create(vdrain) failed: %d", error);
        error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
            NULL, &vrele_lwp, "vrele");
        KASSERTMSG((error == 0), "kthread_create(vrele) failed: %d", error);
}

/*
 * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
 * marker vnode.
 */
vnode_t *
vnalloc(struct mount *mp)
{
        vnode_t *vp;

        vp = pool_cache_get(vnode_cache, PR_WAITOK);
        KASSERT(vp != NULL);

        memset(vp, 0, sizeof(*vp));
        uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
        cv_init(&vp->v_cv, "vnode");
        /*
         * Done by memset() above.
         *        LIST_INIT(&vp->v_nclist);
         *        LIST_INIT(&vp->v_dnclist);
         */

        if (mp != NULL) {
                vp->v_mount = mp;
                vp->v_type = VBAD;
                vp->v_iflag = VI_MARKER;
                return vp;
        }

        mutex_enter(&vnode_free_list_lock);
        numvnodes++;
        if (numvnodes > desiredvnodes + desiredvnodes / 10)
                cv_signal(&vdrain_cv);
        mutex_exit(&vnode_free_list_lock);

        rw_init(&vp->v_lock);
        vp->v_usecount = 1;
        vp->v_type = VNON;
        vp->v_size = vp->v_writesize = VSIZENOTSET;

        return vp;
}

/*
 * Free an unused, unreferenced vnode.
 */
void
vnfree(vnode_t *vp)
{

        KASSERT(vp->v_usecount == 0);

        if ((vp->v_iflag & VI_MARKER) == 0) {
                rw_destroy(&vp->v_lock);
                mutex_enter(&vnode_free_list_lock);
                numvnodes--;
                mutex_exit(&vnode_free_list_lock);
        }

        uvm_obj_destroy(&vp->v_uobj, true);
        cv_destroy(&vp->v_cv);
        pool_cache_put(vnode_cache, vp);
}

/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Releases vnode_free_list_lock.
 */
static int
cleanvnode(void)
{
        vnode_t *vp;
        vnodelst_t *listhd;
        struct mount *mp;

        KASSERT(mutex_owned(&vnode_free_list_lock));

        listhd = &vnode_free_list;
try_nextlist:
        TAILQ_FOREACH(vp, listhd, v_freelist) {
                /*
                 * It's safe to test v_usecount and v_iflag
                 * without holding the interlock here: vnodes
                 * that are in use or already clean should never
                 * appear on these lists.
                 */
                KASSERT(vp->v_usecount == 0);
                KASSERT((vp->v_iflag & VI_CLEAN) == 0);
                KASSERT(vp->v_freelisthd == listhd);

                if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
                        continue;
                if (!mutex_tryenter(vp->v_interlock)) {
                        VOP_UNLOCK(vp);
                        continue;
                }
                KASSERT((vp->v_iflag & VI_XLOCK) == 0);
                mp = vp->v_mount;
                if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
                        mutex_exit(vp->v_interlock);
                        VOP_UNLOCK(vp);
                        continue;
                }
                break;
        }

        if (vp == NULL) {
                if (listhd == &vnode_free_list) {
                        listhd = &vnode_hold_list;
                        goto try_nextlist;
                }
                mutex_exit(&vnode_free_list_lock);
                return EBUSY;
        }

        /* Remove it from the freelist. */
        TAILQ_REMOVE(listhd, vp, v_freelist);
        vp->v_freelisthd = NULL;
        mutex_exit(&vnode_free_list_lock);

        KASSERT(vp->v_usecount == 0);

        /*
         * The vnode is still associated with a file system, so we must
         * clean it out before freeing it.  We need to add a reference
         * before doing this.
         */
        vp->v_usecount = 1;
        KASSERT((vp->v_iflag & VI_CHANGING) == 0);
        vp->v_iflag |= VI_CHANGING;
        vclean(vp);
        vrelel(vp, VRELEL_CHANGING_SET);
        fstrans_done(mp);

        return 0;
}

/*
 * Helper thread to keep the number of vnodes below desiredvnodes.
 */
static void
vdrain_thread(void *cookie)
{
        int error;

        mutex_enter(&vnode_free_list_lock);

        for (;;) {
                cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
                while (numvnodes > desiredvnodes) {
                        error = cleanvnode();
                        if (error)
                                kpause("vndsbusy", false, hz, NULL);
                        mutex_enter(&vnode_free_list_lock);
                        if (error)
                                break;
                }
        }
}

/*
 * Remove a vnode from its freelist.
 */
void
vremfree(vnode_t *vp)
{

        KASSERT(mutex_owned(vp->v_interlock));
        KASSERT(vp->v_usecount == 0);

        /*
         * Note that the reference count must not change until
         * the vnode is removed.
         */
        mutex_enter(&vnode_free_list_lock);
        if (vp->v_holdcnt > 0) {
                KASSERT(vp->v_freelisthd == &vnode_hold_list);
        } else {
                KASSERT(vp->v_freelisthd == &vnode_free_list);
        }
        TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
        vp->v_freelisthd = NULL;
        mutex_exit(&vnode_free_list_lock);
}

/*
 * vget: get a particular vnode; remove it from the freelist if necessary
 * and increment its reference count.  The vnode lock is not taken here,
 * and v_interlock is released before returning.
 *
 * => Should be called with v_interlock held.
 *
 * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean().
 * In that case, we cannot grab the vnode, so the process is awakened when
 * the transition is completed, and an error returned to indicate that the
 * vnode is no longer usable.
 */
int
vget(vnode_t *vp, int flags, bool waitok)
{
        int error = 0;

        KASSERT((vp->v_iflag & VI_MARKER) == 0);
        KASSERT(mutex_owned(vp->v_interlock));
        KASSERT((flags & ~LK_NOWAIT) == 0);
        KASSERT(waitok == ((flags & LK_NOWAIT) == 0));

        /*
         * Before adding a reference, we must remove the vnode
         * from its freelist.
         */
        if (vp->v_usecount == 0) {
                vremfree(vp);
                vp->v_usecount = 1;
        } else {
                atomic_inc_uint(&vp->v_usecount);
        }

        /*
         * If the vnode is in the process of changing state we wait
         * for the change to complete and take care not to return
         * a clean vnode.
         */
        if ((vp->v_iflag & VI_CHANGING) != 0) {
                if ((flags & LK_NOWAIT) != 0) {
                        vrelel(vp, 0);
                        return EBUSY;
                }
                vwait(vp, VI_CHANGING);
                if ((vp->v_iflag & VI_CLEAN) != 0) {
                        vrelel(vp, 0);
                        return ENOENT;
                }
        }

        /*
         * Ok, we got it in good shape.
         */
        KASSERT((vp->v_iflag & VI_CLEAN) == 0);
        mutex_exit(vp->v_interlock);
        return error;
}
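
/*
 * Illustrative sketch (an assumption, not a caller in this file): how
 * code that holds only v_interlock turns that into a real reference with
 * vget().  In every case, successful or not, the interlock has been
 * released when vget() returns.
 *
 *        mutex_enter(vp->v_interlock);
 *        error = vget(vp, 0, true);    (may wait for VI_CHANGING to clear)
 *        if (error != 0)
 *                return error;         (ENOENT: the vnode was cleaned)
 */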

/*
 * vput: unlock and release the reference.
 */
void
vput(vnode_t *vp)
{

        KASSERT((vp->v_iflag & VI_MARKER) == 0);

        VOP_UNLOCK(vp);
        vrele(vp);
}

/*
 * Try to drop a reference on a vnode.  Abort if we are releasing the
 * last reference.  Note: this _must_ succeed if not the last reference.
 */
static inline bool
vtryrele(vnode_t *vp)
{
        u_int use, next;

        for (use = vp->v_usecount;; use = next) {
                if (use == 1) {
                        return false;
                }
                KASSERT(use > 1);
                next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
                if (__predict_true(next == use)) {
                        return true;
                }
        }
}

/*
 * Vnode release.  If reference count drops to zero, call inactive
 * routine and either return to freelist or free to the pool.
 */
static void
vrelel(vnode_t *vp, int flags)
{
        bool recycle, defer;
        int error;

        KASSERT(mutex_owned(vp->v_interlock));
        KASSERT((vp->v_iflag & VI_MARKER) == 0);
        KASSERT(vp->v_freelisthd == NULL);

        if (__predict_false(vp->v_op == dead_vnodeop_p &&
            (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
                vnpanic(vp, "dead but not clean");
        }

        /*
         * If not the last reference, just drop the reference count
         * and unlock.
         */
        if (vtryrele(vp)) {
                if ((flags & VRELEL_CHANGING_SET) != 0) {
                        KASSERT((vp->v_iflag & VI_CHANGING) != 0);
                        vp->v_iflag &= ~VI_CHANGING;
                        cv_broadcast(&vp->v_cv);
                }
                mutex_exit(vp->v_interlock);
                return;
        }
        if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
                vnpanic(vp, "%s: bad ref count", __func__);
        }

        KASSERT((vp->v_iflag & VI_XLOCK) == 0);

#ifdef DIAGNOSTIC
        if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
            vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
                vprint("vrelel: missing VOP_CLOSE()", vp);
        }
#endif

        /*
         * If not clean, deactivate the vnode, but preserve
         * our reference across the call to VOP_INACTIVE().
         */
        if ((vp->v_iflag & VI_CLEAN) == 0) {
                recycle = false;

                /*
                 * XXX This ugly block can be largely eliminated if
                 * locking is pushed down into the file systems.
                 *
                 * Defer vnode release to vrele_thread if caller
                 * requests it explicitly or is the pagedaemon.
                 */
                if ((curlwp == uvm.pagedaemon_lwp) ||
                    (flags & VRELEL_ASYNC_RELE) != 0) {
                        defer = true;
                } else if (curlwp == vrele_lwp) {
                        /*
                         * We have to try harder.
                         */
                        mutex_exit(vp->v_interlock);
                        error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                        KASSERTMSG((error == 0), "vn_lock failed: %d", error);
                        mutex_enter(vp->v_interlock);
                        defer = false;
                } else {
                        /* If we can't acquire the lock, then defer. */
                        mutex_exit(vp->v_interlock);
                        error = vn_lock(vp,
                            LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
                        defer = (error != 0);
                        mutex_enter(vp->v_interlock);
                }

                KASSERT(mutex_owned(vp->v_interlock));
                KASSERT(! (curlwp == vrele_lwp && defer));

                if (defer) {
                        /*
                         * Defer reclaim to the kthread; it's not safe to
                         * clean it here.  We donate it our last reference.
                         */
                        if ((flags & VRELEL_CHANGING_SET) != 0) {
                                KASSERT((vp->v_iflag & VI_CHANGING) != 0);
                                vp->v_iflag &= ~VI_CHANGING;
                                cv_broadcast(&vp->v_cv);
                        }
                        mutex_enter(&vrele_lock);
                        TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
                        if (++vrele_pending > (desiredvnodes >> 8))
                                cv_signal(&vrele_cv);
                        mutex_exit(&vrele_lock);
                        mutex_exit(vp->v_interlock);
                        return;
                }

                /*
                 * If the node got another reference while we
                 * released the interlock, don't try to inactivate it yet.
                 */
                if (__predict_false(vtryrele(vp))) {
                        VOP_UNLOCK(vp);
                        if ((flags & VRELEL_CHANGING_SET) != 0) {
                                KASSERT((vp->v_iflag & VI_CHANGING) != 0);
                                vp->v_iflag &= ~VI_CHANGING;
                                cv_broadcast(&vp->v_cv);
                        }
                        mutex_exit(vp->v_interlock);
                        return;
                }

                if ((flags & VRELEL_CHANGING_SET) == 0) {
                        KASSERT((vp->v_iflag & VI_CHANGING) == 0);
                        vp->v_iflag |= VI_CHANGING;
                }
                mutex_exit(vp->v_interlock);

                /*
                 * The vnode can gain another reference while being
                 * deactivated.  If VOP_INACTIVE() indicates that
                 * the described file has been deleted, then recycle
                 * the vnode irrespective of additional references.
                 * Another thread may be waiting to re-use the on-disk
                 * inode.
                 *
                 * Note that VOP_INACTIVE() will drop the vnode lock.
                 */
                VOP_INACTIVE(vp, &recycle);
                if (recycle) {
                        /* vclean() below will drop the lock. */
                        if (vn_lock(vp, LK_EXCLUSIVE) != 0)
                                recycle = false;
                }
                mutex_enter(vp->v_interlock);
                if (!recycle) {
                        if (vtryrele(vp)) {
                                KASSERT((vp->v_iflag & VI_CHANGING) != 0);
                                vp->v_iflag &= ~VI_CHANGING;
                                cv_broadcast(&vp->v_cv);
                                mutex_exit(vp->v_interlock);
                                return;
                        }
                }

                /* Take care of space accounting. */
                if (vp->v_iflag & VI_EXECMAP) {
                        atomic_add_int(&uvmexp.execpages,
                            -vp->v_uobj.uo_npages);
                        atomic_add_int(&uvmexp.filepages,
                            vp->v_uobj.uo_npages);
                }
                vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
                vp->v_vflag &= ~VV_MAPPED;

                /*
                 * Recycle the vnode if the file is now unused (unlinked),
                 * otherwise just free it.
                 */
                if (recycle) {
                        vclean(vp);
                }
                KASSERT(vp->v_usecount > 0);
        } else { /* vnode was already clean */
                if ((flags & VRELEL_CHANGING_SET) == 0) {
                        KASSERT((vp->v_iflag & VI_CHANGING) == 0);
                        vp->v_iflag |= VI_CHANGING;
                }
        }

        if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
                /* Gained another reference while being reclaimed. */
                KASSERT((vp->v_iflag & VI_CHANGING) != 0);
                vp->v_iflag &= ~VI_CHANGING;
                cv_broadcast(&vp->v_cv);
                mutex_exit(vp->v_interlock);
                return;
        }

        if ((vp->v_iflag & VI_CLEAN) != 0) {
                /*
                 * It's clean so destroy it.  It isn't referenced
                 * anywhere since it has been reclaimed.
                 */
                KASSERT(vp->v_holdcnt == 0);
                KASSERT(vp->v_writecount == 0);
                mutex_exit(vp->v_interlock);
                vfs_insmntque(vp, NULL);
                if (vp->v_type == VBLK || vp->v_type == VCHR) {
                        spec_node_destroy(vp);
                }
                vnfree(vp);
        } else {
                /*
                 * Otherwise, put it back onto the freelist.  It
                 * can't be destroyed while still associated with
                 * a file system.
                 */
                mutex_enter(&vnode_free_list_lock);
                if (vp->v_holdcnt > 0) {
                        vp->v_freelisthd = &vnode_hold_list;
                } else {
                        vp->v_freelisthd = &vnode_free_list;
                }
                TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
                mutex_exit(&vnode_free_list_lock);
                KASSERT((vp->v_iflag & VI_CHANGING) != 0);
                vp->v_iflag &= ~VI_CHANGING;
                cv_broadcast(&vp->v_cv);
                mutex_exit(vp->v_interlock);
        }
}

void
vrele(vnode_t *vp)
{

        KASSERT((vp->v_iflag & VI_MARKER) == 0);

        if (vtryrele(vp)) {
                return;
        }
        mutex_enter(vp->v_interlock);
        vrelel(vp, 0);
}

/*
 * Asynchronous vnode release: the vnode is released in a different
 * context, by the vrele kthread.
 */
void
vrele_async(vnode_t *vp)
{

        KASSERT((vp->v_iflag & VI_MARKER) == 0);

        if (vtryrele(vp)) {
                return;
        }
        mutex_enter(vp->v_interlock);
        vrelel(vp, VRELEL_ASYNC_RELE);
}

static void
vrele_thread(void *cookie)
{
        vnodelst_t skip_list;
        vnode_t *vp;
        struct mount *mp;

        TAILQ_INIT(&skip_list);

        mutex_enter(&vrele_lock);
        for (;;) {
                while (TAILQ_EMPTY(&vrele_list)) {
                        vrele_gen++;
                        cv_broadcast(&vrele_cv);
                        cv_timedwait(&vrele_cv, &vrele_lock, hz);
                        TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist);
                }
                vp = TAILQ_FIRST(&vrele_list);
                mp = vp->v_mount;
                TAILQ_REMOVE(&vrele_list, vp, v_freelist);
                if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) {
                        TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist);
                        continue;
                }
                vrele_pending--;
                mutex_exit(&vrele_lock);

                /*
                 * If not the last reference, then ignore the vnode
                 * and look for more work.
                 */
                mutex_enter(vp->v_interlock);
                vrelel(vp, 0);
                fstrans_done(mp);
                mutex_enter(&vrele_lock);
        }
}

void
vrele_flush(void)
{
        int gen;

        mutex_enter(&vrele_lock);
        gen = vrele_gen;
        while (vrele_pending && gen == vrele_gen) {
                cv_broadcast(&vrele_cv);
                cv_wait(&vrele_cv, &vrele_lock);
        }
        mutex_exit(&vrele_lock);
}

/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 */
void
vref(vnode_t *vp)
{

        KASSERT((vp->v_iflag & VI_MARKER) == 0);
        KASSERT(vp->v_usecount != 0);

        atomic_inc_uint(&vp->v_usecount);
}

/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 */
void
vholdl(vnode_t *vp)
{

        KASSERT(mutex_owned(vp->v_interlock));
        KASSERT((vp->v_iflag & VI_MARKER) == 0);

        if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
                mutex_enter(&vnode_free_list_lock);
                KASSERT(vp->v_freelisthd == &vnode_free_list);
                TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
                vp->v_freelisthd = &vnode_hold_list;
                TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
                mutex_exit(&vnode_free_list_lock);
        }
}

/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 */
void
holdrelel(vnode_t *vp)
{

        KASSERT(mutex_owned(vp->v_interlock));
        KASSERT((vp->v_iflag & VI_MARKER) == 0);

        if (vp->v_holdcnt <= 0) {
                vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
        }

        vp->v_holdcnt--;
        if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
                mutex_enter(&vnode_free_list_lock);
                KASSERT(vp->v_freelisthd == &vnode_hold_list);
                TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
                vp->v_freelisthd = &vnode_free_list;
                TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
                mutex_exit(&vnode_free_list_lock);
        }
}
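
/*
 * Illustrative sketch (an assumption): how buffer/page code pins a vnode
 * on the hold list while it owns buffers.  The move between the free and
 * hold lists only happens while the vnode is unreferenced (v_usecount is
 * zero); otherwise only the counter changes.
 *
 *        mutex_enter(vp->v_interlock);
 *        vholdl(vp);                   (first hold: free -> hold list)
 *        mutex_exit(vp->v_interlock);
 *        ...
 *        mutex_enter(vp->v_interlock);
 *        holdrelel(vp);                (last hold: hold -> free list)
 *        mutex_exit(vp->v_interlock);
 */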

/*
 * Disassociate the underlying file system from a vnode.
 *
 * Must be called with vnode locked and will return unlocked.
 * Must be called with the interlock held, and will return with it held.
 */
static void
vclean(vnode_t *vp)
{
        lwp_t *l = curlwp;
        bool recycle, active;
        int error;

        KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
            VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
        KASSERT(mutex_owned(vp->v_interlock));
        KASSERT((vp->v_iflag & VI_MARKER) == 0);
        KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0);
        KASSERT(vp->v_usecount != 0);

        active = (vp->v_usecount > 1);
        /*
         * Prevent the vnode from being recycled or brought into use
         * while we clean it out.
         */
        vp->v_iflag |= VI_XLOCK;
        if (vp->v_iflag & VI_EXECMAP) {
                atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
                atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
        }
        vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
        mutex_exit(vp->v_interlock);

        /*
         * Clean out any cached data associated with the vnode.
         * If purging an active vnode, it must be closed and
         * deactivated before being reclaimed.  Note that the
         * VOP_INACTIVE will unlock the vnode.
         */
        error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
        if (error != 0) {
                if (wapbl_vphaswapbl(vp))
                        WAPBL_DISCARD(wapbl_vptomp(vp));
                error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
        }
        KASSERTMSG((error == 0), "vinvalbuf failed: %d", error);
        KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
        if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
                spec_node_revoke(vp);
        }
        if (active) {
                VOP_INACTIVE(vp, &recycle);
        } else {
                /*
                 * Any other processes trying to obtain this lock must first
                 * wait for VI_XLOCK to clear, then call the new lock operation.
                 */
                VOP_UNLOCK(vp);
        }

        /* Disassociate the underlying file system from the vnode. */
        if (VOP_RECLAIM(vp)) {
                vnpanic(vp, "%s: cannot reclaim", __func__);
        }

        KASSERT(vp->v_data == NULL);
        KASSERT(vp->v_uobj.uo_npages == 0);

        if (vp->v_type == VREG && vp->v_ractx != NULL) {
                uvm_ra_freectx(vp->v_ractx);
                vp->v_ractx = NULL;
        }

        /* Purge name cache. */
        cache_purge(vp);

        /* Move to dead mount. */
        vp->v_vflag &= ~VV_ROOT;
        atomic_inc_uint(&dead_rootmount->mnt_refcnt);
        vfs_insmntque(vp, dead_rootmount);

        /* Done with purge, notify sleepers of the grim news. */
        mutex_enter(vp->v_interlock);
        vp->v_op = dead_vnodeop_p;
        vp->v_vflag |= VV_LOCKSWORK;
        vp->v_iflag |= VI_CLEAN;
        vp->v_tag = VT_NON;
        KNOTE(&vp->v_klist, NOTE_REVOKE);
        vp->v_iflag &= ~VI_XLOCK;
        cv_broadcast(&vp->v_cv);

        KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
}

/*
 * Recycle an unused vnode if caller holds the last reference.
 */
bool
vrecycle(vnode_t *vp)
{

        if (vn_lock(vp, LK_EXCLUSIVE) != 0)
                return false;

        mutex_enter(vp->v_interlock);

        KASSERT((vp->v_iflag & VI_MARKER) == 0);

        if (vp->v_usecount != 1) {
                mutex_exit(vp->v_interlock);
                VOP_UNLOCK(vp);
                return false;
        }
        if ((vp->v_iflag & VI_CHANGING) != 0)
                vwait(vp, VI_CHANGING);
        if (vp->v_usecount != 1) {
                mutex_exit(vp->v_interlock);
                VOP_UNLOCK(vp);
                return false;
        }
        KASSERT((vp->v_iflag & VI_CLEAN) == 0);
        vp->v_iflag |= VI_CHANGING;
        vclean(vp);
        vrelel(vp, VRELEL_CHANGING_SET);
        return true;
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vrevoke(vnode_t *vp)
{
        vnode_t *vq;
        enum vtype type;
        dev_t dev;

        KASSERT(vp->v_usecount > 0);

        mutex_enter(vp->v_interlock);
        if ((vp->v_iflag & VI_CLEAN) != 0) {
                mutex_exit(vp->v_interlock);
                return;
        } else if (vp->v_type != VBLK && vp->v_type != VCHR) {
                atomic_inc_uint(&vp->v_usecount);
                mutex_exit(vp->v_interlock);
                vgone(vp);
                return;
        } else {
                dev = vp->v_rdev;
                type = vp->v_type;
                mutex_exit(vp->v_interlock);
        }

        while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
                vgone(vq);
        }
}

/*
 * Eliminate all activity associated with a vnode in preparation for
 * reuse.  Drops a reference from the vnode.
 */
void
vgone(vnode_t *vp)
{

        if (vn_lock(vp, LK_EXCLUSIVE) != 0) {
                /* Already clean: just drop our reference and return. */
                KASSERT((vp->v_iflag & VI_CLEAN) != 0);
                vrele(vp);
                return;
        }

        mutex_enter(vp->v_interlock);
        if ((vp->v_iflag & VI_CHANGING) != 0)
                vwait(vp, VI_CHANGING);
        vp->v_iflag |= VI_CHANGING;
        vclean(vp);
        vrelel(vp, VRELEL_CHANGING_SET);
}

static inline uint32_t
vcache_hash(const struct vcache_key *key)
{
        uint32_t hash = HASH32_BUF_INIT;

        hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash);
        hash = hash32_buf(key->vk_key, key->vk_key_len, hash);
        return hash;
}

static void
vcache_init(void)
{

        vcache.pool = pool_cache_init(sizeof(struct vcache_node), 0, 0, 0,
            "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
        KASSERT(vcache.pool != NULL);
        mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE);
        vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true,
            &vcache.hashmask);
}

static void
vcache_reinit(void)
{
        int i;
        uint32_t hash;
        u_long oldmask, newmask;
        struct hashhead *oldtab, *newtab;
        struct vcache_node *node;

        newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask);
        mutex_enter(&vcache.lock);
        oldtab = vcache.hashtab;
        oldmask = vcache.hashmask;
        vcache.hashtab = newtab;
        vcache.hashmask = newmask;
        for (i = 0; i <= oldmask; i++) {
                while ((node = SLIST_FIRST(&oldtab[i])) != NULL) {
                        SLIST_REMOVE(&oldtab[i], node, vcache_node, vn_hash);
                        hash = vcache_hash(&node->vn_key);
                        SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask],
                            node, vn_hash);
                }
        }
        mutex_exit(&vcache.lock);
        hashdone(oldtab, HASH_SLIST, oldmask);
}

static inline struct vcache_node *
vcache_hash_lookup(const struct vcache_key *key, uint32_t hash)
{
        struct hashhead *hashp;
        struct vcache_node *node;

        KASSERT(mutex_owned(&vcache.lock));

        hashp = &vcache.hashtab[hash & vcache.hashmask];
        SLIST_FOREACH(node, hashp, vn_hash) {
                if (key->vk_mount != node->vn_key.vk_mount)
                        continue;
                if (key->vk_key_len != node->vn_key.vk_key_len)
                        continue;
                if (memcmp(key->vk_key, node->vn_key.vk_key, key->vk_key_len))
                        continue;
                return node;
        }
        return NULL;
}

/*
 * Get a vnode / fs node pair by key and return it referenced through vpp.
 */
int
vcache_get(struct mount *mp, const void *key, size_t key_len,
    struct vnode **vpp)
{
        int error;
        uint32_t hash;
        const void *new_key;
        struct vnode *vp;
        struct vcache_key vcache_key;
        struct vcache_node *node, *new_node;

        new_key = NULL;
        *vpp = NULL;

        vcache_key.vk_mount = mp;
        vcache_key.vk_key = key;
        vcache_key.vk_key_len = key_len;
        hash = vcache_hash(&vcache_key);

again:
        mutex_enter(&vcache.lock);
        node = vcache_hash_lookup(&vcache_key, hash);

        /* If found, take a reference or retry. */
        if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
                vp = node->vn_vnode;
                mutex_enter(vp->v_interlock);
                mutex_exit(&vcache.lock);
                error = vget(vp, 0, true /* wait */);
                if (error == ENOENT)
                        goto again;
                if (error == 0)
                        *vpp = vp;
                KASSERT((error != 0) == (*vpp == NULL));
                return error;
        }

        /* If another thread loads this node, wait and retry. */
        if (node != NULL) {
                KASSERT(node->vn_vnode == NULL);
                mutex_exit(&vcache.lock);
                kpause("vcache", false, mstohz(20), NULL);
                goto again;
        }
        mutex_exit(&vcache.lock);

        /* Allocate and initialize a new vcache / vnode pair. */
        error = vfs_busy(mp, NULL);
        if (error)
                return error;
        new_node = pool_cache_get(vcache.pool, PR_WAITOK);
        new_node->vn_vnode = NULL;
        new_node->vn_key = vcache_key;
        vp = vnalloc(NULL);
        mutex_enter(&vcache.lock);
        node = vcache_hash_lookup(&vcache_key, hash);
        if (node == NULL) {
                SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
                    new_node, vn_hash);
                node = new_node;
        }
        mutex_exit(&vcache.lock);

        /* If another thread beat us inserting this node, retry. */
        if (node != new_node) {
                pool_cache_put(vcache.pool, new_node);
                KASSERT(vp->v_usecount == 1);
                vp->v_usecount = 0;
                vnfree(vp);
                vfs_unbusy(mp, false, NULL);
                goto again;
        }

        /* Load the fs node.  Exclusive as new_node->vn_vnode is NULL. */
        vp->v_iflag |= VI_CHANGING;
        error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
        if (error) {
                mutex_enter(&vcache.lock);
                SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
                    new_node, vcache_node, vn_hash);
                mutex_exit(&vcache.lock);
                pool_cache_put(vcache.pool, new_node);
                KASSERT(vp->v_usecount == 1);
                vp->v_usecount = 0;
                vnfree(vp);
                vfs_unbusy(mp, false, NULL);
                KASSERT(*vpp == NULL);
                return error;
        }
        KASSERT(new_key != NULL);
        KASSERT(memcmp(key, new_key, key_len) == 0);
        KASSERT(vp->v_op != NULL);
        vfs_insmntque(vp, mp);
        if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
                vp->v_vflag |= VV_MPSAFE;
        vfs_unbusy(mp, true, NULL);

        /* Finished loading, finalize node. */
        mutex_enter(&vcache.lock);
        new_node->vn_key.vk_key = new_key;
        new_node->vn_vnode = vp;
        mutex_exit(&vcache.lock);
        mutex_enter(vp->v_interlock);
        vp->v_iflag &= ~VI_CHANGING;
        cv_broadcast(&vp->v_cv);
        mutex_exit(vp->v_interlock);
        *vpp = vp;
        return 0;
}
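
/*
 * Illustrative sketch (an assumption): a VOP_LOOKUP()/VFS_VGET()-style
 * path in a file system that keys its vnodes by inode number.  "ino" and
 * its origin are hypothetical.
 *
 *        ino_t ino;                    (inode number found in a directory)
 *        struct vnode *vp;
 *        int error;
 *
 *        error = vcache_get(mp, &ino, sizeof(ino), &vp);
 *        if (error)
 *                return error;
 *
 * On success vp is referenced but unlocked, and VFS_LOADVNODE() has
 * initialized v_op, v_type and the file system's private v_data.
 */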

/*
 * Create a new vnode / fs node pair and return it referenced through vpp.
 */
int
vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap,
    kauth_cred_t cred, struct vnode **vpp)
{
        int error;
        uint32_t hash;
        struct vnode *vp;
        struct vcache_node *new_node;
        struct vcache_node *old_node __diagused;

        *vpp = NULL;

        /* Allocate and initialize a new vcache / vnode pair. */
        error = vfs_busy(mp, NULL);
        if (error)
                return error;
        new_node = pool_cache_get(vcache.pool, PR_WAITOK);
        new_node->vn_key.vk_mount = mp;
        new_node->vn_vnode = NULL;
        vp = vnalloc(NULL);

        /* Create and load the fs node. */
        vp->v_iflag |= VI_CHANGING;
        error = VFS_NEWVNODE(mp, dvp, vp, vap, cred,
            &new_node->vn_key.vk_key_len, &new_node->vn_key.vk_key);
        if (error) {
                pool_cache_put(vcache.pool, new_node);
                KASSERT(vp->v_usecount == 1);
                vp->v_usecount = 0;
                vnfree(vp);
                vfs_unbusy(mp, false, NULL);
                KASSERT(*vpp == NULL);
                return error;
        }
        KASSERT(new_node->vn_key.vk_key != NULL);
        KASSERT(vp->v_op != NULL);
        hash = vcache_hash(&new_node->vn_key);

        /* Wait for previous instance to be reclaimed, then insert new node. */
        mutex_enter(&vcache.lock);
        while ((old_node = vcache_hash_lookup(&new_node->vn_key, hash))) {
#ifdef DIAGNOSTIC
                if (old_node->vn_vnode != NULL)
                        mutex_enter(old_node->vn_vnode->v_interlock);
                KASSERT(old_node->vn_vnode == NULL ||
                    (old_node->vn_vnode->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0);
                if (old_node->vn_vnode != NULL)
                        mutex_exit(old_node->vn_vnode->v_interlock);
#endif
                mutex_exit(&vcache.lock);
                kpause("vcache", false, mstohz(20), NULL);
                mutex_enter(&vcache.lock);
        }
        SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
            new_node, vn_hash);
        mutex_exit(&vcache.lock);
        vfs_insmntque(vp, mp);
        if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
                vp->v_vflag |= VV_MPSAFE;
        vfs_unbusy(mp, true, NULL);

        /* Finished loading, finalize node. */
        mutex_enter(&vcache.lock);
        new_node->vn_vnode = vp;
        mutex_exit(&vcache.lock);
        mutex_enter(vp->v_interlock);
        vp->v_iflag &= ~VI_CHANGING;
        cv_broadcast(&vp->v_cv);
        mutex_exit(vp->v_interlock);
        *vpp = vp;
        return 0;
}
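
/*
 * Illustrative sketch (an assumption): a VOP_CREATE()-style path that
 * allocates a fresh vnode/inode pair.  The attribute setup and the
 * variables "dvp", "mode" and "cred" are hypothetical context.
 *
 *        struct vattr va;
 *        struct vnode *vp;
 *        int error;
 *
 *        vattr_null(&va);
 *        va.va_type = VREG;
 *        va.va_mode = mode;
 *        error = vcache_new(dvp->v_mount, dvp, &va, cred, &vp);
 *        if (error)
 *                return error;
 *
 * VFS_NEWVNODE() creates the on-disk node and returns its key; on success
 * vp is referenced, unlocked and already present in the cache.
 */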

/*
 * Prepare key change: lock old and new cache node.
 * Return an error if the new node already exists.
 */
int
vcache_rekey_enter(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
        uint32_t old_hash, new_hash;
        struct vcache_key old_vcache_key, new_vcache_key;
        struct vcache_node *node, *new_node;

        old_vcache_key.vk_mount = mp;
        old_vcache_key.vk_key = old_key;
        old_vcache_key.vk_key_len = old_key_len;
        old_hash = vcache_hash(&old_vcache_key);

        new_vcache_key.vk_mount = mp;
        new_vcache_key.vk_key = new_key;
        new_vcache_key.vk_key_len = new_key_len;
        new_hash = vcache_hash(&new_vcache_key);

        new_node = pool_cache_get(vcache.pool, PR_WAITOK);
        new_node->vn_vnode = NULL;
        new_node->vn_key = new_vcache_key;

        mutex_enter(&vcache.lock);
        node = vcache_hash_lookup(&new_vcache_key, new_hash);
        if (node != NULL) {
                mutex_exit(&vcache.lock);
                pool_cache_put(vcache.pool, new_node);
                return EEXIST;
        }
        SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
            new_node, vn_hash);
        node = vcache_hash_lookup(&old_vcache_key, old_hash);
        KASSERT(node != NULL);
        KASSERT(node->vn_vnode == vp);
        node->vn_vnode = NULL;
        node->vn_key = old_vcache_key;
        mutex_exit(&vcache.lock);
        return 0;
}

/*
 * Key change complete: remove old node and unlock new node.
 */
void
vcache_rekey_exit(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
        uint32_t old_hash, new_hash;
        struct vcache_key old_vcache_key, new_vcache_key;
        struct vcache_node *node;

        old_vcache_key.vk_mount = mp;
        old_vcache_key.vk_key = old_key;
        old_vcache_key.vk_key_len = old_key_len;
        old_hash = vcache_hash(&old_vcache_key);

        new_vcache_key.vk_mount = mp;
        new_vcache_key.vk_key = new_key;
        new_vcache_key.vk_key_len = new_key_len;
        new_hash = vcache_hash(&new_vcache_key);

        mutex_enter(&vcache.lock);
        node = vcache_hash_lookup(&new_vcache_key, new_hash);
        KASSERT(node != NULL && node->vn_vnode == NULL);
        KASSERT(node->vn_key.vk_key_len == new_key_len);
        node->vn_vnode = vp;
        node->vn_key = new_vcache_key;
        node = vcache_hash_lookup(&old_vcache_key, old_hash);
        KASSERT(node != NULL);
        KASSERT(node->vn_vnode == NULL);
        SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
            node, vcache_node, vn_hash);
        mutex_exit(&vcache.lock);
        pool_cache_put(vcache.pool, node);
}
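
/*
 * Illustrative sketch (an assumption): how a file system renumbers a
 * cached node, e.g. when an inode moves on disk.  "old_ino" and "new_ino"
 * are hypothetical.
 *
 *        error = vcache_rekey_enter(mp, vp, &old_ino, sizeof(old_ino),
 *            &new_ino, sizeof(new_ino));
 *        if (error)
 *                return error;         (EEXIST: target key already cached)
 *        ... update the node's on-disk / in-core key ...
 *        vcache_rekey_exit(mp, vp, &old_ino, sizeof(old_ino),
 *            &new_ino, sizeof(new_ino));
 *
 * Between enter and exit the node is "locked" in the cache: both keys
 * hash to nodes with vn_vnode == NULL, so concurrent lookups wait.
 */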

/*
 * Remove a vnode / fs node pair from the cache.
 */
void
vcache_remove(struct mount *mp, const void *key, size_t key_len)
{
        uint32_t hash;
        struct vcache_key vcache_key;
        struct vcache_node *node;

        vcache_key.vk_mount = mp;
        vcache_key.vk_key = key;
        vcache_key.vk_key_len = key_len;
        hash = vcache_hash(&vcache_key);

        mutex_enter(&vcache.lock);
        node = vcache_hash_lookup(&vcache_key, hash);
        KASSERT(node != NULL);
        SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
            node, vcache_node, vn_hash);
        mutex_exit(&vcache.lock);
        pool_cache_put(vcache.pool, node);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(struct buf *bp)
{
        vnode_t *vp;

        if ((vp = bp->b_vp) == NULL)
                return;

        KASSERT(bp->b_objlock == vp->v_interlock);
        KASSERT(mutex_owned(bp->b_objlock));

        if (--vp->v_numoutput < 0)
                vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
        if (vp->v_numoutput == 0)
                cv_broadcast(&vp->v_cv);
}

/*
 * Test a vnode for being or becoming dead.  Returns one of:
 * EBUSY:  vnode is becoming dead, with "flags == VDEAD_NOWAIT" only.
 * ENOENT: vnode is dead.
 * 0:      otherwise.
 *
 * Whenever this function returns a non-zero value all future
 * calls will also return a non-zero value.
 */
int
vdead_check(struct vnode *vp, int flags)
{

        KASSERT(mutex_owned(vp->v_interlock));
        if (ISSET(vp->v_iflag, VI_XLOCK)) {
                if (ISSET(flags, VDEAD_NOWAIT))
                        return EBUSY;
                vwait(vp, VI_XLOCK);
                KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
        }
        if (ISSET(vp->v_iflag, VI_CLEAN))
                return ENOENT;
        return 0;
}
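
/*
 * Illustrative sketch (an assumption): a typical caller probing for a
 * dying vnode without blocking, e.g. before issuing an operation on a
 * device vnode.
 *
 *        mutex_enter(vp->v_interlock);
 *        error = vdead_check(vp, VDEAD_NOWAIT);
 *        mutex_exit(vp->v_interlock);
 *        if (error != 0)
 *                return error;         (EBUSY: being cleaned, ENOENT: dead)
 */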

/*
 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
 * recycled.
 */
static void
vwait(vnode_t *vp, int flags)
{

        KASSERT(mutex_owned(vp->v_interlock));
        KASSERT(vp->v_usecount != 0);

        while ((vp->v_iflag & flags) != 0)
                cv_wait(&vp->v_cv, vp->v_interlock);
}

int
vfs_drainvnodes(long target)
{
        int error;

        mutex_enter(&vnode_free_list_lock);

        while (numvnodes > target) {
                error = cleanvnode();
                if (error != 0)
                        return error;
                mutex_enter(&vnode_free_list_lock);
        }

        mutex_exit(&vnode_free_list_lock);

        vcache_reinit();

        return 0;
}

void
vnpanic(vnode_t *vp, const char *fmt, ...)
{
        va_list ap;

#ifdef DIAGNOSTIC
        vprint(NULL, vp);
#endif
        va_start(ap, fmt);
        vpanic(fmt, ap);
        va_end(ap);
}