/*	$NetBSD: vfs_vnode.c,v 1.37.2.1.2.1 2016/01/26 23:44:11 snj Exp $	*/

/*-
 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * The vnode cache subsystem.
 *
 * Life-cycle
 *
 *	Normally, there are two points where new vnodes are created:
 *	VOP_CREATE(9) and VOP_LOOKUP(9).  The life-cycle of a vnode
 *	starts in one of the following ways:
 *
 *	- Allocation, via getnewvnode(9) and/or vnalloc(9).
 *	- Reclamation of an inactive vnode, via vget(9).
 *
 *	Recycling from a free list, via getnewvnode(9) -> getcleanvnode(9),
 *	was another, traditional way.  Currently, only the draining thread
 *	recycles vnodes.  This behaviour might be revisited.
 *
 *	The life-cycle ends when the last reference is dropped, usually
 *	in VOP_REMOVE(9).  In that case, VOP_INACTIVE(9) is called to inform
 *	the file system that the vnode is inactive.  Via this call, the file
 *	system indicates whether the vnode can be recycled (usually, it
 *	checks its own references, e.g. the link count, or whether the file
 *	was removed).
 *
 *	Depending on that indication, the vnode can be put onto a free list
 *	(cache), or cleaned via vclean(9), which calls VOP_RECLAIM(9) to
 *	disassociate the underlying file system from the vnode, and finally
 *	destroyed.
 *
 * Reference counting
 *
 *	A vnode is considered active if its reference count
 *	(vnode_t::v_usecount) is non-zero.  The count is maintained using
 *	the vref(9) and vrele(9) routines, as well as vput(9).  Common
 *	points holding references are, e.g., open files, current working
 *	directories, mount points, etc.
 *
 * Note on v_usecount and its locking
 *
 *	At nearly all points where it is known that v_usecount could be
 *	zero, vnode_t::v_interlock will be held.  To change v_usecount away
 *	from zero, the interlock must be held.  To change from a non-zero
 *	value to zero, again the interlock must be held.
 *
 *	Changing the usecount from a non-zero value to a non-zero value can
 *	safely be done using atomic operations, without the interlock held.
 *
 *	Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
 *	mntvnode_lock is still held.
 *
 *	See PR 41374.
 */
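
/*
 * Illustrative sketch of the reference-counting rules described above (not
 * part of this file's interfaces): a typical consumer keeps a vnode alive
 * across a blocking operation by taking an extra reference and dropping it
 * afterwards.  The helper example_hold_and_use() and the callback do_io()
 * are hypothetical and exist only for illustration.
 *
 *	static int
 *	example_hold_and_use(vnode_t *vp)
 *	{
 *		int error;
 *
 *		vref(vp);			// v_usecount is already non-zero
 *		error = vn_lock(vp, LK_SHARED);	// lock before any VOP calls
 *		if (error == 0) {
 *			error = do_io(vp);	// hypothetical work on the vnode
 *			VOP_UNLOCK(vp);
 *		}
 *		vrele(vp);			// may move the vnode to a free list
 *		return error;
 *	}
 */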

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.37.2.1.2.1 2016/01/26 23:44:11 snj Exp $");

#define _VFS_VNODE_PRIVATE

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/hash.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

/* Flags to vrelel. */
#define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
#define	VRELEL_CHANGING_SET	0x0002	/* VI_CHANGING set by caller. */

struct vcache_key {
	struct mount *vk_mount;
	const void *vk_key;
	size_t vk_key_len;
};
struct vcache_node {
	SLIST_ENTRY(vcache_node) vn_hash;
	struct vnode *vn_vnode;
	struct vcache_key vn_key;
};

u_int numvnodes __cacheline_aligned;

static pool_cache_t vnode_cache __read_mostly;
static struct mount *dead_mount;

/*
 * There are two free lists: one is for vnodes which have no buffer/page
 * references and one for those which do (i.e. v_holdcnt is non-zero).
 * Vnode recycling mechanism first attempts to look into the former list.
 */
static kmutex_t vnode_free_list_lock __cacheline_aligned;
static vnodelst_t vnode_free_list __cacheline_aligned;
static vnodelst_t vnode_hold_list __cacheline_aligned;
static kcondvar_t vdrain_cv __cacheline_aligned;

static vnodelst_t vrele_list __cacheline_aligned;
static kmutex_t vrele_lock __cacheline_aligned;
static kcondvar_t vrele_cv __cacheline_aligned;
static lwp_t *vrele_lwp __cacheline_aligned;
static int vrele_pending __cacheline_aligned;
static int vrele_gen __cacheline_aligned;

static struct {
	kmutex_t lock;
	u_long hashmask;
	SLIST_HEAD(hashhead, vcache_node) *hashtab;
	pool_cache_t pool;
} vcache __cacheline_aligned;

static int cleanvnode(void);
static void vcache_init(void);
static void vcache_reinit(void);
static void vclean(vnode_t *);
static void vrelel(vnode_t *, int);
static void vdrain_thread(void *);
static void vrele_thread(void *);
static void vnpanic(vnode_t *, const char *, ...)
    __printflike(2, 3);
static void vwait(vnode_t *, int);

/* Routines having to do with the management of the vnode table. */
extern int (**dead_vnodeop_p)(void *);
extern struct vfsops dead_vfsops;

void
vfs_vnode_sysinit(void)
{
	int error __diagused;

	vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	dead_mount = vfs_mountalloc(&dead_vfsops, NULL);
	KASSERT(dead_mount != NULL);
	dead_mount->mnt_iflag = IMNT_MPSAFE;

	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vrele_list);

	vcache_init();

	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vdrain_cv, "vdrain");
	cv_init(&vrele_cv, "vrele");
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
	    NULL, NULL, "vdrain");
	KASSERT(error == 0);
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele");
	KASSERT(error == 0);
}

/*
 * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
 * marker vnode.
 */
vnode_t *
vnalloc(struct mount *mp)
{
	vnode_t *vp;

	vp = pool_cache_get(vnode_cache, PR_WAITOK);
	KASSERT(vp != NULL);

	memset(vp, 0, sizeof(*vp));
	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
	cv_init(&vp->v_cv, "vnode");
	/*
	 * Done by memset() above.
	 *	LIST_INIT(&vp->v_nclist);
	 *	LIST_INIT(&vp->v_dnclist);
	 */

	if (mp != NULL) {
		vp->v_mount = mp;
		vp->v_type = VBAD;
		vp->v_iflag = VI_MARKER;
		return vp;
	}

	mutex_enter(&vnode_free_list_lock);
	numvnodes++;
	if (numvnodes > desiredvnodes + desiredvnodes / 10)
		cv_signal(&vdrain_cv);
	mutex_exit(&vnode_free_list_lock);

	rw_init(&vp->v_lock);
	vp->v_usecount = 1;
	vp->v_type = VNON;
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	return vp;
}
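
/*
 * Illustrative sketch (not part of this file's interfaces): a traversal
 * that iterates over a mount's vnode list commonly allocates a marker
 * vnode with vnalloc(mp) to keep its place and frees it with vnfree().
 * The helper name example_mark_position() is hypothetical.
 *
 *	static void
 *	example_mark_position(struct mount *mp)
 *	{
 *		vnode_t *mvp;
 *
 *		mvp = vnalloc(mp);	// marker: VI_MARKER is set
 *		...			// insert mvp into mp's vnode list,
 *					// walk the list, remove mvp again
 *		vnfree(mvp);		// markers are not counted in numvnodes
 *	}
 */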

/*
 * Free an unused, unreferenced vnode.
 */
void
vnfree(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 0);

	if ((vp->v_iflag & VI_MARKER) == 0) {
		rw_destroy(&vp->v_lock);
		mutex_enter(&vnode_free_list_lock);
		numvnodes--;
		mutex_exit(&vnode_free_list_lock);
	}

	/*
	 * Note: the vnode interlock will either be freed, or its reference
	 * dropped (if VI_LOCKSHARE was in use).
	 */
	uvm_obj_destroy(&vp->v_uobj, true);
	cv_destroy(&vp->v_cv);
	pool_cache_put(vnode_cache, vp);
}

/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Releases vnode_free_list_lock.
 */
static int
cleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;
	struct mount *mp;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		KASSERT(vp->v_usecount == 0);
		KASSERT((vp->v_iflag & VI_CLEAN) == 0);
		KASSERT(vp->v_freelisthd == listhd);

		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
			continue;
		if (!mutex_tryenter(vp->v_interlock)) {
			VOP_UNLOCK(vp);
			continue;
		}
		KASSERT((vp->v_iflag & VI_XLOCK) == 0);
		mp = vp->v_mount;
		if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
			mutex_exit(vp->v_interlock);
			VOP_UNLOCK(vp);
			continue;
		}
		break;
	}

	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return EBUSY;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	KASSERT(vp->v_usecount == 0);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before freeing it.  We need to add a reference
	 * before doing this.
	 */
	vp->v_usecount = 1;
	KASSERT((vp->v_iflag & VI_CHANGING) == 0);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
	fstrans_done(mp);

	return 0;
}

/*
 * getnewvnode: return a fresh vnode.
 *
 * => Returns referenced vnode, moved into the mount queue.
 * => Shares the interlock specified by 'slock', if it is not NULL.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    kmutex_t *slock, vnode_t **vpp)
{
	struct uvm_object *uobj __diagused;
	vnode_t *vp;
	int error = 0;

	if (mp != NULL) {
		/*
		 * Mark filesystem busy while we are creating a vnode.
		 * If unmount is in progress, this will fail.
		 */
		error = vfs_busy(mp, NULL);
		if (error)
			return error;
	}

	vp = NULL;

	/* Allocate a new vnode. */
	vp = vnalloc(NULL);

	KASSERT(vp->v_freelisthd == NULL);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	KASSERT(vp->v_data == NULL);

	/* Initialize vnode. */
	vp->v_tag = tag;
	vp->v_op = vops;

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);

	/* Share the vnode_t::v_interlock, if requested. */
	if (slock) {
		/* Set the interlock and mark that it is shared. */
		KASSERT(vp->v_mount == NULL);
		mutex_obj_hold(slock);
		uvm_obj_setlock(&vp->v_uobj, slock);
		KASSERT(vp->v_interlock == slock);
		vp->v_iflag |= VI_LOCKSHARE;
	}

	/* Finally, move vnode into the mount queue. */
	vfs_insmntque(vp, mp);

	if (mp != NULL) {
		if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
			vp->v_vflag |= VV_MPSAFE;
		vfs_unbusy(mp, true, NULL);
	}

	*vpp = vp;
	return 0;
}
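
/*
 * Illustrative sketch (not part of this file's interfaces): a file system's
 * vget-style routine might allocate a fresh vnode with getnewvnode(9) and,
 * if it loses an insertion race, push the unused vnode back with
 * ungetnewvnode(9).  The names example_fs_newvnode(), example_vnodeop_p and
 * example_hashins() are hypothetical.
 *
 *	static int
 *	example_fs_newvnode(struct mount *mp, vnode_t **vpp)
 *	{
 *		vnode_t *vp;
 *		int error;
 *
 *		error = getnewvnode(VT_NON, mp, example_vnodeop_p, NULL, &vp);
 *		if (error != 0)
 *			return error;
 *		if (example_hashins(vp) != 0) {	// lost the race to another thread
 *			ungetnewvnode(vp);
 *			return EEXIST;
 *		}
 *		*vpp = vp;
 *		return 0;
 *	}
 */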

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions which may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_freelisthd == NULL);

	mutex_enter(vp->v_interlock);
	vp->v_iflag |= VI_CLEAN;
	vrelel(vp, 0);
}

/*
 * Helper thread to keep the number of vnodes below desiredvnodes.
 */
static void
vdrain_thread(void *cookie)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	for (;;) {
		cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
		while (numvnodes > desiredvnodes) {
			error = cleanvnode();
			if (error)
				kpause("vndsbusy", false, hz, NULL);
			mutex_enter(&vnode_free_list_lock);
			if (error)
				break;
		}
	}
}

/*
 * Remove a vnode from its freelist.
 */
void
vremfree(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	/*
	 * Note that the reference count must not change until
	 * the vnode is removed.
	 */
	mutex_enter(&vnode_free_list_lock);
	if (vp->v_holdcnt > 0) {
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
	} else {
		KASSERT(vp->v_freelisthd == &vnode_free_list);
	}
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}

/*
 * vget: get a particular vnode from the free list, increment its reference
 * count and lock it.
 *
 * => Should be called with v_interlock held.
 *
 * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean().
 * In that case, we cannot grab the vnode, so the process is awakened when
 * the transition is completed, and an error returned to indicate that the
 * vnode is no longer usable.
 */
int
vget(vnode_t *vp, int flags)
{
	int error = 0;

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT)) == 0);

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
		vp->v_usecount = 1;
	} else {
		atomic_inc_uint(&vp->v_usecount);
	}

	/*
	 * If the vnode is in the process of changing state we wait
	 * for the change to complete and take care not to return
	 * a clean vnode.
	 */
	if ((vp->v_iflag & VI_CHANGING) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_CHANGING);
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vrelel(vp, 0);
			return ENOENT;
		}
	}

	/*
	 * Ok, we got it in good shape.  Just locking left.
	 */
	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
	mutex_exit(vp->v_interlock);
	if (flags & (LK_EXCLUSIVE | LK_SHARED)) {
		error = vn_lock(vp, flags);
		if (error != 0) {
			vrele(vp);
		}
	}
	return error;
}
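
/*
 * Illustrative sketch (not part of this file's interfaces): callers of
 * vget(9) enter with v_interlock held, e.g. after finding the vnode in
 * some cache; vget() always releases the interlock.  example_use_vnode()
 * is hypothetical.
 *
 *	static int
 *	example_use_vnode(vnode_t *vp)
 *	{
 *		int error;
 *
 *		mutex_enter(vp->v_interlock);
 *		error = vget(vp, LK_EXCLUSIVE);	// reference + lock; drops
 *						// the interlock in all cases
 *		if (error != 0)
 *			return error;		// e.g. ENOENT if reclaimed
 *		...				// operate on the locked vnode
 *		vput(vp);			// unlock and drop the reference
 *		return 0;
 *	}
 */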

/*
 * vput: unlock and release the reference.
 */
void
vput(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Try to drop reference on a vnode.  Abort if we are releasing the
 * last reference.  Note: this _must_ succeed if not the last reference.
 */
static inline bool
vtryrele(vnode_t *vp)
{
	u_int use, next;

	for (use = vp->v_usecount;; use = next) {
		if (use == 1) {
			return false;
		}
		KASSERT(use > 1);
		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
		if (__predict_true(next == use)) {
			return true;
		}
	}
}

/*
 * Vnode release.  If reference count drops to zero, call inactive
 * routine and either return to freelist or free to the pool.
 */
static void
vrelel(vnode_t *vp, int flags)
{
	bool recycle, defer;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_freelisthd == NULL);

	if (__predict_false(vp->v_op == dead_vnodeop_p &&
	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
		vnpanic(vp, "dead but not clean");
	}

	/*
	 * If not the last reference, just drop the reference count
	 * and unlock.
	 */
	if (vtryrele(vp)) {
		if ((flags & VRELEL_CHANGING_SET) != 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) != 0);
			vp->v_iflag &= ~VI_CHANGING;
			cv_broadcast(&vp->v_cv);
		}
		mutex_exit(vp->v_interlock);
		return;
	}
	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
		vnpanic(vp, "%s: bad ref count", __func__);
	}

	KASSERT((vp->v_iflag & VI_XLOCK) == 0);

#ifdef DIAGNOSTIC
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
		vprint("vrelel: missing VOP_CLOSE()", vp);
	}
#endif

	/*
	 * If not clean, deactivate the vnode, but preserve
	 * our reference across the call to VOP_INACTIVE().
	 */
	if ((vp->v_iflag & VI_CLEAN) == 0) {
		recycle = false;

		/*
		 * XXX This ugly block can be largely eliminated if
		 * locking is pushed down into the file systems.
		 *
		 * Defer vnode release to vrele_thread if caller
		 * requests it explicitly or is the pagedaemon.
		 */
		if ((curlwp == uvm.pagedaemon_lwp) ||
		    (flags & VRELEL_ASYNC_RELE) != 0) {
			defer = true;
		} else if (curlwp == vrele_lwp) {
			/*
			 * We have to try harder.
			 */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			KASSERT(error == 0);
			mutex_enter(vp->v_interlock);
			defer = false;
		} else {
			/* If we can't acquire the lock, then defer. */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp,
			    LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
			defer = (error != 0);
			mutex_enter(vp->v_interlock);
		}

		KASSERT(mutex_owned(vp->v_interlock));
		KASSERT(! (curlwp == vrele_lwp && defer));

		if (defer) {
			/*
			 * Defer reclaim to the kthread; it's not safe to
			 * clean it here.  We donate it our last reference.
			 */
			if ((flags & VRELEL_CHANGING_SET) != 0) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
			}
			mutex_enter(&vrele_lock);
			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
			if (++vrele_pending > (desiredvnodes >> 8))
				cv_signal(&vrele_cv);
			mutex_exit(&vrele_lock);
			mutex_exit(vp->v_interlock);
			return;
		}

		/*
		 * If the node got another reference while we
		 * released the interlock, don't try to inactivate it yet.
		 */
		if (__predict_false(vtryrele(vp))) {
			VOP_UNLOCK(vp);
			if ((flags & VRELEL_CHANGING_SET) != 0) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
			}
			mutex_exit(vp->v_interlock);
			return;
		}

		if ((flags & VRELEL_CHANGING_SET) == 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
			vp->v_iflag |= VI_CHANGING;
		}
		mutex_exit(vp->v_interlock);

		/*
		 * The vnode can gain another reference while being
		 * deactivated.  If VOP_INACTIVE() indicates that
		 * the described file has been deleted, then recycle
		 * the vnode irrespective of additional references.
		 * Another thread may be waiting to re-use the on-disk
		 * inode.
		 *
		 * Note that VOP_INACTIVE() will drop the vnode lock.
		 */
		VOP_INACTIVE(vp, &recycle);
		if (recycle) {
			/* vclean() below will drop the lock. */
			if (vn_lock(vp, LK_EXCLUSIVE) != 0)
				recycle = false;
		}
		mutex_enter(vp->v_interlock);
		if (!recycle) {
			if (vtryrele(vp)) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
				mutex_exit(vp->v_interlock);
				return;
			}
		}

		/* Take care of space accounting. */
		if (vp->v_iflag & VI_EXECMAP) {
			atomic_add_int(&uvmexp.execpages,
			    -vp->v_uobj.uo_npages);
			atomic_add_int(&uvmexp.filepages,
			    vp->v_uobj.uo_npages);
		}
		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
		vp->v_vflag &= ~VV_MAPPED;

		/*
		 * Recycle the vnode if the file is now unused (unlinked),
		 * otherwise just free it.
		 */
		if (recycle) {
			vclean(vp);
		}
		KASSERT(vp->v_usecount > 0);
	} else { /* vnode was already clean */
		if ((flags & VRELEL_CHANGING_SET) == 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
			vp->v_iflag |= VI_CHANGING;
		}
	}

	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
		/* Gained another reference while being reclaimed. */
		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
		vp->v_iflag &= ~VI_CHANGING;
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
		return;
	}

	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/*
		 * It's clean so destroy it.  It isn't referenced
		 * anywhere since it has been reclaimed.
		 */
		KASSERT(vp->v_holdcnt == 0);
		KASSERT(vp->v_writecount == 0);
		mutex_exit(vp->v_interlock);
		vfs_insmntque(vp, NULL);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vnfree(vp);
	} else {
		/*
		 * Otherwise, put it back onto the freelist.  It
		 * can't be destroyed while still associated with
		 * a file system.
		 */
		mutex_enter(&vnode_free_list_lock);
		if (vp->v_holdcnt > 0) {
			vp->v_freelisthd = &vnode_hold_list;
		} else {
			vp->v_freelisthd = &vnode_free_list;
		}
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
		vp->v_iflag &= ~VI_CHANGING;
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
	}
}

void
vrele(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vtryrele(vp)) {
		return;
	}
	mutex_enter(vp->v_interlock);
	vrelel(vp, 0);
}

/*
 * Asynchronous vnode release: the vnode is released in a different context.
 */
void
vrele_async(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vtryrele(vp)) {
		return;
	}
	mutex_enter(vp->v_interlock);
	vrelel(vp, VRELEL_ASYNC_RELE);
}
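
/*
 * Illustrative sketch (not part of this file's interfaces): vrele_async(9)
 * is useful when the caller must not risk sleeping for the vnode lock,
 * e.g. while holding an unrelated lock; the final release is then handled
 * by the vrele thread.  example_drop_locked() and its lock argument are
 * hypothetical.
 *
 *	static void
 *	example_drop_locked(kmutex_t *lk, vnode_t *vp)
 *	{
 *		KASSERT(mutex_owned(lk));	// caller holds an unrelated lock
 *		vrele_async(vp);		// avoids waiting for the vnode
 *						// lock; the vrele thread runs
 *						// VOP_INACTIVE() later
 *	}
 */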

static void
vrele_thread(void *cookie)
{
	vnodelst_t skip_list;
	vnode_t *vp;
	struct mount *mp;

	TAILQ_INIT(&skip_list);

	mutex_enter(&vrele_lock);
	for (;;) {
		while (TAILQ_EMPTY(&vrele_list)) {
			vrele_gen++;
			cv_broadcast(&vrele_cv);
			cv_timedwait(&vrele_cv, &vrele_lock, hz);
			TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist);
		}
		vp = TAILQ_FIRST(&vrele_list);
		mp = vp->v_mount;
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) {
			TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist);
			continue;
		}
		vrele_pending--;
		mutex_exit(&vrele_lock);

		/*
		 * If not the last reference, then ignore the vnode
		 * and look for more work.
		 */
		mutex_enter(vp->v_interlock);
		vrelel(vp, 0);
		fstrans_done(mp);
		mutex_enter(&vrele_lock);
	}
}

void
vrele_flush(void)
{
	int gen;

	mutex_enter(&vrele_lock);
	gen = vrele_gen;
	while (vrele_pending && gen == vrele_gen) {
		cv_broadcast(&vrele_cv);
		cv_wait(&vrele_cv, &vrele_lock);
	}
	mutex_exit(&vrele_lock);
}

/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 */
void
vref(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	atomic_inc_uint(&vp->v_usecount);
}

/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 */
void
vholdl(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_free_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_hold_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}

/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 */
void
holdrelel(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt <= 0) {
		vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
	}

	vp->v_holdcnt--;
	if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_free_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}
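
/*
 * Illustrative sketch (not part of this file's interfaces): code that
 * attaches a buffer or page to a vnode takes a hold reference so that an
 * otherwise unreferenced vnode migrates to vnode_hold_list instead of
 * being recycled from vnode_free_list.
 *
 *	mutex_enter(vp->v_interlock);
 *	vholdl(vp);			// e.g. when associating a buffer
 *	mutex_exit(vp->v_interlock);
 *	...
 *	mutex_enter(vp->v_interlock);
 *	holdrelel(vp);			// when the buffer is disassociated
 *	mutex_exit(vp->v_interlock);
 */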

/*
 * Disassociate the underlying file system from a vnode.
 *
 * Must be called with vnode locked and will return unlocked.
 * Must be called with the interlock held, and will return with it held.
 */
static void
vclean(vnode_t *vp)
{
	lwp_t *l = curlwp;
	bool recycle, active, doclose;
	int error;

	KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
	    VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0);
	KASSERT(vp->v_usecount != 0);

	active = (vp->v_usecount > 1);
	doclose = ! (active && vp->v_type == VBLK &&
	    spec_node_getmountedfs(vp) != NULL);

	/*
	 * Prevent the vnode from being recycled or brought into use
	 * while we clean it out.
	 */
	vp->v_iflag |= VI_XLOCK;
	if (vp->v_iflag & VI_EXECMAP) {
		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
	}
	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
	mutex_exit(vp->v_interlock);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (doclose) {
		error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
		if (error != 0) {
			if (wapbl_vphaswapbl(vp))
				WAPBL_DISCARD(wapbl_vptomp(vp));
			error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
		}
		KASSERT(error == 0);
		KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
		if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
			spec_node_revoke(vp);
		}
	}
	if (active) {
		VOP_INACTIVE(vp, &recycle);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VI_XLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp);
	}

	/* Disassociate the underlying file system from the vnode. */
	if (VOP_RECLAIM(vp)) {
		vnpanic(vp, "%s: cannot reclaim", __func__);
	}

	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_uobj.uo_npages == 0);

	if (vp->v_type == VREG && vp->v_ractx != NULL) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}

	/* Purge name cache. */
	cache_purge(vp);

	/* Move to dead mount. */
	vp->v_vflag &= ~VV_ROOT;
	atomic_inc_uint(&dead_mount->mnt_refcnt);
	vfs_insmntque(vp, dead_mount);

	/* Done with purge, notify sleepers of the grim news. */
	mutex_enter(vp->v_interlock);
	if (doclose) {
		vp->v_op = dead_vnodeop_p;
		vp->v_vflag |= VV_LOCKSWORK;
		vp->v_iflag |= VI_CLEAN;
	} else {
		vp->v_op = spec_vnodeop_p;
		vp->v_vflag &= ~VV_LOCKSWORK;
	}
	vp->v_tag = VT_NON;
	KNOTE(&vp->v_klist, NOTE_REVOKE);
	vp->v_iflag &= ~VI_XLOCK;
	cv_broadcast(&vp->v_cv);

	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
}

/*
 * Recycle an unused vnode if caller holds the last reference.
 */
bool
vrecycle(vnode_t *vp)
{

	if (vn_lock(vp, LK_EXCLUSIVE) != 0)
		return false;

	mutex_enter(vp->v_interlock);

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		VOP_UNLOCK(vp);
		return false;
	}
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		VOP_UNLOCK(vp);
		return false;
	}
	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
	return true;
}
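
/*
 * Illustrative sketch (not part of this file's interfaces): a caller that
 * holds a reference and knows the underlying object is no longer needed can
 * offer the vnode for immediate reclamation; on success vrecycle() consumes
 * that reference, otherwise the caller just releases it.
 * example_drop_and_maybe_recycle() is hypothetical.
 *
 *	static void
 *	example_drop_and_maybe_recycle(vnode_t *vp)
 *	{
 *		if (vrecycle(vp))
 *			return;		// we held the last reference; reclaimed
 *		vrele(vp);		// others still use it; just release ours
 *	}
 */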

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vrevoke(vnode_t *vp)
{
	vnode_t *vq;
	enum vtype type;
	dev_t dev;

	KASSERT(vp->v_usecount > 0);

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		mutex_exit(vp->v_interlock);
		return;
	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
		atomic_inc_uint(&vp->v_usecount);
		mutex_exit(vp->v_interlock);
		vgone(vp);
		return;
	} else {
		dev = vp->v_rdev;
		type = vp->v_type;
		mutex_exit(vp->v_interlock);
	}

	while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
		vgone(vq);
	}
}

/*
 * Eliminate all activity associated with a vnode in preparation for
 * reuse.  Drops a reference from the vnode.
 */
void
vgone(vnode_t *vp)
{

	if (vn_lock(vp, LK_EXCLUSIVE) != 0) {
		KASSERT((vp->v_iflag & VI_CLEAN) != 0);
		vrele(vp);
	}

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
}

static inline uint32_t
vcache_hash(const struct vcache_key *key)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash);
	hash = hash32_buf(key->vk_key, key->vk_key_len, hash);
	return hash;
}

static void
vcache_init(void)
{

	vcache.pool = pool_cache_init(sizeof(struct vcache_node), 0, 0, 0,
	    "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vcache.pool != NULL);
	mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE);
	vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true,
	    &vcache.hashmask);
}

static void
vcache_reinit(void)
{
	int i;
	uint32_t hash;
	u_long oldmask, newmask;
	struct hashhead *oldtab, *newtab;
	struct vcache_node *node;

	newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask);
	mutex_enter(&vcache.lock);
	oldtab = vcache.hashtab;
	oldmask = vcache.hashmask;
	vcache.hashtab = newtab;
	vcache.hashmask = newmask;
	for (i = 0; i <= oldmask; i++) {
		while ((node = SLIST_FIRST(&oldtab[i])) != NULL) {
			SLIST_REMOVE(&oldtab[i], node, vcache_node, vn_hash);
			hash = vcache_hash(&node->vn_key);
			SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask],
			    node, vn_hash);
		}
	}
	mutex_exit(&vcache.lock);
	hashdone(oldtab, HASH_SLIST, oldmask);
}

static inline struct vcache_node *
vcache_hash_lookup(const struct vcache_key *key, uint32_t hash)
{
	struct hashhead *hashp;
	struct vcache_node *node;

	KASSERT(mutex_owned(&vcache.lock));

	hashp = &vcache.hashtab[hash & vcache.hashmask];
	SLIST_FOREACH(node, hashp, vn_hash) {
		if (key->vk_mount != node->vn_key.vk_mount)
			continue;
		if (key->vk_key_len != node->vn_key.vk_key_len)
			continue;
		if (memcmp(key->vk_key, node->vn_key.vk_key, key->vk_key_len))
			continue;
		return node;
	}
	return NULL;
}

/*
 * Get a vnode / fs node pair by key and return it referenced through vpp.
 */
int
vcache_get(struct mount *mp, const void *key, size_t key_len,
    struct vnode **vpp)
{
	int error;
	uint32_t hash;
	const void *new_key;
	struct vnode *vp;
	struct vcache_key vcache_key;
	struct vcache_node *node, *new_node;

	new_key = NULL;
	*vpp = NULL;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

again:
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);

	/* If found, take a reference or retry. */
	if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
		vp = node->vn_vnode;
		mutex_enter(vp->v_interlock);
		mutex_exit(&vcache.lock);
		error = vget(vp, 0);
		if (error == ENOENT)
			goto again;
		if (error == 0)
			*vpp = vp;
		KASSERT((error != 0) == (*vpp == NULL));
		return error;
	}

	/* If another thread loads this node, wait and retry. */
	if (node != NULL) {
		KASSERT(node->vn_vnode == NULL);
		mutex_exit(&vcache.lock);
		kpause("vcache", false, mstohz(20), NULL);
		goto again;
	}
	mutex_exit(&vcache.lock);

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = vcache_key;
	vp = vnalloc(NULL);
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	if (node == NULL) {
		SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vn_hash);
		node = new_node;
	}
	mutex_exit(&vcache.lock);

	/* If another thread beat us inserting this node, retry. */
	if (node != new_node) {
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		goto again;
	}

	/* Load the fs node.  Exclusive as new_node->vn_vnode is NULL. */
	vp->v_iflag |= VI_CHANGING;
	error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
	if (error) {
		mutex_enter(&vcache.lock);
		SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vcache_node, vn_hash);
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_key != NULL);
	KASSERT(memcmp(key, new_key, key_len) == 0);
	KASSERT(vp->v_op != NULL);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	new_node->vn_key.vk_key = new_key;
	new_node->vn_vnode = vp;
	mutex_exit(&vcache.lock);
	mutex_enter(vp->v_interlock);
	vp->v_iflag &= ~VI_CHANGING;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
	*vpp = vp;
	return 0;
}
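
/*
 * Illustrative sketch (not part of this file's interfaces): a file system
 * whose nodes are keyed by inode number can implement its VFS_VGET(9)-style
 * lookup as a thin wrapper around vcache_get(9); VFS_LOADVNODE(9) then does
 * the actual initialization.  example_fs_vget() is hypothetical, and note
 * that vcache_get() returns the vnode referenced but unlocked.
 *
 *	static int
 *	example_fs_vget(struct mount *mp, ino_t ino, vnode_t **vpp)
 *	{
 *		int error;
 *
 *		error = vcache_get(mp, &ino, sizeof(ino), vpp);
 *		if (error != 0)
 *			return error;
 *		error = vn_lock(*vpp, LK_EXCLUSIVE);
 *		if (error != 0) {
 *			vrele(*vpp);
 *			*vpp = NULL;
 *		}
 *		return error;
 *	}
 */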

/*
 * Prepare key change: lock old and new cache node.
 * Return an error if the new node already exists.
 */
int
vcache_rekey_enter(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node, *new_node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = new_vcache_key;

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	if (node != NULL) {
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
	    new_node, vn_hash);
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == vp);
	node->vn_vnode = NULL;
	node->vn_key = old_vcache_key;
	mutex_exit(&vcache.lock);
	return 0;
}

/*
 * Key change complete: remove old node and unlock new node.
 */
void
vcache_rekey_exit(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	KASSERT(node != NULL && node->vn_vnode == NULL);
	KASSERT(node->vn_key.vk_key_len == new_key_len);
	node->vn_vnode = vp;
	node->vn_key = new_vcache_key;
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == NULL);
	SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
	    node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, node);
}
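
/*
 * Illustrative sketch (not part of this file's interfaces): a file system
 * that renumbers a node (changes its cache key) brackets the change with
 * vcache_rekey_enter() and vcache_rekey_exit(); lookups for either key
 * wait in between because the node appears to be loading.  The names
 * example_renumber() and example_fs_node are hypothetical; the key passed
 * to vcache_rekey_exit() is retained by the cache, so it must point at
 * storage that stays valid (here, the fs node).
 *
 *	static int
 *	example_renumber(struct mount *mp, vnode_t *vp, ino_t new_ino)
 *	{
 *		struct example_fs_node *np = vp->v_data;
 *		ino_t old_ino = np->ino;
 *		int error;
 *
 *		error = vcache_rekey_enter(mp, vp, &old_ino, sizeof(old_ino),
 *		    &new_ino, sizeof(new_ino));
 *		if (error != 0)
 *			return error;	// EEXIST: new key already cached
 *		np->ino = new_ino;	// update the long-lived key first
 *		vcache_rekey_exit(mp, vp, &old_ino, sizeof(old_ino),
 *		    &np->ino, sizeof(np->ino));
 *		return 0;
 *	}
 */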

/*
 * Remove a vnode / fs node pair from the cache.
 */
void
vcache_remove(struct mount *mp, const void *key, size_t key_len)
{
	uint32_t hash;
	struct vcache_key vcache_key;
	struct vcache_node *node;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	KASSERT(node != NULL);
	SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
	    node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, node);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(struct buf *bp)
{
	vnode_t *vp;

	if ((vp = bp->b_vp) == NULL)
		return;

	KASSERT(bp->b_objlock == vp->v_interlock);
	KASSERT(mutex_owned(bp->b_objlock));

	if (--vp->v_numoutput < 0)
		vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
	if (vp->v_numoutput == 0)
		cv_broadcast(&vp->v_cv);
}

/*
 * Test a vnode for being or becoming dead.  Returns one of:
 * EBUSY:  vnode is becoming dead, with "flags == VDEAD_NOWAIT" only.
 * ENOENT: vnode is dead.
 * 0:      otherwise.
 *
 * Whenever this function returns a non-zero value all future
 * calls will also return a non-zero value.
 */
int
vdead_check(struct vnode *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	if (ISSET(vp->v_iflag, VI_XLOCK)) {
		if (ISSET(flags, VDEAD_NOWAIT))
			return EBUSY;
		vwait(vp, VI_XLOCK);
		KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
	}
	if (ISSET(vp->v_iflag, VI_CLEAN))
		return ENOENT;
	return 0;
}
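
/*
 * Illustrative sketch (not part of this file's interfaces): code that
 * revisits a vnode it found through a device or cache reference can use
 * vdead_check(9) under the interlock to bail out if the vnode is dying.
 *
 *	mutex_enter(vp->v_interlock);
 *	error = vdead_check(vp, VDEAD_NOWAIT);
 *	mutex_exit(vp->v_interlock);
 *	if (error != 0)
 *		return error;	// EBUSY: being cleaned, ENOENT: already dead
 */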

/*
 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
 * recycled.
 */
static void
vwait(vnode_t *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount != 0);

	while ((vp->v_iflag & flags) != 0)
		cv_wait(&vp->v_cv, vp->v_interlock);
}

int
vfs_drainvnodes(long target)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	while (numvnodes > target) {
		error = cleanvnode();
		if (error != 0)
			return error;
		mutex_enter(&vnode_free_list_lock);
	}

	mutex_exit(&vnode_free_list_lock);

	vcache_reinit();

	return 0;
}

void
vnpanic(vnode_t *vp, const char *fmt, ...)
{
	va_list ap;

#ifdef DIAGNOSTIC
	vprint(NULL, vp);
#endif
	va_start(ap, fmt);
	vpanic(fmt, ap);
	va_end(ap);
}