1 /* $NetBSD: vfs_vnode.c,v 1.37.2.1 2014/10/19 10:02:59 martin Exp $ */
2
3 /*-
4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1989, 1993
35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
67 */
68
69 /*
70 * The vnode cache subsystem.
71 *
72 * Life-cycle
73 *
74 * Normally, there are two points where new vnodes are created:
75 * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode
76 * starts in one of the following ways:
77 *
78 * - Allocation, via getnewvnode(9) and/or vnalloc(9).
79 * - Reclamation of inactive vnode, via vget(9).
80 *
81 * Recycling from a free list, via getnewvnode(9) -> getcleanvnode(9),
82 * was another, traditional way. Currently, only the draining thread
83 * recycles vnodes. This behaviour might be revisited.
84 *
85 * The life-cycle ends when the last reference is dropped, usually
86 * in VOP_REMOVE(9). In that case, VOP_INACTIVE(9) is called to inform
87 * the file system that the vnode is inactive. Via this call, the file
88 * system indicates whether the vnode can be recycled (usually, it checks
89 * its own references, e.g. count of links, whether the file was removed).
90 *
91 * Depending on that indication, the vnode can be put onto a free list
92 * (cache), or cleaned via vclean(9), which calls VOP_RECLAIM(9) to
93 * disassociate the underlying file system from the vnode, and then destroyed.
94 *
95 * Reference counting
96 *
97 * A vnode is considered active if its reference count
98 * (vnode_t::v_usecount) is non-zero. It is maintained using the vref(9),
99 * vrele(9) and vput(9) routines. Common points holding references are e.g.
100 * file openings, current working directory, mount points, etc.
101 *
102 * Note on v_usecount and its locking
103 *
104 * At nearly all points where it is known that v_usecount could be
105 * zero, vnode_t::v_interlock will be held. To change v_usecount away
106 * from zero, the interlock must be held. To change from a non-zero
107 * value to zero, again the interlock must be held.
108 *
109 * Changing the usecount from a non-zero value to a non-zero value can
110 * safely be done using atomic operations, without the interlock held.
111 *
112 * Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
113 * mntvnode_lock is still held.
114 *
115 * See PR 41374.
116 */
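
/*
 * Example (sketch, not compiled): the v_usecount rules above in practice.
 * Taking an extra reference on an already referenced vnode needs no lock;
 * transitions to or away from a zero use count happen under v_interlock.
 */
#if 0
	vref(vp);		/* non-zero -> non-zero: atomic, no interlock */
	/* ... use the vnode ... */
	vrele(vp);		/* may be the last reference; vrelel() then
				 * takes v_interlock for the drop to zero */
#endif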
117
118 #include <sys/cdefs.h>
119 __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.37.2.1 2014/10/19 10:02:59 martin Exp $");
120
121 #define _VFS_VNODE_PRIVATE
122
123 #include <sys/param.h>
124 #include <sys/kernel.h>
125
126 #include <sys/atomic.h>
127 #include <sys/buf.h>
128 #include <sys/conf.h>
129 #include <sys/device.h>
130 #include <sys/hash.h>
131 #include <sys/kauth.h>
132 #include <sys/kmem.h>
133 #include <sys/kthread.h>
134 #include <sys/module.h>
135 #include <sys/mount.h>
136 #include <sys/namei.h>
137 #include <sys/syscallargs.h>
138 #include <sys/sysctl.h>
139 #include <sys/systm.h>
140 #include <sys/vnode.h>
141 #include <sys/wapbl.h>
142 #include <sys/fstrans.h>
143
144 #include <uvm/uvm.h>
145 #include <uvm/uvm_readahead.h>
146
147 /* Flags to vrelel. */
148 #define VRELEL_ASYNC_RELE 0x0001 /* Always defer to vrele thread. */
149 #define VRELEL_CHANGING_SET 0x0002 /* VI_CHANGING set by caller. */
150
151 struct vcache_key {
152 struct mount *vk_mount;
153 const void *vk_key;
154 size_t vk_key_len;
155 };
156 struct vcache_node {
157 SLIST_ENTRY(vcache_node) vn_hash;
158 struct vnode *vn_vnode;
159 struct vcache_key vn_key;
160 };
161
162 u_int numvnodes __cacheline_aligned;
163
164 static pool_cache_t vnode_cache __read_mostly;
165 static struct mount *dead_mount;
166
167 /*
168 * There are two free lists: one is for vnodes which have no buffer/page
169 * references and one for those which do (i.e. v_holdcnt is non-zero).
170 * Vnode recycling mechanism first attempts to look into the former list.
171 */
172 static kmutex_t vnode_free_list_lock __cacheline_aligned;
173 static vnodelst_t vnode_free_list __cacheline_aligned;
174 static vnodelst_t vnode_hold_list __cacheline_aligned;
175 static kcondvar_t vdrain_cv __cacheline_aligned;
176
177 static vnodelst_t vrele_list __cacheline_aligned;
178 static kmutex_t vrele_lock __cacheline_aligned;
179 static kcondvar_t vrele_cv __cacheline_aligned;
180 static lwp_t * vrele_lwp __cacheline_aligned;
181 static int vrele_pending __cacheline_aligned;
182 static int vrele_gen __cacheline_aligned;
183
184 static struct {
185 kmutex_t lock;
186 u_long hashmask;
187 SLIST_HEAD(hashhead, vcache_node) *hashtab;
188 pool_cache_t pool;
189 } vcache __cacheline_aligned;
190
191 static int cleanvnode(void);
192 static void vcache_init(void);
193 static void vcache_reinit(void);
194 static void vclean(vnode_t *);
195 static void vrelel(vnode_t *, int);
196 static void vdrain_thread(void *);
197 static void vrele_thread(void *);
198 static void vnpanic(vnode_t *, const char *, ...)
199 __printflike(2, 3);
200 static void vwait(vnode_t *, int);
201
202 /* Routines having to do with the management of the vnode table. */
203 extern int (**dead_vnodeop_p)(void *);
204 extern struct vfsops dead_vfsops;
205
206 void
207 vfs_vnode_sysinit(void)
208 {
209 int error __diagused;
210
211 vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl",
212 NULL, IPL_NONE, NULL, NULL, NULL);
213 KASSERT(vnode_cache != NULL);
214
215 dead_mount = vfs_mountalloc(&dead_vfsops, NULL);
216 KASSERT(dead_mount != NULL);
217 dead_mount->mnt_iflag = IMNT_MPSAFE;
218
219 mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
220 TAILQ_INIT(&vnode_free_list);
221 TAILQ_INIT(&vnode_hold_list);
222 TAILQ_INIT(&vrele_list);
223
224 vcache_init();
225
226 mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
227 cv_init(&vdrain_cv, "vdrain");
228 cv_init(&vrele_cv, "vrele");
229 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
230 NULL, NULL, "vdrain");
231 KASSERT(error == 0);
232 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
233 NULL, &vrele_lwp, "vrele");
234 KASSERT(error == 0);
235 }
236
237 /*
238 * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a
239 * marker vnode.
240 */
241 vnode_t *
242 vnalloc(struct mount *mp)
243 {
244 vnode_t *vp;
245
246 vp = pool_cache_get(vnode_cache, PR_WAITOK);
247 KASSERT(vp != NULL);
248
249 memset(vp, 0, sizeof(*vp));
250 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
251 cv_init(&vp->v_cv, "vnode");
252 /*
253 * Done by memset() above.
254 * LIST_INIT(&vp->v_nclist);
255 * LIST_INIT(&vp->v_dnclist);
256 */
257
258 if (mp != NULL) {
259 vp->v_mount = mp;
260 vp->v_type = VBAD;
261 vp->v_iflag = VI_MARKER;
262 return vp;
263 }
264
265 mutex_enter(&vnode_free_list_lock);
266 numvnodes++;
267 if (numvnodes > desiredvnodes + desiredvnodes / 10)
268 cv_signal(&vdrain_cv);
269 mutex_exit(&vnode_free_list_lock);
270
271 rw_init(&vp->v_lock);
272 vp->v_usecount = 1;
273 vp->v_type = VNON;
274 vp->v_size = vp->v_writesize = VSIZENOTSET;
275
276 return vp;
277 }
278
279 /*
280 * Free an unused, unreferenced vnode.
281 */
282 void
283 vnfree(vnode_t *vp)
284 {
285
286 KASSERT(vp->v_usecount == 0);
287
288 if ((vp->v_iflag & VI_MARKER) == 0) {
289 rw_destroy(&vp->v_lock);
290 mutex_enter(&vnode_free_list_lock);
291 numvnodes--;
292 mutex_exit(&vnode_free_list_lock);
293 }
294
295 /*
296 * Note: the vnode interlock will either be freed, or its reference
297 * dropped (if VI_LOCKSHARE was in use).
298 */
299 uvm_obj_destroy(&vp->v_uobj, true);
300 cv_destroy(&vp->v_cv);
301 pool_cache_put(vnode_cache, vp);
302 }
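
/*
 * Example (sketch, not compiled): marker vnodes, as returned by
 * vnalloc(mp) with a non-NULL mount, hold a stable position while
 * iterating a mount's vnode list; they carry VI_MARKER and are not
 * counted in numvnodes.
 */
#if 0
	vnode_t *mvp = vnalloc(mp);	/* marker for list iteration */
	/* ... insert mvp behind the current vnode, drop the list lock,
	 * do the work, then resume the scan from mvp ... */
	vnfree(mvp);
#endif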
303
304 /*
305 * cleanvnode: grab a vnode from freelist, clean and free it.
306 *
307 * => Releases vnode_free_list_lock.
308 */
309 static int
310 cleanvnode(void)
311 {
312 vnode_t *vp;
313 vnodelst_t *listhd;
314 struct mount *mp;
315
316 KASSERT(mutex_owned(&vnode_free_list_lock));
317
318 listhd = &vnode_free_list;
319 try_nextlist:
320 TAILQ_FOREACH(vp, listhd, v_freelist) {
321 /*
322 * It's safe to test v_usecount and v_iflag
323 * without holding the interlock here: vnodes
324 * with a non-zero use count or with VI_CLEAN
325 * set should never appear on these lists.
326 */
327 KASSERT(vp->v_usecount == 0);
328 KASSERT((vp->v_iflag & VI_CLEAN) == 0);
329 KASSERT(vp->v_freelisthd == listhd);
330
331 if (!mutex_tryenter(vp->v_interlock))
332 continue;
333 if ((vp->v_iflag & VI_XLOCK) != 0) {
334 mutex_exit(vp->v_interlock);
335 continue;
336 }
337 mp = vp->v_mount;
338 if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
339 mutex_exit(vp->v_interlock);
340 continue;
341 }
342 break;
343 }
344
345 if (vp == NULL) {
346 if (listhd == &vnode_free_list) {
347 listhd = &vnode_hold_list;
348 goto try_nextlist;
349 }
350 mutex_exit(&vnode_free_list_lock);
351 return EBUSY;
352 }
353
354 /* Remove it from the freelist. */
355 TAILQ_REMOVE(listhd, vp, v_freelist);
356 vp->v_freelisthd = NULL;
357 mutex_exit(&vnode_free_list_lock);
358
359 KASSERT(vp->v_usecount == 0);
360
361 /*
362 * The vnode is still associated with a file system, so we must
363 * clean it out before freeing it. We need to add a reference
364 * before doing this.
365 */
366 vp->v_usecount = 1;
367 KASSERT((vp->v_iflag & VI_CHANGING) == 0);
368 vp->v_iflag |= VI_CHANGING;
369 vclean(vp);
370 vrelel(vp, VRELEL_CHANGING_SET);
371 fstrans_done(mp);
372
373 return 0;
374 }
375
376 /*
377 * getnewvnode: return a fresh vnode.
378 *
379 * => Returns referenced vnode, moved into the mount queue.
380 * => Shares the interlock specified by 'slock', if it is not NULL.
381 */
382 int
383 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
384 kmutex_t *slock, vnode_t **vpp)
385 {
386 struct uvm_object *uobj __diagused;
387 vnode_t *vp;
388 int error = 0;
389
390 if (mp != NULL) {
391 /*
392 * Mark filesystem busy while we are creating a vnode.
393 * If unmount is in progress, this will fail.
394 */
395 error = vfs_busy(mp, NULL);
396 if (error)
397 return error;
398 }
399
400 vp = NULL;
401
402 /* Allocate a new vnode. */
403 vp = vnalloc(NULL);
404
405 KASSERT(vp->v_freelisthd == NULL);
406 KASSERT(LIST_EMPTY(&vp->v_nclist));
407 KASSERT(LIST_EMPTY(&vp->v_dnclist));
408 KASSERT(vp->v_data == NULL);
409
410 /* Initialize vnode. */
411 vp->v_tag = tag;
412 vp->v_op = vops;
413
414 uobj = &vp->v_uobj;
415 KASSERT(uobj->pgops == &uvm_vnodeops);
416 KASSERT(uobj->uo_npages == 0);
417 KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
418
419 /* Share the vnode_t::v_interlock, if requested. */
420 if (slock) {
421 /* Set the interlock and mark that it is shared. */
422 KASSERT(vp->v_mount == NULL);
423 mutex_obj_hold(slock);
424 uvm_obj_setlock(&vp->v_uobj, slock);
425 KASSERT(vp->v_interlock == slock);
426 vp->v_iflag |= VI_LOCKSHARE;
427 }
428
429 /* Finally, move vnode into the mount queue. */
430 vfs_insmntque(vp, mp);
431
432 if (mp != NULL) {
433 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
434 vp->v_vflag |= VV_MPSAFE;
435 vfs_unbusy(mp, true, NULL);
436 }
437
438 *vpp = vp;
439 return 0;
440 }
441
442 /*
443 * This is really just the reverse of getnewvnode(). Needed for
444 * VFS_VGET functions that may need to push back a vnode in case
445 * of a locking race.
446 */
447 void
448 ungetnewvnode(vnode_t *vp)
449 {
450
451 KASSERT(vp->v_usecount == 1);
452 KASSERT(vp->v_data == NULL);
453 KASSERT(vp->v_freelisthd == NULL);
454
455 mutex_enter(vp->v_interlock);
456 vp->v_iflag |= VI_CLEAN;
457 vrelel(vp, 0);
458 }
459
460 /*
461 * Helper thread to keep the number of vnodes below desiredvnodes.
462 */
463 static void
464 vdrain_thread(void *cookie)
465 {
466 int error;
467
468 mutex_enter(&vnode_free_list_lock);
469
470 for (;;) {
471 cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
472 while (numvnodes > desiredvnodes) {
473 error = cleanvnode();
474 if (error)
475 kpause("vndsbusy", false, hz, NULL);
476 mutex_enter(&vnode_free_list_lock);
477 if (error)
478 break;
479 }
480 }
481 }
482
483 /*
484 * Remove a vnode from its freelist.
485 */
486 void
487 vremfree(vnode_t *vp)
488 {
489
490 KASSERT(mutex_owned(vp->v_interlock));
491 KASSERT(vp->v_usecount == 0);
492
493 /*
494 * Note that the reference count must not change until
495 * the vnode is removed.
496 */
497 mutex_enter(&vnode_free_list_lock);
498 if (vp->v_holdcnt > 0) {
499 KASSERT(vp->v_freelisthd == &vnode_hold_list);
500 } else {
501 KASSERT(vp->v_freelisthd == &vnode_free_list);
502 }
503 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
504 vp->v_freelisthd = NULL;
505 mutex_exit(&vnode_free_list_lock);
506 }
507
508 /*
509 * vget: get a particular vnode from the free list, increment its reference
510 * count and lock it.
511 *
512 * => Should be called with v_interlock held.
513 *
514 * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean().
515 * In that case we cannot grab the vnode, so the caller sleeps until the
516 * transition is completed, and an error is returned to indicate that the
517 * vnode is no longer usable.
518 */
519 int
520 vget(vnode_t *vp, int flags)
521 {
522 int error = 0;
523
524 KASSERT((vp->v_iflag & VI_MARKER) == 0);
525 KASSERT(mutex_owned(vp->v_interlock));
526 KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT)) == 0);
527
528 /*
529 * Before adding a reference, we must remove the vnode
530 * from its freelist.
531 */
532 if (vp->v_usecount == 0) {
533 vremfree(vp);
534 vp->v_usecount = 1;
535 } else {
536 atomic_inc_uint(&vp->v_usecount);
537 }
538
539 /*
540 * If the vnode is in the process of changing state we wait
541 * for the change to complete and take care not to return
542 * a clean vnode.
543 */
544 if ((vp->v_iflag & VI_CHANGING) != 0) {
545 if ((flags & LK_NOWAIT) != 0) {
546 vrelel(vp, 0);
547 return EBUSY;
548 }
549 vwait(vp, VI_CHANGING);
550 if ((vp->v_iflag & VI_CLEAN) != 0) {
551 vrelel(vp, 0);
552 return ENOENT;
553 }
554 }
555
556 /*
557 * Ok, we got it in good shape. Just locking left.
558 */
559 KASSERT((vp->v_iflag & VI_CLEAN) == 0);
560 mutex_exit(vp->v_interlock);
561 if (flags & (LK_EXCLUSIVE | LK_SHARED)) {
562 error = vn_lock(vp, flags);
563 if (error != 0) {
564 vrele(vp);
565 }
566 }
567 return error;
568 }
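
/*
 * Example (sketch, not compiled): the vget(9) contract described above.
 * The caller enters v_interlock; vget() consumes it and returns a
 * referenced (and, with LK_* flags, locked) vnode, or an error if the
 * vnode is being or has been cleaned.
 */
#if 0
	mutex_enter(vp->v_interlock);
	error = vget(vp, LK_EXCLUSIVE);
	if (error != 0)
		return error;		/* e.g. ENOENT after a clean */
	/* ... operate on the locked, referenced vnode ... */
	vput(vp);			/* unlock and drop the reference */
#endif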
569
570 /*
571 * vput: unlock and release the reference.
572 */
573 void
574 vput(vnode_t *vp)
575 {
576
577 KASSERT((vp->v_iflag & VI_MARKER) == 0);
578
579 VOP_UNLOCK(vp);
580 vrele(vp);
581 }
582
583 /*
584 * Try to drop a reference on a vnode. Abort if we are releasing the
585 * last reference. Note: this _must_ succeed if it is not the last reference.
586 */
587 static inline bool
588 vtryrele(vnode_t *vp)
589 {
590 u_int use, next;
591
592 for (use = vp->v_usecount;; use = next) {
593 if (use == 1) {
594 return false;
595 }
596 KASSERT(use > 1);
597 next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
598 if (__predict_true(next == use)) {
599 return true;
600 }
601 }
602 }
603
604 /*
605 * Vnode release. If the reference count drops to zero, call the inactive
606 * routine and either return the vnode to the freelist or free it to the pool.
607 */
608 static void
609 vrelel(vnode_t *vp, int flags)
610 {
611 bool recycle, defer;
612 int error;
613
614 KASSERT(mutex_owned(vp->v_interlock));
615 KASSERT((vp->v_iflag & VI_MARKER) == 0);
616 KASSERT(vp->v_freelisthd == NULL);
617
618 if (__predict_false(vp->v_op == dead_vnodeop_p &&
619 (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
620 vnpanic(vp, "dead but not clean");
621 }
622
623 /*
624 * If not the last reference, just drop the reference count
625 * and unlock.
626 */
627 if (vtryrele(vp)) {
628 if ((flags & VRELEL_CHANGING_SET) != 0) {
629 KASSERT((vp->v_iflag & VI_CHANGING) != 0);
630 vp->v_iflag &= ~VI_CHANGING;
631 cv_broadcast(&vp->v_cv);
632 }
633 mutex_exit(vp->v_interlock);
634 return;
635 }
636 if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
637 vnpanic(vp, "%s: bad ref count", __func__);
638 }
639
640 KASSERT((vp->v_iflag & VI_XLOCK) == 0);
641
642 #ifdef DIAGNOSTIC
643 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
644 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
645 vprint("vrelel: missing VOP_CLOSE()", vp);
646 }
647 #endif
648
649 /*
650 * If not clean, deactivate the vnode, but preserve
651 * our reference across the call to VOP_INACTIVE().
652 */
653 if ((vp->v_iflag & VI_CLEAN) == 0) {
654 recycle = false;
655
656 /*
657 * XXX This ugly block can be largely eliminated if
658 * locking is pushed down into the file systems.
659 *
660 * Defer vnode release to vrele_thread if caller
661 * requests it explicitly or is the pagedaemon.
662 */
663 if ((curlwp == uvm.pagedaemon_lwp) ||
664 (flags & VRELEL_ASYNC_RELE) != 0) {
665 defer = true;
666 } else if (curlwp == vrele_lwp) {
667 /*
668 * We have to try harder.
669 */
670 mutex_exit(vp->v_interlock);
671 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
672 KASSERT(error == 0);
673 mutex_enter(vp->v_interlock);
674 defer = false;
675 } else {
676 /* If we can't acquire the lock, then defer. */
677 mutex_exit(vp->v_interlock);
678 error = vn_lock(vp,
679 LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
680 defer = (error != 0);
681 mutex_enter(vp->v_interlock);
682 }
683
684 KASSERT(mutex_owned(vp->v_interlock));
685 KASSERT(! (curlwp == vrele_lwp && defer));
686
687 if (defer) {
688 /*
689 * Defer reclaim to the kthread; it's not safe to
690 * clean it here. We donate it our last reference.
691 */
692 if ((flags & VRELEL_CHANGING_SET) != 0) {
693 KASSERT((vp->v_iflag & VI_CHANGING) != 0);
694 vp->v_iflag &= ~VI_CHANGING;
695 cv_broadcast(&vp->v_cv);
696 }
697 mutex_enter(&vrele_lock);
698 TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
699 if (++vrele_pending > (desiredvnodes >> 8))
700 cv_signal(&vrele_cv);
701 mutex_exit(&vrele_lock);
702 mutex_exit(vp->v_interlock);
703 return;
704 }
705
706 /*
707 * If the vnode gained another reference while we
708 * released the interlock, don't try to inactivate it yet.
709 */
710 if (__predict_false(vtryrele(vp))) {
711 VOP_UNLOCK(vp);
712 if ((flags & VRELEL_CHANGING_SET) != 0) {
713 KASSERT((vp->v_iflag & VI_CHANGING) != 0);
714 vp->v_iflag &= ~VI_CHANGING;
715 cv_broadcast(&vp->v_cv);
716 }
717 mutex_exit(vp->v_interlock);
718 return;
719 }
720
721 if ((flags & VRELEL_CHANGING_SET) == 0) {
722 KASSERT((vp->v_iflag & VI_CHANGING) == 0);
723 vp->v_iflag |= VI_CHANGING;
724 }
725 mutex_exit(vp->v_interlock);
726
727 /*
728 * The vnode can gain another reference while being
729 * deactivated. If VOP_INACTIVE() indicates that
730 * the described file has been deleted, then recycle
731 * the vnode irrespective of additional references.
732 * Another thread may be waiting to re-use the on-disk
733 * inode.
734 *
735 * Note that VOP_INACTIVE() will drop the vnode lock.
736 */
737 VOP_INACTIVE(vp, &recycle);
738 mutex_enter(vp->v_interlock);
739 if (!recycle) {
740 if (vtryrele(vp)) {
741 KASSERT((vp->v_iflag & VI_CHANGING) != 0);
742 vp->v_iflag &= ~VI_CHANGING;
743 cv_broadcast(&vp->v_cv);
744 mutex_exit(vp->v_interlock);
745 return;
746 }
747 }
748
749 /* Take care of space accounting. */
750 if (vp->v_iflag & VI_EXECMAP) {
751 atomic_add_int(&uvmexp.execpages,
752 -vp->v_uobj.uo_npages);
753 atomic_add_int(&uvmexp.filepages,
754 vp->v_uobj.uo_npages);
755 }
756 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
757 vp->v_vflag &= ~VV_MAPPED;
758
759 /*
760 * Recycle the vnode if the file is now unused (unlinked);
761 * otherwise it goes back onto a freelist below.
762 */
763 if (recycle) {
764 vclean(vp);
765 }
766 KASSERT(vp->v_usecount > 0);
767 } else { /* vnode was already clean */
768 if ((flags & VRELEL_CHANGING_SET) == 0) {
769 KASSERT((vp->v_iflag & VI_CHANGING) == 0);
770 vp->v_iflag |= VI_CHANGING;
771 }
772 }
773
774 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
775 /* Gained another reference while being reclaimed. */
776 KASSERT((vp->v_iflag & VI_CHANGING) != 0);
777 vp->v_iflag &= ~VI_CHANGING;
778 cv_broadcast(&vp->v_cv);
779 mutex_exit(vp->v_interlock);
780 return;
781 }
782
783 if ((vp->v_iflag & VI_CLEAN) != 0) {
784 /*
785 * It's clean so destroy it. It isn't referenced
786 * anywhere since it has been reclaimed.
787 */
788 KASSERT(vp->v_holdcnt == 0);
789 KASSERT(vp->v_writecount == 0);
790 mutex_exit(vp->v_interlock);
791 vfs_insmntque(vp, NULL);
792 if (vp->v_type == VBLK || vp->v_type == VCHR) {
793 spec_node_destroy(vp);
794 }
795 vnfree(vp);
796 } else {
797 /*
798 * Otherwise, put it back onto the freelist. It
799 * can't be destroyed while still associated with
800 * a file system.
801 */
802 mutex_enter(&vnode_free_list_lock);
803 if (vp->v_holdcnt > 0) {
804 vp->v_freelisthd = &vnode_hold_list;
805 } else {
806 vp->v_freelisthd = &vnode_free_list;
807 }
808 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
809 mutex_exit(&vnode_free_list_lock);
810 KASSERT((vp->v_iflag & VI_CHANGING) != 0);
811 vp->v_iflag &= ~VI_CHANGING;
812 cv_broadcast(&vp->v_cv);
813 mutex_exit(vp->v_interlock);
814 }
815 }
816
817 void
818 vrele(vnode_t *vp)
819 {
820
821 KASSERT((vp->v_iflag & VI_MARKER) == 0);
822
823 if (vtryrele(vp)) {
824 return;
825 }
826 mutex_enter(vp->v_interlock);
827 vrelel(vp, 0);
828 }
829
830 /*
831 * Asynchronous vnode release: the vnode is released in a different context (the vrele thread).
832 */
833 void
834 vrele_async(vnode_t *vp)
835 {
836
837 KASSERT((vp->v_iflag & VI_MARKER) == 0);
838
839 if (vtryrele(vp)) {
840 return;
841 }
842 mutex_enter(vp->v_interlock);
843 vrelel(vp, VRELEL_ASYNC_RELE);
844 }
845
846 static void
847 vrele_thread(void *cookie)
848 {
849 vnodelst_t skip_list;
850 vnode_t *vp;
851 struct mount *mp;
852
853 TAILQ_INIT(&skip_list);
854
855 mutex_enter(&vrele_lock);
856 for (;;) {
857 while (TAILQ_EMPTY(&vrele_list)) {
858 vrele_gen++;
859 cv_broadcast(&vrele_cv);
860 cv_timedwait(&vrele_cv, &vrele_lock, hz);
861 TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist);
862 }
863 vp = TAILQ_FIRST(&vrele_list);
864 mp = vp->v_mount;
865 TAILQ_REMOVE(&vrele_list, vp, v_freelist);
866 if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) {
867 TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist);
868 continue;
869 }
870 vrele_pending--;
871 mutex_exit(&vrele_lock);
872
873 /*
874 * If not the last reference, then ignore the vnode
875 * and look for more work.
876 */
877 mutex_enter(vp->v_interlock);
878 vrelel(vp, 0);
879 fstrans_done(mp);
880 mutex_enter(&vrele_lock);
881 }
882 }
883
884 void
885 vrele_flush(void)
886 {
887 int gen;
888
889 mutex_enter(&vrele_lock);
890 gen = vrele_gen;
891 while (vrele_pending && gen == vrele_gen) {
892 cv_broadcast(&vrele_cv);
893 cv_wait(&vrele_cv, &vrele_lock);
894 }
895 mutex_exit(&vrele_lock);
896 }
897
898 /*
899 * Vnode reference, where a reference is already held by some other
900 * object (for example, a file structure).
901 */
902 void
903 vref(vnode_t *vp)
904 {
905
906 KASSERT((vp->v_iflag & VI_MARKER) == 0);
907 KASSERT(vp->v_usecount != 0);
908
909 atomic_inc_uint(&vp->v_usecount);
910 }
911
912 /*
913 * Page or buffer structure gets a reference.
914 * Called with v_interlock held.
915 */
916 void
917 vholdl(vnode_t *vp)
918 {
919
920 KASSERT(mutex_owned(vp->v_interlock));
921 KASSERT((vp->v_iflag & VI_MARKER) == 0);
922
923 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
924 mutex_enter(&vnode_free_list_lock);
925 KASSERT(vp->v_freelisthd == &vnode_free_list);
926 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
927 vp->v_freelisthd = &vnode_hold_list;
928 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
929 mutex_exit(&vnode_free_list_lock);
930 }
931 }
932
933 /*
934 * Page or buffer structure frees a reference.
935 * Called with v_interlock held.
936 */
937 void
938 holdrelel(vnode_t *vp)
939 {
940
941 KASSERT(mutex_owned(vp->v_interlock));
942 KASSERT((vp->v_iflag & VI_MARKER) == 0);
943
944 if (vp->v_holdcnt <= 0) {
945 vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
946 }
947
948 vp->v_holdcnt--;
949 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
950 mutex_enter(&vnode_free_list_lock);
951 KASSERT(vp->v_freelisthd == &vnode_hold_list);
952 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
953 vp->v_freelisthd = &vnode_free_list;
954 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
955 mutex_exit(&vnode_free_list_lock);
956 }
957 }
958
959 /*
960 * Disassociate the underlying file system from a vnode.
961 *
962 * Must be called with the interlock held, and will return with it held.
963 */
964 static void
965 vclean(vnode_t *vp)
966 {
967 lwp_t *l = curlwp;
968 bool recycle, active, doclose;
969 int error;
970
971 KASSERT(mutex_owned(vp->v_interlock));
972 KASSERT((vp->v_iflag & VI_MARKER) == 0);
973 KASSERT(vp->v_usecount != 0);
974
975 /* If already clean, nothing to do. */
976 if ((vp->v_iflag & VI_CLEAN) != 0) {
977 return;
978 }
979
980 active = (vp->v_usecount > 1);
981 doclose = ! (active && vp->v_type == VBLK &&
982 spec_node_getmountedfs(vp) != NULL);
983 mutex_exit(vp->v_interlock);
984
985 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
986
987 /*
988 * Prevent the vnode from being recycled or brought into use
989 * while we clean it out.
990 */
991 mutex_enter(vp->v_interlock);
992 KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0);
993 vp->v_iflag |= VI_XLOCK;
994 if (vp->v_iflag & VI_EXECMAP) {
995 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
996 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
997 }
998 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
999 mutex_exit(vp->v_interlock);
1000
1001 /*
1002 * Clean out any cached data associated with the vnode.
1003 * If purging an active vnode, it must be closed and
1004 * deactivated before being reclaimed. Note that the
1005 * VOP_INACTIVE will unlock the vnode.
1006 */
1007 if (doclose) {
1008 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1009 if (error != 0) {
1010 if (wapbl_vphaswapbl(vp))
1011 WAPBL_DISCARD(wapbl_vptomp(vp));
1012 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1013 }
1014 KASSERT(error == 0);
1015 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1016 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
1017 spec_node_revoke(vp);
1018 }
1019 }
1020 if (active) {
1021 VOP_INACTIVE(vp, &recycle);
1022 } else {
1023 /*
1024 * Any other processes trying to obtain this lock must first
1025 * wait for VI_XLOCK to clear, then call the new lock operation.
1026 */
1027 VOP_UNLOCK(vp);
1028 }
1029
1030 /* Disassociate the underlying file system from the vnode. */
1031 if (VOP_RECLAIM(vp)) {
1032 vnpanic(vp, "%s: cannot reclaim", __func__);
1033 }
1034
1035 KASSERT(vp->v_data == NULL);
1036 KASSERT(vp->v_uobj.uo_npages == 0);
1037
1038 if (vp->v_type == VREG && vp->v_ractx != NULL) {
1039 uvm_ra_freectx(vp->v_ractx);
1040 vp->v_ractx = NULL;
1041 }
1042
1043 /* Purge name cache. */
1044 cache_purge(vp);
1045
1046 /* Move to dead mount. */
1047 vp->v_vflag &= ~VV_ROOT;
1048 atomic_inc_uint(&dead_mount->mnt_refcnt);
1049 vfs_insmntque(vp, dead_mount);
1050
1051 /* Done with purge, notify sleepers of the grim news. */
1052 mutex_enter(vp->v_interlock);
1053 if (doclose) {
1054 vp->v_op = dead_vnodeop_p;
1055 vp->v_vflag |= VV_LOCKSWORK;
1056 vp->v_iflag |= VI_CLEAN;
1057 } else {
1058 vp->v_op = spec_vnodeop_p;
1059 vp->v_vflag &= ~VV_LOCKSWORK;
1060 }
1061 vp->v_tag = VT_NON;
1062 KNOTE(&vp->v_klist, NOTE_REVOKE);
1063 vp->v_iflag &= ~VI_XLOCK;
1064 cv_broadcast(&vp->v_cv);
1065
1066 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1067 }
1068
1069 /*
1070 * Recycle an unused vnode if caller holds the last reference.
1071 */
1072 bool
1073 vrecycle(vnode_t *vp)
1074 {
1075
1076 mutex_enter(vp->v_interlock);
1077
1078 KASSERT((vp->v_iflag & VI_MARKER) == 0);
1079
1080 if (vp->v_usecount != 1) {
1081 mutex_exit(vp->v_interlock);
1082 return false;
1083 }
1084 if ((vp->v_iflag & VI_CHANGING) != 0)
1085 vwait(vp, VI_CHANGING);
1086 if (vp->v_usecount != 1) {
1087 mutex_exit(vp->v_interlock);
1088 return false;
1089 } else if ((vp->v_iflag & VI_CLEAN) != 0) {
1090 mutex_exit(vp->v_interlock);
1091 return true;
1092 }
1093 vp->v_iflag |= VI_CHANGING;
1094 vclean(vp);
1095 vrelel(vp, VRELEL_CHANGING_SET);
1096 return true;
1097 }
1098
1099 /*
1100 * Eliminate all activity associated with the requested vnode
1101 * and with all vnodes aliased to the requested vnode.
1102 */
1103 void
1104 vrevoke(vnode_t *vp)
1105 {
1106 vnode_t *vq;
1107 enum vtype type;
1108 dev_t dev;
1109
1110 KASSERT(vp->v_usecount > 0);
1111
1112 mutex_enter(vp->v_interlock);
1113 if ((vp->v_iflag & VI_CLEAN) != 0) {
1114 mutex_exit(vp->v_interlock);
1115 return;
1116 } else if (vp->v_type != VBLK && vp->v_type != VCHR) {
1117 atomic_inc_uint(&vp->v_usecount);
1118 mutex_exit(vp->v_interlock);
1119 vgone(vp);
1120 return;
1121 } else {
1122 dev = vp->v_rdev;
1123 type = vp->v_type;
1124 mutex_exit(vp->v_interlock);
1125 }
1126
1127 while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
1128 vgone(vq);
1129 }
1130 }
1131
1132 /*
1133 * Eliminate all activity associated with a vnode in preparation for
1134 * reuse. Drops a reference from the vnode.
1135 */
1136 void
1137 vgone(vnode_t *vp)
1138 {
1139
1140 mutex_enter(vp->v_interlock);
1141 if ((vp->v_iflag & VI_CHANGING) != 0)
1142 vwait(vp, VI_CHANGING);
1143 vp->v_iflag |= VI_CHANGING;
1144 vclean(vp);
1145 vrelel(vp, VRELEL_CHANGING_SET);
1146 }
1147
1148 static inline uint32_t
1149 vcache_hash(const struct vcache_key *key)
1150 {
1151 uint32_t hash = HASH32_BUF_INIT;
1152
1153 hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash);
1154 hash = hash32_buf(key->vk_key, key->vk_key_len, hash);
1155 return hash;
1156 }
1157
1158 static void
1159 vcache_init(void)
1160 {
1161
1162 vcache.pool = pool_cache_init(sizeof(struct vcache_node), 0, 0, 0,
1163 "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
1164 KASSERT(vcache.pool != NULL);
1165 mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE);
1166 vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true,
1167 &vcache.hashmask);
1168 }
1169
1170 static void
1171 vcache_reinit(void)
1172 {
1173 int i;
1174 uint32_t hash;
1175 u_long oldmask, newmask;
1176 struct hashhead *oldtab, *newtab;
1177 struct vcache_node *node;
1178
1179 newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask);
1180 mutex_enter(&vcache.lock);
1181 oldtab = vcache.hashtab;
1182 oldmask = vcache.hashmask;
1183 vcache.hashtab = newtab;
1184 vcache.hashmask = newmask;
1185 for (i = 0; i <= oldmask; i++) {
1186 while ((node = SLIST_FIRST(&oldtab[i])) != NULL) {
1187 SLIST_REMOVE(&oldtab[i], node, vcache_node, vn_hash);
1188 hash = vcache_hash(&node->vn_key);
1189 SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask],
1190 node, vn_hash);
1191 }
1192 }
1193 mutex_exit(&vcache.lock);
1194 hashdone(oldtab, HASH_SLIST, oldmask);
1195 }
1196
1197 static inline struct vcache_node *
1198 vcache_hash_lookup(const struct vcache_key *key, uint32_t hash)
1199 {
1200 struct hashhead *hashp;
1201 struct vcache_node *node;
1202
1203 KASSERT(mutex_owned(&vcache.lock));
1204
1205 hashp = &vcache.hashtab[hash & vcache.hashmask];
1206 SLIST_FOREACH(node, hashp, vn_hash) {
1207 if (key->vk_mount != node->vn_key.vk_mount)
1208 continue;
1209 if (key->vk_key_len != node->vn_key.vk_key_len)
1210 continue;
1211 if (memcmp(key->vk_key, node->vn_key.vk_key, key->vk_key_len))
1212 continue;
1213 return node;
1214 }
1215 return NULL;
1216 }
1217
1218 /*
1219 * Get a vnode / fs node pair by key and return it referenced through vpp.
1220 */
1221 int
1222 vcache_get(struct mount *mp, const void *key, size_t key_len,
1223 struct vnode **vpp)
1224 {
1225 int error;
1226 uint32_t hash;
1227 const void *new_key;
1228 struct vnode *vp;
1229 struct vcache_key vcache_key;
1230 struct vcache_node *node, *new_node;
1231
1232 new_key = NULL;
1233 *vpp = NULL;
1234
1235 vcache_key.vk_mount = mp;
1236 vcache_key.vk_key = key;
1237 vcache_key.vk_key_len = key_len;
1238 hash = vcache_hash(&vcache_key);
1239
1240 again:
1241 mutex_enter(&vcache.lock);
1242 node = vcache_hash_lookup(&vcache_key, hash);
1243
1244 /* If found, take a reference or retry. */
1245 if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
1246 vp = node->vn_vnode;
1247 mutex_enter(vp->v_interlock);
1248 mutex_exit(&vcache.lock);
1249 error = vget(vp, 0);
1250 if (error == ENOENT)
1251 goto again;
1252 if (error == 0)
1253 *vpp = vp;
1254 KASSERT((error != 0) == (*vpp == NULL));
1255 return error;
1256 }
1257
1258 /* If another thread loads this node, wait and retry. */
1259 if (node != NULL) {
1260 KASSERT(node->vn_vnode == NULL);
1261 mutex_exit(&vcache.lock);
1262 kpause("vcache", false, mstohz(20), NULL);
1263 goto again;
1264 }
1265 mutex_exit(&vcache.lock);
1266
1267 /* Allocate and initialize a new vcache / vnode pair. */
1268 error = vfs_busy(mp, NULL);
1269 if (error)
1270 return error;
1271 new_node = pool_cache_get(vcache.pool, PR_WAITOK);
1272 new_node->vn_vnode = NULL;
1273 new_node->vn_key = vcache_key;
1274 vp = vnalloc(NULL);
1275 mutex_enter(&vcache.lock);
1276 node = vcache_hash_lookup(&vcache_key, hash);
1277 if (node == NULL) {
1278 SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
1279 new_node, vn_hash);
1280 node = new_node;
1281 }
1282 mutex_exit(&vcache.lock);
1283
1284 /* If another thread beat us inserting this node, retry. */
1285 if (node != new_node) {
1286 pool_cache_put(vcache.pool, new_node);
1287 KASSERT(vp->v_usecount == 1);
1288 vp->v_usecount = 0;
1289 vnfree(vp);
1290 vfs_unbusy(mp, false, NULL);
1291 goto again;
1292 }
1293
1294 /* Load the fs node. Exclusive as new_node->vn_vnode is NULL. */
1295 vp->v_iflag |= VI_CHANGING;
1296 error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
1297 if (error) {
1298 mutex_enter(&vcache.lock);
1299 SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
1300 new_node, vcache_node, vn_hash);
1301 mutex_exit(&vcache.lock);
1302 pool_cache_put(vcache.pool, new_node);
1303 KASSERT(vp->v_usecount == 1);
1304 vp->v_usecount = 0;
1305 vnfree(vp);
1306 vfs_unbusy(mp, false, NULL);
1307 KASSERT(*vpp == NULL);
1308 return error;
1309 }
1310 KASSERT(new_key != NULL);
1311 KASSERT(memcmp(key, new_key, key_len) == 0);
1312 KASSERT(vp->v_op != NULL);
1313 vfs_insmntque(vp, mp);
1314 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
1315 vp->v_vflag |= VV_MPSAFE;
1316 vfs_unbusy(mp, true, NULL);
1317
1318 /* Finished loading, finalize node. */
1319 mutex_enter(&vcache.lock);
1320 new_node->vn_key.vk_key = new_key;
1321 new_node->vn_vnode = vp;
1322 mutex_exit(&vcache.lock);
1323 mutex_enter(vp->v_interlock);
1324 vp->v_iflag &= ~VI_CHANGING;
1325 cv_broadcast(&vp->v_cv);
1326 mutex_exit(vp->v_interlock);
1327 *vpp = vp;
1328 return 0;
1329 }
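
/*
 * Example (sketch, not compiled): how a file system might use
 * vcache_get(), assuming here a hypothetical ino_t key.  The pair is
 * returned referenced but unlocked; VFS_LOADVNODE() is expected to
 * initialize the vnode and hand back a stable copy of the key.
 */
#if 0
	struct vnode *vp;
	ino_t ino = 2;			/* hypothetical key: root inode */
	int error;

	error = vcache_get(mp, &ino, sizeof(ino), &vp);
	if (error != 0)
		return error;
	/* ... use vp, locking it as needed ... */
	vrele(vp);
#endif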
1330
1331 /*
1332 * Prepare key change: lock old and new cache node.
1333 * Return an error if the new node already exists.
1334 */
1335 int
1336 vcache_rekey_enter(struct mount *mp, struct vnode *vp,
1337 const void *old_key, size_t old_key_len,
1338 const void *new_key, size_t new_key_len)
1339 {
1340 uint32_t old_hash, new_hash;
1341 struct vcache_key old_vcache_key, new_vcache_key;
1342 struct vcache_node *node, *new_node;
1343
1344 old_vcache_key.vk_mount = mp;
1345 old_vcache_key.vk_key = old_key;
1346 old_vcache_key.vk_key_len = old_key_len;
1347 old_hash = vcache_hash(&old_vcache_key);
1348
1349 new_vcache_key.vk_mount = mp;
1350 new_vcache_key.vk_key = new_key;
1351 new_vcache_key.vk_key_len = new_key_len;
1352 new_hash = vcache_hash(&new_vcache_key);
1353
1354 new_node = pool_cache_get(vcache.pool, PR_WAITOK);
1355 new_node->vn_vnode = NULL;
1356 new_node->vn_key = new_vcache_key;
1357
1358 mutex_enter(&vcache.lock);
1359 node = vcache_hash_lookup(&new_vcache_key, new_hash);
1360 if (node != NULL) {
1361 mutex_exit(&vcache.lock);
1362 pool_cache_put(vcache.pool, new_node);
1363 return EEXIST;
1364 }
1365 SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
1366 new_node, vn_hash);
1367 node = vcache_hash_lookup(&old_vcache_key, old_hash);
1368 KASSERT(node != NULL);
1369 KASSERT(node->vn_vnode == vp);
1370 node->vn_vnode = NULL;
1371 node->vn_key = old_vcache_key;
1372 mutex_exit(&vcache.lock);
1373 return 0;
1374 }
1375
1376 /*
1377 * Key change complete: remove old node and unlock new node.
1378 */
1379 void
1380 vcache_rekey_exit(struct mount *mp, struct vnode *vp,
1381 const void *old_key, size_t old_key_len,
1382 const void *new_key, size_t new_key_len)
1383 {
1384 uint32_t old_hash, new_hash;
1385 struct vcache_key old_vcache_key, new_vcache_key;
1386 struct vcache_node *node;
1387
1388 old_vcache_key.vk_mount = mp;
1389 old_vcache_key.vk_key = old_key;
1390 old_vcache_key.vk_key_len = old_key_len;
1391 old_hash = vcache_hash(&old_vcache_key);
1392
1393 new_vcache_key.vk_mount = mp;
1394 new_vcache_key.vk_key = new_key;
1395 new_vcache_key.vk_key_len = new_key_len;
1396 new_hash = vcache_hash(&new_vcache_key);
1397
1398 mutex_enter(&vcache.lock);
1399 node = vcache_hash_lookup(&new_vcache_key, new_hash);
1400 KASSERT(node != NULL && node->vn_vnode == NULL);
1401 KASSERT(node->vn_key.vk_key_len == new_key_len);
1402 node->vn_vnode = vp;
1403 node->vn_key = new_vcache_key;
1404 node = vcache_hash_lookup(&old_vcache_key, old_hash);
1405 KASSERT(node != NULL);
1406 KASSERT(node->vn_vnode == NULL);
1407 SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
1408 node, vcache_node, vn_hash);
1409 mutex_exit(&vcache.lock);
1410 pool_cache_put(vcache.pool, node);
1411 }
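
/*
 * Example (sketch, not compiled): the two-phase rekey protocol.  A file
 * system that renumbers a node first reserves the new key, then updates
 * its own metadata, and finally completes the change.
 */
#if 0
	error = vcache_rekey_enter(mp, vp, &old_ino, sizeof(old_ino),
	    &new_ino, sizeof(new_ino));
	if (error != 0)
		return error;		/* EEXIST: new key already present */
	/* ... switch the file system's own key for this node ... */
	vcache_rekey_exit(mp, vp, &old_ino, sizeof(old_ino),
	    &new_ino, sizeof(new_ino));
#endif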
1412
1413 /*
1414 * Remove a vnode / fs node pair from the cache.
1415 */
1416 void
1417 vcache_remove(struct mount *mp, const void *key, size_t key_len)
1418 {
1419 uint32_t hash;
1420 struct vcache_key vcache_key;
1421 struct vcache_node *node;
1422
1423 vcache_key.vk_mount = mp;
1424 vcache_key.vk_key = key;
1425 vcache_key.vk_key_len = key_len;
1426 hash = vcache_hash(&vcache_key);
1427
1428 mutex_enter(&vcache.lock);
1429 node = vcache_hash_lookup(&vcache_key, hash);
1430 KASSERT(node != NULL);
1431 SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
1432 node, vcache_node, vn_hash);
1433 mutex_exit(&vcache.lock);
1434 pool_cache_put(vcache.pool, node);
1435 }
1436
1437 /*
1438 * Update the outstanding I/O count and wake up any waiters once it reaches zero.
1439 */
1440 void
1441 vwakeup(struct buf *bp)
1442 {
1443 vnode_t *vp;
1444
1445 if ((vp = bp->b_vp) == NULL)
1446 return;
1447
1448 KASSERT(bp->b_objlock == vp->v_interlock);
1449 KASSERT(mutex_owned(bp->b_objlock));
1450
1451 if (--vp->v_numoutput < 0)
1452 vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
1453 if (vp->v_numoutput == 0)
1454 cv_broadcast(&vp->v_cv);
1455 }
1456
1457 /*
1458 * Test a vnode for being or becoming dead. Returns one of:
1459 * EBUSY: vnode is becoming dead, with "flags == VDEAD_NOWAIT" only.
1460 * ENOENT: vnode is dead.
1461 * 0: otherwise.
1462 *
1463 * Whenever this function returns a non-zero value all future
1464 * calls will also return a non-zero value.
1465 */
1466 int
1467 vdead_check(struct vnode *vp, int flags)
1468 {
1469
1470 KASSERT(mutex_owned(vp->v_interlock));
1471 if (ISSET(vp->v_iflag, VI_XLOCK)) {
1472 if (ISSET(flags, VDEAD_NOWAIT))
1473 return EBUSY;
1474 vwait(vp, VI_XLOCK);
1475 KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
1476 }
1477 if (ISSET(vp->v_iflag, VI_CLEAN))
1478 return ENOENT;
1479 return 0;
1480 }
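
/*
 * Example (sketch, not compiled): a non-blocking revocation check with
 * the interlock held, as a file system operation might do.
 */
#if 0
	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	mutex_exit(vp->v_interlock);
	if (error != 0)
		return ENOENT;		/* dead or becoming dead */
#endif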
1481
1482 /*
1483 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
1484 * recycled.
1485 */
1486 static void
1487 vwait(vnode_t *vp, int flags)
1488 {
1489
1490 KASSERT(mutex_owned(vp->v_interlock));
1491 KASSERT(vp->v_usecount != 0);
1492
1493 while ((vp->v_iflag & flags) != 0)
1494 cv_wait(&vp->v_cv, vp->v_interlock);
1495 }
1496
1497 int
1498 vfs_drainvnodes(long target)
1499 {
1500 int error;
1501
1502 mutex_enter(&vnode_free_list_lock);
1503
1504 while (numvnodes > target) {
1505 error = cleanvnode();
1506 if (error != 0)
1507 return error;
1508 mutex_enter(&vnode_free_list_lock);
1509 }
1510
1511 mutex_exit(&vnode_free_list_lock);
1512
1513 vcache_reinit();
1514
1515 return 0;
1516 }
1517
1518 void
1519 vnpanic(vnode_t *vp, const char *fmt, ...)
1520 {
1521 va_list ap;
1522
1523 #ifdef DIAGNOSTIC
1524 vprint(NULL, vp);
1525 #endif
1526 va_start(ap, fmt);
1527 vpanic(fmt, ap);
1528 va_end(ap);
1529 }
1530