lfs_vnops.c revision 1.279 1 /* $NetBSD: lfs_vnops.c,v 1.279 2015/08/02 18:12:41 dholland Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * Copyright (c) 1986, 1989, 1991, 1993, 1995
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95
60 */
61
62 /* from NetBSD: ufs_vnops.c,v 1.213 2013/06/08 05:47:02 kardel Exp */
63 /*-
64 * Copyright (c) 2008 The NetBSD Foundation, Inc.
65 * All rights reserved.
66 *
67 * This code is derived from software contributed to The NetBSD Foundation
68 * by Wasabi Systems, Inc.
69 *
70 * Redistribution and use in source and binary forms, with or without
71 * modification, are permitted provided that the following conditions
72 * are met:
73 * 1. Redistributions of source code must retain the above copyright
74 * notice, this list of conditions and the following disclaimer.
75 * 2. Redistributions in binary form must reproduce the above copyright
76 * notice, this list of conditions and the following disclaimer in the
77 * documentation and/or other materials provided with the distribution.
78 *
79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
89 * POSSIBILITY OF SUCH DAMAGE.
90 */
91 /*
92 * Copyright (c) 1982, 1986, 1989, 1993, 1995
93 * The Regents of the University of California. All rights reserved.
94 * (c) UNIX System Laboratories, Inc.
95 * All or some portions of this file are derived from material licensed
96 * to the University of California by American Telephone and Telegraph
97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
98 * the permission of UNIX System Laboratories, Inc.
99 *
100 * Redistribution and use in source and binary forms, with or without
101 * modification, are permitted provided that the following conditions
102 * are met:
103 * 1. Redistributions of source code must retain the above copyright
104 * notice, this list of conditions and the following disclaimer.
105 * 2. Redistributions in binary form must reproduce the above copyright
106 * notice, this list of conditions and the following disclaimer in the
107 * documentation and/or other materials provided with the distribution.
108 * 3. Neither the name of the University nor the names of its contributors
109 * may be used to endorse or promote products derived from this software
110 * without specific prior written permission.
111 *
112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
122 * SUCH DAMAGE.
123 *
124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95
125 */
126
127 #include <sys/cdefs.h>
128 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.279 2015/08/02 18:12:41 dholland Exp $");
129
130 #ifdef _KERNEL_OPT
131 #include "opt_compat_netbsd.h"
132 #include "opt_uvm_page_trkown.h"
133 #endif
134
135 #include <sys/param.h>
136 #include <sys/systm.h>
137 #include <sys/namei.h>
138 #include <sys/resourcevar.h>
139 #include <sys/kernel.h>
140 #include <sys/file.h>
141 #include <sys/stat.h>
142 #include <sys/buf.h>
143 #include <sys/proc.h>
144 #include <sys/mount.h>
145 #include <sys/vnode.h>
146 #include <sys/pool.h>
147 #include <sys/signalvar.h>
148 #include <sys/kauth.h>
149 #include <sys/syslog.h>
150 #include <sys/fstrans.h>
151
152 #include <miscfs/fifofs/fifo.h>
153 #include <miscfs/genfs/genfs.h>
154 #include <miscfs/specfs/specdev.h>
155
156 #include <ufs/lfs/ulfs_inode.h>
157 #include <ufs/lfs/ulfsmount.h>
158 #include <ufs/lfs/ulfs_bswap.h>
159 #include <ufs/lfs/ulfs_extern.h>
160
161 #include <uvm/uvm.h>
162 #include <uvm/uvm_pmap.h>
163 #include <uvm/uvm_stat.h>
164 #include <uvm/uvm_pager.h>
165
166 #include <ufs/lfs/lfs.h>
167 #include <ufs/lfs/lfs_accessors.h>
168 #include <ufs/lfs/lfs_kernel.h>
169 #include <ufs/lfs/lfs_extern.h>
170
171 extern pid_t lfs_writer_daemon;
172 int lfs_ignore_lazy_sync = 1;
173
174 static int lfs_openextattr(void *v);
175 static int lfs_closeextattr(void *v);
176 static int lfs_getextattr(void *v);
177 static int lfs_setextattr(void *v);
178 static int lfs_listextattr(void *v);
179 static int lfs_deleteextattr(void *v);
180
181 /*
182 * A virgin directory (no blushing please).
183 */
184 static const struct lfs_dirtemplate mastertemplate = {
185 0, 12, LFS_DT_DIR, 1, ".",
186 0, LFS_DIRBLKSIZ - 12, LFS_DT_DIR, 2, ".."
187 };
188
189 /* Global vfs data structures for lfs. */
190 int (**lfs_vnodeop_p)(void *);
191 const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
192 { &vop_default_desc, vn_default_error },
193 { &vop_lookup_desc, ulfs_lookup }, /* lookup */
194 { &vop_create_desc, lfs_create }, /* create */
195 { &vop_whiteout_desc, ulfs_whiteout }, /* whiteout */
196 { &vop_mknod_desc, lfs_mknod }, /* mknod */
197 { &vop_open_desc, ulfs_open }, /* open */
198 { &vop_close_desc, lfs_close }, /* close */
199 { &vop_access_desc, ulfs_access }, /* access */
200 { &vop_getattr_desc, lfs_getattr }, /* getattr */
201 { &vop_setattr_desc, lfs_setattr }, /* setattr */
202 { &vop_read_desc, lfs_read }, /* read */
203 { &vop_write_desc, lfs_write }, /* write */
204 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */
205 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */
206 { &vop_ioctl_desc, ulfs_ioctl }, /* ioctl */
207 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */
208 { &vop_poll_desc, ulfs_poll }, /* poll */
209 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */
210 { &vop_revoke_desc, ulfs_revoke }, /* revoke */
211 { &vop_mmap_desc, lfs_mmap }, /* mmap */
212 { &vop_fsync_desc, lfs_fsync }, /* fsync */
213 { &vop_seek_desc, ulfs_seek }, /* seek */
214 { &vop_remove_desc, lfs_remove }, /* remove */
215 { &vop_link_desc, lfs_link }, /* link */
216 { &vop_rename_desc, lfs_rename }, /* rename */
217 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */
218 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */
219 { &vop_symlink_desc, lfs_symlink }, /* symlink */
220 { &vop_readdir_desc, ulfs_readdir }, /* readdir */
221 { &vop_readlink_desc, ulfs_readlink }, /* readlink */
222 { &vop_abortop_desc, ulfs_abortop }, /* abortop */
223 { &vop_inactive_desc, lfs_inactive }, /* inactive */
224 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
225 { &vop_lock_desc, ulfs_lock }, /* lock */
226 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
227 { &vop_bmap_desc, ulfs_bmap }, /* bmap */
228 { &vop_strategy_desc, lfs_strategy }, /* strategy */
229 { &vop_print_desc, ulfs_print }, /* print */
230 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
231 { &vop_pathconf_desc, ulfs_pathconf }, /* pathconf */
232 { &vop_advlock_desc, ulfs_advlock }, /* advlock */
233 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
234 { &vop_getpages_desc, lfs_getpages }, /* getpages */
235 { &vop_putpages_desc, lfs_putpages }, /* putpages */
236 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
237 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
238 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
239 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
240 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
241 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
242 { NULL, NULL }
243 };
244 const struct vnodeopv_desc lfs_vnodeop_opv_desc =
245 { &lfs_vnodeop_p, lfs_vnodeop_entries };
246
247 int (**lfs_specop_p)(void *);
248 const struct vnodeopv_entry_desc lfs_specop_entries[] = {
249 { &vop_default_desc, vn_default_error },
250 { &vop_lookup_desc, spec_lookup }, /* lookup */
251 { &vop_create_desc, spec_create }, /* create */
252 { &vop_mknod_desc, spec_mknod }, /* mknod */
253 { &vop_open_desc, spec_open }, /* open */
254 { &vop_close_desc, lfsspec_close }, /* close */
255 { &vop_access_desc, ulfs_access }, /* access */
256 { &vop_getattr_desc, lfs_getattr }, /* getattr */
257 { &vop_setattr_desc, lfs_setattr }, /* setattr */
258 { &vop_read_desc, ulfsspec_read }, /* read */
259 { &vop_write_desc, ulfsspec_write }, /* write */
260 { &vop_fallocate_desc, spec_fallocate }, /* fallocate */
261 { &vop_fdiscard_desc, spec_fdiscard }, /* fdiscard */
262 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
263 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
264 { &vop_poll_desc, spec_poll }, /* poll */
265 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */
266 { &vop_revoke_desc, spec_revoke }, /* revoke */
267 { &vop_mmap_desc, spec_mmap }, /* mmap */
268 { &vop_fsync_desc, spec_fsync }, /* fsync */
269 { &vop_seek_desc, spec_seek }, /* seek */
270 { &vop_remove_desc, spec_remove }, /* remove */
271 { &vop_link_desc, spec_link }, /* link */
272 { &vop_rename_desc, spec_rename }, /* rename */
273 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
274 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
275 { &vop_symlink_desc, spec_symlink }, /* symlink */
276 { &vop_readdir_desc, spec_readdir }, /* readdir */
277 { &vop_readlink_desc, spec_readlink }, /* readlink */
278 { &vop_abortop_desc, spec_abortop }, /* abortop */
279 { &vop_inactive_desc, lfs_inactive }, /* inactive */
280 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
281 { &vop_lock_desc, ulfs_lock }, /* lock */
282 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
283 { &vop_bmap_desc, spec_bmap }, /* bmap */
284 { &vop_strategy_desc, spec_strategy }, /* strategy */
285 { &vop_print_desc, ulfs_print }, /* print */
286 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
287 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
288 { &vop_advlock_desc, spec_advlock }, /* advlock */
289 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
290 { &vop_getpages_desc, spec_getpages }, /* getpages */
291 { &vop_putpages_desc, spec_putpages }, /* putpages */
292 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
293 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
294 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
295 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
296 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
297 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
298 { NULL, NULL }
299 };
300 const struct vnodeopv_desc lfs_specop_opv_desc =
301 { &lfs_specop_p, lfs_specop_entries };
302
303 int (**lfs_fifoop_p)(void *);
304 const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
305 { &vop_default_desc, vn_default_error },
306 { &vop_lookup_desc, vn_fifo_bypass }, /* lookup */
307 { &vop_create_desc, vn_fifo_bypass }, /* create */
308 { &vop_mknod_desc, vn_fifo_bypass }, /* mknod */
309 { &vop_open_desc, vn_fifo_bypass }, /* open */
310 { &vop_close_desc, lfsfifo_close }, /* close */
311 { &vop_access_desc, ulfs_access }, /* access */
312 { &vop_getattr_desc, lfs_getattr }, /* getattr */
313 { &vop_setattr_desc, lfs_setattr }, /* setattr */
314 { &vop_read_desc, ulfsfifo_read }, /* read */
315 { &vop_write_desc, ulfsfifo_write }, /* write */
316 { &vop_fallocate_desc, vn_fifo_bypass }, /* fallocate */
317 { &vop_fdiscard_desc, vn_fifo_bypass }, /* fdiscard */
318 { &vop_ioctl_desc, vn_fifo_bypass }, /* ioctl */
319 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
320 { &vop_poll_desc, vn_fifo_bypass }, /* poll */
321 { &vop_kqfilter_desc, vn_fifo_bypass }, /* kqfilter */
322 { &vop_revoke_desc, vn_fifo_bypass }, /* revoke */
323 { &vop_mmap_desc, vn_fifo_bypass }, /* mmap */
324 { &vop_fsync_desc, vn_fifo_bypass }, /* fsync */
325 { &vop_seek_desc, vn_fifo_bypass }, /* seek */
326 { &vop_remove_desc, vn_fifo_bypass }, /* remove */
327 { &vop_link_desc, vn_fifo_bypass }, /* link */
328 { &vop_rename_desc, vn_fifo_bypass }, /* rename */
329 { &vop_mkdir_desc, vn_fifo_bypass }, /* mkdir */
330 { &vop_rmdir_desc, vn_fifo_bypass }, /* rmdir */
331 { &vop_symlink_desc, vn_fifo_bypass }, /* symlink */
332 { &vop_readdir_desc, vn_fifo_bypass }, /* readdir */
333 { &vop_readlink_desc, vn_fifo_bypass }, /* readlink */
334 { &vop_abortop_desc, vn_fifo_bypass }, /* abortop */
335 { &vop_inactive_desc, lfs_inactive }, /* inactive */
336 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
337 { &vop_lock_desc, ulfs_lock }, /* lock */
338 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
339 { &vop_bmap_desc, vn_fifo_bypass }, /* bmap */
340 { &vop_strategy_desc, vn_fifo_bypass }, /* strategy */
341 { &vop_print_desc, ulfs_print }, /* print */
342 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
343 { &vop_pathconf_desc, vn_fifo_bypass }, /* pathconf */
344 { &vop_advlock_desc, vn_fifo_bypass }, /* advlock */
345 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
346 { &vop_putpages_desc, vn_fifo_bypass }, /* putpages */
347 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
348 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
349 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
350 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
351 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
352 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
353 { NULL, NULL }
354 };
355 const struct vnodeopv_desc lfs_fifoop_opv_desc =
356 { &lfs_fifoop_p, lfs_fifoop_entries };
357
358 #define LFS_READWRITE
359 #include <ufs/lfs/ulfs_readwrite.c>
360 #undef LFS_READWRITE
361
362 /*
363 * Synch an open file.
364 */
365 /* ARGSUSED */
366 int
367 lfs_fsync(void *v)
368 {
369 struct vop_fsync_args /* {
370 struct vnode *a_vp;
371 kauth_cred_t a_cred;
372 int a_flags;
373 off_t offlo;
374 off_t offhi;
375 } */ *ap = v;
376 struct vnode *vp = ap->a_vp;
377 int error, wait;
378 struct inode *ip = VTOI(vp);
379 struct lfs *fs = ip->i_lfs;
380
381 /* If we're mounted read-only, don't try to sync. */
382 if (fs->lfs_ronly)
383 return 0;
384
385 /* If a removed vnode is being cleaned, no need to sync here. */
386 if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
387 return 0;
388
389 /*
390 * Trickle sync simply adds this vnode to the pager list, as if
391 * the pagedaemon had requested a pageout.
392 */
393 if (ap->a_flags & FSYNC_LAZY) {
394 if (lfs_ignore_lazy_sync == 0) {
395 mutex_enter(&lfs_lock);
396 if (!(ip->i_flags & IN_PAGING)) {
397 ip->i_flags |= IN_PAGING;
398 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
399 i_lfs_pchain);
400 }
401 wakeup(&lfs_writer_daemon);
402 mutex_exit(&lfs_lock);
403 }
404 return 0;
405 }
406
407 /*
408 * If a vnode is bring cleaned, flush it out before we try to
409 * reuse it. This prevents the cleaner from writing files twice
410 * in the same partial segment, causing an accounting underflow.
411 */
412 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) {
413 lfs_vflush(vp);
414 }
415
416 wait = (ap->a_flags & FSYNC_WAIT);
417 do {
418 mutex_enter(vp->v_interlock);
419 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
420 round_page(ap->a_offhi),
421 PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
422 if (error == EAGAIN) {
423 mutex_enter(&lfs_lock);
424 mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
425 "lfs_fsync", hz / 100 + 1, &lfs_lock);
426 mutex_exit(&lfs_lock);
427 }
428 } while (error == EAGAIN);
429 if (error)
430 return error;
431
432 if ((ap->a_flags & FSYNC_DATAONLY) == 0)
433 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);
434
435 if (error == 0 && ap->a_flags & FSYNC_CACHE) {
436 int l = 0;
437 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
438 curlwp->l_cred);
439 }
440 if (wait && !VPISEMPTY(vp))
441 LFS_SET_UINO(ip, IN_MODIFIED);
442
443 return error;
444 }
445
446 /*
447 * Take IN_ADIROP off, then call ulfs_inactive.
448 */
449 int
450 lfs_inactive(void *v)
451 {
452 struct vop_inactive_args /* {
453 struct vnode *a_vp;
454 } */ *ap = v;
455
456 lfs_unmark_vnode(ap->a_vp);
457
458 /*
459 * The Ifile is only ever inactivated on unmount.
460 * Streamline this process by not giving it more dirty blocks.
461 */
462 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
463 mutex_enter(&lfs_lock);
464 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
465 mutex_exit(&lfs_lock);
466 VOP_UNLOCK(ap->a_vp);
467 return 0;
468 }
469
470 #ifdef DEBUG
471 /*
472 * This might happen on unmount.
473 * XXX If it happens at any other time, it should be a panic.
474 */
475 if (ap->a_vp->v_uflag & VU_DIROP) {
476 struct inode *ip = VTOI(ap->a_vp);
477 printf("lfs_inactive: inactivating VU_DIROP? ino = %d\n", (int)ip->i_number);
478 }
479 #endif /* DIAGNOSTIC */
480
481 return ulfs_inactive(v);
482 }
483
484 int
485 lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
486 {
487 struct lfs *fs;
488 int error;
489
490 KASSERT(VOP_ISLOCKED(dvp));
491 KASSERT(vp == NULL || VOP_ISLOCKED(vp));
492
493 fs = VTOI(dvp)->i_lfs;
494
495 ASSERT_NO_SEGLOCK(fs);
496 /*
497 * LFS_NRESERVE calculates direct and indirect blocks as well
498 * as an inode block; an overestimate in most cases.
499 */
500 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
501 return (error);
502
503 restart:
504 mutex_enter(&lfs_lock);
505 if (fs->lfs_dirops == 0) {
506 mutex_exit(&lfs_lock);
507 lfs_check(dvp, LFS_UNUSED_LBN, 0);
508 mutex_enter(&lfs_lock);
509 }
510 while (fs->lfs_writer) {
511 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH,
512 "lfs_sdirop", 0, &lfs_lock);
513 if (error == EINTR) {
514 mutex_exit(&lfs_lock);
515 goto unreserve;
516 }
517 }
518 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
519 wakeup(&lfs_writer_daemon);
520 mutex_exit(&lfs_lock);
521 preempt();
522 goto restart;
523 }
524
525 if (lfs_dirvcount > LFS_MAX_DIROP) {
526 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
527 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
528 if ((error = mtsleep(&lfs_dirvcount,
529 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
530 &lfs_lock)) != 0) {
531 goto unreserve;
532 }
533 goto restart;
534 }
535
536 ++fs->lfs_dirops;
537 /* fs->lfs_doifile = 1; */ /* XXX why? --ks */
538 mutex_exit(&lfs_lock);
539
540 /* Hold a reference so SET_ENDOP will be happy */
541 vref(dvp);
542 if (vp) {
543 vref(vp);
544 MARK_VNODE(vp);
545 }
546
547 MARK_VNODE(dvp);
548 return 0;
549
550 unreserve:
551 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
552 return error;
553 }
554
555 /*
556 * Opposite of lfs_set_dirop... mostly. For now at least must call
557 * UNMARK_VNODE(dvp) explicitly first. (XXX: clean that up)
558 */
559 void
560 lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str)
561 {
562 mutex_enter(&lfs_lock);
563 --fs->lfs_dirops;
564 if (!fs->lfs_dirops) {
565 if (fs->lfs_nadirop) {
566 panic("lfs_unset_dirop: %s: no dirops but "
567 " nadirop=%d", str,
568 fs->lfs_nadirop);
569 }
570 wakeup(&fs->lfs_writer);
571 mutex_exit(&lfs_lock);
572 lfs_check(dvp, LFS_UNUSED_LBN, 0);
573 } else {
574 mutex_exit(&lfs_lock);
575 }
576 lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs));
577 }
578
579 void
580 lfs_mark_vnode(struct vnode *vp)
581 {
582 struct inode *ip = VTOI(vp);
583 struct lfs *fs = ip->i_lfs;
584
585 mutex_enter(&lfs_lock);
586 if (!(ip->i_flag & IN_ADIROP)) {
587 if (!(vp->v_uflag & VU_DIROP)) {
588 mutex_exit(&lfs_lock);
589 vref(vp);
590 mutex_enter(&lfs_lock);
591 ++lfs_dirvcount;
592 ++fs->lfs_dirvcount;
593 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
594 vp->v_uflag |= VU_DIROP;
595 }
596 ++fs->lfs_nadirop;
597 ip->i_flag &= ~IN_CDIROP;
598 ip->i_flag |= IN_ADIROP;
599 } else
600 KASSERT(vp->v_uflag & VU_DIROP);
601 mutex_exit(&lfs_lock);
602 }
603
604 void
605 lfs_unmark_vnode(struct vnode *vp)
606 {
607 struct inode *ip = VTOI(vp);
608
609 mutex_enter(&lfs_lock);
610 if (ip && (ip->i_flag & IN_ADIROP)) {
611 KASSERT(vp->v_uflag & VU_DIROP);
612 --ip->i_lfs->lfs_nadirop;
613 ip->i_flag &= ~IN_ADIROP;
614 }
615 mutex_exit(&lfs_lock);
616 }
617
618 int
619 lfs_symlink(void *v)
620 {
621 struct vop_symlink_v3_args /* {
622 struct vnode *a_dvp;
623 struct vnode **a_vpp;
624 struct componentname *a_cnp;
625 struct vattr *a_vap;
626 char *a_target;
627 } */ *ap = v;
628 struct lfs *fs;
629 struct vnode *dvp, **vpp;
630 struct inode *ip;
631 struct ulfs_lookup_results *ulr;
632 ssize_t len; /* XXX should be size_t */
633 int error;
634
635 dvp = ap->a_dvp;
636 vpp = ap->a_vpp;
637
638 KASSERT(vpp != NULL);
639 KASSERT(*vpp == NULL);
640 KASSERT(ap->a_vap->va_type == VLNK);
641
642 /* XXX should handle this material another way */
643 ulr = &VTOI(ap->a_dvp)->i_crap;
644 ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
645
646 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
647 ASSERT_NO_SEGLOCK(fs);
648 if (fs->lfs_ronly) {
649 return EROFS;
650 }
651
652 error = lfs_set_dirop(dvp, NULL);
653 if (error)
654 return error;
655
656 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
657 error = ulfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp);
658 if (error) {
659 goto out;
660 }
661
662 VN_KNOTE(ap->a_dvp, NOTE_WRITE);
663 ip = VTOI(*vpp);
664
665 len = strlen(ap->a_target);
666 if (len < ip->i_lfs->um_maxsymlinklen) {
667 memcpy((char *)SHORTLINK(ip), ap->a_target, len);
668 ip->i_size = len;
669 DIP_ASSIGN(ip, size, len);
670 uvm_vnp_setsize(*vpp, ip->i_size);
671 ip->i_flag |= IN_CHANGE | IN_UPDATE;
672 if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME)
673 ip->i_flag |= IN_ACCESS;
674 } else {
675 error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
676 IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
677 NULL);
678 }
679
680 VOP_UNLOCK(*vpp);
681 if (error)
682 vrele(*vpp);
683
684 out:
685 fstrans_done(dvp->v_mount);
686
687 UNMARK_VNODE(dvp);
688 /* XXX: is it even possible for the symlink to get MARK'd? */
689 UNMARK_VNODE(*vpp);
690 if (!((*vpp)->v_uflag & VU_DIROP)) {
691 KASSERT(error != 0);
692 *vpp = NULL;
693 }
694 else {
695 KASSERT(error == 0);
696 }
697 lfs_unset_dirop(fs, dvp, "symlink");
698
699 vrele(dvp);
700 return (error);
701 }
702
703 int
704 lfs_mknod(void *v)
705 {
706 struct vop_mknod_v3_args /* {
707 struct vnode *a_dvp;
708 struct vnode **a_vpp;
709 struct componentname *a_cnp;
710 struct vattr *a_vap;
711 } */ *ap = v;
712 struct lfs *fs;
713 struct vnode *dvp, **vpp;
714 struct vattr *vap;
715 struct inode *ip;
716 int error;
717 ino_t ino;
718 struct ulfs_lookup_results *ulr;
719
720 dvp = ap->a_dvp;
721 vpp = ap->a_vpp;
722 vap = ap->a_vap;
723
724 KASSERT(vpp != NULL);
725 KASSERT(*vpp == NULL);
726
727 /* XXX should handle this material another way */
728 ulr = &VTOI(dvp)->i_crap;
729 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
730
731 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
732 ASSERT_NO_SEGLOCK(fs);
733 if (fs->lfs_ronly) {
734 return EROFS;
735 }
736
737 error = lfs_set_dirop(dvp, NULL);
738 if (error)
739 return error;
740
741 fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
742 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
743
744 /* Either way we're done with the dirop at this point */
745 UNMARK_VNODE(dvp);
746 UNMARK_VNODE(*vpp);
747 if (!((*vpp)->v_uflag & VU_DIROP)) {
748 KASSERT(error != 0);
749 *vpp = NULL;
750 }
751 else {
752 KASSERT(error == 0);
753 }
754 lfs_unset_dirop(fs, dvp, "mknod");
755 /*
756 * XXX this is where this used to be (though inside some evil
757 * macros) but it clearly should be moved further down.
758 * - dholland 20140515
759 */
760 vrele(dvp);
761
762 if (error) {
763 fstrans_done(ap->a_dvp->v_mount);
764 *vpp = NULL;
765 return (error);
766 }
767
768 VN_KNOTE(dvp, NOTE_WRITE);
769 ip = VTOI(*vpp);
770 ino = ip->i_number;
771 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
772
773 /*
774 * Call fsync to write the vnode so that we don't have to deal with
775 * flushing it when it's marked VU_DIROP or reclaiming.
776 *
777 * XXX KS - If we can't flush we also can't call vgone(), so must
778 * return. But, that leaves this vnode in limbo, also not good.
779 * Can this ever happen (barring hardware failure)?
780 */
781 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
782 panic("lfs_mknod: couldn't fsync (ino %llu)",
783 (unsigned long long)ino);
784 /* return (error); */
785 }
786
787 fstrans_done(ap->a_dvp->v_mount);
788 KASSERT(error == 0);
789 VOP_UNLOCK(*vpp);
790 return (0);
791 }
792
793 /*
794 * Create a regular file
795 */
796 int
797 lfs_create(void *v)
798 {
799 struct vop_create_v3_args /* {
800 struct vnode *a_dvp;
801 struct vnode **a_vpp;
802 struct componentname *a_cnp;
803 struct vattr *a_vap;
804 } */ *ap = v;
805 struct lfs *fs;
806 struct vnode *dvp, **vpp;
807 struct vattr *vap;
808 struct ulfs_lookup_results *ulr;
809 int error;
810
811 dvp = ap->a_dvp;
812 vpp = ap->a_vpp;
813 vap = ap->a_vap;
814
815 KASSERT(vpp != NULL);
816 KASSERT(*vpp == NULL);
817
818 /* XXX should handle this material another way */
819 ulr = &VTOI(dvp)->i_crap;
820 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
821
822 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
823 ASSERT_NO_SEGLOCK(fs);
824 if (fs->lfs_ronly) {
825 return EROFS;
826 }
827
828 error = lfs_set_dirop(dvp, NULL);
829 if (error)
830 return error;
831
832 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
833 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
834 if (error) {
835 fstrans_done(dvp->v_mount);
836 goto out;
837 }
838 fstrans_done(dvp->v_mount);
839 VN_KNOTE(dvp, NOTE_WRITE);
840 VOP_UNLOCK(*vpp);
841
842 out:
843
844 UNMARK_VNODE(dvp);
845 UNMARK_VNODE(*vpp);
846 if (!((*vpp)->v_uflag & VU_DIROP)) {
847 KASSERT(error != 0);
848 *vpp = NULL;
849 }
850 else {
851 KASSERT(error == 0);
852 }
853 lfs_unset_dirop(fs, dvp, "create");
854
855 vrele(dvp);
856 return (error);
857 }
858
859 int
860 lfs_mkdir(void *v)
861 {
862 struct vop_mkdir_v3_args /* {
863 struct vnode *a_dvp;
864 struct vnode **a_vpp;
865 struct componentname *a_cnp;
866 struct vattr *a_vap;
867 } */ *ap = v;
868 struct lfs *fs;
869 struct vnode *dvp, *tvp, **vpp;
870 struct inode *dp, *ip;
871 struct componentname *cnp;
872 struct vattr *vap;
873 struct ulfs_lookup_results *ulr;
874 struct buf *bp;
875 struct lfs_dirtemplate dirtemplate;
876 struct lfs_direct *newdir;
877 int dirblksiz;
878 int error;
879
880 dvp = ap->a_dvp;
881 tvp = NULL;
882 vpp = ap->a_vpp;
883 cnp = ap->a_cnp;
884 vap = ap->a_vap;
885
886 dp = VTOI(dvp);
887 ip = NULL;
888
889 KASSERT(vap->va_type == VDIR);
890 KASSERT(vpp != NULL);
891 KASSERT(*vpp == NULL);
892
893 /* XXX should handle this material another way */
894 ulr = &dp->i_crap;
895 ULFS_CHECK_CRAPCOUNTER(dp);
896
897 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
898 ASSERT_NO_SEGLOCK(fs);
899 if (fs->lfs_ronly) {
900 return EROFS;
901 }
902 dirblksiz = fs->um_dirblksiz;
903
904 error = lfs_set_dirop(dvp, NULL);
905 if (error)
906 return error;
907
908 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
909
910 if ((nlink_t)dp->i_nlink >= LINK_MAX) {
911 error = EMLINK;
912 goto out;
913 }
914
915 /*
916 * Must simulate part of ulfs_makeinode here to acquire the inode,
917 * but not have it entered in the parent directory. The entry is
918 * made later after writing "." and ".." entries.
919 */
920 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, ap->a_vpp);
921 if (error)
922 goto out;
923
924 error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
925 if (error) {
926 vrele(*ap->a_vpp);
927 *ap->a_vpp = NULL;
928 goto out;
929 }
930
931 tvp = *ap->a_vpp;
932 lfs_mark_vnode(tvp);
933 ip = VTOI(tvp);
934 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
935 ip->i_nlink = 2;
936 DIP_ASSIGN(ip, nlink, 2);
937 if (cnp->cn_flags & ISWHITEOUT) {
938 ip->i_flags |= UF_OPAQUE;
939 DIP_ASSIGN(ip, flags, ip->i_flags);
940 }
941
942 /*
943 * Bump link count in parent directory to reflect work done below.
944 */
945 dp->i_nlink++;
946 DIP_ASSIGN(dp, nlink, dp->i_nlink);
947 dp->i_flag |= IN_CHANGE;
948 if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
949 goto bad;
950
951 /*
952 * Initialize directory with "." and ".." from static template.
953 */
954 dirtemplate = mastertemplate;
955 dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen;
956 dirtemplate.dot_ino = ulfs_rw32(ip->i_number, ULFS_MPNEEDSWAP(fs));
957 dirtemplate.dotdot_ino = ulfs_rw32(dp->i_number, ULFS_MPNEEDSWAP(fs));
958 dirtemplate.dot_reclen = ulfs_rw16(dirtemplate.dot_reclen,
959 ULFS_MPNEEDSWAP(fs));
960 dirtemplate.dotdot_reclen = ulfs_rw16(dirtemplate.dotdot_reclen,
961 ULFS_MPNEEDSWAP(fs));
962 if (fs->um_maxsymlinklen <= 0) {
963 #if BYTE_ORDER == LITTLE_ENDIAN
964 if (ULFS_MPNEEDSWAP(fs) == 0)
965 #else
966 if (ULFS_MPNEEDSWAP(fs) != 0)
967 #endif
968 {
969 dirtemplate.dot_type = dirtemplate.dot_namlen;
970 dirtemplate.dotdot_type = dirtemplate.dotdot_namlen;
971 dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0;
972 } else
973 dirtemplate.dot_type = dirtemplate.dotdot_type = 0;
974 }
975 if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
976 B_CLRBUF, &bp)) != 0)
977 goto bad;
978 ip->i_size = dirblksiz;
979 DIP_ASSIGN(ip, size, dirblksiz);
980 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
981 uvm_vnp_setsize(tvp, ip->i_size);
982 memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate);
983
984 /*
985 * Directory set up; now install its entry in the parent directory.
986 */
987 if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
988 goto bad;
989 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
990 goto bad;
991 }
992 newdir = pool_cache_get(ulfs_direct_cache, PR_WAITOK);
993 ulfs_makedirentry(ip, cnp, newdir);
994 error = ulfs_direnter(dvp, ulr, tvp, newdir, cnp, bp);
995 pool_cache_put(ulfs_direct_cache, newdir);
996 bad:
997 if (error == 0) {
998 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
999 VOP_UNLOCK(tvp);
1000 } else {
1001 dp->i_nlink--;
1002 DIP_ASSIGN(dp, nlink, dp->i_nlink);
1003 dp->i_flag |= IN_CHANGE;
1004 /*
1005 * No need to do an explicit lfs_truncate here, vrele will
1006 * do this for us because we set the link count to 0.
1007 */
1008 ip->i_nlink = 0;
1009 DIP_ASSIGN(ip, nlink, 0);
1010 ip->i_flag |= IN_CHANGE;
1011 /* If IN_ADIROP, account for it */
1012 lfs_unmark_vnode(tvp);
1013 vput(tvp);
1014 }
1015
1016 out:
1017 fstrans_done(dvp->v_mount);
1018
1019 UNMARK_VNODE(dvp);
1020 UNMARK_VNODE(*vpp);
1021 if (!((*vpp)->v_uflag & VU_DIROP)) {
1022 KASSERT(error != 0);
1023 *vpp = NULL;
1024 }
1025 else {
1026 KASSERT(error == 0);
1027 }
1028 lfs_unset_dirop(fs, dvp, "mkdir");
1029
1030 vrele(dvp);
1031 return (error);
1032 }
1033
1034 int
1035 lfs_remove(void *v)
1036 {
1037 struct vop_remove_args /* {
1038 struct vnode *a_dvp;
1039 struct vnode *a_vp;
1040 struct componentname *a_cnp;
1041 } */ *ap = v;
1042 struct vnode *dvp, *vp;
1043 struct inode *ip;
1044 int error;
1045
1046 dvp = ap->a_dvp;
1047 vp = ap->a_vp;
1048 ip = VTOI(vp);
1049 if ((error = lfs_set_dirop(dvp, vp)) != 0) {
1050 if (dvp == vp)
1051 vrele(vp);
1052 else
1053 vput(vp);
1054 vput(dvp);
1055 return error;
1056 }
1057 error = ulfs_remove(ap);
1058 if (ip->i_nlink == 0)
1059 lfs_orphan(ip->i_lfs, ip->i_number);
1060
1061 UNMARK_VNODE(dvp);
1062 if (ap->a_vp) {
1063 UNMARK_VNODE(ap->a_vp);
1064 }
1065 lfs_unset_dirop(ip->i_lfs, dvp, "remove");
1066 vrele(dvp);
1067 if (ap->a_vp) {
1068 vrele(ap->a_vp);
1069 }
1070
1071 return (error);
1072 }
1073
1074 int
1075 lfs_rmdir(void *v)
1076 {
1077 struct vop_rmdir_args /* {
1078 struct vnodeop_desc *a_desc;
1079 struct vnode *a_dvp;
1080 struct vnode *a_vp;
1081 struct componentname *a_cnp;
1082 } */ *ap = v;
1083 struct vnode *vp;
1084 struct inode *ip;
1085 int error;
1086
1087 vp = ap->a_vp;
1088 ip = VTOI(vp);
1089 if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) {
1090 if (ap->a_dvp == vp)
1091 vrele(ap->a_dvp);
1092 else
1093 vput(ap->a_dvp);
1094 vput(vp);
1095 return error;
1096 }
1097 error = ulfs_rmdir(ap);
1098 if (ip->i_nlink == 0)
1099 lfs_orphan(ip->i_lfs, ip->i_number);
1100
1101 UNMARK_VNODE(ap->a_dvp);
1102 if (ap->a_vp) {
1103 UNMARK_VNODE(ap->a_vp);
1104 }
1105 lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir");
1106 vrele(ap->a_dvp);
1107 if (ap->a_vp) {
1108 vrele(ap->a_vp);
1109 }
1110
1111 return (error);
1112 }
1113
1114 int
1115 lfs_link(void *v)
1116 {
1117 struct vop_link_v2_args /* {
1118 struct vnode *a_dvp;
1119 struct vnode *a_vp;
1120 struct componentname *a_cnp;
1121 } */ *ap = v;
1122 struct lfs *fs;
1123 struct vnode *dvp;
1124 int error;
1125
1126 dvp = ap->a_dvp;
1127
1128 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
1129 ASSERT_NO_SEGLOCK(fs);
1130 if (fs->lfs_ronly) {
1131 return EROFS;
1132 }
1133
1134 error = lfs_set_dirop(dvp, NULL);
1135 if (error) {
1136 return error;
1137 }
1138
1139 error = ulfs_link(ap);
1140
1141 UNMARK_VNODE(dvp);
1142 lfs_unset_dirop(fs, dvp, "link");
1143 vrele(dvp);
1144
1145 return (error);
1146 }
1147
1148 /* XXX hack to avoid calling ITIMES in getattr */
1149 int
1150 lfs_getattr(void *v)
1151 {
1152 struct vop_getattr_args /* {
1153 struct vnode *a_vp;
1154 struct vattr *a_vap;
1155 kauth_cred_t a_cred;
1156 } */ *ap = v;
1157 struct vnode *vp = ap->a_vp;
1158 struct inode *ip = VTOI(vp);
1159 struct vattr *vap = ap->a_vap;
1160 struct lfs *fs = ip->i_lfs;
1161
1162 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1163 /*
1164 * Copy from inode table
1165 */
1166 vap->va_fsid = ip->i_dev;
1167 vap->va_fileid = ip->i_number;
1168 vap->va_mode = ip->i_mode & ~LFS_IFMT;
1169 vap->va_nlink = ip->i_nlink;
1170 vap->va_uid = ip->i_uid;
1171 vap->va_gid = ip->i_gid;
1172 vap->va_rdev = (dev_t)ip->i_ffs1_rdev;
1173 vap->va_size = vp->v_size;
1174 vap->va_atime.tv_sec = ip->i_ffs1_atime;
1175 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec;
1176 vap->va_mtime.tv_sec = ip->i_ffs1_mtime;
1177 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec;
1178 vap->va_ctime.tv_sec = ip->i_ffs1_ctime;
1179 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec;
1180 vap->va_flags = ip->i_flags;
1181 vap->va_gen = ip->i_gen;
1182 /* this doesn't belong here */
1183 if (vp->v_type == VBLK)
1184 vap->va_blocksize = BLKDEV_IOSIZE;
1185 else if (vp->v_type == VCHR)
1186 vap->va_blocksize = MAXBSIZE;
1187 else
1188 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
1189 vap->va_bytes = lfs_fsbtob(fs, ip->i_lfs_effnblks);
1190 vap->va_type = vp->v_type;
1191 vap->va_filerev = ip->i_modrev;
1192 fstrans_done(vp->v_mount);
1193 return (0);
1194 }
1195
1196 /*
1197 * Check to make sure the inode blocks won't choke the buffer
1198 * cache, then call ulfs_setattr as usual.
1199 */
1200 int
1201 lfs_setattr(void *v)
1202 {
1203 struct vop_setattr_args /* {
1204 struct vnode *a_vp;
1205 struct vattr *a_vap;
1206 kauth_cred_t a_cred;
1207 } */ *ap = v;
1208 struct vnode *vp = ap->a_vp;
1209
1210 lfs_check(vp, LFS_UNUSED_LBN, 0);
1211 return ulfs_setattr(v);
1212 }
1213
1214 /*
1215 * Release the block we hold on lfs_newseg wrapping. Called on file close,
1216 * or explicitly from LFCNWRAPGO. Called with the interlock held.
1217 */
1218 static int
1219 lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
1220 {
1221 if (fs->lfs_stoplwp != curlwp)
1222 return EBUSY;
1223
1224 fs->lfs_stoplwp = NULL;
1225 cv_signal(&fs->lfs_stopcv);
1226
1227 KASSERT(fs->lfs_nowrap > 0);
1228 if (fs->lfs_nowrap <= 0) {
1229 return 0;
1230 }
1231
1232 if (--fs->lfs_nowrap == 0) {
1233 log(LOG_NOTICE, "%s: re-enabled log wrap\n",
1234 lfs_sb_getfsmnt(fs));
1235 wakeup(&fs->lfs_wrappass);
1236 lfs_wakeup_cleaner(fs);
1237 }
1238 if (waitfor) {
1239 mtsleep(&fs->lfs_nextsegsleep, PCATCH | PUSER, "segment",
1240 0, &lfs_lock);
1241 }
1242
1243 return 0;
1244 }
1245
1246 /*
1247 * Close called.
1248 *
1249 * Update the times on the inode.
1250 */
1251 /* ARGSUSED */
1252 int
1253 lfs_close(void *v)
1254 {
1255 struct vop_close_args /* {
1256 struct vnode *a_vp;
1257 int a_fflag;
1258 kauth_cred_t a_cred;
1259 } */ *ap = v;
1260 struct vnode *vp = ap->a_vp;
1261 struct inode *ip = VTOI(vp);
1262 struct lfs *fs = ip->i_lfs;
1263
1264 if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
1265 fs->lfs_stoplwp == curlwp) {
1266 mutex_enter(&lfs_lock);
1267 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
1268 lfs_wrapgo(fs, ip, 0);
1269 mutex_exit(&lfs_lock);
1270 }
1271
1272 if (vp == ip->i_lfs->lfs_ivnode &&
1273 vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
1274 return 0;
1275
1276 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1277 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) {
1278 LFS_ITIMES(ip, NULL, NULL, NULL);
1279 }
1280 fstrans_done(vp->v_mount);
1281 return (0);
1282 }
1283
1284 /*
1285 * Close wrapper for special devices.
1286 *
1287 * Update the times on the inode then do device close.
1288 */
1289 int
1290 lfsspec_close(void *v)
1291 {
1292 struct vop_close_args /* {
1293 struct vnode *a_vp;
1294 int a_fflag;
1295 kauth_cred_t a_cred;
1296 } */ *ap = v;
1297 struct vnode *vp;
1298 struct inode *ip;
1299
1300 vp = ap->a_vp;
1301 ip = VTOI(vp);
1302 if (vp->v_usecount > 1) {
1303 LFS_ITIMES(ip, NULL, NULL, NULL);
1304 }
1305 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
1306 }
1307
1308 /*
1309 * Close wrapper for fifo's.
1310 *
1311 * Update the times on the inode then do device close.
1312 */
1313 int
1314 lfsfifo_close(void *v)
1315 {
1316 struct vop_close_args /* {
1317 struct vnode *a_vp;
1318 int a_fflag;
1319 kauth_cred_ a_cred;
1320 } */ *ap = v;
1321 struct vnode *vp;
1322 struct inode *ip;
1323
1324 vp = ap->a_vp;
1325 ip = VTOI(vp);
1326 if (ap->a_vp->v_usecount > 1) {
1327 LFS_ITIMES(ip, NULL, NULL, NULL);
1328 }
1329 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
1330 }
1331
1332 /*
1333 * Reclaim an inode so that it can be used for other purposes.
1334 */
1335
1336 int
1337 lfs_reclaim(void *v)
1338 {
1339 struct vop_reclaim_args /* {
1340 struct vnode *a_vp;
1341 } */ *ap = v;
1342 struct vnode *vp = ap->a_vp;
1343 struct inode *ip = VTOI(vp);
1344 struct lfs *fs = ip->i_lfs;
1345 int error;
1346
1347 /*
1348 * The inode must be freed and updated before being removed
1349 * from its hash chain. Other threads trying to gain a hold
1350 * or lock on the inode will be stalled.
1351 */
1352 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1353 lfs_vfree(vp, ip->i_number, ip->i_omode);
1354
1355 mutex_enter(&lfs_lock);
1356 LFS_CLR_UINO(ip, IN_ALLMOD);
1357 mutex_exit(&lfs_lock);
1358 if ((error = ulfs_reclaim(vp)))
1359 return (error);
1360
1361 /*
1362 * Take us off the paging and/or dirop queues if we were on them.
1363 * We shouldn't be on them.
1364 */
1365 mutex_enter(&lfs_lock);
1366 if (ip->i_flags & IN_PAGING) {
1367 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n",
1368 lfs_sb_getfsmnt(fs));
1369 ip->i_flags &= ~IN_PAGING;
1370 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
1371 }
1372 if (vp->v_uflag & VU_DIROP) {
1373 panic("reclaimed vnode is VU_DIROP");
1374 vp->v_uflag &= ~VU_DIROP;
1375 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
1376 }
1377 mutex_exit(&lfs_lock);
1378
1379 pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din);
1380 lfs_deregister_all(vp);
1381 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
1382 ip->inode_ext.lfs = NULL;
1383 genfs_node_destroy(vp);
1384 pool_put(&lfs_inode_pool, vp->v_data);
1385 vp->v_data = NULL;
1386 return (0);
1387 }
1388
1389 /*
1390 * Read a block from a storage device.
1391 *
1392 * Calculate the logical to physical mapping if not done already,
1393 * then call the device strategy routine.
1394 *
1395 * In order to avoid reading blocks that are in the process of being
1396 * written by the cleaner---and hence are not mutexed by the normal
1397 * buffer cache / page cache mechanisms---check for collisions before
1398 * reading.
1399 *
1400 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
1401 * the active cleaner test.
1402 *
1403 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
1404 */
1405 int
1406 lfs_strategy(void *v)
1407 {
1408 struct vop_strategy_args /* {
1409 struct vnode *a_vp;
1410 struct buf *a_bp;
1411 } */ *ap = v;
1412 struct buf *bp;
1413 struct lfs *fs;
1414 struct vnode *vp;
1415 struct inode *ip;
1416 daddr_t tbn;
1417 #define MAXLOOP 25
1418 int i, sn, error, slept, loopcount;
1419
1420 bp = ap->a_bp;
1421 vp = ap->a_vp;
1422 ip = VTOI(vp);
1423 fs = ip->i_lfs;
1424
1425 /* lfs uses its strategy routine only for read */
1426 KASSERT(bp->b_flags & B_READ);
1427
1428 if (vp->v_type == VBLK || vp->v_type == VCHR)
1429 panic("lfs_strategy: spec");
1430 KASSERT(bp->b_bcount != 0);
1431 if (bp->b_blkno == bp->b_lblkno) {
1432 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
1433 NULL);
1434 if (error) {
1435 bp->b_error = error;
1436 bp->b_resid = bp->b_bcount;
1437 biodone(bp);
1438 return (error);
1439 }
1440 if ((long)bp->b_blkno == -1) /* no valid data */
1441 clrbuf(bp);
1442 }
1443 if ((long)bp->b_blkno < 0) { /* block is not on disk */
1444 bp->b_resid = bp->b_bcount;
1445 biodone(bp);
1446 return (0);
1447 }
1448
1449 slept = 1;
1450 loopcount = 0;
1451 mutex_enter(&lfs_lock);
1452 while (slept && fs->lfs_seglock) {
1453 mutex_exit(&lfs_lock);
1454 /*
1455 * Look through list of intervals.
1456 * There will only be intervals to look through
1457 * if the cleaner holds the seglock.
1458 * Since the cleaner is synchronous, we can trust
1459 * the list of intervals to be current.
1460 */
1461 tbn = LFS_DBTOFSB(fs, bp->b_blkno);
1462 sn = lfs_dtosn(fs, tbn);
1463 slept = 0;
1464 for (i = 0; i < fs->lfs_cleanind; i++) {
1465 if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
1466 tbn >= fs->lfs_cleanint[i]) {
1467 DLOG((DLOG_CLEAN,
1468 "lfs_strategy: ino %d lbn %" PRId64
1469 " ind %d sn %d fsb %" PRIx32
1470 " given sn %d fsb %" PRIx64 "\n",
1471 ip->i_number, bp->b_lblkno, i,
1472 lfs_dtosn(fs, fs->lfs_cleanint[i]),
1473 fs->lfs_cleanint[i], sn, tbn));
1474 DLOG((DLOG_CLEAN,
1475 "lfs_strategy: sleeping on ino %d lbn %"
1476 PRId64 "\n", ip->i_number, bp->b_lblkno));
1477 mutex_enter(&lfs_lock);
1478 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
1479 /*
1480 * Cleaner can't wait for itself.
1481 * Instead, wait for the blocks
1482 * to be written to disk.
1483 * XXX we need pribio in the test
1484 * XXX here.
1485 */
1486 mtsleep(&fs->lfs_iocount,
1487 (PRIBIO + 1) | PNORELOCK,
1488 "clean2", hz/10 + 1,
1489 &lfs_lock);
1490 slept = 1;
1491 ++loopcount;
1492 break;
1493 } else if (fs->lfs_seglock) {
1494 mtsleep(&fs->lfs_seglock,
1495 (PRIBIO + 1) | PNORELOCK,
1496 "clean1", 0,
1497 &lfs_lock);
1498 slept = 1;
1499 break;
1500 }
1501 mutex_exit(&lfs_lock);
1502 }
1503 }
1504 mutex_enter(&lfs_lock);
1505 if (loopcount > MAXLOOP) {
1506 printf("lfs_strategy: breaking out of clean2 loop\n");
1507 break;
1508 }
1509 }
1510 mutex_exit(&lfs_lock);
1511
1512 vp = ip->i_devvp;
1513 return VOP_STRATEGY(vp, bp);
1514 }
1515
1516 /*
1517 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
1518 * Technically this is a checkpoint (the on-disk state is valid)
1519 * even though we are leaving out all the file data.
1520 */
1521 int
1522 lfs_flush_dirops(struct lfs *fs)
1523 {
1524 struct inode *ip, *nip;
1525 struct vnode *vp;
1526 extern int lfs_dostats;
1527 struct segment *sp;
1528 int flags = 0;
1529 int error = 0;
1530
1531 ASSERT_MAYBE_SEGLOCK(fs);
1532 KASSERT(fs->lfs_nadirop == 0);
1533
1534 if (fs->lfs_ronly)
1535 return EROFS;
1536
1537 mutex_enter(&lfs_lock);
1538 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
1539 mutex_exit(&lfs_lock);
1540 return 0;
1541 } else
1542 mutex_exit(&lfs_lock);
1543
1544 if (lfs_dostats)
1545 ++lfs_stats.flush_invoked;
1546
1547 lfs_imtime(fs);
1548 lfs_seglock(fs, flags);
1549 sp = fs->lfs_sp;
1550
1551 /*
1552 * lfs_writevnodes, optimized to get dirops out of the way.
1553 * Only write dirops, and don't flush files' pages, only
1554 * blocks from the directories.
1555 *
1556 * We don't need to vref these files because they are
1557 * dirops and so hold an extra reference until the
1558 * segunlock clears them of that status.
1559 *
1560 * We don't need to check for IN_ADIROP because we know that
1561 * no dirops are active.
1562 *
1563 */
1564 mutex_enter(&lfs_lock);
1565 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
1566 nip = TAILQ_NEXT(ip, i_lfs_dchain);
1567 mutex_exit(&lfs_lock);
1568 vp = ITOV(ip);
1569 mutex_enter(vp->v_interlock);
1570
1571 KASSERT((ip->i_flag & IN_ADIROP) == 0);
1572 KASSERT(vp->v_uflag & VU_DIROP);
1573 KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0);
1574
1575 /*
1576 * All writes to directories come from dirops; all
1577 * writes to files' direct blocks go through the page
1578 * cache, which we're not touching. Reads to files
1579 * and/or directories will not be affected by writing
1580 * directory blocks inodes and file inodes. So we don't
1581 * really need to lock.
1582 */
1583 if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
1584 mutex_exit(vp->v_interlock);
1585 mutex_enter(&lfs_lock);
1586 continue;
1587 }
1588 mutex_exit(vp->v_interlock);
1589 /* XXX see below
1590 * waslocked = VOP_ISLOCKED(vp);
1591 */
1592 if (vp->v_type != VREG &&
1593 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) {
1594 error = lfs_writefile(fs, sp, vp);
1595 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
1596 !(ip->i_flag & IN_ALLMOD)) {
1597 mutex_enter(&lfs_lock);
1598 LFS_SET_UINO(ip, IN_MODIFIED);
1599 mutex_exit(&lfs_lock);
1600 }
1601 if (error && (sp->seg_flags & SEGM_SINGLE)) {
1602 mutex_enter(&lfs_lock);
1603 error = EAGAIN;
1604 break;
1605 }
1606 }
1607 KDASSERT(ip->i_number != LFS_IFILE_INUM);
1608 error = lfs_writeinode(fs, sp, ip);
1609 mutex_enter(&lfs_lock);
1610 if (error && (sp->seg_flags & SEGM_SINGLE)) {
1611 error = EAGAIN;
1612 break;
1613 }
1614
1615 /*
1616 * We might need to update these inodes again,
1617 * for example, if they have data blocks to write.
1618 * Make sure that after this flush, they are still
1619 * marked IN_MODIFIED so that we don't forget to
1620 * write them.
1621 */
1622 /* XXX only for non-directories? --KS */
1623 LFS_SET_UINO(ip, IN_MODIFIED);
1624 }
1625 mutex_exit(&lfs_lock);
1626 /* We've written all the dirops there are */
1627 ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT);
1628 lfs_finalize_fs_seguse(fs);
1629 (void) lfs_writeseg(fs, sp);
1630 lfs_segunlock(fs);
1631
1632 return error;
1633 }
1634
1635 /*
1636 * Flush all vnodes for which the pagedaemon has requested pageouts.
1637 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
1638 * has just run, this would be an error). If we have to skip a vnode
1639 * for any reason, just skip it; if we have to wait for the cleaner,
1640 * abort. The writer daemon will call us again later.
1641 */
1642 int
1643 lfs_flush_pchain(struct lfs *fs)
1644 {
1645 struct inode *ip, *nip;
1646 struct vnode *vp;
1647 extern int lfs_dostats;
1648 struct segment *sp;
1649 int error, error2;
1650
1651 ASSERT_NO_SEGLOCK(fs);
1652
1653 if (fs->lfs_ronly)
1654 return EROFS;
1655
1656 mutex_enter(&lfs_lock);
1657 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
1658 mutex_exit(&lfs_lock);
1659 return 0;
1660 } else
1661 mutex_exit(&lfs_lock);
1662
1663 /* Get dirops out of the way */
1664 if ((error = lfs_flush_dirops(fs)) != 0)
1665 return error;
1666
1667 if (lfs_dostats)
1668 ++lfs_stats.flush_invoked;
1669
1670 /*
1671 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
1672 */
1673 lfs_imtime(fs);
1674 lfs_seglock(fs, 0);
1675 sp = fs->lfs_sp;
1676
1677 /*
1678 * lfs_writevnodes, optimized to clear pageout requests.
1679 * Only write non-dirop files that are in the pageout queue.
1680 * We're very conservative about what we write; we want to be
1681 * fast and async.
1682 */
1683 mutex_enter(&lfs_lock);
1684 top:
1685 for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
1686 struct mount *mp = ITOV(ip)->v_mount;
1687 ino_t ino = ip->i_number;
1688
1689 nip = TAILQ_NEXT(ip, i_lfs_pchain);
1690
1691 if (!(ip->i_flags & IN_PAGING))
1692 goto top;
1693
1694 mutex_exit(&lfs_lock);
1695 if (vcache_get(mp, &ino, sizeof(ino), &vp) != 0) {
1696 mutex_enter(&lfs_lock);
1697 continue;
1698 };
1699 if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1700 vrele(vp);
1701 mutex_enter(&lfs_lock);
1702 continue;
1703 }
1704 ip = VTOI(vp);
1705 mutex_enter(&lfs_lock);
1706 if ((vp->v_uflag & VU_DIROP) != 0 || vp->v_type != VREG ||
1707 !(ip->i_flags & IN_PAGING)) {
1708 mutex_exit(&lfs_lock);
1709 vput(vp);
1710 mutex_enter(&lfs_lock);
1711 goto top;
1712 }
1713 mutex_exit(&lfs_lock);
1714
1715 error = lfs_writefile(fs, sp, vp);
1716 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
1717 !(ip->i_flag & IN_ALLMOD)) {
1718 mutex_enter(&lfs_lock);
1719 LFS_SET_UINO(ip, IN_MODIFIED);
1720 mutex_exit(&lfs_lock);
1721 }
1722 KDASSERT(ip->i_number != LFS_IFILE_INUM);
1723 error2 = lfs_writeinode(fs, sp, ip);
1724
1725 VOP_UNLOCK(vp);
1726 vrele(vp);
1727
1728 if (error == EAGAIN || error2 == EAGAIN) {
1729 lfs_writeseg(fs, sp);
1730 mutex_enter(&lfs_lock);
1731 break;
1732 }
1733 mutex_enter(&lfs_lock);
1734 }
1735 mutex_exit(&lfs_lock);
1736 (void) lfs_writeseg(fs, sp);
1737 lfs_segunlock(fs);
1738
1739 return 0;
1740 }
1741
1742 /*
1743 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
1744 */
1745 int
1746 lfs_fcntl(void *v)
1747 {
1748 struct vop_fcntl_args /* {
1749 struct vnode *a_vp;
1750 u_int a_command;
1751 void * a_data;
1752 int a_fflag;
1753 kauth_cred_t a_cred;
1754 } */ *ap = v;
1755 struct timeval tv;
1756 struct timeval *tvp;
1757 BLOCK_INFO *blkiov;
1758 CLEANERINFO *cip;
1759 SEGUSE *sup;
1760 int blkcnt, error;
1761 size_t fh_size;
1762 struct lfs_fcntl_markv blkvp;
1763 struct lwp *l;
1764 fsid_t *fsidp;
1765 struct lfs *fs;
1766 struct buf *bp;
1767 fhandle_t *fhp;
1768 daddr_t off;
1769 int oclean;
1770
1771 /* Only respect LFS fcntls on fs root or Ifile */
1772 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO &&
1773 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
1774 return ulfs_fcntl(v);
1775 }
1776
1777 /* Avoid locking a draining lock */
1778 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
1779 return ESHUTDOWN;
1780 }
1781
1782 /* LFS control and monitoring fcntls are available only to root */
1783 l = curlwp;
1784 if (((ap->a_command & 0xff00) >> 8) == 'L' &&
1785 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
1786 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0)
1787 return (error);
1788
1789 fs = VTOI(ap->a_vp)->i_lfs;
1790 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;
1791
1792 error = 0;
1793 switch ((int)ap->a_command) {
1794 case LFCNSEGWAITALL_COMPAT_50:
1795 case LFCNSEGWAITALL_COMPAT:
1796 fsidp = NULL;
1797 /* FALLSTHROUGH */
1798 case LFCNSEGWAIT_COMPAT_50:
1799 case LFCNSEGWAIT_COMPAT:
1800 {
1801 struct timeval50 *tvp50
1802 = (struct timeval50 *)ap->a_data;
1803 timeval50_to_timeval(tvp50, &tv);
1804 tvp = &tv;
1805 }
1806 goto segwait_common;
1807 case LFCNSEGWAITALL:
1808 fsidp = NULL;
1809 /* FALLSTHROUGH */
1810 case LFCNSEGWAIT:
1811 tvp = (struct timeval *)ap->a_data;
1812 segwait_common:
1813 mutex_enter(&lfs_lock);
1814 ++fs->lfs_sleepers;
1815 mutex_exit(&lfs_lock);
1816
1817 error = lfs_segwait(fsidp, tvp);
1818
1819 mutex_enter(&lfs_lock);
1820 if (--fs->lfs_sleepers == 0)
1821 wakeup(&fs->lfs_sleepers);
1822 mutex_exit(&lfs_lock);
1823 return error;
1824
1825 case LFCNBMAPV:
1826 case LFCNMARKV:
1827 blkvp = *(struct lfs_fcntl_markv *)ap->a_data;
1828
1829 blkcnt = blkvp.blkcnt;
1830 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1831 return (EINVAL);
1832 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1833 if ((error = copyin(blkvp.blkiov, blkiov,
1834 blkcnt * sizeof(BLOCK_INFO))) != 0) {
1835 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1836 return error;
1837 }
1838
1839 mutex_enter(&lfs_lock);
1840 ++fs->lfs_sleepers;
1841 mutex_exit(&lfs_lock);
1842 if (ap->a_command == LFCNBMAPV)
1843 error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt);
1844 else /* LFCNMARKV */
1845 error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt);
1846 if (error == 0)
1847 error = copyout(blkiov, blkvp.blkiov,
1848 blkcnt * sizeof(BLOCK_INFO));
1849 mutex_enter(&lfs_lock);
1850 if (--fs->lfs_sleepers == 0)
1851 wakeup(&fs->lfs_sleepers);
1852 mutex_exit(&lfs_lock);
1853 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1854 return error;
1855
1856 case LFCNRECLAIM:
1857 /*
1858 * Flush dirops and write Ifile, allowing empty segments
1859 * to be immediately reclaimed.
1860 */
1861 lfs_writer_enter(fs, "pndirop");
1862 off = lfs_sb_getoffset(fs);
1863 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP);
1864 lfs_flush_dirops(fs);
1865 LFS_CLEANERINFO(cip, fs, bp);
1866 oclean = cip->clean;
1867 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
1868 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP);
1869 fs->lfs_sp->seg_flags |= SEGM_PROT;
1870 lfs_segunlock(fs);
1871 lfs_writer_leave(fs);
1872
1873 #ifdef DEBUG
1874 LFS_CLEANERINFO(cip, fs, bp);
1875 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64
1876 " blocks, cleaned %" PRId32 " segments (activesb %d)\n",
1877 lfs_sb_getoffset(fs) - off, cip->clean - oclean,
1878 fs->lfs_activesb));
1879 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
1880 #else
1881 __USE(oclean);
1882 __USE(off);
1883 #endif
1884
1885 return 0;
1886
1887 case LFCNIFILEFH_COMPAT:
1888 /* Return the filehandle of the Ifile */
1889 if ((error = kauth_authorize_system(l->l_cred,
1890 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0)
1891 return (error);
1892 fhp = (struct fhandle *)ap->a_data;
1893 fhp->fh_fsid = *fsidp;
1894 fh_size = 16; /* former VFS_MAXFIDSIZ */
1895 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
1896
1897 case LFCNIFILEFH_COMPAT2:
1898 case LFCNIFILEFH:
1899 /* Return the filehandle of the Ifile */
1900 fhp = (struct fhandle *)ap->a_data;
1901 fhp->fh_fsid = *fsidp;
1902 fh_size = sizeof(struct lfs_fhandle) -
1903 offsetof(fhandle_t, fh_fid);
1904 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
1905
1906 case LFCNREWIND:
1907 /* Move lfs_offset to the lowest-numbered segment */
1908 return lfs_rewind(fs, *(int *)ap->a_data);
1909
1910 case LFCNINVAL:
1911 /* Mark a segment SEGUSE_INVAL */
1912 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp);
1913 if (sup->su_nbytes > 0) {
1914 brelse(bp, 0);
1915 lfs_unset_inval_all(fs);
1916 return EBUSY;
1917 }
1918 sup->su_flags |= SEGUSE_INVAL;
1919 VOP_BWRITE(bp->b_vp, bp);
1920 return 0;
1921
1922 case LFCNRESIZE:
1923 /* Resize the filesystem */
1924 return lfs_resize_fs(fs, *(int *)ap->a_data);
1925
1926 case LFCNWRAPSTOP:
1927 case LFCNWRAPSTOP_COMPAT:
1928 /*
1929 * Hold lfs_newseg at segment 0; if requested, sleep until
1930 * the filesystem wraps around. To support external agents
1931 * (dump, fsck-based regression test) that need to look at
1932 * a snapshot of the filesystem, without necessarily
1933 * requiring that all fs activity stops.
1934 */
1935 if (fs->lfs_stoplwp == curlwp)
1936 return EALREADY;
1937
1938 mutex_enter(&lfs_lock);
1939 while (fs->lfs_stoplwp != NULL)
1940 cv_wait(&fs->lfs_stopcv, &lfs_lock);
1941 fs->lfs_stoplwp = curlwp;
1942 if (fs->lfs_nowrap == 0)
1943 log(LOG_NOTICE, "%s: disabled log wrap\n",
1944 lfs_sb_getfsmnt(fs));
1945 ++fs->lfs_nowrap;
1946 if (*(int *)ap->a_data == 1
1947 || ap->a_command == LFCNWRAPSTOP_COMPAT) {
1948 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n");
1949 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
1950 "segwrap", 0, &lfs_lock);
1951 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n");
1952 if (error) {
1953 lfs_wrapgo(fs, VTOI(ap->a_vp), 0);
1954 }
1955 }
1956 mutex_exit(&lfs_lock);
1957 return 0;
1958
1959 case LFCNWRAPGO:
1960 case LFCNWRAPGO_COMPAT:
1961 /*
1962 * Having done its work, the agent wakes up the writer.
1963 * If the argument is 1, it sleeps until a new segment
1964 * is selected.
1965 */
1966 mutex_enter(&lfs_lock);
1967 error = lfs_wrapgo(fs, VTOI(ap->a_vp),
1968 ap->a_command == LFCNWRAPGO_COMPAT ? 1 :
1969 *((int *)ap->a_data));
1970 mutex_exit(&lfs_lock);
1971 return error;
1972
1973 case LFCNWRAPPASS:
1974 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT))
1975 return EALREADY;
1976 mutex_enter(&lfs_lock);
1977 if (fs->lfs_stoplwp != curlwp) {
1978 mutex_exit(&lfs_lock);
1979 return EALREADY;
1980 }
1981 if (fs->lfs_nowrap == 0) {
1982 mutex_exit(&lfs_lock);
1983 return EBUSY;
1984 }
1985 fs->lfs_wrappass = 1;
1986 wakeup(&fs->lfs_wrappass);
1987 /* Wait for the log to wrap, if asked */
1988 if (*(int *)ap->a_data) {
1989 vref(ap->a_vp);
1990 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT;
1991 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n");
1992 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
1993 "segwrap", 0, &lfs_lock);
1994 log(LOG_NOTICE, "LFCNPASS done waiting\n");
1995 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT;
1996 vrele(ap->a_vp);
1997 }
1998 mutex_exit(&lfs_lock);
1999 return error;
2000
2001 case LFCNWRAPSTATUS:
2002 mutex_enter(&lfs_lock);
2003 *(int *)ap->a_data = fs->lfs_wrapstatus;
2004 mutex_exit(&lfs_lock);
2005 return 0;
2006
2007 default:
2008 return ulfs_fcntl(v);
2009 }
2010 return 0;
2011 }
2012
2013 /*
2014 * Return the last logical file offset that should be written for this file
2015 * if we're doing a write that ends at "size". If writing, we need to know
2016 * about sizes on disk, i.e. fragments if there are any; if reading, we need
2017 * to know about entire blocks.
2018 */
2019 void
2020 lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
2021 {
2022 struct inode *ip = VTOI(vp);
2023 struct lfs *fs = ip->i_lfs;
2024 daddr_t olbn, nlbn;
2025
2026 olbn = lfs_lblkno(fs, ip->i_size);
2027 nlbn = lfs_lblkno(fs, size);
2028 if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
2029 *eobp = lfs_fragroundup(fs, size);
2030 } else {
2031 *eobp = lfs_blkroundup(fs, size);
2032 }
2033 }
2034
#ifdef DEBUG
void lfs_dump_vop(void *);

/*
 * Debugging aid: dump the dinode of the vnode named in a putpages-style
 * argument block, preceded by the vnode itself when DDB is configured.
 */
void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

#ifdef DDB
	vfs_vnode_print(vp, 0, printf);
#endif
	lfs_dump_dinode(VTOI(vp)->i_din.ffs1_din);
}
#endif
2054
2055 int
2056 lfs_mmap(void *v)
2057 {
2058 struct vop_mmap_args /* {
2059 const struct vnodeop_desc *a_desc;
2060 struct vnode *a_vp;
2061 vm_prot_t a_prot;
2062 kauth_cred_t a_cred;
2063 } */ *ap = v;
2064
2065 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
2066 return EOPNOTSUPP;
2067 return ulfs_mmap(v);
2068 }
2069
2070 static int
2071 lfs_openextattr(void *v)
2072 {
2073 struct vop_openextattr_args /* {
2074 struct vnode *a_vp;
2075 kauth_cred_t a_cred;
2076 struct proc *a_p;
2077 } */ *ap = v;
2078 struct inode *ip = VTOI(ap->a_vp);
2079 struct ulfsmount *ump = ip->i_ump;
2080 //struct lfs *fs = ip->i_lfs;
2081
2082 /* Not supported for ULFS1 file systems. */
2083 if (ump->um_fstype == ULFS1)
2084 return (EOPNOTSUPP);
2085
2086 /* XXX Not implemented for ULFS2 file systems. */
2087 return (EOPNOTSUPP);
2088 }
2089
2090 static int
2091 lfs_closeextattr(void *v)
2092 {
2093 struct vop_closeextattr_args /* {
2094 struct vnode *a_vp;
2095 int a_commit;
2096 kauth_cred_t a_cred;
2097 struct proc *a_p;
2098 } */ *ap = v;
2099 struct inode *ip = VTOI(ap->a_vp);
2100 struct ulfsmount *ump = ip->i_ump;
2101 //struct lfs *fs = ip->i_lfs;
2102
2103 /* Not supported for ULFS1 file systems. */
2104 if (ump->um_fstype == ULFS1)
2105 return (EOPNOTSUPP);
2106
2107 /* XXX Not implemented for ULFS2 file systems. */
2108 return (EOPNOTSUPP);
2109 }
2110
2111 static int
2112 lfs_getextattr(void *v)
2113 {
2114 struct vop_getextattr_args /* {
2115 struct vnode *a_vp;
2116 int a_attrnamespace;
2117 const char *a_name;
2118 struct uio *a_uio;
2119 size_t *a_size;
2120 kauth_cred_t a_cred;
2121 struct proc *a_p;
2122 } */ *ap = v;
2123 struct vnode *vp = ap->a_vp;
2124 struct inode *ip = VTOI(vp);
2125 struct ulfsmount *ump = ip->i_ump;
2126 //struct lfs *fs = ip->i_lfs;
2127 int error;
2128
2129 if (ump->um_fstype == ULFS1) {
2130 #ifdef LFS_EXTATTR
2131 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2132 error = ulfs_getextattr(ap);
2133 fstrans_done(vp->v_mount);
2134 #else
2135 error = EOPNOTSUPP;
2136 #endif
2137 return error;
2138 }
2139
2140 /* XXX Not implemented for ULFS2 file systems. */
2141 return (EOPNOTSUPP);
2142 }
2143
2144 static int
2145 lfs_setextattr(void *v)
2146 {
2147 struct vop_setextattr_args /* {
2148 struct vnode *a_vp;
2149 int a_attrnamespace;
2150 const char *a_name;
2151 struct uio *a_uio;
2152 kauth_cred_t a_cred;
2153 struct proc *a_p;
2154 } */ *ap = v;
2155 struct vnode *vp = ap->a_vp;
2156 struct inode *ip = VTOI(vp);
2157 struct ulfsmount *ump = ip->i_ump;
2158 //struct lfs *fs = ip->i_lfs;
2159 int error;
2160
2161 if (ump->um_fstype == ULFS1) {
2162 #ifdef LFS_EXTATTR
2163 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2164 error = ulfs_setextattr(ap);
2165 fstrans_done(vp->v_mount);
2166 #else
2167 error = EOPNOTSUPP;
2168 #endif
2169 return error;
2170 }
2171
2172 /* XXX Not implemented for ULFS2 file systems. */
2173 return (EOPNOTSUPP);
2174 }
2175
2176 static int
2177 lfs_listextattr(void *v)
2178 {
2179 struct vop_listextattr_args /* {
2180 struct vnode *a_vp;
2181 int a_attrnamespace;
2182 struct uio *a_uio;
2183 size_t *a_size;
2184 kauth_cred_t a_cred;
2185 struct proc *a_p;
2186 } */ *ap = v;
2187 struct vnode *vp = ap->a_vp;
2188 struct inode *ip = VTOI(vp);
2189 struct ulfsmount *ump = ip->i_ump;
2190 //struct lfs *fs = ip->i_lfs;
2191 int error;
2192
2193 if (ump->um_fstype == ULFS1) {
2194 #ifdef LFS_EXTATTR
2195 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2196 error = ulfs_listextattr(ap);
2197 fstrans_done(vp->v_mount);
2198 #else
2199 error = EOPNOTSUPP;
2200 #endif
2201 return error;
2202 }
2203
2204 /* XXX Not implemented for ULFS2 file systems. */
2205 return (EOPNOTSUPP);
2206 }
2207
2208 static int
2209 lfs_deleteextattr(void *v)
2210 {
2211 struct vop_deleteextattr_args /* {
2212 struct vnode *a_vp;
2213 int a_attrnamespace;
2214 kauth_cred_t a_cred;
2215 struct proc *a_p;
2216 } */ *ap = v;
2217 struct vnode *vp = ap->a_vp;
2218 struct inode *ip = VTOI(vp);
2219 struct ulfsmount *ump = ip->i_ump;
2220 //struct fs *fs = ip->i_lfs;
2221 int error;
2222
2223 if (ump->um_fstype == ULFS1) {
2224 #ifdef LFS_EXTATTR
2225 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2226 error = ulfs_deleteextattr(ap);
2227 fstrans_done(vp->v_mount);
2228 #else
2229 error = EOPNOTSUPP;
2230 #endif
2231 return error;
2232 }
2233
2234 /* XXX Not implemented for ULFS2 file systems. */
2235 return (EOPNOTSUPP);
2236 }
2237