1 /* $NetBSD: lfs_vnops.c,v 1.286 2015/08/12 18:28:01 dholland Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * Copyright (c) 1986, 1989, 1991, 1993, 1995
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95
60 */
61
62 /* from NetBSD: ufs_vnops.c,v 1.213 2013/06/08 05:47:02 kardel Exp */
63 /*-
64 * Copyright (c) 2008 The NetBSD Foundation, Inc.
65 * All rights reserved.
66 *
67 * This code is derived from software contributed to The NetBSD Foundation
68 * by Wasabi Systems, Inc.
69 *
70 * Redistribution and use in source and binary forms, with or without
71 * modification, are permitted provided that the following conditions
72 * are met:
73 * 1. Redistributions of source code must retain the above copyright
74 * notice, this list of conditions and the following disclaimer.
75 * 2. Redistributions in binary form must reproduce the above copyright
76 * notice, this list of conditions and the following disclaimer in the
77 * documentation and/or other materials provided with the distribution.
78 *
79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
89 * POSSIBILITY OF SUCH DAMAGE.
90 */
91 /*
92 * Copyright (c) 1982, 1986, 1989, 1993, 1995
93 * The Regents of the University of California. All rights reserved.
94 * (c) UNIX System Laboratories, Inc.
95 * All or some portions of this file are derived from material licensed
96 * to the University of California by American Telephone and Telegraph
97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
98 * the permission of UNIX System Laboratories, Inc.
99 *
100 * Redistribution and use in source and binary forms, with or without
101 * modification, are permitted provided that the following conditions
102 * are met:
103 * 1. Redistributions of source code must retain the above copyright
104 * notice, this list of conditions and the following disclaimer.
105 * 2. Redistributions in binary form must reproduce the above copyright
106 * notice, this list of conditions and the following disclaimer in the
107 * documentation and/or other materials provided with the distribution.
108 * 3. Neither the name of the University nor the names of its contributors
109 * may be used to endorse or promote products derived from this software
110 * without specific prior written permission.
111 *
112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
122 * SUCH DAMAGE.
123 *
124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95
125 */
126
127 #include <sys/cdefs.h>
128 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.286 2015/08/12 18:28:01 dholland Exp $");
129
130 #ifdef _KERNEL_OPT
131 #include "opt_compat_netbsd.h"
132 #include "opt_uvm_page_trkown.h"
133 #endif
134
135 #include <sys/param.h>
136 #include <sys/systm.h>
137 #include <sys/namei.h>
138 #include <sys/resourcevar.h>
139 #include <sys/kernel.h>
140 #include <sys/file.h>
141 #include <sys/stat.h>
142 #include <sys/buf.h>
143 #include <sys/proc.h>
144 #include <sys/mount.h>
145 #include <sys/vnode.h>
146 #include <sys/pool.h>
147 #include <sys/signalvar.h>
148 #include <sys/kauth.h>
149 #include <sys/syslog.h>
150 #include <sys/fstrans.h>
151
152 #include <miscfs/fifofs/fifo.h>
153 #include <miscfs/genfs/genfs.h>
154 #include <miscfs/specfs/specdev.h>
155
156 #include <ufs/lfs/ulfs_inode.h>
157 #include <ufs/lfs/ulfsmount.h>
158 #include <ufs/lfs/ulfs_bswap.h>
159 #include <ufs/lfs/ulfs_extern.h>
160
161 #include <uvm/uvm.h>
162 #include <uvm/uvm_pmap.h>
163 #include <uvm/uvm_stat.h>
164 #include <uvm/uvm_pager.h>
165
166 #include <ufs/lfs/lfs.h>
167 #include <ufs/lfs/lfs_accessors.h>
168 #include <ufs/lfs/lfs_kernel.h>
169 #include <ufs/lfs/lfs_extern.h>
170
171 extern pid_t lfs_writer_daemon;
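/*
 * If nonzero, lfs_fsync() ignores FSYNC_LAZY (trickle sync) requests
 * instead of queueing the vnode on the paging chain for the writer daemon.
 */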
172 int lfs_ignore_lazy_sync = 1;
173
174 static int lfs_openextattr(void *v);
175 static int lfs_closeextattr(void *v);
176 static int lfs_getextattr(void *v);
177 static int lfs_setextattr(void *v);
178 static int lfs_listextattr(void *v);
179 static int lfs_deleteextattr(void *v);
180
181 /*
182 * A virgin directory (no blushing please).
183 */
184 static const struct lfs_dirtemplate mastertemplate = {
185 0, 12, LFS_DT_DIR, 1, ".",
186 0, LFS_DIRBLKSIZ - 12, LFS_DT_DIR, 2, ".."
187 };
188
189 /* Global vfs data structures for lfs. */
190 int (**lfs_vnodeop_p)(void *);
191 const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
192 { &vop_default_desc, vn_default_error },
193 { &vop_lookup_desc, ulfs_lookup }, /* lookup */
194 { &vop_create_desc, lfs_create }, /* create */
195 { &vop_whiteout_desc, ulfs_whiteout }, /* whiteout */
196 { &vop_mknod_desc, lfs_mknod }, /* mknod */
197 { &vop_open_desc, ulfs_open }, /* open */
198 { &vop_close_desc, lfs_close }, /* close */
199 { &vop_access_desc, ulfs_access }, /* access */
200 { &vop_getattr_desc, lfs_getattr }, /* getattr */
201 { &vop_setattr_desc, lfs_setattr }, /* setattr */
202 { &vop_read_desc, lfs_read }, /* read */
203 { &vop_write_desc, lfs_write }, /* write */
204 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */
205 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */
206 { &vop_ioctl_desc, ulfs_ioctl }, /* ioctl */
207 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */
208 { &vop_poll_desc, ulfs_poll }, /* poll */
209 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */
210 { &vop_revoke_desc, ulfs_revoke }, /* revoke */
211 { &vop_mmap_desc, lfs_mmap }, /* mmap */
212 { &vop_fsync_desc, lfs_fsync }, /* fsync */
213 { &vop_seek_desc, ulfs_seek }, /* seek */
214 { &vop_remove_desc, lfs_remove }, /* remove */
215 { &vop_link_desc, lfs_link }, /* link */
216 { &vop_rename_desc, lfs_rename }, /* rename */
217 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */
218 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */
219 { &vop_symlink_desc, lfs_symlink }, /* symlink */
220 { &vop_readdir_desc, ulfs_readdir }, /* readdir */
221 { &vop_readlink_desc, ulfs_readlink }, /* readlink */
222 { &vop_abortop_desc, ulfs_abortop }, /* abortop */
223 { &vop_inactive_desc, lfs_inactive }, /* inactive */
224 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
225 { &vop_lock_desc, ulfs_lock }, /* lock */
226 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
227 { &vop_bmap_desc, ulfs_bmap }, /* bmap */
228 { &vop_strategy_desc, lfs_strategy }, /* strategy */
229 { &vop_print_desc, ulfs_print }, /* print */
230 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
231 { &vop_pathconf_desc, ulfs_pathconf }, /* pathconf */
232 { &vop_advlock_desc, ulfs_advlock }, /* advlock */
233 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
234 { &vop_getpages_desc, lfs_getpages }, /* getpages */
235 { &vop_putpages_desc, lfs_putpages }, /* putpages */
236 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
237 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
238 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
239 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
240 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
241 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
242 { NULL, NULL }
243 };
244 const struct vnodeopv_desc lfs_vnodeop_opv_desc =
245 { &lfs_vnodeop_p, lfs_vnodeop_entries };
246
247 int (**lfs_specop_p)(void *);
248 const struct vnodeopv_entry_desc lfs_specop_entries[] = {
249 { &vop_default_desc, vn_default_error },
250 { &vop_lookup_desc, spec_lookup }, /* lookup */
251 { &vop_create_desc, spec_create }, /* create */
252 { &vop_mknod_desc, spec_mknod }, /* mknod */
253 { &vop_open_desc, spec_open }, /* open */
254 { &vop_close_desc, lfsspec_close }, /* close */
255 { &vop_access_desc, ulfs_access }, /* access */
256 { &vop_getattr_desc, lfs_getattr }, /* getattr */
257 { &vop_setattr_desc, lfs_setattr }, /* setattr */
258 { &vop_read_desc, ulfsspec_read }, /* read */
259 { &vop_write_desc, ulfsspec_write }, /* write */
260 { &vop_fallocate_desc, spec_fallocate }, /* fallocate */
261 { &vop_fdiscard_desc, spec_fdiscard }, /* fdiscard */
262 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
263 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
264 { &vop_poll_desc, spec_poll }, /* poll */
265 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */
266 { &vop_revoke_desc, spec_revoke }, /* revoke */
267 { &vop_mmap_desc, spec_mmap }, /* mmap */
268 { &vop_fsync_desc, spec_fsync }, /* fsync */
269 { &vop_seek_desc, spec_seek }, /* seek */
270 { &vop_remove_desc, spec_remove }, /* remove */
271 { &vop_link_desc, spec_link }, /* link */
272 { &vop_rename_desc, spec_rename }, /* rename */
273 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
274 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
275 { &vop_symlink_desc, spec_symlink }, /* symlink */
276 { &vop_readdir_desc, spec_readdir }, /* readdir */
277 { &vop_readlink_desc, spec_readlink }, /* readlink */
278 { &vop_abortop_desc, spec_abortop }, /* abortop */
279 { &vop_inactive_desc, lfs_inactive }, /* inactive */
280 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
281 { &vop_lock_desc, ulfs_lock }, /* lock */
282 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
283 { &vop_bmap_desc, spec_bmap }, /* bmap */
284 { &vop_strategy_desc, spec_strategy }, /* strategy */
285 { &vop_print_desc, ulfs_print }, /* print */
286 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
287 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
288 { &vop_advlock_desc, spec_advlock }, /* advlock */
289 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
290 { &vop_getpages_desc, spec_getpages }, /* getpages */
291 { &vop_putpages_desc, spec_putpages }, /* putpages */
292 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
293 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
294 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
295 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
296 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
297 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
298 { NULL, NULL }
299 };
300 const struct vnodeopv_desc lfs_specop_opv_desc =
301 { &lfs_specop_p, lfs_specop_entries };
302
303 int (**lfs_fifoop_p)(void *);
304 const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
305 { &vop_default_desc, vn_default_error },
306 { &vop_lookup_desc, vn_fifo_bypass }, /* lookup */
307 { &vop_create_desc, vn_fifo_bypass }, /* create */
308 { &vop_mknod_desc, vn_fifo_bypass }, /* mknod */
309 { &vop_open_desc, vn_fifo_bypass }, /* open */
310 { &vop_close_desc, lfsfifo_close }, /* close */
311 { &vop_access_desc, ulfs_access }, /* access */
312 { &vop_getattr_desc, lfs_getattr }, /* getattr */
313 { &vop_setattr_desc, lfs_setattr }, /* setattr */
314 { &vop_read_desc, ulfsfifo_read }, /* read */
315 { &vop_write_desc, ulfsfifo_write }, /* write */
316 { &vop_fallocate_desc, vn_fifo_bypass }, /* fallocate */
317 { &vop_fdiscard_desc, vn_fifo_bypass }, /* fdiscard */
318 { &vop_ioctl_desc, vn_fifo_bypass }, /* ioctl */
319 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
320 { &vop_poll_desc, vn_fifo_bypass }, /* poll */
321 { &vop_kqfilter_desc, vn_fifo_bypass }, /* kqfilter */
322 { &vop_revoke_desc, vn_fifo_bypass }, /* revoke */
323 { &vop_mmap_desc, vn_fifo_bypass }, /* mmap */
324 { &vop_fsync_desc, vn_fifo_bypass }, /* fsync */
325 { &vop_seek_desc, vn_fifo_bypass }, /* seek */
326 { &vop_remove_desc, vn_fifo_bypass }, /* remove */
327 { &vop_link_desc, vn_fifo_bypass }, /* link */
328 { &vop_rename_desc, vn_fifo_bypass }, /* rename */
329 { &vop_mkdir_desc, vn_fifo_bypass }, /* mkdir */
330 { &vop_rmdir_desc, vn_fifo_bypass }, /* rmdir */
331 { &vop_symlink_desc, vn_fifo_bypass }, /* symlink */
332 { &vop_readdir_desc, vn_fifo_bypass }, /* readdir */
333 { &vop_readlink_desc, vn_fifo_bypass }, /* readlink */
334 { &vop_abortop_desc, vn_fifo_bypass }, /* abortop */
335 { &vop_inactive_desc, lfs_inactive }, /* inactive */
336 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
337 { &vop_lock_desc, ulfs_lock }, /* lock */
338 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
339 { &vop_bmap_desc, vn_fifo_bypass }, /* bmap */
340 { &vop_strategy_desc, vn_fifo_bypass }, /* strategy */
341 { &vop_print_desc, ulfs_print }, /* print */
342 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
343 { &vop_pathconf_desc, vn_fifo_bypass }, /* pathconf */
344 { &vop_advlock_desc, vn_fifo_bypass }, /* advlock */
345 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
346 { &vop_putpages_desc, vn_fifo_bypass }, /* putpages */
347 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
348 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
349 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
350 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
351 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
352 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
353 { NULL, NULL }
354 };
355 const struct vnodeopv_desc lfs_fifoop_opv_desc =
356 { &lfs_fifoop_p, lfs_fifoop_entries };
357
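/*
 * Instantiate the LFS read and write vnode operations (lfs_read, lfs_write)
 * from the shared ulfs read/write template.
 */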
358 #define LFS_READWRITE
359 #include <ufs/lfs/ulfs_readwrite.c>
360 #undef LFS_READWRITE
361
362 /*
363 * Synch an open file.
364 */
365 /* ARGSUSED */
366 int
367 lfs_fsync(void *v)
368 {
369 struct vop_fsync_args /* {
370 struct vnode *a_vp;
371 kauth_cred_t a_cred;
372 int a_flags;
373 off_t a_offlo;
374 off_t a_offhi;
375 } */ *ap = v;
376 struct vnode *vp = ap->a_vp;
377 int error, wait;
378 struct inode *ip = VTOI(vp);
379 struct lfs *fs = ip->i_lfs;
380
381 /* If we're mounted read-only, don't try to sync. */
382 if (fs->lfs_ronly)
383 return 0;
384
385 /* If a removed vnode is being cleaned, no need to sync here. */
386 if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
387 return 0;
388
389 /*
390 * Trickle sync simply adds this vnode to the pager list, as if
391 * the pagedaemon had requested a pageout.
392 */
393 if (ap->a_flags & FSYNC_LAZY) {
394 if (lfs_ignore_lazy_sync == 0) {
395 mutex_enter(&lfs_lock);
396 if (!(ip->i_flags & IN_PAGING)) {
397 ip->i_flags |= IN_PAGING;
398 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
399 i_lfs_pchain);
400 }
401 wakeup(&lfs_writer_daemon);
402 mutex_exit(&lfs_lock);
403 }
404 return 0;
405 }
406
407 /*
408 * If a vnode is being cleaned, flush it out before we try to
409 * reuse it. This prevents the cleaner from writing files twice
410 * in the same partial segment, causing an accounting underflow.
411 */
412 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) {
413 lfs_vflush(vp);
414 }
415
416 wait = (ap->a_flags & FSYNC_WAIT);
417 do {
418 mutex_enter(vp->v_interlock);
419 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
420 round_page(ap->a_offhi),
421 PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
422 if (error == EAGAIN) {
423 mutex_enter(&lfs_lock);
424 mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
425 "lfs_fsync", hz / 100 + 1, &lfs_lock);
426 mutex_exit(&lfs_lock);
427 }
428 } while (error == EAGAIN);
429 if (error)
430 return error;
431
432 if ((ap->a_flags & FSYNC_DATAONLY) == 0)
433 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);
434
435 if (error == 0 && ap->a_flags & FSYNC_CACHE) {
436 int l = 0;
437 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
438 curlwp->l_cred);
439 }
440 if (wait && !VPISEMPTY(vp))
441 LFS_SET_UINO(ip, IN_MODIFIED);
442
443 return error;
444 }
445
446 /*
447 * Take IN_ADIROP off, then call ulfs_inactive.
448 */
449 int
450 lfs_inactive(void *v)
451 {
452 struct vop_inactive_args /* {
453 struct vnode *a_vp;
454 } */ *ap = v;
455
456 lfs_unmark_vnode(ap->a_vp);
457
458 /*
459 * The Ifile is only ever inactivated on unmount.
460 * Streamline this process by not giving it more dirty blocks.
461 */
462 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
463 mutex_enter(&lfs_lock);
464 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
465 mutex_exit(&lfs_lock);
466 VOP_UNLOCK(ap->a_vp);
467 return 0;
468 }
469
470 #ifdef DEBUG
471 /*
472 * This might happen on unmount.
473 * XXX If it happens at any other time, it should be a panic.
474 */
475 if (ap->a_vp->v_uflag & VU_DIROP) {
476 struct inode *ip = VTOI(ap->a_vp);
477 printf("lfs_inactive: inactivating VU_DIROP? ino = %d\n", (int)ip->i_number);
478 }
479 #endif /* DEBUG */
480
481 return ulfs_inactive(v);
482 }
483
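/*
 * Prepare for a directory operation: reserve blocks for the change, wait
 * out any active segment writer and the dirop limits, then take references
 * on dvp (and vp, if given) and mark them as dirop vnodes.  On success the
 * caller must eventually undo this with UNMARK_VNODE() and lfs_unset_dirop().
 */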
484 int
485 lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
486 {
487 struct lfs *fs;
488 int error;
489
490 KASSERT(VOP_ISLOCKED(dvp));
491 KASSERT(vp == NULL || VOP_ISLOCKED(vp));
492
493 fs = VTOI(dvp)->i_lfs;
494
495 ASSERT_NO_SEGLOCK(fs);
496 /*
497 * LFS_NRESERVE calculates direct and indirect blocks as well
498 * as an inode block; an overestimate in most cases.
499 */
500 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
501 return (error);
502
503 restart:
504 mutex_enter(&lfs_lock);
505 if (fs->lfs_dirops == 0) {
506 mutex_exit(&lfs_lock);
507 lfs_check(dvp, LFS_UNUSED_LBN, 0);
508 mutex_enter(&lfs_lock);
509 }
510 while (fs->lfs_writer) {
511 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH,
512 "lfs_sdirop", 0, &lfs_lock);
513 if (error == EINTR) {
514 mutex_exit(&lfs_lock);
515 goto unreserve;
516 }
517 }
518 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
519 wakeup(&lfs_writer_daemon);
520 mutex_exit(&lfs_lock);
521 preempt();
522 goto restart;
523 }
524
525 if (lfs_dirvcount > LFS_MAX_DIROP) {
526 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
527 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
528 if ((error = mtsleep(&lfs_dirvcount,
529 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
530 &lfs_lock)) != 0) {
531 goto unreserve;
532 }
533 goto restart;
534 }
535
536 ++fs->lfs_dirops;
537 /* fs->lfs_doifile = 1; */ /* XXX why? --ks */
538 mutex_exit(&lfs_lock);
539
540 /* Hold a reference so SET_ENDOP will be happy */
541 vref(dvp);
542 if (vp) {
543 vref(vp);
544 MARK_VNODE(vp);
545 }
546
547 MARK_VNODE(dvp);
548 return 0;
549
550 unreserve:
551 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
552 return error;
553 }
554
555 /*
556 * Opposite of lfs_set_dirop... mostly. For now at least the caller must
557 * call UNMARK_VNODE(dvp) explicitly first. (XXX: clean that up)
558 */
559 void
560 lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str)
561 {
562 mutex_enter(&lfs_lock);
563 --fs->lfs_dirops;
564 if (!fs->lfs_dirops) {
565 if (fs->lfs_nadirop) {
566 panic("lfs_unset_dirop: %s: no dirops but "
567 " nadirop=%d", str,
568 fs->lfs_nadirop);
569 }
570 wakeup(&fs->lfs_writer);
571 mutex_exit(&lfs_lock);
572 lfs_check(dvp, LFS_UNUSED_LBN, 0);
573 } else {
574 mutex_exit(&lfs_lock);
575 }
576 lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs));
577 }
578
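/*
 * Flag a vnode as taking part in an active directory operation: set
 * IN_ADIROP and, the first time through, take a reference, set VU_DIROP
 * and put the inode on the dirop chain.
 */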
579 void
580 lfs_mark_vnode(struct vnode *vp)
581 {
582 struct inode *ip = VTOI(vp);
583 struct lfs *fs = ip->i_lfs;
584
585 mutex_enter(&lfs_lock);
586 if (!(ip->i_flag & IN_ADIROP)) {
587 if (!(vp->v_uflag & VU_DIROP)) {
588 mutex_exit(&lfs_lock);
589 vref(vp);
590 mutex_enter(&lfs_lock);
591 ++lfs_dirvcount;
592 ++fs->lfs_dirvcount;
593 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
594 vp->v_uflag |= VU_DIROP;
595 }
596 ++fs->lfs_nadirop;
597 ip->i_flag &= ~IN_CDIROP;
598 ip->i_flag |= IN_ADIROP;
599 } else
600 KASSERT(vp->v_uflag & VU_DIROP);
601 mutex_exit(&lfs_lock);
602 }
603
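/*
 * Clear IN_ADIROP and drop the active-dirop count.  The VU_DIROP flag and
 * the dirop chain entry are cleared elsewhere, once the directory
 * operation has been written.
 */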
604 void
605 lfs_unmark_vnode(struct vnode *vp)
606 {
607 struct inode *ip = VTOI(vp);
608
609 mutex_enter(&lfs_lock);
610 if (ip && (ip->i_flag & IN_ADIROP)) {
611 KASSERT(vp->v_uflag & VU_DIROP);
612 --ip->i_lfs->lfs_nadirop;
613 ip->i_flag &= ~IN_ADIROP;
614 }
615 mutex_exit(&lfs_lock);
616 }
617
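/*
 * Create a symbolic link, bracketing ulfs_makeinode() with
 * lfs_set_dirop()/lfs_unset_dirop().  Short targets are stored in the
 * inode itself; longer ones are written through the buffer cache.
 */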
618 int
619 lfs_symlink(void *v)
620 {
621 struct vop_symlink_v3_args /* {
622 struct vnode *a_dvp;
623 struct vnode **a_vpp;
624 struct componentname *a_cnp;
625 struct vattr *a_vap;
626 char *a_target;
627 } */ *ap = v;
628 struct lfs *fs;
629 struct vnode *dvp, **vpp;
630 struct inode *ip;
631 struct ulfs_lookup_results *ulr;
632 ssize_t len; /* XXX should be size_t */
633 int error;
634
635 dvp = ap->a_dvp;
636 vpp = ap->a_vpp;
637
638 KASSERT(vpp != NULL);
639 KASSERT(*vpp == NULL);
640 KASSERT(ap->a_vap->va_type == VLNK);
641
642 /* XXX should handle this material another way */
643 ulr = &VTOI(ap->a_dvp)->i_crap;
644 ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
645
646 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
647 ASSERT_NO_SEGLOCK(fs);
648 if (fs->lfs_ronly) {
649 return EROFS;
650 }
651
652 error = lfs_set_dirop(dvp, NULL);
653 if (error)
654 return error;
655
656 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
657 error = ulfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp);
658 if (error) {
659 goto out;
660 }
661
662 VN_KNOTE(ap->a_dvp, NOTE_WRITE);
663 ip = VTOI(*vpp);
664
665 len = strlen(ap->a_target);
666 if (len < ip->i_lfs->um_maxsymlinklen) {
667 memcpy((char *)SHORTLINK(ip), ap->a_target, len);
668 ip->i_size = len;
669 DIP_ASSIGN(ip, size, len);
670 uvm_vnp_setsize(*vpp, ip->i_size);
671 ip->i_flag |= IN_CHANGE | IN_UPDATE;
672 if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME)
673 ip->i_flag |= IN_ACCESS;
674 } else {
675 error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
676 IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
677 NULL);
678 }
679
680 VOP_UNLOCK(*vpp);
681 if (error)
682 vrele(*vpp);
683
684 out:
685 fstrans_done(dvp->v_mount);
686
687 UNMARK_VNODE(dvp);
688 /* XXX: is it even possible for the symlink to get MARK'd? */
689 UNMARK_VNODE(*vpp);
690 if (error) {
691 *vpp = NULL;
692 }
693 lfs_unset_dirop(fs, dvp, "symlink");
694
695 vrele(dvp);
696 return (error);
697 }
698
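/*
 * Create a special file.  The new vnode is written out synchronously so
 * that it never has to be flushed later while marked VU_DIROP or during
 * reclaim.
 */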
699 int
700 lfs_mknod(void *v)
701 {
702 struct vop_mknod_v3_args /* {
703 struct vnode *a_dvp;
704 struct vnode **a_vpp;
705 struct componentname *a_cnp;
706 struct vattr *a_vap;
707 } */ *ap = v;
708 struct lfs *fs;
709 struct vnode *dvp, **vpp;
710 struct vattr *vap;
711 struct inode *ip;
712 int error;
713 ino_t ino;
714 struct ulfs_lookup_results *ulr;
715
716 dvp = ap->a_dvp;
717 vpp = ap->a_vpp;
718 vap = ap->a_vap;
719
720 KASSERT(vpp != NULL);
721 KASSERT(*vpp == NULL);
722
723 /* XXX should handle this material another way */
724 ulr = &VTOI(dvp)->i_crap;
725 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
726
727 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
728 ASSERT_NO_SEGLOCK(fs);
729 if (fs->lfs_ronly) {
730 return EROFS;
731 }
732
733 error = lfs_set_dirop(dvp, NULL);
734 if (error)
735 return error;
736
737 fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
738 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
739
740 /* Either way we're done with the dirop at this point */
741 UNMARK_VNODE(dvp);
742 UNMARK_VNODE(*vpp);
743 lfs_unset_dirop(fs, dvp, "mknod");
744 /*
745 * XXX this is where this used to be (though inside some evil
746 * macros) but it clearly should be moved further down.
747 * - dholland 20140515
748 */
749 vrele(dvp);
750
751 if (error) {
752 fstrans_done(ap->a_dvp->v_mount);
753 *vpp = NULL;
754 return (error);
755 }
756
757 VN_KNOTE(dvp, NOTE_WRITE);
758 ip = VTOI(*vpp);
759 ino = ip->i_number;
760 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
761
762 /*
763 * Call fsync to write the vnode so that we don't have to deal with
764 * flushing it when it's marked VU_DIROP or reclaiming.
765 *
766 * XXX KS - If we can't flush we also can't call vgone(), so must
767 * return. But, that leaves this vnode in limbo, also not good.
768 * Can this ever happen (barring hardware failure)?
769 */
770 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
771 panic("lfs_mknod: couldn't fsync (ino %llu)",
772 (unsigned long long)ino);
773 /* return (error); */
774 }
775
776 fstrans_done(ap->a_dvp->v_mount);
777 KASSERT(error == 0);
778 VOP_UNLOCK(*vpp);
779 return (0);
780 }
781
782 /*
783 * Create a regular file
784 */
785 int
786 lfs_create(void *v)
787 {
788 struct vop_create_v3_args /* {
789 struct vnode *a_dvp;
790 struct vnode **a_vpp;
791 struct componentname *a_cnp;
792 struct vattr *a_vap;
793 } */ *ap = v;
794 struct lfs *fs;
795 struct vnode *dvp, **vpp;
796 struct vattr *vap;
797 struct ulfs_lookup_results *ulr;
798 int error;
799
800 dvp = ap->a_dvp;
801 vpp = ap->a_vpp;
802 vap = ap->a_vap;
803
804 KASSERT(vpp != NULL);
805 KASSERT(*vpp == NULL);
806
807 /* XXX should handle this material another way */
808 ulr = &VTOI(dvp)->i_crap;
809 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
810
811 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
812 ASSERT_NO_SEGLOCK(fs);
813 if (fs->lfs_ronly) {
814 return EROFS;
815 }
816
817 error = lfs_set_dirop(dvp, NULL);
818 if (error)
819 return error;
820
821 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
822 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
823 if (error) {
824 fstrans_done(dvp->v_mount);
825 goto out;
826 }
827 fstrans_done(dvp->v_mount);
828 VN_KNOTE(dvp, NOTE_WRITE);
829 VOP_UNLOCK(*vpp);
830
831 out:
832
833 UNMARK_VNODE(dvp);
834 UNMARK_VNODE(*vpp);
835 if (error) {
836 *vpp = NULL;
837 }
838 lfs_unset_dirop(fs, dvp, "create");
839
840 vrele(dvp);
841 return (error);
842 }
843
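/*
 * Create a directory.  The inode is obtained directly rather than through
 * ulfs_makeinode() so that the "." and ".." entries can be written before
 * the new directory is entered in its parent.
 */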
844 int
845 lfs_mkdir(void *v)
846 {
847 struct vop_mkdir_v3_args /* {
848 struct vnode *a_dvp;
849 struct vnode **a_vpp;
850 struct componentname *a_cnp;
851 struct vattr *a_vap;
852 } */ *ap = v;
853 struct lfs *fs;
854 struct vnode *dvp, *tvp, **vpp;
855 struct inode *dp, *ip;
856 struct componentname *cnp;
857 struct vattr *vap;
858 struct ulfs_lookup_results *ulr;
859 struct buf *bp;
860 struct lfs_dirtemplate dirtemplate;
861 struct lfs_direct *newdir;
862 int dirblksiz;
863 int error;
864
865 dvp = ap->a_dvp;
866 tvp = NULL;
867 vpp = ap->a_vpp;
868 cnp = ap->a_cnp;
869 vap = ap->a_vap;
870
871 dp = VTOI(dvp);
872 ip = NULL;
873
874 KASSERT(vap->va_type == VDIR);
875 KASSERT(vpp != NULL);
876 KASSERT(*vpp == NULL);
877
878 /* XXX should handle this material another way */
879 ulr = &dp->i_crap;
880 ULFS_CHECK_CRAPCOUNTER(dp);
881
882 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
883 ASSERT_NO_SEGLOCK(fs);
884 if (fs->lfs_ronly) {
885 return EROFS;
886 }
887 dirblksiz = fs->um_dirblksiz;
888
889 error = lfs_set_dirop(dvp, NULL);
890 if (error)
891 return error;
892
893 fstrans_start(dvp->v_mount, FSTRANS_SHARED);
894
895 if ((nlink_t)dp->i_nlink >= LINK_MAX) {
896 error = EMLINK;
897 goto out;
898 }
899
900 /*
901 * Must simulate part of ulfs_makeinode here to acquire the inode,
902 * but not have it entered in the parent directory. The entry is
903 * made later after writing "." and ".." entries.
904 */
905 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, ap->a_vpp);
906 if (error)
907 goto out;
908
909 error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
910 if (error) {
911 vrele(*ap->a_vpp);
912 *ap->a_vpp = NULL;
913 goto out;
914 }
915
916 tvp = *ap->a_vpp;
917 lfs_mark_vnode(tvp);
918 ip = VTOI(tvp);
919 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
920 ip->i_nlink = 2;
921 DIP_ASSIGN(ip, nlink, 2);
922 if (cnp->cn_flags & ISWHITEOUT) {
923 ip->i_flags |= UF_OPAQUE;
924 DIP_ASSIGN(ip, flags, ip->i_flags);
925 }
926
927 /*
928 * Bump link count in parent directory to reflect work done below.
929 */
930 dp->i_nlink++;
931 DIP_ASSIGN(dp, nlink, dp->i_nlink);
932 dp->i_flag |= IN_CHANGE;
933 if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
934 goto bad;
935
936 /*
937 * Initialize directory with "." and ".." from static template.
938 */
939 dirtemplate = mastertemplate;
940 dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen;
941 dirtemplate.dot_ino = ulfs_rw32(ip->i_number, ULFS_MPNEEDSWAP(fs));
942 dirtemplate.dotdot_ino = ulfs_rw32(dp->i_number, ULFS_MPNEEDSWAP(fs));
943 dirtemplate.dot_reclen = ulfs_rw16(dirtemplate.dot_reclen,
944 ULFS_MPNEEDSWAP(fs));
945 dirtemplate.dotdot_reclen = ulfs_rw16(dirtemplate.dotdot_reclen,
946 ULFS_MPNEEDSWAP(fs));
947 if (fs->um_maxsymlinklen <= 0) {
948 #if BYTE_ORDER == LITTLE_ENDIAN
949 if (ULFS_MPNEEDSWAP(fs) == 0)
950 #else
951 if (ULFS_MPNEEDSWAP(fs) != 0)
952 #endif
953 {
954 dirtemplate.dot_type = dirtemplate.dot_namlen;
955 dirtemplate.dotdot_type = dirtemplate.dotdot_namlen;
956 dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0;
957 } else
958 dirtemplate.dot_type = dirtemplate.dotdot_type = 0;
959 }
960 if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
961 B_CLRBUF, &bp)) != 0)
962 goto bad;
963 ip->i_size = dirblksiz;
964 DIP_ASSIGN(ip, size, dirblksiz);
965 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
966 uvm_vnp_setsize(tvp, ip->i_size);
967 memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate);
968
969 /*
970 * Directory set up; now install its entry in the parent directory.
971 */
972 if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
973 goto bad;
974 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
975 goto bad;
976 }
977 newdir = pool_cache_get(ulfs_direct_cache, PR_WAITOK);
978 ulfs_makedirentry(ip, cnp, newdir);
979 error = ulfs_direnter(dvp, ulr, tvp, newdir, cnp, bp);
980 pool_cache_put(ulfs_direct_cache, newdir);
981 bad:
982 if (error == 0) {
983 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
984 VOP_UNLOCK(tvp);
985 } else {
986 dp->i_nlink--;
987 DIP_ASSIGN(dp, nlink, dp->i_nlink);
988 dp->i_flag |= IN_CHANGE;
989 /*
990 * No need to do an explicit lfs_truncate here, vrele will
991 * do this for us because we set the link count to 0.
992 */
993 ip->i_nlink = 0;
994 DIP_ASSIGN(ip, nlink, 0);
995 ip->i_flag |= IN_CHANGE;
996 /* If IN_ADIROP, account for it */
997 lfs_unmark_vnode(tvp);
998 vput(tvp);
999 }
1000
1001 out:
1002 fstrans_done(dvp->v_mount);
1003
1004 UNMARK_VNODE(dvp);
1005 UNMARK_VNODE(*vpp);
1006 if (error) {
1007 *vpp = NULL;
1008 }
1009 lfs_unset_dirop(fs, dvp, "mkdir");
1010
1011 vrele(dvp);
1012 return (error);
1013 }
1014
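/*
 * Remove a file.  Wraps ulfs_remove() in a directory operation and records
 * the inode as an orphan if this was its last link.
 */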
1015 int
1016 lfs_remove(void *v)
1017 {
1018 struct vop_remove_args /* {
1019 struct vnode *a_dvp;
1020 struct vnode *a_vp;
1021 struct componentname *a_cnp;
1022 } */ *ap = v;
1023 struct vnode *dvp, *vp;
1024 struct inode *ip;
1025 int error;
1026
1027 dvp = ap->a_dvp;
1028 vp = ap->a_vp;
1029 ip = VTOI(vp);
1030 if ((error = lfs_set_dirop(dvp, vp)) != 0) {
1031 if (dvp == vp)
1032 vrele(vp);
1033 else
1034 vput(vp);
1035 vput(dvp);
1036 return error;
1037 }
1038 error = ulfs_remove(ap);
1039 if (ip->i_nlink == 0)
1040 lfs_orphan(ip->i_lfs, ip->i_number);
1041
1042 UNMARK_VNODE(dvp);
1043 if (ap->a_vp) {
1044 UNMARK_VNODE(ap->a_vp);
1045 }
1046 lfs_unset_dirop(ip->i_lfs, dvp, "remove");
1047 vrele(dvp);
1048 if (ap->a_vp) {
1049 vrele(ap->a_vp);
1050 }
1051
1052 return (error);
1053 }
1054
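/*
 * Remove a directory.  Wraps ulfs_rmdir() in a directory operation and
 * records the inode as an orphan once its link count reaches zero.
 */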
1055 int
1056 lfs_rmdir(void *v)
1057 {
1058 struct vop_rmdir_args /* {
1059 struct vnodeop_desc *a_desc;
1060 struct vnode *a_dvp;
1061 struct vnode *a_vp;
1062 struct componentname *a_cnp;
1063 } */ *ap = v;
1064 struct vnode *vp;
1065 struct inode *ip;
1066 int error;
1067
1068 vp = ap->a_vp;
1069 ip = VTOI(vp);
1070 if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) {
1071 if (ap->a_dvp == vp)
1072 vrele(ap->a_dvp);
1073 else
1074 vput(ap->a_dvp);
1075 vput(vp);
1076 return error;
1077 }
1078 error = ulfs_rmdir(ap);
1079 if (ip->i_nlink == 0)
1080 lfs_orphan(ip->i_lfs, ip->i_number);
1081
1082 UNMARK_VNODE(ap->a_dvp);
1083 if (ap->a_vp) {
1084 UNMARK_VNODE(ap->a_vp);
1085 }
1086 lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir");
1087 vrele(ap->a_dvp);
1088 if (ap->a_vp) {
1089 vrele(ap->a_vp);
1090 }
1091
1092 return (error);
1093 }
1094
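/*
 * Create a hard link.  Wraps ulfs_link() in a directory operation on the
 * parent directory.
 */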
1095 int
1096 lfs_link(void *v)
1097 {
1098 struct vop_link_v2_args /* {
1099 struct vnode *a_dvp;
1100 struct vnode *a_vp;
1101 struct componentname *a_cnp;
1102 } */ *ap = v;
1103 struct lfs *fs;
1104 struct vnode *dvp;
1105 int error;
1106
1107 dvp = ap->a_dvp;
1108
1109 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
1110 ASSERT_NO_SEGLOCK(fs);
1111 if (fs->lfs_ronly) {
1112 return EROFS;
1113 }
1114
1115 error = lfs_set_dirop(dvp, NULL);
1116 if (error) {
1117 return error;
1118 }
1119
1120 error = ulfs_link(ap);
1121
1122 UNMARK_VNODE(dvp);
1123 lfs_unset_dirop(fs, dvp, "link");
1124 vrele(dvp);
1125
1126 return (error);
1127 }
1128
1129 /* XXX hack to avoid calling ITIMES in getattr */
1130 int
1131 lfs_getattr(void *v)
1132 {
1133 struct vop_getattr_args /* {
1134 struct vnode *a_vp;
1135 struct vattr *a_vap;
1136 kauth_cred_t a_cred;
1137 } */ *ap = v;
1138 struct vnode *vp = ap->a_vp;
1139 struct inode *ip = VTOI(vp);
1140 struct vattr *vap = ap->a_vap;
1141 struct lfs *fs = ip->i_lfs;
1142
1143 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1144 /*
1145 * Copy from inode table
1146 */
1147 vap->va_fsid = ip->i_dev;
1148 vap->va_fileid = ip->i_number;
1149 vap->va_mode = ip->i_mode & ~LFS_IFMT;
1150 vap->va_nlink = ip->i_nlink;
1151 vap->va_uid = ip->i_uid;
1152 vap->va_gid = ip->i_gid;
1153 vap->va_rdev = (dev_t)ip->i_ffs1_rdev;
1154 vap->va_size = vp->v_size;
1155 vap->va_atime.tv_sec = ip->i_ffs1_atime;
1156 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec;
1157 vap->va_mtime.tv_sec = ip->i_ffs1_mtime;
1158 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec;
1159 vap->va_ctime.tv_sec = ip->i_ffs1_ctime;
1160 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec;
1161 vap->va_flags = ip->i_flags;
1162 vap->va_gen = ip->i_gen;
1163 /* this doesn't belong here */
1164 if (vp->v_type == VBLK)
1165 vap->va_blocksize = BLKDEV_IOSIZE;
1166 else if (vp->v_type == VCHR)
1167 vap->va_blocksize = MAXBSIZE;
1168 else
1169 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
1170 vap->va_bytes = lfs_fsbtob(fs, ip->i_lfs_effnblks);
1171 vap->va_type = vp->v_type;
1172 vap->va_filerev = ip->i_modrev;
1173 fstrans_done(vp->v_mount);
1174 return (0);
1175 }
1176
1177 /*
1178 * Check to make sure the inode blocks won't choke the buffer
1179 * cache, then call ulfs_setattr as usual.
1180 */
1181 int
1182 lfs_setattr(void *v)
1183 {
1184 struct vop_setattr_args /* {
1185 struct vnode *a_vp;
1186 struct vattr *a_vap;
1187 kauth_cred_t a_cred;
1188 } */ *ap = v;
1189 struct vnode *vp = ap->a_vp;
1190
1191 lfs_check(vp, LFS_UNUSED_LBN, 0);
1192 return ulfs_setattr(v);
1193 }
1194
1195 /*
1196 * Release the block we hold on lfs_newseg wrapping. Called on file close,
1197 * or explicitly from LFCNWRAPGO. Called with the interlock held.
1198 */
1199 static int
1200 lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
1201 {
1202 if (fs->lfs_stoplwp != curlwp)
1203 return EBUSY;
1204
1205 fs->lfs_stoplwp = NULL;
1206 cv_signal(&fs->lfs_stopcv);
1207
1208 KASSERT(fs->lfs_nowrap > 0);
1209 if (fs->lfs_nowrap <= 0) {
1210 return 0;
1211 }
1212
1213 if (--fs->lfs_nowrap == 0) {
1214 log(LOG_NOTICE, "%s: re-enabled log wrap\n",
1215 lfs_sb_getfsmnt(fs));
1216 wakeup(&fs->lfs_wrappass);
1217 lfs_wakeup_cleaner(fs);
1218 }
1219 if (waitfor) {
1220 mtsleep(&fs->lfs_nextsegsleep, PCATCH | PUSER, "segment",
1221 0, &lfs_lock);
1222 }
1223
1224 return 0;
1225 }
1226
1227 /*
1228 * Close called.
1229 *
1230 * Update the times on the inode.
1231 */
1232 /* ARGSUSED */
1233 int
1234 lfs_close(void *v)
1235 {
1236 struct vop_close_args /* {
1237 struct vnode *a_vp;
1238 int a_fflag;
1239 kauth_cred_t a_cred;
1240 } */ *ap = v;
1241 struct vnode *vp = ap->a_vp;
1242 struct inode *ip = VTOI(vp);
1243 struct lfs *fs = ip->i_lfs;
1244
1245 if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
1246 fs->lfs_stoplwp == curlwp) {
1247 mutex_enter(&lfs_lock);
1248 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
1249 lfs_wrapgo(fs, ip, 0);
1250 mutex_exit(&lfs_lock);
1251 }
1252
1253 if (vp == ip->i_lfs->lfs_ivnode &&
1254 vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
1255 return 0;
1256
1257 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1258 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) {
1259 LFS_ITIMES(ip, NULL, NULL, NULL);
1260 }
1261 fstrans_done(vp->v_mount);
1262 return (0);
1263 }
1264
1265 /*
1266 * Close wrapper for special devices.
1267 *
1268 * Update the times on the inode then do device close.
1269 */
1270 int
1271 lfsspec_close(void *v)
1272 {
1273 struct vop_close_args /* {
1274 struct vnode *a_vp;
1275 int a_fflag;
1276 kauth_cred_t a_cred;
1277 } */ *ap = v;
1278 struct vnode *vp;
1279 struct inode *ip;
1280
1281 vp = ap->a_vp;
1282 ip = VTOI(vp);
1283 if (vp->v_usecount > 1) {
1284 LFS_ITIMES(ip, NULL, NULL, NULL);
1285 }
1286 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
1287 }
1288
1289 /*
1290 * Close wrapper for fifo's.
1291 *
1292 * Update the times on the inode then do device close.
1293 */
1294 int
1295 lfsfifo_close(void *v)
1296 {
1297 struct vop_close_args /* {
1298 struct vnode *a_vp;
1299 int a_fflag;
1300 kauth_cred_t a_cred;
1301 } */ *ap = v;
1302 struct vnode *vp;
1303 struct inode *ip;
1304
1305 vp = ap->a_vp;
1306 ip = VTOI(vp);
1307 if (ap->a_vp->v_usecount > 1) {
1308 LFS_ITIMES(ip, NULL, NULL, NULL);
1309 }
1310 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
1311 }
1312
1313 /*
1314 * Reclaim an inode so that it can be used for other purposes.
1315 */
1316
1317 int
1318 lfs_reclaim(void *v)
1319 {
1320 struct vop_reclaim_args /* {
1321 struct vnode *a_vp;
1322 } */ *ap = v;
1323 struct vnode *vp = ap->a_vp;
1324 struct inode *ip = VTOI(vp);
1325 struct lfs *fs = ip->i_lfs;
1326 int error;
1327
1328 /*
1329 * The inode must be freed and updated before being removed
1330 * from its hash chain. Other threads trying to gain a hold
1331 * or lock on the inode will be stalled.
1332 */
1333 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1334 lfs_vfree(vp, ip->i_number, ip->i_omode);
1335
1336 mutex_enter(&lfs_lock);
1337 LFS_CLR_UINO(ip, IN_ALLMOD);
1338 mutex_exit(&lfs_lock);
1339 if ((error = ulfs_reclaim(vp)))
1340 return (error);
1341
1342 /*
1343 * Take us off the paging and/or dirop queues if we were on them.
1344 * We shouldn't be on them.
1345 */
1346 mutex_enter(&lfs_lock);
1347 if (ip->i_flags & IN_PAGING) {
1348 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n",
1349 lfs_sb_getfsmnt(fs));
1350 ip->i_flags &= ~IN_PAGING;
1351 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
1352 }
1353 if (vp->v_uflag & VU_DIROP) {
1354 panic("reclaimed vnode is VU_DIROP");
1355 vp->v_uflag &= ~VU_DIROP;
1356 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
1357 }
1358 mutex_exit(&lfs_lock);
1359
1360 pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din);
1361 lfs_deregister_all(vp);
1362 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
1363 ip->inode_ext.lfs = NULL;
1364 genfs_node_destroy(vp);
1365 pool_put(&lfs_inode_pool, vp->v_data);
1366 vp->v_data = NULL;
1367 return (0);
1368 }
1369
1370 /*
1371 * Read a block from a storage device.
1372 *
1373 * Calculate the logical to physical mapping if not done already,
1374 * then call the device strategy routine.
1375 *
1376 * In order to avoid reading blocks that are in the process of being
1377 * written by the cleaner---and hence are not mutexed by the normal
1378 * buffer cache / page cache mechanisms---check for collisions before
1379 * reading.
1380 *
1381 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
1382 * the active cleaner test.
1383 *
1384 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
1385 */
1386 int
1387 lfs_strategy(void *v)
1388 {
1389 struct vop_strategy_args /* {
1390 struct vnode *a_vp;
1391 struct buf *a_bp;
1392 } */ *ap = v;
1393 struct buf *bp;
1394 struct lfs *fs;
1395 struct vnode *vp;
1396 struct inode *ip;
1397 daddr_t tbn;
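/* Give up waiting for the cleaner after this many sleep/retry rounds. */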
1398 #define MAXLOOP 25
1399 int i, sn, error, slept, loopcount;
1400
1401 bp = ap->a_bp;
1402 vp = ap->a_vp;
1403 ip = VTOI(vp);
1404 fs = ip->i_lfs;
1405
1406 /* lfs uses its strategy routine only for read */
1407 KASSERT(bp->b_flags & B_READ);
1408
1409 if (vp->v_type == VBLK || vp->v_type == VCHR)
1410 panic("lfs_strategy: spec");
1411 KASSERT(bp->b_bcount != 0);
1412 if (bp->b_blkno == bp->b_lblkno) {
1413 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
1414 NULL);
1415 if (error) {
1416 bp->b_error = error;
1417 bp->b_resid = bp->b_bcount;
1418 biodone(bp);
1419 return (error);
1420 }
1421 if ((long)bp->b_blkno == -1) /* no valid data */
1422 clrbuf(bp);
1423 }
1424 if ((long)bp->b_blkno < 0) { /* block is not on disk */
1425 bp->b_resid = bp->b_bcount;
1426 biodone(bp);
1427 return (0);
1428 }
1429
1430 slept = 1;
1431 loopcount = 0;
1432 mutex_enter(&lfs_lock);
1433 while (slept && fs->lfs_seglock) {
1434 mutex_exit(&lfs_lock);
1435 /*
1436 * Look through list of intervals.
1437 * There will only be intervals to look through
1438 * if the cleaner holds the seglock.
1439 * Since the cleaner is synchronous, we can trust
1440 * the list of intervals to be current.
1441 */
1442 tbn = LFS_DBTOFSB(fs, bp->b_blkno);
1443 sn = lfs_dtosn(fs, tbn);
1444 slept = 0;
1445 for (i = 0; i < fs->lfs_cleanind; i++) {
1446 if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
1447 tbn >= fs->lfs_cleanint[i]) {
1448 DLOG((DLOG_CLEAN,
1449 "lfs_strategy: ino %d lbn %" PRId64
1450 " ind %d sn %d fsb %" PRIx64
1451 " given sn %d fsb %" PRIx64 "\n",
1452 ip->i_number, bp->b_lblkno, i,
1453 lfs_dtosn(fs, fs->lfs_cleanint[i]),
1454 fs->lfs_cleanint[i], sn, tbn));
1455 DLOG((DLOG_CLEAN,
1456 "lfs_strategy: sleeping on ino %d lbn %"
1457 PRId64 "\n", ip->i_number, bp->b_lblkno));
1458 mutex_enter(&lfs_lock);
1459 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
1460 /*
1461 * Cleaner can't wait for itself.
1462 * Instead, wait for the blocks
1463 * to be written to disk.
1464 * XXX we need pribio in the test
1465 * XXX here.
1466 */
1467 mtsleep(&fs->lfs_iocount,
1468 (PRIBIO + 1) | PNORELOCK,
1469 "clean2", hz/10 + 1,
1470 &lfs_lock);
1471 slept = 1;
1472 ++loopcount;
1473 break;
1474 } else if (fs->lfs_seglock) {
1475 mtsleep(&fs->lfs_seglock,
1476 (PRIBIO + 1) | PNORELOCK,
1477 "clean1", 0,
1478 &lfs_lock);
1479 slept = 1;
1480 break;
1481 }
1482 mutex_exit(&lfs_lock);
1483 }
1484 }
1485 mutex_enter(&lfs_lock);
1486 if (loopcount > MAXLOOP) {
1487 printf("lfs_strategy: breaking out of clean2 loop\n");
1488 break;
1489 }
1490 }
1491 mutex_exit(&lfs_lock);
1492
1493 vp = ip->i_devvp;
1494 return VOP_STRATEGY(vp, bp);
1495 }
1496
1497 /*
1498 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
1499 * Technically this is a checkpoint (the on-disk state is valid)
1500 * even though we are leaving out all the file data.
1501 */
1502 int
1503 lfs_flush_dirops(struct lfs *fs)
1504 {
1505 struct inode *ip, *nip;
1506 struct vnode *vp;
1507 extern int lfs_dostats; /* XXX this does not belong here */
1508 struct segment *sp;
1509 SEGSUM *ssp;
1510 int flags = 0;
1511 int error = 0;
1512
1513 ASSERT_MAYBE_SEGLOCK(fs);
1514 KASSERT(fs->lfs_nadirop == 0);
1515
1516 if (fs->lfs_ronly)
1517 return EROFS;
1518
1519 mutex_enter(&lfs_lock);
1520 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
1521 mutex_exit(&lfs_lock);
1522 return 0;
1523 } else
1524 mutex_exit(&lfs_lock);
1525
1526 if (lfs_dostats)
1527 ++lfs_stats.flush_invoked;
1528
1529 lfs_imtime(fs);
1530 lfs_seglock(fs, flags);
1531 sp = fs->lfs_sp;
1532
1533 /*
1534 * lfs_writevnodes, optimized to get dirops out of the way.
1535 * Only write dirops, and don't flush files' pages, only
1536 * blocks from the directories.
1537 *
1538 * We don't need to vref these files because they are
1539 * dirops and so hold an extra reference until the
1540 * segunlock clears them of that status.
1541 *
1542 * We don't need to check for IN_ADIROP because we know that
1543 * no dirops are active.
1544 *
1545 */
1546 mutex_enter(&lfs_lock);
1547 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
1548 nip = TAILQ_NEXT(ip, i_lfs_dchain);
1549 mutex_exit(&lfs_lock);
1550 vp = ITOV(ip);
1551 mutex_enter(vp->v_interlock);
1552
1553 KASSERT((ip->i_flag & IN_ADIROP) == 0);
1554 KASSERT(vp->v_uflag & VU_DIROP);
1555 KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0);
1556
1557 /*
1558 * All writes to directories come from dirops; all
1559 * writes to files' direct blocks go through the page
1560 * cache, which we're not touching. Reads to files
1561 * and/or directories will not be affected by writing
1562 * directory blocks, directory inodes, and file inodes. So we don't
1563 * really need to lock.
1564 */
1565 if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
1566 mutex_exit(vp->v_interlock);
1567 mutex_enter(&lfs_lock);
1568 continue;
1569 }
1570 mutex_exit(vp->v_interlock);
1571 /* XXX see below
1572 * waslocked = VOP_ISLOCKED(vp);
1573 */
1574 if (vp->v_type != VREG &&
1575 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) {
1576 error = lfs_writefile(fs, sp, vp);
1577 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
1578 !(ip->i_flag & IN_ALLMOD)) {
1579 mutex_enter(&lfs_lock);
1580 LFS_SET_UINO(ip, IN_MODIFIED);
1581 mutex_exit(&lfs_lock);
1582 }
1583 if (error && (sp->seg_flags & SEGM_SINGLE)) {
1584 mutex_enter(&lfs_lock);
1585 error = EAGAIN;
1586 break;
1587 }
1588 }
1589 KDASSERT(ip->i_number != LFS_IFILE_INUM);
1590 error = lfs_writeinode(fs, sp, ip);
1591 mutex_enter(&lfs_lock);
1592 if (error && (sp->seg_flags & SEGM_SINGLE)) {
1593 error = EAGAIN;
1594 break;
1595 }
1596
1597 /*
1598 * We might need to update these inodes again,
1599 * for example, if they have data blocks to write.
1600 * Make sure that after this flush, they are still
1601 * marked IN_MODIFIED so that we don't forget to
1602 * write them.
1603 */
1604 /* XXX only for non-directories? --KS */
1605 LFS_SET_UINO(ip, IN_MODIFIED);
1606 }
1607 mutex_exit(&lfs_lock);
1608 /* We've written all the dirops there are */
1609 ssp = (SEGSUM *)sp->segsum;
1610 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT));
1611 lfs_finalize_fs_seguse(fs);
1612 (void) lfs_writeseg(fs, sp);
1613 lfs_segunlock(fs);
1614
1615 return error;
1616 }
1617
1618 /*
1619 * Flush all vnodes for which the pagedaemon has requested pageouts.
1620 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
1621 * has just run, this would be an error). If we have to skip a vnode
1622 * for any reason, just skip it; if we have to wait for the cleaner,
1623 * abort. The writer daemon will call us again later.
1624 */
1625 int
1626 lfs_flush_pchain(struct lfs *fs)
1627 {
1628 struct inode *ip, *nip;
1629 struct vnode *vp;
1630 extern int lfs_dostats;
1631 struct segment *sp;
1632 int error, error2;
1633
1634 ASSERT_NO_SEGLOCK(fs);
1635
1636 if (fs->lfs_ronly)
1637 return EROFS;
1638
1639 mutex_enter(&lfs_lock);
1640 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
1641 mutex_exit(&lfs_lock);
1642 return 0;
1643 } else
1644 mutex_exit(&lfs_lock);
1645
1646 /* Get dirops out of the way */
1647 if ((error = lfs_flush_dirops(fs)) != 0)
1648 return error;
1649
1650 if (lfs_dostats)
1651 ++lfs_stats.flush_invoked;
1652
1653 /*
1654 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
1655 */
1656 lfs_imtime(fs);
1657 lfs_seglock(fs, 0);
1658 sp = fs->lfs_sp;
1659
1660 /*
1661 * lfs_writevnodes, optimized to clear pageout requests.
1662 * Only write non-dirop files that are in the pageout queue.
1663 * We're very conservative about what we write; we want to be
1664 * fast and async.
1665 */
1666 mutex_enter(&lfs_lock);
1667 top:
1668 for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
1669 struct mount *mp = ITOV(ip)->v_mount;
1670 ino_t ino = ip->i_number;
1671
1672 nip = TAILQ_NEXT(ip, i_lfs_pchain);
1673
1674 if (!(ip->i_flags & IN_PAGING))
1675 goto top;
1676
1677 mutex_exit(&lfs_lock);
1678 if (vcache_get(mp, &ino, sizeof(ino), &vp) != 0) {
1679 mutex_enter(&lfs_lock);
1680 continue;
1681 }
1682 if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1683 vrele(vp);
1684 mutex_enter(&lfs_lock);
1685 continue;
1686 }
1687 ip = VTOI(vp);
1688 mutex_enter(&lfs_lock);
1689 if ((vp->v_uflag & VU_DIROP) != 0 || vp->v_type != VREG ||
1690 !(ip->i_flags & IN_PAGING)) {
1691 mutex_exit(&lfs_lock);
1692 vput(vp);
1693 mutex_enter(&lfs_lock);
1694 goto top;
1695 }
1696 mutex_exit(&lfs_lock);
1697
1698 error = lfs_writefile(fs, sp, vp);
1699 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
1700 !(ip->i_flag & IN_ALLMOD)) {
1701 mutex_enter(&lfs_lock);
1702 LFS_SET_UINO(ip, IN_MODIFIED);
1703 mutex_exit(&lfs_lock);
1704 }
1705 KDASSERT(ip->i_number != LFS_IFILE_INUM);
1706 error2 = lfs_writeinode(fs, sp, ip);
1707
1708 VOP_UNLOCK(vp);
1709 vrele(vp);
1710
1711 if (error == EAGAIN || error2 == EAGAIN) {
1712 lfs_writeseg(fs, sp);
1713 mutex_enter(&lfs_lock);
1714 break;
1715 }
1716 mutex_enter(&lfs_lock);
1717 }
1718 mutex_exit(&lfs_lock);
1719 (void) lfs_writeseg(fs, sp);
1720 lfs_segunlock(fs);
1721
1722 return 0;
1723 }
1724
1725 /*
1726 * Conversion for compat.
1727 */
1728 static void
1729 block_info_from_70(BLOCK_INFO *bi, const BLOCK_INFO_70 *bi70)
1730 {
1731 bi->bi_inode = bi70->bi_inode;
1732 bi->bi_lbn = bi70->bi_lbn;
1733 bi->bi_daddr = bi70->bi_daddr;
1734 bi->bi_segcreate = bi70->bi_segcreate;
1735 bi->bi_version = bi70->bi_version;
1736 bi->bi_bp = bi70->bi_bp;
1737 bi->bi_size = bi70->bi_size;
1738 }
1739
1740 static void
1741 block_info_to_70(BLOCK_INFO_70 *bi70, const BLOCK_INFO *bi)
1742 {
1743 bi70->bi_inode = bi->bi_inode;
1744 bi70->bi_lbn = bi->bi_lbn;
1745 bi70->bi_daddr = bi->bi_daddr;
1746 bi70->bi_segcreate = bi->bi_segcreate;
1747 bi70->bi_version = bi->bi_version;
1748 bi70->bi_bp = bi->bi_bp;
1749 bi70->bi_size = bi->bi_size;
1750 }
1751
1752 /*
1753 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
1754 */
1755 int
1756 lfs_fcntl(void *v)
1757 {
1758 struct vop_fcntl_args /* {
1759 struct vnode *a_vp;
1760 u_int a_command;
1761 void * a_data;
1762 int a_fflag;
1763 kauth_cred_t a_cred;
1764 } */ *ap = v;
1765 struct timeval tv;
1766 struct timeval *tvp;
1767 BLOCK_INFO *blkiov;
1768 BLOCK_INFO_70 *blkiov70;
1769 CLEANERINFO *cip;
1770 SEGUSE *sup;
1771 int blkcnt, i, error;
1772 size_t fh_size;
1773 struct lfs_fcntl_markv blkvp;
1774 struct lfs_fcntl_markv_70 blkvp70;
1775 struct lwp *l;
1776 fsid_t *fsidp;
1777 struct lfs *fs;
1778 struct buf *bp;
1779 fhandle_t *fhp;
1780 daddr_t off;
1781 int oclean;
1782
1783 /* Only respect LFS fcntls on fs root or Ifile */
1784 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO &&
1785 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
1786 return ulfs_fcntl(v);
1787 }
1788
1789 /* Avoid locking a draining lock */
1790 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
1791 return ESHUTDOWN;
1792 }
1793
1794 /* LFS control and monitoring fcntls are available only to root */
1795 l = curlwp;
1796 if (((ap->a_command & 0xff00) >> 8) == 'L' &&
1797 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
1798 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0)
1799 return (error);
1800
1801 fs = VTOI(ap->a_vp)->i_lfs;
1802 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;
1803
1804 error = 0;
1805 switch ((int)ap->a_command) {
1806 case LFCNSEGWAITALL_COMPAT_50:
1807 case LFCNSEGWAITALL_COMPAT:
1808 fsidp = NULL;
1809 /* FALLTHROUGH */
1810 case LFCNSEGWAIT_COMPAT_50:
1811 case LFCNSEGWAIT_COMPAT:
1812 {
1813 struct timeval50 *tvp50
1814 = (struct timeval50 *)ap->a_data;
1815 timeval50_to_timeval(tvp50, &tv);
1816 tvp = &tv;
1817 }
1818 goto segwait_common;
1819 case LFCNSEGWAITALL:
1820 fsidp = NULL;
1821 /* FALLTHROUGH */
1822 case LFCNSEGWAIT:
1823 tvp = (struct timeval *)ap->a_data;
1824 segwait_common:
1825 mutex_enter(&lfs_lock);
1826 ++fs->lfs_sleepers;
1827 mutex_exit(&lfs_lock);
1828
1829 error = lfs_segwait(fsidp, tvp);
1830
1831 mutex_enter(&lfs_lock);
1832 if (--fs->lfs_sleepers == 0)
1833 wakeup(&fs->lfs_sleepers);
1834 mutex_exit(&lfs_lock);
1835 return error;
1836
1837 case LFCNBMAPV_COMPAT_70:
1838 case LFCNMARKV_COMPAT_70:
1839 blkvp70 = *(struct lfs_fcntl_markv_70 *)ap->a_data;
1840
1841 blkcnt = blkvp70.blkcnt;
1842 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1843 return (EINVAL);
1844 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1845 blkiov70 = lfs_malloc(fs, sizeof(BLOCK_INFO_70), LFS_NB_BLKIOV);
1846 for (i = 0; i < blkcnt; i++) {
1847 error = copyin(&blkvp70.blkiov[i], blkiov70,
1848 sizeof(*blkiov70));
1849 if (error) {
1850 lfs_free(fs, blkiov70, LFS_NB_BLKIOV);
1851 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1852 return error;
1853 }
1854 block_info_from_70(&blkiov[i], blkiov70);
1855 }
1856
1857 mutex_enter(&lfs_lock);
1858 ++fs->lfs_sleepers;
1859 mutex_exit(&lfs_lock);
1860 if (ap->a_command == LFCNBMAPV)
1861 error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
1862 else /* LFCNMARKV */
1863 error = lfs_markv(l, fsidp, blkiov, blkcnt);
1864 if (error == 0) {
1865 for (i = 0; i < blkcnt; i++) {
1866 block_info_to_70(blkiov70, &blkiov[i]);
1867 error = copyout(blkiov70, &blkvp70.blkiov[i],
1868 sizeof(*blkiov70));
1869 if (error) {
1870 break;
1871 }
1872 }
1873 }
1874 mutex_enter(&lfs_lock);
1875 if (--fs->lfs_sleepers == 0)
1876 wakeup(&fs->lfs_sleepers);
1877 mutex_exit(&lfs_lock);
1878 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
lfs_free(fs, blkiov70, LFS_NB_BLKIOV);
1879 return error;
1880
1881 case LFCNBMAPV:
1882 case LFCNMARKV:
1883 blkvp = *(struct lfs_fcntl_markv *)ap->a_data;
1884
1885 blkcnt = blkvp.blkcnt;
1886 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1887 return (EINVAL);
1888 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1889 if ((error = copyin(blkvp.blkiov, blkiov,
1890 blkcnt * sizeof(BLOCK_INFO))) != 0) {
1891 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1892 return error;
1893 }
1894
1895 mutex_enter(&lfs_lock);
1896 ++fs->lfs_sleepers;
1897 mutex_exit(&lfs_lock);
1898 if (ap->a_command == LFCNBMAPV)
1899 error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
1900 else /* LFCNMARKV */
1901 error = lfs_markv(l, fsidp, blkiov, blkcnt);
1902 if (error == 0)
1903 error = copyout(blkiov, blkvp.blkiov,
1904 blkcnt * sizeof(BLOCK_INFO));
1905 mutex_enter(&lfs_lock);
1906 if (--fs->lfs_sleepers == 0)
1907 wakeup(&fs->lfs_sleepers);
1908 mutex_exit(&lfs_lock);
1909 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1910 return error;
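		/*
		 * Illustrative userland view of LFCNBMAPV (a sketch only):
		 * the cleaner fills an array of BLOCK_INFO records naming
		 * inode and logical block pairs, and on success the kernel
		 * rewrites each entry with the block's current disk address.
		 * The BLOCK_INFO field names are assumed from the LFS
		 * headers, the inode and block numbers are arbitrary, and
		 * "fd" is an open descriptor on the filesystem root as in
		 * the sketch above lfs_fcntl().
		 *
		 *	BLOCK_INFO bi[2];
		 *	struct lfs_fcntl_markv args;
		 *
		 *	memset(bi, 0, sizeof(bi));
		 *	bi[0].bi_inode = 3;	bi[0].bi_lbn = 0;
		 *	bi[1].bi_inode = 3;	bi[1].bi_lbn = 1;
		 *	args.blkiov = bi;
		 *	args.blkcnt = 2;
		 *	if (fcntl(fd, LFCNBMAPV, &args) == -1)
		 *		err(1, "LFCNBMAPV");
		 */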
1911
1912 case LFCNRECLAIM:
1913 /*
1914 * Flush dirops and write Ifile, allowing empty segments
1915 * to be immediately reclaimed.
1916 */
1917 lfs_writer_enter(fs, "pndirop");
1918 off = lfs_sb_getoffset(fs);
1919 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP);
1920 lfs_flush_dirops(fs);
1921 LFS_CLEANERINFO(cip, fs, bp);
1922 oclean = lfs_ci_getclean(fs, cip);
1923 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
1924 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP);
1925 fs->lfs_sp->seg_flags |= SEGM_PROT;
1926 lfs_segunlock(fs);
1927 lfs_writer_leave(fs);
1928
1929 #ifdef DEBUG
1930 LFS_CLEANERINFO(cip, fs, bp);
1931 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64
1932 " blocks, cleaned %" PRId32 " segments (activesb %d)\n",
1933 lfs_sb_getoffset(fs) - off,
1934 lfs_ci_getclean(fs, cip) - oclean,
1935 fs->lfs_activesb));
1936 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
1937 #else
1938 __USE(oclean);
1939 __USE(off);
1940 #endif
1941
1942 return 0;
1943
1944 case LFCNIFILEFH_COMPAT:
1945 /* Return the filehandle of the Ifile */
1946 if ((error = kauth_authorize_system(l->l_cred,
1947 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0)
1948 return (error);
1949 fhp = (struct fhandle *)ap->a_data;
1950 fhp->fh_fsid = *fsidp;
1951 fh_size = 16; /* former VFS_MAXFIDSIZ */
1952 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
1953
1954 case LFCNIFILEFH_COMPAT2:
1955 case LFCNIFILEFH:
1956 /* Return the filehandle of the Ifile */
1957 fhp = (struct fhandle *)ap->a_data;
1958 fhp->fh_fsid = *fsidp;
1959 fh_size = sizeof(struct lfs_fhandle) -
1960 offsetof(fhandle_t, fh_fid);
1961 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
1962
1963 case LFCNREWIND:
1964 /* Move lfs_offset to the lowest-numbered segment */
1965 return lfs_rewind(fs, *(int *)ap->a_data);
1966
1967 case LFCNINVAL:
1968 /* Mark a segment SEGUSE_INVAL */
1969 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp);
1970 if (sup->su_nbytes > 0) {
1971 brelse(bp, 0);
1972 lfs_unset_inval_all(fs);
1973 return EBUSY;
1974 }
1975 sup->su_flags |= SEGUSE_INVAL;
1976 VOP_BWRITE(bp->b_vp, bp);
1977 return 0;
1978
1979 case LFCNRESIZE:
1980 /* Resize the filesystem */
1981 return lfs_resize_fs(fs, *(int *)ap->a_data);
1982
1983 case LFCNWRAPSTOP:
1984 case LFCNWRAPSTOP_COMPAT:
1985 		/*
1986 		 * Hold lfs_newseg at segment 0; if requested, sleep until the
1987 		 * filesystem wraps around.  This supports external agents
1988 		 * (dump, fsck-based regression tests) that need a consistent
1989 		 * snapshot of the filesystem without requiring that all fs
1990 		 * activity stop; see the usage sketch after this function.
1991 		 */
1992 if (fs->lfs_stoplwp == curlwp)
1993 return EALREADY;
1994
1995 mutex_enter(&lfs_lock);
1996 while (fs->lfs_stoplwp != NULL)
1997 cv_wait(&fs->lfs_stopcv, &lfs_lock);
1998 fs->lfs_stoplwp = curlwp;
1999 if (fs->lfs_nowrap == 0)
2000 log(LOG_NOTICE, "%s: disabled log wrap\n",
2001 lfs_sb_getfsmnt(fs));
2002 ++fs->lfs_nowrap;
2003 if (*(int *)ap->a_data == 1
2004 || ap->a_command == LFCNWRAPSTOP_COMPAT) {
2005 			log(LOG_NOTICE, "LFCNWRAPSTOP waiting for log wrap\n");
2006 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
2007 "segwrap", 0, &lfs_lock);
2008 			log(LOG_NOTICE, "LFCNWRAPSTOP done waiting\n");
2009 if (error) {
2010 lfs_wrapgo(fs, VTOI(ap->a_vp), 0);
2011 }
2012 }
2013 mutex_exit(&lfs_lock);
2014 return 0;
2015
2016 case LFCNWRAPGO:
2017 case LFCNWRAPGO_COMPAT:
2018 /*
2019 * Having done its work, the agent wakes up the writer.
2020 * If the argument is 1, it sleeps until a new segment
2021 * is selected.
2022 */
2023 mutex_enter(&lfs_lock);
2024 error = lfs_wrapgo(fs, VTOI(ap->a_vp),
2025 ap->a_command == LFCNWRAPGO_COMPAT ? 1 :
2026 *((int *)ap->a_data));
2027 mutex_exit(&lfs_lock);
2028 return error;
2029
2030 case LFCNWRAPPASS:
2031 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT))
2032 return EALREADY;
2033 mutex_enter(&lfs_lock);
2034 if (fs->lfs_stoplwp != curlwp) {
2035 mutex_exit(&lfs_lock);
2036 return EALREADY;
2037 }
2038 if (fs->lfs_nowrap == 0) {
2039 mutex_exit(&lfs_lock);
2040 return EBUSY;
2041 }
2042 fs->lfs_wrappass = 1;
2043 wakeup(&fs->lfs_wrappass);
2044 /* Wait for the log to wrap, if asked */
2045 if (*(int *)ap->a_data) {
2046 vref(ap->a_vp);
2047 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT;
2048 			log(LOG_NOTICE, "LFCNWRAPPASS waiting for log wrap\n");
2049 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
2050 "segwrap", 0, &lfs_lock);
2051 			log(LOG_NOTICE, "LFCNWRAPPASS done waiting\n");
2052 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT;
2053 vrele(ap->a_vp);
2054 }
2055 mutex_exit(&lfs_lock);
2056 return error;
2057
2058 case LFCNWRAPSTATUS:
2059 mutex_enter(&lfs_lock);
2060 *(int *)ap->a_data = fs->lfs_wrapstatus;
2061 mutex_exit(&lfs_lock);
2062 return 0;
2063
2064 default:
2065 return ulfs_fcntl(v);
2066 }
2067 return 0;
2068 }
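/*
 * Illustrative wrap-stop protocol (a sketch only, error paths elided):
 * an external agent such as dump or an fsck-based regression test pins
 * the log with LFCNWRAPSTOP, examines the now-quiescent on-disk state,
 * and then releases the writer with LFCNWRAPGO.  An argument of zero
 * asks that neither call sleep waiting for the log to wrap; "fd" is an
 * open descriptor on the filesystem root as in the sketch above
 * lfs_fcntl().
 *
 *	int zero = 0;
 *
 *	if (fcntl(fd, LFCNWRAPSTOP, &zero) == -1)
 *		err(1, "LFCNWRAPSTOP");
 *	(inspect the snapshot here)
 *	if (fcntl(fd, LFCNWRAPGO, &zero) == -1)
 *		err(1, "LFCNWRAPGO");
 */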
2069
2070 /*
2071 * Return the last logical file offset that should be written for this file
2072 * if we're doing a write that ends at "size". If writing, we need to know
2073 * about sizes on disk, i.e. fragments if there are any; if reading, we need
2074 * to know about entire blocks.
2075 */
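/*
 * Worked example (block and fragment sizes assumed for illustration):
 * with 8 KB blocks and 1 KB fragments, extending a small file to size
 * 5000 within the direct-block range rounds *eobp up to the next
 * fragment boundary, 5120, since only fragments need to exist on disk;
 * the same size queried with GOP_SIZE_MEM, or any offset beyond the
 * direct blocks, rounds up to the full block, 8192.
 */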
2076 void
2077 lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
2078 {
2079 struct inode *ip = VTOI(vp);
2080 struct lfs *fs = ip->i_lfs;
2081 daddr_t olbn, nlbn;
2082
2083 olbn = lfs_lblkno(fs, ip->i_size);
2084 nlbn = lfs_lblkno(fs, size);
2085 if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
2086 *eobp = lfs_fragroundup(fs, size);
2087 } else {
2088 *eobp = lfs_blkroundup(fs, size);
2089 }
2090 }
2091
2092 #ifdef DEBUG
2093 void lfs_dump_vop(void *);
2094
2095 void
2096 lfs_dump_vop(void *v)
2097 {
2098 struct vop_putpages_args /* {
2099 struct vnode *a_vp;
2100 voff_t a_offlo;
2101 voff_t a_offhi;
2102 int a_flags;
2103 } */ *ap = v;
2104
2105 struct inode *ip = VTOI(ap->a_vp);
2106 struct lfs *fs = ip->i_lfs;
2107
2108 #ifdef DDB
2109 vfs_vnode_print(ap->a_vp, 0, printf);
2110 #endif
2111 	/* XXX bogus cast */
2112 lfs_dump_dinode(fs, (union lfs_dinode *)ip->i_din.ffs1_din);
2113 }
2114 #endif
2115
2116 int
2117 lfs_mmap(void *v)
2118 {
2119 struct vop_mmap_args /* {
2120 const struct vnodeop_desc *a_desc;
2121 struct vnode *a_vp;
2122 vm_prot_t a_prot;
2123 kauth_cred_t a_cred;
2124 } */ *ap = v;
2125
2126 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
2127 return EOPNOTSUPP;
2128 return ulfs_mmap(v);
2129 }
2130
2131 static int
2132 lfs_openextattr(void *v)
2133 {
2134 struct vop_openextattr_args /* {
2135 struct vnode *a_vp;
2136 kauth_cred_t a_cred;
2137 struct proc *a_p;
2138 } */ *ap = v;
2139 struct inode *ip = VTOI(ap->a_vp);
2140 struct ulfsmount *ump = ip->i_ump;
2141 //struct lfs *fs = ip->i_lfs;
2142
2143 /* Not supported for ULFS1 file systems. */
2144 if (ump->um_fstype == ULFS1)
2145 return (EOPNOTSUPP);
2146
2147 /* XXX Not implemented for ULFS2 file systems. */
2148 return (EOPNOTSUPP);
2149 }
2150
2151 static int
2152 lfs_closeextattr(void *v)
2153 {
2154 struct vop_closeextattr_args /* {
2155 struct vnode *a_vp;
2156 int a_commit;
2157 kauth_cred_t a_cred;
2158 struct proc *a_p;
2159 } */ *ap = v;
2160 struct inode *ip = VTOI(ap->a_vp);
2161 struct ulfsmount *ump = ip->i_ump;
2162 //struct lfs *fs = ip->i_lfs;
2163
2164 /* Not supported for ULFS1 file systems. */
2165 if (ump->um_fstype == ULFS1)
2166 return (EOPNOTSUPP);
2167
2168 /* XXX Not implemented for ULFS2 file systems. */
2169 return (EOPNOTSUPP);
2170 }
2171
2172 static int
2173 lfs_getextattr(void *v)
2174 {
2175 struct vop_getextattr_args /* {
2176 struct vnode *a_vp;
2177 int a_attrnamespace;
2178 const char *a_name;
2179 struct uio *a_uio;
2180 size_t *a_size;
2181 kauth_cred_t a_cred;
2182 struct proc *a_p;
2183 } */ *ap = v;
2184 struct vnode *vp = ap->a_vp;
2185 struct inode *ip = VTOI(vp);
2186 struct ulfsmount *ump = ip->i_ump;
2187 //struct lfs *fs = ip->i_lfs;
2188 int error;
2189
2190 if (ump->um_fstype == ULFS1) {
2191 #ifdef LFS_EXTATTR
2192 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2193 error = ulfs_getextattr(ap);
2194 fstrans_done(vp->v_mount);
2195 #else
2196 error = EOPNOTSUPP;
2197 #endif
2198 return error;
2199 }
2200
2201 /* XXX Not implemented for ULFS2 file systems. */
2202 return (EOPNOTSUPP);
2203 }
2204
2205 static int
2206 lfs_setextattr(void *v)
2207 {
2208 struct vop_setextattr_args /* {
2209 struct vnode *a_vp;
2210 int a_attrnamespace;
2211 const char *a_name;
2212 struct uio *a_uio;
2213 kauth_cred_t a_cred;
2214 struct proc *a_p;
2215 } */ *ap = v;
2216 struct vnode *vp = ap->a_vp;
2217 struct inode *ip = VTOI(vp);
2218 struct ulfsmount *ump = ip->i_ump;
2219 //struct lfs *fs = ip->i_lfs;
2220 int error;
2221
2222 if (ump->um_fstype == ULFS1) {
2223 #ifdef LFS_EXTATTR
2224 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2225 error = ulfs_setextattr(ap);
2226 fstrans_done(vp->v_mount);
2227 #else
2228 error = EOPNOTSUPP;
2229 #endif
2230 return error;
2231 }
2232
2233 /* XXX Not implemented for ULFS2 file systems. */
2234 return (EOPNOTSUPP);
2235 }
2236
2237 static int
2238 lfs_listextattr(void *v)
2239 {
2240 struct vop_listextattr_args /* {
2241 struct vnode *a_vp;
2242 int a_attrnamespace;
2243 struct uio *a_uio;
2244 size_t *a_size;
2245 kauth_cred_t a_cred;
2246 struct proc *a_p;
2247 } */ *ap = v;
2248 struct vnode *vp = ap->a_vp;
2249 struct inode *ip = VTOI(vp);
2250 struct ulfsmount *ump = ip->i_ump;
2251 //struct lfs *fs = ip->i_lfs;
2252 int error;
2253
2254 if (ump->um_fstype == ULFS1) {
2255 #ifdef LFS_EXTATTR
2256 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2257 error = ulfs_listextattr(ap);
2258 fstrans_done(vp->v_mount);
2259 #else
2260 error = EOPNOTSUPP;
2261 #endif
2262 return error;
2263 }
2264
2265 /* XXX Not implemented for ULFS2 file systems. */
2266 return (EOPNOTSUPP);
2267 }
2268
2269 static int
2270 lfs_deleteextattr(void *v)
2271 {
2272 struct vop_deleteextattr_args /* {
2273 struct vnode *a_vp;
2274 int a_attrnamespace;
2275 kauth_cred_t a_cred;
2276 struct proc *a_p;
2277 } */ *ap = v;
2278 struct vnode *vp = ap->a_vp;
2279 struct inode *ip = VTOI(vp);
2280 struct ulfsmount *ump = ip->i_ump;
2281 	//struct lfs *fs = ip->i_lfs;
2282 int error;
2283
2284 if (ump->um_fstype == ULFS1) {
2285 #ifdef LFS_EXTATTR
2286 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2287 error = ulfs_deleteextattr(ap);
2288 fstrans_done(vp->v_mount);
2289 #else
2290 error = EOPNOTSUPP;
2291 #endif
2292 return error;
2293 }
2294
2295 /* XXX Not implemented for ULFS2 file systems. */
2296 return (EOPNOTSUPP);
2297 }
2298