lfs_vnops.c revision 1.288 1 /* $NetBSD: lfs_vnops.c,v 1.288 2015/09/01 06:08:37 dholland Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * Copyright (c) 1986, 1989, 1991, 1993, 1995
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95
60 */
61
62 /* from NetBSD: ufs_vnops.c,v 1.213 2013/06/08 05:47:02 kardel Exp */
63 /*-
64 * Copyright (c) 2008 The NetBSD Foundation, Inc.
65 * All rights reserved.
66 *
67 * This code is derived from software contributed to The NetBSD Foundation
68 * by Wasabi Systems, Inc.
69 *
70 * Redistribution and use in source and binary forms, with or without
71 * modification, are permitted provided that the following conditions
72 * are met:
73 * 1. Redistributions of source code must retain the above copyright
74 * notice, this list of conditions and the following disclaimer.
75 * 2. Redistributions in binary form must reproduce the above copyright
76 * notice, this list of conditions and the following disclaimer in the
77 * documentation and/or other materials provided with the distribution.
78 *
79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
89 * POSSIBILITY OF SUCH DAMAGE.
90 */
91 /*
92 * Copyright (c) 1982, 1986, 1989, 1993, 1995
93 * The Regents of the University of California. All rights reserved.
94 * (c) UNIX System Laboratories, Inc.
95 * All or some portions of this file are derived from material licensed
96 * to the University of California by American Telephone and Telegraph
97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
98 * the permission of UNIX System Laboratories, Inc.
99 *
100 * Redistribution and use in source and binary forms, with or without
101 * modification, are permitted provided that the following conditions
102 * are met:
103 * 1. Redistributions of source code must retain the above copyright
104 * notice, this list of conditions and the following disclaimer.
105 * 2. Redistributions in binary form must reproduce the above copyright
106 * notice, this list of conditions and the following disclaimer in the
107 * documentation and/or other materials provided with the distribution.
108 * 3. Neither the name of the University nor the names of its contributors
109 * may be used to endorse or promote products derived from this software
110 * without specific prior written permission.
111 *
112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
122 * SUCH DAMAGE.
123 *
124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95
125 */
126
127 #include <sys/cdefs.h>
128 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.288 2015/09/01 06:08:37 dholland Exp $");
129
130 #ifdef _KERNEL_OPT
131 #include "opt_compat_netbsd.h"
132 #include "opt_uvm_page_trkown.h"
133 #endif
134
135 #include <sys/param.h>
136 #include <sys/systm.h>
137 #include <sys/namei.h>
138 #include <sys/resourcevar.h>
139 #include <sys/kernel.h>
140 #include <sys/file.h>
141 #include <sys/stat.h>
142 #include <sys/buf.h>
143 #include <sys/proc.h>
144 #include <sys/mount.h>
145 #include <sys/vnode.h>
146 #include <sys/pool.h>
147 #include <sys/signalvar.h>
148 #include <sys/kauth.h>
149 #include <sys/syslog.h>
150 #include <sys/fstrans.h>
151
152 #include <miscfs/fifofs/fifo.h>
153 #include <miscfs/genfs/genfs.h>
154 #include <miscfs/specfs/specdev.h>
155
156 #include <ufs/lfs/ulfs_inode.h>
157 #include <ufs/lfs/ulfsmount.h>
158 #include <ufs/lfs/ulfs_bswap.h>
159 #include <ufs/lfs/ulfs_extern.h>
160
161 #include <uvm/uvm.h>
162 #include <uvm/uvm_pmap.h>
163 #include <uvm/uvm_stat.h>
164 #include <uvm/uvm_pager.h>
165
166 #include <ufs/lfs/lfs.h>
167 #include <ufs/lfs/lfs_accessors.h>
168 #include <ufs/lfs/lfs_kernel.h>
169 #include <ufs/lfs/lfs_extern.h>
170
/* PID of the lfs writer daemon; its address is used as a wakeup(9) channel. */
extern pid_t lfs_writer_daemon;
/* Nonzero (the default): lfs_fsync() ignores FSYNC_LAZY trickle-sync requests. */
int lfs_ignore_lazy_sync = 1;

/* Forward declarations for the extended-attribute vops defined later in this file. */
static int lfs_openextattr(void *v);
static int lfs_closeextattr(void *v);
static int lfs_getextattr(void *v);
static int lfs_setextattr(void *v);
static int lfs_listextattr(void *v);
static int lfs_deleteextattr(void *v);
180
/*
 * A virgin directory (no blushing please).
 *
 * Static template for the "." and ".." entries of a newly created
 * directory; copied (and byte-swapped as needed) into the first
 * directory block by lfs_mkdir().  "." uses a 12-byte record; ".."
 * is sized to fill the remainder of an LFS_DIRBLKSIZ block.
 */
static const struct lfs_dirtemplate mastertemplate = {
	0,	12,		LFS_DT_DIR,	1,	".",
	0,	LFS_DIRBLKSIZ - 12,	LFS_DT_DIR,	2,	".."
};
188
/* Global vfs data structures for lfs. */

/*
 * Vnode operations vector for regular LFS files and directories.
 * Operations that need LFS-specific handling (dirops, segment
 * accounting, the log-structured write path) point at lfs_*;
 * everything else falls through to the shared ulfs_* or genfs_*
 * implementations.
 */
int (**lfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, ulfs_lookup },		/* lookup */
	{ &vop_create_desc, lfs_create },		/* create */
	{ &vop_whiteout_desc, ulfs_whiteout },		/* whiteout */
	{ &vop_mknod_desc, lfs_mknod },			/* mknod */
	{ &vop_open_desc, ulfs_open },			/* open */
	{ &vop_close_desc, lfs_close },			/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, lfs_read },			/* read */
	{ &vop_write_desc, lfs_write },			/* write */
	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
	{ &vop_ioctl_desc, ulfs_ioctl },		/* ioctl */
	{ &vop_fcntl_desc, lfs_fcntl },			/* fcntl */
	{ &vop_poll_desc, ulfs_poll },			/* poll */
	{ &vop_kqfilter_desc, genfs_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, ulfs_revoke },		/* revoke */
	{ &vop_mmap_desc, lfs_mmap },			/* mmap */
	{ &vop_fsync_desc, lfs_fsync },			/* fsync */
	{ &vop_seek_desc, ulfs_seek },			/* seek */
	{ &vop_remove_desc, lfs_remove },		/* remove */
	{ &vop_link_desc, lfs_link },			/* link */
	{ &vop_rename_desc, lfs_rename },		/* rename */
	{ &vop_mkdir_desc, lfs_mkdir },			/* mkdir */
	{ &vop_rmdir_desc, lfs_rmdir },			/* rmdir */
	{ &vop_symlink_desc, lfs_symlink },		/* symlink */
	{ &vop_readdir_desc, ulfs_readdir },		/* readdir */
	{ &vop_readlink_desc, ulfs_readlink },		/* readlink */
	{ &vop_abortop_desc, ulfs_abortop },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ulfs_lock },			/* lock */
	{ &vop_unlock_desc, ulfs_unlock },		/* unlock */
	{ &vop_bmap_desc, ulfs_bmap },			/* bmap */
	{ &vop_strategy_desc, lfs_strategy },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, ulfs_islocked },		/* islocked */
	{ &vop_pathconf_desc, ulfs_pathconf },		/* pathconf */
	{ &vop_advlock_desc, ulfs_advlock },		/* advlock */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_getpages_desc, lfs_getpages },		/* getpages */
	{ &vop_putpages_desc, lfs_putpages },		/* putpages */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_vnodeop_opv_desc =
	{ &lfs_vnodeop_p, lfs_vnodeop_entries };
246
/*
 * Vnode operations vector for special (device) files on an LFS
 * filesystem.  Data operations go to the spec_* layer (the device),
 * while metadata operations (getattr/setattr, inactive/reclaim,
 * extattr) remain LFS-aware so inode updates go through the log.
 */
int (**lfs_specop_p)(void *);
const struct vnodeopv_entry_desc lfs_specop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, lfsspec_close },		/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, ulfsspec_read },		/* read */
	{ &vop_write_desc, ulfsspec_write },		/* write */
	{ &vop_fallocate_desc, spec_fallocate },	/* fallocate */
	{ &vop_fdiscard_desc, spec_fdiscard },		/* fdiscard */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_fcntl_desc, ulfs_fcntl },		/* fcntl */
	{ &vop_poll_desc, spec_poll },			/* poll */
	{ &vop_kqfilter_desc, spec_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ulfs_lock },			/* lock */
	{ &vop_unlock_desc, ulfs_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, ulfs_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
	{ &vop_getpages_desc, spec_getpages },		/* getpages */
	{ &vop_putpages_desc, spec_putpages },		/* putpages */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_specop_opv_desc =
	{ &lfs_specop_p, lfs_specop_entries };
302
/*
 * Vnode operations vector for FIFOs (named pipes) on an LFS
 * filesystem.  Most operations bypass to the fifofs layer; inode
 * metadata handling stays with the LFS/ulfs implementations.
 *
 * NOTE(review): unlike the file and spec vectors, there is no
 * vop_getpages entry here (only putpages) — presumably fine since
 * FIFOs have no backing pages, but confirm the genfs default is the
 * intended behavior.
 */
int (**lfs_fifoop_p)(void *);
const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, vn_fifo_bypass },		/* lookup */
	{ &vop_create_desc, vn_fifo_bypass },		/* create */
	{ &vop_mknod_desc, vn_fifo_bypass },		/* mknod */
	{ &vop_open_desc, vn_fifo_bypass },		/* open */
	{ &vop_close_desc, lfsfifo_close },		/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, ulfsfifo_read },		/* read */
	{ &vop_write_desc, ulfsfifo_write },		/* write */
	{ &vop_fallocate_desc, vn_fifo_bypass },	/* fallocate */
	{ &vop_fdiscard_desc, vn_fifo_bypass },		/* fdiscard */
	{ &vop_ioctl_desc, vn_fifo_bypass },		/* ioctl */
	{ &vop_fcntl_desc, ulfs_fcntl },		/* fcntl */
	{ &vop_poll_desc, vn_fifo_bypass },		/* poll */
	{ &vop_kqfilter_desc, vn_fifo_bypass },		/* kqfilter */
	{ &vop_revoke_desc, vn_fifo_bypass },		/* revoke */
	{ &vop_mmap_desc, vn_fifo_bypass },		/* mmap */
	{ &vop_fsync_desc, vn_fifo_bypass },		/* fsync */
	{ &vop_seek_desc, vn_fifo_bypass },		/* seek */
	{ &vop_remove_desc, vn_fifo_bypass },		/* remove */
	{ &vop_link_desc, vn_fifo_bypass },		/* link */
	{ &vop_rename_desc, vn_fifo_bypass },		/* rename */
	{ &vop_mkdir_desc, vn_fifo_bypass },		/* mkdir */
	{ &vop_rmdir_desc, vn_fifo_bypass },		/* rmdir */
	{ &vop_symlink_desc, vn_fifo_bypass },		/* symlink */
	{ &vop_readdir_desc, vn_fifo_bypass },		/* readdir */
	{ &vop_readlink_desc, vn_fifo_bypass },		/* readlink */
	{ &vop_abortop_desc, vn_fifo_bypass },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ulfs_lock },			/* lock */
	{ &vop_unlock_desc, ulfs_unlock },		/* unlock */
	{ &vop_bmap_desc, vn_fifo_bypass },		/* bmap */
	{ &vop_strategy_desc, vn_fifo_bypass },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, ulfs_islocked },		/* islocked */
	{ &vop_pathconf_desc, vn_fifo_bypass },		/* pathconf */
	{ &vop_advlock_desc, vn_fifo_bypass },		/* advlock */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_putpages_desc, vn_fifo_bypass },		/* putpages */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_fifoop_opv_desc =
	{ &lfs_fifoop_p, lfs_fifoop_entries };
357
358 #define LFS_READWRITE
359 #include <ufs/lfs/ulfs_readwrite.c>
360 #undef LFS_READWRITE
361
/*
 * Synch an open file.
 *
 * Flushes the vnode's pages via VOP_PUTPAGES (retrying while the
 * segment layer reports EAGAIN), then writes inode metadata unless
 * FSYNC_DATAONLY, optionally issues a device cache flush for
 * FSYNC_CACHE, and for a waited sync re-marks the inode modified if
 * dirty buffers remain.  FSYNC_LAZY requests are either ignored
 * (lfs_ignore_lazy_sync, the default) or translated into queuing the
 * inode on the paging chain for the writer daemon.
 */
/* ARGSUSED */
int
lfs_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error, wait;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	/* If we're mounted read-only, don't try to sync. */
	if (fs->lfs_ronly)
		return 0;

	/* If a removed vnode is being cleaned, no need to sync here. */
	if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
		return 0;

	/*
	 * Trickle sync simply adds this vnode to the pager list, as if
	 * the pagedaemon had requested a pageout.
	 *
	 * NOTE(review): IN_PAGING is tested/set on ip->i_flags here but
	 * the IN_* dirop flags use ip->i_flag elsewhere in this file —
	 * confirm which word is meant to carry these bits.
	 */
	if (ap->a_flags & FSYNC_LAZY) {
		if (lfs_ignore_lazy_sync == 0) {
			mutex_enter(&lfs_lock);
			if (!(ip->i_flags & IN_PAGING)) {
				ip->i_flags |= IN_PAGING;
				TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
				    i_lfs_pchain);
			}
			wakeup(&lfs_writer_daemon);
			mutex_exit(&lfs_lock);
		}
		return 0;
	}

	/*
	 * If a vnode is being cleaned, flush it out before we try to
	 * reuse it. This prevents the cleaner from writing files twice
	 * in the same partial segment, causing an accounting underflow.
	 */
	if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) {
		lfs_vflush(vp);
	}

	wait = (ap->a_flags & FSYNC_WAIT);
	do {
		/* VOP_PUTPAGES expects the interlock held on entry. */
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
				     round_page(ap->a_offhi),
				     PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
		if (error == EAGAIN) {
			/* Short sleep waiting for segment space to free up. */
			mutex_enter(&lfs_lock);
			mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
				"lfs_fsync", hz / 100 + 1, &lfs_lock);
			mutex_exit(&lfs_lock);
		}
	} while (error == EAGAIN);
	if (error)
		return error;

	if ((ap->a_flags & FSYNC_DATAONLY) == 0)
		error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);

	if (error == 0 && ap->a_flags & FSYNC_CACHE) {
		/* Ask the underlying device to flush its write cache. */
		int l = 0;
		error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
				  curlwp->l_cred);
	}
	if (wait && !VPISEMPTY(vp))
		LFS_SET_UINO(ip, IN_MODIFIED);

	return error;
}
445
/*
 * Take IN_ADIROP off, then call ulfs_inactive.
 *
 * The Ifile is special-cased: it is only ever inactivated at unmount
 * time, so its dirty-inode flags are simply cleared and the vnode
 * unlocked without going through ulfs_inactive.
 */
int
lfs_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	lfs_unmark_vnode(ap->a_vp);

	/*
	 * The Ifile is only ever inactivated on unmount.
	 * Streamline this process by not giving it more dirty blocks.
	 */
	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
		mutex_enter(&lfs_lock);
		LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
		mutex_exit(&lfs_lock);
		VOP_UNLOCK(ap->a_vp);
		return 0;
	}

#ifdef DEBUG
	/*
	 * This might happen on unmount.
	 * XXX If it happens at any other time, it should be a panic.
	 */
	if (ap->a_vp->v_uflag & VU_DIROP) {
		struct inode *ip = VTOI(ap->a_vp);
		printf("lfs_inactive: inactivating VU_DIROP? ino = %d\n", (int)ip->i_number);
	}
#endif /* DEBUG */

	return ulfs_inactive(v);
}
483
/*
 * Begin a directory operation on dvp (and optionally vp).
 *
 * Reserves segment space for the operation, waits until the segment
 * writer has released the filesystem, throttles the number of
 * outstanding dirop vnodes, increments fs->lfs_dirops, and marks
 * (and references) the vnodes so the segment writer treats them as
 * part of an atomic directory operation.
 *
 * Both dvp and, if non-NULL, vp must be locked by the caller.
 * Returns 0 on success or an errno (e.g. EINTR from an interrupted
 * sleep); on failure the space reservation is released.
 */
int
lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
{
	struct lfs *fs;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(vp == NULL || VOP_ISLOCKED(vp));

	fs = VTOI(dvp)->i_lfs;

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * LFS_NRESERVE calculates direct and indirect blocks as well
	 * as an inode block; an overestimate in most cases.
	 */
	if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
		return (error);

    restart:
	mutex_enter(&lfs_lock);
	/* With no dirop active, give the writer a chance to run first. */
	if (fs->lfs_dirops == 0) {
		mutex_exit(&lfs_lock);
		lfs_check(dvp, LFS_UNUSED_LBN, 0);
		mutex_enter(&lfs_lock);
	}
	/* Wait until the segment writer is not holding the filesystem. */
	while (fs->lfs_writer) {
		error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH,
		    "lfs_sdirop", 0, &lfs_lock);
		if (error == EINTR) {
			mutex_exit(&lfs_lock);
			goto unreserve;
		}
	}
	/*
	 * Too many dirop vnodes outstanding and none active here:
	 * wake the writer daemon to drain them, yield, and retry.
	 */
	if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
		wakeup(&lfs_writer_daemon);
		mutex_exit(&lfs_lock);
		preempt();
		goto restart;
	}

	/* Still over the limit: sleep until the count drains, then retry. */
	if (lfs_dirvcount > LFS_MAX_DIROP) {
		DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
		      "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
		if ((error = mtsleep(&lfs_dirvcount,
		    PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
		    &lfs_lock)) != 0) {
			goto unreserve;
		}
		goto restart;
	}

	++fs->lfs_dirops;
	/* fs->lfs_doifile = 1; */ /* XXX why? --ks */
	mutex_exit(&lfs_lock);

	/* Hold a reference so SET_ENDOP will be happy */
	vref(dvp);
	if (vp) {
		vref(vp);
		MARK_VNODE(vp);
	}

	MARK_VNODE(dvp);
	return 0;

  unreserve:
	lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
	return error;
}
554
555 /*
556 * Opposite of lfs_set_dirop... mostly. For now at least must call
557 * UNMARK_VNODE(dvp) explicitly first. (XXX: clean that up)
558 */
559 void
560 lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str)
561 {
562 mutex_enter(&lfs_lock);
563 --fs->lfs_dirops;
564 if (!fs->lfs_dirops) {
565 if (fs->lfs_nadirop) {
566 panic("lfs_unset_dirop: %s: no dirops but "
567 " nadirop=%d", str,
568 fs->lfs_nadirop);
569 }
570 wakeup(&fs->lfs_writer);
571 mutex_exit(&lfs_lock);
572 lfs_check(dvp, LFS_UNUSED_LBN, 0);
573 } else {
574 mutex_exit(&lfs_lock);
575 }
576 lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs));
577 }
578
/*
 * Mark vp as participating in an active directory operation.
 *
 * Sets IN_ADIROP on the inode.  On the first marking of this vnode
 * (VU_DIROP not yet set) it also takes an extra reference, bumps the
 * global and per-fs dirop vnode counters, and queues the inode on
 * the fs dirop chain.  If IN_ADIROP is already set, only asserts
 * that VU_DIROP is set as well.
 */
void
lfs_mark_vnode(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	mutex_enter(&lfs_lock);
	if (!(ip->i_flag & IN_ADIROP)) {
		if (!(vp->v_uflag & VU_DIROP)) {
			/*
			 * lfs_lock is dropped around vref() — presumably
			 * because vref may take other locks; the flags are
			 * re-checked implicitly by holding the lock again
			 * before the counters are touched.
			 */
			mutex_exit(&lfs_lock);
			vref(vp);
			mutex_enter(&lfs_lock);
			++lfs_dirvcount;
			++fs->lfs_dirvcount;
			TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			vp->v_uflag |= VU_DIROP;
		}
		++fs->lfs_nadirop;
		ip->i_flag &= ~IN_CDIROP;
		ip->i_flag |= IN_ADIROP;
	} else
		KASSERT(vp->v_uflag & VU_DIROP);
	mutex_exit(&lfs_lock);
}
603
604 void
605 lfs_unmark_vnode(struct vnode *vp)
606 {
607 struct inode *ip = VTOI(vp);
608
609 mutex_enter(&lfs_lock);
610 if (ip && (ip->i_flag & IN_ADIROP)) {
611 KASSERT(vp->v_uflag & VU_DIROP);
612 --ip->i_lfs->lfs_nadirop;
613 ip->i_flag &= ~IN_ADIROP;
614 }
615 mutex_exit(&lfs_lock);
616 }
617
/*
 * Create a symbolic link.
 *
 * Wraps ulfs_makeinode() in an LFS dirop.  Short targets (shorter
 * than um_maxsymlinklen) are stored directly in the inode
 * (SHORTLINK); longer targets are written as file data via
 * ulfs_bufio().  On success *vpp is the new, unlocked, referenced
 * vnode; on failure *vpp is set to NULL.
 */
int
lfs_symlink(void *v)
{
	struct vop_symlink_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct inode *ip;
	struct ulfs_lookup_results *ulr;
	ssize_t len; /* XXX should be size_t */
	int error;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;

	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);
	KASSERT(ap->a_vap->va_type == VLNK);

	/* XXX should handle this material another way */
	ulr = &VTOI(ap->a_dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
	error = ulfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp);
	if (error) {
		goto out;
	}

	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
	ip = VTOI(*vpp);

	len = strlen(ap->a_target);
	if (len < ip->i_lfs->um_maxsymlinklen) {
		/* Short link: store the target inside the inode itself. */
		memcpy((char *)SHORTLINK(ip), ap->a_target, len);
		ip->i_size = len;
		DIP_ASSIGN(ip, size, len);
		uvm_vnp_setsize(*vpp, ip->i_size);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME)
			ip->i_flag |= IN_ACCESS;
	} else {
		/* Long link: write the target as ordinary file data. */
		error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
		    IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
		    NULL);
	}

	VOP_UNLOCK(*vpp);
	if (error)
		vrele(*vpp);

out:
	fstrans_done(dvp->v_mount);

	UNMARK_VNODE(dvp);
	/* XXX: is it even possible for the symlink to get MARK'd? */
	/*
	 * NOTE(review): on the makeinode failure path *vpp is still NULL
	 * here — confirm UNMARK_VNODE tolerates a NULL vnode.
	 */
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "symlink");

	/* Drop the reference taken by lfs_set_dirop(). */
	vrele(dvp);
	return (error);
}
698
/*
 * Create a device special file.
 *
 * Wraps ulfs_makeinode() in an LFS dirop, then immediately fsyncs
 * the new vnode so it never has to be flushed while marked VU_DIROP
 * or during reclaim.  On success *vpp is the new, unlocked,
 * referenced vnode; on failure *vpp is set to NULL.
 */
int
lfs_mknod(void *v)
{
	struct vop_mknod_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct vattr *vap;
	struct inode *ip;
	int error;
	ino_t ino;
	struct ulfs_lookup_results *ulr;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	vap = ap->a_vap;

	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &VTOI(dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
	error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);

	/* Either way we're done with the dirop at this point */
	/*
	 * NOTE(review): if ulfs_makeinode failed, *vpp is NULL here —
	 * confirm UNMARK_VNODE tolerates a NULL vnode.
	 */
	UNMARK_VNODE(dvp);
	UNMARK_VNODE(*vpp);
	lfs_unset_dirop(fs, dvp, "mknod");
	/*
	 * XXX this is where this used to be (though inside some evil
	 * macros) but it clearly should be moved further down.
	 * - dholland 20140515
	 */
	vrele(dvp);

	if (error) {
		fstrans_done(ap->a_dvp->v_mount);
		*vpp = NULL;
		return (error);
	}

	VN_KNOTE(dvp, NOTE_WRITE);
	ip = VTOI(*vpp);
	ino = ip->i_number;
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;

	/*
	 * Call fsync to write the vnode so that we don't have to deal with
	 * flushing it when it's marked VU_DIROP or reclaiming.
	 *
	 * XXX KS - If we can't flush we also can't call vgone(), so must
	 * return.  But, that leaves this vnode in limbo, also not good.
	 * Can this ever happen (barring hardware failure)?
	 */
	if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
		panic("lfs_mknod: couldn't fsync (ino %llu)",
		      (unsigned long long)ino);
		/* return (error); */
	}

	fstrans_done(ap->a_dvp->v_mount);
	KASSERT(error == 0);
	VOP_UNLOCK(*vpp);
	return (0);
}
781
/*
 * Create a regular file.
 *
 * Wraps ulfs_makeinode() in an LFS dirop.  On success *vpp is the
 * new, unlocked, referenced vnode; on failure *vpp is set to NULL.
 */
int
lfs_create(void *v)
{
	struct vop_create_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct vattr *vap;
	struct ulfs_lookup_results *ulr;
	int error;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	vap = ap->a_vap;

	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &VTOI(dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
	error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
	if (error) {
		fstrans_done(dvp->v_mount);
		goto out;
	}
	fstrans_done(dvp->v_mount);
	VN_KNOTE(dvp, NOTE_WRITE);
	VOP_UNLOCK(*vpp);

out:

	UNMARK_VNODE(dvp);
	/*
	 * NOTE(review): on the failure path *vpp is NULL here — confirm
	 * UNMARK_VNODE tolerates a NULL vnode.
	 */
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "create");

	/* Drop the reference taken by lfs_set_dirop(). */
	vrele(dvp);
	return (error);
}
843
/*
 * Create a directory.
 *
 * Simulates part of ulfs_makeinode(): allocates the inode via
 * vcache_new(), initializes it with "." and ".." from the static
 * template (byte-swapped for the mount as needed), bumps the parent
 * link count, and only then enters the new directory in the parent.
 * On any failure after allocation, the link counts are rolled back
 * and the new vnode released (truncation happens at vrele time since
 * its link count is zero).  The whole sequence runs as an LFS dirop.
 */
int
lfs_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, *tvp, **vpp;
	struct inode *dp, *ip;
	struct componentname *cnp;
	struct vattr *vap;
	struct ulfs_lookup_results *ulr;
	struct buf *bp;
	struct lfs_dirtemplate dirtemplate;
	struct lfs_direct *newdir;
	int dirblksiz;
	int error;

	dvp = ap->a_dvp;
	tvp = NULL;
	vpp = ap->a_vpp;
	cnp = ap->a_cnp;
	vap = ap->a_vap;

	dp = VTOI(dvp);
	ip = NULL;

	KASSERT(vap->va_type == VDIR);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &dp->i_crap;
	ULFS_CHECK_CRAPCOUNTER(dp);

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}
	dirblksiz = fs->um_dirblksiz;

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	fstrans_start(dvp->v_mount, FSTRANS_SHARED);

	/* The new ".." entry will raise the parent's link count. */
	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
		error = EMLINK;
		goto out;
	}

	/*
	 * Must simulate part of ulfs_makeinode here to acquire the inode,
	 * but not have it entered in the parent directory. The entry is
	 * made later after writing "." and ".." entries.
	 */
	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, ap->a_vpp);
	if (error)
		goto out;

	error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
	if (error) {
		vrele(*ap->a_vpp);
		*ap->a_vpp = NULL;
		goto out;
	}

	tvp = *ap->a_vpp;
	lfs_mark_vnode(tvp);
	ip = VTOI(tvp);
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	/* Link count 2: the "." entry plus the parent's entry. */
	ip->i_nlink = 2;
	DIP_ASSIGN(ip, nlink, 2);
	if (cnp->cn_flags & ISWHITEOUT) {
		ip->i_flags |= UF_OPAQUE;
		DIP_ASSIGN(ip, flags, ip->i_flags);
	}

	/*
	 * Bump link count in parent directory to reflect work done below.
	 */
	dp->i_nlink++;
	DIP_ASSIGN(dp, nlink, dp->i_nlink);
	dp->i_flag |= IN_CHANGE;
	if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
		goto bad;

	/*
	 * Initialize directory with "." and ".." from static template.
	 * Fields are byte-swapped to the mount's on-disk byte order.
	 */
	dirtemplate = mastertemplate;
	dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen;
	dirtemplate.dot_ino = ulfs_rw32(ip->i_number, ULFS_MPNEEDSWAP(fs));
	dirtemplate.dotdot_ino = ulfs_rw32(dp->i_number, ULFS_MPNEEDSWAP(fs));
	dirtemplate.dot_reclen = ulfs_rw16(dirtemplate.dot_reclen,
	    ULFS_MPNEEDSWAP(fs));
	dirtemplate.dotdot_reclen = ulfs_rw16(dirtemplate.dotdot_reclen,
	    ULFS_MPNEEDSWAP(fs));
	if (fs->um_maxsymlinklen <= 0) {
		/*
		 * Old-format directories keep type/namlen as a single
		 * byte-order-dependent pair; swap them if the on-disk
		 * byte order differs from the host's.
		 */
#if BYTE_ORDER == LITTLE_ENDIAN
		if (ULFS_MPNEEDSWAP(fs) == 0)
#else
		if (ULFS_MPNEEDSWAP(fs) != 0)
#endif
		{
			dirtemplate.dot_type = dirtemplate.dot_namlen;
			dirtemplate.dotdot_type = dirtemplate.dotdot_namlen;
			dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0;
		} else
			dirtemplate.dot_type = dirtemplate.dotdot_type = 0;
	}
	if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
	    B_CLRBUF, &bp)) != 0)
		goto bad;
	ip->i_size = dirblksiz;
	DIP_ASSIGN(ip, size, dirblksiz);
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	uvm_vnp_setsize(tvp, ip->i_size);
	memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate);

	/*
	 * Directory set up; now install its entry in the parent directory.
	 */
	if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
		goto bad;
	if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
		goto bad;
	}
	newdir = pool_cache_get(ulfs_direct_cache, PR_WAITOK);
	ulfs_makedirentry(ip, cnp, newdir);
	error = ulfs_direnter(dvp, ulr, tvp, newdir, cnp, bp);
	pool_cache_put(ulfs_direct_cache, newdir);
 bad:
	if (error == 0) {
		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
		VOP_UNLOCK(tvp);
	} else {
		/* Roll back the parent's link count. */
		dp->i_nlink--;
		DIP_ASSIGN(dp, nlink, dp->i_nlink);
		dp->i_flag |= IN_CHANGE;
		/*
		 * No need to do an explicit lfs_truncate here, vrele will
		 * do this for us because we set the link count to 0.
		 */
		ip->i_nlink = 0;
		DIP_ASSIGN(ip, nlink, 0);
		ip->i_flag |= IN_CHANGE;
		/* If IN_ADIROP, account for it */
		lfs_unmark_vnode(tvp);
		vput(tvp);
	}

out:
	fstrans_done(dvp->v_mount);

	UNMARK_VNODE(dvp);
	/*
	 * NOTE(review): *vpp may be NULL here on the early failure paths —
	 * confirm UNMARK_VNODE tolerates a NULL vnode.
	 */
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "mkdir");

	/* Drop the reference taken by lfs_set_dirop(). */
	vrele(dvp);
	return (error);
}
1014
1015 int
1016 lfs_remove(void *v)
1017 {
1018 struct vop_remove_args /* {
1019 struct vnode *a_dvp;
1020 struct vnode *a_vp;
1021 struct componentname *a_cnp;
1022 } */ *ap = v;
1023 struct vnode *dvp, *vp;
1024 struct inode *ip;
1025 int error;
1026
1027 dvp = ap->a_dvp;
1028 vp = ap->a_vp;
1029 ip = VTOI(vp);
1030 if ((error = lfs_set_dirop(dvp, vp)) != 0) {
1031 if (dvp == vp)
1032 vrele(vp);
1033 else
1034 vput(vp);
1035 vput(dvp);
1036 return error;
1037 }
1038 error = ulfs_remove(ap);
1039 if (ip->i_nlink == 0)
1040 lfs_orphan(ip->i_lfs, ip->i_number);
1041
1042 UNMARK_VNODE(dvp);
1043 if (ap->a_vp) {
1044 UNMARK_VNODE(ap->a_vp);
1045 }
1046 lfs_unset_dirop(ip->i_lfs, dvp, "remove");
1047 vrele(dvp);
1048 if (ap->a_vp) {
1049 vrele(ap->a_vp);
1050 }
1051
1052 return (error);
1053 }
1054
1055 int
1056 lfs_rmdir(void *v)
1057 {
1058 struct vop_rmdir_args /* {
1059 struct vnodeop_desc *a_desc;
1060 struct vnode *a_dvp;
1061 struct vnode *a_vp;
1062 struct componentname *a_cnp;
1063 } */ *ap = v;
1064 struct vnode *vp;
1065 struct inode *ip;
1066 int error;
1067
1068 vp = ap->a_vp;
1069 ip = VTOI(vp);
1070 if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) {
1071 if (ap->a_dvp == vp)
1072 vrele(ap->a_dvp);
1073 else
1074 vput(ap->a_dvp);
1075 vput(vp);
1076 return error;
1077 }
1078 error = ulfs_rmdir(ap);
1079 if (ip->i_nlink == 0)
1080 lfs_orphan(ip->i_lfs, ip->i_number);
1081
1082 UNMARK_VNODE(ap->a_dvp);
1083 if (ap->a_vp) {
1084 UNMARK_VNODE(ap->a_vp);
1085 }
1086 lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir");
1087 vrele(ap->a_dvp);
1088 if (ap->a_vp) {
1089 vrele(ap->a_vp);
1090 }
1091
1092 return (error);
1093 }
1094
1095 int
1096 lfs_link(void *v)
1097 {
1098 struct vop_link_v2_args /* {
1099 struct vnode *a_dvp;
1100 struct vnode *a_vp;
1101 struct componentname *a_cnp;
1102 } */ *ap = v;
1103 struct lfs *fs;
1104 struct vnode *dvp;
1105 int error;
1106
1107 dvp = ap->a_dvp;
1108
1109 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
1110 ASSERT_NO_SEGLOCK(fs);
1111 if (fs->lfs_ronly) {
1112 return EROFS;
1113 }
1114
1115 error = lfs_set_dirop(dvp, NULL);
1116 if (error) {
1117 return error;
1118 }
1119
1120 error = ulfs_link(ap);
1121
1122 UNMARK_VNODE(dvp);
1123 lfs_unset_dirop(fs, dvp, "link");
1124 vrele(dvp);
1125
1126 return (error);
1127 }
1128
1129 /* XXX hack to avoid calling ITIMES in getattr */
1130 int
1131 lfs_getattr(void *v)
1132 {
1133 struct vop_getattr_args /* {
1134 struct vnode *a_vp;
1135 struct vattr *a_vap;
1136 kauth_cred_t a_cred;
1137 } */ *ap = v;
1138 struct vnode *vp = ap->a_vp;
1139 struct inode *ip = VTOI(vp);
1140 struct vattr *vap = ap->a_vap;
1141 struct lfs *fs = ip->i_lfs;
1142
1143 fstrans_start(vp->v_mount, FSTRANS_SHARED);
1144 /*
1145 * Copy from inode table
1146 */
1147 vap->va_fsid = ip->i_dev;
1148 vap->va_fileid = ip->i_number;
1149 vap->va_mode = ip->i_mode & ~LFS_IFMT;
1150 vap->va_nlink = ip->i_nlink;
1151 vap->va_uid = ip->i_uid;
1152 vap->va_gid = ip->i_gid;
1153 switch (vp->v_type) {
1154 case VBLK:
1155 case VCHR:
1156 vap->va_rdev = (dev_t)lfs_dino_getrdev(fs, ip->i_din);
1157 break;
1158 default:
1159 vap->va_rdev = NODEV;
1160 break;
1161 }
1162 vap->va_size = vp->v_size;
1163 vap->va_atime.tv_sec = lfs_dino_getatime(fs, ip->i_din);
1164 vap->va_atime.tv_nsec = lfs_dino_getatimensec(fs, ip->i_din);
1165 vap->va_mtime.tv_sec = lfs_dino_getmtime(fs, ip->i_din);
1166 vap->va_mtime.tv_nsec = lfs_dino_getmtimensec(fs, ip->i_din);
1167 vap->va_ctime.tv_sec = lfs_dino_getctime(fs, ip->i_din);
1168 vap->va_ctime.tv_nsec = lfs_dino_getctimensec(fs, ip->i_din);
1169 vap->va_flags = ip->i_flags;
1170 vap->va_gen = ip->i_gen;
1171 /* this doesn't belong here */
1172 if (vp->v_type == VBLK)
1173 vap->va_blocksize = BLKDEV_IOSIZE;
1174 else if (vp->v_type == VCHR)
1175 vap->va_blocksize = MAXBSIZE;
1176 else
1177 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
1178 vap->va_bytes = lfs_fsbtob(fs, ip->i_lfs_effnblks);
1179 vap->va_type = vp->v_type;
1180 vap->va_filerev = ip->i_modrev;
1181 fstrans_done(vp->v_mount);
1182 return (0);
1183 }
1184
1185 /*
1186 * Check to make sure the inode blocks won't choke the buffer
1187 * cache, then call ulfs_setattr as usual.
1188 */
1189 int
1190 lfs_setattr(void *v)
1191 {
1192 struct vop_setattr_args /* {
1193 struct vnode *a_vp;
1194 struct vattr *a_vap;
1195 kauth_cred_t a_cred;
1196 } */ *ap = v;
1197 struct vnode *vp = ap->a_vp;
1198
1199 lfs_check(vp, LFS_UNUSED_LBN, 0);
1200 return ulfs_setattr(v);
1201 }
1202
1203 /*
1204 * Release the block we hold on lfs_newseg wrapping. Called on file close,
1205 * or explicitly from LFCNWRAPGO. Called with the interlock held.
1206 */
static int
lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
{
	/* Only the LWP that stopped log wrap may restart it. */
	if (fs->lfs_stoplwp != curlwp)
		return EBUSY;

	/* Give up wrap control and wake anyone waiting to take it. */
	fs->lfs_stoplwp = NULL;
	cv_signal(&fs->lfs_stopcv);

	/* lfs_nowrap should be positive here; bail defensively if not. */
	KASSERT(fs->lfs_nowrap > 0);
	if (fs->lfs_nowrap <= 0) {
		return 0;
	}

	/* Dropping the last hold re-enables log wrap. */
	if (--fs->lfs_nowrap == 0) {
		log(LOG_NOTICE, "%s: re-enabled log wrap\n",
		    lfs_sb_getfsmnt(fs));
		wakeup(&fs->lfs_wrappass);
		lfs_wakeup_cleaner(fs);
	}
	/* Optionally sleep (interruptibly) until a new segment is chosen. */
	if (waitfor) {
		mtsleep(&fs->lfs_nextsegsleep, PCATCH | PUSER, "segment",
		    0, &lfs_lock);
	}

	return 0;
}
1234
1235 /*
1236 * Close called.
1237 *
1238 * Update the times on the inode.
1239 */
1240 /* ARGSUSED */
int
lfs_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	/*
	 * If this LWP holds the log-wrap control (see LFCNWRAPSTOP)
	 * and is closing the fs root or the Ifile, release the wrap
	 * control now rather than leaving the log stopped.
	 */
	if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
	    fs->lfs_stoplwp == curlwp) {
		mutex_enter(&lfs_lock);
		log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
		lfs_wrapgo(fs, ip, 0);
		mutex_exit(&lfs_lock);
	}

	/* Skip the timestamp update for the Ifile during unmount. */
	if (vp == ip->i_lfs->lfs_ivnode &&
	    vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
		return 0;

	/* Update inode times if others still have the file open. */
	fstrans_start(vp->v_mount, FSTRANS_SHARED);
	if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	fstrans_done(vp->v_mount);
	return (0);
}
1272
1273 /*
1274 * Close wrapper for special devices.
1275 *
1276 * Update the times on the inode then do device close.
1277 */
1278 int
1279 lfsspec_close(void *v)
1280 {
1281 struct vop_close_args /* {
1282 struct vnode *a_vp;
1283 int a_fflag;
1284 kauth_cred_t a_cred;
1285 } */ *ap = v;
1286 struct vnode *vp;
1287 struct inode *ip;
1288
1289 vp = ap->a_vp;
1290 ip = VTOI(vp);
1291 if (vp->v_usecount > 1) {
1292 LFS_ITIMES(ip, NULL, NULL, NULL);
1293 }
1294 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
1295 }
1296
1297 /*
1298 * Close wrapper for fifo's.
1299 *
1300 * Update the times on the inode then do device close.
1301 */
1302 int
1303 lfsfifo_close(void *v)
1304 {
1305 struct vop_close_args /* {
1306 struct vnode *a_vp;
1307 int a_fflag;
1308 kauth_cred_ a_cred;
1309 } */ *ap = v;
1310 struct vnode *vp;
1311 struct inode *ip;
1312
1313 vp = ap->a_vp;
1314 ip = VTOI(vp);
1315 if (ap->a_vp->v_usecount > 1) {
1316 LFS_ITIMES(ip, NULL, NULL, NULL);
1317 }
1318 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
1319 }
1320
1321 /*
1322 * Reclaim an inode so that it can be used for other purposes.
1323 */
1324
int
lfs_reclaim(void *v)
{
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	int error;

	/*
	 * The inode must be freed and updated before being removed
	 * from its hash chain. Other threads trying to gain a hold
	 * or lock on the inode will be stalled.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
		lfs_vfree(vp, ip->i_number, ip->i_omode);

	/* Clear any pending-write accounting for this inode. */
	mutex_enter(&lfs_lock);
	LFS_CLR_UINO(ip, IN_ALLMOD);
	mutex_exit(&lfs_lock);
	if ((error = ulfs_reclaim(vp)))
		return (error);

	/*
	 * Take us off the paging and/or dirop queues if we were on them.
	 * We shouldn't be on them.
	 */
	mutex_enter(&lfs_lock);
	/*
	 * NOTE(review): IN_PAGING is tested and cleared on ip->i_flags
	 * here (and in lfs_flush_pchain), while other IN_* in-core
	 * flags in this file use ip->i_flag -- confirm which field is
	 * intended; it looks internally consistent but misnamed.
	 */
	if (ip->i_flags & IN_PAGING) {
		log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n",
		    lfs_sb_getfsmnt(fs));
		ip->i_flags &= ~IN_PAGING;
		TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
	}
	if (vp->v_uflag & VU_DIROP) {
		/* Should be impossible; the cleanup below is unreachable. */
		panic("reclaimed vnode is VU_DIROP");
		vp->v_uflag &= ~VU_DIROP;
		TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
	}
	mutex_exit(&lfs_lock);

	/* Release per-inode allocations and detach from the vnode. */
	pool_put(&lfs_dinode_pool, ip->i_din);
	lfs_deregister_all(vp);
	pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
	ip->inode_ext.lfs = NULL;
	genfs_node_destroy(vp);
	pool_put(&lfs_inode_pool, vp->v_data);
	vp->v_data = NULL;
	return (0);
}
1377
1378 /*
1379 * Read a block from a storage device.
1380 *
1381 * Calculate the logical to physical mapping if not done already,
1382 * then call the device strategy routine.
1383 *
1384 * In order to avoid reading blocks that are in the process of being
1385 * written by the cleaner---and hence are not mutexed by the normal
1386 * buffer cache / page cache mechanisms---check for collisions before
1387 * reading.
1388 *
1389 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
1390 * the active cleaner test.
1391 *
1392 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
1393 */
int
lfs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct inode *ip;
	daddr_t tbn;
/* Maximum number of sleep-and-retry rounds before giving up. */
#define MAXLOOP 25
	int i, sn, error, slept, loopcount;

	bp = ap->a_bp;
	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;

	/* lfs uses its strategy routine only for read */
	KASSERT(bp->b_flags & B_READ);

	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("lfs_strategy: spec");
	KASSERT(bp->b_bcount != 0);
	/* Resolve the logical block to a disk address if not done yet. */
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
		    NULL);
		if (error) {
			bp->b_error = error;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1) /* no valid data */
			clrbuf(bp);
	}
	if ((long)bp->b_blkno < 0) { /* block is not on disk */
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/*
	 * If the cleaner holds the seglock, the block we want may be
	 * in the middle of being rewritten; wait until it is safe.
	 */
	slept = 1;
	loopcount = 0;
	mutex_enter(&lfs_lock);
	while (slept && fs->lfs_seglock) {
		mutex_exit(&lfs_lock);
		/*
		 * Look through list of intervals.
		 * There will only be intervals to look through
		 * if the cleaner holds the seglock.
		 * Since the cleaner is synchronous, we can trust
		 * the list of intervals to be current.
		 */
		tbn = LFS_DBTOFSB(fs, bp->b_blkno);
		sn = lfs_dtosn(fs, tbn);
		slept = 0;
		for (i = 0; i < fs->lfs_cleanind; i++) {
			/* Does our block collide with a cleaning interval? */
			if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
			    tbn >= fs->lfs_cleanint[i]) {
				DLOG((DLOG_CLEAN,
				      "lfs_strategy: ino %d lbn %" PRId64
				      " ind %d sn %d fsb %" PRIx64
				      " given sn %d fsb %" PRIx64 "\n",
				      ip->i_number, bp->b_lblkno, i,
				      lfs_dtosn(fs, fs->lfs_cleanint[i]),
				      fs->lfs_cleanint[i], sn, tbn));
				DLOG((DLOG_CLEAN,
				      "lfs_strategy: sleeping on ino %d lbn %"
				      PRId64 "\n", ip->i_number, bp->b_lblkno));
				mutex_enter(&lfs_lock);
				if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
					/*
					 * Cleaner can't wait for itself.
					 * Instead, wait for the blocks
					 * to be written to disk.
					 * XXX we need pribio in the test
					 * XXX here.
					 */
					mtsleep(&fs->lfs_iocount,
					    (PRIBIO + 1) | PNORELOCK,
					    "clean2", hz/10 + 1,
					    &lfs_lock);
					slept = 1;
					++loopcount;
					break;
				} else if (fs->lfs_seglock) {
					/* Wait for the seglock holder. */
					mtsleep(&fs->lfs_seglock,
					    (PRIBIO + 1) | PNORELOCK,
					    "clean1", 0,
					    &lfs_lock);
					slept = 1;
					break;
				}
				mutex_exit(&lfs_lock);
			}
		}
		mutex_enter(&lfs_lock);
		/* Avoid hanging forever if the cleaner never finishes. */
		if (loopcount > MAXLOOP) {
			printf("lfs_strategy: breaking out of clean2 loop\n");
			break;
		}
	}
	mutex_exit(&lfs_lock);

	/* Issue the read against the underlying device. */
	vp = ip->i_devvp;
	return VOP_STRATEGY(vp, bp);
}
1504
1505 /*
1506 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
1507 * Technically this is a checkpoint (the on-disk state is valid)
1508 * even though we are leaving out all the file data.
1509 */
int
lfs_flush_dirops(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	extern int lfs_dostats; /* XXX this does not belong here */
	struct segment *sp;
	SEGSUM *ssp;
	int flags = 0;
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(fs->lfs_nadirop == 0);

	/* Nothing can be flushed on a read-only filesystem. */
	if (fs->lfs_ronly)
		return EROFS;

	/* Fast path: no dirops queued, nothing to do. */
	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	lfs_imtime(fs);
	lfs_seglock(fs, flags);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to get dirops out of the way.
	 * Only write dirops, and don't flush files' pages, only
	 * blocks from the directories.
	 *
	 * We don't need to vref these files because they are
	 * dirops and so hold an extra reference until the
	 * segunlock clears them of that status.
	 *
	 * We don't need to check for IN_ADIROP because we know that
	 * no dirops are active.
	 *
	 */
	mutex_enter(&lfs_lock);
	for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
		/* Fetch the successor before dropping lfs_lock. */
		nip = TAILQ_NEXT(ip, i_lfs_dchain);
		mutex_exit(&lfs_lock);
		vp = ITOV(ip);
		mutex_enter(vp->v_interlock);

		KASSERT((ip->i_flag & IN_ADIROP) == 0);
		KASSERT(vp->v_uflag & VU_DIROP);
		KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0);

		/*
		 * All writes to directories come from dirops; all
		 * writes to files' direct blocks go through the page
		 * cache, which we're not touching.  Reads to files
		 * and/or directories will not be affected by writing
		 * directory blocks inodes and file inodes.  So we don't
		 * really need to lock.
		 */
		/* Defensive re-check of the KASSERT above, under the
		 * vnode interlock: skip vnodes being reclaimed. */
		if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
			mutex_exit(vp->v_interlock);
			mutex_enter(&lfs_lock);
			continue;
		}
		mutex_exit(vp->v_interlock);
		/* XXX see below
		 * waslocked = VOP_ISLOCKED(vp);
		 */
		if (vp->v_type != VREG &&
		    ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) {
			error = lfs_writefile(fs, sp, vp);
			if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
			    !(ip->i_flag & IN_ALLMOD)) {
				mutex_enter(&lfs_lock);
				LFS_SET_UINO(ip, IN_MODIFIED);
				mutex_exit(&lfs_lock);
			}
			if (error && (sp->seg_flags & SEGM_SINGLE)) {
				mutex_enter(&lfs_lock);
				error = EAGAIN;
				break;
			}
		}
		KDASSERT(ip->i_number != LFS_IFILE_INUM);
		error = lfs_writeinode(fs, sp, ip);
		mutex_enter(&lfs_lock);
		if (error && (sp->seg_flags & SEGM_SINGLE)) {
			error = EAGAIN;
			break;
		}

		/*
		 * We might need to update these inodes again,
		 * for example, if they have data blocks to write.
		 * Make sure that after this flush, they are still
		 * marked IN_MODIFIED so that we don't forget to
		 * write them.
		 */
		/* XXX only for non-directories? --KS */
		LFS_SET_UINO(ip, IN_MODIFIED);
	}
	mutex_exit(&lfs_lock);
	/* We've written all the dirops there are */
	ssp = (SEGSUM *)sp->segsum;
	lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT));
	lfs_finalize_fs_seguse(fs);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	return error;
}
1625
1626 /*
1627 * Flush all vnodes for which the pagedaemon has requested pageouts.
1628 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
1629 * has just run, this would be an error). If we have to skip a vnode
1630 * for any reason, just skip it; if we have to wait for the cleaner,
1631 * abort. The writer daemon will call us again later.
1632 */
int
lfs_flush_pchain(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	extern int lfs_dostats;
	struct segment *sp;
	int error, error2;

	ASSERT_NO_SEGLOCK(fs);

	/* Nothing can be flushed on a read-only filesystem. */
	if (fs->lfs_ronly)
		return EROFS;

	/* Fast path: no pageout requests queued. */
	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	/* Get dirops out of the way */
	if ((error = lfs_flush_dirops(fs)) != 0)
		return error;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	/*
	 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
	 */
	lfs_imtime(fs);
	lfs_seglock(fs, 0);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to clear pageout requests.
	 * Only write non-dirop files that are in the pageout queue.
	 * We're very conservative about what we write; we want to be
	 * fast and async.
	 */
	mutex_enter(&lfs_lock);
    top:
	for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
		struct mount *mp = ITOV(ip)->v_mount;
		ino_t ino = ip->i_number;

		nip = TAILQ_NEXT(ip, i_lfs_pchain);

		/*
		 * NOTE(review): IN_PAGING is tested on i_flags here
		 * (cf. i_flag elsewhere in this file) -- confirm the
		 * intended field.  Also note this restarts the whole
		 * scan; entries on the pchain are expected to have
		 * IN_PAGING set, otherwise this could spin.
		 */
		if (!(ip->i_flags & IN_PAGING))
			goto top;

		/* Get a fresh reference; the inode may go away otherwise. */
		mutex_exit(&lfs_lock);
		if (vcache_get(mp, &ino, sizeof(ino), &vp) != 0) {
			mutex_enter(&lfs_lock);
			continue;
		};
		/* Don't block on the vnode lock; skip busy vnodes. */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
			vrele(vp);
			mutex_enter(&lfs_lock);
			continue;
		}
		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		/* Recheck eligibility now that we hold the vnode lock. */
		if ((vp->v_uflag & VU_DIROP) != 0 || vp->v_type != VREG ||
		    !(ip->i_flags & IN_PAGING)) {
			mutex_exit(&lfs_lock);
			vput(vp);
			mutex_enter(&lfs_lock);
			goto top;
		}
		mutex_exit(&lfs_lock);

		error = lfs_writefile(fs, sp, vp);
		if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
		    !(ip->i_flag & IN_ALLMOD)) {
			mutex_enter(&lfs_lock);
			LFS_SET_UINO(ip, IN_MODIFIED);
			mutex_exit(&lfs_lock);
		}
		KDASSERT(ip->i_number != LFS_IFILE_INUM);
		error2 = lfs_writeinode(fs, sp, ip);

		VOP_UNLOCK(vp);
		vrele(vp);

		/* If we must wait for the cleaner, give up and let the
		 * writer daemon retry later. */
		if (error == EAGAIN || error2 == EAGAIN) {
			lfs_writeseg(fs, sp);
			mutex_enter(&lfs_lock);
			break;
		}
		mutex_enter(&lfs_lock);
	}
	mutex_exit(&lfs_lock);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	return 0;
}
1732
1733 /*
1734 * Conversion for compat.
1735 */
1736 static void
1737 block_info_from_70(BLOCK_INFO *bi, const BLOCK_INFO_70 *bi70)
1738 {
1739 bi->bi_inode = bi70->bi_inode;
1740 bi->bi_lbn = bi70->bi_lbn;
1741 bi->bi_daddr = bi70->bi_daddr;
1742 bi->bi_segcreate = bi70->bi_segcreate;
1743 bi->bi_version = bi70->bi_version;
1744 bi->bi_bp = bi70->bi_bp;
1745 bi->bi_size = bi70->bi_size;
1746 }
1747
1748 static void
1749 block_info_to_70(BLOCK_INFO_70 *bi70, const BLOCK_INFO *bi)
1750 {
1751 bi70->bi_inode = bi->bi_inode;
1752 bi70->bi_lbn = bi->bi_lbn;
1753 bi70->bi_daddr = bi->bi_daddr;
1754 bi70->bi_segcreate = bi->bi_segcreate;
1755 bi70->bi_version = bi->bi_version;
1756 bi70->bi_bp = bi->bi_bp;
1757 bi70->bi_size = bi->bi_size;
1758 }
1759
1760 /*
1761 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
1762 */
1763 int
1764 lfs_fcntl(void *v)
1765 {
1766 struct vop_fcntl_args /* {
1767 struct vnode *a_vp;
1768 u_int a_command;
1769 void * a_data;
1770 int a_fflag;
1771 kauth_cred_t a_cred;
1772 } */ *ap = v;
1773 struct timeval tv;
1774 struct timeval *tvp;
1775 BLOCK_INFO *blkiov;
1776 BLOCK_INFO_70 *blkiov70;
1777 CLEANERINFO *cip;
1778 SEGUSE *sup;
1779 int blkcnt, i, error;
1780 size_t fh_size;
1781 struct lfs_fcntl_markv blkvp;
1782 struct lfs_fcntl_markv_70 blkvp70;
1783 struct lwp *l;
1784 fsid_t *fsidp;
1785 struct lfs *fs;
1786 struct buf *bp;
1787 fhandle_t *fhp;
1788 daddr_t off;
1789 int oclean;
1790
1791 /* Only respect LFS fcntls on fs root or Ifile */
1792 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO &&
1793 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
1794 return ulfs_fcntl(v);
1795 }
1796
1797 /* Avoid locking a draining lock */
1798 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
1799 return ESHUTDOWN;
1800 }
1801
1802 /* LFS control and monitoring fcntls are available only to root */
1803 l = curlwp;
1804 if (((ap->a_command & 0xff00) >> 8) == 'L' &&
1805 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
1806 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0)
1807 return (error);
1808
1809 fs = VTOI(ap->a_vp)->i_lfs;
1810 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;
1811
1812 error = 0;
1813 switch ((int)ap->a_command) {
1814 case LFCNSEGWAITALL_COMPAT_50:
1815 case LFCNSEGWAITALL_COMPAT:
1816 fsidp = NULL;
1817 /* FALLTHROUGH */
1818 case LFCNSEGWAIT_COMPAT_50:
1819 case LFCNSEGWAIT_COMPAT:
1820 {
1821 struct timeval50 *tvp50
1822 = (struct timeval50 *)ap->a_data;
1823 timeval50_to_timeval(tvp50, &tv);
1824 tvp = &tv;
1825 }
1826 goto segwait_common;
1827 case LFCNSEGWAITALL:
1828 fsidp = NULL;
1829 /* FALLTHROUGH */
1830 case LFCNSEGWAIT:
1831 tvp = (struct timeval *)ap->a_data;
1832 segwait_common:
1833 mutex_enter(&lfs_lock);
1834 ++fs->lfs_sleepers;
1835 mutex_exit(&lfs_lock);
1836
1837 error = lfs_segwait(fsidp, tvp);
1838
1839 mutex_enter(&lfs_lock);
1840 if (--fs->lfs_sleepers == 0)
1841 wakeup(&fs->lfs_sleepers);
1842 mutex_exit(&lfs_lock);
1843 return error;
1844
1845 case LFCNBMAPV_COMPAT_70:
1846 case LFCNMARKV_COMPAT_70:
1847 blkvp70 = *(struct lfs_fcntl_markv_70 *)ap->a_data;
1848
1849 blkcnt = blkvp70.blkcnt;
1850 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1851 return (EINVAL);
1852 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1853 blkiov70 = lfs_malloc(fs, sizeof(BLOCK_INFO_70), LFS_NB_BLKIOV);
1854 for (i = 0; i < blkcnt; i++) {
1855 error = copyin(&blkvp70.blkiov[i], blkiov70,
1856 sizeof(*blkiov70));
1857 if (error) {
1858 lfs_free(fs, blkiov70, LFS_NB_BLKIOV);
1859 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1860 return error;
1861 }
1862 block_info_from_70(&blkiov[i], blkiov70);
1863 }
1864
1865 mutex_enter(&lfs_lock);
1866 ++fs->lfs_sleepers;
1867 mutex_exit(&lfs_lock);
1868 if (ap->a_command == LFCNBMAPV)
1869 error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
1870 else /* LFCNMARKV */
1871 error = lfs_markv(l, fsidp, blkiov, blkcnt);
1872 if (error == 0) {
1873 for (i = 0; i < blkcnt; i++) {
1874 block_info_to_70(blkiov70, &blkiov[i]);
1875 error = copyout(blkiov70, &blkvp70.blkiov[i],
1876 sizeof(*blkiov70));
1877 if (error) {
1878 break;
1879 }
1880 }
1881 }
1882 mutex_enter(&lfs_lock);
1883 if (--fs->lfs_sleepers == 0)
1884 wakeup(&fs->lfs_sleepers);
1885 mutex_exit(&lfs_lock);
1886 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1887 return error;
1888
1889 case LFCNBMAPV:
1890 case LFCNMARKV:
1891 blkvp = *(struct lfs_fcntl_markv *)ap->a_data;
1892
1893 blkcnt = blkvp.blkcnt;
1894 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1895 return (EINVAL);
1896 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1897 if ((error = copyin(blkvp.blkiov, blkiov,
1898 blkcnt * sizeof(BLOCK_INFO))) != 0) {
1899 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1900 return error;
1901 }
1902
1903 mutex_enter(&lfs_lock);
1904 ++fs->lfs_sleepers;
1905 mutex_exit(&lfs_lock);
1906 if (ap->a_command == LFCNBMAPV)
1907 error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
1908 else /* LFCNMARKV */
1909 error = lfs_markv(l, fsidp, blkiov, blkcnt);
1910 if (error == 0)
1911 error = copyout(blkiov, blkvp.blkiov,
1912 blkcnt * sizeof(BLOCK_INFO));
1913 mutex_enter(&lfs_lock);
1914 if (--fs->lfs_sleepers == 0)
1915 wakeup(&fs->lfs_sleepers);
1916 mutex_exit(&lfs_lock);
1917 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1918 return error;
1919
1920 case LFCNRECLAIM:
1921 /*
1922 * Flush dirops and write Ifile, allowing empty segments
1923 * to be immediately reclaimed.
1924 */
1925 lfs_writer_enter(fs, "pndirop");
1926 off = lfs_sb_getoffset(fs);
1927 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP);
1928 lfs_flush_dirops(fs);
1929 LFS_CLEANERINFO(cip, fs, bp);
1930 oclean = lfs_ci_getclean(fs, cip);
1931 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
1932 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP);
1933 fs->lfs_sp->seg_flags |= SEGM_PROT;
1934 lfs_segunlock(fs);
1935 lfs_writer_leave(fs);
1936
1937 #ifdef DEBUG
1938 LFS_CLEANERINFO(cip, fs, bp);
1939 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64
1940 " blocks, cleaned %" PRId32 " segments (activesb %d)\n",
1941 lfs_sb_getoffset(fs) - off,
1942 lfs_ci_getclean(fs, cip) - oclean,
1943 fs->lfs_activesb));
1944 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
1945 #else
1946 __USE(oclean);
1947 __USE(off);
1948 #endif
1949
1950 return 0;
1951
1952 case LFCNIFILEFH_COMPAT:
1953 /* Return the filehandle of the Ifile */
1954 if ((error = kauth_authorize_system(l->l_cred,
1955 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0)
1956 return (error);
1957 fhp = (struct fhandle *)ap->a_data;
1958 fhp->fh_fsid = *fsidp;
1959 fh_size = 16; /* former VFS_MAXFIDSIZ */
1960 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
1961
1962 case LFCNIFILEFH_COMPAT2:
1963 case LFCNIFILEFH:
1964 /* Return the filehandle of the Ifile */
1965 fhp = (struct fhandle *)ap->a_data;
1966 fhp->fh_fsid = *fsidp;
1967 fh_size = sizeof(struct lfs_fhandle) -
1968 offsetof(fhandle_t, fh_fid);
1969 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
1970
1971 case LFCNREWIND:
1972 /* Move lfs_offset to the lowest-numbered segment */
1973 return lfs_rewind(fs, *(int *)ap->a_data);
1974
1975 case LFCNINVAL:
1976 /* Mark a segment SEGUSE_INVAL */
1977 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp);
1978 if (sup->su_nbytes > 0) {
1979 brelse(bp, 0);
1980 lfs_unset_inval_all(fs);
1981 return EBUSY;
1982 }
1983 sup->su_flags |= SEGUSE_INVAL;
1984 VOP_BWRITE(bp->b_vp, bp);
1985 return 0;
1986
1987 case LFCNRESIZE:
1988 /* Resize the filesystem */
1989 return lfs_resize_fs(fs, *(int *)ap->a_data);
1990
1991 case LFCNWRAPSTOP:
1992 case LFCNWRAPSTOP_COMPAT:
1993 /*
1994 * Hold lfs_newseg at segment 0; if requested, sleep until
1995 * the filesystem wraps around. To support external agents
1996 * (dump, fsck-based regression test) that need to look at
1997 * a snapshot of the filesystem, without necessarily
1998 * requiring that all fs activity stops.
1999 */
2000 if (fs->lfs_stoplwp == curlwp)
2001 return EALREADY;
2002
2003 mutex_enter(&lfs_lock);
2004 while (fs->lfs_stoplwp != NULL)
2005 cv_wait(&fs->lfs_stopcv, &lfs_lock);
2006 fs->lfs_stoplwp = curlwp;
2007 if (fs->lfs_nowrap == 0)
2008 log(LOG_NOTICE, "%s: disabled log wrap\n",
2009 lfs_sb_getfsmnt(fs));
2010 ++fs->lfs_nowrap;
2011 if (*(int *)ap->a_data == 1
2012 || ap->a_command == LFCNWRAPSTOP_COMPAT) {
2013 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n");
2014 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
2015 "segwrap", 0, &lfs_lock);
2016 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n");
2017 if (error) {
2018 lfs_wrapgo(fs, VTOI(ap->a_vp), 0);
2019 }
2020 }
2021 mutex_exit(&lfs_lock);
2022 return 0;
2023
2024 case LFCNWRAPGO:
2025 case LFCNWRAPGO_COMPAT:
2026 /*
2027 * Having done its work, the agent wakes up the writer.
2028 * If the argument is 1, it sleeps until a new segment
2029 * is selected.
2030 */
2031 mutex_enter(&lfs_lock);
2032 error = lfs_wrapgo(fs, VTOI(ap->a_vp),
2033 ap->a_command == LFCNWRAPGO_COMPAT ? 1 :
2034 *((int *)ap->a_data));
2035 mutex_exit(&lfs_lock);
2036 return error;
2037
2038 case LFCNWRAPPASS:
2039 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT))
2040 return EALREADY;
2041 mutex_enter(&lfs_lock);
2042 if (fs->lfs_stoplwp != curlwp) {
2043 mutex_exit(&lfs_lock);
2044 return EALREADY;
2045 }
2046 if (fs->lfs_nowrap == 0) {
2047 mutex_exit(&lfs_lock);
2048 return EBUSY;
2049 }
2050 fs->lfs_wrappass = 1;
2051 wakeup(&fs->lfs_wrappass);
2052 /* Wait for the log to wrap, if asked */
2053 if (*(int *)ap->a_data) {
2054 vref(ap->a_vp);
2055 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT;
2056 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n");
2057 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
2058 "segwrap", 0, &lfs_lock);
2059 log(LOG_NOTICE, "LFCNPASS done waiting\n");
2060 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT;
2061 vrele(ap->a_vp);
2062 }
2063 mutex_exit(&lfs_lock);
2064 return error;
2065
2066 case LFCNWRAPSTATUS:
2067 mutex_enter(&lfs_lock);
2068 *(int *)ap->a_data = fs->lfs_wrapstatus;
2069 mutex_exit(&lfs_lock);
2070 return 0;
2071
2072 default:
2073 return ulfs_fcntl(v);
2074 }
2075 return 0;
2076 }
2077
2078 /*
2079 * Return the last logical file offset that should be written for this file
2080 * if we're doing a write that ends at "size". If writing, we need to know
2081 * about sizes on disk, i.e. fragments if there are any; if reading, we need
2082 * to know about entire blocks.
2083 */
2084 void
2085 lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
2086 {
2087 struct inode *ip = VTOI(vp);
2088 struct lfs *fs = ip->i_lfs;
2089 daddr_t olbn, nlbn;
2090
2091 olbn = lfs_lblkno(fs, ip->i_size);
2092 nlbn = lfs_lblkno(fs, size);
2093 if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
2094 *eobp = lfs_fragroundup(fs, size);
2095 } else {
2096 *eobp = lfs_blkroundup(fs, size);
2097 }
2098 }
2099
#ifdef DEBUG
void lfs_dump_vop(void *);

/*
 * Debug helper: print the vnode named by a putpages argument structure
 * (when DDB is available) and dump its on-disk inode.
 */
void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct inode *ip = VTOI(ap->a_vp);

#ifdef DDB
	vfs_vnode_print(ap->a_vp, 0, printf);
#endif
	lfs_dump_dinode(ip->i_lfs, ip->i_din);
}
#endif
2122
2123 int
2124 lfs_mmap(void *v)
2125 {
2126 struct vop_mmap_args /* {
2127 const struct vnodeop_desc *a_desc;
2128 struct vnode *a_vp;
2129 vm_prot_t a_prot;
2130 kauth_cred_t a_cred;
2131 } */ *ap = v;
2132
2133 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
2134 return EOPNOTSUPP;
2135 return ulfs_mmap(v);
2136 }
2137
2138 static int
2139 lfs_openextattr(void *v)
2140 {
2141 struct vop_openextattr_args /* {
2142 struct vnode *a_vp;
2143 kauth_cred_t a_cred;
2144 struct proc *a_p;
2145 } */ *ap = v;
2146 struct inode *ip = VTOI(ap->a_vp);
2147 struct ulfsmount *ump = ip->i_ump;
2148 //struct lfs *fs = ip->i_lfs;
2149
2150 /* Not supported for ULFS1 file systems. */
2151 if (ump->um_fstype == ULFS1)
2152 return (EOPNOTSUPP);
2153
2154 /* XXX Not implemented for ULFS2 file systems. */
2155 return (EOPNOTSUPP);
2156 }
2157
2158 static int
2159 lfs_closeextattr(void *v)
2160 {
2161 struct vop_closeextattr_args /* {
2162 struct vnode *a_vp;
2163 int a_commit;
2164 kauth_cred_t a_cred;
2165 struct proc *a_p;
2166 } */ *ap = v;
2167 struct inode *ip = VTOI(ap->a_vp);
2168 struct ulfsmount *ump = ip->i_ump;
2169 //struct lfs *fs = ip->i_lfs;
2170
2171 /* Not supported for ULFS1 file systems. */
2172 if (ump->um_fstype == ULFS1)
2173 return (EOPNOTSUPP);
2174
2175 /* XXX Not implemented for ULFS2 file systems. */
2176 return (EOPNOTSUPP);
2177 }
2178
2179 static int
2180 lfs_getextattr(void *v)
2181 {
2182 struct vop_getextattr_args /* {
2183 struct vnode *a_vp;
2184 int a_attrnamespace;
2185 const char *a_name;
2186 struct uio *a_uio;
2187 size_t *a_size;
2188 kauth_cred_t a_cred;
2189 struct proc *a_p;
2190 } */ *ap = v;
2191 struct vnode *vp = ap->a_vp;
2192 struct inode *ip = VTOI(vp);
2193 struct ulfsmount *ump = ip->i_ump;
2194 //struct lfs *fs = ip->i_lfs;
2195 int error;
2196
2197 if (ump->um_fstype == ULFS1) {
2198 #ifdef LFS_EXTATTR
2199 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2200 error = ulfs_getextattr(ap);
2201 fstrans_done(vp->v_mount);
2202 #else
2203 error = EOPNOTSUPP;
2204 #endif
2205 return error;
2206 }
2207
2208 /* XXX Not implemented for ULFS2 file systems. */
2209 return (EOPNOTSUPP);
2210 }
2211
2212 static int
2213 lfs_setextattr(void *v)
2214 {
2215 struct vop_setextattr_args /* {
2216 struct vnode *a_vp;
2217 int a_attrnamespace;
2218 const char *a_name;
2219 struct uio *a_uio;
2220 kauth_cred_t a_cred;
2221 struct proc *a_p;
2222 } */ *ap = v;
2223 struct vnode *vp = ap->a_vp;
2224 struct inode *ip = VTOI(vp);
2225 struct ulfsmount *ump = ip->i_ump;
2226 //struct lfs *fs = ip->i_lfs;
2227 int error;
2228
2229 if (ump->um_fstype == ULFS1) {
2230 #ifdef LFS_EXTATTR
2231 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2232 error = ulfs_setextattr(ap);
2233 fstrans_done(vp->v_mount);
2234 #else
2235 error = EOPNOTSUPP;
2236 #endif
2237 return error;
2238 }
2239
2240 /* XXX Not implemented for ULFS2 file systems. */
2241 return (EOPNOTSUPP);
2242 }
2243
2244 static int
2245 lfs_listextattr(void *v)
2246 {
2247 struct vop_listextattr_args /* {
2248 struct vnode *a_vp;
2249 int a_attrnamespace;
2250 struct uio *a_uio;
2251 size_t *a_size;
2252 kauth_cred_t a_cred;
2253 struct proc *a_p;
2254 } */ *ap = v;
2255 struct vnode *vp = ap->a_vp;
2256 struct inode *ip = VTOI(vp);
2257 struct ulfsmount *ump = ip->i_ump;
2258 //struct lfs *fs = ip->i_lfs;
2259 int error;
2260
2261 if (ump->um_fstype == ULFS1) {
2262 #ifdef LFS_EXTATTR
2263 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2264 error = ulfs_listextattr(ap);
2265 fstrans_done(vp->v_mount);
2266 #else
2267 error = EOPNOTSUPP;
2268 #endif
2269 return error;
2270 }
2271
2272 /* XXX Not implemented for ULFS2 file systems. */
2273 return (EOPNOTSUPP);
2274 }
2275
2276 static int
2277 lfs_deleteextattr(void *v)
2278 {
2279 struct vop_deleteextattr_args /* {
2280 struct vnode *a_vp;
2281 int a_attrnamespace;
2282 kauth_cred_t a_cred;
2283 struct proc *a_p;
2284 } */ *ap = v;
2285 struct vnode *vp = ap->a_vp;
2286 struct inode *ip = VTOI(vp);
2287 struct ulfsmount *ump = ip->i_ump;
2288 //struct fs *fs = ip->i_lfs;
2289 int error;
2290
2291 if (ump->um_fstype == ULFS1) {
2292 #ifdef LFS_EXTATTR
2293 fstrans_start(vp->v_mount, FSTRANS_SHARED);
2294 error = ulfs_deleteextattr(ap);
2295 fstrans_done(vp->v_mount);
2296 #else
2297 error = EOPNOTSUPP;
2298 #endif
2299 return error;
2300 }
2301
2302 /* XXX Not implemented for ULFS2 file systems. */
2303 return (EOPNOTSUPP);
2304 }
2305