lfs_vnops.c revision 1.308 1 /* $NetBSD: lfs_vnops.c,v 1.308 2017/04/01 01:50:02 maya Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * Copyright (c) 1986, 1989, 1991, 1993, 1995
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95
60 */
61
62 /* from NetBSD: ufs_vnops.c,v 1.232 2016/05/19 18:32:03 riastradh Exp */
63 /*-
64 * Copyright (c) 2008 The NetBSD Foundation, Inc.
65 * All rights reserved.
66 *
67 * This code is derived from software contributed to The NetBSD Foundation
68 * by Wasabi Systems, Inc.
69 *
70 * Redistribution and use in source and binary forms, with or without
71 * modification, are permitted provided that the following conditions
72 * are met:
73 * 1. Redistributions of source code must retain the above copyright
74 * notice, this list of conditions and the following disclaimer.
75 * 2. Redistributions in binary form must reproduce the above copyright
76 * notice, this list of conditions and the following disclaimer in the
77 * documentation and/or other materials provided with the distribution.
78 *
79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
89 * POSSIBILITY OF SUCH DAMAGE.
90 */
91 /*
92 * Copyright (c) 1982, 1986, 1989, 1993, 1995
93 * The Regents of the University of California. All rights reserved.
94 * (c) UNIX System Laboratories, Inc.
95 * All or some portions of this file are derived from material licensed
96 * to the University of California by American Telephone and Telegraph
97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
98 * the permission of UNIX System Laboratories, Inc.
99 *
100 * Redistribution and use in source and binary forms, with or without
101 * modification, are permitted provided that the following conditions
102 * are met:
103 * 1. Redistributions of source code must retain the above copyright
104 * notice, this list of conditions and the following disclaimer.
105 * 2. Redistributions in binary form must reproduce the above copyright
106 * notice, this list of conditions and the following disclaimer in the
107 * documentation and/or other materials provided with the distribution.
108 * 3. Neither the name of the University nor the names of its contributors
109 * may be used to endorse or promote products derived from this software
110 * without specific prior written permission.
111 *
112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
122 * SUCH DAMAGE.
123 *
124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95
125 */
126
127 #include <sys/cdefs.h>
128 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.308 2017/04/01 01:50:02 maya Exp $");
129
130 #ifdef _KERNEL_OPT
131 #include "opt_compat_netbsd.h"
132 #include "opt_uvm_page_trkown.h"
133 #endif
134
135 #include <sys/param.h>
136 #include <sys/systm.h>
137 #include <sys/namei.h>
138 #include <sys/resourcevar.h>
139 #include <sys/kernel.h>
140 #include <sys/file.h>
141 #include <sys/stat.h>
142 #include <sys/buf.h>
143 #include <sys/proc.h>
144 #include <sys/mount.h>
145 #include <sys/vnode.h>
146 #include <sys/pool.h>
147 #include <sys/signalvar.h>
148 #include <sys/kauth.h>
149 #include <sys/syslog.h>
150
151 #include <miscfs/fifofs/fifo.h>
152 #include <miscfs/genfs/genfs.h>
153 #include <miscfs/specfs/specdev.h>
154
155 #include <ufs/lfs/ulfs_inode.h>
156 #include <ufs/lfs/ulfsmount.h>
157 #include <ufs/lfs/ulfs_bswap.h>
158 #include <ufs/lfs/ulfs_extern.h>
159
160 #include <uvm/uvm.h>
161 #include <uvm/uvm_pmap.h>
162 #include <uvm/uvm_stat.h>
163 #include <uvm/uvm_pager.h>
164
165 #include <ufs/lfs/lfs.h>
166 #include <ufs/lfs/lfs_accessors.h>
167 #include <ufs/lfs/lfs_kernel.h>
168 #include <ufs/lfs/lfs_extern.h>
169
/*
 * Defined outside this file.  In this file only its address is used,
 * as the wakeup() channel that pokes the writer daemon.
 */
extern pid_t lfs_writer_daemon;

/*
 * When non-zero, lfs_fsync() answers FSYNC_LAZY requests with success
 * without putting the vnode on the paging chain.
 */
int lfs_ignore_lazy_sync = 1;

/* Extended-attribute vnode operations installed in the op tables below. */
static int lfs_openextattr(void *v);
static int lfs_closeextattr(void *v);
static int lfs_getextattr(void *v);
static int lfs_setextattr(void *v);
static int lfs_listextattr(void *v);
static int lfs_deleteextattr(void *v);

/* Shared inode-allocation helper for create, mknod and symlink. */
static int lfs_makeinode(struct vattr *vap, struct vnode *dvp,
    const struct ulfs_lookup_results *ulr,
    struct vnode **vpp, struct componentname *cnp);
183
184 /* Global vfs data structures for lfs. */
185 int (**lfs_vnodeop_p)(void *);
186 const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
187 { &vop_default_desc, vn_default_error },
188 { &vop_lookup_desc, ulfs_lookup }, /* lookup */
189 { &vop_create_desc, lfs_create }, /* create */
190 { &vop_whiteout_desc, ulfs_whiteout }, /* whiteout */
191 { &vop_mknod_desc, lfs_mknod }, /* mknod */
192 { &vop_open_desc, ulfs_open }, /* open */
193 { &vop_close_desc, lfs_close }, /* close */
194 { &vop_access_desc, ulfs_access }, /* access */
195 { &vop_getattr_desc, lfs_getattr }, /* getattr */
196 { &vop_setattr_desc, lfs_setattr }, /* setattr */
197 { &vop_read_desc, lfs_read }, /* read */
198 { &vop_write_desc, lfs_write }, /* write */
199 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */
200 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */
201 { &vop_ioctl_desc, ulfs_ioctl }, /* ioctl */
202 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */
203 { &vop_poll_desc, ulfs_poll }, /* poll */
204 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */
205 { &vop_revoke_desc, ulfs_revoke }, /* revoke */
206 { &vop_mmap_desc, lfs_mmap }, /* mmap */
207 { &vop_fsync_desc, lfs_fsync }, /* fsync */
208 { &vop_seek_desc, ulfs_seek }, /* seek */
209 { &vop_remove_desc, lfs_remove }, /* remove */
210 { &vop_link_desc, lfs_link }, /* link */
211 { &vop_rename_desc, lfs_rename }, /* rename */
212 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */
213 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */
214 { &vop_symlink_desc, lfs_symlink }, /* symlink */
215 { &vop_readdir_desc, ulfs_readdir }, /* readdir */
216 { &vop_readlink_desc, ulfs_readlink }, /* readlink */
217 { &vop_abortop_desc, ulfs_abortop }, /* abortop */
218 { &vop_inactive_desc, lfs_inactive }, /* inactive */
219 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
220 { &vop_lock_desc, ulfs_lock }, /* lock */
221 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
222 { &vop_bmap_desc, ulfs_bmap }, /* bmap */
223 { &vop_strategy_desc, lfs_strategy }, /* strategy */
224 { &vop_print_desc, ulfs_print }, /* print */
225 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
226 { &vop_pathconf_desc, ulfs_pathconf }, /* pathconf */
227 { &vop_advlock_desc, ulfs_advlock }, /* advlock */
228 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
229 { &vop_getpages_desc, lfs_getpages }, /* getpages */
230 { &vop_putpages_desc, lfs_putpages }, /* putpages */
231 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
232 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
233 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
234 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
235 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
236 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
237 { NULL, NULL }
238 };
239 const struct vnodeopv_desc lfs_vnodeop_opv_desc =
240 { &lfs_vnodeop_p, lfs_vnodeop_entries };
241
242 int (**lfs_specop_p)(void *);
243 const struct vnodeopv_entry_desc lfs_specop_entries[] = {
244 { &vop_default_desc, vn_default_error },
245 { &vop_lookup_desc, spec_lookup }, /* lookup */
246 { &vop_create_desc, spec_create }, /* create */
247 { &vop_mknod_desc, spec_mknod }, /* mknod */
248 { &vop_open_desc, spec_open }, /* open */
249 { &vop_close_desc, lfsspec_close }, /* close */
250 { &vop_access_desc, ulfs_access }, /* access */
251 { &vop_getattr_desc, lfs_getattr }, /* getattr */
252 { &vop_setattr_desc, lfs_setattr }, /* setattr */
253 { &vop_read_desc, ulfsspec_read }, /* read */
254 { &vop_write_desc, ulfsspec_write }, /* write */
255 { &vop_fallocate_desc, spec_fallocate }, /* fallocate */
256 { &vop_fdiscard_desc, spec_fdiscard }, /* fdiscard */
257 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
258 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
259 { &vop_poll_desc, spec_poll }, /* poll */
260 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */
261 { &vop_revoke_desc, spec_revoke }, /* revoke */
262 { &vop_mmap_desc, spec_mmap }, /* mmap */
263 { &vop_fsync_desc, spec_fsync }, /* fsync */
264 { &vop_seek_desc, spec_seek }, /* seek */
265 { &vop_remove_desc, spec_remove }, /* remove */
266 { &vop_link_desc, spec_link }, /* link */
267 { &vop_rename_desc, spec_rename }, /* rename */
268 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
269 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
270 { &vop_symlink_desc, spec_symlink }, /* symlink */
271 { &vop_readdir_desc, spec_readdir }, /* readdir */
272 { &vop_readlink_desc, spec_readlink }, /* readlink */
273 { &vop_abortop_desc, spec_abortop }, /* abortop */
274 { &vop_inactive_desc, lfs_inactive }, /* inactive */
275 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
276 { &vop_lock_desc, ulfs_lock }, /* lock */
277 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
278 { &vop_bmap_desc, spec_bmap }, /* bmap */
279 { &vop_strategy_desc, spec_strategy }, /* strategy */
280 { &vop_print_desc, ulfs_print }, /* print */
281 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
282 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
283 { &vop_advlock_desc, spec_advlock }, /* advlock */
284 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */
285 { &vop_getpages_desc, spec_getpages }, /* getpages */
286 { &vop_putpages_desc, spec_putpages }, /* putpages */
287 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
288 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
289 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
290 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
291 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
292 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
293 { NULL, NULL }
294 };
295 const struct vnodeopv_desc lfs_specop_opv_desc =
296 { &lfs_specop_p, lfs_specop_entries };
297
298 int (**lfs_fifoop_p)(void *);
299 const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
300 { &vop_default_desc, vn_default_error },
301 { &vop_lookup_desc, vn_fifo_bypass }, /* lookup */
302 { &vop_create_desc, vn_fifo_bypass }, /* create */
303 { &vop_mknod_desc, vn_fifo_bypass }, /* mknod */
304 { &vop_open_desc, vn_fifo_bypass }, /* open */
305 { &vop_close_desc, lfsfifo_close }, /* close */
306 { &vop_access_desc, ulfs_access }, /* access */
307 { &vop_getattr_desc, lfs_getattr }, /* getattr */
308 { &vop_setattr_desc, lfs_setattr }, /* setattr */
309 { &vop_read_desc, ulfsfifo_read }, /* read */
310 { &vop_write_desc, ulfsfifo_write }, /* write */
311 { &vop_fallocate_desc, vn_fifo_bypass }, /* fallocate */
312 { &vop_fdiscard_desc, vn_fifo_bypass }, /* fdiscard */
313 { &vop_ioctl_desc, vn_fifo_bypass }, /* ioctl */
314 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */
315 { &vop_poll_desc, vn_fifo_bypass }, /* poll */
316 { &vop_kqfilter_desc, vn_fifo_bypass }, /* kqfilter */
317 { &vop_revoke_desc, vn_fifo_bypass }, /* revoke */
318 { &vop_mmap_desc, vn_fifo_bypass }, /* mmap */
319 { &vop_fsync_desc, vn_fifo_bypass }, /* fsync */
320 { &vop_seek_desc, vn_fifo_bypass }, /* seek */
321 { &vop_remove_desc, vn_fifo_bypass }, /* remove */
322 { &vop_link_desc, vn_fifo_bypass }, /* link */
323 { &vop_rename_desc, vn_fifo_bypass }, /* rename */
324 { &vop_mkdir_desc, vn_fifo_bypass }, /* mkdir */
325 { &vop_rmdir_desc, vn_fifo_bypass }, /* rmdir */
326 { &vop_symlink_desc, vn_fifo_bypass }, /* symlink */
327 { &vop_readdir_desc, vn_fifo_bypass }, /* readdir */
328 { &vop_readlink_desc, vn_fifo_bypass }, /* readlink */
329 { &vop_abortop_desc, vn_fifo_bypass }, /* abortop */
330 { &vop_inactive_desc, lfs_inactive }, /* inactive */
331 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */
332 { &vop_lock_desc, ulfs_lock }, /* lock */
333 { &vop_unlock_desc, ulfs_unlock }, /* unlock */
334 { &vop_bmap_desc, vn_fifo_bypass }, /* bmap */
335 { &vop_strategy_desc, vn_fifo_bypass }, /* strategy */
336 { &vop_print_desc, ulfs_print }, /* print */
337 { &vop_islocked_desc, ulfs_islocked }, /* islocked */
338 { &vop_pathconf_desc, vn_fifo_bypass }, /* pathconf */
339 { &vop_advlock_desc, vn_fifo_bypass }, /* advlock */
340 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */
341 { &vop_putpages_desc, vn_fifo_bypass }, /* putpages */
342 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */
343 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */
344 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */
345 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */
346 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */
347 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */
348 { NULL, NULL }
349 };
350 const struct vnodeopv_desc lfs_fifoop_opv_desc =
351 { &lfs_fifoop_p, lfs_fifoop_entries };
352
353 #define LFS_READWRITE
354 #include <ufs/lfs/ulfs_readwrite.c>
355 #undef LFS_READWRITE
356
357 /*
358 * Allocate a new inode.
359 */
360 static int
361 lfs_makeinode(struct vattr *vap, struct vnode *dvp,
362 const struct ulfs_lookup_results *ulr,
363 struct vnode **vpp, struct componentname *cnp)
364 {
365 struct inode *ip;
366 struct vnode *tvp;
367 int error;
368
369 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, &tvp);
370 if (error)
371 return error;
372 error = vn_lock(tvp, LK_EXCLUSIVE);
373 if (error) {
374 vrele(tvp);
375 return error;
376 }
377 lfs_mark_vnode(tvp);
378 *vpp = tvp;
379 ip = VTOI(tvp);
380 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
381 ip->i_nlink = 1;
382 DIP_ASSIGN(ip, nlink, 1);
383
384 /* Authorize setting SGID if needed. */
385 if (ip->i_mode & ISGID) {
386 error = kauth_authorize_vnode(cnp->cn_cred, KAUTH_VNODE_WRITE_SECURITY,
387 tvp, NULL, genfs_can_chmod(tvp->v_type, cnp->cn_cred, ip->i_uid,
388 ip->i_gid, MAKEIMODE(vap->va_type, vap->va_mode)));
389 if (error) {
390 ip->i_mode &= ~ISGID;
391 DIP_ASSIGN(ip, mode, ip->i_mode);
392 }
393 }
394
395 if (cnp->cn_flags & ISWHITEOUT) {
396 ip->i_flags |= UF_OPAQUE;
397 DIP_ASSIGN(ip, flags, ip->i_flags);
398 }
399
400 /*
401 * Make sure inode goes to disk before directory entry.
402 */
403 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0)
404 goto bad;
405 error = ulfs_direnter(dvp, ulr, tvp,
406 cnp, ip->i_number, LFS_IFTODT(ip->i_mode), NULL);
407 if (error)
408 goto bad;
409 *vpp = tvp;
410 return (0);
411
412 bad:
413 /*
414 * Write error occurred trying to update the inode
415 * or the directory so must deallocate the inode.
416 */
417 ip->i_nlink = 0;
418 DIP_ASSIGN(ip, nlink, 0);
419 ip->i_flag |= IN_CHANGE;
420 /* If IN_ADIROP, account for it */
421 lfs_unmark_vnode(tvp);
422 vput(tvp);
423 return (error);
424 }
425
426 /*
427 * Synch an open file.
428 */
429 /* ARGSUSED */
430 int
431 lfs_fsync(void *v)
432 {
433 struct vop_fsync_args /* {
434 struct vnode *a_vp;
435 kauth_cred_t a_cred;
436 int a_flags;
437 off_t offlo;
438 off_t offhi;
439 } */ *ap = v;
440 struct vnode *vp = ap->a_vp;
441 int error, wait;
442 struct inode *ip = VTOI(vp);
443 struct lfs *fs = ip->i_lfs;
444
445 /* If we're mounted read-only, don't try to sync. */
446 if (fs->lfs_ronly)
447 return 0;
448
449 /* If a removed vnode is being cleaned, no need to sync here. */
450 if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
451 return 0;
452
453 /*
454 * Trickle sync simply adds this vnode to the pager list, as if
455 * the pagedaemon had requested a pageout.
456 */
457 if (ap->a_flags & FSYNC_LAZY) {
458 if (lfs_ignore_lazy_sync == 0) {
459 mutex_enter(&lfs_lock);
460 if (!(ip->i_flags & IN_PAGING)) {
461 ip->i_flags |= IN_PAGING;
462 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
463 i_lfs_pchain);
464 }
465 wakeup(&lfs_writer_daemon);
466 mutex_exit(&lfs_lock);
467 }
468 return 0;
469 }
470
471 /*
472 * If a vnode is bring cleaned, flush it out before we try to
473 * reuse it. This prevents the cleaner from writing files twice
474 * in the same partial segment, causing an accounting underflow.
475 */
476 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) {
477 lfs_vflush(vp);
478 }
479
480 wait = (ap->a_flags & FSYNC_WAIT);
481 do {
482 mutex_enter(vp->v_interlock);
483 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
484 round_page(ap->a_offhi),
485 PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
486 if (error == EAGAIN) {
487 mutex_enter(&lfs_lock);
488 mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
489 "lfs_fsync", hz / 100 + 1, &lfs_lock);
490 mutex_exit(&lfs_lock);
491 }
492 } while (error == EAGAIN);
493 if (error)
494 return error;
495
496 if ((ap->a_flags & FSYNC_DATAONLY) == 0)
497 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);
498
499 if (error == 0 && ap->a_flags & FSYNC_CACHE) {
500 int l = 0;
501 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
502 curlwp->l_cred);
503 }
504 if (wait && !VPISEMPTY(vp))
505 LFS_SET_UINO(ip, IN_MODIFIED);
506
507 return error;
508 }
509
510 /*
511 * Take IN_ADIROP off, then call ulfs_inactive.
512 */
513 int
514 lfs_inactive(void *v)
515 {
516 struct vop_inactive_args /* {
517 struct vnode *a_vp;
518 } */ *ap = v;
519
520 lfs_unmark_vnode(ap->a_vp);
521
522 /*
523 * The Ifile is only ever inactivated on unmount.
524 * Streamline this process by not giving it more dirty blocks.
525 */
526 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
527 mutex_enter(&lfs_lock);
528 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
529 mutex_exit(&lfs_lock);
530 VOP_UNLOCK(ap->a_vp);
531 return 0;
532 }
533
534 #ifdef DEBUG
535 /*
536 * This might happen on unmount.
537 * XXX If it happens at any other time, it should be a panic.
538 */
539 if (ap->a_vp->v_uflag & VU_DIROP) {
540 struct inode *ip = VTOI(ap->a_vp);
541 printf("lfs_inactive: inactivating VU_DIROP? ino = %llu\n",
542 (unsigned long long) ip->i_number);
543 }
544 #endif /* DIAGNOSTIC */
545
546 return ulfs_inactive(v);
547 }
548
549 int
550 lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
551 {
552 struct lfs *fs;
553 int error;
554
555 KASSERT(VOP_ISLOCKED(dvp));
556 KASSERT(vp == NULL || VOP_ISLOCKED(vp));
557
558 fs = VTOI(dvp)->i_lfs;
559
560 ASSERT_NO_SEGLOCK(fs);
561 /*
562 * LFS_NRESERVE calculates direct and indirect blocks as well
563 * as an inode block; an overestimate in most cases.
564 */
565 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
566 return (error);
567
568 restart:
569 mutex_enter(&lfs_lock);
570 if (fs->lfs_dirops == 0) {
571 mutex_exit(&lfs_lock);
572 lfs_check(dvp, LFS_UNUSED_LBN, 0);
573 mutex_enter(&lfs_lock);
574 }
575 while (fs->lfs_writer) {
576 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH,
577 "lfs_sdirop", 0, &lfs_lock);
578 if (error == EINTR) {
579 mutex_exit(&lfs_lock);
580 goto unreserve;
581 }
582 }
583 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
584 wakeup(&lfs_writer_daemon);
585 mutex_exit(&lfs_lock);
586 preempt();
587 goto restart;
588 }
589
590 if (lfs_dirvcount > LFS_MAX_DIROP) {
591 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
592 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
593 if ((error = mtsleep(&lfs_dirvcount,
594 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
595 &lfs_lock)) != 0) {
596 mutex_exit(&lfs_lock);
597 goto unreserve;
598 }
599 mutex_exit(&lfs_lock);
600 goto restart;
601 }
602
603 ++fs->lfs_dirops;
604 /* fs->lfs_doifile = 1; */ /* XXX why? --ks */
605 mutex_exit(&lfs_lock);
606
607 /* Hold a reference so SET_ENDOP will be happy */
608 vref(dvp);
609 if (vp) {
610 vref(vp);
611 MARK_VNODE(vp);
612 }
613
614 MARK_VNODE(dvp);
615 return 0;
616
617 unreserve:
618 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
619 return error;
620 }
621
622 /*
623 * Opposite of lfs_set_dirop... mostly. For now at least must call
624 * UNMARK_VNODE(dvp) explicitly first. (XXX: clean that up)
625 */
626 void
627 lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str)
628 {
629 mutex_enter(&lfs_lock);
630 --fs->lfs_dirops;
631 if (!fs->lfs_dirops) {
632 if (fs->lfs_nadirop) {
633 panic("lfs_unset_dirop: %s: no dirops but "
634 " nadirop=%d", str,
635 fs->lfs_nadirop);
636 }
637 wakeup(&fs->lfs_writer);
638 mutex_exit(&lfs_lock);
639 lfs_check(dvp, LFS_UNUSED_LBN, 0);
640 } else {
641 mutex_exit(&lfs_lock);
642 }
643 lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs));
644 }
645
646 void
647 lfs_mark_vnode(struct vnode *vp)
648 {
649 struct inode *ip = VTOI(vp);
650 struct lfs *fs = ip->i_lfs;
651
652 mutex_enter(&lfs_lock);
653 if (!(ip->i_flag & IN_ADIROP)) {
654 if (!(vp->v_uflag & VU_DIROP)) {
655 mutex_exit(&lfs_lock);
656 vref(vp);
657 mutex_enter(&lfs_lock);
658 ++lfs_dirvcount;
659 ++fs->lfs_dirvcount;
660 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
661 vp->v_uflag |= VU_DIROP;
662 }
663 ++fs->lfs_nadirop;
664 ip->i_flag &= ~IN_CDIROP;
665 ip->i_flag |= IN_ADIROP;
666 } else
667 KASSERT(vp->v_uflag & VU_DIROP);
668 mutex_exit(&lfs_lock);
669 }
670
671 void
672 lfs_unmark_vnode(struct vnode *vp)
673 {
674 struct inode *ip = VTOI(vp);
675
676 mutex_enter(&lfs_lock);
677 if (ip && (ip->i_flag & IN_ADIROP)) {
678 KASSERT(vp->v_uflag & VU_DIROP);
679 --ip->i_lfs->lfs_nadirop;
680 ip->i_flag &= ~IN_ADIROP;
681 }
682 mutex_exit(&lfs_lock);
683 }
684
685 int
686 lfs_symlink(void *v)
687 {
688 struct vop_symlink_v3_args /* {
689 struct vnode *a_dvp;
690 struct vnode **a_vpp;
691 struct componentname *a_cnp;
692 struct vattr *a_vap;
693 char *a_target;
694 } */ *ap = v;
695 struct lfs *fs;
696 struct vnode *dvp, **vpp;
697 struct inode *ip;
698 struct ulfs_lookup_results *ulr;
699 ssize_t len; /* XXX should be size_t */
700 int error;
701
702 dvp = ap->a_dvp;
703 vpp = ap->a_vpp;
704
705 KASSERT(vpp != NULL);
706 KASSERT(*vpp == NULL);
707 KASSERT(ap->a_vap->va_type == VLNK);
708
709 /* XXX should handle this material another way */
710 ulr = &VTOI(ap->a_dvp)->i_crap;
711 ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
712
713 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
714 ASSERT_NO_SEGLOCK(fs);
715 if (fs->lfs_ronly) {
716 return EROFS;
717 }
718
719 error = lfs_set_dirop(dvp, NULL);
720 if (error)
721 return error;
722
723 error = lfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp);
724 if (error) {
725 goto out;
726 }
727
728 VN_KNOTE(ap->a_dvp, NOTE_WRITE);
729 ip = VTOI(*vpp);
730
731 /*
732 * This test is off by one. um_maxsymlinklen contains the
733 * number of bytes available, and we aren't storing a \0, so
734 * the test should properly be <=. However, it cannot be
735 * changed as this would break compatibility with existing fs
736 * images -- see the way ulfs_readlink() works.
737 */
738 len = strlen(ap->a_target);
739 if (len < ip->i_lfs->um_maxsymlinklen) {
740 memcpy((char *)SHORTLINK(ip), ap->a_target, len);
741 ip->i_size = len;
742 DIP_ASSIGN(ip, size, len);
743 uvm_vnp_setsize(*vpp, ip->i_size);
744 ip->i_flag |= IN_CHANGE | IN_UPDATE;
745 if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME)
746 ip->i_flag |= IN_ACCESS;
747 } else {
748 error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
749 IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
750 NULL);
751 }
752
753 VOP_UNLOCK(*vpp);
754 if (error)
755 vrele(*vpp);
756
757 out:
758 UNMARK_VNODE(dvp);
759 /* XXX: is it even possible for the symlink to get MARK'd? */
760 UNMARK_VNODE(*vpp);
761 if (error) {
762 *vpp = NULL;
763 }
764 lfs_unset_dirop(fs, dvp, "symlink");
765
766 vrele(dvp);
767 return (error);
768 }
769
770 int
771 lfs_mknod(void *v)
772 {
773 struct vop_mknod_v3_args /* {
774 struct vnode *a_dvp;
775 struct vnode **a_vpp;
776 struct componentname *a_cnp;
777 struct vattr *a_vap;
778 } */ *ap = v;
779 struct lfs *fs;
780 struct vnode *dvp, **vpp;
781 struct vattr *vap;
782 struct inode *ip;
783 int error;
784 ino_t ino;
785 struct ulfs_lookup_results *ulr;
786
787 dvp = ap->a_dvp;
788 vpp = ap->a_vpp;
789 vap = ap->a_vap;
790
791 KASSERT(vpp != NULL);
792 KASSERT(*vpp == NULL);
793
794 /* XXX should handle this material another way */
795 ulr = &VTOI(dvp)->i_crap;
796 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
797
798 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
799 ASSERT_NO_SEGLOCK(fs);
800 if (fs->lfs_ronly) {
801 return EROFS;
802 }
803
804 error = lfs_set_dirop(dvp, NULL);
805 if (error)
806 return error;
807
808 error = lfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
809
810 /* Either way we're done with the dirop at this point */
811 UNMARK_VNODE(dvp);
812 UNMARK_VNODE(*vpp);
813 lfs_unset_dirop(fs, dvp, "mknod");
814
815 if (error) {
816 vrele(dvp);
817 *vpp = NULL;
818 return (error);
819 }
820
821 VN_KNOTE(dvp, NOTE_WRITE);
822 ip = VTOI(*vpp);
823 ino = ip->i_number;
824 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
825
826 /*
827 * Call fsync to write the vnode so that we don't have to deal with
828 * flushing it when it's marked VU_DIROP or reclaiming.
829 *
830 * XXX KS - If we can't flush we also can't call vgone(), so must
831 * return. But, that leaves this vnode in limbo, also not good.
832 * Can this ever happen (barring hardware failure)?
833 */
834 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
835 panic("lfs_mknod: couldn't fsync (ino %llu)",
836 (unsigned long long) ino);
837 /* return (error); */
838 }
839
840 vrele(dvp);
841 KASSERT(error == 0);
842 VOP_UNLOCK(*vpp);
843 return (0);
844 }
845
846 /*
847 * Create a regular file
848 */
849 int
850 lfs_create(void *v)
851 {
852 struct vop_create_v3_args /* {
853 struct vnode *a_dvp;
854 struct vnode **a_vpp;
855 struct componentname *a_cnp;
856 struct vattr *a_vap;
857 } */ *ap = v;
858 struct lfs *fs;
859 struct vnode *dvp, **vpp;
860 struct vattr *vap;
861 struct ulfs_lookup_results *ulr;
862 int error;
863
864 dvp = ap->a_dvp;
865 vpp = ap->a_vpp;
866 vap = ap->a_vap;
867
868 KASSERT(vpp != NULL);
869 KASSERT(*vpp == NULL);
870
871 /* XXX should handle this material another way */
872 ulr = &VTOI(dvp)->i_crap;
873 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));
874
875 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
876 ASSERT_NO_SEGLOCK(fs);
877 if (fs->lfs_ronly) {
878 return EROFS;
879 }
880
881 error = lfs_set_dirop(dvp, NULL);
882 if (error)
883 return error;
884
885 error = lfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
886 if (error) {
887 goto out;
888 }
889 VN_KNOTE(dvp, NOTE_WRITE);
890 VOP_UNLOCK(*vpp);
891
892 out:
893
894 UNMARK_VNODE(dvp);
895 UNMARK_VNODE(*vpp);
896 if (error) {
897 *vpp = NULL;
898 }
899 lfs_unset_dirop(fs, dvp, "create");
900
901 vrele(dvp);
902 return (error);
903 }
904
905 int
906 lfs_mkdir(void *v)
907 {
908 struct vop_mkdir_v3_args /* {
909 struct vnode *a_dvp;
910 struct vnode **a_vpp;
911 struct componentname *a_cnp;
912 struct vattr *a_vap;
913 } */ *ap = v;
914 struct lfs *fs;
915 struct vnode *dvp, *tvp, **vpp;
916 struct inode *dp, *ip;
917 struct componentname *cnp;
918 struct vattr *vap;
919 struct ulfs_lookup_results *ulr;
920 struct buf *bp;
921 LFS_DIRHEADER *dirp;
922 int dirblksiz;
923 int error;
924
925 dvp = ap->a_dvp;
926 tvp = NULL;
927 vpp = ap->a_vpp;
928 cnp = ap->a_cnp;
929 vap = ap->a_vap;
930
931 dp = VTOI(dvp);
932 ip = NULL;
933
934 KASSERT(vap->va_type == VDIR);
935 KASSERT(vpp != NULL);
936 KASSERT(*vpp == NULL);
937
938 /* XXX should handle this material another way */
939 ulr = &dp->i_crap;
940 ULFS_CHECK_CRAPCOUNTER(dp);
941
942 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
943 ASSERT_NO_SEGLOCK(fs);
944 if (fs->lfs_ronly) {
945 return EROFS;
946 }
947 dirblksiz = fs->um_dirblksiz;
948 /* XXX dholland 20150911 I believe this to be true, but... */
949 //KASSERT(dirblksiz == LFS_DIRBLKSIZ);
950
951 error = lfs_set_dirop(dvp, NULL);
952 if (error)
953 return error;
954
955 if ((nlink_t)dp->i_nlink >= LINK_MAX) {
956 error = EMLINK;
957 goto out;
958 }
959
960 /*
961 * Must simulate part of lfs_makeinode here to acquire the inode,
962 * but not have it entered in the parent directory. The entry is
963 * made later after writing "." and ".." entries.
964 */
965 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, ap->a_vpp);
966 if (error)
967 goto out;
968
969 error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
970 if (error) {
971 vrele(*ap->a_vpp);
972 *ap->a_vpp = NULL;
973 goto out;
974 }
975
976 tvp = *ap->a_vpp;
977 lfs_mark_vnode(tvp);
978 ip = VTOI(tvp);
979 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
980 ip->i_nlink = 2;
981 DIP_ASSIGN(ip, nlink, 2);
982 if (cnp->cn_flags & ISWHITEOUT) {
983 ip->i_flags |= UF_OPAQUE;
984 DIP_ASSIGN(ip, flags, ip->i_flags);
985 }
986
987 /*
988 * Bump link count in parent directory to reflect work done below.
989 */
990 dp->i_nlink++;
991 DIP_ASSIGN(dp, nlink, dp->i_nlink);
992 dp->i_flag |= IN_CHANGE;
993 if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
994 goto bad;
995
996 /*
997 * Initialize directory with "." and "..". This used to use a
998 * static template but that adds moving parts for very little
999 * benefit.
1000 */
1001 if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
1002 B_CLRBUF, &bp)) != 0)
1003 goto bad;
1004 ip->i_size = dirblksiz;
1005 DIP_ASSIGN(ip, size, dirblksiz);
1006 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1007 uvm_vnp_setsize(tvp, ip->i_size);
1008 dirp = bp->b_data;
1009
1010 /* . */
1011 lfs_dir_setino(fs, dirp, ip->i_number);
1012 lfs_dir_setreclen(fs, dirp, LFS_DIRECTSIZ(fs, 1));
1013 lfs_dir_settype(fs, dirp, LFS_DT_DIR);
1014 lfs_dir_setnamlen(fs, dirp, 1);
1015 lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), ".", 1,
1016 LFS_DIRECTSIZ(fs, 1));
1017 dirp = LFS_NEXTDIR(fs, dirp);
1018 /* .. */
1019 lfs_dir_setino(fs, dirp, dp->i_number);
1020 lfs_dir_setreclen(fs, dirp, dirblksiz - LFS_DIRECTSIZ(fs, 1));
1021 lfs_dir_settype(fs, dirp, LFS_DT_DIR);
1022 lfs_dir_setnamlen(fs, dirp, 2);
1023 lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), "..", 2,
1024 dirblksiz - LFS_DIRECTSIZ(fs, 1));
1025
1026 /*
1027 * Directory set up; now install its entry in the parent directory.
1028 */
1029 if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
1030 goto bad;
1031 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
1032 goto bad;
1033 }
1034 error = ulfs_direnter(dvp, ulr, tvp,
1035 cnp, ip->i_number, LFS_IFTODT(ip->i_mode), bp);
1036 bad:
1037 if (error == 0) {
1038 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
1039 VOP_UNLOCK(tvp);
1040 } else {
1041 dp->i_nlink--;
1042 DIP_ASSIGN(dp, nlink, dp->i_nlink);
1043 dp->i_flag |= IN_CHANGE;
1044 /*
1045 * No need to do an explicit lfs_truncate here, vrele will
1046 * do this for us because we set the link count to 0.
1047 */
1048 ip->i_nlink = 0;
1049 DIP_ASSIGN(ip, nlink, 0);
1050 ip->i_flag |= IN_CHANGE;
1051 /* If IN_ADIROP, account for it */
1052 lfs_unmark_vnode(tvp);
1053 vput(tvp);
1054 }
1055
1056 out:
1057 UNMARK_VNODE(dvp);
1058 UNMARK_VNODE(*vpp);
1059 if (error) {
1060 *vpp = NULL;
1061 }
1062 lfs_unset_dirop(fs, dvp, "mkdir");
1063
1064 vrele(dvp);
1065 return (error);
1066 }
1067
1068 int
1069 lfs_remove(void *v)
1070 {
1071 struct vop_remove_args /* {
1072 struct vnode *a_dvp;
1073 struct vnode *a_vp;
1074 struct componentname *a_cnp;
1075 } */ *ap = v;
1076 struct vnode *dvp, *vp;
1077 struct inode *ip;
1078 int error;
1079
1080 dvp = ap->a_dvp;
1081 vp = ap->a_vp;
1082 ip = VTOI(vp);
1083 if ((error = lfs_set_dirop(dvp, vp)) != 0) {
1084 if (dvp == vp)
1085 vrele(vp);
1086 else
1087 vput(vp);
1088 vput(dvp);
1089 return error;
1090 }
1091 error = ulfs_remove(ap);
1092 if (ip->i_nlink == 0)
1093 lfs_orphan(ip->i_lfs, ip->i_number);
1094
1095 UNMARK_VNODE(dvp);
1096 if (ap->a_vp) {
1097 UNMARK_VNODE(ap->a_vp);
1098 }
1099 lfs_unset_dirop(ip->i_lfs, dvp, "remove");
1100 vrele(dvp);
1101 if (ap->a_vp) {
1102 vrele(ap->a_vp);
1103 }
1104
1105 return (error);
1106 }
1107
1108 int
1109 lfs_rmdir(void *v)
1110 {
1111 struct vop_rmdir_args /* {
1112 struct vnodeop_desc *a_desc;
1113 struct vnode *a_dvp;
1114 struct vnode *a_vp;
1115 struct componentname *a_cnp;
1116 } */ *ap = v;
1117 struct vnode *vp;
1118 struct inode *ip;
1119 int error;
1120
1121 vp = ap->a_vp;
1122 ip = VTOI(vp);
1123 if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) {
1124 if (ap->a_dvp == vp)
1125 vrele(ap->a_dvp);
1126 else
1127 vput(ap->a_dvp);
1128 vput(vp);
1129 return error;
1130 }
1131 error = ulfs_rmdir(ap);
1132 if (ip->i_nlink == 0)
1133 lfs_orphan(ip->i_lfs, ip->i_number);
1134
1135 UNMARK_VNODE(ap->a_dvp);
1136 if (ap->a_vp) {
1137 UNMARK_VNODE(ap->a_vp);
1138 }
1139 lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir");
1140 vrele(ap->a_dvp);
1141 if (ap->a_vp) {
1142 vrele(ap->a_vp);
1143 }
1144
1145 return (error);
1146 }
1147
1148 int
1149 lfs_link(void *v)
1150 {
1151 struct vop_link_v2_args /* {
1152 struct vnode *a_dvp;
1153 struct vnode *a_vp;
1154 struct componentname *a_cnp;
1155 } */ *ap = v;
1156 struct lfs *fs;
1157 struct vnode *dvp;
1158 int error;
1159
1160 dvp = ap->a_dvp;
1161
1162 fs = VFSTOULFS(dvp->v_mount)->um_lfs;
1163 ASSERT_NO_SEGLOCK(fs);
1164 if (fs->lfs_ronly) {
1165 return EROFS;
1166 }
1167
1168 error = lfs_set_dirop(dvp, NULL);
1169 if (error) {
1170 return error;
1171 }
1172
1173 error = ulfs_link(ap);
1174
1175 UNMARK_VNODE(dvp);
1176 lfs_unset_dirop(fs, dvp, "link");
1177 vrele(dvp);
1178
1179 return (error);
1180 }
1181
1182 /* XXX hack to avoid calling ITIMES in getattr */
1183 int
1184 lfs_getattr(void *v)
1185 {
1186 struct vop_getattr_args /* {
1187 struct vnode *a_vp;
1188 struct vattr *a_vap;
1189 kauth_cred_t a_cred;
1190 } */ *ap = v;
1191 struct vnode *vp = ap->a_vp;
1192 struct inode *ip = VTOI(vp);
1193 struct vattr *vap = ap->a_vap;
1194 struct lfs *fs = ip->i_lfs;
1195
1196 /*
1197 * Copy from inode table
1198 */
1199 vap->va_fsid = ip->i_dev;
1200 vap->va_fileid = ip->i_number;
1201 vap->va_mode = ip->i_mode & ~LFS_IFMT;
1202 vap->va_nlink = ip->i_nlink;
1203 vap->va_uid = ip->i_uid;
1204 vap->va_gid = ip->i_gid;
1205 switch (vp->v_type) {
1206 case VBLK:
1207 case VCHR:
1208 vap->va_rdev = (dev_t)lfs_dino_getrdev(fs, ip->i_din);
1209 break;
1210 default:
1211 vap->va_rdev = NODEV;
1212 break;
1213 }
1214 vap->va_size = vp->v_size;
1215 vap->va_atime.tv_sec = lfs_dino_getatime(fs, ip->i_din);
1216 vap->va_atime.tv_nsec = lfs_dino_getatimensec(fs, ip->i_din);
1217 vap->va_mtime.tv_sec = lfs_dino_getmtime(fs, ip->i_din);
1218 vap->va_mtime.tv_nsec = lfs_dino_getmtimensec(fs, ip->i_din);
1219 vap->va_ctime.tv_sec = lfs_dino_getctime(fs, ip->i_din);
1220 vap->va_ctime.tv_nsec = lfs_dino_getctimensec(fs, ip->i_din);
1221 vap->va_flags = ip->i_flags;
1222 vap->va_gen = ip->i_gen;
1223 /* this doesn't belong here */
1224 if (vp->v_type == VBLK)
1225 vap->va_blocksize = BLKDEV_IOSIZE;
1226 else if (vp->v_type == VCHR)
1227 vap->va_blocksize = MAXBSIZE;
1228 else
1229 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
1230 vap->va_bytes = lfs_fsbtob(fs, ip->i_lfs_effnblks);
1231 vap->va_type = vp->v_type;
1232 vap->va_filerev = ip->i_modrev;
1233 return (0);
1234 }
1235
1236 /*
1237 * Check to make sure the inode blocks won't choke the buffer
1238 * cache, then call ulfs_setattr as usual.
1239 */
1240 int
1241 lfs_setattr(void *v)
1242 {
1243 struct vop_setattr_args /* {
1244 struct vnode *a_vp;
1245 struct vattr *a_vap;
1246 kauth_cred_t a_cred;
1247 } */ *ap = v;
1248 struct vnode *vp = ap->a_vp;
1249
1250 lfs_check(vp, LFS_UNUSED_LBN, 0);
1251 return ulfs_setattr(v);
1252 }
1253
1254 /*
1255 * Release the block we hold on lfs_newseg wrapping. Called on file close,
1256 * or explicitly from LFCNWRAPGO. Called with the interlock held.
1257 */
1258 static int
1259 lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
1260 {
1261 if (fs->lfs_stoplwp != curlwp)
1262 return EBUSY;
1263
1264 fs->lfs_stoplwp = NULL;
1265 cv_signal(&fs->lfs_stopcv);
1266
1267 KASSERT(fs->lfs_nowrap > 0);
1268 if (fs->lfs_nowrap <= 0) {
1269 return 0;
1270 }
1271
1272 if (--fs->lfs_nowrap == 0) {
1273 log(LOG_NOTICE, "%s: re-enabled log wrap\n",
1274 lfs_sb_getfsmnt(fs));
1275 wakeup(&fs->lfs_wrappass);
1276 lfs_wakeup_cleaner(fs);
1277 }
1278 if (waitfor) {
1279 mtsleep(&fs->lfs_nextsegsleep, PCATCH | PUSER, "segment",
1280 0, &lfs_lock);
1281 }
1282
1283 return 0;
1284 }
1285
1286 /*
1287 * Close called.
1288 *
1289 * Update the times on the inode.
1290 */
1291 /* ARGSUSED */
1292 int
1293 lfs_close(void *v)
1294 {
1295 struct vop_close_args /* {
1296 struct vnode *a_vp;
1297 int a_fflag;
1298 kauth_cred_t a_cred;
1299 } */ *ap = v;
1300 struct vnode *vp = ap->a_vp;
1301 struct inode *ip = VTOI(vp);
1302 struct lfs *fs = ip->i_lfs;
1303
1304 if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
1305 fs->lfs_stoplwp == curlwp) {
1306 mutex_enter(&lfs_lock);
1307 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
1308 lfs_wrapgo(fs, ip, 0);
1309 mutex_exit(&lfs_lock);
1310 }
1311
1312 if (vp == ip->i_lfs->lfs_ivnode &&
1313 vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
1314 return 0;
1315
1316 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) {
1317 LFS_ITIMES(ip, NULL, NULL, NULL);
1318 }
1319 return (0);
1320 }
1321
1322 /*
1323 * Close wrapper for special devices.
1324 *
1325 * Update the times on the inode then do device close.
1326 */
1327 int
1328 lfsspec_close(void *v)
1329 {
1330 struct vop_close_args /* {
1331 struct vnode *a_vp;
1332 int a_fflag;
1333 kauth_cred_t a_cred;
1334 } */ *ap = v;
1335 struct vnode *vp;
1336 struct inode *ip;
1337
1338 vp = ap->a_vp;
1339 ip = VTOI(vp);
1340 if (vp->v_usecount > 1) {
1341 LFS_ITIMES(ip, NULL, NULL, NULL);
1342 }
1343 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
1344 }
1345
1346 /*
1347 * Close wrapper for fifo's.
1348 *
1349 * Update the times on the inode then do device close.
1350 */
1351 int
1352 lfsfifo_close(void *v)
1353 {
1354 struct vop_close_args /* {
1355 struct vnode *a_vp;
1356 int a_fflag;
1357 kauth_cred_ a_cred;
1358 } */ *ap = v;
1359 struct vnode *vp;
1360 struct inode *ip;
1361
1362 vp = ap->a_vp;
1363 ip = VTOI(vp);
1364 if (ap->a_vp->v_usecount > 1) {
1365 LFS_ITIMES(ip, NULL, NULL, NULL);
1366 }
1367 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
1368 }
1369
1370 /*
1371 * Reclaim an inode so that it can be used for other purposes.
1372 */
1373
1374 int
1375 lfs_reclaim(void *v)
1376 {
1377 struct vop_reclaim_args /* {
1378 struct vnode *a_vp;
1379 } */ *ap = v;
1380 struct vnode *vp = ap->a_vp;
1381 struct inode *ip = VTOI(vp);
1382 struct lfs *fs = ip->i_lfs;
1383 int error;
1384
1385 /*
1386 * The inode must be freed and updated before being removed
1387 * from its hash chain. Other threads trying to gain a hold
1388 * or lock on the inode will be stalled.
1389 */
1390 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1391 lfs_vfree(vp, ip->i_number, ip->i_omode);
1392
1393 mutex_enter(&lfs_lock);
1394 LFS_CLR_UINO(ip, IN_ALLMOD);
1395 mutex_exit(&lfs_lock);
1396 if ((error = ulfs_reclaim(vp)))
1397 return (error);
1398
1399 /*
1400 * Take us off the paging and/or dirop queues if we were on them.
1401 * We shouldn't be on them.
1402 */
1403 mutex_enter(&lfs_lock);
1404 if (ip->i_flags & IN_PAGING) {
1405 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n",
1406 lfs_sb_getfsmnt(fs));
1407 ip->i_flags &= ~IN_PAGING;
1408 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
1409 }
1410 if (vp->v_uflag & VU_DIROP) {
1411 panic("reclaimed vnode is VU_DIROP");
1412 vp->v_uflag &= ~VU_DIROP;
1413 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
1414 }
1415 mutex_exit(&lfs_lock);
1416
1417 pool_put(&lfs_dinode_pool, ip->i_din);
1418 lfs_deregister_all(vp);
1419 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
1420 ip->inode_ext.lfs = NULL;
1421 genfs_node_destroy(vp);
1422 pool_put(&lfs_inode_pool, vp->v_data);
1423 vp->v_data = NULL;
1424 return (0);
1425 }
1426
1427 /*
1428 * Read a block from a storage device.
1429 *
1430 * Calculate the logical to physical mapping if not done already,
1431 * then call the device strategy routine.
1432 *
1433 * In order to avoid reading blocks that are in the process of being
1434 * written by the cleaner---and hence are not mutexed by the normal
1435 * buffer cache / page cache mechanisms---check for collisions before
1436 * reading.
1437 *
1438 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
1439 * the active cleaner test.
1440 *
1441 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
1442 */
1443 int
1444 lfs_strategy(void *v)
1445 {
1446 struct vop_strategy_args /* {
1447 struct vnode *a_vp;
1448 struct buf *a_bp;
1449 } */ *ap = v;
1450 struct buf *bp;
1451 struct lfs *fs;
1452 struct vnode *vp;
1453 struct inode *ip;
1454 daddr_t tbn;
1455 #define MAXLOOP 25
1456 int i, sn, error, slept, loopcount;
1457
1458 bp = ap->a_bp;
1459 vp = ap->a_vp;
1460 ip = VTOI(vp);
1461 fs = ip->i_lfs;
1462
1463 /* lfs uses its strategy routine only for read */
1464 KASSERT(bp->b_flags & B_READ);
1465
1466 if (vp->v_type == VBLK || vp->v_type == VCHR)
1467 panic("lfs_strategy: spec");
1468 KASSERT(bp->b_bcount != 0);
1469 if (bp->b_blkno == bp->b_lblkno) {
1470 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
1471 NULL);
1472 if (error) {
1473 bp->b_error = error;
1474 bp->b_resid = bp->b_bcount;
1475 biodone(bp);
1476 return (error);
1477 }
1478 if ((long)bp->b_blkno == -1) /* no valid data */
1479 clrbuf(bp);
1480 }
1481 if ((long)bp->b_blkno < 0) { /* block is not on disk */
1482 bp->b_resid = bp->b_bcount;
1483 biodone(bp);
1484 return (0);
1485 }
1486
1487 slept = 1;
1488 loopcount = 0;
1489 mutex_enter(&lfs_lock);
1490 while (slept && fs->lfs_seglock) {
1491 mutex_exit(&lfs_lock);
1492 /*
1493 * Look through list of intervals.
1494 * There will only be intervals to look through
1495 * if the cleaner holds the seglock.
1496 * Since the cleaner is synchronous, we can trust
1497 * the list of intervals to be current.
1498 */
1499 tbn = LFS_DBTOFSB(fs, bp->b_blkno);
1500 sn = lfs_dtosn(fs, tbn);
1501 slept = 0;
1502 for (i = 0; i < fs->lfs_cleanind; i++) {
1503 if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
1504 tbn >= fs->lfs_cleanint[i]) {
1505 DLOG((DLOG_CLEAN,
1506 "lfs_strategy: ino %llu lbn %" PRId64
1507 " ind %d sn %d fsb %" PRIx64
1508 " given sn %d fsb %" PRIx64 "\n",
1509 (unsigned long long) ip->i_number,
1510 bp->b_lblkno, i,
1511 lfs_dtosn(fs, fs->lfs_cleanint[i]),
1512 fs->lfs_cleanint[i], sn, tbn));
1513 DLOG((DLOG_CLEAN,
1514 "lfs_strategy: sleeping on ino %llu lbn %"
1515 PRId64 "\n",
1516 (unsigned long long) ip->i_number,
1517 bp->b_lblkno));
1518 mutex_enter(&lfs_lock);
1519 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
1520 /*
1521 * Cleaner can't wait for itself.
1522 * Instead, wait for the blocks
1523 * to be written to disk.
1524 * XXX we need pribio in the test
1525 * XXX here.
1526 */
1527 mtsleep(&fs->lfs_iocount,
1528 (PRIBIO + 1) | PNORELOCK,
1529 "clean2", hz/10 + 1,
1530 &lfs_lock);
1531 slept = 1;
1532 ++loopcount;
1533 break;
1534 } else if (fs->lfs_seglock) {
1535 mtsleep(&fs->lfs_seglock,
1536 (PRIBIO + 1) | PNORELOCK,
1537 "clean1", 0,
1538 &lfs_lock);
1539 slept = 1;
1540 break;
1541 }
1542 mutex_exit(&lfs_lock);
1543 }
1544 }
1545 mutex_enter(&lfs_lock);
1546 if (loopcount > MAXLOOP) {
1547 printf("lfs_strategy: breaking out of clean2 loop\n");
1548 break;
1549 }
1550 }
1551 mutex_exit(&lfs_lock);
1552
1553 vp = ip->i_devvp;
1554 return VOP_STRATEGY(vp, bp);
1555 }
1556
1557 /*
1558 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
1559 * Technically this is a checkpoint (the on-disk state is valid)
1560 * even though we are leaving out all the file data.
1561 */
1562 int
1563 lfs_flush_dirops(struct lfs *fs)
1564 {
1565 struct inode *ip, *nip;
1566 struct vnode *vp;
1567 extern int lfs_dostats; /* XXX this does not belong here */
1568 struct segment *sp;
1569 SEGSUM *ssp;
1570 int flags = 0;
1571 int error = 0;
1572
1573 ASSERT_MAYBE_SEGLOCK(fs);
1574 KASSERT(fs->lfs_nadirop == 0);
1575
1576 if (fs->lfs_ronly)
1577 return EROFS;
1578
1579 mutex_enter(&lfs_lock);
1580 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
1581 mutex_exit(&lfs_lock);
1582 return 0;
1583 } else
1584 mutex_exit(&lfs_lock);
1585
1586 if (lfs_dostats)
1587 ++lfs_stats.flush_invoked;
1588
1589 lfs_imtime(fs);
1590 lfs_seglock(fs, flags);
1591 sp = fs->lfs_sp;
1592
1593 /*
1594 * lfs_writevnodes, optimized to get dirops out of the way.
1595 * Only write dirops, and don't flush files' pages, only
1596 * blocks from the directories.
1597 *
1598 * We don't need to vref these files because they are
1599 * dirops and so hold an extra reference until the
1600 * segunlock clears them of that status.
1601 *
1602 * We don't need to check for IN_ADIROP because we know that
1603 * no dirops are active.
1604 *
1605 */
1606 mutex_enter(&lfs_lock);
1607 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
1608 nip = TAILQ_NEXT(ip, i_lfs_dchain);
1609 mutex_exit(&lfs_lock);
1610 vp = ITOV(ip);
1611 mutex_enter(vp->v_interlock);
1612
1613 KASSERT((ip->i_flag & IN_ADIROP) == 0);
1614 KASSERT(vp->v_uflag & VU_DIROP);
1615 KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0);
1616
1617 /*
1618 * All writes to directories come from dirops; all
1619 * writes to files' direct blocks go through the page
1620 * cache, which we're not touching. Reads to files
1621 * and/or directories will not be affected by writing
1622 * directory blocks inodes and file inodes. So we don't
1623 * really need to lock.
1624 */
1625 if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
1626 mutex_exit(vp->v_interlock);
1627 mutex_enter(&lfs_lock);
1628 continue;
1629 }
1630 mutex_exit(vp->v_interlock);
1631 /* XXX see below
1632 * waslocked = VOP_ISLOCKED(vp);
1633 */
1634 if (vp->v_type != VREG &&
1635 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) {
1636 error = lfs_writefile(fs, sp, vp);
1637 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
1638 !(ip->i_flag & IN_ALLMOD)) {
1639 mutex_enter(&lfs_lock);
1640 LFS_SET_UINO(ip, IN_MODIFIED);
1641 mutex_exit(&lfs_lock);
1642 }
1643 if (error && (sp->seg_flags & SEGM_SINGLE)) {
1644 mutex_enter(&lfs_lock);
1645 error = EAGAIN;
1646 break;
1647 }
1648 }
1649 KDASSERT(ip->i_number != LFS_IFILE_INUM);
1650 error = lfs_writeinode(fs, sp, ip);
1651 mutex_enter(&lfs_lock);
1652 if (error && (sp->seg_flags & SEGM_SINGLE)) {
1653 error = EAGAIN;
1654 break;
1655 }
1656
1657 /*
1658 * We might need to update these inodes again,
1659 * for example, if they have data blocks to write.
1660 * Make sure that after this flush, they are still
1661 * marked IN_MODIFIED so that we don't forget to
1662 * write them.
1663 */
1664 /* XXX only for non-directories? --KS */
1665 LFS_SET_UINO(ip, IN_MODIFIED);
1666 }
1667 mutex_exit(&lfs_lock);
1668 /* We've written all the dirops there are */
1669 ssp = (SEGSUM *)sp->segsum;
1670 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT));
1671 lfs_finalize_fs_seguse(fs);
1672 (void) lfs_writeseg(fs, sp);
1673 lfs_segunlock(fs);
1674
1675 return error;
1676 }
1677
1678 /*
1679 * Flush all vnodes for which the pagedaemon has requested pageouts.
1680 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
1681 * has just run, this would be an error). If we have to skip a vnode
1682 * for any reason, just skip it; if we have to wait for the cleaner,
1683 * abort. The writer daemon will call us again later.
1684 */
1685 int
1686 lfs_flush_pchain(struct lfs *fs)
1687 {
1688 struct inode *ip, *nip;
1689 struct vnode *vp;
1690 extern int lfs_dostats;
1691 struct segment *sp;
1692 int error, error2;
1693
1694 ASSERT_NO_SEGLOCK(fs);
1695
1696 if (fs->lfs_ronly)
1697 return EROFS;
1698
1699 mutex_enter(&lfs_lock);
1700 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
1701 mutex_exit(&lfs_lock);
1702 return 0;
1703 } else
1704 mutex_exit(&lfs_lock);
1705
1706 /* Get dirops out of the way */
1707 if ((error = lfs_flush_dirops(fs)) != 0)
1708 return error;
1709
1710 if (lfs_dostats)
1711 ++lfs_stats.flush_invoked;
1712
1713 /*
1714 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
1715 */
1716 lfs_imtime(fs);
1717 lfs_seglock(fs, 0);
1718 sp = fs->lfs_sp;
1719
1720 /*
1721 * lfs_writevnodes, optimized to clear pageout requests.
1722 * Only write non-dirop files that are in the pageout queue.
1723 * We're very conservative about what we write; we want to be
1724 * fast and async.
1725 */
1726 mutex_enter(&lfs_lock);
1727 top:
1728 for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
1729 struct mount *mp = ITOV(ip)->v_mount;
1730 ino_t ino = ip->i_number;
1731
1732 nip = TAILQ_NEXT(ip, i_lfs_pchain);
1733
1734 if (!(ip->i_flags & IN_PAGING))
1735 goto top;
1736
1737 mutex_exit(&lfs_lock);
1738 if (vcache_get(mp, &ino, sizeof(ino), &vp) != 0) {
1739 mutex_enter(&lfs_lock);
1740 continue;
1741 };
1742 if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1743 vrele(vp);
1744 mutex_enter(&lfs_lock);
1745 continue;
1746 }
1747 ip = VTOI(vp);
1748 mutex_enter(&lfs_lock);
1749 if ((vp->v_uflag & VU_DIROP) != 0 || vp->v_type != VREG ||
1750 !(ip->i_flags & IN_PAGING)) {
1751 mutex_exit(&lfs_lock);
1752 vput(vp);
1753 mutex_enter(&lfs_lock);
1754 goto top;
1755 }
1756 mutex_exit(&lfs_lock);
1757
1758 error = lfs_writefile(fs, sp, vp);
1759 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
1760 !(ip->i_flag & IN_ALLMOD)) {
1761 mutex_enter(&lfs_lock);
1762 LFS_SET_UINO(ip, IN_MODIFIED);
1763 mutex_exit(&lfs_lock);
1764 }
1765 KDASSERT(ip->i_number != LFS_IFILE_INUM);
1766 error2 = lfs_writeinode(fs, sp, ip);
1767
1768 VOP_UNLOCK(vp);
1769 vrele(vp);
1770
1771 if (error == EAGAIN || error2 == EAGAIN) {
1772 lfs_writeseg(fs, sp);
1773 mutex_enter(&lfs_lock);
1774 break;
1775 }
1776 mutex_enter(&lfs_lock);
1777 }
1778 mutex_exit(&lfs_lock);
1779 (void) lfs_writeseg(fs, sp);
1780 lfs_segunlock(fs);
1781
1782 return 0;
1783 }
1784
1785 /*
1786 * Conversion for compat.
1787 */
1788 static void
1789 block_info_from_70(BLOCK_INFO *bi, const BLOCK_INFO_70 *bi70)
1790 {
1791 bi->bi_inode = bi70->bi_inode;
1792 bi->bi_lbn = bi70->bi_lbn;
1793 bi->bi_daddr = bi70->bi_daddr;
1794 bi->bi_segcreate = bi70->bi_segcreate;
1795 bi->bi_version = bi70->bi_version;
1796 bi->bi_bp = bi70->bi_bp;
1797 bi->bi_size = bi70->bi_size;
1798 }
1799
1800 static void
1801 block_info_to_70(BLOCK_INFO_70 *bi70, const BLOCK_INFO *bi)
1802 {
1803 bi70->bi_inode = bi->bi_inode;
1804 bi70->bi_lbn = bi->bi_lbn;
1805 bi70->bi_daddr = bi->bi_daddr;
1806 bi70->bi_segcreate = bi->bi_segcreate;
1807 bi70->bi_version = bi->bi_version;
1808 bi70->bi_bp = bi->bi_bp;
1809 bi70->bi_size = bi->bi_size;
1810 }
1811
1812 /*
1813 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
1814 */
1815 int
1816 lfs_fcntl(void *v)
1817 {
1818 struct vop_fcntl_args /* {
1819 struct vnode *a_vp;
1820 u_int a_command;
1821 void * a_data;
1822 int a_fflag;
1823 kauth_cred_t a_cred;
1824 } */ *ap = v;
1825 struct timeval tv;
1826 struct timeval *tvp;
1827 BLOCK_INFO *blkiov;
1828 BLOCK_INFO_70 *blkiov70;
1829 CLEANERINFO *cip;
1830 SEGUSE *sup;
1831 int blkcnt, i, error;
1832 size_t fh_size;
1833 struct lfs_fcntl_markv blkvp;
1834 struct lfs_fcntl_markv_70 blkvp70;
1835 struct lwp *l;
1836 fsid_t *fsidp;
1837 struct lfs *fs;
1838 struct buf *bp;
1839 fhandle_t *fhp;
1840 daddr_t off;
1841 int oclean;
1842
1843 /* Only respect LFS fcntls on fs root or Ifile */
1844 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO &&
1845 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
1846 return ulfs_fcntl(v);
1847 }
1848
1849 /* Avoid locking a draining lock */
1850 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
1851 return ESHUTDOWN;
1852 }
1853
1854 /* LFS control and monitoring fcntls are available only to root */
1855 l = curlwp;
1856 if (((ap->a_command & 0xff00) >> 8) == 'L' &&
1857 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
1858 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0)
1859 return (error);
1860
1861 fs = VTOI(ap->a_vp)->i_lfs;
1862 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;
1863
1864 error = 0;
1865 switch ((int)ap->a_command) {
1866 case LFCNSEGWAITALL_COMPAT_50:
1867 case LFCNSEGWAITALL_COMPAT:
1868 fsidp = NULL;
1869 /* FALLTHROUGH */
1870 case LFCNSEGWAIT_COMPAT_50:
1871 case LFCNSEGWAIT_COMPAT:
1872 {
1873 struct timeval50 *tvp50
1874 = (struct timeval50 *)ap->a_data;
1875 timeval50_to_timeval(tvp50, &tv);
1876 tvp = &tv;
1877 }
1878 goto segwait_common;
1879 case LFCNSEGWAITALL:
1880 fsidp = NULL;
1881 /* FALLTHROUGH */
1882 case LFCNSEGWAIT:
1883 tvp = (struct timeval *)ap->a_data;
1884 segwait_common:
1885 mutex_enter(&lfs_lock);
1886 ++fs->lfs_sleepers;
1887 mutex_exit(&lfs_lock);
1888
1889 error = lfs_segwait(fsidp, tvp);
1890
1891 mutex_enter(&lfs_lock);
1892 if (--fs->lfs_sleepers == 0)
1893 cv_broadcast(&fs->lfs_sleeperscv);
1894 mutex_exit(&lfs_lock);
1895 return error;
1896
1897 case LFCNBMAPV_COMPAT_70:
1898 case LFCNMARKV_COMPAT_70:
1899 blkvp70 = *(struct lfs_fcntl_markv_70 *)ap->a_data;
1900
1901 blkcnt = blkvp70.blkcnt;
1902 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1903 return (EINVAL);
1904 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1905 blkiov70 = lfs_malloc(fs, sizeof(BLOCK_INFO_70), LFS_NB_BLKIOV);
1906 for (i = 0; i < blkcnt; i++) {
1907 error = copyin(&blkvp70.blkiov[i], blkiov70,
1908 sizeof(*blkiov70));
1909 if (error) {
1910 lfs_free(fs, blkiov70, LFS_NB_BLKIOV);
1911 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1912 return error;
1913 }
1914 block_info_from_70(&blkiov[i], blkiov70);
1915 }
1916
1917 mutex_enter(&lfs_lock);
1918 ++fs->lfs_sleepers;
1919 mutex_exit(&lfs_lock);
1920 if (ap->a_command == LFCNBMAPV)
1921 error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
1922 else /* LFCNMARKV */
1923 error = lfs_markv(l, fsidp, blkiov, blkcnt);
1924 if (error == 0) {
1925 for (i = 0; i < blkcnt; i++) {
1926 block_info_to_70(blkiov70, &blkiov[i]);
1927 error = copyout(blkiov70, &blkvp70.blkiov[i],
1928 sizeof(*blkiov70));
1929 if (error) {
1930 break;
1931 }
1932 }
1933 }
1934 mutex_enter(&lfs_lock);
1935 if (--fs->lfs_sleepers == 0)
1936 cv_broadcast(&fs->lfs_sleeperscv);
1937 mutex_exit(&lfs_lock);
1938 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1939 return error;
1940
1941 case LFCNBMAPV:
1942 case LFCNMARKV:
1943 blkvp = *(struct lfs_fcntl_markv *)ap->a_data;
1944
1945 blkcnt = blkvp.blkcnt;
1946 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
1947 return (EINVAL);
1948 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
1949 if ((error = copyin(blkvp.blkiov, blkiov,
1950 blkcnt * sizeof(BLOCK_INFO))) != 0) {
1951 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1952 return error;
1953 }
1954
1955 mutex_enter(&lfs_lock);
1956 ++fs->lfs_sleepers;
1957 mutex_exit(&lfs_lock);
1958 if (ap->a_command == LFCNBMAPV)
1959 error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
1960 else /* LFCNMARKV */
1961 error = lfs_markv(l, fsidp, blkiov, blkcnt);
1962 if (error == 0)
1963 error = copyout(blkiov, blkvp.blkiov,
1964 blkcnt * sizeof(BLOCK_INFO));
1965 mutex_enter(&lfs_lock);
1966 if (--fs->lfs_sleepers == 0)
1967 cv_broadcast(&fs->lfs_sleeperscv);
1968 mutex_exit(&lfs_lock);
1969 lfs_free(fs, blkiov, LFS_NB_BLKIOV);
1970 return error;
1971
1972 case LFCNRECLAIM:
1973 /*
1974 * Flush dirops and write Ifile, allowing empty segments
1975 * to be immediately reclaimed.
1976 */
1977 lfs_writer_enter(fs, "pndirop");
1978 off = lfs_sb_getoffset(fs);
1979 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP);
1980 lfs_flush_dirops(fs);
1981 LFS_CLEANERINFO(cip, fs, bp);
1982 oclean = lfs_ci_getclean(fs, cip);
1983 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
1984 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP);
1985 fs->lfs_sp->seg_flags |= SEGM_PROT;
1986 lfs_segunlock(fs);
1987 lfs_writer_leave(fs);
1988
1989 #ifdef DEBUG
1990 LFS_CLEANERINFO(cip, fs, bp);
1991 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64
1992 " blocks, cleaned %" PRId32 " segments (activesb %d)\n",
1993 lfs_sb_getoffset(fs) - off,
1994 lfs_ci_getclean(fs, cip) - oclean,
1995 fs->lfs_activesb));
1996 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
1997 #else
1998 __USE(oclean);
1999 __USE(off);
2000 #endif
2001
2002 return 0;
2003
2004 case LFCNIFILEFH_COMPAT:
2005 /* Return the filehandle of the Ifile */
2006 if ((error = kauth_authorize_system(l->l_cred,
2007 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0)
2008 return (error);
2009 fhp = (struct fhandle *)ap->a_data;
2010 fhp->fh_fsid = *fsidp;
2011 fh_size = 16; /* former VFS_MAXFIDSIZ */
2012 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
2013
2014 case LFCNIFILEFH_COMPAT2:
2015 case LFCNIFILEFH:
2016 /* Return the filehandle of the Ifile */
2017 fhp = (struct fhandle *)ap->a_data;
2018 fhp->fh_fsid = *fsidp;
2019 fh_size = sizeof(struct lfs_fhandle) -
2020 offsetof(fhandle_t, fh_fid);
2021 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);
2022
2023 case LFCNREWIND:
2024 /* Move lfs_offset to the lowest-numbered segment */
2025 return lfs_rewind(fs, *(int *)ap->a_data);
2026
2027 case LFCNINVAL:
2028 /* Mark a segment SEGUSE_INVAL */
2029 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp);
2030 if (sup->su_nbytes > 0) {
2031 brelse(bp, 0);
2032 lfs_unset_inval_all(fs);
2033 return EBUSY;
2034 }
2035 sup->su_flags |= SEGUSE_INVAL;
2036 VOP_BWRITE(bp->b_vp, bp);
2037 return 0;
2038
2039 case LFCNRESIZE:
2040 /* Resize the filesystem */
2041 return lfs_resize_fs(fs, *(int *)ap->a_data);
2042
2043 case LFCNWRAPSTOP:
2044 case LFCNWRAPSTOP_COMPAT:
2045 /*
2046 * Hold lfs_newseg at segment 0; if requested, sleep until
2047 * the filesystem wraps around. To support external agents
2048 * (dump, fsck-based regression test) that need to look at
2049 * a snapshot of the filesystem, without necessarily
2050 * requiring that all fs activity stops.
2051 */
2052 if (fs->lfs_stoplwp == curlwp)
2053 return EALREADY;
2054
2055 mutex_enter(&lfs_lock);
2056 while (fs->lfs_stoplwp != NULL)
2057 cv_wait(&fs->lfs_stopcv, &lfs_lock);
2058 fs->lfs_stoplwp = curlwp;
2059 if (fs->lfs_nowrap == 0)
2060 log(LOG_NOTICE, "%s: disabled log wrap\n",
2061 lfs_sb_getfsmnt(fs));
2062 ++fs->lfs_nowrap;
2063 if (*(int *)ap->a_data == 1
2064 || ap->a_command == LFCNWRAPSTOP_COMPAT) {
2065 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n");
2066 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
2067 "segwrap", 0, &lfs_lock);
2068 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n");
2069 if (error) {
2070 lfs_wrapgo(fs, VTOI(ap->a_vp), 0);
2071 }
2072 }
2073 mutex_exit(&lfs_lock);
2074 return 0;
2075
2076 case LFCNWRAPGO:
2077 case LFCNWRAPGO_COMPAT:
2078 /*
2079 * Having done its work, the agent wakes up the writer.
2080 * If the argument is 1, it sleeps until a new segment
2081 * is selected.
2082 */
2083 mutex_enter(&lfs_lock);
2084 error = lfs_wrapgo(fs, VTOI(ap->a_vp),
2085 ap->a_command == LFCNWRAPGO_COMPAT ? 1 :
2086 *((int *)ap->a_data));
2087 mutex_exit(&lfs_lock);
2088 return error;
2089
2090 case LFCNWRAPPASS:
2091 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT))
2092 return EALREADY;
2093 mutex_enter(&lfs_lock);
2094 if (fs->lfs_stoplwp != curlwp) {
2095 mutex_exit(&lfs_lock);
2096 return EALREADY;
2097 }
2098 if (fs->lfs_nowrap == 0) {
2099 mutex_exit(&lfs_lock);
2100 return EBUSY;
2101 }
2102 fs->lfs_wrappass = 1;
2103 wakeup(&fs->lfs_wrappass);
2104 /* Wait for the log to wrap, if asked */
2105 if (*(int *)ap->a_data) {
2106 vref(ap->a_vp);
2107 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT;
2108 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n");
2109 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
2110 "segwrap", 0, &lfs_lock);
2111 log(LOG_NOTICE, "LFCNPASS done waiting\n");
2112 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT;
2113 vrele(ap->a_vp);
2114 }
2115 mutex_exit(&lfs_lock);
2116 return error;
2117
2118 case LFCNWRAPSTATUS:
2119 mutex_enter(&lfs_lock);
2120 *(int *)ap->a_data = fs->lfs_wrapstatus;
2121 mutex_exit(&lfs_lock);
2122 return 0;
2123
2124 default:
2125 return ulfs_fcntl(v);
2126 }
2127 return 0;
2128 }
2129
2130 /*
2131 * Return the last logical file offset that should be written for this file
2132 * if we're doing a write that ends at "size". If writing, we need to know
2133 * about sizes on disk, i.e. fragments if there are any; if reading, we need
2134 * to know about entire blocks.
2135 */
2136 void
2137 lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
2138 {
2139 struct inode *ip = VTOI(vp);
2140 struct lfs *fs = ip->i_lfs;
2141 daddr_t olbn, nlbn;
2142
2143 olbn = lfs_lblkno(fs, ip->i_size);
2144 nlbn = lfs_lblkno(fs, size);
2145 if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
2146 *eobp = lfs_fragroundup(fs, size);
2147 } else {
2148 *eobp = lfs_blkroundup(fs, size);
2149 }
2150 }
2151
#ifdef DEBUG
void lfs_dump_vop(void *);

/*
 * Debugging aid, built only when DEBUG is defined.
 *
 * Interprets v as a struct vop_putpages_args, prints the vnode through
 * vfs_vnode_print() when DDB is also defined, and then hands the inode's
 * i_din to lfs_dump_dinode().
 */
void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *args = v;
	struct vnode *vp = args->a_vp;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

#ifdef DDB
	vfs_vnode_print(vp, 0, printf);
#endif
	lfs_dump_dinode(fs, ip->i_din);
}
#endif
2174
2175 int
2176 lfs_mmap(void *v)
2177 {
2178 struct vop_mmap_args /* {
2179 const struct vnodeop_desc *a_desc;
2180 struct vnode *a_vp;
2181 vm_prot_t a_prot;
2182 kauth_cred_t a_cred;
2183 } */ *ap = v;
2184
2185 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
2186 return EOPNOTSUPP;
2187 return ulfs_mmap(v);
2188 }
2189
2190 static int
2191 lfs_openextattr(void *v)
2192 {
2193 struct vop_openextattr_args /* {
2194 struct vnode *a_vp;
2195 kauth_cred_t a_cred;
2196 struct proc *a_p;
2197 } */ *ap = v;
2198 struct inode *ip = VTOI(ap->a_vp);
2199 struct ulfsmount *ump = ip->i_ump;
2200 //struct lfs *fs = ip->i_lfs;
2201
2202 /* Not supported for ULFS1 file systems. */
2203 if (ump->um_fstype == ULFS1)
2204 return (EOPNOTSUPP);
2205
2206 /* XXX Not implemented for ULFS2 file systems. */
2207 return (EOPNOTSUPP);
2208 }
2209
2210 static int
2211 lfs_closeextattr(void *v)
2212 {
2213 struct vop_closeextattr_args /* {
2214 struct vnode *a_vp;
2215 int a_commit;
2216 kauth_cred_t a_cred;
2217 struct proc *a_p;
2218 } */ *ap = v;
2219 struct inode *ip = VTOI(ap->a_vp);
2220 struct ulfsmount *ump = ip->i_ump;
2221 //struct lfs *fs = ip->i_lfs;
2222
2223 /* Not supported for ULFS1 file systems. */
2224 if (ump->um_fstype == ULFS1)
2225 return (EOPNOTSUPP);
2226
2227 /* XXX Not implemented for ULFS2 file systems. */
2228 return (EOPNOTSUPP);
2229 }
2230
2231 static int
2232 lfs_getextattr(void *v)
2233 {
2234 struct vop_getextattr_args /* {
2235 struct vnode *a_vp;
2236 int a_attrnamespace;
2237 const char *a_name;
2238 struct uio *a_uio;
2239 size_t *a_size;
2240 kauth_cred_t a_cred;
2241 struct proc *a_p;
2242 } */ *ap = v;
2243 struct vnode *vp = ap->a_vp;
2244 struct inode *ip = VTOI(vp);
2245 struct ulfsmount *ump = ip->i_ump;
2246 //struct lfs *fs = ip->i_lfs;
2247 int error;
2248
2249 if (ump->um_fstype == ULFS1) {
2250 #ifdef LFS_EXTATTR
2251 error = ulfs_getextattr(ap);
2252 #else
2253 error = EOPNOTSUPP;
2254 #endif
2255 return error;
2256 }
2257
2258 /* XXX Not implemented for ULFS2 file systems. */
2259 return (EOPNOTSUPP);
2260 }
2261
2262 static int
2263 lfs_setextattr(void *v)
2264 {
2265 struct vop_setextattr_args /* {
2266 struct vnode *a_vp;
2267 int a_attrnamespace;
2268 const char *a_name;
2269 struct uio *a_uio;
2270 kauth_cred_t a_cred;
2271 struct proc *a_p;
2272 } */ *ap = v;
2273 struct vnode *vp = ap->a_vp;
2274 struct inode *ip = VTOI(vp);
2275 struct ulfsmount *ump = ip->i_ump;
2276 //struct lfs *fs = ip->i_lfs;
2277 int error;
2278
2279 if (ump->um_fstype == ULFS1) {
2280 #ifdef LFS_EXTATTR
2281 error = ulfs_setextattr(ap);
2282 #else
2283 error = EOPNOTSUPP;
2284 #endif
2285 return error;
2286 }
2287
2288 /* XXX Not implemented for ULFS2 file systems. */
2289 return (EOPNOTSUPP);
2290 }
2291
2292 static int
2293 lfs_listextattr(void *v)
2294 {
2295 struct vop_listextattr_args /* {
2296 struct vnode *a_vp;
2297 int a_attrnamespace;
2298 struct uio *a_uio;
2299 size_t *a_size;
2300 kauth_cred_t a_cred;
2301 struct proc *a_p;
2302 } */ *ap = v;
2303 struct vnode *vp = ap->a_vp;
2304 struct inode *ip = VTOI(vp);
2305 struct ulfsmount *ump = ip->i_ump;
2306 //struct lfs *fs = ip->i_lfs;
2307 int error;
2308
2309 if (ump->um_fstype == ULFS1) {
2310 #ifdef LFS_EXTATTR
2311 error = ulfs_listextattr(ap);
2312 #else
2313 error = EOPNOTSUPP;
2314 #endif
2315 return error;
2316 }
2317
2318 /* XXX Not implemented for ULFS2 file systems. */
2319 return (EOPNOTSUPP);
2320 }
2321
2322 static int
2323 lfs_deleteextattr(void *v)
2324 {
2325 struct vop_deleteextattr_args /* {
2326 struct vnode *a_vp;
2327 int a_attrnamespace;
2328 kauth_cred_t a_cred;
2329 struct proc *a_p;
2330 } */ *ap = v;
2331 struct vnode *vp = ap->a_vp;
2332 struct inode *ip = VTOI(vp);
2333 struct ulfsmount *ump = ip->i_ump;
2334 //struct fs *fs = ip->i_lfs;
2335 int error;
2336
2337 if (ump->um_fstype == ULFS1) {
2338 #ifdef LFS_EXTATTR
2339 error = ulfs_deleteextattr(ap);
2340 #else
2341 error = EOPNOTSUPP;
2342 #endif
2343 return error;
2344 }
2345
2346 /* XXX Not implemented for ULFS2 file systems. */
2347 return (EOPNOTSUPP);
2348 }
2349
2350