ffs_vfsops.c revision 1.255

1 /* $NetBSD: ffs_vfsops.c,v 1.255 2010/01/31 10:50:23 mlelstv Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1991, 1993, 1994
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.255 2010/01/31 10:50:23 mlelstv Exp $");
65
66 #if defined(_KERNEL_OPT)
67 #include "opt_ffs.h"
68 #include "opt_quota.h"
69 #include "opt_wapbl.h"
70 #endif
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/namei.h>
75 #include <sys/proc.h>
76 #include <sys/kernel.h>
77 #include <sys/vnode.h>
78 #include <sys/socket.h>
79 #include <sys/mount.h>
80 #include <sys/buf.h>
81 #include <sys/device.h>
82 #include <sys/mbuf.h>
83 #include <sys/file.h>
84 #include <sys/disklabel.h>
85 #include <sys/ioctl.h>
86 #include <sys/errno.h>
87 #include <sys/malloc.h>
88 #include <sys/pool.h>
89 #include <sys/lock.h>
90 #include <sys/sysctl.h>
91 #include <sys/conf.h>
92 #include <sys/kauth.h>
93 #include <sys/wapbl.h>
94 #include <sys/fstrans.h>
95 #include <sys/module.h>
96
97 #include <miscfs/genfs/genfs.h>
98 #include <miscfs/specfs/specdev.h>
99
100 #include <ufs/ufs/quota.h>
101 #include <ufs/ufs/ufsmount.h>
102 #include <ufs/ufs/inode.h>
103 #include <ufs/ufs/dir.h>
104 #include <ufs/ufs/ufs_extern.h>
105 #include <ufs/ufs/ufs_bswap.h>
106 #include <ufs/ufs/ufs_wapbl.h>
107
108 #include <ufs/ffs/fs.h>
109 #include <ufs/ffs/ffs_extern.h>
110
111 MODULE(MODULE_CLASS_VFS, ffs, NULL);
112
113 static int ffs_vfs_fsync(vnode_t *, int);
114
115 static struct sysctllog *ffs_sysctl_log;
116
117 /* how many times ffs_init() was called */
118 int ffs_initcount = 0;
119
120 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
121 extern const struct vnodeopv_desc ffs_specop_opv_desc;
122 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
123
124 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
125 &ffs_vnodeop_opv_desc,
126 &ffs_specop_opv_desc,
127 &ffs_fifoop_opv_desc,
128 NULL,
129 };
130
131 struct vfsops ffs_vfsops = {
132 MOUNT_FFS,
133 sizeof (struct ufs_args),
134 ffs_mount,
135 ufs_start,
136 ffs_unmount,
137 ufs_root,
138 ufs_quotactl,
139 ffs_statvfs,
140 ffs_sync,
141 ffs_vget,
142 ffs_fhtovp,
143 ffs_vptofh,
144 ffs_init,
145 ffs_reinit,
146 ffs_done,
147 ffs_mountroot,
148 ffs_snapshot,
149 ffs_extattrctl,
150 ffs_suspendctl,
151 genfs_renamelock_enter,
152 genfs_renamelock_exit,
153 ffs_vfs_fsync,
154 ffs_vnodeopv_descs,
155 0,
156 { NULL, NULL },
157 };
158
159 static const struct genfs_ops ffs_genfsops = {
160 .gop_size = ffs_gop_size,
161 .gop_alloc = ufs_gop_alloc,
162 .gop_write = genfs_gop_write,
163 .gop_markupdate = ufs_gop_markupdate,
164 };
165
166 static const struct ufs_ops ffs_ufsops = {
167 .uo_itimes = ffs_itimes,
168 .uo_update = ffs_update,
169 .uo_truncate = ffs_truncate,
170 .uo_valloc = ffs_valloc,
171 .uo_vfree = ffs_vfree,
172 .uo_balloc = ffs_balloc,
173 .uo_unmark_vnode = (void (*)(vnode_t *))nullop,
174 };
175
176 static int
177 ffs_modcmd(modcmd_t cmd, void *arg)
178 {
179 int error;
180
181 #if 0
182 extern int doasyncfree;
183 #endif
184 extern int ffs_log_changeopt;
185
186 switch (cmd) {
187 case MODULE_CMD_INIT:
188 error = vfs_attach(&ffs_vfsops);
189 if (error != 0)
190 break;
191
192 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
193 CTLFLAG_PERMANENT,
194 CTLTYPE_NODE, "vfs", NULL,
195 NULL, 0, NULL, 0,
196 CTL_VFS, CTL_EOL);
197 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
198 CTLFLAG_PERMANENT,
199 CTLTYPE_NODE, "ffs",
200 SYSCTL_DESCR("Berkeley Fast File System"),
201 NULL, 0, NULL, 0,
202 CTL_VFS, 1, CTL_EOL);
203
204 /*
205 * @@@ should we even bother with these first three?
206 */
207 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
208 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
209 CTLTYPE_INT, "doclusterread", NULL,
210 sysctl_notavail, 0, NULL, 0,
211 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
212 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
213 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
214 CTLTYPE_INT, "doclusterwrite", NULL,
215 sysctl_notavail, 0, NULL, 0,
216 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
217 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
218 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
219 CTLTYPE_INT, "doreallocblks", NULL,
220 sysctl_notavail, 0, NULL, 0,
221 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
222 #if 0
223 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
224 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
225 CTLTYPE_INT, "doasyncfree",
226 SYSCTL_DESCR("Release dirty blocks asynchronously"),
227 NULL, 0, &doasyncfree, 0,
228 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
229 #endif
230 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
231 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
232 CTLTYPE_INT, "log_changeopt",
233 SYSCTL_DESCR("Log changes in optimization strategy"),
234 NULL, 0, &ffs_log_changeopt, 0,
235 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
236 break;
237 case MODULE_CMD_FINI:
238 error = vfs_detach(&ffs_vfsops);
239 if (error != 0)
240 break;
241 sysctl_teardown(&ffs_sysctl_log);
242 break;
243 default:
244 error = ENOTTY;
245 break;
246 }
247
248 return (error);
249 }
250
251 pool_cache_t ffs_inode_cache;
252 pool_cache_t ffs_dinode1_cache;
253 pool_cache_t ffs_dinode2_cache;
254
255 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
256 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
257
258 /*
259 * Called by main() when ffs is going to be mounted as root.
260 */
261
262 int
263 ffs_mountroot(void)
264 {
265 struct fs *fs;
266 struct mount *mp;
267 struct lwp *l = curlwp; /* XXX */
268 struct ufsmount *ump;
269 int error;
270
271 if (device_class(root_device) != DV_DISK)
272 return (ENODEV);
273
274 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
275 vrele(rootvp);
276 return (error);
277 }
278
279 /*
280 * We always need to be able to mount the root file system.
281 */
282 mp->mnt_flag |= MNT_FORCE;
283 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
284 vfs_unbusy(mp, false, NULL);
285 vfs_destroy(mp);
286 return (error);
287 }
288 mp->mnt_flag &= ~MNT_FORCE;
289 mutex_enter(&mountlist_lock);
290 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
291 mutex_exit(&mountlist_lock);
292 ump = VFSTOUFS(mp);
293 fs = ump->um_fs;
294 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
295 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
296 (void)ffs_statvfs(mp, &mp->mnt_stat);
297 vfs_unbusy(mp, false, NULL);
298 setrootfstime((time_t)fs->fs_time);
299 return (0);
300 }
301
302 /*
303 * VFS Operations.
304 *
305 * mount system call
306 */
307 int
308 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
309 {
310 struct lwp *l = curlwp;
311 struct vnode *devvp = NULL;
312 struct ufs_args *args = data;
313 struct ufsmount *ump = NULL;
314 struct fs *fs;
315 int error = 0, flags, update;
316 mode_t accessmode;
317
318 if (*data_len < sizeof *args)
319 return EINVAL;
320
321 if (mp->mnt_flag & MNT_GETARGS) {
322 ump = VFSTOUFS(mp);
323 if (ump == NULL)
324 return EIO;
325 args->fspec = NULL;
326 *data_len = sizeof *args;
327 return 0;
328 }
329
330 update = mp->mnt_flag & MNT_UPDATE;
331
332 /* Check arguments */
333 if (args->fspec != NULL) {
334 /*
335 * Look up the name and verify that it's sane.
336 */
337 error = namei_simple_user(args->fspec,
338 NSM_FOLLOW_NOEMULROOT, &devvp);
339 if (error != 0)
340 return (error);
341
342 if (!update) {
343 /*
344 * Be sure this is a valid block device
345 */
346 if (devvp->v_type != VBLK)
347 error = ENOTBLK;
348 else if (bdevsw_lookup(devvp->v_rdev) == NULL)
349 error = ENXIO;
350 } else {
351 /*
352 * Be sure we're still naming the same device
353 * used for our initial mount
354 */
355 ump = VFSTOUFS(mp);
356 if (devvp != ump->um_devvp) {
357 if (devvp->v_rdev != ump->um_devvp->v_rdev)
358 error = EINVAL;
359 else {
360 vrele(devvp);
361 devvp = ump->um_devvp;
362 vref(devvp);
363 }
364 }
365 }
366 } else {
367 if (!update) {
368 /* New mounts must have a filename for the device */
369 return (EINVAL);
370 } else {
371 /* Use the extant mount */
372 ump = VFSTOUFS(mp);
373 devvp = ump->um_devvp;
374 vref(devvp);
375 }
376 }
377
378 /*
379 * If mount by non-root, then verify that user has necessary
380 * permissions on the device.
381 *
382 * Permission to update a mount is checked higher, so here we presume
383 * updating the mount is okay (for example, as far as securelevel goes)
384 * which leaves us with the normal check.
385 */
386 if (error == 0) {
387 accessmode = VREAD;
388 if (update ?
389 (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
390 (mp->mnt_flag & MNT_RDONLY) == 0)
391 accessmode |= VWRITE;
392 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
393 error = genfs_can_mount(devvp, accessmode, l->l_cred);
394 VOP_UNLOCK(devvp, 0);
395 }
396
397 if (error) {
398 vrele(devvp);
399 return (error);
400 }
401
402 #ifdef WAPBL
403 /* WAPBL can only be enabled on a r/w mount. */
404 if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
405 mp->mnt_flag &= ~MNT_LOG;
406 }
407 #else /* !WAPBL */
408 mp->mnt_flag &= ~MNT_LOG;
409 #endif /* !WAPBL */
410
411 if (!update) {
412 int xflags;
413
414 if (mp->mnt_flag & MNT_RDONLY)
415 xflags = FREAD;
416 else
417 xflags = FREAD | FWRITE;
418 error = VOP_OPEN(devvp, xflags, FSCRED);
419 if (error)
420 goto fail;
421 error = ffs_mountfs(devvp, mp, l);
422 if (error) {
423 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
424 (void)VOP_CLOSE(devvp, xflags, NOCRED);
425 VOP_UNLOCK(devvp, 0);
426 goto fail;
427 }
428
429 ump = VFSTOUFS(mp);
430 fs = ump->um_fs;
431 } else {
432 /*
433 * Update the mount.
434 */
435
436 /*
437 * The initial mount got a reference on this
438 * device, so drop the one obtained via
439 * namei(), above.
440 */
441 vrele(devvp);
442
443 ump = VFSTOUFS(mp);
444 fs = ump->um_fs;
445 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
446 /*
447 * Changing from r/w to r/o
448 */
449 flags = WRITECLOSE;
450 if (mp->mnt_flag & MNT_FORCE)
451 flags |= FORCECLOSE;
452 error = ffs_flushfiles(mp, flags, l);
453 if (error == 0)
454 error = UFS_WAPBL_BEGIN(mp);
455 if (error == 0 &&
456 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
457 fs->fs_clean & FS_WASCLEAN) {
458 if (mp->mnt_flag & MNT_SOFTDEP)
459 fs->fs_flags &= ~FS_DOSOFTDEP;
460 fs->fs_clean = FS_ISCLEAN;
461 (void) ffs_sbupdate(ump, MNT_WAIT);
462 }
463 if (error == 0)
464 UFS_WAPBL_END(mp);
465 if (error)
466 return (error);
467 }
468
469 #ifdef WAPBL
470 if ((mp->mnt_flag & MNT_LOG) == 0) {
471 error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
472 if (error)
473 return error;
474 }
475 #endif /* WAPBL */
476
477 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
478 /*
479 * Finish change from r/w to r/o
480 */
481 fs->fs_ronly = 1;
482 fs->fs_fmod = 0;
483 }
484
485 if (mp->mnt_flag & MNT_RELOAD) {
486 error = ffs_reload(mp, l->l_cred, l);
487 if (error)
488 return (error);
489 }
490
491 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
492 /*
493 * Changing from read-only to read/write
494 */
495 fs->fs_ronly = 0;
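/*
 * Note: shifting fs_clean left turns FS_ISCLEAN into FS_WASCLEAN, so a
 * previously clean file system is remembered as "was clean" while the
 * r/w mount is in progress; a dirty (zero) value stays dirty.
 */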
496 fs->fs_clean <<= 1;
497 fs->fs_fmod = 1;
498 #ifdef WAPBL
499 if (fs->fs_flags & FS_DOWAPBL) {
500 printf("%s: replaying log to disk\n",
501 fs->fs_fsmnt);
502 KDASSERT(mp->mnt_wapbl_replay);
503 error = wapbl_replay_write(mp->mnt_wapbl_replay,
504 devvp);
505 if (error) {
506 return error;
507 }
508 wapbl_replay_stop(mp->mnt_wapbl_replay);
509 fs->fs_clean = FS_WASCLEAN;
510 }
511 #endif /* WAPBL */
512 if (fs->fs_snapinum[0] != 0)
513 ffs_snapshot_mount(mp);
514 }
515
516 #ifdef WAPBL
517 error = ffs_wapbl_start(mp);
518 if (error)
519 return error;
520 #endif /* WAPBL */
521
522 if (args->fspec == NULL)
523 return 0;
524 }
525
526 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
527 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
528 if (error == 0)
529 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
530 sizeof(fs->fs_fsmnt));
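/* Soft dependencies are gone; make sure the on-disk flag stays clear. */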
531 fs->fs_flags &= ~FS_DOSOFTDEP;
532 if (fs->fs_fmod != 0) { /* XXX */
533 int err;
534
535 fs->fs_fmod = 0;
536 if (fs->fs_clean & FS_WASCLEAN)
537 fs->fs_time = time_second;
538 else {
539 printf("%s: file system not clean (fs_clean=%#x); "
540 "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
541 fs->fs_clean);
542 printf("%s: lost blocks %" PRId64 " files %d\n",
543 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
544 fs->fs_pendinginodes);
545 }
546 err = UFS_WAPBL_BEGIN(mp);
547 if (err == 0) {
548 (void) ffs_cgupdate(ump, MNT_WAIT);
549 UFS_WAPBL_END(mp);
550 }
551 }
552 if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
553 printf("%s: `-o softdep' is no longer supported, "
554 "consider `-o log'\n", mp->mnt_stat.f_mntfromname);
555 mp->mnt_flag &= ~MNT_SOFTDEP;
556 }
557
558 return (error);
559
560 fail:
561 vrele(devvp);
562 return (error);
563 }
564
565 /*
566 * Reload all incore data for a filesystem (used after running fsck on
567 * the root filesystem and finding things to fix). The filesystem must
568 * be mounted read-only.
569 *
570 * Things to do to update the mount:
571 * 1) invalidate all cached meta-data.
572 * 2) re-read superblock from disk.
573 * 3) re-read summary information from disk.
574 * 4) invalidate all inactive vnodes.
575 * 5) invalidate all cached file data.
576 * 6) re-read inode data for all active vnodes.
577 */
578 int
579 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
580 {
581 struct vnode *vp, *mvp, *devvp;
582 struct inode *ip;
583 void *space;
584 struct buf *bp;
585 struct fs *fs, *newfs;
586 struct partinfo dpart;
587 int i, bsize, blks, error;
588 uint64_t numsecs;
589 unsigned secsize;
590 int32_t *lp;
591 struct ufsmount *ump;
592 daddr_t sblockloc;
593
594 if ((mp->mnt_flag & MNT_RDONLY) == 0)
595 return (EINVAL);
596
597 ump = VFSTOUFS(mp);
598 /*
599 * Step 1: invalidate all cached meta-data.
600 */
601 devvp = ump->um_devvp;
602 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
603 error = vinvalbuf(devvp, 0, cred, l, 0, 0);
604 VOP_UNLOCK(devvp, 0);
605 if (error)
606 panic("ffs_reload: dirty1");
607 /*
608 * Step 2: re-read superblock from disk.
609 */
610 fs = ump->um_fs;
611 error = getdisksize(devvp, &numsecs, &secsize);
612 if (error)
613 secsize = DEV_BSIZE;
614 /* XXX we don't handle possibility that superblock moved. */
615 error = bread(devvp, fs->fs_sblockloc / secsize, fs->fs_sbsize,
616 NOCRED, 0, &bp);
617 if (error) {
618 brelse(bp, 0);
619 return (error);
620 }
621 newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
622 memcpy(newfs, bp->b_data, fs->fs_sbsize);
623 #ifdef FFS_EI
624 if (ump->um_flags & UFS_NEEDSWAP) {
625 ffs_sb_swap((struct fs*)bp->b_data, newfs);
626 fs->fs_flags |= FS_SWAPPED;
627 } else
628 #endif
629 fs->fs_flags &= ~FS_SWAPPED;
630 if ((newfs->fs_magic != FS_UFS1_MAGIC &&
631 newfs->fs_magic != FS_UFS2_MAGIC)||
632 newfs->fs_bsize > MAXBSIZE ||
633 newfs->fs_bsize < sizeof(struct fs)) {
634 brelse(bp, 0);
635 free(newfs, M_UFSMNT);
636 return (EIO); /* XXX needs translation */
637 }
638 /* Store off old fs_sblockloc for fs_oldfscompat_read. */
639 sblockloc = fs->fs_sblockloc;
640 /*
641 * Copy pointer fields back into superblock before copying in XXX
642 * new superblock. These should really be in the ufsmount. XXX
643 * Note that important parameters (eg fs_ncg) are unchanged.
644 */
645 newfs->fs_csp = fs->fs_csp;
646 newfs->fs_maxcluster = fs->fs_maxcluster;
647 newfs->fs_contigdirs = fs->fs_contigdirs;
648 newfs->fs_ronly = fs->fs_ronly;
649 newfs->fs_active = fs->fs_active;
650 memcpy(fs, newfs, (u_int)fs->fs_sbsize);
651 brelse(bp, 0);
652 free(newfs, M_UFSMNT);
653
654 /* Recheck for apple UFS filesystem */
655 ump->um_flags &= ~UFS_ISAPPLEUFS;
656 /* First check to see if this is tagged as an Apple UFS filesystem
657 * in the disklabel
658 */
659 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
660 (dpart.part->p_fstype == FS_APPLEUFS)) {
661 ump->um_flags |= UFS_ISAPPLEUFS;
662 }
663 #ifdef APPLE_UFS
664 else {
665 /* Manually look for an apple ufs label, and if a valid one
666 * is found, then treat it like an Apple UFS filesystem anyway
667 */
668 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / secsize),
669 APPLEUFS_LABEL_SIZE, cred, 0, &bp);
670 if (error) {
671 brelse(bp, 0);
672 return (error);
673 }
674 error = ffs_appleufs_validate(fs->fs_fsmnt,
675 (struct appleufslabel *)bp->b_data, NULL);
676 if (error == 0)
677 ump->um_flags |= UFS_ISAPPLEUFS;
678 brelse(bp, 0);
679 bp = NULL;
680 }
681 #else
682 if (ump->um_flags & UFS_ISAPPLEUFS)
683 return (EIO);
684 #endif
685
686 if (UFS_MPISAPPLEUFS(ump)) {
687 /* see comment about NeXT below */
688 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
689 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
690 mp->mnt_iflag |= IMNT_DTYPE;
691 } else {
692 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
693 ump->um_dirblksiz = DIRBLKSIZ;
694 if (ump->um_maxsymlinklen > 0)
695 mp->mnt_iflag |= IMNT_DTYPE;
696 else
697 mp->mnt_iflag &= ~IMNT_DTYPE;
698 }
699 ffs_oldfscompat_read(fs, ump, sblockloc);
700
701 mutex_enter(&ump->um_lock);
702 ump->um_maxfilesize = fs->fs_maxfilesize;
703 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
704 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
705 mp->mnt_stat.f_mntonname, fs->fs_flags,
706 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
707 if ((mp->mnt_flag & MNT_FORCE) == 0) {
708 mutex_exit(&ump->um_lock);
709 return (EINVAL);
710 }
711 }
712 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
713 fs->fs_pendingblocks = 0;
714 fs->fs_pendinginodes = 0;
715 }
716 mutex_exit(&ump->um_lock);
717
718 ffs_statvfs(mp, &mp->mnt_stat);
719 /*
720 * Step 3: re-read summary information from disk.
721 */
722 blks = howmany(fs->fs_cssize, fs->fs_fsize);
723 space = fs->fs_csp;
724 for (i = 0; i < blks; i += fs->fs_frag) {
725 bsize = fs->fs_bsize;
726 if (i + fs->fs_frag > blks)
727 bsize = (blks - i) * fs->fs_fsize;
728 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize,
729 NOCRED, 0, &bp);
730 if (error) {
731 brelse(bp, 0);
732 return (error);
733 }
734 #ifdef FFS_EI
735 if (UFS_FSNEEDSWAP(fs))
736 ffs_csum_swap((struct csum *)bp->b_data,
737 (struct csum *)space, bsize);
738 else
739 #endif
740 memcpy(space, bp->b_data, (size_t)bsize);
741 space = (char *)space + bsize;
742 brelse(bp, 0);
743 }
744 if (fs->fs_snapinum[0] != 0)
745 ffs_snapshot_mount(mp);
746 /*
747 * We no longer know anything about clusters per cylinder group.
748 */
749 if (fs->fs_contigsumsize > 0) {
750 lp = fs->fs_maxcluster;
751 for (i = 0; i < fs->fs_ncg; i++)
752 *lp++ = fs->fs_contigsumsize;
753 }
754
755 /* Allocate a marker vnode. */
756 if ((mvp = vnalloc(mp)) == NULL)
757 return ENOMEM;
758 /*
759 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
760 * and vclean() can be called indirectly
761 */
762 mutex_enter(&mntvnode_lock);
763 loop:
764 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
765 vmark(mvp, vp);
766 if (vp->v_mount != mp || vismarker(vp))
767 continue;
768 /*
769 * Step 4: invalidate all inactive vnodes.
770 */
771 if (vrecycle(vp, &mntvnode_lock, l)) {
772 mutex_enter(&mntvnode_lock);
773 (void)vunmark(mvp);
774 goto loop;
775 }
776 /*
777 * Step 5: invalidate all cached file data.
778 */
779 mutex_enter(&vp->v_interlock);
780 mutex_exit(&mntvnode_lock);
781 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
782 (void)vunmark(mvp);
783 goto loop;
784 }
785 if (vinvalbuf(vp, 0, cred, l, 0, 0))
786 panic("ffs_reload: dirty2");
787 /*
788 * Step 6: re-read inode data for all active vnodes.
789 */
790 ip = VTOI(vp);
791 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
792 (int)fs->fs_bsize, NOCRED, 0, &bp);
793 if (error) {
794 brelse(bp, 0);
795 vput(vp);
796 (void)vunmark(mvp);
797 break;
798 }
799 ffs_load_inode(bp, ip, fs, ip->i_number);
800 brelse(bp, 0);
801 vput(vp);
802 mutex_enter(&mntvnode_lock);
803 }
804 mutex_exit(&mntvnode_lock);
805 vnfree(mvp);
806 return (error);
807 }
808
809 /*
810 * Possible superblock locations ordered from most to least likely.
811 */
812 static const int sblock_try[] = SBLOCKSEARCH;
813
814 /*
815 * Common code for mount and mountroot
816 */
817 int
818 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
819 {
820 struct ufsmount *ump;
821 struct buf *bp;
822 struct fs *fs;
823 dev_t dev;
824 struct partinfo dpart;
825 void *space;
826 daddr_t sblockloc, fsblockloc;
827 int blks, fstype;
828 int error, i, bsize, ronly, bset = 0;
829 uint64_t numsecs;
830 unsigned secsize;
831 #ifdef FFS_EI
832 int needswap = 0; /* keep gcc happy */
833 #endif
834 int32_t *lp;
835 kauth_cred_t cred;
836 u_int32_t sbsize = 8192; /* keep gcc happy*/
837
838 dev = devvp->v_rdev;
839 cred = l ? l->l_cred : NOCRED;
840
841 /* Flush out any old buffers remaining from a previous use. */
842 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
843 error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
844 VOP_UNLOCK(devvp, 0);
845 if (error)
846 return (error);
847
848 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
849 error = getdisksize(devvp, &numsecs, &secsize);
850 if (error)
851 return (error);
852
853 bp = NULL;
854 ump = NULL;
855 fs = NULL;
856 sblockloc = 0;
857 fstype = 0;
858
859 error = fstrans_mount(mp);
860 if (error)
861 return error;
862
863 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
864 memset(ump, 0, sizeof *ump);
865 mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
866 error = ffs_snapshot_init(ump);
867 if (error)
868 goto out;
869 ump->um_ops = &ffs_ufsops;
870
871 #ifdef WAPBL
872 sbagain:
873 #endif
874 /*
875 * Try reading the superblock in each of its possible locations.
876 */
877 for (i = 0; ; i++) {
878 if (bp != NULL) {
879 brelse(bp, BC_NOCACHE);
880 bp = NULL;
881 }
882 if (sblock_try[i] == -1) {
883 error = EINVAL;
884 fs = NULL;
885 goto out;
886 }
887 error = bread(devvp, sblock_try[i] / secsize, SBLOCKSIZE, cred,
888 0, &bp);
889 if (error) {
890 fs = NULL;
891 goto out;
892 }
893 fs = (struct fs*)bp->b_data;
894 fsblockloc = sblockloc = sblock_try[i];
895 if (fs->fs_magic == FS_UFS1_MAGIC) {
896 sbsize = fs->fs_sbsize;
897 fstype = UFS1;
898 #ifdef FFS_EI
899 needswap = 0;
900 } else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
901 sbsize = bswap32(fs->fs_sbsize);
902 fstype = UFS1;
903 needswap = 1;
904 #endif
905 } else if (fs->fs_magic == FS_UFS2_MAGIC) {
906 sbsize = fs->fs_sbsize;
907 fstype = UFS2;
908 #ifdef FFS_EI
909 needswap = 0;
910 } else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
911 sbsize = bswap32(fs->fs_sbsize);
912 fstype = UFS2;
913 needswap = 1;
914 #endif
915 } else
916 continue;
917
918
919 /* fs->fs_sblockloc isn't defined for old filesystems */
920 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
921 if (sblockloc == SBLOCK_UFS2)
922 /*
923 * This is likely to be the first alternate
924 * in a filesystem with 64k blocks.
925 * Don't use it.
926 */
927 continue;
928 fsblockloc = sblockloc;
929 } else {
930 fsblockloc = fs->fs_sblockloc;
931 #ifdef FFS_EI
932 if (needswap)
933 fsblockloc = bswap64(fsblockloc);
934 #endif
935 }
936
937 /* Check we haven't found an alternate superblock */
938 if (fsblockloc != sblockloc)
939 continue;
940
941 /* Validate size of superblock */
942 if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
943 continue;
944
945 /* Ok seems to be a good superblock */
946 break;
947 }
948
949 fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
950 memcpy(fs, bp->b_data, sbsize);
951 ump->um_fs = fs;
952
953 #ifdef FFS_EI
954 if (needswap) {
955 ffs_sb_swap((struct fs*)bp->b_data, fs);
956 fs->fs_flags |= FS_SWAPPED;
957 } else
958 #endif
959 fs->fs_flags &= ~FS_SWAPPED;
960
961 #ifdef WAPBL
962 if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
963 error = ffs_wapbl_replay_start(mp, fs, devvp);
964 if (error && (mp->mnt_flag & MNT_FORCE) == 0)
965 goto out;
966 if (!error) {
967 if (!ronly) {
968 /* XXX fsmnt may be stale. */
969 printf("%s: replaying log to disk\n",
970 fs->fs_fsmnt);
971 error = wapbl_replay_write(mp->mnt_wapbl_replay,
972 devvp);
973 if (error)
974 goto out;
975 wapbl_replay_stop(mp->mnt_wapbl_replay);
976 fs->fs_clean = FS_WASCLEAN;
977 } else {
978 /* XXX fsmnt may be stale */
979 printf("%s: replaying log to memory\n",
980 fs->fs_fsmnt);
981 }
982
983 /* Force a re-read of the superblock */
984 brelse(bp, BC_INVAL);
985 bp = NULL;
986 free(fs, M_UFSMNT);
987 fs = NULL;
988 goto sbagain;
989 }
990 }
991 #else /* !WAPBL */
992 if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
993 error = EPERM;
994 goto out;
995 }
996 #endif /* !WAPBL */
997
998 ffs_oldfscompat_read(fs, ump, sblockloc);
999 ump->um_maxfilesize = fs->fs_maxfilesize;
1000
1001 if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
1002 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
1003 mp->mnt_stat.f_mntonname, fs->fs_flags,
1004 (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1005 if ((mp->mnt_flag & MNT_FORCE) == 0) {
1006 error = EINVAL;
1007 goto out;
1008 }
1009 }
1010
1011 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1012 fs->fs_pendingblocks = 0;
1013 fs->fs_pendinginodes = 0;
1014 }
1015
1016 ump->um_fstype = fstype;
1017 if (fs->fs_sbsize < SBLOCKSIZE)
1018 brelse(bp, BC_INVAL);
1019 else
1020 brelse(bp, 0);
1021 bp = NULL;
1022
1023 /* First check to see if this is tagged as an Apple UFS filesystem
1024 * in the disklabel
1025 */
1026 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
1027 (dpart.part->p_fstype == FS_APPLEUFS)) {
1028 ump->um_flags |= UFS_ISAPPLEUFS;
1029 }
1030 #ifdef APPLE_UFS
1031 else {
1032 /* Manually look for an apple ufs label, and if a valid one
1033 * is found, then treat it like an Apple UFS filesystem anyway
1034 */
1035 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / secsize),
1036 APPLEUFS_LABEL_SIZE, cred, 0, &bp);
1037 if (error)
1038 goto out;
1039 error = ffs_appleufs_validate(fs->fs_fsmnt,
1040 (struct appleufslabel *)bp->b_data, NULL);
1041 if (error == 0) {
1042 ump->um_flags |= UFS_ISAPPLEUFS;
1043 }
1044 brelse(bp, 0);
1045 bp = NULL;
1046 }
1047 #else
1048 if (ump->um_flags & UFS_ISAPPLEUFS) {
1049 error = EINVAL;
1050 goto out;
1051 }
1052 #endif
1053
1054 #if 0
1055 /*
1056 * XXX This code changes the behaviour of mounting dirty filesystems, to
1057 * XXX require "mount -f ..." to mount them. This doesn't match what
1058 * XXX mount(8) describes and is disabled for now.
1059 */
1060 /*
1061 * If the file system is not clean, don't allow it to be mounted
1062 * unless MNT_FORCE is specified. (Note: MNT_FORCE is always set
1063 * for the root file system.)
1064 */
1065 if (fs->fs_flags & FS_DOWAPBL) {
1066 /*
1067 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1068 * bit is set, although there's a window in unmount where it
1069 * could be FS_ISCLEAN
1070 */
1071 if ((mp->mnt_flag & MNT_FORCE) == 0 &&
1072 (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
1073 error = EPERM;
1074 goto out;
1075 }
1076 } else
1077 if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
1078 (mp->mnt_flag & MNT_FORCE) == 0) {
1079 error = EPERM;
1080 goto out;
1081 }
1082 #endif
1083
1084 /*
1085 * verify that we can access the last block in the fs
1086 * if we're mounting read/write.
1087 */
1088
1089 if (!ronly) {
1090 error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
1091 cred, 0, &bp);
1092 if (bp->b_bcount != fs->fs_fsize)
1093 error = EINVAL;
1094 if (error) {
1095 bset = BC_INVAL;
1096 goto out;
1097 }
1098 brelse(bp, BC_INVAL);
1099 bp = NULL;
1100 }
1101
1102 fs->fs_ronly = ronly;
1103 /* Don't bump fs_clean if we're replaying journal */
1104 if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
1105 if (ronly == 0) {
1106 fs->fs_clean <<= 1;
1107 fs->fs_fmod = 1;
1108 }
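/*
 * A single allocation backs the cylinder group summary array (fs_csp),
 * the per-cg cluster summary (fs_maxcluster, only when clustering is
 * enabled) and the per-cg fs_contigdirs counters carved out below.
 */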
1109 bsize = fs->fs_cssize;
1110 blks = howmany(bsize, fs->fs_fsize);
1111 if (fs->fs_contigsumsize > 0)
1112 bsize += fs->fs_ncg * sizeof(int32_t);
1113 bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1114 space = malloc((u_long)bsize, M_UFSMNT, M_WAITOK);
1115 fs->fs_csp = space;
1116 for (i = 0; i < blks; i += fs->fs_frag) {
1117 bsize = fs->fs_bsize;
1118 if (i + fs->fs_frag > blks)
1119 bsize = (blks - i) * fs->fs_fsize;
1120 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize,
1121 cred, 0, &bp);
1122 if (error) {
1123 free(fs->fs_csp, M_UFSMNT);
1124 goto out;
1125 }
1126 #ifdef FFS_EI
1127 if (needswap)
1128 ffs_csum_swap((struct csum *)bp->b_data,
1129 (struct csum *)space, bsize);
1130 else
1131 #endif
1132 memcpy(space, bp->b_data, (u_int)bsize);
1133
1134 space = (char *)space + bsize;
1135 brelse(bp, 0);
1136 bp = NULL;
1137 }
1138 if (fs->fs_contigsumsize > 0) {
1139 fs->fs_maxcluster = lp = space;
1140 for (i = 0; i < fs->fs_ncg; i++)
1141 *lp++ = fs->fs_contigsumsize;
1142 space = lp;
1143 }
1144 bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1145 fs->fs_contigdirs = space;
1146 space = (char *)space + bsize;
1147 memset(fs->fs_contigdirs, 0, bsize);
1148 /* Compatibility for old filesystems - XXX */
1149 if (fs->fs_avgfilesize <= 0)
1150 fs->fs_avgfilesize = AVFILESIZ;
1151 if (fs->fs_avgfpdir <= 0)
1152 fs->fs_avgfpdir = AFPDIR;
1153 fs->fs_active = NULL;
1154 mp->mnt_data = ump;
1155 mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
1156 mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
1157 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1158 mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
1159 if (UFS_MPISAPPLEUFS(ump)) {
1160 /* NeXT used to keep short symlinks in the inode even
1161 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen
1162 * is probably -1, but we still need to be able to identify
1163 * short symlinks.
1164 */
1165 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
1166 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
1167 mp->mnt_iflag |= IMNT_DTYPE;
1168 } else {
1169 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1170 ump->um_dirblksiz = DIRBLKSIZ;
1171 if (ump->um_maxsymlinklen > 0)
1172 mp->mnt_iflag |= IMNT_DTYPE;
1173 else
1174 mp->mnt_iflag &= ~IMNT_DTYPE;
1175 }
1176 mp->mnt_fs_bshift = fs->fs_bshift;
1177 mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */
1178 mp->mnt_flag |= MNT_LOCAL;
1179 mp->mnt_iflag |= IMNT_MPSAFE;
1180 #ifdef FFS_EI
1181 if (needswap)
1182 ump->um_flags |= UFS_NEEDSWAP;
1183 #endif
1184 ump->um_mountp = mp;
1185 ump->um_dev = dev;
1186 ump->um_devvp = devvp;
1187 ump->um_nindir = fs->fs_nindir;
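/*
 * ffs() here is the find-first-set-bit function, not the file system:
 * fs_nindir is a power of two, so ffs(fs_nindir) - 1 == log2(fs_nindir).
 */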
1188 ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1189 ump->um_bptrtodb = fs->fs_fsbtodb;
1190 ump->um_seqinc = fs->fs_frag;
1191 for (i = 0; i < MAXQUOTAS; i++)
1192 ump->um_quotas[i] = NULLVP;
1193 devvp->v_specmountpoint = mp;
1194 if (ronly == 0 && fs->fs_snapinum[0] != 0)
1195 ffs_snapshot_mount(mp);
1196
1197 #ifdef WAPBL
1198 if (!ronly) {
1199 KDASSERT(fs->fs_ronly == 0);
1200 /*
1201 * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1202 * needs to create a new log file in-filesystem.
1203 */
1204 ffs_statvfs(mp, &mp->mnt_stat);
1205
1206 error = ffs_wapbl_start(mp);
1207 if (error) {
1208 free(fs->fs_csp, M_UFSMNT);
1209 goto out;
1210 }
1211 }
1212 #endif /* WAPBL */
1213 #ifdef UFS_EXTATTR
1214 /*
1215 * Initialize file-backed extended attributes on UFS1 file
1216 * systems.
1217 */
1218 if (ump->um_fstype == UFS1) {
1219 ufs_extattr_uepm_init(&ump->um_extattr);
1220 #ifdef UFS_EXTATTR_AUTOSTART
1221 /*
1222 * XXX Just ignore errors. Not clear that we should
1223 * XXX fail the mount in this case.
1224 */
1225 (void) ufs_extattr_autostart(mp, l);
1226 #endif
1227 }
1228 #endif /* UFS_EXTATTR */
1229 return (0);
1230 out:
1231 #ifdef WAPBL
1232 if (mp->mnt_wapbl_replay) {
1233 wapbl_replay_stop(mp->mnt_wapbl_replay);
1234 wapbl_replay_free(mp->mnt_wapbl_replay);
1235 mp->mnt_wapbl_replay = 0;
1236 }
1237 #endif
1238
1239 fstrans_unmount(mp);
1240 if (fs)
1241 free(fs, M_UFSMNT);
1242 devvp->v_specmountpoint = NULL;
1243 if (bp)
1244 brelse(bp, bset);
1245 if (ump) {
1246 if (ump->um_oldfscompat)
1247 free(ump->um_oldfscompat, M_UFSMNT);
1248 mutex_destroy(&ump->um_lock);
1249 free(ump, M_UFSMNT);
1250 mp->mnt_data = NULL;
1251 }
1252 return (error);
1253 }
1254
1255 /*
1256 * Sanity checks for loading old filesystem superblocks.
1257 * See ffs_oldfscompat_write below for unwound actions.
1258 *
1259 * XXX - Parts get retired eventually.
1260 * Unfortunately new bits get added.
1261 */
1262 static void
1263 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1264 {
1265 off_t maxfilesize;
1266 int32_t *extrasave;
1267
1268 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1269 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1270 return;
1271
1272 if (!ump->um_oldfscompat)
1273 ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
1274 M_UFSMNT, M_WAITOK);
1275
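/*
 * Save the 512-byte rotational layout area and the three scalar fields
 * that may be rewritten below, so ffs_oldfscompat_write() can restore
 * the original values before the superblock goes back to disk.
 */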
1276 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1277 extrasave = ump->um_oldfscompat;
1278 extrasave += 512/sizeof(int32_t);
1279 extrasave[0] = fs->fs_old_npsect;
1280 extrasave[1] = fs->fs_old_interleave;
1281 extrasave[2] = fs->fs_old_trackskew;
1282
1283 /* These fields will be overwritten by their
1284 * original values in fs_oldfscompat_write, so it is harmless
1285 * to modify them here.
1286 */
1287 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1288 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1289 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1290 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1291
1292 fs->fs_maxbsize = fs->fs_bsize;
1293 fs->fs_time = fs->fs_old_time;
1294 fs->fs_size = fs->fs_old_size;
1295 fs->fs_dsize = fs->fs_old_dsize;
1296 fs->fs_csaddr = fs->fs_old_csaddr;
1297 fs->fs_sblockloc = sblockloc;
1298
1299 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1300
1301 if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1302 fs->fs_old_nrpos = 8;
1303 fs->fs_old_npsect = fs->fs_old_nsect;
1304 fs->fs_old_interleave = 1;
1305 fs->fs_old_trackskew = 0;
1306 }
1307
1308 if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1309 fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1310 fs->fs_qbmask = ~fs->fs_bmask;
1311 fs->fs_qfmask = ~fs->fs_fmask;
1312 }
1313
1314 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1315 if (fs->fs_maxfilesize > maxfilesize)
1316 fs->fs_maxfilesize = maxfilesize;
1317
1318 /* Compatibility for old filesystems */
1319 if (fs->fs_avgfilesize <= 0)
1320 fs->fs_avgfilesize = AVFILESIZ;
1321 if (fs->fs_avgfpdir <= 0)
1322 fs->fs_avgfpdir = AFPDIR;
1323
1324 #if 0
1325 if (bigcgs) {
1326 fs->fs_save_cgsize = fs->fs_cgsize;
1327 fs->fs_cgsize = fs->fs_bsize;
1328 }
1329 #endif
1330 }
1331
1332 /*
1333 * Unwinding superblock updates for old filesystems.
1334 * See ffs_oldfscompat_read above for details.
1335 *
1336 * XXX - Parts get retired eventually.
1337 * Unfortunately new bits get added.
1338 */
1339 static void
1340 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1341 {
1342 int32_t *extrasave;
1343
1344 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1345 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1346 return;
1347
1348 fs->fs_old_time = fs->fs_time;
1349 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1350 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1351 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1352 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1353 fs->fs_old_flags = fs->fs_flags;
1354
1355 #if 0
1356 if (bigcgs) {
1357 fs->fs_cgsize = fs->fs_save_cgsize;
1358 }
1359 #endif
1360
1361 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1362 extrasave = ump->um_oldfscompat;
1363 extrasave += 512/sizeof(int32_t);
1364 fs->fs_old_npsect = extrasave[0];
1365 fs->fs_old_interleave = extrasave[1];
1366 fs->fs_old_trackskew = extrasave[2];
1367
1368 }
1369
1370 /*
1371 * unmount vfs operation
1372 */
1373 int
1374 ffs_unmount(struct mount *mp, int mntflags)
1375 {
1376 struct lwp *l = curlwp;
1377 struct ufsmount *ump = VFSTOUFS(mp);
1378 struct fs *fs = ump->um_fs;
1379 int error, flags;
1380 #ifdef WAPBL
1381 extern int doforce;
1382 #endif
1383
1384 flags = 0;
1385 if (mntflags & MNT_FORCE)
1386 flags |= FORCECLOSE;
1387 if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1388 return (error);
1389 error = UFS_WAPBL_BEGIN(mp);
1390 if (error == 0)
1391 if (fs->fs_ronly == 0 &&
1392 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1393 fs->fs_clean & FS_WASCLEAN) {
1394 fs->fs_clean = FS_ISCLEAN;
1395 fs->fs_fmod = 0;
1396 (void) ffs_sbupdate(ump, MNT_WAIT);
1397 }
1398 if (error == 0)
1399 UFS_WAPBL_END(mp);
1400 #ifdef WAPBL
1401 KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
1402 if (mp->mnt_wapbl_replay) {
1403 KDASSERT(fs->fs_ronly);
1404 wapbl_replay_stop(mp->mnt_wapbl_replay);
1405 wapbl_replay_free(mp->mnt_wapbl_replay);
1406 mp->mnt_wapbl_replay = 0;
1407 }
1408 error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
1409 if (error) {
1410 return error;
1411 }
1412 #endif /* WAPBL */
1413 #ifdef UFS_EXTATTR
1414 if (ump->um_fstype == UFS1) {
1415 ufs_extattr_stop(mp, l);
1416 ufs_extattr_uepm_destroy(&ump->um_extattr);
1417 }
1418 #endif /* UFS_EXTATTR */
1419
1420 if (ump->um_devvp->v_type != VBAD)
1421 ump->um_devvp->v_specmountpoint = NULL;
1422 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1423 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
1424 NOCRED);
1425 vput(ump->um_devvp);
1426 free(fs->fs_csp, M_UFSMNT);
1427 free(fs, M_UFSMNT);
1428 if (ump->um_oldfscompat != NULL)
1429 free(ump->um_oldfscompat, M_UFSMNT);
1430 mutex_destroy(&ump->um_lock);
1431 ffs_snapshot_fini(ump);
1432 free(ump, M_UFSMNT);
1433 mp->mnt_data = NULL;
1434 mp->mnt_flag &= ~MNT_LOCAL;
1435 fstrans_unmount(mp);
1436 return (0);
1437 }
1438
1439 /*
1440 * Flush out all the files in a filesystem.
1441 */
1442 int
1443 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1444 {
1445 extern int doforce;
1446 struct ufsmount *ump;
1447 int error;
1448
1449 if (!doforce)
1450 flags &= ~FORCECLOSE;
1451 ump = VFSTOUFS(mp);
1452 #ifdef QUOTA
1453 if (mp->mnt_flag & MNT_QUOTA) {
1454 int i;
1455 if ((error = vflush(mp, NULLVP, SKIPSYSTEM | flags)) != 0)
1456 return (error);
1457 for (i = 0; i < MAXQUOTAS; i++) {
1458 if (ump->um_quotas[i] == NULLVP)
1459 continue;
1460 quotaoff(l, mp, i);
1461 }
1462 /*
1463 * Here we fall through to vflush again to ensure
1464 * that we have gotten rid of all the system vnodes.
1465 */
1466 }
1467 #endif
1468 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1469 return (error);
1470 ffs_snapshot_unmount(mp);
1471 /*
1472 * Flush all the files.
1473 */
1474 error = vflush(mp, NULLVP, flags);
1475 if (error)
1476 return (error);
1477 /*
1478 * Flush filesystem metadata.
1479 */
1480 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1481 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1482 VOP_UNLOCK(ump->um_devvp, 0);
1483 if (flags & FORCECLOSE) /* XXXDBJ */
1484 error = 0;
1485
1486 #ifdef WAPBL
1487 if (error)
1488 return error;
1489 if (mp->mnt_wapbl) {
1490 error = wapbl_flush(mp->mnt_wapbl, 1);
1491 if (flags & FORCECLOSE)
1492 error = 0;
1493 }
1494 #endif
1495
1496 return (error);
1497 }
1498
1499 /*
1500 * Get file system statistics.
1501 */
1502 int
1503 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1504 {
1505 struct ufsmount *ump;
1506 struct fs *fs;
1507
1508 ump = VFSTOUFS(mp);
1509 fs = ump->um_fs;
1510 mutex_enter(&ump->um_lock);
1511 sbp->f_bsize = fs->fs_bsize;
1512 sbp->f_frsize = fs->fs_fsize;
1513 sbp->f_iosize = fs->fs_bsize;
1514 sbp->f_blocks = fs->fs_dsize;
1515 sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1516 fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
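/* f_bresvd is the fs_minfree percentage of data blocks held in reserve. */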
1517 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1518 fs->fs_minfree) / (u_int64_t) 100;
1519 if (sbp->f_bfree > sbp->f_bresvd)
1520 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1521 else
1522 sbp->f_bavail = 0;
1523 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
1524 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1525 sbp->f_favail = sbp->f_ffree;
1526 sbp->f_fresvd = 0;
1527 mutex_exit(&ump->um_lock);
1528 copy_statvfs_info(sbp, mp);
1529
1530 return (0);
1531 }
1532
1533 /*
1534 * Go through the disk queues to initiate sandbagged IO;
1535 * go through the inodes to write those that have been modified;
1536 * initiate the writing of the super block if it has been modified.
1537 *
1538 * Note: we are always called with the filesystem marked `MPBUSY'.
1539 */
1540 int
1541 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1542 {
1543 struct vnode *vp, *mvp, *nvp;
1544 struct inode *ip;
1545 struct ufsmount *ump = VFSTOUFS(mp);
1546 struct fs *fs;
1547 int lk_flags, error, allerror = 0;
1548 bool is_suspending;
1549
1550 fs = ump->um_fs;
1551 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1552 printf("fs = %s\n", fs->fs_fsmnt);
1553 panic("update: rofs mod");
1554 }
1555
1556 /* Allocate a marker vnode. */
1557 if ((mvp = vnalloc(mp)) == NULL)
1558 return (ENOMEM);
1559
1560 fstrans_start(mp, FSTRANS_SHARED);
1561 is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING);
1562 /*
1563 * We can't lock vnodes while the file system is suspending because
1564 * threads waiting on fstrans may have locked vnodes.
1565 */
1566 if (is_suspending)
1567 lk_flags = LK_INTERLOCK;
1568 else
1569 lk_flags = LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK;
1570 /*
1571 * Write back each (modified) inode.
1572 */
1573 mutex_enter(&mntvnode_lock);
1574 loop:
1575 /*
1576 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1577 * and vclean() can be called indirectly
1578 */
1579 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1580 nvp = TAILQ_NEXT(vp, v_mntvnodes);
1581 /*
1582 * If the vnode that we are about to sync is no longer
1583 * associated with this mount point, start over.
1584 */
1585 if (vp->v_mount != mp)
1586 goto loop;
1587 /*
1588 * Don't interfere with concurrent scans of this FS.
1589 */
1590 if (vismarker(vp))
1591 continue;
1592 mutex_enter(&vp->v_interlock);
1593 ip = VTOI(vp);
1594
1595 /*
1596 * Skip the vnode/inode if inaccessible.
1597 */
1598 if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
1599 vp->v_type == VNON) {
1600 mutex_exit(&vp->v_interlock);
1601 continue;
1602 }
1603
1604 /*
1605 * We deliberately update inode times here. This will
1606 * prevent a massive queue of updates accumulating, only
1607 * to be handled by a call to unmount.
1608 *
1609 * XXX It would be better to have the syncer trickle these
1610 * out. Adjustment needed to allow registering vnodes for
1611 * sync when the vnode is clean, but the inode dirty. Or
1612 * have ufs itself trickle out inode updates.
1613 *
1614 * If doing a lazy sync, we don't care about metadata or
1615 * data updates, because they are handled by each vnode's
1616 * synclist entry. In this case we are only interested in
1617 * writing back modified inodes.
1618 */
1619 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE |
1620 IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 &&
1621 (waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) &&
1622 UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) {
1623 mutex_exit(&vp->v_interlock);
1624 continue;
1625 }
1626 if (vp->v_type == VBLK && is_suspending) {
1627 mutex_exit(&vp->v_interlock);
1628 continue;
1629 }
1630 vmark(mvp, vp);
1631 mutex_exit(&mntvnode_lock);
1632 error = vget(vp, lk_flags);
1633 if (error) {
1634 mutex_enter(&mntvnode_lock);
1635 nvp = vunmark(mvp);
1636 if (error == ENOENT) {
1637 goto loop;
1638 }
1639 continue;
1640 }
1641 if (waitfor == MNT_LAZY) {
1642 error = UFS_WAPBL_BEGIN(vp->v_mount);
1643 if (!error) {
1644 error = ffs_update(vp, NULL, NULL,
1645 UPDATE_CLOSE);
1646 UFS_WAPBL_END(vp->v_mount);
1647 }
1648 } else {
1649 error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
1650 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
1651 }
1652 if (error)
1653 allerror = error;
1654 if (is_suspending)
1655 vrele(vp);
1656 else
1657 vput(vp);
1658 mutex_enter(&mntvnode_lock);
1659 nvp = vunmark(mvp);
1660 }
1661 mutex_exit(&mntvnode_lock);
1662 /*
1663 * Force stale file system control information to be flushed.
1664 */
1665 if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1666 !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1667 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1668 if ((error = VOP_FSYNC(ump->um_devvp, cred,
1669 (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
1670 0, 0)) != 0)
1671 allerror = error;
1672 VOP_UNLOCK(ump->um_devvp, 0);
1673 if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
1674 mutex_enter(&mntvnode_lock);
1675 goto loop;
1676 }
1677 }
1678 #ifdef QUOTA
1679 qsync(mp);
1680 #endif
1681 /*
1682 * Write back modified superblock.
1683 */
1684 if (fs->fs_fmod != 0) {
1685 fs->fs_fmod = 0;
1686 fs->fs_time = time_second;
1687 error = UFS_WAPBL_BEGIN(mp);
1688 if (error)
1689 allerror = error;
1690 else {
1691 if ((error = ffs_cgupdate(ump, waitfor)))
1692 allerror = error;
1693 UFS_WAPBL_END(mp);
1694 }
1695 }
1696
1697 #ifdef WAPBL
1698 if (mp->mnt_wapbl) {
1699 error = wapbl_flush(mp->mnt_wapbl, 0);
1700 if (error)
1701 allerror = error;
1702 }
1703 #endif
1704
1705 fstrans_done(mp);
1706 vnfree(mvp);
1707 return (allerror);
1708 }
1709
1710 /*
1711 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1712 * in from disk. If it is in core, wait for the lock bit to clear, then
1713 * return the inode locked. Detection and handling of mount points must be
1714 * done by the calling routine.
1715 */
1716 int
1717 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1718 {
1719 struct fs *fs;
1720 struct inode *ip;
1721 struct ufsmount *ump;
1722 struct buf *bp;
1723 struct vnode *vp;
1724 dev_t dev;
1725 int error;
1726
1727 ump = VFSTOUFS(mp);
1728 dev = ump->um_dev;
1729
1730 retry:
1731 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1732 return (0);
1733
1734 /* Allocate a new vnode/inode. */
1735 if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
1736 *vpp = NULL;
1737 return (error);
1738 }
1739 ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1740
1741 /*
1742 * If someone beat us to it, put back the freshly allocated
1743 * vnode/inode pair and retry.
1744 */
1745 mutex_enter(&ufs_hashlock);
1746 if (ufs_ihashget(dev, ino, 0) != NULL) {
1747 mutex_exit(&ufs_hashlock);
1748 ungetnewvnode(vp);
1749 pool_cache_put(ffs_inode_cache, ip);
1750 goto retry;
1751 }
1752
1753 vp->v_vflag |= VV_LOCKSWORK;
1754
1755 /*
1756 * XXX MFS ends up here, too, to allocate an inode. Should we
1757 * XXX create another pool for MFS inodes?
1758 */
1759
1760 memset(ip, 0, sizeof(struct inode));
1761 vp->v_data = ip;
1762 ip->i_vnode = vp;
1763 ip->i_ump = ump;
1764 ip->i_fs = fs = ump->um_fs;
1765 ip->i_dev = dev;
1766 ip->i_number = ino;
1767 #ifdef QUOTA
1768 ufsquota_init(ip);
1769 #endif
1770
1771 /*
1772 * Initialize genfs node, we might proceed to destroy it in
1773 * error branches.
1774 */
1775 genfs_node_init(vp, &ffs_genfsops);
1776
1777 /*
1778 * Put it onto its hash chain and lock it so that other requests for
1779 * this inode will block if they arrive while we are sleeping waiting
1780 * for old data structures to be purged or for the contents of the
1781 * disk portion of this inode to be read.
1782 */
1783
1784 ufs_ihashins(ip);
1785 mutex_exit(&ufs_hashlock);
1786
1787 /* Read in the disk contents for the inode, copy into the inode. */
1788 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1789 (int)fs->fs_bsize, NOCRED, 0, &bp);
1790 if (error) {
1791
1792 /*
1793 * The inode does not contain anything useful, so it would
1794 * be misleading to leave it on its hash chain. With mode
1795 * still zero, it will be unlinked and returned to the free
1796 * list by vput().
1797 */
1798
1799 vput(vp);
1800 brelse(bp, 0);
1801 *vpp = NULL;
1802 return (error);
1803 }
1804 if (ip->i_ump->um_fstype == UFS1)
1805 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1806 PR_WAITOK);
1807 else
1808 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1809 PR_WAITOK);
1810 ffs_load_inode(bp, ip, fs, ino);
1811 brelse(bp, 0);
1812
1813 /*
1814 * Initialize the vnode from the inode, check for aliases.
1815 * Note that the underlying vnode may have changed.
1816 */
1817
1818 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1819
1820 /*
1821 * Finish inode initialization now that aliasing has been resolved.
1822 */
1823
1824 ip->i_devvp = ump->um_devvp;
1825 vref(ip->i_devvp);
1826
1827 /*
1828 * Ensure that uid and gid are correct. This is a temporary
1829 * fix until fsck has been changed to do the update.
1830 */
1831
1832 if (fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */
1833 ip->i_uid = ip->i_ffs1_ouid; /* XXX */
1834 ip->i_gid = ip->i_ffs1_ogid; /* XXX */
1835 } /* XXX */
1836 uvm_vnp_setsize(vp, ip->i_size);
1837 *vpp = vp;
1838 return (0);
1839 }
1840
1841 /*
1842 * File handle to vnode
1843 *
1844 * Have to be really careful about stale file handles:
1845 * - check that the inode number is valid
1846 * - call ffs_vget() to get the locked inode
1847 * - check for an unallocated inode (i_mode == 0)
1848 * - check that the given client host has export rights and return
1849 * those rights via exflagsp and credanonp
1850 */
1851 int
1852 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1853 {
1854 struct ufid ufh;
1855 struct fs *fs;
1856
1857 if (fhp->fid_len != sizeof(struct ufid))
1858 return EINVAL;
1859
1860 memcpy(&ufh, fhp, sizeof(ufh));
1861 fs = VFSTOUFS(mp)->um_fs;
1862 if (ufh.ufid_ino < ROOTINO ||
1863 ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1864 return (ESTALE);
1865 return (ufs_fhtovp(mp, &ufh, vpp));
1866 }
1867
1868 /*
1869 * Vnode pointer to File handle
1870 */
1871 /* ARGSUSED */
1872 int
1873 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1874 {
1875 struct inode *ip;
1876 struct ufid ufh;
1877
1878 if (*fh_size < sizeof(struct ufid)) {
1879 *fh_size = sizeof(struct ufid);
1880 return E2BIG;
1881 }
1882 ip = VTOI(vp);
1883 *fh_size = sizeof(struct ufid);
1884 memset(&ufh, 0, sizeof(ufh));
1885 ufh.ufid_len = sizeof(struct ufid);
1886 ufh.ufid_ino = ip->i_number;
1887 ufh.ufid_gen = ip->i_gen;
1888 memcpy(fhp, &ufh, sizeof(ufh));
1889 return (0);
1890 }
1891
1892 void
1893 ffs_init(void)
1894 {
1895 if (ffs_initcount++ > 0)
1896 return;
1897
1898 ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1899 "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1900 ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1901 "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1902 ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1903 "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1904 ufs_init();
1905 }
1906
1907 void
1908 ffs_reinit(void)
1909 {
1910
1911 ufs_reinit();
1912 }
1913
1914 void
1915 ffs_done(void)
1916 {
1917 if (--ffs_initcount > 0)
1918 return;
1919
1920 ufs_done();
1921 pool_cache_destroy(ffs_dinode2_cache);
1922 pool_cache_destroy(ffs_dinode1_cache);
1923 pool_cache_destroy(ffs_inode_cache);
1924 }
1925
1926 /*
1927 * Write a superblock and associated information back to disk.
1928 */
1929 int
1930 ffs_sbupdate(struct ufsmount *mp, int waitfor)
1931 {
1932 struct fs *fs = mp->um_fs;
1933 struct buf *bp;
1934 int error = 0;
1935 u_int32_t saveflag;
1936
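/*
 * fs_fshift - fs_fsbtodb equals DEV_BSHIFT, so the shift below is
 * effectively fs_sblockloc / DEV_BSIZE: the superblock byte offset
 * expressed as a device block number.
 */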
1937 error = ffs_getblk(mp->um_devvp,
1938 fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb), FFS_NOBLK,
1939 fs->fs_sbsize, false, &bp);
1940 if (error)
1941 return error;
1942 saveflag = fs->fs_flags & FS_INTERNAL;
1943 fs->fs_flags &= ~FS_INTERNAL;
1944
1945 memcpy(bp->b_data, fs, fs->fs_sbsize);
1946
1947 ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1948 #ifdef FFS_EI
1949 if (mp->um_flags & UFS_NEEDSWAP)
1950 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
1951 #endif
1952 fs->fs_flags |= saveflag;
1953
1954 if (waitfor == MNT_WAIT)
1955 error = bwrite(bp);
1956 else
1957 bawrite(bp);
1958 return (error);
1959 }
1960
1961 int
1962 ffs_cgupdate(struct ufsmount *mp, int waitfor)
1963 {
1964 struct fs *fs = mp->um_fs;
1965 struct buf *bp;
1966 int blks;
1967 void *space;
1968 int i, size, error = 0, allerror = 0;
1969
1970 allerror = ffs_sbupdate(mp, waitfor);
1971 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1972 space = fs->fs_csp;
1973 for (i = 0; i < blks; i += fs->fs_frag) {
1974 size = fs->fs_bsize;
1975 if (i + fs->fs_frag > blks)
1976 size = (blks - i) * fs->fs_fsize;
1977 error = ffs_getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1978 FFS_NOBLK, size, false, &bp);
1979 if (error)
1980 break;
1981 #ifdef FFS_EI
1982 if (mp->um_flags & UFS_NEEDSWAP)
1983 ffs_csum_swap((struct csum*)space,
1984 (struct csum*)bp->b_data, size);
1985 else
1986 #endif
1987 memcpy(bp->b_data, space, (u_int)size);
1988 space = (char *)space + size;
1989 if (waitfor == MNT_WAIT)
1990 error = bwrite(bp);
1991 else
1992 bawrite(bp);
1993 }
1994 if (!allerror && error)
1995 allerror = error;
1996 return (allerror);
1997 }
1998
1999 int
2000 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
2001 int attrnamespace, const char *attrname)
2002 {
2003 #ifdef UFS_EXTATTR
2004 /*
2005 * File-backed extended attributes are only supported on UFS1.
2006 * UFS2 has native extended attributes.
2007 */
2008 if (VFSTOUFS(mp)->um_fstype == UFS1)
2009 return (ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname));
2010 #endif
2011 return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname));
2012 }
2013
2014 int
2015 ffs_suspendctl(struct mount *mp, int cmd)
2016 {
2017 int error;
2018 struct lwp *l = curlwp;
2019
2020 switch (cmd) {
2021 case SUSPEND_SUSPEND:
2022 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
2023 return error;
2024 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
2025 if (error == 0)
2026 error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
2027 #ifdef WAPBL
2028 if (error == 0 && mp->mnt_wapbl)
2029 error = wapbl_flush(mp->mnt_wapbl, 1);
2030 #endif
2031 if (error != 0) {
2032 (void) fstrans_setstate(mp, FSTRANS_NORMAL);
2033 return error;
2034 }
2035 return 0;
2036
2037 case SUSPEND_RESUME:
2038 return fstrans_setstate(mp, FSTRANS_NORMAL);
2039
2040 default:
2041 return EINVAL;
2042 }
2043 }
2044
2045 /*
2046 * Synch vnode for a mounted file system. This is called for foreign
2047 * vnodes, i.e. non-ffs.
2048 */
2049 static int
2050 ffs_vfs_fsync(vnode_t *vp, int flags)
2051 {
2052 int error, passes, skipmeta, i, pflags;
2053 buf_t *bp, *nbp;
2054 #ifdef WAPBL
2055 struct mount *mp;
2056 #endif
2057
2058 KASSERT(vp->v_type == VBLK);
2059 KASSERT(vp->v_specmountpoint != NULL);
2060
2061 /*
2062 * Flush all dirty data associated with the vnode.
2063 */
2064 pflags = PGO_ALLPAGES | PGO_CLEANIT;
2065 if ((flags & FSYNC_WAIT) != 0)
2066 pflags |= PGO_SYNCIO;
2067 mutex_enter(&vp->v_interlock);
2068 error = VOP_PUTPAGES(vp, 0, 0, pflags);
2069 if (error)
2070 return error;
2071
2072 #ifdef WAPBL
2073 mp = vp->v_specmountpoint;
2074 if (mp && mp->mnt_wapbl) {
2075 /*
2076 * Don't bother writing out metadata if the syncer is
2077 * making the request. We will let the sync vnode
2078 * write it out in a single burst through a call to
2079 * VFS_SYNC().
2080 */
2081 if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0)
2082 return 0;
2083
2084 /*
2085 * Don't flush the log if the vnode being flushed
2086 * contains no dirty buffers that could be in the log.
2087 */
2088 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2089 error = wapbl_flush(mp->mnt_wapbl, 0);
2090 if (error)
2091 return error;
2092 }
2093
2094 if ((flags & FSYNC_WAIT) != 0) {
2095 mutex_enter(&vp->v_interlock);
2096 while (vp->v_numoutput)
2097 cv_wait(&vp->v_cv, &vp->v_interlock);
2098 mutex_exit(&vp->v_interlock);
2099 }
2100
2101 return 0;
2102 }
2103 #endif /* WAPBL */
2104
2105 /*
2106 * Write out metadata for non-logging file systems. XXX This block
2107 * should be simplified now that softdep is gone.
2108 */
2109 passes = NIADDR + 1;
2110 skipmeta = 0;
2111 if (flags & FSYNC_WAIT)
2112 skipmeta = 1;
2113
2114 loop:
2115 mutex_enter(&bufcache_lock);
2116 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
2117 bp->b_cflags &= ~BC_SCANNED;
2118 }
2119 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2120 nbp = LIST_NEXT(bp, b_vnbufs);
2121 if (bp->b_cflags & (BC_BUSY | BC_SCANNED))
2122 continue;
2123 if ((bp->b_oflags & BO_DELWRI) == 0)
2124 panic("ffs_fsync: not dirty");
2125 if (skipmeta && bp->b_lblkno < 0)
2126 continue;
2127 bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED;
2128 mutex_exit(&bufcache_lock);
2129 /*
2130 * On our final pass through, do all I/O synchronously
2131 * so that we can find out if our flush is failing
2132 * because of write errors.
2133 */
2134 if (passes > 0 || !(flags & FSYNC_WAIT))
2135 (void) bawrite(bp);
2136 else if ((error = bwrite(bp)) != 0)
2137 return (error);
2138 /*
2139 * Since we unlocked during the I/O, we need
2140 * to start from a known point.
2141 */
2142 mutex_enter(&bufcache_lock);
2143 nbp = LIST_FIRST(&vp->v_dirtyblkhd);
2144 }
2145 mutex_exit(&bufcache_lock);
2146 if (skipmeta) {
2147 skipmeta = 0;
2148 goto loop;
2149 }
2150
2151 if ((flags & FSYNC_WAIT) != 0) {
2152 mutex_enter(&vp->v_interlock);
2153 while (vp->v_numoutput) {
2154 cv_wait(&vp->v_cv, &vp->v_interlock);
2155 }
2156 mutex_exit(&vp->v_interlock);
2157
2158 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2159 /*
2160 * Block devices associated with filesystems may
2161 * have new I/O requests posted for them even if
2162 * the vnode is locked, so no amount of trying will
2163 * get them clean. Thus we give block devices a
2164 * good effort, then just give up. For all other file
2165 * types, go around and try again until it is clean.
2166 */
2167 if (passes > 0) {
2168 passes--;
2169 goto loop;
2170 }
2171 #ifdef DIAGNOSTIC
2172 if (vp->v_type != VBLK)
2173 vprint("ffs_fsync: dirty", vp);
2174 #endif
2175 }
2176 }
2177
2178 if (error == 0 && (flags & FSYNC_CACHE) != 0) {
2179 (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
2180 kauth_cred_get());
2181 }
2182
2183 return error;
2184 }
2185