ffs_vfsops.c revision 1.225 1 /* $NetBSD: ffs_vfsops.c,v 1.225 2008/04/30 12:49:17 ad Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1991, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.225 2008/04/30 12:49:17 ad Exp $");
36
37 #if defined(_KERNEL_OPT)
38 #include "opt_ffs.h"
39 #include "opt_quota.h"
40 #include "opt_softdep.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/namei.h>
46 #include <sys/proc.h>
47 #include <sys/kernel.h>
48 #include <sys/vnode.h>
49 #include <sys/socket.h>
50 #include <sys/mount.h>
51 #include <sys/buf.h>
52 #include <sys/device.h>
53 #include <sys/mbuf.h>
54 #include <sys/file.h>
55 #include <sys/disklabel.h>
56 #include <sys/ioctl.h>
57 #include <sys/errno.h>
58 #include <sys/malloc.h>
59 #include <sys/pool.h>
60 #include <sys/lock.h>
61 #include <sys/sysctl.h>
62 #include <sys/conf.h>
63 #include <sys/kauth.h>
64 #include <sys/fstrans.h>
65
66 #include <miscfs/genfs/genfs.h>
67 #include <miscfs/specfs/specdev.h>
68
69 #include <ufs/ufs/quota.h>
70 #include <ufs/ufs/ufsmount.h>
71 #include <ufs/ufs/inode.h>
72 #include <ufs/ufs/dir.h>
73 #include <ufs/ufs/ufs_extern.h>
74 #include <ufs/ufs/ufs_bswap.h>
75
76 #include <ufs/ffs/fs.h>
77 #include <ufs/ffs/ffs_extern.h>
78
79 /* how many times ffs_init() was called */
80 int ffs_initcount = 0;
81
/* Lock defined in another file; nothing in the part of the file reviewed here takes it. */
82 extern kmutex_t ufs_hashlock;
83
/* Vnode operation vector descriptors; their definitions live outside this file. */
84 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
85 extern const struct vnodeopv_desc ffs_specop_opv_desc;
86 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
87
/*
 * NULL-terminated table of the three descriptors above.  It is published
 * to the rest of the kernel through the ffs_vfsops initializer below.
 * NOTE(review): going by the names these cover regular, special-device
 * and FIFO vnodes respectively -- confirm against their definitions.
 */
88 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
89 &ffs_vnodeop_opv_desc,
90 &ffs_specop_opv_desc,
91 &ffs_fifoop_opv_desc,
92 NULL,
93 };
94
/*
 * VFS operations switch for FFS.
 *
 * This is a positional initializer: each entry lands in whichever member
 * of struct vfsops occupies that position, so the order here must follow
 * the structure declaration (which is not part of this file).  Several
 * slots are filled with shared ufs_* and genfs_* routines instead of
 * ffs_* ones.  ffs_vnodeopv_descs is the descriptor table defined above.
 */
95 struct vfsops ffs_vfsops = {
96 MOUNT_FFS,
97 sizeof (struct ufs_args),
98 ffs_mount,
99 ufs_start,
100 ffs_unmount,
101 ufs_root,
102 ufs_quotactl,
103 ffs_statvfs,
104 ffs_sync,
105 ffs_vget,
106 ffs_fhtovp,
107 ffs_vptofh,
108 ffs_init,
109 ffs_reinit,
110 ffs_done,
111 ffs_mountroot,
112 ffs_snapshot,
113 ffs_extattrctl,
114 ffs_suspendctl,
115 genfs_renamelock_enter,
116 genfs_renamelock_exit,
117 ffs_full_fsync,
118 ffs_vnodeopv_descs,
119 0,
120 { NULL, NULL },
121 };
/* NOTE(review): presumably registers the switch with the VFS layer; VFS_ATTACH is defined elsewhere. */
122 VFS_ATTACH(ffs_vfsops);
123
/*
 * genfs callbacks for FFS.  Only the size hook is FFS specific; the
 * remaining three entries point at shared ufs_* / genfs_* routines.
 * No code in the part of the file reviewed here references this table.
 */
124 static const struct genfs_ops ffs_genfsops = {
125 .gop_size = ffs_gop_size,
126 .gop_alloc = ufs_gop_alloc,
127 .gop_write = genfs_gop_write,
128 .gop_markupdate = ufs_gop_markupdate,
129 };
130
/*
 * FFS implementations of the ufs_ops hooks.  ffs_mountfs() stores a
 * pointer to this table in ump->um_ops for every mount it sets up.
 */
131 static const struct ufs_ops ffs_ufsops = {
132 .uo_itimes = ffs_itimes,
133 .uo_update = ffs_update,
134 .uo_truncate = ffs_truncate,
135 .uo_valloc = ffs_valloc,
136 .uo_vfree = ffs_vfree,
137 .uo_balloc = ffs_balloc,
138 };
139
/*
 * Pool caches with external linkage.  They are not set up anywhere in
 * the part of the file reviewed here.
 * NOTE(review): by name, one for in-core inodes and one each for the
 * UFS1 and UFS2 on-disk inode formats -- confirm where they are created.
 */
140 pool_cache_t ffs_inode_cache;
141 pool_cache_t ffs_dinode1_cache;
142 pool_cache_t ffs_dinode2_cache;
143
/*
 * Forward declarations of the superblock fix-up helpers for old UFS1
 * file systems; both are defined further down in this file.
 */
144 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
145 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
146
147 /*
148 * Called by main() when ffs is going to be mounted as root.
149 */
150
151 int
152 ffs_mountroot(void)
153 {
154 struct fs *fs;
155 struct mount *mp;
156 struct lwp *l = curlwp; /* XXX */
157 struct ufsmount *ump;
158 int error;
159
160 if (device_class(root_device) != DV_DISK)
161 return (ENODEV);
162
163 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
164 vrele(rootvp);
165 return (error);
166 }
167 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
168 vfs_unbusy(mp, false, NULL);
169 vfs_destroy(mp, false);
170 return (error);
171 }
172 mutex_enter(&mountlist_lock);
173 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
174 mp->mnt_iflag |= IMNT_ONLIST;
175 mutex_exit(&mountlist_lock);
176 ump = VFSTOUFS(mp);
177 fs = ump->um_fs;
178 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
179 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
180 (void)ffs_statvfs(mp, &mp->mnt_stat);
181 vfs_unbusy(mp, false, NULL);
182 setrootfstime((time_t)fs->fs_time);
183 return (0);
184 }
185
186 /*
187 * VFS Operations.
188 *
189 * mount system call
190 */
191 int
192 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
193 {
194 struct lwp *l = curlwp;
195 struct nameidata nd;
196 struct vnode *vp, *devvp = NULL;
197 struct ufs_args *args = data;
198 struct ufsmount *ump = NULL;
199 struct fs *fs;
200 int error = 0, flags, update;
201 mode_t accessmode;
202
203 if (*data_len < sizeof *args)
204 return EINVAL;
205
206 if (mp->mnt_flag & MNT_GETARGS) {
207 ump = VFSTOUFS(mp);
208 if (ump == NULL)
209 return EIO;
210 args->fspec = NULL;
211 *data_len = sizeof *args;
212 return 0;
213 }
214
215 #if !defined(SOFTDEP)
216 mp->mnt_flag &= ~MNT_SOFTDEP;
217 #endif
218
219 update = mp->mnt_flag & MNT_UPDATE;
220
221 /* Check arguments */
222 if (args->fspec != NULL) {
223 /*
224 * Look up the name and verify that it's sane.
225 */
226 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, args->fspec);
227 if ((error = namei(&nd)) != 0)
228 return (error);
229 devvp = nd.ni_vp;
230
231 if (!update) {
232 /*
233 * Be sure this is a valid block device
234 */
235 if (devvp->v_type != VBLK)
236 error = ENOTBLK;
237 else if (bdevsw_lookup(devvp->v_rdev) == NULL)
238 error = ENXIO;
239 } else {
240 /*
241 * Be sure we're still naming the same device
242 * used for our initial mount
243 */
244 ump = VFSTOUFS(mp);
245 if (devvp != ump->um_devvp) {
246 if (devvp->v_rdev != ump->um_devvp->v_rdev)
247 error = EINVAL;
248 else {
249 vrele(devvp);
250 devvp = ump->um_devvp;
251 vref(devvp);
252 }
253 }
254 }
255 } else {
256 if (!update) {
257 /* New mounts must have a filename for the device */
258 return (EINVAL);
259 } else {
260 /* Use the extant mount */
261 ump = VFSTOUFS(mp);
262 devvp = ump->um_devvp;
263 vref(devvp);
264 }
265 }
266
267 /*
268 * Mark the device and any existing vnodes as involved in
269 * softdep processing.
270 */
271 if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
272 devvp->v_uflag |= VU_SOFTDEP;
273 mutex_enter(&mntvnode_lock);
274 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
275 if (vp->v_mount != mp || vismarker(vp))
276 continue;
277 vp->v_uflag |= VU_SOFTDEP;
278 }
279 mutex_exit(&mntvnode_lock);
280 }
281
282 /*
283 * If mount by non-root, then verify that user has necessary
284 * permissions on the device.
285 */
286 if (error == 0 && kauth_authorize_generic(l->l_cred,
287 KAUTH_GENERIC_ISSUSER, NULL) != 0) {
288 accessmode = VREAD;
289 if (update ?
290 (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
291 (mp->mnt_flag & MNT_RDONLY) == 0)
292 accessmode |= VWRITE;
293 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
294 error = VOP_ACCESS(devvp, accessmode, l->l_cred);
295 VOP_UNLOCK(devvp, 0);
296 }
297
298 if (error) {
299 vrele(devvp);
300 return (error);
301 }
302
303 if (!update) {
304 int xflags;
305
306 if (mp->mnt_flag & MNT_RDONLY)
307 xflags = FREAD;
308 else
309 xflags = FREAD|FWRITE;
310 error = VOP_OPEN(devvp, xflags, FSCRED);
311 if (error)
312 goto fail;
313 error = ffs_mountfs(devvp, mp, l);
314 if (error) {
315 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
316 (void)VOP_CLOSE(devvp, xflags, NOCRED);
317 VOP_UNLOCK(devvp, 0);
318 goto fail;
319 }
320
321 ump = VFSTOUFS(mp);
322 fs = ump->um_fs;
323 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
324 (MNT_SOFTDEP | MNT_ASYNC)) {
325 printf("%s fs uses soft updates, "
326 "ignoring async mode\n",
327 fs->fs_fsmnt);
328 mp->mnt_flag &= ~MNT_ASYNC;
329 }
330 } else {
331 /*
332 * Update the mount.
333 */
334
335 /*
336 * The initial mount got a reference on this
337 * device, so drop the one obtained via
338 * namei(), above.
339 */
340 vrele(devvp);
341
342 ump = VFSTOUFS(mp);
343 fs = ump->um_fs;
344 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
345 /*
346 * Changing from r/w to r/o
347 */
348 flags = WRITECLOSE;
349 if (mp->mnt_flag & MNT_FORCE)
350 flags |= FORCECLOSE;
351 if (mp->mnt_flag & MNT_SOFTDEP)
352 error = softdep_flushfiles(mp, flags, l);
353 else
354 error = ffs_flushfiles(mp, flags, l);
355 if (fs->fs_pendingblocks != 0 ||
356 fs->fs_pendinginodes != 0) {
357 printf("%s: update error: blocks %" PRId64
358 " files %d\n",
359 fs->fs_fsmnt, fs->fs_pendingblocks,
360 fs->fs_pendinginodes);
361 fs->fs_pendingblocks = 0;
362 fs->fs_pendinginodes = 0;
363 }
364 if (error == 0 &&
365 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
366 fs->fs_clean & FS_WASCLEAN) {
367 if (mp->mnt_flag & MNT_SOFTDEP)
368 fs->fs_flags &= ~FS_DOSOFTDEP;
369 fs->fs_clean = FS_ISCLEAN;
370 (void) ffs_sbupdate(ump, MNT_WAIT);
371 }
372 if (error)
373 return (error);
374 fs->fs_ronly = 1;
375 fs->fs_fmod = 0;
376 }
377
378 /*
379 * Flush soft dependencies if disabling it via an update
380 * mount. This may leave some items to be processed,
381 * so don't do this yet XXX.
382 */
383 if ((fs->fs_flags & FS_DOSOFTDEP) &&
384 !(mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
385 #ifdef notyet
386 flags = WRITECLOSE;
387 if (mp->mnt_flag & MNT_FORCE)
388 flags |= FORCECLOSE;
389 error = softdep_flushfiles(mp, flags, l);
390 if (error == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0)
391 fs->fs_flags &= ~FS_DOSOFTDEP;
392 (void) ffs_sbupdate(ump, MNT_WAIT);
393 #elif defined(SOFTDEP)
394 mp->mnt_flag |= MNT_SOFTDEP;
395 #endif
396 }
397
398 /*
399 * When upgrading to a softdep mount, we must first flush
400 * all vnodes. (not done yet -- see above)
401 */
402 if (!(fs->fs_flags & FS_DOSOFTDEP) &&
403 (mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
404 #ifdef notyet
405 flags = WRITECLOSE;
406 if (mp->mnt_flag & MNT_FORCE)
407 flags |= FORCECLOSE;
408 error = ffs_flushfiles(mp, flags, l);
409 #else
410 mp->mnt_flag &= ~MNT_SOFTDEP;
411 #endif
412 }
413
414 if (mp->mnt_flag & MNT_RELOAD) {
415 error = ffs_reload(mp, l->l_cred, l);
416 if (error)
417 return (error);
418 }
419
420 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
421 /*
422 * Changing from read-only to read/write
423 */
424 fs->fs_ronly = 0;
425 fs->fs_clean <<= 1;
426 fs->fs_fmod = 1;
427 if ((fs->fs_flags & FS_DOSOFTDEP)) {
428 error = softdep_mount(devvp, mp, fs,
429 l->l_cred);
430 if (error)
431 return (error);
432 }
433 if (fs->fs_snapinum[0] != 0)
434 ffs_snapshot_mount(mp);
435 }
436 if (args->fspec == NULL)
437 return EINVAL;
438 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
439 (MNT_SOFTDEP | MNT_ASYNC)) {
440 printf("%s fs uses soft updates, ignoring async mode\n",
441 fs->fs_fsmnt);
442 mp->mnt_flag &= ~MNT_ASYNC;
443 }
444 }
445
446 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
447 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
448 if (error == 0)
449 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
450 sizeof(fs->fs_fsmnt));
451 if (mp->mnt_flag & MNT_SOFTDEP)
452 fs->fs_flags |= FS_DOSOFTDEP;
453 else
454 fs->fs_flags &= ~FS_DOSOFTDEP;
455 if (fs->fs_fmod != 0) { /* XXX */
456 fs->fs_fmod = 0;
457 if (fs->fs_clean & FS_WASCLEAN)
458 fs->fs_time = time_second;
459 else {
460 printf("%s: file system not clean (fs_clean=%x); please fsck(8)\n",
461 mp->mnt_stat.f_mntfromname, fs->fs_clean);
462 printf("%s: lost blocks %" PRId64 " files %d\n",
463 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
464 fs->fs_pendinginodes);
465 }
466 (void) ffs_cgupdate(ump, MNT_WAIT);
467 }
468 return (error);
469
470 fail:
471 vrele(devvp);
472 return (error);
473 }
474
475 /*
476 * Reload all incore data for a filesystem (used after running fsck on
477 * the root filesystem and finding things to fix). The filesystem must
478 * be mounted read-only.
479 *
480 * Things to do to update the mount:
481 * 1) invalidate all cached meta-data.
482 * 2) re-read superblock from disk.
483 * 3) re-read summary information from disk.
484 * 4) invalidate all inactive vnodes.
485 * 5) invalidate all cached file data.
486 * 6) re-read inode data for all active vnodes.
487 */
488 int
489 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
490 {
491 struct vnode *vp, *mvp, *devvp;
492 struct inode *ip;
493 void *space;
494 struct buf *bp;
495 struct fs *fs, *newfs;
496 struct partinfo dpart;
497 int i, blks, size, error;
498 int32_t *lp;
499 struct ufsmount *ump;
500 daddr_t sblockloc;
501
502 if ((mp->mnt_flag & MNT_RDONLY) == 0)
503 return (EINVAL);
504
505 ump = VFSTOUFS(mp);
506 /*
507 * Step 1: invalidate all cached meta-data.
508 */
509 devvp = ump->um_devvp;
510 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
511 error = vinvalbuf(devvp, 0, cred, l, 0, 0);
512 VOP_UNLOCK(devvp, 0);
513 if (error)
514 panic("ffs_reload: dirty1");
515 /*
516 * Step 2: re-read superblock from disk.
517 */
518 fs = ump->um_fs;
519 if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, NOCRED) != 0)
520 size = DEV_BSIZE;
521 else
522 size = dpart.disklab->d_secsize;
523 /* XXX we don't handle possibility that superblock moved. */
524 error = bread(devvp, fs->fs_sblockloc / size, fs->fs_sbsize,
525 NOCRED, &bp);
526 if (error) {
527 brelse(bp, 0);
528 return (error);
529 }
530 newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
531 memcpy(newfs, bp->b_data, fs->fs_sbsize);
532 #ifdef FFS_EI
533 if (ump->um_flags & UFS_NEEDSWAP) {
534 ffs_sb_swap((struct fs*)bp->b_data, newfs);
535 fs->fs_flags |= FS_SWAPPED;
536 } else
537 #endif
538 fs->fs_flags &= ~FS_SWAPPED;
539 if ((newfs->fs_magic != FS_UFS1_MAGIC &&
540 newfs->fs_magic != FS_UFS2_MAGIC)||
541 newfs->fs_bsize > MAXBSIZE ||
542 newfs->fs_bsize < sizeof(struct fs)) {
543 brelse(bp, 0);
544 free(newfs, M_UFSMNT);
545 return (EIO); /* XXX needs translation */
546 }
547 /* Store off old fs_sblockloc for fs_oldfscompat_read. */
548 sblockloc = fs->fs_sblockloc;
549 /*
550 * Copy pointer fields back into superblock before copying in XXX
551 * new superblock. These should really be in the ufsmount. XXX
552 * Note that important parameters (eg fs_ncg) are unchanged.
553 */
554 newfs->fs_csp = fs->fs_csp;
555 newfs->fs_maxcluster = fs->fs_maxcluster;
556 newfs->fs_contigdirs = fs->fs_contigdirs;
557 newfs->fs_ronly = fs->fs_ronly;
558 newfs->fs_active = fs->fs_active;
559 memcpy(fs, newfs, (u_int)fs->fs_sbsize);
560 brelse(bp, 0);
561 free(newfs, M_UFSMNT);
562
563 /* Recheck for apple UFS filesystem */
564 ump->um_flags &= ~UFS_ISAPPLEUFS;
565 /* First check to see if this is tagged as an Apple UFS filesystem
566 * in the disklabel
567 */
568 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
569 (dpart.part->p_fstype == FS_APPLEUFS)) {
570 ump->um_flags |= UFS_ISAPPLEUFS;
571 }
572 #ifdef APPLE_UFS
573 else {
574 /* Manually look for an apple ufs label, and if a valid one
575 * is found, then treat it like an Apple UFS filesystem anyway
576 */
577 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
578 APPLEUFS_LABEL_SIZE, cred, &bp);
579 if (error) {
580 brelse(bp, 0);
581 return (error);
582 }
583 error = ffs_appleufs_validate(fs->fs_fsmnt,
584 (struct appleufslabel *)bp->b_data,NULL);
585 if (error == 0)
586 ump->um_flags |= UFS_ISAPPLEUFS;
587 brelse(bp, 0);
588 bp = NULL;
589 }
590 #else
591 if (ump->um_flags & UFS_ISAPPLEUFS)
592 return (EIO);
593 #endif
594
595 if (UFS_MPISAPPLEUFS(ump)) {
596 /* see comment about NeXT below */
597 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
598 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
599 mp->mnt_iflag |= IMNT_DTYPE;
600 } else {
601 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
602 ump->um_dirblksiz = DIRBLKSIZ;
603 if (ump->um_maxsymlinklen > 0)
604 mp->mnt_iflag |= IMNT_DTYPE;
605 else
606 mp->mnt_iflag &= ~IMNT_DTYPE;
607 }
608 ffs_oldfscompat_read(fs, ump, sblockloc);
609 mutex_enter(&ump->um_lock);
610 ump->um_maxfilesize = fs->fs_maxfilesize;
611 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
612 fs->fs_pendingblocks = 0;
613 fs->fs_pendinginodes = 0;
614 }
615 mutex_exit(&ump->um_lock);
616
617 ffs_statvfs(mp, &mp->mnt_stat);
618 /*
619 * Step 3: re-read summary information from disk.
620 */
621 blks = howmany(fs->fs_cssize, fs->fs_fsize);
622 space = fs->fs_csp;
623 for (i = 0; i < blks; i += fs->fs_frag) {
624 size = fs->fs_bsize;
625 if (i + fs->fs_frag > blks)
626 size = (blks - i) * fs->fs_fsize;
627 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
628 NOCRED, &bp);
629 if (error) {
630 brelse(bp, 0);
631 return (error);
632 }
633 #ifdef FFS_EI
634 if (UFS_FSNEEDSWAP(fs))
635 ffs_csum_swap((struct csum *)bp->b_data,
636 (struct csum *)space, size);
637 else
638 #endif
639 memcpy(space, bp->b_data, (size_t)size);
640 space = (char *)space + size;
641 brelse(bp, 0);
642 }
643 if ((fs->fs_flags & FS_DOSOFTDEP))
644 softdep_mount(devvp, mp, fs, cred);
645 if (fs->fs_snapinum[0] != 0)
646 ffs_snapshot_mount(mp);
647 /*
648 * We no longer know anything about clusters per cylinder group.
649 */
650 if (fs->fs_contigsumsize > 0) {
651 lp = fs->fs_maxcluster;
652 for (i = 0; i < fs->fs_ncg; i++)
653 *lp++ = fs->fs_contigsumsize;
654 }
655
656 /* Allocate a marker vnode. */
657 if ((mvp = vnalloc(mp)) == NULL)
658 return ENOMEM;
659 /*
660 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
661 * and vclean() can be called indirectly
662 */
663 mutex_enter(&mntvnode_lock);
664 loop:
665 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
666 vmark(mvp, vp);
667 if (vp->v_mount != mp || vismarker(vp))
668 continue;
669 /*
670 * Step 4: invalidate all inactive vnodes.
671 */
672 if (vrecycle(vp, &mntvnode_lock, l)) {
673 mutex_enter(&mntvnode_lock);
674 (void)vunmark(mvp);
675 goto loop;
676 }
677 /*
678 * Step 5: invalidate all cached file data.
679 */
680 mutex_enter(&vp->v_interlock);
681 mutex_exit(&mntvnode_lock);
682 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
683 (void)vunmark(mvp);
684 goto loop;
685 }
686 if (vinvalbuf(vp, 0, cred, l, 0, 0))
687 panic("ffs_reload: dirty2");
688 /*
689 * Step 6: re-read inode data for all active vnodes.
690 */
691 ip = VTOI(vp);
692 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
693 (int)fs->fs_bsize, NOCRED, &bp);
694 if (error) {
695 brelse(bp, 0);
696 vput(vp);
697 (void)vunmark(mvp);
698 break;
699 }
700 ffs_load_inode(bp, ip, fs, ip->i_number);
701 ip->i_ffs_effnlink = ip->i_nlink;
702 brelse(bp, 0);
703 vput(vp);
704 mutex_enter(&mntvnode_lock);
705 }
706 mutex_exit(&mntvnode_lock);
707 vnfree(mvp);
708 return (error);
709 }
710
711 /*
 * Possible superblock locations, ordered from most to least likely.
 * ffs_mountfs() divides each entry by the device sector size to get the
 * block to read, and treats an entry of -1 as the end of the list.
 */
714 static const int sblock_try[] = SBLOCKSEARCH;
715
716 /*
717 * Common code for mount and mountroot
718 */
719 int
720 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
721 {
722 struct ufsmount *ump;
723 struct buf *bp;
724 struct fs *fs;
725 dev_t dev;
726 struct partinfo dpart;
727 void *space;
728 daddr_t sblockloc, fsblockloc;
729 int blks, fstype;
730 int error, i, size, ronly, bset = 0;
731 #ifdef FFS_EI
732 int needswap = 0; /* keep gcc happy */
733 #endif
734 int32_t *lp;
735 kauth_cred_t cred;
736 u_int32_t sbsize = 8192; /* keep gcc happy*/
737
738 dev = devvp->v_rdev;
739 cred = l ? l->l_cred : NOCRED;
740
741 /* Flush out any old buffers remaining from a previous use. */
742 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
743 error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
744 VOP_UNLOCK(devvp, 0);
745 if (error)
746 return (error);
747
748 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
749 if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) != 0)
750 size = DEV_BSIZE;
751 else
752 size = dpart.disklab->d_secsize;
753
754 bp = NULL;
755 ump = NULL;
756 fs = NULL;
757 sblockloc = 0;
758 fstype = 0;
759
760 error = fstrans_mount(mp);
761 if (error)
762 return error;
763
764 /*
765 * Try reading the superblock in each of its possible locations.
766 */
767 for (i = 0; ; i++) {
768 if (bp != NULL) {
769 brelse(bp, BC_NOCACHE);
770 bp = NULL;
771 }
772 if (sblock_try[i] == -1) {
773 error = EINVAL;
774 fs = NULL;
775 goto out;
776 }
777 error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE, cred,
778 &bp);
779 if (error) {
780 fs = NULL;
781 goto out;
782 }
783 fs = (struct fs*)bp->b_data;
784 fsblockloc = sblockloc = sblock_try[i];
785 if (fs->fs_magic == FS_UFS1_MAGIC) {
786 sbsize = fs->fs_sbsize;
787 fstype = UFS1;
788 #ifdef FFS_EI
789 needswap = 0;
790 } else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
791 sbsize = bswap32(fs->fs_sbsize);
792 fstype = UFS1;
793 needswap = 1;
794 #endif
795 } else if (fs->fs_magic == FS_UFS2_MAGIC) {
796 sbsize = fs->fs_sbsize;
797 fstype = UFS2;
798 #ifdef FFS_EI
799 needswap = 0;
800 } else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
801 sbsize = bswap32(fs->fs_sbsize);
802 fstype = UFS2;
803 needswap = 1;
804 #endif
805 } else
806 continue;
807
808
809 /* fs->fs_sblockloc isn't defined for old filesystems */
810 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
811 if (sblockloc == SBLOCK_UFS2)
812 /*
813 * This is likely to be the first alternate
814 * in a filesystem with 64k blocks.
815 * Don't use it.
816 */
817 continue;
818 fsblockloc = sblockloc;
819 } else {
820 fsblockloc = fs->fs_sblockloc;
821 #ifdef FFS_EI
822 if (needswap)
823 fsblockloc = bswap64(fsblockloc);
824 #endif
825 }
826
827 /* Check we haven't found an alternate superblock */
828 if (fsblockloc != sblockloc)
829 continue;
830
831 /* Validate size of superblock */
832 if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
833 continue;
834
835 /* Ok seems to be a good superblock */
836 break;
837 }
838
839 fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
840 memcpy(fs, bp->b_data, sbsize);
841
842 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
843 memset(ump, 0, sizeof *ump);
844 mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
845 error = ffs_snapshot_init(ump);
846 if (error)
847 goto out;
848 ump->um_fs = fs;
849 ump->um_ops = &ffs_ufsops;
850
851 #ifdef FFS_EI
852 if (needswap) {
853 ffs_sb_swap((struct fs*)bp->b_data, fs);
854 fs->fs_flags |= FS_SWAPPED;
855 } else
856 #endif
857 fs->fs_flags &= ~FS_SWAPPED;
858
859 ffs_oldfscompat_read(fs, ump, sblockloc);
860 ump->um_maxfilesize = fs->fs_maxfilesize;
861
862 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
863 fs->fs_pendingblocks = 0;
864 fs->fs_pendinginodes = 0;
865 }
866
867 ump->um_fstype = fstype;
868 if (fs->fs_sbsize < SBLOCKSIZE)
869 brelse(bp, BC_INVAL);
870 else
871 brelse(bp, 0);
872 bp = NULL;
873
874 /* First check to see if this is tagged as an Apple UFS filesystem
875 * in the disklabel
876 */
877 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
878 (dpart.part->p_fstype == FS_APPLEUFS)) {
879 ump->um_flags |= UFS_ISAPPLEUFS;
880 }
881 #ifdef APPLE_UFS
882 else {
883 /* Manually look for an apple ufs label, and if a valid one
884 * is found, then treat it like an Apple UFS filesystem anyway
885 */
886 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
887 APPLEUFS_LABEL_SIZE, cred, &bp);
888 if (error)
889 goto out;
890 error = ffs_appleufs_validate(fs->fs_fsmnt,
891 (struct appleufslabel *)bp->b_data,NULL);
892 if (error == 0) {
893 ump->um_flags |= UFS_ISAPPLEUFS;
894 }
895 brelse(bp, 0);
896 bp = NULL;
897 }
898 #else
899 if (ump->um_flags & UFS_ISAPPLEUFS) {
900 error = EINVAL;
901 goto out;
902 }
903 #endif
904
905 /*
906 * verify that we can access the last block in the fs
907 * if we're mounting read/write.
908 */
909
910 if (!ronly) {
911 error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
912 cred, &bp);
913 if (bp->b_bcount != fs->fs_fsize)
914 error = EINVAL;
915 if (error) {
916 bset = BC_INVAL;
917 goto out;
918 }
919 brelse(bp, BC_INVAL);
920 bp = NULL;
921 }
922
923 fs->fs_ronly = ronly;
924 if (ronly == 0) {
925 fs->fs_clean <<= 1;
926 fs->fs_fmod = 1;
927 }
928 size = fs->fs_cssize;
929 blks = howmany(size, fs->fs_fsize);
930 if (fs->fs_contigsumsize > 0)
931 size += fs->fs_ncg * sizeof(int32_t);
932 size += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
933 space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
934 fs->fs_csp = space;
935 for (i = 0; i < blks; i += fs->fs_frag) {
936 size = fs->fs_bsize;
937 if (i + fs->fs_frag > blks)
938 size = (blks - i) * fs->fs_fsize;
939 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
940 cred, &bp);
941 if (error) {
942 free(fs->fs_csp, M_UFSMNT);
943 goto out;
944 }
945 #ifdef FFS_EI
946 if (needswap)
947 ffs_csum_swap((struct csum *)bp->b_data,
948 (struct csum *)space, size);
949 else
950 #endif
951 memcpy(space, bp->b_data, (u_int)size);
952
953 space = (char *)space + size;
954 brelse(bp, 0);
955 bp = NULL;
956 }
957 if (fs->fs_contigsumsize > 0) {
958 fs->fs_maxcluster = lp = space;
959 for (i = 0; i < fs->fs_ncg; i++)
960 *lp++ = fs->fs_contigsumsize;
961 space = lp;
962 }
963 size = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
964 fs->fs_contigdirs = space;
965 space = (char *)space + size;
966 memset(fs->fs_contigdirs, 0, size);
967 /* Compatibility for old filesystems - XXX */
968 if (fs->fs_avgfilesize <= 0)
969 fs->fs_avgfilesize = AVFILESIZ;
970 if (fs->fs_avgfpdir <= 0)
971 fs->fs_avgfpdir = AFPDIR;
972 fs->fs_active = NULL;
973 mp->mnt_data = ump;
974 mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
975 mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
976 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
977 mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
978 if (UFS_MPISAPPLEUFS(ump)) {
979 /* NeXT used to keep short symlinks in the inode even
980 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen
981 * is probably -1, but we still need to be able to identify
982 * short symlinks.
983 */
984 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
985 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
986 mp->mnt_iflag |= IMNT_DTYPE;
987 } else {
988 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
989 ump->um_dirblksiz = DIRBLKSIZ;
990 if (ump->um_maxsymlinklen > 0)
991 mp->mnt_iflag |= IMNT_DTYPE;
992 else
993 mp->mnt_iflag &= ~IMNT_DTYPE;
994 }
995 mp->mnt_fs_bshift = fs->fs_bshift;
996 mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */
997 mp->mnt_flag |= MNT_LOCAL;
998 mp->mnt_iflag |= IMNT_MPSAFE;
999 #ifdef FFS_EI
1000 if (needswap)
1001 ump->um_flags |= UFS_NEEDSWAP;
1002 #endif
1003 ump->um_mountp = mp;
1004 ump->um_dev = dev;
1005 ump->um_devvp = devvp;
1006 ump->um_nindir = fs->fs_nindir;
1007 ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1008 ump->um_bptrtodb = fs->fs_fsbtodb;
1009 ump->um_seqinc = fs->fs_frag;
1010 for (i = 0; i < MAXQUOTAS; i++)
1011 ump->um_quotas[i] = NULLVP;
1012 devvp->v_specmountpoint = mp;
1013 if (ronly == 0 && (fs->fs_flags & FS_DOSOFTDEP)) {
1014 error = softdep_mount(devvp, mp, fs, cred);
1015 if (error) {
1016 free(fs->fs_csp, M_UFSMNT);
1017 goto out;
1018 }
1019 }
1020 if (ronly == 0 && fs->fs_snapinum[0] != 0)
1021 ffs_snapshot_mount(mp);
1022 #ifdef UFS_EXTATTR
1023 /*
1024 * Initialize file-backed extended attributes on UFS1 file
1025 * systems.
1026 */
1027 if (ump->um_fstype == UFS1) {
1028 ufs_extattr_uepm_init(&ump->um_extattr);
1029 #ifdef UFS_EXTATTR_AUTOSTART
1030 /*
1031 * XXX Just ignore errors. Not clear that we should
1032 * XXX fail the mount in this case.
1033 */
1034 (void) ufs_extattr_autostart(mp, l);
1035 #endif
1036 }
1037 #endif /* UFS_EXTATTR */
1038 return (0);
1039 out:
1040 fstrans_unmount(mp);
1041 if (fs)
1042 free(fs, M_UFSMNT);
1043 devvp->v_specmountpoint = NULL;
1044 if (bp)
1045 brelse(bp, bset);
1046 if (ump) {
1047 if (ump->um_oldfscompat)
1048 free(ump->um_oldfscompat, M_UFSMNT);
1049 mutex_destroy(&ump->um_lock);
1050 free(ump, M_UFSMNT);
1051 mp->mnt_data = NULL;
1052 }
1053 return (error);
1054 }
1055
1056 /*
1057 * Sanity checks for loading old filesystem superblocks.
1058 * See ffs_oldfscompat_write below for unwound actions.
1059 *
1060 * XXX - Parts get retired eventually.
1061 * Unfortunately new bits get added.
1062 */
1063 static void
1064 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1065 {
1066 off_t maxfilesize;
1067 int32_t *extrasave;
1068
1069 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1070 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1071 return;
1072
1073 if (!ump->um_oldfscompat)
1074 ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
1075 M_UFSMNT, M_WAITOK);
1076
1077 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1078 extrasave = ump->um_oldfscompat;
1079 extrasave += 512/sizeof(int32_t);
1080 extrasave[0] = fs->fs_old_npsect;
1081 extrasave[1] = fs->fs_old_interleave;
1082 extrasave[2] = fs->fs_old_trackskew;
1083
1084 /* These fields will be overwritten by their
1085 * original values in fs_oldfscompat_write, so it is harmless
1086 * to modify them here.
1087 */
1088 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1089 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1090 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1091 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1092
1093 fs->fs_maxbsize = fs->fs_bsize;
1094 fs->fs_time = fs->fs_old_time;
1095 fs->fs_size = fs->fs_old_size;
1096 fs->fs_dsize = fs->fs_old_dsize;
1097 fs->fs_csaddr = fs->fs_old_csaddr;
1098 fs->fs_sblockloc = sblockloc;
1099
1100 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1101
1102 if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1103 fs->fs_old_nrpos = 8;
1104 fs->fs_old_npsect = fs->fs_old_nsect;
1105 fs->fs_old_interleave = 1;
1106 fs->fs_old_trackskew = 0;
1107 }
1108
1109 if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1110 fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1111 fs->fs_qbmask = ~fs->fs_bmask;
1112 fs->fs_qfmask = ~fs->fs_fmask;
1113 }
1114
1115 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1116 if (fs->fs_maxfilesize > maxfilesize)
1117 fs->fs_maxfilesize = maxfilesize;
1118
1119 /* Compatibility for old filesystems */
1120 if (fs->fs_avgfilesize <= 0)
1121 fs->fs_avgfilesize = AVFILESIZ;
1122 if (fs->fs_avgfpdir <= 0)
1123 fs->fs_avgfpdir = AFPDIR;
1124
1125 #if 0
1126 if (bigcgs) {
1127 fs->fs_save_cgsize = fs->fs_cgsize;
1128 fs->fs_cgsize = fs->fs_bsize;
1129 }
1130 #endif
1131 }
1132
1133 /*
1134 * Unwinding superblock updates for old filesystems.
1135 * See ffs_oldfscompat_read above for details.
1136 *
1137 * XXX - Parts get retired eventually.
1138 * Unfortunately new bits get added.
1139 */
1140 static void
1141 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1142 {
1143 int32_t *extrasave;
1144
1145 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1146 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1147 return;
1148
1149 fs->fs_old_time = fs->fs_time;
1150 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1151 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1152 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1153 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1154 fs->fs_old_flags = fs->fs_flags;
1155
1156 #if 0
1157 if (bigcgs) {
1158 fs->fs_cgsize = fs->fs_save_cgsize;
1159 }
1160 #endif
1161
1162 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1163 extrasave = ump->um_oldfscompat;
1164 extrasave += 512/sizeof(int32_t);
1165 fs->fs_old_npsect = extrasave[0];
1166 fs->fs_old_interleave = extrasave[1];
1167 fs->fs_old_trackskew = extrasave[2];
1168
1169 }
1170
1171 /*
1172 * unmount system call
1173 */
1174 int
1175 ffs_unmount(struct mount *mp, int mntflags)
1176 {
1177 struct lwp *l = curlwp;
1178 struct ufsmount *ump = VFSTOUFS(mp);
1179 struct fs *fs = ump->um_fs;
1180 int error, flags, penderr;
1181
1182 penderr = 0;
1183 flags = 0;
1184 if (mntflags & MNT_FORCE)
1185 flags |= FORCECLOSE;
1186 #ifdef UFS_EXTATTR
1187 if (ump->um_fstype == UFS1) {
1188 ufs_extattr_stop(mp, l);
1189 ufs_extattr_uepm_destroy(&ump->um_extattr);
1190 }
1191 #endif /* UFS_EXTATTR */
1192 if (mp->mnt_flag & MNT_SOFTDEP) {
1193 if ((error = softdep_flushfiles(mp, flags, l)) != 0)
1194 return (error);
1195 } else {
1196 if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1197 return (error);
1198 }
1199 mutex_enter(&ump->um_lock);
1200 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1201 printf("%s: unmount pending error: blocks %" PRId64
1202 " files %d\n",
1203 fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
1204 fs->fs_pendingblocks = 0;
1205 fs->fs_pendinginodes = 0;
1206 penderr = 1;
1207 }
1208 mutex_exit(&ump->um_lock);
1209 if (fs->fs_ronly == 0 &&
1210 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1211 fs->fs_clean & FS_WASCLEAN) {
1212 /*
1213 * XXXX don't mark fs clean in the case of softdep
1214 * pending block errors, until they are fixed.
1215 */
1216 if (penderr == 0) {
1217 if (mp->mnt_flag & MNT_SOFTDEP)
1218 fs->fs_flags &= ~FS_DOSOFTDEP;
1219 fs->fs_clean = FS_ISCLEAN;
1220 }
1221 fs->fs_fmod = 0;
1222 (void) ffs_sbupdate(ump, MNT_WAIT);
1223 }
1224 if (ump->um_devvp->v_type != VBAD)
1225 ump->um_devvp->v_specmountpoint = NULL;
1226 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1227 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
1228 NOCRED);
1229 vput(ump->um_devvp);
1230 free(fs->fs_csp, M_UFSMNT);
1231 free(fs, M_UFSMNT);
1232 if (ump->um_oldfscompat != NULL)
1233 free(ump->um_oldfscompat, M_UFSMNT);
1234 softdep_unmount(mp);
1235 mutex_destroy(&ump->um_lock);
1236 ffs_snapshot_fini(ump);
1237 free(ump, M_UFSMNT);
1238 mp->mnt_data = NULL;
1239 mp->mnt_flag &= ~MNT_LOCAL;
1240 fstrans_unmount(mp);
1241 return (0);
1242 }
1243
1244 /*
1245 * Flush out all the files in a filesystem.
1246 */
1247 int
1248 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1249 {
1250 extern int doforce;
1251 struct ufsmount *ump;
1252 int error;
1253
1254 if (!doforce)
1255 flags &= ~FORCECLOSE;
1256 ump = VFSTOUFS(mp);
1257 #ifdef QUOTA
1258 if (mp->mnt_flag & MNT_QUOTA) {
1259 int i;
1260 if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
1261 return (error);
1262 for (i = 0; i < MAXQUOTAS; i++) {
1263 if (ump->um_quotas[i] == NULLVP)
1264 continue;
1265 quotaoff(l, mp, i);
1266 }
1267 /*
1268 * Here we fall through to vflush again to ensure
1269 * that we have gotten rid of all the system vnodes.
1270 */
1271 }
1272 #endif
1273 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1274 return (error);
1275 ffs_snapshot_unmount(mp);
1276 /*
1277 * Flush all the files.
1278 */
1279 error = vflush(mp, NULLVP, flags);
1280 if (error)
1281 return (error);
1282 /*
1283 * Flush filesystem metadata.
1284 */
1285 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1286 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1287 VOP_UNLOCK(ump->um_devvp, 0);
1288 return (error);
1289 }
1290
1291 /*
1292 * Get file system statistics.
1293 */
1294 int
1295 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1296 {
1297 struct ufsmount *ump;
1298 struct fs *fs;
1299
1300 ump = VFSTOUFS(mp);
1301 fs = ump->um_fs;
1302 mutex_enter(&ump->um_lock);
1303 sbp->f_bsize = fs->fs_bsize;
1304 sbp->f_frsize = fs->fs_fsize;
1305 sbp->f_iosize = fs->fs_bsize;
1306 sbp->f_blocks = fs->fs_dsize;
1307 sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1308 fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1309 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1310 fs->fs_minfree) / (u_int64_t) 100;
1311 if (sbp->f_bfree > sbp->f_bresvd)
1312 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1313 else
1314 sbp->f_bavail = 0;
1315 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
1316 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1317 sbp->f_favail = sbp->f_ffree;
1318 sbp->f_fresvd = 0;
1319 mutex_exit(&ump->um_lock);
1320 copy_statvfs_info(sbp, mp);
1321
1322 return (0);
1323 }
1324
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * mp:		mount to sync.
 * waitfor:	MNT_WAIT, MNT_LAZY or another value; MNT_WAIT makes the
 *		fsyncs synchronous, MNT_LAZY limits regular files to an
 *		inode update and skips the device vnode flush.
 * cred:	credentials passed to VOP_FSYNC().
 *
 * Returns ENOMEM if no marker vnode could be allocated, otherwise the
 * most recent error recorded while syncing (0 if there was none).
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
{
	struct lwp *l = curlwp;
	struct vnode *vp, *mvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, allerror = 0;

	fs = ump->um_fs;
	/* A read-only filesystem must never have a modified superblock. */
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("update: rofs mod");
	}

	/* Allocate a marker vnode. */
	if ((mvp = vnalloc(mp)) == NULL)
		return (ENOMEM);

	fstrans_start(mp, FSTRANS_SHARED);
	/*
	 * Write back each (modified) inode.
	 */
	mutex_enter(&mntvnode_lock);
loop:
	/*
	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
	 * and vclean() can be called indirectly
	 */
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
		/*
		 * Record our position with the marker before anything
		 * below can drop mntvnode_lock.
		 */
		vmark(mvp, vp);
		/*
		 * Skip vnodes that are no longer associated with this
		 * mount point, and other scanners' marker vnodes.
		 */
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		mutex_enter(&vp->v_interlock);
		ip = VTOI(vp);
		/*
		 * Nothing to do for vnodes without an inode, vnodes
		 * flagged VI_XLOCK or VI_CLEAN, VNON vnodes, or vnodes
		 * whose inode flags, dirty buffer list and UVM object are
		 * all clean.
		 */
		if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 ||
		    vp->v_type == VNON || ((ip->i_flag &
		    (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
		    LIST_EMPTY(&vp->v_dirtyblkhd) &&
		    UVM_OBJ_IS_CLEAN(&vp->v_uobj)))
		{
			mutex_exit(&vp->v_interlock);
			continue;
		}
		/* Block devices are left alone while a suspend is starting. */
		if (vp->v_type == VBLK &&
		    fstrans_getstate(mp) == FSTRANS_SUSPENDING) {
			mutex_exit(&vp->v_interlock);
			continue;
		}
		mutex_exit(&mntvnode_lock);
		/*
		 * Non-blocking vget; v_interlock is handed over via
		 * LK_INTERLOCK.
		 */
		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
		if (error) {
			mutex_enter(&mntvnode_lock);
			/*
			 * ENOENT: remove the marker and rescan the list
			 * from the beginning.  Any other failure just
			 * skips this vnode.
			 */
			if (error == ENOENT) {
				(void)vunmark(mvp);
				goto loop;
			}
			continue;
		}
		/*
		 * A lazy sync of a regular file only updates the inode;
		 * everything else gets a full fsync.
		 */
		if (vp->v_type == VREG && waitfor == MNT_LAZY)
			error = ffs_update(vp, NULL, NULL, 0);
		else
			error = VOP_FSYNC(vp, cred,
			    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0);
		if (error)
			allerror = error;
		vput(vp);
		mutex_enter(&mntvnode_lock);
	}
	mutex_exit(&mntvnode_lock);
	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor == MNT_WAIT && (ump->um_mountp->mnt_flag & MNT_SOFTDEP)) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, l)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			mutex_enter(&mntvnode_lock);
			goto loop;
		}
	}
	/*
	 * Unless this is a lazy sync, flush the device vnode if it has
	 * writes in flight or dirty buffers.  After a successful
	 * synchronous flush the vnode list is scanned again.
	 */
	if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
	    !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(ump->um_devvp, cred,
		    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp, 0);
		if (allerror == 0 && waitfor == MNT_WAIT) {
			mutex_enter(&mntvnode_lock);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0) {
		fs->fs_fmod = 0;
		fs->fs_time = time_second;
		if ((error = ffs_cgupdate(ump, waitfor)))
			allerror = error;
	}
	fstrans_done(mp);
	vnfree(mvp);
	return (allerror);
}
1447
/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 *
 * mp:	mount the inode belongs to.
 * ino:	inode number to look up.
 * vpp:	out: the vnode on success, NULL when vnode allocation or the
 *	inode block read fails.
 *
 * Returns 0 on success, or the error from getnewvnode() or bread().
 */
int
ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;

 retry:
	/* Fast path: the inode is already in the hash table. */
	if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
		return (0);

	/* Allocate a new vnode/inode. */
	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
		*vpp = NULL;
		return (error);
	}
	ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);

	/*
	 * If someone beat us to it, put back the freshly allocated
	 * vnode/inode pair and retry.
	 */
	mutex_enter(&ufs_hashlock);
	if (ufs_ihashget(dev, ino, 0) != NULL) {
		mutex_exit(&ufs_hashlock);
		ungetnewvnode(vp);
		pool_cache_put(ffs_inode_cache, ip);
		goto retry;
	}

	vp->v_vflag |= VV_LOCKSWORK;
	if ((mp->mnt_flag & MNT_SOFTDEP) != 0)
		vp->v_uflag |= VU_SOFTDEP;

	/*
	 * XXX MFS ends up here, too, to allocate an inode.  Should we
	 * XXX create another pool for MFS inodes?
	 */

	/* Start from a zeroed inode and tie it to the new vnode. */
	memset(ip, 0, sizeof(struct inode));
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	LIST_INIT(&ip->i_pcbufhd);
#ifdef QUOTA
	ufsquota_init(ip);
#endif

	/*
	 * Initialize genfs node, we might proceed to destroy it in
	 * error branches.
	 */
	genfs_node_init(vp, &ffs_genfsops);

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */

	ufs_ihashins(ip);
	mutex_exit(&ufs_hashlock);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
		      (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {

		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */

		vput(vp);
		brelse(bp, 0);
		*vpp = NULL;
		return (error);
	}
	/* The in-core dinode copy comes from the pool matching the format. */
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
		    PR_WAITOK);
	else
		ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
		    PR_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	/*
	 * With soft dependencies the softdep code is given the inode;
	 * otherwise the effective link count is simply the link count.
	 */
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_ffs_effnlink = ip->i_nlink;
	brelse(bp, 0);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */

	ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */

	/* The inode takes its own reference on the device vnode. */
	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);

	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */

	if (fs->fs_old_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_ffs1_ouid;			/* XXX */
		ip->i_gid = ip->i_ffs1_ogid;			/* XXX */
	}							/* XXX */
	uvm_vnp_setsize(vp, ip->i_size);
	*vpp = vp;
	return (0);
}
1585
1586 /*
1587 * File handle to vnode
1588 *
1589 * Have to be really careful about stale file handles:
1590 * - check that the inode number is valid
1591 * - call ffs_vget() to get the locked inode
1592 * - check for an unallocated inode (i_mode == 0)
1593 * - check that the given client host has export rights and return
1594 * those rights via. exflagsp and credanonp
1595 */
1596 int
1597 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1598 {
1599 struct ufid ufh;
1600 struct fs *fs;
1601
1602 if (fhp->fid_len != sizeof(struct ufid))
1603 return EINVAL;
1604
1605 memcpy(&ufh, fhp, sizeof(ufh));
1606 fs = VFSTOUFS(mp)->um_fs;
1607 if (ufh.ufid_ino < ROOTINO ||
1608 ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1609 return (ESTALE);
1610 return (ufs_fhtovp(mp, &ufh, vpp));
1611 }
1612
1613 /*
1614 * Vnode pointer to File handle
1615 */
1616 /* ARGSUSED */
1617 int
1618 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1619 {
1620 struct inode *ip;
1621 struct ufid ufh;
1622
1623 if (*fh_size < sizeof(struct ufid)) {
1624 *fh_size = sizeof(struct ufid);
1625 return E2BIG;
1626 }
1627 ip = VTOI(vp);
1628 *fh_size = sizeof(struct ufid);
1629 memset(&ufh, 0, sizeof(ufh));
1630 ufh.ufid_len = sizeof(struct ufid);
1631 ufh.ufid_ino = ip->i_number;
1632 ufh.ufid_gen = ip->i_gen;
1633 memcpy(fhp, &ufh, sizeof(ufh));
1634 return (0);
1635 }
1636
1637 void
1638 ffs_init(void)
1639 {
1640 if (ffs_initcount++ > 0)
1641 return;
1642
1643 ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1644 "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1645 ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1646 "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1647 ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1648 "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1649 softdep_initialize();
1650 ufs_init();
1651 }
1652
/*
 * Reinitialization hook: passes the request on to the soft dependency
 * code and then to the UFS layer.
 */
void
ffs_reinit(void)
{

	softdep_reinitialize();
	ufs_reinit();
	return;
}
1659
1660 void
1661 ffs_done(void)
1662 {
1663 if (--ffs_initcount > 0)
1664 return;
1665
1666 /* XXX softdep cleanup ? */
1667 ufs_done();
1668 pool_cache_destroy(ffs_dinode2_cache);
1669 pool_cache_destroy(ffs_dinode1_cache);
1670 pool_cache_destroy(ffs_inode_cache);
1671 }
1672
1673 SYSCTL_SETUP(sysctl_vfs_ffs_setup, "sysctl vfs.ffs subtree setup")
1674 {
1675 #if 0
1676 extern int doasyncfree;
1677 #endif
1678 extern int ffs_log_changeopt;
1679
1680 sysctl_createv(clog, 0, NULL, NULL,
1681 CTLFLAG_PERMANENT,
1682 CTLTYPE_NODE, "vfs", NULL,
1683 NULL, 0, NULL, 0,
1684 CTL_VFS, CTL_EOL);
1685 sysctl_createv(clog, 0, NULL, NULL,
1686 CTLFLAG_PERMANENT,
1687 CTLTYPE_NODE, "ffs",
1688 SYSCTL_DESCR("Berkeley Fast File System"),
1689 NULL, 0, NULL, 0,
1690 CTL_VFS, 1, CTL_EOL);
1691
1692 /*
1693 * @@@ should we even bother with these first three?
1694 */
1695 sysctl_createv(clog, 0, NULL, NULL,
1696 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1697 CTLTYPE_INT, "doclusterread", NULL,
1698 sysctl_notavail, 0, NULL, 0,
1699 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
1700 sysctl_createv(clog, 0, NULL, NULL,
1701 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1702 CTLTYPE_INT, "doclusterwrite", NULL,
1703 sysctl_notavail, 0, NULL, 0,
1704 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
1705 sysctl_createv(clog, 0, NULL, NULL,
1706 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1707 CTLTYPE_INT, "doreallocblks", NULL,
1708 sysctl_notavail, 0, NULL, 0,
1709 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
1710 #if 0
1711 sysctl_createv(clog, 0, NULL, NULL,
1712 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1713 CTLTYPE_INT, "doasyncfree",
1714 SYSCTL_DESCR("Release dirty blocks asynchronously"),
1715 NULL, 0, &doasyncfree, 0,
1716 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
1717 #endif
1718 sysctl_createv(clog, 0, NULL, NULL,
1719 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1720 CTLTYPE_INT, "log_changeopt",
1721 SYSCTL_DESCR("Log changes in optimization strategy"),
1722 NULL, 0, &ffs_log_changeopt, 0,
1723 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
1724 }
1725
1726 /*
1727 * Write a superblock and associated information back to disk.
1728 */
1729 int
1730 ffs_sbupdate(struct ufsmount *mp, int waitfor)
1731 {
1732 struct fs *fs = mp->um_fs;
1733 struct buf *bp;
1734 int error = 0;
1735 u_int32_t saveflag;
1736
1737 bp = getblk(mp->um_devvp,
1738 fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb),
1739 (int)fs->fs_sbsize, 0, 0);
1740 saveflag = fs->fs_flags & FS_INTERNAL;
1741 fs->fs_flags &= ~FS_INTERNAL;
1742
1743 memcpy(bp->b_data, fs, fs->fs_sbsize);
1744
1745 ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1746 #ifdef FFS_EI
1747 if (mp->um_flags & UFS_NEEDSWAP)
1748 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
1749 #endif
1750 fs->fs_flags |= saveflag;
1751
1752 if (waitfor == MNT_WAIT)
1753 error = bwrite(bp);
1754 else
1755 bawrite(bp);
1756 return (error);
1757 }
1758
1759 int
1760 ffs_cgupdate(struct ufsmount *mp, int waitfor)
1761 {
1762 struct fs *fs = mp->um_fs;
1763 struct buf *bp;
1764 int blks;
1765 void *space;
1766 int i, size, error = 0, allerror = 0;
1767
1768 allerror = ffs_sbupdate(mp, waitfor);
1769 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1770 space = fs->fs_csp;
1771 for (i = 0; i < blks; i += fs->fs_frag) {
1772 size = fs->fs_bsize;
1773 if (i + fs->fs_frag > blks)
1774 size = (blks - i) * fs->fs_fsize;
1775 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1776 size, 0, 0);
1777 #ifdef FFS_EI
1778 if (mp->um_flags & UFS_NEEDSWAP)
1779 ffs_csum_swap((struct csum*)space,
1780 (struct csum*)bp->b_data, size);
1781 else
1782 #endif
1783 memcpy(bp->b_data, space, (u_int)size);
1784 space = (char *)space + size;
1785 if (waitfor == MNT_WAIT)
1786 error = bwrite(bp);
1787 else
1788 bawrite(bp);
1789 }
1790 if (!allerror && error)
1791 allerror = error;
1792 return (allerror);
1793 }
1794
/*
 * Extended attribute control.  With UFS_EXTATTR configured, UFS1 mounts
 * are handled by ufs_extattrctl(); every other case goes to
 * vfs_stdextattrctl().  Returns whatever the chosen handler returns.
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
	int error;

#ifdef UFS_EXTATTR
	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	if (VFSTOUFS(mp)->um_fstype == UFS1)
		error = ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname);
	else
#endif
		error = vfs_stdextattrctl(mp, cmd, vp, attrnamespace,
		    attrname);
	return (error);
}
1809
1810 int
1811 ffs_suspendctl(struct mount *mp, int cmd)
1812 {
1813 int error;
1814 struct lwp *l = curlwp;
1815
1816 switch (cmd) {
1817 case SUSPEND_SUSPEND:
1818 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
1819 return error;
1820 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
1821 if (error == 0)
1822 error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
1823 if (error != 0) {
1824 (void) fstrans_setstate(mp, FSTRANS_NORMAL);
1825 return error;
1826 }
1827 return 0;
1828
1829 case SUSPEND_RESUME:
1830 return fstrans_setstate(mp, FSTRANS_NORMAL);
1831
1832 default:
1833 return EINVAL;
1834 }
1835 }
1836