ffs_vfsops.c revision 1.218 1 /* $NetBSD: ffs_vfsops.c,v 1.218 2008/01/09 18:20:54 ad Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1991, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.218 2008/01/09 18:20:54 ad Exp $");
36
37 #if defined(_KERNEL_OPT)
38 #include "opt_ffs.h"
39 #include "opt_quota.h"
40 #include "opt_softdep.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/namei.h>
46 #include <sys/proc.h>
47 #include <sys/kernel.h>
48 #include <sys/vnode.h>
49 #include <sys/socket.h>
50 #include <sys/mount.h>
51 #include <sys/buf.h>
52 #include <sys/device.h>
53 #include <sys/mbuf.h>
54 #include <sys/file.h>
55 #include <sys/disklabel.h>
56 #include <sys/ioctl.h>
57 #include <sys/errno.h>
58 #include <sys/malloc.h>
59 #include <sys/pool.h>
60 #include <sys/lock.h>
61 #include <sys/sysctl.h>
62 #include <sys/conf.h>
63 #include <sys/kauth.h>
64 #include <sys/fstrans.h>
65
66 #include <miscfs/specfs/specdev.h>
67
68 #include <ufs/ufs/quota.h>
69 #include <ufs/ufs/ufsmount.h>
70 #include <ufs/ufs/inode.h>
71 #include <ufs/ufs/dir.h>
72 #include <ufs/ufs/ufs_extern.h>
73 #include <ufs/ufs/ufs_bswap.h>
74
75 #include <ufs/ffs/fs.h>
76 #include <ufs/ffs/ffs_extern.h>
77
/* how many times ffs_init() was called */
int ffs_initcount = 0;

/* Declared here only; the definition is not in the visible part of the file. */
extern kmutex_t ufs_hashlock;

/*
 * Vnode operation vector descriptors collected in ffs_vnodeopv_descs[]
 * below.  Their definitions are not in the visible part of the file.
 */
extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
extern const struct vnodeopv_desc ffs_specop_opv_desc;
extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
86
/*
 * NULL-terminated list of the vnode operation vector descriptors that
 * ffs_vfsops hands to the VFS layer.
 */
const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
	&ffs_vnodeop_opv_desc,
	&ffs_specop_opv_desc,
	&ffs_fifoop_opv_desc,
	NULL,
};
93
/*
 * VFS operations table for FFS.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of struct vfsops (declared outside this file).  Entries
 * prefixed ufs_ are shared UFS routines; entries prefixed ffs_ are FFS
 * specific.
 */
struct vfsops ffs_vfsops = {
	MOUNT_FFS,
	sizeof (struct ufs_args),
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statvfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_reinit,
	ffs_done,
	ffs_mountroot,
	ffs_snapshot,
	ffs_extattrctl,
	ffs_suspendctl,
	ffs_vnodeopv_descs,
	0,
	{ NULL, NULL },
};
/* NOTE(review): presumably registers the table with the VFS layer - confirm. */
VFS_ATTACH(ffs_vfsops);
119
120 static const struct genfs_ops ffs_genfsops = {
121 .gop_size = ffs_gop_size,
122 .gop_alloc = ufs_gop_alloc,
123 .gop_write = genfs_gop_write,
124 .gop_markupdate = ufs_gop_markupdate,
125 };
126
127 static const struct ufs_ops ffs_ufsops = {
128 .uo_itimes = ffs_itimes,
129 .uo_update = ffs_update,
130 .uo_truncate = ffs_truncate,
131 .uo_valloc = ffs_valloc,
132 .uo_vfree = ffs_vfree,
133 .uo_balloc = ffs_balloc,
134 };
135
/*
 * Pool caches for FFS; not initialized in the visible part of the file.
 * NOTE(review): presumably for in-core inodes and UFS1/UFS2 dinodes,
 * going by the names - confirm.
 */
pool_cache_t ffs_inode_cache;
pool_cache_t ffs_dinode1_cache;
pool_cache_t ffs_dinode2_cache;

/* Superblock compatibility helpers for old-format UFS1 file systems. */
static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
142
143 /*
144 * Called by main() when ffs is going to be mounted as root.
145 */
146
147 int
148 ffs_mountroot(void)
149 {
150 struct fs *fs;
151 struct mount *mp;
152 struct lwp *l = curlwp; /* XXX */
153 struct ufsmount *ump;
154 int error;
155
156 if (device_class(root_device) != DV_DISK)
157 return (ENODEV);
158
159 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
160 vrele(rootvp);
161 return (error);
162 }
163 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
164 mp->mnt_op->vfs_refcount--;
165 vfs_unbusy(mp);
166 vfs_destroy(mp);
167 return (error);
168 }
169 mutex_enter(&mountlist_lock);
170 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
171 mutex_exit(&mountlist_lock);
172 ump = VFSTOUFS(mp);
173 fs = ump->um_fs;
174 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
175 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
176 (void)ffs_statvfs(mp, &mp->mnt_stat);
177 vfs_unbusy(mp);
178 setrootfstime((time_t)fs->fs_time);
179 return (0);
180 }
181
182 /*
183 * VFS Operations.
184 *
185 * mount system call
186 */
187 int
188 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
189 {
190 struct lwp *l = curlwp;
191 struct nameidata nd;
192 struct vnode *vp, *devvp = NULL;
193 struct ufs_args *args = data;
194 struct ufsmount *ump = NULL;
195 struct fs *fs;
196 int error = 0, flags, update;
197 mode_t accessmode;
198
199 if (*data_len < sizeof *args)
200 return EINVAL;
201
202 if (mp->mnt_flag & MNT_GETARGS) {
203 ump = VFSTOUFS(mp);
204 if (ump == NULL)
205 return EIO;
206 args->fspec = NULL;
207 *data_len = sizeof *args;
208 return 0;
209 }
210
211 #if !defined(SOFTDEP)
212 mp->mnt_flag &= ~MNT_SOFTDEP;
213 #endif
214
215 update = mp->mnt_flag & MNT_UPDATE;
216
217 /* Check arguments */
218 if (args->fspec != NULL) {
219 /*
220 * Look up the name and verify that it's sane.
221 */
222 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, args->fspec);
223 if ((error = namei(&nd)) != 0)
224 return (error);
225 devvp = nd.ni_vp;
226
227 if (!update) {
228 /*
229 * Be sure this is a valid block device
230 */
231 if (devvp->v_type != VBLK)
232 error = ENOTBLK;
233 else if (bdevsw_lookup(devvp->v_rdev) == NULL)
234 error = ENXIO;
235 } else {
236 /*
237 * Be sure we're still naming the same device
238 * used for our initial mount
239 */
240 ump = VFSTOUFS(mp);
241 if (devvp != ump->um_devvp) {
242 if (devvp->v_rdev != ump->um_devvp->v_rdev)
243 error = EINVAL;
244 else {
245 vrele(devvp);
246 devvp = ump->um_devvp;
247 vref(devvp);
248 }
249 }
250 }
251 } else {
252 if (!update) {
253 /* New mounts must have a filename for the device */
254 return (EINVAL);
255 } else {
256 /* Use the extant mount */
257 ump = VFSTOUFS(mp);
258 devvp = ump->um_devvp;
259 vref(devvp);
260 }
261 }
262
263 /*
264 * Mark the device and any existing vnodes as involved in
265 * softdep processing.
266 */
267 if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
268 devvp->v_uflag |= VU_SOFTDEP;
269 mutex_enter(&mntvnode_lock);
270 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
271 if (vp->v_mount != mp || vismarker(vp))
272 continue;
273 vp->v_uflag |= VU_SOFTDEP;
274 }
275 mutex_exit(&mntvnode_lock);
276 }
277
278 /*
279 * If mount by non-root, then verify that user has necessary
280 * permissions on the device.
281 */
282 if (error == 0 && kauth_authorize_generic(l->l_cred,
283 KAUTH_GENERIC_ISSUSER, NULL) != 0) {
284 accessmode = VREAD;
285 if (update ?
286 (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
287 (mp->mnt_flag & MNT_RDONLY) == 0)
288 accessmode |= VWRITE;
289 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
290 error = VOP_ACCESS(devvp, accessmode, l->l_cred);
291 VOP_UNLOCK(devvp, 0);
292 }
293
294 if (error) {
295 vrele(devvp);
296 return (error);
297 }
298
299 if (!update) {
300 int xflags;
301
302 /*
303 * Disallow multiple mounts of the same device.
304 * Disallow mounting of a device that is currently in use
305 * (except for root, which might share swap device for
306 * miniroot).
307 */
308 error = vfs_mountedon(devvp);
309 if (error)
310 goto fail;
311 if (vcount(devvp) > 1 && devvp != rootvp) {
312 error = EBUSY;
313 goto fail;
314 }
315 if (mp->mnt_flag & MNT_RDONLY)
316 xflags = FREAD;
317 else
318 xflags = FREAD|FWRITE;
319 error = VOP_OPEN(devvp, xflags, FSCRED);
320 if (error)
321 goto fail;
322 error = ffs_mountfs(devvp, mp, l);
323 if (error) {
324 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
325 (void)VOP_CLOSE(devvp, xflags, NOCRED);
326 VOP_UNLOCK(devvp, 0);
327 goto fail;
328 }
329
330 ump = VFSTOUFS(mp);
331 fs = ump->um_fs;
332 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
333 (MNT_SOFTDEP | MNT_ASYNC)) {
334 printf("%s fs uses soft updates, "
335 "ignoring async mode\n",
336 fs->fs_fsmnt);
337 mp->mnt_flag &= ~MNT_ASYNC;
338 }
339 } else {
340 /*
341 * Update the mount.
342 */
343
344 /*
345 * The initial mount got a reference on this
346 * device, so drop the one obtained via
347 * namei(), above.
348 */
349 vrele(devvp);
350
351 ump = VFSTOUFS(mp);
352 fs = ump->um_fs;
353 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
354 /*
355 * Changing from r/w to r/o
356 */
357 flags = WRITECLOSE;
358 if (mp->mnt_flag & MNT_FORCE)
359 flags |= FORCECLOSE;
360 if (mp->mnt_flag & MNT_SOFTDEP)
361 error = softdep_flushfiles(mp, flags, l);
362 else
363 error = ffs_flushfiles(mp, flags, l);
364 if (fs->fs_pendingblocks != 0 ||
365 fs->fs_pendinginodes != 0) {
366 printf("%s: update error: blocks %" PRId64
367 " files %d\n",
368 fs->fs_fsmnt, fs->fs_pendingblocks,
369 fs->fs_pendinginodes);
370 fs->fs_pendingblocks = 0;
371 fs->fs_pendinginodes = 0;
372 }
373 if (error == 0 &&
374 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
375 fs->fs_clean & FS_WASCLEAN) {
376 if (mp->mnt_flag & MNT_SOFTDEP)
377 fs->fs_flags &= ~FS_DOSOFTDEP;
378 fs->fs_clean = FS_ISCLEAN;
379 (void) ffs_sbupdate(ump, MNT_WAIT);
380 }
381 if (error)
382 return (error);
383 fs->fs_ronly = 1;
384 fs->fs_fmod = 0;
385 }
386
387 /*
388 * Flush soft dependencies if disabling it via an update
389 * mount. This may leave some items to be processed,
390 * so don't do this yet XXX.
391 */
392 if ((fs->fs_flags & FS_DOSOFTDEP) &&
393 !(mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
394 #ifdef notyet
395 flags = WRITECLOSE;
396 if (mp->mnt_flag & MNT_FORCE)
397 flags |= FORCECLOSE;
398 error = softdep_flushfiles(mp, flags, l);
399 if (error == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0)
400 fs->fs_flags &= ~FS_DOSOFTDEP;
401 (void) ffs_sbupdate(ump, MNT_WAIT);
402 #elif defined(SOFTDEP)
403 mp->mnt_flag |= MNT_SOFTDEP;
404 #endif
405 }
406
407 /*
408 * When upgrading to a softdep mount, we must first flush
409 * all vnodes. (not done yet -- see above)
410 */
411 if (!(fs->fs_flags & FS_DOSOFTDEP) &&
412 (mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
413 #ifdef notyet
414 flags = WRITECLOSE;
415 if (mp->mnt_flag & MNT_FORCE)
416 flags |= FORCECLOSE;
417 error = ffs_flushfiles(mp, flags, l);
418 #else
419 mp->mnt_flag &= ~MNT_SOFTDEP;
420 #endif
421 }
422
423 if (mp->mnt_flag & MNT_RELOAD) {
424 error = ffs_reload(mp, l->l_cred, l);
425 if (error)
426 return (error);
427 }
428
429 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
430 /*
431 * Changing from read-only to read/write
432 */
433 fs->fs_ronly = 0;
434 fs->fs_clean <<= 1;
435 fs->fs_fmod = 1;
436 if ((fs->fs_flags & FS_DOSOFTDEP)) {
437 error = softdep_mount(devvp, mp, fs,
438 l->l_cred);
439 if (error)
440 return (error);
441 }
442 if (fs->fs_snapinum[0] != 0)
443 ffs_snapshot_mount(mp);
444 }
445 if (args->fspec == NULL)
446 return EINVAL;
447 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
448 (MNT_SOFTDEP | MNT_ASYNC)) {
449 printf("%s fs uses soft updates, ignoring async mode\n",
450 fs->fs_fsmnt);
451 mp->mnt_flag &= ~MNT_ASYNC;
452 }
453 }
454
455 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
456 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
457 if (error == 0)
458 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
459 sizeof(fs->fs_fsmnt));
460 if (mp->mnt_flag & MNT_SOFTDEP)
461 fs->fs_flags |= FS_DOSOFTDEP;
462 else
463 fs->fs_flags &= ~FS_DOSOFTDEP;
464 if (fs->fs_fmod != 0) { /* XXX */
465 fs->fs_fmod = 0;
466 if (fs->fs_clean & FS_WASCLEAN)
467 fs->fs_time = time_second;
468 else {
469 printf("%s: file system not clean (fs_clean=%x); please fsck(8)\n",
470 mp->mnt_stat.f_mntfromname, fs->fs_clean);
471 printf("%s: lost blocks %" PRId64 " files %d\n",
472 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
473 fs->fs_pendinginodes);
474 }
475 (void) ffs_cgupdate(ump, MNT_WAIT);
476 }
477 return (error);
478
479 fail:
480 vrele(devvp);
481 return (error);
482 }
483
484 /*
485 * Reload all incore data for a filesystem (used after running fsck on
486 * the root filesystem and finding things to fix). The filesystem must
487 * be mounted read-only.
488 *
489 * Things to do to update the mount:
490 * 1) invalidate all cached meta-data.
491 * 2) re-read superblock from disk.
492 * 3) re-read summary information from disk.
493 * 4) invalidate all inactive vnodes.
494 * 5) invalidate all cached file data.
495 * 6) re-read inode data for all active vnodes.
496 */
497 int
498 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
499 {
500 struct vnode *vp, *mvp, *devvp;
501 struct inode *ip;
502 void *space;
503 struct buf *bp;
504 struct fs *fs, *newfs;
505 struct partinfo dpart;
506 int i, blks, size, error;
507 int32_t *lp;
508 struct ufsmount *ump;
509 daddr_t sblockloc;
510
511 if ((mp->mnt_flag & MNT_RDONLY) == 0)
512 return (EINVAL);
513
514 ump = VFSTOUFS(mp);
515 /*
516 * Step 1: invalidate all cached meta-data.
517 */
518 devvp = ump->um_devvp;
519 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
520 error = vinvalbuf(devvp, 0, cred, l, 0, 0);
521 VOP_UNLOCK(devvp, 0);
522 if (error)
523 panic("ffs_reload: dirty1");
524 /*
525 * Step 2: re-read superblock from disk.
526 */
527 fs = ump->um_fs;
528 if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, NOCRED) != 0)
529 size = DEV_BSIZE;
530 else
531 size = dpart.disklab->d_secsize;
532 /* XXX we don't handle possibility that superblock moved. */
533 error = bread(devvp, fs->fs_sblockloc / size, fs->fs_sbsize,
534 NOCRED, &bp);
535 if (error) {
536 brelse(bp, 0);
537 return (error);
538 }
539 newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
540 memcpy(newfs, bp->b_data, fs->fs_sbsize);
541 #ifdef FFS_EI
542 if (ump->um_flags & UFS_NEEDSWAP) {
543 ffs_sb_swap((struct fs*)bp->b_data, newfs);
544 fs->fs_flags |= FS_SWAPPED;
545 } else
546 #endif
547 fs->fs_flags &= ~FS_SWAPPED;
548 if ((newfs->fs_magic != FS_UFS1_MAGIC &&
549 newfs->fs_magic != FS_UFS2_MAGIC)||
550 newfs->fs_bsize > MAXBSIZE ||
551 newfs->fs_bsize < sizeof(struct fs)) {
552 brelse(bp, 0);
553 free(newfs, M_UFSMNT);
554 return (EIO); /* XXX needs translation */
555 }
556 /* Store off old fs_sblockloc for fs_oldfscompat_read. */
557 sblockloc = fs->fs_sblockloc;
558 /*
559 * Copy pointer fields back into superblock before copying in XXX
560 * new superblock. These should really be in the ufsmount. XXX
561 * Note that important parameters (eg fs_ncg) are unchanged.
562 */
563 newfs->fs_csp = fs->fs_csp;
564 newfs->fs_maxcluster = fs->fs_maxcluster;
565 newfs->fs_contigdirs = fs->fs_contigdirs;
566 newfs->fs_ronly = fs->fs_ronly;
567 newfs->fs_active = fs->fs_active;
568 memcpy(fs, newfs, (u_int)fs->fs_sbsize);
569 brelse(bp, 0);
570 free(newfs, M_UFSMNT);
571
572 /* Recheck for apple UFS filesystem */
573 ump->um_flags &= ~UFS_ISAPPLEUFS;
574 /* First check to see if this is tagged as an Apple UFS filesystem
575 * in the disklabel
576 */
577 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
578 (dpart.part->p_fstype == FS_APPLEUFS)) {
579 ump->um_flags |= UFS_ISAPPLEUFS;
580 }
581 #ifdef APPLE_UFS
582 else {
583 /* Manually look for an apple ufs label, and if a valid one
584 * is found, then treat it like an Apple UFS filesystem anyway
585 */
586 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
587 APPLEUFS_LABEL_SIZE, cred, &bp);
588 if (error) {
589 brelse(bp, 0);
590 return (error);
591 }
592 error = ffs_appleufs_validate(fs->fs_fsmnt,
593 (struct appleufslabel *)bp->b_data,NULL);
594 if (error == 0)
595 ump->um_flags |= UFS_ISAPPLEUFS;
596 brelse(bp, 0);
597 bp = NULL;
598 }
599 #else
600 if (ump->um_flags & UFS_ISAPPLEUFS)
601 return (EIO);
602 #endif
603
604 if (UFS_MPISAPPLEUFS(ump)) {
605 /* see comment about NeXT below */
606 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
607 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
608 mp->mnt_iflag |= IMNT_DTYPE;
609 } else {
610 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
611 ump->um_dirblksiz = DIRBLKSIZ;
612 if (ump->um_maxsymlinklen > 0)
613 mp->mnt_iflag |= IMNT_DTYPE;
614 else
615 mp->mnt_iflag &= ~IMNT_DTYPE;
616 }
617 ffs_oldfscompat_read(fs, ump, sblockloc);
618 mutex_enter(&ump->um_lock);
619 ump->um_maxfilesize = fs->fs_maxfilesize;
620 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
621 fs->fs_pendingblocks = 0;
622 fs->fs_pendinginodes = 0;
623 }
624 mutex_exit(&ump->um_lock);
625
626 ffs_statvfs(mp, &mp->mnt_stat);
627 /*
628 * Step 3: re-read summary information from disk.
629 */
630 blks = howmany(fs->fs_cssize, fs->fs_fsize);
631 space = fs->fs_csp;
632 for (i = 0; i < blks; i += fs->fs_frag) {
633 size = fs->fs_bsize;
634 if (i + fs->fs_frag > blks)
635 size = (blks - i) * fs->fs_fsize;
636 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
637 NOCRED, &bp);
638 if (error) {
639 brelse(bp, 0);
640 return (error);
641 }
642 #ifdef FFS_EI
643 if (UFS_FSNEEDSWAP(fs))
644 ffs_csum_swap((struct csum *)bp->b_data,
645 (struct csum *)space, size);
646 else
647 #endif
648 memcpy(space, bp->b_data, (size_t)size);
649 space = (char *)space + size;
650 brelse(bp, 0);
651 }
652 if ((fs->fs_flags & FS_DOSOFTDEP))
653 softdep_mount(devvp, mp, fs, cred);
654 if (fs->fs_snapinum[0] != 0)
655 ffs_snapshot_mount(mp);
656 /*
657 * We no longer know anything about clusters per cylinder group.
658 */
659 if (fs->fs_contigsumsize > 0) {
660 lp = fs->fs_maxcluster;
661 for (i = 0; i < fs->fs_ncg; i++)
662 *lp++ = fs->fs_contigsumsize;
663 }
664
665 /* Allocate a marker vnode. */
666 if ((mvp = vnalloc(mp)) == NULL)
667 return ENOMEM;
668 /*
669 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
670 * and vclean() can be called indirectly
671 */
672 mutex_enter(&mntvnode_lock);
673 loop:
674 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
675 vmark(mvp, vp);
676 if (vp->v_mount != mp || vismarker(vp))
677 continue;
678 /*
679 * Step 4: invalidate all inactive vnodes.
680 */
681 if (vrecycle(vp, &mntvnode_lock, l)) {
682 mutex_enter(&mntvnode_lock);
683 (void)vunmark(mvp);
684 goto loop;
685 }
686 /*
687 * Step 5: invalidate all cached file data.
688 */
689 mutex_enter(&vp->v_interlock);
690 mutex_exit(&mntvnode_lock);
691 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
692 (void)vunmark(mvp);
693 goto loop;
694 }
695 if (vinvalbuf(vp, 0, cred, l, 0, 0))
696 panic("ffs_reload: dirty2");
697 /*
698 * Step 6: re-read inode data for all active vnodes.
699 */
700 ip = VTOI(vp);
701 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
702 (int)fs->fs_bsize, NOCRED, &bp);
703 if (error) {
704 brelse(bp, 0);
705 vput(vp);
706 (void)vunmark(mvp);
707 break;
708 }
709 ffs_load_inode(bp, ip, fs, ip->i_number);
710 ip->i_ffs_effnlink = ip->i_nlink;
711 brelse(bp, 0);
712 vput(vp);
713 mutex_enter(&mntvnode_lock);
714 }
715 mutex_exit(&mntvnode_lock);
716 vnfree(mvp);
717 return (error);
718 }
719
/*
 * Possible superblock locations ordered from most to least likely.
 * ffs_mountfs() scans the list until it reaches the -1 entry.
 */
static const int sblock_try[] = SBLOCKSEARCH;
724
/*
 * Common code for mount and mountroot
 *
 * Reads the superblock from devvp, builds the in-core struct fs and
 * struct ufsmount, loads the cylinder group summary information and
 * fills in the FFS-specific parts of mp.
 *
 * devvp - vnode of the device holding the file system.
 * mp    - mount structure to fill in; MNT_RDONLY in mp->mnt_flag selects
 *         a read-only mount.
 * l     - calling lwp; its credentials are used for I/O (NOCRED if NULL).
 *
 * Returns 0 on success.  On failure returns an errno value after the
 * cleanup at the "out" label has released the buffer, the superblock
 * copy and the ufsmount.
 */
int
ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct partinfo dpart;
	void *space;
	daddr_t sblockloc, fsblockloc;
	int blks, fstype;
	int error, i, size, ronly, bset = 0;
#ifdef FFS_EI
	int needswap = 0;		/* keep gcc happy */
#endif
	int32_t *lp;
	kauth_cred_t cred;
	u_int32_t sbsize = 8192;	/* keep gcc happy */

	dev = devvp->v_rdev;
	cred = l ? l->l_cred : NOCRED;

	/* Flush out any old buffers remaining from a previous use. */
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
	VOP_UNLOCK(devvp, 0);
	if (error)
		return (error);

	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
	/* Sector size from the disklabel, or DEV_BSIZE if unavailable. */
	if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) != 0)
		size = DEV_BSIZE;
	else
		size = dpart.disklab->d_secsize;

	/* The cleanup code at "out" keys off these being NULL. */
	bp = NULL;
	ump = NULL;
	fs = NULL;
	sblockloc = 0;
	fstype = 0;

	error = fstrans_mount(mp);
	if (error)
		return error;

	/*
	 * Try reading the superblock in each of its possible locations.
	 */
	for (i = 0; ; i++) {
		/* Release the candidate rejected by the previous pass. */
		if (bp != NULL) {
			brelse(bp, BC_NOCACHE);
			bp = NULL;
		}
		if (sblock_try[i] == -1) {
			/* End of the list: no usable superblock found. */
			error = EINVAL;
			fs = NULL;
			goto out;
		}
		error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE, cred,
			      &bp);
		if (error) {
			fs = NULL;
			goto out;
		}
		/* fs points into the buffer until a private copy is made. */
		fs = (struct fs*)bp->b_data;
		fsblockloc = sblockloc = sblock_try[i];
		if (fs->fs_magic == FS_UFS1_MAGIC) {
			sbsize = fs->fs_sbsize;
			fstype = UFS1;
#ifdef FFS_EI
			needswap = 0;
		} else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
			sbsize = bswap32(fs->fs_sbsize);
			fstype = UFS1;
			needswap = 1;
#endif
		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
			sbsize = fs->fs_sbsize;
			fstype = UFS2;
#ifdef FFS_EI
			needswap = 0;
		} else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
			sbsize = bswap32(fs->fs_sbsize);
			fstype = UFS2;
			needswap = 1;
#endif
		} else
			continue;


		/* fs->fs_sblockloc isn't defined for old filesystems */
		if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
			if (sblockloc == SBLOCK_UFS2)
				/*
				 * This is likely to be the first alternate
				 * in a filesystem with 64k blocks.
				 * Don't use it.
				 */
				continue;
			fsblockloc = sblockloc;
		} else {
			fsblockloc = fs->fs_sblockloc;
#ifdef FFS_EI
			if (needswap)
				fsblockloc = bswap64(fsblockloc);
#endif
		}

		/* Check we haven't found an alternate superblock */
		if (fsblockloc != sblockloc)
			continue;

		/* Validate size of superblock */
		if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
			continue;

		/* Ok seems to be a good superblock */
		break;
	}

	/* Make a private in-core copy of the superblock. */
	fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
	memcpy(fs, bp->b_data, sbsize);

	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
	memset(ump, 0, sizeof *ump);
	mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
	ump->um_fs = fs;
	ump->um_ops = &ffs_ufsops;

#ifdef FFS_EI
	if (needswap) {
		ffs_sb_swap((struct fs*)bp->b_data, fs);
		fs->fs_flags |= FS_SWAPPED;
	} else
#endif
		fs->fs_flags &= ~FS_SWAPPED;

	ffs_oldfscompat_read(fs, ump, sblockloc);
	ump->um_maxfilesize = fs->fs_maxfilesize;

	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}

	ump->um_fstype = fstype;
	/* Invalidate the buffer if the superblock is smaller than the read. */
	if (fs->fs_sbsize < SBLOCKSIZE)
		brelse(bp, BC_INVAL);
	else
		brelse(bp, 0);
	bp = NULL;

	/* First check to see if this is tagged as an Apple UFS filesystem
	 * in the disklabel
	 */
	if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
		(dpart.part->p_fstype == FS_APPLEUFS)) {
		ump->um_flags |= UFS_ISAPPLEUFS;
	}
#ifdef APPLE_UFS
	else {
		/* Manually look for an apple ufs label, and if a valid one
		 * is found, then treat it like an Apple UFS filesystem anyway
		 */
		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
			APPLEUFS_LABEL_SIZE, cred, &bp);
		if (error)
			goto out;
		error = ffs_appleufs_validate(fs->fs_fsmnt,
			(struct appleufslabel *)bp->b_data,NULL);
		if (error == 0) {
			ump->um_flags |= UFS_ISAPPLEUFS;
		}
		brelse(bp, 0);
		bp = NULL;
	}
#else
	/* Apple UFS support not compiled in: refuse such a file system. */
	if (ump->um_flags & UFS_ISAPPLEUFS) {
		error = EINVAL;
		goto out;
	}
#endif

	/*
	 * verify that we can access the last block in the fs
	 * if we're mounting read/write.
	 */

	if (!ronly) {
		error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
		    cred, &bp);
		/* A short read is treated as a failure too. */
		if (bp->b_bcount != fs->fs_fsize)
			error = EINVAL;
		if (error) {
			/* Have the cleanup code invalidate this buffer. */
			bset = BC_INVAL;
			goto out;
		}
		brelse(bp, BC_INVAL);
		bp = NULL;
	}

	fs->fs_ronly = ronly;
	if (ronly == 0) {
		fs->fs_clean <<= 1;
		fs->fs_fmod = 1;
	}
	/*
	 * One allocation holds, in order: the cylinder group summary area,
	 * the fs_maxcluster array (only if fs_contigsumsize > 0) and the
	 * fs_contigdirs array.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		/* Full block, except possibly for the final partial one. */
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
			      cred, &bp);
		if (error) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
#ifdef FFS_EI
		if (needswap)
			ffs_csum_swap((struct csum *)bp->b_data,
				(struct csum *)space, size);
		else
#endif
			memcpy(space, bp->b_data, (u_int)size);

		space = (char *)space + size;
		brelse(bp, 0);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
	fs->fs_contigdirs = space;
	space = (char *)space + size;
	memset(fs->fs_contigdirs, 0, size);
		/* Compatibility for old filesystems - XXX */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	fs->fs_active = NULL;
	/* Publish the ufsmount and fill in the generic mount fields. */
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
	mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
	if (UFS_MPISAPPLEUFS(ump)) {
		/* NeXT used to keep short symlinks in the inode even
		 * when using FS_42INODEFMT.  In that case fs->fs_maxsymlinklen
		 * is probably -1, but we still need to be able to identify
		 * short symlinks.
		 */
		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
		mp->mnt_iflag |= IMNT_DTYPE;
	} else {
		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
		ump->um_dirblksiz = DIRBLKSIZ;
		if (ump->um_maxsymlinklen > 0)
			mp->mnt_iflag |= IMNT_DTYPE;
		else
			mp->mnt_iflag &= ~IMNT_DTYPE;
	}
	mp->mnt_fs_bshift = fs->fs_bshift;
	mp->mnt_dev_bshift = DEV_BSHIFT;	/* XXX */
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_iflag |= IMNT_MPSAFE;
#ifdef FFS_EI
	if (needswap)
		ump->um_flags |= UFS_NEEDSWAP;
#endif
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_lognindir = ffs(fs->fs_nindir) - 1;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
	/* Record on the device vnode which mount lives on it. */
	devvp->v_specmountpoint = mp;
	if (ronly == 0 && (fs->fs_flags & FS_DOSOFTDEP)) {
		error = softdep_mount(devvp, mp, fs, cred);
		if (error) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
	}
	if (ronly == 0 && fs->fs_snapinum[0] != 0)
		ffs_snapshot_mount(mp);
#ifdef UFS_EXTATTR
	/*
	 * Initialize file-backed extended attributes on UFS1 file
	 * systems.
	 */
	if (ump->um_fstype == UFS1) {
		ufs_extattr_uepm_init(&ump->um_extattr);
#ifdef UFS_EXTATTR_AUTOSTART
		/*
		 * XXX Just ignore errors.  Not clear that we should
		 * XXX fail the mount in this case.
		 */
		(void) ufs_extattr_autostart(mp, l);
#endif
	}
#endif /* UFS_EXTATTR */
	return (0);
out:
	/*
	 * Error exit: release whatever has been set up so far.  bp, fs and
	 * ump are NULL when there is nothing of that kind to release.
	 */
	fstrans_unmount(mp);
	if (fs)
		free(fs, M_UFSMNT);
	devvp->v_specmountpoint = NULL;
	if (bp)
		brelse(bp, bset);
	if (ump) {
		if (ump->um_oldfscompat)
			free(ump->um_oldfscompat, M_UFSMNT);
		mutex_destroy(&ump->um_lock);
		free(ump, M_UFSMNT);
		mp->mnt_data = NULL;
	}
	return (error);
}
1061
1062 /*
1063 * Sanity checks for loading old filesystem superblocks.
1064 * See ffs_oldfscompat_write below for unwound actions.
1065 *
1066 * XXX - Parts get retired eventually.
1067 * Unfortunately new bits get added.
1068 */
1069 static void
1070 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1071 {
1072 off_t maxfilesize;
1073 int32_t *extrasave;
1074
1075 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1076 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1077 return;
1078
1079 if (!ump->um_oldfscompat)
1080 ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
1081 M_UFSMNT, M_WAITOK);
1082
1083 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1084 extrasave = ump->um_oldfscompat;
1085 extrasave += 512/sizeof(int32_t);
1086 extrasave[0] = fs->fs_old_npsect;
1087 extrasave[1] = fs->fs_old_interleave;
1088 extrasave[2] = fs->fs_old_trackskew;
1089
1090 /* These fields will be overwritten by their
1091 * original values in fs_oldfscompat_write, so it is harmless
1092 * to modify them here.
1093 */
1094 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1095 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1096 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1097 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1098
1099 fs->fs_maxbsize = fs->fs_bsize;
1100 fs->fs_time = fs->fs_old_time;
1101 fs->fs_size = fs->fs_old_size;
1102 fs->fs_dsize = fs->fs_old_dsize;
1103 fs->fs_csaddr = fs->fs_old_csaddr;
1104 fs->fs_sblockloc = sblockloc;
1105
1106 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1107
1108 if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1109 fs->fs_old_nrpos = 8;
1110 fs->fs_old_npsect = fs->fs_old_nsect;
1111 fs->fs_old_interleave = 1;
1112 fs->fs_old_trackskew = 0;
1113 }
1114
1115 if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1116 fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1117 fs->fs_qbmask = ~fs->fs_bmask;
1118 fs->fs_qfmask = ~fs->fs_fmask;
1119 }
1120
1121 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1122 if (fs->fs_maxfilesize > maxfilesize)
1123 fs->fs_maxfilesize = maxfilesize;
1124
1125 /* Compatibility for old filesystems */
1126 if (fs->fs_avgfilesize <= 0)
1127 fs->fs_avgfilesize = AVFILESIZ;
1128 if (fs->fs_avgfpdir <= 0)
1129 fs->fs_avgfpdir = AFPDIR;
1130
1131 #if 0
1132 if (bigcgs) {
1133 fs->fs_save_cgsize = fs->fs_cgsize;
1134 fs->fs_cgsize = fs->fs_bsize;
1135 }
1136 #endif
1137 }
1138
1139 /*
1140 * Unwinding superblock updates for old filesystems.
1141 * See ffs_oldfscompat_read above for details.
1142 *
1143 * XXX - Parts get retired eventually.
1144 * Unfortunately new bits get added.
1145 */
1146 static void
1147 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1148 {
1149 int32_t *extrasave;
1150
1151 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1152 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1153 return;
1154
1155 fs->fs_old_time = fs->fs_time;
1156 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1157 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1158 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1159 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1160 fs->fs_old_flags = fs->fs_flags;
1161
1162 #if 0
1163 if (bigcgs) {
1164 fs->fs_cgsize = fs->fs_save_cgsize;
1165 }
1166 #endif
1167
1168 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1169 extrasave = ump->um_oldfscompat;
1170 extrasave += 512/sizeof(int32_t);
1171 fs->fs_old_npsect = extrasave[0];
1172 fs->fs_old_interleave = extrasave[1];
1173 fs->fs_old_trackskew = extrasave[2];
1174
1175 }
1176
1177 /*
1178 * unmount system call
1179 */
1180 int
1181 ffs_unmount(struct mount *mp, int mntflags)
1182 {
1183 struct lwp *l = curlwp;
1184 struct ufsmount *ump = VFSTOUFS(mp);
1185 struct fs *fs = ump->um_fs;
1186 int error, flags, penderr;
1187
1188 penderr = 0;
1189 flags = 0;
1190 if (mntflags & MNT_FORCE)
1191 flags |= FORCECLOSE;
1192 #ifdef UFS_EXTATTR
1193 if (ump->um_fstype == UFS1) {
1194 error = ufs_extattr_stop(mp, l);
1195 if (error) {
1196 if (error != EOPNOTSUPP)
1197 printf("%s: ufs_extattr_stop returned %d\n",
1198 fs->fs_fsmnt, error);
1199 } else
1200 ufs_extattr_uepm_destroy(&ump->um_extattr);
1201 }
1202 #endif /* UFS_EXTATTR */
1203 if (mp->mnt_flag & MNT_SOFTDEP) {
1204 if ((error = softdep_flushfiles(mp, flags, l)) != 0)
1205 return (error);
1206 } else {
1207 if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1208 return (error);
1209 }
1210 mutex_enter(&ump->um_lock);
1211 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1212 printf("%s: unmount pending error: blocks %" PRId64
1213 " files %d\n",
1214 fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
1215 fs->fs_pendingblocks = 0;
1216 fs->fs_pendinginodes = 0;
1217 penderr = 1;
1218 }
1219 mutex_exit(&ump->um_lock);
1220 if (fs->fs_ronly == 0 &&
1221 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1222 fs->fs_clean & FS_WASCLEAN) {
1223 /*
1224 * XXXX don't mark fs clean in the case of softdep
1225 * pending block errors, until they are fixed.
1226 */
1227 if (penderr == 0) {
1228 if (mp->mnt_flag & MNT_SOFTDEP)
1229 fs->fs_flags &= ~FS_DOSOFTDEP;
1230 fs->fs_clean = FS_ISCLEAN;
1231 }
1232 fs->fs_fmod = 0;
1233 (void) ffs_sbupdate(ump, MNT_WAIT);
1234 }
1235 if (ump->um_devvp->v_type != VBAD)
1236 ump->um_devvp->v_specmountpoint = NULL;
1237 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1238 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
1239 NOCRED);
1240 vput(ump->um_devvp);
1241 free(fs->fs_csp, M_UFSMNT);
1242 free(fs, M_UFSMNT);
1243 if (ump->um_oldfscompat != NULL)
1244 free(ump->um_oldfscompat, M_UFSMNT);
1245 softdep_unmount(mp);
1246 mutex_destroy(&ump->um_lock);
1247 free(ump, M_UFSMNT);
1248 mp->mnt_data = NULL;
1249 mp->mnt_flag &= ~MNT_LOCAL;
1250 fstrans_unmount(mp);
1251 return (0);
1252 }
1253
1254 /*
1255 * Flush out all the files in a filesystem.
1256 */
1257 int
1258 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1259 {
1260 extern int doforce;
1261 struct ufsmount *ump;
1262 int error;
1263
1264 if (!doforce)
1265 flags &= ~FORCECLOSE;
1266 ump = VFSTOUFS(mp);
1267 #ifdef QUOTA
1268 if (mp->mnt_flag & MNT_QUOTA) {
1269 int i;
1270 if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
1271 return (error);
1272 for (i = 0; i < MAXQUOTAS; i++) {
1273 if (ump->um_quotas[i] == NULLVP)
1274 continue;
1275 quotaoff(l, mp, i);
1276 }
1277 /*
1278 * Here we fall through to vflush again to ensure
1279 * that we have gotten rid of all the system vnodes.
1280 */
1281 }
1282 #endif
1283 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1284 return (error);
1285 ffs_snapshot_unmount(mp);
1286 /*
1287 * Flush all the files.
1288 */
1289 error = vflush(mp, NULLVP, flags);
1290 if (error)
1291 return (error);
1292 /*
1293 * Flush filesystem metadata.
1294 */
1295 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1296 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1297 VOP_UNLOCK(ump->um_devvp, 0);
1298 return (error);
1299 }
1300
1301 /*
1302 * Get file system statistics.
1303 */
1304 int
1305 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1306 {
1307 struct ufsmount *ump;
1308 struct fs *fs;
1309
1310 ump = VFSTOUFS(mp);
1311 fs = ump->um_fs;
1312 mutex_enter(&ump->um_lock);
1313 sbp->f_bsize = fs->fs_bsize;
1314 sbp->f_frsize = fs->fs_fsize;
1315 sbp->f_iosize = fs->fs_bsize;
1316 sbp->f_blocks = fs->fs_dsize;
1317 sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1318 fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1319 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1320 fs->fs_minfree) / (u_int64_t) 100;
1321 if (sbp->f_bfree > sbp->f_bresvd)
1322 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1323 else
1324 sbp->f_bavail = 0;
1325 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
1326 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1327 sbp->f_favail = sbp->f_ffree;
1328 sbp->f_fresvd = 0;
1329 mutex_exit(&ump->um_lock);
1330 copy_statvfs_info(sbp, mp);
1331
1332 return (0);
1333 }
1334
1335 /*
1336 * Go through the disk queues to initiate sandbagged IO;
1337 * go through the inodes to write those that have been modified;
1338 * initiate the writing of the super block if it has been modified.
1339 *
1340 * Note: we are always called with the filesystem marked `MPBUSY'.
1341 */
/*
 * mp:      mount point to sync.
 * waitfor: MNT_WAIT makes the VOP_FSYNC() calls synchronous (FSYNC_WAIT)
 *          and enables the rescans below; MNT_LAZY only pushes regular
 *          file inodes through ffs_update() and skips the device flush.
 * cred:    credentials handed to VOP_FSYNC().
 *
 * Returns ENOMEM if no marker vnode can be allocated; otherwise the most
 * recently recorded error from the individual steps, or 0 if none failed.
 */
1342 int
1343 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1344 {
1345 struct lwp *l = curlwp;
1346 struct vnode *vp, *mvp;
1347 struct inode *ip;
1348 struct ufsmount *ump = VFSTOUFS(mp);
1349 struct fs *fs;
1350 int error, count, allerror = 0;
1351
1352 fs = ump->um_fs;
/* A modified superblock on a read-only mount is treated as fatal. */
1353 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1354 printf("fs = %s\n", fs->fs_fsmnt);
1355 panic("update: rofs mod");
1356 }
1357
1358
1359 /* Allocate a marker vnode. */
1360 if ((mvp = vnalloc(mp)) == NULL)
1361 return (ENOMEM);
1362
/* Paired with the fstrans_done() call just before returning. */
1363 fstrans_start(mp, FSTRANS_SHARED);
1364 /*
1365 * Write back each (modified) inode.
1366 */
1367 mutex_enter(&mntvnode_lock);
/* Every `goto loop' below re-acquires mntvnode_lock before jumping here. */
1368 loop:
1369 /*
1370 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1371 * and vclean() can be called indirectly
1372 */
/*
 * NOTE(review): vmark()/vunmark() presumably insert the marker after vp
 * and later return the vnode following the marker, so the walk can resume
 * after mntvnode_lock has been dropped -- confirm against their definitions.
 */
1373 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
1374 vmark(mvp, vp);
1375 /*
1376 * If the vnode that we are about to sync is no longer
1377 * associated with this mount point, or is itself a marker,
1378 * skip it and move on to the next list entry.
1379 */
1380 if (vp->v_mount != mp || vismarker(vp))
1381 continue;
1382 mutex_enter(&vp->v_interlock);
1383 ip = VTOI(vp);
/*
 * Skip vnodes with no inode, vnodes flagged VI_XLOCK or VI_CLEAN, VNON
 * vnodes, and vnodes with nothing to write: no IN_CHANGE/IN_UPDATE/
 * IN_MODIFIED flag, no dirty buffers and a clean UVM object.
 */
1384 if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 ||
1385 vp->v_type == VNON || ((ip->i_flag &
1386 (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
1387 LIST_EMPTY(&vp->v_dirtyblkhd) &&
1388 UVM_OBJ_IS_CLEAN(&vp->v_uobj)))
1389 {
1390 mutex_exit(&vp->v_interlock);
1391 continue;
1392 }
/* Block-device vnodes are left alone while the mount is suspending. */
1393 if (vp->v_type == VBLK &&
1394 fstrans_getstate(mp) == FSTRANS_SUSPENDING) {
1395 mutex_exit(&vp->v_interlock);
1396 continue;
1397 }
/*
 * Drop the list lock before vget().  v_interlock is still held here,
 * hence LK_INTERLOCK; LK_NOWAIT means a busy vnode yields an error.
 */
1398 mutex_exit(&mntvnode_lock);
1399 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
1400 if (error) {
1401 mutex_enter(&mntvnode_lock);
/* ENOENT: remove the marker and rescan from the head of the list. */
1402 if (error == ENOENT) {
1403 (void)vunmark(mvp);
1404 goto loop;
1405 }
/* Any other vget() failure: skip this vnode; it is not recorded in allerror. */
1406 continue;
1407 }
/*
 * A lazy sync of a regular file only calls ffs_update(); every other
 * case goes through VOP_FSYNC(), waiting only for MNT_WAIT.
 */
1408 if (vp->v_type == VREG && waitfor == MNT_LAZY)
1409 error = ffs_update(vp, NULL, NULL, 0);
1410 else
1411 error = VOP_FSYNC(vp, cred,
1412 waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0);
1413 if (error)
1414 allerror = error;
1415 vput(vp);
1416 mutex_enter(&mntvnode_lock);
1417 }
1418 mutex_exit(&mntvnode_lock);
1419 /*
1420 * Force stale file system control information to be flushed.
1421 */
1422 if (waitfor == MNT_WAIT && (ump->um_mountp->mnt_flag & MNT_SOFTDEP)) {
1423 if ((error = softdep_flushworklist(ump->um_mountp, &count, l)))
1424 allerror = error;
1425 /* Flushed work items may create new vnodes to clean */
1426 if (allerror == 0 && count) {
1427 mutex_enter(&mntvnode_lock);
1428 goto loop;
1429 }
1430 }
/*
 * Unless this is a lazy sync, flush the device vnode when it has writes
 * in flight or dirty buffers.  After an error-free MNT_WAIT flush the
 * vnode list is scanned again.
 */
1431 if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1432 !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1433 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1434 if ((error = VOP_FSYNC(ump->um_devvp, cred,
1435 waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0)
1436 allerror = error;
1437 VOP_UNLOCK(ump->um_devvp, 0);
1438 if (allerror == 0 && waitfor == MNT_WAIT) {
1439 mutex_enter(&mntvnode_lock);
1440 goto loop;
1441 }
1442 }
1443 #ifdef QUOTA
1444 qsync(mp);
1445 #endif
1446 /*
1447 * Write back modified superblock.
1448 */
/* fs_fmod is cleared and fs_time stamped before ffs_cgupdate() writes. */
1449 if (fs->fs_fmod != 0) {
1450 fs->fs_fmod = 0;
1451 fs->fs_time = time_second;
1452 if ((error = ffs_cgupdate(ump, waitfor)))
1453 allerror = error;
1454 }
1455 fstrans_done(mp);
1456 vnfree(mvp);
1457 return (allerror);
1458 }
1457
1458 /*
1459 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1460 * in from disk. If it is in core, wait for the lock bit to clear, then
1461 * return the inode locked. Detection and handling of mount points must be
1462 * done by the calling routine.
1463 */
/*
 * mp:  mount point the inode belongs to.
 * ino: inode number to look up.
 * vpp: receives the vnode on success and is set to NULL on failure.
 *
 * Returns 0 on success, or the error from getnewvnode() or bread().
 */
1464 int
1465 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1466 {
1467 struct fs *fs;
1468 struct inode *ip;
1469 struct ufsmount *ump;
1470 struct buf *bp;
1471 struct vnode *vp;
1472 dev_t dev;
1473 int error;
1474
1475 ump = VFSTOUFS(mp);
1476 dev = ump->um_dev;
1477
1478 retry:
/* Fast path: the inode is already in the hash table. */
1479 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1480 return (0);
1481
1482 /* Allocate a new vnode/inode. */
1483 if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
1484 *vpp = NULL;
1485 return (error);
1486 }
1487 ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1488
1489 /*
1490 * If someone beat us to it, put back the freshly allocated
1491 * vnode/inode pair and retry.
1492 */
/*
 * The re-check is done under ufs_hashlock, which stays held until the
 * new inode has been inserted by ufs_ihashins() below.
 */
1493 mutex_enter(&ufs_hashlock);
1494 if (ufs_ihashget(dev, ino, 0) != NULL) {
1495 mutex_exit(&ufs_hashlock);
1496 ungetnewvnode(vp);
1497 pool_cache_put(ffs_inode_cache, ip);
1498 goto retry;
1499 }
1500
1501 vp->v_vflag |= VV_LOCKSWORK;
1502 if ((mp->mnt_flag & MNT_SOFTDEP) != 0)
1503 vp->v_uflag |= VU_SOFTDEP;
1504
1505 /*
1506 * XXX MFS ends up here, too, to allocate an inode. Should we
1507 * XXX create another pool for MFS inodes?
1508 */
1509
/* Start from an all-zero inode, then link it to the vnode and mount. */
1510 memset(ip, 0, sizeof(struct inode));
1511 vp->v_data = ip;
1512 ip->i_vnode = vp;
1513 ip->i_ump = ump;
1514 ip->i_fs = fs = ump->um_fs;
1515 ip->i_dev = dev;
1516 ip->i_number = ino;
1517 LIST_INIT(&ip->i_pcbufhd);
1518 #ifdef QUOTA
1519 ufsquota_init(ip);
1520 #endif
1521
1522 /*
1523 * Initialize genfs node, we might proceed to destroy it in
1524 * error branches.
1525 */
1526 genfs_node_init(vp, &ffs_genfsops);
1527
1528 /*
1529 * Put it onto its hash chain and lock it so that other requests for
1530 * this inode will block if they arrive while we are sleeping waiting
1531 * for old data structures to be purged or for the contents of the
1532 * disk portion of this inode to be read.
1533 */
1534
1535 ufs_ihashins(ip);
1536 mutex_exit(&ufs_hashlock);
1537
1538 /* Read in the disk contents for the inode, copy into the inode. */
1539 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1540 (int)fs->fs_bsize, NOCRED, &bp);
1541 if (error) {
1542
1543 /*
1544 * The inode does not contain anything useful, so it would
1545 * be misleading to leave it on its hash chain. With mode
1546 * still zero, it will be unlinked and returned to the free
1547 * list by vput().
1548 */
1549
/*
 * No dinode has been allocated yet on this path: the i_din pointers are
 * still zero from the memset() above.  The buffer is released even though
 * the read failed.
 */
1550 vput(vp);
1551 brelse(bp, 0);
1552 *vpp = NULL;
1553 return (error);
1554 }
/* Allocate the in-core dinode that matches the file system format. */
1555 if (ip->i_ump->um_fstype == UFS1)
1556 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1557 PR_WAITOK);
1558 else
1559 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1560 PR_WAITOK);
1561 ffs_load_inode(bp, ip, fs, ino);
/*
 * With soft dependencies the softdep code is told about the loaded
 * inode; otherwise the effective link count is simply the link count.
 */
1562 if (DOINGSOFTDEP(vp))
1563 softdep_load_inodeblock(ip);
1564 else
1565 ip->i_ffs_effnlink = ip->i_nlink;
1566 brelse(bp, 0);
1567
1568 /*
1569 * Initialize the vnode from the inode, check for aliases.
1570 * Note that the underlying vnode may have changed.
1571 */
1572
1573 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1574
1575 /*
1576 * Finish inode initialization now that aliasing has been resolved.
1577 */
1578
/* The inode takes its own reference on the device vnode. */
1579 ip->i_devvp = ump->um_devvp;
1580 VREF(ip->i_devvp);
1581
1582 /*
1583 * Ensure that uid and gid are correct. This is a temporary
1584 * fix until fsck has been changed to do the update.
1585 */
1586
1587 if (fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */
1588 ip->i_uid = ip->i_ffs1_ouid; /* XXX */
1589 ip->i_gid = ip->i_ffs1_ogid; /* XXX */
1590 } /* XXX */
1591 uvm_vnp_setsize(vp, ip->i_size);
1592 *vpp = vp;
1593 return (0);
1594 }
1595
1596 /*
1597 * File handle to vnode
1598 *
1599 * Have to be really careful about stale file handles:
1600 * - check that the inode number is valid
1601 * - call ffs_vget() to get the locked inode
1602 * - check for an unallocated inode (i_mode == 0)
1603 * - check that the given client host has export rights and return
1604 * those rights via. exflagsp and credanonp
1605 */
1606 int
1607 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1608 {
1609 struct ufid ufh;
1610 struct fs *fs;
1611
1612 if (fhp->fid_len != sizeof(struct ufid))
1613 return EINVAL;
1614
1615 memcpy(&ufh, fhp, sizeof(ufh));
1616 fs = VFSTOUFS(mp)->um_fs;
1617 if (ufh.ufid_ino < ROOTINO ||
1618 ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1619 return (ESTALE);
1620 return (ufs_fhtovp(mp, &ufh, vpp));
1621 }
1622
1623 /*
1624 * Vnode pointer to File handle
1625 */
1626 /* ARGSUSED */
1627 int
1628 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1629 {
1630 struct inode *ip;
1631 struct ufid ufh;
1632
1633 if (*fh_size < sizeof(struct ufid)) {
1634 *fh_size = sizeof(struct ufid);
1635 return E2BIG;
1636 }
1637 ip = VTOI(vp);
1638 *fh_size = sizeof(struct ufid);
1639 memset(&ufh, 0, sizeof(ufh));
1640 ufh.ufid_len = sizeof(struct ufid);
1641 ufh.ufid_ino = ip->i_number;
1642 ufh.ufid_gen = ip->i_gen;
1643 memcpy(fhp, &ufh, sizeof(ufh));
1644 return (0);
1645 }
1646
1647 void
1648 ffs_init(void)
1649 {
1650 if (ffs_initcount++ > 0)
1651 return;
1652
1653 ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1654 "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1655 ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1656 "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1657 ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1658 "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1659 softdep_initialize();
1660 ffs_snapshot_init();
1661 ufs_init();
1662 }
1663
/*
 * Re-initialise FFS: forwards to the softdep layer first, then to UFS.
 */
void
ffs_reinit(void)
{

	softdep_reinitialize();
	ufs_reinit();
}
1670
1671 void
1672 ffs_done(void)
1673 {
1674 if (--ffs_initcount > 0)
1675 return;
1676
1677 /* XXX softdep cleanup ? */
1678 ffs_snapshot_fini();
1679 ufs_done();
1680 pool_cache_destroy(ffs_dinode2_cache);
1681 pool_cache_destroy(ffs_dinode1_cache);
1682 pool_cache_destroy(ffs_inode_cache);
1683 }
1684
1685 SYSCTL_SETUP(sysctl_vfs_ffs_setup, "sysctl vfs.ffs subtree setup")
1686 {
1687 #if 0
1688 extern int doasyncfree;
1689 #endif
1690 extern int ffs_log_changeopt;
1691
1692 sysctl_createv(clog, 0, NULL, NULL,
1693 CTLFLAG_PERMANENT,
1694 CTLTYPE_NODE, "vfs", NULL,
1695 NULL, 0, NULL, 0,
1696 CTL_VFS, CTL_EOL);
1697 sysctl_createv(clog, 0, NULL, NULL,
1698 CTLFLAG_PERMANENT,
1699 CTLTYPE_NODE, "ffs",
1700 SYSCTL_DESCR("Berkeley Fast File System"),
1701 NULL, 0, NULL, 0,
1702 CTL_VFS, 1, CTL_EOL);
1703
1704 /*
1705 * @@@ should we even bother with these first three?
1706 */
1707 sysctl_createv(clog, 0, NULL, NULL,
1708 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1709 CTLTYPE_INT, "doclusterread", NULL,
1710 sysctl_notavail, 0, NULL, 0,
1711 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
1712 sysctl_createv(clog, 0, NULL, NULL,
1713 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1714 CTLTYPE_INT, "doclusterwrite", NULL,
1715 sysctl_notavail, 0, NULL, 0,
1716 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
1717 sysctl_createv(clog, 0, NULL, NULL,
1718 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1719 CTLTYPE_INT, "doreallocblks", NULL,
1720 sysctl_notavail, 0, NULL, 0,
1721 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
1722 #if 0
1723 sysctl_createv(clog, 0, NULL, NULL,
1724 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1725 CTLTYPE_INT, "doasyncfree",
1726 SYSCTL_DESCR("Release dirty blocks asynchronously"),
1727 NULL, 0, &doasyncfree, 0,
1728 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
1729 #endif
1730 sysctl_createv(clog, 0, NULL, NULL,
1731 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1732 CTLTYPE_INT, "log_changeopt",
1733 SYSCTL_DESCR("Log changes in optimization strategy"),
1734 NULL, 0, &ffs_log_changeopt, 0,
1735 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
1736 }
1737
1738 /*
1739 * Write a superblock and associated information back to disk.
1740 */
1741 int
1742 ffs_sbupdate(struct ufsmount *mp, int waitfor)
1743 {
1744 struct fs *fs = mp->um_fs;
1745 struct buf *bp;
1746 int error = 0;
1747 u_int32_t saveflag;
1748
1749 bp = getblk(mp->um_devvp,
1750 fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb),
1751 (int)fs->fs_sbsize, 0, 0);
1752 saveflag = fs->fs_flags & FS_INTERNAL;
1753 fs->fs_flags &= ~FS_INTERNAL;
1754
1755 memcpy(bp->b_data, fs, fs->fs_sbsize);
1756
1757 ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1758 #ifdef FFS_EI
1759 if (mp->um_flags & UFS_NEEDSWAP)
1760 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
1761 #endif
1762 fs->fs_flags |= saveflag;
1763
1764 if (waitfor == MNT_WAIT)
1765 error = bwrite(bp);
1766 else
1767 bawrite(bp);
1768 return (error);
1769 }
1770
1771 int
1772 ffs_cgupdate(struct ufsmount *mp, int waitfor)
1773 {
1774 struct fs *fs = mp->um_fs;
1775 struct buf *bp;
1776 int blks;
1777 void *space;
1778 int i, size, error = 0, allerror = 0;
1779
1780 allerror = ffs_sbupdate(mp, waitfor);
1781 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1782 space = fs->fs_csp;
1783 for (i = 0; i < blks; i += fs->fs_frag) {
1784 size = fs->fs_bsize;
1785 if (i + fs->fs_frag > blks)
1786 size = (blks - i) * fs->fs_fsize;
1787 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1788 size, 0, 0);
1789 #ifdef FFS_EI
1790 if (mp->um_flags & UFS_NEEDSWAP)
1791 ffs_csum_swap((struct csum*)space,
1792 (struct csum*)bp->b_data, size);
1793 else
1794 #endif
1795 memcpy(bp->b_data, space, (u_int)size);
1796 space = (char *)space + size;
1797 if (waitfor == MNT_WAIT)
1798 error = bwrite(bp);
1799 else
1800 bawrite(bp);
1801 }
1802 if (!allerror && error)
1803 allerror = error;
1804 return (allerror);
1805 }
1806
/*
 * Extended attribute control entry point.
 *
 * With UFS_EXTATTR configured, UFS1 mounts are handled by
 * ufs_extattrctl(); every other case goes to vfs_stdextattrctl().
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	const struct ufsmount *ump = VFSTOUFS(mp);

	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	if (ump->um_fstype == UFS1) {
		return (ufs_extattrctl(mp, cmd, vp, attrnamespace,
		    attrname));
	}
#endif
	return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname));
}
1821
1822 int
1823 ffs_suspendctl(struct mount *mp, int cmd)
1824 {
1825 int error;
1826 struct lwp *l = curlwp;
1827
1828 switch (cmd) {
1829 case SUSPEND_SUSPEND:
1830 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
1831 return error;
1832 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
1833 if (error == 0)
1834 error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
1835 if (error != 0) {
1836 (void) fstrans_setstate(mp, FSTRANS_NORMAL);
1837 return error;
1838 }
1839 return 0;
1840
1841 case SUSPEND_RESUME:
1842 return fstrans_setstate(mp, FSTRANS_NORMAL);
1843
1844 default:
1845 return EINVAL;
1846 }
1847 }
1848