ffs_vfsops.c revision 1.228 1 /* $NetBSD: ffs_vfsops.c,v 1.228 2008/05/16 09:22:00 hannken Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1991, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.228 2008/05/16 09:22:00 hannken Exp $");
36
37 #if defined(_KERNEL_OPT)
38 #include "opt_ffs.h"
39 #include "opt_quota.h"
40 #include "opt_softdep.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/namei.h>
46 #include <sys/proc.h>
47 #include <sys/kernel.h>
48 #include <sys/vnode.h>
49 #include <sys/socket.h>
50 #include <sys/mount.h>
51 #include <sys/buf.h>
52 #include <sys/device.h>
53 #include <sys/mbuf.h>
54 #include <sys/file.h>
55 #include <sys/disklabel.h>
56 #include <sys/ioctl.h>
57 #include <sys/errno.h>
58 #include <sys/malloc.h>
59 #include <sys/pool.h>
60 #include <sys/lock.h>
61 #include <sys/sysctl.h>
62 #include <sys/conf.h>
63 #include <sys/kauth.h>
64 #include <sys/fstrans.h>
65 #include <sys/module.h>
66
67 #include <miscfs/genfs/genfs.h>
68 #include <miscfs/specfs/specdev.h>
69
70 #include <ufs/ufs/quota.h>
71 #include <ufs/ufs/ufsmount.h>
72 #include <ufs/ufs/inode.h>
73 #include <ufs/ufs/dir.h>
74 #include <ufs/ufs/ufs_extern.h>
75 #include <ufs/ufs/ufs_bswap.h>
76
77 #include <ufs/ffs/fs.h>
78 #include <ufs/ffs/ffs_extern.h>
79
80 MODULE(MODULE_CLASS_VFS, ffs, NULL);
81
82 /* how many times ffs_init() was called */
83 int ffs_initcount = 0;
84
85 extern kmutex_t ufs_hashlock;
86
87 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
88 extern const struct vnodeopv_desc ffs_specop_opv_desc;
89 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
90
91 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
92 &ffs_vnodeop_opv_desc,
93 &ffs_specop_opv_desc,
94 &ffs_fifoop_opv_desc,
95 NULL,
96 };
97
98 struct vfsops ffs_vfsops = {
99 MOUNT_FFS,
100 sizeof (struct ufs_args),
101 ffs_mount,
102 ufs_start,
103 ffs_unmount,
104 ufs_root,
105 ufs_quotactl,
106 ffs_statvfs,
107 ffs_sync,
108 ffs_vget,
109 ffs_fhtovp,
110 ffs_vptofh,
111 ffs_init,
112 ffs_reinit,
113 ffs_done,
114 ffs_mountroot,
115 ffs_snapshot,
116 ffs_extattrctl,
117 ffs_suspendctl,
118 genfs_renamelock_enter,
119 genfs_renamelock_exit,
120 ffs_full_fsync,
121 ffs_vnodeopv_descs,
122 0,
123 { NULL, NULL },
124 };
125
126 static const struct genfs_ops ffs_genfsops = {
127 .gop_size = ffs_gop_size,
128 .gop_alloc = ufs_gop_alloc,
129 .gop_write = genfs_gop_write,
130 .gop_markupdate = ufs_gop_markupdate,
131 };
132
133 static const struct ufs_ops ffs_ufsops = {
134 .uo_itimes = ffs_itimes,
135 .uo_update = ffs_update,
136 .uo_truncate = ffs_truncate,
137 .uo_valloc = ffs_valloc,
138 .uo_vfree = ffs_vfree,
139 .uo_balloc = ffs_balloc,
140 };
141
142 static int
143 ffs_modcmd(modcmd_t cmd, void *arg)
144 {
145
146 switch (cmd) {
147 case MODULE_CMD_INIT:
148 return vfs_attach(&ffs_vfsops);
149 case MODULE_CMD_FINI:
150 return vfs_detach(&ffs_vfsops);
151 default:
152 return ENOTTY;
153 }
154 }
155
156 pool_cache_t ffs_inode_cache;
157 pool_cache_t ffs_dinode1_cache;
158 pool_cache_t ffs_dinode2_cache;
159
160 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
161 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
162
163 /*
164 * Called by main() when ffs is going to be mounted as root.
165 */
166
167 int
168 ffs_mountroot(void)
169 {
170 struct fs *fs;
171 struct mount *mp;
172 struct lwp *l = curlwp; /* XXX */
173 struct ufsmount *ump;
174 int error;
175
176 if (device_class(root_device) != DV_DISK)
177 return (ENODEV);
178
179 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
180 vrele(rootvp);
181 return (error);
182 }
183 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
184 vfs_unbusy(mp, false, NULL);
185 vfs_destroy(mp);
186 return (error);
187 }
188 mutex_enter(&mountlist_lock);
189 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
190 mutex_exit(&mountlist_lock);
191 ump = VFSTOUFS(mp);
192 fs = ump->um_fs;
193 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
194 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
195 (void)ffs_statvfs(mp, &mp->mnt_stat);
196 vfs_unbusy(mp, false, NULL);
197 setrootfstime((time_t)fs->fs_time);
198 return (0);
199 }
200
201 /*
202 * VFS Operations.
203 *
204 * mount system call
205 */
206 int
207 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
208 {
209 struct lwp *l = curlwp;
210 struct nameidata nd;
211 struct vnode *vp, *devvp = NULL;
212 struct ufs_args *args = data;
213 struct ufsmount *ump = NULL;
214 struct fs *fs;
215 int error = 0, flags, update;
216 mode_t accessmode;
217
218 if (*data_len < sizeof *args)
219 return EINVAL;
220
221 if (mp->mnt_flag & MNT_GETARGS) {
222 ump = VFSTOUFS(mp);
223 if (ump == NULL)
224 return EIO;
225 args->fspec = NULL;
226 *data_len = sizeof *args;
227 return 0;
228 }
229
230 #if !defined(SOFTDEP)
231 mp->mnt_flag &= ~MNT_SOFTDEP;
232 #endif
233
234 update = mp->mnt_flag & MNT_UPDATE;
235
236 /* Check arguments */
237 if (args->fspec != NULL) {
238 /*
239 * Look up the name and verify that it's sane.
240 */
241 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, args->fspec);
242 if ((error = namei(&nd)) != 0)
243 return (error);
244 devvp = nd.ni_vp;
245
246 if (!update) {
247 /*
248 * Be sure this is a valid block device
249 */
250 if (devvp->v_type != VBLK)
251 error = ENOTBLK;
252 else if (bdevsw_lookup(devvp->v_rdev) == NULL)
253 error = ENXIO;
254 } else {
255 /*
256 * Be sure we're still naming the same device
257 * used for our initial mount
258 */
259 ump = VFSTOUFS(mp);
260 if (devvp != ump->um_devvp) {
261 if (devvp->v_rdev != ump->um_devvp->v_rdev)
262 error = EINVAL;
263 else {
264 vrele(devvp);
265 devvp = ump->um_devvp;
266 vref(devvp);
267 }
268 }
269 }
270 } else {
271 if (!update) {
272 /* New mounts must have a filename for the device */
273 return (EINVAL);
274 } else {
275 /* Use the extant mount */
276 ump = VFSTOUFS(mp);
277 devvp = ump->um_devvp;
278 vref(devvp);
279 }
280 }
281
282 /*
283 * Mark the device and any existing vnodes as involved in
284 * softdep processing.
285 */
286 if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
287 devvp->v_uflag |= VU_SOFTDEP;
288 mutex_enter(&mntvnode_lock);
289 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
290 if (vp->v_mount != mp || vismarker(vp))
291 continue;
292 vp->v_uflag |= VU_SOFTDEP;
293 }
294 mutex_exit(&mntvnode_lock);
295 }
296
297 /*
298 * If mount by non-root, then verify that user has necessary
299 * permissions on the device.
300 */
301 if (error == 0 && kauth_authorize_generic(l->l_cred,
302 KAUTH_GENERIC_ISSUSER, NULL) != 0) {
303 accessmode = VREAD;
304 if (update ?
305 (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
306 (mp->mnt_flag & MNT_RDONLY) == 0)
307 accessmode |= VWRITE;
308 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
309 error = VOP_ACCESS(devvp, accessmode, l->l_cred);
310 VOP_UNLOCK(devvp, 0);
311 }
312
313 if (error) {
314 vrele(devvp);
315 return (error);
316 }
317
318 if (!update) {
319 int xflags;
320
321 if (mp->mnt_flag & MNT_RDONLY)
322 xflags = FREAD;
323 else
324 xflags = FREAD|FWRITE;
325 error = VOP_OPEN(devvp, xflags, FSCRED);
326 if (error)
327 goto fail;
328 error = ffs_mountfs(devvp, mp, l);
329 if (error) {
330 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
331 (void)VOP_CLOSE(devvp, xflags, NOCRED);
332 VOP_UNLOCK(devvp, 0);
333 goto fail;
334 }
335
336 ump = VFSTOUFS(mp);
337 fs = ump->um_fs;
338 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
339 (MNT_SOFTDEP | MNT_ASYNC)) {
340 printf("%s fs uses soft updates, "
341 "ignoring async mode\n",
342 fs->fs_fsmnt);
343 mp->mnt_flag &= ~MNT_ASYNC;
344 }
345 } else {
346 /*
347 * Update the mount.
348 */
349
350 /*
351 * The initial mount got a reference on this
352 * device, so drop the one obtained via
353 * namei(), above.
354 */
355 vrele(devvp);
356
357 ump = VFSTOUFS(mp);
358 fs = ump->um_fs;
359 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
360 /*
361 * Changing from r/w to r/o
362 */
363 flags = WRITECLOSE;
364 if (mp->mnt_flag & MNT_FORCE)
365 flags |= FORCECLOSE;
366 if (mp->mnt_flag & MNT_SOFTDEP)
367 error = softdep_flushfiles(mp, flags, l);
368 else
369 error = ffs_flushfiles(mp, flags, l);
370 if (fs->fs_pendingblocks != 0 ||
371 fs->fs_pendinginodes != 0) {
372 printf("%s: update error: blocks %" PRId64
373 " files %d\n",
374 fs->fs_fsmnt, fs->fs_pendingblocks,
375 fs->fs_pendinginodes);
376 fs->fs_pendingblocks = 0;
377 fs->fs_pendinginodes = 0;
378 }
379 if (error == 0 &&
380 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
381 fs->fs_clean & FS_WASCLEAN) {
382 if (mp->mnt_flag & MNT_SOFTDEP)
383 fs->fs_flags &= ~FS_DOSOFTDEP;
384 fs->fs_clean = FS_ISCLEAN;
385 (void) ffs_sbupdate(ump, MNT_WAIT);
386 }
387 if (error)
388 return (error);
389 fs->fs_ronly = 1;
390 fs->fs_fmod = 0;
391 }
392
393 /*
394 * Flush soft dependencies if disabling it via an update
395 * mount. This may leave some items to be processed,
396 * so don't do this yet XXX.
397 */
398 if ((fs->fs_flags & FS_DOSOFTDEP) &&
399 !(mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
400 #ifdef notyet
401 flags = WRITECLOSE;
402 if (mp->mnt_flag & MNT_FORCE)
403 flags |= FORCECLOSE;
404 error = softdep_flushfiles(mp, flags, l);
405 if (error == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0)
406 fs->fs_flags &= ~FS_DOSOFTDEP;
407 (void) ffs_sbupdate(ump, MNT_WAIT);
408 #elif defined(SOFTDEP)
409 mp->mnt_flag |= MNT_SOFTDEP;
410 #endif
411 }
412
413 /*
414 * When upgrading to a softdep mount, we must first flush
415 * all vnodes. (not done yet -- see above)
416 */
417 if (!(fs->fs_flags & FS_DOSOFTDEP) &&
418 (mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
419 #ifdef notyet
420 flags = WRITECLOSE;
421 if (mp->mnt_flag & MNT_FORCE)
422 flags |= FORCECLOSE;
423 error = ffs_flushfiles(mp, flags, l);
424 #else
425 mp->mnt_flag &= ~MNT_SOFTDEP;
426 #endif
427 }
428
429 if (mp->mnt_flag & MNT_RELOAD) {
430 error = ffs_reload(mp, l->l_cred, l);
431 if (error)
432 return (error);
433 }
434
435 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
436 /*
437 * Changing from read-only to read/write
438 */
439 fs->fs_ronly = 0;
440 fs->fs_clean <<= 1;
441 fs->fs_fmod = 1;
442 if ((fs->fs_flags & FS_DOSOFTDEP)) {
443 error = softdep_mount(devvp, mp, fs,
444 l->l_cred);
445 if (error)
446 return (error);
447 }
448 if (fs->fs_snapinum[0] != 0)
449 ffs_snapshot_mount(mp);
450 }
451 if (args->fspec == NULL)
452 return EINVAL;
453 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
454 (MNT_SOFTDEP | MNT_ASYNC)) {
455 printf("%s fs uses soft updates, ignoring async mode\n",
456 fs->fs_fsmnt);
457 mp->mnt_flag &= ~MNT_ASYNC;
458 }
459 }
460
461 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
462 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
463 if (error == 0)
464 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
465 sizeof(fs->fs_fsmnt));
466 if (mp->mnt_flag & MNT_SOFTDEP)
467 fs->fs_flags |= FS_DOSOFTDEP;
468 else
469 fs->fs_flags &= ~FS_DOSOFTDEP;
470 if (fs->fs_fmod != 0) { /* XXX */
471 fs->fs_fmod = 0;
472 if (fs->fs_clean & FS_WASCLEAN)
473 fs->fs_time = time_second;
474 else {
475 printf("%s: file system not clean (fs_clean=%x); please fsck(8)\n",
476 mp->mnt_stat.f_mntfromname, fs->fs_clean);
477 printf("%s: lost blocks %" PRId64 " files %d\n",
478 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
479 fs->fs_pendinginodes);
480 }
481 (void) ffs_cgupdate(ump, MNT_WAIT);
482 }
483 return (error);
484
485 fail:
486 vrele(devvp);
487 return (error);
488 }
489
490 /*
491 * Reload all incore data for a filesystem (used after running fsck on
492 * the root filesystem and finding things to fix). The filesystem must
493 * be mounted read-only.
494 *
495 * Things to do to update the mount:
496 * 1) invalidate all cached meta-data.
497 * 2) re-read superblock from disk.
498 * 3) re-read summary information from disk.
499 * 4) invalidate all inactive vnodes.
500 * 5) invalidate all cached file data.
501 * 6) re-read inode data for all active vnodes.
502 */
503 int
504 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
505 {
506 struct vnode *vp, *mvp, *devvp;
507 struct inode *ip;
508 void *space;
509 struct buf *bp;
510 struct fs *fs, *newfs;
511 struct partinfo dpart;
512 int i, blks, size, error;
513 int32_t *lp;
514 struct ufsmount *ump;
515 daddr_t sblockloc;
516
517 if ((mp->mnt_flag & MNT_RDONLY) == 0)
518 return (EINVAL);
519
520 ump = VFSTOUFS(mp);
521 /*
522 * Step 1: invalidate all cached meta-data.
523 */
524 devvp = ump->um_devvp;
525 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
526 error = vinvalbuf(devvp, 0, cred, l, 0, 0);
527 VOP_UNLOCK(devvp, 0);
528 if (error)
529 panic("ffs_reload: dirty1");
530 /*
531 * Step 2: re-read superblock from disk.
532 */
533 fs = ump->um_fs;
534 if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, NOCRED) != 0)
535 size = DEV_BSIZE;
536 else
537 size = dpart.disklab->d_secsize;
538 /* XXX we don't handle possibility that superblock moved. */
539 error = bread(devvp, fs->fs_sblockloc / size, fs->fs_sbsize,
540 NOCRED, 0, &bp);
541 if (error) {
542 brelse(bp, 0);
543 return (error);
544 }
545 newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
546 memcpy(newfs, bp->b_data, fs->fs_sbsize);
547 #ifdef FFS_EI
548 if (ump->um_flags & UFS_NEEDSWAP) {
549 ffs_sb_swap((struct fs*)bp->b_data, newfs);
550 fs->fs_flags |= FS_SWAPPED;
551 } else
552 #endif
553 fs->fs_flags &= ~FS_SWAPPED;
554 if ((newfs->fs_magic != FS_UFS1_MAGIC &&
555 newfs->fs_magic != FS_UFS2_MAGIC)||
556 newfs->fs_bsize > MAXBSIZE ||
557 newfs->fs_bsize < sizeof(struct fs)) {
558 brelse(bp, 0);
559 free(newfs, M_UFSMNT);
560 return (EIO); /* XXX needs translation */
561 }
562 /* Store off old fs_sblockloc for fs_oldfscompat_read. */
563 sblockloc = fs->fs_sblockloc;
564 /*
565 * Copy pointer fields back into superblock before copying in XXX
566 * new superblock. These should really be in the ufsmount. XXX
567 * Note that important parameters (eg fs_ncg) are unchanged.
568 */
569 newfs->fs_csp = fs->fs_csp;
570 newfs->fs_maxcluster = fs->fs_maxcluster;
571 newfs->fs_contigdirs = fs->fs_contigdirs;
572 newfs->fs_ronly = fs->fs_ronly;
573 newfs->fs_active = fs->fs_active;
574 memcpy(fs, newfs, (u_int)fs->fs_sbsize);
575 brelse(bp, 0);
576 free(newfs, M_UFSMNT);
577
578 /* Recheck for apple UFS filesystem */
579 ump->um_flags &= ~UFS_ISAPPLEUFS;
580 /* First check to see if this is tagged as an Apple UFS filesystem
581 * in the disklabel
582 */
583 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
584 (dpart.part->p_fstype == FS_APPLEUFS)) {
585 ump->um_flags |= UFS_ISAPPLEUFS;
586 }
587 #ifdef APPLE_UFS
588 else {
589 /* Manually look for an apple ufs label, and if a valid one
590 * is found, then treat it like an Apple UFS filesystem anyway
591 */
592 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
593 APPLEUFS_LABEL_SIZE, cred, 0, &bp);
594 if (error) {
595 brelse(bp, 0);
596 return (error);
597 }
598 error = ffs_appleufs_validate(fs->fs_fsmnt,
599 (struct appleufslabel *)bp->b_data,NULL);
600 if (error == 0)
601 ump->um_flags |= UFS_ISAPPLEUFS;
602 brelse(bp, 0);
603 bp = NULL;
604 }
605 #else
606 if (ump->um_flags & UFS_ISAPPLEUFS)
607 return (EIO);
608 #endif
609
610 if (UFS_MPISAPPLEUFS(ump)) {
611 /* see comment about NeXT below */
612 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
613 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
614 mp->mnt_iflag |= IMNT_DTYPE;
615 } else {
616 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
617 ump->um_dirblksiz = DIRBLKSIZ;
618 if (ump->um_maxsymlinklen > 0)
619 mp->mnt_iflag |= IMNT_DTYPE;
620 else
621 mp->mnt_iflag &= ~IMNT_DTYPE;
622 }
623 ffs_oldfscompat_read(fs, ump, sblockloc);
624 mutex_enter(&ump->um_lock);
625 ump->um_maxfilesize = fs->fs_maxfilesize;
626 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
627 fs->fs_pendingblocks = 0;
628 fs->fs_pendinginodes = 0;
629 }
630 mutex_exit(&ump->um_lock);
631
632 ffs_statvfs(mp, &mp->mnt_stat);
633 /*
634 * Step 3: re-read summary information from disk.
635 */
636 blks = howmany(fs->fs_cssize, fs->fs_fsize);
637 space = fs->fs_csp;
638 for (i = 0; i < blks; i += fs->fs_frag) {
639 size = fs->fs_bsize;
640 if (i + fs->fs_frag > blks)
641 size = (blks - i) * fs->fs_fsize;
642 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
643 NOCRED, 0, &bp);
644 if (error) {
645 brelse(bp, 0);
646 return (error);
647 }
648 #ifdef FFS_EI
649 if (UFS_FSNEEDSWAP(fs))
650 ffs_csum_swap((struct csum *)bp->b_data,
651 (struct csum *)space, size);
652 else
653 #endif
654 memcpy(space, bp->b_data, (size_t)size);
655 space = (char *)space + size;
656 brelse(bp, 0);
657 }
658 if ((fs->fs_flags & FS_DOSOFTDEP))
659 softdep_mount(devvp, mp, fs, cred);
660 if (fs->fs_snapinum[0] != 0)
661 ffs_snapshot_mount(mp);
662 /*
663 * We no longer know anything about clusters per cylinder group.
664 */
665 if (fs->fs_contigsumsize > 0) {
666 lp = fs->fs_maxcluster;
667 for (i = 0; i < fs->fs_ncg; i++)
668 *lp++ = fs->fs_contigsumsize;
669 }
670
671 /* Allocate a marker vnode. */
672 if ((mvp = vnalloc(mp)) == NULL)
673 return ENOMEM;
674 /*
675 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
676 * and vclean() can be called indirectly
677 */
678 mutex_enter(&mntvnode_lock);
679 loop:
680 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
681 vmark(mvp, vp);
682 if (vp->v_mount != mp || vismarker(vp))
683 continue;
684 /*
685 * Step 4: invalidate all inactive vnodes.
686 */
687 if (vrecycle(vp, &mntvnode_lock, l)) {
688 mutex_enter(&mntvnode_lock);
689 (void)vunmark(mvp);
690 goto loop;
691 }
692 /*
693 * Step 5: invalidate all cached file data.
694 */
695 mutex_enter(&vp->v_interlock);
696 mutex_exit(&mntvnode_lock);
697 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
698 (void)vunmark(mvp);
699 goto loop;
700 }
701 if (vinvalbuf(vp, 0, cred, l, 0, 0))
702 panic("ffs_reload: dirty2");
703 /*
704 * Step 6: re-read inode data for all active vnodes.
705 */
706 ip = VTOI(vp);
707 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
708 (int)fs->fs_bsize, NOCRED, 0, &bp);
709 if (error) {
710 brelse(bp, 0);
711 vput(vp);
712 (void)vunmark(mvp);
713 break;
714 }
715 ffs_load_inode(bp, ip, fs, ip->i_number);
716 ip->i_ffs_effnlink = ip->i_nlink;
717 brelse(bp, 0);
718 vput(vp);
719 mutex_enter(&mntvnode_lock);
720 }
721 mutex_exit(&mntvnode_lock);
722 vnfree(mvp);
723 return (error);
724 }
725
726 /*
727 * Possible superblock locations ordered from most to least likely.
728 */
729 static const int sblock_try[] = SBLOCKSEARCH;
730
731 /*
732 * Common code for mount and mountroot
733 */
734 int
735 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
736 {
737 struct ufsmount *ump;
738 struct buf *bp;
739 struct fs *fs;
740 dev_t dev;
741 struct partinfo dpart;
742 void *space;
743 daddr_t sblockloc, fsblockloc;
744 int blks, fstype;
745 int error, i, size, ronly, bset = 0;
746 #ifdef FFS_EI
747 int needswap = 0; /* keep gcc happy */
748 #endif
749 int32_t *lp;
750 kauth_cred_t cred;
751 u_int32_t sbsize = 8192; /* keep gcc happy*/
752
753 dev = devvp->v_rdev;
754 cred = l ? l->l_cred : NOCRED;
755
756 /* Flush out any old buffers remaining from a previous use. */
757 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
758 error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
759 VOP_UNLOCK(devvp, 0);
760 if (error)
761 return (error);
762
763 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
764 if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) != 0)
765 size = DEV_BSIZE;
766 else
767 size = dpart.disklab->d_secsize;
768
769 bp = NULL;
770 ump = NULL;
771 fs = NULL;
772 sblockloc = 0;
773 fstype = 0;
774
775 error = fstrans_mount(mp);
776 if (error)
777 return error;
778
779 /*
780 * Try reading the superblock in each of its possible locations.
781 */
782 for (i = 0; ; i++) {
783 if (bp != NULL) {
784 brelse(bp, BC_NOCACHE);
785 bp = NULL;
786 }
787 if (sblock_try[i] == -1) {
788 error = EINVAL;
789 fs = NULL;
790 goto out;
791 }
792 error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE, cred,
793 0, &bp);
794 if (error) {
795 fs = NULL;
796 goto out;
797 }
798 fs = (struct fs*)bp->b_data;
799 fsblockloc = sblockloc = sblock_try[i];
800 if (fs->fs_magic == FS_UFS1_MAGIC) {
801 sbsize = fs->fs_sbsize;
802 fstype = UFS1;
803 #ifdef FFS_EI
804 needswap = 0;
805 } else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
806 sbsize = bswap32(fs->fs_sbsize);
807 fstype = UFS1;
808 needswap = 1;
809 #endif
810 } else if (fs->fs_magic == FS_UFS2_MAGIC) {
811 sbsize = fs->fs_sbsize;
812 fstype = UFS2;
813 #ifdef FFS_EI
814 needswap = 0;
815 } else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
816 sbsize = bswap32(fs->fs_sbsize);
817 fstype = UFS2;
818 needswap = 1;
819 #endif
820 } else
821 continue;
822
823
824 /* fs->fs_sblockloc isn't defined for old filesystems */
825 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
826 if (sblockloc == SBLOCK_UFS2)
827 /*
828 * This is likely to be the first alternate
829 * in a filesystem with 64k blocks.
830 * Don't use it.
831 */
832 continue;
833 fsblockloc = sblockloc;
834 } else {
835 fsblockloc = fs->fs_sblockloc;
836 #ifdef FFS_EI
837 if (needswap)
838 fsblockloc = bswap64(fsblockloc);
839 #endif
840 }
841
842 /* Check we haven't found an alternate superblock */
843 if (fsblockloc != sblockloc)
844 continue;
845
846 /* Validate size of superblock */
847 if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
848 continue;
849
850 /* Ok seems to be a good superblock */
851 break;
852 }
853
854 fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
855 memcpy(fs, bp->b_data, sbsize);
856
857 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
858 memset(ump, 0, sizeof *ump);
859 mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
860 error = ffs_snapshot_init(ump);
861 if (error)
862 goto out;
863 ump->um_fs = fs;
864 ump->um_ops = &ffs_ufsops;
865
866 #ifdef FFS_EI
867 if (needswap) {
868 ffs_sb_swap((struct fs*)bp->b_data, fs);
869 fs->fs_flags |= FS_SWAPPED;
870 } else
871 #endif
872 fs->fs_flags &= ~FS_SWAPPED;
873
874 ffs_oldfscompat_read(fs, ump, sblockloc);
875 ump->um_maxfilesize = fs->fs_maxfilesize;
876
877 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
878 fs->fs_pendingblocks = 0;
879 fs->fs_pendinginodes = 0;
880 }
881
882 ump->um_fstype = fstype;
883 if (fs->fs_sbsize < SBLOCKSIZE)
884 brelse(bp, BC_INVAL);
885 else
886 brelse(bp, 0);
887 bp = NULL;
888
889 /* First check to see if this is tagged as an Apple UFS filesystem
890 * in the disklabel
891 */
892 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred) == 0) &&
893 (dpart.part->p_fstype == FS_APPLEUFS)) {
894 ump->um_flags |= UFS_ISAPPLEUFS;
895 }
896 #ifdef APPLE_UFS
897 else {
898 /* Manually look for an apple ufs label, and if a valid one
899 * is found, then treat it like an Apple UFS filesystem anyway
900 */
901 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
902 APPLEUFS_LABEL_SIZE, cred, 0, &bp);
903 if (error)
904 goto out;
905 error = ffs_appleufs_validate(fs->fs_fsmnt,
906 (struct appleufslabel *)bp->b_data,NULL);
907 if (error == 0) {
908 ump->um_flags |= UFS_ISAPPLEUFS;
909 }
910 brelse(bp, 0);
911 bp = NULL;
912 }
913 #else
914 if (ump->um_flags & UFS_ISAPPLEUFS) {
915 error = EINVAL;
916 goto out;
917 }
918 #endif
919
920 /*
921 * verify that we can access the last block in the fs
922 * if we're mounting read/write.
923 */
924
925 if (!ronly) {
926 error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
927 cred, 0, &bp);
928 if (bp->b_bcount != fs->fs_fsize)
929 error = EINVAL;
930 if (error) {
931 bset = BC_INVAL;
932 goto out;
933 }
934 brelse(bp, BC_INVAL);
935 bp = NULL;
936 }
937
938 fs->fs_ronly = ronly;
939 if (ronly == 0) {
940 fs->fs_clean <<= 1;
941 fs->fs_fmod = 1;
942 }
943 size = fs->fs_cssize;
944 blks = howmany(size, fs->fs_fsize);
945 if (fs->fs_contigsumsize > 0)
946 size += fs->fs_ncg * sizeof(int32_t);
947 size += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
948 space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
949 fs->fs_csp = space;
950 for (i = 0; i < blks; i += fs->fs_frag) {
951 size = fs->fs_bsize;
952 if (i + fs->fs_frag > blks)
953 size = (blks - i) * fs->fs_fsize;
954 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
955 cred, 0, &bp);
956 if (error) {
957 free(fs->fs_csp, M_UFSMNT);
958 goto out;
959 }
960 #ifdef FFS_EI
961 if (needswap)
962 ffs_csum_swap((struct csum *)bp->b_data,
963 (struct csum *)space, size);
964 else
965 #endif
966 memcpy(space, bp->b_data, (u_int)size);
967
968 space = (char *)space + size;
969 brelse(bp, 0);
970 bp = NULL;
971 }
972 if (fs->fs_contigsumsize > 0) {
973 fs->fs_maxcluster = lp = space;
974 for (i = 0; i < fs->fs_ncg; i++)
975 *lp++ = fs->fs_contigsumsize;
976 space = lp;
977 }
978 size = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
979 fs->fs_contigdirs = space;
980 space = (char *)space + size;
981 memset(fs->fs_contigdirs, 0, size);
982 /* Compatibility for old filesystems - XXX */
983 if (fs->fs_avgfilesize <= 0)
984 fs->fs_avgfilesize = AVFILESIZ;
985 if (fs->fs_avgfpdir <= 0)
986 fs->fs_avgfpdir = AFPDIR;
987 fs->fs_active = NULL;
988 mp->mnt_data = ump;
989 mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
990 mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
991 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
992 mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
993 if (UFS_MPISAPPLEUFS(ump)) {
994 /* NeXT used to keep short symlinks in the inode even
995 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen
996 * is probably -1, but we still need to be able to identify
997 * short symlinks.
998 */
999 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
1000 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
1001 mp->mnt_iflag |= IMNT_DTYPE;
1002 } else {
1003 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1004 ump->um_dirblksiz = DIRBLKSIZ;
1005 if (ump->um_maxsymlinklen > 0)
1006 mp->mnt_iflag |= IMNT_DTYPE;
1007 else
1008 mp->mnt_iflag &= ~IMNT_DTYPE;
1009 }
1010 mp->mnt_fs_bshift = fs->fs_bshift;
1011 mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */
1012 mp->mnt_flag |= MNT_LOCAL;
1013 mp->mnt_iflag |= IMNT_MPSAFE;
1014 #ifdef FFS_EI
1015 if (needswap)
1016 ump->um_flags |= UFS_NEEDSWAP;
1017 #endif
1018 ump->um_mountp = mp;
1019 ump->um_dev = dev;
1020 ump->um_devvp = devvp;
1021 ump->um_nindir = fs->fs_nindir;
1022 ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1023 ump->um_bptrtodb = fs->fs_fsbtodb;
1024 ump->um_seqinc = fs->fs_frag;
1025 for (i = 0; i < MAXQUOTAS; i++)
1026 ump->um_quotas[i] = NULLVP;
1027 devvp->v_specmountpoint = mp;
1028 if (ronly == 0 && (fs->fs_flags & FS_DOSOFTDEP)) {
1029 error = softdep_mount(devvp, mp, fs, cred);
1030 if (error) {
1031 free(fs->fs_csp, M_UFSMNT);
1032 goto out;
1033 }
1034 }
1035 if (ronly == 0 && fs->fs_snapinum[0] != 0)
1036 ffs_snapshot_mount(mp);
1037 #ifdef UFS_EXTATTR
1038 /*
1039 * Initialize file-backed extended attributes on UFS1 file
1040 * systems.
1041 */
1042 if (ump->um_fstype == UFS1) {
1043 ufs_extattr_uepm_init(&ump->um_extattr);
1044 #ifdef UFS_EXTATTR_AUTOSTART
1045 /*
1046 * XXX Just ignore errors. Not clear that we should
1047 * XXX fail the mount in this case.
1048 */
1049 (void) ufs_extattr_autostart(mp, l);
1050 #endif
1051 }
1052 #endif /* UFS_EXTATTR */
1053 return (0);
1054 out:
1055 fstrans_unmount(mp);
1056 if (fs)
1057 free(fs, M_UFSMNT);
1058 devvp->v_specmountpoint = NULL;
1059 if (bp)
1060 brelse(bp, bset);
1061 if (ump) {
1062 if (ump->um_oldfscompat)
1063 free(ump->um_oldfscompat, M_UFSMNT);
1064 mutex_destroy(&ump->um_lock);
1065 free(ump, M_UFSMNT);
1066 mp->mnt_data = NULL;
1067 }
1068 return (error);
1069 }
1070
1071 /*
1072 * Sanity checks for loading old filesystem superblocks.
1073 * See ffs_oldfscompat_write below for unwound actions.
1074 *
1075 * XXX - Parts get retired eventually.
1076 * Unfortunately new bits get added.
1077 */
1078 static void
1079 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1080 {
1081 off_t maxfilesize;
1082 int32_t *extrasave;
1083
1084 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1085 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1086 return;
1087
1088 if (!ump->um_oldfscompat)
1089 ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
1090 M_UFSMNT, M_WAITOK);
1091
1092 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1093 extrasave = ump->um_oldfscompat;
1094 extrasave += 512/sizeof(int32_t);
1095 extrasave[0] = fs->fs_old_npsect;
1096 extrasave[1] = fs->fs_old_interleave;
1097 extrasave[2] = fs->fs_old_trackskew;
1098
1099 /* These fields will be overwritten by their
1100 * original values in fs_oldfscompat_write, so it is harmless
1101 * to modify them here.
1102 */
1103 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1104 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1105 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1106 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1107
1108 fs->fs_maxbsize = fs->fs_bsize;
1109 fs->fs_time = fs->fs_old_time;
1110 fs->fs_size = fs->fs_old_size;
1111 fs->fs_dsize = fs->fs_old_dsize;
1112 fs->fs_csaddr = fs->fs_old_csaddr;
1113 fs->fs_sblockloc = sblockloc;
1114
1115 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1116
1117 if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1118 fs->fs_old_nrpos = 8;
1119 fs->fs_old_npsect = fs->fs_old_nsect;
1120 fs->fs_old_interleave = 1;
1121 fs->fs_old_trackskew = 0;
1122 }
1123
1124 if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1125 fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1126 fs->fs_qbmask = ~fs->fs_bmask;
1127 fs->fs_qfmask = ~fs->fs_fmask;
1128 }
1129
1130 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1131 if (fs->fs_maxfilesize > maxfilesize)
1132 fs->fs_maxfilesize = maxfilesize;
1133
1134 /* Compatibility for old filesystems */
1135 if (fs->fs_avgfilesize <= 0)
1136 fs->fs_avgfilesize = AVFILESIZ;
1137 if (fs->fs_avgfpdir <= 0)
1138 fs->fs_avgfpdir = AFPDIR;
1139
1140 #if 0
1141 if (bigcgs) {
1142 fs->fs_save_cgsize = fs->fs_cgsize;
1143 fs->fs_cgsize = fs->fs_bsize;
1144 }
1145 #endif
1146 }
1147
1148 /*
1149 * Unwinding superblock updates for old filesystems.
1150 * See ffs_oldfscompat_read above for details.
1151 *
1152 * XXX - Parts get retired eventually.
1153 * Unfortunately new bits get added.
1154 */
1155 static void
1156 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1157 {
1158 int32_t *extrasave;
1159
1160 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1161 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1162 return;
1163
1164 fs->fs_old_time = fs->fs_time;
1165 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1166 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1167 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1168 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1169 fs->fs_old_flags = fs->fs_flags;
1170
1171 #if 0
1172 if (bigcgs) {
1173 fs->fs_cgsize = fs->fs_save_cgsize;
1174 }
1175 #endif
1176
1177 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1178 extrasave = ump->um_oldfscompat;
1179 extrasave += 512/sizeof(int32_t);
1180 fs->fs_old_npsect = extrasave[0];
1181 fs->fs_old_interleave = extrasave[1];
1182 fs->fs_old_trackskew = extrasave[2];
1183
1184 }
1185
1186 /*
1187 * unmount system call
1188 */
1189 int
1190 ffs_unmount(struct mount *mp, int mntflags)
1191 {
1192 struct lwp *l = curlwp;
1193 struct ufsmount *ump = VFSTOUFS(mp);
1194 struct fs *fs = ump->um_fs;
1195 int error, flags, penderr;
1196
1197 penderr = 0;
1198 flags = 0;
1199 if (mntflags & MNT_FORCE)
1200 flags |= FORCECLOSE;
1201 #ifdef UFS_EXTATTR
1202 if (ump->um_fstype == UFS1) {
1203 ufs_extattr_stop(mp, l);
1204 ufs_extattr_uepm_destroy(&ump->um_extattr);
1205 }
1206 #endif /* UFS_EXTATTR */
1207 if (mp->mnt_flag & MNT_SOFTDEP) {
1208 if ((error = softdep_flushfiles(mp, flags, l)) != 0)
1209 return (error);
1210 } else {
1211 if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1212 return (error);
1213 }
1214 mutex_enter(&ump->um_lock);
1215 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1216 printf("%s: unmount pending error: blocks %" PRId64
1217 " files %d\n",
1218 fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
1219 fs->fs_pendingblocks = 0;
1220 fs->fs_pendinginodes = 0;
1221 penderr = 1;
1222 }
1223 mutex_exit(&ump->um_lock);
1224 if (fs->fs_ronly == 0 &&
1225 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1226 fs->fs_clean & FS_WASCLEAN) {
1227 /*
1228 * XXXX don't mark fs clean in the case of softdep
1229 * pending block errors, until they are fixed.
1230 */
1231 if (penderr == 0) {
1232 if (mp->mnt_flag & MNT_SOFTDEP)
1233 fs->fs_flags &= ~FS_DOSOFTDEP;
1234 fs->fs_clean = FS_ISCLEAN;
1235 }
1236 fs->fs_fmod = 0;
1237 (void) ffs_sbupdate(ump, MNT_WAIT);
1238 }
1239 if (ump->um_devvp->v_type != VBAD)
1240 ump->um_devvp->v_specmountpoint = NULL;
1241 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1242 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
1243 NOCRED);
1244 vput(ump->um_devvp);
1245 free(fs->fs_csp, M_UFSMNT);
1246 free(fs, M_UFSMNT);
1247 if (ump->um_oldfscompat != NULL)
1248 free(ump->um_oldfscompat, M_UFSMNT);
1249 softdep_unmount(mp);
1250 mutex_destroy(&ump->um_lock);
1251 ffs_snapshot_fini(ump);
1252 free(ump, M_UFSMNT);
1253 mp->mnt_data = NULL;
1254 mp->mnt_flag &= ~MNT_LOCAL;
1255 fstrans_unmount(mp);
1256 return (0);
1257 }
1258
1259 /*
1260 * Flush out all the files in a filesystem.
1261 */
1262 int
1263 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1264 {
1265 extern int doforce;
1266 struct ufsmount *ump;
1267 int error;
1268
1269 if (!doforce)
1270 flags &= ~FORCECLOSE;
1271 ump = VFSTOUFS(mp);
1272 #ifdef QUOTA
1273 if (mp->mnt_flag & MNT_QUOTA) {
1274 int i;
1275 if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
1276 return (error);
1277 for (i = 0; i < MAXQUOTAS; i++) {
1278 if (ump->um_quotas[i] == NULLVP)
1279 continue;
1280 quotaoff(l, mp, i);
1281 }
1282 /*
1283 * Here we fall through to vflush again to ensure
1284 * that we have gotten rid of all the system vnodes.
1285 */
1286 }
1287 #endif
1288 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1289 return (error);
1290 ffs_snapshot_unmount(mp);
1291 /*
1292 * Flush all the files.
1293 */
1294 error = vflush(mp, NULLVP, flags);
1295 if (error)
1296 return (error);
1297 /*
1298 * Flush filesystem metadata.
1299 */
1300 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1301 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1302 VOP_UNLOCK(ump->um_devvp, 0);
1303 return (error);
1304 }
1305
1306 /*
1307 * Get file system statistics.
1308 */
1309 int
1310 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1311 {
1312 struct ufsmount *ump;
1313 struct fs *fs;
1314
1315 ump = VFSTOUFS(mp);
1316 fs = ump->um_fs;
1317 mutex_enter(&ump->um_lock);
1318 sbp->f_bsize = fs->fs_bsize;
1319 sbp->f_frsize = fs->fs_fsize;
1320 sbp->f_iosize = fs->fs_bsize;
1321 sbp->f_blocks = fs->fs_dsize;
1322 sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1323 fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1324 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1325 fs->fs_minfree) / (u_int64_t) 100;
1326 if (sbp->f_bfree > sbp->f_bresvd)
1327 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1328 else
1329 sbp->f_bavail = 0;
1330 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
1331 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1332 sbp->f_favail = sbp->f_ffree;
1333 sbp->f_fresvd = 0;
1334 mutex_exit(&ump->um_lock);
1335 copy_statvfs_info(sbp, mp);
1336
1337 return (0);
1338 }
1339
1340 /*
1341 * Go through the disk queues to initiate sandbagged IO;
1342 * go through the inodes to write those that have been modified;
1343 * initiate the writing of the super block if it has been modified.
1344 *
1345 * Note: we are always called with the filesystem marked `MPBUSY'.
1346 */
1347 int
1348 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1349 {
1350 struct lwp *l = curlwp;
1351 struct vnode *vp, *mvp;
1352 struct inode *ip;
1353 struct ufsmount *ump = VFSTOUFS(mp);
1354 struct fs *fs;
1355 int error, count, allerror = 0;
1356
1357 fs = ump->um_fs;
1358 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1359 printf("fs = %s\n", fs->fs_fsmnt);
1360 panic("update: rofs mod");
1361 }
1362
1363 /* Allocate a marker vnode. */
1364 if ((mvp = vnalloc(mp)) == NULL)
1365 return (ENOMEM);
1366
1367 fstrans_start(mp, FSTRANS_SHARED);
1368 /*
1369 * Write back each (modified) inode.
1370 */
1371 mutex_enter(&mntvnode_lock);
1372 loop:
1373 /*
1374 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1375 * and vclean() can be called indirectly
1376 */
1377 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
1378 vmark(mvp, vp);
1379 /*
1380 * If the vnode that we are about to sync is no longer
1381 * associated with this mount point, start over.
1382 */
1383 if (vp->v_mount != mp || vismarker(vp))
1384 continue;
1385 mutex_enter(&vp->v_interlock);
1386 ip = VTOI(vp);
1387 if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 ||
1388 vp->v_type == VNON || ((ip->i_flag &
1389 (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
1390 LIST_EMPTY(&vp->v_dirtyblkhd) &&
1391 UVM_OBJ_IS_CLEAN(&vp->v_uobj)))
1392 {
1393 mutex_exit(&vp->v_interlock);
1394 continue;
1395 }
1396 if (vp->v_type == VBLK &&
1397 fstrans_getstate(mp) == FSTRANS_SUSPENDING) {
1398 mutex_exit(&vp->v_interlock);
1399 continue;
1400 }
1401 mutex_exit(&mntvnode_lock);
1402 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
1403 if (error) {
1404 mutex_enter(&mntvnode_lock);
1405 if (error == ENOENT) {
1406 (void)vunmark(mvp);
1407 goto loop;
1408 }
1409 continue;
1410 }
1411 if (vp->v_type == VREG && waitfor == MNT_LAZY)
1412 error = ffs_update(vp, NULL, NULL, 0);
1413 else
1414 error = VOP_FSYNC(vp, cred,
1415 waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0);
1416 if (error)
1417 allerror = error;
1418 vput(vp);
1419 mutex_enter(&mntvnode_lock);
1420 }
1421 mutex_exit(&mntvnode_lock);
1422 /*
1423 * Force stale file system control information to be flushed.
1424 */
1425 if (waitfor == MNT_WAIT && (ump->um_mountp->mnt_flag & MNT_SOFTDEP)) {
1426 if ((error = softdep_flushworklist(ump->um_mountp, &count, l)))
1427 allerror = error;
1428 /* Flushed work items may create new vnodes to clean */
1429 if (allerror == 0 && count) {
1430 mutex_enter(&mntvnode_lock);
1431 goto loop;
1432 }
1433 }
1434 if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1435 !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1436 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1437 if ((error = VOP_FSYNC(ump->um_devvp, cred,
1438 waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0)
1439 allerror = error;
1440 VOP_UNLOCK(ump->um_devvp, 0);
1441 if (allerror == 0 && waitfor == MNT_WAIT) {
1442 mutex_enter(&mntvnode_lock);
1443 goto loop;
1444 }
1445 }
1446 #ifdef QUOTA
1447 qsync(mp);
1448 #endif
1449 /*
1450 * Write back modified superblock.
1451 */
1452 if (fs->fs_fmod != 0) {
1453 fs->fs_fmod = 0;
1454 fs->fs_time = time_second;
1455 if ((error = ffs_cgupdate(ump, waitfor)))
1456 allerror = error;
1457 }
1458 fstrans_done(mp);
1459 vnfree(mvp);
1460 return (allerror);
1461 }
1462
1463 /*
1464 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1465 * in from disk. If it is in core, wait for the lock bit to clear, then
1466 * return the inode locked. Detection and handling of mount points must be
1467 * done by the calling routine.
1468 */
1469 int
1470 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1471 {
1472 struct fs *fs;
1473 struct inode *ip;
1474 struct ufsmount *ump;
1475 struct buf *bp;
1476 struct vnode *vp;
1477 dev_t dev;
1478 int error;
1479
1480 ump = VFSTOUFS(mp);
1481 dev = ump->um_dev;
1482
1483 retry:
1484 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1485 return (0);
1486
1487 /* Allocate a new vnode/inode. */
1488 if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
1489 *vpp = NULL;
1490 return (error);
1491 }
1492 ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1493
1494 /*
1495 * If someone beat us to it, put back the freshly allocated
1496 * vnode/inode pair and retry.
1497 */
1498 mutex_enter(&ufs_hashlock);
1499 if (ufs_ihashget(dev, ino, 0) != NULL) {
1500 mutex_exit(&ufs_hashlock);
1501 ungetnewvnode(vp);
1502 pool_cache_put(ffs_inode_cache, ip);
1503 goto retry;
1504 }
1505
1506 vp->v_vflag |= VV_LOCKSWORK;
1507 if ((mp->mnt_flag & MNT_SOFTDEP) != 0)
1508 vp->v_uflag |= VU_SOFTDEP;
1509
1510 /*
1511 * XXX MFS ends up here, too, to allocate an inode. Should we
1512 * XXX create another pool for MFS inodes?
1513 */
1514
1515 memset(ip, 0, sizeof(struct inode));
1516 vp->v_data = ip;
1517 ip->i_vnode = vp;
1518 ip->i_ump = ump;
1519 ip->i_fs = fs = ump->um_fs;
1520 ip->i_dev = dev;
1521 ip->i_number = ino;
1522 LIST_INIT(&ip->i_pcbufhd);
1523 #ifdef QUOTA
1524 ufsquota_init(ip);
1525 #endif
1526
1527 /*
1528 * Initialize genfs node, we might proceed to destroy it in
1529 * error branches.
1530 */
1531 genfs_node_init(vp, &ffs_genfsops);
1532
1533 /*
1534 * Put it onto its hash chain and lock it so that other requests for
1535 * this inode will block if they arrive while we are sleeping waiting
1536 * for old data structures to be purged or for the contents of the
1537 * disk portion of this inode to be read.
1538 */
1539
1540 ufs_ihashins(ip);
1541 mutex_exit(&ufs_hashlock);
1542
1543 /* Read in the disk contents for the inode, copy into the inode. */
1544 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1545 (int)fs->fs_bsize, NOCRED, 0, &bp);
1546 if (error) {
1547
1548 /*
1549 * The inode does not contain anything useful, so it would
1550 * be misleading to leave it on its hash chain. With mode
1551 * still zero, it will be unlinked and returned to the free
1552 * list by vput().
1553 */
1554
1555 vput(vp);
1556 brelse(bp, 0);
1557 *vpp = NULL;
1558 return (error);
1559 }
1560 if (ip->i_ump->um_fstype == UFS1)
1561 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1562 PR_WAITOK);
1563 else
1564 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1565 PR_WAITOK);
1566 ffs_load_inode(bp, ip, fs, ino);
1567 if (DOINGSOFTDEP(vp))
1568 softdep_load_inodeblock(ip);
1569 else
1570 ip->i_ffs_effnlink = ip->i_nlink;
1571 brelse(bp, 0);
1572
1573 /*
1574 * Initialize the vnode from the inode, check for aliases.
1575 * Note that the underlying vnode may have changed.
1576 */
1577
1578 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1579
1580 /*
1581 * Finish inode initialization now that aliasing has been resolved.
1582 */
1583
1584 ip->i_devvp = ump->um_devvp;
1585 VREF(ip->i_devvp);
1586
1587 /*
1588 * Ensure that uid and gid are correct. This is a temporary
1589 * fix until fsck has been changed to do the update.
1590 */
1591
1592 if (fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */
1593 ip->i_uid = ip->i_ffs1_ouid; /* XXX */
1594 ip->i_gid = ip->i_ffs1_ogid; /* XXX */
1595 } /* XXX */
1596 uvm_vnp_setsize(vp, ip->i_size);
1597 *vpp = vp;
1598 return (0);
1599 }
1600
1601 /*
1602 * File handle to vnode
1603 *
1604 * Have to be really careful about stale file handles:
1605 * - check that the inode number is valid
1606 * - call ffs_vget() to get the locked inode
1607 * - check for an unallocated inode (i_mode == 0)
1608 * - check that the given client host has export rights and return
1609 * those rights via. exflagsp and credanonp
1610 */
1611 int
1612 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1613 {
1614 struct ufid ufh;
1615 struct fs *fs;
1616
1617 if (fhp->fid_len != sizeof(struct ufid))
1618 return EINVAL;
1619
1620 memcpy(&ufh, fhp, sizeof(ufh));
1621 fs = VFSTOUFS(mp)->um_fs;
1622 if (ufh.ufid_ino < ROOTINO ||
1623 ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1624 return (ESTALE);
1625 return (ufs_fhtovp(mp, &ufh, vpp));
1626 }
1627
1628 /*
1629 * Vnode pointer to File handle
1630 */
1631 /* ARGSUSED */
1632 int
1633 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1634 {
1635 struct inode *ip;
1636 struct ufid ufh;
1637
1638 if (*fh_size < sizeof(struct ufid)) {
1639 *fh_size = sizeof(struct ufid);
1640 return E2BIG;
1641 }
1642 ip = VTOI(vp);
1643 *fh_size = sizeof(struct ufid);
1644 memset(&ufh, 0, sizeof(ufh));
1645 ufh.ufid_len = sizeof(struct ufid);
1646 ufh.ufid_ino = ip->i_number;
1647 ufh.ufid_gen = ip->i_gen;
1648 memcpy(fhp, &ufh, sizeof(ufh));
1649 return (0);
1650 }
1651
1652 void
1653 ffs_init(void)
1654 {
1655 if (ffs_initcount++ > 0)
1656 return;
1657
1658 ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1659 "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1660 ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1661 "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1662 ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1663 "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1664 softdep_initialize();
1665 ufs_init();
1666 }
1667
/*
 * Re-initialisation hook: forwards to the soft dependency code first
 * and to the UFS layer second.
 */
void
ffs_reinit(void)
{

	softdep_reinitialize();
	ufs_reinit();
}
1674
1675 void
1676 ffs_done(void)
1677 {
1678 if (--ffs_initcount > 0)
1679 return;
1680
1681 /* XXX softdep cleanup ? */
1682 ufs_done();
1683 pool_cache_destroy(ffs_dinode2_cache);
1684 pool_cache_destroy(ffs_dinode1_cache);
1685 pool_cache_destroy(ffs_inode_cache);
1686 }
1687
1688 SYSCTL_SETUP(sysctl_vfs_ffs_setup, "sysctl vfs.ffs subtree setup")
1689 {
1690 #if 0
1691 extern int doasyncfree;
1692 #endif
1693 extern int ffs_log_changeopt;
1694
1695 sysctl_createv(clog, 0, NULL, NULL,
1696 CTLFLAG_PERMANENT,
1697 CTLTYPE_NODE, "vfs", NULL,
1698 NULL, 0, NULL, 0,
1699 CTL_VFS, CTL_EOL);
1700 sysctl_createv(clog, 0, NULL, NULL,
1701 CTLFLAG_PERMANENT,
1702 CTLTYPE_NODE, "ffs",
1703 SYSCTL_DESCR("Berkeley Fast File System"),
1704 NULL, 0, NULL, 0,
1705 CTL_VFS, 1, CTL_EOL);
1706
1707 /*
1708 * @@@ should we even bother with these first three?
1709 */
1710 sysctl_createv(clog, 0, NULL, NULL,
1711 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1712 CTLTYPE_INT, "doclusterread", NULL,
1713 sysctl_notavail, 0, NULL, 0,
1714 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
1715 sysctl_createv(clog, 0, NULL, NULL,
1716 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1717 CTLTYPE_INT, "doclusterwrite", NULL,
1718 sysctl_notavail, 0, NULL, 0,
1719 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
1720 sysctl_createv(clog, 0, NULL, NULL,
1721 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1722 CTLTYPE_INT, "doreallocblks", NULL,
1723 sysctl_notavail, 0, NULL, 0,
1724 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
1725 #if 0
1726 sysctl_createv(clog, 0, NULL, NULL,
1727 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1728 CTLTYPE_INT, "doasyncfree",
1729 SYSCTL_DESCR("Release dirty blocks asynchronously"),
1730 NULL, 0, &doasyncfree, 0,
1731 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
1732 #endif
1733 sysctl_createv(clog, 0, NULL, NULL,
1734 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1735 CTLTYPE_INT, "log_changeopt",
1736 SYSCTL_DESCR("Log changes in optimization strategy"),
1737 NULL, 0, &ffs_log_changeopt, 0,
1738 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
1739 }
1740
1741 /*
1742 * Write a superblock and associated information back to disk.
1743 */
1744 int
1745 ffs_sbupdate(struct ufsmount *mp, int waitfor)
1746 {
1747 struct fs *fs = mp->um_fs;
1748 struct buf *bp;
1749 int error = 0;
1750 u_int32_t saveflag;
1751
1752 bp = getblk(mp->um_devvp,
1753 fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb),
1754 (int)fs->fs_sbsize, 0, 0);
1755 saveflag = fs->fs_flags & FS_INTERNAL;
1756 fs->fs_flags &= ~FS_INTERNAL;
1757
1758 memcpy(bp->b_data, fs, fs->fs_sbsize);
1759
1760 ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1761 #ifdef FFS_EI
1762 if (mp->um_flags & UFS_NEEDSWAP)
1763 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
1764 #endif
1765 fs->fs_flags |= saveflag;
1766
1767 if (waitfor == MNT_WAIT)
1768 error = bwrite(bp);
1769 else
1770 bawrite(bp);
1771 return (error);
1772 }
1773
1774 int
1775 ffs_cgupdate(struct ufsmount *mp, int waitfor)
1776 {
1777 struct fs *fs = mp->um_fs;
1778 struct buf *bp;
1779 int blks;
1780 void *space;
1781 int i, size, error = 0, allerror = 0;
1782
1783 allerror = ffs_sbupdate(mp, waitfor);
1784 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1785 space = fs->fs_csp;
1786 for (i = 0; i < blks; i += fs->fs_frag) {
1787 size = fs->fs_bsize;
1788 if (i + fs->fs_frag > blks)
1789 size = (blks - i) * fs->fs_fsize;
1790 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1791 size, 0, 0);
1792 #ifdef FFS_EI
1793 if (mp->um_flags & UFS_NEEDSWAP)
1794 ffs_csum_swap((struct csum*)space,
1795 (struct csum*)bp->b_data, size);
1796 else
1797 #endif
1798 memcpy(bp->b_data, space, (u_int)size);
1799 space = (char *)space + size;
1800 if (waitfor == MNT_WAIT)
1801 error = bwrite(bp);
1802 else
1803 bawrite(bp);
1804 }
1805 if (!allerror && error)
1806 allerror = error;
1807 return (allerror);
1808 }
1809
/*
 * Extended attribute control.  With UFS_EXTATTR configured, requests on
 * a UFS1 mount go to ufs_extattrctl(); every other request goes to
 * vfs_stdextattrctl().
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	struct ufsmount *ump = VFSTOUFS(mp);

	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	if (ump->um_fstype == UFS1) {
		return (ufs_extattrctl(mp, cmd, vp, attrnamespace,
		    attrname));
	}
#endif

	return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname));
}
1824
1825 int
1826 ffs_suspendctl(struct mount *mp, int cmd)
1827 {
1828 int error;
1829 struct lwp *l = curlwp;
1830
1831 switch (cmd) {
1832 case SUSPEND_SUSPEND:
1833 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
1834 return error;
1835 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
1836 if (error == 0)
1837 error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
1838 if (error != 0) {
1839 (void) fstrans_setstate(mp, FSTRANS_NORMAL);
1840 return error;
1841 }
1842 return 0;
1843
1844 case SUSPEND_RESUME:
1845 return fstrans_setstate(mp, FSTRANS_NORMAL);
1846
1847 default:
1848 return EINVAL;
1849 }
1850 }
1851