ffs_vfsops.c revision 1.184 1 /* $NetBSD: ffs_vfsops.c,v 1.184 2006/07/23 22:06:15 ad Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1991, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.184 2006/07/23 22:06:15 ad Exp $");
36
37 #if defined(_KERNEL_OPT)
38 #include "opt_ffs.h"
39 #include "opt_quota.h"
40 #include "opt_softdep.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/namei.h>
46 #include <sys/proc.h>
47 #include <sys/kernel.h>
48 #include <sys/vnode.h>
49 #include <sys/socket.h>
50 #include <sys/mount.h>
51 #include <sys/buf.h>
52 #include <sys/device.h>
53 #include <sys/mbuf.h>
54 #include <sys/file.h>
55 #include <sys/disklabel.h>
56 #include <sys/ioctl.h>
57 #include <sys/errno.h>
58 #include <sys/malloc.h>
59 #include <sys/pool.h>
60 #include <sys/lock.h>
61 #include <sys/sysctl.h>
62 #include <sys/conf.h>
63 #include <sys/kauth.h>
64
65 #include <miscfs/specfs/specdev.h>
66
67 #include <ufs/ufs/quota.h>
68 #include <ufs/ufs/ufsmount.h>
69 #include <ufs/ufs/inode.h>
70 #include <ufs/ufs/dir.h>
71 #include <ufs/ufs/ufs_extern.h>
72 #include <ufs/ufs/ufs_bswap.h>
73
74 #include <ufs/ffs/fs.h>
75 #include <ufs/ffs/ffs_extern.h>
76
/*
 * Number of times ffs_init() has been called; starts at zero.
 * NOTE(review): ffs_init()/ffs_done() are not visible in this chunk --
 * presumably they use this as a reference count; confirm there.
 */
int ffs_initcount = 0;
79
80 extern struct lock ufs_hashlock;
81
82 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
83 extern const struct vnodeopv_desc ffs_specop_opv_desc;
84 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
85
/*
 * NULL-terminated list of the vnode operation vector descriptors that
 * FFS provides (regular, special-device and FIFO vnodes, going by the
 * descriptor names).  Referenced from ffs_vfsops below.
 */
const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
	&ffs_vnodeop_opv_desc,
	&ffs_specop_opv_desc,
	&ffs_fifoop_opv_desc,
	NULL,
};
92
/*
 * VFS operations switch for FFS, registered through VFS_ATTACH().
 *
 * This is a positional initializer: the order of the entries must match
 * the member order of struct vfsops (declared outside this file).
 * Generic UFS routines (ufs_*) are used where FFS needs no special
 * handling; everything else is implemented by ffs_* functions.
 */
struct vfsops ffs_vfsops = {
	MOUNT_FFS,
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statvfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_reinit,
	ffs_done,
	ffs_mountroot,
	ffs_snapshot,
	ffs_extattrctl,
	ffs_vnodeopv_descs,
};
VFS_ATTACH(ffs_vfsops);
114
115 static const struct genfs_ops ffs_genfsops = {
116 .gop_size = ffs_gop_size,
117 .gop_alloc = ufs_gop_alloc,
118 .gop_write = genfs_gop_write,
119 .gop_markupdate = ufs_gop_markupdate,
120 };
121
122 static const struct ufs_ops ffs_ufsops = {
123 .uo_itimes = ffs_itimes,
124 .uo_update = ffs_update,
125 .uo_truncate = ffs_truncate,
126 .uo_valloc = ffs_valloc,
127 .uo_vfree = ffs_vfree,
128 .uo_balloc = ffs_balloc,
129 };
130
/*
 * Memory pools for in-core inodes and for the UFS1/UFS2 on-disk inode
 * copies.  Each pool is sized for one object of the named structure and
 * uses the pool_allocator_nointr back-end allocator.
 */
POOL_INIT(ffs_inode_pool, sizeof(struct inode), 0, 0, 0, "ffsinopl",
    &pool_allocator_nointr);
POOL_INIT(ffs_dinode1_pool, sizeof(struct ufs1_dinode), 0, 0, 0, "dino1pl",
    &pool_allocator_nointr);
POOL_INIT(ffs_dinode2_pool, sizeof(struct ufs2_dinode), 0, 0, 0, "dino2pl",
    &pool_allocator_nointr);
137
138 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
139 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
140
141 /*
142 * Called by main() when ffs is going to be mounted as root.
143 */
144
145 int
146 ffs_mountroot(void)
147 {
148 struct fs *fs;
149 struct mount *mp;
150 struct lwp *l = curlwp; /* XXX */
151 struct ufsmount *ump;
152 int error;
153
154 if (device_class(root_device) != DV_DISK)
155 return (ENODEV);
156
157 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
158 vrele(rootvp);
159 return (error);
160 }
161 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
162 mp->mnt_op->vfs_refcount--;
163 vfs_unbusy(mp);
164 free(mp, M_MOUNT);
165 return (error);
166 }
167 simple_lock(&mountlist_slock);
168 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
169 simple_unlock(&mountlist_slock);
170 ump = VFSTOUFS(mp);
171 fs = ump->um_fs;
172 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
173 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
174 (void)ffs_statvfs(mp, &mp->mnt_stat, l);
175 vfs_unbusy(mp);
176 setrootfstime((time_t)fs->fs_time);
177 return (0);
178 }
179
180 /*
181 * VFS Operations.
182 *
183 * mount system call
184 */
185 int
186 ffs_mount(struct mount *mp, const char *path, void *data,
187 struct nameidata *ndp, struct lwp *l)
188 {
189 struct vnode *devvp = NULL;
190 struct ufs_args args;
191 struct ufsmount *ump = NULL;
192 struct fs *fs;
193 int error, flags, update;
194 mode_t accessmode;
195
196 if (mp->mnt_flag & MNT_GETARGS) {
197 ump = VFSTOUFS(mp);
198 if (ump == NULL)
199 return EIO;
200 args.fspec = NULL;
201 return copyout(&args, data, sizeof(args));
202 }
203 error = copyin(data, &args, sizeof (struct ufs_args));
204 if (error)
205 return (error);
206
207 #if !defined(SOFTDEP)
208 mp->mnt_flag &= ~MNT_SOFTDEP;
209 #endif
210
211 update = mp->mnt_flag & MNT_UPDATE;
212
213 /* Check arguments */
214 if (args.fspec != NULL) {
215 /*
216 * Look up the name and verify that it's sane.
217 */
218 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, l);
219 if ((error = namei(ndp)) != 0)
220 return (error);
221 devvp = ndp->ni_vp;
222
223 if (!update) {
224 /*
225 * Be sure this is a valid block device
226 */
227 if (devvp->v_type != VBLK)
228 error = ENOTBLK;
229 else if (bdevsw_lookup(devvp->v_rdev) == NULL)
230 error = ENXIO;
231 } else {
232 /*
233 * Be sure we're still naming the same device
234 * used for our initial mount
235 */
236 ump = VFSTOUFS(mp);
237 if (devvp != ump->um_devvp)
238 error = EINVAL;
239 }
240 } else {
241 if (!update) {
242 /* New mounts must have a filename for the device */
243 return (EINVAL);
244 } else {
245 /* Use the extant mount */
246 ump = VFSTOUFS(mp);
247 devvp = ump->um_devvp;
248 vref(devvp);
249 }
250 }
251
252 /*
253 * If mount by non-root, then verify that user has necessary
254 * permissions on the device.
255 */
256 if (error == 0 && kauth_cred_geteuid(l->l_cred) != 0) {
257 accessmode = VREAD;
258 if (update ?
259 (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
260 (mp->mnt_flag & MNT_RDONLY) == 0)
261 accessmode |= VWRITE;
262 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
263 error = VOP_ACCESS(devvp, accessmode, l->l_cred, l);
264 VOP_UNLOCK(devvp, 0);
265 }
266
267 if (error) {
268 vrele(devvp);
269 return (error);
270 }
271
272 if (!update) {
273 int xflags;
274
275 /*
276 * Disallow multiple mounts of the same device.
277 * Disallow mounting of a device that is currently in use
278 * (except for root, which might share swap device for
279 * miniroot).
280 */
281 error = vfs_mountedon(devvp);
282 if (error)
283 goto fail;
284 if (vcount(devvp) > 1 && devvp != rootvp) {
285 error = EBUSY;
286 goto fail;
287 }
288 if (mp->mnt_flag & MNT_RDONLY)
289 xflags = FREAD;
290 else
291 xflags = FREAD|FWRITE;
292 error = VOP_OPEN(devvp, xflags, FSCRED, l);
293 if (error)
294 goto fail;
295 error = ffs_mountfs(devvp, mp, l);
296 if (error) {
297 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
298 (void)VOP_CLOSE(devvp, xflags, NOCRED, l);
299 VOP_UNLOCK(devvp, 0);
300 goto fail;
301 }
302
303 ump = VFSTOUFS(mp);
304 fs = ump->um_fs;
305 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
306 (MNT_SOFTDEP | MNT_ASYNC)) {
307 printf("%s fs uses soft updates, "
308 "ignoring async mode\n",
309 fs->fs_fsmnt);
310 mp->mnt_flag &= ~MNT_ASYNC;
311 }
312 } else {
313 /*
314 * Update the mount.
315 */
316
317 /*
318 * The initial mount got a reference on this
319 * device, so drop the one obtained via
320 * namei(), above.
321 */
322 vrele(devvp);
323
324 ump = VFSTOUFS(mp);
325 fs = ump->um_fs;
326 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
327 /*
328 * Changing from r/w to r/o
329 */
330 vn_start_write(NULL, &mp, V_WAIT);
331 flags = WRITECLOSE;
332 if (mp->mnt_flag & MNT_FORCE)
333 flags |= FORCECLOSE;
334 if (mp->mnt_flag & MNT_SOFTDEP)
335 error = softdep_flushfiles(mp, flags, l);
336 else
337 error = ffs_flushfiles(mp, flags, l);
338 if (fs->fs_pendingblocks != 0 ||
339 fs->fs_pendinginodes != 0) {
340 printf("%s: update error: blocks %" PRId64
341 " files %d\n",
342 fs->fs_fsmnt, fs->fs_pendingblocks,
343 fs->fs_pendinginodes);
344 fs->fs_pendingblocks = 0;
345 fs->fs_pendinginodes = 0;
346 }
347 if (error == 0 &&
348 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
349 fs->fs_clean & FS_WASCLEAN) {
350 if (mp->mnt_flag & MNT_SOFTDEP)
351 fs->fs_flags &= ~FS_DOSOFTDEP;
352 fs->fs_clean = FS_ISCLEAN;
353 (void) ffs_sbupdate(ump, MNT_WAIT);
354 }
355 vn_finished_write(mp, 0);
356 if (error)
357 return (error);
358 fs->fs_ronly = 1;
359 fs->fs_fmod = 0;
360 }
361
362 /*
363 * Flush soft dependencies if disabling it via an update
364 * mount. This may leave some items to be processed,
365 * so don't do this yet XXX.
366 */
367 if ((fs->fs_flags & FS_DOSOFTDEP) &&
368 !(mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
369 #ifdef notyet
370 vn_start_write(NULL, &mp, V_WAIT);
371 flags = WRITECLOSE;
372 if (mp->mnt_flag & MNT_FORCE)
373 flags |= FORCECLOSE;
374 error = softdep_flushfiles(mp, flags, l);
375 if (error == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0)
376 fs->fs_flags &= ~FS_DOSOFTDEP;
377 (void) ffs_sbupdate(ump, MNT_WAIT);
378 vn_finished_write(mp);
379 #elif defined(SOFTDEP)
380 mp->mnt_flag |= MNT_SOFTDEP;
381 #endif
382 }
383
384 /*
385 * When upgrading to a softdep mount, we must first flush
386 * all vnodes. (not done yet -- see above)
387 */
388 if (!(fs->fs_flags & FS_DOSOFTDEP) &&
389 (mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
390 #ifdef notyet
391 vn_start_write(NULL, &mp, V_WAIT);
392 flags = WRITECLOSE;
393 if (mp->mnt_flag & MNT_FORCE)
394 flags |= FORCECLOSE;
395 error = ffs_flushfiles(mp, flags, l);
396 vn_finished_write(mp);
397 #else
398 mp->mnt_flag &= ~MNT_SOFTDEP;
399 #endif
400 }
401
402 if (mp->mnt_flag & MNT_RELOAD) {
403 error = ffs_reload(mp, l->l_cred, l);
404 if (error)
405 return (error);
406 }
407
408 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
409 /*
410 * Changing from read-only to read/write
411 */
412 fs->fs_ronly = 0;
413 fs->fs_clean <<= 1;
414 fs->fs_fmod = 1;
415 if ((fs->fs_flags & FS_DOSOFTDEP)) {
416 error = softdep_mount(devvp, mp, fs,
417 l->l_cred);
418 if (error)
419 return (error);
420 }
421 if (fs->fs_snapinum[0] != 0)
422 ffs_snapshot_mount(mp);
423 }
424 if (args.fspec == NULL)
425 return EINVAL;
426 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
427 (MNT_SOFTDEP | MNT_ASYNC)) {
428 printf("%s fs uses soft updates, ignoring async mode\n",
429 fs->fs_fsmnt);
430 mp->mnt_flag &= ~MNT_ASYNC;
431 }
432 }
433
434 error = set_statvfs_info(path, UIO_USERSPACE, args.fspec,
435 UIO_USERSPACE, mp, l);
436 if (error == 0)
437 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
438 sizeof(fs->fs_fsmnt));
439 if (mp->mnt_flag & MNT_SOFTDEP)
440 fs->fs_flags |= FS_DOSOFTDEP;
441 else
442 fs->fs_flags &= ~FS_DOSOFTDEP;
443 if (fs->fs_fmod != 0) { /* XXX */
444 fs->fs_fmod = 0;
445 if (fs->fs_clean & FS_WASCLEAN)
446 fs->fs_time = time_second;
447 else {
448 printf("%s: file system not clean (fs_clean=%x); please fsck(8)\n",
449 mp->mnt_stat.f_mntfromname, fs->fs_clean);
450 printf("%s: lost blocks %" PRId64 " files %d\n",
451 mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
452 fs->fs_pendinginodes);
453 }
454 (void) ffs_cgupdate(ump, MNT_WAIT);
455 }
456 return (error);
457
458 fail:
459 vrele(devvp);
460 return (error);
461 }
462
463 /*
464 * Reload all incore data for a filesystem (used after running fsck on
465 * the root filesystem and finding things to fix). The filesystem must
466 * be mounted read-only.
467 *
468 * Things to do to update the mount:
469 * 1) invalidate all cached meta-data.
470 * 2) re-read superblock from disk.
471 * 3) re-read summary information from disk.
472 * 4) invalidate all inactive vnodes.
473 * 5) invalidate all cached file data.
474 * 6) re-read inode data for all active vnodes.
475 */
476 int
477 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
478 {
479 struct vnode *vp, *nvp, *devvp;
480 struct inode *ip;
481 void *space;
482 struct buf *bp;
483 struct fs *fs, *newfs;
484 struct partinfo dpart;
485 int i, blks, size, error;
486 int32_t *lp;
487 struct ufsmount *ump;
488 daddr_t sblockloc;
489
490 if ((mp->mnt_flag & MNT_RDONLY) == 0)
491 return (EINVAL);
492
493 ump = VFSTOUFS(mp);
494 /*
495 * Step 1: invalidate all cached meta-data.
496 */
497 devvp = ump->um_devvp;
498 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
499 error = vinvalbuf(devvp, 0, cred, l, 0, 0);
500 VOP_UNLOCK(devvp, 0);
501 if (error)
502 panic("ffs_reload: dirty1");
503 /*
504 * Step 2: re-read superblock from disk.
505 */
506 fs = ump->um_fs;
507 if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, NOCRED, l) != 0)
508 size = DEV_BSIZE;
509 else
510 size = dpart.disklab->d_secsize;
511 /* XXX we don't handle possibility that superblock moved. */
512 error = bread(devvp, fs->fs_sblockloc / size, fs->fs_sbsize,
513 NOCRED, &bp);
514 if (error) {
515 brelse(bp);
516 return (error);
517 }
518 newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
519 memcpy(newfs, bp->b_data, fs->fs_sbsize);
520 #ifdef FFS_EI
521 if (ump->um_flags & UFS_NEEDSWAP) {
522 ffs_sb_swap((struct fs*)bp->b_data, newfs);
523 fs->fs_flags |= FS_SWAPPED;
524 } else
525 #endif
526 fs->fs_flags &= ~FS_SWAPPED;
527 if ((newfs->fs_magic != FS_UFS1_MAGIC &&
528 newfs->fs_magic != FS_UFS2_MAGIC)||
529 newfs->fs_bsize > MAXBSIZE ||
530 newfs->fs_bsize < sizeof(struct fs)) {
531 brelse(bp);
532 free(newfs, M_UFSMNT);
533 return (EIO); /* XXX needs translation */
534 }
535 /* Store off old fs_sblockloc for fs_oldfscompat_read. */
536 sblockloc = fs->fs_sblockloc;
537 /*
538 * Copy pointer fields back into superblock before copying in XXX
539 * new superblock. These should really be in the ufsmount. XXX
540 * Note that important parameters (eg fs_ncg) are unchanged.
541 */
542 newfs->fs_csp = fs->fs_csp;
543 newfs->fs_maxcluster = fs->fs_maxcluster;
544 newfs->fs_contigdirs = fs->fs_contigdirs;
545 newfs->fs_ronly = fs->fs_ronly;
546 newfs->fs_active = fs->fs_active;
547 memcpy(fs, newfs, (u_int)fs->fs_sbsize);
548 brelse(bp);
549 free(newfs, M_UFSMNT);
550
551 /* Recheck for apple UFS filesystem */
552 ump->um_flags &= ~UFS_ISAPPLEUFS;
553 /* First check to see if this is tagged as an Apple UFS filesystem
554 * in the disklabel
555 */
556 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred, l) == 0) &&
557 (dpart.part->p_fstype == FS_APPLEUFS)) {
558 ump->um_flags |= UFS_ISAPPLEUFS;
559 }
560 #ifdef APPLE_UFS
561 else {
562 /* Manually look for an apple ufs label, and if a valid one
563 * is found, then treat it like an Apple UFS filesystem anyway
564 */
565 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
566 APPLEUFS_LABEL_SIZE, cred, &bp);
567 if (error) {
568 brelse(bp);
569 return (error);
570 }
571 error = ffs_appleufs_validate(fs->fs_fsmnt,
572 (struct appleufslabel *)bp->b_data,NULL);
573 if (error == 0)
574 ump->um_flags |= UFS_ISAPPLEUFS;
575 brelse(bp);
576 bp = NULL;
577 }
578 #else
579 if (ump->um_flags & UFS_ISAPPLEUFS)
580 return (EIO);
581 #endif
582
583 if (UFS_MPISAPPLEUFS(ump)) {
584 /* see comment about NeXT below */
585 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
586 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
587 mp->mnt_iflag |= IMNT_DTYPE;
588 } else {
589 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
590 ump->um_dirblksiz = DIRBLKSIZ;
591 if (ump->um_maxsymlinklen > 0)
592 mp->mnt_iflag |= IMNT_DTYPE;
593 else
594 mp->mnt_iflag &= ~IMNT_DTYPE;
595 }
596 ffs_oldfscompat_read(fs, ump, sblockloc);
597 ump->um_maxfilesize = fs->fs_maxfilesize;
598 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
599 fs->fs_pendingblocks = 0;
600 fs->fs_pendinginodes = 0;
601 }
602
603 ffs_statvfs(mp, &mp->mnt_stat, l);
604 /*
605 * Step 3: re-read summary information from disk.
606 */
607 blks = howmany(fs->fs_cssize, fs->fs_fsize);
608 space = fs->fs_csp;
609 for (i = 0; i < blks; i += fs->fs_frag) {
610 size = fs->fs_bsize;
611 if (i + fs->fs_frag > blks)
612 size = (blks - i) * fs->fs_fsize;
613 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
614 NOCRED, &bp);
615 if (error) {
616 brelse(bp);
617 return (error);
618 }
619 #ifdef FFS_EI
620 if (UFS_FSNEEDSWAP(fs))
621 ffs_csum_swap((struct csum *)bp->b_data,
622 (struct csum *)space, size);
623 else
624 #endif
625 memcpy(space, bp->b_data, (size_t)size);
626 space = (char *)space + size;
627 brelse(bp);
628 }
629 if ((fs->fs_flags & FS_DOSOFTDEP))
630 softdep_mount(devvp, mp, fs, cred);
631 if (fs->fs_snapinum[0] != 0)
632 ffs_snapshot_mount(mp);
633 /*
634 * We no longer know anything about clusters per cylinder group.
635 */
636 if (fs->fs_contigsumsize > 0) {
637 lp = fs->fs_maxcluster;
638 for (i = 0; i < fs->fs_ncg; i++)
639 *lp++ = fs->fs_contigsumsize;
640 }
641
642 loop:
643 simple_lock(&mntvnode_slock);
644 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
645 if (vp->v_mount != mp) {
646 simple_unlock(&mntvnode_slock);
647 goto loop;
648 }
649 nvp = vp->v_mntvnodes.le_next;
650 /*
651 * Step 4: invalidate all inactive vnodes.
652 */
653 if (vrecycle(vp, &mntvnode_slock, l))
654 goto loop;
655 /*
656 * Step 5: invalidate all cached file data.
657 */
658 simple_lock(&vp->v_interlock);
659 simple_unlock(&mntvnode_slock);
660 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
661 goto loop;
662 if (vinvalbuf(vp, 0, cred, l, 0, 0))
663 panic("ffs_reload: dirty2");
664 /*
665 * Step 6: re-read inode data for all active vnodes.
666 */
667 ip = VTOI(vp);
668 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
669 (int)fs->fs_bsize, NOCRED, &bp);
670 if (error) {
671 brelse(bp);
672 vput(vp);
673 return (error);
674 }
675 ffs_load_inode(bp, ip, fs, ip->i_number);
676 ip->i_ffs_effnlink = ip->i_nlink;
677 brelse(bp);
678 vput(vp);
679 simple_lock(&mntvnode_slock);
680 }
681 simple_unlock(&mntvnode_slock);
682 return (0);
683 }
684
/*
 * Possible superblock locations ordered from most to least likely.
 * Byte offsets on the device; the list is terminated by -1, which is
 * what the search loop in ffs_mountfs() stops on.
 */
static const int sblock_try[] = SBLOCKSEARCH;
689
690 /*
691 * Common code for mount and mountroot
692 */
693 int
694 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
695 {
696 struct ufsmount *ump;
697 struct buf *bp;
698 struct fs *fs;
699 dev_t dev;
700 struct partinfo dpart;
701 void *space;
702 daddr_t sblockloc, fsblockloc;
703 int blks, fstype;
704 int error, i, size, ronly;
705 #ifdef FFS_EI
706 int needswap = 0; /* keep gcc happy */
707 #endif
708 int32_t *lp;
709 kauth_cred_t cred;
710 u_int32_t sbsize = 8192; /* keep gcc happy*/
711
712 dev = devvp->v_rdev;
713 cred = l ? l->l_cred : NOCRED;
714
715 /* Flush out any old buffers remaining from a previous use. */
716 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
717 error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
718 VOP_UNLOCK(devvp, 0);
719 if (error)
720 return (error);
721
722 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
723 if (VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred, l) != 0)
724 size = DEV_BSIZE;
725 else
726 size = dpart.disklab->d_secsize;
727
728 bp = NULL;
729 ump = NULL;
730 fs = NULL;
731 sblockloc = 0;
732 fstype = 0;
733
734 /*
735 * Try reading the superblock in each of its possible locations. */
736 for (i = 0; ; i++) {
737 if (bp != NULL) {
738 bp->b_flags |= B_NOCACHE;
739 brelse(bp);
740 bp = NULL;
741 }
742 if (sblock_try[i] == -1) {
743 error = EINVAL;
744 fs = NULL;
745 goto out;
746 }
747 error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE, cred,
748 &bp);
749 if (error) {
750 fs = NULL;
751 goto out;
752 }
753 fs = (struct fs*)bp->b_data;
754 fsblockloc = sblockloc = sblock_try[i];
755 if (fs->fs_magic == FS_UFS1_MAGIC) {
756 sbsize = fs->fs_sbsize;
757 fstype = UFS1;
758 #ifdef FFS_EI
759 needswap = 0;
760 } else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
761 sbsize = bswap32(fs->fs_sbsize);
762 fstype = UFS1;
763 needswap = 1;
764 #endif
765 } else if (fs->fs_magic == FS_UFS2_MAGIC) {
766 sbsize = fs->fs_sbsize;
767 fstype = UFS2;
768 #ifdef FFS_EI
769 needswap = 0;
770 } else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
771 sbsize = bswap32(fs->fs_sbsize);
772 fstype = UFS2;
773 needswap = 1;
774 #endif
775 } else
776 continue;
777
778
779 /* fs->fs_sblockloc isn't defined for old filesystems */
780 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
781 if (sblockloc == SBLOCK_UFS2)
782 /*
783 * This is likely to be the first alternate
784 * in a filesystem with 64k blocks.
785 * Don't use it.
786 */
787 continue;
788 fsblockloc = sblockloc;
789 } else {
790 fsblockloc = fs->fs_sblockloc;
791 #ifdef FFS_EI
792 if (needswap)
793 fsblockloc = bswap64(fsblockloc);
794 #endif
795 }
796
797 /* Check we haven't found an alternate superblock */
798 if (fsblockloc != sblockloc)
799 continue;
800
801 /* Validate size of superblock */
802 if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
803 continue;
804
805 /* Ok seems to be a good superblock */
806 break;
807 }
808
809 fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
810 memcpy(fs, bp->b_data, sbsize);
811
812 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
813 memset(ump, 0, sizeof *ump);
814 TAILQ_INIT(&ump->um_snapshots);
815 ump->um_fs = fs;
816 ump->um_ops = &ffs_ufsops;
817
818 #ifdef FFS_EI
819 if (needswap) {
820 ffs_sb_swap((struct fs*)bp->b_data, fs);
821 fs->fs_flags |= FS_SWAPPED;
822 } else
823 #endif
824 fs->fs_flags &= ~FS_SWAPPED;
825
826 ffs_oldfscompat_read(fs, ump, sblockloc);
827 ump->um_maxfilesize = fs->fs_maxfilesize;
828
829 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
830 fs->fs_pendingblocks = 0;
831 fs->fs_pendinginodes = 0;
832 }
833
834 ump->um_fstype = fstype;
835 if (fs->fs_sbsize < SBLOCKSIZE)
836 bp->b_flags |= B_INVAL;
837 brelse(bp);
838 bp = NULL;
839
840 /* First check to see if this is tagged as an Apple UFS filesystem
841 * in the disklabel
842 */
843 if ((VOP_IOCTL(devvp, DIOCGPART, &dpart, FREAD, cred, l) == 0) &&
844 (dpart.part->p_fstype == FS_APPLEUFS)) {
845 ump->um_flags |= UFS_ISAPPLEUFS;
846 }
847 #ifdef APPLE_UFS
848 else {
849 /* Manually look for an apple ufs label, and if a valid one
850 * is found, then treat it like an Apple UFS filesystem anyway
851 */
852 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / size),
853 APPLEUFS_LABEL_SIZE, cred, &bp);
854 if (error)
855 goto out;
856 error = ffs_appleufs_validate(fs->fs_fsmnt,
857 (struct appleufslabel *)bp->b_data,NULL);
858 if (error == 0) {
859 ump->um_flags |= UFS_ISAPPLEUFS;
860 }
861 brelse(bp);
862 bp = NULL;
863 }
864 #else
865 if (ump->um_flags & UFS_ISAPPLEUFS) {
866 error = EINVAL;
867 goto out;
868 }
869 #endif
870
871 /*
872 * verify that we can access the last block in the fs
873 * if we're mounting read/write.
874 */
875
876 if (!ronly) {
877 error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
878 cred, &bp);
879 if (bp->b_bcount != fs->fs_fsize)
880 error = EINVAL;
881 bp->b_flags |= B_INVAL;
882 if (error)
883 goto out;
884 brelse(bp);
885 bp = NULL;
886 }
887
888 fs->fs_ronly = ronly;
889 if (ronly == 0) {
890 fs->fs_clean <<= 1;
891 fs->fs_fmod = 1;
892 }
893 size = fs->fs_cssize;
894 blks = howmany(size, fs->fs_fsize);
895 if (fs->fs_contigsumsize > 0)
896 size += fs->fs_ncg * sizeof(int32_t);
897 size += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
898 space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
899 fs->fs_csp = space;
900 for (i = 0; i < blks; i += fs->fs_frag) {
901 size = fs->fs_bsize;
902 if (i + fs->fs_frag > blks)
903 size = (blks - i) * fs->fs_fsize;
904 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
905 cred, &bp);
906 if (error) {
907 free(fs->fs_csp, M_UFSMNT);
908 goto out;
909 }
910 #ifdef FFS_EI
911 if (needswap)
912 ffs_csum_swap((struct csum *)bp->b_data,
913 (struct csum *)space, size);
914 else
915 #endif
916 memcpy(space, bp->b_data, (u_int)size);
917
918 space = (char *)space + size;
919 brelse(bp);
920 bp = NULL;
921 }
922 if (fs->fs_contigsumsize > 0) {
923 fs->fs_maxcluster = lp = space;
924 for (i = 0; i < fs->fs_ncg; i++)
925 *lp++ = fs->fs_contigsumsize;
926 space = lp;
927 }
928 size = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
929 fs->fs_contigdirs = space;
930 space = (char *)space + size;
931 memset(fs->fs_contigdirs, 0, size);
932 /* Compatibility for old filesystems - XXX */
933 if (fs->fs_avgfilesize <= 0)
934 fs->fs_avgfilesize = AVFILESIZ;
935 if (fs->fs_avgfpdir <= 0)
936 fs->fs_avgfpdir = AFPDIR;
937 fs->fs_active = NULL;
938 mp->mnt_data = ump;
939 mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
940 mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
941 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
942 mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
943 if (UFS_MPISAPPLEUFS(ump)) {
944 /* NeXT used to keep short symlinks in the inode even
945 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen
946 * is probably -1, but we still need to be able to identify
947 * short symlinks.
948 */
949 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
950 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
951 mp->mnt_iflag |= IMNT_DTYPE;
952 } else {
953 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
954 ump->um_dirblksiz = DIRBLKSIZ;
955 if (ump->um_maxsymlinklen > 0)
956 mp->mnt_iflag |= IMNT_DTYPE;
957 else
958 mp->mnt_iflag &= ~IMNT_DTYPE;
959 }
960 mp->mnt_fs_bshift = fs->fs_bshift;
961 mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */
962 mp->mnt_flag |= MNT_LOCAL;
963 #ifdef FFS_EI
964 if (needswap)
965 ump->um_flags |= UFS_NEEDSWAP;
966 #endif
967 ump->um_mountp = mp;
968 ump->um_dev = dev;
969 ump->um_devvp = devvp;
970 ump->um_nindir = fs->fs_nindir;
971 ump->um_lognindir = ffs(fs->fs_nindir) - 1;
972 ump->um_bptrtodb = fs->fs_fsbtodb;
973 ump->um_seqinc = fs->fs_frag;
974 for (i = 0; i < MAXQUOTAS; i++)
975 ump->um_quotas[i] = NULLVP;
976 devvp->v_specmountpoint = mp;
977 if (ronly == 0 && (fs->fs_flags & FS_DOSOFTDEP)) {
978 error = softdep_mount(devvp, mp, fs, cred);
979 if (error) {
980 free(fs->fs_csp, M_UFSMNT);
981 goto out;
982 }
983 }
984 if (ronly == 0 && fs->fs_snapinum[0] != 0)
985 ffs_snapshot_mount(mp);
986 #ifdef UFS_EXTATTR
987 /*
988 * Initialize file-backed extended attributes on UFS1 file
989 * systems.
990 */
991 if (ump->um_fstype == UFS1) {
992 ufs_extattr_uepm_init(&ump->um_extattr);
993 #ifdef UFS_EXTATTR_AUTOSTART
994 /*
995 * XXX Just ignore errors. Not clear that we should
996 * XXX fail the mount in this case.
997 */
998 (void) ufs_extattr_autostart(mp, l);
999 #endif
1000 }
1001 #endif /* UFS_EXTATTR */
1002 return (0);
1003 out:
1004 if (fs)
1005 free(fs, M_UFSMNT);
1006 devvp->v_specmountpoint = NULL;
1007 if (bp)
1008 brelse(bp);
1009 if (ump) {
1010 if (ump->um_oldfscompat)
1011 free(ump->um_oldfscompat, M_UFSMNT);
1012 free(ump, M_UFSMNT);
1013 mp->mnt_data = NULL;
1014 }
1015 return (error);
1016 }
1017
1018 /*
1019 * Sanity checks for loading old filesystem superblocks.
1020 * See ffs_oldfscompat_write below for unwound actions.
1021 *
1022 * XXX - Parts get retired eventually.
1023 * Unfortunately new bits get added.
1024 */
1025 static void
1026 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1027 {
1028 off_t maxfilesize;
1029 int32_t *extrasave;
1030
1031 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1032 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1033 return;
1034
1035 if (!ump->um_oldfscompat)
1036 ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
1037 M_UFSMNT, M_WAITOK);
1038
1039 memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1040 extrasave = ump->um_oldfscompat;
1041 extrasave += 512/sizeof(int32_t);
1042 extrasave[0] = fs->fs_old_npsect;
1043 extrasave[1] = fs->fs_old_interleave;
1044 extrasave[2] = fs->fs_old_trackskew;
1045
1046 /* These fields will be overwritten by their
1047 * original values in fs_oldfscompat_write, so it is harmless
1048 * to modify them here.
1049 */
1050 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1051 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1052 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1053 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1054
1055 fs->fs_maxbsize = fs->fs_bsize;
1056 fs->fs_time = fs->fs_old_time;
1057 fs->fs_size = fs->fs_old_size;
1058 fs->fs_dsize = fs->fs_old_dsize;
1059 fs->fs_csaddr = fs->fs_old_csaddr;
1060 fs->fs_sblockloc = sblockloc;
1061
1062 fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1063
1064 if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1065 fs->fs_old_nrpos = 8;
1066 fs->fs_old_npsect = fs->fs_old_nsect;
1067 fs->fs_old_interleave = 1;
1068 fs->fs_old_trackskew = 0;
1069 }
1070
1071 if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1072 ump->um_maxfilesize = (u_quad_t) 1LL << 39;
1073 fs->fs_qbmask = ~fs->fs_bmask;
1074 fs->fs_qfmask = ~fs->fs_fmask;
1075 }
1076
1077 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1078 if (ump->um_maxfilesize > maxfilesize)
1079 ump->um_maxfilesize = maxfilesize;
1080
1081 /* Compatibility for old filesystems */
1082 if (fs->fs_avgfilesize <= 0)
1083 fs->fs_avgfilesize = AVFILESIZ;
1084 if (fs->fs_avgfpdir <= 0)
1085 fs->fs_avgfpdir = AFPDIR;
1086
1087 #if 0
1088 if (bigcgs) {
1089 fs->fs_save_cgsize = fs->fs_cgsize;
1090 fs->fs_cgsize = fs->fs_bsize;
1091 }
1092 #endif
1093 }
1094
1095 /*
1096 * Unwinding superblock updates for old filesystems.
1097 * See ffs_oldfscompat_read above for details.
1098 *
1099 * XXX - Parts get retired eventually.
1100 * Unfortunately new bits get added.
1101 */
1102 static void
1103 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1104 {
1105 int32_t *extrasave;
1106
1107 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1108 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1109 return;
1110
1111 fs->fs_old_time = fs->fs_time;
1112 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1113 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1114 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1115 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1116 fs->fs_old_flags = fs->fs_flags;
1117
1118 #if 0
1119 if (bigcgs) {
1120 fs->fs_cgsize = fs->fs_save_cgsize;
1121 }
1122 #endif
1123
1124 memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1125 extrasave = ump->um_oldfscompat;
1126 extrasave += 512/sizeof(int32_t);
1127 fs->fs_old_npsect = extrasave[0];
1128 fs->fs_old_interleave = extrasave[1];
1129 fs->fs_old_trackskew = extrasave[2];
1130
1131 }
1132
1133 /*
1134 * unmount system call
1135 */
1136 int
1137 ffs_unmount(struct mount *mp, int mntflags, struct lwp *l)
1138 {
1139 struct ufsmount *ump = VFSTOUFS(mp);
1140 struct fs *fs = ump->um_fs;
1141 int error, flags, penderr;
1142
1143 penderr = 0;
1144 flags = 0;
1145 if (mntflags & MNT_FORCE)
1146 flags |= FORCECLOSE;
1147 #ifdef UFS_EXTATTR
1148 if (ump->um_fstype == UFS1) {
1149 error = ufs_extattr_stop(mp, l);
1150 if (error) {
1151 if (error != EOPNOTSUPP)
1152 printf("%s: ufs_extattr_stop returned %d\n",
1153 fs->fs_fsmnt, error);
1154 } else
1155 ufs_extattr_uepm_destroy(&ump->um_extattr);
1156 }
1157 #endif /* UFS_EXTATTR */
1158 if (mp->mnt_flag & MNT_SOFTDEP) {
1159 if ((error = softdep_flushfiles(mp, flags, l)) != 0)
1160 return (error);
1161 } else {
1162 if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1163 return (error);
1164 }
1165 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1166 printf("%s: unmount pending error: blocks %" PRId64
1167 " files %d\n",
1168 fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
1169 fs->fs_pendingblocks = 0;
1170 fs->fs_pendinginodes = 0;
1171 penderr = 1;
1172 }
1173 if (fs->fs_ronly == 0 &&
1174 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1175 fs->fs_clean & FS_WASCLEAN) {
1176 /*
1177 * XXXX don't mark fs clean in the case of softdep
1178 * pending block errors, until they are fixed.
1179 */
1180 if (penderr == 0) {
1181 if (mp->mnt_flag & MNT_SOFTDEP)
1182 fs->fs_flags &= ~FS_DOSOFTDEP;
1183 fs->fs_clean = FS_ISCLEAN;
1184 }
1185 fs->fs_fmod = 0;
1186 (void) ffs_sbupdate(ump, MNT_WAIT);
1187 }
1188 if (ump->um_devvp->v_type != VBAD)
1189 ump->um_devvp->v_specmountpoint = NULL;
1190 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1191 (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
1192 NOCRED, l);
1193 vput(ump->um_devvp);
1194 free(fs->fs_csp, M_UFSMNT);
1195 free(fs, M_UFSMNT);
1196 if (ump->um_oldfscompat != NULL)
1197 free(ump->um_oldfscompat, M_UFSMNT);
1198 free(ump, M_UFSMNT);
1199 mp->mnt_data = NULL;
1200 mp->mnt_flag &= ~MNT_LOCAL;
1201 return (0);
1202 }
1203
1204 /*
1205 * Flush out all the files in a filesystem.
1206 */
1207 int
1208 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1209 {
1210 extern int doforce;
1211 struct ufsmount *ump;
1212 int error;
1213
1214 if (!doforce)
1215 flags &= ~FORCECLOSE;
1216 ump = VFSTOUFS(mp);
1217 #ifdef QUOTA
1218 if (mp->mnt_flag & MNT_QUOTA) {
1219 int i;
1220 if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
1221 return (error);
1222 for (i = 0; i < MAXQUOTAS; i++) {
1223 if (ump->um_quotas[i] == NULLVP)
1224 continue;
1225 quotaoff(l, mp, i);
1226 }
1227 /*
1228 * Here we fall through to vflush again to ensure
1229 * that we have gotten rid of all the system vnodes.
1230 */
1231 }
1232 #endif
1233 if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1234 return (error);
1235 ffs_snapshot_unmount(mp);
1236 /*
1237 * Flush all the files.
1238 */
1239 error = vflush(mp, NULLVP, flags);
1240 if (error)
1241 return (error);
1242 /*
1243 * Flush filesystem metadata.
1244 */
1245 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1246 error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0, l);
1247 VOP_UNLOCK(ump->um_devvp, 0);
1248 return (error);
1249 }
1250
1251 /*
1252 * Get file system statistics.
1253 */
1254 int
1255 ffs_statvfs(struct mount *mp, struct statvfs *sbp, struct lwp *l)
1256 {
1257 struct ufsmount *ump;
1258 struct fs *fs;
1259
1260 ump = VFSTOUFS(mp);
1261 fs = ump->um_fs;
1262 sbp->f_bsize = fs->fs_bsize;
1263 sbp->f_frsize = fs->fs_fsize;
1264 sbp->f_iosize = fs->fs_bsize;
1265 sbp->f_blocks = fs->fs_dsize;
1266 sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1267 fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1268 sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1269 fs->fs_minfree) / (u_int64_t) 100;
1270 if (sbp->f_bfree > sbp->f_bresvd)
1271 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1272 else
1273 sbp->f_bavail = 0;
1274 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
1275 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1276 sbp->f_favail = sbp->f_ffree;
1277 sbp->f_fresvd = 0;
1278 copy_statvfs_info(sbp, mp);
1279 return (0);
1280 }
1281
1282 /*
1283 * Go through the disk queues to initiate sandbagged IO;
1284 * go through the inodes to write those that have been modified;
1285 * initiate the writing of the super block if it has been modified.
1286 *
1287 * Note: we are always called with the filesystem marked `MPBUSY'.
1288 */
1289 int
1290 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred, struct lwp *l)
1291 {
1292 struct vnode *vp, *nvp;
1293 struct inode *ip;
1294 struct ufsmount *ump = VFSTOUFS(mp);
1295 struct fs *fs;
1296 int error, count, allerror = 0;
1297
1298 fs = ump->um_fs;
1299 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1300 printf("fs = %s\n", fs->fs_fsmnt);
1301 panic("update: rofs mod");
1302 }
1303 /*
1304 * Write back each (modified) inode.
1305 */
1306 simple_lock(&mntvnode_slock);
1307 loop:
1308 for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
1309 /*
1310 * If the vnode that we are about to sync is no longer
1311 * associated with this mount point, start over.
1312 */
1313 if (vp->v_mount != mp)
1314 goto loop;
1315 simple_lock(&vp->v_interlock);
1316 nvp = LIST_NEXT(vp, v_mntvnodes);
1317 ip = VTOI(vp);
1318 if (vp->v_type == VNON ||
1319 ((ip->i_flag &
1320 (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
1321 LIST_EMPTY(&vp->v_dirtyblkhd) &&
1322 vp->v_uobj.uo_npages == 0))
1323 {
1324 simple_unlock(&vp->v_interlock);
1325 continue;
1326 }
1327 simple_unlock(&mntvnode_slock);
1328 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
1329 if (error) {
1330 simple_lock(&mntvnode_slock);
1331 if (error == ENOENT)
1332 goto loop;
1333 continue;
1334 }
1335 if (vp->v_type == VREG && waitfor == MNT_LAZY)
1336 error = ffs_update(vp, NULL, NULL, 0);
1337 else
1338 error = VOP_FSYNC(vp, cred,
1339 waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0, l);
1340 if (error)
1341 allerror = error;
1342 vput(vp);
1343 simple_lock(&mntvnode_slock);
1344 }
1345 simple_unlock(&mntvnode_slock);
1346 /*
1347 * Force stale file system control information to be flushed.
1348 */
1349 if (waitfor == MNT_WAIT && (ump->um_mountp->mnt_flag & MNT_SOFTDEP)) {
1350 if ((error = softdep_flushworklist(ump->um_mountp, &count, l)))
1351 allerror = error;
1352 /* Flushed work items may create new vnodes to clean */
1353 if (allerror == 0 && count) {
1354 simple_lock(&mntvnode_slock);
1355 goto loop;
1356 }
1357 }
1358 if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1359 !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1360 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1361 if ((error = VOP_FSYNC(ump->um_devvp, cred,
1362 waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0, l)) != 0)
1363 allerror = error;
1364 VOP_UNLOCK(ump->um_devvp, 0);
1365 if (allerror == 0 && waitfor == MNT_WAIT) {
1366 simple_lock(&mntvnode_slock);
1367 goto loop;
1368 }
1369 }
1370 #ifdef QUOTA
1371 qsync(mp);
1372 #endif
1373 /*
1374 * Write back modified superblock.
1375 */
1376 if (fs->fs_fmod != 0) {
1377 fs->fs_fmod = 0;
1378 fs->fs_time = time_second;
1379 if ((error = ffs_cgupdate(ump, waitfor)))
1380 allerror = error;
1381 }
1382 return (allerror);
1383 }
1384
1385 /*
1386 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1387 * in from disk. If it is in core, wait for the lock bit to clear, then
1388 * return the inode locked. Detection and handling of mount points must be
1389 * done by the calling routine.
1390 */
1391 int
1392 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1393 {
1394 struct fs *fs;
1395 struct inode *ip;
1396 struct ufsmount *ump;
1397 struct buf *bp;
1398 struct vnode *vp;
1399 dev_t dev;
1400 int error;
1401
1402 ump = VFSTOUFS(mp);
1403 dev = ump->um_dev;
1404
1405 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1406 return (0);
1407
1408 /* Allocate a new vnode/inode. */
1409 if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
1410 *vpp = NULL;
1411 return (error);
1412 }
1413
1414 /*
1415 * If someone beat us to it while sleeping in getnewvnode(),
1416 * push back the freshly allocated vnode we don't need, and return.
1417 */
1418
1419 do {
1420 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) {
1421 ungetnewvnode(vp);
1422 return (0);
1423 }
1424 } while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));
1425
1426 vp->v_flag |= VLOCKSWORK;
1427
1428 /*
1429 * XXX MFS ends up here, too, to allocate an inode. Should we
1430 * XXX create another pool for MFS inodes?
1431 */
1432
1433 ip = pool_get(&ffs_inode_pool, PR_WAITOK);
1434 memset(ip, 0, sizeof(struct inode));
1435 vp->v_data = ip;
1436 ip->i_vnode = vp;
1437 ip->i_ump = ump;
1438 ip->i_fs = fs = ump->um_fs;
1439 ip->i_dev = dev;
1440 ip->i_number = ino;
1441 LIST_INIT(&ip->i_pcbufhd);
1442 #ifdef QUOTA
1443 {
1444 int i;
1445
1446 for (i = 0; i < MAXQUOTAS; i++)
1447 ip->i_dquot[i] = NODQUOT;
1448 }
1449 #endif
1450
1451 /*
1452 * Put it onto its hash chain and lock it so that other requests for
1453 * this inode will block if they arrive while we are sleeping waiting
1454 * for old data structures to be purged or for the contents of the
1455 * disk portion of this inode to be read.
1456 */
1457
1458 ufs_ihashins(ip);
1459 lockmgr(&ufs_hashlock, LK_RELEASE, 0);
1460
1461 /* Read in the disk contents for the inode, copy into the inode. */
1462 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1463 (int)fs->fs_bsize, NOCRED, &bp);
1464 if (error) {
1465
1466 /*
1467 * The inode does not contain anything useful, so it would
1468 * be misleading to leave it on its hash chain. With mode
1469 * still zero, it will be unlinked and returned to the free
1470 * list by vput().
1471 */
1472
1473 vput(vp);
1474 brelse(bp);
1475 *vpp = NULL;
1476 return (error);
1477 }
1478 if (ip->i_ump->um_fstype == UFS1)
1479 ip->i_din.ffs1_din = pool_get(&ffs_dinode1_pool, PR_WAITOK);
1480 else
1481 ip->i_din.ffs2_din = pool_get(&ffs_dinode2_pool, PR_WAITOK);
1482 ffs_load_inode(bp, ip, fs, ino);
1483 if (DOINGSOFTDEP(vp))
1484 softdep_load_inodeblock(ip);
1485 else
1486 ip->i_ffs_effnlink = ip->i_nlink;
1487 brelse(bp);
1488
1489 /*
1490 * Initialize the vnode from the inode, check for aliases.
1491 * Note that the underlying vnode may have changed.
1492 */
1493
1494 ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1495
1496 /*
1497 * Finish inode initialization now that aliasing has been resolved.
1498 */
1499
1500 genfs_node_init(vp, &ffs_genfsops);
1501 ip->i_devvp = ump->um_devvp;
1502 VREF(ip->i_devvp);
1503
1504 /*
1505 * Ensure that uid and gid are correct. This is a temporary
1506 * fix until fsck has been changed to do the update.
1507 */
1508
1509 if (fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */
1510 ip->i_uid = ip->i_ffs1_ouid; /* XXX */
1511 ip->i_gid = ip->i_ffs1_ogid; /* XXX */
1512 } /* XXX */
1513 uvm_vnp_setsize(vp, ip->i_size);
1514 *vpp = vp;
1515 return (0);
1516 }
1517
1518 /*
1519 * File handle to vnode
1520 *
1521 * Have to be really careful about stale file handles:
1522 * - check that the inode number is valid
1523 * - call ffs_vget() to get the locked inode
1524 * - check for an unallocated inode (i_mode == 0)
1525 * - check that the given client host has export rights and return
1526 * those rights via. exflagsp and credanonp
1527 */
1528 int
1529 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1530 {
1531 struct ufid ufh;
1532 struct fs *fs;
1533
1534 if (fhp->fid_len != sizeof(struct ufid))
1535 return EINVAL;
1536
1537 memcpy(&ufh, fhp, sizeof(ufh));
1538 fs = VFSTOUFS(mp)->um_fs;
1539 if (ufh.ufid_ino < ROOTINO ||
1540 ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1541 return (ESTALE);
1542 return (ufs_fhtovp(mp, &ufh, vpp));
1543 }
1544
1545 /*
1546 * Vnode pointer to File handle
1547 */
1548 /* ARGSUSED */
1549 int
1550 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1551 {
1552 struct inode *ip;
1553 struct ufid ufh;
1554
1555 if (*fh_size < sizeof(struct ufid)) {
1556 *fh_size = sizeof(struct ufid);
1557 return E2BIG;
1558 }
1559 ip = VTOI(vp);
1560 *fh_size = sizeof(struct ufid);
1561 memset(&ufh, 0, sizeof(ufh));
1562 ufh.ufid_len = sizeof(struct ufid);
1563 ufh.ufid_ino = ip->i_number;
1564 ufh.ufid_gen = ip->i_gen;
1565 memcpy(fhp, &ufh, sizeof(ufh));
1566 return (0);
1567 }
1568
1569 void
1570 ffs_init(void)
1571 {
1572 if (ffs_initcount++ > 0)
1573 return;
1574
1575 #ifdef _LKM
1576 pool_init(&ffs_inode_pool, sizeof(struct inode), 0, 0, 0,
1577 "ffsinopl", &pool_allocator_nointr);
1578 pool_init(&ffs_dinode1_pool, sizeof(struct ufs1_dinode), 0, 0, 0,
1579 "dino1pl", &pool_allocator_nointr);
1580 pool_init(&ffs_dinode2_pool, sizeof(struct ufs2_dinode), 0, 0, 0,
1581 "dino2pl", &pool_allocator_nointr);
1582 #endif
1583 softdep_initialize();
1584 ufs_init();
1585 }
1586
/*
 * Reinitialize FFS: forwards to the soft dependency code first and
 * then to the shared UFS layer.
 */
void
ffs_reinit(void)
{

	softdep_reinitialize();
	ufs_reinit();
}
1593
1594 void
1595 ffs_done(void)
1596 {
1597 if (--ffs_initcount > 0)
1598 return;
1599
1600 /* XXX softdep cleanup ? */
1601 ufs_done();
1602 #ifdef _LKM
1603 pool_destroy(&ffs_dinode2_pool);
1604 pool_destroy(&ffs_dinode1_pool);
1605 pool_destroy(&ffs_inode_pool);
1606 #endif
1607 }
1608
1609 SYSCTL_SETUP(sysctl_vfs_ffs_setup, "sysctl vfs.ffs subtree setup")
1610 {
1611 #if 0
1612 extern int doasyncfree;
1613 #endif
1614 extern int ffs_log_changeopt;
1615
1616 sysctl_createv(clog, 0, NULL, NULL,
1617 CTLFLAG_PERMANENT,
1618 CTLTYPE_NODE, "vfs", NULL,
1619 NULL, 0, NULL, 0,
1620 CTL_VFS, CTL_EOL);
1621 sysctl_createv(clog, 0, NULL, NULL,
1622 CTLFLAG_PERMANENT,
1623 CTLTYPE_NODE, "ffs",
1624 SYSCTL_DESCR("Berkeley Fast File System"),
1625 NULL, 0, NULL, 0,
1626 CTL_VFS, 1, CTL_EOL);
1627
1628 /*
1629 * @@@ should we even bother with these first three?
1630 */
1631 sysctl_createv(clog, 0, NULL, NULL,
1632 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1633 CTLTYPE_INT, "doclusterread", NULL,
1634 sysctl_notavail, 0, NULL, 0,
1635 CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
1636 sysctl_createv(clog, 0, NULL, NULL,
1637 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1638 CTLTYPE_INT, "doclusterwrite", NULL,
1639 sysctl_notavail, 0, NULL, 0,
1640 CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
1641 sysctl_createv(clog, 0, NULL, NULL,
1642 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1643 CTLTYPE_INT, "doreallocblks", NULL,
1644 sysctl_notavail, 0, NULL, 0,
1645 CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
1646 #if 0
1647 sysctl_createv(clog, 0, NULL, NULL,
1648 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1649 CTLTYPE_INT, "doasyncfree",
1650 SYSCTL_DESCR("Release dirty blocks asynchronously"),
1651 NULL, 0, &doasyncfree, 0,
1652 CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
1653 #endif
1654 sysctl_createv(clog, 0, NULL, NULL,
1655 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1656 CTLTYPE_INT, "log_changeopt",
1657 SYSCTL_DESCR("Log changes in optimization strategy"),
1658 NULL, 0, &ffs_log_changeopt, 0,
1659 CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
1660 }
1661
1662 /*
1663 * Write a superblock and associated information back to disk.
1664 */
1665 int
1666 ffs_sbupdate(struct ufsmount *mp, int waitfor)
1667 {
1668 struct fs *fs = mp->um_fs;
1669 struct buf *bp;
1670 int error = 0;
1671 u_int32_t saveflag;
1672
1673 bp = getblk(mp->um_devvp,
1674 fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb),
1675 (int)fs->fs_sbsize, 0, 0);
1676 saveflag = fs->fs_flags & FS_INTERNAL;
1677 fs->fs_flags &= ~FS_INTERNAL;
1678
1679 memcpy(bp->b_data, fs, fs->fs_sbsize);
1680
1681 ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1682 #ifdef FFS_EI
1683 if (mp->um_flags & UFS_NEEDSWAP)
1684 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
1685 #endif
1686 fs->fs_flags |= saveflag;
1687
1688 if (waitfor == MNT_WAIT)
1689 error = bwrite(bp);
1690 else
1691 bawrite(bp);
1692 return (error);
1693 }
1694
1695 int
1696 ffs_cgupdate(struct ufsmount *mp, int waitfor)
1697 {
1698 struct fs *fs = mp->um_fs;
1699 struct buf *bp;
1700 int blks;
1701 void *space;
1702 int i, size, error = 0, allerror = 0;
1703
1704 allerror = ffs_sbupdate(mp, waitfor);
1705 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1706 space = fs->fs_csp;
1707 for (i = 0; i < blks; i += fs->fs_frag) {
1708 size = fs->fs_bsize;
1709 if (i + fs->fs_frag > blks)
1710 size = (blks - i) * fs->fs_fsize;
1711 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1712 size, 0, 0);
1713 #ifdef FFS_EI
1714 if (mp->um_flags & UFS_NEEDSWAP)
1715 ffs_csum_swap((struct csum*)space,
1716 (struct csum*)bp->b_data, size);
1717 else
1718 #endif
1719 memcpy(bp->b_data, space, (u_int)size);
1720 space = (char *)space + size;
1721 if (waitfor == MNT_WAIT)
1722 error = bwrite(bp);
1723 else
1724 bawrite(bp);
1725 }
1726 if (!allerror && error)
1727 allerror = error;
1728 return (allerror);
1729 }
1730
/*
 * Extended attribute control for FFS.
 *
 * When built with UFS_EXTATTR, requests on a UFS1 mount are passed to
 * ufs_extattrctl().  Every other request goes to vfs_stdextattrctl().
 * The result of whichever routine is called is returned unchanged.
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname, struct lwp *l)
{
#ifdef UFS_EXTATTR
	struct ufsmount *ump = VFSTOUFS(mp);

	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	if (ump->um_fstype == UFS1) {
		return (ufs_extattrctl(mp, cmd, vp, attrnamespace, attrname,
		    l));
	}
#endif
	return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname, l));
}
1746