/*	$NetBSD: ffs_vfsops.c,v 1.62 2000/04/04 09:23:20 jdolecek Exp $	*/
2
3 /*
4 * Copyright (c) 1989, 1991, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
36 */
37
38 #if defined(_KERNEL) && !defined(_LKM)
39 #include "opt_ffs.h"
40 #include "opt_quota.h"
41 #include "opt_compat_netbsd.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/kernel.h>
49 #include <sys/vnode.h>
50 #include <sys/socket.h>
51 #include <sys/mount.h>
52 #include <sys/buf.h>
53 #include <sys/device.h>
54 #include <sys/mbuf.h>
55 #include <sys/file.h>
56 #include <sys/disklabel.h>
57 #include <sys/ioctl.h>
58 #include <sys/errno.h>
59 #include <sys/malloc.h>
60 #include <sys/pool.h>
61 #include <sys/lock.h>
62 #include <vm/vm.h>
63 #include <sys/sysctl.h>
64
65 #include <miscfs/specfs/specdev.h>
66
67 #include <ufs/ufs/quota.h>
68 #include <ufs/ufs/ufsmount.h>
69 #include <ufs/ufs/inode.h>
70 #include <ufs/ufs/dir.h>
71 #include <ufs/ufs/ufs_extern.h>
72 #include <ufs/ufs/ufs_bswap.h>
73
74 #include <ufs/ffs/fs.h>
75 #include <ufs/ffs/ffs_extern.h>
76
/*
 * How many times ffs_init() was called.  ffs_init() and ffs_done() use
 * this counter so that set-up and tear-down are each performed once.
 */
int ffs_initcount = 0;

/* Lock taken by ffs_vget() while it enters a new inode in the hash. */
extern struct lock ufs_hashlock;

/* Vnode operation vector descriptions; defined outside this file. */
extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
extern struct vnodeopv_desc ffs_specop_opv_desc;
extern struct vnodeopv_desc ffs_fifoop_opv_desc;

/* NULL-terminated list of the vnode operation vectors used by FFS. */
struct vnodeopv_desc *ffs_vnodeopv_descs[] = {
	&ffs_vnodeop_opv_desc,
	&ffs_specop_opv_desc,
	&ffs_fifoop_opv_desc,
	NULL,
};

/*
 * VFS operations vector for FFS.  The initializer is positional, so the
 * order of the entries must match the member order of struct vfsops.
 * Several entries are ufs_* routines rather than ffs_* ones.
 */
struct vfsops ffs_vfsops = {
	MOUNT_FFS,
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_done,
	ffs_sysctl,
	ffs_mountroot,
	ufs_check_export,
	ffs_vnodeopv_descs,
};

/*
 * Pool from which ffs_vget() allocates in-core inodes; initialized in
 * ffs_init() and destroyed in ffs_done().
 */
struct pool ffs_inode_pool;
114
115 /*
116 * Called by main() when ffs is going to be mounted as root.
117 */
118
119 int
120 ffs_mountroot()
121 {
122 struct fs *fs;
123 struct mount *mp;
124 struct proc *p = curproc; /* XXX */
125 struct ufsmount *ump;
126 int error;
127
128 if (root_device->dv_class != DV_DISK)
129 return (ENODEV);
130
131 /*
132 * Get vnodes for rootdev.
133 */
134 if (bdevvp(rootdev, &rootvp))
135 panic("ffs_mountroot: can't setup bdevvp's");
136
137 if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
138 vrele(rootvp);
139 return (error);
140 }
141 if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
142 mp->mnt_op->vfs_refcount--;
143 vfs_unbusy(mp);
144 free(mp, M_MOUNT);
145 vrele(rootvp);
146 return (error);
147 }
148 simple_lock(&mountlist_slock);
149 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
150 simple_unlock(&mountlist_slock);
151 ump = VFSTOUFS(mp);
152 fs = ump->um_fs;
153 memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
154 (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
155 (void)ffs_statfs(mp, &mp->mnt_stat, p);
156 vfs_unbusy(mp);
157 inittodr(fs->fs_time);
158 return (0);
159 }
160
161 /*
162 * VFS Operations.
163 *
164 * mount system call
165 */
166 int
167 ffs_mount(mp, path, data, ndp, p)
168 struct mount *mp;
169 const char *path;
170 void *data;
171 struct nameidata *ndp;
172 struct proc *p;
173 {
174 struct vnode *devvp;
175 struct ufs_args args;
176 struct ufsmount *ump = NULL;
177 struct fs *fs;
178 size_t size;
179 int error, flags;
180 mode_t accessmode;
181
182 error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
183 if (error)
184 return (error);
185 /*
186 * If updating, check whether changing from read-only to
187 * read/write; if there is no device name, that's all we do.
188 */
189 if (mp->mnt_flag & MNT_UPDATE) {
190 ump = VFSTOUFS(mp);
191 fs = ump->um_fs;
192 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
193 flags = WRITECLOSE;
194 if (mp->mnt_flag & MNT_FORCE)
195 flags |= FORCECLOSE;
196 if (mp->mnt_flag & MNT_SOFTDEP)
197 error = softdep_flushfiles(mp, flags, p);
198 else
199 error = ffs_flushfiles(mp, flags, p);
200 if (error == 0 &&
201 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
202 fs->fs_clean & FS_WASCLEAN) {
203 fs->fs_clean = FS_ISCLEAN;
204 (void) ffs_sbupdate(ump, MNT_WAIT);
205 }
206 if (error)
207 return (error);
208 fs->fs_ronly = 1;
209 }
210 if (mp->mnt_flag & MNT_RELOAD) {
211 error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
212 if (error)
213 return (error);
214 }
215 if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
216 /*
217 * If upgrade to read-write by non-root, then verify
218 * that user has necessary permissions on the device.
219 */
220 devvp = ump->um_devvp;
221 if (p->p_ucred->cr_uid != 0) {
222 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
223 error = VOP_ACCESS(devvp, VREAD | VWRITE,
224 p->p_ucred, p);
225 VOP_UNLOCK(devvp, 0);
226 if (error)
227 return (error);
228 }
229 fs->fs_ronly = 0;
230 fs->fs_clean <<= 1;
231 fs->fs_fmod = 1;
232 if ((fs->fs_flags & FS_DOSOFTDEP)) {
233 error = softdep_mount(devvp, mp, fs,
234 p->p_ucred);
235 if (error)
236 return (error);
237 } else
238 mp->mnt_flag &= ~MNT_SOFTDEP;
239 }
240 if (args.fspec == 0) {
241 /*
242 * Process export requests.
243 */
244 return (vfs_export(mp, &ump->um_export, &args.export));
245 }
246 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
247 (MNT_SOFTDEP | MNT_ASYNC)) {
248 printf("%s fs uses soft updates, ignoring async mode\n",
249 fs->fs_fsmnt);
250 mp->mnt_flag &= ~MNT_ASYNC;
251 }
252 }
253 /*
254 * Not an update, or updating the name: look up the name
255 * and verify that it refers to a sensible block device.
256 */
257 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
258 if ((error = namei(ndp)) != 0)
259 return (error);
260 devvp = ndp->ni_vp;
261
262 if (devvp->v_type != VBLK) {
263 vrele(devvp);
264 return (ENOTBLK);
265 }
266 if (major(devvp->v_rdev) >= nblkdev) {
267 vrele(devvp);
268 return (ENXIO);
269 }
270 /*
271 * If mount by non-root, then verify that user has necessary
272 * permissions on the device.
273 */
274 if (p->p_ucred->cr_uid != 0) {
275 accessmode = VREAD;
276 if ((mp->mnt_flag & MNT_RDONLY) == 0)
277 accessmode |= VWRITE;
278 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
279 error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
280 VOP_UNLOCK(devvp, 0);
281 if (error) {
282 vrele(devvp);
283 return (error);
284 }
285 }
286 if ((mp->mnt_flag & MNT_UPDATE) == 0) {
287 error = ffs_mountfs(devvp, mp, p);
288 if (!error) {
289 ump = VFSTOUFS(mp);
290 fs = ump->um_fs;
291 if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
292 (MNT_SOFTDEP | MNT_ASYNC)) {
293 printf("%s fs uses soft updates, "
294 "ignoring async mode\n",
295 fs->fs_fsmnt);
296 mp->mnt_flag &= ~MNT_ASYNC;
297 }
298 }
299 }
300 else {
301 if (devvp != ump->um_devvp)
302 error = EINVAL; /* needs translation */
303 else
304 vrele(devvp);
305 }
306 if (error) {
307 vrele(devvp);
308 return (error);
309 }
310 (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
311 memset(fs->fs_fsmnt + size, 0, sizeof(fs->fs_fsmnt) - size);
312 memcpy(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN);
313 (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
314 &size);
315 memset(mp->mnt_stat.f_mntfromname + size, 0, MNAMELEN - size);
316 if (fs->fs_fmod != 0) { /* XXX */
317 fs->fs_fmod = 0;
318 if (fs->fs_clean & FS_WASCLEAN)
319 fs->fs_time = time.tv_sec;
320 else
321 printf("%s: file system not clean (fs_flags=%x); please fsck(8)\n",
322 mp->mnt_stat.f_mntfromname, fs->fs_clean);
323 (void) ffs_cgupdate(ump, MNT_WAIT);
324 }
325 return (0);
326 }
327
328 /*
329 * Reload all incore data for a filesystem (used after running fsck on
330 * the root filesystem and finding things to fix). The filesystem must
331 * be mounted read-only.
332 *
333 * Things to do to update the mount:
334 * 1) invalidate all cached meta-data.
335 * 2) re-read superblock from disk.
336 * 3) re-read summary information from disk.
337 * 4) invalidate all inactive vnodes.
338 * 5) invalidate all cached file data.
339 * 6) re-read inode data for all active vnodes.
340 */
341 int
342 ffs_reload(mountp, cred, p)
343 struct mount *mountp;
344 struct ucred *cred;
345 struct proc *p;
346 {
347 struct vnode *vp, *nvp, *devvp;
348 struct inode *ip;
349 struct buf *bp;
350 struct fs *fs, *newfs;
351 struct partinfo dpart;
352 int i, blks, size, error;
353 int32_t *lp;
354 caddr_t cp;
355
356 if ((mountp->mnt_flag & MNT_RDONLY) == 0)
357 return (EINVAL);
358 /*
359 * Step 1: invalidate all cached meta-data.
360 */
361 devvp = VFSTOUFS(mountp)->um_devvp;
362 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
363 error = vinvalbuf(devvp, 0, cred, p, 0, 0);
364 VOP_UNLOCK(devvp, 0);
365 if (error)
366 panic("ffs_reload: dirty1");
367 /*
368 * Step 2: re-read superblock from disk.
369 */
370 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
371 size = DEV_BSIZE;
372 else
373 size = dpart.disklab->d_secsize;
374 error = bread(devvp, (ufs_daddr_t)(SBOFF / size), SBSIZE, NOCRED, &bp);
375 if (error) {
376 brelse(bp);
377 return (error);
378 }
379 fs = VFSTOUFS(mountp)->um_fs;
380 newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
381 memcpy(newfs, bp->b_data, fs->fs_sbsize);
382 #ifdef FFS_EI
383 if (VFSTOUFS(mountp)->um_flags & UFS_NEEDSWAP) {
384 ffs_sb_swap((struct fs*)bp->b_data, newfs, 0);
385 fs->fs_flags |= FS_SWAPPED;
386 }
387 #endif
388 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
389 newfs->fs_bsize < sizeof(struct fs)) {
390 brelse(bp);
391 free(newfs, M_UFSMNT);
392 return (EIO); /* XXX needs translation */
393 }
394 /*
395 * Copy pointer fields back into superblock before copying in XXX
396 * new superblock. These should really be in the ufsmount. XXX
397 * Note that important parameters (eg fs_ncg) are unchanged.
398 */
399 memcpy(&newfs->fs_csp[0], &fs->fs_csp[0], sizeof(fs->fs_csp));
400 newfs->fs_maxcluster = fs->fs_maxcluster;
401 memcpy(fs, newfs, (u_int)fs->fs_sbsize);
402 if (fs->fs_sbsize < SBSIZE)
403 bp->b_flags |= B_INVAL;
404 brelse(bp);
405 free(newfs, M_UFSMNT);
406 mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
407 ffs_oldfscompat(fs);
408 ffs_statfs(mountp, &mountp->mnt_stat, p);
409 /*
410 * Step 3: re-read summary information from disk.
411 */
412 blks = howmany(fs->fs_cssize, fs->fs_fsize);
413 for (i = 0; i < blks; i += fs->fs_frag) {
414 size = fs->fs_bsize;
415 if (i + fs->fs_frag > blks)
416 size = (blks - i) * fs->fs_fsize;
417 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
418 NOCRED, &bp);
419 if (error) {
420 brelse(bp);
421 return (error);
422 }
423 #ifdef FFS_EI
424 if (UFS_FSNEEDSWAP(fs))
425 ffs_csum_swap((struct csum*)bp->b_data,
426 (struct csum*)fs->fs_csp[fragstoblks(fs, i)], size);
427 else
428 #endif
429 memcpy(fs->fs_csp[fragstoblks(fs, i)], bp->b_data,
430 (size_t)size);
431 brelse(bp);
432 }
433 if ((fs->fs_flags & FS_DOSOFTDEP))
434 softdep_mount(devvp, mountp, fs, cred);
435 else
436 mountp->mnt_flag &= ~MNT_SOFTDEP;
437 /*
438 * We no longer know anything about clusters per cylinder group.
439 */
440 if (fs->fs_contigsumsize > 0) {
441 lp = fs->fs_maxcluster;
442 for (i = 0; i < fs->fs_ncg; i++)
443 *lp++ = fs->fs_contigsumsize;
444 }
445
446 loop:
447 simple_lock(&mntvnode_slock);
448 for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
449 if (vp->v_mount != mountp) {
450 simple_unlock(&mntvnode_slock);
451 goto loop;
452 }
453 nvp = vp->v_mntvnodes.le_next;
454 /*
455 * Step 4: invalidate all inactive vnodes.
456 */
457 if (vrecycle(vp, &mntvnode_slock, p))
458 goto loop;
459 /*
460 * Step 5: invalidate all cached file data.
461 */
462 simple_lock(&vp->v_interlock);
463 simple_unlock(&mntvnode_slock);
464 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
465 goto loop;
466 if (vinvalbuf(vp, 0, cred, p, 0, 0))
467 panic("ffs_reload: dirty2");
468 /*
469 * Step 6: re-read inode data for all active vnodes.
470 */
471 ip = VTOI(vp);
472 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
473 (int)fs->fs_bsize, NOCRED, &bp);
474 if (error) {
475 brelse(bp);
476 vput(vp);
477 return (error);
478 }
479 cp = (caddr_t)bp->b_data +
480 (ino_to_fsbo(fs, ip->i_number) * DINODE_SIZE);
481 #ifdef FFS_EI
482 if (UFS_FSNEEDSWAP(fs))
483 ffs_dinode_swap((struct dinode *)cp,
484 &ip->i_din.ffs_din);
485 else
486 #endif
487 memcpy(&ip->i_din.ffs_din, cp, DINODE_SIZE);
488 ip->i_ffs_effnlink = ip->i_ffs_nlink;
489 brelse(bp);
490 vput(vp);
491 simple_lock(&mntvnode_slock);
492 }
493 simple_unlock(&mntvnode_slock);
494 return (0);
495 }
496
497 /*
498 * Common code for mount and mountroot
499 */
500 int
501 ffs_mountfs(devvp, mp, p)
502 struct vnode *devvp;
503 struct mount *mp;
504 struct proc *p;
505 {
506 struct ufsmount *ump;
507 struct buf *bp;
508 struct fs *fs;
509 dev_t dev;
510 struct partinfo dpart;
511 caddr_t base, space;
512 int blks;
513 int error, i, size, ronly;
514 #ifdef FFS_EI
515 int needswap;
516 #endif
517 int32_t *lp;
518 struct ucred *cred;
519 u_int64_t maxfilesize; /* XXX */
520 u_int32_t sbsize;
521
522 dev = devvp->v_rdev;
523 cred = p ? p->p_ucred : NOCRED;
524 /*
525 * Disallow multiple mounts of the same device.
526 * Disallow mounting of a device that is currently in use
527 * (except for root, which might share swap device for miniroot).
528 * Flush out any old buffers remaining from a previous use.
529 */
530 if ((error = vfs_mountedon(devvp)) != 0)
531 return (error);
532 if (vcount(devvp) > 1 && devvp != rootvp)
533 return (EBUSY);
534 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
535 error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
536 VOP_UNLOCK(devvp, 0);
537 if (error)
538 return (error);
539
540 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
541 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
542 if (error)
543 return (error);
544 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
545 size = DEV_BSIZE;
546 else
547 size = dpart.disklab->d_secsize;
548
549 bp = NULL;
550 ump = NULL;
551 error = bread(devvp, (ufs_daddr_t)(SBOFF / size), SBSIZE, cred, &bp);
552 if (error)
553 goto out;
554
555 fs = (struct fs*)bp->b_data;
556 if (fs->fs_magic == FS_MAGIC) {
557 sbsize = fs->fs_sbsize;
558 #ifdef FFS_EI
559 needswap = 0;
560 } else if (fs->fs_magic == bswap32(FS_MAGIC)) {
561 sbsize = bswap32(fs->fs_sbsize);
562 needswap = 1;
563 #endif
564 } else {
565 error = EINVAL;
566 goto out;
567 }
568 if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs)) {
569 error = EINVAL;
570 goto out;
571 }
572
573 fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
574 memcpy(fs, bp->b_data, sbsize);
575 #ifdef FFS_EI
576 if (needswap) {
577 ffs_sb_swap((struct fs*)bp->b_data, fs, 0);
578 fs->fs_flags |= FS_SWAPPED;
579 }
580 #endif
581 ffs_oldfscompat(fs);
582
583 if (fs->fs_bsize > MAXBSIZE || fs->fs_bsize < sizeof(struct fs)) {
584 error = EINVAL;
585 goto out;
586 }
587 /* make sure cylinder group summary area is a reasonable size. */
588 if (fs->fs_cgsize == 0 || fs->fs_cpg == 0 ||
589 fs->fs_ncg > fs->fs_ncyl / fs->fs_cpg + 1 ||
590 fs->fs_cssize >
591 fragroundup(fs, fs->fs_ncg * sizeof(struct csum))) {
592 error = EINVAL; /* XXX needs translation */
593 goto out2;
594 }
595 /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
596 if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
597 error = EROFS; /* XXX what should be returned? */
598 goto out2;
599 }
600
601 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
602 memset((caddr_t)ump, 0, sizeof *ump);
603 ump->um_fs = fs;
604 if (fs->fs_sbsize < SBSIZE)
605 bp->b_flags |= B_INVAL;
606 brelse(bp);
607 bp = NULL;
608 fs->fs_ronly = ronly;
609 if (ronly == 0) {
610 fs->fs_clean <<= 1;
611 fs->fs_fmod = 1;
612 }
613 size = fs->fs_cssize;
614 blks = howmany(size, fs->fs_fsize);
615 if (fs->fs_contigsumsize > 0)
616 size += fs->fs_ncg * sizeof(int32_t);
617 base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
618 for (i = 0; i < blks; i += fs->fs_frag) {
619 size = fs->fs_bsize;
620 if (i + fs->fs_frag > blks)
621 size = (blks - i) * fs->fs_fsize;
622 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
623 cred, &bp);
624 if (error) {
625 free(base, M_UFSMNT);
626 goto out2;
627 }
628 #ifdef FFS_EI
629 if (needswap)
630 ffs_csum_swap((struct csum*)bp->b_data,
631 (struct csum*)space, size);
632 else
633 #endif
634 memcpy(space, bp->b_data, (u_int)size);
635
636 fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
637 space += size;
638 brelse(bp);
639 bp = NULL;
640 }
641 if (fs->fs_contigsumsize > 0) {
642 fs->fs_maxcluster = lp = (int32_t *)space;
643 for (i = 0; i < fs->fs_ncg; i++)
644 *lp++ = fs->fs_contigsumsize;
645 }
646 mp->mnt_data = (qaddr_t)ump;
647 mp->mnt_stat.f_fsid.val[0] = (long)dev;
648 mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_FFS);
649 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
650 mp->mnt_flag |= MNT_LOCAL;
651 #ifdef FFS_EI
652 if (needswap)
653 ump->um_flags |= UFS_NEEDSWAP;
654 #endif
655 ump->um_mountp = mp;
656 ump->um_dev = dev;
657 ump->um_devvp = devvp;
658 ump->um_nindir = fs->fs_nindir;
659 ump->um_bptrtodb = fs->fs_fsbtodb;
660 ump->um_seqinc = fs->fs_frag;
661 for (i = 0; i < MAXQUOTAS; i++)
662 ump->um_quotas[i] = NULLVP;
663 devvp->v_specmountpoint = mp;
664 ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */
665 maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1; /* XXX */
666 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
667 fs->fs_maxfilesize = maxfilesize; /* XXX */
668 if (ronly == 0 && (fs->fs_flags & FS_DOSOFTDEP)) {
669 error = softdep_mount(devvp, mp, fs, cred);
670 if (error) {
671 free(base, M_UFSMNT);
672 goto out;
673 }
674 }
675 return (0);
676 out2:
677 free(fs, M_UFSMNT);
678 out:
679 devvp->v_specmountpoint = NULL;
680 if (bp)
681 brelse(bp);
682 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
683 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
684 VOP_UNLOCK(devvp, 0);
685 if (ump) {
686 free(ump, M_UFSMNT);
687 mp->mnt_data = (qaddr_t)0;
688 }
689 return (error);
690 }
691
692 /*
693 * Sanity checks for old file systems.
694 *
695 * XXX - goes away some day.
696 */
697 int
698 ffs_oldfscompat(fs)
699 struct fs *fs;
700 {
701 int i;
702
703 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
704 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
705 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
706 fs->fs_nrpos = 8; /* XXX */
707 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
708 u_int64_t sizepb = fs->fs_bsize; /* XXX */
709 /* XXX */
710 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
711 for (i = 0; i < NIADDR; i++) { /* XXX */
712 sizepb *= NINDIR(fs); /* XXX */
713 fs->fs_maxfilesize += sizepb; /* XXX */
714 } /* XXX */
715 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
716 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
717 } /* XXX */
718 return (0);
719 }
720
721 /*
722 * unmount system call
723 */
724 int
725 ffs_unmount(mp, mntflags, p)
726 struct mount *mp;
727 int mntflags;
728 struct proc *p;
729 {
730 struct ufsmount *ump;
731 struct fs *fs;
732 int error, flags;
733
734 flags = 0;
735 if (mntflags & MNT_FORCE)
736 flags |= FORCECLOSE;
737 if (mp->mnt_flag & MNT_SOFTDEP) {
738 if ((error = softdep_flushfiles(mp, flags, p)) != 0)
739 return (error);
740 } else {
741 if ((error = ffs_flushfiles(mp, flags, p)) != 0)
742 return (error);
743 }
744 ump = VFSTOUFS(mp);
745 fs = ump->um_fs;
746 if (fs->fs_ronly == 0 &&
747 ffs_cgupdate(ump, MNT_WAIT) == 0 &&
748 fs->fs_clean & FS_WASCLEAN) {
749 fs->fs_clean = FS_ISCLEAN;
750 (void) ffs_sbupdate(ump, MNT_WAIT);
751 }
752 if (ump->um_devvp->v_type != VBAD)
753 ump->um_devvp->v_specmountpoint = NULL;
754 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
755 error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
756 NOCRED, p);
757 vput(ump->um_devvp);
758 free(fs->fs_csp[0], M_UFSMNT);
759 free(fs, M_UFSMNT);
760 free(ump, M_UFSMNT);
761 mp->mnt_data = (qaddr_t)0;
762 mp->mnt_flag &= ~MNT_LOCAL;
763 return (error);
764 }
765
766 /*
767 * Flush out all the files in a filesystem.
768 */
769 int
770 ffs_flushfiles(mp, flags, p)
771 struct mount *mp;
772 int flags;
773 struct proc *p;
774 {
775 extern int doforce;
776 struct ufsmount *ump;
777 int error;
778
779 if (!doforce)
780 flags &= ~FORCECLOSE;
781 ump = VFSTOUFS(mp);
782 #ifdef QUOTA
783 if (mp->mnt_flag & MNT_QUOTA) {
784 int i;
785 if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
786 return (error);
787 for (i = 0; i < MAXQUOTAS; i++) {
788 if (ump->um_quotas[i] == NULLVP)
789 continue;
790 quotaoff(p, mp, i);
791 }
792 /*
793 * Here we fall through to vflush again to ensure
794 * that we have gotten rid of all the system vnodes.
795 */
796 }
797 #endif
798 /*
799 * Flush all the files.
800 */
801 error = vflush(mp, NULLVP, flags);
802 if (error)
803 return (error);
804 /*
805 * Flush filesystem metadata.
806 */
807 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
808 error = VOP_FSYNC(ump->um_devvp, p->p_ucred, FSYNC_WAIT, p);
809 VOP_UNLOCK(ump->um_devvp, 0);
810 return (error);
811 }
812
813 /*
814 * Get file system statistics.
815 */
816 int
817 ffs_statfs(mp, sbp, p)
818 struct mount *mp;
819 struct statfs *sbp;
820 struct proc *p;
821 {
822 struct ufsmount *ump;
823 struct fs *fs;
824
825 ump = VFSTOUFS(mp);
826 fs = ump->um_fs;
827 if (fs->fs_magic != FS_MAGIC)
828 panic("ffs_statfs");
829 #ifdef COMPAT_09
830 sbp->f_type = 1;
831 #else
832 sbp->f_type = 0;
833 #endif
834 sbp->f_bsize = fs->fs_fsize;
835 sbp->f_iosize = fs->fs_bsize;
836 sbp->f_blocks = fs->fs_dsize;
837 sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
838 fs->fs_cstotal.cs_nffree;
839 sbp->f_bavail = (long) (((u_int64_t) fs->fs_dsize * (u_int64_t)
840 (100 - fs->fs_minfree) / (u_int64_t) 100) -
841 (u_int64_t) (fs->fs_dsize - sbp->f_bfree));
842 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
843 sbp->f_ffree = fs->fs_cstotal.cs_nifree;
844 if (sbp != &mp->mnt_stat) {
845 memcpy(sbp->f_mntonname, mp->mnt_stat.f_mntonname, MNAMELEN);
846 memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, MNAMELEN);
847 }
848 strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN);
849 return (0);
850 }
851
852 /*
853 * Go through the disk queues to initiate sandbagged IO;
854 * go through the inodes to write those that have been modified;
855 * initiate the writing of the super block if it has been modified.
856 *
857 * Note: we are always called with the filesystem marked `MPBUSY'.
858 */
859 int
860 ffs_sync(mp, waitfor, cred, p)
861 struct mount *mp;
862 int waitfor;
863 struct ucred *cred;
864 struct proc *p;
865 {
866 struct vnode *vp, *nvp;
867 struct inode *ip;
868 struct ufsmount *ump = VFSTOUFS(mp);
869 struct fs *fs;
870 int error, allerror = 0;
871
872 fs = ump->um_fs;
873 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
874 printf("fs = %s\n", fs->fs_fsmnt);
875 panic("update: rofs mod");
876 }
877 /*
878 * Write back each (modified) inode.
879 */
880 simple_lock(&mntvnode_slock);
881 loop:
882 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
883 /*
884 * If the vnode that we are about to sync is no longer
885 * associated with this mount point, start over.
886 */
887 if (vp->v_mount != mp)
888 goto loop;
889 simple_lock(&vp->v_interlock);
890 nvp = vp->v_mntvnodes.le_next;
891 ip = VTOI(vp);
892 if (vp->v_type == VNON ||
893 ((ip->i_flag &
894 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
895 vp->v_dirtyblkhd.lh_first == NULL))
896 {
897 simple_unlock(&vp->v_interlock);
898 continue;
899 }
900 simple_unlock(&mntvnode_slock);
901 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
902 if (error) {
903 simple_lock(&mntvnode_slock);
904 if (error == ENOENT)
905 goto loop;
906 continue;
907 }
908 if ((error = VOP_FSYNC(vp, cred,
909 waitfor == MNT_WAIT ? FSYNC_WAIT : 0, p)) != 0)
910 allerror = error;
911 vput(vp);
912 simple_lock(&mntvnode_slock);
913 }
914 simple_unlock(&mntvnode_slock);
915 /*
916 * Force stale file system control information to be flushed.
917 */
918 if (waitfor != MNT_LAZY) {
919 if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
920 waitfor = MNT_NOWAIT;
921 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
922 if ((error = VOP_FSYNC(ump->um_devvp, cred,
923 waitfor == MNT_WAIT ? FSYNC_WAIT : 0, p)) != 0)
924 allerror = error;
925 VOP_UNLOCK(ump->um_devvp, 0);
926 }
927 #ifdef QUOTA
928 qsync(mp);
929 #endif
930 /*
931 * Write back modified superblock.
932 */
933 if (fs->fs_fmod != 0) {
934 fs->fs_fmod = 0;
935 fs->fs_time = time.tv_sec;
936 allerror = ffs_cgupdate(ump, waitfor);
937 }
938 return (allerror);
939 }
940
941 /*
942 * Look up a FFS dinode number to find its incore vnode, otherwise read it
943 * in from disk. If it is in core, wait for the lock bit to clear, then
944 * return the inode locked. Detection and handling of mount points must be
945 * done by the calling routine.
946 */
947 int
948 ffs_vget(mp, ino, vpp)
949 struct mount *mp;
950 ino_t ino;
951 struct vnode **vpp;
952 {
953 struct fs *fs;
954 struct inode *ip;
955 struct ufsmount *ump;
956 struct buf *bp;
957 struct vnode *vp;
958 dev_t dev;
959 int error;
960 caddr_t cp;
961
962 ump = VFSTOUFS(mp);
963 dev = ump->um_dev;
964 do {
965 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
966 return (0);
967 } while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));
968
969 /* Allocate a new vnode/inode. */
970 if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
971 *vpp = NULL;
972 lockmgr(&ufs_hashlock, LK_RELEASE, 0);
973 return (error);
974 }
975 /*
976 * XXX MFS ends up here, too, to allocate an inode. Should we
977 * XXX create another pool for MFS inodes?
978 */
979 ip = pool_get(&ffs_inode_pool, PR_WAITOK);
980 memset((caddr_t)ip, 0, sizeof(struct inode));
981 vp->v_data = ip;
982 ip->i_vnode = vp;
983 ip->i_fs = fs = ump->um_fs;
984 ip->i_dev = dev;
985 ip->i_number = ino;
986 #ifdef QUOTA
987 {
988 int i;
989
990 for (i = 0; i < MAXQUOTAS; i++)
991 ip->i_dquot[i] = NODQUOT;
992 }
993 #endif
994 /*
995 * Put it onto its hash chain and lock it so that other requests for
996 * this inode will block if they arrive while we are sleeping waiting
997 * for old data structures to be purged or for the contents of the
998 * disk portion of this inode to be read.
999 */
1000 ufs_ihashins(ip);
1001 lockmgr(&ufs_hashlock, LK_RELEASE, 0);
1002
1003 /* Read in the disk contents for the inode, copy into the inode. */
1004 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1005 (int)fs->fs_bsize, NOCRED, &bp);
1006 if (error) {
1007 /*
1008 * The inode does not contain anything useful, so it would
1009 * be misleading to leave it on its hash chain. With mode
1010 * still zero, it will be unlinked and returned to the free
1011 * list by vput().
1012 */
1013 vput(vp);
1014 brelse(bp);
1015 *vpp = NULL;
1016 return (error);
1017 }
1018 cp = (caddr_t)bp->b_data + (ino_to_fsbo(fs, ino) * DINODE_SIZE);
1019 #ifdef FFS_EI
1020 if (UFS_FSNEEDSWAP(fs))
1021 ffs_dinode_swap((struct dinode *)cp, &ip->i_din.ffs_din);
1022 else
1023 #endif
1024 memcpy(&ip->i_din.ffs_din, cp, DINODE_SIZE);
1025 if (DOINGSOFTDEP(vp))
1026 softdep_load_inodeblock(ip);
1027 else
1028 ip->i_ffs_effnlink = ip->i_ffs_nlink;
1029 brelse(bp);
1030
1031 /*
1032 * Initialize the vnode from the inode, check for aliases.
1033 * Note that the underlying vnode may have changed.
1034 */
1035 error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1036 if (error) {
1037 vput(vp);
1038 *vpp = NULL;
1039 return (error);
1040 }
1041 /*
1042 * Finish inode initialization now that aliasing has been resolved.
1043 */
1044 ip->i_devvp = ump->um_devvp;
1045 VREF(ip->i_devvp);
1046 /*
1047 * Ensure that uid and gid are correct. This is a temporary
1048 * fix until fsck has been changed to do the update.
1049 */
1050 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
1051 ip->i_ffs_uid = ip->i_din.ffs_din.di_ouid; /* XXX */
1052 ip->i_ffs_gid = ip->i_din.ffs_din.di_ogid; /* XXX */
1053 } /* XXX */
1054
1055 *vpp = vp;
1056 return (0);
1057 }
1058
1059 /*
1060 * File handle to vnode
1061 *
1062 * Have to be really careful about stale file handles:
1063 * - check that the inode number is valid
1064 * - call ffs_vget() to get the locked inode
1065 * - check for an unallocated inode (i_mode == 0)
1066 * - check that the given client host has export rights and return
1067 * those rights via. exflagsp and credanonp
1068 */
1069 int
1070 ffs_fhtovp(mp, fhp, vpp)
1071 struct mount *mp;
1072 struct fid *fhp;
1073 struct vnode **vpp;
1074 {
1075 struct ufid *ufhp;
1076 struct fs *fs;
1077
1078 ufhp = (struct ufid *)fhp;
1079 fs = VFSTOUFS(mp)->um_fs;
1080 if (ufhp->ufid_ino < ROOTINO ||
1081 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1082 return (ESTALE);
1083 return (ufs_fhtovp(mp, ufhp, vpp));
1084 }
1085
1086 /*
1087 * Vnode pointer to File handle
1088 */
1089 /* ARGSUSED */
1090 int
1091 ffs_vptofh(vp, fhp)
1092 struct vnode *vp;
1093 struct fid *fhp;
1094 {
1095 struct inode *ip;
1096 struct ufid *ufhp;
1097
1098 ip = VTOI(vp);
1099 ufhp = (struct ufid *)fhp;
1100 ufhp->ufid_len = sizeof(struct ufid);
1101 ufhp->ufid_ino = ip->i_number;
1102 ufhp->ufid_gen = ip->i_ffs_gen;
1103 return (0);
1104 }
1105
1106 void
1107 ffs_init()
1108 {
1109 if (ffs_initcount++ > 0)
1110 return;
1111
1112 softdep_initialize();
1113 ufs_init();
1114
1115 pool_init(&ffs_inode_pool, sizeof(struct inode), 0, 0, 0, "ffsinopl",
1116 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FFSNODE);
1117 }
1118
1119 void
1120 ffs_done()
1121 {
1122 if (--ffs_initcount > 0)
1123 return;
1124
1125 /* XXX softdep cleanup ? */
1126 ufs_done();
1127 pool_destroy(&ffs_inode_pool);
1128 }
1129
1130 int
1131 ffs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1132 int *name;
1133 u_int namelen;
1134 void *oldp;
1135 size_t *oldlenp;
1136 void *newp;
1137 size_t newlen;
1138 struct proc *p;
1139 {
1140 extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1141 extern int ffs_log_changeopt;
1142
1143 /* all sysctl names at this level are terminal */
1144 if (namelen != 1)
1145 return (ENOTDIR); /* overloaded */
1146
1147 switch (name[0]) {
1148 case FFS_CLUSTERREAD:
1149 return (sysctl_int(oldp, oldlenp, newp, newlen,
1150 &doclusterread));
1151 case FFS_CLUSTERWRITE:
1152 return (sysctl_int(oldp, oldlenp, newp, newlen,
1153 &doclusterwrite));
1154 case FFS_REALLOCBLKS:
1155 return (sysctl_int(oldp, oldlenp, newp, newlen,
1156 &doreallocblks));
1157 case FFS_ASYNCFREE:
1158 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1159 case FFS_LOG_CHANGEOPT:
1160 return (sysctl_int(oldp, oldlenp, newp, newlen,
1161 &ffs_log_changeopt));
1162 default:
1163 return (EOPNOTSUPP);
1164 }
1165 /* NOTREACHED */
1166 }
1167
1168 /*
1169 * Write a superblock and associated information back to disk.
1170 */
1171 int
1172 ffs_sbupdate(mp, waitfor)
1173 struct ufsmount *mp;
1174 int waitfor;
1175 {
1176 struct fs *fs = mp->um_fs;
1177 struct buf *bp;
1178 int i, error = 0;
1179 int32_t saved_nrpos = fs->fs_nrpos;
1180 int64_t saved_qbmask = fs->fs_qbmask;
1181 int64_t saved_qfmask = fs->fs_qfmask;
1182 u_int64_t saved_maxfilesize = fs->fs_maxfilesize;
1183 u_int8_t saveflag;
1184
1185 /* Restore compatibility to old file systems. XXX */
1186 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
1187 fs->fs_nrpos = -1; /* XXX */
1188 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
1189 int32_t *lp, tmp; /* XXX */
1190 /* XXX */
1191 lp = (int32_t *)&fs->fs_qbmask; /* XXX nuke qfmask too */
1192 tmp = lp[4]; /* XXX */
1193 for (i = 4; i > 0; i--) /* XXX */
1194 lp[i] = lp[i-1]; /* XXX */
1195 lp[0] = tmp; /* XXX */
1196 } /* XXX */
1197 fs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */
1198
1199 bp = getblk(mp->um_devvp, SBOFF >> (fs->fs_fshift - fs->fs_fsbtodb),
1200 (int)fs->fs_sbsize, 0, 0);
1201 saveflag = fs->fs_flags & FS_INTERNAL;
1202 fs->fs_flags &= ~FS_INTERNAL;
1203 memcpy(bp->b_data, fs, fs->fs_sbsize);
1204 #ifdef FFS_EI
1205 if (mp->um_flags & UFS_NEEDSWAP)
1206 ffs_sb_swap(fs, (struct fs*)bp->b_data, 1);
1207 #endif
1208
1209 fs->fs_flags |= saveflag;
1210 fs->fs_nrpos = saved_nrpos; /* XXX */
1211 fs->fs_qbmask = saved_qbmask; /* XXX */
1212 fs->fs_qfmask = saved_qfmask; /* XXX */
1213 fs->fs_maxfilesize = saved_maxfilesize; /* XXX */
1214
1215 if (waitfor == MNT_WAIT)
1216 error = bwrite(bp);
1217 else
1218 bawrite(bp);
1219 return (error);
1220 }
1221
1222 int
1223 ffs_cgupdate(mp, waitfor)
1224 struct ufsmount *mp;
1225 int waitfor;
1226 {
1227 struct fs *fs = mp->um_fs;
1228 struct buf *bp;
1229 int blks;
1230 caddr_t space;
1231 int i, size, error = 0, allerror = 0;
1232
1233 allerror = ffs_sbupdate(mp, waitfor);
1234 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1235 space = (caddr_t)fs->fs_csp[0];
1236 for (i = 0; i < blks; i += fs->fs_frag) {
1237 size = fs->fs_bsize;
1238 if (i + fs->fs_frag > blks)
1239 size = (blks - i) * fs->fs_fsize;
1240 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1241 size, 0, 0);
1242 #ifdef FFS_EI
1243 if (mp->um_flags & UFS_NEEDSWAP)
1244 ffs_csum_swap((struct csum*)space,
1245 (struct csum*)bp->b_data, size);
1246 else
1247 #endif
1248 memcpy(bp->b_data, space, (u_int)size);
1249 space += size;
1250 if (waitfor == MNT_WAIT)
1251 error = bwrite(bp);
1252 else
1253 bawrite(bp);
1254 }
1255 if (!allerror && error)
1256 allerror = error;
1257 return (allerror);
1258 }
1259