lfs_vfsops.c revision 1.71 1 /* $NetBSD: lfs_vfsops.c,v 1.71 2001/12/18 07:51:18 chs Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38 /*-
39 * Copyright (c) 1989, 1991, 1993, 1994
40 * The Regents of the University of California. All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 * must display the following acknowledgement:
52 * This product includes software developed by the University of
53 * California, Berkeley and its contributors.
54 * 4. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * @(#)lfs_vfsops.c 8.20 (Berkeley) 6/10/95
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.71 2001/12/18 07:51:18 chs Exp $");
75
76 #if defined(_KERNEL_OPT)
77 #include "opt_quota.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/proc.h>
84 #include <sys/kernel.h>
85 #include <sys/vnode.h>
86 #include <sys/mount.h>
87 #include <sys/buf.h>
88 #include <sys/device.h>
89 #include <sys/mbuf.h>
90 #include <sys/file.h>
91 #include <sys/disklabel.h>
92 #include <sys/ioctl.h>
93 #include <sys/errno.h>
94 #include <sys/malloc.h>
95 #include <sys/pool.h>
96 #include <sys/socket.h>
97 #include <uvm/uvm_extern.h>
98 #include <sys/sysctl.h>
99
100 #include <miscfs/specfs/specdev.h>
101
102 #include <ufs/ufs/quota.h>
103 #include <ufs/ufs/inode.h>
104 #include <ufs/ufs/ufsmount.h>
105 #include <ufs/ufs/ufs_extern.h>
106
107 #include <ufs/lfs/lfs.h>
108 #include <ufs/lfs/lfs_extern.h>
109
110 int lfs_mountfs(struct vnode *, struct mount *, struct proc *);
111
112 extern const struct vnodeopv_desc lfs_vnodeop_opv_desc;
113 extern const struct vnodeopv_desc lfs_specop_opv_desc;
114 extern const struct vnodeopv_desc lfs_fifoop_opv_desc;
115
116 const struct vnodeopv_desc * const lfs_vnodeopv_descs[] = {
117 &lfs_vnodeop_opv_desc,
118 &lfs_specop_opv_desc,
119 &lfs_fifoop_opv_desc,
120 NULL,
121 };
122
123 struct vfsops lfs_vfsops = {
124 MOUNT_LFS,
125 lfs_mount,
126 ufs_start,
127 lfs_unmount,
128 ufs_root,
129 ufs_quotactl,
130 lfs_statfs,
131 lfs_sync,
132 lfs_vget,
133 lfs_fhtovp,
134 lfs_vptofh,
135 lfs_init,
136 lfs_reinit,
137 lfs_done,
138 lfs_sysctl,
139 lfs_mountroot,
140 ufs_check_export,
141 lfs_vnodeopv_descs,
142 };
143
144 struct genfs_ops lfs_genfsops = {
145 NULL,
146 NULL,
147 genfs_compat_gop_write,
148 };
149
150 struct pool lfs_inode_pool;
151
152 extern int locked_queue_count;
153 extern long locked_queue_bytes;
154
155 /*
156 * Initialize the filesystem, most work done by ufs_init.
157 */
158 void
159 lfs_init()
160 {
161 ufs_init();
162
163 /*
164 * XXX Same structure as FFS inodes? Should we share a common pool?
165 */
166 pool_init(&lfs_inode_pool, sizeof(struct inode), 0, 0, 0,
167 "lfsinopl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
168 M_LFSNODE);
169 }
170
/*
 * Re-initialization hook; simply forwards to ufs_reinit().
 */
void
lfs_reinit(void)
{

	ufs_reinit();
}
176
177 void
178 lfs_done()
179 {
180 ufs_done();
181 pool_destroy(&lfs_inode_pool);
182 }
183
184 /*
185 * Called by main() when ufs is going to be mounted as root.
186 */
187 int
188 lfs_mountroot()
189 {
190 extern struct vnode *rootvp;
191 struct mount *mp;
192 struct proc *p = curproc; /* XXX */
193 int error;
194
195 if (root_device->dv_class != DV_DISK)
196 return (ENODEV);
197
198 if (rootdev == NODEV)
199 return (ENODEV);
200 /*
201 * Get vnodes for swapdev and rootdev.
202 */
203 if ((error = bdevvp(rootdev, &rootvp))) {
204 printf("lfs_mountroot: can't setup bdevvp's");
205 return (error);
206 }
207 if ((error = vfs_rootmountalloc(MOUNT_LFS, "root_device", &mp))) {
208 vrele(rootvp);
209 return (error);
210 }
211 if ((error = lfs_mountfs(rootvp, mp, p))) {
212 mp->mnt_op->vfs_refcount--;
213 vfs_unbusy(mp);
214 free(mp, M_MOUNT);
215 vrele(rootvp);
216 return (error);
217 }
218 simple_lock(&mountlist_slock);
219 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
220 simple_unlock(&mountlist_slock);
221 (void)lfs_statfs(mp, &mp->mnt_stat, p);
222 vfs_unbusy(mp);
223 inittodr(VFSTOUFS(mp)->um_lfs->lfs_tstamp);
224 return (0);
225 }
226
227 /*
228 * VFS Operations.
229 *
230 * mount system call
231 */
232 int
233 lfs_mount(struct mount *mp, const char *path, void *data, struct nameidata *ndp, struct proc *p)
234 {
235 struct vnode *devvp;
236 struct ufs_args args;
237 struct ufsmount *ump = NULL;
238 struct lfs *fs = NULL; /* LFS */
239 size_t size;
240 int error;
241 mode_t accessmode;
242
243 error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
244 if (error)
245 return (error);
246
247 #if 0
248 /* Until LFS can do NFS right. XXX */
249 if (args.export.ex_flags & MNT_EXPORTED)
250 return (EINVAL);
251 #endif
252
253 /*
254 * If updating, check whether changing from read-only to
255 * read/write; if there is no device name, that's all we do.
256 */
257 if (mp->mnt_flag & MNT_UPDATE) {
258 ump = VFSTOUFS(mp);
259 fs = ump->um_lfs;
260 if (fs->lfs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
261 /*
262 * If upgrade to read-write by non-root, then verify
263 * that user has necessary permissions on the device.
264 */
265 if (p->p_ucred->cr_uid != 0) {
266 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
267 error = VOP_ACCESS(ump->um_devvp, VREAD|VWRITE,
268 p->p_ucred, p);
269 VOP_UNLOCK(ump->um_devvp, 0);
270 if (error)
271 return (error);
272 }
273 fs->lfs_ronly = 0;
274 }
275 if (args.fspec == 0) {
276 /*
277 * Process export requests.
278 */
279 return (vfs_export(mp, &ump->um_export, &args.export));
280 }
281 }
282 /*
283 * Not an update, or updating the name: look up the name
284 * and verify that it refers to a sensible block device.
285 */
286 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
287 if ((error = namei(ndp)) != 0)
288 return (error);
289 devvp = ndp->ni_vp;
290 if (devvp->v_type != VBLK) {
291 vrele(devvp);
292 return (ENOTBLK);
293 }
294 if (major(devvp->v_rdev) >= nblkdev) {
295 vrele(devvp);
296 return (ENXIO);
297 }
298 /*
299 * If mount by non-root, then verify that user has necessary
300 * permissions on the device.
301 */
302 if (p->p_ucred->cr_uid != 0) {
303 accessmode = VREAD;
304 if ((mp->mnt_flag & MNT_RDONLY) == 0)
305 accessmode |= VWRITE;
306 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
307 error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
308 if (error) {
309 vput(devvp);
310 return (error);
311 }
312 VOP_UNLOCK(devvp, 0);
313 }
314 if ((mp->mnt_flag & MNT_UPDATE) == 0)
315 error = lfs_mountfs(devvp, mp, p); /* LFS */
316 else {
317 if (devvp != ump->um_devvp)
318 error = EINVAL; /* needs translation */
319 else
320 vrele(devvp);
321 }
322 if (error) {
323 vrele(devvp);
324 return (error);
325 }
326 ump = VFSTOUFS(mp);
327 fs = ump->um_lfs; /* LFS */
328 (void)copyinstr(path, fs->lfs_fsmnt, sizeof(fs->lfs_fsmnt) - 1, &size);
329 bzero(fs->lfs_fsmnt + size, sizeof(fs->lfs_fsmnt) - size);
330 bcopy(fs->lfs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
331 (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
332 &size);
333 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
334 return (0);
335 }
336
337 /*
338 * Roll-forward code.
339 */
340
341 /*
342 * Load the appropriate indirect block, and change the appropriate pointer.
343 * Mark the block dirty. Do segment and avail accounting.
344 */
345 static int
346 update_meta(struct lfs *fs, ino_t ino, int version, ufs_daddr_t lbn,
347 daddr_t ndaddr, size_t size, struct proc *p)
348 {
349 int error;
350 struct vnode *vp;
351 struct inode *ip;
352 daddr_t odaddr, ooff;
353 struct indir a[NIADDR], *ap;
354 struct buf *bp;
355 SEGUSE *sup;
356 int num;
357
358 if ((error = lfs_rf_valloc(fs, ino, version, p, &vp)) != 0) {
359 #ifdef DEBUG_LFS_RFW
360 printf("update_meta: ino %d: lfs_rf_valloc returned %d\n", ino,
361 error);
362 #endif
363 return error;
364 }
365
366 if ((error = VOP_BALLOC(vp, (lbn << fs->lfs_bshift), size,
367 NOCRED, 0, &bp)) != 0) {
368 vput(vp);
369 return (error);
370 }
371 /* No need to write, the block is already on disk */
372 if (bp->b_flags & B_DELWRI) {
373 LFS_UNLOCK_BUF(bp);
374 fs->lfs_avail += btofsb(fs, bp->b_bcount);
375 }
376 bp->b_flags |= B_INVAL;
377 brelse(bp);
378
379 /*
380 * Extend the file, if it is not large enough already.
381 * XXX this is not exactly right, we don't know how much of the
382 * XXX last block is actually used. We hope that an inode will
383 * XXX appear later to give the correct size.
384 */
385 ip = VTOI(vp);
386 if (ip->i_ffs_size <= (lbn << fs->lfs_bshift)) {
387 if (lbn < NDADDR)
388 ip->i_ffs_size = (lbn << fs->lfs_bshift) +
389 (size - fs->lfs_fsize) + 1;
390 else
391 ip->i_ffs_size = (lbn << fs->lfs_bshift) + 1;
392 }
393
394 error = ufs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL);
395 if (error) {
396 #ifdef DEBUG_LFS_RFW
397 printf("update_meta: ufs_bmaparray returned %d\n", error);
398 #endif
399 vput(vp);
400 return error;
401 }
402 switch (num) {
403 case 0:
404 ooff = ip->i_ffs_db[lbn];
405 if (ooff == UNWRITTEN)
406 ip->i_ffs_blocks += btofsb(fs, size);
407 ip->i_ffs_db[lbn] = ndaddr;
408 break;
409 case 1:
410 ooff = ip->i_ffs_ib[a[0].in_off];
411 if (ooff == UNWRITTEN)
412 ip->i_ffs_blocks += btofsb(fs, size);
413 ip->i_ffs_ib[a[0].in_off] = ndaddr;
414 break;
415 default:
416 ap = &a[num - 1];
417 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
418 panic("update_meta: bread bno %d", ap->in_lbn);
419
420 ooff = ((ufs_daddr_t *)bp->b_data)[ap->in_off];
421 if (ooff == UNWRITTEN)
422 ip->i_ffs_blocks += btofsb(fs, size);
423 ((ufs_daddr_t *)bp->b_data)[ap->in_off] = ndaddr;
424 (void) VOP_BWRITE(bp);
425 }
426 LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED | IN_UPDATE);
427
428 /* Update segment usage information. */
429 if (odaddr > 0) {
430 LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, odaddr)), bp);
431 #ifdef DIAGNOSTIC
432 if (sup->su_nbytes < size) {
433 panic("update_meta: negative bytes "
434 "(segment %d short by %ld)\n",
435 dtosn(fs, dbtofsb(fs, odaddr)), (long)size - sup->su_nbytes);
436 sup->su_nbytes = size;
437 }
438 #endif
439 sup->su_nbytes -= size;
440 VOP_BWRITE(bp);
441 }
442 LFS_SEGENTRY(sup, fs, dtosn(fs, ndaddr), bp);
443 sup->su_nbytes += size;
444 VOP_BWRITE(bp);
445
446 /* Fix this so it can be released */
447 /* ip->i_lfs_effnblks = ip->i_ffs_blocks; */
448
449 #ifdef DEBUG_LFS_RFW
450 /* Now look again to make sure it worked */
451 ufs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL );
452 if (dbtofsb(fs, odaddr) != ndaddr)
453 printf("update_meta: failed setting ino %d lbn %d to %x\n",
454 ino, lbn, ndaddr);
455 #endif
456 vput(vp);
457 return 0;
458 }
459
460 static int
461 update_inoblk(struct lfs *fs, daddr_t offset, struct ucred *cred,
462 struct proc *p)
463 {
464 struct vnode *devvp, *vp;
465 struct inode *ip;
466 struct dinode *dip;
467 struct buf *dbp, *ibp;
468 int error;
469 daddr_t daddr;
470 IFILE *ifp;
471 SEGUSE *sup;
472
473 devvp = VTOI(fs->lfs_ivnode)->i_devvp;
474
475 /*
476 * Get the inode, update times and perms.
477 * DO NOT update disk blocks, we do that separately.
478 */
479 error = bread(devvp, fsbtodb(fs, offset), fs->lfs_ibsize, cred, &dbp);
480 if (error) {
481 #ifdef DEBUG_LFS_RFW
482 printf("update_inoblk: bread returned %d\n", error);
483 #endif
484 return error;
485 }
486 dip = ((struct dinode *)(dbp->b_data)) + INOPB(fs);
487 while (--dip >= (struct dinode *)dbp->b_data) {
488 if (dip->di_inumber > LFS_IFILE_INUM) {
489 /* printf("ino %d version %d\n", dip->di_inumber,
490 dip->di_gen); */
491 error = lfs_rf_valloc(fs, dip->di_inumber, dip->di_gen,
492 p, &vp);
493 if (error) {
494 #ifdef DEBUG_LFS_RFW
495 printf("update_inoblk: lfs_rf_valloc returned %d\n", error);
496 #endif
497 continue;
498 }
499 ip = VTOI(vp);
500 if (dip->di_size != ip->i_ffs_size)
501 VOP_TRUNCATE(vp, dip->di_size, 0, NOCRED, p);
502 /* Get mode, link count, size, and times */
503 memcpy(&ip->i_din.ffs_din, dip,
504 offsetof(struct dinode, di_db[0]));
505
506 /* Then the rest, except di_blocks */
507 ip->i_ffs_flags = dip->di_flags;
508 ip->i_ffs_gen = dip->di_gen;
509 ip->i_ffs_uid = dip->di_uid;
510 ip->i_ffs_gid = dip->di_gid;
511
512 ip->i_ffs_effnlink = dip->di_nlink;
513
514 LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED | IN_UPDATE);
515
516 /* Re-initialize to get type right */
517 ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p,
518 &vp);
519 vput(vp);
520
521 /* Record change in location */
522 LFS_IENTRY(ifp, fs, dip->di_inumber, ibp);
523 daddr = ifp->if_daddr;
524 ifp->if_daddr = dbtofsb(fs, dbp->b_blkno);
525 error = VOP_BWRITE(ibp); /* Ifile */
526 /* And do segment accounting */
527 if (dtosn(fs, daddr) != dtosn(fs, dbtofsb(fs, dbp->b_blkno))) {
528 if (daddr > 0) {
529 LFS_SEGENTRY(sup, fs, dtosn(fs, daddr),
530 ibp);
531 sup->su_nbytes -= DINODE_SIZE;
532 VOP_BWRITE(ibp);
533 }
534 LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, dbp->b_blkno)),
535 ibp);
536 sup->su_nbytes += DINODE_SIZE;
537 VOP_BWRITE(ibp);
538 }
539 }
540 }
541 dbp->b_flags |= B_AGE;
542 brelse(dbp);
543
544 return 0;
545 }
546
547 #define CHECK_CKSUM 0x0001 /* Check the checksum to make sure it's valid */
548 #define CHECK_UPDATE 0x0002 /* Update Ifile for new data blocks / inodes */
549
550 static daddr_t
551 check_segsum(struct lfs *fs, daddr_t offset,
552 struct ucred *cred, int flags, int *pseg_flags, struct proc *p)
553 {
554 struct vnode *devvp;
555 struct buf *bp, *dbp;
556 int error, nblocks, ninos, i, j;
557 SEGSUM *ssp;
558 u_long *dp, *datap; /* XXX u_int32_t */
559 daddr_t *iaddr, oldoffset;
560 FINFO *fip;
561 SEGUSE *sup;
562 size_t size;
563 u_int64_t serial;
564
565 devvp = VTOI(fs->lfs_ivnode)->i_devvp;
566 /*
567 * If the segment has a superblock and we're at the top
568 * of the segment, skip the superblock.
569 */
570 if (sntod(fs, dtosn(fs, offset)) == offset) {
571 LFS_SEGENTRY(sup, fs, dtosn(fs, offset), bp);
572 if (sup->su_flags & SEGUSE_SUPERBLOCK)
573 offset += btofsb(fs, LFS_SBPAD);
574 brelse(bp);
575 }
576
577 /* Read in the segment summary */
578 error = bread(devvp, offset, fs->lfs_sumsize, cred, &bp);
579 if (error)
580 return -1;
581
582 /* Check summary checksum */
583 ssp = (SEGSUM *)bp->b_data;
584 if (flags & CHECK_CKSUM) {
585 if (ssp->ss_sumsum != cksum(&ssp->ss_datasum,
586 fs->lfs_sumsize -
587 sizeof(ssp->ss_sumsum))) {
588 #ifdef DEBUG_LFS_RFW
589 printf("Sumsum error at 0x%x\n", offset);
590 #endif
591 offset = -1;
592 goto err1;
593 }
594 if (ssp->ss_nfinfo == 0 && ssp->ss_ninos == 0) {
595 #ifdef DEBUG_LFS_RFW
596 printf("Empty pseg at 0x%x\n", offset);
597 #endif
598 offset = -1;
599 goto err1;
600 }
601 if (ssp->ss_create < fs->lfs_tstamp) {
602 #ifdef DEBUG_LFS_RFW
603 printf("Old data at 0x%x\n", offset);
604 #endif
605 offset = -1;
606 goto err1;
607 }
608 }
609 if (fs->lfs_version > 1) {
610 serial = ssp->ss_serial;
611 if (serial != fs->lfs_serial + 1) {
612 #ifdef DEBUG_LFS_RFW
613 printf("Unexpected serial number at 0x%x\n", offset);
614 #endif
615 offset = -1;
616 goto err1;
617 }
618 if (ssp->ss_ident != fs->lfs_ident) {
619 #ifdef DEBUG_LFS_RFW
620 printf("Incorrect fsid (0x%x vs 0x%x) at 0x%x\n",
621 ssp->ss_ident, fs->lfs_ident, offset);
622 #endif
623 offset = -1;
624 goto err1;
625 }
626 }
627 if (pseg_flags)
628 *pseg_flags = ssp->ss_flags;
629 oldoffset = offset;
630 offset += btofsb(fs, fs->lfs_sumsize);
631
632 ninos = howmany(ssp->ss_ninos, INOPB(fs));
633 iaddr = (daddr_t *)(bp->b_data + fs->lfs_sumsize - sizeof(daddr_t));
634 if (flags & CHECK_CKSUM) {
635 /* Count blocks */
636 nblocks = 0;
637 fip = (FINFO *)(bp->b_data + SEGSUM_SIZE(fs));
638 for (i = 0; i < ssp->ss_nfinfo; ++i) {
639 nblocks += fip->fi_nblocks;
640 if (fip->fi_nblocks <= 0)
641 break;
642 fip = (FINFO *)(((char *)fip) + sizeof(FINFO) +
643 (fip->fi_nblocks - 1) *
644 sizeof(ufs_daddr_t));
645 }
646 nblocks += ninos;
647 /* Create the sum array */
648 datap = dp = (u_long *)malloc(nblocks * sizeof(u_long),
649 M_SEGMENT, M_WAITOK);
650 }
651
652 /* Handle individual blocks */
653 fip = (FINFO *)(bp->b_data + SEGSUM_SIZE(fs));
654 for (i = 0; i < ssp->ss_nfinfo || ninos; ++i) {
655 /* Inode block? */
656 if (ninos && *iaddr == offset) {
657 if (flags & CHECK_CKSUM) {
658 /* Read in the head and add to the buffer */
659 error = bread(devvp, fsbtodb(fs, offset), fs->lfs_bsize,
660 cred, &dbp);
661 if (error) {
662 offset = -1;
663 goto err2;
664 }
665 (*dp++) = ((u_long *)(dbp->b_data))[0];
666 dbp->b_flags |= B_AGE;
667 brelse(dbp);
668 }
669 if (flags & CHECK_UPDATE) {
670 if ((error = update_inoblk(fs, offset, cred, p))
671 != 0) {
672 offset = -1;
673 goto err2;
674 }
675 }
676 offset += btofsb(fs, fs->lfs_ibsize);
677 --iaddr;
678 --ninos;
679 --i; /* compensate */
680 continue;
681 }
682 /* printf("check: blocks from ino %d version %d\n",
683 fip->fi_ino, fip->fi_version); */
684 size = fs->lfs_bsize;
685 for (j = 0; j < fip->fi_nblocks; ++j) {
686 if (j == fip->fi_nblocks - 1)
687 size = fip->fi_lastlength;
688 if (flags & CHECK_CKSUM) {
689 error = bread(devvp, fsbtodb(fs, offset), size, cred, &dbp);
690 if (error) {
691 offset = -1;
692 goto err2;
693 }
694 (*dp++) = ((u_long *)(dbp->b_data))[0];
695 dbp->b_flags |= B_AGE;
696 brelse(dbp);
697 }
698 /* Account for and update any direct blocks */
699 if ((flags & CHECK_UPDATE) &&
700 fip->fi_ino > LFS_IFILE_INUM &&
701 fip->fi_blocks[j] >= 0) {
702 update_meta(fs, fip->fi_ino, fip->fi_version,
703 fip->fi_blocks[j], offset, size, p);
704 }
705 offset += btofsb(fs, size);
706 }
707 fip = (FINFO *)(((char *)fip) + sizeof(FINFO)
708 + (fip->fi_nblocks - 1) * sizeof(ufs_daddr_t));
709 }
710 /* Checksum the array, compare */
711 if ((flags & CHECK_CKSUM) &&
712 ssp->ss_datasum != cksum(datap, nblocks * sizeof(u_long)))
713 {
714 #ifdef DEBUG_LFS_RFW
715 printf("Datasum error at 0x%x (wanted %x got %x)\n", offset,
716 ssp->ss_datasum, cksum(datap, nblocks *
717 sizeof(u_long)));
718 #endif
719 offset = -1;
720 goto err2;
721 }
722
723 /* If we're at the end of the segment, move to the next */
724 if (dtosn(fs, offset + btofsb(fs, fs->lfs_sumsize + fs->lfs_bsize)) !=
725 dtosn(fs, offset)) {
726 if (dtosn(fs, offset) == dtosn(fs, ssp->ss_next)) {
727 offset = -1;
728 goto err2;
729 }
730 offset = ssp->ss_next;
731 #ifdef DEBUG_LFS_RFW
732 printf("LFS roll forward: moving on to offset 0x%x "
733 " -> segment %d\n", offset, dtosn(fs,offset));
734 #endif
735 }
736
737 if (flags & CHECK_UPDATE) {
738 fs->lfs_avail -= (offset - oldoffset);
739 /* Don't clog the buffer queue */
740 if (locked_queue_count > LFS_MAX_BUFS ||
741 locked_queue_bytes > LFS_MAX_BYTES) {
742 ++fs->lfs_writer;
743 lfs_flush(fs, SEGM_CKP);
744 if (--fs->lfs_writer == 0)
745 wakeup(&fs->lfs_dirops);
746 }
747 }
748
749 err2:
750 if (flags & CHECK_CKSUM)
751 free(datap, M_SEGMENT);
752 err1:
753 bp->b_flags |= B_AGE;
754 brelse(bp);
755
756 /* XXX should we update the serial number even for bad psegs? */
757 if ((flags & CHECK_UPDATE) && offset > 0 && fs->lfs_version > 1)
758 fs->lfs_serial = serial;
759 return offset;
760 }
761
762 /*
763 * Common code for mount and mountroot
764 * LFS specific
765 */
766 int
767 lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
768 {
769 extern struct vnode *rootvp;
770 struct dlfs *tdfs, *dfs, *adfs;
771 struct lfs *fs;
772 struct ufsmount *ump;
773 struct vnode *vp;
774 struct buf *bp, *abp;
775 struct partinfo dpart;
776 dev_t dev;
777 int error, i, ronly, secsize, fsbsize;
778 struct ucred *cred;
779 CLEANERINFO *cip;
780 SEGUSE *sup;
781 int flags, dirty, do_rollforward;
782 daddr_t offset, oldoffset, lastgoodpseg, sb_addr;
783 int sn, curseg;
784
785 cred = p ? p->p_ucred : NOCRED;
786 /*
787 * Disallow multiple mounts of the same device.
788 * Disallow mounting of a device that is currently in use
789 * (except for root, which might share swap device for miniroot).
790 * Flush out any old buffers remaining from a previous use.
791 */
792 if ((error = vfs_mountedon(devvp)) != 0)
793 return (error);
794 if (vcount(devvp) > 1 && devvp != rootvp)
795 return (EBUSY);
796 if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0)
797 return (error);
798
799 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
800 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
801 if (error)
802 return (error);
803 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
804 secsize = DEV_BSIZE;
805 else
806 secsize = dpart.disklab->d_secsize;
807
808 /* Don't free random space on error. */
809 bp = NULL;
810 abp = NULL;
811 ump = NULL;
812
813 sb_addr = LFS_LABELPAD / secsize;
814 while (1) {
815 /* Read in the superblock. */
816 error = bread(devvp, sb_addr, LFS_SBPAD, cred, &bp);
817 if (error)
818 goto out;
819 dfs = (struct dlfs *)bp->b_data;
820
821 /* Check the basics. */
822 if (dfs->dlfs_magic != LFS_MAGIC || dfs->dlfs_bsize >= MAXBSIZE ||
823 dfs->dlfs_version > LFS_VERSION ||
824 dfs->dlfs_bsize < sizeof(struct dlfs)) {
825 #ifdef DEBUG_LFS
826 printf("lfs_mountfs: primary superblock sanity failed\n");
827 #endif
828 error = EINVAL; /* XXX needs translation */
829 goto out;
830 }
831 if (dfs->dlfs_inodefmt > LFS_MAXINODEFMT)
832 printf("lfs_mountfs: warning: unknown inode format %d\n",
833 dfs->dlfs_inodefmt);
834
835 if (dfs->dlfs_version == 1)
836 fsbsize = secsize;
837 else {
838 fsbsize = 1 << (dfs->dlfs_bshift - dfs->dlfs_blktodb +
839 dfs->dlfs_fsbtodb);
840 /*
841 * Could be, if the frag size is large enough, that we
842 * don't have the "real" primary superblock. If that's
843 * the case, get the real one, and try again.
844 */
845 if (sb_addr != dfs->dlfs_sboffs[0] <<
846 dfs->dlfs_fsbtodb) {
847 /* #ifdef DEBUG_LFS */
848 printf("lfs_mountfs: sb daddr 0x%x is not right, trying 0x%x\n",
849 sb_addr, dfs->dlfs_sboffs[0] <<
850 dfs->dlfs_fsbtodb);
851 /* #endif */
852 sb_addr = dfs->dlfs_sboffs[0] <<
853 dfs->dlfs_fsbtodb;
854 brelse(bp);
855 continue;
856 }
857 }
858 break;
859 }
860
861 /*
862 * Check the second superblock to see which is newer; then mount
863 * using the older of the two. This is necessary to ensure that
864 * the filesystem is valid if it was not unmounted cleanly.
865 */
866
867 if (dfs->dlfs_sboffs[1] &&
868 dfs->dlfs_sboffs[1] - LFS_LABELPAD / fsbsize > LFS_SBPAD / fsbsize)
869 {
870 error = bread(devvp, dfs->dlfs_sboffs[1] * (fsbsize / secsize),
871 LFS_SBPAD, cred, &abp);
872 if (error)
873 goto out;
874 adfs = (struct dlfs *)abp->b_data;
875
876 if (dfs->dlfs_version == 1) {
877 /* 1s resolution comparison */
878 if (adfs->dlfs_tstamp < dfs->dlfs_tstamp)
879 tdfs = adfs;
880 else
881 tdfs = dfs;
882 } else {
883 /* monotonic infinite-resolution comparison */
884 if (adfs->dlfs_serial < dfs->dlfs_serial)
885 tdfs = adfs;
886 else
887 tdfs = dfs;
888 }
889
890 /* Check the basics. */
891 if (tdfs->dlfs_magic != LFS_MAGIC ||
892 tdfs->dlfs_bsize > MAXBSIZE ||
893 tdfs->dlfs_version > LFS_VERSION ||
894 tdfs->dlfs_bsize < sizeof(struct dlfs)) {
895 #ifdef DEBUG_LFS
896 printf("lfs_mountfs: alt superblock sanity failed\n");
897 #endif
898 error = EINVAL; /* XXX needs translation */
899 goto out;
900 }
901 } else {
902 #ifdef DEBUG_LFS
903 printf("lfs_mountfs: invalid alt superblock daddr=0x%x\n",
904 dfs->dlfs_sboffs[1]);
905 #endif
906 error = EINVAL;
907 goto out;
908 }
909
910 /* Allocate the mount structure, copy the superblock into it. */
911 fs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK);
912 memcpy(&fs->lfs_dlfs, tdfs, sizeof(struct dlfs));
913
914 /* Compatibility */
915 if (fs->lfs_version < 2) {
916 fs->lfs_sumsize = LFS_V1_SUMMARY_SIZE;
917 fs->lfs_ibsize = fs->lfs_bsize;
918 fs->lfs_start = fs->lfs_sboffs[0];
919 fs->lfs_tstamp = fs->lfs_otstamp;
920 fs->lfs_fsbtodb = 0;
921 }
922
923 /* Before rolling forward, lock so vget will sleep for other procs */
924 fs->lfs_flags = LFS_NOTYET;
925 fs->lfs_rfpid = p->p_pid;
926
927 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
928 memset((caddr_t)ump, 0, sizeof *ump);
929 ump->um_lfs = fs;
930 if (sizeof(struct lfs) < LFS_SBPAD) { /* XXX why? */
931 bp->b_flags |= B_INVAL;
932 abp->b_flags |= B_INVAL;
933 }
934 brelse(bp);
935 bp = NULL;
936 brelse(abp);
937 abp = NULL;
938
939 /* Set up the I/O information */
940 fs->lfs_devbsize = secsize;
941 fs->lfs_iocount = 0;
942 fs->lfs_diropwait = 0;
943 fs->lfs_activesb = 0;
944 fs->lfs_uinodes = 0;
945 fs->lfs_ravail = 0;
946 fs->lfs_sbactive = 0;
947 #ifdef LFS_TRACK_IOS
948 for (i = 0; i < LFS_THROTTLE; i++)
949 fs->lfs_pending[i] = LFS_UNUSED_DADDR;
950 #endif
951
952 /* Set up the ifile and lock aflags */
953 fs->lfs_doifile = 0;
954 fs->lfs_writer = 0;
955 fs->lfs_dirops = 0;
956 fs->lfs_nadirop = 0;
957 fs->lfs_seglock = 0;
958 lockinit(&fs->lfs_freelock, PINOD, "lfs_freelock", 0, 0);
959
960 /* Set the file system readonly/modify bits. */
961 fs->lfs_ronly = ronly;
962 if (ronly == 0)
963 fs->lfs_fmod = 1;
964
965 /* Initialize the mount structure. */
966 dev = devvp->v_rdev;
967 mp->mnt_data = (qaddr_t)ump;
968 mp->mnt_stat.f_fsid.val[0] = (long)dev;
969 mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_LFS);
970 mp->mnt_stat.f_iosize = fs->lfs_bsize;
971 mp->mnt_maxsymlinklen = fs->lfs_maxsymlinklen;
972 mp->mnt_flag |= MNT_LOCAL;
973 ump->um_flags = 0;
974 ump->um_mountp = mp;
975 ump->um_dev = dev;
976 ump->um_devvp = devvp;
977 ump->um_bptrtodb = fs->lfs_fsbtodb;
978 ump->um_seqinc = fragstofsb(fs, fs->lfs_frag);
979 ump->um_nindir = fs->lfs_nindir;
980 ump->um_lognindir = ffs(fs->lfs_nindir) - 1;
981 for (i = 0; i < MAXQUOTAS; i++)
982 ump->um_quotas[i] = NULLVP;
983 devvp->v_specmountpoint = mp;
984
985 /*
986 * We use the ifile vnode for almost every operation. Instead of
987 * retrieving it from the hash table each time we retrieve it here,
988 * artificially increment the reference count and keep a pointer
989 * to it in the incore copy of the superblock.
990 */
991 if ((error = VFS_VGET(mp, LFS_IFILE_INUM, &vp)) != 0) {
992 #ifdef DEBUG
993 printf("lfs_mountfs: ifile vget failed, error=%d\n", error);
994 #endif
995 goto out;
996 }
997 fs->lfs_ivnode = vp;
998 VREF(vp);
999 vput(vp);
1000
1001 /*
1002 * Roll forward.
1003 *
1004 * We don't automatically roll forward for v1 filesystems, because
1005 * of the danger that the clock was turned back between the last
1006 * checkpoint and crash. This would roll forward garbage.
1007 *
1008 * v2 filesystems don't have this problem because they use a
1009 * monotonically increasing serial number instead of a timestamp.
1010 */
1011 #ifdef LFS_DO_ROLLFORWARD
1012 do_rollforward = !fs->lfs_ronly;
1013 #else
1014 do_rollforward = (fs->lfs_version > 1 && !fs->lfs_ronly &&
1015 !(fs->lfs_pflags & LFS_PF_CLEAN));
1016 #endif
1017 if (do_rollforward) {
1018 /*
1019 * Phase I: Find the address of the last good partial
1020 * segment that was written after the checkpoint. Mark
1021 * the segments in question dirty, so they won't be
1022 * reallocated.
1023 */
1024 lastgoodpseg = oldoffset = offset = fs->lfs_offset;
1025 flags = 0x0;
1026 #ifdef DEBUG_LFS_RFW
1027 printf("LFS roll forward phase 1: starting at offset 0x%x\n",
1028 offset);
1029 #endif
1030 LFS_SEGENTRY(sup, fs, dtosn(fs, offset), bp);
1031 if (!(sup->su_flags & SEGUSE_DIRTY))
1032 --fs->lfs_nclean;
1033 sup->su_flags |= SEGUSE_DIRTY;
1034 (void) VOP_BWRITE(bp);
1035 while ((offset = check_segsum(fs, offset, cred, CHECK_CKSUM,
1036 &flags, p)) > 0)
1037 {
1038 if (sntod(fs, oldoffset) != sntod(fs, offset)) {
1039 LFS_SEGENTRY(sup, fs, dtosn(fs, oldoffset),
1040 bp);
1041 if (!(sup->su_flags & SEGUSE_DIRTY))
1042 --fs->lfs_nclean;
1043 sup->su_flags |= SEGUSE_DIRTY;
1044 (void) VOP_BWRITE(bp);
1045 }
1046
1047 #ifdef DEBUG_LFS_RFW
1048 printf("LFS roll forward phase 1: offset=0x%x\n",
1049 offset);
1050 if (flags & SS_DIROP) {
1051 printf("lfs_mountfs: dirops at 0x%x\n",
1052 oldoffset);
1053 if (!(flags & SS_CONT))
1054 printf("lfs_mountfs: dirops end "
1055 "at 0x%x\n", oldoffset);
1056 }
1057 #endif
1058 if (!(flags & SS_CONT))
1059 lastgoodpseg = offset;
1060 oldoffset = offset;
1061 }
1062 #ifdef DEBUG_LFS_RFW
1063 if (flags & SS_CONT) {
1064 printf("LFS roll forward: warning: incomplete "
1065 "dirops discarded\n");
1066 }
1067 printf("LFS roll forward phase 1: completed: "
1068 "lastgoodpseg=0x%x\n", lastgoodpseg);
1069 #endif
1070 oldoffset = fs->lfs_offset;
1071 if (fs->lfs_offset != lastgoodpseg) {
1072 /* Don't overwrite what we're trying to preserve */
1073 offset = fs->lfs_offset;
1074 fs->lfs_offset = lastgoodpseg;
1075 fs->lfs_curseg = sntod(fs, dtosn(fs, fs->lfs_offset));
1076 for (sn = curseg = dtosn(fs, fs->lfs_curseg);;) {
1077 sn = (sn + 1) % fs->lfs_nseg;
1078 if (sn == curseg)
1079 panic("lfs_mountfs: no clean segments");
1080 LFS_SEGENTRY(sup, fs, sn, bp);
1081 dirty = (sup->su_flags & SEGUSE_DIRTY);
1082 brelse(bp);
1083 if (!dirty)
1084 break;
1085 }
1086 fs->lfs_nextseg = sntod(fs, sn);
1087
1088 /*
1089 * Phase II: Roll forward from the first superblock.
1090 */
1091 while (offset != lastgoodpseg) {
1092 #ifdef DEBUG_LFS_RFW
1093 printf("LFS roll forward phase 2: 0x%x\n",
1094 offset);
1095 #endif
1096 offset = check_segsum(fs, offset, cred,
1097 CHECK_UPDATE, NULL, p);
1098 }
1099
1100 /*
1101 * Finish: flush our changes to disk.
1102 */
1103 lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC);
1104 printf("lfs_mountfs: roll forward recovered %d blocks\n",
1105 lastgoodpseg - oldoffset);
1106 }
1107 #ifdef DEBUG_LFS_RFW
1108 printf("LFS roll forward complete\n");
1109 #endif
1110 }
1111 /* If writing, sb is not clean; record in case of immediate crash */
1112 if (!fs->lfs_ronly) {
1113 fs->lfs_pflags &= ~LFS_PF_CLEAN;
1114 lfs_writesuper(fs, fs->lfs_sboffs[0]);
1115 }
1116
1117 /* Allow vget now that roll-forward is complete */
1118 fs->lfs_flags &= ~(LFS_NOTYET);
1119 wakeup(&fs->lfs_flags);
1120
1121 /*
1122 * Initialize the ifile cleaner info with information from
1123 * the superblock.
1124 */
1125 LFS_CLEANERINFO(cip, fs, bp);
1126 cip->clean = fs->lfs_nclean;
1127 cip->dirty = fs->lfs_nseg - fs->lfs_nclean;
1128 cip->avail = fs->lfs_avail;
1129 cip->bfree = fs->lfs_bfree;
1130 (void) VOP_BWRITE(bp); /* Ifile */
1131
1132 /*
1133 * Mark the current segment as ACTIVE, since we're going to
1134 * be writing to it.
1135 */
1136 LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp);
1137 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
1138 (void) VOP_BWRITE(bp); /* Ifile */
1139
1140 return (0);
1141 out:
1142 if (bp)
1143 brelse(bp);
1144 if (abp)
1145 brelse(abp);
1146 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1147 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
1148 VOP_UNLOCK(devvp, 0);
1149 if (ump) {
1150 free(ump->um_lfs, M_UFSMNT);
1151 free(ump, M_UFSMNT);
1152 mp->mnt_data = (qaddr_t)0;
1153 }
1154 return (error);
1155 }
1156
1157 /*
1158 * unmount system call
1159 */
1160 int
1161 lfs_unmount(struct mount *mp, int mntflags, struct proc *p)
1162 {
1163 struct ufsmount *ump;
1164 struct lfs *fs;
1165 int error, flags, ronly, s;
1166 extern int lfs_allclean_wakeup;
1167
1168 flags = 0;
1169 if (mntflags & MNT_FORCE)
1170 flags |= FORCECLOSE;
1171
1172 ump = VFSTOUFS(mp);
1173 fs = ump->um_lfs;
1174 #ifdef QUOTA
1175 if (mp->mnt_flag & MNT_QUOTA) {
1176 int i;
1177 error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags);
1178 if (error)
1179 return (error);
1180 for (i = 0; i < MAXQUOTAS; i++) {
1181 if (ump->um_quotas[i] == NULLVP)
1182 continue;
1183 quotaoff(p, mp, i);
1184 }
1185 /*
1186 * Here we fall through to vflush again to ensure
1187 * that we have gotten rid of all the system vnodes.
1188 */
1189 }
1190 #endif
1191 if ((error = vflush(mp, fs->lfs_ivnode, flags)) != 0)
1192 return (error);
1193 if ((error = VFS_SYNC(mp, 1, p->p_ucred, p)) != 0)
1194 return (error);
1195 if (fs->lfs_ivnode->v_dirtyblkhd.lh_first)
1196 panic("lfs_unmount: still dirty blocks on ifile vnode\n");
1197
1198 /* Explicitly write the superblock, to update serial and pflags */
1199 fs->lfs_pflags |= LFS_PF_CLEAN;
1200 lfs_writesuper(fs, fs->lfs_sboffs[0]);
1201 lfs_writesuper(fs, fs->lfs_sboffs[1]);
1202
1203 /* Finish with the Ifile, now that we're done with it */
1204 vrele(fs->lfs_ivnode);
1205 vgone(fs->lfs_ivnode);
1206
1207 /* Wait for superblock writes to complete */
1208 s = splbio();
1209 while (fs->lfs_iocount)
1210 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0);
1211 splx(s);
1212
1213 ronly = !fs->lfs_ronly;
1214 if (ump->um_devvp->v_type != VBAD)
1215 ump->um_devvp->v_specmountpoint = NULL;
1216 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1217 error = VOP_CLOSE(ump->um_devvp,
1218 ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
1219 vput(ump->um_devvp);
1220
1221 /* XXX KS - wake up the cleaner so it can die */
1222 wakeup(&fs->lfs_nextseg);
1223 wakeup(&lfs_allclean_wakeup);
1224
1225 free(fs, M_UFSMNT);
1226 free(ump, M_UFSMNT);
1227 mp->mnt_data = (qaddr_t)0;
1228 mp->mnt_flag &= ~MNT_LOCAL;
1229 return (error);
1230 }
1231
1232 /*
1233 * Get file system statistics.
1234 */
1235 int
1236 lfs_statfs(struct mount *mp, struct statfs *sbp, struct proc *p)
1237 {
1238 struct lfs *fs;
1239 struct ufsmount *ump;
1240
1241 ump = VFSTOUFS(mp);
1242 fs = ump->um_lfs;
1243 if (fs->lfs_magic != LFS_MAGIC)
1244 panic("lfs_statfs: magic");
1245
1246 sbp->f_type = 0;
1247 sbp->f_bsize = fs->lfs_fsize;
1248 sbp->f_iosize = fs->lfs_bsize;
1249 sbp->f_blocks = fsbtofrags(fs, LFS_EST_NONMETA(fs));
1250 sbp->f_bfree = fsbtofrags(fs, LFS_EST_BFREE(fs));
1251 sbp->f_bavail = fsbtofrags(fs, (long)LFS_EST_BFREE(fs) -
1252 (long)LFS_EST_RSVD(fs));
1253
1254 sbp->f_files = fs->lfs_bfree / btofsb(fs, fs->lfs_ibsize) * INOPB(fs);
1255 sbp->f_ffree = sbp->f_files - fs->lfs_nfiles;
1256 if (sbp != &mp->mnt_stat) {
1257 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
1258 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
1259 }
1260 strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN);
1261 return (0);
1262 }
1263
1264 /*
1265 * Go through the disk queues to initiate sandbagged IO;
1266 * go through the inodes to write those that have been modified;
1267 * initiate the writing of the super block if it has been modified.
1268 *
1269 * Note: we are always called with the filesystem marked `MPBUSY'.
1270 */
1271 int
1272 lfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)
1273 {
1274 int error;
1275 struct lfs *fs;
1276
1277 fs = ((struct ufsmount *)mp->mnt_data)->ufsmount_u.lfs;
1278 if (fs->lfs_ronly)
1279 return 0;
1280 while (fs->lfs_dirops)
1281 error = tsleep(&fs->lfs_dirops, PRIBIO + 1, "lfs_dirops", 0);
1282 fs->lfs_writer++;
1283
1284 /* All syncs must be checkpoints until roll-forward is implemented. */
1285 error = lfs_segwrite(mp, SEGM_CKP | (waitfor ? SEGM_SYNC : 0));
1286 if (--fs->lfs_writer == 0)
1287 wakeup(&fs->lfs_dirops);
1288 #ifdef QUOTA
1289 qsync(mp);
1290 #endif
1291 return (error);
1292 }
1293
1294 extern struct lock ufs_hashlock;
1295
1296 /*
1297 * Look up an LFS dinode number to find its incore vnode. If not already
1298 * in core, read it in from the specified device. Return the inode locked.
1299 * Detection and handling of mount points must be done by the calling routine.
1300 */
1301 int
1302 lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1303 {
1304 struct lfs *fs;
1305 struct inode *ip;
1306 struct buf *bp;
1307 struct ifile *ifp;
1308 struct vnode *vp;
1309 struct ufsmount *ump;
1310 ufs_daddr_t daddr;
1311 dev_t dev;
1312 int error;
1313 struct timespec ts;
1314
1315 ump = VFSTOUFS(mp);
1316 dev = ump->um_dev;
1317 fs = ump->um_lfs;
1318
1319 /*
1320 * If the filesystem is not completely mounted yet, suspend
1321 * any access requests (wait for roll-forward to complete).
1322 */
1323 while ((fs->lfs_flags & LFS_NOTYET) && curproc->p_pid != fs->lfs_rfpid)
1324 tsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0);
1325
1326 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1327 return (0);
1328
1329 if ((error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, &vp)) != 0) {
1330 *vpp = NULL;
1331 return (error);
1332 }
1333
1334 do {
1335 if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) {
1336 ungetnewvnode(vp);
1337 return (0);
1338 }
1339 } while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));
1340
1341 /* Translate the inode number to a disk address. */
1342 if (ino == LFS_IFILE_INUM)
1343 daddr = fs->lfs_idaddr;
1344 else {
1345 /* XXX bounds-check this too */
1346 LFS_IENTRY(ifp, fs, ino, bp);
1347 daddr = ifp->if_daddr;
1348 if (fs->lfs_version > 1) {
1349 ts.tv_sec = ifp->if_atime_sec;
1350 ts.tv_nsec = ifp->if_atime_nsec;
1351 }
1352
1353 brelse(bp);
1354 if (daddr == LFS_UNUSED_DADDR) {
1355 *vpp = NULLVP;
1356 ungetnewvnode(vp);
1357 lockmgr(&ufs_hashlock, LK_RELEASE, 0);
1358 return (ENOENT);
1359 }
1360 }
1361
1362 /* Allocate/init new vnode/inode. */
1363 lfs_vcreate(mp, ino, vp);
1364
1365 /*
1366 * Put it onto its hash chain and lock it so that other requests for
1367 * this inode will block if they arrive while we are sleeping waiting
1368 * for old data structures to be purged or for the contents of the
1369 * disk portion of this inode to be read.
1370 */
1371 ip = VTOI(vp);
1372 ufs_ihashins(ip);
1373 lockmgr(&ufs_hashlock, LK_RELEASE, 0);
1374
1375 /*
1376 * XXX
1377 * This may not need to be here, logically it should go down with
1378 * the i_devvp initialization.
1379 * Ask Kirk.
1380 */
1381 ip->i_lfs = ump->um_lfs;
1382
1383 /* Read in the disk contents for the inode, copy into the inode. */
1384 error = bread(ump->um_devvp, fsbtodb(fs, daddr),
1385 (fs->lfs_version == 1 ? fs->lfs_bsize : fs->lfs_fsize),
1386 NOCRED, &bp);
1387 if (error) {
1388 /*
1389 * The inode does not contain anything useful, so it would
1390 * be misleading to leave it on its hash chain. With mode
1391 * still zero, it will be unlinked and returned to the free
1392 * list by vput().
1393 */
1394 vput(vp);
1395 brelse(bp);
1396 *vpp = NULL;
1397 return (error);
1398 }
1399 ip->i_din.ffs_din = *lfs_ifind(fs, ino, bp);
1400 ip->i_ffs_effnlink = ip->i_ffs_nlink;
1401 ip->i_lfs_effnblks = ip->i_ffs_blocks;
1402 if (fs->lfs_version > 1) {
1403 ip->i_ffs_atime = ts.tv_sec;
1404 ip->i_ffs_atimensec = ts.tv_nsec;
1405 }
1406 brelse(bp);
1407
1408 /*
1409 * Initialize the vnode from the inode, check for aliases. In all
1410 * cases re-init ip, the underlying vnode/inode may have changed.
1411 */
1412 ufs_vinit(mp, lfs_specop_p, lfs_fifoop_p, &vp);
1413 #ifdef DIAGNOSTIC
1414 if (vp->v_type == VNON) {
1415 panic("lfs_vget: ino %d is type VNON! (ifmt %o)\n",
1416 ip->i_number, (ip->i_ffs_mode & IFMT) >> 12);
1417 }
1418 #endif
1419 /*
1420 * Finish inode initialization now that aliasing has been resolved.
1421 */
1422
1423 genfs_node_init(vp, &lfs_genfsops);
1424 ip->i_devvp = ump->um_devvp;
1425 VREF(ip->i_devvp);
1426 *vpp = vp;
1427
1428 uvm_vnp_setsize(vp, ip->i_ffs_size);
1429
1430 return (0);
1431 }
1432
1433 /*
1434 * File handle to vnode
1435 *
1436 * Have to be really careful about stale file handles:
1437 * - check that the inode number is valid
1438 * - call lfs_vget() to get the locked inode
1439 * - check for an unallocated inode (i_mode == 0)
1440 *
1441 * XXX
1442 * use ifile to see if inode is allocated instead of reading off disk
1443 * what is the relationship between my generational number and the NFS
1444 * generational number.
1445 */
/*
 * lfs_fhtovp: translate the file handle fhp into a vnode in *vpp.
 * Handles whose inode number is below ROOTINO are rejected with ESTALE;
 * everything else is handed to ufs_fhtovp() and its result returned.
 */
int
lfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
{
	struct ufid *handle = (struct ufid *)fhp;

	if (ufhandle_ok(handle))
		return (ufs_fhtovp(mp, handle, vpp));
	return (ESTALE);
}
1456
1457 /*
1458 * Vnode pointer to File handle
1459 */
1460 /* ARGSUSED */
1461 int
1462 lfs_vptofh(struct vnode *vp, struct fid *fhp)
1463 {
1464 struct inode *ip;
1465 struct ufid *ufhp;
1466
1467 ip = VTOI(vp);
1468 ufhp = (struct ufid *)fhp;
1469 ufhp->ufid_len = sizeof(struct ufid);
1470 ufhp->ufid_ino = ip->i_number;
1471 ufhp->ufid_gen = ip->i_ffs_gen;
1472 return (0);
1473 }
1474
1475 int
1476 lfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p)
1477 {
1478 extern int lfs_writeindir, lfs_dostats, lfs_clean_vnhead;
1479 extern struct lfs_stats lfs_stats;
1480 int error;
1481
1482 /* all sysctl names at this level are terminal */
1483 if (namelen != 1)
1484 return (ENOTDIR);
1485
1486 switch (name[0]) {
1487 case LFS_WRITEINDIR:
1488 return (sysctl_int(oldp, oldlenp, newp, newlen,
1489 &lfs_writeindir));
1490 case LFS_CLEAN_VNHEAD:
1491 return (sysctl_int(oldp, oldlenp, newp, newlen,
1492 &lfs_clean_vnhead));
1493 case LFS_DOSTATS:
1494 if ((error = sysctl_int(oldp, oldlenp, newp, newlen,
1495 &lfs_dostats)))
1496 return error;
1497 if (lfs_dostats == 0)
1498 memset(&lfs_stats,0,sizeof(lfs_stats));
1499 return 0;
1500 case LFS_STATS:
1501 return (sysctl_rdstruct(oldp, oldlenp, newp,
1502 &lfs_stats, sizeof(lfs_stats)));
1503 default:
1504 return (EOPNOTSUPP);
1505 }
1506 /* NOTREACHED */
1507 }
1508