lfs_syscalls.c revision 1.8 1 /* $NetBSD: lfs_syscalls.c,v 1.8 1995/03/21 13:34:08 mycroft Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)lfs_syscalls.c 8.6 (Berkeley) 6/16/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/buf.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 #include <sys/malloc.h>
45 #include <sys/kernel.h>
46
47 #include <sys/syscallargs.h>
48
49 #include <ufs/ufs/quota.h>
50 #include <ufs/ufs/inode.h>
51 #include <ufs/ufs/ufsmount.h>
52 #include <ufs/ufs/ufs_extern.h>
53
54 #include <ufs/lfs/lfs.h>
55 #include <ufs/lfs/lfs_extern.h>
/*
 * Advance the segment's current FINFO pointer to the slot that follows the
 * last block-number entry (fi_blocks[fi_nblocks]) of the FINFO just filled.
 */
#define	BUMP_FIP(SP) \
	((SP)->fip = (FINFO *)(&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks]))

/* Adjust the count of FINFO records kept in the segment summary header. */
#define	INC_FINFO(SP)	(++((SEGSUM *)((SP)->segsum))->ss_nfinfo)
#define	DEC_FINFO(SP)	(--((SEGSUM *)((SP)->segsum))->ss_nfinfo)

/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  `s' is the number of bytes to be added to the summary;
 * when it does not fit, the current segment is written out first.  The
 * macro relies on `fs' and `sp' being in scope at the point of use.
 *
 * The body is wrapped in do { } while (0) so the macro acts as exactly one
 * statement, including when it is the unbraced body of an if/else.
 */
#define	CHECK_SEG(s) \
	do { \
		if (sp->sum_bytes_left < (s)) \
			(void) lfs_writeseg(fs, sp); \
	} while (0)
70 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
71
72 /*
73 * lfs_markv:
74 *
75 * This will mark inodes and blocks dirty, so they are written into the log.
76 * It will block until all the blocks have been written. The segment create
77 * time passed in the block_info and inode_info structures is used to decide
78 * if the data is valid for each block (in case some process dirtied a block
79 * or inode that is being cleaned between the determination that a block is
80 * live and the lfs_markv call).
81 *
82 * 0 on success
83 * -1/errno is return on error.
84 */
85 int
86 lfs_markv(p, uap, retval)
87 struct proc *p;
88 struct lfs_markv_args /* {
89 syscallarg(fsid_t *) fsidp;
90 syscallarg(struct block_info *) blkiov;
91 syscallarg(int) blkcnt;
92 } */ *uap;
93 register_t *retval;
94 {
95 struct segment *sp;
96 BLOCK_INFO *blkp;
97 IFILE *ifp;
98 struct buf *bp, **bpp;
99 struct inode *ip;
100 struct lfs *fs;
101 struct mount *mntp;
102 struct vnode *vp;
103 fsid_t fsid;
104 void *start;
105 ino_t lastino;
106 daddr_t b_daddr, v_daddr;
107 u_long bsize;
108 int cnt, error;
109
110 if (error = suser(p->p_ucred, &p->p_acflag))
111 return (error);
112
113 if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
114 return (error);
115 if ((mntp = getvfs(&fsid)) == NULL)
116 return (EINVAL);
117
118 cnt = SCARG(uap, blkcnt);
119 start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
120 if (error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO)))
121 goto err1;
122
123 /* Mark blocks/inodes dirty. */
124 fs = VFSTOUFS(mntp)->um_lfs;
125 bsize = fs->lfs_bsize;
126 error = 0;
127
128 lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
129 sp = fs->lfs_sp;
130 for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
131 blkp = start; cnt--; ++blkp) {
132 /*
133 * Get the IFILE entry (only once) and see if the file still
134 * exists.
135 */
136 if (lastino != blkp->bi_inode) {
137 if (lastino != LFS_UNUSED_INUM) {
138 /* Finish up last file */
139 if (sp->fip->fi_nblocks == 0) {
140 DEC_FINFO(sp);
141 sp->sum_bytes_left +=
142 sizeof(FINFO) - sizeof(daddr_t);
143 } else {
144 lfs_updatemeta(sp);
145 BUMP_FIP(sp);
146 }
147
148 lfs_writeinode(fs, sp, ip);
149 lfs_vunref(vp);
150 }
151
152 /* Start a new file */
153 CHECK_SEG(sizeof(FINFO));
154 sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
155 INC_FINFO(sp);
156 sp->start_lbp = &sp->fip->fi_blocks[0];
157 sp->vp = NULL;
158 sp->fip->fi_version = blkp->bi_version;
159 sp->fip->fi_nblocks = 0;
160 sp->fip->fi_ino = blkp->bi_inode;
161 lastino = blkp->bi_inode;
162 if (blkp->bi_inode == LFS_IFILE_INUM)
163 v_daddr = fs->lfs_idaddr;
164 else {
165 LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
166 v_daddr = ifp->if_daddr;
167 brelse(bp);
168 }
169 if (v_daddr == LFS_UNUSED_DADDR)
170 continue;
171
172 /* Get the vnode/inode. */
173 if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
174 blkp->bi_lbn == LFS_UNUSED_LBN ?
175 blkp->bi_bp : NULL)) {
176 #ifdef DIAGNOSTIC
177 printf("lfs_markv: VFS_VGET failed (%d)\n",
178 blkp->bi_inode);
179 #endif
180 lastino = LFS_UNUSED_INUM;
181 v_daddr = LFS_UNUSED_DADDR;
182 continue;
183 }
184 sp->vp = vp;
185 ip = VTOI(vp);
186 } else if (v_daddr == LFS_UNUSED_DADDR)
187 continue;
188
189 /* If this BLOCK_INFO didn't contain a block, keep going. */
190 if (blkp->bi_lbn == LFS_UNUSED_LBN)
191 continue;
192 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
193 b_daddr != blkp->bi_daddr)
194 continue;
195 /*
196 * If we got to here, then we are keeping the block. If it
197 * is an indirect block, we want to actually put it in the
198 * buffer cache so that it can be updated in the finish_meta
199 * section. If it's not, we need to allocate a fake buffer
200 * so that writeseg can perform the copyin and write the buffer.
201 */
202 if (blkp->bi_lbn >= 0) /* Data Block */
203 bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
204 blkp->bi_bp);
205 else {
206 bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
207 if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
208 (error = copyin(blkp->bi_bp, bp->b_data,
209 bsize)))
210 goto err2;
211 if (error = VOP_BWRITE(bp))
212 goto err2;
213 }
214 while (lfs_gatherblock(sp, bp, NULL));
215 }
216 if (sp->vp) {
217 if (sp->fip->fi_nblocks == 0) {
218 DEC_FINFO(sp);
219 sp->sum_bytes_left +=
220 sizeof(FINFO) - sizeof(daddr_t);
221 } else
222 lfs_updatemeta(sp);
223
224 lfs_writeinode(fs, sp, ip);
225 lfs_vunref(vp);
226 }
227 (void) lfs_writeseg(fs, sp);
228 lfs_segunlock(fs);
229 free(start, M_SEGMENT);
230 return (error);
231
232 /*
233 * XXX
234 * If we come in to error 2, we might have indirect blocks that were
235 * updated and now have bad block pointers. I don't know what to do
236 * about this.
237 */
238
239 err2: lfs_vunref(vp);
240 /* Free up fakebuffers */
241 for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
242 if ((*bpp)->b_flags & B_CALL) {
243 brelvp(*bpp);
244 free(*bpp, M_SEGMENT);
245 } else
246 brelse(*bpp);
247 lfs_segunlock(fs);
248 err1:
249 free(start, M_SEGMENT);
250 return (error);
251 }
252
253 /*
254 * lfs_bmapv:
255 *
256 * This will fill in the current disk address for arrays of blocks.
257 *
258 * 0 on success
259 * -1/errno is return on error.
260 */
261 int
262 lfs_bmapv(p, uap, retval)
263 struct proc *p;
264 struct lfs_bmapv_args /* {
265 syscallarg(fsid_t *) fsidp;
266 syscallarg(struct block_info *) blkiov;
267 syscallarg(int) blkcnt;
268 } */ *uap;
269 register_t *retval;
270 {
271 BLOCK_INFO *blkp;
272 struct mount *mntp;
273 struct vnode *vp;
274 fsid_t fsid;
275 void *start;
276 daddr_t daddr;
277 int cnt, error, step;
278
279 if (error = suser(p->p_ucred, &p->p_acflag))
280 return (error);
281
282 if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
283 return (error);
284 if ((mntp = getvfs(&fsid)) == NULL)
285 return (EINVAL);
286
287 cnt = SCARG(uap, blkcnt);
288 start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
289 if (error = copyin(SCARG(uap, blkiov), blkp,
290 cnt * sizeof(BLOCK_INFO))) {
291 free(blkp, M_SEGMENT);
292 return (error);
293 }
294
295 for (step = cnt; step--; ++blkp) {
296 if (blkp->bi_lbn == LFS_UNUSED_LBN)
297 continue;
298 /* Could be a deadlock ? */
299 if (VFS_VGET(mntp, blkp->bi_inode, &vp))
300 daddr = LFS_UNUSED_DADDR;
301 else {
302 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
303 daddr = LFS_UNUSED_DADDR;
304 vput(vp);
305 }
306 blkp->bi_daddr = daddr;
307 }
308 copyout(start, SCARG(uap, blkiov), cnt * sizeof(BLOCK_INFO));
309 free(start, M_SEGMENT);
310 return (0);
311 }
312
313 /*
314 * lfs_segclean:
315 *
316 * Mark the segment clean.
317 *
318 * 0 on success
319 * -1/errno is return on error.
320 */
321 int
322 lfs_segclean(p, uap, retval)
323 struct proc *p;
324 struct lfs_segclean_args /* {
325 syscallarg(fsid_t *) fsidp;
326 syscallarg(u_long) segment;
327 } */ *uap;
328 register_t *retval;
329 {
330 CLEANERINFO *cip;
331 SEGUSE *sup;
332 struct buf *bp;
333 struct mount *mntp;
334 struct lfs *fs;
335 fsid_t fsid;
336 int error;
337
338 if (error = suser(p->p_ucred, &p->p_acflag))
339 return (error);
340
341 if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
342 return (error);
343 if ((mntp = getvfs(&fsid)) == NULL)
344 return (EINVAL);
345
346 fs = VFSTOUFS(mntp)->um_lfs;
347
348 if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
349 return (EBUSY);
350
351 LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
352 if (sup->su_flags & SEGUSE_ACTIVE) {
353 brelse(bp);
354 return (EBUSY);
355 }
356 fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
357 fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
358 sup->su_ninos * btodb(fs->lfs_bsize);
359 sup->su_flags &= ~SEGUSE_DIRTY;
360 (void) VOP_BWRITE(bp);
361
362 LFS_CLEANERINFO(cip, fs, bp);
363 ++cip->clean;
364 --cip->dirty;
365 (void) VOP_BWRITE(bp);
366 wakeup(&fs->lfs_avail);
367 return (0);
368 }
369
370 /*
371 * lfs_segwait:
372 *
373 * This will block until a segment in file system fsid is written. A timeout
374 * in milliseconds may be specified which will awake the cleaner automatically.
375 * An fsid of -1 means any file system, and a timeout of 0 means forever.
376 *
377 * 0 on success
378 * 1 on timeout
379 * -1/errno is return on error.
380 */
381 int
382 lfs_segwait(p, uap, retval)
383 struct proc *p;
384 struct lfs_segwait_args /* {
385 syscallarg(fsid_t *) fsidp;
386 syscallarg(struct timeval *) tv;
387 } */ *uap;
388 register_t *retval;
389 {
390 extern int lfs_allclean_wakeup;
391 struct mount *mntp;
392 struct timeval atv;
393 fsid_t fsid;
394 void *addr;
395 u_long timeout;
396 int error, s;
397
398 if (error = suser(p->p_ucred, &p->p_acflag)) {
399 return (error);
400 }
401 #ifdef WHEN_QUADS_WORK
402 if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
403 return (error);
404 if (fsid == (fsid_t)-1)
405 addr = &lfs_allclean_wakeup;
406 else {
407 if ((mntp = getvfs(&fsid)) == NULL)
408 return (EINVAL);
409 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
410 }
411 #else
412 if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
413 return (error);
414 if ((mntp = getvfs(&fsid)) == NULL)
415 addr = &lfs_allclean_wakeup;
416 else
417 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
418 #endif
419
420 if (SCARG(uap, tv)) {
421 if (error =
422 copyin(SCARG(uap, tv), &atv, sizeof(struct timeval)))
423 return (error);
424 if (itimerfix(&atv))
425 return (EINVAL);
426 s = splclock();
427 timeradd(&atv, &time, &atv);
428 timeout = hzto(&atv);
429 splx(s);
430 } else
431 timeout = 0;
432
433 error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
434 return (error == ERESTART ? EINTR : 0);
435 }
436
437 /*
438 * VFS_VGET call specialized for the cleaner. The cleaner already knows the
439 * daddr from the ifile, so don't look it up again. If the cleaner is
440 * processing IINFO structures, it may have the ondisk inode already, so
441 * don't go retrieving it again.
442 */
443 int
444 lfs_fastvget(mp, ino, daddr, vpp, dinp)
445 struct mount *mp;
446 ino_t ino;
447 daddr_t daddr;
448 struct vnode **vpp;
449 struct dinode *dinp;
450 {
451 register struct inode *ip;
452 struct vnode *vp;
453 struct ufsmount *ump;
454 struct buf *bp;
455 dev_t dev;
456 int error;
457
458 ump = VFSTOUFS(mp);
459 dev = ump->um_dev;
460 /*
461 * This is playing fast and loose. Someone may have the inode
462 * locked, in which case they are going to be distinctly unhappy
463 * if we trash something.
464 */
465 if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
466 lfs_vref(*vpp);
467 if ((*vpp)->v_flag & VXLOCK)
468 printf ("Cleaned vnode VXLOCKED\n");
469 ip = VTOI(*vpp);
470 if (ip->i_flag & IN_LOCKED)
471 printf("cleaned vnode locked\n");
472 if (!(ip->i_flag & IN_MODIFIED)) {
473 ++ump->um_lfs->lfs_uinodes;
474 ip->i_flag |= IN_MODIFIED;
475 }
476 ip->i_flag |= IN_MODIFIED;
477 return (0);
478 }
479
480 /* Allocate new vnode/inode. */
481 if (error = lfs_vcreate(mp, ino, &vp)) {
482 *vpp = NULL;
483 return (error);
484 }
485
486 /*
487 * Put it onto its hash chain and lock it so that other requests for
488 * this inode will block if they arrive while we are sleeping waiting
489 * for old data structures to be purged or for the contents of the
490 * disk portion of this inode to be read.
491 */
492 ip = VTOI(vp);
493 ufs_ihashins(ip);
494
495 /*
496 * XXX
497 * This may not need to be here, logically it should go down with
498 * the i_devvp initialization.
499 * Ask Kirk.
500 */
501 ip->i_lfs = ump->um_lfs;
502
503 /* Read in the disk contents for the inode, copy into the inode. */
504 if (dinp)
505 if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
506 return (error);
507 else {
508 if (error = bread(ump->um_devvp, daddr,
509 (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
510 /*
511 * The inode does not contain anything useful, so it
512 * would be misleading to leave it on its hash chain.
513 * Iput() will return it to the free list.
514 */
515 ufs_ihashrem(ip);
516
517 /* Unlock and discard unneeded inode. */
518 lfs_vunref(vp);
519 brelse(bp);
520 *vpp = NULL;
521 return (error);
522 }
523 ip->i_din =
524 *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
525 brelse(bp);
526 }
527
528 /* Inode was just read from user space or disk, make sure it's locked */
529 ip->i_flag |= IN_LOCKED;
530
531 /*
532 * Initialize the vnode from the inode, check for aliases. In all
533 * cases re-init ip, the underlying vnode/inode may have changed.
534 */
535 if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
536 lfs_vunref(vp);
537 *vpp = NULL;
538 return (error);
539 }
540 /*
541 * Finish inode initialization now that aliasing has been resolved.
542 */
543 ip->i_devvp = ump->um_devvp;
544 ip->i_flag |= IN_MODIFIED;
545 ++ump->um_lfs->lfs_uinodes;
546 VREF(ip->i_devvp);
547 *vpp = vp;
548 return (0);
549 }
550 struct buf *
551 lfs_fakebuf(vp, lbn, size, uaddr)
552 struct vnode *vp;
553 int lbn;
554 size_t size;
555 caddr_t uaddr;
556 {
557 struct buf *bp;
558
559 bp = lfs_newbuf(vp, lbn, 0);
560 bp->b_saveaddr = uaddr;
561 bp->b_bufsize = size;
562 bp->b_bcount = size;
563 bp->b_flags |= B_INVAL;
564 return (bp);
565 }
566