lfs_syscalls.c revision 1.15 1 /* $NetBSD: lfs_syscalls.c,v 1.15 1998/02/19 00:54:39 thorpej Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)lfs_syscalls.c 8.6 (Berkeley) 6/16/94
36 */
37
38 #include "fs_lfs.h" /* for prototypes in syscallargs.h */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/buf.h>
44 #include <sys/mount.h>
45 #include <sys/vnode.h>
46 #include <sys/malloc.h>
47 #include <sys/kernel.h>
48
49 #include <sys/syscallargs.h>
50
51 #include <ufs/ufs/quota.h>
52 #include <ufs/ufs/inode.h>
53 #include <ufs/ufs/ufsmount.h>
54 #include <ufs/ufs/ufs_extern.h>
55
56 #include <ufs/lfs/lfs.h>
57 #include <ufs/lfs/lfs_extern.h>
58
/*
 * Advance SP->fip past the fi_blocks[] entries already recorded for the
 * current file (fi_nblocks of them), so that it points at the place where
 * the next FINFO starts.
 */
#define	BUMP_FIP(SP)							\
	((SP)->fip =							\
	    (FINFO *)&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])
61
/* Increment the FINFO count (ss_nfinfo) kept in the segment summary. */
#define	INC_FINFO(SP)	(++((SEGSUM *)((SP)->segsum))->ss_nfinfo)
/* Decrement the FINFO count (ss_nfinfo) kept in the segment summary. */
#define	DEC_FINFO(SP)	(--((SEGSUM *)((SP)->segsum))->ss_nfinfo)
64
65 /*
66 * Before committing to add something to a segment summary, make sure there
67 * is enough room. S is the bytes added to the summary.
68 */
69 #define CHECK_SEG(s) \
70 if (sp->sum_bytes_left < (s)) { \
71 (void) lfs_writeseg(fs, sp); \
72 }
73 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
74
75 /*
76 * lfs_markv:
77 *
78 * This will mark inodes and blocks dirty, so they are written into the log.
79 * It will block until all the blocks have been written. The segment create
80 * time passed in the block_info and inode_info structures is used to decide
81 * if the data is valid for each block (in case some process dirtied a block
82 * or inode that is being cleaned between the determination that a block is
83 * live and the lfs_markv call).
84 *
85 * 0 on success
86 * -1/errno is return on error.
87 */
88 int
89 lfs_markv(p, v, retval)
90 struct proc *p;
91 void *v;
92 register_t *retval;
93 {
94 struct lfs_markv_args /* {
95 syscallarg(fsid_t *) fsidp;
96 syscallarg(struct block_info *) blkiov;
97 syscallarg(int) blkcnt;
98 } */ *uap = v;
99 struct segment *sp;
100 BLOCK_INFO *blkp;
101 IFILE *ifp;
102 struct buf *bp, **bpp;
103 struct inode *ip = NULL;
104 struct lfs *fs;
105 struct mount *mntp;
106 struct vnode *vp;
107 fsid_t fsid;
108 void *start;
109 ino_t lastino;
110 daddr_t b_daddr, v_daddr;
111 u_long bsize;
112 int cnt, error;
113
114 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
115 return (error);
116
117 if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
118 return (error);
119 if ((mntp = getvfs(&fsid)) == NULL)
120 return (EINVAL);
121
122 cnt = SCARG(uap, blkcnt);
123 start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
124 error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO));
125 if (error)
126 goto err1;
127
128 /* Mark blocks/inodes dirty. */
129 fs = VFSTOUFS(mntp)->um_lfs;
130 bsize = fs->lfs_bsize;
131 error = 0;
132
133 lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
134 sp = fs->lfs_sp;
135 for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
136 blkp = start; cnt--; ++blkp) {
137 /*
138 * Get the IFILE entry (only once) and see if the file still
139 * exists.
140 */
141 if (lastino != blkp->bi_inode) {
142 if (lastino != LFS_UNUSED_INUM) {
143 /* Finish up last file */
144 if (sp->fip->fi_nblocks == 0) {
145 DEC_FINFO(sp);
146 sp->sum_bytes_left +=
147 sizeof(FINFO) - sizeof(daddr_t);
148 } else {
149 lfs_updatemeta(sp);
150 BUMP_FIP(sp);
151 }
152
153 lfs_writeinode(fs, sp, ip);
154 lfs_vunref(vp);
155 }
156
157 /* Start a new file */
158 CHECK_SEG(sizeof(FINFO));
159 sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
160 INC_FINFO(sp);
161 sp->start_lbp = &sp->fip->fi_blocks[0];
162 sp->vp = NULL;
163 sp->fip->fi_version = blkp->bi_version;
164 sp->fip->fi_nblocks = 0;
165 sp->fip->fi_ino = blkp->bi_inode;
166 lastino = blkp->bi_inode;
167 if (blkp->bi_inode == LFS_IFILE_INUM)
168 v_daddr = fs->lfs_idaddr;
169 else {
170 LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
171 v_daddr = ifp->if_daddr;
172 brelse(bp);
173 }
174 if (v_daddr == LFS_UNUSED_DADDR)
175 continue;
176
177 /* Get the vnode/inode. */
178 if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
179 blkp->bi_lbn == LFS_UNUSED_LBN ?
180 blkp->bi_bp : NULL)) {
181 #ifdef DIAGNOSTIC
182 printf("lfs_markv: VFS_VGET failed (%d)\n",
183 blkp->bi_inode);
184 #endif
185 lastino = LFS_UNUSED_INUM;
186 v_daddr = LFS_UNUSED_DADDR;
187 continue;
188 }
189 sp->vp = vp;
190 ip = VTOI(vp);
191 } else if (v_daddr == LFS_UNUSED_DADDR)
192 continue;
193
194 /* If this BLOCK_INFO didn't contain a block, keep going. */
195 if (blkp->bi_lbn == LFS_UNUSED_LBN)
196 continue;
197 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
198 b_daddr != blkp->bi_daddr)
199 continue;
200 /*
201 * If we got to here, then we are keeping the block. If it
202 * is an indirect block, we want to actually put it in the
203 * buffer cache so that it can be updated in the finish_meta
204 * section. If it's not, we need to allocate a fake buffer
205 * so that writeseg can perform the copyin and write the buffer.
206 */
207 if (blkp->bi_lbn >= 0) /* Data Block */
208 bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
209 blkp->bi_bp);
210 else {
211 bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
212 if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
213 (error = copyin(blkp->bi_bp, bp->b_data,
214 bsize)))
215 goto err2;
216 if ((error = VOP_BWRITE(bp)) != 0)
217 goto err2;
218 }
219 while (lfs_gatherblock(sp, bp, NULL));
220 }
221 if (sp->vp) {
222 if (sp->fip->fi_nblocks == 0) {
223 DEC_FINFO(sp);
224 sp->sum_bytes_left +=
225 sizeof(FINFO) - sizeof(daddr_t);
226 } else
227 lfs_updatemeta(sp);
228
229 lfs_writeinode(fs, sp, ip);
230 lfs_vunref(vp);
231 }
232 (void) lfs_writeseg(fs, sp);
233 lfs_segunlock(fs);
234 free(start, M_SEGMENT);
235 return (error);
236
237 /*
238 * XXX
239 * If we come in to error 2, we might have indirect blocks that were
240 * updated and now have bad block pointers. I don't know what to do
241 * about this.
242 */
243
244 err2: lfs_vunref(vp);
245 /* Free up fakebuffers */
246 for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
247 if ((*bpp)->b_flags & B_CALL) {
248 brelvp(*bpp);
249 free(*bpp, M_SEGMENT);
250 } else
251 brelse(*bpp);
252 lfs_segunlock(fs);
253 err1:
254 free(start, M_SEGMENT);
255 return (error);
256 }
257
258 /*
259 * lfs_bmapv:
260 *
261 * This will fill in the current disk address for arrays of blocks.
262 *
263 * 0 on success
264 * -1/errno is return on error.
265 */
266 int
267 lfs_bmapv(p, v, retval)
268 struct proc *p;
269 void *v;
270 register_t *retval;
271 {
272 struct lfs_bmapv_args /* {
273 syscallarg(fsid_t *) fsidp;
274 syscallarg(struct block_info *) blkiov;
275 syscallarg(int) blkcnt;
276 } */ *uap = v;
277 BLOCK_INFO *blkp;
278 struct mount *mntp;
279 struct vnode *vp;
280 fsid_t fsid;
281 void *start;
282 daddr_t daddr;
283 int cnt, error, step;
284
285 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
286 return (error);
287
288 error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t));
289 if (error)
290 return (error);
291 if ((mntp = getvfs(&fsid)) == NULL)
292 return (EINVAL);
293
294 cnt = SCARG(uap, blkcnt);
295 start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
296 error = copyin(SCARG(uap, blkiov), blkp, cnt * sizeof(BLOCK_INFO));
297 if (error) {
298 free(blkp, M_SEGMENT);
299 return (error);
300 }
301
302 for (step = cnt; step--; ++blkp) {
303 if (blkp->bi_lbn == LFS_UNUSED_LBN)
304 continue;
305 /* Could be a deadlock ? */
306 if (VFS_VGET(mntp, blkp->bi_inode, &vp))
307 daddr = LFS_UNUSED_DADDR;
308 else {
309 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
310 daddr = LFS_UNUSED_DADDR;
311 vput(vp);
312 }
313 blkp->bi_daddr = daddr;
314 }
315 copyout(start, SCARG(uap, blkiov), cnt * sizeof(BLOCK_INFO));
316 free(start, M_SEGMENT);
317 return (0);
318 }
319
320 /*
321 * lfs_segclean:
322 *
323 * Mark the segment clean.
324 *
325 * 0 on success
326 * -1/errno is return on error.
327 */
328 int
329 lfs_segclean(p, v, retval)
330 struct proc *p;
331 void *v;
332 register_t *retval;
333 {
334 struct lfs_segclean_args /* {
335 syscallarg(fsid_t *) fsidp;
336 syscallarg(u_long) segment;
337 } */ *uap = v;
338 CLEANERINFO *cip;
339 SEGUSE *sup;
340 struct buf *bp;
341 struct mount *mntp;
342 struct lfs *fs;
343 fsid_t fsid;
344 int error;
345
346 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
347 return (error);
348
349 if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
350 return (error);
351 if ((mntp = getvfs(&fsid)) == NULL)
352 return (EINVAL);
353
354 fs = VFSTOUFS(mntp)->um_lfs;
355
356 if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
357 return (EBUSY);
358
359 LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
360 if (sup->su_flags & SEGUSE_ACTIVE) {
361 brelse(bp);
362 return (EBUSY);
363 }
364 fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
365 fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
366 sup->su_ninos * btodb(fs->lfs_bsize);
367 sup->su_flags &= ~SEGUSE_DIRTY;
368 (void) VOP_BWRITE(bp);
369
370 LFS_CLEANERINFO(cip, fs, bp);
371 ++cip->clean;
372 --cip->dirty;
373 (void) VOP_BWRITE(bp);
374 wakeup(&fs->lfs_avail);
375 return (0);
376 }
377
378 /*
379 * lfs_segwait:
380 *
381 * This will block until a segment in file system fsid is written. A timeout
382 * in milliseconds may be specified which will awake the cleaner automatically.
383 * An fsid of -1 means any file system, and a timeout of 0 means forever.
384 *
385 * 0 on success
386 * 1 on timeout
387 * -1/errno is return on error.
388 */
389 int
390 lfs_segwait(p, v, retval)
391 struct proc *p;
392 void *v;
393 register_t *retval;
394 {
395 struct lfs_segwait_args /* {
396 syscallarg(fsid_t *) fsidp;
397 syscallarg(struct timeval *) tv;
398 } */ *uap = v;
399 extern int lfs_allclean_wakeup;
400 struct mount *mntp;
401 struct timeval atv;
402 fsid_t fsid;
403 void *addr;
404 u_long timeout;
405 int error, s;
406
407 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) {
408 return (error);
409 }
410 #ifdef WHEN_QUADS_WORK
411 if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
412 return (error);
413 if (fsid == (fsid_t)-1)
414 addr = &lfs_allclean_wakeup;
415 else {
416 if ((mntp = getvfs(&fsid)) == NULL)
417 return (EINVAL);
418 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
419 }
420 #else
421 if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
422 return (error);
423 if ((mntp = getvfs(&fsid)) == NULL)
424 addr = &lfs_allclean_wakeup;
425 else
426 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
427 #endif
428
429 if (SCARG(uap, tv)) {
430 error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
431 if (error)
432 return (error);
433 if (itimerfix(&atv))
434 return (EINVAL);
435 s = splclock();
436 timeradd(&atv, &time, &atv);
437 timeout = hzto(&atv);
438 splx(s);
439 } else
440 timeout = 0;
441
442 error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
443 return (error == ERESTART ? EINTR : 0);
444 }
445
446 /*
447 * VFS_VGET call specialized for the cleaner. The cleaner already knows the
448 * daddr from the ifile, so don't look it up again. If the cleaner is
449 * processing IINFO structures, it may have the ondisk inode already, so
450 * don't go retrieving it again.
451 */
452 int
453 lfs_fastvget(mp, ino, daddr, vpp, dinp)
454 struct mount *mp;
455 ino_t ino;
456 daddr_t daddr;
457 struct vnode **vpp;
458 struct dinode *dinp;
459 {
460 register struct inode *ip;
461 struct vnode *vp;
462 struct ufsmount *ump;
463 struct buf *bp;
464 dev_t dev;
465 int error;
466
467 ump = VFSTOUFS(mp);
468 dev = ump->um_dev;
469 /*
470 * This is playing fast and loose. Someone may have the inode
471 * locked, in which case they are going to be distinctly unhappy
472 * if we trash something.
473 */
474 if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
475 lfs_vref(*vpp);
476 if ((*vpp)->v_flag & VXLOCK)
477 printf ("Cleaned vnode VXLOCKED\n");
478 ip = VTOI(*vpp);
479 if (ip->i_flag & IN_LOCKED)
480 printf("cleaned vnode locked\n");
481 if (!(ip->i_flag & IN_MODIFIED)) {
482 ++ump->um_lfs->lfs_uinodes;
483 ip->i_flag |= IN_MODIFIED;
484 }
485 ip->i_flag |= IN_MODIFIED;
486 return (0);
487 }
488
489 /* Allocate new vnode/inode. */
490 if ((error = lfs_vcreate(mp, ino, &vp)) != 0) {
491 *vpp = NULL;
492 return (error);
493 }
494
495 /*
496 * Put it onto its hash chain and lock it so that other requests for
497 * this inode will block if they arrive while we are sleeping waiting
498 * for old data structures to be purged or for the contents of the
499 * disk portion of this inode to be read.
500 */
501 ip = VTOI(vp);
502 ufs_ihashins(ip);
503
504 /*
505 * XXX
506 * This may not need to be here, logically it should go down with
507 * the i_devvp initialization.
508 * Ask Kirk.
509 */
510 ip->i_lfs = ump->um_lfs;
511
512 /* Read in the disk contents for the inode, copy into the inode. */
513 if (dinp) {
514 error = copyin(dinp, &ip->i_din.ffs_din, sizeof(struct dinode));
515 if (error)
516 return (error);
517 }
518 else {
519 error = bread(ump->um_devvp, daddr,
520 (int)ump->um_lfs->lfs_bsize, NOCRED, &bp);
521 if (error) {
522 /*
523 * The inode does not contain anything useful, so it
524 * would be misleading to leave it on its hash chain.
525 * Iput() will return it to the free list.
526 */
527 ufs_ihashrem(ip);
528
529 /* Unlock and discard unneeded inode. */
530 lfs_vunref(vp);
531 brelse(bp);
532 *vpp = NULL;
533 return (error);
534 }
535 ip->i_din.ffs_din =
536 *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
537 brelse(bp);
538 }
539
540 /* Inode was just read from user space or disk, make sure it's locked */
541 ip->i_flag |= IN_LOCKED;
542
543 /*
544 * Initialize the vnode from the inode, check for aliases. In all
545 * cases re-init ip, the underlying vnode/inode may have changed.
546 */
547 error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp);
548 if (error) {
549 lfs_vunref(vp);
550 *vpp = NULL;
551 return (error);
552 }
553 /*
554 * Finish inode initialization now that aliasing has been resolved.
555 */
556 ip->i_devvp = ump->um_devvp;
557 ip->i_flag |= IN_MODIFIED;
558 ++ump->um_lfs->lfs_uinodes;
559 VREF(ip->i_devvp);
560 *vpp = vp;
561 return (0);
562 }
563 struct buf *
564 lfs_fakebuf(vp, lbn, size, uaddr)
565 struct vnode *vp;
566 int lbn;
567 size_t size;
568 caddr_t uaddr;
569 {
570 struct buf *bp;
571
572 bp = lfs_newbuf(vp, lbn, 0);
573 bp->b_saveaddr = uaddr;
574 bp->b_bufsize = size;
575 bp->b_bcount = size;
576 bp->b_flags |= B_INVAL;
577 return (bp);
578 }
579