/*	$NetBSD: lfs_syscalls.c,v 1.56 2000/12/03 07:34:49 perseant Exp $	*/

/*-
 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant (at) hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_syscalls.c	8.10 (Berkeley) 5/14/95
 */

#define LFS		/* for prototypes in syscallargs.h */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/kernel.h>

#include <sys/syscallargs.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

/* Flags for return from lfs_fastvget */
#define FVG_UNLOCK	0x01	/* Needs to be unlocked */
#define FVG_PUT		0x02	/* Needs to be vput() */

struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
int lfs_fasthashget __P((dev_t, ino_t, int *, struct vnode **));

int debug_cleaner = 0;
int clean_vnlocked = 0;
int clean_inlocked = 0;
int verbose_debug = 0;

pid_t lfs_cleaner_pid = 0;

/*
 * Definitions for the buffer free lists.
 */
#define BQUEUES		4	/* number of free buffer queues */

#define BQ_LOCKED	0	/* super-blocks &c */
#define BQ_LRU		1	/* lru, useful buffers */
#define BQ_AGE		2	/* rubbish */
#define BQ_EMPTY	3	/* buffer headers with no memory */

extern TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
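/*
 * A bi_daddr of LFS_FORCE_WRITE asks lfs_markv to rewrite the block
 * regardless of its current on-disk address.  UNASSIGNED is safe to
 * overload here, since a block actually being cleaned always has a
 * real disk address.
 */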
#define LFS_FORCE_WRITE UNASSIGNED

#define LFS_VREF_THRESHOLD 128

/*
 * sys_lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
sys_lfs_markv(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_lfs_markv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, *nbp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
#ifdef DEBUG_LFS
	int vputc = 0, iwritten = 0;
#endif
	fsid_t fsid;
	void *start;
	ino_t lastino;
	ufs_daddr_t b_daddr, v_daddr;
	int origcnt, cnt, error, lfs_fastvget_unlock;
	int do_again = 0;
	int s;
#ifdef CHECK_COPYIN
	int i;
#endif /* CHECK_COPYIN */
#ifdef LFS_TRACK_IOS
	int j;
#endif
	int numlocked = 0, numrefed = 0;
	ino_t maxino;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	fs = VFSTOUFS(mntp)->um_lfs;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

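	/*
	 * The largest valid inode number is bounded by the size of the
	 * Ifile: its block count, less the cleaner-info and segment-table
	 * blocks, times the number of inode entries per block.
	 */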
	maxino = (dbtofsb(fs, VTOI(fs->lfs_ivnode)->i_ffs_blocks) -
		  fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb;

	origcnt = cnt = SCARG(uap, blkcnt);
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO));
	if (error)
		goto err1;

	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0) {
		free(start, M_SEGMENT);	/* don't leak the copied-in array */
		return (error);
	}

	/*
	 * Take the segment lock, so that the blocks we are about to mark
	 * cannot become invalid while we sleep somewhere below.
	 *
	 * It is also important to note here that unless we specify SEGM_CKP,
	 * any Ifile blocks that we might be asked to clean will never get
	 * to the disk.
	 */
	lfs_seglock(fs, SEGM_SYNC|SEGM_CLEAN|SEGM_CKP);

	/* Mark blocks/inodes dirty.  */
	error = 0;

#ifdef DEBUG_LFS
	/* Run through and count the inodes */
	lastino = LFS_UNUSED_INUM;
	for (blkp = start; cnt--; ++blkp) {
		if (lastino != blkp->bi_inode) {
			lastino = blkp->bi_inode;
			vputc++;
		}
	}
	cnt = origcnt;
	printf("[%d/", vputc);
	iwritten = 0;
#endif /* DEBUG_LFS */
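	/*
	 * Main loop: walk the BLOCK_INFO array.  Entries for the same
	 * inode are expected to arrive contiguously, so lastino and
	 * v_daddr let us hold one vnode across consecutive entries
	 * instead of re-fetching it for every block.
	 */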
	/* These used to be in the initializer of the for loop below */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = start; cnt--; ++blkp)
	{
		if (blkp->bi_daddr == LFS_FORCE_WRITE)
			printf("lfs_markv: warning: force-writing ino %d lbn %d\n",
			       blkp->bi_inode, blkp->bi_lbn);
#ifdef LFS_TRACK_IOS
		/*
		 * If there is I/O on this segment that is not yet complete,
		 * the cleaner probably does not have the right information.
		 * Send it packing.
		 */
		for (j = 0; j < LFS_THROTTLE; j++) {
			if (fs->lfs_pending[j] != LFS_UNUSED_DADDR
			   && datosn(fs, fs->lfs_pending[j]) == datosn(fs, blkp->bi_daddr)
			   && blkp->bi_daddr != LFS_FORCE_WRITE)
			{
				printf("lfs_markv: attempt to clean pending segment? (#%d)\n",
				       datosn(fs, fs->lfs_pending[j]));
				/* free(start, M_SEGMENT); */
				/* return (EBUSY); */
			}
		}
#endif /* LFS_TRACK_IOS */
		/* Bounds-check incoming data, avoid panic for failed VGET */
		if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
			error = EINVAL;
			goto again;
		}
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid
			 * v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
#ifdef DEBUG_LFS
				if (ip->i_flag & (IN_MODIFIED|IN_CLEANING))
					iwritten++;
#endif
				if (lfs_fastvget_unlock) {
					VOP_UNLOCK(vp, 0);
					numlocked--;
				}
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				/* XXX fix for force write */
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* Don't force-write the ifile */
			if (blkp->bi_inode == LFS_IFILE_INUM
			   && blkp->bi_daddr == LFS_FORCE_WRITE)
			{
				continue;
			}
			if (v_daddr == LFS_UNUSED_DADDR
			   && blkp->bi_daddr != LFS_FORCE_WRITE)
			{
				continue;
			}

			/* Get the vnode/inode. */
			error = lfs_fastvget(mntp, blkp->bi_inode, v_daddr,
					     &vp,
					     (blkp->bi_lbn == LFS_UNUSED_LBN
					      ? blkp->bi_bp
					      : NULL),
					     &lfs_fastvget_unlock);
			if (lfs_fastvget_unlock)
				numlocked++;

			if (!error) {
				numrefed++;
			}
			if (error) {
#ifdef DEBUG_LFS
				printf("lfs_markv: lfs_fastvget failed with %d (ino %d, segment %d)\n",
				       error, blkp->bi_inode,
				       datosn(fs, blkp->bi_daddr));
#endif /* DEBUG_LFS */
				/*
				 * If we got EAGAIN, that means that the
				 * Inode was locked.  This is
				 * recoverable: just clean the rest of
				 * this segment, and let the cleaner try
				 * again with another.  (When the
				 * cleaner runs again, this segment will
				 * sort high on the list, since it is
				 * now almost entirely empty.)  But, we
				 * still set v_daddr = LFS_UNUSED_DADDR
				 * so as not to test this over and over
				 * again.
				 */
				if (error == EAGAIN) {
					error = 0;
					do_again++;
				}
#ifdef DIAGNOSTIC
				else if (error != ENOENT)
					panic("lfs_markv VFS_VGET FAILED");
#endif
				/* lastino = LFS_UNUSED_INUM; */
				v_daddr = LFS_UNUSED_DADDR;
				vp = NULL;
				ip = NULL;
				continue;
			}
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead (or
			 * in any case we can't get it...e.g., it is
			 * inlocked).  Keep going.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/* XXX need to make sure that the inode gets written in this case */
			/* XXX but only write the inode if it's the right one */
			if (blkp->bi_inode != LFS_IFILE_INUM) {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				if (ifp->if_daddr == blkp->bi_daddr
				   || blkp->bi_daddr == LFS_FORCE_WRITE)
				{
					LFS_SET_UINO(ip, IN_CLEANING);
				}
				brelse(bp);
			}
			continue;
		}

		b_daddr = 0;
		if (blkp->bi_daddr != LFS_FORCE_WRITE) {
			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
			    b_daddr != blkp->bi_daddr)
			{
				if (datosn(fs, b_daddr)
				   == datosn(fs, blkp->bi_daddr))
				{
					printf("lfs_markv: wrong da same seg: %x vs %x\n",
					       blkp->bi_daddr, b_daddr);
				}
				continue;
			}
		}
		/*
		 * If we got to here, then we are keeping the block.  If
		 * it is an indirect block, we want to actually put it
		 * in the buffer cache so that it can be updated in the
		 * finish_meta section.  If it's not, we need to
		 * allocate a fake buffer so that writeseg can perform
		 * the copyin and write the buffer.
		 */
		/*
		 * XXX - if the block we are reading has been *extended* since
		 * it was written to disk, then we risk throwing away
		 * the extension in bread()/getblk().  Check the size
		 * here.
		 */
		if (blkp->bi_size < fs->lfs_bsize) {
			s = splbio();
			bp = incore(vp, blkp->bi_lbn);
			if (bp && bp->b_bcount > blkp->bi_size) {
				printf("lfs_markv: %ld > %d (fixed)\n",
				       bp->b_bcount, blkp->bi_size);
				blkp->bi_size = bp->b_bcount;
			}
			splx(s);
		}
		if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
			/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn,
					 blkp->bi_size, blkp->bi_bp);
			/* Pretend we used bread() to get it */
			bp->b_blkno = blkp->bi_daddr;
		} else {
			/* Indirect block */
			bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
			if (!(bp->b_flags & (B_DONE|B_DELWRI))) { /* B_CACHE */
				/*
				 * The block in question was not found
				 * in the cache; i.e., the block that
				 * getblk() returned is empty.  So, we
				 * can (and should) copy in the
				 * contents, because we've already
				 * determined that this was the right
				 * version of this block on disk.
				 *
				 * And, it can't have changed underneath
				 * us, because we have the segment lock.
				 */
				error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
				if (error)
					goto err2;
			}
		}
		if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
			goto err2;
	}

	/*
	 * Finish the old file, if there was one
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
#ifdef DEBUG_LFS
		if (ip->i_flag & (IN_MODIFIED|IN_CLEANING))
			iwritten++;
#endif
		if (lfs_fastvget_unlock) {
			VOP_UNLOCK(vp, 0);
			numlocked--;
		}
		lfs_vunref(vp);
		numrefed--;
	}

	/*
	 * The last write has to be SEGM_SYNC, because of calling semantics.
	 * It also has to be SEGM_CKP, because otherwise we could write
	 * over the newly cleaned data contained in a checkpoint, and then
	 * we'd be unhappy at recovery time.
	 */
	lfs_segwrite(mntp, SEGM_SYNC|SEGM_CLEAN|SEGM_CKP);
	free(start, M_SEGMENT);

	lfs_segunlock(fs);

#ifdef DEBUG_LFS
	printf("%d]", iwritten);
	if (numlocked != 0 || numrefed != 0) {
		panic("lfs_markv: numlocked=%d numrefed=%d", numlocked, numrefed);
	}
#endif

	vfs_unbusy(mntp);
	if (error)
		return (error);
	else if (do_again)
		return EAGAIN;

	return 0;

err2:
	printf("lfs_markv err2\n");
	if (lfs_fastvget_unlock) {
		VOP_UNLOCK(vp, 0);
		--numlocked;
	}
	lfs_vunref(vp);
	--numrefed;

	/* Free up fakebuffers -- have to take these from the LOCKED list */
again:
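	/*
	 * Scan the locked queue for our fake buffers.  Only buffers with
	 * B_CALL set are considered; this assumes, as elsewhere in LFS,
	 * that lfs_newbuf marks its buffers B_CALL, so the test picks out
	 * cleaner-allocated buffers and leaves ordinary ones alone.
	 */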
	s = splbio();
	for (bp = bufqueues[BQ_LOCKED].tqh_first; bp; bp = nbp) {
		nbp = bp->b_freelist.tqe_next;
		if (bp->b_flags & B_CALL) {
			if (bp->b_flags & B_BUSY) { /* not bloody likely */
				bp->b_flags |= B_WANTED;
				tsleep(bp, PRIBIO+1, "markv", 0);
				splx(s);
				goto again;
			}
			if (bp->b_flags & B_DELWRI)
				fs->lfs_avail += btodb(bp->b_bcount);
			bremfree(bp);
			splx(s);
			brelse(bp);
			s = splbio();
		}
	}
	splx(s);
	free(start, M_SEGMENT);
	lfs_segunlock(fs);
	vfs_unbusy(mntp);
#ifdef DEBUG_LFS
	if (numlocked != 0 || numrefed != 0) {
		panic("lfs_markv: numlocked=%d numrefed=%d", numlocked, numrefed);
	}
#endif

	return (error);

err1:
	printf("lfs_markv err1\n");
	free(start, M_SEGMENT);
	return (error);
}
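
/*
 * Sketch of the expected calling sequence (illustrative only, not part
 * of this file; everything but the syscall names and BLOCK_INFO fields
 * is hypothetical): the user-level cleaner reads a dirty segment, builds
 * a BLOCK_INFO array for the blocks it found there, asks lfs_bmapv which
 * of them are still live, and hands the live ones to lfs_markv to be
 * rewritten:
 *
 *	BLOCK_INFO blkiov[NBLKS];
 *	(fill in bi_inode, bi_lbn, bi_daddr, bi_segcreate, bi_bp)
 *	lfs_bmapv(&fsid, blkiov, nblks);	(updates bi_daddr)
 *	(drop entries whose bi_daddr moved out of this segment)
 *	lfs_markv(&fsid, blkiov, nlive);	(rewrites the rest)
 */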

/*
 * sys_lfs_bmapv:
 *
 * This will fill in the current disk address for arrays of blocks.
 *
 *  0 on success
 * -1/errno is returned on error.
 */

int
sys_lfs_bmapv(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_lfs_bmapv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ufsmount *ump;
	struct vnode *vp;
	fsid_t fsid;
	void *start;
	ino_t lastino;
	ufs_daddr_t v_daddr;
	int origcnt, cnt, error, need_unlock = 0;
	int numlocked = 0, numrefed = 0;
#ifdef LFS_TRACK_IOS
	int j;
#endif

	lfs_cleaner_pid = p->p_pid;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	ump = VFSTOUFS(mntp);
	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
		return (error);

	origcnt = cnt = SCARG(uap, blkcnt);
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO));
	if (error) {
		free(start, M_SEGMENT);
		vfs_unbusy(mntp);
		return (error);
	}

	fs = VFSTOUFS(mntp)->um_lfs;

	error = 0;

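	/*
	 * As in lfs_markv, entries for the same inode are expected to
	 * arrive contiguously, so a single vnode reference is held
	 * across them.
	 */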
	/* These used to be in the initializer of the for loop below */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = start; cnt--; ++blkp)
	{
#ifdef DEBUG
		if (datosn(fs, fs->lfs_curseg) == datosn(fs, blkp->bi_daddr)) {
			printf("lfs_bmapv: attempt to clean current segment? (#%d)\n",
			       datosn(fs, fs->lfs_curseg));
			free(start, M_SEGMENT);
			vfs_unbusy(mntp);
			return (EBUSY);
		}
#endif /* DEBUG */
#ifdef LFS_TRACK_IOS
		/*
		 * If there is I/O on this segment that is not yet complete,
		 * the cleaner probably does not have the right information.
		 * Send it packing.
		 */
		for (j = 0; j < LFS_THROTTLE; j++) {
			if (fs->lfs_pending[j] != LFS_UNUSED_DADDR
			   && datosn(fs, fs->lfs_pending[j]) == datosn(fs, blkp->bi_daddr))
			{
				printf("lfs_bmapv: attempt to clean pending segment? (#%d)\n",
				       datosn(fs, fs->lfs_pending[j]));
				free(start, M_SEGMENT);
				vfs_unbusy(mntp);
				return (EBUSY);
			}
		}

#endif /* LFS_TRACK_IOS */
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid
			 * v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
				if (need_unlock) {
					VOP_UNLOCK(vp, 0);
					numlocked--;
				}
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			/*
			 * A regular call to VFS_VGET could deadlock
			 * here.  Instead, we try an unlocked access.
			 */
			vp = ufs_ihashlookup(ump->um_dev, blkp->bi_inode);
			if (vp != NULL && !(vp->v_flag & VXLOCK)) {
				ip = VTOI(vp);
				if (lfs_vref(vp)) {
					v_daddr = LFS_UNUSED_DADDR;
					need_unlock = 0;
					continue;
				}
				numrefed++;
				if (VOP_ISLOCKED(vp)) {
#ifdef DEBUG_LFS
					printf("lfs_bmapv: inode %d inlocked\n", ip->i_number);
#endif
					v_daddr = LFS_UNUSED_DADDR;
					need_unlock = 0;
					lfs_vunref(vp);
					--numrefed;
					continue;
				} else {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					need_unlock = FVG_UNLOCK;
					numlocked++;
				}
			} else {
				error = VFS_VGET(mntp, blkp->bi_inode, &vp);
				if (error) {
#ifdef DEBUG_LFS
					printf("lfs_bmapv: vget of ino %d failed with %d\n", blkp->bi_inode, error);
#endif
					v_daddr = LFS_UNUSED_DADDR;
					need_unlock = 0;
					continue;
				} else {
					need_unlock = FVG_PUT;
					numlocked++;
					numrefed++;
				}
			}
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going.  Note that we DO NOT set the
			 * bi_daddr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			/* blkp->bi_daddr = LFS_UNUSED_DADDR; */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
					 &(blkp->bi_daddr), NULL);
			if (error)
			{
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
		}
	}

	/*
	 * Finish the old file, if there was one.  The presence
	 * of a usable vnode in vp is signaled by a valid v_daddr.
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
		if (need_unlock) {
			VOP_UNLOCK(vp, 0);
			numlocked--;
		}
		lfs_vunref(vp);
		numrefed--;
	}

	if (numlocked != 0 || numrefed != 0) {
		panic("lfs_bmapv: numlocked=%d numrefed=%d", numlocked,
		      numrefed);
	}

	/* Hand the updated array back; don't ignore a copyout failure */
	error = copyout(start, SCARG(uap, blkiov), origcnt * sizeof(BLOCK_INFO));
	free(start, M_SEGMENT);
	vfs_unbusy(mntp);

	return (error);
}

/*
 * sys_lfs_segclean:
 *
 * Mark the segment clean.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
sys_lfs_segclean(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_lfs_segclean_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(u_long) segment;
	} */ *uap = v;
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	struct mount *mntp;
	struct lfs *fs;
	fsid_t fsid;
	int error;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	fs = VFSTOUFS(mntp)->um_lfs;

	if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
		return (EBUSY);

	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
		return (error);
	LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		vfs_unbusy(mntp);
		return (EBUSY);
	}
	if (!(sup->su_flags & SEGUSE_DIRTY)) {
		brelse(bp);
		vfs_unbusy(mntp);
		return (EALREADY);
	}

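	/*
	 * Update the accounting: the whole segment becomes available
	 * again (less the superblock pad, if one lives here), and the
	 * summary and inode blocks it held are returned to lfs_bfree
	 * and no longer counted as dirty metadata in lfs_dmeta.
	 */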
	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize);
	if (sup->su_flags & SEGUSE_SUPERBLOCK)
		fs->lfs_avail -= btodb(LFS_SBPAD);
	fs->lfs_bfree += sup->su_nsums * btodb(LFS_SUMMARY_SIZE) +
		fsbtodb(fs, sup->su_ninos);
	fs->lfs_dmeta -= sup->su_nsums * btodb(LFS_SUMMARY_SIZE) +
		fsbtodb(fs, sup->su_ninos);
	if (fs->lfs_dmeta < 0)
		fs->lfs_dmeta = 0;
	sup->su_flags &= ~SEGUSE_DIRTY;
	(void) VOP_BWRITE(bp);

	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	fs->lfs_nclean = cip->clean;
	cip->bfree = fs->lfs_bfree;
	cip->avail = fs->lfs_avail - fs->lfs_ravail;
	(void) VOP_BWRITE(bp);
	wakeup(&fs->lfs_avail);
	vfs_unbusy(mntp);

	return (0);
}

/*
 * sys_lfs_segwait:
 *
 * This will block until a segment in file system fsid is written.  A timeout
 * (a struct timeval) may be specified, which will awaken the cleaner
 * automatically when it expires.  An fsid of -1 means any file system, and
 * a timeout of 0 means forever.
 *
 *  0 on success
 *  1 on timeout
 * -1/errno is returned on error.
 */
int
sys_lfs_segwait(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_lfs_segwait_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	extern int lfs_allclean_wakeup;
	struct mount *mntp;
	struct timeval atv;
	fsid_t fsid;
	void *addr;
	u_long timeout;
	int error, s;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) {
		return (error);
	}
	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;

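	/*
	 * Convert the caller's relative timeout to an absolute time and
	 * then to a tick count for tsleep().  With no timeval supplied,
	 * timeout stays 0 and tsleep() waits indefinitely.
	 */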
	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
		if (error)
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
		/*
		 * XXX THIS COULD SLEEP FOREVER IF TIMEOUT IS {0,0}!
		 * XXX IS THAT WHAT IS INTENDED?
		 */
		s = splclock();
		timeradd(&atv, &time, &atv);
		timeout = hzto(&atv);
		splx(s);
	} else
		timeout = 0;

	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	return (error == ERESTART ? EINTR : 0);
}

/*
 * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
 * daddr from the ifile, so don't look it up again.  If the cleaner is
 * processing IINFO structures, it may have the ondisk inode already, so
 * don't go retrieving it again.
 *
 * If we find the vnode on the hash chain and can lock it ourselves, we set
 * FVG_UNLOCK in (*need_unlock); if it is already locked by another process,
 * we leave (*need_unlock) at zero.
 *
 * If we don't find it, we call ufs_ihashins, which locks the inode, and we
 * set FVG_PUT in (*need_unlock).
 *
 * In either case we lfs_vref, and it is the caller's responsibility to
 * lfs_vunref and VOP_UNLOCK (if necessary) when finished.
 */
extern struct lock ufs_hashlock;

int
lfs_fasthashget(dev, ino, need_unlock, vpp)
	dev_t dev;
	ino_t ino;
	int *need_unlock;
	struct vnode **vpp;
{
	struct inode *ip;

	/*
	 * This is playing fast and loose.  Someone may have the inode
	 * locked, in which case they are going to be distinctly unhappy
	 * if we trash something.
	 */
	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
		if ((*vpp)->v_flag & VXLOCK) {
			printf("lfs_fastvget: vnode VXLOCKed for ino %d\n",
			       ino);
			clean_vnlocked++;
#ifdef LFS_EAGAIN_FAIL
			return EAGAIN;
#endif
		}
		ip = VTOI(*vpp);
		if (lfs_vref(*vpp)) {
			clean_inlocked++;
			return EAGAIN;
		}
		if (VOP_ISLOCKED(*vpp)) {
#ifdef DEBUG_LFS
			printf("lfs_fastvget: ino %d inlocked by pid %d\n",
			       ip->i_number, (*vpp)->v_lock.lk_lockholder);
#endif
			clean_inlocked++;
#ifdef LFS_EAGAIN_FAIL
			lfs_vunref(*vpp);
			return EAGAIN;
#endif /* LFS_EAGAIN_FAIL */
		} else {
			vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
			*need_unlock |= FVG_UNLOCK;
		}
	} else
		*vpp = NULL;

	return (0);
}

int
lfs_fastvget(mp, ino, daddr, vpp, dinp, need_unlock)
	struct mount *mp;
	ino_t ino;
	ufs_daddr_t daddr;
	struct vnode **vpp;
	struct dinode *dinp;
	int *need_unlock;
{
	struct inode *ip;
	struct vnode *vp;
	struct ufsmount *ump;
	dev_t dev;
	int error;
	struct buf *bp;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	*need_unlock = 0;

	/*
	 * Wait until the filesystem is fully mounted before allowing vget
	 * to complete.  This prevents possible problems with roll-forward.
	 */
	while (ump->um_lfs->lfs_flags & LFS_NOTYET) {
		tsleep(&ump->um_lfs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0);
	}
	/*
	 * This is playing fast and loose.  Someone may have the inode
	 * locked, in which case they are going to be distinctly unhappy
	 * if we trash something.
	 */

	error = lfs_fasthashget(dev, ino, need_unlock, vpp);
	if (error != 0 || *vpp != NULL)
		return (error);

	if ((error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, &vp)) != 0) {
		*vpp = NULL;
		return (error);
	}

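	/*
	 * We now hold a fresh vnode; recheck the hash under ufs_hashlock
	 * in case another process created this inode while we slept in
	 * getnewvnode().  If so, give our vnode back and use theirs.
	 */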
	do {
		error = lfs_fasthashget(dev, ino, need_unlock, vpp);
		if (error != 0 || *vpp != NULL) {
			ungetnewvnode(vp);
			return (error);
		}
	} while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));

	/* Allocate new vnode/inode. */
	lfs_vcreate(mp, ino, vp);

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip = VTOI(vp);
	ufs_ihashins(ip);
	lockmgr(&ufs_hashlock, LK_RELEASE, 0);

	/*
	 * XXX
	 * This may not need to be here, logically it should go down with
	 * the i_devvp initialization.
	 * Ask Kirk.
	 */
	ip->i_lfs = ump->um_lfs;

	/* Read in the disk contents for the inode, copy into the inode. */
	if (dinp) {
		error = copyin(dinp, &ip->i_din.ffs_din, DINODE_SIZE);
		if (error) {
			printf("lfs_fastvget: dinode copyin failed for ino %d\n", ino);
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			lockmgr(&vp->v_lock, LK_RELEASE, &vp->v_interlock);
			lfs_vunref(vp);
			*vpp = NULL;
			return (error);
		}
		if (ip->i_number != ino)
			panic("lfs_fastvget: I was fed the wrong inode!");
	} else {
		error = bread(ump->um_devvp, daddr,
			      (int)ump->um_lfs->lfs_bsize, NOCRED, &bp);
		if (error) {
			printf("lfs_fastvget: bread failed with %d\n", error);
			/*
			 * The inode does not contain anything useful, so it
			 * would be misleading to leave it on its hash chain.
			 * lfs_vunref() below will return it to the free list.
			 */
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			lockmgr(&vp->v_lock, LK_RELEASE, &vp->v_interlock);
			lfs_vunref(vp);
			brelse(bp);
			*vpp = NULL;
			return (error);
		}
		ip->i_din.ffs_din =
			*lfs_ifind(ump->um_lfs, ino, bp);
		brelse(bp);
	}
	ip->i_ffs_effnlink = ip->i_ffs_nlink;
	ip->i_lfs_effnblks = ip->i_ffs_blocks;

	/*
	 * Initialize the vnode from the inode, check for aliases.  In all
	 * cases re-init ip, the underlying vnode/inode may have changed.
	 */
	error = ufs_vinit(mp, lfs_specop_p, lfs_fifoop_p, &vp);
	if (error) {
		/* This CANNOT happen (see ufs_vinit) */
		printf("lfs_fastvget: ufs_vinit returned %d for ino %d\n", error, ino);
		lockmgr(&vp->v_lock, LK_RELEASE, &vp->v_interlock);
		lfs_vunref(vp);
		*vpp = NULL;
		return (error);
	}
#ifdef DEBUG_LFS
	if (vp->v_type == VNON) {
		printf("lfs_fastvget: ino %d is type VNON! (ifmt=%o, dinp=%p)\n",
		       ip->i_number, (ip->i_ffs_mode & IFMT) >> 12, dinp);
		lfs_dump_dinode(&ip->i_din.ffs_din);
#ifdef DDB
		Debugger();
#endif
	}
#endif /* DEBUG_LFS */
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);
	*vpp = vp;
	*need_unlock |= FVG_PUT;

	uvm_vnp_setsize(vp, ip->i_ffs_size);

	return (0);
}

struct buf *
lfs_fakebuf(vp, lbn, size, uaddr)
	struct vnode *vp;
	int lbn;
	size_t size;
	caddr_t uaddr;
{
	struct buf *bp;
	int error;

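	/*
	 * Normally the cleaner's data is copied into a fresh buffer right
	 * here.  With ALLOW_VFLUSH_CORRUPTION the buffer instead carries
	 * a pointer to the user's address in b_saveaddr and the copyin is
	 * left for write time; this is cheaper, but, as the option name
	 * warns, unsafe if the cleaner's memory goes away first.
	 */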
#ifndef ALLOW_VFLUSH_CORRUPTION
	bp = lfs_newbuf(vp, lbn, size);
	error = copyin(uaddr, bp->b_data, size);
	if (error) {
		lfs_freebuf(bp);
		return NULL;
	}
#else
	bp = lfs_newbuf(vp, lbn, 0);
	bp->b_flags |= B_INVAL;
	bp->b_saveaddr = uaddr;
#endif

	bp->b_bufsize = size;
	bp->b_bcount = size;
	return (bp);
}