/* $NetBSD: lfs_syscalls.c,v 1.81 2003/02/18 02:00:08 perseant Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_syscalls.c	8.10 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.81 2003/02/18 02:00:08 perseant Exp $");

#define LFS		/* for prototypes in syscallargs.h */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/kernel.h>

#include <sys/sa.h>
#include <sys/syscallargs.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

/* Max block count for lfs_markv() */
#define MARKV_MAXBLKCNT		65536

struct buf *lfs_fakebuf(struct lfs *, struct vnode *, int, size_t, caddr_t);
int lfs_fasthashget(dev_t, ino_t, struct vnode **);

int debug_cleaner = 0;
int clean_vnlocked = 0;
int clean_inlocked = 0;
int verbose_debug = 0;

pid_t lfs_cleaner_pid = 0;

extern int lfs_subsys_pages;
extern struct simplelock lfs_subsys_lock;

/*
 * Definitions for the buffer free lists.
 */
#define BQUEUES		4	/* number of free buffer queues */

#define BQ_LOCKED	0	/* super-blocks &c */
#define BQ_LRU		1	/* lru, useful buffers */
#define BQ_AGE		2	/* rubbish */
#define BQ_EMPTY	3	/* buffer headers with no memory */

extern TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];

#define LFS_FORCE_WRITE UNASSIGNED

#define LFS_VREF_THRESHOLD 128

static int lfs_bmapv(struct proc *, fsid_t *, BLOCK_INFO *, int);
static int lfs_markv(struct proc *, fsid_t *, BLOCK_INFO *, int);

/*
 * sys_lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is returned on error.
 */
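/*
 * Illustrative sketch (not compiled here): how a userland cleaner might
 * drive this call through the lfs_markv(2) wrapper.  The exact fields a
 * cleaner fills in, and where it gets the fsid, are assumptions for
 * illustration; see lfs_cleanerd for actual usage.
 *
 *	BLOCK_INFO bi[NBLK];	(NBLK <= MARKV_MAXBLKCNT)
 *	fsid_t fsid;		(the file system's fsid)
 *
 *	fill in bi[i].bi_inode, bi_lbn, bi_daddr, bi_segcreate,
 *	bi_version, bi_bp and bi_size from the segment being cleaned, then:
 *
 *	if (lfs_markv(&fsid, bi, NBLK) < 0 && errno == EAGAIN)
 *		retry later; some blocks could not be marked this pass.
 */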
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_markv(struct proc *p, void *v, register_t *retval)
{
	struct sys_lfs_markv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > MARKV_MAXBLKCNT)
		return (EINVAL);

	blkiov = malloc(blkcnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
			    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_markv(p, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO));
 out:
	free(blkiov, M_SEGMENT);
	return error;
}
#else
int
sys_lfs_markv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_lfs_markv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;

	if ((error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > MARKV_MAXBLKCNT)
		return (EINVAL);

	blkiov = malloc(blkcnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	blkiov15 = malloc(blkcnt * sizeof(BLOCK_INFO_15), M_SEGMENT, M_WAITOK);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
			    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version = blkiov15[i].bi_version;
		blkiov[i].bi_bp = blkiov15[i].bi_bp;
		blkiov[i].bi_size = blkiov15[i].bi_size;
	}

	if ((error = lfs_markv(l->l_proc, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version = blkiov[i].bi_version;
			blkiov15[i].bi_bp = blkiov[i].bi_bp;
			blkiov15[i].bi_size = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO_15));
	}
 out:
	free(blkiov, M_SEGMENT);
	free(blkiov15, M_SEGMENT);
	return error;
}
#endif

#define LFS_MARKV_MAX_BLOCKS	(LFS_MAX_BUFS)

static int
lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, *nbp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
#ifdef DEBUG_LFS
	int vputc = 0, iwritten = 0;
#endif
	ino_t lastino;
	daddr_t b_daddr, v_daddr;
	int cnt, error;
	int do_again = 0;
	int s;
#ifdef CHECK_COPYIN
	int i;
#endif /* CHECK_COPYIN */
	int numrefed = 0;
	ino_t maxino;
	size_t obsize;

	/* number of blocks/inodes that we have already bwrite'ed */
	int nblkwritten, ninowritten;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	fs = VFSTOUFS(mntp)->um_lfs;
	maxino = (fragstoblks(fs, fsbtofrags(fs, VTOI(fs->lfs_ivnode)->i_ffs_blocks)) -
		  fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb;

	cnt = blkcnt;

	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
		return (error);

	/*
	 * Take the seglock, so that even if we have to sleep below, the
	 * blocks we are working on cannot become invalid in the meantime.
	 *
	 * It is also important to note here that unless we specify SEGM_CKP,
	 * any Ifile blocks that we might be asked to clean will never get
	 * to the disk.
	 */
	lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	/* Mark blocks/inodes dirty.  */
	error = 0;

#ifdef DEBUG_LFS
	/* Run through and count the inodes */
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp) {
		if (lastino != blkp->bi_inode) {
			lastino = blkp->bi_inode;
			vputc++;
		}
	}
	cnt = blkcnt;
	printf("[%d/", vputc);
	iwritten = 0;
#endif /* DEBUG_LFS */
	/* these were inside the initialization for the for loop */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	nblkwritten = ninowritten = 0;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		if (blkp->bi_daddr == LFS_FORCE_WRITE)
			printf("lfs_markv: warning: force-writing ino %d "
			       "lbn %lld\n",
			       blkp->bi_inode, (long long)blkp->bi_lbn);
		/* Bounds-check incoming data, avoid panic for failed VGET */
		if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
			error = EINVAL;
			goto again;
		}
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid
			 * v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
#ifdef DEBUG_LFS
				if (ip->i_flag & (IN_MODIFIED|IN_CLEANING))
					iwritten++;
#endif
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				/* XXX fix for force write */
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* Don't force-write the ifile */
			if (blkp->bi_inode == LFS_IFILE_INUM
			    && blkp->bi_daddr == LFS_FORCE_WRITE)
			{
				continue;
			}
			if (v_daddr == LFS_UNUSED_DADDR
			    && blkp->bi_daddr != LFS_FORCE_WRITE)
			{
				continue;
			}

			/* Get the vnode/inode. */
			error = lfs_fastvget(mntp, blkp->bi_inode, v_daddr,
					     &vp,
					     (blkp->bi_lbn == LFS_UNUSED_LBN
					      ? blkp->bi_bp
					      : NULL));

			if (!error) {
				numrefed++;
			}
			if (error) {
#ifdef DEBUG_LFS
				printf("lfs_markv: lfs_fastvget failed with %d (ino %d, segment %d)\n",
				       error, blkp->bi_inode,
				       dtosn(fs, blkp->bi_daddr));
#endif /* DEBUG_LFS */
				/*
				 * If we got EAGAIN, that means that the
				 * Inode was locked.  This is
				 * recoverable: just clean the rest of
				 * this segment, and let the cleaner try
				 * again with another.  (When the
				 * cleaner runs again, this segment will
				 * sort high on the list, since it is
				 * now almost entirely empty.)  But, we
				 * still set v_daddr = LFS_UNUSED_DADDR
				 * so as not to test this over and over
				 * again.
				 */
				if (error == EAGAIN) {
					error = 0;
					do_again++;
				}
#ifdef DIAGNOSTIC
				else if (error != ENOENT)
					panic("lfs_markv VFS_VGET FAILED");
#endif
				/* lastino = LFS_UNUSED_INUM; */
				v_daddr = LFS_UNUSED_DADDR;
				vp = NULL;
				ip = NULL;
				continue;
			}
			ip = VTOI(vp);
			ninowritten++;
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead (or
			 * in any case we can't get it -- e.g., its inode
			 * is locked).  Keep going.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/* XXX need to make sure that the inode gets written in this case */
			/* XXX but only write the inode if it's the right one */
			if (blkp->bi_inode != LFS_IFILE_INUM) {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				if (ifp->if_daddr == blkp->bi_daddr
				    || blkp->bi_daddr == LFS_FORCE_WRITE)
				{
					LFS_SET_UINO(ip, IN_CLEANING);
				}
				brelse(bp);
			}
			continue;
		}

		b_daddr = 0;
		if (blkp->bi_daddr != LFS_FORCE_WRITE) {
			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
			    dbtofsb(fs, b_daddr) != blkp->bi_daddr)
			{
				if (dtosn(fs, dbtofsb(fs, b_daddr))
				    == dtosn(fs, blkp->bi_daddr))
				{
					printf("lfs_markv: wrong da same seg: %llx vs %llx\n",
					       (long long)blkp->bi_daddr,
					       (long long)dbtofsb(fs, b_daddr));
				}
				do_again++;
				continue;
			}
		}

		/*
		 * Check block sizes.  The blocks being cleaned come from
		 * disk, so they should have the same size as their on-disk
		 * counterparts.
		 */
		if (blkp->bi_lbn >= 0)
			obsize = blksize(fs, ip, blkp->bi_lbn);
		else
			obsize = fs->lfs_bsize;
		/* Check for fragment size change */
		if (blkp->bi_lbn >= 0 && blkp->bi_lbn < NDADDR) {
			obsize = ip->i_lfs_fragsize[blkp->bi_lbn];
		}
		if (obsize != blkp->bi_size) {
			printf("lfs_markv: ino %d lbn %lld wrong size (%ld != %d), try again\n",
			       blkp->bi_inode, (long long)blkp->bi_lbn,
			       (long) obsize, blkp->bi_size);
			do_again++;
			continue;
		}

		/*
		 * If we get to here, then we are keeping the block.  If
		 * it is an indirect block, we want to actually put it
		 * in the buffer cache so that it can be updated in the
		 * finish_meta section.  If it's not, we need to
		 * allocate a fake buffer so that writeseg can perform
		 * the copyin and write the buffer.
		 */
		if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
			/* Data Block */
			bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
					 blkp->bi_size, blkp->bi_bp);
			/* Pretend we used bread() to get it */
			bp->b_blkno = fsbtodb(fs, blkp->bi_daddr);
		} else {
			/* Indirect block or ifile */
			if (blkp->bi_size != fs->lfs_bsize &&
			    ip->i_number != LFS_IFILE_INUM)
				panic("lfs_markv: partial indirect block?"
				      " size=%d\n", blkp->bi_size);
			bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
			if (!(bp->b_flags & (B_DONE|B_DELWRI))) { /* B_CACHE */
				/*
				 * The block in question was not found
				 * in the cache; i.e., the block that
				 * getblk() returned is empty.  So, we
				 * can (and should) copy in the
				 * contents, because we've already
				 * determined that this was the right
				 * version of this block on disk.
				 *
				 * And, it can't have changed underneath
				 * us, because we have the segment lock.
				 */
				error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
				if (error)
					goto err2;
			}
		}
		if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
			goto err2;

		nblkwritten++;
		/*
		 * XXX should account indirect blocks and ifile pages as well
		 */
		if (nblkwritten + lblkno(fs, ninowritten * DINODE_SIZE)
		    > LFS_MARKV_MAX_BLOCKS) {
#ifdef DEBUG_LFS
			printf("lfs_markv: writing %d blks %d inos\n",
			       nblkwritten, ninowritten);
#endif
			lfs_segwrite(mntp, SEGM_CLEAN);
			nblkwritten = ninowritten = 0;
		}
	}

	/*
	 * Finish the old file, if there was one
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
#ifdef DEBUG_LFS
		if (ip->i_flag & (IN_MODIFIED|IN_CLEANING))
			iwritten++;
#endif
		lfs_vunref(vp);
		numrefed--;
	}

#ifdef DEBUG_LFS
	printf("%d]", iwritten);
	if (numrefed != 0) {
		panic("lfs_markv: numrefed=%d", numrefed);
	}
#endif

#ifdef DEBUG_LFS
	printf("lfs_markv: writing %d blks %d inos (check point)\n",
	       nblkwritten, ninowritten);
#endif
	/*
	 * The last write has to be SEGM_SYNC, because of calling semantics.
	 * It also has to be SEGM_CKP, because otherwise we could write
	 * over the newly cleaned data contained in a checkpoint, and then
	 * we'd be unhappy at recovery time.
	 */
	lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	lfs_segunlock(fs);

	vfs_unbusy(mntp);
	if (error)
		return (error);
	else if (do_again)
		return EAGAIN;

	return 0;

 err2:
	printf("lfs_markv err2\n");
	lfs_vunref(vp);
	--numrefed;

	/* Free up fakebuffers -- have to take these from the LOCKED list */
 again:
	s = splbio();
	for (bp = bufqueues[BQ_LOCKED].tqh_first; bp; bp = nbp) {
		nbp = bp->b_freelist.tqe_next;
		if (LFS_IS_MALLOC_BUF(bp)) {
			if (bp->b_flags & B_BUSY) { /* not bloody likely */
				bp->b_flags |= B_WANTED;
				tsleep(bp, PRIBIO+1, "markv", 0);
				splx(s);
				goto again;
			}
			if (bp->b_flags & B_DELWRI)
				fs->lfs_avail += btofsb(fs, bp->b_bcount);
			bremfree(bp);
			splx(s);
			brelse(bp);
			s = splbio();
		}
	}
	splx(s);
	lfs_segunlock(fs);
	vfs_unbusy(mntp);
#ifdef DEBUG_LFS
	if (numrefed != 0) {
		panic("lfs_markv: numrefed=%d", numrefed);
	}
#endif

	return (error);
}

/*
 * sys_lfs_bmapv:
 *
 * This will fill in the current disk address for arrays of blocks.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
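/*
 * Illustrative sketch (not compiled here): the cleaner typically runs
 * lfs_bmapv(2) over the blocks it found in a segment to learn which are
 * still live, then hands only the survivors to lfs_markv(2).  The exact
 * liveness test shown is an assumption for illustration.
 *
 *	if (lfs_bmapv(&fsid, bi, nblk) == 0) {
 *		keep bi[i] only if bi[i].bi_daddr still falls within the
 *		segment being cleaned; anything else is already dead.
 *	}
 */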
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_bmapv(struct proc *p, void *v, register_t *retval)
{
	struct sys_lfs_bmapv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);
	blkiov = malloc(blkcnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
			    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_bmapv(p, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO));
 out:
	free(blkiov, M_SEGMENT);
	return error;
}
#else
int
sys_lfs_bmapv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_lfs_bmapv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);
	blkiov = malloc(blkcnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	blkiov15 = malloc(blkcnt * sizeof(BLOCK_INFO_15), M_SEGMENT, M_WAITOK);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
			    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version = blkiov15[i].bi_version;
		blkiov[i].bi_bp = blkiov15[i].bi_bp;
		blkiov[i].bi_size = blkiov15[i].bi_size;
	}

	if ((error = lfs_bmapv(p, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version = blkiov[i].bi_version;
			blkiov15[i].bi_bp = blkiov[i].bi_bp;
			blkiov15[i].bi_size = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO_15));
	}
 out:
	free(blkiov, M_SEGMENT);
	free(blkiov15, M_SEGMENT);
	return error;
}
#endif

static int
lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ufsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t v_daddr;
	int cnt, error;
	int numrefed = 0;

	lfs_cleaner_pid = p->p_pid;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	ump = VFSTOUFS(mntp);
	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
		return (error);

	cnt = blkcnt;

	fs = VFSTOUFS(mntp)->um_lfs;

	error = 0;

	/* these were inside the initialization for the for loop */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid
			 * v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			/*
			 * A regular call to VFS_VGET could deadlock
			 * here.  Instead, we try an unlocked access.
			 */
			vp = ufs_ihashlookup(ump->um_dev, blkp->bi_inode);
			if (vp != NULL && !(vp->v_flag & VXLOCK)) {
				ip = VTOI(vp);
				if (lfs_vref(vp)) {
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				}
				numrefed++;
			} else {
				error = VFS_VGET(mntp, blkp->bi_inode, &vp);
				if (error) {
#ifdef DEBUG_LFS
					printf("lfs_bmapv: vget of ino %d failed with %d\n",
					       blkp->bi_inode, error);
#endif
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				} else {
					KASSERT(VOP_ISLOCKED(vp));
					VOP_UNLOCK(vp, 0);
					numrefed++;
				}
			}
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going.  Note that we DO NOT set
			 * bi_daddr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			/* blkp->bi_daddr = LFS_UNUSED_DADDR; */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			daddr_t bi_daddr;

			/* XXX ondisk32 */
			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
					 &bi_daddr, NULL);
			if (error)
			{
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			blkp->bi_daddr = dbtofsb(fs, bi_daddr);
			/* Fill in the block size, too */
			if (blkp->bi_lbn >= 0)
				blkp->bi_size = blksize(fs, ip, blkp->bi_lbn);
			else
				blkp->bi_size = fs->lfs_bsize;
		}
	}

	/*
	 * Finish the old file, if there was one.  The presence
	 * of a usable vnode in vp is signaled by a valid v_daddr.
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		numrefed--;
	}

#ifdef DEBUG_LFS
	if (numrefed != 0) {
		panic("lfs_bmapv: numrefed=%d", numrefed);
	}
#endif

	vfs_unbusy(mntp);

	return 0;
}

/*
 * sys_lfs_segclean:
 *
 * Mark the segment clean.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
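/*
 * Illustrative sketch (not compiled here): once everything live in a
 * segment has been rewritten via lfs_markv(2), the cleaner returns the
 * segment to the free pool; the errno handling shown is an assumption
 * for illustration.
 *
 *	if (lfs_segclean(&fsid, segnum) < 0 && errno == EBUSY)
 *		segment is active or still has live data; skip it for now.
 */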
int
sys_lfs_segclean(struct lwp *l, void *v, register_t *retval)
{
	struct sys_lfs_segclean_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(u_long) segment;
	} */ *uap = v;
	struct lfs *fs;
	struct mount *mntp;
	fsid_t fsid;
	int error;
	unsigned long segnum;
	struct proc *p = l->l_proc;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	fs = VFSTOUFS(mntp)->um_lfs;
	segnum = SCARG(uap, segment);

	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
		return (error);

	lfs_seglock(fs, SEGM_PROT);
	error = lfs_do_segclean(fs, segnum);
	lfs_segunlock(fs);
	vfs_unbusy(mntp);
	return error;
}

/*
 * Actually mark the segment clean.
 * Must be called with the segment lock held.
 */
int
lfs_do_segclean(struct lfs *fs, unsigned long segnum)
{
	struct buf *bp;
	CLEANERINFO *cip;
	SEGUSE *sup;

	if (dtosn(fs, fs->lfs_curseg) == segnum) {
		return (EBUSY);
	}

	LFS_SEGENTRY(sup, fs, segnum, bp);
	if (sup->su_nbytes) {
		printf("lfs_segclean: not cleaning segment %lu: %d live bytes\n",
		       segnum, sup->su_nbytes);
		brelse(bp);
		return (EBUSY);
	}
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		return (EBUSY);
	}
	if (!(sup->su_flags & SEGUSE_DIRTY)) {
		brelse(bp);
		return (EALREADY);
	}

	fs->lfs_avail += segtod(fs, 1);
	if (sup->su_flags & SEGUSE_SUPERBLOCK)
		fs->lfs_avail -= btofsb(fs, LFS_SBPAD);
	if (fs->lfs_version > 1 && segnum == 0 &&
	    fs->lfs_start < btofsb(fs, LFS_LABELPAD))
		fs->lfs_avail -= btofsb(fs, LFS_LABELPAD) - fs->lfs_start;
	fs->lfs_bfree += sup->su_nsums * btofsb(fs, fs->lfs_sumsize) +
		btofsb(fs, sup->su_ninos * fs->lfs_ibsize);
	fs->lfs_dmeta -= sup->su_nsums * btofsb(fs, fs->lfs_sumsize) +
		btofsb(fs, sup->su_ninos * fs->lfs_ibsize);
	if (fs->lfs_dmeta < 0)
		fs->lfs_dmeta = 0;
	sup->su_flags &= ~SEGUSE_DIRTY;
	LFS_WRITESEGENTRY(sup, fs, segnum, bp);

	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	fs->lfs_nclean = cip->clean;
	cip->bfree = fs->lfs_bfree;
	cip->avail = fs->lfs_avail - fs->lfs_ravail;
	(void) LFS_BWRITE_LOG(bp);
	wakeup(&fs->lfs_avail);

	return (0);
}

/*
 * sys_lfs_segwait:
 *
 * This will block until a segment in file system fsid is written.  A timeout
 * may be specified, which will wake the cleaner automatically when it
 * expires.  An fsid of -1 means any file system, and a timeout of 0 means
 * forever.
 *
 *  0 on success
 *  1 on timeout
 * -1/errno is returned on error.
 */
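/*
 * Illustrative sketch (not compiled here; the timeout value is
 * hypothetical): a cleaner main loop can park here between passes so it
 * only wakes when the log has advanced:
 *
 *	struct timeval tv = { 300, 0 };		(5-minute cap, say)
 *
 *	for (;;) {
 *		clean_some_segments();
 *		lfs_segwait(&fsid, &tv);
 *	}
 */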
int
sys_lfs_segwait(struct lwp *l, void *v, register_t *retval)
{
	struct sys_lfs_segwait_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct mount *mntp;
	struct timeval atv;
	fsid_t fsid;
	void *addr;
	u_long timeout;
	int error, s;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) {
		return (error);
	}
	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
		if (error)
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
		/*
		 * XXX THIS COULD SLEEP FOREVER IF TIMEOUT IS {0,0}!
		 * XXX IS THAT WHAT IS INTENDED?
		 */
		s = splclock();
		timeradd(&atv, &time, &atv);
		timeout = hzto(&atv);
		splx(s);
	} else
		timeout = 0;

	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	return (error == ERESTART ? EINTR : 0);
}

/*
 * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
 * daddr from the ifile, so don't look it up again.  If the cleaner is
 * processing IINFO structures, it may have the ondisk inode already, so
 * don't go retrieving it again.
 *
 * We lfs_vref() the vnode; it is the caller's responsibility to
 * lfs_vunref() it when finished.
 */
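/*
 * Sketch of the expected calling pattern (cf. lfs_markv() above); every
 * successful lfs_fastvget() must be balanced by an lfs_vunref():
 *
 *	if (lfs_fastvget(mntp, ino, daddr, &vp, NULL) == 0) {
 *		...use vp, which is returned unlocked...
 *		lfs_vunref(vp);
 *	}
 */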
extern struct lock ufs_hashlock;

int
lfs_fasthashget(dev_t dev, ino_t ino, struct vnode **vpp)
{
	struct inode *ip;

	/*
	 * This is playing fast and loose.  Someone may have the inode
	 * locked, in which case they are going to be distinctly unhappy
	 * if we trash something.
	 */
	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
		if ((*vpp)->v_flag & VXLOCK) {
			printf("lfs_fastvget: vnode VXLOCKed for ino %d\n",
			       ino);
			clean_vnlocked++;
#ifdef LFS_EAGAIN_FAIL
			return EAGAIN;
#endif
		}
		ip = VTOI(*vpp);
		if (lfs_vref(*vpp)) {
			clean_inlocked++;
			return EAGAIN;
		}
	} else
		*vpp = NULL;

	return (0);
}

int
lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp, struct dinode *dinp)
{
	struct inode *ip;
	struct dinode *dip;
	struct vnode *vp;
	struct ufsmount *ump;
	dev_t dev;
	int i, error, retries;
	struct buf *bp;
	struct lfs *fs;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_lfs;

	/*
	 * Wait until the filesystem is fully mounted before allowing vget
	 * to complete.  This prevents possible problems with roll-forward.
	 */
	while (fs->lfs_flags & LFS_NOTYET) {
		tsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0);
	}
	/*
	 * This is playing fast and loose.  Someone may have the inode
	 * locked, in which case they are going to be distinctly unhappy
	 * if we trash something.
	 */

	error = lfs_fasthashget(dev, ino, vpp);
	if (error != 0 || *vpp != NULL)
		return (error);

	if ((error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, &vp)) != 0) {
		*vpp = NULL;
		return (error);
	}

	do {
		error = lfs_fasthashget(dev, ino, vpp);
		if (error != 0 || *vpp != NULL) {
			ungetnewvnode(vp);
			return (error);
		}
	} while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));

	/* Allocate new vnode/inode. */
	lfs_vcreate(mp, ino, vp);

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip = VTOI(vp);
	ufs_ihashins(ip);
	lockmgr(&ufs_hashlock, LK_RELEASE, 0);

	/*
	 * XXX
	 * This may not need to be here, logically it should go down with
	 * the i_devvp initialization.
	 * Ask Kirk.
	 */
	ip->i_lfs = fs;

	/* Read in the disk contents for the inode, copy into the inode. */
	if (dinp) {
		error = copyin(dinp, &ip->i_din.ffs_din, DINODE_SIZE);
		if (error) {
			printf("lfs_fastvget: dinode copyin failed for ino %d\n", ino);
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			lockmgr(&vp->v_lock, LK_RELEASE, &vp->v_interlock);
			lfs_vunref(vp);
			*vpp = NULL;
			return (error);
		}
		if (ip->i_number != ino)
			panic("lfs_fastvget: I was fed the wrong inode!");
	} else {
		retries = 0;
 again:
		error = bread(ump->um_devvp, fsbtodb(fs, daddr), fs->lfs_ibsize,
			      NOCRED, &bp);
		if (error) {
			printf("lfs_fastvget: bread failed with %d\n", error);
			/*
			 * The inode does not contain anything useful, so it
			 * would be misleading to leave it on its hash chain.
			 * Iput() will return it to the free list.
			 */
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			lockmgr(&vp->v_lock, LK_RELEASE, &vp->v_interlock);
			lfs_vunref(vp);
			brelse(bp);
			*vpp = NULL;
			return (error);
		}
		dip = lfs_ifind(ump->um_lfs, ino, bp);
		if (dip == NULL) {
			/* Assume write has not completed yet; try again */
			bp->b_flags |= B_INVAL;
			brelse(bp);
			++retries;
			if (retries > LFS_IFIND_RETRIES)
				panic("lfs_fastvget: dinode not found");
			printf("lfs_fastvget: dinode not found, retrying...\n");
			goto again;
		}
		ip->i_din.ffs_din = *dip;
		brelse(bp);
	}
	ip->i_ffs_effnlink = ip->i_ffs_nlink;
	ip->i_lfs_effnblks = ip->i_ffs_blocks;
	ip->i_lfs_osize = ip->i_ffs_size;

	memset(ip->i_lfs_fragsize, 0, NDADDR * sizeof(*ip->i_lfs_fragsize));
	for (i = 0; i < NDADDR; i++)
		if (ip->i_ffs_db[i] != 0)
			ip->i_lfs_fragsize[i] = blksize(fs, ip, i);

	/*
	 * Initialize the vnode from the inode, check for aliases.  In all
	 * cases re-init ip, the underlying vnode/inode may have changed.
	 */
	ufs_vinit(mp, lfs_specop_p, lfs_fifoop_p, &vp);
#ifdef DEBUG_LFS
	if (vp->v_type == VNON) {
		printf("lfs_fastvget: ino %d is type VNON! (ifmt=%o, dinp=%p)\n",
		       ip->i_number, (ip->i_ffs_mode & IFMT) >> 12, dinp);
		lfs_dump_dinode(&ip->i_din.ffs_din);
#ifdef DDB
		Debugger();
#endif
	}
#endif /* DEBUG_LFS */
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */

	genfs_node_init(vp, &lfs_genfsops);
	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);
	*vpp = vp;
	KASSERT(VOP_ISLOCKED(vp));
	VOP_UNLOCK(vp, 0);

	uvm_vnp_setsize(vp, ip->i_ffs_size);

	return (0);
}

void
lfs_fakebuf_iodone(struct buf *bp)
{
	struct buf *obp = (struct buf *)bp->b_saveaddr;

	if (!(obp->b_flags & (B_DELWRI | B_DONE)))
		obp->b_flags |= B_INVAL;
	bp->b_saveaddr = (caddr_t)(VTOI(obp->b_vp)->i_lfs);
	brelse(obp);
	lfs_callback(bp);
}
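/*
 * I/O completion callback for a fake buffer created by lfs_fakebuf().
 * Release the buffer-cache block that lfs_fakebuf() kept busy (invalidating
 * it unless it has since become valid), restore the fs pointer that
 * lfs_callback() appears to expect in b_saveaddr, and finish with the
 * normal cleaner-buffer callback.
 */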

struct buf *
lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uaddr)
{
	struct buf *bp;
	int error;

	struct buf *obp;

	KASSERT(VTOI(vp)->i_number != LFS_IFILE_INUM);

	/*
	 * Make the corresponding buffer busy, to avoid reading blocks
	 * that haven't been written yet.  This is needed because we
	 * update the metadata in lfs_updatemeta before the data those
	 * metadata point to have actually been written to disk.
	 *
	 * XXX no need to allocbuf.
	 *
	 * XXX this can cause buf starvation.
	 */
	obp = getblk(vp, lbn, size, 0, 0);
	if (obp == NULL)
		panic("lfs_fakebuf: getblk failed");

	bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
	error = copyin(uaddr, bp->b_data, size);
	if (error) {
		lfs_freebuf(fs, bp);
		return NULL;
	}
	bp->b_saveaddr = (caddr_t)obp;
	KDASSERT(bp->b_iodone == lfs_callback);
	bp->b_iodone = lfs_fakebuf_iodone;

#ifdef DIAGNOSTIC
	if (obp->b_flags & B_GATHERED)
		panic("lfs_fakebuf: gathered bp: %p, ino=%u, lbn=%d",
		      bp, VTOI(vp)->i_number, lbn);
#endif
#if 0
	bp->b_saveaddr = (caddr_t)fs;
	++fs->lfs_iocount;
#endif
	bp->b_bufsize = size;
	bp->b_bcount = size;
	return (bp);
}