lfs_inode.c revision 1.32 1 /* $NetBSD: lfs_inode.c,v 1.32 2000/03/30 12:41:13 augustss Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38 /*
39 * Copyright (c) 1986, 1989, 1991, 1993
40 * The Regents of the University of California. All rights reserved.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 * must display the following acknowledgement:
52 * This product includes software developed by the University of
53 * California, Berkeley and its contributors.
54 * 4. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * @(#)lfs_inode.c 8.9 (Berkeley) 5/8/95
71 */
72
73 #if defined(_KERNEL) && !defined(_LKM)
74 #include "opt_quota.h"
75 #endif
76
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/mount.h>
80 #include <sys/proc.h>
81 #include <sys/file.h>
82 #include <sys/buf.h>
83 #include <sys/vnode.h>
84 #include <sys/kernel.h>
85 #include <sys/malloc.h>
86
87 #include <vm/vm.h>
88
89 #include <ufs/ufs/quota.h>
90 #include <ufs/ufs/inode.h>
91 #include <ufs/ufs/ufsmount.h>
92 #include <ufs/ufs/ufs_extern.h>
93
94 #include <ufs/lfs/lfs.h>
95 #include <ufs/lfs/lfs_extern.h>
96
97 static int lfs_vinvalbuf __P((struct vnode *, struct ucred *, struct proc *, ufs_daddr_t));
98
99 /* Search a block for a specific dinode. */
100 struct dinode *
101 lfs_ifind(fs, ino, bp)
102 struct lfs *fs;
103 ino_t ino;
104 struct buf *bp;
105 {
106 int cnt;
107 struct dinode *dip = (struct dinode *)bp->b_data;
108 struct dinode *ldip;
109
110 for (cnt = INOPB(fs), ldip = dip + (cnt - 1); cnt--; --ldip)
111 if (ldip->di_inumber == ino)
112 return (ldip);
113
114 printf("offset is %d (seg %d)\n", fs->lfs_offset, datosn(fs,fs->lfs_offset));
115 printf("block is %d (seg %d)\n", bp->b_blkno, datosn(fs,bp->b_blkno));
116 panic("lfs_ifind: dinode %u not found", ino);
117 /* NOTREACHED */
118 }
119
120 int
121 lfs_update(v)
122 void *v;
123 {
124 struct vop_update_args /* {
125 struct vnode *a_vp;
126 struct timespec *a_access;
127 struct timespec *a_modify;
128 int a_waitfor;
129 } */ *ap = v;
130 struct inode *ip;
131 struct vnode *vp = ap->a_vp;
132 int mod, oflag;
133 struct timespec ts;
134 struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs;
135
136 if (vp->v_mount->mnt_flag & MNT_RDONLY)
137 return (0);
138 ip = VTOI(vp);
139
140 /*
141 * If we are called from vinvalbuf, and the file's blocks have
142 * already been scheduled for writing, but the writes have not
143 * yet completed, lfs_vflush will not be called, and vinvalbuf
144 * will cause a panic. So, we must wait until any pending write
145 * for our inode completes, if we are called with LFS_SYNC set.
146 */
147 while((ap->a_waitfor & LFS_SYNC) && WRITEINPROG(vp)) {
148 #ifdef DEBUG_LFS
149 printf("lfs_update: sleeping on inode %d (in-progress)\n",ip->i_number);
150 #endif
151 tsleep(vp, (PRIBIO+1), "lfs_update", 0);
152 }
153 mod = ip->i_flag & IN_MODIFIED;
154 oflag = ip->i_flag;
155 TIMEVAL_TO_TIMESPEC(&time, &ts);
156 LFS_ITIMES(ip,
157 ap->a_access ? ap->a_access : &ts,
158 ap->a_modify ? ap->a_modify : &ts, &ts);
159 if (!mod && (ip->i_flag & IN_MODIFIED))
160 ip->i_lfs->lfs_uinodes++;
161 if ((ip->i_flag & (IN_MODIFIED|IN_CLEANING)) == 0) {
162 return (0);
163 }
164
165 /* If sync, push back the vnode and any dirty blocks it may have. */
166 if(ap->a_waitfor & LFS_SYNC) {
167 /* Avoid flushing VDIROP. */
168 ++fs->lfs_diropwait;
169 while(vp->v_flag & VDIROP) {
170 #ifdef DEBUG_LFS
171 printf("lfs_update: sleeping on inode %d (dirops)\n",ip->i_number);
172 #endif
173 if(fs->lfs_dirops==0)
174 lfs_flush_fs(vp->v_mount,SEGM_SYNC);
175 else
176 tsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 0);
177 /* XXX KS - by falling out here, are we writing the vn
178 twice? */
179 }
180 --fs->lfs_diropwait;
181 return lfs_vflush(vp);
182 }
183 return 0;
184 }
185
/*
 * Update segment usage information when removing a block.
 *
 * Charges the `num' bytes accumulated so far against segment `lastseg':
 * the segment's su_nbytes is reduced by num, the segment-usage buffer is
 * written back (result stored in e1) and the released fragment count is
 * added to fragsreleased.  Nothing happens while lastseg is still -1.
 *
 * If num exceeds the segment's recorded byte count the shortfall is
 * printed and the kernel panics; the assignment that follows the panic
 * only matters if panic() were ever to return.
 *
 * Uses these names from the expansion site: lastseg, num, sup, sup_bp,
 * fs, e1, fragsreleased.
 *
 * Wrapped in do { } while (0) so that `UPDATE_SEGUSE;' is exactly one
 * statement and cannot capture a following `else'.
 */
#define UPDATE_SEGUSE							\
	do {								\
		if (lastseg != -1) {					\
			LFS_SEGENTRY(sup, fs, lastseg, sup_bp);		\
			if (num > sup->su_nbytes) {			\
				printf("lfs_truncate: negative bytes: segment %d short by %d\n", \
				    lastseg, num - sup->su_nbytes);	\
				panic("lfs_truncate: negative bytes");	\
				sup->su_nbytes = 0;			\
			} else						\
				sup->su_nbytes -= num;			\
			e1 = VOP_BWRITE(sup_bp);			\
			fragsreleased += numfrags(fs, num);		\
		}							\
	} while (0)
200
/*
 * SEGDEC(S): note that S bytes stored at disk address `daddr' are being
 * released.
 *
 * A zero daddr is ignored.  Otherwise the segment holding daddr is
 * computed with datosn(); while it equals lastseg the bytes are simply
 * added to num.  When the segment changes, the total gathered for the
 * previous segment is flushed with UPDATE_SEGUSE and accumulation
 * restarts at S for the new segment.
 *
 * Uses these names from the expansion site: daddr, lastseg, seg, num, fs
 * (plus whatever UPDATE_SEGUSE needs).
 *
 * Wrapped in do { } while (0) so that `SEGDEC(x);' is a single statement
 * and is safe in an unbraced if/else.
 */
#define SEGDEC(S)							\
	do {								\
		if (daddr != 0) {					\
			if (lastseg != (seg = datosn(fs, daddr))) {	\
				UPDATE_SEGUSE;				\
				num = (S);				\
				lastseg = seg;				\
			} else						\
				num += (S);				\
		}							\
	} while (0)
211
212 /*
213 * Truncate the inode ip to at most length size. Update segment usage
214 * table information.
215 */
216 /* ARGSUSED */
217 int
218 lfs_truncate(v)
219 void *v;
220 {
221 struct vop_truncate_args /* {
222 struct vnode *a_vp;
223 off_t a_length;
224 int a_flags;
225 struct ucred *a_cred;
226 struct proc *a_p;
227 } */ *ap = v;
228 struct indir *inp;
229 int i;
230 ufs_daddr_t *daddrp;
231 struct vnode *vp = ap->a_vp;
232 off_t length = ap->a_length;
233 struct buf *bp, *sup_bp;
234 struct ifile *ifp;
235 struct inode *ip;
236 struct lfs *fs;
237 struct indir a[NIADDR + 2], a_end[NIADDR + 2];
238 SEGUSE *sup;
239 ufs_daddr_t daddr, lastblock, lbn, olastblock;
240 ufs_daddr_t oldsize_lastblock, oldsize_newlast, newsize;
241 long off, a_released, fragsreleased, i_released;
242 int e1, e2, depth, lastseg, num, offset, seg, freesize, s;
243
244 ip = VTOI(vp);
245
246 if (vp->v_type == VLNK &&
247 (ip->i_ffs_size < vp->v_mount->mnt_maxsymlinklen ||
248 (vp->v_mount->mnt_maxsymlinklen == 0 &&
249 ip->i_din.ffs_din.di_blocks == 0))) {
250 #ifdef DIAGNOSTIC
251 if (length != 0)
252 panic("lfs_truncate: partial truncate of symlink");
253 #endif
254 bzero((char *)&ip->i_ffs_shortlink, (u_int)ip->i_ffs_size);
255 ip->i_ffs_size = 0;
256 ip->i_flag |= IN_CHANGE | IN_UPDATE;
257 return (VOP_UPDATE(vp, NULL, NULL, 0));
258 }
259 uvm_vnp_setsize(vp, length);
260
261 fs = ip->i_lfs;
262 lfs_imtime(fs);
263
264 /* If length is larger than the file, just update the times. */
265 if (ip->i_ffs_size <= length) {
266 ip->i_flag |= IN_CHANGE | IN_UPDATE;
267 return (VOP_UPDATE(vp, NULL, NULL, 0));
268 }
269
270 /*
271 * Make sure no writes happen while we're truncating.
272 * Otherwise, blocks which are accounted for on the inode
273 * *and* which have been created for cleaning can coexist,
274 * and cause us to overcount, and panic below.
275 *
276 * XXX KS - too restrictive? Maybe only when cleaning?
277 */
278 while(fs->lfs_seglock && fs->lfs_lockpid != ap->a_p->p_pid) {
279 tsleep(&fs->lfs_seglock, (PRIBIO+1), "lfs_truncate", 0);
280 }
281
282 /*
283 * Calculate index into inode's block list of last direct and indirect
284 * blocks (if any) which we want to keep. Lastblock is 0 when the
285 * file is truncated to 0.
286 */
287 lastblock = lblkno(fs, length + fs->lfs_bsize - 1);
288 olastblock = lblkno(fs, ip->i_ffs_size + fs->lfs_bsize - 1) - 1;
289
290 /*
291 * Update the size of the file. If the file is not being truncated to
292 * a block boundry, the contents of the partial block following the end
293 * must be zero'd in case it ever becomes accessible again
294 * because of subsequent file growth. For this part of the code,
295 * oldsize_newlast refers to the old size of the new last block in the
296 * file.
297 */
298 offset = blkoff(fs, length);
299 lbn = lblkno(fs, length);
300 oldsize_newlast = blksize(fs, ip, lbn);
301
302 /* Now set oldsize to the current size of the current last block */
303 oldsize_lastblock = blksize(fs, ip, olastblock);
304 if (offset == 0)
305 ip->i_ffs_size = length;
306 else {
307 #ifdef QUOTA
308 if ((e1 = getinoquota(ip)) != 0)
309 return (e1);
310 #endif
311 if ((e1 = bread(vp, lbn, oldsize_newlast, NOCRED, &bp)) != 0) {
312 printf("lfs_truncate: bread: %d\n",e1);
313 brelse(bp);
314 return (e1);
315 }
316 ip->i_ffs_size = length;
317 (void)uvm_vnp_uncache(vp);
318 newsize = blksize(fs, ip, lbn);
319 bzero((char *)bp->b_data + offset, (u_int)(newsize - offset));
320 #ifdef DEBUG
321 if(bp->b_flags & B_CALL)
322 panic("Can't allocbuf malloced buffer!");
323 else
324 #endif
325 allocbuf(bp, newsize);
326 if(oldsize_newlast > newsize)
327 ip->i_ffs_blocks -= btodb(oldsize_newlast - newsize);
328 if ((e1 = VOP_BWRITE(bp)) != 0) {
329 printf("lfs_truncate: bwrite: %d\n",e1);
330 return (e1);
331 }
332 }
333 /*
334 * Modify sup->su_nbyte counters for each deleted block; keep track
335 * of number of blocks removed for ip->i_ffs_blocks.
336 */
337 fragsreleased = 0;
338 num = 0;
339 lastseg = -1;
340
341 for (lbn = olastblock; lbn >= lastblock;) {
342 /* XXX use run length from bmap array to make this faster */
343 ufs_bmaparray(vp, lbn, &daddr, a, &depth, NULL);
344 if (lbn == olastblock) {
345 for (i = NIADDR + 2; i--;)
346 a_end[i] = a[i];
347 freesize = oldsize_lastblock;
348 } else
349 freesize = fs->lfs_bsize;
350 switch (depth) {
351 case 0: /* Direct block. */
352 daddr = ip->i_ffs_db[lbn];
353 SEGDEC(freesize);
354 ip->i_ffs_db[lbn] = 0;
355 --lbn;
356 break;
357 #ifdef DIAGNOSTIC
358 case 1: /* An indirect block. */
359 panic("lfs_truncate: ufs_bmaparray returned depth 1");
360 /* NOTREACHED */
361 #endif
362 default: /* Chain of indirect blocks. */
363 inp = a + --depth;
364 if (inp->in_off > 0 && lbn != lastblock) {
365 lbn -= inp->in_off < lbn - lastblock ?
366 inp->in_off : lbn - lastblock;
367 break;
368 }
369 for (; depth && (inp->in_off == 0 || lbn == lastblock);
370 --inp, --depth) {
371 if (bread(vp,
372 inp->in_lbn, fs->lfs_bsize, NOCRED, &bp))
373 panic("lfs_truncate: bread bno %d",
374 inp->in_lbn);
375 daddrp = (ufs_daddr_t *)bp->b_data + inp->in_off;
376 for (i = inp->in_off;
377 i++ <= a_end[depth].in_off;) {
378 daddr = *daddrp++;
379 SEGDEC(freesize);
380 }
381 a_end[depth].in_off = NINDIR(fs) - 1;
382 if (inp->in_off == 0)
383 brelse (bp);
384 else {
385 bzero((ufs_daddr_t *)bp->b_data +
386 inp->in_off, fs->lfs_bsize -
387 inp->in_off * sizeof(ufs_daddr_t));
388 if ((e1 = VOP_BWRITE(bp)) != 0) {
389 printf("lfs_truncate: indir bwrite: %d\n",e1);
390 return (e1);
391 }
392 }
393 }
394 if (depth == 0 && a[1].in_off == 0) {
395 off = a[0].in_off;
396 daddr = ip->i_ffs_ib[off];
397 SEGDEC(freesize);
398 ip->i_ffs_ib[off] = 0;
399 }
400 if (lbn == lastblock || lbn <= NDADDR)
401 --lbn;
402 else {
403 lbn -= NINDIR(fs);
404 if (lbn < lastblock)
405 lbn = lastblock;
406 }
407 }
408 }
409 UPDATE_SEGUSE;
410
411 /* If truncating the file to 0, update the version number. */
412 if (length == 0) {
413 LFS_IENTRY(ifp, fs, ip->i_number, bp);
414 ++ifp->if_version;
415 (void) VOP_BWRITE(bp);
416 }
417 #ifdef DIAGNOSTIC
418 if (ip->i_ffs_blocks < fragstodb(fs, fragsreleased)) {
419 panic("lfs_truncate: frag count < 0 (%d<%ld), ino %d\n",
420 ip->i_ffs_blocks, fragstodb(fs, fragsreleased),
421 ip->i_number);
422 fragsreleased = dbtofrags(fs, ip->i_ffs_blocks);
423 }
424 #endif
425 ip->i_ffs_blocks -= fragstodb(fs, fragsreleased);
426 fs->lfs_bfree += fragstodb(fs, fragsreleased);
427 ip->i_flag |= IN_CHANGE | IN_UPDATE;
428 /*
429 * Traverse dirty block list counting number of dirty buffers
430 * that are being deleted out of the cache, so that the lfs_avail
431 * field can be updated.
432 */
433 a_released = 0;
434 i_released = 0;
435
436 s = splbio();
437 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
438
439 /* XXX KS - Don't miscount if we're not truncating to zero. */
440 if(length>0 && !(bp->b_lblkno >= 0 && bp->b_lblkno > lastblock)
441 && !(bp->b_lblkno < 0 && bp->b_lblkno < -lastblock-NIADDR))
442 continue;
443
444 if (bp->b_flags & B_LOCKED) {
445 a_released += numfrags(fs, bp->b_bcount);
446 /*
447 * XXX
448 * When buffers are created in the cache, their block
449 * number is set equal to their logical block number.
450 * If that is still true, we are assuming that the
451 * blocks are new (not yet on disk) and weren't
452 * counted above. However, there is a slight chance
453 * that a block's disk address is equal to its logical
454 * block number in which case, we'll get an overcounting
455 * here.
456 */
457 if (bp->b_blkno == bp->b_lblkno) {
458 i_released += numfrags(fs, bp->b_bcount);
459 }
460 }
461 }
462 splx(s);
463 fragsreleased = i_released;
464 #ifdef DIAGNOSTIC
465 if (fragsreleased > dbtofrags(fs, ip->i_ffs_blocks)) {
466 printf("lfs_inode: %ld frags released > %d in inode %d\n",
467 fragsreleased, dbtofrags(fs, ip->i_ffs_blocks),
468 ip->i_number);
469 fragsreleased = dbtofrags(fs, ip->i_ffs_blocks);
470 }
471 #endif
472 fs->lfs_bfree += fragstodb(fs, fragsreleased);
473 ip->i_ffs_blocks -= fragstodb(fs, fragsreleased);
474 #ifdef DIAGNOSTIC
475 if (length == 0 && ip->i_ffs_blocks != 0) {
476 printf("lfs_inode: trunc to zero, but %d blocks left on inode %d\n",
477 ip->i_ffs_blocks, ip->i_number);
478 panic("lfs_inode\n");
479 }
480 #endif
481 fs->lfs_avail += fragstodb(fs, a_released);
482 if(length>0)
483 e1 = lfs_vinvalbuf(vp, ap->a_cred, ap->a_p, lastblock-1);
484 else
485 e1 = vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 0, 0);
486 e2 = VOP_UPDATE(vp, NULL, NULL, 0);
487 if(e1)
488 printf("lfs_truncate: vinvalbuf: %d\n",e1);
489 if(e2)
490 printf("lfs_truncate: update: %d\n",e2);
491
492 return (e1 ? e1 : e2 ? e2 : 0);
493 }
494
495 /*
496 * Get rid of blocks a la vinvalbuf; but only blocks that are of a higher
497 * lblkno than the file size allows.
498 */
499 int
500 lfs_vinvalbuf(vp, cred, p, maxblk)
501 struct vnode *vp;
502 struct ucred *cred;
503 struct proc *p;
504 ufs_daddr_t maxblk;
505 {
506 struct buf *bp;
507 struct buf *nbp, *blist;
508 int i, s, error, dirty;
509
510 top:
511 dirty=0;
512 for (i=0;i<2;i++) {
513 if(i==0)
514 blist = vp->v_cleanblkhd.lh_first;
515 else /* i == 1 */
516 blist = vp->v_dirtyblkhd.lh_first;
517
518 s = splbio();
519 for (bp = blist; bp; bp = nbp) {
520 nbp = bp->b_vnbufs.le_next;
521
522 if (bp->b_flags & B_GATHERED) {
523 error = tsleep(vp, PRIBIO+1, "lfs_vin2", 0);
524 splx(s);
525 if(error)
526 return error;
527 goto top;
528 }
529 if (bp->b_flags & B_BUSY) {
530 bp->b_flags |= B_WANTED;
531 error = tsleep((caddr_t)bp,
532 (PRIBIO + 1), "lfs_vinval", 0);
533 if (error) {
534 splx(s);
535 return (error);
536 }
537 goto top;
538 }
539
540 bp->b_flags |= B_BUSY;
541 if((bp->b_lblkno >= 0 && bp->b_lblkno > maxblk)
542 || (bp->b_lblkno < 0 && bp->b_lblkno < -maxblk-(NIADDR-1)))
543 {
544 bp->b_flags |= B_INVAL | B_VFLUSH;
545 if(bp->b_flags & B_CALL) {
546 lfs_freebuf(bp);
547 } else {
548 brelse(bp);
549 }
550 ++dirty;
551 } else {
552 /*
553 * This buffer is still on its free list.
554 * So don't brelse, but wake up any sleepers.
555 */
556 bp->b_flags &= ~B_BUSY;
557 if(bp->b_flags & B_WANTED) {
558 bp->b_flags &= ~(B_WANTED|B_AGE);
559 wakeup(bp);
560 }
561 }
562 }
563 splx(s);
564 }
565 if(dirty)
566 goto top;
567 return (0);
568 }
569