/*	$NetBSD: lfs_inode.c,v 1.35 2000/05/05 20:59:21 perseant Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant (at) hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_inode.c	8.9 (Berkeley) 5/8/95
 */

#if defined(_KERNEL) && !defined(_LKM)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

#include <vm/vm.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

static int lfs_vinvalbuf __P((struct vnode *, struct ucred *, struct proc *, ufs_daddr_t));
extern int locked_queue_count;
extern long locked_queue_bytes;

/* Search a block for a specific dinode. */
struct dinode *
lfs_ifind(fs, ino, bp)
	struct lfs *fs;
	ino_t ino;
	struct buf *bp;
{
	int cnt;
	struct dinode *dip = (struct dinode *)bp->b_data;
	struct dinode *ldip;

	for (cnt = INOPB(fs), ldip = dip + (cnt - 1); cnt--; --ldip)
		if (ldip->di_inumber == ino)
			return (ldip);

	printf("offset is %d (seg %d)\n", fs->lfs_offset, datosn(fs,fs->lfs_offset));
	printf("block is %d (seg %d)\n", bp->b_blkno, datosn(fs,bp->b_blkno));
	panic("lfs_ifind: dinode %u not found", ino);
	/* NOTREACHED */
}

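/*
 * Update the inode's times from its IN_* flags.  For a synchronous request
 * (LFS_SYNC in a_waitfor), also push the inode and any dirty blocks it has
 * to disk via lfs_vflush.
 */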
int
lfs_update(v)
	void *v;
{
	struct vop_update_args /* {
		struct vnode *a_vp;
		struct timespec *a_access;
		struct timespec *a_modify;
		int a_waitfor;
	} */ *ap = v;
	struct inode *ip;
	struct vnode *vp = ap->a_vp;
	int mod, oflag;
	struct timespec ts;
	struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs;

	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	ip = VTOI(vp);

	/*
	 * If we are called from vinvalbuf, and the file's blocks have
	 * already been scheduled for writing, but the writes have not
	 * yet completed, lfs_vflush will not be called, and vinvalbuf
	 * will cause a panic.  So, we must wait until any pending write
	 * for our inode completes, if we are called with LFS_SYNC set.
	 */
	while((ap->a_waitfor & LFS_SYNC) && WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
		printf("lfs_update: sleeping on inode %d (in-progress)\n",ip->i_number);
#endif
		tsleep(vp, (PRIBIO+1), "lfs_update", 0);
	}
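	/*
	 * Note whether the inode was already marked modified, so that we
	 * bump lfs_uinodes only if LFS_ITIMES newly dirties it below.
	 */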
	mod = ip->i_flag & IN_MODIFIED;
	oflag = ip->i_flag;
	TIMEVAL_TO_TIMESPEC(&time, &ts);
	LFS_ITIMES(ip,
		   ap->a_access ? ap->a_access : &ts,
		   ap->a_modify ? ap->a_modify : &ts, &ts);
	if (!mod && (ip->i_flag & IN_MODIFIED))
		ip->i_lfs->lfs_uinodes++;
	if ((ip->i_flag & (IN_MODIFIED|IN_CLEANING)) == 0) {
		return (0);
	}

	/* If sync, push back the vnode and any dirty blocks it may have. */
	if(ap->a_waitfor & LFS_SYNC) {
		/* Avoid flushing VDIROP. */
		++fs->lfs_diropwait;
		while(vp->v_flag & VDIROP) {
#ifdef DEBUG_LFS
			printf("lfs_update: sleeping on inode %d (dirops)\n",ip->i_number);
#endif
			if(fs->lfs_dirops==0)
				lfs_flush_fs(vp->v_mount,SEGM_SYNC);
			else
				tsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 0);
			/* XXX KS - by falling out here, are we writing the vn
			   twice? */
		}
		--fs->lfs_diropwait;
		return lfs_vflush(vp);
	}
	return 0;
}

/* Update segment usage information when removing a block. */
#define UPDATE_SEGUSE do {						\
	if (lastseg != -1) {						\
		LFS_SEGENTRY(sup, fs, lastseg, sup_bp);			\
		if (num > sup->su_nbytes) {				\
			printf("lfs_truncate: negative bytes: segment %d short by %d\n", \
			       lastseg, num - sup->su_nbytes);		\
			panic("lfs_truncate: negative bytes");		\
			sup->su_nbytes = 0;				\
		} else							\
			sup->su_nbytes -= num;				\
		e1 = VOP_BWRITE(sup_bp);				\
	}								\
	fragsreleased += numfrags(fs, num);				\
} while(0)

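/*
 * Account for a released block of size S at disk address daddr: accumulate
 * its size for the segment it lives in, flushing the per-segment total
 * (via UPDATE_SEGUSE) whenever the segment changes.  Blocks that never made
 * it to disk (UNWRITTEN) only add to the count of released fragments.
 */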
#define SEGDEC(S) do {							\
	if (daddr > 0) {						\
		if (lastseg != (seg = datosn(fs, daddr))) {		\
			UPDATE_SEGUSE;					\
			num = (S);					\
			lastseg = seg;					\
		} else							\
			num += (S);					\
	} else if (daddr == UNWRITTEN) {				\
		fragsreleased += numfrags(fs,(S));			\
	}								\
} while(0)

/*
 * Truncate the inode ip to at most length size.  Update segment usage
 * table information.
 */
/* ARGSUSED */
int
lfs_truncate(v)
	void *v;
{
	struct vop_truncate_args /* {
		struct vnode *a_vp;
		off_t a_length;
		int a_flags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct indir *inp;
	int i;
	int error;
	ufs_daddr_t *daddrp;
	struct vnode *vp = ap->a_vp;
	off_t length = ap->a_length;
	struct buf *bp, *sup_bp;
	struct ifile *ifp;
	struct inode *ip;
	struct lfs *fs;
	struct indir a[NIADDR + 2], a_end[NIADDR + 2];
	SEGUSE *sup;
	ufs_daddr_t daddr, lastblock, lbn, olastblock;
	ufs_daddr_t oldsize_lastblock, oldsize_newlast, newsize;
	long off, a_released, fragsreleased;
	int e1, e2, depth, lastseg, num, offset, seg, freesize, s;

	if (length < 0)
		return (EINVAL);

	ip = VTOI(vp);
	if (length == ip->i_ffs_size)	/* XXX don't update times */
		return 0;

	if (vp->v_type == VLNK &&
	    (ip->i_ffs_size < vp->v_mount->mnt_maxsymlinklen ||
	     (vp->v_mount->mnt_maxsymlinklen == 0 &&
	      ip->i_din.ffs_din.di_blocks == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("lfs_truncate: partial truncate of symlink");
#endif
		bzero((char *)&ip->i_ffs_shortlink, (u_int)ip->i_ffs_size);
		ip->i_ffs_size = 0;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (VOP_UPDATE(vp, NULL, NULL, 0));
	}

	fs = ip->i_lfs;
	lfs_imtime(fs);

	/* If length is larger than the file, just update the times. */
	if (ip->i_ffs_size < length) {
		if (length > fs->lfs_maxfilesize)
			return (EFBIG);
		/*
		 * Allocate the new last block to ensure that any previously
		 * existing fragments get extended.  (XXX Adding the new
		 * block is not really necessary.)
		 */
		error = VOP_BALLOC(vp, length - 1, 1, ap->a_cred, 0, &bp);
		if (error)
			return (error);
		VOP_BWRITE(bp);
		ip->i_ffs_size = length;
		uvm_vnp_setsize(vp, length);
		(void) uvm_vnp_uncache(vp);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (VOP_UPDATE(vp, NULL, NULL, 0));
	}
	uvm_vnp_setsize(vp, length);

	/*
	 * Make sure no writes to this inode can happen while we're
	 * truncating.  Otherwise, blocks which are accounted for on the
	 * inode *and* which have been created for cleaning can coexist,
	 * and cause an overcounting.
	 *
	 * (We don't need to *hold* the seglock, though, because we already
	 * hold the inode lock; draining the seglock is sufficient.)
	 */
	if (vp != fs->lfs_unlockvp) {
		while(fs->lfs_seglock) {
			tsleep(&fs->lfs_seglock, PRIBIO+1, "lfs_truncate", 0);
		}
	}

	/*
	 * Calculate indices into the inode's block list: olastblock is the
	 * current last block of the file, and lastblock is the first block
	 * to be released.  Lastblock is 0 when the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->lfs_bsize - 1);
	olastblock = lblkno(fs, ip->i_ffs_size + fs->lfs_bsize - 1) - 1;

	/*
	 * Update the size of the file.  If the file is not being truncated
	 * to a block boundary, the contents of the partial block following
	 * the end of the file must be zeroed in case it ever becomes
	 * accessible again because of subsequent file growth.  For this
	 * part of the code, oldsize_newlast refers to the old size of the
	 * new last block in the file.
	 */
	offset = blkoff(fs, length);
	lbn = lblkno(fs, length);
	oldsize_newlast = blksize(fs, ip, lbn);

	/* Now set oldsize_lastblock to the size of the current last block. */
	oldsize_lastblock = blksize(fs, ip, olastblock);
	if (offset == 0)
		ip->i_ffs_size = length;
	else {
#ifdef QUOTA
		if ((e1 = getinoquota(ip)) != 0)
			return (e1);
#endif
		if ((e1 = bread(vp, lbn, oldsize_newlast, NOCRED, &bp)) != 0) {
			printf("lfs_truncate: bread: %d\n",e1);
			brelse(bp);
			return (e1);
		}
		ip->i_ffs_size = length;
		(void)uvm_vnp_uncache(vp);
		newsize = blksize(fs, ip, lbn);
		bzero((char *)bp->b_data + offset, (u_int)(newsize - offset));
#ifdef DEBUG
		if(bp->b_flags & B_CALL)
			panic("Can't allocbuf malloced buffer!");
		else
#endif
			allocbuf(bp, newsize);
		if(bp->b_blkno != UNASSIGNED && oldsize_newlast > newsize) {
			ip->i_ffs_blocks -= btodb(oldsize_newlast - newsize);
		}
		if ((e1 = VOP_BWRITE(bp)) != 0) {
			printf("lfs_truncate: bwrite: %d\n",e1);
			return (e1);
		}
	}
	/*
	 * Modify the sup->su_nbytes counters for each deleted block; keep
	 * track of the number of blocks removed so that ip->i_ffs_blocks
	 * can be adjusted.
	 */
	fragsreleased = 0;
	num = 0;
	lastseg = -1;

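	/*
	 * Walk backwards from the old last block down to lastblock,
	 * releasing on-disk addresses: direct blocks one at a time,
	 * indirectly mapped blocks up to an indirect block's worth at
	 * a time.
	 */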
	for (lbn = olastblock; lbn >= lastblock;) {
		/* XXX use run length from bmap array to make this faster */
		ufs_bmaparray(vp, lbn, &daddr, a, &depth, NULL);
		if (lbn == olastblock) {
			for (i = NIADDR + 2; i--;)
				a_end[i] = a[i];
			freesize = oldsize_lastblock;
		} else
			freesize = fs->lfs_bsize;
		switch (depth) {
		case 0:				/* Direct block. */
			daddr = ip->i_ffs_db[lbn];
			SEGDEC(freesize);
			ip->i_ffs_db[lbn] = 0;
			--lbn;
			break;
#ifdef DIAGNOSTIC
		case 1:				/* An indirect block. */
			panic("lfs_truncate: ufs_bmaparray returned depth 1");
			/* NOTREACHED */
#endif
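		/*
		 * lbn is reached through a chain of indirect blocks.  If we
		 * are partway into the innermost indirect block and have not
		 * yet reached lastblock, just step lbn back over the blocks
		 * that precede it in that indirect block.  Otherwise release
		 * the addresses held at each level, invalidating an indirect
		 * block outright once everything it maps is gone, or zeroing
		 * and rewriting its freed tail otherwise; the inode's own
		 * indirect pointer is cleared once its block is fully freed.
		 */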
		default:			/* Chain of indirect blocks. */
			inp = a + --depth;
			if (inp->in_off > 0 && lbn != lastblock) {
				lbn -= inp->in_off < lbn - lastblock ?
				    inp->in_off : lbn - lastblock;
				break;
			}
			for (; depth && (inp->in_off == 0 || lbn == lastblock);
			     --inp, --depth) {
				if (bread(vp,
				    inp->in_lbn, fs->lfs_bsize, NOCRED, &bp))
					panic("lfs_truncate: bread bno %d",
					      inp->in_lbn);
				daddrp = (ufs_daddr_t *)bp->b_data + inp->in_off;
				for (i = inp->in_off; i++ <= a_end[depth].in_off;) {
					daddr = *daddrp++;
					SEGDEC(freesize);
				}
				a_end[depth].in_off = NINDIR(fs) - 1;
				if (inp->in_off == 0) {
					bp->b_flags |= B_INVAL;
					brelse (bp);
				} else {
					bzero((ufs_daddr_t *)bp->b_data +
					      inp->in_off, fs->lfs_bsize -
					      inp->in_off * sizeof(ufs_daddr_t));
					if ((e1 = VOP_BWRITE(bp)) != 0) {
						printf("lfs_truncate: indir bwrite: %d\n",e1);
						return (e1);
					}
				}
			}
			if (depth == 0 && a[1].in_off == 0) {
				off = a[0].in_off;
				daddr = ip->i_ffs_ib[off];
				SEGDEC(freesize);
				ip->i_ffs_ib[off] = 0;
			}
			if (lbn == lastblock || lbn <= NDADDR)
				--lbn;
			else {
				lbn -= NINDIR(fs);
				if (lbn < lastblock)
					lbn = lastblock;
			}
		}
	}
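	/* Flush the accumulated byte count for the last segment touched. */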
	UPDATE_SEGUSE;

	/* If truncating the file to 0, update the version number. */
	if (length == 0) {
		LFS_IENTRY(ifp, fs, ip->i_number, bp);
		++ifp->if_version;
		(void) VOP_BWRITE(bp);
	}
#ifdef DIAGNOSTIC
	if (ip->i_ffs_blocks < fragstodb(fs, fragsreleased)) {
		printf("lfs_truncate: frag count < 0 (%d<%ld), ino %d\n",
		       ip->i_ffs_blocks, fragstodb(fs, fragsreleased),
		       ip->i_number);
		if (length > 0)
			panic("lfs_truncate: frag count < 0");
		fragsreleased = dbtofrags(fs, ip->i_ffs_blocks);
	}
#endif
	ip->i_ffs_blocks -= fragstodb(fs, fragsreleased);
	fs->lfs_bfree += fragstodb(fs, fragsreleased);
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * Traverse dirty block list counting number of dirty buffers
	 * that are being deleted out of the cache, so that the lfs_avail
	 * field can be updated.
	 */
	a_released = 0;

	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
		/* XXX KS - Don't miscount if we're not truncating to zero. */
		if(length>0 && !(bp->b_lblkno >= 0 && bp->b_lblkno > lastblock)
		   && !(bp->b_lblkno < 0 && bp->b_lblkno < -lastblock-NIADDR))
			continue;

		if (bp->b_flags & B_LOCKED)
			a_released += numfrags(fs, bp->b_bcount);
	}
	splx(s);

#ifdef DIAGNOSTIC
	if (length == 0 && ip->i_ffs_blocks != 0) {
		printf("lfs_inode: trunc to zero, but %d blocks left on inode %d\n",
		       ip->i_ffs_blocks, ip->i_number);
		panic("lfs_inode: trunc to zero\n");
	}
#endif
	fs->lfs_avail += fragstodb(fs, a_released);
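	/*
	 * Toss cached buffers beyond the new end of file.  When truncating
	 * to zero we can use the generic vinvalbuf and simply recount the
	 * locked queue; otherwise lfs_vinvalbuf discards only buffers whose
	 * logical block lies past the new last block.
	 */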
	if(length>0)
		e1 = lfs_vinvalbuf(vp, ap->a_cred, ap->a_p, lastblock-1);
	else {
		e1 = vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 0, 0);
		lfs_countlocked(&locked_queue_count,&locked_queue_bytes);
	}
	wakeup(&locked_queue_count);

	if (length > 0) {
		/*
		 * Re-allocate the new last block, so that any indirect
		 * blocks leading to it which may have been invalidated
		 * above are valid again.  (Adding the block is not really
		 * necessary.)
		 */
		error = VOP_BALLOC(vp, length - 1, 1, ap->a_cred, 0, &bp);
		if (error)
			return (error);
		VOP_BWRITE(bp);
	}

	e2 = VOP_UPDATE(vp, NULL, NULL, 0);

	if(e1)
		printf("lfs_truncate: vinvalbuf: %d\n",e1);
	if(e2)
		printf("lfs_truncate: update: %d\n",e2);

	return (e1 ? e1 : e2 ? e2 : 0);
}

/*
 * Get rid of blocks a la vinvalbuf; but only blocks that are of a higher
 * lblkno than the file size allows.
 */
int
lfs_vinvalbuf(vp, cred, p, maxblk)
	struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	ufs_daddr_t maxblk;
{
	struct buf *bp;
	struct buf *nbp, *blist;
	int i, s, error, dirty;

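	/*
	 * Make two passes, first over the clean buffer list and then over
	 * the dirty one.  Whenever we have to sleep for a busy or gathered
	 * buffer, or once we have discarded anything, rescan from the top,
	 * since the lists may have changed underneath us.
	 */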
top:
	dirty=0;
	for (i=0;i<2;i++) {
		if(i==0)
			blist = vp->v_cleanblkhd.lh_first;
		else /* i == 1 */
			blist = vp->v_dirtyblkhd.lh_first;

		s = splbio();
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;

			if (bp->b_flags & B_GATHERED) {
				printf("lfs_vinvalbuf: gathered block ino %d lbn %d\n",
				       VTOI(vp)->i_number, bp->b_lblkno);
				error = tsleep(vp, PRIBIO+1, "lfs_vin2", 0);
				splx(s);
				if(error)
					return error;
				goto top;
			}
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					       (PRIBIO + 1), "lfs_vinval", 0);
				splx(s);
				if (error)
					return (error);
				goto top;
			}

			bp->b_flags |= B_BUSY;
			if((bp->b_lblkno >= 0 && bp->b_lblkno > maxblk)
			   || (bp->b_lblkno < 0 && bp->b_lblkno < -maxblk-(NIADDR-1)))
			{
				if(bp->b_flags & B_LOCKED) {
					--locked_queue_count;
					locked_queue_bytes -= bp->b_bufsize;
				}
				if(bp->b_flags & B_CALL) {
					lfs_freebuf(bp);
				} else {
					bp->b_flags |= B_INVAL | B_VFLUSH;
					brelse(bp);
				}
				++dirty;
			} else {
				/*
				 * This buffer is still on its free list.
				 * So don't brelse, but wake up any sleepers.
				 */
				bp->b_flags &= ~B_BUSY;
				if(bp->b_flags & B_WANTED) {
					bp->b_flags &= ~(B_WANTED|B_AGE);
					wakeup(bp);
				}
			}
		}
		splx(s);
	}
	if(dirty)
		goto top;
	return (0);
}