ext2fs_alloc.c revision 1.11 1 /* $NetBSD: ext2fs_alloc.c,v 1.11 2001/08/24 10:24:46 wiz Exp $ */
2
3 /*
4 * Copyright (c) 1997 Manuel Bouyer.
5 * Copyright (c) 1982, 1986, 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
37 * Modified for ext2fs by Manuel Bouyer.
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/buf.h>
43 #include <sys/proc.h>
44 #include <sys/vnode.h>
45 #include <sys/mount.h>
46 #include <sys/kernel.h>
47 #include <sys/syslog.h>
48
49 #include <ufs/ufs/quota.h>
50 #include <ufs/ufs/inode.h>
51 #include <ufs/ufs/ufs_extern.h>
52
53 #include <ufs/ext2fs/ext2fs.h>
54 #include <ufs/ext2fs/ext2fs_extern.h>
55
56 u_long ext2gennumber;
57
58 static ufs_daddr_t ext2fs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
59 static u_long ext2fs_dirpref __P((struct m_ext2fs *));
60 static void ext2fs_fserr __P((struct m_ext2fs *, u_int, char *));
61 static u_long ext2fs_hashalloc __P((struct inode *, int, long, int,
62 ufs_daddr_t (*)(struct inode *, int, ufs_daddr_t,
63 int)));
64 static ufs_daddr_t ext2fs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
65 static ufs_daddr_t ext2fs_mapsearch __P((struct m_ext2fs *, char *, ufs_daddr_t));
66
67 /*
68 * Allocate a block in the file system.
69 *
70 * A preference may be optionally specified. If a preference is given
71 * the following hierarchy is used to allocate a block:
72 * 1) allocate the requested block.
73 * 2) allocate a rotationally optimal block in the same cylinder.
74 * 3) allocate a block in the same cylinder group.
75 * 4) quadradically rehash into other cylinder groups, until an
76 * available block is located.
77 * If no block preference is given the following hierarchy is used
78 * to allocate a block:
79 * 1) allocate a block in the cylinder group that contains the
80 * inode for the file.
81 * 2) quadradically rehash into other cylinder groups, until an
82 * available block is located.
83 */
84 int
85 ext2fs_alloc(ip, lbn, bpref, cred, bnp)
86 struct inode *ip;
87 ufs_daddr_t lbn, bpref;
88 struct ucred *cred;
89 ufs_daddr_t *bnp;
90 {
91 struct m_ext2fs *fs;
92 ufs_daddr_t bno;
93 int cg;
94
95 *bnp = 0;
96 fs = ip->i_e2fs;
97 #ifdef DIAGNOSTIC
98 if (cred == NOCRED)
99 panic("ext2fs_alloc: missing credential\n");
100 #endif /* DIAGNOSTIC */
101 if (fs->e2fs.e2fs_fbcount == 0)
102 goto nospace;
103 if (cred->cr_uid != 0 && freespace(fs) <= 0)
104 goto nospace;
105 if (bpref >= fs->e2fs.e2fs_bcount)
106 bpref = 0;
107 if (bpref == 0)
108 cg = ino_to_cg(fs, ip->i_number);
109 else
110 cg = dtog(fs, bpref);
111 bno = (ufs_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
112 ext2fs_alloccg);
113 if (bno > 0) {
114 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize);
115 ip->i_flag |= IN_CHANGE | IN_UPDATE;
116 *bnp = bno;
117 return (0);
118 }
119 nospace:
120 ext2fs_fserr(fs, cred->cr_uid, "file system full");
121 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
122 return (ENOSPC);
123 }
124
125 /*
126 * Allocate an inode in the file system.
127 *
128 * If allocating a directory, use ext2fs_dirpref to select the inode.
129 * If allocating in a directory, the following hierarchy is followed:
130 * 1) allocate the preferred inode.
131 * 2) allocate an inode in the same cylinder group.
132 * 3) quadradically rehash into other cylinder groups, until an
133 * available inode is located.
134 * If no inode preference is given the following hierarchy is used
135 * to allocate an inode:
136 * 1) allocate an inode in cylinder group 0.
137 * 2) quadradically rehash into other cylinder groups, until an
138 * available inode is located.
139 */
140 int
141 ext2fs_valloc(v)
142 void *v;
143 {
144 struct vop_valloc_args /* {
145 struct vnode *a_pvp;
146 int a_mode;
147 struct ucred *a_cred;
148 struct vnode **a_vpp;
149 } */ *ap = v;
150 struct vnode *pvp = ap->a_pvp;
151 struct inode *pip;
152 struct m_ext2fs *fs;
153 struct inode *ip;
154 mode_t mode = ap->a_mode;
155 ino_t ino, ipref;
156 int cg, error;
157
158 *ap->a_vpp = NULL;
159 pip = VTOI(pvp);
160 fs = pip->i_e2fs;
161 if (fs->e2fs.e2fs_ficount == 0)
162 goto noinodes;
163
164 if ((mode & IFMT) == IFDIR)
165 cg = ext2fs_dirpref(fs);
166 else
167 cg = ino_to_cg(fs, pip->i_number);
168 ipref = cg * fs->e2fs.e2fs_ipg + 1;
169 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
170 if (ino == 0)
171 goto noinodes;
172 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
173 if (error) {
174 VOP_VFREE(pvp, ino, mode);
175 return (error);
176 }
177 ip = VTOI(*ap->a_vpp);
178 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) {
179 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n",
180 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt);
181 panic("ext2fs_valloc: dup alloc");
182 }
183
184 memset(&ip->i_din, 0, sizeof(ip->i_din));
185
186 /*
187 * Set up a new generation number for this inode.
188 */
189 if (++ext2gennumber < (u_long)time.tv_sec)
190 ext2gennumber = time.tv_sec;
191 ip->i_e2fs_gen = ext2gennumber;
192 return (0);
193 noinodes:
194 ext2fs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
195 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
196 return (ENOSPC);
197 }
198
199 /*
200 * Find a cylinder to place a directory.
201 *
202 * The policy implemented by this algorithm is to select from
203 * among those cylinder groups with above the average number of
204 * free inodes, the one with the smallest number of directories.
205 */
206 static u_long
207 ext2fs_dirpref(fs)
208 struct m_ext2fs *fs;
209 {
210 int cg, maxspace, mincg, avgifree;
211
212 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
213 maxspace = 0;
214 mincg = -1;
215 for (cg = 0; cg < fs->e2fs_ncg; cg++)
216 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) {
217 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) {
218 mincg = cg;
219 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree;
220 }
221 }
222 return mincg;
223 }
224
225 /*
226 * Select the desired position for the next block in a file. The file is
227 * logically divided into sections. The first section is composed of the
228 * direct blocks. Each additional section contains fs_maxbpg blocks.
229 *
230 * If no blocks have been allocated in the first section, the policy is to
231 * request a block in the same cylinder group as the inode that describes
232 * the file. Otherwise, the policy is to try to allocate the blocks
233 * contigously. The two fields of the ext2 inode extension (see
234 * ufs/ufs/inode.h) help this.
235 */
236 ufs_daddr_t
237 ext2fs_blkpref(ip, lbn, indx, bap)
238 struct inode *ip;
239 ufs_daddr_t lbn;
240 int indx;
241 ufs_daddr_t *bap;
242 {
243 struct m_ext2fs *fs;
244 int cg, i;
245
246 fs = ip->i_e2fs;
247 /*
248 * if we are doing contigous lbn allocation, try to alloc blocks
249 * contigously on disk
250 */
251
252 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
253 return ip->i_e2fs_last_blk + 1;
254 }
255
256 /*
257 * bap, if provided, gives us a list of blocks to which we want to
258 * stay close
259 */
260
261 if (bap) {
262 for (i = indx; i >= 0 ; i--) {
263 if (bap[i]) {
264 return fs2h32(bap[i]) + 1;
265 }
266 }
267 }
268
269 /* fall back to the first block of the cylinder containing the inode */
270
271 cg = ino_to_cg(fs, ip->i_number);
272 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
273 }
274
275 /*
276 * Implement the cylinder overflow algorithm.
277 *
278 * The policy implemented by this algorithm is:
279 * 1) allocate the block in its requested cylinder group.
280 * 2) quadradically rehash on the cylinder group number.
281 * 3) brute force search for a free block.
282 */
283 static u_long
284 ext2fs_hashalloc(ip, cg, pref, size, allocator)
285 struct inode *ip;
286 int cg;
287 long pref;
288 int size; /* size for data blocks, mode for inodes */
289 ufs_daddr_t (*allocator) __P((struct inode *, int, ufs_daddr_t, int));
290 {
291 struct m_ext2fs *fs;
292 long result;
293 int i, icg = cg;
294
295 fs = ip->i_e2fs;
296 /*
297 * 1: preferred cylinder group
298 */
299 result = (*allocator)(ip, cg, pref, size);
300 if (result)
301 return (result);
302 /*
303 * 2: quadratic rehash
304 */
305 for (i = 1; i < fs->e2fs_ncg; i *= 2) {
306 cg += i;
307 if (cg >= fs->e2fs_ncg)
308 cg -= fs->e2fs_ncg;
309 result = (*allocator)(ip, cg, 0, size);
310 if (result)
311 return (result);
312 }
313 /*
314 * 3: brute force search
315 * Note that we start at i == 2, since 0 was checked initially,
316 * and 1 is always checked in the quadratic rehash.
317 */
318 cg = (icg + 2) % fs->e2fs_ncg;
319 for (i = 2; i < fs->e2fs_ncg; i++) {
320 result = (*allocator)(ip, cg, 0, size);
321 if (result)
322 return (result);
323 cg++;
324 if (cg == fs->e2fs_ncg)
325 cg = 0;
326 }
327 return (0);
328 }
329
330 /*
331 * Determine whether a block can be allocated.
332 *
333 * Check to see if a block of the appropriate size is available,
334 * and if it is, allocate it.
335 */
336
337 static ufs_daddr_t
338 ext2fs_alloccg(ip, cg, bpref, size)
339 struct inode *ip;
340 int cg;
341 ufs_daddr_t bpref;
342 int size;
343 {
344 struct m_ext2fs *fs;
345 char *bbp;
346 struct buf *bp;
347 int error, bno, start, end, loc;
348
349 fs = ip->i_e2fs;
350 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
351 return (0);
352 error = bread(ip->i_devvp, fsbtodb(fs,
353 fs->e2fs_gd[cg].ext2bgd_b_bitmap),
354 (int)fs->e2fs_bsize, NOCRED, &bp);
355 if (error) {
356 brelse(bp);
357 return (0);
358 }
359 bbp = (char *)bp->b_data;
360
361 if (dtog(fs, bpref) != cg)
362 bpref = 0;
363 if (bpref != 0) {
364 bpref = dtogd(fs, bpref);
365 /*
366 * if the requested block is available, use it
367 */
368 if (isclr(bbp, bpref)) {
369 bno = bpref;
370 goto gotit;
371 }
372 }
373 /*
374 * no blocks in the requested cylinder, so take next
375 * available one in this cylinder group.
376 * first try to get 8 contigous blocks, then fall back to a single
377 * block.
378 */
379 if (bpref)
380 start = dtogd(fs, bpref) / NBBY;
381 else
382 start = 0;
383 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
384 for (loc = start; loc < end; loc++) {
385 if (bbp[loc] == 0) {
386 bno = loc * NBBY;
387 goto gotit;
388 }
389 }
390 for (loc = 0; loc < start; loc++) {
391 if (bbp[loc] == 0) {
392 bno = loc * NBBY;
393 goto gotit;
394 }
395 }
396
397 bno = ext2fs_mapsearch(fs, bbp, bpref);
398 if (bno < 0)
399 return (0);
400 gotit:
401 #ifdef DIAGNOSTIC
402 if (isset(bbp, (long)bno)) {
403 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
404 cg, bno, fs->e2fs_fsmnt);
405 panic("ext2fs_alloccg: dup alloc");
406 }
407 #endif
408 setbit(bbp, (long)bno);
409 fs->e2fs.e2fs_fbcount--;
410 fs->e2fs_gd[cg].ext2bgd_nbfree--;
411 fs->e2fs_fmod = 1;
412 bdwrite(bp);
413 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno);
414 }
415
416 /*
417 * Determine whether an inode can be allocated.
418 *
419 * Check to see if an inode is available, and if it is,
420 * allocate it using the following policy:
421 * 1) allocate the requested inode.
422 * 2) allocate the next available inode after the requested
423 * inode in the specified cylinder group.
424 */
425 static ufs_daddr_t
426 ext2fs_nodealloccg(ip, cg, ipref, mode)
427 struct inode *ip;
428 int cg;
429 ufs_daddr_t ipref;
430 int mode;
431 {
432 struct m_ext2fs *fs;
433 char *ibp;
434 struct buf *bp;
435 int error, start, len, loc, map, i;
436
437 ipref--; /* to avoid a lot of (ipref -1) */
438 fs = ip->i_e2fs;
439 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
440 return (0);
441 error = bread(ip->i_devvp, fsbtodb(fs,
442 fs->e2fs_gd[cg].ext2bgd_i_bitmap),
443 (int)fs->e2fs_bsize, NOCRED, &bp);
444 if (error) {
445 brelse(bp);
446 return (0);
447 }
448 ibp = (char *)bp->b_data;
449 if (ipref) {
450 ipref %= fs->e2fs.e2fs_ipg;
451 if (isclr(ibp, ipref))
452 goto gotit;
453 }
454 start = ipref / NBBY;
455 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
456 loc = skpc(0xff, len, &ibp[start]);
457 if (loc == 0) {
458 len = start + 1;
459 start = 0;
460 loc = skpc(0xff, len, &ibp[0]);
461 if (loc == 0) {
462 printf("cg = %d, ipref = %d, fs = %s\n",
463 cg, ipref, fs->e2fs_fsmnt);
464 panic("ext2fs_nodealloccg: map corrupted");
465 /* NOTREACHED */
466 }
467 }
468 i = start + len - loc;
469 map = ibp[i];
470 ipref = i * NBBY;
471 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
472 if ((map & i) == 0) {
473 goto gotit;
474 }
475 }
476 printf("fs = %s\n", fs->e2fs_fsmnt);
477 panic("ext2fs_nodealloccg: block not in map");
478 /* NOTREACHED */
479 gotit:
480 setbit(ibp, ipref);
481 fs->e2fs.e2fs_ficount--;
482 fs->e2fs_gd[cg].ext2bgd_nifree--;
483 fs->e2fs_fmod = 1;
484 if ((mode & IFMT) == IFDIR) {
485 fs->e2fs_gd[cg].ext2bgd_ndirs++;
486 }
487 bdwrite(bp);
488 return (cg * fs->e2fs.e2fs_ipg + ipref +1);
489 }
490
491 /*
492 * Free a block.
493 *
494 * The specified block is placed back in the
495 * free map.
496 */
497 void
498 ext2fs_blkfree(ip, bno)
499 struct inode *ip;
500 ufs_daddr_t bno;
501 {
502 struct m_ext2fs *fs;
503 char *bbp;
504 struct buf *bp;
505 int error, cg;
506
507 fs = ip->i_e2fs;
508 cg = dtog(fs, bno);
509 if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
510 printf("bad block %d, ino %d\n", bno, ip->i_number);
511 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block");
512 return;
513 }
514 error = bread(ip->i_devvp,
515 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
516 (int)fs->e2fs_bsize, NOCRED, &bp);
517 if (error) {
518 brelse(bp);
519 return;
520 }
521 bbp = (char *)bp->b_data;
522 bno = dtogd(fs, bno);
523 if (isclr(bbp, bno)) {
524 printf("dev = 0x%x, block = %d, fs = %s\n",
525 ip->i_dev, bno, fs->e2fs_fsmnt);
526 panic("blkfree: freeing free block");
527 }
528 clrbit(bbp, bno);
529 fs->e2fs.e2fs_fbcount++;
530 fs->e2fs_gd[cg].ext2bgd_nbfree++;
531
532 fs->e2fs_fmod = 1;
533 bdwrite(bp);
534 }
535
536 /*
537 * Free an inode.
538 *
539 * The specified inode is placed back in the free map.
540 */
541 int
542 ext2fs_vfree(v)
543 void *v;
544 {
545 struct vop_vfree_args /* {
546 struct vnode *a_pvp;
547 ino_t a_ino;
548 int a_mode;
549 } */ *ap = v;
550 struct m_ext2fs *fs;
551 char *ibp;
552 struct inode *pip;
553 ino_t ino = ap->a_ino;
554 struct buf *bp;
555 int error, cg;
556
557 pip = VTOI(ap->a_pvp);
558 fs = pip->i_e2fs;
559 if ((u_int)ino >= fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
560 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
561 pip->i_dev, ino, fs->e2fs_fsmnt);
562 cg = ino_to_cg(fs, ino);
563 error = bread(pip->i_devvp,
564 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
565 (int)fs->e2fs_bsize, NOCRED, &bp);
566 if (error) {
567 brelse(bp);
568 return (0);
569 }
570 ibp = (char *)bp->b_data;
571 ino = (ino - 1) % fs->e2fs.e2fs_ipg;
572 if (isclr(ibp, ino)) {
573 printf("dev = 0x%x, ino = %d, fs = %s\n",
574 pip->i_dev, ino, fs->e2fs_fsmnt);
575 if (fs->e2fs_ronly == 0)
576 panic("ifree: freeing free inode");
577 }
578 clrbit(ibp, ino);
579 fs->e2fs.e2fs_ficount++;
580 fs->e2fs_gd[cg].ext2bgd_nifree++;
581 if ((ap->a_mode & IFMT) == IFDIR) {
582 fs->e2fs_gd[cg].ext2bgd_ndirs--;
583 }
584 fs->e2fs_fmod = 1;
585 bdwrite(bp);
586 return (0);
587 }
588
589 /*
590 * Find a block in the specified cylinder group.
591 *
592 * It is a panic if a request is made to find a block if none are
593 * available.
594 */
595
596 static ufs_daddr_t
597 ext2fs_mapsearch(fs, bbp, bpref)
598 struct m_ext2fs *fs;
599 char *bbp;
600 ufs_daddr_t bpref;
601 {
602 ufs_daddr_t bno;
603 int start, len, loc, i, map;
604
605 /*
606 * find the fragment by searching through the free block
607 * map for an appropriate bit pattern
608 */
609 if (bpref)
610 start = dtogd(fs, bpref) / NBBY;
611 else
612 start = 0;
613 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
614 loc = skpc(0xff, len, &bbp[start]);
615 if (loc == 0) {
616 len = start + 1;
617 start = 0;
618 loc = skpc(0xff, len, &bbp[start]);
619 if (loc == 0) {
620 printf("start = %d, len = %d, fs = %s\n",
621 start, len, fs->e2fs_fsmnt);
622 panic("ext2fs_alloccg: map corrupted");
623 /* NOTREACHED */
624 }
625 }
626 i = start + len - loc;
627 map = bbp[i];
628 bno = i * NBBY;
629 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
630 if ((map & i) == 0)
631 return (bno);
632 }
633 printf("fs = %s\n", fs->e2fs_fsmnt);
634 panic("ext2fs_mapsearch: block not in map");
635 /* NOTREACHED */
636 }
637
638 /*
639 * Fserr prints the name of a file system with an error diagnostic.
640 *
641 * The form of the error message is:
642 * fs: error message
643 */
644 static void
645 ext2fs_fserr(fs, uid, cp)
646 struct m_ext2fs *fs;
647 u_int uid;
648 char *cp;
649 {
650
651 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
652 }
653