ext2fs_alloc.c revision 1.12 1 /* $NetBSD: ext2fs_alloc.c,v 1.12 2001/10/26 05:56:07 lukem Exp $ */
2
3 /*
4 * Copyright (c) 1997 Manuel Bouyer.
5 * Copyright (c) 1982, 1986, 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
37 * Modified for ext2fs by Manuel Bouyer.
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/buf.h>
43 #include <sys/proc.h>
44 #include <sys/vnode.h>
45 #include <sys/mount.h>
46 #include <sys/kernel.h>
47 #include <sys/syslog.h>
48
49 #include <ufs/ufs/inode.h>
50 #include <ufs/ufs/ufs_extern.h>
51
52 #include <ufs/ext2fs/ext2fs.h>
53 #include <ufs/ext2fs/ext2fs_extern.h>
54
55 u_long ext2gennumber;
56
57 static ufs_daddr_t ext2fs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
58 static u_long ext2fs_dirpref __P((struct m_ext2fs *));
59 static void ext2fs_fserr __P((struct m_ext2fs *, u_int, char *));
60 static u_long ext2fs_hashalloc __P((struct inode *, int, long, int,
61 ufs_daddr_t (*)(struct inode *, int, ufs_daddr_t,
62 int)));
63 static ufs_daddr_t ext2fs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
64 static ufs_daddr_t ext2fs_mapsearch __P((struct m_ext2fs *, char *, ufs_daddr_t));
65
66 /*
67 * Allocate a block in the file system.
68 *
69 * A preference may be optionally specified. If a preference is given
70 * the following hierarchy is used to allocate a block:
71 * 1) allocate the requested block.
72 * 2) allocate a rotationally optimal block in the same cylinder.
73 * 3) allocate a block in the same cylinder group.
74 * 4) quadradically rehash into other cylinder groups, until an
75 * available block is located.
76 * If no block preference is given the following hierarchy is used
77 * to allocate a block:
78 * 1) allocate a block in the cylinder group that contains the
79 * inode for the file.
80 * 2) quadradically rehash into other cylinder groups, until an
81 * available block is located.
82 */
83 int
84 ext2fs_alloc(ip, lbn, bpref, cred, bnp)
85 struct inode *ip;
86 ufs_daddr_t lbn, bpref;
87 struct ucred *cred;
88 ufs_daddr_t *bnp;
89 {
90 struct m_ext2fs *fs;
91 ufs_daddr_t bno;
92 int cg;
93
94 *bnp = 0;
95 fs = ip->i_e2fs;
96 #ifdef DIAGNOSTIC
97 if (cred == NOCRED)
98 panic("ext2fs_alloc: missing credential\n");
99 #endif /* DIAGNOSTIC */
100 if (fs->e2fs.e2fs_fbcount == 0)
101 goto nospace;
102 if (cred->cr_uid != 0 && freespace(fs) <= 0)
103 goto nospace;
104 if (bpref >= fs->e2fs.e2fs_bcount)
105 bpref = 0;
106 if (bpref == 0)
107 cg = ino_to_cg(fs, ip->i_number);
108 else
109 cg = dtog(fs, bpref);
110 bno = (ufs_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
111 ext2fs_alloccg);
112 if (bno > 0) {
113 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize);
114 ip->i_flag |= IN_CHANGE | IN_UPDATE;
115 *bnp = bno;
116 return (0);
117 }
118 nospace:
119 ext2fs_fserr(fs, cred->cr_uid, "file system full");
120 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
121 return (ENOSPC);
122 }
123
124 /*
125 * Allocate an inode in the file system.
126 *
127 * If allocating a directory, use ext2fs_dirpref to select the inode.
128 * If allocating in a directory, the following hierarchy is followed:
129 * 1) allocate the preferred inode.
130 * 2) allocate an inode in the same cylinder group.
131 * 3) quadradically rehash into other cylinder groups, until an
132 * available inode is located.
133 * If no inode preference is given the following hierarchy is used
134 * to allocate an inode:
135 * 1) allocate an inode in cylinder group 0.
136 * 2) quadradically rehash into other cylinder groups, until an
137 * available inode is located.
138 */
139 int
140 ext2fs_valloc(v)
141 void *v;
142 {
143 struct vop_valloc_args /* {
144 struct vnode *a_pvp;
145 int a_mode;
146 struct ucred *a_cred;
147 struct vnode **a_vpp;
148 } */ *ap = v;
149 struct vnode *pvp = ap->a_pvp;
150 struct inode *pip;
151 struct m_ext2fs *fs;
152 struct inode *ip;
153 mode_t mode = ap->a_mode;
154 ino_t ino, ipref;
155 int cg, error;
156
157 *ap->a_vpp = NULL;
158 pip = VTOI(pvp);
159 fs = pip->i_e2fs;
160 if (fs->e2fs.e2fs_ficount == 0)
161 goto noinodes;
162
163 if ((mode & IFMT) == IFDIR)
164 cg = ext2fs_dirpref(fs);
165 else
166 cg = ino_to_cg(fs, pip->i_number);
167 ipref = cg * fs->e2fs.e2fs_ipg + 1;
168 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
169 if (ino == 0)
170 goto noinodes;
171 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
172 if (error) {
173 VOP_VFREE(pvp, ino, mode);
174 return (error);
175 }
176 ip = VTOI(*ap->a_vpp);
177 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) {
178 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n",
179 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt);
180 panic("ext2fs_valloc: dup alloc");
181 }
182
183 memset(&ip->i_din, 0, sizeof(ip->i_din));
184
185 /*
186 * Set up a new generation number for this inode.
187 */
188 if (++ext2gennumber < (u_long)time.tv_sec)
189 ext2gennumber = time.tv_sec;
190 ip->i_e2fs_gen = ext2gennumber;
191 return (0);
192 noinodes:
193 ext2fs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
194 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
195 return (ENOSPC);
196 }
197
198 /*
199 * Find a cylinder to place a directory.
200 *
201 * The policy implemented by this algorithm is to select from
202 * among those cylinder groups with above the average number of
203 * free inodes, the one with the smallest number of directories.
204 */
205 static u_long
206 ext2fs_dirpref(fs)
207 struct m_ext2fs *fs;
208 {
209 int cg, maxspace, mincg, avgifree;
210
211 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
212 maxspace = 0;
213 mincg = -1;
214 for (cg = 0; cg < fs->e2fs_ncg; cg++)
215 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) {
216 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) {
217 mincg = cg;
218 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree;
219 }
220 }
221 return mincg;
222 }
223
224 /*
225 * Select the desired position for the next block in a file. The file is
226 * logically divided into sections. The first section is composed of the
227 * direct blocks. Each additional section contains fs_maxbpg blocks.
228 *
229 * If no blocks have been allocated in the first section, the policy is to
230 * request a block in the same cylinder group as the inode that describes
231 * the file. Otherwise, the policy is to try to allocate the blocks
232 * contigously. The two fields of the ext2 inode extension (see
233 * ufs/ufs/inode.h) help this.
234 */
235 ufs_daddr_t
236 ext2fs_blkpref(ip, lbn, indx, bap)
237 struct inode *ip;
238 ufs_daddr_t lbn;
239 int indx;
240 ufs_daddr_t *bap;
241 {
242 struct m_ext2fs *fs;
243 int cg, i;
244
245 fs = ip->i_e2fs;
246 /*
247 * if we are doing contigous lbn allocation, try to alloc blocks
248 * contigously on disk
249 */
250
251 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
252 return ip->i_e2fs_last_blk + 1;
253 }
254
255 /*
256 * bap, if provided, gives us a list of blocks to which we want to
257 * stay close
258 */
259
260 if (bap) {
261 for (i = indx; i >= 0 ; i--) {
262 if (bap[i]) {
263 return fs2h32(bap[i]) + 1;
264 }
265 }
266 }
267
268 /* fall back to the first block of the cylinder containing the inode */
269
270 cg = ino_to_cg(fs, ip->i_number);
271 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
272 }
273
274 /*
275 * Implement the cylinder overflow algorithm.
276 *
277 * The policy implemented by this algorithm is:
278 * 1) allocate the block in its requested cylinder group.
279 * 2) quadradically rehash on the cylinder group number.
280 * 3) brute force search for a free block.
281 */
282 static u_long
283 ext2fs_hashalloc(ip, cg, pref, size, allocator)
284 struct inode *ip;
285 int cg;
286 long pref;
287 int size; /* size for data blocks, mode for inodes */
288 ufs_daddr_t (*allocator) __P((struct inode *, int, ufs_daddr_t, int));
289 {
290 struct m_ext2fs *fs;
291 long result;
292 int i, icg = cg;
293
294 fs = ip->i_e2fs;
295 /*
296 * 1: preferred cylinder group
297 */
298 result = (*allocator)(ip, cg, pref, size);
299 if (result)
300 return (result);
301 /*
302 * 2: quadratic rehash
303 */
304 for (i = 1; i < fs->e2fs_ncg; i *= 2) {
305 cg += i;
306 if (cg >= fs->e2fs_ncg)
307 cg -= fs->e2fs_ncg;
308 result = (*allocator)(ip, cg, 0, size);
309 if (result)
310 return (result);
311 }
312 /*
313 * 3: brute force search
314 * Note that we start at i == 2, since 0 was checked initially,
315 * and 1 is always checked in the quadratic rehash.
316 */
317 cg = (icg + 2) % fs->e2fs_ncg;
318 for (i = 2; i < fs->e2fs_ncg; i++) {
319 result = (*allocator)(ip, cg, 0, size);
320 if (result)
321 return (result);
322 cg++;
323 if (cg == fs->e2fs_ncg)
324 cg = 0;
325 }
326 return (0);
327 }
328
329 /*
330 * Determine whether a block can be allocated.
331 *
332 * Check to see if a block of the appropriate size is available,
333 * and if it is, allocate it.
334 */
335
336 static ufs_daddr_t
337 ext2fs_alloccg(ip, cg, bpref, size)
338 struct inode *ip;
339 int cg;
340 ufs_daddr_t bpref;
341 int size;
342 {
343 struct m_ext2fs *fs;
344 char *bbp;
345 struct buf *bp;
346 int error, bno, start, end, loc;
347
348 fs = ip->i_e2fs;
349 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
350 return (0);
351 error = bread(ip->i_devvp, fsbtodb(fs,
352 fs->e2fs_gd[cg].ext2bgd_b_bitmap),
353 (int)fs->e2fs_bsize, NOCRED, &bp);
354 if (error) {
355 brelse(bp);
356 return (0);
357 }
358 bbp = (char *)bp->b_data;
359
360 if (dtog(fs, bpref) != cg)
361 bpref = 0;
362 if (bpref != 0) {
363 bpref = dtogd(fs, bpref);
364 /*
365 * if the requested block is available, use it
366 */
367 if (isclr(bbp, bpref)) {
368 bno = bpref;
369 goto gotit;
370 }
371 }
372 /*
373 * no blocks in the requested cylinder, so take next
374 * available one in this cylinder group.
375 * first try to get 8 contigous blocks, then fall back to a single
376 * block.
377 */
378 if (bpref)
379 start = dtogd(fs, bpref) / NBBY;
380 else
381 start = 0;
382 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
383 for (loc = start; loc < end; loc++) {
384 if (bbp[loc] == 0) {
385 bno = loc * NBBY;
386 goto gotit;
387 }
388 }
389 for (loc = 0; loc < start; loc++) {
390 if (bbp[loc] == 0) {
391 bno = loc * NBBY;
392 goto gotit;
393 }
394 }
395
396 bno = ext2fs_mapsearch(fs, bbp, bpref);
397 if (bno < 0)
398 return (0);
399 gotit:
400 #ifdef DIAGNOSTIC
401 if (isset(bbp, (long)bno)) {
402 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
403 cg, bno, fs->e2fs_fsmnt);
404 panic("ext2fs_alloccg: dup alloc");
405 }
406 #endif
407 setbit(bbp, (long)bno);
408 fs->e2fs.e2fs_fbcount--;
409 fs->e2fs_gd[cg].ext2bgd_nbfree--;
410 fs->e2fs_fmod = 1;
411 bdwrite(bp);
412 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno);
413 }
414
415 /*
416 * Determine whether an inode can be allocated.
417 *
418 * Check to see if an inode is available, and if it is,
419 * allocate it using the following policy:
420 * 1) allocate the requested inode.
421 * 2) allocate the next available inode after the requested
422 * inode in the specified cylinder group.
423 */
424 static ufs_daddr_t
425 ext2fs_nodealloccg(ip, cg, ipref, mode)
426 struct inode *ip;
427 int cg;
428 ufs_daddr_t ipref;
429 int mode;
430 {
431 struct m_ext2fs *fs;
432 char *ibp;
433 struct buf *bp;
434 int error, start, len, loc, map, i;
435
436 ipref--; /* to avoid a lot of (ipref -1) */
437 fs = ip->i_e2fs;
438 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
439 return (0);
440 error = bread(ip->i_devvp, fsbtodb(fs,
441 fs->e2fs_gd[cg].ext2bgd_i_bitmap),
442 (int)fs->e2fs_bsize, NOCRED, &bp);
443 if (error) {
444 brelse(bp);
445 return (0);
446 }
447 ibp = (char *)bp->b_data;
448 if (ipref) {
449 ipref %= fs->e2fs.e2fs_ipg;
450 if (isclr(ibp, ipref))
451 goto gotit;
452 }
453 start = ipref / NBBY;
454 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
455 loc = skpc(0xff, len, &ibp[start]);
456 if (loc == 0) {
457 len = start + 1;
458 start = 0;
459 loc = skpc(0xff, len, &ibp[0]);
460 if (loc == 0) {
461 printf("cg = %d, ipref = %d, fs = %s\n",
462 cg, ipref, fs->e2fs_fsmnt);
463 panic("ext2fs_nodealloccg: map corrupted");
464 /* NOTREACHED */
465 }
466 }
467 i = start + len - loc;
468 map = ibp[i];
469 ipref = i * NBBY;
470 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
471 if ((map & i) == 0) {
472 goto gotit;
473 }
474 }
475 printf("fs = %s\n", fs->e2fs_fsmnt);
476 panic("ext2fs_nodealloccg: block not in map");
477 /* NOTREACHED */
478 gotit:
479 setbit(ibp, ipref);
480 fs->e2fs.e2fs_ficount--;
481 fs->e2fs_gd[cg].ext2bgd_nifree--;
482 fs->e2fs_fmod = 1;
483 if ((mode & IFMT) == IFDIR) {
484 fs->e2fs_gd[cg].ext2bgd_ndirs++;
485 }
486 bdwrite(bp);
487 return (cg * fs->e2fs.e2fs_ipg + ipref +1);
488 }
489
490 /*
491 * Free a block.
492 *
493 * The specified block is placed back in the
494 * free map.
495 */
496 void
497 ext2fs_blkfree(ip, bno)
498 struct inode *ip;
499 ufs_daddr_t bno;
500 {
501 struct m_ext2fs *fs;
502 char *bbp;
503 struct buf *bp;
504 int error, cg;
505
506 fs = ip->i_e2fs;
507 cg = dtog(fs, bno);
508 if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
509 printf("bad block %d, ino %d\n", bno, ip->i_number);
510 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block");
511 return;
512 }
513 error = bread(ip->i_devvp,
514 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
515 (int)fs->e2fs_bsize, NOCRED, &bp);
516 if (error) {
517 brelse(bp);
518 return;
519 }
520 bbp = (char *)bp->b_data;
521 bno = dtogd(fs, bno);
522 if (isclr(bbp, bno)) {
523 printf("dev = 0x%x, block = %d, fs = %s\n",
524 ip->i_dev, bno, fs->e2fs_fsmnt);
525 panic("blkfree: freeing free block");
526 }
527 clrbit(bbp, bno);
528 fs->e2fs.e2fs_fbcount++;
529 fs->e2fs_gd[cg].ext2bgd_nbfree++;
530
531 fs->e2fs_fmod = 1;
532 bdwrite(bp);
533 }
534
535 /*
536 * Free an inode.
537 *
538 * The specified inode is placed back in the free map.
539 */
540 int
541 ext2fs_vfree(v)
542 void *v;
543 {
544 struct vop_vfree_args /* {
545 struct vnode *a_pvp;
546 ino_t a_ino;
547 int a_mode;
548 } */ *ap = v;
549 struct m_ext2fs *fs;
550 char *ibp;
551 struct inode *pip;
552 ino_t ino = ap->a_ino;
553 struct buf *bp;
554 int error, cg;
555
556 pip = VTOI(ap->a_pvp);
557 fs = pip->i_e2fs;
558 if ((u_int)ino >= fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
559 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
560 pip->i_dev, ino, fs->e2fs_fsmnt);
561 cg = ino_to_cg(fs, ino);
562 error = bread(pip->i_devvp,
563 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
564 (int)fs->e2fs_bsize, NOCRED, &bp);
565 if (error) {
566 brelse(bp);
567 return (0);
568 }
569 ibp = (char *)bp->b_data;
570 ino = (ino - 1) % fs->e2fs.e2fs_ipg;
571 if (isclr(ibp, ino)) {
572 printf("dev = 0x%x, ino = %d, fs = %s\n",
573 pip->i_dev, ino, fs->e2fs_fsmnt);
574 if (fs->e2fs_ronly == 0)
575 panic("ifree: freeing free inode");
576 }
577 clrbit(ibp, ino);
578 fs->e2fs.e2fs_ficount++;
579 fs->e2fs_gd[cg].ext2bgd_nifree++;
580 if ((ap->a_mode & IFMT) == IFDIR) {
581 fs->e2fs_gd[cg].ext2bgd_ndirs--;
582 }
583 fs->e2fs_fmod = 1;
584 bdwrite(bp);
585 return (0);
586 }
587
588 /*
589 * Find a block in the specified cylinder group.
590 *
591 * It is a panic if a request is made to find a block if none are
592 * available.
593 */
594
595 static ufs_daddr_t
596 ext2fs_mapsearch(fs, bbp, bpref)
597 struct m_ext2fs *fs;
598 char *bbp;
599 ufs_daddr_t bpref;
600 {
601 ufs_daddr_t bno;
602 int start, len, loc, i, map;
603
604 /*
605 * find the fragment by searching through the free block
606 * map for an appropriate bit pattern
607 */
608 if (bpref)
609 start = dtogd(fs, bpref) / NBBY;
610 else
611 start = 0;
612 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
613 loc = skpc(0xff, len, &bbp[start]);
614 if (loc == 0) {
615 len = start + 1;
616 start = 0;
617 loc = skpc(0xff, len, &bbp[start]);
618 if (loc == 0) {
619 printf("start = %d, len = %d, fs = %s\n",
620 start, len, fs->e2fs_fsmnt);
621 panic("ext2fs_alloccg: map corrupted");
622 /* NOTREACHED */
623 }
624 }
625 i = start + len - loc;
626 map = bbp[i];
627 bno = i * NBBY;
628 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
629 if ((map & i) == 0)
630 return (bno);
631 }
632 printf("fs = %s\n", fs->e2fs_fsmnt);
633 panic("ext2fs_mapsearch: block not in map");
634 /* NOTREACHED */
635 }
636
637 /*
638 * Fserr prints the name of a file system with an error diagnostic.
639 *
640 * The form of the error message is:
641 * fs: error message
642 */
643 static void
644 ext2fs_fserr(fs, uid, cp)
645 struct m_ext2fs *fs;
646 u_int uid;
647 char *cp;
648 {
649
650 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
651 }
652