1 /*	$NetBSD: ffs_alloc.c,v 1.36 2000/06/28 14:16:39 mrg Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95
36 */
37
38 #if defined(_KERNEL) && !defined(_LKM)
39 #include "opt_ffs.h"
40 #include "opt_quota.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/kernel.h>
50 #include <sys/syslog.h>
51
52 #include <ufs/ufs/quota.h>
53 #include <ufs/ufs/ufsmount.h>
54 #include <ufs/ufs/inode.h>
55 #include <ufs/ufs/ufs_extern.h>
56 #include <ufs/ufs/ufs_bswap.h>
57
58 #include <ufs/ffs/fs.h>
59 #include <ufs/ffs/ffs_extern.h>
60
61 static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
62 static ufs_daddr_t ffs_alloccgblk __P((struct inode *, struct buf *,
63 ufs_daddr_t));
64 static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t, int));
65 static ino_t ffs_dirpref __P((struct fs *));
66 static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
67 static void ffs_fserr __P((struct fs *, u_int, char *));
68 static u_long ffs_hashalloc
69 __P((struct inode *, int, long, int,
70 ufs_daddr_t (*)(struct inode *, int, ufs_daddr_t, int)));
71 static ufs_daddr_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
72 static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *,
73 ufs_daddr_t, int));
74 #if defined(DIAGNOSTIC) || defined(DEBUG)
75 static int ffs_checkblk __P((struct inode *, ufs_daddr_t, long size));
76 #endif
77
78 /* if 1, changes in optimization strategy are logged */
79 int ffs_log_changeopt = 0;
80
81 /* in ffs_tables.c */
82 extern int inside[], around[];
83 extern u_char *fragtbl[];
84
85 /*
86 * Allocate a block in the file system.
87 *
88 * The size of the requested block is given, which must be some
89 * multiple of fs_fsize and <= fs_bsize.
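 * For example, on a file system created with the common 8192/1024
 * layout (fs_bsize 8192, fs_fsize 1024), the legal request sizes
 * are 1024, 2048, ... up to 8192 bytes.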
90 * A preference may be optionally specified. If a preference is given
91 * the following hierarchy is used to allocate a block:
92 * 1) allocate the requested block.
93 * 2) allocate a rotationally optimal block in the same cylinder.
94 * 3) allocate a block in the same cylinder group.
95 * 4) quadratically rehash into other cylinder groups, until an
96 * available block is located.
97 * If no block preference is given, the following hierarchy is used
98 * to allocate a block:
99 * 1) allocate a block in the cylinder group that contains the
100 * inode for the file.
101 * 2) quadratically rehash into other cylinder groups, until an
102 * available block is located.
103 */
104 int
105 ffs_alloc(ip, lbn, bpref, size, cred, bnp)
106 struct inode *ip;
107 ufs_daddr_t lbn, bpref;
108 int size;
109 struct ucred *cred;
110 ufs_daddr_t *bnp;
111 {
112 struct fs *fs;
113 ufs_daddr_t bno;
114 int cg;
115 #ifdef QUOTA
116 int error;
117 #endif
118
119 *bnp = 0;
120 fs = ip->i_fs;
121 #ifdef DIAGNOSTIC
122 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
123 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
124 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
125 panic("ffs_alloc: bad size");
126 }
127 if (cred == NOCRED)
128 panic("ffs_alloc: missing credential\n");
129 #endif /* DIAGNOSTIC */
130 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
131 goto nospace;
132 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
133 goto nospace;
134 #ifdef QUOTA
135 if ((error = chkdq(ip, (long)btodb(size), cred, 0)) != 0)
136 return (error);
137 #endif
138 if (bpref >= fs->fs_size)
139 bpref = 0;
140 if (bpref == 0)
141 cg = ino_to_cg(fs, ip->i_number);
142 else
143 cg = dtog(fs, bpref);
144 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
145 ffs_alloccg);
146 if (bno > 0) {
147 ip->i_ffs_blocks += btodb(size);
148 ip->i_flag |= IN_CHANGE | IN_UPDATE;
149 *bnp = bno;
150 return (0);
151 }
152 #ifdef QUOTA
153 /*
154 * Restore user's disk quota because allocation failed.
155 */
156 (void) chkdq(ip, (long)-btodb(size), cred, FORCE);
157 #endif
158 nospace:
159 ffs_fserr(fs, cred->cr_uid, "file system full");
160 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
161 return (ENOSPC);
162 }
163
164 /*
165 * Reallocate a fragment to a bigger size
166 *
167 * The number and size of the old block is given, and a preference
168 * and new size is also specified. The allocator attempts to extend
169 * the original block. Failing that, the regular block allocator is
170 * invoked to get an appropriate block.
171 */
172 int
173 ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
174 struct inode *ip;
175 ufs_daddr_t lbprev;
176 ufs_daddr_t bpref;
177 int osize, nsize;
178 struct ucred *cred;
179 struct buf **bpp;
180 {
181 struct fs *fs;
182 struct buf *bp;
183 int cg, request, error;
184 ufs_daddr_t bprev, bno;
185
186 *bpp = 0;
187 fs = ip->i_fs;
188 #ifdef DIAGNOSTIC
189 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
190 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
191 printf(
192 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
193 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
194 panic("ffs_realloccg: bad size");
195 }
196 if (cred == NOCRED)
197 panic("ffs_realloccg: missing credential\n");
198 #endif /* DIAGNOSTIC */
199 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
200 goto nospace;
201 if ((bprev = ufs_rw32(ip->i_ffs_db[lbprev], UFS_FSNEEDSWAP(fs))) == 0) {
202 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
203 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
204 panic("ffs_realloccg: bad bprev");
205 }
206 /*
207 * Allocate the extra space in the buffer.
208 */
209 if ((error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) != 0) {
210 brelse(bp);
211 return (error);
212 }
213 #ifdef QUOTA
214 if ((error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) != 0) {
215 brelse(bp);
216 return (error);
217 }
218 #endif
219 /*
220 * Check for extension in the existing location.
221 */
222 cg = dtog(fs, bprev);
223 if ((bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) != 0) {
224 if (bp->b_blkno != fsbtodb(fs, bno))
225 panic("bad blockno");
226 ip->i_ffs_blocks += btodb(nsize - osize);
227 ip->i_flag |= IN_CHANGE | IN_UPDATE;
228 allocbuf(bp, nsize);
229 bp->b_flags |= B_DONE;
230 memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
231 *bpp = bp;
232 return (0);
233 }
234 /*
235 * Allocate a new disk location.
236 */
237 if (bpref >= fs->fs_size)
238 bpref = 0;
239 switch ((int)fs->fs_optim) {
240 case FS_OPTSPACE:
241 /*
242 * Allocate an exact sized fragment. Although this makes
243 * best use of space, we will waste time relocating it if
244 * the file continues to grow. If the fragmentation is
245 * less than half of the minimum free reserve, we choose
246 * to begin optimizing for time.
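 * As a worked example (hypothetical numbers): with fs_minfree at
 * 10% and fs_dsize at 1,000,000 fragments, the switch to
 * FS_OPTTIME happens once cs_nffree is no more than
 * 1,000,000 * 10 / 200 = 50,000 fragments, i.e. 5% of the data
 * area.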
247 */
248 request = nsize;
249 if (fs->fs_minfree < 5 ||
250 fs->fs_cstotal.cs_nffree >
251 fs->fs_dsize * fs->fs_minfree / (2 * 100))
252 break;
253
254 if (ffs_log_changeopt) {
255 log(LOG_NOTICE,
256 "%s: optimization changed from SPACE to TIME\n",
257 fs->fs_fsmnt);
258 }
259
260 fs->fs_optim = FS_OPTTIME;
261 break;
262 case FS_OPTTIME:
263 /*
264 * At this point we have discovered a file that is trying to
265 * grow a small fragment to a larger fragment. To save time,
266 * we allocate a full sized block, then free the unused portion.
267 * If the file continues to grow, the `ffs_fragextend' call
268 * above will be able to grow it in place without further
269 * copying. If aberrant programs cause disk fragmentation to
270 * grow within 2% of the free reserve, we choose to begin
271 * optimizing for space.
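 * Continuing the hypothetical example above (fs_minfree 10%,
 * fs_dsize 1,000,000 fragments), the switch back to FS_OPTSPACE
 * happens once cs_nffree reaches 1,000,000 * (10 - 2) / 100 =
 * 80,000 fragments, so the two policies have some hysteresis
 * between them.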
272 */
273 request = fs->fs_bsize;
274 if (fs->fs_cstotal.cs_nffree <
275 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
276 break;
277
278 if (ffs_log_changeopt) {
279 log(LOG_NOTICE,
280 "%s: optimization changed from TIME to SPACE\n",
281 fs->fs_fsmnt);
282 }
283
284 fs->fs_optim = FS_OPTSPACE;
285 break;
286 default:
287 printf("dev = 0x%x, optim = %d, fs = %s\n",
288 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
289 panic("ffs_realloccg: bad optim");
290 /* NOTREACHED */
291 }
292 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
293 ffs_alloccg);
294 if (bno > 0) {
295 bp->b_blkno = fsbtodb(fs, bno);
296 (void) uvm_vnp_uncache(ITOV(ip));
297 if (!DOINGSOFTDEP(ITOV(ip)))
298 ffs_blkfree(ip, bprev, (long)osize);
299 if (nsize < request)
300 ffs_blkfree(ip, bno + numfrags(fs, nsize),
301 (long)(request - nsize));
302 ip->i_ffs_blocks += btodb(nsize - osize);
303 ip->i_flag |= IN_CHANGE | IN_UPDATE;
304 allocbuf(bp, nsize);
305 bp->b_flags |= B_DONE;
306 memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
307 *bpp = bp;
308 return (0);
309 }
310 #ifdef QUOTA
311 /*
312 * Restore user's disk quota because allocation failed.
313 */
314 (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
315 #endif
316 brelse(bp);
317 nospace:
318 /*
319 * no space available
320 */
321 ffs_fserr(fs, cred->cr_uid, "file system full");
322 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
323 return (ENOSPC);
324 }
325
326 /*
327 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
328 *
329 * The vnode and an array of buffer pointers for a range of sequential
330 * logical blocks to be made contiguous is given. The allocator attempts
331 * to find a range of sequential blocks starting as close as possible to
332 * an fs_rotdelay offset from the end of the allocation for the logical
333 * block immediately preceding the current range. If successful, the
334 * physical block numbers in the buffer pointers and in the inode are
335 * changed to reflect the new allocation. If unsuccessful, the allocation
336 * is left unchanged. The success in doing the reallocation is returned.
337 * Note that the error return is not reflected back to the user. Rather
338 * the previous block allocation will be used.
339 */
340 #ifdef DEBUG
341 #include <sys/sysctl.h>
342 int prtrealloc = 0;
343 struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
344 #endif
345
346 int doasyncfree = 1;
347 extern int doreallocblks;
348
349 int
350 ffs_reallocblks(v)
351 void *v;
352 {
353 struct vop_reallocblks_args /* {
354 struct vnode *a_vp;
355 struct cluster_save *a_buflist;
356 } */ *ap = v;
357 struct fs *fs;
358 struct inode *ip;
359 struct vnode *vp;
360 struct buf *sbp, *ebp;
361 ufs_daddr_t *bap, *sbap, *ebap = NULL;
362 struct cluster_save *buflist;
363 ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno;
364 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
365 int i, len, start_lvl, end_lvl, pref, ssize;
366
367 vp = ap->a_vp;
368 ip = VTOI(vp);
369 fs = ip->i_fs;
370 if (fs->fs_contigsumsize <= 0)
371 return (ENOSPC);
372 buflist = ap->a_buflist;
373 len = buflist->bs_nchildren;
374 start_lbn = buflist->bs_children[0]->b_lblkno;
375 end_lbn = start_lbn + len - 1;
376 #ifdef DIAGNOSTIC
377 for (i = 0; i < len; i++)
378 if (!ffs_checkblk(ip,
379 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
380 panic("ffs_reallocblks: unallocated block 1");
381 for (i = 1; i < len; i++)
382 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
383 panic("ffs_reallocblks: non-logical cluster");
384 blkno = buflist->bs_children[0]->b_blkno;
385 ssize = fsbtodb(fs, fs->fs_frag);
386 for (i = 1; i < len - 1; i++)
387 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
388 panic("ffs_reallocblks: non-physical cluster %d", i);
389 #endif
390 /*
391 * If the latest allocation is in a new cylinder group, assume that
392 * the filesystem has decided to move and do not force it back to
393 * the previous cylinder group.
394 */
395 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
396 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
397 return (ENOSPC);
398 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
399 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
400 return (ENOSPC);
401 /*
402 * Get the starting offset and block map for the first block.
403 */
404 if (start_lvl == 0) {
405 sbap = &ip->i_ffs_db[0];
406 soff = start_lbn;
407 } else {
408 idp = &start_ap[start_lvl - 1];
409 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
410 brelse(sbp);
411 return (ENOSPC);
412 }
413 sbap = (ufs_daddr_t *)sbp->b_data;
414 soff = idp->in_off;
415 }
416 /*
417 * Find the preferred location for the cluster.
418 */
419 pref = ffs_blkpref(ip, start_lbn, soff, sbap);
420 /*
421 * If the block range spans two block maps, get the second map.
422 */
423 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
424 ssize = len;
425 } else {
426 #ifdef DIAGNOSTIC
427 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
428 panic("ffs_reallocblk: start == end");
429 #endif
430 ssize = len - (idp->in_off + 1);
431 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
432 goto fail;
433 ebap = (ufs_daddr_t *)ebp->b_data;
434 }
435 /*
436 * Search the block map looking for an allocation of the desired size.
437 */
438 if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
439 len, ffs_clusteralloc)) == 0)
440 goto fail;
441 /*
442 * We have found a new contiguous block.
443 *
444 * First we have to replace the old block pointers with the new
445 * block pointers in the inode and indirect blocks associated
446 * with the file.
447 */
448 #ifdef DEBUG
449 if (prtrealloc)
450 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
451 start_lbn, end_lbn);
452 #endif
453 blkno = newblk;
454 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
455 ufs_daddr_t ba;
456
457 if (i == ssize) {
458 bap = ebap;
459 soff = -i;
460 }
461 ba = ufs_rw32(*bap, UFS_FSNEEDSWAP(fs));
462 #ifdef DIAGNOSTIC
463 if (!ffs_checkblk(ip,
464 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
465 panic("ffs_reallocblks: unallocated block 2");
466 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != ba)
467 panic("ffs_reallocblks: alloc mismatch");
468 #endif
469 #ifdef DEBUG
470 if (prtrealloc)
471 printf(" %d,", ba);
472 #endif
473 if (DOINGSOFTDEP(vp)) {
474 if (sbap == &ip->i_ffs_db[0] && i < ssize)
475 softdep_setup_allocdirect(ip, start_lbn + i,
476 blkno, ba, fs->fs_bsize, fs->fs_bsize,
477 buflist->bs_children[i]);
478 else
479 softdep_setup_allocindir_page(ip, start_lbn + i,
480 i < ssize ? sbp : ebp, soff + i, blkno,
481 ba, buflist->bs_children[i]);
482 }
483 *bap++ = ufs_rw32(blkno, UFS_FSNEEDSWAP(fs));
484 }
485 /*
486 * Next we must write out the modified inode and indirect blocks.
487 * For strict correctness, the writes should be synchronous since
488 * the old block values may have been written to disk. In practice
489 * they are almost never written, but if we are concerned about
490 * strict correctness, the `doasyncfree' flag should be set to zero.
491 *
492 * The test on `doasyncfree' should be changed to test a flag
493 * that shows whether the associated buffers and inodes have
494 * been written. The flag should be set when the cluster is
495 * started and cleared whenever the buffer or inode is flushed.
496 * We can then check below to see if it is set, and do the
497 * synchronous write only when it has been cleared.
498 */
499 if (sbap != &ip->i_ffs_db[0]) {
500 if (doasyncfree)
501 bdwrite(sbp);
502 else
503 bwrite(sbp);
504 } else {
505 ip->i_flag |= IN_CHANGE | IN_UPDATE;
506 if (!doasyncfree)
507 VOP_UPDATE(vp, NULL, NULL, 1);
508 }
509 if (ssize < len) {
510 if (doasyncfree)
511 bdwrite(ebp);
512 else
513 bwrite(ebp);
514 }
515 /*
516 * Last, free the old blocks and assign the new blocks to the buffers.
517 */
518 #ifdef DEBUG
519 if (prtrealloc)
520 printf("\n\tnew:");
521 #endif
522 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
523 if (!DOINGSOFTDEP(vp))
524 ffs_blkfree(ip,
525 dbtofsb(fs, buflist->bs_children[i]->b_blkno),
526 fs->fs_bsize);
527 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
528 #ifdef DEBUG
529 if (!ffs_checkblk(ip,
530 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
531 panic("ffs_reallocblks: unallocated block 3");
532 if (prtrealloc)
533 printf(" %d,", blkno);
534 #endif
535 }
536 #ifdef DEBUG
537 if (prtrealloc) {
538 prtrealloc--;
539 printf("\n");
540 }
541 #endif
542 return (0);
543
544 fail:
545 if (ssize < len)
546 brelse(ebp);
547 if (sbap != &ip->i_ffs_db[0])
548 brelse(sbp);
549 return (ENOSPC);
550 }
551
552 /*
553 * Allocate an inode in the file system.
554 *
555 * If allocating a directory, use ffs_dirpref to select the inode.
556 * If allocating in a directory, the following hierarchy is followed:
557 * 1) allocate the preferred inode.
558 * 2) allocate an inode in the same cylinder group.
559 * 3) quadratically rehash into other cylinder groups, until an
560 * available inode is located.
561 * If no inode preference is given, the following hierarchy is used
562 * to allocate an inode:
563 * 1) allocate an inode in cylinder group 0.
564 * 2) quadratically rehash into other cylinder groups, until an
565 * available inode is located.
566 */
567 int
568 ffs_valloc(v)
569 void *v;
570 {
571 struct vop_valloc_args /* {
572 struct vnode *a_pvp;
573 int a_mode;
574 struct ucred *a_cred;
575 struct vnode **a_vpp;
576 } */ *ap = v;
577 struct vnode *pvp = ap->a_pvp;
578 struct inode *pip;
579 struct fs *fs;
580 struct inode *ip;
581 mode_t mode = ap->a_mode;
582 ino_t ino, ipref;
583 int cg, error;
584
585 *ap->a_vpp = NULL;
586 pip = VTOI(pvp);
587 fs = pip->i_fs;
588 if (fs->fs_cstotal.cs_nifree == 0)
589 goto noinodes;
590
591 if ((mode & IFMT) == IFDIR)
592 ipref = ffs_dirpref(fs);
593 else
594 ipref = pip->i_number;
595 if (ipref >= fs->fs_ncg * fs->fs_ipg)
596 ipref = 0;
597 cg = ino_to_cg(fs, ipref);
598 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
599 if (ino == 0)
600 goto noinodes;
601 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
602 if (error) {
603 VOP_VFREE(pvp, ino, mode);
604 return (error);
605 }
606 ip = VTOI(*ap->a_vpp);
607 if (ip->i_ffs_mode) {
608 printf("mode = 0%o, inum = %d, fs = %s\n",
609 ip->i_ffs_mode, ip->i_number, fs->fs_fsmnt);
610 panic("ffs_valloc: dup alloc");
611 }
612 if (ip->i_ffs_blocks) { /* XXX */
613 printf("free inode %s/%d had %d blocks\n",
614 fs->fs_fsmnt, ino, ip->i_ffs_blocks);
615 ip->i_ffs_blocks = 0;
616 }
617 ip->i_ffs_flags = 0;
618 /*
619 * Set up a new generation number for this inode.
620 */
621 ip->i_ffs_gen++;
622 return (0);
623 noinodes:
624 ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
625 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
626 return (ENOSPC);
627 }
628
629 /*
630 * Find a cylinder group in which to place a directory.
631 *
632 * The policy implemented by this algorithm is to select, from
633 * among those cylinder groups with an above-average number of
634 * free inodes, the one with the smallest number of directories.
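 * For illustration (hypothetical counts): if three groups hold
 * 120, 80 and 150 free inodes, the average is 116, so only the
 * first and third qualify; of those, the one currently holding
 * fewer directories is preferred.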
635 */
636 static ino_t
637 ffs_dirpref(fs)
638 struct fs *fs;
639 {
640 int cg, minndir, mincg, avgifree;
641
642 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
643 minndir = fs->fs_ipg;
644 mincg = 0;
645 for (cg = 0; cg < fs->fs_ncg; cg++)
646 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
647 fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
648 mincg = cg;
649 minndir = fs->fs_cs(fs, cg).cs_ndir;
650 }
651 return ((ino_t)(fs->fs_ipg * mincg));
652 }
653
654 /*
655 * Select the desired position for the next block in a file. The file is
656 * logically divided into sections. The first section is composed of the
657 * direct blocks. Each additional section contains fs_maxbpg blocks.
658 *
659 * If no blocks have been allocated in the first section, the policy is to
660 * request a block in the same cylinder group as the inode that describes
661 * the file. If no blocks have been allocated in any other section, the
662 * policy is to place the section in a cylinder group with a greater than
663 * average number of free blocks. An appropriate cylinder group is found
664 * by using a rotor that sweeps the cylinder groups. When a new group of
665 * blocks is needed, the sweep begins in the cylinder group following the
666 * cylinder group from which the previous allocation was made. The sweep
667 * continues until a cylinder group with greater than the average number
668 * of free blocks is found. If the allocation is for the first block in an
669 * indirect block, the information on the previous allocation is unavailable;
670 * here a best guess is made based upon the logical block number being
671 * allocated.
672 *
673 * If a section is already partially allocated, the policy is to
674 * contiguously allocate fs_maxcontig blocks. The end of one of these
675 * contiguous blocks and the beginning of the next is physically separated
676 * so that the disk head will be in transit between them for at least
677 * fs_rotdelay milliseconds. This is to allow time for the processor to
678 * schedule another I/O transfer.
679 */
680 ufs_daddr_t
681 ffs_blkpref(ip, lbn, indx, bap)
682 struct inode *ip;
683 ufs_daddr_t lbn;
684 int indx;
685 ufs_daddr_t *bap;
686 {
687 struct fs *fs;
688 int cg;
689 int avgbfree, startcg;
690 ufs_daddr_t nextblk;
691
692 fs = ip->i_fs;
693 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
694 if (lbn < NDADDR + NINDIR(fs)) {
695 cg = ino_to_cg(fs, ip->i_number);
696 return (fs->fs_fpg * cg + fs->fs_frag);
697 }
698 /*
699 * Find a cylinder with greater than average number of
700 * unused data blocks.
701 */
702 if (indx == 0 || bap[indx - 1] == 0)
703 startcg =
704 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
705 else
706 startcg = dtog(fs,
707 ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
708 startcg %= fs->fs_ncg;
709 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
710 for (cg = startcg; cg < fs->fs_ncg; cg++)
711 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
712 fs->fs_cgrotor = cg;
713 return (fs->fs_fpg * cg + fs->fs_frag);
714 }
715 for (cg = 0; cg <= startcg; cg++)
716 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
717 fs->fs_cgrotor = cg;
718 return (fs->fs_fpg * cg + fs->fs_frag);
719 }
720 return (0);
721 }
722 /*
723 * One or more previous blocks have been laid out. If less
724 * than fs_maxcontig previous blocks are contiguous, the
725 * next block is requested contiguously, otherwise it is
726 * requested rotationally delayed by fs_rotdelay milliseconds.
727 */
728 nextblk = ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
729 if (indx < fs->fs_maxcontig ||
730 ufs_rw32(bap[indx - fs->fs_maxcontig], UFS_FSNEEDSWAP(fs)) +
731 blkstofrags(fs, fs->fs_maxcontig) != nextblk)
732 return (nextblk);
733 if (fs->fs_rotdelay != 0)
734 /*
735 * Here we convert ms of delay to frags as:
736 * (frags) = (ms) * (rev/sec) * (sect/rev) /
737 * ((sect/frag) * (ms/sec))
738 * then round up to the next block.
739 */
740 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
741 (NSPF(fs) * 1000), fs->fs_frag);
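	/*
	 * Worked example of the conversion above, with hypothetical
	 * geometry: fs_rotdelay 4ms, fs_rps 60, fs_nsect 32 and 2
	 * sectors per fragment gives 4 * 60 * 32 / (2 * 1000) = 3
	 * fragments (integer division), which roundup() then pads
	 * out to the next block boundary.
	 */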
742 return (nextblk);
743 }
744
745 /*
746 * Implement the cylinder overflow algorithm.
747 *
748 * The policy implemented by this algorithm is:
749 * 1) allocate the block in its requested cylinder group.
750 * 2) quadratically rehash on the cylinder group number.
751 * 3) brute force search for a free block.
752 */
753 /*VARARGS5*/
754 static u_long
755 ffs_hashalloc(ip, cg, pref, size, allocator)
756 struct inode *ip;
757 int cg;
758 long pref;
759 int size; /* size for data blocks, mode for inodes */
760 ufs_daddr_t (*allocator) __P((struct inode *, int, ufs_daddr_t, int));
761 {
762 struct fs *fs;
763 long result;
764 int i, icg = cg;
765
766 fs = ip->i_fs;
767 /*
768 * 1: preferred cylinder group
769 */
770 result = (*allocator)(ip, cg, pref, size);
771 if (result)
772 return (result);
773 /*
774 * 2: quadratic rehash
775 */
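	/*
	 * For a hypothetical file system with 8 cylinder groups and a
	 * starting group of 5, the loop below probes groups 6, 0 and 4
	 * (offsets 1, 3 and 7 from the original group, modulo fs_ncg)
	 * before falling through to the brute force pass.
	 */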
776 for (i = 1; i < fs->fs_ncg; i *= 2) {
777 cg += i;
778 if (cg >= fs->fs_ncg)
779 cg -= fs->fs_ncg;
780 result = (*allocator)(ip, cg, 0, size);
781 if (result)
782 return (result);
783 }
784 /*
785 * 3: brute force search
786 * Note that we start at i == 2, since 0 was checked initially,
787 * and 1 is always checked in the quadratic rehash.
788 */
789 cg = (icg + 2) % fs->fs_ncg;
790 for (i = 2; i < fs->fs_ncg; i++) {
791 result = (*allocator)(ip, cg, 0, size);
792 if (result)
793 return (result);
794 cg++;
795 if (cg == fs->fs_ncg)
796 cg = 0;
797 }
798 return (0);
799 }
800
801 /*
802 * Determine whether a fragment can be extended.
803 *
804 * Check to see if the necessary fragments are available, and
805 * if they are, allocate them.
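 * For example, growing a 2048 byte fragment to 4096 bytes on a
 * hypothetical 8192/1024 file system needs the next two 1024 byte
 * fragments of the same block to be free; a request that would
 * spill across a block boundary is refused outright.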
806 */
807 static ufs_daddr_t
808 ffs_fragextend(ip, cg, bprev, osize, nsize)
809 struct inode *ip;
810 int cg;
811 long bprev;
812 int osize, nsize;
813 {
814 struct fs *fs;
815 struct cg *cgp;
816 struct buf *bp;
817 long bno;
818 int frags, bbase;
819 int i, error;
820
821 fs = ip->i_fs;
822 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
823 return (0);
824 frags = numfrags(fs, nsize);
825 bbase = fragnum(fs, bprev);
826 if (bbase > fragnum(fs, (bprev + frags - 1))) {
827 /* cannot extend across a block boundary */
828 return (0);
829 }
830 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
831 (int)fs->fs_cgsize, NOCRED, &bp);
832 if (error) {
833 brelse(bp);
834 return (0);
835 }
836 cgp = (struct cg *)bp->b_data;
837 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
838 brelse(bp);
839 return (0);
840 }
841 cgp->cg_time = ufs_rw32(time.tv_sec, UFS_FSNEEDSWAP(fs));
842 bno = dtogd(fs, bprev);
843 for (i = numfrags(fs, osize); i < frags; i++)
844 if (isclr(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i)) {
845 brelse(bp);
846 return (0);
847 }
848 /*
849 * the current fragment can be extended
850 * deduct the count on the fragment being extended into
851 * increase the count on the remaining fragment (if any)
852 * allocate the extended piece
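 * For instance (hypothetical layout): if a 2-fragment piece is
 * grown to 4 fragments and 3 free fragments follow it, the run of
 * 3 disappears (cg_frsum[3] is decremented) and a run of 1 is left
 * behind (cg_frsum[1] is incremented).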
853 */
854 for (i = frags; i < fs->fs_frag - bbase; i++)
855 if (isclr(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i))
856 break;
857 ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
858 if (i != frags)
859 ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
860 for (i = numfrags(fs, osize); i < frags; i++) {
861 clrbit(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i);
862 ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
863 fs->fs_cstotal.cs_nffree--;
864 fs->fs_cs(fs, cg).cs_nffree--;
865 }
866 fs->fs_fmod = 1;
867 if (DOINGSOFTDEP(ITOV(ip)))
868 softdep_setup_blkmapdep(bp, fs, bprev);
869 bdwrite(bp);
870 return (bprev);
871 }
872
873 /*
874 * Determine whether a block can be allocated.
875 *
876 * Check to see if a block of the appropriate size is available,
877 * and if it is, allocate it.
878 */
879 static ufs_daddr_t
880 ffs_alloccg(ip, cg, bpref, size)
881 struct inode *ip;
882 int cg;
883 ufs_daddr_t bpref;
884 int size;
885 {
886 struct cg *cgp;
887 struct buf *bp;
888 ufs_daddr_t bno, blkno;
889 int error, frags, allocsiz, i;
890 struct fs *fs = ip->i_fs;
891 #ifdef FFS_EI
892 const int needswap = UFS_FSNEEDSWAP(fs);
893 #endif
894
895 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
896 return (0);
897 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
898 (int)fs->fs_cgsize, NOCRED, &bp);
899 if (error) {
900 brelse(bp);
901 return (0);
902 }
903 cgp = (struct cg *)bp->b_data;
904 if (!cg_chkmagic(cgp, needswap) ||
905 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
906 brelse(bp);
907 return (0);
908 }
909 cgp->cg_time = ufs_rw32(time.tv_sec, needswap);
910 if (size == fs->fs_bsize) {
911 bno = ffs_alloccgblk(ip, bp, bpref);
912 bdwrite(bp);
913 return (bno);
914 }
915 /*
916 * check to see if any fragments are already available
917 * allocsiz is the size which will be allocated, hacking
918 * it down to a smaller size if necessary
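 * e.g. a 3-fragment request that can only be satisfied from an
 * existing 5-fragment run takes the first 3 fragments of that run
 * and leaves a 2-fragment run behind (cg_frsum[5] goes down by
 * one, cg_frsum[2] goes up by one)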
919 */
920 frags = numfrags(fs, size);
921 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
922 if (cgp->cg_frsum[allocsiz] != 0)
923 break;
924 if (allocsiz == fs->fs_frag) {
925 /*
926 * no fragments were available, so a block will be
927 * allocated, and hacked up
928 */
929 if (cgp->cg_cs.cs_nbfree == 0) {
930 brelse(bp);
931 return (0);
932 }
933 bno = ffs_alloccgblk(ip, bp, bpref);
934 bpref = dtogd(fs, bno);
935 for (i = frags; i < fs->fs_frag; i++)
936 setbit(cg_blksfree(cgp, needswap), bpref + i);
937 i = fs->fs_frag - frags;
938 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
939 fs->fs_cstotal.cs_nffree += i;
940 fs->fs_cs(fs, cg).cs_nffree += i;
941 fs->fs_fmod = 1;
942 ufs_add32(cgp->cg_frsum[i], 1, needswap);
943 bdwrite(bp);
944 return (bno);
945 }
946 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
947 #if 0
948 /*
949 * XXX fvdl mapsearch will panic, and never return -1
950 * also: returning NULL as ufs_daddr_t ?
951 */
952 if (bno < 0) {
953 brelse(bp);
954 return (0);
955 }
956 #endif
957 for (i = 0; i < frags; i++)
958 clrbit(cg_blksfree(cgp, needswap), bno + i);
959 ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
960 fs->fs_cstotal.cs_nffree -= frags;
961 fs->fs_cs(fs, cg).cs_nffree -= frags;
962 fs->fs_fmod = 1;
963 ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
964 if (frags != allocsiz)
965 ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
966 blkno = cg * fs->fs_fpg + bno;
967 if (DOINGSOFTDEP(ITOV(ip)))
968 softdep_setup_blkmapdep(bp, fs, blkno);
969 bdwrite(bp);
970 return blkno;
971 }
972
973 /*
974 * Allocate a block in a cylinder group.
975 *
976 * This algorithm implements the following policy:
977 * 1) allocate the requested block.
978 * 2) allocate a rotationally optimal block in the same cylinder.
979 * 3) allocate the next available block on the block rotor for the
980 * specified cylinder group.
981 * Note that this routine only allocates fs_bsize blocks; these
982 * blocks may be fragmented by the routine that allocates them.
983 */
984 static ufs_daddr_t
985 ffs_alloccgblk(ip, bp, bpref)
986 struct inode *ip;
987 struct buf *bp;
988 ufs_daddr_t bpref;
989 {
990 struct cg *cgp;
991 ufs_daddr_t bno, blkno;
992 int cylno, pos, delta;
993 short *cylbp;
994 int i;
995 struct fs *fs = ip->i_fs;
996 #ifdef FFS_EI
997 const int needswap = UFS_FSNEEDSWAP(fs);
998 #endif
999
1000 cgp = (struct cg *)bp->b_data;
1001 if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
1002 bpref = ufs_rw32(cgp->cg_rotor, needswap);
1003 goto norot;
1004 }
1005 bpref = blknum(fs, bpref);
1006 bpref = dtogd(fs, bpref);
1007 /*
1008 * if the requested block is available, use it
1009 */
1010 if (ffs_isblock(fs, cg_blksfree(cgp, needswap),
1011 fragstoblks(fs, bpref))) {
1012 bno = bpref;
1013 goto gotit;
1014 }
1015 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
1016 /*
1017 * Block layout information is not available.
1018 * Leaving bpref unchanged means we take the
1019 * next available free block following the one
1020 * we just allocated. Hopefully this will at
1021 * least hit a track cache on drives of unknown
1022 * geometry (e.g. SCSI).
1023 */
1024 goto norot;
1025 }
1026 /*
1027 * check for a block available on the same cylinder
1028 */
1029 cylno = cbtocylno(fs, bpref);
1030 if (cg_blktot(cgp, needswap)[cylno] == 0)
1031 goto norot;
1032 /*
1033 * check the summary information to see if a block is
1034 * available in the requested cylinder starting at the
1035 * requested rotational position and proceeding around.
1036 */
1037 cylbp = cg_blks(fs, cgp, cylno, needswap);
1038 pos = cbtorpos(fs, bpref);
1039 for (i = pos; i < fs->fs_nrpos; i++)
1040 if (ufs_rw16(cylbp[i], needswap) > 0)
1041 break;
1042 if (i == fs->fs_nrpos)
1043 for (i = 0; i < pos; i++)
1044 if (ufs_rw16(cylbp[i], needswap) > 0)
1045 break;
1046 if (ufs_rw16(cylbp[i], needswap) > 0) {
1047 /*
1048 * found a rotational position, now find the actual
1049 * block. It is a panic if none is actually there.
1050 */
1051 pos = cylno % fs->fs_cpc;
1052 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
1053 if (fs_postbl(fs, pos)[i] == -1) {
1054 printf("pos = %d, i = %d, fs = %s\n",
1055 pos, i, fs->fs_fsmnt);
1056 panic("ffs_alloccgblk: cyl groups corrupted");
1057 }
1058 for (i = fs_postbl(fs, pos)[i];; ) {
1059 if (ffs_isblock(fs, cg_blksfree(cgp, needswap), bno + i)) {
1060 bno = blkstofrags(fs, (bno + i));
1061 goto gotit;
1062 }
1063 delta = fs_rotbl(fs)[i];
1064 if (delta <= 0 ||
1065 delta + i > fragstoblks(fs, fs->fs_fpg))
1066 break;
1067 i += delta;
1068 }
1069 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
1070 panic("ffs_alloccgblk: can't find blk in cyl");
1071 }
1072 norot:
1073 /*
1074 * no blocks in the requested cylinder, so take next
1075 * available one in this cylinder group.
1076 */
1077 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1078 if (bno < 0)
1079 return (0);
1080 cgp->cg_rotor = ufs_rw32(bno, needswap);
1081 gotit:
1082 blkno = fragstoblks(fs, bno);
1083 ffs_clrblock(fs, cg_blksfree(cgp, needswap), (long)blkno);
1084 ffs_clusteracct(fs, cgp, blkno, -1);
1085 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1086 fs->fs_cstotal.cs_nbfree--;
1087 fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
1088 cylno = cbtocylno(fs, bno);
1089 ufs_add16(cg_blks(fs, cgp, cylno, needswap)[cbtorpos(fs, bno)], -1,
1090 needswap);
1091 ufs_add32(cg_blktot(cgp, needswap)[cylno], -1, needswap);
1092 fs->fs_fmod = 1;
1093 blkno = ufs_rw32(cgp->cg_cgx, needswap) * fs->fs_fpg + bno;
1094 if (DOINGSOFTDEP(ITOV(ip)))
1095 softdep_setup_blkmapdep(bp, fs, blkno);
1096 return (blkno);
1097 }
1098
1099 /*
1100 * Determine whether a cluster can be allocated.
1101 *
1102 * We do not currently check for optimal rotational layout if there
1103 * are multiple choices in the same cylinder group. Instead we just
1104 * take the first one that we find following bpref.
1105 */
1106 static ufs_daddr_t
1107 ffs_clusteralloc(ip, cg, bpref, len)
1108 struct inode *ip;
1109 int cg;
1110 ufs_daddr_t bpref;
1111 int len;
1112 {
1113 struct fs *fs;
1114 struct cg *cgp;
1115 struct buf *bp;
1116 int i, got, run, bno, bit, map;
1117 u_char *mapp;
1118 int32_t *lp;
1119
1120 fs = ip->i_fs;
1121 if (fs->fs_maxcluster[cg] < len)
1122 return (0);
1123 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1124 NOCRED, &bp))
1125 goto fail;
1126 cgp = (struct cg *)bp->b_data;
1127 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1128 goto fail;
1129 /*
1130 * Check to see if a cluster of the needed size (or bigger) is
1131 * available in this cylinder group.
1132 */
1133 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len];
1134 for (i = len; i <= fs->fs_contigsumsize; i++)
1135 if (ufs_rw32(*lp++, UFS_FSNEEDSWAP(fs)) > 0)
1136 break;
1137 if (i > fs->fs_contigsumsize) {
1138 /*
1139 * This is the first time looking for a cluster in this
1140 * cylinder group. Update the cluster summary information
1141 * to reflect the true maximum sized cluster so that
1142 * future cluster allocation requests can avoid reading
1143 * the cylinder group map only to find no clusters.
1144 */
1145 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len - 1];
1146 for (i = len - 1; i > 0; i--)
1147 if (ufs_rw32(*lp--, UFS_FSNEEDSWAP(fs)) > 0)
1148 break;
1149 fs->fs_maxcluster[cg] = i;
1150 goto fail;
1151 }
1152 /*
1153 * Search the cluster map to find a big enough cluster.
1154 * We take the first one that we find, even if it is larger
1155 * than we need as we prefer to get one close to the previous
1156 * block allocation. We do not search before the current
1157 * preference point as we do not want to allocate a block
1158 * that is allocated before the previous one (as we will
1159 * then have to wait for another pass of the elevator
1160 * algorithm before it will be read). We prefer to fail and
1161 * be recalled to try an allocation in the next cylinder group.
1162 */
1163 if (dtog(fs, bpref) != cg)
1164 bpref = 0;
1165 else
1166 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1167 mapp = &cg_clustersfree(cgp, UFS_FSNEEDSWAP(fs))[bpref / NBBY];
1168 map = *mapp++;
1169 bit = 1 << (bpref % NBBY);
1170 for (run = 0, got = bpref;
1171 got < ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)); got++) {
1172 if ((map & bit) == 0) {
1173 run = 0;
1174 } else {
1175 run++;
1176 if (run == len)
1177 break;
1178 }
1179 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1180 bit <<= 1;
1181 } else {
1182 map = *mapp++;
1183 bit = 1;
1184 }
1185 }
1186 if (got == ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)))
1187 goto fail;
1188 /*
1189 * Allocate the cluster that we have found.
1190 */
1191 #ifdef DIAGNOSTIC
1192 for (i = 1; i <= len; i++)
1193 if (!ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1194 got - run + i))
1195 panic("ffs_clusteralloc: map mismatch");
1196 #endif
1197 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1198 if (dtog(fs, bno) != cg)
1199 panic("ffs_clusteralloc: allocated out of group");
1200 len = blkstofrags(fs, len);
1201 for (i = 0; i < len; i += fs->fs_frag)
1202 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
1203 panic("ffs_clusteralloc: lost block");
1204 bdwrite(bp);
1205 return (bno);
1206
1207 fail:
1208 brelse(bp);
1209 return (0);
1210 }
1211
1212 /*
1213 * Determine whether an inode can be allocated.
1214 *
1215 * Check to see if an inode is available, and if it is,
1216 * allocate it using the following policy:
1217 * 1) allocate the requested inode.
1218 * 2) allocate the next available inode after the requested
1219 * inode in the specified cylinder group.
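 * As an example of the numbering (hypothetical fs_ipg of 1920):
 * inode 4000 belongs to cylinder group 4000 / 1920 = 2 and is bit
 * 4000 % 1920 = 160 in that group's cg_inosused map.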
1220 */
1221 static ufs_daddr_t
1222 ffs_nodealloccg(ip, cg, ipref, mode)
1223 struct inode *ip;
1224 int cg;
1225 ufs_daddr_t ipref;
1226 int mode;
1227 {
1228 struct cg *cgp;
1229 struct buf *bp;
1230 int error, start, len, loc, map, i;
1231 struct fs *fs = ip->i_fs;
1232 #ifdef FFS_EI
1233 const int needswap = UFS_FSNEEDSWAP(fs);
1234 #endif
1235
1236 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1237 return (0);
1238 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1239 (int)fs->fs_cgsize, NOCRED, &bp);
1240 if (error) {
1241 brelse(bp);
1242 return (0);
1243 }
1244 cgp = (struct cg *)bp->b_data;
1245 if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0) {
1246 brelse(bp);
1247 return (0);
1248 }
1249 cgp->cg_time = ufs_rw32(time.tv_sec, needswap);
1250 if (ipref) {
1251 ipref %= fs->fs_ipg;
1252 if (isclr(cg_inosused(cgp, needswap), ipref))
1253 goto gotit;
1254 }
1255 start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
1256 len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
1257 NBBY);
1258 loc = skpc(0xff, len, &cg_inosused(cgp, needswap)[start]);
1259 if (loc == 0) {
1260 len = start + 1;
1261 start = 0;
1262 loc = skpc(0xff, len, &cg_inosused(cgp, needswap)[0]);
1263 if (loc == 0) {
1264 printf("cg = %d, irotor = %d, fs = %s\n",
1265 cg, ufs_rw32(cgp->cg_irotor, needswap),
1266 fs->fs_fsmnt);
1267 panic("ffs_nodealloccg: map corrupted");
1268 /* NOTREACHED */
1269 }
1270 }
1271 i = start + len - loc;
1272 map = cg_inosused(cgp, needswap)[i];
1273 ipref = i * NBBY;
1274 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1275 if ((map & i) == 0) {
1276 cgp->cg_irotor = ufs_rw32(ipref, needswap);
1277 goto gotit;
1278 }
1279 }
1280 printf("fs = %s\n", fs->fs_fsmnt);
1281 panic("ffs_nodealloccg: block not in map");
1282 /* NOTREACHED */
1283 gotit:
1284 if (DOINGSOFTDEP(ITOV(ip)))
1285 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
1286 setbit(cg_inosused(cgp, needswap), ipref);
1287 ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1288 fs->fs_cstotal.cs_nifree--;
1289 fs->fs_cs(fs, cg).cs_nifree--;
1290 fs->fs_fmod = 1;
1291 if ((mode & IFMT) == IFDIR) {
1292 ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1293 fs->fs_cstotal.cs_ndir++;
1294 fs->fs_cs(fs, cg).cs_ndir++;
1295 }
1296 bdwrite(bp);
1297 return (cg * fs->fs_ipg + ipref);
1298 }
1299
1300 /*
1301 * Free a block or fragment.
1302 *
1303 * The specified block or fragment is placed back in the
1304 * free map. If a fragment is deallocated, a possible
1305 * block reassembly is checked.
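 * For example (8192/1024 layout), freeing the last two busy
 * fragments of a block whose other six fragments are already free
 * reassembles the whole block: the group's free fragment count is
 * reduced by fs_frag and its free block count goes up by one.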
1306 */
1307 void
1308 ffs_blkfree(ip, bno, size)
1309 struct inode *ip;
1310 ufs_daddr_t bno;
1311 long size;
1312 {
1313 struct cg *cgp;
1314 struct buf *bp;
1315 ufs_daddr_t blkno;
1316 int i, error, cg, blk, frags, bbase;
1317 struct fs *fs = ip->i_fs;
1318 const int needswap = UFS_FSNEEDSWAP(fs);
1319
1320 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
1321 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
1322 printf("dev = 0x%x, bno = %u bsize = %d, size = %ld, fs = %s\n",
1323 ip->i_dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
1324 panic("blkfree: bad size");
1325 }
1326 cg = dtog(fs, bno);
1327 if ((u_int)bno >= fs->fs_size) {
1328 printf("bad block %d, ino %d\n", bno, ip->i_number);
1329 ffs_fserr(fs, ip->i_ffs_uid, "bad block");
1330 return;
1331 }
1332 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1333 (int)fs->fs_cgsize, NOCRED, &bp);
1334 if (error) {
1335 brelse(bp);
1336 return;
1337 }
1338 cgp = (struct cg *)bp->b_data;
1339 if (!cg_chkmagic(cgp, needswap)) {
1340 brelse(bp);
1341 return;
1342 }
1343 cgp->cg_time = ufs_rw32(time.tv_sec, needswap);
1344 bno = dtogd(fs, bno);
1345 if (size == fs->fs_bsize) {
1346 blkno = fragstoblks(fs, bno);
1347 if (!ffs_isfreeblock(fs, cg_blksfree(cgp, needswap), blkno)) {
1348 printf("dev = 0x%x, block = %d, fs = %s\n",
1349 ip->i_dev, bno, fs->fs_fsmnt);
1350 panic("blkfree: freeing free block");
1351 }
1352 ffs_setblock(fs, cg_blksfree(cgp, needswap), blkno);
1353 ffs_clusteracct(fs, cgp, blkno, 1);
1354 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1355 fs->fs_cstotal.cs_nbfree++;
1356 fs->fs_cs(fs, cg).cs_nbfree++;
1357 i = cbtocylno(fs, bno);
1358 ufs_add16(cg_blks(fs, cgp, i, needswap)[cbtorpos(fs, bno)], 1,
1359 needswap);
1360 ufs_add32(cg_blktot(cgp, needswap)[i], 1, needswap);
1361 } else {
1362 bbase = bno - fragnum(fs, bno);
1363 /*
1364 * decrement the counts associated with the old frags
1365 */
1366 blk = blkmap(fs, cg_blksfree(cgp, needswap), bbase);
1367 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1368 /*
1369 * deallocate the fragment
1370 */
1371 frags = numfrags(fs, size);
1372 for (i = 0; i < frags; i++) {
1373 if (isset(cg_blksfree(cgp, needswap), bno + i)) {
1374 printf("dev = 0x%x, block = %d, fs = %s\n",
1375 ip->i_dev, bno + i, fs->fs_fsmnt);
1376 panic("blkfree: freeing free frag");
1377 }
1378 setbit(cg_blksfree(cgp, needswap), bno + i);
1379 }
1380 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1381 fs->fs_cstotal.cs_nffree += i;
1382 fs->fs_cs(fs, cg).cs_nffree += i;
1383 /*
1384 * add back in counts associated with the new frags
1385 */
1386 blk = blkmap(fs, cg_blksfree(cgp, needswap), bbase);
1387 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1388 /*
1389 * if a complete block has been reassembled, account for it
1390 */
1391 blkno = fragstoblks(fs, bbase);
1392 if (ffs_isblock(fs, cg_blksfree(cgp, needswap), blkno)) {
1393 ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
1394 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1395 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1396 ffs_clusteracct(fs, cgp, blkno, 1);
1397 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1398 fs->fs_cstotal.cs_nbfree++;
1399 fs->fs_cs(fs, cg).cs_nbfree++;
1400 i = cbtocylno(fs, bbase);
1401 ufs_add16(cg_blks(fs, cgp, i, needswap)[cbtorpos(fs,
1402 bbase)], 1,
1403 needswap);
1404 ufs_add32(cg_blktot(cgp, needswap)[i], 1, needswap);
1405 }
1406 }
1407 fs->fs_fmod = 1;
1408 bdwrite(bp);
1409 }
1410
1411 #if defined(DIAGNOSTIC) || defined(DEBUG)
1412 /*
1413 * Verify allocation of a block or fragment. Returns true if block or
1414 * fragment is allocated, false if it is free.
1415 */
1416 static int
1417 ffs_checkblk(ip, bno, size)
1418 struct inode *ip;
1419 ufs_daddr_t bno;
1420 long size;
1421 {
1422 struct fs *fs;
1423 struct cg *cgp;
1424 struct buf *bp;
1425 int i, error, frags, free;
1426
1427 fs = ip->i_fs;
1428 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1429 printf("bsize = %d, size = %ld, fs = %s\n",
1430 fs->fs_bsize, size, fs->fs_fsmnt);
1431 panic("checkblk: bad size");
1432 }
1433 if ((u_int)bno >= fs->fs_size)
1434 panic("checkblk: bad block %d", bno);
1435 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1436 (int)fs->fs_cgsize, NOCRED, &bp);
1437 if (error) {
1438 brelse(bp);
1439 return 0;
1440 }
1441 cgp = (struct cg *)bp->b_data;
1442 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1443 brelse(bp);
1444 return 0;
1445 }
1446 bno = dtogd(fs, bno);
1447 if (size == fs->fs_bsize) {
1448 free = ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1449 fragstoblks(fs, bno));
1450 } else {
1451 frags = numfrags(fs, size);
1452 for (free = 0, i = 0; i < frags; i++)
1453 if (isset(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i))
1454 free++;
1455 if (free != 0 && free != frags)
1456 panic("checkblk: partially free fragment");
1457 }
1458 brelse(bp);
1459 return (!free);
1460 }
1461 #endif /* DIAGNOSTIC || DEBUG */
1462
1463 /*
1464 * Free an inode.
1465 */
1466 int
1467 ffs_vfree(v)
1468 void *v;
1469 {
1470 struct vop_vfree_args /* {
1471 struct vnode *a_pvp;
1472 ino_t a_ino;
1473 int a_mode;
1474 } */ *ap = v;
1475
1476 if (DOINGSOFTDEP(ap->a_pvp)) {
1477 softdep_freefile(ap);
1478 return (0);
1479 }
1480 return (ffs_freefile(ap));
1481 }
1482
1483 /*
1484 * Do the actual free operation.
1485 * The specified inode is placed back in the free map.
1486 */
1487 int
1488 ffs_freefile(v)
1489 void *v;
1490 {
1491 struct vop_vfree_args /* {
1492 struct vnode *a_pvp;
1493 ino_t a_ino;
1494 int a_mode;
1495 } */ *ap = v;
1496 struct cg *cgp;
1497 struct inode *pip = VTOI(ap->a_pvp);
1498 struct fs *fs = pip->i_fs;
1499 ino_t ino = ap->a_ino;
1500 struct buf *bp;
1501 int error, cg;
1502 #ifdef FFS_EI
1503 const int needswap = UFS_FSNEEDSWAP(fs);
1504 #endif
1505
1506 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1507 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
1508 pip->i_dev, ino, fs->fs_fsmnt);
1509 cg = ino_to_cg(fs, ino);
1510 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1511 (int)fs->fs_cgsize, NOCRED, &bp);
1512 if (error) {
1513 brelse(bp);
1514 return (error);
1515 }
1516 cgp = (struct cg *)bp->b_data;
1517 if (!cg_chkmagic(cgp, needswap)) {
1518 brelse(bp);
1519 return (0);
1520 }
1521 cgp->cg_time = ufs_rw32(time.tv_sec, needswap);
1522 ino %= fs->fs_ipg;
1523 if (isclr(cg_inosused(cgp, needswap), ino)) {
1524 printf("dev = 0x%x, ino = %d, fs = %s\n",
1525 pip->i_dev, ino, fs->fs_fsmnt);
1526 if (fs->fs_ronly == 0)
1527 panic("ifree: freeing free inode");
1528 }
1529 clrbit(cg_inosused(cgp, needswap), ino);
1530 if (ino < ufs_rw32(cgp->cg_irotor, needswap))
1531 cgp->cg_irotor = ufs_rw32(ino, needswap);
1532 ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
1533 fs->fs_cstotal.cs_nifree++;
1534 fs->fs_cs(fs, cg).cs_nifree++;
1535 if ((ap->a_mode & IFMT) == IFDIR) {
1536 ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
1537 fs->fs_cstotal.cs_ndir--;
1538 fs->fs_cs(fs, cg).cs_ndir--;
1539 }
1540 fs->fs_fmod = 1;
1541 bdwrite(bp);
1542 return (0);
1543 }
1544
1545 /*
1546 * Find a block of the specified size in the specified cylinder group.
1547 *
1548 * It is a panic if a request is made to find a block when none are
1549 * available.
1550 */
1551 static ufs_daddr_t
1552 ffs_mapsearch(fs, cgp, bpref, allocsiz)
1553 struct fs *fs;
1554 struct cg *cgp;
1555 ufs_daddr_t bpref;
1556 int allocsiz;
1557 {
1558 ufs_daddr_t bno;
1559 int start, len, loc, i;
1560 int blk, field, subfield, pos;
1561 int ostart, olen;
1562 #ifdef FFS_EI
1563 const int needswap = UFS_FSNEEDSWAP(fs);
1564 #endif
1565
1566 /*
1567 * find the fragment by searching through the free block
1568 * map for an appropriate bit pattern
1569 */
1570 if (bpref)
1571 start = dtogd(fs, bpref) / NBBY;
1572 else
1573 start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
1574 len = howmany(fs->fs_fpg, NBBY) - start;
1575 ostart = start;
1576 olen = len;
1577 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp, needswap)[start],
1578 (u_char *)fragtbl[fs->fs_frag],
1579 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1580 if (loc == 0) {
1581 len = start + 1;
1582 start = 0;
1583 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp, needswap)[0],
1584 (u_char *)fragtbl[fs->fs_frag],
1585 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1586 if (loc == 0) {
1587 printf("start = %d, len = %d, fs = %s\n",
1588 ostart, olen, fs->fs_fsmnt);
1589 printf("offset=%d %ld\n",
1590 ufs_rw32(cgp->cg_freeoff, needswap),
1591 (long)cg_blksfree(cgp, needswap) - (long)cgp);
1592 panic("ffs_alloccg: map corrupted");
1593 /* NOTREACHED */
1594 }
1595 }
1596 bno = (start + len - loc) * NBBY;
1597 cgp->cg_frotor = ufs_rw32(bno, needswap);
1598 /*
1599 * found the byte in the map
1600 * sift through the bits to find the selected frag
1601 */
1602 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1603 blk = blkmap(fs, cg_blksfree(cgp, needswap), bno);
1604 blk <<= 1;
1605 field = around[allocsiz];
1606 subfield = inside[allocsiz];
1607 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1608 if ((blk & field) == subfield)
1609 return (bno + pos);
1610 field <<= 1;
1611 subfield <<= 1;
1612 }
1613 }
1614 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1615 panic("ffs_alloccg: block not in map");
1616 return (-1);
1617 }
1618
1619 /*
1620 * Update the cluster map because of an allocation or free.
1621 *
1622 * Cnt == 1 means free; cnt == -1 means allocating.
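 * Example (cnt == 1): freeing a block with a 2-block free run
 * ahead of it and a single free block behind it merges them into
 * a 4-block cluster, so sump[4] is incremented while sump[2] and
 * sump[1] are each decremented.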
1623 */
1624 void
1625 ffs_clusteracct(fs, cgp, blkno, cnt)
1626 struct fs *fs;
1627 struct cg *cgp;
1628 ufs_daddr_t blkno;
1629 int cnt;
1630 {
1631 int32_t *sump;
1632 int32_t *lp;
1633 u_char *freemapp, *mapp;
1634 int i, start, end, forw, back, map, bit;
1635 #ifdef FFS_EI
1636 const int needswap = UFS_FSNEEDSWAP(fs);
1637 #endif
1638
1639 if (fs->fs_contigsumsize <= 0)
1640 return;
1641 freemapp = cg_clustersfree(cgp, needswap);
1642 sump = cg_clustersum(cgp, needswap);
1643 /*
1644 * Allocate or clear the actual block.
1645 */
1646 if (cnt > 0)
1647 setbit(freemapp, blkno);
1648 else
1649 clrbit(freemapp, blkno);
1650 /*
1651 * Find the size of the cluster going forward.
1652 */
1653 start = blkno + 1;
1654 end = start + fs->fs_contigsumsize;
1655 if (end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
1656 end = ufs_rw32(cgp->cg_nclusterblks, needswap);
1657 mapp = &freemapp[start / NBBY];
1658 map = *mapp++;
1659 bit = 1 << (start % NBBY);
1660 for (i = start; i < end; i++) {
1661 if ((map & bit) == 0)
1662 break;
1663 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1664 bit <<= 1;
1665 } else {
1666 map = *mapp++;
1667 bit = 1;
1668 }
1669 }
1670 forw = i - start;
1671 /*
1672 * Find the size of the cluster going backward.
1673 */
1674 start = blkno - 1;
1675 end = start - fs->fs_contigsumsize;
1676 if (end < 0)
1677 end = -1;
1678 mapp = &freemapp[start / NBBY];
1679 map = *mapp--;
1680 bit = 1 << (start % NBBY);
1681 for (i = start; i > end; i--) {
1682 if ((map & bit) == 0)
1683 break;
1684 if ((i & (NBBY - 1)) != 0) {
1685 bit >>= 1;
1686 } else {
1687 map = *mapp--;
1688 bit = 1 << (NBBY - 1);
1689 }
1690 }
1691 back = start - i;
1692 /*
1693 * Account for old cluster and the possibly new forward and
1694 * back clusters.
1695 */
1696 i = back + forw + 1;
1697 if (i > fs->fs_contigsumsize)
1698 i = fs->fs_contigsumsize;
1699 ufs_add32(sump[i], cnt, needswap);
1700 if (back > 0)
1701 ufs_add32(sump[back], -cnt, needswap);
1702 if (forw > 0)
1703 ufs_add32(sump[forw], -cnt, needswap);
1704
1705 /*
1706 * Update cluster summary information.
1707 */
1708 lp = &sump[fs->fs_contigsumsize];
1709 for (i = fs->fs_contigsumsize; i > 0; i--)
1710 if (ufs_rw32(*lp--, needswap) > 0)
1711 break;
1712 fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
1713 }
1714
1715 /*
1716 * Fserr prints the name of a file system with an error diagnostic.
1717 *
1718 * The form of the error message is:
1719 * fs: error message
1720 */
1721 static void
1722 ffs_fserr(fs, uid, cp)
1723 struct fs *fs;
1724 u_int uid;
1725 char *cp;
1726 {
1727
1728 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
1729 }
1730