1 /* $NetBSD: ffs_alloc.c,v 1.96 2006/11/16 01:33:53 christos Exp $ */
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.96 2006/11/16 01:33:53 christos Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_ffs.h"
48 #include "opt_quota.h"
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/buf.h>
54 #include <sys/proc.h>
55 #include <sys/vnode.h>
56 #include <sys/mount.h>
57 #include <sys/kernel.h>
58 #include <sys/syslog.h>
59 #include <sys/kauth.h>
60
61 #include <miscfs/specfs/specdev.h>
62 #include <ufs/ufs/quota.h>
63 #include <ufs/ufs/ufsmount.h>
64 #include <ufs/ufs/inode.h>
65 #include <ufs/ufs/ufs_extern.h>
66 #include <ufs/ufs/ufs_bswap.h>
67
68 #include <ufs/ffs/fs.h>
69 #include <ufs/ffs/ffs_extern.h>
70
71 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
72 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
73 #ifdef XXXUBC
74 static daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int);
75 #endif
76 static ino_t ffs_dirpref(struct inode *);
77 static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
78 static void ffs_fserr(struct fs *, u_int, const char *);
79 static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int,
80 daddr_t (*)(struct inode *, int, daddr_t, int));
81 static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int);
82 static int32_t ffs_mapsearch(struct fs *, struct cg *,
83 daddr_t, int);
84 #if defined(DIAGNOSTIC) || defined(DEBUG)
85 #ifdef XXXUBC
86 static int ffs_checkblk(struct inode *, daddr_t, long size);
87 #endif
88 #endif
89
90 /* if 1, changes in optimization strategy are logged */
91 int ffs_log_changeopt = 0;
92
93 /* in ffs_tables.c */
94 extern const int inside[], around[];
95 extern const u_char * const fragtbl[];
96
97 /*
98 * Allocate a block in the file system.
99 *
100 * The size of the requested block is given, which must be some
101 * multiple of fs_fsize and <= fs_bsize.
102 * A preference may be optionally specified. If a preference is given
103 * the following hierarchy is used to allocate a block:
104 * 1) allocate the requested block.
105 * 2) allocate a rotationally optimal block in the same cylinder.
106 * 3) allocate a block in the same cylinder group.
107 * 4) quadratically rehash into other cylinder groups, until an
108 * available block is located.
109 * If no block preference is given the following hierarchy is used
110 * to allocate a block:
111 * 1) allocate a block in the cylinder group that contains the
112 * inode for the file.
113 * 2) quadratically rehash into other cylinder groups, until an
114 * available block is located.
115 */
116 int
117 ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
118 kauth_cred_t cred, daddr_t *bnp)
119 {
120 struct fs *fs;
121 daddr_t bno;
122 int cg;
123 #ifdef QUOTA
124 int error;
125 #endif
126
127 fs = ip->i_fs;
128
129 #ifdef UVM_PAGE_TRKOWN
130 if (ITOV(ip)->v_type == VREG &&
131 lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
132 struct vm_page *pg;
133 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
134 voff_t off = trunc_page(lblktosize(fs, lbn));
135 voff_t endoff = round_page(lblktosize(fs, lbn) + size);
136
137 simple_lock(&uobj->vmobjlock);
138 while (off < endoff) {
139 pg = uvm_pagelookup(uobj, off);
140 KASSERT(pg != NULL);
141 KASSERT(pg->owner == curproc->p_pid);
142 off += PAGE_SIZE;
143 }
144 simple_unlock(&uobj->vmobjlock);
145 }
146 #endif
147
148 *bnp = 0;
149 #ifdef DIAGNOSTIC
150 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
151 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
152 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
153 panic("ffs_alloc: bad size");
154 }
155 if (cred == NOCRED)
156 panic("ffs_alloc: missing credential");
157 #endif /* DIAGNOSTIC */
158 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
159 goto nospace;
160 if (kauth_cred_geteuid(cred) != 0 && freespace(fs, fs->fs_minfree) <= 0)
161 goto nospace;
162 #ifdef QUOTA
163 if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
164 return (error);
165 #endif
166 if (bpref >= fs->fs_size)
167 bpref = 0;
168 if (bpref == 0)
169 cg = ino_to_cg(fs, ip->i_number);
170 else
171 cg = dtog(fs, bpref);
172 bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
173 if (bno > 0) {
174 DIP_ADD(ip, blocks, btodb(size));
175 ip->i_flag |= IN_CHANGE | IN_UPDATE;
176 *bnp = bno;
177 return (0);
178 }
179 #ifdef QUOTA
180 /*
181 * Restore user's disk quota because allocation failed.
182 */
183 (void) chkdq(ip, -btodb(size), cred, FORCE);
184 #endif
185 nospace:
186 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
187 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
188 return (ENOSPC);
189 }
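/*
 * Usage sketch (not part of this file, shown for orientation): callers
 * such as ffs_balloc() normally derive the preference from
 * ffs_blkpref_ufs1()/ffs_blkpref_ufs2() before asking for a block, e.g.
 *
 *	bpref = ffs_blkpref_ufs1(ip, lbn, indx, &bap[0]);
 *	error = ffs_alloc(ip, lbn, bpref, fs->fs_bsize, cred, &newb);
 *
 * so the hierarchy described above starts from a sensible bpref.
 */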
190
191 /*
192 * Reallocate a fragment to a bigger size
193 *
194 * The number and size of the old block are given, and a preference
195 * and new size are also specified. The allocator attempts to extend
196 * the original block. Failing that, the regular block allocator is
197 * invoked to get an appropriate block.
198 */
199 int
200 ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
201 int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop)
202 {
203 struct fs *fs;
204 struct buf *bp;
205 int cg, request, error;
206 daddr_t bprev, bno;
207
208 fs = ip->i_fs;
209 #ifdef UVM_PAGE_TRKOWN
210 if (ITOV(ip)->v_type == VREG) {
211 struct vm_page *pg;
212 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
213 voff_t off = trunc_page(lblktosize(fs, lbprev));
214 voff_t endoff = round_page(lblktosize(fs, lbprev) + osize);
215
216 simple_lock(&uobj->vmobjlock);
217 while (off < endoff) {
218 pg = uvm_pagelookup(uobj, off);
219 KASSERT(pg != NULL);
220 KASSERT(pg->owner == curproc->p_pid);
221 KASSERT((pg->flags & PG_CLEAN) == 0);
222 off += PAGE_SIZE;
223 }
224 simple_unlock(&uobj->vmobjlock);
225 }
226 #endif
227
228 #ifdef DIAGNOSTIC
229 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
230 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
231 printf(
232 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
233 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
234 panic("ffs_realloccg: bad size");
235 }
236 if (cred == NOCRED)
237 panic("ffs_realloccg: missing credential");
238 #endif /* DIAGNOSTIC */
239 if (kauth_cred_geteuid(cred) != 0 && freespace(fs, fs->fs_minfree) <= 0)
240 goto nospace;
241 if (fs->fs_magic == FS_UFS2_MAGIC)
242 bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs));
243 else
244 bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs));
245
246 if (bprev == 0) {
247 printf("dev = 0x%x, bsize = %d, bprev = %" PRId64 ", fs = %s\n",
248 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
249 panic("ffs_realloccg: bad bprev");
250 }
251 /*
252 * Allocate the extra space in the buffer.
253 */
254 if (bpp != NULL &&
255 (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) != 0) {
256 brelse(bp);
257 return (error);
258 }
259 #ifdef QUOTA
260 if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
261 if (bpp != NULL) {
262 brelse(bp);
263 }
264 return (error);
265 }
266 #endif
267 /*
268 * Check for extension in the existing location.
269 */
270 cg = dtog(fs, bprev);
271 if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
272 DIP_ADD(ip, blocks, btodb(nsize - osize));
273 ip->i_flag |= IN_CHANGE | IN_UPDATE;
274
275 if (bpp != NULL) {
276 if (bp->b_blkno != fsbtodb(fs, bno))
277 panic("bad blockno");
278 allocbuf(bp, nsize, 1);
279 bp->b_flags |= B_DONE;
280 memset(bp->b_data + osize, 0, nsize - osize);
281 *bpp = bp;
282 }
283 if (blknop != NULL) {
284 *blknop = bno;
285 }
286 return (0);
287 }
288 /*
289 * Allocate a new disk location.
290 */
291 if (bpref >= fs->fs_size)
292 bpref = 0;
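/*
 * The switch below implements a simple hysteresis between the two
 * strategies.  A worked example, using a fs_minfree of 5% (an
 * assumption; the actual value comes from the superblock): while
 * optimizing for SPACE we switch to TIME once fragmented free space
 * (cs_nffree) drops to at most fs_dsize * 5 / 200, i.e. 2.5% of the
 * data area; while optimizing for TIME we switch back to SPACE once
 * cs_nffree grows to fs_dsize * (5 - 2) / 100, i.e. 3%.
 */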
293 switch ((int)fs->fs_optim) {
294 case FS_OPTSPACE:
295 /*
296 * Allocate an exact sized fragment. Although this makes
297 * best use of space, we will waste time relocating it if
298 * the file continues to grow. If the fragmentation is
299 * less than half of the minimum free reserve, we choose
300 * to begin optimizing for time.
301 */
302 request = nsize;
303 if (fs->fs_minfree < 5 ||
304 fs->fs_cstotal.cs_nffree >
305 fs->fs_dsize * fs->fs_minfree / (2 * 100))
306 break;
307
308 if (ffs_log_changeopt) {
309 log(LOG_NOTICE,
310 "%s: optimization changed from SPACE to TIME\n",
311 fs->fs_fsmnt);
312 }
313
314 fs->fs_optim = FS_OPTTIME;
315 break;
316 case FS_OPTTIME:
317 /*
318 * At this point we have discovered a file that is trying to
319 * grow a small fragment to a larger fragment. To save time,
320 * we allocate a full sized block, then free the unused portion.
321 * If the file continues to grow, the `ffs_fragextend' call
322 * above will be able to grow it in place without further
323 * copying. If aberrant programs cause disk fragmentation to
324 * grow within 2% of the free reserve, we choose to begin
325 * optimizing for space.
326 */
327 request = fs->fs_bsize;
328 if (fs->fs_cstotal.cs_nffree <
329 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
330 break;
331
332 if (ffs_log_changeopt) {
333 log(LOG_NOTICE,
334 "%s: optimization changed from TIME to SPACE\n",
335 fs->fs_fsmnt);
336 }
337
338 fs->fs_optim = FS_OPTSPACE;
339 break;
340 default:
341 printf("dev = 0x%x, optim = %d, fs = %s\n",
342 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
343 panic("ffs_realloccg: bad optim");
344 /* NOTREACHED */
345 }
346 bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
347 if (bno > 0) {
348 if (!DOINGSOFTDEP(ITOV(ip)))
349 ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
350 ip->i_number);
351 if (nsize < request)
352 ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize),
353 (long)(request - nsize), ip->i_number);
354 DIP_ADD(ip, blocks, btodb(nsize - osize));
355 ip->i_flag |= IN_CHANGE | IN_UPDATE;
356 if (bpp != NULL) {
357 bp->b_blkno = fsbtodb(fs, bno);
358 allocbuf(bp, nsize, 1);
359 bp->b_flags |= B_DONE;
360 memset(bp->b_data + osize, 0, (u_int)nsize - osize);
361 *bpp = bp;
362 }
363 if (blknop != NULL) {
364 *blknop = bno;
365 }
366 return (0);
367 }
368 #ifdef QUOTA
369 /*
370 * Restore user's disk quota because allocation failed.
371 */
372 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
373 #endif
374 if (bpp != NULL) {
375 brelse(bp);
376 }
377
378 nospace:
379 /*
380 * no space available
381 */
382 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
383 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
384 return (ENOSPC);
385 }
386
387 #if 0
388 /*
389 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
390 *
391 * The vnode and an array of buffer pointers for a range of sequential
392 * logical blocks to be made contiguous is given. The allocator attempts
393 * to find a range of sequential blocks starting as close as possible
394 * from the end of the allocation for the logical block immediately
395 * preceding the current range. If successful, the physical block numbers
396 * in the buffer pointers and in the inode are changed to reflect the new
397 * allocation. If unsuccessful, the allocation is left unchanged. The
398 * success in doing the reallocation is returned. Note that the error
399 * return is not reflected back to the user. Rather the previous block
400 * allocation will be used.
401 *
402 */
403 #ifdef XXXUBC
404 #ifdef DEBUG
405 #include <sys/sysctl.h>
406 int prtrealloc = 0;
407 struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
408 #endif
409 #endif
410
411 /*
412 * NOTE: when re-enabling this, it must be updated for UFS2.
413 */
414
415 int doasyncfree = 1;
416
417 int
418 ffs_reallocblks(void *v)
419 {
420 #ifdef XXXUBC
421 struct vop_reallocblks_args /* {
422 struct vnode *a_vp;
423 struct cluster_save *a_buflist;
424 } */ *ap = v;
425 struct fs *fs;
426 struct inode *ip;
427 struct vnode *vp;
428 struct buf *sbp, *ebp;
429 int32_t *bap, *ebap = NULL, *sbap; /* XXX ondisk32 */
430 struct cluster_save *buflist;
431 daddr_t start_lbn, end_lbn, soff, newblk, blkno;
432 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
433 int i, len, start_lvl, end_lvl, pref, ssize;
434 #endif /* XXXUBC */
435
436 /* XXXUBC don't reallocblks for now */
437 return ENOSPC;
438
439 #ifdef XXXUBC
440 vp = ap->a_vp;
441 ip = VTOI(vp);
442 fs = ip->i_fs;
443 if (fs->fs_contigsumsize <= 0)
444 return (ENOSPC);
445 buflist = ap->a_buflist;
446 len = buflist->bs_nchildren;
447 start_lbn = buflist->bs_children[0]->b_lblkno;
448 end_lbn = start_lbn + len - 1;
449 #ifdef DIAGNOSTIC
450 for (i = 0; i < len; i++)
451 if (!ffs_checkblk(ip,
452 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
453 panic("ffs_reallocblks: unallocated block 1");
454 for (i = 1; i < len; i++)
455 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
456 panic("ffs_reallocblks: non-logical cluster");
457 blkno = buflist->bs_children[0]->b_blkno;
458 ssize = fsbtodb(fs, fs->fs_frag);
459 for (i = 1; i < len - 1; i++)
460 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
461 panic("ffs_reallocblks: non-physical cluster %d", i);
462 #endif
463 /*
464 * If the latest allocation is in a new cylinder group, assume that
465 * the filesystem has decided to move and do not force it back to
466 * the previous cylinder group.
467 */
468 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
469 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
470 return (ENOSPC);
471 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
472 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
473 return (ENOSPC);
474 /*
475 * Get the starting offset and block map for the first block.
476 */
477 if (start_lvl == 0) {
478 sbap = &ip->i_ffs1_db[0];
479 soff = start_lbn;
480 } else {
481 idp = &start_ap[start_lvl - 1];
482 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
483 brelse(sbp);
484 return (ENOSPC);
485 }
486 sbap = (int32_t *)sbp->b_data;
487 soff = idp->in_off;
488 }
489 /*
490 * Find the preferred location for the cluster.
491 */
492 pref = ffs_blkpref(ip, start_lbn, soff, sbap);
493 /*
494 * If the block range spans two block maps, get the second map.
495 */
496 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
497 ssize = len;
498 } else {
499 #ifdef DIAGNOSTIC
500 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
501 panic("ffs_reallocblk: start == end");
502 #endif
503 ssize = len - (idp->in_off + 1);
504 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
505 goto fail;
506 ebap = (int32_t *)ebp->b_data; /* XXX ondisk32 */
507 }
508 /*
509 * Search the block map looking for an allocation of the desired size.
510 */
511 if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
512 len, ffs_clusteralloc)) == 0)
513 goto fail;
514 /*
515 * We have found a new contiguous block.
516 *
517 * First we have to replace the old block pointers with the new
518 * block pointers in the inode and indirect blocks associated
519 * with the file.
520 */
521 #ifdef DEBUG
522 if (prtrealloc)
523 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
524 start_lbn, end_lbn);
525 #endif
526 blkno = newblk;
527 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
528 daddr_t ba;
529
530 if (i == ssize) {
531 bap = ebap;
532 soff = -i;
533 }
534 /* XXX ondisk32 */
535 ba = ufs_rw32(*bap, UFS_FSNEEDSWAP(fs));
536 #ifdef DIAGNOSTIC
537 if (!ffs_checkblk(ip,
538 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
539 panic("ffs_reallocblks: unallocated block 2");
540 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != ba)
541 panic("ffs_reallocblks: alloc mismatch");
542 #endif
543 #ifdef DEBUG
544 if (prtrealloc)
545 printf(" %d,", ba);
546 #endif
547 if (DOINGSOFTDEP(vp)) {
548 if (sbap == &ip->i_ffs1_db[0] && i < ssize)
549 softdep_setup_allocdirect(ip, start_lbn + i,
550 blkno, ba, fs->fs_bsize, fs->fs_bsize,
551 buflist->bs_children[i]);
552 else
553 softdep_setup_allocindir_page(ip, start_lbn + i,
554 i < ssize ? sbp : ebp, soff + i, blkno,
555 ba, buflist->bs_children[i]);
556 }
557 /* XXX ondisk32 */
558 *bap++ = ufs_rw32((u_int32_t)blkno, UFS_FSNEEDSWAP(fs));
559 }
560 /*
561 * Next we must write out the modified inode and indirect blocks.
562 * For strict correctness, the writes should be synchronous since
563 * the old block values may have been written to disk. In practice
564 * they are almost never written, but if we are concerned about
565 * strict correctness, the `doasyncfree' flag should be set to zero.
566 *
567 * The test on `doasyncfree' should be changed to test a flag
568 * that shows whether the associated buffers and inodes have
569 * been written. The flag should be set when the cluster is
570 * started and cleared whenever the buffer or inode is flushed.
571 * We can then check below to see if it is set, and do the
572 * synchronous write only when it has been cleared.
573 */
574 if (sbap != &ip->i_ffs1_db[0]) {
575 if (doasyncfree)
576 bdwrite(sbp);
577 else
578 bwrite(sbp);
579 } else {
580 ip->i_flag |= IN_CHANGE | IN_UPDATE;
581 if (!doasyncfree)
582 ffs_update(vp, NULL, NULL, 1);
583 }
584 if (ssize < len) {
585 if (doasyncfree)
586 bdwrite(ebp);
587 else
588 bwrite(ebp);
589 }
590 /*
591 * Last, free the old blocks and assign the new blocks to the buffers.
592 */
593 #ifdef DEBUG
594 if (prtrealloc)
595 printf("\n\tnew:");
596 #endif
597 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
598 if (!DOINGSOFTDEP(vp))
599 ffs_blkfree(fs, ip->i_devvp,
600 dbtofsb(fs, buflist->bs_children[i]->b_blkno),
601 fs->fs_bsize, ip->i_number);
602 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
603 #ifdef DEBUG
604 if (!ffs_checkblk(ip,
605 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
606 panic("ffs_reallocblks: unallocated block 3");
607 if (prtrealloc)
608 printf(" %d,", blkno);
609 #endif
610 }
611 #ifdef DEBUG
612 if (prtrealloc) {
613 prtrealloc--;
614 printf("\n");
615 }
616 #endif
617 return (0);
618
619 fail:
620 if (ssize < len)
621 brelse(ebp);
622 if (sbap != &ip->i_ffs1_db[0])
623 brelse(sbp);
624 return (ENOSPC);
625 #endif /* XXXUBC */
626 }
627 #endif /* 0 */
628
629 /*
630 * Allocate an inode in the file system.
631 *
632 * If allocating a directory, use ffs_dirpref to select the inode.
633 * If allocating in a directory, the following hierarchy is followed:
634 * 1) allocate the preferred inode.
635 * 2) allocate an inode in the same cylinder group.
636 * 3) quadratically rehash into other cylinder groups, until an
637 * available inode is located.
638 * If no inode preference is given the following hierarchy is used
639 * to allocate an inode:
640 * 1) allocate an inode in cylinder group 0.
641 * 2) quadratically rehash into other cylinder groups, until an
642 * available inode is located.
643 */
644 int
645 ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
646 struct vnode **vpp)
647 {
648 struct inode *pip;
649 struct fs *fs;
650 struct inode *ip;
651 struct timespec ts;
652 ino_t ino, ipref;
653 int cg, error;
654
655 *vpp = NULL;
656 pip = VTOI(pvp);
657 fs = pip->i_fs;
658 if (fs->fs_cstotal.cs_nifree == 0)
659 goto noinodes;
660
661 if ((mode & IFMT) == IFDIR)
662 ipref = ffs_dirpref(pip);
663 else
664 ipref = pip->i_number;
665 if (ipref >= fs->fs_ncg * fs->fs_ipg)
666 ipref = 0;
667 cg = ino_to_cg(fs, ipref);
668 /*
669 * Track the number of dirs created one after another
670 * in the same cg without intervening files.
671 */
672 if ((mode & IFMT) == IFDIR) {
673 if (fs->fs_contigdirs[cg] < 255)
674 fs->fs_contigdirs[cg]++;
675 } else {
676 if (fs->fs_contigdirs[cg] > 0)
677 fs->fs_contigdirs[cg]--;
678 }
679 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg);
680 if (ino == 0)
681 goto noinodes;
682 error = VFS_VGET(pvp->v_mount, ino, vpp);
683 if (error) {
684 ffs_vfree(pvp, ino, mode);
685 return (error);
686 }
687 KASSERT((*vpp)->v_type == VNON);
688 ip = VTOI(*vpp);
689 if (ip->i_mode) {
690 #if 0
691 printf("mode = 0%o, inum = %d, fs = %s\n",
692 ip->i_mode, ip->i_number, fs->fs_fsmnt);
693 #else
694 printf("dmode %x mode %x dgen %x gen %x\n",
695 DIP(ip, mode), ip->i_mode,
696 DIP(ip, gen), ip->i_gen);
697 printf("size %llx blocks %llx\n",
698 (long long)DIP(ip, size), (long long)DIP(ip, blocks));
699 printf("ino %llu ipref %llu\n", (unsigned long long)ino,
700 (unsigned long long)ipref);
701 #if 0
702 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
703 (int)fs->fs_bsize, NOCRED, &bp);
704 #endif
705
706 #endif
707 panic("ffs_valloc: dup alloc");
708 }
709 if (DIP(ip, blocks)) { /* XXX */
710 printf("free inode %s/%llu had %" PRId64 " blocks\n",
711 fs->fs_fsmnt, (unsigned long long)ino, DIP(ip, blocks));
712 DIP_ASSIGN(ip, blocks, 0);
713 }
714 ip->i_flag &= ~IN_SPACECOUNTED;
715 ip->i_flags = 0;
716 DIP_ASSIGN(ip, flags, 0);
717 /*
718 * Set up a new generation number for this inode.
719 */
720 ip->i_gen++;
721 DIP_ASSIGN(ip, gen, ip->i_gen);
722 if (fs->fs_magic == FS_UFS2_MAGIC) {
723 vfs_timestamp(&ts);
724 ip->i_ffs2_birthtime = ts.tv_sec;
725 ip->i_ffs2_birthnsec = ts.tv_nsec;
726 }
727 return (0);
728 noinodes:
729 ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
730 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
731 return (ENOSPC);
732 }
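/*
 * (Orientation note, not from this file.)  ffs_valloc() is normally
 * reached through the UFS_VALLOC() hook from the ufs_makeinode() and
 * ufs_mkdir() paths; the ENOSPC returned above propagates back to the
 * creating process.
 */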
733
734 /*
735 * Find a cylinder group in which to place a directory.
736 *
737 * The policy implemented by this algorithm is to allocate a
738 * directory inode in the same cylinder group as its parent
739 * directory, but also to reserve space for its files' inodes
740 * and data. Restrict the number of directories which may be
741 * allocated one after another in the same cylinder group
742 * without intervening allocation of files.
743 *
744 * If we allocate a first level directory then force allocation
745 * in another cylinder group.
746 */
747 static ino_t
748 ffs_dirpref(struct inode *pip)
749 {
750 register struct fs *fs;
751 int cg, prefcg;
752 int64_t dirsize, cgsize, curdsz;
753 int avgifree, avgbfree, avgndir;
754 int minifree, minbfree, maxndir;
755 int mincg, minndir;
756 int maxcontigdirs;
757
758 fs = pip->i_fs;
759
760 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
761 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
762 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
763
764 /*
765 * Force allocation in another cg if creating a first level dir.
766 */
767 if (ITOV(pip)->v_flag & VROOT) {
768 prefcg = random() % fs->fs_ncg;
769 mincg = prefcg;
770 minndir = fs->fs_ipg;
771 for (cg = prefcg; cg < fs->fs_ncg; cg++)
772 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
773 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
774 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
775 mincg = cg;
776 minndir = fs->fs_cs(fs, cg).cs_ndir;
777 }
778 for (cg = 0; cg < prefcg; cg++)
779 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
780 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
781 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
782 mincg = cg;
783 minndir = fs->fs_cs(fs, cg).cs_ndir;
784 }
785 return ((ino_t)(fs->fs_ipg * mincg));
786 }
787
788 /*
789 * Count various limits used for
790 * optimal allocation of a directory inode.
791 */
792 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
793 minifree = avgifree - fs->fs_ipg / 4;
794 if (minifree < 0)
795 minifree = 0;
796 minbfree = avgbfree - fragstoblks(fs, fs->fs_fpg) / 4;
797 if (minbfree < 0)
798 minbfree = 0;
799 cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
800 dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
801 if (avgndir != 0) {
802 curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
803 if (dirsize < curdsz)
804 dirsize = curdsz;
805 }
806 if (cgsize < dirsize * 255)
807 maxcontigdirs = cgsize / dirsize;
808 else
809 maxcontigdirs = 255;
810 if (fs->fs_avgfpdir > 0)
811 maxcontigdirs = min(maxcontigdirs,
812 fs->fs_ipg / fs->fs_avgfpdir);
813 if (maxcontigdirs == 0)
814 maxcontigdirs = 1;
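/*
 * At this point maxcontigdirs estimates how many directories, each
 * with roughly "dirsize" bytes of data, fit in one cylinder group.
 * Hypothetical numbers: a 64MB cg with an expected 1MB of data per
 * directory gives maxcontigdirs == 64 (assuming fs_ipg / fs_avgfpdir
 * does not clamp it further), so at most 64 directories are created
 * back to back in a cg, as tracked by fs_contigdirs[], which is
 * maintained in ffs_valloc() and checked below.
 */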
815
816 /*
817 * Limit number of dirs in one cg and reserve space for
818 * regular files, but only if we have no deficit in
819 * inodes or space.
820 */
821 prefcg = ino_to_cg(fs, pip->i_number);
822 for (cg = prefcg; cg < fs->fs_ncg; cg++)
823 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
824 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
825 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
826 if (fs->fs_contigdirs[cg] < maxcontigdirs)
827 return ((ino_t)(fs->fs_ipg * cg));
828 }
829 for (cg = 0; cg < prefcg; cg++)
830 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
831 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
832 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
833 if (fs->fs_contigdirs[cg] < maxcontigdirs)
834 return ((ino_t)(fs->fs_ipg * cg));
835 }
836 /*
837 * This is a backstop when we are deficient in space.
838 */
839 for (cg = prefcg; cg < fs->fs_ncg; cg++)
840 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
841 return ((ino_t)(fs->fs_ipg * cg));
842 for (cg = 0; cg < prefcg; cg++)
843 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
844 break;
845 return ((ino_t)(fs->fs_ipg * cg));
846 }
847
848 /*
849 * Select the desired position for the next block in a file. The file is
850 * logically divided into sections. The first section is composed of the
851 * direct blocks. Each additional section contains fs_maxbpg blocks.
852 *
853 * If no blocks have been allocated in the first section, the policy is to
854 * request a block in the same cylinder group as the inode that describes
855 * the file. If no blocks have been allocated in any other section, the
856 * policy is to place the section in a cylinder group with a greater than
857 * average number of free blocks. An appropriate cylinder group is found
858 * by using a rotor that sweeps the cylinder groups. When a new group of
859 * blocks is needed, the sweep begins in the cylinder group following the
860 * cylinder group from which the previous allocation was made. The sweep
861 * continues until a cylinder group with greater than the average number
862 * of free blocks is found. If the allocation is for the first block in an
863 * indirect block, the information on the previous allocation is unavailable;
864 * here a best guess is made based upon the logical block number being
865 * allocated.
866 *
867 * If a section is already partially allocated, the policy is to
868 * contiguously allocate fs_maxcontig blocks. The end of one of these
869 * contiguous blocks and the beginning of the next is laid out
870 * contiguously if possible.
871 */
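/*
 * In both variants below, "bap" is the array of block pointers at the
 * current level (the direct blocks, or the contents of an indirect
 * block) and "indx" is the slot being filled, so bap[indx - 1] is the
 * block allocated immediately before the one being requested.
 */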
872 daddr_t
873 ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
874 int32_t *bap /* XXX ondisk32 */)
875 {
876 struct fs *fs;
877 int cg;
878 int avgbfree, startcg;
879
880 fs = ip->i_fs;
881 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
882 if (lbn < NDADDR + NINDIR(fs)) {
883 cg = ino_to_cg(fs, ip->i_number);
884 return (fs->fs_fpg * cg + fs->fs_frag);
885 }
886 /*
887 * Find a cylinder with greater than average number of
888 * unused data blocks.
889 */
890 if (indx == 0 || bap[indx - 1] == 0)
891 startcg =
892 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
893 else
894 startcg = dtog(fs,
895 ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
896 startcg %= fs->fs_ncg;
897 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
898 for (cg = startcg; cg < fs->fs_ncg; cg++)
899 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
900 return (fs->fs_fpg * cg + fs->fs_frag);
901 }
902 for (cg = 0; cg < startcg; cg++)
903 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
904 return (fs->fs_fpg * cg + fs->fs_frag);
905 }
906 return (0);
907 }
908 /*
909 * We just always try to lay things out contiguously.
910 */
911 return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
912 }
913
914 daddr_t
915 ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
916 {
917 struct fs *fs;
918 int cg;
919 int avgbfree, startcg;
920
921 fs = ip->i_fs;
922 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
923 if (lbn < NDADDR + NINDIR(fs)) {
924 cg = ino_to_cg(fs, ip->i_number);
925 return (fs->fs_fpg * cg + fs->fs_frag);
926 }
927 /*
928 * Find a cylinder with greater than average number of
929 * unused data blocks.
930 */
931 if (indx == 0 || bap[indx - 1] == 0)
932 startcg =
933 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
934 else
935 startcg = dtog(fs,
936 ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
937 startcg %= fs->fs_ncg;
938 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
939 for (cg = startcg; cg < fs->fs_ncg; cg++)
940 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
941 return (fs->fs_fpg * cg + fs->fs_frag);
942 }
943 for (cg = 0; cg < startcg; cg++)
944 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
945 return (fs->fs_fpg * cg + fs->fs_frag);
946 }
947 return (0);
948 }
949 /*
950 * We just always try to lay things out contiguously.
951 */
952 return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
953 }
954
955
956 /*
957 * Implement the cylinder overflow algorithm.
958 *
959 * The policy implemented by this algorithm is:
960 * 1) allocate the block in its requested cylinder group.
961 * 2) quadratically rehash on the cylinder group number.
962 * 3) brute force search for a free block.
963 */
964 /*VARARGS5*/
965 static daddr_t
966 ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
967 int size /* size for data blocks, mode for inodes */,
968 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
969 {
970 struct fs *fs;
971 daddr_t result;
972 int i, icg = cg;
973
974 fs = ip->i_fs;
975 /*
976 * 1: preferred cylinder group
977 */
978 result = (*allocator)(ip, cg, pref, size);
979 if (result)
980 return (result);
981 /*
982 * 2: quadratic rehash
983 */
984 for (i = 1; i < fs->fs_ncg; i *= 2) {
985 cg += i;
986 if (cg >= fs->fs_ncg)
987 cg -= fs->fs_ncg;
988 result = (*allocator)(ip, cg, 0, size);
989 if (result)
990 return (result);
991 }
992 /*
993 * 3: brute force search
994 * Note that we start at i == 2, since 0 was checked initially,
995 * and 1 is always checked in the quadratic rehash.
996 */
997 cg = (icg + 2) % fs->fs_ncg;
998 for (i = 2; i < fs->fs_ncg; i++) {
999 result = (*allocator)(ip, cg, 0, size);
1000 if (result)
1001 return (result);
1002 cg++;
1003 if (cg == fs->fs_ncg)
1004 cg = 0;
1005 }
1006 return (0);
1007 }
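/*
 * Probe-order illustration (derived from the loops above): with
 * icg == 5 and fs_ncg == 32 the allocator tries cg 5 first, the
 * quadratic rehash then visits cgs 6, 8, 12, 20 and 4 (icg + 2^k - 1,
 * modulo fs_ncg), and the final brute-force pass walks 7, 8, 9, ...
 * wrapping around until every remaining group has been tried.
 */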
1008
1009 /*
1010 * Determine whether a fragment can be extended.
1011 *
1012 * Check to see if the necessary fragments are available, and
1013 * if they are, allocate them.
1014 */
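/*
 * For example (assuming 8K blocks with 1K fragments): growing a 1K
 * fragment to 3K in place succeeds only if the two fragments that
 * follow it within the same block are still free; otherwise this
 * routine returns 0 and ffs_realloccg() falls back to a fresh
 * allocation via ffs_hashalloc().
 */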
1015 static daddr_t
1016 ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
1017 {
1018 struct fs *fs;
1019 struct cg *cgp;
1020 struct buf *bp;
1021 daddr_t bno;
1022 int frags, bbase;
1023 int i, error;
1024 u_int8_t *blksfree;
1025
1026 fs = ip->i_fs;
1027 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
1028 return (0);
1029 frags = numfrags(fs, nsize);
1030 bbase = fragnum(fs, bprev);
1031 if (bbase > fragnum(fs, (bprev + frags - 1))) {
1032 /* cannot extend across a block boundary */
1033 return (0);
1034 }
1035 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1036 (int)fs->fs_cgsize, NOCRED, &bp);
1037 if (error) {
1038 brelse(bp);
1039 return (0);
1040 }
1041 cgp = (struct cg *)bp->b_data;
1042 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1043 brelse(bp);
1044 return (0);
1045 }
1046 cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
1047 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1048 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1049 cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
1050 bno = dtogd(fs, bprev);
1051 blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
1052 for (i = numfrags(fs, osize); i < frags; i++)
1053 if (isclr(blksfree, bno + i)) {
1054 brelse(bp);
1055 return (0);
1056 }
1057 /*
1058 * the current fragment can be extended
1059 * deduct the count on fragment being extended into
1060 * increase the count on the remaining fragment (if any)
1061 * allocate the extended piece
1062 */
1063 for (i = frags; i < fs->fs_frag - bbase; i++)
1064 if (isclr(blksfree, bno + i))
1065 break;
1066 ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
1067 if (i != frags)
1068 ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
1069 for (i = numfrags(fs, osize); i < frags; i++) {
1070 clrbit(blksfree, bno + i);
1071 ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
1072 fs->fs_cstotal.cs_nffree--;
1073 fs->fs_cs(fs, cg).cs_nffree--;
1074 }
1075 fs->fs_fmod = 1;
1076 if (DOINGSOFTDEP(ITOV(ip)))
1077 softdep_setup_blkmapdep(bp, fs, bprev);
1078 ACTIVECG_CLR(fs, cg);
1079 bdwrite(bp);
1080 return (bprev);
1081 }
1082
1083 /*
1084 * Determine whether a block can be allocated.
1085 *
1086 * Check to see if a block of the appropriate size is available,
1087 * and if it is, allocate it.
1088 */
1089 static daddr_t
1090 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
1091 {
1092 struct fs *fs = ip->i_fs;
1093 struct cg *cgp;
1094 struct buf *bp;
1095 int32_t bno;
1096 daddr_t blkno;
1097 int error, frags, allocsiz, i;
1098 u_int8_t *blksfree;
1099 #ifdef FFS_EI
1100 const int needswap = UFS_FSNEEDSWAP(fs);
1101 #endif
1102
1103 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
1104 return (0);
1105 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1106 (int)fs->fs_cgsize, NOCRED, &bp);
1107 if (error) {
1108 brelse(bp);
1109 return (0);
1110 }
1111 cgp = (struct cg *)bp->b_data;
1112 if (!cg_chkmagic(cgp, needswap) ||
1113 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
1114 brelse(bp);
1115 return (0);
1116 }
1117 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1118 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1119 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1120 cgp->cg_time = ufs_rw64(time_second, needswap);
1121 if (size == fs->fs_bsize) {
1122 blkno = ffs_alloccgblk(ip, bp, bpref);
1123 ACTIVECG_CLR(fs, cg);
1124 bdwrite(bp);
1125 return (blkno);
1126 }
1127 /*
1128 * check to see if any fragments are already available
1129 * allocsiz is the size which will be allocated, hacking
1130 * it down to a smaller size if necessary
1131 */
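/*
 * Example (assuming 8K blocks with 1K fragments): a 2K request means
 * frags == 2; if no 2-frag run is summarized in cg_frsum but a 3-frag
 * run is, allocsiz becomes 3 and the leftover fragment is credited
 * back to cg_frsum[1] further down.
 */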
1132 blksfree = cg_blksfree(cgp, needswap);
1133 frags = numfrags(fs, size);
1134 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
1135 if (cgp->cg_frsum[allocsiz] != 0)
1136 break;
1137 if (allocsiz == fs->fs_frag) {
1138 /*
1139 * no fragments were available, so a block will be
1140 * allocated, and hacked up
1141 */
1142 if (cgp->cg_cs.cs_nbfree == 0) {
1143 brelse(bp);
1144 return (0);
1145 }
1146 blkno = ffs_alloccgblk(ip, bp, bpref);
1147 bno = dtogd(fs, blkno);
1148 for (i = frags; i < fs->fs_frag; i++)
1149 setbit(blksfree, bno + i);
1150 i = fs->fs_frag - frags;
1151 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1152 fs->fs_cstotal.cs_nffree += i;
1153 fs->fs_cs(fs, cg).cs_nffree += i;
1154 fs->fs_fmod = 1;
1155 ufs_add32(cgp->cg_frsum[i], 1, needswap);
1156 ACTIVECG_CLR(fs, cg);
1157 bdwrite(bp);
1158 return (blkno);
1159 }
1160 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
1161 #if 0
1162 /*
1163 * XXX fvdl mapsearch will panic, and never return -1
1164 * also: returning NULL as daddr_t ?
1165 */
1166 if (bno < 0) {
1167 brelse(bp);
1168 return (0);
1169 }
1170 #endif
1171 for (i = 0; i < frags; i++)
1172 clrbit(blksfree, bno + i);
1173 ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
1174 fs->fs_cstotal.cs_nffree -= frags;
1175 fs->fs_cs(fs, cg).cs_nffree -= frags;
1176 fs->fs_fmod = 1;
1177 ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
1178 if (frags != allocsiz)
1179 ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
1180 blkno = cg * fs->fs_fpg + bno;
1181 if (DOINGSOFTDEP(ITOV(ip)))
1182 softdep_setup_blkmapdep(bp, fs, blkno);
1183 ACTIVECG_CLR(fs, cg);
1184 bdwrite(bp);
1185 return blkno;
1186 }
1187
1188 /*
1189 * Allocate a block in a cylinder group.
1190 *
1191 * This algorithm implements the following policy:
1192 * 1) allocate the requested block.
1193 * 2) allocate a rotationally optimal block in the same cylinder.
1194 * 3) allocate the next available block on the block rotor for the
1195 * specified cylinder group.
1196 * Note that this routine only allocates fs_bsize blocks; these
1197 * blocks may be fragmented by the routine that allocates them.
1198 */
1199 static daddr_t
1200 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
1201 {
1202 struct fs *fs = ip->i_fs;
1203 struct cg *cgp;
1204 daddr_t blkno;
1205 int32_t bno;
1206 u_int8_t *blksfree;
1207 #ifdef FFS_EI
1208 const int needswap = UFS_FSNEEDSWAP(fs);
1209 #endif
1210
1211 cgp = (struct cg *)bp->b_data;
1212 blksfree = cg_blksfree(cgp, needswap);
1213 if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
1214 bpref = ufs_rw32(cgp->cg_rotor, needswap);
1215 } else {
1216 bpref = blknum(fs, bpref);
1217 bno = dtogd(fs, bpref);
1218 /*
1219 * if the requested block is available, use it
1220 */
1221 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
1222 goto gotit;
1223 }
1224 /*
1225 * Take the next available block in this cylinder group.
1226 */
1227 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1228 if (bno < 0)
1229 return (0);
1230 cgp->cg_rotor = ufs_rw32(bno, needswap);
1231 gotit:
1232 blkno = fragstoblks(fs, bno);
1233 ffs_clrblock(fs, blksfree, blkno);
1234 ffs_clusteracct(fs, cgp, blkno, -1);
1235 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1236 fs->fs_cstotal.cs_nbfree--;
1237 fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
1238 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1239 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1240 int cylno;
1241 cylno = old_cbtocylno(fs, bno);
1242 KASSERT(cylno >= 0);
1243 KASSERT(cylno < fs->fs_old_ncyl);
1244 KASSERT(old_cbtorpos(fs, bno) >= 0);
1245 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
1246 ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
1247 needswap);
1248 ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
1249 }
1250 fs->fs_fmod = 1;
1251 blkno = ufs_rw32(cgp->cg_cgx, needswap) * fs->fs_fpg + bno;
1252 if (DOINGSOFTDEP(ITOV(ip)))
1253 softdep_setup_blkmapdep(bp, fs, blkno);
1254 return (blkno);
1255 }
1256
1257 #ifdef XXXUBC
1258 /*
1259 * Determine whether a cluster can be allocated.
1260 *
1261 * We do not currently check for optimal rotational layout if there
1262 * are multiple choices in the same cylinder group. Instead we just
1263 * take the first one that we find following bpref.
1264 */
1265
1266 /*
1267 * This function must be fixed for UFS2 if re-enabled.
1268 */
1269 static daddr_t
1270 ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
1271 {
1272 struct fs *fs;
1273 struct cg *cgp;
1274 struct buf *bp;
1275 int i, got, run, bno, bit, map;
1276 u_char *mapp;
1277 int32_t *lp;
1278
1279 fs = ip->i_fs;
1280 if (fs->fs_maxcluster[cg] < len)
1281 return (0);
1282 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1283 NOCRED, &bp))
1284 goto fail;
1285 cgp = (struct cg *)bp->b_data;
1286 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1287 goto fail;
1288 /*
1289 * Check to see if a cluster of the needed size (or bigger) is
1290 * available in this cylinder group.
1291 */
1292 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len];
1293 for (i = len; i <= fs->fs_contigsumsize; i++)
1294 if (ufs_rw32(*lp++, UFS_FSNEEDSWAP(fs)) > 0)
1295 break;
1296 if (i > fs->fs_contigsumsize) {
1297 /*
1298 * This is the first time looking for a cluster in this
1299 * cylinder group. Update the cluster summary information
1300 * to reflect the true maximum sized cluster so that
1301 * future cluster allocation requests can avoid reading
1302 * the cylinder group map only to find no clusters.
1303 */
1304 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len - 1];
1305 for (i = len - 1; i > 0; i--)
1306 if (ufs_rw32(*lp--, UFS_FSNEEDSWAP(fs)) > 0)
1307 break;
1308 fs->fs_maxcluster[cg] = i;
1309 goto fail;
1310 }
1311 /*
1312 * Search the cluster map to find a big enough cluster.
1313 * We take the first one that we find, even if it is larger
1314 * than we need as we prefer to get one close to the previous
1315 * block allocation. We do not search before the current
1316 * preference point as we do not want to allocate a block
1317 * that is allocated before the previous one (as we will
1318 * then have to wait for another pass of the elevator
1319 * algorithm before it will be read). We prefer to fail and
1320 * be recalled to try an allocation in the next cylinder group.
1321 */
1322 if (dtog(fs, bpref) != cg)
1323 bpref = 0;
1324 else
1325 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1326 mapp = &cg_clustersfree(cgp, UFS_FSNEEDSWAP(fs))[bpref / NBBY];
1327 map = *mapp++;
1328 bit = 1 << (bpref % NBBY);
1329 for (run = 0, got = bpref;
1330 got < ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)); got++) {
1331 if ((map & bit) == 0) {
1332 run = 0;
1333 } else {
1334 run++;
1335 if (run == len)
1336 break;
1337 }
1338 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1339 bit <<= 1;
1340 } else {
1341 map = *mapp++;
1342 bit = 1;
1343 }
1344 }
1345 if (got == ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)))
1346 goto fail;
1347 /*
1348 * Allocate the cluster that we have found.
1349 */
1350 #ifdef DIAGNOSTIC
1351 for (i = 1; i <= len; i++)
1352 if (!ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1353 got - run + i))
1354 panic("ffs_clusteralloc: map mismatch");
1355 #endif
1356 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1357 if (dtog(fs, bno) != cg)
1358 panic("ffs_clusteralloc: allocated out of group");
1359 len = blkstofrags(fs, len);
1360 for (i = 0; i < len; i += fs->fs_frag)
1361 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
1362 panic("ffs_clusteralloc: lost block");
1363 ACTIVECG_CLR(fs, cg);
1364 bdwrite(bp);
1365 return (bno);
1366
1367 fail:
1368 brelse(bp);
1369 return (0);
1370 }
1371 #endif /* XXXUBC */
1372
1373 /*
1374 * Determine whether an inode can be allocated.
1375 *
1376 * Check to see if an inode is available, and if it is,
1377 * allocate it using the following policy:
1378 * 1) allocate the requested inode.
1379 * 2) allocate the next available inode after the requested
1380 * inode in the specified cylinder group.
1381 */
1382 static daddr_t
1383 ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1384 {
1385 struct fs *fs = ip->i_fs;
1386 struct cg *cgp;
1387 struct buf *bp, *ibp;
1388 u_int8_t *inosused;
1389 int error, start, len, loc, map, i;
1390 int32_t initediblk;
1391 struct ufs2_dinode *dp2;
1392 #ifdef FFS_EI
1393 const int needswap = UFS_FSNEEDSWAP(fs);
1394 #endif
1395
1396 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1397 return (0);
1398 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1399 (int)fs->fs_cgsize, NOCRED, &bp);
1400 if (error) {
1401 brelse(bp);
1402 return (0);
1403 }
1404 cgp = (struct cg *)bp->b_data;
1405 if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0) {
1406 brelse(bp);
1407 return (0);
1408 }
1409 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1410 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1411 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1412 cgp->cg_time = ufs_rw64(time_second, needswap);
1413 inosused = cg_inosused(cgp, needswap);
1414 if (ipref) {
1415 ipref %= fs->fs_ipg;
1416 if (isclr(inosused, ipref))
1417 goto gotit;
1418 }
1419 start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
1420 len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
1421 NBBY);
1422 loc = skpc(0xff, len, &inosused[start]);
1423 if (loc == 0) {
1424 len = start + 1;
1425 start = 0;
1426 loc = skpc(0xff, len, &inosused[0]);
1427 if (loc == 0) {
1428 printf("cg = %d, irotor = %d, fs = %s\n",
1429 cg, ufs_rw32(cgp->cg_irotor, needswap),
1430 fs->fs_fsmnt);
1431 panic("ffs_nodealloccg: map corrupted");
1432 /* NOTREACHED */
1433 }
1434 }
1435 i = start + len - loc;
1436 map = inosused[i];
1437 ipref = i * NBBY;
1438 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1439 if ((map & i) == 0) {
1440 cgp->cg_irotor = ufs_rw32(ipref, needswap);
1441 goto gotit;
1442 }
1443 }
1444 printf("fs = %s\n", fs->fs_fsmnt);
1445 panic("ffs_nodealloccg: block not in map");
1446 /* NOTREACHED */
1447 gotit:
1448 if (DOINGSOFTDEP(ITOV(ip)))
1449 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
1450 setbit(inosused, ipref);
1451 ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1452 fs->fs_cstotal.cs_nifree--;
1453 fs->fs_cs(fs, cg).cs_nifree--;
1454 fs->fs_fmod = 1;
1455 if ((mode & IFMT) == IFDIR) {
1456 ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1457 fs->fs_cstotal.cs_ndir++;
1458 fs->fs_cs(fs, cg).cs_ndir++;
1459 }
1460 /*
1461 * Check to see if we need to initialize more inodes.
1462 */
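/*
 * (UFS2 only.)  newfs typically initializes just the first few inode
 * blocks of each cylinder group; when an allocation reaches the
 * uninitialized region, the block of INOPB(fs) on-disk inodes below is
 * zeroed, each di_gen is seeded with a random value, and
 * cg_initediblk is advanced.
 */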
1463 initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
1464 if (fs->fs_magic == FS_UFS2_MAGIC &&
1465 ipref + INOPB(fs) > initediblk &&
1466 initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
1467 ibp = getblk(ip->i_devvp, fsbtodb(fs,
1468 ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
1469 (int)fs->fs_bsize, 0, 0);
1470 memset(ibp->b_data, 0, fs->fs_bsize);
1471 dp2 = (struct ufs2_dinode *)(ibp->b_data);
1472 for (i = 0; i < INOPB(fs); i++) {
1473 /*
1474 * Don't bother to swap, it's supposed to be
1475 * random, after all.
1476 */
1477 dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
1478 dp2++;
1479 }
1480 bawrite(ibp);
1481 initediblk += INOPB(fs);
1482 cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
1483 }
1484
1485 ACTIVECG_CLR(fs, cg);
1486 bdwrite(bp);
1487 return (cg * fs->fs_ipg + ipref);
1488 }
1489
1490 /*
1491 * Free a block or fragment.
1492 *
1493 * The specified block or fragment is placed back in the
1494 * free map. If a fragment is deallocated, a possible
1495 * block reassembly is checked.
1496 */
1497 void
1498 ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
1499 ino_t inum)
1500 {
1501 struct cg *cgp;
1502 struct buf *bp;
1503 struct ufsmount *ump;
1504 int32_t fragno, cgbno;
1505 daddr_t cgblkno;
1506 int i, error, cg, blk, frags, bbase;
1507 u_int8_t *blksfree;
1508 dev_t dev;
1509 const int needswap = UFS_FSNEEDSWAP(fs);
1510
1511 cg = dtog(fs, bno);
1512 if (devvp->v_type != VBLK) {
1513 /* devvp is a snapshot */
1514 dev = VTOI(devvp)->i_devvp->v_rdev;
1515 cgblkno = fragstoblks(fs, cgtod(fs, cg));
1516 } else {
1517 dev = devvp->v_rdev;
1518 ump = VFSTOUFS(devvp->v_specmountpoint);
1519 cgblkno = fsbtodb(fs, cgtod(fs, cg));
1520 if (TAILQ_FIRST(&ump->um_snapshots) != NULL &&
1521 ffs_snapblkfree(fs, devvp, bno, size, inum))
1522 return;
1523 }
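/*
 * Note that for a snapshot vnode the cylinder group is read below
 * using a logical file-system block number (fragstoblks of cgtod),
 * whereas for the raw device it is a physical disk address (fsbtodb);
 * ffs_freefile() and ffs_checkfreefile() make the same distinction.
 */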
1524 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
1525 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
1526 printf("dev = 0x%x, bno = %" PRId64 " bsize = %d, "
1527 "size = %ld, fs = %s\n",
1528 dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
1529 panic("blkfree: bad size");
1530 }
1531
1532 if (bno >= fs->fs_size) {
1533 printf("bad block %" PRId64 ", ino %llu\n", bno,
1534 (unsigned long long)inum);
1535 ffs_fserr(fs, inum, "bad block");
1536 return;
1537 }
1538 error = bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp);
1539 if (error) {
1540 brelse(bp);
1541 return;
1542 }
1543 cgp = (struct cg *)bp->b_data;
1544 if (!cg_chkmagic(cgp, needswap)) {
1545 brelse(bp);
1546 return;
1547 }
1548 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1549 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1550 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1551 cgp->cg_time = ufs_rw64(time_second, needswap);
1552 cgbno = dtogd(fs, bno);
1553 blksfree = cg_blksfree(cgp, needswap);
1554 if (size == fs->fs_bsize) {
1555 fragno = fragstoblks(fs, cgbno);
1556 if (!ffs_isfreeblock(fs, blksfree, fragno)) {
1557 if (devvp->v_type != VBLK) {
1558 /* devvp is a snapshot */
1559 brelse(bp);
1560 return;
1561 }
1562 printf("dev = 0x%x, block = %" PRId64 ", fs = %s\n",
1563 dev, bno, fs->fs_fsmnt);
1564 panic("blkfree: freeing free block");
1565 }
1566 ffs_setblock(fs, blksfree, fragno);
1567 ffs_clusteracct(fs, cgp, fragno, 1);
1568 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1569 fs->fs_cstotal.cs_nbfree++;
1570 fs->fs_cs(fs, cg).cs_nbfree++;
1571 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1572 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1573 i = old_cbtocylno(fs, cgbno);
1574 KASSERT(i >= 0);
1575 KASSERT(i < fs->fs_old_ncyl);
1576 KASSERT(old_cbtorpos(fs, cgbno) >= 0);
1577 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos);
1578 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1,
1579 needswap);
1580 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1581 }
1582 } else {
1583 bbase = cgbno - fragnum(fs, cgbno);
1584 /*
1585 * decrement the counts associated with the old frags
1586 */
1587 blk = blkmap(fs, blksfree, bbase);
1588 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1589 /*
1590 * deallocate the fragment
1591 */
1592 frags = numfrags(fs, size);
1593 for (i = 0; i < frags; i++) {
1594 if (isset(blksfree, cgbno + i)) {
1595 printf("dev = 0x%x, block = %" PRId64
1596 ", fs = %s\n",
1597 dev, bno + i, fs->fs_fsmnt);
1598 panic("blkfree: freeing free frag");
1599 }
1600 setbit(blksfree, cgbno + i);
1601 }
1602 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1603 fs->fs_cstotal.cs_nffree += i;
1604 fs->fs_cs(fs, cg).cs_nffree += i;
1605 /*
1606 * add back in counts associated with the new frags
1607 */
1608 blk = blkmap(fs, blksfree, bbase);
1609 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1610 /*
1611 * if a complete block has been reassembled, account for it
1612 */
1613 fragno = fragstoblks(fs, bbase);
1614 if (ffs_isblock(fs, blksfree, fragno)) {
1615 ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
1616 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1617 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1618 ffs_clusteracct(fs, cgp, fragno, 1);
1619 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1620 fs->fs_cstotal.cs_nbfree++;
1621 fs->fs_cs(fs, cg).cs_nbfree++;
1622 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1623 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1624 i = old_cbtocylno(fs, bbase);
1625 KASSERT(i >= 0);
1626 KASSERT(i < fs->fs_old_ncyl);
1627 KASSERT(old_cbtorpos(fs, bbase) >= 0);
1628 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos);
1629 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs,
1630 bbase)], 1, needswap);
1631 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1632 }
1633 }
1634 }
1635 fs->fs_fmod = 1;
1636 ACTIVECG_CLR(fs, cg);
1637 bdwrite(bp);
1638 }
1639
1640 #if defined(DIAGNOSTIC) || defined(DEBUG)
1641 #ifdef XXXUBC
1642 /*
1643 * Verify allocation of a block or fragment. Returns true if block or
1644 * fragment is allocated, false if it is free.
1645 */
1646 static int
1647 ffs_checkblk(struct inode *ip, daddr_t bno, long size)
1648 {
1649 struct fs *fs;
1650 struct cg *cgp;
1651 struct buf *bp;
1652 int i, error, frags, free;
1653
1654 fs = ip->i_fs;
1655 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1656 printf("bsize = %d, size = %ld, fs = %s\n",
1657 fs->fs_bsize, size, fs->fs_fsmnt);
1658 panic("checkblk: bad size");
1659 }
1660 if (bno >= fs->fs_size)
1661 panic("checkblk: bad block %d", bno);
1662 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1663 (int)fs->fs_cgsize, NOCRED, &bp);
1664 if (error) {
1665 brelse(bp);
1666 return 0;
1667 }
1668 cgp = (struct cg *)bp->b_data;
1669 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1670 brelse(bp);
1671 return 0;
1672 }
1673 bno = dtogd(fs, bno);
1674 if (size == fs->fs_bsize) {
1675 free = ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1676 fragstoblks(fs, bno));
1677 } else {
1678 frags = numfrags(fs, size);
1679 for (free = 0, i = 0; i < frags; i++)
1680 if (isset(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i))
1681 free++;
1682 if (free != 0 && free != frags)
1683 panic("checkblk: partially free fragment");
1684 }
1685 brelse(bp);
1686 return (!free);
1687 }
1688 #endif /* XXXUBC */
1689 #endif /* DIAGNOSTIC */
1690
1691 /*
1692 * Free an inode.
1693 */
1694 int
1695 ffs_vfree(struct vnode *vp, ino_t ino, int mode)
1696 {
1697
1698 if (DOINGSOFTDEP(vp)) {
1699 softdep_freefile(vp, ino, mode);
1700 return (0);
1701 }
1702 return ffs_freefile(VTOI(vp)->i_fs, VTOI(vp)->i_devvp, ino, mode);
1703 }
1704
1705 /*
1706 * Do the actual free operation.
1707 * The specified inode is placed back in the free map.
1708 */
1709 int
1710 ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
1711 {
1712 struct cg *cgp;
1713 struct buf *bp;
1714 int error, cg;
1715 daddr_t cgbno;
1716 u_int8_t *inosused;
1717 dev_t dev;
1718 #ifdef FFS_EI
1719 const int needswap = UFS_FSNEEDSWAP(fs);
1720 #endif
1721
1722 cg = ino_to_cg(fs, ino);
1723 if (devvp->v_type != VBLK) {
1724 /* devvp is a snapshot */
1725 dev = VTOI(devvp)->i_devvp->v_rdev;
1726 cgbno = fragstoblks(fs, cgtod(fs, cg));
1727 } else {
1728 dev = devvp->v_rdev;
1729 cgbno = fsbtodb(fs, cgtod(fs, cg));
1730 }
1731 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1732 panic("ifree: range: dev = 0x%x, ino = %llu, fs = %s",
1733 dev, (unsigned long long)ino, fs->fs_fsmnt);
1734 error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp);
1735 if (error) {
1736 brelse(bp);
1737 return (error);
1738 }
1739 cgp = (struct cg *)bp->b_data;
1740 if (!cg_chkmagic(cgp, needswap)) {
1741 brelse(bp);
1742 return (0);
1743 }
1744 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1745 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1746 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1747 cgp->cg_time = ufs_rw64(time_second, needswap);
1748 inosused = cg_inosused(cgp, needswap);
1749 ino %= fs->fs_ipg;
1750 if (isclr(inosused, ino)) {
1751 printf("ifree: dev = 0x%x, ino = %llu, fs = %s\n",
1752 dev, (unsigned long long)ino + cg * fs->fs_ipg,
1753 fs->fs_fsmnt);
1754 if (fs->fs_ronly == 0)
1755 panic("ifree: freeing free inode");
1756 }
1757 clrbit(inosused, ino);
1758 if (ino < ufs_rw32(cgp->cg_irotor, needswap))
1759 cgp->cg_irotor = ufs_rw32(ino, needswap);
1760 ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
1761 fs->fs_cstotal.cs_nifree++;
1762 fs->fs_cs(fs, cg).cs_nifree++;
1763 if ((mode & IFMT) == IFDIR) {
1764 ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
1765 fs->fs_cstotal.cs_ndir--;
1766 fs->fs_cs(fs, cg).cs_ndir--;
1767 }
1768 fs->fs_fmod = 1;
1769 ACTIVECG_CLR(fs, cg);
1770 bdwrite(bp);
1771 return (0);
1772 }
1773
1774 /*
1775 * Check to see if a file is free.
1776 */
1777 int
1778 ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
1779 {
1780 struct cg *cgp;
1781 struct buf *bp;
1782 daddr_t cgbno;
1783 int ret, cg;
1784 u_int8_t *inosused;
1785
1786 cg = ino_to_cg(fs, ino);
1787 if (devvp->v_type != VBLK) {
1788 /* devvp is a snapshot */
1789 cgbno = fragstoblks(fs, cgtod(fs, cg));
1790 } else
1791 cgbno = fsbtodb(fs, cgtod(fs, cg));
1792 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1793 return 1;
1794 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) {
1795 brelse(bp);
1796 return 1;
1797 }
1798 cgp = (struct cg *)bp->b_data;
1799 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1800 brelse(bp);
1801 return 1;
1802 }
1803 inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
1804 ino %= fs->fs_ipg;
1805 ret = isclr(inosused, ino);
1806 brelse(bp);
1807 return ret;
1808 }
1809
1810 /*
1811 * Find a block of the specified size in the specified cylinder group.
1812 *
1813 * It is a panic if a request is made to find a block when none are
1814 * available.
1815 */
1816 static int32_t
1817 ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
1818 {
1819 int32_t bno;
1820 int start, len, loc, i;
1821 int blk, field, subfield, pos;
1822 int ostart, olen;
1823 u_int8_t *blksfree;
1824 #ifdef FFS_EI
1825 const int needswap = UFS_FSNEEDSWAP(fs);
1826 #endif
1827
1828 /*
1829 * find the fragment by searching through the free block
1830 * map for an appropriate bit pattern
1831 */
1832 if (bpref)
1833 start = dtogd(fs, bpref) / NBBY;
1834 else
1835 start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
1836 blksfree = cg_blksfree(cgp, needswap);
1837 len = howmany(fs->fs_fpg, NBBY) - start;
1838 ostart = start;
1839 olen = len;
1840 loc = scanc((u_int)len,
1841 (const u_char *)&blksfree[start],
1842 (const u_char *)fragtbl[fs->fs_frag],
1843 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
1844 if (loc == 0) {
1845 len = start + 1;
1846 start = 0;
1847 loc = scanc((u_int)len,
1848 (const u_char *)&blksfree[0],
1849 (const u_char *)fragtbl[fs->fs_frag],
1850 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
1851 if (loc == 0) {
1852 printf("start = %d, len = %d, fs = %s\n",
1853 ostart, olen, fs->fs_fsmnt);
1854 printf("offset=%d %ld\n",
1855 ufs_rw32(cgp->cg_freeoff, needswap),
1856 (long)blksfree - (long)cgp);
1857 printf("cg %d\n", cgp->cg_cgx);
1858 panic("ffs_alloccg: map corrupted");
1859 /* NOTREACHED */
1860 }
1861 }
1862 bno = (start + len - loc) * NBBY;
1863 cgp->cg_frotor = ufs_rw32(bno, needswap);
1864 /*
1865 * found the byte in the map
1866 * sift through the bits to find the selected frag
1867 */
1868 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1869 blk = blkmap(fs, blksfree, bno);
1870 blk <<= 1;
1871 field = around[allocsiz];
1872 subfield = inside[allocsiz];
1873 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1874 if ((blk & field) == subfield)
1875 return (bno + pos);
1876 field <<= 1;
1877 subfield <<= 1;
1878 }
1879 }
1880 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1881 panic("ffs_alloccg: block not in map");
1882 /* return (-1); */
1883 }
1884
1885 /*
1886 * Update the cluster map because of an allocation or free.
1887 *
1888 * Cnt == 1 means free; cnt == -1 means allocating.
1889 */
1890 void
1891 ffs_clusteracct(struct fs *fs, struct cg *cgp, int32_t blkno, int cnt)
1892 {
1893 int32_t *sump;
1894 int32_t *lp;
1895 u_char *freemapp, *mapp;
1896 int i, start, end, forw, back, map, bit;
1897 #ifdef FFS_EI
1898 const int needswap = UFS_FSNEEDSWAP(fs);
1899 #endif
1900
1901 if (fs->fs_contigsumsize <= 0)
1902 return;
1903 freemapp = cg_clustersfree(cgp, needswap);
1904 sump = cg_clustersum(cgp, needswap);
1905 /*
1906 * Allocate or clear the actual block.
1907 */
1908 if (cnt > 0)
1909 setbit(freemapp, blkno);
1910 else
1911 clrbit(freemapp, blkno);
1912 /*
1913 * Find the size of the cluster going forward.
1914 */
1915 start = blkno + 1;
1916 end = start + fs->fs_contigsumsize;
1917 if (end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
1918 end = ufs_rw32(cgp->cg_nclusterblks, needswap);
1919 mapp = &freemapp[start / NBBY];
1920 map = *mapp++;
1921 bit = 1 << (start % NBBY);
1922 for (i = start; i < end; i++) {
1923 if ((map & bit) == 0)
1924 break;
1925 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1926 bit <<= 1;
1927 } else {
1928 map = *mapp++;
1929 bit = 1;
1930 }
1931 }
1932 forw = i - start;
1933 /*
1934 * Find the size of the cluster going backward.
1935 */
1936 start = blkno - 1;
1937 end = start - fs->fs_contigsumsize;
1938 if (end < 0)
1939 end = -1;
1940 mapp = &freemapp[start / NBBY];
1941 map = *mapp--;
1942 bit = 1 << (start % NBBY);
1943 for (i = start; i > end; i--) {
1944 if ((map & bit) == 0)
1945 break;
1946 if ((i & (NBBY - 1)) != 0) {
1947 bit >>= 1;
1948 } else {
1949 map = *mapp--;
1950 bit = 1 << (NBBY - 1);
1951 }
1952 }
1953 back = start - i;
1954 /*
1955 * Account for old cluster and the possibly new forward and
1956 * back clusters.
1957 */
1958 i = back + forw + 1;
1959 if (i > fs->fs_contigsumsize)
1960 i = fs->fs_contigsumsize;
1961 ufs_add32(sump[i], cnt, needswap);
1962 if (back > 0)
1963 ufs_add32(sump[back], -cnt, needswap);
1964 if (forw > 0)
1965 ufs_add32(sump[forw], -cnt, needswap);
1966
1967 /*
1968 * Update cluster summary information.
1969 */
1970 lp = &sump[fs->fs_contigsumsize];
1971 for (i = fs->fs_contigsumsize; i > 0; i--)
1972 if (ufs_rw32(*lp--, needswap) > 0)
1973 break;
1974 fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
1975 }
1976
1977 /*
1978 * Fserr prints the name of a file system with an error diagnostic.
1979 *
1980 * The form of the error message is:
1981 * fs: error message
1982 */
1983 static void
1984 ffs_fserr(struct fs *fs, u_int uid, const char *cp)
1985 {
1986
1987 log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
1988 uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp);
1989 }
1990