ffs_alloc.c revision 1.105 1 /* $NetBSD: ffs_alloc.c,v 1.105 2008/01/02 11:49:08 ad Exp $ */
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.105 2008/01/02 11:49:08 ad Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_ffs.h"
48 #include "opt_quota.h"
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/buf.h>
54 #include <sys/proc.h>
55 #include <sys/vnode.h>
56 #include <sys/mount.h>
57 #include <sys/kernel.h>
58 #include <sys/syslog.h>
59 #include <sys/kauth.h>
60
61 #include <miscfs/specfs/specdev.h>
62 #include <ufs/ufs/quota.h>
63 #include <ufs/ufs/ufsmount.h>
64 #include <ufs/ufs/inode.h>
65 #include <ufs/ufs/ufs_extern.h>
66 #include <ufs/ufs/ufs_bswap.h>
67
68 #include <ufs/ffs/fs.h>
69 #include <ufs/ffs/ffs_extern.h>
70
71 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
72 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
73 #ifdef XXXUBC
74 static daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int);
75 #endif
76 static ino_t ffs_dirpref(struct inode *);
77 static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
78 static void ffs_fserr(struct fs *, u_int, const char *);
79 static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int,
80 daddr_t (*)(struct inode *, int, daddr_t, int));
81 static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int);
82 static int32_t ffs_mapsearch(struct fs *, struct cg *,
83 daddr_t, int);
84 #if defined(DIAGNOSTIC) || defined(DEBUG)
85 #ifdef XXXUBC
86 static int ffs_checkblk(struct inode *, daddr_t, long size);
87 #endif
88 #endif
89
90 /* if 1, changes in optimization strategy are logged */
91 int ffs_log_changeopt = 0;
92
93 /* in ffs_tables.c */
94 extern const int inside[], around[];
95 extern const u_char * const fragtbl[];
96
97 /*
98 * Allocate a block in the file system.
99 *
100 * The size of the requested block is given, which must be some
101 * multiple of fs_fsize and <= fs_bsize.
102 * A preference may be optionally specified. If a preference is given
103 * the following hierarchy is used to allocate a block:
104 * 1) allocate the requested block.
105 * 2) allocate a rotationally optimal block in the same cylinder.
106 * 3) allocate a block in the same cylinder group.
107 * 4) quadradically rehash into other cylinder groups, until an
108 * available block is located.
109 * If no block preference is given the following hierarchy is used
110 * to allocate a block:
111 * 1) allocate a block in the cylinder group that contains the
112 * inode for the file.
113 * 2) quadradically rehash into other cylinder groups, until an
114 * available block is located.
115 */
116 int
117 ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
118 kauth_cred_t cred, daddr_t *bnp)
119 {
120 struct ufsmount *ump;
121 struct fs *fs;
122 daddr_t bno;
123 int cg;
124 #ifdef QUOTA
125 int error;
126 #endif
127
128 fs = ip->i_fs;
129 ump = ip->i_ump;
130
131 KASSERT(mutex_owned(&ump->um_lock));
132
133 #ifdef UVM_PAGE_TRKOWN
134 if (ITOV(ip)->v_type == VREG &&
135 lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
136 struct vm_page *pg;
137 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
138 voff_t off = trunc_page(lblktosize(fs, lbn));
139 voff_t endoff = round_page(lblktosize(fs, lbn) + size);
140
141 mutex_enter(&uobj->vmobjlock);
142 while (off < endoff) {
143 pg = uvm_pagelookup(uobj, off);
144 KASSERT(pg != NULL);
145 KASSERT(pg->owner == curproc->p_pid);
146 off += PAGE_SIZE;
147 }
148 mutex_exit(&uobj->vmobjlock);
149 }
150 #endif
151
152 *bnp = 0;
153 #ifdef DIAGNOSTIC
154 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
155 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
156 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
157 panic("ffs_alloc: bad size");
158 }
159 if (cred == NOCRED)
160 panic("ffs_alloc: missing credential");
161 #endif /* DIAGNOSTIC */
162 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
163 goto nospace;
164 if (freespace(fs, fs->fs_minfree) <= 0 &&
165 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0)
166 goto nospace;
167 #ifdef QUOTA
168 mutex_exit(&ump->um_lock);
169 if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
170 return (error);
171 mutex_enter(&ump->um_lock);
172 #endif
173 if (bpref >= fs->fs_size)
174 bpref = 0;
175 if (bpref == 0)
176 cg = ino_to_cg(fs, ip->i_number);
177 else
178 cg = dtog(fs, bpref);
179 bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
180 if (bno > 0) {
181 DIP_ADD(ip, blocks, btodb(size));
182 ip->i_flag |= IN_CHANGE | IN_UPDATE;
183 *bnp = bno;
184 return (0);
185 }
186 #ifdef QUOTA
187 /*
188 * Restore user's disk quota because allocation failed.
189 */
190 (void) chkdq(ip, -btodb(size), cred, FORCE);
191 #endif
192 nospace:
193 mutex_exit(&ump->um_lock);
194 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
195 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
196 return (ENOSPC);
197 }
198
199 /*
200 * Reallocate a fragment to a bigger size
201 *
202 * The number and size of the old block is given, and a preference
203 * and new size is also specified. The allocator attempts to extend
204 * the original block. Failing that, the regular block allocator is
205 * invoked to get an appropriate block.
206 */
207 int
208 ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
209 int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop)
210 {
211 struct ufsmount *ump;
212 struct fs *fs;
213 struct buf *bp;
214 int cg, request, error;
215 daddr_t bprev, bno;
216
217 fs = ip->i_fs;
218 ump = ip->i_ump;
219
220 KASSERT(mutex_owned(&ump->um_lock));
221
222 #ifdef UVM_PAGE_TRKOWN
223 if (ITOV(ip)->v_type == VREG) {
224 struct vm_page *pg;
225 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
226 voff_t off = trunc_page(lblktosize(fs, lbprev));
227 voff_t endoff = round_page(lblktosize(fs, lbprev) + osize);
228
229 mutex_enter(&uobj->vmobjlock);
230 while (off < endoff) {
231 pg = uvm_pagelookup(uobj, off);
232 KASSERT(pg != NULL);
233 KASSERT(pg->owner == curproc->p_pid);
234 KASSERT((pg->flags & PG_CLEAN) == 0);
235 off += PAGE_SIZE;
236 }
237 mutex_exit(&uobj->vmobjlock);
238 }
239 #endif
240
241 #ifdef DIAGNOSTIC
242 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
243 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
244 printf(
245 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
246 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
247 panic("ffs_realloccg: bad size");
248 }
249 if (cred == NOCRED)
250 panic("ffs_realloccg: missing credential");
251 #endif /* DIAGNOSTIC */
252 if (freespace(fs, fs->fs_minfree) <= 0 &&
253 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0) {
254 mutex_exit(&ump->um_lock);
255 goto nospace;
256 }
257 if (fs->fs_magic == FS_UFS2_MAGIC)
258 bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs));
259 else
260 bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs));
261
262 if (bprev == 0) {
263 printf("dev = 0x%x, bsize = %d, bprev = %" PRId64 ", fs = %s\n",
264 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
265 panic("ffs_realloccg: bad bprev");
266 }
267 mutex_exit(&ump->um_lock);
268
269 /*
270 * Allocate the extra space in the buffer.
271 */
272 if (bpp != NULL &&
273 (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) != 0) {
274 brelse(bp, 0);
275 return (error);
276 }
277 #ifdef QUOTA
278 if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
279 if (bpp != NULL) {
280 brelse(bp, 0);
281 }
282 return (error);
283 }
284 #endif
285 /*
286 * Check for extension in the existing location.
287 */
288 cg = dtog(fs, bprev);
289 mutex_enter(&ump->um_lock);
290 if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
291 DIP_ADD(ip, blocks, btodb(nsize - osize));
292 ip->i_flag |= IN_CHANGE | IN_UPDATE;
293
294 if (bpp != NULL) {
295 if (bp->b_blkno != fsbtodb(fs, bno))
296 panic("bad blockno");
297 allocbuf(bp, nsize, 1);
298 memset((char *)bp->b_data + osize, 0, nsize - osize);
299 mutex_enter(bp->b_objlock);
300 bp->b_oflags |= BO_DONE;
301 mutex_exit(bp->b_objlock);
302 *bpp = bp;
303 }
304 if (blknop != NULL) {
305 *blknop = bno;
306 }
307 return (0);
308 }
309 /*
310 * Allocate a new disk location.
311 */
312 if (bpref >= fs->fs_size)
313 bpref = 0;
314 switch ((int)fs->fs_optim) {
315 case FS_OPTSPACE:
316 /*
317 * Allocate an exact sized fragment. Although this makes
318 * best use of space, we will waste time relocating it if
319 * the file continues to grow. If the fragmentation is
320 * less than half of the minimum free reserve, we choose
321 * to begin optimizing for time.
322 */
323 request = nsize;
324 if (fs->fs_minfree < 5 ||
325 fs->fs_cstotal.cs_nffree >
326 fs->fs_dsize * fs->fs_minfree / (2 * 100))
327 break;
328
329 if (ffs_log_changeopt) {
330 log(LOG_NOTICE,
331 "%s: optimization changed from SPACE to TIME\n",
332 fs->fs_fsmnt);
333 }
334
335 fs->fs_optim = FS_OPTTIME;
336 break;
337 case FS_OPTTIME:
338 /*
339 * At this point we have discovered a file that is trying to
340 * grow a small fragment to a larger fragment. To save time,
341 * we allocate a full sized block, then free the unused portion.
342 * If the file continues to grow, the `ffs_fragextend' call
343 * above will be able to grow it in place without further
344 * copying. If aberrant programs cause disk fragmentation to
345 * grow within 2% of the free reserve, we choose to begin
346 * optimizing for space.
347 */
348 request = fs->fs_bsize;
349 if (fs->fs_cstotal.cs_nffree <
350 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
351 break;
352
353 if (ffs_log_changeopt) {
354 log(LOG_NOTICE,
355 "%s: optimization changed from TIME to SPACE\n",
356 fs->fs_fsmnt);
357 }
358
359 fs->fs_optim = FS_OPTSPACE;
360 break;
361 default:
362 printf("dev = 0x%x, optim = %d, fs = %s\n",
363 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
364 panic("ffs_realloccg: bad optim");
365 /* NOTREACHED */
366 }
367 bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
368 if (bno > 0) {
369 if (!DOINGSOFTDEP(ITOV(ip)))
370 ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
371 ip->i_number);
372 if (nsize < request)
373 ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize),
374 (long)(request - nsize), ip->i_number);
375 DIP_ADD(ip, blocks, btodb(nsize - osize));
376 ip->i_flag |= IN_CHANGE | IN_UPDATE;
377 if (bpp != NULL) {
378 bp->b_blkno = fsbtodb(fs, bno);
379 allocbuf(bp, nsize, 1);
380 memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
381 mutex_enter(bp->b_objlock);
382 bp->b_oflags |= BO_DONE;
383 mutex_exit(bp->b_objlock);
384 *bpp = bp;
385 }
386 if (blknop != NULL) {
387 *blknop = bno;
388 }
389 return (0);
390 }
391 mutex_exit(&ump->um_lock);
392
393 #ifdef QUOTA
394 /*
395 * Restore user's disk quota because allocation failed.
396 */
397 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
398 #endif
399 if (bpp != NULL) {
400 brelse(bp, 0);
401 }
402
403 nospace:
404 /*
405 * no space available
406 */
407 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
408 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
409 return (ENOSPC);
410 }
411
412 #if 0
413 /*
414 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
415 *
416 * The vnode and an array of buffer pointers for a range of sequential
417 * logical blocks to be made contiguous is given. The allocator attempts
418 * to find a range of sequential blocks starting as close as possible
419 * from the end of the allocation for the logical block immediately
420 * preceding the current range. If successful, the physical block numbers
421 * in the buffer pointers and in the inode are changed to reflect the new
422 * allocation. If unsuccessful, the allocation is left unchanged. The
423 * success in doing the reallocation is returned. Note that the error
424 * return is not reflected back to the user. Rather the previous block
425 * allocation will be used.
426
427 */
428 #ifdef XXXUBC
429 #ifdef DEBUG
430 #include <sys/sysctl.h>
431 int prtrealloc = 0;
432 struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
433 #endif
434 #endif
435
436 /*
437 * NOTE: when re-enabling this, it must be updated for UFS2.
438 */
439
440 int doasyncfree = 1;
441
/*
 * ffs_reallocblks: try to move the blocks of a buffer cluster into one
 * contiguous run.
 *
 * This function is compiled out (it sits inside `#if 0').  Even when
 * compiled it returns ENOSPC unconditionally before the XXXUBC body
 * below runs.  That body only handles UFS1 block pointers (i_ffs1_db
 * and int32_t block maps).
 *
 * `v' is interpreted as a struct vop_reallocblks_args pointer carrying
 * the vnode and the cluster_save buffer list.  Returns 0 when the
 * cluster was moved, ENOSPC otherwise.
 */
442 int
443 ffs_reallocblks(void *v)
444 {
445 #ifdef XXXUBC
446 struct vop_reallocblks_args /* {
447 struct vnode *a_vp;
448 struct cluster_save *a_buflist;
449 } */ *ap = v;
450 struct fs *fs;
451 struct inode *ip;
452 struct vnode *vp;
453 struct buf *sbp, *ebp;
454 int32_t *bap, *ebap = NULL, *sbap; /* XXX ondisk32 */
455 struct cluster_save *buflist;
456 daddr_t start_lbn, end_lbn, soff, newblk, blkno;
457 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
458 int i, len, start_lvl, end_lvl, pref, ssize;
459 struct ufsmount *ump;
460 #endif /* XXXUBC */
461
462 /* XXXUBC don't reallocblks for now */
463 return ENOSPC;
464
465 #ifdef XXXUBC
466 vp = ap->a_vp;
467 ip = VTOI(vp);
468 fs = ip->i_fs;
469 ump = ip->i_ump;
/* Without contiguity summaries there is nothing to search. */
470 if (fs->fs_contigsumsize <= 0)
471 return (ENOSPC);
472 buflist = ap->a_buflist;
473 len = buflist->bs_nchildren;
474 start_lbn = buflist->bs_children[0]->b_lblkno;
475 end_lbn = start_lbn + len - 1;
476 #ifdef DIAGNOSTIC
/*
 * Sanity checks: every child is allocated, the children are logically
 * consecutive, and (except for the last one, which this loop does not
 * examine) physically consecutive.
 */
477 for (i = 0; i < len; i++)
478 if (!ffs_checkblk(ip,
479 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
480 panic("ffs_reallocblks: unallocated block 1");
481 for (i = 1; i < len; i++)
482 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
483 panic("ffs_reallocblks: non-logical cluster");
484 blkno = buflist->bs_children[0]->b_blkno;
485 ssize = fsbtodb(fs, fs->fs_frag);
486 for (i = 1; i < len - 1; i++)
487 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
488 panic("ffs_reallocblks: non-physical cluster %d", i);
489 #endif
490 /*
491 * If the latest allocation is in a new cylinder group, assume that
492 * the filesystem has decided to move and do not force it back to
493 * the previous cylinder group.
494 */
495 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
496 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
497 return (ENOSPC);
498 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
499 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
500 return (ENOSPC);
501 /*
502 * Get the starting offset and block map for the first block.
503 */
504 if (start_lvl == 0) {
505 sbap = &ip->i_ffs1_db[0];
506 soff = start_lbn;
507 } else {
508 idp = &start_ap[start_lvl - 1];
509 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
510 brelse(sbp, 0);
511 return (ENOSPC);
512 }
513 sbap = (int32_t *)sbp->b_data;
514 soff = idp->in_off;
515 }
516 /*
517 * Find the preferred location for the cluster.
518 */
519 mutex_enter(&ump->um_lock);
520 pref = ffs_blkpref(ip, start_lbn, soff, sbap);
521 /*
522 * If the block range spans two block maps, get the second map.
523 */
524 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
525 ssize = len;
526 } else {
527 #ifdef DIAGNOSTIC
528 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
529 panic("ffs_reallocblk: start == end");
530 #endif
531 ssize = len - (idp->in_off + 1);
/*
 * NOTE(review): um_lock was taken by the mutex_enter() above and is
 * still held across this bread().  The `goto fail' below reaches the
 * fail label without any mutex_exit() visible in this function, unlike
 * the ffs_hashalloc() failure path further down.  Confirm the locking
 * before re-enabling this code.
 */
532 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
533 goto fail;
534 ebap = (int32_t *)ebp->b_data; /* XXX ondisk32 */
535 }
536 /*
537 * Search the block map looking for an allocation of the desired size.
538 */
539 if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
540 len, ffs_clusteralloc)) == 0) {
541 mutex_exit(&ump->um_lock);
542 goto fail;
543 }
544 /*
545 * We have found a new contiguous block.
546 *
547 * First we have to replace the old block pointers with the new
548 * block pointers in the inode and indirect blocks associated
549 * with the file.
550 */
551 #ifdef DEBUG
552 if (prtrealloc)
553 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
554 start_lbn, end_lbn);
555 #endif
556 blkno = newblk;
557 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
558 daddr_t ba;
559
/* Crossing into the second block map: continue at its first entry. */
560 if (i == ssize) {
561 bap = ebap;
562 soff = -i;
563 }
564 /* XXX ondisk32 */
565 ba = ufs_rw32(*bap, UFS_FSNEEDSWAP(fs));
566 #ifdef DIAGNOSTIC
567 if (!ffs_checkblk(ip,
568 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
569 panic("ffs_reallocblks: unallocated block 2");
570 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != ba)
571 panic("ffs_reallocblks: alloc mismatch");
572 #endif
573 #ifdef DEBUG
574 if (prtrealloc)
575 printf(" %d,", ba);
576 #endif
577 if (DOINGSOFTDEP(vp)) {
578 if (sbap == &ip->i_ffs1_db[0] && i < ssize)
579 softdep_setup_allocdirect(ip, start_lbn + i,
580 blkno, ba, fs->fs_bsize, fs->fs_bsize,
581 buflist->bs_children[i]);
582 else
583 softdep_setup_allocindir_page(ip, start_lbn + i,
584 i < ssize ? sbp : ebp, soff + i, blkno,
585 ba, buflist->bs_children[i]);
586 }
587 /* XXX ondisk32 */
588 *bap++ = ufs_rw32((u_int32_t)blkno, UFS_FSNEEDSWAP(fs));
589 }
590 /*
591 * Next we must write out the modified inode and indirect blocks.
592 * For strict correctness, the writes should be synchronous since
593 * the old block values may have been written to disk. In practice
594 * they are almost never written, but if we are concerned about
595 * strict correctness, the `doasyncfree' flag should be set to zero.
596 *
597 * The test on `doasyncfree' should be changed to test a flag
598 * that shows whether the associated buffers and inodes have
599 * been written. The flag should be set when the cluster is
600 * started and cleared whenever the buffer or inode is flushed.
601 * We can then check below to see if it is set, and do the
602 * synchronous write only when it has been cleared.
603 */
604 if (sbap != &ip->i_ffs1_db[0]) {
605 if (doasyncfree)
606 bdwrite(sbp);
607 else
608 bwrite(sbp);
609 } else {
610 ip->i_flag |= IN_CHANGE | IN_UPDATE;
611 if (!doasyncfree)
612 ffs_update(vp, NULL, NULL, 1);
613 }
614 if (ssize < len) {
615 if (doasyncfree)
616 bdwrite(ebp);
617 else
618 bwrite(ebp);
619 }
620 /*
621 * Last, free the old blocks and assign the new blocks to the buffers.
622 */
623 #ifdef DEBUG
624 if (prtrealloc)
625 printf("\n\tnew:");
626 #endif
627 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
628 if (!DOINGSOFTDEP(vp))
629 ffs_blkfree(fs, ip->i_devvp,
630 dbtofsb(fs, buflist->bs_children[i]->b_blkno),
631 fs->fs_bsize, ip->i_number);
632 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
633 #ifdef DEBUG
634 if (!ffs_checkblk(ip,
635 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
636 panic("ffs_reallocblks: unallocated block 3");
637 if (prtrealloc)
638 printf(" %d,", blkno);
639 #endif
640 }
641 #ifdef DEBUG
642 if (prtrealloc) {
643 prtrealloc--;
644 printf("\n");
645 }
646 #endif
647 return (0);
648
/* Failure: release whichever block-map buffers were read. */
649 fail:
650 if (ssize < len)
651 brelse(ebp, 0);
652 if (sbap != &ip->i_ffs1_db[0])
653 brelse(sbp, 0);
654 return (ENOSPC);
655 #endif /* XXXUBC */
656 }
657 #endif /* 0 */
658
659 /*
660 * Allocate an inode in the file system.
661 *
662 * If allocating a directory, use ffs_dirpref to select the inode.
663 * If allocating in a directory, the following hierarchy is followed:
664 * 1) allocate the preferred inode.
665 * 2) allocate an inode in the same cylinder group.
666 * 3) quadradically rehash into other cylinder groups, until an
667 * available inode is located.
668 * If no inode preference is given the following hierarchy is used
669 * to allocate an inode:
670 * 1) allocate an inode in cylinder group 0.
671 * 2) quadradically rehash into other cylinder groups, until an
672 * available inode is located.
673 */
674 int
675 ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
676 struct vnode **vpp)
677 {
678 struct ufsmount *ump;
679 struct inode *pip;
680 struct fs *fs;
681 struct inode *ip;
682 struct timespec ts;
683 ino_t ino, ipref;
684 int cg, error;
685
686 *vpp = NULL;
687 pip = VTOI(pvp);
688 fs = pip->i_fs;
689 ump = pip->i_ump;
690
691 mutex_enter(&ump->um_lock);
692 if (fs->fs_cstotal.cs_nifree == 0)
693 goto noinodes;
694
695 if ((mode & IFMT) == IFDIR)
696 ipref = ffs_dirpref(pip);
697 else
698 ipref = pip->i_number;
699 if (ipref >= fs->fs_ncg * fs->fs_ipg)
700 ipref = 0;
701 cg = ino_to_cg(fs, ipref);
702 /*
703 * Track number of dirs created one after another
704 * in a same cg without intervening by files.
705 */
706 if ((mode & IFMT) == IFDIR) {
707 if (fs->fs_contigdirs[cg] < 255)
708 fs->fs_contigdirs[cg]++;
709 } else {
710 if (fs->fs_contigdirs[cg] > 0)
711 fs->fs_contigdirs[cg]--;
712 }
713 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg);
714 if (ino == 0)
715 goto noinodes;
716 error = VFS_VGET(pvp->v_mount, ino, vpp);
717 if (error) {
718 ffs_vfree(pvp, ino, mode);
719 return (error);
720 }
721 KASSERT((*vpp)->v_type == VNON);
722 ip = VTOI(*vpp);
723 if (ip->i_mode) {
724 #if 0
725 printf("mode = 0%o, inum = %d, fs = %s\n",
726 ip->i_mode, ip->i_number, fs->fs_fsmnt);
727 #else
728 printf("dmode %x mode %x dgen %x gen %x\n",
729 DIP(ip, mode), ip->i_mode,
730 DIP(ip, gen), ip->i_gen);
731 printf("size %llx blocks %llx\n",
732 (long long)DIP(ip, size), (long long)DIP(ip, blocks));
733 printf("ino %llu ipref %llu\n", (unsigned long long)ino,
734 (unsigned long long)ipref);
735 #if 0
736 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
737 (int)fs->fs_bsize, NOCRED, &bp);
738 #endif
739
740 #endif
741 panic("ffs_valloc: dup alloc");
742 }
743 if (DIP(ip, blocks)) { /* XXX */
744 printf("free inode %s/%llu had %" PRId64 " blocks\n",
745 fs->fs_fsmnt, (unsigned long long)ino, DIP(ip, blocks));
746 DIP_ASSIGN(ip, blocks, 0);
747 }
748 ip->i_flag &= ~IN_SPACECOUNTED;
749 ip->i_flags = 0;
750 DIP_ASSIGN(ip, flags, 0);
751 /*
752 * Set up a new generation number for this inode.
753 */
754 ip->i_gen++;
755 DIP_ASSIGN(ip, gen, ip->i_gen);
756 if (fs->fs_magic == FS_UFS2_MAGIC) {
757 vfs_timestamp(&ts);
758 ip->i_ffs2_birthtime = ts.tv_sec;
759 ip->i_ffs2_birthnsec = ts.tv_nsec;
760 }
761 return (0);
762 noinodes:
763 mutex_exit(&ump->um_lock);
764 ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
765 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
766 return (ENOSPC);
767 }
768
769 /*
770 * Find a cylinder group in which to place a directory.
771 *
772 * The policy implemented by this algorithm is to allocate a
773 * directory inode in the same cylinder group as its parent
774 * directory, but also to reserve space for its files inodes
775 * and data. Restrict the number of directories which may be
776 * allocated one after another in the same cylinder group
777 * without intervening allocation of files.
778 *
779 * If we allocate a first level directory then force allocation
780 * in another cylinder group.
781 */
782 static ino_t
783 ffs_dirpref(struct inode *pip)
784 {
785 register struct fs *fs;
786 int cg, prefcg;
787 int64_t dirsize, cgsize, curdsz;
788 int avgifree, avgbfree, avgndir;
789 int minifree, minbfree, maxndir;
790 int mincg, minndir;
791 int maxcontigdirs;
792
793 KASSERT(mutex_owned(&pip->i_ump->um_lock));
794
795 fs = pip->i_fs;
796
797 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
798 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
799 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
800
801 /*
802 * Force allocation in another cg if creating a first level dir.
803 */
804 if (ITOV(pip)->v_vflag & VV_ROOT) {
805 prefcg = random() % fs->fs_ncg;
806 mincg = prefcg;
807 minndir = fs->fs_ipg;
808 for (cg = prefcg; cg < fs->fs_ncg; cg++)
809 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
810 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
811 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
812 mincg = cg;
813 minndir = fs->fs_cs(fs, cg).cs_ndir;
814 }
815 for (cg = 0; cg < prefcg; cg++)
816 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
817 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
818 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
819 mincg = cg;
820 minndir = fs->fs_cs(fs, cg).cs_ndir;
821 }
822 return ((ino_t)(fs->fs_ipg * mincg));
823 }
824
825 /*
826 * Count various limits which used for
827 * optimal allocation of a directory inode.
828 */
829 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
830 minifree = avgifree - fs->fs_ipg / 4;
831 if (minifree < 0)
832 minifree = 0;
833 minbfree = avgbfree - fragstoblks(fs, fs->fs_fpg) / 4;
834 if (minbfree < 0)
835 minbfree = 0;
836 cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
837 dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
838 if (avgndir != 0) {
839 curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
840 if (dirsize < curdsz)
841 dirsize = curdsz;
842 }
843 if (cgsize < dirsize * 255)
844 maxcontigdirs = cgsize / dirsize;
845 else
846 maxcontigdirs = 255;
847 if (fs->fs_avgfpdir > 0)
848 maxcontigdirs = min(maxcontigdirs,
849 fs->fs_ipg / fs->fs_avgfpdir);
850 if (maxcontigdirs == 0)
851 maxcontigdirs = 1;
852
853 /*
854 * Limit number of dirs in one cg and reserve space for
855 * regular files, but only if we have no deficit in
856 * inodes or space.
857 */
858 prefcg = ino_to_cg(fs, pip->i_number);
859 for (cg = prefcg; cg < fs->fs_ncg; cg++)
860 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
861 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
862 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
863 if (fs->fs_contigdirs[cg] < maxcontigdirs)
864 return ((ino_t)(fs->fs_ipg * cg));
865 }
866 for (cg = 0; cg < prefcg; cg++)
867 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
868 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
869 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
870 if (fs->fs_contigdirs[cg] < maxcontigdirs)
871 return ((ino_t)(fs->fs_ipg * cg));
872 }
873 /*
874 * This is a backstop when we are deficient in space.
875 */
876 for (cg = prefcg; cg < fs->fs_ncg; cg++)
877 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
878 return ((ino_t)(fs->fs_ipg * cg));
879 for (cg = 0; cg < prefcg; cg++)
880 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
881 break;
882 return ((ino_t)(fs->fs_ipg * cg));
883 }
884
885 /*
886 * Select the desired position for the next block in a file. The file is
887 * logically divided into sections. The first section is composed of the
888 * direct blocks. Each additional section contains fs_maxbpg blocks.
889 *
890 * If no blocks have been allocated in the first section, the policy is to
891 * request a block in the same cylinder group as the inode that describes
892 * the file. If no blocks have been allocated in any other section, the
893 * policy is to place the section in a cylinder group with a greater than
894 * average number of free blocks. An appropriate cylinder group is found
895 * by using a rotor that sweeps the cylinder groups. When a new group of
896 * blocks is needed, the sweep begins in the cylinder group following the
897 * cylinder group from which the previous allocation was made. The sweep
898 * continues until a cylinder group with greater than the average number
899 * of free blocks is found. If the allocation is for the first block in an
900 * indirect block, the information on the previous allocation is unavailable;
901 * here a best guess is made based upon the logical block number being
902 * allocated.
903 *
904 * If a section is already partially allocated, the policy is to
905 * contiguously allocate fs_maxcontig blocks. The end of one of these
906 * contiguous blocks and the beginning of the next is laid out
907 * contigously if possible.
908 */
909 daddr_t
910 ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
911 int32_t *bap /* XXX ondisk32 */)
912 {
913 struct fs *fs;
914 int cg;
915 int avgbfree, startcg;
916
917 KASSERT(mutex_owned(&ip->i_ump->um_lock));
918
919 fs = ip->i_fs;
920 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
921 if (lbn < NDADDR + NINDIR(fs)) {
922 cg = ino_to_cg(fs, ip->i_number);
923 return (fs->fs_fpg * cg + fs->fs_frag);
924 }
925 /*
926 * Find a cylinder with greater than average number of
927 * unused data blocks.
928 */
929 if (indx == 0 || bap[indx - 1] == 0)
930 startcg =
931 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
932 else
933 startcg = dtog(fs,
934 ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
935 startcg %= fs->fs_ncg;
936 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
937 for (cg = startcg; cg < fs->fs_ncg; cg++)
938 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
939 return (fs->fs_fpg * cg + fs->fs_frag);
940 }
941 for (cg = 0; cg < startcg; cg++)
942 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
943 return (fs->fs_fpg * cg + fs->fs_frag);
944 }
945 return (0);
946 }
947 /*
948 * We just always try to lay things out contiguously.
949 */
950 return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
951 }
952
953 daddr_t
954 ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
955 {
956 struct fs *fs;
957 int cg;
958 int avgbfree, startcg;
959
960 KASSERT(mutex_owned(&ip->i_ump->um_lock));
961
962 fs = ip->i_fs;
963 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
964 if (lbn < NDADDR + NINDIR(fs)) {
965 cg = ino_to_cg(fs, ip->i_number);
966 return (fs->fs_fpg * cg + fs->fs_frag);
967 }
968 /*
969 * Find a cylinder with greater than average number of
970 * unused data blocks.
971 */
972 if (indx == 0 || bap[indx - 1] == 0)
973 startcg =
974 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
975 else
976 startcg = dtog(fs,
977 ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
978 startcg %= fs->fs_ncg;
979 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
980 for (cg = startcg; cg < fs->fs_ncg; cg++)
981 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
982 return (fs->fs_fpg * cg + fs->fs_frag);
983 }
984 for (cg = 0; cg < startcg; cg++)
985 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
986 return (fs->fs_fpg * cg + fs->fs_frag);
987 }
988 return (0);
989 }
990 /*
991 * We just always try to lay things out contiguously.
992 */
993 return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
994 }
995
996
997 /*
998 * Implement the cylinder overflow algorithm.
999 *
1000 * The policy implemented by this algorithm is:
1001 * 1) allocate the block in its requested cylinder group.
1002 * 2) quadradically rehash on the cylinder group number.
1003 * 3) brute force search for a free block.
1004 */
1005 /*VARARGS5*/
1006 static daddr_t
1007 ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
1008 int size /* size for data blocks, mode for inodes */,
1009 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
1010 {
1011 struct fs *fs;
1012 daddr_t result;
1013 int i, icg = cg;
1014
1015 fs = ip->i_fs;
1016 /*
1017 * 1: preferred cylinder group
1018 */
1019 result = (*allocator)(ip, cg, pref, size);
1020 if (result)
1021 return (result);
1022 /*
1023 * 2: quadratic rehash
1024 */
1025 for (i = 1; i < fs->fs_ncg; i *= 2) {
1026 cg += i;
1027 if (cg >= fs->fs_ncg)
1028 cg -= fs->fs_ncg;
1029 result = (*allocator)(ip, cg, 0, size);
1030 if (result)
1031 return (result);
1032 }
1033 /*
1034 * 3: brute force search
1035 * Note that we start at i == 2, since 0 was checked initially,
1036 * and 1 is always checked in the quadratic rehash.
1037 */
1038 cg = (icg + 2) % fs->fs_ncg;
1039 for (i = 2; i < fs->fs_ncg; i++) {
1040 result = (*allocator)(ip, cg, 0, size);
1041 if (result)
1042 return (result);
1043 cg++;
1044 if (cg == fs->fs_ncg)
1045 cg = 0;
1046 }
1047 return (0);
1048 }
1049
1050 /*
1051 * Determine whether a fragment can be extended.
1052 *
1053 * Check to see if the necessary fragments are available, and
1054 * if they are, allocate them.
1055 */
1056 static daddr_t
1057 ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
1058 {
1059 struct ufsmount *ump;
1060 struct fs *fs;
1061 struct cg *cgp;
1062 struct buf *bp;
1063 daddr_t bno;
1064 int frags, bbase;
1065 int i, error;
1066 u_int8_t *blksfree;
1067
1068 fs = ip->i_fs;
1069 ump = ip->i_ump;
1070
1071 KASSERT(mutex_owned(&ump->um_lock));
1072
1073 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
1074 return (0);
1075 frags = numfrags(fs, nsize);
1076 bbase = fragnum(fs, bprev);
1077 if (bbase > fragnum(fs, (bprev + frags - 1))) {
1078 /* cannot extend across a block boundary */
1079 return (0);
1080 }
1081 mutex_exit(&ump->um_lock);
1082 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1083 (int)fs->fs_cgsize, NOCRED, &bp);
1084 if (error)
1085 goto fail;
1086 cgp = (struct cg *)bp->b_data;
1087 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1088 goto fail;
1089 cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
1090 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1091 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1092 cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
1093 bno = dtogd(fs, bprev);
1094 blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
1095 for (i = numfrags(fs, osize); i < frags; i++)
1096 if (isclr(blksfree, bno + i))
1097 goto fail;
1098 /*
1099 * the current fragment can be extended
1100 * deduct the count on fragment being extended into
1101 * increase the count on the remaining fragment (if any)
1102 * allocate the extended piece
1103 */
1104 for (i = frags; i < fs->fs_frag - bbase; i++)
1105 if (isclr(blksfree, bno + i))
1106 break;
1107 ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
1108 if (i != frags)
1109 ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
1110 mutex_enter(&ump->um_lock);
1111 for (i = numfrags(fs, osize); i < frags; i++) {
1112 clrbit(blksfree, bno + i);
1113 ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
1114 fs->fs_cstotal.cs_nffree--;
1115 fs->fs_cs(fs, cg).cs_nffree--;
1116 }
1117 fs->fs_fmod = 1;
1118 ACTIVECG_CLR(fs, cg);
1119 mutex_exit(&ump->um_lock);
1120 if (DOINGSOFTDEP(ITOV(ip)))
1121 softdep_setup_blkmapdep(bp, fs, bprev);
1122 bdwrite(bp);
1123 return (bprev);
1124
1125 fail:
1126 brelse(bp, 0);
1127 mutex_enter(&ump->um_lock);
1128 return (0);
1129 }
1130
1131 /*
1132 * Determine whether a block can be allocated.
1133 *
1134 * Check to see if a block of the appropriate size is available,
1135 * and if it is, allocate it.
1136 */
1137 static daddr_t
1138 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
1139 {
1140 struct ufsmount *ump;
1141 struct fs *fs = ip->i_fs;
1142 struct cg *cgp;
1143 struct buf *bp;
1144 int32_t bno;
1145 daddr_t blkno;
1146 int error, frags, allocsiz, i;
1147 u_int8_t *blksfree;
1148 #ifdef FFS_EI
1149 const int needswap = UFS_FSNEEDSWAP(fs);
1150 #endif
1151
1152 ump = ip->i_ump;
1153
1154 KASSERT(mutex_owned(&ump->um_lock));
1155
1156 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
1157 return (0);
1158 mutex_exit(&ump->um_lock);
1159 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1160 (int)fs->fs_cgsize, NOCRED, &bp);
1161 if (error)
1162 goto fail;
1163 cgp = (struct cg *)bp->b_data;
1164 if (!cg_chkmagic(cgp, needswap) ||
1165 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
1166 goto fail;
1167 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1168 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1169 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1170 cgp->cg_time = ufs_rw64(time_second, needswap);
1171 if (size == fs->fs_bsize) {
1172 mutex_enter(&ump->um_lock);
1173 blkno = ffs_alloccgblk(ip, bp, bpref);
1174 ACTIVECG_CLR(fs, cg);
1175 mutex_exit(&ump->um_lock);
1176 bdwrite(bp);
1177 return (blkno);
1178 }
1179 /*
1180 * check to see if any fragments are already available
1181 * allocsiz is the size which will be allocated, hacking
1182 * it down to a smaller size if necessary
1183 */
1184 blksfree = cg_blksfree(cgp, needswap);
1185 frags = numfrags(fs, size);
1186 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
1187 if (cgp->cg_frsum[allocsiz] != 0)
1188 break;
1189 if (allocsiz == fs->fs_frag) {
1190 /*
1191 * no fragments were available, so a block will be
1192 * allocated, and hacked up
1193 */
1194 if (cgp->cg_cs.cs_nbfree == 0)
1195 goto fail;
1196 mutex_enter(&ump->um_lock);
1197 blkno = ffs_alloccgblk(ip, bp, bpref);
1198 bno = dtogd(fs, blkno);
1199 for (i = frags; i < fs->fs_frag; i++)
1200 setbit(blksfree, bno + i);
1201 i = fs->fs_frag - frags;
1202 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1203 fs->fs_cstotal.cs_nffree += i;
1204 fs->fs_cs(fs, cg).cs_nffree += i;
1205 fs->fs_fmod = 1;
1206 ufs_add32(cgp->cg_frsum[i], 1, needswap);
1207 ACTIVECG_CLR(fs, cg);
1208 mutex_exit(&ump->um_lock);
1209 bdwrite(bp);
1210 return (blkno);
1211 }
1212 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
1213 #if 0
1214 /*
1215 * XXX fvdl mapsearch will panic, and never return -1
1216 * also: returning NULL as daddr_t ?
1217 */
1218 if (bno < 0)
1219 goto fail;
1220 #endif
1221 for (i = 0; i < frags; i++)
1222 clrbit(blksfree, bno + i);
1223 mutex_enter(&ump->um_lock);
1224 ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
1225 fs->fs_cstotal.cs_nffree -= frags;
1226 fs->fs_cs(fs, cg).cs_nffree -= frags;
1227 fs->fs_fmod = 1;
1228 ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
1229 if (frags != allocsiz)
1230 ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
1231 blkno = cg * fs->fs_fpg + bno;
1232 ACTIVECG_CLR(fs, cg);
1233 mutex_exit(&ump->um_lock);
1234 if (DOINGSOFTDEP(ITOV(ip)))
1235 softdep_setup_blkmapdep(bp, fs, blkno);
1236 bdwrite(bp);
1237 return blkno;
1238
1239 fail:
1240 brelse(bp, 0);
1241 mutex_enter(&ump->um_lock);
1242 return (0);
1243 }
1244
1245 /*
1246 * Allocate a block in a cylinder group.
1247 *
1248 * This algorithm implements the following policy:
1249 * 1) allocate the requested block.
1250 * 2) allocate a rotationally optimal block in the same cylinder.
1251 * 3) allocate the next available block on the block rotor for the
1252 * specified cylinder group.
1253 * Note that this routine only allocates fs_bsize blocks; these
1254 * blocks may be fragmented by the routine that allocates them.
1255 */
1256 static daddr_t
1257 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
1258 {
1259 struct ufsmount *ump;
1260 struct fs *fs = ip->i_fs;
1261 struct cg *cgp;
1262 daddr_t blkno;
1263 int32_t bno;
1264 u_int8_t *blksfree;
1265 #ifdef FFS_EI
1266 const int needswap = UFS_FSNEEDSWAP(fs);
1267 #endif
1268
1269 ump = ip->i_ump;
1270
1271 KASSERT(mutex_owned(&ump->um_lock));
1272
1273 cgp = (struct cg *)bp->b_data;
1274 blksfree = cg_blksfree(cgp, needswap);
1275 if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
1276 bpref = ufs_rw32(cgp->cg_rotor, needswap);
1277 } else {
1278 bpref = blknum(fs, bpref);
1279 bno = dtogd(fs, bpref);
1280 /*
1281 * if the requested block is available, use it
1282 */
1283 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
1284 goto gotit;
1285 }
1286 /*
1287 * Take the next available block in this cylinder group.
1288 */
1289 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1290 if (bno < 0)
1291 return (0);
1292 cgp->cg_rotor = ufs_rw32(bno, needswap);
1293 gotit:
1294 blkno = fragstoblks(fs, bno);
1295 ffs_clrblock(fs, blksfree, blkno);
1296 ffs_clusteracct(fs, cgp, blkno, -1);
1297 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1298 fs->fs_cstotal.cs_nbfree--;
1299 fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
1300 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1301 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1302 int cylno;
1303 cylno = old_cbtocylno(fs, bno);
1304 KASSERT(cylno >= 0);
1305 KASSERT(cylno < fs->fs_old_ncyl);
1306 KASSERT(old_cbtorpos(fs, bno) >= 0);
1307 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
1308 ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
1309 needswap);
1310 ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
1311 }
1312 fs->fs_fmod = 1;
1313 blkno = ufs_rw32(cgp->cg_cgx, needswap) * fs->fs_fpg + bno;
1314 if (DOINGSOFTDEP(ITOV(ip))) {
1315 mutex_exit(&ump->um_lock);
1316 softdep_setup_blkmapdep(bp, fs, blkno);
1317 mutex_enter(&ump->um_lock);
1318 }
1319 return (blkno);
1320 }
1321
1322 #ifdef XXXUBC
1323 /*
1324 * Determine whether a cluster can be allocated.
1325 *
1326 * We do not currently check for optimal rotational layout if there
1327 * are multiple choices in the same cylinder group. Instead we just
1328 * take the first one that we find following bpref.
1329 */
1330
1331 /*
1332 * This function must be fixed for UFS2 if re-enabled.
1333 */
1334 static daddr_t
1335 ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
1336 {
1337 struct ufsmount *ump;
1338 struct fs *fs;
1339 struct cg *cgp;
1340 struct buf *bp;
1341 int i, got, run, bno, bit, map;
1342 u_char *mapp;
1343 int32_t *lp;
1344
1345 fs = ip->i_fs;
1346 ump = ip->i_ump;
1347
1348 KASSERT(mutex_owned(&ump->um_lock));
1349 if (fs->fs_maxcluster[cg] < len)
1350 return (0);
1351 mutex_exit(&ump->um_lock);
1352 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1353 NOCRED, &bp))
1354 goto fail;
1355 cgp = (struct cg *)bp->b_data;
1356 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1357 goto fail;
1358 /*
1359 * Check to see if a cluster of the needed size (or bigger) is
1360 * available in this cylinder group.
1361 */
1362 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len];
1363 for (i = len; i <= fs->fs_contigsumsize; i++)
1364 if (ufs_rw32(*lp++, UFS_FSNEEDSWAP(fs)) > 0)
1365 break;
1366 if (i > fs->fs_contigsumsize) {
1367 /*
1368 * This is the first time looking for a cluster in this
1369 * cylinder group. Update the cluster summary information
1370 * to reflect the true maximum sized cluster so that
1371 * future cluster allocation requests can avoid reading
1372 * the cylinder group map only to find no clusters.
1373 */
1374 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len - 1];
1375 for (i = len - 1; i > 0; i--)
1376 if (ufs_rw32(*lp--, UFS_FSNEEDSWAP(fs)) > 0)
1377 break;
1378 mutex_enter(&ump->um_lock);
1379 fs->fs_maxcluster[cg] = i;
1380 mutex_exit(&ump->um_lock);
1381 goto fail;
1382 }
1383 /*
1384 * Search the cluster map to find a big enough cluster.
1385 * We take the first one that we find, even if it is larger
1386 * than we need as we prefer to get one close to the previous
1387 * block allocation. We do not search before the current
1388 * preference point as we do not want to allocate a block
1389 * that is allocated before the previous one (as we will
1390 * then have to wait for another pass of the elevator
1391 * algorithm before it will be read). We prefer to fail and
1392 * be recalled to try an allocation in the next cylinder group.
1393 */
1394 if (dtog(fs, bpref) != cg)
1395 bpref = 0;
1396 else
1397 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1398 mapp = &cg_clustersfree(cgp, UFS_FSNEEDSWAP(fs))[bpref / NBBY];
1399 map = *mapp++;
1400 bit = 1 << (bpref % NBBY);
1401 for (run = 0, got = bpref;
1402 got < ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)); got++) {
1403 if ((map & bit) == 0) {
1404 run = 0;
1405 } else {
1406 run++;
1407 if (run == len)
1408 break;
1409 }
1410 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1411 bit <<= 1;
1412 } else {
1413 map = *mapp++;
1414 bit = 1;
1415 }
1416 }
1417 if (got == ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)))
1418 goto fail;
1419 /*
1420 * Allocate the cluster that we have found.
1421 */
1422 #ifdef DIAGNOSTIC
1423 for (i = 1; i <= len; i++)
1424 if (!ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1425 got - run + i))
1426 panic("ffs_clusteralloc: map mismatch");
1427 #endif
1428 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1429 if (dtog(fs, bno) != cg)
1430 panic("ffs_clusteralloc: allocated out of group");
1431 len = blkstofrags(fs, len);
1432 mutex_enter(&ump->um_lock);
1433 for (i = 0; i < len; i += fs->fs_frag)
1434 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
1435 panic("ffs_clusteralloc: lost block");
1436 ACTIVECG_CLR(fs, cg);
1437 mutex_exit(&ump->um_lock);
1438 bdwrite(bp);
1439 return (bno);
1440
1441 fail:
1442 brelse(bp, 0);
1443 mutex_enter(&ump->um_lock);
1444 return (0);
1445 }
1446 #endif /* XXXUBC */
1447
1448 /*
1449 * Determine whether an inode can be allocated.
1450 *
1451 * Check to see if an inode is available, and if it is,
1452 * allocate it using the following policy:
1453 * 1) allocate the requested inode.
1454 * 2) allocate the next available inode after the requested
1455 * inode in the specified cylinder group.
1456 */
1457 static daddr_t
1458 ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1459 {
1460 struct ufsmount *ump = ip->i_ump;
1461 struct fs *fs = ip->i_fs;
1462 struct cg *cgp;
1463 struct buf *bp, *ibp;
1464 u_int8_t *inosused;
1465 int error, start, len, loc, map, i;
1466 int32_t initediblk;
1467 struct ufs2_dinode *dp2;
1468 #ifdef FFS_EI
1469 const int needswap = UFS_FSNEEDSWAP(fs);
1470 #endif
1471
1472 KASSERT(mutex_owned(&ump->um_lock));
1473
1474 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1475 return (0);
1476 mutex_exit(&ump->um_lock);
1477 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1478 (int)fs->fs_cgsize, NOCRED, &bp);
1479 if (error)
1480 goto fail;
1481 cgp = (struct cg *)bp->b_data;
1482 if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0)
1483 goto fail;
1484 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1485 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1486 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1487 cgp->cg_time = ufs_rw64(time_second, needswap);
1488 inosused = cg_inosused(cgp, needswap);
1489 if (ipref) {
1490 ipref %= fs->fs_ipg;
1491 if (isclr(inosused, ipref))
1492 goto gotit;
1493 }
1494 start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
1495 len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
1496 NBBY);
1497 loc = skpc(0xff, len, &inosused[start]);
1498 if (loc == 0) {
1499 len = start + 1;
1500 start = 0;
1501 loc = skpc(0xff, len, &inosused[0]);
1502 if (loc == 0) {
1503 printf("cg = %d, irotor = %d, fs = %s\n",
1504 cg, ufs_rw32(cgp->cg_irotor, needswap),
1505 fs->fs_fsmnt);
1506 panic("ffs_nodealloccg: map corrupted");
1507 /* NOTREACHED */
1508 }
1509 }
1510 i = start + len - loc;
1511 map = inosused[i];
1512 ipref = i * NBBY;
1513 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1514 if ((map & i) == 0) {
1515 cgp->cg_irotor = ufs_rw32(ipref, needswap);
1516 goto gotit;
1517 }
1518 }
1519 printf("fs = %s\n", fs->fs_fsmnt);
1520 panic("ffs_nodealloccg: block not in map");
1521 /* NOTREACHED */
1522 gotit:
1523 /*
1524 * Check to see if we need to initialize more inodes.
1525 */
1526 initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
1527 ibp = NULL;
1528 if (fs->fs_magic == FS_UFS2_MAGIC &&
1529 ipref + INOPB(fs) > initediblk &&
1530 initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
1531 ibp = getblk(ip->i_devvp, fsbtodb(fs,
1532 ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
1533 (int)fs->fs_bsize, 0, 0);
1534 memset(ibp->b_data, 0, fs->fs_bsize);
1535 dp2 = (struct ufs2_dinode *)(ibp->b_data);
1536 for (i = 0; i < INOPB(fs); i++) {
1537 /*
1538 * Don't bother to swap, it's supposed to be
1539 * random, after all.
1540 */
1541 dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
1542 dp2++;
1543 }
1544 initediblk += INOPB(fs);
1545 cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
1546 }
1547
1548 mutex_enter(&ump->um_lock);
1549 ACTIVECG_CLR(fs, cg);
1550 setbit(inosused, ipref);
1551 ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1552 fs->fs_cstotal.cs_nifree--;
1553 fs->fs_cs(fs, cg).cs_nifree--;
1554 fs->fs_fmod = 1;
1555 if ((mode & IFMT) == IFDIR) {
1556 ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1557 fs->fs_cstotal.cs_ndir++;
1558 fs->fs_cs(fs, cg).cs_ndir++;
1559 }
1560 mutex_exit(&ump->um_lock);
1561 if (DOINGSOFTDEP(ITOV(ip)))
1562 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
1563 bdwrite(bp);
1564 if (ibp != NULL)
1565 bawrite(ibp);
1566 return (cg * fs->fs_ipg + ipref);
1567 fail:
1568 brelse(bp, 0);
1569 mutex_enter(&ump->um_lock);
1570 return (0);
1571 }
1572
1573 /*
1574 * Free a block or fragment.
1575 *
1576 * The specified block or fragment is placed back in the
1577 * free map. If a fragment is deallocated, a possible
1578 * block reassembly is checked.
1579 */
1580 void
1581 ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
1582 ino_t inum)
1583 {
1584 struct cg *cgp;
1585 struct buf *bp;
1586 struct ufsmount *ump;
1587 int32_t fragno, cgbno;
1588 daddr_t cgblkno;
1589 int i, error, cg, blk, frags, bbase;
1590 u_int8_t *blksfree;
1591 dev_t dev;
1592 const int needswap = UFS_FSNEEDSWAP(fs);
1593
1594 cg = dtog(fs, bno);
1595 if (devvp->v_type != VBLK) {
1596 /* devvp is a snapshot */
1597 dev = VTOI(devvp)->i_devvp->v_rdev;
1598 ump = VFSTOUFS(devvp->v_mount);
1599 cgblkno = fragstoblks(fs, cgtod(fs, cg));
1600 } else {
1601 dev = devvp->v_rdev;
1602 ump = VFSTOUFS(devvp->v_specmountpoint);
1603 cgblkno = fsbtodb(fs, cgtod(fs, cg));
1604 if (ffs_snapblkfree(fs, devvp, bno, size, inum))
1605 return;
1606 }
1607 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
1608 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
1609 printf("dev = 0x%x, bno = %" PRId64 " bsize = %d, "
1610 "size = %ld, fs = %s\n",
1611 dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
1612 panic("blkfree: bad size");
1613 }
1614
1615 if (bno >= fs->fs_size) {
1616 printf("bad block %" PRId64 ", ino %llu\n", bno,
1617 (unsigned long long)inum);
1618 ffs_fserr(fs, inum, "bad block");
1619 return;
1620 }
1621 error = bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp);
1622 if (error) {
1623 brelse(bp, 0);
1624 return;
1625 }
1626 cgp = (struct cg *)bp->b_data;
1627 if (!cg_chkmagic(cgp, needswap)) {
1628 brelse(bp, 0);
1629 return;
1630 }
1631 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1632 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1633 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1634 cgp->cg_time = ufs_rw64(time_second, needswap);
1635 cgbno = dtogd(fs, bno);
1636 blksfree = cg_blksfree(cgp, needswap);
1637 mutex_enter(&ump->um_lock);
1638 if (size == fs->fs_bsize) {
1639 fragno = fragstoblks(fs, cgbno);
1640 if (!ffs_isfreeblock(fs, blksfree, fragno)) {
1641 if (devvp->v_type != VBLK) {
1642 /* devvp is a snapshot */
1643 mutex_exit(&ump->um_lock);
1644 brelse(bp, 0);
1645 return;
1646 }
1647 printf("dev = 0x%x, block = %" PRId64 ", fs = %s\n",
1648 dev, bno, fs->fs_fsmnt);
1649 panic("blkfree: freeing free block");
1650 }
1651 ffs_setblock(fs, blksfree, fragno);
1652 ffs_clusteracct(fs, cgp, fragno, 1);
1653 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1654 fs->fs_cstotal.cs_nbfree++;
1655 fs->fs_cs(fs, cg).cs_nbfree++;
1656 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1657 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1658 i = old_cbtocylno(fs, cgbno);
1659 KASSERT(i >= 0);
1660 KASSERT(i < fs->fs_old_ncyl);
1661 KASSERT(old_cbtorpos(fs, cgbno) >= 0);
1662 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos);
1663 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1,
1664 needswap);
1665 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1666 }
1667 } else {
1668 bbase = cgbno - fragnum(fs, cgbno);
1669 /*
1670 * decrement the counts associated with the old frags
1671 */
1672 blk = blkmap(fs, blksfree, bbase);
1673 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1674 /*
1675 * deallocate the fragment
1676 */
1677 frags = numfrags(fs, size);
1678 for (i = 0; i < frags; i++) {
1679 if (isset(blksfree, cgbno + i)) {
1680 printf("dev = 0x%x, block = %" PRId64
1681 ", fs = %s\n",
1682 dev, bno + i, fs->fs_fsmnt);
1683 panic("blkfree: freeing free frag");
1684 }
1685 setbit(blksfree, cgbno + i);
1686 }
1687 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1688 fs->fs_cstotal.cs_nffree += i;
1689 fs->fs_cs(fs, cg).cs_nffree += i;
1690 /*
1691 * add back in counts associated with the new frags
1692 */
1693 blk = blkmap(fs, blksfree, bbase);
1694 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1695 /*
1696 * if a complete block has been reassembled, account for it
1697 */
1698 fragno = fragstoblks(fs, bbase);
1699 if (ffs_isblock(fs, blksfree, fragno)) {
1700 ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
1701 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1702 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1703 ffs_clusteracct(fs, cgp, fragno, 1);
1704 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1705 fs->fs_cstotal.cs_nbfree++;
1706 fs->fs_cs(fs, cg).cs_nbfree++;
1707 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1708 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1709 i = old_cbtocylno(fs, bbase);
1710 KASSERT(i >= 0);
1711 KASSERT(i < fs->fs_old_ncyl);
1712 KASSERT(old_cbtorpos(fs, bbase) >= 0);
1713 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos);
1714 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs,
1715 bbase)], 1, needswap);
1716 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1717 }
1718 }
1719 }
1720 fs->fs_fmod = 1;
1721 ACTIVECG_CLR(fs, cg);
1722 mutex_exit(&ump->um_lock);
1723 bdwrite(bp);
1724 }
1725
1726 #if defined(DIAGNOSTIC) || defined(DEBUG)
1727 #ifdef XXXUBC
1728 /*
1729 * Verify allocation of a block or fragment. Returns true if block or
1730 * fragment is allocated, false if it is free.
1731 */
1732 static int
1733 ffs_checkblk(struct inode *ip, daddr_t bno, long size)
1734 {
1735 struct fs *fs;
1736 struct cg *cgp;
1737 struct buf *bp;
1738 int i, error, frags, free;
1739
1740 fs = ip->i_fs;
1741 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1742 printf("bsize = %d, size = %ld, fs = %s\n",
1743 fs->fs_bsize, size, fs->fs_fsmnt);
1744 panic("checkblk: bad size");
1745 }
1746 if (bno >= fs->fs_size)
1747 panic("checkblk: bad block %d", bno);
1748 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1749 (int)fs->fs_cgsize, NOCRED, &bp);
1750 if (error) {
1751 brelse(bp, 0);
1752 return 0;
1753 }
1754 cgp = (struct cg *)bp->b_data;
1755 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1756 brelse(bp, 0);
1757 return 0;
1758 }
1759 bno = dtogd(fs, bno);
1760 if (size == fs->fs_bsize) {
1761 free = ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1762 fragstoblks(fs, bno));
1763 } else {
1764 frags = numfrags(fs, size);
1765 for (free = 0, i = 0; i < frags; i++)
1766 if (isset(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i))
1767 free++;
1768 if (free != 0 && free != frags)
1769 panic("checkblk: partially free fragment");
1770 }
1771 brelse(bp, 0);
1772 return (!free);
1773 }
1774 #endif /* XXXUBC */
1775 #endif /* DIAGNOSTIC */
1776
1777 /*
1778 * Free an inode.
1779 */
1780 int
1781 ffs_vfree(struct vnode *vp, ino_t ino, int mode)
1782 {
1783
1784 if (DOINGSOFTDEP(vp)) {
1785 softdep_freefile(vp, ino, mode);
1786 return (0);
1787 }
1788 return ffs_freefile(VTOI(vp)->i_fs, VTOI(vp)->i_devvp, ino, mode);
1789 }
1790
1791 /*
1792 * Do the actual free operation.
1793 * The specified inode is placed back in the free map.
1794 */
1795 int
1796 ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
1797 {
1798 struct ufsmount *ump;
1799 struct cg *cgp;
1800 struct buf *bp;
1801 int error, cg;
1802 daddr_t cgbno;
1803 u_int8_t *inosused;
1804 dev_t dev;
1805 #ifdef FFS_EI
1806 const int needswap = UFS_FSNEEDSWAP(fs);
1807 #endif
1808
1809 cg = ino_to_cg(fs, ino);
1810 if (devvp->v_type != VBLK) {
1811 /* devvp is a snapshot */
1812 dev = VTOI(devvp)->i_devvp->v_rdev;
1813 ump = VFSTOUFS(devvp->v_mount);
1814 cgbno = fragstoblks(fs, cgtod(fs, cg));
1815 } else {
1816 dev = devvp->v_rdev;
1817 ump = VFSTOUFS(devvp->v_specmountpoint);
1818 cgbno = fsbtodb(fs, cgtod(fs, cg));
1819 }
1820 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1821 panic("ifree: range: dev = 0x%x, ino = %llu, fs = %s",
1822 dev, (unsigned long long)ino, fs->fs_fsmnt);
1823 error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp);
1824 if (error) {
1825 brelse(bp, 0);
1826 return (error);
1827 }
1828 cgp = (struct cg *)bp->b_data;
1829 if (!cg_chkmagic(cgp, needswap)) {
1830 brelse(bp, 0);
1831 return (0);
1832 }
1833 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1834 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1835 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1836 cgp->cg_time = ufs_rw64(time_second, needswap);
1837 inosused = cg_inosused(cgp, needswap);
1838 ino %= fs->fs_ipg;
1839 if (isclr(inosused, ino)) {
1840 printf("ifree: dev = 0x%x, ino = %llu, fs = %s\n",
1841 dev, (unsigned long long)ino + cg * fs->fs_ipg,
1842 fs->fs_fsmnt);
1843 if (fs->fs_ronly == 0)
1844 panic("ifree: freeing free inode");
1845 }
1846 clrbit(inosused, ino);
1847 if (ino < ufs_rw32(cgp->cg_irotor, needswap))
1848 cgp->cg_irotor = ufs_rw32(ino, needswap);
1849 ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
1850 mutex_enter(&ump->um_lock);
1851 fs->fs_cstotal.cs_nifree++;
1852 fs->fs_cs(fs, cg).cs_nifree++;
1853 if ((mode & IFMT) == IFDIR) {
1854 ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
1855 fs->fs_cstotal.cs_ndir--;
1856 fs->fs_cs(fs, cg).cs_ndir--;
1857 }
1858 fs->fs_fmod = 1;
1859 ACTIVECG_CLR(fs, cg);
1860 mutex_exit(&ump->um_lock);
1861 bdwrite(bp);
1862 return (0);
1863 }
1864
1865 /*
1866 * Check to see if a file is free.
1867 */
1868 int
1869 ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
1870 {
1871 struct cg *cgp;
1872 struct buf *bp;
1873 daddr_t cgbno;
1874 int ret, cg;
1875 u_int8_t *inosused;
1876
1877 cg = ino_to_cg(fs, ino);
1878 if (devvp->v_type != VBLK) {
1879 /* devvp is a snapshot */
1880 cgbno = fragstoblks(fs, cgtod(fs, cg));
1881 } else
1882 cgbno = fsbtodb(fs, cgtod(fs, cg));
1883 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1884 return 1;
1885 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) {
1886 brelse(bp, 0);
1887 return 1;
1888 }
1889 cgp = (struct cg *)bp->b_data;
1890 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1891 brelse(bp, 0);
1892 return 1;
1893 }
1894 inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
1895 ino %= fs->fs_ipg;
1896 ret = isclr(inosused, ino);
1897 brelse(bp, 0);
1898 return ret;
1899 }
1900
1901 /*
1902 * Find a block of the specified size in the specified cylinder group.
1903 *
1904 * It is a panic if a request is made to find a block if none are
1905 * available.
1906 */
1907 static int32_t
1908 ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
1909 {
1910 int32_t bno;
1911 int start, len, loc, i;
1912 int blk, field, subfield, pos;
1913 int ostart, olen;
1914 u_int8_t *blksfree;
1915 #ifdef FFS_EI
1916 const int needswap = UFS_FSNEEDSWAP(fs);
1917 #endif
1918
1919 /* KASSERT(mutex_owned(&ump->um_lock)); */
1920
1921 /*
1922 * find the fragment by searching through the free block
1923 * map for an appropriate bit pattern
1924 */
1925 if (bpref)
1926 start = dtogd(fs, bpref) / NBBY;
1927 else
1928 start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
1929 blksfree = cg_blksfree(cgp, needswap);
1930 len = howmany(fs->fs_fpg, NBBY) - start;
1931 ostart = start;
1932 olen = len;
1933 loc = scanc((u_int)len,
1934 (const u_char *)&blksfree[start],
1935 (const u_char *)fragtbl[fs->fs_frag],
1936 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
1937 if (loc == 0) {
1938 len = start + 1;
1939 start = 0;
1940 loc = scanc((u_int)len,
1941 (const u_char *)&blksfree[0],
1942 (const u_char *)fragtbl[fs->fs_frag],
1943 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
1944 if (loc == 0) {
1945 printf("start = %d, len = %d, fs = %s\n",
1946 ostart, olen, fs->fs_fsmnt);
1947 printf("offset=%d %ld\n",
1948 ufs_rw32(cgp->cg_freeoff, needswap),
1949 (long)blksfree - (long)cgp);
1950 printf("cg %d\n", cgp->cg_cgx);
1951 panic("ffs_alloccg: map corrupted");
1952 /* NOTREACHED */
1953 }
1954 }
1955 bno = (start + len - loc) * NBBY;
1956 cgp->cg_frotor = ufs_rw32(bno, needswap);
1957 /*
1958 * found the byte in the map
1959 * sift through the bits to find the selected frag
1960 */
1961 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1962 blk = blkmap(fs, blksfree, bno);
1963 blk <<= 1;
1964 field = around[allocsiz];
1965 subfield = inside[allocsiz];
1966 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1967 if ((blk & field) == subfield)
1968 return (bno + pos);
1969 field <<= 1;
1970 subfield <<= 1;
1971 }
1972 }
1973 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1974 panic("ffs_alloccg: block not in map");
1975 /* return (-1); */
1976 }
1977
1978 /*
1979 * Update the cluster map because of an allocation or free.
1980 *
1981 * Cnt == 1 means free; cnt == -1 means allocating.
1982 */
1983 void
1984 ffs_clusteracct(struct fs *fs, struct cg *cgp, int32_t blkno, int cnt)
1985 {
1986 int32_t *sump;
1987 int32_t *lp;
1988 u_char *freemapp, *mapp;
1989 int i, start, end, forw, back, map, bit;
1990 #ifdef FFS_EI
1991 const int needswap = UFS_FSNEEDSWAP(fs);
1992 #endif
1993
1994 /* KASSERT(mutex_owned(&ump->um_lock)); */
1995
1996 if (fs->fs_contigsumsize <= 0)
1997 return;
1998 freemapp = cg_clustersfree(cgp, needswap);
1999 sump = cg_clustersum(cgp, needswap);
2000 /*
2001 * Allocate or clear the actual block.
2002 */
2003 if (cnt > 0)
2004 setbit(freemapp, blkno);
2005 else
2006 clrbit(freemapp, blkno);
2007 /*
2008 * Find the size of the cluster going forward.
2009 */
2010 start = blkno + 1;
2011 end = start + fs->fs_contigsumsize;
2012 if (end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
2013 end = ufs_rw32(cgp->cg_nclusterblks, needswap);
2014 mapp = &freemapp[start / NBBY];
2015 map = *mapp++;
2016 bit = 1 << (start % NBBY);
2017 for (i = start; i < end; i++) {
2018 if ((map & bit) == 0)
2019 break;
2020 if ((i & (NBBY - 1)) != (NBBY - 1)) {
2021 bit <<= 1;
2022 } else {
2023 map = *mapp++;
2024 bit = 1;
2025 }
2026 }
2027 forw = i - start;
2028 /*
2029 * Find the size of the cluster going backward.
2030 */
2031 start = blkno - 1;
2032 end = start - fs->fs_contigsumsize;
2033 if (end < 0)
2034 end = -1;
2035 mapp = &freemapp[start / NBBY];
2036 map = *mapp--;
2037 bit = 1 << (start % NBBY);
2038 for (i = start; i > end; i--) {
2039 if ((map & bit) == 0)
2040 break;
2041 if ((i & (NBBY - 1)) != 0) {
2042 bit >>= 1;
2043 } else {
2044 map = *mapp--;
2045 bit = 1 << (NBBY - 1);
2046 }
2047 }
2048 back = start - i;
2049 /*
2050 * Account for old cluster and the possibly new forward and
2051 * back clusters.
2052 */
2053 i = back + forw + 1;
2054 if (i > fs->fs_contigsumsize)
2055 i = fs->fs_contigsumsize;
2056 ufs_add32(sump[i], cnt, needswap);
2057 if (back > 0)
2058 ufs_add32(sump[back], -cnt, needswap);
2059 if (forw > 0)
2060 ufs_add32(sump[forw], -cnt, needswap);
2061
2062 /*
2063 * Update cluster summary information.
2064 */
2065 lp = &sump[fs->fs_contigsumsize];
2066 for (i = fs->fs_contigsumsize; i > 0; i--)
2067 if (ufs_rw32(*lp--, needswap) > 0)
2068 break;
2069 fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
2070 }
2071
2072 /*
2073 * Fserr prints the name of a file system with an error diagnostic.
2074 *
2075 * The form of the error message is:
2076 * fs: error message
2077 */
2078 static void
2079 ffs_fserr(struct fs *fs, u_int uid, const char *cp)
2080 {
2081
2082 log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
2083 uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp);
2084 }
2085