 1 /* $NetBSD: ffs_alloc.c,v 1.109.2.1 2008/06/10 14:51:23 simonb Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Copyright (c) 2002 Networks Associates Technology, Inc.
41 * All rights reserved.
42 *
43 * This software was developed for the FreeBSD Project by Marshall
44 * Kirk McKusick and Network Associates Laboratories, the Security
45 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
46 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
47 * research program
48 *
49 * Copyright (c) 1982, 1986, 1989, 1993
50 * The Regents of the University of California. All rights reserved.
51 *
52 * Redistribution and use in source and binary forms, with or without
53 * modification, are permitted provided that the following conditions
54 * are met:
55 * 1. Redistributions of source code must retain the above copyright
56 * notice, this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright
58 * notice, this list of conditions and the following disclaimer in the
59 * documentation and/or other materials provided with the distribution.
60 * 3. Neither the name of the University nor the names of its contributors
61 * may be used to endorse or promote products derived from this software
62 * without specific prior written permission.
63 *
64 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
67 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
74 * SUCH DAMAGE.
75 *
76 * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95
77 */
78
79 #include <sys/cdefs.h>
80 __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.109.2.1 2008/06/10 14:51:23 simonb Exp $");
81
82 #if defined(_KERNEL_OPT)
83 #include "opt_ffs.h"
84 #include "opt_quota.h"
85 #endif
86
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/buf.h>
90 #include <sys/fstrans.h>
91 #include <sys/kauth.h>
92 #include <sys/kernel.h>
93 #include <sys/mount.h>
94 #include <sys/proc.h>
95 #include <sys/syslog.h>
96 #include <sys/vnode.h>
97 #include <sys/wapbl.h>
98
99 #include <miscfs/specfs/specdev.h>
100 #include <ufs/ufs/quota.h>
101 #include <ufs/ufs/ufsmount.h>
102 #include <ufs/ufs/inode.h>
103 #include <ufs/ufs/ufs_extern.h>
104 #include <ufs/ufs/ufs_bswap.h>
105 #include <ufs/ufs/ufs_wapbl.h>
106
107 #include <ufs/ffs/fs.h>
108 #include <ufs/ffs/ffs_extern.h>
109
110 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
111 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
112 #ifdef XXXUBC
113 static daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int);
114 #endif
115 static ino_t ffs_dirpref(struct inode *);
116 static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
117 static void ffs_fserr(struct fs *, u_int, const char *);
118 static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int,
119 daddr_t (*)(struct inode *, int, daddr_t, int));
120 static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int);
121 static int32_t ffs_mapsearch(struct fs *, struct cg *,
122 daddr_t, int);
123 #if defined(DIAGNOSTIC) || defined(DEBUG)
124 #ifdef XXXUBC
125 static int ffs_checkblk(struct inode *, daddr_t, long size);
126 #endif
127 #endif
128
 129 /* if 1, changes in optimization strategy are logged */
130 int ffs_log_changeopt = 0;
131
132 /* in ffs_tables.c */
133 extern const int inside[], around[];
134 extern const u_char * const fragtbl[];
135
136 /*
137 * Allocate a block in the file system.
138 *
139 * The size of the requested block is given, which must be some
140 * multiple of fs_fsize and <= fs_bsize.
141 * A preference may be optionally specified. If a preference is given
142 * the following hierarchy is used to allocate a block:
143 * 1) allocate the requested block.
144 * 2) allocate a rotationally optimal block in the same cylinder.
145 * 3) allocate a block in the same cylinder group.
 146 * 4) quadratically rehash into other cylinder groups, until an
147 * available block is located.
148 * If no block preference is given the following hierarchy is used
149 * to allocate a block:
150 * 1) allocate a block in the cylinder group that contains the
151 * inode for the file.
 152 * 2) quadratically rehash into other cylinder groups, until an
153 * available block is located.
154 *
155 * => called with um_lock held
156 * => releases um_lock before returning
157 */
158 int
159 ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
160 kauth_cred_t cred, daddr_t *bnp)
161 {
162 struct ufsmount *ump;
163 struct fs *fs;
164 daddr_t bno;
165 int cg;
166 #ifdef QUOTA
167 int error;
168 #endif
169
170 fs = ip->i_fs;
171 ump = ip->i_ump;
172
173 KASSERT(mutex_owned(&ump->um_lock));
174
175 #ifdef UVM_PAGE_TRKOWN
176 if (ITOV(ip)->v_type == VREG &&
177 lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
178 struct vm_page *pg;
179 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
180 voff_t off = trunc_page(lblktosize(fs, lbn));
181 voff_t endoff = round_page(lblktosize(fs, lbn) + size);
182
183 mutex_enter(&uobj->vmobjlock);
184 while (off < endoff) {
185 pg = uvm_pagelookup(uobj, off);
186 KASSERT(pg != NULL);
187 KASSERT(pg->owner == curproc->p_pid);
188 off += PAGE_SIZE;
189 }
190 mutex_exit(&uobj->vmobjlock);
191 }
192 #endif
193
194 *bnp = 0;
195 #ifdef DIAGNOSTIC
196 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
197 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
198 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
199 panic("ffs_alloc: bad size");
200 }
201 if (cred == NOCRED)
202 panic("ffs_alloc: missing credential");
203 #endif /* DIAGNOSTIC */
204 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
205 goto nospace;
206 if (freespace(fs, fs->fs_minfree) <= 0 &&
207 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0)
208 goto nospace;
209 #ifdef QUOTA
210 mutex_exit(&ump->um_lock);
211 if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
212 return (error);
213 mutex_enter(&ump->um_lock);
214 #endif
215 if (bpref >= fs->fs_size)
216 bpref = 0;
217 if (bpref == 0)
218 cg = ino_to_cg(fs, ip->i_number);
219 else
220 cg = dtog(fs, bpref);
221 bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
222 if (bno > 0) {
223 DIP_ADD(ip, blocks, btodb(size));
224 ip->i_flag |= IN_CHANGE | IN_UPDATE;
225 *bnp = bno;
226 return (0);
227 }
228 #ifdef QUOTA
229 /*
230 * Restore user's disk quota because allocation failed.
231 */
232 (void) chkdq(ip, -btodb(size), cred, FORCE);
233 #endif
234 nospace:
235 mutex_exit(&ump->um_lock);
236 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
237 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
238 return (ENOSPC);
239 }
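/*
 * Minimal usage sketch, kept under #if 0 (never compiled): roughly how a
 * caller such as ffs_balloc() obtains a fresh block.  The exact argument
 * expressions are assumptions for illustration; ip, lbn, fs, ump and cred
 * are taken from the surrounding caller context.  See ffs_balloc.c for the
 * real call sites.
 */
#if 0
	daddr_t newb;
	int error;

	mutex_enter(&ump->um_lock);		/* ffs_alloc() expects um_lock held */
	error = ffs_alloc(ip, lbn,
	    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]),
	    fs->fs_bsize, cred, &newb);		/* releases um_lock */
	if (error == 0) {
		/* newb now holds the allocated filesystem block number */
	}
#endif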
240
241 /*
242 * Reallocate a fragment to a bigger size
243 *
 244 * The number and size of the old block are given, and a preference
 245 * and new size are also specified. The allocator attempts to extend
246 * the original block. Failing that, the regular block allocator is
247 * invoked to get an appropriate block.
248 *
249 * => called with um_lock held
250 * => return with um_lock released
251 */
252 int
253 ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
254 int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop)
255 {
256 struct ufsmount *ump;
257 struct fs *fs;
258 struct buf *bp;
259 int cg, request, error;
260 daddr_t bprev, bno;
261
262 fs = ip->i_fs;
263 ump = ip->i_ump;
264
265 KASSERT(mutex_owned(&ump->um_lock));
266
267 #ifdef UVM_PAGE_TRKOWN
268 if (ITOV(ip)->v_type == VREG) {
269 struct vm_page *pg;
270 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
271 voff_t off = trunc_page(lblktosize(fs, lbprev));
272 voff_t endoff = round_page(lblktosize(fs, lbprev) + osize);
273
274 mutex_enter(&uobj->vmobjlock);
275 while (off < endoff) {
276 pg = uvm_pagelookup(uobj, off);
277 KASSERT(pg != NULL);
278 KASSERT(pg->owner == curproc->p_pid);
279 KASSERT((pg->flags & PG_CLEAN) == 0);
280 off += PAGE_SIZE;
281 }
282 mutex_exit(&uobj->vmobjlock);
283 }
284 #endif
285
286 #ifdef DIAGNOSTIC
287 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
288 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
289 printf(
290 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
291 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
292 panic("ffs_realloccg: bad size");
293 }
294 if (cred == NOCRED)
295 panic("ffs_realloccg: missing credential");
296 #endif /* DIAGNOSTIC */
297 if (freespace(fs, fs->fs_minfree) <= 0 &&
298 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0) {
299 mutex_exit(&ump->um_lock);
300 goto nospace;
301 }
302 if (fs->fs_magic == FS_UFS2_MAGIC)
303 bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs));
304 else
305 bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs));
306
307 if (bprev == 0) {
308 printf("dev = 0x%x, bsize = %d, bprev = %" PRId64 ", fs = %s\n",
309 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
310 panic("ffs_realloccg: bad bprev");
311 }
312 mutex_exit(&ump->um_lock);
313
314 /*
315 * Allocate the extra space in the buffer.
316 */
317 if (bpp != NULL &&
318 (error = bread(ITOV(ip), lbprev, osize, NOCRED, 0, &bp)) != 0) {
319 brelse(bp, 0);
320 return (error);
321 }
322 #ifdef QUOTA
323 if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
324 if (bpp != NULL) {
325 brelse(bp, 0);
326 }
327 return (error);
328 }
329 #endif
330 /*
331 * Check for extension in the existing location.
332 */
333 cg = dtog(fs, bprev);
334 mutex_enter(&ump->um_lock);
335 if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
336 DIP_ADD(ip, blocks, btodb(nsize - osize));
337 ip->i_flag |= IN_CHANGE | IN_UPDATE;
338
339 if (bpp != NULL) {
340 if (bp->b_blkno != fsbtodb(fs, bno))
341 panic("bad blockno");
342 allocbuf(bp, nsize, 1);
343 memset((char *)bp->b_data + osize, 0, nsize - osize);
344 mutex_enter(bp->b_objlock);
345 KASSERT(!cv_has_waiters(&bp->b_done));
346 bp->b_oflags |= BO_DONE;
347 mutex_exit(bp->b_objlock);
348 *bpp = bp;
349 }
350 if (blknop != NULL) {
351 *blknop = bno;
352 }
353 return (0);
354 }
355 /*
356 * Allocate a new disk location.
357 */
358 if (bpref >= fs->fs_size)
359 bpref = 0;
360 switch ((int)fs->fs_optim) {
361 case FS_OPTSPACE:
362 /*
363 * Allocate an exact sized fragment. Although this makes
364 * best use of space, we will waste time relocating it if
365 * the file continues to grow. If the fragmentation is
366 * less than half of the minimum free reserve, we choose
367 * to begin optimizing for time.
368 */
369 request = nsize;
370 if (fs->fs_minfree < 5 ||
371 fs->fs_cstotal.cs_nffree >
372 fs->fs_dsize * fs->fs_minfree / (2 * 100))
373 break;
374
375 if (ffs_log_changeopt) {
376 log(LOG_NOTICE,
377 "%s: optimization changed from SPACE to TIME\n",
378 fs->fs_fsmnt);
379 }
380
381 fs->fs_optim = FS_OPTTIME;
382 break;
383 case FS_OPTTIME:
384 /*
385 * At this point we have discovered a file that is trying to
386 * grow a small fragment to a larger fragment. To save time,
387 * we allocate a full sized block, then free the unused portion.
388 * If the file continues to grow, the `ffs_fragextend' call
389 * above will be able to grow it in place without further
390 * copying. If aberrant programs cause disk fragmentation to
391 * grow within 2% of the free reserve, we choose to begin
392 * optimizing for space.
393 */
394 request = fs->fs_bsize;
395 if (fs->fs_cstotal.cs_nffree <
396 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
397 break;
398
399 if (ffs_log_changeopt) {
400 log(LOG_NOTICE,
401 "%s: optimization changed from TIME to SPACE\n",
402 fs->fs_fsmnt);
403 }
404
405 fs->fs_optim = FS_OPTSPACE;
406 break;
407 default:
408 printf("dev = 0x%x, optim = %d, fs = %s\n",
409 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
410 panic("ffs_realloccg: bad optim");
411 /* NOTREACHED */
412 }
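	/*
	 * Worked example of the two thresholds above (numbers are
	 * illustrative assumptions): with fs_dsize = 1,000,000 fragments
	 * and fs_minfree = 5%, SPACE -> TIME happens once cs_nffree falls
	 * to 1,000,000 * 5 / (2 * 100) = 25,000 or fewer free fragments,
	 * while TIME -> SPACE happens once cs_nffree climbs to
	 * 1,000,000 * (5 - 2) / 100 = 30,000 or more.  The gap between
	 * the two thresholds provides hysteresis so the policy does not
	 * flap back and forth.
	 */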
413 bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
414 if (bno > 0) {
415 if (!DOINGSOFTDEP(ITOV(ip))) {
416 if ((ip->i_ump->um_mountp->mnt_wapbl) &&
417 (ITOV(ip)->v_type != VREG)) {
418 UFS_WAPBL_REGISTER_DEALLOCATION(
419 ip->i_ump->um_mountp, fsbtodb(fs, bprev),
420 osize);
421 } else
422 ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
423 ip->i_number);
424 }
425 if (nsize < request) {
426 if ((ip->i_ump->um_mountp->mnt_wapbl) &&
427 (ITOV(ip)->v_type != VREG)) {
428 UFS_WAPBL_REGISTER_DEALLOCATION(
429 ip->i_ump->um_mountp,
430 fsbtodb(fs, (bno + numfrags(fs, nsize))),
431 request - nsize);
432 } else
433 ffs_blkfree(fs, ip->i_devvp,
434 bno + numfrags(fs, nsize),
435 (long)(request - nsize), ip->i_number);
436 }
437 DIP_ADD(ip, blocks, btodb(nsize - osize));
438 ip->i_flag |= IN_CHANGE | IN_UPDATE;
439 if (bpp != NULL) {
440 bp->b_blkno = fsbtodb(fs, bno);
441 allocbuf(bp, nsize, 1);
442 memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
443 mutex_enter(bp->b_objlock);
444 KASSERT(!cv_has_waiters(&bp->b_done));
445 bp->b_oflags |= BO_DONE;
446 mutex_exit(bp->b_objlock);
447 *bpp = bp;
448 }
449 if (blknop != NULL) {
450 *blknop = bno;
451 }
452 return (0);
453 }
454 mutex_exit(&ump->um_lock);
455
456 #ifdef QUOTA
457 /*
458 * Restore user's disk quota because allocation failed.
459 */
460 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
461 #endif
462 if (bpp != NULL) {
463 brelse(bp, 0);
464 }
465
466 nospace:
467 /*
468 * no space available
469 */
470 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
471 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
472 return (ENOSPC);
473 }
474
475 #if 0
476 /*
477 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
478 *
479 * The vnode and an array of buffer pointers for a range of sequential
480 * logical blocks to be made contiguous is given. The allocator attempts
481 * to find a range of sequential blocks starting as close as possible
482 * from the end of the allocation for the logical block immediately
483 * preceding the current range. If successful, the physical block numbers
484 * in the buffer pointers and in the inode are changed to reflect the new
485 * allocation. If unsuccessful, the allocation is left unchanged. The
486 * success in doing the reallocation is returned. Note that the error
487 * return is not reflected back to the user. Rather the previous block
488 * allocation will be used.
489
490 */
491 #ifdef XXXUBC
492 #ifdef DEBUG
493 #include <sys/sysctl.h>
494 int prtrealloc = 0;
495 struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
496 #endif
497 #endif
498
499 /*
500 * NOTE: when re-enabling this, it must be updated for UFS2 and WAPBL.
501 */
502
503 int doasyncfree = 1;
504
505 int
506 ffs_reallocblks(void *v)
507 {
508 #ifdef XXXUBC
509 struct vop_reallocblks_args /* {
510 struct vnode *a_vp;
511 struct cluster_save *a_buflist;
512 } */ *ap = v;
513 struct fs *fs;
514 struct inode *ip;
515 struct vnode *vp;
516 struct buf *sbp, *ebp;
517 int32_t *bap, *ebap = NULL, *sbap; /* XXX ondisk32 */
518 struct cluster_save *buflist;
519 daddr_t start_lbn, end_lbn, soff, newblk, blkno;
520 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
521 int i, len, start_lvl, end_lvl, pref, ssize;
522 struct ufsmount *ump;
523 #endif /* XXXUBC */
524
525 /* XXXUBC don't reallocblks for now */
526 return ENOSPC;
527
528 #ifdef XXXUBC
529 vp = ap->a_vp;
530 ip = VTOI(vp);
531 fs = ip->i_fs;
532 ump = ip->i_ump;
533 if (fs->fs_contigsumsize <= 0)
534 return (ENOSPC);
535 buflist = ap->a_buflist;
536 len = buflist->bs_nchildren;
537 start_lbn = buflist->bs_children[0]->b_lblkno;
538 end_lbn = start_lbn + len - 1;
539 #ifdef DIAGNOSTIC
540 for (i = 0; i < len; i++)
541 if (!ffs_checkblk(ip,
542 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
543 panic("ffs_reallocblks: unallocated block 1");
544 for (i = 1; i < len; i++)
545 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
546 panic("ffs_reallocblks: non-logical cluster");
547 blkno = buflist->bs_children[0]->b_blkno;
548 ssize = fsbtodb(fs, fs->fs_frag);
549 for (i = 1; i < len - 1; i++)
550 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
551 panic("ffs_reallocblks: non-physical cluster %d", i);
552 #endif
553 /*
554 * If the latest allocation is in a new cylinder group, assume that
555 * the filesystem has decided to move and do not force it back to
556 * the previous cylinder group.
557 */
558 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
559 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
560 return (ENOSPC);
561 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
562 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
563 return (ENOSPC);
564 /*
565 * Get the starting offset and block map for the first block.
566 */
567 if (start_lvl == 0) {
568 sbap = &ip->i_ffs1_db[0];
569 soff = start_lbn;
570 } else {
571 idp = &start_ap[start_lvl - 1];
572 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize,
573 NOCRED, B_MODIFY, &sbp)) {
574 brelse(sbp, 0);
575 return (ENOSPC);
576 }
577 sbap = (int32_t *)sbp->b_data;
578 soff = idp->in_off;
579 }
580 /*
581 * Find the preferred location for the cluster.
582 */
583 mutex_enter(&ump->um_lock);
584 pref = ffs_blkpref(ip, start_lbn, soff, sbap);
585 /*
586 * If the block range spans two block maps, get the second map.
587 */
588 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
589 ssize = len;
590 } else {
591 #ifdef DIAGNOSTIC
592 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
593 panic("ffs_reallocblk: start == end");
594 #endif
595 ssize = len - (idp->in_off + 1);
596 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize,
597 NOCRED, B_MODIFY, &ebp))
598 goto fail;
599 ebap = (int32_t *)ebp->b_data; /* XXX ondisk32 */
600 }
601 /*
602 * Search the block map looking for an allocation of the desired size.
603 */
604 if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
605 len, ffs_clusteralloc)) == 0) {
606 mutex_exit(&ump->um_lock);
607 goto fail;
608 }
609 /*
610 * We have found a new contiguous block.
611 *
612 * First we have to replace the old block pointers with the new
613 * block pointers in the inode and indirect blocks associated
614 * with the file.
615 */
616 #ifdef DEBUG
617 if (prtrealloc)
618 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
619 start_lbn, end_lbn);
620 #endif
621 blkno = newblk;
622 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
623 daddr_t ba;
624
625 if (i == ssize) {
626 bap = ebap;
627 soff = -i;
628 }
629 /* XXX ondisk32 */
630 ba = ufs_rw32(*bap, UFS_FSNEEDSWAP(fs));
631 #ifdef DIAGNOSTIC
632 if (!ffs_checkblk(ip,
633 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
634 panic("ffs_reallocblks: unallocated block 2");
635 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != ba)
636 panic("ffs_reallocblks: alloc mismatch");
637 #endif
638 #ifdef DEBUG
639 if (prtrealloc)
640 printf(" %d,", ba);
641 #endif
642 if (DOINGSOFTDEP(vp)) {
643 if (sbap == &ip->i_ffs1_db[0] && i < ssize)
644 softdep_setup_allocdirect(ip, start_lbn + i,
645 blkno, ba, fs->fs_bsize, fs->fs_bsize,
646 buflist->bs_children[i]);
647 else
648 softdep_setup_allocindir_page(ip, start_lbn + i,
649 i < ssize ? sbp : ebp, soff + i, blkno,
650 ba, buflist->bs_children[i]);
651 }
652 /* XXX ondisk32 */
653 *bap++ = ufs_rw32((u_int32_t)blkno, UFS_FSNEEDSWAP(fs));
654 }
655 /*
656 * Next we must write out the modified inode and indirect blocks.
657 * For strict correctness, the writes should be synchronous since
 658 * the old block values may have been written to disk. In practice
659 * they are almost never written, but if we are concerned about
660 * strict correctness, the `doasyncfree' flag should be set to zero.
661 *
662 * The test on `doasyncfree' should be changed to test a flag
663 * that shows whether the associated buffers and inodes have
664 * been written. The flag should be set when the cluster is
665 * started and cleared whenever the buffer or inode is flushed.
666 * We can then check below to see if it is set, and do the
667 * synchronous write only when it has been cleared.
668 */
669 if (sbap != &ip->i_ffs1_db[0]) {
670 if (doasyncfree)
671 bdwrite(sbp);
672 else
673 bwrite(sbp);
674 } else {
675 ip->i_flag |= IN_CHANGE | IN_UPDATE;
676 if (!doasyncfree)
677 ffs_update(vp, NULL, NULL, 1);
678 }
679 if (ssize < len) {
680 if (doasyncfree)
681 bdwrite(ebp);
682 else
683 bwrite(ebp);
684 }
685 /*
686 * Last, free the old blocks and assign the new blocks to the buffers.
687 */
688 #ifdef DEBUG
689 if (prtrealloc)
690 printf("\n\tnew:");
691 #endif
692 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
693 if (!DOINGSOFTDEP(vp))
694 ffs_blkfree(fs, ip->i_devvp,
695 dbtofsb(fs, buflist->bs_children[i]->b_blkno),
696 fs->fs_bsize, ip->i_number);
697 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
698 #ifdef DEBUG
699 if (!ffs_checkblk(ip,
700 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
701 panic("ffs_reallocblks: unallocated block 3");
702 if (prtrealloc)
703 printf(" %d,", blkno);
704 #endif
705 }
706 #ifdef DEBUG
707 if (prtrealloc) {
708 prtrealloc--;
709 printf("\n");
710 }
711 #endif
712 return (0);
713
714 fail:
715 if (ssize < len)
716 brelse(ebp, 0);
717 if (sbap != &ip->i_ffs1_db[0])
718 brelse(sbp, 0);
719 return (ENOSPC);
720 #endif /* XXXUBC */
721 }
722 #endif /* 0 */
723
724 /*
725 * Allocate an inode in the file system.
726 *
727 * If allocating a directory, use ffs_dirpref to select the inode.
728 * If allocating in a directory, the following hierarchy is followed:
729 * 1) allocate the preferred inode.
730 * 2) allocate an inode in the same cylinder group.
 731 * 3) quadratically rehash into other cylinder groups, until an
732 * available inode is located.
733 * If no inode preference is given the following hierarchy is used
734 * to allocate an inode:
735 * 1) allocate an inode in cylinder group 0.
 736 * 2) quadratically rehash into other cylinder groups, until an
737 * available inode is located.
738 *
739 * => um_lock not held upon entry or return
740 */
741 int
742 ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
743 struct vnode **vpp)
744 {
745 struct ufsmount *ump;
746 struct inode *pip;
747 struct fs *fs;
748 struct inode *ip;
749 struct timespec ts;
750 ino_t ino, ipref;
751 int cg, error;
752
753 UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount);
754
755 *vpp = NULL;
756 pip = VTOI(pvp);
757 fs = pip->i_fs;
758 ump = pip->i_ump;
759
760 error = UFS_WAPBL_BEGIN(pvp->v_mount);
761 if (error) {
762 return error;
763 }
764 mutex_enter(&ump->um_lock);
765 if (fs->fs_cstotal.cs_nifree == 0)
766 goto noinodes;
767
768 if ((mode & IFMT) == IFDIR)
769 ipref = ffs_dirpref(pip);
770 else
771 ipref = pip->i_number;
772 if (ipref >= fs->fs_ncg * fs->fs_ipg)
773 ipref = 0;
774 cg = ino_to_cg(fs, ipref);
775 /*
 776 * Track the number of dirs created one after another
 777 * in the same cg without intervening creation of files.
778 */
779 if ((mode & IFMT) == IFDIR) {
780 if (fs->fs_contigdirs[cg] < 255)
781 fs->fs_contigdirs[cg]++;
782 } else {
783 if (fs->fs_contigdirs[cg] > 0)
784 fs->fs_contigdirs[cg]--;
785 }
786 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg);
787 if (ino == 0)
788 goto noinodes;
789 UFS_WAPBL_END(pvp->v_mount);
790 error = VFS_VGET(pvp->v_mount, ino, vpp);
791 if (error) {
792 int err;
793 err = UFS_WAPBL_BEGIN(pvp->v_mount);
794 if (err == 0)
795 ffs_vfree(pvp, ino, mode);
796 if (err == 0)
797 UFS_WAPBL_END(pvp->v_mount);
798 return (error);
799 }
800 KASSERT((*vpp)->v_type == VNON);
801 ip = VTOI(*vpp);
802 if (ip->i_mode) {
803 #if 0
804 printf("mode = 0%o, inum = %d, fs = %s\n",
805 ip->i_mode, ip->i_number, fs->fs_fsmnt);
806 #else
807 printf("dmode %x mode %x dgen %x gen %x\n",
808 DIP(ip, mode), ip->i_mode,
809 DIP(ip, gen), ip->i_gen);
810 printf("size %llx blocks %llx\n",
811 (long long)DIP(ip, size), (long long)DIP(ip, blocks));
812 printf("ino %llu ipref %llu\n", (unsigned long long)ino,
813 (unsigned long long)ipref);
814 #if 0
815 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
816 (int)fs->fs_bsize, NOCRED, 0, &bp);
817 #endif
818
819 #endif
820 panic("ffs_valloc: dup alloc");
821 }
822 if (DIP(ip, blocks)) { /* XXX */
823 printf("free inode %s/%llu had %" PRId64 " blocks\n",
824 fs->fs_fsmnt, (unsigned long long)ino, DIP(ip, blocks));
825 DIP_ASSIGN(ip, blocks, 0);
826 }
827 ip->i_flag &= ~IN_SPACECOUNTED;
828 ip->i_flags = 0;
829 DIP_ASSIGN(ip, flags, 0);
830 /*
831 * Set up a new generation number for this inode.
832 */
833 ip->i_gen++;
834 DIP_ASSIGN(ip, gen, ip->i_gen);
835 if (fs->fs_magic == FS_UFS2_MAGIC) {
836 vfs_timestamp(&ts);
837 ip->i_ffs2_birthtime = ts.tv_sec;
838 ip->i_ffs2_birthnsec = ts.tv_nsec;
839 }
840 return (0);
841 noinodes:
842 mutex_exit(&ump->um_lock);
843 UFS_WAPBL_END(pvp->v_mount);
844 ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
845 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
846 return (ENOSPC);
847 }
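/*
 * Illustrative sketch, kept under #if 0 (never compiled): the shape of a
 * typical create path, loosely modeled on ufs_makeinode().  The variable
 * names and mode value are assumptions for illustration only; dvp is the
 * parent directory vnode.
 */
#if 0
	struct vnode *tvp;
	struct inode *ip;
	int error;

	error = ffs_valloc(dvp, IFREG | 0644, cred, &tvp);
	if (error)
		return error;
	ip = VTOI(tvp);
	ip->i_mode = IFREG | 0644;
	DIP_ASSIGN(ip, mode, ip->i_mode);
	tvp->v_type = IFTOVT(ip->i_mode);	/* vnode comes back as VNON */
#endif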
848
849 /*
850 * Find a cylinder group in which to place a directory.
851 *
852 * The policy implemented by this algorithm is to allocate a
853 * directory inode in the same cylinder group as its parent
 854 * directory, but also to reserve space for its files' inodes
855 * and data. Restrict the number of directories which may be
856 * allocated one after another in the same cylinder group
857 * without intervening allocation of files.
858 *
859 * If we allocate a first level directory then force allocation
860 * in another cylinder group.
861 */
862 static ino_t
863 ffs_dirpref(struct inode *pip)
864 {
865 register struct fs *fs;
866 int cg, prefcg;
867 int64_t dirsize, cgsize, curdsz;
868 int avgifree, avgbfree, avgndir;
869 int minifree, minbfree, maxndir;
870 int mincg, minndir;
871 int maxcontigdirs;
872
873 KASSERT(mutex_owned(&pip->i_ump->um_lock));
874
875 fs = pip->i_fs;
876
877 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
878 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
879 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
880
881 /*
882 * Force allocation in another cg if creating a first level dir.
883 */
884 if (ITOV(pip)->v_vflag & VV_ROOT) {
885 prefcg = random() % fs->fs_ncg;
886 mincg = prefcg;
887 minndir = fs->fs_ipg;
888 for (cg = prefcg; cg < fs->fs_ncg; cg++)
889 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
890 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
891 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
892 mincg = cg;
893 minndir = fs->fs_cs(fs, cg).cs_ndir;
894 }
895 for (cg = 0; cg < prefcg; cg++)
896 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
897 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
898 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
899 mincg = cg;
900 minndir = fs->fs_cs(fs, cg).cs_ndir;
901 }
902 return ((ino_t)(fs->fs_ipg * mincg));
903 }
904
905 /*
 906 * Compute the various limits used for optimal
 907 * allocation of a directory inode.
908 */
909 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
910 minifree = avgifree - fs->fs_ipg / 4;
911 if (minifree < 0)
912 minifree = 0;
913 minbfree = avgbfree - fragstoblks(fs, fs->fs_fpg) / 4;
914 if (minbfree < 0)
915 minbfree = 0;
916 cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
917 dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
918 if (avgndir != 0) {
919 curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
920 if (dirsize < curdsz)
921 dirsize = curdsz;
922 }
923 if (cgsize < dirsize * 255)
924 maxcontigdirs = cgsize / dirsize;
925 else
926 maxcontigdirs = 255;
927 if (fs->fs_avgfpdir > 0)
928 maxcontigdirs = min(maxcontigdirs,
929 fs->fs_ipg / fs->fs_avgfpdir);
930 if (maxcontigdirs == 0)
931 maxcontigdirs = 1;
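	/*
	 * Worked example (illustrative numbers): with the traditional
	 * defaults fs_avgfilesize = 16384 and fs_avgfpdir = 64, dirsize
	 * starts at 16384 * 64 = 1 MB of expected data per directory.
	 * For a 128 MB cylinder group that yields maxcontigdirs =
	 * 128 MB / 1 MB = 128, further clamped by fs_ipg / fs_avgfpdir
	 * and by the hard limit of 255.
	 */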
932
933 /*
934 * Limit number of dirs in one cg and reserve space for
935 * regular files, but only if we have no deficit in
936 * inodes or space.
937 */
938 prefcg = ino_to_cg(fs, pip->i_number);
939 for (cg = prefcg; cg < fs->fs_ncg; cg++)
940 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
941 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
942 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
943 if (fs->fs_contigdirs[cg] < maxcontigdirs)
944 return ((ino_t)(fs->fs_ipg * cg));
945 }
946 for (cg = 0; cg < prefcg; cg++)
947 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
948 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
949 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
950 if (fs->fs_contigdirs[cg] < maxcontigdirs)
951 return ((ino_t)(fs->fs_ipg * cg));
952 }
953 /*
954 * This is a backstop when we are deficient in space.
955 */
956 for (cg = prefcg; cg < fs->fs_ncg; cg++)
957 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
958 return ((ino_t)(fs->fs_ipg * cg));
959 for (cg = 0; cg < prefcg; cg++)
960 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
961 break;
962 return ((ino_t)(fs->fs_ipg * cg));
963 }
964
965 /*
966 * Select the desired position for the next block in a file. The file is
967 * logically divided into sections. The first section is composed of the
968 * direct blocks. Each additional section contains fs_maxbpg blocks.
969 *
970 * If no blocks have been allocated in the first section, the policy is to
971 * request a block in the same cylinder group as the inode that describes
972 * the file. If no blocks have been allocated in any other section, the
973 * policy is to place the section in a cylinder group with a greater than
974 * average number of free blocks. An appropriate cylinder group is found
975 * by using a rotor that sweeps the cylinder groups. When a new group of
976 * blocks is needed, the sweep begins in the cylinder group following the
977 * cylinder group from which the previous allocation was made. The sweep
978 * continues until a cylinder group with greater than the average number
979 * of free blocks is found. If the allocation is for the first block in an
980 * indirect block, the information on the previous allocation is unavailable;
981 * here a best guess is made based upon the logical block number being
982 * allocated.
983 *
984 * If a section is already partially allocated, the policy is to
985 * contiguously allocate fs_maxcontig blocks. The end of one of these
986 * contiguous blocks and the beginning of the next is laid out
 987 * contiguously if possible.
988 *
989 * => um_lock held on entry and exit
990 */
991 daddr_t
992 ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
993 int32_t *bap /* XXX ondisk32 */)
994 {
995 struct fs *fs;
996 int cg;
997 int avgbfree, startcg;
998
999 KASSERT(mutex_owned(&ip->i_ump->um_lock));
1000
1001 fs = ip->i_fs;
1002 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
1003 if (lbn < NDADDR + NINDIR(fs)) {
1004 cg = ino_to_cg(fs, ip->i_number);
1005 return (fs->fs_fpg * cg + fs->fs_frag);
1006 }
1007 /*
1008 * Find a cylinder with greater than average number of
1009 * unused data blocks.
1010 */
1011 if (indx == 0 || bap[indx - 1] == 0)
1012 startcg =
1013 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
1014 else
1015 startcg = dtog(fs,
1016 ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
1017 startcg %= fs->fs_ncg;
1018 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
1019 for (cg = startcg; cg < fs->fs_ncg; cg++)
1020 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1021 return (fs->fs_fpg * cg + fs->fs_frag);
1022 }
1023 for (cg = 0; cg < startcg; cg++)
1024 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1025 return (fs->fs_fpg * cg + fs->fs_frag);
1026 }
1027 return (0);
1028 }
1029 /*
1030 * We just always try to lay things out contiguously.
1031 */
1032 return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
1033 }
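/*
 * Worked example for the section logic above (illustrative numbers): with
 * fs_maxbpg = 2048 and the inode in cg 3, asking for lbn 5000 at the start
 * of a section with no previous block pointer gives
 * startcg = 3 + 5000 / 2048 = 5, and the sweep picks the first cylinder
 * group at or after cg 5 (wrapping around) whose cs_nbfree is at least the
 * filesystem-wide average.  In the middle of a section the preference is
 * simply the previous block plus fs_frag, i.e. the physically contiguous
 * successor.
 */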
1034
1035 daddr_t
1036 ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
1037 {
1038 struct fs *fs;
1039 int cg;
1040 int avgbfree, startcg;
1041
1042 KASSERT(mutex_owned(&ip->i_ump->um_lock));
1043
1044 fs = ip->i_fs;
1045 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
1046 if (lbn < NDADDR + NINDIR(fs)) {
1047 cg = ino_to_cg(fs, ip->i_number);
1048 return (fs->fs_fpg * cg + fs->fs_frag);
1049 }
1050 /*
1051 * Find a cylinder with greater than average number of
1052 * unused data blocks.
1053 */
1054 if (indx == 0 || bap[indx - 1] == 0)
1055 startcg =
1056 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
1057 else
1058 startcg = dtog(fs,
1059 ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
1060 startcg %= fs->fs_ncg;
1061 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
1062 for (cg = startcg; cg < fs->fs_ncg; cg++)
1063 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1064 return (fs->fs_fpg * cg + fs->fs_frag);
1065 }
1066 for (cg = 0; cg < startcg; cg++)
1067 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
1068 return (fs->fs_fpg * cg + fs->fs_frag);
1069 }
1070 return (0);
1071 }
1072 /*
1073 * We just always try to lay things out contiguously.
1074 */
1075 return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
1076 }
1077
1078
1079 /*
1080 * Implement the cylinder overflow algorithm.
1081 *
1082 * The policy implemented by this algorithm is:
1083 * 1) allocate the block in its requested cylinder group.
 1084 * 2) quadratically rehash on the cylinder group number.
1085 * 3) brute force search for a free block.
1086 *
1087 * => called with um_lock held
1088 * => returns with um_lock released on success, held on failure
1089 * (*allocator releases lock on success, retains lock on failure)
1090 */
1091 /*VARARGS5*/
1092 static daddr_t
1093 ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
1094 int size /* size for data blocks, mode for inodes */,
1095 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
1096 {
1097 struct fs *fs;
1098 daddr_t result;
1099 int i, icg = cg;
1100
1101 fs = ip->i_fs;
1102 /*
1103 * 1: preferred cylinder group
1104 */
1105 result = (*allocator)(ip, cg, pref, size);
1106 if (result)
1107 return (result);
1108 /*
1109 * 2: quadratic rehash
1110 */
1111 for (i = 1; i < fs->fs_ncg; i *= 2) {
1112 cg += i;
1113 if (cg >= fs->fs_ncg)
1114 cg -= fs->fs_ncg;
1115 result = (*allocator)(ip, cg, 0, size);
1116 if (result)
1117 return (result);
1118 }
1119 /*
1120 * 3: brute force search
1121 * Note that we start at i == 2, since 0 was checked initially,
1122 * and 1 is always checked in the quadratic rehash.
1123 */
1124 cg = (icg + 2) % fs->fs_ncg;
1125 for (i = 2; i < fs->fs_ncg; i++) {
1126 result = (*allocator)(ip, cg, 0, size);
1127 if (result)
1128 return (result);
1129 cg++;
1130 if (cg == fs->fs_ncg)
1131 cg = 0;
1132 }
1133 return (0);
1134 }
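/*
 * Probe-order example for the three passes above (illustrative numbers):
 * with fs_ncg = 16 and a preferred group of 5, pass 1 tries cg 5, the
 * quadratic rehash then tries cg 6, 8, 12 and 4 (offsets 1, 2, 4, 8 from
 * the preference, modulo 16), and the brute-force pass walks cg 7 through
 * 15 and then 0 through 4, starting at cg 7 because cg 5 and 6 have
 * already been probed.
 */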
1135
1136 /*
1137 * Determine whether a fragment can be extended.
1138 *
1139 * Check to see if the necessary fragments are available, and
1140 * if they are, allocate them.
1141 *
1142 * => called with um_lock held
1143 * => returns with um_lock released on success, held on failure
1144 */
1145 static daddr_t
1146 ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
1147 {
1148 struct ufsmount *ump;
1149 struct fs *fs;
1150 struct cg *cgp;
1151 struct buf *bp;
1152 daddr_t bno;
1153 int frags, bbase;
1154 int i, error;
1155 u_int8_t *blksfree;
1156
1157 fs = ip->i_fs;
1158 ump = ip->i_ump;
1159
1160 KASSERT(mutex_owned(&ump->um_lock));
1161
1162 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
1163 return (0);
1164 frags = numfrags(fs, nsize);
1165 bbase = fragnum(fs, bprev);
1166 if (bbase > fragnum(fs, (bprev + frags - 1))) {
1167 /* cannot extend across a block boundary */
1168 return (0);
1169 }
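	/*
	 * Example of the boundary test above (illustrative numbers): with
	 * fs_frag = 8, a fragment starting at offset 6 within its block
	 * cannot be extended to 3 fragments, because the last fragment
	 * would sit at offset (6 + 3 - 1) & 7 = 0, i.e. in the following
	 * block; bbase (6) is then greater than fragnum() of the last
	 * fragment (0) and the extension is refused.
	 */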
1170 mutex_exit(&ump->um_lock);
1171 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1172 (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
1173 if (error)
1174 goto fail;
1175 cgp = (struct cg *)bp->b_data;
1176 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1177 goto fail;
1178 cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
1179 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1180 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1181 cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
1182 bno = dtogd(fs, bprev);
1183 blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
1184 for (i = numfrags(fs, osize); i < frags; i++)
1185 if (isclr(blksfree, bno + i))
1186 goto fail;
1187 /*
 1188 * The current fragment can be extended:
 1189 * deduct the count on the fragment being extended into,
 1190 * increase the count on the remaining fragment (if any),
 1191 * and allocate the extended piece.
1192 */
1193 for (i = frags; i < fs->fs_frag - bbase; i++)
1194 if (isclr(blksfree, bno + i))
1195 break;
1196 ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
1197 if (i != frags)
1198 ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
1199 mutex_enter(&ump->um_lock);
1200 for (i = numfrags(fs, osize); i < frags; i++) {
1201 clrbit(blksfree, bno + i);
1202 ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
1203 fs->fs_cstotal.cs_nffree--;
1204 fs->fs_cs(fs, cg).cs_nffree--;
1205 }
1206 fs->fs_fmod = 1;
1207 ACTIVECG_CLR(fs, cg);
1208 mutex_exit(&ump->um_lock);
1209 if (DOINGSOFTDEP(ITOV(ip)))
1210 softdep_setup_blkmapdep(bp, fs, bprev);
1211 bdwrite(bp);
1212 return (bprev);
1213
1214 fail:
1215 brelse(bp, 0);
1216 mutex_enter(&ump->um_lock);
1217 return (0);
1218 }
1219
1220 /*
1221 * Determine whether a block can be allocated.
1222 *
1223 * Check to see if a block of the appropriate size is available,
1224 * and if it is, allocate it.
1225 */
1226 static daddr_t
1227 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
1228 {
1229 struct ufsmount *ump;
1230 struct fs *fs = ip->i_fs;
1231 struct cg *cgp;
1232 struct buf *bp;
1233 int32_t bno;
1234 daddr_t blkno;
1235 int error, frags, allocsiz, i;
1236 u_int8_t *blksfree;
1237 #ifdef FFS_EI
1238 const int needswap = UFS_FSNEEDSWAP(fs);
1239 #endif
1240
1241 ump = ip->i_ump;
1242
1243 KASSERT(mutex_owned(&ump->um_lock));
1244
1245 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
1246 return (0);
1247 mutex_exit(&ump->um_lock);
1248 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1249 (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
1250 if (error)
1251 goto fail;
1252 cgp = (struct cg *)bp->b_data;
1253 if (!cg_chkmagic(cgp, needswap) ||
1254 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
1255 goto fail;
1256 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1257 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1258 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1259 cgp->cg_time = ufs_rw64(time_second, needswap);
1260 if (size == fs->fs_bsize) {
1261 mutex_enter(&ump->um_lock);
1262 blkno = ffs_alloccgblk(ip, bp, bpref);
1263 ACTIVECG_CLR(fs, cg);
1264 mutex_exit(&ump->um_lock);
1265 bdwrite(bp);
1266 return (blkno);
1267 }
1268 /*
1269 * check to see if any fragments are already available
1270 * allocsiz is the size which will be allocated, hacking
1271 * it down to a smaller size if necessary
1272 */
1273 blksfree = cg_blksfree(cgp, needswap);
1274 frags = numfrags(fs, size);
1275 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
1276 if (cgp->cg_frsum[allocsiz] != 0)
1277 break;
1278 if (allocsiz == fs->fs_frag) {
1279 /*
1280 * no fragments were available, so a block will be
1281 * allocated, and hacked up
1282 */
1283 if (cgp->cg_cs.cs_nbfree == 0)
1284 goto fail;
1285 mutex_enter(&ump->um_lock);
1286 blkno = ffs_alloccgblk(ip, bp, bpref);
1287 bno = dtogd(fs, blkno);
1288 for (i = frags; i < fs->fs_frag; i++)
1289 setbit(blksfree, bno + i);
1290 i = fs->fs_frag - frags;
1291 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1292 fs->fs_cstotal.cs_nffree += i;
1293 fs->fs_cs(fs, cg).cs_nffree += i;
1294 fs->fs_fmod = 1;
1295 ufs_add32(cgp->cg_frsum[i], 1, needswap);
1296 ACTIVECG_CLR(fs, cg);
1297 mutex_exit(&ump->um_lock);
1298 bdwrite(bp);
1299 return (blkno);
1300 }
1301 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
1302 #if 0
1303 /*
1304 * XXX fvdl mapsearch will panic, and never return -1
1305 * also: returning NULL as daddr_t ?
1306 */
1307 if (bno < 0)
1308 goto fail;
1309 #endif
1310 for (i = 0; i < frags; i++)
1311 clrbit(blksfree, bno + i);
1312 mutex_enter(&ump->um_lock);
1313 ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
1314 fs->fs_cstotal.cs_nffree -= frags;
1315 fs->fs_cs(fs, cg).cs_nffree -= frags;
1316 fs->fs_fmod = 1;
1317 ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
1318 if (frags != allocsiz)
1319 ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
1320 blkno = cg * fs->fs_fpg + bno;
1321 ACTIVECG_CLR(fs, cg);
1322 mutex_exit(&ump->um_lock);
1323 if (DOINGSOFTDEP(ITOV(ip)))
1324 softdep_setup_blkmapdep(bp, fs, blkno);
1325 bdwrite(bp);
1326 return blkno;
1327
1328 fail:
1329 brelse(bp, 0);
1330 mutex_enter(&ump->um_lock);
1331 return (0);
1332 }
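/*
 * Fragment-summary example for ffs_alloccg() (illustrative numbers): a
 * request for 3 fragments when the smallest free run available is 5
 * fragments long consumes that run, decrements cg_frsum[5], increments
 * cg_frsum[2] for the 2 fragments left over, and reduces the free
 * fragment counts by 3.
 */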
1333
1334 /*
1335 * Allocate a block in a cylinder group.
1336 *
1337 * This algorithm implements the following policy:
1338 * 1) allocate the requested block.
1339 * 2) allocate a rotationally optimal block in the same cylinder.
1340 * 3) allocate the next available block on the block rotor for the
1341 * specified cylinder group.
1342 * Note that this routine only allocates fs_bsize blocks; these
1343 * blocks may be fragmented by the routine that allocates them.
1344 */
1345 static daddr_t
1346 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
1347 {
1348 struct ufsmount *ump;
1349 struct fs *fs = ip->i_fs;
1350 struct cg *cgp;
1351 daddr_t blkno;
1352 int32_t bno;
1353 u_int8_t *blksfree;
1354 #ifdef FFS_EI
1355 const int needswap = UFS_FSNEEDSWAP(fs);
1356 #endif
1357
1358 ump = ip->i_ump;
1359
1360 KASSERT(mutex_owned(&ump->um_lock));
1361
1362 cgp = (struct cg *)bp->b_data;
1363 blksfree = cg_blksfree(cgp, needswap);
1364 if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
1365 bpref = ufs_rw32(cgp->cg_rotor, needswap);
1366 } else {
1367 bpref = blknum(fs, bpref);
1368 bno = dtogd(fs, bpref);
1369 /*
1370 * if the requested block is available, use it
1371 */
1372 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
1373 goto gotit;
1374 }
1375 /*
1376 * Take the next available block in this cylinder group.
1377 */
1378 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1379 if (bno < 0)
1380 return (0);
1381 cgp->cg_rotor = ufs_rw32(bno, needswap);
1382 gotit:
1383 blkno = fragstoblks(fs, bno);
1384 ffs_clrblock(fs, blksfree, blkno);
1385 ffs_clusteracct(fs, cgp, blkno, -1);
1386 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1387 fs->fs_cstotal.cs_nbfree--;
1388 fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
1389 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1390 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1391 int cylno;
1392 cylno = old_cbtocylno(fs, bno);
1393 KASSERT(cylno >= 0);
1394 KASSERT(cylno < fs->fs_old_ncyl);
1395 KASSERT(old_cbtorpos(fs, bno) >= 0);
1396 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
1397 ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
1398 needswap);
1399 ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
1400 }
1401 fs->fs_fmod = 1;
1402 blkno = ufs_rw32(cgp->cg_cgx, needswap) * fs->fs_fpg + bno;
1403 if (DOINGSOFTDEP(ITOV(ip))) {
1404 mutex_exit(&ump->um_lock);
1405 softdep_setup_blkmapdep(bp, fs, blkno);
1406 mutex_enter(&ump->um_lock);
1407 }
1408 return (blkno);
1409 }
1410
1411 #ifdef XXXUBC
1412 /*
1413 * Determine whether a cluster can be allocated.
1414 *
1415 * We do not currently check for optimal rotational layout if there
1416 * are multiple choices in the same cylinder group. Instead we just
1417 * take the first one that we find following bpref.
1418 */
1419
1420 /*
1421 * This function must be fixed for UFS2 if re-enabled.
1422 */
1423 static daddr_t
1424 ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
1425 {
1426 struct ufsmount *ump;
1427 struct fs *fs;
1428 struct cg *cgp;
1429 struct buf *bp;
1430 int i, got, run, bno, bit, map;
1431 u_char *mapp;
1432 int32_t *lp;
1433
1434 fs = ip->i_fs;
1435 ump = ip->i_ump;
1436
1437 KASSERT(mutex_owned(&ump->um_lock));
1438 if (fs->fs_maxcluster[cg] < len)
1439 return (0);
1440 mutex_exit(&ump->um_lock);
1441 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1442 NOCRED, 0, &bp))
1443 goto fail;
1444 cgp = (struct cg *)bp->b_data;
1445 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1446 goto fail;
1447 /*
1448 * Check to see if a cluster of the needed size (or bigger) is
1449 * available in this cylinder group.
1450 */
1451 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len];
1452 for (i = len; i <= fs->fs_contigsumsize; i++)
1453 if (ufs_rw32(*lp++, UFS_FSNEEDSWAP(fs)) > 0)
1454 break;
1455 if (i > fs->fs_contigsumsize) {
1456 /*
1457 * This is the first time looking for a cluster in this
1458 * cylinder group. Update the cluster summary information
1459 * to reflect the true maximum sized cluster so that
1460 * future cluster allocation requests can avoid reading
1461 * the cylinder group map only to find no clusters.
1462 */
1463 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len - 1];
1464 for (i = len - 1; i > 0; i--)
1465 if (ufs_rw32(*lp--, UFS_FSNEEDSWAP(fs)) > 0)
1466 break;
1467 mutex_enter(&ump->um_lock);
1468 fs->fs_maxcluster[cg] = i;
1469 mutex_exit(&ump->um_lock);
1470 goto fail;
1471 }
1472 /*
1473 * Search the cluster map to find a big enough cluster.
1474 * We take the first one that we find, even if it is larger
1475 * than we need as we prefer to get one close to the previous
1476 * block allocation. We do not search before the current
1477 * preference point as we do not want to allocate a block
1478 * that is allocated before the previous one (as we will
1479 * then have to wait for another pass of the elevator
1480 * algorithm before it will be read). We prefer to fail and
1481 * be recalled to try an allocation in the next cylinder group.
1482 */
1483 if (dtog(fs, bpref) != cg)
1484 bpref = 0;
1485 else
1486 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1487 mapp = &cg_clustersfree(cgp, UFS_FSNEEDSWAP(fs))[bpref / NBBY];
1488 map = *mapp++;
1489 bit = 1 << (bpref % NBBY);
1490 for (run = 0, got = bpref;
1491 got < ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)); got++) {
1492 if ((map & bit) == 0) {
1493 run = 0;
1494 } else {
1495 run++;
1496 if (run == len)
1497 break;
1498 }
1499 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1500 bit <<= 1;
1501 } else {
1502 map = *mapp++;
1503 bit = 1;
1504 }
1505 }
1506 if (got == ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)))
1507 goto fail;
1508 /*
1509 * Allocate the cluster that we have found.
1510 */
1511 #ifdef DIAGNOSTIC
1512 for (i = 1; i <= len; i++)
1513 if (!ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1514 got - run + i))
1515 panic("ffs_clusteralloc: map mismatch");
1516 #endif
1517 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1518 if (dtog(fs, bno) != cg)
1519 panic("ffs_clusteralloc: allocated out of group");
1520 len = blkstofrags(fs, len);
1521 mutex_enter(&ump->um_lock);
1522 for (i = 0; i < len; i += fs->fs_frag)
1523 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
1524 panic("ffs_clusteralloc: lost block");
1525 ACTIVECG_CLR(fs, cg);
1526 mutex_exit(&ump->um_lock);
1527 bdwrite(bp);
1528 return (bno);
1529
1530 fail:
1531 brelse(bp, 0);
1532 mutex_enter(&ump->um_lock);
1533 return (0);
1534 }
1535 #endif /* XXXUBC */
1536
1537 /*
1538 * Determine whether an inode can be allocated.
1539 *
1540 * Check to see if an inode is available, and if it is,
1541 * allocate it using the following policy:
1542 * 1) allocate the requested inode.
1543 * 2) allocate the next available inode after the requested
1544 * inode in the specified cylinder group.
1545 */
1546 static daddr_t
1547 ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1548 {
1549 struct ufsmount *ump = ip->i_ump;
1550 struct fs *fs = ip->i_fs;
1551 struct cg *cgp;
1552 struct buf *bp, *ibp;
1553 u_int8_t *inosused;
1554 int error, start, len, loc, map, i;
1555 int32_t initediblk;
1556 struct ufs2_dinode *dp2;
1557 #ifdef FFS_EI
1558 const int needswap = UFS_FSNEEDSWAP(fs);
1559 #endif
1560
1561 KASSERT(mutex_owned(&ump->um_lock));
1562 UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp);
1563
1564 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1565 return (0);
1566 mutex_exit(&ump->um_lock);
1567 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1568 (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
1569 if (error)
1570 goto fail;
1571 cgp = (struct cg *)bp->b_data;
1572 if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0)
1573 goto fail;
1574 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1575 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1576 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1577 cgp->cg_time = ufs_rw64(time_second, needswap);
1578 inosused = cg_inosused(cgp, needswap);
1579 if (ipref) {
1580 ipref %= fs->fs_ipg;
1581 if (isclr(inosused, ipref))
1582 goto gotit;
1583 }
1584 start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
1585 len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
1586 NBBY);
1587 loc = skpc(0xff, len, &inosused[start]);
1588 if (loc == 0) {
1589 len = start + 1;
1590 start = 0;
1591 loc = skpc(0xff, len, &inosused[0]);
1592 if (loc == 0) {
1593 printf("cg = %d, irotor = %d, fs = %s\n",
1594 cg, ufs_rw32(cgp->cg_irotor, needswap),
1595 fs->fs_fsmnt);
1596 panic("ffs_nodealloccg: map corrupted");
1597 /* NOTREACHED */
1598 }
1599 }
1600 i = start + len - loc;
1601 map = inosused[i];
1602 ipref = i * NBBY;
1603 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1604 if ((map & i) == 0) {
1605 cgp->cg_irotor = ufs_rw32(ipref, needswap);
1606 goto gotit;
1607 }
1608 }
1609 printf("fs = %s\n", fs->fs_fsmnt);
1610 panic("ffs_nodealloccg: block not in map");
1611 /* NOTREACHED */
1612 gotit:
1613 UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref,
1614 mode);
1615 /*
1616 * Check to see if we need to initialize more inodes.
1617 */
1618 initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
1619 ibp = NULL;
1620 if (fs->fs_magic == FS_UFS2_MAGIC &&
1621 ipref + INOPB(fs) > initediblk &&
1622 initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
1623 if (ffs_getblk(ip->i_devvp, fsbtodb(fs,
1624 ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
1625 FFS_NOBLK, fs->fs_bsize, false, &ibp) != 0)
1626 goto fail;
1627 memset(ibp->b_data, 0, fs->fs_bsize);
1628 dp2 = (struct ufs2_dinode *)(ibp->b_data);
1629 for (i = 0; i < INOPB(fs); i++) {
1630 /*
1631 * Don't bother to swap, it's supposed to be
1632 * random, after all.
1633 */
1634 dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
1635 dp2++;
1636 }
1637 initediblk += INOPB(fs);
1638 cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
1639 }
1640
1641 mutex_enter(&ump->um_lock);
1642 ACTIVECG_CLR(fs, cg);
1643 setbit(inosused, ipref);
1644 ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1645 fs->fs_cstotal.cs_nifree--;
1646 fs->fs_cs(fs, cg).cs_nifree--;
1647 fs->fs_fmod = 1;
1648 if ((mode & IFMT) == IFDIR) {
1649 ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1650 fs->fs_cstotal.cs_ndir++;
1651 fs->fs_cs(fs, cg).cs_ndir++;
1652 }
1653 mutex_exit(&ump->um_lock);
1654 if (DOINGSOFTDEP(ITOV(ip)))
1655 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
1656 bdwrite(bp);
1657 if (ibp != NULL)
1658 bawrite(ibp);
1659 return (cg * fs->fs_ipg + ipref);
1660 fail:
1661 brelse(bp, 0);
1662 mutex_enter(&ump->um_lock);
1663 return (0);
1664 }
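/*
 * Numbering example for ffs_nodealloccg() (illustrative numbers): ipref is
 * an offset within the cylinder group's inosused bitmap, and the inode
 * number handed back is cg * fs_ipg + ipref, so group 4 with
 * fs_ipg = 11520 and ipref = 7 yields inode 46087.  cg_irotor remembers
 * where the previous search left off so later allocations do not rescan
 * the front of the map.
 */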
1665
1666 /*
1667 * Allocate a block or fragment.
1668 *
1669 * The specified block or fragment is removed from the
1670 * free map, possibly fragmenting a block in the process.
1671 *
 1672 * This implementation should mirror ffs_blkfree()
1673 *
1674 * => um_lock not held on entry or exit
1675 */
1676 int
1677 ffs_blkalloc(struct inode *ip, daddr_t bno, long size)
1678 {
1679 struct ufsmount *ump = ip->i_ump;
1680 struct fs *fs = ip->i_fs;
1681 struct cg *cgp;
1682 struct buf *bp;
1683 int32_t fragno, cgbno;
1684 int i, error, cg, blk, frags, bbase;
1685 u_int8_t *blksfree;
1686 const int needswap = UFS_FSNEEDSWAP(fs);
1687
1688 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
1689 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
1690 printf("dev = 0x%x, bno = %" PRId64 " bsize = %d, "
1691 "size = %ld, fs = %s\n",
1692 ip->i_dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
1693 panic("blkalloc: bad size");
1694 }
1695 cg = dtog(fs, bno);
1696 if (bno >= fs->fs_size) {
1697 printf("bad block %" PRId64 ", ino %" PRId64 "\n", bno,
1698 ip->i_number);
1699 ffs_fserr(fs, ip->i_uid, "bad block");
1700 return EINVAL;
1701 }
1702 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1703 (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
1704 if (error) {
1705 brelse(bp, 0);
1706 return error;
1707 }
1708 cgp = (struct cg *)bp->b_data;
1709 if (!cg_chkmagic(cgp, needswap)) {
1710 brelse(bp, 0);
1711 return EIO;
1712 }
1713 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1714 cgp->cg_time = ufs_rw64(time_second, needswap);
1715 cgbno = dtogd(fs, bno);
1716 blksfree = cg_blksfree(cgp, needswap);
1717
1718 mutex_enter(&ump->um_lock);
1719 if (size == fs->fs_bsize) {
1720 fragno = fragstoblks(fs, cgbno);
1721 if (!ffs_isblock(fs, blksfree, fragno)) {
1722 mutex_exit(&ump->um_lock);
1723 brelse(bp, 0);
1724 return EBUSY;
1725 }
1726 ffs_clrblock(fs, blksfree, fragno);
1727 ffs_clusteracct(fs, cgp, fragno, -1);
1728 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1729 fs->fs_cstotal.cs_nbfree--;
1730 fs->fs_cs(fs, cg).cs_nbfree--;
1731 } else {
1732 bbase = cgbno - fragnum(fs, cgbno);
1733
1734 frags = numfrags(fs, size);
1735 for (i = 0; i < frags; i++) {
1736 if (isclr(blksfree, cgbno + i)) {
1737 mutex_exit(&ump->um_lock);
1738 brelse(bp, 0);
1739 return EBUSY;
1740 }
1741 }
1742 /*
1743 * if a complete block is being split, account for it
1744 */
1745 fragno = fragstoblks(fs, bbase);
1746 if (ffs_isblock(fs, blksfree, fragno)) {
1747 ufs_add32(cgp->cg_cs.cs_nffree, fs->fs_frag, needswap);
1748 fs->fs_cstotal.cs_nffree += fs->fs_frag;
1749 fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag;
1750 ffs_clusteracct(fs, cgp, fragno, -1);
1751 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1752 fs->fs_cstotal.cs_nbfree--;
1753 fs->fs_cs(fs, cg).cs_nbfree--;
1754 }
1755 /*
1756 * decrement the counts associated with the old frags
1757 */
1758 blk = blkmap(fs, blksfree, bbase);
1759 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1760 /*
1761 * allocate the fragment
1762 */
1763 for (i = 0; i < frags; i++) {
1764 clrbit(blksfree, cgbno + i);
1765 }
1766 ufs_add32(cgp->cg_cs.cs_nffree, -i, needswap);
1767 fs->fs_cstotal.cs_nffree -= i;
1768 fs->fs_cs(fs, cg).cs_nffree -= i;
1769 /*
1770 * add back in counts associated with the new frags
1771 */
1772 blk = blkmap(fs, blksfree, bbase);
1773 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1774 }
1775 fs->fs_fmod = 1;
1776 ACTIVECG_CLR(fs, cg);
1777 mutex_exit(&ump->um_lock);
1778 bdwrite(bp);
1779 return 0;
1780 }
1781
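/*
 * Unlike the regular allocators, ffs_blkalloc() claims a specific,
 * caller-chosen block or fragment run: it only clears the matching bits
 * in the cylinder-group free map and adjusts the summaries.  A minimal,
 * hypothetical caller that reserves one full block at a known address
 * (helper name illustrative, error handling trimmed):
 */
#if 0
static int
example_claim_block(struct inode *ip, daddr_t bno)
{
	struct fs *fs = ip->i_fs;
	int error;

	/* size must be fragment-aligned and no larger than fs_bsize */
	error = ffs_blkalloc(ip, bno, fs->fs_bsize);
	if (error == EBUSY)
		printf("block %" PRId64 " is already in use\n", bno);
	return error;
}
#endif
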
1782 /*
1783 * Free a block or fragment.
1784 *
1785 * The specified block or fragment is placed back in the
1786 * free map. If a fragment is deallocated, a check is made to see
1787 * whether the freed fragments reassemble into a complete block.
1788 *
1789 * => um_lock not held on entry or exit
1790 */
1791 void
1792 ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
1793 ino_t inum)
1794 {
1795 struct cg *cgp;
1796 struct buf *bp;
1797 struct ufsmount *ump;
1798 int32_t fragno, cgbno;
1799 daddr_t cgblkno;
1800 int i, error, cg, blk, frags, bbase;
1801 u_int8_t *blksfree;
1802 dev_t dev;
1803 const int needswap = UFS_FSNEEDSWAP(fs);
1804
1805 cg = dtog(fs, bno);
1806 if (devvp->v_type != VBLK) {
1807 /* devvp is a snapshot */
1808 dev = VTOI(devvp)->i_devvp->v_rdev;
1809 ump = VFSTOUFS(devvp->v_mount);
1810 cgblkno = fragstoblks(fs, cgtod(fs, cg));
1811 } else {
1812 dev = devvp->v_rdev;
1813 ump = VFSTOUFS(devvp->v_specmountpoint);
1814 cgblkno = fsbtodb(fs, cgtod(fs, cg));
1815 if (ffs_snapblkfree(fs, devvp, bno, size, inum))
1816 return;
1817 }
1818 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
1819 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
1820 printf("dev = 0x%x, bno = %" PRId64 " bsize = %d, "
1821 "size = %ld, fs = %s\n",
1822 dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
1823 panic("blkfree: bad size");
1824 }
1825
1826 if (bno >= fs->fs_size) {
1827 printf("bad block %" PRId64 ", ino %llu\n", bno,
1828 (unsigned long long)inum);
1829 ffs_fserr(fs, inum, "bad block");
1830 return;
1831 }
1832 error = bread(devvp, cgblkno, (int)fs->fs_cgsize,
1833 NOCRED, B_MODIFY, &bp);
1834 if (error) {
1835 brelse(bp, 0);
1836 return;
1837 }
1838 cgp = (struct cg *)bp->b_data;
1839 if (!cg_chkmagic(cgp, needswap)) {
1840 brelse(bp, 0);
1841 return;
1842 }
1843 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1844 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1845 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1846 cgp->cg_time = ufs_rw64(time_second, needswap);
1847 cgbno = dtogd(fs, bno);
1848 blksfree = cg_blksfree(cgp, needswap);
1849 mutex_enter(&ump->um_lock);
1850 if (size == fs->fs_bsize) {
1851 fragno = fragstoblks(fs, cgbno);
1852 if (!ffs_isfreeblock(fs, blksfree, fragno)) {
1853 if (devvp->v_type != VBLK) {
1854 /* devvp is a snapshot */
1855 mutex_exit(&ump->um_lock);
1856 brelse(bp, 0);
1857 return;
1858 }
1859 printf("dev = 0x%x, block = %" PRId64 ", fs = %s\n",
1860 dev, bno, fs->fs_fsmnt);
1861 panic("blkfree: freeing free block");
1862 }
1863 ffs_setblock(fs, blksfree, fragno);
1864 ffs_clusteracct(fs, cgp, fragno, 1);
1865 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1866 fs->fs_cstotal.cs_nbfree++;
1867 fs->fs_cs(fs, cg).cs_nbfree++;
1868 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1869 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1870 i = old_cbtocylno(fs, cgbno);
1871 KASSERT(i >= 0);
1872 KASSERT(i < fs->fs_old_ncyl);
1873 KASSERT(old_cbtorpos(fs, cgbno) >= 0);
1874 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos);
1875 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1,
1876 needswap);
1877 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1878 }
1879 } else {
1880 bbase = cgbno - fragnum(fs, cgbno);
1881 /*
1882 * decrement the counts associated with the old frags
1883 */
1884 blk = blkmap(fs, blksfree, bbase);
1885 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1886 /*
1887 * deallocate the fragment
1888 */
1889 frags = numfrags(fs, size);
1890 for (i = 0; i < frags; i++) {
1891 if (isset(blksfree, cgbno + i)) {
1892 printf("dev = 0x%x, block = %" PRId64
1893 ", fs = %s\n",
1894 dev, bno + i, fs->fs_fsmnt);
1895 panic("blkfree: freeing free frag");
1896 }
1897 setbit(blksfree, cgbno + i);
1898 }
1899 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1900 fs->fs_cstotal.cs_nffree += i;
1901 fs->fs_cs(fs, cg).cs_nffree += i;
1902 /*
1903 * add back in counts associated with the new frags
1904 */
1905 blk = blkmap(fs, blksfree, bbase);
1906 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1907 /*
1908 * if a complete block has been reassembled, account for it
1909 */
1910 fragno = fragstoblks(fs, bbase);
1911 if (ffs_isblock(fs, blksfree, fragno)) {
1912 ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
1913 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1914 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1915 ffs_clusteracct(fs, cgp, fragno, 1);
1916 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1917 fs->fs_cstotal.cs_nbfree++;
1918 fs->fs_cs(fs, cg).cs_nbfree++;
1919 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1920 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1921 i = old_cbtocylno(fs, bbase);
1922 KASSERT(i >= 0);
1923 KASSERT(i < fs->fs_old_ncyl);
1924 KASSERT(old_cbtorpos(fs, bbase) >= 0);
1925 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos);
1926 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs,
1927 bbase)], 1, needswap);
1928 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1929 }
1930 }
1931 }
1932 fs->fs_fmod = 1;
1933 ACTIVECG_CLR(fs, cg);
1934 mutex_exit(&ump->um_lock);
1935 bdwrite(bp);
1936 }
1937
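/*
 * A hypothetical sketch of a caller releasing the direct blocks of an
 * inode through ffs_blkfree(); real truncation also handles indirect
 * blocks and trailing fragments, which are omitted here.  The helper
 * name and the use of NDADDR/DIP() are illustrative assumptions.
 */
#if 0
static void
example_free_direct_blocks(struct inode *ip)
{
	struct fs *fs = ip->i_fs;
	int i;

	for (i = 0; i < NDADDR; i++) {
		daddr_t bn = DIP(ip, db[i]);

		if (bn == 0)
			continue;
		/* give one full block back to the free map */
		ffs_blkfree(fs, ip->i_devvp, bn, fs->fs_bsize,
		    ip->i_number);
	}
}
#endif
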
1938 #if defined(DIAGNOSTIC) || defined(DEBUG)
1939 #ifdef XXXUBC
1940 /*
1941 * Verify allocation of a block or fragment. Returns true if block or
1942 * fragment is allocated, false if it is free.
1943 */
1944 static int
1945 ffs_checkblk(struct inode *ip, daddr_t bno, long size)
1946 {
1947 struct fs *fs;
1948 struct cg *cgp;
1949 struct buf *bp;
1950 int i, error, frags, free;
1951
1952 fs = ip->i_fs;
1953 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1954 printf("bsize = %d, size = %ld, fs = %s\n",
1955 fs->fs_bsize, size, fs->fs_fsmnt);
1956 panic("checkblk: bad size");
1957 }
1958 if (bno >= fs->fs_size)
1959 panic("checkblk: bad block %" PRId64, bno);
1960 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1961 (int)fs->fs_cgsize, NOCRED, 0, &bp);
1962 if (error) {
1963 brelse(bp, 0);
1964 return 0;
1965 }
1966 cgp = (struct cg *)bp->b_data;
1967 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1968 brelse(bp, 0);
1969 return 0;
1970 }
1971 bno = dtogd(fs, bno);
1972 if (size == fs->fs_bsize) {
1973 free = ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1974 fragstoblks(fs, bno));
1975 } else {
1976 frags = numfrags(fs, size);
1977 for (free = 0, i = 0; i < frags; i++)
1978 if (isset(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i))
1979 free++;
1980 if (free != 0 && free != frags)
1981 panic("checkblk: partially free fragment");
1982 }
1983 brelse(bp, 0);
1984 return (!free);
1985 }
1986 #endif /* XXXUBC */
1987 #endif /* DIAGNOSTIC || DEBUG */
1988
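/*
 * When DIAGNOSTIC and the XXXUBC code above are enabled, ffs_checkblk()
 * can back sanity assertions in the allocation paths, for example
 * verifying that a block just handed out is really marked in use.  A
 * hypothetical assertion-style wrapper:
 */
#if 0
static void
example_assert_allocated(struct inode *ip, daddr_t bno, long size)
{
	if (!ffs_checkblk(ip, bno, size))
		panic("example: block should be allocated but is free");
}
#endif
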
1989 /*
1990 * Free an inode.
1991 */
1992 int
1993 ffs_vfree(struct vnode *vp, ino_t ino, int mode)
1994 {
1995
1996 if (DOINGSOFTDEP(vp)) {
1997 softdep_freefile(vp, ino, mode);
1998 return (0);
1999 }
2000 return ffs_freefile(VTOI(vp)->i_fs, VTOI(vp)->i_devvp, ino, mode);
2001 }
2002
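/*
 * ffs_vfree() is invoked when the last reference to an unlinked inode
 * goes away: with soft dependencies the free is deferred, otherwise
 * ffs_freefile() runs immediately.  A hypothetical call site (the real
 * ones live in the UFS inactive path; names illustrative):
 */
#if 0
static void
example_release_inode(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);

	(void)ffs_vfree(vp, ip->i_number, ip->i_mode);
}
#endif
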
2003 /*
2004 * Do the actual free operation.
2005 * The specified inode is placed back in the free map.
2006 *
2007 * => um_lock not held on entry or exit
2008 */
2009 int
2010 ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
2011 {
2012 struct ufsmount *ump;
2013 struct cg *cgp;
2014 struct buf *bp;
2015 int error, cg;
2016 daddr_t cgbno;
2017 u_int8_t *inosused;
2018 dev_t dev;
2019 #ifdef FFS_EI
2020 const int needswap = UFS_FSNEEDSWAP(fs);
2021 #endif
2022
2023 UFS_WAPBL_JLOCK_ASSERT(devvp->v_specinfo->si_mountpoint);
2024
2025 cg = ino_to_cg(fs, ino);
2026 if (devvp->v_type != VBLK) {
2027 /* devvp is a snapshot */
2028 dev = VTOI(devvp)->i_devvp->v_rdev;
2029 ump = VFSTOUFS(devvp->v_mount);
2030 cgbno = fragstoblks(fs, cgtod(fs, cg));
2031 } else {
2032 dev = devvp->v_rdev;
2033 ump = VFSTOUFS(devvp->v_specmountpoint);
2034 cgbno = fsbtodb(fs, cgtod(fs, cg));
2035 }
2036 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
2037 panic("ifree: range: dev = 0x%x, ino = %llu, fs = %s",
2038 dev, (unsigned long long)ino, fs->fs_fsmnt);
2039 error = bread(devvp, cgbno, (int)fs->fs_cgsize,
2040 NOCRED, B_MODIFY, &bp);
2041 if (error) {
2042 brelse(bp, 0);
2043 return (error);
2044 }
2045 cgp = (struct cg *)bp->b_data;
2046 if (!cg_chkmagic(cgp, needswap)) {
2047 brelse(bp, 0);
2048 return (0);
2049 }
2050 cgp->cg_old_time = ufs_rw32(time_second, needswap);
2051 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
2052 (fs->fs_old_flags & FS_FLAGS_UPDATED))
2053 cgp->cg_time = ufs_rw64(time_second, needswap);
2054 inosused = cg_inosused(cgp, needswap);
2055 ino %= fs->fs_ipg;
2056 if (isclr(inosused, ino)) {
2057 printf("ifree: dev = 0x%x, ino = %llu, fs = %s\n",
2058 dev, (unsigned long long)ino + cg * fs->fs_ipg,
2059 fs->fs_fsmnt);
2060 if (fs->fs_ronly == 0)
2061 panic("ifree: freeing free inode");
2062 }
2063 clrbit(inosused, ino);
2064 UFS_WAPBL_UNREGISTER_INODE(devvp->v_specmountpoint,
2065 ino + cg * fs->fs_ipg, mode);
2066 if (ino < ufs_rw32(cgp->cg_irotor, needswap))
2067 cgp->cg_irotor = ufs_rw32(ino, needswap);
2068 ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
2069 mutex_enter(&ump->um_lock);
2070 fs->fs_cstotal.cs_nifree++;
2071 fs->fs_cs(fs, cg).cs_nifree++;
2072 if ((mode & IFMT) == IFDIR) {
2073 ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
2074 fs->fs_cstotal.cs_ndir--;
2075 fs->fs_cs(fs, cg).cs_ndir--;
2076 }
2077 fs->fs_fmod = 1;
2078 ACTIVECG_CLR(fs, cg);
2079 mutex_exit(&ump->um_lock);
2080 bdwrite(bp);
2081 return (0);
2082 }
2083
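/*
 * ffs_freefile() accepts either the block device vnode of the mounted
 * file system or a snapshot vnode; in the snapshot case the cylinder
 * group is read through the snapshot file itself, hence the
 * fragstoblks()/fsbtodb() split above.  A minimal direct use, mirroring
 * what ffs_vfree() does (helper name illustrative):
 */
#if 0
static int
example_free_inode(struct inode *ip, ino_t ino, int mode)
{
	return ffs_freefile(ip->i_fs, ip->i_devvp, ino, mode);
}
#endif
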
2084 /*
2085 * Check to see if a file is free.
2086 */
2087 int
2088 ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
2089 {
2090 struct cg *cgp;
2091 struct buf *bp;
2092 daddr_t cgbno;
2093 int ret, cg;
2094 u_int8_t *inosused;
2095
2096 cg = ino_to_cg(fs, ino);
2097 if (devvp->v_type != VBLK) {
2098 /* devvp is a snapshot */
2099 cgbno = fragstoblks(fs, cgtod(fs, cg));
2100 } else
2101 cgbno = fsbtodb(fs, cgtod(fs, cg));
2102 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
2103 return 1;
2104 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, 0, &bp)) {
2105 brelse(bp, 0);
2106 return 1;
2107 }
2108 cgp = (struct cg *)bp->b_data;
2109 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
2110 brelse(bp, 0);
2111 return 1;
2112 }
2113 inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
2114 ino %= fs->fs_ipg;
2115 ret = isclr(inosused, ino);
2116 brelse(bp, 0);
2117 return ret;
2118 }
2119
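/*
 * ffs_checkfreefile() answers "is this inode free on disk?" without
 * modifying anything; out-of-range inode numbers and read failures are
 * also reported as free (non-zero).  A hypothetical consistency check
 * built on it:
 */
#if 0
static void
example_warn_if_unallocated(struct fs *fs, struct vnode *devvp, ino_t ino)
{
	if (ffs_checkfreefile(fs, devvp, ino))
		printf("inode %llu is not allocated\n",
		    (unsigned long long)ino);
}
#endif
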
2120 /*
2121 * Find a block of the specified size in the specified cylinder group.
2122 *
2123 * It is a panic if a request is made to find a block when none are
2124 * available.
2125 */
2126 static int32_t
2127 ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
2128 {
2129 int32_t bno;
2130 int start, len, loc, i;
2131 int blk, field, subfield, pos;
2132 int ostart, olen;
2133 u_int8_t *blksfree;
2134 #ifdef FFS_EI
2135 const int needswap = UFS_FSNEEDSWAP(fs);
2136 #endif
2137
2138 /* KASSERT(mutex_owned(&ump->um_lock)); */
2139
2140 /*
2141 * find the fragment by searching through the free block
2142 * map for an appropriate bit pattern
2143 */
2144 if (bpref)
2145 start = dtogd(fs, bpref) / NBBY;
2146 else
2147 start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
2148 blksfree = cg_blksfree(cgp, needswap);
2149 len = howmany(fs->fs_fpg, NBBY) - start;
2150 ostart = start;
2151 olen = len;
2152 loc = scanc((u_int)len,
2153 (const u_char *)&blksfree[start],
2154 (const u_char *)fragtbl[fs->fs_frag],
2155 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
2156 if (loc == 0) {
2157 len = start + 1;
2158 start = 0;
2159 loc = scanc((u_int)len,
2160 (const u_char *)&blksfree[0],
2161 (const u_char *)fragtbl[fs->fs_frag],
2162 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
2163 if (loc == 0) {
2164 printf("start = %d, len = %d, fs = %s\n",
2165 ostart, olen, fs->fs_fsmnt);
2166 printf("offset=%d %ld\n",
2167 ufs_rw32(cgp->cg_freeoff, needswap),
2168 (long)blksfree - (long)cgp);
2169 printf("cg %d\n", cgp->cg_cgx);
2170 panic("ffs_alloccg: map corrupted");
2171 /* NOTREACHED */
2172 }
2173 }
2174 bno = (start + len - loc) * NBBY;
2175 cgp->cg_frotor = ufs_rw32(bno, needswap);
2176 /*
2177 * found the byte in the map
2178 * sift through the bits to find the selected frag
2179 */
2180 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
2181 blk = blkmap(fs, blksfree, bno);
2182 blk <<= 1;
2183 field = around[allocsiz];
2184 subfield = inside[allocsiz];
2185 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
2186 if ((blk & field) == subfield)
2187 return (bno + pos);
2188 field <<= 1;
2189 subfield <<= 1;
2190 }
2191 }
2192 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
2193 panic("ffs_alloccg: block not in map");
2194 /* return (-1); */
2195 }
2196
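/*
 * Callers of ffs_mapsearch() ask for a run of 'allocsiz' contiguous
 * fragments and get back the fragment offset within the cylinder group
 * where such a run starts; clearing the bits and updating the summaries
 * remains the caller's job, as in the fragment branch of ffs_blkalloc()
 * above.  A hedged sketch of that pattern (locking and summary updates
 * omitted; helper name illustrative):
 */
#if 0
static int32_t
example_find_and_clear(struct fs *fs, struct cg *cgp, daddr_t bpref,
    long size)
{
	u_int8_t *blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
	int frags = numfrags(fs, size);
	int32_t bno = ffs_mapsearch(fs, cgp, bpref, frags);
	int i;

	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	return bno;
}
#endif
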
2197 /*
2198 * Update the cluster map because of an allocation or free.
2199 *
2200 * Cnt == 1 means free; cnt == -1 means allocating.
2201 */
2202 void
2203 ffs_clusteracct(struct fs *fs, struct cg *cgp, int32_t blkno, int cnt)
2204 {
2205 int32_t *sump;
2206 int32_t *lp;
2207 u_char *freemapp, *mapp;
2208 int i, start, end, forw, back, map, bit;
2209 #ifdef FFS_EI
2210 const int needswap = UFS_FSNEEDSWAP(fs);
2211 #endif
2212
2213 /* KASSERT(mutex_owned(&ump->um_lock)); */
2214
2215 if (fs->fs_contigsumsize <= 0)
2216 return;
2217 freemapp = cg_clustersfree(cgp, needswap);
2218 sump = cg_clustersum(cgp, needswap);
2219 /*
2220 * Allocate or clear the actual block.
2221 */
2222 if (cnt > 0)
2223 setbit(freemapp, blkno);
2224 else
2225 clrbit(freemapp, blkno);
2226 /*
2227 * Find the size of the cluster going forward.
2228 */
2229 start = blkno + 1;
2230 end = start + fs->fs_contigsumsize;
2231 if (end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
2232 end = ufs_rw32(cgp->cg_nclusterblks, needswap);
2233 mapp = &freemapp[start / NBBY];
2234 map = *mapp++;
2235 bit = 1 << (start % NBBY);
2236 for (i = start; i < end; i++) {
2237 if ((map & bit) == 0)
2238 break;
2239 if ((i & (NBBY - 1)) != (NBBY - 1)) {
2240 bit <<= 1;
2241 } else {
2242 map = *mapp++;
2243 bit = 1;
2244 }
2245 }
2246 forw = i - start;
2247 /*
2248 * Find the size of the cluster going backward.
2249 */
2250 start = blkno - 1;
2251 end = start - fs->fs_contigsumsize;
2252 if (end < 0)
2253 end = -1;
2254 mapp = &freemapp[start / NBBY];
2255 map = *mapp--;
2256 bit = 1 << (start % NBBY);
2257 for (i = start; i > end; i--) {
2258 if ((map & bit) == 0)
2259 break;
2260 if ((i & (NBBY - 1)) != 0) {
2261 bit >>= 1;
2262 } else {
2263 map = *mapp--;
2264 bit = 1 << (NBBY - 1);
2265 }
2266 }
2267 back = start - i;
2268 /*
2269 * Account for old cluster and the possibly new forward and
2270 * back clusters.
2271 */
2272 i = back + forw + 1;
2273 if (i > fs->fs_contigsumsize)
2274 i = fs->fs_contigsumsize;
2275 ufs_add32(sump[i], cnt, needswap);
2276 if (back > 0)
2277 ufs_add32(sump[back], -cnt, needswap);
2278 if (forw > 0)
2279 ufs_add32(sump[forw], -cnt, needswap);
2280
2281 /*
2282 * Update cluster summary information.
2283 */
2284 lp = &sump[fs->fs_contigsumsize];
2285 for (i = fs->fs_contigsumsize; i > 0; i--)
2286 if (ufs_rw32(*lp--, needswap) > 0)
2287 break;
2288 fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
2289 }
2290
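/*
 * Worked example of the summary update above: with fs_contigsumsize == 4,
 * freeing a block (cnt == 1) that has a run of 2 free blocks immediately
 * before it and 1 free block after it gives back == 2 and forw == 1, so
 * the code credits the merged run and removes the two runs it absorbed:
 *
 *	sump[back + forw + 1] = sump[4] += 1;	(capped at fs_contigsumsize)
 *	sump[back]            = sump[2] -= 1;
 *	sump[forw]            = sump[1] -= 1;
 *
 * An allocation (cnt == -1) applies the same three updates with the
 * signs reversed.
 */
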
2291 /*
2292 * Fserr prints the name of a file system with an error diagnostic.
2293 *
2294 * The form of the error message is:
2295 * fs: error message
2296 *	uid U, pid P, command CMD, on FS: error message
2297 static void
2298 ffs_fserr(struct fs *fs, u_int uid, const char *cp)
2299 {
2300
2301 log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
2302 uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp);
2303 }
2304
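/*
 * For example, the "bad block" case in ffs_blkalloc() above would log a
 * line of the form (values illustrative):
 *
 *	uid 1000, pid 1234, command tar, on /usr: bad block
 */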