/*	$NetBSD: ffs_alloc.c,v 1.174 2025/06/27 19:55:38 andvar Exp $	*/

/*-
 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.19 (Berkeley) 7/13/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.174 2025/06/27 19:55:38 andvar Exp $");

#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#include "opt_quota.h"
#include "opt_uvm_page_trkown.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/cprng.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>

#include <miscfs/specfs/specdev.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#ifdef UVM_PAGE_TRKOWN
#include <uvm/uvm_object.h>
#include <uvm/uvm_page.h>
#endif

static daddr_t ffs_alloccg(struct inode *, u_int, daddr_t, int, int, int);
static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int, int);
static ino_t ffs_dirpref(struct inode *);
static daddr_t ffs_fragextend(struct inode *, u_int, daddr_t, int, int);
static void ffs_fserr(struct fs *, kauth_cred_t, const char *);
static daddr_t ffs_hashalloc(struct inode *, u_int, daddr_t, int, int, int,
    daddr_t (*)(struct inode *, u_int, daddr_t, int, int, int));
static daddr_t ffs_nodealloccg(struct inode *, u_int, daddr_t, int, int, int);
static int32_t ffs_mapsearch(struct fs *, struct cg *, daddr_t, int);
static void ffs_blkfree_common(struct ufsmount *, struct fs *, dev_t,
    struct buf *, daddr_t, long, bool);
static void ffs_freefile_common(struct ufsmount *, struct fs *, dev_t,
    struct buf *, ino_t, int, bool);

/* if 1, changes in optimization strategy are logged */
int ffs_log_changeopt = 0;

/* in ffs_tables.c */
extern const int inside[], around[];
extern const u_char * const fragtbl[];

/* Basic consistency check for block allocations */
static int
ffs_check_bad_allocation(const char *func, struct fs *fs, daddr_t bno,
    long size, dev_t dev, ino_t inum)
{
	if ((u_int)size > fs->fs_bsize || ffs_fragoff(fs, size) != 0 ||
	    ffs_fragnum(fs, bno) + ffs_numfrags(fs, size) > fs->fs_frag) {
		panic("%s: bad size: dev = 0x%llx, bno = %" PRId64
		    ", bsize = %d, size = %ld, fs = %s", func,
		    (long long)dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
	}

	if (bno >= fs->fs_size) {
		printf("%s: bad block %" PRId64 ", ino %llu\n", func, bno,
		    (unsigned long long)inum);
		ffs_fserr(fs, NOCRED, "bad block");
		return EINVAL;
	}
	return 0;
}
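
/*
 * Illustrative sketch of the fragment arithmetic used above; the block
 * below is not compiled and its concrete numbers are hypothetical.
 * With the common 8K/1K layout (fs_bsize = 8192, fs_fsize = 1024,
 * fs_frag = 8), a request is rejected when it would spill past a block
 * boundary.
 */
#if 0
static void
ffs_check_example(struct fs *fs)
{
	daddr_t bno = 14;		/* ffs_fragnum(fs, 14) == 6 */
	long size = 3 * fs->fs_fsize;	/* ffs_numfrags(fs, size) == 3 */

	/*
	 * 6 + 3 > fs_frag (8): the last fragment would land in the next
	 * block, so ffs_check_bad_allocation() panics.  A size that is
	 * not a multiple of fs_fsize already fails the ffs_fragoff()
	 * test before this one.
	 */
}
#endif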

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 *
 * => called with um_lock held
 * => releases um_lock before returning
 */
int
ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
    int flags, kauth_cred_t cred, daddr_t *bnp)
{
	struct ufsmount *ump;
	struct fs *fs;
	daddr_t bno;
	u_int cg;
#if defined(QUOTA) || defined(QUOTA2)
	int error;
#endif

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

#ifdef UVM_PAGE_TRKOWN

	/*
	 * Sanity-check that allocations within the file size
	 * do not allow other threads to read the stale contents
	 * of newly allocated blocks.
	 * Usually pages will exist to cover the new allocation.
	 * There is an optimization in ffs_write() where we skip
	 * creating pages if several conditions are met:
	 *  - the file must not be mapped (in any user address space).
	 *  - the write must cover whole pages and whole blocks.
	 * If those conditions are not met then pages must exist and
	 * be locked by the current thread.
	 */

	struct vnode *vp = ITOV(ip);
	if (vp->v_type == VREG && (flags & IO_EXT) == 0 &&
	    ffs_lblktosize(fs, (voff_t)lbn) < round_page(vp->v_size) &&
	    ((vp->v_vflag & VV_MAPPED) != 0 || (size & PAGE_MASK) != 0 ||
	    ffs_blkoff(fs, size) != 0)) {
		struct vm_page *pg __diagused;
		struct uvm_object *uobj = &vp->v_uobj;
		voff_t off = trunc_page(ffs_lblktosize(fs, lbn));
		voff_t endoff = round_page(ffs_lblktosize(fs, lbn) + size);

		rw_enter(uobj->vmobjlock, RW_WRITER);
		while (off < endoff) {
			pg = uvm_pagelookup(uobj, off);
			KASSERT((pg != NULL && pg->owner_tag != NULL &&
				 pg->owner == curproc->p_pid &&
				 pg->lowner == curlwp->l_lid));
			off += PAGE_SIZE;
		}
		rw_exit(uobj->vmobjlock);
	}
#endif

	*bnp = 0;

	KASSERTMSG((cred != NOCRED), "missing credential");
	KASSERTMSG(((u_int)size <= fs->fs_bsize),
	    "bad size: dev = 0x%llx, bsize = %d, size = %d, fs = %s",
	    (unsigned long long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
	KASSERTMSG((ffs_fragoff(fs, size) == 0),
	    "bad size: dev = 0x%llx, bsize = %d, size = %d, fs = %s",
	    (unsigned long long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);

	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (freespace(fs, fs->fs_minfree) <= 0 &&
	    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
	    NULL, NULL) != 0)
		goto nospace;
#if defined(QUOTA) || defined(QUOTA2)
	mutex_exit(&ump->um_lock);
	if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
		return (error);
	mutex_enter(&ump->um_lock);
#endif

	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, 0, flags, ffs_alloccg);
	if (bno > 0) {
		DIP_ADD(ip, blocks, btodb(size));
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#if defined(QUOTA) || defined(QUOTA2)
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(size), cred, FORCE);
#endif
	if (flags & B_CONTIG) {
		/*
		 * XXX ump->um_lock handling is "suspect" at best.
		 * For the case where ffs_hashalloc() fails early
		 * in the B_CONTIG case we reach here with um_lock
		 * already unlocked, so we can't release it again
		 * like in the normal error path.  See kern/39206.
		 *
		 * Fail silently - it's up to our caller to report
		 * errors.
		 */
		return (ENOSPC);
	}
nospace:
	mutex_exit(&ump->um_lock);
	ffs_fserr(fs, cred, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
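
/*
 * Minimal caller sketch (hypothetical, not compiled): ffs_alloc() is
 * entered with um_lock held and returns with it released both on
 * success and on the normal error paths, so a caller in the style of
 * ffs_balloc() looks roughly like this.
 */
#if 0
	mutex_enter(&ump->um_lock);
	error = ffs_alloc(ip, lbn, bpref, fs->fs_bsize, 0, cred, &newb);
	if (error)
		return error;		/* um_lock already released */
	/* ... use newb; um_lock is no longer held here ... */
#endif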

/*
 * Reallocate a fragment to a bigger size
 *
 * The number and size of the old block is given, and a preference
 * and new size is also specified.  The allocator attempts to extend
 * the original block.  Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 *
 * => called with um_lock held
 * => returns with um_lock released
 */
int
ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bprev, daddr_t bpref,
    int osize, int nsize, int flags, kauth_cred_t cred, struct buf **bpp,
    daddr_t *blknop)
{
	struct ufsmount *ump;
	struct fs *fs;
	struct buf *bp;
	u_int cg, request;
	int error;
	daddr_t bno;

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

#ifdef UVM_PAGE_TRKOWN

	/*
	 * Sanity-check that allocations within the file size
	 * do not allow other threads to read the stale contents
	 * of newly allocated blocks.
	 * Unlike in ffs_alloc(), here pages must always exist
	 * for such allocations, because only the last block of a file
	 * can be a fragment and ffs_write() will reallocate the
	 * fragment to the new size using ufs_balloc_range(),
	 * which always creates pages to cover blocks it allocates.
	 */

	if (ITOV(ip)->v_type == VREG) {
		struct vm_page *pg __diagused;
		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
		voff_t off = trunc_page(ffs_lblktosize(fs, lbprev));
		voff_t endoff = round_page(ffs_lblktosize(fs, lbprev) + osize);

		rw_enter(uobj->vmobjlock, RW_WRITER);
		while (off < endoff) {
			pg = uvm_pagelookup(uobj, off);
			KASSERT(pg->owner == curproc->p_pid &&
				pg->lowner == curlwp->l_lid);
			off += PAGE_SIZE;
		}
		rw_exit(uobj->vmobjlock);
	}
#endif

	KASSERTMSG((cred != NOCRED), "missing credential");
	KASSERTMSG(((u_int)osize <= fs->fs_bsize),
	    "bad size: dev=0x%llx, bsize=%d, osize=%d, nsize=%d, fs=%s",
	    (unsigned long long)ip->i_dev, fs->fs_bsize, osize, nsize,
	    fs->fs_fsmnt);
	KASSERTMSG((ffs_fragoff(fs, osize) == 0),
	    "bad size: dev=0x%llx, bsize=%d, osize=%d, nsize=%d, fs=%s",
	    (unsigned long long)ip->i_dev, fs->fs_bsize, osize, nsize,
	    fs->fs_fsmnt);
	KASSERTMSG(((u_int)nsize <= fs->fs_bsize),
	    "bad size: dev=0x%llx, bsize=%d, osize=%d, nsize=%d, fs=%s",
	    (unsigned long long)ip->i_dev, fs->fs_bsize, osize, nsize,
	    fs->fs_fsmnt);
	KASSERTMSG((ffs_fragoff(fs, nsize) == 0),
	    "bad size: dev=0x%llx, bsize=%d, osize=%d, nsize=%d, fs=%s",
	    (unsigned long long)ip->i_dev, fs->fs_bsize, osize, nsize,
	    fs->fs_fsmnt);

	if (freespace(fs, fs->fs_minfree) <= 0 &&
	    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
	    NULL, NULL) != 0) {
		mutex_exit(&ump->um_lock);
		goto nospace;
	}

	if (bprev == 0) {
		panic("%s: bad bprev: dev = 0x%llx, bsize = %d, bprev = %"
		    PRId64 ", fs = %s", __func__,
		    (unsigned long long)ip->i_dev, fs->fs_bsize, bprev,
		    fs->fs_fsmnt);
	}
	mutex_exit(&ump->um_lock);

	/*
	 * Allocate the extra space in the buffer.
	 */
	if (bpp != NULL &&
	    (error = bread(ITOV(ip), lbprev, osize, 0, &bp)) != 0) {
		return (error);
	}
#if defined(QUOTA) || defined(QUOTA2)
	if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
		if (bpp != NULL) {
			brelse(bp, 0);
		}
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	mutex_enter(&ump->um_lock);
	if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
		DIP_ADD(ip, blocks, btodb(nsize - osize));
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;

		if (bpp != NULL) {
			if (bp->b_blkno != FFS_FSBTODB(fs, bno)) {
				panic("%s: bad blockno %#llx != %#llx",
				    __func__, (unsigned long long)bp->b_blkno,
				    (unsigned long long)FFS_FSBTODB(fs, bno));
			}
			allocbuf(bp, nsize, 1);
			memset((char *)bp->b_data + osize, 0, nsize - osize);
			mutex_enter(bp->b_objlock);
			KASSERT(!cv_has_waiters(&bp->b_done));
			bp->b_oflags |= BO_DONE;
			mutex_exit(bp->b_objlock);
			*bpp = bp;
		}
		if (blknop != NULL) {
			*blknop = bno;
		}
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;

		if (ffs_log_changeopt) {
			log(LOG_NOTICE,
			    "%s: optimization changed from SPACE to TIME\n",
			    fs->fs_fsmnt);
		}

		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;

		if (ffs_log_changeopt) {
			log(LOG_NOTICE,
			    "%s: optimization changed from TIME to SPACE\n",
			    fs->fs_fsmnt);
		}

		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		panic("%s: bad optim: dev = 0x%llx, optim = %d, fs = %s",
		    __func__, (unsigned long long)ip->i_dev, fs->fs_optim,
		    fs->fs_fsmnt);
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, nsize, 0, ffs_alloccg);
	if (bno > 0) {
		/*
		 * Use forced deallocation registration, we can't handle
		 * failure here.  This is safe, as this place is reached at
		 * most once per write operation, when a fragment is extended
		 * to a longer fragment or to a full block.
		 */
		if ((ip->i_ump->um_mountp->mnt_wapbl) &&
		    (ITOV(ip)->v_type != VREG)) {
			/* this should never fail */
			error = UFS_WAPBL_REGISTER_DEALLOCATION_FORCE(
			    ip->i_ump->um_mountp, FFS_FSBTODB(fs, bprev),
			    osize);
			if (error)
				panic("ffs_realloccg: dealloc registration failed");
		} else {
			ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
			    ip->i_number);
		}
		DIP_ADD(ip, blocks, btodb(nsize - osize));
		if (flags & IO_EXT)
			ip->i_flag |= IN_CHANGE;
		else
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (bpp != NULL) {
			bp->b_blkno = FFS_FSBTODB(fs, bno);
			allocbuf(bp, nsize, 1);
			memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
			mutex_enter(bp->b_objlock);
			KASSERT(!cv_has_waiters(&bp->b_done));
			bp->b_oflags |= BO_DONE;
			mutex_exit(bp->b_objlock);
			*bpp = bp;
		}
		if (blknop != NULL) {
			*blknop = bno;
		}
		return (0);
	}
	mutex_exit(&ump->um_lock);

#if defined(QUOTA) || defined(QUOTA2)
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
#endif
	if (bpp != NULL) {
		brelse(bp, 0);
	}

nospace:
	/*
	 * no space available
	 */
	ffs_fserr(fs, cred, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
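
/*
 * Worked example of the optimization-switch thresholds above, using
 * hypothetical numbers: fs_dsize = 1000000 fragments and fs_minfree = 5
 * (percent).  This block is not compiled.
 */
#if 0
	/*
	 * FS_OPTSPACE -> FS_OPTTIME once free fragments exceed half the
	 * minfree reserve: 1000000 * 5 / 200 = 25000 fragments.
	 */
	int64_t to_time = fs->fs_dsize * fs->fs_minfree / (2 * 100);
	/*
	 * FS_OPTTIME -> FS_OPTSPACE once free fragments drop within 2%
	 * of the reserve: 1000000 * (5 - 2) / 100 = 30000 fragments.
	 */
	int64_t to_space = fs->fs_dsize * (fs->fs_minfree - 2) / 100;
#endif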

/*
 * Allocate an inode in the file system.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 *
 * => um_lock not held upon entry or return
 */
int
ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
{
	struct ufsmount *ump;
	struct inode *pip;
	struct fs *fs;
	ino_t ino, ipref;
	u_int cg;
	int error;

	UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount);

	pip = VTOI(pvp);
	fs = pip->i_fs;
	ump = pip->i_ump;

	error = UFS_WAPBL_BEGIN(pvp->v_mount);
	if (error) {
		return error;
	}
	mutex_enter(&ump->um_lock);
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track the number of dirs created one after another
	 * in the same cg without intervening files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, 0, ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	UFS_WAPBL_END(pvp->v_mount);
	*inop = ino;
	return 0;

noinodes:
	mutex_exit(&ump->um_lock);
	UFS_WAPBL_END(pvp->v_mount);
	ffs_fserr(fs, cred, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return ENOSPC;
}

/*
 * Find a cylinder group in which to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data.  Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(struct inode *pip)
{
	register struct fs *fs;
	u_int cg, prefcg;
	uint64_t dirsize, cgsize, curdsz;
	u_int avgifree, avgbfree, avgndir;
	u_int minifree, minbfree, maxndir;
	u_int mincg, minndir;
	u_int maxcontigdirs;

	KASSERT(mutex_owned(&pip->i_ump->um_lock));

	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = cprng_fast32() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Count various limits which are used for
	 * optimal allocation of a directory inode.
	 * Try cylinder groups with >75% avgifree and avgbfree.
	 * Avoid cylinder groups with no free blocks or inodes as that
	 * triggers an I/O-expensive cylinder group scan.
	 */
	maxndir = uimin(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
	dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
	if (avgndir != 0) {
		curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
		if (dirsize < curdsz)
			dirsize = curdsz;
	}
	if (cgsize < dirsize * 255)
		maxcontigdirs = (avgbfree * fs->fs_bsize) / dirsize;
	else
		maxcontigdirs = 255;
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = uimin(maxcontigdirs,
		    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we are deficient in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}
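
/*
 * Worked example (hypothetical numbers) of the maxcontigdirs sizing
 * above.  With hint values fs_avgfilesize = 16384 and fs_avgfpdir = 64,
 * the expected directory footprint is dirsize = 16384 * 64 = 1 MB.
 * On a filesystem with fs_bsize = 16384 and avgbfree = 2048 free
 * blocks per cg (32 MB), and assuming the curdsz adjustment does not
 * raise dirsize, maxcontigdirs = (2048 * 16384) / 1048576 = 32
 * directories may be created back to back before ffs_valloc()'s
 * fs_contigdirs counter pushes allocation into another cylinder group,
 * subject to the fs_ipg / fs_avgfpdir cap.
 */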

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections.  The first section is composed of the
 * direct blocks.  Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file.  If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups.  When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made.  The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found.  If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
 *
 * => um_lock held on entry and exit
 */
daddr_t
ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int flags,
    int32_t *bap /* XXX ondisk32 */)
{
	struct fs *fs;
	u_int cg;
	u_int avgbfree, startcg;

	KASSERT(mutex_owned(&ip->i_ump->um_lock));

	fs = ip->i_fs;

	/*
	 * If allocating a contiguous file with B_CONTIG, use the hints
	 * in the inode extensions to return the desired block.
	 *
	 * For metadata (indirect blocks) return the address of where
	 * the first indirect block resides - we'll scan for the next
	 * available slot if we need to allocate more than one indirect
	 * block.  For data, return the address of the actual block
	 * relative to the address of the first data block.
	 */
	if (flags & B_CONTIG) {
		KASSERT(ip->i_ffs_first_data_blk != 0);
		KASSERT(ip->i_ffs_first_indir_blk != 0);
		if (flags & B_METAONLY)
			return ip->i_ffs_first_indir_blk;
		else
			return ip->i_ffs_first_data_blk + ffs_blkstofrags(fs, lbn);
	}

	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < UFS_NDADDR + FFS_NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs,
			    ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg < startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
}

daddr_t
ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int flags,
    int64_t *bap)
{
	struct fs *fs;
	u_int cg;
	u_int avgbfree, startcg;

	KASSERT(mutex_owned(&ip->i_ump->um_lock));

	fs = ip->i_fs;

	/*
	 * If allocating a contiguous file with B_CONTIG, use the hints
	 * in the inode extensions to return the desired block.
	 *
	 * For metadata (indirect blocks) return the address of where
	 * the first indirect block resides - we'll scan for the next
	 * available slot if we need to allocate more than one indirect
	 * block.  For data, return the address of the actual block
	 * relative to the address of the first data block.
	 */
	if (flags & B_CONTIG) {
		KASSERT(ip->i_ffs_first_data_blk != 0);
		KASSERT(ip->i_ffs_first_indir_blk != 0);
		if (flags & B_METAONLY)
			return ip->i_ffs_first_indir_blk;
		else
			return ip->i_ffs_first_data_blk + ffs_blkstofrags(fs, lbn);
	}

	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < UFS_NDADDR + FFS_NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs,
			    ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg < startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
}
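
/*
 * Standalone restatement (hypothetical helper, not compiled) of the
 * rotor sweep used by both ffs_blkpref variants above: starting at
 * startcg, take the first cylinder group holding at least the
 * filesystem-wide average number of free blocks, wrapping around once.
 */
#if 0
static u_int
ffs_blkpref_sweep(struct fs *fs, u_int startcg)
{
	u_int cg, avgbfree;

	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	for (cg = startcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
			return cg;
	for (cg = 0; cg < startcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
			return cg;
	return startcg;		/* nothing above average; caller returns 0 */
}
#endif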

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 *
 * => called with um_lock held
 * => returns with um_lock released on success, held on failure
 *    (*allocator releases lock on success, retains lock on failure)
 */
/*VARARGS5*/
static daddr_t
ffs_hashalloc(struct inode *ip, u_int cg, daddr_t pref,
    int size /* size for data blocks, mode for inodes */,
    int realsize,
    int flags,
    daddr_t (*allocator)(struct inode *, u_int, daddr_t, int, int, int))
{
	struct fs *fs;
	daddr_t result;
	u_int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size, realsize, flags);
	if (result)
		return (result);

	if (flags & B_CONTIG)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size, realsize, flags);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size, realsize, flags);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}
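
/*
 * The probe order above, restated (hypothetical fragment, not
 * compiled): from a preferred group icg, the quadratic rehash visits
 * offsets 2^n - 1 before the linear fallback tries every group.
 */
#if 0
	u_int cg = icg, i;

	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg = (cg + i) % fs->fs_ncg;
		/* visits icg+1, icg+3, icg+7, icg+15, ... (mod fs_ncg) */
	}
	/* then icg+2, icg+3, icg+4, ... until all fs_ncg groups are tried */
#endif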

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 *
 * => called with um_lock held
 * => returns with um_lock released on success, held on failure
 */
static daddr_t
ffs_fragextend(struct inode *ip, u_int cg, daddr_t bprev, int osize, int nsize)
{
	struct ufsmount *ump;
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	daddr_t bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

	if (fs->fs_cs(fs, cg).cs_nffree < ffs_numfrags(fs, nsize - osize))
		return (0);
	frags = ffs_numfrags(fs, nsize);
	bbase = ffs_fragnum(fs, bprev);
	if (bbase > ffs_fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	mutex_exit(&ump->um_lock);
	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
		goto fail;
	cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
	for (i = ffs_numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i))
			goto fail;
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	ufs_add32(cgp->cg_frsum[i - ffs_numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
	if (i != frags)
		ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
	mutex_enter(&ump->um_lock);
	for (i = ffs_numfrags(fs, osize); i < frags; i++) {
		clrbit(blksfree, bno + i);
		ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(bp);
	return (bprev);

fail:
	if (bp != NULL)
		brelse(bp, 0);
	mutex_enter(&ump->um_lock);
	return (0);
}
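
/*
 * Worked example (hypothetical layout) of the cg_frsum bookkeeping in
 * ffs_fragextend() above.  Suppose osize is 3 fragments, nsize is 5
 * fragments, and the piece at bprev is followed by a free run of 4
 * fragments inside the same block.  The scan stops at i == 7, so the
 * old free run of length i - 3 == 4 leaves cg_frsum[4], the extension
 * consumes two of those fragments, and the remaining run of length
 * i - 5 == 2 is credited to cg_frsum[2].
 */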

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static daddr_t
ffs_alloccg(struct inode *ip, u_int cg, daddr_t bpref, int size, int realsize,
    int flags)
{
	struct ufsmount *ump;
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	struct buf *bp;
	int32_t bno;
	daddr_t blkno;
	int error, frags, allocsiz, i;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	mutex_exit(&ump->um_lock);
	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
		goto fail;
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	if (size == fs->fs_bsize) {
		mutex_enter(&ump->um_lock);
		blkno = ffs_alloccgblk(ip, bp, bpref, realsize, flags);
		ACTIVECG_CLR(fs, cg);
		mutex_exit(&ump->um_lock);

		/*
		 * If the actually needed size is lower, free the extra
		 * fragments now.  This is safe to call here, there is no
		 * outside reference to this block yet.  It is not
		 * necessary to keep um_lock locked.
		 */
		if (realsize != 0 && realsize < size) {
			ffs_blkfree_common(ip->i_ump, ip->i_fs,
			    ip->i_devvp->v_rdev,
			    bp, blkno + ffs_numfrags(fs, realsize),
			    (long)(size - realsize), false);
		}

		bdwrite(bp);
		return (blkno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	blksfree = cg_blksfree(cgp, needswap);
	frags = ffs_numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0)
			goto fail;
		mutex_enter(&ump->um_lock);
		blkno = ffs_alloccgblk(ip, bp, bpref, realsize, flags);
		bno = dtogd(fs, blkno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bno + i);
		i = fs->fs_frag - frags;
		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		ufs_add32(cgp->cg_frsum[i], 1, needswap);
		ACTIVECG_CLR(fs, cg);
		mutex_exit(&ump->um_lock);
		bdwrite(bp);
		return (blkno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
#if 0
	/*
	 * XXX fvdl mapsearch will panic, and never return -1
	 * also: returning NULL as daddr_t ?
	 */
	if (bno < 0)
		goto fail;
#endif
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	mutex_enter(&ump->um_lock);
	ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
	if (frags != allocsiz)
		ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
	blkno = cgbase(fs, cg) + bno;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(bp);
	return blkno;

fail:
	if (bp != NULL)
		brelse(bp, 0);
	mutex_enter(&ump->um_lock);
	return (0);
}
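
/*
 * Worked example (hypothetical state) of the cg_frsum search in
 * ffs_alloccg() above.  A request for frags == 2 fragments scans
 * cg_frsum[2], cg_frsum[3], ... for a nonzero bucket; if the smallest
 * available run has length 3, allocsiz becomes 3, two fragments of
 * that run are consumed, and the leftover single fragment moves the
 * run from bucket 3 to bucket 1: cg_frsum[3]--, cg_frsum[1]++.
 */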

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static daddr_t
ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref, int realsize,
    int flags)
{
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	int cg;
	daddr_t blkno;
	int32_t bno;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT(mutex_owned(&ip->i_ump->um_lock));

	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp, needswap);
	if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
		bpref = ufs_rw32(cgp->cg_rotor, needswap);
	} else {
		bpref = ffs_blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, ffs_fragstoblks(fs, bno)))
			goto gotit;
		/*
		 * if the requested data block isn't available and we are
		 * trying to allocate a contiguous file, return an error.
		 */
		if ((flags & (B_CONTIG | B_METAONLY)) == B_CONTIG)
			return (0);
	}

	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
#if 0
	/*
	 * XXX jdolecek ffs_mapsearch() succeeds or panics
	 */
	if (bno < 0)
		return (0);
#endif
	cgp->cg_rotor = ufs_rw32(bno, needswap);
gotit:
	blkno = ffs_fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
	if ((fs->fs_magic == FS_UFS1_MAGIC) &&
	    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
		int cylno;
		cylno = old_cbtocylno(fs, bno);
		KASSERT(cylno >= 0);
		KASSERT(cylno < fs->fs_old_ncyl);
		KASSERT(old_cbtorpos(fs, bno) >= 0);
		KASSERT(fs->fs_old_nrpos == 0 ||
		    old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
		ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
		    needswap);
		ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
	}
	fs->fs_fmod = 1;
	cg = ufs_rw32(cgp->cg_cgx, needswap);
	blkno = cgbase(fs, cg) + bno;
	return (blkno);
}
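
/*
 * Illustrative note on the fragment/block arithmetic above, with
 * hypothetical numbers for fs_frag == 8: ffs_blknum(fs, 21) rounds
 * fragment address 21 down to 16, the first fragment of its block;
 * dtogd() yields the cg-relative fragment number, and
 * ffs_fragstoblks() converts that to the block index (16 / 8 == 2)
 * tested against the free-block bitmap by ffs_isblock().
 */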

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
static daddr_t
ffs_nodealloccg(struct inode *ip, u_int cg, daddr_t ipref, int mode,
    int realsize, int flags)
{
	struct ufsmount *ump = ip->i_ump;
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	struct buf *bp, *ibp;
	u_int8_t *inosused;
	int error, start, len, loc, map, i;
	int32_t initediblk, maxiblk, irotor;
	daddr_t nalloc;
	struct ufs2_dinode *dp2;
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT(mutex_owned(&ump->um_lock));
	UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp);

	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	mutex_exit(&ump->um_lock);
	ibp = NULL;
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		initediblk = -1;
	} else {
		initediblk = fs->fs_ipg;
	}
	maxiblk = initediblk;

retry:
	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0)
		goto fail;

	if (ibp != NULL &&
	    initediblk != ufs_rw32(cgp->cg_initediblk, needswap)) {
		/* Another thread allocated more inodes so we retry the test. */
		brelse(ibp, 0);
		ibp = NULL;
	}
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	if (fs->fs_magic == FS_UFS2_MAGIC && ibp == NULL) {
		initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
		maxiblk = initediblk;
		nalloc = fs->fs_ipg - ufs_rw32(cgp->cg_cs.cs_nifree, needswap);
		if (nalloc + FFS_INOPB(fs) > initediblk &&
		    initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
			/*
			 * We have to release the cg buffer here to prevent
			 * a deadlock when reading the inode block will
			 * run a copy-on-write that might use this cg.
			 */
1318 1.112 hannken */
1319 1.112 hannken brelse(bp, 0);
1320 1.112 hannken bp = NULL;
1321 1.136 dholland error = ffs_getblk(ip->i_devvp, FFS_FSBTODB(fs,
1322 1.112 hannken ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
1323 1.112 hannken FFS_NOBLK, fs->fs_bsize, false, &ibp);
1324 1.112 hannken if (error)
1325 1.112 hannken goto fail;
1326 1.164 kardel 
1327 1.164 kardel maxiblk += FFS_INOPB(fs);
1328 1.164 kardel 
1329 1.112 hannken goto retry;
1330 1.112 hannken }
1331 1.112 hannken }
1332 1.112 hannken 
1333 1.92 kardel cgp->cg_old_time = ufs_rw32(time_second, needswap);
1334 1.73 dbj if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1335 1.73 dbj (fs->fs_old_flags & FS_FLAGS_UPDATED))
1336 1.92 kardel cgp->cg_time = ufs_rw64(time_second, needswap);
1337 1.60 fvdl inosused = cg_inosused(cgp, needswap);
1338 1.164 kardel 
1339 1.1 mycroft if (ipref) {
1340 1.1 mycroft ipref %= fs->fs_ipg;
1341 1.164 kardel /* safeguard: stay within the already (or about to be) initialized range */
1342 1.164 kardel if (ipref < maxiblk && isclr(inosused, ipref))
1343 1.1 mycroft goto gotit;
1344 1.1 mycroft }
1345 1.164 kardel 
1346 1.164 kardel irotor = ufs_rw32(cgp->cg_irotor, needswap);
1347 1.164 kardel 
1348 1.164 kardel KASSERTMSG(irotor < initediblk, "%s: allocation botch: cg=%d, irotor %d"
1349 1.164 kardel " out of bounds, initediblk=%d",
1350 1.164 kardel __func__, cg, irotor, initediblk);
1351 1.164 kardel 
1352 1.164 kardel start = irotor / NBBY;
1353 1.164 kardel len = howmany(maxiblk - irotor, NBBY);
1354 1.60 fvdl loc = skpc(0xff, len, &inosused[start]);
1355 1.1 mycroft if (loc == 0) {
1356 1.1 mycroft len = start + 1;
1357 1.1 mycroft start = 0;
1358 1.60 fvdl loc = skpc(0xff, len, &inosused[0]);
1359 1.1 mycroft if (loc == 0) {
1360 1.154 christos panic("%s: map corrupted: cg=%d, irotor=%d, fs=%s",
1361 1.154 christos __func__, cg, ufs_rw32(cgp->cg_irotor, needswap),
1362 1.154 christos fs->fs_fsmnt);
1363 1.1 mycroft /* NOTREACHED */
1364 1.1 mycroft }
1365 1.1 mycroft }
1366 1.1 mycroft i = start + len - loc;
1367 1.126 rmind map = inosused[i] ^ 0xff;
1368 1.126 rmind if (map == 0) {
1369 1.154 christos panic("%s: block not in map: fs=%s", __func__, fs->fs_fsmnt);
1370 1.1 mycroft }
1371 1.164 kardel 
1372 1.126 rmind ipref = i * NBBY + ffs(map) - 1;
1373 1.164 kardel 
1374 1.126 rmind cgp->cg_irotor = ufs_rw32(ipref, needswap);
1375 1.164 kardel 
1376 1.1 mycroft gotit:
1377 1.164 kardel KASSERTMSG(ipref < maxiblk, "%s: allocation botch: cg=%d attempt to "
1378 1.164 kardel "allocate inode index %d beyond max allocated index %d"
1379 1.164 kardel " of %d inodes/cg",
1380 1.164 kardel __func__, cg, (int)ipref, maxiblk, cgp->cg_niblk);
1381 1.164 kardel 
1382 1.111 simonb UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref,
1383 1.111 simonb mode);
1384 1.60 fvdl /*
1385 1.60 fvdl * Initialize the inode block we prepared above, if any.
1386 1.60 fvdl */
1387 1.112 hannken if (ibp != NULL) {
1388 1.112 hannken KASSERT(initediblk == ufs_rw32(cgp->cg_initediblk, needswap));
1389 1.108 hannken memset(ibp->b_data, 0, fs->fs_bsize);
1390 1.108 hannken dp2 = (struct ufs2_dinode *)(ibp->b_data);
1391 1.134 dholland for (i = 0; i < FFS_INOPB(fs); i++) {
1392 1.60 fvdl /*
1393 1.60 fvdl * Don't bother to swap, it's supposed to be
1394 1.60 fvdl * random, after all.
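 *
 * (Clarifying note: di_gen seeds the inode generation number
 * that ends up in NFS file handles, so an unpredictable start
 * makes stale or forged handles hard to guess.  The expression
 * below, (cprng_fast32() & INT32_MAX) / 2 + 1, always yields a
 * positive value in [1, 2^30].)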
1395 1.60 fvdl */
1396 1.130 tls dp2->di_gen = (cprng_fast32() & INT32_MAX) / 2 + 1;
1397 1.60 fvdl dp2++;
1398 1.60 fvdl }
1399 1.134 dholland initediblk += FFS_INOPB(fs);
1400 1.60 fvdl cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
1401 1.60 fvdl }
1402 1.60 fvdl 
1403 1.101 ad mutex_enter(&ump->um_lock);
1404 1.76 hannken ACTIVECG_CLR(fs, cg);
1405 1.101 ad setbit(inosused, ipref);
1406 1.101 ad ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1407 1.101 ad fs->fs_cstotal.cs_nifree--;
1408 1.101 ad fs->fs_cs(fs, cg).cs_nifree--;
1409 1.101 ad fs->fs_fmod = 1;
1410 1.101 ad if ((mode & IFMT) == IFDIR) {
1411 1.101 ad ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1412 1.101 ad fs->fs_cstotal.cs_ndir++;
1413 1.101 ad fs->fs_cs(fs, cg).cs_ndir++;
1414 1.101 ad }
1415 1.101 ad mutex_exit(&ump->um_lock);
1416 1.112 hannken if (ibp != NULL) {
1417 1.157 hannken bwrite(ibp);
1418 1.112 hannken bwrite(bp);
1419 1.112 hannken } else
1420 1.112 hannken bdwrite(bp);
1421 1.172 chs return ((ino_t)(cg * fs->fs_ipg + ipref));
1422 1.101 ad fail:
1423 1.112 hannken if (bp != NULL)
1424 1.112 hannken brelse(bp, 0);
1425 1.112 hannken if (ibp != NULL)
1426 1.121 ad brelse(ibp, 0);
1427 1.101 ad mutex_enter(&ump->um_lock);
1428 1.101 ad return (0);
1429 1.1 mycroft }
1430 1.1 mycroft 
1431 1.1 mycroft /*
1432 1.111 simonb * Allocate a block or fragment.
1433 1.111 simonb *
1434 1.111 simonb * The specified block or fragment is removed from the
1435 1.111 simonb * free map, possibly fragmenting a block in the process.
1436 1.111 simonb *
1437 1.111 simonb * This implementation should mirror ffs_blkfree
1438 1.111 simonb *
1439 1.111 simonb * => um_lock not held on entry or exit
1440 1.111 simonb */
1441 1.111 simonb int
1442 1.111 simonb ffs_blkalloc(struct inode *ip, daddr_t bno, long size)
1443 1.111 simonb {
1444 1.116 joerg int error;
1445 1.111 simonb 
1446 1.116 joerg error = ffs_check_bad_allocation(__func__, ip->i_fs, bno, size,
1447 1.116 joerg ip->i_dev, ip->i_uid);
1448 1.116 joerg if (error)
1449 1.116 joerg return error;
1450 1.115 joerg 
1451 1.115 joerg return ffs_blkalloc_ump(ip->i_ump, bno, size);
1452 1.115 joerg }
1453 1.115 joerg 
1454 1.115 joerg int
1455 1.115 joerg ffs_blkalloc_ump(struct ufsmount *ump, daddr_t bno, long size)
1456 1.115 joerg {
1457 1.115 joerg struct fs *fs = ump->um_fs;
1458 1.115 joerg struct cg *cgp;
1459 1.115 joerg struct buf *bp;
1460 1.115 joerg int32_t fragno, cgbno;
1461 1.172 chs int i, error, blk, frags, bbase;
1462 1.172 chs u_int cg;
1463 1.115 joerg u_int8_t *blksfree;
1464 1.115 joerg const int needswap = UFS_FSNEEDSWAP(fs);
1465 1.115 joerg 
1466 1.134 dholland KASSERT((u_int)size <= fs->fs_bsize && ffs_fragoff(fs, size) == 0 &&
1467 1.138 dholland ffs_fragnum(fs, bno) + ffs_numfrags(fs, size) <= fs->fs_frag);
1468 1.115 joerg KASSERT(bno < fs->fs_size);
1469 1.115 joerg 
1470 1.115 joerg cg = dtog(fs, bno);
1471 1.136 dholland error = bread(ump->um_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
1472 1.149 maxv (int)fs->fs_cgsize, B_MODIFY, &bp);
1473 1.111 simonb if (error) {
1474 1.111 simonb return error;
1475 1.111 simonb }
1476 1.111 simonb cgp = (struct cg *)bp->b_data;
1477 1.111 simonb if (!cg_chkmagic(cgp, needswap)) {
1478 1.111 simonb brelse(bp, 0);
1479 1.111 simonb return EIO;
1480 1.111 simonb }
1481 1.111 simonb cgp->cg_old_time = ufs_rw32(time_second, needswap);
1482 1.111 simonb cgp->cg_time = ufs_rw64(time_second, needswap);
1483 1.111 simonb cgbno = dtogd(fs, bno);
1484 1.111 simonb blksfree = cg_blksfree(cgp, needswap);
1485 1.111 simonb 
1486
1.111 simonb mutex_enter(&ump->um_lock); 1487 1.111 simonb if (size == fs->fs_bsize) { 1488 1.138 dholland fragno = ffs_fragstoblks(fs, cgbno); 1489 1.111 simonb if (!ffs_isblock(fs, blksfree, fragno)) { 1490 1.111 simonb mutex_exit(&ump->um_lock); 1491 1.111 simonb brelse(bp, 0); 1492 1.111 simonb return EBUSY; 1493 1.111 simonb } 1494 1.111 simonb ffs_clrblock(fs, blksfree, fragno); 1495 1.111 simonb ffs_clusteracct(fs, cgp, fragno, -1); 1496 1.111 simonb ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap); 1497 1.111 simonb fs->fs_cstotal.cs_nbfree--; 1498 1.111 simonb fs->fs_cs(fs, cg).cs_nbfree--; 1499 1.111 simonb } else { 1500 1.138 dholland bbase = cgbno - ffs_fragnum(fs, cgbno); 1501 1.111 simonb 1502 1.137 dholland frags = ffs_numfrags(fs, size); 1503 1.111 simonb for (i = 0; i < frags; i++) { 1504 1.111 simonb if (isclr(blksfree, cgbno + i)) { 1505 1.111 simonb mutex_exit(&ump->um_lock); 1506 1.111 simonb brelse(bp, 0); 1507 1.111 simonb return EBUSY; 1508 1.111 simonb } 1509 1.111 simonb } 1510 1.111 simonb /* 1511 1.111 simonb * if a complete block is being split, account for it 1512 1.111 simonb */ 1513 1.138 dholland fragno = ffs_fragstoblks(fs, bbase); 1514 1.111 simonb if (ffs_isblock(fs, blksfree, fragno)) { 1515 1.111 simonb ufs_add32(cgp->cg_cs.cs_nffree, fs->fs_frag, needswap); 1516 1.111 simonb fs->fs_cstotal.cs_nffree += fs->fs_frag; 1517 1.111 simonb fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag; 1518 1.111 simonb ffs_clusteracct(fs, cgp, fragno, -1); 1519 1.111 simonb ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap); 1520 1.111 simonb fs->fs_cstotal.cs_nbfree--; 1521 1.111 simonb fs->fs_cs(fs, cg).cs_nbfree--; 1522 1.111 simonb } 1523 1.111 simonb /* 1524 1.111 simonb * decrement the counts associated with the old frags 1525 1.111 simonb */ 1526 1.111 simonb blk = blkmap(fs, blksfree, bbase); 1527 1.111 simonb ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap); 1528 1.111 simonb /* 1529 1.111 simonb * allocate the fragment 1530 1.111 simonb */ 1531 1.111 simonb for (i = 0; i < frags; i++) { 1532 1.111 simonb clrbit(blksfree, cgbno + i); 1533 1.111 simonb } 1534 1.111 simonb ufs_add32(cgp->cg_cs.cs_nffree, -i, needswap); 1535 1.111 simonb fs->fs_cstotal.cs_nffree -= i; 1536 1.111 simonb fs->fs_cs(fs, cg).cs_nffree -= i; 1537 1.111 simonb /* 1538 1.111 simonb * add back in counts associated with the new frags 1539 1.111 simonb */ 1540 1.111 simonb blk = blkmap(fs, blksfree, bbase); 1541 1.111 simonb ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap); 1542 1.111 simonb } 1543 1.111 simonb fs->fs_fmod = 1; 1544 1.111 simonb ACTIVECG_CLR(fs, cg); 1545 1.111 simonb mutex_exit(&ump->um_lock); 1546 1.111 simonb bdwrite(bp); 1547 1.111 simonb return 0; 1548 1.111 simonb } 1549 1.111 simonb 1550 1.111 simonb /* 1551 1.1 mycroft * Free a block or fragment. 1552 1.1 mycroft * 1553 1.1 mycroft * The specified block or fragment is placed back in the 1554 1.81 perry * free map. If a fragment is deallocated, a possible 1555 1.1 mycroft * block reassembly is checked. 
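 *
 * The accounting order in ffs_blkfree_common() below is, in
 * outline (calls as they appear there; a sketch, not extra code):
 *
 *	blk = blkmap(fs, blksfree, bbase);
 *	ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
 *	...setbit() the freed fragments in blksfree...
 *	blk = blkmap(fs, blksfree, bbase);
 *	ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
 *	if (ffs_isblock(fs, blksfree, ffs_fragstoblks(fs, bbase)))
 *		...move the reassembled block from the fragment
 *		counters to cs_nbfree...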
1556 1.106 pooka * 1557 1.106 pooka * => um_lock not held on entry or exit 1558 1.1 mycroft */ 1559 1.131 drochner static void 1560 1.131 drochner ffs_blkfree_cg(struct fs *fs, struct vnode *devvp, daddr_t bno, long size) 1561 1.1 mycroft { 1562 1.33 augustss struct cg *cgp; 1563 1.1 mycroft struct buf *bp; 1564 1.76 hannken struct ufsmount *ump; 1565 1.76 hannken daddr_t cgblkno; 1566 1.172 chs int error; 1567 1.172 chs u_int cg; 1568 1.76 hannken dev_t dev; 1569 1.113 hannken const bool devvp_is_snapshot = (devvp->v_type != VBLK); 1570 1.118 joerg const int needswap = UFS_FSNEEDSWAP(fs); 1571 1.1 mycroft 1572 1.116 joerg KASSERT(!devvp_is_snapshot); 1573 1.116 joerg 1574 1.76 hannken cg = dtog(fs, bno); 1575 1.116 joerg dev = devvp->v_rdev; 1576 1.140 hannken ump = VFSTOUFS(spec_node_getmountedfs(devvp)); 1577 1.119 joerg KASSERT(fs == ump->um_fs); 1578 1.136 dholland cgblkno = FFS_FSBTODB(fs, cgtod(fs, cg)); 1579 1.116 joerg 1580 1.116 joerg error = bread(devvp, cgblkno, (int)fs->fs_cgsize, 1581 1.149 maxv B_MODIFY, &bp); 1582 1.116 joerg if (error) { 1583 1.116 joerg return; 1584 1.76 hannken } 1585 1.116 joerg cgp = (struct cg *)bp->b_data; 1586 1.116 joerg if (!cg_chkmagic(cgp, needswap)) { 1587 1.116 joerg brelse(bp, 0); 1588 1.116 joerg return; 1589 1.1 mycroft } 1590 1.76 hannken 1591 1.119 joerg ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot); 1592 1.119 joerg 1593 1.119 joerg bdwrite(bp); 1594 1.116 joerg } 1595 1.116 joerg 1596 1.131 drochner struct discardopdata { 1597 1.131 drochner struct work wk; /* must be first */ 1598 1.131 drochner struct vnode *devvp; 1599 1.131 drochner daddr_t bno; 1600 1.131 drochner long size; 1601 1.131 drochner }; 1602 1.131 drochner 1603 1.131 drochner struct discarddata { 1604 1.131 drochner struct fs *fs; 1605 1.131 drochner struct discardopdata *entry; 1606 1.131 drochner long maxsize; 1607 1.131 drochner kmutex_t entrylk; 1608 1.131 drochner struct workqueue *wq; 1609 1.131 drochner int wqcnt, wqdraining; 1610 1.131 drochner kmutex_t wqlk; 1611 1.131 drochner kcondvar_t wqcv; 1612 1.131 drochner /* timer for flush? 
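   As the code stands there is none: the single coalesced entry
   is only pushed to the workqueue when it reaches maxsize, when
   a free that cannot be prepended arrives, or when
   ffs_discard_finish() drains it at unmount.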
*/ 1613 1.131 drochner }; 1614 1.131 drochner 1615 1.131 drochner static void 1616 1.131 drochner ffs_blkfree_td(struct fs *fs, struct discardopdata *td) 1617 1.131 drochner { 1618 1.151 riastrad struct mount *mp = spec_node_getmountedfs(td->devvp); 1619 1.131 drochner long todo; 1620 1.151 riastrad int error; 1621 1.131 drochner 1622 1.131 drochner while (td->size) { 1623 1.161 riastrad todo = uimin(td->size, 1624 1.138 dholland ffs_lfragtosize(fs, (fs->fs_frag - ffs_fragnum(fs, td->bno)))); 1625 1.151 riastrad error = UFS_WAPBL_BEGIN(mp); 1626 1.151 riastrad if (error) { 1627 1.151 riastrad printf("ffs: failed to begin wapbl transaction" 1628 1.151 riastrad " for discard: %d\n", error); 1629 1.151 riastrad break; 1630 1.151 riastrad } 1631 1.131 drochner ffs_blkfree_cg(fs, td->devvp, td->bno, todo); 1632 1.151 riastrad UFS_WAPBL_END(mp); 1633 1.137 dholland td->bno += ffs_numfrags(fs, todo); 1634 1.131 drochner td->size -= todo; 1635 1.131 drochner } 1636 1.131 drochner } 1637 1.131 drochner 1638 1.131 drochner static void 1639 1.131 drochner ffs_discardcb(struct work *wk, void *arg) 1640 1.131 drochner { 1641 1.131 drochner struct discardopdata *td = (void *)wk; 1642 1.131 drochner struct discarddata *ts = arg; 1643 1.131 drochner struct fs *fs = ts->fs; 1644 1.146 dholland off_t start, len; 1645 1.139 martin #ifdef TRIMDEBUG 1646 1.131 drochner int error; 1647 1.139 martin #endif 1648 1.131 drochner 1649 1.146 dholland /* like FSBTODB but emits bytes; XXX move to fs.h */ 1650 1.146 dholland #ifndef FFS_FSBTOBYTES 1651 1.146 dholland #define FFS_FSBTOBYTES(fs, b) ((b) << (fs)->fs_fshift) 1652 1.146 dholland #endif 1653 1.146 dholland 1654 1.146 dholland start = FFS_FSBTOBYTES(fs, td->bno); 1655 1.146 dholland len = td->size; 1656 1.171 hannken vn_lock(td->devvp, LK_EXCLUSIVE | LK_RETRY); 1657 1.139 martin #ifdef TRIMDEBUG 1658 1.139 martin error = 1659 1.139 martin #endif 1660 1.146 dholland VOP_FDISCARD(td->devvp, start, len); 1661 1.171 hannken VOP_UNLOCK(td->devvp); 1662 1.131 drochner #ifdef TRIMDEBUG 1663 1.131 drochner printf("trim(%" PRId64 ",%ld):%d\n", td->bno, td->size, error); 1664 1.131 drochner #endif 1665 1.131 drochner 1666 1.131 drochner ffs_blkfree_td(fs, td); 1667 1.131 drochner kmem_free(td, sizeof(*td)); 1668 1.131 drochner mutex_enter(&ts->wqlk); 1669 1.131 drochner ts->wqcnt--; 1670 1.131 drochner if (ts->wqdraining && !ts->wqcnt) 1671 1.131 drochner cv_signal(&ts->wqcv); 1672 1.131 drochner mutex_exit(&ts->wqlk); 1673 1.131 drochner } 1674 1.131 drochner 1675 1.131 drochner void * 1676 1.131 drochner ffs_discard_init(struct vnode *devvp, struct fs *fs) 1677 1.131 drochner { 1678 1.131 drochner struct discarddata *ts; 1679 1.131 drochner int error; 1680 1.131 drochner 1681 1.131 drochner ts = kmem_zalloc(sizeof (*ts), KM_SLEEP); 1682 1.131 drochner error = workqueue_create(&ts->wq, "trimwq", ffs_discardcb, ts, 1683 1.160 ozaki PRI_USER, IPL_NONE, 0); 1684 1.131 drochner if (error) { 1685 1.131 drochner kmem_free(ts, sizeof (*ts)); 1686 1.131 drochner return NULL; 1687 1.131 drochner } 1688 1.131 drochner mutex_init(&ts->entrylk, MUTEX_DEFAULT, IPL_NONE); 1689 1.131 drochner mutex_init(&ts->wqlk, MUTEX_DEFAULT, IPL_NONE); 1690 1.131 drochner cv_init(&ts->wqcv, "trimwqcv"); 1691 1.146 dholland ts->maxsize = 100*1024; /* XXX */ 1692 1.131 drochner ts->fs = fs; 1693 1.131 drochner return ts; 1694 1.131 drochner } 1695 1.131 drochner 1696 1.131 drochner void 1697 1.131 drochner ffs_discard_finish(void *vts, int flags) 1698 1.131 drochner { 1699 1.131 drochner struct 
discarddata *ts = vts;
1700 1.131 drochner struct discardopdata *td = NULL;
1701 1.131 drochner 
1702 1.131 drochner /* wait for workqueue to drain */
1703 1.131 drochner mutex_enter(&ts->wqlk);
1704 1.131 drochner if (ts->wqcnt) {
1705 1.131 drochner ts->wqdraining = 1;
1706 1.158 mlelstv cv_wait(&ts->wqcv, &ts->wqlk);
1707 1.131 drochner }
1708 1.131 drochner mutex_exit(&ts->wqlk);
1709 1.131 drochner 
1710 1.131 drochner mutex_enter(&ts->entrylk);
1711 1.131 drochner if (ts->entry) {
1712 1.131 drochner td = ts->entry;
1713 1.131 drochner ts->entry = NULL;
1714 1.131 drochner }
1715 1.131 drochner mutex_exit(&ts->entrylk);
1716 1.131 drochner if (td) {
1717 1.131 drochner /* XXX don't tell disk, it's optional */
1718 1.131 drochner ffs_blkfree_td(ts->fs, td);
1719 1.131 drochner #ifdef TRIMDEBUG
1720 1.131 drochner printf("finish(%" PRId64 ",%ld)\n", td->bno, td->size);
1721 1.131 drochner #endif
1722 1.131 drochner kmem_free(td, sizeof(*td));
1723 1.131 drochner }
1724 1.131 drochner 
1725 1.131 drochner cv_destroy(&ts->wqcv);
1726 1.131 drochner mutex_destroy(&ts->entrylk);
1727 1.131 drochner mutex_destroy(&ts->wqlk);
1728 1.131 drochner workqueue_destroy(ts->wq);
1729 1.131 drochner kmem_free(ts, sizeof(*ts));
1730 1.131 drochner }
1731 1.131 drochner 
1732 1.131 drochner void
1733 1.131 drochner ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
1734 1.131 drochner ino_t inum)
1735 1.131 drochner {
1736 1.131 drochner struct ufsmount *ump;
1737 1.131 drochner int error;
1738 1.131 drochner dev_t dev;
1739 1.131 drochner struct discarddata *ts;
1740 1.131 drochner struct discardopdata *td;
1741 1.131 drochner 
1742 1.131 drochner dev = devvp->v_rdev;
1743 1.140 hannken ump = VFSTOUFS(spec_node_getmountedfs(devvp));
1744 1.131 drochner if (ffs_snapblkfree(fs, devvp, bno, size, inum))
1745 1.131 drochner return;
1746 1.131 drochner 
1747 1.131 drochner error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
1748 1.131 drochner if (error)
1749 1.131 drochner return;
1750 1.131 drochner 
1751 1.131 drochner if (!ump->um_discarddata) {
1752 1.131 drochner ffs_blkfree_cg(fs, devvp, bno, size);
1753 1.131 drochner return;
1754 1.131 drochner }
1755 1.131 drochner 
1756 1.131 drochner #ifdef TRIMDEBUG
1757 1.131 drochner printf("blkfree(%" PRId64 ",%ld)\n", bno, size);
1758 1.131 drochner #endif
1759 1.131 drochner ts = ump->um_discarddata;
1760 1.131 drochner td = NULL;
1761 1.131 drochner 
1762 1.131 drochner mutex_enter(&ts->entrylk);
1763 1.131 drochner if (ts->entry) {
1764 1.131 drochner td = ts->entry;
1765 1.131 drochner /* ffs deallocates backwards, so check for prepend only */
1766 1.137 dholland if (td->bno == bno + ffs_numfrags(fs, size)
1767 1.131 drochner && td->size + size <= ts->maxsize) {
1768 1.131 drochner td->bno = bno;
1769 1.131 drochner td->size += size;
1770 1.131 drochner if (td->size < ts->maxsize) {
1771 1.131 drochner #ifdef TRIMDEBUG
1772 1.131 drochner printf("defer(%" PRId64 ",%ld)\n", td->bno, td->size);
1773 1.131 drochner #endif
1774 1.131 drochner mutex_exit(&ts->entrylk);
1775 1.131 drochner return;
1776 1.131 drochner }
1777 1.131 drochner size = 0; /* mark done */
1778 1.131 drochner }
1779 1.131 drochner ts->entry = NULL;
1780 1.131 drochner }
1781 1.131 drochner mutex_exit(&ts->entrylk);
1782 1.131 drochner 
1783 1.131 drochner if (td) {
1784 1.131 drochner #ifdef TRIMDEBUG
1785 1.131 drochner printf("enq old(%" PRId64 ",%ld)\n", td->bno, td->size);
1786 1.131 drochner #endif
1787 1.131 drochner mutex_enter(&ts->wqlk);
1788 1.131
drochner ts->wqcnt++; 1789 1.131 drochner mutex_exit(&ts->wqlk); 1790 1.131 drochner workqueue_enqueue(ts->wq, &td->wk, NULL); 1791 1.131 drochner } 1792 1.131 drochner if (!size) 1793 1.131 drochner return; 1794 1.131 drochner 1795 1.131 drochner td = kmem_alloc(sizeof(*td), KM_SLEEP); 1796 1.131 drochner td->devvp = devvp; 1797 1.131 drochner td->bno = bno; 1798 1.131 drochner td->size = size; 1799 1.131 drochner 1800 1.131 drochner if (td->size < ts->maxsize) { /* XXX always the case */ 1801 1.131 drochner mutex_enter(&ts->entrylk); 1802 1.131 drochner if (!ts->entry) { /* possible race? */ 1803 1.131 drochner #ifdef TRIMDEBUG 1804 1.131 drochner printf("defer(%" PRId64 ",%ld)\n", td->bno, td->size); 1805 1.131 drochner #endif 1806 1.131 drochner ts->entry = td; 1807 1.131 drochner td = NULL; 1808 1.131 drochner } 1809 1.131 drochner mutex_exit(&ts->entrylk); 1810 1.131 drochner } 1811 1.131 drochner if (td) { 1812 1.131 drochner #ifdef TRIMDEBUG 1813 1.131 drochner printf("enq new(%" PRId64 ",%ld)\n", td->bno, td->size); 1814 1.131 drochner #endif 1815 1.131 drochner mutex_enter(&ts->wqlk); 1816 1.131 drochner ts->wqcnt++; 1817 1.131 drochner mutex_exit(&ts->wqlk); 1818 1.131 drochner workqueue_enqueue(ts->wq, &td->wk, NULL); 1819 1.131 drochner } 1820 1.131 drochner } 1821 1.131 drochner 1822 1.116 joerg /* 1823 1.116 joerg * Free a block or fragment from a snapshot cg copy. 1824 1.116 joerg * 1825 1.116 joerg * The specified block or fragment is placed back in the 1826 1.116 joerg * free map. If a fragment is deallocated, a possible 1827 1.116 joerg * block reassembly is checked. 1828 1.116 joerg * 1829 1.116 joerg * => um_lock not held on entry or exit 1830 1.116 joerg */ 1831 1.116 joerg void 1832 1.116 joerg ffs_blkfree_snap(struct fs *fs, struct vnode *devvp, daddr_t bno, long size, 1833 1.116 joerg ino_t inum) 1834 1.116 joerg { 1835 1.116 joerg struct cg *cgp; 1836 1.116 joerg struct buf *bp; 1837 1.116 joerg struct ufsmount *ump; 1838 1.116 joerg daddr_t cgblkno; 1839 1.116 joerg int error, cg; 1840 1.116 joerg dev_t dev; 1841 1.116 joerg const bool devvp_is_snapshot = (devvp->v_type != VBLK); 1842 1.118 joerg const int needswap = UFS_FSNEEDSWAP(fs); 1843 1.116 joerg 1844 1.116 joerg KASSERT(devvp_is_snapshot); 1845 1.116 joerg 1846 1.116 joerg cg = dtog(fs, bno); 1847 1.116 joerg dev = VTOI(devvp)->i_devvp->v_rdev; 1848 1.116 joerg ump = VFSTOUFS(devvp->v_mount); 1849 1.138 dholland cgblkno = ffs_fragstoblks(fs, cgtod(fs, cg)); 1850 1.116 joerg 1851 1.116 joerg error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum); 1852 1.116 joerg if (error) 1853 1.1 mycroft return; 1854 1.116 joerg 1855 1.107 hannken error = bread(devvp, cgblkno, (int)fs->fs_cgsize, 1856 1.149 maxv B_MODIFY, &bp); 1857 1.1 mycroft if (error) { 1858 1.1 mycroft return; 1859 1.1 mycroft } 1860 1.1 mycroft cgp = (struct cg *)bp->b_data; 1861 1.19 bouyer if (!cg_chkmagic(cgp, needswap)) { 1862 1.101 ad brelse(bp, 0); 1863 1.1 mycroft return; 1864 1.1 mycroft } 1865 1.116 joerg 1866 1.119 joerg ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot); 1867 1.119 joerg 1868 1.119 joerg bdwrite(bp); 1869 1.116 joerg } 1870 1.116 joerg 1871 1.116 joerg static void 1872 1.119 joerg ffs_blkfree_common(struct ufsmount *ump, struct fs *fs, dev_t dev, 1873 1.119 joerg struct buf *bp, daddr_t bno, long size, bool devvp_is_snapshot) 1874 1.116 joerg { 1875 1.116 joerg struct cg *cgp; 1876 1.116 joerg int32_t fragno, cgbno; 1877 1.172 chs int i, blk, frags, bbase; 1878 1.172 chs u_int cg; 1879 
1.116 joerg u_int8_t *blksfree; 1880 1.116 joerg const int needswap = UFS_FSNEEDSWAP(fs); 1881 1.116 joerg 1882 1.116 joerg cg = dtog(fs, bno); 1883 1.116 joerg cgp = (struct cg *)bp->b_data; 1884 1.92 kardel cgp->cg_old_time = ufs_rw32(time_second, needswap); 1885 1.73 dbj if ((fs->fs_magic != FS_UFS1_MAGIC) || 1886 1.73 dbj (fs->fs_old_flags & FS_FLAGS_UPDATED)) 1887 1.92 kardel cgp->cg_time = ufs_rw64(time_second, needswap); 1888 1.60 fvdl cgbno = dtogd(fs, bno); 1889 1.62 fvdl blksfree = cg_blksfree(cgp, needswap); 1890 1.101 ad mutex_enter(&ump->um_lock); 1891 1.1 mycroft if (size == fs->fs_bsize) { 1892 1.138 dholland fragno = ffs_fragstoblks(fs, cgbno); 1893 1.62 fvdl if (!ffs_isfreeblock(fs, blksfree, fragno)) { 1894 1.113 hannken if (devvp_is_snapshot) { 1895 1.101 ad mutex_exit(&ump->um_lock); 1896 1.76 hannken return; 1897 1.76 hannken } 1898 1.154 christos panic("%s: freeing free block: dev = 0x%llx, block = %" 1899 1.154 christos PRId64 ", fs = %s", __func__, 1900 1.120 christos (unsigned long long)dev, bno, fs->fs_fsmnt); 1901 1.1 mycroft } 1902 1.62 fvdl ffs_setblock(fs, blksfree, fragno); 1903 1.60 fvdl ffs_clusteracct(fs, cgp, fragno, 1); 1904 1.19 bouyer ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap); 1905 1.1 mycroft fs->fs_cstotal.cs_nbfree++; 1906 1.1 mycroft fs->fs_cs(fs, cg).cs_nbfree++; 1907 1.73 dbj if ((fs->fs_magic == FS_UFS1_MAGIC) && 1908 1.73 dbj ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) { 1909 1.73 dbj i = old_cbtocylno(fs, cgbno); 1910 1.75 dbj KASSERT(i >= 0); 1911 1.75 dbj KASSERT(i < fs->fs_old_ncyl); 1912 1.75 dbj KASSERT(old_cbtorpos(fs, cgbno) >= 0); 1913 1.75 dbj KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos); 1914 1.73 dbj ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1, 1915 1.73 dbj needswap); 1916 1.73 dbj ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap); 1917 1.73 dbj } 1918 1.1 mycroft } else { 1919 1.138 dholland bbase = cgbno - ffs_fragnum(fs, cgbno); 1920 1.1 mycroft /* 1921 1.1 mycroft * decrement the counts associated with the old frags 1922 1.1 mycroft */ 1923 1.62 fvdl blk = blkmap(fs, blksfree, bbase); 1924 1.19 bouyer ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap); 1925 1.1 mycroft /* 1926 1.1 mycroft * deallocate the fragment 1927 1.1 mycroft */ 1928 1.137 dholland frags = ffs_numfrags(fs, size); 1929 1.1 mycroft for (i = 0; i < frags; i++) { 1930 1.62 fvdl if (isset(blksfree, cgbno + i)) { 1931 1.154 christos panic("%s: freeing free frag: " 1932 1.154 christos "dev = 0x%llx, block = %" PRId64 1933 1.154 christos ", fs = %s", __func__, 1934 1.120 christos (unsigned long long)dev, bno + i, 1935 1.120 christos fs->fs_fsmnt); 1936 1.1 mycroft } 1937 1.62 fvdl setbit(blksfree, cgbno + i); 1938 1.1 mycroft } 1939 1.19 bouyer ufs_add32(cgp->cg_cs.cs_nffree, i, needswap); 1940 1.1 mycroft fs->fs_cstotal.cs_nffree += i; 1941 1.30 fvdl fs->fs_cs(fs, cg).cs_nffree += i; 1942 1.1 mycroft /* 1943 1.1 mycroft * add back in counts associated with the new frags 1944 1.1 mycroft */ 1945 1.62 fvdl blk = blkmap(fs, blksfree, bbase); 1946 1.19 bouyer ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap); 1947 1.1 mycroft /* 1948 1.1 mycroft * if a complete block has been reassembled, account for it 1949 1.1 mycroft */ 1950 1.138 dholland fragno = ffs_fragstoblks(fs, bbase); 1951 1.62 fvdl if (ffs_isblock(fs, blksfree, fragno)) { 1952 1.19 bouyer ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap); 1953 1.1 mycroft fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1954 1.1 mycroft fs->fs_cs(fs, 
cg).cs_nffree -= fs->fs_frag; 1955 1.60 fvdl ffs_clusteracct(fs, cgp, fragno, 1); 1956 1.19 bouyer ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap); 1957 1.1 mycroft fs->fs_cstotal.cs_nbfree++; 1958 1.1 mycroft fs->fs_cs(fs, cg).cs_nbfree++; 1959 1.73 dbj if ((fs->fs_magic == FS_UFS1_MAGIC) && 1960 1.73 dbj ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) { 1961 1.73 dbj i = old_cbtocylno(fs, bbase); 1962 1.75 dbj KASSERT(i >= 0); 1963 1.75 dbj KASSERT(i < fs->fs_old_ncyl); 1964 1.75 dbj KASSERT(old_cbtorpos(fs, bbase) >= 0); 1965 1.75 dbj KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos); 1966 1.73 dbj ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, 1967 1.73 dbj bbase)], 1, needswap); 1968 1.73 dbj ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap); 1969 1.73 dbj } 1970 1.1 mycroft } 1971 1.1 mycroft } 1972 1.1 mycroft fs->fs_fmod = 1; 1973 1.76 hannken ACTIVECG_CLR(fs, cg); 1974 1.101 ad mutex_exit(&ump->um_lock); 1975 1.1 mycroft } 1976 1.1 mycroft 1977 1.1 mycroft /* 1978 1.1 mycroft * Free an inode. 1979 1.30 fvdl */ 1980 1.30 fvdl int 1981 1.88 yamt ffs_vfree(struct vnode *vp, ino_t ino, int mode) 1982 1.30 fvdl { 1983 1.30 fvdl 1984 1.119 joerg return ffs_freefile(vp->v_mount, ino, mode); 1985 1.30 fvdl } 1986 1.30 fvdl 1987 1.30 fvdl /* 1988 1.30 fvdl * Do the actual free operation. 1989 1.1 mycroft * The specified inode is placed back in the free map. 1990 1.111 simonb * 1991 1.111 simonb * => um_lock not held on entry or exit 1992 1.1 mycroft */ 1993 1.1 mycroft int 1994 1.119 joerg ffs_freefile(struct mount *mp, ino_t ino, int mode) 1995 1.119 joerg { 1996 1.119 joerg struct ufsmount *ump = VFSTOUFS(mp); 1997 1.119 joerg struct fs *fs = ump->um_fs; 1998 1.119 joerg struct vnode *devvp; 1999 1.119 joerg struct cg *cgp; 2000 1.119 joerg struct buf *bp; 2001 1.172 chs int error; 2002 1.172 chs u_int cg; 2003 1.119 joerg daddr_t cgbno; 2004 1.119 joerg dev_t dev; 2005 1.119 joerg const int needswap = UFS_FSNEEDSWAP(fs); 2006 1.119 joerg 2007 1.119 joerg cg = ino_to_cg(fs, ino); 2008 1.119 joerg devvp = ump->um_devvp; 2009 1.119 joerg dev = devvp->v_rdev; 2010 1.136 dholland cgbno = FFS_FSBTODB(fs, cgtod(fs, cg)); 2011 1.119 joerg 2012 1.172 chs if (ino >= fs->fs_ipg * fs->fs_ncg) 2013 1.154 christos panic("%s: range: dev = 0x%llx, ino = %llu, fs = %s", __func__, 2014 1.120 christos (long long)dev, (unsigned long long)ino, fs->fs_fsmnt); 2015 1.119 joerg error = bread(devvp, cgbno, (int)fs->fs_cgsize, 2016 1.149 maxv B_MODIFY, &bp); 2017 1.119 joerg if (error) { 2018 1.119 joerg return (error); 2019 1.119 joerg } 2020 1.119 joerg cgp = (struct cg *)bp->b_data; 2021 1.119 joerg if (!cg_chkmagic(cgp, needswap)) { 2022 1.119 joerg brelse(bp, 0); 2023 1.119 joerg return (0); 2024 1.119 joerg } 2025 1.119 joerg 2026 1.119 joerg ffs_freefile_common(ump, fs, dev, bp, ino, mode, false); 2027 1.119 joerg 2028 1.119 joerg bdwrite(bp); 2029 1.119 joerg 2030 1.119 joerg return 0; 2031 1.119 joerg } 2032 1.119 joerg 2033 1.119 joerg int 2034 1.119 joerg ffs_freefile_snap(struct fs *fs, struct vnode *devvp, ino_t ino, int mode) 2035 1.9 christos { 2036 1.101 ad struct ufsmount *ump; 2037 1.33 augustss struct cg *cgp; 2038 1.1 mycroft struct buf *bp; 2039 1.1 mycroft int error, cg; 2040 1.76 hannken daddr_t cgbno; 2041 1.78 hannken dev_t dev; 2042 1.30 fvdl const int needswap = UFS_FSNEEDSWAP(fs); 2043 1.1 mycroft 2044 1.119 joerg KASSERT(devvp->v_type != VBLK); 2045 1.111 simonb 2046 1.76 hannken cg = ino_to_cg(fs, ino); 2047 1.119 joerg dev = 
VTOI(devvp)->i_devvp->v_rdev; 2048 1.119 joerg ump = VFSTOUFS(devvp->v_mount); 2049 1.138 dholland cgbno = ffs_fragstoblks(fs, cgtod(fs, cg)); 2050 1.172 chs if (ino >= fs->fs_ipg * fs->fs_ncg) 2051 1.154 christos panic("%s: range: dev = 0x%llx, ino = %llu, fs = %s", __func__, 2052 1.120 christos (unsigned long long)dev, (unsigned long long)ino, 2053 1.120 christos fs->fs_fsmnt); 2054 1.107 hannken error = bread(devvp, cgbno, (int)fs->fs_cgsize, 2055 1.149 maxv B_MODIFY, &bp); 2056 1.1 mycroft if (error) { 2057 1.30 fvdl return (error); 2058 1.1 mycroft } 2059 1.1 mycroft cgp = (struct cg *)bp->b_data; 2060 1.19 bouyer if (!cg_chkmagic(cgp, needswap)) { 2061 1.101 ad brelse(bp, 0); 2062 1.1 mycroft return (0); 2063 1.1 mycroft } 2064 1.119 joerg ffs_freefile_common(ump, fs, dev, bp, ino, mode, true); 2065 1.119 joerg 2066 1.119 joerg bdwrite(bp); 2067 1.119 joerg 2068 1.119 joerg return 0; 2069 1.119 joerg } 2070 1.119 joerg 2071 1.119 joerg static void 2072 1.119 joerg ffs_freefile_common(struct ufsmount *ump, struct fs *fs, dev_t dev, 2073 1.119 joerg struct buf *bp, ino_t ino, int mode, bool devvp_is_snapshot) 2074 1.119 joerg { 2075 1.172 chs u_int cg; 2076 1.119 joerg struct cg *cgp; 2077 1.119 joerg u_int8_t *inosused; 2078 1.119 joerg const int needswap = UFS_FSNEEDSWAP(fs); 2079 1.172 chs ino_t cgino; 2080 1.119 joerg 2081 1.119 joerg cg = ino_to_cg(fs, ino); 2082 1.119 joerg cgp = (struct cg *)bp->b_data; 2083 1.92 kardel cgp->cg_old_time = ufs_rw32(time_second, needswap); 2084 1.73 dbj if ((fs->fs_magic != FS_UFS1_MAGIC) || 2085 1.73 dbj (fs->fs_old_flags & FS_FLAGS_UPDATED)) 2086 1.92 kardel cgp->cg_time = ufs_rw64(time_second, needswap); 2087 1.62 fvdl inosused = cg_inosused(cgp, needswap); 2088 1.172 chs cgino = ino % fs->fs_ipg; 2089 1.172 chs if (isclr(inosused, cgino)) { 2090 1.120 christos printf("ifree: dev = 0x%llx, ino = %llu, fs = %s\n", 2091 1.172 chs (unsigned long long)dev, (unsigned long long)ino, 2092 1.172 chs fs->fs_fsmnt); 2093 1.1 mycroft if (fs->fs_ronly == 0) 2094 1.154 christos panic("%s: freeing free inode", __func__); 2095 1.1 mycroft } 2096 1.172 chs clrbit(inosused, cgino); 2097 1.113 hannken if (!devvp_is_snapshot) 2098 1.172 chs UFS_WAPBL_UNREGISTER_INODE(ump->um_mountp, ino, mode); 2099 1.172 chs if (cgino < ufs_rw32(cgp->cg_irotor, needswap)) 2100 1.172 chs cgp->cg_irotor = ufs_rw32(cgino, needswap); 2101 1.19 bouyer ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap); 2102 1.101 ad mutex_enter(&ump->um_lock); 2103 1.1 mycroft fs->fs_cstotal.cs_nifree++; 2104 1.1 mycroft fs->fs_cs(fs, cg).cs_nifree++; 2105 1.78 hannken if ((mode & IFMT) == IFDIR) { 2106 1.19 bouyer ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap); 2107 1.1 mycroft fs->fs_cstotal.cs_ndir--; 2108 1.1 mycroft fs->fs_cs(fs, cg).cs_ndir--; 2109 1.1 mycroft } 2110 1.1 mycroft fs->fs_fmod = 1; 2111 1.82 hannken ACTIVECG_CLR(fs, cg); 2112 1.101 ad mutex_exit(&ump->um_lock); 2113 1.1 mycroft } 2114 1.1 mycroft 2115 1.1 mycroft /* 2116 1.76 hannken * Check to see if a file is free. 
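 *
 * Used on snapshot vnodes only (the KASSERT below enforces this).
 * Returns nonzero when the inode is free or when its state cannot
 * be determined (inode number out of range, read error, bad cg
 * magic).  Usage sketch, with "snapvp" standing for a snapshot
 * vnode:
 *
 *	if (ffs_checkfreefile(fs, snapvp, ino))
 *		...ino is not known to be in use on the snapshot...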
2117 1.76 hannken */
2118 1.76 hannken int
2119 1.85 thorpej ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
2120 1.76 hannken {
2121 1.76 hannken struct cg *cgp;
2122 1.76 hannken struct buf *bp;
2123 1.76 hannken daddr_t cgbno;
2124 1.172 chs int ret;
2125 1.172 chs u_int cg;
2126 1.76 hannken u_int8_t *inosused;
2127 1.113 hannken const bool devvp_is_snapshot = (devvp->v_type != VBLK);
2128 1.76 hannken 
2129 1.119 joerg KASSERT(devvp_is_snapshot);
2130 1.119 joerg 
2131 1.76 hannken cg = ino_to_cg(fs, ino);
2132 1.113 hannken if (devvp_is_snapshot)
2133 1.138 dholland cgbno = ffs_fragstoblks(fs, cgtod(fs, cg));
2134 1.113 hannken else
2135 1.136 dholland cgbno = FFS_FSBTODB(fs, cgtod(fs, cg));
2136 1.172 chs if (ino >= fs->fs_ipg * fs->fs_ncg)
2137 1.76 hannken return 1;
2138 1.149 maxv if (bread(devvp, cgbno, (int)fs->fs_cgsize, 0, &bp)) {
2139 1.76 hannken return 1;
2140 1.76 hannken }
2141 1.76 hannken cgp = (struct cg *)bp->b_data;
2142 1.76 hannken if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
2143 1.101 ad brelse(bp, 0);
2144 1.76 hannken return 1;
2145 1.76 hannken }
2146 1.76 hannken inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
2147 1.76 hannken ino %= fs->fs_ipg;
2148 1.76 hannken ret = isclr(inosused, ino);
2149 1.101 ad brelse(bp, 0);
2150 1.76 hannken return ret;
2151 1.76 hannken }
2152 1.76 hannken 
2153 1.76 hannken /*
2154 1.1 mycroft * Find a block of the specified size in the specified cylinder group.
2155 1.1 mycroft *
2156 1.1 mycroft * It is a panic if a request is made to find a block when none are
2157 1.1 mycroft * available.
2158 1.1 mycroft */
2159 1.60 fvdl static int32_t
2160 1.85 thorpej ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
2161 1.1 mycroft {
2162 1.60 fvdl int32_t bno;
2163 1.1 mycroft int start, len, loc, i;
2164 1.1 mycroft int blk, field, subfield, pos;
2165 1.19 bouyer int ostart, olen;
2166 1.62 fvdl u_int8_t *blksfree;
2167 1.30 fvdl const int needswap = UFS_FSNEEDSWAP(fs);
2168 1.1 mycroft 
2169 1.101 ad /* KASSERT(mutex_owned(&ump->um_lock)); */
2170 1.101 ad 
2171 1.1 mycroft /*
2172 1.1 mycroft * find the fragment by searching through the free block
2173 1.1 mycroft * map for an appropriate bit pattern
2174 1.1 mycroft */
2175 1.1 mycroft if (bpref)
2176 1.1 mycroft start = dtogd(fs, bpref) / NBBY;
2177 1.1 mycroft else
2178 1.19 bouyer start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
2179 1.62 fvdl blksfree = cg_blksfree(cgp, needswap);
2180 1.1 mycroft len = howmany(fs->fs_fpg, NBBY) - start;
2181 1.19 bouyer ostart = start;
2182 1.19 bouyer olen = len;
2183 1.45 lukem loc = scanc((u_int)len,
2184 1.62 fvdl (const u_char *)&blksfree[start],
2185 1.45 lukem (const u_char *)fragtbl[fs->fs_frag],
2186 1.54 mycroft (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
2187 1.1 mycroft if (loc == 0) {
2188 1.1 mycroft len = start + 1;
2189 1.1 mycroft start = 0;
2190 1.45 lukem loc = scanc((u_int)len,
2191 1.62 fvdl (const u_char *)&blksfree[0],
2192 1.45 lukem (const u_char *)fragtbl[fs->fs_frag],
2193 1.54 mycroft (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
2194 1.1 mycroft if (loc == 0) {
2195 1.154 christos panic("%s: map corrupted: start=%d, len=%d, "
2196 1.154 christos "fs = %s, offset=%d/%ld, cg %d", __func__,
2197 1.154 christos ostart, olen, fs->fs_fsmnt,
2198 1.154 christos ufs_rw32(cgp->cg_freeoff, needswap),
2199 1.154 christos (long)blksfree - (long)cgp, cgp->cg_cgx);
2200 1.1 mycroft /* NOTREACHED */
2201 1.1 mycroft }
2202 1.1 mycroft }
2203 1.1 mycroft bno = (start + len -
loc) * NBBY;
2204 1.19 bouyer cgp->cg_frotor = ufs_rw32(bno, needswap);
2205 1.1 mycroft /*
2206 1.1 mycroft * found the byte in the map
2207 1.1 mycroft * sift through the bits to find the selected frag
2208 1.1 mycroft */
2209 1.1 mycroft for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
2210 1.62 fvdl blk = blkmap(fs, blksfree, bno);
2211 1.1 mycroft blk <<= 1;
2212 1.1 mycroft field = around[allocsiz];
2213 1.1 mycroft subfield = inside[allocsiz];
2214 1.1 mycroft for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
2215 1.1 mycroft if ((blk & field) == subfield)
2216 1.1 mycroft return (bno + pos);
2217 1.1 mycroft field <<= 1;
2218 1.1 mycroft subfield <<= 1;
2219 1.1 mycroft }
2220 1.1 mycroft }
2221 1.154 christos panic("%s: block not in map: bno=%d, fs=%s", __func__,
2222 1.154 christos bno, fs->fs_fsmnt);
2223 1.58 fvdl /* return (-1); */
2224 1.1 mycroft }
2225 1.1 mycroft 
2226 1.1 mycroft /*
2227 1.1 mycroft * ffs_fserr prints the name of a file system with an error diagnostic.
2228 1.81 perry *
2229 1.1 mycroft * The form of the error message is:
2230 1.1 mycroft * fs: error message
2231 1.1 mycroft */
2232 1.1 mycroft static void
2233 1.150 mlelstv ffs_fserr(struct fs *fs, kauth_cred_t cred, const char *cp)
2234 1.1 mycroft {
2235 1.150 mlelstv KASSERT(cred != NULL);
2236 1.1 mycroft 
2237 1.150 mlelstv if (cred == NOCRED || cred == FSCRED) {
2238 1.150 mlelstv log(LOG_ERR, "pid %d, command %s, on %s: %s\n",
2239 1.150 mlelstv curproc->p_pid, curproc->p_comm,
2240 1.150 mlelstv fs->fs_fsmnt, cp);
2241 1.150 mlelstv } else {
2242 1.150 mlelstv log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
2243 1.150 mlelstv kauth_cred_getuid(cred), curproc->p_pid, curproc->p_comm,
2244 1.150 mlelstv fs->fs_fsmnt, cp);
2245 1.150 mlelstv }
2246 1.1 mycroft }
2247
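
/*
 * Illustrative sketch (not compiled; cf. the "should mirror
 * ffs_blkfree" comment on ffs_blkalloc above): the allocation and
 * free paths are intended to be exact inverses, so a speculative
 * allocation can be unwound without disturbing the cg counters.
 * Here "ump", "devvp", "bno", "size" and "inum" stand in for real
 * mount state, and error handling is elided.
 */
#if 0
	error = ffs_blkalloc_ump(ump, bno, size); /* clears bits in blksfree */
	if (error == 0) {
		/* ...use the block or fragment... */
		ffs_blkfree(ump->um_fs, devvp, bno, size, inum); /* sets them back */
	}
#endif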