lfs_balloc.c revision 1.94 1 1.94 maya /* $NetBSD: lfs_balloc.c,v 1.94 2017/06/10 05:29:36 maya Exp $ */
2 1.2 cgd
3 1.11 perseant /*-
4 1.36 perseant * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 1.11 perseant * All rights reserved.
6 1.11 perseant *
7 1.11 perseant * This code is derived from software contributed to The NetBSD Foundation
8 1.11 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 1.11 perseant *
10 1.11 perseant * Redistribution and use in source and binary forms, with or without
11 1.11 perseant * modification, are permitted provided that the following conditions
12 1.11 perseant * are met:
13 1.11 perseant * 1. Redistributions of source code must retain the above copyright
14 1.11 perseant * notice, this list of conditions and the following disclaimer.
15 1.11 perseant * 2. Redistributions in binary form must reproduce the above copyright
16 1.11 perseant * notice, this list of conditions and the following disclaimer in the
17 1.11 perseant * documentation and/or other materials provided with the distribution.
18 1.11 perseant *
19 1.11 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.11 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.11 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.11 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.11 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.11 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.11 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.11 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.11 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.11 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.11 perseant * POSSIBILITY OF SUCH DAMAGE.
30 1.11 perseant */
31 1.1 mycroft /*
32 1.1 mycroft * Copyright (c) 1989, 1991, 1993
33 1.1 mycroft * The Regents of the University of California. All rights reserved.
34 1.1 mycroft *
35 1.1 mycroft * Redistribution and use in source and binary forms, with or without
36 1.1 mycroft * modification, are permitted provided that the following conditions
37 1.1 mycroft * are met:
38 1.1 mycroft * 1. Redistributions of source code must retain the above copyright
39 1.1 mycroft * notice, this list of conditions and the following disclaimer.
40 1.1 mycroft * 2. Redistributions in binary form must reproduce the above copyright
41 1.1 mycroft * notice, this list of conditions and the following disclaimer in the
42 1.1 mycroft * documentation and/or other materials provided with the distribution.
43 1.43 agc * 3. Neither the name of the University nor the names of its contributors
44 1.1 mycroft * may be used to endorse or promote products derived from this software
45 1.1 mycroft * without specific prior written permission.
46 1.1 mycroft *
47 1.1 mycroft * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 1.1 mycroft * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 1.1 mycroft * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 1.1 mycroft * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 1.1 mycroft * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 1.1 mycroft * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 1.1 mycroft * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 1.1 mycroft * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 1.1 mycroft * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 1.1 mycroft * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 1.1 mycroft * SUCH DAMAGE.
58 1.1 mycroft *
59 1.5 fvdl * @(#)lfs_balloc.c 8.4 (Berkeley) 5/8/95
60 1.1 mycroft */
61 1.30 lukem
62 1.30 lukem #include <sys/cdefs.h>
63 1.94 maya __KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.94 2017/06/10 05:29:36 maya Exp $");
64 1.8 scottr
65 1.28 mrg #if defined(_KERNEL_OPT)
66 1.8 scottr #include "opt_quota.h"
67 1.9 scottr #endif
68 1.8 scottr
69 1.1 mycroft #include <sys/param.h>
70 1.3 christos #include <sys/systm.h>
71 1.1 mycroft #include <sys/buf.h>
72 1.1 mycroft #include <sys/proc.h>
73 1.1 mycroft #include <sys/vnode.h>
74 1.1 mycroft #include <sys/mount.h>
75 1.1 mycroft #include <sys/resourcevar.h>
76 1.55 perseant #include <sys/tree.h>
77 1.1 mycroft #include <sys/trace.h>
78 1.61 elad #include <sys/kauth.h>
79 1.1 mycroft
80 1.1 mycroft #include <miscfs/specfs/specdev.h>
81 1.1 mycroft
82 1.73 dholland #include <ufs/lfs/ulfs_quotacommon.h>
83 1.73 dholland #include <ufs/lfs/ulfs_inode.h>
84 1.73 dholland #include <ufs/lfs/ulfsmount.h>
85 1.73 dholland #include <ufs/lfs/ulfs_extern.h>
86 1.1 mycroft
87 1.1 mycroft #include <ufs/lfs/lfs.h>
88 1.84 dholland #include <ufs/lfs/lfs_accessors.h>
89 1.1 mycroft #include <ufs/lfs/lfs_extern.h>
90 1.78 dholland #include <ufs/lfs/lfs_kernel.h>
91 1.5 fvdl
92 1.36 perseant #include <uvm/uvm.h>
93 1.36 perseant
94 1.89 dholland static int lfs_fragextend(struct vnode *, int, int, daddr_t, struct buf **,
95 1.89 dholland kauth_cred_t);
96 1.5 fvdl
97 1.49 perseant u_int64_t locked_fakequeue_count;
98 1.49 perseant
99 1.16 perseant /*
100 1.90 dholland * Allocate a block, and do inode and filesystem block accounting for
101 1.90 dholland * it and for any indirect blocks that may need to be created in order
102 1.90 dholland * to handle this block.
103 1.90 dholland *
104 1.90 dholland * Blocks which have never been accounted for (i.e., which "do not
105 1.90 dholland * exist") have disk address 0, which is translated by ulfs_bmap to
106 1.90 dholland * the special value UNASSIGNED == -1, as in historical FFS-related
107 1.90 dholland * code.
108 1.90 dholland *
109 1.90 dholland * Blocks which have been accounted for but which have not yet been
110 1.90 dholland * written to disk are given the new special disk address UNWRITTEN ==
111 1.90 dholland * -2, so that they can be differentiated from completely new blocks.
112 1.90 dholland *
113 1.90 dholland * Note: it seems that bpp is passed as NULL for blocks that are file
114 1.90 dholland * pages that will be handled by UVM and not the buffer cache.
115 1.90 dholland *
116 1.90 dholland * XXX: locking?
117 1.16 perseant */
118 1.75 dholland /* VOP_BWRITE ULFS_NIADDR+2 times */
119 1.1 mycroft int
120 1.61 elad lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred,
121 1.57 yamt int flags, struct buf **bpp)
122 1.14 fvdl {
123 1.5 fvdl int offset;
124 1.46 mycroft daddr_t daddr, idaddr;
125 1.57 yamt struct buf *ibp, *bp;
126 1.1 mycroft struct inode *ip;
127 1.1 mycroft struct lfs *fs;
128 1.75 dholland struct indir indirs[ULFS_NIADDR+2], *idp;
129 1.35 fvdl daddr_t lbn, lastblock;
130 1.69 mlelstv int bcount;
131 1.11 perseant int error, frags, i, nsize, osize, num;
132 1.14 fvdl
133 1.1 mycroft ip = VTOI(vp);
134 1.1 mycroft fs = ip->i_lfs;
135 1.90 dholland
136 1.90 dholland /* Declare to humans that we might have the seglock here */
137 1.90 dholland ASSERT_MAYBE_SEGLOCK(fs);
138 1.90 dholland
139 1.90 dholland
140 1.90 dholland /* offset within block */
141 1.77 christos offset = lfs_blkoff(fs, startoffset);
142 1.90 dholland
143 1.90 dholland /* This is usually but not always exactly the block size */
144 1.82 dholland KASSERT(iosize <= lfs_sb_getbsize(fs));
145 1.90 dholland
146 1.90 dholland /* block number (within file) */
147 1.77 christos lbn = lfs_lblkno(fs, startoffset);
148 1.90 dholland
149 1.90 dholland /*
150 1.90 dholland * This checks for whether pending stuff needs to be flushed
151 1.90 dholland * out and potentially waits. It's been disabled since UBC
152 1.90 dholland * support was added to LFS in 2003. -- dholland 20160806
153 1.90 dholland */
154 1.36 perseant /* (void)lfs_check(vp, lbn, 0); */
155 1.36 perseant
156 1.52 perseant
157 1.50 perry /*
158 1.1 mycroft * Three cases: it's a block beyond the end of file, it's a block in
159 1.1 mycroft * the file that may or may not have been assigned a disk address or
160 1.19 perseant * we're writing an entire block.
161 1.19 perseant *
162 1.19 perseant * Note, if the daddr is UNWRITTEN, the block already exists in
163 1.37 perseant * the cache (it was read or written earlier). If so, make sure
164 1.19 perseant * we don't count it as a new block or zero out its contents. If
165 1.19 perseant * it did not, make sure we allocate any necessary indirect
166 1.19 perseant * blocks.
167 1.19 perseant *
168 1.5 fvdl * If we are writing a block beyond the end of the file, we need to
169 1.11 perseant * check if the old last block was a fragment. If it was, we need
170 1.5 fvdl * to rewrite it.
171 1.1 mycroft */
172 1.50 perry
173 1.36 perseant if (bpp)
174 1.36 perseant *bpp = NULL;
175 1.50 perry
176 1.90 dholland /* Last block number in file */
177 1.77 christos lastblock = lfs_lblkno(fs, ip->i_size);
178 1.90 dholland
179 1.75 dholland if (lastblock < ULFS_NDADDR && lastblock < lbn) {
180 1.90 dholland /*
181 1.90 dholland * The file is small enough to have fragments, and we're
182 1.90 dholland * allocating past EOF.
183 1.90 dholland *
184 1.90 dholland * If the last block was a fragment we need to rewrite it
185 1.90 dholland * as a full block.
186 1.90 dholland */
187 1.77 christos osize = lfs_blksize(fs, ip, lastblock);
188 1.82 dholland if (osize < lfs_sb_getbsize(fs) && osize > 0) {
189 1.82 dholland if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs),
190 1.36 perseant lastblock,
191 1.57 yamt (bpp ? &bp : NULL), cred)))
192 1.31 chs return (error);
193 1.90 dholland /* Update the file size with what we just did (only) */
194 1.87 dholland ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs);
195 1.87 dholland lfs_dino_setsize(fs, ip->i_din, ip->i_size);
196 1.40 fvdl uvm_vnp_setsize(vp, ip->i_size);
197 1.94 maya ip->i_state |= IN_CHANGE | IN_UPDATE;
198 1.90 dholland /* if we got a buffer for this, write it out now */
199 1.36 perseant if (bpp)
200 1.70 hannken (void) VOP_BWRITE(bp->b_vp, bp);
201 1.5 fvdl }
202 1.5 fvdl }
203 1.5 fvdl
204 1.5 fvdl /*
205 1.5 fvdl * If the block we are writing is a direct block, it's the last
206 1.5 fvdl * block in the file, and offset + iosize is less than a full
207 1.5 fvdl * block, we can write one or more fragments. There are two cases:
208 1.5 fvdl * the block is brand new and we should allocate it the correct
209 1.5 fvdl * size or it already exists and contains some fragments and
210 1.5 fvdl * may need to extend it.
211 1.5 fvdl */
212 1.77 christos if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) {
213 1.77 christos osize = lfs_blksize(fs, ip, lbn);
214 1.77 christos nsize = lfs_fragroundup(fs, offset + iosize);
215 1.77 christos if (lfs_lblktosize(fs, lbn) >= ip->i_size) {
216 1.5 fvdl /* Brand new block or fragment */
217 1.77 christos frags = lfs_numfrags(fs, nsize);
218 1.69 mlelstv if (!ISSPACE(fs, frags, cred))
219 1.51 perseant return ENOSPC;
220 1.36 perseant if (bpp) {
221 1.57 yamt *bpp = bp = getblk(vp, lbn, nsize, 0, 0);
222 1.36 perseant bp->b_blkno = UNWRITTEN;
223 1.57 yamt if (flags & B_CLRBUF)
224 1.38 perseant clrbuf(bp);
225 1.36 perseant }
226 1.90 dholland
227 1.90 dholland /*
228 1.90 dholland * Update the effective block count (this count
229 1.90 dholland * includes blocks that don't have an on-disk
230 1.90 dholland * presence or location yet)
231 1.90 dholland */
232 1.69 mlelstv ip->i_lfs_effnblks += frags;
233 1.90 dholland
234 1.90 dholland /* account for the space we're taking */
235 1.64 ad mutex_enter(&lfs_lock);
236 1.85 dholland lfs_sb_subbfree(fs, frags);
237 1.64 ad mutex_exit(&lfs_lock);
238 1.90 dholland
239 1.90 dholland /* update the inode */
240 1.87 dholland lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
241 1.19 perseant } else {
242 1.90 dholland /* extending a block that already has fragments */
243 1.90 dholland
244 1.11 perseant if (nsize <= osize) {
245 1.11 perseant /* No need to extend */
246 1.67 hannken if (bpp && (error = bread(vp, lbn, osize,
247 1.81 maxv 0, &bp)))
248 1.11 perseant return error;
249 1.11 perseant } else {
250 1.11 perseant /* Extend existing block */
251 1.11 perseant if ((error =
252 1.36 perseant lfs_fragextend(vp, osize, nsize, lbn,
253 1.57 yamt (bpp ? &bp : NULL), cred)))
254 1.19 perseant return error;
255 1.11 perseant }
256 1.36 perseant if (bpp)
257 1.36 perseant *bpp = bp;
258 1.5 fvdl }
259 1.19 perseant return 0;
260 1.19 perseant }
261 1.19 perseant
262 1.90 dholland /*
263 1.90 dholland * Look up what's already here.
264 1.90 dholland */
265 1.90 dholland
266 1.75 dholland error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL);
267 1.19 perseant if (error)
268 1.19 perseant return (error);
269 1.49 perseant
270 1.86 dholland KASSERT(daddr <= LFS_MAX_DADDR(fs));
271 1.49 perseant
272 1.19 perseant /*
273 1.19 perseant * Do byte accounting all at once, so we can gracefully fail *before*
274 1.19 perseant * we start assigning blocks.
275 1.19 perseant */
276 1.79 dholland frags = fs->um_seqinc;
277 1.90 dholland bcount = 0; /* number of frags we need */
278 1.19 perseant if (daddr == UNASSIGNED) {
279 1.90 dholland /* no block yet, going to need a whole block */
280 1.69 mlelstv bcount = frags;
281 1.19 perseant }
282 1.19 perseant for (i = 1; i < num; ++i) {
283 1.19 perseant if (!indirs[i].in_exists) {
284 1.90 dholland /* need an indirect block at this level */
285 1.69 mlelstv bcount += frags;
286 1.19 perseant }
287 1.19 perseant }
288 1.57 yamt if (ISSPACE(fs, bcount, cred)) {
289 1.90 dholland /* update the superblock's free block count */
290 1.64 ad mutex_enter(&lfs_lock);
291 1.85 dholland lfs_sb_subbfree(fs, bcount);
292 1.64 ad mutex_exit(&lfs_lock);
293 1.90 dholland /* update the file's effective block count */
294 1.21 perseant ip->i_lfs_effnblks += bcount;
295 1.5 fvdl } else {
296 1.90 dholland /* whoops, no can do */
297 1.19 perseant return ENOSPC;
298 1.19 perseant }
299 1.19 perseant
300 1.19 perseant if (daddr == UNASSIGNED) {
301 1.90 dholland /*
302 1.90 dholland * There is nothing here yet.
303 1.90 dholland */
304 1.90 dholland
305 1.90 dholland /*
306 1.90 dholland * If there's no indirect block in the inode, change it
307 1.90 dholland * to UNWRITTEN to indicate that it exists but doesn't
308 1.90 dholland * have an on-disk address yet.
309 1.90 dholland *
310 1.90 dholland * (Question: where's the block data initialized?)
311 1.90 dholland */
312 1.87 dholland if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) {
313 1.87 dholland lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
314 1.19 perseant }
315 1.19 perseant
316 1.5 fvdl /*
317 1.90 dholland * If we need more layers of indirect blocks, create what
318 1.90 dholland * we need.
319 1.5 fvdl */
320 1.46 mycroft if (num > 1) {
321 1.90 dholland /*
322 1.90 dholland * The outermost indirect block address is the one
323 1.90 dholland * in the inode, so fetch that.
324 1.90 dholland */
325 1.87 dholland idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off);
326 1.90 dholland /*
327 1.90 dholland * For each layer of indirection...
328 1.90 dholland */
329 1.46 mycroft for (i = 1; i < num; ++i) {
330 1.90 dholland /*
331 1.90 dholland * Get a buffer for the indirect block data.
332 1.90 dholland *
333 1.90 dholland * (XXX: the logic here seems twisted. What's
334 1.90 dholland * wrong with testing in_exists first and then
335 1.90 dholland * doing either bread or getblk to get a
336 1.90 dholland * buffer?)
337 1.90 dholland */
338 1.46 mycroft ibp = getblk(vp, indirs[i].in_lbn,
339 1.82 dholland lfs_sb_getbsize(fs), 0,0);
340 1.46 mycroft if (!indirs[i].in_exists) {
341 1.90 dholland /*
342 1.90 dholland * There isn't actually a block here,
343 1.90 dholland * so clear the buffer data and mark
344 1.90 dholland * the address of the block as
345 1.90 dholland * UNWRITTEN.
346 1.90 dholland */
347 1.46 mycroft clrbuf(ibp);
348 1.46 mycroft ibp->b_blkno = UNWRITTEN;
349 1.64 ad } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) {
350 1.90 dholland /*
351 1.90 dholland * Otherwise read it in.
352 1.90 dholland */
353 1.77 christos ibp->b_blkno = LFS_FSBTODB(fs, idaddr);
354 1.46 mycroft ibp->b_flags |= B_READ;
355 1.48 hannken VOP_STRATEGY(vp, ibp);
356 1.46 mycroft biowait(ibp);
357 1.46 mycroft }
358 1.90 dholland
359 1.46 mycroft /*
360 1.90 dholland * Now this indirect block exists, but
361 1.90 dholland * the next one down may not yet. If
362 1.90 dholland * so, set it to UNWRITTEN. This keeps
363 1.46 mycroft * the accounting straight.
364 1.46 mycroft */
365 1.88 dholland if (lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off) == 0)
366 1.88 dholland lfs_iblock_set(fs, ibp->b_data, indirs[i].in_off,
367 1.88 dholland UNWRITTEN);
368 1.90 dholland
369 1.90 dholland /* get the block for the next iteration */
370 1.88 dholland idaddr = lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off);
371 1.92 maya
372 1.52 perseant if (vp == fs->lfs_ivnode) {
373 1.52 perseant LFS_ENTER_LOG("balloc", __FILE__,
374 1.52 perseant __LINE__, indirs[i].in_lbn,
375 1.52 perseant ibp->b_flags, curproc->p_pid);
376 1.52 perseant }
377 1.90 dholland /*
378 1.90 dholland * Write out the updated indirect block. Note
379 1.90 dholland * that this writes it out even if we didn't
380 1.90 dholland * modify it - ultimately because the final
381 1.90 dholland * block didn't exist we'll need to write a
382 1.90 dholland * new version of all the blocks that lead to
383 1.90 dholland * it. Hopefully all that gets in before any
384 1.90 dholland * actual disk I/O so we don't end up writing
385 1.90 dholland * any of them twice... this is currently not
386 1.90 dholland * very clear.
387 1.90 dholland */
388 1.70 hannken if ((error = VOP_BWRITE(ibp->b_vp, ibp)))
389 1.46 mycroft return error;
390 1.19 perseant }
391 1.19 perseant }
392 1.50 perry }
393 1.19 perseant
394 1.19 perseant
395 1.19 perseant /*
396 1.36 perseant * Get the existing block from the cache, if requested.
397 1.19 perseant */
398 1.36 perseant if (bpp)
399 1.77 christos *bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0);
400 1.50 perry
401 1.49 perseant /*
402 1.49 perseant * Do accounting on blocks that represent pages.
403 1.49 perseant */
404 1.49 perseant if (!bpp)
405 1.49 perseant lfs_register_block(vp, lbn);
406 1.49 perseant
407 1.50 perry /*
408 1.5 fvdl * The block we are writing may be a brand new block
409 1.19 perseant * in which case we need to do accounting.
410 1.15 perseant *
411 1.75 dholland * We can tell a truly new block because ulfs_bmaparray will say
412 1.90 dholland * it is UNASSIGNED. Once we allocate it we will assign it the
413 1.19 perseant * disk address UNWRITTEN.
414 1.5 fvdl */
415 1.16 perseant if (daddr == UNASSIGNED) {
416 1.36 perseant if (bpp) {
417 1.57 yamt if (flags & B_CLRBUF)
418 1.36 perseant clrbuf(bp);
419 1.50 perry
420 1.36 perseant /* Note the new address */
421 1.36 perseant bp->b_blkno = UNWRITTEN;
422 1.36 perseant }
423 1.50 perry
424 1.19 perseant switch (num) {
425 1.19 perseant case 0:
426 1.90 dholland /* direct block - update the inode */
427 1.87 dholland lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
428 1.19 perseant break;
429 1.19 perseant case 1:
430 1.90 dholland /*
431 1.90 dholland * using a single indirect block - update the inode
432 1.90 dholland *
433 1.90 dholland * XXX: is this right? We already set this block
434 1.90 dholland * pointer above. I think we want to be writing *in*
435 1.90 dholland * the single indirect block and this case shouldn't
436 1.90 dholland * exist. (just case 0 and default)
437 1.90 dholland * -- dholland 20160806
438 1.90 dholland */
439 1.87 dholland lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
440 1.19 perseant break;
441 1.19 perseant default:
442 1.90 dholland /*
443 1.90 dholland * using multiple indirect blocks - update the
444 1.90 dholland * innermost one
445 1.90 dholland */
446 1.19 perseant idp = &indirs[num - 1];
447 1.82 dholland if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs),
448 1.67 hannken B_MODIFY, &ibp))
449 1.35 fvdl panic("lfs_balloc: bread bno %lld",
450 1.35 fvdl (long long)idp->in_lbn);
451 1.88 dholland lfs_iblock_set(fs, ibp->b_data, idp->in_off, UNWRITTEN);
452 1.92 maya
453 1.52 perseant if (vp == fs->lfs_ivnode) {
454 1.52 perseant LFS_ENTER_LOG("balloc", __FILE__,
455 1.52 perseant __LINE__, idp->in_lbn,
456 1.52 perseant ibp->b_flags, curproc->p_pid);
457 1.52 perseant }
458 1.92 maya
459 1.70 hannken VOP_BWRITE(ibp->b_vp, ibp);
460 1.15 perseant }
461 1.64 ad } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
462 1.15 perseant /*
463 1.15 perseant * Not a brand new block, also not in the cache;
464 1.15 perseant * read it in from disk.
465 1.15 perseant */
466 1.82 dholland if (iosize == lfs_sb_getbsize(fs))
467 1.5 fvdl /* Optimization: I/O is unnecessary. */
468 1.5 fvdl bp->b_blkno = daddr;
469 1.15 perseant else {
470 1.5 fvdl /*
471 1.5 fvdl * We need to read the block to preserve the
472 1.5 fvdl * existing bytes.
473 1.5 fvdl */
474 1.1 mycroft bp->b_blkno = daddr;
475 1.1 mycroft bp->b_flags |= B_READ;
476 1.48 hannken VOP_STRATEGY(vp, bp);
477 1.31 chs return (biowait(bp));
478 1.1 mycroft }
479 1.1 mycroft }
480 1.50 perry
481 1.5 fvdl return (0);
482 1.5 fvdl }
483 1.5 fvdl
484 1.90 dholland /*
485 1.90 dholland * Extend a file that uses fragments with more fragments.
486 1.90 dholland *
487 1.90 dholland * XXX: locking?
488 1.90 dholland */
489 1.25 perseant /* VOP_BWRITE 1 time */
490 1.89 dholland static int
491 1.89 dholland lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn,
492 1.89 dholland struct buf **bpp, kauth_cred_t cred)
493 1.5 fvdl {
494 1.5 fvdl struct inode *ip;
495 1.5 fvdl struct lfs *fs;
496 1.69 mlelstv long frags;
497 1.5 fvdl int error;
498 1.90 dholland size_t obufsize;
499 1.90 dholland
500 1.90 dholland /* XXX move this to a header file */
501 1.90 dholland /* (XXX: except it's not clear what purpose it serves) */
502 1.11 perseant extern long locked_queue_bytes;
503 1.90 dholland
504 1.91 dholland ip = VTOI(vp);
505 1.91 dholland fs = ip->i_lfs;
506 1.91 dholland
507 1.90 dholland /*
508 1.90 dholland * XXX: is there some reason we know more about the seglock
509 1.90 dholland * state here than at the top of lfs_balloc?
510 1.90 dholland */
511 1.90 dholland ASSERT_NO_SEGLOCK(fs);
512 1.5 fvdl
513 1.90 dholland /* number of frags we're adding */
514 1.77 christos frags = (long)lfs_numfrags(fs, nsize - osize);
515 1.90 dholland
516 1.18 perseant error = 0;
517 1.18 perseant
518 1.18 perseant /*
519 1.36 perseant * Get the seglock so we don't enlarge blocks while a segment
520 1.36 perseant * is being written. If we're called with bpp==NULL, though,
521 1.36 perseant * we are only pretending to change a buffer, so we don't have to
522 1.36 perseant * lock.
523 1.90 dholland *
524 1.90 dholland * XXX: the above comment is lying, as fs->lfs_fraglock is not
525 1.90 dholland * the segment lock.
526 1.18 perseant */
527 1.26 perseant top:
528 1.36 perseant if (bpp) {
529 1.62 ad rw_enter(&fs->lfs_fraglock, RW_READER);
530 1.36 perseant }
531 1.36 perseant
532 1.90 dholland /* check if we actually have enough frags available */
533 1.69 mlelstv if (!ISSPACE(fs, frags, cred)) {
534 1.18 perseant error = ENOSPC;
535 1.18 perseant goto out;
536 1.5 fvdl }
537 1.36 perseant
538 1.36 perseant /*
539 1.36 perseant * If we are not asked to actually return the block, all we need
540 1.36 perseant * to do is allocate space for it. UBC will handle dirtying the
541 1.36 perseant * appropriate things and making sure it all goes to disk.
542 1.36 perseant * Don't bother to read in that case.
543 1.36 perseant */
544 1.81 maxv if (bpp && (error = bread(vp, lbn, osize, 0, bpp))) {
545 1.18 perseant goto out;
546 1.5 fvdl }
547 1.80 dholland #if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
548 1.76 dholland if ((error = lfs_chkdq(ip, frags, cred, 0))) {
549 1.36 perseant if (bpp)
550 1.63 ad brelse(*bpp, 0);
551 1.19 perseant goto out;
552 1.19 perseant }
553 1.19 perseant #endif
554 1.13 perseant /*
555 1.26 perseant * Adjust accounting for lfs_avail. If there's not enough room,
556 1.26 perseant * we will have to wait for the cleaner, which we can't do while
557 1.26 perseant * holding a block busy or while holding the seglock. In that case,
558 1.26 perseant * release both and start over after waiting.
559 1.26 perseant */
560 1.33 perseant
561 1.64 ad if (bpp && ((*bpp)->b_oflags & BO_DELWRI)) {
562 1.69 mlelstv if (!lfs_fits(fs, frags)) {
563 1.36 perseant if (bpp)
564 1.63 ad brelse(*bpp, 0);
565 1.80 dholland #if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
566 1.76 dholland lfs_chkdq(ip, -frags, cred, 0);
567 1.26 perseant #endif
568 1.62 ad rw_exit(&fs->lfs_fraglock);
569 1.69 mlelstv lfs_availwait(fs, frags);
570 1.26 perseant goto top;
571 1.26 perseant }
572 1.82 dholland lfs_sb_subavail(fs, frags);
573 1.26 perseant }
574 1.26 perseant
575 1.90 dholland /* decrease the free block count in the superblock */
576 1.64 ad mutex_enter(&lfs_lock);
577 1.85 dholland lfs_sb_subbfree(fs, frags);
578 1.64 ad mutex_exit(&lfs_lock);
579 1.90 dholland /* increase the file's effective block count */
580 1.69 mlelstv ip->i_lfs_effnblks += frags;
581 1.90 dholland /* mark the inode dirty */
582 1.94 maya ip->i_state |= IN_CHANGE | IN_UPDATE;
583 1.26 perseant
584 1.36 perseant if (bpp) {
585 1.36 perseant obufsize = (*bpp)->b_bufsize;
586 1.47 pk allocbuf(*bpp, nsize, 1);
587 1.26 perseant
588 1.36 perseant /* Adjust locked-list accounting */
589 1.65 ad if (((*bpp)->b_flags & B_LOCKED) != 0 &&
590 1.64 ad (*bpp)->b_iodone == NULL) {
591 1.64 ad mutex_enter(&lfs_lock);
592 1.36 perseant locked_queue_bytes += (*bpp)->b_bufsize - obufsize;
593 1.64 ad mutex_exit(&lfs_lock);
594 1.52 perseant }
595 1.26 perseant
596 1.90 dholland /* zero the new space */
597 1.68 cegger memset((char *)((*bpp)->b_data) + osize, 0, (u_int)(nsize - osize));
598 1.36 perseant }
599 1.18 perseant
600 1.18 perseant out:
601 1.36 perseant if (bpp) {
602 1.62 ad rw_exit(&fs->lfs_fraglock);
603 1.36 perseant }
604 1.18 perseant return (error);
605 1.1 mycroft }
606 1.49 perseant
607 1.59 perry static inline int
608 1.55 perseant lge(struct lbnentry *a, struct lbnentry *b)
609 1.53 perseant {
610 1.55 perseant return a->lbn - b->lbn;
611 1.53 perseant }
612 1.53 perseant
613 1.55 perseant SPLAY_PROTOTYPE(lfs_splay, lbnentry, entry, lge);
614 1.55 perseant
615 1.55 perseant SPLAY_GENERATE(lfs_splay, lbnentry, entry, lge);
616 1.53 perseant
617 1.49 perseant /*
618 1.49 perseant * Record this lbn as being "write pending". We used to have this information
619 1.49 perseant * on the buffer headers, but since pages don't have buffer headers we
620 1.49 perseant * record it here instead.
621 1.49 perseant */
622 1.49 perseant void
623 1.49 perseant lfs_register_block(struct vnode *vp, daddr_t lbn)
624 1.49 perseant {
625 1.49 perseant struct lfs *fs;
626 1.49 perseant struct inode *ip;
627 1.49 perseant struct lbnentry *lbp;
628 1.53 perseant
629 1.53 perseant ip = VTOI(vp);
630 1.49 perseant
631 1.49 perseant /* Don't count metadata */
632 1.53 perseant if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM)
633 1.49 perseant return;
634 1.49 perseant
635 1.49 perseant fs = ip->i_lfs;
636 1.49 perseant
637 1.52 perseant ASSERT_NO_SEGLOCK(fs);
638 1.52 perseant
639 1.49 perseant /* If no space, wait for the cleaner */
640 1.83 dholland lfs_availwait(fs, lfs_btofsb(fs, 1 << lfs_sb_getbshift(fs)));
641 1.49 perseant
642 1.49 perseant lbp = (struct lbnentry *)pool_get(&lfs_lbnentry_pool, PR_WAITOK);
643 1.49 perseant lbp->lbn = lbn;
644 1.64 ad mutex_enter(&lfs_lock);
645 1.55 perseant if (SPLAY_INSERT(lfs_splay, &ip->i_lfs_lbtree, lbp) != NULL) {
646 1.64 ad mutex_exit(&lfs_lock);
647 1.55 perseant /* Already there */
648 1.55 perseant pool_put(&lfs_lbnentry_pool, lbp);
649 1.55 perseant return;
650 1.55 perseant }
651 1.52 perseant
652 1.56 perseant ++ip->i_lfs_nbtree;
653 1.83 dholland fs->lfs_favail += lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs)));
654 1.82 dholland fs->lfs_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT;
655 1.49 perseant ++locked_fakequeue_count;
656 1.82 dholland lfs_subsys_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT;
657 1.64 ad mutex_exit(&lfs_lock);
658 1.49 perseant }
659 1.49 perseant
660 1.49 perseant static void
661 1.53 perseant lfs_do_deregister(struct lfs *fs, struct inode *ip, struct lbnentry *lbp)
662 1.49 perseant {
663 1.52 perseant ASSERT_MAYBE_SEGLOCK(fs);
664 1.52 perseant
665 1.64 ad mutex_enter(&lfs_lock);
666 1.56 perseant --ip->i_lfs_nbtree;
667 1.55 perseant SPLAY_REMOVE(lfs_splay, &ip->i_lfs_lbtree, lbp);
668 1.83 dholland if (fs->lfs_favail > lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs))))
669 1.83 dholland fs->lfs_favail -= lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs)));
670 1.82 dholland fs->lfs_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT;
671 1.49 perseant if (locked_fakequeue_count > 0)
672 1.49 perseant --locked_fakequeue_count;
673 1.82 dholland lfs_subsys_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT;
674 1.64 ad mutex_exit(&lfs_lock);
675 1.64 ad
676 1.64 ad pool_put(&lfs_lbnentry_pool, lbp);
677 1.49 perseant }
678 1.49 perseant
679 1.49 perseant void
680 1.49 perseant lfs_deregister_block(struct vnode *vp, daddr_t lbn)
681 1.49 perseant {
682 1.49 perseant struct lfs *fs;
683 1.49 perseant struct inode *ip;
684 1.49 perseant struct lbnentry *lbp;
685 1.55 perseant struct lbnentry tmp;
686 1.53 perseant
687 1.53 perseant ip = VTOI(vp);
688 1.49 perseant
689 1.49 perseant /* Don't count metadata */
690 1.53 perseant if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM)
691 1.49 perseant return;
692 1.49 perseant
693 1.49 perseant fs = ip->i_lfs;
694 1.55 perseant tmp.lbn = lbn;
695 1.55 perseant lbp = SPLAY_FIND(lfs_splay, &ip->i_lfs_lbtree, &tmp);
696 1.49 perseant if (lbp == NULL)
697 1.49 perseant return;
698 1.49 perseant
699 1.53 perseant lfs_do_deregister(fs, ip, lbp);
700 1.49 perseant }
701 1.55 perseant
702 1.55 perseant void
703 1.55 perseant lfs_deregister_all(struct vnode *vp)
704 1.55 perseant {
705 1.55 perseant struct lbnentry *lbp, *nlbp;
706 1.55 perseant struct lfs_splay *hd;
707 1.55 perseant struct lfs *fs;
708 1.55 perseant struct inode *ip;
709 1.55 perseant
710 1.55 perseant ip = VTOI(vp);
711 1.55 perseant fs = ip->i_lfs;
712 1.55 perseant hd = &ip->i_lfs_lbtree;
713 1.55 perseant
714 1.55 perseant for (lbp = SPLAY_MIN(lfs_splay, hd); lbp != NULL; lbp = nlbp) {
715 1.55 perseant nlbp = SPLAY_NEXT(lfs_splay, hd, lbp);
716 1.55 perseant lfs_do_deregister(fs, ip, lbp);
717 1.55 perseant }
718 1.55 perseant }
719