ext2fs_alloc.c revision 1.54 1 /* $NetBSD: ext2fs_alloc.c,v 1.54 2023/08/26 05:22:50 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
32 * Modified for ext2fs by Manuel Bouyer.
33 */
34
35 /*
36 * Copyright (c) 1997 Manuel Bouyer.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57 *
58 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
59 * Modified for ext2fs by Manuel Bouyer.
60 */
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.54 2023/08/26 05:22:50 riastradh Exp $");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/buf.h>
68 #include <sys/proc.h>
69 #include <sys/vnode.h>
70 #include <sys/mount.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <sys/kauth.h>
74
75 #include <lib/libkern/crc16.h>
76
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/ufsmount.h>
80
81 #include <ufs/ext2fs/ext2fs.h>
82 #include <ufs/ext2fs/ext2fs_extern.h>
83
84 u_long ext2gennumber;
85
86 static daddr_t ext2fs_alloccg(struct inode *, int, daddr_t, int);
87 static u_long ext2fs_dirpref(struct m_ext2fs *);
88 static void ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
89 static u_long ext2fs_hashalloc(struct inode *, int, long, int,
90 daddr_t (*)(struct inode *, int, daddr_t, int));
91 static daddr_t ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
92 static daddr_t ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
93 static __inline void ext2fs_cg_update(struct m_ext2fs *, int,
94 struct ext2_gd *, int, int, int, daddr_t);
95 static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
96 static void ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *,
97 char *);
98
99 /*
100 * Allocate a block in the file system.
101 *
102 * A preference may be optionally specified. If a preference is given
103 * the following hierarchy is used to allocate a block:
104 * 1) allocate the requested block.
105 * 2) allocate a rotationally optimal block in the same cylinder.
106 * 3) allocate a block in the same cylinder group.
107 * 4) quadradically rehash into other cylinder groups, until an
108 * available block is located.
109 * If no block preference is given the following hierarchy is used
110 * to allocate a block:
111 * 1) allocate a block in the cylinder group that contains the
112 * inode for the file.
113 * 2) quadradically rehash into other cylinder groups, until an
114 * available block is located.
115 */
116 int
117 ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
118 kauth_cred_t cred, daddr_t *bnp)
119 {
120 struct m_ext2fs *fs;
121 daddr_t bno;
122 int cg;
123
124 *bnp = 0;
125 fs = ip->i_e2fs;
126 #ifdef DIAGNOSTIC
127 if (cred == NOCRED)
128 panic("ext2fs_alloc: missing credential");
129 #endif /* DIAGNOSTIC */
130 if (fs->e2fs.e2fs_fbcount == 0)
131 goto nospace;
132 if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
133 NULL, NULL) != 0 &&
134 freespace(fs) <= 0)
135 goto nospace;
136 if (bpref >= fs->e2fs.e2fs_bcount)
137 bpref = 0;
138 if (bpref == 0)
139 cg = ino_to_cg(fs, ip->i_number);
140 else
141 cg = dtog(fs, bpref);
142 bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
143 ext2fs_alloccg);
144 if (bno > 0) {
145 ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
146 ip->i_flag |= IN_CHANGE | IN_UPDATE;
147 *bnp = bno;
148 return 0;
149 }
150 nospace:
151 ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
152 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
153 return ENOSPC;
154 }
155
156 /*
157 * Allocate an inode in the file system.
158 *
159 * If allocating a directory, use ext2fs_dirpref to select the inode.
160 * If allocating in a directory, the following hierarchy is followed:
161 * 1) allocate the preferred inode.
162 * 2) allocate an inode in the same cylinder group.
163 * 3) quadradically rehash into other cylinder groups, until an
164 * available inode is located.
165 * If no inode preference is given the following hierarchy is used
166 * to allocate an inode:
167 * 1) allocate an inode in cylinder group 0.
168 * 2) quadradically rehash into other cylinder groups, until an
169 * available inode is located.
170 */
171 int
172 ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
173 {
174 struct inode *pip;
175 struct m_ext2fs *fs;
176 ino_t ino, ipref;
177 int cg;
178
179 pip = VTOI(pvp);
180 fs = pip->i_e2fs;
181 if (fs->e2fs.e2fs_ficount == 0)
182 goto noinodes;
183
184 if ((mode & IFMT) == IFDIR)
185 cg = ext2fs_dirpref(fs);
186 else
187 cg = ino_to_cg(fs, pip->i_number);
188 ipref = cg * fs->e2fs.e2fs_ipg + 1;
189 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
190 if (ino == 0)
191 goto noinodes;
192
193 *inop = ino;
194 return 0;
195
196 noinodes:
197 ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
198 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
199 return ENOSPC;
200 }
201
202 /*
203 * Find a cylinder to place a directory.
204 *
205 * The policy implemented by this algorithm is to select from
206 * among those cylinder groups with above the average number of
207 * free inodes, the one with the smallest number of directories.
208 */
209 static u_long
210 ext2fs_dirpref(struct m_ext2fs *fs)
211 {
212 int cg, maxspace, mincg, avgifree;
213
214 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
215 maxspace = 0;
216 mincg = -1;
217 for (cg = 0; cg < fs->e2fs_ncg; cg++) {
218 uint32_t nifree =
219 (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16)
220 | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree);
221 if (nifree < avgifree) {
222 continue;
223 }
224 uint32_t nbfree =
225 (fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16)
226 | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
227 if (mincg == -1 || nbfree > maxspace) {
228 mincg = cg;
229 maxspace = nbfree;
230 }
231 }
232 return mincg;
233 }
234
235 /*
236 * Select the desired position for the next block in a file. The file is
237 * logically divided into sections. The first section is composed of the
238 * direct blocks. Each additional section contains fs_maxbpg blocks.
239 *
240 * If no blocks have been allocated in the first section, the policy is to
241 * request a block in the same cylinder group as the inode that describes
242 * the file. Otherwise, the policy is to try to allocate the blocks
243 * contigously. The two fields of the ext2 inode extension (see
244 * ufs/ufs/inode.h) help this.
245 */
246 daddr_t
247 ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
248 int32_t *bap /* XXX ondisk32 */)
249 {
250 struct m_ext2fs *fs;
251 int cg, i;
252
253 fs = ip->i_e2fs;
254 /*
255 * if we are doing contigous lbn allocation, try to alloc blocks
256 * contigously on disk
257 */
258
259 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
260 return ip->i_e2fs_last_blk + 1;
261 }
262
263 /*
264 * bap, if provided, gives us a list of blocks to which we want to
265 * stay close
266 */
267
268 if (bap) {
269 for (i = indx; i >= 0 ; i--) {
270 if (bap[i]) {
271 return fs2h32(bap[i]) + 1;
272 }
273 }
274 }
275
276 /* fall back to the first block of the cylinder containing the inode */
277
278 cg = ino_to_cg(fs, ip->i_number);
279 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
280 }
281
282 /*
283 * Implement the cylinder overflow algorithm.
284 *
285 * The policy implemented by this algorithm is:
286 * 1) allocate the block in its requested cylinder group.
287 * 2) quadradically rehash on the cylinder group number.
288 * 3) brute force search for a free block.
289 */
290 static u_long
291 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
292 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
293 {
294 struct m_ext2fs *fs;
295 long result;
296 int i, icg = cg;
297
298 fs = ip->i_e2fs;
299 /*
300 * 1: preferred cylinder group
301 */
302 result = (*allocator)(ip, cg, pref, size);
303 if (result)
304 return result;
305 /*
306 * 2: quadratic rehash
307 */
308 for (i = 1; i < fs->e2fs_ncg; i *= 2) {
309 cg += i;
310 if (cg >= fs->e2fs_ncg)
311 cg -= fs->e2fs_ncg;
312 result = (*allocator)(ip, cg, 0, size);
313 if (result)
314 return result;
315 }
316 /*
317 * 3: brute force search
318 * Note that we start at i == 2, since 0 was checked initially,
319 * and 1 is always checked in the quadratic rehash.
320 */
321 cg = (icg + 2) % fs->e2fs_ncg;
322 for (i = 2; i < fs->e2fs_ncg; i++) {
323 result = (*allocator)(ip, cg, 0, size);
324 if (result)
325 return result;
326 cg++;
327 if (cg == fs->e2fs_ncg)
328 cg = 0;
329 }
330 return 0;
331 }
332
333 /*
334 * Determine whether a block can be allocated.
335 *
336 * Check to see if a block of the appropriate size is available,
337 * and if it is, allocate it.
338 */
339
340 static daddr_t
341 ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
342 {
343 struct m_ext2fs *fs;
344 char *bbp;
345 struct buf *bp;
346 int error, bno, start, end, loc;
347
348 fs = ip->i_e2fs;
349 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 &&
350 fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0)
351 return 0;
352 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
353 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
354 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
355 (int)fs->e2fs_bsize, B_MODIFY, &bp);
356 if (error) {
357 return 0;
358 }
359 bbp = (char *)bp->b_data;
360
361 if (dtog(fs, bpref) != cg)
362 bpref = 0;
363
364 /* initialize block bitmap now if uninit */
365 if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
366 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
367 ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
368 fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
369 }
370
371 if (bpref != 0) {
372 bpref = dtogd(fs, bpref);
373 /*
374 * if the requested block is available, use it
375 */
376 if (isclr(bbp, bpref)) {
377 bno = bpref;
378 goto gotit;
379 }
380 }
381 /*
382 * no blocks in the requested cylinder, so take next
383 * available one in this cylinder group.
384 * first try to get 8 contigous blocks, then fall back to a single
385 * block.
386 */
387 if (bpref)
388 start = dtogd(fs, bpref) / NBBY;
389 else
390 start = 0;
391 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
392 for (loc = start; loc < end; loc++) {
393 if (bbp[loc] == 0) {
394 bno = loc * NBBY;
395 goto gotit;
396 }
397 }
398 for (loc = 0; loc < start; loc++) {
399 if (bbp[loc] == 0) {
400 bno = loc * NBBY;
401 goto gotit;
402 }
403 }
404
405 bno = ext2fs_mapsearch(fs, bbp, bpref);
406 #if 0
407 /*
408 * XXX jdolecek mapsearch actually never fails, it panics instead.
409 * If re-enabling, make sure to brele() before returning.
410 */
411 if (bno < 0)
412 return 0;
413 #endif
414 gotit:
415 #ifdef DIAGNOSTIC
416 if (isset(bbp, (daddr_t)bno)) {
417 printf("%s: cg=%d bno=%d fs=%s\n", __func__,
418 cg, bno, fs->e2fs_fsmnt);
419 panic("ext2fs_alloccg: dup alloc");
420 }
421 #endif
422 setbit(bbp, (daddr_t)bno);
423 fs->e2fs.e2fs_fbcount--;
424 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
425 fs->e2fs_fmod = 1;
426 bdwrite(bp);
427 return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
428 }
429
430 /*
431 * Determine whether an inode can be allocated.
432 *
433 * Check to see if an inode is available, and if it is,
434 * allocate it using the following policy:
435 * 1) allocate the requested inode.
436 * 2) allocate the next available inode after the requested
437 * inode in the specified cylinder group.
438 */
439 static daddr_t
440 ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
441 {
442 struct m_ext2fs *fs;
443 char *ibp;
444 struct buf *bp;
445 int error, start, len, loc, map, i;
446
447 ipref--; /* to avoid a lot of (ipref -1) */
448 if (ipref == -1)
449 ipref = 0;
450 fs = ip->i_e2fs;
451 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 ||
452 fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0)
453 return 0;
454 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
455 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
456 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
457 (int)fs->e2fs_bsize, B_MODIFY, &bp);
458 if (error) {
459 return 0;
460 }
461 ibp = (char *)bp->b_data;
462
463 KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
464
465 /* initialize inode bitmap now if uninit */
466 if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
467 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
468 KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
469 memset(ibp, 0, fs->e2fs_bsize);
470 fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
471 }
472
473 if (ipref) {
474 ipref %= fs->e2fs.e2fs_ipg;
475 if (isclr(ibp, ipref))
476 goto gotit;
477 }
478 start = ipref / NBBY;
479 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
480 loc = skpc(0xff, len, &ibp[start]);
481 if (loc == 0) {
482 len = start + 1;
483 start = 0;
484 loc = skpc(0xff, len, &ibp[0]);
485 if (loc == 0) {
486 printf("%s: cg = %d, ipref = %lld, fs = %s\n", __func__,
487 cg, (long long)ipref, fs->e2fs_fsmnt);
488 panic("%s: map corrupted", __func__);
489 /* NOTREACHED */
490 }
491 }
492 i = start + len - loc;
493 map = ibp[i] ^ 0xff;
494 if (map == 0) {
495 printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
496 panic("%s: inode not in map", __func__);
497 }
498 ipref = i * NBBY + ffs(map) - 1;
499 gotit:
500 setbit(ibp, ipref);
501 fs->e2fs.e2fs_ficount--;
502 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
503 0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
504 fs->e2fs_fmod = 1;
505 bdwrite(bp);
506 return cg * fs->e2fs.e2fs_ipg + ipref + 1;
507 }
508
509 /*
510 * Free a block.
511 *
512 * The specified block is placed back in the
513 * free map.
514 */
515 void
516 ext2fs_blkfree(struct inode *ip, daddr_t bno)
517 {
518 struct m_ext2fs *fs;
519 char *bbp;
520 struct buf *bp;
521 int error, cg;
522
523 fs = ip->i_e2fs;
524 cg = dtog(fs, bno);
525
526 KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
527 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
528
529 if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
530 printf("%s: bad block %jd, ino %ju\n",
531 __func__, (intmax_t)bno, (uintmax_t)ip->i_number);
532 ext2fs_fserr(fs, ip->i_uid, "bad block");
533 return;
534 }
535 error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
536 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
537 fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
538 (int)fs->e2fs_bsize, B_MODIFY, &bp);
539 if (error) {
540 return;
541 }
542 bbp = (char *)bp->b_data;
543 bno = dtogd(fs, bno);
544 if (isclr(bbp, bno)) {
545 printf("%s: dev = %#jx, block = %jd, fs = %s\n", __func__,
546 (uintmax_t)ip->i_dev, (intmax_t)bno,
547 fs->e2fs_fsmnt);
548 panic("%s: freeing free block", __func__);
549 }
550 clrbit(bbp, bno);
551 fs->e2fs.e2fs_fbcount++;
552 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
553 fs->e2fs_fmod = 1;
554 bdwrite(bp);
555 }
556
557 /*
558 * Free an inode.
559 *
560 * The specified inode is placed back in the free map.
561 */
562 int
563 ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
564 {
565 struct m_ext2fs *fs;
566 char *ibp;
567 struct inode *pip;
568 struct buf *bp;
569 int error, cg;
570
571 pip = VTOI(pvp);
572 fs = pip->i_e2fs;
573
574 if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
575 panic("%s: range: dev = %#jx, ino = %ju, fs = %s",
576 __func__, (uintmax_t)pip->i_dev, (uintmax_t)ino,
577 fs->e2fs_fsmnt);
578
579 cg = ino_to_cg(fs, ino);
580
581 KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
582 (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
583
584 error = bread(pip->i_devvp, EXT2_FSBTODB64(fs,
585 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
586 fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
587 (int)fs->e2fs_bsize, B_MODIFY, &bp);
588 if (error) {
589 return 0;
590 }
591 ibp = (char *)bp->b_data;
592 ino = (ino - 1) % fs->e2fs.e2fs_ipg;
593 if (isclr(ibp, ino)) {
594 printf("%s: dev = %#jx, ino = %ju, fs = %s\n", __func__,
595 (uintmax_t)pip->i_dev,
596 (uintmax_t)ino, fs->e2fs_fsmnt);
597 if (fs->e2fs_ronly == 0)
598 panic("%s: freeing free inode", __func__);
599 }
600 clrbit(ibp, ino);
601 fs->e2fs.e2fs_ficount++;
602 ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
603 0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
604 fs->e2fs_fmod = 1;
605 bdwrite(bp);
606 return 0;
607 }
608
609 /*
610 * Find a block in the specified cylinder group.
611 *
612 * It is a panic if a request is made to find a block if none are
613 * available.
614 */
615
616 static daddr_t
617 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
618 {
619 int start, len, loc, i, map;
620
621 /*
622 * find the fragment by searching through the free block
623 * map for an appropriate bit pattern
624 */
625 if (bpref)
626 start = dtogd(fs, bpref) / NBBY;
627 else
628 start = 0;
629 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
630 loc = skpc(0xff, len, &bbp[start]);
631 if (loc == 0) {
632 len = start + 1;
633 start = 0;
634 loc = skpc(0xff, len, &bbp[start]);
635 if (loc == 0) {
636 printf("%s: start = %d, len = %d, fs = %s\n",
637 __func__, start, len, fs->e2fs_fsmnt);
638 panic("%s: map corrupted", __func__);
639 /* NOTREACHED */
640 }
641 }
642 i = start + len - loc;
643 map = bbp[i] ^ 0xff;
644 if (map == 0) {
645 printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
646 panic("%s: block not in map", __func__);
647 }
648 return i * NBBY + ffs(map) - 1;
649 }
650
651 /*
652 * Fserr prints the name of a file system with an error diagnostic.
653 *
654 * The form of the error message is:
655 * fs: error message
656 */
657 static void
658 ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
659 {
660
661 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
662 }
663
664 static __inline void
665 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
666 {
667 if (nifree) {
668 uint32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) |
669 (fs2h16(gd->ext2bgd_nifree_hi) << 16);
670 ext2bgd_nifree += nifree;
671 gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree);
672 gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16);
673 /*
674 * If we allocated inode on bigger offset than what was
675 * ever used before, bump the itable_unused count. This
676 * member only ever grows, and is used only for initialization
677 * !INODE_ZEROED groups with used inodes. Of course, by the
678 * time we get here the itables are already zeroed, but
679 * e2fstools fsck.ext4 still checks this.
680 */
681 if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 &&
682 (ioff + 1) >= (fs->e2fs.e2fs_ipg -
683 fs2h16(gd->ext2bgd_itable_unused_lo))) {
684 gd->ext2bgd_itable_unused_lo =
685 h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
686 }
687
688 KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
689 gd->ext2bgd_itable_unused_lo <= ext2bgd_nifree);
690 }
691
692
693 if (nbfree) {
694 uint32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree)
695 | (fs2h16(gd->ext2bgd_nbfree_hi) << 16);
696 ext2bgd_nbfree += nbfree;
697 gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree);
698 gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16);
699 }
700
701 if (ndirs) {
702 uint32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs)
703 | (fs2h16(gd->ext2bgd_ndirs_hi) << 16);
704 ext2bgd_ndirs += ndirs;
705 gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs);
706 gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16);
707 }
708
709 if (E2FS_HAS_GD_CSUM(fs))
710 gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
711 }
712
713 /*
714 * Compute group description csum. Structure data must be LE (not host).
715 * Returned as LE (disk encoding).
716 */
717 static uint16_t
718 ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
719 {
720 uint16_t crc;
721 size_t cgsize = 1 << fs->e2fs_group_desc_shift;
722 uint32_t cg_bswapped = h2fs32((uint32_t)cg);
723 size_t off;
724
725 if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
726 return 0;
727
728 off = offsetof(struct ext2_gd, ext2bgd_checksum);
729
730 crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid, sizeof(fs->e2fs.e2fs_uuid));
731 crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
732 crc = crc16(crc, (uint8_t *)gd, off);
733 crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2));
734
735 return h2fs16(crc);
736 }
737
738 static void
739 ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
740 {
741 int i;
742
743 memset(bbp, 0, fs->e2fs_bsize);
744
745 /*
746 * No block was ever allocated on this cg before, so the only used
747 * blocks are metadata blocks on start of the group. We could optimize
748 * this to set by bytes, but since this is done once per the group
749 * in lifetime of filesystem, it really is not worth it.
750 */
751 for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
752 setbit(bbp, i);
753 }
754
755 /*
756 * Verify csum and initialize itable if not done already
757 */
758 int
759 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
760 {
761 struct ext2_gd *gd;
762 ino_t ioff;
763 size_t boff;
764 struct buf *bp;
765 int cg, i, error;
766
767 if (!E2FS_HAS_GD_CSUM(fs))
768 return 0;
769
770 for(cg = 0; cg < fs->e2fs_ncg; cg++) {
771 gd = &fs->e2fs_gd[cg];
772
773 /* Verify checksum */
774 uint16_t csum = ext2fs_cg_get_csum(fs, cg, gd);
775 if (gd->ext2bgd_checksum != csum) {
776 printf("%s: group %d invalid csum (%#x != %#x)\n",
777 __func__, cg, gd->ext2bgd_checksum, csum);
778 return EINVAL;
779 }
780
781 /* if mounting read-write, zero itable if not already done */
782 if (ronly ||
783 (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
784 continue;
785
786 /*
787 * We are skipping already used inodes, zero rest of itable
788 * blocks. First block to zero could be only partial wipe, all
789 * others are wiped completely. This might take a while,
790 * there could be many inode table blocks. We use
791 * delayed writes, so this shouldn't block for very
792 * long.
793 */
794 ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
795 boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
796
797 for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
798 if (boff) {
799 /* partial wipe, must read old data */
800 error = bread(devvp, EXT2_FSBTODB64OFF(fs,
801 fs2h32(gd->ext2bgd_i_tables),
802 fs2h32(gd->ext2bgd_i_tables_hi), i),
803 (int)fs->e2fs_bsize, B_MODIFY, &bp);
804 if (error) {
805 printf("%s: can't read itable block",
806 __func__);
807 return error;
808 }
809 memset((char *)bp->b_data + boff, 0,
810 fs->e2fs_bsize - boff);
811 boff = 0;
812 } else {
813 /*
814 * Complete wipe, don't need to read data. This
815 * assumes nothing else is changing the data.
816 */
817 bp = getblk(devvp, EXT2_FSBTODB64OFF(fs,
818 fs2h32(gd->ext2bgd_i_tables),
819 fs2h32(gd->ext2bgd_i_tables_hi), i),
820 (int)fs->e2fs_bsize, 0, 0);
821 clrbuf(bp);
822 }
823
824 bdwrite(bp);
825 }
826
827 gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
828 gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
829 fs->e2fs_fmod = 1;
830 }
831
832 return 0;
833 }
834