Home | History | Annotate | Line # | Download | only in raidframe
rf_paritymap.c revision 1.5.2.2
      1 /* $NetBSD: rf_paritymap.c,v 1.5.2.2 2011/05/31 03:04:54 rmind Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2009 Jed Davis.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: rf_paritymap.c,v 1.5.2.2 2011/05/31 03:04:54 rmind Exp $");
     31 
     32 #include <sys/param.h>
     33 #include <sys/callout.h>
     34 #include <sys/kmem.h>
     35 #include <sys/mutex.h>
     36 #include <sys/rwlock.h>
     37 #include <sys/systm.h>
     38 #include <sys/types.h>
     39 
     40 #include <dev/raidframe/rf_paritymap.h>
     41 #include <dev/raidframe/rf_stripelocks.h>
     42 #include <dev/raidframe/rf_layout.h>
     43 #include <dev/raidframe/rf_raid.h>
     44 #include <dev/raidframe/rf_parityscan.h>
     45 #include <dev/raidframe/rf_kintf.h>
     46 
/* Important parameters: */
/*
 * Divisor used by rf_paritymap_nreg() when it picks a default region count:
 * 25 << 20 bytes (25 MiB) of each component per region.
 */
#define REGION_MINSIZE (25ULL << 20)
/*
 * Default tick interval in milliseconds; used when no valid component label
 * supplies one and as the fallback when supplied parameters are rejected.
 */
#define DFL_TICKMS      40000
#define DFL_COOLDOWN    8     /* 7-8 intervals of 40s = 5min +/- 20s */

/* Internal-use flag bits. */
/* Set while the cooldown callout is considered armed. */
#define TICKING 1
/* Set by the callout; consumed (cleared) by rf_paritymap_checkwork(). */
#define TICKED 2

/* Prototypes! */
/* Forward declarations of the file-local helpers defined further down. */
static void rf_paritymap_write_locked(struct rf_paritymap *);
static void rf_paritymap_tick(void *);
static u_int rf_paritymap_nreg(RF_Raid_t *);
     60 
     61 /* Extract the current status of the parity map. */
     62 void
     63 rf_paritymap_status(struct rf_paritymap *pm, struct rf_pmstat *ps)
     64 {
     65 	memset(ps, 0, sizeof(*ps));
     66 	if (pm == NULL)
     67 		ps->enabled = 0;
     68 	else {
     69 		ps->enabled = 1;
     70 		ps->region_size = pm->region_size;
     71 		mutex_enter(&pm->lock);
     72 		memcpy(&ps->params, &pm->params, sizeof(ps->params));
     73 		memcpy(ps->dirty, pm->disk_now, sizeof(ps->dirty));
     74 		memcpy(&ps->ctrs, &pm->ctrs, sizeof(ps->ctrs));
     75 		mutex_exit(&pm->lock);
     76 	}
     77 }
     78 
     79 /*
     80  * Test whether parity in a given sector is suspected of being inconsistent
     81  * on disk (assuming that any pending I/O to it is allowed to complete).
     82  * This may be of interest to future work on parity scrubbing.
     83  */
     84 int
     85 rf_paritymap_test(struct rf_paritymap *pm, daddr_t sector)
     86 {
     87 	unsigned region = sector / pm->region_size;
     88 	int retval;
     89 
     90 	mutex_enter(&pm->lock);
     91 	retval = isset(pm->disk_boot->bits, region) ? 1 : 0;
     92 	mutex_exit(&pm->lock);
     93 	return retval;
     94 }
     95 
     96 /* To be called before a write to the RAID is submitted. */
     97 void
     98 rf_paritymap_begin(struct rf_paritymap *pm, daddr_t offset, daddr_t size)
     99 {
    100 	unsigned i, b, e;
    101 
    102 	b = offset / pm->region_size;
    103 	e = (offset + size - 1) / pm->region_size;
    104 
    105 	for (i = b; i <= e; i++)
    106 		rf_paritymap_begin_region(pm, i);
    107 }
    108 
    109 /* To be called after a write to the RAID completes. */
    110 void
    111 rf_paritymap_end(struct rf_paritymap *pm, daddr_t offset, daddr_t size)
    112 {
    113 	unsigned i, b, e;
    114 
    115 	b = offset / pm->region_size;
    116 	e = (offset + size - 1) / pm->region_size;
    117 
    118 	for (i = b; i <= e; i++)
    119 		rf_paritymap_end_region(pm, i);
    120 }
    121 
    122 void
    123 rf_paritymap_begin_region(struct rf_paritymap *pm, unsigned region)
    124 {
    125 	int needs_write;
    126 
    127 	KASSERT(region < RF_PARITYMAP_NREG);
    128 	pm->ctrs.nwrite++;
    129 
    130 	/* If it was being kept warm, deal with that. */
    131 	mutex_enter(&pm->lock);
    132 	if (pm->current->state[region] < 0)
    133 		pm->current->state[region] = 0;
    134 
    135 	/* This shouldn't happen unless RAIDOUTSTANDING is set too high. */
    136 	KASSERT(pm->current->state[region] < 127);
    137 	pm->current->state[region]++;
    138 
    139 	needs_write = isclr(pm->disk_now->bits, region);
    140 
    141 	if (needs_write) {
    142 		KASSERT(pm->current->state[region] == 1);
    143 		rf_paritymap_write_locked(pm);
    144 	}
    145 
    146 	mutex_exit(&pm->lock);
    147 }
    148 
    149 void
    150 rf_paritymap_end_region(struct rf_paritymap *pm, unsigned region)
    151 {
    152 	KASSERT(region < RF_PARITYMAP_NREG);
    153 
    154 	mutex_enter(&pm->lock);
    155 	KASSERT(pm->current->state[region] > 0);
    156 	--pm->current->state[region];
    157 
    158 	if (pm->current->state[region] <= 0) {
    159 		pm->current->state[region] = -pm->params.cooldown;
    160 		KASSERT(pm->current->state[region] <= 0);
    161 		mutex_enter(&pm->lk_flags);
    162 		if (!(pm->flags & TICKING)) {
    163 			pm->flags |= TICKING;
    164 			mutex_exit(&pm->lk_flags);
    165 			callout_schedule(&pm->ticker,
    166 			    mstohz(pm->params.tickms));
    167 		} else
    168 			mutex_exit(&pm->lk_flags);
    169 	}
    170 	mutex_exit(&pm->lock);
    171 }
    172 
/*
 * Updates the parity map to account for any changes in current activity
 * and/or an ongoing parity scan, then writes it to disk with appropriate
 * synchronization.
 *
 * This variant is for callers that do not already hold pm->lock: it takes
 * the lock, performs the update via rf_paritymap_write_locked(), and
 * releases the lock again.
 */
void
rf_paritymap_write(struct rf_paritymap *pm)
{
	mutex_enter(&pm->lock);
	rf_paritymap_write_locked(pm);
	mutex_exit(&pm->lock);
}
    185 
    186 /* As above, but to be used when pm->lock is already held. */
    187 static void
    188 rf_paritymap_write_locked(struct rf_paritymap *pm)
    189 {
    190 	char w, w0;
    191 	int i, j, setting, clearing;
    192 
    193 	setting = clearing = 0;
    194 	for (i = 0; i < RF_PARITYMAP_NBYTE; i++) {
    195 		w0 = pm->disk_now->bits[i];
    196 		w = pm->disk_boot->bits[i];
    197 
    198 		for (j = 0; j < NBBY; j++)
    199 			if (pm->current->state[i * NBBY + j] != 0)
    200 				w |= 1 << j;
    201 
    202 		if (w & ~w0)
    203 			setting = 1;
    204 		if (w0 & ~w)
    205 			clearing = 1;
    206 
    207 		pm->disk_now->bits[i] = w;
    208 	}
    209 	pm->ctrs.ncachesync += setting + clearing;
    210 	pm->ctrs.nclearing += clearing;
    211 
    212 	/*
    213 	 * If bits are being set in the parity map, then a sync is
    214 	 * required afterwards, so that the regions are marked dirty
    215 	 * on disk before any writes to them take place.  If bits are
    216 	 * being cleared, then a sync is required before the write, so
    217 	 * that any writes to those regions are processed before the
    218 	 * region is marked clean.  (Synchronization is somewhat
    219 	 * overkill; a write ordering barrier would suffice, but we
    220 	 * currently have no way to express that directly.)
    221 	 */
    222 	if (clearing)
    223 		rf_sync_component_caches(pm->raid);
    224 	rf_paritymap_kern_write(pm->raid, pm->disk_now);
    225 	if (setting)
    226 		rf_sync_component_caches(pm->raid);
    227 }
    228 
    229 /* Mark all parity as being in need of rewrite. */
    230 void
    231 rf_paritymap_invalidate(struct rf_paritymap *pm)
    232 {
    233 	mutex_enter(&pm->lock);
    234 	memset(pm->disk_boot, ~(unsigned char)0,
    235 	    sizeof(struct rf_paritymap_ondisk));
    236 	mutex_exit(&pm->lock);
    237 }
    238 
    239 /* Mark all parity as being correct. */
    240 void
    241 rf_paritymap_forceclean(struct rf_paritymap *pm)
    242 {
    243 	mutex_enter(&pm->lock);
    244 	memset(pm->disk_boot, (unsigned char)0,
    245 	    sizeof(struct rf_paritymap_ondisk));
    246 	mutex_exit(&pm->lock);
    247 }
    248 
    249 /*
    250  * The cooldown callout routine just defers its work to a thread; it can't do
    251  * the parity map write itself as it would block, and although mutex-induced
    252  * blocking is permitted it seems wise to avoid tying up the softint.
    253  */
    254 static void
    255 rf_paritymap_tick(void *arg)
    256 {
    257 	struct rf_paritymap *pm = arg;
    258 
    259 	mutex_enter(&pm->lk_flags);
    260 	pm->flags |= TICKED;
    261 	mutex_exit(&pm->lk_flags);
    262 
    263 	rf_lock_mutex2(pm->raid->iodone_lock);
    264 	rf_signal_cond2(pm->raid->iodone_cv); /* XXX */
    265 	rf_unlock_mutex2(pm->raid->iodone_lock);
    266 }
    267 
    268 /*
    269  * This is where the parity cooling work (and rearming the callout if needed)
    270  * is done; the raidio thread calls it when woken up, as by the above.
    271  */
    272 void
    273 rf_paritymap_checkwork(struct rf_paritymap *pm)
    274 {
    275 	int i, zerop, progressp;
    276 
    277 	mutex_enter(&pm->lk_flags);
    278 	if (pm->flags & TICKED) {
    279 		zerop = progressp = 0;
    280 
    281 		pm->flags &= ~TICKED;
    282 		mutex_exit(&pm->lk_flags);
    283 
    284 		mutex_enter(&pm->lock);
    285 		for (i = 0; i < RF_PARITYMAP_NREG; i++) {
    286 			if (pm->current->state[i] < 0) {
    287 				progressp = 1;
    288 				pm->current->state[i]++;
    289 				if (pm->current->state[i] == 0)
    290 					zerop = 1;
    291 			}
    292 		}
    293 
    294 		if (progressp)
    295 			callout_schedule(&pm->ticker,
    296 			    mstohz(pm->params.tickms));
    297 		else {
    298 			mutex_enter(&pm->lk_flags);
    299 			pm->flags &= ~TICKING;
    300 			mutex_exit(&pm->lk_flags);
    301 		}
    302 
    303 		if (zerop)
    304 			rf_paritymap_write_locked(pm);
    305 		mutex_exit(&pm->lock);
    306 	} else
    307 		mutex_exit(&pm->lk_flags);
    308 }
    309 
    310 /*
    311  * Set parity map parameters; used both to alter parameters on the fly and to
    312  * establish their initial values.  Note that setting a parameter to 0 means
    313  * to leave the previous setting unchanged, and that if this is done for the
    314  * initial setting of "regions", then a default value will be computed based
    315  * on the RAID component size.
    316  */
    317 int
    318 rf_paritymap_set_params(struct rf_paritymap *pm,
    319     const struct rf_pmparams *params, int todisk)
    320 {
    321 	int cooldown, tickms;
    322 	u_int regions;
    323 	RF_RowCol_t col;
    324 	RF_ComponentLabel_t *clabel;
    325 	RF_Raid_t *raidPtr;
    326 
    327 	cooldown = params->cooldown != 0
    328 	    ? params->cooldown : pm->params.cooldown;
    329 	tickms = params->tickms != 0
    330 	    ? params->tickms : pm->params.tickms;
    331 	regions = params->regions != 0
    332 	    ? params->regions : pm->params.regions;
    333 
    334 	if (cooldown < 1 || cooldown > 128) {
    335 		printf("raid%d: cooldown %d out of range\n", pm->raid->raidid,
    336 		    cooldown);
    337 		return (-1);
    338 	}
    339 	if (tickms < 10) {
    340 		printf("raid%d: tick time %dms out of range\n",
    341 		    pm->raid->raidid, tickms);
    342 		return (-1);
    343 	}
    344 	if (regions == 0) {
    345 		regions = rf_paritymap_nreg(pm->raid);
    346 	} else if (regions > RF_PARITYMAP_NREG) {
    347 		printf("raid%d: region count %u too large (more than %u)\n",
    348 		    pm->raid->raidid, regions, RF_PARITYMAP_NREG);
    349 		return (-1);
    350 	}
    351 
    352 	/* XXX any currently warm parity will be used with the new tickms! */
    353 	pm->params.cooldown = cooldown;
    354 	pm->params.tickms = tickms;
    355 	/* Apply the initial region count, but do not change it after that. */
    356 	if (pm->params.regions == 0)
    357 		pm->params.regions = regions;
    358 
    359 	/* So that the newly set parameters can be tested: */
    360 	pm->ctrs.nwrite = pm->ctrs.ncachesync = pm->ctrs.nclearing = 0;
    361 
    362 	if (todisk) {
    363 		raidPtr = pm->raid;
    364 		for (col = 0; col < raidPtr->numCol; col++) {
    365 			if (RF_DEAD_DISK(raidPtr->Disks[col].status))
    366 				continue;
    367 
    368 			clabel = raidget_component_label(raidPtr, col);
    369 			clabel->parity_map_ntick = cooldown;
    370 			clabel->parity_map_tickms = tickms;
    371 			clabel->parity_map_regions = regions;
    372 
    373 			/* Don't touch the disk if it's been spared */
    374 			if (clabel->status == rf_ds_spared)
    375 				continue;
    376 
    377 			raidflush_component_label(raidPtr, col);
    378 		}
    379 
    380 		/* handle the spares too... */
    381 		for (col = 0; col < raidPtr->numSpare; col++) {
    382 			if (raidPtr->Disks[raidPtr->numCol+col].status == rf_ds_used_spare) {
    383 				clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
    384 				clabel->parity_map_ntick = cooldown;
    385 				clabel->parity_map_tickms = tickms;
    386 				clabel->parity_map_regions = regions;
    387 				raidflush_component_label(raidPtr, raidPtr->numCol+col);
    388 			}
    389 		}
    390 	}
    391 	return 0;
    392 }
    393 
    394 /*
    395  * The number of regions may not be as many as can fit into the map, because
    396  * when regions are too small, the overhead of setting parity map bits
    397  * becomes significant in comparison to the actual I/O, while the
    398  * corresponding gains in parity verification time become negligible.  Thus,
    399  * a minimum region size (defined above) is imposed.
    400  *
    401  * Note that, if the number of regions is less than the maximum, then some of
    402  * the regions will be "fictional", corresponding to no actual disk; some
    403  * parts of the code may process them as normal, but they can not ever be
    404  * written to.
    405  */
    406 static u_int
    407 rf_paritymap_nreg(RF_Raid_t *raid)
    408 {
    409 	daddr_t bytes_per_disk, nreg;
    410 
    411 	bytes_per_disk = raid->sectorsPerDisk << raid->logBytesPerSector;
    412 	nreg = bytes_per_disk / REGION_MINSIZE;
    413 	if (nreg > RF_PARITYMAP_NREG)
    414 		nreg = RF_PARITYMAP_NREG;
    415 	if (nreg < 1)
    416 		nreg = 1;
    417 
    418 	return (u_int)nreg;
    419 }
    420 
    421 /*
    422  * Initialize a parity map given specific parameters.  This neither reads nor
    423  * writes the parity map config in the component labels; for that, see below.
    424  */
    425 int
    426 rf_paritymap_init(struct rf_paritymap *pm, RF_Raid_t *raid,
    427     const struct rf_pmparams *params)
    428 {
    429 	daddr_t rstripes;
    430 	struct rf_pmparams safe;
    431 
    432 	pm->raid = raid;
    433 	pm->params.regions = 0;
    434 	if (0 != rf_paritymap_set_params(pm, params, 0)) {
    435 		/*
    436 		 * If the parameters are out-of-range, then bring the
    437 		 * parity map up with something reasonable, so that
    438 		 * the admin can at least go and fix it (or ignore it
    439 		 * entirely).
    440 		 */
    441 		safe.cooldown = DFL_COOLDOWN;
    442 		safe.tickms = DFL_TICKMS;
    443 		safe.regions = 0;
    444 
    445 		if (0 != rf_paritymap_set_params(pm, &safe, 0))
    446 			return (-1);
    447 	}
    448 
    449 	rstripes = howmany(raid->Layout.numStripe, pm->params.regions);
    450 	pm->region_size = rstripes * raid->Layout.dataSectorsPerStripe;
    451 
    452 	callout_init(&pm->ticker, CALLOUT_MPSAFE);
    453 	callout_setfunc(&pm->ticker, rf_paritymap_tick, pm);
    454 	pm->flags = 0;
    455 
    456 	pm->disk_boot = kmem_alloc(sizeof(struct rf_paritymap_ondisk),
    457 	    KM_SLEEP);
    458 	pm->disk_now = kmem_alloc(sizeof(struct rf_paritymap_ondisk),
    459 	    KM_SLEEP);
    460 	pm->current = kmem_zalloc(sizeof(struct rf_paritymap_current),
    461 	    KM_SLEEP);
    462 
    463 	rf_paritymap_kern_read(pm->raid, pm->disk_boot);
    464 	memcpy(pm->disk_now, pm->disk_boot, sizeof(*pm->disk_now));
    465 
    466 	mutex_init(&pm->lock, MUTEX_DEFAULT, IPL_NONE);
    467 	mutex_init(&pm->lk_flags, MUTEX_DEFAULT, IPL_SOFTCLOCK);
    468 
    469 	return 0;
    470 }
    471 
    472 /*
    473  * Destroys a parity map; unless "force" is set, also cleans parity for any
    474  * regions which were still in cooldown (but are not dirty on disk).
    475  */
    476 void
    477 rf_paritymap_destroy(struct rf_paritymap *pm, int force)
    478 {
    479 	int i;
    480 
    481 	callout_halt(&pm->ticker, NULL); /* XXX stop? halt? */
    482 	callout_destroy(&pm->ticker);
    483 
    484 	if (!force) {
    485 		for (i = 0; i < RF_PARITYMAP_NREG; i++) {
    486 			/* XXX check for > 0 ? */
    487 			if (pm->current->state[i] < 0)
    488 				pm->current->state[i] = 0;
    489 		}
    490 
    491 		rf_paritymap_write_locked(pm);
    492 	}
    493 
    494 	mutex_destroy(&pm->lock);
    495 	mutex_destroy(&pm->lk_flags);
    496 
    497 	kmem_free(pm->disk_boot, sizeof(struct rf_paritymap_ondisk));
    498 	kmem_free(pm->disk_now, sizeof(struct rf_paritymap_ondisk));
    499 	kmem_free(pm->current, sizeof(struct rf_paritymap_current));
    500 }
    501 
    502 /*
    503  * Rewrite parity, taking parity map into account; this is the equivalent of
    504  * the old rf_RewriteParity, and is likewise to be called from a suitable
    505  * thread and shouldn't have multiple copies running in parallel and so on.
    506  *
    507  * Note that the fictional regions are "cleaned" in one shot, so that very
    508  * small RAIDs (useful for testing) will not experience potentially severe
    509  * regressions in rewrite time.
    510  */
    511 int
    512 rf_paritymap_rewrite(struct rf_paritymap *pm)
    513 {
    514 	int i, ret_val = 0;
    515 	daddr_t reg_b, reg_e;
    516 
    517 	/* Process only the actual regions. */
    518 	for (i = 0; i < pm->params.regions; i++) {
    519 		mutex_enter(&pm->lock);
    520 		if (isset(pm->disk_boot->bits, i)) {
    521 			mutex_exit(&pm->lock);
    522 
    523 			reg_b = i * pm->region_size;
    524 			reg_e = reg_b + pm->region_size;
    525 			if (reg_e > pm->raid->totalSectors)
    526 				reg_e = pm->raid->totalSectors;
    527 
    528 			if (rf_RewriteParityRange(pm->raid, reg_b,
    529 			    reg_e - reg_b)) {
    530 				ret_val = 1;
    531 				if (pm->raid->waitShutdown)
    532 					return ret_val;
    533 			} else {
    534 				mutex_enter(&pm->lock);
    535 				clrbit(pm->disk_boot->bits, i);
    536 				rf_paritymap_write_locked(pm);
    537 				mutex_exit(&pm->lock);
    538 			}
    539 		} else {
    540 			mutex_exit(&pm->lock);
    541 		}
    542 	}
    543 
    544 	/* Now, clear the fictional regions, if any. */
    545 	rf_paritymap_forceclean(pm);
    546 	rf_paritymap_write(pm);
    547 
    548 	return ret_val;
    549 }
    550 
    551 /*
    552  * How to merge the on-disk parity maps when reading them in from the
    553  * various components; returns whether they differ.  In the case that
    554  * they do differ, sets *dst to the union of *dst and *src.
    555  *
    556  * In theory, it should be safe to take the intersection (or just pick
    557  * a single component arbitrarily), but the paranoid approach costs
    558  * little.
    559  *
    560  * Appropriate locking, if any, is the responsibility of the caller.
    561  */
    562 int
    563 rf_paritymap_merge(struct rf_paritymap_ondisk *dst,
    564     struct rf_paritymap_ondisk *src)
    565 {
    566 	int i, discrep = 0;
    567 
    568 	for (i = 0; i < RF_PARITYMAP_NBYTE; i++) {
    569 		if (dst->bits[i] != src->bits[i])
    570 			discrep = 1;
    571 		dst->bits[i] |= src->bits[i];
    572 	}
    573 
    574 	return discrep;
    575 }
    576 
    577 /*
    578  * Detach a parity map from its RAID.  This is not meant to be applied except
    579  * when unconfiguring the RAID after all I/O has been resolved, as otherwise
    580  * an out-of-date parity map could be treated as current.
    581  */
    582 void
    583 rf_paritymap_detach(RF_Raid_t *raidPtr)
    584 {
    585 	if (raidPtr->parity_map == NULL)
    586 		return;
    587 
    588 	rf_lock_mutex2(raidPtr->iodone_lock);
    589 	struct rf_paritymap *pm = raidPtr->parity_map;
    590 	raidPtr->parity_map = NULL;
    591 	rf_unlock_mutex2(raidPtr->iodone_lock);
    592 	/* XXXjld is that enough locking?  Or too much? */
    593 	rf_paritymap_destroy(pm, 0);
    594 	kmem_free(pm, sizeof(*pm));
    595 }
    596 
/*
 * Is this RAID set ineligible for parity-map use due to not actually
 * having any parity?  (If so, rf_paritymap_attach is a no-op, but
 * rf_paritymap_{get,set}_disable will still pointlessly act on the
 * component labels.)
 *
 * Returns nonzero when the layout's faultsTolerated value is 0, and 0
 * otherwise.
 */
int
rf_paritymap_ineligible(RF_Raid_t *raidPtr)
{
	return raidPtr->Layout.map->faultsTolerated == 0;
}
    608 
    609 /*
    610  * Attach a parity map to a RAID set if appropriate.  Includes
    611  * configure-time processing of parity-map fields of component label.
    612  */
    613 void
    614 rf_paritymap_attach(RF_Raid_t *raidPtr, int force)
    615 {
    616 	RF_RowCol_t col;
    617 	int pm_use, pm_zap;
    618 	int g_tickms, g_ntick, g_regions;
    619 	int good;
    620 	RF_ComponentLabel_t *clabel;
    621 	u_int flags, regions;
    622 	struct rf_pmparams params;
    623 
    624 	if (rf_paritymap_ineligible(raidPtr)) {
    625 		/* There isn't any parity. */
    626 		return;
    627 	}
    628 
    629 	pm_use = 1;
    630 	pm_zap = 0;
    631 	g_tickms = DFL_TICKMS;
    632 	g_ntick = DFL_COOLDOWN;
    633 	g_regions = 0;
    634 
    635 	/*
    636 	 * Collect opinions on the set config.  If this is the initial
    637 	 * config (raidctl -C), treat all labels as invalid, since
    638 	 * there may be random data present.
    639 	 */
    640 	if (!force) {
    641 		for (col = 0; col < raidPtr->numCol; col++) {
    642 			if (RF_DEAD_DISK(raidPtr->Disks[col].status))
    643 				continue;
    644 			clabel = raidget_component_label(raidPtr, col);
    645 			flags = clabel->parity_map_flags;
    646 			/* Check for use by non-parity-map kernel. */
    647 			if (clabel->parity_map_modcount
    648 			    != clabel->mod_counter) {
    649 				flags &= ~RF_PMLABEL_WASUSED;
    650 			}
    651 
    652 			if (flags & RF_PMLABEL_VALID) {
    653 				g_tickms = clabel->parity_map_tickms;
    654 				g_ntick = clabel->parity_map_ntick;
    655 				regions = clabel->parity_map_regions;
    656 				if (g_regions == 0)
    657 					g_regions = regions;
    658 				else if (g_regions != regions) {
    659 					pm_zap = 1; /* important! */
    660 				}
    661 
    662 				if (flags & RF_PMLABEL_DISABLE) {
    663 					pm_use = 0;
    664 				}
    665 				if (!(flags & RF_PMLABEL_WASUSED)) {
    666 					pm_zap = 1;
    667 				}
    668 			} else {
    669 				pm_zap = 1;
    670 			}
    671 		}
    672 	} else {
    673 		pm_zap = 1;
    674 	}
    675 
    676 	/* Finally, create and attach the parity map. */
    677 	if (pm_use) {
    678 		params.cooldown = g_ntick;
    679 		params.tickms = g_tickms;
    680 		params.regions = g_regions;
    681 
    682 		raidPtr->parity_map = kmem_alloc(sizeof(struct rf_paritymap),
    683 		    KM_SLEEP);
    684 		if (0 != rf_paritymap_init(raidPtr->parity_map, raidPtr,
    685 			&params)) {
    686 			/* It failed; do without. */
    687 			kmem_free(raidPtr->parity_map,
    688 			    sizeof(struct rf_paritymap));
    689 			raidPtr->parity_map = NULL;
    690 			return;
    691 		}
    692 
    693 		if (g_regions == 0)
    694 			/* Pick up the autoconfigured region count. */
    695 			g_regions = raidPtr->parity_map->params.regions;
    696 
    697 		if (pm_zap) {
    698 			good = raidPtr->parity_good && !force;
    699 
    700 			if (good)
    701 				rf_paritymap_forceclean(raidPtr->parity_map);
    702 			else
    703 				rf_paritymap_invalidate(raidPtr->parity_map);
    704 			/* This needs to be on disk before WASUSED is set. */
    705 			rf_paritymap_write(raidPtr->parity_map);
    706 		}
    707 	}
    708 
    709 	/* Alter labels in-core to reflect the current view of things. */
    710 	for (col = 0; col < raidPtr->numCol; col++) {
    711 		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
    712 			continue;
    713 		clabel = raidget_component_label(raidPtr, col);
    714 
    715 		if (pm_use)
    716 			flags = RF_PMLABEL_VALID | RF_PMLABEL_WASUSED;
    717 		else
    718 			flags = RF_PMLABEL_VALID | RF_PMLABEL_DISABLE;
    719 
    720 		clabel->parity_map_flags = flags;
    721 		clabel->parity_map_tickms = g_tickms;
    722 		clabel->parity_map_ntick = g_ntick;
    723 		clabel->parity_map_regions = g_regions;
    724 		raidflush_component_label(raidPtr, col);
    725 	}
    726 	/* Note that we're just in 'attach' here, and there won't
    727 	   be any spare disks at this point. */
    728 }
    729 
    730 /*
    731  * For initializing the parity-map fields of a component label, both on
    732  * initial creation and on reconstruct/copyback/etc.  */
    733 void
    734 rf_paritymap_init_label(struct rf_paritymap *pm, RF_ComponentLabel_t *clabel)
    735 {
    736 	if (pm != NULL) {
    737 		clabel->parity_map_flags =
    738 		    RF_PMLABEL_VALID | RF_PMLABEL_WASUSED;
    739 		clabel->parity_map_tickms = pm->params.tickms;
    740 		clabel->parity_map_ntick = pm->params.cooldown;
    741 		/*
    742 		 * XXXjld: If the number of regions is changed on disk, and
    743 		 * then a new component is labeled before the next configure,
    744 		 * then it will get the old value and they will conflict on
    745 		 * the next boot (and the default will be used instead).
    746 		 */
    747 		clabel->parity_map_regions = pm->params.regions;
    748 	} else {
    749 		/*
    750 		 * XXXjld: if the map is disabled, and all the components are
    751 		 * replaced without an intervening unconfigure/reconfigure,
    752 		 * then it will become enabled on the next unconfig/reconfig.
    753 		 */
    754 	}
    755 }
    756 
    757 
    758 /* Will the parity map be disabled next time? */
    759 int
    760 rf_paritymap_get_disable(RF_Raid_t *raidPtr)
    761 {
    762 	RF_ComponentLabel_t *clabel;
    763 	RF_RowCol_t col;
    764 	int dis;
    765 
    766 	dis = 0;
    767 	for (col = 0; col < raidPtr->numCol; col++) {
    768 		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
    769 			continue;
    770 		clabel = raidget_component_label(raidPtr, col);
    771 		if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
    772 			dis = 1;
    773 	}
    774         for (col = 0; col < raidPtr->numSpare; col++) {
    775 		if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
    776                         continue;
    777                 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
    778                 if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
    779                         dis = 1;
    780         }
    781 
    782 	return dis;
    783 }
    784 
    785 /* Set whether the parity map will be disabled next time. */
    786 void
    787 rf_paritymap_set_disable(RF_Raid_t *raidPtr, int dis)
    788 {
    789 	RF_ComponentLabel_t *clabel;
    790 	RF_RowCol_t col;
    791 
    792 	for (col = 0; col < raidPtr->numCol; col++) {
    793 		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
    794 			continue;
    795 		clabel = raidget_component_label(raidPtr, col);
    796 		if (dis)
    797 			clabel->parity_map_flags |= RF_PMLABEL_DISABLE;
    798 		else
    799 			clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE;
    800 		raidflush_component_label(raidPtr, col);
    801 	}
    802 
    803 	/* update any used spares as well */
    804 	for (col = 0; col < raidPtr->numSpare; col++) {
    805 		if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
    806 			continue;
    807 
    808 		clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
    809 		if (dis)
    810 			clabel->parity_map_flags |= RF_PMLABEL_DISABLE;
    811 		else
    812 			clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE;
    813 		raidflush_component_label(raidPtr, raidPtr->numCol+col);
    814 	}
    815 }
    816