Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.83.2.2
      1 /*	$NetBSD: rf_disks.c,v 1.83.2.2 2014/08/20 00:03:49 tls Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1995 Carnegie-Mellon University.
     33  * All rights reserved.
     34  *
     35  * Author: Mark Holland
     36  *
     37  * Permission to use, copy, modify and distribute this software and
     38  * its documentation is hereby granted, provided that both the copyright
     39  * notice and this permission notice appear in all copies of the
     40  * software, derivative works or modified versions, and any portions
     41  * thereof, and that both notices appear in supporting documentation.
     42  *
     43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     46  *
     47  * Carnegie Mellon requests users of this software to return to
     48  *
     49  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     50  *  School of Computer Science
     51  *  Carnegie Mellon University
     52  *  Pittsburgh PA 15213-3890
     53  *
     54  * any improvements or extensions that they make and grant Carnegie the
     55  * rights to redistribute these changes.
     56  */
     57 
     58 /***************************************************************
     59  * rf_disks.c -- code to perform operations on the actual disks
     60  ***************************************************************/
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.83.2.2 2014/08/20 00:03:49 tls Exp $");
     64 
     65 #include <dev/raidframe/raidframevar.h>
     66 
     67 #include "rf_raid.h"
     68 #include "rf_alloclist.h"
     69 #include "rf_utils.h"
     70 #include "rf_general.h"
     71 #include "rf_options.h"
     72 #include "rf_kintf.h"
     73 #include "rf_netbsd.h"
     74 
     75 #include <sys/param.h>
     76 #include <sys/systm.h>
     77 #include <sys/proc.h>
     78 #include <sys/ioctl.h>
     79 #include <sys/fcntl.h>
     80 #include <sys/vnode.h>
     81 #include <sys/namei.h> /* for pathbuf */
     82 #include <sys/kauth.h>
     83 #include <sys/atomic.h>
     84 #include <sys/disk.h>
     85 
     86 #include <miscfs/specfs/specdev.h> /* for v_rdev */
     87 
     88 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     89 static void rf_print_label_status( RF_Raid_t *, int, char *,
     90 				  RF_ComponentLabel_t *);
     91 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     92 				  RF_ComponentLabel_t *, int, int );
     93 
     94 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f)
     95 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g)
     96 
     97 /**************************************************************************
     98  *
     99  * initialize the disks comprising the array
    100  *
    101  * We want the spare disks to have regular row,col numbers so that we can
    102  * easily substitue a spare for a failed disk.  But, the driver code assumes
    103  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    104  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    105  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    106  * rest, and put all the spares in it.  This probably needs to get changed
    107  * eventually.
    108  *
    109  **************************************************************************/
    110 
    111 int
    112 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    113 		  RF_Config_t *cfgPtr)
    114 {
    115 	RF_RaidDisk_t *disks;
    116 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    117 	RF_RowCol_t c;
    118 	int bs, ret;
    119 	unsigned i, count, foundone = 0, numFailuresThisRow;
    120 	int force;
    121 
    122 	force = cfgPtr->force;
    123 
    124 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    125 	if (ret)
    126 		goto fail;
    127 
    128 	disks = raidPtr->Disks;
    129 
    130 	numFailuresThisRow = 0;
    131 	for (c = 0; c < raidPtr->numCol; c++) {
    132 		ret = rf_ConfigureDisk(raidPtr,
    133 				       &cfgPtr->devnames[0][c][0],
    134 				       &disks[c], c);
    135 
    136 		if (ret)
    137 			goto fail;
    138 
    139 		if (disks[c].status == rf_ds_optimal) {
    140 			ret = raidfetch_component_label(raidPtr, c);
    141 			if (ret)
    142 				goto fail;
    143 
    144 			/* mark it as failed if the label looks bogus... */
    145 			if (!rf_reasonable_label(&raidPtr->raid_cinfo[c].ci_label,0) && !force) {
    146 				disks[c].status = rf_ds_failed;
    147 			}
    148 		}
    149 
    150 		if (disks[c].status != rf_ds_optimal) {
    151 			numFailuresThisRow++;
    152 		} else {
    153 			if (disks[c].numBlocks < min_numblks)
    154 				min_numblks = disks[c].numBlocks;
    155 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
    156 				 c, disks[c].devname,
    157 				 disks[c].numBlocks,
    158 				 disks[c].blockSize,
    159 				 (long int) disks[c].numBlocks *
    160 				 disks[c].blockSize / 1024 / 1024);
    161 		}
    162 	}
    163 	/* XXX fix for n-fault tolerant */
    164 	/* XXX this should probably check to see how many failures
    165 	   we can handle for this configuration! */
    166 	if (numFailuresThisRow > 0)
    167 		raidPtr->status = rf_rs_degraded;
    168 
    169 	/* all disks must be the same size & have the same block size, bs must
    170 	 * be a power of 2 */
    171 	bs = 0;
    172 	foundone = 0;
    173 	for (c = 0; c < raidPtr->numCol; c++) {
    174 		if (disks[c].status == rf_ds_optimal) {
    175 			bs = disks[c].blockSize;
    176 			foundone = 1;
    177 			break;
    178 		}
    179 	}
    180 	if (!foundone) {
    181 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    182 		ret = EINVAL;
    183 		goto fail;
    184 	}
    185 	for (count = 0, i = 1; i; i <<= 1)
    186 		if (bs & i)
    187 			count++;
    188 	if (count != 1) {
    189 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    190 		ret = EINVAL;
    191 		goto fail;
    192 	}
    193 
    194 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    195 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    196 		if (force != 0) {
    197 			printf("raid%d: Fatal errors being ignored.\n",
    198 			       raidPtr->raidid);
    199 		} else {
    200 			ret = EINVAL;
    201 			goto fail;
    202 		}
    203 	}
    204 
    205 	for (c = 0; c < raidPtr->numCol; c++) {
    206 		if (disks[c].status == rf_ds_optimal) {
    207 			if (disks[c].blockSize != bs) {
    208 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
    209 				ret = EINVAL;
    210 				goto fail;
    211 			}
    212 			if (disks[c].numBlocks != min_numblks) {
    213 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
    214 					     c, (int) min_numblks);
    215 				disks[c].numBlocks = min_numblks;
    216 			}
    217 		}
    218 	}
    219 
    220 	raidPtr->sectorsPerDisk = min_numblks;
    221 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    222 	raidPtr->bytesPerSector = bs;
    223 	raidPtr->sectorMask = bs - 1;
    224 	return (0);
    225 
    226 fail:
    227 
    228 	rf_UnconfigureVnodes( raidPtr );
    229 
    230 	return (ret);
    231 }
    232 
    233 
    234 /****************************************************************************
    235  * set up the data structures describing the spare disks in the array
    236  * recall from the above comment that the spare disk descriptors are stored
    237  * in row zero, which is specially expanded to hold them.
    238  ****************************************************************************/
    239 int
    240 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    241 		       RF_Config_t *cfgPtr)
    242 {
    243 	int     i, ret;
    244 	unsigned int bs;
    245 	RF_RaidDisk_t *disks;
    246 	int     num_spares_done;
    247 
    248 	num_spares_done = 0;
    249 
    250 	/* The space for the spares should have already been allocated by
    251 	 * ConfigureDisks() */
    252 
    253 	disks = &raidPtr->Disks[raidPtr->numCol];
    254 	for (i = 0; i < raidPtr->numSpare; i++) {
    255 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    256 				       &disks[i], raidPtr->numCol + i);
    257 		if (ret)
    258 			goto fail;
    259 		if (disks[i].status != rf_ds_optimal) {
    260 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    261 				     &cfgPtr->spare_names[i][0]);
    262 		} else {
    263 			disks[i].status = rf_ds_spare;	/* change status to
    264 							 * spare */
    265 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
    266 			    disks[i].devname,
    267 			    disks[i].numBlocks, disks[i].blockSize,
    268 			    (long int) disks[i].numBlocks *
    269 				 disks[i].blockSize / 1024 / 1024);
    270 		}
    271 		num_spares_done++;
    272 	}
    273 
    274 	/* check sizes and block sizes on spare disks */
    275 	bs = 1 << raidPtr->logBytesPerSector;
    276 	for (i = 0; i < raidPtr->numSpare; i++) {
    277 		if (disks[i].blockSize != bs) {
    278 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    279 			ret = EINVAL;
    280 			goto fail;
    281 		}
    282 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    283 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
    284 				     disks[i].devname, disks[i].blockSize,
    285 				     raidPtr->sectorsPerDisk);
    286 			ret = EINVAL;
    287 			goto fail;
    288 		} else
    289 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    290 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
    291 				    disks[i].devname,
    292 				    raidPtr->sectorsPerDisk,
    293 				    disks[i].numBlocks);
    294 
    295 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    296 			}
    297 	}
    298 
    299 	return (0);
    300 
    301 fail:
    302 
    303 	/* Release the hold on the main components.  We've failed to allocate
    304 	 * a spare, and since we're failing, we need to free things..
    305 
    306 	 XXX failing to allocate a spare is *not* that big of a deal...
    307 	 We *can* survive without it, if need be, esp. if we get hot
    308 	 adding working.
    309 
    310 	 If we don't fail out here, then we need a way to remove this spare...
    311 	 that should be easier to do here than if we are "live"...
    312 
    313 	 */
    314 
    315 	rf_UnconfigureVnodes( raidPtr );
    316 
    317 	return (ret);
    318 }
    319 
    320 static int
    321 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    322 {
    323 	int ret;
    324 
    325 	/* We allocate RF_MAXSPARE on the first row so that we
    326 	   have room to do hot-swapping of spares */
    327 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
    328 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    329 			raidPtr->cleanupList);
    330 	if (raidPtr->Disks == NULL) {
    331 		ret = ENOMEM;
    332 		goto fail;
    333 	}
    334 
    335 	/* get space for device specific stuff.. */
    336 	RF_MallocAndAdd(raidPtr->raid_cinfo,
    337 			(raidPtr->numCol + RF_MAXSPARE) *
    338 			sizeof(struct raidcinfo), (struct raidcinfo *),
    339 			raidPtr->cleanupList);
    340 
    341 	if (raidPtr->raid_cinfo == NULL) {
    342 		ret = ENOMEM;
    343 		goto fail;
    344 	}
    345 
    346 	return(0);
    347 fail:
    348 	rf_UnconfigureVnodes( raidPtr );
    349 
    350 	return(ret);
    351 }
    352 
    353 
    354 /* configure a single disk during auto-configuration at boot */
    355 int
    356 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
    357 		      RF_AutoConfig_t *auto_config)
    358 {
    359 	RF_RaidDisk_t *disks;
    360 	RF_RaidDisk_t *diskPtr;
    361 	RF_RowCol_t c;
    362 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    363 	int bs, ret;
    364 	int numFailuresThisRow;
    365 	RF_AutoConfig_t *ac;
    366 	int parity_good;
    367 	int mod_counter;
    368 	int mod_counter_found;
    369 
    370 #if DEBUG
    371 	printf("Starting autoconfiguration of RAID set...\n");
    372 #endif
    373 
    374 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    375 	if (ret)
    376 		goto fail;
    377 
    378 	disks = raidPtr->Disks;
    379 
    380 	/* assume the parity will be fine.. */
    381 	parity_good = RF_RAID_CLEAN;
    382 
    383 	/* Check for mod_counters that are too low */
    384 	mod_counter_found = 0;
    385 	mod_counter = 0;
    386 	ac = auto_config;
    387 	while(ac!=NULL) {
    388 		if (mod_counter_found==0) {
    389 			mod_counter = ac->clabel->mod_counter;
    390 			mod_counter_found = 1;
    391 		} else {
    392 			if (ac->clabel->mod_counter > mod_counter) {
    393 				mod_counter = ac->clabel->mod_counter;
    394 			}
    395 		}
    396 		ac->flag = 0; /* clear the general purpose flag */
    397 		ac = ac->next;
    398 	}
    399 
    400 	bs = 0;
    401 
    402 	numFailuresThisRow = 0;
    403 	for (c = 0; c < raidPtr->numCol; c++) {
    404 		diskPtr = &disks[c];
    405 
    406 		/* find this row/col in the autoconfig */
    407 #if DEBUG
    408 		printf("Looking for %d in autoconfig\n",c);
    409 #endif
    410 		ac = auto_config;
    411 		while(ac!=NULL) {
    412 			if (ac->clabel==NULL) {
    413 				/* big-time bad news. */
    414 				goto fail;
    415 			}
    416 			if ((ac->clabel->column == c) &&
    417 			    (ac->clabel->mod_counter == mod_counter)) {
    418 				/* it's this one... */
    419 				/* flag it as 'used', so we don't
    420 				   free it later. */
    421 				ac->flag = 1;
    422 #if DEBUG
    423 				printf("Found: %s at %d\n",
    424 				       ac->devname,c);
    425 #endif
    426 
    427 				break;
    428 			}
    429 			ac=ac->next;
    430 		}
    431 
    432 		if (ac==NULL) {
    433 			/* we didn't find an exact match with a
    434 			   correct mod_counter above... can we find
    435 			   one with an incorrect mod_counter to use
    436 			   instead?  (this one, if we find it, will be
    437 			   marked as failed once the set configures)
    438 			*/
    439 
    440 			ac = auto_config;
    441 			while(ac!=NULL) {
    442 				if (ac->clabel==NULL) {
    443 					/* big-time bad news. */
    444 					goto fail;
    445 				}
    446 				if (ac->clabel->column == c) {
    447 					/* it's this one...
    448 					   flag it as 'used', so we
    449 					   don't free it later. */
    450 					ac->flag = 1;
    451 #if DEBUG
    452 					printf("Found(low mod_counter): %s at %d\n",
    453 					       ac->devname,c);
    454 #endif
    455 
    456 					break;
    457 				}
    458 				ac=ac->next;
    459 			}
    460 		}
    461 
    462 
    463 
    464 		if (ac!=NULL) {
    465 			/* Found it.  Configure it.. */
    466 			diskPtr->blockSize = ac->clabel->blockSize;
    467 			diskPtr->numBlocks =
    468 			    rf_component_label_numblocks(ac->clabel);
    469 			/* Note: rf_protectedSectors is already
    470 			   factored into numBlocks here */
    471 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
    472 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
    473 
    474 			memcpy(raidget_component_label(raidPtr, c),
    475 			    ac->clabel, sizeof(*ac->clabel));
    476 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    477 			    "/dev/%s", ac->devname);
    478 
    479 			/* note the fact that this component was
    480 			   autoconfigured.  You'll need this info
    481 			   later.  Trust me :) */
    482 			diskPtr->auto_configured = 1;
    483 			diskPtr->dev = ac->dev;
    484 
    485 			/*
    486 			 * we allow the user to specify that
    487 			 * only a fraction of the disks should
    488 			 * be used this is just for debug: it
    489 			 * speeds up the parity scan
    490 			 */
    491 
    492 			diskPtr->numBlocks = diskPtr->numBlocks *
    493 				rf_sizePercentage / 100;
    494 
    495 			/* XXX these will get set multiple times,
    496 			   but since we're autoconfiguring, they'd
    497 			   better be always the same each time!
    498 			   If not, this is the least of your worries */
    499 
    500 			bs = diskPtr->blockSize;
    501 			min_numblks = diskPtr->numBlocks;
    502 
    503 			/* this gets done multiple times, but that's
    504 			   fine -- the serial number will be the same
    505 			   for all components, guaranteed */
    506 			raidPtr->serial_number = ac->clabel->serial_number;
    507 			/* check the last time the label was modified */
    508 
    509 			if (ac->clabel->mod_counter != mod_counter) {
    510 				/* Even though we've filled in all of
    511 				   the above, we don't trust this
    512 				   component since it's modification
    513 				   counter is not in sync with the
    514 				   rest, and we really consider it to
    515 				   be failed.  */
    516 				disks[c].status = rf_ds_failed;
    517 				numFailuresThisRow++;
    518 			} else {
    519 				if (ac->clabel->clean != RF_RAID_CLEAN) {
    520 					parity_good = RF_RAID_DIRTY;
    521 				}
    522 			}
    523 		} else {
    524 			/* Didn't find it at all!!  Component must
    525 			   really be dead */
    526 			disks[c].status = rf_ds_failed;
    527 			snprintf(disks[c].devname, sizeof(disks[c].devname),
    528 			    "component%d", c);
    529 			numFailuresThisRow++;
    530 		}
    531 	}
    532 	/* XXX fix for n-fault tolerant */
    533 	/* XXX this should probably check to see how many failures
    534 	   we can handle for this configuration! */
    535 	if (numFailuresThisRow > 0) {
    536 		raidPtr->status = rf_rs_degraded;
    537 		raidPtr->numFailures = numFailuresThisRow;
    538 	}
    539 
    540 	/* close the device for the ones that didn't get used */
    541 
    542 	ac = auto_config;
    543 	while(ac!=NULL) {
    544 		if (ac->flag == 0) {
    545 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    546 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
    547 			vput(ac->vp);
    548 			ac->vp = NULL;
    549 #if DEBUG
    550 			printf("Released %s from auto-config set.\n",
    551 			       ac->devname);
    552 #endif
    553 		}
    554 		ac = ac->next;
    555 	}
    556 
    557 	raidPtr->mod_counter = mod_counter;
    558 
    559 	/* note the state of the parity, if any */
    560 	raidPtr->parity_good = parity_good;
    561 	raidPtr->sectorsPerDisk = min_numblks;
    562 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    563 	raidPtr->bytesPerSector = bs;
    564 	raidPtr->sectorMask = bs - 1;
    565 	return (0);
    566 
    567 fail:
    568 
    569 	rf_UnconfigureVnodes( raidPtr );
    570 
    571 	return (ret);
    572 
    573 }
    574 
    575 /* configure a single disk in the array */
    576 int
    577 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
    578 		 RF_RowCol_t col)
    579 {
    580 	char   *p;
    581 	struct pathbuf *pb;
    582 	struct vnode *vp;
    583 	int     error;
    584 
    585 	p = rf_find_non_white(bf);
    586 	if (p[strlen(p) - 1] == '\n') {
    587 		/* strip off the newline */
    588 		p[strlen(p) - 1] = '\0';
    589 	}
    590 	(void) strcpy(diskPtr->devname, p);
    591 
    592 	/* Let's start by claiming the component is fine and well... */
    593 	diskPtr->status = rf_ds_optimal;
    594 
    595 	raidPtr->raid_cinfo[col].ci_vp = NULL;
    596 	raidPtr->raid_cinfo[col].ci_dev = 0;
    597 
    598 	if (!strcmp("absent", diskPtr->devname)) {
    599 		printf("Ignoring missing component at column %d\n", col);
    600 		snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    601 		    "component%d", col);
    602 		diskPtr->status = rf_ds_failed;
    603 		return (0);
    604 	}
    605 
    606 	pb = pathbuf_create(diskPtr->devname);
    607 	if (pb == NULL) {
    608 		printf("pathbuf_create for device: %s failed!\n",
    609 		       diskPtr->devname);
    610 		return ENOMEM;
    611 	}
    612 	error = dk_lookup(pb, curlwp, &vp);
    613 	pathbuf_destroy(pb);
    614 	if (error) {
    615 		printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
    616 		if (error == ENXIO) {
    617 			/* the component isn't there... must be dead :-( */
    618 			diskPtr->status = rf_ds_failed;
    619 			return 0;
    620 		} else {
    621 			return (error);
    622 		}
    623 	}
    624 
    625 	if ((error = rf_getdisksize(vp, diskPtr)) != 0)
    626 		return (error);
    627 
    628 	/*
    629 	 * If this raidPtr's bytesPerSector is zero, fill it in with this
    630 	 * components blockSize.  This will give us something to work with
    631 	 * initially, and if it is wrong, we'll get errors later.
    632 	 */
    633 	if (raidPtr->bytesPerSector == 0)
    634 		raidPtr->bytesPerSector = diskPtr->blockSize;
    635 
    636 	if (diskPtr->status == rf_ds_optimal) {
    637 		raidPtr->raid_cinfo[col].ci_vp = vp;
    638 		raidPtr->raid_cinfo[col].ci_dev = vp->v_rdev;
    639 
    640 		/* This component was not automatically configured */
    641 		diskPtr->auto_configured = 0;
    642 		diskPtr->dev = vp->v_rdev;
    643 
    644 		/* we allow the user to specify that only a fraction of the
    645 		 * disks should be used this is just for debug:  it speeds up
    646 		 * the parity scan */
    647 		diskPtr->numBlocks = diskPtr->numBlocks *
    648 			rf_sizePercentage / 100;
    649 	}
    650 
    651 	/*
    652 	 * Tell the rest of the kernel to check whether anything's
    653 	 * maximum transfer size has changed -- like, for example,
    654 	 * a filesystem that might be mounted on a set where we're
    655 	 * adding a spare with a smaller maximum transfer size than
    656 	 * the original set members.
    657 	 */
    658 	atomic_inc_uint(&disk_serial);
    659 	return (0);
    660 }
    661 
    662 static void
    663 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
    664 		      RF_ComponentLabel_t *ci_label)
    665 {
    666 
    667 	printf("raid%d: Component %s being configured at col: %d\n",
    668 	       raidPtr->raidid, dev_name, column );
    669 	printf("         Column: %d Num Columns: %d\n",
    670 	       ci_label->column,
    671 	       ci_label->num_columns);
    672 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    673 	       ci_label->version, ci_label->serial_number,
    674 	       ci_label->mod_counter);
    675 	printf("         Clean: %s Status: %d\n",
    676 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    677 }
    678 
    679 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
    680 				 char *dev_name, RF_ComponentLabel_t *ci_label,
    681 				 int serial_number, int mod_counter)
    682 {
    683 	int fatal_error = 0;
    684 
    685 	if (serial_number != ci_label->serial_number) {
    686 		printf("%s has a different serial number: %d %d\n",
    687 		       dev_name, serial_number, ci_label->serial_number);
    688 		fatal_error = 1;
    689 	}
    690 	if (mod_counter != ci_label->mod_counter) {
    691 		printf("%s has a different modification count: %d %d\n",
    692 		       dev_name, mod_counter, ci_label->mod_counter);
    693 	}
    694 
    695 	if (row != ci_label->row) {
    696 		printf("Row out of alignment for: %s\n", dev_name);
    697 		fatal_error = 1;
    698 	}
    699 	if (column != ci_label->column) {
    700 		printf("Column out of alignment for: %s\n", dev_name);
    701 		fatal_error = 1;
    702 	}
    703 	if (raidPtr->numCol != ci_label->num_columns) {
    704 		printf("Number of columns do not match for: %s\n", dev_name);
    705 		fatal_error = 1;
    706 	}
    707 	if (ci_label->clean == 0) {
    708 		/* it's not clean, but that's not fatal */
    709 		printf("%s is not clean!\n", dev_name);
    710 	}
    711 	return(fatal_error);
    712 }
    713 
    714 
    715 /*
    716 
    717    rf_CheckLabels() - check all the component labels for consistency.
    718    Return an error if there is anything major amiss.
    719 
    720  */
    721 
    722 int
    723 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    724 {
    725 	int c;
    726 	char *dev_name;
    727 	RF_ComponentLabel_t *ci_label;
    728 	int serial_number = 0;
    729 	int mod_number = 0;
    730 	int fatal_error = 0;
    731 	int mod_values[4];
    732 	int mod_count[4];
    733 	int ser_values[4];
    734 	int ser_count[4];
    735 	int num_ser;
    736 	int num_mod;
    737 	int i;
    738 	int found;
    739 	int hosed_column;
    740 	int too_fatal;
    741 	int parity_good;
    742 	int force;
    743 
    744 	hosed_column = -1;
    745 	too_fatal = 0;
    746 	force = cfgPtr->force;
    747 
    748 	/*
    749 	   We're going to try to be a little intelligent here.  If one
    750 	   component's label is bogus, and we can identify that it's the
    751 	   *only* one that's gone, we'll mark it as "failed" and allow
    752 	   the configuration to proceed.  This will be the *only* case
    753 	   that we'll proceed if there would be (otherwise) fatal errors.
    754 
    755 	   Basically we simply keep a count of how many components had
    756 	   what serial number.  If all but one agree, we simply mark
    757 	   the disagreeing component as being failed, and allow
    758 	   things to come up "normally".
    759 
    760 	   We do this first for serial numbers, and then for "mod_counter".
    761 
    762 	 */
    763 
    764 	num_ser = 0;
    765 	num_mod = 0;
    766 
    767 	ser_values[0] = ser_values[1] = ser_values[2] = ser_values[3] = 0;
    768 	ser_count[0] = ser_count[1] = ser_count[2] = ser_count[3] = 0;
    769 	mod_values[0] = mod_values[1] = mod_values[2] = mod_values[3] = 0;
    770 	mod_count[0] = mod_count[1] = mod_count[2] = mod_count[3] = 0;
    771 
    772 	for (c = 0; c < raidPtr->numCol; c++) {
    773 		if (raidPtr->Disks[c].status != rf_ds_optimal)
    774 			continue;
    775 		ci_label = raidget_component_label(raidPtr, c);
    776 		found=0;
    777 		for(i=0;i<num_ser;i++) {
    778 			if (ser_values[i] == ci_label->serial_number) {
    779 				ser_count[i]++;
    780 				found=1;
    781 				break;
    782 			}
    783 		}
    784 		if (!found) {
    785 			ser_values[num_ser] = ci_label->serial_number;
    786 			ser_count[num_ser] = 1;
    787 			num_ser++;
    788 			if (num_ser>2) {
    789 				fatal_error = 1;
    790 				break;
    791 			}
    792 		}
    793 		found=0;
    794 		for(i=0;i<num_mod;i++) {
    795 			if (mod_values[i] == ci_label->mod_counter) {
    796 				mod_count[i]++;
    797 				found=1;
    798 				break;
    799 			}
    800 		}
    801 		if (!found) {
    802 			mod_values[num_mod] = ci_label->mod_counter;
    803 			mod_count[num_mod] = 1;
    804 			num_mod++;
    805 			if (num_mod>2) {
    806 				fatal_error = 1;
    807 				break;
    808 			}
    809 		}
    810 	}
    811 #if DEBUG
    812 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    813 	for(i=0;i<num_ser;i++) {
    814 		printf("%d %d\n", ser_values[i], ser_count[i]);
    815 	}
    816 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    817 	for(i=0;i<num_mod;i++) {
    818 		printf("%d %d\n", mod_values[i], mod_count[i]);
    819 	}
    820 #endif
    821 	serial_number = ser_values[0];
    822 	if (num_ser == 2) {
    823 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    824 			/* Locate the maverick component */
    825 			if (ser_count[1] > ser_count[0]) {
    826 				serial_number = ser_values[1];
    827 			}
    828 
    829 			for (c = 0; c < raidPtr->numCol; c++) {
    830 				if (raidPtr->Disks[c].status != rf_ds_optimal)
    831 					continue;
    832 				ci_label = raidget_component_label(raidPtr, c);
    833 				if (serial_number != ci_label->serial_number) {
    834 					hosed_column = c;
    835 					break;
    836 				}
    837 			}
    838 			printf("Hosed component: %s\n",
    839 			       &cfgPtr->devnames[0][hosed_column][0]);
    840 			if (!force) {
    841 				/* we'll fail this component, as if there are
    842 				   other major errors, we arn't forcing things
    843 				   and we'll abort the config anyways */
    844 				raidPtr->Disks[hosed_column].status
    845 					= rf_ds_failed;
    846 				raidPtr->numFailures++;
    847 				raidPtr->status = rf_rs_degraded;
    848 			}
    849 		} else {
    850 			too_fatal = 1;
    851 		}
    852 		if (cfgPtr->parityConfig == '0') {
    853 			/* We've identified two different serial numbers.
    854 			   RAID 0 can't cope with that, so we'll punt */
    855 			too_fatal = 1;
    856 		}
    857 
    858 	}
    859 
    860 	/* record the serial number for later.  If we bail later, setting
    861 	   this doesn't matter, otherwise we've got the best guess at the
    862 	   correct serial number */
    863 	raidPtr->serial_number = serial_number;
    864 
    865 	mod_number = mod_values[0];
    866 	if (num_mod == 2) {
    867 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    868 			/* Locate the maverick component */
    869 			if (mod_count[1] > mod_count[0]) {
    870 				mod_number = mod_values[1];
    871 			} else if (mod_count[1] < mod_count[0]) {
    872 				mod_number = mod_values[0];
    873 			} else {
    874 				/* counts of different modification values
    875 				   are the same.   Assume greater value is
    876 				   the correct one, all other things
    877 				   considered */
    878 				if (mod_values[0] > mod_values[1]) {
    879 					mod_number = mod_values[0];
    880 				} else {
    881 					mod_number = mod_values[1];
    882 				}
    883 
    884 			}
    885 
    886 			for (c = 0; c < raidPtr->numCol; c++) {
    887 				if (raidPtr->Disks[c].status != rf_ds_optimal)
    888 					continue;
    889 
    890 				ci_label = raidget_component_label(raidPtr, c);
    891 				if (mod_number != ci_label->mod_counter) {
    892 					if (hosed_column == c) {
    893 						/* same one.  Can
    894 						   deal with it.  */
    895 					} else {
    896 						hosed_column = c;
    897 						if (num_ser != 1) {
    898 							too_fatal = 1;
    899 							break;
    900 						}
    901 					}
    902 				}
    903 			}
    904 			printf("Hosed component: %s\n",
    905 			       &cfgPtr->devnames[0][hosed_column][0]);
    906 			if (!force) {
    907 				/* we'll fail this component, as if there are
    908 				   other major errors, we arn't forcing things
    909 				   and we'll abort the config anyways */
    910 				if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
    911 					raidPtr->Disks[hosed_column].status
    912 						= rf_ds_failed;
    913 					raidPtr->numFailures++;
    914 					raidPtr->status = rf_rs_degraded;
    915 				}
    916 			}
    917 		} else {
    918 			too_fatal = 1;
    919 		}
    920 		if (cfgPtr->parityConfig == '0') {
    921 			/* We've identified two different mod counters.
    922 			   RAID 0 can't cope with that, so we'll punt */
    923 			too_fatal = 1;
    924 		}
    925 	}
    926 
    927 	raidPtr->mod_counter = mod_number;
    928 
    929 	if (too_fatal) {
    930 		/* we've had both a serial number mismatch, and a mod_counter
    931 		   mismatch -- and they involved two different components!!
    932 		   Bail -- make things fail so that the user must force
    933 		   the issue... */
    934 		hosed_column = -1;
    935 		fatal_error = 1;
    936 	}
    937 
    938 	if (num_ser > 2) {
    939 		printf("raid%d: Too many different serial numbers!\n",
    940 		       raidPtr->raidid);
    941 		fatal_error = 1;
    942 	}
    943 
    944 	if (num_mod > 2) {
    945 		printf("raid%d: Too many different mod counters!\n",
    946 		       raidPtr->raidid);
    947 		fatal_error = 1;
    948 	}
    949 
    950         for (c = 0; c < raidPtr->numCol; c++) {
    951 		if (raidPtr->Disks[c].status != rf_ds_optimal) {
    952 			hosed_column = c;
    953 			break;
    954 		}
    955 	}
    956 
    957 	/* we start by assuming the parity will be good, and flee from
    958 	   that notion at the slightest sign of trouble */
    959 
    960 	parity_good = RF_RAID_CLEAN;
    961 
    962 	for (c = 0; c < raidPtr->numCol; c++) {
    963 		dev_name = &cfgPtr->devnames[0][c][0];
    964 		ci_label = raidget_component_label(raidPtr, c);
    965 
    966 		if (c == hosed_column) {
    967 			printf("raid%d: Ignoring %s\n",
    968 			       raidPtr->raidid, dev_name);
    969 		} else {
    970 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
    971 			if (rf_check_label_vitals( raidPtr, 0, c,
    972 						   dev_name, ci_label,
    973 						   serial_number,
    974 						   mod_number )) {
    975 				fatal_error = 1;
    976 			}
    977 			if (ci_label->clean != RF_RAID_CLEAN) {
    978 				parity_good = RF_RAID_DIRTY;
    979 			}
    980 		}
    981 	}
    982 
    983 	if (fatal_error) {
    984 		parity_good = RF_RAID_DIRTY;
    985 	}
    986 
    987 	/* we note the state of the parity */
    988 	raidPtr->parity_good = parity_good;
    989 
    990 	return(fatal_error);
    991 }
    992 
    993 int
    994 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
    995 {
    996 	RF_RaidDisk_t *disks;
    997 	RF_DiskQueue_t *spareQueues;
    998 	int ret;
    999 	unsigned int bs;
   1000 	int spare_number;
   1001 
   1002 	ret=0;
   1003 
   1004 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1005 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1006 		return(EINVAL);
   1007 	}
   1008 
   1009 	rf_lock_mutex2(raidPtr->mutex);
   1010 	while (raidPtr->adding_hot_spare == 1) {
   1011 		rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
   1012 	}
   1013 	raidPtr->adding_hot_spare = 1;
   1014 	rf_unlock_mutex2(raidPtr->mutex);
   1015 
   1016 	/* the beginning of the spares... */
   1017 	disks = &raidPtr->Disks[raidPtr->numCol];
   1018 
   1019 	spare_number = raidPtr->numSpare;
   1020 
   1021 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1022 			       &disks[spare_number],
   1023 			       raidPtr->numCol + spare_number);
   1024 
   1025 	if (ret)
   1026 		goto fail;
   1027 	if (disks[spare_number].status != rf_ds_optimal) {
   1028 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1029 			     sparePtr->component_name);
   1030 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1031 		ret=EINVAL;
   1032 		goto fail;
   1033 	} else {
   1034 		disks[spare_number].status = rf_ds_spare;
   1035 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
   1036 			 spare_number,
   1037 			 disks[spare_number].devname,
   1038 			 disks[spare_number].numBlocks,
   1039 			 disks[spare_number].blockSize,
   1040 			 (long int) disks[spare_number].numBlocks *
   1041 			 disks[spare_number].blockSize / 1024 / 1024);
   1042 	}
   1043 
   1044 
   1045 	/* check sizes and block sizes on the spare disk */
   1046 	bs = 1 << raidPtr->logBytesPerSector;
   1047 	if (disks[spare_number].blockSize != bs) {
   1048 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1049 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1050 		ret = EINVAL;
   1051 		goto fail;
   1052 	}
   1053 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1054 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
   1055 			     disks[spare_number].devname,
   1056 			     disks[spare_number].blockSize,
   1057 			     raidPtr->sectorsPerDisk);
   1058 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1059 		ret = EINVAL;
   1060 		goto fail;
   1061 	} else {
   1062 		if (disks[spare_number].numBlocks >
   1063 		    raidPtr->sectorsPerDisk) {
   1064 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
   1065 			    disks[spare_number].devname,
   1066 			    raidPtr->sectorsPerDisk,
   1067 			    disks[spare_number].numBlocks);
   1068 
   1069 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1070 		}
   1071 	}
   1072 
   1073 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
   1074 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1075 				 raidPtr->numCol + spare_number,
   1076 				 raidPtr->qType,
   1077 				 raidPtr->sectorsPerDisk,
   1078 				 raidPtr->Disks[raidPtr->numCol +
   1079 						  spare_number].dev,
   1080 				 raidPtr->maxOutstanding,
   1081 				 &raidPtr->shutdownList,
   1082 				 raidPtr->cleanupList);
   1083 
   1084 	rf_lock_mutex2(raidPtr->mutex);
   1085 	raidPtr->numSpare++;
   1086 	rf_unlock_mutex2(raidPtr->mutex);
   1087 
   1088 fail:
   1089 	rf_lock_mutex2(raidPtr->mutex);
   1090 	raidPtr->adding_hot_spare = 0;
   1091 	rf_signal_cond2(raidPtr->adding_hot_spare_cv);
   1092 	rf_unlock_mutex2(raidPtr->mutex);
   1093 
   1094 	return(ret);
   1095 }
   1096 
   1097 int
   1098 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
   1099 {
   1100 #if 0
   1101 	int spare_number;
   1102 #endif
   1103 
   1104 	if (raidPtr->numSpare==0) {
   1105 		printf("No spares to remove!\n");
   1106 		return(EINVAL);
   1107 	}
   1108 
   1109 	return(EINVAL); /* XXX not implemented yet */
   1110 #if 0
   1111 	spare_number = sparePtr->column;
   1112 
   1113 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1114 		return(EINVAL);
   1115 	}
   1116 
   1117 	/* verify that this spare isn't in use... */
   1118 
   1119 
   1120 
   1121 
   1122 	/* it's gone.. */
   1123 
   1124 	raidPtr->numSpare--;
   1125 
   1126 	return(0);
   1127 #endif
   1128 }
   1129 
   1130 
   1131 int
   1132 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
   1133 {
   1134 #if 0
   1135 	RF_RaidDisk_t *disks;
   1136 #endif
   1137 
   1138 	if ((component->column < 0) ||
   1139 	    (component->column >= raidPtr->numCol)) {
   1140 		return(EINVAL);
   1141 	}
   1142 
   1143 #if 0
   1144 	disks = &raidPtr->Disks[component->column];
   1145 #endif
   1146 
   1147 	/* 1. This component must be marked as 'failed' */
   1148 
   1149 	return(EINVAL); /* Not implemented yet. */
   1150 }
   1151 
   1152 int
   1153 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
   1154     RF_SingleComponent_t *component)
   1155 {
   1156 
   1157 	/* Issues here include how to 'move' this in if there is IO
   1158 	   taking place (e.g. component queues and such) */
   1159 
   1160 	return(EINVAL); /* Not implemented yet. */
   1161 }
   1162