Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.72.2.2
      1 /*	$NetBSD: rf_disks.c,v 1.72.2.2 2010/11/06 08:08:32 uebayasi Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1995 Carnegie-Mellon University.
     33  * All rights reserved.
     34  *
     35  * Author: Mark Holland
     36  *
     37  * Permission to use, copy, modify and distribute this software and
     38  * its documentation is hereby granted, provided that both the copyright
     39  * notice and this permission notice appear in all copies of the
     40  * software, derivative works or modified versions, and any portions
     41  * thereof, and that both notices appear in supporting documentation.
     42  *
     43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     46  *
     47  * Carnegie Mellon requests users of this software to return to
     48  *
     49  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     50  *  School of Computer Science
     51  *  Carnegie Mellon University
     52  *  Pittsburgh PA 15213-3890
     53  *
     54  * any improvements or extensions that they make and grant Carnegie the
     55  * rights to redistribute these changes.
     56  */
     57 
     58 /***************************************************************
     59  * rf_disks.c -- code to perform operations on the actual disks
     60  ***************************************************************/
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.72.2.2 2010/11/06 08:08:32 uebayasi Exp $");
     64 
     65 #include <dev/raidframe/raidframevar.h>
     66 
     67 #include "rf_raid.h"
     68 #include "rf_alloclist.h"
     69 #include "rf_utils.h"
     70 #include "rf_general.h"
     71 #include "rf_options.h"
     72 #include "rf_kintf.h"
     73 #include "rf_netbsd.h"
     74 
     75 #include <sys/param.h>
     76 #include <sys/systm.h>
     77 #include <sys/proc.h>
     78 #include <sys/ioctl.h>
     79 #include <sys/fcntl.h>
     80 #include <sys/vnode.h>
     81 #include <sys/kauth.h>
     82 
     83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     84 static void rf_print_label_status( RF_Raid_t *, int, char *,
     85 				  RF_ComponentLabel_t *);
     86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     87 				  RF_ComponentLabel_t *, int, int );
     88 
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 * Each is wrapped in do { } while (0) so that it expands to exactly one
 * statement and cannot capture an "else" that follows the invocation.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     91 
     92 /**************************************************************************
     93  *
     94  * initialize the disks comprising the array
     95  *
     96  * We want the spare disks to have regular row,col numbers so that we can
      97  * easily substitute a spare for a failed disk.  But, the driver code assumes
     98  * throughout that the array contains numRow by numCol _non-spare_ disks, so
     99  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    100  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    101  * rest, and put all the spares in it.  This probably needs to get changed
    102  * eventually.
    103  *
    104  **************************************************************************/
    105 
    106 int
    107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    108 		  RF_Config_t *cfgPtr)
    109 {
    110 	RF_RaidDisk_t *disks;
    111 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    112 	RF_RowCol_t c;
    113 	int bs, ret;
    114 	unsigned i, count, foundone = 0, numFailuresThisRow;
    115 	int force;
    116 
    117 	force = cfgPtr->force;
    118 
    119 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    120 	if (ret)
    121 		goto fail;
    122 
    123 	disks = raidPtr->Disks;
    124 
    125 	numFailuresThisRow = 0;
    126 	for (c = 0; c < raidPtr->numCol; c++) {
    127 		ret = rf_ConfigureDisk(raidPtr,
    128 				       &cfgPtr->devnames[0][c][0],
    129 				       &disks[c], c);
    130 
    131 		if (ret)
    132 			goto fail;
    133 
    134 		if (disks[c].status == rf_ds_optimal) {
    135 			ret = raidfetch_component_label(raidPtr, c);
    136 			if (ret)
    137 				goto fail;
    138 		}
    139 
    140 		if (disks[c].status != rf_ds_optimal) {
    141 			numFailuresThisRow++;
    142 		} else {
    143 			if (disks[c].numBlocks < min_numblks)
    144 				min_numblks = disks[c].numBlocks;
    145 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
    146 				 c, disks[c].devname,
    147 				 disks[c].numBlocks,
    148 				 disks[c].blockSize,
    149 				 (long int) disks[c].numBlocks *
    150 				 disks[c].blockSize / 1024 / 1024);
    151 		}
    152 	}
    153 	/* XXX fix for n-fault tolerant */
    154 	/* XXX this should probably check to see how many failures
    155 	   we can handle for this configuration! */
    156 	if (numFailuresThisRow > 0)
    157 		raidPtr->status = rf_rs_degraded;
    158 
    159 	/* all disks must be the same size & have the same block size, bs must
    160 	 * be a power of 2 */
    161 	bs = 0;
    162 	foundone = 0;
    163 	for (c = 0; c < raidPtr->numCol; c++) {
    164 		if (disks[c].status == rf_ds_optimal) {
    165 			bs = disks[c].blockSize;
    166 			foundone = 1;
    167 			break;
    168 		}
    169 	}
    170 	if (!foundone) {
    171 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    172 		ret = EINVAL;
    173 		goto fail;
    174 	}
    175 	for (count = 0, i = 1; i; i <<= 1)
    176 		if (bs & i)
    177 			count++;
    178 	if (count != 1) {
    179 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    180 		ret = EINVAL;
    181 		goto fail;
    182 	}
    183 
    184 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    185 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    186 		if (force != 0) {
    187 			printf("raid%d: Fatal errors being ignored.\n",
    188 			       raidPtr->raidid);
    189 		} else {
    190 			ret = EINVAL;
    191 			goto fail;
    192 		}
    193 	}
    194 
    195 	for (c = 0; c < raidPtr->numCol; c++) {
    196 		if (disks[c].status == rf_ds_optimal) {
    197 			if (disks[c].blockSize != bs) {
    198 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
    199 				ret = EINVAL;
    200 				goto fail;
    201 			}
    202 			if (disks[c].numBlocks != min_numblks) {
    203 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
    204 					     c, (int) min_numblks);
    205 				disks[c].numBlocks = min_numblks;
    206 			}
    207 		}
    208 	}
    209 
    210 	raidPtr->sectorsPerDisk = min_numblks;
    211 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    212 	raidPtr->bytesPerSector = bs;
    213 	raidPtr->sectorMask = bs - 1;
    214 	return (0);
    215 
    216 fail:
    217 
    218 	rf_UnconfigureVnodes( raidPtr );
    219 
    220 	return (ret);
    221 }
    222 
    223 
    224 /****************************************************************************
    225  * set up the data structures describing the spare disks in the array
    226  * recall from the above comment that the spare disk descriptors are stored
    227  * in row zero, which is specially expanded to hold them.
    228  ****************************************************************************/
    229 int
    230 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    231 		       RF_Config_t *cfgPtr)
    232 {
    233 	int     i, ret;
    234 	unsigned int bs;
    235 	RF_RaidDisk_t *disks;
    236 	int     num_spares_done;
    237 
    238 	num_spares_done = 0;
    239 
    240 	/* The space for the spares should have already been allocated by
    241 	 * ConfigureDisks() */
    242 
    243 	disks = &raidPtr->Disks[raidPtr->numCol];
    244 	for (i = 0; i < raidPtr->numSpare; i++) {
    245 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    246 				       &disks[i], raidPtr->numCol + i);
    247 		if (ret)
    248 			goto fail;
    249 		if (disks[i].status != rf_ds_optimal) {
    250 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    251 				     &cfgPtr->spare_names[i][0]);
    252 		} else {
    253 			disks[i].status = rf_ds_spare;	/* change status to
    254 							 * spare */
    255 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
    256 			    disks[i].devname,
    257 			    disks[i].numBlocks, disks[i].blockSize,
    258 			    (long int) disks[i].numBlocks *
    259 				 disks[i].blockSize / 1024 / 1024);
    260 		}
    261 		num_spares_done++;
    262 	}
    263 
    264 	/* check sizes and block sizes on spare disks */
    265 	bs = 1 << raidPtr->logBytesPerSector;
    266 	for (i = 0; i < raidPtr->numSpare; i++) {
    267 		if (disks[i].blockSize != bs) {
    268 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    269 			ret = EINVAL;
    270 			goto fail;
    271 		}
    272 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    273 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
    274 				     disks[i].devname, disks[i].blockSize,
    275 				     raidPtr->sectorsPerDisk);
    276 			ret = EINVAL;
    277 			goto fail;
    278 		} else
    279 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    280 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
    281 				    disks[i].devname,
    282 				    raidPtr->sectorsPerDisk,
    283 				    disks[i].numBlocks);
    284 
    285 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    286 			}
    287 	}
    288 
    289 	return (0);
    290 
    291 fail:
    292 
    293 	/* Release the hold on the main components.  We've failed to allocate
    294 	 * a spare, and since we're failing, we need to free things..
    295 
    296 	 XXX failing to allocate a spare is *not* that big of a deal...
    297 	 We *can* survive without it, if need be, esp. if we get hot
    298 	 adding working.
    299 
    300 	 If we don't fail out here, then we need a way to remove this spare...
    301 	 that should be easier to do here than if we are "live"...
    302 
    303 	 */
    304 
    305 	rf_UnconfigureVnodes( raidPtr );
    306 
    307 	return (ret);
    308 }
    309 
    310 static int
    311 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    312 {
    313 	int ret;
    314 
    315 	/* We allocate RF_MAXSPARE on the first row so that we
    316 	   have room to do hot-swapping of spares */
    317 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
    318 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    319 			raidPtr->cleanupList);
    320 	if (raidPtr->Disks == NULL) {
    321 		ret = ENOMEM;
    322 		goto fail;
    323 	}
    324 
    325 	/* get space for device specific stuff.. */
    326 	RF_MallocAndAdd(raidPtr->raid_cinfo,
    327 			(raidPtr->numCol + RF_MAXSPARE) *
    328 			sizeof(struct raidcinfo), (struct raidcinfo *),
    329 			raidPtr->cleanupList);
    330 
    331 	if (raidPtr->raid_cinfo == NULL) {
    332 		ret = ENOMEM;
    333 		goto fail;
    334 	}
    335 
    336 	return(0);
    337 fail:
    338 	rf_UnconfigureVnodes( raidPtr );
    339 
    340 	return(ret);
    341 }
    342 
    343 
    344 /* configure a single disk during auto-configuration at boot */
    345 int
    346 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
    347 		      RF_AutoConfig_t *auto_config)
    348 {
    349 	RF_RaidDisk_t *disks;
    350 	RF_RaidDisk_t *diskPtr;
    351 	RF_RowCol_t c;
    352 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    353 	int bs, ret;
    354 	int numFailuresThisRow;
    355 	RF_AutoConfig_t *ac;
    356 	int parity_good;
    357 	int mod_counter;
    358 	int mod_counter_found;
    359 
    360 #if DEBUG
    361 	printf("Starting autoconfiguration of RAID set...\n");
    362 #endif
    363 
    364 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    365 	if (ret)
    366 		goto fail;
    367 
    368 	disks = raidPtr->Disks;
    369 
    370 	/* assume the parity will be fine.. */
    371 	parity_good = RF_RAID_CLEAN;
    372 
    373 	/* Check for mod_counters that are too low */
    374 	mod_counter_found = 0;
    375 	mod_counter = 0;
    376 	ac = auto_config;
    377 	while(ac!=NULL) {
    378 		if (mod_counter_found==0) {
    379 			mod_counter = ac->clabel->mod_counter;
    380 			mod_counter_found = 1;
    381 		} else {
    382 			if (ac->clabel->mod_counter > mod_counter) {
    383 				mod_counter = ac->clabel->mod_counter;
    384 			}
    385 		}
    386 		ac->flag = 0; /* clear the general purpose flag */
    387 		ac = ac->next;
    388 	}
    389 
    390 	bs = 0;
    391 
    392 	numFailuresThisRow = 0;
    393 	for (c = 0; c < raidPtr->numCol; c++) {
    394 		diskPtr = &disks[c];
    395 
    396 		/* find this row/col in the autoconfig */
    397 #if DEBUG
    398 		printf("Looking for %d in autoconfig\n",c);
    399 #endif
    400 		ac = auto_config;
    401 		while(ac!=NULL) {
    402 			if (ac->clabel==NULL) {
    403 				/* big-time bad news. */
    404 				goto fail;
    405 			}
    406 			if ((ac->clabel->column == c) &&
    407 			    (ac->clabel->mod_counter == mod_counter)) {
    408 				/* it's this one... */
    409 				/* flag it as 'used', so we don't
    410 				   free it later. */
    411 				ac->flag = 1;
    412 #if DEBUG
    413 				printf("Found: %s at %d\n",
    414 				       ac->devname,c);
    415 #endif
    416 
    417 				break;
    418 			}
    419 			ac=ac->next;
    420 		}
    421 
    422 		if (ac==NULL) {
    423 			/* we didn't find an exact match with a
    424 			   correct mod_counter above... can we find
    425 			   one with an incorrect mod_counter to use
    426 			   instead?  (this one, if we find it, will be
    427 			   marked as failed once the set configures)
    428 			*/
    429 
    430 			ac = auto_config;
    431 			while(ac!=NULL) {
    432 				if (ac->clabel==NULL) {
    433 					/* big-time bad news. */
    434 					goto fail;
    435 				}
    436 				if (ac->clabel->column == c) {
    437 					/* it's this one...
    438 					   flag it as 'used', so we
    439 					   don't free it later. */
    440 					ac->flag = 1;
    441 #if DEBUG
    442 					printf("Found(low mod_counter): %s at %d\n",
    443 					       ac->devname,c);
    444 #endif
    445 
    446 					break;
    447 				}
    448 				ac=ac->next;
    449 			}
    450 		}
    451 
    452 
    453 
    454 		if (ac!=NULL) {
    455 			/* Found it.  Configure it.. */
    456 			diskPtr->blockSize = ac->clabel->blockSize;
    457 			diskPtr->numBlocks = ac->clabel->numBlocks;
    458 			diskPtr->numBlocks |=
    459 			    (uint64_t)ac->clabel->numBlocksHi << 32;
    460 			/* Note: rf_protectedSectors is already
    461 			   factored into numBlocks here */
    462 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
    463 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
    464 
    465 			memcpy(raidget_component_label(raidPtr, c),
    466 			    ac->clabel, sizeof(*ac->clabel));
    467 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    468 			    "/dev/%s", ac->devname);
    469 
    470 			/* note the fact that this component was
    471 			   autoconfigured.  You'll need this info
    472 			   later.  Trust me :) */
    473 			diskPtr->auto_configured = 1;
    474 			diskPtr->dev = ac->dev;
    475 
    476 			/*
    477 			 * we allow the user to specify that
    478 			 * only a fraction of the disks should
    479 			 * be used this is just for debug: it
    480 			 * speeds up the parity scan
    481 			 */
    482 
    483 			diskPtr->numBlocks = diskPtr->numBlocks *
    484 				rf_sizePercentage / 100;
    485 
    486 			/* XXX these will get set multiple times,
    487 			   but since we're autoconfiguring, they'd
    488 			   better be always the same each time!
    489 			   If not, this is the least of your worries */
    490 
    491 			bs = diskPtr->blockSize;
    492 			min_numblks = diskPtr->numBlocks;
    493 
    494 			/* this gets done multiple times, but that's
    495 			   fine -- the serial number will be the same
    496 			   for all components, guaranteed */
    497 			raidPtr->serial_number = ac->clabel->serial_number;
    498 			/* check the last time the label was modified */
    499 
    500 			if (ac->clabel->mod_counter != mod_counter) {
    501 				/* Even though we've filled in all of
    502 				   the above, we don't trust this
    503 				   component since it's modification
    504 				   counter is not in sync with the
    505 				   rest, and we really consider it to
    506 				   be failed.  */
    507 				disks[c].status = rf_ds_failed;
    508 				numFailuresThisRow++;
    509 			} else {
    510 				if (ac->clabel->clean != RF_RAID_CLEAN) {
    511 					parity_good = RF_RAID_DIRTY;
    512 				}
    513 			}
    514 		} else {
    515 			/* Didn't find it at all!!  Component must
    516 			   really be dead */
    517 			disks[c].status = rf_ds_failed;
    518 			snprintf(disks[c].devname, sizeof(disks[c].devname),
    519 			    "component%d", c);
    520 			numFailuresThisRow++;
    521 		}
    522 	}
    523 	/* XXX fix for n-fault tolerant */
    524 	/* XXX this should probably check to see how many failures
    525 	   we can handle for this configuration! */
    526 	if (numFailuresThisRow > 0) {
    527 		raidPtr->status = rf_rs_degraded;
    528 		raidPtr->numFailures = numFailuresThisRow;
    529 	}
    530 
    531 	/* close the device for the ones that didn't get used */
    532 
    533 	ac = auto_config;
    534 	while(ac!=NULL) {
    535 		if (ac->flag == 0) {
    536 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    537 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
    538 			vput(ac->vp);
    539 			ac->vp = NULL;
    540 #if DEBUG
    541 			printf("Released %s from auto-config set.\n",
    542 			       ac->devname);
    543 #endif
    544 		}
    545 		ac = ac->next;
    546 	}
    547 
    548 	raidPtr->mod_counter = mod_counter;
    549 
    550 	/* note the state of the parity, if any */
    551 	raidPtr->parity_good = parity_good;
    552 	raidPtr->sectorsPerDisk = min_numblks;
    553 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    554 	raidPtr->bytesPerSector = bs;
    555 	raidPtr->sectorMask = bs - 1;
    556 	return (0);
    557 
    558 fail:
    559 
    560 	rf_UnconfigureVnodes( raidPtr );
    561 
    562 	return (ret);
    563 
    564 }
    565 
    566 /* configure a single disk in the array */
    567 int
    568 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
    569 		 RF_RowCol_t col)
    570 {
    571 	char   *p;
    572 	struct vnode *vp;
    573 	struct vattr va;
    574 	int     error;
    575 
    576 	p = rf_find_non_white(bf);
    577 	if (p[strlen(p) - 1] == '\n') {
    578 		/* strip off the newline */
    579 		p[strlen(p) - 1] = '\0';
    580 	}
    581 	(void) strcpy(diskPtr->devname, p);
    582 
    583 	/* Let's start by claiming the component is fine and well... */
    584 	diskPtr->status = rf_ds_optimal;
    585 
    586 	raidPtr->raid_cinfo[col].ci_vp = NULL;
    587 	raidPtr->raid_cinfo[col].ci_dev = 0;
    588 
    589 	if (!strcmp("absent", diskPtr->devname)) {
    590 		printf("Ignoring missing component at column %d\n", col);
    591 		sprintf(diskPtr->devname, "component%d", col);
    592 		diskPtr->status = rf_ds_failed;
    593 		return (0);
    594 	}
    595 
    596 	error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
    597 	if (error) {
    598 		printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
    599 		if (error == ENXIO) {
    600 			/* the component isn't there... must be dead :-( */
    601 			diskPtr->status = rf_ds_failed;
    602 		} else {
    603 			return (error);
    604 		}
    605 	}
    606 	if (diskPtr->status == rf_ds_optimal) {
    607 
    608 		if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
    609 			return (error);
    610 		if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
    611 			return (error);
    612 
    613 		raidPtr->raid_cinfo[col].ci_vp = vp;
    614 		raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
    615 
    616 		/* This component was not automatically configured */
    617 		diskPtr->auto_configured = 0;
    618 		diskPtr->dev = va.va_rdev;
    619 
    620 		/* we allow the user to specify that only a fraction of the
    621 		 * disks should be used this is just for debug:  it speeds up
    622 		 * the parity scan */
    623 		diskPtr->numBlocks = diskPtr->numBlocks *
    624 			rf_sizePercentage / 100;
    625 	}
    626 	return (0);
    627 }
    628 
    629 static void
    630 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
    631 		      RF_ComponentLabel_t *ci_label)
    632 {
    633 
    634 	printf("raid%d: Component %s being configured at col: %d\n",
    635 	       raidPtr->raidid, dev_name, column );
    636 	printf("         Column: %d Num Columns: %d\n",
    637 	       ci_label->column,
    638 	       ci_label->num_columns);
    639 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    640 	       ci_label->version, ci_label->serial_number,
    641 	       ci_label->mod_counter);
    642 	printf("         Clean: %s Status: %d\n",
    643 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    644 }
    645 
    646 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
    647 				 char *dev_name, RF_ComponentLabel_t *ci_label,
    648 				 int serial_number, int mod_counter)
    649 {
    650 	int fatal_error = 0;
    651 
    652 	if (serial_number != ci_label->serial_number) {
    653 		printf("%s has a different serial number: %d %d\n",
    654 		       dev_name, serial_number, ci_label->serial_number);
    655 		fatal_error = 1;
    656 	}
    657 	if (mod_counter != ci_label->mod_counter) {
    658 		printf("%s has a different modification count: %d %d\n",
    659 		       dev_name, mod_counter, ci_label->mod_counter);
    660 	}
    661 
    662 	if (row != ci_label->row) {
    663 		printf("Row out of alignment for: %s\n", dev_name);
    664 		fatal_error = 1;
    665 	}
    666 	if (column != ci_label->column) {
    667 		printf("Column out of alignment for: %s\n", dev_name);
    668 		fatal_error = 1;
    669 	}
    670 	if (raidPtr->numCol != ci_label->num_columns) {
    671 		printf("Number of columns do not match for: %s\n", dev_name);
    672 		fatal_error = 1;
    673 	}
    674 	if (ci_label->clean == 0) {
    675 		/* it's not clean, but that's not fatal */
    676 		printf("%s is not clean!\n", dev_name);
    677 	}
    678 	return(fatal_error);
    679 }
    680 
    681 
    682 /*
    683 
    684    rf_CheckLabels() - check all the component labels for consistency.
    685    Return an error if there is anything major amiss.
    686 
    687  */
    688 
    689 int
    690 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    691 {
    692 	int c;
    693 	char *dev_name;
    694 	RF_ComponentLabel_t *ci_label;
    695 	int serial_number = 0;
    696 	int mod_number = 0;
    697 	int fatal_error = 0;
    698 	int mod_values[4];
    699 	int mod_count[4];
    700 	int ser_values[4];
    701 	int ser_count[4];
    702 	int num_ser;
    703 	int num_mod;
    704 	int i;
    705 	int found;
    706 	int hosed_column;
    707 	int too_fatal;
    708 	int parity_good;
    709 	int force;
    710 
    711 	hosed_column = -1;
    712 	too_fatal = 0;
    713 	force = cfgPtr->force;
    714 
    715 	/*
    716 	   We're going to try to be a little intelligent here.  If one
    717 	   component's label is bogus, and we can identify that it's the
    718 	   *only* one that's gone, we'll mark it as "failed" and allow
    719 	   the configuration to proceed.  This will be the *only* case
    720 	   that we'll proceed if there would be (otherwise) fatal errors.
    721 
    722 	   Basically we simply keep a count of how many components had
    723 	   what serial number.  If all but one agree, we simply mark
    724 	   the disagreeing component as being failed, and allow
    725 	   things to come up "normally".
    726 
    727 	   We do this first for serial numbers, and then for "mod_counter".
    728 
    729 	 */
    730 
    731 	num_ser = 0;
    732 	num_mod = 0;
    733 
    734 	for (c = 0; c < raidPtr->numCol; c++) {
    735 		ci_label = raidget_component_label(raidPtr, c);
    736 		found=0;
    737 		for(i=0;i<num_ser;i++) {
    738 			if (ser_values[i] == ci_label->serial_number) {
    739 				ser_count[i]++;
    740 				found=1;
    741 				break;
    742 			}
    743 		}
    744 		if (!found) {
    745 			ser_values[num_ser] = ci_label->serial_number;
    746 			ser_count[num_ser] = 1;
    747 			num_ser++;
    748 			if (num_ser>2) {
    749 				fatal_error = 1;
    750 				break;
    751 			}
    752 		}
    753 		found=0;
    754 		for(i=0;i<num_mod;i++) {
    755 			if (mod_values[i] == ci_label->mod_counter) {
    756 				mod_count[i]++;
    757 				found=1;
    758 				break;
    759 			}
    760 		}
    761 		if (!found) {
    762 			mod_values[num_mod] = ci_label->mod_counter;
    763 			mod_count[num_mod] = 1;
    764 			num_mod++;
    765 			if (num_mod>2) {
    766 				fatal_error = 1;
    767 				break;
    768 			}
    769 		}
    770 	}
    771 #if DEBUG
    772 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    773 	for(i=0;i<num_ser;i++) {
    774 		printf("%d %d\n", ser_values[i], ser_count[i]);
    775 	}
    776 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    777 	for(i=0;i<num_mod;i++) {
    778 		printf("%d %d\n", mod_values[i], mod_count[i]);
    779 	}
    780 #endif
    781 	serial_number = ser_values[0];
    782 	if (num_ser == 2) {
    783 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    784 			/* Locate the maverick component */
    785 			if (ser_count[1] > ser_count[0]) {
    786 				serial_number = ser_values[1];
    787 			}
    788 
    789 			for (c = 0; c < raidPtr->numCol; c++) {
    790 				ci_label = raidget_component_label(raidPtr, c);
    791 				if (serial_number != ci_label->serial_number) {
    792 					hosed_column = c;
    793 					break;
    794 				}
    795 			}
    796 			printf("Hosed component: %s\n",
    797 			       &cfgPtr->devnames[0][hosed_column][0]);
    798 			if (!force) {
    799 				/* we'll fail this component, as if there are
    800 				   other major errors, we arn't forcing things
    801 				   and we'll abort the config anyways */
    802 				raidPtr->Disks[hosed_column].status
    803 					= rf_ds_failed;
    804 				raidPtr->numFailures++;
    805 				raidPtr->status = rf_rs_degraded;
    806 			}
    807 		} else {
    808 			too_fatal = 1;
    809 		}
    810 		if (cfgPtr->parityConfig == '0') {
    811 			/* We've identified two different serial numbers.
    812 			   RAID 0 can't cope with that, so we'll punt */
    813 			too_fatal = 1;
    814 		}
    815 
    816 	}
    817 
    818 	/* record the serial number for later.  If we bail later, setting
    819 	   this doesn't matter, otherwise we've got the best guess at the
    820 	   correct serial number */
    821 	raidPtr->serial_number = serial_number;
    822 
    823 	mod_number = mod_values[0];
    824 	if (num_mod == 2) {
    825 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    826 			/* Locate the maverick component */
    827 			if (mod_count[1] > mod_count[0]) {
    828 				mod_number = mod_values[1];
    829 			} else if (mod_count[1] < mod_count[0]) {
    830 				mod_number = mod_values[0];
    831 			} else {
    832 				/* counts of different modification values
    833 				   are the same.   Assume greater value is
    834 				   the correct one, all other things
    835 				   considered */
    836 				if (mod_values[0] > mod_values[1]) {
    837 					mod_number = mod_values[0];
    838 				} else {
    839 					mod_number = mod_values[1];
    840 				}
    841 
    842 			}
    843 
    844 			for (c = 0; c < raidPtr->numCol; c++) {
    845 				ci_label = raidget_component_label(raidPtr, c);
    846 				if (mod_number != ci_label->mod_counter) {
    847 					if (hosed_column == c) {
    848 						/* same one.  Can
    849 						   deal with it.  */
    850 					} else {
    851 						hosed_column = c;
    852 						if (num_ser != 1) {
    853 							too_fatal = 1;
    854 							break;
    855 						}
    856 					}
    857 				}
    858 			}
    859 			printf("Hosed component: %s\n",
    860 			       &cfgPtr->devnames[0][hosed_column][0]);
    861 			if (!force) {
    862 				/* we'll fail this component, as if there are
    863 				   other major errors, we arn't forcing things
    864 				   and we'll abort the config anyways */
    865 				if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
    866 					raidPtr->Disks[hosed_column].status
    867 						= rf_ds_failed;
    868 					raidPtr->numFailures++;
    869 					raidPtr->status = rf_rs_degraded;
    870 				}
    871 			}
    872 		} else {
    873 			too_fatal = 1;
    874 		}
    875 		if (cfgPtr->parityConfig == '0') {
    876 			/* We've identified two different mod counters.
    877 			   RAID 0 can't cope with that, so we'll punt */
    878 			too_fatal = 1;
    879 		}
    880 	}
    881 
    882 	raidPtr->mod_counter = mod_number;
    883 
    884 	if (too_fatal) {
    885 		/* we've had both a serial number mismatch, and a mod_counter
    886 		   mismatch -- and they involved two different components!!
    887 		   Bail -- make things fail so that the user must force
    888 		   the issue... */
    889 		hosed_column = -1;
    890 		fatal_error = 1;
    891 	}
    892 
    893 	if (num_ser > 2) {
    894 		printf("raid%d: Too many different serial numbers!\n",
    895 		       raidPtr->raidid);
    896 		fatal_error = 1;
    897 	}
    898 
    899 	if (num_mod > 2) {
    900 		printf("raid%d: Too many different mod counters!\n",
    901 		       raidPtr->raidid);
    902 		fatal_error = 1;
    903 	}
    904 
    905 	/* we start by assuming the parity will be good, and flee from
    906 	   that notion at the slightest sign of trouble */
    907 
    908 	parity_good = RF_RAID_CLEAN;
    909 
    910 	for (c = 0; c < raidPtr->numCol; c++) {
    911 		dev_name = &cfgPtr->devnames[0][c][0];
    912 		ci_label = raidget_component_label(raidPtr, c);
    913 
    914 		if (c == hosed_column) {
    915 			printf("raid%d: Ignoring %s\n",
    916 			       raidPtr->raidid, dev_name);
    917 		} else {
    918 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
    919 			if (rf_check_label_vitals( raidPtr, 0, c,
    920 						   dev_name, ci_label,
    921 						   serial_number,
    922 						   mod_number )) {
    923 				fatal_error = 1;
    924 			}
    925 			if (ci_label->clean != RF_RAID_CLEAN) {
    926 				parity_good = RF_RAID_DIRTY;
    927 			}
    928 		}
    929 	}
    930 
    931 	if (fatal_error) {
    932 		parity_good = RF_RAID_DIRTY;
    933 	}
    934 
    935 	/* we note the state of the parity */
    936 	raidPtr->parity_good = parity_good;
    937 
    938 	return(fatal_error);
    939 }
    940 
    941 int
    942 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
    943 {
    944 	RF_RaidDisk_t *disks;
    945 	RF_DiskQueue_t *spareQueues;
    946 	int ret;
    947 	unsigned int bs;
    948 	int spare_number;
    949 
    950 	ret=0;
    951 
    952 	if (raidPtr->numSpare >= RF_MAXSPARE) {
    953 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
    954 		return(EINVAL);
    955 	}
    956 
    957 	RF_LOCK_MUTEX(raidPtr->mutex);
    958 	while (raidPtr->adding_hot_spare==1) {
    959 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
    960 			&(raidPtr->mutex));
    961 	}
    962 	raidPtr->adding_hot_spare=1;
    963 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    964 
    965 	/* the beginning of the spares... */
    966 	disks = &raidPtr->Disks[raidPtr->numCol];
    967 
    968 	spare_number = raidPtr->numSpare;
    969 
    970 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
    971 			       &disks[spare_number],
    972 			       raidPtr->numCol + spare_number);
    973 
    974 	if (ret)
    975 		goto fail;
    976 	if (disks[spare_number].status != rf_ds_optimal) {
    977 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    978 			     sparePtr->component_name);
    979 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
    980 		ret=EINVAL;
    981 		goto fail;
    982 	} else {
    983 		disks[spare_number].status = rf_ds_spare;
    984 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
    985 			 spare_number,
    986 			 disks[spare_number].devname,
    987 			 disks[spare_number].numBlocks,
    988 			 disks[spare_number].blockSize,
    989 			 (long int) disks[spare_number].numBlocks *
    990 			 disks[spare_number].blockSize / 1024 / 1024);
    991 	}
    992 
    993 
    994 	/* check sizes and block sizes on the spare disk */
    995 	bs = 1 << raidPtr->logBytesPerSector;
    996 	if (disks[spare_number].blockSize != bs) {
    997 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
    998 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
    999 		ret = EINVAL;
   1000 		goto fail;
   1001 	}
   1002 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1003 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
   1004 			     disks[spare_number].devname,
   1005 			     disks[spare_number].blockSize,
   1006 			     raidPtr->sectorsPerDisk);
   1007 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1008 		ret = EINVAL;
   1009 		goto fail;
   1010 	} else {
   1011 		if (disks[spare_number].numBlocks >
   1012 		    raidPtr->sectorsPerDisk) {
   1013 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
   1014 			    disks[spare_number].devname,
   1015 			    raidPtr->sectorsPerDisk,
   1016 			    disks[spare_number].numBlocks);
   1017 
   1018 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1019 		}
   1020 	}
   1021 
   1022 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
   1023 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1024 				 raidPtr->numCol + spare_number,
   1025 				 raidPtr->qType,
   1026 				 raidPtr->sectorsPerDisk,
   1027 				 raidPtr->Disks[raidPtr->numCol +
   1028 						  spare_number].dev,
   1029 				 raidPtr->maxOutstanding,
   1030 				 &raidPtr->shutdownList,
   1031 				 raidPtr->cleanupList);
   1032 
   1033 	RF_LOCK_MUTEX(raidPtr->mutex);
   1034 	raidPtr->numSpare++;
   1035 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1036 
   1037 fail:
   1038 	RF_LOCK_MUTEX(raidPtr->mutex);
   1039 	raidPtr->adding_hot_spare=0;
   1040 	wakeup(&(raidPtr->adding_hot_spare));
   1041 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1042 
   1043 	return(ret);
   1044 }
   1045 
   1046 int
   1047 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
   1048 {
   1049 	int spare_number;
   1050 
   1051 
   1052 	if (raidPtr->numSpare==0) {
   1053 		printf("No spares to remove!\n");
   1054 		return(EINVAL);
   1055 	}
   1056 
   1057 	spare_number = sparePtr->column;
   1058 
   1059 	return(EINVAL); /* XXX not implemented yet */
   1060 #if 0
   1061 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1062 		return(EINVAL);
   1063 	}
   1064 
   1065 	/* verify that this spare isn't in use... */
   1066 
   1067 
   1068 
   1069 
   1070 	/* it's gone.. */
   1071 
   1072 	raidPtr->numSpare--;
   1073 
   1074 	return(0);
   1075 #endif
   1076 }
   1077 
   1078 
   1079 int
   1080 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
   1081 {
   1082 	RF_RaidDisk_t *disks;
   1083 
   1084 	if ((component->column < 0) ||
   1085 	    (component->column >= raidPtr->numCol)) {
   1086 		return(EINVAL);
   1087 	}
   1088 
   1089 	disks = &raidPtr->Disks[component->column];
   1090 
   1091 	/* 1. This component must be marked as 'failed' */
   1092 
   1093 	return(EINVAL); /* Not implemented yet. */
   1094 }
   1095 
   1096 int
   1097 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
   1098     RF_SingleComponent_t *component)
   1099 {
   1100 
   1101 	/* Issues here include how to 'move' this in if there is IO
   1102 	   taking place (e.g. component queues and such) */
   1103 
   1104 	return(EINVAL); /* Not implemented yet. */
   1105 }
   1106