Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.70.10.1.4.1
      1 /*	$NetBSD: rf_disks.c,v 1.70.10.1.4.1 2010/04/21 00:27:51 matt Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1995 Carnegie-Mellon University.
     33  * All rights reserved.
     34  *
     35  * Author: Mark Holland
     36  *
     37  * Permission to use, copy, modify and distribute this software and
     38  * its documentation is hereby granted, provided that both the copyright
     39  * notice and this permission notice appear in all copies of the
     40  * software, derivative works or modified versions, and any portions
     41  * thereof, and that both notices appear in supporting documentation.
     42  *
     43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     46  *
     47  * Carnegie Mellon requests users of this software to return to
     48  *
     49  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     50  *  School of Computer Science
     51  *  Carnegie Mellon University
     52  *  Pittsburgh PA 15213-3890
     53  *
     54  * any improvements or extensions that they make and grant Carnegie the
     55  * rights to redistribute these changes.
     56  */
     57 
     58 /***************************************************************
     59  * rf_disks.c -- code to perform operations on the actual disks
     60  ***************************************************************/
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.70.10.1.4.1 2010/04/21 00:27:51 matt Exp $");
     64 
     65 #include <dev/raidframe/raidframevar.h>
     66 
     67 #include "rf_raid.h"
     68 #include "rf_alloclist.h"
     69 #include "rf_utils.h"
     70 #include "rf_general.h"
     71 #include "rf_options.h"
     72 #include "rf_kintf.h"
     73 #include "rf_netbsd.h"
     74 
     75 #include <sys/param.h>
     76 #include <sys/systm.h>
     77 #include <sys/proc.h>
     78 #include <sys/ioctl.h>
     79 #include <sys/fcntl.h>
     80 #include <sys/vnode.h>
     81 #include <sys/kauth.h>
     82 
     83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     84 static void rf_print_label_status( RF_Raid_t *, int, char *,
     85 				  RF_ComponentLabel_t *);
     86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     87 				  RF_ComponentLabel_t *, int, int );
     88 
/*
 * Debug printf wrappers: they print only when rf_diskDebug is non-zero.
 * Each body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and cannot capture an "else" that follows it at
 * the call site.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     91 
     92 /**************************************************************************
     93  *
     94  * initialize the disks comprising the array
     95  *
     96  * We want the spare disks to have regular row,col numbers so that we can
     97  * easily substitue a spare for a failed disk.  But, the driver code assumes
     98  * throughout that the array contains numRow by numCol _non-spare_ disks, so
     99  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    100  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    101  * rest, and put all the spares in it.  This probably needs to get changed
    102  * eventually.
    103  *
    104  **************************************************************************/
    105 
    106 int
    107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    108 		  RF_Config_t *cfgPtr)
    109 {
    110 	RF_RaidDisk_t *disks;
    111 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    112 	RF_RowCol_t c;
    113 	int bs, ret;
    114 	unsigned i, count, foundone = 0, numFailuresThisRow;
    115 	int force;
    116 
    117 	force = cfgPtr->force;
    118 
    119 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    120 	if (ret)
    121 		goto fail;
    122 
    123 	disks = raidPtr->Disks;
    124 
    125 	numFailuresThisRow = 0;
    126 	for (c = 0; c < raidPtr->numCol; c++) {
    127 		ret = rf_ConfigureDisk(raidPtr,
    128 				       &cfgPtr->devnames[0][c][0],
    129 				       &disks[c], c);
    130 
    131 		if (ret)
    132 			goto fail;
    133 
    134 		if (disks[c].status == rf_ds_optimal) {
    135 			raidfetch_component_label(raidPtr, c);
    136 		}
    137 
    138 		if (disks[c].status != rf_ds_optimal) {
    139 			numFailuresThisRow++;
    140 		} else {
    141 			if (disks[c].numBlocks < min_numblks)
    142 				min_numblks = disks[c].numBlocks;
    143 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
    144 				 c, disks[c].devname,
    145 				 disks[c].numBlocks,
    146 				 disks[c].blockSize,
    147 				 (long int) disks[c].numBlocks *
    148 				 disks[c].blockSize / 1024 / 1024);
    149 		}
    150 	}
    151 	/* XXX fix for n-fault tolerant */
    152 	/* XXX this should probably check to see how many failures
    153 	   we can handle for this configuration! */
    154 	if (numFailuresThisRow > 0)
    155 		raidPtr->status = rf_rs_degraded;
    156 
    157 	/* all disks must be the same size & have the same block size, bs must
    158 	 * be a power of 2 */
    159 	bs = 0;
    160 	foundone = 0;
    161 	for (c = 0; c < raidPtr->numCol; c++) {
    162 		if (disks[c].status == rf_ds_optimal) {
    163 			bs = disks[c].blockSize;
    164 			foundone = 1;
    165 			break;
    166 		}
    167 	}
    168 	if (!foundone) {
    169 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    170 		ret = EINVAL;
    171 		goto fail;
    172 	}
    173 	for (count = 0, i = 1; i; i <<= 1)
    174 		if (bs & i)
    175 			count++;
    176 	if (count != 1) {
    177 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    178 		ret = EINVAL;
    179 		goto fail;
    180 	}
    181 
    182 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    183 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    184 		if (force != 0) {
    185 			printf("raid%d: Fatal errors being ignored.\n",
    186 			       raidPtr->raidid);
    187 		} else {
    188 			ret = EINVAL;
    189 			goto fail;
    190 		}
    191 	}
    192 
    193 	for (c = 0; c < raidPtr->numCol; c++) {
    194 		if (disks[c].status == rf_ds_optimal) {
    195 			if (disks[c].blockSize != bs) {
    196 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
    197 				ret = EINVAL;
    198 				goto fail;
    199 			}
    200 			if (disks[c].numBlocks != min_numblks) {
    201 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
    202 					     c, (int) min_numblks);
    203 				disks[c].numBlocks = min_numblks;
    204 			}
    205 		}
    206 	}
    207 
    208 	raidPtr->sectorsPerDisk = min_numblks;
    209 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    210 	raidPtr->bytesPerSector = bs;
    211 	raidPtr->sectorMask = bs - 1;
    212 	return (0);
    213 
    214 fail:
    215 
    216 	rf_UnconfigureVnodes( raidPtr );
    217 
    218 	return (ret);
    219 }
    220 
    221 
    222 /****************************************************************************
    223  * set up the data structures describing the spare disks in the array
    224  * recall from the above comment that the spare disk descriptors are stored
    225  * in row zero, which is specially expanded to hold them.
    226  ****************************************************************************/
    227 int
    228 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    229 		       RF_Config_t *cfgPtr)
    230 {
    231 	int     i, ret;
    232 	unsigned int bs;
    233 	RF_RaidDisk_t *disks;
    234 	int     num_spares_done;
    235 
    236 	num_spares_done = 0;
    237 
    238 	/* The space for the spares should have already been allocated by
    239 	 * ConfigureDisks() */
    240 
    241 	disks = &raidPtr->Disks[raidPtr->numCol];
    242 	for (i = 0; i < raidPtr->numSpare; i++) {
    243 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    244 				       &disks[i], raidPtr->numCol + i);
    245 		if (ret)
    246 			goto fail;
    247 		if (disks[i].status != rf_ds_optimal) {
    248 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    249 				     &cfgPtr->spare_names[i][0]);
    250 		} else {
    251 			disks[i].status = rf_ds_spare;	/* change status to
    252 							 * spare */
    253 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
    254 			    disks[i].devname,
    255 			    disks[i].numBlocks, disks[i].blockSize,
    256 			    (long int) disks[i].numBlocks *
    257 				 disks[i].blockSize / 1024 / 1024);
    258 		}
    259 		num_spares_done++;
    260 	}
    261 
    262 	/* check sizes and block sizes on spare disks */
    263 	bs = 1 << raidPtr->logBytesPerSector;
    264 	for (i = 0; i < raidPtr->numSpare; i++) {
    265 		if (disks[i].blockSize != bs) {
    266 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    267 			ret = EINVAL;
    268 			goto fail;
    269 		}
    270 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    271 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
    272 				     disks[i].devname, disks[i].blockSize,
    273 				     raidPtr->sectorsPerDisk);
    274 			ret = EINVAL;
    275 			goto fail;
    276 		} else
    277 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    278 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
    279 				    disks[i].devname,
    280 				    raidPtr->sectorsPerDisk,
    281 				    disks[i].numBlocks);
    282 
    283 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    284 			}
    285 	}
    286 
    287 	return (0);
    288 
    289 fail:
    290 
    291 	/* Release the hold on the main components.  We've failed to allocate
    292 	 * a spare, and since we're failing, we need to free things..
    293 
    294 	 XXX failing to allocate a spare is *not* that big of a deal...
    295 	 We *can* survive without it, if need be, esp. if we get hot
    296 	 adding working.
    297 
    298 	 If we don't fail out here, then we need a way to remove this spare...
    299 	 that should be easier to do here than if we are "live"...
    300 
    301 	 */
    302 
    303 	rf_UnconfigureVnodes( raidPtr );
    304 
    305 	return (ret);
    306 }
    307 
    308 static int
    309 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    310 {
    311 	int ret;
    312 
    313 	/* We allocate RF_MAXSPARE on the first row so that we
    314 	   have room to do hot-swapping of spares */
    315 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
    316 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    317 			raidPtr->cleanupList);
    318 	if (raidPtr->Disks == NULL) {
    319 		ret = ENOMEM;
    320 		goto fail;
    321 	}
    322 
    323 	/* get space for device specific stuff.. */
    324 	RF_MallocAndAdd(raidPtr->raid_cinfo,
    325 			(raidPtr->numCol + RF_MAXSPARE) *
    326 			sizeof(struct raidcinfo), (struct raidcinfo *),
    327 			raidPtr->cleanupList);
    328 
    329 	if (raidPtr->raid_cinfo == NULL) {
    330 		ret = ENOMEM;
    331 		goto fail;
    332 	}
    333 
    334 	return(0);
    335 fail:
    336 	rf_UnconfigureVnodes( raidPtr );
    337 
    338 	return(ret);
    339 }
    340 
    341 
    342 /* configure a single disk during auto-configuration at boot */
    343 int
    344 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
    345 		      RF_AutoConfig_t *auto_config)
    346 {
    347 	RF_RaidDisk_t *disks;
    348 	RF_RaidDisk_t *diskPtr;
    349 	RF_RowCol_t c;
    350 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    351 	int bs, ret;
    352 	int numFailuresThisRow;
    353 	RF_AutoConfig_t *ac;
    354 	int parity_good;
    355 	int mod_counter;
    356 	int mod_counter_found;
    357 
    358 #if DEBUG
    359 	printf("Starting autoconfiguration of RAID set...\n");
    360 #endif
    361 
    362 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    363 	if (ret)
    364 		goto fail;
    365 
    366 	disks = raidPtr->Disks;
    367 
    368 	/* assume the parity will be fine.. */
    369 	parity_good = RF_RAID_CLEAN;
    370 
    371 	/* Check for mod_counters that are too low */
    372 	mod_counter_found = 0;
    373 	mod_counter = 0;
    374 	ac = auto_config;
    375 	while(ac!=NULL) {
    376 		if (mod_counter_found==0) {
    377 			mod_counter = ac->clabel->mod_counter;
    378 			mod_counter_found = 1;
    379 		} else {
    380 			if (ac->clabel->mod_counter > mod_counter) {
    381 				mod_counter = ac->clabel->mod_counter;
    382 			}
    383 		}
    384 		ac->flag = 0; /* clear the general purpose flag */
    385 		ac = ac->next;
    386 	}
    387 
    388 	bs = 0;
    389 
    390 	numFailuresThisRow = 0;
    391 	for (c = 0; c < raidPtr->numCol; c++) {
    392 		diskPtr = &disks[c];
    393 
    394 		/* find this row/col in the autoconfig */
    395 #if DEBUG
    396 		printf("Looking for %d in autoconfig\n",c);
    397 #endif
    398 		ac = auto_config;
    399 		while(ac!=NULL) {
    400 			if (ac->clabel==NULL) {
    401 				/* big-time bad news. */
    402 				goto fail;
    403 			}
    404 			if ((ac->clabel->column == c) &&
    405 			    (ac->clabel->mod_counter == mod_counter)) {
    406 				/* it's this one... */
    407 				/* flag it as 'used', so we don't
    408 				   free it later. */
    409 				ac->flag = 1;
    410 #if DEBUG
    411 				printf("Found: %s at %d\n",
    412 				       ac->devname,c);
    413 #endif
    414 
    415 				break;
    416 			}
    417 			ac=ac->next;
    418 		}
    419 
    420 		if (ac==NULL) {
    421 			/* we didn't find an exact match with a
    422 			   correct mod_counter above... can we find
    423 			   one with an incorrect mod_counter to use
    424 			   instead?  (this one, if we find it, will be
    425 			   marked as failed once the set configures)
    426 			*/
    427 
    428 			ac = auto_config;
    429 			while(ac!=NULL) {
    430 				if (ac->clabel==NULL) {
    431 					/* big-time bad news. */
    432 					goto fail;
    433 				}
    434 				if (ac->clabel->column == c) {
    435 					/* it's this one...
    436 					   flag it as 'used', so we
    437 					   don't free it later. */
    438 					ac->flag = 1;
    439 #if DEBUG
    440 					printf("Found(low mod_counter): %s at %d\n",
    441 					       ac->devname,c);
    442 #endif
    443 
    444 					break;
    445 				}
    446 				ac=ac->next;
    447 			}
    448 		}
    449 
    450 
    451 
    452 		if (ac!=NULL) {
    453 			/* Found it.  Configure it.. */
    454 			diskPtr->blockSize = ac->clabel->blockSize;
    455 			diskPtr->numBlocks = ac->clabel->numBlocks;
    456 			/* Note: rf_protectedSectors is already
    457 			   factored into numBlocks here */
    458 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
    459 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
    460 
    461 			memcpy(raidget_component_label(raidPtr, c),
    462 			    ac->clabel, sizeof(*ac->clabel));
    463 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    464 			    "/dev/%s", ac->devname);
    465 
    466 			/* note the fact that this component was
    467 			   autoconfigured.  You'll need this info
    468 			   later.  Trust me :) */
    469 			diskPtr->auto_configured = 1;
    470 			diskPtr->dev = ac->dev;
    471 
    472 			/*
    473 			 * we allow the user to specify that
    474 			 * only a fraction of the disks should
    475 			 * be used this is just for debug: it
    476 			 * speeds up the parity scan
    477 			 */
    478 
    479 			diskPtr->numBlocks = diskPtr->numBlocks *
    480 				rf_sizePercentage / 100;
    481 
    482 			/* XXX these will get set multiple times,
    483 			   but since we're autoconfiguring, they'd
    484 			   better be always the same each time!
    485 			   If not, this is the least of your worries */
    486 
    487 			bs = diskPtr->blockSize;
    488 			min_numblks = diskPtr->numBlocks;
    489 
    490 			/* this gets done multiple times, but that's
    491 			   fine -- the serial number will be the same
    492 			   for all components, guaranteed */
    493 			raidPtr->serial_number = ac->clabel->serial_number;
    494 			/* check the last time the label was modified */
    495 
    496 			if (ac->clabel->mod_counter != mod_counter) {
    497 				/* Even though we've filled in all of
    498 				   the above, we don't trust this
    499 				   component since it's modification
    500 				   counter is not in sync with the
    501 				   rest, and we really consider it to
    502 				   be failed.  */
    503 				disks[c].status = rf_ds_failed;
    504 				numFailuresThisRow++;
    505 			} else {
    506 				if (ac->clabel->clean != RF_RAID_CLEAN) {
    507 					parity_good = RF_RAID_DIRTY;
    508 				}
    509 			}
    510 		} else {
    511 			/* Didn't find it at all!!  Component must
    512 			   really be dead */
    513 			disks[c].status = rf_ds_failed;
    514 			snprintf(disks[c].devname, sizeof(disks[c].devname),
    515 			    "component%d", c);
    516 			numFailuresThisRow++;
    517 		}
    518 	}
    519 	/* XXX fix for n-fault tolerant */
    520 	/* XXX this should probably check to see how many failures
    521 	   we can handle for this configuration! */
    522 	if (numFailuresThisRow > 0) {
    523 		raidPtr->status = rf_rs_degraded;
    524 		raidPtr->numFailures = numFailuresThisRow;
    525 	}
    526 
    527 	/* close the device for the ones that didn't get used */
    528 
    529 	ac = auto_config;
    530 	while(ac!=NULL) {
    531 		if (ac->flag == 0) {
    532 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    533 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
    534 			vput(ac->vp);
    535 			ac->vp = NULL;
    536 #if DEBUG
    537 			printf("Released %s from auto-config set.\n",
    538 			       ac->devname);
    539 #endif
    540 		}
    541 		ac = ac->next;
    542 	}
    543 
    544 	raidPtr->mod_counter = mod_counter;
    545 
    546 	/* note the state of the parity, if any */
    547 	raidPtr->parity_good = parity_good;
    548 	raidPtr->sectorsPerDisk = min_numblks;
    549 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    550 	raidPtr->bytesPerSector = bs;
    551 	raidPtr->sectorMask = bs - 1;
    552 	return (0);
    553 
    554 fail:
    555 
    556 	rf_UnconfigureVnodes( raidPtr );
    557 
    558 	return (ret);
    559 
    560 }
    561 
    562 /* configure a single disk in the array */
    563 int
    564 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
    565 		 RF_RowCol_t col)
    566 {
    567 	char   *p;
    568 	struct vnode *vp;
    569 	struct vattr va;
    570 	int     error;
    571 
    572 	p = rf_find_non_white(bf);
    573 	if (p[strlen(p) - 1] == '\n') {
    574 		/* strip off the newline */
    575 		p[strlen(p) - 1] = '\0';
    576 	}
    577 	(void) strcpy(diskPtr->devname, p);
    578 
    579 	/* Let's start by claiming the component is fine and well... */
    580 	diskPtr->status = rf_ds_optimal;
    581 
    582 	raidPtr->raid_cinfo[col].ci_vp = NULL;
    583 	raidPtr->raid_cinfo[col].ci_dev = 0;
    584 
    585 	if (!strcmp("absent", diskPtr->devname)) {
    586 		printf("Ignoring missing component at column %d\n", col);
    587 		sprintf(diskPtr->devname, "component%d", col);
    588 		diskPtr->status = rf_ds_failed;
    589 		return (0);
    590 	}
    591 
    592 	error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
    593 	if (error) {
    594 		printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
    595 		if (error == ENXIO) {
    596 			/* the component isn't there... must be dead :-( */
    597 			diskPtr->status = rf_ds_failed;
    598 		} else {
    599 			return (error);
    600 		}
    601 	}
    602 	if (diskPtr->status == rf_ds_optimal) {
    603 
    604 		if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
    605 			return (error);
    606 		if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
    607 			return (error);
    608 
    609 		raidPtr->raid_cinfo[col].ci_vp = vp;
    610 		raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
    611 
    612 		/* This component was not automatically configured */
    613 		diskPtr->auto_configured = 0;
    614 		diskPtr->dev = va.va_rdev;
    615 
    616 		/* we allow the user to specify that only a fraction of the
    617 		 * disks should be used this is just for debug:  it speeds up
    618 		 * the parity scan */
    619 		diskPtr->numBlocks = diskPtr->numBlocks *
    620 			rf_sizePercentage / 100;
    621 	}
    622 	return (0);
    623 }
    624 
    625 static void
    626 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
    627 		      RF_ComponentLabel_t *ci_label)
    628 {
    629 
    630 	printf("raid%d: Component %s being configured at col: %d\n",
    631 	       raidPtr->raidid, dev_name, column );
    632 	printf("         Column: %d Num Columns: %d\n",
    633 	       ci_label->column,
    634 	       ci_label->num_columns);
    635 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    636 	       ci_label->version, ci_label->serial_number,
    637 	       ci_label->mod_counter);
    638 	printf("         Clean: %s Status: %d\n",
    639 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    640 }
    641 
    642 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
    643 				 char *dev_name, RF_ComponentLabel_t *ci_label,
    644 				 int serial_number, int mod_counter)
    645 {
    646 	int fatal_error = 0;
    647 
    648 	if (serial_number != ci_label->serial_number) {
    649 		printf("%s has a different serial number: %d %d\n",
    650 		       dev_name, serial_number, ci_label->serial_number);
    651 		fatal_error = 1;
    652 	}
    653 	if (mod_counter != ci_label->mod_counter) {
    654 		printf("%s has a different modification count: %d %d\n",
    655 		       dev_name, mod_counter, ci_label->mod_counter);
    656 	}
    657 
    658 	if (row != ci_label->row) {
    659 		printf("Row out of alignment for: %s\n", dev_name);
    660 		fatal_error = 1;
    661 	}
    662 	if (column != ci_label->column) {
    663 		printf("Column out of alignment for: %s\n", dev_name);
    664 		fatal_error = 1;
    665 	}
    666 	if (raidPtr->numCol != ci_label->num_columns) {
    667 		printf("Number of columns do not match for: %s\n", dev_name);
    668 		fatal_error = 1;
    669 	}
    670 	if (ci_label->clean == 0) {
    671 		/* it's not clean, but that's not fatal */
    672 		printf("%s is not clean!\n", dev_name);
    673 	}
    674 	return(fatal_error);
    675 }
    676 
    677 
    678 /*
    679 
    680    rf_CheckLabels() - check all the component labels for consistency.
    681    Return an error if there is anything major amiss.
    682 
    683  */
    684 
    685 int
    686 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    687 {
    688 	int c;
    689 	char *dev_name;
    690 	RF_ComponentLabel_t *ci_label;
    691 	int serial_number = 0;
    692 	int mod_number = 0;
    693 	int fatal_error = 0;
    694 	int mod_values[4];
    695 	int mod_count[4];
    696 	int ser_values[4];
    697 	int ser_count[4];
    698 	int num_ser;
    699 	int num_mod;
    700 	int i;
    701 	int found;
    702 	int hosed_column;
    703 	int too_fatal;
    704 	int parity_good;
    705 	int force;
    706 
    707 	hosed_column = -1;
    708 	too_fatal = 0;
    709 	force = cfgPtr->force;
    710 
    711 	/*
    712 	   We're going to try to be a little intelligent here.  If one
    713 	   component's label is bogus, and we can identify that it's the
    714 	   *only* one that's gone, we'll mark it as "failed" and allow
    715 	   the configuration to proceed.  This will be the *only* case
    716 	   that we'll proceed if there would be (otherwise) fatal errors.
    717 
    718 	   Basically we simply keep a count of how many components had
    719 	   what serial number.  If all but one agree, we simply mark
    720 	   the disagreeing component as being failed, and allow
    721 	   things to come up "normally".
    722 
    723 	   We do this first for serial numbers, and then for "mod_counter".
    724 
    725 	 */
    726 
    727 	num_ser = 0;
    728 	num_mod = 0;
    729 
    730 	for (c = 0; c < raidPtr->numCol; c++) {
    731 		ci_label = raidget_component_label(raidPtr, c);
    732 		found=0;
    733 		for(i=0;i<num_ser;i++) {
    734 			if (ser_values[i] == ci_label->serial_number) {
    735 				ser_count[i]++;
    736 				found=1;
    737 				break;
    738 			}
    739 		}
    740 		if (!found) {
    741 			ser_values[num_ser] = ci_label->serial_number;
    742 			ser_count[num_ser] = 1;
    743 			num_ser++;
    744 			if (num_ser>2) {
    745 				fatal_error = 1;
    746 				break;
    747 			}
    748 		}
    749 		found=0;
    750 		for(i=0;i<num_mod;i++) {
    751 			if (mod_values[i] == ci_label->mod_counter) {
    752 				mod_count[i]++;
    753 				found=1;
    754 				break;
    755 			}
    756 		}
    757 		if (!found) {
    758 			mod_values[num_mod] = ci_label->mod_counter;
    759 			mod_count[num_mod] = 1;
    760 			num_mod++;
    761 			if (num_mod>2) {
    762 				fatal_error = 1;
    763 				break;
    764 			}
    765 		}
    766 	}
    767 #if DEBUG
    768 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    769 	for(i=0;i<num_ser;i++) {
    770 		printf("%d %d\n", ser_values[i], ser_count[i]);
    771 	}
    772 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    773 	for(i=0;i<num_mod;i++) {
    774 		printf("%d %d\n", mod_values[i], mod_count[i]);
    775 	}
    776 #endif
    777 	serial_number = ser_values[0];
    778 	if (num_ser == 2) {
    779 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    780 			/* Locate the maverick component */
    781 			if (ser_count[1] > ser_count[0]) {
    782 				serial_number = ser_values[1];
    783 			}
    784 
    785 			for (c = 0; c < raidPtr->numCol; c++) {
    786 				ci_label = raidget_component_label(raidPtr, c);
    787 				if (serial_number != ci_label->serial_number) {
    788 					hosed_column = c;
    789 					break;
    790 				}
    791 			}
    792 			printf("Hosed component: %s\n",
    793 			       &cfgPtr->devnames[0][hosed_column][0]);
    794 			if (!force) {
    795 				/* we'll fail this component, as if there are
    796 				   other major errors, we arn't forcing things
    797 				   and we'll abort the config anyways */
    798 				raidPtr->Disks[hosed_column].status
    799 					= rf_ds_failed;
    800 				raidPtr->numFailures++;
    801 				raidPtr->status = rf_rs_degraded;
    802 			}
    803 		} else {
    804 			too_fatal = 1;
    805 		}
    806 		if (cfgPtr->parityConfig == '0') {
    807 			/* We've identified two different serial numbers.
    808 			   RAID 0 can't cope with that, so we'll punt */
    809 			too_fatal = 1;
    810 		}
    811 
    812 	}
    813 
    814 	/* record the serial number for later.  If we bail later, setting
    815 	   this doesn't matter, otherwise we've got the best guess at the
    816 	   correct serial number */
    817 	raidPtr->serial_number = serial_number;
    818 
    819 	mod_number = mod_values[0];
    820 	if (num_mod == 2) {
    821 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    822 			/* Locate the maverick component */
    823 			if (mod_count[1] > mod_count[0]) {
    824 				mod_number = mod_values[1];
    825 			} else if (mod_count[1] < mod_count[0]) {
    826 				mod_number = mod_values[0];
    827 			} else {
    828 				/* counts of different modification values
    829 				   are the same.   Assume greater value is
    830 				   the correct one, all other things
    831 				   considered */
    832 				if (mod_values[0] > mod_values[1]) {
    833 					mod_number = mod_values[0];
    834 				} else {
    835 					mod_number = mod_values[1];
    836 				}
    837 
    838 			}
    839 
    840 			for (c = 0; c < raidPtr->numCol; c++) {
    841 				ci_label = raidget_component_label(raidPtr, c);
    842 				if (mod_number != ci_label->mod_counter) {
    843 					if (hosed_column == c) {
    844 						/* same one.  Can
    845 						   deal with it.  */
    846 					} else {
    847 						hosed_column = c;
    848 						if (num_ser != 1) {
    849 							too_fatal = 1;
    850 							break;
    851 						}
    852 					}
    853 				}
    854 			}
    855 			printf("Hosed component: %s\n",
    856 			       &cfgPtr->devnames[0][hosed_column][0]);
    857 			if (!force) {
    858 				/* we'll fail this component, as if there are
    859 				   other major errors, we arn't forcing things
    860 				   and we'll abort the config anyways */
    861 				if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
    862 					raidPtr->Disks[hosed_column].status
    863 						= rf_ds_failed;
    864 					raidPtr->numFailures++;
    865 					raidPtr->status = rf_rs_degraded;
    866 				}
    867 			}
    868 		} else {
    869 			too_fatal = 1;
    870 		}
    871 		if (cfgPtr->parityConfig == '0') {
    872 			/* We've identified two different mod counters.
    873 			   RAID 0 can't cope with that, so we'll punt */
    874 			too_fatal = 1;
    875 		}
    876 	}
    877 
    878 	raidPtr->mod_counter = mod_number;
    879 
    880 	if (too_fatal) {
    881 		/* we've had both a serial number mismatch, and a mod_counter
    882 		   mismatch -- and they involved two different components!!
    883 		   Bail -- make things fail so that the user must force
    884 		   the issue... */
    885 		hosed_column = -1;
    886 		fatal_error = 1;
    887 	}
    888 
    889 	if (num_ser > 2) {
    890 		printf("raid%d: Too many different serial numbers!\n",
    891 		       raidPtr->raidid);
    892 		fatal_error = 1;
    893 	}
    894 
    895 	if (num_mod > 2) {
    896 		printf("raid%d: Too many different mod counters!\n",
    897 		       raidPtr->raidid);
    898 		fatal_error = 1;
    899 	}
    900 
    901 	/* we start by assuming the parity will be good, and flee from
    902 	   that notion at the slightest sign of trouble */
    903 
    904 	parity_good = RF_RAID_CLEAN;
    905 
    906 	for (c = 0; c < raidPtr->numCol; c++) {
    907 		dev_name = &cfgPtr->devnames[0][c][0];
    908 		ci_label = raidget_component_label(raidPtr, c);
    909 
    910 		if (c == hosed_column) {
    911 			printf("raid%d: Ignoring %s\n",
    912 			       raidPtr->raidid, dev_name);
    913 		} else {
    914 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
    915 			if (rf_check_label_vitals( raidPtr, 0, c,
    916 						   dev_name, ci_label,
    917 						   serial_number,
    918 						   mod_number )) {
    919 				fatal_error = 1;
    920 			}
    921 			if (ci_label->clean != RF_RAID_CLEAN) {
    922 				parity_good = RF_RAID_DIRTY;
    923 			}
    924 		}
    925 	}
    926 
    927 	if (fatal_error) {
    928 		parity_good = RF_RAID_DIRTY;
    929 	}
    930 
    931 	/* we note the state of the parity */
    932 	raidPtr->parity_good = parity_good;
    933 
    934 	return(fatal_error);
    935 }
    936 
    937 int
    938 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
    939 {
    940 	RF_RaidDisk_t *disks;
    941 	RF_DiskQueue_t *spareQueues;
    942 	int ret;
    943 	unsigned int bs;
    944 	int spare_number;
    945 
    946 	ret=0;
    947 
    948 	if (raidPtr->numSpare >= RF_MAXSPARE) {
    949 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
    950 		return(EINVAL);
    951 	}
    952 
    953 	RF_LOCK_MUTEX(raidPtr->mutex);
    954 	while (raidPtr->adding_hot_spare==1) {
    955 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
    956 			&(raidPtr->mutex));
    957 	}
    958 	raidPtr->adding_hot_spare=1;
    959 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    960 
    961 	/* the beginning of the spares... */
    962 	disks = &raidPtr->Disks[raidPtr->numCol];
    963 
    964 	spare_number = raidPtr->numSpare;
    965 
    966 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
    967 			       &disks[spare_number],
    968 			       raidPtr->numCol + spare_number);
    969 
    970 	if (ret)
    971 		goto fail;
    972 	if (disks[spare_number].status != rf_ds_optimal) {
    973 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    974 			     sparePtr->component_name);
    975 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
    976 		ret=EINVAL;
    977 		goto fail;
    978 	} else {
    979 		disks[spare_number].status = rf_ds_spare;
    980 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
    981 			 spare_number,
    982 			 disks[spare_number].devname,
    983 			 disks[spare_number].numBlocks,
    984 			 disks[spare_number].blockSize,
    985 			 (long int) disks[spare_number].numBlocks *
    986 			 disks[spare_number].blockSize / 1024 / 1024);
    987 	}
    988 
    989 
    990 	/* check sizes and block sizes on the spare disk */
    991 	bs = 1 << raidPtr->logBytesPerSector;
    992 	if (disks[spare_number].blockSize != bs) {
    993 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
    994 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
    995 		ret = EINVAL;
    996 		goto fail;
    997 	}
    998 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
    999 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
   1000 			     disks[spare_number].devname,
   1001 			     disks[spare_number].blockSize,
   1002 			     raidPtr->sectorsPerDisk);
   1003 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1004 		ret = EINVAL;
   1005 		goto fail;
   1006 	} else {
   1007 		if (disks[spare_number].numBlocks >
   1008 		    raidPtr->sectorsPerDisk) {
   1009 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
   1010 			    disks[spare_number].devname,
   1011 			    raidPtr->sectorsPerDisk,
   1012 			    disks[spare_number].numBlocks);
   1013 
   1014 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1015 		}
   1016 	}
   1017 
   1018 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
   1019 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1020 				 raidPtr->numCol + spare_number,
   1021 				 raidPtr->qType,
   1022 				 raidPtr->sectorsPerDisk,
   1023 				 raidPtr->Disks[raidPtr->numCol +
   1024 						  spare_number].dev,
   1025 				 raidPtr->maxOutstanding,
   1026 				 &raidPtr->shutdownList,
   1027 				 raidPtr->cleanupList);
   1028 
   1029 	RF_LOCK_MUTEX(raidPtr->mutex);
   1030 	raidPtr->numSpare++;
   1031 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1032 
   1033 fail:
   1034 	RF_LOCK_MUTEX(raidPtr->mutex);
   1035 	raidPtr->adding_hot_spare=0;
   1036 	wakeup(&(raidPtr->adding_hot_spare));
   1037 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1038 
   1039 	return(ret);
   1040 }
   1041 
   1042 int
   1043 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
   1044 {
   1045 	int spare_number;
   1046 
   1047 
   1048 	if (raidPtr->numSpare==0) {
   1049 		printf("No spares to remove!\n");
   1050 		return(EINVAL);
   1051 	}
   1052 
   1053 	spare_number = sparePtr->column;
   1054 
   1055 	return(EINVAL); /* XXX not implemented yet */
   1056 #if 0
   1057 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1058 		return(EINVAL);
   1059 	}
   1060 
   1061 	/* verify that this spare isn't in use... */
   1062 
   1063 
   1064 
   1065 
   1066 	/* it's gone.. */
   1067 
   1068 	raidPtr->numSpare--;
   1069 
   1070 	return(0);
   1071 #endif
   1072 }
   1073 
   1074 
   1075 int
   1076 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
   1077 {
   1078 	RF_RaidDisk_t *disks;
   1079 
   1080 	if ((component->column < 0) ||
   1081 	    (component->column >= raidPtr->numCol)) {
   1082 		return(EINVAL);
   1083 	}
   1084 
   1085 	disks = &raidPtr->Disks[component->column];
   1086 
   1087 	/* 1. This component must be marked as 'failed' */
   1088 
   1089 	return(EINVAL); /* Not implemented yet. */
   1090 }
   1091 
   1092 int
   1093 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
   1094     RF_SingleComponent_t *component)
   1095 {
   1096 
   1097 	/* Issues here include how to 'move' this in if there is IO
   1098 	   taking place (e.g. component queues and such) */
   1099 
   1100 	return(EINVAL); /* Not implemented yet. */
   1101 }
   1102