Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.47
      1 /*	$NetBSD: rf_disks.c,v 1.47 2003/12/29 03:33:48 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.47 2003/12/29 03:33:48 oster Exp $");
     71 
     72 #include <dev/raidframe/raidframevar.h>
     73 
     74 #include "rf_raid.h"
     75 #include "rf_alloclist.h"
     76 #include "rf_utils.h"
     77 #include "rf_general.h"
     78 #include "rf_options.h"
     79 #include "rf_kintf.h"
     80 #include "rf_netbsd.h"
     81 
     82 #include <sys/param.h>
     83 #include <sys/systm.h>
     84 #include <sys/proc.h>
     85 #include <sys/ioctl.h>
     86 #include <sys/fcntl.h>
     87 #include <sys/vnode.h>
     88 
     89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     90 static void rf_print_label_status( RF_Raid_t *, int, char *,
     91 				  RF_ComponentLabel_t *);
     92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     93 				  RF_ComponentLabel_t *, int, int );
     94 
/*
 * Debug printf wrappers: forward the arguments to printf() only when
 * rf_diskDebug is non-zero.  The do { } while (0) wrapper makes each
 * expansion a single statement, so a use such as
 *
 *	if (cond) DPRINTF6(...); else ...
 *
 * cannot have its "else" captured by the macro's internal "if".
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     97 
     98 /**************************************************************************
     99  *
    100  * initialize the disks comprising the array
    101  *
    102  * We want the spare disks to have regular row,col numbers so that we can
    103  * easily substitue a spare for a failed disk.  But, the driver code assumes
    104  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    105  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    106  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    107  * rest, and put all the spares in it.  This probably needs to get changed
    108  * eventually.
    109  *
    110  **************************************************************************/
    111 
    112 int
    113 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    114 	RF_ShutdownList_t **listp;
    115 	RF_Raid_t *raidPtr;
    116 	RF_Config_t *cfgPtr;
    117 {
    118 	RF_RaidDisk_t *disks;
    119 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    120 	RF_RowCol_t c;
    121 	int bs, ret;
    122 	unsigned i, count, foundone = 0, numFailuresThisRow;
    123 	int force;
    124 
    125 	force = cfgPtr->force;
    126 
    127 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    128 	if (ret)
    129 		goto fail;
    130 
    131 	disks = raidPtr->Disks;
    132 
    133 	numFailuresThisRow = 0;
    134 	for (c = 0; c < raidPtr->numCol; c++) {
    135 		ret = rf_ConfigureDisk(raidPtr,
    136 				       &cfgPtr->devnames[0][c][0],
    137 				       &disks[c], c);
    138 
    139 		if (ret)
    140 			goto fail;
    141 
    142 		if (disks[c].status == rf_ds_optimal) {
    143 			raidread_component_label(
    144 						 raidPtr->raid_cinfo[c].ci_dev,
    145 						 raidPtr->raid_cinfo[c].ci_vp,
    146 						 &raidPtr->raid_cinfo[c].ci_label);
    147 		}
    148 
    149 		if (disks[c].status != rf_ds_optimal) {
    150 			numFailuresThisRow++;
    151 		} else {
    152 			if (disks[c].numBlocks < min_numblks)
    153 				min_numblks = disks[c].numBlocks;
    154 			DPRINTF6("Disk at col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    155 				 c, disks[c].devname,
    156 				 (long int) disks[c].numBlocks,
    157 				 disks[c].blockSize,
    158 				 (long int) disks[c].numBlocks *
    159 				 disks[c].blockSize / 1024 / 1024);
    160 		}
    161 	}
    162 	/* XXX fix for n-fault tolerant */
    163 	/* XXX this should probably check to see how many failures
    164 	   we can handle for this configuration! */
    165 	if (numFailuresThisRow > 0)
    166 		raidPtr->status = rf_rs_degraded;
    167 
    168 	/* all disks must be the same size & have the same block size, bs must
    169 	 * be a power of 2 */
    170 	bs = 0;
    171 	foundone = 0;
    172 	for (c = 0; c < raidPtr->numCol; c++) {
    173 		if (disks[c].status == rf_ds_optimal) {
    174 			bs = disks[c].blockSize;
    175 			foundone = 1;
    176 			break;
    177 		}
    178 	}
    179 	if (!foundone) {
    180 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    181 		ret = EINVAL;
    182 		goto fail;
    183 	}
    184 	for (count = 0, i = 1; i; i <<= 1)
    185 		if (bs & i)
    186 			count++;
    187 	if (count != 1) {
    188 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    189 		ret = EINVAL;
    190 		goto fail;
    191 	}
    192 
    193 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    194 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    195 		if (force != 0) {
    196 			printf("raid%d: Fatal errors being ignored.\n",
    197 			       raidPtr->raidid);
    198 		} else {
    199 			ret = EINVAL;
    200 			goto fail;
    201 		}
    202 	}
    203 
    204 	for (c = 0; c < raidPtr->numCol; c++) {
    205 		if (disks[c].status == rf_ds_optimal) {
    206 			if (disks[c].blockSize != bs) {
    207 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
    208 				ret = EINVAL;
    209 				goto fail;
    210 			}
    211 			if (disks[c].numBlocks != min_numblks) {
    212 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
    213 					     c, (int) min_numblks);
    214 				disks[c].numBlocks = min_numblks;
    215 			}
    216 		}
    217 	}
    218 
    219 	raidPtr->sectorsPerDisk = min_numblks;
    220 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    221 	raidPtr->bytesPerSector = bs;
    222 	raidPtr->sectorMask = bs - 1;
    223 	return (0);
    224 
    225 fail:
    226 
    227 	rf_UnconfigureVnodes( raidPtr );
    228 
    229 	return (ret);
    230 }
    231 
    232 
    233 /****************************************************************************
    234  * set up the data structures describing the spare disks in the array
    235  * recall from the above comment that the spare disk descriptors are stored
    236  * in row zero, which is specially expanded to hold them.
    237  ****************************************************************************/
    238 int
    239 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    240 	RF_ShutdownList_t ** listp;
    241 	RF_Raid_t * raidPtr;
    242 	RF_Config_t * cfgPtr;
    243 {
    244 	int     i, ret;
    245 	unsigned int bs;
    246 	RF_RaidDisk_t *disks;
    247 	int     num_spares_done;
    248 
    249 	num_spares_done = 0;
    250 
    251 	/* The space for the spares should have already been allocated by
    252 	 * ConfigureDisks() */
    253 
    254 	disks = &raidPtr->Disks[raidPtr->numCol];
    255 	for (i = 0; i < raidPtr->numSpare; i++) {
    256 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    257 				       &disks[i], raidPtr->numCol + i);
    258 		if (ret)
    259 			goto fail;
    260 		if (disks[i].status != rf_ds_optimal) {
    261 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    262 				     &cfgPtr->spare_names[i][0]);
    263 		} else {
    264 			disks[i].status = rf_ds_spare;	/* change status to
    265 							 * spare */
    266 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    267 			    disks[i].devname,
    268 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    269 			    (long int) disks[i].numBlocks *
    270 				 disks[i].blockSize / 1024 / 1024);
    271 		}
    272 		num_spares_done++;
    273 	}
    274 
    275 	/* check sizes and block sizes on spare disks */
    276 	bs = 1 << raidPtr->logBytesPerSector;
    277 	for (i = 0; i < raidPtr->numSpare; i++) {
    278 		if (disks[i].blockSize != bs) {
    279 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    280 			ret = EINVAL;
    281 			goto fail;
    282 		}
    283 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    284 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    285 				     disks[i].devname, disks[i].blockSize,
    286 				     (long int) raidPtr->sectorsPerDisk);
    287 			ret = EINVAL;
    288 			goto fail;
    289 		} else
    290 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    291 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    292 
    293 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    294 			}
    295 	}
    296 
    297 	return (0);
    298 
    299 fail:
    300 
    301 	/* Release the hold on the main components.  We've failed to allocate
    302 	 * a spare, and since we're failing, we need to free things..
    303 
    304 	 XXX failing to allocate a spare is *not* that big of a deal...
    305 	 We *can* survive without it, if need be, esp. if we get hot
    306 	 adding working.
    307 
    308 	 If we don't fail out here, then we need a way to remove this spare...
    309 	 that should be easier to do here than if we are "live"...
    310 
    311 	 */
    312 
    313 	rf_UnconfigureVnodes( raidPtr );
    314 
    315 	return (ret);
    316 }
    317 
    318 static int
    319 rf_AllocDiskStructures(raidPtr, cfgPtr)
    320 	RF_Raid_t *raidPtr;
    321  	RF_Config_t *cfgPtr;
    322 {
    323 	int ret;
    324 
    325 	/* We allocate RF_MAXSPARE on the first row so that we
    326 	   have room to do hot-swapping of spares */
    327 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
    328 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    329 			raidPtr->cleanupList);
    330 	if (raidPtr->Disks == NULL) {
    331 		ret = ENOMEM;
    332 		goto fail;
    333 	}
    334 
    335 	/* get space for device specific stuff.. */
    336 	RF_MallocAndAdd(raidPtr->raid_cinfo,
    337 			(raidPtr->numCol + raidPtr->numSpare) *
    338 			sizeof(struct raidcinfo), (struct raidcinfo *),
    339 			raidPtr->cleanupList);
    340 
    341 	if (raidPtr->raid_cinfo == NULL) {
    342 		ret = ENOMEM;
    343 		goto fail;
    344 	}
    345 
    346 	return(0);
    347 fail:
    348 	rf_UnconfigureVnodes( raidPtr );
    349 
    350 	return(ret);
    351 }
    352 
    353 
    354 /* configure a single disk during auto-configuration at boot */
    355 int
    356 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    357 	RF_Raid_t *raidPtr;
    358 	RF_Config_t *cfgPtr;
    359 	RF_AutoConfig_t *auto_config;
    360 {
    361 	RF_RaidDisk_t *disks;
    362 	RF_RaidDisk_t *diskPtr;
    363 	RF_RowCol_t c;
    364 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    365 	int bs, ret;
    366 	int numFailuresThisRow;
    367 	RF_AutoConfig_t *ac;
    368 	int parity_good;
    369 	int mod_counter;
    370 	int mod_counter_found;
    371 
    372 #if DEBUG
    373 	printf("Starting autoconfiguration of RAID set...\n");
    374 #endif
    375 
    376 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    377 	if (ret)
    378 		goto fail;
    379 
    380 	disks = raidPtr->Disks;
    381 
    382 	/* assume the parity will be fine.. */
    383 	parity_good = RF_RAID_CLEAN;
    384 
    385 	/* Check for mod_counters that are too low */
    386 	mod_counter_found = 0;
    387 	mod_counter = 0;
    388 	ac = auto_config;
    389 	while(ac!=NULL) {
    390 		if (mod_counter_found==0) {
    391 			mod_counter = ac->clabel->mod_counter;
    392 			mod_counter_found = 1;
    393 		} else {
    394 			if (ac->clabel->mod_counter > mod_counter) {
    395 				mod_counter = ac->clabel->mod_counter;
    396 			}
    397 		}
    398 		ac->flag = 0; /* clear the general purpose flag */
    399 		ac = ac->next;
    400 	}
    401 
    402 	bs = 0;
    403 
    404 	numFailuresThisRow = 0;
    405 	for (c = 0; c < raidPtr->numCol; c++) {
    406 		diskPtr = &disks[c];
    407 
    408 		/* find this row/col in the autoconfig */
    409 #if DEBUG
    410 		printf("Looking for %d in autoconfig\n",c);
    411 #endif
    412 		ac = auto_config;
    413 		while(ac!=NULL) {
    414 			if (ac->clabel==NULL) {
    415 				/* big-time bad news. */
    416 				goto fail;
    417 			}
    418 			if ((ac->clabel->column == c) &&
    419 			    (ac->clabel->mod_counter == mod_counter)) {
    420 				/* it's this one... */
    421 				/* flag it as 'used', so we don't
    422 				   free it later. */
    423 				ac->flag = 1;
    424 #if DEBUG
    425 				printf("Found: %s at %d\n",
    426 				       ac->devname,c);
    427 #endif
    428 
    429 				break;
    430 			}
    431 			ac=ac->next;
    432 		}
    433 
    434 		if (ac==NULL) {
    435 			/* we didn't find an exact match with a
    436 			   correct mod_counter above... can we find
    437 			   one with an incorrect mod_counter to use
    438 			   instead?  (this one, if we find it, will be
    439 			   marked as failed once the set configures)
    440 			*/
    441 
    442 			ac = auto_config;
    443 			while(ac!=NULL) {
    444 				if (ac->clabel==NULL) {
    445 					/* big-time bad news. */
    446 					goto fail;
    447 				}
    448 				if (ac->clabel->column == c) {
    449 					/* it's this one...
    450 					   flag it as 'used', so we
    451 					   don't free it later. */
    452 					ac->flag = 1;
    453 #if DEBUG
    454 					printf("Found(low mod_counter): %s at %d\n",
    455 					       ac->devname,c);
    456 #endif
    457 
    458 					break;
    459 				}
    460 				ac=ac->next;
    461 			}
    462 		}
    463 
    464 
    465 
    466 		if (ac!=NULL) {
    467 			/* Found it.  Configure it.. */
    468 			diskPtr->blockSize = ac->clabel->blockSize;
    469 			diskPtr->numBlocks = ac->clabel->numBlocks;
    470 			/* Note: rf_protectedSectors is already
    471 			   factored into numBlocks here */
    472 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
    473 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
    474 
    475 			memcpy(&raidPtr->raid_cinfo[c].ci_label,
    476 			       ac->clabel, sizeof(*ac->clabel));
    477 			sprintf(diskPtr->devname, "/dev/%s",
    478 				ac->devname);
    479 
    480 			/* note the fact that this component was
    481 			   autoconfigured.  You'll need this info
    482 			   later.  Trust me :) */
    483 			diskPtr->auto_configured = 1;
    484 			diskPtr->dev = ac->dev;
    485 
    486 			/*
    487 			 * we allow the user to specify that
    488 			 * only a fraction of the disks should
    489 			 * be used this is just for debug: it
    490 			 * speeds up the parity scan
    491 			 */
    492 
    493 			diskPtr->numBlocks = diskPtr->numBlocks *
    494 				rf_sizePercentage / 100;
    495 
    496 			/* XXX these will get set multiple times,
    497 			   but since we're autoconfiguring, they'd
    498 			   better be always the same each time!
    499 			   If not, this is the least of your worries */
    500 
    501 			bs = diskPtr->blockSize;
    502 			min_numblks = diskPtr->numBlocks;
    503 
    504 			/* this gets done multiple times, but that's
    505 			   fine -- the serial number will be the same
    506 			   for all components, guaranteed */
    507 			raidPtr->serial_number = ac->clabel->serial_number;
    508 			/* check the last time the label was modified */
    509 
    510 			if (ac->clabel->mod_counter != mod_counter) {
    511 				/* Even though we've filled in all of
    512 				   the above, we don't trust this
    513 				   component since it's modification
    514 				   counter is not in sync with the
    515 				   rest, and we really consider it to
    516 				   be failed.  */
    517 				disks[c].status = rf_ds_failed;
    518 				numFailuresThisRow++;
    519 			} else {
    520 				if (ac->clabel->clean != RF_RAID_CLEAN) {
    521 					parity_good = RF_RAID_DIRTY;
    522 				}
    523 			}
    524 		} else {
    525 			/* Didn't find it at all!!  Component must
    526 			   really be dead */
    527 			disks[c].status = rf_ds_failed;
    528 			sprintf(disks[c].devname,"component%d",
    529 				raidPtr->numCol + c);
    530 			numFailuresThisRow++;
    531 		}
    532 	}
    533 	/* XXX fix for n-fault tolerant */
    534 	/* XXX this should probably check to see how many failures
    535 	   we can handle for this configuration! */
    536 	if (numFailuresThisRow > 0)
    537 		raidPtr->status = rf_rs_degraded;
    538 
    539 	/* close the device for the ones that didn't get used */
    540 
    541 	ac = auto_config;
    542 	while(ac!=NULL) {
    543 		if (ac->flag == 0) {
    544 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    545 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
    546 			vput(ac->vp);
    547 			ac->vp = NULL;
    548 #if DEBUG
    549 			printf("Released %s from auto-config set.\n",
    550 			       ac->devname);
    551 #endif
    552 		}
    553 		ac = ac->next;
    554 	}
    555 
    556 	raidPtr->mod_counter = mod_counter;
    557 
    558 	/* note the state of the parity, if any */
    559 	raidPtr->parity_good = parity_good;
    560 	raidPtr->sectorsPerDisk = min_numblks;
    561 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    562 	raidPtr->bytesPerSector = bs;
    563 	raidPtr->sectorMask = bs - 1;
    564 	return (0);
    565 
    566 fail:
    567 
    568 	rf_UnconfigureVnodes( raidPtr );
    569 
    570 	return (ret);
    571 
    572 }
    573 
    574 /* configure a single disk in the array */
    575 int
    576 rf_ConfigureDisk(raidPtr, buf, diskPtr, col)
    577 	RF_Raid_t *raidPtr;
    578 	char   *buf;
    579 	RF_RaidDisk_t *diskPtr;
    580 	RF_RowCol_t col;
    581 {
    582 	char   *p;
    583 	struct partinfo dpart;
    584 	struct vnode *vp;
    585 	struct vattr va;
    586 	struct proc *proc;
    587 	int     error;
    588 
    589 	p = rf_find_non_white(buf);
    590 	if (p[strlen(p) - 1] == '\n') {
    591 		/* strip off the newline */
    592 		p[strlen(p) - 1] = '\0';
    593 	}
    594 	(void) strcpy(diskPtr->devname, p);
    595 
    596 	proc = raidPtr->engine_thread;
    597 
    598 	/* Let's start by claiming the component is fine and well... */
    599 	diskPtr->status = rf_ds_optimal;
    600 
    601 	raidPtr->raid_cinfo[col].ci_vp = NULL;
    602 	raidPtr->raid_cinfo[col].ci_dev = 0;
    603 
    604 	error = raidlookup(diskPtr->devname, proc, &vp);
    605 	if (error) {
    606 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    607 		if (error == ENXIO) {
    608 			/* the component isn't there... must be dead :-( */
    609 			diskPtr->status = rf_ds_failed;
    610 		} else {
    611 			return (error);
    612 		}
    613 	}
    614 	if (diskPtr->status == rf_ds_optimal) {
    615 
    616 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    617 			return (error);
    618 		}
    619 		error = VOP_IOCTL(vp, DIOCGPART, &dpart,
    620 				  FREAD, proc->p_ucred, proc);
    621 		if (error) {
    622 			return (error);
    623 		}
    624 
    625 		diskPtr->blockSize = dpart.disklab->d_secsize;
    626 
    627 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    628 		diskPtr->partitionSize = dpart.part->p_size;
    629 
    630 		raidPtr->raid_cinfo[col].ci_vp = vp;
    631 		raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
    632 
    633 		/* This component was not automatically configured */
    634 		diskPtr->auto_configured = 0;
    635 		diskPtr->dev = va.va_rdev;
    636 
    637 		/* we allow the user to specify that only a fraction of the
    638 		 * disks should be used this is just for debug:  it speeds up
    639 		 * the parity scan */
    640 		diskPtr->numBlocks = diskPtr->numBlocks *
    641 			rf_sizePercentage / 100;
    642 	}
    643 	return (0);
    644 }
    645 
    646 static void
    647 rf_print_label_status( raidPtr, column, dev_name, ci_label )
    648 	RF_Raid_t *raidPtr;
    649 	int column;
    650 	char *dev_name;
    651 	RF_ComponentLabel_t *ci_label;
    652 {
    653 
    654 	printf("raid%d: Component %s being configured at col: %d\n",
    655 	       raidPtr->raidid, dev_name, column );
    656 	printf("         Column: %d Num Columns: %d\n",
    657 	       ci_label->column,
    658 	       ci_label->num_columns);
    659 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    660 	       ci_label->version, ci_label->serial_number,
    661 	       ci_label->mod_counter);
    662 	printf("         Clean: %s Status: %d\n",
    663 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    664 }
    665 
    666 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    667 				  serial_number, mod_counter )
    668 	RF_Raid_t *raidPtr;
    669 	int row;
    670 	int column;
    671 	char *dev_name;
    672 	RF_ComponentLabel_t *ci_label;
    673 	int serial_number;
    674 	int mod_counter;
    675 {
    676 	int fatal_error = 0;
    677 
    678 	if (serial_number != ci_label->serial_number) {
    679 		printf("%s has a different serial number: %d %d\n",
    680 		       dev_name, serial_number, ci_label->serial_number);
    681 		fatal_error = 1;
    682 	}
    683 	if (mod_counter != ci_label->mod_counter) {
    684 		printf("%s has a different modfication count: %d %d\n",
    685 		       dev_name, mod_counter, ci_label->mod_counter);
    686 	}
    687 
    688 	if (row != ci_label->row) {
    689 		printf("Row out of alignment for: %s\n", dev_name);
    690 		fatal_error = 1;
    691 	}
    692 	if (column != ci_label->column) {
    693 		printf("Column out of alignment for: %s\n", dev_name);
    694 		fatal_error = 1;
    695 	}
    696 	if (raidPtr->numCol != ci_label->num_columns) {
    697 		printf("Number of columns do not match for: %s\n", dev_name);
    698 		fatal_error = 1;
    699 	}
    700 	if (ci_label->clean == 0) {
    701 		/* it's not clean, but that's not fatal */
    702 		printf("%s is not clean!\n", dev_name);
    703 	}
    704 	return(fatal_error);
    705 }
    706 
    707 
    708 /*
    709 
    710    rf_CheckLabels() - check all the component labels for consistency.
    711    Return an error if there is anything major amiss.
    712 
    713  */
    714 
    715 int
    716 rf_CheckLabels( raidPtr, cfgPtr )
    717 	RF_Raid_t *raidPtr;
    718 	RF_Config_t *cfgPtr;
    719 {
    720 	int c;
    721 	char *dev_name;
    722 	RF_ComponentLabel_t *ci_label;
    723 	int serial_number = 0;
    724 	int mod_number = 0;
    725 	int fatal_error = 0;
    726 	int mod_values[4];
    727 	int mod_count[4];
    728 	int ser_values[4];
    729 	int ser_count[4];
    730 	int num_ser;
    731 	int num_mod;
    732 	int i;
    733 	int found;
    734 	int hosed_column;
    735 	int too_fatal;
    736 	int parity_good;
    737 	int force;
    738 
    739 	hosed_column = -1;
    740 	too_fatal = 0;
    741 	force = cfgPtr->force;
    742 
    743 	/*
    744 	   We're going to try to be a little intelligent here.  If one
    745 	   component's label is bogus, and we can identify that it's the
    746 	   *only* one that's gone, we'll mark it as "failed" and allow
    747 	   the configuration to proceed.  This will be the *only* case
    748 	   that we'll proceed if there would be (otherwise) fatal errors.
    749 
    750 	   Basically we simply keep a count of how many components had
    751 	   what serial number.  If all but one agree, we simply mark
    752 	   the disagreeing component as being failed, and allow
    753 	   things to come up "normally".
    754 
    755 	   We do this first for serial numbers, and then for "mod_counter".
    756 
    757 	 */
    758 
    759 	num_ser = 0;
    760 	num_mod = 0;
    761 
    762 	for (c = 0; c < raidPtr->numCol; c++) {
    763 		ci_label = &raidPtr->raid_cinfo[c].ci_label;
    764 		found=0;
    765 		for(i=0;i<num_ser;i++) {
    766 			if (ser_values[i] == ci_label->serial_number) {
    767 				ser_count[i]++;
    768 				found=1;
    769 				break;
    770 			}
    771 		}
    772 		if (!found) {
    773 			ser_values[num_ser] = ci_label->serial_number;
    774 			ser_count[num_ser] = 1;
    775 			num_ser++;
    776 			if (num_ser>2) {
    777 				fatal_error = 1;
    778 				break;
    779 			}
    780 		}
    781 		found=0;
    782 		for(i=0;i<num_mod;i++) {
    783 			if (mod_values[i] == ci_label->mod_counter) {
    784 				mod_count[i]++;
    785 				found=1;
    786 				break;
    787 			}
    788 		}
    789 		if (!found) {
    790 			mod_values[num_mod] = ci_label->mod_counter;
    791 			mod_count[num_mod] = 1;
    792 			num_mod++;
    793 			if (num_mod>2) {
    794 				fatal_error = 1;
    795 				break;
    796 			}
    797 		}
    798 	}
    799 #if DEBUG
    800 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    801 	for(i=0;i<num_ser;i++) {
    802 		printf("%d %d\n", ser_values[i], ser_count[i]);
    803 	}
    804 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    805 	for(i=0;i<num_mod;i++) {
    806 		printf("%d %d\n", mod_values[i], mod_count[i]);
    807 	}
    808 #endif
    809 	serial_number = ser_values[0];
    810 	if (num_ser == 2) {
    811 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    812 			/* Locate the maverick component */
    813 			if (ser_count[1] > ser_count[0]) {
    814 				serial_number = ser_values[1];
    815 			}
    816 
    817 			for (c = 0; c < raidPtr->numCol; c++) {
    818 				ci_label = &raidPtr->raid_cinfo[c].ci_label;
    819 				if (serial_number != ci_label->serial_number) {
    820 					hosed_column = c;
    821 					break;
    822 				}
    823 			}
    824 			printf("Hosed component: %s\n",
    825 			       &cfgPtr->devnames[0][hosed_column][0]);
    826 			if (!force) {
    827 				/* we'll fail this component, as if there are
    828 				   other major errors, we arn't forcing things
    829 				   and we'll abort the config anyways */
    830 				raidPtr->Disks[hosed_column].status
    831 					= rf_ds_failed;
    832 				raidPtr->numFailures++;
    833 				raidPtr->status = rf_rs_degraded;
    834 			}
    835 		} else {
    836 			too_fatal = 1;
    837 		}
    838 		if (cfgPtr->parityConfig == '0') {
    839 			/* We've identified two different serial numbers.
    840 			   RAID 0 can't cope with that, so we'll punt */
    841 			too_fatal = 1;
    842 		}
    843 
    844 	}
    845 
    846 	/* record the serial number for later.  If we bail later, setting
    847 	   this doesn't matter, otherwise we've got the best guess at the
    848 	   correct serial number */
    849 	raidPtr->serial_number = serial_number;
    850 
    851 	mod_number = mod_values[0];
    852 	if (num_mod == 2) {
    853 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    854 			/* Locate the maverick component */
    855 			if (mod_count[1] > mod_count[0]) {
    856 				mod_number = mod_values[1];
    857 			} else if (mod_count[1] < mod_count[0]) {
    858 				mod_number = mod_values[0];
    859 			} else {
    860 				/* counts of different modification values
    861 				   are the same.   Assume greater value is
    862 				   the correct one, all other things
    863 				   considered */
    864 				if (mod_values[0] > mod_values[1]) {
    865 					mod_number = mod_values[0];
    866 				} else {
    867 					mod_number = mod_values[1];
    868 				}
    869 
    870 			}
    871 
    872 			for (c = 0; c < raidPtr->numCol; c++) {
    873 				ci_label = &raidPtr->raid_cinfo[c].ci_label;
    874 				if (mod_number != ci_label->mod_counter) {
    875 					if (hosed_column == c) {
    876 						/* same one.  Can
    877 						   deal with it.  */
    878 					} else {
    879 						hosed_column = c;
    880 						if (num_ser != 1) {
    881 							too_fatal = 1;
    882 							break;
    883 						}
    884 					}
    885 				}
    886 			}
    887 			printf("Hosed component: %s\n",
    888 			       &cfgPtr->devnames[0][hosed_column][0]);
    889 			if (!force) {
    890 				/* we'll fail this component, as if there are
    891 				   other major errors, we arn't forcing things
    892 				   and we'll abort the config anyways */
    893 				if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
    894 					raidPtr->Disks[hosed_column].status
    895 						= rf_ds_failed;
    896 					raidPtr->numFailures++;
    897 					raidPtr->status = rf_rs_degraded;
    898 				}
    899 			}
    900 		} else {
    901 			too_fatal = 1;
    902 		}
    903 		if (cfgPtr->parityConfig == '0') {
    904 			/* We've identified two different mod counters.
    905 			   RAID 0 can't cope with that, so we'll punt */
    906 			too_fatal = 1;
    907 		}
    908 	}
    909 
    910 	raidPtr->mod_counter = mod_number;
    911 
    912 	if (too_fatal) {
    913 		/* we've had both a serial number mismatch, and a mod_counter
    914 		   mismatch -- and they involved two different components!!
    915 		   Bail -- make things fail so that the user must force
    916 		   the issue... */
    917 		hosed_column = -1;
    918 	}
    919 
    920 	if (num_ser > 2) {
    921 		printf("raid%d: Too many different serial numbers!\n",
    922 		       raidPtr->raidid);
    923 	}
    924 
    925 	if (num_mod > 2) {
    926 		printf("raid%d: Too many different mod counters!\n",
    927 		       raidPtr->raidid);
    928 	}
    929 
    930 	/* we start by assuming the parity will be good, and flee from
    931 	   that notion at the slightest sign of trouble */
    932 
    933 	parity_good = RF_RAID_CLEAN;
    934 
    935 	for (c = 0; c < raidPtr->numCol; c++) {
    936 		dev_name = &cfgPtr->devnames[0][c][0];
    937 		ci_label = &raidPtr->raid_cinfo[c].ci_label;
    938 
    939 		if (c == hosed_column) {
    940 			printf("raid%d: Ignoring %s\n",
    941 			       raidPtr->raidid, dev_name);
    942 		} else {
    943 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
    944 			if (rf_check_label_vitals( raidPtr, 0, c,
    945 						   dev_name, ci_label,
    946 						   serial_number,
    947 						   mod_number )) {
    948 				fatal_error = 1;
    949 			}
    950 			if (ci_label->clean != RF_RAID_CLEAN) {
    951 				parity_good = RF_RAID_DIRTY;
    952 			}
    953 		}
    954 	}
    955 
    956 	if (fatal_error) {
    957 		parity_good = RF_RAID_DIRTY;
    958 	}
    959 
    960 	/* we note the state of the parity */
    961 	raidPtr->parity_good = parity_good;
    962 
    963 	return(fatal_error);
    964 }
    965 
    966 int
    967 rf_add_hot_spare(raidPtr, sparePtr)
    968 	RF_Raid_t *raidPtr;
    969 	RF_SingleComponent_t *sparePtr;
    970 {
    971 	RF_RaidDisk_t *disks;
    972 	RF_DiskQueue_t *spareQueues;
    973 	int ret;
    974 	unsigned int bs;
    975 	int spare_number;
    976 
    977 	ret=0;
    978 
    979 	if (raidPtr->numSpare >= RF_MAXSPARE) {
    980 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
    981 		return(EINVAL);
    982 	}
    983 
    984 	RF_LOCK_MUTEX(raidPtr->mutex);
    985 	while (raidPtr->adding_hot_spare==1) {
    986 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
    987 			&(raidPtr->mutex));
    988 	}
    989 	raidPtr->adding_hot_spare=1;
    990 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    991 
    992 	/* the beginning of the spares... */
    993 	disks = &raidPtr->Disks[raidPtr->numCol];
    994 
    995 	spare_number = raidPtr->numSpare;
    996 
    997 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
    998 			       &disks[spare_number],
    999 			       raidPtr->numCol + spare_number);
   1000 
   1001 	if (ret)
   1002 		goto fail;
   1003 	if (disks[spare_number].status != rf_ds_optimal) {
   1004 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1005 			     sparePtr->component_name);
   1006 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1007 		ret=EINVAL;
   1008 		goto fail;
   1009 	} else {
   1010 		disks[spare_number].status = rf_ds_spare;
   1011 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1012 			 disks[spare_number].devname,
   1013 			 (long int) disks[spare_number].numBlocks,
   1014 			 disks[spare_number].blockSize,
   1015 			 (long int) disks[spare_number].numBlocks *
   1016 			 disks[spare_number].blockSize / 1024 / 1024);
   1017 	}
   1018 
   1019 
   1020 	/* check sizes and block sizes on the spare disk */
   1021 	bs = 1 << raidPtr->logBytesPerSector;
   1022 	if (disks[spare_number].blockSize != bs) {
   1023 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1024 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1025 		ret = EINVAL;
   1026 		goto fail;
   1027 	}
   1028 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1029 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1030 			     disks[spare_number].devname,
   1031 			     disks[spare_number].blockSize,
   1032 			     (long int) raidPtr->sectorsPerDisk);
   1033 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1034 		ret = EINVAL;
   1035 		goto fail;
   1036 	} else {
   1037 		if (disks[spare_number].numBlocks >
   1038 		    raidPtr->sectorsPerDisk) {
   1039 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1040 				     (long int) raidPtr->sectorsPerDisk);
   1041 
   1042 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1043 		}
   1044 	}
   1045 
   1046 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
   1047 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1048 				 raidPtr->numCol + spare_number,
   1049 				 raidPtr->qType,
   1050 				 raidPtr->sectorsPerDisk,
   1051 				 raidPtr->Disks[raidPtr->numCol +
   1052 						  spare_number].dev,
   1053 				 raidPtr->maxOutstanding,
   1054 				 &raidPtr->shutdownList,
   1055 				 raidPtr->cleanupList);
   1056 
   1057 	RF_LOCK_MUTEX(raidPtr->mutex);
   1058 	raidPtr->numSpare++;
   1059 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1060 
   1061 fail:
   1062 	RF_LOCK_MUTEX(raidPtr->mutex);
   1063 	raidPtr->adding_hot_spare=0;
   1064 	wakeup(&(raidPtr->adding_hot_spare));
   1065 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1066 
   1067 	return(ret);
   1068 }
   1069 
   1070 int
   1071 rf_remove_hot_spare(raidPtr,sparePtr)
   1072 	RF_Raid_t *raidPtr;
   1073 	RF_SingleComponent_t *sparePtr;
   1074 {
   1075 	int spare_number;
   1076 
   1077 
   1078 	if (raidPtr->numSpare==0) {
   1079 		printf("No spares to remove!\n");
   1080 		return(EINVAL);
   1081 	}
   1082 
   1083 	spare_number = sparePtr->column;
   1084 
   1085 	return(EINVAL); /* XXX not implemented yet */
   1086 #if 0
   1087 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1088 		return(EINVAL);
   1089 	}
   1090 
   1091 	/* verify that this spare isn't in use... */
   1092 
   1093 
   1094 
   1095 
   1096 	/* it's gone.. */
   1097 
   1098 	raidPtr->numSpare--;
   1099 
   1100 	return(0);
   1101 #endif
   1102 }
   1103 
   1104 
   1105 int
   1106 rf_delete_component(raidPtr,component)
   1107 	RF_Raid_t *raidPtr;
   1108 	RF_SingleComponent_t *component;
   1109 {
   1110 	RF_RaidDisk_t *disks;
   1111 
   1112 	if ((component->column < 0) ||
   1113 	    (component->column >= raidPtr->numCol)) {
   1114 		return(EINVAL);
   1115 	}
   1116 
   1117 	disks = &raidPtr->Disks[component->column];
   1118 
   1119 	/* 1. This component must be marked as 'failed' */
   1120 
   1121 	return(EINVAL); /* Not implemented yet. */
   1122 }
   1123 
   1124 int
   1125 rf_incorporate_hot_spare(raidPtr,component)
   1126 	RF_Raid_t *raidPtr;
   1127 	RF_SingleComponent_t *component;
   1128 {
   1129 
   1130 	/* Issues here include how to 'move' this in if there is IO
   1131 	   taking place (e.g. component queues and such) */
   1132 
   1133 	return(EINVAL); /* Not implemented yet. */
   1134 }
   1135