Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.44.2.5
      1 /*	$NetBSD: rf_disks.c,v 1.44.2.5 2004/09/21 13:32:52 skrll Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.44.2.5 2004/09/21 13:32:52 skrll Exp $");
     71 
     72 #include <dev/raidframe/raidframevar.h>
     73 
     74 #include "rf_raid.h"
     75 #include "rf_alloclist.h"
     76 #include "rf_utils.h"
     77 #include "rf_general.h"
     78 #include "rf_options.h"
     79 #include "rf_kintf.h"
     80 #include "rf_netbsd.h"
     81 
     82 #include <sys/param.h>
     83 #include <sys/systm.h>
     84 #include <sys/proc.h>
     85 #include <sys/ioctl.h>
     86 #include <sys/fcntl.h>
     87 #include <sys/vnode.h>
     88 
     89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     90 static void rf_print_label_status( RF_Raid_t *, int, char *,
     91 				  RF_ComponentLabel_t *);
     92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     93 				  RF_ComponentLabel_t *, int, int );
     94 
/*
 * Debug printf wrappers: the message is printed only when rf_diskDebug
 * is non-zero.  The digit in the name is the total number of arguments,
 * format string included.
 *
 * Each macro expands to a single do { } while (0) statement so that it
 * can be used as the body of an unbraced if without capturing a
 * following `else'.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     97 
     98 /**************************************************************************
     99  *
    100  * initialize the disks comprising the array
    101  *
    102  * We want the spare disks to have regular row,col numbers so that we can
     103  * easily substitute a spare for a failed disk.  But, the driver code assumes
    104  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    105  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    106  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    107  * rest, and put all the spares in it.  This probably needs to get changed
    108  * eventually.
    109  *
    110  **************************************************************************/
    111 
    112 int
    113 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    114 		  RF_Config_t *cfgPtr)
    115 {
    116 	RF_RaidDisk_t *disks;
    117 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    118 	RF_RowCol_t c;
    119 	int bs, ret;
    120 	unsigned i, count, foundone = 0, numFailuresThisRow;
    121 	int force;
    122 
    123 	force = cfgPtr->force;
    124 
    125 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    126 	if (ret)
    127 		goto fail;
    128 
    129 	disks = raidPtr->Disks;
    130 
    131 	numFailuresThisRow = 0;
    132 	for (c = 0; c < raidPtr->numCol; c++) {
    133 		ret = rf_ConfigureDisk(raidPtr,
    134 				       &cfgPtr->devnames[0][c][0],
    135 				       &disks[c], c);
    136 
    137 		if (ret)
    138 			goto fail;
    139 
    140 		if (disks[c].status == rf_ds_optimal) {
    141 			raidread_component_label(
    142 						 raidPtr->raid_cinfo[c].ci_dev,
    143 						 raidPtr->raid_cinfo[c].ci_vp,
    144 						 &raidPtr->raid_cinfo[c].ci_label);
    145 		}
    146 
    147 		if (disks[c].status != rf_ds_optimal) {
    148 			numFailuresThisRow++;
    149 		} else {
    150 			if (disks[c].numBlocks < min_numblks)
    151 				min_numblks = disks[c].numBlocks;
    152 			DPRINTF6("Disk at col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    153 				 c, disks[c].devname,
    154 				 (long int) disks[c].numBlocks,
    155 				 disks[c].blockSize,
    156 				 (long int) disks[c].numBlocks *
    157 				 disks[c].blockSize / 1024 / 1024);
    158 		}
    159 	}
    160 	/* XXX fix for n-fault tolerant */
    161 	/* XXX this should probably check to see how many failures
    162 	   we can handle for this configuration! */
    163 	if (numFailuresThisRow > 0)
    164 		raidPtr->status = rf_rs_degraded;
    165 
    166 	/* all disks must be the same size & have the same block size, bs must
    167 	 * be a power of 2 */
    168 	bs = 0;
    169 	foundone = 0;
    170 	for (c = 0; c < raidPtr->numCol; c++) {
    171 		if (disks[c].status == rf_ds_optimal) {
    172 			bs = disks[c].blockSize;
    173 			foundone = 1;
    174 			break;
    175 		}
    176 	}
    177 	if (!foundone) {
    178 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    179 		ret = EINVAL;
    180 		goto fail;
    181 	}
    182 	for (count = 0, i = 1; i; i <<= 1)
    183 		if (bs & i)
    184 			count++;
    185 	if (count != 1) {
    186 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    187 		ret = EINVAL;
    188 		goto fail;
    189 	}
    190 
    191 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    192 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    193 		if (force != 0) {
    194 			printf("raid%d: Fatal errors being ignored.\n",
    195 			       raidPtr->raidid);
    196 		} else {
    197 			ret = EINVAL;
    198 			goto fail;
    199 		}
    200 	}
    201 
    202 	for (c = 0; c < raidPtr->numCol; c++) {
    203 		if (disks[c].status == rf_ds_optimal) {
    204 			if (disks[c].blockSize != bs) {
    205 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
    206 				ret = EINVAL;
    207 				goto fail;
    208 			}
    209 			if (disks[c].numBlocks != min_numblks) {
    210 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
    211 					     c, (int) min_numblks);
    212 				disks[c].numBlocks = min_numblks;
    213 			}
    214 		}
    215 	}
    216 
    217 	raidPtr->sectorsPerDisk = min_numblks;
    218 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    219 	raidPtr->bytesPerSector = bs;
    220 	raidPtr->sectorMask = bs - 1;
    221 	return (0);
    222 
    223 fail:
    224 
    225 	rf_UnconfigureVnodes( raidPtr );
    226 
    227 	return (ret);
    228 }
    229 
    230 
    231 /****************************************************************************
    232  * set up the data structures describing the spare disks in the array
    233  * recall from the above comment that the spare disk descriptors are stored
    234  * in row zero, which is specially expanded to hold them.
    235  ****************************************************************************/
    236 int
    237 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    238 		       RF_Config_t *cfgPtr)
    239 {
    240 	int     i, ret;
    241 	unsigned int bs;
    242 	RF_RaidDisk_t *disks;
    243 	int     num_spares_done;
    244 
    245 	num_spares_done = 0;
    246 
    247 	/* The space for the spares should have already been allocated by
    248 	 * ConfigureDisks() */
    249 
    250 	disks = &raidPtr->Disks[raidPtr->numCol];
    251 	for (i = 0; i < raidPtr->numSpare; i++) {
    252 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    253 				       &disks[i], raidPtr->numCol + i);
    254 		if (ret)
    255 			goto fail;
    256 		if (disks[i].status != rf_ds_optimal) {
    257 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    258 				     &cfgPtr->spare_names[i][0]);
    259 		} else {
    260 			disks[i].status = rf_ds_spare;	/* change status to
    261 							 * spare */
    262 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    263 			    disks[i].devname,
    264 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    265 			    (long int) disks[i].numBlocks *
    266 				 disks[i].blockSize / 1024 / 1024);
    267 		}
    268 		num_spares_done++;
    269 	}
    270 
    271 	/* check sizes and block sizes on spare disks */
    272 	bs = 1 << raidPtr->logBytesPerSector;
    273 	for (i = 0; i < raidPtr->numSpare; i++) {
    274 		if (disks[i].blockSize != bs) {
    275 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    276 			ret = EINVAL;
    277 			goto fail;
    278 		}
    279 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    280 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    281 				     disks[i].devname, disks[i].blockSize,
    282 				     (long int) raidPtr->sectorsPerDisk);
    283 			ret = EINVAL;
    284 			goto fail;
    285 		} else
    286 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    287 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    288 
    289 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    290 			}
    291 	}
    292 
    293 	return (0);
    294 
    295 fail:
    296 
    297 	/* Release the hold on the main components.  We've failed to allocate
    298 	 * a spare, and since we're failing, we need to free things..
    299 
    300 	 XXX failing to allocate a spare is *not* that big of a deal...
    301 	 We *can* survive without it, if need be, esp. if we get hot
    302 	 adding working.
    303 
    304 	 If we don't fail out here, then we need a way to remove this spare...
    305 	 that should be easier to do here than if we are "live"...
    306 
    307 	 */
    308 
    309 	rf_UnconfigureVnodes( raidPtr );
    310 
    311 	return (ret);
    312 }
    313 
    314 static int
    315 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    316 {
    317 	int ret;
    318 
    319 	/* We allocate RF_MAXSPARE on the first row so that we
    320 	   have room to do hot-swapping of spares */
    321 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
    322 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    323 			raidPtr->cleanupList);
    324 	if (raidPtr->Disks == NULL) {
    325 		ret = ENOMEM;
    326 		goto fail;
    327 	}
    328 
    329 	/* get space for device specific stuff.. */
    330 	RF_MallocAndAdd(raidPtr->raid_cinfo,
    331 			(raidPtr->numCol + RF_MAXSPARE) *
    332 			sizeof(struct raidcinfo), (struct raidcinfo *),
    333 			raidPtr->cleanupList);
    334 
    335 	if (raidPtr->raid_cinfo == NULL) {
    336 		ret = ENOMEM;
    337 		goto fail;
    338 	}
    339 
    340 	return(0);
    341 fail:
    342 	rf_UnconfigureVnodes( raidPtr );
    343 
    344 	return(ret);
    345 }
    346 
    347 
    348 /* configure a single disk during auto-configuration at boot */
    349 int
    350 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
    351 		      RF_AutoConfig_t *auto_config)
    352 {
    353 	RF_RaidDisk_t *disks;
    354 	RF_RaidDisk_t *diskPtr;
    355 	RF_RowCol_t c;
    356 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    357 	int bs, ret;
    358 	int numFailuresThisRow;
    359 	RF_AutoConfig_t *ac;
    360 	int parity_good;
    361 	int mod_counter;
    362 	int mod_counter_found;
    363 
    364 #if DEBUG
    365 	printf("Starting autoconfiguration of RAID set...\n");
    366 #endif
    367 
    368 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    369 	if (ret)
    370 		goto fail;
    371 
    372 	disks = raidPtr->Disks;
    373 
    374 	/* assume the parity will be fine.. */
    375 	parity_good = RF_RAID_CLEAN;
    376 
    377 	/* Check for mod_counters that are too low */
    378 	mod_counter_found = 0;
    379 	mod_counter = 0;
    380 	ac = auto_config;
    381 	while(ac!=NULL) {
    382 		if (mod_counter_found==0) {
    383 			mod_counter = ac->clabel->mod_counter;
    384 			mod_counter_found = 1;
    385 		} else {
    386 			if (ac->clabel->mod_counter > mod_counter) {
    387 				mod_counter = ac->clabel->mod_counter;
    388 			}
    389 		}
    390 		ac->flag = 0; /* clear the general purpose flag */
    391 		ac = ac->next;
    392 	}
    393 
    394 	bs = 0;
    395 
    396 	numFailuresThisRow = 0;
    397 	for (c = 0; c < raidPtr->numCol; c++) {
    398 		diskPtr = &disks[c];
    399 
    400 		/* find this row/col in the autoconfig */
    401 #if DEBUG
    402 		printf("Looking for %d in autoconfig\n",c);
    403 #endif
    404 		ac = auto_config;
    405 		while(ac!=NULL) {
    406 			if (ac->clabel==NULL) {
    407 				/* big-time bad news. */
    408 				goto fail;
    409 			}
    410 			if ((ac->clabel->column == c) &&
    411 			    (ac->clabel->mod_counter == mod_counter)) {
    412 				/* it's this one... */
    413 				/* flag it as 'used', so we don't
    414 				   free it later. */
    415 				ac->flag = 1;
    416 #if DEBUG
    417 				printf("Found: %s at %d\n",
    418 				       ac->devname,c);
    419 #endif
    420 
    421 				break;
    422 			}
    423 			ac=ac->next;
    424 		}
    425 
    426 		if (ac==NULL) {
    427 			/* we didn't find an exact match with a
    428 			   correct mod_counter above... can we find
    429 			   one with an incorrect mod_counter to use
    430 			   instead?  (this one, if we find it, will be
    431 			   marked as failed once the set configures)
    432 			*/
    433 
    434 			ac = auto_config;
    435 			while(ac!=NULL) {
    436 				if (ac->clabel==NULL) {
    437 					/* big-time bad news. */
    438 					goto fail;
    439 				}
    440 				if (ac->clabel->column == c) {
    441 					/* it's this one...
    442 					   flag it as 'used', so we
    443 					   don't free it later. */
    444 					ac->flag = 1;
    445 #if DEBUG
    446 					printf("Found(low mod_counter): %s at %d\n",
    447 					       ac->devname,c);
    448 #endif
    449 
    450 					break;
    451 				}
    452 				ac=ac->next;
    453 			}
    454 		}
    455 
    456 
    457 
    458 		if (ac!=NULL) {
    459 			/* Found it.  Configure it.. */
    460 			diskPtr->blockSize = ac->clabel->blockSize;
    461 			diskPtr->numBlocks = ac->clabel->numBlocks;
    462 			/* Note: rf_protectedSectors is already
    463 			   factored into numBlocks here */
    464 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
    465 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
    466 
    467 			memcpy(&raidPtr->raid_cinfo[c].ci_label,
    468 			    ac->clabel, sizeof(*ac->clabel));
    469 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    470 			    "/dev/%s", ac->devname);
    471 
    472 			/* note the fact that this component was
    473 			   autoconfigured.  You'll need this info
    474 			   later.  Trust me :) */
    475 			diskPtr->auto_configured = 1;
    476 			diskPtr->dev = ac->dev;
    477 
    478 			/*
    479 			 * we allow the user to specify that
    480 			 * only a fraction of the disks should
    481 			 * be used this is just for debug: it
    482 			 * speeds up the parity scan
    483 			 */
    484 
    485 			diskPtr->numBlocks = diskPtr->numBlocks *
    486 				rf_sizePercentage / 100;
    487 
    488 			/* XXX these will get set multiple times,
    489 			   but since we're autoconfiguring, they'd
    490 			   better be always the same each time!
    491 			   If not, this is the least of your worries */
    492 
    493 			bs = diskPtr->blockSize;
    494 			min_numblks = diskPtr->numBlocks;
    495 
    496 			/* this gets done multiple times, but that's
    497 			   fine -- the serial number will be the same
    498 			   for all components, guaranteed */
    499 			raidPtr->serial_number = ac->clabel->serial_number;
    500 			/* check the last time the label was modified */
    501 
    502 			if (ac->clabel->mod_counter != mod_counter) {
    503 				/* Even though we've filled in all of
    504 				   the above, we don't trust this
    505 				   component since it's modification
    506 				   counter is not in sync with the
    507 				   rest, and we really consider it to
    508 				   be failed.  */
    509 				disks[c].status = rf_ds_failed;
    510 				numFailuresThisRow++;
    511 			} else {
    512 				if (ac->clabel->clean != RF_RAID_CLEAN) {
    513 					parity_good = RF_RAID_DIRTY;
    514 				}
    515 			}
    516 		} else {
    517 			/* Didn't find it at all!!  Component must
    518 			   really be dead */
    519 			disks[c].status = rf_ds_failed;
    520 			snprintf(disks[c].devname, sizeof(disks[c].devname),
    521 			    "component%d", c);
    522 			numFailuresThisRow++;
    523 		}
    524 	}
    525 	/* XXX fix for n-fault tolerant */
    526 	/* XXX this should probably check to see how many failures
    527 	   we can handle for this configuration! */
    528 	if (numFailuresThisRow > 0) {
    529 		raidPtr->status = rf_rs_degraded;
    530 		raidPtr->numFailures = numFailuresThisRow;
    531 	}
    532 
    533 	/* close the device for the ones that didn't get used */
    534 
    535 	ac = auto_config;
    536 	while(ac!=NULL) {
    537 		if (ac->flag == 0) {
    538 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    539 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
    540 			vput(ac->vp);
    541 			ac->vp = NULL;
    542 #if DEBUG
    543 			printf("Released %s from auto-config set.\n",
    544 			       ac->devname);
    545 #endif
    546 		}
    547 		ac = ac->next;
    548 	}
    549 
    550 	raidPtr->mod_counter = mod_counter;
    551 
    552 	/* note the state of the parity, if any */
    553 	raidPtr->parity_good = parity_good;
    554 	raidPtr->sectorsPerDisk = min_numblks;
    555 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    556 	raidPtr->bytesPerSector = bs;
    557 	raidPtr->sectorMask = bs - 1;
    558 	return (0);
    559 
    560 fail:
    561 
    562 	rf_UnconfigureVnodes( raidPtr );
    563 
    564 	return (ret);
    565 
    566 }
    567 
    568 /* configure a single disk in the array */
    569 int
    570 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
    571 		 RF_RowCol_t col)
    572 {
    573 	char   *p;
    574 	struct partinfo dpart;
    575 	struct vnode *vp;
    576 	struct vattr va;
    577 	struct lwp *l;
    578 	int     error;
    579 
    580 	p = rf_find_non_white(buf);
    581 	if (p[strlen(p) - 1] == '\n') {
    582 		/* strip off the newline */
    583 		p[strlen(p) - 1] = '\0';
    584 	}
    585 	(void) strcpy(diskPtr->devname, p);
    586 
    587 	l = LIST_FIRST(&raidPtr->engine_thread->p_lwps);
    588 
    589 	/* Let's start by claiming the component is fine and well... */
    590 	diskPtr->status = rf_ds_optimal;
    591 
    592 	raidPtr->raid_cinfo[col].ci_vp = NULL;
    593 	raidPtr->raid_cinfo[col].ci_dev = 0;
    594 
    595 	if (!strcmp("absent", diskPtr->devname)) {
    596 		printf("Ignoring missing component at column %d\n", col);
    597 		sprintf(diskPtr->devname, "component%d", col);
    598 		diskPtr->status = rf_ds_failed;
    599 		return (0);
    600 	}
    601 
    602 	error = raidlookup(diskPtr->devname, l, &vp);
    603 	if (error) {
    604 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    605 		if (error == ENXIO) {
    606 			/* the component isn't there... must be dead :-( */
    607 			diskPtr->status = rf_ds_failed;
    608 		} else {
    609 			return (error);
    610 		}
    611 	}
    612 	if (diskPtr->status == rf_ds_optimal) {
    613 
    614 		if ((error = VOP_GETATTR(vp, &va,
    615 		    l->l_proc->p_ucred, l)) != 0) {
    616 			return (error);
    617 		}
    618 		error = VOP_IOCTL(vp, DIOCGPART, &dpart,
    619 				  FREAD, l->l_proc->p_ucred, l);
    620 		if (error) {
    621 			return (error);
    622 		}
    623 
    624 		diskPtr->blockSize = dpart.disklab->d_secsize;
    625 
    626 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    627 		diskPtr->partitionSize = dpart.part->p_size;
    628 
    629 		raidPtr->raid_cinfo[col].ci_vp = vp;
    630 		raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
    631 
    632 		/* This component was not automatically configured */
    633 		diskPtr->auto_configured = 0;
    634 		diskPtr->dev = va.va_rdev;
    635 
    636 		/* we allow the user to specify that only a fraction of the
    637 		 * disks should be used this is just for debug:  it speeds up
    638 		 * the parity scan */
    639 		diskPtr->numBlocks = diskPtr->numBlocks *
    640 			rf_sizePercentage / 100;
    641 	}
    642 	return (0);
    643 }
    644 
    645 static void
    646 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
    647 		      RF_ComponentLabel_t *ci_label)
    648 {
    649 
    650 	printf("raid%d: Component %s being configured at col: %d\n",
    651 	       raidPtr->raidid, dev_name, column );
    652 	printf("         Column: %d Num Columns: %d\n",
    653 	       ci_label->column,
    654 	       ci_label->num_columns);
    655 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    656 	       ci_label->version, ci_label->serial_number,
    657 	       ci_label->mod_counter);
    658 	printf("         Clean: %s Status: %d\n",
    659 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    660 }
    661 
    662 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
    663 				 char *dev_name, RF_ComponentLabel_t *ci_label,
    664 				 int serial_number, int mod_counter)
    665 {
    666 	int fatal_error = 0;
    667 
    668 	if (serial_number != ci_label->serial_number) {
    669 		printf("%s has a different serial number: %d %d\n",
    670 		       dev_name, serial_number, ci_label->serial_number);
    671 		fatal_error = 1;
    672 	}
    673 	if (mod_counter != ci_label->mod_counter) {
    674 		printf("%s has a different modfication count: %d %d\n",
    675 		       dev_name, mod_counter, ci_label->mod_counter);
    676 	}
    677 
    678 	if (row != ci_label->row) {
    679 		printf("Row out of alignment for: %s\n", dev_name);
    680 		fatal_error = 1;
    681 	}
    682 	if (column != ci_label->column) {
    683 		printf("Column out of alignment for: %s\n", dev_name);
    684 		fatal_error = 1;
    685 	}
    686 	if (raidPtr->numCol != ci_label->num_columns) {
    687 		printf("Number of columns do not match for: %s\n", dev_name);
    688 		fatal_error = 1;
    689 	}
    690 	if (ci_label->clean == 0) {
    691 		/* it's not clean, but that's not fatal */
    692 		printf("%s is not clean!\n", dev_name);
    693 	}
    694 	return(fatal_error);
    695 }
    696 
    697 
    698 /*
    699 
    700    rf_CheckLabels() - check all the component labels for consistency.
    701    Return an error if there is anything major amiss.
    702 
    703  */
    704 
    705 int
    706 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    707 {
    708 	int c;
    709 	char *dev_name;
    710 	RF_ComponentLabel_t *ci_label;
    711 	int serial_number = 0;
    712 	int mod_number = 0;
    713 	int fatal_error = 0;
    714 	int mod_values[4];
    715 	int mod_count[4];
    716 	int ser_values[4];
    717 	int ser_count[4];
    718 	int num_ser;
    719 	int num_mod;
    720 	int i;
    721 	int found;
    722 	int hosed_column;
    723 	int too_fatal;
    724 	int parity_good;
    725 	int force;
    726 
    727 	hosed_column = -1;
    728 	too_fatal = 0;
    729 	force = cfgPtr->force;
    730 
    731 	/*
    732 	   We're going to try to be a little intelligent here.  If one
    733 	   component's label is bogus, and we can identify that it's the
    734 	   *only* one that's gone, we'll mark it as "failed" and allow
    735 	   the configuration to proceed.  This will be the *only* case
    736 	   that we'll proceed if there would be (otherwise) fatal errors.
    737 
    738 	   Basically we simply keep a count of how many components had
    739 	   what serial number.  If all but one agree, we simply mark
    740 	   the disagreeing component as being failed, and allow
    741 	   things to come up "normally".
    742 
    743 	   We do this first for serial numbers, and then for "mod_counter".
    744 
    745 	 */
    746 
    747 	num_ser = 0;
    748 	num_mod = 0;
    749 
    750 	for (c = 0; c < raidPtr->numCol; c++) {
    751 		ci_label = &raidPtr->raid_cinfo[c].ci_label;
    752 		found=0;
    753 		for(i=0;i<num_ser;i++) {
    754 			if (ser_values[i] == ci_label->serial_number) {
    755 				ser_count[i]++;
    756 				found=1;
    757 				break;
    758 			}
    759 		}
    760 		if (!found) {
    761 			ser_values[num_ser] = ci_label->serial_number;
    762 			ser_count[num_ser] = 1;
    763 			num_ser++;
    764 			if (num_ser>2) {
    765 				fatal_error = 1;
    766 				break;
    767 			}
    768 		}
    769 		found=0;
    770 		for(i=0;i<num_mod;i++) {
    771 			if (mod_values[i] == ci_label->mod_counter) {
    772 				mod_count[i]++;
    773 				found=1;
    774 				break;
    775 			}
    776 		}
    777 		if (!found) {
    778 			mod_values[num_mod] = ci_label->mod_counter;
    779 			mod_count[num_mod] = 1;
    780 			num_mod++;
    781 			if (num_mod>2) {
    782 				fatal_error = 1;
    783 				break;
    784 			}
    785 		}
    786 	}
    787 #if DEBUG
    788 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    789 	for(i=0;i<num_ser;i++) {
    790 		printf("%d %d\n", ser_values[i], ser_count[i]);
    791 	}
    792 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    793 	for(i=0;i<num_mod;i++) {
    794 		printf("%d %d\n", mod_values[i], mod_count[i]);
    795 	}
    796 #endif
    797 	serial_number = ser_values[0];
    798 	if (num_ser == 2) {
    799 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    800 			/* Locate the maverick component */
    801 			if (ser_count[1] > ser_count[0]) {
    802 				serial_number = ser_values[1];
    803 			}
    804 
    805 			for (c = 0; c < raidPtr->numCol; c++) {
    806 				ci_label = &raidPtr->raid_cinfo[c].ci_label;
    807 				if (serial_number != ci_label->serial_number) {
    808 					hosed_column = c;
    809 					break;
    810 				}
    811 			}
    812 			printf("Hosed component: %s\n",
    813 			       &cfgPtr->devnames[0][hosed_column][0]);
    814 			if (!force) {
    815 				/* we'll fail this component, as if there are
    816 				   other major errors, we arn't forcing things
    817 				   and we'll abort the config anyways */
    818 				raidPtr->Disks[hosed_column].status
    819 					= rf_ds_failed;
    820 				raidPtr->numFailures++;
    821 				raidPtr->status = rf_rs_degraded;
    822 			}
    823 		} else {
    824 			too_fatal = 1;
    825 		}
    826 		if (cfgPtr->parityConfig == '0') {
    827 			/* We've identified two different serial numbers.
    828 			   RAID 0 can't cope with that, so we'll punt */
    829 			too_fatal = 1;
    830 		}
    831 
    832 	}
    833 
    834 	/* record the serial number for later.  If we bail later, setting
    835 	   this doesn't matter, otherwise we've got the best guess at the
    836 	   correct serial number */
    837 	raidPtr->serial_number = serial_number;
    838 
    839 	mod_number = mod_values[0];
    840 	if (num_mod == 2) {
    841 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    842 			/* Locate the maverick component */
    843 			if (mod_count[1] > mod_count[0]) {
    844 				mod_number = mod_values[1];
    845 			} else if (mod_count[1] < mod_count[0]) {
    846 				mod_number = mod_values[0];
    847 			} else {
    848 				/* counts of different modification values
    849 				   are the same.   Assume greater value is
    850 				   the correct one, all other things
    851 				   considered */
    852 				if (mod_values[0] > mod_values[1]) {
    853 					mod_number = mod_values[0];
    854 				} else {
    855 					mod_number = mod_values[1];
    856 				}
    857 
    858 			}
    859 
    860 			for (c = 0; c < raidPtr->numCol; c++) {
    861 				ci_label = &raidPtr->raid_cinfo[c].ci_label;
    862 				if (mod_number != ci_label->mod_counter) {
    863 					if (hosed_column == c) {
    864 						/* same one.  Can
    865 						   deal with it.  */
    866 					} else {
    867 						hosed_column = c;
    868 						if (num_ser != 1) {
    869 							too_fatal = 1;
    870 							break;
    871 						}
    872 					}
    873 				}
    874 			}
    875 			printf("Hosed component: %s\n",
    876 			       &cfgPtr->devnames[0][hosed_column][0]);
    877 			if (!force) {
    878 				/* we'll fail this component, as if there are
    879 				   other major errors, we arn't forcing things
    880 				   and we'll abort the config anyways */
    881 				if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
    882 					raidPtr->Disks[hosed_column].status
    883 						= rf_ds_failed;
    884 					raidPtr->numFailures++;
    885 					raidPtr->status = rf_rs_degraded;
    886 				}
    887 			}
    888 		} else {
    889 			too_fatal = 1;
    890 		}
    891 		if (cfgPtr->parityConfig == '0') {
    892 			/* We've identified two different mod counters.
    893 			   RAID 0 can't cope with that, so we'll punt */
    894 			too_fatal = 1;
    895 		}
    896 	}
    897 
    898 	raidPtr->mod_counter = mod_number;
    899 
    900 	if (too_fatal) {
    901 		/* we've had both a serial number mismatch, and a mod_counter
    902 		   mismatch -- and they involved two different components!!
    903 		   Bail -- make things fail so that the user must force
    904 		   the issue... */
    905 		hosed_column = -1;
    906 		fatal_error = 1;
    907 	}
    908 
    909 	if (num_ser > 2) {
    910 		printf("raid%d: Too many different serial numbers!\n",
    911 		       raidPtr->raidid);
    912 		fatal_error = 1;
    913 	}
    914 
    915 	if (num_mod > 2) {
    916 		printf("raid%d: Too many different mod counters!\n",
    917 		       raidPtr->raidid);
    918 		fatal_error = 1;
    919 	}
    920 
    921 	/* we start by assuming the parity will be good, and flee from
    922 	   that notion at the slightest sign of trouble */
    923 
    924 	parity_good = RF_RAID_CLEAN;
    925 
    926 	for (c = 0; c < raidPtr->numCol; c++) {
    927 		dev_name = &cfgPtr->devnames[0][c][0];
    928 		ci_label = &raidPtr->raid_cinfo[c].ci_label;
    929 
    930 		if (c == hosed_column) {
    931 			printf("raid%d: Ignoring %s\n",
    932 			       raidPtr->raidid, dev_name);
    933 		} else {
    934 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
    935 			if (rf_check_label_vitals( raidPtr, 0, c,
    936 						   dev_name, ci_label,
    937 						   serial_number,
    938 						   mod_number )) {
    939 				fatal_error = 1;
    940 			}
    941 			if (ci_label->clean != RF_RAID_CLEAN) {
    942 				parity_good = RF_RAID_DIRTY;
    943 			}
    944 		}
    945 	}
    946 
    947 	if (fatal_error) {
    948 		parity_good = RF_RAID_DIRTY;
    949 	}
    950 
    951 	/* we note the state of the parity */
    952 	raidPtr->parity_good = parity_good;
    953 
    954 	return(fatal_error);
    955 }
    956 
    957 int
    958 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
    959 {
    960 	RF_RaidDisk_t *disks;
    961 	RF_DiskQueue_t *spareQueues;
    962 	int ret;
    963 	unsigned int bs;
    964 	int spare_number;
    965 
    966 	ret=0;
    967 
    968 	if (raidPtr->numSpare >= RF_MAXSPARE) {
    969 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
    970 		return(EINVAL);
    971 	}
    972 
    973 	RF_LOCK_MUTEX(raidPtr->mutex);
    974 	while (raidPtr->adding_hot_spare==1) {
    975 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
    976 			&(raidPtr->mutex));
    977 	}
    978 	raidPtr->adding_hot_spare=1;
    979 	RF_UNLOCK_MUTEX(raidPtr->mutex);
    980 
    981 	/* the beginning of the spares... */
    982 	disks = &raidPtr->Disks[raidPtr->numCol];
    983 
    984 	spare_number = raidPtr->numSpare;
    985 
    986 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
    987 			       &disks[spare_number],
    988 			       raidPtr->numCol + spare_number);
    989 
    990 	if (ret)
    991 		goto fail;
    992 	if (disks[spare_number].status != rf_ds_optimal) {
    993 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    994 			     sparePtr->component_name);
    995 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
    996 		ret=EINVAL;
    997 		goto fail;
    998 	} else {
    999 		disks[spare_number].status = rf_ds_spare;
   1000 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1001 			 disks[spare_number].devname,
   1002 			 (long int) disks[spare_number].numBlocks,
   1003 			 disks[spare_number].blockSize,
   1004 			 (long int) disks[spare_number].numBlocks *
   1005 			 disks[spare_number].blockSize / 1024 / 1024);
   1006 	}
   1007 
   1008 
   1009 	/* check sizes and block sizes on the spare disk */
   1010 	bs = 1 << raidPtr->logBytesPerSector;
   1011 	if (disks[spare_number].blockSize != bs) {
   1012 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1013 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1014 		ret = EINVAL;
   1015 		goto fail;
   1016 	}
   1017 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1018 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1019 			     disks[spare_number].devname,
   1020 			     disks[spare_number].blockSize,
   1021 			     (long int) raidPtr->sectorsPerDisk);
   1022 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1023 		ret = EINVAL;
   1024 		goto fail;
   1025 	} else {
   1026 		if (disks[spare_number].numBlocks >
   1027 		    raidPtr->sectorsPerDisk) {
   1028 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1029 				     (long int) raidPtr->sectorsPerDisk);
   1030 
   1031 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1032 		}
   1033 	}
   1034 
   1035 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
   1036 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1037 				 raidPtr->numCol + spare_number,
   1038 				 raidPtr->qType,
   1039 				 raidPtr->sectorsPerDisk,
   1040 				 raidPtr->Disks[raidPtr->numCol +
   1041 						  spare_number].dev,
   1042 				 raidPtr->maxOutstanding,
   1043 				 &raidPtr->shutdownList,
   1044 				 raidPtr->cleanupList);
   1045 
   1046 	RF_LOCK_MUTEX(raidPtr->mutex);
   1047 	raidPtr->numSpare++;
   1048 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1049 
   1050 fail:
   1051 	RF_LOCK_MUTEX(raidPtr->mutex);
   1052 	raidPtr->adding_hot_spare=0;
   1053 	wakeup(&(raidPtr->adding_hot_spare));
   1054 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1055 
   1056 	return(ret);
   1057 }
   1058 
   1059 int
   1060 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
   1061 {
   1062 	int spare_number;
   1063 
   1064 
   1065 	if (raidPtr->numSpare==0) {
   1066 		printf("No spares to remove!\n");
   1067 		return(EINVAL);
   1068 	}
   1069 
   1070 	spare_number = sparePtr->column;
   1071 
   1072 	return(EINVAL); /* XXX not implemented yet */
   1073 #if 0
   1074 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1075 		return(EINVAL);
   1076 	}
   1077 
   1078 	/* verify that this spare isn't in use... */
   1079 
   1080 
   1081 
   1082 
   1083 	/* it's gone.. */
   1084 
   1085 	raidPtr->numSpare--;
   1086 
   1087 	return(0);
   1088 #endif
   1089 }
   1090 
   1091 
   1092 int
   1093 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
   1094 {
   1095 	RF_RaidDisk_t *disks;
   1096 
   1097 	if ((component->column < 0) ||
   1098 	    (component->column >= raidPtr->numCol)) {
   1099 		return(EINVAL);
   1100 	}
   1101 
   1102 	disks = &raidPtr->Disks[component->column];
   1103 
   1104 	/* 1. This component must be marked as 'failed' */
   1105 
   1106 	return(EINVAL); /* Not implemented yet. */
   1107 }
   1108 
   1109 int
   1110 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
   1111 {
   1112 
   1113 	/* Issues here include how to 'move' this in if there is IO
   1114 	   taking place (e.g. component queues and such) */
   1115 
   1116 	return(EINVAL); /* Not implemented yet. */
   1117 }
   1118