Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.18
      1 /*	$NetBSD: rf_disks.c,v 1.18 2000/02/24 02:55:05 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include "rf_types.h"
     70 #include "rf_raid.h"
     71 #include "rf_alloclist.h"
     72 #include "rf_utils.h"
     73 #include "rf_configure.h"
     74 #include "rf_general.h"
     75 #include "rf_options.h"
     76 #include "rf_kintf.h"
     77 #include "rf_netbsd.h"
     78 
     79 #include <sys/types.h>
     80 #include <sys/param.h>
     81 #include <sys/systm.h>
     82 #include <sys/proc.h>
     83 #include <sys/ioctl.h>
     84 #include <sys/fcntl.h>
     85 #include <sys/vnode.h>
     86 
     87 /* XXX these should be in a header file somewhere */
     88 void rf_UnconfigureVnodes( RF_Raid_t * );
     89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
     90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     91 
/*
 * Debug printf helpers: print only when rf_diskDebug is non-zero.
 *
 * Each macro is wrapped in do { ... } while (0) so that it expands to exactly
 * one statement.  A bare `if (rf_diskDebug) printf(...)' would silently
 * capture the `else' of any unbraced `if' the macro is used inside of.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     94 
     95 /**************************************************************************
     96  *
     97  * initialize the disks comprising the array
     98  *
     99  * We want the spare disks to have regular row,col numbers so that we can
     100  * easily substitute a spare for a failed disk.  But, the driver code assumes
    101  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    102  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    103  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    104  * rest, and put all the spares in it.  This probably needs to get changed
    105  * eventually.
    106  *
    107  **************************************************************************/
    108 
    109 int
    110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    111 	RF_ShutdownList_t **listp;
    112 	RF_Raid_t *raidPtr;
    113 	RF_Config_t *cfgPtr;
    114 {
    115 	RF_RaidDisk_t **disks;
    116 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    117 	RF_RowCol_t r, c;
    118 	int bs, ret;
    119 	unsigned i, count, foundone = 0, numFailuresThisRow;
    120 	int force;
    121 
    122 	force = cfgPtr->force;
    123 
    124 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    125 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    126 	if (disks == NULL) {
    127 		ret = ENOMEM;
    128 		goto fail;
    129 	}
    130 	raidPtr->Disks = disks;
    131 
    132 	/* get space for the device-specific stuff... */
    133 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    134 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    135 	    raidPtr->cleanupList);
    136 	if (raidPtr->raid_cinfo == NULL) {
    137 		ret = ENOMEM;
    138 		goto fail;
    139 	}
    140 	for (r = 0; r < raidPtr->numRow; r++) {
    141 		numFailuresThisRow = 0;
    142 		/* We allocate RF_MAXSPARE on the first row so that we
    143 		   have room to do hot-swapping of spares */
    144 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    145 				+ ((r == 0) ? RF_MAXSPARE : 0),
    146 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    147 				raidPtr->cleanupList);
    148 		if (disks[r] == NULL) {
    149 			ret = ENOMEM;
    150 			goto fail;
    151 		}
    152 		/* get more space for device specific stuff.. */
    153 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    154 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    155 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    156 		    raidPtr->cleanupList);
    157 		if (raidPtr->raid_cinfo[r] == NULL) {
    158 			ret = ENOMEM;
    159 			goto fail;
    160 		}
    161 		for (c = 0; c < raidPtr->numCol; c++) {
    162 				ret = rf_ConfigureDisk(raidPtr,
    163 						       &cfgPtr->devnames[r][c][0],
    164 						       &disks[r][c], r, c);
    165 
    166 			if (ret)
    167 				goto fail;
    168 
    169 			if (disks[r][c].status == rf_ds_optimal) {
    170 				raidread_component_label(
    171 					 raidPtr->raid_cinfo[r][c].ci_dev,
    172 					 raidPtr->raid_cinfo[r][c].ci_vp,
    173 					 &raidPtr->raid_cinfo[r][c].ci_label);
    174 			}
    175 
    176 			if (disks[r][c].status != rf_ds_optimal) {
    177 				numFailuresThisRow++;
    178 			} else {
    179 				if (disks[r][c].numBlocks < min_numblks)
    180 					min_numblks = disks[r][c].numBlocks;
    181 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    182 				    r, c, disks[r][c].devname,
    183 				    (long int) disks[r][c].numBlocks,
    184 				    disks[r][c].blockSize,
    185 				    (long int) disks[r][c].numBlocks *
    186 					 disks[r][c].blockSize / 1024 / 1024);
    187 			}
    188 		}
    189 		/* XXX fix for n-fault tolerant */
    190 		/* XXX this should probably check to see how many failures
    191 		   we can handle for this configuration! */
    192 		if (numFailuresThisRow > 0)
    193 			raidPtr->status[r] = rf_rs_degraded;
    194 	}
    195 
    196 	/* all disks must be the same size & have the same block size, bs must
    197 	 * be a power of 2 */
    198 	bs = 0;
    199 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    200 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    201 			if (disks[r][c].status == rf_ds_optimal) {
    202 				bs = disks[r][c].blockSize;
    203 				foundone = 1;
    204 			}
    205 		}
    206 	}
    207 	if (!foundone) {
    208 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    209 		ret = EINVAL;
    210 		goto fail;
    211 	}
    212 	for (count = 0, i = 1; i; i <<= 1)
    213 		if (bs & i)
    214 			count++;
    215 	if (count != 1) {
    216 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    217 		ret = EINVAL;
    218 		goto fail;
    219 	}
    220 
    221 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    222 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    223 		if (force != 0) {
    224 			printf("raid%d: Fatal errors being ignored.\n",
    225 			       raidPtr->raidid);
    226 		} else {
    227 			ret = EINVAL;
    228 			goto fail;
    229 		}
    230 	}
    231 
    232 	for (r = 0; r < raidPtr->numRow; r++) {
    233 		for (c = 0; c < raidPtr->numCol; c++) {
    234 			if (disks[r][c].status == rf_ds_optimal) {
    235 				if (disks[r][c].blockSize != bs) {
    236 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    237 					ret = EINVAL;
    238 					goto fail;
    239 				}
    240 				if (disks[r][c].numBlocks != min_numblks) {
    241 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    242 					    r, c, (int) min_numblks);
    243 					disks[r][c].numBlocks = min_numblks;
    244 				}
    245 			}
    246 		}
    247 	}
    248 
    249 	raidPtr->sectorsPerDisk = min_numblks;
    250 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    251 	raidPtr->bytesPerSector = bs;
    252 	raidPtr->sectorMask = bs - 1;
    253 	return (0);
    254 
    255 fail:
    256 
    257 	rf_UnconfigureVnodes( raidPtr );
    258 
    259 	return (ret);
    260 }
    261 
    262 
    263 /****************************************************************************
    264  * set up the data structures describing the spare disks in the array
    265  * recall from the above comment that the spare disk descriptors are stored
    266  * in row zero, which is specially expanded to hold them.
    267  ****************************************************************************/
    268 int
    269 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    270 	RF_ShutdownList_t ** listp;
    271 	RF_Raid_t * raidPtr;
    272 	RF_Config_t * cfgPtr;
    273 {
    274 	int     i, ret;
    275 	unsigned int bs;
    276 	RF_RaidDisk_t *disks;
    277 	int     num_spares_done;
    278 
    279 	num_spares_done = 0;
    280 
    281 	/* The space for the spares should have already been allocated by
    282 	 * ConfigureDisks() */
    283 
    284 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    285 	for (i = 0; i < raidPtr->numSpare; i++) {
    286 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    287 				       &disks[i], 0, raidPtr->numCol + i);
    288 		if (ret)
    289 			goto fail;
    290 		if (disks[i].status != rf_ds_optimal) {
    291 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    292 				     &cfgPtr->spare_names[i][0]);
    293 		} else {
    294 			disks[i].status = rf_ds_spare;	/* change status to
    295 							 * spare */
    296 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    297 			    disks[i].devname,
    298 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    299 			    (long int) disks[i].numBlocks *
    300 				 disks[i].blockSize / 1024 / 1024);
    301 		}
    302 		num_spares_done++;
    303 	}
    304 
    305 	/* check sizes and block sizes on spare disks */
    306 	bs = 1 << raidPtr->logBytesPerSector;
    307 	for (i = 0; i < raidPtr->numSpare; i++) {
    308 		if (disks[i].blockSize != bs) {
    309 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    310 			ret = EINVAL;
    311 			goto fail;
    312 		}
    313 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    314 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    315 				     disks[i].devname, disks[i].blockSize,
    316 				     (long int) raidPtr->sectorsPerDisk);
    317 			ret = EINVAL;
    318 			goto fail;
    319 		} else
    320 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    321 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    322 
    323 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    324 			}
    325 	}
    326 
    327 	return (0);
    328 
    329 fail:
    330 
    331 	/* Release the hold on the main components.  We've failed to allocate
    332 	 * a spare, and since we're failing, we need to free things..
    333 
    334 	 XXX failing to allocate a spare is *not* that big of a deal...
    335 	 We *can* survive without it, if need be, esp. if we get hot
    336 	 adding working.
    337 
    338 	 If we don't fail out here, then we need a way to remove this spare...
    339 	 that should be easier to do here than if we are "live"...
    340 
    341 	 */
    342 
    343 	rf_UnconfigureVnodes( raidPtr );
    344 
    345 	return (ret);
    346 }
    347 
    348 static int
    349 rf_AllocDiskStructures(raidPtr, cfgPtr)
    350 	RF_Raid_t *raidPtr;
    351  	RF_Config_t *cfgPtr;
    352 {
    353 	RF_RaidDisk_t **disks;
    354 	int ret;
    355 	int r;
    356 
    357 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    358 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    359 	if (disks == NULL) {
    360 		ret = ENOMEM;
    361 		goto fail;
    362 	}
    363 	raidPtr->Disks = disks;
    364 	/* get space for the device-specific stuff... */
    365 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    366 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    367 	    raidPtr->cleanupList);
    368 	if (raidPtr->raid_cinfo == NULL) {
    369 		ret = ENOMEM;
    370 		goto fail;
    371 	}
    372 
    373 	for (r = 0; r < raidPtr->numRow; r++) {
    374 		/* We allocate RF_MAXSPARE on the first row so that we
    375 		   have room to do hot-swapping of spares */
    376 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    377 				+ ((r == 0) ? RF_MAXSPARE : 0),
    378 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    379 				raidPtr->cleanupList);
    380 		if (disks[r] == NULL) {
    381 			ret = ENOMEM;
    382 			goto fail;
    383 		}
    384 		/* get more space for device specific stuff.. */
    385 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    386 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    387 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    388 		    raidPtr->cleanupList);
    389 		if (raidPtr->raid_cinfo[r] == NULL) {
    390 			ret = ENOMEM;
    391 			goto fail;
    392 		}
    393 	}
    394 	return(0);
    395 fail:
    396 	rf_UnconfigureVnodes( raidPtr );
    397 
    398 	return(ret);
    399 }
    400 
    401 
    402 /* configure a single disk during auto-configuration at boot */
    403 int
    404 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    405 	RF_Raid_t *raidPtr;
    406 	RF_Config_t *cfgPtr;
    407 	RF_AutoConfig_t *auto_config;
    408 {
    409 	RF_RaidDisk_t **disks;
    410 	RF_RaidDisk_t *diskPtr;
    411 	RF_RowCol_t r, c;
    412 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    413 	int bs, ret;
    414 	int numFailuresThisRow;
    415 	int force;
    416 	RF_AutoConfig_t *ac;
    417 	int parity_good;
    418 
    419 #if DEBUG
    420 	printf("Starting autoconfiguration of RAID set...\n");
    421 #endif
    422 	force = cfgPtr->force;
    423 
    424 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    425 	if (ret)
    426 		goto fail;
    427 
    428 	disks = raidPtr->Disks;
    429 
    430 	/* assume the parity will be fine.. */
    431 	parity_good = RF_RAID_CLEAN;
    432 
    433 	for (r = 0; r < raidPtr->numRow; r++) {
    434 		numFailuresThisRow = 0;
    435 		for (c = 0; c < raidPtr->numCol; c++) {
    436 			diskPtr = &disks[r][c];
    437 
    438 			/* find this row/col in the autoconfig */
    439 #if DEBUG
    440 			printf("Looking for %d,%d in autoconfig\n",r,c);
    441 #endif
    442 			ac = auto_config;
    443 			while(ac!=NULL) {
    444 				if (ac->clabel==NULL) {
    445 					/* big-time bad news. */
    446 					goto fail;
    447 				}
    448 				if ((ac->clabel->row == r) &&
    449 				    (ac->clabel->column == c)) {
    450 					/* it's this one... */
    451 #if DEBUG
    452 					printf("Found: %s at %d,%d\n",
    453 					       ac->devname,r,c);
    454 #endif
    455 					break;
    456 				}
    457 				ac=ac->next;
    458 			}
    459 
    460 			if (ac!=NULL) {
    461 				/* Found it.  Configure it.. */
    462 				diskPtr->blockSize = ac->clabel->blockSize;
    463 				diskPtr->numBlocks = ac->clabel->numBlocks;
    464 				/* Note: rf_protectedSectors is already
    465 				   factored into numBlocks here */
    466 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    467 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
    468 
    469 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    470 				       ac->clabel, sizeof(*ac->clabel));
    471 				sprintf(diskPtr->devname, "/dev/%s",
    472 					ac->devname);
    473 
    474 				/* note the fact that this component was
    475 				   autoconfigured.  You'll need this info
    476 				   later.  Trust me :) */
    477 				diskPtr->auto_configured = 1;
    478 				diskPtr->dev = ac->dev;
    479 
    480 				/*
    481 				 * we allow the user to specify that
    482 				 * only a fraction of the disks should
    483 				 * be used this is just for debug: it
    484 				 * speeds up the parity scan
    485 				 */
    486 
    487 				diskPtr->numBlocks = diskPtr->numBlocks *
    488 					rf_sizePercentage / 100;
    489 
    490 				/* XXX these will get set multiple times,
    491 				   but since we're autoconfiguring, they'd
    492 				   better be always the same each time!
    493 				   If not, this is the least of your worries */
    494 
    495 				bs = diskPtr->blockSize;
    496 				min_numblks = diskPtr->numBlocks;
    497 
    498 				/* this gets done multiple times, but that's
    499 				   fine -- the serial number will be the same
    500 				   for all components, guaranteed */
    501 				raidPtr->serial_number =
    502 					ac->clabel->serial_number;
    503 
    504 				if (ac->clabel->clean != RF_RAID_CLEAN) {
    505 					parity_good = RF_RAID_DIRTY;
    506 				}
    507 
    508 			} else {
    509 				/* Didn't find it!! Component must be dead */
    510 				disks[r][c].status = rf_ds_failed;
    511 				numFailuresThisRow++;
    512 			}
    513 		}
    514 		/* XXX fix for n-fault tolerant */
    515 		/* XXX this should probably check to see how many failures
    516 		   we can handle for this configuration! */
    517 		if (numFailuresThisRow > 0)
    518 			raidPtr->status[r] = rf_rs_degraded;
    519 	}
    520 
    521 	/* note the state of the parity, if any */
    522 	raidPtr->parity_good = parity_good;
    523 	raidPtr->sectorsPerDisk = min_numblks;
    524 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    525 	raidPtr->bytesPerSector = bs;
    526 	raidPtr->sectorMask = bs - 1;
    527 	return (0);
    528 
    529 fail:
    530 
    531 	rf_UnconfigureVnodes( raidPtr );
    532 
    533 	return (ret);
    534 
    535 }
    536 
    537 /* configure a single disk in the array */
    538 int
    539 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    540 	RF_Raid_t *raidPtr;
    541 	char   *buf;
    542 	RF_RaidDisk_t *diskPtr;
    543 	RF_RowCol_t row;
    544 	RF_RowCol_t col;
    545 {
    546 	char   *p;
    547 	int     retcode;
    548 
    549 	struct partinfo dpart;
    550 	struct vnode *vp;
    551 	struct vattr va;
    552 	struct proc *proc;
    553 	int     error;
    554 
    555 	retcode = 0;
    556 	p = rf_find_non_white(buf);
    557 	if (p[strlen(p) - 1] == '\n') {
    558 		/* strip off the newline */
    559 		p[strlen(p) - 1] = '\0';
    560 	}
    561 	(void) strcpy(diskPtr->devname, p);
    562 
    563 	proc = raidPtr->engine_thread;
    564 
    565 	/* Let's start by claiming the component is fine and well... */
    566 	diskPtr->status = rf_ds_optimal;
    567 
    568 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    569 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    570 
    571 	error = raidlookup(diskPtr->devname, proc, &vp);
    572 	if (error) {
    573 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    574 		if (error == ENXIO) {
    575 			/* the component isn't there... must be dead :-( */
    576 			diskPtr->status = rf_ds_failed;
    577 		} else {
    578 			return (error);
    579 		}
    580 	}
    581 	if (diskPtr->status == rf_ds_optimal) {
    582 
    583 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    584 			return (error);
    585 		}
    586 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    587 				  FREAD, proc->p_ucred, proc);
    588 		if (error) {
    589 			return (error);
    590 		}
    591 
    592 		diskPtr->blockSize = dpart.disklab->d_secsize;
    593 
    594 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    595 		diskPtr->partitionSize = dpart.part->p_size;
    596 
    597 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    598 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    599 
    600 		/* This component was not automatically configured */
    601 		diskPtr->auto_configured = 0;
    602 		diskPtr->dev = va.va_rdev;
    603 
    604 		/* we allow the user to specify that only a fraction of the
    605 		 * disks should be used this is just for debug:  it speeds up
    606 		 * the parity scan */
    607 		diskPtr->numBlocks = diskPtr->numBlocks *
    608 			rf_sizePercentage / 100;
    609 	}
    610 	return (0);
    611 }
    612 
    613 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
    614 				  RF_ComponentLabel_t *);
    615 
    616 static void
    617 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    618 	RF_Raid_t *raidPtr;
    619 	int row;
    620 	int column;
    621 	char *dev_name;
    622 	RF_ComponentLabel_t *ci_label;
    623 {
    624 
    625 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    626 	       raidPtr->raidid, dev_name, row, column );
    627 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    628 	       ci_label->row, ci_label->column,
    629 	       ci_label->num_rows, ci_label->num_columns);
    630 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    631 	       ci_label->version, ci_label->serial_number,
    632 	       ci_label->mod_counter);
    633 	printf("         Clean: %s Status: %d\n",
    634 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    635 }
    636 
    637 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
    638 				  RF_ComponentLabel_t *, int, int );
    639 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    640 				  serial_number, mod_counter )
    641 	RF_Raid_t *raidPtr;
    642 	int row;
    643 	int column;
    644 	char *dev_name;
    645 	RF_ComponentLabel_t *ci_label;
    646 	int serial_number;
    647 	int mod_counter;
    648 {
    649 	int fatal_error = 0;
    650 
    651 	if (serial_number != ci_label->serial_number) {
    652 		printf("%s has a different serial number: %d %d\n",
    653 		       dev_name, serial_number, ci_label->serial_number);
    654 		fatal_error = 1;
    655 	}
    656 	if (mod_counter != ci_label->mod_counter) {
    657 		printf("%s has a different modfication count: %d %d\n",
    658 		       dev_name, mod_counter, ci_label->mod_counter);
    659 	}
    660 
    661 	if (row != ci_label->row) {
    662 		printf("Row out of alignment for: %s\n", dev_name);
    663 		fatal_error = 1;
    664 	}
    665 	if (column != ci_label->column) {
    666 		printf("Column out of alignment for: %s\n", dev_name);
    667 		fatal_error = 1;
    668 	}
    669 	if (raidPtr->numRow != ci_label->num_rows) {
    670 		printf("Number of rows do not match for: %s\n", dev_name);
    671 		fatal_error = 1;
    672 	}
    673 	if (raidPtr->numCol != ci_label->num_columns) {
    674 		printf("Number of columns do not match for: %s\n", dev_name);
    675 		fatal_error = 1;
    676 	}
    677 	if (ci_label->clean == 0) {
    678 		/* it's not clean, but that's not fatal */
    679 		printf("%s is not clean!\n", dev_name);
    680 	}
    681 	return(fatal_error);
    682 }
    683 
    684 
    685 /*
    686 
    687    rf_CheckLabels() - check all the component labels for consistency.
    688    Return an error if there is anything major amiss.
    689 
    690  */
    691 
    692 int
    693 rf_CheckLabels( raidPtr, cfgPtr )
    694 	RF_Raid_t *raidPtr;
    695 	RF_Config_t *cfgPtr;
    696 {
    697 	int r,c;
    698 	char *dev_name;
    699 	RF_ComponentLabel_t *ci_label;
    700 	int serial_number = 0;
    701 	int mod_number = 0;
    702 	int fatal_error = 0;
    703 	int mod_values[4];
    704 	int mod_count[4];
    705 	int ser_values[4];
    706 	int ser_count[4];
    707 	int num_ser;
    708 	int num_mod;
    709 	int i;
    710 	int found;
    711 	int hosed_row;
    712 	int hosed_column;
    713 	int too_fatal;
    714 	int parity_good;
    715 	int force;
    716 
    717 	hosed_row = -1;
    718 	hosed_column = -1;
    719 	too_fatal = 0;
    720 	force = cfgPtr->force;
    721 
    722 	/*
    723 	   We're going to try to be a little intelligent here.  If one
    724 	   component's label is bogus, and we can identify that it's the
    725 	   *only* one that's gone, we'll mark it as "failed" and allow
    726 	   the configuration to proceed.  This will be the *only* case
    727 	   that we'll proceed if there would be (otherwise) fatal errors.
    728 
    729 	   Basically we simply keep a count of how many components had
    730 	   what serial number.  If all but one agree, we simply mark
    731 	   the disagreeing component as being failed, and allow
    732 	   things to come up "normally".
    733 
    734 	   We do this first for serial numbers, and then for "mod_counter".
    735 
    736 	 */
    737 
    738 	num_ser = 0;
    739 	num_mod = 0;
    740 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    741 		for (c = 0; c < raidPtr->numCol; c++) {
    742 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    743 			found=0;
    744 			for(i=0;i<num_ser;i++) {
    745 				if (ser_values[i] == ci_label->serial_number) {
    746 					ser_count[i]++;
    747 					found=1;
    748 					break;
    749 				}
    750 			}
    751 			if (!found) {
    752 				ser_values[num_ser] = ci_label->serial_number;
    753 				ser_count[num_ser] = 1;
    754 				num_ser++;
    755 				if (num_ser>2) {
    756 					fatal_error = 1;
    757 					break;
    758 				}
    759 			}
    760 			found=0;
    761 			for(i=0;i<num_mod;i++) {
    762 				if (mod_values[i] == ci_label->mod_counter) {
    763 					mod_count[i]++;
    764 					found=1;
    765 					break;
    766 				}
    767 			}
    768 			if (!found) {
    769 			        mod_values[num_mod] = ci_label->mod_counter;
    770 				mod_count[num_mod] = 1;
    771 				num_mod++;
    772 				if (num_mod>2) {
    773 					fatal_error = 1;
    774 					break;
    775 				}
    776 			}
    777 		}
    778 	}
    779 #if DEBUG
    780 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    781 	for(i=0;i<num_ser;i++) {
    782 		printf("%d %d\n", ser_values[i], ser_count[i]);
    783 	}
    784 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    785 	for(i=0;i<num_mod;i++) {
    786 		printf("%d %d\n", mod_values[i], mod_count[i]);
    787 	}
    788 #endif
    789 	serial_number = ser_values[0];
    790 	if (num_ser == 2) {
    791 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    792 			/* Locate the maverick component */
    793 			if (ser_count[1] > ser_count[0]) {
    794 				serial_number = ser_values[1];
    795 			}
    796 			for (r = 0; r < raidPtr->numRow; r++) {
    797 				for (c = 0; c < raidPtr->numCol; c++) {
    798 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    799 					if (serial_number !=
    800 					    ci_label->serial_number) {
    801 						hosed_row = r;
    802 						hosed_column = c;
    803 						break;
    804 					}
    805 				}
    806 			}
    807 			printf("Hosed component: %s\n",
    808 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    809 			if (!force) {
    810 				/* we'll fail this component, as if there are
    811 				   other major errors, we arn't forcing things
    812 				   and we'll abort the config anyways */
    813 				raidPtr->Disks[hosed_row][hosed_column].status
    814 					= rf_ds_failed;
    815 				raidPtr->numFailures++;
    816 				raidPtr->status[hosed_row] = rf_rs_degraded;
    817 			}
    818 		} else {
    819 			too_fatal = 1;
    820 		}
    821 		if (cfgPtr->parityConfig == '0') {
    822 			/* We've identified two different serial numbers.
    823 			   RAID 0 can't cope with that, so we'll punt */
    824 			too_fatal = 1;
    825 		}
    826 
    827 	}
    828 
    829 	/* record the serial number for later.  If we bail later, setting
    830 	   this doesn't matter, otherwise we've got the best guess at the
    831 	   correct serial number */
    832 	raidPtr->serial_number = serial_number;
    833 
    834 	mod_number = mod_values[0];
    835 	if (num_mod == 2) {
    836 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    837 			/* Locate the maverick component */
    838 			if (mod_count[1] > mod_count[0]) {
    839 				mod_number = mod_values[1];
    840 			} else if (mod_count[1] < mod_count[0]) {
    841 				mod_number = mod_values[0];
    842 			} else {
    843 				/* counts of different modification values
    844 				   are the same.   Assume greater value is
    845 				   the correct one, all other things
    846 				   considered */
    847 				if (mod_values[0] > mod_values[1]) {
    848 					mod_number = mod_values[0];
    849 				} else {
    850 					mod_number = mod_values[1];
    851 				}
    852 
    853 			}
    854 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    855 				for (c = 0; c < raidPtr->numCol; c++) {
    856 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    857 					if (mod_number !=
    858 					    ci_label->mod_counter) {
    859 						if ( ( hosed_row == r ) &&
    860 						     ( hosed_column == c )) {
    861 							/* same one.  Can
    862 							   deal with it.  */
    863 						} else {
    864 							hosed_row = r;
    865 							hosed_column = c;
    866 							if (num_ser != 1) {
    867 								too_fatal = 1;
    868 								break;
    869 							}
    870 						}
    871 					}
    872 				}
    873 			}
    874 			printf("Hosed component: %s\n",
    875 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    876 			if (!force) {
    877 				/* we'll fail this component, as if there are
    878 				   other major errors, we arn't forcing things
    879 				   and we'll abort the config anyways */
    880 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    881 					raidPtr->Disks[hosed_row][hosed_column].status
    882 						= rf_ds_failed;
    883 					raidPtr->numFailures++;
    884 					raidPtr->status[hosed_row] = rf_rs_degraded;
    885 				}
    886 			}
    887 		} else {
    888 			too_fatal = 1;
    889 		}
    890 		if (cfgPtr->parityConfig == '0') {
    891 			/* We've identified two different mod counters.
    892 			   RAID 0 can't cope with that, so we'll punt */
    893 			too_fatal = 1;
    894 		}
    895 	}
    896 
    897 	raidPtr->mod_counter = mod_number;
    898 
    899 	if (too_fatal) {
    900 		/* we've had both a serial number mismatch, and a mod_counter
    901 		   mismatch -- and they involved two different components!!
    902 		   Bail -- make things fail so that the user must force
    903 		   the issue... */
    904 		hosed_row = -1;
    905 		hosed_column = -1;
    906 	}
    907 
    908 	if (num_ser > 2) {
    909 		printf("raid%d: Too many different serial numbers!\n",
    910 		       raidPtr->raidid);
    911 	}
    912 
    913 	if (num_mod > 2) {
    914 		printf("raid%d: Too many different mod counters!\n",
    915 		       raidPtr->raidid);
    916 	}
    917 
    918 	/* we start by assuming the parity will be good, and flee from
    919 	   that notion at the slightest sign of trouble */
    920 
    921 	parity_good = RF_RAID_CLEAN;
    922 	for (r = 0; r < raidPtr->numRow; r++) {
    923 		for (c = 0; c < raidPtr->numCol; c++) {
    924 			dev_name = &cfgPtr->devnames[r][c][0];
    925 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    926 
    927 			if ((r == hosed_row) && (c == hosed_column)) {
    928 				printf("raid%d: Ignoring %s\n",
    929 				       raidPtr->raidid, dev_name);
    930 			} else {
    931 				rf_print_label_status( raidPtr, r, c,
    932 						       dev_name, ci_label );
    933 				if (rf_check_label_vitals( raidPtr, r, c,
    934 							   dev_name, ci_label,
    935 							   serial_number,
    936 							   mod_number )) {
    937 					fatal_error = 1;
    938 				}
    939 				if (ci_label->clean != RF_RAID_CLEAN) {
    940 					parity_good = RF_RAID_DIRTY;
    941 				}
    942 			}
    943 		}
    944 	}
    945 	if (fatal_error) {
    946 		parity_good = RF_RAID_DIRTY;
    947 	}
    948 
    949 	/* we note the state of the parity */
    950 	raidPtr->parity_good = parity_good;
    951 
    952 	return(fatal_error);
    953 }
    954 
    955 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
    956 int
    957 rf_add_hot_spare(raidPtr, sparePtr)
    958 	RF_Raid_t *raidPtr;
    959 	RF_SingleComponent_t *sparePtr;
    960 {
    961 	RF_RaidDisk_t *disks;
    962 	RF_DiskQueue_t *spareQueues;
    963 	int ret;
    964 	unsigned int bs;
    965 	int spare_number;
    966 
    967 	printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
    968 	printf("Num col: %d\n",raidPtr->numCol);
    969 	if (raidPtr->numSpare >= RF_MAXSPARE) {
    970 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
    971 		return(EINVAL);
    972 	}
    973 
    974 	RF_LOCK_MUTEX(raidPtr->mutex);
    975 
    976 	/* the beginning of the spares... */
    977 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    978 
    979 	spare_number = raidPtr->numSpare;
    980 
    981 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
    982 			       &disks[spare_number], 0,
    983 			       raidPtr->numCol + spare_number);
    984 
    985 	if (ret)
    986 		goto fail;
    987 	if (disks[spare_number].status != rf_ds_optimal) {
    988 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    989 			     sparePtr->component_name);
    990 		ret=EINVAL;
    991 		goto fail;
    992 	} else {
    993 		disks[spare_number].status = rf_ds_spare;
    994 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
    995 			 disks[spare_number].devname,
    996 			 (long int) disks[spare_number].numBlocks,
    997 			 disks[spare_number].blockSize,
    998 			 (long int) disks[spare_number].numBlocks *
    999 			 disks[spare_number].blockSize / 1024 / 1024);
   1000 	}
   1001 
   1002 
   1003 	/* check sizes and block sizes on the spare disk */
   1004 	bs = 1 << raidPtr->logBytesPerSector;
   1005 	if (disks[spare_number].blockSize != bs) {
   1006 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1007 		ret = EINVAL;
   1008 		goto fail;
   1009 	}
   1010 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1011 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1012 			     disks[spare_number].devname,
   1013 			     disks[spare_number].blockSize,
   1014 			     (long int) raidPtr->sectorsPerDisk);
   1015 		ret = EINVAL;
   1016 		goto fail;
   1017 	} else {
   1018 		if (disks[spare_number].numBlocks >
   1019 		    raidPtr->sectorsPerDisk) {
   1020 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1021 				     (long int) raidPtr->sectorsPerDisk);
   1022 
   1023 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1024 		}
   1025 	}
   1026 
   1027 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1028 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1029 				 0, raidPtr->numCol + spare_number,
   1030 				 raidPtr->Queues[0][0].qPtr, /* XXX */
   1031 				 raidPtr->sectorsPerDisk,
   1032 				 raidPtr->Disks[0][raidPtr->numCol + spare_number].dev,
   1033 				 raidPtr->maxOutstanding,
   1034 				 &raidPtr->shutdownList,
   1035 				 raidPtr->cleanupList);
   1036 
   1037 
   1038 	raidPtr->numSpare++;
   1039 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1040 	return (0);
   1041 
   1042 fail:
   1043 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1044 	return(ret);
   1045 }
   1046 
   1047 int
   1048 rf_remove_hot_spare(raidPtr,sparePtr)
   1049 	RF_Raid_t *raidPtr;
   1050 	RF_SingleComponent_t *sparePtr;
   1051 {
   1052 	int spare_number;
   1053 
   1054 
   1055 	if (raidPtr->numSpare==0) {
   1056 		printf("No spares to remove!\n");
   1057 		return(EINVAL);
   1058 	}
   1059 
   1060 	spare_number = sparePtr->column;
   1061 
   1062 	return(EINVAL); /* XXX not implemented yet */
   1063 #if 0
   1064 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1065 		return(EINVAL);
   1066 	}
   1067 
   1068 	/* verify that this spare isn't in use... */
   1069 
   1070 
   1071 
   1072 
   1073 	/* it's gone.. */
   1074 
   1075 	raidPtr->numSpare--;
   1076 
   1077 	return(0);
   1078 #endif
   1079 }
   1080 
   1081 
   1082