Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.30
      1 /*	$NetBSD: rf_disks.c,v 1.30 2000/05/29 02:57:34 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include "rf_types.h"
     70 #include "rf_raid.h"
     71 #include "rf_alloclist.h"
     72 #include "rf_utils.h"
     73 #include "rf_configure.h"
     74 #include "rf_general.h"
     75 #include "rf_options.h"
     76 #include "rf_kintf.h"
     77 #include "rf_netbsd.h"
     78 
     79 #include <sys/types.h>
     80 #include <sys/param.h>
     81 #include <sys/systm.h>
     82 #include <sys/proc.h>
     83 #include <sys/ioctl.h>
     84 #include <sys/fcntl.h>
     85 #include <sys/vnode.h>
     86 
     87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     88 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
     89 				  RF_ComponentLabel_t *);
     90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     91 				  RF_ComponentLabel_t *, int, int );
     92 
/*
 * Debug printf helpers: print only when rf_diskDebug is non-zero.
 * Each one is wrapped in do { } while (0) so that a use followed by `;'
 * is exactly one statement and cannot capture the `else' of an enclosing
 * unbraced `if'.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     95 
     96 /**************************************************************************
     97  *
     98  * initialize the disks comprising the array
     99  *
    100  * We want the spare disks to have regular row,col numbers so that we can
    101  * easily substitue a spare for a failed disk.  But, the driver code assumes
    102  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    103  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    104  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    105  * rest, and put all the spares in it.  This probably needs to get changed
    106  * eventually.
    107  *
    108  **************************************************************************/
    109 
    110 int
    111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    112 	RF_ShutdownList_t **listp;
    113 	RF_Raid_t *raidPtr;
    114 	RF_Config_t *cfgPtr;
    115 {
    116 	RF_RaidDisk_t **disks;
    117 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    118 	RF_RowCol_t r, c;
    119 	int bs, ret;
    120 	unsigned i, count, foundone = 0, numFailuresThisRow;
    121 	int force;
    122 
    123 	force = cfgPtr->force;
    124 
    125 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    126 	if (ret)
    127 		goto fail;
    128 
    129 	disks = raidPtr->Disks;
    130 
    131 	for (r = 0; r < raidPtr->numRow; r++) {
    132 		numFailuresThisRow = 0;
    133 		for (c = 0; c < raidPtr->numCol; c++) {
    134 			ret = rf_ConfigureDisk(raidPtr,
    135 					       &cfgPtr->devnames[r][c][0],
    136 					       &disks[r][c], r, c);
    137 
    138 			if (ret)
    139 				goto fail;
    140 
    141 			if (disks[r][c].status == rf_ds_optimal) {
    142 				raidread_component_label(
    143 					 raidPtr->raid_cinfo[r][c].ci_dev,
    144 					 raidPtr->raid_cinfo[r][c].ci_vp,
    145 					 &raidPtr->raid_cinfo[r][c].ci_label);
    146 			}
    147 
    148 			if (disks[r][c].status != rf_ds_optimal) {
    149 				numFailuresThisRow++;
    150 			} else {
    151 				if (disks[r][c].numBlocks < min_numblks)
    152 					min_numblks = disks[r][c].numBlocks;
    153 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    154 				    r, c, disks[r][c].devname,
    155 				    (long int) disks[r][c].numBlocks,
    156 				    disks[r][c].blockSize,
    157 				    (long int) disks[r][c].numBlocks *
    158 					 disks[r][c].blockSize / 1024 / 1024);
    159 			}
    160 		}
    161 		/* XXX fix for n-fault tolerant */
    162 		/* XXX this should probably check to see how many failures
    163 		   we can handle for this configuration! */
    164 		if (numFailuresThisRow > 0)
    165 			raidPtr->status[r] = rf_rs_degraded;
    166 	}
    167 
    168 	/* all disks must be the same size & have the same block size, bs must
    169 	 * be a power of 2 */
    170 	bs = 0;
    171 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    172 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    173 			if (disks[r][c].status == rf_ds_optimal) {
    174 				bs = disks[r][c].blockSize;
    175 				foundone = 1;
    176 			}
    177 		}
    178 	}
    179 	if (!foundone) {
    180 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    181 		ret = EINVAL;
    182 		goto fail;
    183 	}
    184 	for (count = 0, i = 1; i; i <<= 1)
    185 		if (bs & i)
    186 			count++;
    187 	if (count != 1) {
    188 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    189 		ret = EINVAL;
    190 		goto fail;
    191 	}
    192 
    193 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    194 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    195 		if (force != 0) {
    196 			printf("raid%d: Fatal errors being ignored.\n",
    197 			       raidPtr->raidid);
    198 		} else {
    199 			ret = EINVAL;
    200 			goto fail;
    201 		}
    202 	}
    203 
    204 	for (r = 0; r < raidPtr->numRow; r++) {
    205 		for (c = 0; c < raidPtr->numCol; c++) {
    206 			if (disks[r][c].status == rf_ds_optimal) {
    207 				if (disks[r][c].blockSize != bs) {
    208 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    209 					ret = EINVAL;
    210 					goto fail;
    211 				}
    212 				if (disks[r][c].numBlocks != min_numblks) {
    213 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    214 					    r, c, (int) min_numblks);
    215 					disks[r][c].numBlocks = min_numblks;
    216 				}
    217 			}
    218 		}
    219 	}
    220 
    221 	raidPtr->sectorsPerDisk = min_numblks;
    222 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    223 	raidPtr->bytesPerSector = bs;
    224 	raidPtr->sectorMask = bs - 1;
    225 	return (0);
    226 
    227 fail:
    228 
    229 	rf_UnconfigureVnodes( raidPtr );
    230 
    231 	return (ret);
    232 }
    233 
    234 
    235 /****************************************************************************
    236  * set up the data structures describing the spare disks in the array
    237  * recall from the above comment that the spare disk descriptors are stored
    238  * in row zero, which is specially expanded to hold them.
    239  ****************************************************************************/
    240 int
    241 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    242 	RF_ShutdownList_t ** listp;
    243 	RF_Raid_t * raidPtr;
    244 	RF_Config_t * cfgPtr;
    245 {
    246 	int     i, ret;
    247 	unsigned int bs;
    248 	RF_RaidDisk_t *disks;
    249 	int     num_spares_done;
    250 
    251 	num_spares_done = 0;
    252 
    253 	/* The space for the spares should have already been allocated by
    254 	 * ConfigureDisks() */
    255 
    256 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    257 	for (i = 0; i < raidPtr->numSpare; i++) {
    258 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    259 				       &disks[i], 0, raidPtr->numCol + i);
    260 		if (ret)
    261 			goto fail;
    262 		if (disks[i].status != rf_ds_optimal) {
    263 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    264 				     &cfgPtr->spare_names[i][0]);
    265 		} else {
    266 			disks[i].status = rf_ds_spare;	/* change status to
    267 							 * spare */
    268 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    269 			    disks[i].devname,
    270 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    271 			    (long int) disks[i].numBlocks *
    272 				 disks[i].blockSize / 1024 / 1024);
    273 		}
    274 		num_spares_done++;
    275 	}
    276 
    277 	/* check sizes and block sizes on spare disks */
    278 	bs = 1 << raidPtr->logBytesPerSector;
    279 	for (i = 0; i < raidPtr->numSpare; i++) {
    280 		if (disks[i].blockSize != bs) {
    281 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    282 			ret = EINVAL;
    283 			goto fail;
    284 		}
    285 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    286 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    287 				     disks[i].devname, disks[i].blockSize,
    288 				     (long int) raidPtr->sectorsPerDisk);
    289 			ret = EINVAL;
    290 			goto fail;
    291 		} else
    292 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    293 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    294 
    295 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    296 			}
    297 	}
    298 
    299 	return (0);
    300 
    301 fail:
    302 
    303 	/* Release the hold on the main components.  We've failed to allocate
    304 	 * a spare, and since we're failing, we need to free things..
    305 
    306 	 XXX failing to allocate a spare is *not* that big of a deal...
    307 	 We *can* survive without it, if need be, esp. if we get hot
    308 	 adding working.
    309 
    310 	 If we don't fail out here, then we need a way to remove this spare...
    311 	 that should be easier to do here than if we are "live"...
    312 
    313 	 */
    314 
    315 	rf_UnconfigureVnodes( raidPtr );
    316 
    317 	return (ret);
    318 }
    319 
    320 static int
    321 rf_AllocDiskStructures(raidPtr, cfgPtr)
    322 	RF_Raid_t *raidPtr;
    323  	RF_Config_t *cfgPtr;
    324 {
    325 	RF_RaidDisk_t **disks;
    326 	int ret;
    327 	int r;
    328 
    329 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    330 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    331 	if (disks == NULL) {
    332 		ret = ENOMEM;
    333 		goto fail;
    334 	}
    335 	raidPtr->Disks = disks;
    336 	/* get space for the device-specific stuff... */
    337 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    338 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    339 	    raidPtr->cleanupList);
    340 	if (raidPtr->raid_cinfo == NULL) {
    341 		ret = ENOMEM;
    342 		goto fail;
    343 	}
    344 
    345 	for (r = 0; r < raidPtr->numRow; r++) {
    346 		/* We allocate RF_MAXSPARE on the first row so that we
    347 		   have room to do hot-swapping of spares */
    348 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    349 				+ ((r == 0) ? RF_MAXSPARE : 0),
    350 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    351 				raidPtr->cleanupList);
    352 		if (disks[r] == NULL) {
    353 			ret = ENOMEM;
    354 			goto fail;
    355 		}
    356 		/* get more space for device specific stuff.. */
    357 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    358 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    359 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    360 		    raidPtr->cleanupList);
    361 		if (raidPtr->raid_cinfo[r] == NULL) {
    362 			ret = ENOMEM;
    363 			goto fail;
    364 		}
    365 	}
    366 	return(0);
    367 fail:
    368 	rf_UnconfigureVnodes( raidPtr );
    369 
    370 	return(ret);
    371 }
    372 
    373 
    374 /* configure a single disk during auto-configuration at boot */
    375 int
    376 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    377 	RF_Raid_t *raidPtr;
    378 	RF_Config_t *cfgPtr;
    379 	RF_AutoConfig_t *auto_config;
    380 {
    381 	RF_RaidDisk_t **disks;
    382 	RF_RaidDisk_t *diskPtr;
    383 	RF_RowCol_t r, c;
    384 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    385 	int bs, ret;
    386 	int numFailuresThisRow;
    387 	int force;
    388 	RF_AutoConfig_t *ac;
    389 	int parity_good;
    390 	int mod_counter;
    391 	int mod_counter_found;
    392 
    393 #if DEBUG
    394 	printf("Starting autoconfiguration of RAID set...\n");
    395 #endif
    396 	force = cfgPtr->force;
    397 
    398 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    399 	if (ret)
    400 		goto fail;
    401 
    402 	disks = raidPtr->Disks;
    403 
    404 	/* assume the parity will be fine.. */
    405 	parity_good = RF_RAID_CLEAN;
    406 
    407 	/* Check for mod_counters that are too low */
    408 	mod_counter_found = 0;
    409 	ac = auto_config;
    410 	while(ac!=NULL) {
    411 		if (mod_counter_found==0) {
    412 			mod_counter = ac->clabel->mod_counter;
    413 			mod_counter_found = 1;
    414 		} else {
    415 			if (ac->clabel->mod_counter > mod_counter) {
    416 				mod_counter = ac->clabel->mod_counter;
    417 			}
    418 		}
    419 		ac->flag = 0; /* clear the general purpose flag */
    420 		ac = ac->next;
    421 	}
    422 
    423 	for (r = 0; r < raidPtr->numRow; r++) {
    424 		numFailuresThisRow = 0;
    425 		for (c = 0; c < raidPtr->numCol; c++) {
    426 			diskPtr = &disks[r][c];
    427 
    428 			/* find this row/col in the autoconfig */
    429 #if DEBUG
    430 			printf("Looking for %d,%d in autoconfig\n",r,c);
    431 #endif
    432 			ac = auto_config;
    433 			while(ac!=NULL) {
    434 				if (ac->clabel==NULL) {
    435 					/* big-time bad news. */
    436 					goto fail;
    437 				}
    438 				if ((ac->clabel->row == r) &&
    439 				    (ac->clabel->column == c) &&
    440 				    (ac->clabel->mod_counter == mod_counter)) {
    441 					/* it's this one... */
    442 					/* flag it as 'used', so we don't
    443 					   free it later. */
    444 					ac->flag = 1;
    445 #if DEBUG
    446 					printf("Found: %s at %d,%d\n",
    447 					       ac->devname,r,c);
    448 #endif
    449 
    450 					break;
    451 				}
    452 				ac=ac->next;
    453 			}
    454 
    455 			if (ac==NULL) {
    456 				/* we didn't find an exact match with a
    457 				   correct mod_counter above... can we
    458 				   find one with an incorrect mod_counter
    459 				   to use instead?  (this one, if we find
    460 				   it, will be marked as failed once the
    461 				   set configures)
    462 				*/
    463 
    464 				ac = auto_config;
    465 				while(ac!=NULL) {
    466 					if (ac->clabel==NULL) {
    467 						/* big-time bad news. */
    468 						goto fail;
    469 					}
    470 					if ((ac->clabel->row == r) &&
    471 					    (ac->clabel->column == c)) {
    472 						/* it's this one...
    473 						   flag it as 'used', so we
    474 						   don't free it later. */
    475 						ac->flag = 1;
    476 #if DEBUG
    477 						printf("Found(low mod_counter): %s at %d,%d\n",
    478 						       ac->devname,r,c);
    479 #endif
    480 
    481 						break;
    482 					}
    483 					ac=ac->next;
    484 				}
    485 			}
    486 
    487 
    488 
    489 			if (ac!=NULL) {
    490 				/* Found it.  Configure it.. */
    491 				diskPtr->blockSize = ac->clabel->blockSize;
    492 				diskPtr->numBlocks = ac->clabel->numBlocks;
    493 				/* Note: rf_protectedSectors is already
    494 				   factored into numBlocks here */
    495 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    496 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
    497 
    498 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    499 				       ac->clabel, sizeof(*ac->clabel));
    500 				sprintf(diskPtr->devname, "/dev/%s",
    501 					ac->devname);
    502 
    503 				/* note the fact that this component was
    504 				   autoconfigured.  You'll need this info
    505 				   later.  Trust me :) */
    506 				diskPtr->auto_configured = 1;
    507 				diskPtr->dev = ac->dev;
    508 
    509 				/*
    510 				 * we allow the user to specify that
    511 				 * only a fraction of the disks should
    512 				 * be used this is just for debug: it
    513 				 * speeds up the parity scan
    514 				 */
    515 
    516 				diskPtr->numBlocks = diskPtr->numBlocks *
    517 					rf_sizePercentage / 100;
    518 
    519 				/* XXX these will get set multiple times,
    520 				   but since we're autoconfiguring, they'd
    521 				   better be always the same each time!
    522 				   If not, this is the least of your worries */
    523 
    524 				bs = diskPtr->blockSize;
    525 				min_numblks = diskPtr->numBlocks;
    526 
    527 				/* this gets done multiple times, but that's
    528 				   fine -- the serial number will be the same
    529 				   for all components, guaranteed */
    530 				raidPtr->serial_number =
    531 					ac->clabel->serial_number;
    532 				/* check the last time the label
    533 				   was modified */
    534 				if (ac->clabel->mod_counter !=
    535 				    mod_counter) {
    536 					/* Even though we've filled in all
    537 					   of the above, we don't trust
    538 					   this component since it's
    539 					   modification counter is not
    540 					   in sync with the rest, and we really
    541 					   consider it to be failed.  */
    542 					disks[r][c].status = rf_ds_failed;
    543 					numFailuresThisRow++;
    544 				} else {
    545 					if (ac->clabel->clean !=
    546 					    RF_RAID_CLEAN) {
    547 						parity_good = RF_RAID_DIRTY;
    548 					}
    549 				}
    550 			} else {
    551 				/* Didn't find it at all!!
    552 				   Component must really be dead */
    553 				disks[r][c].status = rf_ds_failed;
    554 				numFailuresThisRow++;
    555 			}
    556 		}
    557 		/* XXX fix for n-fault tolerant */
    558 		/* XXX this should probably check to see how many failures
    559 		   we can handle for this configuration! */
    560 		if (numFailuresThisRow > 0)
    561 			raidPtr->status[r] = rf_rs_degraded;
    562 	}
    563 
    564 	/* close the device for the ones that didn't get used */
    565 
    566 	ac = auto_config;
    567 	while(ac!=NULL) {
    568 		if (ac->flag == 0) {
    569 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
    570 			vput(ac->vp);
    571 			ac->vp = NULL;
    572 #if DEBUG
    573 			printf("Released %s from auto-config set.\n",
    574 			       ac->devname);
    575 #endif
    576 		}
    577 		ac = ac->next;
    578 	}
    579 
    580 	raidPtr->mod_counter = mod_counter;
    581 
    582 	/* note the state of the parity, if any */
    583 	raidPtr->parity_good = parity_good;
    584 	raidPtr->sectorsPerDisk = min_numblks;
    585 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    586 	raidPtr->bytesPerSector = bs;
    587 	raidPtr->sectorMask = bs - 1;
    588 	return (0);
    589 
    590 fail:
    591 
    592 	rf_UnconfigureVnodes( raidPtr );
    593 
    594 	return (ret);
    595 
    596 }
    597 
    598 /* configure a single disk in the array */
    599 int
    600 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    601 	RF_Raid_t *raidPtr;
    602 	char   *buf;
    603 	RF_RaidDisk_t *diskPtr;
    604 	RF_RowCol_t row;
    605 	RF_RowCol_t col;
    606 {
    607 	char   *p;
    608 	int     retcode;
    609 
    610 	struct partinfo dpart;
    611 	struct vnode *vp;
    612 	struct vattr va;
    613 	struct proc *proc;
    614 	int     error;
    615 
    616 	retcode = 0;
    617 	p = rf_find_non_white(buf);
    618 	if (p[strlen(p) - 1] == '\n') {
    619 		/* strip off the newline */
    620 		p[strlen(p) - 1] = '\0';
    621 	}
    622 	(void) strcpy(diskPtr->devname, p);
    623 
    624 	proc = raidPtr->engine_thread;
    625 
    626 	/* Let's start by claiming the component is fine and well... */
    627 	diskPtr->status = rf_ds_optimal;
    628 
    629 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    630 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    631 
    632 	error = raidlookup(diskPtr->devname, proc, &vp);
    633 	if (error) {
    634 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    635 		if (error == ENXIO) {
    636 			/* the component isn't there... must be dead :-( */
    637 			diskPtr->status = rf_ds_failed;
    638 		} else {
    639 			return (error);
    640 		}
    641 	}
    642 	if (diskPtr->status == rf_ds_optimal) {
    643 
    644 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    645 			return (error);
    646 		}
    647 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    648 				  FREAD, proc->p_ucred, proc);
    649 		if (error) {
    650 			return (error);
    651 		}
    652 
    653 		diskPtr->blockSize = dpart.disklab->d_secsize;
    654 
    655 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    656 		diskPtr->partitionSize = dpart.part->p_size;
    657 
    658 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    659 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    660 
    661 		/* This component was not automatically configured */
    662 		diskPtr->auto_configured = 0;
    663 		diskPtr->dev = va.va_rdev;
    664 
    665 		/* we allow the user to specify that only a fraction of the
    666 		 * disks should be used this is just for debug:  it speeds up
    667 		 * the parity scan */
    668 		diskPtr->numBlocks = diskPtr->numBlocks *
    669 			rf_sizePercentage / 100;
    670 	}
    671 	return (0);
    672 }
    673 
    674 static void
    675 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    676 	RF_Raid_t *raidPtr;
    677 	int row;
    678 	int column;
    679 	char *dev_name;
    680 	RF_ComponentLabel_t *ci_label;
    681 {
    682 
    683 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    684 	       raidPtr->raidid, dev_name, row, column );
    685 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    686 	       ci_label->row, ci_label->column,
    687 	       ci_label->num_rows, ci_label->num_columns);
    688 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    689 	       ci_label->version, ci_label->serial_number,
    690 	       ci_label->mod_counter);
    691 	printf("         Clean: %s Status: %d\n",
    692 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    693 }
    694 
    695 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    696 				  serial_number, mod_counter )
    697 	RF_Raid_t *raidPtr;
    698 	int row;
    699 	int column;
    700 	char *dev_name;
    701 	RF_ComponentLabel_t *ci_label;
    702 	int serial_number;
    703 	int mod_counter;
    704 {
    705 	int fatal_error = 0;
    706 
    707 	if (serial_number != ci_label->serial_number) {
    708 		printf("%s has a different serial number: %d %d\n",
    709 		       dev_name, serial_number, ci_label->serial_number);
    710 		fatal_error = 1;
    711 	}
    712 	if (mod_counter != ci_label->mod_counter) {
    713 		printf("%s has a different modfication count: %d %d\n",
    714 		       dev_name, mod_counter, ci_label->mod_counter);
    715 	}
    716 
    717 	if (row != ci_label->row) {
    718 		printf("Row out of alignment for: %s\n", dev_name);
    719 		fatal_error = 1;
    720 	}
    721 	if (column != ci_label->column) {
    722 		printf("Column out of alignment for: %s\n", dev_name);
    723 		fatal_error = 1;
    724 	}
    725 	if (raidPtr->numRow != ci_label->num_rows) {
    726 		printf("Number of rows do not match for: %s\n", dev_name);
    727 		fatal_error = 1;
    728 	}
    729 	if (raidPtr->numCol != ci_label->num_columns) {
    730 		printf("Number of columns do not match for: %s\n", dev_name);
    731 		fatal_error = 1;
    732 	}
    733 	if (ci_label->clean == 0) {
    734 		/* it's not clean, but that's not fatal */
    735 		printf("%s is not clean!\n", dev_name);
    736 	}
    737 	return(fatal_error);
    738 }
    739 
    740 
    741 /*
    742 
    743    rf_CheckLabels() - check all the component labels for consistency.
    744    Return an error if there is anything major amiss.
    745 
    746  */
    747 
    748 int
    749 rf_CheckLabels( raidPtr, cfgPtr )
    750 	RF_Raid_t *raidPtr;
    751 	RF_Config_t *cfgPtr;
    752 {
    753 	int r,c;
    754 	char *dev_name;
    755 	RF_ComponentLabel_t *ci_label;
    756 	int serial_number = 0;
    757 	int mod_number = 0;
    758 	int fatal_error = 0;
    759 	int mod_values[4];
    760 	int mod_count[4];
    761 	int ser_values[4];
    762 	int ser_count[4];
    763 	int num_ser;
    764 	int num_mod;
    765 	int i;
    766 	int found;
    767 	int hosed_row;
    768 	int hosed_column;
    769 	int too_fatal;
    770 	int parity_good;
    771 	int force;
    772 
    773 	hosed_row = -1;
    774 	hosed_column = -1;
    775 	too_fatal = 0;
    776 	force = cfgPtr->force;
    777 
    778 	/*
    779 	   We're going to try to be a little intelligent here.  If one
    780 	   component's label is bogus, and we can identify that it's the
    781 	   *only* one that's gone, we'll mark it as "failed" and allow
    782 	   the configuration to proceed.  This will be the *only* case
    783 	   that we'll proceed if there would be (otherwise) fatal errors.
    784 
    785 	   Basically we simply keep a count of how many components had
    786 	   what serial number.  If all but one agree, we simply mark
    787 	   the disagreeing component as being failed, and allow
    788 	   things to come up "normally".
    789 
    790 	   We do this first for serial numbers, and then for "mod_counter".
    791 
    792 	 */
    793 
    794 	num_ser = 0;
    795 	num_mod = 0;
    796 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    797 		for (c = 0; c < raidPtr->numCol; c++) {
    798 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    799 			found=0;
    800 			for(i=0;i<num_ser;i++) {
    801 				if (ser_values[i] == ci_label->serial_number) {
    802 					ser_count[i]++;
    803 					found=1;
    804 					break;
    805 				}
    806 			}
    807 			if (!found) {
    808 				ser_values[num_ser] = ci_label->serial_number;
    809 				ser_count[num_ser] = 1;
    810 				num_ser++;
    811 				if (num_ser>2) {
    812 					fatal_error = 1;
    813 					break;
    814 				}
    815 			}
    816 			found=0;
    817 			for(i=0;i<num_mod;i++) {
    818 				if (mod_values[i] == ci_label->mod_counter) {
    819 					mod_count[i]++;
    820 					found=1;
    821 					break;
    822 				}
    823 			}
    824 			if (!found) {
    825 			        mod_values[num_mod] = ci_label->mod_counter;
    826 				mod_count[num_mod] = 1;
    827 				num_mod++;
    828 				if (num_mod>2) {
    829 					fatal_error = 1;
    830 					break;
    831 				}
    832 			}
    833 		}
    834 	}
    835 #if DEBUG
    836 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    837 	for(i=0;i<num_ser;i++) {
    838 		printf("%d %d\n", ser_values[i], ser_count[i]);
    839 	}
    840 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    841 	for(i=0;i<num_mod;i++) {
    842 		printf("%d %d\n", mod_values[i], mod_count[i]);
    843 	}
    844 #endif
    845 	serial_number = ser_values[0];
    846 	if (num_ser == 2) {
    847 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    848 			/* Locate the maverick component */
    849 			if (ser_count[1] > ser_count[0]) {
    850 				serial_number = ser_values[1];
    851 			}
    852 			for (r = 0; r < raidPtr->numRow; r++) {
    853 				for (c = 0; c < raidPtr->numCol; c++) {
    854 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    855 					if (serial_number !=
    856 					    ci_label->serial_number) {
    857 						hosed_row = r;
    858 						hosed_column = c;
    859 						break;
    860 					}
    861 				}
    862 			}
    863 			printf("Hosed component: %s\n",
    864 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    865 			if (!force) {
    866 				/* we'll fail this component, as if there are
    867 				   other major errors, we arn't forcing things
    868 				   and we'll abort the config anyways */
    869 				raidPtr->Disks[hosed_row][hosed_column].status
    870 					= rf_ds_failed;
    871 				raidPtr->numFailures++;
    872 				raidPtr->status[hosed_row] = rf_rs_degraded;
    873 			}
    874 		} else {
    875 			too_fatal = 1;
    876 		}
    877 		if (cfgPtr->parityConfig == '0') {
    878 			/* We've identified two different serial numbers.
    879 			   RAID 0 can't cope with that, so we'll punt */
    880 			too_fatal = 1;
    881 		}
    882 
    883 	}
    884 
    885 	/* record the serial number for later.  If we bail later, setting
    886 	   this doesn't matter, otherwise we've got the best guess at the
    887 	   correct serial number */
    888 	raidPtr->serial_number = serial_number;
    889 
    890 	mod_number = mod_values[0];
    891 	if (num_mod == 2) {
    892 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    893 			/* Locate the maverick component */
    894 			if (mod_count[1] > mod_count[0]) {
    895 				mod_number = mod_values[1];
    896 			} else if (mod_count[1] < mod_count[0]) {
    897 				mod_number = mod_values[0];
    898 			} else {
    899 				/* counts of different modification values
    900 				   are the same.   Assume greater value is
    901 				   the correct one, all other things
    902 				   considered */
    903 				if (mod_values[0] > mod_values[1]) {
    904 					mod_number = mod_values[0];
    905 				} else {
    906 					mod_number = mod_values[1];
    907 				}
    908 
    909 			}
    910 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    911 				for (c = 0; c < raidPtr->numCol; c++) {
    912 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    913 					if (mod_number !=
    914 					    ci_label->mod_counter) {
    915 						if ( ( hosed_row == r ) &&
    916 						     ( hosed_column == c )) {
    917 							/* same one.  Can
    918 							   deal with it.  */
    919 						} else {
    920 							hosed_row = r;
    921 							hosed_column = c;
    922 							if (num_ser != 1) {
    923 								too_fatal = 1;
    924 								break;
    925 							}
    926 						}
    927 					}
    928 				}
    929 			}
    930 			printf("Hosed component: %s\n",
    931 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    932 			if (!force) {
    933 				/* we'll fail this component, as if there are
    934 				   other major errors, we arn't forcing things
    935 				   and we'll abort the config anyways */
    936 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    937 					raidPtr->Disks[hosed_row][hosed_column].status
    938 						= rf_ds_failed;
    939 					raidPtr->numFailures++;
    940 					raidPtr->status[hosed_row] = rf_rs_degraded;
    941 				}
    942 			}
    943 		} else {
    944 			too_fatal = 1;
    945 		}
    946 		if (cfgPtr->parityConfig == '0') {
    947 			/* We've identified two different mod counters.
    948 			   RAID 0 can't cope with that, so we'll punt */
    949 			too_fatal = 1;
    950 		}
    951 	}
    952 
    953 	raidPtr->mod_counter = mod_number;
    954 
    955 	if (too_fatal) {
    956 		/* we've had both a serial number mismatch, and a mod_counter
    957 		   mismatch -- and they involved two different components!!
    958 		   Bail -- make things fail so that the user must force
    959 		   the issue... */
    960 		hosed_row = -1;
    961 		hosed_column = -1;
    962 	}
    963 
    964 	if (num_ser > 2) {
    965 		printf("raid%d: Too many different serial numbers!\n",
    966 		       raidPtr->raidid);
    967 	}
    968 
    969 	if (num_mod > 2) {
    970 		printf("raid%d: Too many different mod counters!\n",
    971 		       raidPtr->raidid);
    972 	}
    973 
    974 	/* we start by assuming the parity will be good, and flee from
    975 	   that notion at the slightest sign of trouble */
    976 
    977 	parity_good = RF_RAID_CLEAN;
    978 	for (r = 0; r < raidPtr->numRow; r++) {
    979 		for (c = 0; c < raidPtr->numCol; c++) {
    980 			dev_name = &cfgPtr->devnames[r][c][0];
    981 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    982 
    983 			if ((r == hosed_row) && (c == hosed_column)) {
    984 				printf("raid%d: Ignoring %s\n",
    985 				       raidPtr->raidid, dev_name);
    986 			} else {
    987 				rf_print_label_status( raidPtr, r, c,
    988 						       dev_name, ci_label );
    989 				if (rf_check_label_vitals( raidPtr, r, c,
    990 							   dev_name, ci_label,
    991 							   serial_number,
    992 							   mod_number )) {
    993 					fatal_error = 1;
    994 				}
    995 				if (ci_label->clean != RF_RAID_CLEAN) {
    996 					parity_good = RF_RAID_DIRTY;
    997 				}
    998 			}
    999 		}
   1000 	}
   1001 	if (fatal_error) {
   1002 		parity_good = RF_RAID_DIRTY;
   1003 	}
   1004 
   1005 	/* we note the state of the parity */
   1006 	raidPtr->parity_good = parity_good;
   1007 
   1008 	return(fatal_error);
   1009 }
   1010 
   1011 int
   1012 rf_add_hot_spare(raidPtr, sparePtr)
   1013 	RF_Raid_t *raidPtr;
   1014 	RF_SingleComponent_t *sparePtr;
   1015 {
   1016 	RF_RaidDisk_t *disks;
   1017 	RF_DiskQueue_t *spareQueues;
   1018 	int ret;
   1019 	unsigned int bs;
   1020 	int spare_number;
   1021 
   1022 #if 0
   1023 	printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
   1024 	printf("Num col: %d\n",raidPtr->numCol);
   1025 #endif
   1026 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1027 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1028 		return(EINVAL);
   1029 	}
   1030 
   1031 	RF_LOCK_MUTEX(raidPtr->mutex);
   1032 
   1033 	/* the beginning of the spares... */
   1034 	disks = &raidPtr->Disks[0][raidPtr->numCol];
   1035 
   1036 	spare_number = raidPtr->numSpare;
   1037 
   1038 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1039 			       &disks[spare_number], 0,
   1040 			       raidPtr->numCol + spare_number);
   1041 
   1042 	if (ret)
   1043 		goto fail;
   1044 	if (disks[spare_number].status != rf_ds_optimal) {
   1045 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1046 			     sparePtr->component_name);
   1047 		ret=EINVAL;
   1048 		goto fail;
   1049 	} else {
   1050 		disks[spare_number].status = rf_ds_spare;
   1051 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1052 			 disks[spare_number].devname,
   1053 			 (long int) disks[spare_number].numBlocks,
   1054 			 disks[spare_number].blockSize,
   1055 			 (long int) disks[spare_number].numBlocks *
   1056 			 disks[spare_number].blockSize / 1024 / 1024);
   1057 	}
   1058 
   1059 
   1060 	/* check sizes and block sizes on the spare disk */
   1061 	bs = 1 << raidPtr->logBytesPerSector;
   1062 	if (disks[spare_number].blockSize != bs) {
   1063 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1064 		ret = EINVAL;
   1065 		goto fail;
   1066 	}
   1067 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1068 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1069 			     disks[spare_number].devname,
   1070 			     disks[spare_number].blockSize,
   1071 			     (long int) raidPtr->sectorsPerDisk);
   1072 		ret = EINVAL;
   1073 		goto fail;
   1074 	} else {
   1075 		if (disks[spare_number].numBlocks >
   1076 		    raidPtr->sectorsPerDisk) {
   1077 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1078 				     (long int) raidPtr->sectorsPerDisk);
   1079 
   1080 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1081 		}
   1082 	}
   1083 
   1084 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1085 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1086 				 0, raidPtr->numCol + spare_number,
   1087 				 raidPtr->qType,
   1088 				 raidPtr->sectorsPerDisk,
   1089 				 raidPtr->Disks[0][raidPtr->numCol +
   1090 						  spare_number].dev,
   1091 				 raidPtr->maxOutstanding,
   1092 				 &raidPtr->shutdownList,
   1093 				 raidPtr->cleanupList);
   1094 
   1095 
   1096 	raidPtr->numSpare++;
   1097 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1098 	return (0);
   1099 
   1100 fail:
   1101 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1102 	return(ret);
   1103 }
   1104 
   1105 int
   1106 rf_remove_hot_spare(raidPtr,sparePtr)
   1107 	RF_Raid_t *raidPtr;
   1108 	RF_SingleComponent_t *sparePtr;
   1109 {
   1110 	int spare_number;
   1111 
   1112 
   1113 	if (raidPtr->numSpare==0) {
   1114 		printf("No spares to remove!\n");
   1115 		return(EINVAL);
   1116 	}
   1117 
   1118 	spare_number = sparePtr->column;
   1119 
   1120 	return(EINVAL); /* XXX not implemented yet */
   1121 #if 0
   1122 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1123 		return(EINVAL);
   1124 	}
   1125 
   1126 	/* verify that this spare isn't in use... */
   1127 
   1128 
   1129 
   1130 
   1131 	/* it's gone.. */
   1132 
   1133 	raidPtr->numSpare--;
   1134 
   1135 	return(0);
   1136 #endif
   1137 }
   1138 
   1139 
   1140 int
   1141 rf_delete_component(raidPtr,component)
   1142 	RF_Raid_t *raidPtr;
   1143 	RF_SingleComponent_t *component;
   1144 {
   1145 	RF_RaidDisk_t *disks;
   1146 
   1147 	if ((component->row < 0) ||
   1148 	    (component->row >= raidPtr->numRow) ||
   1149 	    (component->column < 0) ||
   1150 	    (component->column >= raidPtr->numCol)) {
   1151 		return(EINVAL);
   1152 	}
   1153 
   1154 	disks = &raidPtr->Disks[component->row][component->column];
   1155 
   1156 	/* 1. This component must be marked as 'failed' */
   1157 
   1158 	return(EINVAL); /* Not implemented yet. */
   1159 }
   1160 
   1161 int
   1162 rf_incorporate_hot_spare(raidPtr,component)
   1163 	RF_Raid_t *raidPtr;
   1164 	RF_SingleComponent_t *component;
   1165 {
   1166 
   1167 	/* Issues here include how to 'move' this in if there is IO
   1168 	   taking place (e.g. component queues and such) */
   1169 
   1170 	return(EINVAL); /* Not implemented yet. */
   1171 }
   1172