Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.29
      1 /*	$NetBSD: rf_disks.c,v 1.29 2000/05/28 22:53:49 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include "rf_types.h"
     70 #include "rf_raid.h"
     71 #include "rf_alloclist.h"
     72 #include "rf_utils.h"
     73 #include "rf_configure.h"
     74 #include "rf_general.h"
     75 #include "rf_options.h"
     76 #include "rf_kintf.h"
     77 #include "rf_netbsd.h"
     78 
     79 #include <sys/types.h>
     80 #include <sys/param.h>
     81 #include <sys/systm.h>
     82 #include <sys/proc.h>
     83 #include <sys/ioctl.h>
     84 #include <sys/fcntl.h>
     85 #include <sys/vnode.h>
     86 
     87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     88 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
     89 				  RF_ComponentLabel_t *);
     90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     91 				  RF_ComponentLabel_t *, int, int );
     92 
/*
 * Debug tracing helpers: hand the arguments to printf() only when
 * rf_diskDebug is non-zero.  The digit in the name is the total number of
 * arguments, format string included.
 *
 * Each expansion is wrapped in do { } while (0) so that it behaves as one
 * statement; a bare "if (...) printf(...)" would silently capture an "else"
 * that follows the macro in an unbraced if/else.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     95 
     96 /**************************************************************************
     97  *
     98  * initialize the disks comprising the array
     99  *
    100  * We want the spare disks to have regular row,col numbers so that we can
    101  * easily substitute a spare for a failed disk.  But, the driver code assumes
    102  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    103  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    104  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    105  * rest, and put all the spares in it.  This probably needs to get changed
    106  * eventually.
    107  *
    108  **************************************************************************/
    109 
    110 int
    111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    112 	RF_ShutdownList_t **listp;
    113 	RF_Raid_t *raidPtr;
    114 	RF_Config_t *cfgPtr;
    115 {
    116 	RF_RaidDisk_t **disks;
    117 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    118 	RF_RowCol_t r, c;
    119 	int bs, ret;
    120 	unsigned i, count, foundone = 0, numFailuresThisRow;
    121 	int force;
    122 
    123 	force = cfgPtr->force;
    124 
    125 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    126 	if (ret)
    127 		goto fail;
    128 
    129 	disks = raidPtr->Disks;
    130 
    131 	for (r = 0; r < raidPtr->numRow; r++) {
    132 		numFailuresThisRow = 0;
    133 		for (c = 0; c < raidPtr->numCol; c++) {
    134 			ret = rf_ConfigureDisk(raidPtr,
    135 					       &cfgPtr->devnames[r][c][0],
    136 					       &disks[r][c], r, c);
    137 
    138 			if (ret)
    139 				goto fail;
    140 
    141 			if (disks[r][c].status == rf_ds_optimal) {
    142 				raidread_component_label(
    143 					 raidPtr->raid_cinfo[r][c].ci_dev,
    144 					 raidPtr->raid_cinfo[r][c].ci_vp,
    145 					 &raidPtr->raid_cinfo[r][c].ci_label);
    146 			}
    147 
    148 			if (disks[r][c].status != rf_ds_optimal) {
    149 				numFailuresThisRow++;
    150 			} else {
    151 				if (disks[r][c].numBlocks < min_numblks)
    152 					min_numblks = disks[r][c].numBlocks;
    153 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    154 				    r, c, disks[r][c].devname,
    155 				    (long int) disks[r][c].numBlocks,
    156 				    disks[r][c].blockSize,
    157 				    (long int) disks[r][c].numBlocks *
    158 					 disks[r][c].blockSize / 1024 / 1024);
    159 			}
    160 		}
    161 		/* XXX fix for n-fault tolerant */
    162 		/* XXX this should probably check to see how many failures
    163 		   we can handle for this configuration! */
    164 		if (numFailuresThisRow > 0)
    165 			raidPtr->status[r] = rf_rs_degraded;
    166 	}
    167 
    168 	/* all disks must be the same size & have the same block size, bs must
    169 	 * be a power of 2 */
    170 	bs = 0;
    171 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    172 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    173 			if (disks[r][c].status == rf_ds_optimal) {
    174 				bs = disks[r][c].blockSize;
    175 				foundone = 1;
    176 			}
    177 		}
    178 	}
    179 	if (!foundone) {
    180 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    181 		ret = EINVAL;
    182 		goto fail;
    183 	}
    184 	for (count = 0, i = 1; i; i <<= 1)
    185 		if (bs & i)
    186 			count++;
    187 	if (count != 1) {
    188 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    189 		ret = EINVAL;
    190 		goto fail;
    191 	}
    192 
    193 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    194 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    195 		if (force != 0) {
    196 			printf("raid%d: Fatal errors being ignored.\n",
    197 			       raidPtr->raidid);
    198 		} else {
    199 			ret = EINVAL;
    200 			goto fail;
    201 		}
    202 	}
    203 
    204 	for (r = 0; r < raidPtr->numRow; r++) {
    205 		for (c = 0; c < raidPtr->numCol; c++) {
    206 			if (disks[r][c].status == rf_ds_optimal) {
    207 				if (disks[r][c].blockSize != bs) {
    208 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    209 					ret = EINVAL;
    210 					goto fail;
    211 				}
    212 				if (disks[r][c].numBlocks != min_numblks) {
    213 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    214 					    r, c, (int) min_numblks);
    215 					disks[r][c].numBlocks = min_numblks;
    216 				}
    217 			}
    218 		}
    219 	}
    220 
    221 	raidPtr->sectorsPerDisk = min_numblks;
    222 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    223 	raidPtr->bytesPerSector = bs;
    224 	raidPtr->sectorMask = bs - 1;
    225 	return (0);
    226 
    227 fail:
    228 
    229 	rf_UnconfigureVnodes( raidPtr );
    230 
    231 	return (ret);
    232 }
    233 
    234 
    235 /****************************************************************************
    236  * set up the data structures describing the spare disks in the array
    237  * recall from the above comment that the spare disk descriptors are stored
    238  * in row zero, which is specially expanded to hold them.
    239  ****************************************************************************/
    240 int
    241 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    242 	RF_ShutdownList_t ** listp;
    243 	RF_Raid_t * raidPtr;
    244 	RF_Config_t * cfgPtr;
    245 {
    246 	int     i, ret;
    247 	unsigned int bs;
    248 	RF_RaidDisk_t *disks;
    249 	int     num_spares_done;
    250 
    251 	num_spares_done = 0;
    252 
    253 	/* The space for the spares should have already been allocated by
    254 	 * ConfigureDisks() */
    255 
    256 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    257 	for (i = 0; i < raidPtr->numSpare; i++) {
    258 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    259 				       &disks[i], 0, raidPtr->numCol + i);
    260 		if (ret)
    261 			goto fail;
    262 		if (disks[i].status != rf_ds_optimal) {
    263 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    264 				     &cfgPtr->spare_names[i][0]);
    265 		} else {
    266 			disks[i].status = rf_ds_spare;	/* change status to
    267 							 * spare */
    268 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    269 			    disks[i].devname,
    270 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    271 			    (long int) disks[i].numBlocks *
    272 				 disks[i].blockSize / 1024 / 1024);
    273 		}
    274 		num_spares_done++;
    275 	}
    276 
    277 	/* check sizes and block sizes on spare disks */
    278 	bs = 1 << raidPtr->logBytesPerSector;
    279 	for (i = 0; i < raidPtr->numSpare; i++) {
    280 		if (disks[i].blockSize != bs) {
    281 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    282 			ret = EINVAL;
    283 			goto fail;
    284 		}
    285 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    286 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    287 				     disks[i].devname, disks[i].blockSize,
    288 				     (long int) raidPtr->sectorsPerDisk);
    289 			ret = EINVAL;
    290 			goto fail;
    291 		} else
    292 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    293 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    294 
    295 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    296 			}
    297 	}
    298 
    299 	return (0);
    300 
    301 fail:
    302 
    303 	/* Release the hold on the main components.  We've failed to allocate
    304 	 * a spare, and since we're failing, we need to free things..
    305 
    306 	 XXX failing to allocate a spare is *not* that big of a deal...
    307 	 We *can* survive without it, if need be, esp. if we get hot
    308 	 adding working.
    309 
    310 	 If we don't fail out here, then we need a way to remove this spare...
    311 	 that should be easier to do here than if we are "live"...
    312 
    313 	 */
    314 
    315 	rf_UnconfigureVnodes( raidPtr );
    316 
    317 	return (ret);
    318 }
    319 
    320 static int
    321 rf_AllocDiskStructures(raidPtr, cfgPtr)
    322 	RF_Raid_t *raidPtr;
    323  	RF_Config_t *cfgPtr;
    324 {
    325 	RF_RaidDisk_t **disks;
    326 	int ret;
    327 	int r;
    328 
    329 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    330 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    331 	if (disks == NULL) {
    332 		ret = ENOMEM;
    333 		goto fail;
    334 	}
    335 	raidPtr->Disks = disks;
    336 	/* get space for the device-specific stuff... */
    337 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    338 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    339 	    raidPtr->cleanupList);
    340 	if (raidPtr->raid_cinfo == NULL) {
    341 		ret = ENOMEM;
    342 		goto fail;
    343 	}
    344 
    345 	for (r = 0; r < raidPtr->numRow; r++) {
    346 		/* We allocate RF_MAXSPARE on the first row so that we
    347 		   have room to do hot-swapping of spares */
    348 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    349 				+ ((r == 0) ? RF_MAXSPARE : 0),
    350 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    351 				raidPtr->cleanupList);
    352 		if (disks[r] == NULL) {
    353 			ret = ENOMEM;
    354 			goto fail;
    355 		}
    356 		/* get more space for device specific stuff.. */
    357 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    358 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    359 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    360 		    raidPtr->cleanupList);
    361 		if (raidPtr->raid_cinfo[r] == NULL) {
    362 			ret = ENOMEM;
    363 			goto fail;
    364 		}
    365 	}
    366 	return(0);
    367 fail:
    368 	rf_UnconfigureVnodes( raidPtr );
    369 
    370 	return(ret);
    371 }
    372 
    373 
    374 /* configure a single disk during auto-configuration at boot */
    375 int
    376 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    377 	RF_Raid_t *raidPtr;
    378 	RF_Config_t *cfgPtr;
    379 	RF_AutoConfig_t *auto_config;
    380 {
    381 	RF_RaidDisk_t **disks;
    382 	RF_RaidDisk_t *diskPtr;
    383 	RF_RowCol_t r, c;
    384 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    385 	int bs, ret;
    386 	int numFailuresThisRow;
    387 	int force;
    388 	RF_AutoConfig_t *ac;
    389 	int parity_good;
    390 	int mod_counter;
    391 
    392 #if DEBUG
    393 	printf("Starting autoconfiguration of RAID set...\n");
    394 #endif
    395 	force = cfgPtr->force;
    396 
    397 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    398 	if (ret)
    399 		goto fail;
    400 
    401 	disks = raidPtr->Disks;
    402 
    403 	/* assume the parity will be fine.. */
    404 	parity_good = RF_RAID_CLEAN;
    405 
    406 	/* Check for mod_counters that are too low */
    407 	mod_counter = -1;
    408 	ac = auto_config;
    409 	while(ac!=NULL) {
    410 		if (ac->clabel->mod_counter > mod_counter) {
    411 			mod_counter = ac->clabel->mod_counter;
    412 		}
    413 		ac->flag = 0; /* clear the general purpose flag */
    414 		ac = ac->next;
    415 	}
    416 
    417 	if (mod_counter == -1) {
    418 		/* mod_counters were all negative!?!?!?
    419 		   Ok, we can deal with that. */
    420 #if 0
    421 		ac = auto_config;
    422 		while(ac!=NULL) {
    423 			if (ac->clabel->mod_counter > mod_counter) {
    424 				mod_counter = ac->clabel->mod_counter;
    425 			}
    426 			ac = ac->next;
    427 		}
    428 #endif
    429 	}
    430 
    431 	for (r = 0; r < raidPtr->numRow; r++) {
    432 		numFailuresThisRow = 0;
    433 		for (c = 0; c < raidPtr->numCol; c++) {
    434 			diskPtr = &disks[r][c];
    435 
    436 			/* find this row/col in the autoconfig */
    437 #if DEBUG
    438 			printf("Looking for %d,%d in autoconfig\n",r,c);
    439 #endif
    440 			ac = auto_config;
    441 			while(ac!=NULL) {
    442 				if (ac->clabel==NULL) {
    443 					/* big-time bad news. */
    444 					goto fail;
    445 				}
    446 				if ((ac->clabel->row == r) &&
    447 				    (ac->clabel->column == c) &&
    448 				    (ac->clabel->mod_counter == mod_counter)) {
    449 					/* it's this one... */
    450 					/* flag it as 'used', so we don't
    451 					   free it later. */
    452 					ac->flag = 1;
    453 #if DEBUG
    454 					printf("Found: %s at %d,%d\n",
    455 					       ac->devname,r,c);
    456 #endif
    457 
    458 					break;
    459 				}
    460 				ac=ac->next;
    461 			}
    462 
    463 			if (ac==NULL) {
    464 				/* we didn't find an exact match with a
    465 				   correct mod_counter above... can we
    466 				   find one with an incorrect mod_counter
    467 				   to use instead?  (this one, if we find
    468 				   it, will be marked as failed once the
    469 				   set configures)
    470 				*/
    471 
    472 				ac = auto_config;
    473 				while(ac!=NULL) {
    474 					if (ac->clabel==NULL) {
    475 						/* big-time bad news. */
    476 						goto fail;
    477 					}
    478 					if ((ac->clabel->row == r) &&
    479 					    (ac->clabel->column == c)) {
    480 						/* it's this one...
    481 						   flag it as 'used', so we
    482 						   don't free it later. */
    483 						ac->flag = 1;
    484 #if DEBUG
    485 						printf("Found(low mod_counter): %s at %d,%d\n",
    486 						       ac->devname,r,c);
    487 #endif
    488 
    489 						break;
    490 					}
    491 					ac=ac->next;
    492 				}
    493 			}
    494 
    495 
    496 
    497 			if (ac!=NULL) {
    498 				/* Found it.  Configure it.. */
    499 				diskPtr->blockSize = ac->clabel->blockSize;
    500 				diskPtr->numBlocks = ac->clabel->numBlocks;
    501 				/* Note: rf_protectedSectors is already
    502 				   factored into numBlocks here */
    503 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    504 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
    505 
    506 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    507 				       ac->clabel, sizeof(*ac->clabel));
    508 				sprintf(diskPtr->devname, "/dev/%s",
    509 					ac->devname);
    510 
    511 				/* note the fact that this component was
    512 				   autoconfigured.  You'll need this info
    513 				   later.  Trust me :) */
    514 				diskPtr->auto_configured = 1;
    515 				diskPtr->dev = ac->dev;
    516 
    517 				/*
    518 				 * we allow the user to specify that
    519 				 * only a fraction of the disks should
    520 				 * be used this is just for debug: it
    521 				 * speeds up the parity scan
    522 				 */
    523 
    524 				diskPtr->numBlocks = diskPtr->numBlocks *
    525 					rf_sizePercentage / 100;
    526 
    527 				/* XXX these will get set multiple times,
    528 				   but since we're autoconfiguring, they'd
    529 				   better be always the same each time!
    530 				   If not, this is the least of your worries */
    531 
    532 				bs = diskPtr->blockSize;
    533 				min_numblks = diskPtr->numBlocks;
    534 
    535 				/* this gets done multiple times, but that's
    536 				   fine -- the serial number will be the same
    537 				   for all components, guaranteed */
    538 				raidPtr->serial_number =
    539 					ac->clabel->serial_number;
    540 				/* check the last time the label
    541 				   was modified */
    542 				if (ac->clabel->mod_counter !=
    543 				    mod_counter) {
    544 					/* Even though we've filled in all
    545 					   of the above, we don't trust
    546 					   this component since it's
    547 					   modification counter is not
    548 					   in sync with the rest, and we really
    549 					   consider it to be failed.  */
    550 					disks[r][c].status = rf_ds_failed;
    551 					numFailuresThisRow++;
    552 				} else {
    553 					if (ac->clabel->clean !=
    554 					    RF_RAID_CLEAN) {
    555 						parity_good = RF_RAID_DIRTY;
    556 					}
    557 				}
    558 			} else {
    559 				/* Didn't find it at all!!
    560 				   Component must really be dead */
    561 				disks[r][c].status = rf_ds_failed;
    562 				numFailuresThisRow++;
    563 			}
    564 		}
    565 		/* XXX fix for n-fault tolerant */
    566 		/* XXX this should probably check to see how many failures
    567 		   we can handle for this configuration! */
    568 		if (numFailuresThisRow > 0)
    569 			raidPtr->status[r] = rf_rs_degraded;
    570 	}
    571 
    572 	/* close the device for the ones that didn't get used */
    573 
    574 	ac = auto_config;
    575 	while(ac!=NULL) {
    576 		if (ac->flag == 0) {
    577 			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
    578 			vput(ac->vp);
    579 			ac->vp = NULL;
    580 #if DEBUG
    581 			printf("Released %s from auto-config set.\n",
    582 			       ac->devname);
    583 #endif
    584 		}
    585 		ac = ac->next;
    586 	}
    587 
    588 	raidPtr->mod_counter = mod_counter;
    589 
    590 	/* note the state of the parity, if any */
    591 	raidPtr->parity_good = parity_good;
    592 	raidPtr->sectorsPerDisk = min_numblks;
    593 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    594 	raidPtr->bytesPerSector = bs;
    595 	raidPtr->sectorMask = bs - 1;
    596 	return (0);
    597 
    598 fail:
    599 
    600 	rf_UnconfigureVnodes( raidPtr );
    601 
    602 	return (ret);
    603 
    604 }
    605 
    606 /* configure a single disk in the array */
    607 int
    608 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    609 	RF_Raid_t *raidPtr;
    610 	char   *buf;
    611 	RF_RaidDisk_t *diskPtr;
    612 	RF_RowCol_t row;
    613 	RF_RowCol_t col;
    614 {
    615 	char   *p;
    616 	int     retcode;
    617 
    618 	struct partinfo dpart;
    619 	struct vnode *vp;
    620 	struct vattr va;
    621 	struct proc *proc;
    622 	int     error;
    623 
    624 	retcode = 0;
    625 	p = rf_find_non_white(buf);
    626 	if (p[strlen(p) - 1] == '\n') {
    627 		/* strip off the newline */
    628 		p[strlen(p) - 1] = '\0';
    629 	}
    630 	(void) strcpy(diskPtr->devname, p);
    631 
    632 	proc = raidPtr->engine_thread;
    633 
    634 	/* Let's start by claiming the component is fine and well... */
    635 	diskPtr->status = rf_ds_optimal;
    636 
    637 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    638 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    639 
    640 	error = raidlookup(diskPtr->devname, proc, &vp);
    641 	if (error) {
    642 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    643 		if (error == ENXIO) {
    644 			/* the component isn't there... must be dead :-( */
    645 			diskPtr->status = rf_ds_failed;
    646 		} else {
    647 			return (error);
    648 		}
    649 	}
    650 	if (diskPtr->status == rf_ds_optimal) {
    651 
    652 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    653 			return (error);
    654 		}
    655 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    656 				  FREAD, proc->p_ucred, proc);
    657 		if (error) {
    658 			return (error);
    659 		}
    660 
    661 		diskPtr->blockSize = dpart.disklab->d_secsize;
    662 
    663 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    664 		diskPtr->partitionSize = dpart.part->p_size;
    665 
    666 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    667 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    668 
    669 		/* This component was not automatically configured */
    670 		diskPtr->auto_configured = 0;
    671 		diskPtr->dev = va.va_rdev;
    672 
    673 		/* we allow the user to specify that only a fraction of the
    674 		 * disks should be used this is just for debug:  it speeds up
    675 		 * the parity scan */
    676 		diskPtr->numBlocks = diskPtr->numBlocks *
    677 			rf_sizePercentage / 100;
    678 	}
    679 	return (0);
    680 }
    681 
    682 static void
    683 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    684 	RF_Raid_t *raidPtr;
    685 	int row;
    686 	int column;
    687 	char *dev_name;
    688 	RF_ComponentLabel_t *ci_label;
    689 {
    690 
    691 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    692 	       raidPtr->raidid, dev_name, row, column );
    693 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    694 	       ci_label->row, ci_label->column,
    695 	       ci_label->num_rows, ci_label->num_columns);
    696 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    697 	       ci_label->version, ci_label->serial_number,
    698 	       ci_label->mod_counter);
    699 	printf("         Clean: %s Status: %d\n",
    700 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    701 }
    702 
    703 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    704 				  serial_number, mod_counter )
    705 	RF_Raid_t *raidPtr;
    706 	int row;
    707 	int column;
    708 	char *dev_name;
    709 	RF_ComponentLabel_t *ci_label;
    710 	int serial_number;
    711 	int mod_counter;
    712 {
    713 	int fatal_error = 0;
    714 
    715 	if (serial_number != ci_label->serial_number) {
    716 		printf("%s has a different serial number: %d %d\n",
    717 		       dev_name, serial_number, ci_label->serial_number);
    718 		fatal_error = 1;
    719 	}
    720 	if (mod_counter != ci_label->mod_counter) {
    721 		printf("%s has a different modfication count: %d %d\n",
    722 		       dev_name, mod_counter, ci_label->mod_counter);
    723 	}
    724 
    725 	if (row != ci_label->row) {
    726 		printf("Row out of alignment for: %s\n", dev_name);
    727 		fatal_error = 1;
    728 	}
    729 	if (column != ci_label->column) {
    730 		printf("Column out of alignment for: %s\n", dev_name);
    731 		fatal_error = 1;
    732 	}
    733 	if (raidPtr->numRow != ci_label->num_rows) {
    734 		printf("Number of rows do not match for: %s\n", dev_name);
    735 		fatal_error = 1;
    736 	}
    737 	if (raidPtr->numCol != ci_label->num_columns) {
    738 		printf("Number of columns do not match for: %s\n", dev_name);
    739 		fatal_error = 1;
    740 	}
    741 	if (ci_label->clean == 0) {
    742 		/* it's not clean, but that's not fatal */
    743 		printf("%s is not clean!\n", dev_name);
    744 	}
    745 	return(fatal_error);
    746 }
    747 
    748 
    749 /*
    750 
    751    rf_CheckLabels() - check all the component labels for consistency.
    752    Return an error if there is anything major amiss.
    753 
    754  */
    755 
    756 int
    757 rf_CheckLabels( raidPtr, cfgPtr )
    758 	RF_Raid_t *raidPtr;
    759 	RF_Config_t *cfgPtr;
    760 {
    761 	int r,c;
    762 	char *dev_name;
    763 	RF_ComponentLabel_t *ci_label;
    764 	int serial_number = 0;
    765 	int mod_number = 0;
    766 	int fatal_error = 0;
    767 	int mod_values[4];
    768 	int mod_count[4];
    769 	int ser_values[4];
    770 	int ser_count[4];
    771 	int num_ser;
    772 	int num_mod;
    773 	int i;
    774 	int found;
    775 	int hosed_row;
    776 	int hosed_column;
    777 	int too_fatal;
    778 	int parity_good;
    779 	int force;
    780 
    781 	hosed_row = -1;
    782 	hosed_column = -1;
    783 	too_fatal = 0;
    784 	force = cfgPtr->force;
    785 
    786 	/*
    787 	   We're going to try to be a little intelligent here.  If one
    788 	   component's label is bogus, and we can identify that it's the
    789 	   *only* one that's gone, we'll mark it as "failed" and allow
    790 	   the configuration to proceed.  This will be the *only* case
    791 	   that we'll proceed if there would be (otherwise) fatal errors.
    792 
    793 	   Basically we simply keep a count of how many components had
    794 	   what serial number.  If all but one agree, we simply mark
    795 	   the disagreeing component as being failed, and allow
    796 	   things to come up "normally".
    797 
    798 	   We do this first for serial numbers, and then for "mod_counter".
    799 
    800 	 */
    801 
    802 	num_ser = 0;
    803 	num_mod = 0;
    804 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    805 		for (c = 0; c < raidPtr->numCol; c++) {
    806 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    807 			found=0;
    808 			for(i=0;i<num_ser;i++) {
    809 				if (ser_values[i] == ci_label->serial_number) {
    810 					ser_count[i]++;
    811 					found=1;
    812 					break;
    813 				}
    814 			}
    815 			if (!found) {
    816 				ser_values[num_ser] = ci_label->serial_number;
    817 				ser_count[num_ser] = 1;
    818 				num_ser++;
    819 				if (num_ser>2) {
    820 					fatal_error = 1;
    821 					break;
    822 				}
    823 			}
    824 			found=0;
    825 			for(i=0;i<num_mod;i++) {
    826 				if (mod_values[i] == ci_label->mod_counter) {
    827 					mod_count[i]++;
    828 					found=1;
    829 					break;
    830 				}
    831 			}
    832 			if (!found) {
    833 			        mod_values[num_mod] = ci_label->mod_counter;
    834 				mod_count[num_mod] = 1;
    835 				num_mod++;
    836 				if (num_mod>2) {
    837 					fatal_error = 1;
    838 					break;
    839 				}
    840 			}
    841 		}
    842 	}
    843 #if DEBUG
    844 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    845 	for(i=0;i<num_ser;i++) {
    846 		printf("%d %d\n", ser_values[i], ser_count[i]);
    847 	}
    848 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    849 	for(i=0;i<num_mod;i++) {
    850 		printf("%d %d\n", mod_values[i], mod_count[i]);
    851 	}
    852 #endif
    853 	serial_number = ser_values[0];
    854 	if (num_ser == 2) {
    855 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    856 			/* Locate the maverick component */
    857 			if (ser_count[1] > ser_count[0]) {
    858 				serial_number = ser_values[1];
    859 			}
    860 			for (r = 0; r < raidPtr->numRow; r++) {
    861 				for (c = 0; c < raidPtr->numCol; c++) {
    862 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    863 					if (serial_number !=
    864 					    ci_label->serial_number) {
    865 						hosed_row = r;
    866 						hosed_column = c;
    867 						break;
    868 					}
    869 				}
    870 			}
    871 			printf("Hosed component: %s\n",
    872 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    873 			if (!force) {
    874 				/* we'll fail this component, as if there are
    875 				   other major errors, we arn't forcing things
    876 				   and we'll abort the config anyways */
    877 				raidPtr->Disks[hosed_row][hosed_column].status
    878 					= rf_ds_failed;
    879 				raidPtr->numFailures++;
    880 				raidPtr->status[hosed_row] = rf_rs_degraded;
    881 			}
    882 		} else {
    883 			too_fatal = 1;
    884 		}
    885 		if (cfgPtr->parityConfig == '0') {
    886 			/* We've identified two different serial numbers.
    887 			   RAID 0 can't cope with that, so we'll punt */
    888 			too_fatal = 1;
    889 		}
    890 
    891 	}
    892 
    893 	/* record the serial number for later.  If we bail later, setting
    894 	   this doesn't matter, otherwise we've got the best guess at the
    895 	   correct serial number */
    896 	raidPtr->serial_number = serial_number;
    897 
    898 	mod_number = mod_values[0];
    899 	if (num_mod == 2) {
    900 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    901 			/* Locate the maverick component */
    902 			if (mod_count[1] > mod_count[0]) {
    903 				mod_number = mod_values[1];
    904 			} else if (mod_count[1] < mod_count[0]) {
    905 				mod_number = mod_values[0];
    906 			} else {
    907 				/* counts of different modification values
    908 				   are the same.   Assume greater value is
    909 				   the correct one, all other things
    910 				   considered */
    911 				if (mod_values[0] > mod_values[1]) {
    912 					mod_number = mod_values[0];
    913 				} else {
    914 					mod_number = mod_values[1];
    915 				}
    916 
    917 			}
    918 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    919 				for (c = 0; c < raidPtr->numCol; c++) {
    920 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    921 					if (mod_number !=
    922 					    ci_label->mod_counter) {
    923 						if ( ( hosed_row == r ) &&
    924 						     ( hosed_column == c )) {
    925 							/* same one.  Can
    926 							   deal with it.  */
    927 						} else {
    928 							hosed_row = r;
    929 							hosed_column = c;
    930 							if (num_ser != 1) {
    931 								too_fatal = 1;
    932 								break;
    933 							}
    934 						}
    935 					}
    936 				}
    937 			}
    938 			printf("Hosed component: %s\n",
    939 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    940 			if (!force) {
    941 				/* we'll fail this component, as if there are
    942 				   other major errors, we arn't forcing things
    943 				   and we'll abort the config anyways */
    944 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    945 					raidPtr->Disks[hosed_row][hosed_column].status
    946 						= rf_ds_failed;
    947 					raidPtr->numFailures++;
    948 					raidPtr->status[hosed_row] = rf_rs_degraded;
    949 				}
    950 			}
    951 		} else {
    952 			too_fatal = 1;
    953 		}
    954 		if (cfgPtr->parityConfig == '0') {
    955 			/* We've identified two different mod counters.
    956 			   RAID 0 can't cope with that, so we'll punt */
    957 			too_fatal = 1;
    958 		}
    959 	}
    960 
    961 	raidPtr->mod_counter = mod_number;
    962 
    963 	if (too_fatal) {
    964 		/* we've had both a serial number mismatch, and a mod_counter
    965 		   mismatch -- and they involved two different components!!
    966 		   Bail -- make things fail so that the user must force
    967 		   the issue... */
    968 		hosed_row = -1;
    969 		hosed_column = -1;
    970 	}
    971 
    972 	if (num_ser > 2) {
    973 		printf("raid%d: Too many different serial numbers!\n",
    974 		       raidPtr->raidid);
    975 	}
    976 
    977 	if (num_mod > 2) {
    978 		printf("raid%d: Too many different mod counters!\n",
    979 		       raidPtr->raidid);
    980 	}
    981 
    982 	/* we start by assuming the parity will be good, and flee from
    983 	   that notion at the slightest sign of trouble */
    984 
    985 	parity_good = RF_RAID_CLEAN;
    986 	for (r = 0; r < raidPtr->numRow; r++) {
    987 		for (c = 0; c < raidPtr->numCol; c++) {
    988 			dev_name = &cfgPtr->devnames[r][c][0];
    989 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    990 
    991 			if ((r == hosed_row) && (c == hosed_column)) {
    992 				printf("raid%d: Ignoring %s\n",
    993 				       raidPtr->raidid, dev_name);
    994 			} else {
    995 				rf_print_label_status( raidPtr, r, c,
    996 						       dev_name, ci_label );
    997 				if (rf_check_label_vitals( raidPtr, r, c,
    998 							   dev_name, ci_label,
    999 							   serial_number,
   1000 							   mod_number )) {
   1001 					fatal_error = 1;
   1002 				}
   1003 				if (ci_label->clean != RF_RAID_CLEAN) {
   1004 					parity_good = RF_RAID_DIRTY;
   1005 				}
   1006 			}
   1007 		}
   1008 	}
   1009 	if (fatal_error) {
   1010 		parity_good = RF_RAID_DIRTY;
   1011 	}
   1012 
   1013 	/* we note the state of the parity */
   1014 	raidPtr->parity_good = parity_good;
   1015 
   1016 	return(fatal_error);
   1017 }
   1018 
   1019 int
   1020 rf_add_hot_spare(raidPtr, sparePtr)
   1021 	RF_Raid_t *raidPtr;
   1022 	RF_SingleComponent_t *sparePtr;
   1023 {
   1024 	RF_RaidDisk_t *disks;
   1025 	RF_DiskQueue_t *spareQueues;
   1026 	int ret;
   1027 	unsigned int bs;
   1028 	int spare_number;
   1029 
   1030 #if 0
   1031 	printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
   1032 	printf("Num col: %d\n",raidPtr->numCol);
   1033 #endif
   1034 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1035 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1036 		return(EINVAL);
   1037 	}
   1038 
   1039 	RF_LOCK_MUTEX(raidPtr->mutex);
   1040 
   1041 	/* the beginning of the spares... */
   1042 	disks = &raidPtr->Disks[0][raidPtr->numCol];
   1043 
   1044 	spare_number = raidPtr->numSpare;
   1045 
   1046 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1047 			       &disks[spare_number], 0,
   1048 			       raidPtr->numCol + spare_number);
   1049 
   1050 	if (ret)
   1051 		goto fail;
   1052 	if (disks[spare_number].status != rf_ds_optimal) {
   1053 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1054 			     sparePtr->component_name);
   1055 		ret=EINVAL;
   1056 		goto fail;
   1057 	} else {
   1058 		disks[spare_number].status = rf_ds_spare;
   1059 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1060 			 disks[spare_number].devname,
   1061 			 (long int) disks[spare_number].numBlocks,
   1062 			 disks[spare_number].blockSize,
   1063 			 (long int) disks[spare_number].numBlocks *
   1064 			 disks[spare_number].blockSize / 1024 / 1024);
   1065 	}
   1066 
   1067 
   1068 	/* check sizes and block sizes on the spare disk */
   1069 	bs = 1 << raidPtr->logBytesPerSector;
   1070 	if (disks[spare_number].blockSize != bs) {
   1071 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1072 		ret = EINVAL;
   1073 		goto fail;
   1074 	}
   1075 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1076 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1077 			     disks[spare_number].devname,
   1078 			     disks[spare_number].blockSize,
   1079 			     (long int) raidPtr->sectorsPerDisk);
   1080 		ret = EINVAL;
   1081 		goto fail;
   1082 	} else {
   1083 		if (disks[spare_number].numBlocks >
   1084 		    raidPtr->sectorsPerDisk) {
   1085 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1086 				     (long int) raidPtr->sectorsPerDisk);
   1087 
   1088 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1089 		}
   1090 	}
   1091 
   1092 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1093 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1094 				 0, raidPtr->numCol + spare_number,
   1095 				 raidPtr->qType,
   1096 				 raidPtr->sectorsPerDisk,
   1097 				 raidPtr->Disks[0][raidPtr->numCol +
   1098 						  spare_number].dev,
   1099 				 raidPtr->maxOutstanding,
   1100 				 &raidPtr->shutdownList,
   1101 				 raidPtr->cleanupList);
   1102 
   1103 
   1104 	raidPtr->numSpare++;
   1105 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1106 	return (0);
   1107 
   1108 fail:
   1109 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1110 	return(ret);
   1111 }
   1112 
   1113 int
   1114 rf_remove_hot_spare(raidPtr,sparePtr)
   1115 	RF_Raid_t *raidPtr;
   1116 	RF_SingleComponent_t *sparePtr;
   1117 {
   1118 	int spare_number;
   1119 
   1120 
   1121 	if (raidPtr->numSpare==0) {
   1122 		printf("No spares to remove!\n");
   1123 		return(EINVAL);
   1124 	}
   1125 
   1126 	spare_number = sparePtr->column;
   1127 
   1128 	return(EINVAL); /* XXX not implemented yet */
   1129 #if 0
   1130 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1131 		return(EINVAL);
   1132 	}
   1133 
   1134 	/* verify that this spare isn't in use... */
   1135 
   1136 
   1137 
   1138 
   1139 	/* it's gone.. */
   1140 
   1141 	raidPtr->numSpare--;
   1142 
   1143 	return(0);
   1144 #endif
   1145 }
   1146 
   1147 
   1148 int
   1149 rf_delete_component(raidPtr,component)
   1150 	RF_Raid_t *raidPtr;
   1151 	RF_SingleComponent_t *component;
   1152 {
   1153 	RF_RaidDisk_t *disks;
   1154 
   1155 	if ((component->row < 0) ||
   1156 	    (component->row >= raidPtr->numRow) ||
   1157 	    (component->column < 0) ||
   1158 	    (component->column >= raidPtr->numCol)) {
   1159 		return(EINVAL);
   1160 	}
   1161 
   1162 	disks = &raidPtr->Disks[component->row][component->column];
   1163 
   1164 	/* 1. This component must be marked as 'failed' */
   1165 
   1166 	return(EINVAL); /* Not implemented yet. */
   1167 }
   1168 
   1169 int
   1170 rf_incorporate_hot_spare(raidPtr,component)
   1171 	RF_Raid_t *raidPtr;
   1172 	RF_SingleComponent_t *component;
   1173 {
   1174 
   1175 	/* Issues here include how to 'move' this in if there is IO
   1176 	   taking place (e.g. component queues and such) */
   1177 
   1178 	return(EINVAL); /* Not implemented yet. */
   1179 }
   1180