Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.34.2.2
      1 /*	$NetBSD: rf_disks.c,v 1.34.2.2 2001/11/14 19:15:48 nathanw Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.34.2.2 2001/11/14 19:15:48 nathanw Exp $");
     71 
     72 #include <dev/raidframe/raidframevar.h>
     73 
     74 #include "rf_raid.h"
     75 #include "rf_alloclist.h"
     76 #include "rf_utils.h"
     77 #include "rf_general.h"
     78 #include "rf_options.h"
     79 #include "rf_kintf.h"
     80 #include "rf_netbsd.h"
     81 
     82 #include <sys/types.h>
     83 #include <sys/param.h>
     84 #include <sys/systm.h>
     85 #include <sys/proc.h>
     86 #include <sys/ioctl.h>
     87 #include <sys/fcntl.h>
     88 #include <sys/vnode.h>
     89 
     90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     91 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
     92 				  RF_ComponentLabel_t *);
     93 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     94 				  RF_ComponentLabel_t *, int, int );
     95 
/*
 * Debug printf wrappers: the message is printed only when rf_diskDebug is
 * non-zero.  The number in the name is the total argument count (format
 * string included).
 *
 * The bodies are wrapped in do { } while (0) so that each use is a single
 * statement: a bare "if (...) printf(...)" expansion would capture a
 * following "else" written by the caller.  Uses must end with a semicolon.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     98 
     99 /**************************************************************************
    100  *
    101  * initialize the disks comprising the array
    102  *
    103  * We want the spare disks to have regular row,col numbers so that we can
     104  * easily substitute a spare for a failed disk.  But, the driver code assumes
    105  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    106  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    107  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    108  * rest, and put all the spares in it.  This probably needs to get changed
    109  * eventually.
    110  *
    111  **************************************************************************/
    112 
    113 int
    114 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    115 	RF_ShutdownList_t **listp;
    116 	RF_Raid_t *raidPtr;
    117 	RF_Config_t *cfgPtr;
    118 {
    119 	RF_RaidDisk_t **disks;
    120 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    121 	RF_RowCol_t r, c;
    122 	int bs, ret;
    123 	unsigned i, count, foundone = 0, numFailuresThisRow;
    124 	int force;
    125 
    126 	force = cfgPtr->force;
    127 
    128 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    129 	if (ret)
    130 		goto fail;
    131 
    132 	disks = raidPtr->Disks;
    133 
    134 	for (r = 0; r < raidPtr->numRow; r++) {
    135 		numFailuresThisRow = 0;
    136 		for (c = 0; c < raidPtr->numCol; c++) {
    137 			ret = rf_ConfigureDisk(raidPtr,
    138 					       &cfgPtr->devnames[r][c][0],
    139 					       &disks[r][c], r, c);
    140 
    141 			if (ret)
    142 				goto fail;
    143 
    144 			if (disks[r][c].status == rf_ds_optimal) {
    145 				raidread_component_label(
    146 					 raidPtr->raid_cinfo[r][c].ci_dev,
    147 					 raidPtr->raid_cinfo[r][c].ci_vp,
    148 					 &raidPtr->raid_cinfo[r][c].ci_label);
    149 			}
    150 
    151 			if (disks[r][c].status != rf_ds_optimal) {
    152 				numFailuresThisRow++;
    153 			} else {
    154 				if (disks[r][c].numBlocks < min_numblks)
    155 					min_numblks = disks[r][c].numBlocks;
    156 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    157 				    r, c, disks[r][c].devname,
    158 				    (long int) disks[r][c].numBlocks,
    159 				    disks[r][c].blockSize,
    160 				    (long int) disks[r][c].numBlocks *
    161 					 disks[r][c].blockSize / 1024 / 1024);
    162 			}
    163 		}
    164 		/* XXX fix for n-fault tolerant */
    165 		/* XXX this should probably check to see how many failures
    166 		   we can handle for this configuration! */
    167 		if (numFailuresThisRow > 0)
    168 			raidPtr->status[r] = rf_rs_degraded;
    169 	}
    170 
    171 	/* all disks must be the same size & have the same block size, bs must
    172 	 * be a power of 2 */
    173 	bs = 0;
    174 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    175 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    176 			if (disks[r][c].status == rf_ds_optimal) {
    177 				bs = disks[r][c].blockSize;
    178 				foundone = 1;
    179 			}
    180 		}
    181 	}
    182 	if (!foundone) {
    183 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    184 		ret = EINVAL;
    185 		goto fail;
    186 	}
    187 	for (count = 0, i = 1; i; i <<= 1)
    188 		if (bs & i)
    189 			count++;
    190 	if (count != 1) {
    191 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    192 		ret = EINVAL;
    193 		goto fail;
    194 	}
    195 
    196 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    197 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    198 		if (force != 0) {
    199 			printf("raid%d: Fatal errors being ignored.\n",
    200 			       raidPtr->raidid);
    201 		} else {
    202 			ret = EINVAL;
    203 			goto fail;
    204 		}
    205 	}
    206 
    207 	for (r = 0; r < raidPtr->numRow; r++) {
    208 		for (c = 0; c < raidPtr->numCol; c++) {
    209 			if (disks[r][c].status == rf_ds_optimal) {
    210 				if (disks[r][c].blockSize != bs) {
    211 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    212 					ret = EINVAL;
    213 					goto fail;
    214 				}
    215 				if (disks[r][c].numBlocks != min_numblks) {
    216 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    217 					    r, c, (int) min_numblks);
    218 					disks[r][c].numBlocks = min_numblks;
    219 				}
    220 			}
    221 		}
    222 	}
    223 
    224 	raidPtr->sectorsPerDisk = min_numblks;
    225 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    226 	raidPtr->bytesPerSector = bs;
    227 	raidPtr->sectorMask = bs - 1;
    228 	return (0);
    229 
    230 fail:
    231 
    232 	rf_UnconfigureVnodes( raidPtr );
    233 
    234 	return (ret);
    235 }
    236 
    237 
    238 /****************************************************************************
    239  * set up the data structures describing the spare disks in the array
    240  * recall from the above comment that the spare disk descriptors are stored
    241  * in row zero, which is specially expanded to hold them.
    242  ****************************************************************************/
    243 int
    244 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    245 	RF_ShutdownList_t ** listp;
    246 	RF_Raid_t * raidPtr;
    247 	RF_Config_t * cfgPtr;
    248 {
    249 	int     i, ret;
    250 	unsigned int bs;
    251 	RF_RaidDisk_t *disks;
    252 	int     num_spares_done;
    253 
    254 	num_spares_done = 0;
    255 
    256 	/* The space for the spares should have already been allocated by
    257 	 * ConfigureDisks() */
    258 
    259 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    260 	for (i = 0; i < raidPtr->numSpare; i++) {
    261 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    262 				       &disks[i], 0, raidPtr->numCol + i);
    263 		if (ret)
    264 			goto fail;
    265 		if (disks[i].status != rf_ds_optimal) {
    266 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    267 				     &cfgPtr->spare_names[i][0]);
    268 		} else {
    269 			disks[i].status = rf_ds_spare;	/* change status to
    270 							 * spare */
    271 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    272 			    disks[i].devname,
    273 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    274 			    (long int) disks[i].numBlocks *
    275 				 disks[i].blockSize / 1024 / 1024);
    276 		}
    277 		num_spares_done++;
    278 	}
    279 
    280 	/* check sizes and block sizes on spare disks */
    281 	bs = 1 << raidPtr->logBytesPerSector;
    282 	for (i = 0; i < raidPtr->numSpare; i++) {
    283 		if (disks[i].blockSize != bs) {
    284 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    285 			ret = EINVAL;
    286 			goto fail;
    287 		}
    288 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    289 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    290 				     disks[i].devname, disks[i].blockSize,
    291 				     (long int) raidPtr->sectorsPerDisk);
    292 			ret = EINVAL;
    293 			goto fail;
    294 		} else
    295 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    296 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    297 
    298 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    299 			}
    300 	}
    301 
    302 	return (0);
    303 
    304 fail:
    305 
    306 	/* Release the hold on the main components.  We've failed to allocate
    307 	 * a spare, and since we're failing, we need to free things..
    308 
    309 	 XXX failing to allocate a spare is *not* that big of a deal...
    310 	 We *can* survive without it, if need be, esp. if we get hot
    311 	 adding working.
    312 
    313 	 If we don't fail out here, then we need a way to remove this spare...
    314 	 that should be easier to do here than if we are "live"...
    315 
    316 	 */
    317 
    318 	rf_UnconfigureVnodes( raidPtr );
    319 
    320 	return (ret);
    321 }
    322 
    323 static int
    324 rf_AllocDiskStructures(raidPtr, cfgPtr)
    325 	RF_Raid_t *raidPtr;
    326  	RF_Config_t *cfgPtr;
    327 {
    328 	RF_RaidDisk_t **disks;
    329 	int ret;
    330 	int r;
    331 
    332 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    333 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    334 	if (disks == NULL) {
    335 		ret = ENOMEM;
    336 		goto fail;
    337 	}
    338 	raidPtr->Disks = disks;
    339 	/* get space for the device-specific stuff... */
    340 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    341 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    342 	    raidPtr->cleanupList);
    343 	if (raidPtr->raid_cinfo == NULL) {
    344 		ret = ENOMEM;
    345 		goto fail;
    346 	}
    347 
    348 	for (r = 0; r < raidPtr->numRow; r++) {
    349 		/* We allocate RF_MAXSPARE on the first row so that we
    350 		   have room to do hot-swapping of spares */
    351 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    352 				+ ((r == 0) ? RF_MAXSPARE : 0),
    353 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    354 				raidPtr->cleanupList);
    355 		if (disks[r] == NULL) {
    356 			ret = ENOMEM;
    357 			goto fail;
    358 		}
    359 		/* get more space for device specific stuff.. */
    360 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    361 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    362 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    363 		    raidPtr->cleanupList);
    364 		if (raidPtr->raid_cinfo[r] == NULL) {
    365 			ret = ENOMEM;
    366 			goto fail;
    367 		}
    368 	}
    369 	return(0);
    370 fail:
    371 	rf_UnconfigureVnodes( raidPtr );
    372 
    373 	return(ret);
    374 }
    375 
    376 
     377 /* configure all the components of a RAID set during auto-configuration at boot */
    378 int
    379 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    380 	RF_Raid_t *raidPtr;
    381 	RF_Config_t *cfgPtr;
    382 	RF_AutoConfig_t *auto_config;
    383 {
    384 	RF_RaidDisk_t **disks;
    385 	RF_RaidDisk_t *diskPtr;
    386 	RF_RowCol_t r, c;
    387 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    388 	int bs, ret;
    389 	int numFailuresThisRow;
    390 	int force;
    391 	RF_AutoConfig_t *ac;
    392 	int parity_good;
    393 	int mod_counter;
    394 	int mod_counter_found;
    395 
    396 #if DEBUG
    397 	printf("Starting autoconfiguration of RAID set...\n");
    398 #endif
    399 	force = cfgPtr->force;
    400 
    401 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    402 	if (ret)
    403 		goto fail;
    404 
    405 	disks = raidPtr->Disks;
    406 
    407 	/* assume the parity will be fine.. */
    408 	parity_good = RF_RAID_CLEAN;
    409 
    410 	/* Check for mod_counters that are too low */
    411 	mod_counter_found = 0;
    412 	mod_counter = 0;
    413 	ac = auto_config;
    414 	while(ac!=NULL) {
    415 		if (mod_counter_found==0) {
    416 			mod_counter = ac->clabel->mod_counter;
    417 			mod_counter_found = 1;
    418 		} else {
    419 			if (ac->clabel->mod_counter > mod_counter) {
    420 				mod_counter = ac->clabel->mod_counter;
    421 			}
    422 		}
    423 		ac->flag = 0; /* clear the general purpose flag */
    424 		ac = ac->next;
    425 	}
    426 
    427 	bs = 0;
    428 	for (r = 0; r < raidPtr->numRow; r++) {
    429 		numFailuresThisRow = 0;
    430 		for (c = 0; c < raidPtr->numCol; c++) {
    431 			diskPtr = &disks[r][c];
    432 
    433 			/* find this row/col in the autoconfig */
    434 #if DEBUG
    435 			printf("Looking for %d,%d in autoconfig\n",r,c);
    436 #endif
    437 			ac = auto_config;
    438 			while(ac!=NULL) {
    439 				if (ac->clabel==NULL) {
    440 					/* big-time bad news. */
    441 					goto fail;
    442 				}
    443 				if ((ac->clabel->row == r) &&
    444 				    (ac->clabel->column == c) &&
    445 				    (ac->clabel->mod_counter == mod_counter)) {
    446 					/* it's this one... */
    447 					/* flag it as 'used', so we don't
    448 					   free it later. */
    449 					ac->flag = 1;
    450 #if DEBUG
    451 					printf("Found: %s at %d,%d\n",
    452 					       ac->devname,r,c);
    453 #endif
    454 
    455 					break;
    456 				}
    457 				ac=ac->next;
    458 			}
    459 
    460 			if (ac==NULL) {
    461 				/* we didn't find an exact match with a
    462 				   correct mod_counter above... can we
    463 				   find one with an incorrect mod_counter
    464 				   to use instead?  (this one, if we find
    465 				   it, will be marked as failed once the
    466 				   set configures)
    467 				*/
    468 
    469 				ac = auto_config;
    470 				while(ac!=NULL) {
    471 					if (ac->clabel==NULL) {
    472 						/* big-time bad news. */
    473 						goto fail;
    474 					}
    475 					if ((ac->clabel->row == r) &&
    476 					    (ac->clabel->column == c)) {
    477 						/* it's this one...
    478 						   flag it as 'used', so we
    479 						   don't free it later. */
    480 						ac->flag = 1;
    481 #if DEBUG
    482 						printf("Found(low mod_counter): %s at %d,%d\n",
    483 						       ac->devname,r,c);
    484 #endif
    485 
    486 						break;
    487 					}
    488 					ac=ac->next;
    489 				}
    490 			}
    491 
    492 
    493 
    494 			if (ac!=NULL) {
    495 				/* Found it.  Configure it.. */
    496 				diskPtr->blockSize = ac->clabel->blockSize;
    497 				diskPtr->numBlocks = ac->clabel->numBlocks;
    498 				/* Note: rf_protectedSectors is already
    499 				   factored into numBlocks here */
    500 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    501 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
    502 
    503 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    504 				       ac->clabel, sizeof(*ac->clabel));
    505 				sprintf(diskPtr->devname, "/dev/%s",
    506 					ac->devname);
    507 
    508 				/* note the fact that this component was
    509 				   autoconfigured.  You'll need this info
    510 				   later.  Trust me :) */
    511 				diskPtr->auto_configured = 1;
    512 				diskPtr->dev = ac->dev;
    513 
    514 				/*
    515 				 * we allow the user to specify that
    516 				 * only a fraction of the disks should
    517 				 * be used this is just for debug: it
    518 				 * speeds up the parity scan
    519 				 */
    520 
    521 				diskPtr->numBlocks = diskPtr->numBlocks *
    522 					rf_sizePercentage / 100;
    523 
    524 				/* XXX these will get set multiple times,
    525 				   but since we're autoconfiguring, they'd
    526 				   better be always the same each time!
    527 				   If not, this is the least of your worries */
    528 
    529 				bs = diskPtr->blockSize;
    530 				min_numblks = diskPtr->numBlocks;
    531 
    532 				/* this gets done multiple times, but that's
    533 				   fine -- the serial number will be the same
    534 				   for all components, guaranteed */
    535 				raidPtr->serial_number =
    536 					ac->clabel->serial_number;
    537 				/* check the last time the label
    538 				   was modified */
    539 				if (ac->clabel->mod_counter !=
    540 				    mod_counter) {
    541 					/* Even though we've filled in all
    542 					   of the above, we don't trust
    543 					   this component since it's
    544 					   modification counter is not
    545 					   in sync with the rest, and we really
    546 					   consider it to be failed.  */
    547 					disks[r][c].status = rf_ds_failed;
    548 					numFailuresThisRow++;
    549 				} else {
    550 					if (ac->clabel->clean !=
    551 					    RF_RAID_CLEAN) {
    552 						parity_good = RF_RAID_DIRTY;
    553 					}
    554 				}
    555 			} else {
    556 				/* Didn't find it at all!!
    557 				   Component must really be dead */
    558 				disks[r][c].status = rf_ds_failed;
    559 				sprintf(disks[r][c].devname,"component%d",
    560 					r * raidPtr->numCol + c);
    561 				numFailuresThisRow++;
    562 			}
    563 		}
    564 		/* XXX fix for n-fault tolerant */
    565 		/* XXX this should probably check to see how many failures
    566 		   we can handle for this configuration! */
    567 		if (numFailuresThisRow > 0)
    568 			raidPtr->status[r] = rf_rs_degraded;
    569 	}
    570 
    571 	/* close the device for the ones that didn't get used */
    572 
    573 	ac = auto_config;
    574 	while(ac!=NULL) {
    575 		if (ac->flag == 0) {
    576 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    577 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
    578 			vput(ac->vp);
    579 			ac->vp = NULL;
    580 #if DEBUG
    581 			printf("Released %s from auto-config set.\n",
    582 			       ac->devname);
    583 #endif
    584 		}
    585 		ac = ac->next;
    586 	}
    587 
    588 	raidPtr->mod_counter = mod_counter;
    589 
    590 	/* note the state of the parity, if any */
    591 	raidPtr->parity_good = parity_good;
    592 	raidPtr->sectorsPerDisk = min_numblks;
    593 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    594 	raidPtr->bytesPerSector = bs;
    595 	raidPtr->sectorMask = bs - 1;
    596 	return (0);
    597 
    598 fail:
    599 
    600 	rf_UnconfigureVnodes( raidPtr );
    601 
    602 	return (ret);
    603 
    604 }
    605 
    606 /* configure a single disk in the array */
    607 int
    608 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    609 	RF_Raid_t *raidPtr;
    610 	char   *buf;
    611 	RF_RaidDisk_t *diskPtr;
    612 	RF_RowCol_t row;
    613 	RF_RowCol_t col;
    614 {
    615 	char   *p;
    616 	int     retcode;
    617 
    618 	struct partinfo dpart;
    619 	struct vnode *vp;
    620 	struct vattr va;
    621 	struct proc *proc;
    622 	int     error;
    623 
    624 	retcode = 0;
    625 	p = rf_find_non_white(buf);
    626 	if (p[strlen(p) - 1] == '\n') {
    627 		/* strip off the newline */
    628 		p[strlen(p) - 1] = '\0';
    629 	}
    630 	(void) strcpy(diskPtr->devname, p);
    631 
    632 	proc = raidPtr->engine_thread;
    633 
    634 	/* Let's start by claiming the component is fine and well... */
    635 	diskPtr->status = rf_ds_optimal;
    636 
    637 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    638 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    639 
    640 	error = raidlookup(diskPtr->devname, proc, &vp);
    641 	if (error) {
    642 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    643 		if (error == ENXIO) {
    644 			/* the component isn't there... must be dead :-( */
    645 			diskPtr->status = rf_ds_failed;
    646 		} else {
    647 			return (error);
    648 		}
    649 	}
    650 	if (diskPtr->status == rf_ds_optimal) {
    651 
    652 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    653 			return (error);
    654 		}
    655 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    656 				  FREAD, proc->p_ucred, proc);
    657 		if (error) {
    658 			return (error);
    659 		}
    660 
    661 		diskPtr->blockSize = dpart.disklab->d_secsize;
    662 
    663 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    664 		diskPtr->partitionSize = dpart.part->p_size;
    665 
    666 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    667 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    668 
    669 		/* This component was not automatically configured */
    670 		diskPtr->auto_configured = 0;
    671 		diskPtr->dev = va.va_rdev;
    672 
    673 		/* we allow the user to specify that only a fraction of the
    674 		 * disks should be used this is just for debug:  it speeds up
    675 		 * the parity scan */
    676 		diskPtr->numBlocks = diskPtr->numBlocks *
    677 			rf_sizePercentage / 100;
    678 	}
    679 	return (0);
    680 }
    681 
    682 static void
    683 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    684 	RF_Raid_t *raidPtr;
    685 	int row;
    686 	int column;
    687 	char *dev_name;
    688 	RF_ComponentLabel_t *ci_label;
    689 {
    690 
    691 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    692 	       raidPtr->raidid, dev_name, row, column );
    693 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    694 	       ci_label->row, ci_label->column,
    695 	       ci_label->num_rows, ci_label->num_columns);
    696 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    697 	       ci_label->version, ci_label->serial_number,
    698 	       ci_label->mod_counter);
    699 	printf("         Clean: %s Status: %d\n",
    700 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    701 }
    702 
    703 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    704 				  serial_number, mod_counter )
    705 	RF_Raid_t *raidPtr;
    706 	int row;
    707 	int column;
    708 	char *dev_name;
    709 	RF_ComponentLabel_t *ci_label;
    710 	int serial_number;
    711 	int mod_counter;
    712 {
    713 	int fatal_error = 0;
    714 
    715 	if (serial_number != ci_label->serial_number) {
    716 		printf("%s has a different serial number: %d %d\n",
    717 		       dev_name, serial_number, ci_label->serial_number);
    718 		fatal_error = 1;
    719 	}
    720 	if (mod_counter != ci_label->mod_counter) {
    721 		printf("%s has a different modfication count: %d %d\n",
    722 		       dev_name, mod_counter, ci_label->mod_counter);
    723 	}
    724 
    725 	if (row != ci_label->row) {
    726 		printf("Row out of alignment for: %s\n", dev_name);
    727 		fatal_error = 1;
    728 	}
    729 	if (column != ci_label->column) {
    730 		printf("Column out of alignment for: %s\n", dev_name);
    731 		fatal_error = 1;
    732 	}
    733 	if (raidPtr->numRow != ci_label->num_rows) {
    734 		printf("Number of rows do not match for: %s\n", dev_name);
    735 		fatal_error = 1;
    736 	}
    737 	if (raidPtr->numCol != ci_label->num_columns) {
    738 		printf("Number of columns do not match for: %s\n", dev_name);
    739 		fatal_error = 1;
    740 	}
    741 	if (ci_label->clean == 0) {
    742 		/* it's not clean, but that's not fatal */
    743 		printf("%s is not clean!\n", dev_name);
    744 	}
    745 	return(fatal_error);
    746 }
    747 
    748 
    749 /*
    750 
    751    rf_CheckLabels() - check all the component labels for consistency.
    752    Return an error if there is anything major amiss.
    753 
    754  */
    755 
    756 int
    757 rf_CheckLabels( raidPtr, cfgPtr )
    758 	RF_Raid_t *raidPtr;
    759 	RF_Config_t *cfgPtr;
    760 {
    761 	int r,c;
    762 	char *dev_name;
    763 	RF_ComponentLabel_t *ci_label;
    764 	int serial_number = 0;
    765 	int mod_number = 0;
    766 	int fatal_error = 0;
    767 	int mod_values[4];
    768 	int mod_count[4];
    769 	int ser_values[4];
    770 	int ser_count[4];
    771 	int num_ser;
    772 	int num_mod;
    773 	int i;
    774 	int found;
    775 	int hosed_row;
    776 	int hosed_column;
    777 	int too_fatal;
    778 	int parity_good;
    779 	int force;
    780 
    781 	hosed_row = -1;
    782 	hosed_column = -1;
    783 	too_fatal = 0;
    784 	force = cfgPtr->force;
    785 
    786 	/*
    787 	   We're going to try to be a little intelligent here.  If one
    788 	   component's label is bogus, and we can identify that it's the
    789 	   *only* one that's gone, we'll mark it as "failed" and allow
    790 	   the configuration to proceed.  This will be the *only* case
    791 	   that we'll proceed if there would be (otherwise) fatal errors.
    792 
    793 	   Basically we simply keep a count of how many components had
    794 	   what serial number.  If all but one agree, we simply mark
    795 	   the disagreeing component as being failed, and allow
    796 	   things to come up "normally".
    797 
    798 	   We do this first for serial numbers, and then for "mod_counter".
    799 
    800 	 */
    801 
    802 	num_ser = 0;
    803 	num_mod = 0;
    804 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    805 		for (c = 0; c < raidPtr->numCol; c++) {
    806 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    807 			found=0;
    808 			for(i=0;i<num_ser;i++) {
    809 				if (ser_values[i] == ci_label->serial_number) {
    810 					ser_count[i]++;
    811 					found=1;
    812 					break;
    813 				}
    814 			}
    815 			if (!found) {
    816 				ser_values[num_ser] = ci_label->serial_number;
    817 				ser_count[num_ser] = 1;
    818 				num_ser++;
    819 				if (num_ser>2) {
    820 					fatal_error = 1;
    821 					break;
    822 				}
    823 			}
    824 			found=0;
    825 			for(i=0;i<num_mod;i++) {
    826 				if (mod_values[i] == ci_label->mod_counter) {
    827 					mod_count[i]++;
    828 					found=1;
    829 					break;
    830 				}
    831 			}
    832 			if (!found) {
    833 			        mod_values[num_mod] = ci_label->mod_counter;
    834 				mod_count[num_mod] = 1;
    835 				num_mod++;
    836 				if (num_mod>2) {
    837 					fatal_error = 1;
    838 					break;
    839 				}
    840 			}
    841 		}
    842 	}
    843 #if DEBUG
    844 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    845 	for(i=0;i<num_ser;i++) {
    846 		printf("%d %d\n", ser_values[i], ser_count[i]);
    847 	}
    848 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    849 	for(i=0;i<num_mod;i++) {
    850 		printf("%d %d\n", mod_values[i], mod_count[i]);
    851 	}
    852 #endif
    853 	serial_number = ser_values[0];
    854 	if (num_ser == 2) {
    855 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    856 			/* Locate the maverick component */
    857 			if (ser_count[1] > ser_count[0]) {
    858 				serial_number = ser_values[1];
    859 			}
    860 			for (r = 0; r < raidPtr->numRow; r++) {
    861 				for (c = 0; c < raidPtr->numCol; c++) {
    862 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    863 					if (serial_number !=
    864 					    ci_label->serial_number) {
    865 						hosed_row = r;
    866 						hosed_column = c;
    867 						break;
    868 					}
    869 				}
    870 			}
    871 			printf("Hosed component: %s\n",
    872 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    873 			if (!force) {
    874 				/* we'll fail this component, as if there are
    875 				   other major errors, we arn't forcing things
    876 				   and we'll abort the config anyways */
    877 				raidPtr->Disks[hosed_row][hosed_column].status
    878 					= rf_ds_failed;
    879 				raidPtr->numFailures++;
    880 				raidPtr->status[hosed_row] = rf_rs_degraded;
    881 			}
    882 		} else {
    883 			too_fatal = 1;
    884 		}
    885 		if (cfgPtr->parityConfig == '0') {
    886 			/* We've identified two different serial numbers.
    887 			   RAID 0 can't cope with that, so we'll punt */
    888 			too_fatal = 1;
    889 		}
    890 
    891 	}
    892 
    893 	/* record the serial number for later.  If we bail later, setting
    894 	   this doesn't matter, otherwise we've got the best guess at the
    895 	   correct serial number */
    896 	raidPtr->serial_number = serial_number;
    897 
    898 	mod_number = mod_values[0];
    899 	if (num_mod == 2) {
    900 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    901 			/* Locate the maverick component */
    902 			if (mod_count[1] > mod_count[0]) {
    903 				mod_number = mod_values[1];
    904 			} else if (mod_count[1] < mod_count[0]) {
    905 				mod_number = mod_values[0];
    906 			} else {
    907 				/* counts of different modification values
    908 				   are the same.   Assume greater value is
    909 				   the correct one, all other things
    910 				   considered */
    911 				if (mod_values[0] > mod_values[1]) {
    912 					mod_number = mod_values[0];
    913 				} else {
    914 					mod_number = mod_values[1];
    915 				}
    916 
    917 			}
    918 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    919 				for (c = 0; c < raidPtr->numCol; c++) {
    920 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    921 					if (mod_number !=
    922 					    ci_label->mod_counter) {
    923 						if ( ( hosed_row == r ) &&
    924 						     ( hosed_column == c )) {
    925 							/* same one.  Can
    926 							   deal with it.  */
    927 						} else {
    928 							hosed_row = r;
    929 							hosed_column = c;
    930 							if (num_ser != 1) {
    931 								too_fatal = 1;
    932 								break;
    933 							}
    934 						}
    935 					}
    936 				}
    937 			}
    938 			printf("Hosed component: %s\n",
    939 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    940 			if (!force) {
    941 				/* we'll fail this component, as if there are
    942 				   other major errors, we arn't forcing things
    943 				   and we'll abort the config anyways */
    944 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    945 					raidPtr->Disks[hosed_row][hosed_column].status
    946 						= rf_ds_failed;
    947 					raidPtr->numFailures++;
    948 					raidPtr->status[hosed_row] = rf_rs_degraded;
    949 				}
    950 			}
    951 		} else {
    952 			too_fatal = 1;
    953 		}
    954 		if (cfgPtr->parityConfig == '0') {
    955 			/* We've identified two different mod counters.
    956 			   RAID 0 can't cope with that, so we'll punt */
    957 			too_fatal = 1;
    958 		}
    959 	}
    960 
    961 	raidPtr->mod_counter = mod_number;
    962 
    963 	if (too_fatal) {
    964 		/* we've had both a serial number mismatch, and a mod_counter
    965 		   mismatch -- and they involved two different components!!
    966 		   Bail -- make things fail so that the user must force
    967 		   the issue... */
    968 		hosed_row = -1;
    969 		hosed_column = -1;
    970 	}
    971 
    972 	if (num_ser > 2) {
    973 		printf("raid%d: Too many different serial numbers!\n",
    974 		       raidPtr->raidid);
    975 	}
    976 
    977 	if (num_mod > 2) {
    978 		printf("raid%d: Too many different mod counters!\n",
    979 		       raidPtr->raidid);
    980 	}
    981 
    982 	/* we start by assuming the parity will be good, and flee from
    983 	   that notion at the slightest sign of trouble */
    984 
    985 	parity_good = RF_RAID_CLEAN;
    986 	for (r = 0; r < raidPtr->numRow; r++) {
    987 		for (c = 0; c < raidPtr->numCol; c++) {
    988 			dev_name = &cfgPtr->devnames[r][c][0];
    989 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    990 
    991 			if ((r == hosed_row) && (c == hosed_column)) {
    992 				printf("raid%d: Ignoring %s\n",
    993 				       raidPtr->raidid, dev_name);
    994 			} else {
    995 				rf_print_label_status( raidPtr, r, c,
    996 						       dev_name, ci_label );
    997 				if (rf_check_label_vitals( raidPtr, r, c,
    998 							   dev_name, ci_label,
    999 							   serial_number,
   1000 							   mod_number )) {
   1001 					fatal_error = 1;
   1002 				}
   1003 				if (ci_label->clean != RF_RAID_CLEAN) {
   1004 					parity_good = RF_RAID_DIRTY;
   1005 				}
   1006 			}
   1007 		}
   1008 	}
   1009 	if (fatal_error) {
   1010 		parity_good = RF_RAID_DIRTY;
   1011 	}
   1012 
   1013 	/* we note the state of the parity */
   1014 	raidPtr->parity_good = parity_good;
   1015 
   1016 	return(fatal_error);
   1017 }
   1018 
   1019 int
   1020 rf_add_hot_spare(raidPtr, sparePtr)
   1021 	RF_Raid_t *raidPtr;
   1022 	RF_SingleComponent_t *sparePtr;
   1023 {
   1024 	RF_RaidDisk_t *disks;
   1025 	RF_DiskQueue_t *spareQueues;
   1026 	int ret;
   1027 	unsigned int bs;
   1028 	int spare_number;
   1029 
   1030 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1031 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1032 		return(EINVAL);
   1033 	}
   1034 
   1035 	RF_LOCK_MUTEX(raidPtr->mutex);
   1036 
   1037 	/* the beginning of the spares... */
   1038 	disks = &raidPtr->Disks[0][raidPtr->numCol];
   1039 
   1040 	spare_number = raidPtr->numSpare;
   1041 
   1042 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1043 			       &disks[spare_number], 0,
   1044 			       raidPtr->numCol + spare_number);
   1045 
   1046 	if (ret)
   1047 		goto fail;
   1048 	if (disks[spare_number].status != rf_ds_optimal) {
   1049 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1050 			     sparePtr->component_name);
   1051 		ret=EINVAL;
   1052 		goto fail;
   1053 	} else {
   1054 		disks[spare_number].status = rf_ds_spare;
   1055 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1056 			 disks[spare_number].devname,
   1057 			 (long int) disks[spare_number].numBlocks,
   1058 			 disks[spare_number].blockSize,
   1059 			 (long int) disks[spare_number].numBlocks *
   1060 			 disks[spare_number].blockSize / 1024 / 1024);
   1061 	}
   1062 
   1063 
   1064 	/* check sizes and block sizes on the spare disk */
   1065 	bs = 1 << raidPtr->logBytesPerSector;
   1066 	if (disks[spare_number].blockSize != bs) {
   1067 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1068 		ret = EINVAL;
   1069 		goto fail;
   1070 	}
   1071 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1072 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1073 			     disks[spare_number].devname,
   1074 			     disks[spare_number].blockSize,
   1075 			     (long int) raidPtr->sectorsPerDisk);
   1076 		ret = EINVAL;
   1077 		goto fail;
   1078 	} else {
   1079 		if (disks[spare_number].numBlocks >
   1080 		    raidPtr->sectorsPerDisk) {
   1081 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1082 				     (long int) raidPtr->sectorsPerDisk);
   1083 
   1084 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1085 		}
   1086 	}
   1087 
   1088 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1089 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1090 				 0, raidPtr->numCol + spare_number,
   1091 				 raidPtr->qType,
   1092 				 raidPtr->sectorsPerDisk,
   1093 				 raidPtr->Disks[0][raidPtr->numCol +
   1094 						  spare_number].dev,
   1095 				 raidPtr->maxOutstanding,
   1096 				 &raidPtr->shutdownList,
   1097 				 raidPtr->cleanupList);
   1098 
   1099 
   1100 	raidPtr->numSpare++;
   1101 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1102 	return (0);
   1103 
   1104 fail:
   1105 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1106 	return(ret);
   1107 }
   1108 
   1109 int
   1110 rf_remove_hot_spare(raidPtr,sparePtr)
   1111 	RF_Raid_t *raidPtr;
   1112 	RF_SingleComponent_t *sparePtr;
   1113 {
   1114 	int spare_number;
   1115 
   1116 
   1117 	if (raidPtr->numSpare==0) {
   1118 		printf("No spares to remove!\n");
   1119 		return(EINVAL);
   1120 	}
   1121 
   1122 	spare_number = sparePtr->column;
   1123 
   1124 	return(EINVAL); /* XXX not implemented yet */
   1125 #if 0
   1126 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1127 		return(EINVAL);
   1128 	}
   1129 
   1130 	/* verify that this spare isn't in use... */
   1131 
   1132 
   1133 
   1134 
   1135 	/* it's gone.. */
   1136 
   1137 	raidPtr->numSpare--;
   1138 
   1139 	return(0);
   1140 #endif
   1141 }
   1142 
   1143 
   1144 int
   1145 rf_delete_component(raidPtr,component)
   1146 	RF_Raid_t *raidPtr;
   1147 	RF_SingleComponent_t *component;
   1148 {
   1149 	RF_RaidDisk_t *disks;
   1150 
   1151 	if ((component->row < 0) ||
   1152 	    (component->row >= raidPtr->numRow) ||
   1153 	    (component->column < 0) ||
   1154 	    (component->column >= raidPtr->numCol)) {
   1155 		return(EINVAL);
   1156 	}
   1157 
   1158 	disks = &raidPtr->Disks[component->row][component->column];
   1159 
   1160 	/* 1. This component must be marked as 'failed' */
   1161 
   1162 	return(EINVAL); /* Not implemented yet. */
   1163 }
   1164 
   1165 int
   1166 rf_incorporate_hot_spare(raidPtr,component)
   1167 	RF_Raid_t *raidPtr;
   1168 	RF_SingleComponent_t *component;
   1169 {
   1170 
   1171 	/* Issues here include how to 'move' this in if there is IO
   1172 	   taking place (e.g. component queues and such) */
   1173 
   1174 	return(EINVAL); /* Not implemented yet. */
   1175 }
   1176