Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.15
      1 /*	$NetBSD: rf_disks.c,v 1.15 2000/02/13 04:53:57 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include "rf_types.h"
     70 #include "rf_raid.h"
     71 #include "rf_alloclist.h"
     72 #include "rf_utils.h"
     73 #include "rf_configure.h"
     74 #include "rf_general.h"
     75 #include "rf_options.h"
     76 #include "rf_kintf.h"
     77 #include "rf_netbsd.h"
     78 
     79 #include <sys/types.h>
     80 #include <sys/param.h>
     81 #include <sys/systm.h>
     82 #include <sys/proc.h>
     83 #include <sys/ioctl.h>
     84 #include <sys/fcntl.h>
     85 #include <sys/vnode.h>
     86 
     87 /* XXX these should be in a header file somewhere */
     88 void rf_UnconfigureVnodes( RF_Raid_t * );
     89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
     90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     91 
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 * Each expands to a single do { } while (0) statement so that it can be
 * used as the body of an if/else without capturing the following `else'.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     94 
     95 /**************************************************************************
     96  *
     97  * initialize the disks comprising the array
     98  *
     99  * We want the spare disks to have regular row,col numbers so that we can
    100  * easily substitute a spare for a failed disk.  But, the driver code assumes
    101  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    102  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    103  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    104  * rest, and put all the spares in it.  This probably needs to get changed
    105  * eventually.
    106  *
    107  **************************************************************************/
    108 
    109 int
    110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    111 	RF_ShutdownList_t **listp;
    112 	RF_Raid_t *raidPtr;
    113 	RF_Config_t *cfgPtr;
    114 {
    115 	RF_RaidDisk_t **disks;
    116 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    117 	RF_RowCol_t r, c;
    118 	int bs, ret;
    119 	unsigned i, count, foundone = 0, numFailuresThisRow;
    120 	int force;
    121 
    122 	force = cfgPtr->force;
    123 
    124 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    125 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    126 	if (disks == NULL) {
    127 		ret = ENOMEM;
    128 		goto fail;
    129 	}
    130 	raidPtr->Disks = disks;
    131 
    132 	/* get space for the device-specific stuff... */
    133 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    134 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    135 	    raidPtr->cleanupList);
    136 	if (raidPtr->raid_cinfo == NULL) {
    137 		ret = ENOMEM;
    138 		goto fail;
    139 	}
    140 	for (r = 0; r < raidPtr->numRow; r++) {
    141 		numFailuresThisRow = 0;
    142 		/* We allocate RF_MAXSPARE on the first row so that we
    143 		   have room to do hot-swapping of spares */
    144 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    145 				+ ((r == 0) ? RF_MAXSPARE : 0),
    146 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    147 				raidPtr->cleanupList);
    148 		if (disks[r] == NULL) {
    149 			ret = ENOMEM;
    150 			goto fail;
    151 		}
    152 		/* get more space for device specific stuff.. */
    153 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    154 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    155 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    156 		    raidPtr->cleanupList);
    157 		if (raidPtr->raid_cinfo[r] == NULL) {
    158 			ret = ENOMEM;
    159 			goto fail;
    160 		}
    161 		for (c = 0; c < raidPtr->numCol; c++) {
    162 				ret = rf_ConfigureDisk(raidPtr,
    163 						       &cfgPtr->devnames[r][c][0],
    164 						       &disks[r][c], r, c);
    165 
    166 			if (ret)
    167 				goto fail;
    168 
    169 			if (disks[r][c].status == rf_ds_optimal) {
    170 				raidread_component_label(
    171 					 raidPtr->raid_cinfo[r][c].ci_dev,
    172 					 raidPtr->raid_cinfo[r][c].ci_vp,
    173 					 &raidPtr->raid_cinfo[r][c].ci_label);
    174 			}
    175 
    176 			if (disks[r][c].status != rf_ds_optimal) {
    177 				numFailuresThisRow++;
    178 			} else {
    179 				if (disks[r][c].numBlocks < min_numblks)
    180 					min_numblks = disks[r][c].numBlocks;
    181 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    182 				    r, c, disks[r][c].devname,
    183 				    (long int) disks[r][c].numBlocks,
    184 				    disks[r][c].blockSize,
    185 				    (long int) disks[r][c].numBlocks *
    186 					 disks[r][c].blockSize / 1024 / 1024);
    187 			}
    188 		}
    189 		/* XXX fix for n-fault tolerant */
    190 		/* XXX this should probably check to see how many failures
    191 		   we can handle for this configuration! */
    192 		if (numFailuresThisRow > 0)
    193 			raidPtr->status[r] = rf_rs_degraded;
    194 	}
    195 
    196 	/* all disks must be the same size & have the same block size, bs must
    197 	 * be a power of 2 */
    198 	bs = 0;
    199 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    200 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    201 			if (disks[r][c].status == rf_ds_optimal) {
    202 				bs = disks[r][c].blockSize;
    203 				foundone = 1;
    204 			}
    205 		}
    206 	}
    207 	if (!foundone) {
    208 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    209 		ret = EINVAL;
    210 		goto fail;
    211 	}
    212 	for (count = 0, i = 1; i; i <<= 1)
    213 		if (bs & i)
    214 			count++;
    215 	if (count != 1) {
    216 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    217 		ret = EINVAL;
    218 		goto fail;
    219 	}
    220 
    221 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    222 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    223 		if (force != 0) {
    224 			printf("raid%d: Fatal errors being ignored.\n",
    225 			       raidPtr->raidid);
    226 		} else {
    227 			ret = EINVAL;
    228 			goto fail;
    229 		}
    230 	}
    231 
    232 	for (r = 0; r < raidPtr->numRow; r++) {
    233 		for (c = 0; c < raidPtr->numCol; c++) {
    234 			if (disks[r][c].status == rf_ds_optimal) {
    235 				if (disks[r][c].blockSize != bs) {
    236 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    237 					ret = EINVAL;
    238 					goto fail;
    239 				}
    240 				if (disks[r][c].numBlocks != min_numblks) {
    241 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    242 					    r, c, (int) min_numblks);
    243 					disks[r][c].numBlocks = min_numblks;
    244 				}
    245 			}
    246 		}
    247 	}
    248 
    249 	raidPtr->sectorsPerDisk = min_numblks;
    250 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    251 	raidPtr->bytesPerSector = bs;
    252 	raidPtr->sectorMask = bs - 1;
    253 	return (0);
    254 
    255 fail:
    256 
    257 	rf_UnconfigureVnodes( raidPtr );
    258 
    259 	return (ret);
    260 }
    261 
    262 
    263 /****************************************************************************
    264  * set up the data structures describing the spare disks in the array
    265  * recall from the above comment that the spare disk descriptors are stored
    266  * in row zero, which is specially expanded to hold them.
    267  ****************************************************************************/
    268 int
    269 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    270 	RF_ShutdownList_t ** listp;
    271 	RF_Raid_t * raidPtr;
    272 	RF_Config_t * cfgPtr;
    273 {
    274 	int     i, ret;
    275 	unsigned int bs;
    276 	RF_RaidDisk_t *disks;
    277 	int     num_spares_done;
    278 
    279 	num_spares_done = 0;
    280 
    281 	/* The space for the spares should have already been allocated by
    282 	 * ConfigureDisks() */
    283 
    284 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    285 	for (i = 0; i < raidPtr->numSpare; i++) {
    286 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    287 				       &disks[i], 0, raidPtr->numCol + i);
    288 		if (ret)
    289 			goto fail;
    290 		if (disks[i].status != rf_ds_optimal) {
    291 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    292 				     &cfgPtr->spare_names[i][0]);
    293 		} else {
    294 			disks[i].status = rf_ds_spare;	/* change status to
    295 							 * spare */
    296 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    297 			    disks[i].devname,
    298 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    299 			    (long int) disks[i].numBlocks *
    300 				 disks[i].blockSize / 1024 / 1024);
    301 		}
    302 		num_spares_done++;
    303 	}
    304 
    305 	/* check sizes and block sizes on spare disks */
    306 	bs = 1 << raidPtr->logBytesPerSector;
    307 	for (i = 0; i < raidPtr->numSpare; i++) {
    308 		if (disks[i].blockSize != bs) {
    309 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    310 			ret = EINVAL;
    311 			goto fail;
    312 		}
    313 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    314 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    315 				     disks[i].devname, disks[i].blockSize,
    316 				     (long int) raidPtr->sectorsPerDisk);
    317 			ret = EINVAL;
    318 			goto fail;
    319 		} else
    320 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    321 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    322 
    323 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    324 			}
    325 	}
    326 
    327 	return (0);
    328 
    329 fail:
    330 
    331 	/* Release the hold on the main components.  We've failed to allocate
    332 	 * a spare, and since we're failing, we need to free things..
    333 
    334 	 XXX failing to allocate a spare is *not* that big of a deal...
    335 	 We *can* survive without it, if need be, esp. if we get hot
    336 	 adding working.
    337 
    338 	 If we don't fail out here, then we need a way to remove this spare...
    339 	 that should be easier to do here than if we are "live"...
    340 
    341 	 */
    342 
    343 	rf_UnconfigureVnodes( raidPtr );
    344 
    345 	return (ret);
    346 }
    347 
    348 static int
    349 rf_AllocDiskStructures(raidPtr, cfgPtr)
    350 	RF_Raid_t *raidPtr;
    351  	RF_Config_t *cfgPtr;
    352 {
    353 	RF_RaidDisk_t **disks;
    354 	int ret;
    355 	int r;
    356 
    357 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    358 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    359 	if (disks == NULL) {
    360 		ret = ENOMEM;
    361 		goto fail;
    362 	}
    363 	raidPtr->Disks = disks;
    364 	/* get space for the device-specific stuff... */
    365 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    366 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    367 	    raidPtr->cleanupList);
    368 	if (raidPtr->raid_cinfo == NULL) {
    369 		ret = ENOMEM;
    370 		goto fail;
    371 	}
    372 
    373 	for (r = 0; r < raidPtr->numRow; r++) {
    374 		/* We allocate RF_MAXSPARE on the first row so that we
    375 		   have room to do hot-swapping of spares */
    376 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    377 				+ ((r == 0) ? RF_MAXSPARE : 0),
    378 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    379 				raidPtr->cleanupList);
    380 		if (disks[r] == NULL) {
    381 			ret = ENOMEM;
    382 			goto fail;
    383 		}
    384 		/* get more space for device specific stuff.. */
    385 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    386 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    387 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    388 		    raidPtr->cleanupList);
    389 		if (raidPtr->raid_cinfo[r] == NULL) {
    390 			ret = ENOMEM;
    391 			goto fail;
    392 		}
    393 	}
    394 	return(0);
    395 fail:
    396 	rf_UnconfigureVnodes( raidPtr );
    397 
    398 	return(ret);
    399 }
    400 
    401 
    402 /* configure a single disk during auto-configuration at boot */
    403 int
    404 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    405 	RF_Raid_t *raidPtr;
    406 	RF_Config_t *cfgPtr;
    407 	RF_AutoConfig_t *auto_config;
    408 {
    409 	RF_RaidDisk_t **disks;
    410 	RF_RaidDisk_t *diskPtr;
    411 	RF_RowCol_t r, c;
    412 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    413 	int bs, ret;
    414 	int numFailuresThisRow;
    415 	int force;
    416 	RF_AutoConfig_t *ac;
    417 
    418 #if DEBUG
    419 	printf("Starting autoconfiguration of RAID set...\n");
    420 #endif
    421 	force = cfgPtr->force;
    422 
    423 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    424 	if (ret)
    425 		goto fail;
    426 
    427 	disks = raidPtr->Disks;
    428 
    429 	for (r = 0; r < raidPtr->numRow; r++) {
    430 		numFailuresThisRow = 0;
    431 		for (c = 0; c < raidPtr->numCol; c++) {
    432 			diskPtr = &disks[r][c];
    433 
    434 			/* find this row/col in the autoconfig */
    435 #if DEBUG
    436 			printf("Looking for %d,%d in autoconfig\n",r,c);
    437 #endif
    438 			ac = auto_config;
    439 			while(ac!=NULL) {
    440 				if (ac->clabel==NULL) {
    441 					/* big-time bad news. */
    442 					goto fail;
    443 				}
    444 				if ((ac->clabel->row == r) &&
    445 				    (ac->clabel->column == c)) {
    446 					/* it's this one... */
    447 #if DEBUG
    448 					printf("Found: %s at %d,%d\n",
    449 					       ac->devname,r,c);
    450 #endif
    451 					break;
    452 				}
    453 				ac=ac->next;
    454 			}
    455 
    456 			if (ac!=NULL) {
    457 				/* Found it.  Configure it.. */
    458 				diskPtr->blockSize = ac->clabel->blockSize;
    459 				diskPtr->numBlocks = ac->clabel->numBlocks;
    460 				/* Note: rf_protectedSectors is already
    461 				   factored into numBlocks here */
    462 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    463 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
    464 
    465 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    466 				       ac->clabel, sizeof(*ac->clabel));
    467 				sprintf(diskPtr->devname, "/dev/%s",
    468 					ac->devname);
    469 				diskPtr->dev = ac->dev;
    470 
    471 				/*
    472 				 * we allow the user to specify that
    473 				 * only a fraction of the disks should
    474 				 * be used this is just for debug: it
    475 				 * speeds up the parity scan
    476 				 */
    477 
    478 				diskPtr->numBlocks = diskPtr->numBlocks *
    479 					rf_sizePercentage / 100;
    480 
    481 				/* XXX these will get set multiple times,
    482 				   but since we're autoconfiguring, they'd
    483 				   better be always the same each time!
    484 				   If not, this is the least of your worries */
    485 
    486 				bs = diskPtr->blockSize;
    487 				min_numblks = diskPtr->numBlocks;
    488 			} else {
    489 				/* Didn't find it!! Component must be dead */
    490 				disks[r][c].status = rf_ds_failed;
    491 				numFailuresThisRow++;
    492 			}
    493 		}
    494 		/* XXX fix for n-fault tolerant */
    495 		/* XXX this should probably check to see how many failures
    496 		   we can handle for this configuration! */
    497 		if (numFailuresThisRow > 0)
    498 			raidPtr->status[r] = rf_rs_degraded;
    499 	}
    500 
    501 	raidPtr->sectorsPerDisk = min_numblks;
    502 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    503 	raidPtr->bytesPerSector = bs;
    504 	raidPtr->sectorMask = bs - 1;
    505 	return (0);
    506 
    507 fail:
    508 
    509 	rf_UnconfigureVnodes( raidPtr );
    510 
    511 	return (ret);
    512 
    513 }
    514 
    515 /* configure a single disk in the array */
    516 int
    517 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    518 	RF_Raid_t *raidPtr;
    519 	char   *buf;
    520 	RF_RaidDisk_t *diskPtr;
    521 	RF_RowCol_t row;
    522 	RF_RowCol_t col;
    523 {
    524 	char   *p;
    525 	int     retcode;
    526 
    527 	struct partinfo dpart;
    528 	struct vnode *vp;
    529 	struct vattr va;
    530 	struct proc *proc;
    531 	int     error;
    532 
    533 	retcode = 0;
    534 	p = rf_find_non_white(buf);
    535 	if (p[strlen(p) - 1] == '\n') {
    536 		/* strip off the newline */
    537 		p[strlen(p) - 1] = '\0';
    538 	}
    539 	(void) strcpy(diskPtr->devname, p);
    540 
    541 	proc = raidPtr->engine_thread;
    542 
    543 	/* Let's start by claiming the component is fine and well... */
    544 	diskPtr->status = rf_ds_optimal;
    545 
    546 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    547 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    548 
    549 	error = raidlookup(diskPtr->devname, proc, &vp);
    550 	if (error) {
    551 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    552 		if (error == ENXIO) {
    553 			/* the component isn't there... must be dead :-( */
    554 			diskPtr->status = rf_ds_failed;
    555 		} else {
    556 			return (error);
    557 		}
    558 	}
    559 	if (diskPtr->status == rf_ds_optimal) {
    560 
    561 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    562 			return (error);
    563 		}
    564 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    565 				  FREAD, proc->p_ucred, proc);
    566 		if (error) {
    567 			return (error);
    568 		}
    569 
    570 		diskPtr->blockSize = dpart.disklab->d_secsize;
    571 
    572 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    573 		diskPtr->partitionSize = dpart.part->p_size;
    574 
    575 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    576 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    577 
    578 		diskPtr->dev = va.va_rdev;
    579 
    580 		/* we allow the user to specify that only a fraction of the
    581 		 * disks should be used this is just for debug:  it speeds up
    582 		 * the parity scan */
    583 		diskPtr->numBlocks = diskPtr->numBlocks *
    584 			rf_sizePercentage / 100;
    585 	}
    586 	return (0);
    587 }
    588 
    589 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
    590 				  RF_ComponentLabel_t *);
    591 
    592 static void
    593 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    594 	RF_Raid_t *raidPtr;
    595 	int row;
    596 	int column;
    597 	char *dev_name;
    598 	RF_ComponentLabel_t *ci_label;
    599 {
    600 
    601 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    602 	       raidPtr->raidid, dev_name, row, column );
    603 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    604 	       ci_label->row, ci_label->column,
    605 	       ci_label->num_rows, ci_label->num_columns);
    606 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    607 	       ci_label->version, ci_label->serial_number,
    608 	       ci_label->mod_counter);
    609 	printf("         Clean: %s Status: %d\n",
    610 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    611 }
    612 
    613 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
    614 				  RF_ComponentLabel_t *, int, int );
    615 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    616 				  serial_number, mod_counter )
    617 	RF_Raid_t *raidPtr;
    618 	int row;
    619 	int column;
    620 	char *dev_name;
    621 	RF_ComponentLabel_t *ci_label;
    622 	int serial_number;
    623 	int mod_counter;
    624 {
    625 	int fatal_error = 0;
    626 
    627 	if (serial_number != ci_label->serial_number) {
    628 		printf("%s has a different serial number: %d %d\n",
    629 		       dev_name, serial_number, ci_label->serial_number);
    630 		fatal_error = 1;
    631 	}
    632 	if (mod_counter != ci_label->mod_counter) {
    633 		printf("%s has a different modfication count: %d %d\n",
    634 		       dev_name, mod_counter, ci_label->mod_counter);
    635 	}
    636 
    637 	if (row != ci_label->row) {
    638 		printf("Row out of alignment for: %s\n", dev_name);
    639 		fatal_error = 1;
    640 	}
    641 	if (column != ci_label->column) {
    642 		printf("Column out of alignment for: %s\n", dev_name);
    643 		fatal_error = 1;
    644 	}
    645 	if (raidPtr->numRow != ci_label->num_rows) {
    646 		printf("Number of rows do not match for: %s\n", dev_name);
    647 		fatal_error = 1;
    648 	}
    649 	if (raidPtr->numCol != ci_label->num_columns) {
    650 		printf("Number of columns do not match for: %s\n", dev_name);
    651 		fatal_error = 1;
    652 	}
    653 	if (ci_label->clean == 0) {
    654 		/* it's not clean, but that's not fatal */
    655 		printf("%s is not clean!\n", dev_name);
    656 	}
    657 	return(fatal_error);
    658 }
    659 
    660 
    661 /*
    662 
    663    rf_CheckLabels() - check all the component labels for consistency.
    664    Return an error if there is anything major amiss.
    665 
    666  */
    667 
    668 int
    669 rf_CheckLabels( raidPtr, cfgPtr )
    670 	RF_Raid_t *raidPtr;
    671 	RF_Config_t *cfgPtr;
    672 {
    673 	int r,c;
    674 	char *dev_name;
    675 	RF_ComponentLabel_t *ci_label;
    676 	int serial_number = 0;
    677 	int mod_number = 0;
    678 	int fatal_error = 0;
    679 	int mod_values[4];
    680 	int mod_count[4];
    681 	int ser_values[4];
    682 	int ser_count[4];
    683 	int num_ser;
    684 	int num_mod;
    685 	int i;
    686 	int found;
    687 	int hosed_row;
    688 	int hosed_column;
    689 	int too_fatal;
    690 	int parity_good;
    691 	int force;
    692 
    693 	hosed_row = -1;
    694 	hosed_column = -1;
    695 	too_fatal = 0;
    696 	force = cfgPtr->force;
    697 
    698 	/*
    699 	   We're going to try to be a little intelligent here.  If one
    700 	   component's label is bogus, and we can identify that it's the
    701 	   *only* one that's gone, we'll mark it as "failed" and allow
    702 	   the configuration to proceed.  This will be the *only* case
    703 	   that we'll proceed if there would be (otherwise) fatal errors.
    704 
    705 	   Basically we simply keep a count of how many components had
    706 	   what serial number.  If all but one agree, we simply mark
    707 	   the disagreeing component as being failed, and allow
    708 	   things to come up "normally".
    709 
    710 	   We do this first for serial numbers, and then for "mod_counter".
    711 
    712 	 */
    713 
    714 	num_ser = 0;
    715 	num_mod = 0;
    716 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    717 		for (c = 0; c < raidPtr->numCol; c++) {
    718 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    719 			found=0;
    720 			for(i=0;i<num_ser;i++) {
    721 				if (ser_values[i] == ci_label->serial_number) {
    722 					ser_count[i]++;
    723 					found=1;
    724 					break;
    725 				}
    726 			}
    727 			if (!found) {
    728 				ser_values[num_ser] = ci_label->serial_number;
    729 				ser_count[num_ser] = 1;
    730 				num_ser++;
    731 				if (num_ser>2) {
    732 					fatal_error = 1;
    733 					break;
    734 				}
    735 			}
    736 			found=0;
    737 			for(i=0;i<num_mod;i++) {
    738 				if (mod_values[i] == ci_label->mod_counter) {
    739 					mod_count[i]++;
    740 					found=1;
    741 					break;
    742 				}
    743 			}
    744 			if (!found) {
    745 			        mod_values[num_mod] = ci_label->mod_counter;
    746 				mod_count[num_mod] = 1;
    747 				num_mod++;
    748 				if (num_mod>2) {
    749 					fatal_error = 1;
    750 					break;
    751 				}
    752 			}
    753 		}
    754 	}
    755 #if DEBUG
    756 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    757 	for(i=0;i<num_ser;i++) {
    758 		printf("%d %d\n", ser_values[i], ser_count[i]);
    759 	}
    760 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    761 	for(i=0;i<num_mod;i++) {
    762 		printf("%d %d\n", mod_values[i], mod_count[i]);
    763 	}
    764 #endif
    765 	serial_number = ser_values[0];
    766 	if (num_ser == 2) {
    767 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    768 			/* Locate the maverick component */
    769 			if (ser_count[1] > ser_count[0]) {
    770 				serial_number = ser_values[1];
    771 			}
    772 			for (r = 0; r < raidPtr->numRow; r++) {
    773 				for (c = 0; c < raidPtr->numCol; c++) {
    774 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    775 					if (serial_number !=
    776 					    ci_label->serial_number) {
    777 						hosed_row = r;
    778 						hosed_column = c;
    779 						break;
    780 					}
    781 				}
    782 			}
    783 			printf("Hosed component: %s\n",
    784 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    785 			if (!force) {
    786 				/* we'll fail this component, as if there are
    787 				   other major errors, we arn't forcing things
    788 				   and we'll abort the config anyways */
    789 				raidPtr->Disks[hosed_row][hosed_column].status
    790 					= rf_ds_failed;
    791 				raidPtr->numFailures++;
    792 				raidPtr->status[hosed_row] = rf_rs_degraded;
    793 			}
    794 		} else {
    795 			too_fatal = 1;
    796 		}
    797 		if (cfgPtr->parityConfig == '0') {
    798 			/* We've identified two different serial numbers.
    799 			   RAID 0 can't cope with that, so we'll punt */
    800 			too_fatal = 1;
    801 		}
    802 
    803 	}
    804 
    805 	/* record the serial number for later.  If we bail later, setting
    806 	   this doesn't matter, otherwise we've got the best guess at the
    807 	   correct serial number */
    808 	raidPtr->serial_number = serial_number;
    809 
    810 	mod_number = mod_values[0];
    811 	if (num_mod == 2) {
    812 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    813 			/* Locate the maverick component */
    814 			if (mod_count[1] > mod_count[0]) {
    815 				mod_number = mod_values[1];
    816 			} else if (mod_count[1] < mod_count[0]) {
    817 				mod_number = mod_values[0];
    818 			} else {
    819 				/* counts of different modification values
    820 				   are the same.   Assume greater value is
    821 				   the correct one, all other things
    822 				   considered */
    823 				if (mod_values[0] > mod_values[1]) {
    824 					mod_number = mod_values[0];
    825 				} else {
    826 					mod_number = mod_values[1];
    827 				}
    828 
    829 			}
    830 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    831 				for (c = 0; c < raidPtr->numCol; c++) {
    832 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    833 					if (mod_number !=
    834 					    ci_label->mod_counter) {
    835 						if ( ( hosed_row == r ) &&
    836 						     ( hosed_column == c )) {
    837 							/* same one.  Can
    838 							   deal with it.  */
    839 						} else {
    840 							hosed_row = r;
    841 							hosed_column = c;
    842 							if (num_ser != 1) {
    843 								too_fatal = 1;
    844 								break;
    845 							}
    846 						}
    847 					}
    848 				}
    849 			}
    850 			printf("Hosed component: %s\n",
    851 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    852 			if (!force) {
    853 				/* we'll fail this component, as if there are
    854 				   other major errors, we arn't forcing things
    855 				   and we'll abort the config anyways */
    856 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    857 					raidPtr->Disks[hosed_row][hosed_column].status
    858 						= rf_ds_failed;
    859 					raidPtr->numFailures++;
    860 					raidPtr->status[hosed_row] = rf_rs_degraded;
    861 				}
    862 			}
    863 		} else {
    864 			too_fatal = 1;
    865 		}
    866 		if (cfgPtr->parityConfig == '0') {
    867 			/* We've identified two different mod counters.
    868 			   RAID 0 can't cope with that, so we'll punt */
    869 			too_fatal = 1;
    870 		}
    871 	}
    872 
    873 	raidPtr->mod_counter = mod_number;
    874 
    875 	if (too_fatal) {
    876 		/* we've had both a serial number mismatch, and a mod_counter
    877 		   mismatch -- and they involved two different components!!
    878 		   Bail -- make things fail so that the user must force
    879 		   the issue... */
    880 		hosed_row = -1;
    881 		hosed_column = -1;
    882 	}
    883 
    884 	if (num_ser > 2) {
    885 		printf("raid%d: Too many different serial numbers!\n",
    886 		       raidPtr->raidid);
    887 	}
    888 
    889 	if (num_mod > 2) {
    890 		printf("raid%d: Too many different mod counters!\n",
    891 		       raidPtr->raidid);
    892 	}
    893 
    894 	/* we start by assuming the parity will be good, and flee from
    895 	   that notion at the slightest sign of trouble */
    896 
    897 	parity_good = RF_RAID_CLEAN;
    898 	for (r = 0; r < raidPtr->numRow; r++) {
    899 		for (c = 0; c < raidPtr->numCol; c++) {
    900 			dev_name = &cfgPtr->devnames[r][c][0];
    901 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    902 
    903 			if ((r == hosed_row) && (c == hosed_column)) {
    904 				printf("raid%d: Ignoring %s\n",
    905 				       raidPtr->raidid, dev_name);
    906 			} else {
    907 				rf_print_label_status( raidPtr, r, c,
    908 						       dev_name, ci_label );
    909 				if (rf_check_label_vitals( raidPtr, r, c,
    910 							   dev_name, ci_label,
    911 							   serial_number,
    912 							   mod_number )) {
    913 					fatal_error = 1;
    914 				}
    915 				if (ci_label->clean != RF_RAID_CLEAN) {
    916 					parity_good = RF_RAID_DIRTY;
    917 				}
    918 			}
    919 		}
    920 	}
    921 	if (fatal_error) {
    922 		parity_good = RF_RAID_DIRTY;
    923 	}
    924 
    925 	/* we note the state of the parity */
    926 	raidPtr->parity_good = parity_good;
    927 
    928 	return(fatal_error);
    929 }
    930 
    931 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
    932 int
    933 rf_add_hot_spare(raidPtr, sparePtr)
    934 	RF_Raid_t *raidPtr;
    935 	RF_SingleComponent_t *sparePtr;
    936 {
    937 	RF_RaidDisk_t *disks;
    938 	RF_DiskQueue_t *spareQueues;
    939 	int ret;
    940 	unsigned int bs;
    941 	int spare_number;
    942 
    943 	printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
    944 	printf("Num col: %d\n",raidPtr->numCol);
    945 	if (raidPtr->numSpare >= RF_MAXSPARE) {
    946 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
    947 		return(EINVAL);
    948 	}
    949 
    950 	RF_LOCK_MUTEX(raidPtr->mutex);
    951 
    952 	/* the beginning of the spares... */
    953 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    954 
    955 	spare_number = raidPtr->numSpare;
    956 
    957 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
    958 			       &disks[spare_number], 0,
    959 			       raidPtr->numCol + spare_number);
    960 
    961 	if (ret)
    962 		goto fail;
    963 	if (disks[spare_number].status != rf_ds_optimal) {
    964 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    965 			     sparePtr->component_name);
    966 		ret=EINVAL;
    967 		goto fail;
    968 	} else {
    969 		disks[spare_number].status = rf_ds_spare;
    970 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
    971 			 disks[spare_number].devname,
    972 			 (long int) disks[spare_number].numBlocks,
    973 			 disks[spare_number].blockSize,
    974 			 (long int) disks[spare_number].numBlocks *
    975 			 disks[spare_number].blockSize / 1024 / 1024);
    976 	}
    977 
    978 
    979 	/* check sizes and block sizes on the spare disk */
    980 	bs = 1 << raidPtr->logBytesPerSector;
    981 	if (disks[spare_number].blockSize != bs) {
    982 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
    983 		ret = EINVAL;
    984 		goto fail;
    985 	}
    986 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
    987 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    988 			     disks[spare_number].devname,
    989 			     disks[spare_number].blockSize,
    990 			     (long int) raidPtr->sectorsPerDisk);
    991 		ret = EINVAL;
    992 		goto fail;
    993 	} else {
    994 		if (disks[spare_number].numBlocks >
    995 		    raidPtr->sectorsPerDisk) {
    996 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
    997 				     (long int) raidPtr->sectorsPerDisk);
    998 
    999 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1000 		}
   1001 	}
   1002 
   1003 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1004 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1005 				 0, raidPtr->numCol + spare_number,
   1006 				 raidPtr->Queues[0][0].qPtr, /* XXX */
   1007 				 raidPtr->sectorsPerDisk,
   1008 				 raidPtr->Disks[0][raidPtr->numCol + spare_number].dev,
   1009 				 raidPtr->Queues[0][0].maxOutstanding, /* XXX */
   1010 				 &raidPtr->shutdownList,
   1011 				 raidPtr->cleanupList);
   1012 
   1013 
   1014 	raidPtr->numSpare++;
   1015 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1016 	return (0);
   1017 
   1018 fail:
   1019 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1020 	return(ret);
   1021 }
   1022 
   1023 int
   1024 rf_remove_hot_spare(raidPtr,sparePtr)
   1025 	RF_Raid_t *raidPtr;
   1026 	RF_SingleComponent_t *sparePtr;
   1027 {
   1028 	int spare_number;
   1029 
   1030 
   1031 	if (raidPtr->numSpare==0) {
   1032 		printf("No spares to remove!\n");
   1033 		return(EINVAL);
   1034 	}
   1035 
   1036 	spare_number = sparePtr->column;
   1037 
   1038 	return(EINVAL); /* XXX not implemented yet */
   1039 #if 0
   1040 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1041 		return(EINVAL);
   1042 	}
   1043 
   1044 	/* verify that this spare isn't in use... */
   1045 
   1046 
   1047 
   1048 
   1049 	/* it's gone.. */
   1050 
   1051 	raidPtr->numSpare--;
   1052 
   1053 	return(0);
   1054 #endif
   1055 }
   1056 
   1057 
   1058