Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.20
      1 /*	$NetBSD: rf_disks.c,v 1.20 2000/02/25 19:56:32 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include "rf_types.h"
     70 #include "rf_raid.h"
     71 #include "rf_alloclist.h"
     72 #include "rf_utils.h"
     73 #include "rf_configure.h"
     74 #include "rf_general.h"
     75 #include "rf_options.h"
     76 #include "rf_kintf.h"
     77 #include "rf_netbsd.h"
     78 
     79 #include <sys/types.h>
     80 #include <sys/param.h>
     81 #include <sys/systm.h>
     82 #include <sys/proc.h>
     83 #include <sys/ioctl.h>
     84 #include <sys/fcntl.h>
     85 #include <sys/vnode.h>
     86 
     87 /* XXX these should be in a header file somewhere */
     88 void rf_UnconfigureVnodes( RF_Raid_t * );
     89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
     90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     91 
     92 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f)
     93 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g)
     94 
     95 /**************************************************************************
     96  *
     97  * initialize the disks comprising the array
     98  *
     99  * We want the spare disks to have regular row,col numbers so that we can
    100  * easily substitue a spare for a failed disk.  But, the driver code assumes
    101  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    102  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    103  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    104  * rest, and put all the spares in it.  This probably needs to get changed
    105  * eventually.
    106  *
    107  **************************************************************************/
    108 
    109 int
    110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    111 	RF_ShutdownList_t **listp;
    112 	RF_Raid_t *raidPtr;
    113 	RF_Config_t *cfgPtr;
    114 {
    115 	RF_RaidDisk_t **disks;
    116 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    117 	RF_RowCol_t r, c;
    118 	int bs, ret;
    119 	unsigned i, count, foundone = 0, numFailuresThisRow;
    120 	int force;
    121 
    122 	force = cfgPtr->force;
    123 
    124 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    125 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    126 	if (disks == NULL) {
    127 		ret = ENOMEM;
    128 		goto fail;
    129 	}
    130 	raidPtr->Disks = disks;
    131 
    132 	/* get space for the device-specific stuff... */
    133 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    134 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    135 	    raidPtr->cleanupList);
    136 	if (raidPtr->raid_cinfo == NULL) {
    137 		ret = ENOMEM;
    138 		goto fail;
    139 	}
    140 	for (r = 0; r < raidPtr->numRow; r++) {
    141 		numFailuresThisRow = 0;
    142 		/* We allocate RF_MAXSPARE on the first row so that we
    143 		   have room to do hot-swapping of spares */
    144 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    145 				+ ((r == 0) ? RF_MAXSPARE : 0),
    146 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    147 				raidPtr->cleanupList);
    148 		if (disks[r] == NULL) {
    149 			ret = ENOMEM;
    150 			goto fail;
    151 		}
    152 		/* get more space for device specific stuff.. */
    153 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    154 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    155 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    156 		    raidPtr->cleanupList);
    157 		if (raidPtr->raid_cinfo[r] == NULL) {
    158 			ret = ENOMEM;
    159 			goto fail;
    160 		}
    161 		for (c = 0; c < raidPtr->numCol; c++) {
    162 				ret = rf_ConfigureDisk(raidPtr,
    163 						       &cfgPtr->devnames[r][c][0],
    164 						       &disks[r][c], r, c);
    165 
    166 			if (ret)
    167 				goto fail;
    168 
    169 			if (disks[r][c].status == rf_ds_optimal) {
    170 				raidread_component_label(
    171 					 raidPtr->raid_cinfo[r][c].ci_dev,
    172 					 raidPtr->raid_cinfo[r][c].ci_vp,
    173 					 &raidPtr->raid_cinfo[r][c].ci_label);
    174 			}
    175 
    176 			if (disks[r][c].status != rf_ds_optimal) {
    177 				numFailuresThisRow++;
    178 			} else {
    179 				if (disks[r][c].numBlocks < min_numblks)
    180 					min_numblks = disks[r][c].numBlocks;
    181 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    182 				    r, c, disks[r][c].devname,
    183 				    (long int) disks[r][c].numBlocks,
    184 				    disks[r][c].blockSize,
    185 				    (long int) disks[r][c].numBlocks *
    186 					 disks[r][c].blockSize / 1024 / 1024);
    187 			}
    188 		}
    189 		/* XXX fix for n-fault tolerant */
    190 		/* XXX this should probably check to see how many failures
    191 		   we can handle for this configuration! */
    192 		if (numFailuresThisRow > 0)
    193 			raidPtr->status[r] = rf_rs_degraded;
    194 	}
    195 
    196 	/* all disks must be the same size & have the same block size, bs must
    197 	 * be a power of 2 */
    198 	bs = 0;
    199 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    200 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    201 			if (disks[r][c].status == rf_ds_optimal) {
    202 				bs = disks[r][c].blockSize;
    203 				foundone = 1;
    204 			}
    205 		}
    206 	}
    207 	if (!foundone) {
    208 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    209 		ret = EINVAL;
    210 		goto fail;
    211 	}
    212 	for (count = 0, i = 1; i; i <<= 1)
    213 		if (bs & i)
    214 			count++;
    215 	if (count != 1) {
    216 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    217 		ret = EINVAL;
    218 		goto fail;
    219 	}
    220 
    221 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    222 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    223 		if (force != 0) {
    224 			printf("raid%d: Fatal errors being ignored.\n",
    225 			       raidPtr->raidid);
    226 		} else {
    227 			ret = EINVAL;
    228 			goto fail;
    229 		}
    230 	}
    231 
    232 	for (r = 0; r < raidPtr->numRow; r++) {
    233 		for (c = 0; c < raidPtr->numCol; c++) {
    234 			if (disks[r][c].status == rf_ds_optimal) {
    235 				if (disks[r][c].blockSize != bs) {
    236 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    237 					ret = EINVAL;
    238 					goto fail;
    239 				}
    240 				if (disks[r][c].numBlocks != min_numblks) {
    241 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    242 					    r, c, (int) min_numblks);
    243 					disks[r][c].numBlocks = min_numblks;
    244 				}
    245 			}
    246 		}
    247 	}
    248 
    249 	raidPtr->sectorsPerDisk = min_numblks;
    250 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    251 	raidPtr->bytesPerSector = bs;
    252 	raidPtr->sectorMask = bs - 1;
    253 	return (0);
    254 
    255 fail:
    256 
    257 	rf_UnconfigureVnodes( raidPtr );
    258 
    259 	return (ret);
    260 }
    261 
    262 
    263 /****************************************************************************
    264  * set up the data structures describing the spare disks in the array
    265  * recall from the above comment that the spare disk descriptors are stored
    266  * in row zero, which is specially expanded to hold them.
    267  ****************************************************************************/
    268 int
    269 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    270 	RF_ShutdownList_t ** listp;
    271 	RF_Raid_t * raidPtr;
    272 	RF_Config_t * cfgPtr;
    273 {
    274 	int     i, ret;
    275 	unsigned int bs;
    276 	RF_RaidDisk_t *disks;
    277 	int     num_spares_done;
    278 
    279 	num_spares_done = 0;
    280 
    281 	/* The space for the spares should have already been allocated by
    282 	 * ConfigureDisks() */
    283 
    284 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    285 	for (i = 0; i < raidPtr->numSpare; i++) {
    286 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    287 				       &disks[i], 0, raidPtr->numCol + i);
    288 		if (ret)
    289 			goto fail;
    290 		if (disks[i].status != rf_ds_optimal) {
    291 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    292 				     &cfgPtr->spare_names[i][0]);
    293 		} else {
    294 			disks[i].status = rf_ds_spare;	/* change status to
    295 							 * spare */
    296 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    297 			    disks[i].devname,
    298 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    299 			    (long int) disks[i].numBlocks *
    300 				 disks[i].blockSize / 1024 / 1024);
    301 		}
    302 		num_spares_done++;
    303 	}
    304 
    305 	/* check sizes and block sizes on spare disks */
    306 	bs = 1 << raidPtr->logBytesPerSector;
    307 	for (i = 0; i < raidPtr->numSpare; i++) {
    308 		if (disks[i].blockSize != bs) {
    309 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    310 			ret = EINVAL;
    311 			goto fail;
    312 		}
    313 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    314 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    315 				     disks[i].devname, disks[i].blockSize,
    316 				     (long int) raidPtr->sectorsPerDisk);
    317 			ret = EINVAL;
    318 			goto fail;
    319 		} else
    320 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    321 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    322 
    323 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    324 			}
    325 	}
    326 
    327 	return (0);
    328 
    329 fail:
    330 
    331 	/* Release the hold on the main components.  We've failed to allocate
    332 	 * a spare, and since we're failing, we need to free things..
    333 
    334 	 XXX failing to allocate a spare is *not* that big of a deal...
    335 	 We *can* survive without it, if need be, esp. if we get hot
    336 	 adding working.
    337 
    338 	 If we don't fail out here, then we need a way to remove this spare...
    339 	 that should be easier to do here than if we are "live"...
    340 
    341 	 */
    342 
    343 	rf_UnconfigureVnodes( raidPtr );
    344 
    345 	return (ret);
    346 }
    347 
    348 static int
    349 rf_AllocDiskStructures(raidPtr, cfgPtr)
    350 	RF_Raid_t *raidPtr;
    351  	RF_Config_t *cfgPtr;
    352 {
    353 	RF_RaidDisk_t **disks;
    354 	int ret;
    355 	int r;
    356 
    357 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    358 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    359 	if (disks == NULL) {
    360 		ret = ENOMEM;
    361 		goto fail;
    362 	}
    363 	raidPtr->Disks = disks;
    364 	/* get space for the device-specific stuff... */
    365 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    366 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    367 	    raidPtr->cleanupList);
    368 	if (raidPtr->raid_cinfo == NULL) {
    369 		ret = ENOMEM;
    370 		goto fail;
    371 	}
    372 
    373 	for (r = 0; r < raidPtr->numRow; r++) {
    374 		/* We allocate RF_MAXSPARE on the first row so that we
    375 		   have room to do hot-swapping of spares */
    376 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    377 				+ ((r == 0) ? RF_MAXSPARE : 0),
    378 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    379 				raidPtr->cleanupList);
    380 		if (disks[r] == NULL) {
    381 			ret = ENOMEM;
    382 			goto fail;
    383 		}
    384 		/* get more space for device specific stuff.. */
    385 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    386 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    387 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    388 		    raidPtr->cleanupList);
    389 		if (raidPtr->raid_cinfo[r] == NULL) {
    390 			ret = ENOMEM;
    391 			goto fail;
    392 		}
    393 	}
    394 	return(0);
    395 fail:
    396 	rf_UnconfigureVnodes( raidPtr );
    397 
    398 	return(ret);
    399 }
    400 
    401 
    402 /* configure a single disk during auto-configuration at boot */
    403 int
    404 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    405 	RF_Raid_t *raidPtr;
    406 	RF_Config_t *cfgPtr;
    407 	RF_AutoConfig_t *auto_config;
    408 {
    409 	RF_RaidDisk_t **disks;
    410 	RF_RaidDisk_t *diskPtr;
    411 	RF_RowCol_t r, c;
    412 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    413 	int bs, ret;
    414 	int numFailuresThisRow;
    415 	int force;
    416 	RF_AutoConfig_t *ac;
    417 	int parity_good;
    418 	int mod_counter;
    419 
    420 #if DEBUG
    421 	printf("Starting autoconfiguration of RAID set...\n");
    422 #endif
    423 	force = cfgPtr->force;
    424 
    425 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    426 	if (ret)
    427 		goto fail;
    428 
    429 	disks = raidPtr->Disks;
    430 
    431 	/* assume the parity will be fine.. */
    432 	parity_good = RF_RAID_CLEAN;
    433 
    434 	/* Check for mod_counters that are too low */
    435 	mod_counter = -1;
    436 	ac = auto_config;
    437 	while(ac!=NULL) {
    438 		if (ac->clabel->mod_counter > mod_counter) {
    439 			mod_counter = ac->clabel->mod_counter;
    440 		}
    441 		ac = ac->next;
    442 	}
    443 	if (mod_counter == -1) {
    444 		/* mod_counters were all negative!?!?!?
    445 		   Ok, we can deal with that. */
    446 #if 0
    447 		ac = auto_config;
    448 		while(ac!=NULL) {
    449 			if (ac->clabel->mod_counter > mod_counter) {
    450 				mod_counter = ac->clabel->mod_counter;
    451 			}
    452 			ac = ac->next;
    453 		}
    454 #endif
    455 	}
    456 
    457 	for (r = 0; r < raidPtr->numRow; r++) {
    458 		numFailuresThisRow = 0;
    459 		for (c = 0; c < raidPtr->numCol; c++) {
    460 			diskPtr = &disks[r][c];
    461 
    462 			/* find this row/col in the autoconfig */
    463 #if DEBUG
    464 			printf("Looking for %d,%d in autoconfig\n",r,c);
    465 #endif
    466 			ac = auto_config;
    467 			while(ac!=NULL) {
    468 				if (ac->clabel==NULL) {
    469 					/* big-time bad news. */
    470 					goto fail;
    471 				}
    472 				if ((ac->clabel->row == r) &&
    473 				    (ac->clabel->column == c)) {
    474 					/* it's this one... */
    475 #if DEBUG
    476 					printf("Found: %s at %d,%d\n",
    477 					       ac->devname,r,c);
    478 #endif
    479 
    480 					break;
    481 				}
    482 				ac=ac->next;
    483 			}
    484 
    485 			if (ac!=NULL) {
    486 				/* Found it.  Configure it.. */
    487 				diskPtr->blockSize = ac->clabel->blockSize;
    488 				diskPtr->numBlocks = ac->clabel->numBlocks;
    489 				/* Note: rf_protectedSectors is already
    490 				   factored into numBlocks here */
    491 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    492 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
    493 
    494 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    495 				       ac->clabel, sizeof(*ac->clabel));
    496 				sprintf(diskPtr->devname, "/dev/%s",
    497 					ac->devname);
    498 
    499 				/* note the fact that this component was
    500 				   autoconfigured.  You'll need this info
    501 				   later.  Trust me :) */
    502 				diskPtr->auto_configured = 1;
    503 				diskPtr->dev = ac->dev;
    504 
    505 				/*
    506 				 * we allow the user to specify that
    507 				 * only a fraction of the disks should
    508 				 * be used this is just for debug: it
    509 				 * speeds up the parity scan
    510 				 */
    511 
    512 				diskPtr->numBlocks = diskPtr->numBlocks *
    513 					rf_sizePercentage / 100;
    514 
    515 				/* XXX these will get set multiple times,
    516 				   but since we're autoconfiguring, they'd
    517 				   better be always the same each time!
    518 				   If not, this is the least of your worries */
    519 
    520 				bs = diskPtr->blockSize;
    521 				min_numblks = diskPtr->numBlocks;
    522 
    523 				/* this gets done multiple times, but that's
    524 				   fine -- the serial number will be the same
    525 				   for all components, guaranteed */
    526 				raidPtr->serial_number =
    527 					ac->clabel->serial_number;
    528 				/* check the last time the label
    529 				   was modified */
    530 				if (ac->clabel->mod_counter !=
    531 				    mod_counter) {
    532 					/* Even though we've filled in all
    533 					   of the above, we don't trust
    534 					   this component since it's
    535 					   modification counter is not
    536 					   in sync with the rest, and we really
    537 					   consider it to be failed.  */
    538 					disks[r][c].status = rf_ds_failed;
    539 					numFailuresThisRow++;
    540 				} else {
    541 					if (ac->clabel->clean !=
    542 					    RF_RAID_CLEAN) {
    543 						parity_good = RF_RAID_DIRTY;
    544 					}
    545 				}
    546 			} else {
    547 				/* Didn't find it at all!!
    548 				   Component must really be dead */
    549 				disks[r][c].status = rf_ds_failed;
    550 				numFailuresThisRow++;
    551 			}
    552 		}
    553 		/* XXX fix for n-fault tolerant */
    554 		/* XXX this should probably check to see how many failures
    555 		   we can handle for this configuration! */
    556 		if (numFailuresThisRow > 0)
    557 			raidPtr->status[r] = rf_rs_degraded;
    558 	}
    559 
    560 	raidPtr->mod_counter = mod_counter;
    561 
    562 	/* note the state of the parity, if any */
    563 	raidPtr->parity_good = parity_good;
    564 	raidPtr->sectorsPerDisk = min_numblks;
    565 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    566 	raidPtr->bytesPerSector = bs;
    567 	raidPtr->sectorMask = bs - 1;
    568 	return (0);
    569 
    570 fail:
    571 
    572 	rf_UnconfigureVnodes( raidPtr );
    573 
    574 	return (ret);
    575 
    576 }
    577 
    578 /* configure a single disk in the array */
    579 int
    580 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    581 	RF_Raid_t *raidPtr;
    582 	char   *buf;
    583 	RF_RaidDisk_t *diskPtr;
    584 	RF_RowCol_t row;
    585 	RF_RowCol_t col;
    586 {
    587 	char   *p;
    588 	int     retcode;
    589 
    590 	struct partinfo dpart;
    591 	struct vnode *vp;
    592 	struct vattr va;
    593 	struct proc *proc;
    594 	int     error;
    595 
    596 	retcode = 0;
    597 	p = rf_find_non_white(buf);
    598 	if (p[strlen(p) - 1] == '\n') {
    599 		/* strip off the newline */
    600 		p[strlen(p) - 1] = '\0';
    601 	}
    602 	(void) strcpy(diskPtr->devname, p);
    603 
    604 	proc = raidPtr->engine_thread;
    605 
    606 	/* Let's start by claiming the component is fine and well... */
    607 	diskPtr->status = rf_ds_optimal;
    608 
    609 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    610 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    611 
    612 	error = raidlookup(diskPtr->devname, proc, &vp);
    613 	if (error) {
    614 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    615 		if (error == ENXIO) {
    616 			/* the component isn't there... must be dead :-( */
    617 			diskPtr->status = rf_ds_failed;
    618 		} else {
    619 			return (error);
    620 		}
    621 	}
    622 	if (diskPtr->status == rf_ds_optimal) {
    623 
    624 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    625 			return (error);
    626 		}
    627 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    628 				  FREAD, proc->p_ucred, proc);
    629 		if (error) {
    630 			return (error);
    631 		}
    632 
    633 		diskPtr->blockSize = dpart.disklab->d_secsize;
    634 
    635 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    636 		diskPtr->partitionSize = dpart.part->p_size;
    637 
    638 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    639 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    640 
    641 		/* This component was not automatically configured */
    642 		diskPtr->auto_configured = 0;
    643 		diskPtr->dev = va.va_rdev;
    644 
    645 		/* we allow the user to specify that only a fraction of the
    646 		 * disks should be used this is just for debug:  it speeds up
    647 		 * the parity scan */
    648 		diskPtr->numBlocks = diskPtr->numBlocks *
    649 			rf_sizePercentage / 100;
    650 	}
    651 	return (0);
    652 }
    653 
    654 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
    655 				  RF_ComponentLabel_t *);
    656 
    657 static void
    658 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    659 	RF_Raid_t *raidPtr;
    660 	int row;
    661 	int column;
    662 	char *dev_name;
    663 	RF_ComponentLabel_t *ci_label;
    664 {
    665 
    666 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    667 	       raidPtr->raidid, dev_name, row, column );
    668 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    669 	       ci_label->row, ci_label->column,
    670 	       ci_label->num_rows, ci_label->num_columns);
    671 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    672 	       ci_label->version, ci_label->serial_number,
    673 	       ci_label->mod_counter);
    674 	printf("         Clean: %s Status: %d\n",
    675 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    676 }
    677 
    678 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
    679 				  RF_ComponentLabel_t *, int, int );
    680 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    681 				  serial_number, mod_counter )
    682 	RF_Raid_t *raidPtr;
    683 	int row;
    684 	int column;
    685 	char *dev_name;
    686 	RF_ComponentLabel_t *ci_label;
    687 	int serial_number;
    688 	int mod_counter;
    689 {
    690 	int fatal_error = 0;
    691 
    692 	if (serial_number != ci_label->serial_number) {
    693 		printf("%s has a different serial number: %d %d\n",
    694 		       dev_name, serial_number, ci_label->serial_number);
    695 		fatal_error = 1;
    696 	}
    697 	if (mod_counter != ci_label->mod_counter) {
    698 		printf("%s has a different modfication count: %d %d\n",
    699 		       dev_name, mod_counter, ci_label->mod_counter);
    700 	}
    701 
    702 	if (row != ci_label->row) {
    703 		printf("Row out of alignment for: %s\n", dev_name);
    704 		fatal_error = 1;
    705 	}
    706 	if (column != ci_label->column) {
    707 		printf("Column out of alignment for: %s\n", dev_name);
    708 		fatal_error = 1;
    709 	}
    710 	if (raidPtr->numRow != ci_label->num_rows) {
    711 		printf("Number of rows do not match for: %s\n", dev_name);
    712 		fatal_error = 1;
    713 	}
    714 	if (raidPtr->numCol != ci_label->num_columns) {
    715 		printf("Number of columns do not match for: %s\n", dev_name);
    716 		fatal_error = 1;
    717 	}
    718 	if (ci_label->clean == 0) {
    719 		/* it's not clean, but that's not fatal */
    720 		printf("%s is not clean!\n", dev_name);
    721 	}
    722 	return(fatal_error);
    723 }
    724 
    725 
    726 /*
    727 
    728    rf_CheckLabels() - check all the component labels for consistency.
    729    Return an error if there is anything major amiss.
    730 
    731  */
    732 
    733 int
    734 rf_CheckLabels( raidPtr, cfgPtr )
    735 	RF_Raid_t *raidPtr;
    736 	RF_Config_t *cfgPtr;
    737 {
    738 	int r,c;
    739 	char *dev_name;
    740 	RF_ComponentLabel_t *ci_label;
    741 	int serial_number = 0;
    742 	int mod_number = 0;
    743 	int fatal_error = 0;
    744 	int mod_values[4];
    745 	int mod_count[4];
    746 	int ser_values[4];
    747 	int ser_count[4];
    748 	int num_ser;
    749 	int num_mod;
    750 	int i;
    751 	int found;
    752 	int hosed_row;
    753 	int hosed_column;
    754 	int too_fatal;
    755 	int parity_good;
    756 	int force;
    757 
    758 	hosed_row = -1;
    759 	hosed_column = -1;
    760 	too_fatal = 0;
    761 	force = cfgPtr->force;
    762 
    763 	/*
    764 	   We're going to try to be a little intelligent here.  If one
    765 	   component's label is bogus, and we can identify that it's the
    766 	   *only* one that's gone, we'll mark it as "failed" and allow
    767 	   the configuration to proceed.  This will be the *only* case
    768 	   that we'll proceed if there would be (otherwise) fatal errors.
    769 
    770 	   Basically we simply keep a count of how many components had
    771 	   what serial number.  If all but one agree, we simply mark
    772 	   the disagreeing component as being failed, and allow
    773 	   things to come up "normally".
    774 
    775 	   We do this first for serial numbers, and then for "mod_counter".
    776 
    777 	 */
    778 
    779 	num_ser = 0;
    780 	num_mod = 0;
    781 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    782 		for (c = 0; c < raidPtr->numCol; c++) {
    783 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    784 			found=0;
    785 			for(i=0;i<num_ser;i++) {
    786 				if (ser_values[i] == ci_label->serial_number) {
    787 					ser_count[i]++;
    788 					found=1;
    789 					break;
    790 				}
    791 			}
    792 			if (!found) {
    793 				ser_values[num_ser] = ci_label->serial_number;
    794 				ser_count[num_ser] = 1;
    795 				num_ser++;
    796 				if (num_ser>2) {
    797 					fatal_error = 1;
    798 					break;
    799 				}
    800 			}
    801 			found=0;
    802 			for(i=0;i<num_mod;i++) {
    803 				if (mod_values[i] == ci_label->mod_counter) {
    804 					mod_count[i]++;
    805 					found=1;
    806 					break;
    807 				}
    808 			}
    809 			if (!found) {
    810 			        mod_values[num_mod] = ci_label->mod_counter;
    811 				mod_count[num_mod] = 1;
    812 				num_mod++;
    813 				if (num_mod>2) {
    814 					fatal_error = 1;
    815 					break;
    816 				}
    817 			}
    818 		}
    819 	}
    820 #if DEBUG
    821 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    822 	for(i=0;i<num_ser;i++) {
    823 		printf("%d %d\n", ser_values[i], ser_count[i]);
    824 	}
    825 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    826 	for(i=0;i<num_mod;i++) {
    827 		printf("%d %d\n", mod_values[i], mod_count[i]);
    828 	}
    829 #endif
    830 	serial_number = ser_values[0];
    831 	if (num_ser == 2) {
    832 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    833 			/* Locate the maverick component */
    834 			if (ser_count[1] > ser_count[0]) {
    835 				serial_number = ser_values[1];
    836 			}
    837 			for (r = 0; r < raidPtr->numRow; r++) {
    838 				for (c = 0; c < raidPtr->numCol; c++) {
    839 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    840 					if (serial_number !=
    841 					    ci_label->serial_number) {
    842 						hosed_row = r;
    843 						hosed_column = c;
    844 						break;
    845 					}
    846 				}
    847 			}
    848 			printf("Hosed component: %s\n",
    849 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    850 			if (!force) {
    851 				/* we'll fail this component, as if there are
    852 				   other major errors, we arn't forcing things
    853 				   and we'll abort the config anyways */
    854 				raidPtr->Disks[hosed_row][hosed_column].status
    855 					= rf_ds_failed;
    856 				raidPtr->numFailures++;
    857 				raidPtr->status[hosed_row] = rf_rs_degraded;
    858 			}
    859 		} else {
    860 			too_fatal = 1;
    861 		}
    862 		if (cfgPtr->parityConfig == '0') {
    863 			/* We've identified two different serial numbers.
    864 			   RAID 0 can't cope with that, so we'll punt */
    865 			too_fatal = 1;
    866 		}
    867 
    868 	}
    869 
    870 	/* record the serial number for later.  If we bail later, setting
    871 	   this doesn't matter, otherwise we've got the best guess at the
    872 	   correct serial number */
    873 	raidPtr->serial_number = serial_number;
    874 
    875 	mod_number = mod_values[0];
    876 	if (num_mod == 2) {
    877 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    878 			/* Locate the maverick component */
    879 			if (mod_count[1] > mod_count[0]) {
    880 				mod_number = mod_values[1];
    881 			} else if (mod_count[1] < mod_count[0]) {
    882 				mod_number = mod_values[0];
    883 			} else {
    884 				/* counts of different modification values
    885 				   are the same.   Assume greater value is
    886 				   the correct one, all other things
    887 				   considered */
    888 				if (mod_values[0] > mod_values[1]) {
    889 					mod_number = mod_values[0];
    890 				} else {
    891 					mod_number = mod_values[1];
    892 				}
    893 
    894 			}
    895 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    896 				for (c = 0; c < raidPtr->numCol; c++) {
    897 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    898 					if (mod_number !=
    899 					    ci_label->mod_counter) {
    900 						if ( ( hosed_row == r ) &&
    901 						     ( hosed_column == c )) {
    902 							/* same one.  Can
    903 							   deal with it.  */
    904 						} else {
    905 							hosed_row = r;
    906 							hosed_column = c;
    907 							if (num_ser != 1) {
    908 								too_fatal = 1;
    909 								break;
    910 							}
    911 						}
    912 					}
    913 				}
    914 			}
    915 			printf("Hosed component: %s\n",
    916 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    917 			if (!force) {
    918 				/* we'll fail this component, as if there are
    919 				   other major errors, we arn't forcing things
    920 				   and we'll abort the config anyways */
    921 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    922 					raidPtr->Disks[hosed_row][hosed_column].status
    923 						= rf_ds_failed;
    924 					raidPtr->numFailures++;
    925 					raidPtr->status[hosed_row] = rf_rs_degraded;
    926 				}
    927 			}
    928 		} else {
    929 			too_fatal = 1;
    930 		}
    931 		if (cfgPtr->parityConfig == '0') {
    932 			/* We've identified two different mod counters.
    933 			   RAID 0 can't cope with that, so we'll punt */
    934 			too_fatal = 1;
    935 		}
    936 	}
    937 
    938 	raidPtr->mod_counter = mod_number;
    939 
    940 	if (too_fatal) {
    941 		/* we've had both a serial number mismatch, and a mod_counter
    942 		   mismatch -- and they involved two different components!!
    943 		   Bail -- make things fail so that the user must force
    944 		   the issue... */
    945 		hosed_row = -1;
    946 		hosed_column = -1;
    947 	}
    948 
    949 	if (num_ser > 2) {
    950 		printf("raid%d: Too many different serial numbers!\n",
    951 		       raidPtr->raidid);
    952 	}
    953 
    954 	if (num_mod > 2) {
    955 		printf("raid%d: Too many different mod counters!\n",
    956 		       raidPtr->raidid);
    957 	}
    958 
    959 	/* we start by assuming the parity will be good, and flee from
    960 	   that notion at the slightest sign of trouble */
    961 
    962 	parity_good = RF_RAID_CLEAN;
    963 	for (r = 0; r < raidPtr->numRow; r++) {
    964 		for (c = 0; c < raidPtr->numCol; c++) {
    965 			dev_name = &cfgPtr->devnames[r][c][0];
    966 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    967 
    968 			if ((r == hosed_row) && (c == hosed_column)) {
    969 				printf("raid%d: Ignoring %s\n",
    970 				       raidPtr->raidid, dev_name);
    971 			} else {
    972 				rf_print_label_status( raidPtr, r, c,
    973 						       dev_name, ci_label );
    974 				if (rf_check_label_vitals( raidPtr, r, c,
    975 							   dev_name, ci_label,
    976 							   serial_number,
    977 							   mod_number )) {
    978 					fatal_error = 1;
    979 				}
    980 				if (ci_label->clean != RF_RAID_CLEAN) {
    981 					parity_good = RF_RAID_DIRTY;
    982 				}
    983 			}
    984 		}
    985 	}
    986 	if (fatal_error) {
    987 		parity_good = RF_RAID_DIRTY;
    988 	}
    989 
    990 	/* we note the state of the parity */
    991 	raidPtr->parity_good = parity_good;
    992 
    993 	return(fatal_error);
    994 }
    995 
    996 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
    997 int
    998 rf_add_hot_spare(raidPtr, sparePtr)
    999 	RF_Raid_t *raidPtr;
   1000 	RF_SingleComponent_t *sparePtr;
   1001 {
   1002 	RF_RaidDisk_t *disks;
   1003 	RF_DiskQueue_t *spareQueues;
   1004 	int ret;
   1005 	unsigned int bs;
   1006 	int spare_number;
   1007 
   1008 	printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
   1009 	printf("Num col: %d\n",raidPtr->numCol);
   1010 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1011 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1012 		return(EINVAL);
   1013 	}
   1014 
   1015 	RF_LOCK_MUTEX(raidPtr->mutex);
   1016 
   1017 	/* the beginning of the spares... */
   1018 	disks = &raidPtr->Disks[0][raidPtr->numCol];
   1019 
   1020 	spare_number = raidPtr->numSpare;
   1021 
   1022 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1023 			       &disks[spare_number], 0,
   1024 			       raidPtr->numCol + spare_number);
   1025 
   1026 	if (ret)
   1027 		goto fail;
   1028 	if (disks[spare_number].status != rf_ds_optimal) {
   1029 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1030 			     sparePtr->component_name);
   1031 		ret=EINVAL;
   1032 		goto fail;
   1033 	} else {
   1034 		disks[spare_number].status = rf_ds_spare;
   1035 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1036 			 disks[spare_number].devname,
   1037 			 (long int) disks[spare_number].numBlocks,
   1038 			 disks[spare_number].blockSize,
   1039 			 (long int) disks[spare_number].numBlocks *
   1040 			 disks[spare_number].blockSize / 1024 / 1024);
   1041 	}
   1042 
   1043 
   1044 	/* check sizes and block sizes on the spare disk */
   1045 	bs = 1 << raidPtr->logBytesPerSector;
   1046 	if (disks[spare_number].blockSize != bs) {
   1047 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1048 		ret = EINVAL;
   1049 		goto fail;
   1050 	}
   1051 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1052 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1053 			     disks[spare_number].devname,
   1054 			     disks[spare_number].blockSize,
   1055 			     (long int) raidPtr->sectorsPerDisk);
   1056 		ret = EINVAL;
   1057 		goto fail;
   1058 	} else {
   1059 		if (disks[spare_number].numBlocks >
   1060 		    raidPtr->sectorsPerDisk) {
   1061 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1062 				     (long int) raidPtr->sectorsPerDisk);
   1063 
   1064 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1065 		}
   1066 	}
   1067 
   1068 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1069 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1070 				 0, raidPtr->numCol + spare_number,
   1071 				 raidPtr->qType,
   1072 				 raidPtr->sectorsPerDisk,
   1073 				 raidPtr->Disks[0][raidPtr->numCol +
   1074 						  spare_number].dev,
   1075 				 raidPtr->maxOutstanding,
   1076 				 &raidPtr->shutdownList,
   1077 				 raidPtr->cleanupList);
   1078 
   1079 
   1080 	raidPtr->numSpare++;
   1081 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1082 	return (0);
   1083 
   1084 fail:
   1085 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1086 	return(ret);
   1087 }
   1088 
   1089 int
   1090 rf_remove_hot_spare(raidPtr,sparePtr)
   1091 	RF_Raid_t *raidPtr;
   1092 	RF_SingleComponent_t *sparePtr;
   1093 {
   1094 	int spare_number;
   1095 
   1096 
   1097 	if (raidPtr->numSpare==0) {
   1098 		printf("No spares to remove!\n");
   1099 		return(EINVAL);
   1100 	}
   1101 
   1102 	spare_number = sparePtr->column;
   1103 
   1104 	return(EINVAL); /* XXX not implemented yet */
   1105 #if 0
   1106 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1107 		return(EINVAL);
   1108 	}
   1109 
   1110 	/* verify that this spare isn't in use... */
   1111 
   1112 
   1113 
   1114 
   1115 	/* it's gone.. */
   1116 
   1117 	raidPtr->numSpare--;
   1118 
   1119 	return(0);
   1120 #endif
   1121 }
   1122 
   1123 
   1124