Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.43
      1 /*	$NetBSD: rf_disks.c,v 1.43 2003/06/28 14:21:42 darrenr Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.43 2003/06/28 14:21:42 darrenr Exp $");
     71 
     72 #include <dev/raidframe/raidframevar.h>
     73 
     74 #include "rf_raid.h"
     75 #include "rf_alloclist.h"
     76 #include "rf_utils.h"
     77 #include "rf_general.h"
     78 #include "rf_options.h"
     79 #include "rf_kintf.h"
     80 #include "rf_netbsd.h"
     81 
     82 #include <sys/param.h>
     83 #include <sys/systm.h>
     84 #include <sys/proc.h>
     85 #include <sys/ioctl.h>
     86 #include <sys/fcntl.h>
     87 #include <sys/vnode.h>
     88 
     89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     90 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
     91 				  RF_ComponentLabel_t *);
     92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     93 				  RF_ComponentLabel_t *, int, int );
     94 
/*
 * Debug printf helpers: the message is emitted only when rf_diskDebug is
 * non-zero.  Each macro is wrapped in do { } while (0) so that it expands
 * to exactly one statement and cannot capture an `else' that follows it.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     97 
     98 /**************************************************************************
     99  *
    100  * initialize the disks comprising the array
    101  *
    102  * We want the spare disks to have regular row,col numbers so that we can
    103  * easily substitue a spare for a failed disk.  But, the driver code assumes
    104  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    105  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    106  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    107  * rest, and put all the spares in it.  This probably needs to get changed
    108  * eventually.
    109  *
    110  **************************************************************************/
    111 
    112 int
    113 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    114 	RF_ShutdownList_t **listp;
    115 	RF_Raid_t *raidPtr;
    116 	RF_Config_t *cfgPtr;
    117 {
    118 	RF_RaidDisk_t **disks;
    119 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    120 	RF_RowCol_t r, c;
    121 	int bs, ret;
    122 	unsigned i, count, foundone = 0, numFailuresThisRow;
    123 	int force;
    124 
    125 	force = cfgPtr->force;
    126 
    127 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    128 	if (ret)
    129 		goto fail;
    130 
    131 	disks = raidPtr->Disks;
    132 
    133 	for (r = 0; r < raidPtr->numRow; r++) {
    134 		numFailuresThisRow = 0;
    135 		for (c = 0; c < raidPtr->numCol; c++) {
    136 			ret = rf_ConfigureDisk(raidPtr,
    137 					       &cfgPtr->devnames[r][c][0],
    138 					       &disks[r][c], r, c);
    139 
    140 			if (ret)
    141 				goto fail;
    142 
    143 			if (disks[r][c].status == rf_ds_optimal) {
    144 				raidread_component_label(
    145 					 raidPtr->raid_cinfo[r][c].ci_dev,
    146 					 raidPtr->raid_cinfo[r][c].ci_vp,
    147 					 &raidPtr->raid_cinfo[r][c].ci_label);
    148 			}
    149 
    150 			if (disks[r][c].status != rf_ds_optimal) {
    151 				numFailuresThisRow++;
    152 			} else {
    153 				if (disks[r][c].numBlocks < min_numblks)
    154 					min_numblks = disks[r][c].numBlocks;
    155 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    156 				    r, c, disks[r][c].devname,
    157 				    (long int) disks[r][c].numBlocks,
    158 				    disks[r][c].blockSize,
    159 				    (long int) disks[r][c].numBlocks *
    160 					 disks[r][c].blockSize / 1024 / 1024);
    161 			}
    162 		}
    163 		/* XXX fix for n-fault tolerant */
    164 		/* XXX this should probably check to see how many failures
    165 		   we can handle for this configuration! */
    166 		if (numFailuresThisRow > 0)
    167 			raidPtr->status[r] = rf_rs_degraded;
    168 	}
    169 
    170 	/* all disks must be the same size & have the same block size, bs must
    171 	 * be a power of 2 */
    172 	bs = 0;
    173 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    174 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    175 			if (disks[r][c].status == rf_ds_optimal) {
    176 				bs = disks[r][c].blockSize;
    177 				foundone = 1;
    178 			}
    179 		}
    180 	}
    181 	if (!foundone) {
    182 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    183 		ret = EINVAL;
    184 		goto fail;
    185 	}
    186 	for (count = 0, i = 1; i; i <<= 1)
    187 		if (bs & i)
    188 			count++;
    189 	if (count != 1) {
    190 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    191 		ret = EINVAL;
    192 		goto fail;
    193 	}
    194 
    195 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    196 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    197 		if (force != 0) {
    198 			printf("raid%d: Fatal errors being ignored.\n",
    199 			       raidPtr->raidid);
    200 		} else {
    201 			ret = EINVAL;
    202 			goto fail;
    203 		}
    204 	}
    205 
    206 	for (r = 0; r < raidPtr->numRow; r++) {
    207 		for (c = 0; c < raidPtr->numCol; c++) {
    208 			if (disks[r][c].status == rf_ds_optimal) {
    209 				if (disks[r][c].blockSize != bs) {
    210 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    211 					ret = EINVAL;
    212 					goto fail;
    213 				}
    214 				if (disks[r][c].numBlocks != min_numblks) {
    215 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    216 					    r, c, (int) min_numblks);
    217 					disks[r][c].numBlocks = min_numblks;
    218 				}
    219 			}
    220 		}
    221 	}
    222 
    223 	raidPtr->sectorsPerDisk = min_numblks;
    224 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    225 	raidPtr->bytesPerSector = bs;
    226 	raidPtr->sectorMask = bs - 1;
    227 	return (0);
    228 
    229 fail:
    230 
    231 	rf_UnconfigureVnodes( raidPtr );
    232 
    233 	return (ret);
    234 }
    235 
    236 
    237 /****************************************************************************
    238  * set up the data structures describing the spare disks in the array
    239  * recall from the above comment that the spare disk descriptors are stored
    240  * in row zero, which is specially expanded to hold them.
    241  ****************************************************************************/
    242 int
    243 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    244 	RF_ShutdownList_t ** listp;
    245 	RF_Raid_t * raidPtr;
    246 	RF_Config_t * cfgPtr;
    247 {
    248 	int     i, ret;
    249 	unsigned int bs;
    250 	RF_RaidDisk_t *disks;
    251 	int     num_spares_done;
    252 
    253 	num_spares_done = 0;
    254 
    255 	/* The space for the spares should have already been allocated by
    256 	 * ConfigureDisks() */
    257 
    258 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    259 	for (i = 0; i < raidPtr->numSpare; i++) {
    260 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    261 				       &disks[i], 0, raidPtr->numCol + i);
    262 		if (ret)
    263 			goto fail;
    264 		if (disks[i].status != rf_ds_optimal) {
    265 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    266 				     &cfgPtr->spare_names[i][0]);
    267 		} else {
    268 			disks[i].status = rf_ds_spare;	/* change status to
    269 							 * spare */
    270 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    271 			    disks[i].devname,
    272 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    273 			    (long int) disks[i].numBlocks *
    274 				 disks[i].blockSize / 1024 / 1024);
    275 		}
    276 		num_spares_done++;
    277 	}
    278 
    279 	/* check sizes and block sizes on spare disks */
    280 	bs = 1 << raidPtr->logBytesPerSector;
    281 	for (i = 0; i < raidPtr->numSpare; i++) {
    282 		if (disks[i].blockSize != bs) {
    283 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    284 			ret = EINVAL;
    285 			goto fail;
    286 		}
    287 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    288 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    289 				     disks[i].devname, disks[i].blockSize,
    290 				     (long int) raidPtr->sectorsPerDisk);
    291 			ret = EINVAL;
    292 			goto fail;
    293 		} else
    294 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    295 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    296 
    297 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    298 			}
    299 	}
    300 
    301 	return (0);
    302 
    303 fail:
    304 
    305 	/* Release the hold on the main components.  We've failed to allocate
    306 	 * a spare, and since we're failing, we need to free things..
    307 
    308 	 XXX failing to allocate a spare is *not* that big of a deal...
    309 	 We *can* survive without it, if need be, esp. if we get hot
    310 	 adding working.
    311 
    312 	 If we don't fail out here, then we need a way to remove this spare...
    313 	 that should be easier to do here than if we are "live"...
    314 
    315 	 */
    316 
    317 	rf_UnconfigureVnodes( raidPtr );
    318 
    319 	return (ret);
    320 }
    321 
    322 static int
    323 rf_AllocDiskStructures(raidPtr, cfgPtr)
    324 	RF_Raid_t *raidPtr;
    325  	RF_Config_t *cfgPtr;
    326 {
    327 	RF_RaidDisk_t **disks;
    328 	int ret;
    329 	int r;
    330 
    331 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    332 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    333 	if (disks == NULL) {
    334 		ret = ENOMEM;
    335 		goto fail;
    336 	}
    337 	raidPtr->Disks = disks;
    338 	/* get space for the device-specific stuff... */
    339 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    340 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    341 	    raidPtr->cleanupList);
    342 	if (raidPtr->raid_cinfo == NULL) {
    343 		ret = ENOMEM;
    344 		goto fail;
    345 	}
    346 
    347 	for (r = 0; r < raidPtr->numRow; r++) {
    348 		/* We allocate RF_MAXSPARE on the first row so that we
    349 		   have room to do hot-swapping of spares */
    350 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    351 				+ ((r == 0) ? RF_MAXSPARE : 0),
    352 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    353 				raidPtr->cleanupList);
    354 		if (disks[r] == NULL) {
    355 			ret = ENOMEM;
    356 			goto fail;
    357 		}
    358 		/* get more space for device specific stuff.. */
    359 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    360 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    361 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    362 		    raidPtr->cleanupList);
    363 		if (raidPtr->raid_cinfo[r] == NULL) {
    364 			ret = ENOMEM;
    365 			goto fail;
    366 		}
    367 	}
    368 	return(0);
    369 fail:
    370 	rf_UnconfigureVnodes( raidPtr );
    371 
    372 	return(ret);
    373 }
    374 
    375 
    376 /* configure a single disk during auto-configuration at boot */
    377 int
    378 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    379 	RF_Raid_t *raidPtr;
    380 	RF_Config_t *cfgPtr;
    381 	RF_AutoConfig_t *auto_config;
    382 {
    383 	RF_RaidDisk_t **disks;
    384 	RF_RaidDisk_t *diskPtr;
    385 	RF_RowCol_t r, c;
    386 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    387 	int bs, ret;
    388 	int numFailuresThisRow;
    389 	RF_AutoConfig_t *ac;
    390 	int parity_good;
    391 	int mod_counter;
    392 	int mod_counter_found;
    393 
    394 #if DEBUG
    395 	printf("Starting autoconfiguration of RAID set...\n");
    396 #endif
    397 
    398 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    399 	if (ret)
    400 		goto fail;
    401 
    402 	disks = raidPtr->Disks;
    403 
    404 	/* assume the parity will be fine.. */
    405 	parity_good = RF_RAID_CLEAN;
    406 
    407 	/* Check for mod_counters that are too low */
    408 	mod_counter_found = 0;
    409 	mod_counter = 0;
    410 	ac = auto_config;
    411 	while(ac!=NULL) {
    412 		if (mod_counter_found==0) {
    413 			mod_counter = ac->clabel->mod_counter;
    414 			mod_counter_found = 1;
    415 		} else {
    416 			if (ac->clabel->mod_counter > mod_counter) {
    417 				mod_counter = ac->clabel->mod_counter;
    418 			}
    419 		}
    420 		ac->flag = 0; /* clear the general purpose flag */
    421 		ac = ac->next;
    422 	}
    423 
    424 	bs = 0;
    425 	for (r = 0; r < raidPtr->numRow; r++) {
    426 		numFailuresThisRow = 0;
    427 		for (c = 0; c < raidPtr->numCol; c++) {
    428 			diskPtr = &disks[r][c];
    429 
    430 			/* find this row/col in the autoconfig */
    431 #if DEBUG
    432 			printf("Looking for %d,%d in autoconfig\n",r,c);
    433 #endif
    434 			ac = auto_config;
    435 			while(ac!=NULL) {
    436 				if (ac->clabel==NULL) {
    437 					/* big-time bad news. */
    438 					goto fail;
    439 				}
    440 				if ((ac->clabel->row == r) &&
    441 				    (ac->clabel->column == c) &&
    442 				    (ac->clabel->mod_counter == mod_counter)) {
    443 					/* it's this one... */
    444 					/* flag it as 'used', so we don't
    445 					   free it later. */
    446 					ac->flag = 1;
    447 #if DEBUG
    448 					printf("Found: %s at %d,%d\n",
    449 					       ac->devname,r,c);
    450 #endif
    451 
    452 					break;
    453 				}
    454 				ac=ac->next;
    455 			}
    456 
    457 			if (ac==NULL) {
    458 				/* we didn't find an exact match with a
    459 				   correct mod_counter above... can we
    460 				   find one with an incorrect mod_counter
    461 				   to use instead?  (this one, if we find
    462 				   it, will be marked as failed once the
    463 				   set configures)
    464 				*/
    465 
    466 				ac = auto_config;
    467 				while(ac!=NULL) {
    468 					if (ac->clabel==NULL) {
    469 						/* big-time bad news. */
    470 						goto fail;
    471 					}
    472 					if ((ac->clabel->row == r) &&
    473 					    (ac->clabel->column == c)) {
    474 						/* it's this one...
    475 						   flag it as 'used', so we
    476 						   don't free it later. */
    477 						ac->flag = 1;
    478 #if DEBUG
    479 						printf("Found(low mod_counter): %s at %d,%d\n",
    480 						       ac->devname,r,c);
    481 #endif
    482 
    483 						break;
    484 					}
    485 					ac=ac->next;
    486 				}
    487 			}
    488 
    489 
    490 
    491 			if (ac!=NULL) {
    492 				/* Found it.  Configure it.. */
    493 				diskPtr->blockSize = ac->clabel->blockSize;
    494 				diskPtr->numBlocks = ac->clabel->numBlocks;
    495 				/* Note: rf_protectedSectors is already
    496 				   factored into numBlocks here */
    497 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    498 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
    499 
    500 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    501 				       ac->clabel, sizeof(*ac->clabel));
    502 				sprintf(diskPtr->devname, "/dev/%s",
    503 					ac->devname);
    504 
    505 				/* note the fact that this component was
    506 				   autoconfigured.  You'll need this info
    507 				   later.  Trust me :) */
    508 				diskPtr->auto_configured = 1;
    509 				diskPtr->dev = ac->dev;
    510 
    511 				/*
    512 				 * we allow the user to specify that
    513 				 * only a fraction of the disks should
    514 				 * be used this is just for debug: it
    515 				 * speeds up the parity scan
    516 				 */
    517 
    518 				diskPtr->numBlocks = diskPtr->numBlocks *
    519 					rf_sizePercentage / 100;
    520 
    521 				/* XXX these will get set multiple times,
    522 				   but since we're autoconfiguring, they'd
    523 				   better be always the same each time!
    524 				   If not, this is the least of your worries */
    525 
    526 				bs = diskPtr->blockSize;
    527 				min_numblks = diskPtr->numBlocks;
    528 
    529 				/* this gets done multiple times, but that's
    530 				   fine -- the serial number will be the same
    531 				   for all components, guaranteed */
    532 				raidPtr->serial_number =
    533 					ac->clabel->serial_number;
    534 				/* check the last time the label
    535 				   was modified */
    536 				if (ac->clabel->mod_counter !=
    537 				    mod_counter) {
    538 					/* Even though we've filled in all
    539 					   of the above, we don't trust
    540 					   this component since it's
    541 					   modification counter is not
    542 					   in sync with the rest, and we really
    543 					   consider it to be failed.  */
    544 					disks[r][c].status = rf_ds_failed;
    545 					numFailuresThisRow++;
    546 				} else {
    547 					if (ac->clabel->clean !=
    548 					    RF_RAID_CLEAN) {
    549 						parity_good = RF_RAID_DIRTY;
    550 					}
    551 				}
    552 			} else {
    553 				/* Didn't find it at all!!
    554 				   Component must really be dead */
    555 				disks[r][c].status = rf_ds_failed;
    556 				sprintf(disks[r][c].devname,"component%d",
    557 					r * raidPtr->numCol + c);
    558 				numFailuresThisRow++;
    559 			}
    560 		}
    561 		/* XXX fix for n-fault tolerant */
    562 		/* XXX this should probably check to see how many failures
    563 		   we can handle for this configuration! */
    564 		if (numFailuresThisRow > 0)
    565 			raidPtr->status[r] = rf_rs_degraded;
    566 	}
    567 
    568 	/* close the device for the ones that didn't get used */
    569 
    570 	ac = auto_config;
    571 	while(ac!=NULL) {
    572 		if (ac->flag == 0) {
    573 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    574 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
    575 			vput(ac->vp);
    576 			ac->vp = NULL;
    577 #if DEBUG
    578 			printf("Released %s from auto-config set.\n",
    579 			       ac->devname);
    580 #endif
    581 		}
    582 		ac = ac->next;
    583 	}
    584 
    585 	raidPtr->mod_counter = mod_counter;
    586 
    587 	/* note the state of the parity, if any */
    588 	raidPtr->parity_good = parity_good;
    589 	raidPtr->sectorsPerDisk = min_numblks;
    590 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    591 	raidPtr->bytesPerSector = bs;
    592 	raidPtr->sectorMask = bs - 1;
    593 	return (0);
    594 
    595 fail:
    596 
    597 	rf_UnconfigureVnodes( raidPtr );
    598 
    599 	return (ret);
    600 
    601 }
    602 
    603 /* configure a single disk in the array */
    604 int
    605 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    606 	RF_Raid_t *raidPtr;
    607 	char   *buf;
    608 	RF_RaidDisk_t *diskPtr;
    609 	RF_RowCol_t row;
    610 	RF_RowCol_t col;
    611 {
    612 	char   *p;
    613 	struct partinfo dpart;
    614 	struct vnode *vp;
    615 	struct vattr va;
    616 	struct lwp *l;
    617 	int     error;
    618 
    619 	p = rf_find_non_white(buf);
    620 	if (p[strlen(p) - 1] == '\n') {
    621 		/* strip off the newline */
    622 		p[strlen(p) - 1] = '\0';
    623 	}
    624 	(void) strcpy(diskPtr->devname, p);
    625 
    626 	l = LIST_FIRST(&raidPtr->engine_thread->p_lwps);
    627 
    628 	/* Let's start by claiming the component is fine and well... */
    629 	diskPtr->status = rf_ds_optimal;
    630 
    631 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    632 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    633 
    634 	error = raidlookup(diskPtr->devname, l, &vp);
    635 	if (error) {
    636 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    637 		if (error == ENXIO) {
    638 			/* the component isn't there... must be dead :-( */
    639 			diskPtr->status = rf_ds_failed;
    640 		} else {
    641 			return (error);
    642 		}
    643 	}
    644 	if (diskPtr->status == rf_ds_optimal) {
    645 
    646 		if ((error = VOP_GETATTR(vp, &va,
    647 		    l->l_proc->p_ucred, l)) != 0) {
    648 			return (error);
    649 		}
    650 		error = VOP_IOCTL(vp, DIOCGPART, &dpart,
    651 				  FREAD, l->l_proc->p_ucred, l);
    652 		if (error) {
    653 			return (error);
    654 		}
    655 
    656 		diskPtr->blockSize = dpart.disklab->d_secsize;
    657 
    658 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    659 		diskPtr->partitionSize = dpart.part->p_size;
    660 
    661 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    662 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    663 
    664 		/* This component was not automatically configured */
    665 		diskPtr->auto_configured = 0;
    666 		diskPtr->dev = va.va_rdev;
    667 
    668 		/* we allow the user to specify that only a fraction of the
    669 		 * disks should be used this is just for debug:  it speeds up
    670 		 * the parity scan */
    671 		diskPtr->numBlocks = diskPtr->numBlocks *
    672 			rf_sizePercentage / 100;
    673 	}
    674 	return (0);
    675 }
    676 
    677 static void
    678 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    679 	RF_Raid_t *raidPtr;
    680 	int row;
    681 	int column;
    682 	char *dev_name;
    683 	RF_ComponentLabel_t *ci_label;
    684 {
    685 
    686 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    687 	       raidPtr->raidid, dev_name, row, column );
    688 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    689 	       ci_label->row, ci_label->column,
    690 	       ci_label->num_rows, ci_label->num_columns);
    691 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    692 	       ci_label->version, ci_label->serial_number,
    693 	       ci_label->mod_counter);
    694 	printf("         Clean: %s Status: %d\n",
    695 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    696 }
    697 
    698 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    699 				  serial_number, mod_counter )
    700 	RF_Raid_t *raidPtr;
    701 	int row;
    702 	int column;
    703 	char *dev_name;
    704 	RF_ComponentLabel_t *ci_label;
    705 	int serial_number;
    706 	int mod_counter;
    707 {
    708 	int fatal_error = 0;
    709 
    710 	if (serial_number != ci_label->serial_number) {
    711 		printf("%s has a different serial number: %d %d\n",
    712 		       dev_name, serial_number, ci_label->serial_number);
    713 		fatal_error = 1;
    714 	}
    715 	if (mod_counter != ci_label->mod_counter) {
    716 		printf("%s has a different modfication count: %d %d\n",
    717 		       dev_name, mod_counter, ci_label->mod_counter);
    718 	}
    719 
    720 	if (row != ci_label->row) {
    721 		printf("Row out of alignment for: %s\n", dev_name);
    722 		fatal_error = 1;
    723 	}
    724 	if (column != ci_label->column) {
    725 		printf("Column out of alignment for: %s\n", dev_name);
    726 		fatal_error = 1;
    727 	}
    728 	if (raidPtr->numRow != ci_label->num_rows) {
    729 		printf("Number of rows do not match for: %s\n", dev_name);
    730 		fatal_error = 1;
    731 	}
    732 	if (raidPtr->numCol != ci_label->num_columns) {
    733 		printf("Number of columns do not match for: %s\n", dev_name);
    734 		fatal_error = 1;
    735 	}
    736 	if (ci_label->clean == 0) {
    737 		/* it's not clean, but that's not fatal */
    738 		printf("%s is not clean!\n", dev_name);
    739 	}
    740 	return(fatal_error);
    741 }
    742 
    743 
    744 /*
    745 
    746    rf_CheckLabels() - check all the component labels for consistency.
    747    Return an error if there is anything major amiss.
    748 
    749  */
    750 
    751 int
    752 rf_CheckLabels( raidPtr, cfgPtr )
    753 	RF_Raid_t *raidPtr;
    754 	RF_Config_t *cfgPtr;
    755 {
    756 	int r,c;
    757 	char *dev_name;
    758 	RF_ComponentLabel_t *ci_label;
    759 	int serial_number = 0;
    760 	int mod_number = 0;
    761 	int fatal_error = 0;
    762 	int mod_values[4];
    763 	int mod_count[4];
    764 	int ser_values[4];
    765 	int ser_count[4];
    766 	int num_ser;
    767 	int num_mod;
    768 	int i;
    769 	int found;
    770 	int hosed_row;
    771 	int hosed_column;
    772 	int too_fatal;
    773 	int parity_good;
    774 	int force;
    775 
    776 	hosed_row = -1;
    777 	hosed_column = -1;
    778 	too_fatal = 0;
    779 	force = cfgPtr->force;
    780 
    781 	/*
    782 	   We're going to try to be a little intelligent here.  If one
    783 	   component's label is bogus, and we can identify that it's the
    784 	   *only* one that's gone, we'll mark it as "failed" and allow
    785 	   the configuration to proceed.  This will be the *only* case
    786 	   that we'll proceed if there would be (otherwise) fatal errors.
    787 
    788 	   Basically we simply keep a count of how many components had
    789 	   what serial number.  If all but one agree, we simply mark
    790 	   the disagreeing component as being failed, and allow
    791 	   things to come up "normally".
    792 
    793 	   We do this first for serial numbers, and then for "mod_counter".
    794 
    795 	 */
    796 
    797 	num_ser = 0;
    798 	num_mod = 0;
    799 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    800 		for (c = 0; c < raidPtr->numCol; c++) {
    801 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    802 			found=0;
    803 			for(i=0;i<num_ser;i++) {
    804 				if (ser_values[i] == ci_label->serial_number) {
    805 					ser_count[i]++;
    806 					found=1;
    807 					break;
    808 				}
    809 			}
    810 			if (!found) {
    811 				ser_values[num_ser] = ci_label->serial_number;
    812 				ser_count[num_ser] = 1;
    813 				num_ser++;
    814 				if (num_ser>2) {
    815 					fatal_error = 1;
    816 					break;
    817 				}
    818 			}
    819 			found=0;
    820 			for(i=0;i<num_mod;i++) {
    821 				if (mod_values[i] == ci_label->mod_counter) {
    822 					mod_count[i]++;
    823 					found=1;
    824 					break;
    825 				}
    826 			}
    827 			if (!found) {
    828 			        mod_values[num_mod] = ci_label->mod_counter;
    829 				mod_count[num_mod] = 1;
    830 				num_mod++;
    831 				if (num_mod>2) {
    832 					fatal_error = 1;
    833 					break;
    834 				}
    835 			}
    836 		}
    837 	}
    838 #if DEBUG
    839 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    840 	for(i=0;i<num_ser;i++) {
    841 		printf("%d %d\n", ser_values[i], ser_count[i]);
    842 	}
    843 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    844 	for(i=0;i<num_mod;i++) {
    845 		printf("%d %d\n", mod_values[i], mod_count[i]);
    846 	}
    847 #endif
    848 	serial_number = ser_values[0];
    849 	if (num_ser == 2) {
    850 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    851 			/* Locate the maverick component */
    852 			if (ser_count[1] > ser_count[0]) {
    853 				serial_number = ser_values[1];
    854 			}
    855 			for (r = 0; r < raidPtr->numRow; r++) {
    856 				for (c = 0; c < raidPtr->numCol; c++) {
    857 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    858 					if (serial_number !=
    859 					    ci_label->serial_number) {
    860 						hosed_row = r;
    861 						hosed_column = c;
    862 						break;
    863 					}
    864 				}
    865 			}
    866 			printf("Hosed component: %s\n",
    867 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    868 			if (!force) {
    869 				/* we'll fail this component, as if there are
    870 				   other major errors, we arn't forcing things
    871 				   and we'll abort the config anyways */
    872 				raidPtr->Disks[hosed_row][hosed_column].status
    873 					= rf_ds_failed;
    874 				raidPtr->numFailures++;
    875 				raidPtr->status[hosed_row] = rf_rs_degraded;
    876 			}
    877 		} else {
    878 			too_fatal = 1;
    879 		}
    880 		if (cfgPtr->parityConfig == '0') {
    881 			/* We've identified two different serial numbers.
    882 			   RAID 0 can't cope with that, so we'll punt */
    883 			too_fatal = 1;
    884 		}
    885 
    886 	}
    887 
    888 	/* record the serial number for later.  If we bail later, setting
    889 	   this doesn't matter, otherwise we've got the best guess at the
    890 	   correct serial number */
    891 	raidPtr->serial_number = serial_number;
    892 
    893 	mod_number = mod_values[0];
    894 	if (num_mod == 2) {
    895 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    896 			/* Locate the maverick component */
    897 			if (mod_count[1] > mod_count[0]) {
    898 				mod_number = mod_values[1];
    899 			} else if (mod_count[1] < mod_count[0]) {
    900 				mod_number = mod_values[0];
    901 			} else {
    902 				/* counts of different modification values
    903 				   are the same.   Assume greater value is
    904 				   the correct one, all other things
    905 				   considered */
    906 				if (mod_values[0] > mod_values[1]) {
    907 					mod_number = mod_values[0];
    908 				} else {
    909 					mod_number = mod_values[1];
    910 				}
    911 
    912 			}
    913 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    914 				for (c = 0; c < raidPtr->numCol; c++) {
    915 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    916 					if (mod_number !=
    917 					    ci_label->mod_counter) {
    918 						if ( ( hosed_row == r ) &&
    919 						     ( hosed_column == c )) {
    920 							/* same one.  Can
    921 							   deal with it.  */
    922 						} else {
    923 							hosed_row = r;
    924 							hosed_column = c;
    925 							if (num_ser != 1) {
    926 								too_fatal = 1;
    927 								break;
    928 							}
    929 						}
    930 					}
    931 				}
    932 			}
    933 			printf("Hosed component: %s\n",
    934 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    935 			if (!force) {
    936 				/* we'll fail this component, as if there are
    937 				   other major errors, we arn't forcing things
    938 				   and we'll abort the config anyways */
    939 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    940 					raidPtr->Disks[hosed_row][hosed_column].status
    941 						= rf_ds_failed;
    942 					raidPtr->numFailures++;
    943 					raidPtr->status[hosed_row] = rf_rs_degraded;
    944 				}
    945 			}
    946 		} else {
    947 			too_fatal = 1;
    948 		}
    949 		if (cfgPtr->parityConfig == '0') {
    950 			/* We've identified two different mod counters.
    951 			   RAID 0 can't cope with that, so we'll punt */
    952 			too_fatal = 1;
    953 		}
    954 	}
    955 
    956 	raidPtr->mod_counter = mod_number;
    957 
    958 	if (too_fatal) {
    959 		/* we've had both a serial number mismatch, and a mod_counter
    960 		   mismatch -- and they involved two different components!!
    961 		   Bail -- make things fail so that the user must force
    962 		   the issue... */
    963 		hosed_row = -1;
    964 		hosed_column = -1;
    965 	}
    966 
    967 	if (num_ser > 2) {
    968 		printf("raid%d: Too many different serial numbers!\n",
    969 		       raidPtr->raidid);
    970 	}
    971 
    972 	if (num_mod > 2) {
    973 		printf("raid%d: Too many different mod counters!\n",
    974 		       raidPtr->raidid);
    975 	}
    976 
    977 	/* we start by assuming the parity will be good, and flee from
    978 	   that notion at the slightest sign of trouble */
    979 
    980 	parity_good = RF_RAID_CLEAN;
    981 	for (r = 0; r < raidPtr->numRow; r++) {
    982 		for (c = 0; c < raidPtr->numCol; c++) {
    983 			dev_name = &cfgPtr->devnames[r][c][0];
    984 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    985 
    986 			if ((r == hosed_row) && (c == hosed_column)) {
    987 				printf("raid%d: Ignoring %s\n",
    988 				       raidPtr->raidid, dev_name);
    989 			} else {
    990 				rf_print_label_status( raidPtr, r, c,
    991 						       dev_name, ci_label );
    992 				if (rf_check_label_vitals( raidPtr, r, c,
    993 							   dev_name, ci_label,
    994 							   serial_number,
    995 							   mod_number )) {
    996 					fatal_error = 1;
    997 				}
    998 				if (ci_label->clean != RF_RAID_CLEAN) {
    999 					parity_good = RF_RAID_DIRTY;
   1000 				}
   1001 			}
   1002 		}
   1003 	}
   1004 	if (fatal_error) {
   1005 		parity_good = RF_RAID_DIRTY;
   1006 	}
   1007 
   1008 	/* we note the state of the parity */
   1009 	raidPtr->parity_good = parity_good;
   1010 
   1011 	return(fatal_error);
   1012 }
   1013 
   1014 int
   1015 rf_add_hot_spare(raidPtr, sparePtr)
   1016 	RF_Raid_t *raidPtr;
   1017 	RF_SingleComponent_t *sparePtr;
   1018 {
   1019 	RF_RaidDisk_t *disks;
   1020 	RF_DiskQueue_t *spareQueues;
   1021 	int ret;
   1022 	unsigned int bs;
   1023 	int spare_number;
   1024 
   1025 	ret=0;
   1026 
   1027 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1028 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1029 		return(EINVAL);
   1030 	}
   1031 
   1032 	RF_LOCK_MUTEX(raidPtr->mutex);
   1033 	while (raidPtr->adding_hot_spare==1) {
   1034 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
   1035 			&(raidPtr->mutex));
   1036 	}
   1037 	raidPtr->adding_hot_spare=1;
   1038 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1039 
   1040 	/* the beginning of the spares... */
   1041 	disks = &raidPtr->Disks[0][raidPtr->numCol];
   1042 
   1043 	spare_number = raidPtr->numSpare;
   1044 
   1045 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1046 			       &disks[spare_number], 0,
   1047 			       raidPtr->numCol + spare_number);
   1048 
   1049 	if (ret)
   1050 		goto fail;
   1051 	if (disks[spare_number].status != rf_ds_optimal) {
   1052 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1053 			     sparePtr->component_name);
   1054 		rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
   1055 		ret=EINVAL;
   1056 		goto fail;
   1057 	} else {
   1058 		disks[spare_number].status = rf_ds_spare;
   1059 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1060 			 disks[spare_number].devname,
   1061 			 (long int) disks[spare_number].numBlocks,
   1062 			 disks[spare_number].blockSize,
   1063 			 (long int) disks[spare_number].numBlocks *
   1064 			 disks[spare_number].blockSize / 1024 / 1024);
   1065 	}
   1066 
   1067 
   1068 	/* check sizes and block sizes on the spare disk */
   1069 	bs = 1 << raidPtr->logBytesPerSector;
   1070 	if (disks[spare_number].blockSize != bs) {
   1071 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1072 		rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
   1073 		ret = EINVAL;
   1074 		goto fail;
   1075 	}
   1076 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1077 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1078 			     disks[spare_number].devname,
   1079 			     disks[spare_number].blockSize,
   1080 			     (long int) raidPtr->sectorsPerDisk);
   1081 		rf_close_component(raidPtr, raidPtr->raid_cinfo[0][raidPtr->numCol+spare_number].ci_vp, 0);
   1082 		ret = EINVAL;
   1083 		goto fail;
   1084 	} else {
   1085 		if (disks[spare_number].numBlocks >
   1086 		    raidPtr->sectorsPerDisk) {
   1087 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1088 				     (long int) raidPtr->sectorsPerDisk);
   1089 
   1090 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1091 		}
   1092 	}
   1093 
   1094 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1095 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1096 				 0, raidPtr->numCol + spare_number,
   1097 				 raidPtr->qType,
   1098 				 raidPtr->sectorsPerDisk,
   1099 				 raidPtr->Disks[0][raidPtr->numCol +
   1100 						  spare_number].dev,
   1101 				 raidPtr->maxOutstanding,
   1102 				 &raidPtr->shutdownList,
   1103 				 raidPtr->cleanupList);
   1104 
   1105 	RF_LOCK_MUTEX(raidPtr->mutex);
   1106 	raidPtr->numSpare++;
   1107 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1108 
   1109 fail:
   1110 	RF_LOCK_MUTEX(raidPtr->mutex);
   1111 	raidPtr->adding_hot_spare=0;
   1112 	wakeup(&(raidPtr->adding_hot_spare));
   1113 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1114 
   1115 	return(ret);
   1116 }
   1117 
   1118 int
   1119 rf_remove_hot_spare(raidPtr,sparePtr)
   1120 	RF_Raid_t *raidPtr;
   1121 	RF_SingleComponent_t *sparePtr;
   1122 {
   1123 	int spare_number;
   1124 
   1125 
   1126 	if (raidPtr->numSpare==0) {
   1127 		printf("No spares to remove!\n");
   1128 		return(EINVAL);
   1129 	}
   1130 
   1131 	spare_number = sparePtr->column;
   1132 
   1133 	return(EINVAL); /* XXX not implemented yet */
   1134 #if 0
   1135 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1136 		return(EINVAL);
   1137 	}
   1138 
   1139 	/* verify that this spare isn't in use... */
   1140 
   1141 
   1142 
   1143 
   1144 	/* it's gone.. */
   1145 
   1146 	raidPtr->numSpare--;
   1147 
   1148 	return(0);
   1149 #endif
   1150 }
   1151 
   1152 
   1153 int
   1154 rf_delete_component(raidPtr,component)
   1155 	RF_Raid_t *raidPtr;
   1156 	RF_SingleComponent_t *component;
   1157 {
   1158 	RF_RaidDisk_t *disks;
   1159 
   1160 	if ((component->row < 0) ||
   1161 	    (component->row >= raidPtr->numRow) ||
   1162 	    (component->column < 0) ||
   1163 	    (component->column >= raidPtr->numCol)) {
   1164 		return(EINVAL);
   1165 	}
   1166 
   1167 	disks = &raidPtr->Disks[component->row][component->column];
   1168 
   1169 	/* 1. This component must be marked as 'failed' */
   1170 
   1171 	return(EINVAL); /* Not implemented yet. */
   1172 }
   1173 
   1174 int
   1175 rf_incorporate_hot_spare(raidPtr,component)
   1176 	RF_Raid_t *raidPtr;
   1177 	RF_SingleComponent_t *component;
   1178 {
   1179 
   1180 	/* Issues here include how to 'move' this in if there is IO
   1181 	   taking place (e.g. component queues and such) */
   1182 
   1183 	return(EINVAL); /* Not implemented yet. */
   1184 }
   1185