Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.34.6.1
      1 /*	$NetBSD: rf_disks.c,v 1.34.6.1 2001/09/07 04:45:28 thorpej Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include "rf_types.h"
     70 #include "rf_raid.h"
     71 #include "rf_alloclist.h"
     72 #include "rf_utils.h"
     73 #include "rf_configure.h"
     74 #include "rf_general.h"
     75 #include "rf_options.h"
     76 #include "rf_kintf.h"
     77 #include "rf_netbsd.h"
     78 
     79 #include <sys/types.h>
     80 #include <sys/param.h>
     81 #include <sys/systm.h>
     82 #include <sys/proc.h>
     83 #include <sys/ioctl.h>
     84 #include <sys/fcntl.h>
     85 #include <sys/vnode.h>
     86 
     87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     88 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
     89 				  RF_ComponentLabel_t *);
     90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     91 				  RF_ComponentLabel_t *, int, int );
     92 
/*
 * Debug-only printf wrappers: the message is printed only when rf_diskDebug
 * is non-zero.  DPRINTF6 takes a format plus five arguments, DPRINTF7 a
 * format plus six.
 *
 * The bodies are wrapped in do { } while (0) so that each use expands to
 * exactly one statement.  With a bare "if (...) printf(...)" expansion, an
 * "else" that follows a use of the macro would silently bind to the hidden
 * "if" instead of the caller's own "if".
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     95 
     96 /**************************************************************************
     97  *
     98  * initialize the disks comprising the array
     99  *
    100  * We want the spare disks to have regular row,col numbers so that we can
    101  * easily substitute a spare for a failed disk.  But, the driver code assumes
    102  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    103  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    104  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    105  * rest, and put all the spares in it.  This probably needs to get changed
    106  * eventually.
    107  *
    108  **************************************************************************/
    109 
    110 int
    111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    112 	RF_ShutdownList_t **listp;
    113 	RF_Raid_t *raidPtr;
    114 	RF_Config_t *cfgPtr;
    115 {
    116 	RF_RaidDisk_t **disks;
    117 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    118 	RF_RowCol_t r, c;
    119 	int bs, ret;
    120 	unsigned i, count, foundone = 0, numFailuresThisRow;
    121 	int force;
    122 
    123 	force = cfgPtr->force;
    124 
    125 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    126 	if (ret)
    127 		goto fail;
    128 
    129 	disks = raidPtr->Disks;
    130 
    131 	for (r = 0; r < raidPtr->numRow; r++) {
    132 		numFailuresThisRow = 0;
    133 		for (c = 0; c < raidPtr->numCol; c++) {
    134 			ret = rf_ConfigureDisk(raidPtr,
    135 					       &cfgPtr->devnames[r][c][0],
    136 					       &disks[r][c], r, c);
    137 
    138 			if (ret)
    139 				goto fail;
    140 
    141 			if (disks[r][c].status == rf_ds_optimal) {
    142 				raidread_component_label(
    143 					 raidPtr->raid_cinfo[r][c].ci_vp,
    144 					 &raidPtr->raid_cinfo[r][c].ci_label);
    145 			}
    146 
    147 			if (disks[r][c].status != rf_ds_optimal) {
    148 				numFailuresThisRow++;
    149 			} else {
    150 				if (disks[r][c].numBlocks < min_numblks)
    151 					min_numblks = disks[r][c].numBlocks;
    152 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    153 				    r, c, disks[r][c].devname,
    154 				    (long int) disks[r][c].numBlocks,
    155 				    disks[r][c].blockSize,
    156 				    (long int) disks[r][c].numBlocks *
    157 					 disks[r][c].blockSize / 1024 / 1024);
    158 			}
    159 		}
    160 		/* XXX fix for n-fault tolerant */
    161 		/* XXX this should probably check to see how many failures
    162 		   we can handle for this configuration! */
    163 		if (numFailuresThisRow > 0)
    164 			raidPtr->status[r] = rf_rs_degraded;
    165 	}
    166 
    167 	/* all disks must be the same size & have the same block size, bs must
    168 	 * be a power of 2 */
    169 	bs = 0;
    170 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    171 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    172 			if (disks[r][c].status == rf_ds_optimal) {
    173 				bs = disks[r][c].blockSize;
    174 				foundone = 1;
    175 			}
    176 		}
    177 	}
    178 	if (!foundone) {
    179 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    180 		ret = EINVAL;
    181 		goto fail;
    182 	}
    183 	for (count = 0, i = 1; i; i <<= 1)
    184 		if (bs & i)
    185 			count++;
    186 	if (count != 1) {
    187 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    188 		ret = EINVAL;
    189 		goto fail;
    190 	}
    191 
    192 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    193 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    194 		if (force != 0) {
    195 			printf("raid%d: Fatal errors being ignored.\n",
    196 			       raidPtr->raidid);
    197 		} else {
    198 			ret = EINVAL;
    199 			goto fail;
    200 		}
    201 	}
    202 
    203 	for (r = 0; r < raidPtr->numRow; r++) {
    204 		for (c = 0; c < raidPtr->numCol; c++) {
    205 			if (disks[r][c].status == rf_ds_optimal) {
    206 				if (disks[r][c].blockSize != bs) {
    207 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    208 					ret = EINVAL;
    209 					goto fail;
    210 				}
    211 				if (disks[r][c].numBlocks != min_numblks) {
    212 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    213 					    r, c, (int) min_numblks);
    214 					disks[r][c].numBlocks = min_numblks;
    215 				}
    216 			}
    217 		}
    218 	}
    219 
    220 	raidPtr->sectorsPerDisk = min_numblks;
    221 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    222 	raidPtr->bytesPerSector = bs;
    223 	raidPtr->sectorMask = bs - 1;
    224 	return (0);
    225 
    226 fail:
    227 
    228 	rf_UnconfigureVnodes( raidPtr );
    229 
    230 	return (ret);
    231 }
    232 
    233 
    234 /****************************************************************************
    235  * set up the data structures describing the spare disks in the array
    236  * recall from the above comment that the spare disk descriptors are stored
    237  * in row zero, which is specially expanded to hold them.
    238  ****************************************************************************/
    239 int
    240 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    241 	RF_ShutdownList_t ** listp;
    242 	RF_Raid_t * raidPtr;
    243 	RF_Config_t * cfgPtr;
    244 {
    245 	int     i, ret;
    246 	unsigned int bs;
    247 	RF_RaidDisk_t *disks;
    248 	int     num_spares_done;
    249 
    250 	num_spares_done = 0;
    251 
    252 	/* The space for the spares should have already been allocated by
    253 	 * ConfigureDisks() */
    254 
    255 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    256 	for (i = 0; i < raidPtr->numSpare; i++) {
    257 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    258 				       &disks[i], 0, raidPtr->numCol + i);
    259 		if (ret)
    260 			goto fail;
    261 		if (disks[i].status != rf_ds_optimal) {
    262 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    263 				     &cfgPtr->spare_names[i][0]);
    264 		} else {
    265 			disks[i].status = rf_ds_spare;	/* change status to
    266 							 * spare */
    267 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    268 			    disks[i].devname,
    269 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    270 			    (long int) disks[i].numBlocks *
    271 				 disks[i].blockSize / 1024 / 1024);
    272 		}
    273 		num_spares_done++;
    274 	}
    275 
    276 	/* check sizes and block sizes on spare disks */
    277 	bs = 1 << raidPtr->logBytesPerSector;
    278 	for (i = 0; i < raidPtr->numSpare; i++) {
    279 		if (disks[i].blockSize != bs) {
    280 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    281 			ret = EINVAL;
    282 			goto fail;
    283 		}
    284 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    285 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    286 				     disks[i].devname, disks[i].blockSize,
    287 				     (long int) raidPtr->sectorsPerDisk);
    288 			ret = EINVAL;
    289 			goto fail;
    290 		} else
    291 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    292 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    293 
    294 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    295 			}
    296 	}
    297 
    298 	return (0);
    299 
    300 fail:
    301 
    302 	/* Release the hold on the main components.  We've failed to allocate
    303 	 * a spare, and since we're failing, we need to free things..
    304 
    305 	 XXX failing to allocate a spare is *not* that big of a deal...
    306 	 We *can* survive without it, if need be, esp. if we get hot
    307 	 adding working.
    308 
    309 	 If we don't fail out here, then we need a way to remove this spare...
    310 	 that should be easier to do here than if we are "live"...
    311 
    312 	 */
    313 
    314 	rf_UnconfigureVnodes( raidPtr );
    315 
    316 	return (ret);
    317 }
    318 
    319 static int
    320 rf_AllocDiskStructures(raidPtr, cfgPtr)
    321 	RF_Raid_t *raidPtr;
    322  	RF_Config_t *cfgPtr;
    323 {
    324 	RF_RaidDisk_t **disks;
    325 	int ret;
    326 	int r;
    327 
    328 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    329 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    330 	if (disks == NULL) {
    331 		ret = ENOMEM;
    332 		goto fail;
    333 	}
    334 	raidPtr->Disks = disks;
    335 	/* get space for the device-specific stuff... */
    336 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    337 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    338 	    raidPtr->cleanupList);
    339 	if (raidPtr->raid_cinfo == NULL) {
    340 		ret = ENOMEM;
    341 		goto fail;
    342 	}
    343 
    344 	for (r = 0; r < raidPtr->numRow; r++) {
    345 		/* We allocate RF_MAXSPARE on the first row so that we
    346 		   have room to do hot-swapping of spares */
    347 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    348 				+ ((r == 0) ? RF_MAXSPARE : 0),
    349 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    350 				raidPtr->cleanupList);
    351 		if (disks[r] == NULL) {
    352 			ret = ENOMEM;
    353 			goto fail;
    354 		}
    355 		/* get more space for device specific stuff.. */
    356 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    357 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    358 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    359 		    raidPtr->cleanupList);
    360 		if (raidPtr->raid_cinfo[r] == NULL) {
    361 			ret = ENOMEM;
    362 			goto fail;
    363 		}
    364 	}
    365 	return(0);
    366 fail:
    367 	rf_UnconfigureVnodes( raidPtr );
    368 
    369 	return(ret);
    370 }
    371 
    372 
    373 /* configure a single disk during auto-configuration at boot */
    374 int
    375 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    376 	RF_Raid_t *raidPtr;
    377 	RF_Config_t *cfgPtr;
    378 	RF_AutoConfig_t *auto_config;
    379 {
    380 	RF_RaidDisk_t **disks;
    381 	RF_RaidDisk_t *diskPtr;
    382 	RF_RowCol_t r, c;
    383 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    384 	int bs, ret;
    385 	int numFailuresThisRow;
    386 	int force;
    387 	RF_AutoConfig_t *ac;
    388 	int parity_good;
    389 	int mod_counter;
    390 	int mod_counter_found;
    391 
    392 #if DEBUG
    393 	printf("Starting autoconfiguration of RAID set...\n");
    394 #endif
    395 	force = cfgPtr->force;
    396 
    397 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    398 	if (ret)
    399 		goto fail;
    400 
    401 	disks = raidPtr->Disks;
    402 
    403 	/* assume the parity will be fine.. */
    404 	parity_good = RF_RAID_CLEAN;
    405 
    406 	/* Check for mod_counters that are too low */
    407 	mod_counter_found = 0;
    408 	mod_counter = 0;
    409 	ac = auto_config;
    410 	while(ac!=NULL) {
    411 		if (mod_counter_found==0) {
    412 			mod_counter = ac->clabel->mod_counter;
    413 			mod_counter_found = 1;
    414 		} else {
    415 			if (ac->clabel->mod_counter > mod_counter) {
    416 				mod_counter = ac->clabel->mod_counter;
    417 			}
    418 		}
    419 		ac->flag = 0; /* clear the general purpose flag */
    420 		ac = ac->next;
    421 	}
    422 
    423 	bs = 0;
    424 	for (r = 0; r < raidPtr->numRow; r++) {
    425 		numFailuresThisRow = 0;
    426 		for (c = 0; c < raidPtr->numCol; c++) {
    427 			diskPtr = &disks[r][c];
    428 
    429 			/* find this row/col in the autoconfig */
    430 #if DEBUG
    431 			printf("Looking for %d,%d in autoconfig\n",r,c);
    432 #endif
    433 			ac = auto_config;
    434 			while(ac!=NULL) {
    435 				if (ac->clabel==NULL) {
    436 					/* big-time bad news. */
    437 					goto fail;
    438 				}
    439 				if ((ac->clabel->row == r) &&
    440 				    (ac->clabel->column == c) &&
    441 				    (ac->clabel->mod_counter == mod_counter)) {
    442 					/* it's this one... */
    443 					/* flag it as 'used', so we don't
    444 					   free it later. */
    445 					ac->flag = 1;
    446 #if DEBUG
    447 					printf("Found: %s at %d,%d\n",
    448 					       ac->devname,r,c);
    449 #endif
    450 
    451 					break;
    452 				}
    453 				ac=ac->next;
    454 			}
    455 
    456 			if (ac==NULL) {
    457 				/* we didn't find an exact match with a
    458 				   correct mod_counter above... can we
    459 				   find one with an incorrect mod_counter
    460 				   to use instead?  (this one, if we find
    461 				   it, will be marked as failed once the
    462 				   set configures)
    463 				*/
    464 
    465 				ac = auto_config;
    466 				while(ac!=NULL) {
    467 					if (ac->clabel==NULL) {
    468 						/* big-time bad news. */
    469 						goto fail;
    470 					}
    471 					if ((ac->clabel->row == r) &&
    472 					    (ac->clabel->column == c)) {
    473 						/* it's this one...
    474 						   flag it as 'used', so we
    475 						   don't free it later. */
    476 						ac->flag = 1;
    477 #if DEBUG
    478 						printf("Found(low mod_counter): %s at %d,%d\n",
    479 						       ac->devname,r,c);
    480 #endif
    481 
    482 						break;
    483 					}
    484 					ac=ac->next;
    485 				}
    486 			}
    487 
    488 
    489 
    490 			if (ac!=NULL) {
    491 				/* Found it.  Configure it.. */
    492 				diskPtr->blockSize = ac->clabel->blockSize;
    493 				diskPtr->numBlocks = ac->clabel->numBlocks;
    494 				/* Note: rf_protectedSectors is already
    495 				   factored into numBlocks here */
    496 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    497 
    498 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    499 				       ac->clabel, sizeof(*ac->clabel));
    500 				sprintf(diskPtr->devname, "/dev/%s",
    501 					ac->devname);
    502 
    503 				/* note the fact that this component was
    504 				   autoconfigured.  You'll need this info
    505 				   later.  Trust me :) */
    506 				diskPtr->auto_configured = 1;
    507 
    508 				/*
    509 				 * we allow the user to specify that
    510 				 * only a fraction of the disks should
    511 				 * be used this is just for debug: it
    512 				 * speeds up the parity scan
    513 				 */
    514 
    515 				diskPtr->numBlocks = diskPtr->numBlocks *
    516 					rf_sizePercentage / 100;
    517 
    518 				/* XXX these will get set multiple times,
    519 				   but since we're autoconfiguring, they'd
    520 				   better be always the same each time!
    521 				   If not, this is the least of your worries */
    522 
    523 				bs = diskPtr->blockSize;
    524 				min_numblks = diskPtr->numBlocks;
    525 
    526 				/* this gets done multiple times, but that's
    527 				   fine -- the serial number will be the same
    528 				   for all components, guaranteed */
    529 				raidPtr->serial_number =
    530 					ac->clabel->serial_number;
    531 				/* check the last time the label
    532 				   was modified */
    533 				if (ac->clabel->mod_counter !=
    534 				    mod_counter) {
    535 					/* Even though we've filled in all
    536 					   of the above, we don't trust
    537 					   this component since it's
    538 					   modification counter is not
    539 					   in sync with the rest, and we really
    540 					   consider it to be failed.  */
    541 					disks[r][c].status = rf_ds_failed;
    542 					numFailuresThisRow++;
    543 				} else {
    544 					if (ac->clabel->clean !=
    545 					    RF_RAID_CLEAN) {
    546 						parity_good = RF_RAID_DIRTY;
    547 					}
    548 				}
    549 			} else {
    550 				/* Didn't find it at all!!
    551 				   Component must really be dead */
    552 				disks[r][c].status = rf_ds_failed;
    553 				sprintf(disks[r][c].devname,"component%d",
    554 					r * raidPtr->numCol + c);
    555 				numFailuresThisRow++;
    556 			}
    557 		}
    558 		/* XXX fix for n-fault tolerant */
    559 		/* XXX this should probably check to see how many failures
    560 		   we can handle for this configuration! */
    561 		if (numFailuresThisRow > 0)
    562 			raidPtr->status[r] = rf_rs_degraded;
    563 	}
    564 
    565 	/* close the device for the ones that didn't get used */
    566 
    567 	ac = auto_config;
    568 	while(ac!=NULL) {
    569 		if (ac->flag == 0) {
    570 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    571 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
    572 			vput(ac->vp);
    573 			ac->vp = NULL;
    574 #if DEBUG
    575 			printf("Released %s from auto-config set.\n",
    576 			       ac->devname);
    577 #endif
    578 		}
    579 		ac = ac->next;
    580 	}
    581 
    582 	raidPtr->mod_counter = mod_counter;
    583 
    584 	/* note the state of the parity, if any */
    585 	raidPtr->parity_good = parity_good;
    586 	raidPtr->sectorsPerDisk = min_numblks;
    587 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    588 	raidPtr->bytesPerSector = bs;
    589 	raidPtr->sectorMask = bs - 1;
    590 	return (0);
    591 
    592 fail:
    593 
    594 	rf_UnconfigureVnodes( raidPtr );
    595 
    596 	return (ret);
    597 
    598 }
    599 
    600 /* configure a single disk in the array */
    601 int
    602 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    603 	RF_Raid_t *raidPtr;
    604 	char   *buf;
    605 	RF_RaidDisk_t *diskPtr;
    606 	RF_RowCol_t row;
    607 	RF_RowCol_t col;
    608 {
    609 	char   *p;
    610 	int     retcode;
    611 
    612 	struct partinfo dpart;
    613 	struct vnode *vp;
    614 	struct vattr va;
    615 	struct proc *proc;
    616 	int     error;
    617 
    618 	retcode = 0;
    619 	p = rf_find_non_white(buf);
    620 	if (p[strlen(p) - 1] == '\n') {
    621 		/* strip off the newline */
    622 		p[strlen(p) - 1] = '\0';
    623 	}
    624 	(void) strcpy(diskPtr->devname, p);
    625 
    626 	proc = raidPtr->engine_thread;
    627 
    628 	/* Let's start by claiming the component is fine and well... */
    629 	diskPtr->status = rf_ds_optimal;
    630 
    631 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    632 
    633 	error = raidlookup(diskPtr->devname, proc, &vp);
    634 	if (error) {
    635 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    636 		if (error == ENXIO) {
    637 			/* the component isn't there... must be dead :-( */
    638 			diskPtr->status = rf_ds_failed;
    639 		} else {
    640 			return (error);
    641 		}
    642 	}
    643 	if (diskPtr->status == rf_ds_optimal) {
    644 
    645 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    646 			return (error);
    647 		}
    648 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    649 				  FREAD, proc->p_ucred, proc);
    650 		if (error) {
    651 			return (error);
    652 		}
    653 
    654 		diskPtr->blockSize = dpart.disklab->d_secsize;
    655 
    656 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    657 		diskPtr->partitionSize = dpart.part->p_size;
    658 
    659 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    660 
    661 		/* This component was not automatically configured */
    662 		diskPtr->auto_configured = 0;
    663 
    664 		/* we allow the user to specify that only a fraction of the
    665 		 * disks should be used this is just for debug:  it speeds up
    666 		 * the parity scan */
    667 		diskPtr->numBlocks = diskPtr->numBlocks *
    668 			rf_sizePercentage / 100;
    669 	}
    670 	return (0);
    671 }
    672 
    673 static void
    674 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    675 	RF_Raid_t *raidPtr;
    676 	int row;
    677 	int column;
    678 	char *dev_name;
    679 	RF_ComponentLabel_t *ci_label;
    680 {
    681 
    682 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    683 	       raidPtr->raidid, dev_name, row, column );
    684 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    685 	       ci_label->row, ci_label->column,
    686 	       ci_label->num_rows, ci_label->num_columns);
    687 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    688 	       ci_label->version, ci_label->serial_number,
    689 	       ci_label->mod_counter);
    690 	printf("         Clean: %s Status: %d\n",
    691 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    692 }
    693 
    694 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    695 				  serial_number, mod_counter )
    696 	RF_Raid_t *raidPtr;
    697 	int row;
    698 	int column;
    699 	char *dev_name;
    700 	RF_ComponentLabel_t *ci_label;
    701 	int serial_number;
    702 	int mod_counter;
    703 {
    704 	int fatal_error = 0;
    705 
    706 	if (serial_number != ci_label->serial_number) {
    707 		printf("%s has a different serial number: %d %d\n",
    708 		       dev_name, serial_number, ci_label->serial_number);
    709 		fatal_error = 1;
    710 	}
    711 	if (mod_counter != ci_label->mod_counter) {
    712 		printf("%s has a different modfication count: %d %d\n",
    713 		       dev_name, mod_counter, ci_label->mod_counter);
    714 	}
    715 
    716 	if (row != ci_label->row) {
    717 		printf("Row out of alignment for: %s\n", dev_name);
    718 		fatal_error = 1;
    719 	}
    720 	if (column != ci_label->column) {
    721 		printf("Column out of alignment for: %s\n", dev_name);
    722 		fatal_error = 1;
    723 	}
    724 	if (raidPtr->numRow != ci_label->num_rows) {
    725 		printf("Number of rows do not match for: %s\n", dev_name);
    726 		fatal_error = 1;
    727 	}
    728 	if (raidPtr->numCol != ci_label->num_columns) {
    729 		printf("Number of columns do not match for: %s\n", dev_name);
    730 		fatal_error = 1;
    731 	}
    732 	if (ci_label->clean == 0) {
    733 		/* it's not clean, but that's not fatal */
    734 		printf("%s is not clean!\n", dev_name);
    735 	}
    736 	return(fatal_error);
    737 }
    738 
    739 
    740 /*
    741 
    742    rf_CheckLabels() - check all the component labels for consistency.
    743    Return an error if there is anything major amiss.
    744 
    745  */
    746 
    747 int
    748 rf_CheckLabels( raidPtr, cfgPtr )
    749 	RF_Raid_t *raidPtr;
    750 	RF_Config_t *cfgPtr;
    751 {
    752 	int r,c;
    753 	char *dev_name;
    754 	RF_ComponentLabel_t *ci_label;
    755 	int serial_number = 0;
    756 	int mod_number = 0;
    757 	int fatal_error = 0;
    758 	int mod_values[4];
    759 	int mod_count[4];
    760 	int ser_values[4];
    761 	int ser_count[4];
    762 	int num_ser;
    763 	int num_mod;
    764 	int i;
    765 	int found;
    766 	int hosed_row;
    767 	int hosed_column;
    768 	int too_fatal;
    769 	int parity_good;
    770 	int force;
    771 
    772 	hosed_row = -1;
    773 	hosed_column = -1;
    774 	too_fatal = 0;
    775 	force = cfgPtr->force;
    776 
    777 	/*
    778 	   We're going to try to be a little intelligent here.  If one
    779 	   component's label is bogus, and we can identify that it's the
    780 	   *only* one that's gone, we'll mark it as "failed" and allow
    781 	   the configuration to proceed.  This will be the *only* case
    782 	   that we'll proceed if there would be (otherwise) fatal errors.
    783 
    784 	   Basically we simply keep a count of how many components had
    785 	   what serial number.  If all but one agree, we simply mark
    786 	   the disagreeing component as being failed, and allow
    787 	   things to come up "normally".
    788 
    789 	   We do this first for serial numbers, and then for "mod_counter".
    790 
    791 	 */
    792 
    793 	num_ser = 0;
    794 	num_mod = 0;
    795 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    796 		for (c = 0; c < raidPtr->numCol; c++) {
    797 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    798 			found=0;
    799 			for(i=0;i<num_ser;i++) {
    800 				if (ser_values[i] == ci_label->serial_number) {
    801 					ser_count[i]++;
    802 					found=1;
    803 					break;
    804 				}
    805 			}
    806 			if (!found) {
    807 				ser_values[num_ser] = ci_label->serial_number;
    808 				ser_count[num_ser] = 1;
    809 				num_ser++;
    810 				if (num_ser>2) {
    811 					fatal_error = 1;
    812 					break;
    813 				}
    814 			}
    815 			found=0;
    816 			for(i=0;i<num_mod;i++) {
    817 				if (mod_values[i] == ci_label->mod_counter) {
    818 					mod_count[i]++;
    819 					found=1;
    820 					break;
    821 				}
    822 			}
    823 			if (!found) {
    824 			        mod_values[num_mod] = ci_label->mod_counter;
    825 				mod_count[num_mod] = 1;
    826 				num_mod++;
    827 				if (num_mod>2) {
    828 					fatal_error = 1;
    829 					break;
    830 				}
    831 			}
    832 		}
    833 	}
    834 #if DEBUG
    835 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    836 	for(i=0;i<num_ser;i++) {
    837 		printf("%d %d\n", ser_values[i], ser_count[i]);
    838 	}
    839 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    840 	for(i=0;i<num_mod;i++) {
    841 		printf("%d %d\n", mod_values[i], mod_count[i]);
    842 	}
    843 #endif
    844 	serial_number = ser_values[0];
    845 	if (num_ser == 2) {
    846 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    847 			/* Locate the maverick component */
    848 			if (ser_count[1] > ser_count[0]) {
    849 				serial_number = ser_values[1];
    850 			}
    851 			for (r = 0; r < raidPtr->numRow; r++) {
    852 				for (c = 0; c < raidPtr->numCol; c++) {
    853 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    854 					if (serial_number !=
    855 					    ci_label->serial_number) {
    856 						hosed_row = r;
    857 						hosed_column = c;
    858 						break;
    859 					}
    860 				}
    861 			}
    862 			printf("Hosed component: %s\n",
    863 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    864 			if (!force) {
    865 				/* we'll fail this component, as if there are
    866 				   other major errors, we arn't forcing things
    867 				   and we'll abort the config anyways */
    868 				raidPtr->Disks[hosed_row][hosed_column].status
    869 					= rf_ds_failed;
    870 				raidPtr->numFailures++;
    871 				raidPtr->status[hosed_row] = rf_rs_degraded;
    872 			}
    873 		} else {
    874 			too_fatal = 1;
    875 		}
    876 		if (cfgPtr->parityConfig == '0') {
    877 			/* We've identified two different serial numbers.
    878 			   RAID 0 can't cope with that, so we'll punt */
    879 			too_fatal = 1;
    880 		}
    881 
    882 	}
    883 
    884 	/* record the serial number for later.  If we bail later, setting
    885 	   this doesn't matter, otherwise we've got the best guess at the
    886 	   correct serial number */
    887 	raidPtr->serial_number = serial_number;
    888 
    889 	mod_number = mod_values[0];
    890 	if (num_mod == 2) {
    891 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    892 			/* Locate the maverick component */
    893 			if (mod_count[1] > mod_count[0]) {
    894 				mod_number = mod_values[1];
    895 			} else if (mod_count[1] < mod_count[0]) {
    896 				mod_number = mod_values[0];
    897 			} else {
    898 				/* counts of different modification values
    899 				   are the same.   Assume greater value is
    900 				   the correct one, all other things
    901 				   considered */
    902 				if (mod_values[0] > mod_values[1]) {
    903 					mod_number = mod_values[0];
    904 				} else {
    905 					mod_number = mod_values[1];
    906 				}
    907 
    908 			}
    909 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    910 				for (c = 0; c < raidPtr->numCol; c++) {
    911 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    912 					if (mod_number !=
    913 					    ci_label->mod_counter) {
    914 						if ( ( hosed_row == r ) &&
    915 						     ( hosed_column == c )) {
    916 							/* same one.  Can
    917 							   deal with it.  */
    918 						} else {
    919 							hosed_row = r;
    920 							hosed_column = c;
    921 							if (num_ser != 1) {
    922 								too_fatal = 1;
    923 								break;
    924 							}
    925 						}
    926 					}
    927 				}
    928 			}
    929 			printf("Hosed component: %s\n",
    930 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    931 			if (!force) {
    932 				/* we'll fail this component, as if there are
    933 				   other major errors, we arn't forcing things
    934 				   and we'll abort the config anyways */
    935 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    936 					raidPtr->Disks[hosed_row][hosed_column].status
    937 						= rf_ds_failed;
    938 					raidPtr->numFailures++;
    939 					raidPtr->status[hosed_row] = rf_rs_degraded;
    940 				}
    941 			}
    942 		} else {
    943 			too_fatal = 1;
    944 		}
    945 		if (cfgPtr->parityConfig == '0') {
    946 			/* We've identified two different mod counters.
    947 			   RAID 0 can't cope with that, so we'll punt */
    948 			too_fatal = 1;
    949 		}
    950 	}
    951 
    952 	raidPtr->mod_counter = mod_number;
    953 
    954 	if (too_fatal) {
    955 		/* we've had both a serial number mismatch, and a mod_counter
    956 		   mismatch -- and they involved two different components!!
    957 		   Bail -- make things fail so that the user must force
    958 		   the issue... */
    959 		hosed_row = -1;
    960 		hosed_column = -1;
    961 	}
    962 
    963 	if (num_ser > 2) {
    964 		printf("raid%d: Too many different serial numbers!\n",
    965 		       raidPtr->raidid);
    966 	}
    967 
    968 	if (num_mod > 2) {
    969 		printf("raid%d: Too many different mod counters!\n",
    970 		       raidPtr->raidid);
    971 	}
    972 
    973 	/* we start by assuming the parity will be good, and flee from
    974 	   that notion at the slightest sign of trouble */
    975 
    976 	parity_good = RF_RAID_CLEAN;
    977 	for (r = 0; r < raidPtr->numRow; r++) {
    978 		for (c = 0; c < raidPtr->numCol; c++) {
    979 			dev_name = &cfgPtr->devnames[r][c][0];
    980 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    981 
    982 			if ((r == hosed_row) && (c == hosed_column)) {
    983 				printf("raid%d: Ignoring %s\n",
    984 				       raidPtr->raidid, dev_name);
    985 			} else {
    986 				rf_print_label_status( raidPtr, r, c,
    987 						       dev_name, ci_label );
    988 				if (rf_check_label_vitals( raidPtr, r, c,
    989 							   dev_name, ci_label,
    990 							   serial_number,
    991 							   mod_number )) {
    992 					fatal_error = 1;
    993 				}
    994 				if (ci_label->clean != RF_RAID_CLEAN) {
    995 					parity_good = RF_RAID_DIRTY;
    996 				}
    997 			}
    998 		}
    999 	}
   1000 	if (fatal_error) {
   1001 		parity_good = RF_RAID_DIRTY;
   1002 	}
   1003 
   1004 	/* we note the state of the parity */
   1005 	raidPtr->parity_good = parity_good;
   1006 
   1007 	return(fatal_error);
   1008 }
   1009 
   1010 int
   1011 rf_add_hot_spare(raidPtr, sparePtr)
   1012 	RF_Raid_t *raidPtr;
   1013 	RF_SingleComponent_t *sparePtr;
   1014 {
   1015 	RF_RaidDisk_t *disks;
   1016 	RF_DiskQueue_t *spareQueues;
   1017 	int ret;
   1018 	unsigned int bs;
   1019 	int spare_number;
   1020 
   1021 #if 0
   1022 	printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
   1023 	printf("Num col: %d\n",raidPtr->numCol);
   1024 #endif
   1025 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1026 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1027 		return(EINVAL);
   1028 	}
   1029 
   1030 	RF_LOCK_MUTEX(raidPtr->mutex);
   1031 
   1032 	/* the beginning of the spares... */
   1033 	disks = &raidPtr->Disks[0][raidPtr->numCol];
   1034 
   1035 	spare_number = raidPtr->numSpare;
   1036 
   1037 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1038 			       &disks[spare_number], 0,
   1039 			       raidPtr->numCol + spare_number);
   1040 
   1041 	if (ret)
   1042 		goto fail;
   1043 	if (disks[spare_number].status != rf_ds_optimal) {
   1044 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1045 			     sparePtr->component_name);
   1046 		ret=EINVAL;
   1047 		goto fail;
   1048 	} else {
   1049 		disks[spare_number].status = rf_ds_spare;
   1050 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1051 			 disks[spare_number].devname,
   1052 			 (long int) disks[spare_number].numBlocks,
   1053 			 disks[spare_number].blockSize,
   1054 			 (long int) disks[spare_number].numBlocks *
   1055 			 disks[spare_number].blockSize / 1024 / 1024);
   1056 	}
   1057 
   1058 
   1059 	/* check sizes and block sizes on the spare disk */
   1060 	bs = 1 << raidPtr->logBytesPerSector;
   1061 	if (disks[spare_number].blockSize != bs) {
   1062 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1063 		ret = EINVAL;
   1064 		goto fail;
   1065 	}
   1066 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1067 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1068 			     disks[spare_number].devname,
   1069 			     disks[spare_number].blockSize,
   1070 			     (long int) raidPtr->sectorsPerDisk);
   1071 		ret = EINVAL;
   1072 		goto fail;
   1073 	} else {
   1074 		if (disks[spare_number].numBlocks >
   1075 		    raidPtr->sectorsPerDisk) {
   1076 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1077 				     (long int) raidPtr->sectorsPerDisk);
   1078 
   1079 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1080 		}
   1081 	}
   1082 
   1083 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1084 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1085 				 0, raidPtr->numCol + spare_number,
   1086 				 raidPtr->qType,
   1087 				 raidPtr->sectorsPerDisk,
   1088 				 raidPtr->maxOutstanding,
   1089 				 &raidPtr->shutdownList,
   1090 				 raidPtr->cleanupList);
   1091 
   1092 
   1093 	raidPtr->numSpare++;
   1094 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1095 	return (0);
   1096 
   1097 fail:
   1098 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1099 	return(ret);
   1100 }
   1101 
   1102 int
   1103 rf_remove_hot_spare(raidPtr,sparePtr)
   1104 	RF_Raid_t *raidPtr;
   1105 	RF_SingleComponent_t *sparePtr;
   1106 {
   1107 	int spare_number;
   1108 
   1109 
   1110 	if (raidPtr->numSpare==0) {
   1111 		printf("No spares to remove!\n");
   1112 		return(EINVAL);
   1113 	}
   1114 
   1115 	spare_number = sparePtr->column;
   1116 
   1117 	return(EINVAL); /* XXX not implemented yet */
   1118 #if 0
   1119 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1120 		return(EINVAL);
   1121 	}
   1122 
   1123 	/* verify that this spare isn't in use... */
   1124 
   1125 
   1126 
   1127 
   1128 	/* it's gone.. */
   1129 
   1130 	raidPtr->numSpare--;
   1131 
   1132 	return(0);
   1133 #endif
   1134 }
   1135 
   1136 
   1137 int
   1138 rf_delete_component(raidPtr,component)
   1139 	RF_Raid_t *raidPtr;
   1140 	RF_SingleComponent_t *component;
   1141 {
   1142 	RF_RaidDisk_t *disks;
   1143 
   1144 	if ((component->row < 0) ||
   1145 	    (component->row >= raidPtr->numRow) ||
   1146 	    (component->column < 0) ||
   1147 	    (component->column >= raidPtr->numCol)) {
   1148 		return(EINVAL);
   1149 	}
   1150 
   1151 	disks = &raidPtr->Disks[component->row][component->column];
   1152 
   1153 	/* 1. This component must be marked as 'failed' */
   1154 
   1155 	return(EINVAL); /* Not implemented yet. */
   1156 }
   1157 
   1158 int
   1159 rf_incorporate_hot_spare(raidPtr,component)
   1160 	RF_Raid_t *raidPtr;
   1161 	RF_SingleComponent_t *component;
   1162 {
   1163 
   1164 	/* Issues here include how to 'move' this in if there is IO
   1165 	   taking place (e.g. component queues and such) */
   1166 
   1167 	return(EINVAL); /* Not implemented yet. */
   1168 }
   1169