Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.36
      1 /*	$NetBSD: rf_disks.c,v 1.36 2001/10/05 15:41:23 oster Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *        This product includes software developed by the NetBSD
     20  *        Foundation, Inc. and its contributors.
     21  * 4. Neither the name of The NetBSD Foundation nor the names of its
     22  *    contributors may be used to endorse or promote products derived
     23  *    from this software without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1995 Carnegie-Mellon University.
     40  * All rights reserved.
     41  *
     42  * Author: Mark Holland
     43  *
     44  * Permission to use, copy, modify and distribute this software and
     45  * its documentation is hereby granted, provided that both the copyright
     46  * notice and this permission notice appear in all copies of the
     47  * software, derivative works or modified versions, and any portions
     48  * thereof, and that both notices appear in supporting documentation.
     49  *
     50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     53  *
     54  * Carnegie Mellon requests users of this software to return to
     55  *
     56  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     57  *  School of Computer Science
     58  *  Carnegie Mellon University
     59  *  Pittsburgh PA 15213-3890
     60  *
     61  * any improvements or extensions that they make and grant Carnegie the
     62  * rights to redistribute these changes.
     63  */
     64 
     65 /***************************************************************
     66  * rf_disks.c -- code to perform operations on the actual disks
     67  ***************************************************************/
     68 
     69 #include <dev/raidframe/raidframevar.h>
     70 
     71 #include "rf_raid.h"
     72 #include "rf_alloclist.h"
     73 #include "rf_utils.h"
     74 #include "rf_general.h"
     75 #include "rf_options.h"
     76 #include "rf_kintf.h"
     77 #include "rf_netbsd.h"
     78 
     79 #include <sys/types.h>
     80 #include <sys/param.h>
     81 #include <sys/systm.h>
     82 #include <sys/proc.h>
     83 #include <sys/ioctl.h>
     84 #include <sys/fcntl.h>
     85 #include <sys/vnode.h>
     86 
     87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     88 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
     89 				  RF_ComponentLabel_t *);
     90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     91 				  RF_ComponentLabel_t *, int, int );
     92 
/*
 * Debug tracing helpers: forward their arguments to printf() only when
 * rf_diskDebug is non-zero.  The first argument is the format string.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement; a bare `if' expansion would silently capture an `else'
 * that follows the macro invocation.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     95 
     96 /**************************************************************************
     97  *
     98  * initialize the disks comprising the array
     99  *
    100  * We want the spare disks to have regular row,col numbers so that we can
    101  * easily substitue a spare for a failed disk.  But, the driver code assumes
    102  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    103  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    104  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    105  * rest, and put all the spares in it.  This probably needs to get changed
    106  * eventually.
    107  *
    108  **************************************************************************/
    109 
    110 int
    111 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
    112 	RF_ShutdownList_t **listp;
    113 	RF_Raid_t *raidPtr;
    114 	RF_Config_t *cfgPtr;
    115 {
    116 	RF_RaidDisk_t **disks;
    117 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    118 	RF_RowCol_t r, c;
    119 	int bs, ret;
    120 	unsigned i, count, foundone = 0, numFailuresThisRow;
    121 	int force;
    122 
    123 	force = cfgPtr->force;
    124 
    125 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    126 	if (ret)
    127 		goto fail;
    128 
    129 	disks = raidPtr->Disks;
    130 
    131 	for (r = 0; r < raidPtr->numRow; r++) {
    132 		numFailuresThisRow = 0;
    133 		for (c = 0; c < raidPtr->numCol; c++) {
    134 			ret = rf_ConfigureDisk(raidPtr,
    135 					       &cfgPtr->devnames[r][c][0],
    136 					       &disks[r][c], r, c);
    137 
    138 			if (ret)
    139 				goto fail;
    140 
    141 			if (disks[r][c].status == rf_ds_optimal) {
    142 				raidread_component_label(
    143 					 raidPtr->raid_cinfo[r][c].ci_dev,
    144 					 raidPtr->raid_cinfo[r][c].ci_vp,
    145 					 &raidPtr->raid_cinfo[r][c].ci_label);
    146 			}
    147 
    148 			if (disks[r][c].status != rf_ds_optimal) {
    149 				numFailuresThisRow++;
    150 			} else {
    151 				if (disks[r][c].numBlocks < min_numblks)
    152 					min_numblks = disks[r][c].numBlocks;
    153 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    154 				    r, c, disks[r][c].devname,
    155 				    (long int) disks[r][c].numBlocks,
    156 				    disks[r][c].blockSize,
    157 				    (long int) disks[r][c].numBlocks *
    158 					 disks[r][c].blockSize / 1024 / 1024);
    159 			}
    160 		}
    161 		/* XXX fix for n-fault tolerant */
    162 		/* XXX this should probably check to see how many failures
    163 		   we can handle for this configuration! */
    164 		if (numFailuresThisRow > 0)
    165 			raidPtr->status[r] = rf_rs_degraded;
    166 	}
    167 
    168 	/* all disks must be the same size & have the same block size, bs must
    169 	 * be a power of 2 */
    170 	bs = 0;
    171 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    172 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    173 			if (disks[r][c].status == rf_ds_optimal) {
    174 				bs = disks[r][c].blockSize;
    175 				foundone = 1;
    176 			}
    177 		}
    178 	}
    179 	if (!foundone) {
    180 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    181 		ret = EINVAL;
    182 		goto fail;
    183 	}
    184 	for (count = 0, i = 1; i; i <<= 1)
    185 		if (bs & i)
    186 			count++;
    187 	if (count != 1) {
    188 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    189 		ret = EINVAL;
    190 		goto fail;
    191 	}
    192 
    193 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    194 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    195 		if (force != 0) {
    196 			printf("raid%d: Fatal errors being ignored.\n",
    197 			       raidPtr->raidid);
    198 		} else {
    199 			ret = EINVAL;
    200 			goto fail;
    201 		}
    202 	}
    203 
    204 	for (r = 0; r < raidPtr->numRow; r++) {
    205 		for (c = 0; c < raidPtr->numCol; c++) {
    206 			if (disks[r][c].status == rf_ds_optimal) {
    207 				if (disks[r][c].blockSize != bs) {
    208 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    209 					ret = EINVAL;
    210 					goto fail;
    211 				}
    212 				if (disks[r][c].numBlocks != min_numblks) {
    213 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    214 					    r, c, (int) min_numblks);
    215 					disks[r][c].numBlocks = min_numblks;
    216 				}
    217 			}
    218 		}
    219 	}
    220 
    221 	raidPtr->sectorsPerDisk = min_numblks;
    222 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    223 	raidPtr->bytesPerSector = bs;
    224 	raidPtr->sectorMask = bs - 1;
    225 	return (0);
    226 
    227 fail:
    228 
    229 	rf_UnconfigureVnodes( raidPtr );
    230 
    231 	return (ret);
    232 }
    233 
    234 
    235 /****************************************************************************
    236  * set up the data structures describing the spare disks in the array
    237  * recall from the above comment that the spare disk descriptors are stored
    238  * in row zero, which is specially expanded to hold them.
    239  ****************************************************************************/
    240 int
    241 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
    242 	RF_ShutdownList_t ** listp;
    243 	RF_Raid_t * raidPtr;
    244 	RF_Config_t * cfgPtr;
    245 {
    246 	int     i, ret;
    247 	unsigned int bs;
    248 	RF_RaidDisk_t *disks;
    249 	int     num_spares_done;
    250 
    251 	num_spares_done = 0;
    252 
    253 	/* The space for the spares should have already been allocated by
    254 	 * ConfigureDisks() */
    255 
    256 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    257 	for (i = 0; i < raidPtr->numSpare; i++) {
    258 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    259 				       &disks[i], 0, raidPtr->numCol + i);
    260 		if (ret)
    261 			goto fail;
    262 		if (disks[i].status != rf_ds_optimal) {
    263 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    264 				     &cfgPtr->spare_names[i][0]);
    265 		} else {
    266 			disks[i].status = rf_ds_spare;	/* change status to
    267 							 * spare */
    268 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    269 			    disks[i].devname,
    270 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    271 			    (long int) disks[i].numBlocks *
    272 				 disks[i].blockSize / 1024 / 1024);
    273 		}
    274 		num_spares_done++;
    275 	}
    276 
    277 	/* check sizes and block sizes on spare disks */
    278 	bs = 1 << raidPtr->logBytesPerSector;
    279 	for (i = 0; i < raidPtr->numSpare; i++) {
    280 		if (disks[i].blockSize != bs) {
    281 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    282 			ret = EINVAL;
    283 			goto fail;
    284 		}
    285 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    286 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    287 				     disks[i].devname, disks[i].blockSize,
    288 				     (long int) raidPtr->sectorsPerDisk);
    289 			ret = EINVAL;
    290 			goto fail;
    291 		} else
    292 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    293 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    294 
    295 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    296 			}
    297 	}
    298 
    299 	return (0);
    300 
    301 fail:
    302 
    303 	/* Release the hold on the main components.  We've failed to allocate
    304 	 * a spare, and since we're failing, we need to free things..
    305 
    306 	 XXX failing to allocate a spare is *not* that big of a deal...
    307 	 We *can* survive without it, if need be, esp. if we get hot
    308 	 adding working.
    309 
    310 	 If we don't fail out here, then we need a way to remove this spare...
    311 	 that should be easier to do here than if we are "live"...
    312 
    313 	 */
    314 
    315 	rf_UnconfigureVnodes( raidPtr );
    316 
    317 	return (ret);
    318 }
    319 
    320 static int
    321 rf_AllocDiskStructures(raidPtr, cfgPtr)
    322 	RF_Raid_t *raidPtr;
    323  	RF_Config_t *cfgPtr;
    324 {
    325 	RF_RaidDisk_t **disks;
    326 	int ret;
    327 	int r;
    328 
    329 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
    330 			(RF_RaidDisk_t **), raidPtr->cleanupList);
    331 	if (disks == NULL) {
    332 		ret = ENOMEM;
    333 		goto fail;
    334 	}
    335 	raidPtr->Disks = disks;
    336 	/* get space for the device-specific stuff... */
    337 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    338 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    339 	    raidPtr->cleanupList);
    340 	if (raidPtr->raid_cinfo == NULL) {
    341 		ret = ENOMEM;
    342 		goto fail;
    343 	}
    344 
    345 	for (r = 0; r < raidPtr->numRow; r++) {
    346 		/* We allocate RF_MAXSPARE on the first row so that we
    347 		   have room to do hot-swapping of spares */
    348 		RF_CallocAndAdd(disks[r], raidPtr->numCol
    349 				+ ((r == 0) ? RF_MAXSPARE : 0),
    350 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    351 				raidPtr->cleanupList);
    352 		if (disks[r] == NULL) {
    353 			ret = ENOMEM;
    354 			goto fail;
    355 		}
    356 		/* get more space for device specific stuff.. */
    357 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    358 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    359 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    360 		    raidPtr->cleanupList);
    361 		if (raidPtr->raid_cinfo[r] == NULL) {
    362 			ret = ENOMEM;
    363 			goto fail;
    364 		}
    365 	}
    366 	return(0);
    367 fail:
    368 	rf_UnconfigureVnodes( raidPtr );
    369 
    370 	return(ret);
    371 }
    372 
    373 
    374 /* configure a single disk during auto-configuration at boot */
    375 int
    376 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
    377 	RF_Raid_t *raidPtr;
    378 	RF_Config_t *cfgPtr;
    379 	RF_AutoConfig_t *auto_config;
    380 {
    381 	RF_RaidDisk_t **disks;
    382 	RF_RaidDisk_t *diskPtr;
    383 	RF_RowCol_t r, c;
    384 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    385 	int bs, ret;
    386 	int numFailuresThisRow;
    387 	int force;
    388 	RF_AutoConfig_t *ac;
    389 	int parity_good;
    390 	int mod_counter;
    391 	int mod_counter_found;
    392 
    393 #if DEBUG
    394 	printf("Starting autoconfiguration of RAID set...\n");
    395 #endif
    396 	force = cfgPtr->force;
    397 
    398 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    399 	if (ret)
    400 		goto fail;
    401 
    402 	disks = raidPtr->Disks;
    403 
    404 	/* assume the parity will be fine.. */
    405 	parity_good = RF_RAID_CLEAN;
    406 
    407 	/* Check for mod_counters that are too low */
    408 	mod_counter_found = 0;
    409 	mod_counter = 0;
    410 	ac = auto_config;
    411 	while(ac!=NULL) {
    412 		if (mod_counter_found==0) {
    413 			mod_counter = ac->clabel->mod_counter;
    414 			mod_counter_found = 1;
    415 		} else {
    416 			if (ac->clabel->mod_counter > mod_counter) {
    417 				mod_counter = ac->clabel->mod_counter;
    418 			}
    419 		}
    420 		ac->flag = 0; /* clear the general purpose flag */
    421 		ac = ac->next;
    422 	}
    423 
    424 	bs = 0;
    425 	for (r = 0; r < raidPtr->numRow; r++) {
    426 		numFailuresThisRow = 0;
    427 		for (c = 0; c < raidPtr->numCol; c++) {
    428 			diskPtr = &disks[r][c];
    429 
    430 			/* find this row/col in the autoconfig */
    431 #if DEBUG
    432 			printf("Looking for %d,%d in autoconfig\n",r,c);
    433 #endif
    434 			ac = auto_config;
    435 			while(ac!=NULL) {
    436 				if (ac->clabel==NULL) {
    437 					/* big-time bad news. */
    438 					goto fail;
    439 				}
    440 				if ((ac->clabel->row == r) &&
    441 				    (ac->clabel->column == c) &&
    442 				    (ac->clabel->mod_counter == mod_counter)) {
    443 					/* it's this one... */
    444 					/* flag it as 'used', so we don't
    445 					   free it later. */
    446 					ac->flag = 1;
    447 #if DEBUG
    448 					printf("Found: %s at %d,%d\n",
    449 					       ac->devname,r,c);
    450 #endif
    451 
    452 					break;
    453 				}
    454 				ac=ac->next;
    455 			}
    456 
    457 			if (ac==NULL) {
    458 				/* we didn't find an exact match with a
    459 				   correct mod_counter above... can we
    460 				   find one with an incorrect mod_counter
    461 				   to use instead?  (this one, if we find
    462 				   it, will be marked as failed once the
    463 				   set configures)
    464 				*/
    465 
    466 				ac = auto_config;
    467 				while(ac!=NULL) {
    468 					if (ac->clabel==NULL) {
    469 						/* big-time bad news. */
    470 						goto fail;
    471 					}
    472 					if ((ac->clabel->row == r) &&
    473 					    (ac->clabel->column == c)) {
    474 						/* it's this one...
    475 						   flag it as 'used', so we
    476 						   don't free it later. */
    477 						ac->flag = 1;
    478 #if DEBUG
    479 						printf("Found(low mod_counter): %s at %d,%d\n",
    480 						       ac->devname,r,c);
    481 #endif
    482 
    483 						break;
    484 					}
    485 					ac=ac->next;
    486 				}
    487 			}
    488 
    489 
    490 
    491 			if (ac!=NULL) {
    492 				/* Found it.  Configure it.. */
    493 				diskPtr->blockSize = ac->clabel->blockSize;
    494 				diskPtr->numBlocks = ac->clabel->numBlocks;
    495 				/* Note: rf_protectedSectors is already
    496 				   factored into numBlocks here */
    497 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
    498 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
    499 
    500 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
    501 				       ac->clabel, sizeof(*ac->clabel));
    502 				sprintf(diskPtr->devname, "/dev/%s",
    503 					ac->devname);
    504 
    505 				/* note the fact that this component was
    506 				   autoconfigured.  You'll need this info
    507 				   later.  Trust me :) */
    508 				diskPtr->auto_configured = 1;
    509 				diskPtr->dev = ac->dev;
    510 
    511 				/*
    512 				 * we allow the user to specify that
    513 				 * only a fraction of the disks should
    514 				 * be used this is just for debug: it
    515 				 * speeds up the parity scan
    516 				 */
    517 
    518 				diskPtr->numBlocks = diskPtr->numBlocks *
    519 					rf_sizePercentage / 100;
    520 
    521 				/* XXX these will get set multiple times,
    522 				   but since we're autoconfiguring, they'd
    523 				   better be always the same each time!
    524 				   If not, this is the least of your worries */
    525 
    526 				bs = diskPtr->blockSize;
    527 				min_numblks = diskPtr->numBlocks;
    528 
    529 				/* this gets done multiple times, but that's
    530 				   fine -- the serial number will be the same
    531 				   for all components, guaranteed */
    532 				raidPtr->serial_number =
    533 					ac->clabel->serial_number;
    534 				/* check the last time the label
    535 				   was modified */
    536 				if (ac->clabel->mod_counter !=
    537 				    mod_counter) {
    538 					/* Even though we've filled in all
    539 					   of the above, we don't trust
    540 					   this component since it's
    541 					   modification counter is not
    542 					   in sync with the rest, and we really
    543 					   consider it to be failed.  */
    544 					disks[r][c].status = rf_ds_failed;
    545 					numFailuresThisRow++;
    546 				} else {
    547 					if (ac->clabel->clean !=
    548 					    RF_RAID_CLEAN) {
    549 						parity_good = RF_RAID_DIRTY;
    550 					}
    551 				}
    552 			} else {
    553 				/* Didn't find it at all!!
    554 				   Component must really be dead */
    555 				disks[r][c].status = rf_ds_failed;
    556 				sprintf(disks[r][c].devname,"component%d",
    557 					r * raidPtr->numCol + c);
    558 				numFailuresThisRow++;
    559 			}
    560 		}
    561 		/* XXX fix for n-fault tolerant */
    562 		/* XXX this should probably check to see how many failures
    563 		   we can handle for this configuration! */
    564 		if (numFailuresThisRow > 0)
    565 			raidPtr->status[r] = rf_rs_degraded;
    566 	}
    567 
    568 	/* close the device for the ones that didn't get used */
    569 
    570 	ac = auto_config;
    571 	while(ac!=NULL) {
    572 		if (ac->flag == 0) {
    573 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    574 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
    575 			vput(ac->vp);
    576 			ac->vp = NULL;
    577 #if DEBUG
    578 			printf("Released %s from auto-config set.\n",
    579 			       ac->devname);
    580 #endif
    581 		}
    582 		ac = ac->next;
    583 	}
    584 
    585 	raidPtr->mod_counter = mod_counter;
    586 
    587 	/* note the state of the parity, if any */
    588 	raidPtr->parity_good = parity_good;
    589 	raidPtr->sectorsPerDisk = min_numblks;
    590 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    591 	raidPtr->bytesPerSector = bs;
    592 	raidPtr->sectorMask = bs - 1;
    593 	return (0);
    594 
    595 fail:
    596 
    597 	rf_UnconfigureVnodes( raidPtr );
    598 
    599 	return (ret);
    600 
    601 }
    602 
    603 /* configure a single disk in the array */
    604 int
    605 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
    606 	RF_Raid_t *raidPtr;
    607 	char   *buf;
    608 	RF_RaidDisk_t *diskPtr;
    609 	RF_RowCol_t row;
    610 	RF_RowCol_t col;
    611 {
    612 	char   *p;
    613 	int     retcode;
    614 
    615 	struct partinfo dpart;
    616 	struct vnode *vp;
    617 	struct vattr va;
    618 	struct proc *proc;
    619 	int     error;
    620 
    621 	retcode = 0;
    622 	p = rf_find_non_white(buf);
    623 	if (p[strlen(p) - 1] == '\n') {
    624 		/* strip off the newline */
    625 		p[strlen(p) - 1] = '\0';
    626 	}
    627 	(void) strcpy(diskPtr->devname, p);
    628 
    629 	proc = raidPtr->engine_thread;
    630 
    631 	/* Let's start by claiming the component is fine and well... */
    632 	diskPtr->status = rf_ds_optimal;
    633 
    634 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    635 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    636 
    637 	error = raidlookup(diskPtr->devname, proc, &vp);
    638 	if (error) {
    639 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    640 		if (error == ENXIO) {
    641 			/* the component isn't there... must be dead :-( */
    642 			diskPtr->status = rf_ds_failed;
    643 		} else {
    644 			return (error);
    645 		}
    646 	}
    647 	if (diskPtr->status == rf_ds_optimal) {
    648 
    649 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    650 			return (error);
    651 		}
    652 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    653 				  FREAD, proc->p_ucred, proc);
    654 		if (error) {
    655 			return (error);
    656 		}
    657 
    658 		diskPtr->blockSize = dpart.disklab->d_secsize;
    659 
    660 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    661 		diskPtr->partitionSize = dpart.part->p_size;
    662 
    663 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    664 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    665 
    666 		/* This component was not automatically configured */
    667 		diskPtr->auto_configured = 0;
    668 		diskPtr->dev = va.va_rdev;
    669 
    670 		/* we allow the user to specify that only a fraction of the
    671 		 * disks should be used this is just for debug:  it speeds up
    672 		 * the parity scan */
    673 		diskPtr->numBlocks = diskPtr->numBlocks *
    674 			rf_sizePercentage / 100;
    675 	}
    676 	return (0);
    677 }
    678 
    679 static void
    680 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
    681 	RF_Raid_t *raidPtr;
    682 	int row;
    683 	int column;
    684 	char *dev_name;
    685 	RF_ComponentLabel_t *ci_label;
    686 {
    687 
    688 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
    689 	       raidPtr->raidid, dev_name, row, column );
    690 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
    691 	       ci_label->row, ci_label->column,
    692 	       ci_label->num_rows, ci_label->num_columns);
    693 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    694 	       ci_label->version, ci_label->serial_number,
    695 	       ci_label->mod_counter);
    696 	printf("         Clean: %s Status: %d\n",
    697 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    698 }
    699 
    700 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
    701 				  serial_number, mod_counter )
    702 	RF_Raid_t *raidPtr;
    703 	int row;
    704 	int column;
    705 	char *dev_name;
    706 	RF_ComponentLabel_t *ci_label;
    707 	int serial_number;
    708 	int mod_counter;
    709 {
    710 	int fatal_error = 0;
    711 
    712 	if (serial_number != ci_label->serial_number) {
    713 		printf("%s has a different serial number: %d %d\n",
    714 		       dev_name, serial_number, ci_label->serial_number);
    715 		fatal_error = 1;
    716 	}
    717 	if (mod_counter != ci_label->mod_counter) {
    718 		printf("%s has a different modfication count: %d %d\n",
    719 		       dev_name, mod_counter, ci_label->mod_counter);
    720 	}
    721 
    722 	if (row != ci_label->row) {
    723 		printf("Row out of alignment for: %s\n", dev_name);
    724 		fatal_error = 1;
    725 	}
    726 	if (column != ci_label->column) {
    727 		printf("Column out of alignment for: %s\n", dev_name);
    728 		fatal_error = 1;
    729 	}
    730 	if (raidPtr->numRow != ci_label->num_rows) {
    731 		printf("Number of rows do not match for: %s\n", dev_name);
    732 		fatal_error = 1;
    733 	}
    734 	if (raidPtr->numCol != ci_label->num_columns) {
    735 		printf("Number of columns do not match for: %s\n", dev_name);
    736 		fatal_error = 1;
    737 	}
    738 	if (ci_label->clean == 0) {
    739 		/* it's not clean, but that's not fatal */
    740 		printf("%s is not clean!\n", dev_name);
    741 	}
    742 	return(fatal_error);
    743 }
    744 
    745 
    746 /*
    747 
    748    rf_CheckLabels() - check all the component labels for consistency.
    749    Return an error if there is anything major amiss.
    750 
    751  */
    752 
    753 int
    754 rf_CheckLabels( raidPtr, cfgPtr )
    755 	RF_Raid_t *raidPtr;
    756 	RF_Config_t *cfgPtr;
    757 {
    758 	int r,c;
    759 	char *dev_name;
    760 	RF_ComponentLabel_t *ci_label;
    761 	int serial_number = 0;
    762 	int mod_number = 0;
    763 	int fatal_error = 0;
    764 	int mod_values[4];
    765 	int mod_count[4];
    766 	int ser_values[4];
    767 	int ser_count[4];
    768 	int num_ser;
    769 	int num_mod;
    770 	int i;
    771 	int found;
    772 	int hosed_row;
    773 	int hosed_column;
    774 	int too_fatal;
    775 	int parity_good;
    776 	int force;
    777 
    778 	hosed_row = -1;
    779 	hosed_column = -1;
    780 	too_fatal = 0;
    781 	force = cfgPtr->force;
    782 
    783 	/*
    784 	   We're going to try to be a little intelligent here.  If one
    785 	   component's label is bogus, and we can identify that it's the
    786 	   *only* one that's gone, we'll mark it as "failed" and allow
    787 	   the configuration to proceed.  This will be the *only* case
    788 	   that we'll proceed if there would be (otherwise) fatal errors.
    789 
    790 	   Basically we simply keep a count of how many components had
    791 	   what serial number.  If all but one agree, we simply mark
    792 	   the disagreeing component as being failed, and allow
    793 	   things to come up "normally".
    794 
    795 	   We do this first for serial numbers, and then for "mod_counter".
    796 
    797 	 */
    798 
    799 	num_ser = 0;
    800 	num_mod = 0;
    801 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
    802 		for (c = 0; c < raidPtr->numCol; c++) {
    803 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    804 			found=0;
    805 			for(i=0;i<num_ser;i++) {
    806 				if (ser_values[i] == ci_label->serial_number) {
    807 					ser_count[i]++;
    808 					found=1;
    809 					break;
    810 				}
    811 			}
    812 			if (!found) {
    813 				ser_values[num_ser] = ci_label->serial_number;
    814 				ser_count[num_ser] = 1;
    815 				num_ser++;
    816 				if (num_ser>2) {
    817 					fatal_error = 1;
    818 					break;
    819 				}
    820 			}
    821 			found=0;
    822 			for(i=0;i<num_mod;i++) {
    823 				if (mod_values[i] == ci_label->mod_counter) {
    824 					mod_count[i]++;
    825 					found=1;
    826 					break;
    827 				}
    828 			}
    829 			if (!found) {
    830 			        mod_values[num_mod] = ci_label->mod_counter;
    831 				mod_count[num_mod] = 1;
    832 				num_mod++;
    833 				if (num_mod>2) {
    834 					fatal_error = 1;
    835 					break;
    836 				}
    837 			}
    838 		}
    839 	}
    840 #if DEBUG
    841 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    842 	for(i=0;i<num_ser;i++) {
    843 		printf("%d %d\n", ser_values[i], ser_count[i]);
    844 	}
    845 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    846 	for(i=0;i<num_mod;i++) {
    847 		printf("%d %d\n", mod_values[i], mod_count[i]);
    848 	}
    849 #endif
    850 	serial_number = ser_values[0];
    851 	if (num_ser == 2) {
    852 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    853 			/* Locate the maverick component */
    854 			if (ser_count[1] > ser_count[0]) {
    855 				serial_number = ser_values[1];
    856 			}
    857 			for (r = 0; r < raidPtr->numRow; r++) {
    858 				for (c = 0; c < raidPtr->numCol; c++) {
    859 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    860 					if (serial_number !=
    861 					    ci_label->serial_number) {
    862 						hosed_row = r;
    863 						hosed_column = c;
    864 						break;
    865 					}
    866 				}
    867 			}
    868 			printf("Hosed component: %s\n",
    869 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    870 			if (!force) {
    871 				/* we'll fail this component, as if there are
    872 				   other major errors, we arn't forcing things
    873 				   and we'll abort the config anyways */
    874 				raidPtr->Disks[hosed_row][hosed_column].status
    875 					= rf_ds_failed;
    876 				raidPtr->numFailures++;
    877 				raidPtr->status[hosed_row] = rf_rs_degraded;
    878 			}
    879 		} else {
    880 			too_fatal = 1;
    881 		}
    882 		if (cfgPtr->parityConfig == '0') {
    883 			/* We've identified two different serial numbers.
    884 			   RAID 0 can't cope with that, so we'll punt */
    885 			too_fatal = 1;
    886 		}
    887 
    888 	}
    889 
    890 	/* record the serial number for later.  If we bail later, setting
    891 	   this doesn't matter, otherwise we've got the best guess at the
    892 	   correct serial number */
    893 	raidPtr->serial_number = serial_number;
    894 
    895 	mod_number = mod_values[0];
    896 	if (num_mod == 2) {
    897 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    898 			/* Locate the maverick component */
    899 			if (mod_count[1] > mod_count[0]) {
    900 				mod_number = mod_values[1];
    901 			} else if (mod_count[1] < mod_count[0]) {
    902 				mod_number = mod_values[0];
    903 			} else {
    904 				/* counts of different modification values
    905 				   are the same.   Assume greater value is
    906 				   the correct one, all other things
    907 				   considered */
    908 				if (mod_values[0] > mod_values[1]) {
    909 					mod_number = mod_values[0];
    910 				} else {
    911 					mod_number = mod_values[1];
    912 				}
    913 
    914 			}
    915 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
    916 				for (c = 0; c < raidPtr->numCol; c++) {
    917 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    918 					if (mod_number !=
    919 					    ci_label->mod_counter) {
    920 						if ( ( hosed_row == r ) &&
    921 						     ( hosed_column == c )) {
    922 							/* same one.  Can
    923 							   deal with it.  */
    924 						} else {
    925 							hosed_row = r;
    926 							hosed_column = c;
    927 							if (num_ser != 1) {
    928 								too_fatal = 1;
    929 								break;
    930 							}
    931 						}
    932 					}
    933 				}
    934 			}
    935 			printf("Hosed component: %s\n",
    936 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
    937 			if (!force) {
    938 				/* we'll fail this component, as if there are
    939 				   other major errors, we arn't forcing things
    940 				   and we'll abort the config anyways */
    941 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
    942 					raidPtr->Disks[hosed_row][hosed_column].status
    943 						= rf_ds_failed;
    944 					raidPtr->numFailures++;
    945 					raidPtr->status[hosed_row] = rf_rs_degraded;
    946 				}
    947 			}
    948 		} else {
    949 			too_fatal = 1;
    950 		}
    951 		if (cfgPtr->parityConfig == '0') {
    952 			/* We've identified two different mod counters.
    953 			   RAID 0 can't cope with that, so we'll punt */
    954 			too_fatal = 1;
    955 		}
    956 	}
    957 
    958 	raidPtr->mod_counter = mod_number;
    959 
    960 	if (too_fatal) {
    961 		/* we've had both a serial number mismatch, and a mod_counter
    962 		   mismatch -- and they involved two different components!!
    963 		   Bail -- make things fail so that the user must force
    964 		   the issue... */
    965 		hosed_row = -1;
    966 		hosed_column = -1;
    967 	}
    968 
    969 	if (num_ser > 2) {
    970 		printf("raid%d: Too many different serial numbers!\n",
    971 		       raidPtr->raidid);
    972 	}
    973 
    974 	if (num_mod > 2) {
    975 		printf("raid%d: Too many different mod counters!\n",
    976 		       raidPtr->raidid);
    977 	}
    978 
    979 	/* we start by assuming the parity will be good, and flee from
    980 	   that notion at the slightest sign of trouble */
    981 
    982 	parity_good = RF_RAID_CLEAN;
    983 	for (r = 0; r < raidPtr->numRow; r++) {
    984 		for (c = 0; c < raidPtr->numCol; c++) {
    985 			dev_name = &cfgPtr->devnames[r][c][0];
    986 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
    987 
    988 			if ((r == hosed_row) && (c == hosed_column)) {
    989 				printf("raid%d: Ignoring %s\n",
    990 				       raidPtr->raidid, dev_name);
    991 			} else {
    992 				rf_print_label_status( raidPtr, r, c,
    993 						       dev_name, ci_label );
    994 				if (rf_check_label_vitals( raidPtr, r, c,
    995 							   dev_name, ci_label,
    996 							   serial_number,
    997 							   mod_number )) {
    998 					fatal_error = 1;
    999 				}
   1000 				if (ci_label->clean != RF_RAID_CLEAN) {
   1001 					parity_good = RF_RAID_DIRTY;
   1002 				}
   1003 			}
   1004 		}
   1005 	}
   1006 	if (fatal_error) {
   1007 		parity_good = RF_RAID_DIRTY;
   1008 	}
   1009 
   1010 	/* we note the state of the parity */
   1011 	raidPtr->parity_good = parity_good;
   1012 
   1013 	return(fatal_error);
   1014 }
   1015 
   1016 int
   1017 rf_add_hot_spare(raidPtr, sparePtr)
   1018 	RF_Raid_t *raidPtr;
   1019 	RF_SingleComponent_t *sparePtr;
   1020 {
   1021 	RF_RaidDisk_t *disks;
   1022 	RF_DiskQueue_t *spareQueues;
   1023 	int ret;
   1024 	unsigned int bs;
   1025 	int spare_number;
   1026 
   1027 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1028 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1029 		return(EINVAL);
   1030 	}
   1031 
   1032 	RF_LOCK_MUTEX(raidPtr->mutex);
   1033 
   1034 	/* the beginning of the spares... */
   1035 	disks = &raidPtr->Disks[0][raidPtr->numCol];
   1036 
   1037 	spare_number = raidPtr->numSpare;
   1038 
   1039 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1040 			       &disks[spare_number], 0,
   1041 			       raidPtr->numCol + spare_number);
   1042 
   1043 	if (ret)
   1044 		goto fail;
   1045 	if (disks[spare_number].status != rf_ds_optimal) {
   1046 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1047 			     sparePtr->component_name);
   1048 		ret=EINVAL;
   1049 		goto fail;
   1050 	} else {
   1051 		disks[spare_number].status = rf_ds_spare;
   1052 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
   1053 			 disks[spare_number].devname,
   1054 			 (long int) disks[spare_number].numBlocks,
   1055 			 disks[spare_number].blockSize,
   1056 			 (long int) disks[spare_number].numBlocks *
   1057 			 disks[spare_number].blockSize / 1024 / 1024);
   1058 	}
   1059 
   1060 
   1061 	/* check sizes and block sizes on the spare disk */
   1062 	bs = 1 << raidPtr->logBytesPerSector;
   1063 	if (disks[spare_number].blockSize != bs) {
   1064 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1065 		ret = EINVAL;
   1066 		goto fail;
   1067 	}
   1068 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1069 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
   1070 			     disks[spare_number].devname,
   1071 			     disks[spare_number].blockSize,
   1072 			     (long int) raidPtr->sectorsPerDisk);
   1073 		ret = EINVAL;
   1074 		goto fail;
   1075 	} else {
   1076 		if (disks[spare_number].numBlocks >
   1077 		    raidPtr->sectorsPerDisk) {
   1078 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
   1079 				     (long int) raidPtr->sectorsPerDisk);
   1080 
   1081 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1082 		}
   1083 	}
   1084 
   1085 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
   1086 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1087 				 0, raidPtr->numCol + spare_number,
   1088 				 raidPtr->qType,
   1089 				 raidPtr->sectorsPerDisk,
   1090 				 raidPtr->Disks[0][raidPtr->numCol +
   1091 						  spare_number].dev,
   1092 				 raidPtr->maxOutstanding,
   1093 				 &raidPtr->shutdownList,
   1094 				 raidPtr->cleanupList);
   1095 
   1096 
   1097 	raidPtr->numSpare++;
   1098 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1099 	return (0);
   1100 
   1101 fail:
   1102 	RF_UNLOCK_MUTEX(raidPtr->mutex);
   1103 	return(ret);
   1104 }
   1105 
   1106 int
   1107 rf_remove_hot_spare(raidPtr,sparePtr)
   1108 	RF_Raid_t *raidPtr;
   1109 	RF_SingleComponent_t *sparePtr;
   1110 {
   1111 	int spare_number;
   1112 
   1113 
   1114 	if (raidPtr->numSpare==0) {
   1115 		printf("No spares to remove!\n");
   1116 		return(EINVAL);
   1117 	}
   1118 
   1119 	spare_number = sparePtr->column;
   1120 
   1121 	return(EINVAL); /* XXX not implemented yet */
   1122 #if 0
   1123 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1124 		return(EINVAL);
   1125 	}
   1126 
   1127 	/* verify that this spare isn't in use... */
   1128 
   1129 
   1130 
   1131 
   1132 	/* it's gone.. */
   1133 
   1134 	raidPtr->numSpare--;
   1135 
   1136 	return(0);
   1137 #endif
   1138 }
   1139 
   1140 
   1141 int
   1142 rf_delete_component(raidPtr,component)
   1143 	RF_Raid_t *raidPtr;
   1144 	RF_SingleComponent_t *component;
   1145 {
   1146 	RF_RaidDisk_t *disks;
   1147 
   1148 	if ((component->row < 0) ||
   1149 	    (component->row >= raidPtr->numRow) ||
   1150 	    (component->column < 0) ||
   1151 	    (component->column >= raidPtr->numCol)) {
   1152 		return(EINVAL);
   1153 	}
   1154 
   1155 	disks = &raidPtr->Disks[component->row][component->column];
   1156 
   1157 	/* 1. This component must be marked as 'failed' */
   1158 
   1159 	return(EINVAL); /* Not implemented yet. */
   1160 }
   1161 
   1162 int
   1163 rf_incorporate_hot_spare(raidPtr,component)
   1164 	RF_Raid_t *raidPtr;
   1165 	RF_SingleComponent_t *component;
   1166 {
   1167 
   1168 	/* Issues here include how to 'move' this in if there is IO
   1169 	   taking place (e.g. component queues and such) */
   1170 
   1171 	return(EINVAL); /* Not implemented yet. */
   1172 }
   1173