Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.83.2.3
      1 /*	$NetBSD: rf_disks.c,v 1.83.2.3 2017/12/03 11:37:31 jdolecek Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1995 Carnegie-Mellon University.
     33  * All rights reserved.
     34  *
     35  * Author: Mark Holland
     36  *
     37  * Permission to use, copy, modify and distribute this software and
     38  * its documentation is hereby granted, provided that both the copyright
     39  * notice and this permission notice appear in all copies of the
     40  * software, derivative works or modified versions, and any portions
     41  * thereof, and that both notices appear in supporting documentation.
     42  *
     43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     46  *
     47  * Carnegie Mellon requests users of this software to return to
     48  *
     49  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     50  *  School of Computer Science
     51  *  Carnegie Mellon University
     52  *  Pittsburgh PA 15213-3890
     53  *
     54  * any improvements or extensions that they make and grant Carnegie the
     55  * rights to redistribute these changes.
     56  */
     57 
     58 /***************************************************************
     59  * rf_disks.c -- code to perform operations on the actual disks
     60  ***************************************************************/
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.83.2.3 2017/12/03 11:37:31 jdolecek Exp $");
     64 
     65 #include <dev/raidframe/raidframevar.h>
     66 
     67 #include "rf_raid.h"
     68 #include "rf_alloclist.h"
     69 #include "rf_utils.h"
     70 #include "rf_general.h"
     71 #include "rf_options.h"
     72 #include "rf_kintf.h"
     73 #include "rf_netbsd.h"
     74 
     75 #include <sys/param.h>
     76 #include <sys/systm.h>
     77 #include <sys/proc.h>
     78 #include <sys/ioctl.h>
     79 #include <sys/fcntl.h>
     80 #include <sys/vnode.h>
     81 #include <sys/namei.h> /* for pathbuf */
     82 #include <sys/kauth.h>
     83 #include <sys/atomic.h>
     84 #include <sys/disk.h>
     85 
     86 #include <miscfs/specfs/specdev.h> /* for v_rdev */
     87 
     88 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     89 static void rf_print_label_status( RF_Raid_t *, int, char *,
     90 				  RF_ComponentLabel_t *);
     91 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     92 				  RF_ComponentLabel_t *, int, int );
     93 
/*
 * Debug printf helpers: print only when rf_diskDebug is non-zero.
 * Each is wrapped in do { } while (0) so that it expands to exactly one
 * statement and cannot swallow an "else" that follows the call site.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     96 
     97 /**************************************************************************
     98  *
     99  * initialize the disks comprising the array
    100  *
    101  * We want the spare disks to have regular row,col numbers so that we can
     102  * easily substitute a spare for a failed disk.  But, the driver code assumes
    103  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    104  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    105  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    106  * rest, and put all the spares in it.  This probably needs to get changed
    107  * eventually.
    108  *
    109  **************************************************************************/
    110 
    111 int
    112 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    113 		  RF_Config_t *cfgPtr)
    114 {
    115 	RF_RaidDisk_t *disks;
    116 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    117 	RF_RowCol_t c;
    118 	int bs, ret;
    119 	unsigned i, count, foundone = 0, numFailuresThisRow;
    120 	int force;
    121 
    122 	force = cfgPtr->force;
    123 
    124 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    125 	if (ret)
    126 		goto fail;
    127 
    128 	disks = raidPtr->Disks;
    129 
    130 	numFailuresThisRow = 0;
    131 	for (c = 0; c < raidPtr->numCol; c++) {
    132 		ret = rf_ConfigureDisk(raidPtr,
    133 				       &cfgPtr->devnames[0][c][0],
    134 				       &disks[c], c);
    135 
    136 		if (ret)
    137 			goto fail;
    138 
    139 		if (disks[c].status == rf_ds_optimal) {
    140 			ret = raidfetch_component_label(raidPtr, c);
    141 			if (ret)
    142 				goto fail;
    143 
    144 			/* mark it as failed if the label looks bogus... */
    145 			if (!rf_reasonable_label(&raidPtr->raid_cinfo[c].ci_label,0) && !force) {
    146 				disks[c].status = rf_ds_failed;
    147 			}
    148 		}
    149 
    150 		if (disks[c].status != rf_ds_optimal) {
    151 			numFailuresThisRow++;
    152 		} else {
    153 			if (disks[c].numBlocks < min_numblks)
    154 				min_numblks = disks[c].numBlocks;
    155 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
    156 				 c, disks[c].devname,
    157 				 disks[c].numBlocks,
    158 				 disks[c].blockSize,
    159 				 (long int) disks[c].numBlocks *
    160 				 disks[c].blockSize / 1024 / 1024);
    161 		}
    162 	}
    163 	/* XXX fix for n-fault tolerant */
    164 	/* XXX this should probably check to see how many failures
    165 	   we can handle for this configuration! */
    166 	if (numFailuresThisRow > 0)
    167 		raidPtr->status = rf_rs_degraded;
    168 
    169 	/* all disks must be the same size & have the same block size, bs must
    170 	 * be a power of 2 */
    171 	bs = 0;
    172 	foundone = 0;
    173 	for (c = 0; c < raidPtr->numCol; c++) {
    174 		if (disks[c].status == rf_ds_optimal) {
    175 			bs = disks[c].blockSize;
    176 			foundone = 1;
    177 			break;
    178 		}
    179 	}
    180 	if (!foundone) {
    181 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    182 		ret = EINVAL;
    183 		goto fail;
    184 	}
    185 	for (count = 0, i = 1; i; i <<= 1)
    186 		if (bs & i)
    187 			count++;
    188 	if (count != 1) {
    189 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    190 		ret = EINVAL;
    191 		goto fail;
    192 	}
    193 
    194 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    195 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    196 		if (force != 0) {
    197 			printf("raid%d: Fatal errors being ignored.\n",
    198 			       raidPtr->raidid);
    199 		} else {
    200 			ret = EINVAL;
    201 			goto fail;
    202 		}
    203 	}
    204 
    205 	for (c = 0; c < raidPtr->numCol; c++) {
    206 		if (disks[c].status == rf_ds_optimal) {
    207 			if (disks[c].blockSize != bs) {
    208 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
    209 				ret = EINVAL;
    210 				goto fail;
    211 			}
    212 			if (disks[c].numBlocks != min_numblks) {
    213 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
    214 					     c, (int) min_numblks);
    215 				disks[c].numBlocks = min_numblks;
    216 			}
    217 		}
    218 	}
    219 
    220 	raidPtr->sectorsPerDisk = min_numblks;
    221 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    222 	raidPtr->bytesPerSector = bs;
    223 	raidPtr->sectorMask = bs - 1;
    224 	return (0);
    225 
    226 fail:
    227 
    228 	rf_UnconfigureVnodes( raidPtr );
    229 
    230 	return (ret);
    231 }
    232 
    233 
    234 /****************************************************************************
    235  * set up the data structures describing the spare disks in the array
    236  * recall from the above comment that the spare disk descriptors are stored
    237  * in row zero, which is specially expanded to hold them.
    238  ****************************************************************************/
    239 int
    240 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    241 		       RF_Config_t *cfgPtr)
    242 {
    243 	int     i, ret;
    244 	unsigned int bs;
    245 	RF_RaidDisk_t *disks;
    246 	int     num_spares_done;
    247 
    248 	num_spares_done = 0;
    249 
    250 	/* The space for the spares should have already been allocated by
    251 	 * ConfigureDisks() */
    252 
    253 	disks = &raidPtr->Disks[raidPtr->numCol];
    254 	for (i = 0; i < raidPtr->numSpare; i++) {
    255 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    256 				       &disks[i], raidPtr->numCol + i);
    257 		if (ret)
    258 			goto fail;
    259 		if (disks[i].status != rf_ds_optimal) {
    260 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    261 				     &cfgPtr->spare_names[i][0]);
    262 		} else {
    263 			disks[i].status = rf_ds_spare;	/* change status to
    264 							 * spare */
    265 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
    266 			    disks[i].devname,
    267 			    disks[i].numBlocks, disks[i].blockSize,
    268 			    (long int) disks[i].numBlocks *
    269 				 disks[i].blockSize / 1024 / 1024);
    270 		}
    271 		num_spares_done++;
    272 	}
    273 
    274 	/* check sizes and block sizes on spare disks */
    275 	bs = 1 << raidPtr->logBytesPerSector;
    276 	for (i = 0; i < raidPtr->numSpare; i++) {
    277 		if (disks[i].blockSize != bs) {
    278 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    279 			ret = EINVAL;
    280 			goto fail;
    281 		}
    282 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    283 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
    284 				     disks[i].devname, disks[i].blockSize,
    285 				     raidPtr->sectorsPerDisk);
    286 			ret = EINVAL;
    287 			goto fail;
    288 		} else
    289 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    290 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
    291 				    disks[i].devname,
    292 				    raidPtr->sectorsPerDisk,
    293 				    disks[i].numBlocks);
    294 
    295 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    296 			}
    297 	}
    298 
    299 	return (0);
    300 
    301 fail:
    302 
    303 	/* Release the hold on the main components.  We've failed to allocate
    304 	 * a spare, and since we're failing, we need to free things..
    305 
    306 	 XXX failing to allocate a spare is *not* that big of a deal...
    307 	 We *can* survive without it, if need be, esp. if we get hot
    308 	 adding working.
    309 
    310 	 If we don't fail out here, then we need a way to remove this spare...
    311 	 that should be easier to do here than if we are "live"...
    312 
    313 	 */
    314 
    315 	rf_UnconfigureVnodes( raidPtr );
    316 
    317 	return (ret);
    318 }
    319 
    320 static int
    321 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    322 {
    323 	int ret;
    324 
    325 	/* We allocate RF_MAXSPARE on the first row so that we
    326 	   have room to do hot-swapping of spares */
    327 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
    328 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
    329 			raidPtr->cleanupList);
    330 	if (raidPtr->Disks == NULL) {
    331 		ret = ENOMEM;
    332 		goto fail;
    333 	}
    334 
    335 	/* get space for device specific stuff.. */
    336 	RF_MallocAndAdd(raidPtr->raid_cinfo,
    337 			(raidPtr->numCol + RF_MAXSPARE) *
    338 			sizeof(struct raidcinfo), (struct raidcinfo *),
    339 			raidPtr->cleanupList);
    340 
    341 	if (raidPtr->raid_cinfo == NULL) {
    342 		ret = ENOMEM;
    343 		goto fail;
    344 	}
    345 
    346 	return(0);
    347 fail:
    348 	rf_UnconfigureVnodes( raidPtr );
    349 
    350 	return(ret);
    351 }
    352 
    353 
    354 /* configure a single disk during auto-configuration at boot */
    355 int
    356 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
    357 		      RF_AutoConfig_t *auto_config)
    358 {
    359 	RF_RaidDisk_t *disks;
    360 	RF_RaidDisk_t *diskPtr;
    361 	RF_RowCol_t c;
    362 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    363 	int bs, ret;
    364 	int numFailuresThisRow;
    365 	RF_AutoConfig_t *ac;
    366 	int parity_good;
    367 	int mod_counter;
    368 	int mod_counter_found;
    369 
    370 #if DEBUG
    371 	printf("Starting autoconfiguration of RAID set...\n");
    372 #endif
    373 
    374 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    375 	if (ret)
    376 		goto fail;
    377 
    378 	disks = raidPtr->Disks;
    379 
    380 	/* assume the parity will be fine.. */
    381 	parity_good = RF_RAID_CLEAN;
    382 
    383 	/* Check for mod_counters that are too low */
    384 	mod_counter_found = 0;
    385 	mod_counter = 0;
    386 	ac = auto_config;
    387 	while(ac!=NULL) {
    388 		if (mod_counter_found==0) {
    389 			mod_counter = ac->clabel->mod_counter;
    390 			mod_counter_found = 1;
    391 		} else {
    392 			if (ac->clabel->mod_counter > mod_counter) {
    393 				mod_counter = ac->clabel->mod_counter;
    394 			}
    395 		}
    396 		ac->flag = 0; /* clear the general purpose flag */
    397 		ac = ac->next;
    398 	}
    399 
    400 	bs = 0;
    401 
    402 	numFailuresThisRow = 0;
    403 	for (c = 0; c < raidPtr->numCol; c++) {
    404 		diskPtr = &disks[c];
    405 
    406 		/* find this row/col in the autoconfig */
    407 #if DEBUG
    408 		printf("Looking for %d in autoconfig\n",c);
    409 #endif
    410 		ac = auto_config;
    411 		while(ac!=NULL) {
    412 			if (ac->clabel==NULL) {
    413 				/* big-time bad news. */
    414 				goto fail;
    415 			}
    416 			if ((ac->clabel->column == c) &&
    417 			    (ac->clabel->mod_counter == mod_counter)) {
    418 				/* it's this one... */
    419 				/* flag it as 'used', so we don't
    420 				   free it later. */
    421 				ac->flag = 1;
    422 #if DEBUG
    423 				printf("Found: %s at %d\n",
    424 				       ac->devname,c);
    425 #endif
    426 
    427 				break;
    428 			}
    429 			ac=ac->next;
    430 		}
    431 
    432 		if (ac==NULL) {
    433 			/* we didn't find an exact match with a
    434 			   correct mod_counter above... can we find
    435 			   one with an incorrect mod_counter to use
    436 			   instead?  (this one, if we find it, will be
    437 			   marked as failed once the set configures)
    438 			*/
    439 
    440 			ac = auto_config;
    441 			while(ac!=NULL) {
    442 				if (ac->clabel==NULL) {
    443 					/* big-time bad news. */
    444 					goto fail;
    445 				}
    446 				if (ac->clabel->column == c) {
    447 					/* it's this one...
    448 					   flag it as 'used', so we
    449 					   don't free it later. */
    450 					ac->flag = 1;
    451 #if DEBUG
    452 					printf("Found(low mod_counter): %s at %d\n",
    453 					       ac->devname,c);
    454 #endif
    455 
    456 					break;
    457 				}
    458 				ac=ac->next;
    459 			}
    460 		}
    461 
    462 
    463 
    464 		if (ac!=NULL) {
    465 			/* Found it.  Configure it.. */
    466 			diskPtr->blockSize = ac->clabel->blockSize;
    467 			diskPtr->numBlocks =
    468 			    rf_component_label_numblocks(ac->clabel);
    469 			/* Note: rf_protectedSectors is already
    470 			   factored into numBlocks here */
    471 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
    472 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
    473 
    474 			memcpy(raidget_component_label(raidPtr, c),
    475 			    ac->clabel, sizeof(*ac->clabel));
    476 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    477 			    "/dev/%s", ac->devname);
    478 
    479 			/* note the fact that this component was
    480 			   autoconfigured.  You'll need this info
    481 			   later.  Trust me :) */
    482 			diskPtr->auto_configured = 1;
    483 			diskPtr->dev = ac->dev;
    484 
    485 			/*
    486 			 * we allow the user to specify that
    487 			 * only a fraction of the disks should
    488 			 * be used this is just for debug: it
    489 			 * speeds up the parity scan
    490 			 */
    491 
    492 			diskPtr->numBlocks = diskPtr->numBlocks *
    493 				rf_sizePercentage / 100;
    494 
    495 			/* XXX these will get set multiple times,
    496 			   but since we're autoconfiguring, they'd
    497 			   better be always the same each time!
    498 			   If not, this is the least of your worries */
    499 
    500 			bs = diskPtr->blockSize;
    501 			min_numblks = diskPtr->numBlocks;
    502 
    503 			/* this gets done multiple times, but that's
    504 			   fine -- the serial number will be the same
    505 			   for all components, guaranteed */
    506 			raidPtr->serial_number = ac->clabel->serial_number;
    507 			/* check the last time the label was modified */
    508 
    509 			if (ac->clabel->mod_counter != mod_counter) {
    510 				/* Even though we've filled in all of
    511 				   the above, we don't trust this
    512 				   component since its modification
    513 				   counter is not in sync with the
    514 				   rest, and we really consider it to
    515 				   be failed.  */
    516 				disks[c].status = rf_ds_failed;
    517 				numFailuresThisRow++;
    518 			} else {
    519 				if (ac->clabel->clean != RF_RAID_CLEAN) {
    520 					parity_good = RF_RAID_DIRTY;
    521 				}
    522 			}
    523 		} else {
    524 			/* Didn't find it at all!!  Component must
    525 			   really be dead */
    526 			disks[c].status = rf_ds_failed;
    527 			snprintf(disks[c].devname, sizeof(disks[c].devname),
    528 			    "component%d", c);
    529 			numFailuresThisRow++;
    530 		}
    531 	}
    532 	/* XXX fix for n-fault tolerant */
    533 	/* XXX this should probably check to see how many failures
    534 	   we can handle for this configuration! */
    535 	if (numFailuresThisRow > 0) {
    536 		raidPtr->status = rf_rs_degraded;
    537 		raidPtr->numFailures = numFailuresThisRow;
    538 	}
    539 
    540 	/* close the device for the ones that didn't get used */
    541 
    542 	ac = auto_config;
    543 	while(ac!=NULL) {
    544 		if (ac->flag == 0) {
    545 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    546 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
    547 			vput(ac->vp);
    548 			ac->vp = NULL;
    549 #if DEBUG
    550 			printf("Released %s from auto-config set.\n",
    551 			       ac->devname);
    552 #endif
    553 		}
    554 		ac = ac->next;
    555 	}
    556 
    557 	raidPtr->mod_counter = mod_counter;
    558 
    559 	/* note the state of the parity, if any */
    560 	raidPtr->parity_good = parity_good;
    561 	raidPtr->sectorsPerDisk = min_numblks;
    562 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    563 	raidPtr->bytesPerSector = bs;
    564 	raidPtr->sectorMask = bs - 1;
    565 	return (0);
    566 
    567 fail:
    568 
    569 	rf_UnconfigureVnodes( raidPtr );
    570 
    571 	return (ret);
    572 
    573 }
    574 
    575 /* configure a single disk in the array */
    576 int
    577 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
    578 		 RF_RowCol_t col)
    579 {
    580 	char   *p;
    581 	struct pathbuf *pb;
    582 	struct vnode *vp;
    583 	int     error;
    584 
    585 	p = rf_find_non_white(bf);
    586 	if (p[strlen(p) - 1] == '\n') {
    587 		/* strip off the newline */
    588 		p[strlen(p) - 1] = '\0';
    589 	}
    590 	(void) strcpy(diskPtr->devname, p);
    591 
    592 	/* Let's start by claiming the component is fine and well... */
    593 	diskPtr->status = rf_ds_optimal;
    594 
    595 	raidPtr->raid_cinfo[col].ci_vp = NULL;
    596 	raidPtr->raid_cinfo[col].ci_dev = 0;
    597 
    598 	if (!strcmp("absent", diskPtr->devname)) {
    599 		printf("Ignoring missing component at column %d\n", col);
    600 		snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    601 		    "component%d", col);
    602 		diskPtr->status = rf_ds_failed;
    603 		return (0);
    604 	}
    605 
    606 	pb = pathbuf_create(diskPtr->devname);
    607 	if (pb == NULL) {
    608 		printf("pathbuf_create for device: %s failed!\n",
    609 		       diskPtr->devname);
    610 		return ENOMEM;
    611 	}
    612 	error = dk_lookup(pb, curlwp, &vp);
    613 	pathbuf_destroy(pb);
    614 	if (error) {
    615 		printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
    616 		if (error == ENXIO) {
    617 			/* the component isn't there... must be dead :-( */
    618 			diskPtr->status = rf_ds_failed;
    619 			return 0;
    620 		} else {
    621 			return (error);
    622 		}
    623 	}
    624 
    625 	if ((error = rf_getdisksize(vp, diskPtr)) != 0)
    626 		return (error);
    627 
    628 	/*
    629 	 * If this raidPtr's bytesPerSector is zero, fill it in with this
    630 	 * components blockSize.  This will give us something to work with
    631 	 * initially, and if it is wrong, we'll get errors later.
    632 	 */
    633 	if (raidPtr->bytesPerSector == 0)
    634 		raidPtr->bytesPerSector = diskPtr->blockSize;
    635 
    636 	if (diskPtr->status == rf_ds_optimal) {
    637 		raidPtr->raid_cinfo[col].ci_vp = vp;
    638 		raidPtr->raid_cinfo[col].ci_dev = vp->v_rdev;
    639 
    640 		/* This component was not automatically configured */
    641 		diskPtr->auto_configured = 0;
    642 		diskPtr->dev = vp->v_rdev;
    643 
    644 		/* we allow the user to specify that only a fraction of the
    645 		 * disks should be used this is just for debug:  it speeds up
    646 		 * the parity scan */
    647 		diskPtr->numBlocks = diskPtr->numBlocks *
    648 			rf_sizePercentage / 100;
    649 	}
    650 
    651 	/*
    652 	 * Tell the rest of the kernel to check whether anything's
    653 	 * maximum transfer size has changed -- like, for example,
    654 	 * a filesystem that might be mounted on a set where we're
    655 	 * adding a spare with a smaller maximum transfer size than
    656 	 * the original set members.
    657 	 */
    658 	atomic_inc_uint(&disk_serial);
    659 	return (0);
    660 }
    661 
    662 static void
    663 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
    664 		      RF_ComponentLabel_t *ci_label)
    665 {
    666 
    667 	printf("raid%d: Component %s being configured at col: %d\n",
    668 	       raidPtr->raidid, dev_name, column );
    669 	printf("         Column: %d Num Columns: %d\n",
    670 	       ci_label->column,
    671 	       ci_label->num_columns);
    672 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    673 	       ci_label->version, ci_label->serial_number,
    674 	       ci_label->mod_counter);
    675 	printf("         Clean: %s Status: %d\n",
    676 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    677 }
    678 
    679 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
    680 				 char *dev_name, RF_ComponentLabel_t *ci_label,
    681 				 int serial_number, int mod_counter)
    682 {
    683 	int fatal_error = 0;
    684 
    685 	if (serial_number != ci_label->serial_number) {
    686 		printf("%s has a different serial number: %d %d\n",
    687 		       dev_name, serial_number, ci_label->serial_number);
    688 		fatal_error = 1;
    689 	}
    690 	if (mod_counter != ci_label->mod_counter) {
    691 		printf("%s has a different modification count: %d %d\n",
    692 		       dev_name, mod_counter, ci_label->mod_counter);
    693 	}
    694 
    695 	if (row != ci_label->row) {
    696 		printf("Row out of alignment for: %s\n", dev_name);
    697 		fatal_error = 1;
    698 	}
    699 	if (column != ci_label->column) {
    700 		printf("Column out of alignment for: %s\n", dev_name);
    701 		fatal_error = 1;
    702 	}
    703 	if (raidPtr->numCol != ci_label->num_columns) {
    704 		printf("Number of columns do not match for: %s\n", dev_name);
    705 		fatal_error = 1;
    706 	}
    707 	if (ci_label->clean == 0) {
    708 		/* it's not clean, but that's not fatal */
    709 		printf("%s is not clean!\n", dev_name);
    710 	}
    711 	return(fatal_error);
    712 }
    713 
    714 
    715 static void
    716 rf_handle_hosed(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, int hosed_column,
    717     int again)
    718 {
    719 	printf("Hosed component: %s\n", &cfgPtr->devnames[0][hosed_column][0]);
    720 	if (!cfgPtr->force)
    721 		return;
    722 
    723 	/* we'll fail this component, as if there are
    724 	   other major errors, we aren't forcing things
    725 	   and we'll abort the config anyways */
    726 	if (again && raidPtr->Disks[hosed_column].status == rf_ds_failed)
    727 		return;
    728 
    729 	raidPtr->Disks[hosed_column].status = rf_ds_failed;
    730 	raidPtr->numFailures++;
    731 	raidPtr->status = rf_rs_degraded;
    732 }
    733 
    734 /*
    735 
    736    rf_CheckLabels() - check all the component labels for consistency.
    737    Return an error if there is anything major amiss.
    738 
    739  */
    740 
    741 int
    742 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    743 {
    744 	int c;
    745 	char *dev_name;
    746 	RF_ComponentLabel_t *ci_label;
    747 	int serial_number = 0;
    748 	int mod_number = 0;
    749 	int fatal_error = 0;
    750 	int mod_values[4];
    751 	int mod_count[4];
    752 	int ser_values[4];
    753 	int ser_count[4];
    754 	int num_ser;
    755 	int num_mod;
    756 	int i;
    757 	int found;
    758 	int hosed_column;
    759 	int too_fatal;
    760 	int parity_good;
    761 
    762 	hosed_column = -1;
    763 	too_fatal = 0;
    764 
    765 	/*
    766 	   We're going to try to be a little intelligent here.  If one
    767 	   component's label is bogus, and we can identify that it's the
    768 	   *only* one that's gone, we'll mark it as "failed" and allow
    769 	   the configuration to proceed.  This will be the *only* case
    770 	   that we'll proceed if there would be (otherwise) fatal errors.
    771 
    772 	   Basically we simply keep a count of how many components had
    773 	   what serial number.  If all but one agree, we simply mark
    774 	   the disagreeing component as being failed, and allow
    775 	   things to come up "normally".
    776 
    777 	   We do this first for serial numbers, and then for "mod_counter".
    778 
    779 	 */
    780 
    781 	num_ser = 0;
    782 	num_mod = 0;
    783 
    784 	ser_values[0] = ser_values[1] = ser_values[2] = ser_values[3] = 0;
    785 	ser_count[0] = ser_count[1] = ser_count[2] = ser_count[3] = 0;
    786 	mod_values[0] = mod_values[1] = mod_values[2] = mod_values[3] = 0;
    787 	mod_count[0] = mod_count[1] = mod_count[2] = mod_count[3] = 0;
    788 
    789 	for (c = 0; c < raidPtr->numCol; c++) {
    790 		if (raidPtr->Disks[c].status != rf_ds_optimal)
    791 			continue;
    792 		ci_label = raidget_component_label(raidPtr, c);
    793 		found=0;
    794 		for(i=0;i<num_ser;i++) {
    795 			if (ser_values[i] == ci_label->serial_number) {
    796 				ser_count[i]++;
    797 				found=1;
    798 				break;
    799 			}
    800 		}
    801 		if (!found) {
    802 			ser_values[num_ser] = ci_label->serial_number;
    803 			ser_count[num_ser] = 1;
    804 			num_ser++;
    805 			if (num_ser>2) {
    806 				fatal_error = 1;
    807 				break;
    808 			}
    809 		}
    810 		found=0;
    811 		for(i=0;i<num_mod;i++) {
    812 			if (mod_values[i] == ci_label->mod_counter) {
    813 				mod_count[i]++;
    814 				found=1;
    815 				break;
    816 			}
    817 		}
    818 		if (!found) {
    819 			mod_values[num_mod] = ci_label->mod_counter;
    820 			mod_count[num_mod] = 1;
    821 			num_mod++;
    822 			if (num_mod>2) {
    823 				fatal_error = 1;
    824 				break;
    825 			}
    826 		}
    827 	}
    828 #if DEBUG
    829 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    830 	for(i=0;i<num_ser;i++) {
    831 		printf("%d %d\n", ser_values[i], ser_count[i]);
    832 	}
    833 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    834 	for(i=0;i<num_mod;i++) {
    835 		printf("%d %d\n", mod_values[i], mod_count[i]);
    836 	}
    837 #endif
    838 	serial_number = ser_values[0];
    839 	if (num_ser == 2) {
    840 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    841 			/* Locate the maverick component */
    842 			if (ser_count[1] > ser_count[0]) {
    843 				serial_number = ser_values[1];
    844 			}
    845 
    846 			for (c = 0; c < raidPtr->numCol; c++) {
    847 				if (raidPtr->Disks[c].status != rf_ds_optimal)
    848 					continue;
    849 				ci_label = raidget_component_label(raidPtr, c);
    850 				if (serial_number != ci_label->serial_number) {
    851 					hosed_column = c;
    852 					break;
    853 				}
    854 			}
    855 			if (hosed_column != -1)
    856 				rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
    857 				    0);
    858 		} else {
    859 			too_fatal = 1;
    860 		}
    861 		if (cfgPtr->parityConfig == '0') {
    862 			/* We've identified two different serial numbers.
    863 			   RAID 0 can't cope with that, so we'll punt */
    864 			too_fatal = 1;
    865 		}
    866 
    867 	}
    868 
    869 	/* record the serial number for later.  If we bail later, setting
    870 	   this doesn't matter, otherwise we've got the best guess at the
    871 	   correct serial number */
    872 	raidPtr->serial_number = serial_number;
    873 
    874 	mod_number = mod_values[0];
    875 	if (num_mod == 2) {
    876 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    877 			/* Locate the maverick component */
    878 			if (mod_count[1] > mod_count[0]) {
    879 				mod_number = mod_values[1];
    880 			} else if (mod_count[1] < mod_count[0]) {
    881 				mod_number = mod_values[0];
    882 			} else {
    883 				/* counts of different modification values
    884 				   are the same.   Assume greater value is
    885 				   the correct one, all other things
    886 				   considered */
    887 				if (mod_values[0] > mod_values[1]) {
    888 					mod_number = mod_values[0];
    889 				} else {
    890 					mod_number = mod_values[1];
    891 				}
    892 
    893 			}
    894 
    895 			for (c = 0; c < raidPtr->numCol; c++) {
    896 				if (raidPtr->Disks[c].status != rf_ds_optimal)
    897 					continue;
    898 
    899 				ci_label = raidget_component_label(raidPtr, c);
    900 				if (mod_number != ci_label->mod_counter) {
    901 					if (hosed_column == c) {
    902 						/* same one.  Can
    903 						   deal with it.  */
    904 					} else {
    905 						hosed_column = c;
    906 						if (num_ser != 1) {
    907 							too_fatal = 1;
    908 							break;
    909 						}
    910 					}
    911 				}
    912 			}
    913 			if (hosed_column != -1)
    914 				rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
    915 				    1);
    916 		} else {
    917 			too_fatal = 1;
    918 		}
    919 		if (cfgPtr->parityConfig == '0') {
    920 			/* We've identified two different mod counters.
    921 			   RAID 0 can't cope with that, so we'll punt */
    922 			too_fatal = 1;
    923 		}
    924 	}
    925 
    926 	raidPtr->mod_counter = mod_number;
    927 
    928 	if (too_fatal) {
    929 		/* we've had both a serial number mismatch, and a mod_counter
    930 		   mismatch -- and they involved two different components!!
    931 		   Bail -- make things fail so that the user must force
    932 		   the issue... */
    933 		hosed_column = -1;
    934 		fatal_error = 1;
    935 	}
    936 
    937 	if (num_ser > 2) {
    938 		printf("raid%d: Too many different serial numbers!\n",
    939 		       raidPtr->raidid);
    940 		fatal_error = 1;
    941 	}
    942 
    943 	if (num_mod > 2) {
    944 		printf("raid%d: Too many different mod counters!\n",
    945 		       raidPtr->raidid);
    946 		fatal_error = 1;
    947 	}
    948 
    949         for (c = 0; c < raidPtr->numCol; c++) {
    950 		if (raidPtr->Disks[c].status != rf_ds_optimal) {
    951 			hosed_column = c;
    952 			break;
    953 		}
    954 	}
    955 
    956 	/* we start by assuming the parity will be good, and flee from
    957 	   that notion at the slightest sign of trouble */
    958 
    959 	parity_good = RF_RAID_CLEAN;
    960 
    961 	for (c = 0; c < raidPtr->numCol; c++) {
    962 		dev_name = &cfgPtr->devnames[0][c][0];
    963 		ci_label = raidget_component_label(raidPtr, c);
    964 
    965 		if (c == hosed_column) {
    966 			printf("raid%d: Ignoring %s\n",
    967 			       raidPtr->raidid, dev_name);
    968 		} else {
    969 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
    970 			if (rf_check_label_vitals( raidPtr, 0, c,
    971 						   dev_name, ci_label,
    972 						   serial_number,
    973 						   mod_number )) {
    974 				fatal_error = 1;
    975 			}
    976 			if (ci_label->clean != RF_RAID_CLEAN) {
    977 				parity_good = RF_RAID_DIRTY;
    978 			}
    979 		}
    980 	}
    981 
    982 	if (fatal_error) {
    983 		parity_good = RF_RAID_DIRTY;
    984 	}
    985 
    986 	/* we note the state of the parity */
    987 	raidPtr->parity_good = parity_good;
    988 
    989 	return(fatal_error);
    990 }
    991 
    992 int
    993 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
    994 {
    995 	RF_RaidDisk_t *disks;
    996 	RF_DiskQueue_t *spareQueues;
    997 	int ret;
    998 	unsigned int bs;
    999 	int spare_number;
   1000 
   1001 	ret=0;
   1002 
   1003 	if (raidPtr->numSpare >= RF_MAXSPARE) {
   1004 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
   1005 		return(EINVAL);
   1006 	}
   1007 
   1008 	rf_lock_mutex2(raidPtr->mutex);
   1009 	while (raidPtr->adding_hot_spare == 1) {
   1010 		rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
   1011 	}
   1012 	raidPtr->adding_hot_spare = 1;
   1013 	rf_unlock_mutex2(raidPtr->mutex);
   1014 
   1015 	/* the beginning of the spares... */
   1016 	disks = &raidPtr->Disks[raidPtr->numCol];
   1017 
   1018 	spare_number = raidPtr->numSpare;
   1019 
   1020 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1021 			       &disks[spare_number],
   1022 			       raidPtr->numCol + spare_number);
   1023 
   1024 	if (ret)
   1025 		goto fail;
   1026 	if (disks[spare_number].status != rf_ds_optimal) {
   1027 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1028 			     sparePtr->component_name);
   1029 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1030 		ret=EINVAL;
   1031 		goto fail;
   1032 	} else {
   1033 		disks[spare_number].status = rf_ds_spare;
   1034 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
   1035 			 spare_number,
   1036 			 disks[spare_number].devname,
   1037 			 disks[spare_number].numBlocks,
   1038 			 disks[spare_number].blockSize,
   1039 			 (long int) disks[spare_number].numBlocks *
   1040 			 disks[spare_number].blockSize / 1024 / 1024);
   1041 	}
   1042 
   1043 
   1044 	/* check sizes and block sizes on the spare disk */
   1045 	bs = 1 << raidPtr->logBytesPerSector;
   1046 	if (disks[spare_number].blockSize != bs) {
   1047 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1048 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1049 		ret = EINVAL;
   1050 		goto fail;
   1051 	}
   1052 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1053 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
   1054 			     disks[spare_number].devname,
   1055 			     disks[spare_number].blockSize,
   1056 			     raidPtr->sectorsPerDisk);
   1057 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1058 		ret = EINVAL;
   1059 		goto fail;
   1060 	} else {
   1061 		if (disks[spare_number].numBlocks >
   1062 		    raidPtr->sectorsPerDisk) {
   1063 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
   1064 			    disks[spare_number].devname,
   1065 			    raidPtr->sectorsPerDisk,
   1066 			    disks[spare_number].numBlocks);
   1067 
   1068 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1069 		}
   1070 	}
   1071 
   1072 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
   1073 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1074 				 raidPtr->numCol + spare_number,
   1075 				 raidPtr->qType,
   1076 				 raidPtr->sectorsPerDisk,
   1077 				 raidPtr->Disks[raidPtr->numCol +
   1078 						  spare_number].dev,
   1079 				 raidPtr->maxOutstanding,
   1080 				 &raidPtr->shutdownList,
   1081 				 raidPtr->cleanupList);
   1082 
   1083 	rf_lock_mutex2(raidPtr->mutex);
   1084 	raidPtr->numSpare++;
   1085 	rf_unlock_mutex2(raidPtr->mutex);
   1086 
   1087 fail:
   1088 	rf_lock_mutex2(raidPtr->mutex);
   1089 	raidPtr->adding_hot_spare = 0;
   1090 	rf_signal_cond2(raidPtr->adding_hot_spare_cv);
   1091 	rf_unlock_mutex2(raidPtr->mutex);
   1092 
   1093 	return(ret);
   1094 }
   1095 
   1096 int
   1097 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
   1098 {
   1099 #if 0
   1100 	int spare_number;
   1101 #endif
   1102 
   1103 	if (raidPtr->numSpare==0) {
   1104 		printf("No spares to remove!\n");
   1105 		return(EINVAL);
   1106 	}
   1107 
   1108 	return(EINVAL); /* XXX not implemented yet */
   1109 #if 0
   1110 	spare_number = sparePtr->column;
   1111 
   1112 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1113 		return(EINVAL);
   1114 	}
   1115 
   1116 	/* verify that this spare isn't in use... */
   1117 
   1118 
   1119 
   1120 
   1121 	/* it's gone.. */
   1122 
   1123 	raidPtr->numSpare--;
   1124 
   1125 	return(0);
   1126 #endif
   1127 }
   1128 
   1129 
   1130 int
   1131 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
   1132 {
   1133 #if 0
   1134 	RF_RaidDisk_t *disks;
   1135 #endif
   1136 
   1137 	if ((component->column < 0) ||
   1138 	    (component->column >= raidPtr->numCol)) {
   1139 		return(EINVAL);
   1140 	}
   1141 
   1142 #if 0
   1143 	disks = &raidPtr->Disks[component->column];
   1144 #endif
   1145 
   1146 	/* 1. This component must be marked as 'failed' */
   1147 
   1148 	return(EINVAL); /* Not implemented yet. */
   1149 }
   1150 
   1151 int
   1152 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
   1153     RF_SingleComponent_t *component)
   1154 {
   1155 
   1156 	/* Issues here include how to 'move' this in if there is IO
   1157 	   taking place (e.g. component queues and such) */
   1158 
   1159 	return(EINVAL); /* Not implemented yet. */
   1160 }
   1161