Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.91.4.1
      1 /*	$NetBSD: rf_disks.c,v 1.91.4.1 2022/08/12 15:18:13 martin Exp $	*/
      2 /*-
      3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Greg Oster
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Copyright (c) 1995 Carnegie-Mellon University.
     33  * All rights reserved.
     34  *
     35  * Author: Mark Holland
     36  *
     37  * Permission to use, copy, modify and distribute this software and
     38  * its documentation is hereby granted, provided that both the copyright
     39  * notice and this permission notice appear in all copies of the
     40  * software, derivative works or modified versions, and any portions
     41  * thereof, and that both notices appear in supporting documentation.
     42  *
     43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     46  *
     47  * Carnegie Mellon requests users of this software to return to
     48  *
     49  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     50  *  School of Computer Science
     51  *  Carnegie Mellon University
     52  *  Pittsburgh PA 15213-3890
     53  *
     54  * any improvements or extensions that they make and grant Carnegie the
     55  * rights to redistribute these changes.
     56  */
     57 
     58 /***************************************************************
     59  * rf_disks.c -- code to perform operations on the actual disks
     60  ***************************************************************/
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.91.4.1 2022/08/12 15:18:13 martin Exp $");
     64 
     65 #include <dev/raidframe/raidframevar.h>
     66 
     67 #include "rf_raid.h"
     68 #include "rf_alloclist.h"
     69 #include "rf_utils.h"
     70 #include "rf_general.h"
     71 #include "rf_options.h"
     72 #include "rf_kintf.h"
     73 #include "rf_netbsd.h"
     74 
     75 #include <sys/param.h>
     76 #include <sys/systm.h>
     77 #include <sys/proc.h>
     78 #include <sys/ioctl.h>
     79 #include <sys/fcntl.h>
     80 #include <sys/vnode.h>
     81 #include <sys/namei.h> /* for pathbuf */
     82 #include <sys/kauth.h>
     83 #include <miscfs/specfs/specdev.h> /* for v_rdev */
     84 
     85 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
     86 static void rf_print_label_status( RF_Raid_t *, int, char *,
     87 				  RF_ComponentLabel_t *);
     88 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
     89 				  RF_ComponentLabel_t *, int, int );
     90 
/*
 * Debug printf wrappers: a format string plus five (DPRINTF6) or six
 * (DPRINTF7) arguments, printed only when rf_diskDebug is non-zero.
 *
 * The bodies are wrapped in do { } while (0) so that each invocation is a
 * single statement; with a bare `if' an `else' following the macro would
 * silently attach to the macro's hidden `if'.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     93 
     94 /**************************************************************************
     95  *
     96  * initialize the disks comprising the array
     97  *
     98  * We want the spare disks to have regular row,col numbers so that we can
     99  * easily substitue a spare for a failed disk.  But, the driver code assumes
    100  * throughout that the array contains numRow by numCol _non-spare_ disks, so
    101  * it's not clear how to fit in the spares.  This is an unfortunate holdover
    102  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
    103  * rest, and put all the spares in it.  This probably needs to get changed
    104  * eventually.
    105  *
    106  **************************************************************************/
    107 
    108 int
    109 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    110 		  RF_Config_t *cfgPtr)
    111 {
    112 	RF_RaidDisk_t *disks;
    113 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    114 	RF_RowCol_t c;
    115 	int bs, ret;
    116 	unsigned i, count, foundone = 0, numFailuresThisRow;
    117 	int force;
    118 
    119 	force = cfgPtr->force;
    120 
    121 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    122 	if (ret)
    123 		goto fail;
    124 
    125 	disks = raidPtr->Disks;
    126 
    127 	numFailuresThisRow = 0;
    128 	for (c = 0; c < raidPtr->numCol; c++) {
    129 		ret = rf_ConfigureDisk(raidPtr,
    130 				       &cfgPtr->devnames[0][c][0],
    131 				       &disks[c], c);
    132 
    133 		if (ret)
    134 			goto fail;
    135 
    136 		if (disks[c].status == rf_ds_optimal) {
    137 			ret = raidfetch_component_label(raidPtr, c);
    138 			if (ret)
    139 				goto fail;
    140 
    141 			/* mark it as failed if the label looks bogus... */
    142 			if (!rf_reasonable_label(&raidPtr->raid_cinfo[c].ci_label,0) && !force) {
    143 				disks[c].status = rf_ds_failed;
    144 			}
    145 		}
    146 
    147 		if (disks[c].status != rf_ds_optimal) {
    148 			numFailuresThisRow++;
    149 		} else {
    150 			if (disks[c].numBlocks < min_numblks)
    151 				min_numblks = disks[c].numBlocks;
    152 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
    153 				 c, disks[c].devname,
    154 				 disks[c].numBlocks,
    155 				 disks[c].blockSize,
    156 				 (long int) disks[c].numBlocks *
    157 				 disks[c].blockSize / 1024 / 1024);
    158 		}
    159 	}
    160 	/* XXX fix for n-fault tolerant */
    161 	/* XXX this should probably check to see how many failures
    162 	   we can handle for this configuration! */
    163 	if (numFailuresThisRow > 0)
    164 		raidPtr->status = rf_rs_degraded;
    165 
    166 	/* all disks must be the same size & have the same block size, bs must
    167 	 * be a power of 2 */
    168 	bs = 0;
    169 	foundone = 0;
    170 	for (c = 0; c < raidPtr->numCol; c++) {
    171 		if (disks[c].status == rf_ds_optimal) {
    172 			bs = disks[c].blockSize;
    173 			foundone = 1;
    174 			break;
    175 		}
    176 	}
    177 	if (!foundone) {
    178 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    179 		ret = EINVAL;
    180 		goto fail;
    181 	}
    182 	for (count = 0, i = 1; i; i <<= 1)
    183 		if (bs & i)
    184 			count++;
    185 	if (count != 1) {
    186 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    187 		ret = EINVAL;
    188 		goto fail;
    189 	}
    190 
    191 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
    192 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
    193 		if (force != 0) {
    194 			printf("raid%d: Fatal errors being ignored.\n",
    195 			       raidPtr->raidid);
    196 		} else {
    197 			ret = EINVAL;
    198 			goto fail;
    199 		}
    200 	}
    201 
    202 	for (c = 0; c < raidPtr->numCol; c++) {
    203 		if (disks[c].status == rf_ds_optimal) {
    204 			if (disks[c].blockSize != bs) {
    205 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
    206 				ret = EINVAL;
    207 				goto fail;
    208 			}
    209 			if (disks[c].numBlocks != min_numblks) {
    210 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
    211 					     c, (int) min_numblks);
    212 				disks[c].numBlocks = min_numblks;
    213 			}
    214 		}
    215 	}
    216 
    217 	raidPtr->sectorsPerDisk = min_numblks;
    218 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    219 	raidPtr->bytesPerSector = bs;
    220 	raidPtr->sectorMask = bs - 1;
    221 	return (0);
    222 
    223 fail:
    224 
    225 	rf_UnconfigureVnodes( raidPtr );
    226 
    227 	return (ret);
    228 }
    229 
    230 
    231 /****************************************************************************
    232  * set up the data structures describing the spare disks in the array
    233  * recall from the above comment that the spare disk descriptors are stored
    234  * in row zero, which is specially expanded to hold them.
    235  ****************************************************************************/
    236 int
    237 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
    238 		       RF_Config_t *cfgPtr)
    239 {
    240 	int     i, ret;
    241 	unsigned int bs;
    242 	RF_RaidDisk_t *disks;
    243 	int     num_spares_done;
    244 
    245 	num_spares_done = 0;
    246 
    247 	/* The space for the spares should have already been allocated by
    248 	 * ConfigureDisks() */
    249 
    250 	disks = &raidPtr->Disks[raidPtr->numCol];
    251 	for (i = 0; i < raidPtr->numSpare; i++) {
    252 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    253 				       &disks[i], raidPtr->numCol + i);
    254 		if (ret)
    255 			goto fail;
    256 		if (disks[i].status != rf_ds_optimal) {
    257 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
    258 				     &cfgPtr->spare_names[i][0]);
    259 		} else {
    260 			disks[i].status = rf_ds_spare;	/* change status to
    261 							 * spare */
    262 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
    263 			    disks[i].devname,
    264 			    disks[i].numBlocks, disks[i].blockSize,
    265 			    (long int) disks[i].numBlocks *
    266 				 disks[i].blockSize / 1024 / 1024);
    267 		}
    268 		num_spares_done++;
    269 	}
    270 
    271 	/* check sizes and block sizes on spare disks */
    272 	bs = 1 << raidPtr->logBytesPerSector;
    273 	for (i = 0; i < raidPtr->numSpare; i++) {
    274 		if (disks[i].blockSize != bs) {
    275 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    276 			ret = EINVAL;
    277 			goto fail;
    278 		}
    279 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    280 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
    281 				     disks[i].devname, disks[i].blockSize,
    282 				     raidPtr->sectorsPerDisk);
    283 			ret = EINVAL;
    284 			goto fail;
    285 		} else
    286 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    287 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
    288 				    disks[i].devname,
    289 				    raidPtr->sectorsPerDisk,
    290 				    disks[i].numBlocks);
    291 
    292 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    293 			}
    294 	}
    295 
    296 	return (0);
    297 
    298 fail:
    299 
    300 	/* Release the hold on the main components.  We've failed to allocate
    301 	 * a spare, and since we're failing, we need to free things..
    302 
    303 	 XXX failing to allocate a spare is *not* that big of a deal...
    304 	 We *can* survive without it, if need be, esp. if we get hot
    305 	 adding working.
    306 
    307 	 If we don't fail out here, then we need a way to remove this spare...
    308 	 that should be easier to do here than if we are "live"...
    309 
    310 	 */
    311 
    312 	rf_UnconfigureVnodes( raidPtr );
    313 
    314 	return (ret);
    315 }
    316 
    317 static int
    318 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    319 {
    320 	int ret;
    321 	size_t entries = raidPtr->numCol + RF_MAXSPARE;
    322 
    323 	/* We allocate RF_MAXSPARE on the first row so that we
    324 	   have room to do hot-swapping of spares */
    325 	raidPtr->Disks = RF_MallocAndAdd(
    326 	    entries * sizeof(*raidPtr->Disks), raidPtr->cleanupList);
    327 	if (raidPtr->Disks == NULL) {
    328 		ret = ENOMEM;
    329 		goto fail;
    330 	}
    331 
    332 	/* get space for device specific stuff.. */
    333 	raidPtr->raid_cinfo = RF_MallocAndAdd(
    334 	    entries * sizeof(*raidPtr->raid_cinfo), raidPtr->cleanupList);
    335 	if (raidPtr->raid_cinfo == NULL) {
    336 		ret = ENOMEM;
    337 		goto fail;
    338 	}
    339 
    340 	return(0);
    341 fail:
    342 	rf_UnconfigureVnodes( raidPtr );
    343 
    344 	return(ret);
    345 }
    346 
    347 
    348 /* configure a single disk during auto-configuration at boot */
    349 int
    350 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
    351 		      RF_AutoConfig_t *auto_config)
    352 {
    353 	RF_RaidDisk_t *disks;
    354 	RF_RaidDisk_t *diskPtr;
    355 	RF_RowCol_t c;
    356 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
    357 	int bs, ret;
    358 	int numFailuresThisRow;
    359 	RF_AutoConfig_t *ac;
    360 	int parity_good;
    361 	int mod_counter;
    362 	int mod_counter_found;
    363 
    364 #if DEBUG
    365 	printf("Starting autoconfiguration of RAID set...\n");
    366 #endif
    367 
    368 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
    369 	if (ret)
    370 		goto fail;
    371 
    372 	disks = raidPtr->Disks;
    373 
    374 	/* assume the parity will be fine.. */
    375 	parity_good = RF_RAID_CLEAN;
    376 
    377 	/* Check for mod_counters that are too low */
    378 	mod_counter_found = 0;
    379 	mod_counter = 0;
    380 	ac = auto_config;
    381 	while(ac!=NULL) {
    382 		if (mod_counter_found==0) {
    383 			mod_counter = ac->clabel->mod_counter;
    384 			mod_counter_found = 1;
    385 		} else {
    386 			if (ac->clabel->mod_counter > mod_counter) {
    387 				mod_counter = ac->clabel->mod_counter;
    388 			}
    389 		}
    390 		ac->flag = 0; /* clear the general purpose flag */
    391 		ac = ac->next;
    392 	}
    393 
    394 	bs = 0;
    395 
    396 	numFailuresThisRow = 0;
    397 	for (c = 0; c < raidPtr->numCol; c++) {
    398 		diskPtr = &disks[c];
    399 
    400 		/* find this row/col in the autoconfig */
    401 #if DEBUG
    402 		printf("Looking for %d in autoconfig\n",c);
    403 #endif
    404 		ac = auto_config;
    405 		while(ac!=NULL) {
    406 			if (ac->clabel==NULL) {
    407 				/* big-time bad news. */
    408 				goto fail;
    409 			}
    410 			if ((ac->clabel->column == c) &&
    411 			    (ac->clabel->mod_counter == mod_counter)) {
    412 				/* it's this one... */
    413 				/* flag it as 'used', so we don't
    414 				   free it later. */
    415 				ac->flag = 1;
    416 #if DEBUG
    417 				printf("Found: %s at %d\n",
    418 				       ac->devname,c);
    419 #endif
    420 
    421 				break;
    422 			}
    423 			ac=ac->next;
    424 		}
    425 
    426 		if (ac==NULL) {
    427 			/* we didn't find an exact match with a
    428 			   correct mod_counter above... can we find
    429 			   one with an incorrect mod_counter to use
    430 			   instead?  (this one, if we find it, will be
    431 			   marked as failed once the set configures)
    432 			*/
    433 
    434 			ac = auto_config;
    435 			while(ac!=NULL) {
    436 				if (ac->clabel==NULL) {
    437 					/* big-time bad news. */
    438 					goto fail;
    439 				}
    440 				if (ac->clabel->column == c) {
    441 					/* it's this one...
    442 					   flag it as 'used', so we
    443 					   don't free it later. */
    444 					ac->flag = 1;
    445 #if DEBUG
    446 					printf("Found(low mod_counter): %s at %d\n",
    447 					       ac->devname,c);
    448 #endif
    449 
    450 					break;
    451 				}
    452 				ac=ac->next;
    453 			}
    454 		}
    455 
    456 
    457 
    458 		if (ac!=NULL) {
    459 			/* Found it.  Configure it.. */
    460 			diskPtr->blockSize = ac->clabel->blockSize;
    461 			diskPtr->numBlocks =
    462 			    rf_component_label_numblocks(ac->clabel);
    463 			/* Note: rf_protectedSectors is already
    464 			   factored into numBlocks here */
    465 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
    466 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
    467 
    468 			memcpy(raidget_component_label(raidPtr, c),
    469 			    ac->clabel, sizeof(*ac->clabel));
    470 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    471 			    "/dev/%s", ac->devname);
    472 
    473 			/* note the fact that this component was
    474 			   autoconfigured.  You'll need this info
    475 			   later.  Trust me :) */
    476 			diskPtr->auto_configured = 1;
    477 			diskPtr->dev = ac->dev;
    478 
    479 			/*
    480 			 * we allow the user to specify that
    481 			 * only a fraction of the disks should
    482 			 * be used this is just for debug: it
    483 			 * speeds up the parity scan
    484 			 */
    485 
    486 			diskPtr->numBlocks = diskPtr->numBlocks *
    487 				rf_sizePercentage / 100;
    488 
    489 			/* XXX these will get set multiple times,
    490 			   but since we're autoconfiguring, they'd
    491 			   better be always the same each time!
    492 			   If not, this is the least of your worries */
    493 
    494 			bs = diskPtr->blockSize;
    495 			min_numblks = diskPtr->numBlocks;
    496 
    497 			/* this gets done multiple times, but that's
    498 			   fine -- the serial number will be the same
    499 			   for all components, guaranteed */
    500 			raidPtr->serial_number = ac->clabel->serial_number;
    501 			/* check the last time the label was modified */
    502 
    503 			if (ac->clabel->mod_counter != mod_counter) {
    504 				/* Even though we've filled in all of
    505 				   the above, we don't trust this
    506 				   component since its modification
    507 				   counter is not in sync with the
    508 				   rest, and we really consider it to
    509 				   be failed.  */
    510 				disks[c].status = rf_ds_failed;
    511 				numFailuresThisRow++;
    512 			} else {
    513 				if (ac->clabel->clean != RF_RAID_CLEAN) {
    514 					parity_good = RF_RAID_DIRTY;
    515 				}
    516 			}
    517 		} else {
    518 			/* Didn't find it at all!!  Component must
    519 			   really be dead */
    520 			disks[c].status = rf_ds_failed;
    521 			snprintf(disks[c].devname, sizeof(disks[c].devname),
    522 			    "component%d", c);
    523 			numFailuresThisRow++;
    524 		}
    525 	}
    526 	/* XXX fix for n-fault tolerant */
    527 	/* XXX this should probably check to see how many failures
    528 	   we can handle for this configuration! */
    529 	if (numFailuresThisRow > 0) {
    530 		raidPtr->status = rf_rs_degraded;
    531 		raidPtr->numFailures = numFailuresThisRow;
    532 	}
    533 
    534 	/* close the device for the ones that didn't get used */
    535 
    536 	ac = auto_config;
    537 	while(ac!=NULL) {
    538 		if (ac->flag == 0) {
    539 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
    540 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
    541 			vput(ac->vp);
    542 			ac->vp = NULL;
    543 #if DEBUG
    544 			printf("Released %s from auto-config set.\n",
    545 			       ac->devname);
    546 #endif
    547 		}
    548 		ac = ac->next;
    549 	}
    550 
    551 	raidPtr->mod_counter = mod_counter;
    552 
    553 	/* note the state of the parity, if any */
    554 	raidPtr->parity_good = parity_good;
    555 	raidPtr->sectorsPerDisk = min_numblks;
    556 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    557 	raidPtr->bytesPerSector = bs;
    558 	raidPtr->sectorMask = bs - 1;
    559 	return (0);
    560 
    561 fail:
    562 
    563 	rf_UnconfigureVnodes( raidPtr );
    564 
    565 	return (ret);
    566 
    567 }
    568 
    569 /* configure a single disk in the array */
    570 int
    571 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
    572 		 RF_RowCol_t col)
    573 {
    574 	char   *p;
    575 	struct pathbuf *pb;
    576 	struct vnode *vp;
    577 	int     error;
    578 
    579 	p = rf_find_non_white(bf);
    580 	if (p[strlen(p) - 1] == '\n') {
    581 		/* strip off the newline */
    582 		p[strlen(p) - 1] = '\0';
    583 	}
    584 	(void) strcpy(diskPtr->devname, p);
    585 
    586 	/* Let's start by claiming the component is fine and well... */
    587 	diskPtr->status = rf_ds_optimal;
    588 
    589 	raidPtr->raid_cinfo[col].ci_vp = NULL;
    590 	raidPtr->raid_cinfo[col].ci_dev = 0;
    591 
    592 	if (!strcmp("absent", diskPtr->devname)) {
    593 		printf("Ignoring missing component at column %d\n", col);
    594 		snprintf(diskPtr->devname, sizeof(diskPtr->devname),
    595 		    "component%d", col);
    596 		diskPtr->status = rf_ds_failed;
    597 		return (0);
    598 	}
    599 
    600 	pb = pathbuf_create(diskPtr->devname);
    601 	if (pb == NULL) {
    602 		printf("pathbuf_create for device: %s failed!\n",
    603 		       diskPtr->devname);
    604 		return ENOMEM;
    605 	}
    606 	error = dk_lookup(pb, curlwp, &vp);
    607 	pathbuf_destroy(pb);
    608 	if (error) {
    609 		printf("dk_lookup on device: '%s' failed: %d\n",
    610 		    diskPtr->devname, error);
    611 		if (error == ENXIO) {
    612 			/* the component isn't there... must be dead :-( */
    613 			diskPtr->status = rf_ds_failed;
    614 			return 0;
    615 		} else {
    616 			return (error);
    617 		}
    618 	}
    619 
    620 	if ((error = rf_getdisksize(vp, diskPtr)) != 0)
    621 		return (error);
    622 
    623 	/*
    624 	 * If this raidPtr's bytesPerSector is zero, fill it in with this
    625 	 * components blockSize.  This will give us something to work with
    626 	 * initially, and if it is wrong, we'll get errors later.
    627 	 */
    628 	if (raidPtr->bytesPerSector == 0)
    629 		raidPtr->bytesPerSector = diskPtr->blockSize;
    630 
    631 	if (diskPtr->status == rf_ds_optimal) {
    632 		raidPtr->raid_cinfo[col].ci_vp = vp;
    633 		raidPtr->raid_cinfo[col].ci_dev = vp->v_rdev;
    634 
    635 		/* This component was not automatically configured */
    636 		diskPtr->auto_configured = 0;
    637 		diskPtr->dev = vp->v_rdev;
    638 
    639 		/* we allow the user to specify that only a fraction of the
    640 		 * disks should be used this is just for debug:  it speeds up
    641 		 * the parity scan */
    642 		diskPtr->numBlocks = diskPtr->numBlocks *
    643 			rf_sizePercentage / 100;
    644 	}
    645 	return (0);
    646 }
    647 
    648 static void
    649 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
    650 		      RF_ComponentLabel_t *ci_label)
    651 {
    652 
    653 	printf("raid%d: Component %s being configured at col: %d\n",
    654 	       raidPtr->raidid, dev_name, column );
    655 	printf("         Column: %d Num Columns: %d\n",
    656 	       ci_label->column,
    657 	       ci_label->num_columns);
    658 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
    659 	       ci_label->version, ci_label->serial_number,
    660 	       ci_label->mod_counter);
    661 	printf("         Clean: %s Status: %d\n",
    662 	       ci_label->clean ? "Yes" : "No", ci_label->status );
    663 }
    664 
    665 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
    666 				 char *dev_name, RF_ComponentLabel_t *ci_label,
    667 				 int serial_number, int mod_counter)
    668 {
    669 	int fatal_error = 0;
    670 
    671 	if (serial_number != ci_label->serial_number) {
    672 		printf("%s has a different serial number: %d %d\n",
    673 		       dev_name, serial_number, ci_label->serial_number);
    674 		fatal_error = 1;
    675 	}
    676 	if (mod_counter != ci_label->mod_counter) {
    677 		printf("%s has a different modification count: %d %d\n",
    678 		       dev_name, mod_counter, ci_label->mod_counter);
    679 	}
    680 
    681 	if (row != ci_label->row) {
    682 		printf("Row out of alignment for: %s\n", dev_name);
    683 		fatal_error = 1;
    684 	}
    685 	if (column != ci_label->column) {
    686 		printf("Column out of alignment for: %s\n", dev_name);
    687 		fatal_error = 1;
    688 	}
    689 	if (raidPtr->numCol != ci_label->num_columns) {
    690 		printf("Number of columns do not match for: %s\n", dev_name);
    691 		fatal_error = 1;
    692 	}
    693 	if (ci_label->clean == 0) {
    694 		/* it's not clean, but that's not fatal */
    695 		printf("%s is not clean!\n", dev_name);
    696 	}
    697 	return(fatal_error);
    698 }
    699 
    700 
    701 static void
    702 rf_handle_hosed(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, int hosed_column,
    703     int again)
    704 {
    705 	printf("Hosed component: %s\n", &cfgPtr->devnames[0][hosed_column][0]);
    706 	if (cfgPtr->force)
    707 		return;
    708 
    709 	/* we'll fail this component, as if there are
    710 	   other major errors, we aren't forcing things
    711 	   and we'll abort the config anyways */
    712 	if (again && raidPtr->Disks[hosed_column].status == rf_ds_failed)
    713 		return;
    714 
    715 	raidPtr->Disks[hosed_column].status = rf_ds_failed;
    716 	raidPtr->numFailures++;
    717 	raidPtr->status = rf_rs_degraded;
    718 }
    719 
    720 /*
    721 
    722    rf_CheckLabels() - check all the component labels for consistency.
    723    Return an error if there is anything major amiss.
    724 
    725  */
    726 
    727 int
    728 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
    729 {
    730 	int c;
    731 	char *dev_name;
    732 	RF_ComponentLabel_t *ci_label;
    733 	int serial_number = 0;
    734 	int mod_number = 0;
    735 	int fatal_error = 0;
    736 	int mod_values[4];
    737 	int mod_count[4];
    738 	int ser_values[4];
    739 	int ser_count[4];
    740 	int num_ser;
    741 	int num_mod;
    742 	int i;
    743 	int found;
    744 	int hosed_column;
    745 	int too_fatal;
    746 	int parity_good;
    747 
    748 	hosed_column = -1;
    749 	too_fatal = 0;
    750 
    751 	/*
    752 	   We're going to try to be a little intelligent here.  If one
    753 	   component's label is bogus, and we can identify that it's the
    754 	   *only* one that's gone, we'll mark it as "failed" and allow
    755 	   the configuration to proceed.  This will be the *only* case
    756 	   that we'll proceed if there would be (otherwise) fatal errors.
    757 
    758 	   Basically we simply keep a count of how many components had
    759 	   what serial number.  If all but one agree, we simply mark
    760 	   the disagreeing component as being failed, and allow
    761 	   things to come up "normally".
    762 
    763 	   We do this first for serial numbers, and then for "mod_counter".
    764 
    765 	 */
    766 
    767 	num_ser = 0;
    768 	num_mod = 0;
    769 
    770 	ser_values[0] = ser_values[1] = ser_values[2] = ser_values[3] = 0;
    771 	ser_count[0] = ser_count[1] = ser_count[2] = ser_count[3] = 0;
    772 	mod_values[0] = mod_values[1] = mod_values[2] = mod_values[3] = 0;
    773 	mod_count[0] = mod_count[1] = mod_count[2] = mod_count[3] = 0;
    774 
    775 	for (c = 0; c < raidPtr->numCol; c++) {
    776 		if (raidPtr->Disks[c].status != rf_ds_optimal)
    777 			continue;
    778 		ci_label = raidget_component_label(raidPtr, c);
    779 		found=0;
    780 		for(i=0;i<num_ser;i++) {
    781 			if (ser_values[i] == ci_label->serial_number) {
    782 				ser_count[i]++;
    783 				found=1;
    784 				break;
    785 			}
    786 		}
    787 		if (!found) {
    788 			ser_values[num_ser] = ci_label->serial_number;
    789 			ser_count[num_ser] = 1;
    790 			num_ser++;
    791 			if (num_ser>2) {
    792 				fatal_error = 1;
    793 				break;
    794 			}
    795 		}
    796 		found=0;
    797 		for(i=0;i<num_mod;i++) {
    798 			if (mod_values[i] == ci_label->mod_counter) {
    799 				mod_count[i]++;
    800 				found=1;
    801 				break;
    802 			}
    803 		}
    804 		if (!found) {
    805 			mod_values[num_mod] = ci_label->mod_counter;
    806 			mod_count[num_mod] = 1;
    807 			num_mod++;
    808 			if (num_mod>2) {
    809 				fatal_error = 1;
    810 				break;
    811 			}
    812 		}
    813 	}
    814 #if DEBUG
    815 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
    816 	for(i=0;i<num_ser;i++) {
    817 		printf("%d %d\n", ser_values[i], ser_count[i]);
    818 	}
    819 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
    820 	for(i=0;i<num_mod;i++) {
    821 		printf("%d %d\n", mod_values[i], mod_count[i]);
    822 	}
    823 #endif
    824 	serial_number = ser_values[0];
    825 	if (num_ser == 2) {
    826 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
    827 			/* Locate the maverick component */
    828 			if (ser_count[1] > ser_count[0]) {
    829 				serial_number = ser_values[1];
    830 			}
    831 
    832 			for (c = 0; c < raidPtr->numCol; c++) {
    833 				if (raidPtr->Disks[c].status != rf_ds_optimal)
    834 					continue;
    835 				ci_label = raidget_component_label(raidPtr, c);
    836 				if (serial_number != ci_label->serial_number) {
    837 					hosed_column = c;
    838 					break;
    839 				}
    840 			}
    841 			if (hosed_column != -1)
    842 				rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
    843 				    0);
    844 		} else {
    845 			too_fatal = 1;
    846 		}
    847 		if (cfgPtr->parityConfig == '0') {
    848 			/* We've identified two different serial numbers.
    849 			   RAID 0 can't cope with that, so we'll punt */
    850 			too_fatal = 1;
    851 		}
    852 
    853 	}
    854 
    855 	/* record the serial number for later.  If we bail later, setting
    856 	   this doesn't matter, otherwise we've got the best guess at the
    857 	   correct serial number */
    858 	raidPtr->serial_number = serial_number;
    859 
    860 	mod_number = mod_values[0];
    861 	if (num_mod == 2) {
    862 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
    863 			/* Locate the maverick component */
    864 			if (mod_count[1] > mod_count[0]) {
    865 				mod_number = mod_values[1];
    866 			} else if (mod_count[1] < mod_count[0]) {
    867 				mod_number = mod_values[0];
    868 			} else {
    869 				/* counts of different modification values
    870 				   are the same.   Assume greater value is
    871 				   the correct one, all other things
    872 				   considered */
    873 				if (mod_values[0] > mod_values[1]) {
    874 					mod_number = mod_values[0];
    875 				} else {
    876 					mod_number = mod_values[1];
    877 				}
    878 
    879 			}
    880 
    881 			for (c = 0; c < raidPtr->numCol; c++) {
    882 				if (raidPtr->Disks[c].status != rf_ds_optimal)
    883 					continue;
    884 
    885 				ci_label = raidget_component_label(raidPtr, c);
    886 				if (mod_number != ci_label->mod_counter) {
    887 					if (hosed_column == c) {
    888 						/* same one.  Can
    889 						   deal with it.  */
    890 					} else {
    891 						hosed_column = c;
    892 						if (num_ser != 1) {
    893 							too_fatal = 1;
    894 							break;
    895 						}
    896 					}
    897 				}
    898 			}
    899 			if (hosed_column != -1)
    900 				rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
    901 				    1);
    902 		} else {
    903 			too_fatal = 1;
    904 		}
    905 		if (cfgPtr->parityConfig == '0') {
    906 			/* We've identified two different mod counters.
    907 			   RAID 0 can't cope with that, so we'll punt */
    908 			too_fatal = 1;
    909 		}
    910 	}
    911 
    912 	raidPtr->mod_counter = mod_number;
    913 
    914 	if (too_fatal) {
    915 		/* we've had both a serial number mismatch, and a mod_counter
    916 		   mismatch -- and they involved two different components!!
    917 		   Bail -- make things fail so that the user must force
    918 		   the issue... */
    919 		hosed_column = -1;
    920 		fatal_error = 1;
    921 	}
    922 
    923 	if (num_ser > 2) {
    924 		printf("raid%d: Too many different serial numbers!\n",
    925 		       raidPtr->raidid);
    926 		fatal_error = 1;
    927 	}
    928 
    929 	if (num_mod > 2) {
    930 		printf("raid%d: Too many different mod counters!\n",
    931 		       raidPtr->raidid);
    932 		fatal_error = 1;
    933 	}
    934 
    935         for (c = 0; c < raidPtr->numCol; c++) {
    936 		if (raidPtr->Disks[c].status != rf_ds_optimal) {
    937 			hosed_column = c;
    938 			break;
    939 		}
    940 	}
    941 
    942 	/* we start by assuming the parity will be good, and flee from
    943 	   that notion at the slightest sign of trouble */
    944 
    945 	parity_good = RF_RAID_CLEAN;
    946 
    947 	for (c = 0; c < raidPtr->numCol; c++) {
    948 		dev_name = &cfgPtr->devnames[0][c][0];
    949 		ci_label = raidget_component_label(raidPtr, c);
    950 
    951 		if (c == hosed_column) {
    952 			printf("raid%d: Ignoring %s\n",
    953 			       raidPtr->raidid, dev_name);
    954 		} else {
    955 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
    956 			if (rf_check_label_vitals( raidPtr, 0, c,
    957 						   dev_name, ci_label,
    958 						   serial_number,
    959 						   mod_number )) {
    960 				fatal_error = 1;
    961 			}
    962 			if (ci_label->clean != RF_RAID_CLEAN) {
    963 				parity_good = RF_RAID_DIRTY;
    964 			}
    965 		}
    966 	}
    967 
    968 	if (fatal_error) {
    969 		parity_good = RF_RAID_DIRTY;
    970 	}
    971 
    972 	/* we note the state of the parity */
    973 	raidPtr->parity_good = parity_good;
    974 
    975 	return(fatal_error);
    976 }
    977 
    978 int
    979 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
    980 {
    981 	RF_RaidDisk_t *disks;
    982 	RF_DiskQueue_t *spareQueues;
    983 	int ret;
    984 	unsigned int bs;
    985 	int spare_number;
    986 
    987 	ret=0;
    988 
    989 	if (raidPtr->numSpare >= RF_MAXSPARE) {
    990 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
    991 		return(EINVAL);
    992 	}
    993 
    994 	rf_lock_mutex2(raidPtr->mutex);
    995 	while (raidPtr->adding_hot_spare == 1) {
    996 		rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
    997 	}
    998 	raidPtr->adding_hot_spare = 1;
    999 	rf_unlock_mutex2(raidPtr->mutex);
   1000 
   1001 	/* the beginning of the spares... */
   1002 	disks = &raidPtr->Disks[raidPtr->numCol];
   1003 
   1004 	spare_number = raidPtr->numSpare;
   1005 
   1006 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
   1007 			       &disks[spare_number],
   1008 			       raidPtr->numCol + spare_number);
   1009 
   1010 	if (ret)
   1011 		goto fail;
   1012 	if (disks[spare_number].status != rf_ds_optimal) {
   1013 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
   1014 			     sparePtr->component_name);
   1015 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1016 		ret=EINVAL;
   1017 		goto fail;
   1018 	} else {
   1019 		disks[spare_number].status = rf_ds_spare;
   1020 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
   1021 			 spare_number,
   1022 			 disks[spare_number].devname,
   1023 			 disks[spare_number].numBlocks,
   1024 			 disks[spare_number].blockSize,
   1025 			 (long int) disks[spare_number].numBlocks *
   1026 			 disks[spare_number].blockSize / 1024 / 1024);
   1027 	}
   1028 
   1029 
   1030 	/* check sizes and block sizes on the spare disk */
   1031 	bs = 1 << raidPtr->logBytesPerSector;
   1032 	if (disks[spare_number].blockSize != bs) {
   1033 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
   1034 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1035 		ret = EINVAL;
   1036 		goto fail;
   1037 	}
   1038 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
   1039 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
   1040 			     disks[spare_number].devname,
   1041 			     disks[spare_number].blockSize,
   1042 			     raidPtr->sectorsPerDisk);
   1043 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
   1044 		ret = EINVAL;
   1045 		goto fail;
   1046 	} else {
   1047 		if (disks[spare_number].numBlocks >
   1048 		    raidPtr->sectorsPerDisk) {
   1049 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
   1050 			    disks[spare_number].devname,
   1051 			    raidPtr->sectorsPerDisk,
   1052 			    disks[spare_number].numBlocks);
   1053 
   1054 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
   1055 		}
   1056 	}
   1057 
   1058 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
   1059 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
   1060 				 raidPtr->numCol + spare_number,
   1061 				 raidPtr->qType,
   1062 				 raidPtr->sectorsPerDisk,
   1063 				 raidPtr->Disks[raidPtr->numCol +
   1064 						  spare_number].dev,
   1065 				 raidPtr->maxOutstanding,
   1066 				 &raidPtr->shutdownList,
   1067 				 raidPtr->cleanupList);
   1068 
   1069 	rf_lock_mutex2(raidPtr->mutex);
   1070 	raidPtr->numSpare++;
   1071 	rf_unlock_mutex2(raidPtr->mutex);
   1072 
   1073 fail:
   1074 	rf_lock_mutex2(raidPtr->mutex);
   1075 	raidPtr->adding_hot_spare = 0;
   1076 	rf_signal_cond2(raidPtr->adding_hot_spare_cv);
   1077 	rf_unlock_mutex2(raidPtr->mutex);
   1078 
   1079 	return(ret);
   1080 }
   1081 
   1082 int
   1083 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
   1084 {
   1085 #if 0
   1086 	int spare_number;
   1087 #endif
   1088 
   1089 	if (raidPtr->numSpare==0) {
   1090 		printf("No spares to remove!\n");
   1091 		return(EINVAL);
   1092 	}
   1093 
   1094 	return(EINVAL); /* XXX not implemented yet */
   1095 #if 0
   1096 	spare_number = sparePtr->column;
   1097 
   1098 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
   1099 		return(EINVAL);
   1100 	}
   1101 
   1102 	/* verify that this spare isn't in use... */
   1103 
   1104 
   1105 
   1106 
   1107 	/* it's gone.. */
   1108 
   1109 	raidPtr->numSpare--;
   1110 
   1111 	return(0);
   1112 #endif
   1113 }
   1114 
   1115 
   1116 int
   1117 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
   1118 {
   1119 #if 0
   1120 	RF_RaidDisk_t *disks;
   1121 #endif
   1122 
   1123 	if ((component->column < 0) ||
   1124 	    (component->column >= raidPtr->numCol)) {
   1125 		return(EINVAL);
   1126 	}
   1127 
   1128 #if 0
   1129 	disks = &raidPtr->Disks[component->column];
   1130 #endif
   1131 
   1132 	/* 1. This component must be marked as 'failed' */
   1133 
   1134 	return(EINVAL); /* Not implemented yet. */
   1135 }
   1136 
   1137 int
   1138 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
   1139     RF_SingleComponent_t *component)
   1140 {
   1141 
   1142 	/* Issues here include how to 'move' this in if there is IO
   1143 	   taking place (e.g. component queues and such) */
   1144 
   1145 	return(EINVAL); /* Not implemented yet. */
   1146 }
   1147