Home | History | Annotate | Line # | Download | only in raidframe
rf_disks.c revision 1.5
      1 /*	$NetBSD: rf_disks.c,v 1.5 1999/02/05 00:06:09 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /***************************************************************
     30  * rf_disks.c -- code to perform operations on the actual disks
     31  ***************************************************************/
     32 
     33 #include "rf_types.h"
     34 #include "rf_raid.h"
     35 #include "rf_alloclist.h"
     36 #include "rf_utils.h"
     37 #include "rf_configure.h"
     38 #include "rf_general.h"
     39 #if !defined(__NetBSD__)
     40 #include "rf_camlayer.h"
     41 #endif
     42 #include "rf_options.h"
     43 #include "rf_sys.h"
     44 
     45 #include <sys/types.h>
     46 #include <sys/param.h>
     47 #include <sys/systm.h>
     48 #include <sys/proc.h>
     49 #include <sys/ioctl.h>
     50 #include <sys/fcntl.h>
     51 #include <sys/vnode.h>
     52 
     53 int raidlookup __P((char *, struct proc * p, struct vnode **));
     54 
     55 
/*
 * Debug printf wrappers: the message is emitted only when rf_diskDebug is
 * non-zero.  Each expansion is wrapped in do { } while (0) so that it behaves
 * as a single statement; a bare "if" expansion would capture the "else" of an
 * enclosing unbraced if/else at the call site.
 */
#define DPRINTF6(a,b,c,d,e,f) do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
     58 
     59 /****************************************************************************************
     60  *
     61  * initialize the disks comprising the array
     62  *
     63  * We want the spare disks to have regular row,col numbers so that we can easily
     64  * substitue a spare for a failed disk.  But, the driver code assumes throughout
     65  * that the array contains numRow by numCol _non-spare_ disks, so it's not clear
     66  * how to fit in the spares.  This is an unfortunate holdover from raidSim.  The
     67  * quick and dirty fix is to make row zero bigger than the rest, and put all the
     68  * spares in it.  This probably needs to get changed eventually.
     69  *
     70  ***************************************************************************************/
     71 int
     72 rf_ConfigureDisks(
     73     RF_ShutdownList_t ** listp,
     74     RF_Raid_t * raidPtr,
     75     RF_Config_t * cfgPtr)
     76 {
     77 	RF_RaidDisk_t **disks;
     78 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
     79 	RF_RowCol_t r, c;
     80 	int     bs, ret;
     81 	unsigned i, count, foundone = 0, numFailuresThisRow;
     82 	RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
     83 	int     num_rows_done, num_cols_done;
     84 
     85 	struct proc *proc = 0;
     86 #ifndef __NetBSD__
     87 	ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
     88 	if (ret)
     89 		goto fail;
     90 	ret = rf_SCSI_AllocTUR(&tur_op);
     91 	if (ret)
     92 		goto fail;
     93 #endif				/* !__NetBSD__ */
     94 
     95 	num_rows_done = 0;
     96 	num_cols_done = 0;
     97 
     98 
     99 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), (RF_RaidDisk_t **), raidPtr->cleanupList);
    100 	if (disks == NULL) {
    101 		ret = ENOMEM;
    102 		goto fail;
    103 	}
    104 	raidPtr->Disks = disks;
    105 
    106 
    107 	proc = raidPtr->proc;	/* Blah XXX */
    108 
    109 	/* get space for the device-specific stuff... */
    110 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
    111 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
    112 	    raidPtr->cleanupList);
    113 	if (raidPtr->raid_cinfo == NULL) {
    114 		ret = ENOMEM;
    115 		goto fail;
    116 	}
    117 	for (r = 0; r < raidPtr->numRow; r++) {
    118 		numFailuresThisRow = 0;
    119 		RF_CallocAndAdd(disks[r], raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), raidPtr->cleanupList);
    120 		if (disks[r] == NULL) {
    121 			ret = ENOMEM;
    122 			goto fail;
    123 		}
    124 		/* get more space for device specific stuff.. */
    125 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
    126 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
    127 		    sizeof(struct raidcinfo), (struct raidcinfo *),
    128 		    raidPtr->cleanupList);
    129 		if (raidPtr->raid_cinfo[r] == NULL) {
    130 			ret = ENOMEM;
    131 			goto fail;
    132 		}
    133 		for (c = 0; c < raidPtr->numCol; c++) {
    134 			ret = rf_ConfigureDisk(raidPtr, &cfgPtr->devnames[r][c][0],
    135 			    &disks[r][c], rdcap_op, tur_op,
    136 			    cfgPtr->devs[r][c], r, c);
    137 			if (ret)
    138 				goto fail;
    139 			if (disks[r][c].status != rf_ds_optimal) {
    140 				numFailuresThisRow++;
    141 			} else {
    142 				if (disks[r][c].numBlocks < min_numblks)
    143 					min_numblks = disks[r][c].numBlocks;
    144 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
    145 				    r, c, disks[r][c].devname,
    146 				    (long int) disks[r][c].numBlocks,
    147 				    disks[r][c].blockSize,
    148 				    (long int) disks[r][c].numBlocks * disks[r][c].blockSize / 1024 / 1024);
    149 			}
    150 			num_cols_done++;
    151 		}
    152 		/* XXX fix for n-fault tolerant */
    153 		if (numFailuresThisRow > 0)
    154 			raidPtr->status[r] = rf_rs_degraded;
    155 		num_rows_done++;
    156 	}
    157 #if defined(__NetBSD__) && defined(_KERNEL)
    158 	/* we do nothing */
    159 #else
    160 	rf_SCSI_FreeDiskOp(rdcap_op, 1);
    161 	rdcap_op = NULL;
    162 	rf_SCSI_FreeDiskOp(tur_op, 0);
    163 	tur_op = NULL;
    164 #endif
    165 	/* all disks must be the same size & have the same block size, bs must
    166 	 * be a power of 2 */
    167 	bs = 0;
    168 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
    169 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
    170 			if (disks[r][c].status == rf_ds_optimal) {
    171 				bs = disks[r][c].blockSize;
    172 				foundone = 1;
    173 			}
    174 		}
    175 	}
    176 	if (!foundone) {
    177 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
    178 		ret = EINVAL;
    179 		goto fail;
    180 	}
    181 	for (count = 0, i = 1; i; i <<= 1)
    182 		if (bs & i)
    183 			count++;
    184 	if (count != 1) {
    185 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
    186 		ret = EINVAL;
    187 		goto fail;
    188 	}
    189 	for (r = 0; r < raidPtr->numRow; r++) {
    190 		for (c = 0; c < raidPtr->numCol; c++) {
    191 			if (disks[r][c].status == rf_ds_optimal) {
    192 				if (disks[r][c].blockSize != bs) {
    193 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
    194 					ret = EINVAL;
    195 					goto fail;
    196 				}
    197 				if (disks[r][c].numBlocks != min_numblks) {
    198 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
    199 					    r, c, (int) min_numblks);
    200 					disks[r][c].numBlocks = min_numblks;
    201 				}
    202 			}
    203 		}
    204 	}
    205 
    206 	raidPtr->sectorsPerDisk = min_numblks;
    207 	raidPtr->logBytesPerSector = ffs(bs) - 1;
    208 	raidPtr->bytesPerSector = bs;
    209 	raidPtr->sectorMask = bs - 1;
    210 	return (0);
    211 
    212 fail:
    213 
    214 #if defined(__NetBSD__) && defined(_KERNEL)
    215 
    216 	for (r = 0; r < raidPtr->numRow; r++) {
    217 		for (c = 0; c < raidPtr->numCol; c++) {
    218 			/* Cleanup.. */
    219 #ifdef DEBUG
    220 			printf("Cleaning up row: %d col: %d\n", r, c);
    221 #endif
    222 			if (raidPtr->raid_cinfo[r][c].ci_vp) {
    223 				(void) vn_close(raidPtr->raid_cinfo[r][c].ci_vp,
    224 				    FREAD | FWRITE, proc->p_ucred, proc);
    225 			}
    226 		}
    227 	}
    228 	/* Space allocated for raid_vpp will get cleaned up at some other
    229 	 * point */
    230 	/* XXX Need more #ifdefs in the above... */
    231 
    232 #else
    233 
    234 	if (rdcap_op)
    235 		rf_SCSI_FreeDiskOp(rdcap_op, 1);
    236 	if (tur_op)
    237 		rf_SCSI_FreeDiskOp(tur_op, 0);
    238 
    239 #endif
    240 	return (ret);
    241 }
    242 
    243 
    244 /****************************************************************************************
    245  * set up the data structures describing the spare disks in the array
    246  * recall from the above comment that the spare disk descriptors are stored
    247  * in row zero, which is specially expanded to hold them.
    248  ***************************************************************************************/
    249 int
    250 rf_ConfigureSpareDisks(
    251     RF_ShutdownList_t ** listp,
    252     RF_Raid_t * raidPtr,
    253     RF_Config_t * cfgPtr)
    254 {
    255 	char    buf[256];
    256 	int     r, c, i, ret;
    257 	RF_DiskOp_t *rdcap_op = NULL, *tur_op = NULL;
    258 	unsigned bs;
    259 	RF_RaidDisk_t *disks;
    260 	int     num_spares_done;
    261 
    262 	struct proc *proc;
    263 
    264 #ifndef __NetBSD__
    265 	ret = rf_SCSI_AllocReadCapacity(&rdcap_op);
    266 	if (ret)
    267 		goto fail;
    268 	ret = rf_SCSI_AllocTUR(&tur_op);
    269 	if (ret)
    270 		goto fail;
    271 #endif				/* !__NetBSD__ */
    272 
    273 	num_spares_done = 0;
    274 
    275 	proc = raidPtr->proc;
    276 	/* The space for the spares should have already been allocated by
    277 	 * ConfigureDisks() */
    278 
    279 	disks = &raidPtr->Disks[0][raidPtr->numCol];
    280 	for (i = 0; i < raidPtr->numSpare; i++) {
    281 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
    282 		    &disks[i], rdcap_op, tur_op,
    283 		    cfgPtr->spare_devs[i], 0, raidPtr->numCol + i);
    284 		if (ret)
    285 			goto fail;
    286 		if (disks[i].status != rf_ds_optimal) {
    287 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", buf);
    288 		} else {
    289 			disks[i].status = rf_ds_spare;	/* change status to
    290 							 * spare */
    291 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
    292 			    disks[i].devname,
    293 			    (long int) disks[i].numBlocks, disks[i].blockSize,
    294 			    (long int) disks[i].numBlocks * disks[i].blockSize / 1024 / 1024);
    295 		}
    296 		num_spares_done++;
    297 	}
    298 #if defined(__NetBSD__) && (_KERNEL)
    299 
    300 #else
    301 	rf_SCSI_FreeDiskOp(rdcap_op, 1);
    302 	rdcap_op = NULL;
    303 	rf_SCSI_FreeDiskOp(tur_op, 0);
    304 	tur_op = NULL;
    305 #endif
    306 
    307 	/* check sizes and block sizes on spare disks */
    308 	bs = 1 << raidPtr->logBytesPerSector;
    309 	for (i = 0; i < raidPtr->numSpare; i++) {
    310 		if (disks[i].blockSize != bs) {
    311 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
    312 			ret = EINVAL;
    313 			goto fail;
    314 		}
    315 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
    316 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
    317 			    disks[i].devname, disks[i].blockSize, (long int) raidPtr->sectorsPerDisk);
    318 			ret = EINVAL;
    319 			goto fail;
    320 		} else
    321 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
    322 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
    323 
    324 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
    325 			}
    326 	}
    327 
    328 	return (0);
    329 
    330 fail:
    331 #if defined(__NetBSD__) && defined(_KERNEL)
    332 
    333 	/* Release the hold on the main components.  We've failed to allocate
    334 	 * a spare, and since we're failing, we need to free things.. */
    335 
    336 	for (r = 0; r < raidPtr->numRow; r++) {
    337 		for (c = 0; c < raidPtr->numCol; c++) {
    338 			/* Cleanup.. */
    339 #ifdef DEBUG
    340 			printf("Cleaning up row: %d col: %d\n", r, c);
    341 #endif
    342 			if (raidPtr->raid_cinfo[r][c].ci_vp) {
    343 				(void) vn_close(raidPtr->raid_cinfo[r][c].ci_vp,
    344 				    FREAD | FWRITE, proc->p_ucred, proc);
    345 			}
    346 		}
    347 	}
    348 
    349 	for (i = 0; i < raidPtr->numSpare; i++) {
    350 		/* Cleanup.. */
    351 #ifdef DEBUG
    352 		printf("Cleaning up spare: %d\n", i);
    353 #endif
    354 		if (raidPtr->raid_cinfo[0][raidPtr->numCol + i].ci_vp) {
    355 			(void) vn_close(raidPtr->raid_cinfo[0][raidPtr->numCol + i].ci_vp,
    356 			    FREAD | FWRITE, proc->p_ucred, proc);
    357 		}
    358 	}
    359 
    360 #else
    361 
    362 	if (rdcap_op)
    363 		rf_SCSI_FreeDiskOp(rdcap_op, 1);
    364 	if (tur_op)
    365 		rf_SCSI_FreeDiskOp(tur_op, 0);
    366 
    367 #endif
    368 
    369 	return (ret);
    370 }
    371 
    372 
    373 
    374 /* configure a single disk in the array */
    375 int
    376 rf_ConfigureDisk(raidPtr, buf, diskPtr, rdcap_op, tur_op, dev, row, col)
    377 	RF_Raid_t *raidPtr;	/* We need this down here too!! GO */
    378 	char   *buf;
    379 	RF_RaidDisk_t *diskPtr;
    380 	RF_DiskOp_t *rdcap_op;
    381 	RF_DiskOp_t *tur_op;
    382 	dev_t   dev;		/* device number used only in kernel */
    383 	RF_RowCol_t row;
    384 	RF_RowCol_t col;
    385 {
    386 	char   *p;
    387 	int     retcode;
    388 
    389 	struct partinfo dpart;
    390 	struct vnode *vp;
    391 	struct vattr va;
    392 	struct proc *proc;
    393 	int     error;
    394 
    395 	retcode = 0;
    396 	p = rf_find_non_white(buf);
    397 	if (p[strlen(p) - 1] == '\n') {
    398 		/* strip off the newline */
    399 		p[strlen(p) - 1] = '\0';
    400 	}
    401 	(void) strcpy(diskPtr->devname, p);
    402 
    403 #ifndef __NetBSD__
    404 	/* get bus, target, lun */
    405 	retcode = rf_extract_ids(p, &busid, &targid, &lun);
    406 	if (retcode)
    407 		return (retcode);
    408 
    409 	/* required in kernel, nop at user level */
    410 	retcode = rf_SCSI_OpenUnit(dev);
    411 	if (retcode)
    412 		return (retcode);
    413 
    414 	diskPtr->dev = dev;
    415 	if (rf_SCSI_DoTUR(tur_op, (u_char) busid, (u_char) targid, (u_char) lun, dev)) {
    416 		RF_ERRORMSG1("Disk %s failed TUR.  Marked as dead.\n", diskPtr->devname);
    417 		diskPtr->status = rf_ds_failed;
    418 	} else {
    419 		diskPtr->status = rf_ds_optimal;
    420 		retcode = rf_SCSI_DoReadCapacity(raidPtr, rdcap_op, busid, targid, lun, dev,
    421 		    &diskPtr->numBlocks, &diskPtr->blockSize, diskPtr->devname);
    422 		if (retcode)
    423 			return (retcode);
    424 
    425 		/* we allow the user to specify that only a fraction of the
    426 		 * disks should be used this is just for debug:  it speeds up
    427 		 * the parity scan */
    428 		diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
    429 	}
    430 #endif
    431 
    432 	proc = raidPtr->proc;	/* XXX Yes, this is not nice.. */
    433 
    434 	/* Let's start by claiming the component is fine and well... */
    435 	/* XXX not the case if the disk is toast.. */
    436 	diskPtr->status = rf_ds_optimal;
    437 
    438 
    439 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
    440 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
    441 
    442 	error = raidlookup(diskPtr->devname, proc, &vp);
    443 	if (error) {
    444 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
    445 		if (error == ENXIO) {
    446 			/* XXX the component isn't there... must be dead :-( */
    447 			diskPtr->status = rf_ds_failed;
    448 		} else {
    449 			return (error);
    450 		}
    451 	}
    452 	if (diskPtr->status == rf_ds_optimal) {
    453 
    454 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
    455 			return (error);
    456 		}
    457 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
    458 		    FREAD, proc->p_ucred, proc);
    459 		if (error) {
    460 			return (error);
    461 		}
    462 		diskPtr->blockSize = dpart.disklab->d_secsize;
    463 
    464 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
    465 
    466 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
    467 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
    468 
    469 #if 0
    470 		diskPtr->dev = dev;
    471 #endif
    472 
    473 		diskPtr->dev = va.va_rdev;	/* XXX or the above? */
    474 
    475 		/* we allow the user to specify that only a fraction of the
    476 		 * disks should be used this is just for debug:  it speeds up
    477 		 * the parity scan */
    478 		diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage / 100;
    479 
    480 	}
    481 	return (0);
    482 }
    483