Home | History | Annotate | Line # | Download | only in raidframe
rf_raid5.c revision 1.1
      1 /*	$NetBSD: rf_raid5.c,v 1.1 1998/11/13 04:20:33 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /******************************************************************************
     30  *
     31  * rf_raid5.c -- implements RAID Level 5
     32  *
     33  *****************************************************************************/
     34 
     35 /*
     36  * :
     37  * Log: rf_raid5.c,v
     38  * Revision 1.26  1996/11/05 21:10:40  jimz
     39  * failed pda generalization
     40  *
     41  * Revision 1.25  1996/07/31  16:56:18  jimz
     42  * dataBytesPerStripe, sectorsPerDisk init arch-indep.
     43  *
     44  * Revision 1.24  1996/07/18  22:57:14  jimz
     45  * port simulator to AIX
     46  *
     47  * Revision 1.23  1996/07/13  00:00:59  jimz
     48  * sanitized generalized reconstruction architecture
     49  * cleaned up head sep, rbuf problems
     50  *
     51  * Revision 1.22  1996/06/11  08:54:27  jimz
     52  * improved error-checking at configuration time
     53  *
     54  * Revision 1.21  1996/06/10  11:55:47  jimz
     55  * Straightened out some per-array/not-per-array distinctions, fixed
     56  * a couple bugs related to confusion. Added shutdown lists. Removed
     57  * layout shutdown function (now subsumed by shutdown lists).
     58  *
     59  * Revision 1.20  1996/06/07  22:26:27  jimz
     60  * type-ify which_ru (RF_ReconUnitNum_t)
     61  *
     62  * Revision 1.19  1996/06/07  21:33:04  jimz
     63  * begin using consistent types for sector numbers,
     64  * stripe numbers, row+col numbers, recon unit numbers
     65  *
     66  * Revision 1.18  1996/06/05  18:06:02  jimz
     67  * Major code cleanup. The Great Renaming is now done.
     68  * Better modularity. Better typing. Fixed a bunch of
     69  * synchronization bugs. Made a lot of global stuff
     70  * per-desc or per-array. Removed dead code.
     71  *
     72  * Revision 1.17  1996/06/03  23:28:26  jimz
     73  * more bugfixes
     74  * check in tree to sync for IPDS runs with current bugfixes
     75  * there still may be a problem with threads in the script test
     76  * getting I/Os stuck- not trivially reproducible (runs ~50 times
     77  * in a row without getting stuck)
     78  *
     79  * Revision 1.16  1996/06/02  17:31:48  jimz
     80  * Moved a lot of global stuff into array structure, where it belongs.
     81  * Fixed up paritylogging, pss modules in this manner. Some general
     82  * code cleanup. Removed lots of dead code, some dead files.
     83  *
     84  * Revision 1.15  1996/05/31  22:26:54  jimz
     85  * fix a lot of mapping problems, memory allocation problems
     86  * found some weird lock issues, fixed 'em
     87  * more code cleanup
     88  *
     89  * Revision 1.14  1996/05/30  23:22:16  jimz
     90  * bugfixes of serialization, timing problems
     91  * more cleanup
     92  *
     93  * Revision 1.13  1996/05/27  18:56:37  jimz
     94  * more code cleanup
     95  * better typing
     96  * compiles in all 3 environments
     97  *
     98  * Revision 1.12  1996/05/24  22:17:04  jimz
     99  * continue code + namespace cleanup
    100  * typed a bunch of flags
    101  *
    102  * Revision 1.11  1996/05/24  01:59:45  jimz
    103  * another checkpoint in code cleanup for release
    104  * time to sync kernel tree
    105  *
    106  * Revision 1.10  1996/05/23  00:33:23  jimz
    107  * code cleanup: move all debug decls to rf_options.c, all extern
    108  * debug decls to rf_options.h, all debug vars preceded by rf_
    109  *
    110  * Revision 1.9  1996/05/18  19:51:34  jimz
    111  * major code cleanup- fix syntax, make some types consistent,
    112  * add prototypes, clean out dead code, et cetera
    113  *
    114  * Revision 1.8  1996/05/03  19:38:58  wvcii
    115  * moved dag creation routines to dag library
    116  *
    117  * Revision 1.7  1995/12/12  18:10:06  jimz
    118  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
    119  * fix 80-column brain damage in comments
    120  *
    121  * Revision 1.6  1995/12/06  15:04:28  root
    122  * added copyright info
    123  *
    124  * Revision 1.5  1995/11/17  18:59:41  wvcii
    125  * added prototyping to MapParity
    126  *
    127  * Revision 1.4  1995/06/23  13:38:21  robby
    128  * updated to prototypes in rf_layout.h
    129  *
    130  */
    131 
    132 #include "rf_types.h"
    133 #include "rf_raid.h"
    134 #include "rf_raid5.h"
    135 #include "rf_dag.h"
    136 #include "rf_dagffrd.h"
    137 #include "rf_dagffwr.h"
    138 #include "rf_dagdegrd.h"
    139 #include "rf_dagdegwr.h"
    140 #include "rf_dagutils.h"
    141 #include "rf_threadid.h"
    142 #include "rf_general.h"
    143 #include "rf_map.h"
    144 #include "rf_utils.h"
    145 
    146 typedef struct RF_Raid5ConfigInfo_s {
    147   RF_RowCol_t  **stripeIdentifier;    /* filled in at config time and used by IdentifyStripe */
    148 } RF_Raid5ConfigInfo_t;
    149 
    150 int rf_ConfigureRAID5(
    151   RF_ShutdownList_t  **listp,
    152   RF_Raid_t           *raidPtr,
    153   RF_Config_t         *cfgPtr)
    154 {
    155   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
    156   RF_Raid5ConfigInfo_t *info;
    157   RF_RowCol_t i, j, startdisk;
    158 
    159   /* create a RAID level 5 configuration structure */
    160   RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList);
    161   if (info == NULL)
    162     return(ENOMEM);
    163   layoutPtr->layoutSpecificInfo = (void *) info;
    164 
    165   RF_ASSERT(raidPtr->numRow == 1);
    166 
    167   /* the stripe identifier must identify the disks in each stripe,
    168    * IN THE ORDER THAT THEY APPEAR IN THE STRIPE.
    169    */
    170   info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
    171   if (info->stripeIdentifier == NULL)
    172     return(ENOMEM);
    173   startdisk = 0;
    174   for (i=0; i<raidPtr->numCol; i++) {
    175     for (j=0; j<raidPtr->numCol; j++) {
    176       info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
    177     }
    178     if ((--startdisk) < 0) startdisk = raidPtr->numCol-1;
    179   }
    180 
    181   /* fill in the remaining layout parameters */
    182   layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
    183   layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
    184   layoutPtr->numDataCol = raidPtr->numCol-1;
    185   layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
    186   layoutPtr->numParityCol = 1;
    187   layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
    188 
    189   raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
    190 
    191   return(0);
    192 }
    193 
    194 int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *raidPtr)
    195 {
    196   return(20);
    197 }
    198 
    199 RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *raidPtr)
    200 {
    201   return(10);
    202 }
    203 
    204 #if !defined(__NetBSD__) && !defined(_KERNEL)
    205 /* not currently used */
    206 int rf_ShutdownRAID5(RF_Raid_t *raidPtr)
    207 {
    208 	return(0);
    209 }
    210 #endif
    211 
    212 void rf_MapSectorRAID5(
    213   RF_Raid_t         *raidPtr,
    214   RF_RaidAddr_t      raidSector,
    215   RF_RowCol_t       *row,
    216   RF_RowCol_t       *col,
    217   RF_SectorNum_t    *diskSector,
    218   int                remap)
    219 {
    220   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    221   *row = 0;
    222   *col = (SUID % raidPtr->numCol);
    223   *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
    224     (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    225 }
    226 
    227 void rf_MapParityRAID5(
    228   RF_Raid_t       *raidPtr,
    229   RF_RaidAddr_t    raidSector,
    230   RF_RowCol_t     *row,
    231   RF_RowCol_t     *col,
    232   RF_SectorNum_t  *diskSector,
    233   int              remap)
    234 {
    235   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    236 
    237   *row = 0;
    238   *col = raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%raidPtr->numCol;
    239   *diskSector =(SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
    240     (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    241 }
    242 
    243 void rf_IdentifyStripeRAID5(
    244   RF_Raid_t        *raidPtr,
    245   RF_RaidAddr_t     addr,
    246   RF_RowCol_t     **diskids,
    247   RF_RowCol_t      *outRow)
    248 {
    249   RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
    250   RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
    251 
    252   *outRow = 0;
    253   *diskids = info->stripeIdentifier[ stripeID % raidPtr->numCol ];
    254 }
    255 
    256 void rf_MapSIDToPSIDRAID5(
    257   RF_RaidLayout_t    *layoutPtr,
    258   RF_StripeNum_t      stripeID,
    259   RF_StripeNum_t     *psID,
    260   RF_ReconUnitNum_t  *which_ru)
    261 {
    262   *which_ru = 0;
    263   *psID = stripeID;
    264 }
    265 
    266 /* select an algorithm for performing an access.  Returns two pointers,
    267  * one to a function that will return information about the DAG, and
    268  * another to a function that will create the dag.
    269  */
    270 void rf_RaidFiveDagSelect(
    271   RF_Raid_t             *raidPtr,
    272   RF_IoType_t            type,
    273   RF_AccessStripeMap_t  *asmap,
    274   RF_VoidFuncPtr        *createFunc)
    275 {
    276   RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    277   RF_PhysDiskAddr_t *failedPDA=NULL;
    278   RF_RowCol_t frow, fcol;
    279   RF_RowStatus_t rstat;
    280   int prior_recon;
    281   int tid;
    282 
    283   RF_ASSERT(RF_IO_IS_R_OR_W(type));
    284 
    285   if (asmap->numDataFailed + asmap->numParityFailed > 1) {
    286     RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
    287     /* *infoFunc = */ *createFunc = NULL;
    288     return;
    289   } else if (asmap->numDataFailed + asmap->numParityFailed == 1) {
    290 
    291     /* if under recon & already reconstructed, redirect the access to the spare drive
    292      * and eliminate the failure indication
    293      */
    294     failedPDA = asmap->failedPDAs[0];
    295     frow = failedPDA->row; fcol = failedPDA->col;
    296     rstat = raidPtr->status[failedPDA->row];
    297     prior_recon = (rstat == rf_rs_reconfigured) || (
    298       (rstat == rf_rs_reconstructing) ?
    299       rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
    300       );
    301     if (prior_recon) {
    302       RF_RowCol_t or = failedPDA->row,oc=failedPDA->col;
    303       RF_SectorNum_t oo=failedPDA->startSector;
    304 
    305       if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {         /* redirect to dist spare space */
    306 
    307 	if (failedPDA == asmap->parityInfo) {
    308 
    309 	  /* parity has failed */
    310 	  (layoutPtr->map->MapParity)(raidPtr, failedPDA->raidAddress, &failedPDA->row,
    311 				      &failedPDA->col, &failedPDA->startSector, RF_REMAP);
    312 
    313 	  if (asmap->parityInfo->next) {				/* redir 2nd component, if any */
    314 	    RF_PhysDiskAddr_t *p = asmap->parityInfo->next;
    315 	    RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit;
    316 	    p->row = failedPDA->row;
    317 	    p->col = failedPDA->col;
    318 	    p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) +
    319 			     SUoffs;  	/* cheating:  startSector is not really a RAID address */
    320 	  }
    321 
    322 	} else if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) {
    323 	  RF_ASSERT(0);  		/* should not ever happen */
    324 	} else {
    325 
    326 	  /* data has failed */
    327 	  (layoutPtr->map->MapSector)(raidPtr, failedPDA->raidAddress, &failedPDA->row,
    328 				      &failedPDA->col, &failedPDA->startSector, RF_REMAP);
    329 
    330 	}
    331 
    332       } else {                                                 /* redirect to dedicated spare space */
    333 
    334 	failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
    335 	failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
    336 
    337 	/* the parity may have two distinct components, both of which may need to be redirected */
    338 	if (asmap->parityInfo->next) {
    339 	  if (failedPDA == asmap->parityInfo) {
    340 	    failedPDA->next->row = failedPDA->row;
    341 	    failedPDA->next->col = failedPDA->col;
    342 	  } else if (failedPDA == asmap->parityInfo->next) {    /* paranoid:  should never occur */
    343 	    asmap->parityInfo->row = failedPDA->row;
    344 	    asmap->parityInfo->col = failedPDA->col;
    345 	  }
    346 	}
    347       }
    348 
    349       RF_ASSERT(failedPDA->col != -1);
    350 
    351       if (rf_dagDebug || rf_mapDebug) {
    352 	rf_get_threadid(tid);
    353 	printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
    354 	       tid,type,or,oc,(long)oo,failedPDA->row,failedPDA->col,
    355 	       (long)failedPDA->startSector);
    356       }
    357 
    358       asmap->numDataFailed = asmap->numParityFailed = 0;
    359     }
    360 
    361   }
    362 
    363   /* all dags begin/end with block/unblock node
    364    * therefore, hdrSucc & termAnt counts should always be 1
    365    * also, these counts should not be visible outside dag creation routines -
    366    * manipulating the counts here should be removed */
    367   if (type == RF_IO_TYPE_READ) {
    368     if (asmap->numDataFailed == 0)
    369       *createFunc = (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG;
    370     else
    371       *createFunc = (RF_VoidFuncPtr)rf_CreateRaidFiveDegradedReadDAG;
    372   } else {
    373 
    374 
    375     /* if mirroring, always use large writes.  If the access requires two
    376      * distinct parity updates, always do a small write.  If the stripe
    377      * contains a failure but the access does not, do a small write.
    378      * The first conditional (numStripeUnitsAccessed <= numDataCol/2) uses a
    379      * less-than-or-equal rather than just a less-than because when G is 3
    380      * or 4, numDataCol/2 is 1, and I want single-stripe-unit updates to use
    381      * just one disk.
    382      */
    383     if ( (asmap->numDataFailed + asmap->numParityFailed) == 0) {
    384       if (rf_suppressLocksAndLargeWrites ||
    385 	  (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol!=1)) ||
    386 	   (asmap->parityInfo->next!=NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
    387 	*createFunc = (RF_VoidFuncPtr)rf_CreateSmallWriteDAG;
    388       }
    389       else
    390 	*createFunc = (RF_VoidFuncPtr)rf_CreateLargeWriteDAG;
    391     }
    392     else {
    393       if (asmap->numParityFailed == 1)
    394 	*createFunc = (RF_VoidFuncPtr)rf_CreateNonRedundantWriteDAG;
    395       else
    396 	if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)
    397 	  *createFunc = NULL;
    398 	else
    399 	  *createFunc = (RF_VoidFuncPtr)rf_CreateDegradedWriteDAG;
    400     }
    401   }
    402 }
    403