Home | History | Annotate | Line # | Download | only in raidframe
rf_raid5.c revision 1.2
      1 /*	$NetBSD: rf_raid5.c,v 1.2 1999/01/26 02:34:01 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /******************************************************************************
     30  *
     31  * rf_raid5.c -- implements RAID Level 5
     32  *
     33  *****************************************************************************/
     34 
     35 #include "rf_types.h"
     36 #include "rf_raid.h"
     37 #include "rf_raid5.h"
     38 #include "rf_dag.h"
     39 #include "rf_dagffrd.h"
     40 #include "rf_dagffwr.h"
     41 #include "rf_dagdegrd.h"
     42 #include "rf_dagdegwr.h"
     43 #include "rf_dagutils.h"
     44 #include "rf_threadid.h"
     45 #include "rf_general.h"
     46 #include "rf_map.h"
     47 #include "rf_utils.h"
     48 
     49 typedef struct RF_Raid5ConfigInfo_s {
     50   RF_RowCol_t  **stripeIdentifier;    /* filled in at config time and used by IdentifyStripe */
     51 } RF_Raid5ConfigInfo_t;
     52 
     53 int rf_ConfigureRAID5(
     54   RF_ShutdownList_t  **listp,
     55   RF_Raid_t           *raidPtr,
     56   RF_Config_t         *cfgPtr)
     57 {
     58   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
     59   RF_Raid5ConfigInfo_t *info;
     60   RF_RowCol_t i, j, startdisk;
     61 
     62   /* create a RAID level 5 configuration structure */
     63   RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t), (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList);
     64   if (info == NULL)
     65     return(ENOMEM);
     66   layoutPtr->layoutSpecificInfo = (void *) info;
     67 
     68   RF_ASSERT(raidPtr->numRow == 1);
     69 
     70   /* the stripe identifier must identify the disks in each stripe,
     71    * IN THE ORDER THAT THEY APPEAR IN THE STRIPE.
     72    */
     73   info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, raidPtr->numCol, raidPtr->cleanupList);
     74   if (info->stripeIdentifier == NULL)
     75     return(ENOMEM);
     76   startdisk = 0;
     77   for (i=0; i<raidPtr->numCol; i++) {
     78     for (j=0; j<raidPtr->numCol; j++) {
     79       info->stripeIdentifier[i][j] = (startdisk + j) % raidPtr->numCol;
     80     }
     81     if ((--startdisk) < 0) startdisk = raidPtr->numCol-1;
     82   }
     83 
     84   /* fill in the remaining layout parameters */
     85   layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
     86   layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
     87   layoutPtr->numDataCol = raidPtr->numCol-1;
     88   layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
     89   layoutPtr->numParityCol = 1;
     90   layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
     91 
     92   raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
     93 
     94   return(0);
     95 }
     96 
     97 int rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *raidPtr)
     98 {
     99   return(20);
    100 }
    101 
    102 RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *raidPtr)
    103 {
    104   return(10);
    105 }
    106 
    107 #if !defined(__NetBSD__) && !defined(_KERNEL)
    108 /* not currently used */
    109 int rf_ShutdownRAID5(RF_Raid_t *raidPtr)
    110 {
    111 	return(0);
    112 }
    113 #endif
    114 
    115 void rf_MapSectorRAID5(
    116   RF_Raid_t         *raidPtr,
    117   RF_RaidAddr_t      raidSector,
    118   RF_RowCol_t       *row,
    119   RF_RowCol_t       *col,
    120   RF_SectorNum_t    *diskSector,
    121   int                remap)
    122 {
    123   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    124   *row = 0;
    125   *col = (SUID % raidPtr->numCol);
    126   *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
    127     (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    128 }
    129 
    130 void rf_MapParityRAID5(
    131   RF_Raid_t       *raidPtr,
    132   RF_RaidAddr_t    raidSector,
    133   RF_RowCol_t     *row,
    134   RF_RowCol_t     *col,
    135   RF_SectorNum_t  *diskSector,
    136   int              remap)
    137 {
    138   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    139 
    140   *row = 0;
    141   *col = raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%raidPtr->numCol;
    142   *diskSector =(SUID / (raidPtr->Layout.numDataCol)) * raidPtr->Layout.sectorsPerStripeUnit +
    143     (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    144 }
    145 
    146 void rf_IdentifyStripeRAID5(
    147   RF_Raid_t        *raidPtr,
    148   RF_RaidAddr_t     addr,
    149   RF_RowCol_t     **diskids,
    150   RF_RowCol_t      *outRow)
    151 {
    152   RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
    153   RF_Raid5ConfigInfo_t *info = (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
    154 
    155   *outRow = 0;
    156   *diskids = info->stripeIdentifier[ stripeID % raidPtr->numCol ];
    157 }
    158 
    159 void rf_MapSIDToPSIDRAID5(
    160   RF_RaidLayout_t    *layoutPtr,
    161   RF_StripeNum_t      stripeID,
    162   RF_StripeNum_t     *psID,
    163   RF_ReconUnitNum_t  *which_ru)
    164 {
    165   *which_ru = 0;
    166   *psID = stripeID;
    167 }
    168 
    169 /* select an algorithm for performing an access.  Returns two pointers,
    170  * one to a function that will return information about the DAG, and
    171  * another to a function that will create the dag.
    172  */
    173 void rf_RaidFiveDagSelect(
    174   RF_Raid_t             *raidPtr,
    175   RF_IoType_t            type,
    176   RF_AccessStripeMap_t  *asmap,
    177   RF_VoidFuncPtr        *createFunc)
    178 {
    179   RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    180   RF_PhysDiskAddr_t *failedPDA=NULL;
    181   RF_RowCol_t frow, fcol;
    182   RF_RowStatus_t rstat;
    183   int prior_recon;
    184   int tid;
    185 
    186   RF_ASSERT(RF_IO_IS_R_OR_W(type));
    187 
    188   if (asmap->numDataFailed + asmap->numParityFailed > 1) {
    189     RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
    190     /* *infoFunc = */ *createFunc = NULL;
    191     return;
    192   } else if (asmap->numDataFailed + asmap->numParityFailed == 1) {
    193 
    194     /* if under recon & already reconstructed, redirect the access to the spare drive
    195      * and eliminate the failure indication
    196      */
    197     failedPDA = asmap->failedPDAs[0];
    198     frow = failedPDA->row; fcol = failedPDA->col;
    199     rstat = raidPtr->status[failedPDA->row];
    200     prior_recon = (rstat == rf_rs_reconfigured) || (
    201       (rstat == rf_rs_reconstructing) ?
    202       rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
    203       );
    204     if (prior_recon) {
    205       RF_RowCol_t or = failedPDA->row,oc=failedPDA->col;
    206       RF_SectorNum_t oo=failedPDA->startSector;
    207 
    208       if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {         /* redirect to dist spare space */
    209 
    210 	if (failedPDA == asmap->parityInfo) {
    211 
    212 	  /* parity has failed */
    213 	  (layoutPtr->map->MapParity)(raidPtr, failedPDA->raidAddress, &failedPDA->row,
    214 				      &failedPDA->col, &failedPDA->startSector, RF_REMAP);
    215 
    216 	  if (asmap->parityInfo->next) {				/* redir 2nd component, if any */
    217 	    RF_PhysDiskAddr_t *p = asmap->parityInfo->next;
    218 	    RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit;
    219 	    p->row = failedPDA->row;
    220 	    p->col = failedPDA->col;
    221 	    p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) +
    222 			     SUoffs;  	/* cheating:  startSector is not really a RAID address */
    223 	  }
    224 
    225 	} else if (asmap->parityInfo->next && failedPDA == asmap->parityInfo->next) {
    226 	  RF_ASSERT(0);  		/* should not ever happen */
    227 	} else {
    228 
    229 	  /* data has failed */
    230 	  (layoutPtr->map->MapSector)(raidPtr, failedPDA->raidAddress, &failedPDA->row,
    231 				      &failedPDA->col, &failedPDA->startSector, RF_REMAP);
    232 
    233 	}
    234 
    235       } else {                                                 /* redirect to dedicated spare space */
    236 
    237 	failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
    238 	failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
    239 
    240 	/* the parity may have two distinct components, both of which may need to be redirected */
    241 	if (asmap->parityInfo->next) {
    242 	  if (failedPDA == asmap->parityInfo) {
    243 	    failedPDA->next->row = failedPDA->row;
    244 	    failedPDA->next->col = failedPDA->col;
    245 	  } else if (failedPDA == asmap->parityInfo->next) {    /* paranoid:  should never occur */
    246 	    asmap->parityInfo->row = failedPDA->row;
    247 	    asmap->parityInfo->col = failedPDA->col;
    248 	  }
    249 	}
    250       }
    251 
    252       RF_ASSERT(failedPDA->col != -1);
    253 
    254       if (rf_dagDebug || rf_mapDebug) {
    255 	rf_get_threadid(tid);
    256 	printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
    257 	       tid,type,or,oc,(long)oo,failedPDA->row,failedPDA->col,
    258 	       (long)failedPDA->startSector);
    259       }
    260 
    261       asmap->numDataFailed = asmap->numParityFailed = 0;
    262     }
    263 
    264   }
    265 
    266   /* all dags begin/end with block/unblock node
    267    * therefore, hdrSucc & termAnt counts should always be 1
    268    * also, these counts should not be visible outside dag creation routines -
    269    * manipulating the counts here should be removed */
    270   if (type == RF_IO_TYPE_READ) {
    271     if (asmap->numDataFailed == 0)
    272       *createFunc = (RF_VoidFuncPtr)rf_CreateFaultFreeReadDAG;
    273     else
    274       *createFunc = (RF_VoidFuncPtr)rf_CreateRaidFiveDegradedReadDAG;
    275   } else {
    276 
    277 
    278     /* if mirroring, always use large writes.  If the access requires two
    279      * distinct parity updates, always do a small write.  If the stripe
    280      * contains a failure but the access does not, do a small write.
    281      * The first conditional (numStripeUnitsAccessed <= numDataCol/2) uses a
    282      * less-than-or-equal rather than just a less-than because when G is 3
    283      * or 4, numDataCol/2 is 1, and I want single-stripe-unit updates to use
    284      * just one disk.
    285      */
    286     if ( (asmap->numDataFailed + asmap->numParityFailed) == 0) {
    287       if (rf_suppressLocksAndLargeWrites ||
    288 	  (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol!=1)) ||
    289 	   (asmap->parityInfo->next!=NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
    290 	*createFunc = (RF_VoidFuncPtr)rf_CreateSmallWriteDAG;
    291       }
    292       else
    293 	*createFunc = (RF_VoidFuncPtr)rf_CreateLargeWriteDAG;
    294     }
    295     else {
    296       if (asmap->numParityFailed == 1)
    297 	*createFunc = (RF_VoidFuncPtr)rf_CreateNonRedundantWriteDAG;
    298       else
    299 	if (asmap->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit)
    300 	  *createFunc = NULL;
    301 	else
    302 	  *createFunc = (RF_VoidFuncPtr)rf_CreateDegradedWriteDAG;
    303     }
    304   }
    305 }
    306