Home | History | Annotate | Line # | Download | only in raidframe
rf_parityscan.c revision 1.2
      1 /*	$NetBSD: rf_parityscan.c,v 1.2 1999/01/26 02:34:00 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*****************************************************************************
     30  *
     31  * rf_parityscan.c -- misc utilities related to parity verification
     32  *
     33  *****************************************************************************/
     34 
     35 #include "rf_types.h"
     36 #include "rf_raid.h"
     37 #include "rf_dag.h"
     38 #include "rf_dagfuncs.h"
     39 #include "rf_dagutils.h"
     40 #include "rf_mcpair.h"
     41 #include "rf_general.h"
     42 #include "rf_engine.h"
     43 #include "rf_parityscan.h"
     44 #include "rf_map.h"
     45 #include "rf_sys.h"
     46 
     47 /*****************************************************************************************
     48  *
     49  * walk through the entire arry and write new parity.
     50  * This works by creating two DAGs, one to read a stripe of data and one to
     51  * write new parity.  The first is executed, the data is xored together, and
     52  * then the second is executed.  To avoid constantly building and tearing down
     53  * the DAGs, we create them a priori and fill them in with the mapping
     54  * information as we go along.
     55  *
     56  * there should never be more than one thread running this.
     57  *
     58  ****************************************************************************************/
     59 
     60 int rf_RewriteParity(raidPtr)
     61   RF_Raid_t  *raidPtr;
     62 {
     63   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
     64   RF_AccessStripeMapHeader_t *asm_h;
     65   int old_pctg, new_pctg, rc;
     66   RF_PhysDiskAddr_t pda;
     67   RF_SectorNum_t i;
     68 
     69   pda.startSector = 0;
     70   pda.numSector   = raidPtr->Layout.sectorsPerStripeUnit;
     71   old_pctg = -1;
     72 
     73 /* rf_verifyParityDebug=1; */
     74   for (i=0; i<raidPtr->totalSectors; i+=layoutPtr->dataSectorsPerStripe) {
     75     asm_h = rf_MapAccess(raidPtr, i, layoutPtr->dataSectorsPerStripe, NULL, RF_DONT_REMAP);
     76     rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0);
     77     /*     printf("Parity verified: rc=%d\n",rc); */
     78     switch (rc) {
     79       case RF_PARITY_OKAY:
     80       case RF_PARITY_CORRECTED:
     81         break;
     82       case RF_PARITY_BAD:
     83         printf("Parity bad during correction\n");
     84         RF_PANIC();
     85         break;
     86       case RF_PARITY_COULD_NOT_CORRECT:
     87         printf("Could not correct bad parity\n");
     88         RF_PANIC();
     89         break;
     90       case RF_PARITY_COULD_NOT_VERIFY:
     91         printf("Could not verify parity\n");
     92         RF_PANIC();
     93         break;
     94       default:
     95         printf("Bad rc=%d from VerifyParity in RewriteParity\n", rc);
     96         RF_PANIC();
     97     }
     98     rf_FreeAccessStripeMap(asm_h);
     99     new_pctg = i*1000/raidPtr->totalSectors;
    100     if (new_pctg != old_pctg) {
    101     }
    102     old_pctg = new_pctg;
    103   }
    104 #if 1
    105   return(0); /* XXX nothing was here.. GO */
    106 #endif
    107 }
    108 
    109 /*****************************************************************************************
    110  *
    111  * verify that the parity in a particular stripe is correct.
    112  * we validate only the range of parity defined by parityPDA, since
    113  * this is all we have locked.  The way we do this is to create an asm
    114  * that maps the whole stripe and then range-restrict it to the parity
    115  * region defined by the parityPDA.
    116  *
    117  ****************************************************************************************/
    118 int rf_VerifyParity(raidPtr, aasm, correct_it, flags)
    119   RF_Raid_t             *raidPtr;
    120   RF_AccessStripeMap_t  *aasm;
    121   int                    correct_it;
    122   RF_RaidAccessFlags_t   flags;
    123 {
    124   RF_PhysDiskAddr_t *parityPDA;
    125   RF_AccessStripeMap_t *doasm;
    126   RF_LayoutSW_t *lp;
    127   int lrc, rc;
    128 
    129   lp = raidPtr->Layout.map;
    130   if (lp->faultsTolerated == 0) {
    131     /*
    132      * There isn't any parity. Call it "okay."
    133      */
    134     return(RF_PARITY_OKAY);
    135   }
    136   rc = RF_PARITY_OKAY;
    137   if (lp->VerifyParity) {
    138     for(doasm=aasm;doasm;doasm=doasm->next) {
    139       for(parityPDA=doasm->parityInfo;parityPDA;parityPDA=parityPDA->next) {
    140         lrc = lp->VerifyParity(raidPtr, doasm->raidAddress, parityPDA,
    141           correct_it, flags);
    142         if (lrc > rc) {
    143           /* see rf_parityscan.h for why this works */
    144           rc = lrc;
    145         }
    146       }
    147     }
    148   }
    149   else {
    150     rc = RF_PARITY_COULD_NOT_VERIFY;
    151   }
    152   return(rc);
    153 }
    154 
    155 int rf_VerifyParityBasic(raidPtr, raidAddr, parityPDA, correct_it, flags)
    156   RF_Raid_t             *raidPtr;
    157   RF_RaidAddr_t          raidAddr;
    158   RF_PhysDiskAddr_t     *parityPDA;
    159   int                    correct_it;
    160   RF_RaidAccessFlags_t   flags;
    161 {
    162   RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    163   RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
    164   RF_SectorCount_t numsector = parityPDA->numSector;
    165   int numbytes  = rf_RaidAddressToByte(raidPtr, numsector);
    166   int bytesPerStripe = numbytes * layoutPtr->numDataCol;
    167   RF_DagHeader_t *rd_dag_h, *wr_dag_h;          /* read, write dag */
    168   RF_DagNode_t *blockNode, *unblockNode, *wrBlock, *wrUnblock;
    169   RF_AccessStripeMapHeader_t *asm_h;
    170   RF_AccessStripeMap_t *asmap;
    171   RF_AllocListElem_t *alloclist;
    172   RF_PhysDiskAddr_t *pda;
    173   char *pbuf, *buf, *end_p, *p;
    174   int i, retcode;
    175   RF_ReconUnitNum_t which_ru;
    176   RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
    177   int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
    178   RF_AccTraceEntry_t tracerec;
    179   RF_MCPair_t *mcpair;
    180 
    181   retcode = RF_PARITY_OKAY;
    182 
    183   mcpair = rf_AllocMCPair();
    184   rf_MakeAllocList(alloclist);
    185   RF_MallocAndAdd(buf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist);
    186   RF_CallocAndAdd(pbuf, 1, numbytes, (char *), alloclist);     /* use calloc to make sure buffer is zeroed */
    187   end_p = buf + bytesPerStripe;
    188 
    189   rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, buf, rf_DiskReadFunc, rf_DiskReadUndoFunc,
    190 			   "Rod", alloclist, flags, RF_IO_NORMAL_PRIORITY);
    191   blockNode = rd_dag_h->succedents[0];
    192   unblockNode = blockNode->succedents[0]->succedents[0];
    193 
    194   /* map the stripe and fill in the PDAs in the dag */
    195   asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, buf, RF_DONT_REMAP);
    196   asmap = asm_h->stripeMap;
    197 
    198   for (pda=asmap->physInfo,i=0; i<layoutPtr->numDataCol; i++,pda=pda->next) {
    199     RF_ASSERT(pda);
    200     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    201     RF_ASSERT(pda->numSector != 0);
    202     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) goto out;   /* no way to verify parity if disk is dead.  return w/ good status */
    203     blockNode->succedents[i]->params[0].p = pda;
    204     blockNode->succedents[i]->params[2].v = psID;
    205     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    206   }
    207 
    208   RF_ASSERT(!asmap->parityInfo->next);
    209   rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1);
    210   RF_ASSERT(asmap->parityInfo->numSector != 0);
    211   if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1))
    212     goto out;
    213   blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo;
    214 
    215   /* fire off the DAG */
    216   bzero((char *)&tracerec,sizeof(tracerec));
    217   rd_dag_h->tracerec = &tracerec;
    218 
    219   if (rf_verifyParityDebug) {
    220     printf("Parity verify read dag:\n");
    221     rf_PrintDAGList(rd_dag_h);
    222   }
    223 
    224   RF_LOCK_MUTEX(mcpair->mutex);
    225   mcpair->flag = 0;
    226   rf_DispatchDAG(rd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    227 		 (void *) mcpair);
    228   while (!mcpair->flag)
    229 	  RF_WAIT_COND(mcpair->cond, mcpair->mutex);
    230   RF_UNLOCK_MUTEX(mcpair->mutex);
    231   if (rd_dag_h->status != rf_enable) {
    232     RF_ERRORMSG("Unable to verify parity:  can't read the stripe\n");
    233     retcode = RF_PARITY_COULD_NOT_VERIFY;
    234     goto out;
    235   }
    236 
    237   for (p=buf; p<end_p; p+=numbytes) {
    238     rf_bxor(p, pbuf, numbytes, NULL);
    239   }
    240   for (i=0; i<numbytes; i++) {
    241 #if 0
    242 	  if (pbuf[i]!=0 || buf[bytesPerStripe+i]!=0) {
    243 	  printf("Bytes: %d %d %d\n",i,pbuf[i],buf[bytesPerStripe+i]);
    244 	  }
    245 #endif
    246 	  if (pbuf[i] != buf[bytesPerStripe+i]) {
    247 		  if (!correct_it)
    248 			  RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n",
    249 			       i,(u_char) buf[bytesPerStripe+i],(u_char) pbuf[i]);
    250 		  retcode = RF_PARITY_BAD;
    251 		  break;
    252 	  }
    253   }
    254 
    255   if (retcode && correct_it) {
    256     wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
    257 			     "Wnp", alloclist, flags, RF_IO_NORMAL_PRIORITY);
    258     wrBlock = wr_dag_h->succedents[0]; wrUnblock = wrBlock->succedents[0]->succedents[0];
    259     wrBlock->succedents[0]->params[0].p = asmap->parityInfo;
    260     wrBlock->succedents[0]->params[2].v = psID;
    261     wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    262     bzero((char *)&tracerec,sizeof(tracerec));
    263     wr_dag_h->tracerec = &tracerec;
    264     if (rf_verifyParityDebug) {
    265       printf("Parity verify write dag:\n");
    266       rf_PrintDAGList(wr_dag_h);
    267     }
    268     RF_LOCK_MUTEX(mcpair->mutex);
    269     mcpair->flag = 0;
    270     rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    271 		   (void *) mcpair);
    272     while (!mcpair->flag)
    273       RF_WAIT_COND(mcpair->cond, mcpair->mutex);
    274     RF_UNLOCK_MUTEX(mcpair->mutex);
    275     if (wr_dag_h->status != rf_enable) {
    276       RF_ERRORMSG("Unable to correct parity in VerifyParity:  can't write the stripe\n");
    277       retcode = RF_PARITY_COULD_NOT_CORRECT;
    278     }
    279     rf_FreeDAG(wr_dag_h);
    280     if (retcode == RF_PARITY_BAD)
    281       retcode = RF_PARITY_CORRECTED;
    282   }
    283 
    284 out:
    285   rf_FreeAccessStripeMap(asm_h);
    286   rf_FreeAllocList(alloclist);
    287   rf_FreeDAG(rd_dag_h);
    288   rf_FreeMCPair(mcpair);
    289   return(retcode);
    290 }
    291 
    292 int rf_TryToRedirectPDA(raidPtr, pda, parity)
    293   RF_Raid_t          *raidPtr;
    294   RF_PhysDiskAddr_t  *pda;
    295   int                 parity;
    296 {
    297   if (raidPtr->Disks[pda->row][pda->col].status == rf_ds_reconstructing) {
    298     if (rf_CheckRUReconstructed(raidPtr->reconControl[pda->row]->reconMap, pda->startSector)) {
    299       if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
    300 	RF_RowCol_t or = pda->row, oc = pda->col;
    301 	RF_SectorNum_t os = pda->startSector;
    302 	if (parity) {
    303 	  (raidPtr->Layout.map->MapParity)(raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
    304 	  if (rf_verifyParityDebug) printf("VerifyParity: Redir P r %d c %d sect %ld -> r %d c %d sect %ld\n",
    305 					or,oc,(long)os,pda->row,pda->col,(long)pda->startSector);
    306 	} else {
    307 	  (raidPtr->Layout.map->MapSector)(raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
    308 	  if (rf_verifyParityDebug) printf("VerifyParity: Redir D r %d c %d sect %ld -> r %d c %d sect %ld\n",
    309 					or,oc,(long)os,pda->row,pda->col,(long)pda->startSector);
    310 	}
    311       } else {
    312 	RF_RowCol_t spRow = raidPtr->Disks[pda->row][pda->col].spareRow;
    313 	RF_RowCol_t spCol = raidPtr->Disks[pda->row][pda->col].spareCol;
    314 	pda->row = spRow;
    315 	pda->col = spCol;
    316       }
    317     }
    318   }
    319   if (RF_DEAD_DISK(raidPtr->Disks[pda->row][pda->col].status)) return(1);
    320   return(0);
    321 }
    322 
    323 /*****************************************************************************************
    324  *
    325  * currently a stub.
    326  *
    327  * takes as input an ASM describing a write operation and containing one failure, and
    328  * verifies that the parity was correctly updated to reflect the write.
    329  *
    330  * if it's a data unit that's failed, we read the other data units in the stripe and
    331  * the parity unit, XOR them together, and verify that we get the data intended for
    332  * the failed disk.  Since it's easy, we also validate that the right data got written
    333  * to the surviving data disks.
    334  *
    335  * If it's the parity that failed, there's really no validation we can do except the
    336  * above verification that the right data got written to all disks.  This is because
    337  * the new data intended for the failed disk is supplied in the ASM, but this is of
    338  * course not the case for the new parity.
    339  *
    340  ****************************************************************************************/
    341 int rf_VerifyDegrModeWrite(raidPtr, asmh)
    342   RF_Raid_t                   *raidPtr;
    343   RF_AccessStripeMapHeader_t  *asmh;
    344 {
    345   return(0);
    346 }
    347 
    348 /* creates a simple DAG with a header, a block-recon node at level 1,
    349  * nNodes nodes at level 2, an unblock-recon node at level 3, and
    350  * a terminator node at level 4.  The stripe address field in
    351  * the block and unblock nodes are not touched, nor are the pda
    352  * fields in the second-level nodes, so they must be filled in later.
    353  *
    354  * commit point is established at unblock node - this means that any
    355  * failure during dag execution causes the dag to fail
    356  */
    357 RF_DagHeader_t *rf_MakeSimpleDAG(raidPtr, nNodes, bytesPerSU, databuf, doFunc, undoFunc, name, alloclist, flags, priority)
    358   RF_Raid_t              *raidPtr;
    359   int                     nNodes;
    360   int                     bytesPerSU;
    361   char                   *databuf;
    362   int                   (*doFunc)(RF_DagNode_t *node);
    363   int                   (*undoFunc)(RF_DagNode_t *node);
    364   char                   *name;        /* node names at the second level */
    365   RF_AllocListElem_t     *alloclist;
    366   RF_RaidAccessFlags_t    flags;
    367   int                     priority;
    368 {
    369   RF_DagHeader_t *dag_h;
    370   RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode;
    371   int i;
    372 
    373   /* create the nodes, the block & unblock nodes, and the terminator node */
    374   RF_CallocAndAdd(nodes, nNodes+3, sizeof(RF_DagNode_t), (RF_DagNode_t *), alloclist);
    375   blockNode   = &nodes[nNodes];
    376   unblockNode = blockNode+1;
    377   termNode   = unblockNode+1;
    378 
    379   dag_h = rf_AllocDAGHeader();
    380   dag_h->raidPtr = (void *) raidPtr;
    381   dag_h->allocList = NULL;                               /* we won't use this alloc list */
    382   dag_h->status = rf_enable;
    383   dag_h->numSuccedents = 1;
    384   dag_h->creator = "SimpleDAG";
    385 
    386   /* this dag can not commit until the unblock node is reached
    387    * errors prior to the commit point imply the dag has failed
    388    */
    389   dag_h->numCommitNodes = 1;
    390   dag_h->numCommits = 0;
    391 
    392   dag_h->succedents[0] = blockNode;
    393   rf_InitNode(blockNode,   rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", alloclist);
    394   rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", alloclist);
    395   unblockNode->succedents[0] = termNode;
    396   for (i=0; i<nNodes; i++) {
    397     blockNode->succedents[i] = unblockNode->antecedents[i] = &nodes[i];
    398     unblockNode->antType[i] = rf_control;
    399     rf_InitNode(&nodes[i], rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist);
    400     nodes[i].succedents[0] =  unblockNode;
    401     nodes[i].antecedents[0] = blockNode;
    402     nodes[i].antType[0] = rf_control;
    403     nodes[i].params[1].p = (databuf + (i*bytesPerSU));
    404   }
    405   rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", alloclist);
    406   termNode->antecedents[0] = unblockNode;
    407   termNode->antType[0] = rf_control;
    408   return(dag_h);
    409 }
    410