Home | History | Annotate | Line # | Download | only in raidframe
rf_raid1.c revision 1.2
      1  1.2  oster /*	$NetBSD: rf_raid1.c,v 1.2 1999/01/26 02:34:00 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Author: William V. Courtright II
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster 
     29  1.1  oster /*****************************************************************************
     30  1.1  oster  *
     31  1.1  oster  * rf_raid1.c -- implements RAID Level 1
     32  1.1  oster  *
     33  1.1  oster  *****************************************************************************/
     34  1.1  oster 
     35  1.1  oster #include "rf_raid.h"
     36  1.1  oster #include "rf_raid1.h"
     37  1.1  oster #include "rf_dag.h"
     38  1.1  oster #include "rf_dagffrd.h"
     39  1.1  oster #include "rf_dagffwr.h"
     40  1.1  oster #include "rf_dagdegrd.h"
     41  1.1  oster #include "rf_dagutils.h"
     42  1.1  oster #include "rf_dagfuncs.h"
     43  1.1  oster #include "rf_threadid.h"
     44  1.1  oster #include "rf_diskqueue.h"
     45  1.1  oster #include "rf_general.h"
     46  1.1  oster #include "rf_utils.h"
     47  1.1  oster #include "rf_parityscan.h"
     48  1.1  oster #include "rf_mcpair.h"
     49  1.1  oster #include "rf_layout.h"
     50  1.1  oster #include "rf_map.h"
     51  1.1  oster #include "rf_engine.h"
     52  1.1  oster #include "rf_reconbuffer.h"
     53  1.1  oster #include "rf_sys.h"
     54  1.1  oster 
     /*
      * Layout-specific configuration for RAID level 1.  rf_ConfigureRAID1
      * allocates one of these and stores it in the layout's
      * layoutSpecificInfo pointer; rf_IdentifyStripeRAID1 reads it back.
      */
     55  1.1  oster typedef struct RF_Raid1ConfigInfo_s {
     56  1.1  oster   RF_RowCol_t  **stripeIdentifier;  /* (numCol / 2) x 2 table: row i holds columns 2i and 2i+1 */
     57  1.1  oster } RF_Raid1ConfigInfo_t;
     58  1.1  oster 
     59  1.1  oster /* start of day code specific to RAID level 1 */
     60  1.1  oster int rf_ConfigureRAID1(
     61  1.1  oster   RF_ShutdownList_t  **listp,
     62  1.1  oster   RF_Raid_t           *raidPtr,
     63  1.1  oster   RF_Config_t         *cfgPtr)
     64  1.1  oster {
     65  1.1  oster   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
     66  1.1  oster   RF_Raid1ConfigInfo_t *info;
     67  1.1  oster   RF_RowCol_t i;
     68  1.1  oster 
     69  1.1  oster   /* create a RAID level 1 configuration structure */
     70  1.1  oster   RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList);
     71  1.1  oster   if (info == NULL)
     72  1.1  oster     return(ENOMEM);
     73  1.1  oster   layoutPtr->layoutSpecificInfo = (void *) info;
     74  1.1  oster 
     75  1.1  oster   /* ... and fill it in. */
     76  1.1  oster   info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList);
     77  1.1  oster   if (info->stripeIdentifier == NULL)
     78  1.1  oster     return(ENOMEM);
     79  1.1  oster   for (i = 0; i < (raidPtr->numCol / 2); i ++) {
     80  1.1  oster     info->stripeIdentifier[i][0] = (2 * i);
     81  1.1  oster     info->stripeIdentifier[i][1] = (2 * i) + 1;
     82  1.1  oster   }
     83  1.1  oster 
     84  1.1  oster   RF_ASSERT(raidPtr->numRow == 1);
     85  1.1  oster 
     86  1.1  oster   /* this implementation of RAID level 1 uses one row of numCol disks and allows multiple (numCol / 2)
     87  1.1  oster    * stripes per row.  A stripe consists of a single data unit and a single parity (mirror) unit.
     88  1.1  oster    * stripe id = raidAddr / stripeUnitSize
     89  1.1  oster    */
     90  1.1  oster   raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit;
     91  1.1  oster   layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2);
     92  1.1  oster   layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit;
     93  1.1  oster   layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
     94  1.1  oster   layoutPtr->numDataCol = 1;
     95  1.1  oster   layoutPtr->numParityCol = 1;
     96  1.1  oster   return(0);
     97  1.1  oster }
     98  1.1  oster 
     99  1.1  oster 
    100  1.1  oster /* returns the physical disk location of the primary copy in the mirror pair */
    101  1.1  oster void rf_MapSectorRAID1(
    102  1.1  oster   RF_Raid_t         *raidPtr,
    103  1.1  oster   RF_RaidAddr_t      raidSector,
    104  1.1  oster   RF_RowCol_t       *row,
    105  1.1  oster   RF_RowCol_t       *col,
    106  1.1  oster   RF_SectorNum_t    *diskSector,
    107  1.1  oster   int                remap)
    108  1.1  oster {
    109  1.1  oster   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    110  1.1  oster   RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
    111  1.1  oster 
    112  1.1  oster   *row = 0;
    113  1.1  oster   *col = 2 * mirrorPair;
    114  1.1  oster   *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    115  1.1  oster }
    116  1.1  oster 
    117  1.1  oster 
    118  1.1  oster /* Map Parity
    119  1.1  oster  *
    120  1.1  oster  * returns the physical disk location of the secondary copy in the mirror
    121  1.1  oster  * pair
    122  1.1  oster  */
    123  1.1  oster void rf_MapParityRAID1(
    124  1.1  oster   RF_Raid_t       *raidPtr,
    125  1.1  oster   RF_RaidAddr_t    raidSector,
    126  1.1  oster   RF_RowCol_t     *row,
    127  1.1  oster   RF_RowCol_t     *col,
    128  1.1  oster   RF_SectorNum_t  *diskSector,
    129  1.1  oster   int              remap)
    130  1.1  oster {
    131  1.1  oster   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    132  1.1  oster   RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
    133  1.1  oster 
    134  1.1  oster   *row = 0;
    135  1.1  oster   *col = (2 * mirrorPair) + 1;
    136  1.1  oster 
    137  1.1  oster   *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    138  1.1  oster }
    139  1.1  oster 
    140  1.1  oster 
    141  1.1  oster /* IdentifyStripeRAID1
    142  1.1  oster  *
    143  1.1  oster  * returns a list of disks for a given redundancy group
    144  1.1  oster  */
    145  1.1  oster void rf_IdentifyStripeRAID1(
    146  1.1  oster   RF_Raid_t        *raidPtr,
    147  1.1  oster   RF_RaidAddr_t     addr,
    148  1.1  oster   RF_RowCol_t     **diskids,
    149  1.1  oster   RF_RowCol_t      *outRow)
    150  1.1  oster {
    151  1.1  oster   RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
    152  1.1  oster   RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
    153  1.1  oster   RF_ASSERT(stripeID >= 0);
    154  1.1  oster   RF_ASSERT(addr >= 0);
    155  1.1  oster   *outRow = 0;
    156  1.1  oster   *diskids = info->stripeIdentifier[ stripeID % (raidPtr->numCol/2)];
    157  1.1  oster   RF_ASSERT(*diskids);
    158  1.1  oster }
    159  1.1  oster 
    160  1.1  oster 
    161  1.1  oster /* MapSIDToPSIDRAID1
    162  1.1  oster  *
    163  1.1  oster  * maps a logical stripe to a stripe in the redundant array
    164  1.1  oster  */
    165  1.1  oster void rf_MapSIDToPSIDRAID1(
    166  1.1  oster   RF_RaidLayout_t    *layoutPtr,
    167  1.1  oster   RF_StripeNum_t      stripeID,
    168  1.1  oster   RF_StripeNum_t     *psID,
    169  1.1  oster   RF_ReconUnitNum_t  *which_ru)
    170  1.1  oster {
    171  1.1  oster   *which_ru = 0;
    172  1.1  oster   *psID = stripeID;
    173  1.1  oster }
    174  1.1  oster 
    175  1.1  oster 
    176  1.1  oster 
    177  1.1  oster /******************************************************************************
    178  1.1  oster  * select a graph to perform a single-stripe access
    179  1.1  oster  *
    180  1.1  oster  * Parameters:  raidPtr    - description of the physical array
    181  1.1  oster  *              type       - type of operation (read or write) requested
    182  1.1  oster  *              asmap      - logical & physical addresses for this access
    183  1.1  oster  *              createFunc - name of function to use to create the graph
    184  1.1  oster  *****************************************************************************/
    185  1.1  oster 
    186  1.1  oster void rf_RAID1DagSelect(
    187  1.1  oster   RF_Raid_t             *raidPtr,
    188  1.1  oster   RF_IoType_t            type,
    189  1.1  oster   RF_AccessStripeMap_t  *asmap,
    190  1.1  oster   RF_VoidFuncPtr        *createFunc)
    191  1.1  oster {
    192  1.1  oster   RF_RowCol_t frow, fcol, or, oc;
    193  1.1  oster   RF_PhysDiskAddr_t *failedPDA;
    194  1.1  oster   int prior_recon, tid;
    195  1.1  oster   RF_RowStatus_t rstat;
    196  1.1  oster   RF_SectorNum_t oo;
    197  1.1  oster 
    198  1.1  oster 
    199  1.1  oster   RF_ASSERT(RF_IO_IS_R_OR_W(type));
    200  1.1  oster 
    201  1.1  oster   if (asmap->numDataFailed + asmap->numParityFailed > 1) {
    202  1.1  oster     RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
    203  1.1  oster     *createFunc = NULL;
    204  1.1  oster     return;
    205  1.1  oster   }
    206  1.1  oster 
    207  1.1  oster   if (asmap->numDataFailed + asmap->numParityFailed) {
    208  1.1  oster     /*
    209  1.1  oster      * We've got a fault. Re-map to spare space, iff applicable.
    210  1.1  oster      * Shouldn't the arch-independent code do this for us?
    211  1.1  oster      * Anyway, it turns out if we don't do this here, then when
    212  1.1  oster      * we're reconstructing, writes go only to the surviving
    213  1.1  oster      * original disk, and aren't reflected on the reconstructed
    214  1.1  oster      * spare. Oops. --jimz
    215  1.1  oster      */
    216  1.1  oster     failedPDA = asmap->failedPDAs[0];
    217  1.1  oster     frow = failedPDA->row;
    218  1.1  oster     fcol = failedPDA->col;
    219  1.1  oster     rstat = raidPtr->status[frow];
    220  1.1  oster     prior_recon = (rstat == rf_rs_reconfigured) || (
    221  1.1  oster       (rstat == rf_rs_reconstructing) ?
    222  1.1  oster       rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
    223  1.1  oster       );
    224  1.1  oster     if (prior_recon) {
    225  1.1  oster       or = frow;
    226  1.1  oster       oc = fcol;
    227  1.1  oster       oo = failedPDA->startSector;
    228  1.1  oster       /*
    229  1.1  oster        * If we did distributed sparing, we'd monkey with that here.
    230  1.1  oster        * But we don't, so we'll
    231  1.1  oster        */
    232  1.1  oster       failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
    233  1.1  oster       failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
    234  1.1  oster       /*
    235  1.1  oster        * Redirect other components, iff necessary. This looks
    236  1.1  oster        * pretty suspicious to me, but it's what the raid5
    237  1.1  oster        * DAG select does.
    238  1.1  oster        */
    239  1.1  oster       if (asmap->parityInfo->next) {
    240  1.1  oster         if (failedPDA == asmap->parityInfo) {
    241  1.1  oster           failedPDA->next->row = failedPDA->row;
    242  1.1  oster           failedPDA->next->col = failedPDA->col;
    243  1.1  oster         }
    244  1.1  oster         else {
    245  1.1  oster           if (failedPDA == asmap->parityInfo->next) {
    246  1.1  oster             asmap->parityInfo->row = failedPDA->row;
    247  1.1  oster             asmap->parityInfo->col = failedPDA->col;
    248  1.1  oster           }
    249  1.1  oster         }
    250  1.1  oster       }
    251  1.1  oster       if (rf_dagDebug || rf_mapDebug) {
    252  1.1  oster         rf_get_threadid(tid);
    253  1.1  oster         printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
    254  1.1  oster           tid, type, or, oc, (long)oo, failedPDA->row, failedPDA->col,
    255  1.1  oster           (long)failedPDA->startSector);
    256  1.1  oster       }
    257  1.1  oster       asmap->numDataFailed = asmap->numParityFailed = 0;
    258  1.1  oster     }
    259  1.1  oster   }
    260  1.1  oster   if (type == RF_IO_TYPE_READ) {
    261  1.1  oster     if (asmap->numDataFailed == 0)
    262  1.1  oster       *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorIdleReadDAG;
    263  1.1  oster     else
    264  1.1  oster       *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneDegradedReadDAG;
    265  1.1  oster   }
    266  1.1  oster   else {
    267  1.1  oster     *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
    268  1.1  oster   }
    269  1.1  oster }
    270  1.1  oster 
    271  1.1  oster int rf_VerifyParityRAID1(
    272  1.1  oster   RF_Raid_t             *raidPtr,
    273  1.1  oster   RF_RaidAddr_t          raidAddr,
    274  1.1  oster   RF_PhysDiskAddr_t     *parityPDA,
    275  1.1  oster   int                    correct_it,
    276  1.1  oster   RF_RaidAccessFlags_t   flags)
    277  1.1  oster {
    278  1.1  oster   int nbytes, bcount, stripeWidth, ret, i, j, tid=0, nbad, *bbufs;
    279  1.1  oster   RF_DagNode_t *blockNode, *unblockNode, *wrBlock;
    280  1.1  oster   RF_DagHeader_t *rd_dag_h, *wr_dag_h;
    281  1.1  oster   RF_AccessStripeMapHeader_t *asm_h;
    282  1.1  oster   RF_AllocListElem_t *allocList;
    283  1.1  oster   RF_AccTraceEntry_t tracerec;
    284  1.1  oster   RF_ReconUnitNum_t which_ru;
    285  1.1  oster   RF_RaidLayout_t *layoutPtr;
    286  1.1  oster   RF_AccessStripeMap_t *aasm;
    287  1.1  oster   RF_SectorCount_t nsector;
    288  1.1  oster   RF_RaidAddr_t startAddr;
    289  1.1  oster   char *buf, *buf1, *buf2;
    290  1.1  oster   RF_PhysDiskAddr_t *pda;
    291  1.1  oster   RF_StripeNum_t psID;
    292  1.1  oster   RF_MCPair_t *mcpair;
    293  1.1  oster 
    294  1.1  oster   if (rf_verifyParityDebug) {
    295  1.1  oster     rf_get_threadid(tid);
    296  1.1  oster   }
    297  1.1  oster 
    298  1.1  oster   layoutPtr = &raidPtr->Layout;
    299  1.1  oster   startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
    300  1.1  oster   nsector = parityPDA->numSector;
    301  1.1  oster   nbytes = rf_RaidAddressToByte(raidPtr, nsector);
    302  1.1  oster   psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
    303  1.1  oster 
    304  1.1  oster   asm_h = NULL;
    305  1.1  oster   rd_dag_h = wr_dag_h = NULL;
    306  1.1  oster   mcpair = NULL;
    307  1.1  oster 
    308  1.1  oster   ret = RF_PARITY_COULD_NOT_VERIFY;
    309  1.1  oster 
    310  1.1  oster   rf_MakeAllocList(allocList);
    311  1.1  oster   if (allocList == NULL)
    312  1.1  oster     return(RF_PARITY_COULD_NOT_VERIFY);
    313  1.1  oster   mcpair = rf_AllocMCPair();
    314  1.1  oster   if (mcpair == NULL)
    315  1.1  oster     goto done;
    316  1.1  oster   RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol);
    317  1.1  oster   stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
    318  1.1  oster   bcount = nbytes*(layoutPtr->numDataCol + layoutPtr->numParityCol);
    319  1.1  oster   RF_MallocAndAdd(buf, bcount, (char *), allocList);
    320  1.1  oster   if (buf == NULL)
    321  1.1  oster     goto done;
    322  1.1  oster   if (rf_verifyParityDebug) {
    323  1.1  oster     printf("[%d] RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n",
    324  1.1  oster       tid, (long)buf, bcount, (long)buf, (long)buf+bcount);
    325  1.1  oster   }
    326  1.1  oster 
    327  1.1  oster   /*
    328  1.1  oster    * Generate a DAG which will read the entire stripe- then we can
    329  1.1  oster    * just compare data chunks versus "parity" chunks.
    330  1.1  oster    */
    331  1.1  oster 
    332  1.1  oster   rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf,
    333  1.1  oster     rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags,
    334  1.1  oster     RF_IO_NORMAL_PRIORITY);
    335  1.1  oster   if (rd_dag_h == NULL)
    336  1.1  oster     goto done;
    337  1.1  oster   blockNode = rd_dag_h->succedents[0];
    338  1.1  oster   unblockNode = blockNode->succedents[0]->succedents[0];
    339  1.1  oster 
    340  1.1  oster   /*
    341  1.1  oster    * Map the access to physical disk addresses (PDAs)- this will
    342  1.1  oster    * get us both a list of data addresses, and "parity" addresses
    343  1.1  oster    * (which are really mirror copies).
    344  1.1  oster    */
    345  1.1  oster   asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe,
    346  1.1  oster     buf, RF_DONT_REMAP);
    347  1.1  oster   aasm = asm_h->stripeMap;
    348  1.1  oster 
    349  1.1  oster   buf1 = buf;
    350  1.1  oster   /*
    351  1.1  oster    * Loop through the data blocks, setting up read nodes for each.
    352  1.1  oster    */
    353  1.1  oster   for(pda=aasm->physInfo,i=0;i<layoutPtr->numDataCol;i++,pda=pda->next)
    354  1.1  oster   {
    355  1.1  oster     RF_ASSERT(pda);
    356  1.1  oster 
    357  1.1  oster     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    358  1.1  oster 
    359  1.1  oster     RF_ASSERT(pda->numSector != 0);
    360  1.1  oster     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
    361  1.1  oster       /* cannot verify parity with dead disk */
    362  1.1  oster       goto done;
    363  1.1  oster     }
    364  1.1  oster     pda->bufPtr = buf1;
    365  1.1  oster     blockNode->succedents[i]->params[0].p = pda;
    366  1.1  oster     blockNode->succedents[i]->params[1].p = buf1;
    367  1.1  oster     blockNode->succedents[i]->params[2].v = psID;
    368  1.1  oster     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    369  1.1  oster     buf1 += nbytes;
    370  1.1  oster   }
    371  1.1  oster   RF_ASSERT(pda == NULL);
    372  1.1  oster   /*
    373  1.1  oster    * keep i, buf1 running
    374  1.1  oster    *
    375  1.1  oster    * Loop through parity blocks, setting up read nodes for each.
    376  1.1  oster    */
    377  1.1  oster   for(pda=aasm->parityInfo;i<layoutPtr->numDataCol+layoutPtr->numParityCol;i++,pda=pda->next)
    378  1.1  oster   {
    379  1.1  oster     RF_ASSERT(pda);
    380  1.1  oster     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    381  1.1  oster     RF_ASSERT(pda->numSector != 0);
    382  1.1  oster     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
    383  1.1  oster       /* cannot verify parity with dead disk */
    384  1.1  oster       goto done;
    385  1.1  oster     }
    386  1.1  oster     pda->bufPtr = buf1;
    387  1.1  oster     blockNode->succedents[i]->params[0].p = pda;
    388  1.1  oster     blockNode->succedents[i]->params[1].p = buf1;
    389  1.1  oster     blockNode->succedents[i]->params[2].v = psID;
    390  1.1  oster     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    391  1.1  oster     buf1 += nbytes;
    392  1.1  oster   }
    393  1.1  oster   RF_ASSERT(pda == NULL);
    394  1.1  oster 
    395  1.1  oster   bzero((char *)&tracerec, sizeof(tracerec));
    396  1.1  oster   rd_dag_h->tracerec = &tracerec;
    397  1.1  oster 
    398  1.1  oster   if (rf_verifyParityDebug > 1) {
    399  1.1  oster     printf("[%d] RAID1 parity verify read dag:\n", tid);
    400  1.1  oster     rf_PrintDAGList(rd_dag_h);
    401  1.1  oster   }
    402  1.1  oster 
    403  1.1  oster   RF_LOCK_MUTEX(mcpair->mutex);
    404  1.1  oster   mcpair->flag = 0;
    405  1.1  oster   rf_DispatchDAG(rd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    406  1.1  oster 		 (void *)mcpair);
    407  1.1  oster   while (mcpair->flag == 0) {
    408  1.1  oster     RF_WAIT_MCPAIR(mcpair);
    409  1.1  oster   }
    410  1.1  oster   RF_UNLOCK_MUTEX(mcpair->mutex);
    411  1.1  oster 
    412  1.1  oster   if (rd_dag_h->status != rf_enable) {
    413  1.1  oster     RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n");
    414  1.1  oster     ret = RF_PARITY_COULD_NOT_VERIFY;
    415  1.1  oster     goto done;
    416  1.1  oster   }
    417  1.1  oster 
    418  1.1  oster   /*
    419  1.1  oster    * buf1 is the beginning of the data blocks chunk
    420  1.1  oster    * buf2 is the beginning of the parity blocks chunk
    421  1.1  oster    */
    422  1.1  oster   buf1 = buf;
    423  1.1  oster   buf2 = buf + (nbytes * layoutPtr->numDataCol);
    424  1.1  oster   ret = RF_PARITY_OKAY;
    425  1.1  oster   /*
    426  1.1  oster    * bbufs is "bad bufs"- an array whose entries are the data
    427  1.1  oster    * column numbers where we had miscompares. (That is, column 0
    428  1.1  oster    * and column 1 of the array are mirror copies, and are considered
    429  1.1  oster    * "data column 0" for this purpose).
    430  1.1  oster    */
    431  1.1  oster   RF_MallocAndAdd(bbufs, layoutPtr->numParityCol*sizeof(int), (int *),
    432  1.1  oster     allocList);
    433  1.1  oster   nbad = 0;
    434  1.1  oster   /*
    435  1.1  oster    * Check data vs "parity" (mirror copy).
    436  1.1  oster    */
    437  1.1  oster   for(i=0;i<layoutPtr->numDataCol;i++) {
    438  1.1  oster     if (rf_verifyParityDebug) {
    439  1.1  oster       printf("[%d] RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n",
    440  1.1  oster         tid, nbytes, i, (long)buf1, (long)buf2, (long)buf);
    441  1.1  oster     }
    442  1.1  oster     ret = bcmp(buf1, buf2, nbytes);
    443  1.1  oster     if (ret) {
    444  1.1  oster       if (rf_verifyParityDebug > 1) {
    445  1.1  oster         for(j=0;j<nbytes;j++) {
    446  1.1  oster          if (buf1[j] != buf2[j])
    447  1.1  oster            break;
    448  1.1  oster         }
    449  1.1  oster         printf("psid=%ld j=%d\n", (long)psID, j);
    450  1.1  oster         printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0]&0xff,
    451  1.1  oster           buf1[1]&0xff, buf1[2]&0xff, buf1[3]&0xff, buf1[4]&0xff);
    452  1.1  oster         printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0]&0xff,
    453  1.1  oster           buf2[1]&0xff, buf2[2]&0xff, buf2[3]&0xff, buf2[4]&0xff);
    454  1.1  oster       }
    455  1.1  oster       if (rf_verifyParityDebug) {
    456  1.1  oster         printf("[%d] RAID1: found bad parity, i=%d\n", tid, i);
    457  1.1  oster       }
    458  1.1  oster       /*
    459  1.1  oster        * Parity is bad. Keep track of which columns were bad.
    460  1.1  oster        */
    461  1.1  oster       if (bbufs)
    462  1.1  oster         bbufs[nbad] = i;
    463  1.1  oster       nbad++;
    464  1.1  oster       ret = RF_PARITY_BAD;
    465  1.1  oster     }
    466  1.1  oster     buf1 += nbytes;
    467  1.1  oster     buf2 += nbytes;
    468  1.1  oster   }
    469  1.1  oster 
    470  1.1  oster   if ((ret != RF_PARITY_OKAY) && correct_it) {
    471  1.1  oster     ret = RF_PARITY_COULD_NOT_CORRECT;
    472  1.1  oster     if (rf_verifyParityDebug) {
    473  1.1  oster       printf("[%d] RAID1 parity verify: parity not correct\n", tid);
    474  1.1  oster     }
    475  1.1  oster     if (bbufs == NULL)
    476  1.1  oster       goto done;
    477  1.1  oster     /*
    478  1.1  oster      * Make a DAG with one write node for each bad unit. We'll simply
    479  1.1  oster      * write the contents of the data unit onto the parity unit for
    480  1.1  oster      * correction. (It's possible that the mirror copy was the correct
    481  1.1  oster      * copy, and that we're spooging good data by writing bad over it,
    482  1.1  oster      * but there's no way we can know that.
    483  1.1  oster      */
    484  1.1  oster     wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf,
    485  1.1  oster       rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags,
    486  1.1  oster       RF_IO_NORMAL_PRIORITY);
    487  1.1  oster     if (wr_dag_h == NULL)
    488  1.1  oster       goto done;
    489  1.1  oster     wrBlock = wr_dag_h->succedents[0];
    490  1.1  oster     /*
    491  1.1  oster      * Fill in a write node for each bad compare.
    492  1.1  oster      */
    493  1.1  oster     for(i=0;i<nbad;i++) {
    494  1.1  oster       j = i+layoutPtr->numDataCol;
    495  1.1  oster       pda = blockNode->succedents[j]->params[0].p;
    496  1.1  oster       pda->bufPtr = blockNode->succedents[i]->params[1].p;
    497  1.1  oster       wrBlock->succedents[i]->params[0].p = pda;
    498  1.1  oster       wrBlock->succedents[i]->params[1].p = pda->bufPtr;
    499  1.1  oster       wrBlock->succedents[i]->params[2].v = psID;
    500  1.1  oster       wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    501  1.1  oster     }
    502  1.1  oster     bzero((char *)&tracerec, sizeof(tracerec));
    503  1.1  oster     wr_dag_h->tracerec = &tracerec;
    504  1.1  oster     if (rf_verifyParityDebug > 1) {
    505  1.1  oster       printf("Parity verify write dag:\n");
    506  1.1  oster       rf_PrintDAGList(wr_dag_h);
    507  1.1  oster     }
    508  1.1  oster     RF_LOCK_MUTEX(mcpair->mutex);
    509  1.1  oster     mcpair->flag = 0;
    510  1.1  oster     /* fire off the write DAG */
    511  1.1  oster     rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    512  1.1  oster 		   (void *)mcpair);
    513  1.1  oster     while (!mcpair->flag) {
    514  1.1  oster       RF_WAIT_COND(mcpair->cond, mcpair->mutex);
    515  1.1  oster     }
    516  1.1  oster     RF_UNLOCK_MUTEX(mcpair->mutex);
    517  1.1  oster     if (wr_dag_h->status != rf_enable) {
    518  1.1  oster       RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n");
    519  1.1  oster       goto done;
    520  1.1  oster     }
    521  1.1  oster     ret = RF_PARITY_CORRECTED;
    522  1.1  oster   }
    523  1.1  oster 
    524  1.1  oster done:
    525  1.1  oster   /*
    526  1.1  oster    * All done. We might've gotten here without doing part of the function,
    527  1.1  oster    * so cleanup what we have to and return our running status.
    528  1.1  oster    */
    529  1.1  oster   if (asm_h)
    530  1.1  oster     rf_FreeAccessStripeMap(asm_h);
    531  1.1  oster   if (rd_dag_h)
    532  1.1  oster     rf_FreeDAG(rd_dag_h);
    533  1.1  oster   if (wr_dag_h)
    534  1.1  oster     rf_FreeDAG(wr_dag_h);
    535  1.1  oster   if (mcpair)
    536  1.1  oster     rf_FreeMCPair(mcpair);
    537  1.1  oster   rf_FreeAllocList(allocList);
    538  1.1  oster   if (rf_verifyParityDebug) {
    539  1.1  oster     printf("[%d] RAID1 parity verify, returning %d\n", tid, ret);
    540  1.1  oster   }
    541  1.1  oster   return(ret);
    542  1.1  oster }
    543  1.1  oster 
    544  1.1  oster int rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed)
    545  1.1  oster   RF_ReconBuffer_t  *rbuf;          /* the recon buffer to submit */
    546  1.1  oster   int                keep_it;       /* whether we can keep this buffer or we have to return it */
    547  1.1  oster   int                use_committed; /* whether to use a committed or an available recon buffer */
    548  1.1  oster {
    549  1.1  oster   RF_ReconParityStripeStatus_t *pssPtr;
    550  1.1  oster   RF_ReconCtrl_t *reconCtrlPtr;
    551  1.1  oster   RF_RaidLayout_t *layoutPtr;
    552  1.1  oster   int tid=0, retcode, created;
    553  1.1  oster   RF_CallbackDesc_t *cb, *p;
    554  1.1  oster   RF_ReconBuffer_t *t;
    555  1.1  oster   RF_Raid_t *raidPtr;
    556  1.1  oster   caddr_t ta;
    557  1.1  oster 
    558  1.1  oster   retcode = 0;
    559  1.1  oster   created = 0;
    560  1.1  oster 
    561  1.1  oster   raidPtr = rbuf->raidPtr;
    562  1.1  oster   layoutPtr = &raidPtr->Layout;
    563  1.1  oster   reconCtrlPtr = raidPtr->reconControl[rbuf->row];
    564  1.1  oster 
    565  1.1  oster   RF_ASSERT(rbuf);
    566  1.1  oster   RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
    567  1.1  oster 
    568  1.1  oster   if (rf_reconbufferDebug) {
    569  1.1  oster     rf_get_threadid(tid);
    570  1.1  oster     printf("[%d] RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n",
    571  1.1  oster       tid, rbuf->row, rbuf->col, (long)rbuf->parityStripeID, rbuf->which_ru,
    572  1.1  oster       (long)rbuf->failedDiskSectorOffset);
    573  1.1  oster   }
    574  1.1  oster 
    575  1.1  oster   if (rf_reconDebug) {
    576  1.1  oster     printf("RAID1 reconbuffer submit psid %ld buf %lx\n",
    577  1.1  oster 	   (long)rbuf->parityStripeID, (long)rbuf->buffer);
    578  1.1  oster     printf("RAID1 psid %ld   %02x %02x %02x %02x %02x\n",
    579  1.1  oster 	   (long)rbuf->parityStripeID,
    580  1.1  oster       rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3],
    581  1.1  oster       rbuf->buffer[4]);
    582  1.1  oster   }
    583  1.1  oster 
    584  1.1  oster   RF_LOCK_PSS_MUTEX(raidPtr,rbuf->row,rbuf->parityStripeID);
    585  1.1  oster 
    586  1.1  oster   RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
    587  1.1  oster 
    588  1.1  oster   pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
    589  1.1  oster     rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
    590  1.1  oster   RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten an rbuf for it */
    591  1.1  oster 
    592  1.1  oster   /*
    593  1.1  oster    * Since this is simple mirroring, the first submission for a stripe is also
    594  1.1  oster    * treated as the last.
    595  1.1  oster    */
    596  1.1  oster 
    597  1.1  oster   t = NULL;
    598  1.1  oster   if (keep_it) {
    599  1.1  oster     if (rf_reconbufferDebug) {
    600  1.1  oster       printf("[%d] RAID1 rbuf submission: keeping rbuf\n", tid);
    601  1.1  oster     }
    602  1.1  oster     t = rbuf;
    603  1.1  oster   }
    604  1.1  oster   else {
    605  1.1  oster     if (use_committed) {
    606  1.1  oster       if (rf_reconbufferDebug) {
    607  1.1  oster         printf("[%d] RAID1 rbuf submission: using committed rbuf\n", tid);
    608  1.1  oster       }
    609  1.1  oster       t = reconCtrlPtr->committedRbufs;
    610  1.1  oster       RF_ASSERT(t);
    611  1.1  oster       reconCtrlPtr->committedRbufs = t->next;
    612  1.1  oster       t->next = NULL;
    613  1.1  oster     }
    614  1.1  oster     else if (reconCtrlPtr->floatingRbufs) {
    615  1.1  oster       if (rf_reconbufferDebug) {
    616  1.1  oster         printf("[%d] RAID1 rbuf submission: using floating rbuf\n", tid);
    617  1.1  oster       }
    618  1.1  oster       t = reconCtrlPtr->floatingRbufs;
    619  1.1  oster       reconCtrlPtr->floatingRbufs = t->next;
    620  1.1  oster       t->next = NULL;
    621  1.1  oster     }
    622  1.1  oster   }
    623  1.1  oster   if (t == NULL) {
    624  1.1  oster     if (rf_reconbufferDebug) {
    625  1.1  oster       printf("[%d] RAID1 rbuf submission: waiting for rbuf\n", tid);
    626  1.1  oster     }
    627  1.1  oster     RF_ASSERT((keep_it == 0) && (use_committed == 0));
    628  1.1  oster     raidPtr->procsInBufWait++;
    629  1.1  oster     if ((raidPtr->procsInBufWait == (raidPtr->numCol-1))
    630  1.1  oster       && (raidPtr->numFullReconBuffers == 0))
    631  1.1  oster     {
    632  1.1  oster       /* ruh-ro */
    633  1.1  oster       RF_ERRORMSG("Buffer wait deadlock\n");
    634  1.1  oster       rf_PrintPSStatusTable(raidPtr, rbuf->row);
    635  1.1  oster       RF_PANIC();
    636  1.1  oster     }
    637  1.1  oster     pssPtr->flags |= RF_PSS_BUFFERWAIT;
    638  1.1  oster     cb = rf_AllocCallbackDesc();
    639  1.1  oster     cb->row = rbuf->row;
    640  1.1  oster     cb->col = rbuf->col;
    641  1.1  oster     cb->callbackArg.v = rbuf->parityStripeID;
    642  1.1  oster     cb->callbackArg2.v = rbuf->which_ru;
    643  1.1  oster     cb->next = NULL;
    644  1.1  oster     if (reconCtrlPtr->bufferWaitList == NULL) {
    645  1.1  oster       /* we are the wait list- lucky us */
    646  1.1  oster       reconCtrlPtr->bufferWaitList = cb;
    647  1.1  oster     }
    648  1.1  oster     else {
    649  1.1  oster       /* append to wait list */
    650  1.1  oster       for(p=reconCtrlPtr->bufferWaitList;p->next;p=p->next);
    651  1.1  oster       p->next = cb;
    652  1.1  oster     }
    653  1.1  oster     retcode = 1;
    654  1.1  oster     goto out;
    655  1.1  oster   }
    656  1.1  oster   if (t != rbuf) {
    657  1.1  oster     t->row = rbuf->row;
    658  1.1  oster     t->col = reconCtrlPtr->fcol;
    659  1.1  oster     t->parityStripeID = rbuf->parityStripeID;
    660  1.1  oster     t->which_ru = rbuf->which_ru;
    661  1.1  oster     t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
    662  1.1  oster     t->spRow = rbuf->spRow;
    663  1.1  oster     t->spCol = rbuf->spCol;
    664  1.1  oster     t->spOffset = rbuf->spOffset;
    665  1.1  oster     /* Swap buffers. DANCE! */
    666  1.1  oster     ta = t->buffer;
    667  1.1  oster     t->buffer = rbuf->buffer;
    668  1.1  oster     rbuf->buffer = ta;
    669  1.1  oster   }
    670  1.1  oster   /*
    671  1.1  oster    * Use the rbuf we've been given as the target.
    672  1.1  oster    */
    673  1.1  oster   RF_ASSERT(pssPtr->rbuf == NULL);
    674  1.1  oster   pssPtr->rbuf = t;
    675  1.1  oster 
    676  1.1  oster   t->count = 1;
    677  1.1  oster   /*
    678  1.1  oster    * Below, we use 1 for numDataCol (which is equal to the count in the
    679  1.1  oster    * previous line), so we'll always be done.
    680  1.1  oster    */
    681  1.1  oster   rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1);
    682  1.1  oster 
    683  1.1  oster out:
    684  1.1  oster   RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID);
    685  1.1  oster   RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex );
    686  1.1  oster   if (rf_reconbufferDebug) {
    687  1.1  oster     printf("[%d] RAID1 rbuf submission: returning %d\n", tid, retcode);
    688  1.1  oster   }
    689  1.1  oster   return(retcode);
    690  1.1  oster }
    691