Home | History | Annotate | Line # | Download | only in raidframe
rf_raid1.c revision 1.2
      1 /*	$NetBSD: rf_raid1.c,v 1.2 1999/01/26 02:34:00 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: William V. Courtright II
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*****************************************************************************
     30  *
     31  * rf_raid1.c -- implements RAID Level 1
     32  *
     33  *****************************************************************************/
     34 
     35 #include "rf_raid.h"
     36 #include "rf_raid1.h"
     37 #include "rf_dag.h"
     38 #include "rf_dagffrd.h"
     39 #include "rf_dagffwr.h"
     40 #include "rf_dagdegrd.h"
     41 #include "rf_dagutils.h"
     42 #include "rf_dagfuncs.h"
     43 #include "rf_threadid.h"
     44 #include "rf_diskqueue.h"
     45 #include "rf_general.h"
     46 #include "rf_utils.h"
     47 #include "rf_parityscan.h"
     48 #include "rf_mcpair.h"
     49 #include "rf_layout.h"
     50 #include "rf_map.h"
     51 #include "rf_engine.h"
     52 #include "rf_reconbuffer.h"
     53 #include "rf_sys.h"
     54 
     55 typedef struct RF_Raid1ConfigInfo_s {
     56   RF_RowCol_t  **stripeIdentifier;
     57 } RF_Raid1ConfigInfo_t;
     58 
     59 /* start of day code specific to RAID level 1 */
     60 int rf_ConfigureRAID1(
     61   RF_ShutdownList_t  **listp,
     62   RF_Raid_t           *raidPtr,
     63   RF_Config_t         *cfgPtr)
     64 {
     65   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
     66   RF_Raid1ConfigInfo_t *info;
     67   RF_RowCol_t i;
     68 
     69   /* create a RAID level 1 configuration structure */
     70   RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList);
     71   if (info == NULL)
     72     return(ENOMEM);
     73   layoutPtr->layoutSpecificInfo = (void *) info;
     74 
     75   /* ... and fill it in. */
     76   info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList);
     77   if (info->stripeIdentifier == NULL)
     78     return(ENOMEM);
     79   for (i = 0; i < (raidPtr->numCol / 2); i ++) {
     80     info->stripeIdentifier[i][0] = (2 * i);
     81     info->stripeIdentifier[i][1] = (2 * i) + 1;
     82   }
     83 
     84   RF_ASSERT(raidPtr->numRow == 1);
     85 
     86   /* this implementation of RAID level 1 uses one row of numCol disks and allows multiple (numCol / 2)
     87    * stripes per row.  A stripe consists of a single data unit and a single parity (mirror) unit.
     88    * stripe id = raidAddr / stripeUnitSize
     89    */
     90   raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit;
     91   layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2);
     92   layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit;
     93   layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
     94   layoutPtr->numDataCol = 1;
     95   layoutPtr->numParityCol = 1;
     96   return(0);
     97 }
     98 
     99 
    100 /* returns the physical disk location of the primary copy in the mirror pair */
    101 void rf_MapSectorRAID1(
    102   RF_Raid_t         *raidPtr,
    103   RF_RaidAddr_t      raidSector,
    104   RF_RowCol_t       *row,
    105   RF_RowCol_t       *col,
    106   RF_SectorNum_t    *diskSector,
    107   int                remap)
    108 {
    109   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    110   RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
    111 
    112   *row = 0;
    113   *col = 2 * mirrorPair;
    114   *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    115 }
    116 
    117 
    118 /* Map Parity
    119  *
    120  * returns the physical disk location of the secondary copy in the mirror
    121  * pair
    122  */
    123 void rf_MapParityRAID1(
    124   RF_Raid_t       *raidPtr,
    125   RF_RaidAddr_t    raidSector,
    126   RF_RowCol_t     *row,
    127   RF_RowCol_t     *col,
    128   RF_SectorNum_t  *diskSector,
    129   int              remap)
    130 {
    131   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    132   RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
    133 
    134   *row = 0;
    135   *col = (2 * mirrorPair) + 1;
    136 
    137   *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    138 }
    139 
    140 
    141 /* IdentifyStripeRAID1
    142  *
    143  * returns a list of disks for a given redundancy group
    144  */
    145 void rf_IdentifyStripeRAID1(
    146   RF_Raid_t        *raidPtr,
    147   RF_RaidAddr_t     addr,
    148   RF_RowCol_t     **diskids,
    149   RF_RowCol_t      *outRow)
    150 {
    151   RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
    152   RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
    153   RF_ASSERT(stripeID >= 0);
    154   RF_ASSERT(addr >= 0);
    155   *outRow = 0;
    156   *diskids = info->stripeIdentifier[ stripeID % (raidPtr->numCol/2)];
    157   RF_ASSERT(*diskids);
    158 }
    159 
    160 
    161 /* MapSIDToPSIDRAID1
    162  *
    163  * maps a logical stripe to a stripe in the redundant array
    164  */
    165 void rf_MapSIDToPSIDRAID1(
    166   RF_RaidLayout_t    *layoutPtr,
    167   RF_StripeNum_t      stripeID,
    168   RF_StripeNum_t     *psID,
    169   RF_ReconUnitNum_t  *which_ru)
    170 {
    171   *which_ru = 0;
    172   *psID = stripeID;
    173 }
    174 
    175 
    176 
    177 /******************************************************************************
    178  * select a graph to perform a single-stripe access
    179  *
    180  * Parameters:  raidPtr    - description of the physical array
    181  *              type       - type of operation (read or write) requested
    182  *              asmap      - logical & physical addresses for this access
    183  *              createFunc - name of function to use to create the graph
    184  *****************************************************************************/
    185 
    186 void rf_RAID1DagSelect(
    187   RF_Raid_t             *raidPtr,
    188   RF_IoType_t            type,
    189   RF_AccessStripeMap_t  *asmap,
    190   RF_VoidFuncPtr        *createFunc)
    191 {
    192   RF_RowCol_t frow, fcol, or, oc;
    193   RF_PhysDiskAddr_t *failedPDA;
    194   int prior_recon, tid;
    195   RF_RowStatus_t rstat;
    196   RF_SectorNum_t oo;
    197 
    198 
    199   RF_ASSERT(RF_IO_IS_R_OR_W(type));
    200 
    201   if (asmap->numDataFailed + asmap->numParityFailed > 1) {
    202     RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
    203     *createFunc = NULL;
    204     return;
    205   }
    206 
    207   if (asmap->numDataFailed + asmap->numParityFailed) {
    208     /*
    209      * We've got a fault. Re-map to spare space, iff applicable.
    210      * Shouldn't the arch-independent code do this for us?
    211      * Anyway, it turns out if we don't do this here, then when
    212      * we're reconstructing, writes go only to the surviving
    213      * original disk, and aren't reflected on the reconstructed
    214      * spare. Oops. --jimz
    215      */
    216     failedPDA = asmap->failedPDAs[0];
    217     frow = failedPDA->row;
    218     fcol = failedPDA->col;
    219     rstat = raidPtr->status[frow];
    220     prior_recon = (rstat == rf_rs_reconfigured) || (
    221       (rstat == rf_rs_reconstructing) ?
    222       rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
    223       );
    224     if (prior_recon) {
    225       or = frow;
    226       oc = fcol;
    227       oo = failedPDA->startSector;
    228       /*
    229        * If we did distributed sparing, we'd monkey with that here.
    230        * But we don't, so we'll
    231        */
    232       failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
    233       failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
    234       /*
    235        * Redirect other components, iff necessary. This looks
    236        * pretty suspicious to me, but it's what the raid5
    237        * DAG select does.
    238        */
    239       if (asmap->parityInfo->next) {
    240         if (failedPDA == asmap->parityInfo) {
    241           failedPDA->next->row = failedPDA->row;
    242           failedPDA->next->col = failedPDA->col;
    243         }
    244         else {
    245           if (failedPDA == asmap->parityInfo->next) {
    246             asmap->parityInfo->row = failedPDA->row;
    247             asmap->parityInfo->col = failedPDA->col;
    248           }
    249         }
    250       }
    251       if (rf_dagDebug || rf_mapDebug) {
    252         rf_get_threadid(tid);
    253         printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
    254           tid, type, or, oc, (long)oo, failedPDA->row, failedPDA->col,
    255           (long)failedPDA->startSector);
    256       }
    257       asmap->numDataFailed = asmap->numParityFailed = 0;
    258     }
    259   }
    260   if (type == RF_IO_TYPE_READ) {
    261     if (asmap->numDataFailed == 0)
    262       *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorIdleReadDAG;
    263     else
    264       *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneDegradedReadDAG;
    265   }
    266   else {
    267     *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
    268   }
    269 }
    270 
    271 int rf_VerifyParityRAID1(
    272   RF_Raid_t             *raidPtr,
    273   RF_RaidAddr_t          raidAddr,
    274   RF_PhysDiskAddr_t     *parityPDA,
    275   int                    correct_it,
    276   RF_RaidAccessFlags_t   flags)
    277 {
    278   int nbytes, bcount, stripeWidth, ret, i, j, tid=0, nbad, *bbufs;
    279   RF_DagNode_t *blockNode, *unblockNode, *wrBlock;
    280   RF_DagHeader_t *rd_dag_h, *wr_dag_h;
    281   RF_AccessStripeMapHeader_t *asm_h;
    282   RF_AllocListElem_t *allocList;
    283   RF_AccTraceEntry_t tracerec;
    284   RF_ReconUnitNum_t which_ru;
    285   RF_RaidLayout_t *layoutPtr;
    286   RF_AccessStripeMap_t *aasm;
    287   RF_SectorCount_t nsector;
    288   RF_RaidAddr_t startAddr;
    289   char *buf, *buf1, *buf2;
    290   RF_PhysDiskAddr_t *pda;
    291   RF_StripeNum_t psID;
    292   RF_MCPair_t *mcpair;
    293 
    294   if (rf_verifyParityDebug) {
    295     rf_get_threadid(tid);
    296   }
    297 
    298   layoutPtr = &raidPtr->Layout;
    299   startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
    300   nsector = parityPDA->numSector;
    301   nbytes = rf_RaidAddressToByte(raidPtr, nsector);
    302   psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
    303 
    304   asm_h = NULL;
    305   rd_dag_h = wr_dag_h = NULL;
    306   mcpair = NULL;
    307 
    308   ret = RF_PARITY_COULD_NOT_VERIFY;
    309 
    310   rf_MakeAllocList(allocList);
    311   if (allocList == NULL)
    312     return(RF_PARITY_COULD_NOT_VERIFY);
    313   mcpair = rf_AllocMCPair();
    314   if (mcpair == NULL)
    315     goto done;
    316   RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol);
    317   stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
    318   bcount = nbytes*(layoutPtr->numDataCol + layoutPtr->numParityCol);
    319   RF_MallocAndAdd(buf, bcount, (char *), allocList);
    320   if (buf == NULL)
    321     goto done;
    322   if (rf_verifyParityDebug) {
    323     printf("[%d] RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n",
    324       tid, (long)buf, bcount, (long)buf, (long)buf+bcount);
    325   }
    326 
    327   /*
    328    * Generate a DAG which will read the entire stripe- then we can
    329    * just compare data chunks versus "parity" chunks.
    330    */
    331 
    332   rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf,
    333     rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags,
    334     RF_IO_NORMAL_PRIORITY);
    335   if (rd_dag_h == NULL)
    336     goto done;
    337   blockNode = rd_dag_h->succedents[0];
    338   unblockNode = blockNode->succedents[0]->succedents[0];
    339 
    340   /*
    341    * Map the access to physical disk addresses (PDAs)- this will
    342    * get us both a list of data addresses, and "parity" addresses
    343    * (which are really mirror copies).
    344    */
    345   asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe,
    346     buf, RF_DONT_REMAP);
    347   aasm = asm_h->stripeMap;
    348 
    349   buf1 = buf;
    350   /*
    351    * Loop through the data blocks, setting up read nodes for each.
    352    */
    353   for(pda=aasm->physInfo,i=0;i<layoutPtr->numDataCol;i++,pda=pda->next)
    354   {
    355     RF_ASSERT(pda);
    356 
    357     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    358 
    359     RF_ASSERT(pda->numSector != 0);
    360     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
    361       /* cannot verify parity with dead disk */
    362       goto done;
    363     }
    364     pda->bufPtr = buf1;
    365     blockNode->succedents[i]->params[0].p = pda;
    366     blockNode->succedents[i]->params[1].p = buf1;
    367     blockNode->succedents[i]->params[2].v = psID;
    368     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    369     buf1 += nbytes;
    370   }
    371   RF_ASSERT(pda == NULL);
    372   /*
    373    * keep i, buf1 running
    374    *
    375    * Loop through parity blocks, setting up read nodes for each.
    376    */
    377   for(pda=aasm->parityInfo;i<layoutPtr->numDataCol+layoutPtr->numParityCol;i++,pda=pda->next)
    378   {
    379     RF_ASSERT(pda);
    380     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    381     RF_ASSERT(pda->numSector != 0);
    382     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
    383       /* cannot verify parity with dead disk */
    384       goto done;
    385     }
    386     pda->bufPtr = buf1;
    387     blockNode->succedents[i]->params[0].p = pda;
    388     blockNode->succedents[i]->params[1].p = buf1;
    389     blockNode->succedents[i]->params[2].v = psID;
    390     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    391     buf1 += nbytes;
    392   }
    393   RF_ASSERT(pda == NULL);
    394 
    395   bzero((char *)&tracerec, sizeof(tracerec));
    396   rd_dag_h->tracerec = &tracerec;
    397 
    398   if (rf_verifyParityDebug > 1) {
    399     printf("[%d] RAID1 parity verify read dag:\n", tid);
    400     rf_PrintDAGList(rd_dag_h);
    401   }
    402 
    403   RF_LOCK_MUTEX(mcpair->mutex);
    404   mcpair->flag = 0;
    405   rf_DispatchDAG(rd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    406 		 (void *)mcpair);
    407   while (mcpair->flag == 0) {
    408     RF_WAIT_MCPAIR(mcpair);
    409   }
    410   RF_UNLOCK_MUTEX(mcpair->mutex);
    411 
    412   if (rd_dag_h->status != rf_enable) {
    413     RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n");
    414     ret = RF_PARITY_COULD_NOT_VERIFY;
    415     goto done;
    416   }
    417 
    418   /*
    419    * buf1 is the beginning of the data blocks chunk
    420    * buf2 is the beginning of the parity blocks chunk
    421    */
    422   buf1 = buf;
    423   buf2 = buf + (nbytes * layoutPtr->numDataCol);
    424   ret = RF_PARITY_OKAY;
    425   /*
    426    * bbufs is "bad bufs"- an array whose entries are the data
    427    * column numbers where we had miscompares. (That is, column 0
    428    * and column 1 of the array are mirror copies, and are considered
    429    * "data column 0" for this purpose).
    430    */
    431   RF_MallocAndAdd(bbufs, layoutPtr->numParityCol*sizeof(int), (int *),
    432     allocList);
    433   nbad = 0;
    434   /*
    435    * Check data vs "parity" (mirror copy).
    436    */
    437   for(i=0;i<layoutPtr->numDataCol;i++) {
    438     if (rf_verifyParityDebug) {
    439       printf("[%d] RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n",
    440         tid, nbytes, i, (long)buf1, (long)buf2, (long)buf);
    441     }
    442     ret = bcmp(buf1, buf2, nbytes);
    443     if (ret) {
    444       if (rf_verifyParityDebug > 1) {
    445         for(j=0;j<nbytes;j++) {
    446          if (buf1[j] != buf2[j])
    447            break;
    448         }
    449         printf("psid=%ld j=%d\n", (long)psID, j);
    450         printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0]&0xff,
    451           buf1[1]&0xff, buf1[2]&0xff, buf1[3]&0xff, buf1[4]&0xff);
    452         printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0]&0xff,
    453           buf2[1]&0xff, buf2[2]&0xff, buf2[3]&0xff, buf2[4]&0xff);
    454       }
    455       if (rf_verifyParityDebug) {
    456         printf("[%d] RAID1: found bad parity, i=%d\n", tid, i);
    457       }
    458       /*
    459        * Parity is bad. Keep track of which columns were bad.
    460        */
    461       if (bbufs)
    462         bbufs[nbad] = i;
    463       nbad++;
    464       ret = RF_PARITY_BAD;
    465     }
    466     buf1 += nbytes;
    467     buf2 += nbytes;
    468   }
    469 
    470   if ((ret != RF_PARITY_OKAY) && correct_it) {
    471     ret = RF_PARITY_COULD_NOT_CORRECT;
    472     if (rf_verifyParityDebug) {
    473       printf("[%d] RAID1 parity verify: parity not correct\n", tid);
    474     }
    475     if (bbufs == NULL)
    476       goto done;
    477     /*
    478      * Make a DAG with one write node for each bad unit. We'll simply
    479      * write the contents of the data unit onto the parity unit for
    480      * correction. (It's possible that the mirror copy was the correct
    481      * copy, and that we're spooging good data by writing bad over it,
    482      * but there's no way we can know that.
    483      */
    484     wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf,
    485       rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags,
    486       RF_IO_NORMAL_PRIORITY);
    487     if (wr_dag_h == NULL)
    488       goto done;
    489     wrBlock = wr_dag_h->succedents[0];
    490     /*
    491      * Fill in a write node for each bad compare.
    492      */
    493     for(i=0;i<nbad;i++) {
    494       j = i+layoutPtr->numDataCol;
    495       pda = blockNode->succedents[j]->params[0].p;
    496       pda->bufPtr = blockNode->succedents[i]->params[1].p;
    497       wrBlock->succedents[i]->params[0].p = pda;
    498       wrBlock->succedents[i]->params[1].p = pda->bufPtr;
    499       wrBlock->succedents[i]->params[2].v = psID;
    500       wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    501     }
    502     bzero((char *)&tracerec, sizeof(tracerec));
    503     wr_dag_h->tracerec = &tracerec;
    504     if (rf_verifyParityDebug > 1) {
    505       printf("Parity verify write dag:\n");
    506       rf_PrintDAGList(wr_dag_h);
    507     }
    508     RF_LOCK_MUTEX(mcpair->mutex);
    509     mcpair->flag = 0;
    510     /* fire off the write DAG */
    511     rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    512 		   (void *)mcpair);
    513     while (!mcpair->flag) {
    514       RF_WAIT_COND(mcpair->cond, mcpair->mutex);
    515     }
    516     RF_UNLOCK_MUTEX(mcpair->mutex);
    517     if (wr_dag_h->status != rf_enable) {
    518       RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n");
    519       goto done;
    520     }
    521     ret = RF_PARITY_CORRECTED;
    522   }
    523 
    524 done:
    525   /*
    526    * All done. We might've gotten here without doing part of the function,
    527    * so cleanup what we have to and return our running status.
    528    */
    529   if (asm_h)
    530     rf_FreeAccessStripeMap(asm_h);
    531   if (rd_dag_h)
    532     rf_FreeDAG(rd_dag_h);
    533   if (wr_dag_h)
    534     rf_FreeDAG(wr_dag_h);
    535   if (mcpair)
    536     rf_FreeMCPair(mcpair);
    537   rf_FreeAllocList(allocList);
    538   if (rf_verifyParityDebug) {
    539     printf("[%d] RAID1 parity verify, returning %d\n", tid, ret);
    540   }
    541   return(ret);
    542 }
    543 
    544 int rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed)
    545   RF_ReconBuffer_t  *rbuf;          /* the recon buffer to submit */
    546   int                keep_it;       /* whether we can keep this buffer or we have to return it */
    547   int                use_committed; /* whether to use a committed or an available recon buffer */
    548 {
    549   RF_ReconParityStripeStatus_t *pssPtr;
    550   RF_ReconCtrl_t *reconCtrlPtr;
    551   RF_RaidLayout_t *layoutPtr;
    552   int tid=0, retcode, created;
    553   RF_CallbackDesc_t *cb, *p;
    554   RF_ReconBuffer_t *t;
    555   RF_Raid_t *raidPtr;
    556   caddr_t ta;
    557 
    558   retcode = 0;
    559   created = 0;
    560 
    561   raidPtr = rbuf->raidPtr;
    562   layoutPtr = &raidPtr->Layout;
    563   reconCtrlPtr = raidPtr->reconControl[rbuf->row];
    564 
    565   RF_ASSERT(rbuf);
    566   RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
    567 
    568   if (rf_reconbufferDebug) {
    569     rf_get_threadid(tid);
    570     printf("[%d] RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n",
    571       tid, rbuf->row, rbuf->col, (long)rbuf->parityStripeID, rbuf->which_ru,
    572       (long)rbuf->failedDiskSectorOffset);
    573   }
    574 
    575   if (rf_reconDebug) {
    576     printf("RAID1 reconbuffer submit psid %ld buf %lx\n",
    577 	   (long)rbuf->parityStripeID, (long)rbuf->buffer);
    578     printf("RAID1 psid %ld   %02x %02x %02x %02x %02x\n",
    579 	   (long)rbuf->parityStripeID,
    580       rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3],
    581       rbuf->buffer[4]);
    582   }
    583 
    584   RF_LOCK_PSS_MUTEX(raidPtr,rbuf->row,rbuf->parityStripeID);
    585 
    586   RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
    587 
    588   pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
    589     rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
    590   RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten an rbuf for it */
    591 
    592   /*
    593    * Since this is simple mirroring, the first submission for a stripe is also
    594    * treated as the last.
    595    */
    596 
    597   t = NULL;
    598   if (keep_it) {
    599     if (rf_reconbufferDebug) {
    600       printf("[%d] RAID1 rbuf submission: keeping rbuf\n", tid);
    601     }
    602     t = rbuf;
    603   }
    604   else {
    605     if (use_committed) {
    606       if (rf_reconbufferDebug) {
    607         printf("[%d] RAID1 rbuf submission: using committed rbuf\n", tid);
    608       }
    609       t = reconCtrlPtr->committedRbufs;
    610       RF_ASSERT(t);
    611       reconCtrlPtr->committedRbufs = t->next;
    612       t->next = NULL;
    613     }
    614     else if (reconCtrlPtr->floatingRbufs) {
    615       if (rf_reconbufferDebug) {
    616         printf("[%d] RAID1 rbuf submission: using floating rbuf\n", tid);
    617       }
    618       t = reconCtrlPtr->floatingRbufs;
    619       reconCtrlPtr->floatingRbufs = t->next;
    620       t->next = NULL;
    621     }
    622   }
    623   if (t == NULL) {
    624     if (rf_reconbufferDebug) {
    625       printf("[%d] RAID1 rbuf submission: waiting for rbuf\n", tid);
    626     }
    627     RF_ASSERT((keep_it == 0) && (use_committed == 0));
    628     raidPtr->procsInBufWait++;
    629     if ((raidPtr->procsInBufWait == (raidPtr->numCol-1))
    630       && (raidPtr->numFullReconBuffers == 0))
    631     {
    632       /* ruh-ro */
    633       RF_ERRORMSG("Buffer wait deadlock\n");
    634       rf_PrintPSStatusTable(raidPtr, rbuf->row);
    635       RF_PANIC();
    636     }
    637     pssPtr->flags |= RF_PSS_BUFFERWAIT;
    638     cb = rf_AllocCallbackDesc();
    639     cb->row = rbuf->row;
    640     cb->col = rbuf->col;
    641     cb->callbackArg.v = rbuf->parityStripeID;
    642     cb->callbackArg2.v = rbuf->which_ru;
    643     cb->next = NULL;
    644     if (reconCtrlPtr->bufferWaitList == NULL) {
    645       /* we are the wait list- lucky us */
    646       reconCtrlPtr->bufferWaitList = cb;
    647     }
    648     else {
    649       /* append to wait list */
    650       for(p=reconCtrlPtr->bufferWaitList;p->next;p=p->next);
    651       p->next = cb;
    652     }
    653     retcode = 1;
    654     goto out;
    655   }
    656   if (t != rbuf) {
    657     t->row = rbuf->row;
    658     t->col = reconCtrlPtr->fcol;
    659     t->parityStripeID = rbuf->parityStripeID;
    660     t->which_ru = rbuf->which_ru;
    661     t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
    662     t->spRow = rbuf->spRow;
    663     t->spCol = rbuf->spCol;
    664     t->spOffset = rbuf->spOffset;
    665     /* Swap buffers. DANCE! */
    666     ta = t->buffer;
    667     t->buffer = rbuf->buffer;
    668     rbuf->buffer = ta;
    669   }
    670   /*
    671    * Use the rbuf we've been given as the target.
    672    */
    673   RF_ASSERT(pssPtr->rbuf == NULL);
    674   pssPtr->rbuf = t;
    675 
    676   t->count = 1;
    677   /*
    678    * Below, we use 1 for numDataCol (which is equal to the count in the
    679    * previous line), so we'll always be done.
    680    */
    681   rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1);
    682 
    683 out:
    684   RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID);
    685   RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex );
    686   if (rf_reconbufferDebug) {
    687     printf("[%d] RAID1 rbuf submission: returning %d\n", tid, retcode);
    688   }
    689   return(retcode);
    690 }
    691