Home | History | Annotate | Line # | Download | only in raidframe
rf_raid1.c revision 1.1
      1  1.1  oster /*	$NetBSD: rf_raid1.c,v 1.1 1998/11/13 04:20:33 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Author: William V. Courtright II
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster 
     29  1.1  oster /*****************************************************************************
     30  1.1  oster  *
     31  1.1  oster  * rf_raid1.c -- implements RAID Level 1
     32  1.1  oster  *
     33  1.1  oster  *****************************************************************************/
     34  1.1  oster 
     35  1.1  oster /*
     36  1.1  oster  * :
     37  1.1  oster  * Log: rf_raid1.c,v
     38  1.1  oster  * Revision 1.46  1996/11/05 21:10:40  jimz
     39  1.1  oster  * failed pda generalization
     40  1.1  oster  *
     41  1.1  oster  * Revision 1.45  1996/07/31  16:56:18  jimz
     42  1.1  oster  * dataBytesPerStripe, sectorsPerDisk init arch-indep.
     43  1.1  oster  *
     44  1.1  oster  * Revision 1.44  1996/07/30  03:06:43  jimz
     45  1.1  oster  * get rid of extra rf_threadid.h include
     46  1.1  oster  *
     47  1.1  oster  * Revision 1.43  1996/07/27  23:36:08  jimz
     48  1.1  oster  * Solaris port of simulator
     49  1.1  oster  *
     50  1.1  oster  * Revision 1.42  1996/07/22  19:52:16  jimz
     51  1.1  oster  * switched node params to RF_DagParam_t, a union of
     52  1.1  oster  * a 64-bit int and a void *, for better portability
     53  1.1  oster  * attempted hpux port, but failed partway through for
     54  1.1  oster  * lack of a single C compiler capable of compiling all
     55  1.1  oster  * source files
     56  1.1  oster  *
     57  1.1  oster  * Revision 1.41  1996/07/18  22:57:14  jimz
     58  1.1  oster  * port simulator to AIX
     59  1.1  oster  *
     60  1.1  oster  * Revision 1.40  1996/07/17  14:31:19  jimz
     61  1.1  oster  * minor cleanup for readability
     62  1.1  oster  *
     63  1.1  oster  * Revision 1.39  1996/07/15  17:22:18  jimz
     64  1.1  oster  * nit-pick code cleanup
     65  1.1  oster  * resolve stdlib problems on DEC OSF
     66  1.1  oster  *
     67  1.1  oster  * Revision 1.38  1996/07/15  02:56:31  jimz
     68  1.1  oster  * fixed dag selection to deal with failed + recon to spare disks
     69  1.1  oster  * enhanced recon, parity check debugging
     70  1.1  oster  *
     71  1.1  oster  * Revision 1.37  1996/07/13  00:00:59  jimz
     72  1.1  oster  * sanitized generalized reconstruction architecture
     73  1.1  oster  * cleaned up head sep, rbuf problems
     74  1.1  oster  *
     75  1.1  oster  * Revision 1.36  1996/07/11  19:08:00  jimz
     76  1.1  oster  * generalize reconstruction mechanism
     77  1.1  oster  * allow raid1 reconstructs via copyback (done with array
     78  1.1  oster  * quiesced, not online, therefore not disk-directed)
     79  1.1  oster  *
     80  1.1  oster  * Revision 1.35  1996/07/10  23:01:24  jimz
     81  1.1  oster  * Better commenting of VerifyParity (for posterity)
     82  1.1  oster  *
     83  1.1  oster  * Revision 1.34  1996/07/10  22:29:45  jimz
     84  1.1  oster  * VerifyParityRAID1: corrected return values for stripes in degraded mode
     85  1.1  oster  *
     86  1.1  oster  * Revision 1.33  1996/07/10  16:05:39  jimz
     87  1.1  oster  * fixed a couple minor bugs in VerifyParityRAID1
     88  1.1  oster  * added code to correct bad RAID1 parity
     89  1.1  oster  *
     90  1.1  oster  * Revision 1.32  1996/06/20  18:47:04  jimz
     91  1.1  oster  * fix up verification bugs
     92  1.1  oster  *
     93  1.1  oster  * Revision 1.31  1996/06/20  15:38:59  jimz
     94  1.1  oster  * added parity verification
     95  1.1  oster  * can't correct bad parity yet, but can return pass/fail
     96  1.1  oster  *
     97  1.1  oster  * Revision 1.30  1996/06/19  22:23:01  jimz
     98  1.1  oster  * parity verification is now a layout-configurable thing
     99  1.1  oster  * not all layouts currently support it (correctly, anyway)
    100  1.1  oster  *
    101  1.1  oster  * Revision 1.29  1996/06/11  08:54:27  jimz
    102  1.1  oster  * improved error-checking at configuration time
    103  1.1  oster  *
    104  1.1  oster  * Revision 1.28  1996/06/10  18:25:24  wvcii
    105  1.1  oster  * fixed bug in rf_IdentifyStripeRAID1 - added array initialization
    106  1.1  oster  *
    107  1.1  oster  * Revision 1.27  1996/06/10  11:55:47  jimz
    108  1.1  oster  * Straightened out some per-array/not-per-array distinctions, fixed
    109  1.1  oster  * a couple bugs related to confusion. Added shutdown lists. Removed
    110  1.1  oster  * layout shutdown function (now subsumed by shutdown lists).
    111  1.1  oster  *
    112  1.1  oster  * Revision 1.26  1996/06/07  22:26:27  jimz
    113  1.1  oster  * type-ify which_ru (RF_ReconUnitNum_t)
    114  1.1  oster  *
    115  1.1  oster  * Revision 1.25  1996/06/07  21:33:04  jimz
    116  1.1  oster  * begin using consistent types for sector numbers,
    117  1.1  oster  * stripe numbers, row+col numbers, recon unit numbers
    118  1.1  oster  *
    119  1.1  oster  * Revision 1.24  1996/06/06  17:29:43  jimz
    120  1.1  oster  * use CreateMirrorIdleReadDAG for mirrored read
    121  1.1  oster  *
    122  1.1  oster  * Revision 1.23  1996/06/03  23:28:26  jimz
    123  1.1  oster  * more bugfixes
    124  1.1  oster  * check in tree to sync for IPDS runs with current bugfixes
    125  1.1  oster  * there still may be a problem with threads in the script test
    126  1.1  oster  * getting I/Os stuck- not trivially reproducible (runs ~50 times
    127  1.1  oster  * in a row without getting stuck)
    128  1.1  oster  *
    129  1.1  oster  * Revision 1.22  1996/06/02  17:31:48  jimz
    130  1.1  oster  * Moved a lot of global stuff into array structure, where it belongs.
    131  1.1  oster  * Fixed up paritylogging, pss modules in this manner. Some general
    132  1.1  oster  * code cleanup. Removed lots of dead code, some dead files.
    133  1.1  oster  *
    134  1.1  oster  * Revision 1.21  1996/05/31  22:26:54  jimz
    135  1.1  oster  * fix a lot of mapping problems, memory allocation problems
    136  1.1  oster  * found some weird lock issues, fixed 'em
    137  1.1  oster  * more code cleanup
    138  1.1  oster  *
    139  1.1  oster  * Revision 1.20  1996/05/30  23:22:16  jimz
    140  1.1  oster  * bugfixes of serialization, timing problems
    141  1.1  oster  * more cleanup
    142  1.1  oster  *
    143  1.1  oster  * Revision 1.19  1996/05/30  11:29:41  jimz
    144  1.1  oster  * Numerous bug fixes. Stripe lock release code disagreed with the taking code
    145  1.1  oster  * about when stripes should be locked (I made it consistent: no parity, no lock)
    146  1.1  oster  * There was a lot of extra serialization of I/Os which I've removed- a lot of
    147  1.1  oster  * it was to calculate values for the cache code, which is no longer with us.
    148  1.1  oster  * More types, function, macro cleanup. Added code to properly quiesce the array
    149  1.1  oster  * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
    150  1.1  oster  * before. Fixed memory allocation, freeing bugs.
    151  1.1  oster  *
    152  1.1  oster  * Revision 1.18  1996/05/27  18:56:37  jimz
    153  1.1  oster  * more code cleanup
    154  1.1  oster  * better typing
    155  1.1  oster  * compiles in all 3 environments
    156  1.1  oster  *
    157  1.1  oster  * Revision 1.17  1996/05/24  22:17:04  jimz
    158  1.1  oster  * continue code + namespace cleanup
    159  1.1  oster  * typed a bunch of flags
    160  1.1  oster  *
    161  1.1  oster  * Revision 1.16  1996/05/24  04:28:55  jimz
    162  1.1  oster  * release cleanup ckpt
    163  1.1  oster  *
    164  1.1  oster  * Revision 1.15  1996/05/24  01:59:45  jimz
    165  1.1  oster  * another checkpoint in code cleanup for release
    166  1.1  oster  * time to sync kernel tree
    167  1.1  oster  *
    168  1.1  oster  * Revision 1.14  1996/05/18  19:51:34  jimz
    169  1.1  oster  * major code cleanup- fix syntax, make some types consistent,
    170  1.1  oster  * add prototypes, clean out dead code, et cetera
    171  1.1  oster  *
    172  1.1  oster  * Revision 1.13  1996/05/03  19:36:22  wvcii
    173  1.1  oster  * moved dag creation routines to dag library
    174  1.1  oster  *
    175  1.1  oster  * Revision 1.12  1996/02/23  01:38:16  amiri
    176  1.1  oster  * removed chained declustering special case in SelectIdleDisk
    177  1.1  oster  *
    178  1.1  oster  * Revision 1.11  1996/02/22  16:47:18  amiri
    179  1.1  oster  * disabled shortest queue optimization for chained declustering
    180  1.1  oster  *
    181  1.1  oster  * Revision 1.10  1995/12/12  18:10:06  jimz
    182  1.1  oster  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
    183  1.1  oster  * fix 80-column brain damage in comments
    184  1.1  oster  *
    185  1.1  oster  * Revision 1.9  1995/12/04  19:21:28  wvcii
    186  1.1  oster  * modified SelectIdleDisk to take a mirror node as a parameter and
    187  1.1  oster  * conditionally swap params 0 (data pda) and 4 (mirror pda).
    188  1.1  oster  * modified CreateRaidOneReadDAG so that it creates the DAG itself
    189  1.1  oster  * as opposed to reusing code in CreateNonredundantDAG.
    190  1.1  oster  *
    191  1.1  oster  * Revision 1.8  1995/11/30  16:07:45  wvcii
    192  1.1  oster  * added copyright info
    193  1.1  oster  *
    194  1.1  oster  * Revision 1.7  1995/11/16  14:46:18  wvcii
    195  1.1  oster  * fixed bugs in mapping and degraded dag creation, added comments
    196  1.1  oster  *
    197  1.1  oster  * Revision 1.6  1995/11/14  22:29:16  wvcii
    198  1.1  oster  * fixed bugs in dag creation
    199  1.1  oster  *
    200  1.1  oster  * Revision 1.5  1995/11/07  15:23:33  wvcii
    201  1.1  oster  * changed RAID1DagSelect prototype
    202  1.1  oster  * function no longer generates numHdrSucc, numTermAnt
    203  1.1  oster  * changed dag creation routines:
    204  1.1  oster  *   term node generated during dag creation
    205  1.1  oster  *   encoded commit nodes, barrier, antecedent types
    206  1.1  oster  *
    207  1.1  oster  * Revision 1.4  1995/10/10  19:09:21  wvcii
    208  1.1  oster  * write dag now handles non-aligned accesses
    209  1.1  oster  *
    210  1.1  oster  * Revision 1.3  1995/10/05  02:32:56  jimz
    211  1.1  oster  * ifdef'd out queue locking for load balancing
    212  1.1  oster  *
    213  1.1  oster  * Revision 1.2  1995/10/04  07:04:40  wvcii
    214  1.1  oster  * reads are now scheduled according to disk queue length.
    215  1.1  oster  * queue length is the sum of number of ios queued in raidframe as well as those at the disk.
    216  1.1  oster  * reads are sent to the disk with the shortest queue.
    217  1.1  oster  * testing against user disks successful, sim & kernel untested.
    218  1.1  oster  *
    219  1.1  oster  * Revision 1.1  1995/10/04  03:53:23  wvcii
    220  1.1  oster  * Initial revision
    221  1.1  oster  *
    222  1.1  oster  *
    223  1.1  oster  */
    224  1.1  oster 
    225  1.1  oster #include "rf_raid.h"
    226  1.1  oster #include "rf_raid1.h"
    227  1.1  oster #include "rf_dag.h"
    228  1.1  oster #include "rf_dagffrd.h"
    229  1.1  oster #include "rf_dagffwr.h"
    230  1.1  oster #include "rf_dagdegrd.h"
    231  1.1  oster #include "rf_dagutils.h"
    232  1.1  oster #include "rf_dagfuncs.h"
    233  1.1  oster #include "rf_threadid.h"
    234  1.1  oster #include "rf_diskqueue.h"
    235  1.1  oster #include "rf_general.h"
    236  1.1  oster #include "rf_utils.h"
    237  1.1  oster #include "rf_parityscan.h"
    238  1.1  oster #include "rf_mcpair.h"
    239  1.1  oster #include "rf_layout.h"
    240  1.1  oster #include "rf_map.h"
    241  1.1  oster #include "rf_engine.h"
    242  1.1  oster #include "rf_reconbuffer.h"
    243  1.1  oster #include "rf_sys.h"
    244  1.1  oster 
    245  1.1  oster typedef struct RF_Raid1ConfigInfo_s {
    246  1.1  oster   RF_RowCol_t  **stripeIdentifier;
    247  1.1  oster } RF_Raid1ConfigInfo_t;
    248  1.1  oster 
    249  1.1  oster /* start of day code specific to RAID level 1 */
    250  1.1  oster int rf_ConfigureRAID1(
    251  1.1  oster   RF_ShutdownList_t  **listp,
    252  1.1  oster   RF_Raid_t           *raidPtr,
    253  1.1  oster   RF_Config_t         *cfgPtr)
    254  1.1  oster {
    255  1.1  oster   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
    256  1.1  oster   RF_Raid1ConfigInfo_t *info;
    257  1.1  oster   RF_RowCol_t i;
    258  1.1  oster 
    259  1.1  oster   /* create a RAID level 1 configuration structure */
    260  1.1  oster   RF_MallocAndAdd(info, sizeof(RF_Raid1ConfigInfo_t), (RF_Raid1ConfigInfo_t *), raidPtr->cleanupList);
    261  1.1  oster   if (info == NULL)
    262  1.1  oster     return(ENOMEM);
    263  1.1  oster   layoutPtr->layoutSpecificInfo = (void *) info;
    264  1.1  oster 
    265  1.1  oster   /* ... and fill it in. */
    266  1.1  oster   info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol / 2, 2, raidPtr->cleanupList);
    267  1.1  oster   if (info->stripeIdentifier == NULL)
    268  1.1  oster     return(ENOMEM);
    269  1.1  oster   for (i = 0; i < (raidPtr->numCol / 2); i ++) {
    270  1.1  oster     info->stripeIdentifier[i][0] = (2 * i);
    271  1.1  oster     info->stripeIdentifier[i][1] = (2 * i) + 1;
    272  1.1  oster   }
    273  1.1  oster 
    274  1.1  oster   RF_ASSERT(raidPtr->numRow == 1);
    275  1.1  oster 
    276  1.1  oster   /* this implementation of RAID level 1 uses one row of numCol disks and allows multiple (numCol / 2)
    277  1.1  oster    * stripes per row.  A stripe consists of a single data unit and a single parity (mirror) unit.
    278  1.1  oster    * stripe id = raidAddr / stripeUnitSize
    279  1.1  oster    */
    280  1.1  oster   raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2) * layoutPtr->sectorsPerStripeUnit;
    281  1.1  oster   layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk * (raidPtr->numCol / 2);
    282  1.1  oster   layoutPtr->dataSectorsPerStripe = layoutPtr->sectorsPerStripeUnit;
    283  1.1  oster   layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
    284  1.1  oster   layoutPtr->numDataCol = 1;
    285  1.1  oster   layoutPtr->numParityCol = 1;
    286  1.1  oster   return(0);
    287  1.1  oster }
    288  1.1  oster 
    289  1.1  oster 
    290  1.1  oster /* returns the physical disk location of the primary copy in the mirror pair */
    291  1.1  oster void rf_MapSectorRAID1(
    292  1.1  oster   RF_Raid_t         *raidPtr,
    293  1.1  oster   RF_RaidAddr_t      raidSector,
    294  1.1  oster   RF_RowCol_t       *row,
    295  1.1  oster   RF_RowCol_t       *col,
    296  1.1  oster   RF_SectorNum_t    *diskSector,
    297  1.1  oster   int                remap)
    298  1.1  oster {
    299  1.1  oster   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    300  1.1  oster   RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
    301  1.1  oster 
    302  1.1  oster   *row = 0;
    303  1.1  oster   *col = 2 * mirrorPair;
    304  1.1  oster   *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    305  1.1  oster }
    306  1.1  oster 
    307  1.1  oster 
    308  1.1  oster /* Map Parity
    309  1.1  oster  *
    310  1.1  oster  * returns the physical disk location of the secondary copy in the mirror
    311  1.1  oster  * pair
    312  1.1  oster  */
    313  1.1  oster void rf_MapParityRAID1(
    314  1.1  oster   RF_Raid_t       *raidPtr,
    315  1.1  oster   RF_RaidAddr_t    raidSector,
    316  1.1  oster   RF_RowCol_t     *row,
    317  1.1  oster   RF_RowCol_t     *col,
    318  1.1  oster   RF_SectorNum_t  *diskSector,
    319  1.1  oster   int              remap)
    320  1.1  oster {
    321  1.1  oster   RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
    322  1.1  oster   RF_RowCol_t mirrorPair = SUID % (raidPtr->numCol / 2);
    323  1.1  oster 
    324  1.1  oster   *row = 0;
    325  1.1  oster   *col = (2 * mirrorPair) + 1;
    326  1.1  oster 
    327  1.1  oster   *diskSector = ((SUID / (raidPtr->numCol / 2)) * raidPtr->Layout.sectorsPerStripeUnit) + (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
    328  1.1  oster }
    329  1.1  oster 
    330  1.1  oster 
    331  1.1  oster /* IdentifyStripeRAID1
    332  1.1  oster  *
    333  1.1  oster  * returns a list of disks for a given redundancy group
    334  1.1  oster  */
    335  1.1  oster void rf_IdentifyStripeRAID1(
    336  1.1  oster   RF_Raid_t        *raidPtr,
    337  1.1  oster   RF_RaidAddr_t     addr,
    338  1.1  oster   RF_RowCol_t     **diskids,
    339  1.1  oster   RF_RowCol_t      *outRow)
    340  1.1  oster {
    341  1.1  oster   RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
    342  1.1  oster   RF_Raid1ConfigInfo_t *info = raidPtr->Layout.layoutSpecificInfo;
    343  1.1  oster   RF_ASSERT(stripeID >= 0);
    344  1.1  oster   RF_ASSERT(addr >= 0);
    345  1.1  oster   *outRow = 0;
    346  1.1  oster   *diskids = info->stripeIdentifier[ stripeID % (raidPtr->numCol/2)];
    347  1.1  oster   RF_ASSERT(*diskids);
    348  1.1  oster }
    349  1.1  oster 
    350  1.1  oster 
    351  1.1  oster /* MapSIDToPSIDRAID1
    352  1.1  oster  *
    353  1.1  oster  * maps a logical stripe to a stripe in the redundant array
    354  1.1  oster  */
    355  1.1  oster void rf_MapSIDToPSIDRAID1(
    356  1.1  oster   RF_RaidLayout_t    *layoutPtr,
    357  1.1  oster   RF_StripeNum_t      stripeID,
    358  1.1  oster   RF_StripeNum_t     *psID,
    359  1.1  oster   RF_ReconUnitNum_t  *which_ru)
    360  1.1  oster {
    361  1.1  oster   *which_ru = 0;
    362  1.1  oster   *psID = stripeID;
    363  1.1  oster }
    364  1.1  oster 
    365  1.1  oster 
    366  1.1  oster 
    367  1.1  oster /******************************************************************************
    368  1.1  oster  * select a graph to perform a single-stripe access
    369  1.1  oster  *
    370  1.1  oster  * Parameters:  raidPtr    - description of the physical array
    371  1.1  oster  *              type       - type of operation (read or write) requested
    372  1.1  oster  *              asmap      - logical & physical addresses for this access
    373  1.1  oster  *              createFunc - name of function to use to create the graph
    374  1.1  oster  *****************************************************************************/
    375  1.1  oster 
    376  1.1  oster void rf_RAID1DagSelect(
    377  1.1  oster   RF_Raid_t             *raidPtr,
    378  1.1  oster   RF_IoType_t            type,
    379  1.1  oster   RF_AccessStripeMap_t  *asmap,
    380  1.1  oster   RF_VoidFuncPtr        *createFunc)
    381  1.1  oster {
    382  1.1  oster   RF_RowCol_t frow, fcol, or, oc;
    383  1.1  oster   RF_PhysDiskAddr_t *failedPDA;
    384  1.1  oster   int prior_recon, tid;
    385  1.1  oster   RF_RowStatus_t rstat;
    386  1.1  oster   RF_SectorNum_t oo;
    387  1.1  oster 
    388  1.1  oster 
    389  1.1  oster   RF_ASSERT(RF_IO_IS_R_OR_W(type));
    390  1.1  oster 
    391  1.1  oster   if (asmap->numDataFailed + asmap->numParityFailed > 1) {
    392  1.1  oster     RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
    393  1.1  oster     *createFunc = NULL;
    394  1.1  oster     return;
    395  1.1  oster   }
    396  1.1  oster 
    397  1.1  oster   if (asmap->numDataFailed + asmap->numParityFailed) {
    398  1.1  oster     /*
    399  1.1  oster      * We've got a fault. Re-map to spare space, iff applicable.
    400  1.1  oster      * Shouldn't the arch-independent code do this for us?
    401  1.1  oster      * Anyway, it turns out if we don't do this here, then when
    402  1.1  oster      * we're reconstructing, writes go only to the surviving
    403  1.1  oster      * original disk, and aren't reflected on the reconstructed
    404  1.1  oster      * spare. Oops. --jimz
    405  1.1  oster      */
    406  1.1  oster     failedPDA = asmap->failedPDAs[0];
    407  1.1  oster     frow = failedPDA->row;
    408  1.1  oster     fcol = failedPDA->col;
    409  1.1  oster     rstat = raidPtr->status[frow];
    410  1.1  oster     prior_recon = (rstat == rf_rs_reconfigured) || (
    411  1.1  oster       (rstat == rf_rs_reconstructing) ?
    412  1.1  oster       rf_CheckRUReconstructed(raidPtr->reconControl[frow]->reconMap, failedPDA->startSector) : 0
    413  1.1  oster       );
    414  1.1  oster     if (prior_recon) {
    415  1.1  oster       or = frow;
    416  1.1  oster       oc = fcol;
    417  1.1  oster       oo = failedPDA->startSector;
    418  1.1  oster       /*
    419  1.1  oster        * If we did distributed sparing, we'd monkey with that here.
    420  1.1  oster        * But we don't, so we'll
    421  1.1  oster        */
    422  1.1  oster       failedPDA->row = raidPtr->Disks[frow][fcol].spareRow;
    423  1.1  oster       failedPDA->col = raidPtr->Disks[frow][fcol].spareCol;
    424  1.1  oster       /*
    425  1.1  oster        * Redirect other components, iff necessary. This looks
    426  1.1  oster        * pretty suspicious to me, but it's what the raid5
    427  1.1  oster        * DAG select does.
    428  1.1  oster        */
    429  1.1  oster       if (asmap->parityInfo->next) {
    430  1.1  oster         if (failedPDA == asmap->parityInfo) {
    431  1.1  oster           failedPDA->next->row = failedPDA->row;
    432  1.1  oster           failedPDA->next->col = failedPDA->col;
    433  1.1  oster         }
    434  1.1  oster         else {
    435  1.1  oster           if (failedPDA == asmap->parityInfo->next) {
    436  1.1  oster             asmap->parityInfo->row = failedPDA->row;
    437  1.1  oster             asmap->parityInfo->col = failedPDA->col;
    438  1.1  oster           }
    439  1.1  oster         }
    440  1.1  oster       }
    441  1.1  oster       if (rf_dagDebug || rf_mapDebug) {
    442  1.1  oster         rf_get_threadid(tid);
    443  1.1  oster         printf("[%d] Redirected type '%c' r %d c %d o %ld -> r %d c %d o %ld\n",
    444  1.1  oster           tid, type, or, oc, (long)oo, failedPDA->row, failedPDA->col,
    445  1.1  oster           (long)failedPDA->startSector);
    446  1.1  oster       }
    447  1.1  oster       asmap->numDataFailed = asmap->numParityFailed = 0;
    448  1.1  oster     }
    449  1.1  oster   }
    450  1.1  oster   if (type == RF_IO_TYPE_READ) {
    451  1.1  oster     if (asmap->numDataFailed == 0)
    452  1.1  oster       *createFunc = (RF_VoidFuncPtr)rf_CreateMirrorIdleReadDAG;
    453  1.1  oster     else
    454  1.1  oster       *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneDegradedReadDAG;
    455  1.1  oster   }
    456  1.1  oster   else {
    457  1.1  oster     *createFunc = (RF_VoidFuncPtr)rf_CreateRaidOneWriteDAG;
    458  1.1  oster   }
    459  1.1  oster }
    460  1.1  oster 
    461  1.1  oster int rf_VerifyParityRAID1(
    462  1.1  oster   RF_Raid_t             *raidPtr,
    463  1.1  oster   RF_RaidAddr_t          raidAddr,
    464  1.1  oster   RF_PhysDiskAddr_t     *parityPDA,
    465  1.1  oster   int                    correct_it,
    466  1.1  oster   RF_RaidAccessFlags_t   flags)
    467  1.1  oster {
    468  1.1  oster   int nbytes, bcount, stripeWidth, ret, i, j, tid=0, nbad, *bbufs;
    469  1.1  oster   RF_DagNode_t *blockNode, *unblockNode, *wrBlock;
    470  1.1  oster   RF_DagHeader_t *rd_dag_h, *wr_dag_h;
    471  1.1  oster   RF_AccessStripeMapHeader_t *asm_h;
    472  1.1  oster   RF_AllocListElem_t *allocList;
    473  1.1  oster   RF_AccTraceEntry_t tracerec;
    474  1.1  oster   RF_ReconUnitNum_t which_ru;
    475  1.1  oster   RF_RaidLayout_t *layoutPtr;
    476  1.1  oster   RF_AccessStripeMap_t *aasm;
    477  1.1  oster   RF_SectorCount_t nsector;
    478  1.1  oster   RF_RaidAddr_t startAddr;
    479  1.1  oster   char *buf, *buf1, *buf2;
    480  1.1  oster   RF_PhysDiskAddr_t *pda;
    481  1.1  oster   RF_StripeNum_t psID;
    482  1.1  oster   RF_MCPair_t *mcpair;
    483  1.1  oster 
    484  1.1  oster   if (rf_verifyParityDebug) {
    485  1.1  oster     rf_get_threadid(tid);
    486  1.1  oster   }
    487  1.1  oster 
    488  1.1  oster   layoutPtr = &raidPtr->Layout;
    489  1.1  oster   startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
    490  1.1  oster   nsector = parityPDA->numSector;
    491  1.1  oster   nbytes = rf_RaidAddressToByte(raidPtr, nsector);
    492  1.1  oster   psID = rf_RaidAddressToParityStripeID(layoutPtr, raidAddr, &which_ru);
    493  1.1  oster 
    494  1.1  oster   asm_h = NULL;
    495  1.1  oster   rd_dag_h = wr_dag_h = NULL;
    496  1.1  oster   mcpair = NULL;
    497  1.1  oster 
    498  1.1  oster   ret = RF_PARITY_COULD_NOT_VERIFY;
    499  1.1  oster 
    500  1.1  oster   rf_MakeAllocList(allocList);
    501  1.1  oster   if (allocList == NULL)
    502  1.1  oster     return(RF_PARITY_COULD_NOT_VERIFY);
    503  1.1  oster   mcpair = rf_AllocMCPair();
    504  1.1  oster   if (mcpair == NULL)
    505  1.1  oster     goto done;
    506  1.1  oster   RF_ASSERT(layoutPtr->numDataCol == layoutPtr->numParityCol);
    507  1.1  oster   stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol;
    508  1.1  oster   bcount = nbytes*(layoutPtr->numDataCol + layoutPtr->numParityCol);
    509  1.1  oster   RF_MallocAndAdd(buf, bcount, (char *), allocList);
    510  1.1  oster   if (buf == NULL)
    511  1.1  oster     goto done;
    512  1.1  oster   if (rf_verifyParityDebug) {
    513  1.1  oster     printf("[%d] RAID1 parity verify: buf=%lx bcount=%d (%lx - %lx)\n",
    514  1.1  oster       tid, (long)buf, bcount, (long)buf, (long)buf+bcount);
    515  1.1  oster   }
    516  1.1  oster 
    517  1.1  oster   /*
    518  1.1  oster    * Generate a DAG which will read the entire stripe- then we can
    519  1.1  oster    * just compare data chunks versus "parity" chunks.
    520  1.1  oster    */
    521  1.1  oster 
    522  1.1  oster   rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, nbytes, buf,
    523  1.1  oster     rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rod", allocList, flags,
    524  1.1  oster     RF_IO_NORMAL_PRIORITY);
    525  1.1  oster   if (rd_dag_h == NULL)
    526  1.1  oster     goto done;
    527  1.1  oster   blockNode = rd_dag_h->succedents[0];
    528  1.1  oster   unblockNode = blockNode->succedents[0]->succedents[0];
    529  1.1  oster 
    530  1.1  oster   /*
    531  1.1  oster    * Map the access to physical disk addresses (PDAs)- this will
    532  1.1  oster    * get us both a list of data addresses, and "parity" addresses
    533  1.1  oster    * (which are really mirror copies).
    534  1.1  oster    */
    535  1.1  oster   asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe,
    536  1.1  oster     buf, RF_DONT_REMAP);
    537  1.1  oster   aasm = asm_h->stripeMap;
    538  1.1  oster 
    539  1.1  oster   buf1 = buf;
    540  1.1  oster   /*
    541  1.1  oster    * Loop through the data blocks, setting up read nodes for each.
    542  1.1  oster    */
    543  1.1  oster   for(pda=aasm->physInfo,i=0;i<layoutPtr->numDataCol;i++,pda=pda->next)
    544  1.1  oster   {
    545  1.1  oster     RF_ASSERT(pda);
    546  1.1  oster 
    547  1.1  oster     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    548  1.1  oster 
    549  1.1  oster     RF_ASSERT(pda->numSector != 0);
    550  1.1  oster     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
    551  1.1  oster       /* cannot verify parity with dead disk */
    552  1.1  oster       goto done;
    553  1.1  oster     }
    554  1.1  oster     pda->bufPtr = buf1;
    555  1.1  oster     blockNode->succedents[i]->params[0].p = pda;
    556  1.1  oster     blockNode->succedents[i]->params[1].p = buf1;
    557  1.1  oster     blockNode->succedents[i]->params[2].v = psID;
    558  1.1  oster     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    559  1.1  oster     buf1 += nbytes;
    560  1.1  oster   }
    561  1.1  oster   RF_ASSERT(pda == NULL);
    562  1.1  oster   /*
    563  1.1  oster    * keep i, buf1 running
    564  1.1  oster    *
    565  1.1  oster    * Loop through parity blocks, setting up read nodes for each.
    566  1.1  oster    */
    567  1.1  oster   for(pda=aasm->parityInfo;i<layoutPtr->numDataCol+layoutPtr->numParityCol;i++,pda=pda->next)
    568  1.1  oster   {
    569  1.1  oster     RF_ASSERT(pda);
    570  1.1  oster     rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1);
    571  1.1  oster     RF_ASSERT(pda->numSector != 0);
    572  1.1  oster     if (rf_TryToRedirectPDA(raidPtr, pda, 0)) {
    573  1.1  oster       /* cannot verify parity with dead disk */
    574  1.1  oster       goto done;
    575  1.1  oster     }
    576  1.1  oster     pda->bufPtr = buf1;
    577  1.1  oster     blockNode->succedents[i]->params[0].p = pda;
    578  1.1  oster     blockNode->succedents[i]->params[1].p = buf1;
    579  1.1  oster     blockNode->succedents[i]->params[2].v = psID;
    580  1.1  oster     blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    581  1.1  oster     buf1 += nbytes;
    582  1.1  oster   }
    583  1.1  oster   RF_ASSERT(pda == NULL);
    584  1.1  oster 
    585  1.1  oster   bzero((char *)&tracerec, sizeof(tracerec));
    586  1.1  oster   rd_dag_h->tracerec = &tracerec;
    587  1.1  oster 
    588  1.1  oster   if (rf_verifyParityDebug > 1) {
    589  1.1  oster     printf("[%d] RAID1 parity verify read dag:\n", tid);
    590  1.1  oster     rf_PrintDAGList(rd_dag_h);
    591  1.1  oster   }
    592  1.1  oster 
    593  1.1  oster   RF_LOCK_MUTEX(mcpair->mutex);
    594  1.1  oster   mcpair->flag = 0;
    595  1.1  oster   rf_DispatchDAG(rd_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    596  1.1  oster 		 (void *)mcpair);
    597  1.1  oster   while (mcpair->flag == 0) {
    598  1.1  oster     RF_WAIT_MCPAIR(mcpair);
    599  1.1  oster   }
    600  1.1  oster   RF_UNLOCK_MUTEX(mcpair->mutex);
    601  1.1  oster 
    602  1.1  oster   if (rd_dag_h->status != rf_enable) {
    603  1.1  oster     RF_ERRORMSG("Unable to verify raid1 parity: can't read stripe\n");
    604  1.1  oster     ret = RF_PARITY_COULD_NOT_VERIFY;
    605  1.1  oster     goto done;
    606  1.1  oster   }
    607  1.1  oster 
    608  1.1  oster   /*
    609  1.1  oster    * buf1 is the beginning of the data blocks chunk
    610  1.1  oster    * buf2 is the beginning of the parity blocks chunk
    611  1.1  oster    */
    612  1.1  oster   buf1 = buf;
    613  1.1  oster   buf2 = buf + (nbytes * layoutPtr->numDataCol);
    614  1.1  oster   ret = RF_PARITY_OKAY;
    615  1.1  oster   /*
    616  1.1  oster    * bbufs is "bad bufs"- an array whose entries are the data
    617  1.1  oster    * column numbers where we had miscompares. (That is, column 0
    618  1.1  oster    * and column 1 of the array are mirror copies, and are considered
    619  1.1  oster    * "data column 0" for this purpose).
    620  1.1  oster    */
    621  1.1  oster   RF_MallocAndAdd(bbufs, layoutPtr->numParityCol*sizeof(int), (int *),
    622  1.1  oster     allocList);
    623  1.1  oster   nbad = 0;
    624  1.1  oster   /*
    625  1.1  oster    * Check data vs "parity" (mirror copy).
    626  1.1  oster    */
    627  1.1  oster   for(i=0;i<layoutPtr->numDataCol;i++) {
    628  1.1  oster     if (rf_verifyParityDebug) {
    629  1.1  oster       printf("[%d] RAID1 parity verify %d bytes: i=%d buf1=%lx buf2=%lx buf=%lx\n",
    630  1.1  oster         tid, nbytes, i, (long)buf1, (long)buf2, (long)buf);
    631  1.1  oster     }
    632  1.1  oster     ret = bcmp(buf1, buf2, nbytes);
    633  1.1  oster     if (ret) {
    634  1.1  oster       if (rf_verifyParityDebug > 1) {
    635  1.1  oster         for(j=0;j<nbytes;j++) {
    636  1.1  oster          if (buf1[j] != buf2[j])
    637  1.1  oster            break;
    638  1.1  oster         }
    639  1.1  oster         printf("psid=%ld j=%d\n", (long)psID, j);
    640  1.1  oster         printf("buf1 %02x %02x %02x %02x %02x\n", buf1[0]&0xff,
    641  1.1  oster           buf1[1]&0xff, buf1[2]&0xff, buf1[3]&0xff, buf1[4]&0xff);
    642  1.1  oster         printf("buf2 %02x %02x %02x %02x %02x\n", buf2[0]&0xff,
    643  1.1  oster           buf2[1]&0xff, buf2[2]&0xff, buf2[3]&0xff, buf2[4]&0xff);
    644  1.1  oster       }
    645  1.1  oster       if (rf_verifyParityDebug) {
    646  1.1  oster         printf("[%d] RAID1: found bad parity, i=%d\n", tid, i);
    647  1.1  oster       }
    648  1.1  oster       /*
    649  1.1  oster        * Parity is bad. Keep track of which columns were bad.
    650  1.1  oster        */
    651  1.1  oster       if (bbufs)
    652  1.1  oster         bbufs[nbad] = i;
    653  1.1  oster       nbad++;
    654  1.1  oster       ret = RF_PARITY_BAD;
    655  1.1  oster     }
    656  1.1  oster     buf1 += nbytes;
    657  1.1  oster     buf2 += nbytes;
    658  1.1  oster   }
    659  1.1  oster 
    660  1.1  oster   if ((ret != RF_PARITY_OKAY) && correct_it) {
    661  1.1  oster     ret = RF_PARITY_COULD_NOT_CORRECT;
    662  1.1  oster     if (rf_verifyParityDebug) {
    663  1.1  oster       printf("[%d] RAID1 parity verify: parity not correct\n", tid);
    664  1.1  oster     }
    665  1.1  oster     if (bbufs == NULL)
    666  1.1  oster       goto done;
    667  1.1  oster     /*
    668  1.1  oster      * Make a DAG with one write node for each bad unit. We'll simply
    669  1.1  oster      * write the contents of the data unit onto the parity unit for
    670  1.1  oster      * correction. (It's possible that the mirror copy was the correct
    671  1.1  oster      * copy, and that we're spooging good data by writing bad over it,
    672  1.1  oster      * but there's no way we can know that.
    673  1.1  oster      */
    674  1.1  oster     wr_dag_h = rf_MakeSimpleDAG(raidPtr, nbad, nbytes, buf,
    675  1.1  oster       rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wnp", allocList, flags,
    676  1.1  oster       RF_IO_NORMAL_PRIORITY);
    677  1.1  oster     if (wr_dag_h == NULL)
    678  1.1  oster       goto done;
    679  1.1  oster     wrBlock = wr_dag_h->succedents[0];
    680  1.1  oster     /*
    681  1.1  oster      * Fill in a write node for each bad compare.
    682  1.1  oster      */
    683  1.1  oster     for(i=0;i<nbad;i++) {
    684  1.1  oster       j = i+layoutPtr->numDataCol;
    685  1.1  oster       pda = blockNode->succedents[j]->params[0].p;
    686  1.1  oster       pda->bufPtr = blockNode->succedents[i]->params[1].p;
    687  1.1  oster       wrBlock->succedents[i]->params[0].p = pda;
    688  1.1  oster       wrBlock->succedents[i]->params[1].p = pda->bufPtr;
    689  1.1  oster       wrBlock->succedents[i]->params[2].v = psID;
    690  1.1  oster       wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
    691  1.1  oster     }
    692  1.1  oster     bzero((char *)&tracerec, sizeof(tracerec));
    693  1.1  oster     wr_dag_h->tracerec = &tracerec;
    694  1.1  oster     if (rf_verifyParityDebug > 1) {
    695  1.1  oster       printf("Parity verify write dag:\n");
    696  1.1  oster       rf_PrintDAGList(wr_dag_h);
    697  1.1  oster     }
    698  1.1  oster     RF_LOCK_MUTEX(mcpair->mutex);
    699  1.1  oster     mcpair->flag = 0;
    700  1.1  oster     /* fire off the write DAG */
    701  1.1  oster     rf_DispatchDAG(wr_dag_h, (void (*)(void *))rf_MCPairWakeupFunc,
    702  1.1  oster 		   (void *)mcpair);
    703  1.1  oster     while (!mcpair->flag) {
    704  1.1  oster       RF_WAIT_COND(mcpair->cond, mcpair->mutex);
    705  1.1  oster     }
    706  1.1  oster     RF_UNLOCK_MUTEX(mcpair->mutex);
    707  1.1  oster     if (wr_dag_h->status != rf_enable) {
    708  1.1  oster       RF_ERRORMSG("Unable to correct RAID1 parity in VerifyParity\n");
    709  1.1  oster       goto done;
    710  1.1  oster     }
    711  1.1  oster     ret = RF_PARITY_CORRECTED;
    712  1.1  oster   }
    713  1.1  oster 
    714  1.1  oster done:
    715  1.1  oster   /*
    716  1.1  oster    * All done. We might've gotten here without doing part of the function,
    717  1.1  oster    * so cleanup what we have to and return our running status.
    718  1.1  oster    */
    719  1.1  oster   if (asm_h)
    720  1.1  oster     rf_FreeAccessStripeMap(asm_h);
    721  1.1  oster   if (rd_dag_h)
    722  1.1  oster     rf_FreeDAG(rd_dag_h);
    723  1.1  oster   if (wr_dag_h)
    724  1.1  oster     rf_FreeDAG(wr_dag_h);
    725  1.1  oster   if (mcpair)
    726  1.1  oster     rf_FreeMCPair(mcpair);
    727  1.1  oster   rf_FreeAllocList(allocList);
    728  1.1  oster   if (rf_verifyParityDebug) {
    729  1.1  oster     printf("[%d] RAID1 parity verify, returning %d\n", tid, ret);
    730  1.1  oster   }
    731  1.1  oster   return(ret);
    732  1.1  oster }
    733  1.1  oster 
    734  1.1  oster int rf_SubmitReconBufferRAID1(rbuf, keep_it, use_committed)
    735  1.1  oster   RF_ReconBuffer_t  *rbuf;          /* the recon buffer to submit */
    736  1.1  oster   int                keep_it;       /* whether we can keep this buffer or we have to return it */
    737  1.1  oster   int                use_committed; /* whether to use a committed or an available recon buffer */
    738  1.1  oster {
    739  1.1  oster   RF_ReconParityStripeStatus_t *pssPtr;
    740  1.1  oster   RF_ReconCtrl_t *reconCtrlPtr;
    741  1.1  oster   RF_RaidLayout_t *layoutPtr;
    742  1.1  oster   int tid=0, retcode, created;
    743  1.1  oster   RF_CallbackDesc_t *cb, *p;
    744  1.1  oster   RF_ReconBuffer_t *t;
    745  1.1  oster   RF_Raid_t *raidPtr;
    746  1.1  oster   caddr_t ta;
    747  1.1  oster 
    748  1.1  oster   retcode = 0;
    749  1.1  oster   created = 0;
    750  1.1  oster 
    751  1.1  oster   raidPtr = rbuf->raidPtr;
    752  1.1  oster   layoutPtr = &raidPtr->Layout;
    753  1.1  oster   reconCtrlPtr = raidPtr->reconControl[rbuf->row];
    754  1.1  oster 
    755  1.1  oster   RF_ASSERT(rbuf);
    756  1.1  oster   RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
    757  1.1  oster 
    758  1.1  oster   if (rf_reconbufferDebug) {
    759  1.1  oster     rf_get_threadid(tid);
    760  1.1  oster     printf("[%d] RAID1 reconbuffer submission r%d c%d psid %ld ru%d (failed offset %ld)\n",
    761  1.1  oster       tid, rbuf->row, rbuf->col, (long)rbuf->parityStripeID, rbuf->which_ru,
    762  1.1  oster       (long)rbuf->failedDiskSectorOffset);
    763  1.1  oster   }
    764  1.1  oster 
    765  1.1  oster   if (rf_reconDebug) {
    766  1.1  oster     printf("RAID1 reconbuffer submit psid %ld buf %lx\n",
    767  1.1  oster 	   (long)rbuf->parityStripeID, (long)rbuf->buffer);
    768  1.1  oster     printf("RAID1 psid %ld   %02x %02x %02x %02x %02x\n",
    769  1.1  oster 	   (long)rbuf->parityStripeID,
    770  1.1  oster       rbuf->buffer[0], rbuf->buffer[1], rbuf->buffer[2], rbuf->buffer[3],
    771  1.1  oster       rbuf->buffer[4]);
    772  1.1  oster   }
    773  1.1  oster 
    774  1.1  oster   RF_LOCK_PSS_MUTEX(raidPtr,rbuf->row,rbuf->parityStripeID);
    775  1.1  oster 
    776  1.1  oster   RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
    777  1.1  oster 
    778  1.1  oster   pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
    779  1.1  oster     rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
    780  1.1  oster   RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten an rbuf for it */
    781  1.1  oster 
    782  1.1  oster   /*
    783  1.1  oster    * Since this is simple mirroring, the first submission for a stripe is also
    784  1.1  oster    * treated as the last.
    785  1.1  oster    */
    786  1.1  oster 
    787  1.1  oster   t = NULL;
    788  1.1  oster   if (keep_it) {
    789  1.1  oster     if (rf_reconbufferDebug) {
    790  1.1  oster       printf("[%d] RAID1 rbuf submission: keeping rbuf\n", tid);
    791  1.1  oster     }
    792  1.1  oster     t = rbuf;
    793  1.1  oster   }
    794  1.1  oster   else {
    795  1.1  oster     if (use_committed) {
    796  1.1  oster       if (rf_reconbufferDebug) {
    797  1.1  oster         printf("[%d] RAID1 rbuf submission: using committed rbuf\n", tid);
    798  1.1  oster       }
    799  1.1  oster       t = reconCtrlPtr->committedRbufs;
    800  1.1  oster       RF_ASSERT(t);
    801  1.1  oster       reconCtrlPtr->committedRbufs = t->next;
    802  1.1  oster       t->next = NULL;
    803  1.1  oster     }
    804  1.1  oster     else if (reconCtrlPtr->floatingRbufs) {
    805  1.1  oster       if (rf_reconbufferDebug) {
    806  1.1  oster         printf("[%d] RAID1 rbuf submission: using floating rbuf\n", tid);
    807  1.1  oster       }
    808  1.1  oster       t = reconCtrlPtr->floatingRbufs;
    809  1.1  oster       reconCtrlPtr->floatingRbufs = t->next;
    810  1.1  oster       t->next = NULL;
    811  1.1  oster     }
    812  1.1  oster   }
    813  1.1  oster   if (t == NULL) {
    814  1.1  oster     if (rf_reconbufferDebug) {
    815  1.1  oster       printf("[%d] RAID1 rbuf submission: waiting for rbuf\n", tid);
    816  1.1  oster     }
    817  1.1  oster     RF_ASSERT((keep_it == 0) && (use_committed == 0));
    818  1.1  oster     raidPtr->procsInBufWait++;
    819  1.1  oster     if ((raidPtr->procsInBufWait == (raidPtr->numCol-1))
    820  1.1  oster       && (raidPtr->numFullReconBuffers == 0))
    821  1.1  oster     {
    822  1.1  oster       /* ruh-ro */
    823  1.1  oster       RF_ERRORMSG("Buffer wait deadlock\n");
    824  1.1  oster       rf_PrintPSStatusTable(raidPtr, rbuf->row);
    825  1.1  oster       RF_PANIC();
    826  1.1  oster     }
    827  1.1  oster     pssPtr->flags |= RF_PSS_BUFFERWAIT;
    828  1.1  oster     cb = rf_AllocCallbackDesc();
    829  1.1  oster     cb->row = rbuf->row;
    830  1.1  oster     cb->col = rbuf->col;
    831  1.1  oster     cb->callbackArg.v = rbuf->parityStripeID;
    832  1.1  oster     cb->callbackArg2.v = rbuf->which_ru;
    833  1.1  oster     cb->next = NULL;
    834  1.1  oster     if (reconCtrlPtr->bufferWaitList == NULL) {
    835  1.1  oster       /* we are the wait list- lucky us */
    836  1.1  oster       reconCtrlPtr->bufferWaitList = cb;
    837  1.1  oster     }
    838  1.1  oster     else {
    839  1.1  oster       /* append to wait list */
    840  1.1  oster       for(p=reconCtrlPtr->bufferWaitList;p->next;p=p->next);
    841  1.1  oster       p->next = cb;
    842  1.1  oster     }
    843  1.1  oster     retcode = 1;
    844  1.1  oster     goto out;
    845  1.1  oster   }
    846  1.1  oster   if (t != rbuf) {
    847  1.1  oster     t->row = rbuf->row;
    848  1.1  oster     t->col = reconCtrlPtr->fcol;
    849  1.1  oster     t->parityStripeID = rbuf->parityStripeID;
    850  1.1  oster     t->which_ru = rbuf->which_ru;
    851  1.1  oster     t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
    852  1.1  oster     t->spRow = rbuf->spRow;
    853  1.1  oster     t->spCol = rbuf->spCol;
    854  1.1  oster     t->spOffset = rbuf->spOffset;
    855  1.1  oster     /* Swap buffers. DANCE! */
    856  1.1  oster     ta = t->buffer;
    857  1.1  oster     t->buffer = rbuf->buffer;
    858  1.1  oster     rbuf->buffer = ta;
    859  1.1  oster   }
    860  1.1  oster   /*
    861  1.1  oster    * Use the rbuf we've been given as the target.
    862  1.1  oster    */
    863  1.1  oster   RF_ASSERT(pssPtr->rbuf == NULL);
    864  1.1  oster   pssPtr->rbuf = t;
    865  1.1  oster 
    866  1.1  oster   t->count = 1;
    867  1.1  oster   /*
    868  1.1  oster    * Below, we use 1 for numDataCol (which is equal to the count in the
    869  1.1  oster    * previous line), so we'll always be done.
    870  1.1  oster    */
    871  1.1  oster   rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, 1);
    872  1.1  oster 
    873  1.1  oster out:
    874  1.1  oster   RF_UNLOCK_PSS_MUTEX( raidPtr,rbuf->row,rbuf->parityStripeID);
    875  1.1  oster   RF_UNLOCK_MUTEX( reconCtrlPtr->rb_mutex );
    876  1.1  oster   if (rf_reconbufferDebug) {
    877  1.1  oster     printf("[%d] RAID1 rbuf submission: returning %d\n", tid, retcode);
    878  1.1  oster   }
    879  1.1  oster   return(retcode);
    880  1.1  oster }
    881