/*	$NetBSD: rf_decluster.c,v 1.2 1999/01/26 02:33:55 oster Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*----------------------------------------------------------------------
 *
 * rf_decluster.c -- code related to the declustered layout
 *
 * Created 10-21-92 (MCH)
 *
 * Nov 93:  adding support for distributed sparing.  This code is a little
 *          complex:  the basic layout used is as follows:
 *          let F = (v-1)/GCD(r,v-1).  The spare space for each set of
 *          F consecutive fulltables is grouped together and placed after
 *          that set of tables.
 *                   +------------------------------+
 *                   |        F fulltables          |
 *                   |        Spare Space           |
 *                   |        F fulltables          |
 *                   |        Spare Space           |
 *                   |            ...               |
 *                   +------------------------------+
 *
 *--------------------------------------------------------------------*/
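
/*
 * Illustrative arithmetic (hypothetical parameters, not taken from any
 * particular config): with v = 5 disks and r = 4 (the number of times each
 * disk appears in the block design), F = (v-1)/GCD(r,v-1) = 4/GCD(4,4) = 1,
 * so a chunk of spare space follows every fulltable.  With v = 21 and
 * r = 5, F = 20/GCD(5,20) = 4, so spare space follows every 4th fulltable.
 */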

#include "rf_types.h"
#include "rf_raid.h"
#include "rf_raidframe.h"
#include "rf_configure.h"
#include "rf_decluster.h"
#include "rf_debugMem.h"
#include "rf_utils.h"
#include "rf_alloclist.h"
#include "rf_general.h"
#include "rf_shutdown.h"
#include "rf_sys.h"

extern int rf_copyback_in_progress;                /* debug only */

/* found in rf_kintf.c */
int rf_GetSpareTableFromDaemon(RF_SparetWait_t  *req);

/* configuration code */

int rf_ConfigureDeclustered(
  RF_ShutdownList_t  **listp,
  RF_Raid_t           *raidPtr,
  RF_Config_t         *cfgPtr)
{
    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    int b, v, k, r, lambda;				/* block design params */
    int i, j;
    RF_RowCol_t *first_avail_slot;
    RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
    RF_DeclusteredConfigInfo_t *info;
    RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
    RF_StripeCount_t totSparePUsPerDisk;
    RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
    RF_SectorCount_t SpareSpaceInSUs;
    char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
    RF_StripeNum_t l, SUID;

    SUID = l = 0;
    numCompleteSpareRegionsPerDisk = 0;

    /* 1. create layout specific structure */
    RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
    if (info == NULL)
      return(ENOMEM);
    layoutPtr->layoutSpecificInfo = (void *) info;
    info->SpareTable = NULL;

    /* 2. extract parameters from the config structure */
    if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
      (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
    }
    cfgBuf += RF_SPAREMAP_NAME_LEN;

    b        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    v        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    k        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    r        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    lambda   = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    raidPtr->noRotate = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
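
    /*
     * Summary of the layout-specific config block consumed above and in
     * step 4 below: RF_SPAREMAP_NAME_LEN bytes of sparemap file name (only
     * meaningful with RF_DISTRIBUTE_SPARE, but always skipped), then six
     * ints (b, v, k, r, lambda, noRotate), then b*k bytes giving the block
     * design table, one column number per byte.
     */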

    /* the sparemaps are generated assuming that parity is rotated, so we issue
     * a warning if both distributed sparing and no-rotate are on at the same time
     */
    if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
	RF_ERRORMSG("Warning:  distributed sparing specified without parity rotation.\n");
    }

    if (raidPtr->numCol != v) {
        RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
        return(EINVAL);
    }

    /* 3.  set up the values used in the mapping code */
    info->BlocksPerTable = b;
    info->Lambda = lambda;
    info->NumParityReps = info->groupSize = k;
    info->SUsPerTable = b * (k-1) * layoutPtr->SUsPerPU;/* b blks, k-1 SUs each */
    info->SUsPerFullTable = k * info->SUsPerTable;	/* rot k times */
    info->PUsPerBlock = k-1;
    info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
    info->TableDepthInPUs = (b*k) / v;
    info->FullTableDepthInPUs = info->TableDepthInPUs * k;		/* k repetitions */

    /* used only in distributed sparing case */
    info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1);		/* (v-1)/gcd fulltables */
    info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
    info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU;

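    /*
     * Illustrative numbers (a hypothetical 21-disk design, not from any
     * shipped sparemap): b=21, v=21, k=5, r=5, lambda=1, SUsPerPU=1 gives
     * SUsPerTable = 21*4 = 84, SUsPerFullTable = 420, PUsPerBlock = 4,
     * TableDepthInPUs = 21*5/21 = 5, FullTableDepthInPUs = 25,
     * FullTablesPerSpareRegion = 20/gcd(5,20) = 4, TablesPerSpareRegion = 20,
     * and SpareSpaceDepthPerRegionInSUs = 5*20/20 = 5.
     */
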
    /* check to make sure the block design is sufficiently small */
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
	    RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
			 (int)info->FullTableDepthInPUs,
			 (int)info->SpareSpaceDepthPerRegionInSUs,
			 (int)layoutPtr->stripeUnitsPerDisk);
	    return(EINVAL);
	}
    } else {
	if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
	    RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
			 (int)(info->TableDepthInPUs * layoutPtr->SUsPerPU),
			 (int)layoutPtr->stripeUnitsPerDisk);
	    return(EINVAL);
	}
    }


    /* compute the size of each disk, and the number of tables in the last fulltable (which
     * need not be complete)
     */
    if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {

	PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
	spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
				 (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1));
	info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;

	numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
	info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
	extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;

	/* assume conservatively that we need the full amount of spare space in one region in order
	 * to provide spares for the partial spare region at the end of the array.  We set "i" to
	 * the number of tables in the partial spare region.  This may actually include some fulltables.
	 */
	extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
	if (extraPUsPerDisk <= 0) i = 0;
	else i = extraPUsPerDisk/info->TableDepthInPUs;

	complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k);
        info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
	info->ExtraTablesPerDisk = i % k;

	/* note that in the last spare region, the spare space is complete even though data/parity space is not */
	totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
	info->TotSparePUsPerDisk = totSparePUsPerDisk;

	layoutPtr->stripeUnitsPerDisk =
	    ((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs +	 	/* data & parity space */
	     info->ExtraTablesPerDisk * info->TableDepthInPUs +
	     totSparePUsPerDisk								/* spare space */
	    ) * layoutPtr->SUsPerPU;
	layoutPtr->dataStripeUnitsPerDisk =
	    (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
	    * layoutPtr->SUsPerPU * (k-1) / k;

    } else {
        /* non-dist spare case:  force each disk to contain an integral number of tables */
        layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
        layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);

	/* compute the number of tables in the last fulltable, which need not be complete */
        complete_FT_count =
            ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;

        info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
        info->ExtraTablesPerDisk =
		((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
    }

    raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;

    /* find the disk offset of the stripe unit where the last fulltable starts */
    numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
    diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
        SpareSpaceInSUs  = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
        diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
        info->DiskOffsetOfLastSpareSpaceChunkInSUs =
	    diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
    }
    info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
    info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;

    /* 4.  create and initialize the lookup tables */
    info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
    if (info->LayoutTable == NULL)
      return(ENOMEM);
    info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
    if (info->OffsetTable == NULL)
      return(ENOMEM);
    info->BlockTable  =	rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
    if (info->BlockTable == NULL)
      return(ENOMEM);

    first_avail_slot = rf_make_1d_array(v, NULL);
    if (first_avail_slot == NULL)
      return(ENOMEM);

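    /*
     * In the loops below (a summary, not new semantics):
     *   LayoutTable[i][j]  is the column (disk) holding the j'th unit of
     *                      block i of the design, read from the config buf.
     *   OffsetTable[i][j]  is the offset, in table PUs, of that unit on its
     *                      disk, i.e. how many earlier blocks already put a
     *                      unit on that disk.
     *   BlockTable[o][c]   is roughly the inverse map: the table-relative
     *                      stripe unit ID found at SU offset o of column c.
     */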
    for (i=0; i<b; i++)
      for (j=0; j<k; j++)
        info->LayoutTable[i][j] = *cfgBuf++;

    /* initialize offset table */
    for (i=0; i<b; i++) for (j=0; j<k; j++) {
        info->OffsetTable[i][j] = first_avail_slot[ info->LayoutTable[i][j] ];
        first_avail_slot[ info->LayoutTable[i][j] ]++;
    }

    /* initialize block table */
    for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) {
        for (i=0; i<b; i++) {
            for (j=0; j<k; j++) {
                info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ]
		                [ info->LayoutTable[i][j] ] = SUID;
            }
            SUID++;
        }
    }

    rf_free_1d_array(first_avail_slot, v);

    /* 5.  set up the remaining redundant-but-useful parameters */

    raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) *
    			  info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
    layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-1);

    /* strange evaluation order below to try to minimize overflow problems */

    layoutPtr->dataSectorsPerStripe = (k-1) * layoutPtr->sectorsPerStripeUnit;
    layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
    layoutPtr->numDataCol = k-1;
    layoutPtr->numParityCol = 1;

    return(0);
}

/* declustering with distributed sparing */
static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
static void rf_ShutdownDeclusteredDS(arg)
  RF_ThreadArg_t  arg;
{
  RF_DeclusteredConfigInfo_t *info;
  RF_Raid_t *raidPtr;

  raidPtr = (RF_Raid_t *)arg;
  info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  if (info->SpareTable)
    rf_FreeSpareTable(raidPtr);
}

int rf_ConfigureDeclusteredDS(
  RF_ShutdownList_t  **listp,
  RF_Raid_t           *raidPtr,
  RF_Config_t         *cfgPtr)
{
  int rc;

  rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
  if (rc)
    return(rc);
  rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
  if (rc) {
    RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc);
    rf_ShutdownDeclusteredDS(raidPtr);
    return(rc);
  }
  return(0);
}

void rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap)
  RF_Raid_t       *raidPtr;
  RF_RaidAddr_t    raidSector;
  RF_RowCol_t     *row;
  RF_RowCol_t     *col;
  RF_SectorNum_t  *diskSector;
  int              remap;
{
    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
    RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
    RF_StripeNum_t BlockID, BlockOffset, RepIndex;
    RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
    RF_StripeCount_t fulltable_depth  = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;

    rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);

    FullTableID     = SUID / sus_per_fulltable;		/* fulltable ID within array (across rows) */
    if (raidPtr->numRow == 1) *row = 0;                 /* avoid a mod and a div in the common case */
    else {
      *row            = FullTableID % raidPtr->numRow;
      FullTableID    /= raidPtr->numRow;			/* convert to fulltable ID on this disk */
    }
    if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
	SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
        SpareSpace  = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
    }
    FullTableOffset = SUID % sus_per_fulltable;
    TableID         = FullTableOffset / info->SUsPerTable;
    TableOffset     = FullTableOffset - TableID * info->SUsPerTable;
    BlockID         = TableOffset / info->PUsPerBlock;
    BlockOffset     = TableOffset - BlockID * info->PUsPerBlock;
    BlockID        %= info->BlocksPerTable;
    RepIndex        = info->PUsPerBlock - TableID;
    if (!raidPtr->noRotate) BlockOffset    += ((BlockOffset >= RepIndex) ? 1 : 0);
    *col            = info->LayoutTable[BlockID][BlockOffset];

    /* remap to distributed spare space if indicated */
    if (remap) {
      RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
	     (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
      rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
    } else {

        outSU	    = base_suid;
        outSU      += FullTableID * fulltable_depth;  				        /* offs to strt of FT */
        outSU	   += SpareSpace;						        /* skip rsvd spare space */
        outSU      += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;   	        /* offs to strt of tble */
        outSU      += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU;	/* offs to the PU */
    }
    outSU          += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);	        /* offs to the SU within a PU */

    /* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector.  */
    *diskSector     = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);

    RF_ASSERT( *col != -1 );
}
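
/*
 * A walk-through with the same hypothetical design used earlier
 * (b=21, v=21, k=5, r=5, SUsPerPU=1, one row, rotation on, no remap),
 * purely for illustration: for SUID = 500 (below FullTableLimitSUID),
 * FullTableID = 500/420 = 1, FullTableOffset = 80, TableID = 0,
 * TableOffset = 80, BlockID = 80/4 = 20, BlockOffset = 0, RepIndex = 4,
 * so the data lands on column LayoutTable[20][0] at
 * outSU = 1*25 + 0 + 0 + OffsetTable[20][0] + 0.
 */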


/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */
void rf_MapParityDeclustered(
  RF_Raid_t       *raidPtr,
  RF_RaidAddr_t    raidSector,
  RF_RowCol_t     *row,
  RF_RowCol_t     *col,
  RF_SectorNum_t  *diskSector,
  int              remap)
{
    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
    RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
    RF_StripeNum_t BlockID, BlockOffset, RepIndex;
    RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
    RF_StripeCount_t fulltable_depth  = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;

    rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);

    /* compute row & (possibly) spare space exactly as before */
    FullTableID     = SUID / sus_per_fulltable;
    if (raidPtr->numRow == 1) *row = 0;                         /* avoid a mod and a div in the common case */
    else {
      *row            = FullTableID % raidPtr->numRow;
      FullTableID    /= raidPtr->numRow;			/* convert to fulltable ID on this disk */
    }
    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
	SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
        SpareSpace  = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
    }

    /* compute BlockID and RepIndex exactly as before */
    FullTableOffset = SUID % sus_per_fulltable;
    TableID         = FullTableOffset / info->SUsPerTable;
    TableOffset     = FullTableOffset - TableID * info->SUsPerTable;
    /*TableOffset     = FullTableOffset % info->SUsPerTable;*/
    /*BlockID         = (TableOffset / info->PUsPerBlock) % info->BlocksPerTable;*/
    BlockID         = TableOffset / info->PUsPerBlock;
    /*BlockOffset     = TableOffset % info->PUsPerBlock;*/
    BlockOffset     = TableOffset - BlockID * info->PUsPerBlock;
    BlockID        %= info->BlocksPerTable;

    /* the parity block is in the position indicated by RepIndex */
    RepIndex        = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID;
    *col	    = info->LayoutTable[BlockID][RepIndex];

    if (remap) {
      RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
	     (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
      rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
    } else {

        /* compute sector as before, except use RepIndex instead of BlockOffset */
        outSU        = base_suid;
        outSU       += FullTableID * fulltable_depth;
        outSU	    += SpareSpace;						/* skip rsvd spare space */
        outSU       += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
        outSU       += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
    }

    outSU       += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
    *diskSector  = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);

    RF_ASSERT( *col != -1 );
}

/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address.
 * the caller must _never_ attempt to modify this array.
 */
void rf_IdentifyStripeDeclustered(
  RF_Raid_t        *raidPtr,
  RF_RaidAddr_t     addr,
  RF_RowCol_t     **diskids,
  RF_RowCol_t      *outRow)
{
  RF_RaidLayout_t *layoutPtr           = &(raidPtr->Layout);
  RF_DeclusteredConfigInfo_t *info     = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  RF_StripeCount_t sus_per_fulltable   = info->SUsPerFullTable;
  RF_StripeCount_t fulltable_depth     = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
  RF_StripeNum_t  base_suid            = 0;
  RF_StripeNum_t SUID                  = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
  RF_StripeNum_t stripeID, FullTableID;
  int tableOffset;

  rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
  FullTableID     = SUID / sus_per_fulltable;		/* fulltable ID within array (across rows) */
  *outRow         = FullTableID % raidPtr->numRow;
  stripeID        = rf_StripeUnitIDToStripeID(layoutPtr, SUID);                     /* find stripe offset into array */
  tableOffset     = (stripeID % info->BlocksPerTable);                        /* find offset into block design table */
  *diskids        = info->LayoutTable[tableOffset];
}

/* This returns the default head-separation limit, which is measured
 * in "required units for reconstruction".  Each time a disk fetches
 * a unit, it bumps a counter.  The head-sep code prohibits any disk
 * from getting more than headSepLimit counter values ahead of any
 * other.
 *
 * We assume here that the number of floating recon buffers is already
 * set.  There are r stripes to be reconstructed in each table, and so
 * if we have a total of B buffers, we can have at most B/r tables
 * under recon at any one time.  In each table, lambda units are required
 * from each disk, so given B buffers, the head sep limit has to be
 * (lambda*B)/r units.  We subtract one to avoid weird boundary cases.
 *
 * For example, suppose we're given 50 buffers, r=19, and lambda=4 as in
 * the 20.5 design.  There are 19 stripes/table to be reconstructed, so
 * we can have 50/19 tables concurrently under reconstruction, which means
 * we can allow the fastest disk to get 50/19 tables ahead of the slowest
 * disk.  There are lambda "required units" for each disk, so the fastest
 * disk can get 4*50/19 = 10 counter values ahead of the slowest.
 *
 * If numBufsToAccumulate is not 1, we need to limit the head sep further
 * because multiple bufs will be required for each stripe under recon.
 */
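/*
 * Plugging in the numbers from the example above (50 buffers, lambda = 4,
 * r = 19; note that TableDepthInPUs = b*k/v equals r for a block design)
 * with rf_numBufsToAccumulate = 1, the expression below evaluates to
 * 4*50/19/1 = 10, matching the comment.
 */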
RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(
  RF_Raid_t  *raidPtr)
{
  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;

  return(info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate);
}

/* returns the default number of recon buffers to use.  The value
 * is somewhat arbitrary...it's intended to be large enough to allow
 * for a reasonably large head-sep limit, but small enough that you
 * don't use up all your system memory with buffers.
 */
int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
{
  return(100 * rf_numBufsToAccumulate);
}

/* sectors in the last fulltable of the array need to be handled
 * specially since this fulltable can be incomplete.  this function
 * changes the values of certain params to handle this.
 *
 * the idea here is that MapSector et al. figure out which disk the
 * addressed unit lives on by computing the modulos of the unit number
 * with the number of units per fulltable, table, etc.  In the last
 * fulltable, there are fewer units per fulltable, so we need to adjust
 * the number of user data units per fulltable to reflect this.
 *
 * so, we (1) convert the fulltable size and depth parameters to
 * the size of the partial fulltable at the end, (2) compute the
 * disk sector offset where this fulltable starts, and (3) convert
 * the user's stripe unit number from an offset into the array to
 * an offset into the last fulltable.
 */
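/*
 * For instance (hypothetical numbers, continuing the example design):
 * with SUsPerFullTable = 420, FullTableDepthInPUs = 25, SUsPerPU = 1,
 * two complete fulltables per disk and ExtraTablesPerDisk = 2, we have
 * FullTableLimitSUID = 840 and DiskOffsetOfLastFullTableInSUs = 50.
 * A request for SUID = 900 then gets sus_per_fulltable = 2*84 = 168,
 * fulltable_depth = 10, base_suid = 50, and SUID rewritten to 60.
 */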
void rf_decluster_adjust_params(
  RF_RaidLayout_t   *layoutPtr,
  RF_StripeNum_t    *SUID,
  RF_StripeCount_t  *sus_per_fulltable,
  RF_StripeCount_t  *fulltable_depth,
  RF_StripeNum_t    *base_suid)
{
    RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
#if defined(__NetBSD__) && defined(_KERNEL)
    /* Nothing! */
#else
    char pc = layoutPtr->map->parityConfig;
#endif

    if (*SUID >= info->FullTableLimitSUID) {
	/* new full table size is size of last full table on disk */
	*sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable;

	/* new full table depth is corresponding depth */
	*fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;

	/* set up the new base offset */
	*base_suid = info->DiskOffsetOfLastFullTableInSUs;

	/* convert user's array address to an offset into the last fulltable */
	*SUID -= info->FullTableLimitSUID;
    }
}

/*
 * map a stripe ID to a parity stripe ID.
 * See comment above RaidAddressToParityStripeID in layout.c.
 */
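/*
 * Quick sanity example (hypothetical numbers): with SUsPerPU = 2 and
 * BlocksPerTable = 21, stripeID = 30 maps to
 * psID = (30/42)*21 + (30 % 21) = 9 and which_ru = (30 % 42)/21 = 1.
 */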
void rf_MapSIDToPSIDDeclustered(
  RF_RaidLayout_t    *layoutPtr,
  RF_StripeNum_t      stripeID,
  RF_StripeNum_t     *psID,
  RF_ReconUnitNum_t  *which_ru)
{
    RF_DeclusteredConfigInfo_t *info;

    info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;

    *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable))
        * info->BlocksPerTable + (stripeID % info->BlocksPerTable);
    *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU))
        / info->BlocksPerTable;
    RF_ASSERT( (*which_ru) < layoutPtr->SUsPerPU/layoutPtr->SUsPerRU);
}

/*
 * Called from MapSector and MapParity to retarget an access at the spare unit.
 * Modifies the "col" and "outSU" parameters only.
 */
void rf_remap_to_spare_space(
  RF_RaidLayout_t             *layoutPtr,
  RF_DeclusteredConfigInfo_t  *info,
  RF_RowCol_t                  row,
  RF_StripeNum_t               FullTableID,
  RF_StripeNum_t               TableID,
  RF_SectorNum_t               BlockID,
  RF_StripeNum_t               base_suid,
  RF_StripeNum_t               SpareRegion,
  RF_RowCol_t                 *outCol,
  RF_StripeNum_t              *outSU)
{
    RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, which_ft;

    /*
     * note that FullTableID and hence SpareRegion may have gotten
     * tweaked by rf_decluster_adjust_params. We detect this by
     * noticing that base_suid is not 0.
     */
    if (base_suid == 0) {
      ftID = FullTableID;
    }
    else {
      /*
       * There may be more than one fulltable in the last (i.e. partial)
       * spare region.  Find out which of these we're in.
       */
      lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs;
      which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);

      /* compute the actual full table ID */
      ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft;
      SpareRegion = info->NumCompleteSRs;
    }
    TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion;

    *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
    RF_ASSERT( *outCol != -1);

    spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
	    info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU :
	    (SpareRegion+1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs;
    *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
    if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
	printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n",(long)*outSU);
    }
}

int rf_InstallSpareTable(
  RF_Raid_t    *raidPtr,
  RF_RowCol_t   frow,
  RF_RowCol_t   fcol)
{
  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  RF_SparetWait_t *req;
  int retcode;

  RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
  req->C                             = raidPtr->numCol;
  req->G                             = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
  req->fcol                          = fcol;
  req->SUsPerPU                      = raidPtr->Layout.SUsPerPU;
  req->TablesPerSpareRegion          = info->TablesPerSpareRegion;
  req->BlocksPerTable                = info->BlocksPerTable;
  req->TableDepthInPUs               = info->TableDepthInPUs;
  req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs;

  retcode = rf_GetSpareTableFromDaemon(req);
  RF_ASSERT(!retcode);                                     /* XXX -- fix this to recover gracefully -- XXX */
  return(retcode);
}

/*
 * Invoked via ioctl to install a spare table in the kernel.
 */
int rf_SetSpareTable(raidPtr, data)
  RF_Raid_t  *raidPtr;
  void       *data;
{
  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  RF_SpareTableEntry_t **ptrs;
  int i, retcode;

  /* what we need to copyin is a 2-d array, so first copyin the user pointers to the rows in the table */
  RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
  retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));

  if (retcode) {
    RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));   /* don't leak the temp array on error */
    return(retcode);
  }

  /* now allocate kernel space for the row pointers */
  RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));

  /* now allocate kernel space for each row in the table, and copy it in from user space */
  for (i=0; i<info->TablesPerSpareRegion; i++) {
    RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
    retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
    if (retcode) {
      info->SpareTable = NULL;             /* blow off the table; XXX the rows allocated so far are leaked here */
      RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
      return(retcode);
    }
  }

  /* free up the temporary array we used */
  RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));

  return(0);
}
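
/*
 * Sketch of the caller's side (illustration only, not part of the driver):
 * the "data" argument above is expected to point at an array of
 * TablesPerSpareRegion row pointers, each row holding BlocksPerTable
 * RF_SpareTableEntry_t structures whose spareDisk and
 * spareBlockOffsetInSUs fields have been filled in by the user-level
 * sparemap code, roughly:
 *
 *   RF_SpareTableEntry_t **table;
 *   table = malloc(TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
 *   for (i = 0; i < TablesPerSpareRegion; i++)
 *       table[i] = malloc(BlocksPerTable * sizeof(RF_SpareTableEntry_t));
 *   ... fill in table[t][b].spareDisk / .spareBlockOffsetInSUs ...
 *   ... then hand "table" to the kernel as the ioctl's data argument ...
 */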

RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(raidPtr)
  RF_Raid_t *raidPtr;
{
  RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;

  return( ((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk );
}


void rf_FreeSpareTable(raidPtr)
  RF_Raid_t  *raidPtr;
{
  long i;
  RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
  RF_SpareTableEntry_t **table = info->SpareTable;

  for (i=0; i<info->TablesPerSpareRegion; i++) {RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));}
  RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
  info->SpareTable = (RF_SpareTableEntry_t **) NULL;
}