Home | History | Annotate | Line # | Download | only in raidframe
rf_decluster.c revision 1.1
      1  1.1  oster /*	$NetBSD: rf_decluster.c,v 1.1 1998/11/13 04:20:28 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Author: Mark Holland
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster 
     29  1.1  oster /*----------------------------------------------------------------------
     30  1.1  oster  *
     31  1.1  oster  * rf_decluster.c -- code related to the declustered layout
     32  1.1  oster  *
     33  1.1  oster  * Created 10-21-92 (MCH)
     34  1.1  oster  *
     35  1.1  oster  * Nov 93:  adding support for distributed sparing.  This code is a little
     36  1.1  oster  *          complex:  the basic layout used is as follows:
     37  1.1  oster  *          let F = (v-1)/GCD(r,v-1).  The spare space for each set of
     38  1.1  oster  *          F consecutive fulltables is grouped together and placed after
     39  1.1  oster  *          that set of tables.
     40  1.1  oster  *                   +------------------------------+
     41  1.1  oster  *                   |        F fulltables          |
     42  1.1  oster  *                   |        Spare Space           |
     43  1.1  oster  *                   |        F fulltables          |
     44  1.1  oster  *                   |        Spare Space           |
     45  1.1  oster  *                   |            ...               |
     46  1.1  oster  *                   +------------------------------+
     47  1.1  oster  *
     48  1.1  oster  *--------------------------------------------------------------------*/
     49  1.1  oster 
     50  1.1  oster /*
     51  1.1  oster  * :
     52  1.1  oster  * Log: rf_decluster.c,v
     53  1.1  oster  * Revision 1.51  1996/08/21 19:47:10  jimz
     54  1.1  oster  * fix bogus return values from config
     55  1.1  oster  *
     56  1.1  oster  * Revision 1.50  1996/08/20  22:41:42  jimz
     57  1.1  oster  * better diagnostics for bad blockdesigns
     58  1.1  oster  *
     59  1.1  oster  * Revision 1.49  1996/07/31  16:56:18  jimz
     60  1.1  oster  * dataBytesPerStripe, sectorsPerDisk init arch-indep.
     61  1.1  oster  *
     62  1.1  oster  * Revision 1.48  1996/07/29  14:05:12  jimz
     63  1.1  oster  * fix numPUs/numRUs confusion (everything is now numRUs)
     64  1.1  oster  * clean up some commenting, return values
     65  1.1  oster  *
     66  1.1  oster  * Revision 1.47  1996/07/27  23:36:08  jimz
     67  1.1  oster  * Solaris port of simulator
     68  1.1  oster  *
     69  1.1  oster  * Revision 1.46  1996/07/27  18:40:11  jimz
     70  1.1  oster  * cleanup sweep
     71  1.1  oster  *
     72  1.1  oster  * Revision 1.45  1996/07/18  22:57:14  jimz
     73  1.1  oster  * port simulator to AIX
     74  1.1  oster  *
     75  1.1  oster  * Revision 1.44  1996/07/13  00:00:59  jimz
     76  1.1  oster  * sanitized generalized reconstruction architecture
     77  1.1  oster  * cleaned up head sep, rbuf problems
     78  1.1  oster  *
     79  1.1  oster  * Revision 1.43  1996/06/19  17:53:48  jimz
     80  1.1  oster  * move GetNumSparePUs, InstallSpareTable ops into layout switch
     81  1.1  oster  *
     82  1.1  oster  * Revision 1.42  1996/06/17  03:23:48  jimz
     83  1.1  oster  * switch DeclusteredDS typing
     84  1.1  oster  *
     85  1.1  oster  * Revision 1.41  1996/06/11  08:55:15  jimz
     86  1.1  oster  * improved error-checking at configuration time
     87  1.1  oster  *
     88  1.1  oster  * Revision 1.40  1996/06/10  11:55:47  jimz
     89  1.1  oster  * Straightened out some per-array/not-per-array distinctions, fixed
     90  1.1  oster  * a couple bugs related to confusion. Added shutdown lists. Removed
     91  1.1  oster  * layout shutdown function (now subsumed by shutdown lists).
     92  1.1  oster  *
     93  1.1  oster  * Revision 1.39  1996/06/09  02:36:46  jimz
     94  1.1  oster  * lots of little crufty cleanup- fixup whitespace
     95  1.1  oster  * issues, comment #ifdefs, improve typing in some
     96  1.1  oster  * places (esp size-related)
     97  1.1  oster  *
     98  1.1  oster  * Revision 1.38  1996/06/07  22:26:27  jimz
     99  1.1  oster  * type-ify which_ru (RF_ReconUnitNum_t)
    100  1.1  oster  *
    101  1.1  oster  * Revision 1.37  1996/06/07  21:33:04  jimz
    102  1.1  oster  * begin using consistent types for sector numbers,
    103  1.1  oster  * stripe numbers, row+col numbers, recon unit numbers
    104  1.1  oster  *
    105  1.1  oster  * Revision 1.36  1996/06/03  23:28:26  jimz
    106  1.1  oster  * more bugfixes
    107  1.1  oster  * check in tree to sync for IPDS runs with current bugfixes
    108  1.1  oster  * there still may be a problem with threads in the script test
    109  1.1  oster  * getting I/Os stuck- not trivially reproducible (runs ~50 times
    110  1.1  oster  * in a row without getting stuck)
    111  1.1  oster  *
    112  1.1  oster  * Revision 1.35  1996/06/02  17:31:48  jimz
    113  1.1  oster  * Moved a lot of global stuff into array structure, where it belongs.
    114  1.1  oster  * Fixed up paritylogging, pss modules in this manner. Some general
    115  1.1  oster  * code cleanup. Removed lots of dead code, some dead files.
    116  1.1  oster  *
    117  1.1  oster  * Revision 1.34  1996/05/30  23:22:16  jimz
    118  1.1  oster  * bugfixes of serialization, timing problems
    119  1.1  oster  * more cleanup
    120  1.1  oster  *
    121  1.1  oster  * Revision 1.33  1996/05/30  11:29:41  jimz
    122  1.1  oster  * Numerous bug fixes. Stripe lock release code disagreed with the taking code
    123  1.1  oster  * about when stripes should be locked (I made it consistent: no parity, no lock)
    124  1.1  oster  * There was a lot of extra serialization of I/Os which I've removed- a lot of
    125  1.1  oster  * it was to calculate values for the cache code, which is no longer with us.
    126  1.1  oster  * More types, function, macro cleanup. Added code to properly quiesce the array
    127  1.1  oster  * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
    128  1.1  oster  * before. Fixed memory allocation, freeing bugs.
    129  1.1  oster  *
    130  1.1  oster  * Revision 1.32  1996/05/27  18:56:37  jimz
    131  1.1  oster  * more code cleanup
    132  1.1  oster  * better typing
    133  1.1  oster  * compiles in all 3 environments
    134  1.1  oster  *
    135  1.1  oster  * Revision 1.31  1996/05/24  01:59:45  jimz
    136  1.1  oster  * another checkpoint in code cleanup for release
    137  1.1  oster  * time to sync kernel tree
    138  1.1  oster  *
    139  1.1  oster  * Revision 1.30  1996/05/23  00:33:23  jimz
    140  1.1  oster  * code cleanup: move all debug decls to rf_options.c, all extern
    141  1.1  oster  * debug decls to rf_options.h, all debug vars preceded by rf_
    142  1.1  oster  *
    143  1.1  oster  * Revision 1.29  1996/05/18  19:51:34  jimz
    144  1.1  oster  * major code cleanup- fix syntax, make some types consistent,
    145  1.1  oster  * add prototypes, clean out dead code, et cetera
    146  1.1  oster  *
    147  1.1  oster  * Revision 1.28  1995/12/12  18:10:06  jimz
    148  1.1  oster  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
    149  1.1  oster  * fix 80-column brain damage in comments
    150  1.1  oster  *
    151  1.1  oster  * Revision 1.27  1995/12/01  16:00:08  root
    152  1.1  oster  * added copyright info
    153  1.1  oster  *
    154  1.1  oster  * Revision 1.26  1995/11/28  21:35:12  amiri
    155  1.1  oster  * set the RF_BD_DECLUSTERED flag
    156  1.1  oster  *
    157  1.1  oster  * Revision 1.25  1995/11/17  18:56:00  wvcii
    158  1.1  oster  * added prototyping to MapParity
    159  1.1  oster  *
    160  1.1  oster  * Revision 1.24  1995/07/04  22:25:33  holland
    161  1.1  oster  * increased default num bufs
    162  1.1  oster  *
    163  1.1  oster  * Revision 1.23  1995/07/03  20:23:51  holland
    164  1.1  oster  * changed floating recon bufs & head sep yet again
    165  1.1  oster  *
    166  1.1  oster  * Revision 1.22  1995/07/03  18:12:14  holland
    167  1.1  oster  * changed the way the number of floating recon bufs & the head sep
    168  1.1  oster  * limit are set
    169  1.1  oster  *
    170  1.1  oster  * Revision 1.21  1995/07/02  15:07:42  holland
    171  1.1  oster  * bug fixes related to getting distributed sparing numbers
    172  1.1  oster  *
    173  1.1  oster  * Revision 1.20  1995/06/23  13:41:28  robby
    174  1.1  oster  * updated to prototypes in rf_layout.h
    175  1.1  oster  *
    176  1.1  oster  */
    177  1.1  oster 
    178  1.1  oster #ifdef _KERNEL
    179  1.1  oster #define KERNEL
    180  1.1  oster #endif
    181  1.1  oster 
    182  1.1  oster 
    183  1.1  oster #include "rf_types.h"
    184  1.1  oster #include "rf_raid.h"
    185  1.1  oster #include "rf_raidframe.h"
    186  1.1  oster #include "rf_configure.h"
    187  1.1  oster #include "rf_decluster.h"
    188  1.1  oster #include "rf_debugMem.h"
    189  1.1  oster #include "rf_utils.h"
    190  1.1  oster #include "rf_alloclist.h"
    191  1.1  oster #include "rf_general.h"
    192  1.1  oster #include "rf_shutdown.h"
    193  1.1  oster #include "rf_sys.h"
    194  1.1  oster 
    195  1.1  oster extern int rf_copyback_in_progress;                /* debug only */
    196  1.1  oster 
    197  1.1  oster /* found in rf_kintf.c */
    198  1.1  oster int rf_GetSpareTableFromDaemon(RF_SparetWait_t  *req);
    199  1.1  oster 
    200  1.1  oster /* configuration code */
    201  1.1  oster 
    202  1.1  oster int rf_ConfigureDeclustered(
    203  1.1  oster   RF_ShutdownList_t  **listp,
    204  1.1  oster   RF_Raid_t           *raidPtr,
    205  1.1  oster   RF_Config_t         *cfgPtr)
    206  1.1  oster {
    207  1.1  oster     RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    208  1.1  oster     int b, v, k, r, lambda;				/* block design params */
    209  1.1  oster     int i, j;
    210  1.1  oster     RF_RowCol_t *first_avail_slot;
    211  1.1  oster     RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
    212  1.1  oster     RF_DeclusteredConfigInfo_t *info;
    213  1.1  oster     RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
    214  1.1  oster     RF_StripeCount_t totSparePUsPerDisk;
    215  1.1  oster     RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
    216  1.1  oster     RF_SectorCount_t SpareSpaceInSUs;
    217  1.1  oster     char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
    218  1.1  oster     RF_StripeNum_t l, SUID;
    219  1.1  oster 
    220  1.1  oster     SUID = l = 0;
    221  1.1  oster     numCompleteSpareRegionsPerDisk = 0;
    222  1.1  oster 
    223  1.1  oster     /* 1. create layout specific structure */
    224  1.1  oster     RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
    225  1.1  oster     if (info == NULL)
    226  1.1  oster       return(ENOMEM);
    227  1.1  oster     layoutPtr->layoutSpecificInfo = (void *) info;
    228  1.1  oster     info->SpareTable = NULL;
    229  1.1  oster 
    230  1.1  oster     /* 2. extract parameters from the config structure */
    231  1.1  oster     if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
    232  1.1  oster       (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
    233  1.1  oster     }
    234  1.1  oster     cfgBuf += RF_SPAREMAP_NAME_LEN;
    235  1.1  oster 
    236  1.1  oster     b        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    237  1.1  oster     v        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    238  1.1  oster     k        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    239  1.1  oster     r        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    240  1.1  oster     lambda   = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    241  1.1  oster     raidPtr->noRotate = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
    242  1.1  oster 
    243  1.1  oster     /* the sparemaps are generated assuming that parity is rotated, so we issue
    244  1.1  oster      * a warning if both distributed sparing and no-rotate are on at the same time
    245  1.1  oster      */
    246  1.1  oster     if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
    247  1.1  oster 	RF_ERRORMSG("Warning:  distributed sparing specified without parity rotation.\n");
    248  1.1  oster     }
    249  1.1  oster 
    250  1.1  oster     if (raidPtr->numCol != v) {
    251  1.1  oster         RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
    252  1.1  oster         return(EINVAL);
    253  1.1  oster     }
    254  1.1  oster 
    255  1.1  oster     /* 3.  set up the values used in the mapping code */
    256  1.1  oster     info->BlocksPerTable = b;
    257  1.1  oster     info->Lambda = lambda;
    258  1.1  oster     info->NumParityReps = info->groupSize = k;
    259  1.1  oster     info->SUsPerTable = b * (k-1) * layoutPtr->SUsPerPU;/* b blks, k-1 SUs each */
    260  1.1  oster     info->SUsPerFullTable = k * info->SUsPerTable;	/* rot k times */
    261  1.1  oster     info->PUsPerBlock = k-1;
    262  1.1  oster     info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
    263  1.1  oster     info->TableDepthInPUs = (b*k) / v;
    264  1.1  oster     info->FullTableDepthInPUs = info->TableDepthInPUs * k;		/* k repetitions */
    265  1.1  oster 
    266  1.1  oster     /* used only in distributed sparing case */
    267  1.1  oster     info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1);		/* (v-1)/gcd fulltables */
    268  1.1  oster     info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
    269  1.1  oster     info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU;
    270  1.1  oster 
    271  1.1  oster     /* check to make sure the block design is sufficiently small */
    272  1.1  oster     if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
    273  1.1  oster         if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
    274  1.1  oster 	    RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
    275  1.1  oster 			 (int)info->FullTableDepthInPUs,
    276  1.1  oster 			 (int)info->SpareSpaceDepthPerRegionInSUs,
    277  1.1  oster 			 (int)layoutPtr->stripeUnitsPerDisk);
    278  1.1  oster 	    return(EINVAL);
    279  1.1  oster 	}
    280  1.1  oster     } else {
    281  1.1  oster 	if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
    282  1.1  oster 	    RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
    283  1.1  oster 			 (int)(info->TableDepthInPUs * layoutPtr->SUsPerPU), \
    284  1.1  oster 			 (int)layoutPtr->stripeUnitsPerDisk);
    285  1.1  oster 	    return(EINVAL);
    286  1.1  oster 	}
    287  1.1  oster     }
    288  1.1  oster 
    289  1.1  oster 
    290  1.1  oster     /* compute the size of each disk, and the number of tables in the last fulltable (which
    291  1.1  oster      * need not be complete)
    292  1.1  oster      */
    293  1.1  oster     if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
    294  1.1  oster 
    295  1.1  oster 	PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
    296  1.1  oster 	spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
    297  1.1  oster 				 (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1));
    298  1.1  oster 	info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;
    299  1.1  oster 
    300  1.1  oster 	numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
    301  1.1  oster 	info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
    302  1.1  oster 	extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
    303  1.1  oster 
    304  1.1  oster 	/* assume conservatively that we need the full amount of spare space in one region in order
    305  1.1  oster 	 * to provide spares for the partial spare region at the end of the array.  We set "i" to
    306  1.1  oster 	 * the number of tables in the partial spare region.  This may actually include some fulltables.
    307  1.1  oster 	 */
    308  1.1  oster 	extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
    309  1.1  oster 	if (extraPUsPerDisk <= 0) i = 0;
    310  1.1  oster 	else i = extraPUsPerDisk/info->TableDepthInPUs;
    311  1.1  oster 
    312  1.1  oster 	complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k);
    313  1.1  oster         info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
    314  1.1  oster 	info->ExtraTablesPerDisk = i % k;
    315  1.1  oster 
    316  1.1  oster 	/* note that in the last spare region, the spare space is complete even though data/parity space is not */
    317  1.1  oster 	totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
    318  1.1  oster 	info->TotSparePUsPerDisk = totSparePUsPerDisk;
    319  1.1  oster 
    320  1.1  oster 	layoutPtr->stripeUnitsPerDisk =
    321  1.1  oster 	    ((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs +	 	/* data & parity space */
    322  1.1  oster 	     info->ExtraTablesPerDisk * info->TableDepthInPUs +
    323  1.1  oster 	     totSparePUsPerDisk								/* spare space */
    324  1.1  oster 	    ) * layoutPtr->SUsPerPU;
    325  1.1  oster 	layoutPtr->dataStripeUnitsPerDisk =
    326  1.1  oster 	    (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
    327  1.1  oster 	    * layoutPtr->SUsPerPU * (k-1) / k;
    328  1.1  oster 
    329  1.1  oster     } else {
    330  1.1  oster         /* non-dist spare case:  force each disk to contain an integral number of tables */
    331  1.1  oster         layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
    332  1.1  oster         layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
    333  1.1  oster 
    334  1.1  oster 	/* compute the number of tables in the last fulltable, which need not be complete */
    335  1.1  oster         complete_FT_count =
    336  1.1  oster             ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;
    337  1.1  oster 
    338  1.1  oster         info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
    339  1.1  oster         info->ExtraTablesPerDisk =
    340  1.1  oster 		((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
    341  1.1  oster     }
    342  1.1  oster 
    343  1.1  oster     raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
    344  1.1  oster 
    345  1.1  oster     /* find the disk offset of the stripe unit where the last fulltable starts */
    346  1.1  oster     numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
    347  1.1  oster     diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    348  1.1  oster     if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
    349  1.1  oster         SpareSpaceInSUs  = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
    350  1.1  oster         diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
    351  1.1  oster         info->DiskOffsetOfLastSpareSpaceChunkInSUs =
    352  1.1  oster 	    diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
    353  1.1  oster     }
    354  1.1  oster     info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
    355  1.1  oster     info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
    356  1.1  oster 
    357  1.1  oster     /* 4.  create and initialize the lookup tables */
    358  1.1  oster     info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
    359  1.1  oster     if (info->LayoutTable == NULL)
    360  1.1  oster       return(ENOMEM);
    361  1.1  oster     info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
    362  1.1  oster     if (info->OffsetTable == NULL)
    363  1.1  oster       return(ENOMEM);
    364  1.1  oster     info->BlockTable  =	rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
    365  1.1  oster     if (info->BlockTable == NULL)
    366  1.1  oster       return(ENOMEM);
    367  1.1  oster 
    368  1.1  oster     first_avail_slot = rf_make_1d_array(v, NULL);
    369  1.1  oster     if (first_avail_slot == NULL)
    370  1.1  oster       return(ENOMEM);
    371  1.1  oster 
    372  1.1  oster     for (i=0; i<b; i++)
    373  1.1  oster       for (j=0; j<k; j++)
    374  1.1  oster         info->LayoutTable[i][j] = *cfgBuf++;
    375  1.1  oster 
    376  1.1  oster     /* initialize offset table */
    377  1.1  oster     for (i=0; i<b; i++) for (j=0; j<k; j++) {
    378  1.1  oster         info->OffsetTable[i][j] = first_avail_slot[ info->LayoutTable[i][j] ];
    379  1.1  oster         first_avail_slot[ info->LayoutTable[i][j] ]++;
    380  1.1  oster     }
    381  1.1  oster 
    382  1.1  oster     /* initialize block table */
    383  1.1  oster     for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) {
    384  1.1  oster         for (i=0; i<b; i++) {
    385  1.1  oster             for (j=0; j<k; j++) {
    386  1.1  oster                 info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ]
    387  1.1  oster 		                [ info->LayoutTable[i][j] ] = SUID;
    388  1.1  oster             }
    389  1.1  oster             SUID++;
    390  1.1  oster         }
    391  1.1  oster     }
    392  1.1  oster 
    393  1.1  oster     rf_free_1d_array(first_avail_slot, v);
    394  1.1  oster 
    395  1.1  oster     /* 5.  set up the remaining redundant-but-useful parameters */
    396  1.1  oster 
    397  1.1  oster     raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) *
    398  1.1  oster     			  info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
    399  1.1  oster     layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-1);
    400  1.1  oster 
    401  1.1  oster     /* strange evaluation order below to try and minimize overflow problems */
    402  1.1  oster 
    403  1.1  oster     layoutPtr->dataSectorsPerStripe = (k-1) * layoutPtr->sectorsPerStripeUnit;
    404  1.1  oster     layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
    405  1.1  oster     layoutPtr->numDataCol = k-1;
    406  1.1  oster     layoutPtr->numParityCol = 1;
    407  1.1  oster 
    408  1.1  oster     return(0);
    409  1.1  oster }
    410  1.1  oster 
    411  1.1  oster /* declustering with distributed sparing */
    412  1.1  oster static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
    413  1.1  oster static void rf_ShutdownDeclusteredDS(arg)
    414  1.1  oster   RF_ThreadArg_t  arg;
    415  1.1  oster {
    416  1.1  oster   RF_DeclusteredConfigInfo_t *info;
    417  1.1  oster   RF_Raid_t *raidPtr;
    418  1.1  oster 
    419  1.1  oster   raidPtr = (RF_Raid_t *)arg;
    420  1.1  oster   info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
    421  1.1  oster   if (info->SpareTable)
    422  1.1  oster     rf_FreeSpareTable(raidPtr);
    423  1.1  oster }
    424  1.1  oster 
    425  1.1  oster int rf_ConfigureDeclusteredDS(
    426  1.1  oster   RF_ShutdownList_t  **listp,
    427  1.1  oster   RF_Raid_t           *raidPtr,
    428  1.1  oster   RF_Config_t         *cfgPtr)
    429  1.1  oster {
    430  1.1  oster   int rc;
    431  1.1  oster 
    432  1.1  oster   rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
    433  1.1  oster   if (rc)
    434  1.1  oster     return(rc);
    435  1.1  oster   rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
    436  1.1  oster   if (rc) {
    437  1.1  oster     RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc);
    438  1.1  oster     rf_ShutdownDeclusteredDS(raidPtr);
    439  1.1  oster     return(rc);
    440  1.1  oster   }
    441  1.1  oster   return(0);
    442  1.1  oster }
    443  1.1  oster 
    444  1.1  oster void rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap)
    445  1.1  oster   RF_Raid_t       *raidPtr;
    446  1.1  oster   RF_RaidAddr_t    raidSector;
    447  1.1  oster   RF_RowCol_t     *row;
    448  1.1  oster   RF_RowCol_t     *col;
    449  1.1  oster   RF_SectorNum_t  *diskSector;
    450  1.1  oster   int              remap;
    451  1.1  oster {
    452  1.1  oster     RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    453  1.1  oster     RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    454  1.1  oster     RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
    455  1.1  oster     RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
    456  1.1  oster     RF_StripeNum_t BlockID, BlockOffset, RepIndex;
    457  1.1  oster     RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
    458  1.1  oster     RF_StripeCount_t fulltable_depth  = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    459  1.1  oster     RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;
    460  1.1  oster 
    461  1.1  oster     rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
    462  1.1  oster 
    463  1.1  oster     FullTableID     = SUID / sus_per_fulltable;		/* fulltable ID within array (across rows) */
    464  1.1  oster     if (raidPtr->numRow == 1) *row = 0;                 /* avoid a mod and a div in the common case */
    465  1.1  oster     else {
    466  1.1  oster       *row            = FullTableID % raidPtr->numRow;
    467  1.1  oster       FullTableID    /= raidPtr->numRow;			/* convert to fulltable ID on this disk */
    468  1.1  oster     }
    469  1.1  oster     if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
    470  1.1  oster 	SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
    471  1.1  oster         SpareSpace  = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
    472  1.1  oster     }
    473  1.1  oster     FullTableOffset = SUID % sus_per_fulltable;
    474  1.1  oster     TableID         = FullTableOffset / info->SUsPerTable;
    475  1.1  oster     TableOffset     = FullTableOffset - TableID * info->SUsPerTable;
    476  1.1  oster     BlockID         = TableOffset / info->PUsPerBlock;
    477  1.1  oster     BlockOffset     = TableOffset - BlockID * info->PUsPerBlock;
    478  1.1  oster     BlockID        %= info->BlocksPerTable;
    479  1.1  oster     RepIndex        = info->PUsPerBlock - TableID;
    480  1.1  oster     if (!raidPtr->noRotate) BlockOffset    += ((BlockOffset >= RepIndex) ? 1 : 0);
    481  1.1  oster     *col            = info->LayoutTable[BlockID][BlockOffset];
    482  1.1  oster 
    483  1.1  oster     /* remap to distributed spare space if indicated */
    484  1.1  oster     if (remap) {
    485  1.1  oster       RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
    486  1.1  oster 	     (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
    487  1.1  oster       rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
    488  1.1  oster     } else {
    489  1.1  oster 
    490  1.1  oster         outSU	    = base_suid;
    491  1.1  oster         outSU      += FullTableID * fulltable_depth;  				        /* offs to strt of FT */
    492  1.1  oster         outSU	   += SpareSpace;						        /* skip rsvd spare space */
    493  1.1  oster         outSU      += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;   	        /* offs to strt of tble */
    494  1.1  oster         outSU      += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU;	/* offs to the PU */
    495  1.1  oster     }
    496  1.1  oster     outSU          += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);	        /* offs to the SU within a PU */
    497  1.1  oster 
    498  1.1  oster     /* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector.  */
    499  1.1  oster     *diskSector     = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
    500  1.1  oster 
    501  1.1  oster     RF_ASSERT( *col != -1 );
    502  1.1  oster }
    503  1.1  oster 
    504  1.1  oster 
    505  1.1  oster /* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */
    506  1.1  oster void rf_MapParityDeclustered(
    507  1.1  oster   RF_Raid_t       *raidPtr,
    508  1.1  oster   RF_RaidAddr_t    raidSector,
    509  1.1  oster   RF_RowCol_t     *row,
    510  1.1  oster   RF_RowCol_t     *col,
    511  1.1  oster   RF_SectorNum_t  *diskSector,
    512  1.1  oster   int              remap)
    513  1.1  oster {
    514  1.1  oster     RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    515  1.1  oster     RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    516  1.1  oster     RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
    517  1.1  oster     RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
    518  1.1  oster     RF_StripeNum_t BlockID, BlockOffset, RepIndex;
    519  1.1  oster     RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
    520  1.1  oster     RF_StripeCount_t fulltable_depth  = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    521  1.1  oster     RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;
    522  1.1  oster 
    523  1.1  oster     rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
    524  1.1  oster 
    525  1.1  oster     /* compute row & (possibly) spare space exactly as before */
    526  1.1  oster     FullTableID     = SUID / sus_per_fulltable;
    527  1.1  oster     if (raidPtr->numRow == 1) *row = 0;                         /* avoid a mod and a div in the common case */
    528  1.1  oster     else {
    529  1.1  oster       *row            = FullTableID % raidPtr->numRow;
    530  1.1  oster       FullTableID    /= raidPtr->numRow;			/* convert to fulltable ID on this disk */
    531  1.1  oster     }
    532  1.1  oster     if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
    533  1.1  oster 	SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
    534  1.1  oster         SpareSpace  = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
    535  1.1  oster     }
    536  1.1  oster 
    537  1.1  oster     /* compute BlockID and RepIndex exactly as before */
    538  1.1  oster     FullTableOffset = SUID % sus_per_fulltable;
    539  1.1  oster     TableID         = FullTableOffset / info->SUsPerTable;
    540  1.1  oster     TableOffset     = FullTableOffset - TableID * info->SUsPerTable;
    541  1.1  oster     /*TableOffset     = FullTableOffset % info->SUsPerTable;*/
    542  1.1  oster     /*BlockID         = (TableOffset / info->PUsPerBlock) % info->BlocksPerTable;*/
    543  1.1  oster     BlockID         = TableOffset / info->PUsPerBlock;
    544  1.1  oster     /*BlockOffset     = TableOffset % info->PUsPerBlock;*/
    545  1.1  oster     BlockOffset     = TableOffset - BlockID * info->PUsPerBlock;
    546  1.1  oster     BlockID        %= info->BlocksPerTable;
    547  1.1  oster 
    548  1.1  oster     /* the parity block is in the position indicated by RepIndex */
    549  1.1  oster     RepIndex        = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID;
    550  1.1  oster     *col	    = info->LayoutTable[BlockID][RepIndex];
    551  1.1  oster 
    552  1.1  oster     if (remap) {
    553  1.1  oster       RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
    554  1.1  oster 	     (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
    555  1.1  oster       rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
    556  1.1  oster     } else {
    557  1.1  oster 
    558  1.1  oster         /* compute sector as before, except use RepIndex instead of BlockOffset */
    559  1.1  oster         outSU        = base_suid;
    560  1.1  oster         outSU       += FullTableID * fulltable_depth;
    561  1.1  oster         outSU	    += SpareSpace;						/* skip rsvd spare space */
    562  1.1  oster         outSU       += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
    563  1.1  oster         outSU       += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
    564  1.1  oster     }
    565  1.1  oster 
    566  1.1  oster     outSU       += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
    567  1.1  oster     *diskSector  = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
    568  1.1  oster 
    569  1.1  oster     RF_ASSERT( *col != -1 );
    570  1.1  oster }
    571  1.1  oster 
    572  1.1  oster /* returns an array of ints identifying the disks that comprise the stripe containing the indicated address.
    573  1.1  oster  * the caller must _never_ attempt to modify this array.
    574  1.1  oster  */
    575  1.1  oster void rf_IdentifyStripeDeclustered(
    576  1.1  oster   RF_Raid_t        *raidPtr,
    577  1.1  oster   RF_RaidAddr_t     addr,
    578  1.1  oster   RF_RowCol_t     **diskids,
    579  1.1  oster   RF_RowCol_t      *outRow)
    580  1.1  oster {
    581  1.1  oster   RF_RaidLayout_t *layoutPtr           = &(raidPtr->Layout);
    582  1.1  oster   RF_DeclusteredConfigInfo_t *info     = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    583  1.1  oster   RF_StripeCount_t sus_per_fulltable   = info->SUsPerFullTable;
    584  1.1  oster   RF_StripeCount_t fulltable_depth     = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
    585  1.1  oster   RF_StripeNum_t  base_suid            = 0;
    586  1.1  oster   RF_StripeNum_t SUID                  = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
    587  1.1  oster   RF_StripeNum_t stripeID, FullTableID;
    588  1.1  oster   int tableOffset;
    589  1.1  oster 
    590  1.1  oster   rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
    591  1.1  oster   FullTableID     = SUID / sus_per_fulltable;		/* fulltable ID within array (across rows) */
    592  1.1  oster   *outRow         = FullTableID % raidPtr->numRow;
    593  1.1  oster   stripeID        = rf_StripeUnitIDToStripeID(layoutPtr, SUID);                     /* find stripe offset into array */
    594  1.1  oster   tableOffset     = (stripeID % info->BlocksPerTable);                        /* find offset into block design table */
    595  1.1  oster   *diskids        = info->LayoutTable[tableOffset];
    596  1.1  oster }
    597  1.1  oster 
    598  1.1  oster /* This returns the default head-separation limit, which is measured
    599  1.1  oster  * in "required units for reconstruction".  Each time a disk fetches
    600  1.1  oster  * a unit, it bumps a counter.  The head-sep code prohibits any disk
    601  1.1  oster  * from getting more than headSepLimit counter values ahead of any
    602  1.1  oster  * other.
    603  1.1  oster  *
    604  1.1  oster  * We assume here that the number of floating recon buffers is already
    605  1.1  oster  * set.  There are r stripes to be reconstructed in each table, and so
    606  1.1  oster  * if we have a total of B buffers, we can have at most B/r tables
    607  1.1  oster  * under recon at any one time.  In each table, lambda units are required
    608  1.1  oster  * from each disk, so given B buffers, the head sep limit has to be
    609  1.1  oster  * (lambda*B)/r units.  We subtract one to avoid weird boundary cases.
    610  1.1  oster  *
    611  1.1  oster  * for example, suppose were given 50 buffers, r=19, and lambda=4 as in
    612  1.1  oster  * the 20.5 design.  There are 19 stripes/table to be reconstructed, so
    613  1.1  oster  * we can have 50/19 tables concurrently under reconstruction, which means
    614  1.1  oster  * we can allow the fastest disk to get 50/19 tables ahead of the slower
    615  1.1  oster  * disk.  There are lambda "required units" for each disk, so the fastest
    616  1.1  oster  * disk can get 4*50/19 = 10 counter values ahead of the slowest.
    617  1.1  oster  *
    618  1.1  oster  * If numBufsToAccumulate is not 1, we need to limit the head sep further
    619  1.1  oster  * because multiple bufs will be required for each stripe under recon.
    620  1.1  oster  */
    621  1.1  oster RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(
    622  1.1  oster   RF_Raid_t  *raidPtr)
    623  1.1  oster {
    624  1.1  oster   RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
    625  1.1  oster 
    626  1.1  oster   return(info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate);
    627  1.1  oster }
    628  1.1  oster 
    629  1.1  oster /* returns the default number of recon buffers to use.  The value
    630  1.1  oster  * is somewhat arbitrary...it's intended to be large enough to allow
    631  1.1  oster  * for a reasonably large head-sep limit, but small enough that you
    632  1.1  oster  * don't use up all your system memory with buffers.
    633  1.1  oster  */
    634  1.1  oster int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
    635  1.1  oster {
    636  1.1  oster   return(100 * rf_numBufsToAccumulate);
    637  1.1  oster }
    638  1.1  oster 
    639  1.1  oster /* sectors in the last fulltable of the array need to be handled
    640  1.1  oster  * specially since this fulltable can be incomplete.  this function
    641  1.1  oster  * changes the values of certain params to handle this.
    642  1.1  oster  *
    643  1.1  oster  * the idea here is that MapSector et. al. figure out which disk the
    644  1.1  oster  * addressed unit lives on by computing the modulos of the unit number
    645  1.1  oster  * with the number of units per fulltable, table, etc.  In the last
    646  1.1  oster  * fulltable, there are fewer units per fulltable, so we need to adjust
    647  1.1  oster  * the number of user data units per fulltable to reflect this.
    648  1.1  oster  *
    649  1.1  oster  * so, we (1) convert the fulltable size and depth parameters to
    650  1.1  oster  * the size of the partial fulltable at the end, (2) compute the
    651  1.1  oster  * disk sector offset where this fulltable starts, and (3) convert
    652  1.1  oster  * the users stripe unit number from an offset into the array to
    653  1.1  oster  * an offset into the last fulltable.
    654  1.1  oster  */
    655  1.1  oster void rf_decluster_adjust_params(
    656  1.1  oster   RF_RaidLayout_t   *layoutPtr,
    657  1.1  oster   RF_StripeNum_t    *SUID,
    658  1.1  oster   RF_StripeCount_t  *sus_per_fulltable,
    659  1.1  oster   RF_StripeCount_t  *fulltable_depth,
    660  1.1  oster   RF_StripeNum_t    *base_suid)
    661  1.1  oster {
    662  1.1  oster     RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    663  1.1  oster #if defined(__NetBSD__) && defined(_KERNEL)
    664  1.1  oster     /* Nothing! */
    665  1.1  oster #else
    666  1.1  oster     char pc = layoutPtr->map->parityConfig;
    667  1.1  oster #endif
    668  1.1  oster 
    669  1.1  oster     if (*SUID >= info->FullTableLimitSUID) {
    670  1.1  oster 	/* new full table size is size of last full table on disk */
    671  1.1  oster 	*sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable;
    672  1.1  oster 
    673  1.1  oster 	/* new full table depth is corresponding depth */
    674  1.1  oster 	*fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
    675  1.1  oster 
    676  1.1  oster 	/* set up the new base offset */
    677  1.1  oster 	*base_suid = info->DiskOffsetOfLastFullTableInSUs;
    678  1.1  oster 
    679  1.1  oster 	/* convert users array address to an offset into the last fulltable */
    680  1.1  oster 	*SUID -= info->FullTableLimitSUID;
    681  1.1  oster     }
    682  1.1  oster }
    683  1.1  oster 
    684  1.1  oster /*
    685  1.1  oster  * map a stripe ID to a parity stripe ID.
    686  1.1  oster  * See comment above RaidAddressToParityStripeID in layout.c.
    687  1.1  oster  */
    688  1.1  oster void rf_MapSIDToPSIDDeclustered(
    689  1.1  oster   RF_RaidLayout_t    *layoutPtr,
    690  1.1  oster   RF_StripeNum_t      stripeID,
    691  1.1  oster   RF_StripeNum_t     *psID,
    692  1.1  oster   RF_ReconUnitNum_t  *which_ru)
    693  1.1  oster {
    694  1.1  oster     RF_DeclusteredConfigInfo_t *info;
    695  1.1  oster 
    696  1.1  oster     info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    697  1.1  oster 
    698  1.1  oster     *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable))
    699  1.1  oster         * info->BlocksPerTable + (stripeID % info->BlocksPerTable);
    700  1.1  oster     *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU))
    701  1.1  oster         / info->BlocksPerTable;
    702  1.1  oster     RF_ASSERT( (*which_ru) < layoutPtr->SUsPerPU/layoutPtr->SUsPerRU);
    703  1.1  oster }
    704  1.1  oster 
    705  1.1  oster /*
    706  1.1  oster  * Called from MapSector and MapParity to retarget an access at the spare unit.
    707  1.1  oster  * Modifies the "col" and "outSU" parameters only.
    708  1.1  oster  */
    709  1.1  oster void rf_remap_to_spare_space(
    710  1.1  oster   RF_RaidLayout_t             *layoutPtr,
    711  1.1  oster   RF_DeclusteredConfigInfo_t  *info,
    712  1.1  oster   RF_RowCol_t                  row,
    713  1.1  oster   RF_StripeNum_t               FullTableID,
    714  1.1  oster   RF_StripeNum_t               TableID,
    715  1.1  oster   RF_SectorNum_t               BlockID,
    716  1.1  oster   RF_StripeNum_t               base_suid,
    717  1.1  oster   RF_StripeNum_t               SpareRegion,
    718  1.1  oster   RF_RowCol_t                 *outCol,
    719  1.1  oster   RF_StripeNum_t              *outSU)
    720  1.1  oster {
    721  1.1  oster     RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, which_ft;
    722  1.1  oster 
    723  1.1  oster     /*
    724  1.1  oster      * note that FullTableID and hence SpareRegion may have gotten
    725  1.1  oster      * tweaked by rf_decluster_adjust_params. We detect this by
    726  1.1  oster      * noticing that base_suid is not 0.
    727  1.1  oster      */
    728  1.1  oster     if (base_suid == 0) {
    729  1.1  oster       ftID = FullTableID;
    730  1.1  oster     }
    731  1.1  oster     else {
    732  1.1  oster       /*
    733  1.1  oster        * There may be > 1.0 full tables in the last (i.e. partial)
    734  1.1  oster        * spare region.  find out which of these we're in.
    735  1.1  oster        */
    736  1.1  oster       lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs;
    737  1.1  oster       which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
    738  1.1  oster 
    739  1.1  oster       /* compute the actual full table ID */
    740  1.1  oster       ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft;
    741  1.1  oster       SpareRegion = info->NumCompleteSRs;
    742  1.1  oster     }
    743  1.1  oster     TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion;
    744  1.1  oster 
    745  1.1  oster     *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
    746  1.1  oster     RF_ASSERT( *outCol != -1);
    747  1.1  oster 
    748  1.1  oster     spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
    749  1.1  oster 	    info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU :
    750  1.1  oster 	    (SpareRegion+1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs;
    751  1.1  oster     *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
    752  1.1  oster     if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
    753  1.1  oster 	printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n",(long)*outSU);
    754  1.1  oster     }
    755  1.1  oster }
    756  1.1  oster 
    757  1.1  oster int rf_InstallSpareTable(
    758  1.1  oster   RF_Raid_t    *raidPtr,
    759  1.1  oster   RF_RowCol_t   frow,
    760  1.1  oster   RF_RowCol_t   fcol)
    761  1.1  oster {
    762  1.1  oster   RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
    763  1.1  oster   RF_SparetWait_t *req;
    764  1.1  oster   int retcode;
    765  1.1  oster 
    766  1.1  oster   RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
    767  1.1  oster   req->C                             = raidPtr->numCol;
    768  1.1  oster   req->G                             = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
    769  1.1  oster   req->fcol                          = fcol;
    770  1.1  oster   req->SUsPerPU                      = raidPtr->Layout.SUsPerPU;
    771  1.1  oster   req->TablesPerSpareRegion          = info->TablesPerSpareRegion;
    772  1.1  oster   req->BlocksPerTable                = info->BlocksPerTable;
    773  1.1  oster   req->TableDepthInPUs               = info->TableDepthInPUs;
    774  1.1  oster   req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs;
    775  1.1  oster 
    776  1.1  oster #ifndef KERNEL
    777  1.1  oster   info->SpareTable = rf_ReadSpareTable(req, info->sparemap_fname);
    778  1.1  oster   RF_Free(req, sizeof(*req));
    779  1.1  oster   retcode = (info->SpareTable) ? 0 : 1;
    780  1.1  oster #else /* !KERNEL */
    781  1.1  oster   retcode = rf_GetSpareTableFromDaemon(req);
    782  1.1  oster   RF_ASSERT(!retcode);                                     /* XXX -- fix this to recover gracefully -- XXX */
    783  1.1  oster #endif /* !KERNEL */
    784  1.1  oster 
    785  1.1  oster   return(retcode);
    786  1.1  oster }
    787  1.1  oster 
#ifdef KERNEL
/*
 * Invoked via ioctl to install a spare table in the kernel.
 *
 *   raidPtr - array whose declustered info receives the table
 *   data    - user-space address of an array of TablesPerSpareRegion
 *             pointers, each referring to a user-space row of
 *             BlocksPerTable RF_SpareTableEntry_t entries
 *
 * Returns 0 on success, or the nonzero value returned by copyin() on
 * failure.  On failure everything allocated by this call is released and
 * info->SpareTable is left NULL.
 */
int rf_SetSpareTable(
  RF_Raid_t  *raidPtr,
  void       *data)
{
  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
  RF_SpareTableEntry_t **ptrs;
  int i, j, retcode;

  /* what we need to copyin is a 2-d array, so first copyin the user pointers to the rows in the table */
  RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
  retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));

  if (retcode) {
    /* don't leak the temporary pointer array */
    RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
    return(retcode);
  }

  /* now allocate kernel space for the row pointers */
  RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));

  /* now allocate kernel space for each row in the table, and copy it in from user space */
  for (i=0; i<info->TablesPerSpareRegion; i++) {
    RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
    retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
    if (retcode) {
      /* rows 0..i have been allocated so far (row i included): release them all */
      for (j=0; j<=i; j++) {
        RF_Free(info->SpareTable[j], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
      }
      RF_Free(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
      info->SpareTable = NULL;             /* no table is installed */
      RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
      return(retcode);
    }
  }

  /* free up the temporary array we used */
  RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));

  return(0);
}
#endif /* KERNEL */
    825  1.1  oster 
    826  1.1  oster RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(raidPtr)
    827  1.1  oster   RF_Raid_t *raidPtr;
    828  1.1  oster {
    829  1.1  oster   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
    830  1.1  oster 
    831  1.1  oster   return( ((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk );
    832  1.1  oster }
    833  1.1  oster 
    834  1.1  oster 
    835  1.1  oster void rf_FreeSpareTable(raidPtr)
    836  1.1  oster   RF_Raid_t  *raidPtr;
    837  1.1  oster {
    838  1.1  oster   long i;
    839  1.1  oster   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
    840  1.1  oster   RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
    841  1.1  oster   RF_SpareTableEntry_t **table = info->SpareTable;
    842  1.1  oster 
    843  1.1  oster   for (i=0; i<info->TablesPerSpareRegion; i++) {RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));}
    844  1.1  oster   RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
    845  1.1  oster   info->SpareTable = (RF_SpareTableEntry_t **) NULL;
    846  1.1  oster }
    847