Home | History | Annotate | Line # | Download | only in raidframe
rf_dagutils.c revision 1.2
      1  1.2  oster /*	$NetBSD: rf_dagutils.c,v 1.2 1999/01/26 02:33:54 oster Exp $	*/
      2  1.1  oster /*
      3  1.1  oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1  oster  * All rights reserved.
      5  1.1  oster  *
      6  1.1  oster  * Authors: Mark Holland, William V. Courtright II, Jim Zelenka
      7  1.1  oster  *
      8  1.1  oster  * Permission to use, copy, modify and distribute this software and
      9  1.1  oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1  oster  * notice and this permission notice appear in all copies of the
     11  1.1  oster  * software, derivative works or modified versions, and any portions
     12  1.1  oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1  oster  *
     14  1.1  oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1  oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1  oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1  oster  *
     18  1.1  oster  * Carnegie Mellon requests users of this software to return to
     19  1.1  oster  *
     20  1.1  oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1  oster  *  School of Computer Science
     22  1.1  oster  *  Carnegie Mellon University
     23  1.1  oster  *  Pittsburgh PA 15213-3890
     24  1.1  oster  *
     25  1.1  oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1  oster  * rights to redistribute these changes.
     27  1.1  oster  */
     28  1.1  oster 
     29  1.1  oster /******************************************************************************
     30  1.1  oster  *
     31  1.1  oster  * rf_dagutils.c -- utility routines for manipulating dags
     32  1.1  oster  *
     33  1.1  oster  *****************************************************************************/
     34  1.1  oster 
     35  1.1  oster #include "rf_archs.h"
     36  1.1  oster #include "rf_types.h"
     37  1.1  oster #include "rf_threadstuff.h"
     38  1.1  oster #include "rf_raid.h"
     39  1.1  oster #include "rf_dag.h"
     40  1.1  oster #include "rf_dagutils.h"
     41  1.1  oster #include "rf_dagfuncs.h"
     42  1.1  oster #include "rf_general.h"
     43  1.1  oster #include "rf_freelist.h"
     44  1.1  oster #include "rf_map.h"
     45  1.1  oster #include "rf_shutdown.h"
     46  1.1  oster #include "rf_sys.h"
     47  1.1  oster 
     48  1.1  oster #define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_)))
     49  1.1  oster 
     50  1.1  oster RF_RedFuncs_t rf_xorFuncs = {
     51  1.1  oster 	rf_RegularXorFunc, "Reg Xr",
     52  1.1  oster 	rf_SimpleXorFunc, "Simple Xr"};
     53  1.1  oster 
     54  1.1  oster RF_RedFuncs_t rf_xorRecoveryFuncs = {
     55  1.1  oster 	rf_RecoveryXorFunc, "Recovery Xr",
     56  1.1  oster 	rf_RecoveryXorFunc, "Recovery Xr"};
     57  1.1  oster 
     58  1.1  oster static void rf_RecurPrintDAG(RF_DagNode_t *, int, int);
     59  1.1  oster static void rf_PrintDAG(RF_DagHeader_t *);
     60  1.1  oster static int rf_ValidateBranch(RF_DagNode_t *, int *, int *,
     61  1.1  oster 			     RF_DagNode_t **, int );
     62  1.1  oster static void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int);
     63  1.1  oster static void rf_ValidateVisitedBits(RF_DagHeader_t *);
     64  1.1  oster 
     65  1.1  oster /******************************************************************************
     66  1.1  oster  *
     67  1.1  oster  * InitNode - initialize a dag node
     68  1.1  oster  *
     69  1.1  oster  * the size of the propList array is always the same as that of the
     70  1.1  oster  * successors array.
     71  1.1  oster  *
     72  1.1  oster  *****************************************************************************/
     73  1.1  oster void rf_InitNode(
     74  1.1  oster   RF_DagNode_t         *node,
     75  1.1  oster   RF_NodeStatus_t       initstatus,
     76  1.1  oster   int                   commit,
     77  1.1  oster   int                 (*doFunc)(RF_DagNode_t *node),
     78  1.1  oster   int                 (*undoFunc)(RF_DagNode_t *node),
     79  1.1  oster   int                 (*wakeFunc)(RF_DagNode_t *node,int status),
     80  1.1  oster   int                   nSucc,
     81  1.1  oster   int                   nAnte,
     82  1.1  oster   int                   nParam,
     83  1.1  oster   int                   nResult,
     84  1.1  oster   RF_DagHeader_t       *hdr,
     85  1.1  oster   char                 *name,
     86  1.1  oster   RF_AllocListElem_t   *alist)
     87  1.1  oster {
     88  1.1  oster   void **ptrs;
     89  1.1  oster   int nptrs;
     90  1.1  oster 
     91  1.1  oster   if (nAnte > RF_MAX_ANTECEDENTS)
     92  1.1  oster     RF_PANIC();
     93  1.1  oster   node->status         = initstatus;
     94  1.1  oster   node->commitNode     = commit;
     95  1.1  oster   node->doFunc         = doFunc;
     96  1.1  oster   node->undoFunc       = undoFunc;
     97  1.1  oster   node->wakeFunc       = wakeFunc;
     98  1.1  oster   node->numParams      = nParam;
     99  1.1  oster   node->numResults     = nResult;
    100  1.1  oster   node->numAntecedents = nAnte;
    101  1.1  oster   node->numAntDone     = 0;
    102  1.1  oster   node->next           = NULL;
    103  1.1  oster   node->numSuccedents  = nSucc;
    104  1.1  oster   node->name           = name;
    105  1.1  oster   node->dagHdr         = hdr;
    106  1.1  oster   node->visited        = 0;
    107  1.1  oster 
    108  1.1  oster   /* allocate all the pointers with one call to malloc */
    109  1.1  oster   nptrs = nSucc+nAnte+nResult+nSucc;
    110  1.1  oster 
    111  1.1  oster   if (nptrs <= RF_DAG_PTRCACHESIZE) {
    112  1.1  oster     /*
    113  1.1  oster      * The dag_ptrs field of the node is basically some scribble
    114  1.1  oster      * space to be used here. We could get rid of it, and always
    115  1.1  oster      * allocate the range of pointers, but that's expensive. So,
    116  1.1  oster      * we pick a "common case" size for the pointer cache. Hopefully,
    117  1.1  oster      * we'll find that:
    118  1.1  oster      * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by
    119  1.1  oster      *     only a little bit (least efficient case)
    120  1.1  oster      * (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE
    121  1.1  oster      *     (wasted memory)
    122  1.1  oster      */
    123  1.1  oster     ptrs = (void **)node->dag_ptrs;
    124  1.1  oster   }
    125  1.1  oster   else {
    126  1.1  oster     RF_CallocAndAdd(ptrs, nptrs, sizeof(void *), (void **), alist);
    127  1.1  oster   }
    128  1.1  oster   node->succedents  = (nSucc)   ? (RF_DagNode_t         **) ptrs                  : NULL;
    129  1.1  oster   node->antecedents = (nAnte)   ? (RF_DagNode_t         **) (ptrs+nSucc)          : NULL;
    130  1.1  oster   node->results     = (nResult) ? (void            **) (ptrs+nSucc+nAnte)         : NULL;
    131  1.1  oster   node->propList    = (nSucc)   ? (RF_PropHeader_t **) (ptrs+nSucc+nAnte+nResult) : NULL;
    132  1.1  oster 
    133  1.1  oster   if (nParam) {
    134  1.1  oster     if (nParam <= RF_DAG_PARAMCACHESIZE) {
    135  1.1  oster       node->params = (RF_DagParam_t *)node->dag_params;
    136  1.1  oster     }
    137  1.1  oster     else {
    138  1.1  oster       RF_CallocAndAdd(node->params, nParam, sizeof(RF_DagParam_t), (RF_DagParam_t *), alist);
    139  1.1  oster     }
    140  1.1  oster   }
    141  1.1  oster   else {
    142  1.1  oster     node->params = NULL;
    143  1.1  oster   }
    144  1.1  oster }
    145  1.1  oster 
    146  1.1  oster 
    147  1.1  oster 
    148  1.1  oster /******************************************************************************
    149  1.1  oster  *
    150  1.1  oster  * allocation and deallocation routines
    151  1.1  oster  *
    152  1.1  oster  *****************************************************************************/
    153  1.1  oster 
    154  1.1  oster void rf_FreeDAG(dag_h)
    155  1.1  oster   RF_DagHeader_t  *dag_h;
    156  1.1  oster {
    157  1.1  oster   RF_AccessStripeMapHeader_t *asmap, *t_asmap;
    158  1.1  oster   RF_DagHeader_t *nextDag;
    159  1.1  oster   int i;
    160  1.1  oster 
    161  1.1  oster   while (dag_h) {
    162  1.1  oster     nextDag = dag_h->next;
    163  1.1  oster     for (i=0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) {
    164  1.1  oster       /* release mem chunks */
    165  1.1  oster       rf_ReleaseMemChunk(dag_h->memChunk[i]);
    166  1.1  oster       dag_h->memChunk[i] = NULL;
    167  1.1  oster     }
    168  1.1  oster 
    169  1.1  oster     RF_ASSERT(i == dag_h->chunkIndex);
    170  1.1  oster     if (dag_h->xtraChunkCnt > 0) {
    171  1.1  oster       /* free xtraMemChunks */
    172  1.1  oster       for (i=0; dag_h->xtraMemChunk[i] && i < dag_h->xtraChunkIndex; i++) {
    173  1.1  oster         rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]);
    174  1.1  oster         dag_h->xtraMemChunk[i] = NULL;
    175  1.1  oster       }
    176  1.1  oster       RF_ASSERT(i == dag_h->xtraChunkIndex);
    177  1.1  oster       /* free ptrs to xtraMemChunks */
    178  1.1  oster       RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * sizeof(RF_ChunkDesc_t *));
    179  1.1  oster     }
    180  1.1  oster     rf_FreeAllocList(dag_h->allocList);
    181  1.1  oster     for (asmap = dag_h->asmList; asmap;) {
    182  1.1  oster       t_asmap = asmap;
    183  1.1  oster       asmap = asmap->next;
    184  1.1  oster       rf_FreeAccessStripeMap(t_asmap);
    185  1.1  oster     }
    186  1.1  oster     rf_FreeDAGHeader(dag_h);
    187  1.1  oster     dag_h = nextDag;
    188  1.1  oster   }
    189  1.1  oster }
    190  1.1  oster 
    191  1.1  oster RF_PropHeader_t *rf_MakePropListEntry(
    192  1.1  oster   RF_DagHeader_t      *dag_h,
    193  1.1  oster   int                  resultNum,
    194  1.1  oster   int                  paramNum,
    195  1.1  oster   RF_PropHeader_t     *next,
    196  1.1  oster   RF_AllocListElem_t  *allocList)
    197  1.1  oster {
    198  1.1  oster   RF_PropHeader_t *p;
    199  1.1  oster 
    200  1.1  oster   RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t),
    201  1.1  oster     (RF_PropHeader_t *), allocList);
    202  1.1  oster   p->resultNum = resultNum;
    203  1.1  oster   p->paramNum = paramNum;
    204  1.1  oster   p->next = next;
    205  1.1  oster   return(p);
    206  1.1  oster }
    207  1.1  oster 
    208  1.1  oster static RF_FreeList_t *rf_dagh_freelist;
    209  1.1  oster 
    210  1.1  oster #define RF_MAX_FREE_DAGH 128
    211  1.1  oster #define RF_DAGH_INC       16
    212  1.1  oster #define RF_DAGH_INITIAL   32
    213  1.1  oster 
    214  1.1  oster static void rf_ShutdownDAGs(void *);
    215  1.1  oster static void rf_ShutdownDAGs(ignored)
    216  1.1  oster   void  *ignored;
    217  1.1  oster {
    218  1.1  oster 	RF_FREELIST_DESTROY(rf_dagh_freelist,next,(RF_DagHeader_t *));
    219  1.1  oster }
    220  1.1  oster 
    221  1.1  oster int rf_ConfigureDAGs(listp)
    222  1.1  oster   RF_ShutdownList_t  **listp;
    223  1.1  oster {
    224  1.1  oster 	int rc;
    225  1.1  oster 
    226  1.1  oster 	RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH,
    227  1.1  oster 		RF_DAGH_INC, sizeof(RF_DagHeader_t));
    228  1.1  oster 	if (rf_dagh_freelist == NULL)
    229  1.1  oster 		return(ENOMEM);
    230  1.1  oster 	rc = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL);
    231  1.1  oster 	if (rc) {
    232  1.1  oster 		RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
    233  1.1  oster 			__FILE__, __LINE__, rc);
    234  1.1  oster 		rf_ShutdownDAGs(NULL);
    235  1.1  oster 		return(rc);
    236  1.1  oster 	}
    237  1.1  oster 	RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL,next,
    238  1.1  oster 		(RF_DagHeader_t *));
    239  1.1  oster 	return(0);
    240  1.1  oster }
    241  1.1  oster 
    242  1.1  oster RF_DagHeader_t *rf_AllocDAGHeader()
    243  1.1  oster {
    244  1.1  oster 	RF_DagHeader_t *dh;
    245  1.1  oster 
    246  1.1  oster 	RF_FREELIST_GET(rf_dagh_freelist,dh,next,(RF_DagHeader_t *));
    247  1.1  oster 	if (dh) {
    248  1.1  oster 		bzero((char *)dh, sizeof(RF_DagHeader_t));
    249  1.1  oster 	}
    250  1.1  oster 	return(dh);
    251  1.1  oster }
    252  1.1  oster 
    253  1.1  oster void rf_FreeDAGHeader(RF_DagHeader_t *dh)
    254  1.1  oster {
    255  1.1  oster 	RF_FREELIST_FREE(rf_dagh_freelist,dh,next);
    256  1.1  oster }
    257  1.1  oster 
    258  1.1  oster /* allocates a buffer big enough to hold the data described by pda */
    259  1.1  oster void *rf_AllocBuffer(
    260  1.1  oster   RF_Raid_t           *raidPtr,
    261  1.1  oster   RF_DagHeader_t      *dag_h,
    262  1.1  oster   RF_PhysDiskAddr_t   *pda,
    263  1.1  oster   RF_AllocListElem_t  *allocList)
    264  1.1  oster {
    265  1.1  oster   char *p;
    266  1.1  oster 
    267  1.1  oster   RF_MallocAndAdd(p, pda->numSector << raidPtr->logBytesPerSector,
    268  1.1  oster     (char *), allocList);
    269  1.1  oster   return((void *)p);
    270  1.1  oster }
    271  1.1  oster 
    272  1.1  oster /******************************************************************************
    273  1.1  oster  *
    274  1.1  oster  * debug routines
    275  1.1  oster  *
    276  1.1  oster  *****************************************************************************/
    277  1.1  oster 
    278  1.1  oster char *rf_NodeStatusString(RF_DagNode_t *node)
    279  1.1  oster {
    280  1.1  oster   switch (node->status) {
    281  1.1  oster     case rf_wait: return("wait");
    282  1.1  oster     case rf_fired: return("fired");
    283  1.1  oster     case rf_good: return("good");
    284  1.1  oster     case rf_bad: return("bad");
    285  1.1  oster     default: return("?");
    286  1.1  oster   }
    287  1.1  oster }
    288  1.1  oster 
    289  1.1  oster void rf_PrintNodeInfoString(RF_DagNode_t *node)
    290  1.1  oster {
    291  1.1  oster   RF_PhysDiskAddr_t *pda;
    292  1.1  oster   int (*df)(RF_DagNode_t *) = node->doFunc;
    293  1.1  oster   int i, lk, unlk;
    294  1.1  oster   void *bufPtr;
    295  1.1  oster 
    296  1.1  oster   if ((df==rf_DiskReadFunc) || (df==rf_DiskWriteFunc)
    297  1.1  oster       || (df==rf_DiskReadMirrorIdleFunc)
    298  1.1  oster       || (df == rf_DiskReadMirrorPartitionFunc))
    299  1.1  oster   {
    300  1.1  oster     pda = (RF_PhysDiskAddr_t *)node->params[0].p;
    301  1.1  oster     bufPtr = (void *)node->params[1].p;
    302  1.1  oster     lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
    303  1.1  oster     unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
    304  1.1  oster     RF_ASSERT( !(lk && unlk) );
    305  1.1  oster     printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row, pda->col,
    306  1.1  oster        (long)pda->startSector, (int) pda->numSector, (long)bufPtr,
    307  1.1  oster        (lk) ? "LOCK" : ((unlk) ? "UNLK" : " "));
    308  1.1  oster     return;
    309  1.1  oster   }
    310  1.1  oster 
    311  1.1  oster   if (df == rf_DiskUnlockFunc) {
    312  1.1  oster     pda = (RF_PhysDiskAddr_t *)node->params[0].p;
    313  1.1  oster     lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
    314  1.1  oster     unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
    315  1.1  oster     RF_ASSERT( !(lk && unlk) );
    316  1.1  oster     printf("r %d c %d %s\n", pda->row, pda->col,
    317  1.1  oster       (lk) ? "LOCK" : ((unlk) ? "UNLK" : "nop"));
    318  1.1  oster     return;
    319  1.1  oster   }
    320  1.1  oster 
    321  1.1  oster   if ((df==rf_SimpleXorFunc) || (df==rf_RegularXorFunc)
    322  1.1  oster       || (df==rf_RecoveryXorFunc))
    323  1.1  oster   {
    324  1.1  oster     printf("result buf 0x%lx\n",(long) node->results[0]);
    325  1.1  oster     for (i=0; i<node->numParams-1; i+=2) {
    326  1.1  oster       pda = (RF_PhysDiskAddr_t *)node->params[i].p;
    327  1.1  oster       bufPtr = (RF_PhysDiskAddr_t *)node->params[i+1].p;
    328  1.1  oster       printf("    buf 0x%lx r%d c%d offs %ld nsect %d\n",
    329  1.1  oster 	     (long)bufPtr, pda->row, pda->col,
    330  1.1  oster 	     (long)pda->startSector, (int)pda->numSector);
    331  1.1  oster     }
    332  1.1  oster     return;
    333  1.1  oster   }
    334  1.1  oster 
    335  1.1  oster #if RF_INCLUDE_PARITYLOGGING > 0
    336  1.1  oster   if (df==rf_ParityLogOverwriteFunc || df==rf_ParityLogUpdateFunc) {
    337  1.1  oster     for (i=0; i<node->numParams-1; i+=2) {
    338  1.1  oster       pda = (RF_PhysDiskAddr_t *)node->params[i].p;
    339  1.1  oster       bufPtr = (RF_PhysDiskAddr_t *)node->params[i+1].p;
    340  1.1  oster       printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n",
    341  1.1  oster 	     pda->row, pda->col, (long) pda->startSector,
    342  1.1  oster 	     (int) pda->numSector, (long) bufPtr);
    343  1.1  oster     }
    344  1.1  oster     return;
    345  1.1  oster   }
    346  1.1  oster #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
    347  1.1  oster 
    348  1.1  oster   if ((df==rf_TerminateFunc) || (df==rf_NullNodeFunc)) {
    349  1.1  oster     printf("\n");
    350  1.1  oster     return;
    351  1.1  oster   }
    352  1.1  oster 
    353  1.1  oster   printf("?\n");
    354  1.1  oster }
    355  1.1  oster 
    356  1.1  oster static void rf_RecurPrintDAG(node, depth, unvisited)
    357  1.1  oster   RF_DagNode_t  *node;
    358  1.1  oster   int            depth;
    359  1.1  oster   int            unvisited;
    360  1.1  oster {
    361  1.1  oster   char *anttype;
    362  1.1  oster   int i;
    363  1.1  oster 
    364  1.1  oster   node->visited = (unvisited) ? 0 : 1;
    365  1.1  oster   printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth,
    366  1.1  oster     node->nodeNum, node->commitNode, node->name, rf_NodeStatusString(node),
    367  1.1  oster     node->numSuccedents, node->numSuccFired, node->numSuccDone,
    368  1.1  oster 	node->numAntecedents, node->numAntDone, node->numParams,node->numResults);
    369  1.1  oster   for (i=0; i<node->numSuccedents; i++) {
    370  1.1  oster     printf("%d%s", node->succedents[i]->nodeNum,
    371  1.1  oster       ((i==node->numSuccedents-1) ? "\0" : " "));
    372  1.1  oster   }
    373  1.1  oster   printf("} A{");
    374  1.1  oster   for (i=0; i<node->numAntecedents; i++) {
    375  1.1  oster     switch (node->antType[i]) {
    376  1.1  oster     case rf_trueData :
    377  1.1  oster       anttype = "T";
    378  1.1  oster       break;
    379  1.1  oster     case rf_antiData :
    380  1.1  oster       anttype = "A";
    381  1.1  oster       break;
    382  1.1  oster     case rf_outputData :
    383  1.1  oster       anttype = "O";
    384  1.1  oster       break;
    385  1.1  oster     case rf_control :
    386  1.1  oster       anttype = "C";
    387  1.1  oster       break;
    388  1.1  oster     default :
    389  1.1  oster       anttype = "?";
    390  1.1  oster       break;
    391  1.1  oster     }
    392  1.1  oster     printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype, (i==node->numAntecedents-1) ? "\0" : " ");
    393  1.1  oster   }
    394  1.1  oster   printf("}; ");
    395  1.1  oster   rf_PrintNodeInfoString(node);
    396  1.1  oster   for (i=0; i<node->numSuccedents; i++) {
    397  1.1  oster     if (node->succedents[i]->visited == unvisited)
    398  1.1  oster       rf_RecurPrintDAG(node->succedents[i], depth+1, unvisited);
    399  1.1  oster   }
    400  1.1  oster }
    401  1.1  oster 
    402  1.1  oster static void rf_PrintDAG(dag_h)
    403  1.1  oster   RF_DagHeader_t  *dag_h;
    404  1.1  oster {
    405  1.1  oster   int unvisited, i;
    406  1.1  oster   char *status;
    407  1.1  oster 
    408  1.1  oster   /* set dag status */
    409  1.1  oster   switch (dag_h->status) {
    410  1.1  oster   case rf_enable :
    411  1.1  oster     status = "enable";
    412  1.1  oster     break;
    413  1.1  oster   case rf_rollForward :
    414  1.1  oster     status = "rollForward";
    415  1.1  oster     break;
    416  1.1  oster   case rf_rollBackward :
    417  1.1  oster     status = "rollBackward";
    418  1.1  oster     break;
    419  1.1  oster   default :
    420  1.1  oster     status = "illegal!";
    421  1.1  oster     break;
    422  1.1  oster   }
    423  1.1  oster   /* find out if visited bits are currently set or clear */
    424  1.1  oster   unvisited = dag_h->succedents[0]->visited;
    425  1.1  oster 
    426  1.1  oster   printf("DAG type:  %s\n", dag_h->creator);
    427  1.1  oster   printf("format is (depth) num commit type: status,nSucc nSuccFired/nSuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)};  info\n");
    428  1.1  oster   printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum,
    429  1.1  oster     status, dag_h->numSuccedents, dag_h->numCommitNodes, dag_h->numCommits);
    430  1.1  oster   for (i=0; i<dag_h->numSuccedents; i++) {
    431  1.1  oster     printf("%d%s", dag_h->succedents[i]->nodeNum,
    432  1.1  oster       ((i==dag_h->numSuccedents-1) ? "\0" : " "));
    433  1.1  oster   }
    434  1.1  oster   printf("};\n");
    435  1.1  oster   for (i=0; i<dag_h->numSuccedents; i++) {
    436  1.1  oster     if (dag_h->succedents[i]->visited == unvisited)
    437  1.1  oster       rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited);
    438  1.1  oster   }
    439  1.1  oster }
    440  1.1  oster 
    441  1.1  oster /* assigns node numbers */
    442  1.1  oster int rf_AssignNodeNums(RF_DagHeader_t *dag_h)
    443  1.1  oster {
    444  1.1  oster   int unvisited, i, nnum;
    445  1.1  oster   RF_DagNode_t *node;
    446  1.1  oster 
    447  1.1  oster   nnum = 0;
    448  1.1  oster   unvisited = dag_h->succedents[0]->visited;
    449  1.1  oster 
    450  1.1  oster   dag_h->nodeNum = nnum++;
    451  1.1  oster   for (i=0; i<dag_h->numSuccedents; i++) {
    452  1.1  oster     node = dag_h->succedents[i];
    453  1.1  oster     if (node->visited == unvisited) {
    454  1.1  oster       nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], nnum, unvisited);
    455  1.1  oster     }
    456  1.1  oster   }
    457  1.1  oster   return(nnum);
    458  1.1  oster }
    459  1.1  oster 
    460  1.1  oster int rf_RecurAssignNodeNums(node, num, unvisited)
    461  1.1  oster   RF_DagNode_t  *node;
    462  1.1  oster   int            num;
    463  1.1  oster   int            unvisited;
    464  1.1  oster {
    465  1.1  oster   int i;
    466  1.1  oster 
    467  1.1  oster   node->visited = (unvisited) ? 0 : 1;
    468  1.1  oster 
    469  1.1  oster   node->nodeNum = num++;
    470  1.1  oster   for (i=0; i<node->numSuccedents; i++) {
    471  1.1  oster     if (node->succedents[i]->visited == unvisited) {
    472  1.1  oster       num = rf_RecurAssignNodeNums(node->succedents[i], num, unvisited);
    473  1.1  oster     }
    474  1.1  oster   }
    475  1.1  oster   return(num);
    476  1.1  oster }
    477  1.1  oster 
    478  1.1  oster /* set the header pointers in each node to "newptr" */
    479  1.1  oster void rf_ResetDAGHeaderPointers(dag_h, newptr)
    480  1.1  oster   RF_DagHeader_t  *dag_h;
    481  1.1  oster   RF_DagHeader_t  *newptr;
    482  1.1  oster {
    483  1.1  oster   int i;
    484  1.1  oster   for (i=0; i<dag_h->numSuccedents; i++)
    485  1.1  oster     if (dag_h->succedents[i]->dagHdr != newptr)
    486  1.1  oster       rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], newptr);
    487  1.1  oster }
    488  1.1  oster 
    489  1.1  oster void rf_RecurResetDAGHeaderPointers(node, newptr)
    490  1.1  oster   RF_DagNode_t    *node;
    491  1.1  oster   RF_DagHeader_t  *newptr;
    492  1.1  oster {
    493  1.1  oster   int i;
    494  1.1  oster   node->dagHdr = newptr;
    495  1.1  oster   for (i=0; i<node->numSuccedents; i++)
    496  1.1  oster     if (node->succedents[i]->dagHdr != newptr)
    497  1.1  oster       rf_RecurResetDAGHeaderPointers(node->succedents[i], newptr);
    498  1.1  oster }
    499  1.1  oster 
    500  1.1  oster 
    501  1.1  oster void rf_PrintDAGList(RF_DagHeader_t *dag_h)
    502  1.1  oster {
    503  1.1  oster   int i=0;
    504  1.1  oster 
    505  1.1  oster   for (; dag_h; dag_h=dag_h->next) {
    506  1.1  oster     rf_AssignNodeNums(dag_h);
    507  1.1  oster     printf("\n\nDAG %d IN LIST:\n",i++);
    508  1.1  oster     rf_PrintDAG(dag_h);
    509  1.1  oster   }
    510  1.1  oster }
    511  1.1  oster 
    512  1.1  oster static int rf_ValidateBranch(node, scount, acount, nodes, unvisited)
    513  1.1  oster   RF_DagNode_t   *node;
    514  1.1  oster   int            *scount;
    515  1.1  oster   int            *acount;
    516  1.1  oster   RF_DagNode_t  **nodes;
    517  1.1  oster   int             unvisited;
    518  1.1  oster {
    519  1.1  oster   int i, retcode = 0;
    520  1.1  oster 
    521  1.1  oster   /* construct an array of node pointers indexed by node num */
    522  1.1  oster   node->visited = (unvisited) ? 0 : 1;
    523  1.1  oster   nodes[ node->nodeNum ] = node;
    524  1.1  oster 
    525  1.1  oster   if (node->next != NULL) {
    526  1.1  oster     printf("INVALID DAG: next pointer in node is not NULL\n");
    527  1.1  oster     retcode = 1;
    528  1.1  oster   }
    529  1.1  oster   if (node->status != rf_wait) {
    530  1.1  oster     printf("INVALID DAG: Node status is not wait\n");
    531  1.1  oster     retcode = 1;
    532  1.1  oster   }
    533  1.1  oster   if (node->numAntDone != 0) {
    534  1.1  oster     printf("INVALID DAG: numAntDone is not zero\n");
    535  1.1  oster     retcode = 1;
    536  1.1  oster   }
    537  1.1  oster   if (node->doFunc == rf_TerminateFunc) {
    538  1.1  oster     if (node->numSuccedents != 0) {
    539  1.1  oster       printf("INVALID DAG: Terminator node has succedents\n");
    540  1.1  oster       retcode = 1;
    541  1.1  oster     }
    542  1.1  oster   } else {
    543  1.1  oster     if (node->numSuccedents == 0) {
    544  1.1  oster       printf("INVALID DAG: Non-terminator node has no succedents\n");
    545  1.1  oster       retcode = 1;
    546  1.1  oster     }
    547  1.1  oster   }
    548  1.1  oster   for (i=0; i<node->numSuccedents; i++)  {
    549  1.1  oster     if (!node->succedents[i]) {
    550  1.1  oster       printf("INVALID DAG: succedent %d of node %s is NULL\n",i,node->name);
    551  1.1  oster       retcode = 1;
    552  1.1  oster     }
    553  1.1  oster     scount[ node->succedents[i]->nodeNum ]++;
    554  1.1  oster   }
    555  1.1  oster   for (i=0; i<node->numAntecedents; i++) {
    556  1.1  oster     if (!node->antecedents[i]) {
    557  1.1  oster       printf("INVALID DAG: antecedent %d of node %s is NULL\n",i,node->name);
    558  1.1  oster       retcode = 1;
    559  1.1  oster     }
    560  1.1  oster     acount[ node->antecedents[i]->nodeNum ]++;
    561  1.1  oster   }
    562  1.1  oster   for (i=0; i<node->numSuccedents; i++) {
    563  1.1  oster     if (node->succedents[i]->visited == unvisited) {
    564  1.1  oster       if (rf_ValidateBranch(node->succedents[i], scount,
    565  1.1  oster           acount, nodes, unvisited))
    566  1.1  oster       {
    567  1.1  oster         retcode = 1;
    568  1.1  oster       }
    569  1.1  oster     }
    570  1.1  oster   }
    571  1.1  oster   return(retcode);
    572  1.1  oster }
    573  1.1  oster 
    574  1.1  oster static void rf_ValidateBranchVisitedBits(node, unvisited, rl)
    575  1.1  oster   RF_DagNode_t  *node;
    576  1.1  oster   int            unvisited;
    577  1.1  oster   int            rl;
    578  1.1  oster {
    579  1.1  oster   int i;
    580  1.1  oster 
    581  1.1  oster   RF_ASSERT(node->visited == unvisited);
    582  1.1  oster   for (i=0; i<node->numSuccedents; i++) {
    583  1.1  oster     if (node->succedents[i] == NULL) {
    584  1.1  oster       printf("node=%lx node->succedents[%d] is NULL\n", (long)node, i);
    585  1.1  oster       RF_ASSERT(0);
    586  1.1  oster     }
    587  1.1  oster     rf_ValidateBranchVisitedBits(node->succedents[i],unvisited, rl+1);
    588  1.1  oster   }
    589  1.1  oster }
    590  1.1  oster 
    591  1.1  oster /* NOTE:  never call this on a big dag, because it is exponential
    592  1.1  oster  * in execution time
    593  1.1  oster  */
    594  1.1  oster static void rf_ValidateVisitedBits(dag)
    595  1.1  oster   RF_DagHeader_t  *dag;
    596  1.1  oster {
    597  1.1  oster   int i, unvisited;
    598  1.1  oster 
    599  1.1  oster   unvisited = dag->succedents[0]->visited;
    600  1.1  oster 
    601  1.1  oster   for (i=0; i<dag->numSuccedents; i++) {
    602  1.1  oster     if (dag->succedents[i] == NULL) {
    603  1.1  oster       printf("dag=%lx dag->succedents[%d] is NULL\n", (long) dag, i);
    604  1.1  oster       RF_ASSERT(0);
    605  1.1  oster     }
    606  1.1  oster     rf_ValidateBranchVisitedBits(dag->succedents[i],unvisited,0);
    607  1.1  oster   }
    608  1.1  oster }
    609  1.1  oster 
    610  1.1  oster /* validate a DAG.  _at entry_ verify that:
    611  1.1  oster  *   -- numNodesCompleted is zero
    612  1.1  oster  *   -- node queue is null
    613  1.1  oster  *   -- dag status is rf_enable
    614  1.1  oster  *   -- next pointer is null on every node
    615  1.1  oster  *   -- all nodes have status wait
    616  1.1  oster  *   -- numAntDone is zero in all nodes
    617  1.1  oster  *   -- terminator node has zero successors
    618  1.1  oster  *   -- no other node besides terminator has zero successors
    619  1.1  oster  *   -- no successor or antecedent pointer in a node is NULL
    620  1.1  oster  *   -- number of times that each node appears as a successor of another node
    621  1.1  oster  *      is equal to the antecedent count on that node
    622  1.1  oster  *   -- number of times that each node appears as an antecedent of another node
    623  1.1  oster  *      is equal to the succedent count on that node
    624  1.1  oster  *   -- what else?
    625  1.1  oster  */
    626  1.1  oster int rf_ValidateDAG(dag_h)
    627  1.1  oster   RF_DagHeader_t  *dag_h;
    628  1.1  oster {
    629  1.1  oster   int i, nodecount;
    630  1.1  oster   int *scount, *acount;        /* per-node successor and antecedent counts */
    631  1.1  oster   RF_DagNode_t **nodes;        /* array of ptrs to nodes in dag */
    632  1.1  oster   int retcode = 0;
    633  1.1  oster   int unvisited;
    634  1.1  oster   int commitNodeCount = 0;
    635  1.1  oster 
    636  1.1  oster   if (rf_validateVisitedDebug)
    637  1.1  oster     rf_ValidateVisitedBits(dag_h);
    638  1.1  oster 
    639  1.1  oster   if (dag_h->numNodesCompleted != 0) {
    640  1.1  oster     printf("INVALID DAG: num nodes completed is %d, should be 0\n",dag_h->numNodesCompleted);
    641  1.1  oster     retcode = 1; goto validate_dag_bad;
    642  1.1  oster   }
    643  1.1  oster   if (dag_h->status != rf_enable) {
    644  1.1  oster     printf("INVALID DAG: not enabled\n");
    645  1.1  oster     retcode = 1; goto validate_dag_bad;
    646  1.1  oster   }
    647  1.1  oster   if (dag_h->numCommits != 0) {
    648  1.1  oster     printf("INVALID DAG: numCommits != 0 (%d)\n",dag_h->numCommits);
    649  1.1  oster     retcode = 1; goto validate_dag_bad;
    650  1.1  oster   }
    651  1.1  oster   if (dag_h->numSuccedents != 1) {
    652  1.1  oster     /* currently, all dags must have only one succedent */
    653  1.1  oster     printf("INVALID DAG: numSuccedents !1 (%d)\n",dag_h->numSuccedents);
    654  1.1  oster     retcode = 1; goto validate_dag_bad;
    655  1.1  oster   }
    656  1.1  oster   nodecount = rf_AssignNodeNums(dag_h);
    657  1.1  oster 
    658  1.1  oster   unvisited = dag_h->succedents[0]->visited;
    659  1.1  oster 
    660  1.1  oster   RF_Calloc(scount, nodecount, sizeof(int), (int *));
    661  1.1  oster   RF_Calloc(acount, nodecount, sizeof(int), (int *));
    662  1.1  oster   RF_Calloc(nodes,  nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **));
    663  1.1  oster   for (i=0; i<dag_h->numSuccedents; i++) {
    664  1.1  oster     if ((dag_h->succedents[i]->visited == unvisited)
    665  1.1  oster         && rf_ValidateBranch(dag_h->succedents[i], scount,
    666  1.1  oster         acount, nodes, unvisited))
    667  1.1  oster     {
    668  1.1  oster       retcode = 1;
    669  1.1  oster     }
    670  1.1  oster   }
    671  1.1  oster   /* start at 1 to skip the header node */
    672  1.1  oster   for (i=1; i<nodecount; i++) {
    673  1.1  oster     if ( nodes[i]->commitNode )
    674  1.1  oster       commitNodeCount++;
    675  1.1  oster     if ( nodes[i]->doFunc == NULL ) {
    676  1.1  oster       printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name);
    677  1.1  oster       retcode = 1;
    678  1.1  oster       goto validate_dag_out;
    679  1.1  oster     }
    680  1.1  oster     if ( nodes[i]->undoFunc == NULL ) {
    681  1.1  oster       printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name);
    682  1.1  oster       retcode = 1;
    683  1.1  oster       goto validate_dag_out;
    684  1.1  oster     }
    685  1.1  oster     if ( nodes[i]->numAntecedents != scount[ nodes[i]->nodeNum ] ) {
    686  1.1  oster       printf("INVALID DAG: node %s has %d antecedents but appears as a succedent %d times\n",
    687  1.1  oster 	     nodes[i]->name, nodes[i]->numAntecedents, scount[nodes[i]->nodeNum]);
    688  1.1  oster       retcode = 1;
    689  1.1  oster       goto validate_dag_out;
    690  1.1  oster     }
    691  1.1  oster     if ( nodes[i]->numSuccedents != acount[ nodes[i]->nodeNum ] ) {
    692  1.1  oster       printf("INVALID DAG: node %s has %d succedents but appears as an antecedent %d times\n",
    693  1.1  oster 	     nodes[i]->name, nodes[i]->numSuccedents, acount[nodes[i]->nodeNum]);
    694  1.1  oster       retcode = 1;
    695  1.1  oster       goto validate_dag_out;
    696  1.1  oster     }
    697  1.1  oster   }
    698  1.1  oster 
    699  1.1  oster   if ( dag_h->numCommitNodes != commitNodeCount ) {
    700  1.1  oster     printf("INVALID DAG: incorrect commit node count.  hdr->numCommitNodes (%d) found (%d) commit nodes in graph\n",
    701  1.1  oster       dag_h->numCommitNodes, commitNodeCount);
    702  1.1  oster     retcode = 1;
    703  1.1  oster     goto validate_dag_out;
    704  1.1  oster   }
    705  1.1  oster 
    706  1.1  oster validate_dag_out:
    707  1.1  oster   RF_Free(scount, nodecount*sizeof(int));
    708  1.1  oster   RF_Free(acount, nodecount*sizeof(int));
    709  1.1  oster   RF_Free(nodes,  nodecount*sizeof(RF_DagNode_t *));
    710  1.1  oster   if (retcode)
    711  1.1  oster     rf_PrintDAGList(dag_h);
    712  1.1  oster 
    713  1.1  oster   if (rf_validateVisitedDebug)
    714  1.1  oster     rf_ValidateVisitedBits(dag_h);
    715  1.1  oster 
    716  1.1  oster   return(retcode);
    717  1.1  oster 
    718  1.1  oster validate_dag_bad:
    719  1.1  oster   rf_PrintDAGList(dag_h);
    720  1.1  oster   return(retcode);
    721  1.1  oster }
    722  1.1  oster 
    723  1.1  oster 
    724  1.1  oster /******************************************************************************
    725  1.1  oster  *
    726  1.1  oster  * misc construction routines
    727  1.1  oster  *
    728  1.1  oster  *****************************************************************************/
    729  1.1  oster 
    730  1.1  oster void rf_redirect_asm(
    731  1.1  oster   RF_Raid_t             *raidPtr,
    732  1.1  oster   RF_AccessStripeMap_t  *asmap)
    733  1.1  oster {
    734  1.1  oster   int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 1 : 0;
    735  1.1  oster   int row = asmap->physInfo->row;
    736  1.1  oster   int fcol = raidPtr->reconControl[row]->fcol;
    737  1.1  oster   int srow = raidPtr->reconControl[row]->spareRow;
    738  1.1  oster   int scol = raidPtr->reconControl[row]->spareCol;
    739  1.1  oster   RF_PhysDiskAddr_t *pda;
    740  1.1  oster 
    741  1.1  oster   RF_ASSERT( raidPtr->status[row] == rf_rs_reconstructing );
    742  1.1  oster   for (pda = asmap->physInfo; pda; pda=pda->next) {
    743  1.1  oster     if (pda->col == fcol) {
    744  1.1  oster       if (rf_dagDebug) {
    745  1.1  oster         if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap,
    746  1.1  oster           pda->startSector))
    747  1.1  oster         {
    748  1.1  oster           RF_PANIC();
    749  1.1  oster         }
    750  1.1  oster       }
    751  1.1  oster       /*printf("Remapped data for large write\n");*/
    752  1.1  oster       if (ds) {
    753  1.1  oster         raidPtr->Layout.map->MapSector(raidPtr, pda->raidAddress,
    754  1.1  oster           &pda->row, &pda->col, &pda->startSector, RF_REMAP);
    755  1.1  oster       }
    756  1.1  oster       else {
    757  1.1  oster         pda->row = srow; pda->col = scol;
    758  1.1  oster       }
    759  1.1  oster     }
    760  1.1  oster   }
    761  1.1  oster   for (pda = asmap->parityInfo; pda; pda=pda->next) {
    762  1.1  oster     if (pda->col == fcol) {
    763  1.1  oster       if (rf_dagDebug) {
    764  1.1  oster         if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, pda->startSector)) {
    765  1.1  oster           RF_PANIC();
    766  1.1  oster         }
    767  1.1  oster       }
    768  1.1  oster     }
    769  1.1  oster     if (ds) {
    770  1.1  oster       (raidPtr->Layout.map->MapParity)(raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
    771  1.1  oster     }
    772  1.1  oster     else {
    773  1.1  oster       pda->row = srow; pda->col = scol;
    774  1.1  oster     }
    775  1.1  oster   }
    776  1.1  oster }
    777  1.1  oster 
    778  1.1  oster 
    779  1.1  oster /* this routine allocates read buffers and generates stripe maps for the
    780  1.1  oster  * regions of the array from the start of the stripe to the start of the
    781  1.1  oster  * access, and from the end of the access to the end of the stripe.  It also
    782  1.1  oster  * computes and returns the number of DAG nodes needed to read all this data.
    783  1.1  oster  * Note that this routine does the wrong thing if the access is fully
    784  1.1  oster  * contained within one stripe unit, so we RF_ASSERT against this case at the
    785  1.1  oster  * start.
    786  1.1  oster  */
    787  1.1  oster void rf_MapUnaccessedPortionOfStripe(
    788  1.1  oster   RF_Raid_t                    *raidPtr,
    789  1.1  oster   RF_RaidLayout_t              *layoutPtr, /* in: layout information */
    790  1.1  oster   RF_AccessStripeMap_t         *asmap,     /* in: access stripe map */
    791  1.1  oster   RF_DagHeader_t               *dag_h,     /* in: header of the dag to create */
    792  1.1  oster   RF_AccessStripeMapHeader_t  **new_asm_h, /* in: ptr to array of 2 headers, to be filled in */
    793  1.1  oster   int                          *nRodNodes, /* out: num nodes to be generated to read unaccessed data */
    794  1.1  oster   char                        **sosBuffer, /* out: pointers to newly allocated buffer */
    795  1.1  oster   char                        **eosBuffer,
    796  1.1  oster   RF_AllocListElem_t           *allocList)
    797  1.1  oster {
    798  1.1  oster   RF_RaidAddr_t sosRaidAddress, eosRaidAddress;
    799  1.1  oster   RF_SectorNum_t sosNumSector, eosNumSector;
    800  1.1  oster 
    801  1.1  oster   RF_ASSERT( asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol/2) );
    802  1.1  oster   /* generate an access map for the region of the array from start of stripe
    803  1.1  oster    * to start of access */
    804  1.1  oster   new_asm_h[0] = new_asm_h[1] = NULL; *nRodNodes = 0;
    805  1.1  oster   if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) {
    806  1.1  oster     sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    807  1.1  oster     sosNumSector   = asmap->raidAddress - sosRaidAddress;
    808  1.1  oster     RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, sosNumSector), (char *), allocList);
    809  1.1  oster     new_asm_h[0]   = rf_MapAccess(raidPtr, sosRaidAddress, sosNumSector, *sosBuffer, RF_DONT_REMAP);
    810  1.1  oster     new_asm_h[0]->next = dag_h->asmList;
    811  1.1  oster     dag_h->asmList = new_asm_h[0];
    812  1.1  oster     *nRodNodes    += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
    813  1.1  oster 
    814  1.1  oster     RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL);
    815  1.1  oster     /* we're totally within one stripe here */
    816  1.1  oster     if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
    817  1.1  oster       rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap);
    818  1.1  oster   }
    819  1.1  oster   /* generate an access map for the region of the array from end of access
    820  1.1  oster    * to end of stripe */
    821  1.1  oster   if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) {
    822  1.1  oster     eosRaidAddress = asmap->endRaidAddress;
    823  1.1  oster     eosNumSector   = rf_RaidAddressOfNextStripeBoundary(layoutPtr, eosRaidAddress) - eosRaidAddress;
    824  1.1  oster     RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, eosNumSector), (char *), allocList);
    825  1.1  oster     new_asm_h[1]   = rf_MapAccess(raidPtr, eosRaidAddress, eosNumSector, *eosBuffer, RF_DONT_REMAP);
    826  1.1  oster     new_asm_h[1]->next = dag_h->asmList;
    827  1.1  oster     dag_h->asmList = new_asm_h[1];
    828  1.1  oster     *nRodNodes    += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
    829  1.1  oster 
    830  1.1  oster     RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL);
    831  1.1  oster     /* we're totally within one stripe here */
    832  1.1  oster     if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
    833  1.1  oster       rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap);
    834  1.1  oster   }
    835  1.1  oster }
    836  1.1  oster 
    837  1.1  oster 
    838  1.1  oster 
    839  1.1  oster /* returns non-zero if the indicated ranges of stripe unit offsets overlap */
    840  1.1  oster int rf_PDAOverlap(
    841  1.1  oster   RF_RaidLayout_t    *layoutPtr,
    842  1.1  oster   RF_PhysDiskAddr_t  *src,
    843  1.1  oster   RF_PhysDiskAddr_t  *dest)
    844  1.1  oster {
    845  1.1  oster   RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector);
    846  1.1  oster   RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector);
    847  1.1  oster   /* use -1 to be sure we stay within SU */
    848  1.1  oster   RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector-1);
    849  1.1  oster   RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector-1);
    850  1.1  oster   return( (RF_MAX(soffs,doffs) <= RF_MIN(send,dend)) ? 1 : 0 );
    851  1.1  oster }
    852  1.1  oster 
    853  1.1  oster 
    854  1.1  oster /* GenerateFailedAccessASMs
    855  1.1  oster  *
    856  1.1  oster  * this routine figures out what portion of the stripe needs to be read
    857  1.1  oster  * to effect the degraded read or write operation.  It's primary function
    858  1.1  oster  * is to identify everything required to recover the data, and then
    859  1.1  oster  * eliminate anything that is already being accessed by the user.
    860  1.1  oster  *
    861  1.1  oster  * The main result is two new ASMs, one for the region from the start of the
    862  1.1  oster  * stripe to the start of the access, and one for the region from the end of
    863  1.1  oster  * the access to the end of the stripe.  These ASMs describe everything that
    864  1.1  oster  * needs to be read to effect the degraded access.  Other results are:
    865  1.1  oster  *    nXorBufs -- the total number of buffers that need to be XORed together to
    866  1.1  oster  *                recover the lost data,
    867  1.1  oster  *    rpBufPtr -- ptr to a newly-allocated buffer to hold the parity.  If NULL
    868  1.1  oster  *                at entry, not allocated.
    869  1.1  oster  *    overlappingPDAs --
    870  1.1  oster  *                describes which of the non-failed PDAs in the user access
    871  1.1  oster  *                overlap data that needs to be read to effect recovery.
    872  1.1  oster  *                overlappingPDAs[i]==1 if and only if, neglecting the failed
    873  1.1  oster  *                PDA, the ith pda in the input asm overlaps data that needs
    874  1.1  oster  *                to be read for recovery.
    875  1.1  oster  */
    876  1.1  oster  /* in: asm - ASM for the actual access, one stripe only */
    877  1.1  oster  /* in: faildPDA - which component of the access has failed */
    878  1.1  oster  /* in: dag_h - header of the DAG we're going to create */
    879  1.1  oster  /* out: new_asm_h - the two new ASMs */
    880  1.1  oster  /* out: nXorBufs - the total number of xor bufs required */
    881  1.1  oster  /* out: rpBufPtr - a buffer for the parity read */
    882  1.1  oster void rf_GenerateFailedAccessASMs(
    883  1.1  oster   RF_Raid_t                    *raidPtr,
    884  1.1  oster   RF_AccessStripeMap_t         *asmap,
    885  1.1  oster   RF_PhysDiskAddr_t            *failedPDA,
    886  1.1  oster   RF_DagHeader_t               *dag_h,
    887  1.1  oster   RF_AccessStripeMapHeader_t  **new_asm_h,
    888  1.1  oster   int                          *nXorBufs,
    889  1.1  oster   char                        **rpBufPtr,
    890  1.1  oster   char                         *overlappingPDAs,
    891  1.1  oster   RF_AllocListElem_t           *allocList)
    892  1.1  oster {
    893  1.1  oster   RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
    894  1.1  oster 
    895  1.1  oster   /* s=start, e=end, s=stripe, a=access, f=failed, su=stripe unit */
    896  1.1  oster   RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr;
    897  1.1  oster 
    898  1.1  oster   RF_SectorCount_t numSect[2], numParitySect;
    899  1.1  oster   RF_PhysDiskAddr_t *pda;
    900  1.1  oster   char *rdBuf, *bufP;
    901  1.1  oster   int foundit, i;
    902  1.1  oster 
    903  1.1  oster   bufP = NULL;
    904  1.1  oster   foundit = 0;
    905  1.1  oster   /* first compute the following raid addresses:
    906  1.1  oster        start of stripe,                            (sosAddr)
    907  1.1  oster        MIN(start of access, start of failed SU),   (sosEndAddr)
    908  1.1  oster        MAX(end of access, end of failed SU),       (eosStartAddr)
    909  1.1  oster        end of stripe (i.e. start of next stripe)   (eosAddr)
    910  1.1  oster   */
    911  1.1  oster   sosAddr      = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
    912  1.1  oster   sosEndAddr   = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,failedPDA->raidAddress));
    913  1.1  oster   eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress));
    914  1.1  oster   eosAddr      = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress);
    915  1.1  oster 
    916  1.1  oster   /* now generate access stripe maps for each of the above regions of the
    917  1.1  oster    * stripe.  Use a dummy (NULL) buf ptr for now */
    918  1.1  oster 
    919  1.1  oster   new_asm_h[0] = (sosAddr != sosEndAddr)   ? rf_MapAccess(raidPtr, sosAddr,      sosEndAddr-sosAddr,   NULL, RF_DONT_REMAP) : NULL;
    920  1.1  oster   new_asm_h[1] = (eosStartAddr != eosAddr) ? rf_MapAccess(raidPtr, eosStartAddr, eosAddr-eosStartAddr, NULL, RF_DONT_REMAP) : NULL;
    921  1.1  oster 
    922  1.1  oster   /* walk through the PDAs and range-restrict each SU to the region of the
    923  1.1  oster    * SU touched on the failed PDA.  also compute total data buffer space
    924  1.1  oster    * requirements in this step.  Ignore the parity for now. */
    925  1.1  oster 
    926  1.1  oster   numSect[0] = numSect[1] = 0;
    927  1.1  oster   if (new_asm_h[0]) {
    928  1.1  oster     new_asm_h[0]->next = dag_h->asmList; dag_h->asmList = new_asm_h[0];
    929  1.1  oster     for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) {
    930  1.1  oster       rf_RangeRestrictPDA(raidPtr,failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); numSect[0] += pda->numSector;
    931  1.1  oster     }
    932  1.1  oster   }
    933  1.1  oster   if (new_asm_h[1]) {
    934  1.1  oster     new_asm_h[1]->next = dag_h->asmList; dag_h->asmList = new_asm_h[1];
    935  1.1  oster     for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) {
    936  1.1  oster       rf_RangeRestrictPDA(raidPtr,failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); numSect[1] += pda->numSector;
    937  1.1  oster     }
    938  1.1  oster   }
    939  1.1  oster   numParitySect = failedPDA->numSector;
    940  1.1  oster 
    941  1.1  oster   /* allocate buffer space for the data & parity we have to read to recover
    942  1.1  oster    * from the failure */
    943  1.1  oster 
    944  1.1  oster   if (numSect[0]+numSect[1]+ ((rpBufPtr) ? numParitySect : 0)) {      /* don't allocate parity buf if not needed */
    945  1.1  oster     RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr,numSect[0]+numSect[1]+numParitySect), (char *), allocList);
    946  1.1  oster     bufP = rdBuf;
    947  1.1  oster     if (rf_degDagDebug) printf("Newly allocated buffer (%d bytes) is 0x%lx\n",
    948  1.1  oster 			    (int)rf_RaidAddressToByte(raidPtr,numSect[0]+numSect[1]+numParitySect), (unsigned long) bufP);
    949  1.1  oster   }
    950  1.1  oster 
    951  1.1  oster   /* now walk through the pdas one last time and assign buffer pointers
    952  1.1  oster    * (ugh!).  Again, ignore the parity.  also, count nodes to find out how
    953  1.1  oster    * many bufs need to be xored together */
    954  1.1  oster   (*nXorBufs) = 1;   /* in read case, 1 is for parity.  In write case, 1 is for failed data */
    955  1.1  oster   if (new_asm_h[0]) {
    956  1.1  oster     for (pda=new_asm_h[0]->stripeMap->physInfo; pda; pda=pda->next) {pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr,pda->numSector);}
    957  1.1  oster     *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
    958  1.1  oster   }
    959  1.1  oster   if (new_asm_h[1]) {
    960  1.1  oster     for (pda=new_asm_h[1]->stripeMap->physInfo; pda; pda=pda->next) {pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr,pda->numSector);}
    961  1.1  oster     (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
    962  1.1  oster   }
    963  1.1  oster   if (rpBufPtr) *rpBufPtr = bufP;     /* the rest of the buffer is for parity */
    964  1.1  oster 
    965  1.1  oster   /* the last step is to figure out how many more distinct buffers need to
    966  1.1  oster    * get xor'd to produce the missing unit.  there's one for each user-data
    967  1.1  oster    * read node that overlaps the portion of the failed unit being accessed */
    968  1.1  oster 
    969  1.1  oster   for (foundit=i=0,pda=asmap->physInfo; pda; i++,pda=pda->next) {
    970  1.1  oster     if (pda == failedPDA) {i--; foundit=1; continue;}
    971  1.1  oster     if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) {
    972  1.1  oster       overlappingPDAs[i] = 1;
    973  1.1  oster       (*nXorBufs)++;
    974  1.1  oster     }
    975  1.1  oster   }
    976  1.1  oster   if (!foundit) {RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n"); RF_ASSERT(0);}
    977  1.1  oster 
    978  1.1  oster   if (rf_degDagDebug) {
    979  1.1  oster     if (new_asm_h[0]) {
    980  1.1  oster       printf("First asm:\n"); rf_PrintFullAccessStripeMap(new_asm_h[0], 1);
    981  1.1  oster     }
    982  1.1  oster     if (new_asm_h[1]) {
    983  1.1  oster       printf("Second asm:\n"); rf_PrintFullAccessStripeMap(new_asm_h[1], 1);
    984  1.1  oster     }
    985  1.1  oster   }
    986  1.1  oster }
    987  1.1  oster 
    988  1.1  oster 
    989  1.1  oster /* adjusts the offset and number of sectors in the destination pda so that
    990  1.1  oster  * it covers at most the region of the SU covered by the source PDA.  This
    991  1.1  oster  * is exclusively a restriction:  the number of sectors indicated by the
    992  1.1  oster  * target PDA can only shrink.
    993  1.1  oster  *
    994  1.1  oster  * For example:  s = sectors within SU indicated by source PDA
    995  1.1  oster  *               d = sectors within SU indicated by dest PDA
    996  1.1  oster  *               r = results, stored in dest PDA
    997  1.1  oster  *
    998  1.1  oster  * |--------------- one stripe unit ---------------------|
    999  1.1  oster  * |           sssssssssssssssssssssssssssssssss         |
   1000  1.1  oster  * |    ddddddddddddddddddddddddddddddddddddddddddddd    |
   1001  1.1  oster  * |           rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr         |
   1002  1.1  oster  *
   1003  1.1  oster  * Another example:
   1004  1.1  oster  *
   1005  1.1  oster  * |--------------- one stripe unit ---------------------|
   1006  1.1  oster  * |           sssssssssssssssssssssssssssssssss         |
   1007  1.1  oster  * |    ddddddddddddddddddddddd                          |
   1008  1.1  oster  * |           rrrrrrrrrrrrrrrr                          |
   1009  1.1  oster  *
   1010  1.1  oster  */
   1011  1.1  oster void rf_RangeRestrictPDA(
   1012  1.1  oster   RF_Raid_t          *raidPtr,
   1013  1.1  oster   RF_PhysDiskAddr_t  *src,
   1014  1.1  oster   RF_PhysDiskAddr_t  *dest,
   1015  1.1  oster   int                 dobuffer,
   1016  1.1  oster   int                 doraidaddr)
   1017  1.1  oster {
   1018  1.1  oster   RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
   1019  1.1  oster   RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector);
   1020  1.1  oster   RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector);
   1021  1.1  oster   RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector-1);     /* use -1 to be sure we stay within SU */
   1022  1.1  oster   RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector-1);
   1023  1.1  oster   RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */
   1024  1.1  oster 
   1025  1.1  oster   dest->startSector = subAddr + RF_MAX(soffs,doffs);
   1026  1.1  oster   dest->numSector = subAddr + RF_MIN(send,dend) + 1 - dest->startSector;
   1027  1.1  oster 
   1028  1.1  oster   if (dobuffer)
   1029  1.1  oster     dest->bufPtr += (soffs > doffs) ? rf_RaidAddressToByte(raidPtr,soffs-doffs) : 0;
   1030  1.1  oster   if (doraidaddr) {
   1031  1.1  oster     dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) +
   1032  1.1  oster                         rf_StripeUnitOffset(layoutPtr, dest->startSector);
   1033  1.1  oster   }
   1034  1.1  oster }
   1035  1.1  oster 
   1036  1.1  oster /*
   1037  1.1  oster  * Want the highest of these primes to be the largest one
   1038  1.1  oster  * less than the max expected number of columns (won't hurt
   1039  1.1  oster  * to be too small or too large, but won't be optimal, either)
   1040  1.1  oster  * --jimz
   1041  1.1  oster  */
   1042  1.1  oster #define NLOWPRIMES 8
   1043  1.1  oster static int lowprimes[NLOWPRIMES] = {2,3,5,7,11,13,17,19};
   1044  1.1  oster 
   1045  1.1  oster /*****************************************************************************
   1046  1.1  oster  * compute the workload shift factor.  (chained declustering)
   1047  1.1  oster  *
   1048  1.1  oster  * return nonzero if access should shift to secondary, otherwise,
   1049  1.1  oster  * access is to primary
   1050  1.1  oster  *****************************************************************************/
   1051  1.1  oster int rf_compute_workload_shift(
   1052  1.1  oster   RF_Raid_t          *raidPtr,
   1053  1.1  oster   RF_PhysDiskAddr_t  *pda)
   1054  1.1  oster {
   1055  1.1  oster   /*
   1056  1.1  oster    * variables:
   1057  1.1  oster    *  d   = column of disk containing primary
   1058  1.1  oster    *  f   = column of failed disk
   1059  1.1  oster    *  n   = number of disks in array
   1060  1.1  oster    *  sd  = "shift distance" (number of columns that d is to the right of f)
   1061  1.1  oster    *  row = row of array the access is in
   1062  1.1  oster    *  v   = numerator of redirection ratio
   1063  1.1  oster    *  k   = denominator of redirection ratio
   1064  1.1  oster    */
   1065  1.1  oster   RF_RowCol_t d, f, sd, row, n;
   1066  1.1  oster   int k, v, ret, i;
   1067  1.1  oster 
   1068  1.1  oster   row = pda->row;
   1069  1.1  oster   n = raidPtr->numCol;
   1070  1.1  oster 
   1071  1.1  oster   /* assign column of primary copy to d */
   1072  1.1  oster   d = pda->col;
   1073  1.1  oster 
   1074  1.1  oster   /* assign column of dead disk to f */
   1075  1.1  oster   for(f=0;((!RF_DEAD_DISK(raidPtr->Disks[row][f].status))&&(f<n));f++);
   1076  1.1  oster 
   1077  1.1  oster   RF_ASSERT(f < n);
   1078  1.1  oster   RF_ASSERT(f != d);
   1079  1.1  oster 
   1080  1.1  oster   sd = (f > d) ? (n + d - f) : (d - f);
   1081  1.1  oster   RF_ASSERT(sd < n);
   1082  1.1  oster 
   1083  1.1  oster   /*
   1084  1.1  oster    * v of every k accesses should be redirected
   1085  1.1  oster    *
   1086  1.1  oster    * v/k := (n-1-sd)/(n-1)
   1087  1.1  oster    */
   1088  1.1  oster   v = (n-1-sd);
   1089  1.1  oster   k = (n-1);
   1090  1.1  oster 
   1091  1.1  oster #if 1
   1092  1.1  oster   /*
   1093  1.1  oster    * XXX
   1094  1.1  oster    * Is this worth it?
   1095  1.1  oster    *
   1096  1.1  oster    * Now reduce the fraction, by repeatedly factoring
   1097  1.1  oster    * out primes (just like they teach in elementary school!)
   1098  1.1  oster    */
   1099  1.1  oster   for(i=0;i<NLOWPRIMES;i++) {
   1100  1.1  oster     if (lowprimes[i] > v)
   1101  1.1  oster       break;
   1102  1.1  oster     while (((v%lowprimes[i])==0) && ((k%lowprimes[i])==0)) {
   1103  1.1  oster       v /= lowprimes[i];
   1104  1.1  oster       k /= lowprimes[i];
   1105  1.1  oster     }
   1106  1.1  oster   }
   1107  1.1  oster #endif
   1108  1.1  oster 
   1109  1.1  oster   raidPtr->hist_diskreq[row][d]++;
   1110  1.1  oster   if (raidPtr->hist_diskreq[row][d] > v) {
   1111  1.1  oster     ret = 0; /* do not redirect */
   1112  1.1  oster   }
   1113  1.1  oster   else {
   1114  1.1  oster     ret = 1; /* redirect */
   1115  1.1  oster   }
   1116  1.1  oster 
   1117  1.1  oster #if 0
   1118  1.1  oster   printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret,
   1119  1.1  oster     raidPtr->hist_diskreq[row][d]);
   1120  1.1  oster #endif
   1121  1.1  oster 
   1122  1.1  oster   if (raidPtr->hist_diskreq[row][d] >= k) {
   1123  1.1  oster     /* reset counter */
   1124  1.1  oster     raidPtr->hist_diskreq[row][d] = 0;
   1125  1.1  oster   }
   1126  1.1  oster 
   1127  1.1  oster   return(ret);
   1128  1.1  oster }
   1129  1.1  oster 
   1130  1.1  oster /*
   1131  1.1  oster  * Disk selection routines
   1132  1.1  oster  */
   1133  1.1  oster 
   1134  1.1  oster /*
   1135  1.1  oster  * Selects the disk with the shortest queue from a mirror pair.
   1136  1.1  oster  * Both the disk I/Os queued in RAIDframe as well as those at the physical
   1137  1.1  oster  * disk are counted as members of the "queue"
   1138  1.1  oster  */
   1139  1.1  oster void rf_SelectMirrorDiskIdle(RF_DagNode_t *node)
   1140  1.1  oster {
   1141  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
   1142  1.1  oster   RF_RowCol_t rowData, colData, rowMirror, colMirror;
   1143  1.1  oster   int dataQueueLength, mirrorQueueLength, usemirror;
   1144  1.1  oster   RF_PhysDiskAddr_t *data_pda   = (RF_PhysDiskAddr_t *)node->params[0].p;
   1145  1.1  oster   RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *)node->params[4].p;
   1146  1.1  oster   RF_PhysDiskAddr_t *tmp_pda;
   1147  1.1  oster   RF_RaidDisk_t **disks = raidPtr->Disks;
   1148  1.1  oster   RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
   1149  1.1  oster 
   1150  1.1  oster   /* return the [row col] of the disk with the shortest queue */
   1151  1.1  oster   rowData = data_pda->row;
   1152  1.1  oster   colData = data_pda->col;
   1153  1.1  oster   rowMirror = mirror_pda->row;
   1154  1.1  oster   colMirror = mirror_pda->col;
   1155  1.1  oster   dataQueue   = &(dqs[rowData][colData]);
   1156  1.1  oster   mirrorQueue = &(dqs[rowMirror][colMirror]);
   1157  1.1  oster 
   1158  1.1  oster #ifdef RF_LOCK_QUEUES_TO_READ_LEN
   1159  1.1  oster   RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
   1160  1.1  oster #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
   1161  1.1  oster   dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding;
   1162  1.1  oster #ifdef RF_LOCK_QUEUES_TO_READ_LEN
   1163  1.1  oster   RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
   1164  1.1  oster   RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
   1165  1.1  oster #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
   1166  1.1  oster   mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding;
   1167  1.1  oster #ifdef RF_LOCK_QUEUES_TO_READ_LEN
   1168  1.1  oster   RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
   1169  1.1  oster #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
   1170  1.1  oster 
   1171  1.1  oster   usemirror = 0;
   1172  1.1  oster   if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
   1173  1.1  oster     usemirror = 0;
   1174  1.1  oster   }
   1175  1.1  oster   else if (RF_DEAD_DISK(disks[rowData][colData].status)) {
   1176  1.1  oster     usemirror = 1;
   1177  1.1  oster   }
   1178  1.1  oster   else if (dataQueueLength < mirrorQueueLength) {
   1179  1.1  oster     usemirror = 0;
   1180  1.1  oster   }
   1181  1.1  oster   else if (mirrorQueueLength < dataQueueLength) {
   1182  1.1  oster     usemirror = 1;
   1183  1.1  oster   }
   1184  1.1  oster   else {
   1185  1.1  oster     /* queues are equal length. attempt cleverness. */
   1186  1.1  oster     if (SNUM_DIFF(dataQueue->last_deq_sector,data_pda->startSector)
   1187  1.1  oster         <= SNUM_DIFF(mirrorQueue->last_deq_sector,mirror_pda->startSector))
   1188  1.1  oster     {
   1189  1.1  oster       usemirror = 0;
   1190  1.1  oster     }
   1191  1.1  oster     else {
   1192  1.1  oster       usemirror = 1;
   1193  1.1  oster     }
   1194  1.1  oster   }
   1195  1.1  oster 
   1196  1.1  oster   if (usemirror) {
   1197  1.1  oster     /* use mirror (parity) disk, swap params 0 & 4 */
   1198  1.1  oster     tmp_pda = data_pda;
   1199  1.1  oster     node->params[0].p = mirror_pda;
   1200  1.1  oster     node->params[4].p = tmp_pda;
   1201  1.1  oster   }
   1202  1.1  oster   else {
   1203  1.1  oster     /* use data disk, leave param 0 unchanged */
   1204  1.1  oster   }
   1205  1.1  oster   /* printf("dataQueueLength %d, mirrorQueueLength %d\n",dataQueueLength, mirrorQueueLength); */
   1206  1.1  oster }
   1207  1.1  oster 
   1208  1.1  oster /*
   1209  1.1  oster  * Do simple partitioning. This assumes that
   1210  1.1  oster  * the data and parity disks are laid out identically.
   1211  1.1  oster  */
   1212  1.1  oster void rf_SelectMirrorDiskPartition(RF_DagNode_t *node)
   1213  1.1  oster {
   1214  1.1  oster   RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
   1215  1.1  oster   RF_RowCol_t rowData, colData, rowMirror, colMirror;
   1216  1.1  oster   RF_PhysDiskAddr_t *data_pda   = (RF_PhysDiskAddr_t *)node->params[0].p;
   1217  1.1  oster   RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *)node->params[4].p;
   1218  1.1  oster   RF_PhysDiskAddr_t *tmp_pda;
   1219  1.1  oster   RF_RaidDisk_t **disks = raidPtr->Disks;
   1220  1.1  oster   RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
   1221  1.1  oster   int usemirror;
   1222  1.1  oster 
   1223  1.1  oster   /* return the [row col] of the disk with the shortest queue */
   1224  1.1  oster   rowData = data_pda->row;
   1225  1.1  oster   colData = data_pda->col;
   1226  1.1  oster   rowMirror = mirror_pda->row;
   1227  1.1  oster   colMirror = mirror_pda->col;
   1228  1.1  oster   dataQueue   = &(dqs[rowData][colData]);
   1229  1.1  oster   mirrorQueue = &(dqs[rowMirror][colMirror]);
   1230  1.1  oster 
   1231  1.1  oster   usemirror = 0;
   1232  1.1  oster   if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
   1233  1.1  oster     usemirror = 0;
   1234  1.1  oster   }
   1235  1.1  oster   else if (RF_DEAD_DISK(disks[rowData][colData].status)) {
   1236  1.1  oster     usemirror = 1;
   1237  1.1  oster   }
   1238  1.1  oster   else if (data_pda->startSector < (disks[rowData][colData].numBlocks / 2)) {
   1239  1.1  oster     usemirror = 0;
   1240  1.1  oster   }
   1241  1.1  oster   else {
   1242  1.1  oster     usemirror = 1;
   1243  1.1  oster   }
   1244  1.1  oster 
   1245  1.1  oster   if (usemirror) {
   1246  1.1  oster     /* use mirror (parity) disk, swap params 0 & 4 */
   1247  1.1  oster     tmp_pda = data_pda;
   1248  1.1  oster     node->params[0].p = mirror_pda;
   1249  1.1  oster     node->params[4].p = tmp_pda;
   1250  1.1  oster   }
   1251  1.1  oster   else {
   1252  1.1  oster     /* use data disk, leave param 0 unchanged */
   1253  1.1  oster   }
   1254  1.1  oster }
   1255