Home | History | Annotate | Line # | Download | only in raidframe
rf_states.c revision 1.4
      1  1.4     oster /*	$NetBSD: rf_states.c,v 1.4 1999/01/26 02:34:02 oster Exp $	*/
      2  1.1     oster /*
      3  1.1     oster  * Copyright (c) 1995 Carnegie-Mellon University.
      4  1.1     oster  * All rights reserved.
      5  1.1     oster  *
      6  1.1     oster  * Author: Mark Holland, William V. Courtright II, Robby Findler
      7  1.1     oster  *
      8  1.1     oster  * Permission to use, copy, modify and distribute this software and
      9  1.1     oster  * its documentation is hereby granted, provided that both the copyright
     10  1.1     oster  * notice and this permission notice appear in all copies of the
     11  1.1     oster  * software, derivative works or modified versions, and any portions
     12  1.1     oster  * thereof, and that both notices appear in supporting documentation.
     13  1.1     oster  *
     14  1.1     oster  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  1.1     oster  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  1.1     oster  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  1.1     oster  *
     18  1.1     oster  * Carnegie Mellon requests users of this software to return to
     19  1.1     oster  *
     20  1.1     oster  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  1.1     oster  *  School of Computer Science
     22  1.1     oster  *  Carnegie Mellon University
     23  1.1     oster  *  Pittsburgh PA 15213-3890
     24  1.1     oster  *
     25  1.1     oster  * any improvements or extensions that they make and grant Carnegie the
     26  1.1     oster  * rights to redistribute these changes.
     27  1.1     oster  */
     28  1.1     oster 
     29  1.1     oster #include <sys/errno.h>
     30  1.1     oster 
     31  1.1     oster #include "rf_archs.h"
     32  1.1     oster #include "rf_threadstuff.h"
     33  1.1     oster #include "rf_raid.h"
     34  1.1     oster #include "rf_dag.h"
     35  1.1     oster #include "rf_desc.h"
     36  1.1     oster #include "rf_aselect.h"
     37  1.1     oster #include "rf_threadid.h"
     38  1.1     oster #include "rf_general.h"
     39  1.1     oster #include "rf_states.h"
     40  1.1     oster #include "rf_dagutils.h"
     41  1.1     oster #include "rf_driver.h"
     42  1.1     oster #include "rf_engine.h"
     43  1.1     oster #include "rf_map.h"
     44  1.1     oster #include "rf_etimer.h"
     45  1.1     oster 
     46  1.1     oster #if defined(KERNEL) && (DKUSAGE > 0)
     47  1.1     oster #include <sys/dkusage.h>
     48  1.1     oster #include <io/common/iotypes.h>
     49  1.1     oster #include <io/cam/dec_cam.h>
     50  1.1     oster #include <io/cam/cam.h>
     51  1.1     oster #include <io/cam/pdrv.h>
     52  1.1     oster #endif /* KERNEL && DKUSAGE > 0 */
     53  1.1     oster 
     54  1.1     oster /* prototypes for some of the available states.
     55  1.1     oster 
     56  1.1     oster    States must:
     57  1.1     oster 
     58  1.1     oster      - not block.
     59  1.1     oster 
     60  1.1     oster      - either schedule rf_ContinueRaidAccess as a callback and return
     61  1.1     oster        RF_TRUE, or complete all of their work and return RF_FALSE.
     62  1.1     oster 
     63  1.1     oster      - increment desc->state when they have finished their work.
     64  1.1     oster */
     65  1.1     oster 
     66  1.1     oster static char *StateName(RF_AccessState_t state)
     67  1.1     oster {
     68  1.1     oster   switch (state) {
     69  1.1     oster     case rf_QuiesceState:            return "QuiesceState";
     70  1.1     oster     case rf_MapState:                return "MapState";
     71  1.1     oster     case rf_LockState:               return "LockState";
     72  1.1     oster     case rf_CreateDAGState:          return "CreateDAGState";
     73  1.1     oster     case rf_ExecuteDAGState:         return "ExecuteDAGState";
     74  1.1     oster     case rf_ProcessDAGState:         return "ProcessDAGState";
     75  1.1     oster     case rf_CleanupState:            return "CleanupState";
     76  1.1     oster     case rf_LastState:               return "LastState";
     77  1.1     oster     case rf_IncrAccessesCountState:  return "IncrAccessesCountState";
     78  1.1     oster     case rf_DecrAccessesCountState:  return "DecrAccessesCountState";
     79  1.1     oster     default:                         return "!!! UnnamedState !!!";
     80  1.1     oster   }
     81  1.1     oster }
     82  1.1     oster 
     83  1.1     oster void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
     84  1.1     oster {
     85  1.1     oster   int suspended = RF_FALSE;
     86  1.1     oster   int current_state_index = desc->state;
     87  1.1     oster   RF_AccessState_t current_state = desc->states[current_state_index];
     88  1.1     oster 
     89  1.1     oster   do {
     90  1.1     oster 
     91  1.1     oster     current_state_index = desc->state;
     92  1.1     oster     current_state = desc->states [current_state_index];
     93  1.1     oster 
     94  1.1     oster     switch (current_state) {
     95  1.1     oster 
     96  1.1     oster     case rf_QuiesceState: 		 suspended = rf_State_Quiesce(desc);
     97  1.1     oster 				 break;
     98  1.1     oster     case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc);
     99  1.1     oster 				 break;
    100  1.1     oster     case rf_MapState:		 suspended = rf_State_Map(desc);
    101  1.1     oster 				 break;
    102  1.1     oster     case rf_LockState:		 suspended = rf_State_Lock(desc);
    103  1.1     oster 				 break;
    104  1.1     oster     case rf_CreateDAGState:	 suspended = rf_State_CreateDAG(desc);
    105  1.1     oster 				 break;
    106  1.1     oster     case rf_ExecuteDAGState:	 suspended = rf_State_ExecuteDAG(desc);
    107  1.1     oster 				 break;
    108  1.1     oster     case rf_ProcessDAGState:	 suspended = rf_State_ProcessDAG(desc);
    109  1.1     oster 				 break;
    110  1.1     oster     case rf_CleanupState: 	 suspended = rf_State_Cleanup(desc);
    111  1.1     oster 				 break;
    112  1.1     oster     case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc);
    113  1.1     oster 				 break;
    114  1.1     oster     case rf_LastState:		 suspended = rf_State_LastState(desc);
    115  1.1     oster 				 break;
    116  1.1     oster     }
    117  1.1     oster 
    118  1.1     oster     /* after this point, we cannot dereference desc since desc may
    119  1.1     oster        have been freed. desc is only freed in LastState, so if we
    120  1.1     oster        renter this function or loop back up, desc should be valid. */
    121  1.1     oster 
    122  1.1     oster     if (rf_printStatesDebug) {
    123  1.1     oster       int tid;
    124  1.1     oster       rf_get_threadid (tid);
    125  1.1     oster 
    126  1.1     oster       printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
    127  1.1     oster 	      tid, StateName(current_state), current_state_index, (long)desc,
    128  1.1     oster 	      suspended ? "callback scheduled" : "looping");
    129  1.1     oster     }
    130  1.1     oster   } while (!suspended && current_state != rf_LastState);
    131  1.1     oster 
    132  1.1     oster   return;
    133  1.1     oster }
    134  1.1     oster 
    135  1.1     oster 
    136  1.1     oster void rf_ContinueDagAccess (RF_DagList_t *dagList)
    137  1.1     oster {
    138  1.1     oster   RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
    139  1.1     oster   RF_RaidAccessDesc_t *desc;
    140  1.1     oster   RF_DagHeader_t *dag_h;
    141  1.1     oster   RF_Etimer_t timer;
    142  1.1     oster   int i;
    143  1.1     oster 
    144  1.1     oster   desc = dagList->desc;
    145  1.1     oster 
    146  1.1     oster   timer = tracerec->timer;
    147  1.1     oster   RF_ETIMER_STOP(timer);
    148  1.1     oster   RF_ETIMER_EVAL(timer);
    149  1.1     oster   tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
    150  1.1     oster   RF_ETIMER_START(tracerec->timer);
    151  1.1     oster 
    152  1.1     oster   /* skip to dag which just finished */
    153  1.1     oster   dag_h = dagList->dags;
    154  1.1     oster   for (i = 0; i < dagList->numDagsDone; i++) {
    155  1.1     oster     dag_h = dag_h->next;
    156  1.1     oster   }
    157  1.1     oster 
    158  1.1     oster   /* check to see if retry is required */
    159  1.1     oster   if (dag_h->status == rf_rollBackward) {
    160  1.1     oster     /* when a dag fails, mark desc status as bad and allow all other dags
    161  1.1     oster      * in the desc to execute to completion.  then, free all dags and start over */
    162  1.1     oster     desc->status = 1;  /* bad status */
    163  1.1     oster #if RF_DEMO > 0
    164  1.1     oster     if (!rf_demoMode)
    165  1.1     oster #endif /* RF_DEMO > 0 */
    166  1.1     oster     {
    167  1.1     oster       printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
    168  1.1     oster 	     desc->tid, desc->type, (long)desc->raidAddress,
    169  1.1     oster 	     (long)desc->raidAddress,(int)desc->numBlocks,
    170  1.1     oster 	     (int)desc->numBlocks, (unsigned long) (desc->bufPtr));
    171  1.1     oster     }
    172  1.1     oster   }
    173  1.1     oster 
    174  1.1     oster   dagList->numDagsDone++;
    175  1.1     oster   rf_ContinueRaidAccess(desc);
    176  1.1     oster }
    177  1.1     oster 
    178  1.1     oster 
    179  1.1     oster int rf_State_LastState(RF_RaidAccessDesc_t *desc)
    180  1.1     oster {
    181  1.1     oster   void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc;
    182  1.2  drochner   RF_CBParam_t callbackArg;
    183  1.2  drochner 
    184  1.2  drochner   callbackArg.p = desc->callbackArg;
    185  1.1     oster 
    186  1.1     oster   if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */
    187  1.1     oster #if DKUSAGE > 0
    188  1.1     oster     RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp);
    189  1.1     oster #else
    190  1.1     oster     RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid);
    191  1.1     oster #endif /* DKUSAGE > 0 */
    192  1.3  explorer 
    193  1.3  explorer     /*
    194  1.3  explorer      * If this is not an async request, wake up the caller
    195  1.3  explorer      */
    196  1.3  explorer     if (desc->async_flag == 0)
    197  1.3  explorer     	wakeup(desc->bp);
    198  1.3  explorer 
    199  1.1     oster     /*     printf("Calling biodone on 0x%x\n",desc->bp); */
    200  1.1     oster     biodone(desc->bp); 			/* access came through ioctl */
    201  1.1     oster   }
    202  1.1     oster 
    203  1.1     oster   if (callbackFunc) callbackFunc(callbackArg);
    204  1.1     oster   rf_FreeRaidAccDesc(desc);
    205  1.1     oster 
    206  1.1     oster   return RF_FALSE;
    207  1.1     oster }
    208  1.1     oster 
    209  1.1     oster int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
    210  1.1     oster {
    211  1.1     oster   RF_Raid_t *raidPtr;
    212  1.1     oster 
    213  1.1     oster   raidPtr = desc->raidPtr;
    214  1.1     oster   /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */
    215  1.1     oster   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    216  1.1     oster   raidPtr->accs_in_flight++; /* used to detect quiescence */
    217  1.1     oster   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    218  1.1     oster 
    219  1.1     oster   desc->state++;
    220  1.1     oster   return RF_FALSE;
    221  1.1     oster }
    222  1.1     oster 
    223  1.1     oster int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
    224  1.1     oster {
    225  1.1     oster   RF_Raid_t *raidPtr;
    226  1.1     oster 
    227  1.1     oster   raidPtr = desc->raidPtr;
    228  1.1     oster 
    229  1.1     oster   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    230  1.1     oster   raidPtr->accs_in_flight--;
    231  1.1     oster   if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0)  {
    232  1.1     oster     rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
    233  1.1     oster   }
    234  1.1     oster   rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
    235  1.1     oster   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    236  1.1     oster 
    237  1.1     oster   desc->state++;
    238  1.1     oster   return RF_FALSE;
    239  1.1     oster }
    240  1.1     oster 
    241  1.1     oster int rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
    242  1.1     oster {
    243  1.1     oster   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    244  1.1     oster   RF_Etimer_t timer;
    245  1.1     oster   int suspended = RF_FALSE;
    246  1.1     oster   RF_Raid_t *raidPtr;
    247  1.1     oster 
    248  1.1     oster   raidPtr = desc->raidPtr;
    249  1.1     oster 
    250  1.1     oster   RF_ETIMER_START(timer);
    251  1.1     oster   RF_ETIMER_START(desc->timer);
    252  1.1     oster 
    253  1.1     oster   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    254  1.1     oster   if (raidPtr->accesses_suspended) {
    255  1.1     oster     RF_CallbackDesc_t *cb;
    256  1.1     oster     cb = rf_AllocCallbackDesc();
    257  1.1     oster     /* XXX the following cast is quite bogus...  rf_ContinueRaidAccess
    258  1.1     oster        takes a (RF_RaidAccessDesc_t *) as an argument..  GO */
    259  1.1     oster     cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess;
    260  1.1     oster     cb->callbackArg.p  = (void *) desc;
    261  1.1     oster     cb->next = raidPtr->quiesce_wait_list;
    262  1.1     oster     raidPtr->quiesce_wait_list = cb;
    263  1.1     oster     suspended = RF_TRUE;
    264  1.1     oster   }
    265  1.1     oster 
    266  1.1     oster   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    267  1.1     oster 
    268  1.1     oster   RF_ETIMER_STOP(timer);
    269  1.1     oster   RF_ETIMER_EVAL(timer);
    270  1.1     oster   tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
    271  1.1     oster 
    272  1.1     oster   if (suspended && rf_quiesceDebug)
    273  1.1     oster     printf("Stalling access due to quiescence lock\n");
    274  1.1     oster 
    275  1.1     oster   desc->state++;
    276  1.1     oster   return suspended;
    277  1.1     oster }
    278  1.1     oster 
    279  1.1     oster int rf_State_Map(RF_RaidAccessDesc_t *desc)
    280  1.1     oster {
    281  1.1     oster   RF_Raid_t *raidPtr               = desc->raidPtr;
    282  1.1     oster   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    283  1.1     oster   RF_Etimer_t timer;
    284  1.1     oster 
    285  1.1     oster   RF_ETIMER_START(timer);
    286  1.1     oster 
    287  1.1     oster   if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
    288  1.1     oster 			      desc->bufPtr, RF_DONT_REMAP)))
    289  1.1     oster     RF_PANIC();
    290  1.1     oster 
    291  1.1     oster   RF_ETIMER_STOP(timer);
    292  1.1     oster   RF_ETIMER_EVAL(timer);
    293  1.1     oster   tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
    294  1.1     oster 
    295  1.1     oster   desc->state ++;
    296  1.1     oster   return RF_FALSE;
    297  1.1     oster }
    298  1.1     oster 
    299  1.1     oster int rf_State_Lock(RF_RaidAccessDesc_t *desc)
    300  1.1     oster {
    301  1.1     oster   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    302  1.1     oster   RF_Raid_t *raidPtr               = desc->raidPtr;
    303  1.1     oster   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    304  1.1     oster   RF_AccessStripeMap_t *asm_p;
    305  1.1     oster   RF_Etimer_t timer;
    306  1.1     oster   int suspended = RF_FALSE;
    307  1.1     oster 
    308  1.1     oster   RF_ETIMER_START(timer);
    309  1.1     oster   if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
    310  1.1     oster     RF_StripeNum_t lastStripeID = -1;
    311  1.1     oster 
    312  1.1     oster     /* acquire each lock that we don't already hold */
    313  1.1     oster     for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
    314  1.1     oster       RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
    315  1.1     oster       if (!rf_suppressLocksAndLargeWrites &&
    316  1.1     oster           asm_p->parityInfo &&
    317  1.1     oster           !(desc->flags& RF_DAG_SUPPRESS_LOCKS) &&
    318  1.1     oster           !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED))
    319  1.1     oster       {
    320  1.1     oster         asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
    321  1.1     oster         RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired
    322  1.1     oster 						   hierarchically */
    323  1.1     oster         lastStripeID = asm_p->stripeID;
    324  1.1     oster 	/* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus!  GO */
    325  1.1     oster         RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
    326  1.1     oster             (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p,
    327  1.1     oster             raidPtr->Layout.dataSectorsPerStripe);
    328  1.1     oster         if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
    329  1.1     oster             &asm_p->lockReqDesc))
    330  1.1     oster         {
    331  1.1     oster           suspended = RF_TRUE;
    332  1.1     oster           break;
    333  1.1     oster         }
    334  1.1     oster       }
    335  1.1     oster 
    336  1.1     oster       if (desc->type == RF_IO_TYPE_WRITE &&
    337  1.1     oster           raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing)
    338  1.1     oster       {
    339  1.1     oster         if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) {
    340  1.1     oster           int val;
    341  1.1     oster 
    342  1.1     oster           asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
    343  1.1     oster 	  /* XXX the cast below is quite bogus!!! XXX  GO */
    344  1.1     oster           val = rf_ForceOrBlockRecon(raidPtr, asm_p,
    345  1.1     oster 		 (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc);
    346  1.1     oster           if (val == 0) {
    347  1.1     oster             asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
    348  1.1     oster           }
    349  1.1     oster           else {
    350  1.1     oster             suspended = RF_TRUE;
    351  1.1     oster             break;
    352  1.1     oster           }
    353  1.1     oster         }
    354  1.1     oster         else {
    355  1.1     oster           if (rf_pssDebug) {
    356  1.1     oster             printf("[%d] skipping force/block because already done, psid %ld\n",
    357  1.1     oster                 desc->tid,(long)asm_p->stripeID);
    358  1.1     oster           }
    359  1.1     oster         }
    360  1.1     oster       }
    361  1.1     oster       else {
    362  1.1     oster         if (rf_pssDebug) {
    363  1.1     oster           printf("[%d] skipping force/block because not write or not under recon, psid %ld\n",
    364  1.1     oster               desc->tid,(long)asm_p->stripeID);
    365  1.1     oster         }
    366  1.1     oster       }
    367  1.1     oster     }
    368  1.1     oster 
    369  1.1     oster     RF_ETIMER_STOP(timer);
    370  1.1     oster     RF_ETIMER_EVAL(timer);
    371  1.1     oster     tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
    372  1.1     oster 
    373  1.1     oster     if (suspended)
    374  1.1     oster       return(RF_TRUE);
    375  1.1     oster   }
    376  1.1     oster 
    377  1.1     oster   desc->state++;
    378  1.1     oster   return(RF_FALSE);
    379  1.1     oster }
    380  1.1     oster 
    381  1.1     oster /*
    382  1.1     oster  * the following three states create, execute, and post-process dags
    383  1.1     oster  * the error recovery unit is a single dag.
    384  1.1     oster  * by default, SelectAlgorithm creates an array of dags, one per parity stripe
    385  1.1     oster  * in some tricky cases, multiple dags per stripe are created
    386  1.1     oster  *   - dags within a parity stripe are executed sequentially (arbitrary order)
    387  1.1     oster  *   - dags for distinct parity stripes are executed concurrently
    388  1.1     oster  *
    389  1.1     oster  * repeat until all dags complete successfully -or- dag selection fails
    390  1.1     oster  *
    391  1.1     oster  * while !done
    392  1.1     oster  *   create dag(s) (SelectAlgorithm)
    393  1.1     oster  *   if dag
    394  1.1     oster  *     execute dag (DispatchDAG)
    395  1.1     oster  *     if dag successful
    396  1.1     oster  *       done (SUCCESS)
    397  1.1     oster  *     else
    398  1.1     oster  *       !done (RETRY - start over with new dags)
    399  1.1     oster  *   else
    400  1.1     oster  *     done (FAIL)
    401  1.1     oster  */
    402  1.1     oster int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc)
    403  1.1     oster {
    404  1.1     oster   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    405  1.1     oster   RF_Etimer_t timer;
    406  1.1     oster   RF_DagHeader_t *dag_h;
    407  1.1     oster   int i, selectStatus;
    408  1.1     oster 
    409  1.1     oster   /* generate a dag for the access, and fire it off.  When the dag
    410  1.1     oster      completes, we'll get re-invoked in the next state. */
    411  1.1     oster   RF_ETIMER_START(timer);
    412  1.1     oster   /* SelectAlgorithm returns one or more dags */
    413  1.1     oster   selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS);
    414  1.1     oster   if (rf_printDAGsDebug)
    415  1.1     oster     for (i = 0; i < desc->numStripes; i++)
    416  1.1     oster       rf_PrintDAGList(desc->dagArray[i].dags);
    417  1.1     oster   RF_ETIMER_STOP(timer);
    418  1.1     oster   RF_ETIMER_EVAL(timer);
    419  1.1     oster   /* update time to create all dags */
    420  1.1     oster   tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
    421  1.1     oster 
    422  1.1     oster   desc->status = 0; /* good status */
    423  1.1     oster 
    424  1.1     oster   if (selectStatus) {
    425  1.1     oster     /* failed to create a dag */
    426  1.1     oster     /* this happens when there are too many faults or incomplete dag libraries */
    427  1.1     oster     printf("[Failed to create a DAG\n]");
    428  1.1     oster     RF_PANIC();
    429  1.1     oster   }
    430  1.1     oster   else {
    431  1.1     oster     /* bind dags to desc */
    432  1.1     oster     for (i = 0; i < desc->numStripes; i++) {
    433  1.1     oster       dag_h = desc->dagArray[i].dags;
    434  1.1     oster       while (dag_h) {
    435  1.1     oster 	dag_h->bp = (struct buf *) desc->bp;
    436  1.1     oster 	dag_h->tracerec = tracerec;
    437  1.1     oster 	dag_h = dag_h->next;
    438  1.1     oster       }
    439  1.1     oster     }
    440  1.1     oster     desc->flags |= RF_DAG_DISPATCH_RETURNED;
    441  1.1     oster     desc->state++;  /* next state should be rf_State_ExecuteDAG */
    442  1.1     oster   }
    443  1.1     oster   return RF_FALSE;
    444  1.1     oster }
    445  1.1     oster 
    446  1.1     oster 
    447  1.1     oster 
    448  1.1     oster /* the access has an array of dagLists, one dagList per parity stripe.
    449  1.1     oster  * fire the first dag in each parity stripe (dagList).
    450  1.1     oster  * dags within a stripe (dagList) must be executed sequentially
    451  1.1     oster  *  - this preserves atomic parity update
    452  1.1     oster  * dags for independent parity groups (stripes) are fired concurrently */
    453  1.1     oster 
    454  1.1     oster int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
    455  1.1     oster {
    456  1.1     oster   int i;
    457  1.1     oster   RF_DagHeader_t *dag_h;
    458  1.1     oster   RF_DagList_t *dagArray = desc->dagArray;
    459  1.1     oster 
    460  1.1     oster   /* next state is always rf_State_ProcessDAG
    461  1.1     oster    * important to do this before firing the first dag
    462  1.1     oster    * (it may finish before we leave this routine) */
    463  1.1     oster   desc->state++;
    464  1.1     oster 
    465  1.1     oster   /* sweep dag array, a stripe at a time, firing the first dag in each stripe */
    466  1.1     oster   for (i = 0; i < desc->numStripes; i++) {
    467  1.1     oster     RF_ASSERT(dagArray[i].numDags > 0);
    468  1.1     oster     RF_ASSERT(dagArray[i].numDagsDone == 0);
    469  1.1     oster     RF_ASSERT(dagArray[i].numDagsFired == 0);
    470  1.1     oster     RF_ETIMER_START(dagArray[i].tracerec.timer);
    471  1.1     oster     /* fire first dag in this stripe */
    472  1.1     oster     dag_h = dagArray[i].dags;
    473  1.1     oster     RF_ASSERT(dag_h);
    474  1.1     oster     dagArray[i].numDagsFired++;
    475  1.1     oster     /* XXX Yet another case where we pass in a conflicting function pointer
    476  1.1     oster        :-(  XXX  GO */
    477  1.1     oster     rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]);
    478  1.1     oster   }
    479  1.1     oster 
    480  1.1     oster   /* the DAG will always call the callback, even if there was no
    481  1.1     oster    * blocking, so we are always suspended in this state */
    482  1.1     oster   return RF_TRUE;
    483  1.1     oster }
    484  1.1     oster 
    485  1.1     oster 
    486  1.1     oster 
    487  1.1     oster /* rf_State_ProcessDAG is entered when a dag completes.
    488  1.1     oster  * first, check to see if all dags in the access have completed
    489  1.1     oster  * if not, fire as many dags as possible */
    490  1.1     oster 
    491  1.1     oster int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
    492  1.1     oster {
    493  1.1     oster   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    494  1.1     oster   RF_Raid_t *raidPtr               = desc->raidPtr;
    495  1.1     oster   RF_DagHeader_t *dag_h;
    496  1.1     oster   int i, j, done = RF_TRUE;
    497  1.1     oster   RF_DagList_t *dagArray = desc->dagArray;
    498  1.1     oster   RF_Etimer_t timer;
    499  1.1     oster 
    500  1.1     oster   /* check to see if this is the last dag */
    501  1.1     oster   for (i = 0; i < desc->numStripes; i++)
    502  1.1     oster     if (dagArray[i].numDags != dagArray[i].numDagsDone)
    503  1.1     oster       done = RF_FALSE;
    504  1.1     oster 
    505  1.1     oster   if (done) {
    506  1.1     oster     if (desc->status) {
    507  1.1     oster       /* a dag failed, retry */
    508  1.1     oster       RF_ETIMER_START(timer);
    509  1.1     oster       /* free all dags */
    510  1.1     oster       for (i = 0; i < desc->numStripes; i++) {
    511  1.1     oster 	rf_FreeDAG(desc->dagArray[i].dags);
    512  1.1     oster       }
    513  1.1     oster       rf_MarkFailuresInASMList(raidPtr, asmh);
    514  1.1     oster       /* back up to rf_State_CreateDAG */
    515  1.1     oster       desc->state = desc->state - 2;
    516  1.1     oster       return RF_FALSE;
    517  1.1     oster     }
    518  1.1     oster     else {
    519  1.1     oster       /* move on to rf_State_Cleanup */
    520  1.1     oster       desc->state++;
    521  1.1     oster     }
    522  1.1     oster     return RF_FALSE;
    523  1.1     oster   }
    524  1.1     oster   else {
    525  1.1     oster     /* more dags to execute */
    526  1.1     oster     /* see if any are ready to be fired.  if so, fire them */
    527  1.1     oster     /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */
    528  1.1     oster     for (i = 0; i < desc->numStripes; i++) {
    529  1.1     oster       if ((dagArray[i].numDagsDone < dagArray[i].numDags)
    530  1.1     oster 	  && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
    531  1.1     oster 	  && (dagArray[i].numDagsFired > 0)) {
    532  1.1     oster 	RF_ETIMER_START(dagArray[i].tracerec.timer);
    533  1.1     oster 	/* fire next dag in this stripe */
    534  1.1     oster 	/* first, skip to next dag awaiting execution */
    535  1.1     oster 	dag_h = dagArray[i].dags;
    536  1.1     oster 	for (j = 0; j < dagArray[i].numDagsDone; j++)
    537  1.1     oster 	  dag_h = dag_h->next;
    538  1.1     oster 	dagArray[i].numDagsFired++;
    539  1.1     oster 	/* XXX and again we pass a different function pointer.. GO */
    540  1.1     oster 	rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess,
    541  1.1     oster 		       &dagArray[i]);
    542  1.1     oster       }
    543  1.1     oster     }
    544  1.1     oster     return RF_TRUE;
    545  1.1     oster   }
    546  1.1     oster }
    547  1.1     oster 
    548  1.1     oster /* only make it this far if all dags complete successfully */
    549  1.1     oster int rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
    550  1.1     oster {
    551  1.1     oster   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    552  1.1     oster   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    553  1.1     oster   RF_Raid_t *raidPtr               = desc->raidPtr;
    554  1.1     oster   RF_AccessStripeMap_t *asm_p;
    555  1.1     oster   RF_DagHeader_t *dag_h;
    556  1.1     oster   RF_Etimer_t timer;
    557  1.1     oster   int tid, i;
    558  1.1     oster 
    559  1.1     oster   desc->state ++;
    560  1.1     oster 
    561  1.1     oster   rf_get_threadid(tid);
    562  1.1     oster 
    563  1.1     oster   timer = tracerec->timer;
    564  1.1     oster   RF_ETIMER_STOP(timer);
    565  1.1     oster   RF_ETIMER_EVAL(timer);
    566  1.1     oster   tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
    567  1.1     oster 
    568  1.1     oster   /* the RAID I/O is complete.  Clean up. */
    569  1.1     oster   tracerec->specific.user.dag_retry_us = 0;
    570  1.1     oster 
    571  1.1     oster   RF_ETIMER_START(timer);
    572  1.1     oster   if (desc->flags & RF_DAG_RETURN_DAG) {
    573  1.1     oster     /* copy dags into paramDAG */
    574  1.1     oster     *(desc->paramDAG) = desc->dagArray[0].dags;
    575  1.1     oster     dag_h = *(desc->paramDAG);
    576  1.1     oster     for (i = 1; i < desc->numStripes; i++) {
    577  1.1     oster       /* concatenate dags from remaining stripes */
    578  1.1     oster       RF_ASSERT(dag_h);
    579  1.1     oster       while (dag_h->next)
    580  1.1     oster 	dag_h = dag_h->next;
    581  1.1     oster       dag_h->next = desc->dagArray[i].dags;
    582  1.1     oster     }
    583  1.1     oster   }
    584  1.1     oster   else {
    585  1.1     oster     /* free all dags */
    586  1.1     oster     for (i = 0; i < desc->numStripes; i++) {
    587  1.1     oster       rf_FreeDAG(desc->dagArray[i].dags);
    588  1.1     oster     }
    589  1.1     oster   }
    590  1.1     oster 
    591  1.1     oster   RF_ETIMER_STOP(timer);
    592  1.1     oster   RF_ETIMER_EVAL(timer);
    593  1.1     oster   tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
    594  1.1     oster 
    595  1.1     oster   RF_ETIMER_START(timer);
    596  1.1     oster   if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
    597  1.1     oster     for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
    598  1.1     oster       if (!rf_suppressLocksAndLargeWrites &&
    599  1.1     oster           asm_p->parityInfo &&
    600  1.1     oster           !(desc->flags&RF_DAG_SUPPRESS_LOCKS))
    601  1.1     oster       {
    602  1.1     oster         RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
    603  1.1     oster         rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID,
    604  1.1     oster             &asm_p->lockReqDesc);
    605  1.1     oster       }
    606  1.1     oster       if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
    607  1.1     oster         rf_UnblockRecon(raidPtr, asm_p);
    608  1.1     oster       }
    609  1.1     oster     }
    610  1.1     oster   }
    611  1.1     oster 
    612  1.1     oster   RF_ETIMER_STOP(timer);
    613  1.1     oster   RF_ETIMER_EVAL(timer);
    614  1.1     oster   tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
    615  1.1     oster 
    616  1.1     oster   RF_ETIMER_START(timer);
    617  1.1     oster   if (desc->flags & RF_DAG_RETURN_ASM)
    618  1.1     oster     *(desc->paramASM) = asmh;
    619  1.1     oster   else
    620  1.1     oster     rf_FreeAccessStripeMap(asmh);
    621  1.1     oster   RF_ETIMER_STOP(timer);
    622  1.1     oster   RF_ETIMER_EVAL(timer);
    623  1.1     oster   tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
    624  1.1     oster 
    625  1.1     oster   RF_ETIMER_STOP(desc->timer);
    626  1.1     oster   RF_ETIMER_EVAL(desc->timer);
    627  1.1     oster 
    628  1.1     oster   timer = desc->tracerec.tot_timer;
    629  1.1     oster   RF_ETIMER_STOP(timer);
    630  1.1     oster   RF_ETIMER_EVAL(timer);
    631  1.1     oster   desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
    632  1.1     oster 
    633  1.1     oster   rf_LogTraceRec(raidPtr, tracerec);
    634  1.1     oster 
    635  1.1     oster   desc->flags |= RF_DAG_ACCESS_COMPLETE;
    636  1.1     oster 
    637  1.1     oster   return RF_FALSE;
    638  1.1     oster }
    639