Home | History | Annotate | Line # | Download | only in raidframe
rf_states.c revision 1.1
      1 /*	$NetBSD: rf_states.c,v 1.1 1998/11/13 04:20:34 oster Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland, William V. Courtright II, Robby Findler
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * :
     31  * Log: rf_states.c,v
     32  * Revision 1.45  1996/07/28 20:31:39  jimz
     33  * i386netbsd port
     34  * true/false fixup
     35  *
     36  * Revision 1.44  1996/07/27  23:36:08  jimz
     37  * Solaris port of simulator
     38  *
     39  * Revision 1.43  1996/07/22  19:52:16  jimz
     40  * switched node params to RF_DagParam_t, a union of
     41  * a 64-bit int and a void *, for better portability
     42  * attempted hpux port, but failed partway through for
     43  * lack of a single C compiler capable of compiling all
     44  * source files
     45  *
     46  * Revision 1.42  1996/07/17  21:00:58  jimz
     47  * clean up timer interface, tracing
     48  *
     49  * Revision 1.41  1996/07/11  19:08:00  jimz
     50  * generalize reconstruction mechanism
     51  * allow raid1 reconstructs via copyback (done with array
     52  * quiesced, not online, therefore not disk-directed)
     53  *
     54  * Revision 1.40  1996/06/17  14:38:33  jimz
     55  * properly #if out RF_DEMO code
     56  * fix bug in MakeConfig that was causing weird behavior
     57  * in configuration routines (config was not zeroed at start)
     58  * clean up genplot handling of stacks
     59  *
     60  * Revision 1.39  1996/06/11  18:12:17  jimz
     61  * got rid of evil race condition in LastState
     62  *
     63  * Revision 1.38  1996/06/10  14:18:58  jimz
     64  * move user, throughput stats into per-array structure
     65  *
     66  * Revision 1.37  1996/06/09  02:36:46  jimz
     67  * lots of little crufty cleanup- fixup whitespace
     68  * issues, comment #ifdefs, improve typing in some
     69  * places (esp size-related)
     70  *
     71  * Revision 1.36  1996/06/07  21:33:04  jimz
     72  * begin using consistent types for sector numbers,
     73  * stripe numbers, row+col numbers, recon unit numbers
     74  *
     75  * Revision 1.35  1996/06/05  18:06:02  jimz
     76  * Major code cleanup. The Great Renaming is now done.
     77  * Better modularity. Better typing. Fixed a bunch of
     78  * synchronization bugs. Made a lot of global stuff
     79  * per-desc or per-array. Removed dead code.
     80  *
     81  * Revision 1.34  1996/06/03  23:28:26  jimz
     82  * more bugfixes
     83  * check in tree to sync for IPDS runs with current bugfixes
     84  * there still may be a problem with threads in the script test
     85  * getting I/Os stuck- not trivially reproducible (runs ~50 times
     86  * in a row without getting stuck)
     87  *
     88  * Revision 1.33  1996/05/31  22:26:54  jimz
     89  * fix a lot of mapping problems, memory allocation problems
     90  * found some weird lock issues, fixed 'em
     91  * more code cleanup
     92  *
     93  * Revision 1.32  1996/05/30  12:59:18  jimz
     94  * make etimer happier, more portable
     95  *
     96  * Revision 1.31  1996/05/30  11:29:41  jimz
     97  * Numerous bug fixes. Stripe lock release code disagreed with the taking code
     98  * about when stripes should be locked (I made it consistent: no parity, no lock)
     99  * There was a lot of extra serialization of I/Os which I've removed- a lot of
    100  * it was to calculate values for the cache code, which is no longer with us.
    101  * More types, function, macro cleanup. Added code to properly quiesce the array
    102  * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
    103  * before. Fixed memory allocation, freeing bugs.
    104  *
    105  * Revision 1.30  1996/05/27  18:56:37  jimz
    106  * more code cleanup
    107  * better typing
    108  * compiles in all 3 environments
    109  *
    110  * Revision 1.29  1996/05/24  22:17:04  jimz
    111  * continue code + namespace cleanup
    112  * typed a bunch of flags
    113  *
    114  * Revision 1.28  1996/05/24  04:28:55  jimz
    115  * release cleanup ckpt
    116  *
    117  * Revision 1.27  1996/05/23  21:46:35  jimz
    118  * checkpoint in code cleanup (release prep)
    119  * lots of types, function names have been fixed
    120  *
    121  * Revision 1.26  1996/05/23  00:33:23  jimz
    122  * code cleanup: move all debug decls to rf_options.c, all extern
    123  * debug decls to rf_options.h, all debug vars preceded by rf_
    124  *
    125  * Revision 1.25  1996/05/20  19:31:46  jimz
    126  * straighten out syntax problems
    127  *
    128  * Revision 1.24  1996/05/18  19:51:34  jimz
    129  * major code cleanup- fix syntax, make some types consistent,
    130  * add prototypes, clean out dead code, et cetera
    131  *
    132  * Revision 1.23  1996/05/16  23:37:33  jimz
    133  * fix misspelled "else"
    134  *
    135  * Revision 1.22  1996/05/15  22:33:32  jimz
    136  * appropriately #ifdef cache stuff
    137  *
    138  * Revision 1.21  1996/05/06  22:09:20  wvcii
    139  * rf_State_ExecuteDAG now only executes the first dag
    140  * of each parity stripe in a multi-stripe access
    141  *
    142  * rf_State_ProcessDAG now executes all dags in a
    143  * multi-stripe access except the first dag of each stripe.
    144  *
    145  * Revision 1.20  1995/12/12  18:10:06  jimz
    146  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
    147  * fix 80-column brain damage in comments
    148  *
    149  * Revision 1.19  1995/11/19  16:29:50  wvcii
    150  * replaced LaunchDAGState with CreateDAGState, ExecuteDAGState
    151  * created rf_ContinueDagAccess
    152  *
    153  * Revision 1.18  1995/11/07  15:37:23  wvcii
    154  * deleted states SendDAGState, RetryDAGState
    155  * added staes: LaunchDAGState, ProcessDAGState
    156  * code no longer has a hard-coded retry count of 1 but will support
    157  * retries until a dag can not be found (selected) to perform the user request
    158  *
    159  * Revision 1.17  1995/10/09  23:36:08  amiri
    160  * *** empty log message ***
    161  *
    162  * Revision 1.16  1995/10/09  18:36:58  jimz
    163  * moved call to StopThroughput for user-level driver to rf_driver.c
    164  *
    165  * Revision 1.15  1995/10/09  18:07:23  wvcii
    166  * lastState now call rf_StopThroughputStats
    167  *
    168  * Revision 1.14  1995/10/05  18:56:31  jimz
    169  * no-op file if !INCLUDE_VS
    170  *
    171  * Revision 1.13  1995/09/30  20:38:24  jimz
    172  * LogTraceRec now takes a Raid * as its first argument
    173  *
    174  * Revision 1.12  1995/09/19  22:58:54  jimz
    175  * integrate DKUSAGE into raidframe
    176  *
    177  * Revision 1.11  1995/09/07  01:26:55  jimz
    178  * Achive basic compilation in kernel. Kernel functionality
    179  * is not guaranteed at all, but it'll compile. Mostly. I hope.
    180  *
    181  * Revision 1.10  1995/07/26  03:28:31  robby
    182  * intermediary checkin
    183  *
    184  * Revision 1.9  1995/07/23  02:50:33  robby
    185  * oops. fixed boo boo
    186  *
    187  * Revision 1.8  1995/07/22  22:54:54  robby
    188  * removed incorrect comment
    189  *
    190  * Revision 1.7  1995/07/21  19:30:26  robby
    191  * added idle state for rf_when-idle.c
    192  *
    193  * Revision 1.6  1995/07/10  19:06:28  rachad
    194  * *** empty log message ***
    195  *
    196  * Revision 1.5  1995/07/10  17:30:38  robby
    197  * added virtual striping lock states
    198  *
    199  * Revision 1.4  1995/07/08  18:05:39  rachad
    200  * Linked up Claudsons code with the real cache
    201  *
    202  * Revision 1.3  1995/07/06  14:38:50  robby
    203  * changed get_thread_id to get_threadid
    204  *
    205  * Revision 1.2  1995/07/06  14:24:15  robby
    206  * added log
    207  *
    208  */
    209 
    210 #ifdef _KERNEL
    211 #define KERNEL
    212 #endif
    213 
    214 #ifdef KERNEL
    215 #ifndef __NetBSD__
    216 #include <dkusage.h>
    217 #endif /* !__NetBSD__ */
    218 #endif /* KERNEL */
    219 
    220 #include <sys/errno.h>
    221 
    222 #include "rf_archs.h"
    223 #include "rf_threadstuff.h"
    224 #include "rf_raid.h"
    225 #include "rf_dag.h"
    226 #include "rf_desc.h"
    227 #include "rf_aselect.h"
    228 #include "rf_threadid.h"
    229 #include "rf_general.h"
    230 #include "rf_states.h"
    231 #include "rf_dagutils.h"
    232 #include "rf_driver.h"
    233 #include "rf_engine.h"
    234 #include "rf_map.h"
    235 #include "rf_etimer.h"
    236 
    237 #if defined(KERNEL) && (DKUSAGE > 0)
    238 #include <sys/dkusage.h>
    239 #include <io/common/iotypes.h>
    240 #include <io/cam/dec_cam.h>
    241 #include <io/cam/cam.h>
    242 #include <io/cam/pdrv.h>
    243 #endif /* KERNEL && DKUSAGE > 0 */
    244 
    245 /* prototypes for some of the available states.
    246 
    247    States must:
    248 
    249      - not block.
    250 
    251      - either schedule rf_ContinueRaidAccess as a callback and return
    252        RF_TRUE, or complete all of their work and return RF_FALSE.
    253 
    254      - increment desc->state when they have finished their work.
    255 */
    256 
    257 
    258 #ifdef SIMULATE
    259 extern int global_async_flag;
    260 #endif /* SIMULATE */
    261 
    262 static char *StateName(RF_AccessState_t state)
    263 {
    264   switch (state) {
    265     case rf_QuiesceState:            return "QuiesceState";
    266     case rf_MapState:                return "MapState";
    267     case rf_LockState:               return "LockState";
    268     case rf_CreateDAGState:          return "CreateDAGState";
    269     case rf_ExecuteDAGState:         return "ExecuteDAGState";
    270     case rf_ProcessDAGState:         return "ProcessDAGState";
    271     case rf_CleanupState:            return "CleanupState";
    272     case rf_LastState:               return "LastState";
    273     case rf_IncrAccessesCountState:  return "IncrAccessesCountState";
    274     case rf_DecrAccessesCountState:  return "DecrAccessesCountState";
    275     default:                         return "!!! UnnamedState !!!";
    276   }
    277 }
    278 
    279 void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
    280 {
    281   int suspended = RF_FALSE;
    282   int current_state_index = desc->state;
    283   RF_AccessState_t current_state = desc->states[current_state_index];
    284 
    285 #ifdef SIMULATE
    286   rf_SetCurrentOwner(desc->owner);
    287 #endif /* SIMULATE */
    288 
    289   do {
    290 
    291     current_state_index = desc->state;
    292     current_state = desc->states [current_state_index];
    293 
    294     switch (current_state) {
    295 
    296     case rf_QuiesceState: 		 suspended = rf_State_Quiesce(desc);
    297 				 break;
    298     case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc);
    299 				 break;
    300     case rf_MapState:		 suspended = rf_State_Map(desc);
    301 				 break;
    302     case rf_LockState:		 suspended = rf_State_Lock(desc);
    303 				 break;
    304     case rf_CreateDAGState:	 suspended = rf_State_CreateDAG(desc);
    305 				 break;
    306     case rf_ExecuteDAGState:	 suspended = rf_State_ExecuteDAG(desc);
    307 				 break;
    308     case rf_ProcessDAGState:	 suspended = rf_State_ProcessDAG(desc);
    309 				 break;
    310     case rf_CleanupState: 	 suspended = rf_State_Cleanup(desc);
    311 				 break;
    312     case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc);
    313 				 break;
    314     case rf_LastState:		 suspended = rf_State_LastState(desc);
    315 				 break;
    316     }
    317 
    318     /* after this point, we cannot dereference desc since desc may
    319        have been freed. desc is only freed in LastState, so if we
    320        renter this function or loop back up, desc should be valid. */
    321 
    322     if (rf_printStatesDebug) {
    323       int tid;
    324       rf_get_threadid (tid);
    325 
    326       printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
    327 	      tid, StateName(current_state), current_state_index, (long)desc,
    328 	      suspended ? "callback scheduled" : "looping");
    329     }
    330   } while (!suspended && current_state != rf_LastState);
    331 
    332   return;
    333 }
    334 
    335 
    336 void rf_ContinueDagAccess (RF_DagList_t *dagList)
    337 {
    338   RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
    339   RF_RaidAccessDesc_t *desc;
    340   RF_DagHeader_t *dag_h;
    341   RF_Etimer_t timer;
    342   int i;
    343 
    344   desc = dagList->desc;
    345 
    346   timer = tracerec->timer;
    347   RF_ETIMER_STOP(timer);
    348   RF_ETIMER_EVAL(timer);
    349   tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
    350   RF_ETIMER_START(tracerec->timer);
    351 
    352   /* skip to dag which just finished */
    353   dag_h = dagList->dags;
    354   for (i = 0; i < dagList->numDagsDone; i++) {
    355     dag_h = dag_h->next;
    356   }
    357 
    358   /* check to see if retry is required */
    359   if (dag_h->status == rf_rollBackward) {
    360     /* when a dag fails, mark desc status as bad and allow all other dags
    361      * in the desc to execute to completion.  then, free all dags and start over */
    362     desc->status = 1;  /* bad status */
    363 #if RF_DEMO > 0
    364     if (!rf_demoMode)
    365 #endif /* RF_DEMO > 0 */
    366     {
    367       printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
    368 	     desc->tid, desc->type, (long)desc->raidAddress,
    369 	     (long)desc->raidAddress,(int)desc->numBlocks,
    370 	     (int)desc->numBlocks, (unsigned long) (desc->bufPtr));
    371     }
    372   }
    373 
    374   dagList->numDagsDone++;
    375   rf_ContinueRaidAccess(desc);
    376 }
    377 
    378 
    379 int rf_State_LastState(RF_RaidAccessDesc_t *desc)
    380 {
    381   void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc;
    382   void *callbackArg = desc->callbackArg;
    383 
    384 #ifdef SIMULATE
    385   int tid;
    386   rf_get_threadid(tid);
    387 
    388   if (rf_accessDebug)
    389     printf("async_flag set to  %d\n",global_async_flag);
    390   global_async_flag=desc->async_flag;
    391   if (rf_accessDebug)
    392     printf("Will now do clean up for %d\n",rf_GetCurrentOwner());
    393   rf_FreeRaidAccDesc(desc);
    394 
    395   if (callbackFunc)
    396     callbackFunc(callbackArg);
    397 #else /* SIMULATE */
    398 
    399 #ifndef KERNEL
    400 
    401   if (!(desc->flags & RF_DAG_NONBLOCKING_IO)) {
    402     /* bummer that we have to take another lock here */
    403     RF_LOCK_MUTEX(desc->mutex);
    404     RF_ASSERT(desc->flags&RF_DAG_ACCESS_COMPLETE);
    405     RF_SIGNAL_COND(desc->cond);  /* DoAccess frees the desc in the blocking-I/O case */
    406     RF_UNLOCK_MUTEX(desc->mutex);
    407   }
    408   else
    409     rf_FreeRaidAccDesc(desc);
    410 
    411   if (callbackFunc)
    412     callbackFunc(callbackArg);
    413 
    414 #else  /* KERNEL */
    415   if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */
    416 #if DKUSAGE > 0
    417     RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp);
    418 #else
    419     RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid);
    420 #endif /* DKUSAGE > 0 */
    421     /*     printf("Calling biodone on 0x%x\n",desc->bp); */
    422     biodone(desc->bp); 			/* access came through ioctl */
    423   }
    424 
    425   if (callbackFunc) callbackFunc(callbackArg);
    426   rf_FreeRaidAccDesc(desc);
    427 
    428 #endif /* ! KERNEL */
    429 #endif /* SIMULATE */
    430 
    431   return RF_FALSE;
    432 }
    433 
    434 int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
    435 {
    436   RF_Raid_t *raidPtr;
    437 
    438   raidPtr = desc->raidPtr;
    439   /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */
    440   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    441   raidPtr->accs_in_flight++; /* used to detect quiescence */
    442   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    443 
    444   desc->state++;
    445   return RF_FALSE;
    446 }
    447 
    448 int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
    449 {
    450   RF_Raid_t *raidPtr;
    451 
    452   raidPtr = desc->raidPtr;
    453 
    454   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    455   raidPtr->accs_in_flight--;
    456   if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0)  {
    457     rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
    458   }
    459   rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
    460   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    461 
    462   desc->state++;
    463   return RF_FALSE;
    464 }
    465 
    466 int rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
    467 {
    468   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    469   RF_Etimer_t timer;
    470   int suspended = RF_FALSE;
    471   RF_Raid_t *raidPtr;
    472 
    473   raidPtr = desc->raidPtr;
    474 
    475   RF_ETIMER_START(timer);
    476   RF_ETIMER_START(desc->timer);
    477 
    478   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    479   if (raidPtr->accesses_suspended) {
    480     RF_CallbackDesc_t *cb;
    481     cb = rf_AllocCallbackDesc();
    482     /* XXX the following cast is quite bogus...  rf_ContinueRaidAccess
    483        takes a (RF_RaidAccessDesc_t *) as an argument..  GO */
    484     cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess;
    485     cb->callbackArg.p  = (void *) desc;
    486     cb->next = raidPtr->quiesce_wait_list;
    487     raidPtr->quiesce_wait_list = cb;
    488     suspended = RF_TRUE;
    489   }
    490 
    491   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    492 
    493   RF_ETIMER_STOP(timer);
    494   RF_ETIMER_EVAL(timer);
    495   tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
    496 
    497   if (suspended && rf_quiesceDebug)
    498     printf("Stalling access due to quiescence lock\n");
    499 
    500   desc->state++;
    501   return suspended;
    502 }
    503 
    504 int rf_State_Map(RF_RaidAccessDesc_t *desc)
    505 {
    506   RF_Raid_t *raidPtr               = desc->raidPtr;
    507   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    508   RF_Etimer_t timer;
    509 
    510   RF_ETIMER_START(timer);
    511 
    512   if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
    513 			      desc->bufPtr, RF_DONT_REMAP)))
    514     RF_PANIC();
    515 
    516   RF_ETIMER_STOP(timer);
    517   RF_ETIMER_EVAL(timer);
    518   tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
    519 
    520   desc->state ++;
    521   return RF_FALSE;
    522 }
    523 
    524 int rf_State_Lock(RF_RaidAccessDesc_t *desc)
    525 {
    526   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    527   RF_Raid_t *raidPtr               = desc->raidPtr;
    528   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    529   RF_AccessStripeMap_t *asm_p;
    530   RF_Etimer_t timer;
    531   int suspended = RF_FALSE;
    532 
    533   RF_ETIMER_START(timer);
    534   if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
    535     RF_StripeNum_t lastStripeID = -1;
    536 
    537     /* acquire each lock that we don't already hold */
    538     for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
    539       RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
    540       if (!rf_suppressLocksAndLargeWrites &&
    541           asm_p->parityInfo &&
    542           !(desc->flags& RF_DAG_SUPPRESS_LOCKS) &&
    543           !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED))
    544       {
    545         asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
    546         RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired
    547 						   hierarchically */
    548         lastStripeID = asm_p->stripeID;
    549 	/* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus!  GO */
    550         RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
    551             (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p,
    552             raidPtr->Layout.dataSectorsPerStripe);
    553         if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
    554             &asm_p->lockReqDesc))
    555         {
    556           suspended = RF_TRUE;
    557           break;
    558         }
    559       }
    560 
    561       if (desc->type == RF_IO_TYPE_WRITE &&
    562           raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing)
    563       {
    564         if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) {
    565           int val;
    566 
    567           asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
    568 	  /* XXX the cast below is quite bogus!!! XXX  GO */
    569           val = rf_ForceOrBlockRecon(raidPtr, asm_p,
    570 		 (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc);
    571           if (val == 0) {
    572             asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
    573           }
    574           else {
    575             suspended = RF_TRUE;
    576             break;
    577           }
    578         }
    579         else {
    580           if (rf_pssDebug) {
    581             printf("[%d] skipping force/block because already done, psid %ld\n",
    582                 desc->tid,(long)asm_p->stripeID);
    583           }
    584         }
    585       }
    586       else {
    587         if (rf_pssDebug) {
    588           printf("[%d] skipping force/block because not write or not under recon, psid %ld\n",
    589               desc->tid,(long)asm_p->stripeID);
    590         }
    591       }
    592     }
    593 
    594     RF_ETIMER_STOP(timer);
    595     RF_ETIMER_EVAL(timer);
    596     tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
    597 
    598     if (suspended)
    599       return(RF_TRUE);
    600   }
    601 
    602   desc->state++;
    603   return(RF_FALSE);
    604 }
    605 
    606 /*
    607  * the following three states create, execute, and post-process dags
    608  * the error recovery unit is a single dag.
    609  * by default, SelectAlgorithm creates an array of dags, one per parity stripe
    610  * in some tricky cases, multiple dags per stripe are created
    611  *   - dags within a parity stripe are executed sequentially (arbitrary order)
    612  *   - dags for distinct parity stripes are executed concurrently
    613  *
    614  * repeat until all dags complete successfully -or- dag selection fails
    615  *
    616  * while !done
    617  *   create dag(s) (SelectAlgorithm)
    618  *   if dag
    619  *     execute dag (DispatchDAG)
    620  *     if dag successful
    621  *       done (SUCCESS)
    622  *     else
    623  *       !done (RETRY - start over with new dags)
    624  *   else
    625  *     done (FAIL)
    626  */
    627 int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc)
    628 {
    629   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    630   RF_Etimer_t timer;
    631   RF_DagHeader_t *dag_h;
    632   int i, selectStatus;
    633 
    634   /* generate a dag for the access, and fire it off.  When the dag
    635      completes, we'll get re-invoked in the next state. */
    636   RF_ETIMER_START(timer);
    637   /* SelectAlgorithm returns one or more dags */
    638   selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS);
    639   if (rf_printDAGsDebug)
    640     for (i = 0; i < desc->numStripes; i++)
    641       rf_PrintDAGList(desc->dagArray[i].dags);
    642   RF_ETIMER_STOP(timer);
    643   RF_ETIMER_EVAL(timer);
    644   /* update time to create all dags */
    645   tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
    646 
    647   desc->status = 0; /* good status */
    648 
    649   if (selectStatus) {
    650     /* failed to create a dag */
    651     /* this happens when there are too many faults or incomplete dag libraries */
    652     printf("[Failed to create a DAG\n]");
    653     RF_PANIC();
    654   }
    655   else {
    656     /* bind dags to desc */
    657     for (i = 0; i < desc->numStripes; i++) {
    658       dag_h = desc->dagArray[i].dags;
    659       while (dag_h) {
    660 #ifdef KERNEL
    661 	dag_h->bp = (struct buf *) desc->bp;
    662 #endif /* KERNEL */
    663 	dag_h->tracerec = tracerec;
    664 	dag_h = dag_h->next;
    665       }
    666     }
    667     desc->flags |= RF_DAG_DISPATCH_RETURNED;
    668     desc->state++;  /* next state should be rf_State_ExecuteDAG */
    669   }
    670   return RF_FALSE;
    671 }
    672 
    673 
    674 
/* the access has an array of dagLists, one dagList per parity stripe.
 * fire the first dag in each parity stripe (dagList).
 * dags within a stripe (dagList) must be executed sequentially
 *  - this preserves atomic parity update
 * dags for independent parity groups (stripes) are fired concurrently */
    680 
    681 int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
    682 {
    683   int i;
    684   RF_DagHeader_t *dag_h;
    685   RF_DagList_t *dagArray = desc->dagArray;
    686 
    687   /* next state is always rf_State_ProcessDAG
    688    * important to do this before firing the first dag
    689    * (it may finish before we leave this routine) */
    690   desc->state++;
    691 
    692   /* sweep dag array, a stripe at a time, firing the first dag in each stripe */
    693   for (i = 0; i < desc->numStripes; i++) {
    694     RF_ASSERT(dagArray[i].numDags > 0);
    695     RF_ASSERT(dagArray[i].numDagsDone == 0);
    696     RF_ASSERT(dagArray[i].numDagsFired == 0);
    697     RF_ETIMER_START(dagArray[i].tracerec.timer);
    698     /* fire first dag in this stripe */
    699     dag_h = dagArray[i].dags;
    700     RF_ASSERT(dag_h);
    701     dagArray[i].numDagsFired++;
    702     /* XXX Yet another case where we pass in a conflicting function pointer
    703        :-(  XXX  GO */
    704     rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]);
    705   }
    706 
    707   /* the DAG will always call the callback, even if there was no
    708    * blocking, so we are always suspended in this state */
    709   return RF_TRUE;
    710 }
    711 
    712 
    713 
/* rf_State_ProcessDAG is entered when a dag completes.
 * first, check to see if all dags in the access have completed
 * if not, fire as many dags as possible */
    717 
    718 int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
    719 {
    720   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    721   RF_Raid_t *raidPtr               = desc->raidPtr;
    722   RF_DagHeader_t *dag_h;
    723   int i, j, done = RF_TRUE;
    724   RF_DagList_t *dagArray = desc->dagArray;
    725   RF_Etimer_t timer;
    726 
    727   /* check to see if this is the last dag */
    728   for (i = 0; i < desc->numStripes; i++)
    729     if (dagArray[i].numDags != dagArray[i].numDagsDone)
    730       done = RF_FALSE;
    731 
    732   if (done) {
    733     if (desc->status) {
    734       /* a dag failed, retry */
    735       RF_ETIMER_START(timer);
    736       /* free all dags */
    737       for (i = 0; i < desc->numStripes; i++) {
    738 	rf_FreeDAG(desc->dagArray[i].dags);
    739       }
    740       rf_MarkFailuresInASMList(raidPtr, asmh);
    741       /* back up to rf_State_CreateDAG */
    742       desc->state = desc->state - 2;
    743       return RF_FALSE;
    744     }
    745     else {
    746       /* move on to rf_State_Cleanup */
    747       desc->state++;
    748     }
    749     return RF_FALSE;
    750   }
    751   else {
    752     /* more dags to execute */
    753     /* see if any are ready to be fired.  if so, fire them */
    754     /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */
    755     for (i = 0; i < desc->numStripes; i++) {
    756       if ((dagArray[i].numDagsDone < dagArray[i].numDags)
    757 	  && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
    758 	  && (dagArray[i].numDagsFired > 0)) {
    759 	RF_ETIMER_START(dagArray[i].tracerec.timer);
    760 	/* fire next dag in this stripe */
    761 	/* first, skip to next dag awaiting execution */
    762 	dag_h = dagArray[i].dags;
    763 	for (j = 0; j < dagArray[i].numDagsDone; j++)
    764 	  dag_h = dag_h->next;
    765 	dagArray[i].numDagsFired++;
    766 	/* XXX and again we pass a different function pointer.. GO */
    767 	rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess,
    768 		       &dagArray[i]);
    769       }
    770     }
    771     return RF_TRUE;
    772   }
    773 }
    774 
    775 /* only make it this far if all dags complete successfully */
    776 int rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
    777 {
    778   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    779   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    780   RF_Raid_t *raidPtr               = desc->raidPtr;
    781   RF_AccessStripeMap_t *asm_p;
    782   RF_DagHeader_t *dag_h;
    783   RF_Etimer_t timer;
    784   int tid, i;
    785 
    786   desc->state ++;
    787 
    788   rf_get_threadid(tid);
    789 
    790   timer = tracerec->timer;
    791   RF_ETIMER_STOP(timer);
    792   RF_ETIMER_EVAL(timer);
    793   tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
    794 
    795   /* the RAID I/O is complete.  Clean up. */
    796   tracerec->specific.user.dag_retry_us = 0;
    797 
    798   RF_ETIMER_START(timer);
    799   if (desc->flags & RF_DAG_RETURN_DAG) {
    800     /* copy dags into paramDAG */
    801     *(desc->paramDAG) = desc->dagArray[0].dags;
    802     dag_h = *(desc->paramDAG);
    803     for (i = 1; i < desc->numStripes; i++) {
    804       /* concatenate dags from remaining stripes */
    805       RF_ASSERT(dag_h);
    806       while (dag_h->next)
    807 	dag_h = dag_h->next;
    808       dag_h->next = desc->dagArray[i].dags;
    809     }
    810   }
    811   else {
    812     /* free all dags */
    813     for (i = 0; i < desc->numStripes; i++) {
    814       rf_FreeDAG(desc->dagArray[i].dags);
    815     }
    816   }
    817 
    818   RF_ETIMER_STOP(timer);
    819   RF_ETIMER_EVAL(timer);
    820   tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
    821 
    822   RF_ETIMER_START(timer);
    823   if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
    824     for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
    825       if (!rf_suppressLocksAndLargeWrites &&
    826           asm_p->parityInfo &&
    827           !(desc->flags&RF_DAG_SUPPRESS_LOCKS))
    828       {
    829         RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
    830         rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID,
    831             &asm_p->lockReqDesc);
    832       }
    833       if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
    834         rf_UnblockRecon(raidPtr, asm_p);
    835       }
    836     }
    837   }
    838 
    839 #ifdef SIMULATE
    840   /* refresh current owner in case blocked ios where allowed to run */
    841   rf_SetCurrentOwner(desc->owner);
    842 #endif /* SIMULATE */
    843 
    844   RF_ETIMER_STOP(timer);
    845   RF_ETIMER_EVAL(timer);
    846   tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
    847 
    848   RF_ETIMER_START(timer);
    849   if (desc->flags & RF_DAG_RETURN_ASM)
    850     *(desc->paramASM) = asmh;
    851   else
    852     rf_FreeAccessStripeMap(asmh);
    853   RF_ETIMER_STOP(timer);
    854   RF_ETIMER_EVAL(timer);
    855   tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
    856 
    857   RF_ETIMER_STOP(desc->timer);
    858   RF_ETIMER_EVAL(desc->timer);
    859 
    860   timer = desc->tracerec.tot_timer;
    861   RF_ETIMER_STOP(timer);
    862   RF_ETIMER_EVAL(timer);
    863   desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
    864 
    865   rf_LogTraceRec(raidPtr, tracerec);
    866 
    867   desc->flags |= RF_DAG_ACCESS_COMPLETE;
    868 
    869   return RF_FALSE;
    870 }
    871