Home | History | Annotate | Line # | Download | only in raidframe
rf_states.c revision 1.3
      1 /*	$NetBSD: rf_states.c,v 1.3 1999/01/15 17:55:52 explorer Exp $	*/
      2 /*
      3  * Copyright (c) 1995 Carnegie-Mellon University.
      4  * All rights reserved.
      5  *
      6  * Author: Mark Holland, William V. Courtright II, Robby Findler
      7  *
      8  * Permission to use, copy, modify and distribute this software and
      9  * its documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie the
     26  * rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  * :
     31  * Log: rf_states.c,v
     32  * Revision 1.45  1996/07/28 20:31:39  jimz
     33  * i386netbsd port
     34  * true/false fixup
     35  *
     36  * Revision 1.44  1996/07/27  23:36:08  jimz
     37  * Solaris port of simulator
     38  *
     39  * Revision 1.43  1996/07/22  19:52:16  jimz
     40  * switched node params to RF_DagParam_t, a union of
     41  * a 64-bit int and a void *, for better portability
     42  * attempted hpux port, but failed partway through for
     43  * lack of a single C compiler capable of compiling all
     44  * source files
     45  *
     46  * Revision 1.42  1996/07/17  21:00:58  jimz
     47  * clean up timer interface, tracing
     48  *
     49  * Revision 1.41  1996/07/11  19:08:00  jimz
     50  * generalize reconstruction mechanism
     51  * allow raid1 reconstructs via copyback (done with array
     52  * quiesced, not online, therefore not disk-directed)
     53  *
     54  * Revision 1.40  1996/06/17  14:38:33  jimz
     55  * properly #if out RF_DEMO code
     56  * fix bug in MakeConfig that was causing weird behavior
     57  * in configuration routines (config was not zeroed at start)
     58  * clean up genplot handling of stacks
     59  *
     60  * Revision 1.39  1996/06/11  18:12:17  jimz
     61  * got rid of evil race condition in LastState
     62  *
     63  * Revision 1.38  1996/06/10  14:18:58  jimz
     64  * move user, throughput stats into per-array structure
     65  *
     66  * Revision 1.37  1996/06/09  02:36:46  jimz
     67  * lots of little crufty cleanup- fixup whitespace
     68  * issues, comment #ifdefs, improve typing in some
     69  * places (esp size-related)
     70  *
     71  * Revision 1.36  1996/06/07  21:33:04  jimz
     72  * begin using consistent types for sector numbers,
     73  * stripe numbers, row+col numbers, recon unit numbers
     74  *
     75  * Revision 1.35  1996/06/05  18:06:02  jimz
     76  * Major code cleanup. The Great Renaming is now done.
     77  * Better modularity. Better typing. Fixed a bunch of
     78  * synchronization bugs. Made a lot of global stuff
     79  * per-desc or per-array. Removed dead code.
     80  *
     81  * Revision 1.34  1996/06/03  23:28:26  jimz
     82  * more bugfixes
     83  * check in tree to sync for IPDS runs with current bugfixes
     84  * there still may be a problem with threads in the script test
     85  * getting I/Os stuck- not trivially reproducible (runs ~50 times
     86  * in a row without getting stuck)
     87  *
     88  * Revision 1.33  1996/05/31  22:26:54  jimz
     89  * fix a lot of mapping problems, memory allocation problems
     90  * found some weird lock issues, fixed 'em
     91  * more code cleanup
     92  *
     93  * Revision 1.32  1996/05/30  12:59:18  jimz
     94  * make etimer happier, more portable
     95  *
     96  * Revision 1.31  1996/05/30  11:29:41  jimz
     97  * Numerous bug fixes. Stripe lock release code disagreed with the taking code
     98  * about when stripes should be locked (I made it consistent: no parity, no lock)
     99  * There was a lot of extra serialization of I/Os which I've removed- a lot of
    100  * it was to calculate values for the cache code, which is no longer with us.
    101  * More types, function, macro cleanup. Added code to properly quiesce the array
    102  * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
    103  * before. Fixed memory allocation, freeing bugs.
    104  *
    105  * Revision 1.30  1996/05/27  18:56:37  jimz
    106  * more code cleanup
    107  * better typing
    108  * compiles in all 3 environments
    109  *
    110  * Revision 1.29  1996/05/24  22:17:04  jimz
    111  * continue code + namespace cleanup
    112  * typed a bunch of flags
    113  *
    114  * Revision 1.28  1996/05/24  04:28:55  jimz
    115  * release cleanup ckpt
    116  *
    117  * Revision 1.27  1996/05/23  21:46:35  jimz
    118  * checkpoint in code cleanup (release prep)
    119  * lots of types, function names have been fixed
    120  *
    121  * Revision 1.26  1996/05/23  00:33:23  jimz
    122  * code cleanup: move all debug decls to rf_options.c, all extern
    123  * debug decls to rf_options.h, all debug vars preceded by rf_
    124  *
    125  * Revision 1.25  1996/05/20  19:31:46  jimz
    126  * straighten out syntax problems
    127  *
    128  * Revision 1.24  1996/05/18  19:51:34  jimz
    129  * major code cleanup- fix syntax, make some types consistent,
    130  * add prototypes, clean out dead code, et cetera
    131  *
    132  * Revision 1.23  1996/05/16  23:37:33  jimz
    133  * fix misspelled "else"
    134  *
    135  * Revision 1.22  1996/05/15  22:33:32  jimz
    136  * appropriately #ifdef cache stuff
    137  *
    138  * Revision 1.21  1996/05/06  22:09:20  wvcii
    139  * rf_State_ExecuteDAG now only executes the first dag
    140  * of each parity stripe in a multi-stripe access
    141  *
    142  * rf_State_ProcessDAG now executes all dags in a
    143  * multi-stripe access except the first dag of each stripe.
    144  *
    145  * Revision 1.20  1995/12/12  18:10:06  jimz
    146  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
    147  * fix 80-column brain damage in comments
    148  *
    149  * Revision 1.19  1995/11/19  16:29:50  wvcii
    150  * replaced LaunchDAGState with CreateDAGState, ExecuteDAGState
    151  * created rf_ContinueDagAccess
    152  *
    153  * Revision 1.18  1995/11/07  15:37:23  wvcii
    154  * deleted states SendDAGState, RetryDAGState
    155  * added staes: LaunchDAGState, ProcessDAGState
    156  * code no longer has a hard-coded retry count of 1 but will support
    157  * retries until a dag can not be found (selected) to perform the user request
    158  *
    159  * Revision 1.17  1995/10/09  23:36:08  amiri
    160  * *** empty log message ***
    161  *
    162  * Revision 1.16  1995/10/09  18:36:58  jimz
    163  * moved call to StopThroughput for user-level driver to rf_driver.c
    164  *
    165  * Revision 1.15  1995/10/09  18:07:23  wvcii
    166  * lastState now call rf_StopThroughputStats
    167  *
    168  * Revision 1.14  1995/10/05  18:56:31  jimz
    169  * no-op file if !INCLUDE_VS
    170  *
    171  * Revision 1.13  1995/09/30  20:38:24  jimz
    172  * LogTraceRec now takes a Raid * as its first argument
    173  *
    174  * Revision 1.12  1995/09/19  22:58:54  jimz
    175  * integrate DKUSAGE into raidframe
    176  *
    177  * Revision 1.11  1995/09/07  01:26:55  jimz
    178  * Achive basic compilation in kernel. Kernel functionality
    179  * is not guaranteed at all, but it'll compile. Mostly. I hope.
    180  *
    181  * Revision 1.10  1995/07/26  03:28:31  robby
    182  * intermediary checkin
    183  *
    184  * Revision 1.9  1995/07/23  02:50:33  robby
    185  * oops. fixed boo boo
    186  *
    187  * Revision 1.8  1995/07/22  22:54:54  robby
    188  * removed incorrect comment
    189  *
    190  * Revision 1.7  1995/07/21  19:30:26  robby
    191  * added idle state for rf_when-idle.c
    192  *
    193  * Revision 1.6  1995/07/10  19:06:28  rachad
    194  * *** empty log message ***
    195  *
    196  * Revision 1.5  1995/07/10  17:30:38  robby
    197  * added virtual striping lock states
    198  *
    199  * Revision 1.4  1995/07/08  18:05:39  rachad
    200  * Linked up Claudsons code with the real cache
    201  *
    202  * Revision 1.3  1995/07/06  14:38:50  robby
    203  * changed get_thread_id to get_threadid
    204  *
    205  * Revision 1.2  1995/07/06  14:24:15  robby
    206  * added log
    207  *
    208  */
    209 
    210 #ifdef _KERNEL
    211 #define KERNEL
    212 #endif
    213 
    214 #ifdef KERNEL
    215 #ifndef __NetBSD__
    216 #include <dkusage.h>
    217 #endif /* !__NetBSD__ */
    218 #endif /* KERNEL */
    219 
    220 #include <sys/errno.h>
    221 
    222 #include "rf_archs.h"
    223 #include "rf_threadstuff.h"
    224 #include "rf_raid.h"
    225 #include "rf_dag.h"
    226 #include "rf_desc.h"
    227 #include "rf_aselect.h"
    228 #include "rf_threadid.h"
    229 #include "rf_general.h"
    230 #include "rf_states.h"
    231 #include "rf_dagutils.h"
    232 #include "rf_driver.h"
    233 #include "rf_engine.h"
    234 #include "rf_map.h"
    235 #include "rf_etimer.h"
    236 
    237 #if defined(KERNEL) && (DKUSAGE > 0)
    238 #include <sys/dkusage.h>
    239 #include <io/common/iotypes.h>
    240 #include <io/cam/dec_cam.h>
    241 #include <io/cam/cam.h>
    242 #include <io/cam/pdrv.h>
    243 #endif /* KERNEL && DKUSAGE > 0 */
    244 
    245 /* prototypes for some of the available states.
    246 
    247    States must:
    248 
    249      - not block.
    250 
    251      - either schedule rf_ContinueRaidAccess as a callback and return
    252        RF_TRUE, or complete all of their work and return RF_FALSE.
    253 
    254      - increment desc->state when they have finished their work.
    255 */
    256 
    257 
    258 #ifdef SIMULATE
    259 extern int global_async_flag;
    260 #endif /* SIMULATE */
    261 
    262 static char *StateName(RF_AccessState_t state)
    263 {
    264   switch (state) {
    265     case rf_QuiesceState:            return "QuiesceState";
    266     case rf_MapState:                return "MapState";
    267     case rf_LockState:               return "LockState";
    268     case rf_CreateDAGState:          return "CreateDAGState";
    269     case rf_ExecuteDAGState:         return "ExecuteDAGState";
    270     case rf_ProcessDAGState:         return "ProcessDAGState";
    271     case rf_CleanupState:            return "CleanupState";
    272     case rf_LastState:               return "LastState";
    273     case rf_IncrAccessesCountState:  return "IncrAccessesCountState";
    274     case rf_DecrAccessesCountState:  return "DecrAccessesCountState";
    275     default:                         return "!!! UnnamedState !!!";
    276   }
    277 }
    278 
    279 void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
    280 {
    281   int suspended = RF_FALSE;
    282   int current_state_index = desc->state;
    283   RF_AccessState_t current_state = desc->states[current_state_index];
    284 
    285 #ifdef SIMULATE
    286   rf_SetCurrentOwner(desc->owner);
    287 #endif /* SIMULATE */
    288 
    289   do {
    290 
    291     current_state_index = desc->state;
    292     current_state = desc->states [current_state_index];
    293 
    294     switch (current_state) {
    295 
    296     case rf_QuiesceState: 		 suspended = rf_State_Quiesce(desc);
    297 				 break;
    298     case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc);
    299 				 break;
    300     case rf_MapState:		 suspended = rf_State_Map(desc);
    301 				 break;
    302     case rf_LockState:		 suspended = rf_State_Lock(desc);
    303 				 break;
    304     case rf_CreateDAGState:	 suspended = rf_State_CreateDAG(desc);
    305 				 break;
    306     case rf_ExecuteDAGState:	 suspended = rf_State_ExecuteDAG(desc);
    307 				 break;
    308     case rf_ProcessDAGState:	 suspended = rf_State_ProcessDAG(desc);
    309 				 break;
    310     case rf_CleanupState: 	 suspended = rf_State_Cleanup(desc);
    311 				 break;
    312     case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc);
    313 				 break;
    314     case rf_LastState:		 suspended = rf_State_LastState(desc);
    315 				 break;
    316     }
    317 
    318     /* after this point, we cannot dereference desc since desc may
    319        have been freed. desc is only freed in LastState, so if we
    320        renter this function or loop back up, desc should be valid. */
    321 
    322     if (rf_printStatesDebug) {
    323       int tid;
    324       rf_get_threadid (tid);
    325 
    326       printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
    327 	      tid, StateName(current_state), current_state_index, (long)desc,
    328 	      suspended ? "callback scheduled" : "looping");
    329     }
    330   } while (!suspended && current_state != rf_LastState);
    331 
    332   return;
    333 }
    334 
    335 
    336 void rf_ContinueDagAccess (RF_DagList_t *dagList)
    337 {
    338   RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
    339   RF_RaidAccessDesc_t *desc;
    340   RF_DagHeader_t *dag_h;
    341   RF_Etimer_t timer;
    342   int i;
    343 
    344   desc = dagList->desc;
    345 
    346   timer = tracerec->timer;
    347   RF_ETIMER_STOP(timer);
    348   RF_ETIMER_EVAL(timer);
    349   tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
    350   RF_ETIMER_START(tracerec->timer);
    351 
    352   /* skip to dag which just finished */
    353   dag_h = dagList->dags;
    354   for (i = 0; i < dagList->numDagsDone; i++) {
    355     dag_h = dag_h->next;
    356   }
    357 
    358   /* check to see if retry is required */
    359   if (dag_h->status == rf_rollBackward) {
    360     /* when a dag fails, mark desc status as bad and allow all other dags
    361      * in the desc to execute to completion.  then, free all dags and start over */
    362     desc->status = 1;  /* bad status */
    363 #if RF_DEMO > 0
    364     if (!rf_demoMode)
    365 #endif /* RF_DEMO > 0 */
    366     {
    367       printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
    368 	     desc->tid, desc->type, (long)desc->raidAddress,
    369 	     (long)desc->raidAddress,(int)desc->numBlocks,
    370 	     (int)desc->numBlocks, (unsigned long) (desc->bufPtr));
    371     }
    372   }
    373 
    374   dagList->numDagsDone++;
    375   rf_ContinueRaidAccess(desc);
    376 }
    377 
    378 
    379 int rf_State_LastState(RF_RaidAccessDesc_t *desc)
    380 {
    381   void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc;
    382   RF_CBParam_t callbackArg;
    383 
    384   callbackArg.p = desc->callbackArg;
    385 
    386 #ifdef SIMULATE
    387   int tid;
    388   rf_get_threadid(tid);
    389 
    390   if (rf_accessDebug)
    391     printf("async_flag set to  %d\n",global_async_flag);
    392   global_async_flag=desc->async_flag;
    393   if (rf_accessDebug)
    394     printf("Will now do clean up for %d\n",rf_GetCurrentOwner());
    395   rf_FreeRaidAccDesc(desc);
    396 
    397   if (callbackFunc)
    398     callbackFunc(callbackArg);
    399 #else /* SIMULATE */
    400 
    401 #ifndef KERNEL
    402 
    403   if (!(desc->flags & RF_DAG_NONBLOCKING_IO)) {
    404     /* bummer that we have to take another lock here */
    405     RF_LOCK_MUTEX(desc->mutex);
    406     RF_ASSERT(desc->flags&RF_DAG_ACCESS_COMPLETE);
    407     RF_SIGNAL_COND(desc->cond);  /* DoAccess frees the desc in the blocking-I/O case */
    408     RF_UNLOCK_MUTEX(desc->mutex);
    409   }
    410   else
    411     rf_FreeRaidAccDesc(desc);
    412 
    413   if (callbackFunc)
    414     callbackFunc(callbackArg);
    415 
    416 #else  /* KERNEL */
    417   if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */
    418 #if DKUSAGE > 0
    419     RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp);
    420 #else
    421     RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid);
    422 #endif /* DKUSAGE > 0 */
    423 
    424     /*
    425      * If this is not an async request, wake up the caller
    426      */
    427     if (desc->async_flag == 0)
    428     	wakeup(desc->bp);
    429 
    430     /*     printf("Calling biodone on 0x%x\n",desc->bp); */
    431     biodone(desc->bp); 			/* access came through ioctl */
    432   }
    433 
    434   if (callbackFunc) callbackFunc(callbackArg);
    435   rf_FreeRaidAccDesc(desc);
    436 
    437 #endif /* ! KERNEL */
    438 #endif /* SIMULATE */
    439 
    440   return RF_FALSE;
    441 }
    442 
    443 int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
    444 {
    445   RF_Raid_t *raidPtr;
    446 
    447   raidPtr = desc->raidPtr;
    448   /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */
    449   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    450   raidPtr->accs_in_flight++; /* used to detect quiescence */
    451   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    452 
    453   desc->state++;
    454   return RF_FALSE;
    455 }
    456 
    457 int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
    458 {
    459   RF_Raid_t *raidPtr;
    460 
    461   raidPtr = desc->raidPtr;
    462 
    463   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    464   raidPtr->accs_in_flight--;
    465   if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0)  {
    466     rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
    467   }
    468   rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
    469   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    470 
    471   desc->state++;
    472   return RF_FALSE;
    473 }
    474 
    475 int rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
    476 {
    477   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    478   RF_Etimer_t timer;
    479   int suspended = RF_FALSE;
    480   RF_Raid_t *raidPtr;
    481 
    482   raidPtr = desc->raidPtr;
    483 
    484   RF_ETIMER_START(timer);
    485   RF_ETIMER_START(desc->timer);
    486 
    487   RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
    488   if (raidPtr->accesses_suspended) {
    489     RF_CallbackDesc_t *cb;
    490     cb = rf_AllocCallbackDesc();
    491     /* XXX the following cast is quite bogus...  rf_ContinueRaidAccess
    492        takes a (RF_RaidAccessDesc_t *) as an argument..  GO */
    493     cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess;
    494     cb->callbackArg.p  = (void *) desc;
    495     cb->next = raidPtr->quiesce_wait_list;
    496     raidPtr->quiesce_wait_list = cb;
    497     suspended = RF_TRUE;
    498   }
    499 
    500   RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
    501 
    502   RF_ETIMER_STOP(timer);
    503   RF_ETIMER_EVAL(timer);
    504   tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
    505 
    506   if (suspended && rf_quiesceDebug)
    507     printf("Stalling access due to quiescence lock\n");
    508 
    509   desc->state++;
    510   return suspended;
    511 }
    512 
    513 int rf_State_Map(RF_RaidAccessDesc_t *desc)
    514 {
    515   RF_Raid_t *raidPtr               = desc->raidPtr;
    516   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    517   RF_Etimer_t timer;
    518 
    519   RF_ETIMER_START(timer);
    520 
    521   if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
    522 			      desc->bufPtr, RF_DONT_REMAP)))
    523     RF_PANIC();
    524 
    525   RF_ETIMER_STOP(timer);
    526   RF_ETIMER_EVAL(timer);
    527   tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
    528 
    529   desc->state ++;
    530   return RF_FALSE;
    531 }
    532 
    533 int rf_State_Lock(RF_RaidAccessDesc_t *desc)
    534 {
    535   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    536   RF_Raid_t *raidPtr               = desc->raidPtr;
    537   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    538   RF_AccessStripeMap_t *asm_p;
    539   RF_Etimer_t timer;
    540   int suspended = RF_FALSE;
    541 
    542   RF_ETIMER_START(timer);
    543   if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
    544     RF_StripeNum_t lastStripeID = -1;
    545 
    546     /* acquire each lock that we don't already hold */
    547     for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
    548       RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
    549       if (!rf_suppressLocksAndLargeWrites &&
    550           asm_p->parityInfo &&
    551           !(desc->flags& RF_DAG_SUPPRESS_LOCKS) &&
    552           !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED))
    553       {
    554         asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
    555         RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired
    556 						   hierarchically */
    557         lastStripeID = asm_p->stripeID;
    558 	/* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus!  GO */
    559         RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
    560             (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p,
    561             raidPtr->Layout.dataSectorsPerStripe);
    562         if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
    563             &asm_p->lockReqDesc))
    564         {
    565           suspended = RF_TRUE;
    566           break;
    567         }
    568       }
    569 
    570       if (desc->type == RF_IO_TYPE_WRITE &&
    571           raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing)
    572       {
    573         if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) {
    574           int val;
    575 
    576           asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
    577 	  /* XXX the cast below is quite bogus!!! XXX  GO */
    578           val = rf_ForceOrBlockRecon(raidPtr, asm_p,
    579 		 (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc);
    580           if (val == 0) {
    581             asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
    582           }
    583           else {
    584             suspended = RF_TRUE;
    585             break;
    586           }
    587         }
    588         else {
    589           if (rf_pssDebug) {
    590             printf("[%d] skipping force/block because already done, psid %ld\n",
    591                 desc->tid,(long)asm_p->stripeID);
    592           }
    593         }
    594       }
    595       else {
    596         if (rf_pssDebug) {
    597           printf("[%d] skipping force/block because not write or not under recon, psid %ld\n",
    598               desc->tid,(long)asm_p->stripeID);
    599         }
    600       }
    601     }
    602 
    603     RF_ETIMER_STOP(timer);
    604     RF_ETIMER_EVAL(timer);
    605     tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
    606 
    607     if (suspended)
    608       return(RF_TRUE);
    609   }
    610 
    611   desc->state++;
    612   return(RF_FALSE);
    613 }
    614 
    615 /*
    616  * the following three states create, execute, and post-process dags
    617  * the error recovery unit is a single dag.
    618  * by default, SelectAlgorithm creates an array of dags, one per parity stripe
    619  * in some tricky cases, multiple dags per stripe are created
    620  *   - dags within a parity stripe are executed sequentially (arbitrary order)
    621  *   - dags for distinct parity stripes are executed concurrently
    622  *
    623  * repeat until all dags complete successfully -or- dag selection fails
    624  *
    625  * while !done
    626  *   create dag(s) (SelectAlgorithm)
    627  *   if dag
    628  *     execute dag (DispatchDAG)
    629  *     if dag successful
    630  *       done (SUCCESS)
    631  *     else
    632  *       !done (RETRY - start over with new dags)
    633  *   else
    634  *     done (FAIL)
    635  */
    636 int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc)
    637 {
    638   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    639   RF_Etimer_t timer;
    640   RF_DagHeader_t *dag_h;
    641   int i, selectStatus;
    642 
    643   /* generate a dag for the access, and fire it off.  When the dag
    644      completes, we'll get re-invoked in the next state. */
    645   RF_ETIMER_START(timer);
    646   /* SelectAlgorithm returns one or more dags */
    647   selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS);
    648   if (rf_printDAGsDebug)
    649     for (i = 0; i < desc->numStripes; i++)
    650       rf_PrintDAGList(desc->dagArray[i].dags);
    651   RF_ETIMER_STOP(timer);
    652   RF_ETIMER_EVAL(timer);
    653   /* update time to create all dags */
    654   tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
    655 
    656   desc->status = 0; /* good status */
    657 
    658   if (selectStatus) {
    659     /* failed to create a dag */
    660     /* this happens when there are too many faults or incomplete dag libraries */
    661     printf("[Failed to create a DAG\n]");
    662     RF_PANIC();
    663   }
    664   else {
    665     /* bind dags to desc */
    666     for (i = 0; i < desc->numStripes; i++) {
    667       dag_h = desc->dagArray[i].dags;
    668       while (dag_h) {
    669 #ifdef KERNEL
    670 	dag_h->bp = (struct buf *) desc->bp;
    671 #endif /* KERNEL */
    672 	dag_h->tracerec = tracerec;
    673 	dag_h = dag_h->next;
    674       }
    675     }
    676     desc->flags |= RF_DAG_DISPATCH_RETURNED;
    677     desc->state++;  /* next state should be rf_State_ExecuteDAG */
    678   }
    679   return RF_FALSE;
    680 }
    681 
    682 
    683 
    684 /* the access has an array of dagLists, one dagList per parity stripe.
    685  * fire the first dag in each parity stripe (dagList).
    686  * dags within a stripe (dagList) must be executed sequentially
    687  *  - this preserves atomic parity update
    688  * dags for independents parity groups (stripes) are fired concurrently */
    689 
    690 int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
    691 {
    692   int i;
    693   RF_DagHeader_t *dag_h;
    694   RF_DagList_t *dagArray = desc->dagArray;
    695 
    696   /* next state is always rf_State_ProcessDAG
    697    * important to do this before firing the first dag
    698    * (it may finish before we leave this routine) */
    699   desc->state++;
    700 
    701   /* sweep dag array, a stripe at a time, firing the first dag in each stripe */
    702   for (i = 0; i < desc->numStripes; i++) {
    703     RF_ASSERT(dagArray[i].numDags > 0);
    704     RF_ASSERT(dagArray[i].numDagsDone == 0);
    705     RF_ASSERT(dagArray[i].numDagsFired == 0);
    706     RF_ETIMER_START(dagArray[i].tracerec.timer);
    707     /* fire first dag in this stripe */
    708     dag_h = dagArray[i].dags;
    709     RF_ASSERT(dag_h);
    710     dagArray[i].numDagsFired++;
    711     /* XXX Yet another case where we pass in a conflicting function pointer
    712        :-(  XXX  GO */
    713     rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]);
    714   }
    715 
    716   /* the DAG will always call the callback, even if there was no
    717    * blocking, so we are always suspended in this state */
    718   return RF_TRUE;
    719 }
    720 
    721 
    722 
    723 /* rf_State_ProcessDAG is entered when a dag completes.
    724  * first, check to all dags in the access have completed
    725  * if not, fire as many dags as possible */
    726 
    727 int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
    728 {
    729   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    730   RF_Raid_t *raidPtr               = desc->raidPtr;
    731   RF_DagHeader_t *dag_h;
    732   int i, j, done = RF_TRUE;
    733   RF_DagList_t *dagArray = desc->dagArray;
    734   RF_Etimer_t timer;
    735 
    736   /* check to see if this is the last dag */
    737   for (i = 0; i < desc->numStripes; i++)
    738     if (dagArray[i].numDags != dagArray[i].numDagsDone)
    739       done = RF_FALSE;
    740 
    741   if (done) {
    742     if (desc->status) {
    743       /* a dag failed, retry */
    744       RF_ETIMER_START(timer);
    745       /* free all dags */
    746       for (i = 0; i < desc->numStripes; i++) {
    747 	rf_FreeDAG(desc->dagArray[i].dags);
    748       }
    749       rf_MarkFailuresInASMList(raidPtr, asmh);
    750       /* back up to rf_State_CreateDAG */
    751       desc->state = desc->state - 2;
    752       return RF_FALSE;
    753     }
    754     else {
    755       /* move on to rf_State_Cleanup */
    756       desc->state++;
    757     }
    758     return RF_FALSE;
    759   }
    760   else {
    761     /* more dags to execute */
    762     /* see if any are ready to be fired.  if so, fire them */
    763     /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */
    764     for (i = 0; i < desc->numStripes; i++) {
    765       if ((dagArray[i].numDagsDone < dagArray[i].numDags)
    766 	  && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
    767 	  && (dagArray[i].numDagsFired > 0)) {
    768 	RF_ETIMER_START(dagArray[i].tracerec.timer);
    769 	/* fire next dag in this stripe */
    770 	/* first, skip to next dag awaiting execution */
    771 	dag_h = dagArray[i].dags;
    772 	for (j = 0; j < dagArray[i].numDagsDone; j++)
    773 	  dag_h = dag_h->next;
    774 	dagArray[i].numDagsFired++;
    775 	/* XXX and again we pass a different function pointer.. GO */
    776 	rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess,
    777 		       &dagArray[i]);
    778       }
    779     }
    780     return RF_TRUE;
    781   }
    782 }
    783 
    784 /* only make it this far if all dags complete successfully */
    785 int rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
    786 {
    787   RF_AccTraceEntry_t *tracerec     = &desc->tracerec;
    788   RF_AccessStripeMapHeader_t *asmh = desc->asmap;
    789   RF_Raid_t *raidPtr               = desc->raidPtr;
    790   RF_AccessStripeMap_t *asm_p;
    791   RF_DagHeader_t *dag_h;
    792   RF_Etimer_t timer;
    793   int tid, i;
    794 
    795   desc->state ++;
    796 
    797   rf_get_threadid(tid);
    798 
    799   timer = tracerec->timer;
    800   RF_ETIMER_STOP(timer);
    801   RF_ETIMER_EVAL(timer);
    802   tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
    803 
    804   /* the RAID I/O is complete.  Clean up. */
    805   tracerec->specific.user.dag_retry_us = 0;
    806 
    807   RF_ETIMER_START(timer);
    808   if (desc->flags & RF_DAG_RETURN_DAG) {
    809     /* copy dags into paramDAG */
    810     *(desc->paramDAG) = desc->dagArray[0].dags;
    811     dag_h = *(desc->paramDAG);
    812     for (i = 1; i < desc->numStripes; i++) {
    813       /* concatenate dags from remaining stripes */
    814       RF_ASSERT(dag_h);
    815       while (dag_h->next)
    816 	dag_h = dag_h->next;
    817       dag_h->next = desc->dagArray[i].dags;
    818     }
    819   }
    820   else {
    821     /* free all dags */
    822     for (i = 0; i < desc->numStripes; i++) {
    823       rf_FreeDAG(desc->dagArray[i].dags);
    824     }
    825   }
    826 
    827   RF_ETIMER_STOP(timer);
    828   RF_ETIMER_EVAL(timer);
    829   tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
    830 
    831   RF_ETIMER_START(timer);
    832   if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
    833     for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
    834       if (!rf_suppressLocksAndLargeWrites &&
    835           asm_p->parityInfo &&
    836           !(desc->flags&RF_DAG_SUPPRESS_LOCKS))
    837       {
    838         RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
    839         rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID,
    840             &asm_p->lockReqDesc);
    841       }
    842       if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
    843         rf_UnblockRecon(raidPtr, asm_p);
    844       }
    845     }
    846   }
    847 
    848 #ifdef SIMULATE
    849   /* refresh current owner in case blocked ios where allowed to run */
    850   rf_SetCurrentOwner(desc->owner);
    851 #endif /* SIMULATE */
    852 
    853   RF_ETIMER_STOP(timer);
    854   RF_ETIMER_EVAL(timer);
    855   tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
    856 
    857   RF_ETIMER_START(timer);
    858   if (desc->flags & RF_DAG_RETURN_ASM)
    859     *(desc->paramASM) = asmh;
    860   else
    861     rf_FreeAccessStripeMap(asmh);
    862   RF_ETIMER_STOP(timer);
    863   RF_ETIMER_EVAL(timer);
    864   tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
    865 
    866   RF_ETIMER_STOP(desc->timer);
    867   RF_ETIMER_EVAL(desc->timer);
    868 
    869   timer = desc->tracerec.tot_timer;
    870   RF_ETIMER_STOP(timer);
    871   RF_ETIMER_EVAL(timer);
    872   desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
    873 
    874   rf_LogTraceRec(raidPtr, tracerec);
    875 
    876   desc->flags |= RF_DAG_ACCESS_COMPLETE;
    877 
    878   return RF_FALSE;
    879 }
    880