/* $NetBSD: rf_states.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, William V. Courtright II, Robby Findler
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * :
31 * Log: rf_states.c,v
32 * Revision 1.45 1996/07/28 20:31:39 jimz
33 * i386netbsd port
34 * true/false fixup
35 *
36 * Revision 1.44 1996/07/27 23:36:08 jimz
37 * Solaris port of simulator
38 *
39 * Revision 1.43 1996/07/22 19:52:16 jimz
40 * switched node params to RF_DagParam_t, a union of
41 * a 64-bit int and a void *, for better portability
42 * attempted hpux port, but failed partway through for
43 * lack of a single C compiler capable of compiling all
44 * source files
45 *
46 * Revision 1.42 1996/07/17 21:00:58 jimz
47 * clean up timer interface, tracing
48 *
49 * Revision 1.41 1996/07/11 19:08:00 jimz
50 * generalize reconstruction mechanism
51 * allow raid1 reconstructs via copyback (done with array
52 * quiesced, not online, therefore not disk-directed)
53 *
54 * Revision 1.40 1996/06/17 14:38:33 jimz
55 * properly #if out RF_DEMO code
56 * fix bug in MakeConfig that was causing weird behavior
57 * in configuration routines (config was not zeroed at start)
58 * clean up genplot handling of stacks
59 *
60 * Revision 1.39 1996/06/11 18:12:17 jimz
61 * got rid of evil race condition in LastState
62 *
63 * Revision 1.38 1996/06/10 14:18:58 jimz
64 * move user, throughput stats into per-array structure
65 *
66 * Revision 1.37 1996/06/09 02:36:46 jimz
67 * lots of little crufty cleanup- fixup whitespace
68 * issues, comment #ifdefs, improve typing in some
69 * places (esp size-related)
70 *
71 * Revision 1.36 1996/06/07 21:33:04 jimz
72 * begin using consistent types for sector numbers,
73 * stripe numbers, row+col numbers, recon unit numbers
74 *
75 * Revision 1.35 1996/06/05 18:06:02 jimz
76 * Major code cleanup. The Great Renaming is now done.
77 * Better modularity. Better typing. Fixed a bunch of
78 * synchronization bugs. Made a lot of global stuff
79 * per-desc or per-array. Removed dead code.
80 *
81 * Revision 1.34 1996/06/03 23:28:26 jimz
82 * more bugfixes
83 * check in tree to sync for IPDS runs with current bugfixes
84 * there still may be a problem with threads in the script test
85 * getting I/Os stuck- not trivially reproducible (runs ~50 times
86 * in a row without getting stuck)
87 *
88 * Revision 1.33 1996/05/31 22:26:54 jimz
89 * fix a lot of mapping problems, memory allocation problems
90 * found some weird lock issues, fixed 'em
91 * more code cleanup
92 *
93 * Revision 1.32 1996/05/30 12:59:18 jimz
94 * make etimer happier, more portable
95 *
96 * Revision 1.31 1996/05/30 11:29:41 jimz
97 * Numerous bug fixes. Stripe lock release code disagreed with the taking code
98 * about when stripes should be locked (I made it consistent: no parity, no lock)
99 * There was a lot of extra serialization of I/Os which I've removed- a lot of
100 * it was to calculate values for the cache code, which is no longer with us.
101 * More types, function, macro cleanup. Added code to properly quiesce the array
102 * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
103 * before. Fixed memory allocation, freeing bugs.
104 *
105 * Revision 1.30 1996/05/27 18:56:37 jimz
106 * more code cleanup
107 * better typing
108 * compiles in all 3 environments
109 *
110 * Revision 1.29 1996/05/24 22:17:04 jimz
111 * continue code + namespace cleanup
112 * typed a bunch of flags
113 *
114 * Revision 1.28 1996/05/24 04:28:55 jimz
115 * release cleanup ckpt
116 *
117 * Revision 1.27 1996/05/23 21:46:35 jimz
118 * checkpoint in code cleanup (release prep)
119 * lots of types, function names have been fixed
120 *
121 * Revision 1.26 1996/05/23 00:33:23 jimz
122 * code cleanup: move all debug decls to rf_options.c, all extern
123 * debug decls to rf_options.h, all debug vars preceded by rf_
124 *
125 * Revision 1.25 1996/05/20 19:31:46 jimz
126 * straighten out syntax problems
127 *
128 * Revision 1.24 1996/05/18 19:51:34 jimz
129 * major code cleanup- fix syntax, make some types consistent,
130 * add prototypes, clean out dead code, et cetera
131 *
132 * Revision 1.23 1996/05/16 23:37:33 jimz
133 * fix misspelled "else"
134 *
135 * Revision 1.22 1996/05/15 22:33:32 jimz
136 * appropriately #ifdef cache stuff
137 *
138 * Revision 1.21 1996/05/06 22:09:20 wvcii
139 * rf_State_ExecuteDAG now only executes the first dag
140 * of each parity stripe in a multi-stripe access
141 *
142 * rf_State_ProcessDAG now executes all dags in a
143 * multi-stripe access except the first dag of each stripe.
144 *
145 * Revision 1.20 1995/12/12 18:10:06 jimz
146 * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
147 * fix 80-column brain damage in comments
148 *
149 * Revision 1.19 1995/11/19 16:29:50 wvcii
150 * replaced LaunchDAGState with CreateDAGState, ExecuteDAGState
151 * created rf_ContinueDagAccess
152 *
153 * Revision 1.18 1995/11/07 15:37:23 wvcii
154 * deleted states SendDAGState, RetryDAGState
155 * added staes: LaunchDAGState, ProcessDAGState
156 * code no longer has a hard-coded retry count of 1 but will support
157 * retries until a dag can not be found (selected) to perform the user request
158 *
159 * Revision 1.17 1995/10/09 23:36:08 amiri
160 * *** empty log message ***
161 *
162 * Revision 1.16 1995/10/09 18:36:58 jimz
163 * moved call to StopThroughput for user-level driver to rf_driver.c
164 *
165 * Revision 1.15 1995/10/09 18:07:23 wvcii
166 * lastState now call rf_StopThroughputStats
167 *
168 * Revision 1.14 1995/10/05 18:56:31 jimz
169 * no-op file if !INCLUDE_VS
170 *
171 * Revision 1.13 1995/09/30 20:38:24 jimz
172 * LogTraceRec now takes a Raid * as its first argument
173 *
174 * Revision 1.12 1995/09/19 22:58:54 jimz
175 * integrate DKUSAGE into raidframe
176 *
177 * Revision 1.11 1995/09/07 01:26:55 jimz
178 * Achive basic compilation in kernel. Kernel functionality
179 * is not guaranteed at all, but it'll compile. Mostly. I hope.
180 *
181 * Revision 1.10 1995/07/26 03:28:31 robby
182 * intermediary checkin
183 *
184 * Revision 1.9 1995/07/23 02:50:33 robby
185 * oops. fixed boo boo
186 *
187 * Revision 1.8 1995/07/22 22:54:54 robby
188 * removed incorrect comment
189 *
190 * Revision 1.7 1995/07/21 19:30:26 robby
191 * added idle state for rf_when-idle.c
192 *
193 * Revision 1.6 1995/07/10 19:06:28 rachad
194 * *** empty log message ***
195 *
196 * Revision 1.5 1995/07/10 17:30:38 robby
197 * added virtual striping lock states
198 *
199 * Revision 1.4 1995/07/08 18:05:39 rachad
200 * Linked up Claudsons code with the real cache
201 *
202 * Revision 1.3 1995/07/06 14:38:50 robby
203 * changed get_thread_id to get_threadid
204 *
205 * Revision 1.2 1995/07/06 14:24:15 robby
206 * added log
207 *
208 */
209
210 #ifdef _KERNEL
211 #define KERNEL
212 #endif
213
214 #ifdef KERNEL
215 #ifndef __NetBSD__
216 #include <dkusage.h>
217 #endif /* !__NetBSD__ */
218 #endif /* KERNEL */
219
220 #include <sys/errno.h>
221
222 #include "rf_archs.h"
223 #include "rf_threadstuff.h"
224 #include "rf_raid.h"
225 #include "rf_dag.h"
226 #include "rf_desc.h"
227 #include "rf_aselect.h"
228 #include "rf_threadid.h"
229 #include "rf_general.h"
230 #include "rf_states.h"
231 #include "rf_dagutils.h"
232 #include "rf_driver.h"
233 #include "rf_engine.h"
234 #include "rf_map.h"
235 #include "rf_etimer.h"
236
237 #if defined(KERNEL) && (DKUSAGE > 0)
238 #include <sys/dkusage.h>
239 #include <io/common/iotypes.h>
240 #include <io/cam/dec_cam.h>
241 #include <io/cam/cam.h>
242 #include <io/cam/pdrv.h>
243 #endif /* KERNEL && DKUSAGE > 0 */
244
245 /* prototypes for some of the available states.
246
247 States must:
248
249 - not block.
250
251 - either schedule rf_ContinueRaidAccess as a callback and return
252 RF_TRUE, or complete all of their work and return RF_FALSE.
253
254 - increment desc->state when they have finished their work.
255 */
256
257
258 #ifdef SIMULATE
259 extern int global_async_flag;
260 #endif /* SIMULATE */
261
262 static char *StateName(RF_AccessState_t state)
263 {
264 switch (state) {
265 case rf_QuiesceState: return "QuiesceState";
266 case rf_MapState: return "MapState";
267 case rf_LockState: return "LockState";
268 case rf_CreateDAGState: return "CreateDAGState";
269 case rf_ExecuteDAGState: return "ExecuteDAGState";
270 case rf_ProcessDAGState: return "ProcessDAGState";
271 case rf_CleanupState: return "CleanupState";
272 case rf_LastState: return "LastState";
273 case rf_IncrAccessesCountState: return "IncrAccessesCountState";
274 case rf_DecrAccessesCountState: return "DecrAccessesCountState";
275 default: return "!!! UnnamedState !!!";
276 }
277 }
278
279 void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
280 {
281 int suspended = RF_FALSE;
282 int current_state_index = desc->state;
283 RF_AccessState_t current_state = desc->states[current_state_index];
284
285 #ifdef SIMULATE
286 rf_SetCurrentOwner(desc->owner);
287 #endif /* SIMULATE */
288
289 do {
290
291 current_state_index = desc->state;
292 current_state = desc->states [current_state_index];
293
294 switch (current_state) {
295
296 case rf_QuiesceState: suspended = rf_State_Quiesce(desc);
297 break;
298 case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc);
299 break;
300 case rf_MapState: suspended = rf_State_Map(desc);
301 break;
302 case rf_LockState: suspended = rf_State_Lock(desc);
303 break;
304 case rf_CreateDAGState: suspended = rf_State_CreateDAG(desc);
305 break;
306 case rf_ExecuteDAGState: suspended = rf_State_ExecuteDAG(desc);
307 break;
308 case rf_ProcessDAGState: suspended = rf_State_ProcessDAG(desc);
309 break;
310 case rf_CleanupState: suspended = rf_State_Cleanup(desc);
311 break;
312 case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc);
313 break;
314 case rf_LastState: suspended = rf_State_LastState(desc);
315 break;
316 }
317
318 /* after this point, we cannot dereference desc since desc may
319 have been freed. desc is only freed in LastState, so if we
320 renter this function or loop back up, desc should be valid. */
321
322 if (rf_printStatesDebug) {
323 int tid;
324 rf_get_threadid (tid);
325
326 printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
327 tid, StateName(current_state), current_state_index, (long)desc,
328 suspended ? "callback scheduled" : "looping");
329 }
330 } while (!suspended && current_state != rf_LastState);
331
332 return;
333 }
334
335
336 void rf_ContinueDagAccess (RF_DagList_t *dagList)
337 {
338 RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
339 RF_RaidAccessDesc_t *desc;
340 RF_DagHeader_t *dag_h;
341 RF_Etimer_t timer;
342 int i;
343
344 desc = dagList->desc;
345
346 timer = tracerec->timer;
347 RF_ETIMER_STOP(timer);
348 RF_ETIMER_EVAL(timer);
349 tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
350 RF_ETIMER_START(tracerec->timer);
351
352 /* skip to dag which just finished */
353 dag_h = dagList->dags;
354 for (i = 0; i < dagList->numDagsDone; i++) {
355 dag_h = dag_h->next;
356 }
357
358 /* check to see if retry is required */
359 if (dag_h->status == rf_rollBackward) {
360 /* when a dag fails, mark desc status as bad and allow all other dags
361 * in the desc to execute to completion. then, free all dags and start over */
362 desc->status = 1; /* bad status */
363 #if RF_DEMO > 0
364 if (!rf_demoMode)
365 #endif /* RF_DEMO > 0 */
366 {
367 printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
368 desc->tid, desc->type, (long)desc->raidAddress,
369 (long)desc->raidAddress,(int)desc->numBlocks,
370 (int)desc->numBlocks, (unsigned long) (desc->bufPtr));
371 }
372 }
373
374 dagList->numDagsDone++;
375 rf_ContinueRaidAccess(desc);
376 }
377
378
379 int rf_State_LastState(RF_RaidAccessDesc_t *desc)
380 {
381 void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc;
382 void *callbackArg = desc->callbackArg;
383
384 #ifdef SIMULATE
385 int tid;
386 rf_get_threadid(tid);
387
388 if (rf_accessDebug)
389 printf("async_flag set to %d\n",global_async_flag);
390 global_async_flag=desc->async_flag;
391 if (rf_accessDebug)
392 printf("Will now do clean up for %d\n",rf_GetCurrentOwner());
393 rf_FreeRaidAccDesc(desc);
394
395 if (callbackFunc)
396 callbackFunc(callbackArg);
397 #else /* SIMULATE */
398
399 #ifndef KERNEL
400
401 if (!(desc->flags & RF_DAG_NONBLOCKING_IO)) {
402 /* bummer that we have to take another lock here */
403 RF_LOCK_MUTEX(desc->mutex);
404 RF_ASSERT(desc->flags&RF_DAG_ACCESS_COMPLETE);
405 RF_SIGNAL_COND(desc->cond); /* DoAccess frees the desc in the blocking-I/O case */
406 RF_UNLOCK_MUTEX(desc->mutex);
407 }
408 else
409 rf_FreeRaidAccDesc(desc);
410
411 if (callbackFunc)
412 callbackFunc(callbackArg);
413
414 #else /* KERNEL */
415 if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */
416 #if DKUSAGE > 0
417 RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp);
418 #else
419 RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid);
420 #endif /* DKUSAGE > 0 */
421 /* printf("Calling biodone on 0x%x\n",desc->bp); */
422 biodone(desc->bp); /* access came through ioctl */
423 }
424
425 if (callbackFunc) callbackFunc(callbackArg);
426 rf_FreeRaidAccDesc(desc);
427
428 #endif /* ! KERNEL */
429 #endif /* SIMULATE */
430
431 return RF_FALSE;
432 }
433
434 int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
435 {
436 RF_Raid_t *raidPtr;
437
438 raidPtr = desc->raidPtr;
439 /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */
440 RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
441 raidPtr->accs_in_flight++; /* used to detect quiescence */
442 RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
443
444 desc->state++;
445 return RF_FALSE;
446 }
447
448 int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
449 {
450 RF_Raid_t *raidPtr;
451
452 raidPtr = desc->raidPtr;
453
454 RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
455 raidPtr->accs_in_flight--;
456 if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
457 rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
458 }
459 rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
460 RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
461
462 desc->state++;
463 return RF_FALSE;
464 }
465
466 int rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
467 {
468 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
469 RF_Etimer_t timer;
470 int suspended = RF_FALSE;
471 RF_Raid_t *raidPtr;
472
473 raidPtr = desc->raidPtr;
474
475 RF_ETIMER_START(timer);
476 RF_ETIMER_START(desc->timer);
477
478 RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
479 if (raidPtr->accesses_suspended) {
480 RF_CallbackDesc_t *cb;
481 cb = rf_AllocCallbackDesc();
482 /* XXX the following cast is quite bogus... rf_ContinueRaidAccess
483 takes a (RF_RaidAccessDesc_t *) as an argument.. GO */
484 cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess;
485 cb->callbackArg.p = (void *) desc;
486 cb->next = raidPtr->quiesce_wait_list;
487 raidPtr->quiesce_wait_list = cb;
488 suspended = RF_TRUE;
489 }
490
491 RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
492
493 RF_ETIMER_STOP(timer);
494 RF_ETIMER_EVAL(timer);
495 tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
496
497 if (suspended && rf_quiesceDebug)
498 printf("Stalling access due to quiescence lock\n");
499
500 desc->state++;
501 return suspended;
502 }
503
504 int rf_State_Map(RF_RaidAccessDesc_t *desc)
505 {
506 RF_Raid_t *raidPtr = desc->raidPtr;
507 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
508 RF_Etimer_t timer;
509
510 RF_ETIMER_START(timer);
511
512 if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
513 desc->bufPtr, RF_DONT_REMAP)))
514 RF_PANIC();
515
516 RF_ETIMER_STOP(timer);
517 RF_ETIMER_EVAL(timer);
518 tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
519
520 desc->state ++;
521 return RF_FALSE;
522 }
523
524 int rf_State_Lock(RF_RaidAccessDesc_t *desc)
525 {
526 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
527 RF_Raid_t *raidPtr = desc->raidPtr;
528 RF_AccessStripeMapHeader_t *asmh = desc->asmap;
529 RF_AccessStripeMap_t *asm_p;
530 RF_Etimer_t timer;
531 int suspended = RF_FALSE;
532
533 RF_ETIMER_START(timer);
534 if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
535 RF_StripeNum_t lastStripeID = -1;
536
537 /* acquire each lock that we don't already hold */
538 for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
539 RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
540 if (!rf_suppressLocksAndLargeWrites &&
541 asm_p->parityInfo &&
542 !(desc->flags& RF_DAG_SUPPRESS_LOCKS) &&
543 !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED))
544 {
545 asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
546 RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired
547 hierarchically */
548 lastStripeID = asm_p->stripeID;
549 /* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus! GO */
550 RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
551 (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p,
552 raidPtr->Layout.dataSectorsPerStripe);
553 if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
554 &asm_p->lockReqDesc))
555 {
556 suspended = RF_TRUE;
557 break;
558 }
559 }
560
561 if (desc->type == RF_IO_TYPE_WRITE &&
562 raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing)
563 {
564 if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) {
565 int val;
566
567 asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
568 /* XXX the cast below is quite bogus!!! XXX GO */
569 val = rf_ForceOrBlockRecon(raidPtr, asm_p,
570 (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc);
571 if (val == 0) {
572 asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
573 }
574 else {
575 suspended = RF_TRUE;
576 break;
577 }
578 }
579 else {
580 if (rf_pssDebug) {
581 printf("[%d] skipping force/block because already done, psid %ld\n",
582 desc->tid,(long)asm_p->stripeID);
583 }
584 }
585 }
586 else {
587 if (rf_pssDebug) {
588 printf("[%d] skipping force/block because not write or not under recon, psid %ld\n",
589 desc->tid,(long)asm_p->stripeID);
590 }
591 }
592 }
593
594 RF_ETIMER_STOP(timer);
595 RF_ETIMER_EVAL(timer);
596 tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
597
598 if (suspended)
599 return(RF_TRUE);
600 }
601
602 desc->state++;
603 return(RF_FALSE);
604 }
605
606 /*
607 * the following three states create, execute, and post-process dags
608 * the error recovery unit is a single dag.
609 * by default, SelectAlgorithm creates an array of dags, one per parity stripe
610 * in some tricky cases, multiple dags per stripe are created
611 * - dags within a parity stripe are executed sequentially (arbitrary order)
612 * - dags for distinct parity stripes are executed concurrently
613 *
614 * repeat until all dags complete successfully -or- dag selection fails
615 *
616 * while !done
617 * create dag(s) (SelectAlgorithm)
618 * if dag
619 * execute dag (DispatchDAG)
620 * if dag successful
621 * done (SUCCESS)
622 * else
623 * !done (RETRY - start over with new dags)
624 * else
625 * done (FAIL)
626 */
627 int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc)
628 {
629 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
630 RF_Etimer_t timer;
631 RF_DagHeader_t *dag_h;
632 int i, selectStatus;
633
634 /* generate a dag for the access, and fire it off. When the dag
635 completes, we'll get re-invoked in the next state. */
636 RF_ETIMER_START(timer);
637 /* SelectAlgorithm returns one or more dags */
638 selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS);
639 if (rf_printDAGsDebug)
640 for (i = 0; i < desc->numStripes; i++)
641 rf_PrintDAGList(desc->dagArray[i].dags);
642 RF_ETIMER_STOP(timer);
643 RF_ETIMER_EVAL(timer);
644 /* update time to create all dags */
645 tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
646
647 desc->status = 0; /* good status */
648
649 if (selectStatus) {
650 /* failed to create a dag */
651 /* this happens when there are too many faults or incomplete dag libraries */
652 printf("[Failed to create a DAG\n]");
653 RF_PANIC();
654 }
655 else {
656 /* bind dags to desc */
657 for (i = 0; i < desc->numStripes; i++) {
658 dag_h = desc->dagArray[i].dags;
659 while (dag_h) {
660 #ifdef KERNEL
661 dag_h->bp = (struct buf *) desc->bp;
662 #endif /* KERNEL */
663 dag_h->tracerec = tracerec;
664 dag_h = dag_h->next;
665 }
666 }
667 desc->flags |= RF_DAG_DISPATCH_RETURNED;
668 desc->state++; /* next state should be rf_State_ExecuteDAG */
669 }
670 return RF_FALSE;
671 }
672
673
674
675 /* the access has an array of dagLists, one dagList per parity stripe.
676 * fire the first dag in each parity stripe (dagList).
677 * dags within a stripe (dagList) must be executed sequentially
678 * - this preserves atomic parity update
679 * dags for independents parity groups (stripes) are fired concurrently */
680
681 int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
682 {
683 int i;
684 RF_DagHeader_t *dag_h;
685 RF_DagList_t *dagArray = desc->dagArray;
686
687 /* next state is always rf_State_ProcessDAG
688 * important to do this before firing the first dag
689 * (it may finish before we leave this routine) */
690 desc->state++;
691
692 /* sweep dag array, a stripe at a time, firing the first dag in each stripe */
693 for (i = 0; i < desc->numStripes; i++) {
694 RF_ASSERT(dagArray[i].numDags > 0);
695 RF_ASSERT(dagArray[i].numDagsDone == 0);
696 RF_ASSERT(dagArray[i].numDagsFired == 0);
697 RF_ETIMER_START(dagArray[i].tracerec.timer);
698 /* fire first dag in this stripe */
699 dag_h = dagArray[i].dags;
700 RF_ASSERT(dag_h);
701 dagArray[i].numDagsFired++;
702 /* XXX Yet another case where we pass in a conflicting function pointer
703 :-( XXX GO */
704 rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]);
705 }
706
707 /* the DAG will always call the callback, even if there was no
708 * blocking, so we are always suspended in this state */
709 return RF_TRUE;
710 }
711
712
713
714 /* rf_State_ProcessDAG is entered when a dag completes.
715 * first, check to all dags in the access have completed
716 * if not, fire as many dags as possible */
717
718 int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
719 {
720 RF_AccessStripeMapHeader_t *asmh = desc->asmap;
721 RF_Raid_t *raidPtr = desc->raidPtr;
722 RF_DagHeader_t *dag_h;
723 int i, j, done = RF_TRUE;
724 RF_DagList_t *dagArray = desc->dagArray;
725 RF_Etimer_t timer;
726
727 /* check to see if this is the last dag */
728 for (i = 0; i < desc->numStripes; i++)
729 if (dagArray[i].numDags != dagArray[i].numDagsDone)
730 done = RF_FALSE;
731
732 if (done) {
733 if (desc->status) {
734 /* a dag failed, retry */
735 RF_ETIMER_START(timer);
736 /* free all dags */
737 for (i = 0; i < desc->numStripes; i++) {
738 rf_FreeDAG(desc->dagArray[i].dags);
739 }
740 rf_MarkFailuresInASMList(raidPtr, asmh);
741 /* back up to rf_State_CreateDAG */
742 desc->state = desc->state - 2;
743 return RF_FALSE;
744 }
745 else {
746 /* move on to rf_State_Cleanup */
747 desc->state++;
748 }
749 return RF_FALSE;
750 }
751 else {
752 /* more dags to execute */
753 /* see if any are ready to be fired. if so, fire them */
754 /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */
755 for (i = 0; i < desc->numStripes; i++) {
756 if ((dagArray[i].numDagsDone < dagArray[i].numDags)
757 && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
758 && (dagArray[i].numDagsFired > 0)) {
759 RF_ETIMER_START(dagArray[i].tracerec.timer);
760 /* fire next dag in this stripe */
761 /* first, skip to next dag awaiting execution */
762 dag_h = dagArray[i].dags;
763 for (j = 0; j < dagArray[i].numDagsDone; j++)
764 dag_h = dag_h->next;
765 dagArray[i].numDagsFired++;
766 /* XXX and again we pass a different function pointer.. GO */
767 rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess,
768 &dagArray[i]);
769 }
770 }
771 return RF_TRUE;
772 }
773 }
774
775 /* only make it this far if all dags complete successfully */
776 int rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
777 {
778 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
779 RF_AccessStripeMapHeader_t *asmh = desc->asmap;
780 RF_Raid_t *raidPtr = desc->raidPtr;
781 RF_AccessStripeMap_t *asm_p;
782 RF_DagHeader_t *dag_h;
783 RF_Etimer_t timer;
784 int tid, i;
785
786 desc->state ++;
787
788 rf_get_threadid(tid);
789
790 timer = tracerec->timer;
791 RF_ETIMER_STOP(timer);
792 RF_ETIMER_EVAL(timer);
793 tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
794
795 /* the RAID I/O is complete. Clean up. */
796 tracerec->specific.user.dag_retry_us = 0;
797
798 RF_ETIMER_START(timer);
799 if (desc->flags & RF_DAG_RETURN_DAG) {
800 /* copy dags into paramDAG */
801 *(desc->paramDAG) = desc->dagArray[0].dags;
802 dag_h = *(desc->paramDAG);
803 for (i = 1; i < desc->numStripes; i++) {
804 /* concatenate dags from remaining stripes */
805 RF_ASSERT(dag_h);
806 while (dag_h->next)
807 dag_h = dag_h->next;
808 dag_h->next = desc->dagArray[i].dags;
809 }
810 }
811 else {
812 /* free all dags */
813 for (i = 0; i < desc->numStripes; i++) {
814 rf_FreeDAG(desc->dagArray[i].dags);
815 }
816 }
817
818 RF_ETIMER_STOP(timer);
819 RF_ETIMER_EVAL(timer);
820 tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
821
822 RF_ETIMER_START(timer);
823 if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
824 for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
825 if (!rf_suppressLocksAndLargeWrites &&
826 asm_p->parityInfo &&
827 !(desc->flags&RF_DAG_SUPPRESS_LOCKS))
828 {
829 RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
830 rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID,
831 &asm_p->lockReqDesc);
832 }
833 if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
834 rf_UnblockRecon(raidPtr, asm_p);
835 }
836 }
837 }
838
839 #ifdef SIMULATE
840 /* refresh current owner in case blocked ios where allowed to run */
841 rf_SetCurrentOwner(desc->owner);
842 #endif /* SIMULATE */
843
844 RF_ETIMER_STOP(timer);
845 RF_ETIMER_EVAL(timer);
846 tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
847
848 RF_ETIMER_START(timer);
849 if (desc->flags & RF_DAG_RETURN_ASM)
850 *(desc->paramASM) = asmh;
851 else
852 rf_FreeAccessStripeMap(asmh);
853 RF_ETIMER_STOP(timer);
854 RF_ETIMER_EVAL(timer);
855 tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
856
857 RF_ETIMER_STOP(desc->timer);
858 RF_ETIMER_EVAL(desc->timer);
859
860 timer = desc->tracerec.tot_timer;
861 RF_ETIMER_STOP(timer);
862 RF_ETIMER_EVAL(timer);
863 desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
864
865 rf_LogTraceRec(raidPtr, tracerec);
866
867 desc->flags |= RF_DAG_ACCESS_COMPLETE;
868
869 return RF_FALSE;
870 }
871