rf_states.c revision 1.3 1 /* $NetBSD: rf_states.c,v 1.3 1999/01/15 17:55:52 explorer Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland, William V. Courtright II, Robby Findler
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * :
31 * Log: rf_states.c,v
32 * Revision 1.45 1996/07/28 20:31:39 jimz
33 * i386netbsd port
34 * true/false fixup
35 *
36 * Revision 1.44 1996/07/27 23:36:08 jimz
37 * Solaris port of simulator
38 *
39 * Revision 1.43 1996/07/22 19:52:16 jimz
40 * switched node params to RF_DagParam_t, a union of
41 * a 64-bit int and a void *, for better portability
42 * attempted hpux port, but failed partway through for
43 * lack of a single C compiler capable of compiling all
44 * source files
45 *
46 * Revision 1.42 1996/07/17 21:00:58 jimz
47 * clean up timer interface, tracing
48 *
49 * Revision 1.41 1996/07/11 19:08:00 jimz
50 * generalize reconstruction mechanism
51 * allow raid1 reconstructs via copyback (done with array
52 * quiesced, not online, therefore not disk-directed)
53 *
54 * Revision 1.40 1996/06/17 14:38:33 jimz
55 * properly #if out RF_DEMO code
56 * fix bug in MakeConfig that was causing weird behavior
57 * in configuration routines (config was not zeroed at start)
58 * clean up genplot handling of stacks
59 *
60 * Revision 1.39 1996/06/11 18:12:17 jimz
61 * got rid of evil race condition in LastState
62 *
63 * Revision 1.38 1996/06/10 14:18:58 jimz
64 * move user, throughput stats into per-array structure
65 *
66 * Revision 1.37 1996/06/09 02:36:46 jimz
67 * lots of little crufty cleanup- fixup whitespace
68 * issues, comment #ifdefs, improve typing in some
69 * places (esp size-related)
70 *
71 * Revision 1.36 1996/06/07 21:33:04 jimz
72 * begin using consistent types for sector numbers,
73 * stripe numbers, row+col numbers, recon unit numbers
74 *
75 * Revision 1.35 1996/06/05 18:06:02 jimz
76 * Major code cleanup. The Great Renaming is now done.
77 * Better modularity. Better typing. Fixed a bunch of
78 * synchronization bugs. Made a lot of global stuff
79 * per-desc or per-array. Removed dead code.
80 *
81 * Revision 1.34 1996/06/03 23:28:26 jimz
82 * more bugfixes
83 * check in tree to sync for IPDS runs with current bugfixes
84 * there still may be a problem with threads in the script test
85 * getting I/Os stuck- not trivially reproducible (runs ~50 times
86 * in a row without getting stuck)
87 *
88 * Revision 1.33 1996/05/31 22:26:54 jimz
89 * fix a lot of mapping problems, memory allocation problems
90 * found some weird lock issues, fixed 'em
91 * more code cleanup
92 *
93 * Revision 1.32 1996/05/30 12:59:18 jimz
94 * make etimer happier, more portable
95 *
96 * Revision 1.31 1996/05/30 11:29:41 jimz
97 * Numerous bug fixes. Stripe lock release code disagreed with the taking code
98 * about when stripes should be locked (I made it consistent: no parity, no lock)
99 * There was a lot of extra serialization of I/Os which I've removed- a lot of
100 * it was to calculate values for the cache code, which is no longer with us.
101 * More types, function, macro cleanup. Added code to properly quiesce the array
102 * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
103 * before. Fixed memory allocation, freeing bugs.
104 *
105 * Revision 1.30 1996/05/27 18:56:37 jimz
106 * more code cleanup
107 * better typing
108 * compiles in all 3 environments
109 *
110 * Revision 1.29 1996/05/24 22:17:04 jimz
111 * continue code + namespace cleanup
112 * typed a bunch of flags
113 *
114 * Revision 1.28 1996/05/24 04:28:55 jimz
115 * release cleanup ckpt
116 *
117 * Revision 1.27 1996/05/23 21:46:35 jimz
118 * checkpoint in code cleanup (release prep)
119 * lots of types, function names have been fixed
120 *
121 * Revision 1.26 1996/05/23 00:33:23 jimz
122 * code cleanup: move all debug decls to rf_options.c, all extern
123 * debug decls to rf_options.h, all debug vars preceded by rf_
124 *
125 * Revision 1.25 1996/05/20 19:31:46 jimz
126 * straighten out syntax problems
127 *
128 * Revision 1.24 1996/05/18 19:51:34 jimz
129 * major code cleanup- fix syntax, make some types consistent,
130 * add prototypes, clean out dead code, et cetera
131 *
132 * Revision 1.23 1996/05/16 23:37:33 jimz
133 * fix misspelled "else"
134 *
135 * Revision 1.22 1996/05/15 22:33:32 jimz
136 * appropriately #ifdef cache stuff
137 *
138 * Revision 1.21 1996/05/06 22:09:20 wvcii
139 * rf_State_ExecuteDAG now only executes the first dag
140 * of each parity stripe in a multi-stripe access
141 *
142 * rf_State_ProcessDAG now executes all dags in a
143 * multi-stripe access except the first dag of each stripe.
144 *
145 * Revision 1.20 1995/12/12 18:10:06 jimz
146 * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
147 * fix 80-column brain damage in comments
148 *
149 * Revision 1.19 1995/11/19 16:29:50 wvcii
150 * replaced LaunchDAGState with CreateDAGState, ExecuteDAGState
151 * created rf_ContinueDagAccess
152 *
153 * Revision 1.18 1995/11/07 15:37:23 wvcii
154 * deleted states SendDAGState, RetryDAGState
155 * added staes: LaunchDAGState, ProcessDAGState
156 * code no longer has a hard-coded retry count of 1 but will support
157 * retries until a dag can not be found (selected) to perform the user request
158 *
159 * Revision 1.17 1995/10/09 23:36:08 amiri
160 * *** empty log message ***
161 *
162 * Revision 1.16 1995/10/09 18:36:58 jimz
163 * moved call to StopThroughput for user-level driver to rf_driver.c
164 *
165 * Revision 1.15 1995/10/09 18:07:23 wvcii
166 * lastState now call rf_StopThroughputStats
167 *
168 * Revision 1.14 1995/10/05 18:56:31 jimz
169 * no-op file if !INCLUDE_VS
170 *
171 * Revision 1.13 1995/09/30 20:38:24 jimz
172 * LogTraceRec now takes a Raid * as its first argument
173 *
174 * Revision 1.12 1995/09/19 22:58:54 jimz
175 * integrate DKUSAGE into raidframe
176 *
177 * Revision 1.11 1995/09/07 01:26:55 jimz
178 * Achive basic compilation in kernel. Kernel functionality
179 * is not guaranteed at all, but it'll compile. Mostly. I hope.
180 *
181 * Revision 1.10 1995/07/26 03:28:31 robby
182 * intermediary checkin
183 *
184 * Revision 1.9 1995/07/23 02:50:33 robby
185 * oops. fixed boo boo
186 *
187 * Revision 1.8 1995/07/22 22:54:54 robby
188 * removed incorrect comment
189 *
190 * Revision 1.7 1995/07/21 19:30:26 robby
191 * added idle state for rf_when-idle.c
192 *
193 * Revision 1.6 1995/07/10 19:06:28 rachad
194 * *** empty log message ***
195 *
196 * Revision 1.5 1995/07/10 17:30:38 robby
197 * added virtual striping lock states
198 *
199 * Revision 1.4 1995/07/08 18:05:39 rachad
200 * Linked up Claudsons code with the real cache
201 *
202 * Revision 1.3 1995/07/06 14:38:50 robby
203 * changed get_thread_id to get_threadid
204 *
205 * Revision 1.2 1995/07/06 14:24:15 robby
206 * added log
207 *
208 */
209
210 #ifdef _KERNEL
211 #define KERNEL
212 #endif
213
214 #ifdef KERNEL
215 #ifndef __NetBSD__
216 #include <dkusage.h>
217 #endif /* !__NetBSD__ */
218 #endif /* KERNEL */
219
220 #include <sys/errno.h>
221
222 #include "rf_archs.h"
223 #include "rf_threadstuff.h"
224 #include "rf_raid.h"
225 #include "rf_dag.h"
226 #include "rf_desc.h"
227 #include "rf_aselect.h"
228 #include "rf_threadid.h"
229 #include "rf_general.h"
230 #include "rf_states.h"
231 #include "rf_dagutils.h"
232 #include "rf_driver.h"
233 #include "rf_engine.h"
234 #include "rf_map.h"
235 #include "rf_etimer.h"
236
237 #if defined(KERNEL) && (DKUSAGE > 0)
238 #include <sys/dkusage.h>
239 #include <io/common/iotypes.h>
240 #include <io/cam/dec_cam.h>
241 #include <io/cam/cam.h>
242 #include <io/cam/pdrv.h>
243 #endif /* KERNEL && DKUSAGE > 0 */
244
245 /* prototypes for some of the available states.
246
247 States must:
248
249 - not block.
250
251 - either schedule rf_ContinueRaidAccess as a callback and return
252 RF_TRUE, or complete all of their work and return RF_FALSE.
253
254 - increment desc->state when they have finished their work.
255 */
256
257
258 #ifdef SIMULATE
259 extern int global_async_flag;
260 #endif /* SIMULATE */
261
262 static char *StateName(RF_AccessState_t state)
263 {
264 switch (state) {
265 case rf_QuiesceState: return "QuiesceState";
266 case rf_MapState: return "MapState";
267 case rf_LockState: return "LockState";
268 case rf_CreateDAGState: return "CreateDAGState";
269 case rf_ExecuteDAGState: return "ExecuteDAGState";
270 case rf_ProcessDAGState: return "ProcessDAGState";
271 case rf_CleanupState: return "CleanupState";
272 case rf_LastState: return "LastState";
273 case rf_IncrAccessesCountState: return "IncrAccessesCountState";
274 case rf_DecrAccessesCountState: return "DecrAccessesCountState";
275 default: return "!!! UnnamedState !!!";
276 }
277 }
278
279 void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
280 {
281 int suspended = RF_FALSE;
282 int current_state_index = desc->state;
283 RF_AccessState_t current_state = desc->states[current_state_index];
284
285 #ifdef SIMULATE
286 rf_SetCurrentOwner(desc->owner);
287 #endif /* SIMULATE */
288
289 do {
290
291 current_state_index = desc->state;
292 current_state = desc->states [current_state_index];
293
294 switch (current_state) {
295
296 case rf_QuiesceState: suspended = rf_State_Quiesce(desc);
297 break;
298 case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc);
299 break;
300 case rf_MapState: suspended = rf_State_Map(desc);
301 break;
302 case rf_LockState: suspended = rf_State_Lock(desc);
303 break;
304 case rf_CreateDAGState: suspended = rf_State_CreateDAG(desc);
305 break;
306 case rf_ExecuteDAGState: suspended = rf_State_ExecuteDAG(desc);
307 break;
308 case rf_ProcessDAGState: suspended = rf_State_ProcessDAG(desc);
309 break;
310 case rf_CleanupState: suspended = rf_State_Cleanup(desc);
311 break;
312 case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc);
313 break;
314 case rf_LastState: suspended = rf_State_LastState(desc);
315 break;
316 }
317
318 /* after this point, we cannot dereference desc since desc may
319 have been freed. desc is only freed in LastState, so if we
320 renter this function or loop back up, desc should be valid. */
321
322 if (rf_printStatesDebug) {
323 int tid;
324 rf_get_threadid (tid);
325
326 printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
327 tid, StateName(current_state), current_state_index, (long)desc,
328 suspended ? "callback scheduled" : "looping");
329 }
330 } while (!suspended && current_state != rf_LastState);
331
332 return;
333 }
334
335
336 void rf_ContinueDagAccess (RF_DagList_t *dagList)
337 {
338 RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
339 RF_RaidAccessDesc_t *desc;
340 RF_DagHeader_t *dag_h;
341 RF_Etimer_t timer;
342 int i;
343
344 desc = dagList->desc;
345
346 timer = tracerec->timer;
347 RF_ETIMER_STOP(timer);
348 RF_ETIMER_EVAL(timer);
349 tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
350 RF_ETIMER_START(tracerec->timer);
351
352 /* skip to dag which just finished */
353 dag_h = dagList->dags;
354 for (i = 0; i < dagList->numDagsDone; i++) {
355 dag_h = dag_h->next;
356 }
357
358 /* check to see if retry is required */
359 if (dag_h->status == rf_rollBackward) {
360 /* when a dag fails, mark desc status as bad and allow all other dags
361 * in the desc to execute to completion. then, free all dags and start over */
362 desc->status = 1; /* bad status */
363 #if RF_DEMO > 0
364 if (!rf_demoMode)
365 #endif /* RF_DEMO > 0 */
366 {
367 printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
368 desc->tid, desc->type, (long)desc->raidAddress,
369 (long)desc->raidAddress,(int)desc->numBlocks,
370 (int)desc->numBlocks, (unsigned long) (desc->bufPtr));
371 }
372 }
373
374 dagList->numDagsDone++;
375 rf_ContinueRaidAccess(desc);
376 }
377
378
379 int rf_State_LastState(RF_RaidAccessDesc_t *desc)
380 {
381 void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc;
382 RF_CBParam_t callbackArg;
383
384 callbackArg.p = desc->callbackArg;
385
386 #ifdef SIMULATE
387 int tid;
388 rf_get_threadid(tid);
389
390 if (rf_accessDebug)
391 printf("async_flag set to %d\n",global_async_flag);
392 global_async_flag=desc->async_flag;
393 if (rf_accessDebug)
394 printf("Will now do clean up for %d\n",rf_GetCurrentOwner());
395 rf_FreeRaidAccDesc(desc);
396
397 if (callbackFunc)
398 callbackFunc(callbackArg);
399 #else /* SIMULATE */
400
401 #ifndef KERNEL
402
403 if (!(desc->flags & RF_DAG_NONBLOCKING_IO)) {
404 /* bummer that we have to take another lock here */
405 RF_LOCK_MUTEX(desc->mutex);
406 RF_ASSERT(desc->flags&RF_DAG_ACCESS_COMPLETE);
407 RF_SIGNAL_COND(desc->cond); /* DoAccess frees the desc in the blocking-I/O case */
408 RF_UNLOCK_MUTEX(desc->mutex);
409 }
410 else
411 rf_FreeRaidAccDesc(desc);
412
413 if (callbackFunc)
414 callbackFunc(callbackArg);
415
416 #else /* KERNEL */
417 if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */
418 #if DKUSAGE > 0
419 RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp);
420 #else
421 RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid);
422 #endif /* DKUSAGE > 0 */
423
424 /*
425 * If this is not an async request, wake up the caller
426 */
427 if (desc->async_flag == 0)
428 wakeup(desc->bp);
429
430 /* printf("Calling biodone on 0x%x\n",desc->bp); */
431 biodone(desc->bp); /* access came through ioctl */
432 }
433
434 if (callbackFunc) callbackFunc(callbackArg);
435 rf_FreeRaidAccDesc(desc);
436
437 #endif /* ! KERNEL */
438 #endif /* SIMULATE */
439
440 return RF_FALSE;
441 }
442
443 int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
444 {
445 RF_Raid_t *raidPtr;
446
447 raidPtr = desc->raidPtr;
448 /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */
449 RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
450 raidPtr->accs_in_flight++; /* used to detect quiescence */
451 RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
452
453 desc->state++;
454 return RF_FALSE;
455 }
456
457 int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
458 {
459 RF_Raid_t *raidPtr;
460
461 raidPtr = desc->raidPtr;
462
463 RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
464 raidPtr->accs_in_flight--;
465 if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
466 rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
467 }
468 rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
469 RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
470
471 desc->state++;
472 return RF_FALSE;
473 }
474
475 int rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
476 {
477 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
478 RF_Etimer_t timer;
479 int suspended = RF_FALSE;
480 RF_Raid_t *raidPtr;
481
482 raidPtr = desc->raidPtr;
483
484 RF_ETIMER_START(timer);
485 RF_ETIMER_START(desc->timer);
486
487 RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
488 if (raidPtr->accesses_suspended) {
489 RF_CallbackDesc_t *cb;
490 cb = rf_AllocCallbackDesc();
491 /* XXX the following cast is quite bogus... rf_ContinueRaidAccess
492 takes a (RF_RaidAccessDesc_t *) as an argument.. GO */
493 cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess;
494 cb->callbackArg.p = (void *) desc;
495 cb->next = raidPtr->quiesce_wait_list;
496 raidPtr->quiesce_wait_list = cb;
497 suspended = RF_TRUE;
498 }
499
500 RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
501
502 RF_ETIMER_STOP(timer);
503 RF_ETIMER_EVAL(timer);
504 tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
505
506 if (suspended && rf_quiesceDebug)
507 printf("Stalling access due to quiescence lock\n");
508
509 desc->state++;
510 return suspended;
511 }
512
513 int rf_State_Map(RF_RaidAccessDesc_t *desc)
514 {
515 RF_Raid_t *raidPtr = desc->raidPtr;
516 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
517 RF_Etimer_t timer;
518
519 RF_ETIMER_START(timer);
520
521 if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
522 desc->bufPtr, RF_DONT_REMAP)))
523 RF_PANIC();
524
525 RF_ETIMER_STOP(timer);
526 RF_ETIMER_EVAL(timer);
527 tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
528
529 desc->state ++;
530 return RF_FALSE;
531 }
532
533 int rf_State_Lock(RF_RaidAccessDesc_t *desc)
534 {
535 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
536 RF_Raid_t *raidPtr = desc->raidPtr;
537 RF_AccessStripeMapHeader_t *asmh = desc->asmap;
538 RF_AccessStripeMap_t *asm_p;
539 RF_Etimer_t timer;
540 int suspended = RF_FALSE;
541
542 RF_ETIMER_START(timer);
543 if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
544 RF_StripeNum_t lastStripeID = -1;
545
546 /* acquire each lock that we don't already hold */
547 for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
548 RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
549 if (!rf_suppressLocksAndLargeWrites &&
550 asm_p->parityInfo &&
551 !(desc->flags& RF_DAG_SUPPRESS_LOCKS) &&
552 !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED))
553 {
554 asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
555 RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired
556 hierarchically */
557 lastStripeID = asm_p->stripeID;
558 /* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus! GO */
559 RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
560 (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p,
561 raidPtr->Layout.dataSectorsPerStripe);
562 if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
563 &asm_p->lockReqDesc))
564 {
565 suspended = RF_TRUE;
566 break;
567 }
568 }
569
570 if (desc->type == RF_IO_TYPE_WRITE &&
571 raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing)
572 {
573 if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) {
574 int val;
575
576 asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
577 /* XXX the cast below is quite bogus!!! XXX GO */
578 val = rf_ForceOrBlockRecon(raidPtr, asm_p,
579 (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc);
580 if (val == 0) {
581 asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
582 }
583 else {
584 suspended = RF_TRUE;
585 break;
586 }
587 }
588 else {
589 if (rf_pssDebug) {
590 printf("[%d] skipping force/block because already done, psid %ld\n",
591 desc->tid,(long)asm_p->stripeID);
592 }
593 }
594 }
595 else {
596 if (rf_pssDebug) {
597 printf("[%d] skipping force/block because not write or not under recon, psid %ld\n",
598 desc->tid,(long)asm_p->stripeID);
599 }
600 }
601 }
602
603 RF_ETIMER_STOP(timer);
604 RF_ETIMER_EVAL(timer);
605 tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
606
607 if (suspended)
608 return(RF_TRUE);
609 }
610
611 desc->state++;
612 return(RF_FALSE);
613 }
614
615 /*
616 * the following three states create, execute, and post-process dags
617 * the error recovery unit is a single dag.
618 * by default, SelectAlgorithm creates an array of dags, one per parity stripe
619 * in some tricky cases, multiple dags per stripe are created
620 * - dags within a parity stripe are executed sequentially (arbitrary order)
621 * - dags for distinct parity stripes are executed concurrently
622 *
623 * repeat until all dags complete successfully -or- dag selection fails
624 *
625 * while !done
626 * create dag(s) (SelectAlgorithm)
627 * if dag
628 * execute dag (DispatchDAG)
629 * if dag successful
630 * done (SUCCESS)
631 * else
632 * !done (RETRY - start over with new dags)
633 * else
634 * done (FAIL)
635 */
636 int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc)
637 {
638 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
639 RF_Etimer_t timer;
640 RF_DagHeader_t *dag_h;
641 int i, selectStatus;
642
643 /* generate a dag for the access, and fire it off. When the dag
644 completes, we'll get re-invoked in the next state. */
645 RF_ETIMER_START(timer);
646 /* SelectAlgorithm returns one or more dags */
647 selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS);
648 if (rf_printDAGsDebug)
649 for (i = 0; i < desc->numStripes; i++)
650 rf_PrintDAGList(desc->dagArray[i].dags);
651 RF_ETIMER_STOP(timer);
652 RF_ETIMER_EVAL(timer);
653 /* update time to create all dags */
654 tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
655
656 desc->status = 0; /* good status */
657
658 if (selectStatus) {
659 /* failed to create a dag */
660 /* this happens when there are too many faults or incomplete dag libraries */
661 printf("[Failed to create a DAG\n]");
662 RF_PANIC();
663 }
664 else {
665 /* bind dags to desc */
666 for (i = 0; i < desc->numStripes; i++) {
667 dag_h = desc->dagArray[i].dags;
668 while (dag_h) {
669 #ifdef KERNEL
670 dag_h->bp = (struct buf *) desc->bp;
671 #endif /* KERNEL */
672 dag_h->tracerec = tracerec;
673 dag_h = dag_h->next;
674 }
675 }
676 desc->flags |= RF_DAG_DISPATCH_RETURNED;
677 desc->state++; /* next state should be rf_State_ExecuteDAG */
678 }
679 return RF_FALSE;
680 }
681
682
683
684 /* the access has an array of dagLists, one dagList per parity stripe.
685 * fire the first dag in each parity stripe (dagList).
686 * dags within a stripe (dagList) must be executed sequentially
687 * - this preserves atomic parity update
688 * dags for independents parity groups (stripes) are fired concurrently */
689
690 int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
691 {
692 int i;
693 RF_DagHeader_t *dag_h;
694 RF_DagList_t *dagArray = desc->dagArray;
695
696 /* next state is always rf_State_ProcessDAG
697 * important to do this before firing the first dag
698 * (it may finish before we leave this routine) */
699 desc->state++;
700
701 /* sweep dag array, a stripe at a time, firing the first dag in each stripe */
702 for (i = 0; i < desc->numStripes; i++) {
703 RF_ASSERT(dagArray[i].numDags > 0);
704 RF_ASSERT(dagArray[i].numDagsDone == 0);
705 RF_ASSERT(dagArray[i].numDagsFired == 0);
706 RF_ETIMER_START(dagArray[i].tracerec.timer);
707 /* fire first dag in this stripe */
708 dag_h = dagArray[i].dags;
709 RF_ASSERT(dag_h);
710 dagArray[i].numDagsFired++;
711 /* XXX Yet another case where we pass in a conflicting function pointer
712 :-( XXX GO */
713 rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]);
714 }
715
716 /* the DAG will always call the callback, even if there was no
717 * blocking, so we are always suspended in this state */
718 return RF_TRUE;
719 }
720
721
722
723 /* rf_State_ProcessDAG is entered when a dag completes.
724 * first, check to all dags in the access have completed
725 * if not, fire as many dags as possible */
726
727 int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
728 {
729 RF_AccessStripeMapHeader_t *asmh = desc->asmap;
730 RF_Raid_t *raidPtr = desc->raidPtr;
731 RF_DagHeader_t *dag_h;
732 int i, j, done = RF_TRUE;
733 RF_DagList_t *dagArray = desc->dagArray;
734 RF_Etimer_t timer;
735
736 /* check to see if this is the last dag */
737 for (i = 0; i < desc->numStripes; i++)
738 if (dagArray[i].numDags != dagArray[i].numDagsDone)
739 done = RF_FALSE;
740
741 if (done) {
742 if (desc->status) {
743 /* a dag failed, retry */
744 RF_ETIMER_START(timer);
745 /* free all dags */
746 for (i = 0; i < desc->numStripes; i++) {
747 rf_FreeDAG(desc->dagArray[i].dags);
748 }
749 rf_MarkFailuresInASMList(raidPtr, asmh);
750 /* back up to rf_State_CreateDAG */
751 desc->state = desc->state - 2;
752 return RF_FALSE;
753 }
754 else {
755 /* move on to rf_State_Cleanup */
756 desc->state++;
757 }
758 return RF_FALSE;
759 }
760 else {
761 /* more dags to execute */
762 /* see if any are ready to be fired. if so, fire them */
763 /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */
764 for (i = 0; i < desc->numStripes; i++) {
765 if ((dagArray[i].numDagsDone < dagArray[i].numDags)
766 && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
767 && (dagArray[i].numDagsFired > 0)) {
768 RF_ETIMER_START(dagArray[i].tracerec.timer);
769 /* fire next dag in this stripe */
770 /* first, skip to next dag awaiting execution */
771 dag_h = dagArray[i].dags;
772 for (j = 0; j < dagArray[i].numDagsDone; j++)
773 dag_h = dag_h->next;
774 dagArray[i].numDagsFired++;
775 /* XXX and again we pass a different function pointer.. GO */
776 rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess,
777 &dagArray[i]);
778 }
779 }
780 return RF_TRUE;
781 }
782 }
783
784 /* only make it this far if all dags complete successfully */
785 int rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
786 {
787 RF_AccTraceEntry_t *tracerec = &desc->tracerec;
788 RF_AccessStripeMapHeader_t *asmh = desc->asmap;
789 RF_Raid_t *raidPtr = desc->raidPtr;
790 RF_AccessStripeMap_t *asm_p;
791 RF_DagHeader_t *dag_h;
792 RF_Etimer_t timer;
793 int tid, i;
794
795 desc->state ++;
796
797 rf_get_threadid(tid);
798
799 timer = tracerec->timer;
800 RF_ETIMER_STOP(timer);
801 RF_ETIMER_EVAL(timer);
802 tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
803
804 /* the RAID I/O is complete. Clean up. */
805 tracerec->specific.user.dag_retry_us = 0;
806
807 RF_ETIMER_START(timer);
808 if (desc->flags & RF_DAG_RETURN_DAG) {
809 /* copy dags into paramDAG */
810 *(desc->paramDAG) = desc->dagArray[0].dags;
811 dag_h = *(desc->paramDAG);
812 for (i = 1; i < desc->numStripes; i++) {
813 /* concatenate dags from remaining stripes */
814 RF_ASSERT(dag_h);
815 while (dag_h->next)
816 dag_h = dag_h->next;
817 dag_h->next = desc->dagArray[i].dags;
818 }
819 }
820 else {
821 /* free all dags */
822 for (i = 0; i < desc->numStripes; i++) {
823 rf_FreeDAG(desc->dagArray[i].dags);
824 }
825 }
826
827 RF_ETIMER_STOP(timer);
828 RF_ETIMER_EVAL(timer);
829 tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
830
831 RF_ETIMER_START(timer);
832 if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
833 for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
834 if (!rf_suppressLocksAndLargeWrites &&
835 asm_p->parityInfo &&
836 !(desc->flags&RF_DAG_SUPPRESS_LOCKS))
837 {
838 RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
839 rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID,
840 &asm_p->lockReqDesc);
841 }
842 if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
843 rf_UnblockRecon(raidPtr, asm_p);
844 }
845 }
846 }
847
848 #ifdef SIMULATE
849 /* refresh current owner in case blocked ios where allowed to run */
850 rf_SetCurrentOwner(desc->owner);
851 #endif /* SIMULATE */
852
853 RF_ETIMER_STOP(timer);
854 RF_ETIMER_EVAL(timer);
855 tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
856
857 RF_ETIMER_START(timer);
858 if (desc->flags & RF_DAG_RETURN_ASM)
859 *(desc->paramASM) = asmh;
860 else
861 rf_FreeAccessStripeMap(asmh);
862 RF_ETIMER_STOP(timer);
863 RF_ETIMER_EVAL(timer);
864 tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
865
866 RF_ETIMER_STOP(desc->timer);
867 RF_ETIMER_EVAL(desc->timer);
868
869 timer = desc->tracerec.tot_timer;
870 RF_ETIMER_STOP(timer);
871 RF_ETIMER_EVAL(timer);
872 desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
873
874 rf_LogTraceRec(raidPtr, tracerec);
875
876 desc->flags |= RF_DAG_ACCESS_COMPLETE;
877
878 return RF_FALSE;
879 }
880