rf_states.c revision 1.4 1 1.4 oster /* $NetBSD: rf_states.c,v 1.4 1999/01/26 02:34:02 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, William V. Courtright II, Robby Findler
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster #include <sys/errno.h>
30 1.1 oster
31 1.1 oster #include "rf_archs.h"
32 1.1 oster #include "rf_threadstuff.h"
33 1.1 oster #include "rf_raid.h"
34 1.1 oster #include "rf_dag.h"
35 1.1 oster #include "rf_desc.h"
36 1.1 oster #include "rf_aselect.h"
37 1.1 oster #include "rf_threadid.h"
38 1.1 oster #include "rf_general.h"
39 1.1 oster #include "rf_states.h"
40 1.1 oster #include "rf_dagutils.h"
41 1.1 oster #include "rf_driver.h"
42 1.1 oster #include "rf_engine.h"
43 1.1 oster #include "rf_map.h"
44 1.1 oster #include "rf_etimer.h"
45 1.1 oster
46 1.1 oster #if defined(KERNEL) && (DKUSAGE > 0)
47 1.1 oster #include <sys/dkusage.h>
48 1.1 oster #include <io/common/iotypes.h>
49 1.1 oster #include <io/cam/dec_cam.h>
50 1.1 oster #include <io/cam/cam.h>
51 1.1 oster #include <io/cam/pdrv.h>
52 1.1 oster #endif /* KERNEL && DKUSAGE > 0 */
53 1.1 oster
54 1.1 oster /* prototypes for some of the available states.
55 1.1 oster
56 1.1 oster States must:
57 1.1 oster
58 1.1 oster - not block.
59 1.1 oster
60 1.1 oster - either schedule rf_ContinueRaidAccess as a callback and return
61 1.1 oster RF_TRUE, or complete all of their work and return RF_FALSE.
62 1.1 oster
63 1.1 oster - increment desc->state when they have finished their work.
64 1.1 oster */
65 1.1 oster
66 1.1 oster static char *StateName(RF_AccessState_t state)
67 1.1 oster {
68 1.1 oster switch (state) {
69 1.1 oster case rf_QuiesceState: return "QuiesceState";
70 1.1 oster case rf_MapState: return "MapState";
71 1.1 oster case rf_LockState: return "LockState";
72 1.1 oster case rf_CreateDAGState: return "CreateDAGState";
73 1.1 oster case rf_ExecuteDAGState: return "ExecuteDAGState";
74 1.1 oster case rf_ProcessDAGState: return "ProcessDAGState";
75 1.1 oster case rf_CleanupState: return "CleanupState";
76 1.1 oster case rf_LastState: return "LastState";
77 1.1 oster case rf_IncrAccessesCountState: return "IncrAccessesCountState";
78 1.1 oster case rf_DecrAccessesCountState: return "DecrAccessesCountState";
79 1.1 oster default: return "!!! UnnamedState !!!";
80 1.1 oster }
81 1.1 oster }
82 1.1 oster
83 1.1 oster void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
84 1.1 oster {
85 1.1 oster int suspended = RF_FALSE;
86 1.1 oster int current_state_index = desc->state;
87 1.1 oster RF_AccessState_t current_state = desc->states[current_state_index];
88 1.1 oster
89 1.1 oster do {
90 1.1 oster
91 1.1 oster current_state_index = desc->state;
92 1.1 oster current_state = desc->states [current_state_index];
93 1.1 oster
94 1.1 oster switch (current_state) {
95 1.1 oster
96 1.1 oster case rf_QuiesceState: suspended = rf_State_Quiesce(desc);
97 1.1 oster break;
98 1.1 oster case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc);
99 1.1 oster break;
100 1.1 oster case rf_MapState: suspended = rf_State_Map(desc);
101 1.1 oster break;
102 1.1 oster case rf_LockState: suspended = rf_State_Lock(desc);
103 1.1 oster break;
104 1.1 oster case rf_CreateDAGState: suspended = rf_State_CreateDAG(desc);
105 1.1 oster break;
106 1.1 oster case rf_ExecuteDAGState: suspended = rf_State_ExecuteDAG(desc);
107 1.1 oster break;
108 1.1 oster case rf_ProcessDAGState: suspended = rf_State_ProcessDAG(desc);
109 1.1 oster break;
110 1.1 oster case rf_CleanupState: suspended = rf_State_Cleanup(desc);
111 1.1 oster break;
112 1.1 oster case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc);
113 1.1 oster break;
114 1.1 oster case rf_LastState: suspended = rf_State_LastState(desc);
115 1.1 oster break;
116 1.1 oster }
117 1.1 oster
118 1.1 oster /* after this point, we cannot dereference desc since desc may
119 1.1 oster have been freed. desc is only freed in LastState, so if we
120 1.1 oster renter this function or loop back up, desc should be valid. */
121 1.1 oster
122 1.1 oster if (rf_printStatesDebug) {
123 1.1 oster int tid;
124 1.1 oster rf_get_threadid (tid);
125 1.1 oster
126 1.1 oster printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
127 1.1 oster tid, StateName(current_state), current_state_index, (long)desc,
128 1.1 oster suspended ? "callback scheduled" : "looping");
129 1.1 oster }
130 1.1 oster } while (!suspended && current_state != rf_LastState);
131 1.1 oster
132 1.1 oster return;
133 1.1 oster }
134 1.1 oster
135 1.1 oster
136 1.1 oster void rf_ContinueDagAccess (RF_DagList_t *dagList)
137 1.1 oster {
138 1.1 oster RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
139 1.1 oster RF_RaidAccessDesc_t *desc;
140 1.1 oster RF_DagHeader_t *dag_h;
141 1.1 oster RF_Etimer_t timer;
142 1.1 oster int i;
143 1.1 oster
144 1.1 oster desc = dagList->desc;
145 1.1 oster
146 1.1 oster timer = tracerec->timer;
147 1.1 oster RF_ETIMER_STOP(timer);
148 1.1 oster RF_ETIMER_EVAL(timer);
149 1.1 oster tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
150 1.1 oster RF_ETIMER_START(tracerec->timer);
151 1.1 oster
152 1.1 oster /* skip to dag which just finished */
153 1.1 oster dag_h = dagList->dags;
154 1.1 oster for (i = 0; i < dagList->numDagsDone; i++) {
155 1.1 oster dag_h = dag_h->next;
156 1.1 oster }
157 1.1 oster
158 1.1 oster /* check to see if retry is required */
159 1.1 oster if (dag_h->status == rf_rollBackward) {
160 1.1 oster /* when a dag fails, mark desc status as bad and allow all other dags
161 1.1 oster * in the desc to execute to completion. then, free all dags and start over */
162 1.1 oster desc->status = 1; /* bad status */
163 1.1 oster #if RF_DEMO > 0
164 1.1 oster if (!rf_demoMode)
165 1.1 oster #endif /* RF_DEMO > 0 */
166 1.1 oster {
167 1.1 oster printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
168 1.1 oster desc->tid, desc->type, (long)desc->raidAddress,
169 1.1 oster (long)desc->raidAddress,(int)desc->numBlocks,
170 1.1 oster (int)desc->numBlocks, (unsigned long) (desc->bufPtr));
171 1.1 oster }
172 1.1 oster }
173 1.1 oster
174 1.1 oster dagList->numDagsDone++;
175 1.1 oster rf_ContinueRaidAccess(desc);
176 1.1 oster }
177 1.1 oster
178 1.1 oster
179 1.1 oster int rf_State_LastState(RF_RaidAccessDesc_t *desc)
180 1.1 oster {
181 1.1 oster void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc;
182 1.2 drochner RF_CBParam_t callbackArg;
183 1.2 drochner
184 1.2 drochner callbackArg.p = desc->callbackArg;
185 1.1 oster
186 1.1 oster if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */
187 1.1 oster #if DKUSAGE > 0
188 1.1 oster RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp);
189 1.1 oster #else
190 1.1 oster RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid);
191 1.1 oster #endif /* DKUSAGE > 0 */
192 1.3 explorer
193 1.3 explorer /*
194 1.3 explorer * If this is not an async request, wake up the caller
195 1.3 explorer */
196 1.3 explorer if (desc->async_flag == 0)
197 1.3 explorer wakeup(desc->bp);
198 1.3 explorer
199 1.1 oster /* printf("Calling biodone on 0x%x\n",desc->bp); */
200 1.1 oster biodone(desc->bp); /* access came through ioctl */
201 1.1 oster }
202 1.1 oster
203 1.1 oster if (callbackFunc) callbackFunc(callbackArg);
204 1.1 oster rf_FreeRaidAccDesc(desc);
205 1.1 oster
206 1.1 oster return RF_FALSE;
207 1.1 oster }
208 1.1 oster
209 1.1 oster int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
210 1.1 oster {
211 1.1 oster RF_Raid_t *raidPtr;
212 1.1 oster
213 1.1 oster raidPtr = desc->raidPtr;
214 1.1 oster /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */
215 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
216 1.1 oster raidPtr->accs_in_flight++; /* used to detect quiescence */
217 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
218 1.1 oster
219 1.1 oster desc->state++;
220 1.1 oster return RF_FALSE;
221 1.1 oster }
222 1.1 oster
223 1.1 oster int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
224 1.1 oster {
225 1.1 oster RF_Raid_t *raidPtr;
226 1.1 oster
227 1.1 oster raidPtr = desc->raidPtr;
228 1.1 oster
229 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
230 1.1 oster raidPtr->accs_in_flight--;
231 1.1 oster if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
232 1.1 oster rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
233 1.1 oster }
234 1.1 oster rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
235 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
236 1.1 oster
237 1.1 oster desc->state++;
238 1.1 oster return RF_FALSE;
239 1.1 oster }
240 1.1 oster
241 1.1 oster int rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
242 1.1 oster {
243 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
244 1.1 oster RF_Etimer_t timer;
245 1.1 oster int suspended = RF_FALSE;
246 1.1 oster RF_Raid_t *raidPtr;
247 1.1 oster
248 1.1 oster raidPtr = desc->raidPtr;
249 1.1 oster
250 1.1 oster RF_ETIMER_START(timer);
251 1.1 oster RF_ETIMER_START(desc->timer);
252 1.1 oster
253 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
254 1.1 oster if (raidPtr->accesses_suspended) {
255 1.1 oster RF_CallbackDesc_t *cb;
256 1.1 oster cb = rf_AllocCallbackDesc();
257 1.1 oster /* XXX the following cast is quite bogus... rf_ContinueRaidAccess
258 1.1 oster takes a (RF_RaidAccessDesc_t *) as an argument.. GO */
259 1.1 oster cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess;
260 1.1 oster cb->callbackArg.p = (void *) desc;
261 1.1 oster cb->next = raidPtr->quiesce_wait_list;
262 1.1 oster raidPtr->quiesce_wait_list = cb;
263 1.1 oster suspended = RF_TRUE;
264 1.1 oster }
265 1.1 oster
266 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
267 1.1 oster
268 1.1 oster RF_ETIMER_STOP(timer);
269 1.1 oster RF_ETIMER_EVAL(timer);
270 1.1 oster tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
271 1.1 oster
272 1.1 oster if (suspended && rf_quiesceDebug)
273 1.1 oster printf("Stalling access due to quiescence lock\n");
274 1.1 oster
275 1.1 oster desc->state++;
276 1.1 oster return suspended;
277 1.1 oster }
278 1.1 oster
279 1.1 oster int rf_State_Map(RF_RaidAccessDesc_t *desc)
280 1.1 oster {
281 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
282 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
283 1.1 oster RF_Etimer_t timer;
284 1.1 oster
285 1.1 oster RF_ETIMER_START(timer);
286 1.1 oster
287 1.1 oster if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
288 1.1 oster desc->bufPtr, RF_DONT_REMAP)))
289 1.1 oster RF_PANIC();
290 1.1 oster
291 1.1 oster RF_ETIMER_STOP(timer);
292 1.1 oster RF_ETIMER_EVAL(timer);
293 1.1 oster tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
294 1.1 oster
295 1.1 oster desc->state ++;
296 1.1 oster return RF_FALSE;
297 1.1 oster }
298 1.1 oster
299 1.1 oster int rf_State_Lock(RF_RaidAccessDesc_t *desc)
300 1.1 oster {
301 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
302 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
303 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
304 1.1 oster RF_AccessStripeMap_t *asm_p;
305 1.1 oster RF_Etimer_t timer;
306 1.1 oster int suspended = RF_FALSE;
307 1.1 oster
308 1.1 oster RF_ETIMER_START(timer);
309 1.1 oster if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
310 1.1 oster RF_StripeNum_t lastStripeID = -1;
311 1.1 oster
312 1.1 oster /* acquire each lock that we don't already hold */
313 1.1 oster for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
314 1.1 oster RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
315 1.1 oster if (!rf_suppressLocksAndLargeWrites &&
316 1.1 oster asm_p->parityInfo &&
317 1.1 oster !(desc->flags& RF_DAG_SUPPRESS_LOCKS) &&
318 1.1 oster !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED))
319 1.1 oster {
320 1.1 oster asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
321 1.1 oster RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired
322 1.1 oster hierarchically */
323 1.1 oster lastStripeID = asm_p->stripeID;
324 1.1 oster /* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus! GO */
325 1.1 oster RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
326 1.1 oster (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p,
327 1.1 oster raidPtr->Layout.dataSectorsPerStripe);
328 1.1 oster if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
329 1.1 oster &asm_p->lockReqDesc))
330 1.1 oster {
331 1.1 oster suspended = RF_TRUE;
332 1.1 oster break;
333 1.1 oster }
334 1.1 oster }
335 1.1 oster
336 1.1 oster if (desc->type == RF_IO_TYPE_WRITE &&
337 1.1 oster raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing)
338 1.1 oster {
339 1.1 oster if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) {
340 1.1 oster int val;
341 1.1 oster
342 1.1 oster asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
343 1.1 oster /* XXX the cast below is quite bogus!!! XXX GO */
344 1.1 oster val = rf_ForceOrBlockRecon(raidPtr, asm_p,
345 1.1 oster (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc);
346 1.1 oster if (val == 0) {
347 1.1 oster asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
348 1.1 oster }
349 1.1 oster else {
350 1.1 oster suspended = RF_TRUE;
351 1.1 oster break;
352 1.1 oster }
353 1.1 oster }
354 1.1 oster else {
355 1.1 oster if (rf_pssDebug) {
356 1.1 oster printf("[%d] skipping force/block because already done, psid %ld\n",
357 1.1 oster desc->tid,(long)asm_p->stripeID);
358 1.1 oster }
359 1.1 oster }
360 1.1 oster }
361 1.1 oster else {
362 1.1 oster if (rf_pssDebug) {
363 1.1 oster printf("[%d] skipping force/block because not write or not under recon, psid %ld\n",
364 1.1 oster desc->tid,(long)asm_p->stripeID);
365 1.1 oster }
366 1.1 oster }
367 1.1 oster }
368 1.1 oster
369 1.1 oster RF_ETIMER_STOP(timer);
370 1.1 oster RF_ETIMER_EVAL(timer);
371 1.1 oster tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
372 1.1 oster
373 1.1 oster if (suspended)
374 1.1 oster return(RF_TRUE);
375 1.1 oster }
376 1.1 oster
377 1.1 oster desc->state++;
378 1.1 oster return(RF_FALSE);
379 1.1 oster }
380 1.1 oster
381 1.1 oster /*
382 1.1 oster * the following three states create, execute, and post-process dags
383 1.1 oster * the error recovery unit is a single dag.
384 1.1 oster * by default, SelectAlgorithm creates an array of dags, one per parity stripe
385 1.1 oster * in some tricky cases, multiple dags per stripe are created
386 1.1 oster * - dags within a parity stripe are executed sequentially (arbitrary order)
387 1.1 oster * - dags for distinct parity stripes are executed concurrently
388 1.1 oster *
389 1.1 oster * repeat until all dags complete successfully -or- dag selection fails
390 1.1 oster *
391 1.1 oster * while !done
392 1.1 oster * create dag(s) (SelectAlgorithm)
393 1.1 oster * if dag
394 1.1 oster * execute dag (DispatchDAG)
395 1.1 oster * if dag successful
396 1.1 oster * done (SUCCESS)
397 1.1 oster * else
398 1.1 oster * !done (RETRY - start over with new dags)
399 1.1 oster * else
400 1.1 oster * done (FAIL)
401 1.1 oster */
402 1.1 oster int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc)
403 1.1 oster {
404 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
405 1.1 oster RF_Etimer_t timer;
406 1.1 oster RF_DagHeader_t *dag_h;
407 1.1 oster int i, selectStatus;
408 1.1 oster
409 1.1 oster /* generate a dag for the access, and fire it off. When the dag
410 1.1 oster completes, we'll get re-invoked in the next state. */
411 1.1 oster RF_ETIMER_START(timer);
412 1.1 oster /* SelectAlgorithm returns one or more dags */
413 1.1 oster selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS);
414 1.1 oster if (rf_printDAGsDebug)
415 1.1 oster for (i = 0; i < desc->numStripes; i++)
416 1.1 oster rf_PrintDAGList(desc->dagArray[i].dags);
417 1.1 oster RF_ETIMER_STOP(timer);
418 1.1 oster RF_ETIMER_EVAL(timer);
419 1.1 oster /* update time to create all dags */
420 1.1 oster tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
421 1.1 oster
422 1.1 oster desc->status = 0; /* good status */
423 1.1 oster
424 1.1 oster if (selectStatus) {
425 1.1 oster /* failed to create a dag */
426 1.1 oster /* this happens when there are too many faults or incomplete dag libraries */
427 1.1 oster printf("[Failed to create a DAG\n]");
428 1.1 oster RF_PANIC();
429 1.1 oster }
430 1.1 oster else {
431 1.1 oster /* bind dags to desc */
432 1.1 oster for (i = 0; i < desc->numStripes; i++) {
433 1.1 oster dag_h = desc->dagArray[i].dags;
434 1.1 oster while (dag_h) {
435 1.1 oster dag_h->bp = (struct buf *) desc->bp;
436 1.1 oster dag_h->tracerec = tracerec;
437 1.1 oster dag_h = dag_h->next;
438 1.1 oster }
439 1.1 oster }
440 1.1 oster desc->flags |= RF_DAG_DISPATCH_RETURNED;
441 1.1 oster desc->state++; /* next state should be rf_State_ExecuteDAG */
442 1.1 oster }
443 1.1 oster return RF_FALSE;
444 1.1 oster }
445 1.1 oster
446 1.1 oster
447 1.1 oster
448 1.1 oster /* the access has an array of dagLists, one dagList per parity stripe.
449 1.1 oster * fire the first dag in each parity stripe (dagList).
450 1.1 oster * dags within a stripe (dagList) must be executed sequentially
451 1.1 oster * - this preserves atomic parity update
452 1.1 oster * dags for independent parity groups (stripes) are fired concurrently */
453 1.1 oster
454 1.1 oster int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
455 1.1 oster {
456 1.1 oster int i;
457 1.1 oster RF_DagHeader_t *dag_h;
458 1.1 oster RF_DagList_t *dagArray = desc->dagArray;
459 1.1 oster
460 1.1 oster /* next state is always rf_State_ProcessDAG
461 1.1 oster * important to do this before firing the first dag
462 1.1 oster * (it may finish before we leave this routine) */
463 1.1 oster desc->state++;
464 1.1 oster
465 1.1 oster /* sweep dag array, a stripe at a time, firing the first dag in each stripe */
466 1.1 oster for (i = 0; i < desc->numStripes; i++) {
467 1.1 oster RF_ASSERT(dagArray[i].numDags > 0);
468 1.1 oster RF_ASSERT(dagArray[i].numDagsDone == 0);
469 1.1 oster RF_ASSERT(dagArray[i].numDagsFired == 0);
470 1.1 oster RF_ETIMER_START(dagArray[i].tracerec.timer);
471 1.1 oster /* fire first dag in this stripe */
472 1.1 oster dag_h = dagArray[i].dags;
473 1.1 oster RF_ASSERT(dag_h);
474 1.1 oster dagArray[i].numDagsFired++;
475 1.1 oster /* XXX Yet another case where we pass in a conflicting function pointer
476 1.1 oster :-( XXX GO */
477 1.1 oster rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]);
478 1.1 oster }
479 1.1 oster
480 1.1 oster /* the DAG will always call the callback, even if there was no
481 1.1 oster * blocking, so we are always suspended in this state */
482 1.1 oster return RF_TRUE;
483 1.1 oster }
484 1.1 oster
485 1.1 oster
486 1.1 oster
487 1.1 oster /* rf_State_ProcessDAG is entered when a dag completes.
488 1.1 oster * first, check to see if all dags in the access have completed
489 1.1 oster * if not, fire as many dags as possible */
490 1.1 oster
491 1.1 oster int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
492 1.1 oster {
493 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
494 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
495 1.1 oster RF_DagHeader_t *dag_h;
496 1.1 oster int i, j, done = RF_TRUE;
497 1.1 oster RF_DagList_t *dagArray = desc->dagArray;
498 1.1 oster RF_Etimer_t timer;
499 1.1 oster
500 1.1 oster /* check to see if this is the last dag */
501 1.1 oster for (i = 0; i < desc->numStripes; i++)
502 1.1 oster if (dagArray[i].numDags != dagArray[i].numDagsDone)
503 1.1 oster done = RF_FALSE;
504 1.1 oster
505 1.1 oster if (done) {
506 1.1 oster if (desc->status) {
507 1.1 oster /* a dag failed, retry */
508 1.1 oster RF_ETIMER_START(timer);
509 1.1 oster /* free all dags */
510 1.1 oster for (i = 0; i < desc->numStripes; i++) {
511 1.1 oster rf_FreeDAG(desc->dagArray[i].dags);
512 1.1 oster }
513 1.1 oster rf_MarkFailuresInASMList(raidPtr, asmh);
514 1.1 oster /* back up to rf_State_CreateDAG */
515 1.1 oster desc->state = desc->state - 2;
516 1.1 oster return RF_FALSE;
517 1.1 oster }
518 1.1 oster else {
519 1.1 oster /* move on to rf_State_Cleanup */
520 1.1 oster desc->state++;
521 1.1 oster }
522 1.1 oster return RF_FALSE;
523 1.1 oster }
524 1.1 oster else {
525 1.1 oster /* more dags to execute */
526 1.1 oster /* see if any are ready to be fired. if so, fire them */
527 1.1 oster /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */
528 1.1 oster for (i = 0; i < desc->numStripes; i++) {
529 1.1 oster if ((dagArray[i].numDagsDone < dagArray[i].numDags)
530 1.1 oster && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
531 1.1 oster && (dagArray[i].numDagsFired > 0)) {
532 1.1 oster RF_ETIMER_START(dagArray[i].tracerec.timer);
533 1.1 oster /* fire next dag in this stripe */
534 1.1 oster /* first, skip to next dag awaiting execution */
535 1.1 oster dag_h = dagArray[i].dags;
536 1.1 oster for (j = 0; j < dagArray[i].numDagsDone; j++)
537 1.1 oster dag_h = dag_h->next;
538 1.1 oster dagArray[i].numDagsFired++;
539 1.1 oster /* XXX and again we pass a different function pointer.. GO */
540 1.1 oster rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess,
541 1.1 oster &dagArray[i]);
542 1.1 oster }
543 1.1 oster }
544 1.1 oster return RF_TRUE;
545 1.1 oster }
546 1.1 oster }
547 1.1 oster
548 1.1 oster /* only make it this far if all dags complete successfully */
549 1.1 oster int rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
550 1.1 oster {
551 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
552 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
553 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
554 1.1 oster RF_AccessStripeMap_t *asm_p;
555 1.1 oster RF_DagHeader_t *dag_h;
556 1.1 oster RF_Etimer_t timer;
557 1.1 oster int tid, i;
558 1.1 oster
559 1.1 oster desc->state ++;
560 1.1 oster
561 1.1 oster rf_get_threadid(tid);
562 1.1 oster
563 1.1 oster timer = tracerec->timer;
564 1.1 oster RF_ETIMER_STOP(timer);
565 1.1 oster RF_ETIMER_EVAL(timer);
566 1.1 oster tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
567 1.1 oster
568 1.1 oster /* the RAID I/O is complete. Clean up. */
569 1.1 oster tracerec->specific.user.dag_retry_us = 0;
570 1.1 oster
571 1.1 oster RF_ETIMER_START(timer);
572 1.1 oster if (desc->flags & RF_DAG_RETURN_DAG) {
573 1.1 oster /* copy dags into paramDAG */
574 1.1 oster *(desc->paramDAG) = desc->dagArray[0].dags;
575 1.1 oster dag_h = *(desc->paramDAG);
576 1.1 oster for (i = 1; i < desc->numStripes; i++) {
577 1.1 oster /* concatenate dags from remaining stripes */
578 1.1 oster RF_ASSERT(dag_h);
579 1.1 oster while (dag_h->next)
580 1.1 oster dag_h = dag_h->next;
581 1.1 oster dag_h->next = desc->dagArray[i].dags;
582 1.1 oster }
583 1.1 oster }
584 1.1 oster else {
585 1.1 oster /* free all dags */
586 1.1 oster for (i = 0; i < desc->numStripes; i++) {
587 1.1 oster rf_FreeDAG(desc->dagArray[i].dags);
588 1.1 oster }
589 1.1 oster }
590 1.1 oster
591 1.1 oster RF_ETIMER_STOP(timer);
592 1.1 oster RF_ETIMER_EVAL(timer);
593 1.1 oster tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
594 1.1 oster
595 1.1 oster RF_ETIMER_START(timer);
596 1.1 oster if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
597 1.1 oster for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
598 1.1 oster if (!rf_suppressLocksAndLargeWrites &&
599 1.1 oster asm_p->parityInfo &&
600 1.1 oster !(desc->flags&RF_DAG_SUPPRESS_LOCKS))
601 1.1 oster {
602 1.1 oster RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
603 1.1 oster rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID,
604 1.1 oster &asm_p->lockReqDesc);
605 1.1 oster }
606 1.1 oster if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
607 1.1 oster rf_UnblockRecon(raidPtr, asm_p);
608 1.1 oster }
609 1.1 oster }
610 1.1 oster }
611 1.1 oster
612 1.1 oster RF_ETIMER_STOP(timer);
613 1.1 oster RF_ETIMER_EVAL(timer);
614 1.1 oster tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
615 1.1 oster
616 1.1 oster RF_ETIMER_START(timer);
617 1.1 oster if (desc->flags & RF_DAG_RETURN_ASM)
618 1.1 oster *(desc->paramASM) = asmh;
619 1.1 oster else
620 1.1 oster rf_FreeAccessStripeMap(asmh);
621 1.1 oster RF_ETIMER_STOP(timer);
622 1.1 oster RF_ETIMER_EVAL(timer);
623 1.1 oster tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
624 1.1 oster
625 1.1 oster RF_ETIMER_STOP(desc->timer);
626 1.1 oster RF_ETIMER_EVAL(desc->timer);
627 1.1 oster
628 1.1 oster timer = desc->tracerec.tot_timer;
629 1.1 oster RF_ETIMER_STOP(timer);
630 1.1 oster RF_ETIMER_EVAL(timer);
631 1.1 oster desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
632 1.1 oster
633 1.1 oster rf_LogTraceRec(raidPtr, tracerec);
634 1.1 oster
635 1.1 oster desc->flags |= RF_DAG_ACCESS_COMPLETE;
636 1.1 oster
637 1.1 oster return RF_FALSE;
638 1.1 oster }
639