rf_states.c revision 1.5 1 1.5 oster /* $NetBSD: rf_states.c,v 1.5 1999/01/26 04:40:03 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, William V. Courtright II, Robby Findler
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster #include <sys/errno.h>
30 1.1 oster
31 1.1 oster #include "rf_archs.h"
32 1.1 oster #include "rf_threadstuff.h"
33 1.1 oster #include "rf_raid.h"
34 1.1 oster #include "rf_dag.h"
35 1.1 oster #include "rf_desc.h"
36 1.1 oster #include "rf_aselect.h"
37 1.1 oster #include "rf_threadid.h"
38 1.1 oster #include "rf_general.h"
39 1.1 oster #include "rf_states.h"
40 1.1 oster #include "rf_dagutils.h"
41 1.1 oster #include "rf_driver.h"
42 1.1 oster #include "rf_engine.h"
43 1.1 oster #include "rf_map.h"
44 1.1 oster #include "rf_etimer.h"
45 1.1 oster
46 1.1 oster #if defined(KERNEL) && (DKUSAGE > 0)
47 1.1 oster #include <sys/dkusage.h>
48 1.1 oster #include <io/common/iotypes.h>
49 1.1 oster #include <io/cam/dec_cam.h>
50 1.1 oster #include <io/cam/cam.h>
51 1.1 oster #include <io/cam/pdrv.h>
52 1.1 oster #endif /* KERNEL && DKUSAGE > 0 */
53 1.1 oster
54 1.1 oster /* prototypes for some of the available states.
55 1.1 oster
56 1.1 oster States must:
57 1.1 oster
58 1.1 oster - not block.
59 1.1 oster
60 1.1 oster - either schedule rf_ContinueRaidAccess as a callback and return
61 1.1 oster RF_TRUE, or complete all of their work and return RF_FALSE.
62 1.1 oster
63 1.1 oster - increment desc->state when they have finished their work.
64 1.1 oster */
65 1.1 oster
66 1.1 oster static char *StateName(RF_AccessState_t state)
67 1.1 oster {
68 1.1 oster switch (state) {
69 1.1 oster case rf_QuiesceState: return "QuiesceState";
70 1.1 oster case rf_MapState: return "MapState";
71 1.1 oster case rf_LockState: return "LockState";
72 1.1 oster case rf_CreateDAGState: return "CreateDAGState";
73 1.1 oster case rf_ExecuteDAGState: return "ExecuteDAGState";
74 1.1 oster case rf_ProcessDAGState: return "ProcessDAGState";
75 1.1 oster case rf_CleanupState: return "CleanupState";
76 1.1 oster case rf_LastState: return "LastState";
77 1.1 oster case rf_IncrAccessesCountState: return "IncrAccessesCountState";
78 1.1 oster case rf_DecrAccessesCountState: return "DecrAccessesCountState";
79 1.1 oster default: return "!!! UnnamedState !!!";
80 1.1 oster }
81 1.1 oster }
82 1.1 oster
83 1.1 oster void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
84 1.1 oster {
85 1.1 oster int suspended = RF_FALSE;
86 1.1 oster int current_state_index = desc->state;
87 1.1 oster RF_AccessState_t current_state = desc->states[current_state_index];
88 1.1 oster
89 1.1 oster do {
90 1.1 oster
91 1.1 oster current_state_index = desc->state;
92 1.1 oster current_state = desc->states [current_state_index];
93 1.1 oster
94 1.1 oster switch (current_state) {
95 1.1 oster
96 1.1 oster case rf_QuiesceState: suspended = rf_State_Quiesce(desc);
97 1.1 oster break;
98 1.1 oster case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc);
99 1.1 oster break;
100 1.1 oster case rf_MapState: suspended = rf_State_Map(desc);
101 1.1 oster break;
102 1.1 oster case rf_LockState: suspended = rf_State_Lock(desc);
103 1.1 oster break;
104 1.1 oster case rf_CreateDAGState: suspended = rf_State_CreateDAG(desc);
105 1.1 oster break;
106 1.1 oster case rf_ExecuteDAGState: suspended = rf_State_ExecuteDAG(desc);
107 1.1 oster break;
108 1.1 oster case rf_ProcessDAGState: suspended = rf_State_ProcessDAG(desc);
109 1.1 oster break;
110 1.1 oster case rf_CleanupState: suspended = rf_State_Cleanup(desc);
111 1.1 oster break;
112 1.1 oster case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc);
113 1.1 oster break;
114 1.1 oster case rf_LastState: suspended = rf_State_LastState(desc);
115 1.1 oster break;
116 1.1 oster }
117 1.1 oster
118 1.1 oster /* after this point, we cannot dereference desc since desc may
119 1.1 oster have been freed. desc is only freed in LastState, so if we
120 1.1 oster renter this function or loop back up, desc should be valid. */
121 1.1 oster
122 1.1 oster if (rf_printStatesDebug) {
123 1.1 oster int tid;
124 1.1 oster rf_get_threadid (tid);
125 1.1 oster
126 1.1 oster printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
127 1.1 oster tid, StateName(current_state), current_state_index, (long)desc,
128 1.1 oster suspended ? "callback scheduled" : "looping");
129 1.1 oster }
130 1.1 oster } while (!suspended && current_state != rf_LastState);
131 1.1 oster
132 1.1 oster return;
133 1.1 oster }
134 1.1 oster
135 1.1 oster
136 1.1 oster void rf_ContinueDagAccess (RF_DagList_t *dagList)
137 1.1 oster {
138 1.1 oster RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
139 1.1 oster RF_RaidAccessDesc_t *desc;
140 1.1 oster RF_DagHeader_t *dag_h;
141 1.1 oster RF_Etimer_t timer;
142 1.1 oster int i;
143 1.1 oster
144 1.1 oster desc = dagList->desc;
145 1.1 oster
146 1.1 oster timer = tracerec->timer;
147 1.1 oster RF_ETIMER_STOP(timer);
148 1.1 oster RF_ETIMER_EVAL(timer);
149 1.1 oster tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
150 1.1 oster RF_ETIMER_START(tracerec->timer);
151 1.1 oster
152 1.1 oster /* skip to dag which just finished */
153 1.1 oster dag_h = dagList->dags;
154 1.1 oster for (i = 0; i < dagList->numDagsDone; i++) {
155 1.1 oster dag_h = dag_h->next;
156 1.1 oster }
157 1.1 oster
158 1.1 oster /* check to see if retry is required */
159 1.1 oster if (dag_h->status == rf_rollBackward) {
160 1.1 oster /* when a dag fails, mark desc status as bad and allow all other dags
161 1.1 oster * in the desc to execute to completion. then, free all dags and start over */
162 1.1 oster desc->status = 1; /* bad status */
163 1.1 oster {
164 1.1 oster printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
165 1.1 oster desc->tid, desc->type, (long)desc->raidAddress,
166 1.1 oster (long)desc->raidAddress,(int)desc->numBlocks,
167 1.1 oster (int)desc->numBlocks, (unsigned long) (desc->bufPtr));
168 1.1 oster }
169 1.1 oster }
170 1.1 oster
171 1.1 oster dagList->numDagsDone++;
172 1.1 oster rf_ContinueRaidAccess(desc);
173 1.1 oster }
174 1.1 oster
175 1.1 oster
176 1.1 oster int rf_State_LastState(RF_RaidAccessDesc_t *desc)
177 1.1 oster {
178 1.1 oster void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc;
179 1.2 drochner RF_CBParam_t callbackArg;
180 1.2 drochner
181 1.2 drochner callbackArg.p = desc->callbackArg;
182 1.1 oster
183 1.1 oster if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */
184 1.1 oster #if DKUSAGE > 0
185 1.1 oster RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp);
186 1.1 oster #else
187 1.1 oster RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid);
188 1.1 oster #endif /* DKUSAGE > 0 */
189 1.3 explorer
190 1.3 explorer /*
191 1.3 explorer * If this is not an async request, wake up the caller
192 1.3 explorer */
193 1.3 explorer if (desc->async_flag == 0)
194 1.3 explorer wakeup(desc->bp);
195 1.3 explorer
196 1.1 oster /* printf("Calling biodone on 0x%x\n",desc->bp); */
197 1.1 oster biodone(desc->bp); /* access came through ioctl */
198 1.1 oster }
199 1.1 oster
200 1.1 oster if (callbackFunc) callbackFunc(callbackArg);
201 1.1 oster rf_FreeRaidAccDesc(desc);
202 1.1 oster
203 1.1 oster return RF_FALSE;
204 1.1 oster }
205 1.1 oster
206 1.1 oster int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
207 1.1 oster {
208 1.1 oster RF_Raid_t *raidPtr;
209 1.1 oster
210 1.1 oster raidPtr = desc->raidPtr;
211 1.1 oster /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */
212 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
213 1.1 oster raidPtr->accs_in_flight++; /* used to detect quiescence */
214 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
215 1.1 oster
216 1.1 oster desc->state++;
217 1.1 oster return RF_FALSE;
218 1.1 oster }
219 1.1 oster
220 1.1 oster int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
221 1.1 oster {
222 1.1 oster RF_Raid_t *raidPtr;
223 1.1 oster
224 1.1 oster raidPtr = desc->raidPtr;
225 1.1 oster
226 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
227 1.1 oster raidPtr->accs_in_flight--;
228 1.1 oster if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
229 1.1 oster rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
230 1.1 oster }
231 1.1 oster rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
232 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
233 1.1 oster
234 1.1 oster desc->state++;
235 1.1 oster return RF_FALSE;
236 1.1 oster }
237 1.1 oster
238 1.1 oster int rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
239 1.1 oster {
240 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
241 1.1 oster RF_Etimer_t timer;
242 1.1 oster int suspended = RF_FALSE;
243 1.1 oster RF_Raid_t *raidPtr;
244 1.1 oster
245 1.1 oster raidPtr = desc->raidPtr;
246 1.1 oster
247 1.1 oster RF_ETIMER_START(timer);
248 1.1 oster RF_ETIMER_START(desc->timer);
249 1.1 oster
250 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
251 1.1 oster if (raidPtr->accesses_suspended) {
252 1.1 oster RF_CallbackDesc_t *cb;
253 1.1 oster cb = rf_AllocCallbackDesc();
254 1.1 oster /* XXX the following cast is quite bogus... rf_ContinueRaidAccess
255 1.1 oster takes a (RF_RaidAccessDesc_t *) as an argument.. GO */
256 1.1 oster cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess;
257 1.1 oster cb->callbackArg.p = (void *) desc;
258 1.1 oster cb->next = raidPtr->quiesce_wait_list;
259 1.1 oster raidPtr->quiesce_wait_list = cb;
260 1.1 oster suspended = RF_TRUE;
261 1.1 oster }
262 1.1 oster
263 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
264 1.1 oster
265 1.1 oster RF_ETIMER_STOP(timer);
266 1.1 oster RF_ETIMER_EVAL(timer);
267 1.1 oster tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
268 1.1 oster
269 1.1 oster if (suspended && rf_quiesceDebug)
270 1.1 oster printf("Stalling access due to quiescence lock\n");
271 1.1 oster
272 1.1 oster desc->state++;
273 1.1 oster return suspended;
274 1.1 oster }
275 1.1 oster
276 1.1 oster int rf_State_Map(RF_RaidAccessDesc_t *desc)
277 1.1 oster {
278 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
279 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
280 1.1 oster RF_Etimer_t timer;
281 1.1 oster
282 1.1 oster RF_ETIMER_START(timer);
283 1.1 oster
284 1.1 oster if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
285 1.1 oster desc->bufPtr, RF_DONT_REMAP)))
286 1.1 oster RF_PANIC();
287 1.1 oster
288 1.1 oster RF_ETIMER_STOP(timer);
289 1.1 oster RF_ETIMER_EVAL(timer);
290 1.1 oster tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
291 1.1 oster
292 1.1 oster desc->state ++;
293 1.1 oster return RF_FALSE;
294 1.1 oster }
295 1.1 oster
296 1.1 oster int rf_State_Lock(RF_RaidAccessDesc_t *desc)
297 1.1 oster {
298 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
299 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
300 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
301 1.1 oster RF_AccessStripeMap_t *asm_p;
302 1.1 oster RF_Etimer_t timer;
303 1.1 oster int suspended = RF_FALSE;
304 1.1 oster
305 1.1 oster RF_ETIMER_START(timer);
306 1.1 oster if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
307 1.1 oster RF_StripeNum_t lastStripeID = -1;
308 1.1 oster
309 1.1 oster /* acquire each lock that we don't already hold */
310 1.1 oster for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
311 1.1 oster RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
312 1.1 oster if (!rf_suppressLocksAndLargeWrites &&
313 1.1 oster asm_p->parityInfo &&
314 1.1 oster !(desc->flags& RF_DAG_SUPPRESS_LOCKS) &&
315 1.1 oster !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED))
316 1.1 oster {
317 1.1 oster asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
318 1.1 oster RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired
319 1.1 oster hierarchically */
320 1.1 oster lastStripeID = asm_p->stripeID;
321 1.1 oster /* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus! GO */
322 1.1 oster RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
323 1.1 oster (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p,
324 1.1 oster raidPtr->Layout.dataSectorsPerStripe);
325 1.1 oster if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
326 1.1 oster &asm_p->lockReqDesc))
327 1.1 oster {
328 1.1 oster suspended = RF_TRUE;
329 1.1 oster break;
330 1.1 oster }
331 1.1 oster }
332 1.1 oster
333 1.1 oster if (desc->type == RF_IO_TYPE_WRITE &&
334 1.1 oster raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing)
335 1.1 oster {
336 1.1 oster if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) {
337 1.1 oster int val;
338 1.1 oster
339 1.1 oster asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
340 1.1 oster /* XXX the cast below is quite bogus!!! XXX GO */
341 1.1 oster val = rf_ForceOrBlockRecon(raidPtr, asm_p,
342 1.1 oster (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc);
343 1.1 oster if (val == 0) {
344 1.1 oster asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
345 1.1 oster }
346 1.1 oster else {
347 1.1 oster suspended = RF_TRUE;
348 1.1 oster break;
349 1.1 oster }
350 1.1 oster }
351 1.1 oster else {
352 1.1 oster if (rf_pssDebug) {
353 1.1 oster printf("[%d] skipping force/block because already done, psid %ld\n",
354 1.1 oster desc->tid,(long)asm_p->stripeID);
355 1.1 oster }
356 1.1 oster }
357 1.1 oster }
358 1.1 oster else {
359 1.1 oster if (rf_pssDebug) {
360 1.1 oster printf("[%d] skipping force/block because not write or not under recon, psid %ld\n",
361 1.1 oster desc->tid,(long)asm_p->stripeID);
362 1.1 oster }
363 1.1 oster }
364 1.1 oster }
365 1.1 oster
366 1.1 oster RF_ETIMER_STOP(timer);
367 1.1 oster RF_ETIMER_EVAL(timer);
368 1.1 oster tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
369 1.1 oster
370 1.1 oster if (suspended)
371 1.1 oster return(RF_TRUE);
372 1.1 oster }
373 1.1 oster
374 1.1 oster desc->state++;
375 1.1 oster return(RF_FALSE);
376 1.1 oster }
377 1.1 oster
378 1.1 oster /*
379 1.1 oster * the following three states create, execute, and post-process dags
380 1.1 oster * the error recovery unit is a single dag.
381 1.1 oster * by default, SelectAlgorithm creates an array of dags, one per parity stripe
382 1.1 oster * in some tricky cases, multiple dags per stripe are created
383 1.1 oster * - dags within a parity stripe are executed sequentially (arbitrary order)
384 1.1 oster * - dags for distinct parity stripes are executed concurrently
385 1.1 oster *
386 1.1 oster * repeat until all dags complete successfully -or- dag selection fails
387 1.1 oster *
388 1.1 oster * while !done
389 1.1 oster * create dag(s) (SelectAlgorithm)
390 1.1 oster * if dag
391 1.1 oster * execute dag (DispatchDAG)
392 1.1 oster * if dag successful
393 1.1 oster * done (SUCCESS)
394 1.1 oster * else
395 1.1 oster * !done (RETRY - start over with new dags)
396 1.1 oster * else
397 1.1 oster * done (FAIL)
398 1.1 oster */
399 1.1 oster int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc)
400 1.1 oster {
401 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
402 1.1 oster RF_Etimer_t timer;
403 1.1 oster RF_DagHeader_t *dag_h;
404 1.1 oster int i, selectStatus;
405 1.1 oster
406 1.1 oster /* generate a dag for the access, and fire it off. When the dag
407 1.1 oster completes, we'll get re-invoked in the next state. */
408 1.1 oster RF_ETIMER_START(timer);
409 1.1 oster /* SelectAlgorithm returns one or more dags */
410 1.1 oster selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS);
411 1.1 oster if (rf_printDAGsDebug)
412 1.1 oster for (i = 0; i < desc->numStripes; i++)
413 1.1 oster rf_PrintDAGList(desc->dagArray[i].dags);
414 1.1 oster RF_ETIMER_STOP(timer);
415 1.1 oster RF_ETIMER_EVAL(timer);
416 1.1 oster /* update time to create all dags */
417 1.1 oster tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
418 1.1 oster
419 1.1 oster desc->status = 0; /* good status */
420 1.1 oster
421 1.1 oster if (selectStatus) {
422 1.1 oster /* failed to create a dag */
423 1.1 oster /* this happens when there are too many faults or incomplete dag libraries */
424 1.1 oster printf("[Failed to create a DAG\n]");
425 1.1 oster RF_PANIC();
426 1.1 oster }
427 1.1 oster else {
428 1.1 oster /* bind dags to desc */
429 1.1 oster for (i = 0; i < desc->numStripes; i++) {
430 1.1 oster dag_h = desc->dagArray[i].dags;
431 1.1 oster while (dag_h) {
432 1.1 oster dag_h->bp = (struct buf *) desc->bp;
433 1.1 oster dag_h->tracerec = tracerec;
434 1.1 oster dag_h = dag_h->next;
435 1.1 oster }
436 1.1 oster }
437 1.1 oster desc->flags |= RF_DAG_DISPATCH_RETURNED;
438 1.1 oster desc->state++; /* next state should be rf_State_ExecuteDAG */
439 1.1 oster }
440 1.1 oster return RF_FALSE;
441 1.1 oster }
442 1.1 oster
443 1.1 oster
444 1.1 oster
445 1.1 oster /* the access has an array of dagLists, one dagList per parity stripe.
446 1.1 oster * fire the first dag in each parity stripe (dagList).
447 1.1 oster * dags within a stripe (dagList) must be executed sequentially
448 1.1 oster * - this preserves atomic parity update
449 1.1 oster * dags for independents parity groups (stripes) are fired concurrently */
450 1.1 oster
451 1.1 oster int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
452 1.1 oster {
453 1.1 oster int i;
454 1.1 oster RF_DagHeader_t *dag_h;
455 1.1 oster RF_DagList_t *dagArray = desc->dagArray;
456 1.1 oster
457 1.1 oster /* next state is always rf_State_ProcessDAG
458 1.1 oster * important to do this before firing the first dag
459 1.1 oster * (it may finish before we leave this routine) */
460 1.1 oster desc->state++;
461 1.1 oster
462 1.1 oster /* sweep dag array, a stripe at a time, firing the first dag in each stripe */
463 1.1 oster for (i = 0; i < desc->numStripes; i++) {
464 1.1 oster RF_ASSERT(dagArray[i].numDags > 0);
465 1.1 oster RF_ASSERT(dagArray[i].numDagsDone == 0);
466 1.1 oster RF_ASSERT(dagArray[i].numDagsFired == 0);
467 1.1 oster RF_ETIMER_START(dagArray[i].tracerec.timer);
468 1.1 oster /* fire first dag in this stripe */
469 1.1 oster dag_h = dagArray[i].dags;
470 1.1 oster RF_ASSERT(dag_h);
471 1.1 oster dagArray[i].numDagsFired++;
472 1.1 oster /* XXX Yet another case where we pass in a conflicting function pointer
473 1.1 oster :-( XXX GO */
474 1.1 oster rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]);
475 1.1 oster }
476 1.1 oster
477 1.1 oster /* the DAG will always call the callback, even if there was no
478 1.1 oster * blocking, so we are always suspended in this state */
479 1.1 oster return RF_TRUE;
480 1.1 oster }
481 1.1 oster
482 1.1 oster
483 1.1 oster
484 1.1 oster /* rf_State_ProcessDAG is entered when a dag completes.
485 1.1 oster * first, check to all dags in the access have completed
486 1.1 oster * if not, fire as many dags as possible */
487 1.1 oster
488 1.1 oster int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
489 1.1 oster {
490 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
491 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
492 1.1 oster RF_DagHeader_t *dag_h;
493 1.1 oster int i, j, done = RF_TRUE;
494 1.1 oster RF_DagList_t *dagArray = desc->dagArray;
495 1.1 oster RF_Etimer_t timer;
496 1.1 oster
497 1.1 oster /* check to see if this is the last dag */
498 1.1 oster for (i = 0; i < desc->numStripes; i++)
499 1.1 oster if (dagArray[i].numDags != dagArray[i].numDagsDone)
500 1.1 oster done = RF_FALSE;
501 1.1 oster
502 1.1 oster if (done) {
503 1.1 oster if (desc->status) {
504 1.1 oster /* a dag failed, retry */
505 1.1 oster RF_ETIMER_START(timer);
506 1.1 oster /* free all dags */
507 1.1 oster for (i = 0; i < desc->numStripes; i++) {
508 1.1 oster rf_FreeDAG(desc->dagArray[i].dags);
509 1.1 oster }
510 1.1 oster rf_MarkFailuresInASMList(raidPtr, asmh);
511 1.1 oster /* back up to rf_State_CreateDAG */
512 1.1 oster desc->state = desc->state - 2;
513 1.1 oster return RF_FALSE;
514 1.1 oster }
515 1.1 oster else {
516 1.1 oster /* move on to rf_State_Cleanup */
517 1.1 oster desc->state++;
518 1.1 oster }
519 1.1 oster return RF_FALSE;
520 1.1 oster }
521 1.1 oster else {
522 1.1 oster /* more dags to execute */
523 1.1 oster /* see if any are ready to be fired. if so, fire them */
524 1.1 oster /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */
525 1.1 oster for (i = 0; i < desc->numStripes; i++) {
526 1.1 oster if ((dagArray[i].numDagsDone < dagArray[i].numDags)
527 1.1 oster && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
528 1.1 oster && (dagArray[i].numDagsFired > 0)) {
529 1.1 oster RF_ETIMER_START(dagArray[i].tracerec.timer);
530 1.1 oster /* fire next dag in this stripe */
531 1.1 oster /* first, skip to next dag awaiting execution */
532 1.1 oster dag_h = dagArray[i].dags;
533 1.1 oster for (j = 0; j < dagArray[i].numDagsDone; j++)
534 1.1 oster dag_h = dag_h->next;
535 1.1 oster dagArray[i].numDagsFired++;
536 1.1 oster /* XXX and again we pass a different function pointer.. GO */
537 1.1 oster rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess,
538 1.1 oster &dagArray[i]);
539 1.1 oster }
540 1.1 oster }
541 1.1 oster return RF_TRUE;
542 1.1 oster }
543 1.1 oster }
544 1.1 oster
545 1.1 oster /* only make it this far if all dags complete successfully */
546 1.1 oster int rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
547 1.1 oster {
548 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
549 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
550 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
551 1.1 oster RF_AccessStripeMap_t *asm_p;
552 1.1 oster RF_DagHeader_t *dag_h;
553 1.1 oster RF_Etimer_t timer;
554 1.1 oster int tid, i;
555 1.1 oster
556 1.1 oster desc->state ++;
557 1.1 oster
558 1.1 oster rf_get_threadid(tid);
559 1.1 oster
560 1.1 oster timer = tracerec->timer;
561 1.1 oster RF_ETIMER_STOP(timer);
562 1.1 oster RF_ETIMER_EVAL(timer);
563 1.1 oster tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
564 1.1 oster
565 1.1 oster /* the RAID I/O is complete. Clean up. */
566 1.1 oster tracerec->specific.user.dag_retry_us = 0;
567 1.1 oster
568 1.1 oster RF_ETIMER_START(timer);
569 1.1 oster if (desc->flags & RF_DAG_RETURN_DAG) {
570 1.1 oster /* copy dags into paramDAG */
571 1.1 oster *(desc->paramDAG) = desc->dagArray[0].dags;
572 1.1 oster dag_h = *(desc->paramDAG);
573 1.1 oster for (i = 1; i < desc->numStripes; i++) {
574 1.1 oster /* concatenate dags from remaining stripes */
575 1.1 oster RF_ASSERT(dag_h);
576 1.1 oster while (dag_h->next)
577 1.1 oster dag_h = dag_h->next;
578 1.1 oster dag_h->next = desc->dagArray[i].dags;
579 1.1 oster }
580 1.1 oster }
581 1.1 oster else {
582 1.1 oster /* free all dags */
583 1.1 oster for (i = 0; i < desc->numStripes; i++) {
584 1.1 oster rf_FreeDAG(desc->dagArray[i].dags);
585 1.1 oster }
586 1.1 oster }
587 1.1 oster
588 1.1 oster RF_ETIMER_STOP(timer);
589 1.1 oster RF_ETIMER_EVAL(timer);
590 1.1 oster tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
591 1.1 oster
592 1.1 oster RF_ETIMER_START(timer);
593 1.1 oster if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
594 1.1 oster for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
595 1.1 oster if (!rf_suppressLocksAndLargeWrites &&
596 1.1 oster asm_p->parityInfo &&
597 1.1 oster !(desc->flags&RF_DAG_SUPPRESS_LOCKS))
598 1.1 oster {
599 1.1 oster RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
600 1.1 oster rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID,
601 1.1 oster &asm_p->lockReqDesc);
602 1.1 oster }
603 1.1 oster if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
604 1.1 oster rf_UnblockRecon(raidPtr, asm_p);
605 1.1 oster }
606 1.1 oster }
607 1.1 oster }
608 1.1 oster
609 1.1 oster RF_ETIMER_STOP(timer);
610 1.1 oster RF_ETIMER_EVAL(timer);
611 1.1 oster tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
612 1.1 oster
613 1.1 oster RF_ETIMER_START(timer);
614 1.1 oster if (desc->flags & RF_DAG_RETURN_ASM)
615 1.1 oster *(desc->paramASM) = asmh;
616 1.1 oster else
617 1.1 oster rf_FreeAccessStripeMap(asmh);
618 1.1 oster RF_ETIMER_STOP(timer);
619 1.1 oster RF_ETIMER_EVAL(timer);
620 1.1 oster tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
621 1.1 oster
622 1.1 oster RF_ETIMER_STOP(desc->timer);
623 1.1 oster RF_ETIMER_EVAL(desc->timer);
624 1.1 oster
625 1.1 oster timer = desc->tracerec.tot_timer;
626 1.1 oster RF_ETIMER_STOP(timer);
627 1.1 oster RF_ETIMER_EVAL(timer);
628 1.1 oster desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
629 1.1 oster
630 1.1 oster rf_LogTraceRec(raidPtr, tracerec);
631 1.1 oster
632 1.1 oster desc->flags |= RF_DAG_ACCESS_COMPLETE;
633 1.1 oster
634 1.1 oster return RF_FALSE;
635 1.1 oster }
636