rf_states.c revision 1.41.22.2 1 1.41.22.2 ad /* $NetBSD: rf_states.c,v 1.41.22.2 2007/07/29 12:50:23 ad Exp $ */
2 1.41.22.2 ad /*
3 1.41.22.2 ad * Copyright (c) 1995 Carnegie-Mellon University.
4 1.41.22.2 ad * All rights reserved.
5 1.41.22.2 ad *
6 1.41.22.2 ad * Author: Mark Holland, William V. Courtright II, Robby Findler
7 1.41.22.2 ad *
8 1.41.22.2 ad * Permission to use, copy, modify and distribute this software and
9 1.41.22.2 ad * its documentation is hereby granted, provided that both the copyright
10 1.41.22.2 ad * notice and this permission notice appear in all copies of the
11 1.41.22.2 ad * software, derivative works or modified versions, and any portions
12 1.41.22.2 ad * thereof, and that both notices appear in supporting documentation.
13 1.41.22.2 ad *
14 1.41.22.2 ad * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.41.22.2 ad * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.41.22.2 ad * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.41.22.2 ad *
18 1.41.22.2 ad * Carnegie Mellon requests users of this software to return to
19 1.41.22.2 ad *
20 1.41.22.2 ad * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.41.22.2 ad * School of Computer Science
22 1.41.22.2 ad * Carnegie Mellon University
23 1.41.22.2 ad * Pittsburgh PA 15213-3890
24 1.41.22.2 ad *
25 1.41.22.2 ad * any improvements or extensions that they make and grant Carnegie the
26 1.41.22.2 ad * rights to redistribute these changes.
27 1.41.22.2 ad */
28 1.41.22.2 ad
29 1.41.22.2 ad #include <sys/cdefs.h>
30 1.41.22.2 ad __KERNEL_RCSID(0, "$NetBSD: rf_states.c,v 1.41.22.2 2007/07/29 12:50:23 ad Exp $");
31 1.41.22.2 ad
32 1.41.22.2 ad #include <sys/errno.h>
33 1.41.22.2 ad
34 1.41.22.2 ad #include "rf_archs.h"
35 1.41.22.2 ad #include "rf_threadstuff.h"
36 1.41.22.2 ad #include "rf_raid.h"
37 1.41.22.2 ad #include "rf_dag.h"
38 1.41.22.2 ad #include "rf_desc.h"
39 1.41.22.2 ad #include "rf_aselect.h"
40 1.41.22.2 ad #include "rf_general.h"
41 1.41.22.2 ad #include "rf_states.h"
42 1.41.22.2 ad #include "rf_dagutils.h"
43 1.41.22.2 ad #include "rf_driver.h"
44 1.41.22.2 ad #include "rf_engine.h"
45 1.41.22.2 ad #include "rf_map.h"
46 1.41.22.2 ad #include "rf_etimer.h"
47 1.41.22.2 ad #include "rf_kintf.h"
48 1.41.22.2 ad
/* Default to no state tracing unless the build has already chosen a value. */
#if !defined(RF_DEBUG_STATES)
#define RF_DEBUG_STATES 0
#endif
52 1.41.22.2 ad
53 1.41.22.2 ad /* prototypes for some of the available states.
54 1.41.22.2 ad
55 1.41.22.2 ad States must:
56 1.41.22.2 ad
57 1.41.22.2 ad - not block.
58 1.41.22.2 ad
59 1.41.22.2 ad - either schedule rf_ContinueRaidAccess as a callback and return
60 1.41.22.2 ad RF_TRUE, or complete all of their work and return RF_FALSE.
61 1.41.22.2 ad
62 1.41.22.2 ad - increment desc->state when they have finished their work.
63 1.41.22.2 ad */
64 1.41.22.2 ad
#if RF_DEBUG_STATES
/*
 * Map an access state to a printable name for debug output.
 * A state without a name in this list yields a conspicuous placeholder.
 */
static char *
StateName(RF_AccessState_t state)
{
	char *label = "!!! UnnamedState !!!";

	if (state == rf_QuiesceState)
		label = "QuiesceState";
	else if (state == rf_MapState)
		label = "MapState";
	else if (state == rf_LockState)
		label = "LockState";
	else if (state == rf_CreateDAGState)
		label = "CreateDAGState";
	else if (state == rf_ExecuteDAGState)
		label = "ExecuteDAGState";
	else if (state == rf_ProcessDAGState)
		label = "ProcessDAGState";
	else if (state == rf_CleanupState)
		label = "CleanupState";
	else if (state == rf_LastState)
		label = "LastState";
	else if (state == rf_IncrAccessesCountState)
		label = "IncrAccessesCountState";
	else if (state == rf_DecrAccessesCountState)
		label = "DecrAccessesCountState";

	return label;
}
#endif
94 1.41.22.2 ad
95 1.41.22.2 ad void
96 1.41.22.2 ad rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
97 1.41.22.2 ad {
98 1.41.22.2 ad int suspended = RF_FALSE;
99 1.41.22.2 ad int current_state_index = desc->state;
100 1.41.22.2 ad RF_AccessState_t current_state = desc->states[current_state_index];
101 1.41.22.2 ad #if RF_DEBUG_STATES
102 1.41.22.2 ad int unit = desc->raidPtr->raidid;
103 1.41.22.2 ad #endif
104 1.41.22.2 ad
105 1.41.22.2 ad do {
106 1.41.22.2 ad
107 1.41.22.2 ad current_state_index = desc->state;
108 1.41.22.2 ad current_state = desc->states[current_state_index];
109 1.41.22.2 ad
110 1.41.22.2 ad switch (current_state) {
111 1.41.22.2 ad
112 1.41.22.2 ad case rf_QuiesceState:
113 1.41.22.2 ad suspended = rf_State_Quiesce(desc);
114 1.41.22.2 ad break;
115 1.41.22.2 ad case rf_IncrAccessesCountState:
116 1.41.22.2 ad suspended = rf_State_IncrAccessCount(desc);
117 1.41.22.2 ad break;
118 1.41.22.2 ad case rf_MapState:
119 1.41.22.2 ad suspended = rf_State_Map(desc);
120 1.41.22.2 ad break;
121 1.41.22.2 ad case rf_LockState:
122 1.41.22.2 ad suspended = rf_State_Lock(desc);
123 1.41.22.2 ad break;
124 1.41.22.2 ad case rf_CreateDAGState:
125 1.41.22.2 ad suspended = rf_State_CreateDAG(desc);
126 1.41.22.2 ad break;
127 1.41.22.2 ad case rf_ExecuteDAGState:
128 1.41.22.2 ad suspended = rf_State_ExecuteDAG(desc);
129 1.41.22.2 ad break;
130 1.41.22.2 ad case rf_ProcessDAGState:
131 1.41.22.2 ad suspended = rf_State_ProcessDAG(desc);
132 1.41.22.2 ad break;
133 1.41.22.2 ad case rf_CleanupState:
134 1.41.22.2 ad suspended = rf_State_Cleanup(desc);
135 1.41.22.2 ad break;
136 1.41.22.2 ad case rf_DecrAccessesCountState:
137 1.41.22.2 ad suspended = rf_State_DecrAccessCount(desc);
138 1.41.22.2 ad break;
139 1.41.22.2 ad case rf_LastState:
140 1.41.22.2 ad suspended = rf_State_LastState(desc);
141 1.41.22.2 ad break;
142 1.41.22.2 ad }
143 1.41.22.2 ad
144 1.41.22.2 ad /* after this point, we cannot dereference desc since
145 1.41.22.2 ad * desc may have been freed. desc is only freed in
146 1.41.22.2 ad * LastState, so if we renter this function or loop
147 1.41.22.2 ad * back up, desc should be valid. */
148 1.41.22.2 ad
149 1.41.22.2 ad #if RF_DEBUG_STATES
150 1.41.22.2 ad if (rf_printStatesDebug) {
151 1.41.22.2 ad printf("raid%d: State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
152 1.41.22.2 ad unit, StateName(current_state),
153 1.41.22.2 ad current_state_index, (long) desc,
154 1.41.22.2 ad suspended ? "callback scheduled" : "looping");
155 1.41.22.2 ad }
156 1.41.22.2 ad #endif
157 1.41.22.2 ad } while (!suspended && current_state != rf_LastState);
158 1.41.22.2 ad
159 1.41.22.2 ad return;
160 1.41.22.2 ad }
161 1.41.22.2 ad
162 1.41.22.2 ad
163 1.41.22.2 ad void
164 1.41.22.2 ad rf_ContinueDagAccess(RF_DagList_t *dagList)
165 1.41.22.2 ad {
166 1.41.22.2 ad #if RF_ACC_TRACE > 0
167 1.41.22.2 ad RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
168 1.41.22.2 ad RF_Etimer_t timer;
169 1.41.22.2 ad #endif
170 1.41.22.2 ad RF_RaidAccessDesc_t *desc;
171 1.41.22.2 ad RF_DagHeader_t *dag_h;
172 1.41.22.2 ad int i;
173 1.41.22.2 ad
174 1.41.22.2 ad desc = dagList->desc;
175 1.41.22.2 ad
176 1.41.22.2 ad #if RF_ACC_TRACE > 0
177 1.41.22.2 ad timer = tracerec->timer;
178 1.41.22.2 ad RF_ETIMER_STOP(timer);
179 1.41.22.2 ad RF_ETIMER_EVAL(timer);
180 1.41.22.2 ad tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
181 1.41.22.2 ad RF_ETIMER_START(tracerec->timer);
182 1.41.22.2 ad #endif
183 1.41.22.2 ad
184 1.41.22.2 ad /* skip to dag which just finished */
185 1.41.22.2 ad dag_h = dagList->dags;
186 1.41.22.2 ad for (i = 0; i < dagList->numDagsDone; i++) {
187 1.41.22.2 ad dag_h = dag_h->next;
188 1.41.22.2 ad }
189 1.41.22.2 ad
190 1.41.22.2 ad /* check to see if retry is required */
191 1.41.22.2 ad if (dag_h->status == rf_rollBackward) {
192 1.41.22.2 ad /* when a dag fails, mark desc status as bad and allow
193 1.41.22.2 ad * all other dags in the desc to execute to
194 1.41.22.2 ad * completion. then, free all dags and start over */
195 1.41.22.2 ad desc->status = 1; /* bad status */
196 1.41.22.2 ad #if 0
197 1.41.22.2 ad printf("raid%d: DAG failure: %c addr 0x%lx "
198 1.41.22.2 ad "(%ld) nblk 0x%x (%d) buf 0x%lx state %d\n",
199 1.41.22.2 ad desc->raidPtr->raidid, desc->type,
200 1.41.22.2 ad (long) desc->raidAddress,
201 1.41.22.2 ad (long) desc->raidAddress, (int) desc->numBlocks,
202 1.41.22.2 ad (int) desc->numBlocks,
203 1.41.22.2 ad (unsigned long) (desc->bufPtr), desc->state);
204 1.41.22.2 ad #endif
205 1.41.22.2 ad }
206 1.41.22.2 ad dagList->numDagsDone++;
207 1.41.22.2 ad rf_ContinueRaidAccess(desc);
208 1.41.22.2 ad }
209 1.41.22.2 ad
210 1.41.22.2 ad int
211 1.41.22.2 ad rf_State_LastState(RF_RaidAccessDesc_t *desc)
212 1.41.22.2 ad {
213 1.41.22.2 ad void (*callbackFunc) (RF_CBParam_t) = desc->callbackFunc;
214 1.41.22.2 ad RF_CBParam_t callbackArg;
215 1.41.22.2 ad
216 1.41.22.2 ad callbackArg.p = desc->callbackArg;
217 1.41.22.2 ad
218 1.41.22.2 ad /*
219 1.41.22.2 ad * If this is not an async request, wake up the caller
220 1.41.22.2 ad */
221 1.41.22.2 ad if (desc->async_flag == 0)
222 1.41.22.2 ad wakeup(desc->bp);
223 1.41.22.2 ad
224 1.41.22.2 ad /*
225 1.41.22.2 ad * That's all the IO for this one... unbusy the 'disk'.
226 1.41.22.2 ad */
227 1.41.22.2 ad
228 1.41.22.2 ad rf_disk_unbusy(desc);
229 1.41.22.2 ad
230 1.41.22.2 ad /*
231 1.41.22.2 ad * Wakeup any requests waiting to go.
232 1.41.22.2 ad */
233 1.41.22.2 ad
234 1.41.22.2 ad RF_LOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex);
235 1.41.22.2 ad ((RF_Raid_t *) desc->raidPtr)->openings++;
236 1.41.22.2 ad RF_UNLOCK_MUTEX(((RF_Raid_t *) desc->raidPtr)->mutex);
237 1.41.22.2 ad
238 1.41.22.2 ad wakeup(&(desc->raidPtr->iodone));
239 1.41.22.2 ad
240 1.41.22.2 ad /* printf("Calling biodone on 0x%x\n",desc->bp); */
241 1.41.22.2 ad biodone(desc->bp); /* access came through ioctl */
242 1.41.22.2 ad
243 1.41.22.2 ad if (callbackFunc)
244 1.41.22.2 ad callbackFunc(callbackArg);
245 1.41.22.2 ad rf_FreeRaidAccDesc(desc);
246 1.41.22.2 ad
247 1.41.22.2 ad return RF_FALSE;
248 1.41.22.2 ad }
249 1.41.22.2 ad
250 1.41.22.2 ad int
251 1.41.22.2 ad rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
252 1.41.22.2 ad {
253 1.41.22.2 ad RF_Raid_t *raidPtr;
254 1.41.22.2 ad
255 1.41.22.2 ad raidPtr = desc->raidPtr;
256 1.41.22.2 ad /* Bummer. We have to do this to be 100% safe w.r.t. the increment
257 1.41.22.2 ad * below */
258 1.41.22.2 ad RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
259 1.41.22.2 ad raidPtr->accs_in_flight++; /* used to detect quiescence */
260 1.41.22.2 ad RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
261 1.41.22.2 ad
262 1.41.22.2 ad desc->state++;
263 1.41.22.2 ad return RF_FALSE;
264 1.41.22.2 ad }
265 1.41.22.2 ad
266 1.41.22.2 ad int
267 1.41.22.2 ad rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
268 1.41.22.2 ad {
269 1.41.22.2 ad RF_Raid_t *raidPtr;
270 1.41.22.2 ad
271 1.41.22.2 ad raidPtr = desc->raidPtr;
272 1.41.22.2 ad
273 1.41.22.2 ad RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
274 1.41.22.2 ad raidPtr->accs_in_flight--;
275 1.41.22.2 ad if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
276 1.41.22.2 ad rf_SignalQuiescenceLock(raidPtr);
277 1.41.22.2 ad }
278 1.41.22.2 ad RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
279 1.41.22.2 ad
280 1.41.22.2 ad desc->state++;
281 1.41.22.2 ad return RF_FALSE;
282 1.41.22.2 ad }
283 1.41.22.2 ad
284 1.41.22.2 ad int
285 1.41.22.2 ad rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
286 1.41.22.2 ad {
287 1.41.22.2 ad #if RF_ACC_TRACE > 0
288 1.41.22.2 ad RF_AccTraceEntry_t *tracerec = &desc->tracerec;
289 1.41.22.2 ad RF_Etimer_t timer;
290 1.41.22.2 ad #endif
291 1.41.22.2 ad RF_CallbackDesc_t *cb;
292 1.41.22.2 ad RF_Raid_t *raidPtr;
293 1.41.22.2 ad int suspended = RF_FALSE;
294 1.41.22.2 ad int need_cb, used_cb;
295 1.41.22.2 ad
296 1.41.22.2 ad raidPtr = desc->raidPtr;
297 1.41.22.2 ad
298 1.41.22.2 ad #if RF_ACC_TRACE > 0
299 1.41.22.2 ad RF_ETIMER_START(timer);
300 1.41.22.2 ad RF_ETIMER_START(desc->timer);
301 1.41.22.2 ad #endif
302 1.41.22.2 ad
303 1.41.22.2 ad need_cb = 0;
304 1.41.22.2 ad used_cb = 0;
305 1.41.22.2 ad cb = NULL;
306 1.41.22.2 ad
307 1.41.22.2 ad RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
308 1.41.22.2 ad /* Do an initial check to see if we might need a callback structure */
309 1.41.22.2 ad if (raidPtr->accesses_suspended) {
310 1.41.22.2 ad need_cb = 1;
311 1.41.22.2 ad }
312 1.41.22.2 ad RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
313 1.41.22.2 ad
314 1.41.22.2 ad if (need_cb) {
315 1.41.22.2 ad /* create a callback if we might need it...
316 1.41.22.2 ad and we likely do. */
317 1.41.22.2 ad cb = rf_AllocCallbackDesc();
318 1.41.22.2 ad }
319 1.41.22.2 ad
320 1.41.22.2 ad RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
321 1.41.22.2 ad if (raidPtr->accesses_suspended) {
322 1.41.22.2 ad cb->callbackFunc = (void (*) (RF_CBParam_t)) rf_ContinueRaidAccess;
323 1.41.22.2 ad cb->callbackArg.p = (void *) desc;
324 1.41.22.2 ad cb->next = raidPtr->quiesce_wait_list;
325 1.41.22.2 ad raidPtr->quiesce_wait_list = cb;
326 1.41.22.2 ad suspended = RF_TRUE;
327 1.41.22.2 ad used_cb = 1;
328 1.41.22.2 ad }
329 1.41.22.2 ad RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
330 1.41.22.2 ad
331 1.41.22.2 ad if ((need_cb == 1) && (used_cb == 0)) {
332 1.41.22.2 ad rf_FreeCallbackDesc(cb);
333 1.41.22.2 ad }
334 1.41.22.2 ad
335 1.41.22.2 ad #if RF_ACC_TRACE > 0
336 1.41.22.2 ad RF_ETIMER_STOP(timer);
337 1.41.22.2 ad RF_ETIMER_EVAL(timer);
338 1.41.22.2 ad tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
339 1.41.22.2 ad #endif
340 1.41.22.2 ad
341 1.41.22.2 ad #if RF_DEBUG_QUIESCE
342 1.41.22.2 ad if (suspended && rf_quiesceDebug)
343 1.41.22.2 ad printf("Stalling access due to quiescence lock\n");
344 1.41.22.2 ad #endif
345 1.41.22.2 ad desc->state++;
346 1.41.22.2 ad return suspended;
347 1.41.22.2 ad }
348 1.41.22.2 ad
349 1.41.22.2 ad int
350 1.41.22.2 ad rf_State_Map(RF_RaidAccessDesc_t *desc)
351 1.41.22.2 ad {
352 1.41.22.2 ad RF_Raid_t *raidPtr = desc->raidPtr;
353 1.41.22.2 ad #if RF_ACC_TRACE > 0
354 1.41.22.2 ad RF_AccTraceEntry_t *tracerec = &desc->tracerec;
355 1.41.22.2 ad RF_Etimer_t timer;
356 1.41.22.2 ad
357 1.41.22.2 ad RF_ETIMER_START(timer);
358 1.41.22.2 ad #endif
359 1.41.22.2 ad
360 1.41.22.2 ad if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
361 1.41.22.2 ad desc->bufPtr, RF_DONT_REMAP)))
362 1.41.22.2 ad RF_PANIC();
363 1.41.22.2 ad
364 1.41.22.2 ad #if RF_ACC_TRACE > 0
365 1.41.22.2 ad RF_ETIMER_STOP(timer);
366 1.41.22.2 ad RF_ETIMER_EVAL(timer);
367 1.41.22.2 ad tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
368 1.41.22.2 ad #endif
369 1.41.22.2 ad
370 1.41.22.2 ad desc->state++;
371 1.41.22.2 ad return RF_FALSE;
372 1.41.22.2 ad }
373 1.41.22.2 ad
374 1.41.22.2 ad int
375 1.41.22.2 ad rf_State_Lock(RF_RaidAccessDesc_t *desc)
376 1.41.22.2 ad {
377 1.41.22.2 ad #if RF_ACC_TRACE > 0
378 1.41.22.2 ad RF_AccTraceEntry_t *tracerec = &desc->tracerec;
379 1.41.22.2 ad RF_Etimer_t timer;
380 1.41.22.2 ad #endif
381 1.41.22.2 ad RF_Raid_t *raidPtr = desc->raidPtr;
382 1.41.22.2 ad RF_AccessStripeMapHeader_t *asmh = desc->asmap;
383 1.41.22.2 ad RF_AccessStripeMap_t *asm_p;
384 1.41.22.2 ad RF_StripeNum_t lastStripeID = -1;
385 1.41.22.2 ad int suspended = RF_FALSE;
386 1.41.22.2 ad
387 1.41.22.2 ad #if RF_ACC_TRACE > 0
388 1.41.22.2 ad RF_ETIMER_START(timer);
389 1.41.22.2 ad #endif
390 1.41.22.2 ad
391 1.41.22.2 ad /* acquire each lock that we don't already hold */
392 1.41.22.2 ad for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
393 1.41.22.2 ad RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
394 1.41.22.2 ad if (!rf_suppressLocksAndLargeWrites &&
395 1.41.22.2 ad asm_p->parityInfo &&
396 1.41.22.2 ad !(desc->flags & RF_DAG_SUPPRESS_LOCKS) &&
397 1.41.22.2 ad !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) {
398 1.41.22.2 ad asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
399 1.41.22.2 ad /* locks must be acquired hierarchically */
400 1.41.22.2 ad RF_ASSERT(asm_p->stripeID > lastStripeID);
401 1.41.22.2 ad lastStripeID = asm_p->stripeID;
402 1.41.22.2 ad
403 1.41.22.2 ad RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
404 1.41.22.2 ad (void (*) (struct buf *)) rf_ContinueRaidAccess, desc, asm_p,
405 1.41.22.2 ad raidPtr->Layout.dataSectorsPerStripe);
406 1.41.22.2 ad if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
407 1.41.22.2 ad &asm_p->lockReqDesc)) {
408 1.41.22.2 ad suspended = RF_TRUE;
409 1.41.22.2 ad break;
410 1.41.22.2 ad }
411 1.41.22.2 ad }
412 1.41.22.2 ad if (desc->type == RF_IO_TYPE_WRITE &&
413 1.41.22.2 ad raidPtr->status == rf_rs_reconstructing) {
414 1.41.22.2 ad if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED)) {
415 1.41.22.2 ad int val;
416 1.41.22.2 ad
417 1.41.22.2 ad asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
418 1.41.22.2 ad val = rf_ForceOrBlockRecon(raidPtr, asm_p,
419 1.41.22.2 ad (void (*) (RF_Raid_t *, void *)) rf_ContinueRaidAccess, desc);
420 1.41.22.2 ad if (val == 0) {
421 1.41.22.2 ad asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
422 1.41.22.2 ad } else {
423 1.41.22.2 ad suspended = RF_TRUE;
424 1.41.22.2 ad break;
425 1.41.22.2 ad }
426 1.41.22.2 ad } else {
427 1.41.22.2 ad #if RF_DEBUG_PSS > 0
428 1.41.22.2 ad if (rf_pssDebug) {
429 1.41.22.2 ad printf("raid%d: skipping force/block because already done, psid %ld\n",
430 1.41.22.2 ad desc->raidPtr->raidid,
431 1.41.22.2 ad (long) asm_p->stripeID);
432 1.41.22.2 ad }
433 1.41.22.2 ad #endif
434 1.41.22.2 ad }
435 1.41.22.2 ad } else {
436 1.41.22.2 ad #if RF_DEBUG_PSS > 0
437 1.41.22.2 ad if (rf_pssDebug) {
438 1.41.22.2 ad printf("raid%d: skipping force/block because not write or not under recon, psid %ld\n",
439 1.41.22.2 ad desc->raidPtr->raidid,
440 1.41.22.2 ad (long) asm_p->stripeID);
441 1.41.22.2 ad }
442 1.41.22.2 ad #endif
443 1.41.22.2 ad }
444 1.41.22.2 ad }
445 1.41.22.2 ad #if RF_ACC_TRACE > 0
446 1.41.22.2 ad RF_ETIMER_STOP(timer);
447 1.41.22.2 ad RF_ETIMER_EVAL(timer);
448 1.41.22.2 ad tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
449 1.41.22.2 ad #endif
450 1.41.22.2 ad if (suspended)
451 1.41.22.2 ad return (RF_TRUE);
452 1.41.22.2 ad
453 1.41.22.2 ad desc->state++;
454 1.41.22.2 ad return (RF_FALSE);
455 1.41.22.2 ad }
456 1.41.22.2 ad /*
457 1.41.22.2 ad * the following three states create, execute, and post-process dags
458 1.41.22.2 ad * the error recovery unit is a single dag.
459 1.41.22.2 ad * by default, SelectAlgorithm creates an array of dags, one per parity stripe
460 1.41.22.2 ad * in some tricky cases, multiple dags per stripe are created
461 1.41.22.2 ad * - dags within a parity stripe are executed sequentially (arbitrary order)
462 1.41.22.2 ad * - dags for distinct parity stripes are executed concurrently
463 1.41.22.2 ad *
464 1.41.22.2 ad * repeat until all dags complete successfully -or- dag selection fails
465 1.41.22.2 ad *
466 1.41.22.2 ad * while !done
467 1.41.22.2 ad * create dag(s) (SelectAlgorithm)
468 1.41.22.2 ad * if dag
469 1.41.22.2 ad * execute dag (DispatchDAG)
470 1.41.22.2 ad * if dag successful
471 1.41.22.2 ad * done (SUCCESS)
472 1.41.22.2 ad * else
473 1.41.22.2 ad * !done (RETRY - start over with new dags)
474 1.41.22.2 ad * else
475 1.41.22.2 ad * done (FAIL)
476 1.41.22.2 ad */
477 1.41.22.2 ad int
478 1.41.22.2 ad rf_State_CreateDAG(RF_RaidAccessDesc_t *desc)
479 1.41.22.2 ad {
480 1.41.22.2 ad #if RF_ACC_TRACE > 0
481 1.41.22.2 ad RF_AccTraceEntry_t *tracerec = &desc->tracerec;
482 1.41.22.2 ad RF_Etimer_t timer;
483 1.41.22.2 ad #endif
484 1.41.22.2 ad RF_DagHeader_t *dag_h;
485 1.41.22.2 ad RF_DagList_t *dagList;
486 1.41.22.2 ad struct buf *bp;
487 1.41.22.2 ad int i, selectStatus;
488 1.41.22.2 ad
489 1.41.22.2 ad /* generate a dag for the access, and fire it off. When the dag
490 1.41.22.2 ad * completes, we'll get re-invoked in the next state. */
491 1.41.22.2 ad #if RF_ACC_TRACE > 0
492 1.41.22.2 ad RF_ETIMER_START(timer);
493 1.41.22.2 ad #endif
494 1.41.22.2 ad /* SelectAlgorithm returns one or more dags */
495 1.41.22.2 ad selectStatus = rf_SelectAlgorithm(desc, desc->flags | RF_DAG_SUPPRESS_LOCKS);
496 1.41.22.2 ad #if RF_DEBUG_VALIDATE_DAG
497 1.41.22.2 ad if (rf_printDAGsDebug) {
498 1.41.22.2 ad dagList = desc->dagList;
499 1.41.22.2 ad for (i = 0; i < desc->numStripes; i++) {
500 1.41.22.2 ad rf_PrintDAGList(dagList.dags);
501 1.41.22.2 ad dagList = dagList->next;
502 1.41.22.2 ad }
503 1.41.22.2 ad }
504 1.41.22.2 ad #endif /* RF_DEBUG_VALIDATE_DAG */
505 1.41.22.2 ad #if RF_ACC_TRACE > 0
506 1.41.22.2 ad RF_ETIMER_STOP(timer);
507 1.41.22.2 ad RF_ETIMER_EVAL(timer);
508 1.41.22.2 ad /* update time to create all dags */
509 1.41.22.2 ad tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
510 1.41.22.2 ad #endif
511 1.41.22.2 ad
512 1.41.22.2 ad desc->status = 0; /* good status */
513 1.41.22.2 ad
514 1.41.22.2 ad if (selectStatus || (desc->numRetries > RF_RETRY_THRESHOLD)) {
515 1.41.22.2 ad /* failed to create a dag */
516 1.41.22.2 ad /* this happens when there are too many faults or incomplete
517 1.41.22.2 ad * dag libraries */
518 1.41.22.2 ad if (selectStatus) {
519 1.41.22.2 ad printf("raid%d: failed to create a dag. "
520 1.41.22.2 ad "Too many component failures.\n",
521 1.41.22.2 ad desc->raidPtr->raidid);
522 1.41.22.2 ad } else {
523 1.41.22.2 ad printf("raid%d: IO failed after %d retries.\n",
524 1.41.22.2 ad desc->raidPtr->raidid, RF_RETRY_THRESHOLD);
525 1.41.22.2 ad }
526 1.41.22.2 ad
527 1.41.22.2 ad desc->status = 1; /* bad status */
528 1.41.22.2 ad /* skip straight to rf_State_Cleanup() */
529 1.41.22.2 ad desc->state = rf_CleanupState;
530 1.41.22.2 ad bp = (struct buf *)desc->bp;
531 1.41.22.2 ad bp->b_error = EIO;
532 1.41.22.2 ad } else {
533 1.41.22.2 ad /* bind dags to desc */
534 1.41.22.2 ad dagList = desc->dagList;
535 1.41.22.2 ad for (i = 0; i < desc->numStripes; i++) {
536 1.41.22.2 ad dag_h = dagList->dags;
537 1.41.22.2 ad while (dag_h) {
538 1.41.22.2 ad dag_h->bp = (struct buf *) desc->bp;
539 1.41.22.2 ad #if RF_ACC_TRACE > 0
540 1.41.22.2 ad dag_h->tracerec = tracerec;
541 1.41.22.2 ad #endif
542 1.41.22.2 ad dag_h = dag_h->next;
543 1.41.22.2 ad }
544 1.41.22.2 ad dagList = dagList->next;
545 1.41.22.2 ad }
546 1.41.22.2 ad desc->flags |= RF_DAG_DISPATCH_RETURNED;
547 1.41.22.2 ad desc->state++; /* next state should be rf_State_ExecuteDAG */
548 1.41.22.2 ad }
549 1.41.22.2 ad return RF_FALSE;
550 1.41.22.2 ad }
551 1.41.22.2 ad
552 1.41.22.2 ad
553 1.41.22.2 ad
554 1.41.22.2 ad /* the access has an list of dagLists, one dagList per parity stripe.
555 1.41.22.2 ad * fire the first dag in each parity stripe (dagList).
556 1.41.22.2 ad * dags within a stripe (dagList) must be executed sequentially
557 1.41.22.2 ad * - this preserves atomic parity update
558 1.41.22.2 ad * dags for independents parity groups (stripes) are fired concurrently */
559 1.41.22.2 ad
560 1.41.22.2 ad int
561 1.41.22.2 ad rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
562 1.41.22.2 ad {
563 1.41.22.2 ad int i;
564 1.41.22.2 ad RF_DagHeader_t *dag_h;
565 1.41.22.2 ad RF_DagList_t *dagList;
566 1.41.22.2 ad
567 1.41.22.2 ad /* next state is always rf_State_ProcessDAG important to do
568 1.41.22.2 ad * this before firing the first dag (it may finish before we
569 1.41.22.2 ad * leave this routine) */
570 1.41.22.2 ad desc->state++;
571 1.41.22.2 ad
572 1.41.22.2 ad /* sweep dag array, a stripe at a time, firing the first dag
573 1.41.22.2 ad * in each stripe */
574 1.41.22.2 ad dagList = desc->dagList;
575 1.41.22.2 ad for (i = 0; i < desc->numStripes; i++) {
576 1.41.22.2 ad RF_ASSERT(dagList->numDags > 0);
577 1.41.22.2 ad RF_ASSERT(dagList->numDagsDone == 0);
578 1.41.22.2 ad RF_ASSERT(dagList->numDagsFired == 0);
579 1.41.22.2 ad #if RF_ACC_TRACE > 0
580 1.41.22.2 ad RF_ETIMER_START(dagList->tracerec.timer);
581 1.41.22.2 ad #endif
582 1.41.22.2 ad /* fire first dag in this stripe */
583 1.41.22.2 ad dag_h = dagList->dags;
584 1.41.22.2 ad RF_ASSERT(dag_h);
585 1.41.22.2 ad dagList->numDagsFired++;
586 1.41.22.2 ad rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, dagList);
587 1.41.22.2 ad dagList = dagList->next;
588 1.41.22.2 ad }
589 1.41.22.2 ad
590 1.41.22.2 ad /* the DAG will always call the callback, even if there was no
591 1.41.22.2 ad * blocking, so we are always suspended in this state */
592 1.41.22.2 ad return RF_TRUE;
593 1.41.22.2 ad }
594 1.41.22.2 ad
595 1.41.22.2 ad
596 1.41.22.2 ad
597 1.41.22.2 ad /* rf_State_ProcessDAG is entered when a dag completes.
598 1.41.22.2 ad * first, check to all dags in the access have completed
599 1.41.22.2 ad * if not, fire as many dags as possible */
600 1.41.22.2 ad
601 1.41.22.2 ad int
602 1.41.22.2 ad rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
603 1.41.22.2 ad {
604 1.41.22.2 ad RF_AccessStripeMapHeader_t *asmh = desc->asmap;
605 1.41.22.2 ad RF_Raid_t *raidPtr = desc->raidPtr;
606 1.41.22.2 ad RF_DagHeader_t *dag_h;
607 1.41.22.2 ad int i, j, done = RF_TRUE;
608 1.41.22.2 ad RF_DagList_t *dagList, *temp;
609 1.41.22.2 ad
610 1.41.22.2 ad /* check to see if this is the last dag */
611 1.41.22.2 ad dagList = desc->dagList;
612 1.41.22.2 ad for (i = 0; i < desc->numStripes; i++) {
613 1.41.22.2 ad if (dagList->numDags != dagList->numDagsDone)
614 1.41.22.2 ad done = RF_FALSE;
615 1.41.22.2 ad dagList = dagList->next;
616 1.41.22.2 ad }
617 1.41.22.2 ad
618 1.41.22.2 ad if (done) {
619 1.41.22.2 ad if (desc->status) {
620 1.41.22.2 ad /* a dag failed, retry */
621 1.41.22.2 ad /* free all dags */
622 1.41.22.2 ad dagList = desc->dagList;
623 1.41.22.2 ad for (i = 0; i < desc->numStripes; i++) {
624 1.41.22.2 ad rf_FreeDAG(dagList->dags);
625 1.41.22.2 ad temp = dagList;
626 1.41.22.2 ad dagList = dagList->next;
627 1.41.22.2 ad rf_FreeDAGList(temp);
628 1.41.22.2 ad }
629 1.41.22.2 ad desc->dagList = NULL;
630 1.41.22.2 ad
631 1.41.22.2 ad rf_MarkFailuresInASMList(raidPtr, asmh);
632 1.41.22.2 ad
633 1.41.22.2 ad /* note the retry so that we'll bail in
634 1.41.22.2 ad rf_State_CreateDAG() once we've retired
635 1.41.22.2 ad the IO RF_RETRY_THRESHOLD times */
636 1.41.22.2 ad
637 1.41.22.2 ad desc->numRetries++;
638 1.41.22.2 ad
639 1.41.22.2 ad /* back up to rf_State_CreateDAG */
640 1.41.22.2 ad desc->state = desc->state - 2;
641 1.41.22.2 ad return RF_FALSE;
642 1.41.22.2 ad } else {
643 1.41.22.2 ad /* move on to rf_State_Cleanup */
644 1.41.22.2 ad desc->state++;
645 1.41.22.2 ad }
646 1.41.22.2 ad return RF_FALSE;
647 1.41.22.2 ad } else {
648 1.41.22.2 ad /* more dags to execute */
649 1.41.22.2 ad /* see if any are ready to be fired. if so, fire them */
650 1.41.22.2 ad /* don't fire the initial dag in a list, it's fired in
651 1.41.22.2 ad * rf_State_ExecuteDAG */
652 1.41.22.2 ad dagList = desc->dagList;
653 1.41.22.2 ad for (i = 0; i < desc->numStripes; i++) {
654 1.41.22.2 ad if ((dagList->numDagsDone < dagList->numDags)
655 1.41.22.2 ad && (dagList->numDagsDone == dagList->numDagsFired)
656 1.41.22.2 ad && (dagList->numDagsFired > 0)) {
657 1.41.22.2 ad #if RF_ACC_TRACE > 0
658 1.41.22.2 ad RF_ETIMER_START(dagList->tracerec.timer);
659 1.41.22.2 ad #endif
660 1.41.22.2 ad /* fire next dag in this stripe */
661 1.41.22.2 ad /* first, skip to next dag awaiting execution */
662 1.41.22.2 ad dag_h = dagList->dags;
663 1.41.22.2 ad for (j = 0; j < dagList->numDagsDone; j++)
664 1.41.22.2 ad dag_h = dag_h->next;
665 1.41.22.2 ad dagList->numDagsFired++;
666 1.41.22.2 ad rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess,
667 1.41.22.2 ad dagList);
668 1.41.22.2 ad }
669 1.41.22.2 ad dagList = dagList->next;
670 1.41.22.2 ad }
671 1.41.22.2 ad return RF_TRUE;
672 1.41.22.2 ad }
673 1.41.22.2 ad }
674 1.41.22.2 ad /* only make it this far if all dags complete successfully */
675 1.41.22.2 ad int
676 1.41.22.2 ad rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
677 1.41.22.2 ad {
678 1.41.22.2 ad #if RF_ACC_TRACE > 0
679 1.41.22.2 ad RF_AccTraceEntry_t *tracerec = &desc->tracerec;
680 1.41.22.2 ad RF_Etimer_t timer;
681 1.41.22.2 ad #endif
682 1.41.22.2 ad RF_AccessStripeMapHeader_t *asmh = desc->asmap;
683 1.41.22.2 ad RF_Raid_t *raidPtr = desc->raidPtr;
684 1.41.22.2 ad RF_AccessStripeMap_t *asm_p;
685 1.41.22.2 ad RF_DagList_t *dagList;
686 1.41.22.2 ad int i;
687 1.41.22.2 ad
688 1.41.22.2 ad desc->state++;
689 1.41.22.2 ad
690 1.41.22.2 ad #if RF_ACC_TRACE > 0
691 1.41.22.2 ad timer = tracerec->timer;
692 1.41.22.2 ad RF_ETIMER_STOP(timer);
693 1.41.22.2 ad RF_ETIMER_EVAL(timer);
694 1.41.22.2 ad tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
695 1.41.22.2 ad
696 1.41.22.2 ad /* the RAID I/O is complete. Clean up. */
697 1.41.22.2 ad tracerec->specific.user.dag_retry_us = 0;
698 1.41.22.2 ad
699 1.41.22.2 ad RF_ETIMER_START(timer);
700 1.41.22.2 ad #endif
701 1.41.22.2 ad /* free all dags */
702 1.41.22.2 ad dagList = desc->dagList;
703 1.41.22.2 ad for (i = 0; i < desc->numStripes; i++) {
704 1.41.22.2 ad rf_FreeDAG(dagList->dags);
705 1.41.22.2 ad dagList = dagList->next;
706 1.41.22.2 ad }
707 1.41.22.2 ad #if RF_ACC_TRACE > 0
708 1.41.22.2 ad RF_ETIMER_STOP(timer);
709 1.41.22.2 ad RF_ETIMER_EVAL(timer);
710 1.41.22.2 ad tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
711 1.41.22.2 ad
712 1.41.22.2 ad RF_ETIMER_START(timer);
713 1.41.22.2 ad #endif
714 1.41.22.2 ad for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
715 1.41.22.2 ad if (!rf_suppressLocksAndLargeWrites &&
716 1.41.22.2 ad asm_p->parityInfo &&
717 1.41.22.2 ad !(desc->flags & RF_DAG_SUPPRESS_LOCKS)) {
718 1.41.22.2 ad RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
719 1.41.22.2 ad rf_ReleaseStripeLock(raidPtr->lockTable,
720 1.41.22.2 ad asm_p->stripeID,
721 1.41.22.2 ad &asm_p->lockReqDesc);
722 1.41.22.2 ad }
723 1.41.22.2 ad if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
724 1.41.22.2 ad rf_UnblockRecon(raidPtr, asm_p);
725 1.41.22.2 ad }
726 1.41.22.2 ad }
727 1.41.22.2 ad #if RF_ACC_TRACE > 0
728 1.41.22.2 ad RF_ETIMER_STOP(timer);
729 1.41.22.2 ad RF_ETIMER_EVAL(timer);
730 1.41.22.2 ad tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
731 1.41.22.2 ad
732 1.41.22.2 ad RF_ETIMER_START(timer);
733 1.41.22.2 ad #endif
734 1.41.22.2 ad rf_FreeAccessStripeMap(asmh);
735 1.41.22.2 ad #if RF_ACC_TRACE > 0
736 1.41.22.2 ad RF_ETIMER_STOP(timer);
737 1.41.22.2 ad RF_ETIMER_EVAL(timer);
738 1.41.22.2 ad tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
739 1.41.22.2 ad
740 1.41.22.2 ad RF_ETIMER_STOP(desc->timer);
741 1.41.22.2 ad RF_ETIMER_EVAL(desc->timer);
742 1.41.22.2 ad
743 1.41.22.2 ad timer = desc->tracerec.tot_timer;
744 1.41.22.2 ad RF_ETIMER_STOP(timer);
745 1.41.22.2 ad RF_ETIMER_EVAL(timer);
746 1.41.22.2 ad desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
747 1.41.22.2 ad
748 1.41.22.2 ad rf_LogTraceRec(raidPtr, tracerec);
749 1.41.22.2 ad #endif
750 1.41.22.2 ad desc->flags |= RF_DAG_ACCESS_COMPLETE;
751 1.41.22.2 ad
752 1.41.22.2 ad return RF_FALSE;
753 1.41.22.2 ad }
754