rf_states.c revision 1.1 1 1.1 oster /* $NetBSD: rf_states.c,v 1.1 1998/11/13 04:20:34 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Author: Mark Holland, William V. Courtright II, Robby Findler
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /*
30 1.1 oster * :
31 1.1 oster * Log: rf_states.c,v
32 1.1 oster * Revision 1.45 1996/07/28 20:31:39 jimz
33 1.1 oster * i386netbsd port
34 1.1 oster * true/false fixup
35 1.1 oster *
36 1.1 oster * Revision 1.44 1996/07/27 23:36:08 jimz
37 1.1 oster * Solaris port of simulator
38 1.1 oster *
39 1.1 oster * Revision 1.43 1996/07/22 19:52:16 jimz
40 1.1 oster * switched node params to RF_DagParam_t, a union of
41 1.1 oster * a 64-bit int and a void *, for better portability
42 1.1 oster * attempted hpux port, but failed partway through for
43 1.1 oster * lack of a single C compiler capable of compiling all
44 1.1 oster * source files
45 1.1 oster *
46 1.1 oster * Revision 1.42 1996/07/17 21:00:58 jimz
47 1.1 oster * clean up timer interface, tracing
48 1.1 oster *
49 1.1 oster * Revision 1.41 1996/07/11 19:08:00 jimz
50 1.1 oster * generalize reconstruction mechanism
51 1.1 oster * allow raid1 reconstructs via copyback (done with array
52 1.1 oster * quiesced, not online, therefore not disk-directed)
53 1.1 oster *
54 1.1 oster * Revision 1.40 1996/06/17 14:38:33 jimz
55 1.1 oster * properly #if out RF_DEMO code
56 1.1 oster * fix bug in MakeConfig that was causing weird behavior
57 1.1 oster * in configuration routines (config was not zeroed at start)
58 1.1 oster * clean up genplot handling of stacks
59 1.1 oster *
60 1.1 oster * Revision 1.39 1996/06/11 18:12:17 jimz
61 1.1 oster * got rid of evil race condition in LastState
62 1.1 oster *
63 1.1 oster * Revision 1.38 1996/06/10 14:18:58 jimz
64 1.1 oster * move user, throughput stats into per-array structure
65 1.1 oster *
66 1.1 oster * Revision 1.37 1996/06/09 02:36:46 jimz
67 1.1 oster * lots of little crufty cleanup- fixup whitespace
68 1.1 oster * issues, comment #ifdefs, improve typing in some
69 1.1 oster * places (esp size-related)
70 1.1 oster *
71 1.1 oster * Revision 1.36 1996/06/07 21:33:04 jimz
72 1.1 oster * begin using consistent types for sector numbers,
73 1.1 oster * stripe numbers, row+col numbers, recon unit numbers
74 1.1 oster *
75 1.1 oster * Revision 1.35 1996/06/05 18:06:02 jimz
76 1.1 oster * Major code cleanup. The Great Renaming is now done.
77 1.1 oster * Better modularity. Better typing. Fixed a bunch of
78 1.1 oster * synchronization bugs. Made a lot of global stuff
79 1.1 oster * per-desc or per-array. Removed dead code.
80 1.1 oster *
81 1.1 oster * Revision 1.34 1996/06/03 23:28:26 jimz
82 1.1 oster * more bugfixes
83 1.1 oster * check in tree to sync for IPDS runs with current bugfixes
84 1.1 oster * there still may be a problem with threads in the script test
85 1.1 oster * getting I/Os stuck- not trivially reproducible (runs ~50 times
86 1.1 oster * in a row without getting stuck)
87 1.1 oster *
88 1.1 oster * Revision 1.33 1996/05/31 22:26:54 jimz
89 1.1 oster * fix a lot of mapping problems, memory allocation problems
90 1.1 oster * found some weird lock issues, fixed 'em
91 1.1 oster * more code cleanup
92 1.1 oster *
93 1.1 oster * Revision 1.32 1996/05/30 12:59:18 jimz
94 1.1 oster * make etimer happier, more portable
95 1.1 oster *
96 1.1 oster * Revision 1.31 1996/05/30 11:29:41 jimz
97 1.1 oster * Numerous bug fixes. Stripe lock release code disagreed with the taking code
98 1.1 oster * about when stripes should be locked (I made it consistent: no parity, no lock)
99 1.1 oster * There was a lot of extra serialization of I/Os which I've removed- a lot of
100 1.1 oster * it was to calculate values for the cache code, which is no longer with us.
101 1.1 oster * More types, function, macro cleanup. Added code to properly quiesce the array
102 1.1 oster * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
103 1.1 oster * before. Fixed memory allocation, freeing bugs.
104 1.1 oster *
105 1.1 oster * Revision 1.30 1996/05/27 18:56:37 jimz
106 1.1 oster * more code cleanup
107 1.1 oster * better typing
108 1.1 oster * compiles in all 3 environments
109 1.1 oster *
110 1.1 oster * Revision 1.29 1996/05/24 22:17:04 jimz
111 1.1 oster * continue code + namespace cleanup
112 1.1 oster * typed a bunch of flags
113 1.1 oster *
114 1.1 oster * Revision 1.28 1996/05/24 04:28:55 jimz
115 1.1 oster * release cleanup ckpt
116 1.1 oster *
117 1.1 oster * Revision 1.27 1996/05/23 21:46:35 jimz
118 1.1 oster * checkpoint in code cleanup (release prep)
119 1.1 oster * lots of types, function names have been fixed
120 1.1 oster *
121 1.1 oster * Revision 1.26 1996/05/23 00:33:23 jimz
122 1.1 oster * code cleanup: move all debug decls to rf_options.c, all extern
123 1.1 oster * debug decls to rf_options.h, all debug vars preceded by rf_
124 1.1 oster *
125 1.1 oster * Revision 1.25 1996/05/20 19:31:46 jimz
126 1.1 oster * straighten out syntax problems
127 1.1 oster *
128 1.1 oster * Revision 1.24 1996/05/18 19:51:34 jimz
129 1.1 oster * major code cleanup- fix syntax, make some types consistent,
130 1.1 oster * add prototypes, clean out dead code, et cetera
131 1.1 oster *
132 1.1 oster * Revision 1.23 1996/05/16 23:37:33 jimz
133 1.1 oster * fix misspelled "else"
134 1.1 oster *
135 1.1 oster * Revision 1.22 1996/05/15 22:33:32 jimz
136 1.1 oster * appropriately #ifdef cache stuff
137 1.1 oster *
138 1.1 oster * Revision 1.21 1996/05/06 22:09:20 wvcii
139 1.1 oster * rf_State_ExecuteDAG now only executes the first dag
140 1.1 oster * of each parity stripe in a multi-stripe access
141 1.1 oster *
142 1.1 oster * rf_State_ProcessDAG now executes all dags in a
143 1.1 oster * multi-stripe access except the first dag of each stripe.
144 1.1 oster *
145 1.1 oster * Revision 1.20 1995/12/12 18:10:06 jimz
146 1.1 oster * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
147 1.1 oster * fix 80-column brain damage in comments
148 1.1 oster *
149 1.1 oster * Revision 1.19 1995/11/19 16:29:50 wvcii
150 1.1 oster * replaced LaunchDAGState with CreateDAGState, ExecuteDAGState
151 1.1 oster * created rf_ContinueDagAccess
152 1.1 oster *
153 1.1 oster * Revision 1.18 1995/11/07 15:37:23 wvcii
154 1.1 oster * deleted states SendDAGState, RetryDAGState
155 1.1 oster * added staes: LaunchDAGState, ProcessDAGState
156 1.1 oster * code no longer has a hard-coded retry count of 1 but will support
157 1.1 oster * retries until a dag can not be found (selected) to perform the user request
158 1.1 oster *
159 1.1 oster * Revision 1.17 1995/10/09 23:36:08 amiri
160 1.1 oster * *** empty log message ***
161 1.1 oster *
162 1.1 oster * Revision 1.16 1995/10/09 18:36:58 jimz
163 1.1 oster * moved call to StopThroughput for user-level driver to rf_driver.c
164 1.1 oster *
165 1.1 oster * Revision 1.15 1995/10/09 18:07:23 wvcii
166 1.1 oster * lastState now call rf_StopThroughputStats
167 1.1 oster *
168 1.1 oster * Revision 1.14 1995/10/05 18:56:31 jimz
169 1.1 oster * no-op file if !INCLUDE_VS
170 1.1 oster *
171 1.1 oster * Revision 1.13 1995/09/30 20:38:24 jimz
172 1.1 oster * LogTraceRec now takes a Raid * as its first argument
173 1.1 oster *
174 1.1 oster * Revision 1.12 1995/09/19 22:58:54 jimz
175 1.1 oster * integrate DKUSAGE into raidframe
176 1.1 oster *
177 1.1 oster * Revision 1.11 1995/09/07 01:26:55 jimz
178 1.1 oster * Achive basic compilation in kernel. Kernel functionality
179 1.1 oster * is not guaranteed at all, but it'll compile. Mostly. I hope.
180 1.1 oster *
181 1.1 oster * Revision 1.10 1995/07/26 03:28:31 robby
182 1.1 oster * intermediary checkin
183 1.1 oster *
184 1.1 oster * Revision 1.9 1995/07/23 02:50:33 robby
185 1.1 oster * oops. fixed boo boo
186 1.1 oster *
187 1.1 oster * Revision 1.8 1995/07/22 22:54:54 robby
188 1.1 oster * removed incorrect comment
189 1.1 oster *
190 1.1 oster * Revision 1.7 1995/07/21 19:30:26 robby
191 1.1 oster * added idle state for rf_when-idle.c
192 1.1 oster *
193 1.1 oster * Revision 1.6 1995/07/10 19:06:28 rachad
194 1.1 oster * *** empty log message ***
195 1.1 oster *
196 1.1 oster * Revision 1.5 1995/07/10 17:30:38 robby
197 1.1 oster * added virtual striping lock states
198 1.1 oster *
199 1.1 oster * Revision 1.4 1995/07/08 18:05:39 rachad
200 1.1 oster * Linked up Claudsons code with the real cache
201 1.1 oster *
202 1.1 oster * Revision 1.3 1995/07/06 14:38:50 robby
203 1.1 oster * changed get_thread_id to get_threadid
204 1.1 oster *
205 1.1 oster * Revision 1.2 1995/07/06 14:24:15 robby
206 1.1 oster * added log
207 1.1 oster *
208 1.1 oster */
209 1.1 oster
210 1.1 oster #ifdef _KERNEL
211 1.1 oster #define KERNEL
212 1.1 oster #endif
213 1.1 oster
214 1.1 oster #ifdef KERNEL
215 1.1 oster #ifndef __NetBSD__
216 1.1 oster #include <dkusage.h>
217 1.1 oster #endif /* !__NetBSD__ */
218 1.1 oster #endif /* KERNEL */
219 1.1 oster
220 1.1 oster #include <sys/errno.h>
221 1.1 oster
222 1.1 oster #include "rf_archs.h"
223 1.1 oster #include "rf_threadstuff.h"
224 1.1 oster #include "rf_raid.h"
225 1.1 oster #include "rf_dag.h"
226 1.1 oster #include "rf_desc.h"
227 1.1 oster #include "rf_aselect.h"
228 1.1 oster #include "rf_threadid.h"
229 1.1 oster #include "rf_general.h"
230 1.1 oster #include "rf_states.h"
231 1.1 oster #include "rf_dagutils.h"
232 1.1 oster #include "rf_driver.h"
233 1.1 oster #include "rf_engine.h"
234 1.1 oster #include "rf_map.h"
235 1.1 oster #include "rf_etimer.h"
236 1.1 oster
237 1.1 oster #if defined(KERNEL) && (DKUSAGE > 0)
238 1.1 oster #include <sys/dkusage.h>
239 1.1 oster #include <io/common/iotypes.h>
240 1.1 oster #include <io/cam/dec_cam.h>
241 1.1 oster #include <io/cam/cam.h>
242 1.1 oster #include <io/cam/pdrv.h>
243 1.1 oster #endif /* KERNEL && DKUSAGE > 0 */
244 1.1 oster
245 1.1 oster /* prototypes for some of the available states.
246 1.1 oster
247 1.1 oster States must:
248 1.1 oster
249 1.1 oster - not block.
250 1.1 oster
251 1.1 oster - either schedule rf_ContinueRaidAccess as a callback and return
252 1.1 oster RF_TRUE, or complete all of their work and return RF_FALSE.
253 1.1 oster
254 1.1 oster - increment desc->state when they have finished their work.
255 1.1 oster */
256 1.1 oster
257 1.1 oster
258 1.1 oster #ifdef SIMULATE
259 1.1 oster extern int global_async_flag;
260 1.1 oster #endif /* SIMULATE */
261 1.1 oster
262 1.1 oster static char *StateName(RF_AccessState_t state)
263 1.1 oster {
264 1.1 oster switch (state) {
265 1.1 oster case rf_QuiesceState: return "QuiesceState";
266 1.1 oster case rf_MapState: return "MapState";
267 1.1 oster case rf_LockState: return "LockState";
268 1.1 oster case rf_CreateDAGState: return "CreateDAGState";
269 1.1 oster case rf_ExecuteDAGState: return "ExecuteDAGState";
270 1.1 oster case rf_ProcessDAGState: return "ProcessDAGState";
271 1.1 oster case rf_CleanupState: return "CleanupState";
272 1.1 oster case rf_LastState: return "LastState";
273 1.1 oster case rf_IncrAccessesCountState: return "IncrAccessesCountState";
274 1.1 oster case rf_DecrAccessesCountState: return "DecrAccessesCountState";
275 1.1 oster default: return "!!! UnnamedState !!!";
276 1.1 oster }
277 1.1 oster }
278 1.1 oster
279 1.1 oster void rf_ContinueRaidAccess(RF_RaidAccessDesc_t *desc)
280 1.1 oster {
281 1.1 oster int suspended = RF_FALSE;
282 1.1 oster int current_state_index = desc->state;
283 1.1 oster RF_AccessState_t current_state = desc->states[current_state_index];
284 1.1 oster
285 1.1 oster #ifdef SIMULATE
286 1.1 oster rf_SetCurrentOwner(desc->owner);
287 1.1 oster #endif /* SIMULATE */
288 1.1 oster
289 1.1 oster do {
290 1.1 oster
291 1.1 oster current_state_index = desc->state;
292 1.1 oster current_state = desc->states [current_state_index];
293 1.1 oster
294 1.1 oster switch (current_state) {
295 1.1 oster
296 1.1 oster case rf_QuiesceState: suspended = rf_State_Quiesce(desc);
297 1.1 oster break;
298 1.1 oster case rf_IncrAccessesCountState: suspended = rf_State_IncrAccessCount(desc);
299 1.1 oster break;
300 1.1 oster case rf_MapState: suspended = rf_State_Map(desc);
301 1.1 oster break;
302 1.1 oster case rf_LockState: suspended = rf_State_Lock(desc);
303 1.1 oster break;
304 1.1 oster case rf_CreateDAGState: suspended = rf_State_CreateDAG(desc);
305 1.1 oster break;
306 1.1 oster case rf_ExecuteDAGState: suspended = rf_State_ExecuteDAG(desc);
307 1.1 oster break;
308 1.1 oster case rf_ProcessDAGState: suspended = rf_State_ProcessDAG(desc);
309 1.1 oster break;
310 1.1 oster case rf_CleanupState: suspended = rf_State_Cleanup(desc);
311 1.1 oster break;
312 1.1 oster case rf_DecrAccessesCountState: suspended = rf_State_DecrAccessCount(desc);
313 1.1 oster break;
314 1.1 oster case rf_LastState: suspended = rf_State_LastState(desc);
315 1.1 oster break;
316 1.1 oster }
317 1.1 oster
318 1.1 oster /* after this point, we cannot dereference desc since desc may
319 1.1 oster have been freed. desc is only freed in LastState, so if we
320 1.1 oster renter this function or loop back up, desc should be valid. */
321 1.1 oster
322 1.1 oster if (rf_printStatesDebug) {
323 1.1 oster int tid;
324 1.1 oster rf_get_threadid (tid);
325 1.1 oster
326 1.1 oster printf ("[%d] State: %-24s StateIndex: %3i desc: 0x%ld %s\n",
327 1.1 oster tid, StateName(current_state), current_state_index, (long)desc,
328 1.1 oster suspended ? "callback scheduled" : "looping");
329 1.1 oster }
330 1.1 oster } while (!suspended && current_state != rf_LastState);
331 1.1 oster
332 1.1 oster return;
333 1.1 oster }
334 1.1 oster
335 1.1 oster
336 1.1 oster void rf_ContinueDagAccess (RF_DagList_t *dagList)
337 1.1 oster {
338 1.1 oster RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec);
339 1.1 oster RF_RaidAccessDesc_t *desc;
340 1.1 oster RF_DagHeader_t *dag_h;
341 1.1 oster RF_Etimer_t timer;
342 1.1 oster int i;
343 1.1 oster
344 1.1 oster desc = dagList->desc;
345 1.1 oster
346 1.1 oster timer = tracerec->timer;
347 1.1 oster RF_ETIMER_STOP(timer);
348 1.1 oster RF_ETIMER_EVAL(timer);
349 1.1 oster tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer);
350 1.1 oster RF_ETIMER_START(tracerec->timer);
351 1.1 oster
352 1.1 oster /* skip to dag which just finished */
353 1.1 oster dag_h = dagList->dags;
354 1.1 oster for (i = 0; i < dagList->numDagsDone; i++) {
355 1.1 oster dag_h = dag_h->next;
356 1.1 oster }
357 1.1 oster
358 1.1 oster /* check to see if retry is required */
359 1.1 oster if (dag_h->status == rf_rollBackward) {
360 1.1 oster /* when a dag fails, mark desc status as bad and allow all other dags
361 1.1 oster * in the desc to execute to completion. then, free all dags and start over */
362 1.1 oster desc->status = 1; /* bad status */
363 1.1 oster #if RF_DEMO > 0
364 1.1 oster if (!rf_demoMode)
365 1.1 oster #endif /* RF_DEMO > 0 */
366 1.1 oster {
367 1.1 oster printf("[%d] DAG failure: %c addr 0x%lx (%ld) nblk 0x%x (%d) buf 0x%lx\n",
368 1.1 oster desc->tid, desc->type, (long)desc->raidAddress,
369 1.1 oster (long)desc->raidAddress,(int)desc->numBlocks,
370 1.1 oster (int)desc->numBlocks, (unsigned long) (desc->bufPtr));
371 1.1 oster }
372 1.1 oster }
373 1.1 oster
374 1.1 oster dagList->numDagsDone++;
375 1.1 oster rf_ContinueRaidAccess(desc);
376 1.1 oster }
377 1.1 oster
378 1.1 oster
379 1.1 oster int rf_State_LastState(RF_RaidAccessDesc_t *desc)
380 1.1 oster {
381 1.1 oster void (*callbackFunc)(RF_CBParam_t) = desc->callbackFunc;
382 1.1 oster void *callbackArg = desc->callbackArg;
383 1.1 oster
384 1.1 oster #ifdef SIMULATE
385 1.1 oster int tid;
386 1.1 oster rf_get_threadid(tid);
387 1.1 oster
388 1.1 oster if (rf_accessDebug)
389 1.1 oster printf("async_flag set to %d\n",global_async_flag);
390 1.1 oster global_async_flag=desc->async_flag;
391 1.1 oster if (rf_accessDebug)
392 1.1 oster printf("Will now do clean up for %d\n",rf_GetCurrentOwner());
393 1.1 oster rf_FreeRaidAccDesc(desc);
394 1.1 oster
395 1.1 oster if (callbackFunc)
396 1.1 oster callbackFunc(callbackArg);
397 1.1 oster #else /* SIMULATE */
398 1.1 oster
399 1.1 oster #ifndef KERNEL
400 1.1 oster
401 1.1 oster if (!(desc->flags & RF_DAG_NONBLOCKING_IO)) {
402 1.1 oster /* bummer that we have to take another lock here */
403 1.1 oster RF_LOCK_MUTEX(desc->mutex);
404 1.1 oster RF_ASSERT(desc->flags&RF_DAG_ACCESS_COMPLETE);
405 1.1 oster RF_SIGNAL_COND(desc->cond); /* DoAccess frees the desc in the blocking-I/O case */
406 1.1 oster RF_UNLOCK_MUTEX(desc->mutex);
407 1.1 oster }
408 1.1 oster else
409 1.1 oster rf_FreeRaidAccDesc(desc);
410 1.1 oster
411 1.1 oster if (callbackFunc)
412 1.1 oster callbackFunc(callbackArg);
413 1.1 oster
414 1.1 oster #else /* KERNEL */
415 1.1 oster if (!(desc->flags & RF_DAG_TEST_ACCESS)) {/* don't biodone if this */
416 1.1 oster #if DKUSAGE > 0
417 1.1 oster RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid,(struct buf *)desc->bp);
418 1.1 oster #else
419 1.1 oster RF_DKU_END_IO(((RF_Raid_t *)desc->raidPtr)->raidid);
420 1.1 oster #endif /* DKUSAGE > 0 */
421 1.1 oster /* printf("Calling biodone on 0x%x\n",desc->bp); */
422 1.1 oster biodone(desc->bp); /* access came through ioctl */
423 1.1 oster }
424 1.1 oster
425 1.1 oster if (callbackFunc) callbackFunc(callbackArg);
426 1.1 oster rf_FreeRaidAccDesc(desc);
427 1.1 oster
428 1.1 oster #endif /* ! KERNEL */
429 1.1 oster #endif /* SIMULATE */
430 1.1 oster
431 1.1 oster return RF_FALSE;
432 1.1 oster }
433 1.1 oster
434 1.1 oster int rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc)
435 1.1 oster {
436 1.1 oster RF_Raid_t *raidPtr;
437 1.1 oster
438 1.1 oster raidPtr = desc->raidPtr;
439 1.1 oster /* Bummer. We have to do this to be 100% safe w.r.t. the increment below */
440 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
441 1.1 oster raidPtr->accs_in_flight++; /* used to detect quiescence */
442 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
443 1.1 oster
444 1.1 oster desc->state++;
445 1.1 oster return RF_FALSE;
446 1.1 oster }
447 1.1 oster
448 1.1 oster int rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc)
449 1.1 oster {
450 1.1 oster RF_Raid_t *raidPtr;
451 1.1 oster
452 1.1 oster raidPtr = desc->raidPtr;
453 1.1 oster
454 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
455 1.1 oster raidPtr->accs_in_flight--;
456 1.1 oster if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) {
457 1.1 oster rf_SignalQuiescenceLock(raidPtr, raidPtr->reconDesc);
458 1.1 oster }
459 1.1 oster rf_UpdateUserStats(raidPtr, RF_ETIMER_VAL_US(desc->timer), desc->numBlocks);
460 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
461 1.1 oster
462 1.1 oster desc->state++;
463 1.1 oster return RF_FALSE;
464 1.1 oster }
465 1.1 oster
466 1.1 oster int rf_State_Quiesce(RF_RaidAccessDesc_t *desc)
467 1.1 oster {
468 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
469 1.1 oster RF_Etimer_t timer;
470 1.1 oster int suspended = RF_FALSE;
471 1.1 oster RF_Raid_t *raidPtr;
472 1.1 oster
473 1.1 oster raidPtr = desc->raidPtr;
474 1.1 oster
475 1.1 oster RF_ETIMER_START(timer);
476 1.1 oster RF_ETIMER_START(desc->timer);
477 1.1 oster
478 1.1 oster RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
479 1.1 oster if (raidPtr->accesses_suspended) {
480 1.1 oster RF_CallbackDesc_t *cb;
481 1.1 oster cb = rf_AllocCallbackDesc();
482 1.1 oster /* XXX the following cast is quite bogus... rf_ContinueRaidAccess
483 1.1 oster takes a (RF_RaidAccessDesc_t *) as an argument.. GO */
484 1.1 oster cb->callbackFunc = (void (*)(RF_CBParam_t))rf_ContinueRaidAccess;
485 1.1 oster cb->callbackArg.p = (void *) desc;
486 1.1 oster cb->next = raidPtr->quiesce_wait_list;
487 1.1 oster raidPtr->quiesce_wait_list = cb;
488 1.1 oster suspended = RF_TRUE;
489 1.1 oster }
490 1.1 oster
491 1.1 oster RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
492 1.1 oster
493 1.1 oster RF_ETIMER_STOP(timer);
494 1.1 oster RF_ETIMER_EVAL(timer);
495 1.1 oster tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer);
496 1.1 oster
497 1.1 oster if (suspended && rf_quiesceDebug)
498 1.1 oster printf("Stalling access due to quiescence lock\n");
499 1.1 oster
500 1.1 oster desc->state++;
501 1.1 oster return suspended;
502 1.1 oster }
503 1.1 oster
504 1.1 oster int rf_State_Map(RF_RaidAccessDesc_t *desc)
505 1.1 oster {
506 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
507 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
508 1.1 oster RF_Etimer_t timer;
509 1.1 oster
510 1.1 oster RF_ETIMER_START(timer);
511 1.1 oster
512 1.1 oster if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks,
513 1.1 oster desc->bufPtr, RF_DONT_REMAP)))
514 1.1 oster RF_PANIC();
515 1.1 oster
516 1.1 oster RF_ETIMER_STOP(timer);
517 1.1 oster RF_ETIMER_EVAL(timer);
518 1.1 oster tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer);
519 1.1 oster
520 1.1 oster desc->state ++;
521 1.1 oster return RF_FALSE;
522 1.1 oster }
523 1.1 oster
524 1.1 oster int rf_State_Lock(RF_RaidAccessDesc_t *desc)
525 1.1 oster {
526 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
527 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
528 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
529 1.1 oster RF_AccessStripeMap_t *asm_p;
530 1.1 oster RF_Etimer_t timer;
531 1.1 oster int suspended = RF_FALSE;
532 1.1 oster
533 1.1 oster RF_ETIMER_START(timer);
534 1.1 oster if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
535 1.1 oster RF_StripeNum_t lastStripeID = -1;
536 1.1 oster
537 1.1 oster /* acquire each lock that we don't already hold */
538 1.1 oster for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
539 1.1 oster RF_ASSERT(RF_IO_IS_R_OR_W(desc->type));
540 1.1 oster if (!rf_suppressLocksAndLargeWrites &&
541 1.1 oster asm_p->parityInfo &&
542 1.1 oster !(desc->flags& RF_DAG_SUPPRESS_LOCKS) &&
543 1.1 oster !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED))
544 1.1 oster {
545 1.1 oster asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED;
546 1.1 oster RF_ASSERT(asm_p->stripeID > lastStripeID); /* locks must be acquired
547 1.1 oster hierarchically */
548 1.1 oster lastStripeID = asm_p->stripeID;
549 1.1 oster /* XXX the cast to (void (*)(RF_CBParam_t)) below is bogus! GO */
550 1.1 oster RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type,
551 1.1 oster (void (*)(struct buf *))rf_ContinueRaidAccess, desc, asm_p,
552 1.1 oster raidPtr->Layout.dataSectorsPerStripe);
553 1.1 oster if (rf_AcquireStripeLock(raidPtr->lockTable, asm_p->stripeID,
554 1.1 oster &asm_p->lockReqDesc))
555 1.1 oster {
556 1.1 oster suspended = RF_TRUE;
557 1.1 oster break;
558 1.1 oster }
559 1.1 oster }
560 1.1 oster
561 1.1 oster if (desc->type == RF_IO_TYPE_WRITE &&
562 1.1 oster raidPtr->status[asm_p->physInfo->row] == rf_rs_reconstructing)
563 1.1 oster {
564 1.1 oster if (! (asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED) ) {
565 1.1 oster int val;
566 1.1 oster
567 1.1 oster asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED;
568 1.1 oster /* XXX the cast below is quite bogus!!! XXX GO */
569 1.1 oster val = rf_ForceOrBlockRecon(raidPtr, asm_p,
570 1.1 oster (void (*)(RF_Raid_t *,void *))rf_ContinueRaidAccess, desc);
571 1.1 oster if (val == 0) {
572 1.1 oster asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED;
573 1.1 oster }
574 1.1 oster else {
575 1.1 oster suspended = RF_TRUE;
576 1.1 oster break;
577 1.1 oster }
578 1.1 oster }
579 1.1 oster else {
580 1.1 oster if (rf_pssDebug) {
581 1.1 oster printf("[%d] skipping force/block because already done, psid %ld\n",
582 1.1 oster desc->tid,(long)asm_p->stripeID);
583 1.1 oster }
584 1.1 oster }
585 1.1 oster }
586 1.1 oster else {
587 1.1 oster if (rf_pssDebug) {
588 1.1 oster printf("[%d] skipping force/block because not write or not under recon, psid %ld\n",
589 1.1 oster desc->tid,(long)asm_p->stripeID);
590 1.1 oster }
591 1.1 oster }
592 1.1 oster }
593 1.1 oster
594 1.1 oster RF_ETIMER_STOP(timer);
595 1.1 oster RF_ETIMER_EVAL(timer);
596 1.1 oster tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
597 1.1 oster
598 1.1 oster if (suspended)
599 1.1 oster return(RF_TRUE);
600 1.1 oster }
601 1.1 oster
602 1.1 oster desc->state++;
603 1.1 oster return(RF_FALSE);
604 1.1 oster }
605 1.1 oster
606 1.1 oster /*
607 1.1 oster * the following three states create, execute, and post-process dags
608 1.1 oster * the error recovery unit is a single dag.
609 1.1 oster * by default, SelectAlgorithm creates an array of dags, one per parity stripe
610 1.1 oster * in some tricky cases, multiple dags per stripe are created
611 1.1 oster * - dags within a parity stripe are executed sequentially (arbitrary order)
612 1.1 oster * - dags for distinct parity stripes are executed concurrently
613 1.1 oster *
614 1.1 oster * repeat until all dags complete successfully -or- dag selection fails
615 1.1 oster *
616 1.1 oster * while !done
617 1.1 oster * create dag(s) (SelectAlgorithm)
618 1.1 oster * if dag
619 1.1 oster * execute dag (DispatchDAG)
620 1.1 oster * if dag successful
621 1.1 oster * done (SUCCESS)
622 1.1 oster * else
623 1.1 oster * !done (RETRY - start over with new dags)
624 1.1 oster * else
625 1.1 oster * done (FAIL)
626 1.1 oster */
627 1.1 oster int rf_State_CreateDAG (RF_RaidAccessDesc_t *desc)
628 1.1 oster {
629 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
630 1.1 oster RF_Etimer_t timer;
631 1.1 oster RF_DagHeader_t *dag_h;
632 1.1 oster int i, selectStatus;
633 1.1 oster
634 1.1 oster /* generate a dag for the access, and fire it off. When the dag
635 1.1 oster completes, we'll get re-invoked in the next state. */
636 1.1 oster RF_ETIMER_START(timer);
637 1.1 oster /* SelectAlgorithm returns one or more dags */
638 1.1 oster selectStatus = rf_SelectAlgorithm(desc, desc->flags|RF_DAG_SUPPRESS_LOCKS);
639 1.1 oster if (rf_printDAGsDebug)
640 1.1 oster for (i = 0; i < desc->numStripes; i++)
641 1.1 oster rf_PrintDAGList(desc->dagArray[i].dags);
642 1.1 oster RF_ETIMER_STOP(timer);
643 1.1 oster RF_ETIMER_EVAL(timer);
644 1.1 oster /* update time to create all dags */
645 1.1 oster tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer);
646 1.1 oster
647 1.1 oster desc->status = 0; /* good status */
648 1.1 oster
649 1.1 oster if (selectStatus) {
650 1.1 oster /* failed to create a dag */
651 1.1 oster /* this happens when there are too many faults or incomplete dag libraries */
652 1.1 oster printf("[Failed to create a DAG\n]");
653 1.1 oster RF_PANIC();
654 1.1 oster }
655 1.1 oster else {
656 1.1 oster /* bind dags to desc */
657 1.1 oster for (i = 0; i < desc->numStripes; i++) {
658 1.1 oster dag_h = desc->dagArray[i].dags;
659 1.1 oster while (dag_h) {
660 1.1 oster #ifdef KERNEL
661 1.1 oster dag_h->bp = (struct buf *) desc->bp;
662 1.1 oster #endif /* KERNEL */
663 1.1 oster dag_h->tracerec = tracerec;
664 1.1 oster dag_h = dag_h->next;
665 1.1 oster }
666 1.1 oster }
667 1.1 oster desc->flags |= RF_DAG_DISPATCH_RETURNED;
668 1.1 oster desc->state++; /* next state should be rf_State_ExecuteDAG */
669 1.1 oster }
670 1.1 oster return RF_FALSE;
671 1.1 oster }
672 1.1 oster
673 1.1 oster
674 1.1 oster
675 1.1 oster /* the access has an array of dagLists, one dagList per parity stripe.
676 1.1 oster * fire the first dag in each parity stripe (dagList).
677 1.1 oster * dags within a stripe (dagList) must be executed sequentially
678 1.1 oster * - this preserves atomic parity update
679 1.1 oster * dags for independent parity groups (stripes) are fired concurrently */
680 1.1 oster
681 1.1 oster int rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc)
682 1.1 oster {
683 1.1 oster int i;
684 1.1 oster RF_DagHeader_t *dag_h;
685 1.1 oster RF_DagList_t *dagArray = desc->dagArray;
686 1.1 oster
687 1.1 oster /* next state is always rf_State_ProcessDAG
688 1.1 oster * important to do this before firing the first dag
689 1.1 oster * (it may finish before we leave this routine) */
690 1.1 oster desc->state++;
691 1.1 oster
692 1.1 oster /* sweep dag array, a stripe at a time, firing the first dag in each stripe */
693 1.1 oster for (i = 0; i < desc->numStripes; i++) {
694 1.1 oster RF_ASSERT(dagArray[i].numDags > 0);
695 1.1 oster RF_ASSERT(dagArray[i].numDagsDone == 0);
696 1.1 oster RF_ASSERT(dagArray[i].numDagsFired == 0);
697 1.1 oster RF_ETIMER_START(dagArray[i].tracerec.timer);
698 1.1 oster /* fire first dag in this stripe */
699 1.1 oster dag_h = dagArray[i].dags;
700 1.1 oster RF_ASSERT(dag_h);
701 1.1 oster dagArray[i].numDagsFired++;
702 1.1 oster /* XXX Yet another case where we pass in a conflicting function pointer
703 1.1 oster :-( XXX GO */
704 1.1 oster rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess, &dagArray[i]);
705 1.1 oster }
706 1.1 oster
707 1.1 oster /* the DAG will always call the callback, even if there was no
708 1.1 oster * blocking, so we are always suspended in this state */
709 1.1 oster return RF_TRUE;
710 1.1 oster }
711 1.1 oster
712 1.1 oster
713 1.1 oster
714 1.1 oster /* rf_State_ProcessDAG is entered when a dag completes.
715 1.1 oster * first, check to see if all dags in the access have completed
716 1.1 oster * if not, fire as many dags as possible */
717 1.1 oster
718 1.1 oster int rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc)
719 1.1 oster {
720 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
721 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
722 1.1 oster RF_DagHeader_t *dag_h;
723 1.1 oster int i, j, done = RF_TRUE;
724 1.1 oster RF_DagList_t *dagArray = desc->dagArray;
725 1.1 oster RF_Etimer_t timer;
726 1.1 oster
727 1.1 oster /* check to see if this is the last dag */
728 1.1 oster for (i = 0; i < desc->numStripes; i++)
729 1.1 oster if (dagArray[i].numDags != dagArray[i].numDagsDone)
730 1.1 oster done = RF_FALSE;
731 1.1 oster
732 1.1 oster if (done) {
733 1.1 oster if (desc->status) {
734 1.1 oster /* a dag failed, retry */
735 1.1 oster RF_ETIMER_START(timer);
736 1.1 oster /* free all dags */
737 1.1 oster for (i = 0; i < desc->numStripes; i++) {
738 1.1 oster rf_FreeDAG(desc->dagArray[i].dags);
739 1.1 oster }
740 1.1 oster rf_MarkFailuresInASMList(raidPtr, asmh);
741 1.1 oster /* back up to rf_State_CreateDAG */
742 1.1 oster desc->state = desc->state - 2;
743 1.1 oster return RF_FALSE;
744 1.1 oster }
745 1.1 oster else {
746 1.1 oster /* move on to rf_State_Cleanup */
747 1.1 oster desc->state++;
748 1.1 oster }
749 1.1 oster return RF_FALSE;
750 1.1 oster }
751 1.1 oster else {
752 1.1 oster /* more dags to execute */
753 1.1 oster /* see if any are ready to be fired. if so, fire them */
754 1.1 oster /* don't fire the initial dag in a list, it's fired in rf_State_ExecuteDAG */
755 1.1 oster for (i = 0; i < desc->numStripes; i++) {
756 1.1 oster if ((dagArray[i].numDagsDone < dagArray[i].numDags)
757 1.1 oster && (dagArray[i].numDagsDone == dagArray[i].numDagsFired)
758 1.1 oster && (dagArray[i].numDagsFired > 0)) {
759 1.1 oster RF_ETIMER_START(dagArray[i].tracerec.timer);
760 1.1 oster /* fire next dag in this stripe */
761 1.1 oster /* first, skip to next dag awaiting execution */
762 1.1 oster dag_h = dagArray[i].dags;
763 1.1 oster for (j = 0; j < dagArray[i].numDagsDone; j++)
764 1.1 oster dag_h = dag_h->next;
765 1.1 oster dagArray[i].numDagsFired++;
766 1.1 oster /* XXX and again we pass a different function pointer.. GO */
767 1.1 oster rf_DispatchDAG(dag_h, (void (*)(void *))rf_ContinueDagAccess,
768 1.1 oster &dagArray[i]);
769 1.1 oster }
770 1.1 oster }
771 1.1 oster return RF_TRUE;
772 1.1 oster }
773 1.1 oster }
774 1.1 oster
775 1.1 oster /* only make it this far if all dags complete successfully */
776 1.1 oster int rf_State_Cleanup(RF_RaidAccessDesc_t *desc)
777 1.1 oster {
778 1.1 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec;
779 1.1 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap;
780 1.1 oster RF_Raid_t *raidPtr = desc->raidPtr;
781 1.1 oster RF_AccessStripeMap_t *asm_p;
782 1.1 oster RF_DagHeader_t *dag_h;
783 1.1 oster RF_Etimer_t timer;
784 1.1 oster int tid, i;
785 1.1 oster
786 1.1 oster desc->state ++;
787 1.1 oster
788 1.1 oster rf_get_threadid(tid);
789 1.1 oster
790 1.1 oster timer = tracerec->timer;
791 1.1 oster RF_ETIMER_STOP(timer);
792 1.1 oster RF_ETIMER_EVAL(timer);
793 1.1 oster tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer);
794 1.1 oster
795 1.1 oster /* the RAID I/O is complete. Clean up. */
796 1.1 oster tracerec->specific.user.dag_retry_us = 0;
797 1.1 oster
798 1.1 oster RF_ETIMER_START(timer);
799 1.1 oster if (desc->flags & RF_DAG_RETURN_DAG) {
800 1.1 oster /* copy dags into paramDAG */
801 1.1 oster *(desc->paramDAG) = desc->dagArray[0].dags;
802 1.1 oster dag_h = *(desc->paramDAG);
803 1.1 oster for (i = 1; i < desc->numStripes; i++) {
804 1.1 oster /* concatenate dags from remaining stripes */
805 1.1 oster RF_ASSERT(dag_h);
806 1.1 oster while (dag_h->next)
807 1.1 oster dag_h = dag_h->next;
808 1.1 oster dag_h->next = desc->dagArray[i].dags;
809 1.1 oster }
810 1.1 oster }
811 1.1 oster else {
812 1.1 oster /* free all dags */
813 1.1 oster for (i = 0; i < desc->numStripes; i++) {
814 1.1 oster rf_FreeDAG(desc->dagArray[i].dags);
815 1.1 oster }
816 1.1 oster }
817 1.1 oster
818 1.1 oster RF_ETIMER_STOP(timer);
819 1.1 oster RF_ETIMER_EVAL(timer);
820 1.1 oster tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer);
821 1.1 oster
822 1.1 oster RF_ETIMER_START(timer);
823 1.1 oster if (!(raidPtr->Layout.map->flags & RF_NO_STRIPE_LOCKS)) {
824 1.1 oster for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) {
825 1.1 oster if (!rf_suppressLocksAndLargeWrites &&
826 1.1 oster asm_p->parityInfo &&
827 1.1 oster !(desc->flags&RF_DAG_SUPPRESS_LOCKS))
828 1.1 oster {
829 1.1 oster RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc);
830 1.1 oster rf_ReleaseStripeLock(raidPtr->lockTable, asm_p->stripeID,
831 1.1 oster &asm_p->lockReqDesc);
832 1.1 oster }
833 1.1 oster if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) {
834 1.1 oster rf_UnblockRecon(raidPtr, asm_p);
835 1.1 oster }
836 1.1 oster }
837 1.1 oster }
838 1.1 oster
839 1.1 oster #ifdef SIMULATE
840 1.1 oster /* refresh current owner in case blocked ios where allowed to run */
841 1.1 oster rf_SetCurrentOwner(desc->owner);
842 1.1 oster #endif /* SIMULATE */
843 1.1 oster
844 1.1 oster RF_ETIMER_STOP(timer);
845 1.1 oster RF_ETIMER_EVAL(timer);
846 1.1 oster tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer);
847 1.1 oster
848 1.1 oster RF_ETIMER_START(timer);
849 1.1 oster if (desc->flags & RF_DAG_RETURN_ASM)
850 1.1 oster *(desc->paramASM) = asmh;
851 1.1 oster else
852 1.1 oster rf_FreeAccessStripeMap(asmh);
853 1.1 oster RF_ETIMER_STOP(timer);
854 1.1 oster RF_ETIMER_EVAL(timer);
855 1.1 oster tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer);
856 1.1 oster
857 1.1 oster RF_ETIMER_STOP(desc->timer);
858 1.1 oster RF_ETIMER_EVAL(desc->timer);
859 1.1 oster
860 1.1 oster timer = desc->tracerec.tot_timer;
861 1.1 oster RF_ETIMER_STOP(timer);
862 1.1 oster RF_ETIMER_EVAL(timer);
863 1.1 oster desc->tracerec.total_us = RF_ETIMER_VAL_US(timer);
864 1.1 oster
865 1.1 oster rf_LogTraceRec(raidPtr, tracerec);
866 1.1 oster
867 1.1 oster desc->flags |= RF_DAG_ACCESS_COMPLETE;
868 1.1 oster
869 1.1 oster return RF_FALSE;
870 1.1 oster }
871