1 1.53 oster /* $NetBSD: rf_states.c,v 1.53 2021/07/23 02:35:14 oster Exp $ */ 2 1.1 oster /* 3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University. 4 1.1 oster * All rights reserved. 5 1.1 oster * 6 1.1 oster * Author: Mark Holland, William V. Courtright II, Robby Findler 7 1.1 oster * 8 1.1 oster * Permission to use, copy, modify and distribute this software and 9 1.1 oster * its documentation is hereby granted, provided that both the copyright 10 1.1 oster * notice and this permission notice appear in all copies of the 11 1.1 oster * software, derivative works or modified versions, and any portions 12 1.1 oster * thereof, and that both notices appear in supporting documentation. 13 1.1 oster * 14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 1.1 oster * 18 1.1 oster * Carnegie Mellon requests users of this software to return to 19 1.1 oster * 20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 21 1.1 oster * School of Computer Science 22 1.1 oster * Carnegie Mellon University 23 1.1 oster * Pittsburgh PA 15213-3890 24 1.1 oster * 25 1.1 oster * any improvements or extensions that they make and grant Carnegie the 26 1.1 oster * rights to redistribute these changes. 27 1.1 oster */ 28 1.16 lukem 29 1.16 lukem #include <sys/cdefs.h> 30 1.53 oster __KERNEL_RCSID(0, "$NetBSD: rf_states.c,v 1.53 2021/07/23 02:35:14 oster Exp $"); 31 1.1 oster 32 1.1 oster #include <sys/errno.h> 33 1.1 oster 34 1.1 oster #include "rf_archs.h" 35 1.1 oster #include "rf_threadstuff.h" 36 1.1 oster #include "rf_raid.h" 37 1.1 oster #include "rf_dag.h" 38 1.1 oster #include "rf_desc.h" 39 1.1 oster #include "rf_aselect.h" 40 1.1 oster #include "rf_general.h" 41 1.1 oster #include "rf_states.h" 42 1.1 oster #include "rf_dagutils.h" 43 1.1 oster #include "rf_driver.h" 44 1.1 oster #include "rf_engine.h" 45 1.1 oster #include "rf_map.h" 46 1.1 oster #include "rf_etimer.h" 47 1.10 oster #include "rf_kintf.h" 48 1.44 jld #include "rf_paritymap.h" 49 1.1 oster 50 1.19 oster #ifndef RF_DEBUG_STATES 51 1.19 oster #define RF_DEBUG_STATES 0 52 1.19 oster #endif 53 1.19 oster 54 1.1 oster /* prototypes for some of the available states. 55 1.1 oster 56 1.1 oster States must: 57 1.1 oster 58 1.1 oster - not block. 59 1.1 oster 60 1.1 oster - either schedule rf_ContinueRaidAccess as a callback and return 61 1.1 oster RF_TRUE, or complete all of their work and return RF_FALSE. 62 1.1 oster 63 1.1 oster - increment desc->state when they have finished their work. 64 1.1 oster */ 65 1.1 oster 66 1.19 oster #if RF_DEBUG_STATES 67 1.6 oster static char * 68 1.6 oster StateName(RF_AccessState_t state) 69 1.1 oster { 70 1.6 oster switch (state) { 71 1.6 oster case rf_QuiesceState:return "QuiesceState"; 72 1.6 oster case rf_MapState: 73 1.6 oster return "MapState"; 74 1.6 oster case rf_LockState: 75 1.6 oster return "LockState"; 76 1.6 oster case rf_CreateDAGState: 77 1.6 oster return "CreateDAGState"; 78 1.6 oster case rf_ExecuteDAGState: 79 1.6 oster return "ExecuteDAGState"; 80 1.6 oster case rf_ProcessDAGState: 81 1.6 oster return "ProcessDAGState"; 82 1.6 oster case rf_CleanupState: 83 1.6 oster return "CleanupState"; 84 1.6 oster case rf_LastState: 85 1.6 oster return "LastState"; 86 1.6 oster case rf_IncrAccessesCountState: 87 1.6 oster return "IncrAccessesCountState"; 88 1.6 oster case rf_DecrAccessesCountState: 89 1.6 oster return "DecrAccessesCountState"; 90 1.6 oster default: 91 1.6 oster return "!!! UnnamedState !!!"; 92 1.6 oster } 93 1.6 oster } 94 1.19 oster #endif 95 1.6 oster 96 1.38 perry void 97 1.51 christos rf_ContinueRaidAccess(void *v) 98 1.6 oster { 99 1.51 christos RF_RaidAccessDesc_t *desc = v; 100 1.6 oster int suspended = RF_FALSE; 101 1.6 oster int current_state_index = desc->state; 102 1.6 oster RF_AccessState_t current_state = desc->states[current_state_index]; 103 1.19 oster #if RF_DEBUG_STATES 104 1.12 oster int unit = desc->raidPtr->raidid; 105 1.19 oster #endif 106 1.6 oster 107 1.6 oster do { 108 1.6 oster 109 1.6 oster current_state_index = desc->state; 110 1.6 oster current_state = desc->states[current_state_index]; 111 1.6 oster 112 1.6 oster switch (current_state) { 113 1.6 oster 114 1.6 oster case rf_QuiesceState: 115 1.6 oster suspended = rf_State_Quiesce(desc); 116 1.6 oster break; 117 1.6 oster case rf_IncrAccessesCountState: 118 1.6 oster suspended = rf_State_IncrAccessCount(desc); 119 1.6 oster break; 120 1.6 oster case rf_MapState: 121 1.6 oster suspended = rf_State_Map(desc); 122 1.6 oster break; 123 1.6 oster case rf_LockState: 124 1.6 oster suspended = rf_State_Lock(desc); 125 1.6 oster break; 126 1.6 oster case rf_CreateDAGState: 127 1.6 oster suspended = rf_State_CreateDAG(desc); 128 1.6 oster break; 129 1.6 oster case rf_ExecuteDAGState: 130 1.6 oster suspended = rf_State_ExecuteDAG(desc); 131 1.6 oster break; 132 1.6 oster case rf_ProcessDAGState: 133 1.6 oster suspended = rf_State_ProcessDAG(desc); 134 1.6 oster break; 135 1.6 oster case rf_CleanupState: 136 1.6 oster suspended = rf_State_Cleanup(desc); 137 1.6 oster break; 138 1.6 oster case rf_DecrAccessesCountState: 139 1.6 oster suspended = rf_State_DecrAccessCount(desc); 140 1.6 oster break; 141 1.6 oster case rf_LastState: 142 1.6 oster suspended = rf_State_LastState(desc); 143 1.6 oster break; 144 1.6 oster } 145 1.6 oster 146 1.23 oster /* after this point, we cannot dereference desc since 147 1.23 oster * desc may have been freed. desc is only freed in 148 1.23 oster * LastState, so if we renter this function or loop 149 1.23 oster * back up, desc should be valid. */ 150 1.6 oster 151 1.19 oster #if RF_DEBUG_STATES 152 1.6 oster if (rf_printStatesDebug) { 153 1.12 oster printf("raid%d: State: %-24s StateIndex: %3i desc: 0x%ld %s\n", 154 1.38 perry unit, StateName(current_state), 155 1.12 oster current_state_index, (long) desc, 156 1.12 oster suspended ? "callback scheduled" : "looping"); 157 1.6 oster } 158 1.19 oster #endif 159 1.6 oster } while (!suspended && current_state != rf_LastState); 160 1.6 oster 161 1.6 oster return; 162 1.6 oster } 163 1.6 oster 164 1.6 oster 165 1.38 perry void 166 1.22 oster rf_ContinueDagAccess(RF_DagList_t *dagList) 167 1.6 oster { 168 1.27 oster #if RF_ACC_TRACE > 0 169 1.6 oster RF_AccTraceEntry_t *tracerec = &(dagList->desc->tracerec); 170 1.28 oster RF_Etimer_t timer; 171 1.27 oster #endif 172 1.6 oster RF_RaidAccessDesc_t *desc; 173 1.6 oster RF_DagHeader_t *dag_h; 174 1.6 oster int i; 175 1.6 oster 176 1.6 oster desc = dagList->desc; 177 1.6 oster 178 1.27 oster #if RF_ACC_TRACE > 0 179 1.6 oster timer = tracerec->timer; 180 1.6 oster RF_ETIMER_STOP(timer); 181 1.6 oster RF_ETIMER_EVAL(timer); 182 1.6 oster tracerec->specific.user.exec_us = RF_ETIMER_VAL_US(timer); 183 1.6 oster RF_ETIMER_START(tracerec->timer); 184 1.27 oster #endif 185 1.6 oster 186 1.6 oster /* skip to dag which just finished */ 187 1.6 oster dag_h = dagList->dags; 188 1.6 oster for (i = 0; i < dagList->numDagsDone; i++) { 189 1.6 oster dag_h = dag_h->next; 190 1.6 oster } 191 1.6 oster 192 1.6 oster /* check to see if retry is required */ 193 1.6 oster if (dag_h->status == rf_rollBackward) { 194 1.23 oster /* when a dag fails, mark desc status as bad and allow 195 1.23 oster * all other dags in the desc to execute to 196 1.23 oster * completion. then, free all dags and start over */ 197 1.6 oster desc->status = 1; /* bad status */ 198 1.25 oster #if 0 199 1.25 oster printf("raid%d: DAG failure: %c addr 0x%lx " 200 1.25 oster "(%ld) nblk 0x%x (%d) buf 0x%lx state %d\n", 201 1.38 perry desc->raidPtr->raidid, desc->type, 202 1.23 oster (long) desc->raidAddress, 203 1.23 oster (long) desc->raidAddress, (int) desc->numBlocks, 204 1.38 perry (int) desc->numBlocks, 205 1.25 oster (unsigned long) (desc->bufPtr), desc->state); 206 1.25 oster #endif 207 1.6 oster } 208 1.6 oster dagList->numDagsDone++; 209 1.6 oster rf_ContinueRaidAccess(desc); 210 1.6 oster } 211 1.6 oster 212 1.38 perry int 213 1.22 oster rf_State_LastState(RF_RaidAccessDesc_t *desc) 214 1.1 oster { 215 1.51 christos void (*callbackFunc) (void *) = desc->callbackFunc; 216 1.51 christos void * callbackArg = desc->callbackArg; 217 1.38 perry 218 1.8 oster /* 219 1.44 jld * The parity_map hook has to go here, because the iodone 220 1.44 jld * callback goes straight into the kintf layer. 221 1.44 jld */ 222 1.44 jld if (desc->raidPtr->parity_map != NULL && 223 1.44 jld desc->type == RF_IO_TYPE_WRITE) 224 1.44 jld rf_paritymap_end(desc->raidPtr->parity_map, 225 1.44 jld desc->raidAddress, desc->numBlocks); 226 1.44 jld 227 1.50 mlelstv /* printf("Calling raiddone on 0x%x\n",desc->bp); */ 228 1.50 mlelstv raiddone(desc->raidPtr, desc->bp); /* access came through ioctl */ 229 1.3 explorer 230 1.6 oster if (callbackFunc) 231 1.6 oster callbackFunc(callbackArg); 232 1.6 oster rf_FreeRaidAccDesc(desc); 233 1.6 oster 234 1.6 oster return RF_FALSE; 235 1.6 oster } 236 1.6 oster 237 1.38 perry int 238 1.22 oster rf_State_IncrAccessCount(RF_RaidAccessDesc_t *desc) 239 1.6 oster { 240 1.6 oster RF_Raid_t *raidPtr; 241 1.6 oster 242 1.6 oster raidPtr = desc->raidPtr; 243 1.6 oster /* Bummer. We have to do this to be 100% safe w.r.t. the increment 244 1.6 oster * below */ 245 1.47 mrg rf_lock_mutex2(raidPtr->access_suspend_mutex); 246 1.6 oster raidPtr->accs_in_flight++; /* used to detect quiescence */ 247 1.47 mrg rf_unlock_mutex2(raidPtr->access_suspend_mutex); 248 1.6 oster 249 1.6 oster desc->state++; 250 1.6 oster return RF_FALSE; 251 1.6 oster } 252 1.6 oster 253 1.38 perry int 254 1.22 oster rf_State_DecrAccessCount(RF_RaidAccessDesc_t *desc) 255 1.6 oster { 256 1.6 oster RF_Raid_t *raidPtr; 257 1.6 oster 258 1.6 oster raidPtr = desc->raidPtr; 259 1.6 oster 260 1.47 mrg rf_lock_mutex2(raidPtr->access_suspend_mutex); 261 1.6 oster raidPtr->accs_in_flight--; 262 1.6 oster if (raidPtr->accesses_suspended && raidPtr->accs_in_flight == 0) { 263 1.20 oster rf_SignalQuiescenceLock(raidPtr); 264 1.6 oster } 265 1.47 mrg rf_unlock_mutex2(raidPtr->access_suspend_mutex); 266 1.6 oster 267 1.6 oster desc->state++; 268 1.6 oster return RF_FALSE; 269 1.6 oster } 270 1.6 oster 271 1.38 perry int 272 1.22 oster rf_State_Quiesce(RF_RaidAccessDesc_t *desc) 273 1.6 oster { 274 1.27 oster #if RF_ACC_TRACE > 0 275 1.6 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec; 276 1.6 oster RF_Etimer_t timer; 277 1.27 oster #endif 278 1.51 christos RF_CallbackFuncDesc_t *cb; 279 1.31 oster RF_Raid_t *raidPtr; 280 1.6 oster int suspended = RF_FALSE; 281 1.31 oster int need_cb, used_cb; 282 1.6 oster 283 1.6 oster raidPtr = desc->raidPtr; 284 1.6 oster 285 1.27 oster #if RF_ACC_TRACE > 0 286 1.6 oster RF_ETIMER_START(timer); 287 1.6 oster RF_ETIMER_START(desc->timer); 288 1.27 oster #endif 289 1.6 oster 290 1.31 oster need_cb = 0; 291 1.31 oster used_cb = 0; 292 1.31 oster cb = NULL; 293 1.31 oster 294 1.47 mrg rf_lock_mutex2(raidPtr->access_suspend_mutex); 295 1.31 oster /* Do an initial check to see if we might need a callback structure */ 296 1.6 oster if (raidPtr->accesses_suspended) { 297 1.31 oster need_cb = 1; 298 1.31 oster } 299 1.47 mrg rf_unlock_mutex2(raidPtr->access_suspend_mutex); 300 1.31 oster 301 1.31 oster if (need_cb) { 302 1.31 oster /* create a callback if we might need it... 303 1.31 oster and we likely do. */ 304 1.52 oster cb = rf_AllocCallbackFuncDesc(raidPtr); 305 1.31 oster } 306 1.23 oster 307 1.47 mrg rf_lock_mutex2(raidPtr->access_suspend_mutex); 308 1.31 oster if (raidPtr->accesses_suspended) { 309 1.51 christos cb->callbackFunc = rf_ContinueRaidAccess; 310 1.51 christos cb->callbackArg = desc; 311 1.6 oster cb->next = raidPtr->quiesce_wait_list; 312 1.6 oster raidPtr->quiesce_wait_list = cb; 313 1.6 oster suspended = RF_TRUE; 314 1.31 oster used_cb = 1; 315 1.6 oster } 316 1.47 mrg rf_unlock_mutex2(raidPtr->access_suspend_mutex); 317 1.6 oster 318 1.31 oster if ((need_cb == 1) && (used_cb == 0)) { 319 1.52 oster rf_FreeCallbackFuncDesc(raidPtr, cb); 320 1.31 oster } 321 1.31 oster 322 1.27 oster #if RF_ACC_TRACE > 0 323 1.6 oster RF_ETIMER_STOP(timer); 324 1.6 oster RF_ETIMER_EVAL(timer); 325 1.6 oster tracerec->specific.user.suspend_ovhd_us += RF_ETIMER_VAL_US(timer); 326 1.27 oster #endif 327 1.6 oster 328 1.18 oster #if RF_DEBUG_QUIESCE 329 1.6 oster if (suspended && rf_quiesceDebug) 330 1.6 oster printf("Stalling access due to quiescence lock\n"); 331 1.18 oster #endif 332 1.6 oster desc->state++; 333 1.6 oster return suspended; 334 1.6 oster } 335 1.6 oster 336 1.38 perry int 337 1.22 oster rf_State_Map(RF_RaidAccessDesc_t *desc) 338 1.6 oster { 339 1.6 oster RF_Raid_t *raidPtr = desc->raidPtr; 340 1.27 oster #if RF_ACC_TRACE > 0 341 1.6 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec; 342 1.6 oster RF_Etimer_t timer; 343 1.6 oster 344 1.6 oster RF_ETIMER_START(timer); 345 1.27 oster #endif 346 1.6 oster 347 1.6 oster if (!(desc->asmap = rf_MapAccess(raidPtr, desc->raidAddress, desc->numBlocks, 348 1.6 oster desc->bufPtr, RF_DONT_REMAP))) 349 1.6 oster RF_PANIC(); 350 1.6 oster 351 1.27 oster #if RF_ACC_TRACE > 0 352 1.6 oster RF_ETIMER_STOP(timer); 353 1.6 oster RF_ETIMER_EVAL(timer); 354 1.6 oster tracerec->specific.user.map_us = RF_ETIMER_VAL_US(timer); 355 1.27 oster #endif 356 1.6 oster 357 1.6 oster desc->state++; 358 1.6 oster return RF_FALSE; 359 1.6 oster } 360 1.6 oster 361 1.38 perry int 362 1.22 oster rf_State_Lock(RF_RaidAccessDesc_t *desc) 363 1.6 oster { 364 1.27 oster #if RF_ACC_TRACE > 0 365 1.6 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec; 366 1.28 oster RF_Etimer_t timer; 367 1.27 oster #endif 368 1.6 oster RF_Raid_t *raidPtr = desc->raidPtr; 369 1.6 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap; 370 1.6 oster RF_AccessStripeMap_t *asm_p; 371 1.32 oster RF_StripeNum_t lastStripeID = -1; 372 1.6 oster int suspended = RF_FALSE; 373 1.6 oster 374 1.27 oster #if RF_ACC_TRACE > 0 375 1.6 oster RF_ETIMER_START(timer); 376 1.27 oster #endif 377 1.38 perry 378 1.32 oster /* acquire each lock that we don't already hold */ 379 1.32 oster for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { 380 1.32 oster RF_ASSERT(RF_IO_IS_R_OR_W(desc->type)); 381 1.32 oster if (!rf_suppressLocksAndLargeWrites && 382 1.32 oster asm_p->parityInfo && 383 1.32 oster !(desc->flags & RF_DAG_SUPPRESS_LOCKS) && 384 1.32 oster !(asm_p->flags & RF_ASM_FLAGS_LOCK_TRIED)) { 385 1.32 oster asm_p->flags |= RF_ASM_FLAGS_LOCK_TRIED; 386 1.23 oster /* locks must be acquired hierarchically */ 387 1.32 oster RF_ASSERT(asm_p->stripeID > lastStripeID); 388 1.32 oster lastStripeID = asm_p->stripeID; 389 1.38 perry 390 1.32 oster RF_INIT_LOCK_REQ_DESC(asm_p->lockReqDesc, desc->type, 391 1.51 christos rf_ContinueRaidAccess, desc, asm_p, 392 1.32 oster raidPtr->Layout.dataSectorsPerStripe); 393 1.52 oster if (rf_AcquireStripeLock(raidPtr, raidPtr->lockTable, asm_p->stripeID, 394 1.32 oster &asm_p->lockReqDesc)) { 395 1.32 oster suspended = RF_TRUE; 396 1.32 oster break; 397 1.32 oster } 398 1.32 oster } 399 1.32 oster if (desc->type == RF_IO_TYPE_WRITE && 400 1.32 oster raidPtr->status == rf_rs_reconstructing) { 401 1.32 oster if (!(asm_p->flags & RF_ASM_FLAGS_FORCE_TRIED)) { 402 1.32 oster int val; 403 1.38 perry 404 1.32 oster asm_p->flags |= RF_ASM_FLAGS_FORCE_TRIED; 405 1.32 oster val = rf_ForceOrBlockRecon(raidPtr, asm_p, 406 1.51 christos rf_ContinueRaidAccess, desc); 407 1.32 oster if (val == 0) { 408 1.32 oster asm_p->flags |= RF_ASM_FLAGS_RECON_BLOCKED; 409 1.32 oster } else { 410 1.6 oster suspended = RF_TRUE; 411 1.6 oster break; 412 1.6 oster } 413 1.6 oster } else { 414 1.29 oster #if RF_DEBUG_PSS > 0 415 1.6 oster if (rf_pssDebug) { 416 1.32 oster printf("raid%d: skipping force/block because already done, psid %ld\n", 417 1.38 perry desc->raidPtr->raidid, 418 1.13 oster (long) asm_p->stripeID); 419 1.6 oster } 420 1.29 oster #endif 421 1.6 oster } 422 1.32 oster } else { 423 1.32 oster #if RF_DEBUG_PSS > 0 424 1.32 oster if (rf_pssDebug) { 425 1.32 oster printf("raid%d: skipping force/block because not write or not under recon, psid %ld\n", 426 1.38 perry desc->raidPtr->raidid, 427 1.32 oster (long) asm_p->stripeID); 428 1.32 oster } 429 1.32 oster #endif 430 1.6 oster } 431 1.32 oster } 432 1.27 oster #if RF_ACC_TRACE > 0 433 1.32 oster RF_ETIMER_STOP(timer); 434 1.32 oster RF_ETIMER_EVAL(timer); 435 1.32 oster tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); 436 1.27 oster #endif 437 1.32 oster if (suspended) 438 1.32 oster return (RF_TRUE); 439 1.32 oster 440 1.6 oster desc->state++; 441 1.6 oster return (RF_FALSE); 442 1.1 oster } 443 1.1 oster /* 444 1.1 oster * the following three states create, execute, and post-process dags 445 1.1 oster * the error recovery unit is a single dag. 446 1.1 oster * by default, SelectAlgorithm creates an array of dags, one per parity stripe 447 1.1 oster * in some tricky cases, multiple dags per stripe are created 448 1.1 oster * - dags within a parity stripe are executed sequentially (arbitrary order) 449 1.1 oster * - dags for distinct parity stripes are executed concurrently 450 1.1 oster * 451 1.1 oster * repeat until all dags complete successfully -or- dag selection fails 452 1.1 oster * 453 1.1 oster * while !done 454 1.1 oster * create dag(s) (SelectAlgorithm) 455 1.1 oster * if dag 456 1.1 oster * execute dag (DispatchDAG) 457 1.1 oster * if dag successful 458 1.1 oster * done (SUCCESS) 459 1.1 oster * else 460 1.1 oster * !done (RETRY - start over with new dags) 461 1.1 oster * else 462 1.1 oster * done (FAIL) 463 1.1 oster */ 464 1.38 perry int 465 1.22 oster rf_State_CreateDAG(RF_RaidAccessDesc_t *desc) 466 1.1 oster { 467 1.27 oster #if RF_ACC_TRACE > 0 468 1.6 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec; 469 1.6 oster RF_Etimer_t timer; 470 1.27 oster #endif 471 1.6 oster RF_DagHeader_t *dag_h; 472 1.26 oster RF_DagList_t *dagList; 473 1.25 oster struct buf *bp; 474 1.6 oster int i, selectStatus; 475 1.6 oster 476 1.6 oster /* generate a dag for the access, and fire it off. When the dag 477 1.6 oster * completes, we'll get re-invoked in the next state. */ 478 1.27 oster #if RF_ACC_TRACE > 0 479 1.6 oster RF_ETIMER_START(timer); 480 1.27 oster #endif 481 1.6 oster /* SelectAlgorithm returns one or more dags */ 482 1.6 oster selectStatus = rf_SelectAlgorithm(desc, desc->flags | RF_DAG_SUPPRESS_LOCKS); 483 1.17 oster #if RF_DEBUG_VALIDATE_DAG 484 1.26 oster if (rf_printDAGsDebug) { 485 1.26 oster dagList = desc->dagList; 486 1.26 oster for (i = 0; i < desc->numStripes; i++) { 487 1.42 oster rf_PrintDAGList(dagList->dags); 488 1.26 oster dagList = dagList->next; 489 1.26 oster } 490 1.26 oster } 491 1.17 oster #endif /* RF_DEBUG_VALIDATE_DAG */ 492 1.27 oster #if RF_ACC_TRACE > 0 493 1.6 oster RF_ETIMER_STOP(timer); 494 1.6 oster RF_ETIMER_EVAL(timer); 495 1.6 oster /* update time to create all dags */ 496 1.6 oster tracerec->specific.user.dag_create_us = RF_ETIMER_VAL_US(timer); 497 1.27 oster #endif 498 1.6 oster 499 1.6 oster desc->status = 0; /* good status */ 500 1.6 oster 501 1.36 oster if (selectStatus || (desc->numRetries > RF_RETRY_THRESHOLD)) { 502 1.6 oster /* failed to create a dag */ 503 1.6 oster /* this happens when there are too many faults or incomplete 504 1.6 oster * dag libraries */ 505 1.36 oster if (selectStatus) { 506 1.36 oster printf("raid%d: failed to create a dag. " 507 1.38 perry "Too many component failures.\n", 508 1.36 oster desc->raidPtr->raidid); 509 1.36 oster } else { 510 1.36 oster printf("raid%d: IO failed after %d retries.\n", 511 1.36 oster desc->raidPtr->raidid, RF_RETRY_THRESHOLD); 512 1.36 oster } 513 1.25 oster 514 1.38 perry desc->status = 1; /* bad status */ 515 1.25 oster /* skip straight to rf_State_Cleanup() */ 516 1.25 oster desc->state = rf_CleanupState; 517 1.25 oster bp = (struct buf *)desc->bp; 518 1.25 oster bp->b_error = EIO; 519 1.43 oster bp->b_resid = bp->b_bcount; 520 1.6 oster } else { 521 1.6 oster /* bind dags to desc */ 522 1.26 oster dagList = desc->dagList; 523 1.6 oster for (i = 0; i < desc->numStripes; i++) { 524 1.26 oster dag_h = dagList->dags; 525 1.6 oster while (dag_h) { 526 1.6 oster dag_h->bp = (struct buf *) desc->bp; 527 1.27 oster #if RF_ACC_TRACE > 0 528 1.6 oster dag_h->tracerec = tracerec; 529 1.27 oster #endif 530 1.6 oster dag_h = dag_h->next; 531 1.6 oster } 532 1.26 oster dagList = dagList->next; 533 1.6 oster } 534 1.6 oster desc->flags |= RF_DAG_DISPATCH_RETURNED; 535 1.6 oster desc->state++; /* next state should be rf_State_ExecuteDAG */ 536 1.6 oster } 537 1.6 oster return RF_FALSE; 538 1.1 oster } 539 1.1 oster 540 1.1 oster 541 1.1 oster 542 1.26 oster /* the access has an list of dagLists, one dagList per parity stripe. 543 1.1 oster * fire the first dag in each parity stripe (dagList). 544 1.1 oster * dags within a stripe (dagList) must be executed sequentially 545 1.1 oster * - this preserves atomic parity update 546 1.1 oster * dags for independents parity groups (stripes) are fired concurrently */ 547 1.1 oster 548 1.38 perry int 549 1.22 oster rf_State_ExecuteDAG(RF_RaidAccessDesc_t *desc) 550 1.1 oster { 551 1.6 oster int i; 552 1.6 oster RF_DagHeader_t *dag_h; 553 1.26 oster RF_DagList_t *dagList; 554 1.6 oster 555 1.23 oster /* next state is always rf_State_ProcessDAG important to do 556 1.23 oster * this before firing the first dag (it may finish before we 557 1.23 oster * leave this routine) */ 558 1.6 oster desc->state++; 559 1.6 oster 560 1.23 oster /* sweep dag array, a stripe at a time, firing the first dag 561 1.23 oster * in each stripe */ 562 1.26 oster dagList = desc->dagList; 563 1.6 oster for (i = 0; i < desc->numStripes; i++) { 564 1.26 oster RF_ASSERT(dagList->numDags > 0); 565 1.26 oster RF_ASSERT(dagList->numDagsDone == 0); 566 1.26 oster RF_ASSERT(dagList->numDagsFired == 0); 567 1.27 oster #if RF_ACC_TRACE > 0 568 1.26 oster RF_ETIMER_START(dagList->tracerec.timer); 569 1.27 oster #endif 570 1.6 oster /* fire first dag in this stripe */ 571 1.26 oster dag_h = dagList->dags; 572 1.6 oster RF_ASSERT(dag_h); 573 1.26 oster dagList->numDagsFired++; 574 1.26 oster rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, dagList); 575 1.26 oster dagList = dagList->next; 576 1.6 oster } 577 1.6 oster 578 1.6 oster /* the DAG will always call the callback, even if there was no 579 1.6 oster * blocking, so we are always suspended in this state */ 580 1.6 oster return RF_TRUE; 581 1.1 oster } 582 1.1 oster 583 1.1 oster 584 1.1 oster 585 1.1 oster /* rf_State_ProcessDAG is entered when a dag completes. 586 1.1 oster * first, check to all dags in the access have completed 587 1.1 oster * if not, fire as many dags as possible */ 588 1.1 oster 589 1.38 perry int 590 1.22 oster rf_State_ProcessDAG(RF_RaidAccessDesc_t *desc) 591 1.1 oster { 592 1.6 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap; 593 1.6 oster RF_Raid_t *raidPtr = desc->raidPtr; 594 1.6 oster RF_DagHeader_t *dag_h; 595 1.6 oster int i, j, done = RF_TRUE; 596 1.26 oster RF_DagList_t *dagList, *temp; 597 1.6 oster 598 1.6 oster /* check to see if this is the last dag */ 599 1.26 oster dagList = desc->dagList; 600 1.26 oster for (i = 0; i < desc->numStripes; i++) { 601 1.26 oster if (dagList->numDags != dagList->numDagsDone) 602 1.6 oster done = RF_FALSE; 603 1.26 oster dagList = dagList->next; 604 1.26 oster } 605 1.6 oster 606 1.6 oster if (done) { 607 1.6 oster if (desc->status) { 608 1.6 oster /* a dag failed, retry */ 609 1.6 oster /* free all dags */ 610 1.26 oster dagList = desc->dagList; 611 1.6 oster for (i = 0; i < desc->numStripes; i++) { 612 1.26 oster rf_FreeDAG(dagList->dags); 613 1.26 oster temp = dagList; 614 1.35 oster dagList = dagList->next; 615 1.52 oster rf_FreeDAGList(raidPtr, temp); 616 1.6 oster } 617 1.37 oster desc->dagList = NULL; 618 1.37 oster 619 1.6 oster rf_MarkFailuresInASMList(raidPtr, asmh); 620 1.36 oster 621 1.36 oster /* note the retry so that we'll bail in 622 1.36 oster rf_State_CreateDAG() once we've retired 623 1.36 oster the IO RF_RETRY_THRESHOLD times */ 624 1.36 oster 625 1.36 oster desc->numRetries++; 626 1.36 oster 627 1.6 oster /* back up to rf_State_CreateDAG */ 628 1.6 oster desc->state = desc->state - 2; 629 1.6 oster return RF_FALSE; 630 1.6 oster } else { 631 1.6 oster /* move on to rf_State_Cleanup */ 632 1.6 oster desc->state++; 633 1.6 oster } 634 1.6 oster return RF_FALSE; 635 1.6 oster } else { 636 1.6 oster /* more dags to execute */ 637 1.6 oster /* see if any are ready to be fired. if so, fire them */ 638 1.6 oster /* don't fire the initial dag in a list, it's fired in 639 1.6 oster * rf_State_ExecuteDAG */ 640 1.26 oster dagList = desc->dagList; 641 1.6 oster for (i = 0; i < desc->numStripes; i++) { 642 1.26 oster if ((dagList->numDagsDone < dagList->numDags) 643 1.26 oster && (dagList->numDagsDone == dagList->numDagsFired) 644 1.26 oster && (dagList->numDagsFired > 0)) { 645 1.27 oster #if RF_ACC_TRACE > 0 646 1.26 oster RF_ETIMER_START(dagList->tracerec.timer); 647 1.27 oster #endif 648 1.6 oster /* fire next dag in this stripe */ 649 1.6 oster /* first, skip to next dag awaiting execution */ 650 1.26 oster dag_h = dagList->dags; 651 1.26 oster for (j = 0; j < dagList->numDagsDone; j++) 652 1.6 oster dag_h = dag_h->next; 653 1.26 oster dagList->numDagsFired++; 654 1.6 oster rf_DispatchDAG(dag_h, (void (*) (void *)) rf_ContinueDagAccess, 655 1.26 oster dagList); 656 1.6 oster } 657 1.26 oster dagList = dagList->next; 658 1.6 oster } 659 1.6 oster return RF_TRUE; 660 1.6 oster } 661 1.1 oster } 662 1.1 oster /* only make it this far if all dags complete successfully */ 663 1.38 perry int 664 1.22 oster rf_State_Cleanup(RF_RaidAccessDesc_t *desc) 665 1.1 oster { 666 1.27 oster #if RF_ACC_TRACE > 0 667 1.6 oster RF_AccTraceEntry_t *tracerec = &desc->tracerec; 668 1.28 oster RF_Etimer_t timer; 669 1.27 oster #endif 670 1.6 oster RF_AccessStripeMapHeader_t *asmh = desc->asmap; 671 1.6 oster RF_Raid_t *raidPtr = desc->raidPtr; 672 1.6 oster RF_AccessStripeMap_t *asm_p; 673 1.26 oster RF_DagList_t *dagList; 674 1.11 oster int i; 675 1.6 oster 676 1.6 oster desc->state++; 677 1.6 oster 678 1.27 oster #if RF_ACC_TRACE > 0 679 1.6 oster timer = tracerec->timer; 680 1.6 oster RF_ETIMER_STOP(timer); 681 1.6 oster RF_ETIMER_EVAL(timer); 682 1.6 oster tracerec->specific.user.dag_retry_us = RF_ETIMER_VAL_US(timer); 683 1.6 oster 684 1.6 oster /* the RAID I/O is complete. Clean up. */ 685 1.6 oster tracerec->specific.user.dag_retry_us = 0; 686 1.6 oster 687 1.6 oster RF_ETIMER_START(timer); 688 1.27 oster #endif 689 1.24 oster /* free all dags */ 690 1.26 oster dagList = desc->dagList; 691 1.24 oster for (i = 0; i < desc->numStripes; i++) { 692 1.26 oster rf_FreeDAG(dagList->dags); 693 1.26 oster dagList = dagList->next; 694 1.6 oster } 695 1.27 oster #if RF_ACC_TRACE > 0 696 1.6 oster RF_ETIMER_STOP(timer); 697 1.6 oster RF_ETIMER_EVAL(timer); 698 1.6 oster tracerec->specific.user.cleanup_us = RF_ETIMER_VAL_US(timer); 699 1.6 oster 700 1.6 oster RF_ETIMER_START(timer); 701 1.27 oster #endif 702 1.32 oster for (asm_p = asmh->stripeMap; asm_p; asm_p = asm_p->next) { 703 1.32 oster if (!rf_suppressLocksAndLargeWrites && 704 1.32 oster asm_p->parityInfo && 705 1.32 oster !(desc->flags & RF_DAG_SUPPRESS_LOCKS)) { 706 1.32 oster RF_ASSERT_VALID_LOCKREQ(&asm_p->lockReqDesc); 707 1.52 oster rf_ReleaseStripeLock(raidPtr, 708 1.52 oster raidPtr->lockTable, 709 1.32 oster asm_p->stripeID, 710 1.32 oster &asm_p->lockReqDesc); 711 1.32 oster } 712 1.32 oster if (asm_p->flags & RF_ASM_FLAGS_RECON_BLOCKED) { 713 1.32 oster rf_UnblockRecon(raidPtr, asm_p); 714 1.6 oster } 715 1.6 oster } 716 1.27 oster #if RF_ACC_TRACE > 0 717 1.6 oster RF_ETIMER_STOP(timer); 718 1.6 oster RF_ETIMER_EVAL(timer); 719 1.6 oster tracerec->specific.user.lock_us += RF_ETIMER_VAL_US(timer); 720 1.6 oster 721 1.6 oster RF_ETIMER_START(timer); 722 1.27 oster #endif 723 1.52 oster rf_FreeAccessStripeMap(raidPtr, asmh); 724 1.27 oster #if RF_ACC_TRACE > 0 725 1.6 oster RF_ETIMER_STOP(timer); 726 1.6 oster RF_ETIMER_EVAL(timer); 727 1.6 oster tracerec->specific.user.cleanup_us += RF_ETIMER_VAL_US(timer); 728 1.6 oster 729 1.6 oster RF_ETIMER_STOP(desc->timer); 730 1.6 oster RF_ETIMER_EVAL(desc->timer); 731 1.6 oster 732 1.6 oster timer = desc->tracerec.tot_timer; 733 1.6 oster RF_ETIMER_STOP(timer); 734 1.6 oster RF_ETIMER_EVAL(timer); 735 1.6 oster desc->tracerec.total_us = RF_ETIMER_VAL_US(timer); 736 1.1 oster 737 1.6 oster rf_LogTraceRec(raidPtr, tracerec); 738 1.27 oster #endif 739 1.6 oster desc->flags |= RF_DAG_ACCESS_COMPLETE; 740 1.1 oster 741 1.6 oster return RF_FALSE; 742 1.1 oster } 743