rf_dagutils.c revision 1.1 1 1.1 oster /* $NetBSD: rf_dagutils.c,v 1.1 1998/11/13 04:20:28 oster Exp $ */
2 1.1 oster /*
3 1.1 oster * Copyright (c) 1995 Carnegie-Mellon University.
4 1.1 oster * All rights reserved.
5 1.1 oster *
6 1.1 oster * Authors: Mark Holland, William V. Courtright II, Jim Zelenka
7 1.1 oster *
8 1.1 oster * Permission to use, copy, modify and distribute this software and
9 1.1 oster * its documentation is hereby granted, provided that both the copyright
10 1.1 oster * notice and this permission notice appear in all copies of the
11 1.1 oster * software, derivative works or modified versions, and any portions
12 1.1 oster * thereof, and that both notices appear in supporting documentation.
13 1.1 oster *
14 1.1 oster * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 1.1 oster * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 1.1 oster * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 1.1 oster *
18 1.1 oster * Carnegie Mellon requests users of this software to return to
19 1.1 oster *
20 1.1 oster * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
21 1.1 oster * School of Computer Science
22 1.1 oster * Carnegie Mellon University
23 1.1 oster * Pittsburgh PA 15213-3890
24 1.1 oster *
25 1.1 oster * any improvements or extensions that they make and grant Carnegie the
26 1.1 oster * rights to redistribute these changes.
27 1.1 oster */
28 1.1 oster
29 1.1 oster /******************************************************************************
30 1.1 oster *
31 1.1 oster * rf_dagutils.c -- utility routines for manipulating dags
32 1.1 oster *
33 1.1 oster *****************************************************************************/
34 1.1 oster
35 1.1 oster /*
36 1.1 oster * :
37 1.1 oster * Log: rf_dagutils.c,v
38 1.1 oster * Revision 1.55 1996/08/22 14:39:47 jimz
39 1.1 oster * reduce v/k fraction (better load balancing)
40 1.1 oster *
41 1.1 oster * Revision 1.54 1996/08/21 04:14:12 jimz
42 1.1 oster * minor workload shift tweaking
43 1.1 oster *
44 1.1 oster * Revision 1.53 1996/08/20 23:41:16 jimz
45 1.1 oster * fix up workload shift computation
46 1.1 oster *
47 1.1 oster * Revision 1.52 1996/08/20 22:34:16 jimz
48 1.1 oster * first cut at fixing workload shift
49 1.1 oster * needs work
50 1.1 oster *
51 1.1 oster * Revision 1.51 1996/08/20 16:51:16 jimz
52 1.1 oster * comment more verbosely compute_workload_shift()
53 1.1 oster *
54 1.1 oster * Revision 1.50 1996/08/11 00:40:50 jimz
55 1.1 oster * fix up broken comment
56 1.1 oster *
57 1.1 oster * Revision 1.49 1996/07/27 23:36:08 jimz
58 1.1 oster * Solaris port of simulator
59 1.1 oster *
60 1.1 oster * Revision 1.48 1996/07/27 18:40:01 jimz
61 1.1 oster * cleanup sweep
62 1.1 oster *
63 1.1 oster * Revision 1.47 1996/07/22 19:52:16 jimz
64 1.1 oster * switched node params to RF_DagParam_t, a union of
65 1.1 oster * a 64-bit int and a void *, for better portability
66 1.1 oster * attempted hpux port, but failed partway through for
67 1.1 oster * lack of a single C compiler capable of compiling all
68 1.1 oster * source files
69 1.1 oster *
70 1.1 oster * Revision 1.46 1996/07/18 22:57:14 jimz
71 1.1 oster * port simulator to AIX
72 1.1 oster *
73 1.1 oster * Revision 1.45 1996/06/17 03:24:59 jimz
74 1.1 oster * include shutdown.h for define of now-macroized ShutdownCreate
75 1.1 oster *
76 1.1 oster * Revision 1.44 1996/06/10 12:50:57 jimz
77 1.1 oster * Add counters to freelists to track number of allocations, frees,
78 1.1 oster * grows, max size, etc. Adjust a couple sets of PRIME params based
79 1.1 oster * on the results.
80 1.1 oster *
81 1.1 oster * Revision 1.43 1996/06/10 11:55:47 jimz
82 1.1 oster * Straightened out some per-array/not-per-array distinctions, fixed
83 1.1 oster * a couple bugs related to confusion. Added shutdown lists. Removed
84 1.1 oster * layout shutdown function (now subsumed by shutdown lists).
85 1.1 oster *
86 1.1 oster * Revision 1.42 1996/06/07 21:33:04 jimz
87 1.1 oster * begin using consistent types for sector numbers,
88 1.1 oster * stripe numbers, row+col numbers, recon unit numbers
89 1.1 oster *
90 1.1 oster * Revision 1.41 1996/06/06 17:28:58 jimz
91 1.1 oster * make PrintNodeInfoString aware of new mirroring funcs
92 1.1 oster *
93 1.1 oster * Revision 1.40 1996/06/05 18:06:02 jimz
94 1.1 oster * Major code cleanup. The Great Renaming is now done.
95 1.1 oster * Better modularity. Better typing. Fixed a bunch of
96 1.1 oster * synchronization bugs. Made a lot of global stuff
97 1.1 oster * per-desc or per-array. Removed dead code.
98 1.1 oster *
99 1.1 oster * Revision 1.39 1996/06/03 23:28:26 jimz
100 1.1 oster * more bugfixes
101 1.1 oster * check in tree to sync for IPDS runs with current bugfixes
102 1.1 oster * there still may be a problem with threads in the script test
103 1.1 oster * getting I/Os stuck- not trivially reproducible (runs ~50 times
104 1.1 oster * in a row without getting stuck)
105 1.1 oster *
106 1.1 oster * Revision 1.38 1996/06/02 17:31:48 jimz
107 1.1 oster * Moved a lot of global stuff into array structure, where it belongs.
108 1.1 oster * Fixed up paritylogging, pss modules in this manner. Some general
109 1.1 oster * code cleanup. Removed lots of dead code, some dead files.
110 1.1 oster *
111 1.1 oster * Revision 1.37 1996/05/31 22:26:54 jimz
112 1.1 oster * fix a lot of mapping problems, memory allocation problems
113 1.1 oster * found some weird lock issues, fixed 'em
114 1.1 oster * more code cleanup
115 1.1 oster *
116 1.1 oster * Revision 1.36 1996/05/30 23:22:16 jimz
117 1.1 oster * bugfixes of serialization, timing problems
118 1.1 oster * more cleanup
119 1.1 oster *
120 1.1 oster * Revision 1.35 1996/05/30 11:29:41 jimz
121 1.1 oster * Numerous bug fixes. Stripe lock release code disagreed with the taking code
122 1.1 oster * about when stripes should be locked (I made it consistent: no parity, no lock)
123 1.1 oster * There was a lot of extra serialization of I/Os which I've removed- a lot of
124 1.1 oster * it was to calculate values for the cache code, which is no longer with us.
125 1.1 oster * More types, function, macro cleanup. Added code to properly quiesce the array
126 1.1 oster * on shutdown. Made a lot of stuff array-specific which was (bogusly) general
127 1.1 oster * before. Fixed memory allocation, freeing bugs.
128 1.1 oster *
129 1.1 oster * Revision 1.34 1996/05/27 18:56:37 jimz
130 1.1 oster * more code cleanup
131 1.1 oster * better typing
132 1.1 oster * compiles in all 3 environments
133 1.1 oster *
134 1.1 oster * Revision 1.33 1996/05/24 22:17:04 jimz
135 1.1 oster * continue code + namespace cleanup
136 1.1 oster * typed a bunch of flags
137 1.1 oster *
138 1.1 oster * Revision 1.32 1996/05/24 04:28:55 jimz
139 1.1 oster * release cleanup ckpt
140 1.1 oster *
141 1.1 oster * Revision 1.31 1996/05/23 21:46:35 jimz
142 1.1 oster * checkpoint in code cleanup (release prep)
143 1.1 oster * lots of types, function names have been fixed
144 1.1 oster *
145 1.1 oster * Revision 1.30 1996/05/23 00:33:23 jimz
146 1.1 oster * code cleanup: move all debug decls to rf_options.c, all extern
147 1.1 oster * debug decls to rf_options.h, all debug vars preceded by rf_
148 1.1 oster *
149 1.1 oster * Revision 1.29 1996/05/18 19:51:34 jimz
150 1.1 oster * major code cleanup- fix syntax, make some types consistent,
151 1.1 oster * add prototypes, clean out dead code, et cetera
152 1.1 oster *
153 1.1 oster * Revision 1.28 1996/05/16 23:05:52 jimz
154 1.1 oster * changed InitNode() to use dag_ptrs field of node when appropriate
155 1.1 oster * (see rf_dag.h or comments within InitNode() for details)
156 1.1 oster *
157 1.1 oster * Revision 1.27 1996/05/16 15:37:19 jimz
158 1.1 oster * convert to RF_FREELIST stuff for dag headers
159 1.1 oster *
160 1.1 oster * Revision 1.26 1996/05/08 21:01:24 jimz
161 1.1 oster * fixed up enum type names that were conflicting with other
162 1.1 oster * enums and function names (ie, "panic")
163 1.1 oster * future naming trends will be towards RF_ and rf_ for
164 1.1 oster * everything raidframe-related
165 1.1 oster *
166 1.1 oster * Revision 1.25 1996/05/03 19:56:15 wvcii
167 1.1 oster * added misc routines from old dag creation files
168 1.1 oster *
169 1.1 oster * Revision 1.24 1995/12/12 18:10:06 jimz
170 1.1 oster * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
171 1.1 oster * fix 80-column brain damage in comments
172 1.1 oster *
173 1.1 oster * Revision 1.23 1995/12/01 15:59:50 root
174 1.1 oster * added copyright info
175 1.1 oster *
176 1.1 oster * Revision 1.22 1995/11/17 15:14:12 wvcii
177 1.1 oster * PrintDAG now processes DiskReadMirrorFunc nodes
178 1.1 oster *
179 1.1 oster * Revision 1.21 1995/11/07 16:22:38 wvcii
180 1.1 oster * InitNode and InitNodeFromBuf now initialize commit fields
181 1.1 oster * beefed up ValidateDag
182 1.1 oster * prettied up PrintDAGList
183 1.1 oster *
184 1.1 oster */
185 1.1 oster
186 1.1 oster #include "rf_archs.h"
187 1.1 oster #include "rf_types.h"
188 1.1 oster #include "rf_threadstuff.h"
189 1.1 oster #include "rf_raid.h"
190 1.1 oster #include "rf_dag.h"
191 1.1 oster #include "rf_dagutils.h"
192 1.1 oster #include "rf_dagfuncs.h"
193 1.1 oster #include "rf_general.h"
194 1.1 oster #include "rf_freelist.h"
195 1.1 oster #include "rf_map.h"
196 1.1 oster #include "rf_shutdown.h"
197 1.1 oster #include "rf_sys.h"
198 1.1 oster
199 1.1 oster #define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_)))
200 1.1 oster
201 1.1 oster RF_RedFuncs_t rf_xorFuncs = {
202 1.1 oster rf_RegularXorFunc, "Reg Xr",
203 1.1 oster rf_SimpleXorFunc, "Simple Xr"};
204 1.1 oster
205 1.1 oster RF_RedFuncs_t rf_xorRecoveryFuncs = {
206 1.1 oster rf_RecoveryXorFunc, "Recovery Xr",
207 1.1 oster rf_RecoveryXorFunc, "Recovery Xr"};
208 1.1 oster
209 1.1 oster static void rf_RecurPrintDAG(RF_DagNode_t *, int, int);
210 1.1 oster static void rf_PrintDAG(RF_DagHeader_t *);
211 1.1 oster static int rf_ValidateBranch(RF_DagNode_t *, int *, int *,
212 1.1 oster RF_DagNode_t **, int );
213 1.1 oster static void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int);
214 1.1 oster static void rf_ValidateVisitedBits(RF_DagHeader_t *);
215 1.1 oster
216 1.1 oster /******************************************************************************
217 1.1 oster *
218 1.1 oster * InitNode - initialize a dag node
219 1.1 oster *
220 1.1 oster * the size of the propList array is always the same as that of the
221 1.1 oster * successors array.
222 1.1 oster *
223 1.1 oster *****************************************************************************/
224 1.1 oster void rf_InitNode(
225 1.1 oster RF_DagNode_t *node,
226 1.1 oster RF_NodeStatus_t initstatus,
227 1.1 oster int commit,
228 1.1 oster int (*doFunc)(RF_DagNode_t *node),
229 1.1 oster int (*undoFunc)(RF_DagNode_t *node),
230 1.1 oster int (*wakeFunc)(RF_DagNode_t *node,int status),
231 1.1 oster int nSucc,
232 1.1 oster int nAnte,
233 1.1 oster int nParam,
234 1.1 oster int nResult,
235 1.1 oster RF_DagHeader_t *hdr,
236 1.1 oster char *name,
237 1.1 oster RF_AllocListElem_t *alist)
238 1.1 oster {
239 1.1 oster void **ptrs;
240 1.1 oster int nptrs;
241 1.1 oster
242 1.1 oster if (nAnte > RF_MAX_ANTECEDENTS)
243 1.1 oster RF_PANIC();
244 1.1 oster node->status = initstatus;
245 1.1 oster node->commitNode = commit;
246 1.1 oster node->doFunc = doFunc;
247 1.1 oster node->undoFunc = undoFunc;
248 1.1 oster node->wakeFunc = wakeFunc;
249 1.1 oster node->numParams = nParam;
250 1.1 oster node->numResults = nResult;
251 1.1 oster node->numAntecedents = nAnte;
252 1.1 oster node->numAntDone = 0;
253 1.1 oster node->next = NULL;
254 1.1 oster node->numSuccedents = nSucc;
255 1.1 oster node->name = name;
256 1.1 oster node->dagHdr = hdr;
257 1.1 oster node->visited = 0;
258 1.1 oster
259 1.1 oster /* allocate all the pointers with one call to malloc */
260 1.1 oster nptrs = nSucc+nAnte+nResult+nSucc;
261 1.1 oster
262 1.1 oster if (nptrs <= RF_DAG_PTRCACHESIZE) {
263 1.1 oster /*
264 1.1 oster * The dag_ptrs field of the node is basically some scribble
265 1.1 oster * space to be used here. We could get rid of it, and always
266 1.1 oster * allocate the range of pointers, but that's expensive. So,
267 1.1 oster * we pick a "common case" size for the pointer cache. Hopefully,
268 1.1 oster * we'll find that:
269 1.1 oster * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by
270 1.1 oster * only a little bit (least efficient case)
271 1.1 oster * (2) Generally, ntprs isn't a lot less than RF_DAG_PTRCACHESIZE
272 1.1 oster * (wasted memory)
273 1.1 oster */
274 1.1 oster ptrs = (void **)node->dag_ptrs;
275 1.1 oster }
276 1.1 oster else {
277 1.1 oster RF_CallocAndAdd(ptrs, nptrs, sizeof(void *), (void **), alist);
278 1.1 oster }
279 1.1 oster node->succedents = (nSucc) ? (RF_DagNode_t **) ptrs : NULL;
280 1.1 oster node->antecedents = (nAnte) ? (RF_DagNode_t **) (ptrs+nSucc) : NULL;
281 1.1 oster node->results = (nResult) ? (void **) (ptrs+nSucc+nAnte) : NULL;
282 1.1 oster node->propList = (nSucc) ? (RF_PropHeader_t **) (ptrs+nSucc+nAnte+nResult) : NULL;
283 1.1 oster
284 1.1 oster if (nParam) {
285 1.1 oster if (nParam <= RF_DAG_PARAMCACHESIZE) {
286 1.1 oster node->params = (RF_DagParam_t *)node->dag_params;
287 1.1 oster }
288 1.1 oster else {
289 1.1 oster RF_CallocAndAdd(node->params, nParam, sizeof(RF_DagParam_t), (RF_DagParam_t *), alist);
290 1.1 oster }
291 1.1 oster }
292 1.1 oster else {
293 1.1 oster node->params = NULL;
294 1.1 oster }
295 1.1 oster }
296 1.1 oster
297 1.1 oster
298 1.1 oster
299 1.1 oster /******************************************************************************
300 1.1 oster *
301 1.1 oster * allocation and deallocation routines
302 1.1 oster *
303 1.1 oster *****************************************************************************/
304 1.1 oster
305 1.1 oster void rf_FreeDAG(dag_h)
306 1.1 oster RF_DagHeader_t *dag_h;
307 1.1 oster {
308 1.1 oster RF_AccessStripeMapHeader_t *asmap, *t_asmap;
309 1.1 oster RF_DagHeader_t *nextDag;
310 1.1 oster int i;
311 1.1 oster
312 1.1 oster while (dag_h) {
313 1.1 oster nextDag = dag_h->next;
314 1.1 oster for (i=0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) {
315 1.1 oster /* release mem chunks */
316 1.1 oster rf_ReleaseMemChunk(dag_h->memChunk[i]);
317 1.1 oster dag_h->memChunk[i] = NULL;
318 1.1 oster }
319 1.1 oster
320 1.1 oster RF_ASSERT(i == dag_h->chunkIndex);
321 1.1 oster if (dag_h->xtraChunkCnt > 0) {
322 1.1 oster /* free xtraMemChunks */
323 1.1 oster for (i=0; dag_h->xtraMemChunk[i] && i < dag_h->xtraChunkIndex; i++) {
324 1.1 oster rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]);
325 1.1 oster dag_h->xtraMemChunk[i] = NULL;
326 1.1 oster }
327 1.1 oster RF_ASSERT(i == dag_h->xtraChunkIndex);
328 1.1 oster /* free ptrs to xtraMemChunks */
329 1.1 oster RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt * sizeof(RF_ChunkDesc_t *));
330 1.1 oster }
331 1.1 oster rf_FreeAllocList(dag_h->allocList);
332 1.1 oster for (asmap = dag_h->asmList; asmap;) {
333 1.1 oster t_asmap = asmap;
334 1.1 oster asmap = asmap->next;
335 1.1 oster rf_FreeAccessStripeMap(t_asmap);
336 1.1 oster }
337 1.1 oster rf_FreeDAGHeader(dag_h);
338 1.1 oster dag_h = nextDag;
339 1.1 oster }
340 1.1 oster }
341 1.1 oster
342 1.1 oster RF_PropHeader_t *rf_MakePropListEntry(
343 1.1 oster RF_DagHeader_t *dag_h,
344 1.1 oster int resultNum,
345 1.1 oster int paramNum,
346 1.1 oster RF_PropHeader_t *next,
347 1.1 oster RF_AllocListElem_t *allocList)
348 1.1 oster {
349 1.1 oster RF_PropHeader_t *p;
350 1.1 oster
351 1.1 oster RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t),
352 1.1 oster (RF_PropHeader_t *), allocList);
353 1.1 oster p->resultNum = resultNum;
354 1.1 oster p->paramNum = paramNum;
355 1.1 oster p->next = next;
356 1.1 oster return(p);
357 1.1 oster }
358 1.1 oster
359 1.1 oster static RF_FreeList_t *rf_dagh_freelist;
360 1.1 oster
361 1.1 oster #define RF_MAX_FREE_DAGH 128
362 1.1 oster #define RF_DAGH_INC 16
363 1.1 oster #define RF_DAGH_INITIAL 32
364 1.1 oster
365 1.1 oster static void rf_ShutdownDAGs(void *);
366 1.1 oster static void rf_ShutdownDAGs(ignored)
367 1.1 oster void *ignored;
368 1.1 oster {
369 1.1 oster RF_FREELIST_DESTROY(rf_dagh_freelist,next,(RF_DagHeader_t *));
370 1.1 oster }
371 1.1 oster
372 1.1 oster int rf_ConfigureDAGs(listp)
373 1.1 oster RF_ShutdownList_t **listp;
374 1.1 oster {
375 1.1 oster int rc;
376 1.1 oster
377 1.1 oster RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH,
378 1.1 oster RF_DAGH_INC, sizeof(RF_DagHeader_t));
379 1.1 oster if (rf_dagh_freelist == NULL)
380 1.1 oster return(ENOMEM);
381 1.1 oster rc = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL);
382 1.1 oster if (rc) {
383 1.1 oster RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n",
384 1.1 oster __FILE__, __LINE__, rc);
385 1.1 oster rf_ShutdownDAGs(NULL);
386 1.1 oster return(rc);
387 1.1 oster }
388 1.1 oster RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL,next,
389 1.1 oster (RF_DagHeader_t *));
390 1.1 oster return(0);
391 1.1 oster }
392 1.1 oster
393 1.1 oster RF_DagHeader_t *rf_AllocDAGHeader()
394 1.1 oster {
395 1.1 oster RF_DagHeader_t *dh;
396 1.1 oster
397 1.1 oster RF_FREELIST_GET(rf_dagh_freelist,dh,next,(RF_DagHeader_t *));
398 1.1 oster if (dh) {
399 1.1 oster bzero((char *)dh, sizeof(RF_DagHeader_t));
400 1.1 oster }
401 1.1 oster return(dh);
402 1.1 oster }
403 1.1 oster
404 1.1 oster void rf_FreeDAGHeader(RF_DagHeader_t *dh)
405 1.1 oster {
406 1.1 oster RF_FREELIST_FREE(rf_dagh_freelist,dh,next);
407 1.1 oster }
408 1.1 oster
409 1.1 oster /* allocates a buffer big enough to hold the data described by pda */
410 1.1 oster void *rf_AllocBuffer(
411 1.1 oster RF_Raid_t *raidPtr,
412 1.1 oster RF_DagHeader_t *dag_h,
413 1.1 oster RF_PhysDiskAddr_t *pda,
414 1.1 oster RF_AllocListElem_t *allocList)
415 1.1 oster {
416 1.1 oster char *p;
417 1.1 oster
418 1.1 oster RF_MallocAndAdd(p, pda->numSector << raidPtr->logBytesPerSector,
419 1.1 oster (char *), allocList);
420 1.1 oster return((void *)p);
421 1.1 oster }
422 1.1 oster
423 1.1 oster /******************************************************************************
424 1.1 oster *
425 1.1 oster * debug routines
426 1.1 oster *
427 1.1 oster *****************************************************************************/
428 1.1 oster
429 1.1 oster char *rf_NodeStatusString(RF_DagNode_t *node)
430 1.1 oster {
431 1.1 oster switch (node->status) {
432 1.1 oster case rf_wait: return("wait");
433 1.1 oster case rf_fired: return("fired");
434 1.1 oster case rf_good: return("good");
435 1.1 oster case rf_bad: return("bad");
436 1.1 oster default: return("?");
437 1.1 oster }
438 1.1 oster }
439 1.1 oster
440 1.1 oster void rf_PrintNodeInfoString(RF_DagNode_t *node)
441 1.1 oster {
442 1.1 oster RF_PhysDiskAddr_t *pda;
443 1.1 oster int (*df)(RF_DagNode_t *) = node->doFunc;
444 1.1 oster int i, lk, unlk;
445 1.1 oster void *bufPtr;
446 1.1 oster
447 1.1 oster if ((df==rf_DiskReadFunc) || (df==rf_DiskWriteFunc)
448 1.1 oster || (df==rf_DiskReadMirrorIdleFunc)
449 1.1 oster || (df == rf_DiskReadMirrorPartitionFunc))
450 1.1 oster {
451 1.1 oster pda = (RF_PhysDiskAddr_t *)node->params[0].p;
452 1.1 oster bufPtr = (void *)node->params[1].p;
453 1.1 oster lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
454 1.1 oster unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
455 1.1 oster RF_ASSERT( !(lk && unlk) );
456 1.1 oster printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row, pda->col,
457 1.1 oster (long)pda->startSector, (int) pda->numSector, (long)bufPtr,
458 1.1 oster (lk) ? "LOCK" : ((unlk) ? "UNLK" : " "));
459 1.1 oster return;
460 1.1 oster }
461 1.1 oster
462 1.1 oster if (df == rf_DiskUnlockFunc) {
463 1.1 oster pda = (RF_PhysDiskAddr_t *)node->params[0].p;
464 1.1 oster lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
465 1.1 oster unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
466 1.1 oster RF_ASSERT( !(lk && unlk) );
467 1.1 oster printf("r %d c %d %s\n", pda->row, pda->col,
468 1.1 oster (lk) ? "LOCK" : ((unlk) ? "UNLK" : "nop"));
469 1.1 oster return;
470 1.1 oster }
471 1.1 oster
472 1.1 oster if ((df==rf_SimpleXorFunc) || (df==rf_RegularXorFunc)
473 1.1 oster || (df==rf_RecoveryXorFunc))
474 1.1 oster {
475 1.1 oster printf("result buf 0x%lx\n",(long) node->results[0]);
476 1.1 oster for (i=0; i<node->numParams-1; i+=2) {
477 1.1 oster pda = (RF_PhysDiskAddr_t *)node->params[i].p;
478 1.1 oster bufPtr = (RF_PhysDiskAddr_t *)node->params[i+1].p;
479 1.1 oster printf(" buf 0x%lx r%d c%d offs %ld nsect %d\n",
480 1.1 oster (long)bufPtr, pda->row, pda->col,
481 1.1 oster (long)pda->startSector, (int)pda->numSector);
482 1.1 oster }
483 1.1 oster return;
484 1.1 oster }
485 1.1 oster
486 1.1 oster #if RF_INCLUDE_PARITYLOGGING > 0
487 1.1 oster if (df==rf_ParityLogOverwriteFunc || df==rf_ParityLogUpdateFunc) {
488 1.1 oster for (i=0; i<node->numParams-1; i+=2) {
489 1.1 oster pda = (RF_PhysDiskAddr_t *)node->params[i].p;
490 1.1 oster bufPtr = (RF_PhysDiskAddr_t *)node->params[i+1].p;
491 1.1 oster printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n",
492 1.1 oster pda->row, pda->col, (long) pda->startSector,
493 1.1 oster (int) pda->numSector, (long) bufPtr);
494 1.1 oster }
495 1.1 oster return;
496 1.1 oster }
497 1.1 oster #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
498 1.1 oster
499 1.1 oster if ((df==rf_TerminateFunc) || (df==rf_NullNodeFunc)) {
500 1.1 oster printf("\n");
501 1.1 oster return;
502 1.1 oster }
503 1.1 oster
504 1.1 oster printf("?\n");
505 1.1 oster }
506 1.1 oster
507 1.1 oster static void rf_RecurPrintDAG(node, depth, unvisited)
508 1.1 oster RF_DagNode_t *node;
509 1.1 oster int depth;
510 1.1 oster int unvisited;
511 1.1 oster {
512 1.1 oster char *anttype;
513 1.1 oster int i;
514 1.1 oster
515 1.1 oster node->visited = (unvisited) ? 0 : 1;
516 1.1 oster printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth,
517 1.1 oster node->nodeNum, node->commitNode, node->name, rf_NodeStatusString(node),
518 1.1 oster node->numSuccedents, node->numSuccFired, node->numSuccDone,
519 1.1 oster node->numAntecedents, node->numAntDone, node->numParams,node->numResults);
520 1.1 oster for (i=0; i<node->numSuccedents; i++) {
521 1.1 oster printf("%d%s", node->succedents[i]->nodeNum,
522 1.1 oster ((i==node->numSuccedents-1) ? "\0" : " "));
523 1.1 oster }
524 1.1 oster printf("} A{");
525 1.1 oster for (i=0; i<node->numAntecedents; i++) {
526 1.1 oster switch (node->antType[i]) {
527 1.1 oster case rf_trueData :
528 1.1 oster anttype = "T";
529 1.1 oster break;
530 1.1 oster case rf_antiData :
531 1.1 oster anttype = "A";
532 1.1 oster break;
533 1.1 oster case rf_outputData :
534 1.1 oster anttype = "O";
535 1.1 oster break;
536 1.1 oster case rf_control :
537 1.1 oster anttype = "C";
538 1.1 oster break;
539 1.1 oster default :
540 1.1 oster anttype = "?";
541 1.1 oster break;
542 1.1 oster }
543 1.1 oster printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype, (i==node->numAntecedents-1) ? "\0" : " ");
544 1.1 oster }
545 1.1 oster printf("}; ");
546 1.1 oster rf_PrintNodeInfoString(node);
547 1.1 oster for (i=0; i<node->numSuccedents; i++) {
548 1.1 oster if (node->succedents[i]->visited == unvisited)
549 1.1 oster rf_RecurPrintDAG(node->succedents[i], depth+1, unvisited);
550 1.1 oster }
551 1.1 oster }
552 1.1 oster
553 1.1 oster static void rf_PrintDAG(dag_h)
554 1.1 oster RF_DagHeader_t *dag_h;
555 1.1 oster {
556 1.1 oster int unvisited, i;
557 1.1 oster char *status;
558 1.1 oster
559 1.1 oster /* set dag status */
560 1.1 oster switch (dag_h->status) {
561 1.1 oster case rf_enable :
562 1.1 oster status = "enable";
563 1.1 oster break;
564 1.1 oster case rf_rollForward :
565 1.1 oster status = "rollForward";
566 1.1 oster break;
567 1.1 oster case rf_rollBackward :
568 1.1 oster status = "rollBackward";
569 1.1 oster break;
570 1.1 oster default :
571 1.1 oster status = "illegal!";
572 1.1 oster break;
573 1.1 oster }
574 1.1 oster /* find out if visited bits are currently set or clear */
575 1.1 oster unvisited = dag_h->succedents[0]->visited;
576 1.1 oster
577 1.1 oster printf("DAG type: %s\n", dag_h->creator);
578 1.1 oster printf("format is (depth) num commit type: status,nSucc nSuccFired/nSuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n");
579 1.1 oster printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum,
580 1.1 oster status, dag_h->numSuccedents, dag_h->numCommitNodes, dag_h->numCommits);
581 1.1 oster for (i=0; i<dag_h->numSuccedents; i++) {
582 1.1 oster printf("%d%s", dag_h->succedents[i]->nodeNum,
583 1.1 oster ((i==dag_h->numSuccedents-1) ? "\0" : " "));
584 1.1 oster }
585 1.1 oster printf("};\n");
586 1.1 oster for (i=0; i<dag_h->numSuccedents; i++) {
587 1.1 oster if (dag_h->succedents[i]->visited == unvisited)
588 1.1 oster rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited);
589 1.1 oster }
590 1.1 oster }
591 1.1 oster
592 1.1 oster /* assigns node numbers */
593 1.1 oster int rf_AssignNodeNums(RF_DagHeader_t *dag_h)
594 1.1 oster {
595 1.1 oster int unvisited, i, nnum;
596 1.1 oster RF_DagNode_t *node;
597 1.1 oster
598 1.1 oster nnum = 0;
599 1.1 oster unvisited = dag_h->succedents[0]->visited;
600 1.1 oster
601 1.1 oster dag_h->nodeNum = nnum++;
602 1.1 oster for (i=0; i<dag_h->numSuccedents; i++) {
603 1.1 oster node = dag_h->succedents[i];
604 1.1 oster if (node->visited == unvisited) {
605 1.1 oster nnum = rf_RecurAssignNodeNums(dag_h->succedents[i], nnum, unvisited);
606 1.1 oster }
607 1.1 oster }
608 1.1 oster return(nnum);
609 1.1 oster }
610 1.1 oster
611 1.1 oster int rf_RecurAssignNodeNums(node, num, unvisited)
612 1.1 oster RF_DagNode_t *node;
613 1.1 oster int num;
614 1.1 oster int unvisited;
615 1.1 oster {
616 1.1 oster int i;
617 1.1 oster
618 1.1 oster node->visited = (unvisited) ? 0 : 1;
619 1.1 oster
620 1.1 oster node->nodeNum = num++;
621 1.1 oster for (i=0; i<node->numSuccedents; i++) {
622 1.1 oster if (node->succedents[i]->visited == unvisited) {
623 1.1 oster num = rf_RecurAssignNodeNums(node->succedents[i], num, unvisited);
624 1.1 oster }
625 1.1 oster }
626 1.1 oster return(num);
627 1.1 oster }
628 1.1 oster
629 1.1 oster /* set the header pointers in each node to "newptr" */
630 1.1 oster void rf_ResetDAGHeaderPointers(dag_h, newptr)
631 1.1 oster RF_DagHeader_t *dag_h;
632 1.1 oster RF_DagHeader_t *newptr;
633 1.1 oster {
634 1.1 oster int i;
635 1.1 oster for (i=0; i<dag_h->numSuccedents; i++)
636 1.1 oster if (dag_h->succedents[i]->dagHdr != newptr)
637 1.1 oster rf_RecurResetDAGHeaderPointers(dag_h->succedents[i], newptr);
638 1.1 oster }
639 1.1 oster
640 1.1 oster void rf_RecurResetDAGHeaderPointers(node, newptr)
641 1.1 oster RF_DagNode_t *node;
642 1.1 oster RF_DagHeader_t *newptr;
643 1.1 oster {
644 1.1 oster int i;
645 1.1 oster node->dagHdr = newptr;
646 1.1 oster for (i=0; i<node->numSuccedents; i++)
647 1.1 oster if (node->succedents[i]->dagHdr != newptr)
648 1.1 oster rf_RecurResetDAGHeaderPointers(node->succedents[i], newptr);
649 1.1 oster }
650 1.1 oster
651 1.1 oster
652 1.1 oster void rf_PrintDAGList(RF_DagHeader_t *dag_h)
653 1.1 oster {
654 1.1 oster int i=0;
655 1.1 oster
656 1.1 oster for (; dag_h; dag_h=dag_h->next) {
657 1.1 oster rf_AssignNodeNums(dag_h);
658 1.1 oster printf("\n\nDAG %d IN LIST:\n",i++);
659 1.1 oster rf_PrintDAG(dag_h);
660 1.1 oster }
661 1.1 oster }
662 1.1 oster
663 1.1 oster static int rf_ValidateBranch(node, scount, acount, nodes, unvisited)
664 1.1 oster RF_DagNode_t *node;
665 1.1 oster int *scount;
666 1.1 oster int *acount;
667 1.1 oster RF_DagNode_t **nodes;
668 1.1 oster int unvisited;
669 1.1 oster {
670 1.1 oster int i, retcode = 0;
671 1.1 oster
672 1.1 oster /* construct an array of node pointers indexed by node num */
673 1.1 oster node->visited = (unvisited) ? 0 : 1;
674 1.1 oster nodes[ node->nodeNum ] = node;
675 1.1 oster
676 1.1 oster if (node->next != NULL) {
677 1.1 oster printf("INVALID DAG: next pointer in node is not NULL\n");
678 1.1 oster retcode = 1;
679 1.1 oster }
680 1.1 oster if (node->status != rf_wait) {
681 1.1 oster printf("INVALID DAG: Node status is not wait\n");
682 1.1 oster retcode = 1;
683 1.1 oster }
684 1.1 oster if (node->numAntDone != 0) {
685 1.1 oster printf("INVALID DAG: numAntDone is not zero\n");
686 1.1 oster retcode = 1;
687 1.1 oster }
688 1.1 oster if (node->doFunc == rf_TerminateFunc) {
689 1.1 oster if (node->numSuccedents != 0) {
690 1.1 oster printf("INVALID DAG: Terminator node has succedents\n");
691 1.1 oster retcode = 1;
692 1.1 oster }
693 1.1 oster } else {
694 1.1 oster if (node->numSuccedents == 0) {
695 1.1 oster printf("INVALID DAG: Non-terminator node has no succedents\n");
696 1.1 oster retcode = 1;
697 1.1 oster }
698 1.1 oster }
699 1.1 oster for (i=0; i<node->numSuccedents; i++) {
700 1.1 oster if (!node->succedents[i]) {
701 1.1 oster printf("INVALID DAG: succedent %d of node %s is NULL\n",i,node->name);
702 1.1 oster retcode = 1;
703 1.1 oster }
704 1.1 oster scount[ node->succedents[i]->nodeNum ]++;
705 1.1 oster }
706 1.1 oster for (i=0; i<node->numAntecedents; i++) {
707 1.1 oster if (!node->antecedents[i]) {
708 1.1 oster printf("INVALID DAG: antecedent %d of node %s is NULL\n",i,node->name);
709 1.1 oster retcode = 1;
710 1.1 oster }
711 1.1 oster acount[ node->antecedents[i]->nodeNum ]++;
712 1.1 oster }
713 1.1 oster for (i=0; i<node->numSuccedents; i++) {
714 1.1 oster if (node->succedents[i]->visited == unvisited) {
715 1.1 oster if (rf_ValidateBranch(node->succedents[i], scount,
716 1.1 oster acount, nodes, unvisited))
717 1.1 oster {
718 1.1 oster retcode = 1;
719 1.1 oster }
720 1.1 oster }
721 1.1 oster }
722 1.1 oster return(retcode);
723 1.1 oster }
724 1.1 oster
725 1.1 oster static void rf_ValidateBranchVisitedBits(node, unvisited, rl)
726 1.1 oster RF_DagNode_t *node;
727 1.1 oster int unvisited;
728 1.1 oster int rl;
729 1.1 oster {
730 1.1 oster int i;
731 1.1 oster
732 1.1 oster RF_ASSERT(node->visited == unvisited);
733 1.1 oster for (i=0; i<node->numSuccedents; i++) {
734 1.1 oster if (node->succedents[i] == NULL) {
735 1.1 oster printf("node=%lx node->succedents[%d] is NULL\n", (long)node, i);
736 1.1 oster RF_ASSERT(0);
737 1.1 oster }
738 1.1 oster rf_ValidateBranchVisitedBits(node->succedents[i],unvisited, rl+1);
739 1.1 oster }
740 1.1 oster }
741 1.1 oster
742 1.1 oster /* NOTE: never call this on a big dag, because it is exponential
743 1.1 oster * in execution time
744 1.1 oster */
745 1.1 oster static void rf_ValidateVisitedBits(dag)
746 1.1 oster RF_DagHeader_t *dag;
747 1.1 oster {
748 1.1 oster int i, unvisited;
749 1.1 oster
750 1.1 oster unvisited = dag->succedents[0]->visited;
751 1.1 oster
752 1.1 oster for (i=0; i<dag->numSuccedents; i++) {
753 1.1 oster if (dag->succedents[i] == NULL) {
754 1.1 oster printf("dag=%lx dag->succedents[%d] is NULL\n", (long) dag, i);
755 1.1 oster RF_ASSERT(0);
756 1.1 oster }
757 1.1 oster rf_ValidateBranchVisitedBits(dag->succedents[i],unvisited,0);
758 1.1 oster }
759 1.1 oster }
760 1.1 oster
761 1.1 oster /* validate a DAG. _at entry_ verify that:
762 1.1 oster * -- numNodesCompleted is zero
763 1.1 oster * -- node queue is null
764 1.1 oster * -- dag status is rf_enable
765 1.1 oster * -- next pointer is null on every node
766 1.1 oster * -- all nodes have status wait
767 1.1 oster * -- numAntDone is zero in all nodes
768 1.1 oster * -- terminator node has zero successors
769 1.1 oster * -- no other node besides terminator has zero successors
770 1.1 oster * -- no successor or antecedent pointer in a node is NULL
771 1.1 oster * -- number of times that each node appears as a successor of another node
772 1.1 oster * is equal to the antecedent count on that node
773 1.1 oster * -- number of times that each node appears as an antecedent of another node
774 1.1 oster * is equal to the succedent count on that node
775 1.1 oster * -- what else?
776 1.1 oster */
777 1.1 oster int rf_ValidateDAG(dag_h)
778 1.1 oster RF_DagHeader_t *dag_h;
779 1.1 oster {
780 1.1 oster int i, nodecount;
781 1.1 oster int *scount, *acount; /* per-node successor and antecedent counts */
782 1.1 oster RF_DagNode_t **nodes; /* array of ptrs to nodes in dag */
783 1.1 oster int retcode = 0;
784 1.1 oster int unvisited;
785 1.1 oster int commitNodeCount = 0;
786 1.1 oster
787 1.1 oster if (rf_validateVisitedDebug)
788 1.1 oster rf_ValidateVisitedBits(dag_h);
789 1.1 oster
790 1.1 oster if (dag_h->numNodesCompleted != 0) {
791 1.1 oster printf("INVALID DAG: num nodes completed is %d, should be 0\n",dag_h->numNodesCompleted);
792 1.1 oster retcode = 1; goto validate_dag_bad;
793 1.1 oster }
794 1.1 oster if (dag_h->status != rf_enable) {
795 1.1 oster printf("INVALID DAG: not enabled\n");
796 1.1 oster retcode = 1; goto validate_dag_bad;
797 1.1 oster }
798 1.1 oster if (dag_h->numCommits != 0) {
799 1.1 oster printf("INVALID DAG: numCommits != 0 (%d)\n",dag_h->numCommits);
800 1.1 oster retcode = 1; goto validate_dag_bad;
801 1.1 oster }
802 1.1 oster if (dag_h->numSuccedents != 1) {
803 1.1 oster /* currently, all dags must have only one succedent */
804 1.1 oster printf("INVALID DAG: numSuccedents !1 (%d)\n",dag_h->numSuccedents);
805 1.1 oster retcode = 1; goto validate_dag_bad;
806 1.1 oster }
807 1.1 oster nodecount = rf_AssignNodeNums(dag_h);
808 1.1 oster
809 1.1 oster unvisited = dag_h->succedents[0]->visited;
810 1.1 oster
811 1.1 oster RF_Calloc(scount, nodecount, sizeof(int), (int *));
812 1.1 oster RF_Calloc(acount, nodecount, sizeof(int), (int *));
813 1.1 oster RF_Calloc(nodes, nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **));
814 1.1 oster for (i=0; i<dag_h->numSuccedents; i++) {
815 1.1 oster if ((dag_h->succedents[i]->visited == unvisited)
816 1.1 oster && rf_ValidateBranch(dag_h->succedents[i], scount,
817 1.1 oster acount, nodes, unvisited))
818 1.1 oster {
819 1.1 oster retcode = 1;
820 1.1 oster }
821 1.1 oster }
822 1.1 oster /* start at 1 to skip the header node */
823 1.1 oster for (i=1; i<nodecount; i++) {
824 1.1 oster if ( nodes[i]->commitNode )
825 1.1 oster commitNodeCount++;
826 1.1 oster if ( nodes[i]->doFunc == NULL ) {
827 1.1 oster printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name);
828 1.1 oster retcode = 1;
829 1.1 oster goto validate_dag_out;
830 1.1 oster }
831 1.1 oster if ( nodes[i]->undoFunc == NULL ) {
832 1.1 oster printf("INVALID DAG: node %s has an undefined doFunc\n", nodes[i]->name);
833 1.1 oster retcode = 1;
834 1.1 oster goto validate_dag_out;
835 1.1 oster }
836 1.1 oster if ( nodes[i]->numAntecedents != scount[ nodes[i]->nodeNum ] ) {
837 1.1 oster printf("INVALID DAG: node %s has %d antecedents but appears as a succedent %d times\n",
838 1.1 oster nodes[i]->name, nodes[i]->numAntecedents, scount[nodes[i]->nodeNum]);
839 1.1 oster retcode = 1;
840 1.1 oster goto validate_dag_out;
841 1.1 oster }
842 1.1 oster if ( nodes[i]->numSuccedents != acount[ nodes[i]->nodeNum ] ) {
843 1.1 oster printf("INVALID DAG: node %s has %d succedents but appears as an antecedent %d times\n",
844 1.1 oster nodes[i]->name, nodes[i]->numSuccedents, acount[nodes[i]->nodeNum]);
845 1.1 oster retcode = 1;
846 1.1 oster goto validate_dag_out;
847 1.1 oster }
848 1.1 oster }
849 1.1 oster
850 1.1 oster if ( dag_h->numCommitNodes != commitNodeCount ) {
851 1.1 oster printf("INVALID DAG: incorrect commit node count. hdr->numCommitNodes (%d) found (%d) commit nodes in graph\n",
852 1.1 oster dag_h->numCommitNodes, commitNodeCount);
853 1.1 oster retcode = 1;
854 1.1 oster goto validate_dag_out;
855 1.1 oster }
856 1.1 oster
857 1.1 oster validate_dag_out:
858 1.1 oster RF_Free(scount, nodecount*sizeof(int));
859 1.1 oster RF_Free(acount, nodecount*sizeof(int));
860 1.1 oster RF_Free(nodes, nodecount*sizeof(RF_DagNode_t *));
861 1.1 oster if (retcode)
862 1.1 oster rf_PrintDAGList(dag_h);
863 1.1 oster
864 1.1 oster if (rf_validateVisitedDebug)
865 1.1 oster rf_ValidateVisitedBits(dag_h);
866 1.1 oster
867 1.1 oster return(retcode);
868 1.1 oster
869 1.1 oster validate_dag_bad:
870 1.1 oster rf_PrintDAGList(dag_h);
871 1.1 oster return(retcode);
872 1.1 oster }
873 1.1 oster
874 1.1 oster
875 1.1 oster /******************************************************************************
876 1.1 oster *
877 1.1 oster * misc construction routines
878 1.1 oster *
879 1.1 oster *****************************************************************************/
880 1.1 oster
881 1.1 oster void rf_redirect_asm(
882 1.1 oster RF_Raid_t *raidPtr,
883 1.1 oster RF_AccessStripeMap_t *asmap)
884 1.1 oster {
885 1.1 oster int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 1 : 0;
886 1.1 oster int row = asmap->physInfo->row;
887 1.1 oster int fcol = raidPtr->reconControl[row]->fcol;
888 1.1 oster int srow = raidPtr->reconControl[row]->spareRow;
889 1.1 oster int scol = raidPtr->reconControl[row]->spareCol;
890 1.1 oster RF_PhysDiskAddr_t *pda;
891 1.1 oster
892 1.1 oster RF_ASSERT( raidPtr->status[row] == rf_rs_reconstructing );
893 1.1 oster for (pda = asmap->physInfo; pda; pda=pda->next) {
894 1.1 oster if (pda->col == fcol) {
895 1.1 oster if (rf_dagDebug) {
896 1.1 oster if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap,
897 1.1 oster pda->startSector))
898 1.1 oster {
899 1.1 oster RF_PANIC();
900 1.1 oster }
901 1.1 oster }
902 1.1 oster /*printf("Remapped data for large write\n");*/
903 1.1 oster if (ds) {
904 1.1 oster raidPtr->Layout.map->MapSector(raidPtr, pda->raidAddress,
905 1.1 oster &pda->row, &pda->col, &pda->startSector, RF_REMAP);
906 1.1 oster }
907 1.1 oster else {
908 1.1 oster pda->row = srow; pda->col = scol;
909 1.1 oster }
910 1.1 oster }
911 1.1 oster }
912 1.1 oster for (pda = asmap->parityInfo; pda; pda=pda->next) {
913 1.1 oster if (pda->col == fcol) {
914 1.1 oster if (rf_dagDebug) {
915 1.1 oster if (!rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap, pda->startSector)) {
916 1.1 oster RF_PANIC();
917 1.1 oster }
918 1.1 oster }
919 1.1 oster }
920 1.1 oster if (ds) {
921 1.1 oster (raidPtr->Layout.map->MapParity)(raidPtr, pda->raidAddress, &pda->row, &pda->col, &pda->startSector, RF_REMAP);
922 1.1 oster }
923 1.1 oster else {
924 1.1 oster pda->row = srow; pda->col = scol;
925 1.1 oster }
926 1.1 oster }
927 1.1 oster }
928 1.1 oster
929 1.1 oster
930 1.1 oster /* this routine allocates read buffers and generates stripe maps for the
931 1.1 oster * regions of the array from the start of the stripe to the start of the
932 1.1 oster * access, and from the end of the access to the end of the stripe. It also
933 1.1 oster * computes and returns the number of DAG nodes needed to read all this data.
934 1.1 oster * Note that this routine does the wrong thing if the access is fully
935 1.1 oster * contained within one stripe unit, so we RF_ASSERT against this case at the
936 1.1 oster * start.
937 1.1 oster */
938 1.1 oster void rf_MapUnaccessedPortionOfStripe(
939 1.1 oster RF_Raid_t *raidPtr,
940 1.1 oster RF_RaidLayout_t *layoutPtr, /* in: layout information */
941 1.1 oster RF_AccessStripeMap_t *asmap, /* in: access stripe map */
942 1.1 oster RF_DagHeader_t *dag_h, /* in: header of the dag to create */
943 1.1 oster RF_AccessStripeMapHeader_t **new_asm_h, /* in: ptr to array of 2 headers, to be filled in */
944 1.1 oster int *nRodNodes, /* out: num nodes to be generated to read unaccessed data */
945 1.1 oster char **sosBuffer, /* out: pointers to newly allocated buffer */
946 1.1 oster char **eosBuffer,
947 1.1 oster RF_AllocListElem_t *allocList)
948 1.1 oster {
949 1.1 oster RF_RaidAddr_t sosRaidAddress, eosRaidAddress;
950 1.1 oster RF_SectorNum_t sosNumSector, eosNumSector;
951 1.1 oster
952 1.1 oster RF_ASSERT( asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol/2) );
953 1.1 oster /* generate an access map for the region of the array from start of stripe
954 1.1 oster * to start of access */
955 1.1 oster new_asm_h[0] = new_asm_h[1] = NULL; *nRodNodes = 0;
956 1.1 oster if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) {
957 1.1 oster sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
958 1.1 oster sosNumSector = asmap->raidAddress - sosRaidAddress;
959 1.1 oster RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr, sosNumSector), (char *), allocList);
960 1.1 oster new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress, sosNumSector, *sosBuffer, RF_DONT_REMAP);
961 1.1 oster new_asm_h[0]->next = dag_h->asmList;
962 1.1 oster dag_h->asmList = new_asm_h[0];
963 1.1 oster *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
964 1.1 oster
965 1.1 oster RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL);
966 1.1 oster /* we're totally within one stripe here */
967 1.1 oster if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
968 1.1 oster rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap);
969 1.1 oster }
970 1.1 oster /* generate an access map for the region of the array from end of access
971 1.1 oster * to end of stripe */
972 1.1 oster if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) {
973 1.1 oster eosRaidAddress = asmap->endRaidAddress;
974 1.1 oster eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr, eosRaidAddress) - eosRaidAddress;
975 1.1 oster RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr, eosNumSector), (char *), allocList);
976 1.1 oster new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress, eosNumSector, *eosBuffer, RF_DONT_REMAP);
977 1.1 oster new_asm_h[1]->next = dag_h->asmList;
978 1.1 oster dag_h->asmList = new_asm_h[1];
979 1.1 oster *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
980 1.1 oster
981 1.1 oster RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL);
982 1.1 oster /* we're totally within one stripe here */
983 1.1 oster if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
984 1.1 oster rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap);
985 1.1 oster }
986 1.1 oster }
987 1.1 oster
988 1.1 oster
989 1.1 oster
990 1.1 oster /* returns non-zero if the indicated ranges of stripe unit offsets overlap */
991 1.1 oster int rf_PDAOverlap(
992 1.1 oster RF_RaidLayout_t *layoutPtr,
993 1.1 oster RF_PhysDiskAddr_t *src,
994 1.1 oster RF_PhysDiskAddr_t *dest)
995 1.1 oster {
996 1.1 oster RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector);
997 1.1 oster RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector);
998 1.1 oster /* use -1 to be sure we stay within SU */
999 1.1 oster RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector-1);
1000 1.1 oster RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector-1);
1001 1.1 oster return( (RF_MAX(soffs,doffs) <= RF_MIN(send,dend)) ? 1 : 0 );
1002 1.1 oster }
1003 1.1 oster
1004 1.1 oster
1005 1.1 oster /* GenerateFailedAccessASMs
1006 1.1 oster *
1007 1.1 oster * this routine figures out what portion of the stripe needs to be read
1008 1.1 oster * to effect the degraded read or write operation. It's primary function
1009 1.1 oster * is to identify everything required to recover the data, and then
1010 1.1 oster * eliminate anything that is already being accessed by the user.
1011 1.1 oster *
1012 1.1 oster * The main result is two new ASMs, one for the region from the start of the
1013 1.1 oster * stripe to the start of the access, and one for the region from the end of
1014 1.1 oster * the access to the end of the stripe. These ASMs describe everything that
1015 1.1 oster * needs to be read to effect the degraded access. Other results are:
1016 1.1 oster * nXorBufs -- the total number of buffers that need to be XORed together to
1017 1.1 oster * recover the lost data,
1018 1.1 oster * rpBufPtr -- ptr to a newly-allocated buffer to hold the parity. If NULL
1019 1.1 oster * at entry, not allocated.
1020 1.1 oster * overlappingPDAs --
1021 1.1 oster * describes which of the non-failed PDAs in the user access
1022 1.1 oster * overlap data that needs to be read to effect recovery.
1023 1.1 oster * overlappingPDAs[i]==1 if and only if, neglecting the failed
1024 1.1 oster * PDA, the ith pda in the input asm overlaps data that needs
1025 1.1 oster * to be read for recovery.
1026 1.1 oster */
1027 1.1 oster /* in: asm - ASM for the actual access, one stripe only */
1028 1.1 oster /* in: faildPDA - which component of the access has failed */
1029 1.1 oster /* in: dag_h - header of the DAG we're going to create */
1030 1.1 oster /* out: new_asm_h - the two new ASMs */
1031 1.1 oster /* out: nXorBufs - the total number of xor bufs required */
1032 1.1 oster /* out: rpBufPtr - a buffer for the parity read */
1033 1.1 oster void rf_GenerateFailedAccessASMs(
1034 1.1 oster RF_Raid_t *raidPtr,
1035 1.1 oster RF_AccessStripeMap_t *asmap,
1036 1.1 oster RF_PhysDiskAddr_t *failedPDA,
1037 1.1 oster RF_DagHeader_t *dag_h,
1038 1.1 oster RF_AccessStripeMapHeader_t **new_asm_h,
1039 1.1 oster int *nXorBufs,
1040 1.1 oster char **rpBufPtr,
1041 1.1 oster char *overlappingPDAs,
1042 1.1 oster RF_AllocListElem_t *allocList)
1043 1.1 oster {
1044 1.1 oster RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
1045 1.1 oster
1046 1.1 oster /* s=start, e=end, s=stripe, a=access, f=failed, su=stripe unit */
1047 1.1 oster RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr;
1048 1.1 oster
1049 1.1 oster RF_SectorCount_t numSect[2], numParitySect;
1050 1.1 oster RF_PhysDiskAddr_t *pda;
1051 1.1 oster char *rdBuf, *bufP;
1052 1.1 oster int foundit, i;
1053 1.1 oster
1054 1.1 oster bufP = NULL;
1055 1.1 oster foundit = 0;
1056 1.1 oster /* first compute the following raid addresses:
1057 1.1 oster start of stripe, (sosAddr)
1058 1.1 oster MIN(start of access, start of failed SU), (sosEndAddr)
1059 1.1 oster MAX(end of access, end of failed SU), (eosStartAddr)
1060 1.1 oster end of stripe (i.e. start of next stripe) (eosAddr)
1061 1.1 oster */
1062 1.1 oster sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
1063 1.1 oster sosEndAddr = RF_MIN(asmap->raidAddress, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,failedPDA->raidAddress));
1064 1.1 oster eosStartAddr = RF_MAX(asmap->endRaidAddress, rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, failedPDA->raidAddress));
1065 1.1 oster eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr, asmap->raidAddress);
1066 1.1 oster
1067 1.1 oster /* now generate access stripe maps for each of the above regions of the
1068 1.1 oster * stripe. Use a dummy (NULL) buf ptr for now */
1069 1.1 oster
1070 1.1 oster new_asm_h[0] = (sosAddr != sosEndAddr) ? rf_MapAccess(raidPtr, sosAddr, sosEndAddr-sosAddr, NULL, RF_DONT_REMAP) : NULL;
1071 1.1 oster new_asm_h[1] = (eosStartAddr != eosAddr) ? rf_MapAccess(raidPtr, eosStartAddr, eosAddr-eosStartAddr, NULL, RF_DONT_REMAP) : NULL;
1072 1.1 oster
1073 1.1 oster /* walk through the PDAs and range-restrict each SU to the region of the
1074 1.1 oster * SU touched on the failed PDA. also compute total data buffer space
1075 1.1 oster * requirements in this step. Ignore the parity for now. */
1076 1.1 oster
1077 1.1 oster numSect[0] = numSect[1] = 0;
1078 1.1 oster if (new_asm_h[0]) {
1079 1.1 oster new_asm_h[0]->next = dag_h->asmList; dag_h->asmList = new_asm_h[0];
1080 1.1 oster for (pda = new_asm_h[0]->stripeMap->physInfo; pda; pda = pda->next) {
1081 1.1 oster rf_RangeRestrictPDA(raidPtr,failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); numSect[0] += pda->numSector;
1082 1.1 oster }
1083 1.1 oster }
1084 1.1 oster if (new_asm_h[1]) {
1085 1.1 oster new_asm_h[1]->next = dag_h->asmList; dag_h->asmList = new_asm_h[1];
1086 1.1 oster for (pda = new_asm_h[1]->stripeMap->physInfo; pda; pda = pda->next) {
1087 1.1 oster rf_RangeRestrictPDA(raidPtr,failedPDA, pda, RF_RESTRICT_NOBUFFER, 0); numSect[1] += pda->numSector;
1088 1.1 oster }
1089 1.1 oster }
1090 1.1 oster numParitySect = failedPDA->numSector;
1091 1.1 oster
1092 1.1 oster /* allocate buffer space for the data & parity we have to read to recover
1093 1.1 oster * from the failure */
1094 1.1 oster
1095 1.1 oster if (numSect[0]+numSect[1]+ ((rpBufPtr) ? numParitySect : 0)) { /* don't allocate parity buf if not needed */
1096 1.1 oster RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr,numSect[0]+numSect[1]+numParitySect), (char *), allocList);
1097 1.1 oster bufP = rdBuf;
1098 1.1 oster if (rf_degDagDebug) printf("Newly allocated buffer (%d bytes) is 0x%lx\n",
1099 1.1 oster (int)rf_RaidAddressToByte(raidPtr,numSect[0]+numSect[1]+numParitySect), (unsigned long) bufP);
1100 1.1 oster }
1101 1.1 oster
1102 1.1 oster /* now walk through the pdas one last time and assign buffer pointers
1103 1.1 oster * (ugh!). Again, ignore the parity. also, count nodes to find out how
1104 1.1 oster * many bufs need to be xored together */
1105 1.1 oster (*nXorBufs) = 1; /* in read case, 1 is for parity. In write case, 1 is for failed data */
1106 1.1 oster if (new_asm_h[0]) {
1107 1.1 oster for (pda=new_asm_h[0]->stripeMap->physInfo; pda; pda=pda->next) {pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr,pda->numSector);}
1108 1.1 oster *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
1109 1.1 oster }
1110 1.1 oster if (new_asm_h[1]) {
1111 1.1 oster for (pda=new_asm_h[1]->stripeMap->physInfo; pda; pda=pda->next) {pda->bufPtr = bufP; bufP += rf_RaidAddressToByte(raidPtr,pda->numSector);}
1112 1.1 oster (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
1113 1.1 oster }
1114 1.1 oster if (rpBufPtr) *rpBufPtr = bufP; /* the rest of the buffer is for parity */
1115 1.1 oster
1116 1.1 oster /* the last step is to figure out how many more distinct buffers need to
1117 1.1 oster * get xor'd to produce the missing unit. there's one for each user-data
1118 1.1 oster * read node that overlaps the portion of the failed unit being accessed */
1119 1.1 oster
1120 1.1 oster for (foundit=i=0,pda=asmap->physInfo; pda; i++,pda=pda->next) {
1121 1.1 oster if (pda == failedPDA) {i--; foundit=1; continue;}
1122 1.1 oster if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) {
1123 1.1 oster overlappingPDAs[i] = 1;
1124 1.1 oster (*nXorBufs)++;
1125 1.1 oster }
1126 1.1 oster }
1127 1.1 oster if (!foundit) {RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA in asm list\n"); RF_ASSERT(0);}
1128 1.1 oster
1129 1.1 oster if (rf_degDagDebug) {
1130 1.1 oster if (new_asm_h[0]) {
1131 1.1 oster printf("First asm:\n"); rf_PrintFullAccessStripeMap(new_asm_h[0], 1);
1132 1.1 oster }
1133 1.1 oster if (new_asm_h[1]) {
1134 1.1 oster printf("Second asm:\n"); rf_PrintFullAccessStripeMap(new_asm_h[1], 1);
1135 1.1 oster }
1136 1.1 oster }
1137 1.1 oster }
1138 1.1 oster
1139 1.1 oster
1140 1.1 oster /* adjusts the offset and number of sectors in the destination pda so that
1141 1.1 oster * it covers at most the region of the SU covered by the source PDA. This
1142 1.1 oster * is exclusively a restriction: the number of sectors indicated by the
1143 1.1 oster * target PDA can only shrink.
1144 1.1 oster *
1145 1.1 oster * For example: s = sectors within SU indicated by source PDA
1146 1.1 oster * d = sectors within SU indicated by dest PDA
1147 1.1 oster * r = results, stored in dest PDA
1148 1.1 oster *
1149 1.1 oster * |--------------- one stripe unit ---------------------|
1150 1.1 oster * | sssssssssssssssssssssssssssssssss |
1151 1.1 oster * | ddddddddddddddddddddddddddddddddddddddddddddd |
1152 1.1 oster * | rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr |
1153 1.1 oster *
1154 1.1 oster * Another example:
1155 1.1 oster *
1156 1.1 oster * |--------------- one stripe unit ---------------------|
1157 1.1 oster * | sssssssssssssssssssssssssssssssss |
1158 1.1 oster * | ddddddddddddddddddddddd |
1159 1.1 oster * | rrrrrrrrrrrrrrrr |
1160 1.1 oster *
1161 1.1 oster */
1162 1.1 oster void rf_RangeRestrictPDA(
1163 1.1 oster RF_Raid_t *raidPtr,
1164 1.1 oster RF_PhysDiskAddr_t *src,
1165 1.1 oster RF_PhysDiskAddr_t *dest,
1166 1.1 oster int dobuffer,
1167 1.1 oster int doraidaddr)
1168 1.1 oster {
1169 1.1 oster RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
1170 1.1 oster RF_SectorNum_t soffs = rf_StripeUnitOffset(layoutPtr, src->startSector);
1171 1.1 oster RF_SectorNum_t doffs = rf_StripeUnitOffset(layoutPtr, dest->startSector);
1172 1.1 oster RF_SectorNum_t send = rf_StripeUnitOffset(layoutPtr, src->startSector + src->numSector-1); /* use -1 to be sure we stay within SU */
1173 1.1 oster RF_SectorNum_t dend = rf_StripeUnitOffset(layoutPtr, dest->startSector + dest->numSector-1);
1174 1.1 oster RF_SectorNum_t subAddr = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->startSector); /* stripe unit boundary */
1175 1.1 oster
1176 1.1 oster dest->startSector = subAddr + RF_MAX(soffs,doffs);
1177 1.1 oster dest->numSector = subAddr + RF_MIN(send,dend) + 1 - dest->startSector;
1178 1.1 oster
1179 1.1 oster if (dobuffer)
1180 1.1 oster dest->bufPtr += (soffs > doffs) ? rf_RaidAddressToByte(raidPtr,soffs-doffs) : 0;
1181 1.1 oster if (doraidaddr) {
1182 1.1 oster dest->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, dest->raidAddress) +
1183 1.1 oster rf_StripeUnitOffset(layoutPtr, dest->startSector);
1184 1.1 oster }
1185 1.1 oster }
1186 1.1 oster
1187 1.1 oster /*
1188 1.1 oster * Want the highest of these primes to be the largest one
1189 1.1 oster * less than the max expected number of columns (won't hurt
1190 1.1 oster * to be too small or too large, but won't be optimal, either)
1191 1.1 oster * --jimz
1192 1.1 oster */
1193 1.1 oster #define NLOWPRIMES 8
1194 1.1 oster static int lowprimes[NLOWPRIMES] = {2,3,5,7,11,13,17,19};
1195 1.1 oster
1196 1.1 oster /*****************************************************************************
1197 1.1 oster * compute the workload shift factor. (chained declustering)
1198 1.1 oster *
1199 1.1 oster * return nonzero if access should shift to secondary, otherwise,
1200 1.1 oster * access is to primary
1201 1.1 oster *****************************************************************************/
1202 1.1 oster int rf_compute_workload_shift(
1203 1.1 oster RF_Raid_t *raidPtr,
1204 1.1 oster RF_PhysDiskAddr_t *pda)
1205 1.1 oster {
1206 1.1 oster /*
1207 1.1 oster * variables:
1208 1.1 oster * d = column of disk containing primary
1209 1.1 oster * f = column of failed disk
1210 1.1 oster * n = number of disks in array
1211 1.1 oster * sd = "shift distance" (number of columns that d is to the right of f)
1212 1.1 oster * row = row of array the access is in
1213 1.1 oster * v = numerator of redirection ratio
1214 1.1 oster * k = denominator of redirection ratio
1215 1.1 oster */
1216 1.1 oster RF_RowCol_t d, f, sd, row, n;
1217 1.1 oster int k, v, ret, i;
1218 1.1 oster
1219 1.1 oster row = pda->row;
1220 1.1 oster n = raidPtr->numCol;
1221 1.1 oster
1222 1.1 oster /* assign column of primary copy to d */
1223 1.1 oster d = pda->col;
1224 1.1 oster
1225 1.1 oster /* assign column of dead disk to f */
1226 1.1 oster for(f=0;((!RF_DEAD_DISK(raidPtr->Disks[row][f].status))&&(f<n));f++);
1227 1.1 oster
1228 1.1 oster RF_ASSERT(f < n);
1229 1.1 oster RF_ASSERT(f != d);
1230 1.1 oster
1231 1.1 oster sd = (f > d) ? (n + d - f) : (d - f);
1232 1.1 oster RF_ASSERT(sd < n);
1233 1.1 oster
1234 1.1 oster /*
1235 1.1 oster * v of every k accesses should be redirected
1236 1.1 oster *
1237 1.1 oster * v/k := (n-1-sd)/(n-1)
1238 1.1 oster */
1239 1.1 oster v = (n-1-sd);
1240 1.1 oster k = (n-1);
1241 1.1 oster
1242 1.1 oster #if 1
1243 1.1 oster /*
1244 1.1 oster * XXX
1245 1.1 oster * Is this worth it?
1246 1.1 oster *
1247 1.1 oster * Now reduce the fraction, by repeatedly factoring
1248 1.1 oster * out primes (just like they teach in elementary school!)
1249 1.1 oster */
1250 1.1 oster for(i=0;i<NLOWPRIMES;i++) {
1251 1.1 oster if (lowprimes[i] > v)
1252 1.1 oster break;
1253 1.1 oster while (((v%lowprimes[i])==0) && ((k%lowprimes[i])==0)) {
1254 1.1 oster v /= lowprimes[i];
1255 1.1 oster k /= lowprimes[i];
1256 1.1 oster }
1257 1.1 oster }
1258 1.1 oster #endif
1259 1.1 oster
1260 1.1 oster raidPtr->hist_diskreq[row][d]++;
1261 1.1 oster if (raidPtr->hist_diskreq[row][d] > v) {
1262 1.1 oster ret = 0; /* do not redirect */
1263 1.1 oster }
1264 1.1 oster else {
1265 1.1 oster ret = 1; /* redirect */
1266 1.1 oster }
1267 1.1 oster
1268 1.1 oster #if 0
1269 1.1 oster printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret,
1270 1.1 oster raidPtr->hist_diskreq[row][d]);
1271 1.1 oster #endif
1272 1.1 oster
1273 1.1 oster if (raidPtr->hist_diskreq[row][d] >= k) {
1274 1.1 oster /* reset counter */
1275 1.1 oster raidPtr->hist_diskreq[row][d] = 0;
1276 1.1 oster }
1277 1.1 oster
1278 1.1 oster return(ret);
1279 1.1 oster }
1280 1.1 oster
1281 1.1 oster /*
1282 1.1 oster * Disk selection routines
1283 1.1 oster */
1284 1.1 oster
1285 1.1 oster /*
1286 1.1 oster * Selects the disk with the shortest queue from a mirror pair.
1287 1.1 oster * Both the disk I/Os queued in RAIDframe as well as those at the physical
1288 1.1 oster * disk are counted as members of the "queue"
1289 1.1 oster */
1290 1.1 oster void rf_SelectMirrorDiskIdle(RF_DagNode_t *node)
1291 1.1 oster {
1292 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
1293 1.1 oster RF_RowCol_t rowData, colData, rowMirror, colMirror;
1294 1.1 oster int dataQueueLength, mirrorQueueLength, usemirror;
1295 1.1 oster RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *)node->params[0].p;
1296 1.1 oster RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *)node->params[4].p;
1297 1.1 oster RF_PhysDiskAddr_t *tmp_pda;
1298 1.1 oster RF_RaidDisk_t **disks = raidPtr->Disks;
1299 1.1 oster RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
1300 1.1 oster
1301 1.1 oster /* return the [row col] of the disk with the shortest queue */
1302 1.1 oster rowData = data_pda->row;
1303 1.1 oster colData = data_pda->col;
1304 1.1 oster rowMirror = mirror_pda->row;
1305 1.1 oster colMirror = mirror_pda->col;
1306 1.1 oster dataQueue = &(dqs[rowData][colData]);
1307 1.1 oster mirrorQueue = &(dqs[rowMirror][colMirror]);
1308 1.1 oster
1309 1.1 oster #ifdef RF_LOCK_QUEUES_TO_READ_LEN
1310 1.1 oster RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
1311 1.1 oster #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
1312 1.1 oster dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding;
1313 1.1 oster #ifdef RF_LOCK_QUEUES_TO_READ_LEN
1314 1.1 oster RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
1315 1.1 oster RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
1316 1.1 oster #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
1317 1.1 oster mirrorQueueLength = mirrorQueue->queueLength + mirrorQueue->numOutstanding;
1318 1.1 oster #ifdef RF_LOCK_QUEUES_TO_READ_LEN
1319 1.1 oster RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
1320 1.1 oster #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
1321 1.1 oster
1322 1.1 oster usemirror = 0;
1323 1.1 oster if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
1324 1.1 oster usemirror = 0;
1325 1.1 oster }
1326 1.1 oster else if (RF_DEAD_DISK(disks[rowData][colData].status)) {
1327 1.1 oster usemirror = 1;
1328 1.1 oster }
1329 1.1 oster else if (dataQueueLength < mirrorQueueLength) {
1330 1.1 oster usemirror = 0;
1331 1.1 oster }
1332 1.1 oster else if (mirrorQueueLength < dataQueueLength) {
1333 1.1 oster usemirror = 1;
1334 1.1 oster }
1335 1.1 oster else {
1336 1.1 oster /* queues are equal length. attempt cleverness. */
1337 1.1 oster if (SNUM_DIFF(dataQueue->last_deq_sector,data_pda->startSector)
1338 1.1 oster <= SNUM_DIFF(mirrorQueue->last_deq_sector,mirror_pda->startSector))
1339 1.1 oster {
1340 1.1 oster usemirror = 0;
1341 1.1 oster }
1342 1.1 oster else {
1343 1.1 oster usemirror = 1;
1344 1.1 oster }
1345 1.1 oster }
1346 1.1 oster
1347 1.1 oster if (usemirror) {
1348 1.1 oster /* use mirror (parity) disk, swap params 0 & 4 */
1349 1.1 oster tmp_pda = data_pda;
1350 1.1 oster node->params[0].p = mirror_pda;
1351 1.1 oster node->params[4].p = tmp_pda;
1352 1.1 oster }
1353 1.1 oster else {
1354 1.1 oster /* use data disk, leave param 0 unchanged */
1355 1.1 oster }
1356 1.1 oster /* printf("dataQueueLength %d, mirrorQueueLength %d\n",dataQueueLength, mirrorQueueLength); */
1357 1.1 oster }
1358 1.1 oster
1359 1.1 oster /*
1360 1.1 oster * Do simple partitioning. This assumes that
1361 1.1 oster * the data and parity disks are laid out identically.
1362 1.1 oster */
1363 1.1 oster void rf_SelectMirrorDiskPartition(RF_DagNode_t *node)
1364 1.1 oster {
1365 1.1 oster RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
1366 1.1 oster RF_RowCol_t rowData, colData, rowMirror, colMirror;
1367 1.1 oster RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *)node->params[0].p;
1368 1.1 oster RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *)node->params[4].p;
1369 1.1 oster RF_PhysDiskAddr_t *tmp_pda;
1370 1.1 oster RF_RaidDisk_t **disks = raidPtr->Disks;
1371 1.1 oster RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
1372 1.1 oster int usemirror;
1373 1.1 oster
1374 1.1 oster /* return the [row col] of the disk with the shortest queue */
1375 1.1 oster rowData = data_pda->row;
1376 1.1 oster colData = data_pda->col;
1377 1.1 oster rowMirror = mirror_pda->row;
1378 1.1 oster colMirror = mirror_pda->col;
1379 1.1 oster dataQueue = &(dqs[rowData][colData]);
1380 1.1 oster mirrorQueue = &(dqs[rowMirror][colMirror]);
1381 1.1 oster
1382 1.1 oster usemirror = 0;
1383 1.1 oster if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
1384 1.1 oster usemirror = 0;
1385 1.1 oster }
1386 1.1 oster else if (RF_DEAD_DISK(disks[rowData][colData].status)) {
1387 1.1 oster usemirror = 1;
1388 1.1 oster }
1389 1.1 oster else if (data_pda->startSector < (disks[rowData][colData].numBlocks / 2)) {
1390 1.1 oster usemirror = 0;
1391 1.1 oster }
1392 1.1 oster else {
1393 1.1 oster usemirror = 1;
1394 1.1 oster }
1395 1.1 oster
1396 1.1 oster if (usemirror) {
1397 1.1 oster /* use mirror (parity) disk, swap params 0 & 4 */
1398 1.1 oster tmp_pda = data_pda;
1399 1.1 oster node->params[0].p = mirror_pda;
1400 1.1 oster node->params[4].p = tmp_pda;
1401 1.1 oster }
1402 1.1 oster else {
1403 1.1 oster /* use data disk, leave param 0 unchanged */
1404 1.1 oster }
1405 1.1 oster }
1406